Example #1
def dump_ddl(metadata: MetaData,
             dialect_name: str,
             fileobj: TextIO = sys.stdout,
             checkfirst: bool = True) -> None:
    """
    Sends schema-creating DDL (``CREATE TABLE`` statements) from the metadata,
    via a mock engine for the chosen dialect, to ``fileobj``.

    Args:
        metadata: SQLAlchemy :class:`MetaData`
        dialect_name: string name of SQL dialect to generate DDL in
        fileobj: file-like object to send DDL to
        checkfirst: if ``True``, use ``CREATE TABLE IF NOT EXISTS`` or
            equivalent.
    """

    # http://docs.sqlalchemy.org/en/rel_0_8/faq.html#how-can-i-get-the-create-table-drop-table-output-as-a-string  # noqa
    # http://stackoverflow.com/questions/870925/how-to-generate-a-file-with-ddl-in-the-engines-sql-dialect-in-sqlalchemy  # noqa
    # https://github.com/plq/scripts/blob/master/pg_dump.py
    # noinspection PyUnusedLocal
    def dump(querysql, *multiparams, **params):
        compsql = querysql.compile(dialect=engine.dialect)
        writeline_nl(fileobj, f"{compsql};")

    writeline_nl(fileobj, sql_comment(f"Schema (for dialect {dialect_name}):"))
    engine = create_engine(f'{dialect_name}://',
                           strategy='mock',
                           executor=dump)
    metadata.create_all(engine, checkfirst=checkfirst)
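A minimal usage sketch (hypothetical table and output filename; assumes SQLAlchemy older than 1.4, where mock engines are created via the ``strategy='mock'`` argument used above):

from sqlalchemy import Column, Integer, MetaData, Table, Text

metadata = MetaData()
Table("person", metadata,  # hypothetical table, for illustration only
      Column("person_id", Integer, primary_key=True),
      Column("name", Text))
with open("schema.sql", "w") as f:  # hypothetical output file
    dump_ddl(metadata, dialect_name="sqlite", fileobj=f)
# schema.sql now contains CREATE TABLE IF NOT EXISTS statements for SQLite.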
Example #2
def dump_orm_tree_as_insert_sql(engine: Engine, baseobj: object,
                                fileobj: TextIO) -> None:
    """
    Sends an object, and all its related objects (discovered via
    ``relationship`` links), as SQL ``INSERT`` commands to ``fileobj``.

    Args:
        engine: SQLAlchemy :class:`Engine`
        baseobj: starting SQLAlchemy ORM object
        fileobj: file-like object to write to

    Problem: foreign key constraints.
    
    - MySQL/InnoDB doesn't defer FK integrity checks until the end of a
      transaction (which it should):
      http://stackoverflow.com/questions/5014700/in-mysql-can-i-defer-referential-integrity-checks-until-commit  # noqa
    - PostgreSQL can.
    - Anyway, slightly ugly hacks...
      https://dev.mysql.com/doc/refman/5.5/en/optimizing-innodb-bulk-data-loading.html
    - Not so obvious how we can iterate through the list of ORM objects and
      guarantee correct insertion order with respect to all FKs.
    """  # noqa
    writeline_nl(
        fileobj,
        sql_comment("Data for all objects related to the first below:"))
    bulk_insert_extras(engine.dialect.name, fileobj, start=True)
    for part in walk_orm_tree(baseobj):
        dump_orm_object_as_insert_sql(engine, part, fileobj)
    bulk_insert_extras(engine.dialect.name, fileobj, start=False)
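A usage sketch, assuming a hypothetical ORM class ``Parent`` (with ``relationship()`` links to its child objects) and an already-configured ``session`` and ``engine``:

parent = session.query(Parent).first()  # hypothetical ORM object
with open("parent_tree.sql", "w") as f:  # hypothetical output file
    dump_orm_tree_as_insert_sql(engine, parent, f)
# f now contains INSERT statements for parent and everything reachable
# from it via relationship() links.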
Example #3
def dump_connection_info(engine: Engine, fileobj: TextIO = sys.stdout) -> None:
    """
    Dumps some connection info, as an SQL comment. Obscures passwords.

    Args:
        engine: the SQLAlchemy :class:`Engine` to dump metadata information
            from
        fileobj: the file-like object (default ``sys.stdout``) to write
            information to
    """
    meta = MetaData(bind=engine)
    writeline_nl(fileobj, sql_comment(f'Database info: {meta}'))
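For example (hypothetical SQLite database URL), the following writes a single SQL comment describing the connection to ``sys.stdout``:

from sqlalchemy import create_engine

engine = create_engine("sqlite:///test.db")  # hypothetical database
dump_connection_info(engine)  # prints e.g. "-- Database info: ..." to stdout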
Example #4
def dump_orm_object_as_insert_sql(engine: Engine, obj: object,
                                  fileobj: TextIO) -> None:
    """
    Takes a SQLAlchemy ORM object, and writes ``INSERT`` SQL to replicate it
    to the output file-like object.

    Args:
        engine: SQLAlchemy :class:`Engine`
        obj: SQLAlchemy ORM object to write
        fileobj: file-like object to write to
    """
    # literal_query = make_literal_query_fn(engine.dialect)
    insp = inspect(obj)
    # insp: an InstanceState
    # http://docs.sqlalchemy.org/en/latest/orm/internals.html#sqlalchemy.orm.state.InstanceState  # noqa
    # insp.mapper: a Mapper
    # http://docs.sqlalchemy.org/en/latest/orm/mapping_api.html#sqlalchemy.orm.mapper.Mapper  # noqa

    # Don't do this:
    #   table = insp.mapper.mapped_table
    # ... because that gives you fancy Python-side data types (e.g. list,
    # Arrow). We want the bog-standard datatypes drawn from the database
    # itself, so reflect the table from the database instead:
    meta = MetaData(bind=engine)
    table_name = insp.mapper.mapped_table.name
    # log.debug("table_name: {}", table_name)
    table = Table(table_name, meta, autoload=True)
    # log.debug("table: {}", table)

    # NewRecord = quick_mapper(table)
    # columns = table.columns.keys()
    query = select(table.columns)
    # log.debug("query: {}", query)
    for orm_pkcol in insp.mapper.primary_key:
        core_pkcol = table.columns.get(orm_pkcol.name)
        pkval = getattr(obj, orm_pkcol.name)
        query = query.where(core_pkcol == pkval)
    # log.debug("query: {}", query)
    cursor = engine.execute(query)
    row = cursor.fetchone()  # should only be one...
    row_dict = dict(row)
    # log.debug("obj: {}", obj)
    # log.debug("row_dict: {}", row_dict)
    statement = table.insert(values=row_dict)
    # insert_str = literal_query(statement)
    insert_str = get_literal_query(statement, bind=engine)
    writeline_nl(fileobj, insert_str)
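A brief usage sketch (hypothetical ORM class ``Person``, session, and output filename):

person = session.query(Person).get(1)  # hypothetical ORM object, PK = 1
with open("person_1.sql", "w") as f:
    dump_orm_object_as_insert_sql(engine, person, f)
# f now contains one INSERT statement reproducing that row.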
Example #5
def bulk_hash(input_filename: str,
              output_filename: str,
              hash_method: str,
              key: str,
              keep_id: bool = True):
    """
    Hash lines from one file to another.

    Args:
        input_filename:
            input filename, or "-" for stdin
        output_filename:
            output filename, or "-" for stdout
        hash_method:
            method to use; e.g. ``HMAC_SHA256``
        key:
            secret key for hasher
        keep_id:
            produce CSV with ``hash,id`` pairs, rather than just lines with
            the hashes?

    Note that the hash precedes the ID with the ``keep_id`` option, which
    works best if the ID might contain commas.
    """
    log.info(f"Reading from: {input_filename}")
    log.info(f"Writing to: {output_filename}")
    log.info(f"Using hash method: {hash_method}")
    log.info(f"keep_id: {keep_id}")
    log.debug(f"Using key: {key!r}")  # NB security warning in help
    hasher = make_hasher(hash_method=hash_method, key=key)
    with smart_open(input_filename, "rt") as i:  # type: TextIO
        with smart_open(output_filename, "wt") as o:  # type: TextIO
            for line in gen_noncomment_lines(i):
                hashed = hasher.hash(line) if line else ""
                outline = f"{hashed},{line}" if keep_id else hashed
                # log.debug(f"{line!r} -> {hashed!r}")
                writeline_nl(o, outline)
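For example, to hash one identifier per line of a text file with a keyed HMAC-SHA256 hash (hypothetical filenames and key; in practice the key should come from secure storage, not source code):

bulk_hash(input_filename="ids.txt",          # one ID per line
          output_filename="hashed_ids.csv",  # "hash,id" per line
          hash_method="HMAC_SHA256",
          key="my_secret_key",               # hypothetical secret key
          keep_id=True)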
Example #7
def dump_table_as_insert_sql(engine: Engine,
                             table_name: str,
                             fileobj: TextIO,
                             wheredict: Dict[str, Any] = None,
                             include_ddl: bool = False,
                             multirow: bool = False) -> None:
    """
    Reads a table from the database, and writes SQL to replicate the table's
    data to the output ``fileobj``.

    Args:
        engine: SQLAlchemy :class:`Engine`
        table_name: name of the table
        fileobj: file-like object to write to
        wheredict: optional dictionary of ``{column_name: value}`` to use as
            ``WHERE`` filters
        include_ddl: if ``True``, include the DDL to create the table as well
        multirow: write multi-row ``INSERT`` statements
    """
    # http://stackoverflow.com/questions/5631078/sqlalchemy-print-the-actual-query  # noqa
    # http://docs.sqlalchemy.org/en/latest/faq/sqlexpressions.html
    # http://www.tylerlesmann.com/2009/apr/27/copying-databases-across-platforms-sqlalchemy/  # noqa
    # https://github.com/plq/scripts/blob/master/pg_dump.py
    log.info("dump_table_as_insert_sql: table_name={}", table_name)
    writelines_nl(fileobj, [
        SEP1,
        sql_comment(f"Data for table: {table_name}"),
        SEP2,
        sql_comment(f"Filters: {wheredict}"),
    ])
    dialect = engine.dialect
    if not dialect.supports_multivalues_insert:
        multirow = False
    if multirow:
        log.warning("dump_table_as_insert_sql: multirow parameter substitution "
                    "not working yet")
        multirow = False

    # literal_query = make_literal_query_fn(dialect)

    meta = MetaData(bind=engine)
    log.debug("... retrieving schema")
    table = Table(table_name, meta, autoload=True)
    if include_ddl:
        log.debug("... producing DDL")
        dump_ddl(table.metadata,
                 dialect_name=engine.dialect.name,
                 fileobj=fileobj)
    # NewRecord = quick_mapper(table)
    # columns = table.columns.keys()
    log.debug("... fetching records")
    # log.debug("meta: {}", meta)  # obscures password
    # log.debug("table: {}", table)
    # log.debug("table.columns: {!r}", table.columns)
    # log.debug("multirow: {}", multirow)
    query = select(table.columns)
    if wheredict:
        for k, v in wheredict.items():
            col = table.columns.get(k)
            query = query.where(col == v)
    # log.debug("query: {}", query)
    cursor = engine.execute(query)
    if multirow:
        row_dict_list = []
        for r in cursor:
            row_dict_list.append(dict(r))
        # log.debug("row_dict_list: {}", row_dict_list)
        if row_dict_list:
            statement = table.insert().values(row_dict_list)
            # log.debug("statement: {!r}", statement)
            # insert_str = literal_query(statement)
            insert_str = get_literal_query(statement, bind=engine)
            # NOT WORKING FOR MULTIROW INSERTS. ONLY SUBSTITUTES FIRST ROW.
            writeline_nl(fileobj, insert_str)
        else:
            writeline_nl(fileobj, sql_comment("No data!"))
    else:
        found_one = False
        for r in cursor:
            found_one = True
            row_dict = dict(r)
            statement = table.insert(values=row_dict)
            # insert_str = literal_query(statement)
            insert_str = get_literal_query(statement, bind=engine)
            # log.debug("row_dict: {}", row_dict)
            # log.debug("insert_str: {}", insert_str)
            writeline_nl(fileobj, insert_str)
        if not found_one:
            writeline_nl(fileobj, sql_comment("No data!"))
    writeline_nl(fileobj, SEP2)
    log.debug("... done")
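A usage sketch (hypothetical table name, ``WHERE`` filter, and output filename, with an existing ``engine``):

with open("person_dump.sql", "w") as f:
    dump_table_as_insert_sql(engine,
                             table_name="person",          # hypothetical table
                             fileobj=f,
                             wheredict={"name": "Alice"},  # WHERE name = 'Alice'
                             include_ddl=True)             # also emit CREATE TABLE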