def dump_ddl(metadata: MetaData,
             dialect_name: str,
             fileobj: TextIO = sys.stdout,
             checkfirst: bool = True) -> None:
    """
    Sends schema-creating DDL from the metadata to the dump engine. This makes
    ``CREATE TABLE`` statements.

    Args:
        metadata: SQLAlchemy :class:`MetaData`
        dialect_name: string name of SQL dialect to generate DDL in
        fileobj: file-like object to send DDL to
        checkfirst: if ``True``, use ``CREATE TABLE IF NOT EXISTS`` or
            equivalent.
    """
    # http://docs.sqlalchemy.org/en/rel_0_8/faq.html#how-can-i-get-the-create-table-drop-table-output-as-a-string  # noqa
    # http://stackoverflow.com/questions/870925/how-to-generate-a-file-with-ddl-in-the-engines-sql-dialect-in-sqlalchemy  # noqa
    # https://github.com/plq/scripts/blob/master/pg_dump.py

    # noinspection PyUnusedLocal
    def dump(querysql, *multiparams, **params):
        compsql = querysql.compile(dialect=engine.dialect)
        writeline_nl(fileobj, f"{compsql};")

    writeline_nl(fileobj,
                 sql_comment(f"Schema (for dialect {dialect_name}):"))
    engine = create_engine(f'{dialect_name}://', strategy='mock',
                           executor=dump)
    metadata.create_all(engine, checkfirst=checkfirst)
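# A minimal usage sketch for dump_ddl(). The "person" table is hypothetical,
# purely for illustration; this writes CREATE TABLE statements to stdout.
def _example_dump_ddl() -> None:
    from sqlalchemy import Column, Integer, Text
    metadata = MetaData()
    Table("person", metadata,
          Column("id", Integer, primary_key=True),
          Column("name", Text))
    dump_ddl(metadata, dialect_name="mysql")  # default fileobj is sys.stdout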
def dump_orm_tree_as_insert_sql(engine: Engine,
                                baseobj: object,
                                fileobj: TextIO) -> None:
    """
    Sends an object, and all its relations (discovered via "relationship"
    links), as ``INSERT`` commands in SQL, to ``fileobj``.

    Args:
        engine: SQLAlchemy :class:`Engine`
        baseobj: starting SQLAlchemy ORM object
        fileobj: file-like object to write to

    Problem: foreign key constraints.

    - MySQL/InnoDB doesn't wait until the end of a transaction to check FK
      integrity (which it should):
      http://stackoverflow.com/questions/5014700/in-mysql-can-i-defer-referential-integrity-checks-until-commit
    - PostgreSQL can.
    - Anyway, there are slightly ugly hacks:
      https://dev.mysql.com/doc/refman/5.5/en/optimizing-innodb-bulk-data-loading.html
    - It's not obvious how to iterate through the list of ORM objects and
      guarantee correct insertion order with respect to all FKs.
    """  # noqa
    writeline_nl(
        fileobj,
        sql_comment("Data for all objects related to the first below:"))
    bulk_insert_extras(engine.dialect.name, fileobj, start=True)
    for part in walk_orm_tree(baseobj):
        dump_orm_object_as_insert_sql(engine, part, fileobj)
    bulk_insert_extras(engine.dialect.name, fileobj, start=False)
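# A minimal usage sketch for dump_orm_tree_as_insert_sql(). "person" stands in
# for any mapped ORM instance (a hypothetical Person object), with the engine
# it was loaded through; output goes to a hypothetical SQL file.
def _example_dump_orm_tree(engine: Engine, person: object) -> None:
    with open("person_dump.sql", "w") as f:
        dump_connection_info(engine, fileobj=f)  # header comment in the file
        dump_orm_tree_as_insert_sql(engine, person, fileobj=f)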
def dump_connection_info(engine: Engine,
                         fileobj: TextIO = sys.stdout) -> None:
    """
    Dumps some connection info, as an SQL comment. Obscures passwords.

    Args:
        engine: the SQLAlchemy :class:`Engine` to dump metadata information
            from
        fileobj: the file-like object (default ``sys.stdout``) to write
            information to
    """
    meta = MetaData(bind=engine)
    writeline_nl(fileobj, sql_comment(f'Database info: {meta}'))
def dump_orm_object_as_insert_sql(engine: Engine,
                                  obj: object,
                                  fileobj: TextIO) -> None:
    """
    Takes a SQLAlchemy ORM object, and writes ``INSERT`` SQL to replicate it
    to the output file-like object.

    Args:
        engine: SQLAlchemy :class:`Engine`
        obj: SQLAlchemy ORM object to write
        fileobj: file-like object to write to
    """
    # literal_query = make_literal_query_fn(engine.dialect)

    insp = inspect(obj)
    # insp: an InstanceState
    # http://docs.sqlalchemy.org/en/latest/orm/internals.html#sqlalchemy.orm.state.InstanceState  # noqa
    # insp.mapper: a Mapper
    # http://docs.sqlalchemy.org/en/latest/orm/mapping_api.html#sqlalchemy.orm.mapper.Mapper  # noqa

    # Don't do this:
    #   table = insp.mapper.mapped_table
    # Do this instead. The method above gives you fancy data types like list
    # and Arrow on the Python side. We want the bog-standard datatypes drawn
    # from the database itself.
    meta = MetaData(bind=engine)
    table_name = insp.mapper.mapped_table.name
    # log.debug("table_name: {}", table_name)
    table = Table(table_name, meta, autoload=True)
    # log.debug("table: {}", table)

    # NewRecord = quick_mapper(table)
    # columns = table.columns.keys()
    query = select(table.columns)
    # log.debug("query: {}", query)
    for orm_pkcol in insp.mapper.primary_key:
        core_pkcol = table.columns.get(orm_pkcol.name)
        pkval = getattr(obj, orm_pkcol.name)
        query = query.where(core_pkcol == pkval)
    # log.debug("query: {}", query)
    cursor = engine.execute(query)
    row = cursor.fetchone()  # should only be one...
    row_dict = dict(row)
    # log.debug("obj: {}", obj)
    # log.debug("row_dict: {}", row_dict)
    statement = table.insert(values=row_dict)
    # insert_str = literal_query(statement)
    insert_str = get_literal_query(statement, bind=engine)
    writeline_nl(fileobj, insert_str)
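# A minimal usage sketch for dump_orm_object_as_insert_sql(): write the INSERT
# for a single ORM instance (here "person", a hypothetical mapped object) to
# stdout, without following its relationships.
def _example_dump_single_object(engine: Engine, person: object) -> None:
    dump_orm_object_as_insert_sql(engine, person, fileobj=sys.stdout)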
def bulk_hash(input_filename: str,
              output_filename: str,
              hash_method: str,
              key: str,
              keep_id: bool = True) -> None:
    """
    Hashes lines from one file to another.

    Args:
        input_filename: input filename, or "-" for stdin
        output_filename: output filename, or "-" for stdout
        hash_method: method to use; e.g. ``HMAC_SHA256``
        key: secret key for hasher
        keep_id: produce CSV with ``hash,id`` pairs, rather than just lines
            with the hashes?

    Note that the hash precedes the ID with the ``keep_id`` option, which
    works best if the ID might contain commas.
    """
    log.info(f"Reading from: {input_filename}")
    log.info(f"Writing to: {output_filename}")
    log.info(f"Using hash method: {hash_method}")
    log.info(f"keep_id: {keep_id}")
    log.debug(f"Using key: {key!r}")  # NB security warning in help
    hasher = make_hasher(hash_method=hash_method, key=key)
    with smart_open(input_filename, "rt") as i:  # type: TextIO
        with smart_open(output_filename, "wt") as o:  # type: TextIO
            for line in gen_noncomment_lines(i):
                hashed = hasher.hash(line) if line else ""
                outline = f"{hashed},{line}" if keep_id else hashed
                # log.debug(f"{line!r} -> {hashed!r}")
                writeline_nl(o, outline)
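# A minimal usage sketch for bulk_hash(). The filename and key are
# hypothetical; "-" directs output to stdout, and keep_id=True yields
# "hash,original_line" pairs.
def _example_bulk_hash() -> None:
    bulk_hash(
        input_filename="ids.txt",   # one identifier per line
        output_filename="-",        # "-" means stdout
        hash_method="HMAC_SHA256",
        key="my_secret_key",        # don't hard-code real keys
        keep_id=True,
    )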
def dump_table_as_insert_sql(engine: Engine,
                             table_name: str,
                             fileobj: TextIO,
                             wheredict: Dict[str, Any] = None,
                             include_ddl: bool = False,
                             multirow: bool = False) -> None:
    """
    Reads a table from the database, and writes SQL to replicate the table's
    data to the output ``fileobj``.

    Args:
        engine: SQLAlchemy :class:`Engine`
        table_name: name of the table
        fileobj: file-like object to write to
        wheredict: optional dictionary of ``{column_name: value}`` to use as
            ``WHERE`` filters
        include_ddl: if ``True``, include the DDL to create the table as well
        multirow: write multi-row ``INSERT`` statements
    """
    # http://stackoverflow.com/questions/5631078/sqlalchemy-print-the-actual-query  # noqa
    # http://docs.sqlalchemy.org/en/latest/faq/sqlexpressions.html
    # http://www.tylerlesmann.com/2009/apr/27/copying-databases-across-platforms-sqlalchemy/  # noqa
    # https://github.com/plq/scripts/blob/master/pg_dump.py
    log.info("dump_table_as_insert_sql: table_name={}", table_name)
    writelines_nl(fileobj, [
        SEP1,
        sql_comment(f"Data for table: {table_name}"),
        SEP2,
        sql_comment(f"Filters: {wheredict}"),
    ])
    dialect = engine.dialect
    if not dialect.supports_multivalues_insert:
        multirow = False
    if multirow:
        log.warning("dump_table_as_insert_sql: multirow parameter "
                    "substitution not working yet")
        multirow = False

    # literal_query = make_literal_query_fn(dialect)

    meta = MetaData(bind=engine)
    log.debug("... retrieving schema")
    table = Table(table_name, meta, autoload=True)
    if include_ddl:
        log.debug("... producing DDL")
        dump_ddl(table.metadata, dialect_name=engine.dialect.name,
                 fileobj=fileobj)
    # NewRecord = quick_mapper(table)
    # columns = table.columns.keys()
    log.debug("... fetching records")
    # log.debug("meta: {}", meta)  # obscures password
    # log.debug("table: {}", table)
    # log.debug("table.columns: {!r}", table.columns)
    # log.debug("multirow: {}", multirow)
    query = select(table.columns)
    if wheredict:
        for k, v in wheredict.items():
            col = table.columns.get(k)
            query = query.where(col == v)
    # log.debug("query: {}", query)
    cursor = engine.execute(query)
    if multirow:
        row_dict_list = []
        for r in cursor:
            row_dict_list.append(dict(r))
        # log.debug("row_dict_list: {}", row_dict_list)
        if row_dict_list:
            statement = table.insert().values(row_dict_list)
            # log.debug("statement: {!r}", statement)
            # insert_str = literal_query(statement)
            insert_str = get_literal_query(statement, bind=engine)
            # NOT WORKING FOR MULTIROW INSERTS. ONLY SUBSTITUTES FIRST ROW.
            writeline_nl(fileobj, insert_str)
        else:
            writeline_nl(fileobj, sql_comment("No data!"))
    else:
        found_one = False
        for r in cursor:
            found_one = True
            row_dict = dict(r)
            statement = table.insert(values=row_dict)
            # insert_str = literal_query(statement)
            insert_str = get_literal_query(statement, bind=engine)
            # log.debug("row_dict: {}", row_dict)
            # log.debug("insert_str: {}", insert_str)
            writeline_nl(fileobj, insert_str)
        if not found_one:
            writeline_nl(fileobj, sql_comment("No data!"))
    writeline_nl(fileobj, SEP2)
    log.debug("... done")
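# A minimal usage sketch for dump_table_as_insert_sql(). The table and column
# names are hypothetical; this writes the CREATE TABLE statement plus INSERTs
# for the matching rows to a file.
def _example_dump_table(engine: Engine) -> None:
    with open("person_rows.sql", "w") as f:
        dump_table_as_insert_sql(
            engine,
            table_name="person",
            fileobj=f,
            wheredict={"active": 1},  # only rows WHERE active = 1
            include_ddl=True,         # prepend the CREATE TABLE statement
        )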