def export_sql(meta, data, output): """ Outputs data as SQL INSERT statements. Parameters: meta (sqlalchemy.schema.MetaData): Metadata for the database structure data (dict): Dictionary keyed by table name of dictionaries corresponding to table rows output (string): Path for output file Returns: None """ tables = [table for table in meta.sorted_tables if table.name in data] preparer = IdentifierPreparer(meta.bind.dialect) prepare_column = lambda column: preparer.format_column(column, name=column.name) output_file = open(output, 'w') for table in tables: columns = ', '.join([ prepare_column(column) for column in table.columns.values() ]) for row in data[table.name].values(): values = list(map(_transform, list(row.values()))) insert = "INSERT INTO %s (%s) VALUES (%s);\n" % ( preparer.format_table(table, name=table.name), columns, ', '.join(values) ) output_file.write(insert) output_file.close()
def create(self, encoding, template): with sqlalchemy_engine(self.url) as engine: preparer = IdentifierPreparer(engine.dialect) template = template or 'template1' database, template = preparer.quote(self.database), preparer.quote(template) stmt = f"CREATE DATABASE {database} ENCODING '{encoding}' TEMPLATE {template}" with engine.connect().execution_options(isolation_level='AUTOCOMMIT') as conn: conn.execute(stmt)
def create(self, encoding, *arg): # Ignore any args (template) with sqlalchemy_engine(self.url) as engine: preparer = IdentifierPreparer(engine.dialect) database = preparer.quote(self.database) stmt = f"CREATE DATABASE {database} CHARACTER SET = '{encoding}'" with engine.connect().execution_options(isolation_level='AUTOCOMMIT') as conn: conn.execute(stmt)
def drop_database(db_url, database): """Drop database; connect with db_url. Used only for test purposes to cleanup after creating a test database. """ if db_url.startswith('postgresql') or db_url.startswith('mysql'): with sqlalchemy_engine(db_url) as engine: preparer = IdentifierPreparer(engine.dialect) database = preparer.quote(database) stmt = f'DROP DATABASE IF EXISTS {database}' with engine.connect().execution_options(isolation_level='AUTOCOMMIT') as conn: conn.execute(stmt) else: url = make_url(db_url) os.remove(url.database)
def _build_sample_query(self, physical_data_source: str, table_name: str, columns: List[str]) -> str: preparer = IdentifierPreparer(DremioDialect()) [*maybe_db_and_schema, only_table_name] = preparer.unformat_identifiers(table_name) source_table = table(preparer.quote_identifier(only_table_name)) source_table.schema = '.'.join( [preparer.quote_identifier(physical_data_source)] + [ preparer.quote_identifier(component) for component in maybe_db_and_schema ]) query = select([column(c) for c in columns]).select_from(source_table).limit( self.params.population_size) return compile_query(query, RUNTIME_DIALECTS[HuskyQueryRuntime.dremio])
class BaseDb: """Base functionality common to all types of sqlalchemy databases that we support.""" # Subclasses should override with a specific IdentifierPreparer, or identifier quoting will not work properly. preparer = IdentifierPreparer(DefaultDialect()) @classmethod def create_engine(cls, spec): """Create an engine for connecting to the database specified.""" raise NotImplementedError() @classmethod def quote(cls, identifier): """Conditionally quote the given identifier (ie, if it is a keyword or contains reserved characters.""" # Subclasses should override cls.preparer with a specific IdentifierPreparer. return cls.preparer.quote(identifier) @classmethod def quote_table(cls, table_name, db_schema=None): return cls.preparer.format_table(sa.table(table_name, schema=db_schema)) @classmethod def list_tables(cls, sess, db_schema=None): """ Find all the user tables (not system tables) in the database (or in a specific db_schema). Returns a dict of {table_name: table_title} """ raise NotImplementedError() @classmethod def db_schema_searchpath(cls, sess): """Returns a list of the db_schemas that the connection is configured to search in by default.""" raise NotImplementedError() @classmethod def _pool_class(cls): # Ordinarily, sqlalchemy engine's maintain a pool of connections ready to go. # When running tests, we run lots of kart commands, and each command creates an engine, and each engine # maintains a pool. This can quickly exhaust the database's allowed connection limit. # One fix would be to share engines between kart commands run during tests that are connecting to the same DB. # But this fix is simpler for now: disable the pool during testing. return NullPool if "PYTEST_CURRENT_TEST" in os.environ else None @classmethod def _replace_localhost_with_ip(cls, url_netloc): def _get_localhost_ip(*args, **kwargs): return socket.gethostbyname("localhost") return re.sub(r"\blocalhost\b", _get_localhost_ip, url_netloc) @classmethod def _append_query_to_url(cls, uri, new_query_dict): url = urlsplit(uri) url_query = cls._append_to_query(url.query, new_query_dict) return urlunsplit([url.scheme, url.netloc, url.path, url_query, ""]) @classmethod def _append_to_query(cls, existing_query, new_query_dict): query_dict = parse_qs(existing_query) # ignore new keys if they're already set in the querystring return urlencode({**new_query_dict, **query_dict}, doseq=True) @classmethod def drop_all_in_schema(cls, sess, db_schema): """Drops all tables, routines, and sequences in schema db_schema.""" for thing in ("table", "routine", "sequence"): cls._drop_things_in_schema(cls, sess, db_schema, thing) def _drop_things_in_schema(cls, sess, db_schema, thing): r = sess.execute( sa.text( f"SELECT {thing}_name FROM information_schema.{thing}s WHERE {thing}_schema=:db_schema;" ), {"db_schema": db_schema}, ) thing_identifiers = ", ".join( (cls.quote_table(row[0], db_schema=db_schema) for row in r)) if thing_identifiers: sess.execute(f"DROP {thing} IF EXISTS {thing_identifiers};")