import subprocess
import tarfile
import typing as t
from pathlib import Path

import pandas as pd
import psycopg2.extensions as pg2ext
import sqlalchemy
import sqlalchemy as sa
from flask import current_app
from tqdm import tqdm

# DUMP_DEFAULT_THREAD_COUNT, SchemaMismatchException, FEATURE_NAME and get_df
# are assumed to be defined elsewhere in the project.


def export_to_csv(db_engine: sqlalchemy.engine.Engine, table_or_view: str,
                  output_file: t.Union[Path, str], schema: str = 'public') -> None:
    """Export a table or view to a CSV file."""
    output_file = Path(output_file).resolve()
    con: pg2ext.connection = db_engine.raw_connection()
    try:
        cur: pg2ext.cursor = con.cursor()
        query = f"COPY {schema}.{table_or_view} TO STDOUT WITH (FORMAT CSV, HEADER, DELIMITER ',')"
        # COPY ... TO STDOUT streams the rows straight into the file object.
        with output_file.open(mode="wb") as fd:
            cur.copy_expert(sql=query, file=fd)
    finally:
        con.close()
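

# A minimal usage sketch for export_to_csv, assuming a reachable PostgreSQL
# DSN and an existing 'listen' table — both are illustrative placeholders,
# not part of the original module.
def _demo_export_to_csv():
    engine = sqlalchemy.create_engine('postgresql+psycopg2://user:pass@localhost/mydb')
    export_to_csv(engine, 'listen', '/tmp/listen.csv')  # hypothetical table and path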


def import_from_csv(db_engine: sqlalchemy.engine.Engine, input_file: t.Union[Path, str],
                    table: str, schema: str = 'public') -> None:
    """Import a CSV file into the given table."""
    input_file = Path(input_file).resolve()
    con: pg2ext.connection = db_engine.raw_connection()
    try:
        cur: pg2ext.cursor = con.cursor()
        query = f"COPY {schema}.{table} FROM STDIN WITH (FORMAT CSV, HEADER, DELIMITER ',')"
        with input_file.open(mode="rb") as fd:
            cur.copy_expert(sql=query, file=fd)
        con.commit()
    finally:
        con.close()
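

# A matching sketch for import_from_csv, reloading the file exported above
# into a hypothetical empty copy of the table. Note that COPY ... FROM STDIN
# appends rows, so the target table should be empty or deduplicated afterwards.
def _demo_import_from_csv():
    engine = sqlalchemy.create_engine('postgresql+psycopg2://user:pass@localhost/mydb')
    import_from_csv(engine, '/tmp/listen.csv', 'listen_copy')  # hypothetical target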


def _import_dump(archive_path, db_engine: sqlalchemy.engine.Engine, tables,
                 schema_version: int, threads=DUMP_DEFAULT_THREAD_COUNT):
    """Import the dump in the given archive path into the postgres db.

    Arguments:
        archive_path: path to the .tar.xz archive to be imported
        db_engine: an sqlalchemy Engine instance for making a connection
        tables: dict of tables present in the archive, mapping table name
            to the columns to import
        schema_version: the current schema version, to compare against the
            dumped file
        threads (int): the number of threads to use while decompressing,
            defaults to db.DUMP_DEFAULT_THREAD_COUNT
    """
    pxz_command = ['pxz', '--decompress', '--stdout', archive_path, f'-T{threads}']
    pxz = subprocess.Popen(pxz_command, stdout=subprocess.PIPE)
    connection = db_engine.raw_connection()
    try:
        cursor = connection.cursor()
        # Stream the decompressed tar from pxz's stdout; mode='r|' reads it
        # sequentially without seeking.
        with tarfile.open(fileobj=pxz.stdout, mode='r|') as tar:
            for member in tar:
                file_name = member.name.split('/')[-1]
                if file_name == 'SCHEMA_SEQUENCE':
                    # Verify the schema version before importing any table.
                    schema_seq = int(tar.extractfile(member).read().strip())
                    if schema_seq != schema_version:
                        raise SchemaMismatchException(
                            'Incorrect schema version! Expected: %d, got: %d. '
                            'Please get the latest version of the dump.'
                            % (schema_version, schema_seq))
                    current_app.logger.info('Schema version verified.')
                elif file_name in tables:
                    current_app.logger.info('Importing data into %s table...', file_name)
                    try:
                        cursor.copy_from(tar.extractfile(member), file_name,
                                         columns=tables[file_name])
                        connection.commit()
                    except IOError as e:
                        current_app.logger.critical(
                            'IOError while extracting table %s: %s',
                            file_name, str(e), exc_info=True)
                        raise
                    except Exception as e:
                        current_app.logger.critical(
                            'Exception while importing table %s: %s',
                            file_name, str(e), exc_info=True)
                        raise
                    current_app.logger.info('Imported table %s', file_name)
    finally:
        connection.close()
        pxz.stdout.close()
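

# A usage sketch for _import_dump with hypothetical paths and table spec;
# `pxz` must be on PATH, and the archive must contain per-table files plus a
# SCHEMA_SEQUENCE file, as the loop above expects.
def _demo_import_dump(engine):
    _import_dump('/dumps/dump.tar.xz', engine,
                 tables={'listen': ('user_id', 'created', 'data')},  # hypothetical
                 schema_version=5)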


def add_sdo_geo_to_table(table_name: str, wkt_geo_column: str, geo_sdo_column: str,
                         eng: sa.engine.Engine, is_wkt=True, no_asserts=False,
                         dispose_eng=False):
    """Add a separate SDO_GEOMETRY column built from an existing WKT/WKB column.

    Args:
        table_name: the table we're working on
        wkt_geo_column: the name of the column containing the WKT/WKB
        geo_sdo_column: the name of the column in which to store the
            SDO_GEOMETRY object
        eng: an engine connected to the db
        is_wkt: True if wkt_geo_column contains data in WKT format (otherwise
            WKB); currently unused, since the SDO_GEOMETRY constructor is
            applied either way
        no_asserts: True to skip the column asserts (will override existing data)
        dispose_eng: whether to dispose of the engine when the function is done

    Returns:
        None
    """
    # Fetch a zero-row result just to inspect the table's columns.
    df = get_df(f"SELECT * FROM {table_name} WHERE ROWNUM < 1", eng)
    if not no_asserts:
        assert wkt_geo_column in df.columns, f"{wkt_geo_column} not in table {table_name}"
        assert geo_sdo_column not in df.columns, f"{geo_sdo_column} already in table {table_name}"
    if geo_sdo_column not in df.columns and wkt_geo_column in df.columns:
        eng.execute(f"ALTER TABLE {table_name} ADD {geo_sdo_column} SDO_GEOMETRY")
        eng.execute("COMMIT")

    # Run for each feature separately.
    feature_names = pd.read_sql(
        f"select distinct {FEATURE_NAME} from {table_name}", eng).iloc[:, 0]
    conn = eng.raw_connection()
    cur = conn.cursor()

    def add_sdo(feature_name):
        select_sdo_geo = f"""select SDO_GEOMETRY({wkt_geo_column}, 4326) as {geo_sdo_column},
                                    ROWID as rid
                             from {table_name}
                             where {geo_sdo_column} IS NULL
                               and {FEATURE_NAME} = '{feature_name}'"""
        # TIP: when using the quirky SDO_UTIL functions it is better to use
        # the raw connection; in this case the MERGE INTO returned no values
        # except through the raw cursor.
        cur.execute(f"""
            merge into {table_name} curr
            using ({select_sdo_geo}) tmp
            on (curr.ROWID = tmp.rid)
            when matched then update set curr.{geo_sdo_column} = tmp.{geo_sdo_column}
        """)
        conn.commit()

    for feature_name in tqdm(feature_names, desc='adding SDO to features', unit='feature'):
        add_sdo(feature_name)
    cur.close()
    conn.close()

    # Fix the coordinate system.
    eng.execute(
        f"update {table_name} T set T.{geo_sdo_column}.SDO_SRID = 4326 "
        f"WHERE T.{geo_sdo_column} is not null")

    # Add a spatial index and register the column in user_sdo_geom_metadata.
    usersdo_df = get_df("SELECT * FROM user_sdo_geom_metadata", eng)
    if (table_name, geo_sdo_column) not in [
            tuple(row) for row in usersdo_df[['TABLE_NAME', 'COLUMN_NAME']].values]:
        eng.execute(f"""
            INSERT INTO user_sdo_geom_metadata VALUES (
                '{table_name}', '{geo_sdo_column}',
                sdo_dim_array(sdo_dim_element('X', -100, 100, 0.000005),
                              sdo_dim_element('Y', -100, 100, 0.000005)),
                4326)
        """)
    is_there_index = len(eng.execute(f"""
        select index_name from SYS.ALL_INDEXES where table_name = '{table_name}'
    """).fetchall()) > 0
    if not is_there_index:
        # First letter of each word in the column name, e.g. GEO_SDO -> GS.
        acronym_short_geo_sdo = ''.join(s[0] for s in geo_sdo_column.split('_'))
        eng.execute(f"""
            CREATE INDEX {table_name}_{acronym_short_geo_sdo}_idx
            ON {table_name} ({geo_sdo_column})
            INDEXTYPE IS MDSYS.SPATIAL_INDEX
        """)
    if dispose_eng:
        eng.dispose()
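

# A usage sketch for add_sdo_geo_to_table with a hypothetical Oracle DSN and
# table/column names; FEATURE_NAME and get_df must be defined as this module
# assumes.
def _demo_add_sdo_geo():
    eng = sa.create_engine('oracle+cx_oracle://user:pass@host:1521/?service_name=orcl')
    add_sdo_geo_to_table('ROADS', 'GEO_WKT', 'GEO_SDO', eng, dispose_eng=True)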