Example 1
import typing as t
from pathlib import Path

import psycopg2.extensions as pg2ext
import sqlalchemy


def export_to_csv(db_engine: sqlalchemy.engine.Engine,
                  table_or_view: str,
                  output_file: t.Union[Path, str],
                  schema: str = 'public') -> None:
    """Export a table/view to a CSV file using PostgreSQL's COPY"""
    output_file = Path(output_file).resolve()
    con: pg2ext.connection = db_engine.raw_connection()
    cur: pg2ext.cursor = con.cursor()
    query = f"COPY {schema}.{table_or_view} TO STDOUT WITH (FORMAT CSV, HEADER, DELIMITER ',')"

    try:
        with output_file.open(mode="wb") as fd:
            cur.copy_expert(sql=query, file=fd)
    finally:
        cur.close()
        con.close()  # return the connection to the engine's pool
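
A minimal usage sketch (the connection URL and table name are hypothetical, for illustration only):

engine = sqlalchemy.create_engine('postgresql+psycopg2://user:pass@localhost:5432/mydb')
export_to_csv(engine, table_or_view='users', output_file='users.csv')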
Example 2
# uses the same imports as Example 1
def import_from_csv(db_engine: sqlalchemy.engine.Engine,
                    input_file: t.Union[Path, str],
                    table: str,
                    schema: str = 'public') -> None:
    """Import a CSV file into the given table using PostgreSQL's COPY"""
    input_file = Path(input_file).resolve()
    con: pg2ext.connection = db_engine.raw_connection()
    cur: pg2ext.cursor = con.cursor()
    query = f"COPY {schema}.{table} FROM STDIN WITH (FORMAT CSV, HEADER, DELIMITER ',')"

    try:
        with input_file.open(mode="rb") as fd:
            cur.copy_expert(sql=query, file=fd)
        con.commit()  # COPY FROM writes to the table, so the transaction must be committed
    finally:
        cur.close()
        con.close()
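
A matching sketch for the import path, again with hypothetical names. The target table must already exist; the HEADER option makes COPY skip the CSV's first line:

engine = sqlalchemy.create_engine('postgresql+psycopg2://user:pass@localhost:5432/mydb')
import_from_csv(engine, input_file='users.csv', table='users_staging')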
Example 3
import subprocess
import tarfile

import sqlalchemy
from flask import current_app

# SchemaMismatchException and DUMP_DEFAULT_THREAD_COUNT are defined in the
# surrounding module of the original project.


def _import_dump(archive_path,
                 db_engine: sqlalchemy.engine.Engine,
                 tables,
                 schema_version: int,
                 threads=DUMP_DEFAULT_THREAD_COUNT):
    """ Import dump present in passed archive path into postgres db.

        Arguments:
            archive_path: path to the .tar.xz archive to be imported
            db_engine: an sqlalchemy Engine instance for making a connection
            tables: dict of tables present in the archive with table name as key and
                    columns to import as values
            schema_version: the current schema version, to compare against the dumped file
            threads (int): the number of threads to use while decompressing, defaults to
                            db.DUMP_DEFAULT_THREAD_COUNT
    """

    # decompress with pxz and stream the archive through a pipe instead of
    # extracting it to disk first
    pxz_command = [
        'pxz', '--decompress', '--stdout', archive_path, f'-T{threads}'
    ]
    pxz = subprocess.Popen(pxz_command, stdout=subprocess.PIPE)

    connection = db_engine.raw_connection()
    try:
        cursor = connection.cursor()
        with tarfile.open(fileobj=pxz.stdout, mode='r|') as tar:
            for member in tar:
                file_name = member.name.split('/')[-1]

                if file_name == 'SCHEMA_SEQUENCE':
                    # Verifying schema version
                    schema_seq = int(tar.extractfile(member).read().strip())
                    if schema_seq != schema_version:
                        raise SchemaMismatchException(
                            'Incorrect schema version! Expected: %d, got: %d. '
                            'Please get the latest version of the dump.' %
                            (schema_version, schema_seq))
                    current_app.logger.info('Schema version verified.')

                elif file_name in tables:
                    current_app.logger.info(
                        'Importing data into %s table...', file_name)
                    try:
                        cursor.copy_from(tar.extractfile(member),
                                         file_name,
                                         columns=tables[file_name])
                        connection.commit()
                    except IOError as e:
                        current_app.logger.critical(
                            'IOError while extracting table %s: %s',
                            file_name,
                            str(e),
                            exc_info=True)
                        raise
                    except Exception as e:
                        current_app.logger.critical(
                            'Exception while importing table %s: %s',
                            file_name,
                            str(e),
                            exc_info=True)
                        raise

                    current_app.logger.info('Imported table %s', file_name)
    finally:
        connection.close()
        pxz.stdout.close()
        pxz.wait()  # reap the pxz process so it doesn't linger as a zombie
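
A hypothetical call showing the expected shape of the tables argument (all table and column names here are made up). Note the function logs through flask's current_app, so it must run inside an application context:

engine = sqlalchemy.create_engine('postgresql+psycopg2://user:pass@localhost:5432/mydb')
tables = {
    'listen': ('user_id', 'created', 'data'),  # hypothetical table and columns
    'artist': ('id', 'name'),                  # hypothetical table and columns
}
_import_dump('/dumps/backup.tar.xz', engine, tables, schema_version=5)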
Example 4
import pandas as pd
import sqlalchemy as sa
from tqdm import tqdm

# get_df (a helper that reads a query into a DataFrame) and FEATURE_NAME
# (the name of the feature column) are defined in the surrounding module.


def add_sdo_geo_to_table(table_name: str,
                         wkt_geo_column: str,
                         geo_sdo_column: str,
                         eng: sa.engine.Engine,
                         is_wkt=True,
                         no_asserts=False,
                         dispose_eng=False):
    """
    Adds a separate SDO_GEOMETRY column from an existing wkt/wkb column
    Args:
        table_name: The table we're working on
        wkt_geo_column: The name of the column containing the wkt/wkb
        geo_sdo_column: The name of the column we want to store the sdo_geometry object
        eng: An engine object connecting the db
        is_wkt: True if wk_geo_column contains data in wkt format (otherwise in wkb)
        no_asserts: True if no asserts on columns are made (will override existing data)
        dispose_eng: Whether to dispose of the engine after the function

    Returns:
         None
    """
    # ROWNUM < 1 returns zero rows; we only need the column list
    df = get_df(f"SELECT * FROM {table_name} WHERE ROWNUM < 1", eng)
    if not no_asserts:
        assert wkt_geo_column in df.columns, f"{wkt_geo_column} not in table {table_name}"
        assert geo_sdo_column not in df.columns, f"{geo_sdo_column} already in table {table_name}"

    if geo_sdo_column not in df.columns and wkt_geo_column in df.columns:
        eng.execute(f"""
        ALTER TABLE {table_name}
        ADD {geo_sdo_column} SDO_GEOMETRY
                     """)
        eng.execute("COMMIT")

    # run the merge for each feature separately
    feature_names = pd.read_sql(
        f"select distinct {FEATURE_NAME} from {table_name}", eng).iloc[:, 0]
    conn = eng.raw_connection()
    cur = conn.cursor()

    def add_sdo(feature_name):
        SELECT_SDO_GEO = f"""select SDO_GEOMETRY({wkt_geo_column}, 4326) as {geo_sdo_column}, ROWID as rid 
                            from {table_name}
                            where {geo_sdo_column} IS NULL
                            and {FEATURE_NAME} = '{feature_name}'
                            """

        # TIP: when using the weird SDO_UTIL functions it's better to use the
        # raw connection: the MERGE INTO had no effect when run through the
        # engine, and only took effect via the raw connection.
        cur.execute(f"""
        merge into {table_name} curr
                    using ({SELECT_SDO_GEO}) tmp
                    on (curr.ROWID = tmp.rid)
                    when matched then
                    update set curr.{geo_sdo_column} = tmp.{geo_sdo_column}    
        """)
        conn.commit()

    for feature_name in tqdm(feature_names,
                             desc='adding SDO to features',
                             unit='feature'):
        add_sdo(feature_name)
    cur.close()
    conn.close()

    # fix coordinate system
    eng.execute(
        f"update {table_name} T set T.{geo_sdo_column}.SDO_SRID = 4326 WHERE T.{geo_sdo_column} is not null"
    )

    # register the column in user_sdo_geom_metadata, then add a spatial index
    usersdo_df = get_df("SELECT * FROM user_sdo_geom_metadata", eng)
    if (table_name, geo_sdo_column) not in [
            tuple(row)
            for row in usersdo_df[['TABLE_NAME', 'COLUMN_NAME']].values
    ]:
        eng.execute(f"""
        INSERT INTO user_sdo_geom_metadata
        VALUES ('{table_name}', '{geo_sdo_column}', sdo_dim_array(sdo_dim_element('X', -100, 100, 0.000005),
                                                       sdo_dim_element('Y', -100, 100, 0.000005)), 4326)
        """)

    # note: this checks for any index on the table, not only a spatial one
    is_there_index = len(
        eng.execute(f"""
                    select index_name
                    from SYS.ALL_INDEXES
                    where table_name = '{table_name}'
                    """).fetchall()) > 0

    if not is_there_index:
        acronym_short_geo_sdo = ''.join([
            s[0] for s in geo_sdo_column.split('_')
        ])  # first letter of each word
        eng.execute(f"""
                    CREATE INDEX {table_name}_{acronym_short_geo_sdo}_idx
                    ON {table_name} ({geo_sdo_column}) INDEXTYPE IS MDSYS.SPATIAL_INDEX
                    """)

    if dispose_eng:
        eng.dispose()
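
A usage sketch, assuming an Oracle engine under SQLAlchemy 1.x (where Engine.execute exists); the table and column names are hypothetical:

eng = sa.create_engine('oracle+cx_oracle://user:pass@host:1521/?service_name=orcl')
add_sdo_geo_to_table('ROADS', 'GEO_WKT', 'GEO_SDO', eng, dispose_eng=True)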