Esempio n. 1
0
def __(source_db_alias: str,
       target_db_alias: str,
       target_table: str,
       timezone: str = None,
       csv_format: bool = None,
       delimiter_char: str = None):
    """Resolve both DB aliases and delegate to `copy_command`."""
    source_db = dbs.db(source_db_alias)
    target_db = dbs.db(target_db_alias)
    return copy_command(source_db,
                        target_db,
                        target_table=target_table,
                        timezone=timezone,
                        csv_format=csv_format,
                        delimiter_char=delimiter_char)
Esempio n. 2
0
def __(alias: str,
       header: bool = None,
       footer: bool = None,
       delimiter_char: str = None,
       csv_format: bool = None):
    """Resolve the alias and delegate to `copy_to_stdout_command`."""
    database = dbs.db(alias)
    options = dict(header=header,
                   footer=footer,
                   delimiter_char=delimiter_char,
                   csv_format=csv_format)
    return copy_to_stdout_command(database, **options)
Esempio n. 3
0
 def _target_name(self):
     """Return the singer target executable name for the configured target DB.

     Raises:
         Exception: if the target DB type has no singer target mapped to it
     """
     db = dbs.db(self.target_db_alias)
     # NOTE(review): iteration order mirrors the original isinstance chain
     # (PostgreSQL before Redshift before SQLite) in case the DB classes
     # are related by inheritance — keep it.
     for db_class, target in [(dbs.PostgreSQLDB, 'target-postgres'),
                              (dbs.RedshiftDB, 'target-redshift'),
                              (dbs.SQLiteDB, 'target-sqlite')]:
         if isinstance(db, db_class):
             return target
     raise Exception(
         f'Not supported DB type {type(db)} for command SingerTapToDB')
Esempio n. 4
0
def __(alias: str,
       target_table: str,
       csv_format: bool = None,
       skip_header: bool = None,
       delimiter_char: str = None,
       quote_char: str = None,
       null_value_string: str = None,
       timezone: str = None):
    """Resolve the alias and delegate to `copy_from_stdin_command`."""
    database = dbs.db(alias)
    return copy_from_stdin_command(database,
                                   target_table=target_table,
                                   csv_format=csv_format,
                                   skip_header=skip_header,
                                   delimiter_char=delimiter_char,
                                   quote_char=quote_char,
                                   null_value_string=null_value_string,
                                   timezone=timezone)
Esempio n. 5
0
    def _create_target_config(self, config: dict):
        """Fill `config` in place with the connection settings for the target DB.

        Args:
            config: the singer target configuration dict to extend

        Raises:
            Exception: if the target DB type is not supported
        """
        db = dbs.db(self.target_db_alias)
        if isinstance(db, dbs.PostgreSQLDB):
            # Reference: https://github.com/datamill-co/target-postgres#configjson
            config.update({
                'postgres_host': db.host,
                'postgres_port': db.port,
                'postgres_database': db.database,
                'postgres_username': db.user,
                'postgres_password': db.password,
                'postgres_schema': self.target_schema,
                'postgres_sslmode': db.sslmode,
                'postgres_sslrootcert': db.sslrootcert,
                'postgres_sslcert': db.sslcert,
                'postgres_sslkey': db.sslkey
            })
        elif isinstance(db, dbs.RedshiftDB):
            # Reference: https://github.com/datamill-co/target-redshift#usage
            config.update({
                'redshift_host': db.host,
                'redshift_port': db.port,  # BUG FIX: was `db.post`, which would raise AttributeError
                'redshift_database': db.database,
                'redshift_username': db.user,
                'redshift_password': db.password,
                'redshift_schema': self.target_schema,
                'target_s3': {
                    'aws_access_key_id': db.aws_access_key_id,
                    'aws_secret_access_key': db.aws_secret_access_key,
                    'aws_s3_bucket_name': db.aws_s3_bucket_name
                }
            })
        elif isinstance(db, dbs.SQLiteDB):
            # NOTE: self.target_schema is not used here because target-sqlite doesn't support this!
            #       We use optimistic behavior here and don't throw an error.
            #       It would probably be better to implement a prefix option in target-sqlite
            #       instead of just ignoring self.target_schema.

            # Reference: https://gitlab.com/meltano/target-sqlite
            config.update({'database': db.file_name})
        else:
            raise Exception(
                f'Not supported DB type {type(db)} for command SingerTapToDB')
Esempio n. 6
0
def compose_delete_query(db_alias: str,
                         target_table: str,
                         replication_method: ReplicationMethod,
                         replication_key=None) -> str:
    """
    Composes a delete query to a target table before starting sync.

    Args:
        db_alias: alias of the target database (resolved via `dbs.db`)
        target_table: name of the table to delete from
        replication_method: the singer replication method in use
        replication_key: unused for FULL_TABLE / LOG_BASED; reserved for INCREMENTAL

    Returns:
        The DELETE statement to run, or None when nothing should be deleted
        (LOG_BASED replication).

    Raises:
        NotImplementedError: for INCREMENTAL replication
        ValueError: for an unrecognized replication method (previously this
            fell through and implicitly returned None)
    """
    db = dbs.db(db_alias)
    if replication_method == ReplicationMethod.FULL_TABLE:
        if isinstance(db, dbs.BigQueryDB):
            # BigQuery requires a WHERE clause on DELETE statements
            return f'DELETE FROM {target_table} WHERE 1=1'
        else:
            return f'DELETE FROM {target_table}'
    elif replication_method == ReplicationMethod.INCREMENTAL:
        raise NotImplementedError()
    elif replication_method == ReplicationMethod.LOG_BASED:
        return None  # delete nothing
    else:
        raise ValueError(
            f'Unsupported replication method {replication_method}')
Esempio n. 7
0
def __(alias: str, timezone: str = None, echo_queries: bool = None):
    """Resolve the alias and delegate to `query_command`."""
    database = dbs.db(alias)
    return query_command(database, timezone=timezone, echo_queries=echo_queries)
Esempio n. 8
0
def draw_schema(db_alias: str, schemas: str):
    """Shows a chart of the tables and FK relationships in a given database and schema list.

    Args:
        db_alias: alias of the database to inspect (must exist in `config.databases()`)
        schemas: '/'-separated list of schema names to draw

    Returns:
        A flask SVG attachment response with the rendered graph.
    """
    import graphviz

    if db_alias not in config.databases():
        # BUG FIX: error message typo was 'unkown'
        flask.abort(404, f'unknown database {db_alias}')

    db = dbs.db(db_alias)
    assert (isinstance(db, mara_db.dbs.PostgreSQLDB)
            )  # currently only postgresql is supported

    schema_names = schemas.split('/')
    hide_columns = flask.request.args.get('hide-columns')
    engine = flask.request.args.get('engine')

    # get all table inheritance relations as dictionary:
    # {(child_schema, child_table): (parent_schema, parent_table)}
    inherited_tables = {}
    with mara_db.postgresql.postgres_cursor_context(db_alias) as cursor:
        cursor.execute("""
SELECT
  rel_namespace.nspname, rel.relname ,
  parent_namespace.nspname, parent.relname
FROM pg_inherits
  JOIN pg_class parent ON parent.oid = pg_inherits.inhparent
  JOIN pg_class rel ON rel.oid = pg_inherits.inhrelid
  JOIN pg_namespace parent_namespace ON parent_namespace.oid = parent.relnamespace
  JOIN pg_namespace rel_namespace ON rel_namespace.oid = rel.relnamespace""")
        for schema_name, table_name, parent_schema_name, parent_table_name in cursor.fetchall(
        ):
            inherited_tables[(schema_name, table_name)] = (parent_schema_name,
                                                           parent_table_name)

    # get all tables that have foreign key constraints on them or are referenced by foreign key constraints
    fk_constraints = set(
    )  # {((table_schema, table_name), (referred_schema_name, referred_table_name))}
    constrained_columns = {}  # {(schema_name, table_name): {columns}}
    tables = set()  # {(schema_name, table_name)}

    with mara_db.postgresql.postgres_cursor_context(db_alias) as cursor:
        cursor.execute(
            f'''
SELECT
  constrained_table_schema.nspname,
  constrained_table.relname,
  array_agg(constrained_column.attname),
  referenced_table_schema.nspname,
  referenced_table.relname
FROM pg_constraint
  JOIN pg_class constrained_table ON constrained_table.oid = pg_constraint.conrelid
  JOIN pg_namespace constrained_table_schema ON constrained_table.relnamespace = constrained_table_schema.oid
  JOIN pg_class referenced_table ON referenced_table.oid = pg_constraint.confrelid
  JOIN pg_namespace referenced_table_schema ON referenced_table.relnamespace = referenced_table_schema.oid
  JOIN pg_attribute constrained_column ON constrained_column.attrelid = constrained_table.oid AND attnum = ANY (conkey)
WHERE constrained_table_schema.nspname = ANY ({'%s'})
GROUP BY constrained_table_schema.nspname, constrained_table.relname, referenced_table_schema.nspname, referenced_table.relname;
''', (schema_names, ))
        # loop variable renamed from `table_columns` so it no longer shadows
        # the `table_columns` dict built further below
        for schema_name, table_name, constrained_column_names, referred_schema_name, referred_table_name in cursor.fetchall(
        ):
            # child tables are drawn as their parent (inheritance partitions collapse)
            referring_table = (schema_name, table_name)
            if referring_table in inherited_tables:
                referring_table = inherited_tables[referring_table]
            tables.add(referring_table)
            referred_table = (referred_schema_name, referred_table_name)
            if referred_table in inherited_tables:
                referred_table = inherited_tables[referred_table]
            tables.add(referred_table)
            fk_constraints.add((referring_table, referred_table))
            if referring_table in constrained_columns:
                constrained_columns[referring_table].update(constrained_column_names)
            else:
                constrained_columns[referring_table] = set(constrained_column_names)

    # get enum usages
    enums = set()  # {(schema_name, table_name)}
    with mara_db.postgresql.postgres_cursor_context(db_alias) as cursor:
        cursor.execute(
            f'''
SELECT
  DISTINCT
  pg_namespace_table.nspname AS table_schema,
  pg_class_table.relname     AS table_name,

  pg_namespace_enum.nspname  AS enum_schema,
  pg_type.typname            AS enum_type
FROM pg_attribute
  JOIN pg_class pg_class_table ON pg_class_table.oid = attrelid
  JOIN pg_namespace pg_namespace_table ON pg_namespace_table.oid = pg_class_table.relnamespace
  JOIN pg_type ON atttypid = pg_type.OID
  JOIN pg_namespace pg_namespace_enum ON typnamespace = pg_namespace_enum.oid
  JOIN pg_enum ON pg_enum.enumtypid = pg_type.oid
WHERE pg_namespace_table.nspname = ANY ({'%s'})''', (schema_names, ))
        for table_schema, table_name, enum_schema, enum_name in cursor.fetchall(
        ):
            if (table_schema, table_name) in tables:
                # draw enums as pseudo-tables connected to their using table
                tables.add((enum_schema, enum_name))
                fk_constraints.add(
                    ((table_schema, table_name), (enum_schema, enum_name)))
                enums.add((enum_schema, enum_name))

    # get all columns of all tables
    table_columns = {}  # {(schema_name, table_name): [columns]}
    with mara_db.postgresql.postgres_cursor_context(db_alias) as cursor:
        cursor.execute('''
SELECT
  table_schema, table_name,
  array_agg(column_name :: TEXT ORDER BY ordinal_position)
FROM information_schema.columns
GROUP BY table_schema, table_name''')
        for schema_name, table_name, columns in cursor.fetchall():
            table_columns[(schema_name, table_name)] = columns

    graph = graphviz.Digraph(engine=engine,
                             graph_attr={
                                 'splines': 'True',
                                 'overlap': 'ortho'
                             })

    schema_colors = {}
    fk_pattern = re.compile(config.schema_ui_foreign_key_column_regex())
    for schema_name, table_name in sorted(tables):
        # assign each schema a stable background color, cycling the palette
        if schema_name not in schema_colors:
            colors = [
                '#ffffcc', '#bbffcc', '#cceeff', '#eedd99', '#ddee99',
                '#99ddff', '#dddddd'
            ]
            schema_colors[schema_name] = colors[len(schema_colors) %
                                                len(colors)]

        label = '< <TABLE BORDER="1" CELLBORDER="0" CELLSPACING="0" CELLPADDING="1" BGCOLOR="' \
                + schema_colors[schema_name] + '"><TR>'

        node_name = schema_name + '.' + table_name
        if hide_columns:
            label += '<TD ALIGN="LEFT"> ' + table_name.replace(
                '_', '<BR/>') + ' </TD></TR>'
        elif (schema_name, table_name) in enums:
            label += '<TD ALIGN="LEFT"> ' + table_name.replace(
                '_', '<BR/>') + ' </TD></TR>'
        else:
            label += '<TD ALIGN="LEFT"><U><B> ' + table_name + ' </B></U></TD></TR>'
            for column in table_columns[(schema_name, table_name)]:
                label += '<TR><TD ALIGN="LEFT" > '
                # highlight columns that look like foreign keys but have no FK constraint
                if fk_pattern.match(column) \
                        and (schema_name, table_name) in constrained_columns \
                        and column not in constrained_columns[(schema_name, table_name)]:
                    label += '<B><I><FONT COLOR="#dd55dd"> ' + column + ' </FONT></I></B>'
                else:
                    label += column
                label += ' </TD></TR>'

        label += '</TABLE> >'

        graph.node(name=node_name,
                   label=label,
                   _attributes={
                       'fontname': 'Helvetica, Arial, sans-serif',
                       'fontsize': '10',
                       'fontcolor': '#555555',
                       'shape': 'none'
                   })

    for (schema_name, table_name), (referred_schema_name,
                                    referred_table_name) in fk_constraints:
        graph.edge(schema_name + '.' + table_name,
                   referred_schema_name + '.' + referred_table_name,
                   _attributes={'color': '#888888'})

    response = flask.Response(graph.pipe('svg').decode('utf-8'))
    response.headers[
        'Content-Disposition'] = f'attachment; filename="{datetime.date.today().isoformat()}-{db_alias}.svg"'
    return response
Esempio n. 9
0
def __(alias: str, sql_query: str):
    """Resolve the alias and read the query result via `read_dataframe`."""
    database = dbs.db(alias)
    return read_dataframe(database, sql_query=sql_query)
Esempio n. 10
0
def __(alias: str):
    """Resolve the alias and delegate to `schemas_with_foreign_key_constraints`."""
    database = dbs.db(alias)
    return schemas_with_foreign_key_constraints(database)
Esempio n. 11
0
def __(alias: str, schema_names: [str]):
    """Resolve the alias and delegate to `extract_schema`."""
    database = dbs.db(alias)
    return extract_schema(database, schema_names)
Esempio n. 12
0
def __(source_db: dbs.DB,
       target_db_alias: str,
       target_table: str,
       timezone: str = None):
    """Resolve the target alias and delegate to `copy_command`."""
    target_db = dbs.db(target_db_alias)
    return copy_command(source_db, target_db, target_table, timezone)
Esempio n. 13
0
def __(alias: str, header: bool = False, footer: bool = False):
    """Resolve the alias and delegate to `copy_to_stdout_command`."""
    database = dbs.db(alias)
    return copy_to_stdout_command(database, header=header, footer=footer)
Esempio n. 14
0
def __(alias: str):
    """Resolve the alias and delegate to `supports_extract_schema`."""
    database = dbs.db(alias)
    return supports_extract_schema(database)
Esempio n. 15
0
def __(alias: str):
    """Resolve the alias and delegate to `copy_to_stdout_command` with defaults."""
    database = dbs.db(alias)
    return copy_to_stdout_command(database)