Example #1
import glob
import os
from typing import Callable, List

from psycopg2 import extensions
from psycopg2.extensions import connection


def process_data(cur: extensions.cursor, conn: connection, filepath: str, func: Callable) -> None:
    """
    Reads all *.json files under a directory, resolving each file's absolute path
        and appending it to a list.
    Prints the total number of files found, then processes each one by calling the
        passed-in callback and committing after every file.

    :param cur: connection cursor
    :param conn: connection object
    :param filepath: filepath to the directory with songs or logs data
    :param func: callback function to process single data file: songs or logs
    """

    all_files: List[str] = []
    for root, _dirs, _files in os.walk(filepath):
        for f in glob.glob(os.path.join(root, '*.json')):
            all_files.append(os.path.abspath(f))

    num_files: int = len(all_files)
    print('{} files found in {}'.format(num_files, filepath))

    for i, datafile in enumerate(all_files, 1):
        func(cur, datafile)
        conn.commit()
        print('{}/{} files processed.'.format(i, num_files))
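A minimal usage sketch for the function above. The connection string and the
process_song_file callback are illustrative assumptions, not part of the example:

import psycopg2

def process_song_file(cur, filepath):
    """Hypothetical callback: parse one JSON file and insert its rows."""
    ...

conn = psycopg2.connect("host=127.0.0.1 dbname=mydb user=me password=secret")
cur = conn.cursor()
process_data(cur, conn, filepath='data/songs', func=process_song_file)
conn.close()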
Example #2
import pandas as pd
import pandas.io.sql as sqlio
from psycopg2.extensions import connection


def get_df(query: str, conn: connection, dispose_conn: bool = False) -> pd.DataFrame:
    """
    Executes the query and fetches the results.

    Args:
        query: The SQL query to run
        conn: The open connection to the Postgres database
        dispose_conn: Whether to close the connection after the query

    Returns:
        The results of the query as a DataFrame
    """
    res = sqlio.read_sql_query(query, conn)

    if dispose_conn:
        conn.close()

    return res
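A quick usage sketch; the connection string and table name are assumptions for
illustration only:

import psycopg2

conn = psycopg2.connect("host=127.0.0.1 dbname=mydb user=me password=secret")
df = get_df("SELECT * FROM users LIMIT 10;", conn, dispose_conn=True)
print(df.head())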
Example #3
from typing import Any, Dict, List

from psycopg2.extensions import connection


def execute_and_get_rows(db: connection, query: str, args: tuple, keys: list) -> List[Dict[str, Any]]:
    """Run the query and return every result row as a dict keyed by `keys`."""
    rows = []
    # `with db` wraps the query in a transaction (commit on success, rollback
    # on error); the cursor context manager closes the cursor afterwards.
    with db, db.cursor() as cur:
        cur.execute(query, args)
        for reply in cur:  # psycopg2 cursors are iterable row by row
            rows.append(dict(zip(keys, reply)))
    return rows
Example #4
import json
from typing import Any, Dict, List

import psycopg2.extras
from psycopg2.extensions import connection as Conn


def insert_bulk(conn: Conn, logs: List[Dict[str, Any]]) -> None:
    """Insert log records in pages of 1000 statements per server round trip."""
    with conn, conn.cursor() as c:
        values = (
            (log["timestamp"], log["raw_log"], log["stream"],
             json.dumps(log["log"]), json.dumps(log["metadata"]))
            for log in logs
        )
        psycopg2.extras.execute_batch(
            c,
            """
            INSERT INTO logs (timestamp, raw_log, stream, log, metadata)
            VALUES (%s, %s, %s, %s, %s)
            """,
            values,
            page_size=1000,
        )
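execute_batch groups page_size parameter sets into a single round trip, which is
much faster than calling cur.execute() once per row; psycopg2.extras.execute_values
is a further option that collapses the rows into one multi-row VALUES clause. A
hedged usage sketch with made-up records (the DSN and log fields are assumptions):

import psycopg2
from datetime import datetime, timezone

conn = psycopg2.connect("host=127.0.0.1 dbname=mydb user=me password=secret")
sample_logs = [
    {"timestamp": datetime.now(timezone.utc), "raw_log": "GET /health 200",
     "stream": "stdout", "log": {"status": 200}, "metadata": {"host": "web-1"}},
]
insert_bulk(conn, sample_logs)
conn.close()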
Example #5
from collections import OrderedDict
from typing import Dict, Optional

from psycopg2.extensions import connection

# Config, ForeignKey, get_db_conn, get_all_tables, get_all_fk and
# recursive_build are project-local helpers assumed to be in scope.


def build_references(config: Config, conn: Optional[connection] = None) -> Dict[str, dict]:
    """
    Build a tables dependency graph

    Algorithm:
    1) Get all table names
    2) Get all Foreign Keys
    3) Build a tables dependency graph (references dict)
        For each table:
            For each child table:
                build the dependency graph recursively

    Result:
    {
        'references': {
            'table_a': {
                'table_b': {
                    'references': [{'pk_ref': 'id', 'fk_ref': 'table_b_id'}],
                    'ref_tables': {
                        'table_c': {
                            'table_a': {},
                            'table_b': {}
                        },
                        ...
                    }
                },
                'table_c': {...}
            },
            'table_b': {...}
        },
        'primary_keys': {
            'table_a': 'id',
            'table_b': 'id',
            'table_c': 'id'
        }
    }
    """

    references = {}
    primary_keys = {}

    # Remember whether this function opened the connection so it only
    # closes what it owns.
    own_conn = conn is None
    if own_conn:
        conn = get_db_conn(config)

    try:
        tables = get_all_tables(conn, config.db_config)
        foreign_keys = get_all_fk(conn, config.db_config)

        for table in tables:
            references[table['table_name']] = {}

        for fk in foreign_keys:
            if fk['main_table'] not in references:
                references[fk['main_table']] = {}

            if fk['ref_table'] not in references[fk['main_table']]:
                references[fk['main_table']][fk['ref_table']] = {
                    'ref_tables': {},
                    'references': []
                }

            table_references = references[fk['main_table']][fk['ref_table']]['references']
            table_references.append(ForeignKey(
                pk_main=fk['main_table_column'],
                pk_ref=fk['ref_pk_columns'],
                fk_ref=fk['ref_fk_column'],
            ))

            primary_keys[fk['main_table']] = fk['main_table_column']

        if references:
            references = OrderedDict(sorted(references.items(), key=lambda row: len(row[1]), reverse=True))

        for parent, refs in references.items():
            for ref, ref_data in refs.items():
                visited = {parent, ref}
                ref_childs = ref_data['ref_tables']
                recursive_build(ref, ref_childs, references, visited)
    finally:
        # Only close the connection if this function opened it; a
        # caller-supplied connection is left open for the caller.
        if own_conn:
            conn.close()

    result = {
        'references': references,
        'primary_keys': primary_keys
    }
    return result
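The snippet depends on a ForeignKey helper that is not shown. Judging from the
keyword arguments at the call site, a plausible sketch is a NamedTuple; this
definition is an inference, not taken from the original source:

from typing import List, NamedTuple, Union


class ForeignKey(NamedTuple):
    # Field names inferred from the call above; purely illustrative.
    pk_main: str                    # PK column on the main table
    pk_ref: Union[str, List[str]]   # PK column(s) on the referenced table
    fk_ref: str                     # FK column on the referenced table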
Example #6
from typing import Any, Dict
from psycopg2.extensions import connection


def execute_and_get_row(db: connection, query: str, args: tuple, keys: list) -> Dict[str, Any]:
    with db, db.cursor() as cur:
        cur.execute(query, args)
        reply = cur.fetchone()
    # Assumes the query returned at least one row; `reply` is None otherwise.
    return dict(zip(keys, reply))
Example #7
from psycopg2.extensions import connection

def execute(db: connection, query: str, args: tuple) -> None:
    # `with db` opens a transaction: commit on success, rollback on error.
    with db, db.cursor() as cur:
        cur.execute(query, args)
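A minimal usage sketch; the connection string and table are illustrative
assumptions:

import psycopg2

conn = psycopg2.connect("host=127.0.0.1 dbname=mydb user=me password=secret")
execute(conn, "INSERT INTO users (name) VALUES (%s);", ("alice",))
# `with conn` inside execute() committed the transaction, but it does not
# close the connection, so close it explicitly.
conn.close()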