예제 #1
0
파일: utils.py 프로젝트: Gilbord/executor
def fetch_by_one(db_cursor: cursor) -> Iterable[tuple]:
    """
    Lazily yield the rows available on the cursor, one at a time.

    The function only calls ``fetchone()``; it does not execute any query
    itself. Iteration ends as soon as ``fetchone()`` returns a falsy value.

    :param db_cursor: Database cursor.
    :return: Generator producing one database row per iteration.
    """
    while True:
        record = db_cursor.fetchone()
        if not record:
            return
        yield record
예제 #2
0
 def manga_has_author(self,
                      manga_id: int,
                      *,
                      cur: Cursor = NotImplemented) -> bool:
     """
     Tell whether the manga_authors table has any row for the given manga.

     Args:
         manga_id: Id of the manga to look up.
         cur: Cursor used to run the query (keyword-only). The
             NotImplemented default is only a placeholder - presumably a
             real cursor is injected by a wrapper; confirm against callers.

     Returns:
         The value of the "exists" column of the single result row.
     """
     query = 'SELECT EXISTS(SELECT 1 FROM manga_authors WHERE manga_id=%s) as "exists"'
     params = (manga_id, )
     cur.execute(query, params)
     # rows are accessed by column name, so the cursor must return mapping-like rows
     result_row = cur.fetchone()
     return result_row['exists']
예제 #3
0
def default_value(pg_cur: cursor, table_schema: str, table_name: str,
                  column: str) -> str:
    """
    Returns the default value of the column

    The schema, table and column names are sent as bound query parameters
    instead of being formatted into the SQL text, so quotes or other special
    characters in them cannot alter the statement.

    Parameters
    ----------
    pg_cur
        the psycopg cursor
    table_schema
        the table schema
    table_name
        the table name
    column
        the column name

    Returns
    -------
    str
        the default expression of the column, or the string 'NULL' when the
        first column of the fetched row is empty or None
    """
    # see https://stackoverflow.com/a/8148177/1548052

    sql = "SELECT pg_get_expr(d.adbin, d.adrelid) AS default_value\n" \
          "FROM pg_catalog.pg_attribute a\n" \
          "LEFT JOIN pg_catalog.pg_attrdef d ON (a.attrelid, a.attnum) = (d.adrelid,  d.adnum)\n" \
          "WHERE  NOT a.attisdropped   -- no dropped (dead) columns\n" \
          "AND    a.attnum > 0         -- no system columns\n" \
          "AND    a.attrelid = %s::regclass\n" \
          "AND    a.attname = %s;"
    # the qualified name is passed as one text value and cast to regclass by the server
    qualified_name = '{ts}.{tn}'.format(ts=table_schema, tn=table_name)
    pg_cur.execute(sql, (qualified_name, column))
    # NOTE(review): if no row matches, fetchone() presumably returns None and
    # the indexing below raises TypeError, exactly as before this change
    return pg_cur.fetchone()[0] or 'NULL'
예제 #4
0
def primary_key(pg_cur: cursor, schema_name: str, table_name: str) -> str:
    """
    Returns the primary of a table

    The schema and table names are sent as bound query parameters instead of
    being formatted into the SQL text. Only the first row of the result is
    used.

    Parameters
    ----------
    pg_cur
        psycopg cursor
    schema_name
        the schema name
    table_name
        the table name

    Returns
    -------
    str
        the first column of the first row returned by the query

    Raises
    ------
    TableHasNoPrimaryKey
        if the query returns no row
    """
    sql = "SELECT c.column_name"\
          " FROM information_schema.key_column_usage AS c "\
          " LEFT JOIN information_schema.table_constraints AS t"\
          " ON t.constraint_name = c.constraint_name"\
          " WHERE t.table_name = %s"\
          " AND t.table_schema = %s"\
          " AND t.constraint_type = 'PRIMARY KEY'"
    pg_cur.execute(sql, (table_name, schema_name))
    row = pg_cur.fetchone()
    if not row:
        # explicit "no row" check instead of catching every exception, so
        # unrelated failures are no longer reported as a missing primary key
        raise TableHasNoPrimaryKey(
            '{s}.{t} has no primary key'.format(s=schema_name, t=table_name))
    return row[0]
예제 #5
0
def process_log_file(cur: extensions.cursor, filepath: str):
    """
    Load one log file (one JSON document per line) into the database.

    Only records whose ``page`` is "NextSong" are kept. For those records the
    function executes, in this order:

    1. ``time_table_insert`` once per record, with the timestamp broken down
       into hour, day, ISO week, month, year and weekday;
    2. ``users_table_insert`` once per record;
    3. ``songplays_table_insert`` once per record, after looking up the song
       and artist ids with ``song_select`` (both are None when the lookup
       returns no row).

    A file without any line is a no-op.

    :param cur: connection cursor
    :param filepath: filepath to the file with log data
    """
    with open(filepath, 'r') as f:
        data: List = [json.loads(line) for line in f]

    if not data:
        # nothing to load; also avoids a KeyError on the 'page' column below
        return

    df = pd.DataFrame(data)

    df = df.loc[df['page'] == 'NextSong']

    # convert timestamp column (epoch milliseconds) to datetime
    t = pd.to_datetime(df['ts'], unit='ms')

    time_data = (
        t.dt.tz_localize('UTC').values,
        t.dt.hour.tolist(),
        t.dt.day.tolist(),
        # Series.dt.week was removed in pandas 2.0; isocalendar() replaces it
        t.dt.isocalendar().week.tolist(),
        t.dt.month.tolist(),
        t.dt.year.tolist(),
        t.dt.weekday.tolist()
    )
    column_labels = ('timestamp', 'hour', 'day', 'week', 'month', 'year', 'weekday')
    time_df = pd.DataFrame(dict(zip(column_labels, time_data)))

    for _, row in time_df.iterrows():
        cur.execute(time_table_insert, list(row))

    user_df = df[['userId', 'firstName', 'lastName', 'gender', 'level']]
    for _, row in user_df.iterrows():
        # pass a plain list (as for the time rows) rather than a Series, so
        # the driver does not depend on positional indexing of a labelled Series
        cur.execute(users_table_insert, list(row))

    for _, row in df.iterrows():
        cur.execute(song_select, (row.song, row.artist, row.length))
        results = cur.fetchone()

        if results:
            song_id, artist_id = results
        else:
            song_id, artist_id = None, None

        start_time = pd.to_datetime(row.ts, unit='ms').tz_localize('UTC')
        songplay_data: Tuple = (start_time, row.userId, row.level, song_id,
                                artist_id, row.sessionId, row.userAgent)
        cur.execute(songplays_table_insert, songplay_data)
def find_parent(cursor: extensions.cursor, named_location_id: int,
                parents: Dict[str, str]):
    """
    Walk up the named-location tree and record the site and domain names.

    Starting from the given location, the parent is looked up repeatedly
    until a location without a parent row is reached. Whenever a parent's
    type name (compared case-insensitively) is 'site' or 'domain', its name
    is stored in ``parents`` under that key.

    :param cursor: A database cursor object.
    :param named_location_id: The named location ID to start from.
    :param parents: Dictionary updated in place with 'site' / 'domain' entries.
    """
    sql = '''
        select
            prnt_nam_locn_id, nam_locn.nam_locn_name, type.type_name
        from
            nam_locn_tree
        join
            nam_locn on nam_locn.nam_locn_id = nam_locn_tree.prnt_nam_locn_id
        join
            type on type.type_id = nam_locn.type_id
        where
            chld_nam_locn_id = %s
    '''
    child_id = named_location_id
    while True:
        cursor.execute(sql, [child_id])
        record = cursor.fetchone()
        if record is None:
            # no parent row: the top of the tree has been reached
            return
        child_id, location_name, type_name = record[0], record[1], record[2]
        kind = type_name.lower()
        if kind in ('site', 'domain'):
            parents[kind] = location_name
예제 #7
0
def test_tables(cur: psycopg2Ext.cursor, conn: psycopg2Ext.connection) -> None:
    """
    Description: Print, for every query in ``create_table_queries``, whether
    the table named in that query exists.

    The table name is taken as the text between the first "EXISTS" and the
    first "(" of each query. The function stops at the first database error,
    after logging a warning.

    Arguments:
        cur (psycopg2Ext.cursor): cursor object
        conn (psycopg2Ext.connection): connection object

    Returns:
        None
    """
    print("\n==================== TEST -- table status  ====================")

    for create_query in create_table_queries:
        tbl_name = create_query[create_query.find("EXISTS") +
                                len("EXISTS"):create_query.find("(")].strip()
        query = f"""select exists(select * from information_schema.tables
            where table_name='{tbl_name}')"""

        try:
            cur.execute(query)
        except psycopg2.Error as e:
            msg = f"ERROR: Could not retrieve table info with query: {query}"
            # the exception is passed through a %s placeholder; handing it over
            # as a bare extra argument makes logging fail to format the record
            logger.warning("%s: %s", msg, e)
            return
        conn.commit()

        try:
            tbl_status = cur.fetchone()[0]
        except psycopg2.Error as e:
            msg = f"ERROR: Could not fetch table status for table: {tbl_name}"
            logger.warning("%s: %s", msg, e)
            return

        print(f"Table '{tbl_name}' exists status: {tbl_status}.")
예제 #8
0
def geometry_type(pg_cur: cursor,
                  table_schema: str,
                  table_name: str,
                  column: str = 'geometry') -> (str, int):
    """
    Returns the geometry type of a column as a tuple (type, srid)

    The schema, table and column names are sent as bound query parameters
    instead of being formatted into the SQL text.

    Parameters
    ----------
    pg_cur
        the psycopg cursor
    table_schema
        the table schema
    table_name
        the table name
    column:
        the geometry column name, defaults to "geometry"

    Returns
    -------
    tuple or None
        (type, srid) taken from the first matching row of geometry_columns,
        or None when no row matches
    """
    sql = "SELECT type, srid " \
          "FROM geometry_columns " \
          "WHERE f_table_schema = %s " \
          "AND f_table_name = %s " \
          "AND f_geometry_column = %s;"
    pg_cur.execute(sql, (table_schema, table_name, column))
    res = pg_cur.fetchone()
    if not res:
        return None
    return res[0], res[1]
예제 #9
0
 def find_service_manga(self,
                        service_id: int,
                        title_id: str,
                        *,
                        cur: Cursor = NotImplemented) -> DictRow:
     """
     Fetch the first manga_service row matching a service id and title id.

     Args:
         service_id: Value matched against the service_id column.
         title_id: Value matched against the title_id column.
         cur: Cursor used to run the query (keyword-only). The
             NotImplemented default is only a placeholder - presumably a
             real cursor is injected by a wrapper; confirm against callers.

     Returns:
         Whatever ``cur.fetchone()`` returns for the query.
     """
     query = 'SELECT * from manga_service WHERE service_id=%s AND title_id=%s'
     params = (service_id, title_id)
     cur.execute(query, params)
     found = cur.fetchone()
     return found
예제 #10
0
 def find_manga_by_title(self,
                         title: str,
                         *,
                         cur: Cursor = NotImplemented) -> Optional[Manga]:
     """
     Look up a single manga by its exact title.

     Args:
         title: Value matched against the title column.
         cur: Cursor used to run the query (keyword-only). The
             NotImplemented default is only a placeholder - presumably a
             real cursor is injected by a wrapper; confirm against callers.

     Returns:
         A Manga built from the row's columns as keyword arguments, or None
         when the query returns no (or an empty) row.
     """
     query = 'SELECT * FROM manga WHERE title=%s LIMIT 1'
     cur.execute(query, (title, ))
     found = cur.fetchone()
     if not found:
         return None
     return Manga(**found)
예제 #11
0
def check_table(db: Cursor, table_name: str) -> bool:
    """
    Report whether information_schema.tables lists a table with this name.

    The name is matched in any schema, since the query filters on
    table_name only.

    :param db: Cursor used to run the query.
    :param table_name: Table name to look for.
    :return: First column of the single result row of the EXISTS query.
    """
    statement = """
    SELECT EXISTS (
        SELECT * FROM information_schema.tables
        WHERE table_name = %s
    );
    """
    db.execute(statement, (table_name, ))
    exists_row = db.fetchone()
    return exists_row[0]
예제 #12
0
def get_role(cursor: Cursor, name: str) -> Role:
    """
    Look up a single role by name.

    Runs ``_ROLE_SELECT`` with a ``WHERE rolname = %s`` filter through the
    ``query`` helper (presumably it executes the statement on the cursor -
    confirm against its definition).

    :param cursor: Cursor the lookup is run on.
    :param name: Role name to search for.
    :return: The row fetched from the cursor.
    :raises KeyError: when the cursor's rowcount is falsy after the query.
    """
    statement = "{} WHERE rolname = %s".format(_ROLE_SELECT)
    query(cursor, statement, name)
    if not cursor.rowcount:
        raise KeyError(name)
    return cursor.fetchone()
예제 #13
0
    def get_service_whole(
            self,
            service_id: int,
            *,
            cur: Cursor = NotImplemented) -> Optional[ServiceWhole]:
        """
        Fetch the service_whole row of a service.

        Args:
            service_id: Value matched against the service_id column.
            cur: Cursor used to run the query (keyword-only). The
                NotImplemented default is only a placeholder - presumably a
                real cursor is injected by a wrapper; confirm against callers.

        Returns:
            A ServiceWhole built from the row's columns as keyword arguments,
            or None when the query returns no (or an empty) row.
        """
        query = 'SELECT * FROM service_whole WHERE service_id=%s'
        cur.execute(query, [service_id])
        found = cur.fetchone()
        if not found:
            return None
        return ServiceWhole(**found)
예제 #14
0
    def get_newest_chapter(self,
                           manga_id: int,
                           service_id: Optional[int] = None,
                           *,
                           cur: Cursor = NotImplemented):
        """
        Fetch the chapter with the latest release_date for a manga.

        Args:
            manga_id: Value matched against the manga_id column.
            service_id: When not None, the lookup is additionally restricted
                to this service_id.
            cur: Cursor used to run the query (keyword-only). The
                NotImplemented default is only a placeholder - presumably a
                real cursor is injected by a wrapper; confirm against callers.

        Returns:
            Whatever ``cur.fetchone()`` returns for the query.
        """
        if service_id is None:
            service_filter = ''
            args = (manga_id, )
        else:
            service_filter = ' AND service_id=%s'
            args = (manga_id, service_id)

        sql = ('SELECT * FROM chapters WHERE manga_id=%s'
               + service_filter
               + ' ORDER BY release_date DESC LIMIT 1')

        cur.execute(sql, args)
        newest = cur.fetchone()
        return newest
예제 #15
0
 def get_manga(self,
               manga_id: int,
               *,
               cur: Cursor = NotImplemented) -> Optional[Manga]:
     """
     Get manga object from database

     Args:
         manga_id: Value matched against the manga_id column.
         cur: Cursor used to run the query (keyword-only). The
             NotImplemented default is only a placeholder - presumably a
             real cursor is injected by a wrapper; confirm against callers.

     Returns:
         A Manga built from the row's columns as keyword arguments, or None
         when the query returns no (or an empty) row.
     """
     query = 'SELECT * FROM manga WHERE manga_id=%s'
     cur.execute(query, (manga_id, ))
     found = cur.fetchone()
     if not found:
         return None
     return Manga(**found)
예제 #16
0
    def get_author_by_name(self,
                           name: str,
                           *,
                           cur: Cursor = NotImplemented) -> Optional[Author]:
        """
        Look up a single author by exact name.

        Args:
            name: Value matched against the name column.
            cur: Cursor used to run the query (keyword-only). The
                NotImplemented default is only a placeholder - presumably a
                real cursor is injected by a wrapper; confirm against callers.

        Returns:
            An Author built from the row's columns as keyword arguments, or
            None when the query returns no row.
        """
        query = 'SELECT * FROM authors WHERE name=%s LIMIT 1'
        cur.execute(query, (name, ))

        found = cur.fetchone()
        return Author(**found) if found is not None else None
예제 #17
0
    def get_manga_service(self,
                          service_id: int,
                          title_id: str,
                          *,
                          cur: Cursor = NotImplemented
                          ) -> Optional[MangaService]:
        """
        Fetch a manga_service row joined with its manga row.

        Args:
            service_id: Value matched against service_id.
            title_id: Value matched against manga_service.title_id.
            cur: Cursor used to run the query (keyword-only). The
                NotImplemented default is only a placeholder - presumably a
                real cursor is injected by a wrapper; confirm against callers.

        Returns:
            A MangaService built from the joined row's columns as keyword
            arguments, or None when the query returns no (or an empty) row.
        """
        query = ('SELECT * FROM manga_service ms '
                 'INNER JOIN manga m ON ms.manga_id = m.manga_id '
                 'WHERE service_id=%s AND ms.title_id=%s')
        params = (service_id, title_id)

        cur.execute(query, params)
        found = cur.fetchone()
        if not found:
            return None
        return MangaService(**found)
예제 #18
0
def process_log_file(cur: cursor, filepath: str) -> None:
    """
    Load one JSON-lines log file into the time, user and songplay tables.

    Only records whose ``page`` is "NextSong" are used. The inserts are
    executed in three passes: all time records, then the de-duplicated user
    records, then one songplay record per log record. Song and artist ids
    for a songplay come from ``song_select`` and are None when that lookup
    returns no row.
    """

    # read the log file, one JSON document per line
    log_df = pd.read_json(filepath, lines=True, convert_dates=['ts'])

    # keep only the NextSong events
    is_next_song = log_df['page'] == 'NextSong'
    log_df = log_df[is_next_song]

    # timestamps as datetimes
    timestamps = pd.to_datetime(log_df['ts'], unit='ms')

    # build one time record per event
    time_columns = [
        'start_time', 'hour', 'day', 'week', 'month', 'year', 'weekday'
    ]
    time_records = []
    for stamp in timestamps:
        time_records.append([
            stamp, stamp.hour, stamp.day, stamp.week, stamp.month,
            stamp.year, stamp.dayofweek
        ])
    time_df = pd.DataFrame(time_records, columns=time_columns)

    for _, time_row in time_df.iterrows():
        cur.execute(time_table_insert, list(time_row))

    # distinct users appearing in the file
    user_columns = ['userId', 'firstName', 'lastName', 'gender', 'level']
    user_df = log_df.filter(user_columns).drop_duplicates()

    for _, user_row in user_df.iterrows():
        cur.execute(user_table_insert, user_row)

    # one songplay per event
    for _, event in log_df.iterrows():
        # resolve song and artist ids from the song and artist tables
        cur.execute(song_select, (event.song, event.artist, event.length))
        match = cur.fetchone()
        songid, artistid = match if match else (None, None)

        songplay_data = [
            event.ts, event.userId, event.level, songid, artistid,
            event.sessionId, event.location, event.userAgent
        ]
        cur.execute(songplay_table_insert, songplay_data)
예제 #19
0
def get_row_count(schema_table: SchemaTable, cursor: extensions.cursor) -> int:
    """
    Count the rows of a SchemaTable by running SELECT COUNT(*) against it.

    :param schema_table: the SchemaTable object whose rows are counted; it is substituted into the query
        through ``sql.SQL(...).format``
    :param cursor: the cursor the query is executed on
    :return: the first column of the single result row, after it has been passed to ``validate_is_int``
    """
    count_query = sql.SQL("""
          SELECT COUNT(*) FROM {}
                        """).format(schema_table)
    cursor.execute(count_query)
    first_row = cursor.fetchone()
    count = first_row[0]  # the result row holds a single value: the count
    validate_is_int(count)
    return count
예제 #20
0
    def _db_exists(self, cur: cursor) -> bool:
        '''Checks if a database already exists.

        Args:
            cur: Cursor for SQL queries.
            db_name: Database name.

        Returns:
            True, if the database exists, False otherwise.

        '''

        sql_query = 'SELECT datname FROM pg_catalog.pg_database WHERE datname = \'auto_scheduler\';'
        cur.execute(sql_query)

        return cur.fetchone() is not None
예제 #21
0
def reference_columns(pg_cur: cursor, table_schema: str, table_name: str,
                      foreign_table_schema: str,
                      foreign_table_name: str) -> (str, str):
    """
    Returns the columns use in a reference constraint

    The schema and table names are sent as bound query parameters instead of
    being formatted into the SQL text. Only the first row of the result is
    returned.

    Parameters
    ----------
    pg_cur
        the psycopg cursor
    table_schema
        the table schema
    table_name
        the table name
    foreign_table_schema
        the schema of the foreign table
    foreign_table_name
        the name of the foreign table

    Returns
    -------
    row
        the first result row: (column name, foreign column name)

    Raises
    ------
    NoReferenceFound
        if the query returns no row
    """
    # see https://stackoverflow.com/a/1152321/1548052
    sql = "SELECT kcu.column_name, ccu.column_name AS foreign_column_name " \
          "FROM information_schema.table_constraints AS tc " \
          "JOIN information_schema.key_column_usage AS kcu " \
          "ON tc.constraint_name = kcu.constraint_name " \
          "AND tc.table_schema = kcu.table_schema " \
          "JOIN information_schema.constraint_column_usage AS ccu " \
          "ON ccu.constraint_name = tc.constraint_name " \
          "AND ccu.table_schema = tc.table_schema " \
          "WHERE tc.constraint_type = 'FOREIGN KEY' " \
          "AND tc.table_name=%s " \
          "AND tc.table_schema=%s " \
          "AND ccu.table_name = %s " \
          "AND ccu.table_schema = %s;"
    # parameter order follows the placeholders above
    pg_cur.execute(sql, (table_name, table_schema,
                         foreign_table_name, foreign_table_schema))
    cols = pg_cur.fetchone()
    if not cols:
        raise NoReferenceFound(
            '{ts}.{tn} has no reference to {fts}.{ftn}'.format(
                tn=table_name,
                ts=table_schema,
                ftn=foreign_table_name,
                fts=foreign_table_schema))
    return cols
예제 #22
0
    def get_service(self,
                    service: Union[int, str],
                    *,
                    cur: Cursor = NotImplemented) -> Optional[Service]:
        """
        Get service by url or by id
        Args:
            service: The id (any int) or url (anything else) of the service
            cur: Cursor used to run the query (keyword-only). The
                NotImplemented default is only a placeholder - presumably a
                real cursor is injected by a wrapper; confirm against callers.

        Returns:
            Service object built from the row's columns as keyword arguments,
            or None when the query returns no (or an empty) row
        """
        # an int is matched against service_id, anything else against url
        lookup_column = 'service_id' if isinstance(service, int) else 'url'
        sql = 'SELECT * FROM services WHERE ' + lookup_column + '=%s'

        cur.execute(sql, (service, ))
        found = cur.fetchone()
        if not found:
            return None
        return Service(**found)
예제 #23
0
def get_column_count(schema_table: SchemaTable, cursor: extensions.cursor) -> int:
    """
    Count the columns of a SchemaTable by querying information_schema.columns for its schema and table name.

    Per the original author's note, this works equally well if the schema_table actually refers to a view, but it
    won't work with a materialized view since they aren't part of the SQL standard (so they aren't in the
    information schema).

    :param schema_table: the SchemaTable object whose columns are counted
    :param cursor: the cursor the query is executed on
    :return: the first column of the single result row, after it has been passed to ``validate_is_int``
    """
    lookup_params = (schema_table.schema.string, schema_table.table.string)
    column_query = sql.SQL("""
          SELECT COUNT(*) FROM information_schema.columns 
          WHERE table_schema = %s AND table_name = %s
                        """)

    cursor.execute(column_query, lookup_params)
    first_row = cursor.fetchone()
    count = first_row[0]  # the result row holds a single value: the count
    validate_is_int(count)
    return count
예제 #24
0
def get_citus_version(cur: cursor) -> str:
    """
    Run ``SELECT citus_version()`` and return the fetched row.

    NOTE(review): despite the ``str`` annotation, the value returned is
    whatever ``fetchone()`` gives back (the whole row, not its first
    column) - confirm what callers expect before changing it.
    """
    cur.execute("SELECT citus_version()")
    return cur.fetchone()
예제 #25
0
def _get_latest_id_from_table(table: Table, cur: cursor) -> int:
    cur.execute(f"select id from {table} order by id desc limit 1")
    row = cur.fetchone()
    return row[0] if row else None
예제 #26
0
def process_log_file(cur: cursor, filepath: str):
    """Process a log file

    Reads a JSON-lines file, keeps the records whose ``page`` is "NextSong"
    and, for each of them, executes the time, user and songplay inserts from
    the ``sql`` module. Song and artist ids are looked up with
    ``sql.song_select`` and are None when that lookup returns no row.

    Args:
        cur (cursor): Conected cursor
        filepath (str): Path for a json file
    """
    # open log file
    df = pd.read_json(filepath, lines=True)

    # filter by NextSong action; copy so the column assignments below write
    # to an independent frame rather than to a slice of the original
    df = df.loc[df["page"] == "NextSong"].copy()

    # convert timestamp column to datetime
    df.ts = df.ts.astype("datetime64[ms]")
    df["hour"] = df.ts.dt.hour
    df["day"] = df.ts.dt.day
    # Series.dt.week was removed in pandas 2.0; isocalendar() replaces it
    df["week"] = df.ts.dt.isocalendar().week
    df["month"] = df.ts.dt.month
    df["year"] = df.ts.dt.year
    df["weekday"] = df.ts.dt.weekday

    for row in df.itertuples(index=False):
        # formatted once, used by both the time and the songplay insert
        ts_text = f"{row.ts:%Y-%m-%d %H:%M:%S}"

        time_vals = [
            str(col) for col in (ts_text, row.hour, row.day, row.week,
                                 row.month, row.year, row.weekday)
        ]
        cur.execute(sql.time_table_insert, time_vals)

        user_vals = (row.userId, row.firstName, row.lastName, row.gender,
                     row.level)
        cur.execute(sql.user_table_insert, user_vals)

        # get songid and artistid from song and artist tables
        cur.execute(sql.song_select, (row.song, row.artist, row.length))
        results = cur.fetchone()

        if results:
            songid, artistid = results
        else:
            songid, artistid = None, None

        # insert songplay record
        # NOTE(review): str(None) sends the text 'None' rather than NULL when
        # no song/artist matched - kept as is; confirm this is intended
        cur.execute(
            sql.songplay_table_insert,
            (
                ts_text,
                str(row.userId),
                str(row.level),
                str(songid),
                str(artistid),
                str(row.sessionId),
                str(row.location),
                str(row.userAgent),
            ),
        )