Esempio n. 1
0
def get_data_from_db(query: str, conn: psycopg2.extensions.connection,
                     cur: psycopg2.extensions.cursor, df: pd.DataFrame,
                     col_names: List[str]) -> pd.DataFrame:
    """Execute *query* and append every fetched row to *df*.

    Rows are fetched in batches of 100 and mapped positionally onto
    *col_names*, so col_names must match the query's select list.

    :param query: SQL SELECT statement to execute.
    :param conn: open database connection (rolled back on error).
    :param cur: cursor used to run the query.
    :param df: DataFrame the fetched rows are appended to.
    :param col_names: column names, in the same order as the result columns.
    :return: the extended DataFrame, or None if the query failed.
    """
    try:
        cur.execute(query)
        while True:
            # Fetch the next 100 rows; an empty list means end of results.
            query_results = cur.fetchmany(100)
            if not query_results:
                break

            # One dict per row, keyed by the caller-supplied column names.
            results_mapped = [dict(zip(col_names, row))
                              for row in query_results]

            # BUG FIX: DataFrame.append was deprecated in pandas 1.4 and
            # removed in pandas 2.0 -- use pd.concat instead.
            df = pd.concat([df, pd.DataFrame(results_mapped)],
                           ignore_index=True)

        return df

    except Exception as error:
        print(f"{type(error).__name__}: {error}")
        print("Query:", cur.query)
        conn.rollback()
Esempio n. 2
0
def __stock_tags_bulk_insert(df: pd.DataFrame, column_name: str,
                             conn: psycopg2.extensions.connection,
                             cursor: psycopg2.extensions.cursor) -> Any:
    """
        Inserts all tag rows of df in the DB of the relationship between stock and tags.
    :param df: df with three columns of stock and a column of tag.
    :param column_name: str. The name of tag column.
    :param conn: psycopg2.extensions.connection. Connection to DB.
    :param cursor: cursor of DB.
    :return: None.
    """
    for index, series in df.iterrows():
        if not pd.isnull(series[column_name]):
            # First look up the stock id of the row. All three queries are
            # parameterized: building SQL with .format() is an SQL-injection
            # hole and breaks on values containing quotes.
            cursor.execute(
                "SELECT id FROM stock "
                "WHERE stock_name = %s AND exchange = %s AND ticker = %s",
                (series['stock_name'], series['exchange'], series['ticker']))
            stock_id = cursor.fetchone()[0]

            # Next, look up the tag id of the row.
            cursor.execute(
                "SELECT id FROM tag WHERE tag_name = %s AND category = %s",
                (series[column_name], column_name))
            tag_id = cursor.fetchone()[0]

            # Insert the stock_tag relation using stock_id and tag_id.
            cursor.execute(
                "INSERT INTO stock_tag (stock_id, tag_id) "
                "VALUES (%s, %s) ON CONFLICT DO NOTHING",
                (stock_id, tag_id))
    conn.commit()
Esempio n. 3
0
def copy_from(df: pd.DataFrame,
              table: str,
              connection: psycopg2.extensions.connection,
              chunk_size: int = 10000):
    """Bulk-load *df* into *table* via COPY FROM, chunk_size rows at a time.

    String columns are escaped so tabs/newlines/backslashes survive the
    tab-separated COPY stream; NULLs are written as \\N. The transaction is
    committed per chunk and rolled back on a psycopg2 error.
    """
    cursor = connection.cursor()
    df = df.copy()

    # Characters that would break the tab-separated COPY format.
    # '\\' is listed first so it is escaped before the others.
    escaped = {
        '\\': '\\\\',
        '\n': r'\n',
        '\r': r'\r',
        '\t': r'\t',
    }
    for col in df.columns:
        if df.dtypes[col] == 'object':
            for v, e in escaped.items():
                # BUG FIX: these are literal characters -- under the old
                # regex-default of str.replace the backslash patterns are
                # misinterpreted as regular expressions.
                df[col] = df[col].str.replace(v, e, regex=False)
    try:
        for start in tqdm(range(0, df.shape[0], chunk_size)):
            f = StringIO()
            chunk = df.iloc[start:(start + chunk_size)]

            chunk.to_csv(f,
                         index=False,
                         header=False,
                         sep='\t',
                         na_rep='\\N',
                         quoting=None)
            f.seek(0)
            # quote the column names so mixed-case columns survive
            cursor.copy_from(f, table, columns=[f'"{c}"' for c in df.columns])
            connection.commit()
    except psycopg2.Error as e:
        print(e)
        connection.rollback()
    finally:
        # close the cursor on every exit path (incl. unexpected exceptions)
        cursor.close()
Esempio n. 4
0
def recognise_employee_sql(connection: psycopg2.extensions.connection,
                           encodings):
    """Recognise an employee via a single SQL query.

    Splits the 128-d face encoding into two 64-d cube vectors and compares
    them against the stored vec_low/vec_high columns, printing the closest
    match within THRESHOLD. Returns False on database errors.
    """
    cursor = connection.cursor()
    try:
        # Build the two vector literals once -- the original recomputed
        # these joins four times across two concatenated format() calls.
        vec_low = ','.join(str(s) for s in encodings[0:64])
        vec_high = ','.join(str(s) for s in encodings[64:128])
        # The distance expression is reused in both WHERE and ORDER BY.
        distance = ('sqrt( power( CUBE( array[{}]) <-> vec_low, 2) + '
                    'power( CUBE( array[{}]) <-> vec_high, 2))'.format(
                        vec_low, vec_high))
        query = ('SELECT first_name, last_name FROM employees '
                 'WHERE {} <= {} ORDER BY {} ASC LIMIT 1'.format(
                     distance, THRESHOLD, distance))
        cursor.execute(query)
        print(cursor.fetchall())
    except (Exception, Error) as error:
        print('Ошибка при запросе к БД:', error)
        connection.rollback()
        return False
Esempio n. 5
0
def create_table(name: str, schema: str,
                 connection: psycopg2.extensions.connection):
    """Create table *name* with column definition *schema* if absent."""
    cursor = connection.cursor()
    statement = f"""CREATE TABLE IF NOT EXISTS {name} ({schema})"""
    cursor.execute(statement)
    connection.commit()
    cursor.close()
def _checkmaps(conn: psycopg2.extensions.connection, col: str, tbl: str,
               file_itms: set):
    ''' We do this to resync new data with what is in the db currently. check and remap data '''

    cur = conn.cursor()
    # normalise the column name the same way it was created in the schema
    col = col.replace(" ", "_").lower()

    # pulling the 'key' column from the db
    LOG.debug(f'''SELECT DISTINCT "{col}" from {tbl};''')
    cur.execute(f'''SELECT DISTINCT "{col}" from {tbl};''')
    existing_items = {row[0] for row in cur.fetchall()}

    for each in file_itms:
        if str(each) not in existing_items:
            # clean up formatting problems (kept for backward compatibility:
            # stored values never contain apostrophes)
            if each is not None and isinstance(each, str):
                each = each.replace("'", "")
            # BUG FIX: the value is now passed as a parameter -- the old
            # f-string interpolation was an SQL-injection hole and broke on
            # any remaining special characters
            cur.execute(f'''INSERT INTO {tbl} ("{col}") VALUES (%s);''',
                        (each,))
            LOG.debug(f"Inserted new item {col}: {each} into table {tbl}")

    conn.commit()
Esempio n. 7
0
def process_data(cur: psycopg2.extensions.cursor,
                 conn: psycopg2.extensions.connection, filepath: str, func):
    """
    Given a connection to a PostgresSQL database, a path to a directory on the
    local filesystem and a processing function, do the following:
        1. Load all *.json files found in filepath and its subdirectories
        2. Print the number of files found in step one
        3. Apply the processing function func to all files found in step one

    :param cur: Cursor
    :param conn: Connection to PostgreSQL database
    :param filepath: Path to JSON file
    :param func: (Python) function to process data
    """
    # walk the tree and collect the absolute path of every *.json file
    all_files = []
    for root, _subdirs, _entries in os.walk(filepath):
        matches = glob.glob(os.path.join(root, '*.json'))
        all_files.extend(os.path.abspath(match) for match in matches)

    # report how many files were found
    num_files = len(all_files)
    print('{} files found in {}'.format(num_files, filepath))

    # process each file, committing after every one
    for i, datafile in enumerate(all_files, 1):
        func(cur, datafile)
        conn.commit()
        print('{}/{} files processed.'.format(i, num_files))
def copy_from(df: pd.DataFrame,
              table: str,
              connection: psycopg2.extensions.connection,
              chunk_size: int = 10000):
    """Bulk-load *df* into *table* via COPY FROM, chunk_size rows at a time.

    String columns are escaped so tabs/newlines/backslashes survive the
    tab-separated COPY stream; NULLs are written as \\N. Commits per chunk,
    rolls back on a psycopg2 error.
    """
    cursor = connection.cursor()
    df = df.copy()
    # escape characters that would break the tab-separated COPY stream;
    # '\\' is listed first so it is escaped before the others
    escaped = {'\\': '\\\\', '\n': r'\n', '\r': r'\r', '\t': r'\t'}
    for col in df.columns:
        if df.dtypes[col] == 'object':
            for v, e in escaped.items():
                # regex=False: these are literal characters, and the old
                # regex default misreads the backslash patterns
                df[col] = df[col].str.replace(v, e, regex=False)
    try:
        for i in range(0, df.shape[0], chunk_size):
            f = io.StringIO()
            chunk = df.iloc[i:(i + chunk_size)]
            # The default separator is a tab, and NULLs are indicated by the two character-string '\N'
            chunk.to_csv(f,
                         index=False,
                         header=False,
                         sep='\t',
                         na_rep='\\N',
                         quoting=None)
            f.seek(0)
            cursor.copy_from(f, table, columns=list(df.columns))
            connection.commit()
    except psycopg2.Error:
        connection.rollback()
    finally:
        # BUG FIX: the cursor was only closed in the except branch, so it
        # leaked on every successful call; close it on all exit paths.
        cursor.close()
Esempio n. 9
0
    def __init__(self, db: pg.extensions.connection, problem: int,
                 intermed_nodes: bool):
        """db : psycopg2.connection
            database to read from.
        problem : int
            index of the problem.
        intermed_nodes : bool
            if True calculates the shortest path between successive nodes.
        """
        LOGGER.debug("Creating %s for problem %d.", self.__class__.__name__,
                     problem)
        self.problem = problem
        self.intermed_nodes = intermed_nodes
        # number of variables; filled in later, not known at construction
        self.num_vars = None

        # Busy-wait (with sleep) until the connection reports a usable
        # transaction status.
        status = db.get_transaction_status()
        sleeptimer = 0.5
        while status not in good_db_status():
            # Consistency fix: use the module-level LOGGER (as in the
            # LOGGER.debug call above) instead of the root ``logging`` logger.
            LOGGER.warning("Waiting %fs for DB connection in status %s",
                           sleeptimer, PSYCOPG2_8_5_TASTATUS[status])
            sleep(sleeptimer)
            status = db.get_transaction_status()

        self.connection = db
Esempio n. 10
0
def stocks_bulk_insert(df: pd.DataFrame, conn: psycopg2.extensions.connection,
                       cursor: psycopg2.extensions.cursor) -> Any:
    """
        Inserts all rows of df in the DB.
    :param df: pd.DataFrame. stocks data.
    :param conn: psycopg2.extensions.connection. Connection to DB.
    :param cursor: cursor of DB.
    :return: None.
    """

    if df.empty:
        return

    # Iterate over the DataFrame rows. The INSERT is parameterized:
    # building SQL with .format() is an SQL-injection hole and breaks on
    # stock names containing quotes.
    for index, series in df.iterrows():
        cursor.execute(
            "INSERT INTO stock (ticker, stock_name, exchange) "
            "VALUES (%s, %s, %s) ON CONFLICT DO NOTHING",
            (series["ticker"], series["stock_name"], series["exchange"]))
    conn.commit()

    print(">>> GETTING HISTORICAL DATA")
    ticker_list = list(df["ticker"])
    __get_historical_data(ticker_list)
def create_table(connection: psycopg2.extensions.connection,
                 cursor: psycopg2.extensions.cursor):
    """Create the ``templates`` table if it does not already exist.

    Args:
        connection (psycopg2.extensions.connection): database connection
        cursor (psycopg2.extensions.cursor): database cursor
    """
    # DDL for the templates table. IF NOT EXISTS makes repeated calls safe.
    # NOTE(review): the uw_/wikibreak_ columns are nullable category labels;
    # their exact semantics are defined by the callers -- confirm there.
    create_table_query = '''CREATE TABLE IF NOT EXISTS templates
          (id                   SERIAL      PRIMARY KEY     NOT NULL,
          name                  TEXT                        NOT NULL,
          year                  INT                         NOT NULL,
          month                 INT                         NOT NULL,
          category              TEXT                        NOT NULL,
          uw_category           TEXT,
          wikibreak_category1   TEXT,
          wikibreak_category2   TEXT,
          wikibreak_subcategory TEXT,
          amount                INT,
          cumulative_amount      INT); 
          '''
    # Execute a command
    cursor.execute(create_table_query)
    connection.commit()
    print("Table created successfully")
Esempio n. 12
0
def load_tables(config: list, connection: pg.extensions.connection):
    """COPY every downloaded CSV file into its matching table and commit."""
    cur = connection.cursor()
    for table in config:
        table_name = table.get('name')
        # files in the download directory that belong to this table
        table_files = [
            filename for filename in os.listdir(downloads_path)
            if filename.startswith(table_name)
        ]
        table_files = check_for_fhv_2017_type(name=table_name,
                                              files=table_files)
        if not table_files:
            print("""No files to upload to {} table.""".format(table_name))
            continue
        for file in table_files:
            file_name = file.split('.')[0]
            table_source = downloads_path.joinpath(f"{file_name}.csv")
            print("""Started to load {} data to db from {}.""".format(
                table_name, table_source))
            with open(table_source, 'r', encoding='utf-8') as f:
                # skip the CSV header row; COPY expects data lines only
                next(f)
                cur.copy_expert(
                    f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
            connection.commit()
            print("""Completed loading file {} into {} table.""".format(
                file, table_name))
Esempio n. 13
0
def add_employee_to_db(connection: psycopg2.extensions.connection,
                       employee: Person):
    """Insert *employee* (name, image path, access level, face encoding)
    into the employees table.

    The 128-d face encoding is stored as two 64-d cube vectors
    (vec_low / vec_high). Returns True on success, False on failure
    (after rolling the transaction back).
    """
    try:
        cursor = connection.cursor()
        v_low = ','.join(str(s) for s in employee.face_encoding[0:64])
        v_high = ','.join(str(s) for s in employee.face_encoding[64:128])
        # assumes employee.name is exactly "First Last" -- split() raises
        # otherwise and the except branch reports the failure
        name, surname = employee.name.split()
        # Text fields are passed as parameters: the old f-string
        # interpolation was an SQL-injection hole and broke on names with
        # quotes. The cube vectors are numeric strings built above, so
        # inlining them is safe.
        query = f'''
                INSERT INTO employees(
                    first_name, last_name, image_path, access, vec_low,vec_high
                    ) VALUES (
                    %s, %s, %s, %s,
                    CUBE(array[{v_low}]),
                    CUBE(array[{v_high}])
                    ) ON CONFLICT DO NOTHING;
            '''
        cursor.execute(query, (name, surname, employee.image_path,
                               employee.access))
        # BUG FIX: the INSERT was never committed, so it was silently lost
        # when the connection closed.
        connection.commit()
        cursor.close()
        return True
    except (Exception, Error) as error:
        print('Ошибка при добавлении пользователя в базу:', error)
        connection.rollback()
        return False
Esempio n. 14
0
def insert_DiscordUser_into_databse(
        member_id: int, cursor: psycopg2.extensions.cursor,
        connection: psycopg2.extensions.connection):
    """Insert *member_id* as a DiscordUser row and commit; log failures."""
    try:
        params = (member_id, )
        cursor.execute(sql_insert_DiscordUser_into_database, params)
        connection.commit()
    except (Exception, psycopg2.Error) as error:
        print(f"Error while inserting into DiscordUser on database: {error}")
Esempio n. 15
0
def create_tables(config: list, connection: pg.extensions.connection):
    """Create every table described in *config* (name + schema) if absent."""
    cursor = connection.cursor()
    for entry in config:
        table_name = entry.get('name')
        table_schema = entry.get('schema')
        cursor.execute(
            f"""CREATE TABLE IF NOT EXISTS {table_name} ({table_schema})""")

    connection.commit()
def insert_new_row(table: str, columns: T.List[str], variables: T.List[T.Any],
                   conn: psycopg2.extensions.connection):
    """Insert a single row of *variables* into *columns* of *table*.

    :param table: target table name (trusted, interpolated into the SQL).
    :param columns: column names, same length/order as *variables*.
    :param variables: values for the new row, passed as query parameters.
    :param conn: open database connection; committed on success.
    """
    columns_str = ', '.join(columns)
    # one %s placeholder per column (clearer than slicing a repeated string)
    placeholders_str = ','.join(['%s'] * len(columns))
    dml = f"INSERT INTO {table} ({columns_str}) VALUES ({placeholders_str})"

    # cursor as context manager: closed even if execute() raises
    # (the old explicit close() leaked the cursor on error)
    with conn.cursor() as cur:
        cur.execute(dml, variables)
    conn.commit()
Esempio n. 17
0
def close_connection_to_database(cursor: psycopg2.extensions.cursor,
                                 connection: psycopg2.extensions.connection):
    """Close *cursor* and *connection*, logging any failure instead of raising."""
    try:
        if connection:
            # close the cursor before the connection that owns it
            cursor.close()
            connection.close()
            print("PostgreSQL connection is closed")
    except (Exception, psycopg2.Error) as error:
        print(f"Error while disconnection from PostgreSQL: {error}")
Esempio n. 18
0
def insert_DiscordUserVoiceSession_into_database(
        member_id: int, channel_id: int, session_amount: float,
        cursor: psycopg2.extensions.cursor,
        connection: psycopg2.extensions.connection):
    """Insert one voice-session row and commit; log failures instead of raising."""
    params = (member_id, channel_id, session_amount)
    try:
        cursor.execute(sql_insert_DiscordUserVoiceSession_into_Database,
                       params)
        connection.commit()
    except (Exception, psycopg2.Error) as error:
        print(
            f"Error while inserting into DiscordUserVoiceSession on database: {error}"
        )
def create_index(connection: psycopg2.extensions.connection,
                 cursor: psycopg2.extensions.cursor):
    """Create a hash index on templates.name (idempotent).

    Args:
        connection (psycopg2.extensions.connection): database connection
        cursor (psycopg2.extensions.cursor): database cursor
    """
    # IF NOT EXISTS makes repeated calls safe, matching the
    # CREATE TABLE IF NOT EXISTS used when the table itself is created.
    create_index_query = "CREATE INDEX IF NOT EXISTS template_name_index ON templates USING hash (name)"
    cursor.execute(create_index_query)
    connection.commit()
    print("Index on the column name created successfully")
Esempio n. 20
0
def load_tables(config: list, connection: pg.extensions.connection,
                data_path: str = '../data/'):
    """COPY each configured table's CSV into the database, one commit per table.

    :param config: list of dicts, each with a 'name' key naming a table;
        the CSV file is expected at <data_path><name>.csv.
    :param connection: open database connection.
    :param data_path: directory holding the CSV files (generalized from the
        previously hard-coded '../data/'; the default preserves old behavior).
    """
    cur = connection.cursor()

    for table in config:
        table_name = table.get('name')
        table_source = data_path + f"{table_name}.csv"
        with open(table_source, 'r') as f:
            next(f)  # skip the CSV header row; COPY expects data lines only
            cur.copy_expert(f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
        connection.commit()
Esempio n. 21
0
def create_table(sql_query: str, conn: psycopg2.extensions.connection,
                 cur: psycopg2.extensions.cursor) -> None:
    """Execute the table-creation statement *sql_query* and commit it.

    On failure: report the error and the attempted query, roll the
    transaction back and close the cursor.
    """
    try:
        cur.execute(sql_query)
    except Exception as exc:
        print(f"{type(exc).__name__}: {exc}")
        print(f"Query: {cur.query}")
        conn.rollback()
        cur.close()
    else:
        # commit so the new table is actually persisted
        conn.commit()
def drop_table(connection: psycopg2.extensions.connection,
               cursor: psycopg2.extensions.cursor):
    """Drop the ``templates`` table if it exists.

    Args:
        connection (psycopg2.extensions.connection): database connection
        cursor (psycopg2.extensions.cursor): database cursor
    """
    # IF EXISTS: dropping an absent table no longer raises and no longer
    # leaves the transaction in an aborted state.
    drop_table_query = '''DROP TABLE IF EXISTS templates;'''
    # Execute a command
    cursor.execute(drop_table_query)
    connection.commit()
    print("Table deleted successfully")
Esempio n. 23
0
def load_tables(config: list, connection: pg.extensions.connection):
    """COPY each configured table's CSV (under ~/Documents/.../ht_v2/data) into the DB."""
    cur = connection.cursor()
    data_path = Path(os.environ['HOME'], 'Documents', 'data_science', 'ht_v2',
                     'data')

    for table in config:
        table_name = table.get('name')
        csv_file = data_path.joinpath(f"{table_name}.csv")
        with open(csv_file, 'r') as f:
            next(f)  # skip the CSV header row
            cur.copy_expert(f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
        connection.commit()
Esempio n. 24
0
def run_query(
    sql_query: str,
    conn: psycopg2.extensions.connection,
    cur: psycopg2.extensions.cursor,
) -> None:
    """Execute *sql_query* and commit it.

    On failure: print the exception and the attempted query, roll the
    transaction back and close the cursor.
    """
    try:
        cur.execute(sql_query)
    except Exception as exc:
        print(f"{type(exc).__name__}: {exc}")
        print(f"Query: {cur.query}")
        conn.rollback()
        cur.close()
    else:
        # only commit when the statement succeeded
        conn.commit()
Esempio n. 25
0
def get_curr_rev_id(conn: psycopg2.extensions.connection) -> Union[str, None]:
    """Return the stored migration revision, creating the table if missing.

    Returns None when the migro_ver table did not exist (it is created) or
    exists but holds no row.
    """
    cursor = conn.cursor()
    try:
        cursor.execute('SELECT ver FROM migro_ver')
        row = cursor.fetchone()
        return row[0]
    except psycopg2.ProgrammingError:
        # the table does not exist yet: create it and report "no revision"
        conn.rollback()
        cursor.execute(
            'CREATE TABLE migro_ver (ver VARCHAR(12) PRIMARY KEY)')
        conn.commit()
        return None
    except TypeError:
        # table exists but is empty: fetchone() returned None
        return None
    finally:
        cursor.close()
def _build_dictionaries(conn: psycopg2.extensions.connection) -> dict:
    """Build name -> rowid lookup dicts from the mapping tables.

    :param conn: open database connection.
    :return: dict keyed by 'center', 'device', 'software' and 'os'; each
        value maps the name/hash column of the corresponding *_map table
        to its rowid.
    """
    cur = conn.cursor()
    master_dict = {}

    # (master_dict key, mapping table, name column) triples -- the four
    # copy-pasted query/remap stanzas collapse into one loop.
    mappings = [
        ('center', 'center_map', 'center_name'),
        ('device', 'device_map', 'device_name'),
        ('software', 'software_map', 'software_hash'),
        ('os', 'os_map', 'os_name'),
        # ('ip', 'ip_map', 'ip_address'),  # disabled, as in the original
    ]
    for key, tbl, col in mappings:
        cur.execute(f'select rowid, {col} from {tbl};')
        # invert (rowid, name) pairs into {name: rowid}
        master_dict[key] = {name: rowid for rowid, name in cur.fetchall()}

    return master_dict
Esempio n. 27
0
def get_walking(lat: float, lng: float, t: int, conn: psycopg2.extensions.connection, walking_endpoint: str,
        timeout: int = 20, multiple_times_allowed: bool = False, raise_exceptions: bool = False, download_geometry_after_timeout: bool = False) -> Dict[str, Any]:
    """Return the walking-availability polygon for (lat, lng, t) as GeoJSON.

    Tries the local cache table first, then the remote endpoint; on failure
    falls back to the nearest cached geometry of the same time, or an empty
    polygon. Coordinates are rounded to 6 decimals to match the cache keys.
    """
    lat, lng = round(lat, 6), round(lng, 6)
    with conn.cursor() as cur:
        cur.execute('SELECT ST_AsGeoJSON(geometry) FROM walking WHERE latitude = %s AND longitude = %s AND time = %s LIMIT 1', (lat, lng, t))
        res = cur.fetchone()
        if res is not None:
            return json.loads(res[0])
        try:
            return _get_walking_internal(lat, lng, t, conn, walking_endpoint, timeout, multiple_times_allowed) # type: ignore
        except (requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout) as ex:
            if download_geometry_after_timeout:
                # NOTE(review): this retries via _get_public_transport_internal
                # with the *walking* endpoint -- looks like a copy-paste of
                # _get_walking_internal; confirm before changing.
                thread = threading.Thread(target=lambda: _get_public_transport_internal(lat, lng, t, conn, walking_endpoint, timeout * 20))
                thread.start()
            if raise_exceptions:
                raise TimeoutError(ex)
            else:
                log.warning(f'Walking geometry download ({lat}, {lng}, {t}) failed with timeout')
            return {'type': 'Polygon', 'coordinates': []}
        except Exception as ex:
            if raise_exceptions:
                raise
            else:
                log.warning(f'Walking geometry download ({lat}, {lng}, {t}) failed with exception: {repr(ex)}')
            log.error(f'Walking geometry download for ({lat}, {lng}, {t}) failed: {repr(ex)}')
            # BUG FIX: the two concatenated fragments produced
            # '... AS min_distanceFROM walking ...' -- a space was missing
            # before FROM, making this fallback query invalid SQL.
            cur.execute('SELECT ST_AsGeoJSON(geometry), ST_Distance(geometry, ST_SetSRID(ST_MakePoint(%s, %s), 4326)) AS min_distance '
                    'FROM walking WHERE time = %s ORDER BY 2 LIMIT 1', (lat, lng, t))
            res = cur.fetchone()
            if res is None:
                return {'type': 'Polygon', 'coordinates': []}
            return json.loads(res[0])
Esempio n. 28
0
def get_personal_transport(lat: float, lng: float, t: int, conn: psycopg2.extensions.connection, personal_transport_endpoint: str, timeout: int = 20,
        raise_exceptions: bool = False, download_geometry_after_timeout: bool = False) -> Dict[str, Any]:
    """Return the personal-transport (car) availability polygon for (lat, lng, t).

    Checks the local cache table first, then the remote endpoint; on timeout
    optionally schedules a slower retry and falls back to the nearest cached
    geometry for the same time. Coordinates are rounded to 6 decimals to
    match the cache keys.
    """
    lat, lng = round(lat, 6), round(lng, 6)
    with conn.cursor() as cur:
        cur.execute('SELECT ST_AsGeoJSON(geometry) FROM car WHERE latitude = %s AND longitude = %s AND time = %s', (lat, lng, t))
        res = cur.fetchone()
        if res is not None:
            return json.loads(res[0])
        try:
            return _get_personal_transport_internal(lat, lng, t, conn, personal_transport_endpoint, timeout) # type: ignore
        except (requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout) as ex:
            # FIX: the condition was accidentally nested inside itself twice.
            if download_geometry_after_timeout:
                _execute_after(lambda: _get_personal_transport_internal(lat, lng, t, conn, personal_transport_endpoint, timeout * 20),
                        f'personal_transport_download ({lat}, {lng}, {t})')
            if raise_exceptions:
                raise TimeoutError(ex)
            else:
                log.warning(f'Personal transport geometry download ({lat}, {lng}, {t}) failed with timeout')
            # BUG FIX: the parameters were passed as (lat, lng, t) while the
            # placeholders appear in the order time, lng, lat -- the fallback
            # query compared time against latitude. Order now matches.
            cur.execute('SELECT ST_AsGeoJSON(geometry) FROM car WHERE time = %s'
                    ' ORDER BY ST_Distance(geometry, ST_SetSRID(ST_MakePoint(%s, %s), 4326)) LIMIT 1', (t, lat, lng))
            res = cur.fetchone()
            if res is None:
                return {'type': 'Polygon', 'coordinates': []}
            return json.loads(res[0])
        except Exception as ex:
            log.error(f'Personal transport download ({lat}, {lng}, {t}) failed (exception): {repr(ex)}')
            if raise_exceptions:
                raise
            return {'type': 'Polygon', 'coordinates': []}
Esempio n. 29
0
def select(conn: psycopg2.extensions.connection,
           table: str,
           qualifications: Union[Dict, None] = None,
           select_list: Union[List, None] = None,
           limit: Union[int, None] = None,
           arraysize: int = 100) -> Iterable[Any]:
    """A generator method that runs a select query on the table and yields the results.

    Args:
        conn: An open database connection object.
        table: The table name.
        qualifications: A dict of parameters for the WHERE clause.
            The dict's keys are the column names.
            Each value is either a single native type (str, int, ..), in which case the condition is
            key = value, or a tuple in the format (operator, value), like ('<', 3) or ('>=', 15), in
            which case the operator is used between the key and value instead of a '=' sign.
        select_list: The columns to fetch. If empty, returns all columns (*).
        limit: The maximum number of rows to fetch.
        arraysize: The cursor arraysize (only used when limit is None).

    Yields:
        Rows from the table that matched the qualifications.
    """
    # BUG FIX: the defaults were the mutable literals {} and [] -- shared
    # across all calls. None is the conventional sentinel; behavior for
    # callers that omit the arguments is unchanged.
    if qualifications is None:
        qualifications = {}
    if select_list is None:
        select_list = []
    query, params = _build_query(table, qualifications, select_list, limit)
    with conn:
        with conn.cursor() as cursor:
            cursor.arraysize = arraysize
            cursor.execute(query, params)
            rows: List[Any]
            # fetchmany returns [] at exhaustion, which stops iter()
            for rows in iter(cursor.fetchmany, []):
                yield from rows
def get_nrows(table_name: str, conn: psycopg2.extensions.connection) -> int:
    """Return the number of rows currently in *table_name*."""
    cursor = conn.cursor()
    cursor.execute(f"""SELECT COUNT(*) FROM {table_name};""")
    (count,) = cursor.fetchone()
    cursor.close()
    return count