def get_data_from_db(query: str, conn: psycopg2.extensions.connection, cur: psycopg2.extensions.cursor, df: pd.DataFrame, col_names: List[str]) -> pd.DataFrame:
    """Run *query* and append every fetched row to *df*.

    :param query: SQL SELECT statement to execute.
    :param conn: open psycopg2 connection; rolled back on error.
    :param cur: cursor belonging to *conn*.
    :param df: DataFrame the fetched rows are appended to.
    :param col_names: column names, in the same order as the query's select list.
    :return: a new DataFrame with the rows appended, or None if the query failed.
    """
    try:
        cur.execute(query)
        chunks = [df]
        while True:
            # Fetch the next 100 rows; an empty list means the result set is exhausted.
            query_results = cur.fetchmany(100)
            if not query_results:
                break
            # Map each row tuple onto the given column names.
            chunks.append(pd.DataFrame.from_records(query_results, columns=col_names))
        # DataFrame.append was deprecated and removed in pandas 2.0 --
        # collect the chunks and concatenate once instead.
        return pd.concat(chunks, ignore_index=True)
    except Exception as error:
        print(f"{type(error).__name__}: {error}")
        print("Query:", cur.query)
        conn.rollback()
        # NOTE(review): falls through and returns None on error, as the original did.
def __stock_tags_bulk_insert(df: pd.DataFrame, column_name: str, conn: psycopg2.extensions.connection, cursor: psycopg2.extensions.cursor) -> Any:
    """
    Inserts all tag rows of df in the DB of the relationship between stock and tags.

    :param df: df with three columns of stock and a column of tag.
    :param column_name: str. The name of tag column.
    :param conn: psycopg2.extensions.connection. Connection to DB.
    :param cursor: cursor of DB.
    :return: None.
    """
    for index, series in df.iterrows():
        if not pd.isnull(series[column_name]):
            # Parameterized queries: the original interpolated values with
            # str.format, which breaks on quotes and allows SQL injection.
            # First look up the stock id of the row.
            cursor.execute(
                "SELECT id FROM stock WHERE stock_name = %s AND exchange = %s AND ticker = %s",
                (series['stock_name'], series['exchange'], series['ticker']))
            stock_row = cursor.fetchone()
            # Next, look up the tag id of the row.
            cursor.execute(
                "SELECT id FROM tag WHERE tag_name = %s AND category = %s",
                (series[column_name], column_name))
            tag_row = cursor.fetchone()
            # fetchone() returns None when nothing matched; the original
            # crashed with a TypeError here -- skip the row instead.
            if stock_row is None or tag_row is None:
                continue
            # Insert the stock_tag relation using stock_id and tag_id.
            cursor.execute(
                "INSERT INTO stock_tag (stock_id, tag_id) VALUES (%s, %s) ON CONFLICT DO NOTHING",
                (stock_row[0], tag_row[0]))
            conn.commit()
def copy_from(df: pd.DataFrame, table: str, connection: psycopg2.extensions.connection, chunk_size: int = 10000):
    """Bulk-load *df* into *table* via COPY, in chunks of *chunk_size* rows.

    Object columns are escaped for the COPY text format, each chunk is
    streamed through an in-memory tab-separated buffer and committed
    separately; a psycopg2 error is printed and rolled back.
    """
    cursor = connection.cursor()
    df = df.copy()
    escaped = {
        '\\': '\\\\',
        '\n': r'\n',
        '\r': r'\r',
        '\t': r'\t',
    }
    for col in df.columns:
        if df.dtypes[col] == 'object':
            for char, replacement in escaped.items():
                # regex=False: these are literal characters -- the default of
                # Series.str.replace changed across pandas versions and a lone
                # backslash is not a valid regular expression.
                df[col] = df[col].str.replace(char, replacement, regex=False)
    try:
        for start in tqdm(range(0, df.shape[0], chunk_size)):
            buf = StringIO()
            chunk = df.iloc[start:(start + chunk_size)]
            chunk.to_csv(buf, index=False, header=False, sep='\t', na_rep='\\N', quoting=None)
            buf.seek(0)
            # Quote column names so case-sensitive identifiers survive.
            # (Renamed the loop variables: the original reused `i` for both the
            # chunk offset and the column name.)
            cursor.copy_from(buf, table, columns=[f'"{name}"' for name in df.columns])
            connection.commit()
    except psycopg2.Error as e:
        print(e)
        connection.rollback()
    finally:
        # Close the cursor even if a non-psycopg2 exception escapes.
        cursor.close()
def recognise_employee_sql(connection: psycopg2.extensions.connection, encodings):
    """Recognise an employee with a single SQL query.

    Splits the 128-dim face encoding into two 64-dim CUBE vectors and selects
    the closest employee within THRESHOLD. Prints the match on success;
    returns False after a rollback on any database error.
    """
    cursor = connection.cursor()
    try:
        # Build the vector literals once -- the original formatted the same
        # two joins twice (for the WHERE and the ORDER BY clauses).
        v_low = ','.join(str(s) for s in encodings[0:64])
        v_high = ','.join(str(s) for s in encodings[64:128])
        distance = (
            'sqrt(power(CUBE(array[{}]) <-> vec_low, 2) '
            '+ power(CUBE(array[{}]) <-> vec_high, 2))'
        ).format(v_low, v_high)
        query = '''
            SELECT first_name, last_name FROM employees
            WHERE {distance} <= {threshold}
            ORDER BY {distance} ASC
            LIMIT 1
        '''.format(distance=distance, threshold=THRESHOLD)
        cursor.execute(query)
        print(cursor.fetchall())
    except (Exception, Error) as error:
        print('Ошибка при запросе к БД:', error)
        connection.rollback()
        return False
def create_table(name: str, schema: str, connection: psycopg2.extensions.connection):
    """Create table *name* with column definition *schema*, unless it exists already."""
    cursor = connection.cursor()
    cursor.execute(f"""CREATE TABLE IF NOT EXISTS {name} ({schema})""")
    connection.commit()
    cursor.close()
def _checkmaps(conn: psycopg2.extensions.connection, col: str, tbl: str, file_itms: set):
    '''
    We do this to resync new data with what is in the db currently.
    check and remap data
    '''
    cur = conn.cursor()
    # Normalise the column name to a lower-case identifier.
    col = col.replace(" ", "_")
    col = col.lower()
    # Pull the existing 'key' column values from the db.
    LOG.debug(f'''SELECT DISTINCT "{col}" from {tbl};''')
    cur.execute(f'''SELECT DISTINCT "{col}" from {tbl};''')
    existing_items = {row[0] for row in cur.fetchall()}
    for each in file_itms:
        if str(each) not in existing_items:
            # Keep the original quote-stripping so the stored values stay
            # identical to what the old code inserted.
            if each is not None and isinstance(each, str):
                each = each.replace("\'", "")
            # Parameterize the value: identifiers (col/tbl) cannot be bound
            # parameters, but interpolating the data into the SQL was an
            # injection hole and broke on special characters.
            cur.execute(f'''INSERT INTO {tbl} ("{col}") VALUES (%s);''', (each,))
            LOG.debug(f"Inserted new item {col}: {each} into table {tbl}")
            conn.commit()
def process_data(cur: psycopg2.extensions.cursor, conn: psycopg2.extensions.connection, filepath: str, func):
    """
    Given a connection to a PostgresSQL database, a path to a directory on the
    local filesystem and a processing function, do the following:

    1. Load all *.json files found in filepath and its subdirectories
    2. Print the number of files found in step one
    3. Apply the processing function func to all files found in step one

    :param cur: Cursor
    :param conn: Connection to PostgreSQL database
    :param filepath: Path to JSON file
    :param func: (Python) function to process data
    """
    # Collect the absolute path of every *.json file under filepath.
    all_files = []
    for root, _dirs, _files in os.walk(filepath):
        for match in glob.glob(os.path.join(root, '*.json')):
            all_files.append(os.path.abspath(match))

    # Report the total number of files found.
    num_files = len(all_files)
    print('{} files found in {}'.format(num_files, filepath))

    # Process each file, committing after every one.
    for i, datafile in enumerate(all_files, 1):
        func(cur, datafile)
        conn.commit()
        print('{}/{} files processed.'.format(i, num_files))
def copy_from(df: pd.DataFrame, table: str, connection: psycopg2.extensions.connection, chunk_size: int = 10000):
    """Bulk-load *df* into *table* with COPY in *chunk_size*-row batches.

    Object columns are escaped for the COPY text format; each chunk is
    serialized into an in-memory buffer and committed separately. A psycopg2
    error is reported and rolled back.
    """
    cursor = connection.cursor()
    df = df.copy()
    escaped = {'\\': '\\\\', '\n': r'\n', '\r': r'\r', '\t': r'\t'}
    for col in df.columns:
        if df.dtypes[col] == 'object':
            for char, replacement in escaped.items():
                # regex=False: literal single-character replacement -- the
                # default of Series.str.replace changed across pandas versions
                # and '\\' is not a valid regular expression.
                df[col] = df[col].str.replace(char, replacement, regex=False)
    try:
        for start in range(0, df.shape[0], chunk_size):
            buf = io.StringIO()
            chunk = df.iloc[start:(start + chunk_size)]
            # The default separator is a tab, and NULLs are indicated by the two character-string '\N'
            chunk.to_csv(buf, index=False, header=False, sep='\t', na_rep='\\N', quoting=None)
            buf.seek(0)
            cursor.copy_from(buf, table, columns=list(df.columns))
            connection.commit()
    except psycopg2.Error as error:
        # Surface the failure instead of swallowing it silently (matches the
        # other copy_from variant in this file, which prints the error).
        print(error)
        connection.rollback()
    finally:
        # Close the cursor even if a non-psycopg2 exception escapes.
        cursor.close()
def __init__(self, db: pg.extensions.connection, problem: int, intermed_nodes: bool):
    """db : psycopg2.connection
        database to read from.
    problem : int
        index of the problem.
    intermed_nodes : bool
        if True calculates the shortest path between successive nodes.
    """
    LOGGER.debug("Creating %s for problem %d.", self.__class__.__name__, problem)
    self.problem = problem
    self.intermed_nodes = intermed_nodes
    self.num_vars = None
    # Wait until the connection reports a usable transaction status.
    status = db.get_transaction_status()
    sleeptimer = 0.5
    while status not in good_db_status():
        # Use the module LOGGER -- the original called logging.warning on the
        # root logger, bypassing the logger used everywhere else here.
        LOGGER.warning("Waiting %fs for DB connection in status %s",
                       sleeptimer, PSYCOPG2_8_5_TASTATUS[status])
        sleep(sleeptimer)
        status = db.get_transaction_status()
    self.connection = db
def stocks_bulk_insert(df: pd.DataFrame, conn: psycopg2.extensions.connection, cursor: psycopg2.extensions.cursor) -> Any:
    """
    Inserts all rows of df in the DB.

    :param df: pd.DataFrame. stocks data.
    :param conn: psycopg2.extensions.connection. Connection to DB.
    :param cursor: cursor of DB.
    :return: None.
    """
    if df.empty:
        return
    # Iterating over DataFrame rows. Bound parameters replace the original
    # str.format interpolation, which broke on quotes in stock names and was
    # open to SQL injection.
    for index, series in df.iterrows():
        cursor.execute(
            "INSERT INTO stock (ticker, stock_name, exchange) "
            "VALUES (%s, %s, %s) ON CONFLICT DO NOTHING",
            (series["ticker"], series["stock_name"], series["exchange"]))
        conn.commit()
    print(">>> GETTING HISTORICAL DATA")
    ticker_list = list(df["ticker"])
    __get_historical_data(ticker_list)
def create_table(connection: psycopg2.extensions.connection, cursor: psycopg2.extensions.cursor):
    """Function which creates the table if needed

    Args:
        connection (psycopg2.extensions.connection): database connection
        cursor (psycopg2.extensions.cursor): database cursor
    """
    # DDL for the templates table; a no-op when it already exists.
    ddl = '''CREATE TABLE IF NOT EXISTS templates (
        id SERIAL PRIMARY KEY NOT NULL,
        name TEXT NOT NULL,
        year INT NOT NULL,
        month INT NOT NULL,
        category TEXT NOT NULL,
        uw_category TEXT,
        wikibreak_category1 TEXT,
        wikibreak_category2 TEXT,
        wikibreak_subcategory TEXT,
        amount INT,
        cumulative_amount INT);
    '''
    cursor.execute(ddl)
    connection.commit()
    print("Table created successfully")
def load_tables(config: list, connection: pg.extensions.connection):
    """Load every downloaded CSV belonging to each configured table via COPY."""
    cur = connection.cursor()
    for table in config:
        table_name = table.get('name')
        # Downloaded files for this table are recognised by their name prefix.
        candidates = [
            filename for filename in os.listdir(downloads_path)
            if filename.startswith(table_name)
        ]
        candidates = check_for_fhv_2017_type(name=table_name, files=candidates)
        if not candidates:
            print("""No files to upload to {} table.""".format(table_name))
            continue
        for file in candidates:
            file_name = file.split('.')[0]
            table_source = downloads_path.joinpath(f"{file_name}.csv")
            print("""Started to load {} data to db from {}.""".format(
                table_name, table_source))
            with open(table_source, 'r', encoding='utf-8') as f:
                next(f)  # skip the header row
                cur.copy_expert(
                    f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
                connection.commit()
            print("""Completed loading file {} into {} table.""".format(
                file, table_name))
def add_employee_to_db(connection: psycopg2.extensions.connection, employee: Person):
    """Insert *employee* into the employees table.

    The 128-dim face encoding is split into two 64-dim CUBE vectors. Text
    fields are passed as bound parameters -- the original interpolated them
    into an f-string, which broke on quotes and allowed SQL injection; the
    numeric CUBE arrays stay interpolated since they are built from floats.

    Returns True on success, False after a rollback on any error.
    """
    try:
        cursor = connection.cursor()
        v_low = ','.join(str(s) for s in employee.face_encoding[0:64])
        v_high = ','.join(str(s) for s in employee.face_encoding[64:128])
        name, surname = employee.name.split()
        query = f'''
            INSERT INTO employees(
                first_name, last_name, image_path, access, vec_low, vec_high
            )
            VALUES (%s, %s, %s, %s, CUBE(array[{v_low}]), CUBE(array[{v_high}]))
            ON CONFLICT DO NOTHING;
        '''
        cursor.execute(query, (name, surname, employee.image_path, employee.access))
        cursor.close()
        # NOTE(review): no connection.commit() here -- the insert is lost
        # unless the connection is in autocommit mode or a caller commits.
        # Confirm against the callers before adding one.
        return True
    except (Exception, Error) as error:
        print('Ошибка при добавлении пользователя в базу:', error)
        connection.rollback()
        return False
def insert_DiscordUser_into_databse(
        member_id: int,
        cursor: psycopg2.extensions.cursor,
        connection: psycopg2.extensions.connection):
    """Insert a DiscordUser row for *member_id* and commit; failures are printed."""
    try:
        # The prepared statement lives in sql_insert_DiscordUser_into_database.
        cursor.execute(sql_insert_DiscordUser_into_database, (member_id, ))
        connection.commit()
    except (Exception, psycopg2.Error) as error:
        print(f"Error while inserting into DiscordUser on database: {error}")
def create_tables(config: list, connection: pg.extensions.connection):
    """Create every table described in *config* (dicts with 'name' and 'schema')."""
    cursor = connection.cursor()
    for entry in config:
        name = entry.get('name')
        schema = entry.get('schema')
        # Idempotent DDL: a no-op when the table already exists.
        cursor.execute(f"""CREATE TABLE IF NOT EXISTS {name} ({schema})""")
        connection.commit()
def insert_new_row(table: str, columns: T.List[str], variables: T.List[T.Any], conn: psycopg2.extensions.connection):
    """Insert one row of *variables* into the given *columns* of *table*, then commit."""
    # Build "col1, col2" and "%s,%s" fragments for the parameterized INSERT.
    placeholders_str = ','.join(['%s'] * len(columns))
    columns_str = ', '.join(columns)
    dml = f"INSERT INTO {table} ({columns_str}) VALUES ({placeholders_str})"
    cur = conn.cursor()
    cur.execute(dml, variables)
    conn.commit()
    cur.close()
def close_connection_to_database(cursor: psycopg2.extensions.cursor, connection: psycopg2.extensions.connection):
    """Close the cursor and the connection, printing any error that occurs."""
    try:
        # Nothing to do when there is no live connection.
        if not connection:
            return
        cursor.close()
        connection.close()
        print("PostgreSQL connection is closed")
    except (Exception, psycopg2.Error) as error:
        print(f"Error while disconnection from PostgreSQL: {error}")
def insert_DiscordUserVoiceSession_into_database(
        member_id: int, channel_id: int, session_amount: float,
        cursor: psycopg2.extensions.cursor,
        connection: psycopg2.extensions.connection):
    """Record one voice session (member, channel, duration) and commit; errors are printed."""
    params = (member_id, channel_id, session_amount)
    try:
        cursor.execute(sql_insert_DiscordUserVoiceSession_into_Database, params)
        connection.commit()
    except (Exception, psycopg2.Error) as error:
        print(
            f"Error while inserting into DiscordUserVoiceSession on database: {error}"
        )
def create_index(connection: psycopg2.extensions.connection, cursor: psycopg2.extensions.cursor):
    """Creates an hash index on the name column

    Args:
        connection (psycopg2.extensions.connection): database connection
        cursor (psycopg2.extensions.cursor): database cursor
    """
    # IF NOT EXISTS makes the call idempotent -- the original raised a
    # DuplicateTable error on re-runs, inconsistent with the CREATE TABLE
    # IF NOT EXISTS convention used elsewhere in this file.
    create_index_query = "CREATE INDEX IF NOT EXISTS template_name_index ON templates USING hash (name)"
    cursor.execute(create_index_query)
    connection.commit()
    print("Index on the column name created successfully")
def load_tables(config: list, connection: pg.extensions.connection):
    """Load <table>.csv from ../data/ into each configured table via COPY."""
    cur = connection.cursor()
    data_path = '../data/'
    for entry in config:
        table_name = entry.get('name')
        source = data_path + f"{table_name}.csv"
        with open(source, 'r') as f:
            next(f)  # skip the header row
            cur.copy_expert(f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
            connection.commit()
def create_table(sql_query: str, conn: psycopg2.extensions.connection, cur: psycopg2.extensions.cursor) -> None:
    """Execute a table-creation statement; commit on success, roll back and close on failure."""
    try:
        cur.execute(sql_query)
    except Exception as error:
        print(f"{type(error).__name__}: {error}")
        print(f"Query: {cur.query}")
        conn.rollback()
        cur.close()
    else:
        # Changes only take effect once they are committed.
        conn.commit()
def drop_table(connection: psycopg2.extensions.connection, cursor: psycopg2.extensions.cursor):
    """Function which drops the table

    Args:
        connection (psycopg2.extensions.connection): database connection
        cursor (psycopg2.extensions.cursor): database cursor
    """
    # NOTE(review): no IF EXISTS, so this raises when the table is absent --
    # confirm whether callers rely on that before softening it.
    cursor.execute('''DROP TABLE templates;''')
    connection.commit()
    print("Table deleted successfully")
def load_tables(config: list, connection: pg.extensions.connection):
    """Load <table>.csv from ~/Documents/data_science/ht_v2/data into each table via COPY."""
    cur = connection.cursor()
    data_path = Path(os.environ['HOME'], 'Documents', 'data_science', 'ht_v2', 'data')
    for entry in config:
        name = entry.get('name')
        with open(data_path.joinpath(f"{name}.csv"), 'r') as f:
            next(f)  # skip the header row
            cur.copy_expert(f"COPY {name} FROM STDIN CSV NULL AS ''", f)
            connection.commit()
def run_query(
    sql_query: str,
    conn: psycopg2.extensions.connection,
    cur: psycopg2.extensions.cursor,
) -> None:
    """Execute *sql_query*; commit on success, print/rollback/close the cursor on failure."""
    try:
        cur.execute(sql_query)
    except Exception as error:
        print(f"{type(error).__name__}: {error}")
        print(f"Query: {cur.query}")
        conn.rollback()
        cur.close()
    else:
        conn.commit()
def get_curr_rev_id(conn: psycopg2.extensions.connection) -> Union[str, None]:
    """Return the stored migration version, or None when it is not available.

    Two recovery paths:
    - psycopg2.ProgrammingError: the migro_ver table does not exist yet; the
      failed transaction is rolled back, the table is created, None returned.
    - TypeError: fetchone() returned None (table exists but holds no row),
      so fetchone()[0] raised; None is returned.

    The cursor is always closed via finally.
    """
    curs = conn.cursor()
    try:
        curs.execute('SELECT ver FROM migro_ver')
        return curs.fetchone()[0]
    except psycopg2.ProgrammingError:
        # Roll back first: the failed SELECT left the transaction aborted,
        # and the CREATE TABLE below would fail otherwise.
        conn.rollback()
        curs.execute(
            'CREATE TABLE migro_ver (ver VARCHAR(12) PRIMARY KEY)')
        conn.commit()
        return None
    except TypeError:
        # Table exists but contains no version row.
        return None
    finally:
        curs.close()
def _build_dictionaries(conn: psycopg2.extensions.connection) -> dict:
    """Build name->rowid lookup dicts from the *_map tables.

    :param conn: open psycopg2 connection.
    :return: dict keyed by 'center', 'device', 'software' and 'os'; each value
        maps that table's name/hash column to its rowid.
    """
    cur = conn.cursor()
    # (result key, table, name column) -- the original repeated the same
    # query/remap stanza four times; a data-driven loop emits the exact same
    # SQL strings.
    mappings = [
        ('center', 'center_map', 'center_name'),
        ('device', 'device_map', 'device_name'),
        ('software', 'software_map', 'software_hash'),
        ('os', 'os_map', 'os_name'),
        # ('ip', 'ip_map', 'ip_address'),  # disabled in the original
    ]
    master_dict = {}
    for key, table, column in mappings:
        cur.execute(f'select rowid, {column} from {table};')
        # Invert (rowid, name) pairs into a name -> rowid mapping.
        master_dict[key] = {name: rowid for rowid, name in cur.fetchall()}
    return master_dict
def get_walking(lat: float, lng: float, t: int, conn: psycopg2.extensions.connection, walking_endpoint: str,
        timeout: int = 20, multiple_times_allowed: bool = False, raise_exceptions: bool = False,
        download_geometry_after_timeout: bool = False) -> Dict[str, Any]:
    """Return the walking-availability polygon for (lat, lng, t) as GeoJSON.

    Checks the `walking` cache table first, then calls the walking service.
    On timeout, optionally starts a background download with a 20x timeout;
    on any other failure falls back to the geometrically closest cached
    polygon for the same time, or an empty polygon.
    """
    lat, lng = round(lat, 6), round(lng, 6)
    with conn.cursor() as cur:
        cur.execute('SELECT ST_AsGeoJSON(geometry) FROM walking WHERE latitude = %s AND longitude = %s AND time = %s LIMIT 1',
                    (lat, lng, t))
        res = cur.fetchone()
        if res is not None:
            return json.loads(res[0])
        try:
            return _get_walking_internal(lat, lng, t, conn, walking_endpoint, timeout, multiple_times_allowed)  # type: ignore
        except (requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout) as ex:
            if download_geometry_after_timeout:
                # Fixed: the original launched _get_public_transport_internal
                # here -- the wrong downloader for walking geometry.
                thread = threading.Thread(
                    target=lambda: _get_walking_internal(lat, lng, t, conn, walking_endpoint, timeout * 20))
                thread.start()
            if raise_exceptions:
                raise TimeoutError(ex)
            else:
                log.warning(f'Walking geometry download ({lat}, {lng}, {t}) failed with timeout')
                return {'type': 'Polygon', 'coordinates': []}
        except Exception as ex:
            if raise_exceptions:
                raise
            else:
                log.warning(f'Walking geometry download ({lat}, {lng}, {t}) failed with exception: {repr(ex)}')
            log.error(f'Walking geometry download for ({lat}, {lng}, {t}) failed: {repr(ex)}')
            # Fixed: added the missing space before FROM -- the original's
            # adjacent string literals concatenated into "min_distanceFROM
            # walking", producing invalid SQL.
            # NOTE(review): ST_MakePoint takes (x=lng, y=lat); passing
            # (lat, lng) is kept from the original -- confirm the axis order.
            cur.execute('SELECT ST_AsGeoJSON(geometry), ST_Distance(geometry, ST_SetSRID(ST_MakePoint(%s, %s), 4326)) AS min_distance '
                        'FROM walking WHERE time = %s ORDER BY 2 LIMIT 1', (lat, lng, t))
            res = cur.fetchone()
            if res is None:
                return {'type': 'Polygon', 'coordinates': []}
            return json.loads(res[0])
def get_personal_transport(lat: float, lng: float, t: int, conn: psycopg2.extensions.connection, personal_transport_endpoint: str,
        timeout: int = 20, raise_exceptions: bool = False, download_geometry_after_timeout: bool = False) -> Dict[str, Any]:
    """Return the personal-transport availability polygon for (lat, lng, t) as GeoJSON.

    Checks the `car` cache table first, then calls the routing service. On
    timeout, optionally schedules a retry with a 20x timeout and falls back
    to the closest cached polygon for the same time; any other failure
    yields an empty polygon (or re-raises when raise_exceptions is set).
    """
    lat, lng = round(lat, 6), round(lng, 6)
    with conn.cursor() as cur:
        cur.execute('SELECT ST_AsGeoJSON(geometry) FROM car WHERE latitude = %s AND longitude = %s AND time = %s', (lat, lng, t))
        res = cur.fetchone()
        if res is not None:
            return json.loads(res[0])
        try:
            return _get_personal_transport_internal(lat, lng, t, conn, personal_transport_endpoint, timeout)  # type: ignore
        except (requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout) as ex:
            # Fixed: the original tested download_geometry_after_timeout twice
            # in a row; a single check suffices.
            if download_geometry_after_timeout:
                _execute_after(
                    lambda: _get_personal_transport_internal(lat, lng, t, conn, personal_transport_endpoint, timeout * 20),
                    f'personal_transport_download ({lat}, {lng}, {t})')
            if raise_exceptions:
                raise TimeoutError(ex)
            else:
                log.warning(f'Personal transport geometry download ({lat}, {lng}, {t}) failed with timeout')
            # Fixed: parameter order. The placeholders are (time, x, y) but the
            # original passed (lat, lng, t), binding lat as the time value.
            # NOTE(review): ST_MakePoint takes (x=lng, y=lat); (lat, lng) is
            # kept to match the sibling get_walking fallback -- confirm.
            cur.execute('SELECT ST_AsGeoJSON(geometry) FROM car WHERE time = %s'
                        ' ORDER BY ST_Distance(geometry, ST_SetSRID(ST_MakePoint(%s, %s), 4326)) LIMIT 1', (t, lat, lng))
            res = cur.fetchone()
            if res is None:
                return {'type': 'Polygon', 'coordinates': []}
            return json.loads(res[0])
        except Exception as ex:
            log.error(f'Personal transport download ({lat}, {lng}, {t}) failed (exception): {repr(ex)}')
            if raise_exceptions:
                raise
            return {'type': 'Polygon', 'coordinates': []}
def select(conn: psycopg2.extensions.connection, table: str, qualifications: Dict = {}, select_list: List = [], limit: Union[int, None] = None, arraysize: int = 100) -> Iterable[Any]:
    """A generator method that runs a select query on the table and yields the results.

    Args:
        conn: An open database connection object.
        table: The table name.
        qualifications: A dict of parameters for the WHERE clause. The dict's
            keys are the column names. Each value is either a single native
            type (str, int, ..), in which case the condition is key = value,
            or a tuple in the format (operator, value), like ('<', 3) or
            ('>=', 15), in which case the operator is used between the key
            and value instead of a '=' sign.
        select_list: The columns to fetch. If empty, returns all columns (*).
        limit: The maximum number of rows to fetch.
        arraysize: The cursor arraysize (only used when limit is None).

    Yields:
        Rows from the table that matched the qualifications.
    """
    query, params = _build_query(table, qualifications, select_list, limit)
    with conn:
        with conn.cursor() as cursor:
            cursor.arraysize = arraysize
            cursor.execute(query, params)
            # Drain the cursor in arraysize-sized batches until it is empty.
            while True:
                batch: List[Any] = cursor.fetchmany()
                if not batch:
                    break
                yield from batch
def get_nrows(table_name: str, conn: psycopg2.extensions.connection) -> int:
    """Return the number of rows currently stored in *table_name*."""
    cursor = conn.cursor()
    cursor.execute(f"""SELECT COUNT(*) FROM {table_name};""")
    (count,) = cursor.fetchone()
    cursor.close()
    return count