def recognise_employee_sql(connection: psycopg2.extensions.connection, encodings):
    """Recognise an employee by face encodings with a single SQL query.

    Args:
        connection: An open psycopg2 connection.
        encodings: A 128-dimensional face encoding (indexable sequence of
            numbers); the first 64 values are matched against ``vec_low``
            and the last 64 against ``vec_high``.

    Returns:
        None on success (the best match, if any, is printed), or False on
        a database error (the transaction is rolled back).
    """
    cursor = connection.cursor()
    try:
        # Build each CUBE array literal once; the original formatted the
        # same arrays twice (once for WHERE, once for ORDER BY).
        vec_low = ','.join(str(s) for s in encodings[0:64])
        vec_high = ','.join(str(s) for s in encodings[64:128])
        distance = (
            'sqrt(power(CUBE(array[{low}]) <-> vec_low, 2) + '
            'power(CUBE(array[{high}]) <-> vec_high, 2))'
        ).format(low=vec_low, high=vec_high)
        # NOTE(review): string-built SQL is tolerable here only because the
        # interpolated values are numeric encodings, not user-supplied text.
        query = (
            'SELECT first_name, last_name FROM employees '
            'WHERE {dist} <= {threshold} '
            'ORDER BY {dist} ASC LIMIT 1'
        ).format(dist=distance, threshold=THRESHOLD)
        cursor.execute(query)
        print(cursor.fetchall())
    except (Exception, Error) as error:
        print('Ошибка при запросе к БД:', error)
        connection.rollback()
        return False
    finally:
        # The original leaked the cursor on every call.
        cursor.close()
def _build_dictionaries(conn: psycopg2.extensions.connection) -> dict:
    """Build reverse lookup dictionaries (value -> rowid) for the map tables.

    Returns:
        A dict with keys 'center', 'device', 'software' and 'os', each
        mapping the human-readable column value to its rowid.
    """
    # One (dict key -> (value column, table)) spec per *_map table; the
    # original repeated the same execute/invert stanza four times.
    specs = {
        'center': ('center_name', 'center_map'),
        'device': ('device_name', 'device_map'),
        'software': ('software_hash', 'software_map'),
        'os': ('os_name', 'os_map'),
        # ('ip_address', 'ip_map') was intentionally disabled upstream.
    }
    cur = conn.cursor()
    master_dict = {}
    for key, (column, table) in specs.items():
        cur.execute(f'select rowid, {column} from {table};')
        # Rows come back as (rowid, value); invert to value -> rowid.
        master_dict[key] = {value: rowid for rowid, value in cur.fetchall()}
    return master_dict
def copy_from(df: pd.DataFrame, table: str,
              connection: psycopg2.extensions.connection,
              chunk_size: int = 10000):
    """Bulk-load ``df`` into ``table`` via PostgreSQL COPY, in chunks.

    String columns are escaped so backslash, newline, carriage return and
    tab survive the tab-separated COPY text format. Commits after every
    chunk; on a database error the current transaction is rolled back.

    Args:
        df: The frame to load (copied; the caller's frame is not mutated).
        table: Target table name.
        connection: An open psycopg2 connection.
        chunk_size: Rows per COPY batch.
    """
    cursor = connection.cursor()
    df = df.copy()
    # Literal character -> its COPY text-format escape sequence.
    escaped = {
        '\\': '\\\\',
        '\n': r'\n',
        '\r': r'\r',
        '\t': r'\t',
    }
    for col in df.columns:
        if df.dtypes[col] == 'object':
            for char, escape in escaped.items():
                # regex=False is required: with regex matching, the single
                # backslash pattern is an invalid regular expression.
                df[col] = df[col].str.replace(char, escape, regex=False)
    try:
        for start in tqdm(range(0, df.shape[0], chunk_size)):
            buf = StringIO()
            chunk = df.iloc[start:(start + chunk_size)]
            chunk.to_csv(buf, index=False, header=False, sep='\t',
                         na_rep='\\N', quoting=None)
            buf.seek(0)
            # Double-quote column names so mixed-case / reserved identifiers
            # reach COPY verbatim. (Renamed the inner loop variable: the
            # original reused `i` for both the row offset and the column.)
            cursor.copy_from(buf, table,
                             columns=[f'"{col}"' for col in df.columns])
            connection.commit()
    except psycopg2.Error as e:
        print(e)
        connection.rollback()
    finally:
        # Close even on unexpected exceptions (original closed only on the
        # success / handled-error paths).
        cursor.close()
def select(conn: psycopg2.extensions.connection,
           table: str,
           qualifications: Union[Dict, None] = None,
           select_list: Union[List, None] = None,
           limit: Union[int, None] = None,
           arraysize: int = 100) -> Iterable[Any]:
    """A generator method that runs a select query on the table and yields
    the results.

    Args:
        conn: An open database connection object.
        table: The table name.
        qualifications: A dict of parameters for the WHERE clause. The
            dict's keys are the column names. Each value is either a single
            native type (str, int, ..), in which case the condition is
            key = value, or a tuple in the format (operator, value), like
            ('<', 3) or ('>=', 15), in which case the operator is used
            between the key and value instead of a '=' sign.
        select_list: The columns to fetch. If empty, returns all columns (*).
        limit: The maximum number of rows to fetch.
        arraysize: The cursor arraysize (only used when limit is None).

    Yields:
        Rows from the table that matched the qualifications.
    """
    # The original used mutable default arguments ({} / []); normalise
    # None to fresh containers instead.
    if qualifications is None:
        qualifications = {}
    if select_list is None:
        select_list = []
    query, params = _build_query(table, qualifications, select_list, limit)
    with conn:
        with conn.cursor() as cursor:
            cursor.arraysize = arraysize
            cursor.execute(query, params)
            rows: List[Any]
            # fetchmany returns [] when exhausted, terminating iter().
            for rows in iter(cursor.fetchmany, []):
                yield from rows
def _checkmaps(conn: psycopg2.extensions.connection, col: str, tbl: str,
               file_itms: set):
    '''
    We do this to resync new data with what is in the db currently.
    Fetches the distinct values of ``col`` from ``tbl`` and inserts every
    member of ``file_itms`` that is not already present.
    '''
    cur = conn.cursor()
    # Normalise the column name to the db naming convention.
    col = col.replace(" ", "_").lower()
    # pulling the 'key' column from the db
    select_sql = f'''SELECT DISTINCT "{col}" from {tbl};'''
    LOG.debug(select_sql)
    cur.execute(select_sql)
    existing_items = {row[0] for row in cur.fetchall()}
    for each in file_itms:
        # Membership is compared on the string form, matching how the
        # values were originally stored.
        if str(each) not in existing_items:
            # clean up formatting problems (keeps the historical behaviour
            # of stripping apostrophes from stored string values)
            if each is not None and isinstance(each, str):
                each = each.replace("'", "")
            # Parameterised INSERT: the original interpolated the value
            # into the SQL string, which broke on quotes and was injectable.
            cur.execute(f'''INSERT INTO {tbl} ("{col}") VALUES (%s);''',
                        (each,))
            LOG.debug(f"Inserted new item {col}: {each} into table {tbl}")
    conn.commit()
def load_tables(config: list, connection: pg.extensions.connection):
    """Load every downloaded CSV belonging to each configured table via COPY."""
    cur = connection.cursor()
    for table in config:
        table_name = table.get('name')
        candidates = [
            fname for fname in os.listdir(downloads_path)
            if fname.startswith(table_name)
        ]
        candidates = check_for_fhv_2017_type(name=table_name,
                                             files=candidates)
        if not candidates:
            print("""No files to upload to {} table.""".format(table_name))
            continue
        for file in candidates:
            stem = file.split('.')[0]
            table_source = downloads_path.joinpath(f"{stem}.csv")
            print("""Started to load {} data to db from {}.""".format(
                table_name, table_source))
            with open(table_source, 'r', encoding='utf-8') as f:
                next(f)  # skip the CSV header; COPY expects data rows only
                cur.copy_expert(
                    f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
                connection.commit()
            print("""Completed loading file {} into {} table.""".format(
                file, table_name))
def copy_from(df: pd.DataFrame, table: str,
              connection: psycopg2.extensions.connection,
              chunk_size: int = 10000):
    """Bulk-load ``df`` into ``table`` via PostgreSQL COPY, in chunks.

    String columns are escaped so backslash, newline, carriage return and
    tab survive the tab-separated COPY text format. Commits after every
    chunk; on a database error the transaction is rolled back.
    """
    cursor = connection.cursor()
    df = df.copy()
    # Literal character -> its COPY text-format escape sequence.
    escaped = {'\\': '\\\\', '\n': r'\n', '\r': r'\r', '\t': r'\t'}
    for col in df.columns:
        if df.dtypes[col] == 'object':
            for char, escape in escaped.items():
                # regex=False is required: with regex matching, the single
                # backslash pattern is an invalid regular expression.
                df[col] = df[col].str.replace(char, escape, regex=False)
    try:
        for start in range(0, df.shape[0], chunk_size):
            buf = io.StringIO()
            chunk = df.iloc[start:(start + chunk_size)]
            # The default separator is a tab, and NULLs are indicated by the
            # two character-string '\N'
            chunk.to_csv(buf, index=False, header=False, sep='\t',
                         na_rep='\\N', quoting=None)
            buf.seek(0)
            cursor.copy_from(buf, table, columns=list(df.columns))
            connection.commit()
    except psycopg2.Error as e:
        # Report the failure instead of swallowing it silently (matches the
        # other copy_from in this codebase), then roll back the batch.
        print(e)
        connection.rollback()
    finally:
        cursor.close()
def export_tasks(*, conn: psycopg2.extensions.connection) -> None:
    """Export all contest tasks to EXPORT_DIR/problems.json as a JSON array."""
    path = EXPORT_DIR / "problems.json"
    logger.info("write: %s", path)
    with open(path, "w") as fh:
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            cur.execute("""
                SELECT contest_id, task_id, alphabet, task_name
                FROM contests_tasks
                INNER JOIN tasks USING (task_id)
                ORDER BY (contest_id, task_id)
            """)
            # Open the array unconditionally: the original wrote "[" only
            # before the first row, so an empty result produced just "]\n",
            # which is invalid JSON.
            fh.write("[")
            for i, row in enumerate(cur.fetchall()):
                if i > 0:
                    fh.write(",")
                data = {
                    "id": row["task_id"],
                    "contest_id": row["contest_id"],
                    "title": row["alphabet"] + ". " + row["task_name"],
                }
                fh.write(
                    json.dumps(data, separators=(',', ':'), sort_keys=True,
                               ensure_ascii=False) + "\n")
        fh.write("]\n")
def create_table(name: str, schema: str,
                 connection: psycopg2.extensions.connection):
    """Create table ``name`` with column definition ``schema`` if missing."""
    cursor = connection.cursor()
    cursor.execute(f"""CREATE TABLE IF NOT EXISTS {name} ({schema})""")
    connection.commit()
    cursor.close()
def export_submissions(*, conn: psycopg2.extensions.connection) -> None:
    """Export submissions for every user present in the users table."""
    with conn.cursor() as cur:
        cur.execute("""
            SELECT user_id FROM users
        """)
        user_ids = [row[0] for row in cur.fetchall()]
    for user_id in user_ids:
        export_submissions_for_user(user_id, conn=conn)
def export_contests(*, conn: psycopg2.extensions.connection) -> None:
    """Export all contests to EXPORT_DIR/contests.json as a JSON array."""
    path = EXPORT_DIR / "contests.json"
    logger.info("write: %s", path)
    with open(path, "w") as fh:
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            cur.execute("""
                SELECT contest_id, contest_name, rated_range, start_at, end_at
                FROM contests
                ORDER BY contest_id
            """)
            # Open the array unconditionally: the original wrote "[" only
            # before the first row, so an empty result produced just "]\n",
            # which is invalid JSON.
            fh.write("[")
            for i, row in enumerate(cur.fetchall()):
                if i > 0:
                    fh.write(",")
                data = {
                    "id": row["contest_id"],
                    "title": row["contest_name"],
                    "start_epoch_second": int(row["start_at"].timestamp()),
                    "duration_second":
                        int((row["end_at"] - row["start_at"]).total_seconds()),
                    "rate_change": row["rated_range"],
                }
                fh.write(
                    json.dumps(data, separators=(',', ':'), sort_keys=True,
                               ensure_ascii=False) + "\n")
        fh.write("]\n")
def get_personal_transport(lat: float, lng: float, t: int,
                           conn: psycopg2.extensions.connection,
                           personal_transport_endpoint: str,
                           timeout: int = 20,
                           raise_exceptions: bool = False,
                           download_geometry_after_timeout: bool = False) -> Dict[str, Any]:
    """Return the personal-transport accessibility polygon for a point/time.

    Looks the geometry up in the ``car`` table first; on a cache miss it is
    downloaded via ``personal_transport_endpoint``. On timeout the nearest
    cached geometry for the same time is returned (optionally scheduling a
    slow re-download); on other errors an empty polygon is returned unless
    ``raise_exceptions`` is set.
    """
    lat, lng = round(lat, 6), round(lng, 6)
    with conn.cursor() as cur:
        cur.execute('SELECT ST_AsGeoJSON(geometry) FROM car WHERE latitude = %s AND longitude = %s AND time = %s',
                    (lat, lng, t))
        res = cur.fetchone()
        if res is not None:
            return json.loads(res[0])
    try:
        return _get_personal_transport_internal(lat, lng, t, conn, personal_transport_endpoint, timeout)  # type: ignore
    except (requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout) as ex:
        # (The original nested this same condition twice.)
        if download_geometry_after_timeout:
            _execute_after(lambda: _get_personal_transport_internal(lat, lng, t, conn,
                                                                    personal_transport_endpoint, timeout * 20),
                           f'personal_transport_download ({lat}, {lng}, {t})')
        if raise_exceptions:
            raise TimeoutError(ex)
        log.warning(f'Personal transport geometry download ({lat}, {lng}, {t}) failed with timeout')
        # Fall back to the nearest cached geometry for the same time slice.
        # A fresh cursor is required (the first one was closed by its
        # "with" block), and the parameters must be bound in placeholder
        # order: time first, then the point coordinates — the original
        # passed (lat, lng, t), binding lat to the time filter.
        with conn.cursor() as cur:
            cur.execute('SELECT ST_AsGeoJSON(geometry) FROM car WHERE time = %s'
                        ' ORDER BY ST_Distance(geometry, ST_SetSRID(ST_MakePoint(%s, %s), 4326)) LIMIT 1',
                        (t, lat, lng))
            res = cur.fetchone()
        if res is None:
            return {'type': 'Polygon', 'coordinates': []}
        return json.loads(res[0])
    except Exception as ex:
        log.error(f'Personal transport download ({lat}, {lng}, {t}) failed (exception): {repr(ex)}')
        if raise_exceptions:
            raise
        return {'type': 'Polygon', 'coordinates': []}
def get_walking(lat: float, lng: float, t: int, conn: psycopg2.extensions.connection, walking_endpoint: str,
                timeout: int = 20, multiple_times_allowed: bool = False, raise_exceptions: bool = False,
                download_geometry_after_timeout: bool = False) -> Dict[str, Any]:
    """Return the walking accessibility polygon for a point and time.

    Checks the ``walking`` table cache first, then downloads from
    ``walking_endpoint``. On timeout an empty polygon is returned
    (optionally retrying the download in a background thread); on other
    errors the nearest cached geometry for the same time is returned,
    unless ``raise_exceptions`` is set.
    """
    lat, lng = round(lat, 6), round(lng, 6)
    with conn.cursor() as cur:
        cur.execute('SELECT ST_AsGeoJSON(geometry) FROM walking WHERE latitude = %s AND longitude = %s AND time = %s LIMIT 1',
                    (lat, lng, t))
        res = cur.fetchone()
        if res is not None:
            return json.loads(res[0])
    try:
        return _get_walking_internal(lat, lng, t, conn, walking_endpoint, timeout, multiple_times_allowed)  # type: ignore
    except (requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout) as ex:
        if download_geometry_after_timeout:
            # Retry the *walking* download with a much longer timeout — the
            # original dispatched to _get_public_transport_internal here,
            # an apparent copy-paste slip.
            thread = threading.Thread(
                target=lambda: _get_walking_internal(lat, lng, t, conn, walking_endpoint, timeout * 20,
                                                     multiple_times_allowed))
            thread.start()
        if raise_exceptions:
            raise TimeoutError(ex)
        log.warning(f'Walking geometry download ({lat}, {lng}, {t}) failed with timeout')
        return {'type': 'Polygon', 'coordinates': []}
    except Exception as ex:
        if raise_exceptions:
            raise
        log.error(f'Walking geometry download for ({lat}, {lng}, {t}) failed: {repr(ex)}')
        # Fall back to the closest cached geometry for the same time. Note
        # the leading space before FROM: the original's adjacent-literal
        # concatenation produced 'min_distanceFROM', an SQL syntax error.
        # A fresh cursor is used because the first one was closed by its
        # "with" block.
        with conn.cursor() as cur:
            cur.execute('SELECT ST_AsGeoJSON(geometry), ST_Distance(geometry, ST_SetSRID(ST_MakePoint(%s, %s), 4326)) AS min_distance'
                        ' FROM walking WHERE time = %s ORDER BY 2 LIMIT 1',
                        (lat, lng, t))
            res = cur.fetchone()
        if res is None:
            return {'type': 'Polygon', 'coordinates': []}
        return json.loads(res[0])
def task_1_add_new_record_to_db(con: psycopg2.extensions.connection) -> None:
    """
    Add a record for a new customer from Singapore
    {
        'customer_name': 'Thomas',
        'contactname': 'David',
        'address': 'Some Address',
        'city': 'London',
        'postalcode': '774',
        'country': 'Singapore',
    }

    Args:
        con: psycopg connection
    """
    # The new customer's fields, keyed to match the named placeholders.
    record = {
        'customername': 'Thomas',
        'contactname': 'David',
        'address': 'Some Address',
        'city': 'London',
        'postalcode': '774',
        'country': 'Singapore',
    }
    sql = """
    INSERT INTO customers (customername, contactname, address, city, postalcode, country)
    VALUES (%(customername)s, %(contactname)s, %(address)s, %(city)s, %(postalcode)s, %(country)s);
    """
    with con.cursor() as cursor:
        cursor.execute(sql, record)
def get_nrows(table_name: str, conn: psycopg2.extensions.connection) -> int:
    """Return the number of rows currently in ``table_name``."""
    cursor = conn.cursor()
    cursor.execute(f"""SELECT COUNT(*) FROM {table_name};""")
    (count,) = cursor.fetchone()
    cursor.close()
    return count
def add_employee_to_db(connection: psycopg2.extensions.connection, employee: Person):
    """Insert an employee into ``employees``.

    Stores first/last name (split from ``employee.name``), image path,
    access level, and the two 64-dimensional halves of the face encoding
    as CUBE values. Does not commit — the caller owns the transaction.

    Returns:
        True on success, False on error (transaction rolled back).
    """
    try:
        cursor = connection.cursor()
        v_low = ','.join(str(s) for s in employee.face_encoding[0:64])
        v_high = ','.join(str(s) for s in employee.face_encoding[64:128])
        # assumes employee.name is exactly "First Last" — TODO confirm
        name, surname = employee.name.split()
        # The CUBE array literals are interpolated (numeric data built
        # above); all text fields are bound parameters — the original
        # interpolated them too, which broke on apostrophes and was
        # injectable.
        query = f'''
        INSERT INTO employees(
            first_name, last_name, image_path, access, vec_low, vec_high
        ) VALUES (
            %s, %s, %s, %s, CUBE(array[{v_low}]), CUBE(array[{v_high}])
        ) ON CONFLICT DO NOTHING;
        '''
        cursor.execute(query, (name, surname, employee.image_path,
                               employee.access))
        cursor.close()
        return True
    except (Exception, Error) as error:
        print('Ошибка при добавлении пользователя в базу:', error)
        connection.rollback()
        return False
def view_names(
    conn: psycopg2.extensions.connection,
) -> typing.Generator[str, None, None]:
    """Yield the name of every view known to Materialize."""
    with conn.cursor() as cursor:
        cursor.execute("SHOW VIEWS")
        # Each result row's first column is the view name.
        yield from (row[0] for row in cursor)
def source_names(
    conn: psycopg2.extensions.connection,
) -> typing.Generator[str, None, None]:
    """Yield the name of every source known to Materialize."""
    with conn.cursor() as cursor:
        cursor.execute("SELECT source_name FROM mz_source_info")
        # Each result row's first column is the source name.
        yield from (row[0] for row in cursor)
def get_column_values(table: str, column: str,
                      conn: psycopg2.extensions.connection) -> list:
    """Return all values of ``column`` from ``table``.

    Returns:
        The list of row tuples from ``fetchall()`` (one single-element
        tuple per row). The original annotation claimed ``int``, which
        was incorrect.
    """
    cur = conn.cursor()
    cur.execute(f"""SELECT {column} FROM {table}""")
    results = cur.fetchall()
    cur.close()
    return results
def insert_film(conn: psycopg2.extensions.connection, data: Dict):
    """Insert a film with its country and credited persons, creating any
    missing reference rows along the way.

    Args:
        conn: An open psycopg2 connection.
        data: Expects keys 'title', 'country', 'box_office',
            'release_date' and 'actors' — an iterable of
            (fio, person_type) pairs.
    """
    cur: psycopg2.extensions.cursor = conn.cursor()

    def _scalar(row):
        # fetchone() yields a 1-tuple or None; unwrap to the bare id.
        # (The original passed the whole tuple around as the id, which
        # only worked because psycopg2 renders a 1-tuple as a
        # parenthesised scalar in SQL.)
        return row[0] if row is not None else None

    # Resolve or create the country.
    cur.execute("select id from countries where name=%s", (data['country'], ))
    country_id = _scalar(cur.fetchone())
    if country_id is None:
        cur.execute("insert into countries (name) values (%s) returning id",
                    (data['country'], ))
        country_id = _scalar(cur.fetchone())
        cur.connection.commit()

    # Resolve or create the film itself.
    cur.execute(
        "select id from films where title=%s and country=%s and release_date=%s",
        (data['title'], country_id, data['release_date']))
    film_id = _scalar(cur.fetchone())
    if film_id is None:
        cur.execute(
            "insert into films (title, country, box_office, release_date) values (%s,%s,%s,%s) returning id",
            (data['title'], country_id, data['box_office'], data['release_date']))
        film_id = _scalar(cur.fetchone())

    # Link every credited person (creating person types and persons as needed).
    for fio, person_type in data['actors']:
        cur.execute("select id from person_types where type=%s", (person_type, ))
        type_id = _scalar(cur.fetchone())
        if type_id is None:
            cur.execute("insert into person_types (type) values (%s) returning id",
                        (person_type, ))
            type_id = _scalar(cur.fetchone())
            cur.connection.commit()

        cur.execute("select id from persons where fio=%s", (fio, ))
        person_id = _scalar(cur.fetchone())
        if person_id is None:
            cur.execute("insert into persons (fio) values (%s) returning id",
                        (fio, ))
            person_id = _scalar(cur.fetchone())
            cur.connection.commit()

        cur.execute(
            "select count(*) from persons2content where person_id=%s and film_id=%s and person_type=%s",
            (person_id, film_id, type_id))
        if cur.fetchone()[0] == 0:
            cur.execute(
                "insert into persons2content (person_id, film_id, person_type) values (%s,%s,%s)",
                (person_id, film_id, type_id))
            cur.connection.commit()
    cur.connection.commit()
    cur.close()
def create_tables(config: list, connection: pg.extensions.connection):
    """Create each table described in ``config`` (keys 'name' and 'schema')
    unless it already exists, committing after every DDL statement."""
    cur = connection.cursor()
    for entry in config:
        name = entry.get('name')
        schema = entry.get('schema')
        cur.execute(f"""CREATE TABLE IF NOT EXISTS {name} ({schema})""")
        connection.commit()
def insert_new_row(table: str, columns: T.List[str], variables: T.List[T.Any],
                   conn: psycopg2.extensions.connection):
    """Insert one row of ``variables`` into ``table`` under ``columns``,
    committing immediately."""
    placeholders = ','.join(['%s'] * len(columns))
    statement = (
        f"INSERT INTO {table} ({', '.join(columns)}) VALUES ({placeholders})"
    )
    cursor = conn.cursor()
    cursor.execute(statement, variables)
    conn.commit()
    cursor.close()
def delete_data(conn: psycopg2.extensions.connection, tables: Set[str]) -> None:
    """Deletes all the data from the specified tables."""
    if not tables:
        return
    # Quote every table name safely and join them into one TRUNCATE.
    identifiers = [psycopg2.sql.Identifier(name) for name in tables]
    statement = psycopg2.sql.SQL('TRUNCATE {tables} CASCADE;').format(
        tables=psycopg2.sql.SQL(', ').join(identifiers))
    with conn:
        with conn.cursor() as cursor:
            cursor.execute(statement)
def call(self, connection: psycopg2.extensions.connection) -> tuple:
    """Fetch all category rows and return them keyed by the id column."""
    cursor = connection.cursor()
    cursor.execute(config.SQL_TO_OBTAIN_CATEGORIES)
    rows = cursor.fetchall()
    cursor.close()
    # Key each full row by the value in its id column position.
    return {
        row[config.ID_COLUMN_POSITION_IN_CATEGORIES_TABLE]: row
        for row in rows
    }
def iterate_aliases_for_user(
        user_id: str, *, conn: psycopg2.extensions.connection) -> Iterator[str]:
    """Yield ``user_id`` and every older id it was renamed from.

    Yields nothing when ``user_id`` was itself renamed to another id
    (i.e. it is not the newest alias); otherwise walks the ``renamed``
    chain backwards one predecessor at a time.
    """
    with conn.cursor() as cur:
        cur.execute(
            """
            SELECT user_id_to FROM renamed WHERE user_id_from = %s
            """, (user_id, ))
        # A successor exists -> this id is stale; produce nothing.
        if cur.fetchone() is not None:
            return
    current = user_id
    while True:
        yield current
        with conn.cursor() as cur:
            cur.execute(
                """
                SELECT user_id_from FROM renamed WHERE user_id_to = %s
                """, (current, ))
            row = cur.fetchone()
        if row is None:
            return
        (current,) = row
def load_tables(config: list, connection: pg.extensions.connection):
    """Load each configured table from its CSV under ../data/ via COPY."""
    cur = connection.cursor()
    data_path = '../data/'
    for table in config:
        table_name = table.get('name')
        csv_path = data_path + f"{table_name}.csv"
        with open(csv_path, 'r') as f:
            next(f)  # drop the CSV header; COPY expects data rows only
            cur.copy_expert(f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
            connection.commit()
def load_tables(config: list, connection: pg.extensions.connection):
    """Load each configured table from its CSV in the local data folder."""
    cur = connection.cursor()
    data_path = Path(os.environ['HOME'], 'Documents', 'data_science',
                     'ht_v2', 'data')
    for table in config:
        table_name = table.get('name')
        csv_path = data_path.joinpath(f"{table_name}.csv")
        with open(csv_path, 'r') as f:
            next(f)  # drop the CSV header; COPY expects data rows only
            cur.copy_expert(f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
            connection.commit()
def get_curr_rev_id(conn: psycopg2.extensions.connection) -> Union[str, None]:
    """Return the stored migration revision id, or None when unset.

    Bootstraps the ``migro_ver`` tracking table on first use: a
    ProgrammingError from the SELECT is taken to mean the table does not
    exist yet, so it is created and None is returned.
    """
    curs = conn.cursor()
    try:
        curs.execute('SELECT ver FROM migro_ver')
        # fetchone() is None when the table is empty; subscripting then
        # raises TypeError, handled below.
        return curs.fetchone()[0]
    except psycopg2.ProgrammingError:
        # Table missing: roll back the failed statement, then create it.
        conn.rollback()
        curs.execute(
            'CREATE TABLE migro_ver (ver VARCHAR(12) PRIMARY KEY)')
        conn.commit()
        return None
    except TypeError:
        # Table exists but holds no revision row yet.
        return None
    finally:
        curs.close()
def task_5_delete_the_last_customer(
        con: psycopg2.extensions.connection) -> None:
    """
    Delete the last customer

    Args:
        con: psycopg connection
    """
    # "Last" means the row with the highest customerid.
    query = """
    DELETE FROM customers
    WHERE customerid = (SELECT MAX(customerid) FROM customers)
    """
    with con.cursor() as cursor:
        cursor.execute(query)
def load_shape_files(config: list, connection: pg.extensions.connection):
    """Report which downloaded shape files exist for each configured table."""
    cur = connection.cursor()
    for table in config:
        table_name = table.get('name')
        matches = [
            fname for fname in os.listdir(downloads_path)
            if fname.startswith(table_name)
        ]
        matches = check_for_fhv_2017_type(name=table_name, files=matches)
        print(matches)
        if matches:
            print("""Files to upload to {} table.""".format(table_name))
        else:
            print("""No files to upload to {} table.""".format(table_name))