Beispiel #1
0
def recognise_employee_sql(connection: psycopg2.extensions.connection,
                           encodings):
    """Recognise an employee by face encodings with a single SQL query.

    Finds the closest employee whose stored (vec_low, vec_high) cubes are
    within THRESHOLD of the given 128-float encoding and prints the match.

    Args:
        connection: An open psycopg2 connection.
        encodings: A sequence of at least 128 numeric face-encoding values.

    Returns:
        True on success, False on a database error (previously the success
        path returned None, which was falsy just like the error path).
    """
    cursor = connection.cursor()
    try:
        # Build the distance expression once instead of formatting the same
        # two CUBE arrays twice for WHERE and ORDER BY.
        low = ','.join(str(s) for s in encodings[0:64])
        high = ','.join(str(s) for s in encodings[64:128])
        distance = ('sqrt( power( CUBE( array[{}]) <-> vec_low, 2) + '
                    'power( CUBE( array[{}]) <-> vec_high, 2))').format(low, high)
        # NOTE: values are interpolated, but they are numeric strings built
        # from the encoding, not free-form user text.
        query = ('SELECT first_name, last_name FROM employees '
                 'WHERE {dist} <= {threshold} '
                 'ORDER BY {dist} ASC LIMIT 1').format(dist=distance,
                                                       threshold=THRESHOLD)
        cursor.execute(query)
        print(cursor.fetchall())
        return True
    except (Exception, Error) as error:
        print('Ошибка при запросе к БД:', error)
        connection.rollback()
        return False
    finally:
        # The cursor was previously leaked on every call.
        cursor.close()
def _build_dictionaries(conn: psycopg2.extensions.connection) -> dict:
    """Build name->rowid lookup dictionaries from the *_map tables.

    Args:
        conn: An open psycopg2 connection.

    Returns:
        A dict with keys 'center', 'device', 'software' and 'os', each
        mapping a name/hash to its rowid.
    """
    # (result key, name column, table) — replaces four copy-pasted stanzas.
    mappings = (
        ('center', 'center_name', 'center_map'),
        ('device', 'device_name', 'device_map'),
        ('software', 'software_hash', 'software_map'),
        ('os', 'os_name', 'os_map'),
    )

    cur = conn.cursor()
    master_dict = {}
    for key, column, table in mappings:
        cur.execute(f'select rowid, {column} from {table};')
        # invert (rowid, name) rows into name -> rowid
        master_dict[key] = {name: rowid for rowid, name in cur.fetchall()}

    # ip remap intentionally disabled (was: rowid, ip_address from ip_map)

    return master_dict
Beispiel #3
0
def copy_from(df: pd.DataFrame,
              table: str,
              connection: psycopg2.extensions.connection,
              chunk_size: int = 10000):
    """Bulk-load *df* into *table* in chunks using COPY FROM STDIN.

    Escapes characters that are special in PostgreSQL's text COPY format,
    commits after each chunk, and rolls back on a database error.
    """
    cursor = connection.cursor()
    df = df.copy()

    # characters that must be escaped for the text COPY format
    escaped = {
        '\\': '\\\\',
        '\n': r'\n',
        '\r': r'\r',
        '\t': r'\t',
    }
    object_columns = [c for c in df.columns if df.dtypes[c] == 'object']
    for col in object_columns:
        for raw, esc in escaped.items():
            df[col] = df[col].str.replace(raw, esc)
    try:
        for start in tqdm(range(0, df.shape[0], chunk_size)):
            buf = StringIO()
            df.iloc[start:(start + chunk_size)].to_csv(buf,
                                                       index=False,
                                                       header=False,
                                                       sep='\t',
                                                       na_rep='\\N',
                                                       quoting=None)
            buf.seek(0)
            cursor.copy_from(buf, table,
                             columns=[f'"{name}"' for name in df.columns])
            connection.commit()
    except psycopg2.Error as exc:
        print(exc)
        connection.rollback()
    cursor.close()
Beispiel #4
0
def select(conn: psycopg2.extensions.connection,
           table: str,
           qualifications: Union[Dict, None] = None,
           select_list: Union[List, None] = None,
           limit: Union[int, None] = None,
           arraysize: int = 100) -> Iterable[Any]:
    """A generator method that runs a select query on the table and yields the results.

    Args:
        conn: An open database connection object.
        table: The table name.
        qualifications: A dict of parameters for the WHERE clause.
            The dict's keys are the column names.
            Each value is either a single native type (str, int, ..), in which case the condition is
            key = value, or a tuple in the format (operator, value), like ('<', 3) or ('>=', 15), in
            which case the operator is used between the key and value instead of a '=' sign.
            None (the default) means no qualifications.
        select_list: The columns to fetch. If None or empty, returns all columns (*).
        limit: The maximum number of rows to fetch.
        arraysize: The cursor arraysize (only used when limit is None).

    Yields:
        Rows from the table that matched the qualifications.
    """
    # None defaults replace the mutable `{}` / `[]` defaults (shared across
    # calls); callers see identical behaviour.
    qualifications = {} if qualifications is None else qualifications
    select_list = [] if select_list is None else select_list
    query, params = _build_query(table, qualifications, select_list, limit)
    with conn:
        with conn.cursor() as cursor:
            cursor.arraysize = arraysize
            cursor.execute(query, params)
            rows: List[Any]
            # fetchmany returns [] when exhausted, ending the iter() loop
            for rows in iter(cursor.fetchmany, []):
                yield from rows
def _checkmaps(conn: psycopg2.extensions.connection, col: str, tbl: str,
               file_itms: set):
    """Resync new data with what is currently in the db: insert any item from
    *file_itms* that is not yet present in column *col* of table *tbl*.

    Args:
        conn: An open psycopg2 connection.
        col: Column name (spaces become underscores, lowered).
        tbl: Target map table name.
        file_itms: Candidate items from the input file.
    """
    cur = conn.cursor()
    col = col.replace(" ", "_").lower()

    # pulling the 'key' column from the db
    LOG.debug(f'''SELECT DISTINCT "{col}" from {tbl};''')
    cur.execute(f'''SELECT DISTINCT "{col}" from {tbl};''')
    existing_items = {row[0] for row in cur.fetchall()}

    for each in file_itms:
        if str(each) not in existing_items:
            # clean up formatting problems (apostrophes stripped, as before)
            if each is not None and isinstance(each, str):
                each = each.replace("'", "")
            # Identifiers cannot be bound, but the VALUE is now passed as a
            # query parameter instead of being f-string-interpolated — this
            # closes the SQL-injection hole and handles special characters.
            cur.execute(f'''INSERT INTO {tbl} ("{col}") VALUES (%s);''',
                        (each, ))
            LOG.debug(f"Inserted new item {col}: {each} into table {tbl}")

    conn.commit()
Beispiel #6
0
def load_tables(config: list, connection: pg.extensions.connection):
    """Load every downloaded CSV for each configured table via COPY."""
    cur = connection.cursor()
    for table in config:
        table_name = table.get('name')
        matching_files = [
            fname for fname in os.listdir(downloads_path)
            if fname.startswith(table_name)
        ]
        matching_files = check_for_fhv_2017_type(name=table_name,
                                                 files=matching_files)
        if not matching_files:
            print("""No files to upload to {} table.""".format(table_name))
            continue
        for file in matching_files:
            stem = file.split('.')[0]
            table_source = downloads_path.joinpath(f"{stem}.csv")
            print("""Started to load {} data to db from {}.""".format(
                table_name, table_source))
            with open(table_source, 'r', encoding='utf-8') as f:
                next(f)  # skip the CSV header row
                cur.copy_expert(
                    f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
            connection.commit()
            print("""Completed loading file {} into {} table.""".format(
                file, table_name))
def copy_from(df: pd.DataFrame,
              table: str,
              connection: psycopg2.extensions.connection,
              chunk_size: int = 10000):
    """Bulk-load *df* into *table* in chunks using COPY FROM STDIN.

    Escapes COPY-special characters in object columns, commits per chunk,
    and rolls back on a database error.
    """
    cursor = connection.cursor()
    df = df.copy()
    escaped = {'\\': '\\\\', '\n': r'\n', '\r': r'\r', '\t': r'\t'}
    for col in df.columns:
        if df.dtypes[col] == 'object':
            for v, e in escaped.items():
                df[col] = df[col].str.replace(v, e)
    try:
        for i in range(0, df.shape[0], chunk_size):
            f = io.StringIO()
            chunk = df.iloc[i:(i + chunk_size)]
            # The default separator is a tab, and NULLs are indicated by the two character-string '\N'
            chunk.to_csv(f,
                         index=False,
                         header=False,
                         sep='\t',
                         na_rep='\\N',
                         quoting=None)
            f.seek(0)
            cursor.copy_from(f, table, columns=list(df.columns))
            connection.commit()
    except psycopg2.Error as e:
        # Previously the error was swallowed silently; report it like the
        # other copy_from helper in this module does.
        print(e)
        connection.rollback()
    finally:
        # Previously the cursor was only closed on the error path and leaked
        # on every successful call.
        cursor.close()
Beispiel #8
0
def export_tasks(*, conn: psycopg2.extensions.connection) -> None:
    """Export all contest tasks to EXPORT_DIR/problems.json as a JSON array."""
    path = EXPORT_DIR / "problems.json"
    logger.info("write: %s", path)
    with open(path, "w") as fh:
        # Open the array unconditionally: previously an empty result set
        # produced just "]\n", which is malformed JSON.
        fh.write("[")
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            cur.execute("""
                SELECT contest_id, task_id, alphabet, task_name
                FROM contests_tasks
                INNER JOIN tasks USING (task_id)
                ORDER BY (contest_id, task_id)
            """)
            for i, row in enumerate(cur.fetchall()):
                if i > 0:
                    fh.write(",")
                data = {
                    "id": row["task_id"],
                    "contest_id": row["contest_id"],
                    "title": row["alphabet"] + ". " + row["task_name"],
                }
                fh.write(
                    json.dumps(data,
                               separators=(',', ':'),
                               sort_keys=True,
                               ensure_ascii=False) + "\n")
        fh.write("]\n")
Beispiel #9
0
def create_table(name: str, schema: str,
                 connection: psycopg2.extensions.connection):
    """Create table *name* with columns *schema* unless it already exists."""
    cursor = connection.cursor()
    cursor.execute(f"""CREATE TABLE IF NOT EXISTS {name} ({schema})""")
    connection.commit()
    cursor.close()
Beispiel #10
0
def export_submissions(*, conn: psycopg2.extensions.connection) -> None:
    """Export submissions for every user present in the users table."""
    with conn.cursor() as cur:
        cur.execute("""
            SELECT user_id FROM users
        """)
        for (user_id, ) in cur.fetchall():
            export_submissions_for_user(user_id, conn=conn)
Beispiel #11
0
def export_contests(*, conn: psycopg2.extensions.connection) -> None:
    """Export all contests to EXPORT_DIR/contests.json as a JSON array."""
    path = EXPORT_DIR / "contests.json"
    logger.info("write: %s", path)
    with open(path, "w") as fh:
        # Open the array unconditionally: previously an empty result set
        # produced just "]\n", which is malformed JSON.
        fh.write("[")
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            cur.execute("""
                SELECT contest_id, contest_name, rated_range, start_at, end_at
                FROM contests
                ORDER BY contest_id
            """)
            for i, row in enumerate(cur.fetchall()):
                if i > 0:
                    fh.write(",")
                data = {
                    "id": row["contest_id"],
                    "title": row["contest_name"],
                    "start_epoch_second": int(row["start_at"].timestamp()),
                    # duration derived from the stored start/end timestamps
                    "duration_second":
                    int((row["end_at"] - row["start_at"]).total_seconds()),
                    "rate_change": row["rated_range"],
                }
                fh.write(
                    json.dumps(data,
                               separators=(',', ':'),
                               sort_keys=True,
                               ensure_ascii=False) + "\n")
        fh.write("]\n")
Beispiel #12
0
def get_personal_transport(lat: float, lng: float, t: int, conn: psycopg2.extensions.connection, personal_transport_endpoint: str, timeout: int = 20,
        raise_exceptions: bool = False, download_geometry_after_timeout: bool = False) -> Dict[str, Any]:
    """Return the personal-transport geometry (GeoJSON dict) for a point.

    Checks the local ``car`` cache first, then asks the remote endpoint.
    On timeout the nearest cached geometry for the same time is returned
    (unless raise_exceptions is set); optionally schedules a slow retry.
    """
    lat, lng = round(lat, 6), round(lng, 6)
    with conn.cursor() as cur:
        cur.execute('SELECT ST_AsGeoJSON(geometry) FROM car WHERE latitude = %s AND longitude = %s AND time = %s', (lat, lng, t))
        res = cur.fetchone()
        if res is not None:
            return json.loads(res[0])
        try:
            return _get_personal_transport_internal(lat, lng, t, conn, personal_transport_endpoint, timeout) # type: ignore
        except (requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout) as ex:
            # fixed: the condition was nested twice ("if download_... : if download_...")
            if download_geometry_after_timeout:
                _execute_after(lambda: _get_personal_transport_internal(lat, lng, t, conn, personal_transport_endpoint, timeout * 20),
                        f'personal_transport_download ({lat}, {lng}, {t})')
            if raise_exceptions:
                raise TimeoutError(ex)
            else:
                log.warning(f'Personal transport geometry download ({lat}, {lng}, {t}) failed with timeout')
            # fixed: parameters were bound as (lat, lng, t) although the
            # placeholders are ordered (time, point-x, point-y); aligned with
            # the argument order used by the walking fallback query.
            cur.execute('SELECT ST_AsGeoJSON(geometry) FROM car WHERE time = %s'
                    ' ORDER BY ST_Distance(geometry, ST_SetSRID(ST_MakePoint(%s, %s), 4326)) LIMIT 1', (t, lat, lng))
            res = cur.fetchone()
            if res is None:
                return {'type': 'Polygon', 'coordinates': []}
            return json.loads(res[0])
        except Exception as ex:
            log.error(f'Personal transport download ({lat}, {lng}, {t}) failed (exception): {repr(ex)}')
            if raise_exceptions:
                raise
            return {'type': 'Polygon', 'coordinates': []}
Beispiel #13
0
def get_walking(lat: float, lng: float, t: int, conn: psycopg2.extensions.connection, walking_endpoint: str,
        timeout: int = 20, multiple_times_allowed: bool = False, raise_exceptions: bool = False, download_geometry_after_timeout: bool = False) -> Dict[str, Any]:
    """Return the walking-availability geometry (GeoJSON dict) for a point.

    Checks the local ``walking`` cache first, then asks the remote endpoint.
    On a non-timeout failure the nearest cached geometry for the same time
    is returned (unless raise_exceptions is set).
    """
    lat, lng = round(lat, 6), round(lng, 6)
    with conn.cursor() as cur:
        cur.execute('SELECT ST_AsGeoJSON(geometry) FROM walking WHERE latitude = %s AND longitude = %s AND time = %s LIMIT 1', (lat, lng, t))
        res = cur.fetchone()
        if res is not None:
            return json.loads(res[0])
        try:
            return _get_walking_internal(lat, lng, t, conn, walking_endpoint, timeout, multiple_times_allowed) # type: ignore
        except (requests.exceptions.ConnectTimeout, requests.exceptions.ReadTimeout) as ex:
            if download_geometry_after_timeout:
                # fixed copy-paste bug: the background retry previously called
                # _get_public_transport_internal with the *walking* endpoint
                thread = threading.Thread(target=lambda: _get_walking_internal(lat, lng, t, conn, walking_endpoint, timeout * 20, multiple_times_allowed))
                thread.start()
            if raise_exceptions:
                raise TimeoutError(ex)
            else:
                log.warning(f'Walking geometry download ({lat}, {lng}, {t}) failed with timeout')
            return {'type': 'Polygon', 'coordinates': []}
        except Exception as ex:
            if raise_exceptions:
                raise
            else:
                log.warning(f'Walking geometry download ({lat}, {lng}, {t}) failed with exception: {repr(ex)}')
            log.error(f'Walking geometry download for ({lat}, {lng}, {t}) failed: {repr(ex)}')
            # fixed: the two concatenated literals lacked a separating space and
            # produced the invalid SQL "...AS min_distanceFROM walking...".
            cur.execute('SELECT ST_AsGeoJSON(geometry), ST_Distance(geometry, ST_SetSRID(ST_MakePoint(%s, %s), 4326)) AS min_distance'
                    ' FROM walking WHERE time = %s ORDER BY 2 LIMIT 1', (lat, lng, t))
            res = cur.fetchone()
            if res is None:
                return {'type': 'Polygon', 'coordinates': []}
            return json.loads(res[0])
Beispiel #14
0
def task_1_add_new_record_to_db(con: psycopg2.extensions.connection) -> None:
    """
    Add a record for a new customer from Singapore
    {
        'customer_name': 'Thomas',
        'contactname': 'David',
        'address': 'Some Address',
        'city': 'London',
        'postalcode': '774',
        'country': 'Singapore',
    }

    Args:
        con: psycopg connection

    """
    # the new customer's attributes, keyed by the bound parameter names
    record = {
        'customername': 'Thomas',
        'contactname': 'David',
        'address': 'Some Address',
        'city': 'London',
        'postalcode': '774',
        'country': 'Singapore',
    }
    sql = """
    INSERT INTO customers (customername, contactname, address, city, postalcode, country)
    VALUES (%(customername)s, %(contactname)s, %(address)s, %(city)s, %(postalcode)s, %(country)s);
    """
    with con.cursor() as cursor:
        cursor.execute(sql, record)
def get_nrows(table_name: str, conn: psycopg2.extensions.connection) -> int:
    """Return the number of rows in *table_name*."""
    # Context-managed cursor: previously the cursor leaked if execute raised.
    with conn.cursor() as cur:
        cur.execute(f"""SELECT COUNT(*) FROM {table_name};""")
        return cur.fetchone()[0]
Beispiel #16
0
def add_employee_to_db(connection: psycopg2.extensions.connection,
                       employee: Person):
    """Insert *employee* (name, image path, access flag, face encoding) into
    the employees table.

    Returns:
        True on success, False on any error (after rolling back).
    """
    try:
        cursor = connection.cursor()
        v_low = ','.join(str(s) for s in employee.face_encoding[0:64])
        v_high = ','.join(str(s) for s in employee.face_encoding[64:128])
        name, surname = employee.name.split()
        # Text values are now bound as parameters instead of f-string
        # interpolation, closing a SQL-injection hole (e.g. a surname with an
        # apostrophe).  The CUBE arrays stay inline: they are numeric strings
        # built from the face encoding.
        query = f'''
                INSERT INTO employees(
                    first_name, last_name, image_path, access, vec_low,vec_high
                    ) VALUES (
                    %s,
                    %s,
                    %s,
                    %s,
                    CUBE(array[{v_low}]),
                    CUBE(array[{v_high}])
                    ) ON CONFLICT DO NOTHING;
            '''
        cursor.execute(query, (name, surname, employee.image_path,
                               employee.access))
        cursor.close()
        return True
    except (Exception, Error) as error:
        print('Ошибка при добавлении пользователя в базу:', error)
        connection.rollback()
        return False
def view_names(
    conn: psycopg2.extensions.connection,
) -> typing.Generator[str, None, None]:
    """Yield the name of every view known to Materialize."""
    with conn.cursor() as cursor:
        cursor.execute("SHOW VIEWS")
        # first column of each result row is the view name
        yield from (record[0] for record in cursor)
def source_names(
    conn: psycopg2.extensions.connection,
) -> typing.Generator[str, None, None]:
    """Yield the name of every source defined in Materialize."""
    with conn.cursor() as cursor:
        cursor.execute("SELECT source_name FROM mz_source_info")
        # first column of each result row is the source name
        yield from (record[0] for record in cursor)
def get_column_values(table: str, column: str,
                      conn: psycopg2.extensions.connection) -> list:
    """Return every value of *column* in *table*.

    Returns:
        A list of single-element row tuples, as produced by fetchall().
        (The previous ``-> int`` annotation was wrong: the function returns
        the fetched rows, not a count.)
    """
    cur = conn.cursor()
    cur.execute(f"""SELECT {column} FROM {table}""")
    results = cur.fetchall()
    cur.close()
    return results
Beispiel #20
0
def insert_film(conn: psycopg2.extensions.connection, data: Dict):
    """Insert a film with its country, persons and person-type relations.

    ``data`` keys read here: 'country', 'title', 'release_date',
    'box_office' and 'actors' (an iterable of (fio, type) pairs).
    Existing rows are reused; only missing ones are inserted.
    """
    cur: psycopg2.extensions.cursor = conn.cursor()

    def _select_or_insert(select_sql: str, insert_sql: str, params: tuple):
        # Shared select-or-insert step (deduplicates three identical stanzas).
        # The fetchone() row is returned as-is — a 1-tuple — exactly as the
        # original code passed it back into subsequent queries.
        cur.execute(select_sql, params)
        row = cur.fetchone()
        if row is None:
            cur.execute(insert_sql, params)
            row = cur.fetchone()
        return row

    country_id = _select_or_insert(
        "select id from countries where name=%s",
        "insert into countries (name) values (%s) returning id",
        (data['country'], ))
    cur.connection.commit()

    cur.execute(
        "select id from films where title=%s and country=%s and release_date=%s",
        (data['title'], country_id, data['release_date']))
    film_id = cur.fetchone()
    if film_id is None:
        cur.execute(
            "insert into films (title, country, box_office, release_date) values (%s,%s,%s,%s) returning id",
            (data['title'], country_id, data['box_office'],
             data['release_date']))
        film_id = cur.fetchone()

    for i in data['actors']:
        # i[0] is the person's fio, i[1] the person type
        type_id = _select_or_insert(
            "select id from person_types where type=%s",
            "insert into person_types (type) values (%s) returning id",
            (i[1], ))
        cur.connection.commit()

        person_id = _select_or_insert(
            "select id from persons where fio=%s",
            "insert into persons (fio) values (%s) returning id",
            (i[0], ))
        cur.connection.commit()

        cur.execute(
            "select count(*) from persons2content where person_id=%s and film_id=%s and person_type=%s",
            (person_id, film_id, type_id))
        if cur.fetchone()[0] == 0:
            cur.execute(
                "insert into persons2content (person_id, film_id, person_type)  values (%s,%s,%s)",
                (person_id, film_id, type_id))
        cur.connection.commit()

    cur.connection.commit()
    cur.close()
Beispiel #21
0
def create_tables(config: list, connection: pg.extensions.connection):
    """Create each table described in *config* (keys 'name' and 'schema')
    unless it already exists, then commit once."""
    cur = connection.cursor()
    for entry in config:
        ddl = f"""CREATE TABLE IF NOT EXISTS {entry.get('name')} ({entry.get('schema')})"""
        cur.execute(ddl)

    connection.commit()
def insert_new_row(table: str, columns: T.List[str], variables: T.List[T.Any],
                   conn: psycopg2.extensions.connection):
    """Insert a single row of *variables* into *table* under *columns*,
    committing immediately."""
    column_clause = ', '.join(columns)
    # one %s placeholder per column, comma-separated
    placeholder_clause = ','.join(['%s'] * len(columns))
    dml = f"INSERT INTO {table} ({column_clause}) VALUES ({placeholder_clause})"

    cur = conn.cursor()
    cur.execute(dml, variables)
    conn.commit()
    cur.close()
Beispiel #23
0
def delete_data(conn: psycopg2.extensions.connection,
                tables: Set[str]) -> None:
    """Deletes all the data from the specified tables."""
    if not tables:
        return
    # build a safely-quoted identifier list before opening the transaction
    identifiers = [psycopg2.sql.Identifier(name) for name in tables]
    statement = psycopg2.sql.SQL('TRUNCATE {tables} CASCADE;').format(
        tables=psycopg2.sql.SQL(', ').join(identifiers))
    with conn:
        with conn.cursor() as cursor:
            cursor.execute(statement)
    def call(self, connection: psycopg2.extensions.connection) -> dict:
        """Return all category rows keyed by their id column.

        (The previous ``-> tuple`` annotation was wrong: a dict mapping the
        id-column value of each row to the full row is built and returned.)
        """
        cursor = connection.cursor()
        cursor.execute(config.SQL_TO_OBTAIN_CATEGORIES)
        records = cursor.fetchall()
        categories = {
            record[config.ID_COLUMN_POSITION_IN_CATEGORIES_TABLE]: record
            for record in records
        }

        cursor.close()
        return categories
Beispiel #25
0
def iterate_aliases_for_user(
        user_id: str, *,
        conn: psycopg2.extensions.connection) -> Iterator[str]:
    """Yield *user_id* and every older id it was renamed from.

    Yields nothing if *user_id* was itself renamed to some newer id
    (i.e. it is not the head of its rename chain).
    """
    with conn.cursor() as cur:
        cur.execute(
            """
            SELECT user_id_to FROM renamed WHERE user_id_from = %s
        """, (user_id, ))
        renamed_away = cur.fetchone() is not None
    if renamed_away:
        return
    current = user_id
    while True:
        yield current
        # walk one step back along the rename chain
        with conn.cursor() as cur:
            cur.execute(
                """
                SELECT user_id_from FROM renamed WHERE user_id_to = %s
            """, (current, ))
            row = cur.fetchone()
        if row is None:
            return
        current, = row
Beispiel #26
0
def load_tables(config: list, connection: pg.extensions.connection):
    """Load each configured table from its CSV file in ../data/ via COPY."""
    cur = connection.cursor()
    data_path = '../data/'

    for table in config:
        table_name = table.get('name')
        csv_path = data_path + f"{table_name}.csv"
        with open(csv_path, 'r') as f:
            next(f)  # skip the CSV header row
            cur.copy_expert(f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
        connection.commit()
Beispiel #27
0
def load_tables(config: list, connection: pg.extensions.connection):
    """Load each configured table from its CSV under the ht_v2 data dir."""
    cur = connection.cursor()
    data_path = Path(os.environ['HOME'], 'Documents', 'data_science', 'ht_v2',
                     'data')

    for table in config:
        table_name = table.get('name')
        csv_path = data_path.joinpath(f"{table_name}.csv")
        with open(csv_path, 'r') as f:
            next(f)  # skip the CSV header row
            cur.copy_expert(f"COPY {table_name} FROM STDIN CSV NULL AS ''", f)
        connection.commit()
Beispiel #28
0
def get_curr_rev_id(conn: psycopg2.extensions.connection) -> Union[str, None]:
    """Return the stored migration revision, creating the tracking table on
    first use.

    Returns None when the migro_ver table did not exist (it is created) or
    exists but holds no row.
    """
    curs = conn.cursor()
    try:
        curs.execute('SELECT ver FROM migro_ver')
        row = curs.fetchone()
        return row[0]
    except psycopg2.ProgrammingError:
        # table is missing: reset the failed transaction and create it
        conn.rollback()
        curs.execute(
            'CREATE TABLE migro_ver (ver VARCHAR(12) PRIMARY KEY)')
        conn.commit()
        return None
    except TypeError:
        # fetchone() returned None — the table exists but is empty
        return None
    finally:
        curs.close()
Beispiel #29
0
def task_5_delete_the_last_customer(
        con: psycopg2.extensions.connection) -> None:
    """
    Delete the last customer

    Args:
        con: psycopg connection
    """
    # the "last" customer is the one with the highest customerid
    with con.cursor() as cursor:
        cursor.execute("""
    DELETE FROM customers
    WHERE customerid = (SELECT MAX(customerid) FROM customers)
    """)
Beispiel #30
0
def load_shape_files(config: list, connection: pg.extensions.connection):
    # Iterate and load
    cur = connection.cursor()
    for table in config:
        table_name = table.get('name')
        table_files = [
            filename for filename in os.listdir(downloads_path)
            if filename.startswith(table_name)
        ]
        table_files = check_for_fhv_2017_type(name=table_name,
                                              files=table_files)
        print(table_files)
        if not table_files:
            print("""No files to upload to {} table.""".format(table_name))
        else:
            print("""Files to upload to {} table.""".format(table_name))