# Example #1
def set_row_mode(connection: sqlite3.Connection,
                 row_factory: Callable) -> None:
    """Temporarily switch a sqlite3 Connection's row factory, then restore it.

    Generator-based context-manager body: the previous ``row_factory`` is
    saved, the new one installed, and the old one restored in a ``finally``
    so the connection is left intact even if the managed block raises.

    NOTE(review): the ``with`` usage below requires this function to be
    decorated with ``@contextlib.contextmanager`` — the decorator is not
    visible in this chunk; confirm it is applied at the definition site.

    Can be used just like this:

    >>> with set_row_mode(my_connection, UserRow.from_row):
    ...     return my_cursor.execute("select * from users;").fetchall()
    """
    prev_row_factory = connection.row_factory
    connection.row_factory = row_factory
    try:
        yield
    finally:
        # Restore the previous factory even when the with-body raises.
        connection.row_factory = prev_row_factory
# Example #2
    def _Get(self,
             colNamesAndValues: typing.List[typing.Tuple[str,
                                                         typing.Any]] = None,
             conn: sqlite3.Connection = None) -> typing.List[T]:
        """Gets items that match an AND based column and value filter.

        Typically a manager will have a series of more type specific Get methods to call.

        If you pass an empty list or None (default) it gets all of them.  Which is probably faster than listing them.

        @param colNamesAndValues: a list of tuples of column name and value pairs
        e.g. [(foo,1),(bar,100)] means: WHERE foo = 1 AND bar = 100
        @param conn: optional connection to reuse; a fresh one is obtained when None.
        @return: A list of items.  Empty list if none meet the criteria.

        """
        # None instead of a mutable [] default avoids the shared-default pitfall;
        # behavior for callers is unchanged.
        if colNamesAndValues is None:
            colNamesAndValues = []
        if conn is None:
            conn = self._GetDBConnection()
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()
        self._GetRowsByMultipleAND(cur, colNamesAndValues)
        return [self._ItemFromRow(row) for row in cur.fetchall()]
# Example #3
    def _configure_db_connection(self, connection: sqlite3.Connection):
        '''Configure connection-level settings on the SQLite database.

        Args:
            connection: the sqlite3 connection to configure in place.
        '''
        # set database-specific settings
        connection.isolation_level = None  # autocommit mode; transactions can be explicitly created with BEGIN/COMMIT statements
        connection.row_factory = sqlite3.Row  # SELECTs return sqlite3.Row objects (index- and name-addressable) instead of plain tuples
def query(
        conn: Connection,
        table_name: str,
        fields: Union[List[str], Tuple[str]] = None,
        row_factory: Callable[[Cursor, Tuple], Any] = None,
        where: Union[OperatorSQLArgument,
                     ConditionSQLArgument] = None) -> List:
    """Query All Rows from Custom Table

  Args:
      conn (Connection): The Connection object
      table_name (str): Name of table
      fields (Union[List[str], Tuple[str]], optional): Columns to select; all
          columns when None or empty. Defaults to None.
      row_factory (Callable, optional): Installed on `conn` before the query
          runs (note: it stays installed afterwards). Defaults to None.
      where (Union[OperatorSQLArgument, ConditionSQLArgument], optional): Defaults to None.

  Returns:
      List: List of rows; empty list when the query fails (error is logged).
  """

    # SECURITY NOTE(review): table_name and fields are interpolated into the
    # SQL text — identifiers cannot be bound as parameters, so callers must
    # never pass untrusted values here.
    sql = f"""SELECT {', '.join(map(str, fields)) if fields is not None and len(fields) > 0 else '*'} FROM {table_name}
  {f'WHERE {where.sql}' if where is not None else ''}
  """
    logging.info(sql)

    if row_factory is not None:
        conn.row_factory = row_factory

    try:
        cur = conn.cursor()
        cur.execute(sql)
        return cur.fetchall()
    except Error as e:
        logging.error(e)
        # Honor the declared List return type instead of falling through
        # to an implicit None.
        return []
# Example #5
def get_one_variant(
    conn: sqlite3.Connection,
    variant_id: int,
    with_annotations=False,
    with_samples=False,
):
    r"""Get the variant with the given id

    TODO: with_annotations, with_samples are quite useless and not used for now

    Args:
        conn (sqlite3.Connection): sqlite3 connection
        variant_id (int): Database id of the variant
        with_annotations (bool, optional): Add annotations items. Default is False
        with_samples (bool, optional): Add samples items. Default is False

    Returns:
        dict: A variant item with all fields in "variants" table;
            \+ all fields of annotations table if `with_annotations` is True;
            \+ all fields of sample_has_variant associated to all samples if
            `with_samples` is True.
            Example:

            .. code-block:: python

                {
                    variant fields as keys...,
                    "annotations": dict of annotations fields as keys...,
                    "samples": dict of samples fields as keys...,
                }

    Raises:
        ValueError: If no variant has the given id.
    """
    conn.row_factory = sqlite3.Row
    # Parameterized queries: never interpolate values into SQL text.
    row = conn.execute(
        "SELECT * FROM variants WHERE variants.id = ?", (variant_id,)
    ).fetchone()
    if row is None:
        # Fail with a clear message instead of dict(None)'s obscure TypeError.
        raise ValueError(f"No variant found with id {variant_id}")
    # Cast sqlite3.Row object to dict because later, we use items() method.
    variant = dict(row)

    variant["annotations"] = []
    if with_annotations:
        variant["annotations"] = [
            dict(annotation) for annotation in conn.execute(
                "SELECT * FROM annotations WHERE variant_id = ?", (variant_id,)
            )
        ]

    variant["samples"] = []
    if with_samples:
        variant["samples"] = [
            dict(sample) for sample in conn.execute(
                """SELECT samples.name, sample_has_variant.* FROM sample_has_variant
                LEFT JOIN samples on samples.id = sample_has_variant.sample_id
                WHERE variant_id = ?""", (variant_id,)
            )
        ]

    return variant
# Example #6
def read(data: Connection, id: int) -> Contact:
    """Return the contact with the given id, or None when no row matches.

    NOTE(review): parameter `id` shadows the builtin; kept for interface
    compatibility with existing callers.
    """
    sql = """SELECT f_name,l_name,tel FROM contacts WHERE id = ?"""
    # Build a Contact directly from each fetched row.
    data.row_factory = lambda _, row: Contact(row[0], row[1], row[2])
    cursor = data.cursor()
    cursor.execute(sql, (id, ))  # parameters are passed as a tuple
    return cursor.fetchone()
 def load_id(conn: Connection, path_id):
     """Fetch the item row with the given id and wrap it in an Item.

     NOTE(review): assumes a row with that id exists — fetchone() returning
     None would make the ** expansion fail; confirm with callers.
     """
     conn.row_factory = sqlite3.Row
     row = conn.execute(
         """
         SELECT path, url, hash, author, title, description, alias FROM items
         WHERE id=?;""", (path_id, )).fetchone()
     # sqlite3.Row supports the mapping protocol, so ** expands by column name.
     return Item(_id=path_id, **row)
# Example #8
def get_metadatas(conn: sqlite3.Connection):
    """Return all metadata entries as a key/value dictionary.

    Args:
        conn (sqlite3.Connection): open database connection.

    Returns:
        dict: metadata field names mapped to their values.
    """
    conn.row_factory = sqlite3.Row
    metadata = {}
    for row in conn.execute("SELECT key, value FROM metadatas"):
        metadata[row["key"]] = row["value"]
    return metadata
# Example #9
def findall(data: Connection) -> list[Contact]:
    """Return every contact stored in the contacts table.

    Args:
        data: open database connection.

    Returns:
        list[Contact]: one Contact per row; empty list when the table is empty.
    """
    sql = """SELECT f_name,l_name, tel FROM contacts"""
    # Build a Contact directly from each fetched row.
    data.row_factory = lambda _, row: Contact(row[0], row[1], row[2])
    cursor = data.cursor()
    cursor.execute(sql)
    return cursor.fetchall()
# Example #10
    def __init__(self, conn: sqlite3.Connection, es_loader: ESLoader):
        """Keep the connection and loader, returning rows as plain dicts."""
        def dict_factory(cursor: sqlite3.Cursor, row: sqlite3.Row):
            # Pair each column name from the cursor description with its value.
            return {column[0]: value
                    for column, value in zip(cursor.description, row)}

        conn.row_factory = dict_factory

        self.sqlite = conn
        self.loader = es_loader
# Example #11
def update(data: Connection, old_contact: Contact, new_contact: Contact):
    """Replace old_contact's row with new_contact's fields.

    The row is located by matching all three fields of old_contact; its id
    is then used for the UPDATE, and the change is committed.
    """
    sql_update = """ UPDATE contacts SET f_name=?, l_name=?, tel=? WHERE id = ?"""
    sql_query = """ SELECT id FROM contacts WHERE f_name=? AND l_name=? AND tel=?"""

    # Return bare ids (not 1-tuples) from the SELECT below.
    data.row_factory = lambda _, row: row[0]

    cursor = data.cursor()
    cursor.execute(sql_query, old_contact)
    matched_id = cursor.fetchone()
    new_values = (new_contact.f_name, new_contact.l_name, new_contact.tel,
                  matched_id)
    cursor.execute(sql_update, new_values)
    data.commit()
# Example #12
async def composers_tracks(composer_name: str,
                           db: sqlite3.Connection = Depends(get_db)):
    """Return the track names for composer_name, sorted alphabetically.

    Raises a 404 HTTPException when the composer has no tracks.
    """
    # Collapse each single-column row to its bare value.
    db.row_factory = lambda c, x: x[0]
    tracks = db.execute(
        "SELECT name FROM tracks WHERE composer = ? ORDER BY name;",
        (composer_name, )).fetchall()
    if not tracks:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail={"error": f"Composer: {composer_name} not found"},
        )
    return tracks
# Example #13
def filter_processed_files(log_files:List[str], conn: sqlite3.Connection, key=apache_access_log_file_chronological_decode)->List[str]:
    """Drop log files whose content hash is already recorded as processed.

    Args:
        log_files: candidate log file paths.
        conn: sqlite3 connection holding the ``processed_log_file`` table.
        key: sort key applied to each surviving file's path to order the
            result chronologically.

    Returns:
        Sorted list of (content_hash, path) tuples for unprocessed files.
        NOTE(review): the declared return type ``List[str]`` does not match
        the (hash, path) tuples actually returned — confirm intent with
        callers before changing either side.
    """
    # A set makes the membership test in the filter below O(1) per file
    # instead of O(n) over a list.
    processed_hashes = set()
    try:
        with conn:
            conn.row_factory = sqlite3.Row
            c = conn.cursor()
            for row in c.execute("SELECT content_hash, path FROM processed_log_file"):
                processed_hashes.add(row[0])
    except sqlite3.OperationalError:
        # Best effort: with no processed_log_file table, treat every file as new.
        LOGGER.warning(
            "Cannot read table processed_files in database so use all expanded files")
    log_files_hash = map(lambda f: (content_hash(f), f), log_files)
    effective_log_files = sorted(filter(lambda hf: hf[0] not in processed_hashes, log_files_hash), key=lambda hf: key(hf[1]))
    return list(effective_log_files)
# Example #14
def get_all_results(connection: sqlite3.Connection) -> List[Dict]:
    """
    Returns all measurements.

    Each row joins a measurement with its download, upload, and ping values
    and is converted to a plain dict.
    """
    query = """
    SELECT m.timestamp, d.download_rate, u.upload_rate, p.ping
    FROM measurements m
    JOIN downloads d ON m.measurement_id = d.measurement_id
    JOIN uploads u ON m.measurement_id = u.measurement_id
    JOIN pings p ON m.measurement_id = p.measurement_id
    """
    connection.row_factory = sqlite3.Row
    rows = connection.execute(query).fetchall()
    return list(map(dict, rows))
# Example #15
def get_selections(conn: sqlite3.Connection):
    """Iterate over the rows of the "selections" table as dicts.

    Args:
        conn (sqlite3.Connection): Sqlite3 connection

    Returns:
        Iterator of dictionaries with one key per column in the table.

    Example::
        {"id": ..., "name": ..., "count": ..., "query": ...}

    """
    # Note: row_factory is installed eagerly, before iteration begins.
    conn.row_factory = sqlite3.Row
    return map(dict, conn.execute("SELECT * FROM selections"))
# Example #16
def get_result_for_measurement_id(connection: sqlite3.Connection,
                                  measurement_id: int) -> Dict:
    """
    Returns the download, upload, and ping measurements for `measurement_id`.

    Raises:
        ValueError: If no measurement exists with `measurement_id`.
    """
    query = """
    SELECT m.timestamp, d.download_rate, u.upload_rate, p.ping
    FROM measurements m
    JOIN downloads d ON m.measurement_id = d.measurement_id
    JOIN uploads u ON m.measurement_id = u.measurement_id
    JOIN pings p ON m.measurement_id = p.measurement_id
    WHERE m.measurement_id = ?;
    """
    connection.row_factory = sqlite3.Row
    row = connection.execute(query, [measurement_id]).fetchone()
    # fetchone() returns None for an unknown id; fail with a clear error
    # instead of the obscure TypeError that dict(None) would raise.
    if row is None:
        raise ValueError(f"No measurement with id {measurement_id}")
    return dict(row)
# Example #17
def get_results_for_start_and_end_date(connection: sqlite3.Connection,
                                       start_datetime: datetime,
                                       end_datetime: datetime) -> List[Dict]:
    """
    Returns all measurements between `start_datetime` and `end_datetime`
    (inclusive on both ends).
    """
    query = """
    SELECT m.timestamp, d.download_rate, u.upload_rate, p.ping
    FROM measurements m
    JOIN downloads d ON m.measurement_id = d.measurement_id
    JOIN uploads u ON m.measurement_id = u.measurement_id
    JOIN pings p ON m.measurement_id = p.measurement_id
    WHERE m.timestamp >= ? AND m.timestamp <= ?;
    """
    connection.row_factory = sqlite3.Row
    bounds = [start_datetime, end_datetime]
    rows = connection.execute(query, bounds).fetchall()
    return list(map(dict, rows))
def update_videos(conn: sqlite3.Connection,
                  api_auth,
                  update_age_cutoff=86400,
                  verbosity=1):
    """Refresh stale video records via the YouTube API (generator).

    Yields progress tuples of (percent_done, records_passed, updated,
    newly_inactive, newly_active, deleted) roughly once per sub-percent of
    the workload, commits periodically, and logs a JSON summary at the end.

    Args:
        conn (sqlite3.Connection): open database connection.
        api_auth: auth object passed straight through to
            youtube.get_video_info.
        update_age_cutoff (int): only records whose last_updated is older
            than this many seconds are refreshed. Default 86400 (one day).
        verbosity (int): 0 is silent; levels 1-3 log progressively more.
    """
    verbosity_level_1 = verbosity >= 1
    verbosity_level_2 = verbosity >= 2
    verbosity_level_3 = verbosity >= 3
    records_passed, updated, newly_inactive, newly_active, deleted = [0] * 5
    conn.row_factory = sqlite3.Row
    cur = conn.cursor()
    # Candidate videos: skip placeholder titles and already-deleted records.
    cur.execute(
        """SELECT id, last_updated FROM videos
                   WHERE title NOT IN (?, ?) AND NOT status = ?
                   ORDER BY last_updated;""",
        ('unknown', 'YouTube Music', 'deleted'))
    records = {k: v for k, v in cur.fetchall()}  # video id -> last_updated
    cur.execute("""SELECT * FROM channels WHERE title is not NULL;""")
    channels = {k: v for k, v in cur.fetchall()}  # channel id -> title
    cur.execute("""SELECT * FROM tags;""")
    existing_tags = {v: k for k, v in cur.fetchall()}  # tag text -> tag id
    cur.execute("""SELECT * FROM videos_tags""")
    existing_videos_tags = {}  # video id -> [tag ids]
    for video_tag_entry in cur.fetchall():
        existing_videos_tags.setdefault(video_tag_entry[0], [])
        existing_videos_tags[video_tag_entry[0]].append(video_tag_entry[1])
    cur.execute("""SELECT * FROM videos_topics""")
    existing_topics_tags = {}  # video id -> [topic ids]
    for video_topic_entry in cur.fetchall():
        existing_topics_tags.setdefault(video_topic_entry[0], [])
        existing_topics_tags[video_topic_entry[0]].append(video_topic_entry[1])
    cur.close()

    now = datetime.utcnow()  # for determining if the record is old enough
    dt_strp = datetime.strptime
    dt_format = '%Y-%m-%d %H:%M:%S'
    records_filtered_by_age = [
        k for k, v in records.items()
        if (now - dt_strp(v, dt_format)).total_seconds() > update_age_cutoff
    ]
    sub_percent, sub_percent_int = calculate_subpercentage(
        len(records_filtered_by_age))
    commit_interval = calculate_commit_interval(sub_percent_int)
    commit_interval_counter = 0
    # Only the filtered id list is needed from here on; free the full map.
    del records

    if verbosity_level_1:
        logger.info(f'\nStarting records\' updating...\n' + '-' * 100)
    for record in records_filtered_by_age:
        records_passed += 1
        # Report progress once per sub-percent of the workload.
        if records_passed % sub_percent_int == 0:
            yield ((records_passed // sub_percent) / 10, records_passed,
                   updated, newly_inactive, newly_active, deleted)
        record = execute_query(conn, 'SELECT * FROM videos WHERE id = ?',
                               (record, ))
        record = dict(record[0])
        video_id = record['id']

        # Up to 5 API attempts with a rapidly growing sleep back-off.
        for attempt in range(1, 6):
            api_response = youtube.get_video_info(video_id, api_auth)
            time.sleep(0.01 * attempt**attempt)
            if api_response:
                if api_response['items']:
                    api_video_data = wrangle_video_record(
                        api_response['items'])
                    if len(api_video_data) >= 7:
                        # a record must have at least 7 fields after
                        # going through wrangle_video_record, otherwise it's a
                        # record of a deleted video with no valid data
                        api_video_data.pop('published_at', None)
                        if 'channel_title' not in api_video_data:
                            # the video is somehow available through API
                            # (though some data is missing), but not on YouTube
                            pass
                        else:
                            if record['status'] == 'inactive':
                                record['status'] = 'active'
                                newly_active += 1
                                if verbosity_level_1:
                                    logger.info(
                                        f'{get_record_id_and_title(record)}, '
                                        f'is now active')
                        record.update(api_video_data)
                    else:
                        record['status'] = 'deleted'
                        deleted += 1
                        if verbosity_level_1:
                            logger.info(f'{get_record_id_and_title(record)}, '
                                        f'is now deleted from YT')
                else:
                    if record['status'] == 'active':
                        record['status'] = 'inactive'
                        newly_inactive += 1
                        if verbosity_level_1:
                            logger.info(f'{get_record_id_and_title(record)}, '
                                        f'is now inactive')
                record['last_updated'] = datetime.utcnow().replace(
                    microsecond=0)
                break
        # for/else: every attempt returned an empty response; skip this record.
        else:
            continue

        if 'tags' in record:
            tags = record.pop('tags')
            add_tags_to_table_and_videos(conn, tags, video_id, existing_tags,
                                         existing_videos_tags,
                                         verbosity_level_3)
            # perhaps, the record should also be checked for tags that have
            # been removed from the updated version and have them removed from
            # the DB as well. However, keeping a fuller record, despite what
            # the video's uploader/author might think about its accuracy,
            # seems like a better option
        channel_id = record['channel_id']
        if 'channel_title' in record:
            channel_title = record.pop('channel_title')
            try:
                if channel_title != channels[channel_id]:
                    update_channel(conn, channel_id, channel_title,
                                   channels[channel_id], verbosity_level_1)
                    channels[channel_id] = channel_title
            except KeyError:
                """The channel now has a different ID... it's a thing.
                One possible reason for this is large channels, belonging to 
                large media companies, getting split off into smaller
                channels. That's what it looked like when I came across it.
                
                Only encountered this once in ~19k of my own records."""
                add_channel(conn, channel_id, channel_title, verbosity_level_2)
        else:
            # Less than a handful of videos were not available on YouTube, but
            # were available through API, with channel id, but no channel title.
            # In Takeout, these had no channel title or id, but had regular
            # title/id. Very strange.
            if channel_id not in channels:
                add_channel(conn, channel_id)

        if 'relevant_topic_ids' in record:
            topics_list = record.pop('relevant_topic_ids')
            if existing_topics_tags.get(video_id):
                for topic in topics_list:
                    if topic not in existing_topics_tags[video_id]:
                        add_topic_to_video(conn, topic, video_id,
                                           verbosity_level_2)

        if update_video(conn, record, verbosity_level_3):
            updated += 1

        # Commit periodically rather than once per record.
        commit_interval_counter += 1
        if commit_interval_counter == commit_interval:
            conn.commit()
            commit_interval_counter = 0

    conn.commit()
    execute_query(conn, 'VACUUM')
    conn.row_factory = None

    results = {
        'records_processed': records_passed,
        'records_updated': updated,
        'newly_inactive': newly_inactive,
        'newly_active': newly_active,
        'deleted_from_youtube': deleted
    }
    if verbosity_level_1:
        logger.info(json.dumps(results, indent=4))
        logger.info('\n' + '-' * 100 + f'\nUpdating finished')
# Example #19
def search(data: Connection, part_name: str) -> list[Contact]:
    """Return contacts whose last name starts with part_name.

    Args:
        data: open database connection.
        part_name: last-name prefix to match (a '%' wildcard is appended).

    Returns:
        list[Contact]: matching contacts; empty list when none match.
    """
    sql = "SELECT f_name, l_name,tel FROM contacts WHERE l_name LIKE ?"
    # Build a Contact directly from each fetched row.
    data.row_factory = lambda _, row: Contact(row[0], row[1], row[2])
    cursor = data.cursor()
    cursor.execute(sql, (part_name + "%", ))
    return cursor.fetchall()