Beispiel #1
0
    def data_generator(conn: DBConnection):
        """
        Stream the rows of the items table that have no matching row in the
        texts table, in ascending id order.

        Rows are pulled from a named cursor in batches of ``args.batch_size``
        and yielded one at a time, so the full result set is never held in
        memory by this function.

        :param conn: DBConnection object used to open the named cursor
        :return: generator of (id, title, text) rows
        """
        cursor = conn.get_named_cursor("compute_sentiment_cursor")
        cursor.itersize = args.batch_size

        # Skip items whose id already appears as id_item in the texts table so
        # they are not processed again. The outer column is qualified
        # explicitly: a bare "id" inside the subquery would bind to the texts
        # table first if that table has its own "id" column, which would
        # silently break the correlation with the items table.
        query = """
        SELECT id, title, text
        FROM {table_items}
        WHERE NOT EXISTS (
        SELECT
        FROM {table_texts}
        WHERE {table_items}.id = {table_texts}.id_item
        )
        ORDER BY id;
        """

        query_sql = sql.SQL(query).format(
            table_items=sql.Identifier(TABLE_NAME_ITEMS),
            table_texts=sql.Identifier(TABLE_NAME_TEXTS),
        )

        try:
            cursor.execute(query_sql)

            while True:
                rows = cursor.fetchmany(args.batch_size)
                if not rows:
                    break
                yield from rows
        finally:
            # Release the cursor even when the consumer stops iterating early
            # or the query fails; the fixed cursor name could otherwise clash
            # on a later call. Assumes the object returned by get_named_cursor
            # exposes the DB-API close() method.
            cursor.close()
Beispiel #2
0
def get_column_values(
        conn: DBConnection,
        table_name: str,
        column_name: str,
        limit: Optional[int] = None,
        fetch_size: int = 10000,
        cursor_name: Optional[str] = None,
) -> Iterable[Any]:
    """
    Get all values in a given column in a given table

    This is a generator function: nothing (including the log message and the
    query) runs until the first value is requested.

    :param conn: DBConnection object
    :param table_name: name of the table
    :param column_name: name of the column
    :param limit: maximum number of values to return; None means no limit,
        0 yields nothing
    :param fetch_size: number of rows to fetch in one batch
    :param cursor_name: optional name of the cursor; when omitted, a name is
        derived from the current timestamp at call time
    :return: generator of values (if any) in the column
    """
    logging.info("getting all values for column: %s", column_name)

    # The fallback name must be computed per call. A default of
    # str(datetime.now()) in the signature is evaluated only once, at
    # definition time, so every call would share the same cursor name.
    if cursor_name is None:
        cursor_name = str(datetime.now())

    cursor = conn.get_named_cursor(str(cursor_name))
    cursor.itersize = fetch_size

    # Compare against None rather than truthiness so that limit=0 is honoured
    # instead of being treated as "no limit".
    has_limit = limit is not None
    if has_limit:
        query = "SELECT {column} FROM {table} LIMIT %s;"
    else:
        query = "SELECT {column} FROM {table};"

    query_sql = sql.SQL(query).format(column=sql.Identifier(column_name),
                                      table=sql.Identifier(table_name))

    try:
        cursor.execute(query_sql, [limit] if has_limit else None)

        while True:
            rows = cursor.fetchmany(fetch_size)
            if not rows:
                break
            for row in rows:
                # Single-column SELECT: the value is the first field of the row.
                yield row[0]
    finally:
        # Release the cursor even when the consumer stops iterating early or
        # the query fails. Assumes the object returned by get_named_cursor
        # exposes the DB-API close() method.
        cursor.close()