def _insert_story_sentences(
        db: DatabaseHandler,
        story: dict,
        sentences: List[str],
        no_dedup_sentences: bool = False,
) -> List[str]:
    """Insert the story sentences into story_sentences, optionally skipping duplicate sentences by setting is_dup = 't'
    to the found duplicates that are already in the table.

    :param db: Database handler.
    :param story: Story dict; must contain 'stories_id', 'media_id' and 'publish_date' keys.
    :param sentences: Sentences to insert for the story.
    :param no_dedup_sentences: If True, skip marking pre-existing duplicates and insert every sentence.
    :return: List of sentences that were inserted into the table.
    """
    story = decode_object_from_bytes_if_needed(story)
    sentences = decode_object_from_bytes_if_needed(sentences)
    if isinstance(no_dedup_sentences, bytes):
        no_dedup_sentences = decode_object_from_bytes_if_needed(no_dedup_sentences)
    no_dedup_sentences = bool(int(no_dedup_sentences))

    stories_id = story['stories_id']
    media_id = story['media_id']

    # Story's publish date is the same for all the sentences, so we might as well pass it as a constant
    escaped_story_publish_date = db.quote_date(story['publish_date'])

    if not sentences:
        log.warning("Story sentences are empty for story {}.".format(stories_id))
        return []

    if no_dedup_sentences:
        log.debug(
            "Won't de-duplicate sentences for story {} because 'no_dedup_sentences' is set.".format(stories_id)
        )
        dedup_sentences_statement = """
            -- Nothing to deduplicate, return empty list
            SELECT NULL
            WHERE 1 = 0
        """
    else:
        # Limit to unique sentences within a story
        sentences = _get_unique_sentences_in_story(sentences)

        # Set is_dup = 't' to sentences already in the table, return those to be later skipped on INSERT of new
        # sentences
        dedup_sentences_statement = """
            -- noinspection SqlResolve
            UPDATE story_sentences
            SET is_dup = 't'
            FROM new_sentences
            WHERE half_md5(story_sentences.sentence) = half_md5(new_sentences.sentence)
              AND week_start_date(story_sentences.publish_date::date) = week_start_date({})
              AND story_sentences.media_id = new_sentences.media_id
            RETURNING story_sentences.sentence
        """.format(escaped_story_publish_date)

    # Convert to list of dicts (values escaped for insertion into database)
    sentence_dicts = _get_db_escaped_story_sentence_dicts(db=db, story=story, sentences=sentences)

    # Ordered list of columns
    story_sentences_columns = sorted(sentence_dicts[0].keys())
    str_story_sentences_columns = ', '.join(story_sentences_columns)

    # List of sentences (in predefined column order); values are already DB-escaped, so they are interpolated directly
    new_sentences_sql = []
    for sentence_dict in sentence_dicts:
        new_sentence_sql = []
        for column in story_sentences_columns:
            new_sentence_sql.append(sentence_dict[column])
        new_sentences_sql.append('({})'.format(', '.join(new_sentence_sql)))
    str_new_sentences_sql = "\n{}".format(",\n".join(new_sentences_sql))

    sql = """
        -- noinspection SqlType,SqlResolve
        WITH new_sentences ({str_story_sentences_columns}) AS (VALUES
            -- New sentences to potentially insert
            {str_new_sentences_sql}
        ),
        duplicate_sentences AS (
            -- Either a list of duplicate sentences already found in the table or an empty list if deduplication is
            -- disabled
            --
            -- The query assumes that there are no existing sentences for this story in the "story_sentences" table, so
            -- if you are reextracting a story, DELETE its sentences from "story_sentences" before running this query.
            {dedup_sentences_statement}
        )
        INSERT INTO story_sentences ({str_story_sentences_columns})
        SELECT {str_story_sentences_columns}
        FROM new_sentences
        WHERE sentence NOT IN (
            -- Skip the ones for which we've just set is_dup = 't'
            SELECT sentence
            FROM duplicate_sentences
        )
        RETURNING story_sentences.sentence
    """.format(
        str_story_sentences_columns=str_story_sentences_columns,
        str_new_sentences_sql=str_new_sentences_sql,
        dedup_sentences_statement=dedup_sentences_statement,
    )

    log.debug("Adding advisory lock on media ID {}...".format(media_id))
    db.query("SELECT pg_advisory_lock(%(media_id)s)", {'media_id': media_id})

    log.debug("Running sentence insertion + deduplication query:\n{}".format(sql))

    # FIX: release the advisory lock even when the insertion query raises; previously a failing query left the
    # session holding the per-media lock, blocking every other worker processing the same media source.
    try:
        # Insert sentences
        inserted_sentences = db.query(sql).flat()
    finally:
        log.debug("Removing advisory lock on media ID {}...".format(media_id))
        db.query("SELECT pg_advisory_unlock(%(media_id)s)", {'media_id': media_id})

    return inserted_sentences
def _insert_story_sentences(
        db: DatabaseHandler,
        story: dict,
        sentences: List[str],
        no_dedup_sentences: bool = False,
) -> List[str]:
    """Insert the story sentences into story_sentences, optionally skipping duplicate sentences by setting is_dup = 't'
    to the found duplicates that are already in the table.

    :param db: Database handler.
    :param story: Story dict; must contain 'stories_id', 'media_id' and 'publish_date' keys.
    :param sentences: Sentences to insert for the story.
    :param no_dedup_sentences: If True, skip marking pre-existing duplicates and insert every sentence.
    :return: List of sentences that were inserted into the table.
    """
    story = decode_object_from_bytes_if_needed(story)
    sentences = decode_object_from_bytes_if_needed(sentences)
    if isinstance(no_dedup_sentences, bytes):
        no_dedup_sentences = decode_object_from_bytes_if_needed(no_dedup_sentences)
    no_dedup_sentences = bool(int(no_dedup_sentences))

    stories_id = story['stories_id']
    media_id = story['media_id']

    # Story's publish date is the same for all the sentences, so we might as well pass it as a constant
    escaped_story_publish_date = db.quote_date(story['publish_date'])

    if not sentences:
        log.warning(f"Story sentences are empty for story {stories_id}")
        return []

    if no_dedup_sentences:
        log.debug(
            f"Won't de-duplicate sentences for story {stories_id} because 'no_dedup_sentences' is set"
        )
        dedup_sentences_statement = """
            -- Nothing to deduplicate, return empty list
            SELECT NULL
            WHERE 1 = 0
        """
    else:
        # Limit to unique sentences within a story
        sentences = _get_unique_sentences_in_story(sentences)

        # Set is_dup = 't' to sentences already in the table, return those to be later skipped on INSERT of new
        # sentences
        dedup_sentences_statement = f"""
            -- noinspection SqlResolve
            UPDATE story_sentences
            SET is_dup = 't'
            FROM new_sentences
            WHERE public.half_md5(story_sentences.sentence) = public.half_md5(new_sentences.sentence)
              AND public.week_start_date(story_sentences.publish_date::date) =
                  public.week_start_date({escaped_story_publish_date})
              AND story_sentences.media_id = new_sentences.media_id
            RETURNING story_sentences.sentence
        """

    # Convert to list of dicts (values escaped for insertion into database)
    sentence_dicts = _get_db_escaped_story_sentence_dicts(db=db, story=story, sentences=sentences)

    # Ordered list of columns
    story_sentences_columns = sorted(sentence_dicts[0].keys())
    str_story_sentences_columns = ', '.join(story_sentences_columns)

    # List of sentences (in predefined column order); values are already DB-escaped, so they are interpolated directly
    new_sentences_sql = []
    for sentence_dict in sentence_dicts:
        new_sentence_sql = []
        for column in story_sentences_columns:
            new_sentence_sql.append(sentence_dict[column])
        new_sentences_sql.append(f"({', '.join(new_sentence_sql)})")
    str_new_sentences_sql = "\n{}".format(",\n".join(new_sentences_sql))

    # sometimes the big story_sentences query below deadlocks sticks in an idle state, holding this lock so we set a
    # short idle timeout for postgres just while we do this query. the timeout should not kick in while the
    # big story_sentences query is actively processing, so we can set it pretty short. we usually set this timeout
    # to 0 globally, but just to be safe store and reset the pre-existing value.
    idle_timeout = db.query("SHOW idle_in_transaction_session_timeout").flat()[0]
    db.query("SET idle_in_transaction_session_timeout = 5000")
    # NOTE(review): the previous pool size is not captured, so this setting leaks past this function — presumably
    # intentional (a session-wide tuning knob), but confirm.
    db.query('SET citus.max_adaptive_executor_pool_size TO 64')

    # FIX: restore the saved idle timeout even when one of the queries below raises; previously a failing query left
    # the shortened 5s timeout in effect for the rest of the session.
    try:
        sql = f"""
            -- noinspection SqlType,SqlResolve
            WITH new_sentences ({str_story_sentences_columns}) AS (VALUES
                -- New sentences to potentially insert
                {str_new_sentences_sql}
            )

            -- Either list of duplicate sentences already found in the table or return an empty list if deduplication is
            -- disabled
            --
            -- The query assumes that there are no existing sentences for this story in the "story_sentences" table, so
            -- if you are reextracting a story, DELETE its sentences from "story_sentences" before running this query.
            {dedup_sentences_statement}
        """

        log.debug(f"Running 'UPDATE story_sentences SET is_dup' query:\n{sql}")
        duplicate_sentences = db.query(sql).flat()

        # Re-escape the duplicates so they can be interpolated back into the INSERT query below
        duplicate_sentences = [db.quote_varchar(sentence) for sentence in duplicate_sentences]

        sql = f"""
            -- noinspection SqlType,SqlResolve
            WITH new_sentences ({str_story_sentences_columns}) AS (VALUES
                {str_new_sentences_sql}
            ),
            duplicate_sentences AS (
                SELECT unnest(ARRAY[{', '.join(duplicate_sentences)}]::TEXT[]) AS sentence
            )
            INSERT INTO story_sentences (language, media_id, publish_date, sentence, sentence_number, stories_id)
            SELECT language, media_id, publish_date, sentence, sentence_number, stories_id
            FROM new_sentences
            WHERE sentence NOT IN (
                -- Skip the ones for which we've just set is_dup = 't'
                SELECT sentence
                FROM duplicate_sentences
            )
            RETURNING story_sentences.sentence
        """

        log.debug(f"Running 'INSERT INTO story_sentences' query:\n{sql}")
        inserted_sentences = db.query(sql).flat()
    finally:
        db.query("SET idle_in_transaction_session_timeout = %(a)s", {'a': idle_timeout})

    return inserted_sentences