Example #1
def test_get_dup_story_groups():
    db = connect_to_db()

    topic = create_test_topic(db, 'dupstories')
    medium = create_test_medium(db, 'dupstories')
    feed = create_test_feed(db, 'dupstories', medium=medium)

    num_stories = 9
    for i in range(num_stories):
        story = create_test_story(db, "dupstories " + str(i), feed=feed)
        add_to_topic_stories(db, story, topic)
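        # assign the nine stories to three groups of three whose titles differ only by letter case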
        modi = i % 3
        divi = i // 3
        if modi == 0:
            db.update_by_id('stories', story['stories_id'],
                            {'title': 'TITLE ' + str(divi)})
        elif modi == 1:
            db.update_by_id('stories', story['stories_id'],
                            {'title': 'title ' + str(divi)})
        else:
            db.update_by_id('stories', story['stories_id'],
                            {'title': 'Title ' + str(divi)})

    dup_story_groups = _get_dup_story_groups(db, topic)

    assert len(dup_story_groups) == 3

    for dsg in dup_story_groups:
        for story in dsg:
            assert dsg[0]['title'].lower() == story['title'].lower()
Example #2
def _add_user_story(db: DatabaseHandler, topic: dict, user: dict, topic_fetch_urls: list) -> dict:
    """Generate a story based on the given user, as returned by the twitter api."""
    content = f"{user['name']} ({user['screen_name']}): {user['description']}"
    title = f"{user['name']} ({user['screen_name']}) | Twitter"
    tweet_date = sql_now()
    url = f"https://twitter.com/{user['screen_name']}"

    story = generate_story(db=db, url=url, content=content, title=title, publish_date=tweet_date)
    add_to_topic_stories(db=db, story=story, topic=topic, link_mined=True)

    for topic_fetch_url in topic_fetch_urls:
        topic_fetch_url = _log_tweet_added(db, topic_fetch_url, story)
        try_update_topic_link_ref_stories_id(db, topic_fetch_url)

    # twitter user pages are undateable because there is never a consistent version of the page
    undateable_tag = _get_undateable_tag(db)

    stories_id = story['stories_id']
    tags_id = undateable_tag['tags_id']

    db.query("""
        INSERT INTO public.stories_tags_map (stories_id, tags_id)
        VALUES (%(stories_id)s, %(tags_id)s)
        ON CONFLICT (stories_id, tags_id) DO NOTHING
    """, {
        'stories_id': stories_id,
        'tags_id': tags_id,
    })

    return story
Example #3
def test_merge_dup_media_stories():
    """Test merge_dup_media_stories()."""
    db = connect_to_db()

    topic = create_test_topic(db, 'merge')
    old_medium = create_test_medium(db, 'merge from')
    new_medium = create_test_medium(db, 'merge to')
    feed = create_test_feed(db, 'merge', medium=old_medium)

    num_stories = 10
    for i in range(num_stories):
        story = create_test_story(db, "merge " + str(i), feed=feed)
        add_to_topic_stories(db, story, topic)

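    # mark old_medium as a duplicate so that merge_dup_media_stories() moves its stories to new_medium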
    db.update_by_id('media', old_medium['media_id'], {'dup_media_id': new_medium['media_id']})

    merge_dup_media_stories(db, topic)

    got_stories = db.query(
        "select s.* from stories s join topic_stories ts using (stories_id) where topics_id = %(a)s",
        {'a': topic['topics_id']}).hashes()

    assert len(got_stories) == num_stories

    for got_story in got_stories:
        assert got_story['media_id'] == new_medium['media_id']
Example #4
def _add_tweet_story(db: DatabaseHandler,
                     topic: Dict[str, Any],
                     tweet: dict,
                     topic_fetch_urls: List[Dict[str, Any]]) -> dict:
    """Generate a story based on the given tweet, as returned by the twitter api."""
    screen_name = tweet['user']['screen_name']
    content = tweet['text']
    title = f"{screen_name}: {content}"
    tweet_date = tweet['created_at']
    url = f"https://twitter.com/{screen_name}/status/{tweet['id']}"

    story = generate_story(db=db, url=url, content=content, title=title, publish_date=tweet_date)
    add_to_topic_stories(db=db, story=story, topic=topic, link_mined=True)

    for topic_fetch_url in topic_fetch_urls:
        topic_fetch_url = _log_tweet_added(db, topic_fetch_url, story)
        try_update_topic_link_ref_stories_id(db, topic_fetch_url)

    urls = get_tweet_urls(tweet)
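    # turn each URL mentioned in the tweet into a topic link, skipping links back to the same domain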
    for url in urls:
        if skip_self_linked_domain_url(db, topic['topics_id'], story['url'], url):
            log.debug("skipping self linked domain url...")
            continue

        topic_link = {
            'topics_id': topic['topics_id'],
            'stories_id': story['stories_id'],
            'url': url,
        }

        db.create('topic_links', topic_link)
        increment_domain_links(db, topic_link)

    return story
Example #5
def test_copy_story_to_new_medium_with_download_error():
    """Test copy_story_to_new_medium with an associated download error."""
    db = connect_to_db()

    topic = create_test_topic(db, 'copy foo')

    new_medium = create_test_medium(db, 'copy new')

    old_medium = create_test_medium(db, 'copy old')
    old_feed = create_test_feed(db=db, label='copy old', medium=old_medium)
    old_story = create_test_story(db=db, label='copy old', feed=old_feed)

    add_content_to_test_story(db, old_story, old_feed)

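    # put the old story's download into an error state; the copy should carry that state over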
    db.query("update downloads set state = 'error' where stories_id = %(a)s", {'a': old_story['stories_id']})

    add_to_topic_stories(db, old_story, topic)

    new_story = copy_story_to_new_medium(db, topic, old_story, new_medium)

    assert db.find_by_id('stories', new_story['stories_id']) is not None

    new_download = db.query(
        "select * from downloads where stories_id = %(a)s",
        {'a': new_story['stories_id']}).hash()
    assert new_download is not None
    assert new_download['state'] == 'error'
Example #6
def test_add_missing_normalized_title_hashes():
    db = connect_to_db()

    topic = create_test_topic(db, 'titles')
    medium = create_test_medium(db, 'titles')
    feed = create_test_feed(db, 'titles', medium=medium)

    num_stories = 10
    for i in range(num_stories):
        story = create_test_story(db, "titles " + str(i), feed=feed)
        add_to_topic_stories(db, story, topic)

    # disable trigger so that we can actually set normalized_title_hash to null
    db.query(
        "SELECT run_on_shards_or_raise('stories', %(command)s)",
        {
            'command': """
                -- noinspection SqlResolveForFile @ trigger/"stories_add_normalized_title"
                BEGIN;
                LOCK TABLE pg_proc IN ACCESS EXCLUSIVE MODE;
                ALTER TABLE %s DISABLE TRIGGER stories_add_normalized_title;
                COMMIT;
            """,
        }
    )

    db.query("""
        WITH all_story_ids AS (
            SELECT stories_id
            FROM stories
        )
        UPDATE stories SET
            normalized_title_hash = NULL
        WHERE stories_id IN (
            SELECT stories_id
            FROM all_story_ids
        )
    """)

    db.query(
        "SELECT run_on_shards_or_raise('stories', %(command)s)",
        {
            'command': """
                -- noinspection SqlResolveForFile @ trigger/"stories_add_normalized_title"
                BEGIN;
                LOCK TABLE pg_proc IN ACCESS EXCLUSIVE MODE;
                ALTER TABLE %s ENABLE TRIGGER stories_add_normalized_title;
                COMMIT;
            """,
        }
    )

    assert __count_null_title_stories(db=db, topic=topic) == num_stories

    _add_missing_normalized_title_hashes(db, topic)

    assert __count_null_title_stories(db=db, topic=topic) == 0
Example #7
def test_copy_story_to_new_medium():
    """Test copy_story_to_new_medium."""
    db = connect_to_db()

    topic = create_test_topic(db, 'copy foo')

    new_medium = create_test_medium(db, 'copy new')

    old_medium = create_test_medium(db, 'copy old')
    old_feed = create_test_feed(db=db, label='copy old', medium=old_medium)
    old_story = create_test_story(db=db, label='copy old', feed=old_feed)

    add_content_to_test_story(db, old_story, old_feed)

    add_to_topic_stories(db, old_story, topic)

    new_story = copy_story_to_new_medium(db, topic, old_story, new_medium)

    assert db.find_by_id('stories', new_story['stories_id']) is not None

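    # core story metadata should be copied over verbatim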
    for field in 'title url guid publish_date'.split():
        assert old_story[field] == new_story[field]

    topic_story_exists = db.query("""
        SELECT *
        FROM topic_stories
        WHERE
            topics_id = %(topics_id)s AND
            stories_id = %(stories_id)s
    """, {
        'topics_id': topic['topics_id'],
        'stories_id': new_story['stories_id'],
    }).hash()
    assert topic_story_exists is not None

    new_download = db.query("""
        SELECT *
        FROM downloads
        WHERE stories_id = %(stories_id)s
    """, {
        'stories_id': new_story['stories_id'],
    }).hash()
    assert new_download is not None

    content = fetch_content(db, new_download)
    assert content is not None and len(content) > 0

    story_sentences = db.query("""
        SELECT *
        FROM story_sentences
        WHERE stories_id = %(stories_id)s
    """, {
        'stories_id': new_story['stories_id'],
    }).hashes()
    assert len(story_sentences) > 0
Example #8
def test_merge_dup_stories():
    """Test merge_dup_stories()."""
    db = connect_to_db()

    topic = create_test_topic(db, 'merge')
    medium = create_test_medium(db, 'merge')
    feed = create_test_feed(db, 'merge', medium=medium)

    num_stories = 10
    stories = []
    for i in range(num_stories):
        story = create_test_story(db, "merge " + str(i), feed=feed)
        add_to_topic_stories(db, story, topic)
        stories.append(story)
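        # give story i exactly i sentences so that later stories have more sentences than earlier ones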
        for j in range(i):
            # noinspection SqlInsertValues
            db.query(
                """
                INSERT INTO story_sentences (
                    stories_id,
                    sentence_number,
                    sentence,
                    media_id,
                    publish_date
                )
                    SELECT
                        stories_id,
                        %(sentence_number)s AS sentence_number,
                        'foo bar' AS sentence,
                        media_id,
                        publish_date
                    FROM stories
                    WHERE stories_id = %(stories_id)s
            """, {
                    'stories_id': story['stories_id'],
                    'sentence_number': j,
                })

    _merge_dup_stories(db, topic, stories)

    stories_ids = [s['stories_id'] for s in stories]
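    # after the merge, only the last story (the one with the most sentences) should remain in the topic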
    merged_stories = db.query(
        """
        SELECT stories_id
        FROM topic_stories
        WHERE
            topics_id = %(topics_id)s AND
            stories_id = ANY(%(stories_ids)s)
    """, {
            'topics_id': topic['topics_id'],
            'stories_ids': stories_ids,
        }).flat()

    assert merged_stories == [stories_ids[-1]]
Example #9
def test_find_and_merge_dup_stories():
    db = connect_to_db()

    topic = create_test_topic(db, 'dupstories')
    medium = create_test_medium(db, 'dupstories')
    feed = create_test_feed(db, 'dupstories', medium=medium)

    num_stories = 9
    for i in range(num_stories):
        story = create_test_story(db, "dupstories " + str(i), feed=feed)
        add_to_topic_stories(db, story, topic)
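        # as above, build three groups of three stories whose titles differ only by letter case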
        modi = i % 3
        divi = i // 3
        if modi == 0:
            db.update_by_id('stories', story['stories_id'],
                            {'title': 'TITLE ' + str(divi)})
        elif modi == 1:
            db.update_by_id('stories', story['stories_id'],
                            {'title': 'title ' + str(divi)})
        else:
            db.update_by_id('stories', story['stories_id'],
                            {'title': 'Title ' + str(divi)})

    find_and_merge_dup_stories(db, topic)

    num_topic_stories = db.query(
        """
        SELECT COUNT(*)
        FROM topic_stories
        WHERE topics_id = %(topics_id)s
    """, {
            'topics_id': topic['topics_id'],
        }).flat()[0]

    assert num_topic_stories == 3

    num_distinct_titles = db.query(
        """
        SELECT COUNT(DISTINCT normalized_title_hash)
        FROM snap.live_stories
        WHERE topics_id = %(topics_id)s
    """, {
            'topics_id': topic['topics_id'],
        }).flat()[0]

    assert num_distinct_titles == 3
Example #10
def test_merge_dup_media_stories():
    """Test merge_dup_media_stories()."""
    db = connect_to_db()

    topic = create_test_topic(db, 'merge')
    old_medium = create_test_medium(db, 'merge from')
    new_medium = create_test_medium(db, 'merge to')
    feed = create_test_feed(db, 'merge', medium=old_medium)

    num_stories = 10
    for i in range(num_stories):
        story = create_test_story(db, "merge " + str(i), feed=feed)
        add_to_topic_stories(db, story, topic)

    db.update_by_id('media', old_medium['media_id'],
                    {'dup_media_id': new_medium['media_id']})

    merge_dup_media_stories(db, topic)

    got_stories = db.query(
        """
        WITH found_topic_stories AS (
            SELECT stories_id
            FROM topic_stories
            WHERE topics_id = %(topics_id)s
        )

        SELECT *
        FROM stories
        WHERE stories_id IN (
            SELECT stories_id
            FROM found_topic_stories
        )
        """, {
            'topics_id': topic['topics_id']
        }).hashes()

    assert len(got_stories) == num_stories

    for got_story in got_stories:
        assert got_story['media_id'] == new_medium['media_id']
Example #11
def _add_user_story(db: DatabaseHandler, topic: dict, user: dict, topic_fetch_urls: list) -> dict:
    """Generate a story based on the given user, as returned by the twitter api."""
    content = '%s (%s): %s' % (user['name'], user['screen_name'], user['description'])
    title = '%s (%s) | Twitter' % (user['name'], user['screen_name'])
    tweet_date = sql_now()
    url = 'https://twitter.com/%s' % user['screen_name']

    story = generate_story(db=db, url=url, content=content, title=title, publish_date=tweet_date)
    add_to_topic_stories(db=db, story=story, topic=topic, link_mined=True)

    for topic_fetch_url in topic_fetch_urls:
        topic_fetch_url = _log_tweet_added(db, topic_fetch_url, story)
        try_update_topic_link_ref_stories_id(db, topic_fetch_url)

    # twitter user pages are undateable because there is never a consistent version of the page
    undateable_tag = _get_undateable_tag(db)
    db.query(
        "insert into stories_tags_map (stories_id, tags_id) values (%(a)s, %(b)s)",
        {'a': story['stories_id'], 'b': undateable_tag['tags_id']})

    return story
Example #12
def test_find_and_merge_dup_stories():
    db = connect_to_db()

    topic = create_test_topic(db, 'dupstories')
    medium = create_test_medium(db, 'dupstories')
    feed = create_test_feed(db, 'dupstories', medium=medium)

    num_stories = 9
    for i in range(num_stories):
        story = create_test_story(db, "dupstories " + str(i), feed=feed)
        add_to_topic_stories(db, story, topic)
        modi = i % 3
        divi = i // 3
        if modi == 0:
            db.update_by_id('stories', story['stories_id'],
                            {'title': 'TITLE ' + str(divi)})
        elif modi == 1:
            db.update_by_id('stories', story['stories_id'],
                            {'title': 'title ' + str(divi)})
        else:
            db.update_by_id('stories', story['stories_id'],
                            {'title': 'Title ' + str(divi)})

    find_and_merge_dup_stories(db, topic)

    num_topic_stories = db.query(
        "select count(*) from topic_stories where topics_id = %(a)s", {
            'a': topic['topics_id']
        }).flat()[0]

    assert num_topic_stories == 3

    num_distinct_titles = db.query(
        "select count(distinct normalized_title_hash) from snap.live_stories where topics_id = %(a)s",
        {
            'a': topic['topics_id']
        }).flat()[0]

    assert num_distinct_titles == 3
Example #13
def test_merge_dup_stories():
    """Test merge_dup_stories()."""
    db = connect_to_db()

    topic = create_test_topic(db, 'merge')
    medium = create_test_medium(db, 'merge')
    feed = create_test_feed(db, 'merge', medium=medium)

    num_stories = 10
    stories = []
    for i in range(num_stories):
        story = create_test_story(db, "merge " + str(i), feed=feed)
        add_to_topic_stories(db, story, topic)
        stories.append(story)
        for j in range(i):
            # noinspection SqlInsertValues
            db.query(
                """
                insert into story_sentences (stories_id, sentence_number, sentence, media_id, publish_date)
                    select stories_id, %(b)s, 'foo bar', media_id, publish_date
                        from stories where stories_id = %(a)s
                """, {
                    'a': story['stories_id'],
                    'b': j
                })

    _merge_dup_stories(db, topic, stories)

    stories_ids = [s['stories_id'] for s in stories]
    merged_stories = db.query(
        "select stories_id from topic_stories where topics_id = %(a)s and stories_id = any(%(b)s)",
        {
            'a': topic['topics_id'],
            'b': stories_ids
        }).flat()

    assert merged_stories == [stories_ids[-1]]
Example #14
def test_add_missing_normalized_title_hashes():
    db = connect_to_db()

    topic = create_test_topic(db, 'titles')
    medium = create_test_medium(db, 'titles')
    feed = create_test_feed(db, 'titles', medium=medium)

    num_stories = 10
    for i in range(num_stories):
        story = create_test_story(db, "titles " + str(i), feed=feed)
        add_to_topic_stories(db, story, topic)

    # disable trigger so that we can actually set normalized_title_hash to null
    db.query(
        "alter table stories disable trigger stories_add_normalized_title")
    # noinspection SqlWithoutWhere
    db.query("update stories set normalized_title_hash = null")
    db.query("alter table stories enable trigger stories_add_normalized_title")

    assert __count_null_title_stories(db=db, topic=topic) == num_stories

    _add_missing_normalized_title_hashes(db, topic)

    assert __count_null_title_stories(db=db, topic=topic) == 0
Example #15
def test_merge_dup_story():
    """Test _merge_dup_story()."""
    db = connect_to_db()

    topic = create_test_topic(db, 'merge')
    medium = create_test_medium(db, 'merge')
    feed = create_test_feed(db, 'merge', medium=medium)

    old_story = create_test_story(db=db, label='merge old', feed=feed)
    new_story = create_test_story(db=db, label='merge new', feed=feed)

    linked_story = create_test_story(db=db, label='linked', feed=feed)
    linking_story = create_test_story(db=db, label='linking', feed=feed)

    for story in (old_story, new_story, linked_story, linking_story):
        add_to_topic_stories(db, story, topic)

    db.create(
        'topic_links', {
            'topics_id': topic['topics_id'],
            'stories_id': old_story['stories_id'],
            'url': old_story['url'],
            'ref_stories_id': linked_story['stories_id'],
        })
    db.create(
        'topic_links', {
            'topics_id': topic['topics_id'],
            'stories_id': linking_story['stories_id'],
            'url': old_story['url'],
            'ref_stories_id': old_story['stories_id'],
        })
    db.create('topic_seed_urls', {
        'topics_id': topic['topics_id'],
        'stories_id': old_story['stories_id'],
    })

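    # merging should move topic links and the seed URL off old_story onto new_story and record the merge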
    _merge_dup_story(db, topic, old_story, new_story)

    old_topic_links = db.query(
        """
        SELECT *
        FROM topic_links
        WHERE
            topics_id = %(topics_id)s AND
            %(stories_id)s IN (stories_id, ref_stories_id)
    """, {
            'topics_id': topic['topics_id'],
            'stories_id': old_story['stories_id'],
        }).hashes()
    assert len(old_topic_links) == 0

    new_topic_links_linked = db.query(
        """
        SELECT *
        FROM topic_links
        WHERE
            topics_id = %(topics_id)s AND
            stories_id = %(stories_id)s AND
            ref_stories_id = %(ref_stories_id)s
    """, {
            'topics_id': topic['topics_id'],
            'stories_id': new_story['stories_id'],
            'ref_stories_id': linked_story['stories_id'],
        }).hashes()
    assert len(new_topic_links_linked) == 1

    new_topic_links_linking = db.query(
        """
        SELECT *
        FROM topic_links
        WHERE
            topics_id = %(topics_id)s AND
            ref_stories_id = %(ref_stories_id)s AND
            stories_id = %(stories_id)s
    """, {
            'topics_id': topic['topics_id'],
            'ref_stories_id': new_story['stories_id'],
            'stories_id': linking_story['stories_id'],
        }).hashes()
    assert len(new_topic_links_linking) == 1

    old_topic_stories = db.query(
        """
        SELECT *
        FROM topic_stories
        WHERE
            topics_id = %(topics_id)s AND
            stories_id = %(stories_id)s
    """, {
            'topics_id': topic['topics_id'],
            'stories_id': old_story['stories_id'],
        }).hashes()
    assert len(old_topic_stories) == 0

    topic_merged_stories_maps = db.query(
        """
        SELECT *
        FROM topic_merged_stories_map
        WHERE
            target_stories_id = %(target_stories_id)s AND
            source_stories_id = %(source_stories_id)s
    """, {
            'target_stories_id': new_story['stories_id'],
            'source_stories_id': old_story['stories_id'],
        }).hashes()
    assert len(topic_merged_stories_maps) == 1
Example #16
def test_merge_dup_story():
    """Test _merge_dup_story()."""
    db = connect_to_db()

    topic = create_test_topic(db, 'merge')
    medium = create_test_medium(db, 'merge')
    feed = create_test_feed(db, 'merge', medium=medium)

    old_story = create_test_story(db=db, label='merge old', feed=feed)
    new_story = create_test_story(db=db, label='merge new', feed=feed)

    linked_story = create_test_story(db=db, label='linked', feed=feed)
    linking_story = create_test_story(db=db, label='linking', feed=feed)

    for story in (old_story, new_story, linked_story, linking_story):
        add_to_topic_stories(db, story, topic)

    db.create(
        'topic_links', {
            'topics_id': topic['topics_id'],
            'stories_id': old_story['stories_id'],
            'url': old_story['url'],
            'ref_stories_id': linked_story['stories_id']
        })
    db.create(
        'topic_links', {
            'topics_id': topic['topics_id'],
            'stories_id': linking_story['stories_id'],
            'url': old_story['url'],
            'ref_stories_id': old_story['stories_id']
        })
    db.create('topic_seed_urls', {
        'topics_id': topic['topics_id'],
        'stories_id': old_story['stories_id']
    })

    _merge_dup_story(db, topic, old_story, new_story)

    old_topic_links = db.query(
        "select * from topic_links where topics_id = %(a)s and %(b)s in ( stories_id, ref_stories_id )",
        {
            'a': topic['topics_id'],
            'b': old_story['stories_id']
        }).hashes()
    assert len(old_topic_links) == 0

    new_topic_links_linked = db.query(
        "select * from topic_links where topics_id = %(a)s and stories_id = %(b)s and ref_stories_id = %(c)s",
        {
            'a': topic['topics_id'],
            'b': new_story['stories_id'],
            'c': linked_story['stories_id']
        }).hashes()
    assert len(new_topic_links_linked) == 1

    new_topic_links_linking = db.query(
        "select * from topic_links where topics_id = %(a)s and ref_stories_id = %(b)s and stories_id = %(c)s",
        {
            'a': topic['topics_id'],
            'b': new_story['stories_id'],
            'c': linking_story['stories_id']
        }).hashes()
    assert len(new_topic_links_linking) == 1

    old_topic_stories = db.query(
        "select * from topic_stories where topics_id = %(a)s and stories_id = %(b)s",
        {
            'a': topic['topics_id'],
            'b': old_story['stories_id']
        }).hashes()
    assert len(old_topic_stories) == 0

    topic_merged_stories_maps = db.query(
        "select * from topic_merged_stories_map where target_stories_id = %(a)s and source_stories_id = %(b)s",
        {
            'a': new_story['stories_id'],
            'b': old_story['stories_id']
        }).hashes()
    assert len(topic_merged_stories_maps) == 1
Example #17
def fetch_topic_url(db: DatabaseHandler,
                    topic_fetch_urls_id: int,
                    domain_timeout: Optional[int] = None) -> None:
    """Fetch a url for a topic and create a media cloud story from it if its content matches the topic pattern.

    Update the following fields in the topic_fetch_urls row:

    code - the status code of the http response
    fetch_date - the current time
    state - one of the FETCH_STATE_* constants
    message - message related to the state (e.g. HTTP message for FETCH_STATE_REQUEST_FAILED)
    stories_id - the id of the story generated from the fetched content, or null if no story was created

    If topic_links_id is present in the topic_fetch_url and if a story was added or matched, assign the resulting
    topic_fetch_urls.stories_id to topic_links.ref_stories_id.

    If the state is anything but FETCH_STATE_PENDING or FETCH_STATE_REQUEUED, return without doing anything.

    If there is content for the corresponding url and topics_id in topic_seed_urls, use that content instead of
    fetching the url.

    This function catches almost all possible exceptions and stashes them in topic_fetch_urls along with a state of
    FETCH_STATE_PYTHON_ERROR.

    Arguments:
    db - db handle
    topic_fetch_urls_id - id of topic_fetch_urls row
    domain_timeout - pass through to fetch_link

    Returns:
    None

    """
    topic_fetch_url = db.require_by_id('topic_fetch_urls', topic_fetch_urls_id)

    try:
        log.info("fetch_link: %s" % topic_fetch_url['url'])
        _try_fetch_topic_url(db=db,
                             topic_fetch_url=topic_fetch_url,
                             domain_timeout=domain_timeout)

        if topic_fetch_url.get('stories_id') is not None:
            story = db.require_by_id('stories', topic_fetch_url['stories_id'])
            topic = db.require_by_id('topics', topic_fetch_url['topics_id'])
            redirect_url = topic_fetch_url['url']
            assume_match = topic_fetch_url['assume_match']
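            # add the story to the topic only if it is not already there and its content matches the topic pattern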
            if _is_not_topic_story(db, topic_fetch_url):
                if _story_matches_topic(db,
                                        story,
                                        topic,
                                        redirect_url=redirect_url,
                                        assume_match=assume_match):
                    add_to_topic_stories(db, story, topic)

            # add redirect_url as a lookup url for the story, if it is different from the story url
            if redirect_url != story['url']:
                insert_story_urls(db, story, redirect_url)

        if topic_fetch_url['topic_links_id'] and topic_fetch_url['stories_id']:
            try_update_topic_link_ref_stories_id(db, topic_fetch_url)

    except McThrottledDomainException as ex:
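        # re-raise throttling errors rather than stashing them as a Python error state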
        raise ex

    except Exception as ex:
        log.error("Error while fetching URL {}: {}".format(
            topic_fetch_url, ex))

        topic_fetch_url['state'] = FETCH_STATE_PYTHON_ERROR
        topic_fetch_url['message'] = traceback.format_exc()
        log.warning('topic_fetch_url %s failed: %s' %
                    (topic_fetch_url['url'], topic_fetch_url['message']))

    db.update_by_id('topic_fetch_urls', topic_fetch_url['topic_fetch_urls_id'],
                    topic_fetch_url)