Example #1
0
def _add_tweet_story(db: DatabaseHandler,
                     topic: Dict[str, Any],
                     tweet: dict,
                     topic_fetch_urls: List[Dict[str, Any]]) -> dict:
    """
    Create a story for a tweet, attach it to the topic, and record the tweet's outgoing links.

    The story is built from the tweet's author screen name, text, creation date and id, and is added
    to the topic with link_mined=True.  Every url returned by get_tweet_urls() for the tweet is then
    inserted into topic_links, unless skip_self_linked_domain_url() says to skip it.

    Arguments:
    db - db handle
    topic - topic dict; 'topics_id' is read from it
    tweet - tweet dict; 'user' -> 'screen_name', 'text', 'created_at' and 'id' are read from it
    topic_fetch_urls - dicts that are each passed through _log_tweet_added() for the new story

    Returns:
    the story returned by generate_story()

    """
    author = tweet['user']['screen_name']
    text = tweet['text']
    headline = f"{author}: {text}"
    published = tweet['created_at']
    status_url = f"https://twitter.com/{author}/status/{tweet['id']}"

    story = generate_story(
        db=db,
        url=status_url,
        content=text,
        title=headline,
        publish_date=published,
    )
    add_to_topic_stories(db=db, story=story, topic=topic, link_mined=True)

    # The value returned by _log_tweet_added() (not the incoming dict) is what gets handed on.
    for fetch_url in topic_fetch_urls:
        logged_fetch_url = _log_tweet_added(db, fetch_url, story)
        try_update_topic_link_ref_stories_id(db, logged_fetch_url)

    for linked_url in get_tweet_urls(tweet):
        if skip_self_linked_domain_url(db, topic['topics_id'], story['url'], linked_url):
            log.debug("skipping self linked domain url...")
        else:
            new_link = {
                'topics_id': topic['topics_id'],
                'stories_id': story['stories_id'],
                'url': linked_url,
            }
            db.create('topic_links', new_link)
            increment_domain_links(db, new_link)

    return story
def extract_links_for_topic_story(
    db: DatabaseHandler,
    stories_id: int,
    topics_id: int,
    test_throw_exception: bool = False,
) -> None:
    """
    Extract links from a story and insert them into the topic_links table for the given topic.

    The story and topic rows are looked up by id, _get_links_from_story() is called on the story, and
    every returned link that is not skipped by skip_self_linked_domain_url() is inserted into
    topic_links and counted with increment_domain_links().

    Afterwards topic_stories.link_mined is set to true for the story / topic pair, whether or not
    mining succeeded.  Errors raised while mining are caught, logged, and their traceback is saved in
    topic_stories.link_mine_error; on success link_mine_error is set to the empty string.

    The two row lookups happen before the error handling starts, so an exception raised by
    db.require_by_id() propagates to the caller and topic_stories is not updated.

    Arguments:
    db - db handle
    stories_id - id of the row in stories to mine
    topics_id - id of the row in topics the links belong to
    test_throw_exception - if true, raise McExtractLinksForTopicStoryTestException instead of mining,
        to exercise the error logging path

    Returns:
    None

    """
    story = db.require_by_id(table='stories', object_id=stories_id)
    topic = db.require_by_id(table='topics', object_id=topics_id)

    try:
        if test_throw_exception:
            raise McExtractLinksForTopicStoryTestException(
                "Testing whether errors get logged.")

        log.info(f"mining {story['title']} {story['url']} for topic {topic['name']} ..")
        links = _get_links_from_story(db, story)

        for link in links:
            if skip_self_linked_domain_url(db, topic['topics_id'],
                                           story['url'], link):
                log.debug("skipping self linked domain url...")
                continue

            topic_link = {
                'topics_id': topic['topics_id'],
                'stories_id': story['stories_id'],
                'url': link
            }

            db.create('topic_links', topic_link)
            increment_domain_links(db, topic_link)
    except Exception as ex:
        # Deliberately broad: any mining failure is recorded on the topic_stories row rather than
        # raised, so the story is still marked as link_mined below.
        log.error(f"Link mining error: {ex}")
        link_mine_error = traceback.format_exc()
    else:
        link_mine_error = ''

    db.query(
        """
        update topic_stories set link_mined = 't', link_mine_error = %(c)s
            where stories_id = %(a)s and topics_id = %(b)s
        """, {
            'a': story['stories_id'],
            'b': topic['topics_id'],
            'c': link_mine_error
        })