예제 #1
0
    def setUp(self) -> None:
        """Connect to the database, start a local HashServer and build a one-story fixture.

        After this runs the test case exposes:

        * ``self.db`` -- handle returned by ``connect_to_db()``;
        * ``self.port`` -- port returned by ``random_unused_port()``, on which the
          HashServer is started;
        * ``self.media`` -- story stack created from ``{'A': {'B': [1]}}``;
        * ``self.feed`` -- the ``self.media['A']['feeds']['B']`` entry of that stack.
        """
        self.db = connect_to_db()
        self.port = random_unused_port()

        # The pages to serve come from hashserver_pages(); the server handle is
        # stored on the instance before it is started.
        self.__hs = HashServer(port=self.port, pages=self.hashserver_pages())
        self.__hs.start()

        stack_spec = {'A': {'B': [1]}}
        self.media = create_test_story_stack(db=self.db, data=stack_spec)
        self.feed = self.media['A']['feeds']['B']
예제 #2
0
def create_test_story_stack_for_indexing(db: DatabaseHandler, data: dict) -> dict:
    """Create a test story stack with content, story tags and timespans attached.

    :param db: Database handler used for every query and insert.
    :param data: Story stack specification handed to ``create_test_story_stack()``
        (decoded from bytes first, if needed).
    :return: Whatever ``add_content_to_test_story_stack()`` returns for the created stack.
    """
    data = decode_object_from_bytes_if_needed(data)

    media = add_content_to_test_story_stack(
        db=db,
        story_stack=create_test_story_stack(db=db, data=data),
    )

    # Note that this selects every row of "stories", ordered by the MD5 of the
    # story ID, not only the stories created above.
    all_stories = db.query("SELECT * FROM stories ORDER BY md5(stories_id::text)").hashes()

    # Add ancillary data so that it can be queried in Solr
    for add_ancillary_data in (_add_story_tags_to_stories, _add_timespans_to_stories):
        add_ancillary_data(db=db, stories=all_stories)

    return media
    def setUp(self):
        """Create test_story and test_download.

        Builds a story stack from ``{'A': {'B': [1]}}``, creates a download for
        story '1' of feed 'B' and stores ``'<p>foo</p>'`` as that download's content.
        """
        super().setUp()
        self.db = connect_to_db()

        stack = create_test_story_stack(self.db, {'A': {'B': [1]}})
        feed = stack['A']['feeds']['B']
        story = feed['stories']['1']

        download = create_download_for_story(db=self.db, feed=feed, story=story)
        store_content(self.db, download, '<p>foo</p>')

        self.test_story, self.test_download = story, download
def test_find_dup_stories():
    """Run ``_test_story()`` against every story of a freshly created story stack.

    The stack is built from two top-level entries ('A' and 'D') holding three
    feeds ('B', 'C', 'E') with three numbered stories each.
    """
    db = connect_to_db()

    data = {
        'A': {
            'B': [1, 2, 3],
            'C': [4, 5, 6],
        },
        'D': {
            'E': [7, 8, 9],
        },
    }
    media = create_test_story_stack(db=db, data=data)

    # Walk the specification and the created stack in lockstep; stories are
    # keyed in the stack by the string form of their number.
    for media_name, feed_spec in data.items():
        created_feeds = media[media_name]['feeds']
        for feeds_name, story_numbers in feed_spec.items():
            created_stories = created_feeds[feeds_name]['stories']
            for num in story_numbers:
                _test_story(db=db, story=created_stories[str(num)], num=num)
def test_get_and_store_story_stats():
    """Check what get_and_store_story_stats() writes to ``story_statistics``.

    First call (valid URL): the stored share / comment / reaction counts must
    equal the returned stats and no API error may be stored.

    Second call (URL with a bogus scheme): the call must raise
    ``McFacebookSoftFailureException``, the stored counts must be unset and an
    API error must be stored.
    """
    db = connect_to_db()

    media = create_test_story_stack(db=db, data={'A': {'B': [1, 2, 3]}})
    story = media['A']['feeds']['B']['stories']['1']

    stats_query = """
        SELECT *
        FROM story_statistics
        WHERE stories_id = %(stories_id)s
    """

    def fetch_stored_stats():
        # Re-read the statistics row of the story under test.
        return db.query(stats_query, {'stories_id': story['stories_id']}).hash()

    # --- Successful fetch ---

    # noinspection HttpUrlsUsage
    story['url'] = 'http://google.com'
    returned_stats = get_and_store_story_stats(db=db, story=story)

    stored_stats = fetch_stored_stats()

    assert stored_stats, "story_statistics row exists after initial insert."

    assert stored_stats.get('facebook_share_count') == returned_stats.share_count, "Share count."
    assert stored_stats.get('facebook_comment_count') == returned_stats.comment_count, "Comment count."
    assert stored_stats.get('facebook_reaction_count') == returned_stats.reaction_count, "Reaction count."
    assert stored_stats.get('facebook_api_error') is None, "Null URL share count error."

    # --- Failing fetch ---

    story['url'] = 'boguschema://foobar'

    with pytest.raises(McFacebookSoftFailureException):
        get_and_store_story_stats(db=db, story=story)

    stored_stats = fetch_stored_stats()

    assert stored_stats, "story_statistics row exists after initial insert."

    assert stored_stats.get('facebook_share_count') is None, "Share count should be unset after error."
    assert stored_stats.get('facebook_comment_count') is None, "Comment count should be unset after error."
    assert stored_stats.get('facebook_reaction_count') is None, "Reaction count should be unset after error."
    assert stored_stats.get('facebook_api_error') is not None, "Facebook should have reported an error."
예제 #6
0
    def test_get_topic_url_variants(self):
        """Check all_url_variants() against stories joined by merges and topic links.

        Fixture built here:

        * story 2 is recorded as merged into story 1, and story 3 into story 2
          (``topic_merged_stories_map``);
        * stories 1-4 are added to one topic (``topic_stories``);
        * story 4 links to stories 1 and 2 using their own URL plus a
          ``/redirect_url`` redirect, and to story 3 using an ``/alternate`` URL
          without a redirect (``topic_links``).

        The variants returned for story 1's URL with ``self.CRUFT`` appended must
        match the expected set, compared pairwise in sorted order with
        ``urls_are_equal()``.
        """
        media = create_test_story_stack(
            db=self.db(),
            data={
                'A': {
                    'B': [1, 2, 3],
                    'C': [4, 5, 6],
                },
                'D': {
                    'E': [7, 8, 9],
                },
            },
        )

        story_1 = media['A']['feeds']['B']['stories']['1']
        story_2 = media['A']['feeds']['B']['stories']['2']
        story_3 = media['A']['feeds']['B']['stories']['3']
        story_4 = media['A']['feeds']['C']['stories']['4']

        merge_sql = """
            INSERT INTO topic_merged_stories_map (source_stories_id, target_stories_id)
            VALUES (%(source_stories_id)s, %(target_stories_id)s)
        """

        # (source, target) pairs, inserted in this order.
        for source_story, target_story in ((story_2, story_1), (story_3, story_2)):
            self.db().query(merge_sql, {
                'source_stories_id': source_story['stories_id'],
                'target_stories_id': target_story['stories_id'],
            })

        self.db().create(
            table='tag_sets',
            insert_hash={'name': 'foo'},
        )

        topic = create_test_topic(db=self.db(), label='foo')

        # Story 4 is the source of every topic link below.
        self.db().create(table='topic_stories',
                         insert_hash={
                             'topics_id': topic['topics_id'],
                             'stories_id': story_4['stories_id'],
                         })

        # Stories 1 and 2: linked by their own URL, with a redirect URL.
        for ref_story in (story_1, story_2):
            self.db().create(table='topic_stories',
                             insert_hash={
                                 'topics_id': topic['topics_id'],
                                 'stories_id': ref_story['stories_id'],
                             })

            self.db().create(table='topic_links',
                             insert_hash={
                                 'topics_id': topic['topics_id'],
                                 'stories_id': story_4['stories_id'],
                                 'ref_stories_id': ref_story['stories_id'],
                                 'url': ref_story['url'],
                                 'redirect_url': ref_story['url'] + "/redirect_url",
                             })

        # Story 3: linked by an alternate URL, without a redirect URL.
        self.db().create(table='topic_stories',
                         insert_hash={
                             'topics_id': topic['topics_id'],
                             'stories_id': story_3['stories_id'],
                         })

        self.db().create(table='topic_links',
                         insert_hash={
                             'topics_id': topic['topics_id'],
                             'stories_id': story_4['stories_id'],
                             'ref_stories_id': story_3['stories_id'],
                             'url': story_3['url'] + '/alternate',
                         })

        test_url = story_1['url'] + self.CRUFT

        expected_urls = {
            story_1['url'],
            story_1['url'] + self.CRUFT,
            story_2['url'],
            story_1['url'] + "/redirect_url",
            story_2['url'] + "/redirect_url",
            story_3['url'],
            story_3['url'] + "/alternate",
        }

        url_variants = all_url_variants(db=self.db(), url=test_url)

        # Lengths are checked first, so zip() below cannot silently drop a URL.
        assert len(expected_urls) == len(url_variants)

        for expected_url, url_variant in zip(sorted(expected_urls), sorted(url_variants)):
            assert urls_are_equal(url1=expected_url, url2=url_variant)
예제 #7
0
    def test_is_new(self):
        """Check is_new() against variations of every story in a test story stack."""

        def _test_story(db: DatabaseHandler, story_: dict, num_: int) -> None:
            """Assert the is_new() verdict for several modified copies of a stored story.

            Every variation is derived from the ``story_`` argument only; the
            helper does not read any variable of the enclosing test.

            :param db: Database handler passed through to is_new().
            :param story_: Story (already present in the database) to derive variations from.
            :param num_: Story number, used only in assertion messages.
            """
            assert is_new(
                db=db,
                story=story_,
            ) is False, "{} identical".format(num_)

            assert is_new(
                db=db,
                story={
                    **story_,
                    'media_id': story_['media_id'] + 1,
                },
            ) is True, "{} media_id diff".format(num_)

            assert is_new(
                db=db,
                story={
                    **story_,
                    'url': 'diff',
                    'guid': 'diff',
                },
            ) is False, "{} URL + GUID diff, title same".format(num_)

            assert is_new(
                db=db,
                story={
                    **story_,
                    'url': 'diff',
                    'title': 'diff',
                },
            ) is False, "{} title + URL diff, GUID same".format(num_)

            assert is_new(
                db=db,
                story={
                    **story_,
                    'guid': 'diff',
                    'title': 'diff',
                },
            ) is True, "{} title + GUID diff, URL same".format(num_)

            # Same title, but published two days later / earlier.
            assert is_new(
                db=db,
                story={
                    **story_,
                    'url': 'diff',
                    'guid': 'diff',
                    'publish_date': increment_day(date=story_['publish_date'], days=2),
                },
            ) is True, "{} date + 2 days".format(num_)

            assert is_new(
                db=db,
                story={
                    **story_,
                    'url': 'diff',
                    'guid': 'diff',
                    'publish_date': increment_day(date=story_['publish_date'], days=-2),
                },
            ) is True, "{} date - 2 days".format(num_)

        data = {
            'A': {
                'B': [1, 2, 3],
                'C': [4, 5, 6],
            },
            'D': {
                'E': [7, 8, 9],
            },
        }

        media = create_test_story_stack(db=self.db(), data=data)
        for media_name, feeds in data.items():
            for feeds_name, stories in feeds.items():
                for num in stories:
                    # Stories are keyed in the stack by the string form of their number.
                    stack_story = media[media_name]['feeds'][feeds_name]['stories'][str(num)]
                    _test_story(db=self.db(), story_=stack_story, num_=num)
예제 #8
0
    def test_get_topic_url_variants(self):
        """Check all_url_variants() against stories joined by merges and topic links.

        Stories 2 and 3 are recorded as merged (3 into 2, 2 into 1), stories 1-4
        are added to a single topic, and story 4 gets topic links to stories 1-3.
        The variants returned for story 1's URL with ``self.CRUFT`` appended are
        then compared, in sorted order, with the expected set of URLs.
        """
        media = create_test_story_stack(
            db=self.db(),
            data={
                'A': {
                    'B': [1, 2, 3],
                    'C': [4, 5, 6],
                },
                'D': {
                    'E': [7, 8, 9],
                },
            },
        )

        story_1 = media['A']['feeds']['B']['stories']['1']
        story_2 = media['A']['feeds']['B']['stories']['2']
        story_3 = media['A']['feeds']['B']['stories']['3']
        story_4 = media['A']['feeds']['C']['stories']['4']

        def merge_story(source: dict, target: dict) -> None:
            # Record "source" as merged into "target".
            self.db().query("""
            INSERT INTO topic_merged_stories_map (source_stories_id, target_stories_id)
            VALUES (%(source_stories_id)s, %(target_stories_id)s)
        """, {
                'source_stories_id': source['stories_id'],
                'target_stories_id': target['stories_id'],
            })

        def add_topic_story(story: dict) -> None:
            # Attach "story" to the test topic.
            self.db().create(
                table='topic_stories',
                insert_hash={
                    'topics_id': topic['topics_id'],
                    'stories_id': story['stories_id'],
                },
            )

        def add_topic_link(ref_story: dict, **url_columns: str) -> None:
            # Link story 4 to "ref_story"; url_columns supplies the 'url' and,
            # optionally, 'redirect_url' columns of the link row.
            self.db().create(
                table='topic_links',
                insert_hash={
                    'topics_id': topic['topics_id'],
                    'stories_id': story_4['stories_id'],
                    'ref_stories_id': ref_story['stories_id'],
                    **url_columns,
                },
            )

        merge_story(source=story_2, target=story_1)
        merge_story(source=story_3, target=story_2)

        self.db().create(
            table='tag_sets',
            insert_hash={'name': 'foo'},
        )

        topic = create_test_topic(db=self.db(), label='foo')

        add_topic_story(story_4)

        # Stories 1 and 2 are linked by their own URL, with a redirect URL.
        for ref_story in (story_1, story_2):
            add_topic_story(ref_story)
            add_topic_link(
                ref_story,
                url=ref_story['url'],
                redirect_url=ref_story['url'] + "/redirect_url",
            )

        # Story 3 is linked by an alternate URL, without a redirect URL.
        add_topic_story(story_3)
        add_topic_link(story_3, url=story_3['url'] + '/alternate')

        test_url = story_1['url'] + self.CRUFT

        expected_urls = {
            story_1['url'],
            story_1['url'] + self.CRUFT,
            story_2['url'],
            story_1['url'] + "/redirect_url",
            story_2['url'] + "/redirect_url",
            story_3['url'],
            story_3['url'] + "/alternate",
        }

        url_variants = all_url_variants(db=self.db(), url=test_url)

        # Lengths are checked first, so zip() below cannot silently drop a URL.
        assert len(expected_urls) == len(url_variants)

        for expected_url, url_variant in zip(sorted(expected_urls), sorted(url_variants)):
            assert urls_are_equal(url1=expected_url, url2=url_variant)
예제 #9
0
    def test_find_dup_story(self):
        """Check find_dup_story() against variations of every story in a test story stack."""

        def _test_story(db: DatabaseHandler, story_: dict, num_: int) -> None:
            """Assert what find_dup_story() returns for modified copies of a stored story.

            Every variation is derived from the ``story_`` argument only; the
            helper does not read any variable of the enclosing test except
            ``self`` (for ``new_unique_str()``).

            :param db: Database handler passed through to find_dup_story().
            :param story_: Story (already present in the database) to derive variations from.
            :param num_: Story number, used only in assertion messages.
            """
            assert find_dup_story(
                db=db,
                story=story_,
            ) == story_, "{} identical".format(num_)

            assert find_dup_story(
                db=db,
                story={
                    **story_,
                    'media_id': story_['media_id'] + 1,
                },
            ) is None, "{} media_id diff".format(num_)

            assert find_dup_story(
                db=db,
                story={
                    **story_,
                    'url': self.new_unique_str(),
                    'guid': self.new_unique_str(),
                },
            ) == story_, "{} URL + GUID diff, title same".format(num_)

            assert find_dup_story(
                db=db,
                story={
                    **story_,
                    'url': self.new_unique_str(),
                    'title': self.new_unique_str(),
                },
            ) == story_, "{} title + URL diff, GUID same".format(num_)

            assert find_dup_story(
                db=db,
                story={
                    **story_,
                    'guid': self.new_unique_str(),
                    'title': self.new_unique_str(),
                },
            ) == story_, "{} title + GUID diff, URL same".format(num_)

            # Upper-casing the URL must still match the stored story.
            assert find_dup_story(
                db=db,
                story={
                    **story_,
                    'url': story_['url'].upper(),
                    'guid': self.new_unique_str(),
                    'title': self.new_unique_str(),
                },
            ) == story_, "{} title + GUID diff, nornmalized url same ".format(num_)

            # Same title, but published two days later / earlier.
            assert find_dup_story(
                db=db,
                story={
                    **story_,
                    'url': self.new_unique_str(),
                    'guid': self.new_unique_str(),
                    'publish_date': increment_day(date=story_['publish_date'], days=2),
                },
            ) is None, "{} date + 2 days".format(num_)

            assert find_dup_story(
                db=db,
                story={
                    **story_,
                    'url': self.new_unique_str(),
                    'guid': self.new_unique_str(),
                    'publish_date': increment_day(date=story_['publish_date'], days=-2),
                },
            ) is None, "{} date - 2 days".format(num_)

            # verify that we can find dup story by the url or guid of a previously dup'd story
            dup_url = self.new_unique_str()
            dup_guid = self.new_unique_str()

            nondup_url = self.new_unique_str()
            nondup_guid = 'bogus unique guid'
            nondup_title = 'bogus unique title'

            dup_story = find_dup_story(db, {
                **story_,
                'url': dup_url,
                'guid': dup_guid,
            })
            assert dup_story == story_

            assert find_dup_story(db, {
                **story_,
                'url': dup_url,
                'title': nondup_title,
            }) == story_
            assert find_dup_story(db, {
                **story_,
                'guid': dup_guid,
                'title': nondup_title,
            }) == story_

            nondup_story = {
                **story_,
                'url': nondup_url,
                'guid': nondup_guid,
                'title': nondup_title,
            }
            assert find_dup_story(db, nondup_story) is None

        data = {
            'A': {
                'B': [1, 2, 3],
                'C': [4, 5, 6],
            },
            'D': {
                'E': [7, 8, 9],
            },
        }

        media = create_test_story_stack(db=self.db(), data=data)
        for media_name, feeds in data.items():
            for feeds_name, stories in feeds.items():
                for num in stories:
                    # Stories are keyed in the stack by the string form of their number.
                    stack_story = media[media_name]['feeds'][feeds_name]['stories'][str(num)]
                    _test_story(db=self.db(), story_=stack_story, num_=num)