def setUp(self) -> None:
    self.db = connect_to_db()

    self.port = random_unused_port()
    self.__hs = HashServer(port=self.port, pages=self.hashserver_pages())
    self.__hs.start()

    self.media = create_test_story_stack(db=self.db, data={'A': {'B': [1]}})
    self.feed = self.media['A']['feeds']['B']
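# Hedged sketch (an assumption added for illustration, not part of the original suite) of the
# hashserver_pages() hook that setUp() above calls: HashServer serves a dict mapping URL paths to
# response bodies, so a concrete test case can return something along these lines. The actual
# pages served by the real test cases will differ.
def hashserver_pages(self) -> dict:
    return {
        # Hypothetical feed path and payload, used only to show the expected dict shape
        '/feed.rss': '<rss version="2.0"><channel><title>Test feed</title></channel></rss>',
    }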
def create_test_story_stack_for_indexing(db: DatabaseHandler, data: dict) -> dict:
    data = decode_object_from_bytes_if_needed(data)

    story_stack = create_test_story_stack(db=db, data=data)
    media = add_content_to_test_story_stack(db=db, story_stack=story_stack)

    test_stories = db.query("SELECT * FROM stories ORDER BY md5(stories_id::text)").hashes()

    # Add ancillary data so that it can be queried in Solr
    _add_story_tags_to_stories(db=db, stories=test_stories)
    _add_timespans_to_stories(db=db, stories=test_stories)

    return media
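# Hedged usage sketch (an assumption added for illustration, not part of the original suite):
# the nested `data` dict maps medium name -> feed name -> list of story numbers, and the returned
# dict exposes each generated story under media[<medium>]['feeds'][<feed>]['stories'][<number as
# a string>], which is the shape the tests below rely on.
def _example_story_stack_for_indexing_usage() -> None:
    db = connect_to_db()
    media = create_test_story_stack_for_indexing(db=db, data={'A': {'B': [1, 2]}})

    # Stories are keyed by their number rendered as a string
    story = media['A']['feeds']['B']['stories']['1']
    assert story['stories_id']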
def setUp(self):
    """Create test_story and test_download."""
    super().setUp()

    self.db = connect_to_db()

    media = create_test_story_stack(self.db, {'A': {'B': [1]}})

    story = media['A']['feeds']['B']['stories']['1']

    download = create_download_for_story(
        db=self.db,
        feed=media['A']['feeds']['B'],
        story=story,
    )
    store_content(self.db, download, '<p>foo</p>')

    self.test_story = story
    self.test_download = download
def test_find_dup_stories():
    db = connect_to_db()

    data = {
        'A': {
            'B': [1, 2, 3],
            'C': [4, 5, 6],
        },
        'D': {
            'E': [7, 8, 9],
        }
    }

    media = create_test_story_stack(db=db, data=data)

    for media_name, feeds in data.items():
        for feeds_name, stories in feeds.items():
            for num in stories:
                story = media[media_name]['feeds'][feeds_name]['stories'][str(num)]
                _test_story(db=db, story=story, num=num)
def test_get_and_store_story_stats():
    db = connect_to_db()

    media = create_test_story_stack(db=db, data={'A': {'B': [1, 2, 3]}})
    story = media['A']['feeds']['B']['stories']['1']

    # noinspection HttpUrlsUsage
    story['url'] = 'http://google.com'

    returned_stats = get_and_store_story_stats(db=db, story=story)

    stored_stats = db.query("""
        SELECT *
        FROM story_statistics
        WHERE stories_id = %(stories_id)s
    """, {'stories_id': story['stories_id']}).hash()

    assert stored_stats, "story_statistics row exists after initial insert."
    assert stored_stats.get('facebook_share_count', None) == returned_stats.share_count, "Share count."
    assert stored_stats.get('facebook_comment_count', None) == returned_stats.comment_count, "Comment count."
    assert stored_stats.get('facebook_reaction_count', None) == returned_stats.reaction_count, "Reaction count."
    assert stored_stats.get('facebook_api_error', None) is None, "Null URL share count error."

    story['url'] = 'boguschema://foobar'

    with pytest.raises(McFacebookSoftFailureException):
        get_and_store_story_stats(db=db, story=story)

    stored_stats = db.query("""
        SELECT *
        FROM story_statistics
        WHERE stories_id = %(stories_id)s
    """, {'stories_id': story['stories_id']}).hash()

    assert stored_stats, "story_statistics row exists after initial insert."
    assert stored_stats.get('facebook_share_count', None) is None, "Share count should be unset after error."
    assert stored_stats.get('facebook_comment_count', None) is None, "Comment count should be unset after error."
    assert stored_stats.get('facebook_reaction_count', None) is None, "Reaction count should be unset after error."
    assert stored_stats.get('facebook_api_error', None) is not None, "Facebook should have reported an error."
def test_get_topic_url_variants(self):
    media = create_test_story_stack(
        db=self.db(),
        data={
            'A': {
                'B': [1, 2, 3],
                'C': [4, 5, 6],
            },
            'D': {
                'E': [7, 8, 9],
            }
        }
    )

    story_1 = media['A']['feeds']['B']['stories']['1']
    story_2 = media['A']['feeds']['B']['stories']['2']
    story_3 = media['A']['feeds']['B']['stories']['3']
    story_4 = media['A']['feeds']['C']['stories']['4']

    self.db().query("""
        INSERT INTO topic_merged_stories_map (source_stories_id, target_stories_id)
        VALUES (%(source_stories_id)s, %(target_stories_id)s)
    """, {
        'source_stories_id': story_2['stories_id'],
        'target_stories_id': story_1['stories_id'],
    })
    self.db().query("""
        INSERT INTO topic_merged_stories_map (source_stories_id, target_stories_id)
        VALUES (%(source_stories_id)s, %(target_stories_id)s)
    """, {
        'source_stories_id': story_3['stories_id'],
        'target_stories_id': story_2['stories_id'],
    })

    self.db().create(
        table='tag_sets',
        insert_hash={'name': 'foo'},
    )

    topic = create_test_topic(db=self.db(), label='foo')

    self.db().create(table='topic_stories', insert_hash={
        'topics_id': topic['topics_id'],
        'stories_id': story_4['stories_id'],
    })
    self.db().create(table='topic_stories', insert_hash={
        'topics_id': topic['topics_id'],
        'stories_id': story_1['stories_id'],
    })
    self.db().create(table='topic_links', insert_hash={
        'topics_id': topic['topics_id'],
        'stories_id': story_4['stories_id'],
        'ref_stories_id': story_1['stories_id'],
        'url': story_1['url'],
        'redirect_url': story_1['url'] + "/redirect_url",
    })
    self.db().create(table='topic_stories', insert_hash={
        'topics_id': topic['topics_id'],
        'stories_id': story_2['stories_id'],
    })
    self.db().create(table='topic_links', insert_hash={
        'topics_id': topic['topics_id'],
        'stories_id': story_4['stories_id'],
        'ref_stories_id': story_2['stories_id'],
        'url': story_2['url'],
        'redirect_url': story_2['url'] + "/redirect_url",
    })
    self.db().create(table='topic_stories', insert_hash={
        'topics_id': topic['topics_id'],
        'stories_id': story_3['stories_id'],
    })
    self.db().create(table='topic_links', insert_hash={
        'topics_id': topic['topics_id'],
        'stories_id': story_4['stories_id'],
        'ref_stories_id': story_3['stories_id'],
        'url': story_3['url'] + '/alternate',
    })

    test_url = story_1['url'] + self.CRUFT

    expected_urls = {
        story_1['url'],
        story_1['url'] + self.CRUFT,
        story_2['url'],
        story_1['url'] + "/redirect_url",
        story_2['url'] + "/redirect_url",
        story_3['url'],
        story_3['url'] + "/alternate",
    }

    url_variants = all_url_variants(db=self.db(), url=test_url)

    assert len(expected_urls) == len(url_variants)

    sorted_expected_urls = sorted(expected_urls)
    sorted_url_variants = sorted(url_variants)
    for i in range(len(sorted_expected_urls)):
        assert urls_are_equal(url1=sorted_expected_urls[i], url2=sorted_url_variants[i])
def test_is_new(self):
    def _test_story(db: DatabaseHandler, story_: dict, num_: int) -> None:
        assert is_new(
            db=db,
            story=story_,
        ) is False, "{} identical".format(num_)

        assert is_new(
            db=db,
            story={
                **story_,
                **{
                    'media_id': story_['media_id'] + 1,
                }
            },
        ) is True, "{} media_id diff".format(num_)

        assert is_new(
            db=db,
            story={
                **story_,
                **{
                    'url': 'diff',
                    'guid': 'diff',
                }
            },
        ) is False, "{} URL + GUID diff, title same".format(num_)

        assert is_new(
            db=db,
            story={
                **story_,
                **{
                    'url': 'diff',
                    'title': 'diff',
                }
            },
        ) is False, "{} title + URL diff, GUID same".format(num_)

        assert is_new(
            db=db,
            story={
                **story_,
                **{
                    'guid': 'diff',
                    'title': 'diff',
                }
            },
        ) is True, "{} title + GUID diff, URL same".format(num_)

        assert is_new(
            db=db,
            story={
                **story_,
                **{
                    'url': 'diff',
                    'guid': 'diff',
                    'publish_date': increment_day(date=story_['publish_date'], days=2),
                }
            },
        ) is True, "{} date + 2 days".format(num_)

        assert is_new(
            db=db,
            story={
                **story_,
                **{
                    'url': 'diff',
                    'guid': 'diff',
                    'publish_date': increment_day(date=story_['publish_date'], days=-2),
                }
            },
        ) is True, "{} date - 2 days".format(num_)

    data = {
        'A': {
            'B': [1, 2, 3],
            'C': [4, 5, 6],
        },
        'D': {
            'E': [7, 8, 9],
        }
    }

    media = create_test_story_stack(db=self.db(), data=data)

    for media_name, feeds in data.items():
        for feeds_name, stories in feeds.items():
            for num in stories:
                story = media[media_name]['feeds'][feeds_name]['stories'][str(num)]
                _test_story(db=self.db(), story_=story, num_=num)
def test_find_dup_story(self):
    def _test_story(db: DatabaseHandler, story_: dict, num_: int) -> None:
        assert find_dup_story(
            db=db,
            story=story_,
        ) == story_, "{} identical".format(num_)

        assert find_dup_story(
            db=db,
            story={
                **story_,
                **{
                    'media_id': story_['media_id'] + 1,
                }
            },
        ) is None, "{} media_id diff".format(num_)

        assert find_dup_story(
            db=db,
            story={
                **story_,
                **{
                    'url': self.new_unique_str(),
                    'guid': self.new_unique_str(),
                }
            },
        ) == story_, "{} URL + GUID diff, title same".format(num_)

        assert find_dup_story(
            db=db,
            story={
                **story_,
                **{
                    'url': self.new_unique_str(),
                    'title': self.new_unique_str(),
                }
            },
        ) == story_, "{} title + URL diff, GUID same".format(num_)

        assert find_dup_story(
            db=db,
            story={
                **story_,
                **{
                    'guid': self.new_unique_str(),
                    'title': self.new_unique_str(),
                }
            },
        ) == story_, "{} title + GUID diff, URL same".format(num_)

        assert find_dup_story(
            db=db,
            story={
                **story_,
                **{
                    'url': story_['url'].upper(),
                    'guid': self.new_unique_str(),
                    'title': self.new_unique_str(),
                }
            },
        ) == story_, "{} title + GUID diff, normalized URL same".format(num_)

        assert find_dup_story(
            db=db,
            story={
                **story_,
                **{
                    'url': self.new_unique_str(),
                    'guid': self.new_unique_str(),
                    'publish_date': increment_day(date=story_['publish_date'], days=2),
                }
            },
        ) is None, "{} date + 2 days".format(num_)

        assert find_dup_story(
            db=db,
            story={
                **story_,
                **{
                    'url': self.new_unique_str(),
                    'guid': self.new_unique_str(),
                    'publish_date': increment_day(date=story_['publish_date'], days=-2),
                }
            },
        ) is None, "{} date - 2 days".format(num_)

        # Verify that we can find a dup story by the URL or GUID of a previously dup'd story
        dup_url = self.new_unique_str()
        dup_guid = self.new_unique_str()
        nondup_url = self.new_unique_str()
        nondup_guid = 'bogus unique guid'
        nondup_title = 'bogus unique title'

        dup_story = find_dup_story(db, {**story_, **{'url': dup_url, 'guid': dup_guid}})
        assert dup_story == story_

        assert find_dup_story(db, {**story_, **{'url': dup_url, 'title': nondup_title}}) == story_
        assert find_dup_story(db, {**story_, **{'guid': dup_guid, 'title': nondup_title}}) == story_

        nondup_story = {**story_, **{'url': nondup_url, 'guid': nondup_guid, 'title': nondup_title}}
        assert find_dup_story(db, nondup_story) is None

    data = {
        'A': {
            'B': [1, 2, 3],
            'C': [4, 5, 6],
        },
        'D': {
            'E': [7, 8, 9],
        }
    }

    media = create_test_story_stack(db=self.db(), data=data)

    for media_name, feeds in data.items():
        for feeds_name, stories in feeds.items():
            for num in stories:
                story = media[media_name]['feeds'][feeds_name]['stories'][str(num)]
                _test_story(db=self.db(), story_=story, num_=num)
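# Hedged sketch (an assumption added for illustration, not part of the original suite) of the
# increment_day() helper used by the date-offset assertions above: shift a publish_date string by
# a number of days, assuming the 'YYYY-MM-DD HH:MM:SS' format that the test stories carry. The
# real helper in the shared test utilities may be implemented differently.
def _increment_day_sketch(date: str, days: int) -> str:
    from datetime import datetime, timedelta

    parsed = datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
    return (parsed + timedelta(days=days)).strftime('%Y-%m-%d %H:%M:%S')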