def test_ap_calls(self):
    """Check is_syndicated() for stories with and without AP markers.

    One sentence shorter than 32 characters and one longer than 32
    characters are picked from the AP sentence fixture and combined with
    "(ap)" / "associated press" markers to build the test contents.
    """
    ap_sentences = self.__get_ap_sentences()

    # First sentence under 32 characters, and every sentence over 32.
    short_sentence = next((s for s in ap_sentences if len(s) < 32), None)
    long_sentences = [s for s in ap_sentences if len(s) > 32]

    assert short_sentence is not None
    assert len(long_sentences) > 0

    long_sentence = long_sentences[0]

    # (story content, expected verdict, assertion message)
    story_cases = [
        ('foo', False, "Simple unsyndicated story"),
        ('(ap)', True, "Simple ('ap') pattern"),
        ("associated press", False, "Only 'associated press'"),
        ("'associated press'", True, "Quoted 'associated press'"),
        ("associated press.\n" + long_sentence, True, "Associated press and AP sentence"),
        ("associated press.\n" + short_sentence, False, "Associated press and short AP sentence"),
        (long_sentence, False, 'Single AP sentence'),
        ("Boston (AP)\n" + long_sentence, True, 'AP sentence and AP location'),
        (' '.join(ap_sentences), True, 'All AP sentences'),
    ]
    for content, expected, message in story_cases:
        assert self.__is_syndicated(content=content) is expected, message

    # Same detector called directly, without creating a story first.
    assert is_syndicated(db=self.db(), story_text='foo') is False, "No DB story: simple story"
    assert is_syndicated(db=self.db(), story_text='(ap)') is True, "No DB story: ('ap') story"
    assert is_syndicated(
        db=self.db(),
        story_text=' '.join(self.__get_ap_sentences()),
    ) is True, "No DB story: AP sentences"
def test_ap_calls(self):
    """Run is_syndicated() over a series of AP and non-AP story contents.

    A short (< 32 characters) and a long (> 32 characters) sentence are
    taken from the AP sentence fixture and mixed with AP markers.
    """
    ap_sentences = self.__get_ap_sentences()

    short_sentence = None
    long_sentences = []

    # Single pass: keep the first short sentence and all long ones;
    # sentences of exactly 32 characters fall into neither bucket.
    for candidate in ap_sentences:
        size = len(candidate)
        if size > 32:
            long_sentences.append(candidate)
        elif size < 32 and short_sentence is None:
            short_sentence = candidate

    assert short_sentence is not None
    assert len(long_sentences) > 0

    long_sentence = long_sentences[0]

    # Plain markers with no AP sentences attached.
    assert self.__is_syndicated(content='foo') is False, "Simple unsyndicated story"
    assert self.__is_syndicated(content='(ap)') is True, "Simple ('ap') pattern"
    assert self.__is_syndicated(content="associated press") is False, "Only 'associated press'"
    assert self.__is_syndicated(content="'associated press'") is True, "Quoted 'associated press'"

    # Markers combined with sentences from the AP fixture.
    with_long = "associated press.\n" + long_sentence
    assert self.__is_syndicated(content=with_long) is True, "Associated press and AP sentence"

    with_short = "associated press.\n" + short_sentence
    assert self.__is_syndicated(content=with_short) is False, "Associated press and short AP sentence"

    assert self.__is_syndicated(content=long_sentence) is False, 'Single AP sentence'

    with_location = "Boston (AP)\n" + long_sentence
    assert self.__is_syndicated(content=with_location) is True, 'AP sentence and AP location'

    everything = ' '.join(ap_sentences)
    assert self.__is_syndicated(content=everything) is True, 'All AP sentences'

    # Direct calls that skip story creation.
    assert is_syndicated(db=self.db(), story_text='foo') is False, "No DB story: simple story"
    assert is_syndicated(db=self.db(), story_text='(ap)') is True, "No DB story: ('ap') story"
    assert is_syndicated(
        db=self.db(),
        story_text=' '.join(self.__get_ap_sentences()),
    ) is True, "No DB story: AP sentences"
def _update_ap_syndicated(db: DatabaseHandler,
                          stories_id: int,
                          story_title: str,
                          story_text: str,
                          story_language: str) -> bool:
    """Decide whether a story is syndicated and store the decision.

    Runs is_syndicated() on the story's title, text and language, replaces
    the story's row in stories_ap_syndicated with the result, and returns
    that result.
    """
    # FIXME write a test once AP gets reenabled

    # Only a bytes ID goes through the decoder; anything else is passed
    # straight to int().
    stories_id = int(
        decode_object_from_bytes_if_needed(stories_id)
        if isinstance(stories_id, bytes)
        else stories_id
    )
    story_title = decode_object_from_bytes_if_needed(story_title)
    story_text = decode_object_from_bytes_if_needed(story_text)
    story_language = decode_object_from_bytes_if_needed(story_language)

    ap_syndicated = is_syndicated(
        db=db,
        story_title=story_title,
        story_text=story_text,
        story_language=story_language,
    )

    # Remove any previous decision for this story before inserting the new one.
    delete_params = {'stories_id': stories_id}
    db.query(
        """ DELETE FROM stories_ap_syndicated WHERE stories_id = %(stories_id)s """,
        delete_params,
    )

    insert_params = {
        'stories_id': stories_id,
        'ap_syndicated': ap_syndicated,
    }
    db.query(
        """ INSERT INTO stories_ap_syndicated (stories_id, ap_syndicated) VALUES (%(stories_id)s, %(ap_syndicated)s) """,
        insert_params,
    )

    return ap_syndicated
def __is_syndicated(db: DatabaseHandler, content: str) -> bool:
    """Create a test story holding ``content`` and return is_syndicated()'s verdict.

    The medium, feed and story are all labelled with the first 64
    characters of ``content``.
    """
    fixture_label = content[:64]

    test_medium = create_test_medium(db=db, label=fixture_label)
    test_feed = create_test_feed(db=db, label=fixture_label, medium=test_medium)
    test_story = create_test_story(db=db, label=fixture_label, feed=test_feed)

    test_story['content'] = content
    stored_story = add_content_to_test_story(db=db, story=test_story, feed=test_feed)

    # The title comes from the stored story; the text is the raw content.
    return is_syndicated(
        db=db,
        story_title=stored_story['title'],
        story_text=content,
    )
def __is_syndicated(self, content: str) -> bool:
    """Create a test story holding ``content`` and return is_syndicated()'s verdict.

    The medium, feed and story are all labelled with the first 64
    characters of ``content``; every database call uses ``self.db()``.
    """
    fixture_label = content[:64]

    test_medium = create_test_medium(db=self.db(), label=fixture_label)
    test_feed = create_test_feed(db=self.db(), label=fixture_label, medium=test_medium)
    test_story = create_test_story(db=self.db(), label=fixture_label, feed=test_feed)

    test_story['content'] = content
    stored_story = add_content_to_test_story(db=self.db(), story=test_story, feed=test_feed)

    # The title comes from the stored story; the text is the raw content.
    return is_syndicated(
        db=self.db(),
        story_title=stored_story['title'],
        story_text=content,
    )
def test_ap_calls():
    """Check is_syndicated() for stories with and without AP markers.

    First stores a story made of all AP_SENTENCES under the AP medium, then
    tests a series of contents built from "(ap)" / "associated press"
    markers plus one short (< 32 characters) and one long (> 32 characters)
    AP sentence.
    """
    db = connect_to_db()

    # Reference AP story: every AP sentence, one per line, under the AP medium.
    ap_medium = create_test_medium(db=db, label=get_ap_medium_name())
    ap_feed = create_test_feed(db=db, label='feed', medium=ap_medium)
    ap_story = create_test_story(db=db, label='story', feed=ap_feed)
    ap_story['content'] = "\n".join(AP_SENTENCES)
    add_content_to_test_story(db=db, story=ap_story, feed=ap_feed)

    # First sentence under 32 characters, and every sentence over 32.
    short_sentence = next((s for s in AP_SENTENCES if len(s) < 32), None)
    long_sentences = [s for s in AP_SENTENCES if len(s) > 32]

    assert short_sentence is not None
    assert len(long_sentences) > 0

    long_sentence = long_sentences[0]

    # (story content, expected verdict, assertion message)
    story_cases = [
        ('foo', False, "Simple unsyndicated story"),
        ('(ap)', True, "Simple ('ap') pattern"),
        ("associated press", False, "Only 'associated press'"),
        ("'associated press'", True, "Quoted 'associated press'"),
        ("associated press.\n" + long_sentence, True, "Associated press and AP sentence"),
        ("associated press.\n" + short_sentence, False, "Associated press and short AP sentence"),
        (long_sentence, False, 'Single AP sentence'),
        ("Boston (AP)\n" + long_sentence, True, 'AP sentence and AP location'),
        (' '.join(AP_SENTENCES), True, 'All AP sentences'),
    ]
    for content, expected, message in story_cases:
        assert __is_syndicated(db=db, content=content) is expected, message

    # Same detector called directly, without creating a story first.
    assert is_syndicated(db=db, story_text='foo') is False, "No DB story: simple story"
    assert is_syndicated(db=db, story_text='(ap)') is True, "No DB story: ('ap') story"
    assert is_syndicated(
        db=db,
        story_text=' '.join(AP_SENTENCES),
    ) is True, "No DB story: AP sentences"