예제 #1
0
    def test_ap_calls(self):
        """Check AP syndication verdicts for a fixed series of story contents.

        The first group of cases goes through the `__is_syndicated` helper; the
        second group calls `is_syndicated()` directly with only the story text.
        Cases run in the listed order and stop at the first failing assertion.
        """
        sentences = self.__get_ap_sentences()

        # First sentence shorter than 32 characters, and all sentences longer than 32
        short_sentence = next((s for s in sentences if len(s) < 32), None)
        long_sentences = [s for s in sentences if len(s) > 32]

        assert short_sentence is not None
        assert long_sentences

        long_sentence = long_sentences[0]

        # (content, expected verdict, assertion message)
        helper_cases = [
            ('foo', False, "Simple unsyndicated story"),
            ('(ap)', True, "Simple ('ap') pattern"),
            ("associated press", False, "Only 'associated press'"),
            ("'associated press'", True, "Quoted 'associated press'"),
            ("associated press.\n" + long_sentence, True,
             "Associated press and AP sentence"),
            ("associated press.\n" + short_sentence, False,
             "Associated press and short AP sentence"),
            (long_sentence, False, 'Single AP sentence'),
            ("Boston (AP)\n" + long_sentence, True,
             'AP sentence and AP location'),
            (' '.join(sentences), True, 'All AP sentences'),
        ]
        for content, expected, message in helper_cases:
            assert self.__is_syndicated(content=content) is expected, message

        # (story text, expected verdict, assertion message)
        text_only_cases = [
            ('foo', False, "No DB story: simple story"),
            ('(ap)', True, "No DB story: ('ap') story"),
        ]
        for story_text, expected, message in text_only_cases:
            assert is_syndicated(
                db=self.db(),
                story_text=story_text) is expected, message

        # Sentences are fetched again here, after all the checks above have run
        assert is_syndicated(
            db=self.db(),
            story_text=' '.join(self.__get_ap_sentences()),
        ) is True, "No DB story: AP sentences"
예제 #2
0
    def test_ap_calls(self):
        """Run the AP syndication checks against helper-created stories and raw text.

        Each case pairs a story content with the verdict `is_syndicated()` is
        expected to return; cases are evaluated in order and the first mismatch
        fails the test with that case's message.
        """
        all_sentences = self.__get_ap_sentences()

        # Pick the first sentence under 32 characters and every sentence over 32
        sentence_under_32 = next((text for text in all_sentences if len(text) < 32), None)
        sentences_over_32 = [text for text in all_sentences if len(text) > 32]

        assert sentence_under_32 is not None
        assert sentences_over_32

        sentence_over_32 = sentences_over_32[0]

        # Checked through the __is_syndicated helper: (content, expected, message)
        helper_checks = (
            ('foo', False, "Simple unsyndicated story"),
            ('(ap)', True, "Simple ('ap') pattern"),
            ("associated press", False, "Only 'associated press'"),
            ("'associated press'", True, "Quoted 'associated press'"),
            ("associated press.\n" + sentence_over_32, True, "Associated press and AP sentence"),
            ("associated press.\n" + sentence_under_32, False, "Associated press and short AP sentence"),
            (sentence_over_32, False, 'Single AP sentence'),
            ("Boston (AP)\n" + sentence_over_32, True, 'AP sentence and AP location'),
            (' '.join(all_sentences), True, 'All AP sentences'),
        )
        for content, expected, message in helper_checks:
            assert self.__is_syndicated(content=content) is expected, message

        # Checked by passing only the text to is_syndicated(): (story_text, expected, message)
        direct_checks = (
            ('foo', False, "No DB story: simple story"),
            ('(ap)', True, "No DB story: ('ap') story"),
        )
        for story_text, expected, message in direct_checks:
            assert is_syndicated(db=self.db(), story_text=story_text) is expected, message

        # The sentence list is requested a second time for this final check
        assert is_syndicated(
            db=self.db(),
            story_text=' '.join(self.__get_ap_sentences()),
        ) is True, "No DB story: AP sentences"
예제 #3
0
def _update_ap_syndicated(db: DatabaseHandler, stories_id: int,
                          story_title: str, story_text: str,
                          story_language: str) -> bool:
    """Run AP syndication detection for a story, store the verdict and return it.

    Any existing `stories_ap_syndicated` row for the story is deleted before
    the fresh verdict is inserted.

    :param db: Database handler passed to is_syndicated() and used for both queries.
    :param stories_id: Story ID; a bytes value is decoded first, then the ID is coerced with int().
    :param story_title: Story title, decoded from bytes if needed.
    :param story_text: Story text, decoded from bytes if needed.
    :param story_language: Story language, decoded from bytes if needed.
    :return: The value returned by is_syndicated().
    """
    # FIXME write a test once AP gets reenabled

    # Only a bytes ID goes through the decoder; every ID is then coerced to int
    raw_id = decode_object_from_bytes_if_needed(stories_id) if isinstance(stories_id, bytes) else stories_id
    story_key = int(raw_id)

    title, text, language = (
        decode_object_from_bytes_if_needed(value)
        for value in (story_title, story_text, story_language)
    )

    verdict = is_syndicated(db=db, story_title=title, story_text=text, story_language=language)

    delete_sql = """
        DELETE FROM stories_ap_syndicated
        WHERE stories_id = %(stories_id)s
    """
    insert_sql = """
        INSERT INTO stories_ap_syndicated (stories_id, ap_syndicated)
        VALUES (%(stories_id)s, %(ap_syndicated)s)
    """

    # Drop the previous verdict first, then write the new one
    db.query(delete_sql, {'stories_id': story_key})
    db.query(insert_sql, {'stories_id': story_key, 'ap_syndicated': verdict})

    return verdict
예제 #4
0
def __is_syndicated(db: DatabaseHandler, content: str) -> bool:
    """Create a test medium, feed and story holding `content` and return its syndication verdict.

    The first 64 characters of `content` are used as the label of the created
    medium, feed and story.

    :param db: Database handler used to create the test rows and run the detection.
    :param content: Story content to store and to pass to is_syndicated() as the story text.
    :return: The value returned by is_syndicated() for the story's title and `content`.
    """
    short_label = content[:64]

    test_medium = create_test_medium(db=db, label=short_label)
    test_feed = create_test_feed(db=db, label=short_label, medium=test_medium)

    test_story = create_test_story(db=db, label=short_label, feed=test_feed)
    test_story['content'] = content
    stored_story = add_content_to_test_story(db=db, story=test_story, feed=test_feed)

    title = stored_story['title']
    return is_syndicated(db=db, story_title=title, story_text=content)
예제 #5
0
    def __is_syndicated(self, content: str) -> bool:
        """Create a test medium, feed and story holding `content` and return its syndication verdict.

        The first 64 characters of `content` serve as the label of the created
        medium, feed and story.

        :param content: Story content to store and to pass to is_syndicated() as the story text.
        :return: The value returned by is_syndicated() for the story's title and `content`.
        """
        short_label = content[:64]

        test_medium = create_test_medium(db=self.db(), label=short_label)
        test_feed = create_test_feed(db=self.db(), label=short_label, medium=test_medium)

        test_story = create_test_story(db=self.db(), label=short_label, feed=test_feed)
        test_story['content'] = content
        stored_story = add_content_to_test_story(db=self.db(), story=test_story, feed=test_feed)

        return is_syndicated(db=self.db(), story_title=stored_story['title'], story_text=content)
예제 #6
0
def test_ap_calls():
    """Check AP syndication verdicts for a fixed series of story contents.

    A test medium labelled with get_ap_medium_name() is created first, with a
    feed and a story whose content is every entry of AP_SENTENCES joined by
    newlines. The cases then run in order, first through the `__is_syndicated`
    helper and then by passing only the story text to `is_syndicated()`.
    """
    db = connect_to_db()

    # Seed the database with a story made of all AP sentences
    seed_medium = create_test_medium(db=db, label=get_ap_medium_name())
    seed_feed = create_test_feed(db=db, label='feed', medium=seed_medium)
    seed_story = create_test_story(db=db, label='story', feed=seed_feed)
    seed_story['content'] = "\n".join(AP_SENTENCES)
    add_content_to_test_story(db=db, story=seed_story, feed=seed_feed)

    # First sentence shorter than 32 characters, and all sentences longer than 32
    short_sentence = next((s for s in AP_SENTENCES if len(s) < 32), None)
    long_sentences = [s for s in AP_SENTENCES if len(s) > 32]

    assert short_sentence is not None
    assert long_sentences

    long_sentence = long_sentences[0]

    # Checked through the __is_syndicated helper: (content, expected, message)
    helper_cases = [
        ('foo', False, "Simple unsyndicated story"),
        ('(ap)', True, "Simple ('ap') pattern"),
        ("associated press", False, "Only 'associated press'"),
        ("'associated press'", True, "Quoted 'associated press'"),
        ("associated press.\n" + long_sentence, True, "Associated press and AP sentence"),
        ("associated press.\n" + short_sentence, False, "Associated press and short AP sentence"),
        (long_sentence, False, 'Single AP sentence'),
        ("Boston (AP)\n" + long_sentence, True, 'AP sentence and AP location'),
        (' '.join(AP_SENTENCES), True, 'All AP sentences'),
    ]
    for content, expected, message in helper_cases:
        assert __is_syndicated(db=db, content=content) is expected, message

    # Checked by passing only the text to is_syndicated(): (story_text, expected, message)
    text_only_cases = [
        ('foo', False, "No DB story: simple story"),
        ('(ap)', True, "No DB story: ('ap') story"),
        (' '.join(AP_SENTENCES), True, "No DB story: AP sentences"),
    ]
    for story_text, expected, message in text_only_cases:
        assert is_syndicated(db=db, story_text=story_text) is expected, message