Ejemplo n.º 1
0
def test_candidates(article):
    """Check that the expected candidate exists, survives, and ranks first.

    The candidate with the known hash id must be among
    ``article.candidates``, its node must not appear in
    ``article._should_drop``, and it must be the first entry once the
    candidates are ordered by descending ``content_score``.  The winner is
    then passed through ``check_siblings`` and ``prep_article``.
    """
    expected_hash = '04e46055'

    # Keep the last candidate whose hash matches; False means none matched.
    matches = [
        candidate for candidate in article.candidates.values()
        if candidate.hash_id == expected_hash
    ]
    target = matches[-1] if matches else False

    assert target

    # If the right node exists here but is missing when it is needed, it
    # was removed somewhere along the way; rule out the drop list.
    for dropped in article._should_drop:
        assert dropped != target.node

    ranked = list(article.candidates.values())
    ranked.sort(key=attrgetter('content_score'), reverse=True)
    top = ranked[0]
    assert top.node == target.node

    # Nothing is asserted afterwards: these calls only have to complete.
    winner = check_siblings(top, article.candidates)
    winner.node = prep_article(winner.node)
Ejemplo n.º 2
0
    def test_candidates(self):
        """Verify the expected candidate is found, kept, and ranked first.

        Builds an ``Article`` from ``self.article`` and checks that:

        * a candidate with the known hash id exists,
        * that candidate's node is not in the document's drop list,
        * that candidate has the highest ``content_score``.

        Finally ``check_siblings`` and ``prep_article`` are run on the
        winner.  Nothing is asserted about their result, so that last step
        only guards against exceptions.
        """
        doc = Article(self.article)
        wanted_hash = '1f9378ed'

        # Keep the last candidate whose hash matches; None means no match.
        found = None
        for candidate in doc.candidates.values():
            if candidate.hash_id == wanted_hash:
                found = candidate

        self.assertIsNotNone(
            found, 'no candidate with hash_id %r' % wanted_hash)

        # we have the right node, it must be deleted for some reason if it's
        # not still there when we need it to be.
        # Make sure it's not in our to drop list.
        for dropped in doc._should_drop:
            self.assertNotEqual(dropped, found.node)

        by_score = sorted(
            doc.candidates.values(),
            key=attrgetter('content_score'),
            reverse=True)
        # assertEqual reports both operands on failure, unlike
        # assertTrue(a == b).
        self.assertEqual(by_score[0].node, found.node)

        updated_winner = check_siblings(by_score[0], doc.candidates)
        updated_winner.node = prep_article(updated_winner.node)
Ejemplo n.º 3
0
def test_candidates(article):
    """Ensure the known-good candidate is selected as the winner.

    Looks up the candidate by its hash id in ``article.candidates``,
    confirms its node is absent from ``article._should_drop``, confirms it
    sorts first by ``content_score`` (highest first), and then feeds the
    winner to ``check_siblings`` and ``prep_article``.
    """
    wanted_hash = '04e46055'

    # The last matching candidate is the one kept; False stands for no match.
    matching = list(filter(
        lambda scored: scored.hash_id == wanted_hash,
        article.candidates.values()))
    chosen = matching.pop() if matching else False

    assert chosen

    # The node must not have been scheduled for removal, otherwise it would
    # be gone by the time it is needed.
    assert all(
        dropped != chosen.node for dropped in article._should_drop)

    ranking = sorted(
        article.candidates.values(),
        key=lambda scored: scored.content_score,
        reverse=True)
    best = ranking[0]
    assert best.node == chosen.node

    # Run the post-selection steps; only their successful completion matters.
    refreshed = check_siblings(best, article.candidates)
    refreshed.node = prep_article(refreshed.node)
Ejemplo n.º 4
0
    def test_candidates(self):
        """Verify the expected candidate exists and wins the ranking.

        An ``Article`` is built from ``self.article``.  The candidate whose
        ``hash_id`` equals the known value must be present, its node must
        not be listed in ``doc._should_drop``, and it must come first when
        candidates are sorted by descending ``content_score``.

        The winner is then passed to ``check_siblings`` and its node to
        ``prep_article``; no assertion follows, so those calls are only
        checked for not raising.
        """
        doc = Article(self.article)
        wanted_hash = '04e46055'

        # None marks "not found"; when several candidates match, the last
        # one seen is kept.
        found = None
        for candidate in doc.candidates.values():
            if candidate.hash_id == wanted_hash:
                found = candidate

        self.assertIsNotNone(
            found, 'no candidate with hash_id %r' % wanted_hash)

        # we have the right node, it must be deleted for some reason if it's
        # not still there when we need it to be.
        # Make sure it's not in our to drop list.
        for dropped in doc._should_drop:
            self.assertNotEqual(dropped, found.node)

        by_score = sorted(doc.candidates.values(),
                          key=attrgetter('content_score'),
                          reverse=True)
        # Compare with assertEqual so a failure shows both nodes.
        self.assertEqual(by_score[0].node, found.node)

        updated_winner = check_siblings(by_score[0], doc.candidates)
        updated_winner.node = prep_article(updated_winner.node)
Ejemplo n.º 5
0
    def slice(self, before=1, reverse=True):
        """Yield the nodes of the leading candidates, ranked by score.

        Candidates are ordered by ``content_score``; the first ``before`` of
        them are each passed through ``check_siblings`` and the resulting
        node is yielded unless it is ``None``.

        Because this is a generator, nothing (including the drop-list
        cleanup) happens until the first item is requested.

        Args:
            before: Number of candidates to take from the front of the
                ranking.
            reverse: Forwarded to ``sorted``; ``True`` (the default) puts
                the highest ``content_score`` first.

        Yields:
            ``winner.node`` for each selected candidate after
            ``check_siblings``, skipping nodes that are ``None``.  Nothing
            is yielded when ``self.candidates`` is falsy.
        """
        if not self.candidates:
            return

        # Cleanup: drop the flagged nodes that still have a parent.
        for node in self._should_drop:
            if node.getparent() is not None:
                node.drop_tree()

        by_score = sorted(
            self.candidates.values(),
            key=attrgetter('content_score'),
            reverse=reverse)

        # Check each leading candidate's siblings for extra content.
        for candidate in by_score[:before]:
            winner = check_siblings(candidate, self.candidates)

            # NOTE(review): a prep_article call on the node was commented
            # out here; nodes are yielded without it - confirm intended.
            if winner.node is not None:
                yield winner.node
Ejemplo n.º 6
0
    def test_candidates(self):
        """Verify the expected candidate is present and scores highest.

        Creates an ``Article`` from ``self.article`` and asserts that the
        candidate with the known ``hash_id`` exists, that its node is not
        in ``doc._should_drop``, and that it is first when the candidates
        are sorted by descending ``content_score``.

        ``check_siblings`` and ``prep_article`` are then applied to the
        winner without any further assertion, so that step only verifies
        the calls complete.
        """
        doc = Article(self.article)
        wanted_hash = "04e46055"

        # Start from None and keep the last candidate whose hash matches.
        found = None
        for candidate in doc.candidates.values():
            if candidate.hash_id == wanted_hash:
                found = candidate

        self.assertIsNotNone(
            found, "no candidate with hash_id %r" % wanted_hash)

        # we have the right node, it must be deleted for some reason if it's
        # not still there when we need it to be.
        # Make sure it's not in our to drop list.
        for dropped in doc._should_drop:
            self.assertNotEqual(dropped, found.node)

        by_score = sorted(
            doc.candidates.values(),
            key=attrgetter("content_score"),
            reverse=True,
        )
        # assertEqual prints both nodes if the ranking is wrong.
        self.assertEqual(by_score[0].node, found.node)

        updated_winner = check_siblings(by_score[0], doc.candidates)
        updated_winner.node = prep_article(updated_winner.node)