Beispiel #1
0
    def test_link_density(self):
        """Check that candidate nodes yield a link density within [0, 1].

        Parses the ``ars/ars.001.html`` sample article and calls
        ``get_link_density`` on every ``p``, ``td`` and ``pre`` element.
        """
        doc = document_fromstring(load_article('ars/ars.001.html'))
        # iter() replaces the deprecated getiterator() alias.
        for node in doc.iter():
            if node.tag not in ('p', 'td', 'pre'):
                continue
            density = get_link_density(node)

            # Check each bound separately so a failure reports the
            # offending value instead of a bare "False is not true".
            self.assertGreaterEqual(density, 0.0)
            self.assertLessEqual(density, 1.0)
Beispiel #2
0
    def test_link_density(self):
        """Check that candidate nodes yield a link density within [0, 1].

        Parses the ``ars/ars.001.html`` sample article and calls
        ``get_link_density`` on every ``p``, ``td`` and ``pre`` element.
        """
        doc = document_fromstring(load_article('ars/ars.001.html'))
        # iter() replaces the deprecated getiterator() alias.
        for node in doc.iter():
            if node.tag not in ('p', 'td', 'pre'):
                continue
            density = get_link_density(node)

            # Check each bound separately so a failure reports the
            # offending value instead of a bare "False is not true".
            self.assertGreaterEqual(density, 0.0)
            self.assertLessEqual(density, 1.0)
Beispiel #3
0
    def test_we_get_candidates(self):
        """Check that scoring the sample document's nodes yields candidates.

        Collects every ``p``, ``td`` and ``pre`` element from the
        ``ars/ars.001.html`` sample article, passes them to
        ``score_candidates`` and checks the number of candidates and the
        best ``content_score``.
        """
        # we'll start out using our first real test document
        doc = document_fromstring(load_article('ars/ars.001.html'))
        # iter() replaces the deprecated getiterator() alias.
        test_nodes = [
            node for node in doc.iter()
            if node.tag in ('p', 'td', 'pre')
        ]

        candidates = score_candidates(test_nodes)

        # this might change as we tweak our algorithm, but if it does change,
        # it signifies we need to look at what we changed.
        self.assertEqual(len(candidates.keys()), 6)

        # one of these should have a decent score; assertGreater reports the
        # actual best score when the check fails.
        best_score = max(c.content_score for c in candidates.values())
        self.assertGreater(best_score, 100)
Beispiel #4
0
    def test_we_get_candidates(self):
        """Check that scoring the sample document's nodes yields candidates.

        Collects every ``p``, ``td`` and ``pre`` element from the
        ``ars/ars.001.html`` sample article, passes them to
        ``score_candidates`` and checks the number of candidates and the
        best ``content_score``.
        """
        # we'll start out using our first real test document
        doc = document_fromstring(load_article('ars/ars.001.html'))
        # iter() replaces the deprecated getiterator() alias.
        test_nodes = [
            node for node in doc.iter()
            if node.tag in ('p', 'td', 'pre')
        ]

        candidates = score_candidates(test_nodes)

        # this might change as we tweak our algorithm, but if it does change,
        # it signifies we need to look at what we changed.
        self.assertEqual(len(candidates.keys()), 6)

        # one of these should have a decent score; assertGreater reports the
        # actual best score when the check fails.
        best_score = max(c.content_score for c in candidates.values())
        self.assertGreater(best_score, 100)
Beispiel #5
0
 def test_article_enables_candidate_access(self):
     """An Article built from the sample document has ``candidates``."""
     raw_html = load_article('ars/ars.001.html')
     article = Article(raw_html)
     # Only the presence of the attribute is checked, not its contents.
     has_candidates = hasattr(article, 'candidates')
     self.assertTrue(has_candidates)
Beispiel #6
0
 def test_article_enables_candidate_access(self):
     """An Article built from the sample document has ``candidates``."""
     raw_html = load_article('ars/ars.001.html')
     article = Article(raw_html)
     # Only the presence of the attribute is checked, not its contents.
     has_candidates = hasattr(article, 'candidates')
     self.assertTrue(has_candidates)