def test_link_density(self):
    """Test that link density stays within [0, 1] for content nodes.

    Parses the 'ars/ars.001.html' fixture and calls get_link_density()
    on every <p>, <td> and <pre> node found in the tree.
    """
    doc = document_fromstring(load_article('ars/ars.001.html'))
    content_tags = ('p', 'td', 'pre')
    # iter() is the supported replacement for the deprecated getiterator().
    for node in doc.iter():
        if node.tag not in content_tags:
            continue
        density = get_link_density(node)
        # the density must be between 0 and 1 (inclusive); report the
        # offending value and tag so a failure is diagnosable.
        self.assertTrue(
            0.0 <= density <= 1.0,
            'link density %r for <%s> is outside [0, 1]'
            % (density, node.tag))
def test_we_get_candidates(self):
    """Processing candidates should get us a list of nodes to try out.

    Collects every <p>, <td> and <pre> node from the
    'ars/ars.001.html' fixture, scores them with score_candidates(),
    and checks both the number of candidates and the best score.
    """
    # we'll start out using our first real test document
    doc = document_fromstring(load_article('ars/ars.001.html'))
    content_tags = ('p', 'td', 'pre')
    # iter() is the supported replacement for the deprecated getiterator().
    test_nodes = [node for node in doc.iter() if node.tag in content_tags]

    candidates = score_candidates(test_nodes)

    # this might change as we tweak our algorithm, but if it does change,
    # it signifies we need to look at what we changed.
    self.assertEqual(len(candidates.keys()), 6)

    # one of these should have a decent score; only the highest score
    # matters, so there is no need to sort the whole list.
    best_score = max(c.content_score for c in candidates.values())
    self.assertTrue(
        best_score > 100,
        'best candidate score %r is not above 100' % (best_score,))
def test_article_enables_candidate_access(self):
    """An Article built from the fixture exposes a `candidates` attribute."""
    raw_html = load_article('ars/ars.001.html')
    article = Article(raw_html)
    exposes_candidates = hasattr(article, 'candidates')
    self.assertTrue(exposes_candidates)