def test_calculate_links_length_and_clean_chunk(self):
    """Clean a chunk containing nested anchors and check the results.

    The markup nests one ``<a>`` inside another.  After cleaning, the test
    expects the chunk text to be the four ``asd`` runs joined together and
    ``length_of_links`` to be 9 (presumably the three runs located inside
    the outer anchor -- confirm against ``parser.Chunk``).
    """
    markup = '<p><a href="#">asd<a href="#">asd</a>asd</a>asd</p>'
    expected_text = 'asd' * 4
    expected_links_length = 9

    nested = parser.Chunk(markup)
    # The length with tags is calculated first, then the chunk is cleaned.
    nested._calculate_length_with_tags()
    nested._calculate_links_length_and_clean_chunk(self.cleaner)

    self.assertEqual(nested.chunk, expected_text)
    self.assertEqual(nested.length_of_links, expected_links_length)
def test_make_calculations(self):
    """Check ``make_calculations`` against a weight rebuilt from fixtures.

    The expected weight is computed here as::

        (density + marks / 100 + 1) + (1 - marks / len(useful_text))

    where ``density`` is the useful text length divided by the raw chunk
    length and ``marks`` is the number of punctuation marks counted in the
    raw chunk.
    """
    processed = parser.Chunk(self.chunk)
    processed.make_calculations(self.cleaner, self.punctuation)

    density = len(self.useful_text) / len(self.chunk)

    # Total occurrences of every punctuation mark in the raw chunk.
    marks = 0
    for mark in self.punctuation:
        marks += self.chunk.count(mark)

    # Same operation order as the two-step original, so the float result
    # is bit-for-bit identical.
    expected_weight = (density + marks / 100 + 1) + (
        1 - marks / len(self.useful_text))

    self.assertEqual(processed.weight, expected_weight)
def test_calculate_count_of_punctuation_marks(self):
    """Check the punctuation count and the required order of steps.

    On a fresh chunk, both counting punctuation and cleaning the chunk are
    expected to raise ``parser.ChunkProcedureException``.  Once the length
    with tags has been calculated and the chunk cleaned, the count must
    equal the number of punctuation marks found in the raw fixture chunk.
    """
    expected_marks = sum(
        [self.chunk.count(mark) for mark in self.punctuation])

    fresh = parser.Chunk(self.chunk)

    # Neither step may run before the length with tags is known.
    with self.assertRaises(parser.ChunkProcedureException):
        fresh._calculate_count_of_punctuation_marks(self.punctuation)
    with self.assertRaises(parser.ChunkProcedureException):
        fresh._calculate_links_length_and_clean_chunk(self.cleaner)

    # Run the steps in the order the test treats as valid.
    fresh._calculate_length_with_tags()
    fresh._calculate_links_length_and_clean_chunk(self.cleaner)
    fresh._calculate_count_of_punctuation_marks(self.punctuation)

    self.assertEqual(fresh.count_of_punctuation_marks, expected_marks)
def test_calculate_length_without_tags(self):
    """Check the tag-free length and the required order of steps.

    ``_calculate_length_without_tags`` is expected to raise
    ``parser.ChunkProcedureException`` both on a fresh chunk and after only
    the length with tags has been calculated (the chunk is still not
    cleaned at that point).  After cleaning, the fixture chunk is expected
    to have a links length of 0 and a tag-free length equal to the length
    of the useful text fixture.
    """
    procedure_error = parser.ChunkProcedureException
    fresh = parser.Chunk(self.chunk)

    # Nothing has been calculated yet.
    with self.assertRaises(procedure_error):
        fresh._calculate_length_without_tags()

    fresh._calculate_length_with_tags()
    self.assertFalse(fresh._cleaned)

    # Length with tags is known, but the chunk has not been cleaned.
    with self.assertRaises(procedure_error):
        fresh._calculate_length_without_tags()

    fresh._calculate_links_length_and_clean_chunk(self.cleaner)
    self.assertEqual(fresh._links_length, 0)

    fresh._calculate_length_without_tags()
    expected_length = len(self.useful_text)
    self.assertEqual(fresh.length_without_tags, expected_length)
def test_calculate_length_with_tags(self):
    """Check that the length with tags equals the raw chunk length."""
    raw = self.chunk
    measured = parser.Chunk(raw)

    measured._calculate_length_with_tags()

    self.assertEqual(measured.length_with_tags, len(raw))