def test_highlight_document_valid_hitting_limit_300(self):
    """Check the snippet length when the character limit is raised to 300.

    Rebuilds the highlighter with ``max_number_of_characters`` set to 300,
    runs ``_highlight_document`` and asserts that the resulting ``_snippet``
    is exactly 300 characters long.
    """
    character_limit = 300
    self.max_number_of_characters = character_limit

    # The highlighter must be rebuilt so that it picks up the new limit.
    self.dhc = DocumentHighlightClass(
        self.document,
        self.query,
        self.source_type,
        self.start_highlight,
        self.end_highlight,
        self.max_number_of_characters,
    )
    self.dhc._highlight_document()

    snippet_length = len(self.dhc._snippet)
    self.assertEqual(character_limit, snippet_length)
def setUp(self):
    """Create the shared fixture: a sample document, a query and a highlighter.

    Stores the document text, the query, the highlight markers, the source
    type and the character limit on ``self``, then builds ``self.dhc`` from
    those values.
    """
    # Search configuration.
    self.query = "Big Data"
    self.source_type = None
    self.max_number_of_characters = 180

    # Markers placed around highlighted matches.
    self.start_highlight = "[[HIGHLIGHT]]"
    self.end_highlight = "[[ENDHIGHLIGHT]]"

    # Sample text; kept verbatim because length-based assertions depend on it.
    self.document = """Big Data Partnership, a leading 'Big Data' specialist service provider, today annouced that it has been chosen as one of a few select organizations from around the world to participate in the Microsoft Big Data Partner Incubation Program. Big Data Partnership's participation in the program followed a rigourous nomination process that was supported by Microsoft SQL Server global marketing managers and industry leads."""

    self.dhc = DocumentHighlightClass(
        self.document,
        self.query,
        self.source_type,
        self.start_highlight,
        self.end_highlight,
        self.max_number_of_characters,
    )
def test_different_highlighting_methods(self):
    """Check that custom highlight markers are used in the processed sentence.

    Replaces the start/end markers with ``[[RANDOM]]`` / ``[[ENDRANDOM]]``,
    rebuilds the highlighter and asserts that ``_find_matches_per_sentence``
    returns the query words wrapped in those markers, followed by the
    remaining words and a trailing ``"."`` token.
    """
    # Use different marker strings to make sure the caller controls them.
    # This comment must sit on its own line: written inline, it turns every
    # statement that follows it on the same line into comment text.
    self.start_highlight = "[[RANDOM]]"
    self.end_highlight = "[[ENDRANDOM]]"
    self.dhc = DocumentHighlightClass(
        self.document,
        self.query,
        self.source_type,
        self.start_highlight,
        self.end_highlight,
        self.max_number_of_characters,
    )

    expected = [
        "[[RANDOM]]",
        "Big",
        "Data",
        "[[ENDRANDOM]]",
        "Partnership's",
        "participation",
        ".",
    ]
    split_sentence = ["Big", "Data", "Partnership's", "participation"]

    result = self.dhc._find_matches_per_sentence(split_sentence)

    self.assertEqual(expected, result)
class TestDocumentParserClass(unittest.TestCase):
    """Unit tests for the sentence splitting, matching and snippet building
    of ``DocumentHighlightClass``."""

    def setUp(self):
        """Create the shared fixture: a sample document, a query and a highlighter."""
        # Sample text; kept verbatim because length-based assertions depend on it.
        self.document = """Big Data Partnership, a leading 'Big Data' specialist service provider, today annouced that it has been chosen as one of a few select organizations from around the world to participate in the Microsoft Big Data Partner Incubation Program. Big Data Partnership's participation in the program followed a rigourous nomination process that was supported by Microsoft SQL Server global marketing managers and industry leads."""
        self.query = "Big Data"
        self.start_highlight = "[[HIGHLIGHT]]"
        self.end_highlight = "[[ENDHIGHLIGHT]]"
        self.source_type = None
        self.max_number_of_characters = 180
        self.dhc = self._build_highlighter()

    def _build_highlighter(self):
        """Return a ``DocumentHighlightClass`` built from the current fixture attributes.

        Tests that change a fixture attribute (markers, character limit) call
        this again so the new value is passed to the constructor.
        """
        return DocumentHighlightClass(
            self.document,
            self.query,
            self.source_type,
            self.start_highlight,
            self.end_highlight,
            self.max_number_of_characters,
        )

    def test_split_document_on_delimiter(self):
        """The second entry of ``_doc_to_sentences`` is the document's second sentence."""
        expected = """Big Data Partnership's participation in the program followed a rigourous nomination process that was supported by Microsoft SQL Server global marketing managers and industry leads"""

        self.dhc._split_document_on_delimiter()
        result = self.dhc._doc_to_sentences

        self.assertEqual(expected, result[1])

    def test_find_matches_per_sentence(self):
        """Query words are wrapped in the default markers and a ``"."`` token is appended."""
        expected = [
            "[[HIGHLIGHT]]",
            "Big",
            "Data",
            "[[ENDHIGHLIGHT]]",
            "Partnership's",
            "participation",
            ".",
        ]
        split_sentence = ["Big", "Data", "Partnership's", "participation"]

        result = self.dhc._find_matches_per_sentence(split_sentence)

        self.assertEqual(expected, result)

    def test_format_snippet_to_compute_word_length(self):
        """``_format_snippet`` stores the tokens joined by single spaces in ``_snippet``."""
        processed_sentence = [
            "[[HIGHLIGHT]]",
            "Big",
            "Data",
            "[[ENDHIGHLIGHT]]",
            "Partnership's",
            "participation",
        ]
        expected = "[[HIGHLIGHT]] Big Data [[ENDHIGHLIGHT]] Partnership's participation"

        self.dhc._format_snippet(processed_sentence)
        result = self.dhc._snippet

        self.assertEqual(expected, result)

    def test_highlight_document_valid_hitting_limit(self):
        """With the default 180-character limit the snippet is 182 characters long."""
        # NOTE(review): the expected length is 2 above the configured limit;
        # presumably the limit is not a hard cut-off -- confirm against the
        # DocumentHighlightClass implementation.
        expected = 182

        self.dhc._highlight_document()
        result = len(self.dhc._snippet)

        self.assertEqual(expected, result)

    def test_highlight_document_valid_hitting_limit_300(self):
        """With the limit raised to 300 the snippet is exactly 300 characters long."""
        expected = 300
        self.max_number_of_characters = 300
        self.dhc = self._build_highlighter()

        self.dhc._highlight_document()
        result = len(self.dhc._snippet)

        self.assertEqual(expected, result)

    def test_different_highlighting_methods(self):
        """Custom start/end markers are the ones placed around the matched words."""
        # Use different marker strings to make sure the caller controls them.
        # This comment must sit on its own line: written inline, it turns
        # every statement that follows it on the same line into comment text.
        self.start_highlight = "[[RANDOM]]"
        self.end_highlight = "[[ENDRANDOM]]"
        self.dhc = self._build_highlighter()

        expected = [
            "[[RANDOM]]",
            "Big",
            "Data",
            "[[ENDRANDOM]]",
            "Partnership's",
            "participation",
            ".",
        ]
        split_sentence = ["Big", "Data", "Partnership's", "participation"]

        result = self.dhc._find_matches_per_sentence(split_sentence)

        self.assertEqual(expected, result)