def test_highlight_document_valid_hitting_limit_300(self):
     """Check the snippet is exactly 300 characters long when the limit is 300."""
     self.max_number_of_characters = 300
     # Rebuild the highlighter so it is constructed with the new limit.
     self.dhc = DocumentHighlightClass(
         self.document,
         self.query,
         self.source_type,
         self.start_highlight,
         self.end_highlight,
         self.max_number_of_characters,
     )
     self.dhc._highlight_document()
     snippet_length = len(self.dhc._snippet)
     self.assertEqual(300, snippet_length)
    def setUp(self):
        """Build the shared fixture: document, query, markers, limit and highlighter."""
        # Sample text, kept verbatim (including its spelling) because it is
        # runtime test data; the two literals below concatenate to one string.
        self.document = (
            "Big Data Partnership, a leading 'Big Data' specialist service provider, today annouced that it has been chosen as one of a few select organizations from around the world to participate in the Microsoft Big Data Partner Incubation Program. "
            "Big Data Partnership's participation in the program followed a rigourous nomination process that was supported by Microsoft SQL Server global marketing managers and industry leads."
        )
        self.query = "Big Data"
        self.source_type = None
        self.start_highlight = "[[HIGHLIGHT]]"
        self.end_highlight = "[[ENDHIGHLIGHT]]"
        self.max_number_of_characters = 180
        self.dhc = DocumentHighlightClass(
            self.document,
            self.query,
            self.source_type,
            self.start_highlight,
            self.end_highlight,
            self.max_number_of_characters,
        )
 def test_different_highlighting_methods(self):
     """Check that caller-supplied start/end markers are the ones emitted.

     The default markers are replaced with ``[[RANDOM]]`` / ``[[ENDRANDOM]]``
     and the highlighter is rebuilt before matching a tokenised sentence.
     """
     self.start_highlight = "[[RANDOM]]"
     self.end_highlight = "[[ENDRANDOM]]"
     self.dhc = DocumentHighlightClass(
         self.document,
         self.query,
         self.source_type,
         self.start_highlight,
         self.end_highlight,
         self.max_number_of_characters,
     )
     tokens = ["Big", "Data", "Partnership's", "participation"]
     highlighted = self.dhc._find_matches_per_sentence(tokens)
     # The expected output ends with a "." token that is not in the input list.
     self.assertEqual(
         ["[[RANDOM]]", "Big", "Data", "[[ENDRANDOM]]", "Partnership's", "participation", "."],
         highlighted,
     )
     
     
 
     
class TestDocumentParserClass(unittest.TestCase):
    """Unit tests for DocumentHighlightClass run against a fixed sample document.

    Every test starts from the fixture created in ``setUp``. Tests that need
    different markers or a different character limit overwrite the relevant
    attribute and rebuild ``self.dhc``.
    """

    def setUp(self):
        """Build the shared fixture: document, query, markers, limit and highlighter."""
        # Sample text, kept verbatim (including its spelling):
        # test_split_document_on_delimiter compares against a sentence copied
        # from it. The two literals below concatenate to one string.
        self.document = (
            "Big Data Partnership, a leading 'Big Data' specialist service provider, today annouced that it has been chosen as one of a few select organizations from around the world to participate in the Microsoft Big Data Partner Incubation Program. "
            "Big Data Partnership's participation in the program followed a rigourous nomination process that was supported by Microsoft SQL Server global marketing managers and industry leads."
        )
        self.query = "Big Data"
        self.source_type = None
        self.start_highlight = "[[HIGHLIGHT]]"
        self.end_highlight = "[[ENDHIGHLIGHT]]"
        self.max_number_of_characters = 180
        self.dhc = self._build_highlighter()

    def _build_highlighter(self):
        """Return a DocumentHighlightClass built from the current fixture attributes."""
        return DocumentHighlightClass(
            self.document,
            self.query,
            self.source_type,
            self.start_highlight,
            self.end_highlight,
            self.max_number_of_characters,
        )

    def test_split_document_on_delimiter(self):
        """Check element 1 of ``_doc_to_sentences`` after splitting the document.

        The expected value is the document's second sentence without its
        closing full stop.
        """
        second_sentence = "Big Data Partnership's participation in the program followed a rigourous nomination process that was supported by Microsoft SQL Server global marketing managers and industry leads"

        self.dhc._split_document_on_delimiter()

        self.assertEqual(second_sentence, self.dhc._doc_to_sentences[1])

    def test_find_matches_per_sentence(self):
        """Check that the query words are wrapped in the default markers."""
        tokens = ["Big", "Data", "Partnership's", "participation"]

        highlighted = self.dhc._find_matches_per_sentence(tokens)

        # The expected output ends with a "." token that is not in the input list.
        self.assertEqual(
            ["[[HIGHLIGHT]]", "Big", "Data", "[[ENDHIGHLIGHT]]", "Partnership's", "participation", "."],
            highlighted,
        )

    def test_format_snippet_to_compute_word_length(self):
        """Check that ``_format_snippet`` stores the tokens as one space-separated string."""
        tokens = ["[[HIGHLIGHT]]", "Big", "Data", "[[ENDHIGHLIGHT]]", "Partnership's", "participation"]

        self.dhc._format_snippet(tokens)

        self.assertEqual(
            "[[HIGHLIGHT]] Big Data [[ENDHIGHLIGHT]] Partnership's participation",
            self.dhc._snippet,
        )

    def test_highlight_document_valid_hitting_limit(self):
        """Check the snippet length produced with the default limit of 180."""
        self.dhc._highlight_document()
        snippet_length = len(self.dhc._snippet)
        # NOTE(review): the expected length is 182, two more than the configured
        # limit of 180; the reason is not visible here - confirm against
        # DocumentHighlightClass.
        self.assertEqual(182, snippet_length)

    def test_highlight_document_valid_hitting_limit_300(self):
        """Check the snippet is exactly 300 characters long when the limit is 300."""
        self.max_number_of_characters = 300
        # Rebuild the highlighter so it is constructed with the new limit.
        self.dhc = self._build_highlighter()

        self.dhc._highlight_document()
        snippet_length = len(self.dhc._snippet)

        self.assertEqual(300, snippet_length)

    def test_different_highlighting_methods(self):
        """Check that caller-supplied start/end markers are the ones emitted.

        The default markers are replaced with ``[[RANDOM]]`` / ``[[ENDRANDOM]]``
        and the highlighter is rebuilt before matching a tokenised sentence.
        """
        self.start_highlight = "[[RANDOM]]"
        self.end_highlight = "[[ENDRANDOM]]"
        self.dhc = self._build_highlighter()
        tokens = ["Big", "Data", "Partnership's", "participation"]

        highlighted = self.dhc._find_matches_per_sentence(tokens)

        self.assertEqual(
            ["[[RANDOM]]", "Big", "Data", "[[ENDRANDOM]]", "Partnership's", "participation", "."],
            highlighted,
        )