def test_aHighlightingLS(self):
    """Smoke-run the highlighter over the bundled Yelp CSV and print results.

    Every corpus returned by ``__getCorpusList`` for the CSV file that sits
    next to this test module is added to a fresh ``Highlights`` instance.
    The top 50 results for ``numWords=3`` and ``numWords=4`` are then
    printed, followed by a separator line.  Nothing is asserted; the test
    only fails if one of the calls raises.
    """
    # NOTE(review): the original source was whitespace-collapsed; the prints
    # are taken to run once, after the loop -- confirm against history.
    csv_path = os.path.join(
        os.path.dirname(__file__), "yelp-beijing-restaurant.csv"
    )
    engine = Highlights()
    for document in self.__getCorpusList(csv_path):
        engine.addCorpus(document)

    # Single-argument print(...) behaves the same as the Python 2 print
    # statement and is also valid as a Python 3 function call.
    three_word_top = engine.getTopWords(numWords=3, num=50)
    print(three_word_top)
    four_word_top = engine.getTopWords(numWords=4, num=50)
    print(four_word_top)
    print("----------------------------------------")
def testHighlighting(self):
    """Check ``getTopWords`` rankings as corpora are added one at a time.

    Four corpora are fed to a single ``Highlights`` instance.  After each
    addition the top-5 result for ``numWords=1`` is compared, position by
    position, with the expected stemmed words / highlight sequences.  After
    the last addition the ``numWords=2`` result is checked as well.

    Uses ``assertEqual`` rather than ``assertTrue(a == b)`` so that a
    failure reports both the actual and the expected value.
    """
    # Expected values are built once up front.
    # NOTE(review): assumes STEMMER.stem and __getHighlightSequence are
    # deterministic and side-effect free -- confirm.
    stem = self.STEMMER.stem
    once = stem("once")
    great = stem("great")
    was = stem("was")
    people = stem("people")
    great_people = self.__getHighlightSequence([great, people])

    def check(phrases, expected):
        # Length first, then each position, mirroring the original
        # assertion order so the first failing check is the same.
        self.assertEqual(len(phrases), len(expected))
        for position, wanted in enumerate(expected):
            self.assertEqual(phrases[position], wanted)

    highlighter = Highlights()

    # With only one corpus added, no top words are expected.
    highlighter.addCorpus("once upon a time was great people")
    phrases = highlighter.getTopWords(numWords=1, num=5)
    self.assertEqual(len(phrases), 0)

    highlighter.addCorpus("There once was a cow named joe great people")
    phrases = highlighter.getTopWords(numWords=1, num=5)
    print(phrases)
    check(phrases, [once, great, great_people, was, people])

    highlighter.addCorpus(
        "Joe was a funny man but he really liked using Groupon because of"
        " the Customer service with great people"
    )
    phrases = highlighter.getTopWords(numWords=1, num=5)
    print(phrases)
    check(phrases, [great, was, great_people, people, once])

    highlighter.addCorpus("Groupon's customer service was fantastic!")
    phrases = highlighter.getTopWords(numWords=1, num=5)
    print(phrases)
    check(phrases, [was, great, great_people, people, once])

    # With numWords=2 only the two multi-word sequences are expected.
    customer_service = self.__getHighlightSequence(
        [stem("customer"), stem("service")]
    )
    phrases = highlighter.getTopWords(numWords=2, num=5)
    print(phrases)
    check(phrases, [great_people, customer_service])