def test_generateModel_SpaceSaving(self):
    """Time ``nativecounting`` against ``SpaceSaving`` on the same trigram input.

    Each run builds its own ``modelgenerator`` and its own trigram generator
    over ``self.itemlistsentence``.  Nothing is asserted: the three
    timestamps and the two elapsed durations are printed for manual
    comparison.
    """
    started_at = datetime.now()

    # Baseline run: count the trigrams with nativecounting.
    baseline_model = trigramModelGenerator.modelgenerator()
    baseline_trigrams = baseline_model.GeneratorForTrigram(self.itemlistsentence)
    baseline_counts = baseline_model.nativecounting(baseline_trigrams)
    baseline_done_at = datetime.now()

    # SpaceSaving run (second argument 10000), followed by the cjtofreq
    # conversion of its items; both steps are part of the measured time.
    ss_model = trigramModelGenerator.modelgenerator()
    ss_trigrams = ss_model.GeneratorForTrigram(self.itemlistsentence)
    ss_counters = ss_model.SpaceSaving(ss_trigrams, 10000)
    ss_frequencies = ss_model.cjtofreq(ss_counters.items())
    ss_done_at = datetime.now()

    print("%s %s %s" % (started_at, baseline_done_at, ss_done_at))

    baseline_elapsed = baseline_done_at - started_at
    ss_elapsed = ss_done_at - baseline_done_at
    print('Default,SS')
    print("%s %s" % (baseline_elapsed, ss_elapsed))
def test_unigramtest3(self):
    """Run SpaceSaving (second argument 3) over a five-item unigram stream.

    Checks that ``minimum`` equals the smallest value stored in ``cj`` and
    that every value in ``buckets`` has a length greater than zero, then
    prints the bucket values.
    """
    stream = ["A", "A", "B", "A", "C"]
    generator = trigramModelGenerator.modelgenerator()
    generator.SpaceSaving(generator.GeneratorForUnigram(stream), 3)

    smallest_counter = min(generator.cj.values())
    self.assertEqual(generator.minimum, smallest_counter)

    bucket_contents = generator.buckets.values()
    every_bucket_populated = all(len(bucket) > 0 for bucket in bucket_contents)
    self.assertTrue(every_bucket_populated)
    print(bucket_contents)
def callSentenceGen(reply, k):
    """Generate a sentence for ``reply`` from the ``strarf_serif.txt`` corpus.

    The corpus is read as UTF-8, a model is built from it with
    ``trigramModelGenerator.generateModel``, a keyword is extracted from
    ``reply.text`` with ``keywordext.extraction``, and the keyword is handed
    to ``sentenceGenerator.sentenceGenerator(...).generateSentence``.

    Args:
        reply: Object exposing a ``text`` attribute.
        k: Unused.  It was only passed to a SpaceSaving run whose result was
            never read; kept in the signature so existing callers still work.

    Returns:
        Whatever ``generateSentence`` returns for the extracted keyword.
    """
    corpus_lines = []
    with codecs.open("strarf_serif.txt", "rb", "utf-8") as corpus:
        for line in corpus:
            # NOTE(review): assumes one u"EOS" marker after every corpus
            # line rather than a single marker at the end -- confirm.
            corpus_lines.extend((line, u"EOS"))

    # The previous version also built a modelgenerator here and ran
    # SpaceSaving over ``self.itemlist``.  This function has no ``self``
    # parameter, so that lookup raised NameError, and the SpaceSaving result
    # was never used.  The dead block has been removed.
    model = trigramModelGenerator.generateModel(corpus_lines)
    keyword = keywordext.extraction(reply.text, model)
    return sentenceGenerator.sentenceGenerator(model).generateSentence(keyword)
def test_unigramtest2(self):
    """Run SpaceSaving (second argument 3) over a 14-item unigram stream.

    Checks that ``minimum`` equals the smallest value stored in ``cj`` once
    the stream has been consumed.
    """
    stream = [
        "B", "A", "C", "D", "B", "C", "C",
        "A", "D", "A", "C", "B", "C", "D",
    ]
    generator = trigramModelGenerator.modelgenerator()
    unigrams = generator.GeneratorForUnigram(stream)
    generator.SpaceSaving(unigrams, 3)

    smallest_counter = min(generator.cj.values())
    self.assertEqual(generator.minimum, smallest_counter)
def test_generateModel_SpaceSavingUsingGenerator(self):
    """Run SpaceSaving (second argument 3) over unigrams of ``self.itemlist``.

    Prints the resulting ``buckets`` and then checks that ``minimum`` is 7.
    """
    generator = trigramModelGenerator.modelgenerator()
    generator.SpaceSaving(generator.GeneratorForUnigram(self.itemlist), 3)

    # Print before asserting so the buckets are shown even when the
    # assertion below fails.
    print(generator.buckets)
    expected_minimum = 7
    self.assertEqual(generator.minimum, expected_minimum)