def callSentenceGen_com(reply, k):
    """Generate a reply sentence from a trigram model of the serif corpus.

    Reads "strarf_serif.txt" as UTF-8, builds a model from its lines with
    ``trigramModelGenerator.generateModel``, pickles that model through
    ``cpickler.topickle`` (default destination), extracts a keyword from
    ``reply`` and asks ``sentenceGenerator`` for a sentence seeded with it.

    Args:
        reply: Text the keyword is extracted from.
        k: Unused; kept so existing callers keep working.

    Returns:
        Whatever ``sentenceGenerator.generateSentence`` returns for the
        extracted keyword.
    """
    # An alternative corpus used previously:
    # "Japanese.0304.text.non-mentions.txt"
    corpus_path = "strarf_serif.txt"

    srctxt = []
    with codecs.open(corpus_path, "rb", "utf-8") as f:
        for line in f:
            # NOTE(review): an "EOS" marker is appended after every line;
            # confirm this matches what generateModel expects.
            srctxt.append(line)
            srctxt.append(u"EOS")

    # The model is rebuilt and re-pickled on every call.
    freq1 = trigramModelGenerator.generateModel(srctxt)
    cpickler.topickle(freq1)

    keyword1 = keywordext.extraction(reply, freq1)
    sentenceGen = sentenceGenerator.sentenceGenerator(freq1)
    return sentenceGen.generateSentence(keyword1)
def generate_unigram_model(favs):
    """Update and persist the unigram frequency table built from favorites.

    Loads the existing table from 'favs_model_unigram.dump' (starting from
    an empty one if it cannot be loaded), tokenizes ``fav.text`` of every
    favorite with ``mecabCaller.parse``, counts each token after removing
    punctuation characters, writes the table back to the same file and
    returns it.

    Args:
        favs: Iterable of iterables of objects exposing a ``text``
            attribute.

    Returns:
        dict mapping token text to its accumulated count.

    NOTE(review): assumes ``mecabCaller.parse`` yields unicode text tokens;
    confirm against that module.
    """
    model_path = 'favs_model_unigram.dump'

    # Best-effort load: a missing or unreadable dump just means we start
    # from scratch. ``Exception`` (rather than a bare except) lets
    # KeyboardInterrupt / SystemExit propagate.
    try:
        freqfav = cpickler.frompickle(filename=model_path)
    except Exception:
        freqfav = {}

    # Punctuation that must not contribute to the model.
    # NOTE(review): '!' and '?' appear twice; presumably one pair was meant
    # to be the full-width forms -- confirm.
    esc = frozenset([u'…', u'・', u'.', u',', u'、', u'。', u'!', u'?', u'!', u'?'])

    for favlist in favs:
        for fav in favlist:
            for itemraw in mecabCaller.parse(fav.text):
                # The key must be hashable: the previous list-valued key
                # raised TypeError on the first token. Strip punctuation
                # and use the remaining text as the key.
                item = u''.join(x for x in itemraw if x not in esc)
                if not item:
                    # Token consisted only of punctuation.
                    continue
                freqfav[item] = freqfav.get(item, 0) + 1

    cpickler.topickle(freqfav, filename=model_path)
    return freqfav