def OnExtract(self, events):
    """wx event handler: extract keywords/keyphrases and a summary from the
    text in sourcePage, writing results into keywordPage and abstractPage.

    Branches on languageType: selection 1 runs the Chinese pipeline
    (Extractor), anything else runs the English pipeline (EnExtractor).
    NOTE(review): original indentation was lost; the placement of the
    trailing SetValue/SetStyle calls in the English branch is reconstructed
    so that both article types display their results — confirm against the
    original layout.
    """
    text = self.sourcePage.GetValue().strip()
    keyword_result = ''
    result = ''
    if text != '':
        if self.languageType.GetSelection() == 1:
            # Chinese pipeline: keywords + keyphrases + sentence summary.
            sentences_percent = self.sentencesPercent.GetValue()
            similarity_function = self.similarityFunction.GetValue()
            print similarity_function
            extractor = Extractor(
                stop_words_file='./TextRank/trainer/stopword_zh.data')
            keyword, keyphrase = extractor.keyword_train(text=text)
            abstract = extractor.sentence_train(
                text,
                sentences_percent=sentences_percent,
                sim_func=similarity_function)
            keyword_result = '/'.join(keyword)
            keyword_result += '\n关键短语:\n' + '/'.join(keyphrase)
            # Join summary sentences with the full-width Chinese period.
            result += '。'.join(abstract) + r'。'
            self.abstractPage.SetValue(result)
            # set text style
            #f = wx.Font(10, wx.ROMAN, wx.NORMAL, wx.NORMAL, True) # create a font
            #self.abstractPage.SetStyle(0, len(result), wx.TextAttr('black',wx.NullColor,f))
            self.keywordPage.SetValue(keyword_result)
        else:
            # English pipeline: article type decides whether a summary is built.
            art_type = self.articleType.GetSelection()
            extractor = EnExtractor(
                stop_words_file='./TextRank/trainer/stopword_en.data')
            if art_type == 1:
                # Abstract-only article: keyphrases only, no summary.
                keyphrase = extractor.keyphrase_train(
                    text, article_type='Abstract')
                keyword_result = 'Keyphrases:\n' + '/'.join(keyphrase)
            else:
                # Full-text article: keyphrases plus a sentence summary.
                sentences_percent = self.sentencesPercent.GetValue()
                similarity_function = self.similarityFunction.GetValue()
                keyphrase = extractor.keyphrase_train(
                    text, article_type='Fulltext')
                summary = extractor.summary_train(
                    text,
                    sentences_percent=sentences_percent,
                    sim_func=similarity_function)
                keyword_result = '/'.join(keyphrase)
                result += ' ' + ' '.join(summary)
            self.abstractPage.SetValue(result)
            # set text style
            f = wx.Font(10, wx.ROMAN, wx.NORMAL, wx.NORMAL, True)  # create a font
            self.abstractPage.SetStyle(
                0, len(result), wx.TextAttr('black', wx.NullColor, f))
            self.keywordPage.SetValue(keyword_result)
    else:
        #test
        #sentences_percent = self.sentencesPercent.GetValue()
        #print filter(lambda x:x.isdigit(), sentences_percent)
        print "No article"
def get_keyphrases(basepath, filelist): correct_num = 0 system_key_num = 0 wrong_file = [] for f in filelist: #print 'deal with:',f evaluation = EnExtractor() text = open(basepath+'/abstracts/'+f,'r').read() system_keyphrases = evaluation.keyphrase_train(text=text, article_type='Abstract') system_key_num += len(system_keyphrases) #tag_word = evaluation.get_tag(text=text) content = open(basepath+'/keywords/'+f,'r').read() mannual_keyphrases = content.split(';') '''test for 0 corret_num file''' tmp = eval_keyphrase(system_keyphrases,mannual_keyphrases) correct_num += tmp print 'file-',f,':',correct_num if tmp == 0: wrong_file.append(f) return correct_num ,system_key_num , wrong_file
def get_keyphrases(basepath, filelist): correct_num = 0 system_key_num = 0 wrong_file = [] for f in filelist: #print 'deal with:',f evaluation = EnExtractor() text = open(basepath + '/abstracts/' + f, 'r').read() system_keyphrases = evaluation.keyphrase_train(text=text, article_type='Abstract') system_key_num += len(system_keyphrases) #tag_word = evaluation.get_tag(text=text) content = open(basepath + '/keywords/' + f, 'r').read() mannual_keyphrases = content.split(';') '''test for 0 corret_num file''' tmp = eval_keyphrase(system_keyphrases, mannual_keyphrases) correct_num += tmp print 'file-', f, ':', correct_num if tmp == 0: wrong_file.append(f) return correct_num, system_key_num, wrong_file
def get_summary(sentences):
    """Return the TextRank summary of *sentences* using a fresh EnExtractor."""
    return EnExtractor().summary_train(sentences)