Example #1
0
    def OnExtract(self, events):
        """Run keyword/keyphrase and summary extraction on the source text.

        The text is read from ``self.sourcePage`` and stripped.  If nothing
        is left, "No article" is printed and no other widget is touched.

        Otherwise the pipeline is chosen by ``self.languageType``:

        * selection ``1``: ``Extractor`` with the ``stopword_zh.data``
          stop-word file; keywords, key phrases and an abstract joined
          with the ideographic full stop are produced.
        * any other selection: ``EnExtractor`` with the
          ``stopword_en.data`` stop-word file.  ``self.articleType``
          selection ``1`` yields key phrases only (the abstract page is
          set to an empty string); any other value yields key phrases plus
          a summary.

        Results are written to ``self.abstractPage`` and
        ``self.keywordPage``.

        Args:
            events: event object supplied by the GUI binding; not used.
        """
        text = self.sourcePage.GetValue().strip()
        if text == '':
            # Single-argument parenthesised print emits the same text on
            # Python 2 and Python 3.
            print("No article")
            return

        if self.languageType.GetSelection() == 1:
            sentences_percent = self.sentencesPercent.GetValue()
            similarity_function = self.similarityFunction.GetValue()
            # Trace of the selected similarity function on stdout.
            print(similarity_function)
            extractor = Extractor(
                stop_words_file='./TextRank/trainer/stopword_zh.data')
            keyword, keyphrase = extractor.keyword_train(text=text)
            abstract = extractor.sentence_train(
                text,
                sentences_percent=sentences_percent,
                sim_func=similarity_function)

            keyword_result = '/'.join(keyword)
            keyword_result += '\n关键短语:\n' + '/'.join(keyphrase)
            # Sentences are re-joined with, and terminated by, "。".
            result = '。'.join(abstract) + '。'
            self.abstractPage.SetValue(result)
            # Unlike the branch below, no font styling is applied here.
            self.keywordPage.SetValue(keyword_result)
            return

        art_type = self.articleType.GetSelection()
        extractor = EnExtractor(
            stop_words_file='./TextRank/trainer/stopword_en.data')
        if art_type == 1:
            # Abstract-type article: key phrases only, no summary text.
            keyphrase = extractor.keyphrase_train(
                text, article_type='Abstract')
            keyword_result = 'Keyphrases:\n' + '/'.join(keyphrase)
            result = ''
        else:
            sentences_percent = self.sentencesPercent.GetValue()
            similarity_function = self.similarityFunction.GetValue()
            keyphrase = extractor.keyphrase_train(
                text, article_type='Fulltext')
            summary = extractor.summary_train(
                text,
                sentences_percent=sentences_percent,
                sim_func=similarity_function)
            keyword_result = '/'.join(keyphrase)
            # The summary is prefixed with three spaces.
            result = '   ' + ' '.join(summary)

        self.abstractPage.SetValue(result)
        # Set the text style: create a font and apply it to the whole
        # abstract text.
        f = wx.Font(10, wx.ROMAN, wx.NORMAL, wx.NORMAL, True)
        self.abstractPage.SetStyle(
            0, len(result), wx.TextAttr('black', wx.NullColor, f))

        self.keywordPage.SetValue(keyword_result)
def get_keyphrases(basepath, filelist):
    """Extract key phrases for each file and score them against references.

    For every name in *filelist* the text is read from
    ``basepath/abstracts/<name>``, key phrases are extracted with a fresh
    ``EnExtractor`` (article type ``'Abstract'``), and the result is passed
    to ``eval_keyphrase`` together with the ``;``-separated contents of
    ``basepath/keywords/<name>``.

    Args:
        basepath: directory containing the ``abstracts`` and ``keywords``
            sub-directories.
        filelist: iterable of file names present in both sub-directories.

    Returns:
        A tuple ``(correct_num, system_key_num, wrong_file)``:
        the sum of the ``eval_keyphrase`` results, the total number of
        extracted key phrases, and the list of file names for which
        ``eval_keyphrase`` returned ``0``.
    """
    correct_num = 0
    system_key_num = 0
    wrong_file = []
    for name in filelist:
        evaluation = EnExtractor()
        # Context managers close the files promptly instead of leaking
        # one handle per processed file.
        with open(basepath + '/abstracts/' + name, 'r') as abstract_file:
            text = abstract_file.read()
        system_keyphrases = evaluation.keyphrase_train(
            text=text, article_type='Abstract')
        system_key_num += len(system_keyphrases)

        with open(basepath + '/keywords/' + name, 'r') as keyword_file:
            content = keyword_file.read()
        mannual_keyphrases = content.split(';')

        # NOTE(review): eval_keyphrase presumably returns the number of
        # matching key phrases - confirm against its definition.
        matched = eval_keyphrase(system_keyphrases, mannual_keyphrases)
        correct_num += matched
        # Progress line with the running total; formatted so the output is
        # the same on Python 2 and Python 3.
        print('file- %s : %s' % (name, correct_num))
        # Remember files that scored zero.
        if matched == 0:
            wrong_file.append(name)
    return correct_num, system_key_num, wrong_file
Example #3
0
def get_keyphrases(basepath, filelist):
    """Extract key phrases for each file and score them against references.

    For every name in *filelist* the text is read from
    ``basepath/abstracts/<name>``, key phrases are extracted with a fresh
    ``EnExtractor`` (article type ``'Abstract'``), and the result is passed
    to ``eval_keyphrase`` together with the ``;``-separated contents of
    ``basepath/keywords/<name>``.

    Args:
        basepath: directory containing the ``abstracts`` and ``keywords``
            sub-directories.
        filelist: iterable of file names present in both sub-directories.

    Returns:
        A tuple ``(correct_num, system_key_num, wrong_file)``:
        the sum of the ``eval_keyphrase`` results, the total number of
        extracted key phrases, and the list of file names for which
        ``eval_keyphrase`` returned ``0``.
    """
    correct_num = 0
    system_key_num = 0
    wrong_file = []
    for name in filelist:
        evaluation = EnExtractor()
        # Context managers close the files promptly instead of leaking
        # one handle per processed file.
        with open(basepath + '/abstracts/' + name, 'r') as abstract_file:
            text = abstract_file.read()
        system_keyphrases = evaluation.keyphrase_train(
            text=text, article_type='Abstract')
        system_key_num += len(system_keyphrases)

        with open(basepath + '/keywords/' + name, 'r') as keyword_file:
            content = keyword_file.read()
        mannual_keyphrases = content.split(';')

        # NOTE(review): eval_keyphrase presumably returns the number of
        # matching key phrases - confirm against its definition.
        matched = eval_keyphrase(system_keyphrases, mannual_keyphrases)
        correct_num += matched
        # Progress line with the running total; formatted so the output is
        # the same on Python 2 and Python 3.
        print('file- %s : %s' % (name, correct_num))
        # Remember files that scored zero.
        if matched == 0:
            wrong_file.append(name)
    return correct_num, system_key_num, wrong_file
def get_summary(sentences):
    """Return ``summary_train(sentences)`` from a new default ``EnExtractor``."""
    return EnExtractor().summary_train(sentences)
def get_summary(sentences):
    """Summarise *sentences* with a freshly constructed ``EnExtractor``.

    The value returned by ``EnExtractor.summary_train`` is passed back
    unchanged.
    """
    return EnExtractor().summary_train(sentences)