class EnExtractor(object):
    """English abstract extractor.

    Bundles an ``EnKeywordExtraction`` and an ``EnSentenceExtraction``
    instance, both configured with the same stop-word file, and exposes
    thin train-then-fetch helpers on top of them.
    """

    def __init__(self, stop_words_file='./TextRank/trainer/stopword_en.data'):
        """Create the keyphrase and sentence extractors.

        Args:
            stop_words_file: Path forwarded unchanged to both extractors as
                their ``stop_words_file`` argument.
        """
        super(EnExtractor, self).__init__()
        # Both back ends receive the very same keyword argument.
        shared_options = {'stop_words_file': stop_words_file}
        self.keyphrase_extraction = EnKeywordExtraction(**shared_options)
        self.summary_extraction = EnSentenceExtraction(**shared_options)

    def keyphrase_train(self, text, article_type='Abstract'):
        """Train the keyphrase extractor on *text* and return its keyphrases.

        Args:
            text: Input text; training is always requested with
                ``lower=True``.
            article_type: Forwarded to ``get_keyphrases_maximal``.

        Returns:
            Whatever ``get_keyphrases_maximal(article_type=...)`` returns.
        """
        extractor = self.keyphrase_extraction
        extractor.train(text=text, lower=True)
        return extractor.get_keyphrases_maximal(article_type=article_type)

    def summary_train(self, text, sentences_percent='default',
                      sim_func='wordnet', num=100):
        """Train the sentence extractor on *text* and return a summary.

        Args:
            text: Input text to summarise.
            sentences_percent: ``'default'`` selects
                ``get_key_sentences_100w()``; any other value is forwarded
                to ``get_key_sentences(sentences_percent=...)``.
            sim_func: Similarity function identifier forwarded to ``train``.
            num: Accepted for interface compatibility; not used by this
                method.

        Returns:
            The result of whichever sentence getter was selected.
        """
        extractor = self.summary_extraction
        extractor.train(text=text, sim_func=sim_func)
        if sentences_percent == 'default':
            return extractor.get_key_sentences_100w()
        return extractor.get_key_sentences(sentences_percent=sentences_percent)

    def get_tag(self, text):
        """Return ``self.keyphrase_extraction.get_tag(text)``."""
        tagger = self.keyphrase_extraction
        return tagger.get_tag(text)
def __init__(self, stop_words_file='./TextRank/trainer/stopword_en.data'):
    """Create the keyphrase and sentence extractors for an ``EnExtractor``.

    Args:
        stop_words_file: Path forwarded unchanged to both
            ``EnKeywordExtraction`` and ``EnSentenceExtraction`` as their
            ``stop_words_file`` argument.
    """
    super(EnExtractor, self).__init__()
    # Both back ends receive the very same keyword argument.
    shared_options = {'stop_words_file': stop_words_file}
    self.keyphrase_extraction = EnKeywordExtraction(**shared_options)
    self.summary_extraction = EnSentenceExtraction(**shared_options)