Esempio n. 1
0
class EnExtractor(object):
	"""Extractor for English text.

	Bundles a key-phrase extractor and a key-sentence (summary) extractor
	and exposes one train-and-fetch method for each of them.
	"""

	def __init__(self, stop_words_file='./TextRank/trainer/stopword_en.data'):
		"""Create both underlying extractors from one stop-word file.

		Args:
			stop_words_file: path handed unchanged to both extractors.
		"""
		super(EnExtractor, self).__init__()
		# Both extractors receive exactly the same keyword argument.
		shared_kwargs = dict(stop_words_file=stop_words_file)
		self.keyphrase_extraction = EnKeywordExtraction(**shared_kwargs)
		self.summary_extraction = EnSentenceExtraction(**shared_kwargs)

	def keyphrase_train(self, text, article_type='Abstract'):
		"""Train the key-phrase extractor on ``text`` and return its phrases.

		Args:
			text: input text; training is always requested with ``lower=True``.
			article_type: forwarded to ``get_keyphrases_maximal``.

		Returns:
			Whatever ``get_keyphrases_maximal`` returns for ``article_type``.
		"""
		extractor = self.keyphrase_extraction
		extractor.train(text=text, lower=True)
		return extractor.get_keyphrases_maximal(article_type=article_type)

	def summary_train(self, text, sentences_percent='default', sim_func='wordnet', num=100):
		"""Train the sentence extractor on ``text`` and return key sentences.

		Args:
			text: input text to summarise.
			sentences_percent: the string ``'default'`` selects
				``get_key_sentences_100w``; any other value is forwarded to
				``get_key_sentences``.
			sim_func: similarity function name forwarded to ``train``.
			num: accepted for compatibility; not used by this method.

		Returns:
			The result of the selected key-sentence getter.
		"""
		extractor = self.summary_extraction
		extractor.train(text=text, sim_func=sim_func)
		if sentences_percent == 'default':
			return extractor.get_key_sentences_100w()
		return extractor.get_key_sentences(sentences_percent=sentences_percent)

	def get_tag(self, text):
		"""Delegate to the key-phrase extractor's ``get_tag`` for ``text``."""
		tagger = self.keyphrase_extraction
		return tagger.get_tag(text)
Esempio n. 2
0
	def __init__(self, stop_words_file='./TextRank/trainer/stopword_en.data'):
		"""Create the key-phrase and key-sentence extractors.

		Args:
			stop_words_file: path handed unchanged to both extractors.
		"""
		super(EnExtractor, self).__init__()
		# Both extractors receive exactly the same keyword argument.
		shared_kwargs = dict(stop_words_file=stop_words_file)
		self.keyphrase_extraction = EnKeywordExtraction(**shared_kwargs)
		self.summary_extraction = EnSentenceExtraction(**shared_kwargs)