def analyze(self, text,
            window=2,
            lower=False,
            vertex_source='all_filters',
            edge_source='no_stop_words',
            pagerank_config=None):
    """Analyze the text and extract keywords via PageRank.

    Keyword arguments:
    text            -- the text content, a string.
    window          -- window size, int, used to build edges between words. Default 2.
    lower           -- whether to lowercase the text. Default False.
    vertex_source   -- which of words_no_filter / words_no_stop_words / words_all_filters
                       supplies the vertices of the PageRank graph. Default 'all_filters';
                       valid values: 'no_filter', 'no_stop_words', 'all_filters'.
                       Keywords are drawn from `vertex_source`.
    edge_source     -- which of the three word lists supplies the edges of the PageRank
                       graph. Default 'no_stop_words'; same valid values as above.
                       Edge construction is combined with the `window` parameter.
    pagerank_config -- dict of options forwarded to PageRank; defaults to {'alpha': 0.85}.
    """
    # None-sentinel instead of a mutable default dict, so the default config
    # is rebuilt per call and cannot be mutated across invocations.
    if pagerank_config is None:
        pagerank_config = {'alpha': 0.85}

    # self.text = util.as_text(text)
    self.text = text
    self.word_index = {}
    self.index_word = {}
    self.keywords = []
    self.graph = None

    # Segment once; the result exposes sentences plus three word lists at
    # increasing filtering strength.
    result = self.seg.segment(text=text, lower=lower)
    self.sentences = result.sentences
    self.words_no_filter = result.words_no_filter
    self.words_no_stop_words = result.words_no_stop_words
    self.words_all_filters = result.words_all_filters

    util.debug(20 * '*')
    util.debug('self.sentences in TextRank4Keyword:\n', ' || '.join(self.sentences))
    util.debug('self.words_no_filter in TextRank4Keyword:\n', self.words_no_filter)
    util.debug('self.words_no_stop_words in TextRank4Keyword:\n', self.words_no_stop_words)
    util.debug('self.words_all_filters in TextRank4Keyword:\n', self.words_all_filters)

    options = ['no_filter', 'no_stop_words', 'all_filters']

    # Unknown selector values silently fall back to the documented defaults.
    if vertex_source in options:
        _vertex_source = result['words_' + vertex_source]
    else:
        _vertex_source = result['words_all_filters']

    if edge_source in options:
        _edge_source = result['words_' + edge_source]
    else:
        _edge_source = result['words_no_stop_words']

    self.keywords = util.sort_words(_vertex_source, _edge_source, window=window, pagerank_config=pagerank_config)
def analyze(self, text,  # run the keyword network computation
            window=2,
            lower=False,
            vertex_source='all_filters',
            edge_source='no_stop_words',
            pagerank_config=None):
    """Analyze the text and extract keywords via PageRank.

    Keyword arguments:
    text            -- the text content, a string.
    window          -- window size, int, used to build edges between words. Default 2.
    lower           -- whether to lowercase the text. Default False.
    pagerank_config -- dict of options forwarded to PageRank; defaults to {'alpha': 0.85}.
    """
    # None-sentinel instead of a mutable default dict, so the default config
    # is rebuilt per call and cannot be mutated across invocations.
    if pagerank_config is None:
        pagerank_config = {'alpha': 0.85}

    # self.text = util.as_text(text)
    self.text = text
    self.word_index = {}
    self.index_word = {}
    self.keywords = []
    self.graph = None

    # Segment once; the result exposes sentences plus three word lists at
    # increasing filtering strength.
    result = self.seg.segment(text=text, lower=lower)
    self.sentences = result.sentences
    self.words_no_filter = result.words_no_filter
    self.words_no_stop_words = result.words_no_stop_words
    self.words_all_filters = result.words_all_filters

    options = ['no_filter', 'no_stop_words', 'all_filters']

    # Unknown selector values silently fall back to the documented defaults.
    if vertex_source in options:
        _vertex_source = result['words_' + vertex_source]
    else:
        _vertex_source = result['words_all_filters']

    if edge_source in options:
        _edge_source = result['words_' + edge_source]
    else:
        _edge_source = result['words_no_stop_words']

    self.keywords = util.sort_words(_vertex_source, _edge_source, window=window, pagerank_config=pagerank_config)