gs = []
for abstract in abstracts_cleaned:
    gs.append(terms_to_graph(abstract, window_size))

##################################
# graph-based keyword extraction #
##################################

print('\n -> Graph-based keyword extraction \n')

my_percentage = 0.33  # for PR and TF-IDF

method_names = ['kc', 'wkc', 'pr', 'tfidf']
keywords = dict(zip(method_names, [[], [], [], []]))

for counter, g in enumerate(gs):

    # k-core
    core_numbers = core_dec(g, False)
    # core_numbers = dict(zip(g.vs['name'], g.coreness()))

    ### fill the gaps (retain main core as keywords and append the resulting list to 'keywords['kc']') ###
    max_c_n = max(core_numbers.values())
    keywords['kc'].append([kwd for kwd, c_n in core_numbers.items() if c_n == max_c_n])

    # weighted k-core
    ### fill the gaps (repeat the procedure used for k-core) ###
    core_numbers = core_dec(g, True)
    max_c_n = max(core_numbers.values())
    keywords['wkc'].append([kwd for kwd, c_n in core_numbers.items() if c_n == max_c_n])

    # PageRank
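    # The PageRank branch is filled in later in the lab. A hedged sketch of
    # how it could look, using igraph's pagerank() with edge weights and
    # retaining the top 'my_percentage' of terms by score ('pr_sorted' and
    # 'to_keep' are illustrative names, not part of the original code):
    pr_scores = zip(g.vs['name'], g.pagerank(weights=g.es['weight']))
    pr_sorted = sorted(pr_scores, key=lambda t: t[1], reverse=True)
    to_keep = max(1, int(len(pr_sorted) * my_percentage))
    keywords['pr'].append([term for term, score in pr_sorted[:to_keep]])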
# number of edges
print(len(g.es))

# the number of nodes should be equal to the number of unique terms
print(len(g.vs) == len(set(my_tokens)))

edge_weights = []
for edge in g.es:
    source = g.vs[edge.source]['name']
    target = g.vs[edge.target]['name']
    weight = edge['weight']
    edge_weights.append([source, target, weight])

print(edge_weights)

for w in range(2, 30):
    g = terms_to_graph(my_tokens, w)
    ### fill the gap (print density of g) ###
    print(g.density())

# decompose g
core_numbers = core_dec(g, False)
print("\n", core_numbers)

### fill the gap (compare 'core_numbers' with the output of the .coreness() igraph method) ###
print(g.coreness())

# retain main core as keywords
max_c_n = max(core_numbers.values())
keywords = [kwd for kwd, c_n in core_numbers.items() if c_n == max_c_n]
print(keywords)
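# 'core_dec' is defined earlier in the lab. For intuition, a minimal
# unweighted version can be written as a peeling procedure: repeatedly
# remove a vertex of smallest degree and record the largest degree seen so
# far as its core number ('core_dec_sketch' is an illustrative name, not
# the lab's own function). Its output should match g.coreness().
def core_dec_sketch(g):
    gg = g.copy()
    cores = {}
    max_seen = 0
    while gg.vcount() > 0:
        degrees = gg.degree()
        i = degrees.index(min(degrees))          # vertex of smallest degree
        max_seen = max(max_seen, degrees[i])     # core number is monotone
        cores[gg.vs[i]['name']] = max_seen
        gg.delete_vertices(i)
    return cores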
print(edge_weights)

layout = g.layout("kk")

visual_style = {}
visual_style["vertex_size"] = 20
visual_style["vertex_label"] = g.vs["name"]
visual_style["edge_width"] = [1 + 2 * int(weight) for weight in g.es['weight']]
visual_style["layout"] = layout
visual_style["bbox"] = (300, 300)
visual_style["margin"] = 20

plot(g, **visual_style)

for w in range(2, 10):
    g = terms_to_graph(my_tokens, w)
    ### fill the gap (print density of g) ###
    print('The density with a window of size {} is: {}'.format(w, g.density()))

# decompose g
g = terms_to_graph(my_tokens, 4)
core_numbers = core_dec(g, True)
print(core_numbers)

### fill the gap (compare 'core_numbers' with the output of the .coreness() igraph method) ###
print(g.coreness())

# retain main core as keywords
max_c_n = max(core_numbers.values())
keywords = [kwd for kwd, c_n in core_numbers.items() if c_n == max_c_n]
print(keywords)
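# 'core_dec(g, True)' runs the weighted variant of the decomposition, where
# plain degree is replaced by weighted degree (strength), in the spirit of
# the generalized cores of Batagelj & Zaversnik. A minimal sketch of such a
# peeling procedure, assuming edge weights live in the 'weight' attribute
# ('core_dec_w_sketch' is an illustrative name, not the lab's own function):
def core_dec_w_sketch(g):
    gg = g.copy()
    cores = {}
    max_seen = 0
    while gg.vcount() > 0:
        # weighted degrees; once no edges remain, fall back to plain degree (all 0)
        w = gg.es['weight'] if gg.ecount() > 0 else None
        strengths = gg.strength(weights=w)
        i = strengths.index(min(strengths))
        max_seen = max(max_seen, strengths[i])
        cores[gg.vs[i]['name']] = max_seen
        gg.delete_vertices(i)
    return cores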
gs = []
for abstract in abstracts_cleaned:
    gs.append(terms_to_graph(abstract, 4))

##################################
# graph-based keyword extraction #
##################################

my_percentage = 0.33  # for PR and TF-IDF

method_names = ['kc', 'wkc', 'pr', 'tfidf']
keywords = dict(zip(method_names, [[], [], [], []]))

for counter, g in enumerate(gs):

    # k-core
    kcore = core_dec(g, False)
    max_c_n = max(kcore.values())
    keywords['kc'].append([kwd for kwd, c_n in kcore.items() if c_n == max_c_n])

    # weighted k-core
    ### fill the gaps (repeat the procedure used for k-core) ###
    wkcore = core_dec(g, True)
    max_c_n = max(wkcore.values())
    keywords['wkc'].append([kwd for kwd, c_n in wkcore.items() if c_n == max_c_n])
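# The 'tfidf' branch is filled in later in the lab. A hedged sketch of one
# way to do it, assuming each element of 'abstracts_cleaned' is a list of
# tokens and using raw term counts against document frequencies computed
# over the whole collection ('doc_freqs' and 'tfidf_for' are illustrative
# names, not part of the original code):
import math
from collections import Counter

doc_freqs = Counter()
for doc in abstracts_cleaned:
    doc_freqs.update(set(doc))  # each term counted once per document

def tfidf_for(doc):
    n_docs = len(abstracts_cleaned)
    counts = Counter(doc)
    return {t: c * math.log(n_docs / doc_freqs[t]) for t, c in counts.items()}

for doc in abstracts_cleaned:
    scores = sorted(tfidf_for(doc).items(), key=lambda t: t[1], reverse=True)
    to_keep = max(1, int(len(scores) * my_percentage))
    keywords['tfidf'].append([term for term, score in scores[:to_keep]])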