Example #1
0
# Turn every cleaned abstract into a graph (same window size for all) and
# add the graphs to 'gs' in input order.
gs.extend(
    terms_to_graph(abstract, window_size) for abstract in abstracts_cleaned
)

##################################
# graph-based keyword extraction #
##################################

print('\n -> Graph based keyword extraction \n')

# Fraction used by the PR and TF-IDF methods.
my_percentage = 0.33

# Each extraction method gets its own, initially empty, list of results.
method_names = ['kc', 'wkc', 'pr', 'tfidf']
keywords = {name: [] for name in method_names}

for counter, g in enumerate(gs):
    # k-core: the keywords of a graph are the terms of its main core, i.e.
    # the terms whose core number equals the largest core number found.
    # (The original author also noted an igraph-based alternative:
    #  dict(zip(g.vs['name'], g.coreness())).)
    core_numbers = core_dec(g, False)
    max_c_n = max(core_numbers.values())
    main_core = [
        term for term in core_numbers if core_numbers[term] == max_c_n
    ]
    keywords['kc'].append(main_core)

    # weighted k-core: same procedure, with core_dec called with True.
    core_numbers = core_dec(g, True)
    max_c_n = max(core_numbers.values())
    main_core = [
        term for term in core_numbers if core_numbers[term] == max_c_n
    ]
    keywords['wkc'].append(main_core)

    # PageRank
Example #2
0
# number of edges
print(len(g.es))

# Sanity check: the number of nodes should be equal to the number of unique
# terms. The result is printed (rather than left as a bare comparison, which
# is silently discarded when the file runs as a script) so it is visible.
print(len(g.vs) == len(set(my_tokens)))

# One [source name, target name, weight] triple per edge of g, in edge order.
edge_weights = [
    [g.vs[edge.source]['name'], g.vs[edge.target]['name'], edge['weight']]
    for edge in g.es
]

print(edge_weights)

# Rebuild the graph for every window size from 2 to 29 and print its density.
for w in range(2, 30):
    g = terms_to_graph(my_tokens, w)
    density = g.density()
    print(density)

# Decompose g.
# NOTE(review): the sweep above rebinds g, so at this point g is the graph
# built with the last window size (29) - confirm that this is the graph that
# is meant to be decomposed.
core_numbers = core_dec(g, False)
print("\n", core_numbers)

# Main core = the terms whose core number is the largest one; these are kept
# as keywords.
max_c_n = max(core_numbers.values())
keywords = [kwd for kwd in core_numbers if core_numbers[kwd] == max_c_n]
print(keywords)

# igraph's own coreness values, printed for comparison with 'core_numbers'.
print(g.coreness())
Example #3
0
print(edge_weights)

# Plot g using the "kk" layout, labelling each vertex with its 'name'.
layout = g.layout("kk")
visual_style = {
    "vertex_size": 20,
    "vertex_label": g.vs["name"],
    # Edge width grows with the edge weight: 1 + 2 * int(weight)
    # (int() drops any fractional part of the weight).
    "edge_width": [1 + 2 * int(weight) for weight in g.es['weight']],
    "layout": layout,
    "bbox": (300, 300),
    "margin": 20,
}
plot(g, **visual_style)

# Rebuild the graph for every window size from 2 to 9 and report its density.
message = 'The density with a window of size {} is: {}'
for w in range(2, 10):
    g = terms_to_graph(my_tokens, w)
    print(message.format(w, g.density()))

# The sweep above rebinds g, so rebuild it with a window of 4 before
# decomposing it.
g = terms_to_graph(my_tokens, 4)
core_numbers = core_dec(g, True)
print(core_numbers)

# igraph's own coreness values, printed for comparison with 'core_numbers'.
# NOTE(review): core_dec is called with True here (the weighted variant, going
# by the comments further down), while g.coreness() is presumably unweighted -
# confirm that comparing the two is intended.
print(g.coreness())

# Main core = the terms whose core number is the largest one; these are kept
# as keywords.
max_c_n = max(core_numbers.values())
keywords = [kwd for kwd in core_numbers if core_numbers[kwd] == max_c_n]
print(keywords)
# One graph per cleaned abstract, all built with a window of 4.
gs = [terms_to_graph(abstract, 4) for abstract in abstracts_cleaned]

##################################
# graph-based keyword extraction #
##################################

# Fraction used by the PR and TF-IDF methods.
my_percentage = 0.33

# Each extraction method gets its own, initially empty, list of results.
method_names = ['kc', 'wkc', 'pr', 'tfidf']
keywords = {name: [] for name in method_names}

for counter, g in enumerate(gs):
    # k-core: keep, as keywords, the terms whose core number equals the
    # largest core number found in this graph.
    kcore = core_dec(g, False)
    core_numbers = list(kcore.items())
    # The maximum is computed once per graph rather than once per term inside
    # the filter below; default=None keeps an empty decomposition producing
    # an empty keyword list instead of raising.
    top_core = max(kcore.values(), default=None)
    keywords['kc'].append(
        [term for term, core in core_numbers if core == top_core])

    # weighted k-core: same procedure, with core_dec called with True.
    wkcore = core_dec(g, True)
    weighted_core_numbers = list(wkcore.items())
    top_weighted_core = max(wkcore.values(), default=None)
    keywords['wkc'].append([
        term for term, core in weighted_core_numbers
        if core == top_weighted_core
    ])