Example #1
    nonzero = [tuple for tuple in terms_weights if tuple[1] != 0]
    nonzero = sorted(nonzero, key=operator.itemgetter(1),
                     reverse=True)  # in decreasing order
    numb_to_retain = int(
        len(nonzero) *
        my_percentage)  # retain top 'my_percentage' % words as keywords
    keywords['tfidf'].append([tuple[0] for tuple in nonzero[:numb_to_retain]])

    if counter % round(len(vectors_list) / 5) == 0:
        print(counter)

##########################
# performance comparison #
##########################

perf = dict(zip(method_names, [[], [], [], []]))
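
# illustrative sketch (not part of the original excerpt): 'accuracy_metrics' is
# defined earlier in the full script and is assumed to return a
# (precision, recall, F1) tuple comparing candidate keywords to the gold standard.
# A minimal version consistent with how it is used below could look like this:
def accuracy_metrics(candidate, truth):
    candidate_set, truth_set = set(candidate), set(truth)
    tp = len(candidate_set & truth_set)  # keywords present in both sets
    precision = tp / len(candidate_set) if candidate_set else 0.0
    recall = tp / len(truth_set) if truth_set else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) else 0.0)
    return precision, recall, f1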

for idx, truth in enumerate(keywds_gold_standard):
    for mn in method_names:
        ### fill the gap (append to the 'perf[mn]' list by using the 'accuracy_metrics' function) ###
        # print(mn)
        # print(keywords[mn][idx])
        # print(truth)
        # print('')
        perf[mn].append(accuracy_metrics(keywords[mn][idx], truth))

lkgs = len(keywds_gold_standard)

# print macro-averaged results (averaged at the collection level)
for k, v in perf.items():
    print('')
    print(f' -> {k} performance: \n')
    print('precision:', round(100 * sum([tuple[0] for tuple in v]) / lkgs, 2))
    print('recall:', round(100 * sum([tuple[1] for tuple in v]) / lkgs, 2))
    print('F-1 score:', round(100 * sum([tuple[2] for tuple in v]) / lkgs, 2))
    # print progress
    if counter % round(len(vectors_list) / 10) == 0:
        print(counter, 'vectors processed')

##########################
# performance evaluation #
##########################

perf_gow = []
perf_gow_w = []
perf_tfidf = []
perf_pr = []

for idx, truth in enumerate(keywords_gold_standard):
    perf_gow.append(accuracy_metrics(keywords_gow[idx], truth))
    perf_gow_w.append(accuracy_metrics(keywords_gow_w[idx], truth))
    perf_tfidf.append(accuracy_metrics(keywords_tfidf[idx], truth))
    perf_pr.append(accuracy_metrics(keywords_pr[idx], truth))

lkgs = len(keywords_gold_standard)

# macro-averaged results (averaged at the collection level)

results = {
    'gow': perf_gow,
    'gow_w': perf_gow_w,
    'tfidf': perf_tfidf,
    'pr': perf_pr
}
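
# illustrative continuation (assumption; the excerpt breaks off here): printing the
# macro-averaged scores from 'results' would mirror the loops shown in the other
# examples on this page
for name, result in results.items():
    print(name + ' performance: \n')
    print('precision:', round(100 * sum(scores[0] for scores in result) / lkgs, 2))
    print('recall:', round(100 * sum(scores[1] for scores in result) / lkgs, 2))
    print('F-1 score:', round(100 * sum(scores[2] for scores in result) / lkgs, 2))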
    nonzero = [tuple for tuple in terms_weights if tuple[1] != 0]
    nonzero = sorted(nonzero, key=operator.itemgetter(1),
                     reverse=True)  # in decreasing order
    numb_to_retain = int(
        len(nonzero) *
        my_percentage)  # retain top 'my_percentage' % words as keywords
    keywords['tfidf'].append([tuple[0] for tuple in nonzero[:numb_to_retain]])

    if counter % round(len(vectors_list) / 5) == 0:
        print(counter)

##########################
# performance comparison #
##########################

perf = dict(zip(method_names, [[], [], [], []]))
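# equivalent illustrative alternative (not in the original excerpt): a dict
# comprehension creates one empty list per method name without hard-coding four
# lists the way dict(zip(method_names, [[], [], [], []])) does
perf = {mn: [] for mn in method_names}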

for idx, truth in enumerate(keywds_gold_standard):
    for mn in method_names:
        ### fill the gap (append to the 'perf[mn]' list by using the 'accuracy_metrics' function) ###
        perf[mn].append(accuracy_metrics(keywords.get(mn)[idx], truth))

lkgs = len(keywds_gold_standard)

# print macro-averaged results (averaged at the collection level)
for k, v in perf.items():
    print(k + ' performance: \n')
    print('precision:', round(100 * sum([tuple[0] for tuple in v]) / lkgs, 2))
    print('recall:', round(100 * sum([tuple[1] for tuple in v]) / lkgs, 2))
    print('F-1 score:', round(100 * sum([tuple[2] for tuple in v]) / lkgs, 2))
    print('\n')
Example #4
        print(counter)

# In[10]:

##########################
# performance comparison #
##########################

perf_kc = []
perf_tfidf = []
perf_pr = []

for idx, truth in enumerate(keywords_gold_standard):
    # use the 'accuracy_metrics' function

    perf_kc.append(accuracy_metrics(keywords_kc[idx], truth))
    perf_tfidf.append(accuracy_metrics(keywords_tfidf[idx], truth))
    perf_pr.append(accuracy_metrics(keywords_pr[idx], truth))

lkgs = len(keywords_gold_standard)

# print macro-averaged results (averaged at the collection level)
results = {'k-core': perf_kc, 'tfidf': perf_tfidf, 'PageRank': perf_pr}

for name, result in results.items():
    print(name + ' performance: \n')
    print('precision:', round(100 * sum([tuple[0] for tuple in result]) / lkgs, 2))
    print('recall:', round(100 * sum([tuple[1] for tuple in result]) / lkgs, 2))
    print('F-1 score:', round(100 * sum([tuple[2] for tuple in result]) / lkgs, 2))
Example #5
    counter += 1
    if counter % 100 == 0:
        print(counter, 'abstracts processed')

##########################
# performance evaluation #
##########################

perf_gow = []
perf_tfidf = []
perf_pr = []
perf_rand = []

for ind, truth in enumerate(keywords_gold_standard):
    perf_gow.append(accuracy_metrics(keywords_gow[ind], truth))
    perf_tfidf.append(accuracy_metrics(keywords_tfidf[ind], truth))
    perf_pr.append(accuracy_metrics(keywords_pr[ind], truth))
    perf_rand.append(accuracy_metrics(keywords_rand[ind], truth))

lkgs = len(keywords_gold_standard)

# macro-averaged results (averaged at the collection level)

results = {
    'gow': perf_gow,
    'tfidf': perf_tfidf,
    'pr': perf_pr,
    'rand': perf_rand
}
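
# illustrative continuation (assumption; the excerpt ends here): the macro-averaged
# figures can also be computed by transposing the per-document
# (precision, recall, F1) tuples and averaging each column with statistics.mean
from statistics import mean

for name, result in results.items():
    p, r, f1 = (round(100 * mean(scores), 2) for scores in zip(*result))
    print(f'{name}: precision={p} recall={r} F-1={f1}')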