numb_to_retain = int(len(nonzero) * my_percentage)  # retain top 'my_percentage' % of words as keywords
keywords['tfidf'].append([t[0] for t in nonzero[:numb_to_retain]])

# print progress
if counter % round(len(vectors_list) / 5) == 0:
    print(counter)

##########################
# performance comparison #
##########################

perf = {mn: [] for mn in method_names}

for idx, truth in enumerate(keywds_gold_standard):
    for mn in method_names:
        # score the keywords of method 'mn' for document 'idx' against the gold standard
        perf[mn].append(accuracy_metrics(keywords[mn][idx], truth))

lkgs = len(keywds_gold_standard)

# print macro-averaged results (averaged at the collection level)
for k, v in perf.items():
    print('')
    print(f' -> {k} performance:\n')
    print('precision:', round(100 * sum(t[0] for t in v) / lkgs, 2))
    print('recall:', round(100 * sum(t[1] for t in v) / lkgs, 2))
    print('F-1 score:', round(100 * sum(t[2] for t in v) / lkgs, 2))
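# Every evaluation block in this section relies on an 'accuracy_metrics' helper
# that is defined elsewhere in the script. A minimal sketch, assuming it compares
# a candidate keyword list against the gold-standard list and returns a
# (precision, recall, F1) tuple; the exact signature is an assumption:

def accuracy_metrics(candidate, truth):
    # hypothetical sketch: set-based precision/recall/F1 over keywords
    candidate_set = set(candidate)
    truth_set = set(truth)
    tp = len(candidate_set & truth_set)  # keywords found in both lists
    prec = tp / len(candidate_set) if candidate_set else 0.0
    rec = tp / len(truth_set) if truth_set else 0.0
    f1 = 2 * prec * rec / (prec + rec) if (prec + rec) else 0.0
    return (prec, rec, f1)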
# print progress
if counter % round(len(vectors_list) / 10) == 0:
    print(counter, 'vectors processed')

##########################
# performance evaluation #
##########################

perf_gow = []
perf_gow_w = []
perf_tfidf = []
perf_pr = []

for idx, truth in enumerate(keywords_gold_standard):
    perf_gow.append(accuracy_metrics(keywords_gow[idx], truth))
    perf_gow_w.append(accuracy_metrics(keywords_gow_w[idx], truth))
    perf_tfidf.append(accuracy_metrics(keywords_tfidf[idx], truth))
    perf_pr.append(accuracy_metrics(keywords_pr[idx], truth))

lkgs = len(keywords_gold_standard)

# macro-averaged results (averaged at the collection level)
results = {
    'gow': perf_gow,
    'gow_w': perf_gow_w,
    'tfidf': perf_tfidf,
    'pr': perf_pr
}

for name, result in results.items():
    print(name + ' performance:\n')
    print('precision:', round(100 * sum(t[0] for t in result) / lkgs, 2))
    print('recall:', round(100 * sum(t[1] for t in result) / lkgs, 2))
    print('F-1 score:', round(100 * sum(t[2] for t in result) / lkgs, 2))
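# The 'keywords_pr' candidates above come from a PageRank pass over each
# document's graph-of-words, which is built elsewhere in the script. A minimal
# sketch with networkx, assuming 'graph' is that word co-occurrence graph and
# 'my_percentage' is the retention ratio (the function name is an assumption):

import networkx as nx

def pagerank_keywords(graph, my_percentage):
    # score each word-node with PageRank, then keep the top fraction
    scores = sorted(nx.pagerank(graph).items(), key=lambda t: t[1], reverse=True)
    numb_to_retain = int(len(scores) * my_percentage)
    return [word for word, _ in scores[:numb_to_retain]]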
import operator

nonzero = [t for t in terms_weights if t[1] != 0]
nonzero = sorted(nonzero, key=operator.itemgetter(1), reverse=True)  # in decreasing order
numb_to_retain = int(len(nonzero) * my_percentage)  # retain top 'my_percentage' % of words as keywords
keywords['tfidf'].append([t[0] for t in nonzero[:numb_to_retain]])

# print progress
if counter % round(len(vectors_list) / 5) == 0:
    print(counter)

##########################
# performance comparison #
##########################

perf = {mn: [] for mn in method_names}

for idx, truth in enumerate(keywds_gold_standard):
    for mn in method_names:
        # score the keywords of method 'mn' for document 'idx' against the gold standard
        perf[mn].append(accuracy_metrics(keywords[mn][idx], truth))

lkgs = len(keywds_gold_standard)

# print macro-averaged results (averaged at the collection level)
for k, v in perf.items():
    print(k + ' performance:\n')
    print('precision:', round(100 * sum(t[0] for t in v) / lkgs, 2))
    print('recall:', round(100 * sum(t[1] for t in v) / lkgs, 2))
    print('F-1 score:', round(100 * sum(t[2] for t in v) / lkgs, 2))
    print('\n')
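# 'terms_weights' pairs each vocabulary term with its tf-idf weight for the
# current document. A minimal sketch of how such pairs can be obtained with
# scikit-learn; 'docs' and the variable names are assumptions, not the
# script's actual preprocessing:

from sklearn.feature_extraction.text import TfidfVectorizer

docs = ['graph of words for keyword extraction', 'tf-idf weights words by rarity']
vectorizer = TfidfVectorizer()
doc_term_matrix = vectorizer.fit_transform(docs)
terms = vectorizer.get_feature_names_out()
# (term, weight) pairs for document 0
terms_weights = list(zip(terms, doc_term_matrix[0].toarray()[0]))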
print(counter)

##########################
# performance comparison #
##########################

perf_kc = []
perf_tfidf = []
perf_pr = []

for idx, truth in enumerate(keywords_gold_standard):
    # score each method with the 'accuracy_metrics' function
    perf_kc.append(accuracy_metrics(keywords_kc[idx], truth))
    perf_tfidf.append(accuracy_metrics(keywords_tfidf[idx], truth))
    perf_pr.append(accuracy_metrics(keywords_pr[idx], truth))

lkgs = len(keywords_gold_standard)

# print macro-averaged results (averaged at the collection level)
results = {'k-core': perf_kc, 'tfidf': perf_tfidf, 'PageRank': perf_pr}

for name, result in results.items():
    print(name + ' performance:\n')
    print('precision:', round(100 * sum(t[0] for t in result) / lkgs, 2))
    print('recall:', round(100 * sum(t[1] for t in result) / lkgs, 2))
    print('F-1 score:', round(100 * sum(t[2] for t in result) / lkgs, 2))
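# 'keywords_kc' refers to the k-core method: retain the words that survive in
# the densest part of the graph-of-words. A minimal sketch with networkx; the
# function name is an assumption and the actual graph construction lives
# elsewhere in the script:

import networkx as nx

def kcore_keywords(graph):
    # core decomposition needs a graph without self-loops
    graph.remove_edges_from(nx.selfloop_edges(graph))
    # core_number maps each node to the largest k such that the node
    # belongs to the k-core; retain the nodes of the main (highest) core
    core_numbers = nx.core_number(graph)
    k_max = max(core_numbers.values())
    return [word for word, k in core_numbers.items() if k == k_max]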
counter += 1

# print progress
if counter % 100 == 0:
    print(counter, 'abstracts processed')

##########################
# performance evaluation #
##########################

perf_gow = []
perf_tfidf = []
perf_pr = []
perf_rand = []

for ind, truth in enumerate(keywords_gold_standard):
    perf_gow.append(accuracy_metrics(keywords_gow[ind], truth))
    perf_tfidf.append(accuracy_metrics(keywords_tfidf[ind], truth))
    perf_pr.append(accuracy_metrics(keywords_pr[ind], truth))
    perf_rand.append(accuracy_metrics(keywords_rand[ind], truth))

lkgs = len(keywords_gold_standard)

# macro-averaged results (averaged at the collection level)
results = {
    'gow': perf_gow,
    'tfidf': perf_tfidf,
    'pr': perf_pr,
    'rand': perf_rand
}

for name, result in results.items():
    print(name + ' performance:\n')
    print('precision:', round(100 * sum(t[0] for t in result) / lkgs, 2))
    print('recall:', round(100 * sum(t[1] for t in result) / lkgs, 2))
    print('F-1 score:', round(100 * sum(t[2] for t in result) / lkgs, 2))
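# 'keywords_rand' is a random baseline built earlier in the script. A minimal
# sketch, assuming it draws as many words from the abstract's vocabulary as
# the other methods retain (name and signature are assumptions):

import random

def random_keywords(words, numb_to_retain):
    # draw 'numb_to_retain' distinct words uniformly at random;
    # sorting the vocabulary first makes the draw reproducible under a fixed seed
    vocab = sorted(set(words))
    return random.sample(vocab, min(numb_to_retain, len(vocab)))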