def test():
    """Score citation recommendations for every row of 'papers30000.csv'.

    For each CSV row the paper index is read from column 1, the
    ';'-separated actual citations from column 6 and the ';'-separated
    keywords from column 9 (the last keyword element is dropped).
    ``algo_ii.predict_citations`` is called with the keywords and the
    authorities, and a "hit" is counted for every actual citation that
    appears among the returned recommendations.

    One "<index> accuracy: <hits>/<citations>" line per paper is printed
    and appended to 'report.txt'. Running totals are printed every 50
    rows, and the overall hit ratio is printed once all rows have been
    processed.
    """
    authorities = get_authorities()
    algo_ii.populate_iks_dict()

    total_hits = 0
    total_citations = 0

    # The report receives text, so it is opened in text mode; `with`
    # guarantees both files are closed even if a row fails to process.
    with open('report.txt', 'w') as report, \
            open('papers30000.csv', 'r') as data:
        reader = csv.reader(data, delimiter=',', quotechar='"')

        for count, row in enumerate(reader, 1):
            index = row[1]
            citations = row[6].split(';')
            keywords = row[9].split(';')
            # Drop the last element produced by the split, as the
            # original code did.
            del keywords[-1]

            total_citations += len(citations)
            recommendations = algo_ii.predict_citations(keywords, authorities)

            # `in` replaces the Python-2-only dict.has_key().
            hits = sum(1 for citation in citations
                       if citation in recommendations)
            total_hits += hits

            line = (index + " accuracy: " + str(hits) + "/"
                    + str(len(citations)) + "\n")
            print(line)
            report.write(line)

            if count % 50 == 0:
                print("temp is:")
                print(total_hits)
                print("out of ")
                print(total_citations)

    print("achieved accuracy:")
    # An empty input file yields no citations; avoid dividing by zero.
    if total_citations:
        print(total_hits / float(total_citations))
    else:
        print(0.0)
def _specific_test(papers, authorities, test_ids):
    """Print recommended versus actual citations for selected papers.

    Args:
        papers: table indexable by the 'INDEX', 'ABSTRACT', 'KEYWORDS'
            and 'REF_ID' columns and filterable with a boolean mask
            (presumably a pandas DataFrame -- confirm against caller).
        authorities: passed through unchanged to
            ``algo_ii.predict_citations``.
        test_ids: iterable of paper ids; each one is converted with
            ``int()`` before being matched against the 'INDEX' column.

    Raises:
        IndexError: if an id matches no row in ``papers``.
    """
    for pid in test_ids:
        # str() keeps the output identical for string ids while also
        # accepting ids that are already integers.
        print('ID = ' + str(pid))
        pid = int(pid)

        # Select the matching rows once instead of re-filtering the
        # whole table for every column that is read.
        matches = papers[papers['INDEX'] == pid]

        abstract = matches['ABSTRACT'].iloc[0]
        print(abstract[:45] + '...')

        keywords = matches['KEYWORDS'].iloc[0].split(';')
        # Drop the last element produced by the split, as the original
        # code did.
        del keywords[-1]

        recommendations = algo_ii.predict_citations(keywords, authorities)
        citations = matches['REF_ID'].iloc[0].split(';')

        # `in` replaces the Python-2-only dict.has_key().
        hits = sum(1 for citation in citations
                   if citation in recommendations)

        print('Recommendations for ' + str(pid) + ':')
        print('> ' + str(sorted(recommendations.keys())))
        print('Actuals:\n> ' + str(sorted(citations)))
        print('Hits = ' + str(hits) + '\n')