def test():
    """Score algo_ii's citation predictions against papers30000.csv.

    For every CSV row, the ';'-separated keywords in column 9 are passed to
    ``algo_ii.predict_citations`` together with the authorities, and each
    ';'-separated citation in column 6 that is present in the returned
    recommendations counts as a hit.  A per-paper line (identified by
    column 1) is printed and written to report.txt, running totals are
    printed every 50 rows, and the overall hit ratio is printed at the end.
    """
    authorities = get_authorities()
    algo_ii.populate_iks_dict()

    total_hits = 0
    total_citations = 0

    # Context managers close both files even when a row raises part-way
    # through.  The report is opened in text mode because str lines are
    # written to it (binary mode rejects str on Python 3).
    with open('report.txt', 'w') as report, \
            open('papers30000.csv', 'r') as data:
        reader = csv.reader(data, delimiter=',', quotechar='"')
        for count, row in enumerate(reader, 1):
            index = row[1]
            citations = row[6].split(';')
            # Drop the last split element -- presumably the empty string left
            # by a trailing ';' in the keywords column (verify against data).
            keywords = row[9].split(';')[:-1]
            total_citations += len(citations)

            recommendations = algo_ii.predict_citations(keywords, authorities)
            # `in` replaces dict.has_key(), which no longer exists on Python 3.
            hits = sum(1 for citation in citations
                       if citation in recommendations)
            total_hits += hits

            line = (index + " accuracy: " + str(hits) + "/"
                    + str(len(citations)) + "\n")
            print(line)
            report.write(line)

            if count % 50 == 0:
                print("temp is:")
                print(total_hits)
                print("out of ")
                print(total_citations)

    print("achieved accuracy:")
    # An empty input file leaves total_citations at 0; report 0.0 instead of
    # raising ZeroDivisionError.
    print(total_hits / float(total_citations) if total_citations else 0.0)
def test_models( FULL_SIM, models_files ):
    """Evaluate each saved Doc2Vec model listed in *models_files*.

    The test papers (read from TEST_FILEPATH), the keyword dictionary and the
    authorities are loaded once up front and shared by every model; each
    model file is then loaded in turn and handed to test_model() along with
    FULL_SIM, which is passed through unchanged.
    """
    papers_under_test = pd.read_csv( TEST_FILEPATH )

    # NOTE: Only need for testing with AII:
    iks = populate_iks_dict()
    auths = initialize_authorities()

    for model_path in models_files:
        print( 'Testing ' + model_path )
        loaded = Doc2Vec.load( model_path )
        print( 'Model loaded.' )
        test_model( FULL_SIM, loaded, papers_under_test, iks, auths )
def specific_test():
    """Run _specific_test() on four fixed batches of paper IDs.

    Loads data-kw.csv and the authorities, populates algo_ii's keyword
    dictionary, then for each batch prints a "HUMAN EVAL #n" heading, calls
    _specific_test() with that batch, and prints a separator line.
    """
    import pandas as pd

    # NOTE: The first row of data-kw.csv should be the header, i.e.:
    # "INDEX","TITLE","AUTHORS","YEAR","PUB_VENUE","REF_ID","REF_NUM","ABSTRACT","KEYWORDS"
    papers = pd.read_csv( 'data-kw.csv' )
    authorities = get_authorities()
    algo_ii.populate_iks_dict()

    # One entry per human-evaluation round, in the order they are reported.
    eval_batches = (
        ['105542', '586892', '695628', '209104', '139162'],
        ['751328', '619377', '686318', '283022', '591411'],
        ['360556', '1022648', '1071218', '1112586', '451082'],
        ['90992', '784131', '1080100', '96640', '503999'],
    )

    for round_no, paper_ids in enumerate( eval_batches, 1 ):
        print( "HUMAN EVAL #%d:\n" % round_no )
        _specific_test( papers, authorities, paper_ids )
        print( "\n=================================\n" )
title = list( papers[papers['INDEX'] == pid]['TITLE'] )[0] authors = list( papers[papers['INDEX'] == pid]['AUTHORS'] )[0] summary = list( papers[papers['INDEX'] == pid]['ABSTRACT'] )[0] year = list( papers[papers['INDEX'] == pid]['YEAR'] )[0] jso['title'] = title jso['authors'] = authors jso['summary'] = summary.replace('\n', ' ') jso['year'] = str(year) recs.append( jso ) except: # Probably an IndexError (which happens when 'pid' not in DB), but cover our bases pass #return json.dumps( recs ) return recs # NOTE: Not actually JSON; it's a list of dicts def load_papers(): import pandas as pd return pd.read_csv( 'data-kw.csv' ) if __name__ == '__main__': if 'tagmap1' in DOC2VEC_MODEL_FILE: USE_DBLP_IDS = False else: USE_DBLP_IDS = True d2v_model = initialize_d2v_model() keywords_docsrels = populate_iks_dict() authorities = initialize_authorities() papers = load_papers() serve( d2v_model, keywords_docsrels, authorities, papers )