def evaluate_all_systems(p_dictoffolderpaths, p_run_all_systems=True):
    """Evaluate every retrieval system and write the result CSV files.

    When ``p_run_all_systems`` is true, ``execute_system('')`` is first
    called on Task1, Task2, Task3A and Task3B.  After that, each entry of
    ``p_dictoffolderpaths`` is passed to ``evaluate_system`` and the
    collected figures are written below ``ldirpath``:

    * one precision/recall CSV and one P@5/P@20 CSV per system,
    * one CSV with the mean values (MAP, MRR, P@5, P@20) of all systems,
    * one CSV with the average precision per system and query.

    Finally ``run_tests_for_models`` is called with the average precision
    values of all systems and the number of queries.

    NOTE(review): ``ldirpath`` is read here but never assigned inside this
    function; presumably it is defined in an enclosing/module scope --
    confirm.

    :param p_dictoffolderpaths: mapping of system key -> results folder
        path; each key is also handed to ``get_system_name``.
    :param p_run_all_systems: when true, (re)run all task systems before
        evaluating them.
    :return: None
    """

    def csv_path(stem):
        # Every output file lives directly below ldirpath and is a CSV file.
        return ldirpath + "/" + stem + CSV_FILE_EXT

    def write_lines(path, lines):
        # Start from an empty file, then dump the collected lines into it.
        create_file(path, '')
        convert_data_from_collection_to_file(path, lines)

    if p_run_all_systems:
        Task1.execute_system('')
        Task2.execute_system('')
        Task3A.execute_system('')
        Task3B.execute_system('')

    # Single-argument print(...) emits the same text as the print statement.
    print("Evaluating all systems...")
    create_directory(ldirpath)

    queries = get_sorted_dict(
        get_given_queries_in_dict(CACM_QUERY_FILE + FILE_EXT))

    means_by_system = {}
    avg_precision_by_system = {}

    for system_key, results_dir in p_dictoffolderpaths.iteritems():
        system_name = get_system_name(system_key)
        print("Evaluating system: " + system_name)

        # Collectors handed to evaluate_system; it is expected to fill them
        # in place (the two lists already carry their CSV header row).
        system_means = {}
        system_avg_precision = {}
        precision_recall_rows = ["Query Id,DocId,Rank,Precision,Recall"]
        p_at_k_rows = ["Query Id,P@5,P@20"]

        evaluate_system(
            results_dir,            # results folder of the system
            queries,                # all queries, keyed by query id
            system_means,           # mean values of this system
            system_avg_precision,   # average precision values of this system
            precision_recall_rows,  # precision/recall rows for all queries
            p_at_k_rows             # P@5 / P@20 rows for all queries
        )

        means_by_system[system_key] = system_means
        avg_precision_by_system[system_key] = system_avg_precision

        # Per-system files: precision/recall first, then P@K.
        write_lines(
            csv_path(FILE_FOR_PRECISON_RECALL_RESULTS_OF_SYSTEM + "_" + system_name),
            precision_recall_rows)
        write_lines(
            csv_path(FILE_FOR_PATK_RESULTS_OF_SYSTEM + "_" + system_name),
            p_at_k_rows)

    # Mean values of all systems, one row per system.
    mean_rows = ["System,MAP,MRR,P@5,P@20"]
    for system_key, means in means_by_system.iteritems():
        mean_rows.append(",".join([
            get_system_name(system_key),
            str(means[MAP_CONST]),
            str(means[MRR_CONST]),
            str(means[PAT5_CONST]),
            str(means[PAT20_CONST]),
        ]))
    write_lines(csv_path(FILE_FOR_ALL_SYSTEMS_MEAN_VALUES), mean_rows)

    # Average precision of all systems, one row per (system, query) pair.
    avg_precision_rows = ["System,Query Id,Average Precision"]
    for system_key, per_query in avg_precision_by_system.iteritems():
        system_name = get_system_name(system_key)
        for query_id, avg_precision in per_query.iteritems():
            avg_precision_rows.append(
                ",".join([system_name, str(query_id), str(avg_precision)]))
    write_lines(
        csv_path(FILE_FOR_ALL_SYSTEMS_AVG_PRECISION_VALUES),
        avg_precision_rows)

    # Significance tests across the systems.
    run_tests_for_models(avg_precision_by_system, len(queries))
# read command line params for stemmed data file lstemmeddatafile = CACM_STEM_FILE + FILE_EXT if len(input_arguments) > 2: lstemmeddatafile = input_arguments[2] # Tokenize raw text tokenize_raw_data(ldatafilesdir) # Write given queries to a file create_directory(DIR_FOR_OUTPUT_FILES) write_given_queries_to_file( CACM_QUERY_FILE + FILE_EXT, DIR_FOR_OUTPUT_FILES + "/" + FILE_FOR_QUERIES + FILE_EXT) Task1.execute_system(ldatafilesdir) Task2.execute_system(ldatafilesdir) Task3A.execute_system(ldatafilesdir) Task3B.execute_system(lstemmeddatafile) ldictoffolderpaths = {} ldictoffolderpaths[ 1] = DIR_FOR_OUTPUT_FILES + "/" + TASK1_CONST + "/" + DIR_FOR_BM25_OUTPUT ldictoffolderpaths[ 2] = DIR_FOR_OUTPUT_FILES + "/" + TASK1_CONST + "/" + DIR_FOR_TFIDF_OUTPUT ldictoffolderpaths[3] = LUCENE + "/" + LUCENE_RESULTS ldictoffolderpaths[ 4] = DIR_FOR_OUTPUT_FILES + "/" + TASK2_CONST + "/" + DIR_FOR_BM25_OUTPUT ldictoffolderpaths[ 5] = DIR_FOR_OUTPUT_FILES + "/" + TASK2_CONST + "/" + DIR_FOR_TFIDF_OUTPUT ldictoffolderpaths[ 6] = DIR_FOR_OUTPUT_FILES + "/" + TASK3A_CONST + "/" + DIR_FOR_BM25_OUTPUT