def main():
    """Launch two AI executables, play them against each other, and report."""
    args = parseArgs()

    # Start both AI programs from the parent directory with unbuffered pipes.
    ai_procs = [
        subprocess.Popen(['./' + args[prog_key], str(args[port_key])],
                         cwd='../', bufsize=0)
        for prog_key, port_key in (('program1', 'port1'), ('program2', 'port2'))
    ]
    time.sleep(3)  # give the AI processes a moment to come up

    # Select the game rule for this match.
    if args['rule'] == 'freestyle':
        rule = const.RULE_FREESTYLE
    else:
        rule = const.RULE_RENJU_BASIC

    first = player.Player(args['port1'], rule, args['program1_level'])
    second = player.Player(args['port2'], rule, args['program2_level'])

    # Run the head-to-head comparison for the requested number of rounds.
    compare.Compare(first, second, args['rounds']).run()

    # Shut the AI programs down before reporting.
    for proc in ai_procs:
        proc.terminate()

    printResult(args, first, second)
def main():
    """Configure logging, run the file comparison, publish results, upload."""
    log.configLog()

    comparer = compare.Compare()
    comparer.doCompare()

    # Deep-copy the diff report so downstream consumers cannot mutate the
    # comparer's internal state.
    diff_info = copy.deepcopy(comparer.difFileInfo)
    setResultPage(configure.oldPath, configure.newPath, diff_info)
    upFile()
def do_boxplots(df_in, splitting_fields, compare_fields):
    """Show a labeled comparison plot for each (splitting, compare) field pair.

    For every splitting field, partitions the data into high and low groups by
    dividing at the value closest to the median that is not equal to either the
    max or the min, then compares each compare field between the two groups and
    displays a plot with per-group mean labels.

    Parameters:
        df_in: DataFrame containing a 'root' column plus the fields below.
        splitting_fields: column names used to split rows into HI/LO groups.
        compare_fields: column names whose group means are compared.

    Returns:
        None -- this function only displays plots (unlike do_stats, which
        returns a DataFrame of statistics).
    """
    for sfield in splitting_fields:
        for cfield in compare_fields:
            print('PROCESSING: ' + sfield + ' ' + cfield)
            if sfield == cfield:
                continue
            if (sfield not in df_in.columns) or (cfield not in df_in.columns):
                continue
            df = df_in[['root', sfield, cfield]].dropna()
            amax = df[sfield].max()
            amin = df[sfield].min()
            amedian = df[sfield].median()
            if amin == amax:
                print('WARNING: no variation in splitting field ' + sfield)
            # Nudge the median off the extremes so both partitions are
            # non-empty when the median coincides with min or max.
            if amedian == amin:
                amedian += 0.0001
            elif amedian == amax:
                amedian -= 0.0001
            # BUGFIX: 'truth_val' was selected here but was never included in
            # df above, which raised a KeyError; select only available columns.
            HI = df[['root', cfield]][df[sfield] > amedian].groupby('root').mean()
            LO = df[['root', cfield]][df[sfield] <= amedian].groupby('root').mean()
            comp = compare.Compare(HI, LO, label_=cfield,
                                   x_label_=sfield + '_HI',
                                   y_label_=sfield + '_LO')
            stats_f = comp.calc_stats()
            title = cfield + ' partitioned by ' + sfield + '\nMWW $\\mu$ = '
            title += ''  # TODO: fill in the MWW statistic for the title
            HI_label = sfield + '_HI\n'
            HI_label += cfield + ' $\\mu$ = ' + '%.2f' % stats_f[0]
            LO_label = sfield + '_LO\n'
            LO_label += cfield + ' $\\mu$ = ' + '%.2f' % stats_f[1]
            plt.xticks([1, 2], [HI_label, LO_label])
            plt.show()
def do_stats(df_X, y):
    """Compare truthful and bluffing groups for every feature column.

    Parameters:
        df_X: DataFrame of feature columns.
        y: boolean row mask; True rows form the 'Truthful' group, the
           complement forms the 'Lying' group.

    Returns:
        DataFrame indexed by feature with columns
        ['mean T', 'mean B', 't-test', 'MWW', 'Cohens d'],
        sorted ascending by MWW.
    """
    stats = []
    for feature in df_X.columns:
        # BUGFIX: DataFrame.ix was removed in pandas 1.0; .loc has identical
        # semantics for boolean-mask row selection with a column label.
        A = df_X.loc[y, feature].dropna()
        B = df_X.loc[~y, feature].dropna()
        comp = compare.Compare(A, B, label_=feature,
                               x_label_='Truthful', y_label_='Lying')
        stats_f = comp.calc_stats()
        if feature == 'smile':  # presumably the feature of interest; only it gets plotted
            comp.plot_all()
        stats.append(stats_f)
    df_stats = pd.DataFrame(stats, index=df_X.columns,
                            columns=['mean T', 'mean B', 't-test', 'MWW', 'Cohens d'])
    df_stats.sort_values(by=['MWW'], ascending=True, inplace=True)
    return df_stats
def do_stats(df_in, splitting_fields, compare_fields):
    """Partition by each splitting field and compare truthful vs. bluffing.

    For every splitting field, partitions data into high and low groups by
    dividing at the value closest to the median that is not equal to either
    max or min. Within each partition, compares truthful (truth_val == 1)
    against bluffing (truth_val == 0) per-root means of each compare field.

    Parameters:
        df_in: DataFrame containing 'truth_val', 'root', and the fields below.
        splitting_fields: columns used to split rows into HI/LO partitions.
        compare_fields: columns whose group means are compared.

    Returns:
        DataFrame with columns ['sfield', 'cfield', 'mean T', 'mean B',
        't-test', 'MWW', 'Cohens d', 'HI quant', 'LO quant'], sorted
        ascending by MWW.
    """
    stats = []
    for sfield in splitting_fields:
        for cfield in compare_fields:
            print('PROCESSING: ' + sfield + ' ' + cfield)
            if sfield == cfield:
                continue
            if (sfield not in df_in.columns) or (cfield not in df_in.columns):
                continue
            df = df_in[['truth_val', 'root', sfield, cfield]].dropna()
            amax = df[sfield].max()
            amin = df[sfield].min()
            amedian = df[sfield].median()
            if amin == amax:
                print('WARNING: no variation in splitting field ' + sfield)
            # Nudge the median off the extremes so both partitions are
            # non-empty when the median coincides with min or max.
            if amedian == amin:
                amedian += 0.0001
            elif amedian == amax:
                amedian -= 0.0001
            # Quartiles are recorded alongside each result row for reference.
            hi_quant = df[sfield].quantile(.75)
            lo_quant = df[sfield].quantile(.25)
            HI = df[['root', 'truth_val', cfield]][df[sfield] > amedian].groupby('root').mean()
            LO = df[['root', 'truth_val', cfield]][df[sfield] <= amedian].groupby('root').mean()
            comp = compare.Compare(HI[cfield][HI['truth_val'] == 1],
                                   HI[cfield][HI['truth_val'] == 0],
                                   label_=cfield,
                                   x_label_=sfield + '_HIQ_T',
                                   y_label_=sfield + '_HIQ_B')
            stats_f = comp.calc_stats()
            # BUGFIX: the _HI row previously omitted [hi_quant, lo_quant],
            # producing 7-element rows against the 9 declared columns below.
            stats.append([sfield + '_HI', cfield] + list(stats_f) + [hi_quant, lo_quant])
            comp = compare.Compare(LO[cfield][LO['truth_val'] == 1],
                                   LO[cfield][LO['truth_val'] == 0],
                                   label_=cfield,
                                   x_label_=sfield + '_LOQ_T',
                                   y_label_=sfield + '_LOQ_B')
            stats_f = comp.calc_stats()
            stats.append([sfield + '_LO', cfield] + list(stats_f) + [hi_quant, lo_quant])
    print(stats)
    df_stats = pd.DataFrame(stats, columns=[
        'sfield', 'cfield', 'mean T', 'mean B', 't-test', 'MWW', 'Cohens d',
        'HI quant', 'LO quant'
    ])
    df_stats.sort_values(by=['MWW'], ascending=True, inplace=True)
    display(df_stats)
    return df_stats
import os
from os import path
import compare

# Compare each generated interpreter JSON against its expected result file,
# then total the run times recorded in the log.
gen_file_dir = '../bin/interpreters'
result_dir = 'test_result'
log_file = result_dir + '/log.txt'

gen_file_list = os.listdir(gen_file_dir)
for cur_gen_file_name in gen_file_list:
    # BUGFIX: split('.')[1] raised IndexError for dot-less filenames and
    # misidentified multi-dot names; endswith matches the intent directly.
    if cur_gen_file_name.endswith('.json'):
        cur_gen_name = cur_gen_file_name.split('.')[0]
        result_file_name = result_dir + '/' + cur_gen_name + '_result.json'
        if path.exists(result_file_name):
            print('Comparing ' + cur_gen_file_name + ' with ' + result_file_name + '...')
            compare_result = compare.Compare(
                gen_file_dir + '/' + cur_gen_file_name, result_file_name)
            if len(compare_result) != 0:
                print('Different OpCodes:')
                print(compare_result)
            else:
                print('Same result.')
        else:
            print(cur_gen_name + ' missing!!!')

# Sum the per-run times recorded in the log file.
time_list = []
with open(log_file, 'r') as fpLog:
    time_list = fpLog.readlines()
total_time = 0.0
for cur_time in time_list:
    total_time = total_time + float(cur_time)
import os
import sys

# Make the project root importable before pulling in the local module.
sys.path.insert(0, '../')
import compare

# Configuration for this comparison run, gathered in one place.
_settings = dict(
    n_neighbors=10,
    directory=os.path.abspath('../../data/test_data/'),
    mock=False,
    path='D',
    file_name='classified_values.txt',
    create_folder=False,
    export_file=True,
    folder_name='folder',
    separator=None,
    norm=True,
    rows=100,
    columns=100,
    coverage_tree_file='tree.txt',
    embedding_tree_file='tree1.txt',
)
m = compare.Compare(**_settings)

# NOTE(review): `source` is a shell builtin that may not exist in the shell
# os.system() spawns, and any environment it sets dies with the subshell --
# confirm this is the intended way to run delete.sh.
os.system('source delete.sh')
def compareOnePair(originCode, compCode, pairNum, compareMethod, commentList, tokenizerList, originLineNumber, blockSize):
    """Preprocess one (original, comparison) code pair and compute similarity.

    Runs each input through a filter chain (comment removers, blank removal,
    tokenizer), compares the preprocessed texts with the selected method, and
    returns the similarity percentage plus per-line match records.

    Parameters:
        originCode / compCode: raw source texts to compare.
        pairNum: identifier of this pair, echoed into every result record.
        compareMethod: 1-based index into [LCS, TokenMatching, EditDistance].
        commentList: per-input lists of comment-removal filter objects; may be
            empty to skip comment removal.
        tokenizerList: per-input tokenizer filters (indexed by input number).
        originLineNumber: total line count of the original, used as the
            similarity denominator.
        blockSize: block size forwarded to the compare process.

    Returns:
        list whose first element is [pairNum, similarity%] and whose remaining
        elements are dicts with 'pairID', 'originLine', 'compareLine', 'rType'
        (line numbers are 1-based positions in the raw inputs).
    """
    # NOTE(review): `output` is declared global but is only assigned inside the
    # filter loop below -- presumably exposed for inspection elsewhere; confirm.
    global output
    # Base filter chain; tokenizerList is a per-input pair resolved in the loop.
    preprocess_filter = [preprocessor.RemoveBlank(), tokenizerList]
    inputs = [originCode, compCode]
    outputs = []
    for i in range(len(inputs)):
        # lineNumInfo maps each surviving line back to its 0-based position in
        # the raw input; filters rewrite it as they drop/merge lines.
        lineNumInfo = []
        for j in range(len(inputs[i].split('\n'))):
            lineNumInfo.append(j)
        # Prepend this input's comment filters so they run before blank removal.
        if len(commentList) > 0:
            for comment in commentList[i]:
                preprocess_filter.insert(0, comment)
        for task in preprocess_filter:
            # A list entry holds one filter per input; pick the one for input i.
            if isinstance(task, list):
                task = task[i]
            task.setInput(inputs[i])
            task.setLineNumInfo(lineNumInfo)
            output, lineNumInfo = task.process()
            # Feed each filter's output into the next filter in the chain.
            inputs[i] = output
        outputs.append([output, lineNumInfo])
        # Remove the comment filters added for this input before the next pass.
        if len(commentList) > 0:
            for j in range(len(commentList[i])):
                preprocess_filter.pop(0)
    # ori, comp = preprocessor.numberMapping(outputs[0][0], outputs[1][0])
    # Index 0 is a placeholder so compareMethod can be used as a 1-based index.
    compareClass = [
        '', compare.LCS(), compare.TokenMatching(), compare.EditDistance()
    ]
    checkFunction = compareClass[compareMethod]
    compa = compare.Compare(checkFunction)
    compa.setInput(outputs[0][0], outputs[1][0])
    ret = compa.process(blockSize=blockSize)
    #mid_time = time.time()
    # Similarity = matched lines / total original lines, as a percentage.
    similLine = 0.0
    entireLine = originLineNumber
    similLine += len(ret.keys())
    similarity = similLine / entireLine * 100
    result = [[pairNum, similarity]]
    for key in ret.keys():
        # key: original line number - 1
        result.append({
            'pairID': pairNum,
            'originLine': outputs[0][1][key] + 1,
            'compareLine': outputs[1][1][ret[key][0]] + 1,
            'rType': ret[key][1]
        })
    return result
def main(haystack_files, needle_path, needle_direct, needle_flag, sig_flag, max_matches, margin):
    """Find needle loci in haystack files, format the matches, write a CSV.

    Pipeline: parse the needle and every haystack file, compare the needle's
    loci against each haystack (within `margin`, up to `max_matches` per
    needle), format the per-haystack overlap statistics into one table, and
    write it to ../results/results.csv.

    Parameters:
        haystack_files: paths of haystack loci files to search.
        needle_path: path of the needle loci file (used when needle_flag is False).
        needle_direct: directly-supplied needle input (used when needle_flag is True).
        needle_flag: True to use needle_direct instead of needle_path.
        sig_flag: drop-non-significant setting (stored; not used in this view).
        max_matches: maximum matches returned per needle gene.
        margin: margin added to each end of a needle locus when matching.
    """
    total_start = time.time()
    DIRECT_NEEDLE_FLAG = needle_flag
    # NOTE(review): the next three are stored but never read below -- possibly
    # consumed elsewhere or vestigial; confirm before removing.
    DROP_NON_SIGNIFICANT_FLAG = sig_flag
    MAX_MATCHES = max_matches
    MARGIN = margin
    NEEDLE_DIRECT = needle_direct

    # ---------------- Parse needle and haystack loci ----------------
    start = time.time()
    print("Parsing haystack and needle loci...")
    haystack_objs = []
    for file in haystack_files:
        haystack = pl.parse_loci(file)
        haystack_objs.append(haystack)
    if DIRECT_NEEDLE_FLAG:
        # needle_name records the needle label for the results output.
        needle_name = NEEDLE_DIRECT
        needle = pl.parse_loci(NEEDLE_DIRECT, DIRECT_INPUT_FLAG=True)
    else:
        needle_name = needle_path
        needle = pl.parse_loci(needle_path, DIRECT_INPUT_FLAG=False)
    delta = time.time() - start
    print("Parsing execution time: %fs" % delta)
    print("--------------------------------")

    # ---------------- Compare needle(s) to haystack(s) ----------------
    # For each haystack, a Compare object searches its loci for matches to the
    # needle's loci and exposes the resulting statistics as overlap_stats_df.
    start = time.time()
    print("Comparing needle(s) to haystack(s)...")
    overlap_stats = []
    for i in range(len(haystack_objs)):
        filename = ut.get_filename(haystack_objs[i].file_path, True)
        h_locus_df = haystack_objs[i].locus_df
        compare_obj = new_cp.Compare(needle.locus_df, h_locus_df, filename,
                                     max_matches, margin)
        overlap_stats.append(compare_obj.overlap_stats_df)
    delta = time.time() - start
    print("Done!")
    print("Comparison execution time: %fs" % delta)
    print("--------------------------------")

    # ---------------- Format match indices ----------------
    # Joins each haystack's overlap statistics with the needle/haystack gene
    # info and accumulates the per-haystack tables into one output DataFrame.
    start = time.time()
    print("Formatting matches...")
    output_dfs = []
    rename_flag = True  # only the first formatted table renames its columns
    for i in range(len(overlap_stats)):
        overlap_stats_df = overlap_stats[i]
        if not isinstance(overlap_stats_df, pd.DataFrame):
            print("this one is empty")
            # BUGFIX: previously passed the (non-DataFrame) stats object to
            # get_filename; report the haystack file the empty result came from.
            print(ut.get_filename(haystack_objs[i].file_path, True))
            continue
        h_df = haystack_objs[i].df
        n_df = needle.df
        output_df = fmt.Format(h_df, n_df, overlap_stats_df, rename_flag)
        output_df = output_df.output_df
        output_dfs.append(output_df)
        rename_flag = False
    output_df = ut.append_dfs(output_dfs)
    delta = time.time() - start
    print("Done!")
    print("Formatting execution time: %fs" % delta)
    print("--------------------------------")

    # ---------------- Write matches to file ----------------
    output_df.to_csv("../results/results.csv", index=False)

    total_delta = time.time() - total_start
    print("Total execution time: %fs" % total_delta)