Ejemplo n.º 1
0
def main():
    """Run two AI programs against each other and print the outcome.

    Reads the run configuration from ``parseArgs()``, starts both AI
    executables as child processes (working directory ``../``, each given
    its port as its only argument), builds one ``player.Player`` per port,
    runs ``compare.Compare`` for ``args['rounds']`` rounds and finally calls
    ``printResult``.

    The child processes are terminated even when player setup or the
    comparison raises, so a failed run does not leave AI programs behind.
    """
    args = parseArgs()

    processes = []
    try:
        # Start the AI executables one after the other.
        for program_key, port_key in (('program1', 'port1'),
                                      ('program2', 'port2')):
            processes.append(
                subprocess.Popen(
                    ['./' + args[program_key], str(args[port_key])],
                    cwd='../',
                    bufsize=0))

        # NOTE(review): presumably gives the AI programs time to start
        # listening on their ports before the players connect - confirm.
        time.sleep(3)

        # Anything other than 'freestyle' falls back to the basic renju rule.
        if args['rule'] == 'freestyle':
            rule = const.RULE_FREESTYLE
        else:
            rule = const.RULE_RENJU_BASIC
        player1 = player.Player(args['port1'], rule, args['program1_level'])
        player2 = player.Player(args['port2'], rule, args['program2_level'])

        comp = compare.Compare(player1, player2, args['rounds'])
        comp.run()
    finally:
        # Always stop whatever was started, including after an exception.
        for process in processes:
            process.terminate()

    printResult(args, player1, player2)
Ejemplo n.º 2
0
def main():
    """Configure logging, run the comparison and publish its results.

    Steps, in order: set up logging, run ``compare.Compare().doCompare()``,
    hand a deep copy of the resulting ``difFileInfo`` together with the
    configured old/new paths to ``setResultPage``, then call ``upFile``.
    """
    log.configLog()

    comparer = compare.Compare()
    comparer.doCompare()

    # Work on a private copy so the comparer's own data is not shared.
    diff_snapshot = copy.deepcopy(comparer.difFileInfo)
    setResultPage(configure.oldPath, configure.newPath, diff_snapshot)

    upFile()
Ejemplo n.º 3
0
def do_boxplots(df_in, splitting_fields, compare_fields):
    """Compare each compare field between the HI and LO halves of a split.

    For every (splitting field, compare field) pair whose columns both exist
    in ``df_in``, the rows are partitioned at the median of the splitting
    field. The median is nudged by 0.0001 when it coincides with the minimum
    or the maximum so that neither side is trivially empty. Each partition is
    averaged per ``root`` and handed to ``compare.Compare``; the first two
    values of ``calc_stats()`` are used as the mu values in the x tick labels
    before ``plt.show()`` is called.

    Args:
        df_in: DataFrame holding a ``root`` column plus the requested fields.
        splitting_fields: column names used to split the rows into HI/LO.
        compare_fields: column names whose values are compared across groups.

    Returns:
        None. The function only prints progress and shows the figure.
    """
    for sfield in splitting_fields:
        for cfield in compare_fields:
            print('PROCESSING: ' + sfield + ' ' + cfield)
            if sfield == cfield:
                continue
            if (sfield not in df_in.columns) or (cfield not in df_in.columns):
                continue
            df = df_in[['root', sfield, cfield]].dropna()
            amax = df[sfield].max()
            amin = df[sfield].min()
            amedian = df[sfield].median()
            if amin == amax:
                print('WARNING: no variation in splitting field ' + sfield)
            if amedian == amin:
                amedian += 0.0001
            elif amedian == amax:
                amedian -= 0.0001

            # Only 'root' and cfield are selected: df holds no 'truth_val'
            # column, so asking for it here raised KeyError.
            HI = df[['root', cfield]][df[sfield] > amedian].groupby('root').mean()
            LO = df[['root', cfield]][df[sfield] <= amedian].groupby('root').mean()

            comp = compare.Compare(HI,
                                   LO,
                                   label_=cfield,
                                   x_label_=sfield + '_HI',
                                   y_label_=sfield + '_LO')
            stats_f = comp.calc_stats()

            # TODO: the title is not applied to the figure yet and the MWW
            # value still has to be appended to it.
            title = cfield + ' partitioned by ' + sfield + '\nMWW $\\mu$ = '
            title += ''

            HI_label = sfield + '_HI\n'
            HI_label += cfield + ' $\\mu$ = ' + '%.2f' % stats_f[0]
            LO_label = sfield + '_LO\n'
            LO_label += cfield + ' $\\mu$ = ' + '%.2f' % stats_f[1]
            plt.xticks([1, 2], [HI_label, LO_label])
            plt.show()
Ejemplo n.º 4
0
def do_stats(df_X, y):
    """Compare every column of ``df_X`` between the ``y`` and ``~y`` rows.

    Each feature is split with ``y`` as a boolean row mask; the two groups
    (labelled 'Truthful' and 'Lying') are passed to ``compare.Compare`` and
    the values returned by ``calc_stats()`` become one row of the result.
    The 'smile' feature is additionally plotted via ``plot_all()``.

    Args:
        df_X: DataFrame with one column per feature.
        y: boolean mask over the rows of ``df_X``; it must support ``~``
            (e.g. a boolean Series or array).

    Returns:
        DataFrame indexed by feature name with the columns 'mean T',
        'mean B', 't-test', 'MWW' and 'Cohens d', sorted ascending by 'MWW'.
    """
    stats = []
    for feature in df_X.columns:
        # .loc replaces the .ix indexer, which no longer exists in pandas;
        # with a boolean mask both select the same rows.
        A = df_X.loc[y, feature].dropna()
        B = df_X.loc[~y, feature].dropna()
        comp = compare.Compare(A, B, label_=feature,
                               x_label_='Truthful', y_label_='Lying')
        stats_f = comp.calc_stats()
        if feature == 'smile':
            comp.plot_all()
        stats.append(stats_f)

    df_stats = pd.DataFrame(
        stats,
        index=df_X.columns,
        columns=['mean T', 'mean B', 't-test', 'MWW', 'Cohens d'])
    df_stats.sort_values(by=['MWW'], ascending=True, inplace=True)
    return df_stats
Ejemplo n.º 5
0
def do_stats(df_in, splitting_fields, compare_fields):
    """Compare ``truth_val`` groups inside the HI and LO halves of a split.

    For every (splitting field, compare field) pair whose columns both exist
    in ``df_in``, the rows are partitioned at the median of the splitting
    field (nudged by 0.0001 when it equals the minimum or the maximum). Each
    partition is averaged per ``root``; inside it the compare-field values
    with mean ``truth_val`` == 1 (labelled ``_T``) are compared against those
    with mean ``truth_val`` == 0 (labelled ``_B``) through ``compare.Compare``.

    Args:
        df_in: DataFrame with ``truth_val`` and ``root`` columns plus the
            requested fields.
        splitting_fields: column names used to split the rows into HI/LO.
        compare_fields: column names compared between the truth groups.

    Returns:
        DataFrame with one row per (split half, compare field), holding the
        values from ``calc_stats()`` plus the 0.75 and 0.25 quantiles of the
        splitting field, sorted ascending by 'MWW'. The frame is also passed
        to ``display`` and the raw rows are printed.
    """
    stats = []
    for sfield in splitting_fields:
        for cfield in compare_fields:
            print('PROCESSING: ' + sfield + ' ' + cfield)
            if sfield == cfield:
                continue
            if (sfield not in df_in.columns) or (cfield not in df_in.columns):
                continue
            df = df_in[['truth_val', 'root', sfield, cfield]].dropna()
            amax = df[sfield].max()
            amin = df[sfield].min()
            amedian = df[sfield].median()
            if amin == amax:
                print('WARNING: no variation in splitting field ' + sfield)
            if amedian == amin:
                amedian += 0.0001
            elif amedian == amax:
                amedian -= 0.0001

            # The quantiles describe the splitting field, so they belong on
            # both the HI and the LO row; previously only the LO row carried
            # them and the HI row was silently padded with NaN.
            quantiles = [df[sfield].quantile(.75), df[sfield].quantile(.25)]

            selected = df[['root', 'truth_val', cfield]]
            halves = (
                ('_HI', '_HIQ', selected[df[sfield] > amedian]),
                ('_LO', '_LOQ', selected[df[sfield] <= amedian]),
            )
            for row_suffix, label_suffix, rows in halves:
                means = rows.groupby('root').mean()
                comp = compare.Compare(
                    means[cfield][means['truth_val'] == 1],
                    means[cfield][means['truth_val'] == 0],
                    label_=cfield,
                    x_label_=sfield + label_suffix + '_T',
                    y_label_=sfield + label_suffix + '_B')
                stats_f = comp.calc_stats()
                stats.append([sfield + row_suffix, cfield] + list(stats_f) +
                             quantiles)

    print(stats)
    df_stats = pd.DataFrame(stats,
                            columns=[
                                'sfield', 'cfield', 'mean T', 'mean B',
                                't-test', 'MWW', 'Cohens d', 'HI quant',
                                'LO quant'
                            ])
    df_stats.sort_values(by=['MWW'], ascending=True, inplace=True)
    display(df_stats)
    return df_stats
Ejemplo n.º 6
0
import os
from os import path
import compare

# Directory holding the generated '<name>.json' files and the directory
# holding the '<name>_result.json' files they are compared against.
gen_file_dir = '../bin/interpreters'
result_dir = 'test_result'
log_file = result_dir + '/log.txt'

gen_file_list = os.listdir(gen_file_dir)
for cur_gen_file_name in gen_file_list:
    name_parts = cur_gen_file_name.split('.')
    # Entries without a dot have no second part; skip them instead of
    # failing with IndexError.
    if len(name_parts) < 2 or name_parts[1] != 'json':
        continue

    cur_gen_name = name_parts[0]
    result_file_name = result_dir + '/' + cur_gen_name + '_result.json'
    if not path.exists(result_file_name):
        print(cur_gen_name + ' missing!!!')
        continue

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('Comparing ' + cur_gen_file_name + ' with ' + result_file_name + '...')
    compare_result = compare.Compare(
        gen_file_dir + '/' + cur_gen_file_name, result_file_name)
    if len(compare_result) != 0:
        print('Different OpCodes:')
        print(compare_result)
    else:
        print('Same result.')

# Sum the values recorded in the log file, one number per line.
time_list = []
with open(log_file, 'r') as fpLog:
    time_list = fpLog.readlines()

total_time = 0.0
for cur_time in time_list:
    # A blank line (e.g. a trailing newline) carries no value; float() would
    # raise ValueError on it.
    if not cur_time.strip():
        continue
    total_time = total_time + float(cur_time)
Ejemplo n.º 7
0
import os
import sys
sys.path.insert(0, '../')
import compare

# Run the comparison against the bundled test data with a fixed
# configuration; the instance is kept in `m`.
m = compare.Compare(n_neighbors=10,
                    directory=os.path.abspath('../../data/test_data/'),
                    mock=False,
                    path='D',
                    file_name='classified_values.txt',
                    create_folder=False,
                    export_file=True,
                    folder_name='folder',
                    separator=None,
                    norm=True,
                    rows=100,
                    columns=100,
                    coverage_tree_file='tree.txt',
                    embedding_tree_file='tree1.txt')

# os.system runs the command through /bin/sh, where the bash-only `source`
# builtin may be missing; the POSIX `.` builtin with an explicit ./ path
# runs delete.sh from the current directory under any sh.
os.system('. ./delete.sh')
Ejemplo n.º 8
0
def compareOnePair(originCode, compCode, pairNum, compareMethod, commentList,
                   tokenizerList, originLineNumber, blockSize):
    """Preprocess two sources, compare them and report the matching lines.

    Both sources pass through the same filter chain: the comment filters for
    that source (if any, placed in front in reverse order), blank removal,
    and the tokenizer for that source. Every filter receives the current
    text and the current line-number mapping and returns updated versions of
    both. The processed texts are then compared with the method selected by
    ``compareMethod`` (1 = LCS, 2 = TokenMatching, 3 = EditDistance).

    Returns a list whose first element is ``[pairNum, similarity]``, where
    similarity is the number of matched lines divided by
    ``originLineNumber`` times 100, followed by one dict per matched line
    with the keys 'pairID', 'originLine', 'compareLine' and 'rType'.

    Side effect: the module-level ``output`` is overwritten with the text
    produced by the most recent filter.
    """
    global output

    base_filters = [preprocessor.RemoveBlank(), tokenizerList]
    sources = [originCode, compCode]
    processed = []

    for idx in range(len(sources)):
        # Initially every line maps onto its own index.
        line_map = list(range(len(sources[idx].split('\n'))))

        # Per-source chain; inserting at the front reverses the comment
        # filters relative to their order in commentList[idx].
        pipeline = list(base_filters)
        if len(commentList) > 0:
            for comment_filter in commentList[idx]:
                pipeline.insert(0, comment_filter)

        for stage in pipeline:
            # A list entry holds one filter per source; pick this source's.
            if isinstance(stage, list):
                stage = stage[idx]
            stage.setInput(sources[idx])
            stage.setLineNumInfo(line_map)
            output, line_map = stage.process()
            sources[idx] = output

        processed.append([output, line_map])

    # Index 0 is a placeholder so compareMethod can be used directly.
    comparators = [
        '',
        compare.LCS(),
        compare.TokenMatching(),
        compare.EditDistance(),
    ]
    comparer = compare.Compare(comparators[compareMethod])
    comparer.setInput(processed[0][0], processed[1][0])
    matches = comparer.process(blockSize=blockSize)

    origin_line_map = processed[0][1]
    comp_line_map = processed[1][1]

    matched_count = 0.0 + len(matches.keys())
    similarity = matched_count / originLineNumber * 100

    report = [[pairNum, similarity]]
    # Each key is an original line number minus one.
    for key in matches.keys():
        report.append({
            'pairID': pairNum,
            'originLine': origin_line_map[key] + 1,
            'compareLine': comp_line_map[matches[key][0]] + 1,
            'rType': matches[key][1]
        })
    return report
Ejemplo n.º 9
0
def main(haystack_files, needle_path, needle_direct, needle_flag, sig_flag, max_matches, margin):
	"""Match the needle loci against every haystack file and write a CSV.

	Phases, each timed and reported on stdout:
	1. Parse every haystack file and the needle with ``pl.parse_loci``. The
	   needle comes from ``needle_direct`` when ``needle_flag`` is truthy,
	   otherwise from ``needle_path``.
	2. Build one ``new_cp.Compare`` per haystack and collect its
	   ``overlap_stats_df``.
	3. Turn every overlap result that is a DataFrame into an output frame
	   with ``fmt.Format``; the rename flag is True only for the first frame
	   that is formatted. Other results are reported and skipped.
	4. Concatenate the frames with ``ut.append_dfs`` and write them to
	   ``../results/results.csv``.

	``sig_flag`` is accepted but not used by this function. Nothing is
	returned.
	"""
	run_started = time.time()

	# ---- Parse needle and haystack loci ----
	phase_started = time.time()
	print("Parsing haystack and needle loci...")

	haystack_objs = [pl.parse_loci(haystack_file) for haystack_file in haystack_files]

	needle_source = needle_direct if needle_flag else needle_path
	needle = pl.parse_loci(needle_source, DIRECT_INPUT_FLAG=bool(needle_flag))

	elapsed = time.time() - phase_started
	print("Parsing execution time: %fs" % elapsed)
	print("--------------------------------")

	# ---- Compare the needle to each haystack ----
	# One comparison object per haystack; it does the searching and exposes
	# the match statistics as overlap_stats_df.
	phase_started = time.time()
	print("Comparing needle(s) to haystack(s)...")

	overlap_stats = []
	for haystack in haystack_objs:
		filename = ut.get_filename(haystack.file_path, True)
		h_locus_df = haystack.locus_df
		comparison = new_cp.Compare(needle.locus_df, h_locus_df, filename, max_matches, margin)
		overlap_stats.append(comparison.overlap_stats_df)

	elapsed = time.time() - phase_started
	print("Done!")
	print("Comparison execution time: %fs" % elapsed)
	print("--------------------------------")

	# ---- Format the matches ----
	phase_started = time.time()
	print("Formatting matches...")

	output_dfs = []
	for haystack, overlap_stats_df in zip(haystack_objs, overlap_stats):
		if not isinstance(overlap_stats_df, pd.DataFrame):
			print("this one is empty")
			print(ut.get_filename(overlap_stats_df))
			continue

		# The rename flag is True only while nothing has been formatted yet.
		formatted = fmt.Format(haystack.df, needle.df, overlap_stats_df, not output_dfs)
		output_dfs.append(formatted.output_df)

	output_df = ut.append_dfs(output_dfs)

	elapsed = time.time() - phase_started
	print("Done!")
	print("Formatting execution time: %fs" % elapsed)
	print("--------------------------------")

	# ---- Write the matches to file ----
	output_df.to_csv("../results/results.csv", index=False)

	total_elapsed = time.time() - run_started
	print("Total execution time: %fs" % total_elapsed)