def get_test_xml_csv(dir_res='/home/ise/test/res'):
    """
    Walk every 'Result' dir under dir_res, parse each xml test report and
    collect one record per xml file, then write them to <parent>/res.csv.
    :param dir_res: root dir holding <bug_name>_<bug_id>/.../Result dirs
    :return: path of the written res.csv
    """
    d_l = []
    res = pt.walk_rec(dir_res, [], 'Result', False)
    for item_dir in res:
        # parent dir name assumed to be '<bug_name>_<bug_id>' -- TODO confirm
        bug_id = str(item_dir).split('/')[-2].split('_')[1]
        if bug_id == '133':
            # leftover debug hook for bug 133 (no-op print)
            print ""
        bug_name = str(item_dir).split('/')[-2].split('_')[0]
        xml_files = pt.walk_rec(item_dir, [], '.xml')
        for xml_item in xml_files:
            if str(xml_item).endswith('xml') is False:
                continue
            d = None
            name_dir = str(xml_item).split('/')[-2]
            name_file_xml = str(xml_item).split('/')[-1]
            # dir name assumed to encode: ..._<date x4>_t=<budget>_it=<iter>_<mode>
            test_mode = str(name_dir).split('_')[-1]
            test_it = str(name_dir).split('_')[-2].split('=')[1]
            test_time_b = str(name_dir).split('_')[-3].split('=')[1]
            test_date = '_'.join(str(name_dir).split('_')[3:7])
            d = pars_xml_test_file(xml_item)
            # enrich the parsed record with metadata pulled from the path
            d['test_mode'] = test_mode
            d['test_it'] = test_it
            d['test_time_b'] = test_time_b
            d['test_date'] = test_date
            d['bug_id'] = bug_id
            d['bug_name'] = bug_name
            d_l.append(d)
    df = pd.DataFrame(d_l)
    dir_father = '/'.join(str(dir_res).split('/')[:-1])
    df.to_csv("{}/res.csv".format(dir_father))
    return "{}/res.csv".format(dir_father)
def get_weka_info(p_path, mode):
    """
    Map every tag under p_path to its weka artifacts: prediction csv ('name'),
    test .arff ('test'), model .arff ('model') and its 'sort_index'.
    :param p_path: root dir holding 'arff_<mode>' and 'pred_1_<mode>' dirs
    :param mode: suffix selecting which artifact dirs to read
    :return: dict tag_name -> artifact dict
    """
    arff_path = None
    pred_1_path = None
    for entry in pt.walk_rec(p_path, [], '_{}'.format(mode), False):
        if str(entry).endswith('arff_{}'.format(mode)):
            arff_path = entry
        elif str(entry).endswith('pred_1_{}'.format(mode)):
            pred_1_path = entry
    tag_dirs = pt.walk_rec(pred_1_path, [], '', False, lv=-1)
    model_files = pt.walk_rec(arff_path, [], '.arff')
    d_tags = {}
    for tag_dir in tag_dirs:
        # dir base name layout: <sort_index>_<tag name parts...>
        parts = str(tag_dir).split('/')[-1].split('_')
        tag_name = '_'.join(parts[1:])
        d_tags[tag_name] = {'sort_index': parts[0], 'model': None}
        for leaf in pt.walk_rec(tag_dir, [], ''):
            if str(leaf).endswith('.csv'):
                d_tags[tag_name]['name'] = leaf
            elif str(leaf).endswith('.arff'):
                d_tags[tag_name]['test'] = leaf
    for model_file in model_files:
        tag_name = str(model_file).split('/')[-1].split('.')[0]
        if tag_name in d_tags:
            d_tags[tag_name]['model'] = model_file
        else:
            d_tags[tag_name] = {'model': model_file, 'test': None,
                                'name': None, 'sort_index': None}
    return d_tags
def get_diff_fix_buggy(root_dir_bug, root_dir_fix, if_count_tset_cases=False):
    """
    Pair buggy/fixed run-output .txt files by basename, diff each pair and
    collect junit failure records (or just per-side test-case counts).
    :param root_dir_bug: dir with the buggy-run .txt outputs
    :param root_dir_fix: dir with the fixed-run .txt outputs
    :param if_count_tset_cases: when True only count test cases per side
    :return: list of record dicts
    """
    d_start = {}
    d = {'bug': {}, 'fix': {}}  # NOTE(review): unused local
    res_fix = pt.walk_rec(root_dir_fix, [], '.txt')
    res_bug = pt.walk_rec(root_dir_bug, [], '.txt')
    # index files by basename without the '.txt' suffix so both sides match
    for item_fix in res_fix:
        name = str(item_fix).split('/')[-1][:-4]
        d_start[name] = {'fix': item_fix}
    for item_bug in res_bug:
        name_bug = str(item_bug).split('/')[-1][:-4]
        if name_bug in d_start:
            d_start[name_bug]['bug'] = item_bug
        else:
            d_start[name_bug] = {'bug': item_bug}
    # keep only the names that exist on both sides
    d_both = {}
    for ky in d_start:
        if 'bug' in d_start[ky] and 'fix' in d_start[ky]:
            d_both[ky] = {'bug': d_start[ky]['bug'], 'fix': d_start[ky]['fix']}
    # print "missing --> {}".format(len(d_start)-len(d_both))
    d_l = []
    for key_i in d_both.keys():
        diff_bug, diff_fix = diff_function(d_both[key_i]['bug'], d_both[key_i]['fix'])
        # pars the bug_id and itr number from the path dir
        d_buggy = pars_bug_id_iter_id(d_both[key_i]['bug'])
        d_buggy['mode'] = 'buggy'
        d_fixed = pars_bug_id_iter_id(d_both[key_i]['fix'])
        d_fixed['mode'] = 'fixed'
        # Test add name
        d_buggy['name'] = str(key_i).split('_')[0]
        d_fixed['name'] = str(key_i).split('_')[0]
        # pars the itration number and time budget
        if if_count_tset_cases:
            # count-only mode: record sizes and skip the junit parsing below
            num_test_bug = tests_regex_count(d_both[key_i]['bug'])
            num_test_fix = tests_regex_count(d_both[key_i]['fix'])
            d_fixed['num_of_test_cases'] = num_test_fix
            d_buggy['num_of_test_cases'] = num_test_bug
            d_l.append(d_buggy)
            d_l.append(d_fixed)
            continue
        # pull every 'testNN ... java.lang...' junit failure block from the diff
        list_junit_res = get_regex_all(diff_bug, r'(test\d+.+\n\njava.lang.+)', 0, False)
        info_list = pars_junit_regex(list_junit_res, d_extand=d_buggy)
        if info_list is not None:
            d_l.extend(info_list)
        list_junit_res = get_regex_all(diff_fix, r'(test\d+.+\n\njava.lang.+)', 0, False)
        info_list = pars_junit_regex(list_junit_res, d_extand=d_fixed)
        if info_list is not None:
            d_l.extend(info_list)
    return d_l
def get_static_dir(root):
    """
    Collect static stats (time budget, project, bug id, Evosuite dir and
    generated-test count) for every 'P_' bug dir under root; write static.csv.
    :param root: root dir scanned for 'P_' bug dirs
    """
    rows = []
    for bug_folder in pt.walk_rec(root, [], 'P_', False):
        parent_name = str(bug_folder).split('/')[-2]
        evo_path = '{}/Evo_Test'.format(bug_folder)
        has_evo = os.path.isdir(evo_path)
        if has_evo:
            # each test comes as a pair of .java files, hence the halving
            generated = len(pt.walk_rec(evo_path, [], '.java')) / float(2)
        else:
            generated = -1
        rows.append({
            "time_budget": parent_name.split('=')[1],
            'proj_name': parent_name.split('_')[0],
            'bug_id': str(bug_folder).split('/')[-1].split('_')[3],
            'evo_dir': has_evo,
            'num_test_generated': generated,
        })
    pd.DataFrame(rows).to_csv('{}/static.csv'.format(root))
def count_package_number(commit, p_name, prefix='tools'): repo_path = '/home/ise/bug_miner/{0}/{0}'.format(p_name) run_GIT_command_and_log(repo_path, 'git checkout {}'.format(commit), None, None, False) src_folder = pt.walk_rec(repo_path, [], prefix, False) src_folder = [ x for x in src_folder if str(x).__contains__('/resources/') is False ] src_folder = [ x for x in src_folder if str(x).__contains__('/test/') is False ] src_folder = [ x for x in src_folder if str(x).__contains__('/ftp2/') is False ] src_folder = [ x for x in src_folder if str(x).__contains__('/opennlp/tools') is True ] #src_folder print src_folder if len(src_folder) == 1: folderz = pt.walk_rec(src_folder[0], [], '', False) num_of_package = len(folderz) return num_of_package return None
def get_miss_classes(project_path_repo, fp_name_dir, out_info): ''' the main func that count the missing class inrespect to the bug commit and FP results tags ''' project = str(project_path_repo).split('/')[-1] atg_path = os.getcwd() df_bug = pd.read_csv('{}/tmp_files/{}_bug.csv'.format(atg_path, project)) print list(df_bug) res_file = pt.walk_rec(fp_name_dir, [], 'Most_names') d_name_tag = {} tag_l = [] for item in res_file: tag_name = '_'.join(str(item).split('/')[-2].split('_')[1:]) tag_index = str(item).split('/')[-2].split('_')[0] tag_l.append([tag_name, int(tag_index)]) d_name_tag[tag_name] = {'csv': item, 'index': tag_index} # get sorted list tags sorted_tags = sorted(tag_l, key=lambda tup: tup[-1]) tags_sort = [] for item_t in sorted_tags: tags_sort.append(item_t[0]) # Go over each bug commit and get the list of classes # df_bug.apply(get_miss_classes_applyer,out_dir=out_info,repo_path=project_path_repo,axis=1) # make a comparison res_csv = pt.walk_rec(out_info, [], '.csv') for item in res_csv: df_commit = pd.read_csv(item, index_col=0) if len(df_commit) == 0: continue tag_bug = df_commit['tag_bug'].iloc[0] tag_bug = str(tag_bug).replace('-', '_') df_fp_res_tag_cur = pd.read_csv(d_name_tag[tag_bug]['csv'], names=['path']) index = tags_sort.index(tag_name) if index > 0: old_tag = tags_sort[index - 1] df_fp_res_tag_old = pd.read_csv(d_name_tag[old_tag]['csv'], names=['path']) df_fp_res_tag_old['name'] = df_fp_res_tag_old['path'].apply( lambda x: path_to_package_name(None, x)) else: df_fp_res_tag_old = None df_fp_res_tag_cur['name'] = df_fp_res_tag_cur['path'].apply( lambda x: path_to_package_name(None, x)) df_commit['is_exists'] = df_commit.apply(is_exists_helper, df_cur=df_fp_res_tag_cur, df_old=df_fp_res_tag_old, axis=1) df_commit.to_csv('{}_mod.csv'.format(str(item)[:-4])) # get all mod file res_mod(out_info)
def dependency_getter(repo, dir_jars, m2='/home/ise/.m2/repository'): ''' get all dependency jars ''' res_jar2 = pt.walk_rec('/home/ise/.m2/repository', [], '.jar') print len(res_jar2) res_jar2 = [x for x in res_jar2 if str(x).split('.')[-1] == 'jar'] print len(res_jar2) res_jar1 = pt.walk_rec('{}/{}'.format(repo, dir_jars), [], '.jar') jarz = res_jar2 + res_jar1 str_jarz = ':'.join(jarz) return str_jarz
def make_jar_file(project_dir_path):
    '''
    make a jar file with the builder mvn or ant
    :param project_dir_path: bug dir containing a 'fixed' checkout
    :return: path of the built jar (its pre-move location), or None
    '''
    fix_dir = '{}/fixed'.format(project_dir_path)
    log_dir = '{}/log'.format(project_dir_path)
    mvn_builder = False
    ant_builder = False
    # detect the build system by its descriptor file
    if os.path.isfile('{}/pom.xml'.format(fix_dir)):
        mvn_builder = True
    if os.path.isfile('{}/build.xml'.format(fix_dir)):
        ant_builder = True
    os.chdir(fix_dir)
    out_jar = pt.mkdir_system(project_dir_path, 'jar_dir', False)
    if mvn_builder:
        # build the jar, skipping the test phase; log stdout/stderr
        command = 'mvn package -Dmaven.test.skip=true'
        process = Popen(shlex.split(command), stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
        loging_os_command(log_dir, 'jar_command', stdout, "stdout")
        loging_os_command(log_dir, 'jar_command', stderr, "stderr")
        # os.system(command)
        ans = pt.walk_rec("{}/target".format(fix_dir), [], '.jar')
        # copy all transitive dependencies into the jar_dir
        command = 'mvn dependency:copy-dependencies -DoutputDirectory={}'.format(
            out_jar)
        process = Popen(shlex.split(command), stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
        loging_os_command(log_dir, 'copy_dependencies', stdout, "stdout")
        loging_os_command(log_dir, 'copy_dependencies', stderr, "stderr")
        #os.system(command)
        if len(ans) == 1:
            cp_command = 'mv {} {}'.format(ans[0], out_jar)
            print '[OS] {}'.format(cp_command)
            os.system(cp_command)
            # NOTE(review): returns the jar's pre-move path, which no longer
            # exists after the 'mv' above -- confirm callers expect this
            return ans[0]
    if ant_builder:
        command = 'ant jar'
        process = Popen(shlex.split(command), stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
        loging_os_command(log_dir, 'jar_command', stdout, "stdout")
        loging_os_command(log_dir, 'jar_command', stderr, "stderr")
        # os.system(command)
        ans = pt.walk_rec("{}/target".format(fix_dir), [], '.jar')
        if len(ans) == 1:
            cp_command = 'mv {} {}'.format(ans[0], out_jar)
            print '[OS] {}'.format(cp_command)
            os.system(cp_command)
            return ans[0]
    # no builder detected, or zero/multiple jars were produced
    return None
def to_del(p='/home/ise/test/pom_3'): res_tiks = pt.walk_rec(p, [], 'TIKA', False, lv=-1) res_org = pt.walk_rec(p, [], 'org', False, lv=-6) print "res_tiks =", len(res_tiks) print "res_org =", len(res_org) res_org = ['/'.join(str(x).split('/')[:-2]) for x in res_org] dif = [] for y in res_tiks: if y not in res_org: dif.append(y) ans = [] for item in dif: ans.append(str(item).split('/')[-1]) # print str(item).split('/')[-1] exit()
def make_FP_pred(dir_target='/home/ise/tmp_d4j/out_pred/out/Lang/Lang_2'): ''' concat the two csv files from the weka dir to one big Dateframe and make the probabily for bug, by 1-probablit for a vaild component ''' out = '/'.join(str(dir_target).split('/')[:-1]) name = str(dir_target).split('/')[-1] p_name = str(name).split('_')[0] res_test_set = pt.walk_rec(dir_target, [], 'testing__results_pred.csv') most_csv = pt.walk_rec(dir_target, [], 'Most_names_File.csv') if len(most_csv) == 1 and len(res_test_set) == 1 is False: print "[Error] no csv in the dir-> {}".format(dir_target) return None connect_name_pred_FP(most_csv, name, p_name, res_test_set, dir_target)
def mk_call_graph_raw_data(root_dir, name_find='jars_dir', java_caller='/home/ise/programs/java-callgraph/target/javacg-0.1-SNAPSHOT-static.jar'): res = pt.walk_rec(root_dir, [], name_find, False) for dir_i in res: father_dir = '/'.join(str(dir_i).split('/')[:-1]) jars = pt.walk_rec(dir_i, [], '.jar') if len(jars) != 2: print "[Error] in dir --> {}\nfind:\n{}".format(dir_i, jars) continue out_jars = pt.mkdir_system(father_dir, 'out_jar') command_java_1 = 'java -jar {} {} {} '.format(java_caller, jars[1], father_dir) command_java_0 = 'java -jar {} {} {} '.format(java_caller, jars[0], father_dir) util_d4j.execute_command(command_java_1, 'call_graph', out_jars) util_d4j.execute_command(command_java_0, 'call_graph', out_jars)
def get_miss_classes_applyer(row, out_dir, repo_path): ''' Go over each bug commit and get the list of classes ''' commit_bug = row['parent'] commit_fix = row['commit'] bug_tag = row['tag_parent'] issue_id = row['issue'] index_bug = row['index_bug'] #checkout the buugy version git_cmd = 'git checkout {}'.format(commit_bug) print ge.run_GIT_command_and_log(repo_path, git_cmd, None, None, False) # get classes from src d_l = [] res = pt.walk_rec('{}/src'.format(repo_path), [], '.java') for item_java in res: class_name = pt.path_to_package('org', item_java, -5) d_l.append({ 'class_path': item_java, 'name': class_name, 'tag_bug': bug_tag, 'commit_bug': commit_bug }) df = pd.DataFrame(d_l) df.to_csv('{}/{}_{}.csv'.format(out_dir, issue_id, index_bug))
def get_pair_fix_bug_folder(folder_path):
    """
    Pair buggy/fixed test-suite dirs by iteration id, diff every complete
    pair and write the records to indep_report_v1.csv.
    :param folder_path: root dir holding the '*_buggy'/'*_fixed' suite dirs
    :return: path of the written csv, or None when nothing was collected
    """
    suite_dirs = pt.walk_rec(folder_path, [], 'test_suite_t', lv=-2, file_t=False)
    pairs = {}
    for suite_dir in suite_dirs:
        parent_name = str(suite_dir).split('/')[-2]
        # skip compile-output dirs
        if parent_name.split('_')[0] == 'complie':
            continue
        mode = parent_name.split('_')[-1]
        iter_id = str(suite_dir).split('/')[-1].split('_it_')[-1]
        pairs.setdefault(iter_id, {})
        if mode in ('fixed', 'buggy'):
            pairs[iter_id][mode] = suite_dir
    rows = []
    for iter_id in pairs.keys():
        pair = pairs[iter_id]
        if 'buggy' in pair and 'fixed' in pair:
            rows.extend(get_diff_fix_buggy(pair['buggy'], pair['fixed']))
    if len(rows) == 0:
        return None
    out_csv = "{}/indep_report_v1.csv".format(folder_path)
    pd.DataFrame(rows).to_csv(out_csv)
    return out_csv
def add_loc(project_name, pass_loc=False):
    """
    Join LOC (lines-of-code) info onto the per-bug results of project_name
    and write exp.csv next to the input fin_df_buggy.csv.
    :param project_name: bug_miner project folder name
    :param pass_loc: when True reuse the previously written loc.csv instead
                     of recomputing LOC per bug
    """
    csv_p = '/home/ise/bug_miner/{}/fin_df_buggy.csv'.format(project_name)
    df_fin = pd.read_csv(csv_p, index_col=0)
    p_name = str(csv_p).split('/')[-2]
    father_dir = '/'.join(str(csv_p).split('/')[:-1])
    out_loc = pt.mkdir_system(father_dir, 'LOC', False)
    repo_path = "{}/{}".format('/'.join(str(csv_p).split('/')[:-1]), p_name)
    print repo_path
    df_info = pd.read_csv("{}/tmp_files/{}_bug.csv".format(
        os.getcwd(), p_name), index_col=0)
    # restrict the bug info to bugs we actually generated results for
    list_bug_generated = df_fin['bug_name'].unique()
    print list(df_info)
    print len(df_info)
    df_info = df_info[df_info['issue'].isin(list_bug_generated)]
    if pass_loc is False:
        # compute LOC per bug (writes one '<bug>_LOC.csv' into out_loc each)
        df_info.apply(add_loc_helper, repo=repo_path, out=out_loc, axis=1)
        # get all df loc from LOC folder
        res_df_loc_path = pt.walk_rec(out_loc, [], '.csv')
        all_loc_list = []
        for item_loc_path in res_df_loc_path:
            all_loc_list.append(pd.read_csv(item_loc_path, index_col=0))
        df_all_loc = pd.concat(all_loc_list)
        print list(df_all_loc)
        print list(df_fin)
        print len(df_fin)
        df_all_loc.to_csv('{}/{}.csv'.format(father_dir, 'loc'))
    else:
        df_all_loc = pd.read_csv('{}/{}.csv'.format(father_dir, 'loc'),
                                 index_col=0)
    # right-join keeps every result row, adding its LOC columns
    result_df = pd.merge(df_all_loc, df_fin, 'right', on=['bug_name', 'name'])
    result_df.to_csv('{}/{}.csv'.format(father_dir, 'exp'))
    print len(result_df)
def add_loc_helper(row, repo, out, prefix_name='org'): ''' getting the loc info to LOC dir :param repo: path to repo :param out: path where to write the csv ''' commit_buggy = row['parent'] commit_buggy = row['commit'] bug_id = row['issue'] path_to_faulty = row['component_path'] package_name = row['package'] # if str(bug_id ) == '1261': # print "" print path_to_faulty checkout_version(commit_buggy, repo, None) pack = '/'.join(str(path_to_faulty).split('\\')[:-1]) # #TODO: remove it # pack = str(pack).split('/') # indx =pack.index(prefix_name) # pack = '/'.join(pack[:indx+1]) # ##### klasses = pt.walk_rec('{}/{}'.format(repo, pack), [], '.java') d_l = [] for class_i in klasses: name = pt.path_to_package(prefix_name, class_i, -5) size = get_LOC(class_i) d_l.append({'name': name, 'LOC': size, 'bug_name': bug_id}) df = pd.DataFrame(d_l) df.to_csv('{}/{}_LOC.csv'.format(out, bug_id))
def count_class(commit, p_name): repo_path = '/home/ise/bug_miner/{0}/{0}'.format(p_name) run_GIT_command_and_log(repo_path, 'git checkout {}'.format(commit), None, None, False) list_java = pt.walk_rec(repo_path, [], '.java') print len(list_java) return len(list_java)
def compile_java_class(dir_to_compile, output_dir, dependent_dir): """ this function compile the .java tests to .class :param dir_to_compile: path where .java files :param output_dir: output dir where .class will be found :param dependent_dir: .jar for the compilation process :return: output dir path """ #if path.isdir(dir_to_compile) is False: # msg = "no dir : {}".format(dir_to_compile) # raise Exception(msg) out_dir = pt.mkdir_system(output_dir, 'test_classes') files = pt.walk_rec(dependent_dir, [], '.jar', lv=-2) files.append( '/home/ise/eran/evosuite/jar/evosuite-standalone-runtime-1.0.6.jar') jars_string = ':'.join(files) dir_to_compile = '{}*'.format(dir_to_compile) string_command = "javac {0} -verbose -Xlint -cp {1} -d {2} -s {2} -h {2}".format( dir_to_compile, jars_string, out_dir) print "[OS] {}".format(string_command) os.system(string_command) return process = Popen(shlex.split(string_command), stdout=PIPE, stderr=PIPE) stdout, stderr = process.communicate() print "----stdout----" print stdout print "----stderr----" print stderr return out_dir
def get_snapshot_to_jar_dir(repo, path_to_target_folder): res = pt.walk_rec("{}".format(repo), [], 'SNAPSHOT.jar', lv=-6) res = [x for x in res if str(x).__contains__('/libb/') is False] for item in res: command_cp = 'cp {} {}'.format(item, path_to_target_folder) print "[OS] {}".format(command_cp) os.system(command_cp)
def add_hamcrest(path, jar_path='/home/ise/eran/evosuite/dep/hamcrest-all-1.3.jar'):
    """
    Replace any hamcrest jars under `path` with the pinned hamcrest-all jar.
    :param path: dir whose hamcrest jars are replaced
    :param jar_path: the pinned hamcrest jar to copy in
    """
    stale = [x for x in pt.walk_rec(path, [], 'hamcrest')
             if str(x).endswith('.jar')]
    for jar_i in stale:
        os.system('rm {}'.format(jar_i))
    os.system('cp {} {}'.format(jar_path, path))
def get_all_self_report(res_folder): csv_files = pt.walk_rec(res_folder, [], 'report.csv') df_list = [] for item_csv in csv_files: df_list.append(pd.read_csv(item_csv)) df_all = pd.concat(df_list) father_dir = '/'.join(str(res_folder).split('/')[:-1]) print list(df_all)
def res_mod(out_info): res_mod = pt.walk_rec(out_info, [], 'mod.csv') l_df = [] for item in res_mod: l_df.append(pd.read_csv(item, index_col=0)) df_all = pd.concat(l_df) x = df_all['is_exists'].value_counts() print "missing class % \n {}".format(x)
def get_results(dir_res='/home/ise/test/pom_3'):
    """
    Cross-reference, per TIKA bug dir, the test logs (log_evo) with the
    generated ESTest classes (org tree) and write result_info_empty.csv with
    a log/test flag pair per test name.
    :param dir_res: root dir holding the TIKA_* bug dirs
    """
    res = pt.walk_rec(dir_res, [], 'TIKA', False, lv=-1)
    d_l = []  # NOTE(review): unused local
    d_l_empty = []
    for item in res:
        print "----{}----".format(str(item).split('/')[-1])
        name = str(item).split('/')[-1]
        d_test = {}
        # dir name layout assumed: '<bug_name>_<bug_id>'
        id_bug = str(name).split('_')[1]
        bug_name = str(name).split('_')[0]
        folder_log_evo = pt.walk_rec(item, [], 'log_evo', False)
        folder_org = pt.walk_rec(item, [], 'org', False)
        # first pass: every test that has a log gets log=1, test=0
        res_log_test = pt.walk_rec(folder_log_evo[0], [], '.txt')
        for log_t in res_log_test:
            name = str(log_t).split('/')[-1][:-4]
            if name not in d_test:
                d_test[name] = {
                    'id': id_bug,
                    'bug_name': bug_name,
                    'log': 1,
                    'name': name,
                    'test': 0
                }
            else:
                msg = '[Error] duplication in the test log dir := {}'.format(
                    folder_log_evo)
                raise Exception(msg)
        # second pass: mark tests that actually produced an ESTest class
        if len(folder_org) > 0:
            res_test = pt.walk_rec(folder_org[0], [], 'ESTest.java')
            for test_i in res_test:
                test_name_package = pt.path_to_package('org', test_i, -5)
                # strip the trailing 'ESTest' suffix to match the log name
                test_name_package = test_name_package[:-7]
                if test_name_package not in d_test:
                    d_test[test_name_package] = {
                        'id': id_bug,
                        'bug_name': bug_name,
                        'log': 0,
                        'name': test_name_package,
                        'test': 1
                    }
                else:
                    d_test[test_name_package]['test'] = 1
        d_l_empty.extend(d_test.values())
    df = pd.DataFrame(d_l_empty)
    father = '/'.join(str(dir_res).split('/')[:-1])
    df.to_csv("{}/result_info_empty.csv".format(father))
def remove_junit(path, path_to_junit='/home/ise/eran/evosuite/junit-4.12.jar'):
    """
    Replace any junit jars under `path` with the pinned junit-4.12 jar, then
    refresh hamcrest as well.
    :param path: dir whose junit jars are replaced
    :param path_to_junit: the pinned junit jar to copy in
    """
    stale_jars = [x for x in pt.walk_rec(path, [], 'junit')
                  if str(x).endswith('.jar')]
    for jar_i in stale_jars:
        os.system('rm {}'.format(jar_i))
    os.system('cp {} {}'.format(path_to_junit, path))
    # add hamcrest
    add_hamcrest(path)
def replication_table(root_p):
    '''
    this function help to make table replication to see if more replication
    with less time budget is better then more time in Evosuite, for e,g,
    given T time if one test suite with T is better then T/n --> n* test
    suites (each test suite with T/n time budget)
    :param root_p: the pass for out_xml dir
    :return: csv file
    '''
    # mutation outcomes that count as "not killed"
    arr_sign = ['NO_COVERAGE', 'SURVIVED', 'TIMED_OUT', 'RUN_ERROR']
    out_xml_dir = pit_render_test.walk_rec(root_p, [], 'out_xml', False, -2)
    for dir_out in out_xml_dir:
        print dir_out
        proj_cur = '/'.join(str(dir_out).split('/')[:-1])
        # one column per replication ('ALL' dirs), plus the mutant ID column
        rep_name = pit_render_test.walk_rec(proj_cur, [], 'ALL', False, -2, False)
        list_p = pit_render_test.walk_rec(dir_out, [], '.csv', -1)
        cols = ['ID']
        acc = 0
        cols.extend(rep_name)
        big_df = pd.DataFrame(columns=cols)
        dir_out = dir_out[:-8]  # strip the trailing '/out_xml'
        # dir name assumed to contain 't=<budget>_' -- TODO confirm
        name = str(dir_out).split('t=')[1]
        time_budget = str(name).split('_')[0]
        name = "replication_table_t={}".format(time_budget)
        for csv_item in list_p:
            print "csv_item =", csv_item
            df = pd.read_csv(csv_item, index_col=0)
            acc += int(len(df))
            print "---"
            print "df col :", list(df)
            print "big col:", list(big_df)
            print "--"
            # align every csv to the common column set before concat
            for col_name in cols:
                if col_name not in df:
                    df[col_name] = np.nan
            df = df[cols]
            big_df = pd.concat([big_df, df])
        # sanity check: concat should preserve the total row count
        if acc != int(len(big_df)):
            print "acc: {} big: {}".format(acc, int(len(big_df)))
        # binarize: KILLED -> 1, every other known outcome -> 0
        big_df[big_df == 'KILLED'] = 1
        for x in arr_sign:
            big_df[big_df == x] = 0
        size = len(rep_name)
        # cumulative best-of over the first i+1 replications
        for i in range(1, size):
            big_df['max_0-{}'.format(i)] = big_df[rep_name[:i + 1]].max(axis=1)
        flush_csv(root_p, big_df, '{}'.format(name))
def eval_xgb_test_dir(dir_p, name_file='FP_'):
    """
    Evaluate every XGB prediction csv under dir_p (ROC, PR-AUC, MSE split by
    buggy/valid, precision/recall@k) and write eval_res.csv one level up.
    :param dir_p: dir holding the 'FP_*' prediction csv files
    :param name_file: filename filter for the walk
    """
    res = pt.walk_rec(dir_p, [], name_file, True)
    res = [x for x in res if str(x).endswith('.csv')]
    df_l = []
    for itm in res:
        # filename assumed to encode '..._<tag x3>_..._<conf>_...' -- TODO confirm
        tag_name = '_'.join(str(itm).split('/')[-1].split('_')[-6:-3])
        conf_num = str(itm).split('/')[-1].split('_')[-2]
        print "tag_name:\t{}".format(tag_name)
        df = pd.read_csv(itm)
        y_pred = df['test_predictions'].values
        y_test = df['hasBug'].values
        precision, recall, thresholds, Avg_PR = reacall_precision(
            y_test, y_pred, ploting=False, full_out=True)
        area = auc(recall, precision)
        roc = roc_auc_score(y_test, y_pred)
        df_bug = df[df['hasBug'] == 1]
        df_valid = df[df['hasBug'] == 0]
        # skip tags with no buggy sample at all
        if len(df_bug) == 0:
            continue
        mse_valid_test = mean_squared_error(df_valid['hasBug'],
                                            df_valid['test_predictions'])
        mse_buggy_test = mean_squared_error(df_bug['hasBug'],
                                            df_bug['test_predictions'])
        #precsion_buggy = precision_score(df_bug['hasBug'], df_bug['test_predictions'])
        #recall_buggy = recall_score(df_bug['hasBug'], df_bug['test_predictions'])
        # F1_buggy = f1_score(df_bug['hasBug'], df_bug['test_predictions'])
        # precision/recall at several cut-off ranks
        d_k = {}
        for k in [10, 20, 30, 100]:
            k_recall, k_precsion = metric_precsion_at_k(y_test, y_pred, k=k)
            d_k['k_{}_recall'.format(k)] = k_recall
            d_k['k_{}_precsion'.format(k)] = k_precsion
        print 'size buggy', len(df_bug)
        print 'size vaild', len(df_valid)
        print 'precntage vaild', float(len(df_valid)) / float(len(df_bug) + len(df_valid)) * 100.0
        print 'precntage buggy', float(len(df_bug)) / float(len(df_bug) + len(df_valid)) * 100.0
        d_out = {'tag': tag_name, 'ROC': roc, 'conf': conf_num,
                 'MSE_Test_Bug': mse_buggy_test, 'num_buggy': len(df_bug),
                 'num_vaild': len(df_valid),
                 'num_all': len(df_valid) + len(df_bug),
                 'MSE_Test_Valid': mse_valid_test,
                 #'F1_score':F1_buggy,'precsion_buggy':precsion_buggy,'recall_buggy':recall_buggy,
                 'area-PRC (buggy)': area,
                 'Average precision-recall score': Avg_PR}
        for d_k_key in d_k.keys():
            d_out[d_k_key] = d_k[d_k_key]
        print "TEST:\t bug MSE = {}".format(((mse_buggy_test)))
        print "TEST:\t valid MSE = {}".format(((mse_valid_test)))
        print Avg_PR
        df_l.append(d_out)
    df_res = pd.DataFrame(df_l)
    dir_p = '/'.join(str(dir_p).split('/')[:-1])
    df_res.to_csv('{}/eval_res.csv'.format(dir_p))
def helper_get_arrf_fiels(p_path='/home/ise/bug_miner/commons-lang1432698/FP/all_lang', mode='most', validtion=True):
    """
    Collect per-tag weka artifacts (model/test .arff + prediction csv) from
    p_path, link each tag to its successor's test set as a validation set,
    and hand the resulting mapping to manger().
    :param p_path: FP root dir holding 'arff_<mode>' and 'pred_1_<mode>' dirs
    :param mode: suffix selecting which artifact dirs to use
    :param validtion: NOTE(review): currently unused in this function
    """
    d_tags = {}
    res = pt.walk_rec(p_path, [], '_{}'.format(mode), False)
    arff_path, pred_1_path = None, None
    for item in res:
        if str(item).endswith('arff_{}'.format(mode)):
            arff_path = item
        elif str(item).endswith('pred_1_{}'.format(mode)):
            pred_1_path = item
    res_minor = pt.walk_rec(pred_1_path, [], '', False, lv=-1)
    res_models = pt.walk_rec(arff_path, [], '.arff')
    for item in res_minor:
        # dir base name layout: <sort_index>_<tag name parts...>
        name = '_'.join(str(item).split('/')[-1].split('_')[1:])
        index_sort = str(item).split('/')[-1].split('_')[0]
        files_res = pt.walk_rec(item, [], '')
        d_tags[name] = {'sort_index': index_sort}
        d_tags[name]['model'] = None
        for file_i in files_res:
            if str(file_i).endswith('.csv'):
                d_tags[name]['name'] = file_i
            elif str(file_i).endswith(".arff"):
                d_tags[name]['test'] = file_i
    for item_arff in res_models:
        name = str(item_arff).split('/')[-1].split('.')[0]
        if name in d_tags:
            d_tags[name]['model'] = item_arff
        else:
            d_tags[name] = {'model': item_arff, 'test': None, 'name': None,
                            'sort_index': None}
    # find validation set: each tag validates on the *next* tag's test set
    # NOTE(review): tags created only from a model .arff carry
    # sort_index=None and would crash int() below -- assumes every tag also
    # has a pred dir; confirm.
    keys_list = d_tags.keys()
    keys_list = [[x, int(d_tags[x]['sort_index'])] for x in keys_list]
    keys_list_sorted = sorted(keys_list, key=lambda tup: tup[1])
    print keys_list
    only_key_sort = [x[0] for x in keys_list_sorted]
    for ky in d_tags.keys():
        index = only_key_sort.index(ky)
        if index < len(keys_list_sorted) - 1:
            ky_son = keys_list_sorted[index + 1][0]
            d_tags[ky]['validation_set'] = d_tags[ky_son]['test']
        else:
            # last tag in the order has no successor to validate on
            d_tags[ky]['validation_set'] = None
    manger(d_tags)
def rearrange_folder_conf_xgb(
        p_path_dir='/home/ise/bug_miner/XGB/Lang_DATA/csv_res/TEST'):
    """
    Move every csv under p_path_dir into a 'conf_<N>' sub-dir keyed by the
    second-to-last '_' token of its filename, then terminate the process.
    :param p_path_dir: dir holding the per-configuration csv files
    """
    for csv_path in pt.walk_rec(p_path_dir, [], '.csv'):
        conf_id = str(csv_path).split('/')[-1].split('_')[-2]
        target_dir = pt.mkdir_system(p_path_dir, 'conf_{}'.format(conf_id), False)
        os.system('mv {} {}'.format(csv_path, target_dir))
    exit()
def self_complie_bulider_func(repo, dir_cur, prefix, suffix='fix', bug_id=''):
    """
    Compile and junit-run every EVOSUITE-generated suite found under dir_cur,
    writing outputs into 'complie_out_<suffix>' and 'junit_out_<suffix>'.
    :param repo: repo to build (mvn package cycle) for the classpath jars
    :param dir_cur: bug dir expected to hold an 'EVOSUITE' sub-dir
    :param prefix: package-root dir name inside each suite dir
    :param suffix: run label ('fix'/'bug'), used in output dir names
    :param bug_id: NOTE(review): currently unused in this function
    :return: None when the EVOSUITE dir or the built jars are missing
    """
    if os.path.isdir("{}/EVOSUITE".format(dir_cur)):
        d = {}
        java_dirz = pt.walk_rec("{}/EVOSUITE".format(dir_cur), [], '', False, lv=-1)
        for item in java_dirz:
            if os.path.isdir("{}/{}".format(item, prefix)):
                name_folder = str(item).split('/')[-1]
                tmp = pt.walk_rec("{}/{}".format(item, prefix), [], '.java')
                # path2 = the dir that actually holds the .java files
                path2 = '/'.join(str(tmp[0]).split('/')[:-1])
                # folder name assumed to encode ..._t=<budget>_it=<iteration>
                tmp = str(name_folder).split('_')
                name_folder = 'test_suite_t_{}_it_{}'.format(
                    tmp[-2].split('=')[1], tmp[-1].split('=')[1])
                d[name_folder] = {
                    'name': name_folder,
                    'path': "{}/{}/*".format(item, prefix),
                    'path2': '{}/*'.format(path2)
                }
    else:
        print "[error] no dir {}/EVOSUITE".format(dir_cur)
        return None
    d_adder = {'bug_id': str(dir_cur).split('/')[-1], 'mode': suffix}
    # build the repo jars used as the compile/run classpath
    res, path_jarz = package_mvn_cycle(repo)
    if path_jarz is None:
        return
    # pin junit/hamcrest versions in the jar dir before compiling
    remove_junit(path_jarz)
    out_path_complie = pt.mkdir_system(dir_cur, 'complie_out_{}'.format(suffix))
    out_path_junit = pt.mkdir_system(dir_cur, 'junit_out_{}'.format(suffix))
    for ky_i in d.keys():
        out_i_complie = pt.mkdir_system(out_path_complie, d[ky_i]['name'])
        out_i_junit = pt.mkdir_system(out_path_junit, d[ky_i]['name'])
        indep_bulilder.compile_java_class(d[ky_i]['path2'], out_i_complie, path_jarz)
        report_d = indep_bulilder.test_junit_commandLine("{}/{}".format(
            out_i_complie, 'test_classes'), path_jarz, out_i_junit,
            prefix_package=prefix, d_add=d_adder)
    print "end"
def mk_call_graph_df(root_dir, name_find='call_graph_stdout.txt'):
    """
    Build call-graph matrices for every call-graph stdout file under
    root_dir via the Call_g pipeline.
    :param root_dir: root dir scanned for the stdout files
    :param name_find: filename of the raw call-graph output
    """
    for txt_path in pt.walk_rec(root_dir, [], name_find):
        out_root = '/'.join(str(txt_path).split('/')[:-3])
        graph = call_g.Call_g(txt_path, out_root)
        graph.read_and_process(False)
        graph.info_graph_csv()
        graph.step_matrix()
        graph.adj_matrix()
        graph.coverage_matrix_BFS()
def del_dependency_dir(repo):
    """
    Delete the 'libb' dependency dir: first try <repo>/libb directly, and
    otherwise remove every 'libb' dir found by a shallow recursive walk.
    :param repo: repo root dir
    """
    libb_path = "{}/libb".format(repo)
    if os.path.isdir(libb_path):
        os.system('rm -r {}'.format(libb_path))
        return
    for found in pt.walk_rec(repo, [], 'libb', False, lv=-3):
        os.system('rm -r {}'.format(found))