def get_all_dir(root_p, name=None): if name is None: walker = pit_render_test.walker(root_p) classes_list = walker.walk("org", False, 0) print "class_list = ", classes_list else: walker = pit_render_test.walker(root_p) classes_list = walker.walk(name, False, -1) print "class_list = ", classes_list return classes_list
def get_all_class_by_name(path_root, out_path=None):
    """Aggregate PIT mutation CSVs per class under path_root and write one
    summary CSV per class into out_path.

    :param path_root: root directory holding per-class experiment folders
    :param out_path: output directory; defaults to <path_root>/out/ (created
                     if missing)
    """
    # Normalize both paths to end with '/'.
    if path_root[-1] != '/':
        path_root = path_root + '/'
    if out_path is None:
        out_path = path_root + 'out/'
    if os.path.isdir(out_path) is False:
        os.mkdir(out_path)
    if out_path[-1] != '/':
        out_path = out_path + '/'
    dict_package_prefix = dict()
    walker = pit_render_test.walker(path_root)
    # Top-level experiment folders ('ALL' pattern, depth 2).
    classes_list = walker.walk('ALL', False, 2, False)
    print classes_list
    for class_item in classes_list:
        # NOTE(review): `classes_list` is rebound inside this loop; the loop
        # itself keeps iterating the original list, but the shadowing is
        # confusing — consider renaming the inner variable.
        walker = pit_render_test.walker(path_root + class_item + '/commons-math3-3.5-src/target/pit-reports/')
        #print path_root+class_item+'/commons-math3-3.5-src/target/pit-reports/'
        classes_list = walker.walk('org.apache.commons.math', False)
        d = {}
        ctr_empty = 0
        # Map class name -> its mutations.csv path (or None when missing).
        for item in classes_list:
            name = str(item).split('/')[-1]
            if os.path.exists(item + '/mutations.csv'):
                d[name] = item + '/mutations.csv'
            else:
                ctr_empty = ctr_empty + 1
                d[name] = None
        print "empty:=", ctr_empty
        dict_package_prefix[class_item] = d
    # Merge the per-experiment dicts into one dict keyed by class.
    dico = merge_dict_by_class(dict_package_prefix)
    res_dataframe = {}
    for ky in dico:
        #if not str(ky).__contains__("org.apache.commons.math3.linear"):
        #    continue
        tmp = get_data_df_by_name(dict(dico[ky]).values())
        if tmp is None:
            continue
        print ky
        print 'tmp shape = ', tmp.shape
        # Average the FP/U columns and persist one CSV per class.
        df_to_csv = mean_all_FPU(tmp)
        print 'writing ', ky, ' size = ', df_to_csv.shape
        write_to_csv(out_path + ky + '.csv', df_to_csv)
        print 'Done !!!! ', ky, ' size = ', df_to_csv.shape
    #for k in res_dataframe :
    #    if res_dataframe[k] is None:
    #        continue
    #    write_to_csv(out_path+k+'.csv',res_dataframe[k])
    print "done"
def rev_func(root_path):
    '''
    Summarize PIT results for a "rev" project: locate every *_t=<time>_
    run directory, extract its time budget, and write U and FP summary
    tables via summarize_rev.

    :param root_path: the path for the rev project
    :return: file.csv (summaries are written under <root_path>/table_out)
    '''
    out_P = mkdir_system(root_path, 'table_out', False)
    set_globvar_path(root_path)
    crate_file('rev_err')
    walker = pit_render_test.walker(root_path)
    # Directories whose names contain '_t=' at depth 0.
    classes_list = walker.walk('_t=', False, 0)
    classes_list = [x + '/pit_test/' for x in classes_list]
    ### classes_list = walker.walk('pit_test', False, 3) #TODO: remove this line
    print classes_list
    time = ''
    d = {}
    # Length of the trailing '/pit_test/' suffix (+1) used to slice the
    # time value out of the path.
    last = len("/pit_test/") + 1
    for p_path in classes_list:
        y = str(p_path).find("t=")
        if y > 0:
            # Path looks like ..._t=<time>_/pit_test/ — extract <time>.
            time = p_path[y + 2:-last]
            d[p_path] = time
        else:
            d[p_path] = None
        #get_all_class_by_name(p_path)
    df_dico = summarize_rev(classes_list, d, out_P)
    df_dico_fp = summarize_rev(classes_list, d, out_P, mod='FP')
def main_pars(arr):
    """Command-line dispatcher.

    :param arr: argv-style list; arr[1] selects the mode
                ('fin' | 'all' | 'arg' | 'rev' | 'class'), remaining
                entries are mode-specific paths/arguments.
    """
    if len(arr) > 2:
        mod = arr[1]
        if mod == "fin":
            die_p = arr[2]   # '/home/eran/thesis/test_gen/experiment/t30_distr/pit_res/'
            fpcsv = arr[3]   # '/home/eran/thesis/test_gen/experiment/t30_distr/pit_res/FP_budget_time.csv'
            uni = arr[4]     # '30'
            clac_by_package(die_p, fpcsv, uni)
        elif mod == 'all':
            dico = init_clac(arr[2], arr[3], 'org.apache.commons.math3.linear.PreconditionedIterativeLinearSolver')
        elif mod == 'arg':
            aggregate_time_budget(arr[2])
        elif mod == 'rev':
            rev_func(arr[2])
        elif mod == 'class':
            if len(arr) > 3:
                dico = get_all_class_by_name(arr[2], arr[3])
            else:
                # No explicit out dir: process every 't=' run that has a
                # pit_test sub-directory.
                walker = pit_render_test.walker(arr[2])
                classes_list = walker.walk('t=', False, 0)
                for pp in classes_list:
                    if os.path.isdir(pp + "/pit_test"):
                        dico = get_all_class_by_name(pp)
    else:
        # fin_mereg("/home/ise/eran/idel/geometry_pac/")
        # data_mutation
        #new_FP
        print "[Error] ----no args------"
        exit(0)
def analysis(self):
    """Classify generated EvoSuite artifacts under self._path.

    Walks for ESTest .java/.txt/.class files, cleans the lists, then marks
    each known class in self.dict as generated / compiled / reported.

    :return: self.dict with per-class status flags
    :raises Exception: when a .class or .txt file has no matching .java entry
    """
    walker = pit_render_test.walker(self._path)
    list_java = walker.walk("ESTest.java")
    list_txt = walker.walk("ESTest.txt")
    list_class = walker.walk("ESTest.class")
    list_java, list_txt, list_class = self.list_clean(list_java, list_txt, list_class)
    # A .java file means the test was generated; assume un-compiled until a
    # matching .class file is seen below. ([:-7] strips the "ES" suffix part
    # of the artifact name — TODO confirm against list_clean's output shape.)
    for klass in list_java:
        if klass[0][:-7] in self.dict:
            self.dict[klass[0][:-7]] = {
                "generate": 1,
                "un_compile": 1,
                "report": 0
            }
    for comp_klass in list_class:
        if comp_klass[0][:-7] in self.dict:
            tmp = self.dict[comp_klass[0][:-7]]
            tmp["compile"] = 1
            tmp["un_compile"] = 0
        else:
            # BUG FIX: the original message was copy-pasted from the .txt
            # branch and wrongly said "file in txt but not in java".
            raise Exception("[Exception] file in class but not in java: ",
                            comp_klass[0])
    for txt_f in list_txt:
        if txt_f[0][:-7] in self.dict:
            tmp = self.dict[txt_f[0][:-7]]
            tmp["p"] = txt_f[1]
            tmp["report"] = 1
        else:
            raise Exception("[Exception] file in txt but not in java: ",
                            txt_f[0])
    self.read_txt()
    return self.dict
def constractor(self):
    """Initialize per-class status dicts, extract the time budget from the
    path, and build a ProjectCase for every ALL_* project directory."""
    # Every status counter starts at zero for both the unit (U) and the
    # FP result tables.
    fields = ("Errors", "report", "run", "compile", "un_compile",
              "Skipped", "Failures", "generate", "size")
    for klass in self.list_org:
        self.dict_fin_U[klass] = {f: 0 for f in fields}
        self.dict_fin_FP[klass] = {f: 0 for f in fields}
    # Path embeds the budget as ..._t=<time>_/pit_test... — slice it out.
    start = self._path.find("_t=") + 3
    end = self._path.find("_/pit_test")
    self._time = self._path[start:end]
    walker = pit_render_test.walker(self._path)
    for project_path in walker.walk("ALL_", False, 0):
        self.proj_list.append(ProjectCase(project_path, self.list_org))
def get_all_bugs_dir(pit_dir_path, arg):
    """Scan a pit-reports directory for problematic class result folders:
    empty directories, empty mutations.xml files, and folders whose XML
    reports a different test name than the folder itself. Logs findings and
    hands the empty-directory list to fix_error_list.

    :param pit_dir_path: path of one pit-reports directory
    :param arg: passed through to pars_xml_name / fix_error_list
    """
    d = {}
    script_py = extract_script(pit_dir_path)
    print script_py
    obj_walk = walker(pit_dir_path)
    all_dir = obj_walk.walk('org', False)
    empty_dir = []
    diff_names_dir = []
    empty_xml = []
    for dir in all_dir:
        name_class_dir = str(dir).split('/')[-1]
        if is_empty_dir(str(dir)) is False:
            xml_file_path = dir + '/mutations.xml'
            # Non-empty directory but an empty XML report.
            if is_empty_file(xml_file_path):
                d[name_class_dir] = {'class': name_class_dir, 'path': xml_file_path, 'empty_file': 1}
                empty_xml.append(xml_file_path)
                continue
            # XML parses: flag a mismatch between the reported test name
            # and the directory name.
            test_name = pars_xml_name(xml_file_path, arg)
            if test_name is not None:
                if test_name != name_class_dir:
                    d[name_class_dir] = {'class': name_class_dir, 'path': xml_file_path, 'diff': 1}
                    diff_names_dir.append([dir, name_class_dir])
        else:
            # Completely empty result directory.
            d[name_class_dir] = {'class': name_class_dir, 'path': dir, 'empty_dir': 1}
            empty_dir.append([dir, name_class_dir])
    print ""
    p_log = '{}/logS'.format(script_py)
    log_to_dir_pit(d, p_log)
    #fix_error_list(bugs_dir,script_py,'bug_dir',arg)
    #fix_error_list(empty_csv, script_py,'empty_csv',arg)
    fix_error_list(empty_dir, script_py, 'empty_dir', arg)
def get_all_pit_dir_exp(root_exp,arg=None): obj = walker(root=root_exp) all_pp = obj.walk('pit-reports',False) if len(all_pp)==0: print "No dir pit-report dir in the following path : {}".format(root_exp) for p in all_pp: get_all_bugs_dir(p,arg)
def analysis(root, li_org):
    """Build a BigProject for each 't=' run directory directly under root.

    :param root: experiment root directory
    :param li_org: list of original classes, forwarded to BigProject
    :return: list of BigProject instances
    """
    dir_walker = pit_render_test.walker(root)
    return [BigProject(run_dir + "/pit_test/", li_org)
            for run_dir in dir_walker.walk("t=", False, 0)]
def get_sum_df(p_path):
    """Load every .csv under p_path into a dict of DataFrames.

    :param p_path: directory to walk for .csv files
    :return: dict mapping file name (without '.csv') -> pandas DataFrame
    """
    walker = pit_render_test.walker(p_path)
    classes_list = walker.walk('.csv')
    d = {}
    for csv_p in classes_list:
        # File name without the '.csv' suffix keys the frame.
        name_class = str(csv_p).split('/')[-1][:-4]
        # The original first stored the path and immediately overwrote it
        # with the DataFrame — the dead store is removed.
        d[name_class] = pd.read_csv(csv_p)
    return d
def origin_java(root_path_java):
    """Return cleaned paths of all compiled classes under root_path_java,
    excluding inner/anonymous classes (names containing '$').

    :param root_path_java: root directory of compiled .class files
    :return: list of paths processed through clean_path_math
    """
    walker = pit_render_test.walker(root_path_java)
    compiled = walker.walk(".class", True, -1)
    return [clean_path_math(path) for path in compiled
            if "$" not in str(path)]
def get_name(path):
    """Extract the class-under-test name from <path>/mutations.csv.

    Looks for the first row whose column 6 contains 'ESTest' and returns the
    first half of that value (the name appears duplicated — presumably
    'X.ESTestX.ESTest'; TODO confirm). Returns half of 'null' when no csv
    exists. Note: len(name)/2 relies on Python 2 integer division.
    """
    walker = pit_render_test.walker(path)
    name = 'null'
    list_p = walker.walk("csv")
    if len(list_p) > 0:
        val_s = 'ESTest'
        # NOTE(review): the walk result is only used as an existence check;
        # the file actually read is the hard-coded <path>/mutations.csv.
        csv_file = csv.reader(open(path + '/mutations.csv', "rb"), delimiter=",")
        for row in csv_file:
            if str(row[6]).__contains__(val_s) is True:
                name = row[6]
                break
    return name[:len(name)/2]
def summarize_rev(classes_list, d, out_p, on_key='KILLED', mod='U'):
    """Merge per-class PIT CSVs (grouped by package prefix) into wide tables
    and write one CSV per prefix under out_p/Uni or out_p/FP.

    :param classes_list: run directories; each gets an 'out/' suffix appended
    :param d: mapping of run directory -> time budget (unused here directly)
    :param out_p: parent output directory
    :param on_key: mutation-status column stem, default 'KILLED'
    :param mod: 'U' for unit-test averages, 'FP' for FP averages
    :return: dict of package prefix -> merged DataFrame (from the LAST item
             processed — earlier items' dicts are overwritten each iteration)
    :raises Exception: when mod is neither 'U' nor 'FP'
    """
    on_key_U = '{}_AVG_U'.format(on_key)
    on_key_FP = '{}_AVG_FP'.format(on_key)
    classes_list = [x + 'out/' for x in classes_list]
    for item in classes_list:
        dict_prefix = {}
        walker = pit_render_test.walker(item)
        csv_list = walker.walk('.csv')
        # Countdown used below to trigger the final write-out.
        size_ctr = len(csv_list)
        df_table = pd.DataFrame(columns=['class', 'mutation-type', 'method', 'line'])
        if mod == 'U':
            out_dir = mkdir_system(out_p, 'Uni')
        elif mod == 'FP':
            out_dir = mkdir_system(out_p, 'FP')
        else:
            raise Exception('[Error] in csv_PIT args=rev , the mode parameter is wrong can be FP/U --> {}'.format(mod))
        for file_csv in csv_list:
            print size_ctr
            size_ctr -= 1
            col_filed = ['class', 'mutation-type', 'method', 'line']
            tmp_arr = str(file_csv).split('/')
            # e.g. 'a.b.Class.csv' -> item_name 'a.b.Class', prefix 'a.b'.
            item_name = tmp_arr[-1][:-4]
            prefix = str(item_name).split('.')[:-1]
            prefix = '.'.join(prefix)
            if prefix not in dict_prefix:
                dict_prefix[prefix] = df_table.copy(deep=True)
            matrix = dict_prefix[prefix]
            df_item = pd.read_csv(file_csv)
            if mod == 'U':
                if on_key_U in df_item:
                    col_filed.append(on_key_U)
                df_item = df_item[col_filed]
                # Outer merge keeps mutants present in only one of the files.
                matrix = pd.merge(matrix, df_item, how='outer', on=['class', 'mutation-type', 'method', 'line'])
                if on_key_U in col_filed:
                    matrix.rename(columns={on_key_U: '{}_ESTest_U'.format(item_name)}, inplace=True)
            elif mod == 'FP':
                if on_key_FP in df_item:
                    col_filed.append(on_key_FP)
                df_item = df_item[col_filed]
                matrix = pd.merge(matrix, df_item, how='outer', on=['class', 'mutation-type', 'method', 'line'])
                if on_key_FP in col_filed:
                    matrix.rename(columns={on_key_FP: '{}_ESTest_FP'.format(item_name)}, inplace=True)
            dict_prefix[prefix] = matrix
            # NOTE(review): the write-out fires when the counter hits 1,
            # i.e. while processing the second-to-last file — the final
            # file's merge is not reflected in the CSVs. Verify intended.
            if size_ctr == 1:
                for key_i in dict_prefix.keys():
                    df_tmp = dict_prefix[key_i]
                    df_tmp.to_csv('{}/{}__T_{}_.csv'.format(out_dir, key_i, mod))
    return dict_prefix
def fin_mereg(path):
    """Merge all fin.csv files under path into per-package CSVs.

    Extracts the time budget ('t=<time>_') from each fin.csv path, collects
    rows whose 3rd column contains 'org' into a dict keyed by package, and
    writes <path><package>_fin.csv for each package.
    """
    walker = pit_render_test.walker(path)
    list_p = walker.walk("fin.csv")
    list_d = []
    big_dico = {}
    for p in list_p:
        ISstop = True
        time_b = "null"
        # Manual scan: take characters between 't=' and the next '_'.
        if str(p).__contains__("t="):
            pos = str(p).find("t=")
            i = pos + 2
            while (ISstop and i < len(str(p))):
                if p[i] == '_':
                    ISstop = False
                    time_b = p[pos + 2:i]
                    break
                i += 1
        list_d.append({"path": p, "time": time_b})
    for item in list_d:
        csv_file = csv.reader(open(item["path"], "rb"), delimiter=",")
        for row in csv_file:
            # Only rows whose package column mentions 'org'.
            if str(row[2]).__contains__("org"):
                item_tmp = {}
                if row[2] in big_dico:
                    val = big_dico[str(row[2])]
                else:
                    big_dico[str(row[2])] = []
                    val = big_dico[str(row[2])]
                # Positional fin.csv columns — TODO confirm against writer.
                item_tmp["FP"] = row[0]
                item_tmp["test_suite_U"] = row[1]
                item_tmp["pacakge"] = row[2]
                item_tmp["test_suite_FP"] = row[3]
                item_tmp["kil_uni"] = row[4]
                item_tmp["kil_fp"] = row[5]
                item_tmp["uni"] = row[6]
                item_tmp["time"] = item["time"]
                val.append(item_tmp)
                big_dico[str(row[2])] = val
    for key_i in big_dico.keys():
        df = pd.DataFrame(big_dico[key_i])
        df.to_csv(path + str(key_i) + '_fin.csv', encoding='utf-8', index=False)
def tmp_csv_fin(path_p):
    """Aggregate FP and regular (unit) PIT summary CSVs under path_p into one
    combined frame and write it to ~/Desktop/fin.csv.

    :param path_p: directory to walk for .csv files ('*FP.csv' vs the rest)
    :return: the combined DataFrame
    """
    walker = pit_render_test.walker(path_p)
    list_p = walker.walk(".csv")
    df_reg = []
    df_FP = []
    for p in list_p:
        if str(p).__contains__('FP.csv') is True:
            df_FP.append(pd.read_csv(p))
        else:
            df_reg.append(pd.read_csv(p))
    # Seed the result with the mutant identity columns from the first FP file.
    new_df = df_FP[0][['index', "class", "method", "line"]].copy()
    counter = 0
    for df in df_FP:
        if counter == 0:
            new_df['kill_R_FP'] = np.where(df['KILLED_sum'] > 0, 1, 0)
            new_df['total_FP'] = df['total']
        else:
            new_df['total_FP'] += df['total']
            new_df['kill_R_FP'] += np.where(df['KILLED_sum'] > 0, 1, 0)
        for leb in arr_sign:
            if counter == 0:
                new_df[leb + '_FP'] = df[leb + '_sum']
            else:
                new_df[leb + '_FP'] += df[leb + '_sum']
        counter += 1
    counter = 0
    for df in df_reg:
        if counter == 0:
            new_df['kill_R_uni'] = np.where(df['KILLED_sum'] > 0, 1, 0)
            new_df['total_uni'] = df['total']
        else:
            new_df['total_uni'] += df['total']
            new_df['kill_R_uni'] += np.where(df['KILLED_sum'] > 0, 1, 0)
        for leb in arr_sign:
            if counter == 0:
                new_df[leb + '_uni'] = df[leb + '_sum']
            else:
                # BUG FIX: the original accumulated into new_df[leb + '_FP']
                # here, corrupting the FP totals with unit-test data.
                new_df[leb + '_uni'] += df[leb + '_sum']
        counter += 1
    write_to_csv('/home/eran/Desktop/fin.csv', new_df)
    return new_df
def get_name_CUT(root_p):
    """Extract the class-under-test name from PIT HTML report pages.

    Scans each html page under root_p for the '<h2>Tests examined</h2><ul>...'
    block and returns the first half of the captured text (the name is
    duplicated in the report — presumably 'NameName'; TODO confirm).
    Note: size / 2 relies on Python 2 integer division.

    :raises Exception: when no page yields a usable name
    """
    walker = pit_render_test.walker(root_p)
    list_p = walker.walk("html")
    # Minimal length of a non-empty match (the wrapper markup itself).
    size = len('<h2>Tests examined</h2><ul>*</ul>')
    name = []
    for page in list_p:
        with open(page, 'r') as myfile:
            data = myfile.read()
        data = data.replace('\n', '')
        tmp = re.findall('<h2>Tests examined</h2><ul>.*?</ul>', data)
        # FIX: re.findall returns a (possibly empty) list, never None, so
        # the original `tmp == None` check was dead; test emptiness instead.
        if not tmp:
            continue
        if len(tmp) > 0 and len(tmp[0]) > size:
            # Strip '<h2>Tests examined</h2><ul>' prefix and '</ul>' suffix.
            name.append(tmp[0][31:-20])
            break
    if len(name) == 0:
        raise Exception("problem with the ", root_p)
    size = len(name[0])
    res_name = name[0][:size / 2]
    return res_name
def aggregate_time_budget(root_path):
    """Aggregate per-time-budget PIT summaries across all '_t=' runs under
    root_path into a single big.csv in <root_path>/fin_out.

    :param root_path: experiment root holding *_t=<time>_ run directories
    """
    print "in"
    set_globvar_path(root_path)
    crate_file()
    dict_package_prefix = dict()
    walker = pit_render_test.walker(root_path)
    classes_list = walker.walk('_t=', False, 0)
    classes_list = [x + '/pit_test/' for x in classes_list]
    ### classes_list = walker.walk('pit_test', False, 3) #TODO: remove this line
    print classes_list
    time = ''
    d = {}
    # Length of the trailing '/pit_test/' suffix (+1) used to slice the
    # time value out of the path.
    last = len("/pit_test/") + 1
    for p_path in classes_list:
        y = str(p_path).find("t=")
        if y > 0:
            time = p_path[y + 2:-last]
            d[p_path] = time
        else:
            d[p_path] = None
        # Generate the per-class out/ CSVs for this run before aggregating.
        get_all_class_by_name(p_path)
    classes_list = [x + 'out/' for x in classes_list]
    time_arr = d.values()
    time_arr_fp = [str(x) + "_budget_FP" for x in time_arr]
    time_arr_u = [str(x) + "_budget_U" for x in time_arr]
    print classes_list
    all_c = ['class'] + time_arr_fp + time_arr_u
    df_big_d = {}
    list_end = {}
    for i in range(len(classes_list)):
        tmp_dico = get_sum_df(classes_list[i])
        # [:-4] strips the trailing 'out/' to recover the original run key.
        insert_to_big(df_big_d, tmp_dico, d[classes_list[i][:-4]])
        #merge_df_sum_by_class(tmp_dico,list_end,d[classes_list[i][:-4]])
    df_big = pd.DataFrame(df_big_d.values())
    #list_colo = list(df_big)
    #_list10 = [x for x in list_colo if str(x).__contains__("10") ]
    #_df10 = df_big.copy(deep=True)
    #for col0 in _list10:
    #    _df10 = _df10[np.isfinite(_df10[col0])]
    path_out = mkdir_os('fin_out', root_path)
    write_to_csv(path_out + 'big.csv', df_big)
def analyse_budget(root_path_csv, out_path_name):
    """Average Total_Time per time budget across all statistics.csv files
    under root_path_csv and write a wide per-class table to out_path_name.

    :param root_path_csv: root to walk for statistics.csv files
    :param out_path_name: output csv path
    """
    print "starting...."
    walker = pit_render_test.walker(root_path_csv)
    list_csv = walker.walk("statistics.csv")
    list_df = []
    list_dic_df = {}
    #df_ans = pd.DataFrame()
    for item in list_csv:
        time_str = get_time_from_path(str(item), "_t=")
        df_item = pd.read_csv(str(item))
        #print list(df_item)
        # Note the leading space in ' TARGET_CLASS' — it matches the raw
        # EvoSuite csv header.
        df_item_budget = pd.DataFrame(df_item[[' TARGET_CLASS', 'Total_Time']])
        df_item_budget['Total_Time_' + time_str] = df_item_budget['Total_Time']
        del df_item_budget['Total_Time']
        if time_str in list_dic_df:
            list_dic_df[time_str].append(df_item_budget)
            # NOTE(review): df_ans is only bound here, i.e. on the SECOND
            # occurrence of a time budget. If every budget appears once
            # while len(list_dic_df) > 1, the merge below raises NameError.
            df_ans = pd.DataFrame(df_item_budget[' TARGET_CLASS'].copy())
        else:
            list_dic_df[time_str] = [df_item_budget]
        list_df.append(df_item_budget)
    print "done"
    list_fin_df = []
    if len(list_dic_df) > 1:
        for key in list_dic_df.keys():
            budget_df_i = mereg_df(list_dic_df[key], ' TARGET_CLASS')
            list_col = list(budget_df_i)
            print key
            # print budget_df_i
            list_col.remove(' TARGET_CLASS')
            # Row-wise mean across all runs of this budget.
            budget_df_i["mean_" + key] = budget_df_i[list_col].mean(axis=1)
            df_ans = df_ans.merge(budget_df_i[[' TARGET_CLASS', "mean_" + key]], on=' TARGET_CLASS', how='outer')
            #df_ans["mean_"+key] = budget_df_i["mean_"+key].copy()
        df_ans.to_csv(out_path_name, encoding='utf-8', index=False)
def get_csv_summary(root_p):
    """Return every path under root_p whose name matches "csv".

    :param root_p: directory to scan
    :return: list of csv paths found by the walker
    """
    return pit_render_test.walker(root_p).walk("csv")
def find_All_dir(root_path_dir): print "" walker = pit_render_test.walker(root_path_dir) list_ALL = walker.walk("commons-math3-3.5-src",False)
def clac_by_package(dir_path, path_fp_budget, uni_time):
    """Compute per-package kill statistics comparing FP ('_FP_') and unit
    ('_U_') test-suite mutation CSVs under dir_path.

    Writes one <package>.csv per package, a fin.csv summary table, and an
    all.csv with totals.

    :param dir_path: directory holding the *_U_* / *_FP_* mutation CSVs
    :param path_fp_budget: csv of (class, pred, time) FP budgets
    :param uni_time: unit-suite time budget (string or int, e.g. '30')
    """
    walker = pit_render_test.walker(dir_path)
    list_p = walker.walk(".csv")
    arr_uni = []
    arr_fp = []
    # Split the csvs into unit-suite and FP-suite frames by file name.
    for p in list_p:
        if str(p).__contains__('_U_'):
            arr_uni.append(pd.read_csv(p))
        elif str(p).__contains__('_FP_'):
            arr_fp.append(pd.read_csv(p))
    # Pick the frame with the most columns as the reference mutant table.
    max_num = 0
    max_obj = None
    if len(arr_uni) > 0:
        for m in arr_uni:
            if len(list(m)) > max_num:
                max_obj = m
                max_num = len(list(m))
    elif len(arr_fp) > 0:
        for m in arr_fp:
            if len(list(m)) > max_num:
                max_obj = m
                max_num = len(list(m))
    else:
        print("BAD_Args: no FP or UNI is found")
        exit(0)
    tmper = max_obj
    new_df = tmper[['index', "class", "mutation-type", "method", "line"]].copy()
    all_df = tmper[['index', "class", "mutation-type", "method", "line"]].copy()
    budget_df = pd.read_csv(path_fp_budget, names=["class", "pred", "time"])
    new_df = pd.merge(new_df, budget_df, how='left', on=["class"])
    new_df['uni_budget'] = uni_time
    # FP budget is capped at the unit budget.
    new_df['FP_budget'] = np.where(new_df['time'] > int(uni_time), uni_time, new_df['time'])
    #new_df['pred_bug'] = new_df['time'] / float(uni_time)
    del new_df['time']
    list_name = list(tmper)
    # Per-test-suite result columns are named after org.* classes.
    res = [k for k in list_name if 'org' in k]
    dict_list = []
    all_df['UNI'] = 0
    all_df['FP'] = 0
    size_uni = len(arr_uni)
    size_fp = len(arr_fp)
    for k in res:
        ctr_uni = []
        ctr_fp = []
        new_df['UNI'] = 0
        new_df['FP'] = 0
        # Collect the frames that actually contain column k.
        for df_uni in arr_uni:
            if k in df_uni.columns:
                ctr_uni.append(df_uni)
                continue
        for df_fp in arr_fp:
            if k in df_fp.columns:
                ctr_fp.append(df_fp)
                continue
        # Count KILLED verdicts per mutant across the matching frames.
        if len(ctr_fp) > 0:
            for df_fp in ctr_fp:
                new_df['FP'] += np.where(df_fp[k] == 'KILLED', 1, 0)
        if len(ctr_uni) > 0:
            for df_uni in ctr_uni:
                new_df['UNI'] += np.where(df_uni[k] == 'KILLED', 1, 0)
        new_df['kill_fp'] = np.where(new_df['FP'] > 0, 1, 0)
        new_df['kill_uni'] = np.where(new_df['UNI'] > 0, 1, 0)
        # Fraction of suites (of each kind) that include this class.
        if (size_fp) > 0:
            tmp_size_fp = float(len(ctr_fp)) / float(size_fp)
            new_df["test_suite_FP"] = tmp_size_fp
        else:
            tmp_size_fp = 0
            new_df["test_suite_FP"] = tmp_size_fp
        if (size_uni) > 0:
            tmp_size_u = float(len(ctr_uni)) / float(size_uni)
            new_df["test_suite_U"] = tmp_size_u
        else:
            tmp_size_u = 0
            new_df["test_suite_U"] = tmp_size_u
        dict_list.append({
            "package": k,
            "FP": new_df['FP'].sum(),
            "UNI": new_df['UNI'].sum(),
            "kill_fp": new_df['kill_fp'].sum(),
            "kill_uni": new_df['kill_uni'].sum(),
            "test_suite_FP": tmp_size_fp,
            "test_suite_U": tmp_size_u
        })
        # NOTE(review): FP/UNI are accumulated into all_df but kill_fp /
        # kill_uni are overwritten each iteration — verify intended.
        all_df['FP'] += new_df['FP']
        all_df['UNI'] += new_df['UNI']
        all_df['kill_fp'] = new_df['kill_fp']
        all_df['kill_uni'] = new_df['kill_uni']
        new_df.to_csv(dir_path + str(k) + '.csv', encoding='utf-8', index=False)
    if len(dict_list) > 0:
        df = pd.DataFrame(dict_list, columns=dict_list[0].keys())
        df.to_csv(dir_path + 'fin.csv', encoding='utf-8', index=False)
    all_df.to_csv(dir_path + 'all.csv', encoding='utf-8', index=False)