def generate_i(func_index, i0, comb_i0, A, mode_input_relation):
    """Build the stacked array of the base input and its related inputs.

    The related inputs start from ``np.dot(A, comb_i0)``.  Depending on
    ``mode_input_relation`` they are used as-is or replaced by random samples
    bounded by that product and the input range of the function.

    Args:
        func_index: Index passed to ``settings.get_input_range`` and
            ``settings.get_input_datatype``.
        i0: Base input.  It is reshaped to a single row, so it is assumed to
            be a 1-D array -- TODO confirm against callers.
        comb_i0: Right-hand operand of ``np.dot(A, comb_i0)``.
        A: Left-hand operand of ``np.dot(A, comb_i0)``.
        mode_input_relation: ``1`` for the equality relation; ``2`` to sample
            each related input uniformly between ``A . comb_i0`` and the
            maximum of the input range; ``3`` to sample uniformly between the
            minimum of the input range and ``A . comb_i0``.

    Returns:
        Array whose first row is ``i0`` and whose remaining rows are the
        related inputs, each row passed through ``match_type``.

    Raises:
        ValueError: If ``mode_input_relation`` is not 1, 2 or 3.
    """
    i_1_to_end = np.dot(A, comb_i0)

    # equal input relation
    if mode_input_relation == 1:
        i = np.concatenate((i0.reshape(1, -1), i_1_to_end), axis=0)
    # inequality input relation. Only applicable to 2 inputs now.
    else:
        input_range = settings.get_input_range(func_index)
        min_input = np.zeros(i0.shape)
        max_input = np.zeros(i0.shape)
        # The original iterated over the int ``i0.shape[0]`` itself, which
        # raises TypeError; iterate over its range instead.
        for index in range(i0.shape[0]):
            min_input[index] = input_range[index][0]
            max_input[index] = input_range[index][1]

        # One copy of the bounds per related input row.
        i_1_to_end_min = np.tile(min_input, (i_1_to_end.shape[0], 1))
        i_1_to_end_max = np.tile(max_input, (i_1_to_end.shape[0], 1))

        if mode_input_relation == 2:
            i_1_to_end = np.random.uniform(low=i_1_to_end,
                                           high=i_1_to_end_max,
                                           size=i_1_to_end.shape)
        elif mode_input_relation == 3:
            i_1_to_end = np.random.uniform(low=i_1_to_end_min,
                                           high=i_1_to_end,
                                           size=i_1_to_end.shape)
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on ``i``; fail with a clear message instead.
            raise ValueError(
                f"unsupported mode_input_relation: {mode_input_relation!r}")
        i = np.concatenate((i0.reshape(1, -1), i_1_to_end), axis=0)

    input_types = settings.get_input_datatype(func_index)
    for index in range(i.shape[0]):
        i[index] = match_type(i[index], input_types)
    return i
def run_phase1():
    """Run ``phase1`` for every configured function / parameter combination.

    Configuration is read from ``settings``.  The output directory is created
    when missing.  The wall-clock duration of each ``phase1`` call, in seconds
    rounded to three decimals, is recorded in the ``"phase1"`` column of a
    timing table that is seeded from ``<output_path>/performance.csv`` when
    that file exists.
    """
    # print("start phase1: searching for MRs...")
    output_path = settings.output_path
    if not os.path.isdir("{}".format(output_path)):
        os.makedirs(output_path)

    performance_csv = f"{output_path}/performance.csv"
    times = (pd.read_csv(performance_csv, index_col=0)
             if os.path.isfile(performance_csv) else pd.DataFrame())

    pso_runs = settings.pso_runs
    coeff_range = settings.coeff_range
    const_range = settings.const_range
    parameters_collection = settings.parameters_collection

    for func_index in settings.func_indices:
        inputcases_range = settings.get_input_range(func_index)
        for parameters in parameters_collection:
            started = datetime.datetime.now()
            phase1(pso_runs, output_path, func_index, parameters,
                   inputcases_range, const_range, coeff_range)
            elapsed = datetime.datetime.now() - started
            row_label = f"{func_index}_{parameters}"
            times.loc[row_label, "phase1"] = np.round(
                elapsed.total_seconds(), decimals=3)
    # NOTE(review): nothing visible here writes ``times`` back to
    # performance.csv -- confirm whether that happens elsewhere.
def run(self):
    """Run the particle-swarm search and return the best candidate found.

    Initial inputs, particles (``A_all`` / ``B_all``) and zero velocities are
    created, the initial cost of every particle is evaluated, and then
    ``self.update_AB_all`` is applied ``settings.pso_iterations`` times with
    an inertia weight that decays quadratically from 0.9 to 0.4.

    Returns:
        Tuple ``(min_cost, A, B)``: the personal-best cost at the global-best
        index and the corresponding ``A`` and ``B``.
    """
    i0_all = generate_i0_all(
        settings.get_input_datatype(self.func_index),
        settings.get_input_range(self.func_index),
        self.no_of_inputcases)

    A_all = self.generate_initial_A_all()
    B_all = self.generate_initial_B_all()
    A_v_all = np.zeros(A_all.shape)
    B_v_all = np.zeros(B_all.shape)
    A_all_p_best = np.copy(A_all)
    B_all_p_best = np.copy(B_all)

    cost_of_AB_all_p_best = get_cost_of_AB_all(
        self.program, self.func_index, A_all, B_all, i0_all,
        self.mode_input_relation, self.mode_output_relation,
        self.degree_input_relation, self.degree_output_relation,
        self.no_of_elements_output)
    index_g_best = np.argmin(cost_of_AB_all_p_best)

    # Inertia weight at the start and at the end of the search.
    omega_s = 0.9
    omega_e = 0.4
    iterations = settings.pso_iterations
    for iteration in range(iterations):
        omega = omega_s - (omega_s - omega_e) * (
            (iteration / iterations) ** 2)
        (A_all, B_all, A_v_all, B_v_all, A_all_p_best, B_all_p_best,
         index_g_best, cost_of_AB_all_p_best) = self.update_AB_all(
            i0_all, A_all, A_v_all, B_all, B_v_all, A_all_p_best,
            B_all_p_best, index_g_best, omega, cost_of_AB_all_p_best)

    # Read the best cost after the loop: the value is the same as reading it
    # on the last iteration, and it stays defined when pso_iterations is 0
    # (the original raised UnboundLocalError in that case).
    min_cost = cost_of_AB_all_p_best[index_g_best]
    A = A_all_p_best[index_g_best]
    B = B_all_p_best[index_g_best]
    return min_cost, A, B
def phase2(output_path, parameters, func_index, output_name):
    """Filter phase-1 candidates by re-validating them on fresh random inputs.

    Candidates are loaded from ``<output_path>/phase1/<output_name>``.  A
    candidate passes the phase-1 threshold when its stored cost is below 5
    (output-relation mode 1) or below 0.05 (any other mode).  Each passing
    candidate is then re-evaluated on 100 freshly generated input sets of 100
    cases each and survives only if every cost is below 0.05.  Survivors are
    saved to ``<output_path>/phase2/<func_index>_<parameters>_after_filter.npz``.

    Args:
        output_path: Root directory of the pipeline outputs.
        parameters: Underscore-separated integers; positions 1-4 are read as
            input-relation mode, output-relation mode, input-relation degree
            and output-relation degree.
        func_index: Index of the function under test, passed to ``settings``.
        output_name: File name of the phase-1 ``.npz`` result.
    """
    no_of_inputcases = 100
    no_of_validation_rounds = 100

    # The original tested ``isfile(f"{output_path}/")`` -- a directory path,
    # so always False -- and therefore never loaded the existing statistics.
    counts_path = f"{output_path}/counts.csv"
    if os.path.isfile(counts_path):
        file_statistics = pd.read_csv(counts_path, index_col=0)
    else:
        file_statistics = pd.DataFrame()

    parameters_int = [int(e) for e in parameters.split("_")]
    mode_input_relation = parameters_int[1]
    mode_output_relation = parameters_int[2]
    degree_of_input_relation = parameters_int[3]
    degree_of_output_relation = parameters_int[4]
    no_of_elements_output = settings.getNEO(func_index)

    # Loop-invariant lookups, hoisted out of the validation loop.
    input_datatype = settings.get_input_datatype(func_index)
    input_range = settings.get_input_range(func_index)

    # The context manager closes the archive even if reading fails.
    with np.load('{}/phase1/{}'.format(output_path,
                                       output_name)) as results_all:
        min_cost_candidates = results_all['min_cost_candidates']
        A_candidates = results_all['A_candidates']
        B_candidates = results_all['B_candidates']

    phase1_threshold = 5 if mode_output_relation == 1 else 0.05

    A_candidates_after_filter = []
    B_candidates_after_filter = []
    ini_count = 0
    survive_count = 0
    all_count = min_cost_candidates.shape[0]
    for index_candidate in range(all_count):
        if not min_cost_candidates[index_candidate] < phase1_threshold:
            continue
        ini_count += 1
        A = A_candidates[index_candidate]
        B = B_candidates[index_candidate]

        for _ in range(no_of_validation_rounds):
            i0_all = Phase1_PSOSearch.generate_i0_all(
                input_datatype, input_range, no_of_inputcases)
            survive_cost = get_cost_of_AB(
                settings.program, func_index, A, B, i0_all,
                mode_input_relation, mode_output_relation,
                degree_of_input_relation, degree_of_output_relation,
                no_of_elements_output)
            if survive_cost >= 0.05:
                break
        else:
            # No validation round rejected the candidate.
            survive_count += 1
            A_candidates_after_filter.append(A)
            B_candidates_after_filter.append(B)

    A_candidates_after_filter = np.array(A_candidates_after_filter)
    B_candidates_after_filter = np.array(B_candidates_after_filter)

    os.makedirs("{}/phase2".format(output_path), exist_ok=True)
    np.savez(
        f'{output_path}/phase2/{func_index}_{parameters}_after_filter.npz',
        A_candidates=A_candidates_after_filter,
        B_candidates=B_candidates_after_filter)

    row_label = f"{func_index}_{parameters}"
    file_statistics.loc[row_label, "pso"] = all_count
    file_statistics.loc[row_label, "phase1"] = ini_count
    file_statistics.loc[row_label, "phase2"] = survive_count
    # NOTE(review): nothing visible here writes ``file_statistics`` back to
    # counts.csv -- confirm whether that happens elsewhere.
def _evaluate_output_terms(y_all_names, y_element_value_dict):
    """Return the numeric value of every output term (one per MR column).

    Each entry of ``y_all_names`` is iterated as a sequence of factors.  A
    factor that converts with ``float`` is used as a constant; any other
    factor is looked up in ``y_element_value_dict``.
    """
    y_all_values = np.zeros(y_all_names.shape)
    for index_y, term in enumerate(y_all_names):
        y_elements = []
        for factor in list(term):
            try:
                y_elements.append(float(factor))
            except (TypeError, ValueError):
                # Not a numeric literal: it names a program output element.
                y_elements.append(y_element_value_dict[factor])
        # np.product was removed in NumPy 2.0; np.prod is the same function.
        y_all_values[index_y] = np.prod(y_elements)
    return y_all_values


def _filter_MRs(MRs, i0_all, comb_arg, func_index, no_of_elements_output,
                no_of_testcases):
    """Drop violated MRs from ``MRs[1]`` in place and count the survivors.

    Every row of ``MRs[1]`` is evaluated on every input of ``i0_all``.  A row
    is "killed" on an input when ``np.dot(row, term_values)`` is not real, is
    NaN, or has an absolute value of at least 0.1.  Rows whose kill count
    divided by ``no_of_testcases`` is 0.05 or more are dropped.

    Returns:
        Number of rows that were kept.
    """
    x_all_dict = MRs[0]
    y_all_df = MRs[1]
    no_of_MRs = y_all_df.shape[0]
    if i0_all.shape[0] == 0:
        # Without test inputs nothing is evaluated: keep the table untouched
        # and report zero survivors, as the previous implementation did.
        return 0

    y_all_names = y_all_df.columns.values
    kill_counts = np.zeros(no_of_MRs)
    for i0 in i0_all:
        u = Phase1_PSOSearch.comb(i0, comb_arg)
        y_element_value_dict = {}
        for x_name, A in x_all_dict.items():
            y = settings.program(np.dot(A, u), func_index)
            for index_eo in range(no_of_elements_output):
                y_element_value_dict[f"f{x_name}_{index_eo + 1}"] = y[index_eo]
        y0 = settings.program(i0, func_index)
        for index_eo in range(no_of_elements_output):
            y_element_value_dict[f"fx0_{index_eo + 1}"] = y0[index_eo]

        y_all_values = _evaluate_output_terms(y_all_names,
                                              y_element_value_dict)
        for index_MR in range(no_of_MRs):
            B = y_all_df.iloc[index_MR, :].values
            Bv = np.dot(B, y_all_values)
            holds = (np.isreal(Bv) and not np.isnan(Bv)
                     and np.abs(Bv) < 0.1)
            if not holds:
                kill_counts[index_MR] += 1

    failed_positions = [
        index_MR for index_MR in range(no_of_MRs)
        if not np.divide(kill_counts[index_MR], no_of_testcases) < 0.05
    ]
    if failed_positions:
        # Drop by the label found at each position.  Using the position itself
        # as a label is only correct for a default 0..n-1 index and breaks on
        # a table that was already filtered once.
        MRs[1] = y_all_df.drop(y_all_df.index[failed_positions])
    return no_of_MRs - len(failed_positions)


def checkAfterSVD(folder_path, func_indices):
    """Re-validate the phase-3 MR groups of each function on random inputs.

    For every ``func_index`` the ``*group_after_cs_svd.pkl`` and
    ``*other_types_after_cs_svd.pkl`` files in ``<folder_path>/phase3`` whose
    names start with ``<func_index>_`` are loaded.  Each MR table is checked
    on 100 generated inputs, violated MRs are dropped, and the result is
    written to ``<func_index>_MRs_group_after_cs_svd.pkl`` or
    ``<func_index>_MRs_other_types_after_cs_svd.pkl`` respectively.  The
    number of surviving MRs is recorded in the ``"phase3"`` column of the
    table read from ``<folder_path>/counts.csv``.

    Args:
        folder_path: Root directory holding ``counts.csv`` and ``phase3/``.
        func_indices: Iterable of function indices to process.
    """
    filer_phase3_df = pd.read_csv(f"{folder_path}/counts.csv", index_col=0)
    no_of_testcases = 100
    phase3_dir = f"{folder_path}/phase3"

    for func_index in func_indices:
        no_of_elements_output = settings.getNEO(func_index)
        prefix = f"{func_index}_"
        for MRs_type in os.listdir(phase3_dir):
            if not MRs_type.startswith(prefix):
                continue
            if MRs_type.endswith(".npz"):
                # MRs stored in npz format need no processing here.
                continue
            if MRs_type.endswith("group_after_cs_svd.pkl"):
                is_group = True
                out_path = (
                    f"{folder_path}/phase3/"
                    f"{func_index}_MRs_group_after_cs_svd.pkl")
            elif MRs_type.endswith("other_types_after_cs_svd.pkl"):
                is_group = False
                out_path = (
                    f"{folder_path}/phase3/"
                    f"{func_index}_MRs_other_types_after_cs_svd.pkl")
            else:
                continue

            i0_all = Phase1_PSOSearch.generate_i0_all(
                settings.get_input_datatype(func_index),
                settings.get_input_range(func_index),
                no_of_testcases)
            # NOTE(security): pickle.load executes arbitrary code from the
            # file; only use on phase-3 outputs produced by this pipeline.
            with open(f"{folder_path}/phase3/{MRs_type}", "rb") as f:
                MRs_dict = pickle.load(f)

            for parameters, MRs in MRs_dict.items():
                if is_group:
                    # Group files carry the argument for comb() in slot 2.
                    comb_arg = MRs[2]
                else:
                    # Other files encode the input-relation degree as the
                    # fourth underscore-separated integer of the key.
                    parameters_int = [int(e) for e in parameters.split("_")]
                    comb_arg = parameters_int[3]
                filer_phase3 = _filter_MRs(
                    MRs, i0_all, comb_arg, func_index,
                    no_of_elements_output, no_of_testcases)
                filer_phase3_df.loc[f"{func_index}_{parameters}",
                                    "phase3"] = filer_phase3

            with open(out_path, "wb") as f2:
                pickle.dump(MRs_dict, f2, pickle.HIGHEST_PROTOCOL)
    # NOTE(review): nothing visible here writes ``filer_phase3_df`` back to
    # counts.csv -- confirm whether that happens elsewhere.