Ejemplo n.º 1
0
def generate_i(func_index, i0, comb_i0, A, mode_input_relation):
    """Build the full set of inputs (base input plus follow-up inputs).

    The follow-up inputs start out as ``np.dot(A, comb_i0)``.  Depending on
    ``mode_input_relation`` they are used as-is or replaced by random
    samples bounded by them:

    * ``1`` -- equality: the follow-up inputs are exactly ``A . comb_i0``.
    * ``2`` -- each follow-up input is drawn uniformly between
      ``A . comb_i0`` and the upper bound of the input range.
    * ``3`` -- each follow-up input is drawn uniformly between the lower
      bound of the input range and ``A . comb_i0``.

    Args:
        func_index: Index of the program under test; used to look up the
            input range and input datatypes in ``settings``.
        i0: Base input.  It is reshaped to a single row and placed first.
        comb_i0: Vector multiplied by ``A`` to obtain the follow-up inputs.
        A: Coefficient matrix of the input relation.
        mode_input_relation: ``1``, ``2`` or ``3`` (see above).

    Returns:
        Array whose first row is ``i0`` and whose remaining rows are the
        follow-up inputs, each row passed through ``match_type``.

    Raises:
        ValueError: If ``mode_input_relation`` is not 1, 2 or 3.
    """
    i_1_to_end = np.dot(A, comb_i0)

    if mode_input_relation in (2, 3):
        # Inequality input relation. Only applicable to 2 inputs now.
        input_range = settings.get_input_range(func_index)
        min_input = np.zeros(i0.shape)
        max_input = np.zeros(i0.shape)
        # Iterate over the element indices; the shape entry itself is an
        # int and cannot be iterated directly.
        for index in range(i0.shape[0]):
            min_input[index] = input_range[index][0]
            max_input[index] = input_range[index][1]

        if mode_input_relation == 2:
            low = i_1_to_end
            high = np.tile(max_input, (i_1_to_end.shape[0], 1))
        else:
            low = np.tile(min_input, (i_1_to_end.shape[0], 1))
            high = i_1_to_end
        i_1_to_end = np.random.uniform(low=low, high=high,
                                       size=i_1_to_end.shape)
    elif mode_input_relation != 1:
        # Fail early with a clear message instead of hitting an unbound
        # local further down.
        raise ValueError(
            f"unsupported mode_input_relation: {mode_input_relation!r}")

    i = np.concatenate((i0.reshape(1, -1), i_1_to_end), axis=0)

    # Coerce every row to the datatypes the program under test expects.
    input_types = settings.get_input_datatype(func_index)
    for index in range(i.shape[0]):
        i[index] = match_type(i[index], input_types)
    return i
Ejemplo n.º 2
0
def run_phase1():
    """Run phase 1 for every configured function/parameter combination.

    Reads the run configuration from ``settings``, makes sure the output
    directory exists, and calls ``phase1`` once per
    ``(func_index, parameters)`` pair.  The wall-clock duration of each
    call (seconds, rounded to 3 decimals) is stored in the ``"phase1"``
    column of a timing table that is seeded from
    ``<output_path>/performance.csv`` when that file already exists.
    """
    func_indices = settings.func_indices
    parameters_collection = settings.parameters_collection

    output_path = settings.output_path
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(output_path, exist_ok=True)
    pso_runs = settings.pso_runs

    coeff_range = settings.coeff_range
    const_range = settings.const_range

    performance_path = f"{output_path}/performance.csv"
    if os.path.isfile(performance_path):
        times = pd.read_csv(performance_path, index_col=0)
    else:
        times = pd.DataFrame()

    # NOTE(review): within the visible code `times` is never written back
    # to performance.csv -- confirm it is persisted after this loop.
    for func_index in func_indices:
        inputcases_range = settings.get_input_range(func_index)
        for parameters in parameters_collection:
            t1 = datetime.datetime.now()
            phase1(pso_runs, output_path, func_index, parameters,
                   inputcases_range, const_range, coeff_range)
            t2 = datetime.datetime.now()
            cost_time = np.round((t2 - t1).total_seconds(), decimals=3)

            times.loc[f"{func_index}_{parameters}", "phase1"] = cost_time
Ejemplo n.º 3
0
    def run(self):
        """Run the particle swarm search and return the best candidate.

        A swarm of ``(A, B)`` candidates is initialised, scored with
        ``get_cost_of_AB_all`` on a freshly generated set of base inputs,
        and then updated for ``settings.pso_iterations`` iterations via
        ``self.update_AB_all``.  The inertia weight passed to each update
        decays quadratically in the iteration number from 0.9 towards 0.4.

        Returns:
            Tuple ``(min_cost, A, B)``: the cost of the global best
            candidate and its ``A`` and ``B``, taken from the per-particle
            best arrays.
        """
        i0_all = generate_i0_all(settings.get_input_datatype(self.func_index),
                                 settings.get_input_range(self.func_index),
                                 self.no_of_inputcases)
        A_all = self.generate_initial_A_all()
        B_all = self.generate_initial_B_all()

        # Velocities start at zero; each particle's best is its start point.
        A_v_all = np.zeros(A_all.shape)
        B_v_all = np.zeros(B_all.shape)
        A_all_p_best = np.copy(A_all)
        B_all_p_best = np.copy(B_all)
        cost_of_AB_all_p_best = get_cost_of_AB_all(
            self.program, self.func_index, A_all, B_all, i0_all,
            self.mode_input_relation, self.mode_output_relation,
            self.degree_input_relation, self.degree_output_relation,
            self.no_of_elements_output)

        index_g_best = np.argmin(cost_of_AB_all_p_best)
        # Seed the result from the initial swarm so that it is defined even
        # when settings.pso_iterations is 0 and the loop body never runs.
        min_cost = cost_of_AB_all_p_best[index_g_best]

        omega_s = 0.9
        omega_e = 0.4

        iterations = settings.pso_iterations
        for iteration in range(iterations):
            omega = omega_s - (omega_s - omega_e) * (
                (iteration / iterations)**2)
            (A_all, B_all, A_v_all, B_v_all, A_all_p_best, B_all_p_best,
             index_g_best, cost_of_AB_all_p_best) = self.update_AB_all(
                 i0_all, A_all, A_v_all, B_all, B_v_all, A_all_p_best,
                 B_all_p_best, index_g_best, omega, cost_of_AB_all_p_best)
            min_cost = cost_of_AB_all_p_best[index_g_best]
            # Early termination once min_cost drops below a threshold
            # (previously < 1 when mode_output_relation == 1, otherwise
            # < 0.05) is disabled; all iterations always run.

        A = A_all_p_best[index_g_best]
        B = B_all_p_best[index_g_best]

        return min_cost, A, B
Ejemplo n.º 4
0
def phase2(output_path, parameters, func_index, output_name):
    """Filter the phase-1 candidates by re-testing them on fresh inputs.

    Loads the candidates stored in ``<output_path>/phase1/<output_name>``,
    keeps those whose phase-1 cost is below the acceptance threshold, and
    re-evaluates each of them with ``get_cost_of_AB`` on 100 independently
    generated input sets.  A candidate survives only if none of those
    evaluations yields a cost of 0.05 or more.  The survivors are saved to
    ``<output_path>/phase2/<func_index>_<parameters>_after_filter.npz``
    and the candidate counts are recorded in the statistics table.

    Args:
        output_path: Root directory of the experiment outputs.
        parameters: Underscore-separated integers; positions 1-4 are read
            as input-relation mode, output-relation mode, input-relation
            degree and output-relation degree.
        func_index: Index of the program under test.
        output_name: File name of the phase-1 ``.npz`` result to filter.
    """
    no_of_inputcases = 100

    # Seed the statistics from an existing counts.csv.  The check has to
    # target the file itself; isfile() on the directory path is never true.
    counts_path = f"{output_path}/counts.csv"
    if os.path.isfile(counts_path):
        file_statistics = pd.read_csv(counts_path, index_col=0)
    else:
        file_statistics = pd.DataFrame()

    parameters_int = [int(e) for e in parameters.split("_")]
    mode_input_relation = parameters_int[1]
    mode_output_relation = parameters_int[2]
    degree_of_input_relation = parameters_int[3]
    degree_of_output_relation = parameters_int[4]

    no_of_elements_output = settings.getNEO(func_index)

    A_candidates_after_filter = []
    B_candidates_after_filter = []
    ini_count = 0
    survive_count = 0

    # The context manager closes the archive even if reading fails.
    with np.load('{}/phase1/{}'.format(output_path,
                                       output_name)) as results_all:
        min_cost_candidates = results_all['min_cost_candidates']
        A_candidates = results_all['A_candidates']
        B_candidates = results_all['B_candidates']
    all_count = min_cost_candidates.shape[0]

    # Phase-1 acceptance threshold depends on the output-relation mode.
    phase1_threshold = 5 if mode_output_relation == 1 else 0.05

    for index_candidate in range(all_count):
        min_cost = min_cost_candidates[index_candidate]
        A = A_candidates[index_candidate]
        B = B_candidates[index_candidate]

        if not min_cost < phase1_threshold:
            continue
        ini_count += 1

        for _ in range(100):
            i0_all = Phase1_PSOSearch.generate_i0_all(
                settings.get_input_datatype(func_index),
                settings.get_input_range(func_index), no_of_inputcases)
            survive_cost = get_cost_of_AB(settings.program, func_index, A,
                                          B, i0_all, mode_input_relation,
                                          mode_output_relation,
                                          degree_of_input_relation,
                                          degree_of_output_relation,
                                          no_of_elements_output)
            if survive_cost >= 0.05:
                # One failing re-test is enough to reject the candidate.
                break
        else:
            survive_count += 1
            A_candidates_after_filter.append(A)
            B_candidates_after_filter.append(B)

    A_candidates_after_filter = np.array(A_candidates_after_filter)
    B_candidates_after_filter = np.array(B_candidates_after_filter)

    os.makedirs("{}/phase2".format(output_path), exist_ok=True)

    np.savez(
        f'{output_path}/phase2/{func_index}_{parameters}_after_filter.npz',
        A_candidates=A_candidates_after_filter,
        B_candidates=B_candidates_after_filter)

    # NOTE(review): within the visible code `file_statistics` is never
    # written back to counts.csv -- confirm it is persisted after this.
    file_statistics.loc[f"{func_index}_{parameters}", "pso"] = all_count
    file_statistics.loc[f"{func_index}_{parameters}", "phase1"] = ini_count
    file_statistics.loc[f"{func_index}_{parameters}", "phase2"] = survive_count
Ejemplo n.º 5
0
def _evaluate_output_terms(y_all_names, y_element_value_dict):
    """Return the numeric value of every output-relation term.

    Each entry of ``y_all_names`` is a sequence of factors.  A factor that
    parses as a float is used literally; any other factor is looked up in
    ``y_element_value_dict``.  The term value is the product of its factors.
    """
    y_all_values = np.zeros(y_all_names.shape)
    for index_y in range(y_all_names.shape[0]):
        y_elements = []
        for y_name in list(y_all_names[index_y]):
            try:
                y_elements.append(float(y_name))
            except (TypeError, ValueError):
                # Not a numeric literal: it names a program output element.
                y_elements.append(y_element_value_dict[y_name])
        y_all_values[index_y] = np.prod(y_elements)
    return y_all_values


def _filter_MRs(func_index, i0_all, MRs, comb_arg, no_of_elements_output,
                no_of_testcases):
    """Re-test the relations of one entry and drop the ones that fail.

    ``MRs[0]`` maps input names to coefficient matrices and ``MRs[1]`` is a
    DataFrame with one relation per row and one output term per column.
    Every relation is evaluated on every row of ``i0_all``; an evaluation
    counts as a kill when the relation value is not real, is NaN, or has an
    absolute value of 0.1 or more.  Relations whose kill ratio
    (kills / ``no_of_testcases``) is 0.05 or more are removed from
    ``MRs[1]`` in place.

    Returns:
        The number of relations that were kept.
    """
    x_all_dict = MRs[0]
    y_all_df = MRs[1]
    no_of_MRs = y_all_df.shape[0]

    # Loop-invariant views of the relation table.
    y_all_names = y_all_df.columns.values
    B_rows = [y_all_df.iloc[index_MR, :].values
              for index_MR in range(no_of_MRs)]

    kill_counts = np.zeros(no_of_MRs)
    for index_i0 in range(i0_all.shape[0]):
        i0 = i0_all[index_i0]
        u = Phase1_PSOSearch.comb(i0, comb_arg)

        # Output elements of every follow-up input and of the base input,
        # keyed by the names used in the relation columns.
        y_element_value_dict = {}
        for x_name, A in x_all_dict.items():
            y = settings.program(np.dot(A, u), func_index)
            for index_eo in range(no_of_elements_output):
                y_element_value_dict[f"f{x_name}_{index_eo + 1}"] = y[
                    index_eo]
        y0 = settings.program(i0, func_index)
        for index_eo in range(no_of_elements_output):
            y_element_value_dict[f"fx0_{index_eo + 1}"] = y0[index_eo]

        y_all_values = _evaluate_output_terms(y_all_names,
                                              y_element_value_dict)

        for index_MR in range(no_of_MRs):
            Bv = np.dot(B_rows[index_MR], y_all_values)
            holds = (np.isreal(Bv) and not np.isnan(Bv)
                     and np.abs(Bv) < 0.1)
            if not holds:
                kill_counts[index_MR] += 1

    # Without any test input nothing was evaluated, so nothing is counted
    # as kept and nothing is dropped.
    if i0_all.shape[0] == 0:
        return 0

    survivors = [
        index_MR for index_MR in range(no_of_MRs)
        if np.divide(kill_counts[index_MR], no_of_testcases) < 0.05
    ]
    if len(survivors) < no_of_MRs:
        # Select by position: the relations were evaluated by position, and
        # the row labels need not be 0..n-1 (e.g. after an earlier filter).
        MRs[1] = y_all_df.iloc[survivors]
    return len(survivors)


def checkAfterSVD(folder_path, func_indices):
    """Re-test the phase-3 relations of every function and prune failures.

    For each function index, every pickle in ``<folder_path>/phase3`` whose
    name starts with ``<func_index>_`` and ends with
    ``group_after_cs_svd.pkl`` or ``other_types_after_cs_svd.pkl`` is
    loaded, its relations are re-tested on 100 generated inputs, failing
    relations are removed, and the result is written to
    ``<func_index>_MRs_group_after_cs_svd.pkl`` or
    ``<func_index>_MRs_other_types_after_cs_svd.pkl`` respectively.  The
    number of surviving relations per entry is stored in the ``"phase3"``
    column of the table read from ``<folder_path>/counts.csv``.  Files
    ending in ``.npz`` are skipped.

    Args:
        folder_path: Root directory of the experiment outputs.
        func_indices: Iterable of function indices to process.
    """
    filer_phase3_df = pd.read_csv(f"{folder_path}/counts.csv", index_col=0)
    no_of_testcases = 100

    # NOTE(review): within the visible code `filer_phase3_df` is never
    # written back to counts.csv -- confirm it is persisted afterwards.
    for func_index in func_indices:
        no_of_elements_output = settings.getNEO(func_index)
        MRs_types = os.listdir(f"{folder_path}/phase3")

        for MRs_type in MRs_types:
            if not MRs_type.startswith(f"{func_index}_"):
                continue
            if MRs_type.endswith(".npz"):
                # MRs stored in npz format are not re-tested here.
                continue
            if MRs_type.endswith("group_after_cs_svd.pkl"):
                is_group = True
                output_kind = "group"
            elif MRs_type.endswith("other_types_after_cs_svd.pkl"):
                is_group = False
                output_kind = "other_types"
            else:
                continue

            i0_all = Phase1_PSOSearch.generate_i0_all(
                settings.get_input_datatype(func_index),
                settings.get_input_range(func_index), no_of_testcases)

            # SECURITY: pickle.load executes arbitrary code from the file;
            # only use this on phase-3 output produced by a trusted run.
            with open(f"{folder_path}/phase3/{MRs_type}", "rb") as f:
                MRs_dict = pickle.load(f)

            for parameters, MRs in MRs_dict.items():
                if is_group:
                    # Grouped entries carry the second argument of comb()
                    # as their third element.
                    comb_arg = MRs[2]
                else:
                    # Otherwise it is the fourth integer of the
                    # underscore-separated parameter string.
                    parameters_int = [int(e) for e in parameters.split("_")]
                    comb_arg = parameters_int[3]

                filer_phase3 = _filter_MRs(func_index, i0_all, MRs, comb_arg,
                                           no_of_elements_output,
                                           no_of_testcases)
                filer_phase3_df.loc[f"{func_index}_{parameters}",
                                    "phase3"] = filer_phase3

            with open(
                    f"{folder_path}/phase3/{func_index}_MRs_{output_kind}_after_cs_svd.pkl",
                    "wb") as f2:
                pickle.dump(MRs_dict, f2, pickle.HIGHEST_PROTOCOL)