Exemple #1
0
    def change(self,x_train, y_train, percetage, mnb, change_plan):
        """Zero out feature subsets of training rows until ``mnb`` predicts a new label.

        Candidate feature subsets are ranked with the key
        ``len(subset) * 1000 - summed ExtraTreesClassifier importance``
        (ascending) and tried in that order on every eligible row.

        Args:
            x_train: Array of training rows; ``.shape`` and row indexing are used.
            y_train: Labels, indexable in step with the rows of ``x_train``.
            percetage: Share of rows (in percent) that should be changed overall.
            mnb: Classifier exposing ``predict``.
            change_plan: Mapping with ``"key"`` (sequence whose entries hold the
                current label at index 0 and the wanted label at index 1) and
                ``"number"`` (rows to flip for the entry at the same position).

        Returns:
            A copy of the working array in which flipped rows have the chosen
            features set to 0.
        """
        number_change_requested = int(percetage / 100 * x_train.shape[0])
        print("{} percentage error is equal to {} change \n".format(percetage, number_change_requested))

        x_train_changed = np.copy(x_train)
        used_row = {}
        all_changed = 1  # 1-based counter: value is "index of the next change"

        # Rank the features with the importances of a freshly fitted forest.
        forest = ExtraTreesClassifier()
        forest.fit(x_train, y_train)
        information_gain = dict(enumerate(forest.feature_importances_))

        # Map every non-empty feature subset to the sum of its importances.
        ranked_information_dic = {}
        for size in range(x_train.shape[1] + 1):
            for subset in Change_Combination.combinations_index(self, information_gain.keys(), size):
                if not subset:
                    continue
                ranked_information_dic[tuple(subset)] = sum(map(information_gain.get, subset))

        def rank(entry):
            # Smaller subsets first; within one size, larger summed gain first.
            return len(entry[0]) * 1000 - entry[1]

        all_subset = sorted(ranked_information_dic.items(), key=rank)

        for i, plan_pair in enumerate(change_plan["key"]):
            occurred_change = 0

            indices = [pos for pos, label in enumerate(y_train) if label == plan_pair[0]]
            print("{} rows have target {} \n".format(len(indices), plan_pair[0]))

            for row in indices:
                if all_changed == number_change_requested + 1:
                    print("your requests have been done :)")
                    break

                # Only rows the model currently gets right, and that were not
                # flipped already, are candidates.
                if not (y_train[row] == mnb.predict([x_train[row]])) or row in used_row:
                    continue

                for features, _gain in all_subset:
                    if occurred_change == change_plan["number"][i]:
                        break

                    print("try to change with change index {}".format(list(features)))
                    x_train_changed[row][list(features)] = 0

                    if plan_pair[1] == mnb.predict([x_train_changed[row]])[0]:
                        print(x_train[row], mnb.predict([x_train[row]])[0])
                        print(x_train_changed[row],
                              mnb.predict([x_train_changed[row]])[0])
                        print(" \n change number {} \n".format(all_changed))

                        used_row[row] = row
                        occurred_change += 1
                        all_changed += 1
                        break

                    # The prediction did not flip: undo the attempt.
                    x_train_changed[row] = np.copy(x_train[row])

        if all_changed <= number_change_requested:
            print("your request doesn't complete! please change your plan")
        return np.copy(x_train_changed)
    def change(self, x_train, y_train, percetage, mnb, change_plan):
        """Flip model predictions by zeroing features, trying the closest rows first.

        Feature subsets are ranked with the importances of an
        ``ExtraTreesClassifier`` fitted on the training data. For each entry of
        the change plan, rows carrying the current label are ordered by the
        absolute difference between two ``predict_proba`` columns (smallest
        difference first) and each eligible row is tried against the ranked
        subsets until ``mnb`` predicts the wanted label.

        Args:
            x_train: Array of training rows; ``.shape`` and row indexing are used.
            y_train: Labels, indexable in step with the rows of ``x_train``.
            percetage: Share of rows (in percent) that should be changed overall.
            mnb: Classifier exposing ``predict`` and ``predict_proba``.
            change_plan: Mapping with ``"key"`` (entries hold the current label
                at index 0 and the wanted label at index 1) and ``"number"``
                (rows to flip for the entry at the same position).

        Returns:
            A copy of the working array in which flipped rows have the chosen
            features set to 0.
        """

        number_change_requested = int(percetage / 100 * x_train.shape[0])
        print("{} percentage error is equal to {} change \n".format(
            percetage, number_change_requested))

        used_row = {}  # rows that were already flipped (row index -> row index)
        occurred_change = 0  # flips done for the current plan entry
        all_changed = 1  # 1-based: index of the next flip over all plan entries
        change_done = False
        x_train_changed = np.copy(x_train)

        #---------------------find the order of the feature according to information gain-----------------------

        model = ExtraTreesClassifier()
        model.fit(x_train, y_train)

        print("combination of feature")

        # feature index -> importance reported by the fitted forest
        information_gain = {}
        for i in range(len(model.feature_importances_)):
            information_gain.update({i: model.feature_importances_[i]})

        # tuple of feature indices -> sum of their importances
        ranked_information_dic = {}
        sum_gain = 0
        for L in range(0, x_train.shape[1] + 1):
            for subset in Change_Combination.combinations_index(
                    self, information_gain.keys(), L):
                if not subset:
                    pass
                else:

                    for key in subset:
                        sum_gain = sum_gain + information_gain.get(key)
                    ranked_information_dic.update({tuple(subset): sum_gain})
                    sum_gain = 0

        print("create all subset")

        # Ascending sort: the subset size dominates the key, the summed
        # importance is subtracted so a larger gain sorts earlier within a size.
        all_subset = sorted(ranked_information_dic.items(),
                            key=lambda item: len(item[0]) * 1000 - item[1],
                            reverse=False)

        probability = mnb.predict_proba(x_train)
        #print(probability)
        probability_distance = {}

        #----------------------------------------------changing--------------------------------------------------

        for i in range(len(change_plan["key"])):
            occurred_change = 0

            # positions of all rows whose label equals the current label of this plan entry
            indices = [
                t for t, x in enumerate(y_train)
                if x == change_plan["key"][i][0]
            ]
            #print(indices)
            print("{} rows have target {} \n".format(len(indices),
                                                     change_plan["key"][i][0]))

            probability_distance.clear()
            probability_distance_sorted = []

            # find the probability distance between the current class and the
            # class the user wants to change to
            # NOTE(review): the `- 1` column lookup assumes the labels are
            # 1-based integers that line up with the predict_proba columns —
            # confirm against the classifier's class ordering.

            for elements in indices:
                probability_distance.update({
                    elements:
                    np.abs(probability[elements][change_plan["key"][i][0] -
                                                 1] -
                           probability[elements][change_plan["key"][i][1] - 1])
                })
            # ---------------------------finding the order of the row according to probability distance-------------------------
            # sort the rows by probability distance, smallest first

            probability_distance_sorted = sorted(probability_distance.items(),
                                                 key=lambda x: x[1],
                                                 reverse=False)
            indices = []
            for j in probability_distance_sorted:
                indices.append(j[0])

            #print(indices)

            print("try in indices")
            for p in range(len(indices)):

                # all_changed starts at 1, so this is true once the requested
                # number of flips has been made
                if (all_changed == number_change_requested + 1):
                    print("your requests have been done :)")
                    break
                # only rows the model currently predicts correctly and that
                # were not flipped before are tried
                if y_train[indices[p]] == mnb.predict(
                    [x_train[indices[p]]]) and indices[p] not in used_row:

                    change_done = False
                    for subset in all_subset:
                        # set by the previous iteration after a successful flip
                        if change_done:
                            break
                        else:

                            if (occurred_change == change_plan["number"][i]):
                                #print("part of your request has been done :))))")
                                break

                            #
                            # if len(list(subset[0]))>5:
                            #     print("max number of the operations")
                            #     break

                            print(
                                "try to change, with changing index {} on row {}"
                                .format(list(subset[0]), indices[p]))

                            #######################################################
                            # impose an outlier instead of 0

                            # mean = np.mean(x_train[:,list(subset[0])])
                            # std = np.std(x_train[:,list(subset[0])])
                            # maximum = np.max(x_train[:, list(subset[0])])
                            #
                            # threshold = mean + 2 * std
                            # outlier = x_train[:,list(subset[0])][x_train[:,list(subset[0])]>threshold]
                            #
                            # if len(outlier):
                            #     x_train_changed[indices[p]][list(subset[0])] = outlier[0]
                            #
                            # else:
                            #     x_train_changed[indices[p]][list(subset[0])] = threshold +1

                            # impose an outlier on the column instead of 0
                            # x_train_changed[indices[p]][list(subset[0])] = maximum +0.1*maximum

                            # find the indices of rows that belong to the new target
                            # indices_2 = [t for t, x in enumerate(y_train) if x == change_plan["key"][i][1]]

                            #---------- put the average of the rows that belong to the new target for these columns
                            # print(np.mean(x_train[indices_2,list(subset[0])[0]]))
                            # x_train_changed[indices[p]][list(subset[0])] = np.mean(x_train[indices_2,list(subset[0])[0]])

                            #----------- put the first value that matches the new target
                            # x_train_changed[indices[p]][list(subset[0])] = x_train_changed[indices_2[0]][list(subset[0])]

                            ########################################################
                            # active strategy: set the selected features to 0
                            x_train_changed[indices[p]][list(subset[0])] = 0

                            if (change_plan["key"][i][1] == mnb.predict(
                                [x_train_changed[indices[p]]])[0]):

                                print(x_train[indices[p]],
                                      mnb.predict([x_train[indices[p]]])[0])
                                print(
                                    x_train_changed[indices[p]],
                                    mnb.predict([x_train_changed[indices[p]]
                                                 ])[0])
                                print(
                                    " \n change number {} on row {} \n".format(
                                        all_changed, indices[p]))

                                used_row.update({indices[p]: indices[p]})
                                occurred_change = occurred_change + 1
                                change_done = True
                                all_changed = all_changed + 1
                                #break

                            else:
                                # the prediction did not flip: restore the row
                                x_train_changed[indices[p]] = np.copy(
                                    x_train[indices[p]])

        if (all_changed <= number_change_requested):
            print("your request doesn't complete! please change your plan")
        return np.copy(x_train_changed)
    def change(self, x_train, y_train, percetage, mnb,
               change_plan):  #check_combination_change_plan_probability
        """Flip predictions using, per row, the smallest feature subset that works.

        For every entry of the change plan the method first searches, for each
        eligible row, the first feature subset (sizes tried in increasing
        order) whose zeroing makes ``mnb`` predict the wanted label. The
        candidates are then applied starting with the smallest subsets until
        the requested number of rows is reached, and a bar chart of candidates
        per subset size is saved as
        ``./outputs/fig_output/change_combination_min/request<i>.png``.

        Args:
            x_train: Array of training rows; ``.shape`` and row indexing are used.
            y_train: Labels, indexable in step with the rows of ``x_train``.
            percetage: Share of rows (in percent) that should be changed overall.
            mnb: Classifier exposing ``predict``.
            change_plan: Mapping with ``"key"`` (entries hold the current label
                at index 0 and the wanted label at index 1) and ``"number"``
                (rows to flip for the entry at the same position).

        Returns:
            A copy of the working array in which flipped rows have the chosen
            features set to 0.
        """
        import os  # local import: only needed to prepare the figure folder

        number_change_requested = int(percetage / 100 * x_train.shape[0])
        print("{} percentage error is equal to {} change \n".format(
            percetage, number_change_requested))

        used_row = {}
        all_changed = 1  # 1-based: index of the next flip over all plan entries
        x_train_changed = np.copy(x_train)
        # Taken from the array itself instead of from a leaked loop variable.
        n_features = x_train.shape[1]

        for i in range(len(change_plan["key"])):
            source_label = change_plan["key"][i][0]
            target_label = change_plan["key"][i][1]

            occurred_change = 0
            indices = [t for t, x in enumerate(y_train) if x == source_label]
            # subset size -> list of [row, columns to zero]
            possible_changes = {size: [] for size in range(n_features + 1)}
            print("{} rows have target {} \n".format(len(indices),
                                                     source_label))

            # ---- search phase: nothing is kept changed here ----
            for row in indices:
                # Only rows the model currently gets right, and that were not
                # flipped for an earlier plan entry, are candidates.
                if not (y_train[row] == mnb.predict([x_train[row]])) \
                        or row in used_row:
                    continue

                change_done = False
                for L in range(0, n_features + 1):
                    if change_done:
                        break
                    for subset in Change_Combination.combinations_index(
                            self, x_train_changed[row], L):
                        if not subset:
                            continue

                        x_train_changed[row][subset] = 0
                        flipped = (target_label == mnb.predict(
                            [x_train_changed[row]])[0])
                        # Always undo the trial; winners are re-applied below.
                        x_train_changed[row] = np.copy(x_train[row])

                        if flipped:
                            possible_changes[len(subset)].append([row, subset])
                            change_done = True
                            break

            if all(value == [] for value in possible_changes.values()):
                print("part of your request not possible!")
                # Kept from the original: the remaining plan entries are
                # skipped as well.
                break

            # ---- apply phase: smallest subsets first ----
            for key in sorted(possible_changes):
                if occurred_change == change_plan["number"][i]:
                    break
                print(
                    "there are {} candidate for changing target with change {} features"
                    .format(len(possible_changes[key]), key))
                for row, columns in possible_changes[key]:
                    print(x_train[row], mnb.predict([x_train[row]])[0])

                    x_train_changed[row][columns] = 0
                    print(x_train_changed[row],
                          mnb.predict([x_train_changed[row]])[0])
                    print(" \n change number {} \n".format(all_changed))
                    used_row[row] = row
                    occurred_change = occurred_change + 1
                    all_changed = all_changed + 1
                    if occurred_change == change_plan["number"][i]:
                        print("part of your request has been done :)")
                        break

            #plotting

            print("----plotting----")
            x_pos = range(0, n_features + 1)
            y_pos = np.arange(n_features + 1)
            chart_freq = []
            print("number of feature,how many changes is possible")
            for key, value in possible_changes.items():
                candidates = len([item for item in value if item])
                print(key, candidates)
                chart_freq.append(candidates)

            fig = plt.figure()
            outputFile = "./outputs/fig_output/change_combination_min/request{}.png".format(
                i)
            plt.bar(y_pos, chart_freq, align='center', alpha=0.5)
            plt.xticks(y_pos, x_pos)
            plt.ylabel('frequency')
            plt.xlabel('with changing X feature you can change target')
            plt.title('Summary of your request for change target {}'.format(
                change_plan['key'][i]))
            # savefig does not create missing folders.
            os.makedirs(os.path.dirname(outputFile), exist_ok=True)
            fig.savefig(outputFile)
            # Release the figure; otherwise one figure per request stays open.
            plt.close(fig)

        if all_changed <= number_change_requested:
            print("your request doesn't complete! please change your plan")
        else:
            print("your request is done :)")

        return np.copy(x_train_changed)
    def change(self, x_train, y_train, percetage, mnb,
               change_plan):  #check_combination_change_plan_features
        """Flip predictions by zeroing one feature subset across all rows at a time.

        Feature subsets are enumerated by increasing size; each subset is tried
        on every eligible row of the current plan entry before the next subset
        is considered.

        Args:
            x_train: Array of training rows; ``.shape`` and row indexing are used.
            y_train: Labels, indexable in step with the rows of ``x_train``.
            percetage: Share of rows (in percent) that should be changed overall.
            mnb: Classifier exposing ``predict``.
            change_plan: Mapping with ``"key"`` (entries hold the current label
                at index 0 and the wanted label at index 1) and ``"number"``
                (rows to flip for the entry at the same position).

        Returns:
            A copy of the working array in which flipped rows have the chosen
            features set to 0.
        """
        number_change_requested = int(percetage / 100 * x_train.shape[0])
        print("{} percentage error is equal to {} change \n".format(
            percetage, number_change_requested))

        x_train_changed = np.copy(x_train)
        used_row = {}
        all_changed = 1  # 1-based: index of the next flip over all plan entries

        for i, plan_pair in enumerate(change_plan["key"]):
            occurred_change = 0
            indices = [
                pos for pos, label in enumerate(y_train)
                if label == plan_pair[0]
            ]

            print("{} rows have target {} \n".format(len(indices),
                                                     plan_pair[0]))

            for size in range(len(x_train_changed[0]) + 1):
                print(
                    "changing target, with change {} features ----".format(
                        size))

                for subset in Change_Combination.combinations_index(
                        self, x_train_changed[0], size):
                    if not subset:
                        continue
                    if occurred_change == change_plan["number"][i]:
                        # Quota reached: stop enumerating subsets of this size.
                        break

                    for p in indices:
                        # Skip rows the model already gets wrong and rows that
                        # were flipped before.
                        if not (y_train[p] == mnb.predict([x_train[p]])) \
                                or p in used_row:
                            continue
                        if occurred_change == change_plan["number"][i]:
                            break

                        x_train_changed[p][subset] = 0

                        if plan_pair[1] == mnb.predict(
                                [x_train_changed[p]])[0]:
                            print(
                                "with change features index number {} row number {} has been changed"
                                .format(subset, p))
                            print(x_train[p], mnb.predict([x_train[p]])[0])
                            print(x_train_changed[p],
                                  mnb.predict([x_train_changed[p]])[0])

                            print(" \n change number {} \n".format(
                                all_changed))
                            used_row[p] = p
                            occurred_change += 1
                            all_changed += 1
                        else:
                            # The prediction did not flip: undo the attempt.
                            x_train_changed[p] = np.copy(x_train[p])

        if all_changed <= number_change_requested:
            print("your request doesn't complete! please change your plan")
        else:
            print("your request is done :)")

        return np.copy(x_train_changed)
Exemple #5
0
    def change(self, x_train, y_train, percetage, mnb, change_plan):
        """Flip predictions by zeroing features in sequential-feature-selection order.

        A forward ``SFS`` run (wrapped in a ``StandardScaler`` pipeline) is
        fitted and plotted first. For each plan entry and each subset size the
        SFS subset of that size is tried on all eligible rows, followed by
        every other combination of the same size, until the requested number of
        rows has been flipped.

        Rows that were already flipped are never touched again. Previously they
        were zeroed and then restored from ``x_train`` on later passes, which
        silently reverted changes that had already been counted.

        Args:
            x_train: Array of training rows; ``.shape`` and row indexing are used.
            y_train: Labels, indexable in step with the rows of ``x_train``.
            percetage: Share of rows (in percent) that should be changed overall.
            mnb: Classifier exposing ``predict``; also passed to ``SFS``.
            change_plan: Mapping with ``"key"`` (entries hold the current label
                at index 0 and the wanted label at index 1) and ``"number"``
                (rows to flip for the entry at the same position).

        Returns:
            A copy of the working array in which flipped rows have the chosen
            features set to 0.
        """
        number_change_requested = int(percetage / 100 * x_train.shape[0])
        print("{} percentage error is equal to {} change \n".format(
            percetage, number_change_requested))

        #find the most important feature

        sfs = SFS(mnb,
                  k_features=len(x_train[0]),
                  forward=True,
                  floating=False,
                  verbose=2,
                  scoring='accuracy',
                  cv=5)
        pipe = make_pipeline(StandardScaler(), sfs)
        pipe.fit(x_train, y_train)

        #-------------plotting------------------
        plot_sfs(sfs.get_metric_dict(), kind='std_err')
        plt.show()

        # Walk the features in SFS order and only change those.
        x_train_changed = np.copy(x_train)
        used_row = {}
        all_changed = 1  # 1-based: index of the next flip over all plan entries
        occurred_change = 0  # flips done for the current plan entry

        def flip_rows(features, indices, target_label, quota):
            """Zero ``features`` on each eligible row until ``quota`` rows are flipped."""
            nonlocal occurred_change, all_changed
            for row in indices:
                # Decide eligibility BEFORE mutating the row, so rows flipped
                # earlier keep their change instead of being restored.
                if row in used_row:
                    continue
                if not (y_train[row] == mnb.predict([x_train[row]])):
                    continue  # the model is already wrong on this row

                x_train_changed[row][features] = 0

                if target_label == mnb.predict([x_train_changed[row]])[0]:
                    print(
                        "with change features index {} row number {} has been changed"
                        .format(features, row))
                    print(x_train[row], mnb.predict([x_train[row]])[0])
                    print(x_train_changed[row],
                          mnb.predict([x_train_changed[row]])[0])

                    print(" \n change number {} \n".format(all_changed))
                    used_row[row] = row
                    occurred_change = occurred_change + 1
                    all_changed = all_changed + 1

                    if occurred_change == quota:
                        print("part of your request has been done :)")
                        break
                else:
                    # The prediction did not flip: undo the attempt.
                    x_train_changed[row] = np.copy(x_train[row])

        for i in range(len(change_plan["key"])):
            source_label = change_plan["key"][i][0]
            target_label = change_plan["key"][i][1]
            quota = change_plan["number"][i]

            occurred_change = 0
            indices = [t for t, x in enumerate(y_train) if x == source_label]

            print("{} rows have target {} \n".format(len(indices),
                                                     source_label))

            for L in range(1, len(sfs.subsets_) + 1):  #number of the features
                subset = list(sfs.subsets_[L]['feature_idx'])

                if occurred_change == quota:
                    break
                print("change feature index {} ----".format(subset))
                flip_rows(subset, indices, target_label, quota)

                # Check the remaining combinations of the same size.
                print(
                    "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
                )

                for subsets in Change_Combination.combinations_index(
                        self, x_train_changed[0], L):
                    # NOTE(review): `subset` is a list; this comparison only
                    # detects the SFS subset if combinations_index yields lists
                    # in the same order — confirm.
                    if subset != subsets:
                        if not subsets:
                            continue
                        if occurred_change == quota:
                            break
                        print("change feature index {} ----".format(subsets))
                        flip_rows(subsets, indices, target_label, quota)
                    else:
                        print(
                            "subsets are equal {}----------------------------------------------"
                            .format(subsets))

        if all_changed <= number_change_requested:
            print("your request doesn't complete! please change your plan")
        else:
            print("your request is done :)")

        return np.copy(x_train_changed)
Exemple #6
0
    def change(self, x_train, y_train, percetage, mnb, change_plan):
        """Flip predictions by zeroing features, trying the most uncertain rows first.

        Feature subsets are ranked with the importances of an
        ``ExtraTreesClassifier`` fitted on the training data. Rows are ordered
        by ``1 - |p1 - p2|`` (descending), where ``p1`` and ``p2`` are the two
        largest ``predict_proba`` values of the row.

        Args:
            x_train: Array of training rows; ``.shape`` and row indexing are used.
            y_train: Labels, indexable in step with the rows of ``x_train``.
            percetage: Share of rows (in percent) that should be changed overall.
            mnb: Classifier exposing ``predict`` and ``predict_proba``.
            change_plan: Mapping with ``"key"`` (entries hold the current label
                at index 0 and the wanted label at index 1) and ``"number"``
                (rows to flip for the entry at the same position).

        Returns:
            A copy of the working array in which flipped rows have the chosen
            features set to 0.
        """
        number_change_requested = int(percetage / 100 * x_train.shape[0])
        print("{} percentage error is equal to {} change \n".format(
            percetage, number_change_requested))

        x_train_changed = np.copy(x_train)
        used_row = {}
        all_changed = 1  # 1-based: index of the next flip over all plan entries

        #---------------------rank the feature subsets by information gain-----------------------

        forest = ExtraTreesClassifier()
        forest.fit(x_train, y_train)

        print("combinatio of feature")

        information_gain = dict(enumerate(forest.feature_importances_))
        print(information_gain)

        # tuple of feature indices -> sum of their importances
        ranked_information_dic = {}
        for size in range(x_train.shape[1] + 1):
            for subset in Change_Combination.combinations_index(
                    self, information_gain.keys(), size):
                if not subset:
                    continue
                print(subset)
                ranked_information_dic[tuple(subset)] = sum(
                    map(information_gain.get, subset))

        print("create all subset")

        def rank(entry):
            # Smaller subsets first; within one size, larger summed gain first.
            return len(entry[0]) * 1000 - entry[1]

        all_subset = sorted(ranked_information_dic.items(), key=rank)
        print(all_subset)

        #---------------------------order the rows by uncertainty-------------------------

        probability = mnb.predict_proba(x_train)
        print(probability)

        print("finding uncertainity")

        uncertainty = {}
        for index, class_probs in enumerate(probability):
            top_two = heapq.nlargest(2, class_probs)
            uncertainty[index] = 1 - np.abs(
                np.subtract(top_two[0], top_two[1]))

        def by_score(entry):
            return entry[1]

        # most uncertain rows first
        uncertainty_sorted = sorted(uncertainty.items(),
                                    key=by_score,
                                    reverse=True)
        print(uncertainty_sorted)

        print("changing")
        #---------------------------------------------changing--------------------------------------------

        for i, plan_pair in enumerate(change_plan["key"]):
            occurred_change = 0

            # rows carrying the current label, kept in uncertainty order
            indices = [
                row for row, _score in uncertainty_sorted
                if y_train[row] == plan_pair[0]
            ]

            print(indices)

            print("{} rows have target {} \n".format(len(indices),
                                                     plan_pair[0]))
            print("try in indices")
            for row in indices:

                if all_changed == number_change_requested + 1:
                    print("your requests have been done :)")
                    break

                # Only rows the model currently gets right, and that were not
                # flipped already, are candidates.
                if not (y_train[row] == mnb.predict([x_train[row]])) \
                        or row in used_row:
                    continue
                print(row)

                for features, _gain in all_subset:
                    if occurred_change == change_plan["number"][i]:
                        break

                    print(
                        "try to change, with change index {} on row {}"
                        .format(list(features), row))
                    x_train_changed[row][list(features)] = 0

                    if plan_pair[1] == mnb.predict(
                            [x_train_changed[row]])[0]:
                        print(x_train[row], mnb.predict([x_train[row]])[0])
                        print(x_train_changed[row],
                              mnb.predict([x_train_changed[row]])[0])
                        print(" \n change number {} on row {} \n".format(
                            all_changed, row))

                        used_row[row] = row
                        occurred_change += 1
                        all_changed += 1
                        break

                    # The prediction did not flip: undo the attempt.
                    x_train_changed[row] = np.copy(x_train[row])

        if all_changed <= number_change_requested:
            print("your request doesn't complete! please change your plan")
        return np.copy(x_train_changed)