def change(self, x_train, y_train, percetage, mnb, change_plan):
    """Zero out feature subsets of rows until ``mnb`` predicts the planned label.

    Feature subsets are ranked with the importances of a freshly fitted
    ``ExtraTreesClassifier``: smaller subsets come first and, for equal size,
    the subset with the larger summed importance comes first.  For every
    ``(from_label, to_label)`` entry of the plan, rows labelled ``from_label``
    that ``mnb`` currently predicts correctly are visited in index order and
    the ranked subsets are tried one by one until the prediction becomes
    ``to_label``.

    Args:
        x_train: 2-D numpy array of training rows.
        y_train: labels aligned with the rows of ``x_train``.
        percetage: share (0-100) of the rows that should be changed overall.
        mnb: classifier whose ``predict`` is used (assumed already fitted).
        change_plan: mapping with ``"key"`` (sequence of
            ``(from_label, to_label)``) and ``"number"`` (changes wanted for
            the entry at the same position).

    Returns:
        A copy of ``x_train`` with the successful changes applied.
    """
    number_change_requested = int(percetage / 100 * x_train.shape[0])
    print("{} percentage error is equal to {} change \n".format(percetage, number_change_requested))

    touched_rows = set()
    change_counter = 1  # 1-based number of the next change to be made
    x_train_changed = np.copy(x_train)

    # Rank the features with the importances of an extra-trees model.
    forest = ExtraTreesClassifier()
    forest.fit(x_train, y_train)
    gain_by_feature = dict(enumerate(forest.feature_importances_))

    # Score every non-empty feature subset by its summed importance.
    # NOTE(review): combinations_index is defined outside this block; it is
    # presumably yielding the index combinations of the requested size.
    gain_by_subset = {}
    for size in range(x_train.shape[1] + 1):
        for combo in Change_Combination.combinations_index(self, gain_by_feature.keys(), size):
            if not combo:
                continue
            gain_by_subset[tuple(combo)] = sum(gain_by_feature.get(feature) for feature in combo)

    # Ascending order: few features first, high total importance first.
    ordered_subsets = sorted(
        gain_by_subset.items(),
        key=lambda pair: 1000 * len(pair[0]) - pair[1],
    )

    for plan_pos, label_pair in enumerate(change_plan["key"]):
        done_for_plan = 0
        candidate_rows = [pos for pos, label in enumerate(y_train) if label == label_pair[0]]
        print("{} rows have target {} \n".format(len(candidate_rows), label_pair[0]))

        for row in candidate_rows:
            if change_counter == number_change_requested + 1:
                print("your requests have been done :)")
                break

            # Only rows the classifier currently gets right, and that were
            # not changed before, are candidates.
            predicted_correctly = y_train[row] == mnb.predict([x_train[row]])
            if not (predicted_correctly and row not in touched_rows):
                continue

            for subset_entry in ordered_subsets:
                if done_for_plan == change_plan["number"][plan_pos]:
                    break

                columns = list(subset_entry[0])
                print("try to change with change index {}".format(columns))
                x_train_changed[row][columns] = 0

                if change_plan["key"][plan_pos][1] == mnb.predict([x_train_changed[row]])[0]:
                    print(x_train[row], mnb.predict([x_train[row]])[0])
                    print(x_train_changed[row], mnb.predict([x_train_changed[row]])[0])
                    print(" \n change number {} \n".format(change_counter))
                    touched_rows.add(row)
                    done_for_plan += 1
                    change_counter += 1
                    break

                # The subset did not flip the prediction: undo it.
                x_train_changed[row] = np.copy(x_train[row])

    if change_counter <= number_change_requested:
        print("your request doesn't complete! please change your plan")

    return np.copy(x_train_changed)
def change(self, x_train, y_train, percetage, mnb, change_plan):
    """Zero out feature subsets of rows until ``mnb`` predicts the planned label.

    Feature subsets are ranked with the importances of a freshly fitted
    ``ExtraTreesClassifier`` (smaller subsets first, larger summed importance
    first within a size).  For every ``(from_label, to_label)`` entry of the
    plan, the rows labelled ``from_label`` are visited in order of increasing
    ``|P(from_label) - P(to_label)|`` as reported by ``mnb.predict_proba``, so
    rows sitting closest to the boundary between both classes are tried first.

    Args:
        x_train: 2-D numpy array of training rows.
        y_train: labels aligned with the rows of ``x_train``.
        percetage: share (0-100) of the rows that should be changed overall.
        mnb: classifier providing ``predict`` and ``predict_proba`` (assumed
            already fitted).
        change_plan: mapping with ``"key"`` (sequence of
            ``(from_label, to_label)``) and ``"number"`` (changes wanted for
            the entry at the same position).

    Returns:
        A copy of ``x_train`` with the successful changes applied.
    """
    number_change_requested = int(percetage / 100 * x_train.shape[0])
    print("{} percentage error is equal to {} change \n".format(
        percetage, number_change_requested))

    used_row = set()
    all_changed = 1  # 1-based number of the next change to be made
    x_train_changed = np.copy(x_train)

    # ------------- order the feature subsets by information gain -------------
    model = ExtraTreesClassifier()
    model.fit(x_train, y_train)
    print("combination of feature")
    information_gain = dict(enumerate(model.feature_importances_))

    # NOTE(review): combinations_index is defined outside this block; it is
    # presumably yielding the index combinations of the requested size.
    ranked_information_dic = {}
    for L in range(x_train.shape[1] + 1):
        for subset in Change_Combination.combinations_index(
                self, information_gain.keys(), L):
            if subset:
                ranked_information_dic[tuple(subset)] = sum(
                    information_gain[key] for key in subset)
    print("create all subset")
    all_subset = sorted(ranked_information_dic.items(),
                        key=lambda item: len(item[0]) * 1000 - item[1])

    probability = mnb.predict_proba(x_train)
    class_labels = getattr(mnb, "classes_", None)

    def proba_column(label):
        """Return the ``predict_proba`` column that belongs to ``label``."""
        # The columns of predict_proba follow ``classes_``; ``label - 1`` is
        # only right when the labels happen to be exactly 1..K.
        if class_labels is not None:
            matches = np.flatnonzero(np.asarray(class_labels) == label)
            if matches.size:
                return int(matches[0])
        # Fallback keeps the historical behaviour for classifiers without
        # ``classes_`` or for labels the classifier has never seen.
        return label - 1

    # ------------------------------- changing --------------------------------
    for i, plan_key in enumerate(change_plan["key"]):
        source_label, target_label = plan_key[0], plan_key[1]
        quota = change_plan["number"][i]
        occurred_change = 0

        indices = [t for t, label in enumerate(y_train) if label == source_label]
        print("{} rows have target {} \n".format(len(indices), source_label))

        # Visit the rows with the smallest probability gap between the two
        # classes first (stable sort keeps index order for equal gaps).
        if indices:
            source_col = proba_column(source_label)
            target_col = proba_column(target_label)
            indices.sort(key=lambda row: np.abs(
                probability[row][source_col] - probability[row][target_col]))

        print("try in indices")
        for row in indices:
            if all_changed == number_change_requested + 1:
                print("your requests have been done :)")
                break

            # Only rows the classifier currently gets right, and that were
            # not changed before, are candidates.
            if not (y_train[row] == mnb.predict([x_train[row]])
                    and row not in used_row):
                continue

            for subset in all_subset:
                if occurred_change == quota:
                    break

                columns = list(subset[0])
                print("try to change, with changing index {} on row {}".format(
                    columns, row))

                # The selected cells are overwritten with 0.  Earlier
                # experiments (imposing an outlier, the column maximum, or
                # values taken from rows of the target class) were disabled
                # in favour of this.
                x_train_changed[row][columns] = 0

                if target_label == mnb.predict([x_train_changed[row]])[0]:
                    print(x_train[row], mnb.predict([x_train[row]])[0])
                    print(x_train_changed[row],
                          mnb.predict([x_train_changed[row]])[0])
                    print(" \n change number {} on row {} \n".format(
                        all_changed, row))
                    used_row.add(row)
                    occurred_change += 1
                    all_changed += 1
                    break

                # The subset did not flip the prediction: undo it.
                x_train_changed[row] = np.copy(x_train[row])

    if all_changed <= number_change_requested:
        print("your request doesn't complete! please change your plan")

    return np.copy(x_train_changed)
def change(self, x_train, y_train, percetage, mnb, change_plan):
    """Flip predictions by zeroing the smallest feature subset that works per row.

    For every ``(from_label, to_label)`` entry of the plan the method

    1. searches, for each correctly predicted and still unused row labelled
       ``from_label``, the first (smallest) feature subset whose zeroing makes
       ``mnb`` predict ``to_label``;
    2. applies the recorded changes, rows needing the fewest features first,
       until the number requested for the entry is reached;
    3. saves a bar chart with the number of candidate rows per subset size to
       ``./outputs/fig_output/change_combination_min/request<i>.png``.

    Processing stops at the first plan entry for which no row can be flipped.

    Args:
        x_train: 2-D numpy array of training rows.
        y_train: labels aligned with the rows of ``x_train``.
        percetage: share (0-100) of the rows that should be changed overall.
        mnb: classifier whose ``predict`` is used (assumed already fitted).
        change_plan: mapping with ``"key"`` (sequence of
            ``(from_label, to_label)``) and ``"number"`` (changes wanted for
            the entry at the same position).

    Returns:
        A copy of ``x_train`` with the applied changes.
    """
    import os  # local import: only needed to create the figure directory

    number_change_requested = int(percetage / 100 * x_train.shape[0])
    print("{} percentage error is equal to {} change \n".format(
        percetage, number_change_requested))

    used_row = set()
    all_changed = 1  # 1-based number of the next change to be made
    x_train_changed = np.copy(x_train)

    def smallest_flipping_subset(row, target_label):
        """Return the first subset whose zeroing yields ``target_label``, else None."""
        # NOTE(review): combinations_index is defined outside this block; it
        # is presumably yielding the index combinations of the given size.
        for L in range(len(x_train_changed[row]) + 1):
            for subset in Change_Combination.combinations_index(
                    self, x_train_changed[row], L):
                if not subset:
                    continue
                x_train_changed[row][subset] = 0
                flipped = target_label == mnb.predict([x_train_changed[row]])[0]
                # This is only a probe: the row is always restored here and
                # the change is applied later, in the apply phase.
                x_train_changed[row] = np.copy(x_train[row])
                if flipped:
                    return subset
        return None

    for i, plan_key in enumerate(change_plan["key"]):
        source_label, target_label = plan_key[0], plan_key[1]
        quota = change_plan["number"][i]
        occurred_change = 0
        n_features = x_train.shape[1]

        indices = [t for t, label in enumerate(y_train) if label == source_label]
        # key: number of zeroed features, value: list of [row, columns]
        possible_changes = {size: [] for size in range(n_features + 1)}
        print("{} rows have target {} \n".format(len(indices), source_label))

        # ----------------------------- search -----------------------------
        for row in indices:
            if (y_train[row] == mnb.predict([x_train[row]])
                    and row not in used_row):
                subset = smallest_flipping_subset(row, target_label)
                if subset is not None:
                    possible_changes[len(subset)].append([row, subset])

        if not any(possible_changes.values()):
            print("part of your request not possible!")
            break

        # ------------------------------ apply -----------------------------
        for size in sorted(possible_changes):
            if occurred_change == quota:
                break
            candidates = possible_changes[size]
            print(
                "there are {} candidate for changing target with change {} features"
                .format(len(candidates), size))
            for row, subset in candidates:
                print(x_train[row], mnb.predict([x_train[row]])[0])
                x_train_changed[row][subset] = 0
                print(x_train_changed[row],
                      mnb.predict([x_train_changed[row]])[0])
                print(" \n change number {} \n".format(all_changed))
                used_row.add(row)
                occurred_change += 1
                all_changed += 1
                if occurred_change == quota:
                    print("part of your request has been done :)")
                    break

        # ---------------------------- plotting ----------------------------
        print("----plotting----")
        # Sized from the feature count instead of a leaked loop variable.
        x_pos = range(n_features + 1)
        y_pos = np.arange(n_features + 1)
        chart_freq = []
        print("number of feature,how many changes is possible")
        for size, candidates in possible_changes.items():
            print(size, len(candidates))
            chart_freq.append(len(candidates))

        outputFile = "./outputs/fig_output/change_combination_min/request{}.png".format(
            i)
        # savefig does not create missing directories.
        os.makedirs(os.path.dirname(outputFile), exist_ok=True)
        fig = plt.figure()
        try:
            plt.bar(y_pos, chart_freq, align='center', alpha=0.5)
            plt.xticks(y_pos, x_pos)
            plt.ylabel('frequency')
            plt.xlabel('with changing X feature you can change target')
            plt.title('Summary of your request for change target {}'.format(
                change_plan['key'][i]))
            fig.savefig(outputFile)
        finally:
            # One figure is opened per plan entry; close it so figures do
            # not pile up in memory.
            plt.close(fig)

    if all_changed <= number_change_requested:
        print("your request doesn't complete! please change your plan")
    else:
        print("your request is done :)")

    return np.copy(x_train_changed)
def change(self, x_train, y_train, percetage, mnb, change_plan):
    """Flip predictions by zeroing feature subsets, trying each subset on all rows.

    For every ``(from_label, to_label)`` entry of the plan the feature subsets
    are enumerated by increasing size; each subset is zeroed in every
    correctly predicted, still unused row labelled ``from_label`` and the
    change is kept when ``mnb`` then predicts ``to_label``.  The search for an
    entry stops once the number of changes requested for it is reached.

    Args:
        x_train: 2-D numpy array of training rows.
        y_train: labels aligned with the rows of ``x_train``.
        percetage: share (0-100) of the rows that should be changed overall.
        mnb: classifier whose ``predict`` is used (assumed already fitted).
        change_plan: mapping with ``"key"`` (sequence of
            ``(from_label, to_label)``) and ``"number"`` (changes wanted for
            the entry at the same position).

    Returns:
        A copy of ``x_train`` with the successful changes applied.
    """
    number_change_requested = int(percetage / 100 * x_train.shape[0])
    print("{} percentage error is equal to {} change \n".format(
        percetage, number_change_requested))

    used_row = set()
    all_changed = 1  # 1-based number of the next change to be made
    x_train_changed = np.copy(x_train)

    for i, plan_key in enumerate(change_plan["key"]):
        source_label, target_label = plan_key[0], plan_key[1]
        quota = change_plan["number"][i]
        occurred_change = 0

        indices = [t for t, label in enumerate(y_train) if label == source_label]
        print("{} rows have target {} \n".format(len(indices), source_label))

        for L in range(len(x_train_changed[0]) + 1):
            print("changing target, with change {} features ----".format(L))
            # NOTE(review): combinations_index is defined outside this block;
            # it is presumably yielding the index combinations of size L.
            for subset in Change_Combination.combinations_index(
                    self, x_train_changed[0], L):
                if not subset:
                    continue
                if occurred_change == quota:
                    break

                for p in indices:
                    # Checked first so no prediction is computed once the
                    # quota for this plan entry has been met.
                    if occurred_change == quota:
                        break
                    # Only rows the classifier currently gets right, and
                    # that were not changed before, are candidates.
                    if not (y_train[p] == mnb.predict([x_train[p]])
                            and p not in used_row):
                        continue

                    x_train_changed[p][subset] = 0
                    if target_label == mnb.predict([x_train_changed[p]])[0]:
                        print(
                            "with change features index number {} row number {} has been changed"
                            .format(subset, p))
                        print(x_train[p], mnb.predict([x_train[p]])[0])
                        print(x_train_changed[p],
                              mnb.predict([x_train_changed[p]])[0])
                        print(" \n change number {} \n".format(all_changed))
                        used_row.add(p)
                        occurred_change += 1
                        all_changed += 1
                    else:
                        # The subset did not flip the prediction: undo it.
                        x_train_changed[p] = np.copy(x_train[p])

    if all_changed <= number_change_requested:
        print("your request doesn't complete! please change your plan")
    else:
        print("your request is done :)")

    return np.copy(x_train_changed)
def change(self, x_train, y_train, percetage, mnb, change_plan):
    """Flip predictions by zeroing feature subsets chosen by sequential selection.

    A forward sequential feature selector (``SFS``) is fitted on the scaled
    training data and its metric plot is shown.  For every
    ``(from_label, to_label)`` entry of the plan and every subset size ``L``
    the selector's subset of that size is tried first, followed by every other
    subset of size ``L``.  A subset is zeroed in each correctly predicted,
    still unused row labelled ``from_label``; the change is kept when ``mnb``
    then predicts ``to_label``.

    Args:
        x_train: 2-D numpy array of training rows.
        y_train: labels aligned with the rows of ``x_train``.
        percetage: share (0-100) of the rows that should be changed overall.
        mnb: classifier handed to ``SFS`` and whose ``predict`` is used.
        change_plan: mapping with ``"key"`` (sequence of
            ``(from_label, to_label)``) and ``"number"`` (changes wanted for
            the entry at the same position).

    Returns:
        A copy of ``x_train`` with the successful changes applied.
    """
    number_change_requested = int(percetage / 100 * x_train.shape[0])
    print("{} percentage error is equal to {} change \n".format(
        percetage, number_change_requested))

    # Find the most important features.
    sfs = SFS(mnb,
              k_features=len(x_train[0]),
              forward=True,
              floating=False,
              verbose=2,
              scoring='accuracy',
              cv=5)
    pipe = make_pipeline(StandardScaler(), sfs)
    pipe.fit(x_train, y_train)

    # ------------- plotting ------------------
    plot_sfs(sfs.get_metric_dict(), kind='std_err')
    plt.show()

    x_train_changed = np.copy(x_train)
    used_row = set()
    all_changed = 1  # 1-based number of the next change to be made

    def flip_rows(columns, rows, target_label, budget, first_number):
        """Zero ``columns`` in the eligible ``rows``; return how many rows flipped.

        At most ``budget`` rows are changed; ``first_number`` is the running
        change number printed for the first flipped row.
        """
        flipped = 0
        for row in rows:
            # Eligibility is decided BEFORE the row is touched.  Zeroing
            # first and restoring from x_train afterwards would silently undo
            # a change applied to this row earlier.
            if row in used_row:
                continue
            if not (y_train[row] == mnb.predict([x_train[row]])):
                continue

            x_train_changed[row][columns] = 0
            if target_label == mnb.predict([x_train_changed[row]])[0]:
                print(
                    "with change features index {} row number {} has been changed"
                    .format(columns, row))
                print(x_train[row], mnb.predict([x_train[row]])[0])
                print(x_train_changed[row],
                      mnb.predict([x_train_changed[row]])[0])
                print(" \n change number {} \n".format(first_number + flipped))
                used_row.add(row)
                flipped += 1
                if flipped == budget:
                    print("part of your request has been done :)")
                    break
            else:
                # The subset did not flip the prediction: undo it.
                x_train_changed[row] = np.copy(x_train[row])
        return flipped

    for i, plan_key in enumerate(change_plan["key"]):
        source_label, target_label = plan_key[0], plan_key[1]
        quota = change_plan["number"][i]
        occurred_change = 0

        indices = [t for t, label in enumerate(y_train) if label == source_label]
        print("{} rows have target {} \n".format(len(indices), source_label))

        for L in range(1, len(sfs.subsets_) + 1):  # number of the features
            subset = list(sfs.subsets_[L]['feature_idx'])
            if occurred_change == quota:
                break

            print("change feature index {} ----".format(subset))
            made = flip_rows(subset, indices, target_label,
                             quota - occurred_change, all_changed)
            occurred_change += made
            all_changed += made

            # Check the rest of the possible changes of the same size.
            print(
                "$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$"
            )
            # NOTE(review): combinations_index is defined outside this block;
            # it is presumably yielding the index combinations of size L.
            for subsets in Change_Combination.combinations_index(
                    self, x_train_changed[0], L):
                if subset != subsets:
                    if not subsets:
                        continue
                    if occurred_change == quota:
                        break
                    print("change feature index {} ----".format(subsets))
                    made = flip_rows(subsets, indices, target_label,
                                     quota - occurred_change, all_changed)
                    occurred_change += made
                    all_changed += made
                else:
                    print(
                        "subsets are equal {}----------------------------------------------"
                        .format(subsets))

    if all_changed <= number_change_requested:
        print("your request doesn't complete! please change your plan")
    else:
        print("your request is done :)")

    return np.copy(x_train_changed)
def change(self, x_train, y_train, percetage, mnb, change_plan):
    """Zero out feature subsets of rows until ``mnb`` predicts the planned label.

    Feature subsets are ranked with the importances of a freshly fitted
    ``ExtraTreesClassifier`` (smaller subsets first, larger summed importance
    first within a size).  Rows are ranked by uncertainty, computed as
    ``1 - |p1 - p2|`` with ``p1`` and ``p2`` the two largest class
    probabilities from ``mnb.predict_proba``; the most uncertain rows are
    tried first.  Intermediate results are printed for debugging.

    Args:
        x_train: 2-D numpy array of training rows.
        y_train: labels aligned with the rows of ``x_train``.
        percetage: share (0-100) of the rows that should be changed overall.
        mnb: classifier providing ``predict`` and ``predict_proba`` (assumed
            already fitted).
        change_plan: mapping with ``"key"`` (sequence of
            ``(from_label, to_label)``) and ``"number"`` (changes wanted for
            the entry at the same position).

    Returns:
        A copy of ``x_train`` with the successful changes applied.
    """
    number_change_requested = int(percetage / 100 * x_train.shape[0])
    print("{} percentage error is equal to {} change \n".format(
        percetage, number_change_requested))

    touched_rows = set()
    change_counter = 1  # 1-based number of the next change to be made
    x_train_changed = np.copy(x_train)

    # ------------- order the feature subsets by information gain -------------
    forest = ExtraTreesClassifier()
    forest.fit(x_train, y_train)
    print("combinatio of feature")
    gain_by_feature = dict(enumerate(forest.feature_importances_))
    print(gain_by_feature)

    # NOTE(review): combinations_index is defined outside this block; it is
    # presumably yielding the index combinations of the requested size.
    gain_by_subset = {}
    for size in range(x_train.shape[1] + 1):
        for combo in Change_Combination.combinations_index(
                self, gain_by_feature.keys(), size):
            if not combo:
                continue
            print(combo)
            gain_by_subset[tuple(combo)] = sum(
                gain_by_feature.get(feature) for feature in combo)
    print("create all subset")

    # Ascending order: few features first, high total importance first.
    ordered_subsets = sorted(
        gain_by_subset.items(),
        key=lambda pair: 1000 * len(pair[0]) - pair[1],
    )
    print(ordered_subsets)

    # ------------------- order the rows by uncertainty -----------------------
    probability = mnb.predict_proba(x_train)
    print(probability)
    print("finding uncertainity")
    uncertainty = {}
    for row_pos, class_probs in enumerate(probability):
        top_two = heapq.nlargest(2, class_probs)
        margin = np.abs(np.subtract(top_two[0], top_two[1]))
        uncertainty[row_pos] = 1 - margin

    # Most uncertain rows first.
    uncertainty_sorted = sorted(uncertainty.items(),
                                key=lambda pair: pair[1],
                                reverse=True)
    print(uncertainty_sorted)
    print("changing")

    # ------------------------------- changing --------------------------------
    for plan_pos, label_pair in enumerate(change_plan["key"]):
        done_for_plan = 0

        # Rows with the source label, kept in uncertainty order.
        candidate_rows = [
            entry[0] for entry in uncertainty_sorted
            if y_train[entry[0]] == label_pair[0]
        ]
        print(candidate_rows)
        print("{} rows have target {} \n".format(len(candidate_rows),
                                                 label_pair[0]))
        print("try in indices")

        for row in candidate_rows:
            if change_counter == number_change_requested + 1:
                print("your requests have been done :)")
                break

            # Only rows the classifier currently gets right, and that were
            # not changed before, are candidates.
            predicted_correctly = y_train[row] == mnb.predict([x_train[row]])
            if not (predicted_correctly and row not in touched_rows):
                continue
            print(row)

            for subset_entry in ordered_subsets:
                if done_for_plan == change_plan["number"][plan_pos]:
                    break

                columns = list(subset_entry[0])
                print("try to change, with change index {} on row {}".format(
                    columns, row))
                x_train_changed[row][columns] = 0

                if change_plan["key"][plan_pos][1] == mnb.predict(
                        [x_train_changed[row]])[0]:
                    print(x_train[row], mnb.predict([x_train[row]])[0])
                    print(x_train_changed[row],
                          mnb.predict([x_train_changed[row]])[0])
                    print(" \n change number {} on row {} \n".format(
                        change_counter, row))
                    touched_rows.add(row)
                    done_for_plan += 1
                    change_counter += 1
                    break

                # The subset did not flip the prediction: undo it.
                x_train_changed[row] = np.copy(x_train[row])

    if change_counter <= number_change_requested:
        print("your request doesn't complete! please change your plan")

    return np.copy(x_train_changed)