def predict_ensemble(self, data_X, with_acc, data_Y):
    """Collect per-model class scores for ``data_X`` and ensemble them.

    Every entry of ``self.models`` is read as ``(kind, model, ...)``.
    Kinds "svm", "dt", "knn" and "lr" have their output expanded to
    ``self.output_size`` columns via ``convert_to_all_classes_array``;
    kinds "ann" and "cnn" are queried through ``get_predictions``.
    The accuracy of every member and of the ensemble is printed.

    Args:
        data_X: Inputs handed to each member model.
        with_acc: Forwarded unchanged to ``get_predictions`` of the
            "ann"/"cnn" members.
        data_Y: Targets. Labels are taken as ``np.argmax(data_Y, axis=-1)``,
            so this is presumably one-hot encoded -- confirm with callers.

    Returns:
        The value returned by ``self.apply_ensembling`` for the list of
        per-model score arrays.

    Raises:
        ValueError: If a member's kind is not one of the supported names.
            Previously such a member silently re-used the preceding
            member's scores (or failed with ``NameError`` when it was
            the first entry).
    """
    sklearn_kinds = ("svm", "dt", "knn", "lr")
    network_kinds = ("ann", "cnn")

    # Loop-invariant: the reference labels are the same for every member.
    true_labels = np.argmax(data_Y, axis=-1)

    predictions = []
    for entry in self.models:
        kind, model = entry[0], entry[1]

        if kind in sklearn_kinds:
            if kind == "svm":
                features = data_X
                print(features.shape)
                raw_probs = np.array(model.predict_proba(features))
            elif kind == "knn":
                # NOTE(review): fit_transform re-fits pca2 on the data being
                # predicted; if pca2 was fitted during training this should
                # probably be transform() -- confirm before changing.
                features = self.pca2.fit_transform(data_X)
                print(features.shape)
                raw_probs = np.array(model.predict_proba(features))
            else:
                # "dt" and "lr" expose their scores through predict_model.
                raw_probs = np.array(model.predict_model(data_X))

            probs = convert_to_all_classes_array(
                raw_probs, model.classes_, self.output_size)
            print(
                "Accuracy of " + str(kind) + " classifier:",
                accuracy_score(true_labels, np.argmax(probs, axis=-1)))
            if raw_probs.shape[-1] == self.output_size:
                # NOTE(review): this compares two booleans (ndarray.all()),
                # not the arrays element-wise; kept as-is to avoid new
                # failures -- np.array_equal may have been intended.
                assert (raw_probs.all() == probs.all())
        elif kind in network_kinds:
            probs, _, _, acc = model.get_predictions(data_X, with_acc, data_Y)
            print("Accuracy of " + str(kind) + " : " + str(acc))
            probs = np.array(probs)
            # Drop a leading singleton axis if the network returned one.
            if probs.shape[0] == 1:
                probs = np.squeeze(probs, axis=0)
        else:
            raise ValueError(
                "Unsupported model kind in ensemble: " + repr(kind))

        predictions.append(probs)

    vals = self.apply_ensembling(predictions)
    print("Accuracy of ensemble on the" + self.dataset_name +
          " dataset is: " +
          str(accuracy_score(true_labels, np.argmax(vals, axis=-1))))
    return vals
def calculate_entropy(self, probs_B, name, split):
    """Score how far Model A's predictions are from Model B's on one split.

    Model B's predicted classes are ``np.argmax(probs_B, axis=-1)``.
    Model A is chosen by ``self.model_name`` and evaluated on the data
    selected by ``split`` and ``self.interpretability_mode``.

    Args:
        probs_B: Per-sample class scores of Model B.
        name: Unused by this method.
        split: 0 for the training split, 1 for cross-validation, 2 for test.

    Returns:
        ``None`` when ``split`` is not 0, 1 or 2. Otherwise a float:

        * if ``self.dataset_name == "sentiment_analysis"``: the base-2
          binary entropy of the agreement rate between the two models'
          predicted classes (0.0 when they always or never agree);
        * else: the sum of ``-log2(|pA[top_A] - pA[pred_B]|)`` over the
          samples where the predicted classes differ (zero gaps add
          nothing), divided by the total number of samples.

    Raises:
        ValueError: If ``self.interpretability_mode`` or ``self.model_name``
            is not supported (previously an ``UnboundLocalError`` further
            down).
    """
    preds = np.argmax(probs_B, axis=-1)

    # Attribute names per interpretability mode, looked up lazily so only
    # the selected split's attributes are touched.
    if split == 0:
        sources = {'original': ('data_X', 'data_Y'),
                   'counter_factual': ('cf_data_X', 'cf_data_Y')}
    elif split == 1:
        sources = {'original': ('cross_validation_X', 'cross_validation_Y'),
                   'counter_factual': ('cf_cv_X', 'cf_cv_Y')}
    elif split == 2:
        sources = {'original': ('test_X', 'test_Y'),
                   'counter_factual': ('cf_test_X', 'cf_test_Y')}
    else:
        return None

    mode = self.interpretability_mode
    if mode not in sources:
        raise ValueError("Unsupported interpretability_mode: " + repr(mode))
    data_attr, output_attr = sources[mode]
    data = getattr(self, data_attr)
    output = getattr(self, output_attr)

    model_name = self.model_name
    if model_name in ('svm', 'naive_bayes'):
        raw_probs = np.array(self.classifier.predict_proba(data))
        print(raw_probs.shape)
        probs = convert_to_all_classes_array(
            raw_probs, self.classifier.classes_, self.output_classes)
        if raw_probs.shape[-1] == self.output_classes:
            # NOTE(review): compares two booleans, not the arrays themselves.
            assert (raw_probs.all() == probs.all())
        categorical_outputs = to_categorical(output)
        print(
            "Accuracy of Model A on the current split of the dataset is : ",
            accuracy_score(np.argmax(categorical_outputs, axis=-1),
                           np.argmax(probs, axis=-1)))
    elif model_name == 'ann':
        probs, _, _, acc = self.NN.get_predictions(
            data, True, convert_one_hot(output, self.output_classes))
        print(probs.shape)
        print("Accuracy of Model A on the" + self.dataset_name +
              "Training Dataset is: " + str(acc))
        probs = np.array(probs)
        # Drop a leading singleton axis if the network returned one.
        if probs.shape[0] == 1:
            probs = np.squeeze(probs, axis=0)
    elif model_name in ('cnn', 'inceptionv3'):
        if model_name == 'inceptionv3':
            probs, _, _, acc = self.inception_classifier.get_output(
                data, True, output)
        else:
            probs, _, _, acc = self.CNN_classifier.get_predictions(
                data, True, output)
        print("Accuracy of Model A on the" + self.dataset_name +
              "Training Dataset is: " + str(acc))
        probs = np.array(probs)
        if probs.shape[0] == 1:
            probs = np.squeeze(probs, axis=0)
    elif model_name == 'dt':
        raw_probs = self.classifier.predict_model(data)
        probs = convert_to_all_classes_array(
            raw_probs, self.classifier.classes_, self.output_classes)
        if raw_probs.shape[-1] == self.output_classes:
            assert (raw_probs.all() == probs.all())
    elif model_name == "ensemble":
        probs = np.array(
            self.classifier.predict_ensemble(
                data, True, convert_one_hot(output, self.output_classes)))
    else:
        raise ValueError("Unsupported model_name: " + repr(model_name))

    prob_A_indexes = np.argmax(probs, axis=-1)
    print(
        "Accuracy of Model A on the current split of the predictions of Model B of the dataset is: ",
        accuracy_score(preds, prob_A_indexes))

    sample_count = probs.shape[0]

    if self.dataset_name == "sentiment_analysis":
        # Only the agreement rate matters here. An earlier per-sample log
        # accumulation was always overwritten by the entropy below, so it
        # is not computed.
        count_equal = sum(
            1 for i in range(sample_count) if preds[i] == prob_A_indexes[i])
        prob_equal = (count_equal * 1.0) / sample_count
        prob_unequal = ((sample_count - count_equal) * 1.0) / sample_count
        if prob_equal == 0 or prob_unequal == 0:
            return 0.0
        return ((-1.0 * prob_equal * math.log2(prob_equal)) +
                (-1.0 * prob_unequal * math.log2(prob_unequal)))

    total_diff = 0.0
    any_mismatch = False
    for i in range(sample_count):
        if prob_A_indexes[i] != preds[i]:
            any_mismatch = True
            gap = abs(probs[i][prob_A_indexes[i]] - probs[i][preds[i]])
            # A zero gap (tie in Model A's scores) contributes nothing.
            if not gap <= 0:
                total_diff += -1.0 * math.log2(gap)
    total_diff = total_diff / sample_count

    if not any_mismatch:
        print(
            "For Model A " + str(self.model_name) +
            " and Model B, the final predictions on this split of the dataset are same"
        )
    return total_diff
def calculate_entropy(self, probs_B, name, split):
    """Score how far Model A's predictions are from Model B's on one split.

    Model B's predicted classes are ``np.argmax(probs_B, axis=-1)``.
    Model A is chosen by ``self.model_name`` and evaluated on the data
    selected by ``split``.

    Args:
        probs_B: Per-sample class scores of Model B.
        name: Unused by this method.
        split: 0 for the training split, 1 for cross-validation, 2 for test.

    Returns:
        ``None`` when ``split`` is not 0, 1 or 2. Otherwise the sum of
        ``-log2(|pA[top_A] - pA[pred_B]|)`` over the samples where the two
        models' predicted classes differ (zero gaps add nothing), divided
        by the total number of samples.

    Raises:
        ValueError: If ``self.model_name`` is not supported (previously an
            ``UnboundLocalError`` further down).
    """
    preds = np.argmax(probs_B, axis=-1)

    if split == 0:
        data, output = self.data_X, self.data_Y
    elif split == 1:
        data, output = self.cross_validation_X, self.cross_validation_Y
    elif split == 2:
        data, output = self.test_X, self.test_Y
    else:
        return None

    model_name = self.model_name
    if model_name in ('svm', 'naive_bayes'):
        raw_probs = np.array(self.classifier.predict_proba(data))
        probs = convert_to_all_classes_array(
            raw_probs, self.classifier.classes_, self.output_classes)
        if raw_probs.shape[-1] == self.output_classes:
            # NOTE(review): compares two booleans, not the arrays themselves.
            assert (raw_probs.all() == probs.all())
        print(
            "Accuracy of Model A on the current split of the dataset is : ",
            accuracy_score(output, np.argmax(probs, axis=-1)))
    elif model_name == 'ann':
        probs, _, _, acc = self.NN.get_predictions(
            data, True, convert_one_hot(output, self.output_classes))
        print(probs.shape)
        print("Accuracy of Model A on the" + self.dataset_name +
              "Training Dataset is: " + str(acc))
        probs = np.array(probs)
        # Drop a leading singleton axis if the network returned one.
        if probs.shape[0] == 1:
            probs = np.squeeze(probs, axis=0)
    elif model_name in ('cnn', 'inceptionv3'):
        if model_name == 'inceptionv3':
            probs, _, _, acc = self.inception_classifier.get_output(
                data, True, output)
        else:
            probs, _, _, acc = self.CNN_classifier.get_predictions(
                data, True, output)
        print("Accuracy of Model A on the" + self.dataset_name +
              "Training Dataset is: " + str(acc))
        probs = np.array(probs)
        if probs.shape[0] == 1:
            probs = np.squeeze(probs, axis=0)
    elif model_name == 'dt':
        raw_probs = self.classifier.predict_model(data)
        probs = convert_to_all_classes_array(
            raw_probs, self.classifier.classes_, self.output_classes)
        if raw_probs.shape[-1] == self.output_classes:
            assert (raw_probs.all() == probs.all())
    elif model_name == "ensemble":
        probs = np.array(
            self.classifier.predict_ensemble(
                data, True, convert_one_hot(output, self.output_classes)))
    else:
        raise ValueError("Unsupported model_name: " + repr(model_name))

    prob_A_indexes = np.argmax(probs, axis=-1)
    print(
        "Accuracy of Model A on the current split of the predictions of Model B of the dataset is: ",
        accuracy_score(preds, prob_A_indexes))

    sample_count = probs.shape[0]
    total_diff = 0.0
    any_mismatch = False
    for i in range(sample_count):
        if prob_A_indexes[i] != preds[i]:
            any_mismatch = True
            gap = abs(probs[i][prob_A_indexes[i]] - probs[i][preds[i]])
            # A zero gap (tie in Model A's scores) contributes nothing.
            if not gap <= 0:
                total_diff += -1.0 * math.log2(gap)
    total_diff = total_diff / sample_count

    if not any_mismatch:
        print(
            "For Model A " + str(self.model_name) +
            " and Model B, the final predictions on this split of the dataset are same"
        )
    return total_diff
def calculate_entropy(self, preds, dump_bool, name, split):
    """Score how far Model A's predictions are from the given ``preds``.

    Model A is chosen by ``self.model_name`` and evaluated on the data
    selected by ``split``. Its predicted classes can optionally be dumped
    through ``self.dump_data``.

    Args:
        preds: Per-sample predicted class indices of Model B; indexed as
            ``preds[i]`` and used as a column index into Model A's scores.
        dump_bool: When truthy, Model A's predicted classes are passed to
            ``self.dump_data`` under ``name + '_model_A_predictions'``.
        name: Prefix for the dump name; only used when ``dump_bool`` is
            truthy.
        split: 0 for the training split, 1 for cross-validation, 2 for test.

    Returns:
        ``None`` when ``split`` is not 0, 1 or 2. Otherwise the sum of
        ``-log2(|pA[top_A] - pA[pred_B]|)`` over the samples where the two
        predicted classes differ (zero gaps add nothing), divided by the
        total number of samples.

    Raises:
        ValueError: If ``self.model_name`` is not supported (previously an
            ``UnboundLocalError`` further down).
    """
    if split == 0:
        data, output = self.data_X, self.data_Y
    elif split == 1:
        data, output = self.cross_validation_X, self.cross_validation_Y
    elif split == 2:
        data, output = self.test_X, self.test_Y
    else:
        return None

    model_name = self.model_name
    if model_name == 'svm':
        raw_probs = np.array(self.classifier.predict_proba(data))
        probs = convert_to_all_classes_array(
            raw_probs, self.classifier.classes_, self.output_classes)
        if raw_probs.shape[-1] == self.output_classes:
            # NOTE(review): compares two booleans, not the arrays themselves.
            assert (raw_probs.all() == probs.all())
        # Use the class-expanded scores: columns of the raw classifier
        # output follow classifier.classes_, so its argmax is only a valid
        # label when every class was seen during fitting.
        print("Accuracy of Model A on the current split of the dataset is : ",
              accuracy_score(output, np.argmax(probs, axis=-1)))
    elif model_name == 'ann':
        probs, _, _, acc = self.NN.get_predictions(
            data, True, convert_one_hot(output, self.output_classes))
        print(probs.shape)
        print("Accuracy of Model A on the" + self.dataset_name +
              "Training Dataset is: " + str(acc))
        probs = np.array(probs)
        # Drop a leading singleton axis if the network returned one.
        if probs.shape[0] == 1:
            probs = np.squeeze(probs, axis=0)
    elif model_name in ('cnn', 'inceptionv3'):
        if model_name == 'inceptionv3':
            probs, _, _, acc = self.inception_classifier.get_output(
                data, True, output)
        else:
            probs, _, _, acc = self.CNN_classifier.get_predictions(
                data, True, output)
        print("Accuracy of Model A on the" + self.dataset_name +
              "Training Dataset is: " + str(acc))
        probs = np.array(probs)
        if probs.shape[0] == 1:
            probs = np.squeeze(probs, axis=0)
    elif model_name == 'dt':
        raw_probs = self.classifier.predict_model(data)
        probs = convert_to_all_classes_array(
            raw_probs, self.classifier.classes_, self.output_classes)
        if raw_probs.shape[-1] == self.output_classes:
            assert (raw_probs.all() == probs.all())
    elif model_name == "ensemble":
        probs = np.array(
            self.classifier.predict_ensemble(
                data, True, convert_one_hot(output, self.output_classes)))
    else:
        raise ValueError("Unsupported model_name: " + repr(model_name))

    prob_A_indexes = np.argmax(probs, axis=-1)

    if dump_bool:
        name = name + '_model_A_predictions'
        payload = {'val': prob_A_indexes, 'name': name}
        self.dump_data(name + '.npz', preds_A_dict=payload)

    sample_count = probs.shape[0]
    total_diff = 0.0
    any_mismatch = False
    for i in range(sample_count):
        if prob_A_indexes[i] != preds[i]:
            any_mismatch = True
            gap = abs(probs[i][prob_A_indexes[i]] - probs[i][preds[i]])
            # A zero gap (tie in Model A's scores) contributes nothing.
            if not gap <= 0:
                total_diff += -1.0 * math.log2(gap)
    total_diff = total_diff / sample_count

    if not any_mismatch:
        print("For Model A " + str(self.model_name) +
              " and Model B as ANN, the final predictions on this split of the dataset are same")
    return total_diff