コード例 #1
0
    def predict_ensemble(self, data_X, with_acc, data_Y):
        """Collect each member model's class scores and ensemble them.

        Every entry of ``self.models`` is read as ``(kind, model, ...)``.
        Classical members ("svm", "knn", "dt", "lr") are queried directly and
        their output is passed through ``convert_to_all_classes_array`` with
        ``self.output_size``; neural members ("ann", "cnn") are queried
        through ``get_predictions``. Per-model and ensemble accuracies are
        printed along the way.

        Args:
            data_X: Input samples handed to every member model.
            with_acc: Forwarded to ``get_predictions`` of "ann"/"cnn" members
                only; the classical branches do not use it.
            data_Y: Targets; ``argmax`` over the last axis is used as the true
                class index (presumably one-hot rows -- confirm with caller).

        Returns:
            Whatever ``self.apply_ensembling`` returns for the list of
            per-model score arrays.

        Raises:
            ValueError: If a member's kind is not one of the supported names.
        """
        predictions = []
        for entry in self.models:
            model_name, model = entry[0], entry[1]

            if model_name in ("svm", "knn", "dt", "lr"):
                if model_name == "svm":
                    new_data_X = data_X
                    print(new_data_X.shape)
                    probs2 = np.array(model.predict_proba(new_data_X))
                elif model_name == "knn":
                    # NOTE(review): fit_transform re-fits self.pca2 on the
                    # data being predicted. If pca2 was already fitted on the
                    # training data this should probably be transform() --
                    # left unchanged until that is confirmed.
                    new_data_X = self.pca2.fit_transform(data_X)
                    print(new_data_X.shape)
                    probs2 = np.array(model.predict_proba(new_data_X))
                else:
                    # "dt" and "lr" members expose predict_model instead.
                    probs2 = np.array(model.predict_model(data_X))

                probs = convert_to_all_classes_array(probs2, model.classes_,
                                                     self.output_size)
                print(
                    "Accuracy of " + str(model_name) + " classifier:",
                    accuracy_score(np.argmax(data_Y, axis=-1),
                                   np.argmax(probs, axis=-1)))
                if probs2.shape[-1] == self.output_size:
                    # The model already emits one column per class, so the
                    # conversion is expected to leave the scores untouched.
                    # (Comparing `.all()` results would only compare two
                    # booleans and could never detect a difference.)
                    assert (np.shape(probs2) == np.shape(probs)
                            and np.allclose(probs2, probs, equal_nan=True))

            elif model_name in ("ann", "cnn"):
                probs, _, _, acc = model.get_predictions(
                    data_X, with_acc, data_Y)
                print("Accuracy of " + str(model_name) + " : " + str(acc))
                probs = np.array(probs)
                # Drop a leading singleton axis if the network returned one.
                if probs.shape[0] == 1:
                    probs = np.squeeze(probs, axis=0)

            else:
                # Without this guard the previous member's scores would be
                # appended a second time (or `probs` would be unbound).
                raise ValueError(
                    "Unsupported model type in ensemble: {!r}".format(
                        model_name))

            predictions.append(probs)

        vals = self.apply_ensembling(predictions)
        print("Accuracy of ensemble on the" + self.dataset_name +
              " dataset is: " + str(
                  accuracy_score(np.argmax(data_Y, axis=-1),
                                 np.argmax(vals, axis=-1))))
        return vals
コード例 #2
0
    def calculate_entropy(self, probs_B, name, split):
        """Score how far model A's output is from model B's predictions.

        Model A (selected by ``self.model_name``) is run on the requested
        split, its accuracy is printed, and a disagreement score against
        model B's predicted classes is returned.

        Args:
            probs_B: Model B's per-sample class scores; ``argmax`` over the
                last axis is taken as B's predicted class.
            name: Unused by this method.
            split: 0 = train, 1 = cross-validation, 2 = test. The
                counter-factual copies (``cf_*`` attributes) are used when
                ``self.interpretability_mode == 'counter_factual'``.

        Returns:
            ``None`` for any other ``split`` value. For the
            "sentiment_analysis" dataset, the binary entropy (base 2) of the
            fraction of samples on which A and B agree (``0`` when they agree
            on all or on none). Otherwise the mean over samples of
            ``-log2(|pA[class chosen by A] - pA[class chosen by B]|)``, where
            samples with a zero gap contribute 0.

        Raises:
            ValueError: If ``self.interpretability_mode`` or
                ``self.model_name`` is not a supported value.
        """
        preds = np.argmax(probs_B, axis=-1)

        # Attribute names holding (inputs, targets) for each mode and split.
        if split == 0:
            attr_names = {
                'original': ('data_X', 'data_Y'),
                'counter_factual': ('cf_data_X', 'cf_data_Y'),
            }
        elif split == 1:
            attr_names = {
                'original': ('cross_validation_X', 'cross_validation_Y'),
                'counter_factual': ('cf_cv_X', 'cf_cv_Y'),
            }
        elif split == 2:
            attr_names = {
                'original': ('test_X', 'test_Y'),
                'counter_factual': ('cf_test_X', 'cf_test_Y'),
            }
        else:
            return None

        mode = self.interpretability_mode
        if mode not in attr_names:
            # Previously this left `data`/`output` unbound and failed later
            # with an unrelated UnboundLocalError.
            raise ValueError(
                "Unsupported interpretability_mode: {!r}".format(mode))
        x_attr, y_attr = attr_names[mode]
        data = getattr(self, x_attr)
        output = getattr(self, y_attr)

        model_name = self.model_name
        if model_name in ('svm', 'naive_bayes'):
            probs2 = np.array(self.classifier.predict_proba(data))
            print(probs2.shape)
            probs = convert_to_all_classes_array(probs2,
                                                 self.classifier.classes_,
                                                 self.output_classes)
            if probs2.shape[-1] == self.output_classes:
                # One column per class already: the conversion is expected
                # to be a no-op. (`a.all() == b.all()` only compared two
                # booleans and could never detect a difference.)
                assert (np.shape(probs2) == np.shape(probs)
                        and np.allclose(probs2, probs, equal_nan=True))
            categorical_outputs = to_categorical(output)
            print(
                "Accuracy of Model A on the current split of the dataset is : ",
                accuracy_score(np.argmax(categorical_outputs, axis=-1),
                               np.argmax(probs, axis=-1)))

        elif model_name == 'ann':
            probs, _, _, acc = self.NN.get_predictions(
                data, True, convert_one_hot(output, self.output_classes))
            print(probs.shape)
            print("Accuracy of Model A on the" + self.dataset_name +
                  "Training Dataset is: " + str(acc))
            probs = np.array(probs)
            # Drop a leading singleton axis if the network returned one.
            if probs.shape[0] == 1:
                probs = np.squeeze(probs, axis=0)

        elif model_name in ('cnn', 'inceptionv3'):
            if model_name == 'inceptionv3':
                probs, _, _, acc = self.inception_classifier.get_output(
                    data, True, output)
            else:
                probs, _, _, acc = self.CNN_classifier.get_predictions(
                    data, True, output)
            print("Accuracy of Model A on the" + self.dataset_name +
                  "Training Dataset is: " + str(acc))
            probs = np.array(probs)
            if probs.shape[0] == 1:
                probs = np.squeeze(probs, axis=0)

        elif model_name == 'dt':
            probs2 = self.classifier.predict_model(data)
            probs = convert_to_all_classes_array(probs2,
                                                 self.classifier.classes_,
                                                 self.output_classes)
            if probs2.shape[-1] == self.output_classes:
                assert (np.shape(probs2) == np.shape(probs)
                        and np.allclose(probs2, probs, equal_nan=True))

        elif model_name == "ensemble":
            probs = np.array(
                self.classifier.predict_ensemble(
                    data, True, convert_one_hot(output, self.output_classes)))

        else:
            # Previously `probs` stayed unbound for an unknown model name.
            raise ValueError(
                "Unsupported model_name: {!r}".format(model_name))

        prob_A_indexes = np.argmax(probs, axis=-1)
        print(
            "Accuracy of Model A on the current split of the predictions of Model B of the dataset is: ",
            accuracy_score(preds, prob_A_indexes))

        n_samples = probs.shape[0]

        if self.dataset_name == "sentiment_analysis":
            # The score depends only on the agreement rate between A and B.
            count_equal = sum(
                1 for a_idx, b_idx in zip(prob_A_indexes, preds)
                if a_idx == b_idx)
            count_unequal = n_samples - count_equal
            prob_equal = (count_equal * 1.0) / n_samples
            prob_unequal = (count_unequal * 1.0) / n_samples
            if prob_equal == 0 or prob_unequal == 0:
                # Total agreement or total disagreement: entropy is zero.
                return 0
            return ((-1.0 * prob_equal * math.log2(prob_equal)) +
                    (-1.0 * prob_unequal * math.log2(prob_unequal)))

        total_diff = 0.0
        all_agree = True
        for a_idx, b_idx, row in zip(prob_A_indexes, preds, probs):
            if a_idx != b_idx:
                all_agree = False
            # Gap between A's top score and A's score for B's chosen class.
            gap = abs(row[a_idx] - row[b_idx])
            if gap <= 0:
                continue  # Same class (or tie): contributes nothing.
            total_diff -= math.log2(gap)
        total_diff = total_diff / n_samples

        if all_agree:
            print(
                "For Model A " + str(self.model_name) +
                " and Model B, the final predictions on this split of the dataset are same"
            )

        return total_diff
    def calculate_entropy(self, probs_B, name, split):
        """Score how far model A's output is from model B's predictions.

        Model A (selected by ``self.model_name``) is run on the requested
        split, its accuracy is printed, and a disagreement score against
        model B's predicted classes is returned.

        Args:
            probs_B: Model B's per-sample class scores; ``argmax`` over the
                last axis is taken as B's predicted class.
            name: Unused by this method.
            split: 0 = train, 1 = cross-validation, 2 = test.

        Returns:
            ``None`` for any other ``split`` value; otherwise the mean over
            samples of ``-log2(|pA[class chosen by A] - pA[class chosen by
            B]|)``, where samples with a zero gap contribute 0.

        Raises:
            ValueError: If ``self.model_name`` is not a supported value.
        """
        preds = np.argmax(probs_B, axis=-1)

        if split == 0:
            data, output = self.data_X, self.data_Y
        elif split == 1:
            data, output = self.cross_validation_X, self.cross_validation_Y
        elif split == 2:
            data, output = self.test_X, self.test_Y
        else:
            return None

        model_name = self.model_name
        if model_name in ('svm', 'naive_bayes'):
            probs2 = np.array(self.classifier.predict_proba(data))
            probs = convert_to_all_classes_array(probs2,
                                                 self.classifier.classes_,
                                                 self.output_classes)
            if probs2.shape[-1] == self.output_classes:
                # One column per class already: the conversion is expected
                # to be a no-op. (`a.all() == b.all()` only compared two
                # booleans and could never detect a difference.)
                assert (np.shape(probs2) == np.shape(probs)
                        and np.allclose(probs2, probs, equal_nan=True))
            print(
                "Accuracy of Model A on the current split of the dataset is : ",
                accuracy_score(output, np.argmax(probs, axis=-1)))

        elif model_name == 'ann':
            probs, _, _, acc = self.NN.get_predictions(
                data, True, convert_one_hot(output, self.output_classes))
            print(probs.shape)
            print("Accuracy of Model A on the" + self.dataset_name +
                  "Training Dataset is: " + str(acc))
            probs = np.array(probs)
            # Drop a leading singleton axis if the network returned one.
            if probs.shape[0] == 1:
                probs = np.squeeze(probs, axis=0)

        elif model_name in ('cnn', 'inceptionv3'):
            if model_name == 'inceptionv3':
                probs, _, _, acc = self.inception_classifier.get_output(
                    data, True, output)
            else:
                probs, _, _, acc = self.CNN_classifier.get_predictions(
                    data, True, output)
            print("Accuracy of Model A on the" + self.dataset_name +
                  "Training Dataset is: " + str(acc))
            probs = np.array(probs)
            if probs.shape[0] == 1:
                probs = np.squeeze(probs, axis=0)

        elif model_name == 'dt':
            probs2 = self.classifier.predict_model(data)
            probs = convert_to_all_classes_array(probs2,
                                                 self.classifier.classes_,
                                                 self.output_classes)
            if probs2.shape[-1] == self.output_classes:
                assert (np.shape(probs2) == np.shape(probs)
                        and np.allclose(probs2, probs, equal_nan=True))

        elif model_name == "ensemble":
            probs = np.array(
                self.classifier.predict_ensemble(
                    data, True, convert_one_hot(output, self.output_classes)))

        else:
            # Previously `probs` stayed unbound for an unknown model name and
            # the method failed later with an unrelated UnboundLocalError.
            raise ValueError(
                "Unsupported model_name: {!r}".format(model_name))

        prob_A_indexes = np.argmax(probs, axis=-1)
        print(
            "Accuracy of Model A on the current split of the predictions of Model B of the dataset is: ",
            accuracy_score(preds, prob_A_indexes))

        total_diff = 0.0
        all_agree = True
        for a_idx, b_idx, row in zip(prob_A_indexes, preds, probs):
            if a_idx != b_idx:
                all_agree = False
            # Gap between A's top score and A's score for B's chosen class.
            gap = abs(row[a_idx] - row[b_idx])
            if gap <= 0:
                continue  # Same class (or tie): contributes nothing.
            total_diff -= math.log2(gap)
        total_diff = total_diff / probs.shape[0]

        if all_agree:
            print(
                "For Model A " + str(self.model_name) +
                " and Model B, the final predictions on this split of the dataset are same"
            )

        return total_diff
コード例 #4
0
    def calculate_entropy(self, preds, dump_bool, name, split):
        """Score how far model A's output is from model B's predictions.

        Model A (selected by ``self.model_name``) is run on the requested
        split, its accuracy is printed, its predicted classes are optionally
        dumped, and a disagreement score against ``preds`` is returned.

        Args:
            preds: Model B's predicted class index per sample.
            dump_bool: When truthy, A's predicted classes are written through
                ``self.dump_data`` to ``<name>_model_A_predictions.npz``.
            name: Prefix for the dump file name.
            split: 0 = train, 1 = cross-validation, 2 = test.

        Returns:
            ``None`` for any other ``split`` value; otherwise the mean over
            samples of ``-log2(|pA[class chosen by A] - pA[class chosen by
            B]|)``, where samples with a zero gap contribute 0.

        Raises:
            ValueError: If ``self.model_name`` is not a supported value.
        """
        if split == 0:
            data, output = self.data_X, self.data_Y
        elif split == 1:
            data, output = self.cross_validation_X, self.cross_validation_Y
        elif split == 2:
            data, output = self.test_X, self.test_Y
        else:
            return None

        model_name = self.model_name
        if model_name == 'svm':
            probs2 = np.array(self.classifier.predict_proba(data))
            probs = convert_to_all_classes_array(probs2, self.classifier.classes_, self.output_classes)
            if probs2.shape[-1] == self.output_classes:
                # One column per class already: the conversion is expected
                # to be a no-op. (`a.all() == b.all()` only compared two
                # booleans and could never detect a difference.)
                assert (np.shape(probs2) == np.shape(probs)
                        and np.allclose(probs2, probs, equal_nan=True))
            # Use the class-aligned scores: columns of `probs2` follow
            # `classifier.classes_`, so their argmax is a column position and
            # not necessarily the class id stored in `output`.
            print("Accuracy of Model A on the current split of the dataset is : ",
                  accuracy_score(output, np.argmax(probs, axis=-1)))

        elif model_name == 'ann':
            probs, _, _, acc = self.NN.get_predictions(
                data, True, convert_one_hot(output, self.output_classes))
            print(probs.shape)
            print("Accuracy of Model A on the" + self.dataset_name + "Training Dataset is: " + str(acc))
            probs = np.array(probs)
            # Drop a leading singleton axis if the network returned one.
            if probs.shape[0] == 1:
                probs = np.squeeze(probs, axis=0)

        elif model_name in ('cnn', 'inceptionv3'):
            if model_name == 'inceptionv3':
                probs, _, _, acc = self.inception_classifier.get_output(data, True, output)
            else:
                probs, _, _, acc = self.CNN_classifier.get_predictions(data, True, output)
            print("Accuracy of Model A on the" + self.dataset_name + "Training Dataset is: " + str(acc))
            probs = np.array(probs)
            if probs.shape[0] == 1:
                probs = np.squeeze(probs, axis=0)

        elif model_name == 'dt':
            probs2 = self.classifier.predict_model(data)
            probs = convert_to_all_classes_array(probs2, self.classifier.classes_, self.output_classes)
            if probs2.shape[-1] == self.output_classes:
                assert (np.shape(probs2) == np.shape(probs)
                        and np.allclose(probs2, probs, equal_nan=True))

        elif model_name == "ensemble":
            probs = np.array(self.classifier.predict_ensemble(
                data, True, convert_one_hot(output, self.output_classes)))

        else:
            # Previously `probs` stayed unbound for an unknown model name and
            # the method failed later with an unrelated UnboundLocalError.
            raise ValueError(
                "Unsupported model_name: {!r}".format(model_name))

        prob_A_indexes = np.argmax(probs, axis=-1)

        if dump_bool:
            name = name + '_model_A_predictions'
            dict1 = {'val': prob_A_indexes, 'name': name}
            self.dump_data(name + '.npz', preds_A_dict=dict1)

        total_diff = 0.0
        all_agree = True
        for i, row in enumerate(probs):
            a_idx = prob_A_indexes[i]
            b_idx = preds[i]
            if a_idx != b_idx:
                all_agree = False
            # Gap between A's top score and A's score for B's chosen class.
            gap = abs(row[a_idx] - row[b_idx])
            if gap <= 0:
                continue  # Same class (or tie): contributes nothing.
            total_diff -= math.log2(gap)
        total_diff = total_diff / probs.shape[0]

        if all_agree:
            print("For Model A " + str(
                self.model_name) + " and Model B as ANN, the final predictions on this split of the dataset are same")

        return total_diff