Exemple #1
0
    def __kfold_prob_tp_fp(self, X, y, n_folds=2):
        """Return per-class averaged probability mass, split by predicted label.

        For every cross-validation fold ``self.model`` is fitted on the
        training part and asked for both hard predictions and class
        probabilities on the held-out part.  For each class column the
        predicted probabilities are summed separately over the samples the
        model labelled ``1`` ("TP") and the samples it labelled ``0`` ("FP"),
        and each sum is divided by the number of held-out samples.  The
        per-fold vectors are then averaged over the folds.

        NOTE: despite the TP/FP names, the split relies on the model's own
        predictions only; the held-out ground truth is never consulted.

        The result is cached in
        ``<self.prefix><self.dir_name>/<n_folds>FCV_prob.pickle``; when that
        file can be loaded it is returned as is and no fitting takes place.

        Args:
            X: feature matrix; must support indexing with the index arrays
                produced by ``KFold``.
            y: labels, either a list (converted to an array) or an object
                with a ``shape``.  A 1-D ``y`` makes the hard predictions go
                through ``MultiLabelBinarizer(classes=self.classes)``.
            n_folds: number of folds; also part of the cache file name.

        Returns:
            ``[tp_av, fp_av]``: two per-class sequences.

        Raises:
            Whatever fitting, predicting or writing the cache file raises;
            only failures while *reading* the cache are suppressed.
        """
        if isinstance(y, list):
            y = np.asarray(y)

        cache_path = (self.prefix + self.dir_name + '/' + str(n_folds) +
                      'FCV_prob.pickle')

        # NOTE(security): pickle.load can execute arbitrary code, so the cache
        # directory must only contain files written by this program.
        try:
            with open(cache_path, 'rb') as f:
                tp_av, fp_av = pickle.load(f)
        except Exception:
            # Missing, unreadable or malformed cache: recompute below.
            # Catching Exception rather than using a bare ``except`` lets
            # KeyboardInterrupt / SystemExit stop the program as expected.
            pass
        else:
            return [tp_av, fp_av]

        kf = KFold(y.shape[0], n_folds=n_folds)
        labels_are_1d = len(y.shape) == 1
        TP_avr = []
        FP_avr = []
        for train_index, test_index in kf:
            # The same self.model object is refitted on every fold.
            model = self.model.fit(X[train_index], y[train_index])
            X_test = X[test_index]
            y_predict = model.predict(X_test)
            y_prob_predict = model.predict_proba(X_test)
            if labels_are_1d:
                # Turn the predicted labels into one indicator column per
                # class so they line up with the probability columns.
                y_predict = MultiLabelBinarizer(
                    classes=self.classes).fit_transform(
                        [[y_p] for y_p in y_predict])
            TP = []
            FP = []
            for class_ind, class_prob in zip(y_predict.transpose(),
                                             y_prob_predict.transpose()):
                class_ind = np.asarray(class_ind)
                class_prob = np.asarray(class_prob)
                n_samples = len(class_ind)
                # Entries that are neither 0 nor 1 contribute to neither sum.
                TP.append(np.sum(class_prob[class_ind == 1]) / n_samples)
                FP.append(np.sum(class_prob[class_ind == 0]) / n_samples)
            TP_avr.append(TP)
            FP_avr.append(FP)

        tp_av = np.average(TP_avr, axis=0)
        fp_av = np.average(FP_avr, axis=0)
        with open(cache_path, 'wb') as f:
            pickle.dump([tp_av, fp_av], f)
        return [tp_av, fp_av]
    def __kfold_prob_tp_fp(self, X, y, n_folds=2):
        """Return per-class averaged probability mass, split by predicted label.

        For every cross-validation fold ``self.model`` is fitted on the
        training part and asked for both hard predictions and class
        probabilities on the held-out part.  For each class column the
        predicted probabilities are summed separately over the samples the
        model labelled ``1`` ("TP") and the samples it labelled ``0`` ("FP"),
        and each sum is divided by the number of held-out samples.  The
        per-fold vectors are then averaged over the folds.

        NOTE: despite the TP/FP names, the split relies on the model's own
        predictions only; the held-out ground truth is never consulted.

        The result is cached in
        ``<self.prefix><self.dir_name>/<n_folds>FCV_prob.pickle``; when that
        file can be loaded it is returned as is and no fitting takes place.

        Args:
            X: feature matrix; must support indexing with the index arrays
                produced by ``KFold``.
            y: labels, either a list (converted to an array) or an object
                with a ``shape``.  A 1-D ``y`` makes the hard predictions go
                through ``MultiLabelBinarizer(classes=self.classes)``.
            n_folds: number of folds; also part of the cache file name.

        Returns:
            ``[tp_av, fp_av]``: two per-class sequences.

        Raises:
            Whatever fitting, predicting or writing the cache file raises;
            only failures while *reading* the cache are suppressed.
        """
        if isinstance(y, list):
            y = np.asarray(y)

        cache_path = self.prefix + self.dir_name + "/" + str(n_folds) + "FCV_prob.pickle"

        # NOTE(security): pickle.load can execute arbitrary code, so the cache
        # directory must only contain files written by this program.
        try:
            with open(cache_path, "rb") as f:
                tp_av, fp_av = pickle.load(f)
        except Exception:
            # Missing, unreadable or malformed cache: recompute below.
            # Catching Exception rather than using a bare ``except`` lets
            # KeyboardInterrupt / SystemExit stop the program as expected.
            pass
        else:
            return [tp_av, fp_av]

        kf = KFold(y.shape[0], n_folds=n_folds)
        labels_are_1d = len(y.shape) == 1
        TP_avr = []
        FP_avr = []
        for train_index, test_index in kf:
            # The same self.model object is refitted on every fold.
            model = self.model.fit(X[train_index], y[train_index])
            X_test = X[test_index]
            y_predict = model.predict(X_test)
            y_prob_predict = model.predict_proba(X_test)
            if labels_are_1d:
                # Turn the predicted labels into one indicator column per
                # class so they line up with the probability columns.
                y_predict = MultiLabelBinarizer(classes=self.classes).fit_transform([[y_p] for y_p in y_predict])
            TP = []
            FP = []
            for class_ind, class_prob in zip(y_predict.transpose(), y_prob_predict.transpose()):
                class_ind = np.asarray(class_ind)
                class_prob = np.asarray(class_prob)
                n_samples = len(class_ind)
                # Entries that are neither 0 nor 1 contribute to neither sum.
                TP.append(np.sum(class_prob[class_ind == 1]) / n_samples)
                FP.append(np.sum(class_prob[class_ind == 0]) / n_samples)
            TP_avr.append(TP)
            FP_avr.append(FP)

        tp_av = np.average(TP_avr, axis=0)
        fp_av = np.average(FP_avr, axis=0)
        with open(cache_path, "wb") as f:
            pickle.dump([tp_av, fp_av], f)
        return [tp_av, fp_av]