Code example #1
    def __init__(self, model, sklearn_model: bool):
        r"""__init__ method

        This method adapts the input `model` so it can be used to create
        confidence intervals with conformal prediction.

        Parameters
        ----------
        model:
            Underlying model used to generate predictions and confidence
            intervals. It must be a scikit-learn model, LGBMRegressor,
            LGBMClassifier, XGBRegressor, XGBClassifier, CatBoostRegressor or
            CatBoostClassifier.
        sklearn_model: bool
            Whether `model` is a scikit-learn model.

        Returns
        -------
        cp: obj: Adapt_to_CP
            The adapter instance wrapping `model`.

        Examples
        --------
        >>> model = lightgbm.LGBMRegressor()
        >>> cp = Adapt_to_CP(model, sklearn_model=False)
        """
        self.model = model
        if sklearn_model:
            if is_classifier(model):
                self.icp = IcpClassifier(NcFactory.create_nc(model))
            elif is_regressor(model):
                self.icp = IcpRegressor(NcFactory.create_nc(model))
        else:
            # Non scikit-learn models (LightGBM, XGBoost, CatBoost) go
            # through an adapter before building the nonconformity function
            model_adapter = NonConformistAdapter(model)
            if is_classifier(model):
                self.icp = IcpClassifier(ClassifierNc(model_adapter))
            elif is_regressor(model):
                self.icp = IcpRegressor(RegressorNc(model_adapter))
            elif model.__class__.__name__ == "Booster":
                # A raw Booster exposes no sklearn estimator tags; treat it
                # as a regressor
                self.icp = IcpRegressor(RegressorNc(model_adapter))
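
A minimal usage sketch for the adapter above, assuming the wrapped `icp` attribute follows nonconformist's standard fit/calibrate/predict API; the synthetic data and the `sklearn_model=False` flag are illustrative only.

import numpy as np
import lightgbm

# Illustrative only: wrap a LightGBM regressor and run split conformal
# prediction through the underlying IcpRegressor.
rng = np.random.RandomState(0)
X = rng.normal(size=(300, 4))
y = X[:, 0] + 0.1 * rng.normal(size=300)

cp = Adapt_to_CP(lightgbm.LGBMRegressor(), sklearn_model=False)
cp.icp.fit(X[:200], y[:200])              # proper training set
cp.icp.calibrate(X[200:250], y[200:250])  # calibration set
intervals = cp.icp.predict(X[250:], significance=0.1)  # 90% intervals
y_lower, y_upper = intervals[:, 0], intervals[:, 1]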
Code example #2
    def ccp_predict(self, data_lbld, data_unlbld, new_lbld):
        # Cross-conformal prediction: average ICP p-values over stratified folds

        # Create SMOTE instance for class rebalancing
        smote = SMOTE(random_state=self.random_state)

        # Create instance of classifier
        classifier_y = self.classifiers['classifier_y']
        parameters_y = self.clf_parameters['classifier_y']

        clf = classifier_y.set_params(**parameters_y)

        X = data_lbld.iloc[:, :-2]
        y = data_lbld.iloc[:, -1]

        X_new = new_lbld.iloc[:, :-2]
        y_new = new_lbld.iloc[:, -1]

        X = pd.concat([X, X_new], sort=False)
        y = pd.concat([y, y_new])

        X_unlbld = data_unlbld.iloc[:, :-2]

        sss = StratifiedKFold(n_splits=5, shuffle=True,
                              random_state=self.random_state)
        sss.get_n_splits(X, y)

        p_values = []

        for train_index, calib_index in sss.split(X, y):
            X_train, X_calib = X.iloc[train_index], X.iloc[calib_index]
            y_train, y_calib = y.iloc[train_index], y.iloc[calib_index]

            if self.rebalancing_parameters['SMOTE_y']:
                # fit_resample returns arrays here, hence positional slicing
                X_train, y_train = smote.fit_resample(X_train, y_train)
                clf.fit(X_train[:, :-1], y_train, sample_weight=X_train[:, -1])
            else:
                clf.fit(X_train.iloc[:, :-1],
                        y_train,
                        sample_weight=X_train.iloc[:, -1])

            nc = NcFactory.create_nc(clf, MarginErrFunc())
            icp = IcpClassifier(nc)

            if self.rebalancing_parameters['SMOTE_y']:
                icp.fit(X_train[:, :-1], y_train)
            else:
                icp.fit(X_train.iloc[:, :-1].values, y_train)

            icp.calibrate(X_calib.iloc[:, :-1].values, y_calib)

            # Predict confidences for validation sample and unlabeled sample
            p_values.append(
                icp.predict(X_unlbld.iloc[:, :-1].values, significance=None))

        mean_p_values = np.array(p_values).mean(axis=0)
        ccp_predictions = pd.DataFrame(mean_p_values,
                                       columns=['mean_p_0', 'mean_p_1'])
        ccp_predictions["credibility"] = [
            row.max() for _, row in ccp_predictions.iterrows()
        ]
        ccp_predictions["confidence"] = [
            1 - row.min() for _, row in ccp_predictions.iterrows()
        ]

        ccp_predictions.index = X_unlbld.index

        return ccp_predictions
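
For intuition, the two derived columns follow the usual conformal definitions: credibility is the largest averaged p-value, and confidence is one minus the smallest (for two classes, one minus the second-largest). A tiny sketch with made-up p-values:

import numpy as np

# Hypothetical averaged p-values for three unlabeled points (classes 0 and 1)
mean_p = np.array([[0.70, 0.10],
                   [0.40, 0.35],
                   [0.05, 0.90]])

credibility = mean_p.max(axis=1)     # [0.70, 0.40, 0.90]
confidence = 1 - mean_p.min(axis=1)  # [0.90, 0.65, 0.95]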
Code example #3
def score_model(icp, icp_name, ds, ds_name, scoring_funcs):
    scores = cross_val_score(
        icp,
        ds.data,
        ds.target,
        iterations=10,
        folds=10,
        scoring_funcs=scoring_funcs,
        significance_levels=[0.05, 0.1, 0.2],
    )

    print("\n{}: {}".format(icp_name, ds_name))
    scores = scores.drop(["fold", "iter"], axis=1)
    print(scores.groupby(["significance"]).mean())


# -----------------------------------------------------------------------------
# Classification
# -----------------------------------------------------------------------------
data = load_iris()

nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100))
icp = IcpClassifier(nc)
icp_cv = ClassIcpCvHelper(icp)
score_model(icp_cv, "IcpClassifier", data, "iris",
            [class_mean_errors, class_avg_c])

# -----------------------------------------------------------------------------
# Classification (normalized)
# -----------------------------------------------------------------------------
data = load_iris()

nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100),
                         normalizer_model=KNeighborsRegressor())
icp = IcpClassifier(nc)
icp_cv = ClassIcpCvHelper(icp)

score_model(icp_cv, "IcpClassifier (normalized)", data, "iris",
            [class_mean_errors, class_avg_c])
Code example #4
File: nc_factory.py Project: woonkij/nonconformist
def score_model(icp, icp_name, ds, ds_name, scoring_funcs):
	scores = cross_val_score(icp,
	                         ds.data,
	                         ds.target,
	                         iterations=10,
	                         folds=10,
	                         scoring_funcs=scoring_funcs,
	                         significance_levels=[0.05, 0.1, 0.2])

	print('\n{}: {}'.format(icp_name, ds_name))
	scores = scores.drop(['fold', 'iter'], axis=1)
	print(scores.groupby(['significance']).mean())

# -----------------------------------------------------------------------------
# Classification
# -----------------------------------------------------------------------------
data = load_iris()

nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100))
icp = IcpClassifier(nc)
icp_cv = ClassIcpCvHelper(icp)
score_model(icp_cv,
            'IcpClassifier',
            data,
            'iris',
            [class_mean_errors, class_avg_c])

# -----------------------------------------------------------------------------
# Classification (normalized)
# -----------------------------------------------------------------------------
data = load_iris()

nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100),
                         normalizer_model=KNeighborsRegressor())
icp = IcpClassifier(nc)
icp_cv = ClassIcpCvHelper(icp)

score_model(icp_cv,
            'IcpClassifier (normalized)',
            data,
            'iris',
            [class_mean_errors, class_avg_c])
Code example #5
# -----------------------------------------------------------------
# force_prediction: forced point predictions from a bootstrap
# conformal classifier, evaluated with stratified 10-fold CV

result_summary = []
s_folder = StratifiedKFold(n_splits=10, shuffle=True)
for index, (train, test) in enumerate(s_folder.split(X, y)):
    x_train_std, x_test_std = X[train], X[test]
    y_train, y_test = y[train], y[test]
    truth = y_test.reshape((-1, 1))

    lda = LinearDiscriminantAnalysis(n_components=9)
    x_train_lda = lda.fit_transform(x_train_std, y_train)
    x_test_lda = lda.transform(x_test_std)

    nc_fun = NcFactory.create_nc(model=simple_model)
    model = BootstrapConformalClassifier(IcpClassifier(nc_fun))
    model.fit(x_train_lda, y_train)
    prediction = model.predict(x_test_lda, significance=None)
    table = np.hstack((prediction, truth))
    result = [1 - force_mean_errors(prediction, truth)]

    if index == 0:
        result_summary = result
    else:
        result_summary = np.vstack((result_summary, result))
    print('\nBCP_Force')
    if np.unique(truth).shape[0] == 10:
        print('True')
    else:
        print('False')
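
`force_mean_errors` is not defined in these snippets; a plausible implementation, assuming it measures the error rate of forced point predictions (the class with the largest p-value), could look like this:

import numpy as np

def force_mean_errors(prediction, truth):
    # Hypothetical helper: fraction of forced (argmax p-value) predictions
    # that disagree with the true labels
    forced = prediction.argmax(axis=1).reshape(-1, 1)
    return float(np.mean(forced != truth))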
Code example #6
File: DesignDataIO.py Project: qkgautier/ATNE
    def __updatePlot(self):
        plotIdx = 0

        # Plot sampling over ground truth
        if self.groundTruth is not None:
            self.ax[plotIdx].clear()
            self.ax[plotIdx].set_xlim([0, 1.05])
            self.ax[plotIdx].set_ylim([0, 1.05])
            self.ax[plotIdx].set_title("ATNE sampling")
            self.ax[plotIdx].plot(self.groundTruth[:, 0],
                                  self.groundTruth[:, 1],
                                  'x',
                                  color="0.7",
                                  markeredgewidth=1.8,
                                  markersize=5)
            if (len(self.hintEliminatedIndexes) > 0
                    and self.elimWeights is None):
                self.ax[plotIdx].plot(
                    self.groundTruth[self.hintEliminatedIndexes, 0],
                    self.groundTruth[self.hintEliminatedIndexes, 1],
                    'x',
                    color="indianred",
                    markeredgewidth=1.8,
                    markersize=5)
            if self.elimWeights is None:
                self.ax[plotIdx].plot(self.groundTruth[self.relaxedIndexes, 0],
                                      self.groundTruth[self.relaxedIndexes, 1],
                                      'x',
                                      color="g",
                                      markeredgewidth=1.8,
                                      markersize=5)
            else:
                #self.ax[plotIdx].plot(self.groundTruth[self.sampledIndexes,0], self.groundTruth[self.sampledIndexes,1], 'o', color="b", markeredgewidth=1.8, markersize=5, alpha=0.6)
                weights = np.sum(
                    np.mean(self.elimWeights, axis=1) /
                    np.max(np.mean(self.elimWeights, axis=1), axis=0),
                    axis=1)
                #                 weights = np.mean(self.elimWeights[:,:,0], axis=1)
                alpha = 1 - (weights / np.max(weights)) / 2
                red = weights / np.max(weights)
                for i in range(self.groundTruth.shape[0]):
                    if i not in self.relaxedIndexes: continue
                    if np.isnan(red[i]): red[i] = 0
                    self.ax[plotIdx].plot(self.groundTruth[i, 0],
                                          self.groundTruth[i, 1],
                                          'x',
                                          color=[red[i], 1 - red[i], 0],
                                          markeredgewidth=1.8,
                                          markersize=5)
            self.ax[plotIdx].plot(self.groundTruth[self.sampledIndexes, 0],
                                  self.groundTruth[self.sampledIndexes, 1],
                                  'x',
                                  color="b",
                                  markeredgewidth=1.8,
                                  markersize=5)
            plotIdx += 1

        # Plot predicted design space
        if self.predictions is not None and self.doPlotPredictions:
            self.ax[plotIdx].clear()

            labeledMask = np.in1d(self.predictionsIndexes, self.sampledIndexes)
            labeledMaskIdx = np.where(labeledMask)[0]
            cmap = self.plt.cm.get_cmap('hsv')
            shapes = ['x', '.', '+']

            # Plot type 1
            #self.ax[plotIdx].set_title("Estimated design spaces by each forest")
            #for f in range(self.predictions.shape[0]):
            #    self.ax[plotIdx].plot(self.predictions[f,:,0], self.predictions[f,:,1], 'x', markeredgewidth=1.8, markersize=5)
            #    #self.ax[plotIdx].plot(self.predictions[f,labeledMask,0], self.predictions[f,labeledMask,1], 'x', markeredgewidth=1.8, markersize=5)

            # Plot type 2
            #import matplotlib
            #for i,p in enumerate(labeledMaskIdx):
            #    color = cmap(i/len(labeledMaskIdx))
            #    predmean = self.predictions[:,p,:].mean(0)
            #    predmed  = np.median(self.predictions[:,p,:], 0)
            #    predstd  = self.predictions[:,p,:].std(0)

            #    # Plot type 2.1
            #    #self.ax[plotIdx].plot(self.predictions[:,p,0], self.predictions[:,p,1], shapes[i%len(shapes)], markeredgewidth=1.8, markersize=5, color=color)
            #    #self.ax[plotIdx].plot(predmean[0], predmean[1], shapes[0], markeredgewidth=1.8, markersize=5, color=color)
            #    #self.ax[plotIdx].plot(predmed[0], predmed[1], shapes[1], markeredgewidth=1.8, markersize=5, color=color)

            #    # Plot type 2.2
            #    circle = matplotlib.patches.Ellipse(predmean[[0,1]], predstd[0], predstd[1])
            #    self.ax[plotIdx].add_artist(circle)

            # Plot type 3 (Mean predictions)
            #             self.ax[plotIdx].set_title("Average estimated P_relaxed")
            #             pred_mean = self.predictions.mean(0)
            #             self.ax[plotIdx].plot(pred_mean[:,0], pred_mean[:,1], 'x', markeredgewidth=1.8, markersize=5)

            # Plot type 4 (Mean predictions of the entire space)
            self.ax[plotIdx].set_title("Average estimated design space")
            if self.estimators is not None:
                predictions = np.empty([
                    self.predictions.shape[0],
                    self.designs.getNumDesigns(), self.predictions.shape[2]
                ])
                for f in range(self.predictions.shape[0]):
                    for o in range(self.predictions.shape[2]):
                        predictions[f, :, o] = self.estimators[f][o].predict(
                            self.allKnobs)
                pred_mean = predictions.mean(0)
                self.ax[plotIdx].scatter(pred_mean[:, 0],
                                         pred_mean[:, 1],
                                         marker='x',
                                         c=np.arange(pred_mean.shape[0]) /
                                         pred_mean.shape[0])

                # Some tests here, although I can't remember what I was testing exactly...
                if False:
                    from nonconformist.cp import IcpRegressor
                    from nonconformist.nc import NcFactory
                    from sklearn.ensemble import RandomForestRegressor

                    model1 = RandomForestRegressor()
                    nc1 = NcFactory.create_nc(model1)
                    icp1 = IcpRegressor(nc1)

                    model2 = RandomForestRegressor()
                    nc2 = NcFactory.create_nc(model2)
                    icp2 = IcpRegressor(nc2)

                    n = self.sampledIndexes.size

                    idx = np.random.permutation(n)
                    idx_train, idx_cal = idx[:int(0.8 * n)], idx[int(0.8 * n):]

                    icp1.fit(
                        self.allKnobs[self.sampledIndexes][idx_train, :],
                        self.groundTruth[self.sampledIndexes, 0][idx_train])
                    icp2.fit(
                        self.allKnobs[self.sampledIndexes][idx_train, :],
                        self.groundTruth[self.sampledIndexes, 1][idx_train])

                    icp1.calibrate(
                        self.allKnobs[self.sampledIndexes][idx_cal, :],
                        self.groundTruth[self.sampledIndexes, 0][idx_cal])
                    icp2.calibrate(
                        self.allKnobs[self.sampledIndexes][idx_cal, :],
                        self.groundTruth[self.sampledIndexes, 1][idx_cal])

                    prediction1 = icp1.predict(self.allKnobs,
                                               significance=0.05)
                    prediction2 = icp2.predict(self.allKnobs,
                                               significance=0.05)

                    print(prediction1)

                    self.ax[plotIdx].errorbar(pred_mean[:, 0],
                                              pred_mean[:, 1],
                                              xerr=prediction1,
                                              yerr=prediction2,
                                              linestyle="None")

            # Keep this
            #self.ax[plotIdx].set_xlim(left=0, right=2)
            #self.ax[plotIdx].set_ylim(bottom=0, top=2)
            plotIdx += 1

        # Plot hint space if available
        if self.doPlotHintSpace and self.hintSpace is not None:
            self.ax[plotIdx].clear()
            self.ax[plotIdx].set_xlim([0, 1.05])
            self.ax[plotIdx].set_ylim([0, 1.05])
            self.ax[plotIdx].set_title("Hint space")
            self.ax[plotIdx].plot(self.hintSpace[:, 0],
                                  self.hintSpace[:, 1],
                                  'x',
                                  markeredgewidth=1.8,
                                  markersize=5)
            plotIdx += 1

        # Plot distances for labeled samples
        if self.selectedDistances and self.doPlotDistances:
            self.ax[plotIdx].clear()
            self.ax[plotIdx].set_title(
                "Estimated distances for labeled samples")
            for d in self.selectedDistances:
                self.ax[plotIdx].hist(d.flatten(), 50, alpha=0.65)
                #self.ax[plotIdx].hist(d.mean(0), 50, alpha=0.65)
            #for d in self.gtDistances:
            #    self.ax[plotIdx].hist(d.flatten(), 50, alpha=0.65)
            plotIdx += 1

        # Plot distances for unlabeled samples
        if self.predictedDistances and self.doPlotDistances:
            self.ax[plotIdx].clear()
            self.ax[plotIdx].set_title(
                "Estimated distances for unlabeled samples (within P_relaxed)")
            predictedDistances = np.array(self.predictedDistances)
            for d in range(predictedDistances.shape[2]):
                self.ax[plotIdx].hist(predictedDistances[:, :, d].flatten(),
                                      50,
                                      alpha=0.65)
            plotIdx += 1

        self.plt.show()
        try:
            self.plt.pause(0.00001)
        except Exception:
            pass
        self.fig.canvas.draw()
        if self.blocking:
            self.fig.waitforbuttonpress()

        self.selectedDistances = []
        self.gtDistances = []
        self.predictedDistances = []
Code example #7
import sys
sys.path.append('/Users/staffan/git/peptid_studie/experiments/src') # Nonconformist

import numpy as np
from sklearn.datasets import load_iris
from sklearn.svm import SVC

from nonconformist.cp import TcpClassifier
from nonconformist.nc import NcFactory


iris = load_iris()

idx = np.random.permutation(iris.target.size)

# Divide the data into training set and test set
idx_train, idx_test = idx[:100], idx[100:]

model = SVC(probability=True)	# Create the underlying model
nc = NcFactory.create_nc(model)	# Create a default nonconformity function
tcp = TcpClassifier(nc)			# Create a transductive conformal classifier

# Fit the TCP using the proper training set
tcp.fit(iris.data[idx_train, :], iris.target[idx_train])

# Produce predictions for the test set
predictions = tcp.predict(iris.data[idx_test, :])

# Store the true targets alongside the per-class p-values
targets = np.array(iris.target[idx_test], copy=True).reshape(-1, 1)
output = np.hstack((targets, predictions))

np.savetxt('resources/multiclass.csv', output, delimiter=',')
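
Since `predict` is called without a `significance` level, `predictions` holds one p-value per class; the usual conformal prediction sets follow by thresholding. A short continuation of the example above:

# Prediction sets at 95% confidence: keep every class whose p-value
# exceeds the significance level
significance = 0.05
prediction_sets = predictions > significance  # boolean (n_test, n_classes)

# Forced point prediction and credibility per test example
point_prediction = predictions.argmax(axis=1)
credibility = predictions.max(axis=1)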
Code example #8
#-----------------------------------------------------------
# force_prediction: same evaluation with a plain inductive
# conformal classifier (explicit calibration split)
s_folder = StratifiedKFold(n_splits=10, shuffle=True)
for index, (train, test) in enumerate(s_folder.split(X, y)):
    X_train, X_test = X[train], X[test]
    y_train, y_test = y[train], y[test]
    x_train_sp, x_cal, y_train_sp, y_cal = train_test_split(
        X_train, y_train, test_size=test_size, shuffle=True)
    y_test = y_test.reshape((-1, 1))

    lda = LinearDiscriminantAnalysis(n_components=9)
    x_train_lda = lda.fit_transform(x_train_sp, y_train_sp)
    x_cal_lda = lda.transform(x_cal)
    x_test_lda = lda.transform(X_test)

    nc = NcFactory.create_nc(model=model)
    icp = IcpClassifier(nc)

    icp.fit(x_train_lda, y_train_sp)
    icp.calibrate(x_cal_lda, y_cal)
    prediction = icp.predict(x_test_lda, significance=None)

    result = [1 - force_mean_errors(prediction, y_test)]
    if index == 0:
        result_summary = result
    else:
        result_summary = np.vstack((result_summary, result))
    print('\nICP_Force')
    if np.unique(y_test).shape[0] == 10:
        print('True')
    else:
        print('False')
Code example #9
File: run_experiment.py Project: valeman/cqr
def run_experiment(dataset_name,
                   test_method,
                   random_state_train_test,
                   save_to_csv=True):
    """ Estimate prediction intervals and print the average length and coverage

    Parameters
    ----------

    dataset_name : array of strings, list of datasets
    test_method  : string, method to be tested, estimating
                   the 90% prediction interval
    random_state_train_test : integer, random seed to be used
    save_to_csv : boolean, save average length and coverage to csv (True)
                  or not (False)

    """

    dataset_name_vec = []
    method_vec = []
    coverage_vec = []
    length_vec = []
    seed_vec = []

    seed = random_state_train_test
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    coverage_linear = 0
    length_linear = 0
    coverage_linear_local = 0
    length_linear_local = 0

    coverage_net = 0
    length_net = 0
    coverage_net_local = 0
    length_net_local = 0

    coverage_forest = 0
    length_forest = 0
    coverage_forest_local = 0
    length_forest_local = 0

    coverage_cp_qnet = 0
    length_cp_qnet = 0
    coverage_qnet = 0
    length_qnet = 0

    coverage_cp_sign_qnet = 0
    length_cp_sign_qnet = 0

    coverage_cp_re_qnet = 0
    length_cp_re_qnet = 0
    coverage_re_qnet = 0
    length_re_qnet = 0

    coverage_cp_sign_re_qnet = 0
    length_cp_sign_re_qnet = 0

    coverage_cp_qforest = 0
    length_cp_qforest = 0
    coverage_qforest = 0
    length_qforest = 0

    coverage_cp_sign_qforest = 0
    length_cp_sign_qforest = 0


    # determines the size of test set
    test_ratio = 0.2

    # conformal prediction miscoverage level
    significance = 0.1
    # desired quantile levels, used by the quantile regression methods
    quantiles = [0.05, 0.95]

    # Random forests parameters (shared by conditional quantile random forests
    # and conditional mean random forests regression).
    n_estimators = 1000 # usual random forests n_estimators parameter
    min_samples_leaf = 1 # default parameter of sklearn

    # Quantile random forests parameters.
    # See QuantileForestRegressorAdapter class for more details
    quantiles_forest = [5, 95]
    CV_qforest = True
    coverage_factor = 0.85
    cv_test_ratio = 0.05
    cv_random_state = 1
    cv_range_vals = 30
    cv_num_vals = 10

    # Neural network parameters  (shared by conditional quantile neural network
    # and conditional mean neural network regression)
    # See AllQNet_RegressorAdapter and MSENet_RegressorAdapter in helper.py
    nn_learn_func = torch.optim.Adam
    epochs = 1000
    lr = 0.0005
    hidden_size = 64
    batch_size = 64
    dropout = 0.1
    wd = 1e-6

    # Ask for a reduced coverage when tuning the network parameters by
    # cross-validation to avoid too conservative initial estimation of the
    # prediction interval. This estimation will be conformalized by CQR.
    quantiles_net = [0.1, 0.9]


    # local conformal prediction parameter.
    # See RegressorNc class for more details.
    beta = 1
    beta_net = 1

    # local conformal prediction parameter. The local ridge regression method
    # uses nearest neighbor regression as the MAD estimator.
    # Number of neighbors used by nearest neighbor regression.
    n_neighbors = 11

    print(dataset_name)
    sys.stdout.flush()

    try:
        # load the dataset
        X, y = datasets.GetDataset(dataset_name, base_dataset_path)
    except Exception:
        print("CANNOT LOAD DATASET!")
        return

    # Dataset is divided into test and train data based on test_ratio parameter
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=test_ratio,
                                                        random_state=random_state_train_test)

    # zero mean and unit variance scaling of the train and test features
    scalerX = StandardScaler()
    scalerX = scalerX.fit(X_train)
    X_train = scalerX.transform(X_train)
    X_test = scalerX.transform(X_test)

    # scale the labels by dividing each by the mean absolute response
    mean_abs_ytrain = np.mean(np.abs(y_train))
    y_train = y_train / mean_abs_ytrain
    y_test = y_test / mean_abs_ytrain

    # fit a simple ridge regression model (sanity check)
    model = linear_model.RidgeCV()
    model = model.fit(X_train, y_train)
    predicted_data = model.predict(X_test).astype(np.float32)

    # calculate the normalized mean squared error
    print("Ridge relative error: %f" % (np.sum((y_test-predicted_data)**2)/np.sum(y_test**2)))
    sys.stdout.flush()

    # reshape the data
    X_train = np.asarray(X_train)
    y_train = np.squeeze(np.asarray(y_train))
    X_test = np.asarray(X_test)
    y_test = np.squeeze(np.asarray(y_test))

    # input dimensions
    n_train = X_train.shape[0]
    in_shape = X_train.shape[1]

    print("Size: train (%d, %d), test (%d, %d)" % (X_train.shape[0], X_train.shape[1], X_test.shape[0], X_test.shape[1]))
    sys.stdout.flush()

    # set seed for splitting the data into proper train and calibration
    np.random.seed(seed)
    idx = np.random.permutation(n_train)

    # divide the data into proper training set and calibration set
    n_half = int(np.floor(n_train/2))
    idx_train, idx_cal = idx[:n_half], idx[n_half:2*n_half]

    ######################## Linear

    if 'linear' == test_method:

        model = linear_model.RidgeCV()
        nc = RegressorNc(model)

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"Ridge")
        coverage_linear, length_linear = helper.compute_coverage(y_test,y_lower,y_upper,significance,"Ridge")

        dataset_name_vec.append(dataset_name)
        method_vec.append('Ridge')
        coverage_vec.append(coverage_linear)
        length_vec.append(length_linear)
        seed_vec.append(seed)

        nc = NcFactory.create_nc(
            linear_model.RidgeCV(),
            normalizer_model=KNeighborsRegressor(n_neighbors=n_neighbors)
        )

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"Ridge-L")
        coverage_linear_local, length_linear_local = helper.compute_coverage(y_test,y_lower,y_upper,significance,"Ridge-L")

        dataset_name_vec.append(dataset_name)
        method_vec.append('Ridge-L')
        coverage_vec.append(coverage_linear_local)
        length_vec.append(length_linear_local)
        seed_vec.append(seed)

    ######################### Neural net

    if 'neural_net' == test_method:

        model = helper.MSENet_RegressorAdapter(model=None,
                                               fit_params=None,
                                               in_shape = in_shape,
                                               hidden_size = hidden_size,
                                               learn_func = nn_learn_func,
                                               epochs = epochs,
                                               batch_size=batch_size,
                                               dropout=dropout,
                                               lr=lr,
                                               wd=wd,
                                               test_ratio=cv_test_ratio,
                                               random_state=cv_random_state)
        nc = RegressorNc(model)

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"Net")
        coverage_net, length_net = helper.compute_coverage(y_test,y_lower,y_upper,significance,"Net")

        dataset_name_vec.append(dataset_name)
        method_vec.append('Net')
        coverage_vec.append(coverage_net)
        length_vec.append(length_net)
        seed_vec.append(seed)

        normalizer_adapter = helper.MSENet_RegressorAdapter(model=None,
                                                            fit_params=None,
                                                            in_shape = in_shape,
                                                            hidden_size = hidden_size,
                                                            learn_func = nn_learn_func,
                                                            epochs = epochs,
                                                            batch_size=batch_size,
                                                            dropout=dropout,
                                                            lr=lr,
                                                            wd=wd,
                                                            test_ratio=cv_test_ratio,
                                                            random_state=cv_random_state)
        adapter = helper.MSENet_RegressorAdapter(model=None,
                                                fit_params=None,
                                                in_shape = in_shape,
                                                hidden_size = hidden_size,
                                                learn_func = nn_learn_func,
                                                epochs = epochs,
                                                batch_size=batch_size,
                                                dropout=dropout,
                                                lr=lr,
                                                wd=wd,
                                                test_ratio=cv_test_ratio,
                                                random_state=cv_random_state)

        normalizer = RegressorNormalizer(adapter,
                                         normalizer_adapter,
                                         AbsErrorErrFunc())
        nc = RegressorNc(adapter, AbsErrorErrFunc(), normalizer, beta=beta_net)
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"Net-L")
        coverage_net_local, length_net_local = helper.compute_coverage(y_test,y_lower,y_upper,significance,"Net-L")

        dataset_name_vec.append(dataset_name)
        method_vec.append('Net-L')
        coverage_vec.append(coverage_net_local)
        length_vec.append(length_net_local)
        seed_vec.append(seed)

    ################## Random Forest

    if 'random_forest' == test_method:

        model = RandomForestRegressor(n_estimators=n_estimators,min_samples_leaf=min_samples_leaf, random_state=0)
        nc = RegressorNc(model, AbsErrorErrFunc())

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"RF")
        coverage_forest, length_forest = helper.compute_coverage(y_test,y_lower,y_upper,significance,"RF")

        dataset_name_vec.append(dataset_name)
        method_vec.append('RF')
        coverage_vec.append(coverage_forest)
        length_vec.append(length_forest)
        seed_vec.append(seed)

        normalizer_adapter = RandomForestRegressor(n_estimators=n_estimators, min_samples_leaf=min_samples_leaf, random_state=0)
        adapter = RandomForestRegressor(n_estimators=n_estimators, min_samples_leaf=min_samples_leaf, random_state=0)
        normalizer = RegressorNormalizer(adapter,
                                         normalizer_adapter,
                                         AbsErrorErrFunc())
        nc = RegressorNc(adapter, AbsErrorErrFunc(), normalizer, beta=beta)

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"RF-L")
        coverage_forest_local, length_forest_local = helper.compute_coverage(y_test,y_lower,y_upper,significance,"RF-L")

        dataset_name_vec.append(dataset_name)
        method_vec.append('RF-L')
        coverage_vec.append(coverage_forest_local)
        length_vec.append(length_forest_local)
        seed_vec.append(seed)

    ################## Quantile Net

    if 'quantile_net' == test_method:

        model_full = helper.AllQNet_RegressorAdapter(model=None,
                                             fit_params=None,
                                             in_shape = in_shape,
                                             hidden_size = hidden_size,
                                             quantiles = quantiles,
                                             learn_func = nn_learn_func,
                                             epochs = epochs,
                                             batch_size=batch_size,
                                             dropout=dropout,
                                             lr=lr,
                                             wd=wd,
                                             test_ratio=cv_test_ratio,
                                             random_state=cv_random_state,
                                             use_rearrangement=False)
        model_full.fit(X_train, y_train)
        tmp = model_full.predict(X_test)
        y_lower = tmp[:,0]
        y_upper = tmp[:,1]
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"QNet")
        coverage_qnet, length_qnet = helper.compute_coverage(y_test,y_lower,y_upper,significance,"QNet")

        dataset_name_vec.append(dataset_name)
        method_vec.append('QNet')
        coverage_vec.append(coverage_qnet)
        length_vec.append(length_qnet)
        seed_vec.append(seed)

    if 'cqr_quantile_net' == test_method:

        model = helper.AllQNet_RegressorAdapter(model=None,
                                             fit_params=None,
                                             in_shape = in_shape,
                                             hidden_size = hidden_size,
                                             quantiles = quantiles_net,
                                             learn_func = nn_learn_func,
                                             epochs = epochs,
                                             batch_size=batch_size,
                                             dropout=dropout,
                                             lr=lr,
                                             wd=wd,
                                             test_ratio=cv_test_ratio,
                                             random_state=cv_random_state,
                                             use_rearrangement=False)
        nc = RegressorNc(model, QuantileRegErrFunc())

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"CQR Net")
        coverage_cp_qnet, length_cp_qnet = helper.compute_coverage(y_test,y_lower,y_upper,significance,"CQR Net")


        dataset_name_vec.append(dataset_name)
        method_vec.append('CQR Net')
        coverage_vec.append(coverage_cp_qnet)
        length_vec.append(length_cp_qnet)
        seed_vec.append(seed)


    if 'cqr_asymmetric_quantile_net' == test_method:

        model = helper.AllQNet_RegressorAdapter(model=None,
                                             fit_params=None,
                                             in_shape = in_shape,
                                             hidden_size = hidden_size,
                                             quantiles = quantiles_net,
                                             learn_func = nn_learn_func,
                                             epochs = epochs,
                                             batch_size=batch_size,
                                             dropout=dropout,
                                             lr=lr,
                                             wd=wd,
                                             test_ratio=cv_test_ratio,
                                             random_state=cv_random_state,
                                             use_rearrangement=False)
        nc = RegressorNc(model, QuantileRegAsymmetricErrFunc())

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"CQR Sign Net")
        coverage_cp_sign_qnet, length_cp_sign_qnet = helper.compute_coverage(y_test,y_lower,y_upper,significance,"CQR Sign Net")


        dataset_name_vec.append(dataset_name)
        method_vec.append('CQR Sign Net')
        coverage_vec.append(coverage_cp_sign_qnet)
        length_vec.append(length_cp_sign_qnet)
        seed_vec.append(seed)


    ################### Rearrangement Quantile Net

    if 'rearrangement' == test_method:

        model_full = helper.AllQNet_RegressorAdapter(model=None,
                                             fit_params=None,
                                             in_shape = in_shape,
                                             hidden_size = hidden_size,
                                             quantiles = quantiles,
                                             learn_func = nn_learn_func,
                                             epochs = epochs,
                                             batch_size=batch_size,
                                             dropout=dropout,
                                             lr=lr,
                                             wd=wd,
                                             test_ratio=cv_test_ratio,
                                             random_state=cv_random_state,
                                             use_rearrangement=True)
        model_full.fit(X_train, y_train)
        tmp = model_full.predict(X_test)
        y_lower = tmp[:,0]
        y_upper = tmp[:,1]
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"Rearrange QNet")
        coverage_re_qnet, length_re_qnet = helper.compute_coverage(y_test,y_lower,y_upper,significance,"Rearrange QNet")

        dataset_name_vec.append(dataset_name)
        method_vec.append('Rearrange QNet')
        coverage_vec.append(coverage_re_qnet)
        length_vec.append(length_re_qnet)
        seed_vec.append(seed)

    if 'cqr_rearrangement' == test_method:

        model = helper.AllQNet_RegressorAdapter(model=None,
                                                 fit_params=None,
                                                 in_shape = in_shape,
                                                 hidden_size = hidden_size,
                                                 quantiles = quantiles_net,
                                                 learn_func = nn_learn_func,
                                                 epochs = epochs,
                                                 batch_size=batch_size,
                                                 dropout=dropout,
                                                 lr=lr,
                                                 wd=wd,
                                                 test_ratio=cv_test_ratio,
                                                 random_state=cv_random_state,
                                                 use_rearrangement=True)
        nc = RegressorNc(model, QuantileRegErrFunc())

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"Rearrange CQR Net")
        coverage_cp_re_qnet, length_cp_re_qnet = helper.compute_coverage(y_test,y_lower,y_upper,significance,"Rearrange CQR Net")


        dataset_name_vec.append(dataset_name)
        method_vec.append('Rearrange CQR Net')
        coverage_vec.append(coverage_cp_re_qnet)
        length_vec.append(length_cp_re_qnet)
        seed_vec.append(seed)


    if 'cqr_asymmetric_rearrangement' == test_method:

        model = helper.AllQNet_RegressorAdapter(model=None,
                                                 fit_params=None,
                                                 in_shape = in_shape,
                                                 hidden_size = hidden_size,
                                                 quantiles = quantiles_net,
                                                 learn_func = nn_learn_func,
                                                 epochs = epochs,
                                                 batch_size=batch_size,
                                                 dropout=dropout,
                                                 lr=lr,
                                                 wd=wd,
                                                 test_ratio=cv_test_ratio,
                                                 random_state=cv_random_state,
                                                 use_rearrangement=True)
        nc = RegressorNc(model, QuantileRegAsymmetricErrFunc())

        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"Rearrange CQR Sign Net")
        coverage_cp_sign_re_qnet, length_cp_sign_re_qnet = helper.compute_coverage(y_test,y_lower,y_upper,significance,"Rearrange CQR Sign Net")


        dataset_name_vec.append(dataset_name)
        method_vec.append('Rearrange CQR Sign Net')
        coverage_vec.append(coverage_cp_sign_re_qnet)
        length_vec.append(length_cp_sign_re_qnet)
        seed_vec.append(seed)

    ################### Quantile Random Forest

    if 'quantile_forest' == test_method:

        params_qforest = dict()
        params_qforest["random_state"] = 0
        params_qforest["min_samples_leaf"] = min_samples_leaf
        params_qforest["n_estimators"] = n_estimators
        params_qforest["max_features"] = X_train.shape[1]

        params_qforest["CV"]=False
        params_qforest["coverage_factor"] = coverage_factor
        params_qforest["test_ratio"]=cv_test_ratio
        params_qforest["random_state"]=cv_random_state
        params_qforest["range_vals"] = cv_range_vals
        params_qforest["num_vals"] = cv_num_vals

        model_full = helper.QuantileForestRegressorAdapter(model = None,
                                                      fit_params=None,
                                                      quantiles=np.array(quantiles) * 100,
                                                      params = params_qforest)
        model_full.fit(X_train, y_train)
        tmp = model_full.predict(X_test)
        y_lower = tmp[:,0]
        y_upper = tmp[:,1]
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"QRF")
        coverage_qforest, length_qforest = helper.compute_coverage(y_test,y_lower,y_upper,significance,"QRF")

        dataset_name_vec.append(dataset_name)
        method_vec.append('QRF')
        coverage_vec.append(coverage_qforest)
        length_vec.append(length_qforest)
        seed_vec.append(seed)

    if 'cqr_quantile_forest' == test_method:

        params_qforest = dict()
        params_qforest["random_state"] = 0
        params_qforest["min_samples_leaf"] = min_samples_leaf
        params_qforest["n_estimators"] = n_estimators
        params_qforest["max_features"] = X_train.shape[1]

        params_qforest["CV"]=CV_qforest
        params_qforest["coverage_factor"] = coverage_factor
        params_qforest["test_ratio"]=cv_test_ratio
        params_qforest["random_state"]=cv_random_state
        params_qforest["range_vals"] = cv_range_vals
        params_qforest["num_vals"] = cv_num_vals


        model = helper.QuantileForestRegressorAdapter(model = None,
                                                      fit_params=None,
                                                      quantiles=quantiles_forest,
                                                      params = params_qforest)

        nc = RegressorNc(model, QuantileRegErrFunc())
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"CQR RF")
        coverage_cp_qforest, length_cp_qforest = helper.compute_coverage(y_test,y_lower,y_upper,significance,"CQR RF")

        dataset_name_vec.append(dataset_name)
        method_vec.append('CQR RF')
        coverage_vec.append(coverage_cp_qforest)
        length_vec.append(length_cp_qforest)
        seed_vec.append(seed)

    if 'cqr_asymmetric_quantile_forest' == test_method:

        params_qforest = dict()
        params_qforest["random_state"] = 0
        params_qforest["min_samples_leaf"] = min_samples_leaf
        params_qforest["n_estimators"] = n_estimators
        params_qforest["max_features"] = X_train.shape[1]

        params_qforest["CV"]=CV_qforest
        params_qforest["coverage_factor"] = coverage_factor
        params_qforest["test_ratio"]=cv_test_ratio
        params_qforest["random_state"]=cv_random_state
        params_qforest["range_vals"] = cv_range_vals
        params_qforest["num_vals"] = cv_num_vals


        model = helper.QuantileForestRegressorAdapter(model = None,
                                                      fit_params=None,
                                                      quantiles=quantiles_forest,
                                                      params = params_qforest)

        nc = RegressorNc(model, QuantileRegAsymmetricErrFunc())
        y_lower, y_upper = helper.run_icp(nc, X_train, y_train, X_test, idx_train, idx_cal, significance)
        if plot_results:
            helper.plot_func_data(y_test,y_lower,y_upper,"CQR Sign RF")
        coverage_cp_sign_qforest, length_cp_sign_qforest = helper.compute_coverage(y_test,y_lower,y_upper,significance,"CQR Sign RF")

        dataset_name_vec.append(dataset_name)
        method_vec.append('CQR Sign RF')
        coverage_vec.append(coverage_cp_sign_qforest)
        length_vec.append(length_cp_sign_qforest)
        seed_vec.append(seed)


#        tmp = model.predict(X_test)
#        y_lower = tmp[:,0]
#        y_upper = tmp[:,1]
#        if plot_results:
#            helper.plot_func_data(y_test,y_lower,y_upper,"QRF")
#        coverage_qforest, length_qforest = helper.compute_coverage(y_test,y_lower,y_upper,significance,"QRF")
#
#        dataset_name_vec.append(dataset_name)
#        method_vec.append('QRF')
#        coverage_vec.append(coverage_qforest)
#        length_vec.append(length_qforest)
#        seed_vec.append(seed)



    ############### Summary

    coverage_str = 'Coverage (expected ' + str(100 - significance*100) + '%)'
    results = np.array([[dataset_name, coverage_str, 'Avg. Length', 'Seed'],
                     ['CP Linear', coverage_linear, length_linear, seed],
                     ['CP Linear Local', coverage_linear_local, length_linear_local, seed],
                     ['CP Neural Net', coverage_net, length_net, seed],
                     ['CP Neural Net Local', coverage_net_local, length_net_local, seed],
                     ['CP Random Forest', coverage_forest, length_forest, seed],
                     ['CP Random Forest Local', coverage_forest_local, length_forest_local, seed],
                     ['CP Quantile Net', coverage_cp_qnet, length_cp_qnet, seed],
                     ['CP Asymmetric Quantile Net', coverage_cp_sign_qnet, length_cp_sign_qnet, seed],
                     ['Quantile Net', coverage_qnet, length_qnet, seed],
                     ['CP Rearrange Quantile Net', coverage_cp_re_qnet, length_cp_re_qnet, seed],
                     ['CP Asymmetric Rearrange Quantile Net', coverage_cp_sign_re_qnet, length_cp_sign_re_qnet, seed],
                     ['Rearrange Quantile Net', coverage_re_qnet, length_re_qnet, seed],
                     ['CP Quantile Random Forest', coverage_cp_qforest, length_cp_qforest, seed],
                     ['CP Asymmetric Quantile Random Forest', coverage_cp_sign_qforest, length_cp_sign_qforest, seed],
                     ['Quantile Random Forest', coverage_qforest, length_qforest, seed]])

    results_ = pd.DataFrame(data=results[1:,1:],
                      index=results[1:,0],
                      columns=results[0,1:])

    print("== SUMMARY == ")
    print("dataset name: " + dataset_name)
    print(results_)
    sys.stdout.flush()

    if save_to_csv:
        results = pd.DataFrame(results)

        outdir = './results/'
        if not os.path.exists(outdir):
            os.mkdir(outdir)

        out_name = outdir + 'results.csv'

        df = pd.DataFrame({'name': dataset_name_vec,
                           'method': method_vec,
                           coverage_str : coverage_vec,
                           'Avg. Length' : length_vec,
                           'seed': seed_vec})

        if os.path.isfile(out_name):
            df2 = pd.read_csv(out_name)
            df = pd.concat([df2, df], ignore_index=True)

        df.to_csv(out_name, index=False)
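
`helper.run_icp` is not shown here; assuming it follows nonconformist's standard split-conformal recipe (as the idx_train/idx_cal split above suggests), it is roughly equivalent to this sketch:

from nonconformist.cp import IcpRegressor

def run_icp_sketch(nc, X_train, y_train, X_test, idx_train, idx_cal, significance):
    # Assumed equivalent of helper.run_icp: fit the nonconformity model on
    # the proper training set, calibrate on the held-out calibration set,
    # then predict intervals at the requested significance level.
    icp = IcpRegressor(nc)
    icp.fit(X_train[idx_train, :], y_train[idx_train])
    icp.calibrate(X_train[idx_cal, :], y_train[idx_cal])
    intervals = icp.predict(X_test, significance=significance)
    return intervals[:, 0], intervals[:, 1]  # y_lower, y_upper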
Code example #10
df_test = pd.read_csv(TEST)
trainX, trainY = df_train.drop(['TARGET'], axis=1), df_train['TARGET']
calX, calY = df_cal.drop(['TARGET'], axis=1), df_cal['TARGET']

model = joblib.load(os.path.join("models", f"{MODEL}.pkl"))
if 'TARGET' in df_test.columns:
    testX, testY = df_test.drop(['id', 'TARGET'], axis=1), df_test['TARGET']
else:
    testX = df_test.drop(['id'], axis=1)

if PROBLEM_TYPE == 'classification':
    if MODEL == 'catboost':
        raise Exception("Can't compute intervals for CatBoostClassifier!")

    nc = NcFactory.create_nc(
        model, normalizer_model=KNeighborsRegressor(
            n_neighbors=11))  # Create a default nonconformity function
    icp = IcpClassifier(nc)

    icp.fit(trainX.values, trainY.values)

    # Calibrate the ICP using the calibration set
    icp.calibrate(calX.values, calY.values)

    # Produce predictions for the test set, with confidence 95%
    prediction = icp.predict(testX.to_numpy(), significance=0.05)

else:
    if MODEL == 'catboost':
        params = joblib.load("models/params.pkl")
Code example #11
    def test_nc_factory(self):
        def score_model(icp, icp_name, ds, ds_name, scoring_funcs):
            scores = cross_val_score(
                icp,
                ds.data,
                ds.target,
                iterations=10,
                folds=10,
                scoring_funcs=scoring_funcs,
                significance_levels=[0.05, 0.1, 0.2],
            )

            print("\n{}: {}".format(icp_name, ds_name))
            scores = scores.drop(["fold", "iter"], axis=1)
            print(scores.groupby(["significance"]).mean())

        # -----------------------------------------------------------------------------
        # Classification
        # -----------------------------------------------------------------------------
        data = load_iris()

        nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100))
        icp = IcpClassifier(nc)
        icp_cv = ClassIcpCvHelper(icp)
        score_model(icp_cv, "IcpClassifier", data, "iris",
                    [class_mean_errors, class_avg_c])

        # -----------------------------------------------------------------------------
        # Classification (normalized)
        # -----------------------------------------------------------------------------
        data = load_iris()

        nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100),
                                 normalizer_model=KNeighborsRegressor())
        icp = IcpClassifier(nc)
        icp_cv = ClassIcpCvHelper(icp)

        score_model(icp_cv, "IcpClassifier (normalized)", data, "iris",
                    [class_mean_errors, class_avg_c])

        # -----------------------------------------------------------------------------
        # Classification OOB
        # -----------------------------------------------------------------------------
        data = load_iris()

        nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100,
                                                        oob_score=True),
                                 oob=True)
        icp_cv = OobCpClassifier(nc)

        score_model(icp_cv, "IcpClassifier (OOB)", data, "iris",
                    [class_mean_errors, class_avg_c])

        # -----------------------------------------------------------------------------
        # Classification OOB normalized
        # -----------------------------------------------------------------------------
        data = load_iris()

        nc = NcFactory.create_nc(
            RandomForestClassifier(n_estimators=100, oob_score=True),
            oob=True,
            normalizer_model=KNeighborsRegressor(),
        )
        icp_cv = OobCpClassifier(nc)

        score_model(
            icp_cv,
            "IcpClassifier (OOB, normalized)",
            data,
            "iris",
            [class_mean_errors, class_avg_c],
        )

        # -----------------------------------------------------------------------------
        # Regression
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        nc = NcFactory.create_nc(RandomForestRegressor(n_estimators=100))
        icp = IcpRegressor(nc)
        icp_cv = RegIcpCvHelper(icp)

        score_model(icp_cv, "IcpRegressor", data, "diabetes",
                    [reg_mean_errors, reg_median_size])

        # -----------------------------------------------------------------------------
        # Regression (normalized)
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        nc = NcFactory.create_nc(RandomForestRegressor(n_estimators=100),
                                 normalizer_model=KNeighborsRegressor())
        icp = IcpRegressor(nc)
        icp_cv = RegIcpCvHelper(icp)

        score_model(
            icp_cv,
            "IcpRegressor (normalized)",
            data,
            "diabetes",
            [reg_mean_errors, reg_median_size],
        )

        # -----------------------------------------------------------------------------
        # Regression OOB
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        nc = NcFactory.create_nc(RandomForestRegressor(n_estimators=100,
                                                       oob_score=True),
                                 oob=True)
        icp_cv = OobCpRegressor(nc)

        score_model(icp_cv, "IcpRegressor (OOB)", data, "diabetes",
                    [reg_mean_errors, reg_median_size])

        # -----------------------------------------------------------------------------
        # Regression OOB normalized
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        nc = NcFactory.create_nc(
            RandomForestRegressor(n_estimators=100, oob_score=True),
            oob=True,
            normalizer_model=KNeighborsRegressor(),
        )
        icp_cv = OobCpRegressor(nc)

        score_model(
            icp_cv,
            "IcpRegressor (OOB, normalized)",
            data,
            "diabetes",
            [reg_mean_errors, reg_median_size],
        )
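
For reference, `NcFactory.create_nc` is a convenience constructor; a normalized regression nonconformity function like the one above can also be assembled by hand, mirroring the manual `RegressorNormalizer` construction in code example #9. The adapter wrapping and the `AbsErrorErrFunc` choice below are assumptions based on that example:

from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from nonconformist.base import RegressorAdapter
from nonconformist.nc import AbsErrorErrFunc, RegressorNc, RegressorNormalizer

# Sketch of a manual equivalent of
# NcFactory.create_nc(RandomForestRegressor(n_estimators=100),
#                     normalizer_model=KNeighborsRegressor())
underlying = RegressorAdapter(RandomForestRegressor(n_estimators=100))
normalizing = RegressorAdapter(KNeighborsRegressor())
normalizer = RegressorNormalizer(underlying, normalizing, AbsErrorErrFunc())
nc = RegressorNc(underlying, AbsErrorErrFunc(), normalizer)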