예제 #1
0
from nonconformist.nc import MarginErrFunc
from nonconformist.nc import ClassifierNc, RegressorNc, RegressorNormalizer
from nonconformist.nc import AbsErrorErrFunc, SignErrorErrFunc

from nonconformist.evaluation import cross_val_score
from nonconformist.evaluation import ClassIcpCvHelper, RegIcpCvHelper
from nonconformist.evaluation import class_avg_c, class_mean_errors
from nonconformist.evaluation import reg_mean_errors, reg_median_size

# -----------------------------------------------------------------------------
# Classification
# -----------------------------------------------------------------------------
# Load the iris data set; its .data / .target attributes feed the
# cross-validation call below.
data = load_iris()

# Inductive conformal classifier: a 100-tree random forest wrapped in the
# nonconformist adapter and scored with the margin error function.
icp = IcpClassifier(
    ClassifierNc(ClassifierAdapter(RandomForestClassifier(n_estimators=100)),
                 MarginErrFunc()))
# Helper object that cross_val_score receives in place of the raw ICP.
icp_cv = ClassIcpCvHelper(icp)

# 5 iterations of 5 folds; each scoring function is evaluated at the three
# listed significance levels.
scores = cross_val_score(icp_cv,
                         data.data,
                         data.target,
                         iterations=5,
                         folds=5,
                         scoring_funcs=[class_mean_errors, class_avg_c],
                         significance_levels=[0.05, 0.1, 0.2])

print('Classification: iris')
# Drop the bookkeeping columns so that only the scores are averaged, then
# report one row of mean scores per significance level.
# (scores is presumably a pandas DataFrame -- it is used via drop/groupby.)
scores = scores.drop(['fold', 'iter'], axis=1)
print(scores.groupby(['significance']).mean())

# -----------------------------------------------------------------------------
예제 #2
0
    def test_acp_classification_tree(self):
        """Fit and print five conformal ensemble classifiers on iris.

        The data is shuffled and split 2/3 train, 1/3 test. Each model wraps
        a fresh ``DecisionTreeClassifier`` in the same ICP stack and is then
        fitted, used to predict at significance 0.1, and reported (name,
        error rate, prediction table). The final assertion is unconditional,
        so the test only fails if one of the steps raises.
        """
        # ---------------------------------------------------------------------
        # Experiment setup
        # ---------------------------------------------------------------------
        data = load_iris()

        shuffled = np.random.permutation(data.target.size)
        split_at = int(2 * shuffled.size / 3)
        train, test = shuffled[:split_at], shuffled[split_at:]

        truth = data.target[test].reshape(-1, 1)
        # One column per class label, followed by the true label.
        columns = ["C-{}".format(label) for label in np.unique(data.target)]
        columns.append("truth")
        significance = 0.1

        # ---------------------------------------------------------------------
        # Define models
        # ---------------------------------------------------------------------
        def new_icp():
            # Every ensemble gets its own, independent decision-tree ICP.
            return IcpClassifier(
                ClassifierNc(ClassifierAdapter(DecisionTreeClassifier())))

        models = {
            "ACP-RandomSubSampler": AggregatedCp(new_icp(),
                                                 RandomSubSampler()),
            "ACP-CrossSampler": AggregatedCp(new_icp(), CrossSampler()),
            "ACP-BootstrapSampler": AggregatedCp(new_icp(),
                                                 BootstrapSampler()),
            "CCP": CrossConformalClassifier(new_icp()),
            "BCP": BootstrapConformalClassifier(new_icp()),
        }

        # ---------------------------------------------------------------------
        # Train, predict and evaluate
        # ---------------------------------------------------------------------
        x_train, y_train = data.data[train, :], data.target[train]
        x_test = data.data[test, :]
        for name, model in models.items():
            model.fit(x_train, y_train)
            prediction = model.predict(x_test, significance=significance)
            df = pd.DataFrame(np.hstack((prediction, truth)),
                              columns=columns)
            print("\n{}".format(name))
            error_rate = class_mean_errors(prediction, truth, significance)
            print("Error rate: {}".format(error_rate))
            print(df)

        self.assertTrue(True)
# Calibrate the ICP using the calibration (validation) set.
icp.calibrate(x_val_np, y_val_np)
print('predicting inductive conformal prediction')
# Produce predictions for the test set at significance 0.05
# (i.e. 95% confidence).
prediction = icp.predict(x_test_np, significance=0.05)
# Tabulate the output of predict_conf under the three column names below.
prediction_conf_cred = pd.DataFrame(
    icp.predict_conf(x_test_np),
    columns=['Label', 'Confidence', 'Credibility'])
# %%
# Cross-validation of the conformal predictor.

# Earlier variant kept for reference (plain ICP with margin error function):
#icp = IcpClassifier(ClassifierNc(ClassifierAdapter(model),MarginErrFunc()))

# Rebind icp to an out-of-bag conformal classifier built on a 300-tree
# random forest; oob_score=True is passed to the forest itself.
icp = OobCpClassifier(
    ClassifierNc(
        OobClassifierAdapter(
            RandomForestClassifier(n_estimators=300, oob_score=True))))

# Significance grid 0, 0.025, ..., 0.975; the leading 0 is then replaced
# by 0.01 so that no level is exactly zero.
significance = np.arange(0, 1, 0.025)
significance[0] = 0.01
icp_cv = ClassIcpCvHelper(icp)

scores = cross_val_score(icp_cv,
                         x_train_np,
                         y_train_np,
                         iterations=1,
                         folds=5,
                         scoring_funcs=[
                             class_mean_errors, class_one_err, class_avg_c,
                             class_one_c, class_empty, class_two_c
                         ],
예제 #4
0
파일: example.py 프로젝트: valeman/cphmm
    X, H = sample_hmm(N, L, H_n, start_prob, trans_prob, emi_means, emi_vars)

    # Training and test sets. The test set is only
    # composed by the last sampled sequence.
    train = range(N - 1)
    Xtrain = X[train]
    Htrain = H[train]
    Xtest = X[N - 1]
    Htest = H[N - 1]

    n, l, _ = X.shape
    X = X.flatten()
    H = H.flatten()
    lengths = [l] * n

    # NCM
    knn = KNeighborsClassifier(n_neighbors=1)
    ncm = ClassifierNc(ClassifierAdapter(knn))
    cphmm = CPHMM(ncm, n_states=H_n, smooth=False)

    # HMM trained using Maximum Likelihood.
    ml_pred = ml_hmm_predict(Xtest, Xtrain, Htrain)
    ml_error = error(ml_pred, Htest)
    print("Maximum likelihood error: {}".format(ml_error))

    # CP-HMM training and prediction.
    cphmm.fit(X, H, lengths)
    CP_pred = cphmm.predict(Xtest, SIGNIFICANCE_LEVEL)
    CP_error = error(CP_pred[0], Htest)
    print("CP error: {}".format(CP_error))
예제 #5
0
    def test_cross_validation(self):
        """Cross-validate five ICP configurations and print their mean scores.

        One classifier (iris) and four regressors (diabetes) are each run
        through ``cross_val_score`` with 5 iterations of 5 folds at
        significance levels 0.05, 0.1 and 0.2. For every configuration a
        title and the per-significance mean of the scores are printed.
        Nothing is asserted; the test fails only if a step raises.
        """

        def report(title, icp_cv, dataset, scoring_funcs):
            # Shared protocol for all five experiments below.
            scores = cross_val_score(
                icp_cv,
                dataset.data,
                dataset.target,
                iterations=5,
                folds=5,
                scoring_funcs=scoring_funcs,
                significance_levels=[0.05, 0.1, 0.2],
            )
            print(title)
            # Remove the bookkeeping columns before averaging.
            trimmed = scores.drop(["fold", "iter"], axis=1)
            print(trimmed.groupby(["significance"]).mean())

        def forest_regressor():
            # Fresh 100-tree random forest regressor in its adapter.
            return RegressorAdapter(RandomForestRegressor(n_estimators=100))

        # ---------------------------------------------------------------------
        # Classification
        # ---------------------------------------------------------------------
        iris = load_iris()
        classifier_nc = ClassifierNc(
            ClassifierAdapter(RandomForestClassifier(n_estimators=100)),
            MarginErrFunc())
        report("Classification: iris",
               ClassIcpCvHelper(IcpClassifier(classifier_nc)),
               iris,
               [class_mean_errors, class_avg_c])

        # ---------------------------------------------------------------------
        # Regression, absolute error
        # ---------------------------------------------------------------------
        diabetes = load_diabetes()
        abs_nc = RegressorNc(forest_regressor(), AbsErrorErrFunc())
        report("Absolute error regression: diabetes",
               RegIcpCvHelper(IcpRegressor(abs_nc)),
               diabetes,
               [reg_mean_errors, reg_median_size])

        # ---------------------------------------------------------------------
        # Regression, normalized absolute error
        # ---------------------------------------------------------------------
        diabetes = load_diabetes()
        underlying_model = forest_regressor()
        normalizer_model = forest_regressor()
        normalizer = RegressorNormalizer(underlying_model, normalizer_model,
                                         AbsErrorErrFunc())
        norm_abs_nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                  normalizer)
        report("Normalized absolute error regression: diabetes",
               RegIcpCvHelper(IcpRegressor(norm_abs_nc)),
               diabetes,
               [reg_mean_errors, reg_median_size])

        # ---------------------------------------------------------------------
        # Regression, signed error
        # ---------------------------------------------------------------------
        diabetes = load_diabetes()
        signed_nc = RegressorNc(forest_regressor(), SignErrorErrFunc())
        report("Signed error regression: diabetes",
               RegIcpCvHelper(IcpRegressor(signed_nc)),
               diabetes,
               [reg_mean_errors, reg_median_size])

        # ---------------------------------------------------------------------
        # Regression, normalized signed error
        # ---------------------------------------------------------------------
        diabetes = load_diabetes()
        underlying_model = forest_regressor()
        normalizer_model = forest_regressor()
        # The normalization model can use a different error function than
        # the one used to measure errors on the underlying model.
        normalizer = RegressorNormalizer(underlying_model, normalizer_model,
                                         AbsErrorErrFunc())
        norm_signed_nc = RegressorNc(underlying_model, SignErrorErrFunc(),
                                     normalizer)
        report("Normalized signed error regression: diabetes",
               RegIcpCvHelper(IcpRegressor(norm_signed_nc)),
               diabetes,
               [reg_mean_errors, reg_median_size])
예제 #6
0
    X_test, y_test = X[test_index], y[test_index]

    lda = LinearDiscriminantAnalysis(n_components=9)
    X_train_lda = lda.fit_transform(X_train, y_train)
    X_test_lda = lda.transform(X_test)
    x_anomaly_lda = lda.transform(x_anomaly)

    x_train, x_cal, y_train, y_cal = train_test_split(X_train_lda,
                                                      y_train, test_size=0.3, shuffle=False, random_state=1)

    model = KNeighborsClassifier(n_neighbors=5)
    # -----------------------------------------------------------------------------
    # Train and calibrate
    # -----------------------------------------------------------------------------

    icp = IcpClassifier(ClassifierNc(ClassifierAdapter(model)))
    icp.fit(x_train, y_train)
    icp.calibrate(x_cal, y_cal)

    # -----------------------------------------------------------------------------
    # Predict
    # -----------------------------------------------------------------------------
    SIG = 0.2
    prediction = icp.predict(X_test_lda, significance=SIG)
    result = np.sum(prediction, axis=1)
    zero_sum_correct = (48 - result.sum(axis=0))/48
    correct.append(zero_sum_correct)
    print("the correct prediction")
    print(result)

    prediction_anomaly = icp.predict(x_anomaly_lda, significance=SIG)
예제 #7
0
    def CF_qualitative_validation(self):
        ''' Performs cross-validation for conformal qualitative models.

        Copies self.X / self.Y and runs a shuffled 5-fold split with a
        fixed random_state of 46. In every fold an AggregatedCp (ICP
        classifier around self.estimator_temp, MarginErrFunc,
        BootstrapSampler) is fitted on the training part and predicts the
        held-out part at self.param.getVal('conformalSignificance').

        A prediction row is counted only when its first two entries
        differ (presumably boolean class-membership flags for class 0 and
        class 1 -- confirm against AggregatedCp.predict); rows whose two
        entries are equal are tallied as "not predicted".

        Sets self.TN, self.FP, self.TP, self.FN, self.sensitivity,
        self.specificity, self.mcc, self.conformal_coverage and
        self.conformal_accuracy. Any ratio whose denominator is zero is
        logged and stored as '-'.

        Returns:
            (True, results), where results['quality'] is a list of
            (key, description, value) tuples.

        Raises:
            Whatever the fold loop raises; the error is logged and
            re-raised unchanged.
        '''

        # Make a copy of original matrices.
        X = self.X.copy()
        Y = self.Y.copy()

        # Total number of class 0 correct predictions.
        c0_correct_all = 0
        # Total number of class 0 incorrect predictions.
        c0_incorrect_all = 0
        # Total number of class 1 correct predictions.
        c1_correct_all = 0
        # Total number of class 1 incorrect predictions
        c1_incorrect_all = 0
        # Total number of instances out of the applicability domain.
        not_predicted_all = 0

        info = []

        kf = KFold(n_splits=5, shuffle=True, random_state=46)
        # Copy Y vector to use it as template to assign predictions
        Y_pred = copy.copy(Y).tolist()
        try:
            for train_index, test_index in kf.split(X):
                # Generate training and test sets
                X_train, X_test = X[train_index], X[test_index]
                Y_train = Y[train_index]
                # Create the aggregated conformal classifier.
                conformal_pred = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                     MarginErrFunc())), BootstrapSampler())
                # Fit the conformal classifier to the data
                conformal_pred.fit(X_train, Y_train)
                # Perform prediction on test set
                prediction = conformal_pred.predict(
                    X_test, self.param.getVal('conformalSignificance'))
                # Store every prediction row at the position of the
                # object it belongs to in the full data set.
                for index, el in enumerate(test_index):
                    Y_pred[el] = prediction[index]
            # Iterate over the prediction and check the result
            for real_value, predicted in zip(Y, Y_pred):
                real = float(real_value)
                if predicted[0] != predicted[1]:
                    if real == 0 and predicted[0]:
                        c0_correct_all += 1
                    if real == 0 and predicted[1]:
                        c0_incorrect_all += 1
                    if real == 1 and predicted[1]:
                        c1_correct_all += 1
                    if real == 1 and predicted[0]:
                        c1_incorrect_all += 1
                else:
                    not_predicted_all += 1

        except Exception as e:
            LOG.error(f'Qualitative conformal validation'
                      f' failed with exception: {e}')
            # Bare raise keeps the original traceback intact.
            raise
        # Confusion matrix accumulated over all folds.
        self.TN = c0_correct_all
        self.FP = c0_incorrect_all
        self.TP = c1_correct_all
        self.FN = c1_incorrect_all

        info.append(('TP', 'True positives in cross-validation', self.TP))
        info.append(('TN', 'True negatives in cross-validation', self.TN))
        info.append(('FP', 'False positives in cross-validation', self.FP))
        info.append(('FN', 'False negatives in cross-validation', self.FN))

        # Compute sensitivity, specificity and MCC
        try:
            self.sensitivity = (self.TP / (self.TP + self.FN))
        except ZeroDivisionError as e:
            LOG.error(f'Failed to compute sensitivity with exception {e}')
            self.sensitivity = '-'
        try:
            self.specificity = (self.TN / (self.TN + self.FP))
        except ZeroDivisionError as e:
            LOG.error(f'Failed to compute specificity with exception {e}')
            self.specificity = '-'

        # Compute Matthews Correlation Coefficient. np.sqrt returns a
        # NumPy float, and dividing by a NumPy zero yields nan/inf with a
        # warning instead of raising, so the zero denominator has to be
        # tested explicitly.
        mcc_denominator = np.sqrt(
            (self.TP + self.FP) * (self.TP + self.FN) *
            (self.TN + self.FP) * (self.TN + self.FN))
        if mcc_denominator == 0:
            LOG.error('Failed to compute Matthews Correlation Coefficient:'
                      ' zero denominator')
            self.mcc = '-'
        else:
            self.mcc = (((self.TP * self.TN) - (self.FP * self.FN)) /
                        mcc_denominator)

        info.append(('Sensitivity', 'Sensitivity in cross-validation',
                     self.sensitivity))
        info.append(('Specificity', 'Specificity in cross-validation',
                     self.specificity))
        info.append(
            ('MCC', 'Matthews Correlation Coefficient in cross-validation',
             self.mcc))

        # Number of objects that received a single-class prediction.
        predicted_total = self.TN + self.FP + self.TP + self.FN
        try:
            # Compute coverage (% of compounds inside the applicability domain)
            self.conformal_coverage = predicted_total / (
                predicted_total + not_predicted_all)
        except ZeroDivisionError as e:
            LOG.error(f'Failed to compute conformal coverage with'
                      f' exception {e}')
            self.conformal_coverage = '-'

        try:
            # Compute accuracy (% of correct predictions)
            self.conformal_accuracy = (
                float(self.TN + self.TP) / float(predicted_total))
        except ZeroDivisionError as e:
            LOG.error(f'Failed to compute conformal accuracy with'
                      f' exception {e}')
            self.conformal_accuracy = '-'

        info.append(('Conformal_coverage', 'Conformal coverage',
                     self.conformal_coverage))
        info.append(('Conformal_accuracy', 'Conformal accuracy',
                     self.conformal_accuracy))

        results = {}
        results['quality'] = info
        #results ['classes'] = prediction
        return True, results
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc

# NOTE(review): np is used below but is not among the imports visible in
# this snippet -- presumably imported above; confirm.
data = load_iris()
x, y = data.data, data.target

# Relabel the classes as 0..k-1. y is data.target itself (no copy), so this
# rewrites data.target in place.
# NOTE(review): the in-place remap can merge classes when labels are
# negative (e.g. [-1, 0] ends up as all 1s); np.unique(y,
# return_inverse=True) would avoid that.
for i, y_ in enumerate(np.unique(y)):
    y[y == y_] = i

n_instances = y.size
idx = np.random.permutation(n_instances)

# Random three-way split: first third trains, second third calibrates, and
# everything that remains is the test set.
train_idx = idx[:int(n_instances / 3)]
cal_idx = idx[int(n_instances / 3):2 * int(n_instances / 3)]
test_idx = idx[2 * int(n_instances / 3):]

# Inductive conformal classifier on a 100-tree random forest; no error
# function is passed, so ClassifierNc uses its own default.
nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier(n_estimators=100)))
icp = IcpClassifier(nc)

icp.fit(x[train_idx, :], y[train_idx])
icp.calibrate(x[cal_idx, :], y[cal_idx])

# Print the predict_conf output for the test objects as a three-column table.
print(
    pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                 columns=['Label', 'Confidence', 'Credibility']))
예제 #9
0
classification_method = DecisionTreeClassifier()
file_name = 'decision_tree.xls'

ACP_Random = []
ACP_Cross = []
ACP_Boot = []
CCP = []
BCP = []
# -----------------------------------------------------------------------------
# Define models
# -----------------------------------------------------------------------------

models = {
    'ACP-RandomSubSampler':
    AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))),
        RandomSubSampler()),
    'ACP-CrossSampler':
    AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))),
        CrossSampler()),
    'ACP-BootstrapSampler':
    AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))),
        BootstrapSampler()),
    'CCP':
    CrossConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method)))),
    'BCP':
    BootstrapConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))))
# --------------------------------------------------------------------------------------------
# force_prediction
# Output directory: <cwd>/force_summary/<framework_name>/<model_name>/
save_path = os.getcwd()+'/force_summary/' + framework_name+'/'+model_name+'/'
# exist_ok=True creates the directory only when it is missing and avoids
# the race between a separate existence check and the creation call.
os.makedirs(save_path, exist_ok=True)

s_folder = StratifiedKFold(n_splits=10, shuffle=True)

for index, (train, test) in enumerate(s_folder.split(X, y)):
    x_train, x_test = X[train], X[test]
    y_train, y_test = y[train], y[test]
    truth = y_test.reshape((-1, 1))
    # -----------------------------------------------
    # BCP
    conformal_model = BootstrapConformalClassifier(IcpClassifier(ClassifierNc(ClassifierAdapter(simple_model))),
                                                   n_models=10)
    conformal_model.fit(x_train, y_train)

    # ------------------------------------------
    # ICP
    # x_train_sp, x_cal, y_train_sp, y_cal = train_test_split(x_train, y_train, test_size=0.3, shuffle=True,
    #                                                         random_state=1)
    # nc = NcFactory.create_nc(model=simple_model)
    # conformal_model = IcpClassifier(nc)
    # conformal_model.fit(x_train_sp, y_train_sp)
    # conformal_model.calibrate(x_cal, y_cal)

    # ---------------------------------------------------
    # CP
    # nc = NcFactory.create_nc(model=simple_model)
예제 #11
0
from nonconformist.evaluation import class_mean_errors

# -----------------------------------------------------------------------------
# Setup training and test indices (no separate calibration set is created)
# -----------------------------------------------------------------------------
data = load_iris()

# Random permutation of all object indices, split into two halves.
idx = np.random.permutation(data.target.size)
train = idx[:int(idx.size / 2)]
test = idx[int(idx.size / 2):]

# -----------------------------------------------------------------------------
# Train TCP
# -----------------------------------------------------------------------------
# Conformal classifier around an SVC with probability estimates enabled,
# scored with the margin error function.
tcp = TcpClassifier(
    ClassifierNc(ClassifierAdapter(SVC(probability=True, gamma='scale')),
                 MarginErrFunc()))

tcp.fit(data.data[train, :], data.target[train])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
prediction = tcp.predict(data.data[test, :], significance=0.1)
header = np.array(['c0', 'c1', 'c2', 'Truth'])
# Append the true labels as an extra column next to the prediction columns.
table = np.vstack([prediction.T, data.target[test]]).T
# The header is stacked as the first data row, not set as column names.
df = pd.DataFrame(np.vstack([header, table]))
print('TCP')
print('---')
print(df)

error_rate = class_mean_errors(tcp.predict(data.data[test, :]),
예제 #12
0
idx = np.random.permutation(data.target.size)
train = idx[: int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3) :]

truth = data.target[test].reshape(-1, 1)
columns = ["C-{}".format(i) for i in np.unique(data.target)] + ["truth"]
significance = 0.1

# -----------------------------------------------------------------------------
# Define models
# -----------------------------------------------------------------------------

models = {
    "ACP-RandomSubSampler": AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))),
        RandomSubSampler(),
    ),
    "ACP-CrossSampler": AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))),
        CrossSampler(),
    ),
    "ACP-BootstrapSampler": AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))),
        BootstrapSampler(),
    ),
    "CCP": CrossConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier())))
    ),
    "BCP": BootstrapConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier())))
# Underlying classifier shared by all conformal models below, plus the
# label stored in model_name.
simple_model = RandomForestClassifier(n_estimators=500, criterion='entropy')
model_name = "RF(500)"

# Alternative underlying models (disabled):
# simple_model = KNeighborsClassifier(n_neighbors=1)
# model_name = '1NN'

# simple_model = SVC(C=6000.0, gamma=0.001, probability=True)
# model_name = "SVM(6000,0.001)"

# -----------------------------------------------------------------------------
# Define models
# -----------------------------------------------------------------------------

# Five conformal ensembles keyed by display name. Every entry wraps the
# same simple_model object.
# NOTE(review): whether the wrappers clone simple_model or share its fitted
# state is not visible here -- confirm before comparing the models.
models = {
    'ACP-RandomSubSampler':
    AggregatedCp(IcpClassifier(ClassifierNc(ClassifierAdapter(simple_model))),
                 RandomSubSampler()),
    'ACP-CrossSampler':
    AggregatedCp(IcpClassifier(ClassifierNc(ClassifierAdapter(simple_model))),
                 CrossSampler()),
    'ACP-BootstrapSampler':
    AggregatedCp(IcpClassifier(ClassifierNc(ClassifierAdapter(simple_model))),
                 BootstrapSampler()),
    'CCP':
    CrossConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(simple_model)))),
    'BCP':
    BootstrapConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(simple_model)))),
}
# Starts empty; nothing in the visible part of the snippet appends to it.
error_summary = []
예제 #14
0
    def CF_qualitative_validation(self):
        ''' Performs validation for conformal qualitative models.

        Copies self.X / self.Y and, for every entry in seeds, draws a
        shuffled 75/25 train/test split seeded with that entry. Each
        split fits an AggregatedCp (ICP classifier around self.estimator,
        MarginErrFunc, BootstrapSampler) and predicts the test part at
        self.conformalSignificance.

        A prediction row is counted only when its first two entries
        differ (presumably boolean class-membership flags for class 0 and
        class 1 -- confirm against AggregatedCp.predict); rows whose two
        entries are equal are tallied as "not predicted". The per-split
        counts are averaged and truncated to int.

        Sets self.TN, self.FP, self.TP, self.FN, self.sensitivity,
        self.specificity, self.mcc, self.conformal_coverage and
        self.conformal_accuracy.

        Returns:
            (True, (results, )), where results is a list of
            (key, description, value) tuples.

        Raises:
            Whatever the split loop raises; the error is logged and
            re-raised unchanged.
            ZeroDivisionError: from the plain int ratios (sensitivity,
                specificity, coverage, accuracy) when a denominator is 0.
        '''

        # Make a copy of original matrices.
        X = self.X.copy()
        Y = self.Y.copy()

        # Random seeds, one per external validation of the
        # aggregated conformal estimator.
        seeds = [5, 7, 35]
        # Per-split number of class 0 correct predictions.
        c0_correct_all = []
        # Per-split number of class 0 incorrect predictions.
        c0_incorrect_all = []
        # Per-split number of class 1 correct predictions.
        c1_correct_all = []
        # Per-split number of class 1 incorrect predictions
        c1_incorrect_all = []
        # Per-split number of instances out of the applicability domain.
        not_predicted_all = []

        results = []
        # Iterate over the seeds.
        try:
            for seed in seeds:
                # Generate training and test sets. The seed itself drives
                # the split; the original passed the loop index, which
                # left the seeds list unused.
                X_train, X_test, Y_train, Y_test = train_test_split(
                    X, Y, test_size=0.25, random_state=seed, shuffle=True)
                # Create the aggregated conformal classifier.
                conformal_pred = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(ClassifierAdapter(self.estimator),
                                     MarginErrFunc())), BootstrapSampler())
                # Fit the conformal classifier to the data
                conformal_pred.fit(X_train, Y_train)
                # Perform prediction on test set
                prediction = conformal_pred.predict(X_test,
                                                    self.conformalSignificance)

                c0_correct = 0
                c1_correct = 0
                not_predicted = 0
                c0_incorrect = 0
                c1_incorrect = 0

                # Iterate over the prediction and check the result
                for real_value, predicted in zip(Y_test, prediction):
                    real = float(real_value)
                    if predicted[0] != predicted[1]:
                        if real == 0 and predicted[0]:
                            c0_correct += 1
                        if real == 0 and predicted[1]:
                            c0_incorrect += 1
                        if real == 1 and predicted[1]:
                            c1_correct += 1
                        if real == 1 and predicted[0]:
                            c1_incorrect += 1
                    else:
                        not_predicted += 1
                # Add the results to the lists.
                c0_correct_all.append(c0_correct)
                c0_incorrect_all.append(c0_incorrect)
                c1_correct_all.append(c1_correct)
                c1_incorrect_all.append(c1_incorrect)
                not_predicted_all.append(not_predicted)
        except Exception as e:
            LOG.error(f'Qualitative conformal validation'
                      f' failed with exception: {e}')
            # Bare raise keeps the original traceback intact.
            raise
        # Get the mean confusion matrix. The builtin int is used because
        # the np.int alias no longer exists in current NumPy releases.
        self.TN = int(np.mean(c0_correct_all))
        self.FP = int(np.mean(c0_incorrect_all))
        self.TP = int(np.mean(c1_correct_all))
        self.FN = int(np.mean(c1_incorrect_all))
        not_predicted_all = int(np.mean(not_predicted_all))

        results.append(('TP', 'True positives in cross-validation', self.TP))
        results.append(('TN', 'True negatives in cross-validation', self.TN))
        results.append(('FP', 'False positives in cross-validation', self.FP))
        results.append(('FN', 'False negatives in cross-validation', self.FN))

        # Compute sensitivity and specificity
        self.sensitivity = (self.TP / (self.TP + self.FN))
        self.specificity = (self.TN / (self.TN + self.FP))
        # Compute Matthews Correlation Coefficient
        self.mcc = (((self.TP * self.TN) - (self.FP * self.FN)) / np.sqrt(
            (self.TP + self.FP) * (self.TP + self.FN) * (self.TN + self.FP) *
            (self.TN + self.FN)))
        results.append(('Sensitivity', 'Sensitivity in cross-validation',
                        self.sensitivity))
        results.append(('Specificity', 'Specificity in cross-validation',
                        self.specificity))
        results.append(
            ('MCC', 'Matthews Correlation Coefficient in cross-validation',
             self.mcc))

        # Compute coverage (% of compounds inside the applicability domain)
        self.conformal_coverage = (self.TN + self.FP + self.TP + self.FN) / (
            (self.TN + self.FP + self.TP + self.FN) + not_predicted_all)
        # Compute accuracy (% of correct predictions)
        self.conformal_accuracy = float(self.TN +
                                        self.TP) / float(self.FP + self.FN +
                                                         self.TN + self.TP)

        results.append(('Conformal_coverage', 'Conformal coverage',
                        self.conformal_coverage))
        results.append(('Conformal_accuracy', 'Conformal accuracy',
                        self.conformal_accuracy))

        return True, (results, )
# -----------------------------------------------------------------
# prediction with significance

# 10-fold stratified, shuffled split; no random_state is given, so the
# folds differ from run to run.
s_folder = StratifiedKFold(n_splits=10, shuffle=True)
for k, (train, test) in enumerate(s_folder.split(X, y)):
    x_train, x_test = X[train], X[test]
    y_train, y_test = y[train], y[test]
    # Column vector of true labels, as passed to the scoring functions.
    truth = y_test.reshape((-1, 1))

    # Project onto 9 LDA components; the projection is fitted on the
    # training fold only and then applied to the test fold.
    lda = LinearDiscriminantAnalysis(n_components=9)
    x_train_lda = lda.fit_transform(x_train, y_train)
    x_test_lda = lda.transform(x_test)

    # Cross-conformal classifier around the shared simple_model.
    model = CrossConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(simple_model))))
    model.fit(x_train_lda, y_train)
    # significance=None here; the level is applied by the scoring functions
    # below (presumably predict then returns p-values -- confirm).
    prediction = model.predict(x_test_lda, significance=None)
    # NOTE(review): table is not used in the visible part of this loop.
    table = np.hstack((prediction, truth))
    # [accuracy, average prediction-set size] for this fold.
    result = [
        1 - class_mean_errors(prediction, truth, significance=significance),
        class_avg_c(prediction, truth, significance=significance)
    ]
    # Accumulate one row per fold; the first fold seeds the summary.
    if k == 0:
        summary = result
    else:
        summary = np.vstack((summary, result))
    print('\nCCP')
    print('Accuracy: {}'.format(result[0]))
    print('Average count: {}'.format(result[1]))
예제 #16
0
from nonconformist.nc import ClassifierNc, RegressorNc

from nonconformist.evaluation import cross_val_score
from nonconformist.evaluation import ClassIcpCvHelper, RegIcpCvHelper
from nonconformist.evaluation import class_avg_c, class_mean_errors
from nonconformist.evaluation import reg_mean_errors, reg_median_size


# -----------------------------------------------------------------------------
# Classification
# -----------------------------------------------------------------------------
data = load_iris()

# Random forest with out-of-bag scoring enabled, wrapped step by step into
# the out-of-bag conformal classifier.
forest = RandomForestClassifier(n_estimators=100, oob_score=True)
nc_function = ClassifierNc(OobClassifierAdapter(forest))
icp = OobCpClassifier(nc_function)
icp_cv = ClassIcpCvHelper(icp)

# 5 iterations of 5 folds, scored at three significance levels.
cv_settings = {
    "iterations": 5,
    "folds": 5,
    "scoring_funcs": [class_mean_errors, class_avg_c],
    "significance_levels": [0.05, 0.1, 0.2],
}
scores = cross_val_score(icp_cv, data.data, data.target, **cv_settings)

print("Classification: iris")
예제 #17
0
파일: RF.py 프로젝트: e7dal/flame
    def build(self):
        '''Fit a Random Forest on the stored X/Y matrices, optionally conformal.

        The plain estimator is either handed to ``self.optimize`` (when the
        ``tune`` parameter is set) or fitted directly with
        ``self.estimator_parameters``. When the ``conformal`` parameter is
        set, a copy of that estimator is wrapped in an aggregated conformal
        predictor, which replaces ``self.estimator`` and is fitted on the
        same data.

        Returns:
            ``(True, results)`` on success, where ``results`` is a list of
            ``(key, label, value)`` tuples, or ``(False, message)`` when any
            stage raises.
        '''

        # Make copies of the data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = [
            ('nobj', 'number of objects', self.nobj),
            ('nvarx', 'number of predictor variables', self.nvarx),
            ('model', 'model type', 'RF'),
        ]

        conformal = self.param.getVal('conformal')

        if self.param.getVal('tune'):
            # Tuning requested: let optimize() search the estimator settings
            LOG.info("Optimizing RF estimator")

            try:
                # Regressor for quantitative endpoints, classifier otherwise
                if self.param.getVal('quantitative'):
                    forest_class = RandomForestRegressor
                else:
                    forest_class = RandomForestClassifier

                self.estimator = forest_class(**self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)

            except Exception as e:
                return False, f'Exception optimizing RF estimator with exception {e}'

        else:
            try:
                if self.param.getVal('quantitative'):
                    forest_class = RandomForestRegressor
                    build_message = "Building Quantitative RF model"
                else:
                    forest_class = RandomForestClassifier
                    build_message = "Building Qualitative RF model"

                self.estimator = forest_class(**self.estimator_parameters)

                # The conformal stage below logs its own message
                if not conformal:
                    LOG.info(build_message)

                self.estimator.fit(X, Y)

            except Exception as e:
                return False, f'Exception building RF estimator with exception {e}'

        # Nothing else to do for a non-conformal model
        if not conformal:
            return True, results

        self.estimator_temp = copy(self.estimator)

        # Create the conformal estimator
        try:
            if self.param.getVal('quantitative'):
                # Conformal regressor: nonconformity scores are normalized
                # with a KNN regressor sized by conformal_settings['KNN_NN']
                conformal_settings = self.param.getDict('conformal_settings')
                LOG.info("Building conformal Quantitative RF model")

                underlying_model = RegressorAdapter(self.estimator_temp)
                knn = KNeighborsRegressor(
                    n_neighbors=conformal_settings['KNN_NN'])
                self.normalizing_model = RegressorAdapter(knn)
                normalizer = RegressorNormalizer(underlying_model,
                                                 copy(self.normalizing_model),
                                                 AbsErrorErrFunc())
                nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                 normalizer)
                icp = IcpRegressor(nc)

            else:
                # Conformal classifier using the margin error function
                LOG.info("Building conformal Qualitative RF model")

                nc = ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                  MarginErrFunc())
                icp = IcpClassifier(nc)

            # The aggregated conformal predictor replaces the plain estimator
            self.estimator = AggregatedCp(icp, BootstrapSampler())

            # Fit estimator to the data
            self.estimator.fit(X, Y)

        except Exception as e:
            return False, f'Exception building conformal RF estimator with exception {e}'

        return True, results


## Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''
# Shuffle the sample indices: the first two thirds are used for training and
# the remainder for testing.
idx = np.random.permutation(data.target.size)
split_at = int(2 * idx.size / 3)
train, test = idx[:split_at], idx[split_at:]

# True labels of the test samples as a single column, plus the column names:
# one 'C-<label>' entry per distinct target value, followed by 'truth'.
truth = data.target[test].reshape(-1, 1)
columns = ['C-{}'.format(label) for label in np.unique(data.target)]
columns.append('truth')
significance = 0.1

# -----------------------------------------------------------------------------
# Define models
# -----------------------------------------------------------------------------

models = {
    'ACP-RandomSubSampler':
    AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier()))), RandomSubSampler()),
    'ACP-CrossSampler':
    AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier()))), CrossSampler()),
    'ACP-BootstrapSampler':
    AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier()))), BootstrapSampler()),
    'CCP':
    CrossConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier())))),
    'BCP':
    BootstrapConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc, MarginErrFunc

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()

# Shuffle the sample indices and cut them into three consecutive parts:
# training, calibration and test.
idx = np.random.permutation(data.target.size)
first_cut = int(idx.size / 3)
second_cut = int(2 * idx.size / 3)
train = idx[:first_cut]
calibrate = idx[first_cut:second_cut]
test = idx[second_cut:]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
nc_function = ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()),
                           MarginErrFunc())
icp = IcpClassifier(nc_function)
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
prediction = icp.predict(data.data[test, :], significance=0.1)

# Put the true label next to the per-class prediction columns and prepend a
# header row before printing.
header = np.array(["c0", "c1", "c2", "Truth"])
test_labels = data.target[test]
table = np.vstack([prediction.T, test_labels]).T
df = pd.DataFrame(np.vstack([header, table]))
print(df)
예제 #20
0
    def build(self):
        '''Build a new RF model with the X and Y numpy matrices.

        Depending on ``self.tune`` the estimator is either handed to
        ``self.optimize`` or created from ``self.estimator_parameters``.
        When ``self.conformal`` is set, an aggregated conformal predictor is
        additionally built around ``self.estimator``, fitted, and stored in
        ``self.conformal_pred``. ``self.estimator`` itself is fitted last.

        Returns:
            ``(True, results)`` where ``results`` is a list of
            ``(key, label, value)`` tuples describing the model, or a bare
            ``False`` when ``self.failed`` is set.
        '''

        # NOTE(review): this early exit returns a bare ``False`` while every
        # other path returns a ``(bool, results)`` tuple. It is kept as-is so
        # existing callers see the same value; confirm against callers before
        # harmonizing.
        if self.failed:
            return False

        # Work on copies of the data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = []

        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        # Replace the cross-validation setting with the object built by
        # getCrossVal
        if self.cv:
            self.cv = getCrossVal(self.cv,
                                  self.estimator_parameters["random_state"],
                                  self.n, self.p)
        if self.tune:
            if self.quantitative:
                self.optimize(X, Y, RandomForestRegressor(),
                              self.tune_parameters)
                results.append(
                    ('model', 'model type', 'RF quantitative (optimized)'))
            else:
                self.optimize(X, Y, RandomForestClassifier(),
                              self.tune_parameters)
                results.append(
                    ('model', 'model type', 'RF qualitative (optimized)'))
        else:
            if self.quantitative:
                log.info("Building Quantitative RF model")
                # 'class_weight' is dropped from the parameters before they
                # are passed to the regressor
                self.estimator_parameters.pop('class_weight', None)

                self.estimator = RandomForestRegressor(
                    **self.estimator_parameters)
                results.append(('model', 'model type', 'RF quantitative'))

            else:
                log.info("Building Qualitative RF model")
                self.estimator = RandomForestClassifier(
                    **self.estimator_parameters)
                results.append(('model', 'model type', 'RF qualitative'))

        if self.conformal:
            if self.quantitative:
                underlying_model = RegressorAdapter(self.estimator)
                # The estimator itself is used as the normalizing model. A
                # KNN-based normalizing model used to be created here too,
                # but it was overwritten on the very next line and never
                # used, so that dead assignment has been removed.
                normalizing_model = RegressorAdapter(self.estimator)
                normalizer = RegressorNormalizer(underlying_model,
                                                 normalizing_model,
                                                 AbsErrorErrFunc())
                nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                 normalizer)

                self.conformal_pred = AggregatedCp(IcpRegressor(nc),
                                                   BootstrapSampler())
                self.conformal_pred.fit(X, Y)
                # overrides non-conformal
                results.append(
                    ('model', 'model type', 'conformal RF quantitative'))

            else:
                self.conformal_pred = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(ClassifierAdapter(self.estimator),
                                     MarginErrFunc())), BootstrapSampler())
                self.conformal_pred.fit(X, Y)
                # overrides non-conformal
                results.append(
                    ('model', 'model type', 'conformal RF qualitative'))

        # Fit the plain estimator to the full data
        self.estimator.fit(X, Y)

        return True, results


#### Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''
예제 #21
0
파일: SVM.py 프로젝트: ChrisHill8/flame
    def build(self):
        '''Build a new SVM model with the X and Y numpy matrices.

        Depending on the ``tune`` parameter the estimator is either handed
        to ``self.optimize`` or created from ``self.estimator_parameters``;
        it is then fitted. When the ``conformal`` parameter is set, a copy of
        the fitted estimator is wrapped in an aggregated conformal predictor
        which replaces ``self.estimator`` and is fitted on the same data.

        Returns:
            ``(True, results)`` on success, where ``results`` is a list of
            ``(key, label, value)`` tuples, or ``(False, message)`` when
            optimizing, building or conformal wrapping raises. Failures are
            logged as well, but no longer swallowed: previously execution
            continued with a missing or stale estimator and success was
            reported.
        '''

        # Make a copy of data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = []
        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        # If tune then call gridsearch to optimize the estimator
        if self.param.getVal('tune'):
            try:
                # Check type of model
                if self.param.getVal('quantitative'):
                    self.optimize(X, Y, svm.SVR(), self.tune_parameters)
                    results.append(('model', 'model type',
                                    'SVM quantitative (optimized)'))

                else:
                    self.optimize(X, Y, svm.SVC(probability=True),
                                  self.tune_parameters)
                    results.append(
                        ('model', 'model type', 'SVM qualitative (optimized)'))
                LOG.debug('SVM estimator optimized')
            except Exception as e:
                message = (f'Exception optimizing SVM '
                           f'estimator with exception {e}')
                LOG.error(message)
                return False, message
        else:
            try:
                LOG.info("Building  SVM model")
                if self.param.getVal('quantitative'):
                    LOG.info("Building Quantitative SVM-R model")
                    self.estimator = svm.SVR(**self.estimator_parameters)
                    results.append(('model', 'model type', 'SVM quantitative'))
                else:
                    # NOTE(review): the tuned branch forces probability=True,
                    # whereas here it depends on estimator_parameters;
                    # confirm it is set when a conformal model is requested.
                    self.estimator = svm.SVC(**self.estimator_parameters)
                    results.append(('model', 'model type', 'SVM qualitative'))
            except Exception as e:
                message = (f'Exception building SVM '
                           f'estimator with exception {e}')
                LOG.error(message)
                return False, message

        # Fit estimator to the data
        self.estimator.fit(X, Y)

        # Keep a copy of the fitted estimator to wrap in the conformal model
        self.estimator_temp = copy(self.estimator)
        if self.param.getVal('conformal'):
            try:
                LOG.info("Building aggregated conformal SVM model")
                if self.param.getVal('quantitative'):
                    underlying_model = RegressorAdapter(self.estimator_temp)
                    # The estimator copy doubles as the normalizing model
                    normalizing_model = RegressorAdapter(self.estimator_temp)
                    normalizer = RegressorNormalizer(underlying_model,
                                                     normalizing_model,
                                                     AbsErrorErrFunc())
                    nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                     normalizer)

                    self.estimator = AggregatedCp(IcpRegressor(nc),
                                                  BootstrapSampler())
                    self.estimator.fit(X, Y)
                    # overrides non-conformal
                    results.append(
                        ('model', 'model type', 'conformal SVM quantitative'))

                else:
                    self.estimator = AggregatedCp(
                        IcpClassifier(
                            ClassifierNc(
                                ClassifierAdapter(self.estimator_temp),
                                MarginErrFunc())), BootstrapSampler())
                    self.estimator.fit(X, Y)
                    # overrides non-conformal
                    results.append(
                        ('model', 'model type', 'conformal SVM qualitative'))
            except Exception as e:
                message = (f'Exception building aggregated conformal SVM '
                           f'estimator with exception {e}')
                LOG.error(message)
                return False, message

        return True, results
예제 #22
0
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc

data = load_iris()
x, y = data.data, data.target

# Re-encode the labels as consecutive integers 0..n_classes-1.
# ``return_inverse`` derives every code from the original values in one step.
# Relabelling in place with masks can merge classes when a code that was
# already written equals a label still waiting to be processed
# (e.g. labels [-1, 0]).
y = np.unique(y, return_inverse=True)[1]

# Shuffle the sample indices and split them into three consecutive parts:
# training, calibration and test (the test part takes any remainder).
n_instances = y.size
idx = np.random.permutation(n_instances)
third = int(n_instances / 3)

train_idx = idx[:third]
cal_idx = idx[third:2 * third]
test_idx = idx[2 * third:]

nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier()))
icp = IcpClassifier(nc)

icp.fit(x[train_idx, :], y[train_idx])
icp.calibrate(x[cal_idx, :], y[cal_idx])

# One row per test sample, with columns named Label, Confidence and
# Credibility.
print(
    pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                 columns=['Label', 'Confidence', 'Credibility']))