Ejemplo n.º 1
0
    def build(self):
        '''Build a new qualitative GNB model with the X and Y numpy matrices'''

        # Work on local copies so the stored matrices are never mutated
        X = self.X.copy()
        Y = self.Y.copy()

        results = [
            ('nobj', 'number of objects', self.nobj),
            ('nvarx', 'number of predictor variables', self.nvarx),
        ]

        # Plain (non-conformal) Gaussian Naive Bayes estimator
        LOG.info('Building GaussianNB model')
        self.estimator = GaussianNB(**self.estimator_parameters)
        results.append(('model', 'model type', 'GNB qualitative'))

        self.estimator.fit(X, Y)

        if self.param.getVal('conformal'):
            # Wrap a copy of the fitted estimator in an aggregated
            # (bootstrap-sampled) conformal classifier and refit
            self.estimator_temp = copy(self.estimator)
            self.estimator = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                 MarginErrFunc())), BootstrapSampler())
            self.estimator.fit(X, Y)
            results.append(('model', 'model type', 'conformal GNB qualitative'))

        return True, results
Ejemplo n.º 2
0
def CF_QualVal(X, Y, estimator, conformalSignificance):
    """ Qualitative conformal predictor validation.

    Runs a leave-one-out validation of an aggregated (bootstrap-sampled)
    conformal classifier built on top of `estimator`, printing the mean
    error rate and the one-class rate at the requested significance.

    Parameters
    ----------
    X, Y : array-like
        Predictor matrix and class labels.
    estimator : classifier
        Underlying scikit-learn style model to wrap.
    conformalSignificance : float or None
        Significance level for the conformal predictions; falls back to
        0.15 when None.

    Returns
    -------
    The fitted AggregatedCp conformal predictor.
    """

    print("Starting qualitative conformal prediction validation")

    # BUG FIX: this argument was previously ignored and a hard-coded 0.15
    # was used for prediction and both metrics.
    significance = 0.15 if conformalSignificance is None else conformalSignificance

    icp = AggregatedCp(
        IcpClassifier(
            ClassifierNc(ClassifierAdapter(estimator), MarginErrFunc())),
        BootstrapSampler())
    Y = np.asarray(Y).reshape(-1, 1)
    loo = LeaveOneOut()
    predictions = []
    for train, test in loo.split(X):
        # Center and scale using only the training fold of this split
        Xn = [X[i] for i in train]
        Yn = [Y[i] for i in train]
        Xn, mux = center(Xn)
        Xn, wgx = scale(Xn, True)
        Yn = np.asarray(Yn)
        Xout = X[test]
        icp.fit(Xn, Yn)
        predictions.append(icp.predict(Xout, significance=significance))

    # One prediction row per left-out object
    predictions = np.asarray([(x[0]).tolist() for x in predictions])
    print('Error rate: {}'.format(
        class_mean_errors(predictions, Y, significance)))
    print('Class one: ', class_one_c(predictions, Y, significance))
    return icp
Ejemplo n.º 3
0
    def test_icp_classification_tree(self):
        """Fit, calibrate and predict an inductive conformal classifier on
        iris with a decision tree, then print predictions next to truth."""
        data = load_iris()

        # Random three-way split of the indices: train / calibrate / test
        idx = np.random.permutation(data.target.size)
        lo = int(idx.size / 3)
        hi = int(2 * idx.size / 3)
        train, calibrate, test = idx[:lo], idx[lo:hi], idx[hi:]

        # Train on the first fold, calibrate on the second
        icp = IcpClassifier(
            ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()),
                         MarginErrFunc()))
        icp.fit(data.data[train, :], data.target[train])
        icp.calibrate(data.data[calibrate, :], data.target[calibrate])

        # Predict the held-out fold at 10% significance and display a
        # table of per-class outputs alongside the true labels
        prediction = icp.predict(data.data[test, :], significance=0.1)
        header = np.array(["c0", "c1", "c2", "Truth"])
        table = np.vstack([prediction.T, data.target[test]]).T
        df = pd.DataFrame(np.vstack([header, table]))
        print(df)
Ejemplo n.º 4
0
def CF_QualCal(X, Y, estimator):
    """Qualitative conformal predictor calibration.

    Builds an aggregated (bootstrap-sampled) conformal classifier around
    `estimator` and fits it on the full X/Y data; aggregation handles the
    calibration internally, so no separate calibrate() call is needed.

    Returns
    -------
    The fitted AggregatedCp conformal predictor.
    """

    acp = AggregatedCp(
        IcpClassifier(
            ClassifierNc(ClassifierAdapter(estimator), MarginErrFunc())),
        BootstrapSampler())

    acp.fit(X, Y)

    # Removed a block of commented-out single-ICP calibration code that
    # was superseded by the aggregated predictor above.
    return acp
Ejemplo n.º 5
0
    def SelectLabeled(self, labeled_data_x, labeled_data_y, unlabeled_data_x):
        """Select confidently-predicted unlabeled samples for self-labeling.

        Fits an aggregated conformal classifier (random sub-sampling) on the
        initial labeled pool plus any newly labeled data, predicts the
        unlabeled pool, and moves samples passing both thresholds into the
        labeled set with labels taken from the argmax prediction column.

        Returns (newly_labeled_x, newly_labeled_y, remaining_unlabeled_x).
        """
        # just append train data to labeled data
        # NOTE(review): both guards test len(labeled_data_x) — assumes the
        # x and y arguments always have matching lengths; confirm callers.
        labeled_x = np.concatenate(
            (self.init_labeled_data_x, labeled_data_x
             )) if len(labeled_data_x) > 0 else self.init_labeled_data_x
        labeled_y = np.concatenate(
            (self.init_labeled_data_y, labeled_data_y
             )) if len(labeled_data_x) > 0 else self.init_labeled_data_y
        #

        # create model to predict with confidence and credibility
        model = ClassifierAdapter(
            DecisionTreeClassifier(random_state=config.random_state,
                                   min_samples_leaf=config.min_samples_leaf))
        model_acp = AggregatedCp(
            IcpClassifier(ClassifierNc(model), smoothing=True),
            RandomSubSampler())
        model_acp.fit(labeled_x, labeled_y)
        # s holds one prediction row per unlabeled sample
        s = model_acp.predict(unlabeled_data_x)
        # print(s)
        #

        # selection method
        # A row is "labeled" when 1 - min exceeds the confidence threshold
        # AND max exceeds the credibility threshold.
        labeled_ind = [
            i for i, a in enumerate(s)
            if 1 - a.min() > config.confidence and a.max() > config.credibility
        ]
        # NOTE(review): rows exactly equal to a threshold fall into neither
        # list (strict > above vs strict < below) — confirm this is intended.
        unlabeled_ind = [
            i for i, a in enumerate(s)
            if 1 - a.min() < config.confidence or a.max() < config.credibility
        ]

        # Selected rows become labeled (y = argmax column index of s);
        # the remaining rows stay in the unlabeled pool.
        labeled_unlabeled_x, labeled_unlabeled_y, unlabeled_data_x = \
            np.take(unlabeled_data_x, labeled_ind, axis=0), np.take(s.argmax(axis=1), labeled_ind), np.take(
                unlabeled_data_x, unlabeled_ind, axis=0)
        #

        return labeled_unlabeled_x, labeled_unlabeled_y, unlabeled_data_x
Ejemplo n.º 6
0
    def SelectLabeled(self, labeled_data_x, labeled_data_y, unlabeled_data_x):
        """Select confidently-predicted unlabeled samples for self-labeling.

        Fits a cross-conformal classifier (margin nonconformity) on the
        initial labeled pool plus any newly labeled data, predicts the
        unlabeled pool, and moves samples passing both thresholds into the
        labeled set with labels taken from the argmax prediction column.

        Returns (newly_labeled_x, newly_labeled_y, remaining_unlabeled_x).
        """
        # just append train data to labeled data
        # NOTE(review): both guards test len(labeled_data_x) — assumes the
        # x and y arguments always have matching lengths; confirm callers.
        labeled_x = np.concatenate((self.init_labeled_data_x, labeled_data_x)) \
            if len(labeled_data_x) > 0 else self.init_labeled_data_x
        labeled_y = np.concatenate((self.init_labeled_data_y, labeled_data_y)) \
            if len(labeled_data_x) > 0 else self.init_labeled_data_y
        #

        # create model to predict with confidence and credibility
        model = ClassifierAdapter(
            DecisionTreeClassifier(random_state=config.random_state,
                                   min_samples_leaf=config.min_samples_leaf))
        nc = ClassifierNc(model, MarginErrFunc())
        model_icp = IcpClassifier(nc, smoothing=True)
        model_ccp = CrossConformalClassifier(model_icp)
        model_ccp.fit(labeled_x, labeled_y)

        # s holds one prediction row per unlabeled sample
        s = model_ccp.predict(unlabeled_data_x)
        # print(s)
        #

        # selection method
        # Here max is compared against config.confidence and 1 - min against
        # config.credibility — TODO confirm the threshold pairing is intended.
        labeled_ind = [
            i for i, a in enumerate(s)
            if a.max() > config.confidence and 1 - a.min() > config.credibility
        ]
        # NOTE(review): rows exactly equal to a threshold fall into neither
        # list (strict > above vs strict < below) — confirm this is intended.
        unlabeled_ind = [
            i for i, a in enumerate(s)
            if a.max() < config.confidence or 1 - a.min() < config.credibility
        ]

        # Selected rows become labeled (y = argmax column index of s);
        # the remaining rows stay in the unlabeled pool.
        labeled_unlabeled_x, labeled_unlabeled_y, unlabeled_data_x = \
            np.take(unlabeled_data_x, labeled_ind, axis=0), np.take(s.argmax(axis=1), labeled_ind), np.take(
                unlabeled_data_x, unlabeled_ind, axis=0)
        #

        return labeled_unlabeled_x, labeled_unlabeled_y, unlabeled_data_x
Ejemplo n.º 7
0
    def build(self):
        '''Build a new qualitative GNB model with the X and Y numpy matrices'''
        if self.failed:
            return False, "Error initiating model"

        # Work on local copies of the data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = [
            ('nobj', 'number of objects', self.nobj),
            ('nvarx', 'number of predictor variables', self.nvarx),
        ]

        if self.cv:
            self.cv = getCrossVal(self.cv, 46, self.n, self.p)

        # GaussianNB only supports classification problems
        if self.quantitative:
            print("GNB only applies to qualitative data")
            return False, "GNB only applies to qualitative data"

        print("Building GaussianNB model")
        print(self.estimator_parameters)
        self.estimator = GaussianNB(**self.estimator_parameters)
        results.append(('model', 'model type', 'GNB qualitative'))

        if self.conformal:
            # Aggregated conformal wrapper around the base estimator
            self.conformal_pred = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator),
                                 MarginErrFunc())), BootstrapSampler())
            self.conformal_pred.fit(X, Y)
            # overrides non-conformal
            results.append(
                ('model', 'model type', 'conformal GNB qualitative'))

        self.estimator.fit(X, Y)
        return True, results
Ejemplo n.º 8
0
    def test_confidence_credibility(self):
        """Print label/confidence/credibility for ICP predictions on iris."""
        data = load_iris()
        x, y = data.data, data.target

        # Remap the class labels onto 0..k-1
        for i, y_ in enumerate(np.unique(y)):
            y[y == y_] = i

        # Random thirds of the instances: train / calibration / test
        n_instances = y.size
        idx = np.random.permutation(n_instances)
        third = int(n_instances / 3)
        train_idx = idx[:third]
        cal_idx = idx[third:2 * third]
        test_idx = idx[2 * third:]

        # Fit on the training fold, calibrate on the calibration fold
        icp = IcpClassifier(
            ClassifierNc(ClassifierAdapter(RandomForestClassifier())))
        icp.fit(x[train_idx, :], y[train_idx])
        icp.calibrate(x[cal_idx, :], y[cal_idx])

        conf = icp.predict_conf(x[test_idx, :])
        print(pd.DataFrame(conf,
                           columns=["Label", "Confidence", "Credibility"]))
from sklearn.datasets import load_iris

from nonconformist.icp import IcpClassifier
from nonconformist.nc import ProbEstClassifierNc, margin

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()

# Random three-way split of the iris indices: train / calibrate / test
idx = np.random.permutation(data.target.size)
train = idx[: int(idx.size / 3)]
calibrate = idx[int(idx.size / 3) : int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3) :]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
# ICP over a decision tree with the "margin" nonconformity function
icp = IcpClassifier(ProbEstClassifierNc(DecisionTreeClassifier(), margin))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
# Predict the test fold at 10% significance and print the per-class
# outputs side by side with the true labels
prediction = icp.predict(data.data[test, :], significance=0.1)
header = np.array(["c0", "c1", "c2", "Truth"])
table = np.vstack([prediction.T, data.target[test]]).T
df = pd.DataFrame(np.vstack([header, table]))
print(df)
Ejemplo n.º 10
0
    def build(self):
        '''Build a new XGBOOST model with the X and Y numpy matrices.

        Builds a quantitative (XGBRegressor) or qualitative (XGBClassifier)
        estimator, optionally tuned by grid search, and optionally wrapped
        in an aggregated conformal predictor.

        Returns
        -------
        (True, results) on success, where results is a list of
        (key, description, value) tuples, or (False, message) on error.
        '''

        try:
            from xgboost.sklearn import XGBClassifier
            from xgboost.sklearn import XGBRegressor
        except Exception as e:
            return False,  'XGboost not found, please revise your environment'

        # Make a copy of data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = []
        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        # If tune then call gridsearch to optimize the estimator
        if self.param.getVal('tune'):

            LOG.info("Optimizing XGBOOST estimator")

            try:
                # Check type of model
                if self.param.getVal('quantitative'):
                    self.estimator = XGBRegressor(**self.estimator_parameters)
                    self.optimize(X, Y, self.estimator, self.tune_parameters)
                    results.append(('model', 'model type',
                                    'XGBOOST quantitative (optimized)'))
                else:
                    # NOTE: removed a dead num_class=2 params dict that was
                    # built here but never applied to the estimator.
                    self.estimator = XGBClassifier(**self.estimator_parameters)
                    self.optimize(X, Y, self.estimator, self.tune_parameters)
                    results.append(('model', 'model type',
                                    'XGBOOST qualitative (optimized)'))

            except Exception as e:
                return False, f'Exception optimizing XGBOOST estimator with exception {e}'

        else:
            try:
                if self.param.getVal('quantitative'):
                    LOG.info("Building Quantitative XGBOOST model")
                    self.estimator = XGBRegressor(**self.estimator_parameters)
                    results.append(('model', 'model type', 'XGBOOST quantitative'))
                else:
                    LOG.info("Building Qualitative XGBOOST model")
                    self.estimator = XGBClassifier(**self.estimator_parameters)
                    results.append(('model', 'model type', 'XGBOOST qualitative'))

                self.estimator.fit(X, Y)
                print(self.estimator)

            except Exception as e:
                # BUG FIX: a bare `raise e` here made this error return
                # unreachable; report the failure to the caller instead.
                return False, f'Exception building XGBOOST estimator with exception {e}'

        # Keep a copy of the fitted base estimator for conformal wrapping
        self.estimator_temp = copy(self.estimator)

        if not self.param.getVal('conformal'):
            return True, results

        # Create the conformal estimator
        try:
            # Conformal regressor
            if self.param.getVal('quantitative'):

                LOG.info("Building conformal Quantitative XGBOOST model")

                # Normalized nonconformity: the same base model serves as
                # both underlying and normalizing regressor
                underlying_model = RegressorAdapter(self.estimator_temp)
                normalizing_model = RegressorAdapter(self.estimator_temp)
                normalizer = RegressorNormalizer(underlying_model,
                                                 normalizing_model,
                                                 AbsErrorErrFunc())
                nc = RegressorNc(underlying_model,
                                 AbsErrorErrFunc(),
                                 normalizer)

                self.estimator = AggregatedCp(IcpRegressor(nc),
                                              BootstrapSampler())

                self.estimator.fit(X, Y)
                results.append(('model', 'model type',
                                'conformal XGBOOST quantitative'))

            # Conformal classifier
            else:

                LOG.info("Building conformal Qualitative XGBOOST model")

                self.estimator = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                     MarginErrFunc())),
                    BootstrapSampler())

                # Fit estimator to the data
                self.estimator.fit(X, Y)
                results.append(('model', 'model type',
                                'conformal XGBOOST qualitative'))

        except Exception as e:
            # BUG FIX: same unreachable-return-after-raise pattern as above
            return False, f'Exception building conformal XGBOOST estimator with exception {e}'

        return True, results



## Overriding of parent methods

    # def CF_quantitative_validation(self):
    #     ''' performs validation for conformal quantitative models '''

      

    # def CF_qualitative_validation(self):
    #     ''' performs validation for conformal qualitative models '''


    # def quantitativeValidation(self):
    #     ''' performs validation for quantitative models '''

    # def qualitativeValidation(self):
    #     ''' performs validation for qualitative models '''


    # def validate(self):
    #     ''' Validates the model and computes suitable model quality scoring values'''


    # def optimize(self, X, Y, estimator, tune_parameters):
    #     ''' optimizes a model using a grid search over a range of values for diverse parameters'''


    # def regularProject(self, Xb, results):
    #     ''' projects a collection of query objects in a regular model, for obtaining predictions '''


    # def conformalProject(self, Xb, results):
    #     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''


    # def project(self, Xb, results):
    #     ''' Uses the X matrix provided as argument to predict Y'''
Ejemplo n.º 11
0
    def build(self):
        '''Build a new DL model with the X and Y numpy matrices.

        Builds a quantitative (KerasRegressor) or qualitative
        (KerasClassifier) estimator, optionally tuned by grid search, and
        optionally wrapped in an aggregated conformal predictor.

        Returns
        -------
        (True, results) on success, where results is a list of
        (key, description, value) tuples, or (False, message) on error.
        '''

        try:
            from keras.wrappers.scikit_learn import KerasClassifier
            from keras.wrappers.scikit_learn import KerasRegressor
        except Exception as e:
            return False, 'Keras not found, please revise your environment'

        # Make a copy of data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = []
        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        # If tune then call gridsearch to optimize the estimator
        if self.param.getVal('tune'):

            LOG.info("Optimizing Keras estimator")

            try:
                # Check type of model
                if self.param.getVal('quantitative'):
                    self.estimator = KerasRegressor(
                        **self.estimator_parameters)
                    self.optimize(X, Y, self.estimator, self.tune_parameters)
                    results.append(('model', 'model type',
                                    'KERAS quantitative (optimized)'))
                else:
                    self.estimator = KerasClassifier(
                        **self.estimator_parameters)
                    self.optimize(X, Y, self.estimator, self.tune_parameters)
                    results.append(('model', 'model type',
                                    'KERAS qualitative (optimized)'))

            except Exception as e:
                return False, f'Exception optimizing KERAS estimator with exception {e}'

        else:
            try:
                if self.param.getVal('quantitative'):

                    LOG.info("Building Quantitative KERAS mode")
                    self.estimator = KerasRegressor(
                        build_fn=self.create_model,
                        **self.estimator_parameters,
                        verbose=0)
                    results.append(
                        ('model', 'model type', 'Keras quantitative'))
                else:

                    LOG.info("Building Qualitative Keras model")
                    self.estimator = KerasClassifier(
                        build_fn=self.create_model,
                        dim=self.X.shape[1],
                        **self.estimator_parameters,
                        verbose=0)
                    results.append(
                        ('model', 'model type', 'Keras qualitative'))

                self.estimator.fit(X, Y)
                print(self.estimator)

            except Exception as e:
                # BUG FIX: a bare `raise e` here made this error return
                # unreachable; report the failure to the caller instead.
                return False, f'Exception building Keras estimator with exception {e}'

        # Keep an unfitted clone for the conformal wrappers
        self.estimator_temp = clone(self.estimator)

        if not self.param.getVal('conformal'):
            return True, results

        # Create the conformal estimator
        try:
            # Conformal regressor
            if self.param.getVal('quantitative'):

                LOG.info("Building conformal Quantitative Keras model")

                # Nonconformity scores normalized by a KNN error model
                underlying_model = RegressorAdapter(self.estimator_temp)
                normalizing_model = RegressorAdapter(
                    KNeighborsRegressor(n_neighbors=15))
                normalizer = RegressorNormalizer(underlying_model,
                                                 normalizing_model,
                                                 AbsErrorErrFunc())
                nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                 normalizer)

                self.estimator = AggregatedCp(IcpRegressor(nc),
                                              BootstrapSampler())

                self.estimator.fit(X, Y)
                results.append(
                    ('model', 'model type', 'conformal Keras quantitative'))

            # Conformal classifier
            else:

                LOG.info("Building conformal Qualitative Keras model")

                self.estimator = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                     MarginErrFunc())), BootstrapSampler())

                # Fit estimator to the data
                print('build finished')
                self.estimator.fit(X, Y)
                results.append(
                    ('model', 'model type', 'conformal Keras qualitative'))

        except Exception as e:
            # BUG FIX: same unreachable-return-after-raise pattern as above
            return False, f'Exception building conformal Keras estimator with exception {e}'

        # BUG FIX: previously returned (True, []) here, discarding the
        # collected results; return them like every other success path.
        return True, results
Ejemplo n.º 12
0
def split_data(data, n_train, n_test):
    """Randomly split `data` into two disjoint parts whose sizes follow the
    n_train:n_test ratio, rescaled to the actual number of rows."""
    total = len(data)
    # Rescale the requested ratio to the dataset size
    size_train = n_train * total // (n_train + n_test)
    size_test = total - size_train
    order = np.random.permutation(total)
    first_part = data[order[:size_train]]
    second_part = data[order[size_train:size_train + size_test]]
    return first_part, second_part


#data = Orange.data.Table("../data/usps.tab")
data = Orange.data.Table("iris")

# Sweep significance levels and report, per level, the mean empirical error
# rate and the mean prediction-set size over 10 random splits.
for sig in np.linspace(0.0, 0.4, 11):
    errs, szs = [], []
    for rep in range(10):
        #train, test = split_data(data, 7200, 2098)
        # 2/3 train, 1/3 test; then split train again for calibration
        train, test = split_data(data, 2, 1)
        train, calib = split_data(train, 2, 1)

        #icp = IcpClassifier(ProbEstClassifierNc(DecisionTreeClassifier(), margin))
        icp = IcpClassifier(ProbEstClassifierNc(LogisticRegression(), margin))
        #icp = ICP()
        icp.fit(train.X, train.Y)
        icp.calibrate(calib.X, calib.Y)
        pred = icp.predict(test.X, significance=sig)

        # Accuracy: fraction of test points whose true class is included
        # in the prediction (p[y] indexes the row by the true label)
        acc = sum(p[y] for p, y in zip(pred, test.Y)) / len(pred)
        err = 1 - acc
        # Average number of labels per prediction row
        sz = sum(sum(p) for p in pred) / len(pred)
        errs.append(err)
        szs.append(sz)
    print(sig, np.mean(errs), np.mean(szs))
Ejemplo n.º 13
0
#             'ACP-CrossSampler'      : AggregatedCp(
#                                         IcpClassifier(
#                                             ClassifierNc(
#                                                 ClassifierAdapter(gbm))),
#                                         CrossSampler())
#           #   'ACP-BootstrapSampler'  : AggregatedCp(
#           #                               IcpClassifier(
#           #                                   ClassifierNc(
#           #                                       ClassifierAdapter(DecisionTreeClassifier()))),
#           #                               BootstrapSampler()),
#           #   'CCP'                   : CrossConformalClassifier(
#           #                               IcpClassifier(
#           #                                   ClassifierNc(
#           #                                       ClassifierAdapter(DecisionTreeClassifier())))),
#           #   'BCP'                   : BootstrapConformalClassifier(
#           #                               IcpClassifier(
#           #                                   ClassifierNc(
#           #                                       ClassifierAdapter(DecisionTreeClassifier()))))
#           }


# Aggregated conformal classifier over the gbm base model, with the
# individual ICPs drawn via cross-sampling
model = AggregatedCp(
            IcpClassifier(
                ClassifierNc(
                    ClassifierAdapter(gbm))),
            CrossSampler())
model.fit(x_train, y_train)
print('predicting')
# significance=None — presumably returns raw scores/p-values rather than
# thresholded region predictions; verify against the predict() docs
prediction = model.predict(x_test, significance=None)
np.savetxt(os.getcwd()+"/prediction/prediction_acp_cross_1.txt", prediction, delimiter=',')
Ejemplo n.º 14
0
# Random split of the instance indices: 2/3 train, 1/3 test
idx = np.random.permutation(data.target.size)
train = idx[: int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3) :]

# Result-table layout: one "C-<label>" column per class plus the truth
truth = data.target[test].reshape(-1, 1)
columns = ["C-{}".format(i) for i in np.unique(data.target)] + ["truth"]
significance = 0.1

# -----------------------------------------------------------------------------
# Define models
# -----------------------------------------------------------------------------

models = {
    "ACP-RandomSubSampler": AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))),
        RandomSubSampler(),
    ),
    "ACP-CrossSampler": AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))),
        CrossSampler(),
    ),
    "ACP-BootstrapSampler": AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()))),
        BootstrapSampler(),
    ),
    "CCP": CrossConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier())))
    ),
    "BCP": BootstrapConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(DecisionTreeClassifier())))
Ejemplo n.º 15
0
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc

data = load_iris()
x, y = data.data, data.target

# Remap the class labels onto 0..k-1
for i, y_ in enumerate(np.unique(y)):
    y[y == y_] = i

# Random thirds of the instances: train / calibration / test
n_instances = y.size
idx = np.random.permutation(n_instances)

train_idx = idx[:int(n_instances / 3)]
cal_idx = idx[int(n_instances / 3):2 * int(n_instances / 3)]
test_idx = idx[2 * int(n_instances / 3):]

# Fit the ICP on the training fold, calibrate on the calibration fold
nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier()))
icp = IcpClassifier(nc)

icp.fit(x[train_idx, :], y[train_idx])
icp.calibrate(x[cal_idx, :], y[cal_idx])

# predict_conf yields one (label, confidence, credibility) row per test point
print(
    pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                 columns=["Label", "Confidence", "Credibility"]))
Ejemplo n.º 16
0
# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = Orange.data.Table('iris')
X, y = data.X, data.Y

# Random thirds of the instance indices: train / calibrate / test
idx = np.random.permutation(y.size)
train = idx[:idx.size // 3]
calibrate = idx[idx.size // 3:2 * idx.size // 3]
test = idx[2 * idx.size // 3:]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
# Plain inductive conformal classifier: requires an explicit calibrate() call
icp = IcpClassifier(
    ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability))
icp.fit(X[train, :], y[train])
icp.calibrate(X[calibrate, :], y[calibrate])

# Cross-conformal variant: fit() alone, no separate calibration call here
ccp = CrossConformalClassifier(
    IcpClassifier(
        ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability)))
ccp.fit(X[train, :], y[train])

# Aggregated conformal predictor combining ICPs over cross-sampled folds
acp = AggregatedCp(
    IcpClassifier(
        ProbEstClassifierNc(DecisionTreeClassifier(), inverse_probability)),
    CrossSampler())
acp.fit(X[train, :], y[train])

# -----------------------------------------------------------------------------
from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc, MarginErrFunc

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_iris()

# Random thirds of the iris indices: train / calibrate / test
idx = np.random.permutation(data.target.size)
train = idx[:int(idx.size / 3)]
calibrate = idx[int(idx.size / 3):int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
# ICP with a decision tree and the margin nonconformity error function
icp = IcpClassifier(
    ClassifierNc(ClassifierAdapter(DecisionTreeClassifier()), MarginErrFunc()))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
# Predict the test fold at 10% significance and print the per-class
# outputs side by side with the true labels
prediction = icp.predict(data.data[test, :], significance=0.1)
header = np.array(["c0", "c1", "c2", "Truth"])
table = np.vstack([prediction.T, data.target[test]]).T
df = pd.DataFrame(np.vstack([header, table]))
print(df)
Ejemplo n.º 18
0
Archivo: RF.py Proyecto: e7dal/flame
    def build(self):
        '''Build a new RF model with the X and Y numpy matrices.

        Optionally optimizes the estimator via grid search ('tune') and,
        when 'conformal' is set, replaces self.estimator with an aggregated
        conformal predictor wrapping a copy of the fitted base model.

        Returns a (success, results) tuple; results is a list of
        (key, description, value) tuples, or an error string on failure.
        '''

        # Make a copy of data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = []
        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))
        results.append(('model', 'model type', 'RF'))

        conformal = self.param.getVal('conformal')
        # If tune then call gridsearch to optimize the estimator
        if self.param.getVal('tune'):

            LOG.info("Optimizing RF estimator")

            try:
                # Check type of model
                if self.param.getVal('quantitative'):
                    self.estimator = RandomForestRegressor(
                        **self.estimator_parameters)
                    self.optimize(X, Y, self.estimator, self.tune_parameters)
                    # results.append(('model','model type','RF quantitative (optimized)'))
                else:
                    self.estimator = RandomForestClassifier(
                        **self.estimator_parameters)
                    self.optimize(X, Y, self.estimator, self.tune_parameters)
                    # results.append(('model','model type','RF qualitative (optimized)'))

            except Exception as e:
                return False, f'Exception optimizing RF estimator with exception {e}'

        else:
            try:
                if self.param.getVal('quantitative'):

                    self.estimator = RandomForestRegressor(
                        **self.estimator_parameters)

                    if not conformal:
                        LOG.info("Building Quantitative RF model")
                        # results.append(('model', 'model type', 'RF quantitative'))
                else:

                    self.estimator = RandomForestClassifier(
                        **self.estimator_parameters)

                    if not conformal:
                        LOG.info("Building Qualitative RF model")
                        # results.append(('model', 'model type', 'RF qualitative'))

                self.estimator.fit(X, Y)

            except Exception as e:
                return False, f'Exception building RF estimator with exception {e}'

        # Non-conformal models are complete at this point.
        if not conformal:
            return True, results

        # Keep a copy of the fitted base estimator for the conformal wrapper.
        self.estimator_temp = copy(self.estimator)

        # Create the conformal estimator
        try:
            # Conformal regressor
            if self.param.getVal('quantitative'):
                conformal_settings = self.param.getDict('conformal_settings')
                LOG.info("Building conformal Quantitative RF model")

                underlying_model = RegressorAdapter(self.estimator_temp)
                # KNN regressor used to normalize nonconformity scores;
                # neighbor count comes from the 'KNN_NN' setting.
                self.normalizing_model = RegressorAdapter(
                    KNeighborsRegressor(
                        n_neighbors=conformal_settings['KNN_NN']))
                # normalizing_model = RegressorAdapter(self.estimator_temp)
                normalizer = RegressorNormalizer(underlying_model,
                                                 copy(self.normalizing_model),
                                                 AbsErrorErrFunc())
                nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                 normalizer)

                # self.conformal_pred = AggregatedCp(IcpRegressor
                # (RegressorNc(RegressorAdapter(self.estimator))),
                #                                   BootstrapSampler())

                self.estimator = AggregatedCp(IcpRegressor(nc),
                                              BootstrapSampler())

                self.estimator.fit(X, Y)
                # results.append(('model', 'model type', 'conformal RF quantitative'))

            # Conformal classifier
            else:

                LOG.info("Building conformal Qualitative RF model")

                self.estimator = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                     MarginErrFunc())), BootstrapSampler())

                # Fit estimator to the data
                self.estimator.fit(X, Y)
                # results.append(('model', 'model type', 'conformal RF qualitative'))

        except Exception as e:
            return False, f'Exception building conformal RF estimator with exception {e}'

        return True, results


## Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''
Ejemplo n.º 19
0
from nonconformist.nc import (MarginErrFunc, ClassifierNc, RegressorNc,
                              RegressorNormalizer, AbsErrorErrFunc,
                              SignErrorErrFunc)

from nonconformist.evaluation import (cross_val_score, ClassIcpCvHelper,
                                      RegIcpCvHelper, class_avg_c,
                                      class_mean_errors, reg_mean_errors,
                                      reg_median_size)

# -----------------------------------------------------------------------------
# Classification
# -----------------------------------------------------------------------------
data = load_iris()

# Margin-based nonconformity on top of a 100-tree random forest.
forest = RandomForestClassifier(n_estimators=100)
nc = ClassifierNc(ClassifierAdapter(forest), MarginErrFunc())
icp = IcpClassifier(nc)
icp_cv = ClassIcpCvHelper(icp)

scores = cross_val_score(icp_cv,
                         data.data,
                         data.target,
                         iterations=5,
                         folds=5,
                         scoring_funcs=[class_mean_errors, class_avg_c],
                         significance_levels=[0.05, 0.1, 0.2])

# Report mean error rate and average set size per significance level.
print('Classification: iris')
scores = scores.drop(['fold', 'iter'], axis=1)
print(scores.groupby(['significance']).mean())
Ejemplo n.º 20
0
    def test_nc_factory(self):
        """Smoke-test NcFactory.create_nc across classification/regression,
        normalized and out-of-bag (OOB) variants, printing cross-validated
        scores for each configuration."""
        def score_model(icp, icp_name, ds, ds_name, scoring_funcs):
            """Cross-validate one conformal predictor and print mean scores
            grouped by significance level."""
            scores = cross_val_score(
                icp,
                ds.data,
                ds.target,
                iterations=10,
                folds=10,
                scoring_funcs=scoring_funcs,
                significance_levels=[0.05, 0.1, 0.2],
            )

            print("\n{}: {}".format(icp_name, ds_name))
            scores = scores.drop(["fold", "iter"], axis=1)
            print(scores.groupby(["significance"]).mean())

        # -----------------------------------------------------------------------------
        # Classification
        # -----------------------------------------------------------------------------
        data = load_iris()

        nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100))
        icp = IcpClassifier(nc)
        icp_cv = ClassIcpCvHelper(icp)
        score_model(icp_cv, "IcpClassifier", data, "iris",
                    [class_mean_errors, class_avg_c])

        # -----------------------------------------------------------------------------
        # Classification (normalized)
        # -----------------------------------------------------------------------------
        data = load_iris()

        # A KNN regressor normalizes the nonconformity scores.
        nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100),
                                 normalizer_model=KNeighborsRegressor())
        icp = IcpClassifier(nc)
        icp_cv = ClassIcpCvHelper(icp)

        score_model(icp_cv, "IcpClassifier (normalized)", data, "iris",
                    [class_mean_errors, class_avg_c])

        # -----------------------------------------------------------------------------
        # Classification OOB
        # -----------------------------------------------------------------------------
        data = load_iris()

        # oob=True requires the underlying model to expose oob_score.
        nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100,
                                                        oob_score=True),
                                 oob=True)
        icp_cv = OobCpClassifier(nc)

        score_model(icp_cv, "IcpClassifier (OOB)", data, "iris",
                    [class_mean_errors, class_avg_c])

        # -----------------------------------------------------------------------------
        # Classification OOB normalized
        # -----------------------------------------------------------------------------
        data = load_iris()

        nc = NcFactory.create_nc(
            RandomForestClassifier(n_estimators=100, oob_score=True),
            oob=True,
            normalizer_model=KNeighborsRegressor(),
        )
        icp_cv = OobCpClassifier(nc)

        score_model(
            icp_cv,
            "IcpClassifier (OOB, normalized)",
            data,
            "iris",
            [class_mean_errors, class_avg_c],
        )

        # -----------------------------------------------------------------------------
        # Regression
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        nc = NcFactory.create_nc(RandomForestRegressor(n_estimators=100))
        icp = IcpRegressor(nc)
        icp_cv = RegIcpCvHelper(icp)

        score_model(icp_cv, "IcpRegressor", data, "diabetes",
                    [reg_mean_errors, reg_median_size])

        # -----------------------------------------------------------------------------
        # Regression (normalized)
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        nc = NcFactory.create_nc(RandomForestRegressor(n_estimators=100),
                                 normalizer_model=KNeighborsRegressor())
        icp = IcpRegressor(nc)
        icp_cv = RegIcpCvHelper(icp)

        score_model(
            icp_cv,
            "IcpRegressor (normalized)",
            data,
            "diabetes",
            [reg_mean_errors, reg_median_size],
        )

        # -----------------------------------------------------------------------------
        # Regression OOB
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        nc = NcFactory.create_nc(RandomForestRegressor(n_estimators=100,
                                                       oob_score=True),
                                 oob=True)
        icp_cv = OobCpRegressor(nc)

        score_model(icp_cv, "IcpRegressor (OOB)", data, "diabetes",
                    [reg_mean_errors, reg_median_size])

        # -----------------------------------------------------------------------------
        # Regression OOB normalized
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        nc = NcFactory.create_nc(
            RandomForestRegressor(n_estimators=100, oob_score=True),
            oob=True,
            normalizer_model=KNeighborsRegressor(),
        )
        icp_cv = OobCpRegressor(nc)

        score_model(
            icp_cv,
            "IcpRegressor (OOB, normalized)",
            data,
            "diabetes",
            [reg_mean_errors, reg_median_size],
        )
Ejemplo n.º 21
0
    def test_acp_classification_tree(self):
        """Compare aggregated/cross/bootstrap conformal classifiers built
        on decision trees, printing error rates and prediction tables."""

        # ---------------------------------------------------------------------
        # Experiment setup: two thirds for training, one third for testing
        # ---------------------------------------------------------------------
        data = load_iris()

        idx = np.random.permutation(data.target.size)
        split = int(2 * idx.size / 3)
        train = idx[:split]
        test = idx[split:]

        truth = data.target[test].reshape(-1, 1)
        columns = ["C-{}".format(i)
                   for i in np.unique(data.target)] + ["truth"]
        significance = 0.1

        # ---------------------------------------------------------------------
        # Define models (insertion order fixes the evaluation order below)
        # ---------------------------------------------------------------------
        def tree_icp():
            # Fresh inductive conformal classifier over a decision tree.
            return IcpClassifier(
                ClassifierNc(ClassifierAdapter(DecisionTreeClassifier())))

        models = {
            "ACP-RandomSubSampler":
            AggregatedCp(tree_icp(), RandomSubSampler()),
            "ACP-CrossSampler":
            AggregatedCp(tree_icp(), CrossSampler()),
            "ACP-BootstrapSampler":
            AggregatedCp(tree_icp(), BootstrapSampler()),
            "CCP":
            CrossConformalClassifier(tree_icp()),
            "BCP":
            BootstrapConformalClassifier(tree_icp()),
        }

        # ---------------------------------------------------------------------
        # Train, predict and evaluate each model in turn
        # ---------------------------------------------------------------------
        for name, model in models.items():
            model.fit(data.data[train, :], data.target[train])
            prediction = model.predict(data.data[test, :],
                                       significance=significance)
            # Side-by-side table of prediction-set flags and the truth.
            table = np.hstack((prediction, truth))
            df = pd.DataFrame(table, columns=columns)
            print("\n{}".format(name))
            print("Error rate: {}".format(
                class_mean_errors(prediction, truth, significance)))
            print(df)

        self.assertTrue(True)
Ejemplo n.º 22
0
    # NOTE(review): this fragment starts inside an enclosing scope that is
    # not visible here; `data`, `infile`, `nmodels`, `target`, `margin`,
    # `mode`, `predfile` and the open file handle `f` are defined earlier.
    # Feature matrix: every column except the id and the target.
    train = data.drop(['id'], axis=1)
    train = train.drop(['target'], axis=1).values
    # 70/30 split point between proper-training and calibration rows.
    part1 = int(0.7 * len(train))
    for xx in range(1, nmodels + 1):
        modelfile2 = infile + "_nonconf" + "_" + str(xx) + ".model"
        print("Working on model", xx)

        # Fresh random split for every model in the ensemble.
        idx = np.random.permutation(int(len(train)))
        print(idx)
        trainset = idx[:part1]
        calset = idx[part1:]

        # Conditional ICP: calibration is grouped by instance[1]
        # (presumably the class label of an (x, y) pair -- verify against
        # the nonconformist documentation).
        nc = ProbEstClassifierNc(RandomForestClassifier,
                                 margin,
                                 model_params={'n_estimators': 100})
        icp_norm = IcpClassifier(nc, condition=lambda instance: instance[1])

        icp_norm.fit(train[trainset], target[trainset])
        icp_norm.calibrate(train[calset], target[calset])
        # Persist each calibrated predictor to the already-open file `f`.
        cloudpickle.dump(icp_norm, f)
    f.close()

if mode != 't':
    # Prediction mode: initialize the per-model p-value output file.
    outfile = predfile + "_nonconf_pred100sum.csv"
    f2 = open(outfile, 'w')
    f2.write('id\tp-value_low_class\tp-value_high_class\tclass\tmodel\n')
    f2.close()

    data = pd.read_csv(predfile, sep='\t', header=0, index_col=None)
    # Negative targets are clamped to class 0 -- TODO confirm intent.
    data.loc[data['target'] < 0, 'target'] = 0
    labels = data['id']
Ejemplo n.º 23
0
# NOTE(review): `data` is loaded before this fragment; assumed to be a
# sklearn-style bunch with a .target attribute -- confirm upstream.
idx = np.random.permutation(data.target.size)
# Two thirds for training, the remainder held out for testing.
train = idx[:int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# Column of true labels plus per-class column names for the output table.
truth = data.target[test].reshape(-1, 1)
columns = ['C-{}'.format(i) for i in np.unique(data.target)] + ['truth']
significance = 0.1

# -----------------------------------------------------------------------------
# Define models
# -----------------------------------------------------------------------------

models = {
    'ACP-RandomSubSampler':
    AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier()))), RandomSubSampler()),
    'ACP-CrossSampler':
    AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier()))), CrossSampler()),
    'ACP-BootstrapSampler':
    AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier()))), BootstrapSampler()),
    'CCP':
    CrossConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
            DecisionTreeClassifier())))),
    'BCP':
    BootstrapConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(
Ejemplo n.º 24
0
    def build(self):
        '''Build a new RF model with the X and Y numpy matrices.

        Optionally tunes the estimator and, when self.conformal is set,
        fits an aggregated conformal predictor alongside the base model.

        Returns (True, results), where results is a list of
        (key, description, value) tuples, or False if the model already
        failed upstream.
        '''

        if self.failed:
            return False

        # Work on copies so the stored matrices are never mutated.
        X = self.X.copy()
        Y = self.Y.copy()

        results = []

        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        if self.cv:
            self.cv = getCrossVal(self.cv,
                                  self.estimator_parameters["random_state"],
                                  self.n, self.p)
        if self.tune:
            # NOTE(review): self.optimize is assumed to assign
            # self.estimator; otherwise the final fit below fails. Confirm.
            if self.quantitative:
                self.optimize(X, Y, RandomForestRegressor(),
                              self.tune_parameters)
                results.append(
                    ('model', 'model type', 'RF quantitative (optimized)'))
            else:
                self.optimize(X, Y, RandomForestClassifier(),
                              self.tune_parameters)
                results.append(
                    ('model', 'model type', 'RF qualitative (optimized)'))
        else:
            if self.quantitative:
                log.info("Building Quantitative RF model")
                # class_weight is a classifier-only parameter; drop it.
                self.estimator_parameters.pop('class_weight', None)

                self.estimator = RandomForestRegressor(
                    **self.estimator_parameters)
                results.append(('model', 'model type', 'RF quantitative'))

            else:
                log.info("Building Qualitative RF model")
                self.estimator = RandomForestClassifier(
                    **self.estimator_parameters)
                results.append(('model', 'model type', 'RF qualitative'))

        if self.conformal:
            if self.quantitative:
                # Nonconformity = absolute error, normalized per instance by
                # a second regressor's predicted error.
                underlying_model = RegressorAdapter(self.estimator)
                # FIX: a KNeighborsRegressor(n_neighbors=5) normalizer was
                # created here and immediately overwritten by the line
                # below, so the dead assignment has been removed (behavior
                # unchanged -- the estimator itself acts as normalizer).
                normalizing_model = RegressorAdapter(self.estimator)
                normalizer = RegressorNormalizer(underlying_model,
                                                 normalizing_model,
                                                 AbsErrorErrFunc())
                nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                 normalizer)
                # self.conformal_pred = AggregatedCp(IcpRegressor(RegressorNc(RegressorAdapter(self.estimator))),
                #                                   BootstrapSampler())

                self.conformal_pred = AggregatedCp(IcpRegressor(nc),
                                                   BootstrapSampler())
                self.conformal_pred.fit(X, Y)
                # overrides non-conformal
                results.append(
                    ('model', 'model type', 'conformal RF quantitative'))

            else:
                self.conformal_pred = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(ClassifierAdapter(self.estimator),
                                     MarginErrFunc())), BootstrapSampler())
                self.conformal_pred.fit(X, Y)
                # overrides non-conformal
                results.append(
                    ('model', 'model type', 'conformal RF qualitative'))

        # Fit the (possibly tuned) base estimator on the full data.
        self.estimator.fit(X, Y)

        return True, results


#### Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''
Ejemplo n.º 25
0
    def build(self):
        '''Build a new SVM model with the X and Y numpy matrices.

        Optionally tunes the estimator via self.optimize and, when
        'conformal' is set, replaces it with an aggregated conformal
        predictor built on a copy of the fitted estimator.

        Returns (True, results), where results is a list of
        (key, description, value) tuples.
        '''

        # Make a copy of data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = []
        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        # If tune then call gridsearch to optimize the estimator
        if self.param.getVal('tune'):
            try:
                # Check type of model
                if self.param.getVal('quantitative'):
                    self.optimize(X, Y, svm.SVR(**self.estimator_parameters),
                                  self.tune_parameters)
                    results.append(('model', 'model type',
                                    'SVM quantitative (optimized)'))

                else:
                    self.optimize(X, Y, svm.SVC(**self.estimator_parameters),
                                  self.tune_parameters)
                    results.append(
                        ('model', 'model type', 'SVM qualitative (optimized)'))
                LOG.debug('SVM estimator optimized')
            except Exception as e:
                # FIX: the adjacent f-strings were concatenated without a
                # separating space, logging 'SVMestimator'.
                LOG.error(f'Exception optimizing SVM '
                          f'estimator with exception {e}')
        else:
            try:
                LOG.info("Building  SVM model")
                if self.param.getVal('quantitative'):
                    LOG.info("Building Quantitative SVM-R model")
                    self.estimator = svm.SVR(**self.estimator_parameters)
                    results.append(('model', 'model type', 'SVM quantitative'))
                else:
                    self.estimator = svm.SVC(**self.estimator_parameters)
                    results.append(('model', 'model type', 'SVM qualitative'))
            except Exception as e:
                # FIX: same missing-space defect as above.
                LOG.error(f'Exception building SVM '
                          f'estimator with exception {e}')
        # NOTE(review): if an exception was logged above, self.estimator may
        # be unset (or stale) here and this fit will raise -- confirm that
        # self.optimize always assigns self.estimator.
        self.estimator.fit(X, Y)
        self.estimator_temp = copy(self.estimator)
        if self.param.getVal('conformal'):
            try:
                LOG.info("Building aggregated conformal SVM model")
                if self.param.getVal('quantitative'):
                    # Absolute-error nonconformity, normalized by the
                    # estimator's own predicted error.
                    underlying_model = RegressorAdapter(self.estimator_temp)
                    # normalizing_model = RegressorAdapter(
                    # KNeighborsRegressor(n_neighbors=5))
                    normalizing_model = RegressorAdapter(self.estimator_temp)
                    normalizer = RegressorNormalizer(underlying_model,
                                                     normalizing_model,
                                                     AbsErrorErrFunc())
                    nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                     normalizer)
                    # self.conformal_pred = AggregatedCp(IcpRegressor(
                    # RegressorNc(RegressorAdapter(self.estimator))),
                    #                                   BootstrapSampler())

                    self.estimator = AggregatedCp(IcpRegressor(nc),
                                                  BootstrapSampler())
                    self.estimator.fit(X, Y)
                    # overrides non-conformal
                    results.append(
                        ('model', 'model type', 'conformal SVM quantitative'))

                else:
                    self.estimator = AggregatedCp(
                        IcpClassifier(
                            ClassifierNc(
                                ClassifierAdapter(self.estimator_temp),
                                MarginErrFunc())), BootstrapSampler())
                    self.estimator.fit(X, Y)
                    # overrides non-conformal
                    results.append(
                        ('model', 'model type', 'conformal SVM qualitative'))
            except Exception as e:
                LOG.error(f'Exception building aggregated conformal SVM '
                          f'estimator with exception {e}')
        # Fit estimator to the data
        return True, results
Ejemplo n.º 26
0
    def test_cross_validation(self):
        """Cross-validate ICP classifiers and regressors with several
        nonconformity functions (margin, absolute error, signed error,
        with and without normalization) and print aggregate scores."""
        # -----------------------------------------------------------------------------
        # Classification
        # -----------------------------------------------------------------------------
        data = load_iris()

        icp = IcpClassifier(
            ClassifierNc(
                ClassifierAdapter(RandomForestClassifier(n_estimators=100)),
                MarginErrFunc()))
        icp_cv = ClassIcpCvHelper(icp)

        scores = cross_val_score(
            icp_cv,
            data.data,
            data.target,
            iterations=5,
            folds=5,
            scoring_funcs=[class_mean_errors, class_avg_c],
            significance_levels=[0.05, 0.1, 0.2],
        )

        print("Classification: iris")
        scores = scores.drop(["fold", "iter"], axis=1)
        print(scores.groupby(["significance"]).mean())

        # -----------------------------------------------------------------------------
        # Regression, absolute error
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        icp = IcpRegressor(
            RegressorNc(
                RegressorAdapter(RandomForestRegressor(n_estimators=100)),
                AbsErrorErrFunc()))
        icp_cv = RegIcpCvHelper(icp)

        scores = cross_val_score(
            icp_cv,
            data.data,
            data.target,
            iterations=5,
            folds=5,
            scoring_funcs=[reg_mean_errors, reg_median_size],
            significance_levels=[0.05, 0.1, 0.2],
        )

        print("Absolute error regression: diabetes")
        scores = scores.drop(["fold", "iter"], axis=1)
        print(scores.groupby(["significance"]).mean())

        # -----------------------------------------------------------------------------
        # Regression, normalized absolute error
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        # A second forest predicts the expected error, which scales the
        # nonconformity scores per instance.
        underlying_model = RegressorAdapter(
            RandomForestRegressor(n_estimators=100))
        normalizer_model = RegressorAdapter(
            RandomForestRegressor(n_estimators=100))
        normalizer = RegressorNormalizer(underlying_model, normalizer_model,
                                         AbsErrorErrFunc())
        nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)

        icp = IcpRegressor(nc)
        icp_cv = RegIcpCvHelper(icp)

        scores = cross_val_score(
            icp_cv,
            data.data,
            data.target,
            iterations=5,
            folds=5,
            scoring_funcs=[reg_mean_errors, reg_median_size],
            significance_levels=[0.05, 0.1, 0.2],
        )

        print("Normalized absolute error regression: diabetes")
        scores = scores.drop(["fold", "iter"], axis=1)
        print(scores.groupby(["significance"]).mean())

        # -----------------------------------------------------------------------------
        # Regression, signed error
        # (banner fixed: this stanza is the plain signed-error variant)
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        icp = IcpRegressor(
            RegressorNc(
                RegressorAdapter(RandomForestRegressor(n_estimators=100)),
                SignErrorErrFunc()))
        icp_cv = RegIcpCvHelper(icp)

        scores = cross_val_score(
            icp_cv,
            data.data,
            data.target,
            iterations=5,
            folds=5,
            scoring_funcs=[reg_mean_errors, reg_median_size],
            significance_levels=[0.05, 0.1, 0.2],
        )

        print("Signed error regression: diabetes")
        scores = scores.drop(["fold", "iter"], axis=1)
        print(scores.groupby(["significance"]).mean())

        # -----------------------------------------------------------------------------
        # Regression, normalized signed error
        # (banner fixed: this stanza is the normalized signed-error variant)
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        underlying_model = RegressorAdapter(
            RandomForestRegressor(n_estimators=100))
        normalizer_model = RegressorAdapter(
            RandomForestRegressor(n_estimators=100))

        # The normalization model can use a different error function than is
        # used to measure errors on the underlying model
        normalizer = RegressorNormalizer(underlying_model, normalizer_model,
                                         AbsErrorErrFunc())
        nc = RegressorNc(underlying_model, SignErrorErrFunc(), normalizer)

        icp = IcpRegressor(nc)
        icp_cv = RegIcpCvHelper(icp)

        scores = cross_val_score(
            icp_cv,
            data.data,
            data.target,
            iterations=5,
            folds=5,
            scoring_funcs=[reg_mean_errors, reg_median_size],
            significance_levels=[0.05, 0.1, 0.2],
        )

        print("Normalized signed error regression: diabetes")
        scores = scores.drop(["fold", "iter"], axis=1)
        print(scores.groupby(["significance"]).mean())
Ejemplo n.º 27
0
    def CF_qualitative_validation(self):
        '''Performs 5-fold cross-validation of a conformal qualitative
        (binary) model.

        For each fold an aggregated conformal classifier is built on the
        training partition and used to predict the test partition.  The
        per-class prediction flags are folded into a confusion matrix from
        which sensitivity, specificity, MCC, conformal coverage and
        conformal accuracy are derived.

        Returns
        -------
        tuple(bool, dict)
            Success flag and a dict whose 'quality' entry holds
            (key, description, value) triplets.
        '''

        # Work on copies so the original matrices are left untouched.
        X = self.X.copy()
        Y = self.Y.copy()

        # Aggregated confusion-matrix counters across all folds.
        c0_correct_all = 0      # class 0 correctly predicted (TN)
        c0_incorrect_all = 0    # class 0 predicted as class 1 (FP)
        c1_correct_all = 0      # class 1 correctly predicted (TP)
        c1_incorrect_all = 0    # class 1 predicted as class 0 (FN)
        # Instances with an uninformative prediction region (outside the
        # applicability domain).
        not_predicted_all = 0

        info = []

        kf = KFold(n_splits=5, shuffle=True, random_state=46)
        # Copy of Y used as a template so each fold's predictions can be
        # stored at the original sample positions.
        Y_pred = copy.copy(Y).tolist()
        try:
            for train_index, test_index in kf.split(X):
                # Generate training and test sets for this fold.
                X_train, X_test = X[train_index], X[test_index]
                Y_train, Y_test = Y[train_index], Y[test_index]
                # Aggregated conformal classifier: bootstrap aggregation of
                # inductive conformal predictors over the base estimator.
                conformal_pred = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                     MarginErrFunc())), BootstrapSampler())
                conformal_pred.fit(X_train, Y_train)
                prediction = conformal_pred.predict(
                    X_test, self.param.getVal('conformalSignificance'))
                # Place each prediction at its original index.
                for index, el in enumerate(test_index):
                    Y_pred[el] = prediction[index]

            # A prediction is informative only when exactly one class is
            # inside the prediction region; otherwise the instance counts
            # as "not predicted".
            for i in range(len(Y_pred)):
                real = float(Y[i])
                predicted = Y_pred[i]
                if predicted[0] != predicted[1]:
                    if real == 0 and predicted[0] == True:
                        c0_correct_all += 1
                    if real == 0 and predicted[1] == True:
                        c0_incorrect_all += 1
                    if real == 1 and predicted[1] == True:
                        c1_correct_all += 1
                    if real == 1 and predicted[0] == True:
                        c1_incorrect_all += 1
                else:
                    not_predicted_all += 1

        except Exception as e:
            LOG.error(f'Qualitative conformal validation'
                      f' failed with exception: {e}')
            raise e

        # Confusion matrix (class 1 taken as the positive class).
        self.TN = c0_correct_all
        self.FP = c0_incorrect_all
        self.TP = c1_correct_all
        self.FN = c1_incorrect_all

        info.append(('TP', 'True positives in cross-validation', self.TP))
        info.append(('TN', 'True negatives in cross-validation', self.TN))
        info.append(('FP', 'False positives in cross-validation', self.FP))
        info.append(('FN', 'False negatives in cross-validation', self.FN))

        # Sensitivity and specificity; a zero denominator yields '-'.
        try:
            self.sensitivity = (self.TP / (self.TP + self.FN))
        except ZeroDivisionError as e:
            LOG.error(f'Failed to compute sensitivity with exception {e}')
            self.sensitivity = '-'
        try:
            self.specificity = (self.TN / (self.TN + self.FP))
        except ZeroDivisionError as e:
            LOG.error(f'Failed to compute specificity with exception {e}')
            self.specificity = '-'

        # Matthews Correlation Coefficient. np.sqrt returns a float, so a
        # zero denominator would silently produce nan rather than raising;
        # test it explicitly instead of relying on a try/except.
        mcc_den = np.sqrt((self.TP + self.FP) * (self.TP + self.FN) *
                          (self.TN + self.FP) * (self.TN + self.FN))
        if mcc_den == 0:
            LOG.error('Failed to compute Matthews Correlation Coefficient:'
                      ' zero denominator')
            self.mcc = '-'
        else:
            self.mcc = ((self.TP * self.TN) - (self.FP * self.FN)) / mcc_den

        info.append(('Sensitivity', 'Sensitivity in cross-validation',
                     self.sensitivity))
        info.append(('Specificity', 'Specificity in cross-validation',
                     self.specificity))
        info.append(
            ('MCC', 'Matthews Correlation Coefficient in cross-validation',
             self.mcc))

        try:
            # Coverage: fraction of compounds inside the applicability
            # domain (i.e. with an informative prediction).
            self.conformal_coverage = (
                self.TN + self.FP + self.TP + self.FN) / (
                    (self.TN + self.FP + self.TP + self.FN) +
                    not_predicted_all)
        except ZeroDivisionError as e:
            LOG.error(f'Failed to compute conformal coverage with'
                      f' exception {e}')
            self.conformal_coverage = '-'

        try:
            # Accuracy: fraction of correct predictions among those made.
            self.conformal_accuracy = (
                float(self.TN + self.TP) /
                float(self.FP + self.FN + self.TN + self.TP))
        except ZeroDivisionError as e:
            LOG.error(f'Failed to compute conformal accuracy with'
                      f' exception {e}')
            self.conformal_accuracy = '-'

        info.append(('Conformal_coverage', 'Conformal coverage',
                     self.conformal_coverage))
        info.append(('Conformal_accuracy', 'Conformal accuracy',
                     self.conformal_accuracy))

        results = {'quality': info}
        return True, results
Ejemplo n.º 28
0
# Underlying classifier shared by every conformal model below, and the
# spreadsheet the evaluation results are written to.
classification_method = DecisionTreeClassifier()
file_name = 'decision_tree.xls'

# Per-model accumulators for the evaluation results.
ACP_Random, ACP_Cross, ACP_Boot, CCP, BCP = [], [], [], [], []
# -----------------------------------------------------------------------------
# Define models
# -----------------------------------------------------------------------------

models = {
    'ACP-RandomSubSampler':
    AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))),
        RandomSubSampler()),
    'ACP-CrossSampler':
    AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))),
        CrossSampler()),
    'ACP-BootstrapSampler':
    AggregatedCp(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))),
        BootstrapSampler()),
    'CCP':
    CrossConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method)))),
    'BCP':
    BootstrapConformalClassifier(
        IcpClassifier(ClassifierNc(ClassifierAdapter(classification_method))))
Ejemplo n.º 29
0
        scoring_funcs=scoring_funcs,
        significance_levels=[0.05, 0.1, 0.2],
    )

    print("\n{}: {}".format(icp_name, ds_name))
    scores = scores.drop(["fold", "iter"], axis=1)
    print(scores.groupby(["significance"]).mean())


# -----------------------------------------------------------------------------
# Classification
# -----------------------------------------------------------------------------
# Inductive conformal classifier on the iris data, using a 100-tree
# random forest as the underlying model.
data = load_iris()

nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100))
icp = IcpClassifier(nc)
icp_cv = ClassIcpCvHelper(icp)
# Cross-validated scoring: mean class error and average prediction-set size.
score_model(icp_cv, "IcpClassifier", data, "iris",
            [class_mean_errors, class_avg_c])

# -----------------------------------------------------------------------------
# Classification (normalized)
# -----------------------------------------------------------------------------
data = load_iris()

nc = NcFactory.create_nc(RandomForestClassifier(n_estimators=100),
                         normalizer_model=KNeighborsRegressor())
icp = IcpClassifier(nc)
icp_cv = ClassIcpCvHelper(icp)

score_model(icp_cv, "IcpClassifier (normalized)", data, "iris",
Ejemplo n.º 30
0
import numpy as np  # was missing: np.unique / np.random are used below
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

from nonconformist.base import ClassifierAdapter
from nonconformist.icp import IcpClassifier
from nonconformist.nc import ClassifierNc

data = load_iris()
x, y = data.data, data.target

# Remap the class labels to consecutive integers 0..k-1.
for i, y_ in enumerate(np.unique(y)):
    y[y == y_] = i

# Random permutation split: 1/3 train, 1/3 calibration, 1/3 test.
n_instances = y.size
idx = np.random.permutation(n_instances)

train_idx = idx[:int(n_instances / 3)]
cal_idx = idx[int(n_instances / 3):2 * int(n_instances / 3)]
test_idx = idx[2 * int(n_instances / 3):]

# Inductive conformal classifier: fit on the training set, then
# calibrate the nonconformity scores on the calibration set.
nc = ClassifierNc(ClassifierAdapter(RandomForestClassifier()))
icp = IcpClassifier(nc)

icp.fit(x[train_idx, :], y[train_idx])
icp.calibrate(x[cal_idx, :], y[cal_idx])


# Report the point prediction with its confidence and credibility.
print(pd.DataFrame(icp.predict_conf(x[test_idx, :]),
                   columns=['Label', 'Confidence', 'Credibility']))
    def ccp_predict(self, data_lbld, data_unlbld, new_lbld):
        """Cross-conformal prediction of p-values for unlabeled samples.

        Fits the configured classifier on 5 stratified folds of the
        labeled data (optionally SMOTE-rebalanced), calibrates an
        inductive conformal predictor on each held-out fold and averages
        the per-class p-values over the folds.

        Parameters
        ----------
        data_lbld : DataFrame
            Labeled data; the last column is the label, and the last
            feature column is used as the per-sample weight.
        data_unlbld : DataFrame
            Unlabeled data with the same column layout.
        new_lbld : DataFrame
            Newly labeled data appended to `data_lbld` before fitting.

        Returns
        -------
        DataFrame
            mean_p_0, mean_p_1, credibility and confidence per unlabeled
            sample, indexed like `data_unlbld`.
        """

        # SMOTE instance for class rebalancing.
        smote = SMOTE(random_state=self.random_state)

        # Classifier configured for the target variable.
        classifier_y = self.classifiers['classifier_y']
        parameters_y = self.clf_parameters['classifier_y']

        clf = classifier_y.set_params(**parameters_y)

        X = data_lbld.iloc[:, :-2]
        y = data_lbld.iloc[:, -1]

        X_new = new_lbld.iloc[:, :-2]
        y_new = new_lbld.iloc[:, -1]

        # DataFrame.append was deprecated and removed in pandas 2.0;
        # pd.concat is the supported equivalent.
        X = pd.concat([X, X_new], sort=False)
        y = pd.concat([y, y_new])

        X_unlbld = data_unlbld.iloc[:, :-2]

        # random_state is only meaningful with shuffle=True (recent
        # scikit-learn raises otherwise); without shuffling the splits
        # are deterministic, so it is omitted here.
        sss = StratifiedKFold(n_splits=5)

        p_values = []

        for train_index, calib_index in sss.split(X, y):
            X_train, X_calib = X.iloc[train_index], X.iloc[calib_index]
            y_train, y_calib = y.iloc[train_index], y.iloc[calib_index]

            # The last feature column carries per-sample weights.
            if self.rebalancing_parameters['SMOTE_y']:
                # SMOTE returns numpy arrays, hence positional indexing.
                X_train, y_train = smote.fit_resample(X_train, y_train)
                clf.fit(X_train[:, :-1], y_train, sample_weight=X_train[:, -1])
            else:
                clf.fit(X_train.iloc[:, :-1],
                        y_train,
                        sample_weight=X_train.iloc[:, -1])

            nc = NcFactory.create_nc(clf, MarginErrFunc())
            icp = IcpClassifier(nc)

            if self.rebalancing_parameters['SMOTE_y']:
                icp.fit(X_train[:, :-1], y_train)
            else:
                icp.fit(X_train.iloc[:, :-1].values, y_train)

            icp.calibrate(X_calib.iloc[:, :-1].values, y_calib)

            # p-values for both classes for every unlabeled sample.
            p_values.append(
                icp.predict(X_unlbld.iloc[:, :-1].values, significance=None))

        # Average the p-values over the folds.
        mean_p_values = np.array(p_values).mean(axis=0)
        ccp_predictions = pd.DataFrame(mean_p_values,
                                       columns=['mean_p_0', 'mean_p_1'])
        # Vectorized credibility/confidence: equivalent to the previous
        # per-row iterrows loops, in a single pass.
        p_cols = ccp_predictions[['mean_p_0', 'mean_p_1']]
        ccp_predictions["credibility"] = p_cols.max(axis=1)
        ccp_predictions["confidence"] = 1 - p_cols.min(axis=1)

        ccp_predictions.index = X_unlbld.index

        return ccp_predictions
Ejemplo n.º 32
0
    def CF_qualitative_validation(self):
        '''Performs external validation of a conformal qualitative model.

        Three 75/25 train/test splits are generated; an aggregated
        conformal classifier is fitted on each training partition and the
        confusion-matrix counts of the three runs are averaged.

        Returns
        -------
        tuple(bool, tuple(list,))
            Success flag and a one-element tuple holding the list of
            (key, description, value) result triplets.
        '''

        # Work on copies so the original matrices are left untouched.
        X = self.X.copy()
        Y = self.Y.copy()

        # Number of external validation rounds for the aggregated
        # conformal estimator.
        # NOTE(review): the seed values themselves are never used below;
        # random_state is the loop index. Presumably seeds[i] was
        # intended — left unchanged to preserve current behavior.
        seeds = [5, 7, 35]
        # Per-round confusion-matrix counts.
        c0_correct_all = []     # class 0 correctly predicted (TN)
        c0_incorrect_all = []   # class 0 predicted as class 1 (FP)
        c1_correct_all = []     # class 1 correctly predicted (TP)
        c1_incorrect_all = []   # class 1 predicted as class 0 (FN)
        # Per-round counts of instances outside the applicability domain.
        not_predicted_all = []

        results = []
        # Iterate over the validation rounds.
        try:
            for i in range(len(seeds)):
                # Generate training and test sets for this round.
                X_train, X_test,\
                Y_train, Y_test = train_test_split(X, Y,
                                                    test_size=0.25,
                                                    random_state=i,
                                                    shuffle=True)
                # Aggregated conformal classifier: bootstrap aggregation
                # of inductive conformal predictors.
                conformal_pred = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(ClassifierAdapter(self.estimator),
                                     MarginErrFunc())), BootstrapSampler())
                conformal_pred.fit(X_train, Y_train)
                prediction = conformal_pred.predict(X_test,
                                                    self.conformalSignificance)

                c0_correct = 0
                c1_correct = 0
                not_predicted = 0
                c0_incorrect = 0
                c1_incorrect = 0

                # A prediction is informative only when exactly one class
                # is inside the prediction region. Use a separate index so
                # the outer loop variable is not shadowed.
                for j in range(len(Y_test)):
                    real = float(Y_test[j])
                    predicted = prediction[j]
                    if predicted[0] != predicted[1]:
                        if real == 0 and predicted[0] == True:
                            c0_correct += 1
                        if real == 0 and predicted[1] == True:
                            c0_incorrect += 1
                        if real == 1 and predicted[1] == True:
                            c1_correct += 1
                        if real == 1 and predicted[0] == True:
                            c1_incorrect += 1
                    else:
                        not_predicted += 1
                # Accumulate this round's counts.
                c0_correct_all.append(c0_correct)
                c0_incorrect_all.append(c0_incorrect)
                c1_correct_all.append(c1_correct)
                c1_incorrect_all.append(c1_incorrect)
                not_predicted_all.append(not_predicted)
        except Exception as e:
            LOG.error(f'Qualitative conformal validation'
                      f' failed with exception: {e}')
            raise e

        # Mean confusion matrix over the rounds (class 1 is positive).
        # np.int was removed in NumPy 1.24; the builtin int is the exact
        # equivalent (np.int was an alias for it).
        self.TN = int(np.mean(c0_correct_all))
        self.FP = int(np.mean(c0_incorrect_all))
        self.TP = int(np.mean(c1_correct_all))
        self.FN = int(np.mean(c1_incorrect_all))
        not_predicted_all = int(np.mean(not_predicted_all))

        results.append(('TP', 'True positives in cross-validation', self.TP))
        results.append(('TN', 'True negatives in cross-validation', self.TN))
        results.append(('FP', 'False positives in cross-validation', self.FP))
        results.append(('FN', 'False negatives in cross-validation', self.FN))

        # Compute sensitivity and specificity
        self.sensitivity = (self.TP / (self.TP + self.FN))
        self.specificity = (self.TN / (self.TN + self.FP))
        # Compute Matthews Correlation Coefficient
        self.mcc = (((self.TP * self.TN) - (self.FP * self.FN)) / np.sqrt(
            (self.TP + self.FP) * (self.TP + self.FN) * (self.TN + self.FP) *
            (self.TN + self.FN)))
        results.append(('Sensitivity', 'Sensitivity in cross-validation',
                        self.sensitivity))
        results.append(('Specificity', 'Specificity in cross-validation',
                        self.specificity))
        results.append(
            ('MCC', 'Matthews Correlation Coefficient in cross-validation',
             self.mcc))

        # Compute coverage (% of compounds inside the applicability domain)
        self.conformal_coverage = (self.TN + self.FP + self.TP + self.FN) / (
            (self.TN + self.FP + self.TP + self.FN) + not_predicted_all)
        # Compute accuracy (% of correct predictions)
        self.conformal_accuracy = float(self.TN +
                                        self.TP) / float(self.FP + self.FN +
                                                         self.TN + self.TP)

        results.append(('Conformal_coverage', 'Conformal coverage',
                        self.conformal_coverage))
        results.append(('Conformal_accuracy', 'Conformal accuracy',
                        self.conformal_accuracy))

        return True, (results, )