Beispiel #1
0
    def test_icp_regression_tree(self):
        """Run an IcpRegressor on a decision tree, without and with a normalizer.

        The data set is shuffled and cut into thirds (train / calibrate /
        test). For each of the two nonconformity functions a table holding
        the predicted interval bounds, the true target and the interval size
        is printed.
        """
        # ---------------------------------------------------------------------
        # Shuffle the sample indices and cut them into three parts
        # ---------------------------------------------------------------------
        data = load_boston()

        shuffled = np.random.permutation(data.target.size)
        first_cut = int(shuffled.size / 3)
        second_cut = int(2 * shuffled.size / 3)
        train = shuffled[:first_cut]
        calibrate = shuffled[first_cut:second_cut]
        test = shuffled[second_cut:]

        def fit_and_report(nc_function):
            """Fit, calibrate and predict with `nc_function`, then print."""
            icp = IcpRegressor(nc_function)
            icp.fit(data.data[train, :], data.target[train])
            icp.calibrate(data.data[calibrate, :], data.target[calibrate])

            intervals = icp.predict(data.data[test, :], significance=0.1)
            widths = intervals[:, 1] - intervals[:, 0]
            rows = np.vstack([intervals.T, data.target[test], widths.T]).T
            print(pd.DataFrame(rows, columns=["min", "max", "truth", "size"]))

        # ---------------------------------------------------------------------
        # Without normalization
        # ---------------------------------------------------------------------
        tree = RegressorAdapter(DecisionTreeRegressor(min_samples_leaf=5))
        fit_and_report(RegressorNc(tree, AbsErrorErrFunc()))

        # ---------------------------------------------------------------------
        # With normalization (1-nearest-neighbour normalizing model)
        # ---------------------------------------------------------------------
        tree = RegressorAdapter(DecisionTreeRegressor(min_samples_leaf=5))
        neighbour = RegressorAdapter(KNeighborsRegressor(n_neighbors=1))
        err_normalizer = RegressorNormalizer(tree, neighbour,
                                             AbsErrorErrFunc())
        fit_and_report(RegressorNc(tree, AbsErrorErrFunc(), err_normalizer))
Beispiel #2
0
    def build(self):
        """Build the PLSR estimator and, if requested, its conformal wrapper.

        Returns:
            tuple: ``(False, message)`` when the data is not quantitative or
            the model failed to initialise; otherwise ``(True, results)``
            where ``results`` is a list of ``(key, description, value)``
            tuples describing the model that was built.
        """
        if not self.quantitative:
            print("PLSR only applies to quantitative data")
            return False, "PLSR only applies to quantitative data"

        if self.failed:
            return False, "Error initiating model"

        # Work on copies so the stored matrices are never modified here.
        X = self.X.copy()
        Y = self.Y.copy()

        results = []
        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        if self.cv:
            self.cv = getCrossVal(self.cv, 46, self.n, self.p)

        if self.tune:
            if self.optimiz == 'auto':
                # Parent-class optimizer.
                super(PLSR, self).optimize(X, Y, PLS_r(
                    **self.estimator_parameters), self.tune_parameters)
            elif self.optimiz == 'manual':
                # Optimizer defined (or overridden) on this class.
                self.optimize(X, Y, PLS_r(
                    **self.estimator_parameters), self.tune_parameters)
            # NOTE(review): any other value of self.optimiz skips the
            # optimization but is still reported as "optimized" below.

            results.append(
                ('model', 'model type', 'PLSR quantitative (optimized)'))

        else:
            print("Building  Quantitative PLSR")
            self.estimator = PLS_r(**self.estimator_parameters)
            results.append(('model', 'model type', 'PLSR quantitative'))

        if self.conformal:
            underlying_model = RegressorAdapter(self.estimator)
            # The normalizing model wraps the same estimator. The previous
            # code also built a 1-NN adapter here but overwrote it at once,
            # so it never took part in the model.
            normalizing_model = RegressorAdapter(self.estimator)
            normalizer = RegressorNormalizer(
                underlying_model, normalizing_model, AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
            self.conformal_pred = AggregatedCp(IcpRegressor(nc),
                                               BootstrapSampler())

            self.conformal_pred.fit(X, Y)
            # overrides non-conformal
            results.append(
                ('model', 'model type', 'conformal PLSR quantitative'))

        # The plain estimator is always fitted, conformal or not.
        self.estimator.fit(X, Y)

        return True, results
Beispiel #3
0
    def test_acp_regression_tree(self):
        """Fit one AggregatedCp regressor per sampler and print its intervals.

        Two thirds of the shuffled samples are used for fitting and the rest
        for prediction. For every sampler the error rate returned by
        `reg_mean_errors` and a table of interval bounds next to the true
        targets are printed.
        """
        # ---------------------------------------------------------------------
        # Experiment setup
        # ---------------------------------------------------------------------
        data = load_diabetes()

        shuffled = np.random.permutation(data.target.size)
        cut = int(2 * shuffled.size / 3)
        train, test = shuffled[:cut], shuffled[cut:]

        truth = data.target[test]
        columns = ["min", "max", "truth"]
        significance = 0.1

        # ---------------------------------------------------------------------
        # Define models: same underlying tree, different samplers
        # ---------------------------------------------------------------------
        sampler_types = (
            ("ACP-RandomSubSampler", RandomSubSampler),
            ("ACP-CrossSampler", CrossSampler),
            ("ACP-BootstrapSampler", BootstrapSampler),
        )
        models = {}
        for label, sampler_type in sampler_types:
            icp = IcpRegressor(
                RegressorNc(RegressorAdapter(DecisionTreeRegressor())))
            models[label] = AggregatedCp(icp, sampler_type())

        # ---------------------------------------------------------------------
        # Train, predict and evaluate
        # ---------------------------------------------------------------------
        for label, acp in models.items():
            acp.fit(data.data[train, :], data.target[train])
            all_levels = acp.predict(data.data[test, :])
            at_level = acp.predict(data.data[test, :],
                                   significance=significance)
            frame = pd.DataFrame(np.vstack((at_level.T, truth)).T,
                                 columns=columns)
            print("\n{}".format(label))
            print("Error rate: {}".format(
                reg_mean_errors(all_levels, truth, significance)))
            print(frame)
Beispiel #4
0
    def create_nc(model,
                  err_func=None,
                  normalizer_model=None,
                  oob=False,
                  fit_params=None,
                  fit_params_normalizer=None):
        """Build a nonconformity scorer suited to `model`.

        Classifiers get a `ClassifierNc` (default error function
        `MarginErrFunc`), regressors a `RegressorNc` (default
        `AbsErrorErrFunc`). With `oob=True` a clone of the model is fitted on
        a two-sample toy problem to check that it exposes out-of-bag
        predictions; if it does not, `AttributeError` is raised. When
        `normalizer_model` is given it is wrapped in a `RegressorAdapter` and
        used through a `RegressorNormalizer`.

        Returns None when `model` is neither a scikit-learn classifier nor a
        regressor.
        """
        normalizer_adapter = (
            RegressorAdapter(normalizer_model, fit_params_normalizer)
            if normalizer_model is not None else None)

        # Pick the family-specific pieces; the classifier check comes first.
        if isinstance(model, sklearn.base.ClassifierMixin):
            default_err_cls = MarginErrFunc
            oob_attribute = 'oob_decision_function_'
            oob_adapter_cls = OobClassifierAdapter
            adapter_cls = ClassifierAdapter
            nc_cls = ClassifierNc
        elif isinstance(model, sklearn.base.RegressorMixin):
            default_err_cls = AbsErrorErrFunc
            oob_attribute = 'oob_prediction_'
            oob_adapter_cls = OobRegressorAdapter
            adapter_cls = RegressorAdapter
            nc_cls = RegressorNc
        else:
            return None

        if err_func is None:
            err_func = default_err_cls()

        if not oob:
            adapter = adapter_cls(model, fit_params)
        else:
            # Fit a throw-away clone to see if the OOB attribute shows up.
            probe = sklearn.base.clone(model)
            probe.fit([[0], [1]], [0, 1])
            if not hasattr(probe, oob_attribute):
                raise AttributeError('Cannot use out-of-bag '
                                     'calibration with {}'.format(
                                         model.__class__.__name__))
            adapter = oob_adapter_cls(model, fit_params)

        if normalizer_adapter is None:
            return nc_cls(adapter, err_func)

        normalizer = RegressorNormalizer(adapter, normalizer_adapter,
                                         err_func)
        return nc_cls(adapter, err_func, normalizer)
Beispiel #5
0
    def CF_quantitative_validation(self):
        ''' Performs internal validation for conformal quantitative models.

        A fresh aggregated conformal regressor is fitted once per entry of
        `seeds` on a 75/25 train/test split. For every run the mean width
        of the predicted intervals and the fraction of test targets lying
        strictly inside their interval are collected; the averages over the
        runs are stored in `self.conformal_mean_interval` and
        `self.conformal_accuracy`, rounded to two decimals.

        Returns:
            tuple: ``(True, (results,))`` where `results` is a list of
            ``(key, description, value)`` tuples.

        Raises:
            Exception: any error raised while fitting or predicting is
            logged and re-raised.
        '''

        # Make a copy of original matrices.
        X = self.X.copy()
        Y = self.Y.copy()

        # One validation run per seed.
        seeds = [5, 7, 35]
        # Mean interval width of each run
        interval_means = []
        # Accuracy of each run
        accuracies = []
        results = []
        try:
            for seed in seeds:
                # Generate training and test sets.
                # NOTE(review): with shuffle=False the split does not depend
                # on random_state, so the runs differ only through the
                # conformal predictor's own sampling -- confirm this is
                # the intended design.
                X_train, X_test, Y_train, Y_test = train_test_split(
                    X, Y, test_size=0.25, random_state=seed, shuffle=False)
                # Create the aggregated conformal regressor.
                conformal_pred = AggregatedCp(
                    IcpRegressor(RegressorNc(RegressorAdapter(
                        self.estimator))), BootstrapSampler())
                # Fit conformal regressor to the data
                conformal_pred.fit(X_train, Y_train)

                # Perform prediction on test set
                prediction = conformal_pred.predict(X_test,
                                                    self.conformalSignificance)
                # Interval width is |min - max|. The earlier |min| - |max|
                # gave wrong (even negative) widths, in particular when the
                # bounds have different signs.
                interval_means.append(
                    np.mean(np.abs(prediction[:, 0] - prediction[:, 1])))
                Y_test = Y_test.reshape(-1, 1)
                # Boolean mask of targets strictly inside their interval.
                inside_interval = ((prediction[:, 0].reshape(-1, 1) < Y_test) &
                                   (prediction[:, 1].reshape(-1, 1) > Y_test))
                # Accuracy: fraction of targets inside the interval.
                accuracy = np.sum(inside_interval) / len(Y_test)
                accuracies.append(accuracy)
        except Exception as e:
            LOG.error(f'Quantitative conformal validation'
                      f' failed with exception: {e}')
            # Bare raise keeps the original traceback.
            raise

        # Average over the validation runs.
        interval_means = np.mean(interval_means)
        accuracies = np.mean(accuracies)
        # Cut into two decimals.
        self.conformal_accuracy = float("{0:.2f}".format(accuracies))
        self.conformal_mean_interval = float("{0:.2f}".format(interval_means))

        # Add quality metrics to results.
        results.append(('Conformal_mean_interval', 'Conformal mean interval',
                        self.conformal_mean_interval))
        results.append(('Conformal_accuracy', 'Conformal accuracy',
                        self.conformal_accuracy))

        return True, (results, )
def CF_QuanVal(X, Y, estimator, conformalSignificance):
    """Fit an aggregated conformal regressor and print diagnostic output.

    The first 30 rows of `X`/`Y` are used for fitting; the remaining rows
    of `X` are predicted twice, once without a significance level and once
    at a fixed significance of 0.25. `conformalSignificance` is accepted
    but not used by this function.

    Returns the fitted aggregated conformal predictor.
    """
    print("Starting quantitative conformal prediction validation")

    base_icp = IcpRegressor(RegressorNc(RegressorAdapter(estimator)))
    icp = AggregatedCp(base_icp, BootstrapSampler())

    icp.fit(X[:30], Y[:30])
    prediction = icp.predict(X[30:])
    prediction_sign = icp.predict(X[30:], significance=0.25)

    # First column minus second column of the fixed-significance prediction.
    interval = prediction_sign[:, 0] - prediction_sign[:, 1]
    for item in (np.mean(interval), interval, "\n", prediction,
                 prediction_sign):
        print(item)
    return icp
def CF_QuanCal(X, Y, estimator):
    """Fit and return a normalized aggregated conformal regressor.

    The nonconformity function uses the absolute error of `estimator`,
    normalized by a `RegressorNormalizer` built from `estimator` and a
    shallow copy of it. The aggregate uses a `RandomSubSampler`.

    Args:
        X: predictor matrix used for fitting.
        Y: target values used for fitting.
        estimator: the underlying regression estimator.

    Returns:
        The fitted `AggregatedCp` instance.
    """
    # NOTE(review): the normalizer receives the raw estimator objects while
    # other call sites in this code base pass RegressorAdapter instances;
    # left unchanged here -- confirm which form is intended.
    nc = RegressorNc(
        RegressorAdapter(estimator), AbsErrorErrFunc(),
        RegressorNormalizer(estimator, copy.copy(estimator),
                            AbsErrorErrFunc()))
    # The sampler belongs to AggregatedCp. A misplaced parenthesis used to
    # hand it to IcpRegressor as its second positional argument instead.
    acp = AggregatedCp(IcpRegressor(nc), RandomSubSampler())
    acp.fit(X, Y)
    return acp
Beispiel #8
0
from nonconformist.base import RegressorAdapter
from nonconformist.icp import IcpRegressor
from nonconformist.nc import RegressorNc, AbsErrorErrFunc, SignErrorErrFunc

# -----------------------------------------------------------------------------
# Setup training, calibration and test indices
# -----------------------------------------------------------------------------
data = load_boston()

# Shuffle the sample indices and cut them into thirds.
idx = np.random.permutation(data.target.size)
train = idx[:int(idx.size / 3)]
calibrate = idx[int(idx.size / 3):int(2 * idx.size / 3)]
test = idx[int(2 * idx.size / 3):]

# -----------------------------------------------------------------------------
# Train and calibrate
# -----------------------------------------------------------------------------
icp = IcpRegressor(
    RegressorNc(RegressorAdapter(DecisionTreeRegressor()), SignErrorErrFunc()))
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict
# -----------------------------------------------------------------------------
prediction = icp.predict(data.data[test, :], significance=0.05)
header = np.array(['min', 'max', 'Truth'])
table = np.vstack([prediction.T, data.target[test]]).T
# Use the header as column labels. Stacking it on top of the table mixed
# strings with floats and turned every numeric value into a string.
df = pd.DataFrame(table, columns=header)
print(df)
Beispiel #9
0
    def CF_quantitative_validation(self):
        ''' Performs internal validation for conformal quantitative models.

        Runs a shuffled K-fold cross-validation (number of folds taken from
        the 'ModelValidationN' parameter). In every fold a new aggregated
        conformal regressor is fitted on the training part and the predicted
        intervals for the held-out part are stored at their original
        positions. The mean interval width and the fraction of targets lying
        strictly inside their interval are then computed over all objects.

        Returns:
            tuple: ``(True, results)`` where ``results['quality']`` is a
            list of ``(key, description, value)`` tuples.

        Raises:
            Exception: any error raised during the cross-validation loop is
            logged and re-raised.
        '''

        # Make a copy of original matrices.
        X = self.X.copy()
        Y = self.Y.copy()

        info = []
        kf = KFold(n_splits=self.param.getVal('ModelValidationN'),
                   shuffle=True,
                   random_state=46)
        # Copy Y vector to use it as template to assign predictions
        Y_pred = copy.copy(Y).tolist()
        try:
            for train_index, test_index in kf.split(X):
                # Generate training and test sets
                X_train, X_test = X[train_index], X[test_index]
                Y_train = Y[train_index]
                # Create the aggregated conformal regressor.
                conformal_pred = AggregatedCp(
                    IcpRegressor(
                        RegressorNc(RegressorAdapter(self.estimator_temp))),
                    BootstrapSampler())
                # Fit conformal regressor to the data
                conformal_pred.fit(X_train, Y_train)

                # Perform prediction on test set
                prediction = conformal_pred.predict(
                    X_test, self.param.getVal('conformalSignificance'))
                # Assign the prediction its original index
                for el, interval in zip(test_index, prediction):
                    Y_pred[el] = interval

        except Exception as e:
            LOG.error(f'Quantitative conformal validation'
                      f' failed with exception: {e}')
            # Bare raise keeps the original traceback.
            raise

        Y_pred = np.asarray(Y_pred)
        # Mean width of the predicted intervals
        interval_mean = np.mean(np.abs(Y_pred[:, 0] - Y_pred[:, 1]))
        # Compare element-wise on flat vectors. Comparing an (n, 1) column
        # against a 1-D Y would broadcast to an (n, n) matrix and distort
        # the accuracy.
        Y_true = np.ravel(Y)
        inside_interval = ((Y_pred[:, 0] < Y_true) &
                           (Y_pred[:, 1] > Y_true))
        # Accuracy: fraction of targets strictly inside their interval.
        accuracy = np.sum(inside_interval) / len(Y)

        # Midpoint of every interval (mean of its two bounds).
        self.conformal_interval_medians = (np.mean(Y_pred, axis=1))
        # Cut into two decimals.
        self.conformal_accuracy = float("{0:.2f}".format(accuracy))
        self.conformal_mean_interval = float("{0:.2f}".format(interval_mean))

        # Add quality metrics to results.
        info.append(('Conformal_mean_interval', 'Conformal mean interval',
                     self.conformal_mean_interval))
        info.append(('Conformal_accuracy', 'Conformal accuracy',
                     self.conformal_accuracy))
        info.append(
            ('Conformal_interval_medians', 'Conformal interval medians',
             self.conformal_interval_medians))
        info.append(('Conformal_prediction_ranges',
                     'Conformal prediction ranges', Y_pred))

        results = {}
        results['quality'] = info
        return True, results
Beispiel #10
0
    def build(self):
        '''Build a new RF model with the X and Y numpy matrices.

        Depending on `self.quantitative` a random-forest regressor or
        classifier is created (or optimized when `self.tune` is set). When
        `self.conformal` is set an aggregated conformal predictor is built
        on top of the estimator and fitted as well.

        Returns:
            tuple: ``(False, message)`` if the model failed to initialise,
            otherwise ``(True, results)`` where `results` is a list of
            ``(key, description, value)`` tuples.
        '''

        if self.failed:
            # Return a 2-tuple like every other exit of this method, so that
            # callers unpacking (success, results) do not break.
            return False, 'Error initiating model'

        X = self.X.copy()
        Y = self.Y.copy()

        results = []

        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        if self.cv:
            self.cv = getCrossVal(self.cv,
                                  self.estimator_parameters["random_state"],
                                  self.n, self.p)
        if self.tune:
            if self.quantitative:
                self.optimize(X, Y, RandomForestRegressor(),
                              self.tune_parameters)
                results.append(
                    ('model', 'model type', 'RF quantitative (optimized)'))
            else:
                self.optimize(X, Y, RandomForestClassifier(),
                              self.tune_parameters)
                results.append(
                    ('model', 'model type', 'RF qualitative (optimized)'))
        else:
            if self.quantitative:
                log.info("Building Quantitative RF model")
                # The regressor is built without 'class_weight', so the key
                # is dropped from the shared parameter dictionary.
                self.estimator_parameters.pop('class_weight', None)

                self.estimator = RandomForestRegressor(
                    **self.estimator_parameters)
                results.append(('model', 'model type', 'RF quantitative'))

            else:
                log.info("Building Qualitative RF model")
                self.estimator = RandomForestClassifier(
                    **self.estimator_parameters)
                results.append(('model', 'model type', 'RF qualitative'))

        if self.conformal:
            if self.quantitative:
                underlying_model = RegressorAdapter(self.estimator)
                # The normalizing model wraps the same estimator. A 5-NN
                # adapter was built here before but overwritten at once,
                # so it never took part in the model.
                normalizing_model = RegressorAdapter(self.estimator)
                normalizer = RegressorNormalizer(underlying_model,
                                                 normalizing_model,
                                                 AbsErrorErrFunc())
                nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                 normalizer)

                self.conformal_pred = AggregatedCp(IcpRegressor(nc),
                                                   BootstrapSampler())
                self.conformal_pred.fit(X, Y)
                # overrides non-conformal
                results.append(
                    ('model', 'model type', 'conformal RF quantitative'))

            else:
                self.conformal_pred = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(ClassifierAdapter(self.estimator),
                                     MarginErrFunc())), BootstrapSampler())
                self.conformal_pred.fit(X, Y)
                # overrides non-conformal
                results.append(
                    ('model', 'model type', 'conformal RF qualitative'))

        # The plain estimator is always fitted, conformal or not.
        self.estimator.fit(X, Y)

        return True, results


#### Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''
Beispiel #11
0
    def build(self):
        """Build the PLSR estimator and, if requested, a conformal predictor.

        When the 'tune' parameter is set the estimator is optimized either
        with the parent-class optimizer ('auto') or with this class's own
        `optimize` ('manual'); otherwise a plain `PLS_r` is instantiated. The
        estimator is fitted, and if the 'conformal' parameter is set it is
        replaced by an aggregated conformal regressor built on a copy of it.

        Returns:
            tuple: ``(True, results)`` on success, where `results` is a list
            of ``(key, description, value)`` tuples, or ``(False, message)``
            with an error description on failure.
        """

        # Make a copy of data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = []
        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        if self.param.getVal('tune'):

            # Optimize estimator using sklearn-gridsearch
            if self.estimator_parameters['optimize'] == 'auto':
                try:

                    LOG.info('Optimizing PLSR using SK-LearnGridSearch')

                    # Remove optimize key from parameter dictionary
                    # to avoid sklearn estimator error (unexpected keyword)
                    self.estimator_parameters.pop("optimize")

                    super(PLSR, self).optimize(X, Y, PLS_r(
                        **self.estimator_parameters),
                        self.param.getDict('PLSR_optimize'))

                except Exception as e:
                    LOG.error(f'Error performing SK-LearnGridSearch'
                              f' on PLSR estimator with exception {e}')
                    return False, f'Error performing SK-LearnGridSearch on PLSR estimator with exception {e}'

            # Optimize using flame implementation (recommended)
            elif self.estimator_parameters['optimize'] == 'manual':

                LOG.info('Optimizing PLSR using manual method')

                # Remove optimize key from parameter dictionary
                # to avoid sklearn estimator error (unexpected keyword)
                self.estimator_parameters.pop("optimize")

                success, message = self.optimize(X, Y, PLS_r(
                    **self.estimator_parameters),
                    self.param.getDict('PLSR_optimize'))

                if not success:
                    return False, message

            else:
                LOG.error('Type of tune not recognized, check the input')
                return False, 'Type of tune not recognized, check the input'

            results.append(('model', 'model type', 'PLSR quantitative (optimized)'))

        else:
            LOG.info('Building Quantitative PLSR with no optimization')
            try:
                # Remove optimize key from parameters to avoid error
                # as the sklearn estimator does not have this key
                self.estimator_parameters.pop("optimize")

                self.estimator = PLS_r(**self.estimator_parameters)
            except Exception as e:
                LOG.error(f'Error at PLS_r instantiation with '
                          f'exception {e}')
                # The failing constructor is PLS_r; the returned message
                # used to name PLS_da, disagreeing with the log line above.
                return False, f'Error at PLS_r instantiation with exception {e}'

            results.append(('model', 'model type', 'PLSR quantitative'))

        # Fit estimator to the data
        self.estimator.fit(X, Y)

        if not self.param.getVal('conformal'):
            return True, results

        # Keep a copy of the plain estimator; self.estimator is replaced by
        # the conformal predictor below.
        self.estimator_temp = copy(self.estimator)
        try:

            LOG.info('Building PLSR aggregated conformal predictor')

            underlying_model = RegressorAdapter(self.estimator_temp)
            # The normalizing model wraps the same estimator copy.
            normalizing_model = RegressorAdapter(self.estimator_temp)
            normalizer = RegressorNormalizer(underlying_model, normalizing_model, AbsErrorErrFunc())

            nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
            self.estimator = AggregatedCp(IcpRegressor(nc), BootstrapSampler())

        except Exception as e:
            LOG.error(f'Error building aggregated PLSR conformal'
                        f' regressor with exception: {e}')
            return False, f'Error building aggregated PLSR conformal regressor with exception: {e}'

        # Fit conformal estimator to the data
        self.estimator.fit(X, Y)

        # overrides non-conformal
        results.append(('model', 'model type', 'conformal PLSR quantitative'))

        return True, results
# -----------------------------------------------------------------------------
# Shuffle the sample indices and cut them into train / calibrate / test thirds
# -----------------------------------------------------------------------------
data = load_boston()

idx = np.random.permutation(data.target.size)
first_cut, second_cut = int(idx.size / 3), int(2 * idx.size / 3)
train, calibrate, test = (idx[:first_cut],
                          idx[first_cut:second_cut],
                          idx[second_cut:])

# -----------------------------------------------------------------------------
# Without normalization: fit on the first third, calibrate on the second
# -----------------------------------------------------------------------------
underlying_model = RegressorAdapter(DecisionTreeRegressor(min_samples_leaf=5))
nc = RegressorNc(underlying_model, AbsErrorErrFunc())
icp = IcpRegressor(nc)
icp.fit(data.data[train, :], data.target[train])
icp.calibrate(data.data[calibrate, :], data.target[calibrate])

# -----------------------------------------------------------------------------
# Predict intervals for the last third and tabulate them with the truth
# -----------------------------------------------------------------------------
prediction = icp.predict(data.data[test, :], significance=0.1)
header = ['min', 'max', 'truth', 'size']
size = prediction[:, 1] - prediction[:, 0]
table = np.vstack([prediction.T, data.target[test], size.T]).T
df = pd.DataFrame(table, columns=header)
print(df)
Beispiel #13
0
    def build(self):
        '''Build a new XGBOOST model with the X and Y numpy matrices.

        An XGBoost regressor or classifier is created according to the
        'quantitative' parameter, and either optimized ('tune' set) or fitted
        directly. When the 'conformal' parameter is set, `self.estimator` is
        replaced by an aggregated conformal predictor built on a copy of the
        estimator and fitted on the same data.

        Returns:
            tuple: ``(True, results)`` on success, where `results` is a list
            of ``(key, description, value)`` tuples, or ``(False, message)``
            with an error description on failure.
        '''

        try:
            from xgboost.sklearn import XGBClassifier
            from xgboost.sklearn import XGBRegressor
        except Exception:
            return False,  'XGboost not found, please revise your environment'

        # Make a copy of data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = []
        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        # If tune then call gridsearch to optimize the estimator
        if self.param.getVal('tune'):

            LOG.info("Optimizing XGBOOST estimator")

            try:
                # Check type of model
                if self.param.getVal('quantitative'):
                    self.estimator = XGBRegressor(
                                        **self.estimator_parameters)
                    self.optimize(X, Y, self.estimator, self.tune_parameters)
                    results.append(('model','model type','XGBOOST quantitative (optimized)'))
                else:
                    self.estimator = XGBClassifier(
                                        **self.estimator_parameters)
                    # NOTE(review): this dictionary is changed but never
                    # passed on within this method, so the assignment looks
                    # like it has no effect -- confirm.
                    params = self.estimator.get_params()
                    params['num_class'] = 2
                    self.optimize(X, Y, self.estimator,
                                  self.tune_parameters)
                    results.append(('model','model type','XGBOOST qualitative (optimized)'))

            except Exception as e:
                return False, f'Exception optimizing XGBOOST estimator with exception {e}'

        else:
            try:
                if self.param.getVal('quantitative'):

                    LOG.info("Building Quantitative XGBOOST model")
                    self.estimator = XGBRegressor(**self.estimator_parameters)
                    results.append(('model', 'model type', 'XGBOOST quantitative'))
                else:

                    LOG.info("Building Qualitative XGBOOST model")
                    self.estimator = XGBClassifier(**self.estimator_parameters)
                    results.append(('model', 'model type', 'XGBOOST qualitative'))

                self.estimator.fit(X, Y)
                print(self.estimator)

            except Exception as e:
                # Report the failure like the other error paths. A stray
                # `raise e` used to make this return unreachable.
                LOG.error(f'Exception building XGBOOST estimator with exception {e}')
                return False, f'Exception building XGBOOST estimator with exception {e}'

        # Keep a copy of the plain estimator; self.estimator may be replaced
        # by the conformal predictor below.
        self.estimator_temp = copy(self.estimator)

        if not self.param.getVal('conformal'):
            return True, results
        # Create the conformal estimator
        try:
            # Conformal regressor
            if self.param.getVal('quantitative'):

                LOG.info("Building conformal Quantitative XGBOOST model")

                underlying_model = RegressorAdapter(self.estimator_temp)
                # The normalizing model wraps the same estimator copy.
                normalizing_model = RegressorAdapter(self.estimator_temp)
                normalizer = RegressorNormalizer(
                                underlying_model,
                                normalizing_model,
                                AbsErrorErrFunc())
                nc = RegressorNc(underlying_model,
                                    AbsErrorErrFunc(),
                                    normalizer)

                self.estimator = AggregatedCp(IcpRegressor(nc),
                                                BootstrapSampler())

                self.estimator.fit(X, Y)
                results.append(('model', 'model type', 'conformal XGBOOST quantitative'))

            # Conformal classifier
            else:

                LOG.info("Building conformal Qualitative XGBOOST model")

                self.estimator = AggregatedCp(
                                    IcpClassifier(
                                        ClassifierNc(
                                            ClassifierAdapter(self.estimator_temp),
                                            MarginErrFunc()
                                        )
                                    ),
                                    BootstrapSampler())

                # Fit estimator to the data
                self.estimator.fit(X, Y)
                results.append(('model', 'model type', 'conformal XGBOOST qualitative'))

        except Exception as e:
            # Same as above: return the error instead of re-raising, so the
            # (False, message) contract holds for every failure.
            LOG.error(f'Exception building conformal XGBOOST estimator with exception {e}')
            return False, f'Exception building conformal XGBOOST estimator with exception {e}'

        return True, results



## Overriding of parent methods

    # def CF_quantitative_validation(self):
    #     ''' performs validation for conformal quantitative models '''

      

    # def CF_qualitative_validation(self):
    #     ''' performs validation for conformal qualitative models '''


    # def quantitativeValidation(self):
    #     ''' performs validation for quantitative models '''

    # def qualitativeValidation(self):
    #     ''' performs validation for qualitative models '''


    # def validate(self):
    #     ''' Validates the model and computes suitable model quality scoring values'''


    # def optimize(self, X, Y, estimator, tune_parameters):
    #     ''' optimizes a model using a grid search over a range of values for diverse parameters'''


    # def regularProject(self, Xb, results):
    #     ''' projects a collection of query objects in a regular model, for obtaining predictions '''


    # def conformalProject(self, Xb, results):
    #     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''


    # def project(self, Xb, results):
    #     ''' Uses the X matrix provided as argument to predict Y'''
def cv(df, parameters):
    """Evaluate plain and normalized inductive conformal regressors on three
    forward-moving folds and append the averaged losses to a CSV file.

    The last 120 rows of ``df`` are excluded. With ``end = len(df) - 120``,
    each fold tests on ``int(end / 6)`` rows starting at ``int(end * ratio)``
    for ``ratio`` in 0.5, 0.66 and 0.84. The calibration set has the same
    size and is either the block right before the test rows or, when
    ``parameters['randomized_calibration'] == True``, a random sample of the
    rows before the test block.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``'NetPosUsd'`` (target) and ``'QdfTime'``
        (dropped). All remaining columns are standardised and used as
        features.
    parameters : dict
        Must contain ``'algorithm'``, ``'randomized_calibration'`` and
        ``'alpha_'`` (significance level). Every other entry is forwarded to
        the constructor of the selected algorithm.

    Raises
    ------
    KeyError
        If one of the three mandatory keys is missing from ``parameters``.
    ValueError
        If ``parameters['algorithm']`` is not a supported name.

    Side effects
    ------------
    Writes (or appends to) ``<algorithm>_cv.csv`` in the working directory a
    single row holding the estimator parameters, ``alpha_``, ``CP_loss`` and
    ``NCP_loss``.
    """
    end = len(df) - 120
    fold_size = int(end / 6)

    # Estimator keyword arguments: everything except the bookkeeping keys.
    p = parameters.copy()
    p.pop('algorithm')
    p.pop('randomized_calibration')
    p.pop('alpha_')

    algorithm_name = parameters.get('algorithm')
    significance = parameters.get('alpha_')
    # Kept as an equality test on purpose: only True (or 1) enables the
    # randomized split, exactly as before.
    randomized = parameters.get('randomized_calibration') == True  # noqa: E712

    if algorithm_name == 'RandomForest':
        algorithm = RandomForestRegressor(**p)
    elif algorithm_name == 'K-NearestNeighbours':
        algorithm = KNeighborsRegressor(**p)
    elif algorithm_name == 'LightGBM':
        algorithm = LGBMRegressor(**p)
    elif algorithm_name == 'LassoRegression':
        algorithm = Lasso(**p)
    elif algorithm_name == 'NeuralNetwork':
        algorithm = NeuralNetworkAlgorithm(p)
    elif algorithm_name == 'LSTM':
        algorithm = BiLSTM(**p)
    else:
        # Previously this surfaced much later as an unbound-local error.
        raise ValueError(f'Unsupported algorithm: {algorithm_name!r}')

    # Row written to the CSV. It is a copy so that the bookkeeping keys never
    # leak into the dict that was handed to the estimator.
    record = dict(p)
    record['alpha_'] = significance

    # Target statistics on the raw scale, used to map predictions back.
    m, s = df['NetPosUsd'].mean(), df['NetPosUsd'].std()
    df = df.drop(['QdfTime'], axis=1)
    df = (df - df.mean(axis=0)) / df.std(axis=0)

    # Loop-invariant views of the standardised data.
    feature_frame = df.drop(['NetPosUsd'], axis=1)
    target_column = df['NetPosUsd']

    cp_losses = np.zeros(3)
    ncp_losses = np.zeros(3)

    for i, ratio in enumerate([.5, 0.66, .84]):
        split = int(end * ratio)

        if randomized:
            # Calibration rows are sampled from everything before the test
            # block; the remaining rows form the proper training set.
            pool_x = feature_frame.iloc[:split, :].values
            pool_y = target_column.iloc[:split].values
            choose = np.random.choice(len(pool_x), fold_size, replace=False)
            mask = np.ones(len(pool_x), dtype=bool)
            mask[choose] = False

            calibrate, ycalibrate = pool_x[choose, :], pool_y[choose]
            train, ytrain = pool_x[mask, :], pool_y[mask]
        else:
            # Calibration rows are the block immediately before the test rows.
            cal_start = split - fold_size
            train = feature_frame.iloc[:cal_start, :].values
            ytrain = target_column.iloc[:cal_start].values
            calibrate = feature_frame.iloc[cal_start:split, :].values
            ycalibrate = target_column.iloc[cal_start:split].values

        test = feature_frame.iloc[split:split + fold_size, :].values
        ytest = target_column.iloc[split:split + fold_size].values

        # ---------------------------------------------------------------------
        # Normalized conformal predictor (NCP)
        # ---------------------------------------------------------------------
        underlying_model = RegressorAdapter(algorithm)
        normalizing_model = RegressorAdapter(KNeighborsRegressor(n_neighbors=50))
        normalizer = RegressorNormalizer(underlying_model, normalizing_model, AbsErrorErrFunc())
        icp = IcpRegressor(RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer))
        icp.fit(train, ytrain)
        icp.calibrate(calibrate, ycalibrate)
        ncp_bounds = icp.predict(test, significance=significance) * s + m

        # ---------------------------------------------------------------------
        # Plain conformal predictor (CP); the estimator is refitted here
        # ---------------------------------------------------------------------
        underlying_model = RegressorAdapter(algorithm)
        icp = IcpRegressor(RegressorNc(underlying_model, AbsErrorErrFunc()))
        icp.fit(train, ytrain)
        icp.calibrate(calibrate, ycalibrate)
        cp_bounds = icp.predict(test, significance=significance) * s + m

        # Everything is back on the original target scale from here on.
        fold = pd.DataFrame({
            'NCP_lower': ncp_bounds[:, 0],
            'NCP_upper': ncp_bounds[:, 1],
            'NetPosUsd': ytest * s + m,
            'CP_lower': cp_bounds[:, 0],
            'CP_upper': cp_bounds[:, 1],
        })

        cp_losses[i] = qd_objective(fold['NetPosUsd'], fold['CP_lower'], fold['CP_upper'], significance)
        ncp_losses[i] = qd_objective(fold['NetPosUsd'], fold['NCP_lower'], fold['NCP_upper'], significance)

    record['CP_loss'] = np.mean(cp_losses)
    record['NCP_loss'] = np.mean(ncp_losses)

    csv_path = algorithm_name + '_cv.csv'
    row = pd.DataFrame(data=record, index=[0])
    if os.path.exists(csv_path):
        # File already has a header: append the new row only.
        row.to_csv(csv_path, mode='a', header=False, index=False)
    else:
        row.to_csv(csv_path, encoding='utf-8', index=False)
Beispiel #15
0
def train_and_test_cp_algo(i):
    """Fit LSTM-based conformal predictors on data set ``i`` and dump the
    prediction intervals of the last 120 observations for 19 significance
    levels (5 %, 10 %, ..., 95 %).

    The target series ``NetPosUsd`` is transformed with ``mlog_trans``, cut
    into sliding windows of 96 consecutive values (features) with the value
    following each window as label, and split chronologically into training
    rows, 3480 calibration rows and 120 test rows.

    Two inductive conformal regressors are fitted: a normalized one (``NCP``,
    KNN normalizer) and a plain one (``CP``). Each gets its own ``BiLSTM``
    instance; sharing one instance would let the second ``fit`` retrain the
    model the first predictor had already been calibrated against.

    Parameters
    ----------
    i : int
        Index of the data file ``data\\EURUSD_NETPOSUSD_hourly_for_regresion<i>.csv``.
        When ``i == 0`` the output files are created with a header, otherwise
        rows are appended without a header.

    Side effects
    ------------
    For every significance level ``a`` writes
    ``CP_cudaLSTM_<a>_calibrationwindow3480.csv`` and
    ``NCP_cudaLSTM_<a>_calibrationwindow3480.csv`` in the working directory.
    """
    window = 96
    test_size = 120
    calibration_size = 3480

    # Raw string: '\E' is not a valid escape sequence in a normal literal.
    path = r'data\EURUSD_NETPOSUSD_hourly_for_regresion' + str(i) + '.csv'
    df = pd.read_csv(path).drop(['QdfTime', 'Unnamed: 0'], axis=1).fillna(0)

    # Untransformed test targets, reported as-is in the output tables.
    y_raw_test = df.NetPosUsd[-test_size:]
    # Statistics of the raw series, needed to invert the transform later.
    median_ = df.NetPosUsd.median()
    mad_ = mad(df.NetPosUsd.values)
    df.NetPosUsd = mlog_trans(df.NetPosUsd.values)

    data = df.NetPosUsd.values

    # Row k holds data[k:k + window]; its label is data[k + window].
    X = np.array([data[stop - window:stop] for stop in range(window, data.shape[0])])
    y = data[window:]

    train_test_split = X.shape[0] - test_size - calibration_size
    train = X[:train_test_split, :]
    ytrain = y[:train_test_split]
    calibrate = X[train_test_split:train_test_split + calibration_size, :]
    ycalibrate = y[train_test_split:train_test_split + calibration_size]
    test = X[-test_size:]

    # Normalized conformal predictor
    underlying_model = RegressorAdapter(BiLSTM({'window': window}))
    normalizing_model = RegressorAdapter(KNeighborsRegressor(n_neighbors=50))
    normalizer = RegressorNormalizer(underlying_model, normalizing_model, AbsErrorErrFunc())
    icp = IcpRegressor(RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer))
    icp.fit(train, ytrain)
    icp.calibrate(calibrate, ycalibrate)

    # Plain conformal predictor on an independent model instance
    underlying_model2 = RegressorAdapter(BiLSTM({'window': window}))
    icp2 = IcpRegressor(RegressorNc(underlying_model2, AbsErrorErrFunc()))
    icp2.fit(train, ytrain)
    icp2.calibrate(calibrate, ycalibrate)

    def interval_frame(predictor, prefix, significance):
        """Return the back-transformed interval table of one predictor."""
        prediction = predictor.predict(test, significance=significance)
        lower = mlog_inverse(prediction[:, 0], median_, mad_)
        upper = mlog_inverse(prediction[:, 1], median_, mad_)
        # Interval midpoint, reported as the point prediction.
        midpoint = upper / 2 + lower / 2
        table = np.vstack([lower, upper, y_raw_test, midpoint.T]).T
        header = [prefix + '_lower', prefix + '_upper', 'NetPosUsd', 'prediction']
        return pd.DataFrame(table, columns=header)

    def dump(frame, prefix, level):
        """Create the output file for the first data set, append otherwise."""
        name = f'{prefix}_cudaLSTM_{level}_calibrationwindow{calibration_size}.csv'
        if i == 0:
            frame.to_csv(name, encoding='utf-8', index=False)
        else:
            frame.to_csv(name, mode='a', header=False, index=False)

    for a in tqdm(np.linspace(5, 95, 19)):
        level = str(np.round(a).astype(int))

        dfncp = interval_frame(icp, 'NCP', a / 100)
        dfcp = interval_frame(icp2, 'CP', a / 100)

        dump(dfcp, 'CP', level)
        dump(dfncp, 'NCP', level)
Beispiel #16
0
    def build(self):
        '''Build a new DL model with the X and Y numpy matrices '''

        try:
            from keras.wrappers.scikit_learn import KerasClassifier
            from keras.wrappers.scikit_learn import KerasRegressor
        except Exception as e:
            return False, 'Keras not found, please revise your environment'

        # Make a copy of data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = []
        results.append(('nobj', 'number of objects', self.nobj))
        results.append(('nvarx', 'number of predictor variables', self.nvarx))

        # If tune then call gridsearch to optimize the estimator
        if self.param.getVal('tune'):

            LOG.info("Optimizing Keras estimator")

            try:
                # Check type of model
                if self.param.getVal('quantitative'):
                    self.estimator = KerasRegressor(
                        **self.estimator_parameters)
                    self.optimize(X, Y, self.estimator, self.tune_parameters)
                    results.append(('model', 'model type',
                                    'KERAS quantitative (optimized)'))
                else:
                    self.estimator = KerasClassifier(
                        **self.estimator_parameters)
                    #params = self.estimator.get_params()
                    #params['num_class'] = 2
                    self.optimize(X, Y, self.estimator, self.tune_parameters)
                    results.append(('model', 'model type',
                                    'KERAS qualitative (optimized)'))

            except Exception as e:
                return False, f'Exception optimizing KERAS estimator with exception {e}'

        else:
            try:
                if self.param.getVal('quantitative'):

                    LOG.info("Building Quantitative KERAS mode")
                    self.estimator = KerasRegressor(
                        build_fn=self.create_model,
                        **self.estimator_parameters,
                        verbose=0)
                    results.append(
                        ('model', 'model type', 'Keras quantitative'))
                else:

                    LOG.info("Building Qualitative Keras model")
                    self.estimator = KerasClassifier(
                        build_fn=self.create_model,
                        dim=self.X.shape[1],
                        **self.estimator_parameters,
                        verbose=0)
                    results.append(
                        ('model', 'model type', 'Keras qualitative'))

                self.estimator.fit(X, Y)
                print(self.estimator)

            except Exception as e:
                raise e
                return False, f'Exception building Keras estimator with exception {e}'

        self.estimator_temp = clone(self.estimator)

        if not self.param.getVal('conformal'):
            return True, results
        # Create the conformal estimator
        try:
            # Conformal regressor
            if self.param.getVal('quantitative'):

                LOG.info("Building conformal Quantitative Keras model")

                underlying_model = RegressorAdapter(self.estimator_temp)
                normalizing_model = RegressorAdapter(
                    KNeighborsRegressor(n_neighbors=15))
                # normalizing_model = RegressorAdapter(self.estimator_temp)
                normalizer = RegressorNormalizer(underlying_model,
                                                 normalizing_model,
                                                 AbsErrorErrFunc())
                nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                 normalizer)

                # self.conformal_pred = AggregatedCp(IcpRegressor
                # (RegressorNc(RegressorAdapter(self.estimator))),
                #                                   BootstrapSampler())

                self.estimator = AggregatedCp(IcpRegressor(nc),
                                              BootstrapSampler())

                self.estimator.fit(X, Y)
                results.append(
                    ('model', 'model type', 'conformal Keras quantitative'))

            # Conformal classifier
            else:

                LOG.info("Building conformal Qualitative Keras model")

                self.estimator = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                     MarginErrFunc())), BootstrapSampler())

                # Fit estimator to the data
                print('build finished')
                self.estimator.fit(X, Y)
                results.append(
                    ('model', 'model type', 'conformal Keras qualitative'))

        except Exception as e:
            raise e
            return False, f'Exception building conformal Keras estimator with exception {e}'

        return True, []
def train_and_test_cp_algo(parameters):
    """Fit an inductive conformal regressor on each of the 29 data sets and
    collect the prediction intervals of the last 120 rows in one CSV file.

    For every data set the columns are standardised, the last 120 rows are
    used as test set and ``calibration_size`` rows are used for calibration:
    either the block right before the test rows, or a random sample of all
    non-test rows when ``parameters['randomized_calibration'] == True``.
    Intervals, true values and interval midpoints are mapped back to the
    original scale of ``NetPosUsd`` before being written.

    Parameters
    ----------
    parameters : dict
        Must contain ``'algorithm'``, ``'randomized_calibration'``,
        ``'alpha_'`` (significance level), ``'calibration_size'`` and
        ``'WhichCP'`` (``'NCP'`` selects the normalized predictor, any other
        value the plain one). Every other entry is forwarded to the
        constructor of the selected algorithm.

    Raises
    ------
    KeyError
        If one of the five mandatory keys is missing.
    ValueError
        If ``parameters['algorithm']`` is not a supported name.

    Side effects
    ------------
    Creates ``<WhichCP>_<algorithm>_<alpha in %>_calibrationwindow<size>.csv``
    for data set 0 and appends the rows of data sets 1..28 to it.
    """
    # Estimator keyword arguments: everything except the bookkeeping keys.
    p = parameters.copy()
    for key in ('algorithm', 'randomized_calibration', 'alpha_',
                'calibration_size', 'WhichCP'):
        p.pop(key)

    algorithm_name = parameters.get('algorithm')
    which_cp = parameters.get("WhichCP")
    significance = parameters.get('alpha_')
    calibration_size = parameters.get("calibration_size")
    # Kept as an equality test on purpose: only True (or 1) enables the
    # randomized split, exactly as before.
    randomized = parameters.get('randomized_calibration') == True  # noqa: E712
    normalized = which_cp == 'NCP'

    def make_algorithm():
        """Return a fresh, unfitted estimator of the requested kind."""
        if algorithm_name == 'RandomForest':
            return RandomForestRegressor(**p)
        if algorithm_name == 'K-NearestNeighbours':
            return KNeighborsRegressor(**p)
        if algorithm_name == 'LightGBM':
            return LGBMRegressor(**p)
        if algorithm_name == 'LassoRegression':
            return Lasso(**p)
        if algorithm_name == 'NeuralNetwork':
            return NeuralNetworkAlgorithm(p)
        if algorithm_name == 'LSTM':
            return BiLSTM(**p)
        if algorithm_name == 'GradientBoosting':
            return GradientBoostingRegressor(**p)
        # Previously this surfaced later as an unbound-local error.
        raise ValueError(f'Unsupported algorithm: {algorithm_name!r}')

    prefix = 'NCP' if normalized else 'CP'
    header = [prefix + '_lower', prefix + '_upper', 'NetPosUsd', 'prediction']
    out_path = (which_cp + '_' + algorithm_name + '_'
                + str(np.round(significance * 100).astype(int)) + '_'
                + 'calibrationwindow' + str(calibration_size) + '.csv')

    for i in tqdm(range(29)):
        algorithm = make_algorithm()

        # Raw string: '\E' is not a valid escape sequence in a normal literal.
        path = r'data\EURUSD_NETPOSUSD_hourly_for_regresion' + str(i) + '.csv'
        df = pd.read_csv(path).drop(['Unnamed: 0', 'QdfTime'], axis=1).fillna(0)

        # Target statistics on the raw scale, used to map predictions back.
        m, s = df['NetPosUsd'].mean(), df['NetPosUsd'].std()
        df = (df - df.mean(axis=0)) / df.std(axis=0)

        feature_frame = df.drop(['NetPosUsd'], axis=1)
        target_column = df['NetPosUsd']

        if randomized:
            train_test_split = len(df) - 120
            pool_x = feature_frame.iloc[:train_test_split, :].values
            pool_y = target_column.iloc[:train_test_split].values
            choose = np.random.choice(len(pool_x), calibration_size, replace=False)
            mask = np.ones(len(pool_x), dtype=bool)
            mask[choose] = False

            calibrate, ycalibrate = pool_x[choose, :], pool_y[choose]
            train, ytrain = pool_x[mask, :], pool_y[mask]

            test = feature_frame.iloc[train_test_split:, :].values
            ytest = target_column.iloc[train_test_split:].values
        else:
            train_test_split = len(df) - 120 - calibration_size
            cal_end = train_test_split + calibration_size

            train = feature_frame.iloc[:train_test_split, :].values
            ytrain = target_column.iloc[:train_test_split].values
            calibrate = feature_frame.iloc[train_test_split:cal_end, :].values
            # Plain array, like every other split handed to the predictor.
            ycalibrate = target_column.iloc[train_test_split:cal_end].values

            test = feature_frame.iloc[-120:, :].values
            ytest = target_column.iloc[-120:].values

        underlying_model = RegressorAdapter(algorithm)
        if normalized:
            normalizing_model = RegressorAdapter(KNeighborsRegressor(n_neighbors=50))
            normalizer = RegressorNormalizer(underlying_model, normalizing_model, AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
        else:
            nc = RegressorNc(underlying_model, AbsErrorErrFunc())

        icp = IcpRegressor(nc)
        icp.fit(train, ytrain)
        icp.calibrate(calibrate, ycalibrate)

        # ---------------------------------------------------------------------
        # Predict
        # ---------------------------------------------------------------------
        prediction = icp.predict(test, significance=significance)
        # Interval midpoint, reported as the point prediction.
        midpoint = prediction[:, 1] / 2 + prediction[:, 0] / 2

        # Back to the original target scale.
        table = np.vstack([(prediction * s + m).T, ytest * s + m, midpoint * s + m]).T
        frame = pd.DataFrame(table, columns=header)

        if i == 0:
            frame.to_csv(out_path, encoding='utf-8', index=False)
        else:
            frame.to_csv(out_path, mode='a', header=False, index=False)

        # Drop the fitted estimator before the next one is built.
        del algorithm
Beispiel #18
0
# Shuffle the sample indices; the first two thirds train, the rest test.
idx = np.random.permutation(data.target.size)
train, test = np.split(idx, [int(2 * idx.size / 3)])

truth = data.target[test]
columns = ['min', 'max', 'truth']
significance = 0.1

# -----------------------------------------------------------------------------
# Define models
# -----------------------------------------------------------------------------

# One aggregated conformal predictor per sampling strategy, each wrapping its
# own decision-tree based inductive conformal regressor.
models = {
    label: AggregatedCp(
        IcpRegressor(RegressorNc(RegressorAdapter(DecisionTreeRegressor()))),
        sampler_class())
    for label, sampler_class in (
        ('ACP-RandomSubSampler', RandomSubSampler),
        ('ACP-CrossSampler', CrossSampler),
        ('ACP-BootstrapSampler', BootstrapSampler),
    )
}

# -----------------------------------------------------------------------------
# Train, predict and evaluate
# -----------------------------------------------------------------------------
for name, model in models.iteritems():
Beispiel #19
0
Datei: RF.py Projekt: e7dal/flame
    def build(self):
        '''Build a new RF model with the X and Y numpy matrices.

        The estimator is either tuned through self.optimize() or fitted
        directly. When the 'conformal' parameter is set, it is then wrapped
        in an aggregated conformal predictor which is fitted on the same data.

        Returns (True, results), where results is a list of
        (key, description, value) tuples, or (False, message) when any step
        raises an exception.
        '''

        # Work on private copies of the data matrices
        X, Y = self.X.copy(), self.Y.copy()

        results = [
            ('nobj', 'number of objects', self.nobj),
            ('nvarx', 'number of predictor variables', self.nvarx),
            ('model', 'model type', 'RF'),
        ]

        conformal = self.param.getVal('conformal')

        if self.param.getVal('tune'):
            # Grid search: the estimator is handed to optimize() unfitted

            LOG.info("Optimizing RF estimator")

            try:
                if self.param.getVal('quantitative'):
                    estimator_class = RandomForestRegressor
                else:
                    estimator_class = RandomForestClassifier

                self.estimator = estimator_class(**self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)

            except Exception as e:
                return False, f'Exception optimizing RF estimator with exception {e}'

        else:
            try:
                if self.param.getVal('quantitative'):
                    estimator_class = RandomForestRegressor
                    message = "Building Quantitative RF model"
                else:
                    estimator_class = RandomForestClassifier
                    message = "Building Qualitative RF model"

                self.estimator = estimator_class(**self.estimator_parameters)

                # Conformal builds log their own message further down
                if not conformal:
                    LOG.info(message)

                self.estimator.fit(X, Y)

            except Exception as e:
                return False, f'Exception building RF estimator with exception {e}'

        if not conformal:
            return True, results

        # Copy of the estimator used as underlying model of the conformal one
        self.estimator_temp = copy(self.estimator)

        try:
            if self.param.getVal('quantitative'):
                # Conformal regressor, normalized with a KNN model
                conformal_settings = self.param.getDict('conformal_settings')
                LOG.info("Building conformal Quantitative RF model")

                underlying_model = RegressorAdapter(self.estimator_temp)
                self.normalizing_model = RegressorAdapter(
                    KNeighborsRegressor(
                        n_neighbors=conformal_settings['KNN_NN']))
                normalizer = RegressorNormalizer(underlying_model,
                                                 copy(self.normalizing_model),
                                                 AbsErrorErrFunc())
                icp = IcpRegressor(
                    RegressorNc(underlying_model, AbsErrorErrFunc(),
                                normalizer))

            else:
                # Conformal classifier
                LOG.info("Building conformal Qualitative RF model")

                icp = IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                 MarginErrFunc()))

            # Aggregate over bootstrap samples and fit to the data
            self.estimator = AggregatedCp(icp, BootstrapSampler())
            self.estimator.fit(X, Y)

        except Exception as e:
            return False, f'Exception building conformal RF estimator with exception {e}'

        return True, results


## Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''
Beispiel #20
0
                         iterations=5,
                         folds=5,
                         scoring_funcs=[class_mean_errors, class_avg_c],
                         significance_levels=[0.05, 0.1, 0.2])

# Report the classification scores, averaged per significance level.
print('Classification: iris')
scores = scores.drop(columns=['fold', 'iter'])
print(scores.groupby(['significance']).mean())

# -----------------------------------------------------------------------------
# Regression, absolute error
# -----------------------------------------------------------------------------
data = load_diabetes()

# Inductive conformal regressor on a random forest, scored by absolute error.
icp = IcpRegressor(
    RegressorNc(
        RegressorAdapter(RandomForestRegressor(n_estimators=100)),
        AbsErrorErrFunc(),
    )
)
icp_cv = RegIcpCvHelper(icp)

scores = cross_val_score(
    icp_cv,
    data.data,
    data.target,
    iterations=5,
    folds=5,
    scoring_funcs=[reg_mean_errors, reg_median_size],
    significance_levels=[0.05, 0.1, 0.2],
)

# Average over folds and iterations for each significance level.
print('Absolute error regression: diabetes')
scores = scores.drop(columns=['fold', 'iter'])
print(scores.groupby(['significance']).mean())
Beispiel #21
0
    def test_cross_validation(self):
        # -----------------------------------------------------------------------------
        # Classification
        # -----------------------------------------------------------------------------
        data = load_iris()

        icp = IcpClassifier(
            ClassifierNc(
                ClassifierAdapter(RandomForestClassifier(n_estimators=100)),
                MarginErrFunc()))
        icp_cv = ClassIcpCvHelper(icp)

        scores = cross_val_score(
            icp_cv,
            data.data,
            data.target,
            iterations=5,
            folds=5,
            scoring_funcs=[class_mean_errors, class_avg_c],
            significance_levels=[0.05, 0.1, 0.2],
        )

        print("Classification: iris")
        scores = scores.drop(["fold", "iter"], axis=1)
        print(scores.groupby(["significance"]).mean())

        # -----------------------------------------------------------------------------
        # Regression, absolute error
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        icp = IcpRegressor(
            RegressorNc(
                RegressorAdapter(RandomForestRegressor(n_estimators=100)),
                AbsErrorErrFunc()))
        icp_cv = RegIcpCvHelper(icp)

        scores = cross_val_score(
            icp_cv,
            data.data,
            data.target,
            iterations=5,
            folds=5,
            scoring_funcs=[reg_mean_errors, reg_median_size],
            significance_levels=[0.05, 0.1, 0.2],
        )

        print("Absolute error regression: diabetes")
        scores = scores.drop(["fold", "iter"], axis=1)
        print(scores.groupby(["significance"]).mean())

        # -----------------------------------------------------------------------------
        # Regression, normalized absolute error
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        underlying_model = RegressorAdapter(
            RandomForestRegressor(n_estimators=100))
        normalizer_model = RegressorAdapter(
            RandomForestRegressor(n_estimators=100))
        normalizer = RegressorNormalizer(underlying_model, normalizer_model,
                                         AbsErrorErrFunc())
        nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)

        icp = IcpRegressor(nc)
        icp_cv = RegIcpCvHelper(icp)

        scores = cross_val_score(
            icp_cv,
            data.data,
            data.target,
            iterations=5,
            folds=5,
            scoring_funcs=[reg_mean_errors, reg_median_size],
            significance_levels=[0.05, 0.1, 0.2],
        )

        print("Normalized absolute error regression: diabetes")
        scores = scores.drop(["fold", "iter"], axis=1)
        print(scores.groupby(["significance"]).mean())

        # -----------------------------------------------------------------------------
        # Regression, signed error
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        icp = IcpRegressor(
            RegressorNc(
                RegressorAdapter(RandomForestRegressor(n_estimators=100)),
                SignErrorErrFunc()))
        icp_cv = RegIcpCvHelper(icp)

        scores = cross_val_score(
            icp_cv,
            data.data,
            data.target,
            iterations=5,
            folds=5,
            scoring_funcs=[reg_mean_errors, reg_median_size],
            significance_levels=[0.05, 0.1, 0.2],
        )

        print("Signed error regression: diabetes")
        scores = scores.drop(["fold", "iter"], axis=1)
        print(scores.groupby(["significance"]).mean())

        # -----------------------------------------------------------------------------
        # Regression, normalized signed error
        # -----------------------------------------------------------------------------
        data = load_diabetes()

        underlying_model = RegressorAdapter(
            RandomForestRegressor(n_estimators=100))
        normalizer_model = RegressorAdapter(
            RandomForestRegressor(n_estimators=100))

        # The normalization model can use a different error function than is
        # used to measure errors on the underlying model
        normalizer = RegressorNormalizer(underlying_model, normalizer_model,
                                         AbsErrorErrFunc())
        nc = RegressorNc(underlying_model, SignErrorErrFunc(), normalizer)

        icp = IcpRegressor(nc)
        icp_cv = RegIcpCvHelper(icp)

        scores = cross_val_score(
            icp_cv,
            data.data,
            data.target,
            iterations=5,
            folds=5,
            scoring_funcs=[reg_mean_errors, reg_median_size],
            significance_levels=[0.05, 0.1, 0.2],
        )

        print("Normalized signed error regression: diabetes")
        scores = scores.drop(["fold", "iter"], axis=1)
        print(scores.groupby(["significance"]).mean())
Beispiel #22
0
    def build(self):
        '''Build a new SVM model with the X and Y numpy matrices.

        The estimator is an svm.SVR when the 'quantitative' parameter is
        set and an svm.SVC otherwise. When 'tune' is set the estimator is
        handed to self.optimize together with self.tune_parameters;
        otherwise it is instantiated from self.estimator_parameters and
        stored in self.estimator. self.estimator is then fitted on copies
        of self.X and self.Y. When 'conformal' is set, a copy of the
        fitted estimator is wrapped in an AggregatedCp (using a
        BootstrapSampler) that replaces self.estimator and is fitted on
        the same data.

        Returns:
            A (success, results) tuple. results is a list of
            (key, label, value) tuples describing the model. success is
            False when one of the stages raised an exception: the error
            is logged and the entries collected so far are returned.
        '''

        # Make a copy of data matrices
        X = self.X.copy()
        Y = self.Y.copy()

        results = [
            ('nobj', 'number of objects', self.nobj),
            ('nvarx', 'number of predictor variables', self.nvarx),
        ]

        # If tune then let self.optimize search for the estimator settings
        if self.param.getVal('tune'):
            try:
                # Check type of model
                if self.param.getVal('quantitative'):
                    self.optimize(X, Y, svm.SVR(**self.estimator_parameters),
                                  self.tune_parameters)
                    results.append(('model', 'model type',
                                    'SVM quantitative (optimized)'))

                else:
                    self.optimize(X, Y, svm.SVC(**self.estimator_parameters),
                                  self.tune_parameters)
                    results.append(
                        ('model', 'model type', 'SVM qualitative (optimized)'))
                LOG.debug('SVM estimator optimized')
            except Exception as e:
                # Adjacent literals are joined verbatim, so the separating
                # space has to be part of the first one
                LOG.error('Exception optimizing SVM '
                          f'estimator with exception {e}')
                # Do not go on to fit whatever self.estimator held before
                # the failure; report it to the caller instead
                return False, results
        else:
            try:
                LOG.info("Building  SVM model")
                if self.param.getVal('quantitative'):
                    LOG.info("Building Quantitative SVM-R model")
                    self.estimator = svm.SVR(**self.estimator_parameters)
                    results.append(('model', 'model type', 'SVM quantitative'))
                else:
                    self.estimator = svm.SVC(**self.estimator_parameters)
                    results.append(('model', 'model type', 'SVM qualitative'))
            except Exception as e:
                LOG.error('Exception building SVM '
                          f'estimator with exception {e}')
                return False, results

        # Fit estimator to the data
        self.estimator.fit(X, Y)

        # Keep a copy of the fitted estimator; the conformal wrappers below
        # are built around this copy while self.estimator is replaced
        self.estimator_temp = copy(self.estimator)

        if self.param.getVal('conformal'):
            try:
                LOG.info("Building aggregated conformal SVM model")
                if self.param.getVal('quantitative'):
                    underlying_model = RegressorAdapter(self.estimator_temp)
                    # The normalizing model is built from the same estimator
                    # copy as the underlying model
                    normalizing_model = RegressorAdapter(self.estimator_temp)
                    normalizer = RegressorNormalizer(underlying_model,
                                                     normalizing_model,
                                                     AbsErrorErrFunc())
                    nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                     normalizer)

                    self.estimator = AggregatedCp(IcpRegressor(nc),
                                                  BootstrapSampler())
                    self.estimator.fit(X, Y)
                    # overrides non-conformal
                    results.append(
                        ('model', 'model type', 'conformal SVM quantitative'))

                else:
                    self.estimator = AggregatedCp(
                        IcpClassifier(
                            ClassifierNc(
                                ClassifierAdapter(self.estimator_temp),
                                MarginErrFunc())), BootstrapSampler())
                    self.estimator.fit(X, Y)
                    # overrides non-conformal
                    results.append(
                        ('model', 'model type', 'conformal SVM qualitative'))
            except Exception as e:
                LOG.error('Exception building aggregated conformal SVM '
                          f'estimator with exception {e}')
                # self.estimator may now be an unfitted wrapper, so the
                # build must not be reported as successful
                return False, results

        return True, results
                         iterations=5,
                         folds=5,
                         scoring_funcs=[class_mean_errors, class_avg_c],
                         significance_levels=[0.05, 0.1, 0.2])

print('Classification: iris')
scores = scores.drop(['fold', 'iter'], axis=1)
print(scores.groupby(['significance']).mean())

# -----------------------------------------------------------------------------
# Regression, absolute error
# -----------------------------------------------------------------------------
# Load the diabetes dataset used by this section
data = load_diabetes()

# Build the conformal regressor: a RandomForestRegressor with 100 estimators
# is wrapped in a RegressorAdapter and paired with AbsErrorErrFunc inside
# RegressorNc. icp_cv is the object handed to cross_val_score below.
icp = IcpRegressor(
    RegressorNc(RegressorAdapter(RandomForestRegressor(n_estimators=100)),
                AbsErrorErrFunc()))
icp_cv = RegIcpCvHelper(icp)

# Cross-validate with 5 iterations of 5 folds, scoring with reg_mean_errors
# and reg_median_size at significance levels 0.05, 0.1 and 0.2
scores = cross_val_score(icp_cv,
                         data.data,
                         data.target,
                         iterations=5,
                         folds=5,
                         scoring_funcs=[reg_mean_errors, reg_median_size],
                         significance_levels=[0.05, 0.1, 0.2])

# Report the mean of every score per significance level; the 'fold' and
# 'iter' columns are dropped first so they are not averaged as well
print('Absolute error regression: diabetes')
scores = scores.drop(['fold', 'iter'], axis=1)
print(scores.groupby(['significance']).mean())