def test_icp_regression_tree(self):
    """Smoke-test inductive conformal regression over a decision tree,
    first without and then with error normalization, printing the
    prediction intervals next to the ground truth."""
    # ------------------------------------------------------------------
    # Split the data into training, calibration and test indices
    # ------------------------------------------------------------------
    dataset = load_boston()
    order = np.random.permutation(dataset.target.size)
    train = order[:int(order.size / 3)]
    calibrate = order[int(order.size / 3):int(2 * order.size / 3)]
    test = order[int(2 * order.size / 3):]

    def _fit_and_report(icp_model):
        # Fit, calibrate, predict at 10% significance and print a table
        # of [min, max, truth, size] rows.
        icp_model.fit(dataset.data[train, :], dataset.target[train])
        icp_model.calibrate(dataset.data[calibrate, :],
                            dataset.target[calibrate])
        intervals = icp_model.predict(dataset.data[test, :],
                                      significance=0.1)
        widths = intervals[:, 1] - intervals[:, 0]
        table = np.vstack([intervals.T, dataset.target[test], widths.T]).T
        print(pd.DataFrame(table, columns=["min", "max", "truth", "size"]))

    # ------------------------------------------------------------------
    # Without normalization
    # ------------------------------------------------------------------
    plain_adapter = RegressorAdapter(
        DecisionTreeRegressor(min_samples_leaf=5))
    _fit_and_report(IcpRegressor(
        RegressorNc(plain_adapter, AbsErrorErrFunc())))

    # ------------------------------------------------------------------
    # With normalization (KNN models the expected error magnitude)
    # ------------------------------------------------------------------
    base_adapter = RegressorAdapter(
        DecisionTreeRegressor(min_samples_leaf=5))
    knn_adapter = RegressorAdapter(KNeighborsRegressor(n_neighbors=1))
    normalizer = RegressorNormalizer(base_adapter, knn_adapter,
                                     AbsErrorErrFunc())
    _fit_and_report(IcpRegressor(
        RegressorNc(base_adapter, AbsErrorErrFunc(), normalizer)))
def build(self):
    """Build a PLSR model (optionally optimized and/or conformal).

    Returns:
        (bool, object): success flag plus either a list of
        (key, description, value) result tuples or an error string.
    """
    # PLSR is a regression method; refuse qualitative data.
    if not self.quantitative:
        print("PLSR only applies to quantitative data")
        return False, "PLSR only applies to quantitative data"
    if self.failed:
        return False, "Error initiating model"

    # Work on copies so the stored matrices are not mutated.
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    if self.cv:
        # NOTE(review): 46 looks like a fixed random seed — confirm.
        self.cv = getCrossVal(self.cv, 46, self.n, self.p)

    if self.tune:
        # Optimize the estimator; 'auto' uses the parent's grid search,
        # 'manual' uses the local implementation.
        if self.optimiz == 'auto':
            super(PLSR, self).optimize(X, Y,
                                       PLS_r(**self.estimator_parameters),
                                       self.tune_parameters)
        elif self.optimiz == 'manual':
            self.optimize(X, Y, PLS_r(**self.estimator_parameters),
                          self.tune_parameters)
        results.append(
            ('model', 'model type', 'PLSR quantitative (optimized)'))
    else:
        print("Building Quantitative PLSR")
        self.estimator = PLS_r(**self.estimator_parameters)
        results.append(('model', 'model type', 'PLSR quantitative'))

    if self.conformal:
        underlying_model = RegressorAdapter(self.estimator)
        # FIX: removed a dead assignment that built a KNN-based
        # normalizing model and immediately overwrote it; the estimator
        # itself is the normalizing model, as the final assignment chose.
        normalizing_model = RegressorAdapter(self.estimator)
        normalizer = RegressorNormalizer(
            underlying_model, normalizing_model, AbsErrorErrFunc())
        nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
        self.conformal_pred = AggregatedCp(IcpRegressor(nc),
                                           BootstrapSampler())
        self.conformal_pred.fit(X, Y)
        # overrides non-conformal
        results.append(
            ('model', 'model type', 'conformal PLSR quantitative'))

    self.estimator.fit(X, Y)
    return True, results
def test_acp_regression_tree(self):
    """Smoke-test aggregated conformal prediction over a decision tree
    with three different samplers, printing the error rate and the
    prediction intervals for each."""
    # ------------------------------------------------------------------
    # Experiment setup
    # ------------------------------------------------------------------
    dataset = load_diabetes()
    order = np.random.permutation(dataset.target.size)
    cut = int(2 * order.size / 3)
    train = order[:cut]
    test = order[cut:]
    truth = dataset.target[test]
    columns = ["min", "max", "truth"]
    significance = 0.1

    def _tree_acp(sampler):
        # Fresh tree-based ICP wrapped in an aggregated predictor.
        return AggregatedCp(
            IcpRegressor(
                RegressorNc(RegressorAdapter(DecisionTreeRegressor()))),
            sampler)

    # ------------------------------------------------------------------
    # Define models
    # ------------------------------------------------------------------
    models = {
        "ACP-RandomSubSampler": _tree_acp(RandomSubSampler()),
        "ACP-CrossSampler": _tree_acp(CrossSampler()),
        "ACP-BootstrapSampler": _tree_acp(BootstrapSampler()),
    }

    # ------------------------------------------------------------------
    # Train, predict and evaluate
    # ------------------------------------------------------------------
    for name, model in models.items():
        model.fit(dataset.data[train, :], dataset.target[train])
        point_pred = model.predict(dataset.data[test, :])
        interval_pred = model.predict(dataset.data[test, :],
                                      significance=significance)
        frame = pd.DataFrame(np.vstack((interval_pred.T, truth)).T,
                             columns=columns)
        print("\n{}".format(name))
        print("Error rate: {}".format(
            reg_mean_errors(point_pred, truth, significance)))
        print(frame)
def create_nc(model, err_func=None, normalizer_model=None, oob=False,
              fit_params=None, fit_params_normalizer=None):
    """Create a nonconformity scorer appropriate for *model*.

    Classifiers get a ClassifierNc (default MarginErrFunc), regressors a
    RegressorNc (default AbsErrorErrFunc). When *normalizer_model* is
    given it is wrapped in a RegressorNormalizer; when *oob* is True an
    out-of-bag adapter is used, after probing a clone of the model for
    OOB support.
    """
    if normalizer_model is None:
        normalizer_adapter = None
    else:
        normalizer_adapter = RegressorAdapter(normalizer_model,
                                              fit_params_normalizer)

    if isinstance(model, sklearn.base.ClassifierMixin):
        if err_func is None:
            err_func = MarginErrFunc()
        if oob:
            # Probe a clone on a tiny fit to see whether the model
            # exposes OOB decision scores.
            probe = sklearn.base.clone(model)
            probe.fit([[0], [1]], [0, 1])
            if not hasattr(probe, 'oob_decision_function_'):
                raise AttributeError('Cannot use out-of-bag '
                                     'calibration with {}'.format(
                                         model.__class__.__name__))
            adapter = OobClassifierAdapter(model, fit_params)
        else:
            adapter = ClassifierAdapter(model, fit_params)

        if normalizer_adapter is None:
            return ClassifierNc(adapter, err_func)
        normalizer = RegressorNormalizer(adapter, normalizer_adapter,
                                         err_func)
        return ClassifierNc(adapter, err_func, normalizer)

    elif isinstance(model, sklearn.base.RegressorMixin):
        if err_func is None:
            err_func = AbsErrorErrFunc()
        if oob:
            # Same OOB probe, but regressors expose oob_prediction_.
            probe = sklearn.base.clone(model)
            probe.fit([[0], [1]], [0, 1])
            if not hasattr(probe, 'oob_prediction_'):
                raise AttributeError('Cannot use out-of-bag '
                                     'calibration with {}'.format(
                                         model.__class__.__name__))
            adapter = OobRegressorAdapter(model, fit_params)
        else:
            adapter = RegressorAdapter(model, fit_params)

        if normalizer_adapter is None:
            return RegressorNc(adapter, err_func)
        normalizer = RegressorNormalizer(adapter, normalizer_adapter,
                                         err_func)
        return RegressorNc(adapter, err_func, normalizer)
def CF_quantitative_validation(self):
    ''' Performs internal validation for conformal quantitative models.

    Runs several external validations of an aggregated conformal
    regressor, averages the interval widths and coverage accuracies,
    stores them on self and returns them as result tuples.
    '''
    # Work on copies of the original matrices.
    X = self.X.copy()
    Y = self.Y.copy()
    # Number of external validations for the aggregated conformal estimator.
    # NOTE(review): random_state=i (0, 1, 2) is used below, so these
    # values never reach train_test_split — confirm intended seeds.
    seeds = [5, 7, 35]
    # Interval means for each aggregated conformal estimator (out of 3)
    interval_means = []
    # Accuracies for each aggregated conformal estimator (out of 3)
    accuracies = []
    results = []
    try:
        for i in range(len(seeds)):
            # Generate training and test sets
            X_train, X_test, Y_train, Y_test = train_test_split(
                X, Y, test_size=0.25, random_state=i, shuffle=False)
            # Create the aggregated conformal regressor.
            conformal_pred = AggregatedCp(
                IcpRegressor(RegressorNc(RegressorAdapter(
                    self.estimator))),
                BootstrapSampler())
            # Fit conformal regressor to the data
            conformal_pred.fit(X_train, Y_train)
            # Perform prediction on test set
            prediction = conformal_pred.predict(X_test,
                                                self.conformalSignificance)
            # FIX: interval width is |lower - upper|; the original
            # computed |lower| - |upper|, which is negative for ordinary
            # intervals (the KFold sibling of this method uses the
            # correct form).
            interval_means.append(
                np.mean(np.abs(prediction[:, 0] - prediction[:, 1])))
            Y_test = Y_test.reshape(-1, 1)
            # Boolean mask of instances within the applicability domain.
            inside_interval = ((prediction[:, 0].reshape(-1, 1) < Y_test) &
                               (prediction[:, 1].reshape(-1, 1) > Y_test))
            # Compute the accuracy (fraction of instances within the AD).
            accuracy = np.sum(inside_interval) / len(Y_test)
            accuracies.append(accuracy)
    except Exception as e:
        LOG.error(f'Quantitative conformal validation'
                  f' failed with exception: {e}')
        raise e
    # Compute mean interval width and accuracy over the repetitions.
    interval_means = np.mean(interval_means)
    accuracies = np.mean(accuracies)
    # Round to two decimals.
    self.conformal_accuracy = float("{0:.2f}".format(accuracies))
    self.conformal_mean_interval = float("{0:.2f}".format(interval_means))
    # Add quality metrics to results.
    results.append(('Conformal_mean_interval', 'Conformal mean interval',
                    self.conformal_mean_interval))
    results.append(('Conformal_accuracy', 'Conformal accuracy',
                    self.conformal_accuracy))
    return True, (results, )
def CF_QuanVal(X, Y, estimator, conformalSignificance):
    """Quick sanity check of an aggregated conformal regressor.

    Fits on the first 30 rows, predicts on the rest, prints the mean
    interval width at 25% significance and returns the fitted predictor.
    Note: *conformalSignificance* is currently unused.
    """
    print("Starting quantitative conformal prediction validation")
    icp = AggregatedCp(IcpRegressor(RegressorNc(RegressorAdapter(estimator))),
                       BootstrapSampler())
    icp.fit(X[:30], Y[:30])
    prediction = icp.predict(X[30:])
    prediction_sign = icp.predict(X[30:], significance=0.25)
    # FIX: interval width is upper - lower; the original printed
    # lower - upper, which is negative for ordinary intervals.
    interval = prediction_sign[:, 1] - prediction_sign[:, 0]
    print(np.mean(interval))
    print(interval)
    print("\n")
    print(prediction)
    print(prediction_sign)
    return (icp)
def CF_QuanCal(X, Y, estimator):
    """Build and fit a normalized aggregated conformal regressor.

    The nonconformity scorer normalizes absolute errors with a copy of
    the estimator itself; aggregation uses random sub-sampling.
    """
    # FIX: the original closed the parenthesis so that RandomSubSampler()
    # was passed as the second argument of IcpRegressor (its condition
    # slot) instead of as the sampler of AggregatedCp — every other
    # aggregated predictor in this module passes the sampler to
    # AggregatedCp.
    acp = AggregatedCp(
        IcpRegressor(
            RegressorNc(
                RegressorAdapter(estimator),
                AbsErrorErrFunc(),
                RegressorNormalizer(estimator,
                                    copy.copy(estimator),
                                    AbsErrorErrFunc()))),
        RandomSubSampler())
    acp.fit(X, Y)
    return acp
from nonconformist.base import RegressorAdapter
from nonconformist.icp import IcpRegressor
from nonconformist.nc import RegressorNc, AbsErrorErrFunc, SignErrorErrFunc

# -----------------------------------------------------------------------------
# Split the data into training, calibration and test indices
# -----------------------------------------------------------------------------
dataset = load_boston()
order = np.random.permutation(dataset.target.size)
train = order[:int(order.size / 3)]
calibrate = order[int(order.size / 3):int(2 * order.size / 3)]
test = order[int(2 * order.size / 3):]

# -----------------------------------------------------------------------------
# Train and calibrate a sign-error ICP over a decision tree
# -----------------------------------------------------------------------------
icp = IcpRegressor(
    RegressorNc(RegressorAdapter(DecisionTreeRegressor()),
                SignErrorErrFunc()))
icp.fit(dataset.data[train, :], dataset.target[train])
icp.calibrate(dataset.data[calibrate, :], dataset.target[calibrate])

# -----------------------------------------------------------------------------
# Predict intervals at 5% significance and print them with the ground truth
# -----------------------------------------------------------------------------
intervals = icp.predict(dataset.data[test, :], significance=0.05)
header = np.array(['min', 'max', 'Truth'])
table = np.vstack([intervals.T, dataset.target[test]]).T
print(pd.DataFrame(np.vstack([header, table])))
def CF_quantitative_validation(self):
    ''' Performs internal validation for conformal quantitative models.

    K-fold cross-validates an aggregated conformal regressor, collecting
    per-instance prediction intervals, then stores mean interval width,
    coverage accuracy and interval medians on self and returns them
    under the 'quality' key.
    '''
    # Make a copy of original matrices so the stored data is untouched.
    X = self.X.copy()
    Y = self.Y.copy()
    info = []
    # Fold count comes from model parameters; seed fixed for
    # reproducibility.
    kf = KFold(n_splits=self.param.getVal('ModelValidationN'),
               shuffle=True, random_state=46)
    # Copy Y vector to use it as template to assign predictions
    # back into their original row order.
    Y_pred = copy.copy(Y).tolist()
    try:
        for train_index, test_index in kf.split(X):
            # Generate training and test sets for this fold
            X_train, X_test = X[train_index], X[test_index]
            Y_train, Y_test = Y[train_index], Y[test_index]
            # Create the aggregated conformal regressor.
            conformal_pred = AggregatedCp(
                IcpRegressor(
                    RegressorNc(RegressorAdapter(self.estimator_temp))),
                BootstrapSampler())
            # Fit conformal regressor to the data
            conformal_pred.fit(X_train, Y_train)
            # Perform prediction on test set
            prediction = conformal_pred.predict(
                X_test, self.param.getVal('conformalSignificance'))
            # Assign each prediction its original row index
            for index, el in enumerate(test_index):
                Y_pred[el] = prediction[index]
    except Exception as e:
        LOG.error(f'Quantitative conformal validation'
                  f' failed with exception: {e}')
        raise e
    Y_pred = np.asarray(Y_pred)
    # Mean width of the prediction intervals: |lower - upper|.
    interval_mean = np.mean(np.abs((Y_pred[:, 0]) - (Y_pred[:, 1])))
    # Boolean mask of instances within the applicability domain.
    # NOTE(review): Y_pred[:, 0].reshape(-1, 1) is (n, 1); if Y is (n,)
    # this comparison broadcasts to (n, n) and the accuracy below is
    # wrong — confirm Y's shape at this point (the sibling method
    # reshapes Y_test first).
    inside_interval = ((Y_pred[:, 0].reshape(-1, 1) < Y) &
                       (Y_pred[:, 1].reshape(-1, 1) > Y))
    # Compute the accuracy (fraction of instances within the AD).
    accuracy = np.sum(inside_interval) / len(Y)
    # Interval midpoints; accuracy/width rounded to two decimals.
    self.conformal_interval_medians = (np.mean(Y_pred, axis=1))
    self.conformal_accuracy = float("{0:.2f}".format(accuracy))
    self.conformal_mean_interval = float("{0:.2f}".format(interval_mean))
    # Add quality metrics to results.
    info.append(('Conformal_mean_interval', 'Conformal mean interval',
                 self.conformal_mean_interval))
    info.append(('Conformal_accuracy', 'Conformal accuracy',
                 self.conformal_accuracy))
    info.append(
        ('Conformal_interval_medians', 'Conformal interval medians',
         self.conformal_interval_medians))
    info.append(('Conformal_prediction_ranges',
                 'Conformal prediction ranges', Y_pred))
    results = {}
    results['quality'] = info
    return True, results
def build(self):
    '''Build a new RF model with the X and Y numpy matrices.

    Returns:
        False on initialization failure, otherwise (True, results)
        where results is a list of (key, description, value) tuples.
    '''
    if self.failed:
        return False

    # Work on copies so the stored matrices are not mutated.
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    if self.cv:
        self.cv = getCrossVal(self.cv,
                              self.estimator_parameters["random_state"],
                              self.n, self.p)

    if self.tune:
        # Grid-search optimization; optimize() is expected to set
        # self.estimator.
        if self.quantitative:
            self.optimize(X, Y, RandomForestRegressor(),
                          self.tune_parameters)
            results.append(
                ('model', 'model type', 'RF quantitative (optimized)'))
        else:
            self.optimize(X, Y, RandomForestClassifier(),
                          self.tune_parameters)
            results.append(
                ('model', 'model type', 'RF qualitative (optimized)'))
    else:
        if self.quantitative:
            log.info("Building Quantitative RF model")
            # class_weight only applies to classifiers; drop it if present.
            self.estimator_parameters.pop('class_weight', None)
            self.estimator = RandomForestRegressor(
                **self.estimator_parameters)
            results.append(('model', 'model type', 'RF quantitative'))
        else:
            log.info("Building Qualitative RF model")
            self.estimator = RandomForestClassifier(
                **self.estimator_parameters)
            results.append(('model', 'model type', 'RF qualitative'))

    if self.conformal:
        if self.quantitative:
            underlying_model = RegressorAdapter(self.estimator)
            # FIX: removed a dead assignment that built a KNN-based
            # normalizing model and immediately overwrote it; the
            # estimator itself is the normalizing model, as the final
            # assignment chose.
            normalizing_model = RegressorAdapter(self.estimator)
            normalizer = RegressorNormalizer(underlying_model,
                                             normalizing_model,
                                             AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                             normalizer)
            self.conformal_pred = AggregatedCp(IcpRegressor(nc),
                                               BootstrapSampler())
            self.conformal_pred.fit(X, Y)
            # overrides non-conformal
            results.append(
                ('model', 'model type', 'conformal RF quantitative'))
        else:
            self.conformal_pred = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator),
                                 MarginErrFunc())),
                BootstrapSampler())
            self.conformal_pred.fit(X, Y)
            # overrides non-conformal
            results.append(
                ('model', 'model type', 'conformal RF qualitative'))

    self.estimator.fit(X, Y)
    return True, results

#### Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''
def build(self):
    """Build a PLSR model (optionally tuned) and, when requested, wrap
    it in an aggregated conformal regressor.

    Returns:
        (bool, object): success flag plus either a list of
        (key, description, value) result tuples or an error string.
    """
    # Make a copy of data matrices
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    if self.param.getVal('tune'):
        # Optimize estimator using sklearn-gridsearch
        if self.estimator_parameters['optimize'] == 'auto':
            try:
                LOG.info('Optimizing PLSR using SK-LearnGridSearch')
                # Remove optimize key from parameter dictionary
                # to avoid sklearn estimator error (unexpected keyword)
                self.estimator_parameters.pop("optimize")
                super(PLSR, self).optimize(
                    X, Y, PLS_r(**self.estimator_parameters),
                    self.param.getDict('PLSR_optimize'))
            except Exception as e:
                LOG.error(f'Error performing SK-LearnGridSearch'
                          f' on PLSR estimator with exception {e}')
                return False, f'Error performing SK-LearnGridSearch on PLSR estimator with exception {e}'
        # Optimize using flame implementation (recommended)
        elif self.estimator_parameters['optimize'] == 'manual':
            LOG.info('Optimizing PLSR using manual method')
            # Remove optimize key from parameter dictionary
            # to avoid sklearn estimator error (unexpected keyword)
            self.estimator_parameters.pop("optimize")
            success, message = self.optimize(
                X, Y, PLS_r(**self.estimator_parameters),
                self.param.getDict('PLSR_optimize'))
            if not success:
                return False, message
        else:
            LOG.error('Type of tune not recognized, check the input')
            return False, 'Type of tune not recognized, check the input'
        results.append(('model', 'model type',
                        'PLSR quantitative (optimized)'))
    else:
        LOG.info('Building Quantitative PLSR with no optimization')
        try:
            # Remove optimize key from parameters to avoid error
            # as the sklearn estimator does not have this key
            self.estimator_parameters.pop("optimize")
            self.estimator = PLS_r(**self.estimator_parameters)
        except Exception as e:
            LOG.error(f'Error at PLS_r instantiation with '
                      f'exception {e}')
            # FIX: the returned message said "PLS_da"; this is the
            # PLS_r (PLSR) builder.
            return False, f'Error at PLS_r instantiation with exception {e}'
        results.append(('model', 'model type', 'PLSR quantitative'))

    # Fit estimator to the data
    self.estimator.fit(X, Y)

    if not self.param.getVal('conformal'):
        return True, results

    # Keep the plain estimator; self.estimator is replaced below.
    self.estimator_temp = copy(self.estimator)
    try:
        LOG.info('Building PLSR aggregated conformal predictor')
        underlying_model = RegressorAdapter(self.estimator_temp)
        # The estimator itself is used as the normalizing model
        # (a KNN alternative was considered and discarded).
        normalizing_model = RegressorAdapter(self.estimator_temp)
        normalizer = RegressorNormalizer(underlying_model,
                                         normalizing_model,
                                         AbsErrorErrFunc())
        nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
        self.estimator = AggregatedCp(IcpRegressor(nc),
                                      BootstrapSampler())
    except Exception as e:
        LOG.error(f'Error building aggregated PLSR conformal'
                  f' regressor with exception: {e}')
        return False, f'Error building aggregated PLSR conformal regressor with exception: {e}'

    # Fit conformal estimator to the data
    self.estimator.fit(X, Y)
    # overrides non-conformal
    results.append(('model', 'model type', 'conformal PLSR quantitative'))

    return True, results
# -----------------------------------------------------------------------------
# Split the data into training, calibration and test indices
# -----------------------------------------------------------------------------
dataset = load_boston()
order = np.random.permutation(dataset.target.size)
train = order[:int(order.size / 3)]
calibrate = order[int(order.size / 3):int(2 * order.size / 3)]
test = order[int(2 * order.size / 3):]

# -----------------------------------------------------------------------------
# Train and calibrate an (unnormalized) inductive conformal regressor
# -----------------------------------------------------------------------------
model_adapter = RegressorAdapter(DecisionTreeRegressor(min_samples_leaf=5))
icp = IcpRegressor(RegressorNc(model_adapter, AbsErrorErrFunc()))
icp.fit(dataset.data[train, :], dataset.target[train])
icp.calibrate(dataset.data[calibrate, :], dataset.target[calibrate])

# -----------------------------------------------------------------------------
# Predict 90% intervals and print them next to the truth and interval size
# -----------------------------------------------------------------------------
intervals = icp.predict(dataset.data[test, :], significance=0.1)
widths = intervals[:, 1] - intervals[:, 0]
table = np.vstack([intervals.T, dataset.target[test], widths.T]).T
print(pd.DataFrame(table, columns=['min', 'max', 'truth', 'size']))
def build(self):
    '''Build a new XGBOOST model with the X and Y numpy matrices.

    Returns:
        (bool, object): success flag plus either a list of
        (key, description, value) result tuples or an error string.
        May also re-raise the underlying exception on fit failure.
    '''
    # xgboost is an optional dependency; fail gracefully if absent.
    try:
        from xgboost.sklearn import XGBClassifier
        from xgboost.sklearn import XGBRegressor
    except Exception as e:
        return False, 'XGboost not found, please revise your environment'

    # Make a copy of data matrices
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    # If tune then call gridsearch to optimize the estimator
    if self.param.getVal('tune'):
        LOG.info("Optimizing XGBOOST estimator")
        try:
            # Check type of model
            if self.param.getVal('quantitative'):
                self.estimator = XGBRegressor(**self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                results.append(('model', 'model type',
                                'XGBOOST quantitative (optimized)'))
            else:
                self.estimator = XGBClassifier(**self.estimator_parameters)
                params = self.estimator.get_params()
                params['num_class'] = 2
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                results.append(('model', 'model type',
                                'XGBOOST qualitative (optimized)'))
        except Exception as e:
            return False, f'Exception optimizing XGBOOST estimator with exception {e}'
    else:
        try:
            if self.param.getVal('quantitative'):
                LOG.info("Building Quantitative XGBOOST model")
                self.estimator = XGBRegressor(**self.estimator_parameters)
                results.append(('model', 'model type',
                                'XGBOOST quantitative'))
            else:
                LOG.info("Building Qualitative XGBOOST model")
                self.estimator = XGBClassifier(**self.estimator_parameters)
                results.append(('model', 'model type',
                                'XGBOOST qualitative'))
            self.estimator.fit(X, Y)
            print(self.estimator)
        except Exception as e:
            # FIX: the original had an unreachable
            # `return False, f'Exception building ...'` after this
            # raise; the raise (the effective behavior) is kept and the
            # dead code removed.
            raise e

    # Keep the plain estimator; self.estimator is replaced below when
    # building the conformal wrapper.
    self.estimator_temp = copy(self.estimator)

    if not self.param.getVal('conformal'):
        return True, results

    # Create the conformal estimator
    try:
        # Conformal regressor
        if self.param.getVal('quantitative'):
            LOG.info("Building conformal Quantitative XGBOOST model")
            underlying_model = RegressorAdapter(self.estimator_temp)
            # The estimator itself is used as the normalizing model
            # (a KNN alternative was considered and discarded).
            normalizing_model = RegressorAdapter(self.estimator_temp)
            normalizer = RegressorNormalizer(
                underlying_model, normalizing_model, AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                             normalizer)
            self.estimator = AggregatedCp(IcpRegressor(nc),
                                          BootstrapSampler())
            self.estimator.fit(X, Y)
            results.append(('model', 'model type',
                            'conformal XGBOOST quantitative'))
        # Conformal classifier
        else:
            LOG.info("Building conformal Qualitative XGBOOST model")
            self.estimator = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(
                        ClassifierAdapter(self.estimator_temp),
                        MarginErrFunc()
                    )
                ),
                BootstrapSampler())
            # Fit estimator to the data
            self.estimator.fit(X, Y)
            results.append(('model', 'model type',
                            'conformal XGBOOST qualitative'))
    except Exception as e:
        # FIX: same unreachable return after raise removed here.
        raise e

    return True, results

## Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''
# def optimize(self, X, Y, estimator, tune_parameters): # ''' optimizes a model using a grid search over a range of values for diverse parameters''' # def regularProject(self, Xb, results): # ''' projects a collection of query objects in a regular model, for obtaining predictions ''' # def conformalProject(self, Xb, results): # ''' projects a collection of query objects in a conformal model, for obtaining predictions ''' # def project(self, Xb, results): # ''' Uses the X matrix provided as argument to predict Y'''
def cv(df, parameters):
    """Three-split expanding-window cross-validation of conformal
    interval predictors for the NetPosUsd series.

    For each of three train/test cut ratios, fits an inductive
    conformal regressor both with a KNN error normalizer (NCP) and
    without (CP), scores both with qd_objective, and appends the mean
    losses together with the hyperparameters to
    '<algorithm>_cv.csv'.

    Args:
        df: DataFrame with a 'NetPosUsd' target column and a 'QdfTime'
            column that is dropped before modeling.
        parameters: dict with 'algorithm', 'randomized_calibration',
            'alpha_' (the conformal significance level) plus the
            estimator's own hyperparameters.
    """
    # Reserve the last 120 rows; CV operates on the remainder.
    end = len(df) - 120
    out = np.zeros(3)   # CP (unnormalized) losses per split
    out2 = np.zeros(3)  # NCP (normalized) losses per split
    # Estimator kwargs: everything except the meta-keys.
    p = parameters.copy()
    p.pop('algorithm')
    p.pop('randomized_calibration')
    p.pop('alpha_')
    if parameters.get('algorithm') == 'RandomForest':
        algorithm = RandomForestRegressor(**p)
        d = {'n_estimators': parameters.get('n_estimators'),
             "criterion": parameters.get("criterion"),
             "max_features": parameters.get("max_features"),
             "min_samples_split": parameters.get("min_samples_split"),
             "min_samples_leaf": parameters.get("min_samples_leaf")
             }
    if parameters.get('algorithm') == 'K-NearestNeighbours':
        algorithm = KNeighborsRegressor(**p)
        # NOTE(review): 'n_neighbours' differs from sklearn's
        # 'n_neighbors' spelling — confirm the keys callers supply.
        d = {
            'n_neighbours': parameters.get('n_neighbours'),
            'weights': parameters.get('weights'),
            'metric': parameters.get('metric')
        }
    if parameters.get('algorithm') == 'LightGBM':
        algorithm = LGBMRegressor(**p)
        d = {"metric": parameters.get("metric"),
             "num_leaves": parameters.get('num_leaves'),
             "learning_rate": parameters.get('learning_rate'),
             "feature_fraction": parameters.get('feature_fraction'),
             "bagging_fraction": parameters.get('bagging_fraction'),
             "bagging_freq": parameters.get('bagging_freq'),
             }
    if parameters.get('algorithm') == 'LassoRegression':
        algorithm = Lasso(**p)
        d = {'alpha_': parameters.get('alpha_')}
    if parameters.get('algorithm') == 'NeuralNetwork':
        algorithm = NeuralNetworkAlgorithm(p)
    if parameters.get('algorithm') == 'LSTM':
        algorithm = BiLSTM(**p)
        d = {}
    # NOTE(review): this rebinding replaces every per-algorithm `d`
    # built above with the raw kwargs dict — the per-algorithm dicts
    # are dead code as written; confirm which report format is wanted.
    d = p
    d['alpha_'] = parameters.get('alpha_')
    # Standardize all features; keep target mean/std to invert later.
    m, s = df['NetPosUsd'].mean(), df['NetPosUsd'].std()
    df = df.drop(['QdfTime'], axis=1)
    mean = df.mean(axis=0)
    std = df.std(axis=0)
    df = (df - mean) / std
    for i, ratio in enumerate(([.5, 0.66, .84])):
        if parameters.get('randomized_calibration') == True:
            # Calibration set drawn at random from the training window.
            train_ = df.drop(['NetPosUsd'],
                             axis=1).iloc[:int(end * ratio), :].values
            choose = np.random.choice(len(train_), int(end / 6),
                                      replace=False)
            calibrate = train_[choose, :]
            mask = np.ones(len(train_), dtype=bool)
            mask[choose] = False
            train = train_[mask, :]
            test = (df.drop(['NetPosUsd'],
                            axis=1)).iloc[int(end * ratio):int(end * ratio)
                                          + int(end / 6), :].values
            ytrain_ = df['NetPosUsd'][:int(end * ratio)].values
            ycalibrate = ytrain_[choose]
            ytrain = ytrain_[mask]
            ytest = df['NetPosUsd'].iloc[int(end * ratio):int(end * ratio)
                                         + int(end / 6)]
        else:
            # Sequential split: calibration is the tail of the training
            # window, test is the following window.
            train = df.drop(['NetPosUsd'],
                            axis=1).iloc[:int(end * ratio)
                                         - int(end / 6), :].values
            calibrate = df.drop(['NetPosUsd'],
                                axis=1).iloc[int(end * ratio)
                                             - int(end / 6):
                                             int(end * ratio), :].values
            test = df.drop(['NetPosUsd'],
                           axis=1).iloc[int(end * ratio):int(end * ratio)
                                        + int(end / 6), :].values
            ytrain = df['NetPosUsd'][:int(end * ratio)
                                     - int(end / 6)].values
            ycalibrate = df['NetPosUsd'][int(end * ratio)
                                         - int(end / 6):
                                         int(end * ratio)].values
            ytest = df['NetPosUsd'][int(end * ratio):int(end * ratio)
                                    + int(end / 6)].values
        # print(len(train),len(ytrain),len(calibrate),len(ycalibrate),len(test),len(ytest))
        # Train and calibrate the normalized (NCP) predictor
        # -----------------------------------------------------------------
        underlying_model = RegressorAdapter(algorithm)
        normalizing_model = RegressorAdapter(
            KNeighborsRegressor(n_neighbors=50))
        normalizer = RegressorNormalizer(underlying_model,
                                         normalizing_model,
                                         AbsErrorErrFunc())
        nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
        icp = IcpRegressor(nc)
        icp.fit(train, ytrain)
        icp.calibrate(calibrate, ycalibrate)
        # -----------------------------------------------------------------
        # Predict
        # -----------------------------------------------------------------
        prediction = icp.predict(test,
                                 significance=parameters.get('alpha_'))
        header = ['NCP_lower', 'NCP_upper', 'NetPosUsd', 'prediction']
        # Interval midpoint, used as the point prediction.
        size = prediction[:, 1] / 2 + prediction[:, 0] / 2
        # Undo the target standardization.
        prediction = prediction * s + m
        ytest = ytest * s + m
        size = size * s + m
        table = np.vstack([prediction.T, ytest, size.T]).T
        dfncp = pd.DataFrame(table, columns=header)
        # Plain (unnormalized) CP predictor on the same splits
        underlying_model = RegressorAdapter(algorithm)
        nc = RegressorNc(underlying_model, AbsErrorErrFunc())
        icp = IcpRegressor(nc)
        icp.fit(train, ytrain)
        icp.calibrate(calibrate, ycalibrate)
        prediction = icp.predict(test,
                                 significance=parameters.get('alpha_'))
        header = ['cp_lower', 'cp_upper']
        prediction = prediction * s + m
        table = np.vstack([prediction.T]).T
        dfcp = pd.DataFrame(table, columns=header)
        dfncp['CP_lower'] = dfcp['cp_lower']
        dfncp['CP_upper'] = dfcp['cp_upper']
        # Interval-quality loss for both predictors on this split.
        out[i] = qd_objective(dfncp.NetPosUsd, dfncp['CP_lower'],
                              dfncp['CP_upper'],
                              parameters.get('alpha_'))
        out2[i] = qd_objective(dfncp.NetPosUsd, dfncp['NCP_lower'],
                               dfncp['NCP_upper'],
                               parameters.get('alpha_'))
    d['CP_loss'] = np.mean(out)
    d['NCP_loss'] = np.mean(out2)
    # Append to the per-algorithm results CSV, creating it if needed.
    if os.path.exists(parameters.get('algorithm') + '_cv.csv') == True:
        pd.DataFrame(data=d, index=[0]).to_csv(
            parameters.get('algorithm') + '_cv.csv',
            mode='a', header=False, index=False)
    else:
        pd.DataFrame(data=d, index=[0]).to_csv(
            parameters.get('algorithm') + '_cv.csv',
            encoding='utf-8', index=False)
def train_and_test_cp_algo(i, window=96, calibration_size=3480, test_size=120):
    """Evaluate normalized (NCP) and plain (CP) inductive conformal
    predictors built on a BiLSTM over one hourly NetPosUsd file.

    For every significance level in {5%, ..., 95%} the prediction
    intervals (mapped back to the raw scale with the inverse mlog
    transform) are appended to one CSV per (method, significance) pair.

    Parameters
    ----------
    i : int
        Index of the input CSV; also controls whether the output files
        are created (i == 0) or appended to (i > 0).
    window : int, optional
        Length of the sliding input window fed to the LSTM.
    calibration_size : int, optional
        Number of samples reserved for conformal calibration.
    test_size : int, optional
        Number of trailing samples held out for testing.
    """
    p = {'window': window}
    algorithm = BiLSTM(p)
    path = 'data\EURUSD_NETPOSUSD_hourly_for_regresion' + str(i) + '.csv'
    df = pd.read_csv(path).drop(['QdfTime', 'Unnamed: 0'], axis=1).fillna(0)

    # Raw (untransformed) test targets go verbatim into the output tables.
    y_raw_test = df.NetPosUsd[-test_size:]

    # Robust location/scale used by the mlog transform and its inverse.
    median_ = df.NetPosUsd.median()
    mad_ = mad(df.NetPosUsd.values)
    df.NetPosUsd = mlog_trans(df.NetPosUsd.values)
    # mean = df.NetPosUsd.mean()
    # std = df.NetPosUsd.std()
    # df.NetPosUsd = (df.NetPosUsd - mean) / std
    data = df.NetPosUsd.values

    def generate_index(window, data_matrix):
        """Yield consecutive length-`window` slices of a 1-D series,
        each reshaped to a (window, 1) column."""
        num_elements = data_matrix.shape[0]
        for start, stop in zip(range(0, num_elements - window, 1),
                               range(window, num_elements, 1)):
            yield data_matrix[stop - window:stop].reshape((-1, 1))

    # Build the supervised matrix: each row is one sliding window, the
    # target is the value immediately after it.
    X = np.array([sequence for sequence in generate_index(window, data)])
    y = data[window:]
    X = X.reshape(X.shape[0], X.shape[1])

    train_test_split = X.shape[0] - test_size - calibration_size
    train = X[:train_test_split, :]
    calibrate = X[train_test_split:train_test_split + calibration_size, :]
    test = X[-test_size:]
    ytrain = y[:train_test_split]
    ycalibrate = y[train_test_split:train_test_split + calibration_size]
    ytest = y[-test_size:]

    # NCP: conformal regressor with KNN-based difficulty normalization.
    underlying_model = RegressorAdapter(algorithm)
    normalizing_model = RegressorAdapter(KNeighborsRegressor(n_neighbors=50))
    normalizer = RegressorNormalizer(underlying_model, normalizing_model,
                                     AbsErrorErrFunc())
    nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
    icp = IcpRegressor(nc)
    icp.fit(train, ytrain)
    icp.calibrate(calibrate, ycalibrate)

    # CP: plain (non-normalized) conformal regressor on the same model.
    underlying_model2 = RegressorAdapter(algorithm)
    nc2 = RegressorNc(underlying_model2, AbsErrorErrFunc())
    icp2 = IcpRegressor(nc2)
    icp2.fit(train, ytrain)
    icp2.calibrate(calibrate, ycalibrate)

    # BUG FIX: the original re-applied mlog_inverse to `ytest` on every
    # significance-loop iteration, inverse-transforming already-inverted
    # values. Invert once, up front. (Outputs use y_raw_test, so results
    # written to disk are unchanged.)
    ytest = mlog_inverse(ytest, median_, mad_)

    def _append_csv(frame, prefix, a):
        """Write `frame` to the per-significance CSV; header only for i == 0."""
        fname = (prefix + '_' + 'cudaLSTM' + '_'
                 + str(np.round(a).astype(int)) + '_' + 'calibrationwindow'
                 + str(calibration_size) + '.csv')
        if i == 0:
            frame.to_csv(fname, encoding='utf-8', index=False)
        else:
            frame.to_csv(fname, mode='a', header=False, index=False)

    for a in tqdm(np.linspace(5, 95, 19)):
        # -----------------------------------------------------------------
        # Predict with both predictors; identical post-processing for each.
        # -----------------------------------------------------------------
        for model, prefix, lower_col, upper_col in (
                (icp, 'NCP', 'NCP_lower', 'NCP_upper'),
                (icp2, 'CP', 'CP_lower', 'CP_upper')):
            prediction = model.predict(test, significance=a / 100)
            lower = mlog_inverse(prediction[:, 0], median_, mad_)
            upper = mlog_inverse(prediction[:, 1], median_, mad_)
            # Interval midpoint doubles as the point prediction.
            size = upper / 2 + lower / 2
            table = np.vstack([lower, upper, y_raw_test, size.T]).T
            frame = pd.DataFrame(
                table, columns=[lower_col, upper_col, 'NetPosUsd', 'prediction'])
            _append_csv(frame, prefix, a)
def build(self):
    '''Build a new DL (Keras) model from the X and Y numpy matrices.

    Returns
    -------
    (bool, list | str)
        (True, results) on success, where `results` is a list of
        (key, description, value) triples; (False, message) on failure.
    '''
    try:
        from keras.wrappers.scikit_learn import KerasClassifier
        from keras.wrappers.scikit_learn import KerasRegressor
    except Exception as e:
        return False, 'Keras not found, please revise your environment'

    # Work on copies so fitting cannot mutate the stored matrices.
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    # If tune then call gridsearch to optimize the estimator
    if self.param.getVal('tune'):
        LOG.info("Optimizing Keras estimator")
        try:
            # Check type of model
            if self.param.getVal('quantitative'):
                self.estimator = KerasRegressor(
                    **self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                results.append(('model', 'model type',
                                'KERAS quantitative (optimized)'))
            else:
                self.estimator = KerasClassifier(
                    **self.estimator_parameters)
                # params = self.estimator.get_params()
                # params['num_class'] = 2
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                results.append(('model', 'model type',
                                'KERAS qualitative (optimized)'))
        except Exception as e:
            return False, f'Exception optimizing KERAS estimator with exception {e}'
    else:
        try:
            if self.param.getVal('quantitative'):
                LOG.info("Building Quantitative KERAS mode")
                self.estimator = KerasRegressor(
                    build_fn=self.create_model,
                    **self.estimator_parameters,
                    verbose=0)
                results.append(
                    ('model', 'model type', 'Keras quantitative'))
            else:
                LOG.info("Building Qualitative Keras model")
                self.estimator = KerasClassifier(
                    build_fn=self.create_model,
                    dim=self.X.shape[1],
                    **self.estimator_parameters,
                    verbose=0)
                results.append(
                    ('model', 'model type', 'Keras qualitative'))
            self.estimator.fit(X, Y)
        except Exception as e:
            # BUG FIX: the original did `raise e` followed by an
            # unreachable `return`; honour the (False, message) error
            # contract used by the other builders instead.
            return False, f'Exception building Keras estimator with exception {e}'

    # Keep a pristine copy; the conformal wrapper below replaces
    # self.estimator.
    self.estimator_temp = clone(self.estimator)

    if not self.param.getVal('conformal'):
        return True, results

    # Create the conformal estimator
    try:
        # Conformal regressor
        if self.param.getVal('quantitative'):
            LOG.info("Building conformal Quantitative Keras model")
            underlying_model = RegressorAdapter(self.estimator_temp)
            normalizing_model = RegressorAdapter(
                KNeighborsRegressor(n_neighbors=15))
            # normalizing_model = RegressorAdapter(self.estimator_temp)
            normalizer = RegressorNormalizer(underlying_model,
                                             normalizing_model,
                                             AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
            # self.conformal_pred = AggregatedCp(IcpRegressor
            # (RegressorNc(RegressorAdapter(self.estimator))),
            # BootstrapSampler())
            self.estimator = AggregatedCp(IcpRegressor(nc),
                                          BootstrapSampler())
            self.estimator.fit(X, Y)
            results.append(
                ('model', 'model type', 'conformal Keras quantitative'))
        # Conformal classifier
        else:
            LOG.info("Building conformal Qualitative Keras model")
            self.estimator = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                 MarginErrFunc())),
                BootstrapSampler())
            # Fit estimator to the data
            self.estimator.fit(X, Y)
            results.append(
                ('model', 'model type', 'conformal Keras qualitative'))
    except Exception as e:
        # BUG FIX: same raise-then-unreachable-return pattern as above.
        return False, f'Exception building conformal Keras estimator with exception {e}'

    # CONSISTENCY FIX: return the collected results (the original
    # returned an empty list here, discarding them), matching the
    # sibling RF builder.
    return True, results
def train_and_test_cp_algo(parameters):
    """Rolling evaluation of an inductive conformal predictor over 29
    hourly NetPosUsd files.

    `parameters` mixes CP configuration keys ('algorithm',
    'randomized_calibration', 'alpha_', 'calibration_size', 'WhichCP')
    with the underlying regressor's hyper-parameters; the CP keys are
    stripped before the remainder is handed to the regressor. Interval
    predictions (mapped back to the raw NetPosUsd scale) are appended to
    one CSV per configuration.

    Raises
    ------
    ValueError
        If `parameters['algorithm']` names an unsupported regressor.
    """
    p = parameters.copy()
    p.pop('algorithm')
    p.pop('randomized_calibration')
    p.pop('alpha_')
    p.pop('calibration_size')
    p.pop('WhichCP')

    for i in tqdm(range(29)):
        name = parameters.get('algorithm')
        # ROBUSTNESS FIX: the original used independent `if`s with no
        # fallback, so an unknown name left `algorithm` unbound and
        # crashed later with a NameError.
        if name == 'RandomForest':
            algorithm = RandomForestRegressor(**p)
        elif name == 'K-NearestNeighbours':
            algorithm = KNeighborsRegressor(**p)
        elif name == 'LightGBM':
            algorithm = LGBMRegressor(**p)
        elif name == 'LassoRegression':
            algorithm = Lasso(**p)
        elif name == 'NeuralNetwork':
            algorithm = NeuralNetworkAlgorithm(p)
        elif name == 'LSTM':
            algorithm = BiLSTM(**p)
        elif name == 'GradientBoosting':
            algorithm = GradientBoostingRegressor(**p)
        else:
            raise ValueError(f'Unknown algorithm: {name!r}')

        path = 'data\EURUSD_NETPOSUSD_hourly_for_regresion' + str(i) + '.csv'
        df = pd.read_csv(path).drop(['Unnamed: 0', 'QdfTime'], axis=1).fillna(0)

        # Keep the raw target moments to undo the z-scoring on output.
        m, s = df['NetPosUsd'].mean(), df['NetPosUsd'].std()
        mean = df.mean(axis=0)
        std = df.std(axis=0)
        df = (df - mean) / std

        if parameters.get('randomized_calibration'):
            # Calibration points drawn at random from the training span;
            # the final 120 rows are always the test set.
            train_test_split = len(df) - 120
            train_ = df.drop(['NetPosUsd'],
                             axis=1).iloc[:train_test_split, :].values
            choose = np.random.choice(len(train_),
                                      parameters.get("calibration_size"),
                                      replace=False)
            calibrate = train_[choose, :]
            mask = np.ones(len(train_), dtype=bool)
            mask[choose] = False
            train = train_[mask, :]
            test = (df.drop(['NetPosUsd'],
                            axis=1)).iloc[train_test_split:, :].values
            ytrain_ = df['NetPosUsd'][:train_test_split].values
            ycalibrate = ytrain_[choose]
            ytrain = ytrain_[mask]
            ytest = df['NetPosUsd'].iloc[train_test_split:]
        else:
            # Calibration window is the block immediately before the
            # 120-row test set.
            calib = parameters.get("calibration_size")
            train_test_split = len(df) - 120 - calib
            train = df.drop(['NetPosUsd'],
                            axis=1).iloc[:train_test_split, :].values
            calibrate = df.drop(['NetPosUsd'], axis=1).iloc[
                train_test_split:train_test_split + calib, :].values
            test = (df.drop(['NetPosUsd'], axis=1)).iloc[-120:, :].values
            ytrain = df['NetPosUsd'][:train_test_split].values
            ycalibrate = df['NetPosUsd'][
                train_test_split:train_test_split + calib]
            ytest = df['NetPosUsd'].iloc[-120:]

        # Build the nonconformity scorer: normalized (NCP) or plain (CP).
        underlying_model = RegressorAdapter(algorithm)
        if parameters.get("WhichCP") == 'NCP':
            normalizing_model = RegressorAdapter(
                KNeighborsRegressor(n_neighbors=50))
            normalizer = RegressorNormalizer(underlying_model,
                                             normalizing_model,
                                             AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
            header = ['NCP_lower', 'NCP_upper', 'NetPosUsd', 'prediction']
        else:
            nc = RegressorNc(underlying_model, AbsErrorErrFunc())
            header = ['CP_lower', 'CP_upper', 'NetPosUsd', 'prediction']

        icp = IcpRegressor(nc)
        icp.fit(train, ytrain)
        icp.calibrate(calibrate, ycalibrate)

        # -----------------------------------------------------------------
        # Predict, then map everything back to the raw NetPosUsd scale.
        # -----------------------------------------------------------------
        prediction = icp.predict(test, significance=parameters.get('alpha_'))
        size = prediction[:, 1] / 2 + prediction[:, 0] / 2  # interval midpoint
        prediction = prediction * s + m
        ytest = ytest * s + m
        size = size * s + m
        table = np.vstack([prediction.T, ytest, size.T]).T
        dfncp = pd.DataFrame(table, columns=header)

        fname = (parameters.get("WhichCP") + '_' + parameters.get('algorithm')
                 + '_' + str(np.round(parameters.get('alpha_') * 100).astype(int))
                 + '_' + 'calibrationwindow'
                 + str(parameters.get('calibration_size')) + '.csv')
        # Create the file on the first iteration, append afterwards.
        if i == 0:
            dfncp.to_csv(fname, encoding='utf-8', index=False)
        else:
            dfncp.to_csv(fname, mode='a', header=False, index=False)
        del algorithm
idx = np.random.permutation(data.target.size) train = idx[:int(2 * idx.size / 3)] test = idx[int(2 * idx.size / 3):] truth = data.target[test] columns = ['min', 'max', 'truth'] significance = 0.1 # ----------------------------------------------------------------------------- # Define models # ----------------------------------------------------------------------------- models = { 'ACP-RandomSubSampler': AggregatedCp( IcpRegressor(RegressorNc(RegressorAdapter(DecisionTreeRegressor()))), RandomSubSampler()), 'ACP-CrossSampler': AggregatedCp( IcpRegressor(RegressorNc(RegressorAdapter(DecisionTreeRegressor()))), CrossSampler()), 'ACP-BootstrapSampler': AggregatedCp( IcpRegressor(RegressorNc(RegressorAdapter(DecisionTreeRegressor()))), BootstrapSampler()) } # ----------------------------------------------------------------------------- # Train, predict and evaluate # ----------------------------------------------------------------------------- for name, model in models.iteritems():
def build(self):
    '''Build a new RF model with the X and Y numpy matrices

    Optionally tunes the estimator (grid search) and/or wraps it in an
    aggregated conformal predictor, depending on the 'tune',
    'quantitative' and 'conformal' parameters.
    Returns (True, results) on success — `results` is a list of
    (key, description, value) triples — or (False, message) on failure.
    '''
    # Make a copy of data matrices
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))
    results.append(('model', 'model type', 'RF'))

    conformal = self.param.getVal('conformal')

    # If tune then call gridsearch to optimize the estimator
    if self.param.getVal('tune'):
        LOG.info("Optimizing RF estimator")
        try:
            # Check type of model
            if self.param.getVal('quantitative'):
                self.estimator = RandomForestRegressor(
                    **self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                # results.append(('model','model type','RF quantitative (optimized)'))
            else:
                self.estimator = RandomForestClassifier(
                    **self.estimator_parameters)
                self.optimize(X, Y, self.estimator, self.tune_parameters)
                # results.append(('model','model type','RF qualitative (optimized)'))
        except Exception as e:
            return False, f'Exception optimizing RF estimator with exception {e}'
    else:
        try:
            if self.param.getVal('quantitative'):
                self.estimator = RandomForestRegressor(
                    **self.estimator_parameters)
                if not conformal:
                    LOG.info("Building Quantitative RF model")
                    # results.append(('model', 'model type', 'RF quantitative'))
            else:
                self.estimator = RandomForestClassifier(
                    **self.estimator_parameters)
                if not conformal:
                    LOG.info("Building Qualitative RF model")
                    # results.append(('model', 'model type', 'RF qualitative'))
            self.estimator.fit(X, Y)
        except Exception as e:
            return False, f'Exception building RF estimator with exception {e}'

    if not conformal:
        return True, results

    # Keep a pristine copy of the fitted estimator; the conformal
    # wrappers below replace self.estimator.
    self.estimator_temp = copy(self.estimator)

    # Create the conformal estimator
    try:
        # Conformal regressor
        if self.param.getVal('quantitative'):
            conformal_settings = self.param.getDict('conformal_settings')
            LOG.info("Building conformal Quantitative RF model")
            underlying_model = RegressorAdapter(self.estimator_temp)
            # KNN difficulty model for normalized nonconformity scores;
            # copied below so the normalizer owns an independent instance.
            self.normalizing_model = RegressorAdapter(
                KNeighborsRegressor(
                    n_neighbors=conformal_settings['KNN_NN']))
            # normalizing_model = RegressorAdapter(self.estimator_temp)
            normalizer = RegressorNormalizer(underlying_model,
                                             copy(self.normalizing_model),
                                             AbsErrorErrFunc())
            nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
            # self.conformal_pred = AggregatedCp(IcpRegressor
            # (RegressorNc(RegressorAdapter(self.estimator))),
            # BootstrapSampler())
            self.estimator = AggregatedCp(IcpRegressor(nc),
                                          BootstrapSampler())
            self.estimator.fit(X, Y)
            # results.append(('model', 'model type', 'conformal RF quantitative'))
        # Conformal classifier
        else:
            LOG.info("Building conformal Qualitative RF model")
            self.estimator = AggregatedCp(
                IcpClassifier(
                    ClassifierNc(ClassifierAdapter(self.estimator_temp),
                                 MarginErrFunc())),
                BootstrapSampler())
            # Fit estimator to the data
            self.estimator.fit(X, Y)
            # results.append(('model', 'model type', 'conformal RF qualitative'))
    except Exception as e:
        return False, f'Exception building conformal RF estimator with exception {e}'

    return True, results

## Overriding of parent methods

# def CF_quantitative_validation(self):
#     ''' performs validation for conformal quantitative models '''

# def CF_qualitative_validation(self):
#     ''' performs validation for conformal qualitative models '''

# def quantitativeValidation(self):
#     ''' performs validation for quantitative models '''

# def qualitativeValidation(self):
#     ''' performs validation for qualitative models '''

# def validate(self):
#     ''' Validates the model and computes suitable model quality scoring values'''

# def optimize(self, X, Y, estimator, tune_parameters):
#     ''' optimizes a model using a grid search over a range of values for diverse parameters'''

# def regularProject(self, Xb, results):
#     ''' projects a collection of query objects in a regular model, for obtaining predictions '''

# def conformalProject(self, Xb, results):
#     ''' projects a collection of query objects in a conformal model, for obtaining predictions '''

# def project(self, Xb, results):
#     ''' Uses the X matrix provided as argument to predict Y'''
iterations=5, folds=5, scoring_funcs=[class_mean_errors, class_avg_c], significance_levels=[0.05, 0.1, 0.2]) print('Classification: iris') scores = scores.drop(['fold', 'iter'], axis=1) print(scores.groupby(['significance']).mean()) # ----------------------------------------------------------------------------- # Regression, absolute error # ----------------------------------------------------------------------------- data = load_diabetes() icp = IcpRegressor( RegressorNc(RegressorAdapter(RandomForestRegressor(n_estimators=100)), AbsErrorErrFunc())) icp_cv = RegIcpCvHelper(icp) scores = cross_val_score(icp_cv, data.data, data.target, iterations=5, folds=5, scoring_funcs=[reg_mean_errors, reg_median_size], significance_levels=[0.05, 0.1, 0.2]) print('Absolute error regression: diabetes') scores = scores.drop(['fold', 'iter'], axis=1) print(scores.groupby(['significance']).mean())
def test_cross_validation(self):
    """Smoke-test cross_val_score over ICP classifier/regressor variants.

    Runs 5x5-fold cross-validation on iris (classification) and
    diabetes (regression with absolute, normalized absolute, signed and
    normalized signed error functions), printing mean scores grouped by
    significance level.
    """
    # -----------------------------------------------------------------------------
    # Classification
    # -----------------------------------------------------------------------------
    data = load_iris()

    icp = IcpClassifier(
        ClassifierNc(
            ClassifierAdapter(RandomForestClassifier(n_estimators=100)),
            MarginErrFunc()))
    icp_cv = ClassIcpCvHelper(icp)

    scores = cross_val_score(
        icp_cv,
        data.data,
        data.target,
        iterations=5,
        folds=5,
        scoring_funcs=[class_mean_errors, class_avg_c],
        significance_levels=[0.05, 0.1, 0.2],
    )

    print("Classification: iris")
    scores = scores.drop(["fold", "iter"], axis=1)
    print(scores.groupby(["significance"]).mean())

    # -----------------------------------------------------------------------------
    # Regression, absolute error
    # -----------------------------------------------------------------------------
    data = load_diabetes()

    icp = IcpRegressor(
        RegressorNc(
            RegressorAdapter(RandomForestRegressor(n_estimators=100)),
            AbsErrorErrFunc()))
    icp_cv = RegIcpCvHelper(icp)

    scores = cross_val_score(
        icp_cv,
        data.data,
        data.target,
        iterations=5,
        folds=5,
        scoring_funcs=[reg_mean_errors, reg_median_size],
        significance_levels=[0.05, 0.1, 0.2],
    )

    print("Absolute error regression: diabetes")
    scores = scores.drop(["fold", "iter"], axis=1)
    print(scores.groupby(["significance"]).mean())

    # -----------------------------------------------------------------------------
    # Regression, normalized absolute error
    # -----------------------------------------------------------------------------
    data = load_diabetes()

    underlying_model = RegressorAdapter(
        RandomForestRegressor(n_estimators=100))
    normalizer_model = RegressorAdapter(
        RandomForestRegressor(n_estimators=100))
    normalizer = RegressorNormalizer(underlying_model, normalizer_model,
                                     AbsErrorErrFunc())
    nc = RegressorNc(underlying_model, AbsErrorErrFunc(), normalizer)
    icp = IcpRegressor(nc)
    icp_cv = RegIcpCvHelper(icp)

    scores = cross_val_score(
        icp_cv,
        data.data,
        data.target,
        iterations=5,
        folds=5,
        scoring_funcs=[reg_mean_errors, reg_median_size],
        significance_levels=[0.05, 0.1, 0.2],
    )

    print("Normalized absolute error regression: diabetes")
    scores = scores.drop(["fold", "iter"], axis=1)
    print(scores.groupby(["significance"]).mean())

    # -----------------------------------------------------------------------------
    # Regression, signed error
    # (NOTE(review): the original header comment said "normalized signed
    # error", but this section uses no normalizer — labels were swapped
    # with the section below.)
    # -----------------------------------------------------------------------------
    data = load_diabetes()

    icp = IcpRegressor(
        RegressorNc(
            RegressorAdapter(RandomForestRegressor(n_estimators=100)),
            SignErrorErrFunc()))
    icp_cv = RegIcpCvHelper(icp)

    scores = cross_val_score(
        icp_cv,
        data.data,
        data.target,
        iterations=5,
        folds=5,
        scoring_funcs=[reg_mean_errors, reg_median_size],
        significance_levels=[0.05, 0.1, 0.2],
    )

    print("Signed error regression: diabetes")
    scores = scores.drop(["fold", "iter"], axis=1)
    print(scores.groupby(["significance"]).mean())

    # -----------------------------------------------------------------------------
    # Regression, normalized signed error
    # -----------------------------------------------------------------------------
    data = load_diabetes()

    underlying_model = RegressorAdapter(
        RandomForestRegressor(n_estimators=100))
    normalizer_model = RegressorAdapter(
        RandomForestRegressor(n_estimators=100))

    # The normalization model can use a different error function than is
    # used to measure errors on the underlying model
    normalizer = RegressorNormalizer(underlying_model, normalizer_model,
                                     AbsErrorErrFunc())
    nc = RegressorNc(underlying_model, SignErrorErrFunc(), normalizer)
    icp = IcpRegressor(nc)
    icp_cv = RegIcpCvHelper(icp)

    scores = cross_val_score(
        icp_cv,
        data.data,
        data.target,
        iterations=5,
        folds=5,
        scoring_funcs=[reg_mean_errors, reg_median_size],
        significance_levels=[0.05, 0.1, 0.2],
    )

    print("Normalized signed error regression: diabetes")
    scores = scores.drop(["fold", "iter"], axis=1)
    print(scores.groupby(["significance"]).mean())
def build(self):
    '''Build a new SVM model with the X and Y numpy matrices.

    Optionally tunes the estimator (grid search) and/or wraps it in an
    aggregated conformal predictor, depending on the 'tune',
    'quantitative' and 'conformal' parameters.

    Returns
    -------
    (bool, list | str)
        (True, results) on success — `results` is a list of
        (key, description, value) triples; (False, message) on failure.
    '''
    # Work on copies so fitting cannot mutate the stored matrices.
    X = self.X.copy()
    Y = self.Y.copy()

    results = []
    results.append(('nobj', 'number of objects', self.nobj))
    results.append(('nvarx', 'number of predictor variables', self.nvarx))

    # If tune then call gridsearch to optimize the estimator
    if self.param.getVal('tune'):
        try:
            # Check type of model
            if self.param.getVal('quantitative'):
                self.optimize(X, Y, svm.SVR(**self.estimator_parameters),
                              self.tune_parameters)
                results.append(('model', 'model type',
                                'SVM quantitative (optimized)'))
            else:
                self.optimize(X, Y, svm.SVC(**self.estimator_parameters),
                              self.tune_parameters)
                results.append(
                    ('model', 'model type', 'SVM qualitative (optimized)'))
            LOG.debug('SVM estimator optimized')
        except Exception as e:
            # BUG FIX: the original only logged here and fell through to
            # self.estimator.fit(...), crashing on an unset estimator.
            # Return (False, message) like the other builders. Also adds
            # the missing space between the two f-string fragments.
            LOG.error(f'Exception optimizing SVM '
                      f'estimator with exception {e}')
            return False, f'Exception optimizing SVM estimator with exception {e}'
    else:
        try:
            LOG.info("Building SVM model")
            if self.param.getVal('quantitative'):
                LOG.info("Building Quantitative SVM-R model")
                self.estimator = svm.SVR(**self.estimator_parameters)
                results.append(('model', 'model type', 'SVM quantitative'))
            else:
                self.estimator = svm.SVC(**self.estimator_parameters)
                results.append(('model', 'model type', 'SVM qualitative'))
        except Exception as e:
            # BUG FIX: same log-and-fall-through defect as above.
            LOG.error(f'Exception building SVM '
                      f'estimator with exception {e}')
            return False, f'Exception building SVM estimator with exception {e}'

    # Fit estimator to the data
    self.estimator.fit(X, Y)
    # Keep a pristine copy; the conformal wrapper below replaces
    # self.estimator.
    self.estimator_temp = copy(self.estimator)

    if self.param.getVal('conformal'):
        try:
            LOG.info("Building aggregated conformal SVM model")
            if self.param.getVal('quantitative'):
                underlying_model = RegressorAdapter(self.estimator_temp)
                # normalizing_model = RegressorAdapter(
                # KNeighborsRegressor(n_neighbors=5))
                normalizing_model = RegressorAdapter(self.estimator_temp)
                normalizer = RegressorNormalizer(underlying_model,
                                                 normalizing_model,
                                                 AbsErrorErrFunc())
                nc = RegressorNc(underlying_model, AbsErrorErrFunc(),
                                 normalizer)
                # self.conformal_pred = AggregatedCp(IcpRegressor(
                # RegressorNc(RegressorAdapter(self.estimator))),
                # BootstrapSampler())
                self.estimator = AggregatedCp(IcpRegressor(nc),
                                              BootstrapSampler())
                self.estimator.fit(X, Y)  # overrides non-conformal
                results.append(
                    ('model', 'model type', 'conformal SVM quantitative'))
            else:
                self.estimator = AggregatedCp(
                    IcpClassifier(
                        ClassifierNc(
                            ClassifierAdapter(self.estimator_temp),
                            MarginErrFunc())),
                    BootstrapSampler())
                self.estimator.fit(X, Y)  # overrides non-conformal
                results.append(
                    ('model', 'model type', 'conformal SVM qualitative'))
        except Exception as e:
            # CONSISTENCY FIX: report failure instead of silently
            # returning success with a half-built estimator.
            LOG.error(f'Exception building aggregated conformal SVM '
                      f'estimator with exception {e}')
            return False, f'Exception building aggregated conformal SVM estimator with exception {e}'

    return True, results