def predict(self, X, return_probabilities=False):
    """Predict the targets for a data matrix X.

    Arguments:
        X {array} -- The data matrix.

    Keyword Arguments:
        return_probabilities {bool} -- Whether to return a tuple, where the
            second entry is the raw output of the predict pipeline
            (default: {False})

    Returns:
        result -- The decoded targets when the pipeline contains a one-hot
            encoding node, otherwise a dict holding the raw output under
            the key 'Y'.
    """
    (checked_X,) = self.check_data_array_types(X)
    current_config = self.get_current_autonet_config()
    raw_output = self.pipeline.predict_pipeline(
        pipeline_config=current_config, X=checked_X)['Y']

    if OneHotEncoding.get_name() in self.pipeline:
        # undo the target encoding with the encoder stored at fit time
        encoder_node = self.pipeline[OneHotEncoding.get_name()]
        targets = encoder_node.reverse_transform_y(
            raw_output, encoder_node.fit_output['y_one_hot_encoder'])
    else:
        # no encoding node in this pipeline: hand back the raw output
        targets = dict()
        targets['Y'] = raw_output

    if return_probabilities:
        return (targets, raw_output)
    return targets
def score(self, X_test, Y_test, return_loss_value=False):
    """Calculate the score on test data using the specified optimize_metric.

    Arguments:
        X_test {array} -- The test data matrix.
        Y_test {array} -- The test targets.

    Keyword Arguments:
        return_loss_value {bool} -- Whether to return the result of the
            metric's ``get_loss_value`` instead of calling the metric
            itself (default: {False})

    Returns:
        score -- The score for the test data.
    """
    X_test, Y_test = self.check_data_array_types(X_test, Y_test)
    active_config = self.autonet_config or self.base_config

    # the return value is ignored here; the prediction is read back from the
    # optimization algorithm node afterwards
    self.pipeline.predict_pipeline(pipeline_config=active_config, X=X_test)
    optimization_node = self.pipeline[OptimizationAlgorithm.get_name()]
    predictions = optimization_node.predict_output['Y']

    # encode the targets with the encoder stored at fit time
    encoder_node = self.pipeline[OneHotEncoding.get_name()]
    encoded_targets = encoder_node.transform_y(
        Y_test, encoder_node.fit_output['y_one_hot_encoder'])

    metric_node = self.pipeline[MetricSelector.get_name()]
    optimize_metric = metric_node.fit_output['optimize_metric']

    scorer = optimize_metric.get_loss_value if return_loss_value else optimize_metric
    return scorer(predictions, encoded_targets)
def predict(self, X, return_probabilities=False, return_metric=False):
    """Predict the targets for X with the weighted ensemble stored on disk.

    Every ensemble member with a non-zero weight is loaded from
    ``<result_logger_dir>/models`` and its output is added to the ensemble
    prediction, scaled by the member's weight. Members whose identifier
    starts with a non-negative entry are loaded with ``torch.load`` and run
    through the trained pipeline; all other members are instantiated from
    ``BaselineTrainer.identifiers_ens`` and loaded from their ``.pkl`` files.

    Arguments:
        X {array} -- The data matrix.

    Keyword Arguments:
        return_probabilities {bool} -- Whether to also return the
            accumulated ensemble output (default: {False})
        return_metric {bool} -- Whether to also return the optimize metric
            of the trained pipeline (default: {False})

    Returns:
        result -- The decoded targets; if any of the flags is set, a tuple
            ``(targets[, ensemble_output][, metric])``.

    Raises:
        ValueError -- If no ensemble member has a non-zero weight.
    """
    # run predict pipeline
    X, = self.check_data_array_types(X)
    autonet_config = self.get_current_autonet_config()

    ensemble = self.fit_result["ensemble"]
    baseline_id2model = BaselineTrainer.identifiers_ens
    # loop-invariant: all members live in the same directory
    models_root = os.path.join(self.autonet_config["result_logger_dir"], "models")

    prediction = None
    for ident, weight in zip(ensemble.identifiers_, ensemble.weights_):
        if weight == 0:
            continue

        if ident[0] >= 0:
            model_dir = os.path.join(models_root, str(ident) + ".torch")
            logging.info("==> Inferring model model " + model_dir + ", adding preds with weight " + str(weight))
            # NOTE(review): torch.load unpickles the file; only load models
            # from a trusted result directory.
            model = torch.load(model_dir)
            autonet_config["model"] = model
            current_prediction = self.trained_autonet.pipeline.predict_pipeline(
                pipeline_config=autonet_config, X=X)['Y']
        else:
            model_dir = os.path.join(models_root, str(ident) + ".pkl")
            info_dir = os.path.join(models_root, str(ident) + "_info.pkl")
            logging.info("==> Inferring model model " + model_dir + ", adding preds with weight " + str(weight))
            baseline_model = baseline_id2model[ident[0]]()
            baseline_model.load(model_dir, info_dir)
            current_prediction = baseline_model.predict(X_test=X, predict_proba=True)

        # every member, including the first one, contributes with its weight
        weighted_prediction = weight * current_prediction
        prediction = weighted_prediction if prediction is None else prediction + weighted_prediction

    if prediction is None:
        raise ValueError("Cannot predict: no ensemble member has a non-zero weight.")

    # Looked up after the loop so they are also available when the ensemble
    # consists of baseline members only.
    OHE = self.trained_autonet.pipeline[OneHotEncoding.get_name()]
    metric = self.trained_autonet.pipeline[MetricSelector.get_name()].fit_output['optimize_metric']

    # reverse one hot encoding
    result = OHE.reverse_transform_y(prediction, OHE.fit_output['y_one_hot_encoder'])
    if not return_probabilities and not return_metric:
        return result

    result = [result]
    if return_probabilities:
        result.append(prediction)
    if return_metric:
        result.append(metric)
    return tuple(result)
def _apply_default_pipeline_settings(pipeline):
    """Register the classification defaults on the nodes of ``pipeline``.

    After delegating to ``AutoNetFeatureData._apply_default_pipeline_settings``
    this adds a softmax final activation, the plain and weighted cross
    entropy loss modules, the accuracy metric, the over-/under-sampling
    methods and target size strategies, and sets the default attributes on
    the train, cross validation and one-hot encoding nodes.

    Arguments:
        pipeline -- The pipeline whose nodes are configured in place.

    Returns:
        pipeline -- The same pipeline object that was passed in.
    """
    from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
    from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector
    from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector
    from autoPyTorch.pipeline.nodes.train_node import TrainNode
    from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector
    from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation
    from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding
    from autoPyTorch.components.preprocessing.resampling import (
        RandomOverSamplingWithReplacement,
        RandomUnderSamplingWithReplacement,
        SMOTE,
        TargetSizeStrategyAverageSample,
        TargetSizeStrategyDownsample,
        TargetSizeStrategyMedianSample,
        TargetSizeStrategyUpsample,
    )
    import torch.nn as nn
    from autoPyTorch.components.metrics.standard_metrics import accuracy
    from autoPyTorch.components.preprocessing.loss_weight_strategies import LossWeightStrategyWeighted

    # start from the feature-data defaults, then add the classification parts
    AutoNetFeatureData._apply_default_pipeline_settings(pipeline)

    network_node = pipeline[NetworkSelector.get_name()]
    network_node.add_final_activation('softmax', nn.Softmax(1))

    loss_node = pipeline[LossModuleSelector.get_name()]
    loss_node.add_loss_module('cross_entropy', nn.CrossEntropyLoss, None, True)
    loss_node.add_loss_module('cross_entropy_weighted', nn.CrossEntropyLoss,
                              LossWeightStrategyWeighted(), True)

    metric_node = pipeline[MetricSelector.get_name()]
    metric_node.add_metric('accuracy', accuracy)

    resampling_node = pipeline[ResamplingStrategySelector.get_name()]
    resampling_node.add_over_sampling_method('random', RandomOverSamplingWithReplacement)
    resampling_node.add_over_sampling_method('smote', SMOTE)
    resampling_node.add_under_sampling_method('random', RandomUnderSamplingWithReplacement)

    # registered in the same order as before: upsample, downsample, average, median
    target_size_strategies = (
        ('upsample', TargetSizeStrategyUpsample),
        ('downsample', TargetSizeStrategyDownsample),
        ('average', TargetSizeStrategyAverageSample),
        ('median', TargetSizeStrategyMedianSample),
    )
    for strategy_name, strategy in target_size_strategies:
        resampling_node.add_target_size_strategy(strategy_name, strategy)

    pipeline[TrainNode.get_name()].default_minimize_value = False
    pipeline[CrossValidation.get_name()].use_stratified_cv_split_default = True
    pipeline[OneHotEncoding.get_name()].encode_Y = True

    return pipeline
def get_default_pipeline(cls):
    """Build the default pipeline and apply the class' default settings.

    The pipeline consists of an ``AutoNetSettings`` node followed by an
    ``OptimizationAlgorithm`` node that wraps a ``CrossValidation`` node,
    which in turn wraps the preprocessing, network, optimizer, scheduler,
    logging, metric, loss and training nodes in the order listed below.

    Returns:
        pipeline -- The pipeline after ``cls._apply_default_pipeline_settings``
            has been applied to it.
    """
    from autoPyTorch.pipeline.base.pipeline import Pipeline
    from autoPyTorch.pipeline.nodes.autonet_settings import AutoNetSettings
    from autoPyTorch.pipeline.nodes.optimization_algorithm import OptimizationAlgorithm
    from autoPyTorch.pipeline.nodes.cross_validation import CrossValidation
    from autoPyTorch.pipeline.nodes.imputation import Imputation
    from autoPyTorch.pipeline.nodes.normalization_strategy_selector import NormalizationStrategySelector
    from autoPyTorch.pipeline.nodes.one_hot_encoding import OneHotEncoding
    from autoPyTorch.pipeline.nodes.preprocessor_selector import PreprocessorSelector
    from autoPyTorch.pipeline.nodes.resampling_strategy_selector import ResamplingStrategySelector
    from autoPyTorch.pipeline.nodes.embedding_selector import EmbeddingSelector
    from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector
    from autoPyTorch.pipeline.nodes.optimizer_selector import OptimizerSelector
    from autoPyTorch.pipeline.nodes.lr_scheduler_selector import LearningrateSchedulerSelector
    from autoPyTorch.pipeline.nodes.log_functions_selector import LogFunctionsSelector
    from autoPyTorch.pipeline.nodes.metric_selector import MetricSelector
    from autoPyTorch.pipeline.nodes.loss_module_selector import LossModuleSelector
    from autoPyTorch.pipeline.nodes.train_node import TrainNode

    # nodes are instantiated outermost-first-entry, then innermost to
    # outermost, exactly as a nested literal would evaluate them
    settings_node = AutoNetSettings()
    cross_validated_nodes = [
        Imputation(),
        NormalizationStrategySelector(),
        OneHotEncoding(),
        PreprocessorSelector(),
        ResamplingStrategySelector(),
        EmbeddingSelector(),
        NetworkSelector(),
        OptimizerSelector(),
        LearningrateSchedulerSelector(),
        LogFunctionsSelector(),
        MetricSelector(),
        LossModuleSelector(),
        TrainNode(),
    ]
    cross_validation_node = CrossValidation(cross_validated_nodes)
    optimization_node = OptimizationAlgorithm([cross_validation_node])
    default_pipeline = Pipeline([settings_node, optimization_node])

    cls._apply_default_pipeline_settings(default_pipeline)
    return default_pipeline
def score(self, X_test, Y_test, return_loss_value=False):
    """Calculate the score on test data using the specified optimize_metric.

    Arguments:
        X_test {array} -- The test data matrix.
        Y_test {array} -- The test targets.

    Keyword Arguments:
        return_loss_value {bool} -- Whether to return the result of the
            metric's ``get_loss_value`` instead of calling the metric
            itself (default: {False})

    Returns:
        score -- The score for the test data. If the predict pipeline
            already reports a 'score' entry, that value is returned as is.
    """
    # Update config if needed
    X_test, Y_test = self.check_data_array_types(X_test, Y_test)
    autonet_config = self.get_current_autonet_config()

    res = self.pipeline.predict_pipeline(pipeline_config=autonet_config, X=X_test)
    if 'score' in res:
        # in case of default dataset like CIFAR10 - the pipeline will compute
        # the score of the according pytorch test set
        return res['score']
    Y_pred = res['Y']

    # one hot encode Y (best effort: scoring continues with the raw targets
    # when the pipeline cannot encode them)
    try:
        OHE = self.pipeline[OneHotEncoding.get_name()]
        Y_test = OHE.transform_y(Y_test, OHE.fit_output['y_one_hot_encoder'])
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt and SystemExit
        # must still propagate.
        print("No one-hot encodig possible. Continuing without.")

    metric = self.pipeline[MetricSelector.get_name()].fit_output['optimize_metric']

    if return_loss_value:
        return metric.get_loss_value(Y_pred, Y_test)
    # the metric itself is called with float32 tensors
    return metric(torch.from_numpy(Y_pred.astype(np.float32)),
                  torch.from_numpy(Y_test.astype(np.float32)))
def score(self, X_test, Y_test):
    """Calculate the score on test data using the specified train_metric.

    Arguments:
        X_test {array} -- The test data matrix.
        Y_test {array} -- The test targets.

    Returns:
        score -- The score for the test data.
    """
    # the prediction is read back from the optimization algorithm node
    self.pipeline.predict_pipeline(pipeline_config=self.autonet_config, X=X_test)
    optimization_node = self.pipeline[OptimizationAlgorithm.get_name()]
    predictions = optimization_node.predict_output['Y']

    # encode the targets with the encoder stored at fit time
    encoder_node = self.pipeline[OneHotEncoding.get_name()]
    encoded_targets = encoder_node.transform_y(
        Y_test, encoder_node.fit_output['y_one_hot_encoder'])

    train_metric = self.pipeline[MetricSelector.get_name()].fit_output['train_metric']

    # the metric receives the targets first and the predictions second
    target_tensor = torch.from_numpy(encoded_targets)
    prediction_tensor = torch.from_numpy(predictions)
    return train_metric(target_tensor, prediction_tensor)
def predict(self, X, return_probabilities=False):
    """Predict the targets for a data matrix X.

    Arguments:
        X {array} -- The data matrix.

    Keyword Arguments:
        return_probabilities {bool} -- Whether to return a tuple, where the
            second entry is the raw output of the predict pipeline
            (default: {False})

    Returns:
        result -- The predicted targets.
    """
    # the prediction is read back from the optimization algorithm node
    self.pipeline.predict_pipeline(pipeline_config=self.autonet_config, X=X)
    optimization_node = self.pipeline[OptimizationAlgorithm.get_name()]
    raw_output = optimization_node.predict_output['Y']

    # undo the target encoding with the encoder stored at fit time
    encoder_node = self.pipeline[OneHotEncoding.get_name()]
    targets = encoder_node.reverse_transform_y(
        raw_output, encoder_node.fit_output['y_one_hot_encoder'])

    if return_probabilities:
        return (targets, raw_output)
    return targets
def predict(self, X, return_probabilities=False, return_metric=False):
    """Predict the targets for X with the weighted ensemble of trained nets.

    Each ``(weight, autonet)`` pair returned by the ensemble's
    ``get_models_with_weights`` is run through its own predict pipeline and
    its output is added to the ensemble prediction, scaled by its weight.
    The one-hot encoder and the optimize metric are taken from the last
    ensemble member.

    Arguments:
        X {array} -- The data matrix.

    Keyword Arguments:
        return_probabilities {bool} -- Whether to also return the
            accumulated ensemble output (default: {False})
        return_metric {bool} -- Whether to also return the optimize metric
            (default: {False})

    Returns:
        result -- The decoded targets; if any of the flags is set, a tuple
            ``(targets[, ensemble_output][, metric])``.

    Raises:
        ValueError -- If the ensemble contains no models.
    """
    # run predict pipeline
    X, = self.check_data_array_types(X)
    models_with_weights = self.fit_result["ensemble"].get_models_with_weights(self.trained_autonets)
    autonet_config = self.autonet_config or self.base_config

    prediction = None
    last_autonet = None
    for weight, autonet in models_with_weights:
        current_prediction = autonet.pipeline.predict_pipeline(pipeline_config=autonet_config, X=X)["Y"]
        # every member, including the first one, contributes with its weight
        weighted_prediction = weight * current_prediction
        prediction = weighted_prediction if prediction is None else prediction + weighted_prediction
        last_autonet = autonet

    if last_autonet is None:
        raise ValueError("Cannot predict: the ensemble contains no models.")

    OHE = last_autonet.pipeline[OneHotEncoding.get_name()]
    metric = last_autonet.pipeline[MetricSelector.get_name()].fit_output['optimize_metric']

    # reverse one hot encoding
    result = OHE.reverse_transform_y(prediction, OHE.fit_output['y_one_hot_encoder'])
    if not return_probabilities and not return_metric:
        return result

    result = [result]
    if return_probabilities:
        result.append(prediction)
    if return_metric:
        result.append(metric)
    return tuple(result)
def score(self, X_test, Y_test):
    """Score the ensemble on test data with the metric reported by predict.

    Arguments:
        X_test {array} -- The test data matrix.
        Y_test {array} -- The test targets.

    Returns:
        score -- The metric applied to the ensemble output and the
            transformed test targets.
    """
    checked_X, checked_Y = self.check_data_array_types(X_test, Y_test)

    # only the ensemble output and the metric are needed, not the decoded targets
    _decoded, ensemble_output, ensemble_metric = self.predict(
        checked_X, return_probabilities=True, return_metric=True)

    # bring the targets into the representation of this pipeline's encoding node
    encoder_node = self.pipeline[OneHotEncoding.get_name()]
    transformed_targets, _unused = encoder_node.complete_y_tranformation(checked_Y)

    return ensemble_metric(ensemble_output, transformed_targets)