Example No. 1
 def __init__(self,
              data_set_name,
              metric=RootMeanSquaredError,
              models=None,
              ensembles=None,
              benchmark_id=None):
     """Initializes benchmark environment."""
     self.benchmark_id = benchmark_id
     self.data_set_name = data_set_name
     # Create the file name from the data set name, benchmark id, and the current date.
     self.file_name = self.data_set_name + "_" + self.benchmark_id + "__" + _now.strftime(
         "%Y_%m_%d__%H_%M_%S")
     # Load samples into the object.
     self.samples = load_standardized_samples(data_set_name)
     self.metric = metric
     self.ensembles = ensembles
     self.models = models
     # If the data set is a classification problem, remove regression models; otherwise remove classification models.
     if is_classification(self.samples):
         self.classification = True
         # Drop regression MLP variants from a classification benchmark.
         for key in ('mlpr_lbfgs', 'mlpr_adam', 'mlpr_sgd'):
             self.models.pop(key, None)
     else:
         self.classification = False
         # Drop classification MLP variants from a regression benchmark.
         for key in ('mlpc_lbfgs', 'mlpc_adam', 'mlpc_sgd'):
             self.models.pop(key, None)
     # If the model set includes an MLP, remove Random Independent Weighting.
     if self.ensembles is not None:
         if 'mlpc_lbfgs' in self.models or 'mlpr_lbfgs' in self.models:
             self.ensembles.pop('riw', None)
     # Create results dictionary with models under study.
     self.results = {
         k: [None for i in range(_OUTER_FOLDS)]
         for k in self.models.keys()
     }
     if self.ensembles is not None:
         self.results_ensemble = {
             ensemble: [None for i in range(_OUTER_FOLDS)]
             for ensemble in self.ensembles.keys()
         }
     self.best_result = [None for i in range(_OUTER_FOLDS)]
     # Serialize benchmark environment.
     benchmark_to_pickle(self)
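
For context, here is a minimal sketch of how this constructor might be called. The enclosing class name (`Benchmark`), the dataset name, and the placeholder model values are assumptions for illustration; the parameter names, the `RootMeanSquaredError` metric, and the `'mlpr_*'`/`'riw'` dictionary keys come from the example above. Note that `models` must be a dict here, since the constructor iterates over its keys.

# Minimal usage sketch (class name `Benchmark`, dataset name, and the model
# placeholders are assumptions; parameter names and dictionary keys come from
# the example above).
models = {
    'mlpr_lbfgs': None,  # placeholder for an MLP regressor wrapper
    'mlpr_adam': None,
    'rf': None,          # hypothetical random-forest entry
}
ensembles = {
    'riw': None,         # Random Independent Weighting; dropped when an MLP model is present
}

env = Benchmark(
    data_set_name='my_dataset',   # hypothetical dataset name
    metric=RootMeanSquaredError,
    models=models,
    ensembles=ensembles,
    benchmark_id='run_01',
)
# After construction, env.results maps each surviving model key to a list of
# _OUTER_FOLDS empty slots, and the environment is pickled via benchmark_to_pickle.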
Example No. 2
    def __init__(self,
                 dataset_name,
                 learning_metric=None,
                 selection_metric=None,
                 models=None,
                 ensembles=None,
                 benchmark_id=None,
                 file_path=None):
        """Initializes benchmark environment."""

        self.benchmark_id = benchmark_id
        self.dataset_name = dataset_name
        # Create the file name from the dataset name, benchmark id, and the current date.
        self.file_name = self.dataset_name + "_" + self.benchmark_id + "__" + _now.strftime(
            "%Y_%m_%d__%H_%M_%S")

        # Loads samples into object
        self.samples = load_standardized_samples(dataset_name, file_path)
        self.ensembles = ensembles
        self.models = models

        # TODO: generalize this; currently special-cases the 'data_batch_1' image batch.
        if self.dataset_name == 'data_batch_1':
            # Convert the raw batch into a DataFrame: pixel values scaled to [0, 1],
            # with the labels appended as the final column (index 3072).
            labels = self.samples[b'labels']
            self.samples = pd.DataFrame(self.samples[b'data']) / 255
            self.samples[3072] = labels
            # Keep only the first 1000 rows; remove this line to use the full batch.
            self.samples = self.samples.head(1000)

        # If the dataset is a classification problem, remove regression models; otherwise remove classification models.
        if is_binary(self.samples):
            self.classification = True
            self.binary = True

            self.learning_metric = (learning_metric if learning_metric is not None
                                    else RootMeanSquaredError)
            self.selection_metric = (selection_metric if selection_metric is not None
                                     else AUROC)

            # Drop regression MLP variants from a classification benchmark.
            for key in ('mlpr_lbfgs', 'mlpr_adam', 'mlpr_sgd'):
                self.models.pop(key, None)

        elif is_classification(self.samples):
            self.classification = True
            self.binary = False

            self.learning_metric = (learning_metric if learning_metric is not None
                                    else RootMeanSquaredError)
            self.selection_metric = (selection_metric if selection_metric is not None
                                     else AUROC)

        else:
            self.classification = False

            self.learning_metric = (learning_metric if learning_metric is not None
                                    else RootMeanSquaredError)
            self.selection_metric = (selection_metric if selection_metric is not None
                                     else RootMeanSquaredError)

            # Drop classification MLP variants from a regression benchmark.
            for key in ('mlpc_lbfgs', 'mlpc_adam', 'mlpc_sgd'):
                self.models.pop(key, None)

        # If the model set includes an MLP, remove Random Independent Weighting.
        if self.ensembles is not None:
            if 'mlpc_lbfgs' in self.models or 'mlpr_lbfgs' in self.models:
                self.ensembles.pop('riw', None)

        # Create results dictionary with models under study.
        self.results = {
            k: [None for i in range(_OUTER_FOLDS)]
            for k in self.models.keys()
        }

        if self.ensembles is not None:
            self.results_ensemble = {
                ensemble: [None for i in range(_OUTER_FOLDS)]
                for ensemble in self.ensembles.keys()
            }

        self.best_result = [None for i in range(_OUTER_FOLDS)]

        # Serialize benchmark environment.
        benchmark_to_pickle(self)
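
Compared with the first example, this variant splits the metric into a learning_metric (used during training) and a selection_metric (used to pick models), accepts a file_path for locating the data, and special-cases the 'data_batch_1' image batch, which relies on pandas being imported as pd elsewhere in the module. A minimal invocation sketch follows, with the same caveats as above: the class name and dataset name are assumptions, while the parameters and defaults come from the constructor itself.

# Minimal usage sketch for the second variant; the defaults noted in the
# comments come from the constructor above.
env = Benchmark(
    dataset_name='my_dataset',        # hypothetical dataset name
    learning_metric=None,             # defaults to RootMeanSquaredError
    selection_metric=None,            # defaults to AUROC (classification) or RootMeanSquaredError (regression)
    models={'mlpc_adam': None},       # placeholder classifier entry
    ensembles=None,                   # skips the results_ensemble dictionary entirely
    benchmark_id='run_02',
    file_path='./data',               # hypothetical path
)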