def __reduce_overfitting(symbol, model_container):
     """ Recursive method to reduce Variance & get a better Validation score for the metric """
     print('-- Exploring Model Generalization --')
     print('On Training data...')
     model_container.train_score = ModelEvaluator.evaluate(
         model_container.model, model_container.data.train_X,
         model_container.data.train_y)
     print('On Validation data...')
     model_container.val_score = ModelEvaluator.evaluate(
         model_container.model, model_container.data.val_X,
         model_container.data.val_y)
     print(
         f'Train score: {model_container.train_score} & Validation score: {model_container.val_score}'
     )
     if (
             model_container.train_score - model_container.val_score
     ) / model_container.train_score > 0.15 and model_container.hyperparams.dropout < 0.55 and model_container.train_score > 0.65:
         # Retrain with more dropout while the gap between the training & validation scores exceeds 15%, dropout can still be raised, and the training score is above the 0.65 threshold
         model_container.hyperparams.dropout += 0.2
         model_container.model = Trainer.train_model(
             symbol, model_container.data_prep_params, model_container.data,
             model_container.hyperparams)
         return StockatronCore.__reduce_overfitting(symbol, model_container)
     else:
         return model_container
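
# A minimal sketch, not part of the original class: the stopping rule used by
# __reduce_overfitting, pulled out into a standalone helper so it can be unit
# tested. The function name and keyword defaults below are hypothetical; the
# 15% relative gap, 0.55 dropout cap and 0.65 minimum training score mirror
# the condition above.
def should_increase_dropout(train_score, val_score, dropout,
                            max_gap=0.15, max_dropout=0.55, min_train_score=0.65):
    """Return True if another retraining pass with more dropout is warranted."""
    relative_gap = (train_score - val_score) / train_score
    return (relative_gap > max_gap
            and dropout < max_dropout
            and train_score > min_train_score)

# Example: a 0.9 train score vs a 0.7 validation score is a ~22% relative gap,
# so one more pass with dropout raised by 0.2 would be attempted.
assert should_increase_dropout(0.9, 0.7, dropout=0.2)
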
 def find_best_single_feature_parameters(self, dataset):
     for feature in dataset.suggested_discretize_features:
         permutations = self.generate_feature_parameters(feature)
         print(permutations)
         best_mean_fcs = self.best_fcs[dataset]
         best_perm = None
         for p, perm in enumerate(permutations):
             logging.error("[Parameters Tester][{}][{}][Perm {:03d}] Current permutation: {}".format(dataset, feature, p+1,  perm))
             dm = DataModel.generate_from_file(dataset, discretize_params=perm)
             classes_list = dm.get_classes_list()
             f_scores = []
             a = 1
             for _ in range(self.best_fold[dataset][1]):
                 for train_set, test_set in dm.generate_k_folds_stratified(self.best_fold[dataset][0]):
                     model_evaluator = ModelEvaluator(train_set, test_set, classes_list)
                     model_evaluator.evaluate()
                     f_scores.append(model_evaluator.get_f_score())
                     logging.error("[Parameters Tester][{}][{}][Perm {:03d}][{:03d}] FCS: {}".format(dataset, feature, p+1, a, f_scores[-1]))
                     a += 1
             f_score_mean = sum(f_scores) / len(f_scores)
             logging.error("[Parameters Tester][{}][{}][Perm {:03d}] Best FCS: {}, Mean FCS {}".format(dataset, feature, p+1, max(f_scores), f_score_mean))
             if f_score_mean > best_mean_fcs:
                 best_perm = perm[0]
                 best_mean_fcs = f_score_mean
         if best_perm is not None:
             self.best_discretize_feature_params[dataset].append(best_perm)
         logging.error("[Parameters Tester][{}][{}] Best mean FCS: {}, Best parameters: {}".format(dataset, feature, best_mean_fcs, best_perm))
    def evaluate(self,
                 *,
                 images: np.ndarray = None,
                 folder_path: str = None) -> None:
        """
        Evaluate the model: calculate accuracy, show confusion matrix
        and print classification report. Works with either the images
        provided, or the path to these images. If none of them are
        provided, use the default test images path.
        """
        if not self._model_loaded:
            raise ModelNotLoadedError(
                'You have to load the model before evaluating it.')

        evaluator = ModelEvaluator(self._model,
                                   images=images,
                                   folder_path=folder_path)
        evaluator.evaluate()
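
# A minimal, self-contained sketch of the guard pattern used by evaluate()
# above: evaluation is refused until a model has been loaded. The
# MinimalClassifier class and its load_model() body are illustrative stand-ins,
# not the original project's implementation; only the keyword-only
# evaluate(images=..., folder_path=...) signature is taken from the snippet.
import numpy as np

class ModelNotLoadedError(RuntimeError):
    """Raised when evaluation is requested before a model has been loaded."""

class MinimalClassifier:
    def __init__(self):
        self._model_loaded = False

    def load_model(self):
        # The real project would deserialize model weights here.
        self._model_loaded = True

    def evaluate(self, *, images: np.ndarray = None, folder_path: str = None) -> None:
        if not self._model_loaded:
            raise ModelNotLoadedError(
                'You have to load the model before evaluating it.')
        # A real evaluator would compute accuracy, a confusion matrix, etc.
        print(f'Evaluating images={images is not None}, folder_path={folder_path!r}')

clf = MinimalClassifier()
clf.load_model()                          # skipping this raises ModelNotLoadedError
clf.evaluate(folder_path='test_images/')
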
 def find_best_fold(self, dataset):
     dm = DataModel.generate_from_file(dataset)
     classes_list = dm.get_classes_list()
     for fold in FOLDS:
         f_scores = []
         a = 1
         for _ in range(fold[1]):
             for train_set, test_set in dm.generate_k_folds_stratified(fold[0]):
                 model_evaluator = ModelEvaluator(train_set, test_set, classes_list)
                 model_evaluator.evaluate()
                 f_scores.append(model_evaluator.get_f_score())
                 logging.error("[Parameters Tester][{}][CV{:02d}][{:03d}] FCS: {}".format(dataset, fold[0], a, f_scores[-1]))
                 a += 1
         f_score_mean = sum(f_scores) / len(f_scores)
         logging.error("[Parameters Tester][{}][CV{:02d}] Best FCS: {}, Mean FCS {}".format(dataset, fold[0], max(f_scores), f_score_mean))
         self.append_result({'dataset':dataset.name, 'fold':fold[0], 'f_score':f_score_mean, 'permutation':-1})
         if f_score_mean > self.best_fcs[dataset]:
             self.best_fold[dataset] = fold
             self.best_fcs[dataset] = f_score_mean
     logging.error("[Parameters Tester][{}] Best mean FCS: {}, Best fold: {}".format(dataset, self.best_fcs[dataset], self.best_fold[dataset]))   
 def find_best_parameters(self, dataset):
     permutations = self.generate_permutations(dataset)
     for p, perm in enumerate(permutations):
         logging.error("[Parameters Tester][{}][Perm {:08d}] Current permutation: {}".format(dataset, p+1, perm))
         dm = DataModel.generate_from_file(dataset, discretize_params=perm)
         classes_list = dm.get_classes_list()
         f_scores = []
         a = 1
         for _ in range(self.best_fold[dataset][1]):
             for train_set, test_set in dm.generate_k_folds_stratified(self.best_fold[dataset][0]):
                 model_evaluator = ModelEvaluator(train_set, test_set, classes_list)
                 model_evaluator.evaluate()
                 f_scores.append(model_evaluator.get_f_score())
                 logging.error("[Parameters Tester][{}][Perm {:08d}][{:03d}] FCS: {}".format(dataset, p+1, a, f_scores[-1]))
                 a += 1
         f_score_mean = sum(f_scores) / len(f_scores)
         logging.error("[Parameters Tester][{}][Perm {:08d}] Best FCS: {}, Mean FCS {}".format(dataset, p+1, max(f_scores), f_score_mean))
         for param in perm:
             self.append_result({'dataset':dataset.name, 'fold':self.best_fold[dataset][0], 'f_score':f_score_mean, 'permutation':p + 1, 'feature':param.feature_name, 'function':param.discretize_function.__name__, 'bins':param.buckets_amount})
         if f_score_mean > self.best_fcs[dataset]:
             self.best_discretize_parameters[dataset] = perm
             self.best_fcs[dataset] = f_score_mean
     logging.error("[Parameters Tester][{}] Best mean FCS: {}, Best parameters: {}".format(dataset, self.best_fcs[dataset], self.best_discretize_parameters[dataset]))
def results(regressors, datasets, epochs=1000, verbose=False):
    '''Evaluate each regressor on each dataset; return (results, col_names, row_names).'''

    col_names = [''] * len(regressors)
    row_names = [''] * len(datasets)
    results = np.zeros((len(datasets), len(regressors), 2))

    for dataset_i, DatasetInitializer in enumerate(datasets):
        # initialize dataset
        dataset = DatasetInitializer()
        row_names[dataset_i] = dataset.name
        if verbose:
            print(dataset.name)

        for regressor_i, Regressor in enumerate(regressors):
            # initialize model
            regualizer = getattr(dataset.regualizer, Regressor.transform_type)
            regression = Regressor(input_size=dataset.input_size,
                                   output_size=dataset.output_size,
                                   random_state=42,
                                   regualizer=regualizer,
                                   learning_rate=dataset.learning_rate)
            col_names[regressor_i] = regression.name
            if verbose:
                print('  ' + regression.name)

            with regression as model:
                model.reset()
                model.update(dataset.train.inputs,
                             dataset.train.targets,
                             epochs=min(epochs, dataset.epochs))

                divergence = ModelEvaluator.evaluate(model,
                                                     dataset.test.inputs,
                                                     dataset.test.targets)
                results[dataset_i, regressor_i, 0] = divergence

                if dataset.multi_class:
                    missrate = np.nan
                else:
                    missrate = model.error(dataset.test.inputs,
                                           dataset.test.targets)
                results[dataset_i, regressor_i, 1] = missrate

                if verbose:
                    print('    %f / %f' % (divergence, missrate))

    return (results, col_names, row_names)
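
# A minimal sketch (not in the original file) of rendering the tuple returned
# by results(): the array has shape (n_datasets, n_regressors, 2), with index 0
# holding the divergence and index 1 the miss rate. The toy numbers, names and
# the use of pandas here are illustrative assumptions.
import numpy as np
import pandas as pd

results = np.array([[[0.12, 0.05], [0.10, 0.04]],
                    [[0.30, np.nan], [0.28, np.nan]]])  # multi-class rows carry nan miss rates
col_names = ['regressor A', 'regressor B']
row_names = ['dataset 1', 'dataset 2']

divergence = pd.DataFrame(results[:, :, 0], index=row_names, columns=col_names)
missrate = pd.DataFrame(results[:, :, 1], index=row_names, columns=col_names)
print(divergence, missrate, sep='\n\n')
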
 def train_model(self, symbol):
     models = []
     # clean up previous training plots
     for file in glob.glob(f'training_plots/{symbol}/*'):
         os.remove(file)
     df = yf.get_ticker(symbol, start_date=self.start_date)
     num_time_steps_to_try = [30]
     for num_time_steps in num_time_steps_to_try:
         data_prep_params = DataPrepParameters(
             scaler=StandardScaler(),
             num_time_steps=num_time_steps,
             features=['change', 'sp500_change'])
         data = self.data_chef.prepare_model_data(df, data_prep_params)
         # more batch sizes could be tried: stateless LSTMs only keep state/context
         # within a batch, so batch size is an important hyperparameter to explore
         for batch_size in [1, 5]:
             hyperparams = ModelHyperparameters(
                 epochs=100,
                 number_hidden_layers=2,
                 number_units_in_hidden_layers=20,
                 hidden_activation_fn='tanh',
                 optimizer='adam',
                 dropout=0,
                 kernel_initializer="glorot_uniform",
                 batch_size=batch_size)
             model = Trainer.train_model(symbol, data_prep_params, data,
                                         hyperparams)
             model_container = StockatronCore.__reduce_underfitting(
                 symbol, model, hyperparams, data, data_prep_params)
             models.append(model_container)
             if model_container.train_score > 0.85:
                 break
     best_fit_model_container = max(models,
                                    key=operator.attrgetter("train_score"))
     best_fit_model_container = StockatronCore.__reduce_overfitting(
         symbol, best_fit_model_container)
     # Only now that the model has been selected, evaluate its worth using the untouched test set
     best_fit_model_container.test_score = ModelEvaluator.evaluate(
         best_fit_model_container.model,
         best_fit_model_container.data.test_X,
         best_fit_model_container.data.test_y)
     print(
         f'Best Model for {symbol} has train score={best_fit_model_container.train_score} validation score={best_fit_model_container.val_score} & test score={best_fit_model_container.test_score}'
     )
     best_fit_model_container.version = f'{symbol}_{date.today().strftime("%Y-%m-%d")}'
     StockatronCore.__save_new_model(best_fit_model_container)
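
# A minimal sketch of the selection step used in train_model() above:
# operator.attrgetter picks the candidate with the highest train_score before
# it is checked for overfitting. The Container dataclass here is a hypothetical
# stand-in for the project's ModelContainer.
import operator
from dataclasses import dataclass

@dataclass
class Container:
    name: str
    train_score: float

candidates = [Container('a', 0.72), Container('b', 0.88), Container('c', 0.81)]
best = max(candidates, key=operator.attrgetter('train_score'))
assert best.name == 'b'
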
 def __reduce_underfitting(symbol, model, hyperparams, data,
                           data_prep_params):
     """ Recursive method to reduce Bias & get a better Training score for the metric """
     print('-- Exploring Model Fit --')
     train_score = ModelEvaluator.evaluate(model, data.train_X,
                                           data.train_y)
     if train_score < 0.7 and hyperparams.number_hidden_layers < 3:
         if hyperparams.epochs < 800:  # first try training for longer
             hyperparams.epochs += 100
         elif hyperparams.number_hidden_layers < 5:  # if the training score threshold is still not met, increase model complexity (the guard above already caps hidden layers below 3, so this branch always applies)
             hyperparams.number_hidden_layers += 1
         model = Trainer.train_model(symbol, data_prep_params, data,
                                     hyperparams)
         return StockatronCore.__reduce_underfitting(
             symbol, model, hyperparams, data, data_prep_params)
     else:
         return ModelContainer(model=model,
                               hyperparams=hyperparams,
                               data_prep_params=data_prep_params,
                               data=data,
                               train_score=train_score)
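
# A minimal sketch, not part of the original class: the same bias-reduction
# strategy as __reduce_underfitting written as a plain loop, with training and
# evaluation passed in as callables. train_fn, evaluate_fn and the dict-based
# hyperparameters are hypothetical stand-ins; the 0.7 target score, the
# +100 epochs up to 800, and the extra-hidden-layer fallback mirror the logic above.
def reduce_underfitting(train_fn, evaluate_fn, hyperparams,
                        target_score=0.7, max_epochs=800, max_layers=3):
    model = train_fn(hyperparams)
    train_score = evaluate_fn(model)
    while train_score < target_score and hyperparams['number_hidden_layers'] < max_layers:
        if hyperparams['epochs'] < max_epochs:      # first try training for longer
            hyperparams['epochs'] += 100
        else:                                       # then grow the network
            hyperparams['number_hidden_layers'] += 1
        model = train_fn(hyperparams)
        train_score = evaluate_fn(model)
    return model, train_score

# Example with stubbed callables: each retrain bumps the score by 0.1.
scores = iter([0.5, 0.6, 0.72])
model, score = reduce_underfitting(lambda hp: object(), lambda m: next(scores),
                                   {'epochs': 100, 'number_hidden_layers': 2})
assert score >= 0.7
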