def test_evaluate_array(self):
        predictions = {'y': [[1], [2], [3], [4]]}

        col_stats = {
            'y': {
                'typing': {
                    'data_type': DATA_TYPES.SEQUENTIAL,
                    'data_subtype': DATA_SUBTYPES.ARRAY
                }
            }
        }

        output_columns = ['y']

        data_frame = pd.DataFrame({'y': [1, 2, 3, 5]})

        accuracy = evaluate_accuracy(predictions, data_frame, col_stats,
                                     output_columns)

        assert round(accuracy, 2) == 0.8

        predictions = {'y': [[1, 2, 3, 4], [2, 3, 4, 5]]}
        data_frame = pd.DataFrame({'y': [[1, 2, 3, 5], [2, 3, 4, 6]]})

        accuracy = evaluate_accuracy(predictions, data_frame, col_stats,
                                     output_columns)

        assert round(accuracy, 2) == 0.8
Example #2
    def test_evaluate_weird_data_types(self):
        for dtype, data_subtype in [(DATA_TYPES.DATE, DATA_SUBTYPES.DATE),
                                    (DATA_TYPES.TEXT, DATA_SUBTYPES.SHORT),
                                    (DATA_TYPES.SEQUENTIAL,
                                     DATA_SUBTYPES.ARRAY),
                                    (DATA_TYPES.FILE_PATH, None)]:
            predictions = {'y': ["1", "2", "3", "4"]}

            col_stats = {
                'y': {
                    'typing': {
                        'data_type': dtype,
                        'data_subtype': data_subtype
                    }
                }
            }

            output_columns = ['y']

            data_frame = pd.DataFrame({'y': ["1", "2", "3", "5"]})

            accuracy = evaluate_accuracy(predictions, data_frame, col_stats,
                                         output_columns)

            assert round(accuracy, 2) == 0.75
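
For these fallback types, the asserted 0.75 is consistent with a plain exact-match rate over the four rows. A minimal sketch of that reading, assuming evaluate_accuracy falls back to sklearn-style categorical accuracy for such columns (the actual dispatch may differ):

from sklearn.metrics import accuracy_score

# Hedged sketch: a plain exact-match rate reproduces the 0.75 asserted above.
# The actual fallback inside evaluate_accuracy may differ.
preds = ["1", "2", "3", "4"]
reals = ["1", "2", "3", "5"]
assert accuracy_score(reals, preds) == 0.75  # 3 of 4 rows match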
Example #3
    def test_evaluate_regression(self):
        predictions = {
            'y': [1, 2, 3, 4],
            'y_confidence_range': [
                [0, 2],
                [0, 2],
                [1, 3],
                [4, 4],
            ]
        }

        col_stats = {
            'y': {
                'typing': {
                    'data_type': DATA_TYPES.NUMERIC,
                    'data_subtype': DATA_SUBTYPES.INT
                }
            }
        }

        output_columns = ['y']

        data_frame = pd.DataFrame({'y': [1, 2, 3, 5]})

        accuracy = evaluate_accuracy(predictions, data_frame, col_stats,
                                     output_columns)

        assert round(accuracy, 2) == 0.75
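
The 0.75 asserted here matches the fraction of true values that fall inside the predicted confidence ranges: only the last true value, 5, lands outside its range [4, 4]. A minimal sketch of that interpretation; range_accuracy is a hypothetical helper, not part of the library:

# Hedged sketch: count how many true values land inside [low, high] per row.
# range_accuracy is illustrative; evaluate_accuracy's internals may differ.
def range_accuracy(reals, ranges):
    hits = sum(low <= real <= high for real, (low, high) in zip(reals, ranges))
    return hits / len(reals)

assert range_accuracy([1, 2, 3, 5], [[0, 2], [0, 2], [1, 3], [4, 4]]) == 0.75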
Example #4
    def test_evaluate_two_columns(self):
        predictions = {
            'y1': [1, 2, 3, 4],
            'y1_confidence_range': [
                [0, 2],
                [0, 2],
                [1, 3],
                [4, 4],
            ],
            'y2': [0, 0, 1, 1]
        }

        col_stats = {
            'y1': {
                'typing': {
                    'data_type': DATA_TYPES.NUMERIC,
                    'data_subtype': DATA_SUBTYPES.FLOAT
                }
            },
            'y2': {
                'typing': {
                    'data_type': DATA_TYPES.CATEGORICAL,
                    'data_subtype': DATA_SUBTYPES.MULTIPLE
                }
            }
        }

        output_columns = ['y1', 'y2']

        data_frame = pd.DataFrame({'y1': [1, 2, 3, 5], 'y2': [1, 0, 1, 0]})

        accuracy = evaluate_accuracy(predictions, data_frame, col_stats,
                                     output_columns)

        assert round(accuracy, 2) == round((0.75 + 0.5) / 2, 2)
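
The expected value here is simply the mean of the per-column scores: y1 scores 0.75 (three of four confidence ranges contain the true value) and y2 scores 0.5 (two of four categorical matches). A minimal sketch of that aggregation, with the per-column numbers taken from the test above:

# Hedged sketch: the multi-column accuracy is the mean over output columns.
per_column = {'y1': 0.75, 'y2': 0.5}  # values implied by the test above
overall = sum(per_column.values()) / len(per_column)
assert round(overall, 2) == round((0.75 + 0.5) / 2, 2)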
Example #5
    def test_evaluate_classification(self):
        predictions = {'y': [1, 2, 3, 4]}

        col_stats = {
            'y': {
                'typing': {
                    'data_type': DATA_TYPES.CATEGORICAL,
                    'data_subtype': DATA_SUBTYPES.MULTIPLE
                }
            }
        }

        output_columns = ['y']

        data_frame = pd.DataFrame({'y': [1, 2, 3, 5]})

        accuracy = evaluate_accuracy(predictions, data_frame, col_stats,
                                     output_columns)

        assert round(accuracy, 2) == 0.75
Example #6
    def run(self):
        """
        Run the model on the validation set in order to fit a probabilistic
        model that will evaluate the accuracy of future predictions.
        """
        np.seterr(divide='warn', invalid='warn')

        output_columns = self.transaction.lmd['predict_columns']
        input_columns = [
            col for col in self.transaction.lmd['columns']
            if col not in output_columns
            and col not in self.transaction.lmd['columns_to_ignore']
        ]

        # Make predictions on the validation dataset normally and with various columns missing
        normal_predictions = self.transaction.model_backend.predict('validate')

        normal_predictions_test = self.transaction.model_backend.predict(
            'test')
        normal_accuracy = evaluate_accuracy(
            normal_predictions,
            self.transaction.input_data.validation_df,
            self.transaction.lmd['stats_v2'],
            output_columns,
            backend=self.transaction.model_backend)

        for col in output_columns:
            reals = self.transaction.input_data.validation_df[col]
            preds = normal_predictions[col]

            fails = False

            data_type = self.transaction.lmd['stats_v2'][col]['typing'][
                'data_type']
            data_subtype = self.transaction.lmd['stats_v2'][col]['typing'][
                'data_subtype']

            if data_type == DATA_TYPES.CATEGORICAL:
                if data_subtype == DATA_SUBTYPES.TAGS:
                    encoder = self.transaction.model_backend.predictor._mixer.encoders[
                        col]
                    if accuracy_score(
                            encoder.encode(reals),
                            encoder.encode(preds)) <= self.transaction.lmd[
                                'stats_v2'][col]['guess_probability']:
                        fails = True
                else:
                    if accuracy_score(reals, preds) <= self.transaction.lmd[
                            'stats_v2'][col]['guess_probability']:
                        fails = True
            elif data_type == DATA_TYPES.NUMERIC:
                if r2_score(reals, preds) < 0:
                    fails = True
            else:
                pass

            if fails:
                if not self.transaction.lmd['force_predict']:

                    def predict_wrapper(*args, **kwargs):
                        raise Exception('Failed to train model')

                    self.session.predict = predict_wrapper
                log.error('Failed to train model to predict {}'.format(col))

        empty_input_predictions = {}
        empty_input_accuracy = {}
        empty_input_predictions_test = {}

        ignorable_input_columns = [
            x for x in input_columns
            if self.transaction.lmd['stats_v2'][x]['typing']['data_type']
            != DATA_TYPES.FILE_PATH
            and x not in [y[0] for y in self.transaction.lmd['model_order_by']]
        ]

        for col in ignorable_input_columns:
            empty_input_predictions[
                col] = self.transaction.model_backend.predict(
                    'validate', ignore_columns=[col])
            empty_input_predictions_test[
                col] = self.transaction.model_backend.predict(
                    'test', ignore_columns=[col])
            empty_input_accuracy[col] = evaluate_accuracy(
                empty_input_predictions[col],
                self.transaction.input_data.validation_df,
                self.transaction.lmd['stats_v2'],
                output_columns,
                backend=self.transaction.model_backend)

        # Get some information about the importance of each column
        self.transaction.lmd['column_importances'] = {}
        for col in ignorable_input_columns:
            accuracy_increase = (normal_accuracy - empty_input_accuracy[col])
            # normalize from 0 to 10
            self.transaction.lmd['column_importances'][col] = 10 * max(
                0, accuracy_increase)

        # Run Probabilistic Validator
        overall_accuracy_arr = []
        self.transaction.lmd['accuracy_histogram'] = {}
        self.transaction.lmd['confusion_matrices'] = {}
        self.transaction.lmd['accuracy_samples'] = {}
        self.transaction.hmd['probabilistic_validators'] = {}

        self.transaction.lmd['train_data_accuracy'] = {}
        self.transaction.lmd['test_data_accuracy'] = {}
        self.transaction.lmd['valid_data_accuracy'] = {}

        for col in output_columns:

            # Training data accuracy
            predictions = self.transaction.model_backend.predict(
                'predict_on_train_data',
                ignore_columns=self.transaction.lmd['stats_v2']
                ['columns_to_ignore'])
            self.transaction.lmd['train_data_accuracy'][
                col] = evaluate_accuracy(
                    predictions,
                    self.transaction.input_data.train_df,
                    self.transaction.lmd['stats_v2'], [col],
                    backend=self.transaction.model_backend)

            # Testing data accuracy
            predictions = self.transaction.model_backend.predict(
                'test',
                ignore_columns=self.transaction.lmd['stats_v2']
                ['columns_to_ignore'])
            self.transaction.lmd['test_data_accuracy'][
                col] = evaluate_accuracy(
                    predictions,
                    self.transaction.input_data.test_df,
                    self.transaction.lmd['stats_v2'], [col],
                    backend=self.transaction.model_backend)

            # Validation data accuracy
            predictions = self.transaction.model_backend.predict(
                'validate',
                ignore_columns=self.transaction.lmd['stats_v2']
                ['columns_to_ignore'])
            self.transaction.lmd['valid_data_accuracy'][
                col] = evaluate_accuracy(
                    predictions,
                    self.transaction.input_data.validation_df,
                    self.transaction.lmd['stats_v2'], [col],
                    backend=self.transaction.model_backend)

        for col in output_columns:
            pval = ProbabilisticValidator(
                col_stats=self.transaction.lmd['stats_v2'][col],
                col_name=col,
                input_columns=input_columns)
            predictions_arr = [normal_predictions_test] + list(
                empty_input_predictions_test.values())

            pval.fit(self.transaction.input_data.test_df, predictions_arr,
                     [[ignored_column]
                      for ignored_column in empty_input_predictions_test])
            overall_accuracy, accuracy_histogram, cm, accuracy_samples = \
                pval.get_accuracy_stats()
            overall_accuracy_arr.append(overall_accuracy)

            self.transaction.lmd['accuracy_histogram'][
                col] = accuracy_histogram
            self.transaction.lmd['confusion_matrices'][col] = cm
            self.transaction.lmd['accuracy_samples'][col] = accuracy_samples
            self.transaction.hmd['probabilistic_validators'][col] = pickle_obj(
                pval)

        self.transaction.lmd['validation_set_accuracy'] = sum(
            overall_accuracy_arr) / len(overall_accuracy_arr)
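
The column-importance step in run() above hides one input column at a time, re-scores the validation set, and maps the accuracy drop onto a 0-10 scale. A self-contained sketch of that normalization, with illustrative column names and accuracy values:

# Hedged sketch of the importance normalization used in run() above.
# Column names and accuracy values are illustrative only.
normal_accuracy = 0.9
empty_input_accuracy = {'age': 0.7, 'noise': 0.92}

column_importances = {
    col: 10 * max(0, normal_accuracy - acc)
    for col, acc in empty_input_accuracy.items()
}
# 'age' carries signal; hiding 'noise' even helped, so its score floors at 0.
assert {c: round(v, 2) for c, v in column_importances.items()} == \
    {'age': 2.0, 'noise': 0.0}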
Example #7
    def run(self):
        """
        Run the model on the validation set in order to fit a probabilistic
        model that will evaluate the accuracy of future predictions.
        """
        np.seterr(divide='warn', invalid='warn')

        output_columns = self.transaction.lmd['predict_columns']
        input_columns = [
            col for col in self.transaction.lmd['columns']
            if col not in output_columns
            and col not in self.transaction.lmd['columns_to_ignore']
        ]

        # Make predictions on the validation dataset normally and with various columns missing
        normal_predictions = self.transaction.model_backend.predict('validate')

        normal_predictions_test = self.transaction.model_backend.predict(
            'test')
        normal_accuracy = evaluate_accuracy(
            normal_predictions,
            self.transaction.input_data.validation_df,
            self.transaction.lmd['stats_v2'],
            output_columns,
            backend=self.transaction.model_backend)

        for col in output_columns:
            if self.transaction.lmd['tss']['is_timeseries']:
                val_df = self.transaction.input_data.validation_df
                reals = list(val_df[val_df['make_predictions'] == True][col])
            else:
                reals = self.transaction.input_data.validation_df[col]
            preds = normal_predictions[col]

            fails = False

            data_type = self.transaction.lmd['stats_v2'][col]['typing'][
                'data_type']
            data_subtype = self.transaction.lmd['stats_v2'][col]['typing'][
                'data_subtype']

            if data_type == DATA_TYPES.CATEGORICAL:
                if data_subtype == DATA_SUBTYPES.TAGS:
                    encoder = self.transaction.model_backend.predictor._mixer.encoders[
                        col]
                    if balanced_accuracy_score(
                            encoder.encode(reals).argmax(axis=1),
                            encoder.encode(preds).argmax(
                                axis=1)) <= self.transaction.lmd['stats_v2'][
                                    col]['balanced_guess_probability']:
                        fails = True
                else:
                    if balanced_accuracy_score(
                            reals, preds) <= self.transaction.lmd['stats_v2'][
                                col]['balanced_guess_probability']:
                        fails = True
            elif data_type == DATA_TYPES.NUMERIC:
                if r2_score(reals, preds) < 0:
                    fails = True
            else:
                pass

            if fails:
                if not self.transaction.lmd['force_predict']:

                    def predict_wrapper(*args, **kwargs):
                        raise Exception('Failed to train model')

                    self.session.predict = predict_wrapper
                log.error('Failed to train model to predict {}'.format(col))

        empty_input_predictions = {}
        empty_input_accuracy = {}
        empty_input_predictions_test = {}

        ignorable_input_columns = [
            x for x in input_columns
            if self.transaction.lmd['stats_v2'][x]['typing']['data_type']
            != DATA_TYPES.FILE_PATH
            and (not self.transaction.lmd['tss']['is_timeseries']
                 or x not in self.transaction.lmd['tss']['order_by'])
        ]

        for col in ignorable_input_columns:
            empty_input_predictions[
                col] = self.transaction.model_backend.predict(
                    'validate', ignore_columns=[col])
            empty_input_predictions_test[
                col] = self.transaction.model_backend.predict(
                    'test', ignore_columns=[col])
            empty_input_accuracy[col] = evaluate_accuracy(
                empty_input_predictions[col],
                self.transaction.input_data.validation_df,
                self.transaction.lmd['stats_v2'],
                output_columns,
                backend=self.transaction.model_backend)

        # Get some information about the importance of each column
        self.transaction.lmd['column_importances'] = {}
        for col in ignorable_input_columns:
            accuracy_increase = (normal_accuracy - empty_input_accuracy[col])
            # normalize from 0 to 10
            self.transaction.lmd['column_importances'][col] = 10 * max(
                0, accuracy_increase)

        # Run Probabilistic Validator
        overall_accuracy_arr = []
        self.transaction.lmd['accuracy_histogram'] = {}
        self.transaction.lmd['confusion_matrices'] = {}
        self.transaction.lmd['accuracy_samples'] = {}
        self.transaction.hmd['probabilistic_validators'] = {}

        self.transaction.lmd['train_data_accuracy'] = {}
        self.transaction.lmd['test_data_accuracy'] = {}
        self.transaction.lmd['valid_data_accuracy'] = {}

        for col in output_columns:

            # Training data accuracy
            predictions = self.transaction.model_backend.predict(
                'predict_on_train_data',
                ignore_columns=self.transaction.lmd['stats_v2']
                ['columns_to_ignore'])
            self.transaction.lmd['train_data_accuracy'][
                col] = evaluate_accuracy(
                    predictions,
                    self.transaction.input_data.train_df,
                    self.transaction.lmd['stats_v2'], [col],
                    backend=self.transaction.model_backend)

            # Testing data accuracy
            predictions = self.transaction.model_backend.predict(
                'test',
                ignore_columns=self.transaction.lmd['stats_v2']
                ['columns_to_ignore'])
            self.transaction.lmd['test_data_accuracy'][
                col] = evaluate_accuracy(
                    predictions,
                    self.transaction.input_data.test_df,
                    self.transaction.lmd['stats_v2'], [col],
                    backend=self.transaction.model_backend)

            # Validation data accuracy
            predictions = self.transaction.model_backend.predict(
                'validate',
                ignore_columns=self.transaction.lmd['stats_v2']
                ['columns_to_ignore'])
            self.transaction.lmd['valid_data_accuracy'][
                col] = evaluate_accuracy(
                    predictions,
                    self.transaction.input_data.validation_df,
                    self.transaction.lmd['stats_v2'], [col],
                    backend=self.transaction.model_backend)

        for col in output_columns:
            pval = ProbabilisticValidator(
                col_stats=self.transaction.lmd['stats_v2'][col],
                col_name=col,
                input_columns=input_columns)
            predictions_arr = [normal_predictions_test] + list(
                empty_input_predictions_test.values())

            pval.fit(self.transaction.input_data.test_df, predictions_arr,
                     [[ignored_column]
                      for ignored_column in empty_input_predictions_test])
            overall_accuracy, accuracy_histogram, cm, accuracy_samples = \
                pval.get_accuracy_stats()
            overall_accuracy_arr.append(overall_accuracy)

            self.transaction.lmd['accuracy_histogram'][
                col] = accuracy_histogram
            self.transaction.lmd['confusion_matrices'][col] = cm
            self.transaction.lmd['accuracy_samples'][col] = accuracy_samples
            self.transaction.hmd['probabilistic_validators'][col] = pickle_obj(
                pval)

        self.transaction.lmd['validation_set_accuracy'] = sum(
            overall_accuracy_arr) / len(overall_accuracy_arr)

        # conformal prediction confidence estimation
        self.transaction.lmd['stats_v2']['train_std_dev'] = {}
        self.transaction.hmd['label_encoders'] = {}
        self.transaction.hmd['icp'] = {'active': False}

        for target in output_columns:
            data_type = self.transaction.lmd['stats_v2'][target]['typing'][
                'data_type']
            data_subtype = self.transaction.lmd['stats_v2'][target]['typing'][
                'data_subtype']
            is_classification = data_type == DATA_TYPES.CATEGORICAL

            fit_params = {
                'target': target,
                'all_columns': self.transaction.lmd['columns'],
                'columns_to_ignore': []
            }
            fit_params['columns_to_ignore'].extend(
                self.transaction.lmd['columns_to_ignore'])
            fit_params['columns_to_ignore'].extend(
                [col for col in output_columns if col != target])

            if is_classification:
                if data_subtype != DATA_SUBTYPES.TAGS:
                    all_targets = [
                        elt[1][target].values for elt in inspect.getmembers(
                            self.transaction.input_data)
                        if elt[0] in {'test_df', 'train_df', 'validation_df'}
                    ]
                    all_classes = np.unique(
                        np.concatenate([np.unique(arr)
                                        for arr in all_targets]))

                    enc = OneHotEncoder(sparse=False, handle_unknown='ignore')
                    enc.fit(all_classes.reshape(-1, 1))
                    fit_params['one_hot_enc'] = enc
                    self.transaction.hmd['label_encoders'][target] = enc
                else:
                    fit_params['one_hot_enc'] = None
                    self.transaction.hmd['label_encoders'][target] = None

                adapter = ConformalClassifierAdapter
                # better than IPS, as we'd need the complete distribution over all classes
                nc_function = MarginErrFunc()
                nc_class = ClassifierNc
                icp_class = IcpClassifier

            else:
                adapter = ConformalRegressorAdapter
                nc_function = AbsErrorErrFunc()
                nc_class = RegressorNc
                icp_class = IcpRegressor

            if (data_type == DATA_TYPES.NUMERIC or
                (is_classification and data_subtype != DATA_SUBTYPES.TAGS)
                ) and not self.transaction.lmd['tss']['is_timeseries']:
                model = adapter(self.transaction.model_backend.predictor,
                                fit_params=fit_params)
                nc = nc_class(model, nc_function)

                X = deepcopy(self.transaction.input_data.train_df)
                y = X.pop(target)

                if is_classification:
                    self.transaction.hmd['icp'][target] = icp_class(
                        nc, smoothing=False)
                else:
                    self.transaction.hmd['icp'][target] = icp_class(nc)
                    self.transaction.lmd['stats_v2']['train_std_dev'][
                        target] = self.transaction.input_data.train_df[
                            target].std()

                X = clean_df(X, self.transaction.lmd['stats_v2'],
                             output_columns)
                self.transaction.hmd['icp'][target].fit(X.values, y.values)
                self.transaction.hmd['icp']['active'] = True

                # calibrate conformal estimator on test set
                X = deepcopy(self.transaction.input_data.validation_df)
                y = X.pop(target).values

                if is_classification:
                    if isinstance(enc.categories_[0][0], str):
                        cats = enc.categories_[0].tolist()
                        y = np.array([cats.index(i) for i in y])
                    y = y.astype(int)

                X = clean_df(X, self.transaction.lmd['stats_v2'],
                             output_columns)
                self.transaction.hmd['icp'][target].calibrate(X.values, y)
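
The block above wires MindsDB adapters into the nonconformist package's inductive conformal prediction (ICP) flow: fit on the training split, calibrate on a held-out split, then predict intervals at a chosen significance. A stripped-down sketch of the same flow with a plain sklearn regressor in place of the MindsDB predictor adapter (the data here is synthetic):

import numpy as np
from sklearn.tree import DecisionTreeRegressor
from nonconformist.base import RegressorAdapter
from nonconformist.nc import RegressorNc, AbsErrorErrFunc
from nonconformist.icp import IcpRegressor

# Hedged sketch: same fit -> calibrate -> predict flow as run() above,
# but with a plain sklearn model instead of the MindsDB predictor adapter.
rng = np.random.RandomState(0)
X = rng.rand(300, 3)
y = X @ np.array([1.0, 2.0, 3.0]) + rng.normal(scale=0.1, size=300)

icp = IcpRegressor(RegressorNc(RegressorAdapter(DecisionTreeRegressor()),
                               AbsErrorErrFunc()))
icp.fit(X[:200], y[:200])        # proper training set
icp.calibrate(X[200:], y[200:])  # held-out calibration set

intervals = icp.predict(X[:5], significance=0.1)  # 90% prediction intervals
print(intervals.shape)  # (5, 2): [lower, upper] per row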
Example #8
    def train(self):
        if self.transaction.lmd['use_gpu'] is not None:
            lightwood.config.config.CONFIG.USE_CUDA = self.transaction.lmd[
                'use_gpu']

        secondary_type_dict = {}
        if self.transaction.lmd['tss']['is_timeseries']:
            self.transaction.log.debug(
                'Reshaping data into timeseries format, this may take a while!'
            )
            train_df, secondary_type_dict = self._create_timeseries_df(
                self.transaction.input_data.train_df)
            test_df, _ = self._create_timeseries_df(
                self.transaction.input_data.test_df)
            self.transaction.log.debug(
                'Done reshaping data into timeseries format!')
        else:
            if self.transaction.lmd['sample_settings']['sample_for_training']:
                sample_margin_of_error = self.transaction.lmd[
                    'sample_settings']['sample_margin_of_error']
                sample_confidence_level = self.transaction.lmd[
                    'sample_settings']['sample_confidence_level']
                sample_percentage = self.transaction.lmd['sample_settings'][
                    'sample_percentage']
                sample_function = self.transaction.hmd['sample_function']

                train_df = sample_function(
                    self.transaction.input_data.train_df,
                    sample_margin_of_error, sample_confidence_level,
                    sample_percentage)

                test_df = sample_function(self.transaction.input_data.test_df,
                                          sample_margin_of_error,
                                          sample_confidence_level,
                                          sample_percentage)

                sample_size = len(train_df)
                population_size = len(self.transaction.input_data.train_df)

                self.transaction.log.warning(
                    f'Training on a sample of {round(sample_size * 100 / population_size, 1)}% of your data; results may be unexpected.'
                )
            else:
                train_df = self.transaction.input_data.train_df
                test_df = self.transaction.input_data.test_df

        lightwood_config = self._create_lightwood_config(secondary_type_dict)

        lightwood_train_ds = lightwood.api.data_source.DataSource(
            train_df, config=lightwood_config)
        lightwood_test_ds = lightwood_train_ds.make_child(test_df)

        self.transaction.lmd['lightwood_data']['save_path'] = os.path.join(
            CONFIG.MINDSDB_STORAGE_PATH, self.transaction.lmd['name'],
            'lightwood_data')
        Path(CONFIG.MINDSDB_STORAGE_PATH).joinpath(
            self.transaction.lmd['name']).mkdir(mode=0o777,
                                                exist_ok=True,
                                                parents=True)

        logging.getLogger().setLevel(logging.DEBUG)

        predictors_and_accuracies = []

        use_mixers = self.transaction.lmd.get('use_mixers', None)
        if use_mixers is not None:
            if isinstance(use_mixers, list):
                mixer_classes = use_mixers
            else:
                mixer_classes = [use_mixers]
        else:
            mixer_classes = lightwood.mixers.BaseMixer.__subclasses__()

        for mixer_class in mixer_classes:
            lightwood_config['mixer']['kwargs'] = {}
            lightwood_config['mixer']['class'] = mixer_class

            if lightwood_config['mixer']['class'] == lightwood.mixers.NnMixer:
                # Evaluate less often for larger datasets and vice-versa
                eval_every_x_epochs = int(round(1e6 / len(train_df)))

                # Within some limits
                if eval_every_x_epochs > 200:
                    eval_every_x_epochs = 200
                if eval_every_x_epochs < 3:
                    eval_every_x_epochs = 3

                kwargs = lightwood_config['mixer']['kwargs']

                kwargs['callback_on_iter'] = self.callback_on_iter
                kwargs['eval_every_x_epochs'] = eval_every_x_epochs / len(
                    mixer_classes)
                kwargs['stop_training_after_seconds'] = self.transaction.lmd[
                    'stop_training_in_x_seconds']

            self.predictor = lightwood.Predictor(lightwood_config.copy())

            self.predictor.learn(from_data=lightwood_train_ds,
                                 test_data=lightwood_test_ds)

            self.transaction.log.info('[{}] Training accuracy of: {}'.format(
                mixer_class.__name__, self.predictor.train_accuracy))

            validation_predictions = self.predict('validate')

            val_df = self.transaction.input_data.validation_df
            if self.transaction.lmd['tss']['is_timeseries']:
                val_df = val_df[val_df['make_predictions'].astype(bool)]

            validation_accuracy = evaluate_accuracy(
                validation_predictions,
                val_df,
                self.transaction.lmd['stats_v2'],
                self.transaction.lmd['predict_columns'],
                backend=self,
                use_conf_intervals=False  # r2_score will be used for regression
            )

            predictors_and_accuracies.append(
                (self.predictor, validation_accuracy))

        best_predictor, best_accuracy = max(predictors_and_accuracies,
                                            key=lambda x: x[1])

        # Find predictor with NnMixer
        for predictor, accuracy in predictors_and_accuracies:
            if isinstance(predictor._mixer, lightwood.mixers.NnMixer):
                nn_mixer_predictor, nn_mixer_predictor_accuracy = predictor, accuracy
                break
        else:
            nn_mixer_predictor, nn_mixer_predictor_accuracy = None, None

        self.predictor = best_predictor

        # If difference between accuracies of best predictor and NnMixer predictor
        # is small, then use NnMixer predictor
        if nn_mixer_predictor is not None:
            SMALL_ACCURACY_DIFFERENCE = 0.01
            if (best_accuracy -
                    nn_mixer_predictor_accuracy) < SMALL_ACCURACY_DIFFERENCE:
                self.predictor = nn_mixer_predictor

        self.predictor.save(
            path_to=self.transaction.lmd['lightwood_data']['save_path'])
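
For reference, the evaluation-frequency heuristic applied to NnMixer in train() above amounts to roughly one evaluation per million training rows, clamped to between 3 and 200 epochs and then divided by the number of mixers. A minimal sketch (the function name here is illustrative):

# Hedged sketch of the eval_every_x_epochs heuristic in train() above.
def eval_frequency(n_rows, n_mixers=1):
    every = int(round(1e6 / n_rows))
    every = min(200, max(3, every))  # clamp to [3, 200]
    return every / n_mixers

assert eval_frequency(10_000) == 100
assert eval_frequency(100) == 200        # clamped at the upper bound
assert eval_frequency(1_000_000) == 3    # clamped at the lower bound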