Example #1
    config = predictor_config_schema.validate(config)
    # For Classification
    data = {
        'x': [i for i in range(10)],
        'y': [random.randint(i, i + 20) for i in range(10)]
    }
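    # Derive a categorical target: products of x and y below 50 are labeled 'low', the rest 'high'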
    nums = [data['x'][i] * data['y'][i] for i in range(10)]

    data['z'] = ['low' if i < 50 else 'high' for i in nums]

    data_frame = pandas.DataFrame(data)

    ds = DataSource(data_frame, config)
    ds.prepare_encoders()
    predict_input_ds = DataSource(data_frame[['x', 'y']], config)
    predict_input_ds.prepare_encoders()
    ####################

    mixer = NnMixer({}, config)

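    # iter_fit yields the training error after each epoch; stop once it falls below 0.01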
    for error in mixer.iter_fit(ds):
        if error < 0.01:
            break

    predictions = mixer.predict(predict_input_ds)
    print(predictions)

    # For Regression
Example #2
    def learn(self,
              from_data,
              test_data=None,
              callback_on_iter=None,
              eval_every_x_epochs=20,
              stop_training_after_seconds=None,
              stop_model_building_after_seconds=None):
        """
        Train and save a model (you can use this to retrain model from data)

        :param from_data: (Pandas DataFrame) The data to learn from
        :param test_data: (Pandas DataFrame) The data to test accuracy and learn_error from
        :param callback_on_iter: This is function that can be called on every X evaluation cycle
        :param eval_every_x_epochs: This is every how many epochs we want to calculate the test error and accuracy

        :return: None
        """

        # Helper that auto-determines roughly what data type is in each column
        # NOTE: this assumes the data is clean and will only return the types 'CATEGORICAL', 'NUMERIC' and 'TEXT'
        def type_map(col_name):
            col_pd_type = from_data[col_name].dtype
            col_pd_type = str(col_pd_type)

            if col_pd_type in ['int64', 'float64', 'timedelta']:
                return COLUMN_DATA_TYPES.NUMERIC
            elif col_pd_type in ['bool', 'category']:
                return COLUMN_DATA_TYPES.CATEGORICAL
            else:
                # If the number of unique values is less than 100, or less
                # than 10% of the total number of rows, keep it as categorical
                unique = from_data[col_name].nunique()
                if unique < 100 or unique < len(from_data[col_name]) / 10:
                    return COLUMN_DATA_TYPES.CATEGORICAL
                # otherwise, assume it's text
                return COLUMN_DATA_TYPES.TEXT

        # generate the configuration and set the order for the input and output columns
        if self._generate_config is True:
            self._input_columns = [
                col for col in from_data if col not in self._output_columns
            ]
            self.config = {
                'input_features': [{
                    'name': col,
                    'type': type_map(col)
                } for col in self._input_columns],
                'output_features': [{
                    'name': col,
                    'type': type_map(col)
                } for col in self._output_columns]
            }
            self.config = predictor_config_schema.validate(self.config)
            logging.info('Automatically generated a configuration')
            logging.info(self.config)
        else:
            self._output_columns = [
                col['name'] for col in self.config['output_features']
            ]
            self._input_columns = [
                col['name'] for col in self.config['input_features']
            ]

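        # Default training time budget scales with the dataset size: rows * columns / 5 seconds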
        if stop_training_after_seconds is None:
            stop_training_after_seconds = round(from_data.shape[0] *
                                                from_data.shape[1] / 5)

        if stop_model_building_after_seconds is None:
            stop_model_building_after_seconds = stop_training_after_seconds * 3

        from_data_ds = DataSource(from_data, self.config)

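        # If no test set is given, hold out a random 10% of the training data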
        if test_data is not None:
            test_data_ds = DataSource(test_data, self.config)
        else:
            test_data_ds = from_data_ds.extractRandomSubset(0.1)

        from_data_ds.training = True

        mixer_class = NnMixer
        mixer_params = {}

        if 'mixer' in self.config:
            if 'class' in self.config['mixer']:
                mixer_class = self.config['mixer']['class']
            if 'attrs' in self.config['mixer']:
                mixer_params = self.config['mixer']['attrs']

        # Initialize data sources
        if len(from_data_ds) > 100:
            nr_subsets = 3
        else:
            # Don't use k-fold cross validation for very small input sizes
            nr_subsets = 1

        from_data_ds.prepare_encoders()
        from_data_ds.create_subsets(nr_subsets)
        try:
            mixer_class({}).fit_data_source(from_data_ds)
        except Exception:
            # Not all mixers require this step
            pass

        input_size = len(from_data_ds[0][0])
        training_data_length = len(from_data_ds)

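        # Reuse the transformer, encoders and output weights fitted on the training data for the test datasource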
        test_data_ds.transformer = from_data_ds.transformer
        test_data_ds.encoders = from_data_ds.encoders
        test_data_ds.output_weights = from_data_ds.output_weights
        test_data_ds.create_subsets(nr_subsets)

        if 'optimizer' in self.config:
            optimizer = self.config['optimizer']()

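            # Shrink the optimizer's trial budget until each trial gets a reasonable time slice, but never go below 8 trials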
            while True:
                training_time_per_iteration = stop_model_building_after_seconds / optimizer.total_trials

                # Some heuristics...
                if training_time_per_iteration > input_size:
                    if training_time_per_iteration > min(
                        (training_data_length /
                         (4 * input_size)), 16 * input_size):
                        break

                optimizer.total_trials = optimizer.total_trials - 1
                if optimizer.total_trials < 8:
                    optimizer.total_trials = 8
                    break

            training_time_per_iteration = stop_model_building_after_seconds / optimizer.total_trials

            best_parameters = optimizer.evaluate(
                lambda dynamic_parameters: Predictor.evaluate_mixer(
                    self.config,
                    mixer_class,
                    mixer_params,
                    from_data_ds,
                    test_data_ds,
                    dynamic_parameters,
                    max_training_time=training_time_per_iteration,
                    max_epochs=None))

            logging.info('Using hyperparameter set: %s', best_parameters)
        else:
            best_parameters = {}

        self._mixer = mixer_class(best_parameters, self.config)

        for param in mixer_params:
            if hasattr(self._mixer, param):
                setattr(self._mixer, param, mixer_params[param])
            else:
                logging.warning(
                    'Trying to set mixer param {param} but mixer class {mixerclass} does not have such a parameter'
                    .format(param=param, mixerclass=str(type(self._mixer))))

        def callback_on_iter_w_acc(epoch, training_error, test_error,
                                   delta_mean):
            if callback_on_iter is not None:
                callback_on_iter(epoch, training_error, test_error, delta_mean,
                                 self.calculate_accuracy(test_data_ds))

        self._mixer.fit(
            train_ds=from_data_ds,
            test_ds=test_data_ds,
            callback=callback_on_iter_w_acc,
            stop_training_after_seconds=stop_training_after_seconds,
            eval_every_x_epochs=eval_every_x_epochs)
        self.train_accuracy = self.calculate_accuracy(test_data_ds)

        # Train some alternative mixers
        if CONFIG.HELPER_MIXERS and self.has_boosting_mixer and (
                CONFIG.FORCE_HELPER_MIXERS
                or len(from_data_ds) < 12 * pow(10, 3)):
            try:
                self._helper_mixers = self.train_helper_mixers(
                    from_data_ds, test_data_ds,
                    self._mixer.quantiles[self._mixer.quantiles_pair[0] +
                                          1:self._mixer.quantiles_pair[1] + 1])
            except Exception as e:
                logging.warning(
                    f'Failed to train helper mixers with error: {e}')

        return self
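
A minimal usage sketch of the learn() call above, assuming the classic lightwood-style API in which Predictor is constructed with the output column names (the import path and constructor signature are assumptions, not shown in this snippet):

    import pandas
    from lightwood import Predictor

    # Hypothetical toy dataset: two numeric inputs, one categorical target
    df = pandas.DataFrame({
        'x': list(range(100)),
        'y': list(range(100, 200)),
        'z': ['low' if i < 50 else 'high' for i in range(100)]
    })

    # learn() auto-generates a configuration via type_map() when none is supplied,
    # holds out a random 10% of df as the test set, and returns self
    predictor = Predictor(output=['z'])
    predictor.learn(from_data=df, eval_every_x_epochs=10)
    print(predictor.train_accuracy)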
Example #3
    def learn(self,
              from_data,
              test_data=None,
              callback_on_iter=None,
              eval_every_x_epochs=20,
              stop_training_after_seconds=None,
              stop_model_building_after_seconds=None):
        """
        Train and save a model (you can use this to retrain model from data)

        :param from_data: (Pandas DataFrame) The data to learn from
        :param test_data: (Pandas DataFrame) The data to test accuracy and learn_error from
        :param callback_on_iter: This is function that can be called on every X evaluation cycle
        :param eval_every_x_epochs: This is every how many epochs we want to calculate the test error and accuracy

        :return: None
        """

        # Helper that auto-determines roughly what data type is in each column
        # NOTE: this assumes the data is clean and will only return the types 'CATEGORICAL', 'NUMERIC' and 'TEXT'
        def type_map(col_name):
            col_pd_type = from_data[col_name].dtype
            col_pd_type = str(col_pd_type)

            if col_pd_type in ['int64', 'float64', 'timedelta']:
                return COLUMN_DATA_TYPES.NUMERIC
            elif col_pd_type in ['bool', 'category']:
                return COLUMN_DATA_TYPES.CATEGORICAL
            else:
                # If the number of unique values is less than 100 or less than 10% of the total number of rows, keep it as categorical
                unique = from_data[col_name].nunique()
                if unique < 100 or unique < len(from_data[col_name]) / 10:
                    return COLUMN_DATA_TYPES.CATEGORICAL
                # otherwise, assume it's text
                return COLUMN_DATA_TYPES.TEXT

        # generate the configuration and set the order for the input and output columns
        if self._generate_config is True:
            self._input_columns = [
                col for col in from_data if col not in self._output_columns
            ]
            self.config = {
                'input_features': [{
                    'name': col,
                    'type': type_map(col)
                } for col in self._input_columns],
                'output_features': [{
                    'name': col,
                    'type': type_map(col)
                } for col in self._output_columns]
            }
            logging.info('Automatically generated a configuration')
            logging.info(self.config)
        else:
            self._output_columns = [
                col['name'] for col in self.config['output_features']
            ]
            self._input_columns = [
                col['name'] for col in self.config['input_features']
            ]

        # @TODO Make Cross Entropy Loss work with multiple outputs
        is_categorical_output = (
            len(self.config['output_features']) == 1
            and self.config['output_features'][0]['type'] ==
            COLUMN_DATA_TYPES.CATEGORICAL)

        if stop_training_after_seconds is None:
            stop_training_after_seconds = round(from_data.shape[0] *
                                                from_data.shape[1] / 5)

        if stop_model_building_after_seconds is None:
            stop_model_building_after_seconds = stop_training_after_seconds * 3

        from_data_ds = DataSource(from_data, self.config)

        if test_data is not None:
            test_data_ds = DataSource(test_data, self.config)
        else:
            test_data_ds = from_data_ds.extractRandomSubset(0.1)

        from_data_ds.training = True

        mixer_class = NnMixer
        mixer_params = {}

        if 'mixer' in self.config:
            if 'class' in self.config['mixer']:
                mixer_class = self.config['mixer']['class']
            if 'attrs' in self.config['mixer']:
                mixer_params = self.config['mixer']['attrs']

        # Initialize data sources
        nr_subsets = 3
        from_data_ds.prepare_encoders()
        from_data_ds.create_subsets(nr_subsets)

        try:
            mixer_class({}).fit_data_source(from_data_ds)
        except Exception:
            # Not all mixers require this step
            pass

        input_size = len(from_data_ds[0][0])
        training_data_length = len(from_data_ds)

        test_data_ds.transformer = from_data_ds.transformer
        test_data_ds.encoders = from_data_ds.encoders
        test_data_ds.output_weights = from_data_ds.output_weights
        test_data_ds.create_subsets(nr_subsets)

        if 'optimizer' in self.config:
            optimizer = self.config['optimizer']()

            while True:
                training_time_per_iteration = stop_model_building_after_seconds / optimizer.total_trials

                # Some heuristics...
                if training_time_per_iteration > input_size:
                    if training_time_per_iteration > min(
                        (training_data_length /
                         (4 * input_size)), 16 * input_size):
                        break

                optimizer.total_trials = optimizer.total_trials - 1
                if optimizer.total_trials < 8:
                    optimizer.total_trials = 8
                    break

            training_time_per_iteration = stop_model_building_after_seconds / optimizer.total_trials

            best_parameters = optimizer.evaluate(
                lambda dynamic_parameters: Predictor.evaluate_mixer(
                    mixer_class,
                    mixer_params,
                    from_data_ds,
                    test_data_ds,
                    dynamic_parameters,
                    is_categorical_output,
                    max_training_time=training_time_per_iteration,
                    max_epochs=None))
            logging.info('Using hyperparameter set: %s', best_parameters)
        else:
            best_parameters = {}

        mixer = mixer_class(best_parameters,
                            is_categorical_output=is_categorical_output)
        self._mixer = mixer

        for param in mixer_params:
            if hasattr(mixer, param):
                setattr(mixer, param, mixer_params[param])
            else:
                logging.warning(
                    'Trying to set mixer param {param} but mixer class {mixerclass} does not have such a parameter'
                    .format(param=param, mixerclass=str(type(mixer))))

        started = time.time()
        epoch = 0
        eval_next_on_epoch = eval_every_x_epochs

        stop_training = False

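        # Two passes over the subsets: pass 1 only primes the model briefly on each subset, pass 2 trains with full evaluation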
        for subset_iteration in [1, 2]:
            if stop_training:
                break
            for subset_id in [*from_data_ds.subsets.keys()]:
                if stop_training:
                    break

                subset_train_ds = from_data_ds.subsets[subset_id]
                subset_test_ds = test_data_ds.subsets[subset_id]

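                # Rolling buffers of test-error deltas; a negative mean delta means the test error is rising (overfitting)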
                lowest_error = None
                last_test_error = None
                last_subset_test_error = None
                test_error_delta_buff = []
                subset_test_error_delta_buff = []
                best_model = None

                # Iterate over iter_fit, tracking the epoch and the mixer's training error
                for epoch, training_error in enumerate(
                        mixer.iter_fit(subset_train_ds)):
                    logging.info(
                        'training iteration {iter_i}, error {error}'.format(
                            iter_i=epoch, error=training_error))

                    if epoch >= eval_next_on_epoch:
                        # Prime the model on each subset for a bit
                        if subset_iteration == 1:
                            break

                        eval_next_on_epoch += eval_every_x_epochs

                        test_error = mixer.error(test_data_ds)
                        subset_test_error = mixer.error(subset_test_ds)

                        if lowest_error is None or test_error < lowest_error:
                            lowest_error = test_error
                            best_model = mixer.get_model_copy()

                        if last_subset_test_error is None:
                            subset_test_error_delta_buff.append(0)
                        else:
                            subset_test_error_delta_buff.append(
                                last_subset_test_error - subset_test_error)

                        last_subset_test_error = subset_test_error

                        if last_test_error is None:
                            test_error_delta_buff.append(0)
                        else:
                            test_error_delta_buff.append(last_test_error -
                                                         test_error)

                        last_test_error = test_error

                        delta_mean = np.mean(test_error_delta_buff[-10:])
                        subset_delta_mean = np.mean(
                            subset_test_error_delta_buff[-10:])

                        if callback_on_iter is not None:
                            callback_on_iter(
                                epoch, training_error, test_error, delta_mean,
                                self.calculate_accuracy(test_data_ds))

                        # Stop if the model is overfitting
                        if delta_mean < 0 and len(test_error_delta_buff) > 9:
                            stop_training = True

                        # Stop if we're past the time limit allocated for training
                        if (time.time() -
                                started) > stop_training_after_seconds:
                            stop_training = True

                        # If the training subset is overfitting on its associated testing subset
                        if subset_delta_mean < 0 and len(
                                subset_test_error_delta_buff) > 9:
                            break

                        if stop_training:
                            mixer.update_model(best_model)
                            self._mixer = mixer
                            self.train_accuracy = self.calculate_accuracy(
                                test_data_ds)
                            self.overall_certainty = mixer.overall_certainty()
                            if subset_id == 'full':
                                logging.info('Finished training model!')
                            else:
                                logging.info(
                                    'Finished fitting on {subset_id} of {no_subsets} subsets'
                                    .format(subset_id=subset_id,
                                            no_subsets=len(
                                                from_data_ds.subsets.keys())))
                            break

        self._mixer.encoders = from_data_ds.encoders
        return self
Example #4
    def learn(self,
              from_data,
              test_data=None,
              callback_on_iter=None,
              eval_every_x_epochs=20,
              stop_training_after_seconds=None,
              stop_model_building_after_seconds=None):
        """
        Train and save a model (you can use this to retrain model from data)

        :param from_data: (Pandas DataFrame) The data to learn from
        :param test_data: (Pandas DataFrame) The data to test accuracy and learn_error from
        :param callback_on_iter: This is function that can be called on every X evaluation cycle
        :param eval_every_x_epochs: This is every how many epochs we want to calculate the test error and accuracy

        :return: None
        """

        # Helper that auto-determines roughly what data type is in each column
        # NOTE: this assumes the data is clean and will only return the types 'CATEGORICAL', 'NUMERIC' and 'TEXT'
        def type_map(col_name):
            col_pd_type = from_data[col_name].dtype
            col_pd_type = str(col_pd_type)

            if col_pd_type in ['int64', 'float64', 'timedelta']:
                return COLUMN_DATA_TYPES.NUMERIC
            elif col_pd_type in ['bool', 'category']:
                return COLUMN_DATA_TYPES.CATEGORICAL
            else:
                # If the number of unique values is less than 100, or less
                # than 10% of the total number of rows, keep it as categorical
                unique = from_data[col_name].nunique()
                if unique < 100 or unique < len(from_data[col_name]) / 10:
                    return COLUMN_DATA_TYPES.CATEGORICAL
                # otherwise, assume it's text
                return COLUMN_DATA_TYPES.TEXT

        # generate the configuration and set the order for the input and output columns
        if self._generate_config is True:
            self._input_columns = [
                col for col in from_data if col not in self._output_columns
            ]
            self.config = {
                'input_features': [{
                    'name': col,
                    'type': type_map(col)
                } for col in self._input_columns],
                'output_features': [{
                    'name': col,
                    'type': type_map(col)
                } for col in self._output_columns]
            }
            self.config = predictor_config_schema.validate(self.config)
            logging.info('Automatically generated a configuration')
            logging.info(self.config)
        else:
            self._output_columns = [
                col['name'] for col in self.config['output_features']
            ]
            self._input_columns = [
                col['name'] for col in self.config['input_features']
            ]

        if stop_training_after_seconds is None:
            stop_training_after_seconds = round(from_data.shape[0] *
                                                from_data.shape[1] / 5)

        if stop_model_building_after_seconds is None:
            stop_model_building_after_seconds = stop_training_after_seconds * 3

        from_data_ds = DataSource(from_data, self.config)

        if test_data is not None:
            test_data_ds = DataSource(test_data, self.config)
        else:
            test_data_ds = from_data_ds.extractRandomSubset(0.1)

        from_data_ds.training = True

        mixer_class = NnMixer
        mixer_params = {}

        if 'mixer' in self.config:
            if 'class' in self.config['mixer']:
                mixer_class = self.config['mixer']['class']
            if 'attrs' in self.config['mixer']:
                mixer_params = self.config['mixer']['attrs']

        # Initialize data sources
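        # Split the training data into 3 subsets; the subset loop below cycles through them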
        nr_subsets = 3
        from_data_ds.prepare_encoders()
        from_data_ds.create_subsets(nr_subsets)
        try:
            mixer_class({}).fit_data_source(from_data_ds)
        except Exception:
            # Not all mixers require this step
            pass

        input_size = len(from_data_ds[0][0])
        training_data_length = len(from_data_ds)

        test_data_ds.transformer = from_data_ds.transformer
        test_data_ds.encoders = from_data_ds.encoders
        test_data_ds.output_weights = from_data_ds.output_weights
        test_data_ds.create_subsets(nr_subsets)

        if 'optimizer' in self.config:
            optimizer = self.config['optimizer']()

            while True:
                training_time_per_iteration = stop_model_building_after_seconds / optimizer.total_trials

                # Some heuristics...
                if training_time_per_iteration > input_size:
                    if training_time_per_iteration > min(
                        (training_data_length /
                         (4 * input_size)), 16 * input_size):
                        break

                optimizer.total_trials = optimizer.total_trials - 1
                if optimizer.total_trials < 8:
                    optimizer.total_trials = 8
                    break

            training_time_per_iteration = stop_model_building_after_seconds / optimizer.total_trials

            best_parameters = optimizer.evaluate(
                lambda dynamic_parameters: Predictor.evaluate_mixer(
                    self.config,
                    mixer_class,
                    mixer_params,
                    from_data_ds,
                    test_data_ds,
                    dynamic_parameters,
                    max_training_time=training_time_per_iteration,
                    max_epochs=None))

            logging.info('Using hyperparameter set: %s', best_parameters)
        else:
            best_parameters = {}

        if CONFIG.HELPER_MIXERS and self.has_boosting_mixer and (
                CONFIG.FORCE_HELPER_MIXERS
                or len(from_data_ds) < 12 * pow(10, 3)):
            try:
                self._helper_mixers = self.train_helper_mixers(
                    from_data_ds, test_data_ds)
            except Exception as e:
                logging.warning(
                    f'Failed to train helper mixers with error: {e}')

        mixer = mixer_class(best_parameters, self.config)
        self._mixer = mixer

        for param in mixer_params:
            if hasattr(mixer, param):
                setattr(mixer, param, mixer_params[param])
            else:
                logging.warning(
                    'Trying to set mixer param {param} but mixer class {mixerclass} does not have such a parameter'
                    .format(param=param, mixerclass=str(type(mixer))))

        started = time.time()
        log_reassure = time.time()
        first_run = True
        stop_training = False

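        # Training phases: prime briefly on each subset first, then enable input dropout and (if configured) selfaware training as the error drops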
        for subset_iteration in [1, 2]:
            if stop_training:
                break
            subset_id_arr = [*from_data_ds.subsets.keys()]
            for subset_id in subset_id_arr:
                started_subset = time.time()
                if stop_training:
                    break

                subset_train_ds = from_data_ds.subsets[subset_id]
                subset_test_ds = test_data_ds.subsets[subset_id]

                lowest_error = None
                last_test_error = None
                last_subset_test_error = None
                test_error_delta_buff = []
                subset_test_error_delta_buff = []
                best_model = None
                best_selfaware_model = None

                # Iterate over iter_fit, tracking the epoch and the mixer's training error
                for epoch, training_error in enumerate(
                        mixer.iter_fit(subset_train_ds,
                                       initialize=first_run,
                                       subset_id=subset_id)):
                    first_run = False

                    # Log this every now and then so that the user knows it's running
                    if (time.time() - log_reassure) > 30:
                        log_reassure = time.time()
                        logging.info(
                            f'Lightwood training, iteration {epoch}, training error {training_error}'
                        )

                    # Prime the model on each subset for a bit
                    if subset_iteration == 1:
                        break

                    # Once the training error is getting smaller, enable dropout to teach the network to predict without certain features
                    if subset_iteration > 1 and training_error < 0.4 and not from_data_ds.enable_dropout:
                        eval_every_x_epochs = max(1,
                                                  int(eval_every_x_epochs / 2))
                        logging.info('Enabled dropout!')
                        from_data_ds.enable_dropout = True
                        lowest_error = None
                        last_test_error = None
                        last_subset_test_error = None
                        test_error_delta_buff = []
                        subset_test_error_delta_buff = []
                        continue

                    # If the selfaware network isn't able to train, go back to the original network
                    if subset_iteration > 1 and (
                            np.isnan(training_error)
                            or np.isinf(training_error) or training_error >
                            pow(10, 5)) and not mixer.stop_selfaware_training:
                        mixer.start_selfaware_training = False
                        mixer.stop_selfaware_training = True
                        lowest_error = None
                        last_test_error = None
                        last_subset_test_error = None
                        test_error_delta_buff = []
                        subset_test_error_delta_buff = []
                        continue

                    # Once we are past the priming/warmup period, start training the selfaware network

                    if subset_iteration > 1 and not mixer.is_selfaware and self.config[
                            'mixer'][
                                'selfaware'] and not mixer.stop_selfaware_training and training_error < 0.35:
                        logging.info('Started selfaware training!')
                        mixer.start_selfaware_training = True
                        lowest_error = None
                        last_test_error = None
                        last_subset_test_error = None
                        test_error_delta_buff = []
                        subset_test_error_delta_buff = []
                        continue

                    if epoch % eval_every_x_epochs == 0:
                        test_error = mixer.error(test_data_ds)
                        subset_test_error = mixer.error(subset_test_ds,
                                                        subset_id=subset_id)
                        logging.info(
                            f'Subset test error: {subset_test_error} on subset {subset_id}, overall test error: {test_error}'
                        )

                        if lowest_error is None or test_error < lowest_error:
                            lowest_error = test_error
                            if mixer.is_selfaware:
                                best_selfaware_model = mixer.get_model_copy()
                            else:
                                best_model = mixer.get_model_copy()

                        if last_subset_test_error is not None:
                            subset_test_error_delta_buff.append(
                                last_subset_test_error - subset_test_error)

                        last_subset_test_error = subset_test_error

                        if last_test_error is not None:
                            test_error_delta_buff.append(last_test_error -
                                                         test_error)

                        last_test_error = test_error

                        delta_mean = np.mean(test_error_delta_buff[-5:])
                        subset_delta_mean = np.mean(
                            subset_test_error_delta_buff[-5:])

                        if callback_on_iter is not None:
                            callback_on_iter(
                                epoch, training_error, test_error, delta_mean,
                                self.calculate_accuracy(test_data_ds))

                        # Stop if the model is overfitting (check disabled in this version)
                        #if delta_mean <= 0 and len(test_error_delta_buff) > 4:
                        #    stop_training = True

                        # Stop if we're past the time limit allocated for training
                        if (time.time() -
                                started) > stop_training_after_seconds:
                            stop_training = True

                        # If the training subset is overfitting on its associated testing subset
                        if (subset_delta_mean <= 0
                                and len(subset_test_error_delta_buff) > 4
                            ) or (time.time() - started_subset
                                  ) > stop_training_after_seconds / len(
                                      from_data_ds.subsets.keys()):
                            logging.info(
                                'Finished fitting on {subset_id} of {no_subsets} subsets'
                                .format(subset_id=subset_id,
                                        no_subsets=len(
                                            from_data_ds.subsets.keys())))

                            if mixer.is_selfaware:
                                if best_selfaware_model is not None:
                                    mixer.update_model(best_selfaware_model)
                            else:
                                mixer.update_model(best_model)

                            if subset_id == subset_id_arr[-1]:
                                stop_training = True
                            elif not stop_training:
                                break

                        if stop_training:
                            if mixer.is_selfaware:
                                mixer.update_model(best_selfaware_model)
                            else:
                                mixer.update_model(best_model)
                            self._mixer = mixer
                            self.train_accuracy = self.calculate_accuracy(
                                test_data_ds)
                            self.overall_certainty = mixer.overall_certainty()
                            logging.info('Finished training model!')
                            break

        self._mixer.build_confidence_normalization_data(test_data_ds)
        self._mixer.encoders = from_data_ds.encoders
        return self