def validate(self, iterations):
        """Run a random search over the reference object's hyperparameters.

        On every iteration one value is drawn with ``self.sample`` for each
        entry of the hyperparameter dictionary, a new instance of the
        reference object's class is built from the fixed and the sampled
        parameters, and the score returned by its ``evaluate()`` is written to
        ``self.writer``, sent through ``HERA`` and printed.

        :param iterations: number of random configurations to evaluate
        """
        self.fixed_params_dict, self.hyperparameters_dict = self.reference_object.get_params()
        for _ in range(iterations):
            # draw one value for every tunable hyperparameter
            sampled_params = {
                key: self.sample(value)
                for key, value in self.hyperparameters_dict.items()
            }

            params_dict = {**self.fixed_params_dict, **sampled_params}
            print(params_dict)
            model = self.reference_object.__class__(**params_dict)
            score = model.evaluate()

            # read the parameters back from the model in case a fixed one
            # has changed
            self.fixed_params_dict, self.hyperparameters_dict = model.get_params()
            params_dict = {**self.fixed_params_dict, **sampled_params}
            if self.automatic_export is not None:
                self.automatic_export.check_if_export(score, params_dict)

            self.writer.write(
                'params: {}\n MRR is: {}\n\n'.format(params_dict, score))

            # the same report goes to the telegram channel and to stdout
            report = 'name: {} params: {}\n MRR is: {}\n\n'.format(
                model.name, params_dict, score)
            HERA.send_message(report, self.user)
            print(report)
Esempio n. 2
0
    def train():
        """Interactively configure, fit, save and evaluate a model.

        The mode, the optimizer and the learning rate are asked through
        ``menu``. The model is fitted for 10000 epochs, saved under
        ``saved_models/`` with a suffix derived from the best ``val_acc`` in
        its history, and its evaluation report is sent through ``bot``.
        """
        mode = menu.mode_selection()

        # ask which optimizer and learning rate to use
        opt = menu.single_choice('Optimizer?', ['Adam', 'RMSProp'],
                                 ['adam', 'rmsprop'])
        lr = menu.single_choice('Learning rate?', ['e-3', 'e-4', 'e-5'],
                                [1e-3, 1e-4, 1e-5])
        optimizer_cls = (keras.optimizers.Adam
                         if opt == 'adam' else keras.optimizers.RMSprop)
        optim = optimizer_cls(lr=lr)

        model = interactive_model(mode, optim=optim)

        model.fit(epochs=10000)
        print('\nFit completed!')

        # the best validation accuracy (rounded, dot removed) tags the file
        best_accuracy = np.max(model.history.history['val_acc'])
        rounded_accuracy = round(best_accuracy, 5)
        suffix = '_{}'.format(rounded_accuracy).replace('.', '')
        model.save(folderpath='saved_models/', suffix=suffix)

        report = model.evaluate()
        bot.send_message(report, account='parro')

        print('Opt: {}'.format(opt))
        print('Lr: {}'.format(lr))
 def export(self, obj, params_dict, mode, mrr):
     """Instantiate ``obj`` in the given mode and run it with exports enabled.

     :param obj: callable invoked with the parameters as keyword arguments;
         the returned instance must expose ``name`` and ``run``
     :param params_dict: keyword arguments for ``obj``; it is not modified
     :param mode: value passed to ``obj`` as the ``mode`` keyword
     :param mrr: score reported in the notification message
     """
     # work on a copy so the caller's dictionary does not gain a 'mode' key
     export_params = {**params_dict, 'mode': mode}
     instance = obj(**export_params)
     HERA.send_message(
         'EXPORTING sub and scores algo {} with score {} in mode {} with params {}'
         .format(instance.name, mrr, mode, export_params), self.user)
     instance.run(export_sub=True, export_scores=True)
Esempio n. 4
0
 def _hera_callback(param):
     iteration_num = param[2]
     if iteration_num % param[1]['print_every'] == 0:
         message = f'PARAMS:\n'
         for k in param[1]:
             message += f'{k}: {param[1][k]}\n'
         Hera.send_message(
             f'ITERATION_NUM: {iteration_num}\n {message}\n MRR: {param[5][0][2]}',
             account='edo')
def train_and_test():
    """Train a ranking estimator, save its predictions and export them.

    Everything is configured through ``FLAGS``: the train and test data are
    loaded from ``FLAGS.train_path`` and ``FLAGS.test_path``, the estimator is
    trained for ``FLAGS.num_train_steps`` steps, the predictions on both sets
    are saved under ``FLAGS.save_path`` and a ``TensorflowRankig`` model is
    run for each saved prediction file.
    """
    features, labels = load_libsvm_data(FLAGS.train_path, FLAGS.list_size)
    train_input_fn, train_hook = get_train_inputs(features, labels,
                                                  FLAGS.train_batch_size)
    features_test, labels_test = load_libsvm_data(FLAGS.test_path,
                                                  FLAGS.list_size)

    def _train_op_fn(loss):
        """Build the Adagrad train op used by the ranking head."""
        return tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            learning_rate=FLAGS.learning_rate,
            optimizer="Adagrad")

    def _predict(x, y):
        """Collect the estimator's predictions for ``(x, y)`` in an array."""
        return np.array(
            list(estimator.predict(lambda: batch_inputs(x, y, 128))))

    # pick the lambda weight that matches the configured loss
    loss_name = FLAGS.loss
    if loss_name == 'list_mle_loss':
        lambda_weight = tfr.losses.create_p_list_mle_lambda_weight(list_size=25)
    elif loss_name == 'approx_ndcg_loss':
        lambda_weight = tfr.losses.create_ndcg_lambda_weight(topn=25)
    else:
        lambda_weight = tfr.losses.create_reciprocal_rank_lambda_weight(topn=25)

    loss_fn = tfr.losses.make_loss_fn(FLAGS.loss, lambda_weight=lambda_weight)
    ranking_head = tfr.head.create_ranking_head(
        loss_fn=loss_fn,
        eval_metric_fns=get_eval_metric_fns(),
        train_op_fn=_train_op_fn)

    model_fn = tfr.model.make_groupwise_ranking_fn(
        group_score_fn=make_score_fn(),
        group_size=FLAGS.group_size,
        transform_fn=tfr.feature.make_identity_transform_fn(FLAGS.train_context_features_id),
        ranking_head=ranking_head)
    estimator = tf.estimator.Estimator(model_fn=model_fn)

    estimator.train(train_input_fn, hooks=[train_hook], steps=FLAGS.num_train_steps)

    # the train set is predicted too, to get the scores used for stacking
    pred_train = _predict(features, labels)
    pred = _predict(features_test, labels_test)

    # both file names share everything but their prefix
    run_tag = (
        f'{FLAGS.loss}_learning_rate_{FLAGS.learning_rate}_train_batch_size_{FLAGS.train_batch_size}_'
        f'hidden_layers_dim_{FLAGS.hidden_layer_dims}_num_train_steps_{FLAGS.num_train_steps}_dropout_{FLAGS.dropout_rate}_{FLAGS.group_size}')
    pred_name_train = f'train_predictions_{run_tag}'
    pred_name = f'predictions_{run_tag}'
    np.save(f'{FLAGS.save_path}/{pred_name}', pred)
    np.save(f'{FLAGS.save_path}/{pred_name_train}', pred_train)

    for name in (pred_name, pred_name_train):
        HERA.send_message(f'EXPORTING A SUB... mode:{FLAGS.mode}, name:{name}')
        model = TensorflowRankig(mode=FLAGS.mode,
                                 cluster='no_cluster',
                                 dataset_name=FLAGS.dataset_name,
                                 pred_name=name)
        model.name = f'tf_ranking_{name}'
        model.run()
        HERA.send_message(f'EXPORTED... mode:{FLAGS.mode}, name:{name}')
def callbak(obj):
    """Track the best MRR seen so far and report every improvement.

    The candidate value is the negation of ``obj[6][1][1]``. When it is
    greater than the module-level ``_best_so_far`` it becomes the new best and
    is printed; a new best above 0.6765 is also sent through ``HERA``.
    """
    global _best_so_far
    candidate = -obj[6][1][1]
    if not candidate > _best_so_far:
        return

    _best_so_far = candidate
    if candidate > 0.6765:
        notification = 'xgboost {} iteration {} mrr is {}'.format(
            _kind, obj.iteration, candidate)
        HERA.send_message(notification, 'teo')
    print('xgboost iteration {} mrr is {}'.format(obj.iteration, candidate))
Esempio n. 7
0
def create_dataset(mode, cluster, features_array, dataset_name, stacking_scores_path):
    """Build the tf ranking train and test datasets and save them to disk.

    The features are merged with ``merge_features_tf``; the context feature
    ids and the parsed 'train' and 'test' sets are written under
    ``dataset/preprocessed/tf_ranking/{cluster}/{mode}/{dataset_name}``.
    """
    save_dir = f'dataset/preprocessed/tf_ranking/{cluster}/{mode}/{dataset_name}'
    cf.check_folder(save_dir)
    train_df, vali_test_df, context_features_id = merge_features_tf(
        mode, cluster, features_array, stacking_scores_path)

    # np.save appends the .npy extension shown in the message
    context_ids_path = f'{save_dir}/context_features_id'
    print(f'saving context feature id to: {context_ids_path}.npy')
    np.save(context_ids_path, context_features_id)

    for split_df, split_name in ((train_df, 'train'), (vali_test_df, 'test')):
        parse_dataset(split_df, save_dir, split_name)
    Hera.send_message('tf ranking dataset saved !')
    print('PROCEDURE ENDED CORRECTLY')
Esempio n. 8
0
        def create_sub(estimator,
                       checkpoint_path,
                       eval_result,
                       batch_size=128,
                       patience=0.001):
            """Save and export train/test predictions when the MRR improves.

            Nothing happens unless ``self.mode`` is 'local' and
            ``eval_result['metric/mrr']`` exceeds ``self.min_mrr + patience``.
            When it does, that MRR becomes the new ``self.min_mrr``, the
            estimator's predictions on the test and train data are saved under
            ``self.save_path`` and a ``TensorflowRankig`` model is run for
            each of the two prediction files.

            ``checkpoint_path`` is accepted but not used here.
            """
            if self.mode != 'local':
                return

            eval_result_f = eval_result['metric/mrr']
            global_step = eval_result['global_step']
            if not eval_result_f > self.min_mrr + patience:
                return

            # the improved mrr becomes the new threshold
            self.min_mrr = eval_result_f

            def _predict(x, y):
                """Collect the estimator's predictions for ``(x, y)``."""
                return np.array(
                    list(
                        estimator.predict(
                            lambda: batch_inputs(x, y, batch_size))))

            # predict the test first, then the train
            pred = _predict(self.test_x, self.test_y)
            pred_train = _predict(self.x, self.y)

            # both file names share everything but their prefix
            params = self.params
            run_tag = (
                '{}_learning_rate_{}_train_batch_size_{}_hidden_layers_dim_{}_num_train_steps_{}'
                '_dropout_{}_global_steps_{}_{}_mrr_{}').format(
                    params['loss'], params['learning_rate'],
                    params['train_batch_size'], params['hidden_layer_dims'],
                    params['num_train_steps'], params['dropout_rate'],
                    global_step, params['group_size'], eval_result_f)
            pred_name_train = 'train_predictions_' + run_tag
            pred_name = 'predictions_' + run_tag

            np.save(f'{self.save_path}/{pred_name_train}', pred_train)
            np.save(f'{self.save_path}/{pred_name}', pred)

            for name in (pred_name, pred_name_train):
                HERA.send_message(
                    f'EXPORTING A SUB... {eval_result_f} mode:{self.mode}, name:{name}'
                )
                model = TensorflowRankig(mode=self.mode,
                                         cluster='no_cluster',
                                         dataset_name=self.dataset_name,
                                         pred_name=name)
                model.name = f'tf_ranking_{name}'
                model.run()
                HERA.send_message(
                    f'EXPORTED... {eval_result_f} mode:{self.mode}, name:{name}'
                )
Esempio n. 9
0
    def evaluate(self, send_MRR_on_telegram=False):
        """Fit the classifier and build a classification report.

        The report is the output of ``classification_report`` on the pairs
        returned by ``self.recommend_batch()``, followed by an accuracy line.

        :param send_MRR_on_telegram: when true the report is also sent
            through ``HERA``
        :return: the report text
        """
        self.fit()
        print(self.xgb.feature_importances_)

        Y_test, Y_pred = self.recommend_batch()
        report = classification_report(Y_test, Y_pred)
        accuracy_pct = accuracy_score(Y_test, Y_pred) * 100
        report = report + "\n Accuracy: {} %".format(accuracy_pct)
        print(report)

        if send_MRR_on_telegram:
            message = (
                'evaluating classifier {} on {}.\n Classification report is: \n {}\n\n'
                .format(self.name, self.mode, report))
            HERA.send_message(message)

        return report
    def _validate_step(self, **sampled_params):
        """Evaluate one hyperparameter configuration and log its score.

        A new instance of the reference object's class is built from
        ``self.fixed_params_dict`` overridden by the given keyword arguments;
        its score is written to ``self.writer`` and sent through ``HERA``.

        :param sampled_params: hyperparameter values for this step
        :return: the score returned by the model's ``evaluate()``
        """
        # initialize the recommender
        params_dict = {**self.fixed_params_dict, **sampled_params}
        model = self.reference_object.__class__(**params_dict)
        score = model.evaluate()

        # release the model and force a collection before the next step
        del model
        gc.collect()

        # the same report goes to the writer and to the telegram channel
        report = 'params: {}\n MRR is: {}\n\n'.format(params_dict, score)
        self.writer.write(report)
        HERA.send_message(report)

        return score
Esempio n. 11
0
        def get_mrr(arg_list):
            """Validate a lightGBM model with the given values and return ``-mrr``.

            :param arg_list: seven values, in order: learning_rate,
                num_leaves, min_split_gain, min_child_weight,
                min_child_samples, bagging_freq, feature_fraction
            :return: the negated MRR returned by ``validate()``
                (presumably because the caller minimises this function;
                confirm against the caller)
            """
            (learning_rate, num_leaves, min_split_gain, min_child_weight,
             min_child_samples, bagging_freq, feature_fraction) = arg_list

            # tuned values are mixed with fixed settings; key order is kept
            params_dict = {
                'boosting_type': 'gbdt',
                'num_leaves': num_leaves,
                'max_depth': -1,
                'n_estimators': 5000,
                'learning_rate': learning_rate,
                'subsample_for_bin': 200000,
                'class_weights': None,
                'min_split_gain': min_split_gain,
                'min_child_weight': min_child_weight,
                'min_child_samples': min_child_samples,
                'bagging_freq': bagging_freq,
                'feature_fraction': feature_fraction,
                'subsample': 1,
                'subsample_freq': 0,
                'colsample_bytree': 1,
                'reg_alpha': 0.0,
                'reg_lambda': 0.0,
                'random_state': None,
                'n_jobs': -1,
                'silent': False,
                'importance_type': 'split',
                'metric': 'None',
                'print_every': 10000,
            }
            candidate = lightGBM(mode=mode,
                                 cluster=cluster,
                                 dataset_name=dataset_name,
                                 params_dict=params_dict)
            mrr = candidate.validate()
            best_it = candidate.model._Booster.best_iteration

            summary = (
                f'MRR: {mrr}\n'
                f'params:\n'
                f'num_iteration:{best_it}, learning_rate:{learning_rate}, num_leaves:{num_leaves}, '
                f'min_split_gain: {min_split_gain}, min_child_weight: {min_child_weight}, min_child_samples: {min_child_samples}'
            )
            Hera.send_message(summary)
            return -mrr
Esempio n. 12
0
    def iterations_validation(self, max_trees, range_step=25, mode='auto'):
        """Report the test MRR obtained with different limits on the trees.

        In 'auto' mode the limits ``max_trees, max_trees - range_step, ...``
        are tried for as long as they stay positive. In any other mode the
        limit is read from standard input in an endless loop (stop it by
        interrupting the process).

        :param max_trees: largest number of trees to evaluate ('auto' mode)
        :param range_step: distance between two consecutive limits; must be
            positive ('auto' mode)
        :param mode: 'auto' for the automatic sweep, anything else for the
            interactive loop
        :raises ValueError: in 'auto' mode when ``range_step`` is not positive
        """
        if mode == 'auto' and range_step <= 0:
            raise ValueError('range_step must be a positive integer')

        if self.ctb is None:
            self.fit()

        test_df = self.get_preprocessed_dataset(mode='test')

        test_df.drop(['user_id', 'session_id', 'item_id'],
                     inplace=True,
                     axis=1)

        def _evaluate(trees):
            """Score the test set with the current limit and send the MRR."""
            self.predictions = []
            self.scores_batch = []
            test_df.groupby('id', as_index=False).progress_apply(self.func)

            # NOTE(review): the first prediction is skipped, presumably
            # because apply evaluates the first group twice - confirm
            MRR = self.compute_MRR(self.predictions[1:])
            HERA.send_message(
                'evaluating recommender {} on {}. Iterations used {}\n MRR is: {}\n\n'
                .format(self.name, self.cluster, trees, MRR))

        if mode == 'auto':
            # only positive limits: stop before reaching zero
            for trees in range(max_trees, 0, -range_step):
                self.set_limit_trees(trees)
                _evaluate(trees)
        else:
            while True:
                # Getting user input
                while True:
                    trees = input("How many iterations?")
                    try:
                        self.set_limit_trees(int(trees))
                    except ValueError:
                        # not a valid number: ask again
                        continue
                    break

                _evaluate(trees)
def create_lightGBM_dataset(mode, cluster, features_array, dataset_name):
    """Merge the given features and save the lightGBM train and vali sets.

    Everything is written under
    ``dataset/preprocessed/lightGBM/{cluster}/{mode}/{dataset_name}``: the
    feature names (``Features.txt``) and, for both 'train' and 'vali', the
    feature table (hdf), the labels and the group sizes (npy) and the
    user/session/item triples (csv).
    """
    # base save path
    _BASE_PATH = f'dataset/preprocessed/lightGBM/{cluster}/{mode}/{dataset_name}'

    def _create_groups(df):
        """Return the number of rows of every (user_id, session_id) group.

        Groups are not sorted by key (``sort=False``).
        """
        keys = df[['user_id', 'session_id']]
        grouped = keys.groupby(['user_id', 'session_id'], sort=False)
        return grouped.apply(lambda rows: len(rows)).values

    def _save_dataset(base_path, mode, df):
        """Save features, labels, group sizes and id triples of one split."""
        assert mode in ['train', 'vali'], 'the mode has to be train or vali'
        print('reducing memory usage...')
        df = reduce_mem_usage(df)

        check_folder(base_path)

        # features: everything but the identifiers and the label
        x_path = f'{_BASE_PATH}/x_{mode}.hdf'
        features = df.drop(
            ['index', 'user_id', 'session_id', 'item_id', 'label'], axis=1)
        features.to_hdf(x_path, key='df', index=False, format='table')
        print(f'x_{mode} saved at: {x_path}')

        # np.save appends the .npy extension shown in the messages
        y_path = f'{_BASE_PATH}/y_{mode}'
        np.save(y_path, df['label'].values)
        print(f'y_{mode} saved at: {y_path}.npy')

        groups_path = f'{_BASE_PATH}/groups_{mode}'
        np.save(groups_path, _create_groups(df))
        print(f'groups_{mode} saved at: {groups_path}.npy')

        usi_path = f'{_BASE_PATH}/user_session_item_{mode}.csv'
        df[['user_id', 'session_id', 'item_id']].to_csv(usi_path, index=False)
        print(
            f'user_session_item_{mode} saved at: {usi_path}'
        )

    # retrieve the TRAIN and VALIDATION/TEST data
    train_df, validation_df = merge_features_lgb(mode, cluster, features_array)

    print('saving features names...')
    check_folder(f"{_BASE_PATH}")
    feature_names = str(list(map(str, features_array)))
    with open(f"{_BASE_PATH}/Features.txt", "w+") as text_file:
        text_file.write(feature_names)

    Hera.send_message('SAVING TRAIN LIGHTGBM...')
    _save_dataset(_BASE_PATH, 'train', train_df)
    Hera.send_message('SAVING VALI LIGHTGBM...')
    _save_dataset(_BASE_PATH, 'vali', validation_df)
    Hera.send_message('PROCEDURE ENDED CORRECTLY')
def get_scores_cv(k):
    """Collect the tf ranking scores of every fold and of the full dataset.

    For each fold ``i`` in ``1..k`` the ``usi.csv`` table found in
    ``{save_path}/fold_{i}`` gets a ``score_tf`` column filled with the
    flattened output of ``train_cv``; the same is done for the folder of the
    full dataset. All the tables are concatenated and written, gzip
    compressed, to ``{save_path}/scores_{loss}.csv.gz``.

    :param k: number of folds
    """
    df_scores = []
    for fold in range(1, k + 1):
        HERA.send_message(f'fold_{fold} start')
        base_path = '{}/fold_{}'.format(flags_dict['save_path'], fold)

        # load usi
        usi_df = pd.read_csv(f'{base_path}/usi.csv')
        pred = np.array(train_cv(base_path))

        # create the df of the scores
        usi_df['score_tf'] = pred.flatten()

        # append the score df
        df_scores.append(usi_df)
        HERA.send_message(f'fold_{fold} end')

    _BASE_PATH = 'dataset/preprocessed/tf_ranking/no_cluster/full/{}'.format(
        flags_dict['dataset_name'])

    HERA.send_message('retrieving the score for full')

    # retrieve the full scores; converted to an array like the fold scores,
    # so that flatten() also works when train_cv returns a plain sequence
    pred = np.array(train_cv(_BASE_PATH))

    # load usi of the full
    usi_df = pd.read_csv(f'{_BASE_PATH}/usi.csv')
    usi_df['score_tf'] = pred.flatten()

    # append the full scores
    df_scores.append(usi_df)

    # concat all the scores
    final_scores = pd.concat(df_scores)

    # save the scores
    save_path = flags_dict['save_path']
    _loss = flags_dict['loss']
    final_scores.to_csv(f'{save_path}/scores_{_loss}.csv.gz',
                        compression='gzip',
                        index=False)

    HERA.send_message('SCORES SAVED SUCCESFULLY')
def train_and_test():
    """Train a ranking estimator, save its predictions and export them.

    Everything is configured through ``flags_dict``: the 'train' and 'test'
    data are loaded from ``flags_dict['save_path']``, the estimator is trained
    for ``flags_dict['num_train_steps']`` steps, the predictions on both sets
    are saved in the same folder and a ``TensorflowRankig`` model is run for
    each saved prediction file.
    """
    path = flags_dict['save_path']
    features, labels = load_data(path, 'train')
    train_input_fn, train_hook = get_train_inputs(
        features, labels, flags_dict['train_batch_size'])
    features_test, labels_test = load_data(path, 'test')

    def _train_op_fn(loss):
        """Build the Adagrad train op used by the ranking head."""
        return tf.contrib.layers.optimize_loss(
            loss=loss,
            global_step=tf.train.get_global_step(),
            learning_rate=flags_dict['learning_rate'],
            optimizer="Adagrad")

    def _predict(x, y):
        """Collect the estimator's predictions for ``(x, y)`` in an array."""
        return np.array(
            list(estimator.predict(lambda: batch_inputs(x, y, 128))))

    # pick the lambda weight that matches the configured loss
    loss_name = flags_dict['loss']
    if loss_name == 'list_mle_loss':
        lambda_weight = tfr.losses.create_p_list_mle_lambda_weight(
            list_size=25)
    elif loss_name == 'approx_ndcg_loss':
        lambda_weight = tfr.losses.create_ndcg_lambda_weight(topn=25)
    else:
        lambda_weight = tfr.losses.create_reciprocal_rank_lambda_weight(
            topn=25)

    loss_fn = tfr.losses.make_loss_fn(flags_dict['loss'],
                                      lambda_weight=lambda_weight)
    ranking_head = tfr.head.create_ranking_head(
        loss_fn=loss_fn,
        eval_metric_fns=get_eval_metric_fns(),
        train_op_fn=_train_op_fn)

    model_fn = tfr.model.make_groupwise_ranking_fn(
        group_score_fn=make_score_fn(),
        group_size=flags_dict['group_size'],
        transform_fn=tfr.feature.make_identity_transform_fn(
            flags_dict['train_context_features_id']),
        ranking_head=ranking_head)
    estimator = tf.estimator.Estimator(model_fn=model_fn)

    estimator.train(train_input_fn,
                    hooks=[train_hook],
                    steps=flags_dict['num_train_steps'])

    # the train set is predicted too, to get the scores used for stacking
    pred_train = _predict(features, labels)
    pred = _predict(features_test, labels_test)

    # both file names share everything but their prefix
    run_tag = (
        '{}_learning_rate_{}_train_batch_size_{}_'
        'hidden_layers_dim_{}_num_train_steps_{}_dropout_{}_group_size_{}'
    ).format(flags_dict['loss'], flags_dict['learning_rate'],
             flags_dict['train_batch_size'], flags_dict['hidden_layer_dims'],
             flags_dict['num_train_steps'], flags_dict['dropout_rate'],
             flags_dict['group_size'])
    pred_name_train = 'train_predictions_' + run_tag
    pred_name = 'predictions_' + run_tag

    np.save('{}/{}'.format(flags_dict['save_path'], pred_name), pred)
    np.save('{}/{}'.format(flags_dict['save_path'], pred_name_train),
            pred_train)

    for name in (pred_name, pred_name_train):
        HERA.send_message('EXPORTING A SUB... mode:{}, name:{}'.format(
            flags_dict['mode'], name))
        model = TensorflowRankig(mode=flags_dict['mode'],
                                 cluster='no_cluster',
                                 dataset_name=flags_dict['dataset_name'],
                                 pred_name=name)
        model.name = f'tf_ranking_{name}'
        model.run()
        HERA.send_message('EXPORTED... mode:{}, name:{}'.format(
            flags_dict['mode'], name))
Esempio n. 16
0
    def export(self, estimator, export_path, checkpoint_path, eval_result,
               is_the_final_export):
        """Report an evaluation result and keep its checkpoint when deserved.

        The step and the MRR of ``eval_result`` are sent through ``HERA``
        together with the training parameters. When ``self._shouldKeep``
        accepts the checkpoint it is kept, predictions are possibly exported
        (see ``create_sub``) and the stored checkpoints are pruned; otherwise
        the checkpoint is only logged as skipped.

        ``export_path`` and ``is_the_final_export`` are accepted but not used.
        """
        def batch_inputs(features, labels, batch_size):
            """Return a batched dataset built from the feature/label slices."""
            slices = tf.data.Dataset.from_tensor_slices((features, labels))
            return slices.batch(batch_size)

        def create_sub(estimator,
                       checkpoint_path,
                       eval_result,
                       batch_size=128,
                       patience=0.001):
            """Save and export train/test predictions when the MRR improves.

            Nothing happens unless ``self.mode`` is 'local' and the MRR of
            ``eval_result`` exceeds ``self.min_mrr + patience``; in that case
            the MRR becomes the new ``self.min_mrr``.
            """
            if self.mode != 'local':
                return

            eval_result_f = eval_result['metric/mrr']
            global_step = eval_result['global_step']
            if not eval_result_f > self.min_mrr + patience:
                return

            # the improved mrr becomes the new threshold
            self.min_mrr = eval_result_f

            def _predict(x, y):
                """Collect the estimator's predictions for ``(x, y)``."""
                return np.array(
                    list(
                        estimator.predict(
                            lambda: batch_inputs(x, y, batch_size))))

            # predict the test first, then the train
            pred = _predict(self.test_x, self.test_y)
            pred_train = _predict(self.x, self.y)

            # both file names share everything but their prefix
            params = self.params
            run_tag = (
                '{}_learning_rate_{}_train_batch_size_{}_hidden_layers_dim_{}_num_train_steps_{}'
                '_dropout_{}_global_steps_{}_{}_mrr_{}').format(
                    params['loss'], params['learning_rate'],
                    params['train_batch_size'], params['hidden_layer_dims'],
                    params['num_train_steps'], params['dropout_rate'],
                    global_step, params['group_size'], eval_result_f)
            pred_name_train = 'train_predictions_' + run_tag
            pred_name = 'predictions_' + run_tag

            np.save(f'{self.save_path}/{pred_name_train}', pred_train)
            np.save(f'{self.save_path}/{pred_name}', pred)

            for name in (pred_name, pred_name_train):
                HERA.send_message(
                    f'EXPORTING A SUB... {eval_result_f} mode:{self.mode}, name:{name}'
                )
                model = TensorflowRankig(mode=self.mode,
                                         cluster='no_cluster',
                                         dataset_name=self.dataset_name,
                                         pred_name=name)
                model.name = f'tf_ranking_{name}'
                model.run()
                HERA.send_message(
                    f'EXPORTED... {eval_result_f} mode:{self.mode}, name:{name}'
                )

        self._log('export checkpoint {}'.format(checkpoint_path))
        step = eval_result['global_step']
        score = eval_result['metric/mrr']
        checkpoint = Checkpoint(path=checkpoint_path, score=score)

        status = (
            'mode: {}\n step:{}\nTFRANKING mrr is: {}\n dropout:{}\n'
            'learning_rate:{}\n train_batch_size:{}\n'
            'hidden_layer_dims:{}\n loss:{}\n group_size:{}'.format(
                self.mode, step, score, self.params['dropout_rate'],
                self.params['learning_rate'], self.params['train_batch_size'],
                self.params['hidden_layer_dims'], self.params['loss'],
                self.params['group_size']))
        HERA.send_message(status)

        if not self._shouldKeep(checkpoint):
            self._log('skipping checkpoint {}'.format(checkpoint.path))
            return

        self._keepCheckpoint(checkpoint)

        create_sub(estimator, checkpoint_path, eval_result)

        self._pruneCheckpoints(checkpoint)