Example #1
    def __call__(self, trial):
        # sample params
        sf_para_dict = self.sf_parameter.grid_search(trial)
        if sf_para_dict['id'] == 'ffnns':
            sf_para_dict['ffnns'].update(
                dict(num_features=self.data_dict['num_features']))

        model_para_dict = self.model_parameter.grid_search(trial)

        k_fold_average = 0.
        for i in range(self.fold_num):  # evaluation over k-fold data
            fold_k = i + 1
            study = self.k_studies[i]

            train_data, test_data, vali_data = self.load_data(
                self.eval_dict, self.data_dict, fold_k)

            ranker = self.load_ranker(model_para_dict=model_para_dict,
                                      sf_para_dict=sf_para_dict)

            if self.eval_dict['vali_obj']:
                study.optimize(LTRObjective(ranker=ranker,
                                            epochs=self.epochs,
                                            label_type=train_data.label_type,
                                            train_data=train_data,
                                            vali_objective=True,
                                            vali_k=self.vali_k,
                                            vali_data=vali_data),
                               n_trials=1)
            else:
                study.optimize(LTRObjective(ranker=ranker,
                                            epochs=self.epochs,
                                            label_type=train_data.label_type,
                                            train_data=train_data,
                                            vali_objective=False),
                               n_trials=1)
            ''' the meaning of n_trials
            The number of trials. If this argument is set to None, there is no limitation on the number of trials.
            If timeout is also set to None, the study continues to create trials until it receives a termination signal
            such as Ctrl+C or SIGTERM.
            '''
            # compute the fold's test nDCG@1 and accumulate it
            test_eval_tmp = ndcg_at_k(ranker=ranker,
                                      test_data=test_data,
                                      k=1,
                                      label_type=test_data.label_type,
                                      gpu=self.gpu,
                                      device=self.device)
            test_eval_v = test_eval_tmp.data.numpy()
            k_fold_average += test_eval_v

        # average the test scores over the k folds
        k_fold_average /= self.fold_num

        return k_fold_average
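For context, a minimal sketch of how such a fold-wise objective is typically driven from the outside (assumed wiring: the names cv_objective and fold_num and the value n_trials=20 are illustrative, not taken from the source):

import optuna

# one inner study per fold, so each fold keeps its own trial history
k_studies = [optuna.create_study(direction='maximize') for _ in range(fold_num)]

# the outer study samples one hyperparameter configuration per trial and
# receives the k-fold average test nDCG@1 returned by __call__ above
outer_study = optuna.create_study(direction='maximize')
outer_study.optimize(cv_objective, n_trials=20)  # cv_objective: an instance of the class above
print(outer_study.best_params)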
Example #2
    def __call__(self, trial):
        if self.vali_objective:
            for epoch_k in range(1, self.epochs + 1):  # one-epoch fitting over the entire training data
                presort = self.train_data.presort
                for qid, batch_rankings, batch_stds in self.train_data:  # _, [batch, ranking_size, num_features], [batch, ranking_size]
                    if self.gpu:
                        batch_rankings, batch_stds = batch_rankings.to(
                            self.device), batch_stds.to(self.device)
                    batch_loss, stop_training = self.ranker.train(
                        batch_rankings,
                        batch_stds,
                        qid=qid,
                        epoch_k=epoch_k,
                        presort=presort,
                        label_type=self.label_type)
                    if stop_training: break
                # Report intermediate objective value.
                vali_eval_tmp = ndcg_at_k(ranker=self.ranker,
                                          test_data=self.vali_data,
                                          k=self.vali_k,
                                          label_type=self.label_type,
                                          gpu=self.gpu,
                                          device=self.device)
                vali_eval_v = vali_eval_tmp.data.numpy()
                print('vali nDCG@{}: {}'.format(self.vali_k, vali_eval_v))
                intermediate_value = vali_eval_v
                trial.report(intermediate_value, epoch_k)
                '''
                Report an objective function value for a given step.
                The reported values are used by the pruners to determine whether this trial should be pruned.
                '''
                # Handle pruning based on the intermediate value.
                if trial.should_prune():
                    raise optuna.TrialPruned()

            # using validation data again
            vali_eval_tmp = ndcg_at_k(ranker=self.ranker,
                                      test_data=self.vali_data,
                                      k=self.vali_k,
                                      label_type=self.label_type,
                                      gpu=self.gpu,
                                      device=self.device)
            vali_eval_v = vali_eval_tmp.data.numpy()

            return vali_eval_v
        else:
            for epoch_k in range(1, self.epochs + 1):  # one-epoch fitting over the entire training data
                epoch_loss = torch.cuda.FloatTensor([0.0]) if self.gpu else torch.FloatTensor([0.0])
                presort = self.train_data.presort
                for qid, batch_rankings, batch_stds in self.train_data:  # _, [batch, ranking_size, num_features], [batch, ranking_size]
                    if self.gpu:
                        batch_rankings, batch_stds = batch_rankings.to(
                            self.device), batch_stds.to(self.device)
                    batch_loss, stop_training = self.ranker.train(
                        batch_rankings,
                        batch_stds,
                        qid=qid,
                        epoch_k=epoch_k,
                        presort=presort,
                        label_type=self.label_type)
                    if stop_training:
                        break
                    else:
                        epoch_loss += batch_loss.item()

                intermediate_value = -epoch_loss.item()  # negate: the study maximizes, while the loss is minimized
                trial.report(intermediate_value, epoch_k)
                '''
                Report an objective function value for a given step.
                The reported values are used by the pruners to determine whether this trial should be pruned.
                '''
                # Handle pruning based on the intermediate value.
                if trial.should_prune():
                    raise optuna.TrialPruned()

            return -epoch_loss.item()
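Note that trial.report and trial.should_prune only take effect when the enclosing study was created with a pruner; a minimal sketch, assuming Optuna's MedianPruner (the warm-up setting is illustrative):

import optuna

# prune a trial whose reported value falls below the median of earlier
# trials at the same epoch, after 5 warm-up epochs
study = optuna.create_study(direction='maximize',
                            pruner=optuna.pruners.MedianPruner(n_warmup_steps=5))
study.optimize(ltr_objective, n_trials=1)  # ltr_objective: an LTRObjective instance as above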
Example #3
    def ad_cv_eval(self,
                   data_dict=None,
                   eval_dict=None,
                   ad_para_dict=None,
                   sf_para_dict=None):
        """
        Adversarial training and evaluation
        :param data_dict:
        :param eval_dict:
        :param ad_para_dict:
        :param sf_para_dict:
        :return:
        """
        self.check_consistency(data_dict, eval_dict)
        self.display_information(data_dict, model_para_dict=ad_para_dict)
        self.setup_eval(data_dict,
                        eval_dict,
                        sf_para_dict,
                        model_para_dict=ad_para_dict)

        model_id = ad_para_dict['model_id']
        fold_num = data_dict['fold_num']
        # for quick access of common evaluation settings
        epochs, loss_guided = eval_dict['epochs'], eval_dict['loss_guided']
        vali_k, log_step, cutoffs = eval_dict['vali_k'], eval_dict['log_step'], eval_dict['cutoffs']
        do_vali, do_summary = eval_dict['do_validation'], eval_dict['do_summary']

        if sf_para_dict['id'] == 'ffnns':
            sf_para_dict['ffnns'].update(
                dict(num_features=data_dict['num_features']))
        else:
            raise NotImplementedError

        ad_machine = self.get_ad_machine(eval_dict=eval_dict,
                                         data_dict=data_dict,
                                         sf_para_dict=sf_para_dict,
                                         ad_para_dict=ad_para_dict)

        time_begin = datetime.datetime.now()  # timing
        g_l2r_cv_avg_scores, d_l2r_cv_avg_scores = np.zeros(
            len(cutoffs)), np.zeros(len(cutoffs))  # fold average

        for fold_k in range(1, fold_num + 1):
            dict_buffer = dict()  # for buffering frequently used objs
            ad_machine.reset_generator_discriminator()

            fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])

            train_data, test_data, vali_data = self.load_data(
                eval_dict, data_dict, fold_k)

            if do_vali: g_fold_optimal_ndcgk, d_fold_optimal_ndcgk = 0.0, 0.0
            if do_summary:
                list_epoch_loss = []  # not used yet
                g_vali_eval_v, d_vali_eval_v = None, None  # guard against a NameError when do_vali is False
                g_list_fold_k_train_eval_track, g_list_fold_k_test_eval_track, g_list_fold_k_vali_eval_track = [], [], []
                d_list_fold_k_train_eval_track, d_list_fold_k_test_eval_track, d_list_fold_k_vali_eval_track = [], [], []

            # 10 burn-in passes over the training data before the adversarial min-max updates
            for _ in range(10):
                ad_machine.burn_in(train_data=train_data)

            for epoch_k in range(1, epochs + 1):

                if model_id == 'IR_GMAN_List':
                    stop_training = ad_machine.mini_max_train(
                        train_data=train_data,
                        generator=ad_machine.generator,
                        pool_discriminator=ad_machine.pool_discriminator,
                        dict_buffer=dict_buffer)

                    g_ranker = ad_machine.get_generator()
                    d_ranker = ad_machine.pool_discriminator[0]
                else:
                    stop_training = ad_machine.mini_max_train(
                        train_data=train_data,
                        generator=ad_machine.generator,
                        discriminator=ad_machine.discriminator,
                        dict_buffer=dict_buffer)

                    g_ranker = ad_machine.get_generator()
                    d_ranker = ad_machine.get_discriminator()

                if stop_training:
                    print('training failed!')
                    break

                if (do_summary
                        or do_vali) and (epoch_k % log_step == 0
                                         or epoch_k == 1):  # stepwise check
                    if do_vali:
                        g_vali_eval_tmp = ndcg_at_k(ranker=g_ranker, test_data=vali_data, k=vali_k,
                                                    multi_level_rele=self.data_setting.data_dict['multi_level_rele'],
                                                    batch_mode=True)
                        d_vali_eval_tmp = ndcg_at_k(ranker=d_ranker, test_data=vali_data, k=vali_k,
                                                    multi_level_rele=self.data_setting.data_dict['multi_level_rele'],
                                                    batch_mode=True)
                        g_vali_eval_v, d_vali_eval_v = g_vali_eval_tmp.data.numpy(), d_vali_eval_tmp.data.numpy()

                        if epoch_k > 1:
                            g_buffer, g_tmp_metric_val, g_tmp_epoch = \
                                self.per_epoch_validation(ranker=g_ranker, curr_metric_val=g_vali_eval_v,
                                                          fold_optimal_metric_val=g_fold_optimal_ndcgk, curr_epoch=epoch_k,
                                                          id_str='G', fold_optimal_checkpoint=fold_optimal_checkpoint, epochs=epochs)
                            # observe better performance
                            if g_buffer:
                                g_fold_optimal_ndcgk, g_fold_optimal_epoch_val = g_tmp_metric_val, g_tmp_epoch

                            d_buffer, d_tmp_metric_val, d_tmp_epoch = \
                                self.per_epoch_validation(ranker=d_ranker, curr_metric_val=d_vali_eval_v,
                                                          fold_optimal_metric_val=d_fold_optimal_ndcgk, curr_epoch=epoch_k,
                                                          id_str='D', fold_optimal_checkpoint=fold_optimal_checkpoint, epochs=epochs)
                            if d_buffer:
                                d_fold_optimal_ndcgk, d_fold_optimal_epoch_val = d_tmp_metric_val, d_tmp_epoch

                    if do_summary:  # summarize per-step performance w.r.t. train, test
                        self.per_epoch_summary_step1(
                            ranker=g_ranker, train_data=train_data, test_data=test_data,
                            list_fold_k_train_eval_track=g_list_fold_k_train_eval_track,
                            list_fold_k_test_eval_track=g_list_fold_k_test_eval_track,
                            vali_eval_v=g_vali_eval_v,
                            list_fold_k_vali_eval_track=g_list_fold_k_vali_eval_track,
                            cutoffs=cutoffs, do_vali=do_vali)

                        self.per_epoch_summary_step1(
                            ranker=d_ranker, train_data=train_data, test_data=test_data,
                            list_fold_k_train_eval_track=d_list_fold_k_train_eval_track,
                            list_fold_k_test_eval_track=d_list_fold_k_test_eval_track,
                            vali_eval_v=d_vali_eval_v,
                            list_fold_k_vali_eval_track=d_list_fold_k_vali_eval_track,
                            cutoffs=cutoffs, do_vali=do_vali)

            if do_summary:
                self.per_epoch_summary_step2(
                    id_str='G',
                    fold_k=fold_k,
                    list_fold_k_train_eval_track=g_list_fold_k_train_eval_track,
                    list_fold_k_test_eval_track=g_list_fold_k_test_eval_track,
                    do_vali=do_vali,
                    list_fold_k_vali_eval_track=g_list_fold_k_vali_eval_track)

                self.per_epoch_summary_step2(
                    id_str='D',
                    fold_k=fold_k,
                    list_fold_k_train_eval_track=d_list_fold_k_train_eval_track,
                    list_fold_k_test_eval_track=d_list_fold_k_test_eval_track,
                    do_vali=do_vali,
                    list_fold_k_vali_eval_track=d_list_fold_k_vali_eval_track)

            if do_vali:  # using the fold-wise optimal model for later testing based on validation data #
                g_buffered_model = '_'.join(
                    ['net_params_epoch',
                     str(g_fold_optimal_epoch_val), 'G']) + '.pkl'
                g_ranker.load(self.dir_run + fold_optimal_checkpoint + '/' +
                              g_buffered_model)
                g_fold_optimal_ranker = g_ranker

                d_buffered_model = '_'.join(
                    ['net_params_epoch',
                     str(d_fold_optimal_epoch_val), 'D']) + '.pkl'
                d_ranker.load(self.dir_run + fold_optimal_checkpoint + '/' +
                              d_buffered_model)
                d_fold_optimal_ranker = d_ranker

            else:  # using the default G/D: buffer the models after a fixed number of training epochs if no validation is deployed
                g_ranker.save(
                    dir=self.dir_run + fold_optimal_checkpoint + '/',
                    name='_'.join(['net_params_epoch',
                                   str(epoch_k), 'G']) + '.pkl')
                g_fold_optimal_ranker = g_ranker

                d_ranker.save(
                    dir=self.dir_run + fold_optimal_checkpoint + '/',
                    name='_'.join(['net_params_epoch',
                                   str(epoch_k), 'D']) + '.pkl')
                d_fold_optimal_ranker = d_ranker

            g_torch_fold_ndcg_ks = ndcg_at_ks(ranker=g_fold_optimal_ranker, test_data=test_data, ks=cutoffs,
                                              multi_level_rele=self.data_setting.data_dict['multi_level_rele'],
                                              batch_mode=True)
            g_fold_ndcg_ks = g_torch_fold_ndcg_ks.data.numpy()

            d_torch_fold_ndcg_ks = ndcg_at_ks(ranker=d_fold_optimal_ranker, test_data=test_data, ks=cutoffs,
                                              multi_level_rele=self.data_setting.data_dict['multi_level_rele'],
                                              batch_mode=True)
            d_fold_ndcg_ks = d_torch_fold_ndcg_ks.data.numpy()

            performance_list = [' Fold-' + str(fold_k)]  # fold-wise performance
            performance_list.append('Generator')
            for i, co in enumerate(cutoffs):
                performance_list.append('nDCG@{}:{:.4f}'.format(
                    co, g_fold_ndcg_ks[i]))

            performance_list.append('\nDiscriminator')
            for i, co in enumerate(cutoffs):
                performance_list.append('nDCG@{}:{:.4f}'.format(
                    co, d_fold_ndcg_ks[i]))

            performance_str = '\t'.join(performance_list)
            print('\t', performance_str)

            g_l2r_cv_avg_scores = np.add(
                g_l2r_cv_avg_scores,
                g_fold_ndcg_ks)  # sum for later cv-performance
            d_l2r_cv_avg_scores = np.add(d_l2r_cv_avg_scores, d_fold_ndcg_ks)

        time_end = datetime.datetime.now()  # overall timing
        elapsed_time_str = str(time_end - time_begin)
        print('Elapsed time:\t', elapsed_time_str + "\n\n")

        # begin to print either cv or average performance
        g_l2r_cv_avg_scores = np.divide(g_l2r_cv_avg_scores, fold_num)
        d_l2r_cv_avg_scores = np.divide(d_l2r_cv_avg_scores, fold_num)

        if do_vali:
            eval_prefix = str(fold_num) + '-fold cross validation scores:'
        else:
            eval_prefix = str(fold_num) + '-fold average scores:'

        print(
            'Generator', eval_prefix,
            metric_results_to_string(list_scores=g_l2r_cv_avg_scores,
                                     list_cutoffs=cutoffs))
        print(
            'Discriminator', eval_prefix,
            metric_results_to_string(list_scores=d_l2r_cv_avg_scores,
                                     list_cutoffs=cutoffs))
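All of these examples score rankings with nDCG@k; for reference, a minimal self-contained numpy sketch of the metric (my own illustration of the standard definition, not the library's ndcg_at_k):

import numpy as np

def dcg(rels, k):
    # graded relevance with 2^rel - 1 gains and log2 position discounts
    rels = np.asarray(rels, dtype=float)[:k]
    return np.sum((np.power(2.0, rels) - 1.0) / np.log2(np.arange(2, rels.size + 2)))

def ndcg_at_k(ranked_rels, k):
    ideal = dcg(sorted(ranked_rels, reverse=True), k)
    return dcg(ranked_rels, k) / ideal if ideal > 0 else 0.0

print(ndcg_at_k([3, 2, 3, 0, 1], k=5))  # a ranking of graded labels, ~0.96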
Example #4
    def kfold_cv_eval(self,
                      data_dict=None,
                      eval_dict=None,
                      sf_para_dict=None,
                      model_para_dict=None):
        """
        Evaluation learning-to-rank methods via k-fold cross validation if there are k folds, otherwise one fold.
        :param data_dict:       settings w.r.t. data
        :param eval_dict:       settings w.r.t. evaluation
        :param sf_para_dict:    settings w.r.t. scoring function
        :param model_para_dict: settings w.r.t. the ltr_adhoc model
        :return:
        """
        self.display_information(data_dict, model_para_dict)
        self.check_consistency(data_dict, eval_dict, sf_para_dict)
        self.setup_eval(data_dict, eval_dict, sf_para_dict, model_para_dict)

        model_id = model_para_dict['model_id']
        fold_num = data_dict['fold_num']
        # for quick access of common evaluation settings
        epochs, loss_guided = eval_dict['epochs'], eval_dict['loss_guided']
        vali_k, log_step, cutoffs = eval_dict['vali_k'], eval_dict['log_step'], eval_dict['cutoffs']
        do_vali, do_summary = eval_dict['do_validation'], eval_dict['do_summary']

        ranker = self.load_ranker(model_para_dict=model_para_dict,
                                  sf_para_dict=sf_para_dict)

        time_begin = datetime.datetime.now()  # timing
        l2r_cv_avg_scores = np.zeros(len(cutoffs))  # fold average

        for fold_k in range(1, fold_num + 1):  # evaluation over k-fold data
            ranker.reset_parameters()  # reset with the same random initialization

            train_data, test_data, vali_data = self.load_data(
                eval_dict, data_dict, fold_k)

            if do_vali: fold_optimal_ndcgk = 0.0
            if do_summary:
                list_epoch_loss, list_fold_k_train_eval_track, list_fold_k_test_eval_track, list_fold_k_vali_eval_track = [], [], [], []
            if not do_vali and loss_guided:
                first_round = True
                threshold_epoch_loss = torch.cuda.FloatTensor([10000000.0]) if self.gpu else torch.FloatTensor([10000000.0])

            for epoch_k in range(1, epochs + 1):
                torch_fold_k_epoch_k_loss, stop_training = self.train_ranker(
                    ranker=ranker,
                    train_data=train_data,
                    model_para_dict=model_para_dict,
                    epoch_k=epoch_k)

                ranker.scheduler.step()  # adaptive learning rate with step_size=40, gamma=0.5

                if stop_training:
                    print('training failed!')
                    break

                if (do_summary
                        or do_vali) and (epoch_k % log_step == 0
                                         or epoch_k == 1):  # stepwise check
                    if do_vali:  # per-step validation score
                        vali_eval_tmp = ndcg_at_k(ranker=ranker,
                                                  test_data=vali_data,
                                                  k=vali_k,
                                                  gpu=self.gpu,
                                                  device=self.device,
                                                  label_type=self.data_setting.
                                                  data_dict['label_type'])
                        vali_eval_v = vali_eval_tmp.data.numpy()
                        if epoch_k > 1:  # further validation comparison
                            curr_vali_ndcg = vali_eval_v
                            if (curr_vali_ndcg > fold_optimal_ndcgk) or (
                                    epoch_k == epochs
                                    and curr_vali_ndcg == fold_optimal_ndcgk
                            ):  # we need at least a reference, in case all zero
                                print('\t', epoch_k,
                                      '- nDCG@{} - '.format(vali_k),
                                      curr_vali_ndcg)
                                fold_optimal_ndcgk = curr_vali_ndcg
                                fold_optimal_checkpoint = '-'.join(
                                    ['Fold', str(fold_k)])
                                fold_optimal_epoch_val = epoch_k
                                ranker.save(
                                    dir=self.dir_run +
                                    fold_optimal_checkpoint + '/',
                                    name='_'.join(
                                        ['net_params_epoch',
                                         str(epoch_k)]) +
                                    '.pkl')  # buffer currently optimal model
                            else:
                                print('\t\t', epoch_k,
                                      '- nDCG@{} - '.format(vali_k),
                                      curr_vali_ndcg)

                    if do_summary:  # summarize per-step performance w.r.t. train, test
                        fold_k_epoch_k_train_ndcg_ks = ndcg_at_ks(
                            ranker=ranker,
                            test_data=train_data,
                            ks=cutoffs,
                            gpu=self.gpu,
                            device=self.device,
                            label_type=self.data_setting.
                            data_dict['label_type'])
                        np_fold_k_epoch_k_train_ndcg_ks = fold_k_epoch_k_train_ndcg_ks.cpu().numpy() \
                            if self.gpu else fold_k_epoch_k_train_ndcg_ks.data.numpy()
                        list_fold_k_train_eval_track.append(np_fold_k_epoch_k_train_ndcg_ks)

                        fold_k_epoch_k_test_ndcg_ks = ndcg_at_ks(
                            ranker=ranker,
                            test_data=test_data,
                            ks=cutoffs,
                            gpu=self.gpu,
                            device=self.device,
                            label_type=self.data_setting.
                            data_dict['label_type'])
                        np_fold_k_epoch_k_test_ndcg_ks = fold_k_epoch_k_test_ndcg_ks.cpu().numpy() \
                            if self.gpu else fold_k_epoch_k_test_ndcg_ks.data.numpy()
                        list_fold_k_test_eval_track.append(np_fold_k_epoch_k_test_ndcg_ks)

                        fold_k_epoch_k_loss = torch_fold_k_epoch_k_loss.cpu().numpy() \
                            if self.gpu else torch_fold_k_epoch_k_loss.data.numpy()
                        list_epoch_loss.append(fold_k_epoch_k_loss)

                        if do_vali:
                            list_fold_k_vali_eval_track.append(vali_eval_v)

                elif loss_guided:  # stopping check via epoch-loss
                    if first_round and torch_fold_k_epoch_k_loss >= threshold_epoch_loss:
                        print('Bad threshold: ', torch_fold_k_epoch_k_loss,
                              threshold_epoch_loss)

                    if torch_fold_k_epoch_k_loss < threshold_epoch_loss:
                        first_round = False
                        print('\tFold-', str(fold_k), ' Epoch-', str(epoch_k),
                              'Loss: ', torch_fold_k_epoch_k_loss)
                        threshold_epoch_loss = torch_fold_k_epoch_k_loss
                    else:
                        print('\tStopped according to epoch loss!',
                              torch_fold_k_epoch_k_loss, threshold_epoch_loss)
                        break

            if do_summary:  # track
                sy_prefix = '_'.join(['Fold', str(fold_k)])
                fold_k_train_eval = np.vstack(list_fold_k_train_eval_track)
                fold_k_test_eval = np.vstack(list_fold_k_test_eval_track)
                pickle_save(fold_k_train_eval,
                            file=self.dir_run +
                            '_'.join([sy_prefix, 'train_eval.np']))
                pickle_save(fold_k_test_eval,
                            file=self.dir_run +
                            '_'.join([sy_prefix, 'test_eval.np']))

                fold_k_epoch_loss = np.hstack(list_epoch_loss)
                pickle_save(
                    (fold_k_epoch_loss, train_data.__len__()),
                    file=self.dir_run + '_'.join([sy_prefix, 'epoch_loss.np']))
                if do_vali:
                    fold_k_vali_eval = np.hstack(list_fold_k_vali_eval_track)
                    pickle_save(fold_k_vali_eval,
                                file=self.dir_run +
                                '_'.join([sy_prefix, 'vali_eval.np']))

            if do_vali:  # using the fold-wise optimal model for later testing based on validation data
                buffered_model = '_'.join(
                    ['net_params_epoch',
                     str(fold_optimal_epoch_val)]) + '.pkl'
                ranker.load(self.dir_run + fold_optimal_checkpoint + '/' +
                            buffered_model)
                fold_optimal_ranker = ranker
            else:  # buffer the model after a fixed number of training epochs if no validation is deployed
                fold_optimal_checkpoint = '-'.join(['Fold', str(fold_k)])
                ranker.save(dir=self.dir_run + fold_optimal_checkpoint + '/',
                            name='_'.join(['net_params_epoch',
                                           str(epoch_k)]) + '.pkl')
                fold_optimal_ranker = ranker

            torch_fold_ndcg_ks = ndcg_at_ks(
                ranker=fold_optimal_ranker,
                test_data=test_data,
                ks=cutoffs,
                gpu=self.gpu,
                device=self.device,
                label_type=self.data_setting.data_dict['label_type'])
            fold_ndcg_ks = torch_fold_ndcg_ks.data.numpy()

            performance_list = [model_id + ' Fold-' + str(fold_k)]  # fold-wise performance
            for i, co in enumerate(cutoffs):
                performance_list.append('nDCG@{}:{:.4f}'.format(
                    co, fold_ndcg_ks[i]))
            performance_str = '\t'.join(performance_list)
            print('\t', performance_str)

            l2r_cv_avg_scores = np.add(
                l2r_cv_avg_scores,
                fold_ndcg_ks)  # sum for later cv-performance

        time_end = datetime.datetime.now()  # overall timing
        elapsed_time_str = str(time_end - time_begin)
        print('Elapsed time:\t', elapsed_time_str + "\n\n")

        l2r_cv_avg_scores = np.divide(l2r_cv_avg_scores, fold_num)
        eval_prefix = str(fold_num) + '-fold cross validation scores:' if do_vali \
            else str(fold_num) + '-fold average scores:'
        print(model_id, eval_prefix,
              metric_results_to_string(list_scores=l2r_cv_avg_scores,
                                       list_cutoffs=cutoffs)
              )  # print either cv or average performance

        return l2r_cv_avg_scores
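The ranker.save / ranker.load buffering above presumably wraps PyTorch's standard state-dict serialization; a minimal sketch of that pattern (illustrative, not the library's actual implementation):

import os
import torch

def save_checkpoint(model, dir, name):
    # buffer the currently optimal parameters to <dir>/<name>
    os.makedirs(dir, exist_ok=True)
    torch.save(model.state_dict(), os.path.join(dir, name))

def load_checkpoint(model, path):
    # restore the buffered parameters before the final fold-wise testing
    model.load_state_dict(torch.load(path))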
Example #5
    def __call__(self, trial):
        # sample params
        model_id = self.model_parameter.model_id
        para_dict = self.model_parameter.grid_search(trial)
        self.setup_eval(data_dict=self.data_dict, eval_dict=self.eval_dict)

        k_fold_average = 0.
        for i in range(self.fold_num):  # evaluation over k-fold data
            fold_k = i + 1
            study = self.k_studies[i]

            train_data, test_data, vali_data = self.load_data(
                self.eval_dict, self.data_dict, fold_k)

            data_id = self.data_dict['data_id']

            train_presort, validation_presort, test_presort = self.data_dict['train_presort'], \
                self.data_dict['validation_presort'], self.data_dict['test_presort']

            file_train, file_vali, file_test = self.determine_files(
                data_dict=self.data_dict, fold_k=fold_k)

            self.update_save_model_dir(data_dict=self.data_dict, fold_k=fold_k)

            # prepare training & testing datasets
            file_train_data, file_train_group = load_letor_data_as_libsvm_data(
                file_train,
                split_type=SPLIT_TYPE.Train,
                data_dict=self.data_dict,
                eval_dict=self.eval_dict,
                presort=train_presort)
            x_train, y_train = load_svmlight_file(file_train_data)
            group_train = np.loadtxt(file_train_group)
            train_set = Dataset(data=x_train, label=y_train, group=group_train)

            file_test_data, file_test_group = load_letor_data_as_libsvm_data(
                file_test,
                split_type=SPLIT_TYPE.Test,
                data_dict=self.data_dict,
                eval_dict=self.eval_dict,
                presort=test_presort)
            x_test, y_test = load_svmlight_file(file_test_data)
            group_test = np.loadtxt(file_test_group)

            file_vali_data, file_vali_group = load_letor_data_as_libsvm_data(
                file_vali,
                split_type=SPLIT_TYPE.Validation,
                data_dict=self.data_dict,
                eval_dict=self.eval_dict,
                presort=validation_presort)
            x_valid, y_valid = load_svmlight_file(file_vali_data)
            group_valid = np.loadtxt(file_vali_group)
            valid_set = Dataset(data=x_valid, label=y_valid, group=group_valid)

            if para_dict['custom_dict']['custom'] and para_dict['custom_dict']['use_LGBMRanker']:
                lgbm_ranker = lgbm.LGBMRanker()
            else:
                lgbm_ranker = lgbm  # pass the lgbm module itself instead of the sklearn-style LGBMRanker wrapper

            study.optimize(TreeLTRObjective(model_id=model_id, data_id=data_id,
                                            x_train=x_train, y_train=y_train, group_train=group_train, train_set=train_set,
                                            x_valid=x_valid, y_valid=y_valid, group_valid=group_valid, valid_set=valid_set,
                                            ranker=lgbm_ranker, fold_k=fold_k, para_dict=para_dict,
                                            data_dict=self.data_dict, eval_dict=self.eval_dict,
                                            save_model_dir=self.save_model_dir),
                           n_trials=1)  # add a single trial to this fold's study per outer call
            # load the buffered model and accumulate the fold's validation nDCG
            if data_id in YAHOO_LTR:
                model_file = self.save_model_dir + 'model.txt'
            else:
                model_file = self.save_model_dir + '_'.join(
                    ['fold', str(fold_k), 'model']) + '.txt'

            lgbm_ranker = lgbm.Booster(model_file=model_file)

            vali_eval_tmp = ndcg_at_k(ranker=lgbm_ranker,
                                      test_data=vali_data,
                                      k=self.vali_k,
                                      label_type=vali_data.label_type,
                                      gpu=self.gpu,
                                      device=self.device)
            vali_eval_v = vali_eval_tmp.data.numpy()
            k_fold_average += vali_eval_v

        # average the validation scores over the k folds
        k_fold_average /= self.fold_num

        return k_fold_average
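For reference, a minimal sketch of training a LightGBM ranker on such query-grouped libsvm data and reloading the buffered booster, as the example does (file names and parameter values are illustrative):

import lightgbm as lgbm
import numpy as np
from sklearn.datasets import load_svmlight_file

x_train, y_train = load_svmlight_file('fold1_train.libsvm')  # hypothetical paths
group_train = np.loadtxt('fold1_train.group')  # docs-per-query sizes

ranker = lgbm.LGBMRanker(objective='lambdarank', n_estimators=100)
ranker.fit(x_train, y_train, group=group_train)
ranker.booster_.save_model('fold_1_model.txt')

booster = lgbm.Booster(model_file='fold_1_model.txt')  # reload for evaluation
scores = booster.predict(x_train)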