Beispiel #1
0
def fit(model,
        train,
        test,
        num_boost_round=360,
        verbose_eval=1,
        export=False,
        training_params=None,
        export_params=None,
        **kwargs):
    """
    Train (or reload) an XGBoost booster, then validate it on the test set.

    When the module-level ``validation_only`` flag is falsy, a booster is
    trained with ``xgb.train`` and persisted with ``save_model``; otherwise a
    previously saved booster is fetched with ``load_model``. In both cases the
    booster predicts on ``test`` and the result is passed to ``validate``.

    :param model: forwarded as ``xgb_model`` to ``xgb.train``
    :param train: training set; must expose ``get_vectors()`` and ``labels``
    :param test: test set; must expose ``get_vectors()`` and ``labels``
    :param num_boost_round: forwarded to ``xgb.train``
    :param verbose_eval: forwarded to ``xgb.train``
    :param export: when truthy, ``export_results`` is called on the predictions
    :param training_params: optional dict; only its ``'metrics'`` entry is read
    :param export_params: optional dict of keyword arguments for ``export_results``
    :param kwargs: booster parameters; ``verbosity`` and ``gpu_id`` are overwritten
    """
    if not use_gpu():
        print_errors('XGBoost can only be executed on a GPU for the moment',
                     do_exit=True)

    if training_params is None:
        training_params = {}
    if export_params is None:
        export_params = {}

    # The evaluation matrix is needed by both the training and the reload path.
    test_vectors = np.asarray(test.get_vectors())
    test_targets = np.asarray(test.labels)
    d_test = xgb.DMatrix(test_vectors, label=test_targets)

    if validation_only:
        booster = load_model()
    else:
        print_h1('Training: ' + special_parameters.setup_name)
        print_info("get vectors...")

        train_vectors = np.asarray(train.get_vectors())
        train_targets = np.asarray(train.labels)
        d_train = xgb.DMatrix(train_vectors, label=train_targets)

        # Device index is resolved first, then both settings are injected
        # into the booster parameters.
        device_index = first_device().index
        kwargs.update(verbosity=verbose_level(), gpu_id=device_index)

        watchlist = [(d_test, 'eval'), (d_train, 'train')]

        print_info("fit model...")

        booster = xgb.train(kwargs,
                            d_train,
                            num_boost_round=num_boost_round,
                            verbose_eval=verbose_eval,
                            evals=watchlist,
                            xgb_model=model)

        print_info("Save model...")
        save_model(booster)

    print_h1('Validation/Export: ' + special_parameters.setup_name)
    predictions = booster.predict(d_test,
                                  ntree_limit=booster.best_ntree_limit)

    expected = np.array(test.labels)
    metrics = training_params.get('metrics', tuple())
    report = validate(predictions, expected, metrics, final=True)
    print_notification(report, end='')

    if export:
        export_results(test, predictions, **export_params)
Beispiel #2
0
def fit(model,
        train,
        test,
        export=False,
        nb_classes=4520,
        training_params=None,
        export_params=None,
        save=True):
    """
    Fit a classifier exposing ``fit`` / ``predict_proba`` / ``classes_`` and
    validate it on the test set.

    Training is skipped when the module-level ``validation_only`` flag is
    truthy; the given ``model`` is then used as is for prediction.

    :param model: the classifier to train and evaluate
    :param train: training set; must expose ``get_vectors()`` and ``labels``
    :param test: test set; must expose ``get_vectors()`` and ``labels``
    :param export: when truthy, ``export_results`` is called on the predictions
    :param nb_classes: number of columns of the full prediction matrix
    :param training_params: optional dict; only its ``'metrics'`` entry is read
    :param export_params: optional dict of keyword arguments for ``export_results``
    :param save: when truthy, the fitted classifier is passed to ``save_model``
    """
    if training_params is None:
        training_params = {}
    if export_params is None:
        export_params = {}

    classifier = model

    if not validation_only:
        print_h1('Training: ' + setup_name)
        print_info("get vectors...")
        features = np.array(train.get_vectors())
        targets = np.array(train.labels)

        print_info("fit model...")
        classifier.fit(features, targets)

        if save:
            save_model(classifier)

    print_h1('Validation/Export: ' + setup_name)

    # predict_proba only yields columns for the classes listed in `classes_`;
    # scatter them into a zero matrix with one column per possible class.
    # Assumes `classes_` holds integer column indices below nb_classes
    # -- TODO confirm against the label encoding used by the datasets.
    seen_probabilities = classifier.predict_proba(
        np.array(test.get_vectors()))
    nb_samples = seen_probabilities.shape[0]
    predictions = np.zeros((nb_samples, nb_classes))
    predictions[:, classifier.classes_] = seen_probabilities

    expected = np.array(test.labels)
    metrics = training_params.get('metrics', tuple())
    report = validate(predictions, expected, metrics, final=True)
    print_notification(report, end='')

    if export:
        export_results(test, predictions, **export_params)
Beispiel #3
0
def fit(model_z,
        train,
        test,
        val=None,
        training_params=None,
        predict_params=None,
        validation_params=None,
        export_params=None,
        optim_params=None,
        model_selection_params=None):
    """
    Core of an experiment: train ``model_z``, validate it periodically and run
    the final evaluation/export on the test set.

    The function works in three stages:

    1. Configuration: every ``*_params`` argument is combined with its
       module-level counterpart (``TRAINING_PARAMS``, ``PREDICT_PARAMS``, ...)
       through ``merge_dict_set``, data loaders are built by ``_dataset_setup``
       and the training settings are popped from the merged dictionaries.
    2. Training: executed only when ``special_parameters.train`` is truthy and
       ``first_epoch < max(iterations)``. Epochs below ``first_epoch`` are
       skipped (only the scheduler is advanced through ``_skip_step``). After
       each epoch a checkpoint is saved, and every ``val_modulo`` epochs the
       model is evaluated on the validation loader.
    3. Final validation/export: executed when ``special_parameters.evaluate``
       or ``special_parameters.export`` is truthy, on the test loader.

    :param model_z: the model that should be trained
    :param train: the training set
    :param test: the test set
    :param val: the validation set
    :param training_params: parameters for the training procedure. The merged
        dict is first forwarded in full to ``_dataset_setup`` as keyword
        arguments; afterwards the keys ``iterations``, ``gamma``, ``loss``,
        ``log_modulo``, ``val_modulo`` and ``first_epoch`` are popped from it.
        ``iterations`` is used as an iterable of scheduler milestones whose
        maximum is the total number of epochs.
    :param predict_params: keyword arguments forwarded to ``predict``
    :param validation_params: keyword arguments forwarded to ``validate``; an
        optional ``vcallback`` entry is popped and used for the callbacks
    :param export_params: keyword arguments forwarded to ``export_results``
    :param optim_params: the ``optimizer`` entry is popped; the dict is then
        handed to ``create_optimizer``
    :param model_selection_params: the ``cross_validation`` entry is popped;
        when truthy, the remaining entries are forwarded to ``Statistics``
    :return: the ``Statistics`` object, or ``None`` when ``cross_validation``
        is falsy
    """
    # configuration

    # each user dict is paired with its module-level defaults; presumably the
    # user values take precedence -- verify against merge_dict_set
    training_params, predict_params, validation_params, export_params, optim_params, \
        cv_params = merge_dict_set(
            training_params, TRAINING_PARAMS,
            predict_params, PREDICT_PARAMS,
            validation_params, VALIDATION_PARAMS,
            export_params, EXPORT_PARAMS,
            optim_params, OPTIM_PARAMS,
            model_selection_params, MODEL_SELECTION_PARAMS
        )

    # NOTE: called before the pops below, so _dataset_setup receives every
    # training parameter (iterations, gamma, loss, ...) as a keyword argument
    train_loader, test_loader, val_loader = _dataset_setup(
        train, test, val, **training_params)

    statistics_path = output_path('metric_statistics.dump')

    # the condition (and therefore the pop) is evaluated before **cv_params is
    # expanded, so 'cross_validation' itself is not forwarded to Statistics
    metrics_stats = Statistics(
        model_z, statistics_path, **
        cv_params) if cv_params.pop('cross_validation') else None

    validation_path = output_path('validation.txt')

    # training parameters
    optim = optim_params.pop('optimizer')
    iterations = training_params.pop('iterations')
    gamma = training_params.pop('gamma')
    loss = training_params.pop('loss')
    log_modulo = training_params.pop('log_modulo')
    val_modulo = training_params.pop('val_modulo')
    first_epoch = training_params.pop('first_epoch')

    # optional validation callback(s), removed from the dict so that it is not
    # forwarded to validate()
    vcallback = validation_params.pop(
        'vcallback') if 'vcallback' in validation_params else None

    # NOTE(review): max(iterations) below fails on None, so print_errors is
    # presumably expected to raise the given exception -- confirm
    if iterations is None:
        print_errors(
            'Iterations must be set',
            exception=TrainingConfigurationException('Iterations is None'))

    # callbacks are initialised only when a training run will actually happen
    # (same condition as the training block below)
    if vcallback is not None and special_parameters.train and first_epoch < max(
            iterations):
        init_callbacks(vcallback, val_modulo,
                       max(iterations) // val_modulo, train_loader.dataset,
                       model_z)

    # the largest milestone is the total number of epochs
    max_iterations = max(iterations)

    if special_parameters.train and first_epoch < max(iterations):
        print_h1('Training: ' + special_parameters.setup_name)

        # when resuming (first_epoch >= 1) the previously saved loss curves
        # are reloaded so that new values are appended to them
        loss_logs = [] if first_epoch < 1 else load_loss('loss_train')

        loss_val_logs = [] if first_epoch < 1 else load_loss('loss_validation')

        opt = create_optimizer(model_z.parameters(), optim, optim_params)

        # learning rate is multiplied by gamma at each milestone epoch
        scheduler = MultiStepLR(opt, milestones=list(iterations), gamma=gamma)

        # number of batches per epoch
        epoch_size = len(train_loader)

        # one log per epoch if value is -1
        log_modulo = epoch_size if log_modulo == -1 else log_modulo

        # pre-initialised so that export_epoch(epoch + 1) after the loop is
        # defined even if the loop body never runs
        epoch = 0
        for epoch in range(max_iterations):

            if epoch < first_epoch:
                # already-trained epoch (restart): only advance the scheduler
                _skip_step(scheduler, epoch)
                continue
            # saving epoch to enable restart
            export_epoch(epoch)
            model_z.train()

            # printing new epoch
            print_h2('-' * 5 + ' Epoch ' + str(epoch + 1) + '/' +
                     str(max_iterations) + ' (lr: ' + str(scheduler.get_lr()) +
                     ') ' + '-' * 5)

            # loss accumulated since the last log line
            running_loss = 0.0

            for idx, data in enumerate(train_loader):

                # get the inputs
                inputs, labels = data

                # only the labels are moved to the GPU here; the original
                # comment states that inputs are handled through a decorator
                # (not visible in this function)
                if use_gpu():
                    labels = labels.cuda()

                # zero the parameter gradients
                opt.zero_grad()
                outputs = model_z(inputs)
                loss_value = loss(outputs, labels)
                loss_value.backward()

                opt.step()

                # accumulate the loss and log its mean every log_modulo batches
                running_loss += loss_value.item()
                if idx % log_modulo == log_modulo - 1:  # print every log_modulo mini-batches
                    print('[%d, %5d] loss: %.5f' %
                          (epoch + 1, idx + 1, running_loss / log_modulo))

                    # tensorboard support
                    add_scalar('Loss/train', running_loss / log_modulo)
                    loss_logs.append(running_loss / log_modulo)
                    running_loss = 0.0

            # end of epoch update of learning rate scheduler
            # NOTE(review): passing an explicit epoch to step() (and calling
            # get_lr() above) is deprecated in recent PyTorch releases --
            # confirm against the pinned version
            scheduler.step(epoch + 1)

            # saving the model and the optimizer state after each epoch
            save_checkpoint(model_z, optimizer=opt)

            # validation of the model every val_modulo epochs
            if epoch % val_modulo == val_modulo - 1:
                # 1-based index of this validation round
                validation_id = str(int((epoch + 1) / val_modulo))

                # validation call
                predictions, labels, loss_val = predict(
                    model_z, val_loader, loss, **predict_params)
                loss_val_logs.append(loss_val)

                res = '\n[validation_id:' + validation_id + ']\n' + validate(
                    predictions,
                    labels,
                    validation_id=validation_id,
                    statistics=metrics_stats,
                    **validation_params)

                # save statistics for robust cross validation
                if metrics_stats:
                    metrics_stats.save()

                print_notification(res)

                # intermediate results are mailed only when mail == 2
                if special_parameters.mail == 2:
                    send_email(
                        'Results for XP ' + special_parameters.setup_name +
                        ' (epoch: ' + str(epoch + 1) + ')', res)
                if special_parameters.file:
                    save_file(
                        validation_path,
                        'Results for XP ' + special_parameters.setup_name +
                        ' (epoch: ' + str(epoch + 1) + ')', res)

                # additional checkpoint tagged with the validation round
                save_checkpoint(model_z,
                                optimizer=opt,
                                validation_id=validation_id)

                # callback
                if vcallback is not None:
                    run_callbacks(vcallback, (epoch + 1) // val_modulo)

            # save both loss curves; the second tuple element is passed along
            # with each curve (presumably the spacing between points, in
            # batches -- verify against save_loss)
            save_loss(
                {  # // log_modulo * log_modulo in case log_modulo does not divide epoch_size
                    'train': (loss_logs, log_modulo),
                    'validation':
                    (loss_val_logs,
                     epoch_size // log_modulo * log_modulo * val_modulo)
                },
                ylabel=str(loss))

        # saving last epoch
        export_epoch(epoch +
                     1)  # if --restart is set, the training will not be executed again

        # callback
        if vcallback is not None:
            finish_callbacks(vcallback)

    # final validation
    if special_parameters.evaluate or special_parameters.export:
        print_h1('Validation/Export: ' + special_parameters.setup_name)
        if metrics_stats is not None:
            # change the parameter states of the model to best model
            metrics_stats.switch_to_best_model()

        # predictions on the test loader; validation_size=-1 is forwarded to
        # predict (presumably meaning "use the whole set" -- confirm)
        predictions, labels, val_loss = predict(model_z,
                                                test_loader,
                                                loss,
                                                validation_size=-1,
                                                **predict_params)

        if special_parameters.evaluate:

            res = validate(predictions,
                           labels,
                           statistics=metrics_stats,
                           **validation_params,
                           final=True)

            print_notification(res, end='')

            # final results are mailed as soon as mail >= 1
            if special_parameters.mail >= 1:
                send_email(
                    'Final results for XP ' + special_parameters.setup_name,
                    res)
            if special_parameters.file:
                save_file(
                    validation_path,
                    'Final results for XP ' + special_parameters.setup_name,
                    res)

        if special_parameters.export:
            export_results(test_loader.dataset, predictions, **export_params)

    return metrics_stats
Beispiel #4
0
def fit(train,
        test,
        validation=None,
        validation_params=None,
        export_params=None,
        model_name='model',
        **kwargs):
    """
    Fit a LightGBM model and evaluate/export its predictions on the test set.

    Training is skipped when ``special_parameters.validation_only`` or
    ``special_parameters.export`` is truthy; the booster is then loaded from
    ``models/<model_name>.bst`` (resolved through ``output_path``).

    :param train: training set
    :param test: test set; must expose ``numpy()`` returning (vectors, labels)
    :param validation: optional validation set; the test set is used when None
    :param validation_params: optional keyword arguments for ``validate``
    :param export_params: optional keyword arguments for ``export_results``
    :param model_name: base name of the saved/loaded model file
    :param kwargs: LightGBM parameters, completed with ``_default_params``
    :return: None
    """
    label_count = _nb_labels(train, test, validation)

    lgb_train = _to_lgb_dataset(train)
    lgb_test = _to_lgb_dataset(test)
    if validation is None:
        lgb_val = lgb_test
    else:
        lgb_val = _to_lgb_dataset(validation)

    model_file = 'models/{}.bst'.format(model_name)

    if special_parameters.validation_only or special_parameters.export:
        # Evaluation/export only: reuse the previously saved booster.
        booster = lgb.Booster(model_file=output_path(model_file))
    else:
        print_h1('Training: ' + special_parameters.setup_name)
        boosting_rounds = 10

        # kwargs is completed with the defaults and used directly as the
        # LightGBM parameter dictionary.
        merge_smooth(kwargs, _default_params)
        kwargs['num_class'] = label_count

        booster = lgb.train(kwargs,
                            lgb_train,
                            boosting_rounds,
                            valid_sets=[lgb_val])
        booster.save_model(output_path(model_file))

    print_h1('Validation/Export: ' + special_parameters.setup_name)

    test_vectors, labels = test.numpy()
    predictions = booster.predict(test_vectors)

    # validation
    if special_parameters.validation_only or not special_parameters.export:
        validate_kwargs = {} if validation_params is None else validation_params
        report = validate(predictions, labels, **validate_kwargs, final=True)

        print_notification(report, end='')

        if special_parameters.mail >= 1:
            send_email('Final results for XP ' + special_parameters.setup_name,
                       report)
        if special_parameters.file:
            save_file(output_path('validation.txt'),
                      'Final results for XP ' + special_parameters.setup_name,
                      report)

    if special_parameters.export:
        export_kwargs = {} if export_params is None else export_params
        export_results(test, predictions, **export_kwargs)