def fit(model, train, test, num_boost_round=360, verbose_eval=1, export=False,
        training_params=None, export_params=None, **kwargs):
    """Train an XGBoost booster on GPU (or load a saved one), then validate it.

    :param model: existing booster to continue training from, or None to start fresh
    :param train: training set; must expose get_vectors() and labels
    :param test: evaluation set; must expose get_vectors() and labels
    :param num_boost_round: number of boosting rounds
    :param verbose_eval: xgboost evaluation verbosity
    :param export: if True, export the test-set predictions after validation
    :param training_params: optional dict; its 'metrics' entry is forwarded to validate()
    :param export_params: optional dict forwarded to export_results()
    :param kwargs: extra xgboost training parameters (verbosity/gpu_id are set here)
    """
    if not use_gpu():
        print_errors('XGBoost can only be executed on a GPU for the moment', do_exit=True)

    # avoid mutable-default sharing across calls
    training_params = {} if training_params is None else training_params
    export_params = {} if export_params is None else export_params

    # the test DMatrix is needed both for the eval set and for final prediction
    d_test = xgb.DMatrix(np.asarray(test.get_vectors()), label=np.asarray(test.labels))

    # BUG FIX: the original tested a bare, undefined `validation_only` name
    # (NameError at runtime); the flag lives on `special_parameters`, as used
    # by the sibling fit() implementations in this project.
    if not special_parameters.validation_only:
        print_h1('Training: ' + special_parameters.setup_name)
        print_info("get vectors...")
        X = np.asarray(train.get_vectors())
        y = np.asarray(train.labels)
        d_train = xgb.DMatrix(X, label=y)

        # force execution on the first available GPU
        kwargs['verbosity'] = verbose_level()
        kwargs['gpu_id'] = first_device().index

        eval_list = [(d_test, 'eval'), (d_train, 'train')]

        print_info("fit model...")
        bst = xgb.train(kwargs, d_train, num_boost_round=num_boost_round,
                        verbose_eval=verbose_eval, evals=eval_list, xgb_model=model)

        print_info("Save model...")
        save_model(bst)
    else:
        # validation-only run: reuse the previously saved booster
        bst = load_model()

    print_h1('Validation/Export: ' + special_parameters.setup_name)
    # best_ntree_limit is set by xgboost when an eval set is used during training
    predictions = bst.predict(d_test, ntree_limit=bst.best_ntree_limit)
    res = validate(predictions, np.array(test.labels),
                   training_params.get('metrics', tuple()), final=True)
    print_notification(res, end='')
    if export:
        export_results(test, predictions, **export_params)
def fit(model, train, test, export=False, nb_classes=4520, training_params=None,
        export_params=None, save=True):
    """Fit a scikit-learn style classifier, then validate and optionally export.

    The classifier may have seen only a subset of the label space during
    training, so its predict_proba output is scattered into a full
    (n_samples, nb_classes) matrix using clf.classes_ before validation.

    :param model: estimator implementing fit/predict_proba/classes_
    :param train: training set exposing get_vectors() and labels
    :param test: evaluation set exposing get_vectors() and labels
    :param export: if True, export the test-set predictions
    :param nb_classes: total number of classes in the full label space
    :param training_params: optional dict; 'metrics' entry is passed to validate()
    :param export_params: optional dict forwarded to export_results()
    :param save: if True, persist the fitted model after training
    """
    # normalise optional dict arguments (avoids mutable defaults)
    training_params = training_params if training_params is not None else {}
    export_params = export_params if export_params is not None else {}

    clf = model

    if not validation_only:
        print_h1('Training: ' + setup_name)
        print_info("get vectors...")
        features = np.array(train.get_vectors())
        targets = np.array(train.labels)

        print_info("fit model...")
        clf.fit(features, targets)

        if save:
            save_model(clf)

    print_h1('Validation/Export: ' + setup_name)

    # probabilities over the classes the estimator actually saw
    restricted = clf.predict_proba(np.array(test.get_vectors()))
    # scatter them into the full class space; unseen classes stay at 0
    predictions = np.zeros((restricted.shape[0], nb_classes))
    predictions[:, clf.classes_] = restricted

    res = validate(predictions, np.array(test.labels),
                   training_params.get('metrics', tuple()), final=True)
    print_notification(res, end='')

    if export:
        export_results(test, predictions, **export_params)
def fit(model_z, train, test, val=None, training_params=None, predict_params=None,
        validation_params=None, export_params=None, optim_params=None,
        model_selection_params=None):
    """
    This function is the core of an experiment. It performs the ml procedure
    as well as the call to validation.

    Runs the training loop (with periodic validation, checkpointing and loss
    logging), then a final validation/export pass, depending on the flags
    carried by `special_parameters` (train / evaluate / export / restart).

    :param training_params: parameters for the training procedure
    :param val: validation set
    :param test: the test set
    :param train: The training set
    :param optim_params: optimizer construction parameters
    :param export_params: parameters forwarded to export_results
    :param validation_params: parameters forwarded to validate (may contain 'vcallback')
    :param predict_params: parameters forwarded to predict
    :param model_z: the model that should be trained
    :param model_selection_params: cross-validation / model-selection parameters
    :return: the Statistics object when cross-validation is enabled, else None
    """
    # configuration: merge user-supplied dicts with the module-level defaults
    training_params, predict_params, validation_params, export_params, optim_params, \
        cv_params = merge_dict_set(
            training_params, TRAINING_PARAMS,
            predict_params, PREDICT_PARAMS,
            validation_params, VALIDATION_PARAMS,
            export_params, EXPORT_PARAMS,
            optim_params, OPTIM_PARAMS,
            model_selection_params, MODEL_SELECTION_PARAMS
        )

    train_loader, test_loader, val_loader = _dataset_setup(
        train, test, val, **training_params)

    statistics_path = output_path('metric_statistics.dump')

    # NOTE: the pop() in the condition runs first, so 'cross_validation' is
    # removed from cv_params before the remaining keys are passed to Statistics
    metrics_stats = Statistics(
        model_z, statistics_path, **cv_params) if cv_params.pop('cross_validation') else None

    validation_path = output_path('validation.txt')

    # training parameters
    optim = optim_params.pop('optimizer')
    iterations = training_params.pop('iterations')  # LR milestones; max() is the epoch count
    gamma = training_params.pop('gamma')            # LR decay factor at each milestone
    loss = training_params.pop('loss')
    log_modulo = training_params.pop('log_modulo')  # mini-batches between loss logs (-1: once per epoch)
    val_modulo = training_params.pop('val_modulo')  # epochs between validation passes
    first_epoch = training_params.pop('first_epoch')  # > 0 when restarting a previous run

    # callbacks for ml tests
    vcallback = validation_params.pop(
        'vcallback') if 'vcallback' in validation_params else None

    if iterations is None:
        print_errors(
            'Iterations must be set',
            exception=TrainingConfigurationException('Iterations is None'))

    # before ml callback
    if vcallback is not None and special_parameters.train and first_epoch < max(
            iterations):
        init_callbacks(vcallback, val_modulo, max(iterations) //
                       val_modulo, train_loader.dataset, model_z)

    max_iterations = max(iterations)

    if special_parameters.train and first_epoch < max(iterations):
        print_h1('Training: ' + special_parameters.setup_name)

        # on restart (first_epoch >= 1) reload the previously saved loss curves
        loss_logs = [] if first_epoch < 1 else load_loss('loss_train')
        loss_val_logs = [] if first_epoch < 1 else load_loss('loss_validation')

        opt = create_optimizer(model_z.parameters(), optim, optim_params)
        scheduler = MultiStepLR(opt, milestones=list(iterations), gamma=gamma)

        # number of batches in the ml
        epoch_size = len(train_loader)

        # one log per epoch if value is -1
        log_modulo = epoch_size if log_modulo == -1 else log_modulo

        epoch = 0
        for epoch in range(max_iterations):
            if epoch < first_epoch:
                # already-done epochs on restart: advance the scheduler only
                # opt.step()
                _skip_step(scheduler, epoch)
                continue

            # saving epoch to enable restart
            export_epoch(epoch)
            model_z.train()

            # printing new epoch
            print_h2('-' * 5 + ' Epoch ' + str(epoch + 1) + '/' + str(max_iterations) +
                     ' (lr: ' + str(scheduler.get_lr()) + ') ' + '-' * 5)

            running_loss = 0.0

            for idx, data in enumerate(train_loader):
                # get the inputs
                inputs, labels = data

                # wrap labels in Variable as input is managed through a decorator
                # labels = model_z.p_label(labels)
                if use_gpu():
                    labels = labels.cuda()

                # zero the parameter gradients
                opt.zero_grad()
                outputs = model_z(inputs)
                loss_value = loss(outputs, labels)
                loss_value.backward()
                opt.step()

                # print math
                running_loss += loss_value.item()
                if idx % log_modulo == log_modulo - 1:  # print every log_modulo mini-batches
                    print('[%d, %5d] loss: %.5f' % (epoch + 1, idx + 1, running_loss / log_modulo))

                    # tensorboard support
                    add_scalar('Loss/train', running_loss / log_modulo)
                    loss_logs.append(running_loss / log_modulo)
                    running_loss = 0.0

            # end of epoch update of learning rate scheduler
            # NOTE(review): epoch-argument form of step() is the old PyTorch
            # scheduler API — confirm against the installed torch version
            scheduler.step(epoch + 1)

            # saving the model and the current loss after each epoch
            save_checkpoint(model_z, optimizer=opt)

            # validation of the model
            if epoch % val_modulo == val_modulo - 1:
                validation_id = str(int((epoch + 1) / val_modulo))

                # validation call
                predictions, labels, loss_val = predict(
                    model_z, val_loader, loss, **predict_params)
                loss_val_logs.append(loss_val)

                res = '\n[validation_id:' + validation_id + ']\n' + validate(
                    predictions, labels, validation_id=validation_id,
                    statistics=metrics_stats, **validation_params)

                # save statistics for robust cross validation
                if metrics_stats:
                    metrics_stats.save()

                print_notification(res)

                if special_parameters.mail == 2:
                    send_email('Results for XP ' + special_parameters.setup_name +
                               ' (epoch: ' + str(epoch + 1) + ')', res)
                if special_parameters.file:
                    save_file(validation_path,
                              'Results for XP ' + special_parameters.setup_name +
                              ' (epoch: ' + str(epoch + 1) + ')', res)

                # checkpoint
                save_checkpoint(model_z, optimizer=opt, validation_id=validation_id)

                # callback
                if vcallback is not None:
                    run_callbacks(vcallback, (epoch + 1) // val_modulo)

            # save loss
            save_loss(
                {  # // log_modulo * log_modulo in case log_modulo does not divide epoch_size
                    'train': (loss_logs, log_modulo),
                    'validation': (loss_val_logs,
                                   epoch_size // log_modulo * log_modulo * val_modulo)
                },
                ylabel=str(loss)
            )

        # saving last epoch
        export_epoch(epoch + 1)  # if --restart is set, the train will not be executed

        # callback
        if vcallback is not None:
            finish_callbacks(vcallback)

    # final validation
    if special_parameters.evaluate or special_parameters.export:
        print_h1('Validation/Export: ' + special_parameters.setup_name)

        if metrics_stats is not None:
            # change the parameter states of the model to best model
            metrics_stats.switch_to_best_model()

        predictions, labels, val_loss = predict(
            model_z, test_loader, loss, validation_size=-1, **predict_params)

        if special_parameters.evaluate:
            res = validate(predictions, labels, statistics=metrics_stats,
                           **validation_params, final=True)

            print_notification(res, end='')

            if special_parameters.mail >= 1:
                send_email('Final results for XP ' + special_parameters.setup_name, res)
            if special_parameters.file:
                save_file(validation_path,
                          'Final results for XP ' + special_parameters.setup_name,
                          res)

        if special_parameters.export:
            export_results(test_loader.dataset, predictions, **export_params)

    return metrics_stats
def fit(train, test, validation=None, validation_params=None, export_params=None,
        model_name='model', **kwargs):
    """
    Fit a light GBM model. If validation_only or export is True, then the
    training is not performed and the model is loaded.

    :param model_name: basename of the saved booster under models/
    :param export_params: parameters forwarded to export_results
    :param validation_params: parameters forwarded to validate
    :param train: training set
    :param test: test set (also used as the eval set when validation is None)
    :param validation: optional validation set
    :param kwargs: LightGBM training parameters (smoothed with module defaults)
    :return:
    """
    nb_labels = _nb_labels(train, test, validation)

    train_data = _to_lgb_dataset(train)
    test_data = _to_lgb_dataset(test)
    # fall back to the test set as eval set when no validation set is given
    val_data = _to_lgb_dataset(validation) if validation is not None else test_data

    model_path = output_path('models/{}.bst'.format(model_name))

    skip_training = special_parameters.validation_only or special_parameters.export
    if skip_training:
        # reload the previously trained booster
        booster = lgb.Booster(model_file=model_path)
    else:
        print_h1('Training: ' + special_parameters.setup_name)
        num_round = 10
        lgb_params = kwargs
        merge_smooth(lgb_params, _default_params)
        lgb_params['num_class'] = nb_labels
        booster = lgb.train(lgb_params, train_data, num_round, valid_sets=[val_data])
        booster.save_model(model_path)

    print_h1('Validation/Export: ' + special_parameters.setup_name)
    testset, labels = test.numpy()
    predictions = booster.predict(testset)

    # validation
    if special_parameters.validation_only or not special_parameters.export:
        val_kwargs = {} if validation_params is None else validation_params
        res = validate(predictions, labels, **val_kwargs, final=True)
        print_notification(res, end='')
        if special_parameters.mail >= 1:
            send_email('Final results for XP ' + special_parameters.setup_name, res)
        if special_parameters.file:
            save_file(output_path('validation.txt'),
                      'Final results for XP ' + special_parameters.setup_name, res)

    if special_parameters.export:
        exp_kwargs = {} if export_params is None else export_params
        export_results(test, predictions, **exp_kwargs)