Example #1
def train_and_evaluate(model, data_iter_tr, data_iter_ts, loss_fn, optimizer,
                       metrics, exp_dir):
    loss_vect = []
    n_epoch = ut.model_param['num_epochs']
    for epoch in range(1, n_epoch + 1):
        print('Epoch {0} of {1}'.format(epoch, n_epoch))

        start = time()
        mrn, encoded, encoded_avg, loss_mean = train(model, optimizer, loss_fn,
                                                     data_iter_tr)
        print('-- time = ', round(time() - start, 3))
        print('-- mean loss: {0}'.format(round(loss_mean, 3)))
        loss_vect.append(loss_mean)

        is_best = loss_mean < 0.1
        is_last_epoch = epoch == n_epoch
        if is_best or is_last_epoch:
            outfile = os.path.join(exp_dir, 'TRconvae-avg_vect.csv')
            with open(outfile, 'w') as f:
                wr = csv.writer(f)
                for m, e in zip(mrn, encoded_avg):
                    wr.writerow([m] + e)

            outfile = os.path.join(exp_dir, 'TRconvae_vect.csv')
            with open(outfile, 'w') as f:
                wr = csv.writer(f)
                for m, evs in zip(mrn, encoded):
                    for e in evs:
                        wr.writerow([m] + e)

            outfile = os.path.join(exp_dir, 'TRmetrics.txt')
            with open(outfile, 'w') as f:
                f.write('Mean Loss: %.3f\n' % loss_mean)

            outfile = os.path.join(exp_dir, 'TRlosses.csv')
            with open(outfile, 'w') as f:
                wr = csv.writer(f)
                wr.writerow(['Epoch', 'Loss'])
                for idx, l in enumerate(loss_vect):
                    wr.writerow([idx, l])

            print('\nFound new best model at epoch {0}'.format(epoch))
            ut.save_best_model(epoch, model, optimizer, loss_mean, exp_dir)

            print('\nEvaluating the model')
            mrn, encoded, encoded_avg, test_metrics = evaluate(model,
                                                               loss_fn,
                                                               data_iter_ts,
                                                               metrics,
                                                               best_eval=True)

            return mrn, encoded, encoded_avg, test_metrics
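For reference, a helper matching the call signature above could checkpoint the full training state with torch.save. This is only a minimal sketch under that assumption; the actual ut.save_best_model in this project may differ, and the checkpoint filename here is chosen purely for illustration:

import os
import torch

def save_best_model(epoch, model, optimizer, loss, exp_dir):
    # Persist everything needed to reload or resume from the best model.
    torch.save({'epoch': epoch,
                'state_dict': model.state_dict(),
                'optim_dict': optimizer.state_dict(),
                'loss': loss},
               os.path.join(exp_dir, 'best_model.pt'))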
Example #2
def train_and_evaluate(model, data_iterator, loss_fn, optimizer, model_dir,
                       metrics, experiment_folder):
    num_epochs = model_pars['num_epochs']
    for epoch in range(num_epochs):
        print("Epoch {0} of {1}".format(epoch, num_epochs))
        mrn, encoded, loss_mean = train(model, optimizer, loss_fn,
                                        data_iterator)
        print("Mean loss: {0}, epoch {1}".format(loss_mean, epoch))
        is_best = loss_mean < 0.001

        if is_best or epoch == num_epochs - 1:

            with open(experiment_folder + '/TRencoded_vect.csv', 'w') as f:
                wr = csv.writer(f, delimiter=',')
                for e in encoded:
                    wr.writerow(e)

            with open(experiment_folder + '/TRmrns.csv', 'w') as f:
                wr = csv.writer(f, delimiter=',')
                for m in mrn:
                    wr.writerow([m])

            with open(experiment_folder + '/TRmetrics.txt', 'w') as f:
                wr = csv.writer(f, delimiter='\t')
                wr.writerow(["Mean loss:", loss_mean])

            print("-- Found new best at epoch {0}".format(epoch))
            utils.save_best_model(model, experiment_folder)
            print("Evaluating the model...")
            mrn, encoded, test_metrics = evaluate(model,
                                                  loss_fn,
                                                  data_iterator,
                                                  metrics,
                                                  best_eval=True)

            return mrn, encoded, test_metrics
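A usage sketch for the function above; every argument object is a hypothetical stand-in assumed to be constructed elsewhere:

mrn, encoded, test_metrics = train_and_evaluate(
    model, data_iterator, loss_fn, optimizer, model_dir, metrics,
    experiment_folder)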
Example #3
def train(
          net,
          criterion,
          args, 
          experiment_dir, 
          train_loader, 
          valid_loader
          ):
  optimizer = torch.optim.Adamax(net.parameters(), lr=args.lrate)
  scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'max')
  # Nb epochs completed tracked in case training interrupted
  for i in range(args.nb_epochs_complete, args.nb_epoch):
    t0 = time.time()
    logging.info("\nEpoch {}".format(i+1))
    logging.info("Learning rate: {0:.3g}".format(args.lrate))
    
    train_stats = train_one_epoch(net,
                                  criterion,
                                  optimizer,
                                  args,
                                  experiment_dir,
                                  train_loader)
    val_stats = evaluate(net, criterion, experiment_dir, args,
                         valid_loader, 'Valid')

    utils.track_epoch_stats(i, args.lrate, 0, train_stats, val_stats, experiment_dir)

    # Update learning rate, remaining nb epochs to train
    scheduler.step(val_stats[0])
    args.lrate = optimizer.param_groups[0]['lr']
    args.nb_epochs_complete += 1

    # Track best model performance
    if val_stats[0] > args.best_tpr:
      logging.warning("Best performance on valid set.")
      args.best_tpr = float(val_stats[0])
      utils.update_best_plots(experiment_dir)
      utils.save_best_model(experiment_dir, net)
      utils.save_best_scores(i, val_stats[2], val_stats[0], val_stats[1], experiment_dir)

    utils.save_epoch_model(experiment_dir, net)
    utils.save_args(experiment_dir, args)
    logging.info("Epoch took {} seconds.".format(int(time.time()-t0)))
    
    if args.lrate < 1e-6:
        logging.warning("Minimum learning rate reached.")
        break

  logging.warning("Training completed.")
Example #4
def train_and_evaluate(model, data_iterator, loss_fn, optimizer, metrics,
                       experiment_folder):
    num_epochs = model_pars['num_epochs']
    loss_vect = []
    for epoch in range(num_epochs):
        print("Epoch {0} of {1}".format(epoch, num_epochs))
        mrn, encoded, loss_mean = train(model, optimizer, loss_fn,
                                        data_iterator)
        print("Mean loss: {0}, epoch {1}".format(loss_mean, epoch))
        loss_vect.append(loss_mean)
        is_best = loss_mean < 0.001
        if is_best or epoch == num_epochs - 1:

            with open(experiment_folder + '/TRencoded_vect.csv', 'w') as f:
                wr = csv.writer(f, delimiter=',')
                for e in encoded:
                    wr.writerow(e)

            with open(experiment_folder + '/TRmrns.csv', 'w') as f:
                wr = csv.writer(f, delimiter=',')
                for m in mrn:
                    wr.writerow([m])

            with open(experiment_folder + '/TRmetrics.txt', 'w') as f:
                wr = csv.writer(f, delimiter='\t')
                wr.writerow(["Mean loss:", loss_mean])

            with open(experiment_folder + '/TRlosses.txt', 'w') as f:
                wr = csv.writer(f, delimiter=',')
                wr.writerow(["Epoch", "loss"])
                for idx, l in enumerate(loss_vect):
                    wr.writerow([idx, l])

            print("-- Found new best  at epoch {0}".format(epoch))
            utils.save_best_model(model, experiment_folder)
            print("Evaluating the model...")
            mrn, encoded, test_metrics = evaluate(model,
                                                  loss_fn,
                                                  data_iterator,
                                                  metrics,
                                                  best_eval=True)

            return mrn, encoded, test_metrics
Example #5
def on_after_epoch(model, df_hist, images, epoch, saveEpoch):
    utils.save_best_model(MODEL_PATH, model, df_hist)
    utils.checkpoint_model(MODEL_PATH, model, epoch, saveEpoch)
    utils.log_hist(logger, df_hist)
    utils.write_on_board_losses_stg2(writer, df_hist)
    utils.write_on_board_images_stg2(writer, images, epoch)
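A hook like this is normally driven by a generic training loop. A minimal sketch of such a loop, where fit and run_epoch are hypothetical helpers, not functions from the snippet's codebase:

def fit(model, n_epochs, save_epoch, on_after_epoch):
    for epoch in range(n_epochs):
        df_hist, images = run_epoch(model)  # hypothetical per-epoch runner
        on_after_epoch(model, df_hist, images, epoch, save_epoch)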
Example #6
def train(model, optim, sche, db, opt, exp_id):
    """
    Args:
        model: the model to be trained
        optim: pytorch optimizer to be used
        sche: learning rate scheduler for the optimizer
        db: prepared torch dataset object
        opt: command line input from the user
        exp_id: experiment id
    """

    best_model_dir = os.path.join(opt.save_dir, str(exp_id))
    if not os.path.exists(best_model_dir):
        os.makedirs(best_model_dir)

    # (For FG-NET only) carry out leave-one-out validation according to the list length
    assert len(db['train']) == len(db['eval'])

    # record for each training experiment
    best_MAE = []
    train_set = db['train'][exp_id]
    eval_set = db['eval'][exp_id]
    eval_loss, min_MAE, _ = evaluate(model, eval_set, opt)
    # initialize so the final return value is defined even if no evaluation
    # happens to run during the last epoch
    last_MAE = min_MAE
    # in dropout mode, only the leaf nodes of one tree are updated at a time
    if opt.dropout:
        current_tree = 0

    # save training and validation history
    if opt.history:
        train_loss_history = []
        eval_loss_history = []

    for epoch in range(1, opt.epochs + 1):
        # At each epoch, train the neural decision forest and update
        # the leaf node distribution separately

        # Train neural decision forest
        # set the model in the training mode
        model.train()
        # data loader
        train_loader = torch.utils.data.DataLoader(train_set,
                                                   batch_size=opt.batch_size,
                                                   shuffle=True,
                                                   num_workers=opt.num_threads)

        for batch_idx, batch in enumerate(train_loader):
            data = batch['image']
            target = batch['age']
            target = target.view(len(target), -1)
            if opt.cuda:
                with torch.no_grad():
                    # move to GPU
                    data, target = data.cuda(), target.cuda()
            # erase all computed gradient
            optim.zero_grad()
            # forward pass to get prediction and regularization loss
            prediction, reg_loss = model(data)

            loss = F.mse_loss(prediction, target) + reg_loss

            # compute gradient in the computational graph
            loss.backward()

            # update parameters in the model
            optim.step()

            # logging
            if batch_idx % opt.report_every == 0:
                logging.info(
                    'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f} '.format(
                        epoch, batch_idx * opt.batch_size,
                        len(train_set), 100. * batch_idx / len(train_loader),
                        loss.data.item()))
            # record loss
            if opt.history:
                train_loss_history.append((epoch, batch_idx, loss.data.item()))

            # Update the leaf node estimation
            if opt.leaf_node_type == 'simple' and batch_idx % opt.update_every == 0:
                logging.info("Epoch %d : Update leaf node prediction" %
                             (epoch))
                target_batches = prepare_batches(model, train_set, opt)
                # Update label prediction for each tree
                logging.info("Update leaf node prediction...")
                for i in range(opt.label_iter_time):
                    # prepare features from the last feature layer
                    # some cache is also stored in the forest for leaf node
                    if opt.dropout:
                        model.forest.trees[
                            current_tree].update_label_distribution(
                                target_batches)
                        current_tree = (current_tree + 1) % opt.n_tree
                    else:
                        for tree in model.forest.trees:
                            tree.update_label_distribution(target_batches)
                # release cache
                for tree in model.forest.trees:
                    del tree.mu_cache
                    tree.mu_cache = []

            if opt.eval and batch_idx != 0 and batch_idx % opt.eval_every == 0:
                # evaluate model
                eval_loss, MAE, CS = evaluate(model, eval_set, opt)
                # update learning rate
                sche.step(MAE.data.item())
                # record the final MAE
                if epoch == opt.epochs:
                    last_MAE = MAE
                # record the best MAE
                if MAE < min_MAE:
                    min_MAE = MAE
                    # save the best model
                    model_name = opt.model_type + train_set.name
                    best_model_path = os.path.join(best_model_dir, model_name)
                    utils.save_best_model(model.cpu(), best_model_path)
                    model.cuda()
                # update log
                utils.update_log(
                    best_model_dir,
                    (str(MAE.data.item()), str(min_MAE.data.item())), str(CS))
                if opt.history:
                    eval_loss_history.append(
                        (epoch, batch_idx, eval_loss, MAE))
                # reset to training mode
                model.train()
        best_MAE.append(min_MAE.data.item())
    if opt.history:
        utils.save_history(np.array(train_loss_history),
                           np.array(eval_loss_history), opt)
    logging.info('Training finished.')
    return model, best_MAE, last_MAE
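Moving the model to the CPU before saving, as done above, keeps the checkpoint loadable on machines without a GPU. A minimal sketch of what such a save_best_model helper might do, assuming it simply serializes the state dict; the real helper in this project may differ:

import torch

def save_best_model(model, path):
    # the caller is expected to have moved the model to the CPU already
    torch.save(model.state_dict(), path)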
Example #7
            # The head of this call is reconstructed: the original snippet is
            # truncated here, and the leading arguments are assumed by analogy
            # with the eval_one_epoch call below.
            train_loss, train_score = train_one_epoch(
                model,
                loaders["train"],
                criterion,
                optimizer,
                device,
                input_key=global_params["input_key"],
                input_target_key=global_params["input_target_key"],
                epoch=epoch,
                writer=train_writer)

            strong = config["inference"]["prediction_type"] == "strong"
            valid_loss, valid_score = eval_one_epoch(
                model,
                loaders["valid"],
                criterion,
                device,
                input_key=global_params["input_key"],
                input_target_key=global_params["input_target_key"],
                epoch=epoch,
                writer=valid_writer,
                strong=strong)

            best_score, updated = utils.save_best_model(
                model, checkpoints_dir, valid_score, prev_metric=best_score)

            if updated:
                _metrics["best"] = {"lwlrap": best_score, "loss": valid_loss, "epoch": epoch + 1}
            _metrics["last"] = {"lwlrap": valid_score, "loss": valid_loss, "epoch": epoch + 1}
            _metrics[f"epoch_{epoch + 1}"] = {"lwlrap": valid_score, "loss": valid_loss}

            utils.save_json(_metrics, checkpoints_dir / "_metrics.json")

            logger.info(
                f"{epoch + 1}/{global_params['num_epochs']} * Epoch {epoch + 1} "
                f"(train): lwlrap={train_score:.4f} | loss={train_loss:.4f}")
            logger.info(
                f"{epoch + 1}/{global_params['num_epochs']} * Epoch {epoch + 1} "
                f"(valid): lwlrap={valid_score:.4f} | loss={valid_loss:.4f}")
Example #8
def on_after_epoch(model, df_hist, epoch):
    utils.save_best_model(MODEL_PATH, model, df_hist)
    utils.log_hist(logger, df_hist)
    utils.write_on_board_losses_stg1(writer, df_hist)