Example #1
    def configure_optimizers(self):
        if self.params["generator"]["optimizer"] == "adam":
            generator_optimizer = torch.optim.Adam(
                self.generator_model.parameters(),
                lr=self.params['generator']['learning_rate'],
                weight_decay=self.params['generator']['weight_decay'],
                betas=(self.params["generator"]["beta1"],
                       self.params["generator"]["beta2"]))
        elif self.params["generator"]["optimizer"] == "qhadam":
            generator_optimizer = QHAdam(self.generator_model.parameters(),
                                         nus=(0.7, 1.0),
                                         betas=(0.95, 0.998))
        else:
            raise NameError("Unknown optimizer name")

        if self.params["critic"]["optimizer"] == "adam":
            critic_optimizer = torch.optim.Adam(
                self.critic_model.parameters(),
                lr=self.params['critic']['learning_rate'],
                weight_decay=self.params['critic']['weight_decay'],
                betas=(self.params["critic"]["beta1"],
                       self.params["critic"]["beta2"]))
        elif self.params["critic"]["optimizer"] == "qhadam":
            critic_optimizer = QHAdam(self.critic_model.parameters(),
                                      nus=(0.7, 1.0),
                                      betas=(0.95, 0.998))
        else:
            raise NameError("Unknown optimizer name")

        return generator_optimizer, critic_optimizer
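The Adam branch in Example #1 forwards the configured learning rate, weight decay and betas, while the QHAdam branch hardcodes its settings. QHAdam accepts the same core keyword arguments as torch.optim.Adam plus the extra nus pair (as the other examples on this page show), so the same config values can be forwarded to both branches. A minimal sketch, assuming qhoptim is installed and a config sub-dict shaped like self.params["generator"] above:

import torch
from qhoptim.pyt import QHAdam

def build_optimizer(parameters, cfg):
    # cfg is assumed to hold "optimizer", "learning_rate", "weight_decay",
    # "beta1" and "beta2", mirroring the params dict used in Example #1.
    if cfg["optimizer"] == "adam":
        return torch.optim.Adam(parameters,
                                lr=cfg["learning_rate"],
                                weight_decay=cfg["weight_decay"],
                                betas=(cfg["beta1"], cfg["beta2"]))
    if cfg["optimizer"] == "qhadam":
        return QHAdam(parameters,
                      lr=cfg["learning_rate"],
                      weight_decay=cfg["weight_decay"],
                      betas=(cfg["beta1"], cfg["beta2"]),
                      nus=(0.7, 1.0))
    raise ValueError("Unknown optimizer name: " + cfg["optimizer"])
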
Example #2
 def init_trainer_network(self):
     nus = (0.7, 1.0)
     self.gen_paramas = list(self.generator.parameters())
     self.dsc_params = list(self.discriminators.parameters())
     if self.cfg.proj_lang:
         self.language_params = list(self.language_model.proj.parameters())
         self.optim_language = QHAdam(self.language_params,
                                      lr=self.cfg.lang_lr,
                                      betas=self.cfg.lang_betas,
                                      nus=(0.7, 0.8))
     self.style_params = list(self.style_model.parameters())
     self.content_params = list(self.content_model.parameters())
     self.optim_generator = QHAdam(self.gen_paramas,
                                   lr=self.cfg.gen_lr,
                                   betas=self.cfg.gen_betas,
                                   nus=(0.7, 0.8))
     self.optim_discriminator = QHAdam(self.dsc_params,
                                       lr=self.cfg.dsc_lr,
                                       betas=self.cfg.dsc_betas,
                                       nus=(0.7, 0.8))
     self.optim_style = QHAdam(self.style_params,
                               lr=self.cfg.lmf_lr,
                               betas=self.cfg.lmf_betas,
                               nus=(0.7, 0.8))
     self.optim_content = QHAdam(self.content_params,
                                 lr=self.cfg.lmf_lr,
                                 betas=self.cfg.lmf_betas,
                                 nus=(0.7, 0.8))
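Example #2 creates one QHAdam instance per sub-model. Because QHAdam subclasses torch.optim.Optimizer, a single instance can also take parameter groups with per-group learning rates. A sketch, with generator, discriminator and cfg as stand-ins for the attributes used above:

from qhoptim.pyt import QHAdam

def build_gan_optimizer(generator, discriminator, cfg):
    # One QHAdam over two parameter groups, each with its own learning rate.
    return QHAdam(
        [
            {"params": generator.parameters(), "lr": cfg.gen_lr},
            {"params": discriminator.parameters(), "lr": cfg.dsc_lr},
        ],
        betas=(0.995, 0.999),
        nus=(0.7, 0.8),
    )

Keeping separate optimizers, as Example #2 does, remains the right choice when the generator and discriminator must be stepped independently.
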
Example #3
def define_optimizer(model, args):
    if args.optimizer.startswith('adam'):
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                            model.parameters()),
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
    elif args.optimizer.startswith('rmsprop'):
        optimizer = torch.optim.RMSprop(filter(lambda p: p.requires_grad,
                                               model.parameters()),
                                        lr=args.lr,
                                        weight_decay=args.weight_decay)
    elif args.optimizer.startswith('sgd'):
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                           model.parameters()),
                                    lr=args.lr,
                                    weight_decay=args.weight_decay)
    elif args.optimizer.startswith('qhadam'):
        optimizer = QHAdam(filter(lambda p: p.requires_grad,
                                  model.parameters()),
                           lr=args.lr,
                           nus=[0.7, 1.0],
                           betas=[0.995, 0.999],
                           weight_decay=args.weight_decay)
    else:
        raise ValueError('Optimizer not supported')
    print('Optimizer: ', optimizer)
    return optimizer
Example #4
 def qhadam_ctor(params):
     return QHAdam(params,
                   lr=lr,
                   betas=betas,
                   weight_decay=l2 * 2.0,
                   nus=(1.0, 1.0),
                   eps=eps)
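Example #4 passes nus=(1.0, 1.0). In the quasi-hyperbolic formulation this reduces QHAdam to plain Adam, so the constructor above should behave like Adam with the given lr, betas, eps and L2-style weight decay. A small sketch of the assumed equivalence:

import torch
from qhoptim.pyt import QHAdam

# With nus=(1.0, 1.0) the quasi-hyperbolic weighting disappears, so these two
# optimizers are expected to track each other for identical hyperparameters.
w = torch.nn.Parameter(torch.zeros(4))
qhadam_as_adam = QHAdam([w], lr=1e-3, betas=(0.9, 0.999), nus=(1.0, 1.0), eps=1e-8)
plain_adam = torch.optim.Adam([w], lr=1e-3, betas=(0.9, 0.999), eps=1e-8)
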
Example #5
    def __init__(self,
                 data_processor,
                 bottleneck_dim=128,
                 num_codebooks=16,
                 hidden_dim=512,
                 decoder_layers=2,
                 encoder_layers=2,
                 **kwargs):
        super().__init__()
        self.data_processor = data_processor
        self.encoder1 = nn.Sequential(
            Feedforward(self.data_processor.input_dim,
                        hidden_dim,
                        num_layers=encoder_layers,
                        **kwargs), nn.Linear(hidden_dim, bottleneck_dim))

        self.quntizer = Model(input_dim=bottleneck_dim,
                              hidden_dim=1024,
                              bottleneck_dim=256,
                              encoder_layers=2,
                              decoder_layers=2,
                              Activation=nn.ReLU,
                              num_codebooks=8,
                              codebook_size=256,
                              initial_entropy=3.0,
                              share_codewords=True).cuda()
        self.distance = DISTANCES['euclidian_squared']
        self.triplet_delta = 5
        all_parameters = list(self.encoder1.parameters()) + list(
            self.quntizer.parameters())
        self.optimizer = OneCycleSchedule(QHAdam(all_parameters,
                                                 nus=(0.8, 0.7),
                                                 betas=(0.95, 0.998)),
                                          learning_rate_base=1e-3,
                                          warmup_steps=10000,
                                          decay_rate=0.2)
        self.experiment_path = 'logs'

        self.writer = SummaryWriter(self.experiment_path, comment='Cora')
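Example #5 wraps QHAdam in a OneCycleSchedule that appears to come from the surrounding project rather than from qhoptim (an assumption). Since QHAdam is a regular torch.optim.Optimizer, it also plugs into the stock PyTorch schedulers; a sketch using torch.optim.lr_scheduler.OneCycleLR:

import torch
from qhoptim.pyt import QHAdam

params = [torch.nn.Parameter(torch.zeros(8))]
opt = QHAdam(params, nus=(0.8, 0.7), betas=(0.95, 0.998))
# cycle_momentum=False leaves QHAdam's betas untouched; only the lr is cycled.
sched = torch.optim.lr_scheduler.OneCycleLR(opt, max_lr=1e-3, total_steps=10000,
                                            cycle_momentum=False)
# Inside the training loop: loss.backward(); opt.step(); sched.step()
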
Example #6
trainloader = DataLoader(TorchDataset(data.X_train, data.y_train), batch_size=BATCH_SIZE, num_workers=16, shuffle=True)
valloader = DataLoader(TorchDataset(data.X_valid, data.y_valid), batch_size=BATCH_SIZE*2, num_workers=16, shuffle=False)
testloader = DataLoader(TorchDataset(data.X_test, data.y_test), batch_size=BATCH_SIZE*2, num_workers=16, shuffle=False)

test_losses, train_time, test_time = [], [], []

for SEED in [1225, 1337, 2020, 6021991]:
    save_dir = Path("./results/tabular/") / DATA_NAME / "depth={}/reg={}/mlp-layers={}/dropout={}/seed={}".format(TREE_DEPTH, REG, MLP_LAYERS, DROPOUT, SEED)
    save_dir.mkdir(parents=True, exist_ok=True)

    deterministic(SEED)

    model = LTRegressor(TREE_DEPTH, in_features, out_features, reg=REG, linear=LINEAR, layers=MLP_LAYERS, dropout=DROPOUT)

    # init optimizer
    optimizer = QHAdam(model.parameters(), lr=LR, nus=(0.7, 1.0), betas=(0.995, 0.998))

    # init loss
    loss = MSELoss(reduction="sum")
    criterion = lambda x, y: loss(x.float(), y.float())

    # init learning rate scheduler
    lr_scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=2)

    # init train-eval monitoring 
    monitor = MonitorTree(pruning, save_dir)

    state = {
        'batch-size': BATCH_SIZE,
        'loss-function': 'MSE',
        'learning-rate': LR,
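Example #6 pairs QHAdam with ReduceLROnPlateau and is truncated before the training loop. A sketch of the loop shape it implies, with train_epoch and evaluate as hypothetical helpers standing in for the project's own functions:

def fit(model, trainloader, valloader, optimizer, lr_scheduler, criterion,
        epochs, train_epoch, evaluate):
    # train_epoch and evaluate are placeholders for project-specific routines.
    for epoch in range(epochs):
        train_epoch(model, trainloader, optimizer, criterion)
        val_loss = evaluate(model, valloader, criterion)
        lr_scheduler.step(val_loss)  # ReduceLROnPlateau steps on the validation loss
        print(epoch, val_loss, optimizer.param_groups[0]["lr"])
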
Example #7
from common import *
from model import vocab
option = dict(edim=256,
              epochs=1.5,
              maxgrad=1.,
              learningrate=1e-3,
              sdt_decay_step=1,
              batchsize=8,
              vocabsize=vocab,
              fp16=2,
              saveInterval=10,
              logInterval=.4)
option['loss'] = lambda opt, model, y, out, *_, rewards=[]: F.cross_entropy(
    out.transpose(-1, -2), y, reduction='none')
option['criterion'] = lambda y, out, mask, *_: (
    out[:, :, 1:vocab].max(-1)[1] + 1).ne(y).float() * mask.float()
option['startEnv'] = lambda x, y, l, *args: (x, y, l, *args)
option['stepEnv'] = lambda i, pred, l, *args: (
    False, 1., None, None
)  # done episode, fake reward, Null next input, Null length, Null args
option['cumOut'] = False  # True to keep trajectory
option['devices'] = [0] if torch.cuda.is_available() else []  # list of GPUs
option['init_method'] = 'file:///tmp/sharedfile'  # initial configuration for multiple-GPU training
try:
    from qhoptim.pyt import QHAdam
    option['newOptimizer'] = lambda opt, params, _: QHAdam(
        params, lr=opt.learningrate, nus=(.7, .8), betas=(0.995, 0.999))
except ImportError:
    pass
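Example #7 only registers newOptimizer when qhoptim can be imported, so the caller silently keeps its default optimizer otherwise. A sketch of the same optional-dependency pattern with an explicit Adam fallback (new_optimizer is a hypothetical helper name):

import torch

try:
    from qhoptim.pyt import QHAdam

    def new_optimizer(params, lr):
        return QHAdam(params, lr=lr, nus=(0.7, 0.8), betas=(0.995, 0.999))
except ImportError:
    def new_optimizer(params, lr):
        # qhoptim is not installed: fall back to plain Adam at the same lr.
        return torch.optim.Adam(params, lr=lr)
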
Example #8
def fit_model(model,
              X_train,
              y_train,
              batch_size=None,
              validation_data=tuple(),
              epochs=50,
              optimizer='Adam',
              learning_rate=1e-2,
              device=None,
              verbose=1):

    #For early stopping
    patience = int(epochs * 0.2)
    min_val_loss = float('inf')
    stop_count = 0

    #For saving tentative best model
    best_model_param = None

    #Use GPU if it is available when "device" is not specified
    if device is None:
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

    #Make sure the model lives on the selected device before building the optimizer
    model = model.to(device)

    #Set up parallel processing when multiple GPUs are available
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    opt = None
    if optimizer == 'SGD':
        opt = torch.optim.SGD(model.parameters(), lr=learning_rate)
    if optimizer == 'RMSprop':
        opt = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
    if optimizer == 'Adadelta':
        opt = torch.optim.Adadelta(model.parameters(), lr=learning_rate)
    if optimizer == 'Adam':
        opt = torch.optim.Adam(model.parameters(), lr=learning_rate)
    if optimizer == 'QHAdam':
        opt = QHAdam(model.parameters(),
                     lr=learning_rate,
                     nus=(0.7, 1.0),
                     betas=(0.95, 0.998))
    if opt is None:
        raise ValueError('Unsupported optimizer: {}'.format(optimizer))

    num_unique = np.unique(np.argmax(y_train[:, -1, :], axis=1),
                           return_counts=True)
    num_sample = num_unique[1]
    weight_list = 1 / (num_sample / num_sample.max())
    class_weight = torch.as_tensor(weight_list, device=device)
    #sample_weight = np.array([weight_list[list(num_unique[0]).index(label)] for label in np.argmax(y_train[:,-1,:], axis=1)])

    critertion = (nn.CrossEntropyLoss(weight=class_weight, reduction='none')
                  if class_weight.shape[0] > 2
                  else nn.BCELoss(weight=class_weight, reduction='none'))

    path = r'.\pytorch_check_point'
    os.makedirs(path, exist_ok=True)

    for i in range(epochs):

        #train
        model.train()

        shuffled_idx = random.sample(list(range(len(y_train))), len(y_train))
        train_loss = 0
        step = batch_size if batch_size is not None else len(y_train)
        for st in range(0, len(y_train), step):
            en = min(st + step, len(y_train))
            target_idx = shuffled_idx[st:en]

            inputs = torch.as_tensor(X_train[target_idx], device=device)
            inputs.requires_grad = True
            target = torch.as_tensor(np.argmax(y_train[target_idx], axis=2),
                                     device=device)

            #Predict the train data
            out = model(inputs)

            #Calculate average loss of all timesteps per sample
            loss = 0
            for j in range(out.shape[0]):
                loss += critertion(torch.log(out[j]), target[j]).mean()  #Softmax is part of the model architecture
            train_loss += loss.item()

            #Get the average loss of one batch
            loss = loss.mean()

            #Initialize gradient
            opt.zero_grad()

            #Calculate gradient
            loss.backward()

            #Update the parameter
            opt.step()

            del loss, inputs, target, out

        #Get the average train loss of all samples
        train_loss /= len(y_train)

        #train_loss += model.train_on_batch(X_train[target_idx], y_train[target_idx],
        #                                  sample_weight=sample_weight[target_idx])

        #test
        if len(validation_data) > 0:
            X_test = validation_data[0]
            y_true = validation_data[1]

            val_loss = test_model(model,
                                  X_test,
                                  y_true,
                                  batch_size=batch_size,
                                  critertion=critertion,
                                  device=device)

            #val_loss += model.evaluate(X_test[target_idx], y_true[target_idx], verbose=0)

            if verbose == 1:
                print('epoch{0}:\t train_loss = {1}\t val_loss = {2}'.format(
                    i + 1, train_loss, val_loss))

            #early stop
            if min_val_loss > val_loss:
                min_val_loss = val_loss
                best_model_param = model.state_dict()  #Save tentative best model
                stop_count = 0

            else:
                stop_count += 1
                if stop_count > patience: break

        else:
            if verbose == 1:
                print('epoch{0}:\t train_loss = {1}'.format(i + 1, train_loss))

    del class_weight

    if best_model_param is not None:
        model.load_state_dict(best_model_param)
        min_val_loss = val_loss

    return model
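Example #8 selects the optimizer through a chain of if statements. A hedged alternative sketch using a small factory dict keyed by name (OPTIMIZERS and make_optimizer are hypothetical names, not part of the code above):

import torch
from qhoptim.pyt import QHAdam

OPTIMIZERS = {
    'SGD': lambda params, lr: torch.optim.SGD(params, lr=lr),
    'RMSprop': lambda params, lr: torch.optim.RMSprop(params, lr=lr),
    'Adadelta': lambda params, lr: torch.optim.Adadelta(params, lr=lr),
    'Adam': lambda params, lr: torch.optim.Adam(params, lr=lr),
    'QHAdam': lambda params, lr: QHAdam(params, lr=lr, nus=(0.7, 1.0), betas=(0.95, 0.998)),
}

def make_optimizer(name, params, lr):
    if name not in OPTIMIZERS:
        raise ValueError('Unsupported optimizer: ' + name)
    return OPTIMIZERS[name](params, lr)
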
Example #9
def objective(trial):

    TREE_DEPTH = trial.suggest_int('TREE_DEPTH', 2, 6)
    REG = trial.suggest_loguniform('REG', 1e-3, 1e3)
    print(f'TREE_DEPTH={TREE_DEPTH}, REG={REG}')

    if not LINEAR:
        MLP_LAYERS = trial.suggest_int('MLP_LAYERS', 2, 7)
        DROPOUT = trial.suggest_uniform('DROPOUT', 0.0, 0.5)
        print(f'MLP_LAYERS={MLP_LAYERS}, DROPOUT={DROPOUT}')

    pruning = REG > 0

    if LINEAR:
        save_dir = root_dir / "depth={}/reg={}/seed={}".format(
            TREE_DEPTH, REG, SEED)
        model = LTBinaryClassifier(TREE_DEPTH,
                                   data.X_train.shape[1],
                                   reg=REG,
                                   linear=LINEAR)

    else:
        save_dir = root_dir / "depth={}/reg={}/mlp-layers={}/dropout={}/seed={}".format(
            TREE_DEPTH, REG, MLP_LAYERS, DROPOUT, SEED)
        model = LTBinaryClassifier(TREE_DEPTH,
                                   data.X_train.shape[1],
                                   reg=REG,
                                   linear=LINEAR,
                                   layers=MLP_LAYERS,
                                   dropout=DROPOUT)

    print(model.count_parameters(), "model's parameters")

    save_dir.mkdir(parents=True, exist_ok=True)

    # init optimizer
    optimizer = QHAdam(model.parameters(),
                       lr=LR,
                       nus=(0.7, 1.0),
                       betas=(0.995, 0.998))

    # init learning rate scheduler
    lr_scheduler = ReduceLROnPlateau(optimizer, 'min', factor=0.1, patience=2)

    # init loss
    criterion = BCELoss(reduction="sum")

    # evaluation criterion => error rate
    eval_criterion = lambda x, y: (x.long() != y.long()).sum()

    # init train-eval monitoring
    monitor = MonitorTree(pruning, save_dir)

    state = {
        'batch-size': BATCH_SIZE,
        'loss-function': 'BCE',
        'learning-rate': LR,
        'seed': SEED,
        'dataset': DATA_NAME,
    }

    best_val_loss = float("inf")
    best_e = -1
    no_improv = 0
    for e in range(EPOCHS):
        train_stochastic(trainloader,
                         model,
                         optimizer,
                         criterion,
                         epoch=e,
                         monitor=monitor)

        val_loss = evaluate(valloader,
                            model, {'ER': eval_criterion},
                            epoch=e,
                            monitor=monitor)

        no_improv += 1
        if val_loss['ER'] < best_val_loss:
            best_val_loss = val_loss['ER']
            best_e = e
            no_improv = 0
            # save_model(model, optimizer, state, save_dir)

        # reduce learning rate if needed
        lr_scheduler.step(val_loss['ER'])
        monitor.write(model, e, train={"lr": optimizer.param_groups[0]['lr']})

        trial.report(val_loss['ER'], e)
        # Handle pruning based on the intermediate value.
        if trial.should_prune() or np.isnan(val_loss['ER']):
            monitor.close()
            raise optuna.TrialPruned()

        if no_improv == 10:
            break

    print("Best validation ER:", best_val_loss)
    monitor.close()

    return best_val_loss
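Example #9 keeps QHAdam's hyperparameters fixed while Optuna searches the model's. The optimizer's own settings can be searched with the same trial API; a hypothetical sketch (suggest_qhadam is not part of the code above):

from qhoptim.pyt import QHAdam

def suggest_qhadam(trial, parameters):
    # Search the learning rate and the first discount factor nu1, keeping nu2
    # and betas fixed at values close to those used in the example above.
    lr = trial.suggest_loguniform('LR', 1e-4, 1e-1)
    nu1 = trial.suggest_uniform('NU1', 0.5, 1.0)
    return QHAdam(parameters, lr=lr, nus=(nu1, 1.0), betas=(0.995, 0.998))
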
Example #10
def trainingNetwork(images_folder_train, labels_folder_train, images_folder_val, labels_folder_val,
                    dictionary, target_classes, output_classes, save_network_as, classifier_name,
                    epochs, batch_sz, batch_mult, learning_rate, L2_penalty, validation_frequency, loss_to_use,
                    epochs_switch, epochs_transition, tversky_alpha, tversky_gamma, optimiz,
                    flag_shuffle, flag_training_accuracy, experiment_name):

    ##### DATA #####

    # setup the training dataset
    datasetTrain = CoralsDataset(images_folder_train, labels_folder_train, dictionary, target_classes)

    print("Dataset setup..", end='')
    datasetTrain.computeAverage()
    datasetTrain.computeWeights()
    print(datasetTrain.dict_target)
    print(datasetTrain.weights)
    freq = 1.0 / datasetTrain.weights
    print(freq)
    print("done.")

    save_classifier_as = save_network_as.replace(".net", ".json")
    writeClassifierInfo(save_classifier_as, classifier_name, datasetTrain, output_classes)

    datasetTrain.enableAugumentation()

    datasetVal = CoralsDataset(images_folder_val, labels_folder_val, dictionary, target_classes)
    datasetVal.dataset_average = datasetTrain.dataset_average
    datasetVal.weights = datasetTrain.weights

    #AUGMENTATION IS NOT APPLIED ON THE VALIDATION SET
    datasetVal.disableAugumentation()

    # setup the data loader
    dataloaderTrain = DataLoader(datasetTrain, batch_size=batch_sz, shuffle=flag_shuffle, num_workers=0, drop_last=True,
                                 pin_memory=True)

    validation_batch_size = 4
    dataloaderVal = DataLoader(datasetVal, batch_size=validation_batch_size, shuffle=False, num_workers=0, drop_last=True,
                                 pin_memory=True)

    training_images_number = len(datasetTrain.images_names)
    validation_images_number = len(datasetVal.images_names)

    print("NETWORK USED: DEEPLAB V3+")

    if os.path.exists(save_network_as):
        net = DeepLab(backbone='resnet', output_stride=16, num_classes=output_classes)
        net.load_state_dict(torch.load(save_network_as))
        print("Checkpoint loaded.")
    else:
        ###### SETUP THE NETWORK #####
        net = DeepLab(backbone='resnet', output_stride=16, num_classes=output_classes)
        state = torch.load("deeplab-resnet.pth.tar")
        # RE-INITIALIZE THE CLASSIFICATION LAYER WITH THE RIGHT NUMBER OF CLASSES, DON'T LOAD WEIGHTS OF THE CLASSIFICATION LAYER
        new_dictionary = state['state_dict']
        del new_dictionary['decoder.last_conv.8.weight']
        del new_dictionary['decoder.last_conv.8.bias']
        net.load_state_dict(state['state_dict'], strict=False)

    # OPTIMIZER
    if optimiz == "SGD":
        optimizer = optim.SGD(net.parameters(), lr=learning_rate, weight_decay=L2_penalty, momentum=0.9)
    elif optimiz == "ADAM":
        optimizer = optim.Adam(net.parameters(), lr=learning_rate, weight_decay=L2_penalty)
    elif optimiz == "QHADAM":
        optimizer = QHAdam(net.parameters(), lr=learning_rate, weight_decay=L2_penalty,
                           nus = (0.7, 1.0), betas = (0.99, 0.999))


    USE_CUDA = torch.cuda.is_available()

    # Define the device unconditionally so the later .to(device) calls work on CPU-only machines
    device = torch.device("cuda" if USE_CUDA else "cpu")
    net.to(device)

    ##### TRAINING LOOP #####

    # Writer will output to ./runs/ directory by default
    writer = SummaryWriter(comment=experiment_name)

    reduce_lr_patience = 2
    if loss_to_use == "DICE+BOUNDARY":
        reduce_lr_patience = 200
        print("patience increased !")

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=reduce_lr_patience, verbose=True)

    best_accuracy = 0.0
    best_jaccard_score = 0.0


    # Crossentropy loss
    weights = datasetTrain.weights
    class_weights = torch.FloatTensor(weights).to(device)
    CEloss = nn.CrossEntropyLoss(weight=class_weights, ignore_index=-1)

    # weights for GENERALIZED DICE LOSS (GDL)
    freq = 1.0 / datasetTrain.weights[1:]
    w = 1.0 / (freq * freq)
    w = w / w.sum() + 0.00001
    w_for_GDL = torch.from_numpy(w)
    w_for_GDL = w_for_GDL.to(device)

    # Focal Tversky loss
    focal_tversky_gamma = torch.tensor(tversky_gamma)
    focal_tversky_gamma = focal_tversky_gamma.to(device)

    tversky_loss_alpha = torch.tensor(tversky_alpha)
    tversky_loss_beta = torch.tensor(1.0 - tversky_alpha)
    tversky_loss_alpha = tversky_loss_alpha.to(device)
    tversky_loss_beta = tversky_loss_beta.to(device)

    print("Training Network")
    for epoch in range(epochs):  # loop over the dataset multiple times

        net.train()
        optimizer.zero_grad()

        writer.add_scalar('LR/train', optimizer.param_groups[0]['lr'], epoch)

        loss_values = []
        for i, minibatch in enumerate(dataloaderTrain):
            # get the inputs
            images_batch = minibatch['image']
            labels_batch = minibatch['labels']

            if USE_CUDA:
                images_batch = images_batch.to(device)
                labels_batch = labels_batch.to(device)

            # forward+loss+backward
            outputs = net(images_batch)

            loss = computeLoss(loss_to_use, CEloss, w_for_GDL, tversky_loss_alpha, tversky_loss_beta, focal_tversky_gamma,
                               epoch, epochs_switch, epochs_transition, labels_batch, outputs)

            loss.backward()

            # TO AVOID MEMORY TROUBLE UPDATE WEIGHTS EVERY BATCH SIZE X BATCH MULT
            if (i+1)% batch_mult == 0:
                optimizer.step()
                optimizer.zero_grad()

            print(epoch, i, loss.item())
            loss_values.append(loss.item())

        mean_loss_train = sum(loss_values) / len(loss_values)
        print("Epoch: %d , Mean loss = %f" % (epoch, mean_loss_train))
        writer.add_scalar('Loss/train', mean_loss_train, epoch)

        ### VALIDATION ###
        if epoch > 0 and (epoch+1) % validation_frequency == 0:

            print("RUNNING VALIDATION.. ", end='')

            metrics_val, mean_loss_val = evaluateNetwork(datasetVal, dataloaderVal, loss_to_use, CEloss, w_for_GDL,
                                                         tversky_loss_alpha, tversky_loss_beta, focal_tversky_gamma,
                                                         epoch, epochs_switch, epochs_transition,
                                                         output_classes, net, flag_compute_mIoU=False)
            accuracy = metrics_val['Accuracy']
            jaccard_score = metrics_val['JaccardScore']

            scheduler.step(mean_loss_val)

            accuracy_training = 0.0
            jaccard_training = 0.0

            if flag_training_accuracy is True:
                metrics_train, mean_loss_train = evaluateNetwork(datasetTrain, dataloaderTrain, loss_to_use, CEloss, w_for_GDL,
                                                                 tversky_loss_alpha, tversky_loss_beta, focal_tversky_gamma,
                                                                 epoch, epochs_switch, epochs_transition,
                                                                 output_classes, net, flag_compute_mIoU=False)
                accuracy_training = metrics_train['Accuracy']
                jaccard_training = metrics_train['JaccardScore']


            #writer.add_scalar('Loss/train', mean_loss_train, epoch)
            writer.add_scalar('Loss/validation', mean_loss_val, epoch)
            writer.add_scalar('Accuracy/train', accuracy_training, epoch)
            writer.add_scalar('Accuracy/validation', accuracy, epoch)

            #if jaccard_score > best_jaccard_score:
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_jaccard_score = jaccard_score
                torch.save(net.state_dict(), save_network_as)
                # performance of the best accuracy network on the validation dataset
                metrics_filename = save_network_as[:len(save_network_as) - 4] + "-val-metrics.txt"
                saveMetrics(metrics_val, metrics_filename)
                if flag_training_accuracy is True:
                    metrics_filename = save_network_as[:len(save_network_as) - 4] + "-train-metrics.txt"
                    saveMetrics(metrics_train, metrics_filename)

            print("-> CURRENT BEST ACCURACY ", best_accuracy)


    # main loop ended - reload it and evaluate mIoU
    torch.cuda.empty_cache()
    del net
    net = None

    print("Final evaluation..")
    net = DeepLab(backbone='resnet', output_stride=16, num_classes=datasetTrain.num_classes)
    net.load_state_dict(torch.load(save_network_as))

    metrics_val, mean_loss_val = evaluateNetwork(datasetVal, dataloaderVal, loss_to_use, CEloss, w_for_GDL,
                                                 tversky_loss_alpha, tversky_loss_beta, focal_tversky_gamma,
                                                 epoch, epochs_switch, epochs_transition,
                                                 datasetVal.num_classes, net, flag_compute_mIoU=True)

    writer.add_hparams({'LR': learning_rate, 'Decay': L2_penalty, 'Loss': loss_to_use, 'Transition': epochs_transition,
                        'Gamma': tversky_gamma, 'Alpha': tversky_alpha }, {'hparam/Accuracy': best_accuracy, 'hparam/mIoU': best_jaccard_score})

    writer.close()

    print("***** TRAINING FINISHED *****")
    print("BEST ACCURACY REACHED ON THE VALIDATION SET: %.3f " % best_accuracy)