# Imports used by this section. Save_import and Loss_Error are the repo's own
# helper modules (saving/logging and loss computation, respectively).
import csv
import time

import torch
from torch import optim
from torch.autograd import Variable

import Loss_Error
import Save_import


def validation_loop(val_loader, network, epoch, parameters, timer_epoch):
    """
    Loop over the validation set and accumulate the validation error.

    :param val_loader: DataLoader that yields the inputs and targets of the validation dataset
    :param network: Network being trained
    :param epoch: Current epoch of the program
    :param parameters: Parameters of the network
    :param timer_epoch: Time at the beginning of the epoch
    :return: The mean validation error over the entire validation set. This function also saves this error.
    """
    # validation_error accumulates the error over the validation set
    validation_error = 0

    # Save the error of the validation dataset
    for i, (x_val_batch, y_val_batch, _) in enumerate(val_loader):

        # TODO: it is unnecessary to create a Variable for y
        if torch.cuda.is_available():
            x_val_batch = Variable(x_val_batch.cuda())
            y_val_batch = Variable(y_val_batch.cuda())
        else:
            x_val_batch, y_val_batch = Variable(x_val_batch), Variable(y_val_batch)

        validation_error += Save_import.save_error(x=x_val_batch,
                                                   y=y_val_batch,
                                                   network=network,
                                                   epoch=epoch,
                                                   set_type="validation",
                                                   parameters=parameters)

        # Similar to a "print", but into a text file
        with open(parameters.path_print, 'a') as txtfile:
            txtfile.write("\nEpoch : " + str(epoch) + ". Batch : " + str(i) +
                          ".\nValidation error : " + str(validation_error / (i + 1)) +
                          ".\nTime total batch : " +
                          Save_import.time_to_string(time.time() - timer_epoch) + "\n \n")

    # Divide by the number of mini-batches to get the mean error
    return validation_error / (i + 1)
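# The body of Save_import.save_error is not shown in this section. The sketch
# below is a hypothetical reconstruction, NOT the repo's implementation: it is
# inferred from the call sites above and from the inline CSV writing that
# appears in the pretrain batch_loop further down (row format
# [set_type, epoch, loss]). Only the call signature is taken from the source.
def _sketch_save_error(x, y, network, epoch, set_type, parameters,
                       loss=None, y_estimated=None):
    """Hypothetical stand-in for Save_import.save_error (assumption)."""
    # Run the forward pass only if the caller did not already provide it
    if y_estimated is None:
        y_estimated = network(x)
    # Compute the loss only if the caller did not already provide it
    if loss is None:
        loss = Loss_Error.criterion(y_estimated=y_estimated, y=y,
                                    parameters=parameters, global_IoU_modif=False)
    # Append one [set_type, epoch, loss] row to the CSV loss log
    row = [set_type, epoch, loss.data[0]]
    with open(parameters.path_CSV + "CSV_loss_" + parameters.name_network +
              str(parameters.train_number) + ".csv", 'a') as csvfile:
        csv.writer(csvfile, quoting=csv.QUOTE_NONNUMERIC).writerows([row])
    return loss.data[0]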
def validation_loop(val_loader, network, epoch, parameters, timer_epoch):
    """
    Loop over the validation set and accumulate the validation error.

    :param val_loader: DataLoader that yields the inputs and targets of the validation dataset
    :param network: Network being trained
    :param epoch: Current epoch of the program
    :param parameters: Parameters of the network
    :param timer_epoch: Time at the beginning of the epoch
    :return: The mean validation error over the entire validation set. This function also saves this error.
    """
    # validation_error accumulates the error over the validation set
    validation_error = 0

    # Save the error of the validation dataset
    for i, (x_val_batch, y_val_batch) in enumerate(val_loader):

        if torch.cuda.is_available():
            x_val_batch = Variable(x_val_batch.cuda())
            y_val_batch = Variable(y_val_batch.cuda())
        else:
            x_val_batch, y_val_batch = Variable(x_val_batch), Variable(y_val_batch)

        loss = Loss_Error.criterion_pretrain(y_estimated=network(x_val_batch),
                                             y=y_val_batch,
                                             parameters=parameters)
        # Accumulate the scalar loss so the mean promised by the docstring
        # can be returned at the end of the loop
        validation_error += loss.data[0]

        # Save the loss in the CSV log
        loss = ["validation", epoch, loss.data[0]]
        with open(parameters.path_CSV + "CSV_loss_" + parameters.name_network +
                  str(parameters.train_number) + ".csv", 'a') as csvfile:
            writer = csv.writer(csvfile, quoting=csv.QUOTE_NONNUMERIC)
            writer.writerows([loss])

        # Similar to a "print", but into a text file
        with open(parameters.path_print, 'a') as txtfile:
            txtfile.write("\nEpoch : " + str(epoch) + ". Batch : " + str(i) +
                          ".\nValidation error : " + str(loss[2]) +
                          ".\nTime total batch : " +
                          Save_import.time_to_string(time.time() - timer_epoch) + "\n \n")

    # Divide by the number of mini-batches to get the mean error
    return validation_error / (i + 1)
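# The CSV log written above holds one [set_type, epoch, loss] row per batch.
# Below is a small hypothetical helper (not part of the original pipeline)
# that reads such a file back and averages the loss per epoch, e.g. to plot
# learning curves.
def mean_loss_per_epoch(csv_path, set_type="validation"):
    """Return {epoch: mean loss} for one set_type from a CSV_loss file."""
    from collections import defaultdict

    sums, counts = defaultdict(float), defaultdict(int)
    with open(csv_path, 'r') as csvfile:
        for row in csv.reader(csvfile):
            # Skip blank lines and rows belonging to the other set
            if not row or row[0] != set_type:
                continue
            epoch, loss = int(float(row[1])), float(row[2])
            sums[epoch] += loss
            counts[epoch] += 1
    return {epoch: sums[epoch] / counts[epoch] for epoch in sums}

# Example usage (file name follows the convention used by the loops above):
# curve = mean_loss_per_epoch("CSV_loss_net0.csv", set_type="validation")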
def batch_loop(optimizer, train_loader, network, epoch, parameters, timer_batch, timer_epoch, inter_union=None):
    """
    Loop over the training set and update the network on each mini-batch.

    :param optimizer: Optimizer that holds the parameters of the Adam optimizer
    :param train_loader: DataLoader that yields the inputs and targets of the train dataset
    :param network: Network being trained
    :param epoch: Current epoch of the program
    :param parameters: Parameters of the network
    :param timer_batch: The time since the beginning of the batch
    :param timer_epoch: The time since the beginning of the epoch
    :param inter_union: Unused in this function
    :return: Nothing, but updates the network and saves the train error
    """
    train_error = 0

    # Loop over the mini-batches; the size of a mini-batch is defined in the train_loader
    for i, (x_batch, y_batch, _) in enumerate(train_loader):

        # Zero the gradient buffers
        optimizer.zero_grad()

        # Wrap the tensors into Variables
        if torch.cuda.is_available():
            x_batch, y_batch = Variable(x_batch.cuda()), Variable(y_batch.cuda())
        else:
            x_batch, y_batch = Variable(x_batch), Variable(y_batch)

        # Compute the forward pass
        y_batch_estimated = network(x_batch)

        # Get the error
        loss = Loss_Error.criterion(y_estimated=y_batch_estimated,
                                    y=y_batch,
                                    parameters=parameters,
                                    global_IoU_modif=False)

        # Compute the backward pass
        loss.backward()

        # Do the update according to the optimizer defined above
        optimizer.step()

        # Save the error of the training dataset
        train_error += Save_import.save_error(x=x_batch,
                                              y=y_batch,
                                              network=network,
                                              epoch=epoch,
                                              set_type="train",
                                              parameters=parameters,
                                              loss=loss,
                                              y_estimated=y_batch_estimated)

        # Similar to a "print", but into a text file
        with open(parameters.path_print, 'a') as txtfile:
            txtfile.write("\nEpoch : " + str(epoch) + ". Batch : " + str(i) +
                          ".\nTrain_Error : " + str(train_error / (i + 1)) + "\n" +
                          "Time batch : " + Save_import.time_to_string(time.time() - timer_batch) +
                          ".\nTime total batch : " +
                          Save_import.time_to_string(time.time() - timer_epoch) + "\n \n")
        timer_batch = time.time()

    return ()
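# Loss_Error.criterion is defined elsewhere in the repo and is not shown in
# this section. Given the IoU-related keyword and the (N, C, H, W) style of
# segmentation outputs, a loose sketch with a matching call signature could be
# a pixel-wise cross-entropy. This is an assumption for illustration only,
# not the repo's actual loss.
def _sketch_criterion(y_estimated, y, parameters, global_IoU_modif=False):
    """Hypothetical segmentation criterion (assumption, not the repo's)."""
    import torch.nn.functional as F
    # y_estimated: (N, C, H, W) class scores; y: (N, H, W) integer labels.
    # global_IoU_modif is accepted to match the call site; ignored in this sketch.
    return F.cross_entropy(y_estimated, y)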
def train(parameters, network, train_loader, val_loader):
    """
    Train the network over all remaining epochs.

    :param parameters: Parameters of the network
    :param network: Network being trained
    :param train_loader: DataLoader that yields the inputs and targets of the train dataset
    :param val_loader: DataLoader that yields the inputs and targets of the validation dataset
    :return: Nothing, but modifies the weights of the network and calls save_error to store the error.
    """
    # Store the time at the beginning of the training
    timer_init = time.time()

    # Create the optimizer
    optimizer = optim.Adam(params=network.parameters(),
                           lr=parameters.learning_rate,
                           betas=(parameters.beta1, parameters.beta2),
                           eps=parameters.epsilon,
                           weight_decay=parameters.weight_decay)

    # validation_error_min stores the lowest validation error reached so far;
    # it is initialised to a high value
    validation_error_min = 9999

    # Index of the next regular checkpoint. This value is 0 or 1: one checkpoint
    # is always kept untouched while the other one is overwritten.
    index_save_regular = 0

    # Loop from the current epoch (not 0 if we have already trained) to the last epoch
    initial_epoch = parameters.actual_epoch
    for epoch in range(initial_epoch, parameters.epoch_total):

        # Store the time at the beginning of each epoch
        timer_epoch = time.time()
        timer_batch = time.time()

        batch_loop(optimizer=optimizer,
                   train_loader=train_loader,
                   network=network,
                   epoch=epoch,
                   parameters=parameters,
                   timer_batch=timer_batch,
                   timer_epoch=timer_epoch)

        validation_error = validation_loop(val_loader=val_loader,
                                           network=network,
                                           epoch=epoch,
                                           parameters=parameters,
                                           timer_epoch=timer_epoch)

        # checkpoint saves the network if needed
        validation_error_min, index_save_regular = Save_import.checkpoint(
            validation_error=validation_error,
            validation_error_min=validation_error_min,
            index_save_regular=index_save_regular,
            epoch=epoch,
            network=network,
            parameters=parameters,
            optimizer=optimizer)

        # Update the optimizer
        # optimizer.param_groups[0]['lr'] = parameters.learning_rate / 10

        # Similar to a "print", but into a text file
        with open(parameters.path_print, 'a') as txtfile:
            txtfile.write("\nEnd of Epoch : " + str(epoch) + "/" + str(parameters.epoch_total - 1) +
                          ". Validation Loss : " + str(validation_error) +
                          ".\nTime Epoch : " + Save_import.time_to_string(time.time() - timer_epoch) +
                          ".\nTime total : " + Save_import.time_to_string(time.time() - timer_init) +
                          ".\n \n")

        if (epoch % 10) == 0:
            Save_import.organise_CSV(path_CSV=parameters.path_CSV,
                                     name_network=parameters.name_network,
                                     train_number=parameters.train_number)

        # Increment the current epoch
        parameters.actual_epoch += 1

    # Similar to a "print", but into a text file
    with open(parameters.path_print, 'a') as txtfile:
        txtfile.write("Finished. Total time : " +
                      Save_import.time_to_string(time.time() - timer_init) + "\n")
    return ()
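# A hypothetical driver showing how train() is meant to be wired up. The
# SimpleNamespace stands in for the repo's real parameters object and is built
# only from the attributes this section actually reads; the toy model and data
# are placeholders, and the call will only succeed once the repo's Save_import
# and Loss_Error modules and a compatible dataset are in place.
if __name__ == "__main__":
    from types import SimpleNamespace
    import torch.nn as nn
    from torch.utils.data import DataLoader, TensorDataset

    parameters = SimpleNamespace(
        learning_rate=1e-3, beta1=0.9, beta2=0.999,
        epsilon=1e-8, weight_decay=0,          # Adam hyper-parameters
        actual_epoch=0, epoch_total=10,        # resume point and last epoch
        path_print="print.txt",                # text log file
        path_CSV="./", name_network="net", train_number=0)  # CSV log naming

    # Toy data and model, only to make the sketch self-contained
    dataset = TensorDataset(torch.randn(64, 3), torch.randn(64, 1))
    train_loader = DataLoader(dataset, batch_size=8)
    val_loader = DataLoader(dataset, batch_size=8)
    network = nn.Linear(3, 1)

    train(parameters=parameters, network=network,
          train_loader=train_loader, val_loader=val_loader)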
def batch_loop(optimizer, train_loader, network, epoch, parameters, timer_batch, timer_epoch):
    """
    Loop over the training set and update the network on each mini-batch.

    :param optimizer: Optimizer that holds the parameters of the Adam optimizer
    :param train_loader: DataLoader that yields the inputs and targets of the train dataset
    :param network: Network being trained
    :param epoch: Current epoch of the program
    :param parameters: Parameters of the network
    :param timer_batch: The time since the beginning of the batch
    :param timer_epoch: The time since the beginning of the epoch
    :return: Nothing, but updates the network and saves the train error
    """
    # Loop over the mini-batches; the size of a mini-batch is defined in the train_loader
    for i, (x_batch, y_batch) in enumerate(train_loader):

        # Zero the gradient buffers
        optimizer.zero_grad()

        # Wrap the tensors into Variables
        if torch.cuda.is_available():
            x_batch, y_batch = Variable(x_batch.cuda()), Variable(y_batch.cuda())
        else:
            x_batch, y_batch = Variable(x_batch), Variable(y_batch)

        # Compute the forward pass
        y_batch_estimated = network(x_batch)

        # Debugging helper kept for reference: log the parameters of the first layer
        # count = 0
        # for child in network.children():
        #     if count == 0:
        #         for param in child.parameters():
        #             with open(parameters.path_print, 'a') as txtfile:
        #                 txtfile.write("param of linear1" + str(param) + "\n")
        #         break
        #     count += 1

        # Get the error
        loss = Loss_Error.criterion_pretrain(y_estimated=y_batch_estimated,
                                             y=y_batch,
                                             parameters=parameters)

        # Compute the backward pass
        loss.backward()

        # Do the update according to the optimizer defined above
        optimizer.step()

        # Update the optimizer (learning-rate decay, currently disabled)
        # optimizer.param_groups[0]['lr'] = parameters.learning_rate / (1 + (epoch - 390) * parameters.learning_rate_decay)

        # Save the error of the training dataset
        loss = ["train", epoch, loss.data[0]]

        # Save the loss in the CSV log
        with open(parameters.path_CSV + "CSV_loss_" + parameters.name_network +
                  str(parameters.train_number) + ".csv", 'a') as csvfile:
            writer = csv.writer(csvfile, quoting=csv.QUOTE_NONNUMERIC)
            writer.writerows([loss])

        # Similar to a "print", but into a text file
        with open(parameters.path_print, 'a') as txtfile:
            txtfile.write("\nEpoch : " + str(epoch) + ". Batch : " + str(i) +
                          ".\nTrain_Error : " + str(loss[2]) + "\n" +
                          "Time batch : " + Save_import.time_to_string(time.time() - timer_batch) +
                          ".\nTime total batch : " +
                          Save_import.time_to_string(time.time() - timer_epoch) + "\n \n")
        timer_batch = time.time()

    return ()
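# The commented-out learning-rate update above suggests an inverse-time decay
# schedule starting around epoch 390. The helper below is a hypothetical
# extraction of that same formula (an assumption, not part of the original
# pipeline); unlike the inline version, it updates every parameter group.
def decay_learning_rate(optimizer, parameters, epoch, start_epoch=390):
    """Apply inverse-time decay to all parameter groups after start_epoch."""
    if epoch < start_epoch:
        return
    new_lr = parameters.learning_rate / (
        1 + (epoch - start_epoch) * parameters.learning_rate_decay)
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr

# Example usage, once per epoch inside the training loop:
# decay_learning_rate(optimizer=optimizer, parameters=parameters, epoch=epoch)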