def main_program(path_data, path_save_model, path_load_existing_model, path_model_weights_test, use_gpu, do_training, do_testing, nb_row_per_classe, nb_epoch, batch_size, learning_rate, type_schedule):
    """Build the datasets, then optionally train and test the classifier.

    NOTE(review): this definition is shadowed by the second `main_program`
    defined later in this file; only the later definition is callable at
    runtime. Kept for history — consider deleting or renaming it.

    Args:
        path_data: directory containing the per-class CSV data files.
        path_save_model: where train_model() checkpoints the model.
        path_load_existing_model: checkpoint to resume training from (or None).
        path_model_weights_test: checkpoint to load for the test phase.
        use_gpu: move model/batches to CUDA when True.
        do_training / do_testing: phase toggles.
        nb_row_per_classe: number of training rows read per class.
        nb_epoch, batch_size, learning_rate, type_schedule: usual knobs.
    """
    # Label encoding and decoding dicts
    enc_dict, dec_dict = create_encoding_deconding_dict(path_data)

    # Datasets — validation reuses the first 100 rows *after* the training
    # rows (skip_rows skips the training span).
    size_image_train = 224
    data_train = create_huge_data_set(path_data, nb_rows=nb_row_per_classe,
                                      size_image=size_image_train,
                                      encoding_dict=enc_dict)
    data_valid = create_huge_data_set(path_data, nb_rows=100,
                                      size_image=size_image_train,
                                      skip_rows=range(1, nb_row_per_classe),
                                      encoding_dict=enc_dict)

    # Model
    model = create_model(use_gpu)
    if use_gpu:
        model.cuda()

    # Loss
    criterion = nn.CrossEntropyLoss()

    # Optimizer
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    # LR scheduler
    scheduler = create_scheduler(start_lr=learning_rate, type=type_schedule,
                                 optimizer=optimizer)

    # Data loaders
    train_loader = DataLoader(data_train, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(data_valid, batch_size=batch_size, shuffle=True)

    # Train
    if do_training:
        train_model(model, train_loader, valid_loader, nb_epoch, scheduler,
                    optimizer, criterion, use_gpu,
                    path_save=path_save_model,
                    path_start_from_existing_model=path_load_existing_model)

    # Test
    if do_testing:
        data_test = create_huge_data_set(
            path_data, nb_rows=100, size_image=size_image_train,
            skip_rows=range(1, nb_row_per_classe + 100),
            encoding_dict=enc_dict)
        test_loader = DataLoader(data_test, batch_size=batch_size)
        model_final, history = load_model_weights(
            model, path_model_weights_test, type="best", use_gpu=use_gpu,
            get_history=True)
        history.display()
        # FIX: with show_acc_per_class=True, calcul_metric_concours returns
        # five values (see the other call sites in this file); the original
        # three-value unpack would raise ValueError.
        acc, loss, score_top3, conf_mat, acc_per_class = calcul_metric_concours(
            model_final, test_loader, use_gpu=use_gpu, show_acc_per_class=True)
        print("Accuracy test: {}".format(acc))
        print("Score top 3 concours: {}".format(score_top3))
def main_program(path_data, path_save_model, path_load_existing_model, path_model_weights_test, use_gpu, do_training, do_testing, nb_row_per_classe, nb_generation_random_dataset_train, nb_row_class_valid, nb_row_class_test, skip_test, use_acc_proportionate_sampling, val_acc_class_save_name, nb_epoch, batch_size, learning_rate, type_schedule, seed):
    """Train on regenerated random datasets and/or test the classifier.

    Training loops `nb_generation_random_dataset_train` times, drawing a fresh
    random training set each iteration (optionally sampling classes in
    proportion to their validation error) and resuming from the previously
    saved checkpoint from the second iteration onward.

    Args:
        seed: RNG seed applied to torch, numpy, random and CUDA.
        use_acc_proportionate_sampling: bias random dataset generation toward
            classes with low validation accuracy.
        val_acc_class_save_name: file name used to exchange per-class
            validation accuracy between train_model and the sampler.
        (remaining arguments as in the data/training helpers they are
        forwarded to)
    """
    # FIX: seed every RNG from the `seed` argument — the original hard-coded
    # 123 everywhere and silently ignored the parameter.
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Label encoding/decoding dicts and per-class row counts
    enc_dict, dec_dict = create_encoding_deconding_dict(path_data)
    nb_ligne_dict = create_dict_nb_ligne(path_data)

    # Model
    model = create_model(use_gpu)
    if use_gpu:
        model.cuda()

    # Loss
    criterion = nn.CrossEntropyLoss()

    # Optimizer
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    # LR scheduler
    scheduler = create_scheduler(start_lr=learning_rate, type=type_schedule,
                                 optimizer=optimizer)

    # Validation dataset (fixed across training-set regenerations)
    size_image_train = 224
    data_valid = create_huge_data_set(path_data, nb_rows=nb_row_class_valid,
                                      size_image=size_image_train,
                                      encoding_dict=enc_dict)
    valid_loader = DataLoader(data_valid, batch_size=batch_size, shuffle=True)

    # Train
    if do_training:
        for i in range(nb_generation_random_dataset_train):
            data_train = generate_random_dataset(
                path_data, nb_row_class_valid, nb_row_class_test,
                nb_row_per_classe, dict_nb_lignes=nb_ligne_dict,
                size_image=size_image_train, encoding_dict=enc_dict,
                use_acc_proportionate_sampling=use_acc_proportionate_sampling,
                val_acc_class_save_name=val_acc_class_save_name)
            train_loader = DataLoader(data_train, batch_size=batch_size,
                                      shuffle=True)

            # After the first pass, resume from the checkpoint we just saved.
            if i > 0:
                path_load_existing_model = path_save_model

            train_model(model, train_loader, valid_loader, nb_epoch,
                        scheduler, optimizer, criterion, use_gpu,
                        path_save=path_save_model,
                        path_start_from_existing_model=path_load_existing_model,
                        val_acc_class_save_name=val_acc_class_save_name)

    # Test
    if do_testing:
        data_test = create_huge_data_set(path_data, nb_rows=nb_row_class_test,
                                         size_image=size_image_train,
                                         skip_rows=skip_test,
                                         encoding_dict=enc_dict)
        test_loader = DataLoader(data_test, batch_size=batch_size)
        model_final, history = load_model_weights(
            model, path_model_weights_test, type="best", use_gpu=use_gpu,
            get_history=True)
        history.display()
        acc, loss, score_top3, conf_mat, acc_per_class = calcul_metric_concours(
            model_final, test_loader, use_gpu=use_gpu, show_acc_per_class=True)
        print("Accuracy test: {}".format(acc))
        print("Score top 3 concours: {}".format(score_top3))
        print(acc_per_class)

        # Log experiment
        experiment_sacred.log_scalar("Test accuracy", acc)
        experiment_sacred.log_scalar("Test loss", loss)
        experiment_sacred.log_scalar("Test score top3", score_top3)
        experiment_sacred.log_scalar("Test confusion matrix", conf_mat)
        experiment_sacred.log_scalar("Test accuracy per class", acc_per_class)
def train_model(model, train_loader, val_loader, n_epoch, scheduler, optimizer, criterion, use_gpu=False, path_save=None, path_start_from_existing_model=None, val_acc_class_save_name=None):
    """Train `model`, checkpointing every epoch and tracking the best weights.

    Supports resuming: if `path_start_from_existing_model` points to an
    existing checkpoint, model/optimizer/scheduler state, history and the
    best-so-far accuracy are restored and training continues from the saved
    epoch.

    Args:
        model: network to train (already on GPU if use_gpu).
        train_loader / val_loader: DataLoaders for training and validation.
        n_epoch: total number of epochs (including already-completed ones
            when resuming).
        scheduler: LR scheduler stepped once per epoch.
        optimizer / criterion: usual training objects.
        use_gpu: move batches to CUDA when True.
        path_save: checkpoint path written at the end of every epoch.
        path_start_from_existing_model: checkpoint to resume from, or None.
        val_acc_class_save_name: accepted for compatibility with the caller
            in this file. TODO(review): per-class validation accuracy is
            computed here but not yet persisted under this name — confirm
            intended semantics with generate_random_dataset().
    """
    # Defaults for a fresh run; FIX: `best_model_weights` was commented out
    # in the original, causing a NameError at save time whenever no epoch
    # improved on best_acc.
    best_model_weights = copy.deepcopy(model.state_dict())
    history = History()
    next_epoch = 0
    best_acc = 0
    loss = None  # saved in the checkpoint even if train_loader is empty

    if path_start_from_existing_model is not None and os.path.isfile(
            path_start_from_existing_model):
        # Restore full training state
        checkpoint = torch.load(path_start_from_existing_model)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        next_epoch = checkpoint['epoch'] + 1
        loss = checkpoint['loss']
        history = checkpoint["history"]
        best_acc = checkpoint["best_acc"]
        best_model_weights = checkpoint["best_model_weights"]
        scheduler.load_state_dict(checkpoint["lr_scheduler_state"])
        print("Modèle chargé pour entraînement")
    else:
        print("Aucun modèle chargé pour entraînement")

    # Training — FIX: start from `next_epoch` (the original used range(0, ...)
    # which restarted from scratch after loading a checkpoint).
    for epoch in range(next_epoch, n_epoch):
        model.train()

        for j, batch in enumerate(train_loader):
            inputs, targets = batch
            if use_gpu:
                inputs = inputs.cuda()
                targets = targets.cuda()

            optimizer.zero_grad()
            output = model(inputs)
            loss = criterion(output, targets)
            loss.backward()
            optimizer.step()

        # FIX: step the scheduler after the optimizer updates of the epoch;
        # stepping before (as originally written) skips the first LR value
        # (PyTorch >= 1.1 convention).
        scheduler.step()

        train_acc, train_loss, train_top3_score, train_conf_mat, train_acc_per_class = calcul_metric_concours(
            model, train_loader, use_gpu, show_acc_per_class=True)
        val_acc, val_loss, val_top3_score, val_conf_mat, val_acc_per_class = calcul_metric_concours(
            model, val_loader, use_gpu, show_acc_per_class=True)

        # Current LR (last param group wins, as in the original)
        for param_group in optimizer.param_groups:
            current_lr = param_group["lr"]

        history.save(train_acc, val_acc, train_loss, val_loss, current_lr)
        print('Epoch {} - Train acc: {:.2f} - Val acc: {:.2f} - Train loss: {:.4f} - Val loss: {:.4f} -Val score top3 :{:.4f}'
              .format(epoch, train_acc, val_acc, train_loss, val_loss, val_top3_score))
        print(val_acc_per_class)

        # Track best model
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_weights = copy.deepcopy(model.state_dict())

        # Checkpoint
        if path_save is not None:
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss,
                    "history": history,
                    "best_acc": best_acc,
                    "best_model_weights": best_model_weights,
                    "lr_scheduler_state": scheduler.state_dict()
                }, path_save)