import copy
import os
import random

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# Project-local helpers (create_encoding_deconding_dict, create_huge_data_set,
# create_model, create_scheduler, load_model_weights, calcul_metric_concours,
# generate_random_dataset, create_dict_nb_ligne, History, experiment_sacred)
# are assumed to be importable from the surrounding project.


def main_program(path_data, path_save_model, path_load_existing_model,
                 path_model_weights_test, use_gpu, do_training, do_testing,
                 nb_row_per_classe, nb_epoch, batch_size, learning_rate,
                 type_schedule):

    # Label encoding and decoding dicts
    enc_dict, dec_dict = create_encoding_deconding_dict(path_data)

    # Dataset
    size_image_train = 224
    data_train = create_huge_data_set(path_data,
                                      nb_rows=nb_row_per_classe,
                                      size_image=size_image_train,
                                      encoding_dict=enc_dict)
    data_valid = create_huge_data_set(path_data,
                                      nb_rows=100,
                                      size_image=size_image_train,
                                      skip_rows=range(1, nb_row_per_classe),
                                      encoding_dict=enc_dict)

    # Model
    model = create_model(use_gpu)

    if use_gpu:
        model.cuda()

    # Loss
    criterion = nn.CrossEntropyLoss()

    # Optimizer
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    # LR scheduler
    scheduler = create_scheduler(start_lr=learning_rate,
                                 type=type_schedule,
                                 optimizer=optimizer)

    # Data loaders
    train_loader = DataLoader(data_train, batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(data_valid, batch_size=batch_size, shuffle=True)

    # Train
    if do_training:
        train_model(model,
                    train_loader,
                    valid_loader,
                    nb_epoch,
                    scheduler,
                    optimizer,
                    criterion,
                    use_gpu,
                    path_save=path_save_model,
                    path_start_from_existing_model=path_load_existing_model)

    # Test
    if do_testing:
        data_test = create_huge_data_set(path_data,
                                         nb_rows=100,
                                         size_image=size_image_train,
                                         skip_rows=range(
                                             1, nb_row_per_classe + 100),
                                         encoding_dict=enc_dict)
        test_loader = DataLoader(data_test, batch_size=batch_size)

        model_final, history = load_model_weights(model,
                                                  path_model_weights_test,
                                                  type="best",
                                                  use_gpu=use_gpu,
                                                  get_history=True)
        history.display()

        acc, loss, score_top3, conf_mat, acc_per_class = calcul_metric_concours(
            model_final, test_loader, use_gpu=use_gpu, show_acc_per_class=True)

        print("Accuracy test: {}".format(acc))
        print("Score top 3 concours: {}".format(score_top3))
Example 2
def main_program(path_data, path_save_model, path_load_existing_model,
                 path_model_weights_test, use_gpu, do_training, do_testing,
                 nb_row_per_classe, nb_generation_random_dataset_train,
                 nb_row_class_valid, nb_row_class_test, skip_test,
                 use_acc_proportionate_sampling, val_acc_class_save_name,
                 nb_epoch, batch_size, learning_rate, type_schedule, seed):

    # Seeding for reproducibility (use the seed argument rather than a
    # hard-coded value)
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Label encoding/decoding dicts and per-class row-count dict
    enc_dict, dec_dict = create_encoding_deconding_dict(path_data)
    nb_ligne_dict = create_dict_nb_ligne(path_data)

    # Model
    model = create_model(use_gpu)

    if use_gpu:
        model.cuda()

    # Loss
    criterion = nn.CrossEntropyLoss()

    # Optimizer
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    # LR scheduler
    scheduler = create_scheduler(start_lr=learning_rate,
                                 type=type_schedule,
                                 optimizer=optimizer)

    # Dataset
    size_image_train = 224
    # data_train = create_huge_data_set(path_data, nb_rows=nb_row_per_classe, size_image=size_image_train,encoding_dict=enc_dict)
    data_valid = create_huge_data_set(path_data,
                                      nb_rows=nb_row_class_valid,
                                      size_image=size_image_train,
                                      encoding_dict=enc_dict)

    # Data loaders
    # train_loader=DataLoader(data_train,batch_size=batch_size,shuffle=True)
    valid_loader = DataLoader(data_valid, batch_size=batch_size, shuffle=True)

    # Train
    if do_training:

        for i in range(nb_generation_random_dataset_train):
            data_train = generate_random_dataset(
                path_data,
                nb_row_class_valid,
                nb_row_class_test,
                nb_row_per_classe,
                dict_nb_lignes=nb_ligne_dict,
                size_image=size_image_train,
                encoding_dict=enc_dict,
                use_acc_proportionate_sampling=use_acc_proportionate_sampling,
                val_acc_class_save_name=val_acc_class_save_name)

            train_loader = DataLoader(data_train,
                                      batch_size=batch_size,
                                      shuffle=True)

            # After the first random dataset, resume from the latest
            # checkpoint instead of the externally supplied model
            if i > 0:
                path_load_existing_model = path_save_model

            train_model(
                model,
                train_loader,
                valid_loader,
                nb_epoch,
                scheduler,
                optimizer,
                criterion,
                use_gpu,
                path_save=path_save_model,
                path_start_from_existing_model=path_load_existing_model,
                val_acc_class_save_name=val_acc_class_save_name)

    # Test
    if do_testing:
        data_test = create_huge_data_set(path_data,
                                         nb_rows=nb_row_class_test,
                                         size_image=size_image_train,
                                         skip_rows=skip_test,
                                         encoding_dict=enc_dict)
        test_loader = DataLoader(data_test, batch_size=batch_size)

        model_final, history = load_model_weights(model,
                                                  path_model_weights_test,
                                                  type="best",
                                                  use_gpu=use_gpu,
                                                  get_history=True)
        history.display()

        acc, loss, score_top3, conf_mat, acc_per_class = calcul_metric_concours(
            model_final, test_loader, use_gpu=use_gpu, show_acc_per_class=True)

        print("Accuracy test: {}".format(acc))
        print("Score top 3 concours: {}".format(score_top3))
        print(acc_per_class)

        # Log experiment metrics to Sacred
        experiment_sacred.log_scalar("Test accuracy", acc)
        experiment_sacred.log_scalar("Test loss", loss)
        experiment_sacred.log_scalar("Test score top3", score_top3)
        experiment_sacred.log_scalar("Test confusion matrix", conf_mat)
        experiment_sacred.log_scalar("Test accuracy per class", acc_per_class)
def train_model(model,
                train_loader,
                val_loader,
                n_epoch,
                scheduler,
                optimizer,
                criterion,
                use_gpu=False,
                path_save=None,
                path_start_from_existing_model=None,
                val_acc_class_save_name=None):

    if path_start_from_existing_model is not None and os.path.isfile(
            path_start_from_existing_model):

        # Resume from a saved checkpoint
        checkpoint = torch.load(path_start_from_existing_model)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        next_epoch = checkpoint['epoch'] + 1
        loss = checkpoint['loss']
        history = checkpoint["history"]
        best_acc = checkpoint["best_acc"]
        best_model_weights = checkpoint["best_model_weights"]
        scheduler.load_state_dict(checkpoint["lr_scheduler_state"])

        print("Model loaded for training")

    else:
        # Fresh start: initialize best_model_weights here so the checkpoint
        # save below never references an undefined name
        best_model_weights = copy.deepcopy(model.state_dict())
        history = History()
        next_epoch = 0
        best_acc = 0
        print("No existing model loaded; training from scratch")

    # Training loop (resumes at next_epoch when a checkpoint was loaded)
    for epoch in range(next_epoch, n_epoch):
        model.train()
        for j, batch in enumerate(train_loader):

            inputs, targets = batch
            if use_gpu:
                inputs = inputs.cuda()
                targets = targets.cuda()

            optimizer.zero_grad()
            output = model(inputs)

            loss = criterion(output, targets)
            loss.backward()
            optimizer.step()

        # Step the LR scheduler once per epoch, after the optimizer updates
        # (recommended ordering since PyTorch 1.1)
        scheduler.step()

        train_acc, train_loss, train_top3_score, train_conf_mat, train_acc_per_class = calcul_metric_concours(
            model, train_loader, use_gpu, show_acc_per_class=True)
        val_acc, val_loss, val_top3_score, val_conf_mat, val_acc_per_class = calcul_metric_concours(
            model, val_loader, use_gpu, show_acc_per_class=True)

        # Assumption: persist per-class validation accuracy so that the
        # acc-proportionate sampling in the second example can read it back
        if val_acc_class_save_name is not None:
            np.save(val_acc_class_save_name, val_acc_per_class)

        # Current LR (reads the last param group)
        for param_group in optimizer.param_groups:
            current_lr = param_group["lr"]

        history.save(train_acc, val_acc, train_loss, val_loss, current_lr)
        print(
            'Epoch {} - Train acc: {:.2f} - Val acc: {:.2f} - Train loss: {:.4f} - Val loss: {:.4f} - Val top-3 score: {:.4f}'
            .format(epoch, train_acc, val_acc, train_loss, val_loss,
                    val_top3_score))

        print(val_acc_per_class)

        # Track the best weights by validation accuracy
        if val_acc > best_acc:
            best_acc = val_acc
            best_model_weights = copy.deepcopy(model.state_dict())

        # Save a full training checkpoint
        if path_save is not None:
            torch.save(
                {
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss,
                    "history": history,
                    "best_acc": best_acc,
                    "best_model_weights": best_model_weights,
                    "lr_scheduler_state": scheduler.state_dict()
                }, path_save)
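
train_model also depends on a project-local History class, used above as history.save(...) and history.display(). The real class is not shown in the source; here is a minimal sketch consistent with those two calls.

class History:
    # Hypothetical sketch matching the save/display calls above;
    # the project's actual History class may differ.
    def __init__(self):
        self.train_acc, self.val_acc = [], []
        self.train_loss, self.val_loss = [], []
        self.lr = []

    def save(self, train_acc, val_acc, train_loss, val_loss, lr):
        # Append one epoch's metrics
        self.train_acc.append(train_acc)
        self.val_acc.append(val_acc)
        self.train_loss.append(train_loss)
        self.val_loss.append(val_loss)
        self.lr.append(lr)

    def display(self):
        # Print a per-epoch summary of the recorded metrics
        for i in range(len(self.train_acc)):
            print("Epoch {}: train_acc={:.2f} val_acc={:.2f} "
                  "train_loss={:.4f} val_loss={:.4f} lr={}".format(
                      i, self.train_acc[i], self.val_acc[i],
                      self.train_loss[i], self.val_loss[i], self.lr[i]))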