Example 1
def main():

    ########################################################################
    ######################## training parameters ###########################
    ########################################################################

    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset',
                        type=str,
                        default='ImageNet',
                        metavar='N',
                        help='dataset to run experiments on')
    parser.add_argument(
        '--batch_size',
        type=int,
        default=256,
        metavar='N',
        help=
        'input batch size for training (default: 256; note that a batch size of 64 gives worse performance on ImageNet, so keep the default)'
    )
    parser.add_argument('--exp',
                        type=str,
                        default='default',
                        metavar='N',
                        help='name of experiment')
    parser.add_argument('--logits_exp',
                        type=str,
                        default='default',
                        metavar='N',
                        help='name of experiment containing logits')
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=5021,
                        metavar='S',
                        help='random seed (default: 5021)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.1,
                        metavar='LR',
                        help='learning rate (default: 0.1)')
    parser.add_argument('--weight_decay',
                        type=float,
                        default=5 * 1e-4,
                        help='weight decay (default: 5e-4)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.9,
                        metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--step_size',
                        type=float,
                        default=30,
                        metavar='M',
                        help='StepLR scheduler step size (default: 30)')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.1,
                        metavar='M',
                        help='StepLR scheduler decay factor (default: 0.1)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument(
        '--stopping_criterion',
        type=int,
        default=30,
        metavar='N',
    )
    parser.add_argument('--test',
                        action='store_true',
                        default=False,
                        help='test mode')
    parser.add_argument('--load_model',
                        type=str,
                        default=None,
                        help='model to initialise from')

    args = parser.parse_args()

    print("\n==================Options=================")
    pprint(vars(args), indent=4)
    print("==========================================\n")

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    # make everything deterministic, reproducible
    if (args.seed is not None):
        print('Seeding everything with seed {}.'.format(args.seed))
        seed_everything(args.seed)
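        # seed_everything is defined elsewhere in this repo; it presumably seeds
        # Python's random module, NumPy and PyTorch (torch.manual_seed /
        # torch.cuda.manual_seed_all) and sets torch.backends.cudnn.deterministic
        # so that runs are reproducible.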
    else:
        print('Note : Seed is random.')

    device = torch.device("cuda" if use_cuda else "cpu")

    exp_dir = os.path.join('checkpoint', args.exp)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
    dataset = args.dataset
    num_classes = 1000 if dataset.lower() == 'imagenet' else 365
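    # ImageNet-LT has 1000 classes; the other supported dataset (Places-LT) has 365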

    ########################################################################
    ########################          load data           ####################
    ########################################################################

    datadir = './checkpoint/{}'.format(args.logits_exp)

    if (not args.test):
        data_manyshot = torch.load('{}/results_val_manyshot.pickle'.format(
            datadir))  # for experts with reject option
        data_mediumshot = torch.load('{}/results_val_mediumshot.pickle'.format(
            datadir))  # for experts with reject option
        data_fewshot = torch.load('{}/results_val_fewshot.pickle'.format(
            datadir))  # for experts with reject option

    else:
        data_manyshot = torch.load(
            '{}/results_test_aligned_manyshot.pickle'.format(
                datadir))  # for experts with reject option
        data_mediumshot = torch.load(
            '{}/results_test_aligned_mediumshot.pickle'.format(
                datadir))  # for experts with reject option
        data_fewshot = torch.load(
            '{}/results_test_aligned_fewshot.pickle'.format(
                datadir))  # for experts with reject option
        data_general = torch.load(
            '{}/results_test_aligned_general.pickle'.format(datadir))

    manyshot_logits = data_manyshot['logits'].clone().detach()
    mediumshot_logits = data_mediumshot['logits'].clone().detach()
    fewshot_logits = data_fewshot['logits'].clone().detach()
    labels = data_manyshot['labels'] if not args.test else data_general[
        'labels']

    manyshotClassMask, mediumshotClassMask, fewshotClassMask = data_manyshot[
        'class_mask'], data_mediumshot['class_mask'], data_fewshot[
            'class_mask']

    # logit tuning to correct for open set sampling ratio
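    # The last logit is the open-set ('dustbin') class; subtracting log(2 / (1 + ratio))
    # is assumed to undo the closed-set to open-set sampling ratio (16 or 8) used when
    # each expert was trained, so the rejection logit is calibrated for evaluation.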
    if (dataset.lower() == 'imagenet'):
        manyshot_logits[:, -1] = manyshot_logits[:, -1] - np.log(2 / (1 + 16))
        mediumshot_logits[:, -1] = mediumshot_logits[:, -1] - np.log(2 / (1 + 16))
        fewshot_logits[:, -1] = fewshot_logits[:, -1] - np.log(2 / (1 + 16))

    else:
        manyshot_logits[:, -1] = manyshot_logits[:, -1] - np.log(2 / (1 + 16))
        mediumshot_logits[:, -1] = mediumshot_logits[:, -1] - np.log(2 / (1 + 8))
        fewshot_logits[:, -1] = fewshot_logits[:, -1] - np.log(2 / (1 + 8))

    manyshot_features = manyshot_logits.data.cpu().numpy()
    mediumshot_features = mediumshot_logits.data.cpu().numpy()
    fewshot_features = fewshot_logits.data.cpu().numpy()
    labels = labels.data.cpu().numpy()

    if (not args.test):
        train_loader = torch.utils.data.DataLoader(Calibration_Dataset(
            orig_txt='./data/{}_LT/{}_LT_train.txt'.format(
                args.dataset, args.dataset),
            manyshot_features=manyshot_features,
            mediumshot_features=mediumshot_features,
            fewshot_features=fewshot_features,
            labels=labels),
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   **kwargs)
    else:
        test_loader = torch.utils.data.DataLoader(
            Calibration_Dataset(orig_txt='./data/{}_LT/{}_LT_train.txt'.format(
                args.dataset, args.dataset),
                                manyshot_features=manyshot_features,
                                mediumshot_features=mediumshot_features,
                                fewshot_features=fewshot_features,
                                labels=labels),
            batch_size=args.batch_size,
            shuffle=False,
            **kwargs)  # don't shuffle the test set, as is standard

    ########################################################################
    ######################## initialise model and optimizer ################
    ########################################################################

    model = CalibrateExperts(args.dataset.lower(), manyshotClassMask,
                             mediumshotClassMask, fewshotClassMask).cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=args.step_size,
                                          gamma=args.gamma)
    print(
        'Using StepLR scheduler with params, stepsize : {}, gamma : {}'.format(
            args.step_size, args.gamma))

    if (args.test):
        pretrained_model = torch.load(args.load_model)
        weights = pretrained_model['state_dict_best']['model']
        weights = {
            k:
            weights['module.' + k] if 'module.' + k in weights else weights[k]
            for k in model.state_dict()
        }
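        # the key remapping handles checkpoints saved from a DataParallel-wrapped
        # model, whose state_dict keys carry a 'module.' prefix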
        model.load_state_dict(weights)  # loading model weights
        print('Loaded pretrained model.')

    ########################################################################
    ######################## training with early stopping ##################
    ########################################################################

    if (not args.test):

        results = vars(args)
        results['train_losses'], results['train_accuracies'] = [], []
        best_acc, best_epoch = 0, 0

        epoch = 1
        while (True):

            sys.stdout.flush()

            train_loss, train_acc = train(args, model, device, train_loader,
                                          optimizer, scheduler, epoch)

            results['train_losses'].append(train_loss)
            results['train_accuracies'].append(train_acc)

            if (train_acc > best_acc):
                best_acc = train_acc
                best_epoch = epoch
                results['best_acc'], results[
                    'best_epoch'] = best_acc, best_epoch

                # save best model
                best_model_weights = {}
                best_model_weights['model'] = copy.deepcopy(model.state_dict())
                model_states = {
                    'epoch': epoch,
                    'best_epoch': best_epoch,
                    'state_dict_best': best_model_weights,
                    'best_acc': best_acc,
                }
                torch.save(model_states, os.path.join(exp_dir,
                                                      "best_model.pt"))

            elif (epoch > best_epoch + args.stopping_criterion):
                print('Best model obtained. Accuracy : ', best_acc)
                plot_curves(results, exp_dir)  # plot
                break

            savepath = os.path.join(exp_dir, 'results.pickle')
            with open(savepath, 'wb') as f:
                pickle.dump(results, f)
            plot_curves(results, exp_dir)  # plot
            epoch = epoch + 1

    ########################################################################
    ########################        testing         ########################
    ########################################################################

    else:

        loss, acc, preds = test(args, model, device, test_loader)

        if (dataset == 'ImageNet'):
            split_ranges = {
                'manyshot': [0, 19550],
                'medianshot': [19550, 43200],
                'fewshot': [43200, 50000],
                'all': [0, 50000]
            }  # imagenet
        else:
            split_ranges = {
                'manyshot': [0, 13200],
                'medianshot': [13200, 29400],
                'fewshot': [29400, 36500],
                'all': [0, 36500]
            }  # places

        for split_name, split_range in split_ranges.items():

            gt_target = torch.from_numpy(
                labels[int(split_range[0]):int(split_range[1])]).cuda()
            split_preds = preds[int(split_range[0]):int(split_range[1])]

            correct = split_preds.eq(
                gt_target.view_as(split_preds)).sum().item()
            accuracy = 100 * (correct / (split_range[1] - split_range[0]))

            print('{} accuracy : {:.2f}'.format(split_name, accuracy))
Example 2
def evaluate(model, load_path, plot):
    with open(load_path + 'trained_params_best.npz', 'rb') as f:
        loaded = np.load(f)
        blocks_model = Model(model.cost)
        params_dicts = blocks_model.get_parameter_dict()
        params_names = params_dicts.keys()
        for param_name in params_names:
            param = params_dicts[param_name]
            # '/f_6_.W' --> 'f_6_.W'
            slash_index = param_name.find('/')
            param_name = param_name[slash_index + 1:]
            assert param.get_value().shape == loaded[param_name].shape
            param.set_value(loaded[param_name])

    if plot:
        train_data_stream, valid_data_stream = get_streams(20)
        # T x B x F
        data = next(train_data_stream.get_epoch_iterator())
        cg = ComputationGraph(model.cost)
        f = theano.function(cg.inputs, [model.location, model.scale],
                            on_unused_input='ignore',
                            allow_input_downcast=True)
        res = f(data[1], data[0])
        for i in range(10):
            visualize_attention(data[0][:, i, :],
                                res[0][:, i, :], res[1][:, i, :],
                                image_shape=(512, 512), prefix=str(i))

        plot_curves(path=load_path,
                    to_be_plotted=['train_categoricalcrossentropy_apply_cost',
                                   'valid_categoricalcrossentropy_apply_cost'],
                    yaxis='Cross Entropy',
                    titles=['train', 'valid'],
                    main_title='CE')

        plot_curves(path=load_path,
                    to_be_plotted=['train_learning_rate',
                                   'train_learning_rate'],
                    yaxis='lr',
                    titles=['train', 'train'],
                    main_title='lr')

        plot_curves(path=load_path,
                    to_be_plotted=['train_total_gradient_norm',
                                   'valid_total_gradient_norm'],
                    yaxis='GradientNorm',
                    titles=['train', 'valid'],
                    main_title='GradientNorm')

        for grad in ['_total_gradient_norm',
                     '_total_gradient_norm',
                     '_/lstmattention.W_patch_grad_norm',
                     '_/lstmattention.W_state_grad_norm',
                     '_/lstmattention.initial_cells_grad_norm',
                     '_/lstmattention.initial_location_grad_norm',
                     '_/lstmattention/lstmattention_mlp/linear_0.W_grad_norm',
                     '_/lstmattention/lstmattention_mlp/linear_1.W_grad_norm',
                     '_/mlp/linear_0.W_grad_norm',
                     '_/mlp/linear_1.W_grad_norm']:
            plot_curves(path=load_path,
                        to_be_plotted=['train' + grad,
                                       'valid' + grad],
                        yaxis='GradientNorm',
                        titles=['train',
                                'valid'],
                        main_title=grad.replace(
                            "_", "").replace("/", "").replace(".", ""))

        plot_curves(path=load_path,
                    to_be_plotted=[
                        'train_misclassificationrate_apply_error_rate',
                        'valid_misclassificationrate_apply_error_rate'],
                    yaxis='Error rate',
                    titles=['train', 'valid'],
                    main_title='Error')
        print('plot printed')
Example 3
def main():

    # # CELL 1
    # '''
    #  Import the general Python libraries
    # '''
    # import numpy as np
    # import matplotlib.pyplot as plt
    # import itertools
    # from sklearn.datasets import make_classification

    # '''
    #  Import the libraries specific to the assignment
    # '''
    # import utils
    # from linear_classifier import LinearClassifier
    # from two_layer_classifier import TwoLayerClassifier

    # %matplotlib inline
    # plt.rcParams['figure.figsize'] = (14.0, 8.0) # set default size of plots

    # %load_ext autoreload
    # %autoreload 2

    # CELL 2
    # Generate the data
    X_, y_ = make_classification(1000,
                                 n_features=2,
                                 n_redundant=0,
                                 n_informative=2,
                                 n_clusters_per_class=1,
                                 n_classes=3,
                                 random_state=6)

    # Standardize the data (mean = 0, standard deviation = 1)
    mean = np.mean(X_, axis=0)
    std = np.std(X_, axis=0)
    X_ = (X_ - mean) / std

    # Plot
    plt.figure(figsize=(8, 6))
    plt.scatter(X_[:, 0], X_[:, 1], c=y_, edgecolors='k', cmap=plt.cm.Paired)
    plt.show(block=False)

    # CELL 3
    num_val = 200
    num_test = 200
    num_train = 600
    np.random.seed(1)
    idx = np.random.permutation(len(X_))

    train_idx = idx[:num_train]
    val_idx = idx[num_train:num_train + num_val]
    test_idx = idx[-num_test:]

    X_train = X_[train_idx]
    y_train = y_[train_idx]
    X_val = X_[val_idx]
    y_val = y_[val_idx]
    X_test = X_[test_idx]
    y_test = y_[test_idx]

    # Plot
    plt.figure(figsize=(8, 6))
    plt.scatter(X_train[:, 0],
                X_train[:, 1],
                c=y_train,
                edgecolors='k',
                cmap=plt.cm.Paired)
    plt.title('Data train')
    plt.show(block=False)

    plt.figure(figsize=(8, 6))
    plt.scatter(X_val[:, 0],
                X_val[:, 1],
                c=y_val,
                edgecolors='k',
                cmap=plt.cm.Paired)
    plt.title('Data Validation')
    plt.show(block=False)

    plt.figure(figsize=(8, 6))
    plt.scatter(X_test[:, 0],
                X_test[:, 1],
                c=y_test,
                edgecolors='k',
                cmap=plt.cm.Paired)
    plt.title('Data test')
    plt.show(block=False)

    # CELL 4
    accu = utils.test_sklearn_svm(X_train, y_train, X_test, y_test)
    print('Test accuracy: {:.3f}'.format(accu))
    if accu < 0.7:
        print(
            'ERROR: The accuracy is too low. There is a problem with the data. You can try reshuffling the data (cell above).'
        )

# CELL 5
# First, check the model's prediction (the "forward pass")
    # 1. Build the model with random weights W
    model = LinearClassifier(X_train,
                             y_train,
                             X_val,
                             y_val,
                             num_classes=3,
                             bias=True)

    # 2. Call the function that computes the accuracy and the mean loss over the whole training set
    _, loss = model.global_accuracy_and_cross_entropy_loss(X_train, y_train)

    # 3. Compare with the expected result
    loss_attendu = -np.log(
        1.0 / 3.0)  # expected loss for random predictions, i.e. -log(1/3) ≈ 1.0986
    print('Output: {}  Expected: {}'.format(loss, loss_attendu))
    if abs(loss - loss_attendu) > 0.05:
        print('ERROR: the function output is incorrect.')
    else:
        print('SUCCESS')

# CELL 6
# Check: you should be able to overfit on a few samples.
    # If the accuracy stays low, your implementation has a bug.
    n_check = 5
    X_check = X_train[:n_check]
    y_check = y_train[:n_check]
    model = LinearClassifier(X_check,
                             y_check,
                             X_val,
                             y_val,
                             num_classes=3,
                             bias=True)
    loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train(
        num_epochs=10, lr=1.0, l2_reg=0.0)
    accu_train_finale = accu_train_curve[-1]
    print('Training accuracy, should be 1.0: {:.3f}'.format(
        accu_train_finale))
    if accu_train_finale < 0.9999:
        print('WARNING: the accuracy is not 100%.')
        utils.plot_curves(loss_train_curve, loss_val_curve, accu_train_curve,
                          accu_val_curve)
    else:
        print('SUCCESS')

# CELL 7
# Take a small sample again and test different values of l2_reg
    n_check = 5
    X_check = X_train[:n_check]
    y_check = y_train[:n_check]
    model = LinearClassifier(X_check,
                             y_check,
                             X_val,
                             y_val,
                             num_classes=3,
                             bias=True)

    for l2_r in np.arange(0, 1, 0.05):
        loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train(
            num_epochs=10, lr=1.0, l2_reg=l2_r)
        print(
            'l2_reg= {:.4f} >> Training loss/accuracy: {:.3f} {:.3f}'.format(
                l2_r, loss_train_curve[-1], accu_train_curve[-1]))

# CELL 8
# Instantiate and train our model, this time on the full dataset.
    model = LinearClassifier(X_train,
                             y_train,
                             X_val,
                             y_val,
                             num_classes=3,
                             bias=True)
    loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train(
        lr=0.001, num_epochs=25, l2_reg=0.01)

    # Plot the loss and the accuracy (% of correctly classified samples) at each iteration
    utils.plot_curves(loss_train_curve, loss_val_curve, accu_train_curve,
                      accu_val_curve)

    print('[Training]   Loss: {:.3f}   Accuracy: {:.3f}'.format(
        loss_train_curve[-1], accu_train_curve[-1]))
    print('[Validation] Loss: {:.3f}   Accuracy: {:.3f}'.format(
        loss_val_curve[-1], accu_val_curve[-1]))

    # CELL 9
    lr_choices = [1e-2, 1e-1, 1.0, 10.0]
    reg_choices = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6]
    lr_decay = 0.995  # learning rate is multiplied by this factor after each step

    best_accu = -1
    best_params = None
    best_model = None
    best_curves = None

    for lr, reg in itertools.product(lr_choices, reg_choices):
        params = (lr, reg)
        curves = model.train(num_epochs=25,
                             lr=lr,
                             l2_reg=reg,
                             lr_decay=lr_decay)
        loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = curves

        val_accu = accu_val_curve[-1]
        if val_accu > best_accu:
            print('Best val accuracy: {:.3f} | lr: {:.0e} | l2_reg: {:.0e}'.
                  format(val_accu, lr, reg))
            best_accu = val_accu
            best_params = params
            best_model = model
            best_curves = curves

    model = best_model
    utils.plot_curves(*best_curves)

    # CELL 10
    # Re-train the model with the best hyperparameters
    lr, reg = best_params
    model.train(num_epochs=25, lr=lr, l2_reg=reg, lr_decay=lr_decay)

    pred = model.predict(X_test)
    accu = (pred == y_test).mean()
    print('Test accuracy: {:.3f}'.format(accu))

    # CELL 11
    h = 0.01  # controls the resolution of the grid
    x_min, x_max = X_[:, 0].min() - .5, X_[:, 0].max() + .5  # grid limits
    y_min, y_max = X_[:, 1].min() - .5, X_[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))  # build the grid

    X_predict = np.c_[xx.ravel(),
                      yy.ravel()]  # turn the grid into a list of points
    Z = model.predict(X_predict)  # classify every point of the grid
    Z = Z.reshape(xx.shape)  # back to 2D

    plt.figure(figsize=(14, 8))
    plt.pcolormesh(
        xx, yy, Z,
        cmap=plt.cm.Paired)  # colour each cell according to the predictions

    X_plot, y_plot = X_train, y_train
    plt.scatter(X_plot[:, 0],
                X_plot[:, 1],
                c=y_plot,
                edgecolors='k',
                cmap=plt.cm.Paired)  # plot the data points

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())

    plt.title('Decision boundaries')
    plt.show(block=False)

    # CELL 12
    # Choose the type of data you want

    # IMPORTANT NOTE: you are encouraged to try different datasets. That said,
    # your solution will be tested with Ncircles (N=4), so you must test that option.
    dataset_type = 'Ncircles'
    if dataset_type == 'moons':
        X_, y_ = sklearn.datasets.make_moons(n_samples=200, noise=0.5)
        num_classes = 2
    elif dataset_type == 'gaussian_quantiles':
        X_, y_ = sklearn.datasets.make_gaussian_quantiles(n_samples=200,
                                                          n_classes=2)
        num_classes = 2
    elif dataset_type == '4blobs':
        d = 4
        c1a = np.random.randn(50, 2)
        c1b = np.random.randn(50, 2) + (d, d)
        c2a = np.random.randn(50, 2) + (0, d)
        c2b = np.random.randn(50, 2) + (d, 0)
        X_ = np.concatenate([c1a, c1b, c2a, c2b], axis=0)
        y_ = np.array([0] * 100 + [1] * 100)
        num_classes = 2
    elif dataset_type == '2circles':
        X_, y_ = sklearn.datasets.make_circles(n_samples=200)
        num_classes = 2
    elif dataset_type == 'Ncircles':
        samples_per_class = 100
        num_classes = 4
        angles = np.linspace(0, 2 * np.pi, samples_per_class)
        radius = 1.0 + np.arange(num_classes) * 0.3
        px = np.cos(angles[:, None]) * radius[None, :]  # (samples_per_class, num_classes)
        py = np.sin(angles[:, None]) * radius[None, :]  # (samples_per_class, num_classes)
        X_ = np.stack([px, py], axis=-1).reshape(
            (samples_per_class * num_classes, 2))
        X_ += np.random.randn(len(X_[:, 0]), 2) / 8
        y_ = np.array(list(range(num_classes)) * samples_per_class)
    else:
        print('Invalid dataset type')

# CELL 13
    plt.figure()
    plt.scatter(X_[:, 0], X_[:, 1], c=y_, cmap=plt.cm.Paired)
    plt.title('Full dataset')

    plt.show(block=False)

    # CELL 14
    train_proportion = 0.5
    val_proportion = 0.2
    num_train = int(len(X_) * train_proportion)
    num_val = int(len(X_) * val_proportion)

    np.random.seed(0)
    idx = np.random.permutation(len(X_))

    train_idx = idx[:num_train]
    val_idx = idx[num_train:num_train + num_val]
    test_idx = idx[num_train + num_val:]

    X_train = X_[train_idx]
    y_train = y_[train_idx]
    X_val = X_[val_idx]
    y_val = y_[val_idx]
    X_test = X_[test_idx]
    y_test = y_[test_idx]

    # CELL 15
    # Now display the training, validation and test data.
    plt.figure()
    plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=plt.cm.Paired)
    plt.title('Train')
    plt.show(block=False)

    plt.figure()
    plt.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=plt.cm.Paired)
    plt.title('Validation')
    plt.show(block=False)

    plt.figure()
    plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=plt.cm.Paired)
    plt.title('Test')
    plt.show(block=False)

    # CELL 16
    num_hidden_neurons = 10
    model = TwoLayerClassifier(X_train,
                               y_train,
                               X_val,
                               y_val,
                               num_features=2,
                               num_hidden_neurons=num_hidden_neurons,
                               num_classes=num_classes)

    # CELL 17
    # Check that the randomly initialized network outputs an equal prediction for every class
    num_hidden_neurons = 10
    model = TwoLayerClassifier(X_train,
                               y_train,
                               X_val,
                               y_val,
                               num_features=2,
                               num_hidden_neurons=num_hidden_neurons,
                               num_classes=num_classes)

    # 2. Call the function that computes the accuracy and the mean loss over the whole training set
    _, loss = model.global_accuracy_and_cross_entropy_loss(X_train, y_train, 0)

    # 3. Compare with the expected result
    loss_attendu = -np.log(
        1.0 /
        num_classes)  # expected loss for random predictions, i.e. -log(1/num_classes)
    print('Output: {}  Expected: {}'.format(loss, loss_attendu))
    if abs(loss - loss_attendu) > 0.05:
        print('ERROR: the function output is incorrect.')
    else:
        print('SUCCESS')

    # CELL 18
    # Check that increasing the L2 regularization also increases the loss
    for l2_r in np.arange(0, 2, 0.1):
        _, loss = model.global_accuracy_and_cross_entropy_loss(
            X_train, y_train, l2_r)
        print(
            'l2_reg= {:.4f} >> Training loss: {:.3f}'.format(
                l2_r, loss))

# CELL 19
# Check: you should be able to overfit on a few samples.
    # If the accuracy stays low, your implementation has a bug.
    n_check = 5
    X_check = X_train[:n_check]
    y_check = y_train[:n_check]
    model = TwoLayerClassifier(X_check,
                               y_check,
                               X_val,
                               y_val,
                               num_features=2,
                               num_hidden_neurons=num_hidden_neurons,
                               num_classes=num_classes)

    loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train(
        num_epochs=200, lr=0.01, l2_reg=0.0)
    print('Training accuracy, should be 1.0: {:.3f}'.format(
        accu_train_curve[-1]))
    if accu_train_curve[-1] < 0.98:
        print('WARNING: the accuracy is not 100%.')
        utils.plot_curves(loss_train_curve, loss_val_curve, accu_train_curve,
                          accu_val_curve)
    else:
        print('SUCCESS')

    # CELL 20
    # Check that training with increasing L2 regularization increases the loss and eventually decreases the accuracy
    for l2_r in np.arange(0, 1, 0.1):
        loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train(
            num_epochs=200, lr=0.01, l2_reg=l2_r)
        print(
            'l2_reg= {:.4f} >> Training loss/accuracy: {:.3f} {:.3f}'.format(
                l2_r, loss_train_curve[-1], accu_train_curve[-1]))

# CELL 21
# On instancie notre modèle; cette fois-ci avec les données complètes.
    num_hidden_neurons = 20
    model = TwoLayerClassifier(X_train,
                               y_train,
                               X_val,
                               y_val,
                               num_features=2,
                               num_hidden_neurons=num_hidden_neurons,
                               num_classes=num_classes,
                               activation='relu')

    # CELL 22
    loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train(
        num_epochs=200, lr=1e-2, l2_reg=0.0, momentum=0.5)

    # Plot the loss and the accuracy (% of correctly classified samples) at each iteration
    utils.plot_curves(loss_train_curve, loss_val_curve, accu_train_curve,
                      accu_val_curve)

    print('[Training]   Loss: {:.3f}   Accuracy: {:.3f}'.format(
        loss_train_curve[-1], accu_train_curve[-1]))
    print('[Validation] Loss: {:.3f}   Accuracy: {:.3f}'.format(
        loss_val_curve[-1], accu_val_curve[-1]))

    # CELL 23
    # Find the best hyperparameters lr and l2_reg
    lr_choices = [1e-4, 1e-3, 1e-2]
    reg_choices = [1e-1, 1e-2, 1e-3, 1e-4, 0]
    lr_decay = 1.0  # 0.995  # learning rate is multiplied by this factor after each step

    best_accu = -1
    best_params = None
    best_model = None
    best_curves = None

    for lr, reg in itertools.product(lr_choices, reg_choices):
        params = (lr, reg)
        curves = model.train(num_epochs=50,
                             lr=lr,
                             l2_reg=reg,
                             lr_decay=lr_decay,
                             momentum=0.5)
        loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = curves

        val_accu = accu_val_curve[-1]
        if val_accu > best_accu:
            print('Best val accuracy: {:.3f} | lr: {:.0e} | l2_reg: {:.0e}'.
                  format(val_accu, lr, reg))
            best_accu = val_accu
            best_params = params
            best_model = model
            best_curves = curves
        else:
            print('accuracy: {:.3f} | lr: {:.0e} | l2_reg: {:.0e}'.format(
                val_accu, lr, reg))

    model = best_model
    utils.plot_curves(*best_curves)

    # CELL 24
    # Re-train the model with the best hyperparameters
    lr, reg = best_params
    print(best_params)
    curves = model.train(num_epochs=200, lr=lr, l2_reg=reg, momentum=0.5)
    loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = curves

    pred = model.predict(X_test)
    accu = (pred == y_test).mean()
    print('Test accuracy: {:.3f}'.format(accu))
    utils.plot_curves(*curves)

    # CELL 25
    # Visualization of the results

    h = 0.05  # controls the resolution of the grid
    x_min, x_max = X_[:, 0].min() - .5, X_[:, 0].max() + .5  # grid limits
    y_min, y_max = X_[:, 1].min() - .5, X_[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))  # build the grid

    X_predict = np.c_[xx.ravel(),
                      yy.ravel()]  # turn the grid into a list of points
    Z = model.predict(X_predict)  # classify every point of the grid
    Z = Z.reshape(xx.shape)  # back to 2D

    plt.figure(figsize=(14, 8))
    plt.pcolormesh(
        xx, yy, Z,
        cmap=plt.cm.Paired)  # colour each cell according to the predictions

    X_plot, y_plot = X_, y_
    plt.scatter(X_plot[:, 0],
                X_plot[:, 1],
                c=y_plot,
                edgecolors='k',
                cmap=plt.cm.Paired)  # plot the data points

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())

    plt.title('Decision boundaries')
    plt.show()
Example 4
def train_model_1(model,
                  n_epoch,
                  labelsdict,
                  criterion,
                  optimizer,
                  device,
                  trainloader,
                  validloader,
                  train_data,
                  model_name,
                  model_path,
                  model_path_best,
                  loss_graph,
                  accuracy_graph,
                  start_epoch=0,
                  valid_loss=1000):
    """
    Commence training of model
    
    model: model used
    n_epoch: number of epoch used for training
    labelsdict: dictionary containing class names which correnspond to their respective indexes
    optimizer: choice of optimizer use for training
    device: 'cuda' or 'cpu' (speed up training)
    trainloader: input training data split in batches
    validloader: input validation data split in batches
    train_data: input training data
    model_name: name of model indicated
    model_path: path where model checkpoint is saved at every epoch
    model_path_best: path where model yields best training result is saved (lowest val acc)
    loss_graph: path of graph indicating training and validation losses of model is saved
    accuracy_graph: path of graph indicating training and validation accuracies of model is saved
    start_epoch: indicate start epoch.(where start epoch != 0 when model is not trained from scratch but loaded and retrained)
    valid_acc: indicate value of best validation accuracy during point of training
    """
    print(
        f'Training custom CNN Model to distinguish normal and infected lungs')
    print(f'total epochs: {n_epoch}')
    if start_epoch != 0:
        print(f'Retraining model continuing from epoch {start_epoch+1}')
    n_in = next(model.fc2.modules()).in_features
    model.to(device)
    start = time.time()
    epochs = n_epoch
    steps = 0
    running_loss = 0
    running_acc = 0
    print_every = len(trainloader)
    train_loss = []
    val_loss = []
    train_acc = []
    val_acc = []
    val_loss_max = valid_loss
    Singapore = pytz.timezone('Asia/Singapore')
    for e in range(start_epoch, epochs):
        # Make sure training is on
        model.train()
        for images, labels, path in trainloader:  # for each batch
            images, labels = images.to(device), labels.to(device)

            steps += 1

            optimizer.zero_grad()
            output = model.forward(images)
            # getting loss
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            # getting accuracy
            ps = torch.exp(output)
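            # output is assumed to hold log-probabilities (e.g. from log_softmax),
            # so exp() recovers class probabilities before taking the argmax below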
            equality = (labels == ps.max(dim=1)[1])

            running_acc += equality.type(torch.FloatTensor).mean()
            running_loss += loss.item()

            # At the end of every epoch...
            if steps % print_every == 0:
                # Eval mode for predictions
                model.eval()
                # Turn off gradients for validation
                with torch.no_grad():
                    test_loss, accuracy = validation(model, validloader,
                                                     criterion, device)
                # log results at every epoch
                print(
                    "Epoch: {}/{} - ".format(e + 1, epochs),
                    "Time: {} ".format(datetime.now(Singapore)),
                    "Training Loss: {:.3f} - ".format(running_loss /
                                                      len(trainloader)),
                    "Validation Loss: {:.3f} - ".format(test_loss /
                                                        len(validloader)),
                    "Training Accuracy: {:.3f} - ".format(running_acc /
                                                          len(trainloader)),
                    "Validation Accuracy: {:.3f}".format(accuracy /
                                                         len(validloader)))

                # saving results into a list for plotting
                train_loss.append(running_loss / print_every)
                val_loss.append(test_loss / len(validloader))
                train_acc.append(running_acc / len(trainloader))
                val_acc.append(accuracy / len(validloader))

                valid_loss = test_loss / len(validloader)
                # saving checkpoint
                model.n_in = n_in
                model.n_out = len(labelsdict)
                model.labelsdict = labelsdict
                model.optimizer = optimizer
                model.optimizer_state_dict = optimizer.state_dict()
                model.model_name = model_name
                model.loss = criterion
                model.val_loss = valid_loss

                loss_acc = []
                loss_acc.append(train_loss)
                loss_acc.append(val_loss)
                loss_acc.append(train_acc)
                loss_acc.append(val_acc)
                model.loss_acc = loss_acc
                model.start_epoch = start_epoch
                model.epoch = e + 1
                path = model_path
                path_best = model_path_best
                # saving checkpoint model at every epoch
                save_checkpoint(model, path)

                # saving best model during training, best indicated by lowest validation loss obtained
                if valid_loss <= val_loss_max:
                    print(
                        'Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'
                        .format(val_loss_max, valid_loss))
                    # update threshold
                    val_loss_max = valid_loss
                    save_checkpoint(model, path_best)
                # reset training loss and accuracy after validation, which is used again for subsequent training epoch
                running_loss = 0
                running_acc = 0

    print('model:', model_name, '- epochs:', n_epoch)
    print(f"Run time: {(time.time() - start)/60:.3f} min")

    # plotting the graph on training and validation loss for model
    plot_curves(start_epoch, model.epoch, loss_acc, model_name, loss_graph,
                accuracy_graph)

    return model
Example 5
def main(premise_hidden_size,
         hypo_hidden_size,
         linear_hidden_size,
         interaction_type,
         device,
         kind,
         num_layers=1,
         bidirectional=True,
         kernel_size=3,
         lr=1e-4,
         test=False,
         model_dir='models'):
    valid_types = ('cat', 'element_wise_mult')
    if interaction_type not in valid_types:
        raise ValueError('interaction_type can only be one of: {}'.format(valid_types))

    # data
    batch_size = 32
    save_freq = 500
    max_epochs = 40
    train_loader, val_loader = data.get_loaders(batch_size, test=test)

    # model
    embed_size = 300
    ind2vec = data.get_table_lookup()
    if kind == 'rnn':
        model = models.SNLI_Model(ind2vec,
                                  embed_size,
                                  premise_hidden_size,
                                  hypo_hidden_size,
                                  linear_hidden_size,
                                  interaction_type,
                                  device,
                                  kind='rnn',
                                  num_layers=num_layers,
                                  bidirectional=bidirectional)
    else:
        model = models.SNLI_Model(ind2vec,
                                  embed_size,
                                  premise_hidden_size,
                                  hypo_hidden_size,
                                  linear_hidden_size,
                                  interaction_type,
                                  device,
                                  kind='cnn',
                                  kernel_size=kernel_size)
    model = model.to(device)
    optimizer = torch.optim.Adam(
        [param for param in model.parameters() if param.requires_grad], lr=lr)
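    # only parameters with requires_grad=True are passed to Adam, so any frozen
    # weights (e.g. a pretrained embedding table) are left untouched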
    loss_fn = torch.nn.CrossEntropyLoss()

    model_name = f'{kind}_model_{premise_hidden_size}_{interaction_type}'
    model_dir = os.path.join(model_dir, model_name)
    train_helper = train_helpers.TrainHelper(device, model, loss_fn, optimizer,
                                             models.batch_params_key,
                                             model_dir, test)
    train_loss, val_loss, train_acc, val_acc = train_helper.train_loop(
        train_loader, val_loader, max_epochs=max_epochs, save_freq=save_freq)

    if 'cpu' in device:
        os.makedirs('figures', exist_ok=True)
        path = f'figures/{model_name}'
        utils.plot_curves(train_loss, val_loss, train_acc, val_acc, path)

    utils.save_pkl_data(train_loss, 'train_loss.p', data_dir=model_dir)
    utils.save_pkl_data(val_loss, 'val_loss.p', data_dir=model_dir)
    utils.save_pkl_data(train_acc, 'train_acc.p', data_dir=model_dir)
    utils.save_pkl_data(val_acc, 'val_acc.p', data_dir=model_dir)
Example 6
def main():

    ########################################################################
    ######################## training parameters ###########################
    ########################################################################

    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset',
                        type=str,
                        default='ImageNet',
                        metavar='N',
                        help='dataset to run experiments on')
    parser.add_argument(
        '--batch_size',
        type=int,
        default=256,
        metavar='N',
        help=
        'input batch size for training (default: 256; note that a batch size of 64 gives worse performance on ImageNet, so keep the default)'
    )
    parser.add_argument('--exp',
                        type=str,
                        default='default',
                        metavar='N',
                        help='name of experiment')
    parser.add_argument('--epochs',
                        type=int,
                        default=30,
                        metavar='N',
                        help='number of epochs to train (default: 30)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.2,
                        metavar='LR',
                        help='learning rate (default: 0.2)')
    parser.add_argument('--weight_decay',
                        type=float,
                        default=5 * 1e-4,
                        help='weight decay (default: 5e-4)')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.9,
                        metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--step_size',
                        type=float,
                        default=10,
                        metavar='M',
                        help='StepLR scheduler step size (default: 10)')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.1,
                        metavar='M',
                        help='StepLR scheduler decay factor (default: 0.1)')
    parser.add_argument('--load_model',
                        type=str,
                        default=None,
                        help='model to initialise from')
    parser.add_argument('--caffe',
                        action='store_true',
                        default=False,
                        help='caffe pretrained model')
    parser.add_argument('--test',
                        action='store_true',
                        default=False,
                        help='run in test mode')
    parser.add_argument(
        '--ensemble_inference',
        action='store_true',
        default=True,
        help='run in ensemble inference mode'
    )  # testing always runs in ensemble inference mode anyway
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=5021,
                        metavar='S',
                        help='random seed (default: 5021)')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument(
        '--stopping_criterion',
        type=int,
        default=15,
        metavar='N',
    )
    parser.add_argument(
        '--low_threshold',
        type=int,
        default=0,
        metavar='N',
    )
    parser.add_argument(
        '--high_threshold',
        type=int,
        default=100000,
        metavar='N',
    )
    parser.add_argument(
        '--open_ratio',
        type=int,
        default=1,
        help='ratio of closed_set to open_set data',
    )
    parser.add_argument(
        '--picker',
        type=str,
        default='generalist',
        help=
        'dataloader or model picker - experts | generalist : experts uses manyshot, medianshot, fewshot partitioning; \
                                                                    generalist uses the generalist model',
    )
    parser.add_argument(
        '--num_learnable',
        type=int,
        default=-1,
        help=
        'number of learnable layers : -1 ( all ) | 1 ( only classifier ) | 2 ( classifier and last fc ) | 3 - 6 ( classifier, fc + $ind - 2$ resnet super-blocks ) '
    )
    parser.add_argument('--scheduler',
                        type=str,
                        default='stepLR',
                        help=' stepLR | cosine lr scheduler')
    parser.add_argument('--max_epochs',
                        type=int,
                        default=None,
                        help='max number of epochs, for cosine lr scheduler')

    args = parser.parse_args()

    print("\n==================Options=================")
    pprint(vars(args), indent=4)
    print("==========================================\n")

    use_cuda = not args.no_cuda and torch.cuda.is_available()

    # make everything deterministic
    if (args.seed is not None):
        print('Seeding everything with seed {}.'.format(args.seed))
        seed_everything(args.seed)
    else:
        print('Note : Seed is random.')

    device = torch.device("cuda" if use_cuda else "cpu")

    exp_dir = os.path.join('checkpoint', args.exp)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)

    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    # batch size settings : note that these are important for memory and performance reasons
    if (args.dataset.lower() == 'imagenet' and args.test):
        args.batch_size = 64
    elif (args.dataset.lower() == 'imagenet' and not (args.test)):
        args.batch_size = 256
    elif (args.dataset.lower() == 'places' and not (args.test)):
        args.batch_size = 32
    elif (args.dataset.lower() == 'places' and args.test):
        args.batch_size = 8

    ########################################################################
    ######################## load data and pre-trained models ##############
    ########################################################################

    print('Loading train loader.')
    train_loader = torch.utils.data.DataLoader(Threshold_Dataset(
        root=data_root[args.dataset],
        orig_txt='./data/{}_LT/{}_LT_train.txt'.format(args.dataset,
                                                       args.dataset),
        txt='./data/{}_LT/{}_LT_train.txt'.format(args.dataset, args.dataset),
        low_threshold=args.low_threshold,
        high_threshold=args.high_threshold,
        open_ratio=args.open_ratio,
        transform=data_transforms['train'],
        picker=args.picker),
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    print('Loading val loader.')
    val_loader = torch.utils.data.DataLoader(Threshold_Dataset(
        root=data_root[args.dataset],
        orig_txt='./data/{}_LT/{}_LT_train.txt'.format(args.dataset,
                                                       args.dataset),
        txt='./data/{}_LT/{}_LT_val.txt'.format(args.dataset, args.dataset),
        low_threshold=args.low_threshold,
        high_threshold=args.high_threshold,
        open_ratio=1,
        transform=data_transforms['val'],
        picker=args.picker),
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             **kwargs)

    num_classes = train_loader.dataset.num_classes + 1 - int(
        args.picker == 'generalist'
    )  # add 1 for the open/dustbin class if not generalist model
    if (args.dataset.lower() == 'imagenet'):
        feature_extractor = create_model_resnet10().to(
            device)  # use this for imagenet
        args.lr = 1e-1
    else:
        feature_extractor = create_model_resnet152(caffe=True).to(
            device
        )  # use this for places. pass caffe=true to load pretrained imagenet model
        args.lr = 1e-2

    print('Learning rate : {:.4f}'.format(args.lr))
    classifier = DotProduct_Classifier(num_classes=num_classes,
                                       feat_dim=512).to(device)
    optimizer = torch.optim.SGD(chain(feature_extractor.parameters(),
                                      classifier.parameters()),
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
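    # itertools.chain joins the backbone and classifier parameter iterators so that
    # a single SGD optimizer updates both modules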

    if (args.scheduler == 'stepLR'):
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=args.step_size,
                                              gamma=args.gamma)
        print('Using StepLR scheduler with params, stepsize : {}, gamma : {}'.
              format(args.step_size, args.gamma))
    elif (args.scheduler == 'cosine'):
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                         T_max=args.max_epochs)
        print(
            'Using CosineAnnealingLR scheduler with params, T_max : {}'.format(
                args.max_epochs))
    else:
        raise Exception('Invalid scheduler argument.')

    # load pretrained model
    if (args.load_model is not None):

        if (not args.caffe):

            pretrained_model = torch.load(args.load_model)

            weights_feat = pretrained_model['state_dict_best']['feat_model']
            weights_feat = {
                k: weights_feat['module.' + k] if 'module.' +
                k in weights_feat else weights_feat[k]
                for k in feature_extractor.state_dict()
            }
            feature_extractor.load_state_dict(
                weights_feat)  # loading feature extractor weights

            weights_class = pretrained_model['state_dict_best']['classifier']
            weights_class = {
                k: weights_class['module.' + k] if 'module.' +
                k in weights_class else weights_class[k]
                for k in classifier.state_dict()
            }

            if (classifier.state_dict()['fc.weight'].shape ==
                    weights_class['fc.weight'].shape):
                classifier.load_state_dict(
                    weights_class
                )  # loading classifier weights if classifiers match
            else:
                print(
                    'Classifiers of pretrained model and current model are different with dimensions : ',
                    classifier.state_dict()['fc.weight'].shape,
                    weights_class['fc.weight'].shape)

            print(
                'Loaded pretrained model on entire dataset from epoch : {:d} with acc : {:.4f}'
                .format(pretrained_model['best_epoch'],
                        pretrained_model['best_acc']))
        else:

            weights_feat = torch.load(args.load_model)
            weights_feat = {
                k: weights_feat[k]
                if k in weights_feat else feature_extractor.state_dict()[k]
                for k in feature_extractor.state_dict()
            }
            feature_extractor.load_state_dict(
                weights_feat)  # loading feature extractor weights
            print('Loaded imagenet pretrained model from Caffe.')

    ########################################################################
    ######################## set learnable layers ##########################
    ########################################################################

    if (args.num_learnable == -1):
        print('Learning feature extractor and classifier.')

    elif (args.num_learnable >= 1 and args.num_learnable <= 6):
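        # set_weights(name, module, flag) is assumed to toggle requires_grad on the
        # given sub-module's parameters (True = trainable, False = frozen)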

        if (args.num_learnable == 1):

            set_weights('feature_extractor', feature_extractor, False)
            set_weights('classifier', classifier, True)

        elif (args.num_learnable == 2):

            print('Setting feature extractor weights.')
            for ind, (name,
                      layer) in enumerate(feature_extractor.named_children()):
                if (ind == 9):
                    set_weights(name, layer, True)
                else:
                    set_weights(name, layer, False)
            set_weights('classifier', classifier, True)

        else:

            print('Setting feature extractor weights.')
            for ind, (name,
                      layer) in enumerate(feature_extractor.named_children()):
                if (ind >= 10 - args.num_learnable):
                    set_weights(name, layer, True)
                else:
                    set_weights(name, layer, False)
            set_weights('classifier', classifier, True)

    else:
        raise Exception('Invalid num_learnable layers : {}'.format(
            args.num_learnable))

    ########################################################################
    ######################## training with early stopping ##################
    ########################################################################
    if (not args.test):

        results = vars(args)
        results['train_losses'] = []
        results['train_accuracies'] = []
        results['test_losses'] = []
        results['test_accuracies'] = []
        best_acc, best_epoch = -0.1, 0

        epoch = 1
        while (True):

            sys.stdout.flush()
            train_loss, train_err = train(args, feature_extractor, classifier,
                                          device, train_loader, optimizer,
                                          scheduler, epoch)
            test_loss, test_err = test(args, feature_extractor, classifier,
                                       device, val_loader)

            results['train_losses'].append(train_loss)
            results['test_losses'].append(test_loss)
            results['train_accuracies'].append(train_err)
            results['test_accuracies'].append(test_err)

            # note: despite the name, test_err holds an accuracy here (higher is better)
            if (test_err > best_acc):
                best_acc = test_err
                best_epoch = epoch
                results['best_acc'], results[
                    'best_epoch'] = best_acc, best_epoch

                # save best model
                best_model_weights = {}
                best_model_weights['feat_model'] = copy.deepcopy(
                    feature_extractor.state_dict())
                best_model_weights['classifier'] = copy.deepcopy(
                    classifier.state_dict())
                model_states = {
                    'epoch': epoch,
                    'best_epoch': best_epoch,
                    'state_dict_best': best_model_weights,
                    'best_acc': best_acc,
                }
                torch.save(model_states, os.path.join(exp_dir,
                                                      "best_model.pt"))

            elif (epoch > best_epoch + args.stopping_criterion):
                print('Best model obtained. Accuracy : ', best_acc)
                plot_curves(results, exp_dir)
                break

            elif (args.scheduler == 'cosine' and epoch == args.max_epochs):
                print('Best model obtained. Accuracy : ', best_acc)
                plot_curves(results, exp_dir)
                break

            savepath = os.path.join(exp_dir, 'results.pickle')
            with open(savepath, 'wb') as f:
                pickle.dump(results, f)
            plot_curves(results, exp_dir)
            epoch = epoch + 1
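
The snippet above freezes and unfreezes blocks through a set_weights helper that is not shown. A minimal sketch, assuming the helper does nothing more than toggle requires_grad on a module's parameters (the name and print format below are guesses, not the repository's actual code):

def set_weights(name, module, trainable):
    # Hypothetical helper: (un)freeze every parameter of `module`.
    print('Setting {} requires_grad to {}.'.format(name, trainable))
    for param in module.parameters():
        param.requires_grad = trainable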
Example no. 7
import argparse
from dataloader import load_data
from utils import plot_curves
from classifiers.LinearSVM import LinearSVM
from classifiers.RbfSVM import RbfSVM

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Choose model")
    parser.add_argument("--type",
                        type=str,
                        default="Kernel",
                        help="Linear/Kernel")
    parser.add_argument("--norm", type=int, default=1, help="Normalize or not")

    args = parser.parse_args()
    data = load_data(args.norm)

    if args.type == 'Linear':
        svm = LinearSVM(data)
    else:
        svm = RbfSVM(data)
    loss_train_list, loss_test, acc_test = svm.run()
    plot_curves(loss_train_list)
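
plot_curves is imported from utils but not shown. A minimal matplotlib-based stand-in for the single-curve call above, sketched under the assumption that it simply saves a loss plot (not the repository's implementation):

import matplotlib.pyplot as plt

def plot_curves(loss_train_list, save_path='train_loss.png'):
    # Hypothetical stand-in: plot the training loss per iteration and save the figure.
    plt.figure()
    plt.plot(range(1, len(loss_train_list) + 1), loss_train_list, label='train loss')
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.legend()
    plt.savefig(save_path)
    plt.close()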
Example no. 8
def run(split_file, pose_data_root, configs, save_model_to=None):
    epochs = configs.max_epochs
    log_interval = configs.log_interval
    num_samples = configs.num_samples
    hidden_size = configs.hidden_size
    drop_p = configs.drop_p
    num_stages = configs.num_stages

    # setup dataset
    train_dataset = Sign_Dataset(index_file_path=split_file,
                                 split=['train', 'val'],
                                 pose_root=pose_data_root,
                                 img_transforms=None,
                                 video_transforms=None,
                                 num_samples=num_samples)

    train_data_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=configs.batch_size, shuffle=True)

    val_dataset = Sign_Dataset(index_file_path=split_file,
                               split='test',
                               pose_root=pose_data_root,
                               img_transforms=None,
                               video_transforms=None,
                               num_samples=num_samples,
                               sample_strategy='k_copies')
    val_data_loader = torch.utils.data.DataLoader(
        dataset=val_dataset, batch_size=configs.batch_size, shuffle=True)

    logging.info('\n'.join(
        ['Class labels are: '] +
        [(str(i) + ' - ' + label)
         for i, label in enumerate(train_dataset.label_encoder.classes_)]))

    # setup the model
    model = GCN_muti_att(input_feature=num_samples * 2,
                         hidden_feature=num_samples * 2,
                         num_class=len(train_dataset.label_encoder.classes_),
                         p_dropout=drop_p,
                         num_stage=num_stages).cuda()

    # setup training parameters, learning rate, optimizer, scheduler
    lr = configs.init_lr
    # optimizer = optim.SGD(vgg_gru.parameters(), lr=lr, momentum=0.00001)
    optimizer = optim.Adam(model.parameters(),
                           lr=lr,
                           eps=configs.adam_eps,
                           weight_decay=configs.adam_weight_decay)

    # record training process
    epoch_train_losses = []
    epoch_train_scores = []
    epoch_val_losses = []
    epoch_val_scores = []

    best_test_acc = 0
    # start training
    for epoch in range(int(epochs)):
        # train, test model

        print('start training.')
        train_losses, train_scores, train_gts, train_preds = train(
            log_interval, model, train_data_loader, optimizer, epoch)
        print('start testing.')
        val_loss, val_score, val_gts, val_preds, incorrect_samples = validation(
            model, val_data_loader, epoch, save_to=save_model_to)

        logging.info(
            '========================\nEpoch: {} Average loss: {:.4f}'.format(
                epoch, val_loss))
        logging.info('Top-1 acc: {:.4f}'.format(100 * val_score[0]))
        logging.info('Top-3 acc: {:.4f}'.format(100 * val_score[1]))
        logging.info('Top-5 acc: {:.4f}'.format(100 * val_score[2]))
        logging.info('Top-10 acc: {:.4f}'.format(100 * val_score[3]))
        logging.info('Top-30 acc: {:.4f}'.format(100 * val_score[4]))
        logging.debug('mislabelled val. instances: ' + str(incorrect_samples))

        # save results
        epoch_train_losses.append(train_losses)
        epoch_train_scores.append(train_scores)
        epoch_val_losses.append(val_loss)
        epoch_val_scores.append(val_score[0])

        # save all train test results
        np.save('output/epoch_training_losses.npy',
                np.array(epoch_train_losses))
        np.save('output/epoch_training_scores.npy',
                np.array(epoch_train_scores))
        np.save('output/epoch_test_loss.npy', np.array(epoch_val_losses))
        np.save('output/epoch_test_score.npy', np.array(epoch_val_scores))

        if val_score[0] > best_test_acc:
            best_test_acc = val_score[0]
            best_epoch_num = epoch

            torch.save(
                model.state_dict(),
                os.path.join(
                    'checkpoints', subset,  # `subset` is assumed to be defined elsewhere in the module
                    'gcn_epoch={}_val_acc={}.pth'.format(
                        best_epoch_num, best_test_acc)))

    utils.plot_curves()

    class_names = train_dataset.label_encoder.classes_
    utils.plot_confusion_matrix(train_gts,
                                train_preds,
                                classes=class_names,
                                normalize=False,
                                save_to='output/train-conf-mat')
    utils.plot_confusion_matrix(val_gts,
                                val_preds,
                                classes=class_names,
                                normalize=False,
                                save_to='output/val-conf-mat')
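
val_score above is indexed as top-1/3/5/10/30 accuracies, but how validation() computes them is not shown. A hedged sketch of the usual top-k computation from logits (an illustration, not necessarily the code used here):

import torch

def topk_accuracies(logits, targets, ks=(1, 3, 5, 10, 30)):
    # Fraction of samples whose true label is among the k highest-scoring classes.
    max_k = max(ks)
    _, pred = logits.topk(max_k, dim=1)      # (N, max_k) predicted class indices
    hits = pred.eq(targets.view(-1, 1))      # (N, max_k) boolean matches
    return [hits[:, :k].any(dim=1).float().mean().item() for k in ks]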
Example no. 9
def main(args):
    config = cfg.get_default()
    cfg.set_params(config, args.config_path, args.set)
    cfg.freeze(config, True)
    print('- Configuration:')
    print(config)

    if config.dataset == 'FluidIceShake':
        n_groups = 2
        n_particles = 348
    elif config.dataset == 'RigidFall':
        n_groups = 3
        n_particles = 192
    elif config.dataset == 'MassRope':
        n_groups = 2
        n_particles = 95
    else:
        raise ValueError('Unsupported environment')

    train_loader = get_dataloader(config, 'train')
    valid_loader = get_dataloader(config, 'valid')

    # build model
    model = PointSetNet(
        config.n_frames,
        config.pred_hidden,
        n_particles,
        n_groups,
        config.batchnorm,
        single_out=config.single_out,
        recur_pred=config.recur_pred,
        use_temp_encoder=config.use_temp_encoder,
        conv_temp_encoder=config.conv_temp_encoder,
        temp_embedding_size=config.temp_embedding_size).to(_DEVICE)

    print('- Model architecture:')
    print(model)

    if config.load_path != '':
        print('- Loading model from {}'.format(config.load_path))
        model.load_state_dict(torch.load(config.load_path))

    # build optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

    if not config.debug:
        print('- Training start')
        stats = {
            'epoch': [],
            'valid_losses': [],
            'train_losses': [],
            'train_pos_losses': [],
            'train_group_losses': [],
            'valid_pos_losses': [],
            'valid_group_losses': [],
        }
        best_valid_loss = np.Inf

        for epoch in range(config.n_epochs):

            # training
            print('- Training epoch {:d}'.format(epoch))
            epoch_train_losses = []
            epoch_train_pos_losses = []
            epoch_train_grp_losses = []

            pbar = tqdm(train_loader)
            did_vis = False
            for images, positions, groups in pbar:
                (model, optimizer, train_loss,
                 train_pos_loss, train_grp_loss) = step(
                    config, model, optimizer, images, positions, groups, True)
                epoch_train_losses.append(train_loss)
                epoch_train_pos_losses.append(train_pos_loss)
                epoch_train_grp_losses.append(train_grp_loss)
                pbar.set_description('Train loss {:f}'.format(train_loss))

                # visualize training results
                if not did_vis \
                        and config.vis and (epoch + 1) % config.vis_every == 0:
                    pbar.set_description('Generating video')
                    visualize(config, model, epoch, n_particles,
                              images, positions, groups, True)
                    did_vis = True

            train_loss = np.average(epoch_train_losses)
            train_pos_loss = np.average(epoch_train_pos_losses)
            train_grp_loss = np.average(epoch_train_grp_losses)

            print(('- Finish training epoch {:d}, training loss {:f},'
                   ' pos loss {:f}, group loss {:f}').format(
                epoch, train_loss, train_pos_loss, train_grp_loss))

            # validation
            print('- Evaluating epoch {:d}'.format(epoch))
            epoch_valid_losses = []
            epoch_valid_pos_losses = []
            epoch_valid_grp_losses = []

            pbar = tqdm(valid_loader)
            did_vis = False
            for images, positions, groups in pbar:
                with torch.no_grad():
                    (model, _, valid_loss,
                     valid_pos_loss, valid_grp_loss) = step(
                        config, model, optimizer,
                        images, positions, groups, False)
                epoch_valid_losses.append(valid_loss)
                epoch_valid_pos_losses.append(valid_pos_loss)
                epoch_valid_grp_losses.append(valid_grp_loss)
                pbar.set_description('Valid loss {:f}'.format(valid_loss))

                # visualize validation results
                if not did_vis \
                        and config.vis and (epoch + 1) % config.vis_every == 0:
                    pbar.set_description('Generating video')
                    visualize(config, model, epoch, n_particles,
                              images, positions, groups, False)
                    did_vis = True

            valid_loss = np.average(epoch_valid_losses)
            valid_pos_loss = np.average(epoch_valid_pos_losses)
            valid_grp_loss = np.average(epoch_valid_grp_losses)

            print('- Finish eval epoch {:d}, validation loss {:f}'.format(
                epoch, valid_loss))
            if valid_loss < best_valid_loss:
                print('- Best model')
                best_valid_loss = valid_loss
                torch.save(model.state_dict(),
                           os.path.join(config.run_dir, 'checkpoint_best.pth'))
            torch.save(model.state_dict(),
                       os.path.join(config.run_dir, 'checkpoint_latest.pth'))
            print()

            stats['epoch'].append(epoch)
            stats['train_losses'].append(train_loss)
            stats['valid_losses'].append(valid_loss)
            stats['train_pos_losses'].append(train_pos_loss)
            stats['train_group_losses'].append(train_grp_loss)
            stats['valid_pos_losses'].append(valid_pos_loss)
            stats['valid_group_losses'].append(valid_grp_loss)
            with open(os.path.join(config.run_dir, 'stats.json'), 'w') as fout:
                json.dump(stats, fout)

            # Plot loss curves
            plot_dir = os.path.join(config.run_dir, 'curves')
            if not os.path.isdir(plot_dir):
                os.makedirs(plot_dir)
            utils.plot_curves(
                x=stats['epoch'],
                ys=[stats['train_losses'], stats['valid_losses']],
                save_path=os.path.join(plot_dir, 'loss.png'),
                curve_labels=['train', 'valid'],
                x_label='epoch',
                y_label='total_loss',
                title='Total loss')
            utils.plot_curves(
                x=stats['epoch'],
                ys=[stats['train_pos_losses'], stats['valid_pos_losses']],
                save_path=os.path.join(plot_dir, 'loss_pos.png'),
                curve_labels=['train', 'valid'],
                x_label='epoch',
                y_label='pos_loss',
                title='Position loss')
            utils.plot_curves(
                x=stats['epoch'],
                ys=[stats['train_group_losses'], stats['valid_group_losses']],
                save_path=os.path.join(plot_dir, 'loss_grp.png'),
                curve_labels=['train', 'valid'],
                x_label='epoch',
                y_label='grp_loss',
                title='Grouping loss')

    else:  # Debug on a single batch
        images, positions, groups = next(iter(train_loader))
        images = images[:5, :15, ...]
        positions = positions[:5, :15, ...]
        groups = groups[:5, ...]
        for epoch in range(config.n_epochs):
            (model, optimizer, train_loss,
             train_pos_loss, train_grp_loss) = step(
                config, model, optimizer, images, positions, groups, True)
            print(train_loss, train_pos_loss, train_grp_loss)
Example no. 10
train_losses = []
train_accs = []
train_f1s = []
test_losses = []
test_accs = []
test_f1s = []
print("Training...")
for epoch in range(1, epochs + 1):
    train_loss, train_acc, train_f1, test_loss, test_acc, test_f1 = train(
        model, device, ld_train, ld_test, optimizer, epoch, weights)
    train_losses.append(train_loss)
    train_accs.append(train_acc)
    train_f1s.append(train_f1)
    test_losses.append(test_loss)
    test_accs.append(test_acc)
    test_f1s.append(test_f1)

    scheduler.step()

    # save best checkpoint based on test F1 score
    if epoch == 1:
        best_test_score = test_f1

    if save_dir:
        if test_f1 >= best_test_score:
            torch.save(model.state_dict(), save_dir)
            best_test_score = test_f1

# plot learning curves
plot_curves(train_losses, test_losses, "Loss curves")
plot_curves(train_accs, test_accs, "Accuracy curves")
plot_curves(train_f1s, test_f1s, "F1 curves")
Example no. 11
print("Calculating Initial Loss for the Fine Tuning")

val_curve = [
    utils.test(dnet, cost, trainloader, validationloader, **training_params)
]

print("Fine Tuning the NN")
for epoch in range(1, args.epochs + 1):
    utils.train(dnet, cost, optimizer, trainloader, **training_params)
    losses = utils.test(dnet, cost, trainloader, validationloader,
                        **training_params)
    val_curve.append(losses)
print("Done")

autoencoder_fig = utils.plot_curves(val_curveAE,
                                    title='Autoencoder Loss Curves')
fine_tune_fig = utils.plot_curves(val_curve, title='Fine Tuning Loss Curves')

dnet.eval()
dnet.cpu()
if args.name:
    fine_tune_fig.savefig("{}_val_curve_fine_tune.png".format(args.name))
    autoencoder_fig.savefig("{}_val_curveAE.png".format(args.name))
    trainset.save_scaler("{}_standardizer.npz".format(args.name))
    th.save(dnet.state_dict(), "{}_fine_tuned_net.pth".format(args.name))
else:
    fine_tune_fig.savefig("val_curve_fine_tune.png")
    autoencoder_fig.savefig("val_curveAE.png")
    trainset.save_scaler("data_standardizer.npz")
    th.save(dnet.state_dict(), "fine_tuned_net.pth")
Example no. 12
    )
    print(
        f'Val loss: {np.round(valid_loss,6)} \t Val acc: {np.round(valid_acc,4)} \t Val acc pp: {np.round(valid_acc_pp,4)}\n'
    )

bvl = np.round(best_valid_loss, 6)
bvl_acc = np.round(val_accuracies[val_losses.index(best_valid_loss)], 4)
bvl_acc_pp = None  # only computed when post-processing is enabled
if params.post_process:
    bvl_acc_pp = np.round(val_accuracies_pp[val_losses.index(best_valid_loss)],
                          4)
bacc = np.round(np.max(val_accuracies), 4)
print(
    f'End of training: best val loss = {bvl} | associated val_acc = {bvl_acc}, val_acc_pp = {bvl_acc_pp} | best val acc = {bacc}\n'
)

plot_curves(train_losses, train_accuracies, val_losses, val_accuracies, params)
if params.post_process:
    plot_curves(train_losses, train_accuracies_pp, val_losses,
                val_accuracies_pp, params)

### TESTING

params = open_config_file(args.config)
print('Beginning testing...')
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=1,
                                          shuffle=False)
# load best weights from training (based on params.tracking value)
best_model = create_model(params)
best_model.load_state_dict(torch.load('best_model.pt'))
best_model = best_model.cuda()
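
The snippet stops after reloading the best checkpoint. A short hedged sketch of how best_model might then be evaluated on test_loader; the loop below is an illustration and assumes a standard (input, label) batch format, which the original code does not show:

best_model.eval()
correct = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.cuda(), labels.cuda()
        outputs = best_model(inputs)
        correct += (outputs.argmax(dim=1) == labels).sum().item()
print(f'Test accuracy: {correct / len(test_loader.dataset):.4f}')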
Example no. 13
def main():
    train_loss_history, train_acc_history, valid_loss_history, valid_acc_history = run()

    plot_curves(train_loss_history, train_acc_history, valid_loss_history, valid_acc_history)