def main(): ######################################################################## ######################## training parameters ########################### ######################################################################## parser = argparse.ArgumentParser() parser.add_argument('--dataset', type=str, default='ImageNet', metavar='N', help='dataset to run experiments on') parser.add_argument( '--batch_size', type=int, default=256, metavar='N', help= 'input batch size for training (default: 256; a batch size of 64 gives worse ImageNet performance, so keep the default)' ) parser.add_argument('--exp', type=str, default='default', metavar='N', help='name of experiment') parser.add_argument('--logits_exp', type=str, default='default', metavar='N', help='name of experiment containing logits') parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') parser.add_argument('--seed', type=int, default=5021, metavar='S', help='random seed (default: 5021)') parser.add_argument('--lr', type=float, default=0.1, metavar='LR', help='learning rate (default: 0.1)') parser.add_argument('--weight_decay', type=float, default=5 * 1e-4, help='weight decay (default: 5e-4)') parser.add_argument('--momentum', type=float, default=0.9, metavar='M', help='SGD momentum (default: 0.9)') parser.add_argument('--step_size', type=float, default=30, metavar='M', help='StepLR step size (default: 30)') parser.add_argument('--gamma', type=float, default=0.1, metavar='M', help='StepLR decay factor (default: 0.1)') parser.add_argument( '--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') parser.add_argument( '--stopping_criterion', type=int, default=30, metavar='N', help='number of epochs without improvement before early stopping (default: 30)') parser.add_argument('--test', action='store_true', default=False, help='test mode') parser.add_argument('--load_model', type=str, default=None, help='model to initialise from') args = parser.parse_args() print("\n==================Options=================") pprint(vars(args), indent=4) print("==========================================\n") use_cuda = not args.no_cuda and torch.cuda.is_available() # make everything deterministic, reproducible if (args.seed is not None): print('Seeding everything with seed {}.'.format(args.seed)) seed_everything(args.seed) else: print('Note: seed is random.') device = torch.device("cuda" if use_cuda else "cpu") exp_dir = os.path.join('checkpoint', args.exp) if not os.path.isdir(exp_dir): os.makedirs(exp_dir) kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} dataset = args.dataset num_classes = 1000 if dataset.lower() == 'imagenet' else 365 ######################################################################## ######################## load data #################### ######################################################################## datadir = './checkpoint/{}'.format(args.logits_exp) if (not args.test): data_manyshot = torch.load('{}/results_val_manyshot.pickle'.format( datadir)) # for experts with reject option data_mediumshot = torch.load('{}/results_val_mediumshot.pickle'.format( datadir)) # for experts with reject option data_fewshot = torch.load('{}/results_val_fewshot.pickle'.format( datadir)) # for experts with reject option else: data_manyshot = torch.load( '{}/results_test_aligned_manyshot.pickle'.format( datadir)) # for experts with reject option data_mediumshot = torch.load( '{}/results_test_aligned_mediumshot.pickle'.format( datadir)) # for experts with reject option data_fewshot = 
torch.load( '{}/results_test_aligned_fewshot.pickle'.format( datadir)) # for experts with reject option data_general = torch.load( '{}/results_test_aligned_general.pickle'.format(datadir)) manyshot_logits = data_manyshot['logits'].clone().detach() mediumshot_logits = data_mediumshot['logits'].clone().detach() fewshot_logits = data_fewshot['logits'].clone().detach() labels = data_manyshot['labels'] if not args.test else data_general[ 'labels'] manyshotClassMask, mediumshotClassMask, fewshotClassMask = data_manyshot[ 'class_mask'], data_mediumshot['class_mask'], data_fewshot[ 'class_mask'] # logit tuning to correct for open set sampling ratio if (dataset.lower() == 'imagenet'): manyshot_logits[:, -1] = manyshot_logits[:, -1] - np.log(2 / (1 + 16)) mediumshot_logits[:, -1] = mediumshot_logits[:, -1] - np.log(2 / (1 + 16)) fewshot_logits[:, -1] = fewshot_logits[:, -1] - np.log(2 / (1 + 16)) else: manyshot_logits[:, -1] = manyshot_logits[:, -1] - np.log(2 / (1 + 16)) mediumshot_logits[:, -1] = mediumshot_logits[:, -1] - np.log(2 / (1 + 8)) fewshot_logits[:, -1] = fewshot_logits[:, -1] - np.log(2 / (1 + 8)) manyshot_features = manyshot_logits.data.cpu().numpy() mediumshot_features = mediumshot_logits.data.cpu().numpy() fewshot_features = fewshot_logits.data.cpu().numpy() labels = labels.data.cpu().numpy() if (not args.test): train_loader = torch.utils.data.DataLoader(Calibration_Dataset( orig_txt='./data/{}_LT/{}_LT_train.txt'.format( args.dataset, args.dataset), manyshot_features=manyshot_features, mediumshot_features=mediumshot_features, fewshot_features=fewshot_features, labels=labels), batch_size=args.batch_size, shuffle=True, **kwargs) else: test_loader = torch.utils.data.DataLoader( Calibration_Dataset(orig_txt='./data/{}_LT/{}_LT_train.txt'.format( args.dataset, args.dataset), manyshot_features=manyshot_features, mediumshot_features=mediumshot_features, fewshot_features=fewshot_features, labels=labels), batch_size=args.batch_size, shuffle=False, **kwargs) # don't shuffle the test set ######################################################################## ######################## initialise model and optimizer ################ ######################################################################## model = CalibrateExperts(args.dataset.lower(), manyshotClassMask, mediumshotClassMask, fewshotClassMask).cuda() optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma) print( 'Using StepLR scheduler with params, stepsize : {}, gamma : {}'.format( args.step_size, args.gamma)) if (args.test): pretrained_model = torch.load(args.load_model) weights = pretrained_model['state_dict_best']['model'] weights = { k: weights['module.' + k] if 'module.' 
+ k in weights else weights[k] for k in model.state_dict() } model.load_state_dict(weights) # loading model weights print('Loaded pretrained model.') ######################################################################## ######################## training with early stopping ################## ######################################################################## if (not args.test): results = vars(args) results['train_losses'], results['train_accuracies'] = [], [] best_acc, best_epoch = 0, 0 epoch = 1 while (True): sys.stdout.flush() train_loss, train_acc = train(args, model, device, train_loader, optimizer, scheduler, epoch) results['train_losses'].append(train_loss) results['train_accuracies'].append(train_acc) if (train_acc > best_acc): best_acc = train_acc best_epoch = epoch results['best_acc'], results[ 'best_epoch'] = best_acc, best_epoch # save best model best_model_weights = {} best_model_weights['model'] = copy.deepcopy(model.state_dict()) model_states = { 'epoch': epoch, 'best_epoch': best_epoch, 'state_dict_best': best_model_weights, 'best_acc': best_acc, } torch.save(model_states, os.path.join(exp_dir, "best_model.pt")) elif (epoch > best_epoch + args.stopping_criterion): print('Best model obtained. Error : ', best_acc) plot_curves(results, exp_dir) # plot break savepath = os.path.join(exp_dir, 'results.pickle') with open(savepath, 'wb') as f: pickle.dump(results, f) plot_curves(results, exp_dir) # plot epoch = epoch + 1 ######################################################################## ######################## testing ######################## ######################################################################## else: loss, acc, preds = test(args, model, device, test_loader) if (dataset == 'ImageNet'): split_ranges = { 'manyshot': [0, 19550], 'medianshot': [19550, 43200], 'fewshot': [43200, 50000], 'all': [0, 50000] } # imagenet else: split_ranges = { 'manyshot': [0, 13200], 'medianshot': [13200, 29400], 'fewshot': [29400, 36500], 'all': [0, 36500] } # places for split_name, split_range in split_ranges.items(): gt_target = torch.from_numpy( labels[int(split_range[0]):int(split_range[1])]).cuda() split_preds = preds[int(split_range[0]):int(split_range[1])] correct = split_preds.eq( gt_target.view_as(split_preds)).sum().item() accuracy = 100 * (correct / (split_range[1] - split_range[0])) print('{} accuracy : {:.2f}'.format(split_name, accuracy))
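# NOTE (editor): seed_everything() is called in the scripts above and below but is not
# defined in this excerpt. The function below is a minimal sketch of a typical
# implementation (assumed, not taken from the original repo): it seeds Python, NumPy
# and PyTorch and makes cuDNN deterministic.
def seed_everything(seed):
    import random
    import numpy as np
    import torch
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False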
def evaluate(model, load_path, plot): with open(load_path + 'trained_params_best.npz') as f: loaded = np.load(f) blocks_model = Model(model.cost) params_dicts = blocks_model.get_parameter_dict() params_names = params_dicts.keys() for param_name in params_names: param = params_dicts[param_name] # '/f_6_.W' --> 'f_6_.W' slash_index = param_name.find('/') param_name = param_name[slash_index + 1:] assert param.get_value().shape == loaded[param_name].shape param.set_value(loaded[param_name]) if plot: train_data_stream, valid_data_stream = get_streams(20) # T x B x F data = train_data_stream.get_epoch_iterator().next() cg = ComputationGraph(model.cost) f = theano.function(cg.inputs, [model.location, model.scale], on_unused_input='ignore', allow_input_downcast=True) res = f(data[1], data[0]) for i in range(10): visualize_attention(data[0][:, i, :], res[0][:, i, :], res[1][:, i, :], image_shape=(512, 512), prefix=str(i)) plot_curves(path=load_path, to_be_plotted=['train_categoricalcrossentropy_apply_cost', 'valid_categoricalcrossentropy_apply_cost'], yaxis='Cross Entropy', titles=['train', 'valid'], main_title='CE') plot_curves(path=load_path, to_be_plotted=['train_learning_rate', 'train_learning_rate'], yaxis='lr', titles=['train', 'train'], main_title='lr') plot_curves(path=load_path, to_be_plotted=['train_total_gradient_norm', 'valid_total_gradient_norm'], yaxis='GradientNorm', titles=['train', 'valid'], main_title='GradientNorm') for grad in ['_total_gradient_norm', '_total_gradient_norm', '_/lstmattention.W_patch_grad_norm', '_/lstmattention.W_state_grad_norm', '_/lstmattention.initial_cells_grad_norm', '_/lstmattention.initial_location_grad_norm', '_/lstmattention/lstmattention_mlp/linear_0.W_grad_norm', '_/lstmattention/lstmattention_mlp/linear_1.W_grad_norm', '_/mlp/linear_0.W_grad_norm', '_/mlp/linear_1.W_grad_norm']: plot_curves(path=load_path, to_be_plotted=['train' + grad, 'valid' + grad], yaxis='GradientNorm', titles=['train', 'valid'], main_title=grad.replace( "_", "").replace("/", "").replace(".", "")) plot_curves(path=load_path, to_be_plotted=[ 'train_misclassificationrate_apply_error_rate', 'valid_misclassificationrate_apply_error_rate'], yaxis='Error rate', titles=['train', 'valid'], main_title='Error') print 'plot printed'
def main(): # # CELL 1 # ''' # Imporation des bibliothèques python générales # ''' # import numpy as np # import matplotlib.pyplot as plt # import itertools # from sklearn.datasets import make_classification # ''' # Imporation des bibliothèques spécifiques au devoir # ''' # import utils # from linear_classifier import LinearClassifier # from two_layer_classifier import TwoLayerClassifier # %matplotlib inline # plt.rcParams['figure.figsize'] = (14.0, 8.0) # set default size of plots # %load_ext autoreload # %autoreload 2 # CELL 2 # Générer des données X_, y_ = make_classification(1000, n_features=2, n_redundant=0, n_informative=2, n_clusters_per_class=1, n_classes=3, random_state=6) # Centrer et réduire les données (moyenne = 0, écart-type = 1) mean = np.mean(X_, axis=0) std = np.std(X_, axis=0) X_ = (X_ - mean) / std # Afficher plt.figure(figsize=(8, 6)) plt.scatter(X_[:, 0], X_[:, 1], c=y_, edgecolors='k', cmap=plt.cm.Paired) plt.show(block=False) # CELL 3 num_val = 200 num_test = 200 num_train = 600 np.random.seed(1) idx = np.random.permutation(len(X_)) train_idx = idx[:num_train] val_idx = idx[num_train:num_train + num_val] test_idx = idx[-num_test:] X_train = X_[train_idx] y_train = y_[train_idx] X_val = X_[val_idx] y_val = y_[val_idx] X_test = X_[test_idx] y_test = y_[test_idx] # Afficher plt.figure(figsize=(8, 6)) plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, edgecolors='k', cmap=plt.cm.Paired) plt.title('Data train') plt.show(block=False) plt.figure(figsize=(8, 6)) plt.scatter(X_val[:, 0], X_val[:, 1], c=y_val, edgecolors='k', cmap=plt.cm.Paired) plt.title('Data Validation') plt.show(block=False) plt.figure(figsize=(8, 6)) plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, edgecolors='k', cmap=plt.cm.Paired) plt.title('Data test') plt.show(block=False) # CELL 4 accu = utils.test_sklearn_svm(X_train, y_train, X_test, y_test) print('Test accuracy: {:.3f}'.format(accu)) if accu < 0.7: print( 'ERREUR: L\'accuracy est trop faible. Il y a un problème avec les données. Vous pouvez essayer de refaire le mélange (case ci-haut).' ) # CELL 5 # En premier, vérifier la prédiction du modèle, la "forward pass" # 1. Générer le modèle avec des poids W aléatoires model = LinearClassifier(X_train, y_train, X_val, y_val, num_classes=3, bias=True) # 2. Appeler la fonction qui calcule l'accuracy et la loss moyenne pour l'ensemble des données d'entraînement _, loss = model.global_accuracy_and_cross_entropy_loss(X_train, y_train) # 3. Comparer au résultat attendu loss_attendu = -np.log( 1.0 / 3.0) # résultat aléatoire attendu soit -log(1/nb_classes) print('Sortie: {} Attendu: {}'.format(loss, loss_attendu)) if abs(loss - loss_attendu) > 0.05: print('ERREUR: la sortie de la fonction est incorrecte.') else: print('SUCCÈS') # CELL 6 # Vérification: Vous devez pouvoir faire du surapprentissage sur quelques échantillons. # Si l'accuracy reste faible, votre implémentation a un bogue. 
n_check = 5 X_check = X_train[:n_check] y_check = y_train[:n_check] model = LinearClassifier(X_check, y_check, X_val, y_val, num_classes=3, bias=True) loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train( num_epochs=10, lr=1.0, l2_reg=0.0) accu_train_finale = accu_train_curve[-1] print('Accuracy d\'entraînement, devrait être 1.0: {:.3f}'.format( accu_train_finale)) if accu_train_finale < 0.9999: print('ATTENTION: L\'accuracy n\'est pas 100%.') utils.plot_curves(loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve) else: print('SUCCÈS') # CELL 7 # Prenons encore un petit échantillon et testons différentes valeurs de l2_reg n_check = 5 X_check = X_train[:n_check] y_check = y_train[:n_check] model = LinearClassifier(X_check, y_check, X_val, y_val, num_classes=3, bias=True) for l2_r in np.arange(0, 1, 0.05): loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train( num_epochs=10, lr=1.0, l2_reg=l2_r) print( 'l2_reg= {:.4f} >> Loss/accuracy d\'entraînement : {:.3f} {:.3f}'. format(l2_r, loss_train_curve[-1], accu_train_curve[-1])) # CELL 8 # On instancie et entraîne notre modèle; cette fois-ci avec les données complètes. model = LinearClassifier(X_train, y_train, X_val, y_val, num_classes=3, bias=True) loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train( lr=0.001, num_epochs=25, l2_reg=0.01) # Illustration de la loss et de l'accuracy (le % de biens classés) à chaque itération utils.plot_curves(loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve) print('[Training] Loss: {:.3f} Accuracy: {:.3f}'.format( loss_train_curve[-1], accu_train_curve[-1])) print('[Validation] Loss: {:.3f} Accuracy: {:.3f}'.format( loss_val_curve[-1], accu_val_curve[-1])) # CELL 9 lr_choices = [1e-2, 1e-1, 1.0, 10.0] reg_choices = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6] lr_decay = 0.995 # learning rate is multiplied by this factor after each step best_accu = -1 best_params = None best_model = None best_curves = None for lr, reg in itertools.product(lr_choices, reg_choices): params = (lr, reg) curves = model.train(num_epochs=25, lr=lr, l2_reg=reg, lr_decay=lr_decay) loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = curves val_accu = accu_val_curve[-1] if val_accu > best_accu: print('Best val accuracy: {:.3f} | lr: {:.0e} | l2_reg: {:.0e}'. 
format(val_accu, lr, reg)) best_accu = val_accu best_params = params best_model = model best_curves = curves model = best_model utils.plot_curves(*best_curves) # CELL 10 # On ré-entraîne le modèle avec les meilleurs hyper-paramètres lr, reg = best_params model.train(num_epochs=25, lr=lr, l2_reg=reg, lr_decay=lr_decay) pred = model.predict(X_test) accu = (pred == y_test).mean() print('Test accuracy: {:.3f}'.format(accu)) # CELL 11 h = 0.01 # contrôle la résolution de la grille x_min, x_max = X_[:, 0].min() - .5, X_[:, 0].max() + .5 # Limites de la grille y_min, y_max = X_[:, 1].min() - .5, X_[:, 1].max() + .5 xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) # Créer la grille X_predict = np.c_[xx.ravel(), yy.ravel()] # Convertir la grille en une liste de points Z = model.predict(X_predict) # Classifier chaque point de la grille Z = Z.reshape(xx.shape) # Remettre en 2D plt.figure(figsize=(14, 8)) plt.pcolormesh( xx, yy, Z, cmap=plt.cm.Paired) # Colorier les cases selon les prédictions X_plot, y_plot = X_train, y_train X_plot, y_plot = X_train, y_train plt.scatter(X_plot[:, 0], X_plot[:, 1], c=y_plot, edgecolors='k', cmap=plt.cm.Paired) # Tracer les données plt.xlim(xx.min(), xx.max()) plt.ylim(yy.min(), yy.max()) plt.title('Frontières de décision') plt.show(block=False) # CELL 12 #Choisissez le type de données que vous voulez # NOTE IMPORTANTE: on vous encourage à tester différentes bases de données. Ceci dit, # votre solution sera testée avec Ncircles (N=4). Vous devez donc tester cette option. dataset_type = 'Ncircles' if dataset_type == 'moons': X_, y_ = sklearn.datasets.make_moons(n_samples=200, noise=0.5) num_classes = 2 elif dataset_type == 'gaussian_quantiles': X_, y_ = sklearn.datasets.make_gaussian_quantiles(n_samples=200, n_classes=2) num_classes = 2 elif dataset_type == '4blobs': d = 4 c1a = np.random.randn(50, 2) c1b = np.random.randn(50, 2) + (d, d) c2a = np.random.randn(50, 2) + (0, d) c2b = np.random.randn(50, 2) + (d, 0) X_ = np.concatenate([c1a, c1b, c2a, c2b], axis=0) y_ = np.array([0] * 100 + [1] * 100) num_classes = 2 elif dataset_type == '2circles': X_, y_ = sklearn.datasets.make_circles(n_samples=200) num_classes = 2 elif dataset_type == 'Ncircles': samples_per_class = 100 num_classes = 4 angles = np.linspace(0, 2 * np.pi, samples_per_class) radius = 1.0 + np.arange(num_classes) * 0.3 px = np.cos(angles[:, None]) * radius[None, :] # (100, 3) py = np.sin(angles[:, None]) * radius[None, :] # (100, 3) X_ = np.stack([px, py], axis=-1).reshape( (samples_per_class * num_classes, 2)) X_ += np.random.randn(len(X_[:, 0]), 2) / 8 y_ = np.array(list(range(num_classes)) * samples_per_class) else: print('Invalid dataset type') # CELL 13 plt.figure() plt.scatter(X_[:, 0], X_[:, 1], c=y_, cmap=plt.cm.Paired) plt.title('Données complètes') plt.show(block=False) # CELL 14 train_proportion = 0.5 val_proportion = 0.2 num_train = int(len(X_) * train_proportion) num_val = int(len(X_) * val_proportion) np.random.seed(0) idx = np.random.permutation(len(X_)) train_idx = idx[:num_train] val_idx = idx[num_train:num_train + num_val] test_idx = idx[num_train + num_val:] X_train = X_[train_idx] y_train = y_[train_idx] X_val = X_[val_idx] y_val = y_[val_idx] X_test = X_[test_idx] y_test = y_[test_idx] # CELL 15 # Affichons maintenant les données d'entraînement, de validation et de test. 
plt.figure() plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=plt.cm.Paired) plt.title('Train') plt.show(block=False) plt.figure() plt.scatter(X_val[:, 0], X_val[:, 1], c=y_val, cmap=plt.cm.Paired) plt.title('Validation') plt.show(block=False) plt.figure() plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, cmap=plt.cm.Paired) plt.title('Test') plt.show(block=False) # CELL 16 num_hidden_neurons = 10 model = TwoLayerClassifier(X_train, y_train, X_val, y_val, num_features=2, num_hidden_neurons=num_hidden_neurons, num_classes=num_classes) # CELL 17 # Vérifier que la sortie du réseau initialisé au hasard donne bien une prédiction égale pour chaque classe num_hidden_neurons = 10 model = TwoLayerClassifier(X_train, y_train, X_val, y_val, num_features=2, num_hidden_neurons=num_hidden_neurons, num_classes=num_classes) # 2. Appeler la fonction qui calcule l'accuracy et la loss moyenne pour l'ensemble des données d'entraînement _, loss = model.global_accuracy_and_cross_entropy_loss(X_train, y_train, 0) # 3. Comparer au résultat attendu loss_attendu = -np.log( 1.0 / num_classes) # résultat aléatoire attendu soit -log(1/nb_classes) print('Sortie: {} Attendu: {}'.format(loss, loss_attendu)) if abs(loss - loss_attendu) > 0.05: print('ERREUR: la sortie de la fonction est incorrecte.') else: print('SUCCÈS') # CELL 18 # Vérifier que le fait d'augmenter la régularisation L2 augmente également la loss for l2_r in np.arange(0, 2, 0.1): _, loss = model.global_accuracy_and_cross_entropy_loss( X_train, y_train, l2_r) print( 'l2_reg= {:.4f} >> Loss/accuracy d\'entraînement : {:.3f}'.format( l2_r, loss)) # CELL 19 # Vérification: Vous devez pouvoir faire du surapprentissage sur quelques échantillons. # Si l'accuracy reste faible, votre implémentation a un bogue. n_check = 5 X_check = X_train[:n_check] y_check = y_train[:n_check] model = TwoLayerClassifier(X_check, y_check, X_val, y_val, num_features=2, num_hidden_neurons=num_hidden_neurons, num_classes=num_classes) loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train( num_epochs=200, lr=0.01, l2_reg=0.0) print('Accuracy d\'entraînement, devrait être 1.0: {:.3f}'.format( accu_train_curve[-1])) if accu_train_curve[-1] < 0.98: print('ATTENTION: L\'accuracy n\'est pas 100%.') utils.plot_curves(loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve) else: print('SUCCÈS') # CELL 20 # Vérifier que le fait d'entraîner avec une régularisation L2 croissante augmente la loss et, éventuellement, diminue l'accuracy for l2_r in np.arange(0, 1, 0.1): loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train( num_epochs=200, lr=0.01, l2_reg=l2_r) print( 'l2_reg= {:.4f} >> Loss/accuracy d\'entraînement : {:.3f} {:.3f}'. format(l2_r, loss_train_curve[-1], accu_train_curve[-1])) # CELL 21 # On instancie notre modèle; cette fois-ci avec les données complètes. 
num_hidden_neurons = 20 model = TwoLayerClassifier(X_train, y_train, X_val, y_val, num_features=2, num_hidden_neurons=num_hidden_neurons, num_classes=num_classes, activation='relu') # CELL 22 loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = model.train( num_epochs=200, lr=1e-2, l2_reg=0.0, momentum=0.5) # Illustration de la loss et de l'accuracy (le % de biens classés) à chaque itération utils.plot_curves(loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve) print('[Training] Loss: {:.3f} Accuracy: {:.3f}'.format( loss_train_curve[-1], accu_train_curve[-1])) print('[Validation] Loss: {:.3f} Accuracy: {:.3f}'.format( loss_val_curve[-1], accu_val_curve[-1])) # CELL 23 # Find the best hyperparameters lr and l2_reg lr_choices = [1e-4, 1e-3, 1e-2] reg_choices = [1e-1, 1e-2, 1e-3, 1e-4, 0] lr_decay = 1.0 # 0.995 # learning rate is multiplied by this factor after each step best_accu = -1 best_params = None best_model = None best_curves = None for lr, reg in itertools.product(lr_choices, reg_choices): params = (lr, reg) curves = model.train(num_epochs=50, lr=lr, l2_reg=reg, lr_decay=lr_decay, momentum=0.5) loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = curves val_accu = accu_val_curve[-1] if val_accu > best_accu: print('Best val accuracy: {:.3f} | lr: {:.0e} | l2_reg: {:.0e}'. format(val_accu, lr, reg)) best_accu = val_accu best_params = params best_model = model best_curves = curves else: print('accuracy: {:.3f} | lr: {:.0e} | l2_reg: {:.0e}'.format( val_accu, lr, reg)) model = best_model utils.plot_curves(*best_curves) # CELL 24 # On ré-entraîne le modèle avec les meilleurs hyper-paramètres lr, reg = best_params print(best_params) curves = model.train(num_epochs=200, lr=lr, l2_reg=reg, momentum=0.5) loss_train_curve, loss_val_curve, accu_train_curve, accu_val_curve = curves pred = model.predict(X_test) accu = (pred == y_test).mean() print('Test accuracy: {:.3f}'.format(accu)) utils.plot_curves(*curves) # CELL 25 # Visualisation des résultats h = 0.05 # contrôle la résolution de la grille x_min, x_max = X_[:, 0].min() - .5, X_[:, 0].max() + .5 # Limites de la grille y_min, y_max = X_[:, 1].min() - .5, X_[:, 1].max() + .5 xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h)) # Créer la grille X_predict = np.c_[xx.ravel(), yy.ravel()] # Convertir la grille en une liste de points Z = model.predict(X_predict) # Classifier chaque point de la grille Z = Z.reshape(xx.shape) # Remettre en 2D plt.figure(figsize=(14, 8)) plt.pcolormesh( xx, yy, Z, cmap=plt.cm.Paired) # Colorier les cases selon les prédictions X_plot, y_plot = X_, y_ plt.scatter(X_plot[:, 0], X_plot[:, 1], c=y_plot, edgecolors='k', cmap=plt.cm.Paired) # Tracer les données plt.xlim(xx.min(), xx.max()) plt.ylim(yy.min(), yy.max()) plt.xticks(()) plt.yticks(()) plt.title('Frontières de décision') plt.show()
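# NOTE (editor): utils.plot_curves(loss_train, loss_val, acc_train, acc_val) is used
# throughout the notebook above but its body is not included here. The helper below is
# a minimal sketch of what it might do (assumed, the course-provided utility may differ):
# one panel for the loss curves, one for the accuracy curves.
import matplotlib.pyplot as plt

def plot_curves_sketch(loss_train, loss_val, acc_train, acc_val):
    fig, (ax_loss, ax_acc) = plt.subplots(1, 2, figsize=(12, 4))
    ax_loss.plot(loss_train, label='train')
    ax_loss.plot(loss_val, label='val')
    ax_loss.set_xlabel('epoch'), ax_loss.set_ylabel('loss'), ax_loss.legend()
    ax_acc.plot(acc_train, label='train')
    ax_acc.plot(acc_val, label='val')
    ax_acc.set_xlabel('epoch'), ax_acc.set_ylabel('accuracy'), ax_acc.legend()
    plt.show(block=False)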
def train_model_1(model, n_epoch, labelsdict, criterion, optimizer, device, trainloader, validloader, train_data, model_name, model_path, model_path_best, loss_graph, accuracy_graph, start_epoch=0, valid_loss=1000): """ Commence training of model model: model used n_epoch: number of epoch used for training labelsdict: dictionary containing class names which correnspond to their respective indexes optimizer: choice of optimizer use for training device: 'cuda' or 'cpu' (speed up training) trainloader: input training data split in batches validloader: input validation data split in batches train_data: input training data model_name: name of model indicated model_path: path where model checkpoint is saved at every epoch model_path_best: path where model yields best training result is saved (lowest val acc) loss_graph: path of graph indicating training and validation losses of model is saved accuracy_graph: path of graph indicating training and validation accuracies of model is saved start_epoch: indicate start epoch.(where start epoch != 0 when model is not trained from scratch but loaded and retrained) valid_acc: indicate value of best validation accuracy during point of training """ print( f'Training custom CNN Model to distinguish normal and infected lungs') print(f'total epochs: {n_epoch}') if start_epoch != 0: print(f'Retraining model continuing from epoch {start_epoch+1}') n_in = next(model.fc2.modules()).in_features model.to(device) start = time.time() epochs = n_epoch steps = 0 running_loss = 0 running_acc = 0 print_every = len(trainloader) train_loss = [] val_loss = [] train_acc = [] val_acc = [] val_loss_max = valid_loss Singapore = pytz.timezone('Asia/Singapore') for e in range(start_epoch, epochs): # Make sure training is on model.train() for images, labels, path in trainloader: # for each batch images, labels = images.to(device), labels.to(device) steps += 1 optimizer.zero_grad() output = model.forward(images) # getting loss loss = criterion(output, labels) loss.backward() optimizer.step() # getting accuracy ps = torch.exp(output) equality = (labels == ps.max(dim=1)[1]) running_acc += equality.type(torch.FloatTensor).mean() running_loss += loss.item() # At the end of every epoch... 
if steps % print_every == 0: # Eval mode for predictions model.eval() # Turn off gradients for validation with torch.no_grad(): test_loss, accuracy = validation(model, validloader, criterion, device) # log results at every epoch print( "Epoch: {}/{} - ".format(e + 1, epochs), "Time: {} ".format(datetime.now(Singapore)), "Training Loss: {:.3f} - ".format(running_loss / len(trainloader)), "Validation Loss: {:.3f} - ".format(test_loss / len(validloader)), "Training Accuracy: {:.3f} - ".format(running_acc / len(trainloader)), "Validation Accuracy: {:.3f}".format(accuracy / len(validloader))) # saving results into a list for plotting train_loss.append(running_loss / print_every) val_loss.append(test_loss / len(validloader)) train_acc.append(running_acc / len(trainloader)) val_acc.append(accuracy / len(validloader)) valid_loss = test_loss / len(validloader) # saving checkpoint model.n_in = n_in model.n_out = len(labelsdict) model.labelsdict = labelsdict model.optimizer = optimizer model.optimizer_state_dict = optimizer.state_dict() model.model_name = model_name model.loss = criterion model.val_loss = valid_loss loss_acc = [] loss_acc.append(train_loss) loss_acc.append(val_loss) loss_acc.append(train_acc) loss_acc.append(val_acc) model.loss_acc = loss_acc model.start_epoch = start_epoch model.epoch = e + 1 path = model_path path_best = model_path_best # saving checkpoint model at every epoch save_checkpoint(model, path) # saving best model during training, best indicated by highest validation accuracy obtained if valid_loss <= val_loss_max: print( 'Validation loss decreased ({:.6f} --> {:.6f}). Saving model ...' .format(val_loss_max, valid_loss)) # update threshold val_loss_max = valid_loss save_checkpoint(model, path_best) # reset training loss and accuracy after validation, which is used again for subsequent training epoch running_loss = 0 running_acc = 0 print('model:', model_name, '- epochs:', n_epoch) print(f"Run time: {(time.time() - start)/60:.3f} min") # plotting the graph on training and validation loss for model plot_curves(start_epoch, model.epoch, loss_acc, model_name, loss_graph, accuracy_graph) return model
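# NOTE (editor): validation(model, validloader, criterion, device) is called in
# train_model_1 but not defined in this snippet. Minimal sketch of the assumed
# behaviour: it returns loss and accuracy summed over validation batches, which the
# caller then divides by len(validloader). The real helper may differ.
import torch

def validation_sketch(model, validloader, criterion, device):
    test_loss = 0.0
    accuracy = 0.0
    for images, labels, _path in validloader:  # loader yields (image, label, path)
        images, labels = images.to(device), labels.to(device)
        output = model.forward(images)
        test_loss += criterion(output, labels).item()
        ps = torch.exp(output)
        equality = (labels == ps.max(dim=1)[1])
        accuracy += equality.type(torch.FloatTensor).mean().item()
    return test_loss, accuracy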
def main(premise_hidden_size, hypo_hidden_size, linear_hidden_size, interaction_type, device, kind, num_layers=1, bidirectional=True, kernel_size=3, lr=1e-4, test=False, model_dir='models'): valid_types = ('cat', 'element_wise_mult') if interaction_type not in valid_types: raise ValueError('interaction_type can only be: ', valid_types) # data batch_size = 32 save_freq = 500 max_epochs = 40 train_loader, val_loader = data.get_loaders(batch_size, test=test) # model embed_size = 300 ind2vec = data.get_table_lookup() if kind == 'rnn': model = models.SNLI_Model(ind2vec, embed_size, premise_hidden_size, hypo_hidden_size, linear_hidden_size, interaction_type, device, kind='rnn', num_layers=num_layers, bidirectional=bidirectional) else: model = models.SNLI_Model(ind2vec, embed_size, premise_hidden_size, hypo_hidden_size, linear_hidden_size, interaction_type, device, kind='cnn', kernel_size=kernel_size) model = model.to(device) optimizer = torch.optim.Adam( [param for param in model.parameters() if param.requires_grad], lr=lr) loss_fn = torch.nn.CrossEntropyLoss() model_name = f'{kind}_model_{premise_hidden_size}_{interaction_type}' model_dir = os.path.join(model_dir, model_name) train_helper = train_helpers.TrainHelper(device, model, loss_fn, optimizer, models.batch_params_key, model_dir, test) train_loss, val_loss, train_acc, val_acc = train_helper.train_loop( train_loader, val_loader, max_epochs=max_epochs, save_freq=save_freq) if 'cpu' in device: os.makedirs('figures', exist_ok=True) path = f'figures/{model_name}' utils.plot_curves(train_loss, val_loss, train_acc, val_acc, path) utils.save_pkl_data(train_loss, 'train_loss.p', data_dir=model_dir) utils.save_pkl_data(val_loss, 'val_loss.p', data_dir=model_dir) utils.save_pkl_data(train_acc, 'train_acc.p', data_dir=model_dir) utils.save_pkl_data(val_acc, 'val_acc.p', data_dir=model_dir)
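# NOTE (editor): hypothetical invocation of main() above, for illustration only; the
# hidden sizes and device string are placeholder values, not taken from the source.
if __name__ == '__main__':
    main(premise_hidden_size=256, hypo_hidden_size=256, linear_hidden_size=128,
         interaction_type='cat', device='cpu', kind='rnn', test=True)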
def main(): ######################################################################## ######################## training parameters ########################### ######################################################################## parser = argparse.ArgumentParser() parser.add_argument('--dataset', type=str, default='ImageNet', metavar='N', help='dataset to run experiments on') parser.add_argument( '--batch_size', type=int, default=256, metavar='N', help= 'input batch size for training (default: 256; a batch size of 64 gives worse ImageNet performance, so keep the default)' ) parser.add_argument('--exp', type=str, default='default', metavar='N', help='name of experiment') parser.add_argument('--epochs', type=int, default=30, metavar='N', help='number of epochs to train (default: 30)') parser.add_argument('--lr', type=float, default=0.2, metavar='LR', help='learning rate (default: 0.2)') parser.add_argument('--weight_decay', type=float, default=5 * 1e-4, help='weight decay (default: 5e-4)') parser.add_argument('--momentum', type=float, default=0.9, metavar='M', help='SGD momentum (default: 0.9)') parser.add_argument('--step_size', type=float, default=10, metavar='M', help='StepLR step size (default: 10)') parser.add_argument('--gamma', type=float, default=0.1, metavar='M', help='StepLR decay factor (default: 0.1)') parser.add_argument('--load_model', type=str, default=None, help='model to initialise from') parser.add_argument('--caffe', action='store_true', default=False, help='caffe pretrained model') parser.add_argument('--test', action='store_true', default=False, help='run in test mode') parser.add_argument( '--ensemble_inference', action='store_true', default=True, help='run in ensemble inference mode' ) # testing always runs in ensemble inference mode anyway 
parser.add_argument('--no-cuda', action='store_true', default=False, help='disables CUDA training') parser.add_argument('--seed', type=int, default=5021, metavar='S', help='random seed (default: 5021)') parser.add_argument( '--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') parser.add_argument( '--stopping_criterion', type=int, default=15, metavar='N', ) parser.add_argument( '--low_threshold', type=int, default=0, metavar='N', ) parser.add_argument( '--high_threshold', type=int, default=100000, metavar='N', ) parser.add_argument( '--open_ratio', type=int, default=1, help='ratio of closed_set to open_set data', ) parser.add_argument( '--picker', type=str, default='generalist', help= 'dataloader or model picker - experts | generalist : experts uses manyshot, medianshot, fewshot partitioning; \ generalist uses the generalist model', ) parser.add_argument( '--num_learnable', type=int, default='-1', help= 'number of learnable layers : -1 ( all ) | 1 ( only classifier ) | 2 ( classifier and last fc ) | 3 - 6 ( classifier, fc + $ind - 2$ resnet super-blocks ) ' ) parser.add_argument('--scheduler', type=str, default='stepLR', help=' stepLR | cosine lr scheduler') parser.add_argument('--max_epochs', type=int, default=None, help='max number of epochs, for cosine lr scheduler') args = parser.parse_args() print("\n==================Options=================") pprint(vars(args), indent=4) print("==========================================\n") use_cuda = not args.no_cuda and torch.cuda.is_available() # make everything deterministic if (args.seed is not None): print('Seeding everything with seed {}.'.format(args.seed)) seed_everything(args.seed) else: print('Note : Seed is random.') device = torch.device("cuda" if use_cuda else "cpu") exp_dir = os.path.join('checkpoint', args.exp) if not os.path.isdir(exp_dir): os.makedirs(exp_dir) kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {} # batch size settings : note that these are important for memory and performance reasons if (args.dataset.lower() == 'imagenet' and args.test): args.batch_size = 64 elif (args.dataset.lower() == 'imagenet' and not (args.test)): args.batch_size = 256 elif (args.dataset.lower() == 'places' and not (args.test)): args.batch_size = 32 elif (args.dataset.lower() == 'places' and args.test): args.batch_size = 8 ######################################################################## ######################## load data and pre-trained models ############## ######################################################################## print('Loading train loader.') train_loader = torch.utils.data.DataLoader(Threshold_Dataset( root=data_root[args.dataset], orig_txt='./data/{}_LT/{}_LT_train.txt'.format(args.dataset, args.dataset), txt='./data/{}_LT/{}_LT_train.txt'.format(args.dataset, args.dataset), low_threshold=args.low_threshold, high_threshold=args.high_threshold, open_ratio=args.open_ratio, transform=data_transforms['train'], picker=args.picker), batch_size=args.batch_size, shuffle=True, **kwargs) print('Loading val loader.') val_loader = torch.utils.data.DataLoader(Threshold_Dataset( root=data_root[args.dataset], orig_txt='./data/{}_LT/{}_LT_train.txt'.format(args.dataset, args.dataset), txt='./data/{}_LT/{}_LT_val.txt'.format(args.dataset, args.dataset), low_threshold=args.low_threshold, high_threshold=args.high_threshold, open_ratio=1, transform=data_transforms['val'], picker=args.picker), batch_size=args.batch_size, shuffle=True, **kwargs) num_classes = 
train_loader.dataset.num_classes + 1 - int( args.picker == 'generalist' ) # add 1 for the open/dustbin class if not generalist model if (args.dataset.lower() == 'imagenet'): feature_extractor = create_model_resnet10().to( device) # use this for imagenet args.lr = 1e-1 else: feature_extractor = create_model_resnet152(caffe=True).to( device ) # use this for places. pass caffe=true to load pretrained imagenet model args.lr = 1e-2 print('Learning rate : {:.4f}'.format(args.lr)) classifier = DotProduct_Classifier(num_classes=num_classes, feat_dim=512).to(device) optimizer = torch.optim.SGD(chain(feature_extractor.parameters(), classifier.parameters()), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay) if (args.scheduler == 'stepLR'): scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma) print('Using StepLR scheduler with params, stepsize : {}, gamma : {}'. format(args.step_size, args.gamma)) elif (args.scheduler == 'cosine'): scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.max_epochs) print( 'Using CosineAnnealingLR scheduler with params, T_max : {}'.format( args.max_epochs)) else: raise Exception('Invalid scheduler argument.') # load pretrained model if (args.load_model is not None): if (not args.caffe): pretrained_model = torch.load(args.load_model) weights_feat = pretrained_model['state_dict_best']['feat_model'] weights_feat = { k: weights_feat['module.' + k] if 'module.' + k in weights_feat else weights_feat[k] for k in feature_extractor.state_dict() } feature_extractor.load_state_dict( weights_feat) # loading feature extractor weights weights_class = pretrained_model['state_dict_best']['classifier'] weights_class = { k: weights_class['module.' + k] if 'module.' + k in weights_class else weights_class[k] for k in classifier.state_dict() } if (classifier.state_dict()['fc.weight'].shape == weights_class['fc.weight'].shape): classifier.load_state_dict( weights_class ) # loading classifier weights if classifiers match else: print( 'Classifiers of pretrained model and current model are different with dimensions : ', classifier.state_dict()['fc.weight'].shape, weights_class['fc.weight'].shape) print( 'Loaded pretrained model on entire dataset from epoch : {:d} with acc : {:.4f}' .format(pretrained_model['best_epoch'], pretrained_model['best_acc'])) else: weights_feat = torch.load(args.load_model) weights_feat = { k: weights_feat[k] if k in weights_feat else feature_extractor.state_dict()[k] for k in feature_extractor.state_dict() } feature_extractor.load_state_dict( weights_feat) # loading feature extractor weights print('Loaded imagenet pretrained model from Caffe.') ######################################################################## ######################## set learnable layers ########################## ######################################################################## if (args.num_learnable == -1): print('Learning feature extractor and classifier.') elif (args.num_learnable >= 1 and args.num_learnable <= 6): if (args.num_learnable == 1): set_weights('feature_extractor', feature_extractor, False) set_weights('classifier', classifier, True) elif (args.num_learnable == 2): print('Setting feature extractor weights.') for ind, (name, layer) in enumerate(feature_extractor.named_children()): if (ind == 9): set_weights(name, layer, True) else: set_weights(name, layer, False) set_weights('classifier', classifier, True) else: print('Setting feature extractor weights.') for ind, (name, layer) in 
enumerate(feature_extractor.named_children()): if (ind >= 10 - args.num_learnable): set_weights(name, layer, True) else: set_weights(name, layer, False) set_weights('classifier', classifier, True) else: raise Exception('Invalid num_learnable layers : {}'.format( args.num_learnable)) ######################################################################## ######################## training with early stopping ################## ######################################################################## if (not args.test): results = vars(args) results['train_losses'] = [] results['train_accuracies'] = [] results['test_losses'] = [] results['test_accuracies'] = [] best_acc, best_epoch = -0.1, 0 epoch = 1 while (True): sys.stdout.flush() train_loss, train_err = train(args, feature_extractor, classifier, device, train_loader, optimizer, scheduler, epoch) test_loss, test_err = test(args, feature_extractor, classifier, device, val_loader) results['train_losses'].append(train_loss) results['test_losses'].append(test_loss) results['train_accuracies'].append(train_err) results['test_accuracies'].append(test_err) if (test_err > best_acc): best_acc = test_err best_epoch = epoch results['best_acc'], results[ 'best_epoch'] = best_acc, best_epoch # save best model best_model_weights = {} best_model_weights['feat_model'] = copy.deepcopy( feature_extractor.state_dict()) best_model_weights['classifier'] = copy.deepcopy( classifier.state_dict()) model_states = { 'epoch': epoch, 'best_epoch': best_epoch, 'state_dict_best': best_model_weights, 'best_acc': best_acc, } torch.save(model_states, os.path.join(exp_dir, "best_model.pt")) elif (epoch > best_epoch + args.stopping_criterion): print('Best model obtained. Error : ', best_acc) plot_curves(results, exp_dir) break elif (args.scheduler == 'cosine' and epoch == args.max_epochs): print('Best model obtained. Error : ', best_acc) plot_curves(results, exp_dir) break savepath = os.path.join(exp_dir, 'results.pickle') with open(savepath, 'wb') as f: pickle.dump(results, f) plot_curves(results, exp_dir) epoch = epoch + 1
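# NOTE (editor): set_weights(name, module, learnable) is used above to freeze or
# unfreeze parts of the backbone and classifier but is not defined in this snippet.
# Minimal sketch of the assumed behaviour (the original utility may also switch
# BatchNorm layers to eval mode or log more detail).
def set_weights_sketch(name, module, learnable):
    print('Setting requires_grad={} for {}'.format(learnable, name))
    for param in module.parameters():
        param.requires_grad = learnable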
import argparse from dataloader import load_data from utils import plot_curves from classifiers.LinearSVM import LinearSVM from classifiers.RbfSVM import RbfSVM if __name__ == "__main__": parser = argparse.ArgumentParser(description="Choose model") parser.add_argument("--type", type=str, default="Kernel", help="Linear/Kernel") parser.add_argument("--norm", type=int, default=1, help="Normalize or not") args = parser.parse_args() data = load_data(args.norm) if args.type == 'Linear': svm = LinearSVM(data) else: svm = RbfSVM(data) loss_train_list, loss_test, acc_test = svm.run() plot_curves(loss_train_list)
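# NOTE (editor): example invocations of the script above, for illustration only
# (the actual script filename is not given in this excerpt):
#   python <script>.py --type Linear --norm 1
#   python <script>.py --type Kernel --norm 0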
def run(split_file, pose_data_root, configs, save_model_to=None): epochs = configs.max_epochs log_interval = configs.log_interval num_samples = configs.num_samples hidden_size = configs.hidden_size drop_p = configs.drop_p num_stages = configs.num_stages # setup dataset train_dataset = Sign_Dataset(index_file_path=split_file, split=['train', 'val'], pose_root=pose_data_root, img_transforms=None, video_transforms=None, num_samples=num_samples) train_data_loader = torch.utils.data.DataLoader( dataset=train_dataset, batch_size=configs.batch_size, shuffle=True) val_dataset = Sign_Dataset(index_file_path=split_file, split='test', pose_root=pose_data_root, img_transforms=None, video_transforms=None, num_samples=num_samples, sample_strategy='k_copies') val_data_loader = torch.utils.data.DataLoader( dataset=val_dataset, batch_size=configs.batch_size, shuffle=True) logging.info('\n'.join( ['Class labels are: '] + [(str(i) + ' - ' + label) for i, label in enumerate(train_dataset.label_encoder.classes_)])) # setup the model model = GCN_muti_att(input_feature=num_samples * 2, hidden_feature=num_samples * 2, num_class=len(train_dataset.label_encoder.classes_), p_dropout=drop_p, num_stage=num_stages).cuda() # setup training parameters, learning rate, optimizer, scheduler lr = configs.init_lr # optimizer = optim.SGD(vgg_gru.parameters(), lr=lr, momentum=0.00001) optimizer = optim.Adam(model.parameters(), lr=lr, eps=configs.adam_eps, weight_decay=configs.adam_weight_decay) # record training process epoch_train_losses = [] epoch_train_scores = [] epoch_val_losses = [] epoch_val_scores = [] best_test_acc = 0 # start training for epoch in range(int(epochs)): # train, test model print('start training.') train_losses, train_scores, train_gts, train_preds = train( log_interval, model, train_data_loader, optimizer, epoch) print('start testing.') val_loss, val_score, val_gts, val_preds, incorrect_samples = validation( model, val_data_loader, epoch, save_to=save_model_to) # print('start testing.') # val_loss, val_score, val_gts, val_preds, incorrect_samples = validation(model, # val_data_loader, epoch, # save_to=save_model_to) logging.info( '========================\nEpoch: {} Average loss: {:.4f}'.format( epoch, val_loss)) logging.info('Top-1 acc: {:.4f}'.format(100 * val_score[0])) logging.info('Top-3 acc: {:.4f}'.format(100 * val_score[1])) logging.info('Top-5 acc: {:.4f}'.format(100 * val_score[2])) logging.info('Top-10 acc: {:.4f}'.format(100 * val_score[3])) logging.info('Top-30 acc: {:.4f}'.format(100 * val_score[4])) logging.debug('mislabelled val. 
instances: ' + str(incorrect_samples)) # save results epoch_train_losses.append(train_losses) epoch_train_scores.append(train_scores) epoch_val_losses.append(val_loss) epoch_val_scores.append(val_score[0]) # save all train test results np.save('output/epoch_training_losses.npy', np.array(epoch_train_losses)) np.save('output/epoch_training_scores.npy', np.array(epoch_train_scores)) np.save('output/epoch_test_loss.npy', np.array(epoch_val_losses)) np.save('output/epoch_test_score.npy', np.array(epoch_val_scores)) if val_score[0] > best_test_acc: best_test_acc = val_score[0] best_epoch_num = epoch torch.save( model.state_dict(), os.path.join( 'checkpoints', subset, 'gcn_epoch={}_val_acc={}.pth'.format( best_epoch_num, best_test_acc))) utils.plot_curves() class_names = train_dataset.label_encoder.classes_ utils.plot_confusion_matrix(train_gts, train_preds, classes=class_names, normalize=False, save_to='output/train-conf-mat') utils.plot_confusion_matrix(val_gts, val_preds, classes=class_names, normalize=False, save_to='output/val-conf-mat')
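# NOTE (editor): val_score above holds top-1/3/5/10/30 accuracies returned by
# validation(), which is not shown here. The function below is a minimal sketch of a
# top-k accuracy computation it could be using (assumed, not taken from the original).
import torch

def topk_accuracies_sketch(logits, targets, ks=(1, 3, 5, 10, 30)):
    # logits: (N, num_classes), targets: (N,) class indices; requires num_classes >= max(ks)
    maxk = max(ks)
    _, pred = logits.topk(maxk, dim=1)        # (N, maxk) predicted class indices
    correct = pred.eq(targets.view(-1, 1))    # (N, maxk) boolean hit matrix
    return [correct[:, :k].any(dim=1).float().mean().item() for k in ks]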
def main(args): config = cfg.get_default() cfg.set_params(config, args.config_path, args.set) cfg.freeze(config, True) print('- Configuration:') print(config) if config.dataset == 'FluidIceShake': n_groups = 2 n_particles = 348 elif config.dataset == 'RigidFall': n_groups = 3 n_particles = 192 elif config.dataset == 'MassRope': n_groups = 2 n_particles = 95 else: raise ValueError('Unsupported environment') train_loader = get_dataloader(config, 'train') valid_loader = get_dataloader(config, 'valid') # build model model = PointSetNet( config.n_frames, config.pred_hidden, n_particles, n_groups, config.batchnorm, single_out=config.single_out, recur_pred=config.recur_pred, use_temp_encoder=config.use_temp_encoder, conv_temp_encoder=config.conv_temp_encoder, temp_embedding_size=config.temp_embedding_size).to(_DEVICE) print('- Model architecture:') print(model) if config.load_path != '': print('- Loading model from {}'.format(config.load_path)) model.load_state_dict(torch.load(config.load_path)) # build optimizer optimizer = torch.optim.Adam(model.parameters(), lr=config.lr) if not config.debug: print('- Training start') stats = { 'epoch': [], 'valid_losses': [], 'train_losses': [], 'train_pos_losses': [], 'train_group_losses': [], 'valid_pos_losses': [], 'valid_group_losses': [], } best_valid_loss = np.Inf for epoch in range(config.n_epochs): # training print('- Training epoch {:d}'.format(epoch)) epoch_train_losses = [] epoch_train_pos_losses = [] epoch_train_grp_losses = [] pbar = tqdm(train_loader) did_vis = False for images, positions, groups in pbar: (model, optimizer, train_loss, train_pos_loss, train_grp_loss) = step( config, model, optimizer, images, positions, groups, True) epoch_train_losses.append(train_loss) epoch_train_pos_losses.append(train_pos_loss) epoch_train_grp_losses.append(train_grp_loss) pbar.set_description('Train loss {:f}'.format(train_loss)) # visualize training results if not did_vis \ and config.vis and (epoch + 1) % config.vis_every == 0: pbar.set_description('Generating video') visualize(config, model, epoch, n_particles, images, positions, groups, True) did_vis = True train_loss = np.average(epoch_train_losses) train_pos_loss = np.average(epoch_train_pos_losses) train_grp_loss = np.average(epoch_train_grp_losses) print(('- Finish training epoch {:d}, training loss {:f},' ' pos loss {:f}, group loss {:f}').format( epoch, train_loss, train_pos_loss, train_grp_loss)) # validation print('- Evaluating epoch {:d}'.format(epoch)) epoch_valid_losses = [] epoch_valid_pos_losses = [] epoch_valid_grp_losses = [] pbar = tqdm(valid_loader) did_vis = False for images, positions, groups in pbar: with torch.no_grad(): (model, _, valid_loss, valid_pos_loss, valid_grp_loss) = step( config, model, optimizer, images, positions, groups, False) epoch_valid_losses.append(valid_loss) epoch_valid_pos_losses.append(valid_pos_loss) epoch_valid_grp_losses.append(valid_grp_loss) pbar.set_description('Valid loss {:f}'.format(valid_loss)) # visualize validation results if not did_vis \ and config.vis and (epoch + 1) % config.vis_every == 0: pbar.set_description('Generating video') visualize(config, model, epoch, n_particles, images, positions, groups, False) did_vis = True valid_loss = np.average(epoch_valid_losses) valid_pos_loss = np.average(epoch_valid_pos_losses) valid_grp_loss = np.average(epoch_valid_grp_losses) print('- Finish eval epoch {:d}, validation loss {:f}'.format( epoch, valid_loss)) if valid_loss < best_valid_loss: print('- Best model') best_valid_loss = valid_loss 
torch.save(model.state_dict(), os.path.join(config.run_dir, 'checkpoint_best.pth')) torch.save(model.state_dict(), os.path.join(config.run_dir, 'checkpoint_latest.pth')) print() stats['epoch'].append(epoch) stats['train_losses'].append(train_loss) stats['valid_losses'].append(valid_loss) stats['train_pos_losses'].append(train_pos_loss) stats['train_group_losses'].append(train_grp_loss) stats['valid_pos_losses'].append(valid_pos_loss) stats['valid_group_losses'].append(valid_grp_loss) with open(os.path.join(config.run_dir, 'stats.json'), 'w') as fout: json.dump(stats, fout) # Plot loss curves plot_dir = os.path.join(config.run_dir, 'curves') if not os.path.isdir(plot_dir): os.makedirs(plot_dir) utils.plot_curves( x=stats['epoch'], ys=[stats['train_losses'], stats['valid_losses']], save_path=os.path.join(plot_dir, 'loss.png'), curve_labels=['train', 'valid'], x_label='epoch', y_label='total_loss', title='Total loss') utils.plot_curves( x=stats['epoch'], ys=[stats['train_pos_losses'], stats['valid_pos_losses']], save_path=os.path.join(plot_dir, 'loss_pos.png'), curve_labels=['train', 'valid'], x_label='epoch', y_label='pos_loss', title='Position loss') utils.plot_curves( x=stats['epoch'], ys=[stats['train_group_losses'], stats['valid_group_losses']], save_path=os.path.join(plot_dir, 'loss_grp.png'), curve_labels=['train', 'valid'], x_label='epoch', y_label='grp_loss', title='Grouping loss') else: # Debug on a single batch images, positions, groups = next(iter(train_loader)) images = images[:5, :15, ...] positions = positions[:5, :15, ...] groups = groups[:5, ...] for epoch in range(config.n_epochs): (model, optimizer, train_loss, train_pos_loss, train_grp_loss) = step( config, model, optimizer, images, positions, groups, True) print(train_loss, train_pos_loss, train_grp_loss)
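# NOTE (editor): utils.plot_curves is not shown in this excerpt; the sketch below is
# inferred from the keyword arguments at the call sites above (x, ys, save_path,
# curve_labels, x_label, y_label, title) and may differ from the real helper.
import matplotlib
matplotlib.use('Agg')  # render to files without a display
import matplotlib.pyplot as plt

def plot_curves_multi_sketch(x, ys, save_path, curve_labels, x_label, y_label, title):
    fig, ax = plt.subplots()
    for y, label in zip(ys, curve_labels):
        ax.plot(x, y, label=label)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_title(title)
    ax.legend()
    fig.savefig(save_path)
    plt.close(fig)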
train_f1s = [] test_losses = [] test_accs = [] test_f1s = [] print("Training...") for epoch in range(1, epochs + 1): train_loss, train_acc, train_f1, test_loss, test_acc, test_f1 = train( model, device, ld_train, ld_test, optimizer, epoch, weights) train_losses.append(train_loss) train_accs.append(train_acc) train_f1s.append(train_f1) test_losses.append(test_loss) test_accs.append(test_acc) test_f1s.append(test_f1) scheduler.step() # save best checkpoint based on test F1 score if epoch == 1: best_test_score = test_f1 if save_dir: if test_f1 >= best_test_score: torch.save(model.state_dict(), save_dir) best_test_score = test_f1 # plot learning curves plot_curves(train_losses, test_losses, "Loss curves") plot_curves(train_accs, test_accs, "Accuracy curves") plot_curves(train_f1s, test_f1s, "F1 curves")
print("Calculating Initial Loss for the Fine Tuning") val_curve = [ utils.test(dnet, cost, trainloader, validationloader, **training_params) ] print("Fine Tuning the NN") for epoch in range(1, args.epochs + 1): utils.train(dnet, cost, optimizer, trainloader, **training_params) losses = utils.test(dnet, cost, trainloader, validationloader, **training_params) val_curve.append(losses) print("Done") autoencoder_fig = utils.plot_curves(val_curveAE, title='Autoencoder Loss Curves') fine_tune_fig = utils.plot_curves(val_curve, title='Fine Tuning Loss Curves') dnet.eval() dnet.cpu() if args.name: fine_tune_fig.savefig("{}_val_curve_fine_tune.png".format(args.name)) autoencoder_fig.savefig("{}_val_curveAE.png".format(args.name)) trainset.save_scaler("{}_standardizer.npz".format(args.name)) th.save(dnet.state_dict(), "{}_fine_tuned_net.pth".format(args.name)) else: fine_tune_fig.savefig("{}_val_curve_fine_tune.png".format(args.name)) autoencoder_fig.savefig("{}_val_curveAE.png".format(args.name)) trainset.save_scaler("data_standardizer.npz") th.save(dnet.state_dict(), "{}_fine_tuned_net.pth".format(args.name))
) print( f'Val loss: {np.round(valid_loss,6)} \t Val acc: {np.round(valid_acc,4)} \t Val acc pp: {np.round(valid_acc_pp,4)}\n' ) bvl = np.round(best_valid_loss, 6) bvl_acc = np.round(val_accuracies[val_losses.index(best_valid_loss)], 4) bacc = np.round(np.max(val_accuracies), 4) if params.post_process: bvl_acc_pp = np.round(val_accuracies_pp[val_losses.index(best_valid_loss)], 4) print( f'End of training: best val loss = {bvl} | associated val_acc = {bvl_acc}, val_acc_pp = {bvl_acc_pp} | best val acc = {bacc}\n' ) else: print( f'End of training: best val loss = {bvl} | associated val_acc = {bvl_acc} | best val acc = {bacc}\n' ) plot_curves(train_losses, train_accuracies, val_losses, val_accuracies, params) if params.post_process: plot_curves(train_losses, train_accuracies_pp, val_losses, val_accuracies_pp, params) ### TESTING params = open_config_file(args.config) print('Beginning testing...') test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False) # load best weights from training (based on params.tracking value) best_model = create_model(params) best_model.load_state_dict(torch.load('best_model.pt')) best_model = best_model.cuda()
def main(): train_loss_history, train_acc_history, valid_loss_history, valid_acc_history = run() plot_curves(train_loss_history, train_acc_history, valid_loss_history, valid_acc_history)