def train_network(nnet):
    """Train *nnet* with SGD, logging diagnostics and saving weights.

    For a pure autoencoder (reconstruction) loss, training is delegated to
    ``train_autoencoder``.  Otherwise runs ``nnet.n_epochs`` epochs of
    backprop with optional per-batch first-epoch diagnostics, learning-rate
    decay/drop, and SVDD-specific radius/center updates, logs objective and
    accuracy per epoch, and dumps final weights.

    :param nnet: network object providing data access, backprop routines,
                 diagnostics storage and weight dumping (project type).
    """
    if Cfg.reconstruction_loss:
        nnet.ae_n_epochs = nnet.n_epochs
        train_autoencoder(nnet)
        return

    print("Starting training with %s" % nnet.sgd_solver)

    # save initial network parameters for diagnostics
    nnet.save_initial_parameters()

    # NOTE: original used bitwise `&` on these booleans; `and` is the
    # idiomatic (short-circuiting) form and equivalent for bool flags.
    if Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
        # initialize diagnostics for first epoch (detailed diagnostics per batch)
        nnet.initialize_diagnostics(Cfg.n_batches + 1)
    else:
        nnet.initialize_diagnostics(nnet.n_epochs)

    # initialize c from mean of network feature representations in deep SVDD
    if Cfg.svdd_loss and Cfg.c_mean_init:
        initialize_c_as_mean(nnet, Cfg.c_mean_init_n_batches)

    for epoch in range(nnet.n_epochs):

        # get copy of current network parameters to track differences between epochs
        nnet.copy_parameters()

        # In each epoch, we do a full pass over the training data:
        start_time = time.time()

        # learning rate decay
        if Cfg.lr_decay:
            decay_learning_rate(nnet, epoch)

        if Cfg.lr_drop and (epoch == Cfg.lr_drop_in_epoch):
            # Drop the learning rate in the specified epoch by factor
            # Cfg.lr_drop_factor: a simple separation of learning into a
            # "region search" and a "fine-tuning" stage.
            lr_new = Cfg.floatX(
                (1.0 / Cfg.lr_drop_factor) * Cfg.learning_rate.get_value())
            print("")
            print("Learning rate drop in epoch {} from {:.6f} to {:.6f}".format(
                epoch, Cfg.floatX(Cfg.learning_rate.get_value()), lr_new))
            print("")
            Cfg.learning_rate.set_value(lr_new)

        # train on epoch
        i_batch = 0
        for batch in nnet.data.get_epoch_train():

            if Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
                # Evaluation before training
                if (epoch == 0) and (i_batch == 0):
                    _, _ = performance(nnet, which_set='train', epoch=i_batch)
                    if nnet.data.n_val > 0:
                        _, _ = performance(nnet, which_set='val', epoch=i_batch)
                    _, _ = performance(nnet, which_set='test', epoch=i_batch)

            # train
            inputs, targets, _ = batch
            if Cfg.svdd_loss:
                if Cfg.block_coordinate:
                    _, _ = nnet.backprop_without_R(inputs, targets)
                elif Cfg.hard_margin:
                    _, _ = nnet.backprop_ball(inputs, targets)
                else:
                    _, _ = nnet.backprop(inputs, targets)
            else:
                _, _ = nnet.backprop(inputs, targets)

            if Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
                # Get detailed diagnostics (per batch) for the first epoch
                if epoch == 0:
                    _, _ = performance(nnet, which_set='train', epoch=i_batch + 1)
                    if nnet.data.n_val > 0:
                        _, _ = performance(nnet, which_set='val', epoch=i_batch + 1)
                    _, _ = performance(nnet, which_set='test', epoch=i_batch + 1)
                    nnet.copy_parameters()

            i_batch += 1

        if (epoch == 0) and Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
            # Plot diagnostics for first epoch
            plot_diagnostics(nnet, Cfg.xp_path, Cfg.title_suffix,
                             xlabel="Batches", file_prefix="e1_")
            # Re-initialize diagnostics on epoch level
            nnet.initialize_diagnostics(nnet.n_epochs)
            nnet.copy_initial_parameters_to_cache()

        # Performance on training set (forward pass with deterministic=True)
        # to get the exact training objective.
        train_objective, train_accuracy = performance(nnet, which_set='train',
                                                      epoch=epoch, print_=True)

        # Adjust radius R for the SVDD hard-margin objective
        if Cfg.svdd_loss and (Cfg.hard_margin or
                              (Cfg.block_coordinate and
                               (epoch < Cfg.warm_up_n_epochs))):
            # set R to be the (1-nu)-th quantile of distances
            # NOTE(review): if floor(n_train * nu) == 0, index [-0] selects the
            # *smallest* score instead of the largest -- assumes
            # nu * n_train >= 1; confirm for small datasets / tiny nu.
            out_idx = int(np.floor(nnet.data.n_train * Cfg.nu.get_value()))
            sort_idx = nnet.diag['train']['scores'][:, epoch].argsort()
            R_new = (nnet.diag['train']['scores'][sort_idx, epoch][-out_idx]
                     + nnet.Rvar.get_value())
            nnet.Rvar.set_value(Cfg.floatX(R_new))

        # Update radius R and center c if block coordinate optimization is chosen
        if Cfg.block_coordinate and (epoch >= Cfg.warm_up_n_epochs) and \
                ((epoch % Cfg.k_update_epochs) == 0):
            if Cfg.center_fixed:
                nnet.update_R()
            else:
                nnet.update_R_c()

        if Cfg.nnet_diagnostics:
            # Performance on validation and test set
            if nnet.data.n_val > 0:
                val_objective, val_accuracy = performance(
                    nnet, which_set='val', epoch=epoch, print_=True)
            test_objective, test_accuracy = performance(
                nnet, which_set='test', epoch=epoch, print_=True)

            # log performance
            nnet.log['train_objective'].append(train_objective)
            nnet.log['train_accuracy'].append(train_accuracy)
            if nnet.data.n_val > 0:
                nnet.log['val_objective'].append(val_objective)
                nnet.log['val_accuracy'].append(val_accuracy)
            nnet.log['test_objective'].append(test_objective)
            nnet.log['test_accuracy'].append(test_accuracy)
            nnet.log['time_stamp'].append(time.time() - nnet.clock)

        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, nnet.n_epochs, time.time() - start_time))
        print('')

        # save model as required
        if epoch + 1 == nnet.save_at:
            nnet.dump_weights(nnet.save_to)

    # save train time
    nnet.train_time = time.time() - nnet.clock

    # (removed: large commented-out "adversarial model" experiment block)

    # Get final performance in last epoch if no running diagnostics are taken
    if not Cfg.nnet_diagnostics:
        nnet.initialize_diagnostics(1)
        nnet.copy_parameters()

        # perform forward passes on train, val, and test set
        print("Get final performance...")
        train_objective, train_accuracy = performance(nnet, which_set='train',
                                                      epoch=0, print_=True)
        if nnet.data.n_val > 0:
            val_objective, val_accuracy = performance(nnet, which_set='val',
                                                      epoch=0, print_=True)
        test_objective, test_accuracy = performance(nnet, which_set='test',
                                                    epoch=0, print_=True)
        print("Evaluation completed.")

        # log performance
        nnet.log['train_objective'].append(train_objective)
        nnet.log['train_accuracy'].append(train_accuracy)
        if nnet.data.n_val > 0:
            nnet.log['val_objective'].append(val_objective)
            nnet.log['val_accuracy'].append(val_accuracy)
        nnet.log['test_objective'].append(test_objective)
        nnet.log['test_accuracy'].append(test_accuracy)
        nnet.log['time_stamp'].append(time.time() - nnet.clock)

    nnet.stop_clock()
    nnet.test_time = time.time() - (nnet.train_time + nnet.clock)

    # save final weights (and best weights in case of two-class dataset)
    nnet.dump_weights("{}/weights_final.p".format(Cfg.xp_path))
    if nnet.data.n_classes == 2:
        nnet.dump_best_weights("{}/weights_best_ep.p".format(Cfg.xp_path))
def main():
    """Run a Deep SVDD experiment from the parsed CLI arguments.

    Either trains (and tests) a network end-to-end, or -- when
    ``Cfg.only_test`` is set -- reloads a previously trained autoencoder and
    DSVDD network, computes test scores, optionally exports them, and prints
    AUROC/AUPRC metrics.
    """
    args = parser.parse_args()
    if Cfg.print_options:
        print('Options:')
        # fix: dict.iteritems() is Python-2-only; items() works on Python 3
        for (key, value) in vars(args).items():
            print("{:16}: {}".format(key, value))

    assert os.path.exists(args.xp_dir)

    # default value for basefile: string basis for all exported file names
    if args.out_name:
        base_file = "{}/{}".format(args.xp_dir, args.out_name)
    else:
        base_file = "{}/{}_{}_{}".format(args.xp_dir, args.dataset,
                                         args.solver, args.loss)

    # if pickle file already there, consider run already done
    if not Cfg.only_test and (os.path.exists("{}_weights.p".format(base_file))
                              and os.path.exists("{}_results.p".format(base_file))):
        sys.exit()

    # set save_at to n_epochs if not provided
    save_at = args.n_epochs if not args.save_at else args.save_at
    save_to = "{}_weights.p".format(base_file)
    weights = ("{}/{}.p".format(args.xp_dir, args.in_name)
               if args.in_name else None)
    print(weights)

    # update config data

    # plot parameters
    Cfg.xp_path = args.xp_dir

    # dataset
    Cfg.seed = args.seed
    Cfg.out_frac = args.out_frac
    Cfg.ad_experiment = bool(args.ad_experiment)
    Cfg.weight_dict_init = bool(args.weight_dict_init)
    Cfg.pca = bool(args.pca)
    Cfg.unit_norm_used = args.unit_norm_used
    Cfg.gcn = bool(args.gcn)
    Cfg.zca_whitening = bool(args.zca_whitening)
    Cfg.mnist_val_frac = args.mnist_val_frac
    Cfg.mnist_bias = bool(args.mnist_bias)
    Cfg.mnist_rep_dim = args.mnist_rep_dim
    Cfg.mnist_architecture = args.mnist_architecture
    Cfg.mnist_normal = args.mnist_normal
    Cfg.mnist_outlier = args.mnist_outlier
    Cfg.cifar10_bias = bool(args.cifar10_bias)
    Cfg.cifar10_rep_dim = args.cifar10_rep_dim
    Cfg.cifar10_architecture = args.cifar10_architecture
    Cfg.cifar10_normal = args.cifar10_normal
    Cfg.cifar10_outlier = args.cifar10_outlier
    Cfg.gtsrb_rep_dim = args.gtsrb_rep_dim

    # neural network
    Cfg.softmax_loss = (args.loss == 'ce')
    Cfg.svdd_loss = (args.loss == 'svdd')
    Cfg.reconstruction_loss = (args.loss == 'autoencoder')
    Cfg.use_batch_norm = bool(args.use_batch_norm)
    Cfg.learning_rate.set_value(args.lr)
    Cfg.lr_decay = bool(args.lr_decay)
    Cfg.lr_decay_after_epoch = args.lr_decay_after_epoch
    Cfg.lr_drop = bool(args.lr_drop)
    Cfg.lr_drop_in_epoch = args.lr_drop_in_epoch
    Cfg.lr_drop_factor = args.lr_drop_factor
    Cfg.momentum.set_value(args.momentum)
    # solver-specific decay coefficient rho
    if args.solver == "rmsprop":
        Cfg.rho.set_value(0.9)
    if args.solver == "adadelta":
        Cfg.rho.set_value(0.95)
    Cfg.block_coordinate = bool(args.block_coordinate)
    Cfg.k_update_epochs = args.k_update_epochs
    Cfg.center_fixed = bool(args.center_fixed)
    Cfg.R_update_solver = args.R_update_solver
    Cfg.R_update_scalar_method = args.R_update_scalar_method
    Cfg.R_update_lp_obj = args.R_update_lp_obj
    Cfg.warm_up_n_epochs = args.warm_up_n_epochs
    Cfg.batch_size = args.batch_size
    Cfg.leaky_relu = bool(args.leaky_relu)

    # Pre-training and autoencoder configuration
    Cfg.pretrain = bool(args.pretrain)
    Cfg.ae_loss = args.ae_loss
    Cfg.ae_lr_drop = bool(args.ae_lr_drop)
    Cfg.ae_lr_drop_in_epoch = args.ae_lr_drop_in_epoch
    Cfg.ae_lr_drop_factor = args.ae_lr_drop_factor
    Cfg.ae_weight_decay = bool(args.ae_weight_decay)
    Cfg.ae_C.set_value(args.ae_C)

    # SVDD parameters
    Cfg.nu.set_value(args.nu)
    Cfg.c_mean_init = bool(args.c_mean_init)
    if args.c_mean_init_n_batches == -1:
        Cfg.c_mean_init_n_batches = "all"
    else:
        Cfg.c_mean_init_n_batches = args.c_mean_init_n_batches
    Cfg.hard_margin = bool(args.hard_margin)

    # regularization
    Cfg.weight_decay = bool(args.weight_decay)
    Cfg.C.set_value(args.C)
    Cfg.reconstruction_penalty = bool(args.reconstruction_penalty)
    Cfg.C_rec.set_value(args.C_rec)
    Cfg.dropout = bool(args.dropout)
    Cfg.dropout_architecture = bool(args.dropout_arch)

    # diagnostics
    Cfg.nnet_diagnostics = bool(args.nnet_diagnostics)
    Cfg.e1_diagnostics = bool(args.e1_diagnostics)
    Cfg.ae_diagnostics = bool(args.ae_diagnostics)

    # Check for previous copy of configuration; log current config otherwise
    logged_config = args.xp_dir + "/configuration.py"
    current_config = "./config.py"
    if os.path.exists(logged_config):
        print("Comparing logged and current config")
    else:
        copyfile(current_config, logged_config)

    if not Cfg.only_test:
        # Run original DSVDD code, both training and testing in one

        # load from checkpoint if available
        start_new_nnet = False
        if os.path.exists(args.xp_dir + "/ae_pretrained_weights.p"):
            print("Pretrained AE found")
            Cfg.pretrain = False
            nnet = NeuralNet(dataset=args.dataset,
                             use_weights=args.xp_dir + "/ae_pretrained_weights.p",
                             pretrain=False)
        elif Cfg.pretrain:
            if os.path.exists(args.xp_dir + "/ae_checkpoint.p"):
                print("AE checkpoint found, resuming training")
                nnet = NeuralNet(dataset=args.dataset,
                                 use_weights=args.xp_dir + "/ae_checkpoint.p",
                                 pretrain=True)
            else:
                start_new_nnet = True
        elif os.path.exists(args.xp_dir + "/checkpoint.p"):
            print("DSVDD checkpoint found, resuming training")
            nnet = NeuralNet(dataset=args.dataset,
                             use_weights=args.xp_dir + "/checkpoint.p",
                             pretrain=False)
        else:
            start_new_nnet = True

        if start_new_nnet:
            nnet = NeuralNet(dataset=args.dataset, use_weights=weights,
                             pretrain=Cfg.pretrain)

        # pre-train weights via autoencoder, if specified
        if Cfg.pretrain:
            nnet.pretrain(solver="adam", lr=Cfg.pretrain_learning_rate,
                          n_epochs=Cfg.n_pretrain_epochs)

        nnet.train(solver=args.solver, n_epochs=args.n_epochs,
                   save_at=save_at, save_to=save_to)

        # pickle/serialize AD results
        if Cfg.ad_experiment:
            nnet.log_results(filename=Cfg.xp_path + "/AD_results.p")

        # text log
        nnet.log.save_to_file("{}_results.p".format(base_file))

        # save log
        log_exp_config(Cfg.xp_path, args.dataset)
        log_NeuralNet(Cfg.xp_path, args.loss, args.solver, args.lr,
                      args.momentum, None, args.n_epochs, args.C, args.C_rec,
                      args.nu, args.dataset)
        if Cfg.ad_experiment:
            log_AD_results(Cfg.xp_path, nnet)

        # plot diagnostics
        if Cfg.nnet_diagnostics:
            # common suffix for plot titles
            str_lr = "lr = " + str(args.lr)
            C = int(args.C)
            if not Cfg.weight_decay:
                C = None
            str_C = "C = " + str(C)
            Cfg.title_suffix = "(" + args.solver + ", " + str_C + ", " + str_lr + ")"

            if args.loss == 'autoencoder':
                plot_ae_diagnostics(nnet, Cfg.xp_path, Cfg.title_suffix)
            else:
                plot_diagnostics(nnet, Cfg.xp_path, Cfg.title_suffix)

        if Cfg.plot_filters:
            print("Plotting filters")
            plot_filters(nnet, Cfg.xp_path, Cfg.title_suffix)

        # If AD experiment, plot most anomalous and most normal
        if Cfg.ad_experiment and Cfg.plot_most_out_and_norm:
            n_img = 32
            plot_outliers_and_most_normal(nnet, n_img, Cfg.xp_path)
    else:
        # Load previous networks and run only the test phase.
        ae_net = NeuralNet(dataset=args.dataset,
                           use_weights=args.xp_dir + "/ae_pretrained_weights.p",
                           pretrain=True)
        ae_net.ae_solver = args.solver.lower()
        ae_net.compile_autoencoder()
        _, recon_errors = ae_performance(ae_net, 'test')
        print("Computed reconstruction errors")

        nnet = NeuralNet(dataset=args.dataset,
                         use_weights="{}/weights_best_ep.p".format(args.xp_dir))
        nnet.solver = args.solver.lower()
        nnet.compile_updates()

        # retrieve labels and scores from evaluation
        _, _, dsvdd_scores = performance(nnet, 'test')
        labels = nnet.data._y_test

        # Save scores and labels for comparison with other experiments
        if Cfg.export_results:
            for name in ("", "_recon_err"):
                results_filepath = '/home/exjobb_resultat/data/%s_DSVDD%s.pkl' % (
                    args.dataset, name)
                with open(results_filepath, 'wb') as f:
                    # fix: string comparison must use ==, not identity (`is`)
                    if name == "_recon_err":
                        pickle.dump([recon_errors, labels], f)
                    else:
                        pickle.dump([dsvdd_scores, labels], f)
                print("Saved results to %s" % results_filepath)

                # Update data source dict with experiment name
                common_results_dict = pickle.load(
                    open('/home/exjobb_resultat/data/name_dict.pkl', 'rb'))
                # fix: original used str.strip(prefix), which removes a
                # *character set* (and can also eat trailing characters of the
                # experiment name); remove the path prefix explicitly instead.
                prefix = '../log/%s/' % args.dataset
                if args.xp_dir.startswith(prefix):
                    exp_name = args.xp_dir[len(prefix):]
                else:
                    exp_name = args.xp_dir
                common_results_dict[args.dataset]["DSVDD%s" % name] = exp_name
                pickle.dump(
                    common_results_dict,
                    open('/home/exjobb_resultat/data/name_dict.pkl', 'wb'))

        # print test results to console
        print("\nOutliers from %s" % Cfg.test_out_folder)
        print("%d inliers, %d outliers" % (Cfg.n_test_in,
                                           Cfg.n_test - Cfg.n_test_in))
        print("Test results:\n")
        print("\t\tAUROC\tAUPRC\n")

        # Compute test metrics before printing
        auroc_recon = roc_auc_score(labels, recon_errors)
        auroc_dsvdd = roc_auc_score(labels, dsvdd_scores)
        pr, rc, _ = precision_recall_curve(labels, recon_errors)
        auprc_recon = auc(rc, pr)
        pr, rc, _ = precision_recall_curve(labels, dsvdd_scores)
        auprc_dsvdd = auc(rc, pr)
        print("Recon.err:\t%.4f\t%.4f" % (auroc_recon, auprc_recon))
        print("DSVDD:\t\t%.4f\t%.4f" % (auroc_dsvdd, auprc_dsvdd))
def main():
    """Parse CLI arguments, configure ``Cfg``, train the network, then log
    and plot results.

    This is the plain train-and-evaluate entry point (no test-only mode):
    it builds a ``NeuralNet``, optionally pre-trains it as an autoencoder,
    trains with the chosen solver, and writes logs/plots to the experiment
    directory.
    """
    args = parser.parse_args()
    print('Options:')
    # fix: dict.iteritems() is Python-2-only; items() works on Python 3
    for (key, value) in vars(args).items():
        print("{:16}: {}".format(key, value))

    assert os.path.exists(args.xp_dir)

    # default value for basefile: string basis for all exported file names
    if args.out_name:
        base_file = "{}/{}".format(args.xp_dir, args.out_name)
    else:
        base_file = "{}/{}_{}_{}".format(args.xp_dir, args.dataset,
                                         args.solver, args.loss)

    # if pickle file already there, consider run already done
    if (os.path.exists("{}_weights.p".format(base_file))
            and os.path.exists("{}_results.p".format(base_file))):
        sys.exit()

    # computation device
    if 'gpu' in args.device:
        theano.sandbox.cuda.use(args.device)

    # set save_at to n_epochs if not provided
    save_at = args.n_epochs if not args.save_at else args.save_at
    save_to = "{}_weights.p".format(base_file)
    weights = "../log/{}.p".format(args.in_name) if args.in_name else None

    # update config data

    # plot parameters
    Cfg.xp_path = args.xp_dir

    # dataset
    Cfg.seed = args.seed
    Cfg.out_frac = args.out_frac
    Cfg.ad_experiment = bool(args.ad_experiment)
    Cfg.weight_dict_init = bool(args.weight_dict_init)
    Cfg.pca = bool(args.pca)
    Cfg.unit_norm_used = args.unit_norm_used
    Cfg.gcn = bool(args.gcn)
    Cfg.zca_whitening = bool(args.zca_whitening)
    Cfg.mnist_val_frac = args.mnist_val_frac
    Cfg.mnist_bias = bool(args.mnist_bias)
    Cfg.mnist_rep_dim = args.mnist_rep_dim
    Cfg.mnist_architecture = args.mnist_architecture
    Cfg.mnist_normal = args.mnist_normal
    Cfg.mnist_outlier = args.mnist_outlier
    Cfg.cifar10_bias = bool(args.cifar10_bias)
    Cfg.cifar10_rep_dim = args.cifar10_rep_dim
    Cfg.cifar10_architecture = args.cifar10_architecture
    Cfg.cifar10_normal = args.cifar10_normal
    Cfg.cifar10_outlier = args.cifar10_outlier
    Cfg.gtsrb_rep_dim = args.gtsrb_rep_dim

    # neural network
    Cfg.softmax_loss = (args.loss == 'ce')
    Cfg.svdd_loss = (args.loss == 'svdd')
    Cfg.kde_loss = (args.loss == 'kde_loss')
    Cfg.deep_GMM_loss = (args.loss == 'deep_GMM')
    Cfg.reconstruction_loss = (args.loss == 'autoencoder')
    Cfg.use_batch_norm = bool(args.use_batch_norm)
    Cfg.learning_rate.set_value(args.lr)
    Cfg.lr_decay = bool(args.lr_decay)
    Cfg.lr_decay_after_epoch = args.lr_decay_after_epoch
    Cfg.lr_drop = bool(args.lr_drop)
    Cfg.lr_drop_in_epoch = args.lr_drop_in_epoch
    Cfg.lr_drop_factor = args.lr_drop_factor
    Cfg.momentum.set_value(args.momentum)
    # solver-specific decay coefficient rho
    if args.solver == "rmsprop":
        Cfg.rho.set_value(0.9)
    if args.solver == "adadelta":
        Cfg.rho.set_value(0.95)
    Cfg.block_coordinate = bool(args.block_coordinate)
    Cfg.k_update_epochs = args.k_update_epochs
    Cfg.center_fixed = bool(args.center_fixed)
    Cfg.R_update_solver = args.R_update_solver
    Cfg.R_update_scalar_method = args.R_update_scalar_method
    Cfg.R_update_lp_obj = args.R_update_lp_obj
    Cfg.warm_up_n_epochs = args.warm_up_n_epochs
    Cfg.batch_size = args.batch_size
    Cfg.leaky_relu = bool(args.leaky_relu)

    # Pre-training and autoencoder configuration
    Cfg.pretrain = bool(args.pretrain)
    Cfg.ae_loss = args.ae_loss
    Cfg.ae_lr_drop = bool(args.ae_lr_drop)
    Cfg.ae_lr_drop_in_epoch = args.ae_lr_drop_in_epoch
    Cfg.ae_lr_drop_factor = args.ae_lr_drop_factor
    Cfg.ae_weight_decay = bool(args.ae_weight_decay)
    Cfg.ae_C.set_value(args.ae_C)

    # SVDD parameters
    Cfg.nu.set_value(args.nu)
    Cfg.c_mean_init = bool(args.c_mean_init)
    if args.c_mean_init_n_batches == -1:
        Cfg.c_mean_init_n_batches = "all"
    else:
        Cfg.c_mean_init_n_batches = args.c_mean_init_n_batches
    Cfg.hard_margin = bool(args.hard_margin)

    # regularization
    Cfg.weight_decay = bool(args.weight_decay)
    Cfg.C.set_value(args.C)
    Cfg.reconstruction_penalty = bool(args.reconstruction_penalty)
    Cfg.C_rec.set_value(args.C_rec)
    Cfg.dropout = bool(args.dropout)
    Cfg.dropout_architecture = bool(args.dropout_arch)

    # diagnostics
    Cfg.nnet_diagnostics = bool(args.nnet_diagnostics)
    Cfg.e1_diagnostics = bool(args.e1_diagnostics)
    Cfg.ae_diagnostics = bool(args.ae_diagnostics)

    # train
    nnet = NeuralNet(dataset=args.dataset, use_weights=weights,
                     pretrain=Cfg.pretrain)
    # pre-train weights via autoencoder, if specified
    if Cfg.pretrain:
        nnet.pretrain(solver="adam", lr=0.0001, n_epochs=1)

    nnet.train(solver=args.solver, n_epochs=args.n_epochs,
               save_at=save_at, save_to=save_to)

    # pickle/serialize AD results
    if Cfg.ad_experiment:
        nnet.log_results(filename=Cfg.xp_path + "/AD_results.p")

    # text log
    nnet.log.save_to_file("{}_results.p".format(base_file))

    # save log
    log_exp_config(Cfg.xp_path, args.dataset)
    log_NeuralNet(Cfg.xp_path, args.loss, args.solver, args.lr, args.momentum,
                  None, args.n_epochs, args.C, args.C_rec, args.nu)
    if Cfg.ad_experiment:
        log_AD_results(Cfg.xp_path, nnet)

    # plot diagnostics
    if Cfg.nnet_diagnostics:
        # common suffix for plot titles
        str_lr = "lr = " + str(args.lr)
        C = int(args.C)
        if not Cfg.weight_decay:
            C = None
        str_C = "C = " + str(C)
        Cfg.title_suffix = "(" + args.solver + ", " + str_C + ", " + str_lr + ")"

        if args.loss == 'autoencoder':
            plot_ae_diagnostics(nnet, Cfg.xp_path, Cfg.title_suffix)
        else:
            plot_diagnostics(nnet, Cfg.xp_path, Cfg.title_suffix)

        plot_filters(nnet, Cfg.xp_path, Cfg.title_suffix)

    # If AD experiment, plot most anomalous and most normal
    if Cfg.ad_experiment:
        n_img = 32
        plot_outliers_and_most_normal(nnet, n_img, Cfg.xp_path)
def train_network(nnet):
    """Train *nnet* with SGD (SVDD / multi-sphere mSVDD variant).

    Like the plain SVDD trainer but additionally supports the mSVDD loss:
    centers are initialized with k-means, per-cluster radii are adjusted
    under the hard-margin objective, and the learned feature representations
    plus (R, c) matrix are exported as text files at the end.

    :param nnet: network object providing data access, backprop routines,
                 diagnostics storage and weight dumping (project type).
    """
    if Cfg.reconstruction_loss:
        nnet.ae_n_epochs = nnet.n_epochs
        train_autoencoder(nnet)
        return

    print("Starting training with %s" % nnet.sgd_solver)

    # save initial network parameters for diagnostics
    nnet.save_initial_parameters()

    # NOTE: original used bitwise `&` on these booleans; `and` is the
    # idiomatic (short-circuiting) form and equivalent for bool flags.
    if Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
        # initialize diagnostics for first epoch (detailed diagnostics per batch)
        print("detailed diagnostics")
        nnet.initialize_diagnostics(Cfg.n_batches + 1)
    else:
        nnet.initialize_diagnostics(nnet.n_epochs)

    # initialize c from mean of network feature representations in deep SVDD
    if Cfg.svdd_loss and Cfg.c_mean_init:
        initialize_c_as_mean(nnet, Cfg.c_mean_init_n_batches)

    # initialize c via k-means on network feature representations (deep mSVDD)
    if Cfg.msvdd_loss:
        initialize_c_kmeans(nnet, Cfg.c_mean_init_n_batches)

    for epoch in range(nnet.n_epochs):

        # get copy of current network parameters to track differences between epochs
        nnet.copy_parameters()

        # In each epoch, we do a full pass over the training data:
        start_time = time.time()

        # learning rate decay
        if Cfg.lr_decay:
            decay_learning_rate(nnet, epoch)

        if Cfg.lr_drop and (epoch == Cfg.lr_drop_in_epoch):
            # Drop the learning rate in the specified epoch by factor
            # Cfg.lr_drop_factor: a simple separation of learning into a
            # "region search" and a "fine-tuning" stage.
            lr_new = Cfg.floatX(
                (1.0 / Cfg.lr_drop_factor) * Cfg.learning_rate.get_value())
            print("")
            print("Learning rate drop in epoch {} from {:.6f} to {:.6f}".format(
                epoch, Cfg.floatX(Cfg.learning_rate.get_value()), lr_new))
            print("")
            Cfg.learning_rate.set_value(lr_new)

        # train on epoch
        i_batch = 0
        for batch in nnet.data.get_epoch_train():

            if Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
                # Evaluation before training
                if (epoch == 0) and (i_batch == 0):
                    _, _, _ = performance(nnet, which_set='train', epoch=i_batch)
                    if nnet.data.n_val > 0:
                        _, _, _ = performance(nnet, which_set='val', epoch=i_batch)
                    _, _, _ = performance(nnet, which_set='test', epoch=i_batch)

            # train
            inputs, targets, _ = batch
            if Cfg.svdd_loss or Cfg.msvdd_loss:
                if Cfg.block_coordinate:
                    _, _ = nnet.backprop_without_R(inputs, targets)
                elif Cfg.hard_margin:
                    _, _ = nnet.backprop_ball(inputs, targets)
                else:
                    _, _ = nnet.backprop(inputs, targets)
            else:
                _, _ = nnet.backprop(inputs, targets)

            if Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
                # Get detailed diagnostics (per batch) for the first epoch
                if epoch == 0:
                    _, _, _ = performance(nnet, which_set='train',
                                          epoch=i_batch + 1)
                    if nnet.data.n_val > 0:
                        _, _, _ = performance(nnet, which_set='val',
                                              epoch=i_batch + 1)
                    _, _, _ = performance(nnet, which_set='test',
                                          epoch=i_batch + 1)
                    nnet.copy_parameters()

            i_batch += 1

        if (epoch == 0) and Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
            # Plot diagnostics for first epoch
            plot_diagnostics(nnet, Cfg.xp_path, Cfg.title_suffix,
                             xlabel="Batches", file_prefix="e1_")
            # Re-initialize diagnostics on epoch level
            nnet.initialize_diagnostics(nnet.n_epochs)
            nnet.copy_initial_parameters_to_cache()

        # Performance on training set (forward pass with deterministic=True)
        # to get the exact training objective.
        train_objective, train_accuracy, repsTrain = performance(
            nnet, which_set='train', epoch=epoch, print_=True)

        # Adjust radius R for the SVDD hard-margin objective
        if Cfg.svdd_loss and (Cfg.hard_margin or
                              (Cfg.block_coordinate and
                               (epoch < Cfg.warm_up_n_epochs))):
            # set R to be the (1-nu)-th quantile of distances
            # NOTE(review): if floor(n_train * nu) == 0, index [-0] selects the
            # *smallest* score -- assumes nu * n_train >= 1; confirm.
            out_idx = int(np.floor(nnet.data.n_train * Cfg.nu.get_value()))
            sort_idx = nnet.diag['train']['scores'][:, epoch].argsort()
            R_new = (nnet.diag['train']['scores'][sort_idx, epoch][-out_idx]
                     + nnet.Rvar.get_value())
            nnet.Rvar.set_value(Cfg.floatX(R_new))

        # Adjust radius R for the mSVDD hard-margin objective
        if Cfg.msvdd_loss and (Cfg.hard_margin or
                               (Cfg.block_coordinate and
                                (epoch < Cfg.warm_up_n_epochs))):
            n_cluster = Cfg.n_cluster
            scores = nnet.diag['train']['scores'][:, epoch]
            dists_idx = nnet.diag['train']['dists_idx'][:, epoch]

            # set R_i to be the (1-nu)-th quantile of distances in cluster c_i
            out_idx = np.zeros(n_cluster).astype(int)
            nu = Cfg.nu.get_value()
            R_old = nnet.Rvar.get_value()
            R = np.float32(np.zeros((n_cluster, 1)))
            cc = np.float32(np.zeros((n_cluster, 1)))
            for i in range(n_cluster):
                cc[i] = np.sum(np.equal(dists_idx, i))  # cluster cardinality
            for i in range(n_cluster):
                # NOTE(review): clusters skipped here keep R[i] == 0 rather
                # than R_old[i]; also [-out_idx[i]] with out_idx[i] == 0 picks
                # the smallest score -- confirm both are intended.
                if cc[i] < np.floor(max(cc) * nu):
                    continue
                out_idx[i] = int(np.floor(cc[i] * nu))
                scores_c = scores[np.equal(dists_idx, i)]
                sort_idx = scores_c.argsort()
                R[i] = scores_c[sort_idx][-out_idx[i]] + R_old[i]
                del scores_c
                del sort_idx
            nnet.Rvar.set_value(R)

        # Update radius R and center c if block coordinate optimization is chosen
        if Cfg.block_coordinate and (epoch >= Cfg.warm_up_n_epochs) and \
                ((epoch % Cfg.k_update_epochs) == 0):
            if Cfg.center_fixed:
                nnet.update_R()
            else:
                nnet.update_R_c()

        if Cfg.nnet_diagnostics:
            # Performance on validation and test set
            if nnet.data.n_val > 0:
                val_objective, val_accuracy, repsVal = performance(
                    nnet, which_set='val', epoch=epoch, print_=True)
            test_objective, test_accuracy, repsTest = performance(
                nnet, which_set='test', epoch=epoch, print_=True)

            # log performance
            nnet.log['train_objective'].append(train_objective)
            nnet.log['train_accuracy'].append(train_accuracy)
            if nnet.data.n_val > 0:
                nnet.log['val_objective'].append(val_objective)
                nnet.log['val_accuracy'].append(val_accuracy)
            nnet.log['test_objective'].append(test_objective)
            nnet.log['test_accuracy'].append(test_accuracy)
            nnet.log['time_stamp'].append(time.time() - nnet.clock)

        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, nnet.n_epochs, time.time() - start_time))
        print('')

        # save model as required
        if epoch + 1 == nnet.save_at:
            nnet.dump_weights(nnet.save_to)

    # save train time
    nnet.train_time = time.time() - nnet.clock

    # Get final performance in last epoch if no running diagnostics are taken
    if not Cfg.nnet_diagnostics:
        nnet.initialize_diagnostics(1)
        nnet.copy_parameters()

        # perform forward passes on train, val, and test set
        print("Get final performance...")
        train_objective, train_accuracy, repsTrain = performance(
            nnet, which_set='train', epoch=0, print_=True)
        if nnet.data.n_val > 0:
            val_objective, val_accuracy, repsVal = performance(
                nnet, which_set='val', epoch=0, print_=True)
        test_objective, test_accuracy, repsTest = performance(
            nnet, which_set='test', epoch=0, print_=True)
        print("Evaluation completed.")

        # log performance
        nnet.log['train_objective'].append(train_objective)
        nnet.log['train_accuracy'].append(train_accuracy)
        if nnet.data.n_val > 0:
            nnet.log['val_objective'].append(val_objective)
            nnet.log['val_accuracy'].append(val_accuracy)
        nnet.log['test_objective'].append(test_objective)
        nnet.log['test_accuracy'].append(test_accuracy)
        nnet.log['time_stamp'].append(time.time() - nnet.clock)

    nnet.stop_clock()
    nnet.test_time = time.time() - (nnet.train_time + nnet.clock)

    # Save data representations and labels
    np.savetxt(Cfg.xp_path + "/" + 'repsTrain.txt', repsTrain,
               fmt='%-7.10f', delimiter=',')
    # fix: repsVal is only assigned when a validation set exists; without this
    # guard, n_val == 0 raised a NameError here.
    if nnet.data.n_val > 0:
        np.savetxt(Cfg.xp_path + "/" + 'repsVal.txt', repsVal,
                   fmt='%-7.10f', delimiter=',')
    np.savetxt(Cfg.xp_path + "/" + 'repsTest.txt', repsTest,
               fmt='%-7.10f', delimiter=',')
    np.savetxt(Cfg.xp_path + "/" + 'ltest.txt', nnet.data._yo_test,
               fmt='%-7.10f', delimiter=',')
    if Cfg.msvdd_loss:
        # one row per cluster: [R_i, c_i]
        svddM = np.append(nnet.Rvar.get_value(), nnet.cvar.get_value(), axis=1)
        np.savetxt(Cfg.xp_path + "/" + 'svddM.txt', svddM,
                   fmt='%-7.5f', delimiter=',')

    # save final weights (and best weights in case of two-class dataset)
    nnet.dump_weights("{}/weights_final.p".format(Cfg.xp_path))
    if nnet.data.n_classes == 2:
        nnet.dump_best_weights("{}/weights_best_ep.p".format(Cfg.xp_path))