def separate_weights(layer):

    assert layer.issvm or layer.isdense

    W_pos = Cfg.floatX(0.5) * (abs(layer.W) + layer.W)
    W_neg = Cfg.floatX(0.5) * (abs(layer.W) - layer.W)

    updates = ((layer.W_pos, W_pos),
               (layer.W_neg, W_neg))

    return theano.function([], updates=updates)
def separate_gamma(layer):

    assert layer.isbatchnorm

    gamma_pos = Cfg.floatX(0.5) * (abs(layer.gamma) + layer.gamma)
    gamma_neg = Cfg.floatX(0.5) * (abs(layer.gamma) - layer.gamma)

    updates = ((layer.gamma_pos, gamma_pos),
               (layer.gamma_neg, gamma_neg))

    return theano.function([], updates=updates)
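# Hedged illustration (not part of the original code): a tiny NumPy check of
# the positive/negative decomposition compiled by the two functions above.
# For any W, W_pos = (|W| + W)/2 and W_neg = (|W| - W)/2 are both nonnegative
# and satisfy W_pos - W_neg == W and W_pos + W_neg == |W|.
def _demo_separate_weights():
    import numpy as np

    W = np.array([[1.5, -2.0], [0.0, 3.0]], dtype=np.float32)
    W_pos = 0.5 * (np.abs(W) + W)
    W_neg = 0.5 * (np.abs(W) - W)

    assert np.allclose(W_pos - W_neg, W)
    assert np.allclose(W_pos + W_neg, np.abs(W))
    assert (W_pos >= 0).all() and (W_neg >= 0).all()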
def decay_learning_rate(nnet, epoch):
    """
    Decay the learning rate after the epoch specified in Cfg.lr_decay_after_epoch.
    """

    # only allow decay for non-adaptive solvers
    assert nnet.solver in ("sgd", "momentum", "adam")

    if epoch >= Cfg.lr_decay_after_epoch:
        lr_new = (Cfg.lr_decay_after_epoch / Cfg.floatX(epoch)) * nnet.learning_rate_init
        Cfg.learning_rate.set_value(Cfg.floatX(lr_new))
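# Hedged illustration (not part of the original code): the decay above is a
# 1/t schedule anchored at t0 = Cfg.lr_decay_after_epoch, i.e.
# lr(t) = lr_init * t0 / t for t >= t0, with earlier epochs left untouched.
def _demo_decay_schedule(lr_init=0.01, t0=10):
    for t in (5, 10, 20, 40):
        lr = lr_init if t < t0 else lr_init * (float(t0) / t)
        print(t, lr)  # 0.01 for t < t0, then 0.01, 0.005, 0.0025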
def save_network_diagnostics(self, epoch, l2, R):
    """
    save diagnostics of the network
    """

    self.diag['network']['l2_penalty'][epoch] = l2
    self.log['l2_penalty'].append(Cfg.floatX(l2))

    i = 0
    j = 0
    for layer in self.trainable_layers:
        if layer.isdense:
            self.diag['network']['W_norms'][i][:, epoch] = np.sum(layer.W.get_value() ** 2, axis=0)
            if layer.b is not None:
                self.diag['network']['b_norms'][i][:, epoch] = layer.b.get_value() ** 2
            i += 1

        if layer.isdense or layer.isconv:
            dW = np.sqrt(np.sum((layer.W.get_value() - self.diag['network']['W_copy'][j]) ** 2))
            self.diag['network']['dW_norms'][j][epoch] = dW
            if layer.b is not None:
                db = np.sqrt(np.sum((layer.b.get_value() - self.diag['network']['b_copy'][j]) ** 2))
                self.diag['network']['db_norms'][j][epoch] = db
            j += 1

    # diagnostics only relevant for the SVDD loss
    if Cfg.svdd_loss:
        self.diag['network']['R'][epoch] = R
        self.diag['network']['c_norm'][epoch] = np.sqrt(np.sum(self.cvar.get_value() ** 2))
def update_R_c(self):
    """
    method to update R and c while leaving the network parameters fixed in a
    block coordinate optimization
    """

    print("Updating radius R and center c...")

    # get updates
    R, c = update_R_c(self.diag['train']['rep'],
                      np.sum(self.diag['train']['rep'] ** 2, axis=1),
                      solver=Cfg.QP_solver)

    # update values
    self.Rvar.set_value(Cfg.floatX(R))
    self.cvar.set_value(Cfg.floatX(c))

    print("Radius R and center c updated.")
def reinitialize_primal_variables(layer):

    gpu_vars = (layer.l, layer.k)
    cpu_vars = ()
    heavy_vars = (layer.W_i, layer.b_i, layer.l_i)

    # keep the memory-heavy variables on the GPU only if configured to do so
    if Cfg.store_on_gpu:
        gpu_vars += heavy_vars
    else:
        cpu_vars += heavy_vars

    zero = Cfg.floatX(0)

    gpu_updates = []
    for var in gpu_vars:
        gpu_updates.append((var, var.fill(zero)))
    gpu_fun = theano.function([], updates=gpu_updates)

    def update_all_fun():
        # reset the GPU-side variables via the compiled function
        # and the CPU-side arrays in place
        gpu_fun()
        for var in cpu_vars:
            var.fill(zero)

    return update_all_fun
def forward_prop(self, X_ccv, X_cvx, **kwargs):

    # sum pooling of the concave part, implemented as (pool area) * average pooling
    Z_ccv = Cfg.floatX(self.pool_size[0] * self.pool_size[1]) * \
        pool_2d(X_ccv, mode='average_exc_pad', **self.pool_opts)
    Z_cvx = my_pool_2d(X_cvx - X_ccv, mode='max', **self.pool_opts) + Z_ccv

    return Z_ccv, Z_cvx
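# Hedged illustration (not part of the original code): the layer above keeps
# a difference-of-convex (DC) pair, and the represented output is
# Z_cvx - Z_ccv = maxpool(X_cvx - X_ccv). A 1-D toy check with a single
# pooling window, where sum pooling stands in for area * average pooling:
def _demo_dc_max_pool():
    import numpy as np

    X_cvx = np.array([3.0, 1.0, 4.0])
    X_ccv = np.array([1.0, 0.5, 2.0])

    Z_ccv = X_ccv.sum()                    # sum pool of the concave part
    Z_cvx = (X_cvx - X_ccv).max() + Z_ccv  # max pool of the difference, shifted

    assert np.isclose(Z_cvx - Z_ccv, (X_cvx - X_ccv).max())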
def warm_regularization(layer):

    C, D = Cfg.C, Cfg.D
    r = C / (C + D)
    one_by_2K = Cfg.floatX(0.5) * (C + D) / (C * D)

    warm_reg = one_by_2K * (T.sum((layer.W - r * layer.W_0) ** 2) +
                            T.sum((layer.b - r * layer.b_0) ** 2))

    return theano.function([], warm_reg)
def pretrain(self, solver, lr, n_epochs):
    """
    pre-train weights with an autoencoder
    """

    self.ae_solver = solver.lower()
    self.ae_learning_rate = lr
    self.ae_n_epochs = n_epochs

    # set learning rate
    lr_tmp = Cfg.learning_rate.get_value()
    Cfg.learning_rate.set_value(Cfg.floatX(lr))

    self.compile_autoencoder()

    from opt.sgd.train import train_autoencoder
    train_autoencoder(self)

    # remove layer attributes, re-initialize the network, and reset the learning rate
    for layer in self.all_layers:
        delattr(self, layer.name + "_layer")
    self.initialize_variables(self.data.dataset_name)
    Cfg.learning_rate.set_value(Cfg.floatX(lr_tmp))

    # set to True so that dictionary initialization isn't repeated
    self.pretrained = True

    # load network architecture
    if Cfg.svdd_loss and Cfg.reconstruction_penalty:
        self.data.build_autoencoder(self)
    else:
        self.data.build_architecture(self)

    for layer in self.all_layers:
        setattr(self, layer.name + "_layer", layer)

    self.log.store_architecture(self)

    # load weights learned by the autoencoder
    self.load_weights(Cfg.xp_path + "/ae_pretrained_weights.p")
def compile_warm_reg_avg(layer):

    C, D = Cfg.C, Cfg.D
    r = C / (C + D)
    one_by_2K = Cfg.floatX(0.5) * (C + D) / (C * D)

    warm_reg = one_by_2K * (T.sum((layer.W_avg - r * layer.W_0) ** 2) +
                            T.sum((layer.b_avg - r * layer.b_0) ** 2))

    return theano.function([], warm_reg)
def get_dual(layer):

    C, D = Cfg.C, Cfg.D
    r = C / (C + D)
    K = C * D / (C + D)

    dual = Cfg.floatX(-0.5) / K * T.sum((layer.W - r * layer.W_0) ** 2) + \
        layer.l + r / K * T.sum(layer.W_0 * (r * layer.W_0 - layer.W))

    return theano.function([], dual)
def save_diagnostics(self, which_set, epoch, scores, rep_norm, rep, emp_loss,
                     reconstruction_penalty, dists=[], dists_idx=[]):
    """
    save diagnostics for which_set of epoch
    """

    if self.data.n_classes == 2:

        if which_set == 'train':
            y = self.data._y_train
        if which_set == 'val':
            y = self.data._y_val
        if which_set == 'test':
            y = self.data._y_test

        self.diag[which_set]['scores'][:, epoch] = scores
        if len(dists):
            self.diag[which_set]['dists'][:, epoch] = dists
            self.diag[which_set]['dists_idx'][:, epoch] = dists_idx

        # AUC is only defined if the set contains outliers
        if sum(y) > 0:
            AUC = roc_auc_score(y, scores)
            self.diag[which_set]['auc'][epoch] = AUC
            self.log[which_set + '_auc'].append(float(AUC))
            print("{:32} {:.2f}%".format(which_set.title() + ' AUC:', 100. * AUC))

        scores_normal = scores[y == 0]
        scores_outlier = scores[y == 1]
        normal_summary = get_five_number_summary(scores_normal)
        outlier_summary = get_five_number_summary(scores_outlier)
        self.log[which_set + '_normal_scores_summary'].append(normal_summary)
        self.log[which_set + '_outlier_scores_summary'].append(outlier_summary)

        self.diag[which_set]['rep'] = rep
        self.diag[which_set]['rep_norm'][:, epoch] = rep_norm

        rep_norm_normal = rep_norm[y == 0]
        rep_norm_outlier = rep_norm[y == 1]
        normal_summary = get_five_number_summary(rep_norm_normal)
        outlier_summary = get_five_number_summary(rep_norm_outlier)
        self.log[which_set + '_normal_rep_norm_summary'].append(normal_summary)
        self.log[which_set + '_outlier_rep_norm_summary'].append(outlier_summary)

    if Cfg.svdd_loss:
        rep_mean = np.mean(rep, axis=0)
        self.diag[which_set]['output_mean_norm'][epoch] = np.sqrt(np.sum(rep_mean ** 2))
        self.diag[which_set]['c_mean_diff'][epoch] = np.sqrt(np.sum((rep_mean - self.cvar.get_value()) ** 2))

    self.diag[which_set]['reconstruction_penalty'][epoch] = reconstruction_penalty
    self.diag[which_set]['emp_loss'][epoch] = float(emp_loss)
    self.log[which_set + '_emp_loss'].append(Cfg.floatX(emp_loss))
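# Hedged illustration (not part of the original code): the AUC logged above
# measures how well the anomaly scores rank outliers (y == 1) over normal
# samples (y == 0); 0.5 is chance level, 1.0 a perfect ranking.
def _demo_auc():
    import numpy as np
    from sklearn.metrics import roc_auc_score

    y = np.array([0, 0, 1, 1])
    scores = np.array([0.1, 0.4, 0.35, 0.8])
    print(roc_auc_score(y, scores))  # 0.75: 3 of 4 normal/outlier pairs ranked correctly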
def mirror_activations(input, input_fixed, pool_size):

    out_fixed = my_pool_2d(input_fixed, ds=pool_size, ignore_border=True)

    # the gradient of a max pool w.r.t. its input is a one-hot mask over each
    # pooling window's argmax, so `mask` encodes the pooling switches of input_fixed
    mask = T.grad(cost=None,
                  wrt=input_fixed,
                  known_grads={out_fixed: T.ones_like(out_fixed)})

    # route `input` through the fixed switches (sum pooling = pool area * average pooling)
    masked_input = input * mask
    out = Cfg.floatX(pool_size[0] * pool_size[1]) * \
        pool_2d(masked_input, mode='average_exc_pad', ds=pool_size, ignore_border=True)

    return out, out_fixed
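# Hedged illustration (not part of the original code): a 1-D NumPy toy
# version of the trick above with a single pooling window. The one-hot mask
# over input_fixed's argmax replays its pooling switches on `input`.
def _demo_mirror_activations():
    import numpy as np

    input_fixed = np.array([0.2, 0.9, 0.1])
    inp = np.array([5.0, 7.0, 3.0])

    mask = np.zeros_like(input_fixed)
    mask[input_fixed.argmax()] = 1.0   # gradient of max() w.r.t. input_fixed

    out = (inp * mask).sum()           # sum pool == pool area * average pool
    assert out == inp[input_fixed.argmax()]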
def update_R(self):
    """
    method to update R while leaving the network parameters and center c
    fixed in a block coordinate optimization
    """

    print("Updating radius R...")

    # get updates
    R = update_R(self.diag['train']['rep'], self.cvar.get_value(),
                 solver=Cfg.R_update_solver,
                 scalar_method=Cfg.R_update_scalar_method,
                 lp_obj=Cfg.R_update_lp_obj)

    # update R
    self.Rvar.set_value(Cfg.floatX(R))

    print("Radius R updated.")
def initialize_c_as_mean(nnet, n_batches, eps=0.1):
    """
    initialize c as the mean of the final layer representations from all
    samples propagated in n_batches
    """

    print("Initializing c...")

    # number of batches (and thereby samples) to initialize from
    if isinstance(n_batches, basestring) and n_batches == "all":
        n_batches = Cfg.n_batches
    elif n_batches > Cfg.n_batches:
        n_batches = Cfg.n_batches

    rep_list = list()

    i_batch = 0
    for batch in nnet.data.get_epoch_train():
        inputs, targets, _ = batch
        if i_batch == n_batches:
            break
        _, _, _, _, _, b_rep, _, _, _, _ = nnet.forward(inputs, targets)
        rep_list.append(b_rep)
        i_batch += 1

    reps = np.concatenate(rep_list, axis=0)
    c = np.mean(reps, axis=0)

    # If c_i is too close to 0 in dimension i, set to +-eps.
    # Reason: a zero unit can be trivially matched with zero weights.
    c[(abs(c) < eps) & (c < 0)] = -eps
    c[(abs(c) < eps) & (c > 0)] = eps

    nnet.cvar.set_value(c)

    # initialize R at the (1-nu)-th quantile of distances
    dist_init = np.sum((reps - c) ** 2, axis=1)
    out_idx = int(np.floor(len(reps) * Cfg.nu.get_value()))
    sort_idx = dist_init.argsort()
    nnet.Rvar.set_value(Cfg.floatX(dist_init[sort_idx][-out_idx]))

    print("c initialized.")
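# Hedged illustration (not part of the original code): the radius is set to
# the (1-nu)-th quantile of squared distances to c, so roughly a nu-fraction
# of the training representations start outside the ball.
def _demo_quantile_radius(nu=0.1):
    import numpy as np

    reps = np.random.randn(100, 2).astype(np.float32)  # toy representations
    c = reps.mean(axis=0)

    dist = np.sum((reps - c) ** 2, axis=1)
    out_idx = int(np.floor(len(reps) * nu))
    R = np.sort(dist)[-out_idx]   # same rule as dist_init[sort_idx][-out_idx]
    assert np.mean(dist > R) <= nu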
def train_autoencoder(nnet):

    if Cfg.ae_diagnostics:
        nnet.initialize_ae_diagnostics(nnet.ae_n_epochs)

    print("Starting autoencoder training with %s" % nnet.sgd_solver)

    for epoch in range(nnet.ae_n_epochs):

        start_time = time.time()

        if Cfg.ae_lr_drop and (epoch == Cfg.ae_lr_drop_in_epoch):
            # Drop the learning rate in the epoch specified by Cfg.ae_lr_drop_in_epoch
            # by factor Cfg.ae_lr_drop_factor; a simple separation of learning into a
            # "region search" and a "fine-tuning" stage.
            lr_new = Cfg.floatX((1.0 / Cfg.ae_lr_drop_factor) * Cfg.learning_rate.get_value())
            print("")
            print("Learning rate drop in epoch {} from {:.6f} to {:.6f}".format(
                epoch, Cfg.floatX(Cfg.learning_rate.get_value()), lr_new))
            print("")
            Cfg.learning_rate.set_value(lr_new)

        # In each epoch, we do a full pass over the training data:
        l2 = 0
        batches = 0
        train_err = 0
        train_scores = np.empty(nnet.data.n_train)

        for batch in nnet.data.get_epoch_train():
            inputs, _, batch_idx = batch
            start_idx = batch_idx * Cfg.batch_size
            stop_idx = min(nnet.data.n_train, start_idx + Cfg.batch_size)

            err, l2, b_scores = nnet.ae_backprop(inputs)

            train_err += err * inputs.shape[0]
            train_scores[start_idx:stop_idx] = b_scores.flatten()
            batches += 1

        train_err /= nnet.data.n_train

        # save train diagnostics and test performance on val and test data if specified
        if Cfg.ae_diagnostics:
            nnet.save_ae_diagnostics('train', epoch, train_err, train_scores, l2)

            # performance on validation and test set
            if nnet.data.n_val > 0:
                val_err = ae_performance(nnet, which_set='val', epoch=epoch)
                test_err = ae_performance(nnet, which_set='test', epoch=epoch)

        # print results for epoch
        print("{:32} {:.5f}".format("Train error:", train_err))
        if Cfg.ae_diagnostics and (nnet.data.n_val > 0):
            print("{:32} {:.5f}".format("Val error:", val_err))
            print("{:32} {:.5f}".format("Test error:", test_err))
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, nnet.ae_n_epochs, time.time() - start_time))
        print("")

    # Get final performance in last epoch if no running diagnostics are taken
    if not Cfg.ae_diagnostics:
        nnet.initialize_ae_diagnostics(1)

        # perform forward passes on train, val, and test set
        print("Get final performance...")

        _ = ae_performance(nnet, which_set='train', epoch=0)
        if nnet.data.n_val > 0:
            _ = ae_performance(nnet, which_set='val', epoch=0)
        _ = ae_performance(nnet, which_set='test', epoch=0)

        print("Evaluation completed.")

    # save weights
    if Cfg.pretrain:
        nnet.dump_weights("{}/ae_pretrained_weights.p".format(Cfg.xp_path), pretrain=True)
    else:
        nnet.dump_weights("{}/weights_final.p".format(Cfg.xp_path))

    # if image data, plot some random reconstructions
    if nnet.data._X_train.ndim == 4:
        from utils.visualization.mosaic_plot import plot_mosaic

        n_img = 32
        random_idx = np.random.choice(nnet.data.n_train, n_img, replace=False)
        _, _, _, reps = nnet.ae_forward(nnet.data._X_train[random_idx, ...])

        title = str(n_img) + " random autoencoder reconstructions"
        plot_mosaic(reps, title=title, export_pdf=(Cfg.xp_path + "/ae_reconstructions"))

    # plot diagnostics if specified
    if Cfg.ae_diagnostics and Cfg.pretrain:
        from utils.visualization.diagnostics_plot import plot_ae_diagnostics
        from utils.visualization.filters_plot import plot_filters

        # common suffix for plot titles
        str_lr = "lr = " + str(nnet.ae_learning_rate)
        C = int(Cfg.C.get_value())
        if not Cfg.weight_decay:
            C = None
        str_C = "C = " + str(C)
        title_suffix = "(" + nnet.ae_solver + ", " + str_C + ", " + str_lr + ")"

        # plot diagnostics
        plot_ae_diagnostics(nnet, Cfg.xp_path, title_suffix)

        # plot filters
        plot_filters(nnet, Cfg.xp_path, title_suffix, file_prefix="ae_", pretrain=True)
def train_network(nnet):

    if Cfg.reconstruction_loss:
        nnet.ae_n_epochs = nnet.n_epochs
        train_autoencoder(nnet)
        return

    print("Starting training with %s" % nnet.sgd_solver)

    # save initial network parameters for diagnostics
    nnet.save_initial_parameters()

    if Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
        # initialize diagnostics for the first epoch (detailed diagnostics per batch)
        nnet.initialize_diagnostics(Cfg.n_batches + 1)
    else:
        nnet.initialize_diagnostics(nnet.n_epochs)

    # initialize c from the mean of network feature representations in Deep SVDD if specified
    if Cfg.svdd_loss and Cfg.c_mean_init:
        initialize_c_as_mean(nnet, Cfg.c_mean_init_n_batches)

    for epoch in range(nnet.n_epochs):

        # get a copy of the current network parameters to track differences between epochs
        nnet.copy_parameters()

        # In each epoch, we do a full pass over the training data:
        start_time = time.time()

        # learning rate decay
        if Cfg.lr_decay:
            decay_learning_rate(nnet, epoch)

        if Cfg.lr_drop and (epoch == Cfg.lr_drop_in_epoch):
            # Drop the learning rate in the epoch specified by Cfg.lr_drop_in_epoch
            # by factor Cfg.lr_drop_factor; a simple separation of learning into a
            # "region search" and a "fine-tuning" stage.
            lr_new = Cfg.floatX((1.0 / Cfg.lr_drop_factor) * Cfg.learning_rate.get_value())
            print("")
            print("Learning rate drop in epoch {} from {:.6f} to {:.6f}".format(
                epoch, Cfg.floatX(Cfg.learning_rate.get_value()), lr_new))
            print("")
            Cfg.learning_rate.set_value(lr_new)

        # train on epoch
        i_batch = 0
        for batch in nnet.data.get_epoch_train():

            if Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
                # evaluation before training
                if (epoch == 0) and (i_batch == 0):
                    _, _ = performance(nnet, which_set='train', epoch=i_batch)
                    if nnet.data.n_val > 0:
                        _, _ = performance(nnet, which_set='val', epoch=i_batch)
                    _, _ = performance(nnet, which_set='test', epoch=i_batch)

            # train
            inputs, targets, _ = batch
            if Cfg.svdd_loss:
                if Cfg.block_coordinate:
                    _, _ = nnet.backprop_without_R(inputs, targets)
                elif Cfg.hard_margin:
                    _, _ = nnet.backprop_ball(inputs, targets)
                else:
                    _, _ = nnet.backprop(inputs, targets)
            else:
                _, _ = nnet.backprop(inputs, targets)

            if Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
                # get detailed diagnostics (per batch) for the first epoch
                if epoch == 0:
                    _, _ = performance(nnet, which_set='train', epoch=i_batch + 1)
                    if nnet.data.n_val > 0:
                        _, _ = performance(nnet, which_set='val', epoch=i_batch + 1)
                    _, _ = performance(nnet, which_set='test', epoch=i_batch + 1)
                    nnet.copy_parameters()

            i_batch += 1

        if (epoch == 0) and Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
            # plot diagnostics for the first epoch
            plot_diagnostics(nnet, Cfg.xp_path, Cfg.title_suffix, xlabel="Batches", file_prefix="e1_")

            # re-initialize diagnostics on the epoch level
            nnet.initialize_diagnostics(nnet.n_epochs)
            nnet.copy_initial_parameters_to_cache()

        # Performance on the training set (forward pass with deterministic=True)
        # to get the exact training objective
        train_objective, train_accuracy = performance(nnet, which_set='train', epoch=epoch, print_=True)

        # Adjust radius R for the SVDD hard-margin objective
        if Cfg.svdd_loss and (Cfg.hard_margin or (Cfg.block_coordinate and (epoch < Cfg.warm_up_n_epochs))):
            # set R to be the (1-nu)-th quantile of distances
            out_idx = int(np.floor(nnet.data.n_train * Cfg.nu.get_value()))
            sort_idx = nnet.diag['train']['scores'][:, epoch].argsort()
            R_new = nnet.diag['train']['scores'][sort_idx, epoch][-out_idx] + nnet.Rvar.get_value()
            nnet.Rvar.set_value(Cfg.floatX(R_new))

        # Update radius R and center c if block coordinate optimization is chosen
        if Cfg.block_coordinate and (epoch >= Cfg.warm_up_n_epochs) and ((epoch % Cfg.k_update_epochs) == 0):
            if Cfg.center_fixed:
                nnet.update_R()
            else:
                nnet.update_R_c()

        if Cfg.nnet_diagnostics:
            # performance on validation and test set
            if nnet.data.n_val > 0:
                val_objective, val_accuracy = performance(nnet, which_set='val', epoch=epoch, print_=True)
            test_objective, test_accuracy = performance(nnet, which_set='test', epoch=epoch, print_=True)

            # log performance
            nnet.log['train_objective'].append(train_objective)
            nnet.log['train_accuracy'].append(train_accuracy)
            if nnet.data.n_val > 0:
                nnet.log['val_objective'].append(val_objective)
                nnet.log['val_accuracy'].append(val_accuracy)
            nnet.log['test_objective'].append(test_objective)
            nnet.log['test_accuracy'].append(test_accuracy)
            nnet.log['time_stamp'].append(time.time() - nnet.clock)

        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, nnet.n_epochs, time.time() - start_time))
        print('')

        # save model as required
        if epoch + 1 == nnet.save_at:
            nnet.dump_weights(nnet.save_to)

    # save train time
    nnet.train_time = time.time() - nnet.clock

    # test for adversarial model
    # print('Start adversarial model')
    # floatX = Cfg.floatX
    # nnet.cvar1 = shared(floatX(nnet.cvar.eval()))
    # nnet.Rvar1 = shared(floatX(nnet.Rvar.eval()))
    #
    # distants = T.sum((nnet.feature_layer - nnet.cvar1) ** 2)
    # logits = distants - nnet.Rvar1
    # nnet.logits_layer = logits
    # for batch in nnet.data.get_epoch('test'):
    #     inputs, targets, batch_idx = batch
    #     err, acc, b_scores, l2, b_rec, b_rep, b_rep_norm, _, b_loss, R = nnet.forward(inputs, targets)
    #     ad(nnet, inputs, targets)
    # print('End adversarial model')

    # Get final performance in last epoch if no running diagnostics are taken
    if not Cfg.nnet_diagnostics:
        nnet.initialize_diagnostics(1)
        nnet.copy_parameters()

        # perform forward passes on train, val, and test set
        print("Get final performance...")

        train_objective, train_accuracy = performance(nnet, which_set='train', epoch=0, print_=True)
        if nnet.data.n_val > 0:
            val_objective, val_accuracy = performance(nnet, which_set='val', epoch=0, print_=True)
        test_objective, test_accuracy = performance(nnet, which_set='test', epoch=0, print_=True)

        print("Evaluation completed.")

        # log performance
        nnet.log['train_objective'].append(train_objective)
        nnet.log['train_accuracy'].append(train_accuracy)
        if nnet.data.n_val > 0:
            nnet.log['val_objective'].append(val_objective)
            nnet.log['val_accuracy'].append(val_accuracy)
        nnet.log['test_objective'].append(test_objective)
        nnet.log['test_accuracy'].append(test_accuracy)
        nnet.log['time_stamp'].append(time.time() - nnet.clock)

    nnet.stop_clock()
    nnet.test_time = time.time() - (nnet.train_time + nnet.clock)

    # save final weights (and best weights in case of a two-class dataset)
    nnet.dump_weights("{}/weights_final.p".format(Cfg.xp_path))
    if nnet.data.n_classes == 2:
        nnet.dump_best_weights("{}/weights_best_ep.p".format(Cfg.xp_path))
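# Hedged illustration (not part of the original code): in the hard-margin
# adjustment above, the logged scores are offsets relative to the current
# radius, so adding their (1-nu)-th quantile to R places roughly a
# nu-fraction of the training points outside the ball.
def _demo_hard_margin_R(nu=0.25, R_old=1.0):
    import numpy as np

    scores = np.array([-0.5, -0.1, 0.2, 0.7])   # per-sample score relative to R_old
    out_idx = int(np.floor(len(scores) * nu))
    R_new = np.sort(scores)[-out_idx] + R_old    # here: 0.7 + 1.0 = 1.7
    return R_new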
def train_network(nnet):

    if nnet.data.dataset_name == "imagenet":
        train_network_imagenet(nnet)
        return

    print("Starting training with %s" % nnet.sgd_solver)

    for epoch in range(nnet.n_epochs):

        # divide the learning rate by 10 every 10 epochs for the nesterov solver
        if nnet.solver == 'nesterov' and (epoch + 1) % 10 == 0:
            lr = Cfg.floatX(Cfg.learning_rate.get_value() / 10.)
            Cfg.learning_rate.set_value(lr)

        # In each epoch, we do a full pass over the training data:
        train_objective = 0
        train_accuracy = 0
        train_batches = 0
        start_time = time.time()

        # train on epoch
        for batch in nnet.data.get_epoch_train():
            inputs, targets, _ = batch
            err, acc = nnet.backprop(inputs, targets)
            train_objective += err
            train_accuracy += acc
            train_batches += 1

        # normalize results
        train_objective *= 1. / train_batches
        train_accuracy *= 100. / train_batches

        # print performance
        print_obj_and_acc(train_objective, train_accuracy, which_set='train')
        val_objective, val_accuracy = performance(nnet, which_set='val', print_=True)
        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, nnet.n_epochs, time.time() - start_time))

        # log performance
        nnet.log['train_objective'].append(train_objective)
        nnet.log['train_accuracy'].append(train_accuracy)
        nnet.log['val_objective'].append(val_objective)
        nnet.log['val_accuracy'].append(val_accuracy)
        nnet.log['time_stamp'].append(time.time() - nnet.clock)

        # send data to Tensorboard
        if Cfg.draw_on_board:
            data_to_send = {'Objective': train_objective, 'Accuracy': train_accuracy}
            nnet.board_monitor_train.add_scalar_dict(data_to_send)
            data_to_send = {'Objective': val_objective, 'Accuracy': val_accuracy}
            nnet.board_monitor_val.add_scalar_dict(data_to_send)

        # save model as required
        if epoch + 1 == nnet.save_at:
            nnet.dump_weights(nnet.save_to)

    test_objective, test_accuracy = performance(nnet, which_set='test', print_=True)

    # log final performance
    nnet.log['test_objective'] = test_objective
    nnet.log['test_accuracy'] = test_accuracy

    nnet.test_time = time.time() - nnet.clock
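# Hedged illustration (not part of the original code): for the 'nesterov'
# solver the loop above divides the learning rate by 10 every 10 epochs, so
# after 30 epochs a rate of 0.1 has shrunk to 1e-4.
def _demo_nesterov_schedule(lr=0.1, n_epochs=30):
    for epoch in range(n_epochs):
        if (epoch + 1) % 10 == 0:
            lr /= 10.
    return lr  # 0.1 / 10**3 == 1e-4 for n_epochs=30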
def train_network(nnet):

    if Cfg.reconstruction_loss:
        nnet.ae_n_epochs = nnet.n_epochs
        train_autoencoder(nnet)
        return

    print("Starting training with %s" % nnet.sgd_solver)

    # save initial network parameters for diagnostics
    nnet.save_initial_parameters()

    if Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
        # initialize diagnostics for the first epoch (detailed diagnostics per batch)
        print("detailed diagnostics")
        nnet.initialize_diagnostics(Cfg.n_batches + 1)
    else:
        nnet.initialize_diagnostics(nnet.n_epochs)

    # initialize c from the mean of network feature representations in Deep SVDD if specified
    if Cfg.svdd_loss and Cfg.c_mean_init:
        initialize_c_as_mean(nnet, Cfg.c_mean_init_n_batches)

    # initialize the centers c via k-means on network feature representations in deep mSVDD
    if Cfg.msvdd_loss:
        initialize_c_kmeans(nnet, Cfg.c_mean_init_n_batches)

    for epoch in range(nnet.n_epochs):

        # get a copy of the current network parameters to track differences between epochs
        nnet.copy_parameters()

        # In each epoch, we do a full pass over the training data:
        start_time = time.time()

        # learning rate decay
        if Cfg.lr_decay:
            decay_learning_rate(nnet, epoch)

        if Cfg.lr_drop and (epoch == Cfg.lr_drop_in_epoch):
            # Drop the learning rate in the epoch specified by Cfg.lr_drop_in_epoch
            # by factor Cfg.lr_drop_factor; a simple separation of learning into a
            # "region search" and a "fine-tuning" stage.
            lr_new = Cfg.floatX((1.0 / Cfg.lr_drop_factor) * Cfg.learning_rate.get_value())
            print("")
            print("Learning rate drop in epoch {} from {:.6f} to {:.6f}".format(
                epoch, Cfg.floatX(Cfg.learning_rate.get_value()), lr_new))
            print("")
            Cfg.learning_rate.set_value(lr_new)

        # train on epoch
        i_batch = 0
        for batch in nnet.data.get_epoch_train():

            if Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
                # evaluation before training
                if (epoch == 0) and (i_batch == 0):
                    _, _, _ = performance(nnet, which_set='train', epoch=i_batch)
                    if nnet.data.n_val > 0:
                        _, _, _ = performance(nnet, which_set='val', epoch=i_batch)
                    _, _, _ = performance(nnet, which_set='test', epoch=i_batch)

            # train
            inputs, targets, _ = batch
            if Cfg.svdd_loss or Cfg.msvdd_loss:
                if Cfg.block_coordinate:
                    _, _ = nnet.backprop_without_R(inputs, targets)
                elif Cfg.hard_margin:
                    _, _ = nnet.backprop_ball(inputs, targets)
                else:
                    _, _ = nnet.backprop(inputs, targets)
            else:
                _, _ = nnet.backprop(inputs, targets)

            if Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
                # get detailed diagnostics (per batch) for the first epoch
                if epoch == 0:
                    _, _, _ = performance(nnet, which_set='train', epoch=i_batch + 1)
                    if nnet.data.n_val > 0:
                        _, _, _ = performance(nnet, which_set='val', epoch=i_batch + 1)
                    _, _, _ = performance(nnet, which_set='test', epoch=i_batch + 1)
                    nnet.copy_parameters()

            i_batch += 1

        if (epoch == 0) and Cfg.nnet_diagnostics and Cfg.e1_diagnostics:
            # plot diagnostics for the first epoch
            plot_diagnostics(nnet, Cfg.xp_path, Cfg.title_suffix, xlabel="Batches", file_prefix="e1_")

            # re-initialize diagnostics on the epoch level
            nnet.initialize_diagnostics(nnet.n_epochs)
            nnet.copy_initial_parameters_to_cache()

        # Performance on the training set (forward pass with deterministic=True)
        # to get the exact training objective
        train_objective, train_accuracy, repsTrain = performance(nnet, which_set='train', epoch=epoch, print_=True)

        # Adjust radius R for the SVDD hard-margin objective
        if Cfg.svdd_loss and (Cfg.hard_margin or (Cfg.block_coordinate and (epoch < Cfg.warm_up_n_epochs))):
            # set R to be the (1-nu)-th quantile of distances
            out_idx = int(np.floor(nnet.data.n_train * Cfg.nu.get_value()))
            sort_idx = nnet.diag['train']['scores'][:, epoch].argsort()
            R_new = nnet.diag['train']['scores'][sort_idx, epoch][-out_idx] + nnet.Rvar.get_value()
            nnet.Rvar.set_value(Cfg.floatX(R_new))

        # Adjust the radii R for the mSVDD hard-margin objective
        if Cfg.msvdd_loss and (Cfg.hard_margin or (Cfg.block_coordinate and (epoch < Cfg.warm_up_n_epochs))):
            # set R_i to be the (1-nu)-th quantile of distances within cluster i
            n_cluster = Cfg.n_cluster
            scores = nnet.diag['train']['scores'][:, epoch]
            dists_idx = nnet.diag['train']['dists_idx'][:, epoch]

            out_idx = np.zeros(n_cluster).astype(int)
            nu = Cfg.nu.get_value()
            R_old = nnet.Rvar.get_value()
            R = np.float32(np.zeros((n_cluster, 1)))
            cc = np.float32(np.zeros((n_cluster, 1)))

            # cluster cardinalities
            for i in range(n_cluster):
                cc[i] = np.sum(np.equal(dists_idx, i))

            for i in range(n_cluster):
                # skip clusters that hold too few samples
                if cc[i] < np.floor(max(cc) * nu):
                    continue
                out_idx[i] = int(np.floor(cc[i] * nu))
                scores_c = scores[np.equal(dists_idx, i)]
                sort_idx = scores_c.argsort()
                R[i] = scores_c[sort_idx][-out_idx[i]] + R_old[i]
                del scores_c
                del sort_idx

            nnet.Rvar.set_value(R)

        # Update radius R and center c if block coordinate optimization is chosen
        if Cfg.block_coordinate and (epoch >= Cfg.warm_up_n_epochs) and ((epoch % Cfg.k_update_epochs) == 0):
            if Cfg.center_fixed:
                nnet.update_R()
            else:
                nnet.update_R_c()

        if Cfg.nnet_diagnostics:
            # performance on validation and test set
            if nnet.data.n_val > 0:
                val_objective, val_accuracy, repsVal = performance(nnet, which_set='val', epoch=epoch, print_=True)
            test_objective, test_accuracy, repsTest = performance(nnet, which_set='test', epoch=epoch, print_=True)

            # log performance
            nnet.log['train_objective'].append(train_objective)
            nnet.log['train_accuracy'].append(train_accuracy)
            if nnet.data.n_val > 0:
                nnet.log['val_objective'].append(val_objective)
                nnet.log['val_accuracy'].append(val_accuracy)
            nnet.log['test_objective'].append(test_objective)
            nnet.log['test_accuracy'].append(test_accuracy)
            nnet.log['time_stamp'].append(time.time() - nnet.clock)

        print("Epoch {} of {} took {:.3f}s".format(epoch + 1, nnet.n_epochs, time.time() - start_time))
        print('')

        # save model as required
        if epoch + 1 == nnet.save_at:
            nnet.dump_weights(nnet.save_to)

    # save train time
    nnet.train_time = time.time() - nnet.clock

    # Get final performance in last epoch if no running diagnostics are taken
    if not Cfg.nnet_diagnostics:
        nnet.initialize_diagnostics(1)
        nnet.copy_parameters()

        # perform forward passes on train, val, and test set
        print("Get final performance...")

        train_objective, train_accuracy, repsTrain = performance(nnet, which_set='train', epoch=0, print_=True)
        if nnet.data.n_val > 0:
            val_objective, val_accuracy, repsVal = performance(nnet, which_set='val', epoch=0, print_=True)
        test_objective, test_accuracy, repsTest = performance(nnet, which_set='test', epoch=0, print_=True)

        print("Evaluation completed.")

        # log performance
        nnet.log['train_objective'].append(train_objective)
        nnet.log['train_accuracy'].append(train_accuracy)
        if nnet.data.n_val > 0:
            nnet.log['val_objective'].append(val_objective)
            nnet.log['val_accuracy'].append(val_accuracy)
        nnet.log['test_objective'].append(test_objective)
        nnet.log['test_accuracy'].append(test_accuracy)
        nnet.log['time_stamp'].append(time.time() - nnet.clock)

    nnet.stop_clock()
    nnet.test_time = time.time() - (nnet.train_time + nnet.clock)

    # save data representations and labels
    np.savetxt(Cfg.xp_path + "/" + 'repsTrain.txt', repsTrain, fmt='%-7.10f', delimiter=',')
    if nnet.data.n_val > 0:  # repsVal only exists if a validation set was used
        np.savetxt(Cfg.xp_path + "/" + 'repsVal.txt', repsVal, fmt='%-7.10f', delimiter=',')
    np.savetxt(Cfg.xp_path + "/" + 'repsTest.txt', repsTest, fmt='%-7.10f', delimiter=',')
    np.savetxt(Cfg.xp_path + "/" + 'ltest.txt', nnet.data._yo_test, fmt='%-7.10f', delimiter=',')

    if Cfg.msvdd_loss:
        svddM = np.append(nnet.Rvar.get_value(), nnet.cvar.get_value(), axis=1)
        np.savetxt(Cfg.xp_path + "/" + 'svddM.txt', svddM, fmt='%-7.5f', delimiter=',')

    # save final weights (and best weights in case of a two-class dataset)
    nnet.dump_weights("{}/weights_final.p".format(Cfg.xp_path))
    if nnet.data.n_classes == 2:
        nnet.dump_best_weights("{}/weights_best_ep.p".format(Cfg.xp_path))