def decay_learning_rate(epoch):
    """
    Decay the learning rate after the epoch specified in Cfg.lr_decay_after_epoch.
    """

    # only apply a manual decay schedule for these solvers
    assert Cfg.nn_solver in ("sgd", "momentum", "adam")

    if epoch >= Cfg.lr_decay_after_epoch:
        lr_new = (Cfg.lr_decay_after_epoch / Cfg.floatX(epoch)) * Cfg.learning_rate_init
        return Cfg.floatX(lr_new)
    else:
        return Cfg.floatX(Cfg.learning_rate_init)
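
# A minimal, self-contained sketch (not part of the module) of the schedule
# decay_learning_rate implements: the rate stays at learning_rate_init up to
# lr_decay_after_epoch and then decays in proportion to
# lr_decay_after_epoch / epoch. The default values below are illustrative
# assumptions, not values from the original configuration.
def _sketch_decay_schedule(lr_init=1e-4, decay_after=50, n_epochs=200):
    """Print the decayed learning rate for a few sample epochs."""
    for epoch in (1, decay_after, 2 * decay_after, n_epochs):
        if epoch >= decay_after:
            lr = (decay_after / float(epoch)) * lr_init
        else:
            lr = lr_init
        print("epoch {:>3d}: lr = {:.2e}".format(epoch, lr))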
def adjust_learning_rate_finetune(epoch):
    """
    Drop the learning rate once in the epoch specified in Cfg.lr_drop_in_epoch.
    """

    if Cfg.lr_drop and (epoch == Cfg.lr_drop_in_epoch):
        # Drop the learning rate in the epoch specified in Cfg.lr_drop_in_epoch
        # by the factor Cfg.lr_drop_factor. This gives a simple separation of
        # training into a "region search" stage and a "fine-tuning" stage.
        lr_new = Cfg.floatX((1.0 / Cfg.lr_drop_factor) * Cfg.learning_rate)
        print("")
        print("Learning rate drop in epoch {} from {:.6f} to {:.6f}".format(
            epoch, Cfg.floatX(Cfg.learning_rate), lr_new))
        print("")
        Cfg.learning_rate = lr_new

    # return the (possibly updated) current learning rate so that epochs
    # without a drop do not reference an undefined lr_new
    return Cfg.learning_rate
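
# A hedged sketch of the two-stage schedule adjust_learning_rate_finetune
# realizes: the rate is constant during the "region search" stage and is
# divided once by the drop factor for the "fine-tuning" stage. This helper is
# hypothetical and uses made-up defaults; it does not touch the real Cfg.
def _sketch_lr_drop(lr=1e-4, drop_in_epoch=50, drop_factor=10.0, n_epochs=100):
    """Return the per-epoch learning rates of the two-stage schedule."""
    lrs = []
    for epoch in range(n_epochs):
        if epoch == drop_in_epoch:
            lr = lr / drop_factor  # one-time drop, as in the function above
        lrs.append(lr)
    return lrs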
def initialize_c_as_mean(self, inputs, n_batches, eps=0.1):
    """
    Initialize c as the mean of the final layer representations of all
    samples propagated in n_batches.
    """

    reps = self.get_OneClass_SVDD_network_reps(inputs)
    self.reps = reps

    print("[INFO:] Initializing c and radius R value...")

    # average over all batches (and thereby all samples) to initialize c
    c = np.mean(reps, axis=0)

    # If c_i is too close to 0 in dimension i, set it to +-eps.
    # Reason: a zero unit can be trivially matched with zero weights.
    c[(abs(c) < eps) & (c < 0)] = -eps
    c[(abs(c) < eps) & (c > 0)] = eps

    self.cvar = c  # initialize the center

    # initialize R at the (1 - nu)-th quantile of the squared distances to c
    dist_init = np.sum((reps - c) ** 2, axis=1)
    # guard against out_idx == 0, where index -0 would select the smallest
    # distance instead of the largest one
    out_idx = max(int(np.floor(len(reps) * Cfg.nu)), 1)
    sort_idx = dist_init.argsort()
    self.Rvar = Cfg.floatX(dist_init[sort_idx][-out_idx])
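
# A self-contained sketch (with synthetic data, not network outputs) of the
# same initialization: the center is the mean of the representations,
# near-zero coordinates are pushed to +-eps to rule out the trivial all-zero
# solution, and R**2 is set to the (1 - nu)-th quantile of the squared
# distances to the center. nu, eps, and the data shape are illustrative
# assumptions.
def _sketch_center_radius_init(nu=0.1, eps=0.1, seed=0):
    import numpy as np

    rng = np.random.RandomState(seed)
    reps = rng.randn(1000, 32).astype(np.float32)  # stand-in for final layer reps

    c = np.mean(reps, axis=0)
    c[(abs(c) < eps) & (c < 0)] = -eps  # keep every dimension away from zero
    c[(abs(c) < eps) & (c > 0)] = eps

    dist_init = np.sum((reps - c) ** 2, axis=1)
    out_idx = max(int(np.floor(len(reps) * nu)), 1)
    R_squared = np.sort(dist_init)[-out_idx]  # ~ (1 - nu)-quantile of distances
    return c, R_squared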