def __init__(self, *args, ada_decay=1, **kwargs):
    """
    Initialise as TrainingSetup,
    but also initialise shared squared-gradient matrices
    """
    super().__init__(*args, **kwargs)
    # Decay hyperparameter
    if not (ada_decay > 0 and ada_decay <= 1):
        raise ValueError('ada_decay must be in the range (0,1]')
    self.ada_decay = ada_decay
    # Squared gradients
    self.link_sqsum = [shared_zeros_like(m) for m in self.link_weights]
    self.pred_sqsum = [shared_zeros_like(m) for m in self.pred_weights]
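# A minimal NumPy sketch (not part of the original class) of the update rule
# these squared-gradient accumulators support: plain AdaGrad when
# ada_decay == 1, an RMSProp-style decayed accumulator when ada_decay < 1.
# The function name, learning rate, and epsilon below are illustrative
# assumptions, not taken from the class itself.
import numpy as np

def adagrad_style_step(param, grad, sqsum, learning_rate=0.01,
                       ada_decay=1.0, eps=1e-8):
    """One hypothetical update for a single parameter array."""
    sqsum = ada_decay * sqsum + grad ** 2   # decayed squared-gradient sum
    param = param - learning_rate * grad / (np.sqrt(sqsum) + eps)
    return param, sqsum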
def __init__(self, params, learning_rate=0.001, beta_1=0.99, beta_2=0.999, timestep=1):
    """
    Initialise Adam optimiser state: per-parameter first- and second-moment
    estimates, and a shared timestep
    """
    self.learning_rate = learning_rate
    self.beta_1 = beta_1
    self.beta_2 = beta_2
    # Moving averages of the gradient (first moments, initialised to zeros)
    # and of the squared gradient (second moments, initialised to ones)
    self.first_moments = collections.OrderedDict([(param, shared_zeros_like(param))
                                                  for param in params])
    self.second_moments = collections.OrderedDict([(param, shared_ones_like(param))
                                                   for param in params])
    # Timestep, stored as a shared variable so it can be updated in-graph
    self.timestep = theano.shared(np.array(timestep).astype(theano.config.floatX))
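# A hedged sketch (assumed, not the class's own method) of the standard Adam
# update that this state supports, for a single parameter array.  Here m and
# v stand in for first_moments[param] and second_moments[param], and t for
# the shared timestep; eps is an assumed stability constant.  Note that the
# class initialises second moments to ones, so its early steps would be more
# conservative than with the usual zero initialisation assumed by the
# textbook bias correction shown here.
import numpy as np

def adam_step(param, grad, m, v, t, learning_rate=0.001,
              beta_1=0.99, beta_2=0.999, eps=1e-8):
    """One hypothetical Adam update for a single parameter array."""
    m = beta_1 * m + (1 - beta_1) * grad         # first-moment estimate
    v = beta_2 * v + (1 - beta_2) * grad ** 2    # second-moment estimate
    m_hat = m / (1 - beta_1 ** t)                # bias correction
    v_hat = v / (1 - beta_2 ** t)
    param = param - learning_rate * m_hat / (np.sqrt(v_hat) + eps)
    return param, m, v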
def __init__(self, *args, mean_decay=0.9, var_decay=0.999, **kwargs):
    """
    Initialise as TrainingSetup, but also initialise shared matrices
    for moving averages of the gradient and the squared gradient
    """
    super().__init__(*args, **kwargs)
    # Decay hyperparameters
    if not (mean_decay > 0 and mean_decay < 1):
        raise ValueError('mean_decay must be in the range (0,1)')
    self.mean_decay = mean_decay
    if not (var_decay > 0 and var_decay < 1):
        raise ValueError('var_decay must be in the range (0,1)')
    self.var_decay = var_decay
    # Moving averages of gradient and squared gradient
    self.link_mean = [shared_zeros_like(m) for m in self.link_weights]
    self.pred_mean = [shared_zeros_like(m) for m in self.pred_weights]
    self.link_var = [shared_zeros_like(m) for m in self.link_weights]
    self.pred_var = [shared_zeros_like(m) for m in self.pred_weights]
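# A hypothetical sketch of how the shared accumulators above could feed a
# Theano update list (the original class's update method is not shown here;
# get_adam_updates, rate, and eps are assumed names/values).  Each tuple
# pairs a shared variable with its new symbolic value, in the form accepted
# by theano.function(updates=...).
import theano.tensor as T

def get_adam_updates(weights, grads, means, variances,
                     mean_decay=0.9, var_decay=0.999, rate=0.001, eps=1e-8):
    updates = []
    for w, g, m, v in zip(weights, grads, means, variances):
        new_m = mean_decay * m + (1 - mean_decay) * g        # gradient average
        new_v = var_decay * v + (1 - var_decay) * g ** 2     # sq.-gradient average
        step = rate * new_m / (T.sqrt(new_v) + eps)
        updates.extend([(m, new_m), (v, new_v), (w, w - step)])
    return updates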