def init_variables(self):
    super(LeakStepAdaptation, self).init_variables()
    n_parameters = count_parameters(self.connection)
    self.variables.leak_average = theano.shared(
        name='leak-step-adapt/leak-average',
        value=asfloat(np.zeros(n_parameters)),
    )
def init_train_updates(self):
    step = self.variables.step
    previous_delta = self.variables.prev_delta
    previous_gradient = self.variables.prev_gradient

    n_parameters = count_parameters(self.connection)
    parameters = parameter_values(self.connection)
    param_vector = T.concatenate([param.flatten() for param in parameters])

    gradients = T.grad(self.variables.error_func, wrt=parameters)
    full_gradient = T.concatenate([grad.flatten() for grad in gradients])

    beta = self.update_function(previous_gradient, full_gradient,
                                previous_delta)
    parameter_delta = ifelse(
        T.eq(T.mod(self.variables.epoch, n_parameters), 1),
        -full_gradient,
        -full_gradient + beta * previous_delta)
    updated_parameters = param_vector + step * parameter_delta

    updates = [
        (previous_gradient, full_gradient),
        (previous_delta, parameter_delta),
    ]
    parameter_updates = setup_parameter_updates(parameters,
                                                updated_parameters)
    updates.extend(parameter_updates)

    return updates
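The conjugate gradient updates above leave `update_function` abstract. A minimal sketch of one common choice, the Fletcher-Reeves formula, is shown below; the function name `fletcher_reeves` and the small denominator guard are assumptions, not part of the original code.

import theano.tensor as T

def fletcher_reeves(gradient_old, gradient_new, delta_old):
    # beta = (g_new . g_new) / (g_old . g_old); a small constant in
    # the denominator guards against division by zero on the first
    # iteration, when the previous gradient is still all zeros.
    return (
        T.dot(gradient_new, gradient_new) /
        (T.dot(gradient_old, gradient_old) + 1e-7)
    )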
def init_variables(self):
    super(ConjugateGradient, self).init_variables()
    n_parameters = count_parameters(self.connection)
    self.variables.update(
        prev_delta=theano.shared(
            name="conj-grad/prev-delta",
            value=asfloat(np.zeros(n_parameters)),
        ),
        prev_gradient=theano.shared(
            name="conj-grad/prev-gradient",
            value=asfloat(np.zeros(n_parameters)),
        ),
    )
def init_variables(self):
    super(LeakStepAdaptation, self).init_variables()
    n_parameters = count_parameters(self.connection)
    self.variables.leak_average = tf.Variable(
        tf.zeros(n_parameters),
        name="leak-step-adapt/leak-average",
        dtype=tf.float32,
    )
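Both LeakStepAdaptation variants allocate one `leak_average` entry per parameter. The update that consumes this variable is not shown in this section; a typical leaky (exponentially decaying) average of the parameter deltas, with the coefficient name `leak_size` assumed for illustration, would have the form

    leak_average <- (1 - leak_size) * leak_average + leak_size * parameter_delta

so that the magnitude of recent updates can drive step-size adaptation.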
def init_variables(self):
    super(ConjugateGradient, self).init_variables()
    n_parameters = count_parameters(self.connection)
    self.variables.update(
        prev_delta=theano.shared(
            name="conj-grad/prev-delta",
            value=asfloat(np.zeros(n_parameters)),
        ),
        prev_gradient=theano.shared(
            name="conj-grad/prev-gradient",
            value=asfloat(np.zeros(n_parameters)),
        ),
    )
def init_train_updates(self):
    n_parameters = count_parameters(self.connection)
    parameters = parameter_values(self.connection)
    param_vector = T.concatenate([param.flatten() for param in parameters])
    penalty_const = asfloat(self.penalty_const)

    hessian_matrix, full_gradient = find_hessian_and_gradient(
        self.variables.error_func, parameters)
    updated_parameters = param_vector - slinalg.solve(
        hessian_matrix + penalty_const * T.eye(n_parameters),
        full_gradient)
    updates = setup_parameter_updates(parameters, updated_parameters)

    return updates
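For reference, the step computed above is the standard Levenberg-Marquardt update

    \theta_{t+1} = \theta_t - (H + \mu I)^{-1} g

where H is the Hessian of error_func with respect to the parameters, g is its gradient, and \mu is penalty_const. The `slinalg.solve` call applies the inverse by solving the linear system rather than forming (H + \mu I)^{-1} explicitly.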
def init_variables(self):
    super(ConjugateGradient, self).init_variables()
    n_parameters = count_parameters(self.connection)
    self.variables.update(
        prev_delta=tf.Variable(
            tf.zeros([n_parameters]),
            name="conj-grad/prev-delta",
            dtype=tf.float32,
        ),
        prev_gradient=tf.Variable(
            tf.zeros([n_parameters]),
            name="conj-grad/prev-gradient",
            dtype=tf.float32,
        ),
    )
def init_train_updates(self):
    n_parameters = count_parameters(self.connection)
    parameters = parameter_values(self.connection)
    param_vector = T.concatenate([param.flatten() for param in parameters])
    penalty_const = asfloat(self.penalty_const)

    print(n_parameters)

    # This variant stores the raw Hessian in a shared variable
    # instead of applying the Levenberg-Marquardt step to the
    # parameters. The shared variable is named 'hessian' to match
    # what it actually holds (the original name 'hessian_inverse'
    # was misleading).
    self.variables.hessian = theano.shared(
        value=asfloat(np.zeros((n_parameters, n_parameters))),
        name='hessian')
    hessian_matrix, full_gradient = find_hessian_and_gradient(
        self.variables.error_func, parameters)

    updated_parameters = hessian_matrix
    updates = setup_parameter_updates([self.variables.hessian],
                                      updated_parameters)

    return updates
def init_train_updates(self):
    penalty_const = asfloat(self.penalty_const)

    n_parameters = count_parameters(self.connection)
    parameters = parameter_values(self.connection)
    param_vector = make_single_vector(parameters)

    hessian_matrix, full_gradient = find_hessian_and_gradient(
        self.variables.error_func, parameters)

    parameter_update = tf.matrix_solve(
        hessian_matrix + penalty_const * tf.eye(n_parameters),
        tf.reshape(full_gradient, [-1, 1]))
    updated_parameters = param_vector - flatten(parameter_update)

    updates = setup_parameter_updates(parameters, updated_parameters)
    return updates
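The helper `make_single_vector` is not defined in this section. A plausible implementation, assuming it simply flattens and concatenates the parameter tensors into one vector, is sketched below; it mirrors the `T.concatenate([param.flatten() for param in parameters])` expression used in the Theano variants above.

import tensorflow as tf

def make_single_vector(tensors):
    # Flatten each tensor to 1-D and concatenate everything into a
    # single parameter vector.
    return tf.concat([tf.reshape(t, [-1]) for t in tensors], axis=0)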
def init_variables(self):
    super(QuasiNewton, self).init_variables()
    n_params = count_parameters(self.connection)
    self.variables.update(
        inv_hessian=theano.shared(
            name='algo:quasi-newton/matrix:inv-hessian',
            value=asfloat(self.h0_scale * np.eye(int(n_params))),
        ),
        prev_params=theano.shared(
            name='algo:quasi-newton/vector:prev-params',
            value=asfloat(np.zeros(n_params)),
        ),
        prev_full_gradient=theano.shared(
            name='algo:quasi-newton/vector:prev-full-gradient',
            value=asfloat(np.zeros(n_params)),
        ),
    )
def init_train_updates(self):
    n_parameters = count_parameters(self.connection)
    parameters = parameter_values(self.connection)
    param_vector = parameters2vector(self)
    penalty_const = asfloat(self.penalty_const)

    hessian_matrix, full_gradient = find_hessian_and_gradient(
        self.variables.error_func, parameters)
    updated_parameters = param_vector - slinalg.solve(
        hessian_matrix + penalty_const * T.eye(n_parameters),
        full_gradient)
    updates = setup_parameter_updates(parameters, updated_parameters)

    return updates
def init_variables(self):
    super(QuasiNewton, self).init_variables()
    n_parameters = count_parameters(self.connection)
    self.variables.update(
        inv_hessian=tf.Variable(
            asfloat(self.h0_scale) * tf.eye(n_parameters),
            name="quasi-newton/inv-hessian",
            dtype=tf.float32,
        ),
        prev_params=tf.Variable(
            tf.zeros([n_parameters]),
            name="quasi-newton/prev-params",
            dtype=tf.float32,
        ),
        prev_full_gradient=tf.Variable(
            tf.zeros([n_parameters]),
            name="quasi-newton/prev-full-gradient",
            dtype=tf.float32,
        ),
    )
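Both QuasiNewton variants seed the inverse Hessian approximation with h0_scale * I and keep the previous parameter and gradient vectors. Assuming the update rule is BFGS (an assumption; the update itself is not shown in this section), the stored quantities would be combined as

    s_k = \theta_{k+1} - \theta_k,   y_k = g_{k+1} - g_k,   \rho_k = 1 / (y_k^T s_k)
    H_{k+1} = (I - \rho_k s_k y_k^T) H_k (I - \rho_k y_k s_k^T) + \rho_k s_k s_k^T

where H_k is inv_hessian, \theta_k is prev_params, and g_k is prev_full_gradient.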
def init_train_updates(self):
    step = self.variables.step
    previous_delta = self.variables.prev_delta
    previous_gradient = self.variables.prev_gradient

    n_parameters = count_parameters(self.connection)
    parameters = parameter_values(self.connection)
    param_vector = parameters2vector(self)

    gradients = T.grad(self.variables.error_func, wrt=parameters)
    full_gradient = T.concatenate([grad.flatten() for grad in gradients])

    beta = self.update_function(previous_gradient, full_gradient,
                                previous_delta)
    parameter_delta = ifelse(
        T.eq(T.mod(self.variables.epoch, n_parameters), 1),
        -full_gradient,
        -full_gradient + beta * previous_delta)
    updated_parameters = param_vector + step * parameter_delta

    updates = [
        (previous_gradient, full_gradient),
        (previous_delta, parameter_delta),
    ]
    parameter_updates = setup_parameter_updates(parameters,
                                                updated_parameters)
    updates.extend(parameter_updates)

    return updates
def init_train_updates(self):
    step = self.variables.step
    epoch = self.variables.epoch
    previous_delta = self.variables.prev_delta
    previous_gradient = self.variables.prev_gradient

    n_parameters = count_parameters(self.connection)
    parameters = parameter_values(self.connection)
    param_vector = make_single_vector(parameters)

    gradients = tf.gradients(self.variables.error_func, parameters)
    full_gradient = make_single_vector(gradients)

    beta = self.update_function(previous_gradient, full_gradient,
                                previous_delta, self.epsilon)

    parameter_delta = tf.where(
        tf.equal(tf.mod(epoch, n_parameters), 1),
        -full_gradient,
        -full_gradient + beta * previous_delta)

    step = self.find_optimal_step(param_vector, parameter_delta)
    updated_parameters = param_vector + step * parameter_delta
    updates = setup_parameter_updates(parameters, updated_parameters)

    # The gradient and delta have to be computed first; otherwise
    # TensorFlow's parallel execution can reorder the updates and,
    # for example, make the previous gradient equal to the current
    # gradient value.
    with tf.control_dependencies([full_gradient, parameter_delta]):
        updates.extend([
            previous_gradient.assign(full_gradient),
            previous_delta.assign(parameter_delta),
        ])

    return updates
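In this TensorFlow variant, `update_function` also receives `self.epsilon`. A sketch of a Polak-Ribiere style implementation with that signature is shown below; the name `polak_ribiere` and the exact use of `epsilon` as a division guard are assumptions about the original code.

import tensorflow as tf

def polak_ribiere(gradient_old, gradient_new, delta_old, epsilon):
    # beta = g_new . (g_new - g_old) / (g_old . g_old), with epsilon
    # keeping the denominator away from zero on the first iteration.
    return (
        tf.tensordot(gradient_new, gradient_new - gradient_old, 1) /
        (tf.tensordot(gradient_old, gradient_old, 1) + epsilon)
    )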