Example #1
 def init_variables(self):
     super(LeakStepAdaptation, self).init_variables()
     n_parameters = count_parameters(self.connection)
     self.variables.leak_average = theano.shared(
         name='leak-step-adapt/leak-average',
         value=asfloat(np.zeros(n_parameters)),
     )
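Every example below sizes its state vectors with count_parameters, which is not shown in these snippets. A minimal sketch of what it presumably computes (a hypothetical stand-in, not NeuPy's actual helper): the total number of scalar elements across the connection's trainable parameter arrays.

    import numpy as np

    def count_parameters_sketch(parameter_arrays):
        # Hypothetical stand-in for count_parameters: sum the number of
        # scalar elements over every trainable parameter array.
        return sum(int(np.prod(array.shape)) for array in parameter_arrays)

    # e.g. a 3x2 weight matrix plus a bias of length 2 -> 8 parameters
    assert count_parameters_sketch([np.empty((3, 2)), np.empty(2)]) == 8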
Example #3
    def init_train_updates(self):
        step = self.variables.step
        previous_delta = self.variables.prev_delta
        previous_gradient = self.variables.prev_gradient

        n_parameters = count_parameters(self.connection)
        parameters = parameter_values(self.connection)
        param_vector = T.concatenate([param.flatten() for param in parameters])

        gradients = T.grad(self.variables.error_func, wrt=parameters)
        full_gradient = T.concatenate([grad.flatten() for grad in gradients])

        beta = self.update_function(previous_gradient, full_gradient,
                                    previous_delta)
        parameter_delta = ifelse(
            T.eq(T.mod(self.variables.epoch, n_parameters), 1),
            -full_gradient,
            -full_gradient + beta * previous_delta,
        )
        updated_parameters = param_vector + step * parameter_delta

        updates = [
            (previous_gradient, full_gradient),
            (previous_delta, parameter_delta),
        ]
        parameter_updates = setup_parameter_updates(parameters,
                                                    updated_parameters)
        updates.extend(parameter_updates)

        return updates
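setup_parameter_updates has to undo the flattening that produced param_vector: it slices the updated flat vector back into the original parameter shapes. A NumPy sketch of that round trip (the slicing convention the helper implies, not its actual implementation):

    import numpy as np

    def to_vector(params):
        # mirrors T.concatenate([param.flatten() for param in parameters])
        return np.concatenate([param.ravel() for param in params])

    def from_vector(vector, params):
        # slice the flat vector back into the original shapes, in order
        restored, offset = [], 0
        for param in params:
            restored.append(vector[offset:offset + param.size].reshape(param.shape))
            offset += param.size
        return restored

    weights = [np.ones((3, 2)), np.zeros(4)]
    assert all(
        np.array_equal(before, after)
        for before, after in zip(weights, from_vector(to_vector(weights), weights))
    )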
Example #4
    def init_variables(self):
        super(ConjugateGradient, self).init_variables()
        n_parameters = count_parameters(self.connection)

        self.variables.update(
            prev_delta=theano.shared(name="conj-grad/prev-delta", value=asfloat(np.zeros(n_parameters))),
            prev_gradient=theano.shared(name="conj-grad/prev-gradient", value=asfloat(np.zeros(n_parameters))),
        )
Example #5
    def init_variables(self):
        super(LeakStepAdaptation, self).init_variables()

        n_parameters = count_parameters(self.connection)
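        # Same state as the Theano version in Example #1,
        # allocated here as a tf.Variable instead of theano.shared.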
        self.variables.leak_average = tf.Variable(
            tf.zeros(n_parameters),
            name="leak-step-adapt/leak-average",
            dtype=tf.float32,
        )
Example #6
    def init_variables(self):
        super(ConjugateGradient, self).init_variables()
        n_parameters = count_parameters(self.connection)

        self.variables.update(
            prev_delta=theano.shared(
                name="conj-grad/prev-delta",
                value=asfloat(np.zeros(n_parameters)),
            ),
            prev_gradient=theano.shared(
                name="conj-grad/prev-gradient",
                value=asfloat(np.zeros(n_parameters)),
            ),
        )
Example #7
    def init_train_updates(self):
        n_parameters = count_parameters(self.connection)
        parameters = parameter_values(self.connection)
        param_vector = T.concatenate([param.flatten() for param in parameters])
        penalty_const = asfloat(self.penalty_const)

        hessian_matrix, full_gradient = find_hessian_and_gradient(
            self.variables.error_func, parameters)

        updated_parameters = param_vector - slinalg.solve(
            hessian_matrix + penalty_const * T.eye(n_parameters),
            full_gradient)
        updates = setup_parameter_updates(parameters, updated_parameters)

        return updates
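Example #7 is the Levenberg-Marquardt step: it solves (H + c*I) * delta = g for delta and moves the parameters by -delta, where c is the damping penalty. The same computation in plain NumPy, for reference:

    import numpy as np

    def levenberg_marquardt_step(param_vector, hessian, gradient, penalty_const):
        # Damped Newton step: solve (H + c * I) delta = g, then move by -delta.
        n = param_vector.size
        delta = np.linalg.solve(hessian + penalty_const * np.eye(n), gradient)
        return param_vector - delta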
Example #8
    def init_variables(self):
        super(ConjugateGradient, self).init_variables()
        n_parameters = count_parameters(self.connection)

        self.variables.update(
            prev_delta=tf.Variable(
                tf.zeros([n_parameters]),
                name="conj-grad/prev-delta",
                dtype=tf.float32,
            ),
            prev_gradient=tf.Variable(
                tf.zeros([n_parameters]),
                name="conj-grad/prev-gradient",
                dtype=tf.float32,
            ),
        )
Example #9
    def init_train_updates(self):
        n_parameters = count_parameters(self.connection)
        parameters = parameter_values(self.connection)
        param_vector = T.concatenate([param.flatten() for param in parameters])
        penalty_const = asfloat(self.penalty_const)
        self.variables.hessian = theano.shared(
            name='hessian_inverse',
            value=asfloat(np.zeros((n_parameters, n_parameters))),
        )
        hessian_matrix, full_gradient = find_hessian_and_gradient(
            self.variables.error_func, parameters)
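        # Unlike the other examples, this variant does not move the
        # parameters: it only stores the freshly computed Hessian in
        # the shared variable declared above.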
        updated_parameters = hessian_matrix
        updates = setup_parameter_updates([self.variables.hessian],
                                          updated_parameters)

        return updates
Example #10
    def init_train_updates(self):
        penalty_const = asfloat(self.penalty_const)

        n_parameters = count_parameters(self.connection)
        parameters = parameter_values(self.connection)
        param_vector = make_single_vector(parameters)

        hessian_matrix, full_gradient = find_hessian_and_gradient(
            self.variables.error_func, parameters)
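        # tf.matrix_solve is the TensorFlow 1.x name; in 2.x it is tf.linalg.solve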
        parameter_update = tf.matrix_solve(
            hessian_matrix + penalty_const * tf.eye(n_parameters),
            tf.reshape(full_gradient, [-1, 1]))
        updated_parameters = param_vector - flatten(parameter_update)
        updates = setup_parameter_updates(parameters, updated_parameters)

        return updates
Example #11
 def init_variables(self):
     super(QuasiNewton, self).init_variables()
     n_params = count_parameters(self.connection)
     self.variables.update(
         inv_hessian=theano.shared(
             name='algo:quasi-newton/matrix:inv-hessian',
             value=asfloat(self.h0_scale * np.eye(int(n_params))),
         ),
         prev_params=theano.shared(
             name='algo:quasi-newton/vector:prev-params',
             value=asfloat(np.zeros(n_params)),
         ),
         prev_full_gradient=theano.shared(
             name='algo:quasi-newton/vector:prev-full-gradient',
             value=asfloat(np.zeros(n_params)),
         ),
     )
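The three shared variables above are exactly the state a BFGS-style quasi-Newton update needs: the running inverse-Hessian approximation plus the previous parameter and gradient vectors. The recursion itself lives in init_train_updates, which is not shown here; assuming the classic BFGS formula (which the variable names suggest), it looks like this in NumPy:

    import numpy as np

    def bfgs_inverse_hessian_update(inv_hessian, param_delta, gradient_delta):
        # param_delta:    s = params_new - prev_params
        # gradient_delta: y = full_gradient - prev_full_gradient
        rho = 1.0 / gradient_delta.dot(param_delta)
        identity = np.eye(param_delta.size)
        left = identity - rho * np.outer(param_delta, gradient_delta)
        right = identity - rho * np.outer(gradient_delta, param_delta)
        return (left.dot(inv_hessian).dot(right)
                + rho * np.outer(param_delta, param_delta))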
Example #12
    def init_train_updates(self):
        n_parameters = count_parameters(self.connection)
        parameters = parameter_values(self.connection)
        param_vector = parameters2vector(self)
        penalty_const = asfloat(self.penalty_const)

        hessian_matrix, full_gradient = find_hessian_and_gradient(
            self.variables.error_func, parameters
        )

        updated_parameters = param_vector - slinalg.solve(
            hessian_matrix + penalty_const * T.eye(n_parameters),
            full_gradient
        )
        updates = setup_parameter_updates(parameters, updated_parameters)

        return updates
Example #14
    def init_variables(self):
        super(QuasiNewton, self).init_variables()
        n_parameters = count_parameters(self.connection)

        self.variables.update(
            inv_hessian=tf.Variable(
                asfloat(self.h0_scale) * tf.eye(n_parameters),
                name="quasi-newton/inv-hessian",
                dtype=tf.float32,
            ),
            prev_params=tf.Variable(
                tf.zeros([n_parameters]),
                name="quasi-newton/prev-params",
                dtype=tf.float32,
            ),
            prev_full_gradient=tf.Variable(
                tf.zeros([n_parameters]),
                name="quasi-newton/prev-full-gradient",
                dtype=tf.float32,
            ),
        )
Example #15
    def init_train_updates(self):
        step = self.variables.step
        previous_delta = self.variables.prev_delta
        previous_gradient = self.variables.prev_gradient

        n_parameters = count_parameters(self.connection)
        parameters = parameter_values(self.connection)
        param_vector = parameters2vector(self)

        gradients = T.grad(self.variables.error_func, wrt=parameters)
        full_gradient = T.concatenate([grad.flatten() for grad in gradients])

        beta = self.update_function(previous_gradient, full_gradient, previous_delta)
        parameter_delta = ifelse(
            T.eq(T.mod(self.variables.epoch, n_parameters), 1),
            -full_gradient,
            -full_gradient + beta * previous_delta,
        )
        updated_parameters = param_vector + step * parameter_delta

        updates = [(previous_gradient, full_gradient), (previous_delta, parameter_delta)]
        parameter_updates = setup_parameter_updates(parameters, updated_parameters)
        updates.extend(parameter_updates)

        return updates
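The update_function that produces beta is configurable, and these snippets do not show which formula is active. One standard choice is Fletcher-Reeves (Polak-Ribiere is another); a NumPy sketch, hypothetical relative to this code:

    import numpy as np

    def fletcher_reeves(prev_gradient, gradient, prev_delta):
        # beta = ||g_new||^2 / ||g_old||^2; this formula ignores prev_delta
        return gradient.dot(gradient) / prev_gradient.dot(prev_gradient)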
Example #16
    def init_train_updates(self):
        step = self.variables.step
        epoch = self.variables.epoch
        previous_delta = self.variables.prev_delta
        previous_gradient = self.variables.prev_gradient

        n_parameters = count_parameters(self.connection)
        parameters = parameter_values(self.connection)
        param_vector = make_single_vector(parameters)

        gradients = tf.gradients(self.variables.error_func, parameters)
        full_gradient = make_single_vector(gradients)

        beta = self.update_function(previous_gradient, full_gradient,
                                    previous_delta, self.epsilon)

        parameter_delta = tf.where(
            tf.equal(tf.mod(epoch, n_parameters), 1),
            -full_gradient,
            -full_gradient + beta * previous_delta,
        )

        step = self.find_optimal_step(param_vector, parameter_delta)
        updated_parameters = param_vector + step * parameter_delta
        updates = setup_parameter_updates(parameters, updated_parameters)

        # These values have to be computed before the assignments below run.
        # TensorFlow executes operations in parallel, so without an explicit
        # dependency it may reorder the updates and, for example, leave the
        # previous gradient equal to the current gradient.
        with tf.control_dependencies([full_gradient, parameter_delta]):
            updates.extend([
                previous_gradient.assign(full_gradient),
                previous_delta.assign(parameter_delta),
            ])

        return updates