Example #1
    def get_sfg_updates(self, X_sym, y_sym, params, cost,
                        learning_rate, momentum):
        gparams = T.grad(cost, params)
        updates = OrderedDict()
        from sfg import SFG
        if not hasattr(self, "sfg_"):
            self.count_ = theano.shared(0)
            self.slow_freq_ = 20
            self.sfg_ = SFG(params, gparams)

        slow_updates, fast_updates = self.sfg_.updates(learning_rate,
                                                       momentum,
                                                       epsilon=0.0001,
                                                       momentum_clipping=None)
        for param in slow_updates.keys():
            updates[param] = theano.ifelse.ifelse(T.eq(self.count_,
                                                       self.slow_freq_ - 1),
                                                  slow_updates[param],
                                                  fast_updates[param])
        updates[self.count_] = T.mod(self.count_ + 1, self.slow_freq_)
        return updates
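
The counter-plus-ifelse pattern above applies the "slow" updates only on every
self.slow_freq_-th call and the "fast" updates otherwise, with the counter kept
in a Theano shared variable. A minimal, self-contained sketch of the same
pattern (the shared variables, step sizes, and frequency here are illustrative,
not taken from the original source):

import theano
import theano.tensor as T
from theano.ifelse import ifelse

count = theano.shared(0)
param = theano.shared(0.0)
slow_freq = 3

# Take a large "slow" step on every third call, a small "fast" step otherwise.
new_param = ifelse(T.eq(count, slow_freq - 1), param + 10.0, param + 1.0)
step = theano.function([], new_param,
                       updates=[(param, new_param),
                                (count, T.mod(count + 1, slow_freq))])

for _ in range(6):
    step()
# param.get_value() is now 1 + 1 + 10 + 1 + 1 + 10 == 24.0
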
Example #2
from collections import OrderedDict

import theano
import theano.ifelse
import theano.tensor as T


class TrainingMixin(object):
    def get_sgd_updates(self, X_sym, y_sym, params, cost, learning_rate,
                        momentum):
        gparams = T.grad(cost, params)
        updates = OrderedDict()

        if not hasattr(self, "momentum_velocity_"):
            self.momentum_velocity_ = [0.] * len(gparams)

        for n, (param, gparam) in enumerate(zip(params, gparams)):
            velocity = self.momentum_velocity_[n]
            update_step = momentum * velocity - learning_rate * gparam
            self.momentum_velocity_[n] = update_step
            updates[param] = param + update_step

        return updates
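
    # Added commentary (not in the original source): this is classical momentum,
    #   v_t = momentum * v_{t-1} - learning_rate * g_t
    #   p_t = p_{t-1} + v_t
    # with the per-parameter velocity expressions stored in
    # self.momentum_velocity_.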

    def _norm_constraint(self, param, update_step, max_col_norm):
        stepped_param = param + update_step
        if param.get_value(borrow=True).ndim == 2:
            col_norms = T.sqrt(T.sum(T.sqr(stepped_param), axis=0))
            desired_norms = T.clip(col_norms, 0, max_col_norm)
            scale = desired_norms / (1e-7 + col_norms)
            new_param = param * scale
            new_update_step = update_step * scale
        else:
            new_param = param
            new_update_step = update_step
        return new_param, new_update_step
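
    # Added commentary (not in the original source): for 2D weight matrices,
    # _norm_constraint scales both the parameter and its update step by
    # clip(||col||, 0, max_col_norm) / (1e-7 + ||col||), computed column-wise
    # on the stepped parameter, so that new_param + new_update_step has column
    # norms of at most max_col_norm. Non-2D parameters (e.g. biases) pass
    # through unchanged. None of the update rules in this snippet call it.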

    def get_clip_sgd_updates(self, X_sym, y_sym, params, cost, learning_rate,
                             momentum, rescale=5.):
        gparams = T.grad(cost, params)
        updates = OrderedDict()

        if not hasattr(self, "momentum_velocity_"):
            self.momentum_velocity_ = [0.] * len(gparams)

        # Gradient clipping
        grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), gparams)))
        not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
        scaling_num = rescale
        scaling_den = T.maximum(rescale, grad_norm)
        for n, (param, gparam) in enumerate(zip(params, gparams)):
            # clip gradient directly, not momentum etc.
            gparam = T.switch(not_finite, 0.1 * param,
                              gparam * (scaling_num / scaling_den))
            velocity = self.momentum_velocity_[n]
            update_step = momentum * velocity - learning_rate * gparam
            self.momentum_velocity_[n] = update_step
            updates[param] = param + update_step
        return updates
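
    # Added commentary (not in the original source): the joint gradient norm
    # over all parameters is used for clipping; each gradient is scaled by
    # rescale / max(rescale, ||g||), or replaced by 0.1 * param when the norm
    # is NaN or Inf, before the usual momentum step is applied.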

    def get_clip_rmsprop_updates(self, X_sym, y_sym, params, cost,
                                 learning_rate, momentum, rescale=5.):
        gparams = T.grad(cost, params)
        updates = OrderedDict()

        if not hasattr(self, "running_square_"):
            self.running_square_ = [0.] * len(gparams)
            self.running_avg_ = [0.] * len(gparams)
            self.updates_storage_ = [0.] * len(gparams)

        if not hasattr(self, "momentum_velocity_"):
            self.momentum_velocity_ = [0.] * len(gparams)

        # Gradient clipping
        grad_norm = T.sqrt(sum(map(lambda x: T.sqr(x).sum(), gparams)))
        not_finite = T.or_(T.isnan(grad_norm), T.isinf(grad_norm))
        scaling_num = rescale
        scaling_den = T.maximum(rescale, grad_norm)
        for n, (param, gparam) in enumerate(zip(params, gparams)):
            gparam = T.switch(not_finite, 0.1 * param,
                              gparam * (scaling_num / scaling_den))
            combination_coeff = 0.9
            minimum_grad = 1e-4
            old_square = self.running_square_[n]
            new_square = combination_coeff * old_square + (
                1. - combination_coeff) * T.sqr(gparam)
            old_avg = self.running_avg_[n]
            new_avg = combination_coeff * old_avg + (
                1. - combination_coeff) * gparam
            rms_grad = T.sqrt(new_square - new_avg ** 2)
            rms_grad = T.maximum(rms_grad, minimum_grad)
            velocity = self.momentum_velocity_[n]
            update_step = momentum * velocity - learning_rate * (
                gparam / rms_grad)
            self.running_square_[n] = new_square
            self.running_avg_[n] = new_avg
            self.updates_storage_[n] = update_step
            self.momentum_velocity_[n] = update_step
            updates[param] = param + update_step

        return updates
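
    # Added commentary (not in the original source): after the same clipping as
    # above, the loop implements an RMSProp step with momentum,
    #   sq_t  = 0.9 * sq_{t-1}  + 0.1 * g_t ** 2
    #   avg_t = 0.9 * avg_{t-1} + 0.1 * g_t
    #   rms_t = max(sqrt(sq_t - avg_t ** 2), 1e-4)
    #   v_t   = momentum * v_{t-1} - learning_rate * g_t / rms_t
    #   p_t   = p_{t-1} + v_t
    # with the running statistics kept per parameter in self.running_square_
    # and self.running_avg_.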

    def get_sfg_updates(self, X_sym, y_sym, params, cost,
                        learning_rate, momentum):
        gparams = T.grad(cost, params)
        updates = OrderedDict()
        from sfg import SFG
        if not hasattr(self, "sfg_"):
            self.count_ = theano.shared(0)
            self.slow_freq_ = 20
            self.sfg_ = SFG(params, gparams)

        slow_updates, fast_updates = self.sfg_.updates(learning_rate,
                                                       momentum,
                                                       epsilon=0.0001,
                                                       momentum_clipping=None)
        for param in slow_updates.keys():
            updates[param] = theano.ifelse.ifelse(T.eq(self.count_,
                                                       self.slow_freq_ - 1),
                                                  slow_updates[param],
                                                  fast_updates[param])
        updates[self.count_] = T.mod(self.count_ + 1, self.slow_freq_)
        return updates
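
A minimal usage sketch: the mixin only builds the update dictionary, so the
model class is expected to provide the parameters, cost, and symbolic inputs
itself. The toy softmax model, variable names, and hyperparameters below are
illustrative assumptions, not part of the original source:

import numpy as np
import theano
import theano.tensor as T


class SoftmaxModel(TrainingMixin):
    """Toy model showing how the mixin's update rules are consumed."""
    def __init__(self, n_in, n_out):
        self.W = theano.shared(np.zeros((n_in, n_out)))
        self.b = theano.shared(np.zeros(n_out))
        self.params = [self.W, self.b]


X_sym = T.matrix("X")
y_sym = T.ivector("y")
model = SoftmaxModel(n_in=20, n_out=3)
p_y = T.nnet.softmax(T.dot(X_sym, model.W) + model.b)
cost = -T.mean(T.log(p_y)[T.arange(y_sym.shape[0]), y_sym])

# Build clipped momentum-SGD updates and compile a single training step.
updates = model.get_clip_sgd_updates(X_sym, y_sym, model.params, cost,
                                     learning_rate=0.01, momentum=0.9)
train_fn = theano.function([X_sym, y_sym], cost, updates=updates)

X = np.random.randn(8, 20).astype(theano.config.floatX)
y = np.random.randint(0, 3, size=8).astype("int32")
print(train_fn(X, y))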