def __init__(self, learning_rate, decay, momentum, epsilon=1e-10,
             use_locking=False, centered=False, name='RMSProp'):
    super(RMSPropOptimizer, self).__init__(use_locking, name)
    # Note: ``momentum`` and ``centered`` are accepted for API
    # compatibility but are not forwarded to the underlying updater.
    self.updater = updaters.RMSPropUpdater(learning_rate, decay, epsilon)
def _create_graph(self):
    self.x = Tensor(shape=[None, self.img_channels,
                           self.img_height, self.img_width]).Variable()
    self.y_r = Tensor(shape=[None], name='Yr').Variable()

    # As implemented in A3C paper
    self.n1 = ops.Relu(ops.Conv2D([self.x] + self.weight_bias(),
                                  kernel_size=8, stride=4, num_output=16))
    self.n2 = ops.Relu(ops.Conv2D([self.n1] + self.weight_bias(),
                                  kernel_size=4, stride=2, num_output=32))
    self.action_index = Tensor(shape=[None, self.num_actions]).Variable()
    self.d1 = ops.Relu(ops.InnerProduct([self.n2] + self.weight_bias(),
                                        num_output=256))

    # Value head and its L2 regression loss.
    self.logits_v = ops.InnerProduct([self.d1] + self.weight_bias(),
                                     num_output=1)
    self.cost_v = ops.L2Loss([self.y_r, self.logits_v])

    # Policy head.
    self.logits_p = ops.InnerProduct([self.d1] + self.weight_bias(),
                                     num_output=self.num_actions)
    if Config.USE_LOG_SOFTMAX:
        raise NotImplementedError()
    else:
        self.softmax_p = ops.Softmax(self.logits_p)
        self.selected_action_prob = ops.Sum(self.softmax_p * self.action_index, axis=1)
        # Policy-gradient term: log pi(a|s) * advantage, with the value
        # prediction held constant via StopGradient.
        self.cost_p_1 = ops.Log(ops.Clip(self.selected_action_prob, self.log_epsilon, None)) * \
                        (self.y_r - ops.StopGradient(self.logits_v))
        # Entropy regularization term, scaled by -beta.
        self.cost_p_2 = ops.Sum(ops.Log(ops.Clip(self.softmax_p, self.log_epsilon, None)) *
                                self.softmax_p, axis=1) * (-self.beta)
    self.cost_p_1_agg = ops.Sum(self.cost_p_1)
    self.cost_p_2_agg = ops.Sum(self.cost_p_2)
    self.cost_p = -(self.cost_p_1_agg + self.cost_p_2_agg)
    self.cost_all = self.cost_p + self.cost_v

    if Config.DUAL_RMSPROP:
        raise NotImplementedError()
    else:
        if Config.USE_GRAD_CLIP:
            self.opt = updaters.RMSPropUpdater(decay=Config.RMSPROP_DECAY,
                                               eps=Config.RMSPROP_EPSILON,
                                               clip_gradient=Config.GRAD_CLIP_NORM)
        else:
            self.opt = updaters.RMSPropUpdater(decay=Config.RMSPROP_DECAY,
                                               eps=Config.RMSPROP_EPSILON)

    # Register every (parameter, gradient) pair with the updater.
    grads = T.grad(self.cost_all, self.network_params)
    for p, g in zip(self.network_params, grads):
        self.opt.append((p, g), lr_mult=1.0)
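A short, hedged sketch of how the registered updater might be turned into a callable update step. It borrows the ``function(updater=...)`` idiom from the solver snippet below; ``net`` and ``update_fn`` are illustrative names, not part of the original code.

# Hedged sketch: ``net`` is assumed to be an instance of the class
# above, and ``function(updater=...)`` is the compile idiom used by
# the solver snippet below.
net._create_graph()
update_fn = function(updater=net.opt)

# After the forward/backward pass for a batch (the (p, g) pairs were
# already appended in _create_graph), one call applies RMSProp:
update_fn()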
def __init__(self, prototxt):
    super(RMSPropSolver, self).__init__(prototxt=prototxt)
    self._updater = updaters.RMSPropUpdater(**self._update_param)
    # Generate update targets: collect the learnable blobs from every
    # layer, then register those with a positive lr multiplier.
    for layer, blobs in self._net.params.items():
        self._lr_blobs.extend(blobs)
    for idx, blob in enumerate(self._lr_blobs):
        if self._net._lr_mults[idx] > 0:
            if blob.diff is None:
                continue
            self._updater.append((blob.data, blob.diff),
                                 self._net._lr_mults[idx],
                                 self._net._decay_mults[idx])
    self.train = self._net.function
    self.tests = [test_net.function for test_net in self._test_nets]
    self.update = function(updater=self._updater)
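For context, a hedged sketch of driving this solver; the prototxt path and step count are placeholders. It follows the train/update split defined just above: ``train`` runs the network, ``update`` applies RMSProp to the registered blobs.

# Hedged usage sketch; 'solver.prototxt' and ``max_steps`` are
# placeholders, not values from the original code.
solver = RMSPropSolver('solver.prototxt')
for step in range(max_steps):
    solver.train()    # forward/backward pass over one batch
    solver.update()   # RMSProp step on all registered (data, diff) pairs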
def __init__(
    self,
    learning_rate,
    decay=0.9,
    momentum=0.0,
    epsilon=1e-10,
    use_locking=False,
    centered=False,
    name='RMSProp',
):
    super(RMSPropOptimizer, self).__init__(use_locking, name)
    if momentum > 0.:
        # With momentum requested, fall back to an Adam updater,
        # mapping beta1 <- momentum and beta2 <- decay.
        self.updater = _updaters.AdamUpdater(
            learning_rate, momentum, decay, epsilon)
    else:
        self.updater = _updaters.RMSPropUpdater(
            learning_rate, decay, epsilon)
    self._set_dynamic_lr(learning_rate)
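A brief instantiation example showing the two dispatch paths of this constructor; the variable names and hyperparameter values are illustrative only.

# Illustrative only: momentum == 0 dispatches to RMSPropUpdater,
# momentum > 0 falls back to AdamUpdater as coded above.
opt_plain = RMSPropOptimizer(learning_rate=1e-3, decay=0.9)
opt_momentum = RMSPropOptimizer(learning_rate=1e-3, decay=0.9, momentum=0.9)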
def __init__(self, prototxt):
    super(RMSPropSolver, self).__init__(prototxt=prototxt)
    self._optimizer = updaters.RMSPropUpdater(**self._update_param)
    self.BuildOptimizer()
def __init__(self, learning_rate, decay, momentum, epsilon=1e-10):
    super(RMSPropOptimizer, self).__init__()
    # ``momentum`` is accepted for signature compatibility but unused.
    self.updater = updaters.RMSPropUpdater(learning_rate, decay, epsilon)
def __init__(self, proto_txt):
    super(RMSPropSolver, self).__init__(proto_txt=proto_txt)
    self.optimizer = _updaters.RMSPropUpdater(**self._optimizer_arguments)
    self.BuildOptimizer()