コード例 #1
0
 def resource_apply_adam():
     """Invoke the fused Adam op once with fixed scalar hyperparameters.

     NOTE(review): ``v_cpu`` / ``v_gpu`` / ``v_also_cpu`` and ``training_ops``
     are assumed to be defined in the enclosing scope (this reads like a
     device-placement test fixture) -- confirm before reuse.
     """
     training_ops.resource_apply_adam(
         v_cpu.handle,
         v_gpu.handle,
         v_also_cpu.handle,
         1.0,  # beta1_power
         1.0,  # beta2_power
         1.0,  # learning_rate
         1.0,  # beta1
         1.0,  # beta2
         1.0,  # epsilon
         [1.0, 1.0, 1.0],  # grad
         False)  # use_locking
     return None
コード例 #2
0
 def resource_apply_adam():
   """Invoke the fused Adam op once with fixed scalar hyperparameters.

   NOTE(review): ``v_cpu`` / ``v_gpu`` / ``v_also_cpu`` and ``training_ops``
   are assumed to be defined in the enclosing scope -- confirm before reuse.
   """
   training_ops.resource_apply_adam(
       v_cpu.handle,
       v_gpu.handle,
       v_also_cpu.handle,
       1.0,  # beta1_power
       1.0,  # beta2_power
       1.0,  # learning_rate
       1.0,  # beta1
       1.0,  # beta2
       1.0,  # epsilon
       [1.0, 1.0, 1.0],  # grad
       False)  # use_locking
   return None
コード例 #3
0
ファイル: GradientAccumulation.py プロジェクト: DeepMCQ/DeepQ
        def _wrapUseAccu(accuGrads, grad, var, apply_state):
            """Apply one Adam (or AMSGrad) step to `var`, then zero `accuGrads`.

            NOTE(review): `coefficients` is read from the enclosing scope and
            the `apply_state` parameter is unused here -- confirm intentional.
            """
            m = self.get_slot(var, 'm')
            v = self.get_slot(var, 'v')

            if not self.amsgrad:
                result = training_ops.resource_apply_adam(
                    var.handle,
                    m.handle,
                    v.handle,
                    coefficients['beta_1_power'],
                    coefficients['beta_2_power'],
                    coefficients['lr_t'],
                    coefficients['beta_1_t'],
                    coefficients['beta_2_t'],
                    coefficients['epsilon'],
                    grad,
                    use_locking=self._use_locking)
            else:
                # AMSGrad additionally tracks the running max of v in 'vhat'.
                vhat = self.get_slot(var, 'vhat')
                result = training_ops.resource_apply_adam_with_amsgrad(
                    var.handle,
                    m.handle,
                    v.handle,
                    vhat.handle,
                    coefficients['beta_1_power'],
                    coefficients['beta_2_power'],
                    coefficients['lr_t'],
                    coefficients['beta_1_t'],
                    coefficients['beta_2_t'],
                    coefficients['epsilon'],
                    grad,
                    use_locking=self._use_locking)
            # Reset the gradient accumulator to zeros of the same shape.
            accuGrads.assign(tf.broadcast_to(0.0, tf.shape(accuGrads)))
            return result
コード例 #4
0
ファイル: radam.py プロジェクト: noobOriented/talos
    def _resource_apply_dense(self, grad, var):
        """RAdam dense update.

        When `self.condition` holds, runs the fused Adam op with the
        rectified learning rate (`self.rectified_lr`) in place of `_lr_t`;
        otherwise falls back to `_resource_apply_dense_without_v`, which
        does not use the second-moment slot.
        """
        m = self.get_slot(var, "m")
        v = self.get_slot(var, "v")
        beta1_power, beta2_power, _ = self._get_beta_accumulators()
        beta1_power = math_ops.cast(beta1_power, grad.dtype.base_dtype)
        beta1_t = math_ops.cast(self._beta1_t, grad.dtype.base_dtype)
        beta2_t = math_ops.cast(self._beta2_t, grad.dtype.base_dtype)

        # Both branches are lambdas so tf.cond only builds/executes one.
        return tf.cond(
            self.condition,
            lambda: training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                beta1_power,
                math_ops.cast(beta2_power, grad.dtype.base_dtype),
                math_ops.cast(self.rectified_lr, grad.dtype.base_dtype
                              ),  # instead of _lr_t
                beta1_t,
                beta2_t,
                math_ops.cast(self._epsilon_t, grad.dtype.base_dtype),
                grad,
                use_locking=self._use_locking),
            lambda: self._resource_apply_dense_without_v(
                var.handle, m.handle, v, beta1_power, beta1_t, beta2_t, grad),
        )
    def _resource_apply_dense(self, grad, var):
        """Adam/AMSGrad dense update with per-variable-name learning-rate scaling.

        The base decayed learning rate is rescaled by a hard-coded factor
        chosen from substrings of `var.name` before the fused op is called.
        """
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)
        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')
        beta_1_t = self._get_hyper('beta_1', var_dtype)
        beta_2_t = self._get_hyper('beta_2', var_dtype)
        epsilon = self._get_hyper('epsilon', var_dtype)
        local_step = math_ops.cast(self.iterations + 1, var_dtype)
        beta_1_power = math_ops.pow(beta_1_t, local_step)
        beta_2_power = math_ops.pow(beta_2_t, local_step)

        # Name-based lr multipliers; first matching branch wins
        # (elif chain), remaining variables keep the base rate.
        if "reader" in var.name:
            lr_t = lr_t * 0.2

        elif "h_mean" in var.name:
            lr_t = lr_t * 0.1

        elif "h_var" in var.name:
            lr_t = lr_t * 0.1

        elif "box_vae" in var.name:
            lr_t = lr_t * 10

        elif "offset_vae" in var.name:
            lr_t = lr_t * 10

        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                beta_1_power,
                beta_2_power,
                lr_t,
                beta_1_t,
                beta_2_t,
                epsilon,
                grad,
                use_locking=self._use_locking)
        else:
            vhat = self.get_slot(var, 'vhat')
            return training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                beta_1_power,
                beta_2_power,
                lr_t,
                beta_1_t,
                beta_2_t,
                epsilon,
                grad,
                use_locking=self._use_locking)
コード例 #6
0
    def _resource_apply_dense(self, grad, var, apply_state=None):
        """Adam/AMSGrad dense update with a per-variable learning rate.

        The learning rate is resolved from the variable name by
        `self._find_lr` (pattern matching lives there); all remaining
        coefficients come from `apply_state` keyed by (device, dtype),
        falling back to `self._fallback_apply_state`.
        """
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = (apply_state or {}).get(
            (var_device, var_dtype)) or self._fallback_apply_state(
                var_device, var_dtype)
        # Per-variable lr; everything else is the standard coefficient set.
        lr = self._find_lr(var.name, coefficients)
        m = self.get_slot(var, "m")
        v = self.get_slot(var, "v")

        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                coefficients["beta_1_power"],
                coefficients["beta_2_power"],
                lr,
                coefficients["beta_1_t"],
                coefficients["beta_2_t"],
                coefficients["epsilon"],
                grad,
                use_locking=self._use_locking,
            )
        # AMSGrad path additionally tracks max of v in the 'vhat' slot.
        vhat = self.get_slot(var, "vhat")
        return training_ops.resource_apply_adam_with_amsgrad(
            var.handle,
            m.handle,
            v.handle,
            vhat.handle,
            coefficients["beta_1_power"],
            coefficients["beta_2_power"],
            lr,
            coefficients["beta_1_t"],
            coefficients["beta_2_t"],
            coefficients["epsilon"],
            grad,
            use_locking=self._use_locking,
        )
コード例 #7
0
 def _resource_apply_dense(self, grad, var):
   """Run one dense Adam step on `var` via the fused training op."""
   dtype = grad.dtype.base_dtype
   first_moment = self.get_slot(var, "m")
   second_moment = self.get_slot(var, "v")
   return training_ops.resource_apply_adam(
       var.handle,
       first_moment.handle,
       second_moment.handle,
       math_ops.cast(self._beta1_power, dtype),
       math_ops.cast(self._beta2_power, dtype),
       math_ops.cast(self._lr_t, dtype),
       math_ops.cast(self._beta1_t, dtype),
       math_ops.cast(self._beta2_t, dtype),
       math_ops.cast(self._epsilon_t, dtype),
       grad,
       use_locking=self._use_locking)
コード例 #8
0
ファイル: adam.py プロジェクト: AbhinavJain13/tensorflow
 def _resource_apply_dense(self, grad, var):
   """Dense-gradient Adam update through the fused kernel."""
   def _as_grad_dtype(value):
     # Every scalar operand must match the gradient's base dtype.
     return math_ops.cast(value, grad.dtype.base_dtype)

   m = self.get_slot(var, "m")
   v = self.get_slot(var, "v")
   return training_ops.resource_apply_adam(
       var.handle, m.handle, v.handle,
       _as_grad_dtype(self._beta1_power),
       _as_grad_dtype(self._beta2_power),
       _as_grad_dtype(self._lr_t),
       _as_grad_dtype(self._beta1_t),
       _as_grad_dtype(self._beta2_t),
       _as_grad_dtype(self._epsilon_t),
       grad, use_locking=self._use_locking)
コード例 #9
0
ファイル: adam.py プロジェクト: kuo1220/verbose-barnacle
 def _resource_apply_dense(self, grad, var, state):
   """Dense Adam step whose slots and hyperparameters come from `state`."""
   dtype = grad.dtype.base_dtype
   m = state.get_slot(var, "m")
   v = state.get_slot(var, "v")
   beta1_power, beta2_power = self._get_beta_accumulators(state)
   return training_ops.resource_apply_adam(
       var.handle,
       m.handle,
       v.handle,
       math_ops.cast(beta1_power, dtype),
       math_ops.cast(beta2_power, dtype),
       state.get_hyper("learning_rate", dtype),
       state.get_hyper("beta1", dtype),
       state.get_hyper("beta2", dtype),
       state.get_hyper("epsilon", dtype),
       grad,
       use_locking=self._use_locking)
コード例 #10
0
ファイル: adam.py プロジェクト: bikong2/tensorflow
 def _resource_apply_dense(self, grad, var, state):
   """Dense Adam step; slots, beta accumulators and hyperparameters are
   all read from the supplied `state` object."""
   m = state.get_slot(var, "m")
   v = state.get_slot(var, "v")
   beta1_power, beta2_power = self._get_beta_accumulators(state)
   return training_ops.resource_apply_adam(
       var.handle, m.handle, v.handle,
       math_ops.cast(beta1_power, grad.dtype.base_dtype),
       math_ops.cast(beta2_power, grad.dtype.base_dtype),
       state.get_hyper("learning_rate", grad.dtype.base_dtype),
       state.get_hyper("beta1", grad.dtype.base_dtype),
       state.get_hyper("beta2", grad.dtype.base_dtype),
       state.get_hyper("epsilon", grad.dtype.base_dtype),
       grad, use_locking=self._use_locking)
コード例 #11
0
 def _resource_apply_dense_in_action(self, grad, var):
     """Dense Adam step using the accumulated beta powers.

     All scalar hyperparameters are cast to the gradient's base dtype so
     the fused op receives a consistent operand dtype set.
     """
     m = self.get_slot(var, "m")
     v = self.get_slot(var, "v")
     beta1_power, beta2_power = self._get_beta_accumulators()
     return training_ops.resource_apply_adam(
         var.handle,
         m.handle,
         v.handle,
         tf.cast(beta1_power, grad.dtype.base_dtype),
         tf.cast(beta2_power, grad.dtype.base_dtype),
         # Was cast to var.dtype while every other scalar used grad.dtype;
         # unified on grad.dtype (identical for resource variables, and
         # consistent with the sibling casts).
         tf.cast(self._lr_t, grad.dtype.base_dtype),
         tf.cast(self._beta1_t, grad.dtype.base_dtype),
         tf.cast(self._beta2_t, grad.dtype.base_dtype),
         tf.cast(self._epsilon_t, grad.dtype.base_dtype),
         grad,
         use_locking=self._use_locking)
コード例 #12
0
    def _resource_apply_dense(
        self,
        grad,
        var,
        apply_state=None,
    ):
        """Adam/AMSGrad dense update with a one-time per-variable lr rescale.

        On the first call for a variable (initiation_dict[var.name] == 1)
        the cached 'lr_t' coefficient is multiplied by
        self.param_lrs[var.name] and the flag is cleared.

        NOTE(review): this mutates the shared `coefficients` dict in place,
        so the scaled 'lr_t' persists in `apply_state` for later calls with
        the same (device, dtype) key -- confirm that is the intent.
        """
        (var_device, var_dtype) = (var.device, var.dtype.base_dtype)
        coefficients = (apply_state or {}).get((var_device, var_dtype)) \
            or self._fallback_apply_state(var_device, var_dtype)

        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')

        if self.initiation_dict[var.name] == 1:
            coefficients['lr_t'] = coefficients['lr_t'] * self.param_lrs[
                var.name]
            self.initiation_dict[var.name] = 0
        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                coefficients['lr_t'],
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking,
            )
        else:
            vhat = self.get_slot(var, 'vhat')
            return training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                coefficients['lr_t'],
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking,
            )
コード例 #13
0
ファイル: adam.py プロジェクト: zjh123456789/tensorflow
 def _resource_apply_dense(self, grad, var):
     """Dense Adam step.

     NOTE(review): per the TODO below, the constant beta_1 / beta_2 values
     are passed in the beta1_power / beta2_power argument positions instead
     of step-dependent accumulators -- a documented approximation here.
     """
     m = self.get_slot(var, 'm')
     v = self.get_slot(var, 'v')
     # TODO(tanzheny): let optimizer have its own step counter, and let
     # beta1_power and beta2_power depend on it.
     return training_ops.resource_apply_adam(
         var.handle,
         m.handle,
         v.handle,
         math_ops.cast(self._beta_1, grad.dtype.base_dtype),
         math_ops.cast(self._beta_2, grad.dtype.base_dtype),
         math_ops.cast(self._lr, grad.dtype.base_dtype),
         math_ops.cast(self._beta_1, grad.dtype.base_dtype),
         math_ops.cast(self._beta_2, grad.dtype.base_dtype),
         math_ops.cast(self._epsilon, grad.dtype.base_dtype),
         grad,
         use_locking=self._use_locking)
コード例 #14
0
ファイル: adam.py プロジェクト: abhinav-upadhyay/tensorflow
 def _resource_apply_dense(self, grad, var):
   """Dense Adam step reading hyperparameters via _get_hyper.

   NOTE(review): per the TODO below, constant beta_1 / beta_2 are passed in
   the beta1_power / beta2_power positions rather than step-dependent
   accumulators -- a documented approximation here.
   """
   m = self.get_slot(var, 'm')
   v = self.get_slot(var, 'v')
   # TODO(tanzheny): let optimizer have its own step counter, and let
   # beta1_power and beta2_power depend on it.
   return training_ops.resource_apply_adam(
       var.handle,
       m.handle,
       v.handle,
       math_ops.cast(self._get_hyper('beta_1'), grad.dtype.base_dtype),
       math_ops.cast(self._get_hyper('beta_2'), grad.dtype.base_dtype),
       math_ops.cast(self._get_hyper('learning_rate'), grad.dtype.base_dtype),
       math_ops.cast(self._get_hyper('beta_1'), grad.dtype.base_dtype),
       math_ops.cast(self._get_hyper('beta_2'), grad.dtype.base_dtype),
       math_ops.cast(self._get_hyper('epsilon'), grad.dtype.base_dtype),
       grad,
       use_locking=self._use_locking)
コード例 #15
0
ファイル: optimization.py プロジェクト: cgibson6279/bert_gpu
 def _resource_apply_dense(self, grad, var):
     """Dense Adam step with a name-gated weight-decay rescale.

     NOTE(review): sibling implementations in this file pass resource
     handles (var.handle, m.handle, v.handle) to this op; here a computed
     tensor `decayed_var` and the slot variables are passed directly --
     verify the op accepts these. Also note the "decay" multiplies the
     variable by `self._weight_decay_rate` rather than subtracting a decay
     term -- confirm that is the intended semantics.
     """
     m = self.get_slot(var, "m")
     v = self.get_slot(var, "v")
     decayed_var = var
     if self._do_use_weight_decay(self._get_variable_name(var.name)):
         decayed_var = self._weight_decay_rate * var
     return training_ops.resource_apply_adam(
         decayed_var,
         m,
         v,
         tf.cast(self._beta1, var.dtype.base_dtype),
         tf.cast(self._beta2, var.dtype.base_dtype),
         tf.cast(self._learning_rate, var.dtype.base_dtype),
         tf.cast(self._beta1, var.dtype.base_dtype),
         tf.cast(self._beta2, var.dtype.base_dtype),
         tf.cast(self._epsilon, var.dtype.base_dtype),
         grad,
         use_locking=self._use_locking).op
コード例 #16
0
    def _resource_apply_dense(self, grad, var, apply_state=None):
        """Adam/AMSGrad dense update with a special lr for deconv variables.

        Variables whose name contains "transpose" get a constant learning
        rate built from the 'learning_rate_deconv' hyperparameter instead
        of the cached 'lr_t' coefficient.
        """
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = (apply_state or {}).get(
            (var_device, var_dtype)) or self._fallback_apply_state(
                var_device, var_dtype)

        m = self.get_slot(var, "m")
        v = self.get_slot(var, "v")
        lr = coefficients["lr_t"]
        # Name-based override for transposed-conv (deconv) weights.
        if str(var.name).find("transpose") != -1:
            lr = constant(
                self._serialize_hyperparameter("learning_rate_deconv"))
        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                coefficients["beta_1_power"],
                coefficients["beta_2_power"],
                lr,
                coefficients["beta_1_t"],
                coefficients["beta_2_t"],
                coefficients["epsilon"],
                grad,
                use_locking=self._use_locking,
            )
        else:
            vhat = self.get_slot(var, "vhat")
            return training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                coefficients["beta_1_power"],
                coefficients["beta_2_power"],
                lr,
                coefficients["beta_1_t"],
                coefficients["beta_2_t"],
                coefficients["epsilon"],
                grad,
                use_locking=self._use_locking,
            )
コード例 #17
0
    def _resource_apply_dense(self, grad, var, apply_state=None):
        """Adam/AMSGrad dense update with prefix-based lr multipliers.

        For each configured prefix in `self._lrm_names` that matches the
        start of `var.name`, lr_t is set to the base 'lr_t' times the
        'lrm_<prefix>' hyperparameter. Note the loop does not break, so if
        several prefixes match, the LAST one in `_lrm_names` wins.
        """
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = ((apply_state or {}).get((var_device, var_dtype))
                        or self._fallback_apply_state(var_device, var_dtype))

        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')

        lr_t = coefficients['lr_t']
        for k in self._lrm_names:
            if var.name.startswith(k):
                lr_t = coefficients['lr_t'] * self._get_hyper(
                    f'lrm_{k}', var.dtype)

        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                lr_t,
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)
        else:
            vhat = self.get_slot(var, 'vhat')
            return training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                lr_t,
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)
コード例 #18
0
 def _resource_apply_dense(self, grad, var):
     """Dense Adam step grouped with an op-counter increment.

     Returns a single grouped op containing the fused Adam update and an
     assign_add(1) on the counter from `self._get_ops_tester()` --
     presumably instrumentation counting apply calls; verify with callers.
     """
     m = self.get_slot(var, "m")
     v = self.get_slot(var, "v")
     beta1_power, beta2_power = self._get_beta_accumulators()
     ops = self._get_ops_tester()
     ops_up = ops.assign_add(1)
     return control_flow_ops.group(*[
         training_ops.resource_apply_adam(
             var.handle,
             m.handle,
             v.handle,
             math_ops.cast(beta1_power, grad.dtype.base_dtype),
             math_ops.cast(beta2_power, grad.dtype.base_dtype),
             math_ops.cast(self._lr_t, grad.dtype.base_dtype),
             math_ops.cast(self._beta1_t, grad.dtype.base_dtype),
             math_ops.cast(self._beta2_t, grad.dtype.base_dtype),
             math_ops.cast(self._epsilon_t, grad.dtype.base_dtype),
             grad,
             use_locking=self._use_locking), ops_up
     ])
コード例 #19
0
ファイル: adam.py プロジェクト: zhaoyongke/tensorflow
 def _resource_apply_dense(self, grad, var):
   """Dense Adam step; beta powers are recomputed from `iterations`."""
   dtype = grad.dtype.base_dtype
   step = math_ops.cast(self.iterations + 1, dtype)
   beta_1_t = math_ops.cast(self._get_hyper('beta_1'), dtype)
   beta_2_t = math_ops.cast(self._get_hyper('beta_2'), dtype)
   m = self.get_slot(var, 'm')
   v = self.get_slot(var, 'v')
   return training_ops.resource_apply_adam(
       var.handle,
       m.handle,
       v.handle,
       math_ops.pow(beta_1_t, step),
       math_ops.pow(beta_2_t, step),
       math_ops.cast(self._get_hyper('learning_rate'), dtype),
       beta_1_t,
       beta_2_t,
       math_ops.cast(self._get_hyper('epsilon'), dtype),
       grad,
       use_locking=self._use_locking)
コード例 #20
0
ファイル: adam.py プロジェクト: qwerzou1/shibie
 def _resource_apply_dense(self, grad, var):
     """Dense Adam step; beta_1^t and beta_2^t are derived from the
     optimizer's `iterations` counter rather than stored accumulators."""
     grad_dtype = grad.dtype.base_dtype
     m = self.get_slot(var, 'm')
     v = self.get_slot(var, 'v')
     local_step = math_ops.cast(self.iterations + 1, grad_dtype)
     beta_1_t = math_ops.cast(self._get_hyper('beta_1'), grad_dtype)
     beta_2_t = math_ops.cast(self._get_hyper('beta_2'), grad_dtype)
     beta_1_power = math_ops.pow(beta_1_t, local_step)
     beta_2_power = math_ops.pow(beta_2_t, local_step)
     return training_ops.resource_apply_adam(
         var.handle,
         m.handle,
         v.handle,
         beta_1_power,
         beta_2_power,
         math_ops.cast(self._get_hyper('learning_rate'), grad_dtype),
         beta_1_t,
         beta_2_t,
         math_ops.cast(self._get_hyper('epsilon'), grad_dtype),
         grad,
         use_locking=self._use_locking)
コード例 #21
0
 def _resource_apply_dense(self, grad, var):
     """Adam/AMSGrad dense update.

     Beta powers come from `iterations`; epsilon is converted from the
     plain Python attribute `self.epsilon` rather than a hyperparameter,
     and the decayed lr is read from the `_decayed_lr_t` cache by dtype.
     """
     var_dtype = var.dtype.base_dtype
     lr_t = self._decayed_lr_t[var_dtype]
     m = self.get_slot(var, 'm')
     v = self.get_slot(var, 'v')
     beta_1_t = self._get_hyper('beta_1', var_dtype)
     beta_2_t = self._get_hyper('beta_2', var_dtype)
     epsilon_t = ops.convert_to_tensor(self.epsilon, var_dtype)
     local_step = math_ops.cast(self.iterations + 1, var_dtype)
     beta_1_power = math_ops.pow(beta_1_t, local_step)
     beta_2_power = math_ops.pow(beta_2_t, local_step)
     if not self.amsgrad:
         return training_ops.resource_apply_adam(
             var.handle,
             m.handle,
             v.handle,
             beta_1_power,
             beta_2_power,
             lr_t,
             beta_1_t,
             beta_2_t,
             epsilon_t,
             grad,
             use_locking=self._use_locking)
     else:
         vhat = self.get_slot(var, 'vhat')
         return training_ops.resource_apply_adam_with_amsgrad(
             var.handle,
             m.handle,
             v.handle,
             vhat.handle,
             beta_1_power,
             beta_2_power,
             lr_t,
             beta_1_t,
             beta_2_t,
             epsilon_t,
             grad,
             use_locking=self._use_locking)
コード例 #22
0
    def _resource_apply_dense(self, grad, var, constraint, apply_state=None):
        """Adam/AMSGrad dense update followed by a Euclidean projection.

        NOTE(review): `var_update` and `was_projected` are unused, and the
        final assign is not placed under a control dependency on the Adam
        update op -- in graph mode the projection could be computed from
        the pre-update value. Confirm ordering is guaranteed by the caller.
        """
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = ((apply_state or {}).get((var_device, var_dtype))
                        or self._fallback_apply_state(var_device, var_dtype))

        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')

        if not self.amsgrad:
            var_update = training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                coefficients['lr_t'],
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)
        else:
            vhat = self.get_slot(var, 'vhat')
            var_update = training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                coefficients['lr_t'],
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)

        # Project the (updated) variable back onto the constraint set.
        project_var, was_projected = constraint.euclidean_project(var)
        return state_ops.assign(var, project_var)
コード例 #23
0
ファイル: adam.py プロジェクト: aeverall/tensorflow
 def _resource_apply_dense(self, grad, var):
   """Adam/AMSGrad dense update with beta powers derived from `iterations`
   and all hyperparameters fetched per-dtype via `_get_hyper`."""
   var_dtype = var.dtype.base_dtype
   lr_t = self._decayed_lr(var_dtype)
   m = self.get_slot(var, 'm')
   v = self.get_slot(var, 'v')
   beta_1_t = self._get_hyper('beta_1', var_dtype)
   beta_2_t = self._get_hyper('beta_2', var_dtype)
   epsilon = self._get_hyper('epsilon', var_dtype)
   local_step = math_ops.cast(self.iterations + 1, var_dtype)
   beta_1_power = math_ops.pow(beta_1_t, local_step)
   beta_2_power = math_ops.pow(beta_2_t, local_step)
   if not self._amsgrad:
     return training_ops.resource_apply_adam(
         var.handle,
         m.handle,
         v.handle,
         beta_1_power,
         beta_2_power,
         lr_t,
         beta_1_t,
         beta_2_t,
         epsilon,
         grad,
         use_locking=self._use_locking)
   else:
     # AMSGrad keeps the running max of v in the 'vhat' slot.
     vhat = self.get_slot(var, 'vhat')
     return training_ops.resource_apply_adam_with_amsgrad(
         var.handle,
         m.handle,
         v.handle,
         vhat.handle,
         beta_1_power,
         beta_2_power,
         lr_t,
         beta_1_t,
         beta_2_t,
         epsilon,
         grad,
         use_locking=self._use_locking)
コード例 #24
0
 def _resource_apply_dense(self, grad, var):
     """One dense Adam step; beta powers are recomputed from `iterations`."""
     dtype = var.dtype.base_dtype
     step = math_ops.cast(self.iterations + 1, dtype)
     beta_1 = self._get_hyper('beta_1', dtype)
     beta_2 = self._get_hyper('beta_2', dtype)
     first_moment = self.get_slot(var, 'm')
     second_moment = self.get_slot(var, 'v')
     return training_ops.resource_apply_adam(
         var.handle,
         first_moment.handle,
         second_moment.handle,
         math_ops.pow(beta_1, step),
         math_ops.pow(beta_2, step),
         self._decayed_lr(dtype),
         beta_1,
         beta_2,
         self._get_hyper('epsilon', dtype),
         grad,
         use_locking=self._use_locking)
コード例 #25
0
    def _resource_apply_dense(self, grad, var, apply_state=None):
        """Adam/AMSGrad dense update with a per-layer learning-rate scale.

        NOTE(review): the non-AMSGrad branch scales by
        `self.lr_with_layer(var)` while the AMSGrad branch uses
        `self.lr_wide_layer(var)` -- confirm the two different method names
        are intentional and not a typo.
        """
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = ((apply_state or {}).get((var_device, var_dtype))
                        or self._fallback_apply_state(var_device, var_dtype))

        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')

        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                # coefficients['lr_t'],  # replaced by next
                coefficients['lr_t']*self.lr_with_layer(var),
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)
        else:
            vhat = self.get_slot(var, 'vhat')
            return training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                # coefficients['lr_t'],# replaced by next
                coefficients['lr_t'] * self.lr_wide_layer(var),
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)
コード例 #26
0
ファイル: nadam.py プロジェクト: aeverall/tensorflow
 def _resource_apply_dense(self, grad, var):
   """Dense update via the fused Adam op with `use_nesterov=True`
   (Nadam-style momentum); beta powers derive from `iterations`."""
   var_dtype = var.dtype.base_dtype
   lr_t = self._decayed_lr(var_dtype)
   m = self.get_slot(var, 'm')
   v = self.get_slot(var, 'v')
   beta_1_t = self._get_hyper('beta_1', var_dtype)
   beta_2_t = self._get_hyper('beta_2', var_dtype)
   local_step = math_ops.cast(self.iterations + 1, var_dtype)
   beta_1_power = math_ops.pow(beta_1_t, local_step)
   beta_2_power = math_ops.pow(beta_2_t, local_step)
   return training_ops.resource_apply_adam(
       var.handle,
       m.handle,
       v.handle,
       beta_1_power,
       beta_2_power,
       lr_t,
       beta_1_t,
       beta_2_t,
       self._get_hyper('epsilon', var_dtype),
       grad,
       use_locking=self._use_locking,
       use_nesterov=True)
コード例 #27
0
    def _resource_apply_dense(self, grad, var, apply_state=None):
        """Adam/AMSGrad dense update with per-variable coefficient caching.

        Unlike the stock implementation, `apply_state` is keyed by
        (var.name, device, dtype), the fallback calls `_prepare_local`
        directly, and the learning rate is stored under 'lr' (not 'lr_t').
        """
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = ((apply_state or {}).get(
            (var.name, var_device, var_dtype)) or self._prepare_local(
                var.name, var_device, var_dtype, apply_state))

        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')

        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                coefficients['lr'],
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)
        else:
            vhat = self.get_slot(var, 'vhat')
            return training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                coefficients['lr'],
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)
コード例 #28
0
    def _resource_apply_dense(self, grad, var, apply_state=None):
        """AdamW-style dense update: decoupled weight decay, then Adam.

        The decay op from `_decay_weights_op` is forced to run before the
        fused Adam/AMSGrad update via a control dependency.
        """
        lr_t, _, coefficients, kwargs = self._get_lr(var, apply_state)
        decay = self._decay_weights_op(var, lr_t, apply_state)
        # Ensure the weight-decay write happens before the Adam update.
        with tf.control_dependencies([decay]):
            m = self.get_slot(var, 'm')
            v = self.get_slot(var, 'v')

            if not self.amsgrad:
                return training_ops.resource_apply_adam(
                    var.handle,
                    m.handle,
                    v.handle,
                    coefficients['beta_1_power'],
                    coefficients['beta_2_power'],
                    lr_t,
                    coefficients['beta_1_t'],
                    coefficients['beta_2_t'],
                    coefficients['epsilon'],
                    grad,
                    use_locking=self._use_locking)
            else:
                vhat = self.get_slot(var, 'vhat')
                return training_ops.resource_apply_adam_with_amsgrad(
                    var.handle,
                    m.handle,
                    v.handle,
                    vhat.handle,
                    coefficients['beta_1_power'],
                    coefficients['beta_2_power'],
                    lr_t,
                    coefficients['beta_1_t'],
                    coefficients['beta_2_t'],
                    coefficients['epsilon'],
                    grad,
                    use_locking=self._use_locking)