        def _wrapUseAccu(accuGrads, grad, var, apply_state):
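            # `coefficients` and `self` are presumably captured from the
            # enclosing _resource_apply_dense scope that defines this helper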
            m = self.get_slot(var, 'm')
            v = self.get_slot(var, 'v')

            if not self.amsgrad:
                result = training_ops.resource_apply_adam(
                    var.handle,
                    m.handle,
                    v.handle,
                    coefficients['beta_1_power'],
                    coefficients['beta_2_power'],
                    coefficients['lr_t'],
                    coefficients['beta_1_t'],
                    coefficients['beta_2_t'],
                    coefficients['epsilon'],
                    grad,
                    use_locking=self._use_locking)
            else:
                vhat = self.get_slot(var, 'vhat')
                result = training_ops.resource_apply_adam_with_amsgrad(
                    var.handle,
                    m.handle,
                    v.handle,
                    vhat.handle,
                    coefficients['beta_1_power'],
                    coefficients['beta_2_power'],
                    coefficients['lr_t'],
                    coefficients['beta_1_t'],
                    coefficients['beta_2_t'],
                    coefficients['epsilon'],
                    grad,
                    use_locking=self._use_locking)
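            # reset the gradient accumulator once the accumulated update
            # has been applied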
            accuGrads.assign(tf.zeros_like(accuGrads))
            return result
    def _resource_apply_dense(self, grad, var):
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)
        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')
        beta_1_t = self._get_hyper('beta_1', var_dtype)
        beta_2_t = self._get_hyper('beta_2', var_dtype)
        epsilon = self._get_hyper('epsilon', var_dtype)
        local_step = math_ops.cast(self.iterations + 1, var_dtype)
        beta_1_power = math_ops.pow(beta_1_t, local_step)
        beta_2_power = math_ops.pow(beta_2_t, local_step)

        if "reader" in var.name:
            lr_t = lr_t * 0.2

        elif "h_mean" in var.name:
            lr_t = lr_t * 0.1

        elif "h_var" in var.name:
            lr_t = lr_t * 0.1

        elif "box_vae" in var.name:
            lr_t = lr_t * 10

        elif "offset_vae" in var.name:
            lr_t = lr_t * 10

        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                beta_1_power,
                beta_2_power,
                lr_t,
                beta_1_t,
                beta_2_t,
                epsilon,
                grad,
                use_locking=self._use_locking)
        else:
            vhat = self.get_slot(var, 'vhat')
            return training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                beta_1_power,
                beta_2_power,
                lr_t,
                beta_1_t,
                beta_2_t,
                epsilon,
                grad,
                use_locking=self._use_locking)
Example #3
    def _resource_apply_dense(self, grad, var, apply_state=None):
        # print("grad: ", grad.name, grad.shape, "var: ", var.name, var.shape)
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = (apply_state or {}).get(
            (var_device, var_dtype)) or self._fallback_apply_state(
                var_device, var_dtype)
        # _find_lr scans self.pattern_lrs for the first pattern matching
        # var.name and returns the corresponding coefficients[f"lr-{idx}_t"],
        # falling back to the default coefficients["lr_t"] when nothing matches
        lr = self._find_lr(var.name, coefficients)
        m = self.get_slot(var, "m")
        v = self.get_slot(var, "v")

        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                coefficients["beta_1_power"],
                coefficients["beta_2_power"],
                lr,  # coefficients['lr_t'],
                coefficients["beta_1_t"],
                coefficients["beta_2_t"],
                coefficients["epsilon"],
                grad,
                use_locking=self._use_locking,
            )
        else:
            vhat = self.get_slot(var, "vhat")
            return training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                coefficients["beta_1_power"],
                coefficients["beta_2_power"],
                lr,  # coefficients['lr_t'],
                coefficients["beta_1_t"],
                coefficients["beta_2_t"],
                coefficients["epsilon"],
                grad,
                use_locking=self._use_locking,
            )
Example #4
    def _resource_apply_dense(
        self,
        grad,
        var,
        apply_state=None,
    ):
        (var_device, var_dtype) = (var.device, var.dtype.base_dtype)
        coefficients = (apply_state or {}).get((var_device, var_dtype)) \
            or self._fallback_apply_state(var_device, var_dtype)

        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')

        # one-time per-variable lr scaling: on the first apply for this
        # variable, multiply lr_t by its configured multiplier and clear the
        # flag (note this mutates the coefficients dict shared across variables)
        if self.initiation_dict[var.name] == 1:
            coefficients['lr_t'] = coefficients['lr_t'] * self.param_lrs[
                var.name]
            self.initiation_dict[var.name] = 0
        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                coefficients['lr_t'],
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking,
            )
        else:
            vhat = self.get_slot(var, 'vhat')
            return training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                coefficients['lr_t'],
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking,
            )
Example #5
    def _resource_apply_dense(self, grad, var, apply_state=None):
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = ((apply_state or {}).get((var_device, var_dtype))
                        or self._fallback_apply_state(var_device, var_dtype))

        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')

        # scale the base lr by the per-prefix hyperparameter lrm_<name> when
        # the variable name starts with a registered prefix
        lr_t = coefficients['lr_t']
        for k in self._lrm_names:
            if var.name.startswith(k):
                lr_t = coefficients['lr_t'] * self._get_hyper(
                    f'lrm_{k}', var.dtype)

        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                lr_t,
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)
        else:
            vhat = self.get_slot(var, 'vhat')
            return training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                lr_t,
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)
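Example #6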
    def _resource_apply_dense(self, grad, var, apply_state=None):
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = (apply_state or {}).get(
            (var_device, var_dtype)) or self._fallback_apply_state(
                var_device, var_dtype)

        m = self.get_slot(var, "m")
        v = self.get_slot(var, "v")
        lr = coefficients["lr_t"]
        if str(var.name).find("transpose") != -1:
            lr = constant(
                self._serialize_hyperparameter("learning_rate_deconv"))
        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                coefficients["beta_1_power"],
                coefficients["beta_2_power"],
                lr,
                coefficients["beta_1_t"],
                coefficients["beta_2_t"],
                coefficients["epsilon"],
                grad,
                use_locking=self._use_locking,
            )
        else:
            vhat = self.get_slot(var, "vhat")
            return training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                coefficients["beta_1_power"],
                coefficients["beta_2_power"],
                lr,
                coefficients["beta_1_t"],
                coefficients["beta_2_t"],
                coefficients["epsilon"],
                grad,
                use_locking=self._use_locking,
            )
Example #7
  def _resource_apply_dense(self, grad, var):
    var_dtype = var.dtype.base_dtype
    lr_t = self._decayed_lr_t[var_dtype]
    m = self.get_slot(var, 'm')
    v = self.get_slot(var, 'v')
    beta_1_t = self._get_hyper('beta_1', var_dtype)
    beta_2_t = self._get_hyper('beta_2', var_dtype)
    epsilon_t = ops.convert_to_tensor(self.epsilon, var_dtype)
    local_step = math_ops.cast(self.iterations + 1, var_dtype)
    beta_1_power = math_ops.pow(beta_1_t, local_step)
    beta_2_power = math_ops.pow(beta_2_t, local_step)
    if not self.amsgrad:
      return training_ops.resource_apply_adam(
          var.handle,
          m.handle,
          v.handle,
          beta_1_power,
          beta_2_power,
          lr_t,
          beta_1_t,
          beta_2_t,
          epsilon_t,
          grad,
          use_locking=self._use_locking)
    else:
      vhat = self.get_slot(var, 'vhat')
      return training_ops.resource_apply_adam_with_amsgrad(
          var.handle,
          m.handle,
          v.handle,
          vhat.handle,
          beta_1_power,
          beta_2_power,
          lr_t,
          beta_1_t,
          beta_2_t,
          epsilon_t,
          grad,
          use_locking=self._use_locking)
Example #8
    def _resource_apply_dense(self, grad, var, constraint, apply_state=None):
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = ((apply_state or {}).get((var_device, var_dtype))
                        or self._fallback_apply_state(var_device, var_dtype))

        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')

        if not self.amsgrad:
            var_update = training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                coefficients['lr_t'],
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)
        else:
            vhat = self.get_slot(var, 'vhat')
            var_update = training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                coefficients['lr_t'],
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)

        # sequence the projection after the Adam update so it sees the freshly
        # updated value of var (ops is tensorflow.python.framework.ops)
        with ops.control_dependencies([var_update]):
            project_var, _ = constraint.euclidean_project(var)
        return state_ops.assign(var, project_var)
Example #9
  def _resource_apply_dense(self, grad, var):
    var_dtype = var.dtype.base_dtype
    lr_t = self._decayed_lr(var_dtype)
    m = self.get_slot(var, 'm')
    v = self.get_slot(var, 'v')
    beta_1_t = self._get_hyper('beta_1', var_dtype)
    beta_2_t = self._get_hyper('beta_2', var_dtype)
    epsilon = self._get_hyper('epsilon', var_dtype)
    local_step = math_ops.cast(self.iterations + 1, var_dtype)
    beta_1_power = math_ops.pow(beta_1_t, local_step)
    beta_2_power = math_ops.pow(beta_2_t, local_step)
    if not self._amsgrad:
      return training_ops.resource_apply_adam(
          var.handle,
          m.handle,
          v.handle,
          beta_1_power,
          beta_2_power,
          lr_t,
          beta_1_t,
          beta_2_t,
          epsilon,
          grad,
          use_locking=self._use_locking)
    else:
      vhat = self.get_slot(var, 'vhat')
      return training_ops.resource_apply_adam_with_amsgrad(
          var.handle,
          m.handle,
          v.handle,
          vhat.handle,
          beta_1_power,
          beta_2_power,
          lr_t,
          beta_1_t,
          beta_2_t,
          epsilon,
          grad,
          use_locking=self._use_locking)
Example #10
    def _resource_apply_dense(self, grad, var, apply_state=None):
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = ((apply_state or {}).get((var_device, var_dtype))
                        or self._fallback_apply_state(var_device, var_dtype))

        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')

        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                # coefficients['lr_t'] replaced by the layer-scaled value:
                coefficients['lr_t'] * self.lr_with_layer(var),
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)
        else:
            vhat = self.get_slot(var, 'vhat')
            return training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                # coefficients['lr_t'] replaced by the layer-scaled value:
                coefficients['lr_t'] * self.lr_wide_layer(var),
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)
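Example #11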
    def _resource_apply_dense(self, grad, var, apply_state=None):
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = ((apply_state or {}).get(
            (var.name, var_device, var_dtype)) or self._prepare_local(
                var.name, var_device, var_dtype, apply_state))
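        # unlike stock Adam, apply_state is keyed by (var.name, device, dtype),
        # letting _prepare_local precompute a per-variable 'lr' entry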

        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')

        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle,
                m.handle,
                v.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                coefficients['lr'],
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)
        else:
            vhat = self.get_slot(var, 'vhat')
            return training_ops.resource_apply_adam_with_amsgrad(
                var.handle,
                m.handle,
                v.handle,
                vhat.handle,
                coefficients['beta_1_power'],
                coefficients['beta_2_power'],
                coefficients['lr'],
                coefficients['beta_1_t'],
                coefficients['beta_2_t'],
                coefficients['epsilon'],
                grad,
                use_locking=self._use_locking)
Example #12
    def _resource_apply_dense(self, grad, var, apply_state=None):
        # print("Dense: {} {} {}".format(var.name, var.device, var.dtype.base_dtype))
        lr_t, _, coefficients, kwargs = self._get_lr(var, apply_state)
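        # decoupled weight decay in the AdamW style: apply the decay op first,
        # then run the Adam step once it has executed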
        decay = self._decay_weights_op(var, lr_t, apply_state)
        with tf.control_dependencies([decay]):
            m = self.get_slot(var, 'm')
            v = self.get_slot(var, 'v')

            if not self.amsgrad:
                return training_ops.resource_apply_adam(
                    var.handle,
                    m.handle,
                    v.handle,
                    coefficients['beta_1_power'],
                    coefficients['beta_2_power'],
                    lr_t,
                    coefficients['beta_1_t'],
                    coefficients['beta_2_t'],
                    coefficients['epsilon'],
                    grad,
                    use_locking=self._use_locking)
            else:
                vhat = self.get_slot(var, 'vhat')
                return training_ops.resource_apply_adam_with_amsgrad(
                    var.handle,
                    m.handle,
                    v.handle,
                    vhat.handle,
                    coefficients['beta_1_power'],
                    coefficients['beta_2_power'],
                    lr_t,
                    coefficients['beta_1_t'],
                    coefficients['beta_2_t'],
                    coefficients['epsilon'],
                    grad,
                    use_locking=self._use_locking)
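All twelve examples share one pattern: subclass the Keras Adam optimizer, derive a variable-specific learning rate inside _resource_apply_dense, and forward everything else to the fused kernels. The sketch below distills that pattern; it is a minimal illustration assuming the same TF 2.x private modules the snippets themselves import, and PerVariableLRAdam with its lr_multipliers argument are hypothetical names, not taken from any example above.

from tensorflow.python.keras.optimizer_v2 import adam
from tensorflow.python.training import training_ops


class PerVariableLRAdam(adam.Adam):
    """Adam whose lr is scaled per variable by name-substring multipliers."""

    def __init__(self, lr_multipliers=None, name='PerVariableLRAdam', **kwargs):
        super(PerVariableLRAdam, self).__init__(name=name, **kwargs)
        # hypothetical mapping, e.g. {'reader': 0.2, 'box_vae': 10.0}
        self._lr_multipliers = lr_multipliers or {}

    def _resource_apply_dense(self, grad, var, apply_state=None):
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = ((apply_state or {}).get((var_device, var_dtype))
                        or self._fallback_apply_state(var_device, var_dtype))

        # scale the shared lr tensor for this variable only; the coefficients
        # dict itself is left untouched
        lr_t = coefficients['lr_t']
        for key, mult in self._lr_multipliers.items():
            if key in var.name:
                lr_t = lr_t * mult
                break

        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')
        if not self.amsgrad:
            return training_ops.resource_apply_adam(
                var.handle, m.handle, v.handle,
                coefficients['beta_1_power'], coefficients['beta_2_power'],
                lr_t, coefficients['beta_1_t'], coefficients['beta_2_t'],
                coefficients['epsilon'], grad,
                use_locking=self._use_locking)
        vhat = self.get_slot(var, 'vhat')
        return training_ops.resource_apply_adam_with_amsgrad(
            var.handle, m.handle, v.handle, vhat.handle,
            coefficients['beta_1_power'], coefficients['beta_2_power'],
            lr_t, coefficients['beta_1_t'], coefficients['beta_2_t'],
            coefficients['epsilon'], grad,
            use_locking=self._use_locking)


# usage sketch: opt = PerVariableLRAdam(learning_rate=1e-3,
#                                       lr_multipliers={'reader': 0.2})

A production version would also override _resource_apply_sparse, which the snippets above omit.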