Example #1
def predict():
    vgg_model = VGG16(include_top=False, weights='imagenet')
    feature_model = Model(vgg_model.input, vgg_model.layers[-1].output)
    with open(os.path.join(BASE_DIR, "captionbot", "tokenizer.pkl"),
              'rb') as f:
        tokenizer = pickle.load(f)
    encoder = Encoder(256)
    decoder = LocalAttentionDecoder(512, 5001, 256)
    optimizer = tf.keras.optimizers.Adam()
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction='none')
    num_steps = 1
    img_tensor = K.zeros((64, 49, 512))
    target = K.zeros((64, 39))
    batch_loss, t_loss = train_step(img_tensor, target, decoder, encoder,
                                    tokenizer, loss_object, optimizer)
    encoder.load_weights(os.path.join(BASE_DIR, "captionbot", "encoder2.hdf5"))
    decoder.load_weights(os.path.join(BASE_DIR, "captionbot", "decoder2.hdf5"))
    img_name = os.listdir(os.path.join(BASE_DIR, "media", "captionbot"))[0]
    img_path = os.path.join(BASE_DIR, "media", "captionbot", img_name)
    result = evaluate(img_path, decoder, encoder, tokenizer, feature_model)
    # Drop the trailing '<end>' token, if present, before joining the caption.
    words = result[:-1] if result and result[-1] == '<end>' else result
    caption = " ".join(words)
    print(caption)
    return caption
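The throwaway train_step call on all-zero tensors above is most likely there to build the subclassed encoder/decoder before load_weights is called; a minimal, self-contained sketch of that constraint (TinyModel is illustrative, not from the source):

import tensorflow as tf

class TinyModel(tf.keras.Model):
    def __init__(self):
        super(TinyModel, self).__init__()
        self.dense = tf.keras.layers.Dense(4)

    def call(self, x):
        return self.dense(x)

m = TinyModel()
m(tf.zeros((1, 8)))            # creates the variables, like the dummy train_step above
# m.load_weights("tiny.hdf5")  # would fail on a subclassed model that has never been called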
Example #2
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            g2 = K.square(g)
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = v - (1. - self.beta_2) * K.sign(v - g2) * g2
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
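The v_t line above departs from vanilla Adam: v moves toward g**2 by a fixed-size signed step rather than an exponential average, which is the Yogi-style second-moment rule. A quick NumPy check of that rule (the constants are illustrative):

import numpy as np

beta_2 = 0.999
v, g = np.float32(0.04), np.float32(0.1)
g2 = g ** 2                                       # 0.01
v_t = v - (1. - beta_2) * np.sign(v - g2) * g2    # same form as the K.sign line above
print(v_t)   # v shrinks toward g**2 by at most (1 - beta_2) * g**2 per step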
Example #3
    def build(self, input_shape):
        assert len(input_shape) >= 2
        self.input_dim = input_shape[-1]

        self.kernel = self.add_weight(shape=(self.input_dim, self.units),
                                      initializer=self.kernel_initializer,
                                      name='kernel',
                                      regularizer=self.kernel_regularizer,
                                      constraint=self.kernel_constraint)

        self.sigma_kernel = self.add_weight(
            shape=(self.input_dim, self.units),
            initializer=initializers.Constant(value=self.sigma_init),
            name='sigma_kernel')

        if self.use_bias:
            self.bias = self.add_weight(shape=(self.units, ),
                                        initializer=self.bias_initializer,
                                        name='bias',
                                        regularizer=self.bias_regularizer,
                                        constraint=self.bias_constraint)
            self.sigma_bias = self.add_weight(
                shape=(self.units, ),
                initializer=initializers.Constant(value=self.sigma_init),
                name='sigma_bias')
        else:
            self.bias = None
            self.epsilon_bias = None

        self.epsilon_kernel = K.zeros(shape=(self.input_dim, self.units))
        if self.use_bias:
            self.epsilon_bias = K.zeros(shape=(self.units, ))
        self.sample_noise()
        super(NoisyDense, self).build(input_shape)
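build() above only creates the weights and noise tensors; a hedged sketch of a matching call() for a NoisyNet-style dense layer (sample_noise() and self.activation are assumed to exist on the class, as they are not shown in the snippet):

    def call(self, inputs):
        # Perturb the weights with the learned sigma scaled by the sampled epsilon noise.
        noisy_kernel = self.kernel + self.sigma_kernel * self.epsilon_kernel
        output = K.dot(inputs, noisy_kernel)
        if self.use_bias:
            noisy_bias = self.bias + self.sigma_bias * self.epsilon_bias
            output = K.bias_add(output, noisy_bias)
        if self.activation is not None:
            output = self.activation(output)
        return output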
Example #4
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        # Applies bounds on actual learning rate
        step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                          (1. - K.pow(self.beta_1, t)))

        final_lr = self.final_lr * lr / self.base_lr
        lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
        upper_bound = final_lr * (1. + 1. / (self.gamma * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsbound:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # apply weight decay
            if self.weight_decay != 0.:
                g += self.weight_decay * K.stop_gradient(p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            if self.amsbound:
                vhat_t = K.maximum(vhat, v_t)
                denom = (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                denom = (K.sqrt(v_t) + self.epsilon)

            # Compute the bounds
            step_size_p = step_size * K.ones_like(denom)
            step_size_p_bound = step_size_p / denom
            bounded_lr_t = m_t * K.minimum(
                K.maximum(step_size_p_bound, lower_bound), upper_bound)

            p_t = p - bounded_lr_t

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
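The lower_bound/upper_bound expressions above start out very loose and converge toward final_lr as t grows, which is what lets this AdaBound-style optimizer behave like Adam early in training and like SGD later. A quick numeric check (final_lr and gamma values are illustrative):

final_lr, gamma = 0.1, 1e-3
for t in (1.0, 1e2, 1e4):
    lower = final_lr * (1. - 1. / (gamma * t + 1.))
    upper = final_lr * (1. + 1. / (gamma * t))
    print(t, round(lower, 5), round(upper, 5))
# t=1   -> roughly (0.0001, 100.1): essentially unconstrained, Adam-like steps
# t=1e4 -> roughly (0.0909, 0.11):  the step is pinned near final_lr, SGD-like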
Example #5
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [(self.iterations, self.iterations + 1)]

        t = self.iterations + 1
        lr_t = self.lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (
            1. - K.pow(self.beta_1, t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        gs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = ms + vs

        flag = K.equal(t % self.accum_iters, 0)
        flag = K.cast(flag, dtype='float32')

        for p, g, m, v, gg in zip(params, grads, ms, vs, gs):

            gg_t = (1 - flag) * (gg + g)
            m_t = (self.beta_1 *
                   m) + (1. - self.beta_1) * (gg + flag * g) / self.accum_iters
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(
                (gg + flag * g) / self.accum_iters)
            p_t = p - flag * lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append((m, flag * m_t + (1 - flag) * m))
            self.updates.append((v, flag * v_t + (1 - flag) * v))
            self.updates.append((gg, gg_t))

            # apply constraints.
            new_p = p_t
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)
            self.updates.append(K.update(p, new_p))
        return self.updates
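In the loop above, flag is 1 only on every accum_iters-th iteration; in between, gradients pile up in gg and the moments and parameters stay frozen, which makes this an Adam with gradient accumulation. A tiny trace of that gate (accum_iters is illustrative):

accum_iters = 4
for t in range(1, 9):
    flag = 1. if t % accum_iters == 0 else 0.    # same test as K.equal(t % accum_iters, 0)
    print(t, flag)                               # parameters move only at t = 4 and t = 8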
Example #6
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        adam_lr = self.adam_lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))
            adam_lr = adam_lr * (1. / (1. + self.decay * K.cast(
                self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        adam_lr_t = adam_lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                               (1. - K.pow(self.beta_1, t)))

        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.ms = K.zeros(K.int_shape(params[0]), dtype=K.dtype(params[0]))
        self.vs = K.zeros(K.int_shape(params[0]), dtype=K.dtype(params[0]))
        self.weights = ([self.iterations] + moments + vhats +
                        [self.ms] + [self.vs])
        for i, (p, g, m, vhat) in enumerate(zip(params, grads, moments,
                                                vhats)):
            v = self.momentum * m - lr * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                new_p = p + self.momentum * v - lr * g
            else:
                new_p = p + v

            if i == 0 and self.e2efs_layer is not None:
                nnz = K.sum(K.cast(K.greater(p, 0.), K.floatx()))
                m_t = (self.beta_1 * self.ms) + (1. - self.beta_1) * g
                v_t = (self.beta_2 *
                       self.vs) + (1. - self.beta_2) * K.square(g)
                if self.amsgrad:
                    vhat_t = K.maximum(vhat, v_t)
                    p_t = p - adam_lr_t * m_t / (K.sqrt(vhat_t) + K.epsilon())
                    self.updates.append(K.update(vhat, vhat_t))
                else:
                    p_t = p - adam_lr_t * m_t / (K.sqrt(v_t) + K.epsilon())

                self.updates.append(K.update(self.ms, m_t))
                self.updates.append(K.update(self.vs, v_t))
                new_p = K.switch(K.less_equal(nnz, self.e2efs_layer.units),
                                 new_p, p_t)

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.learning_rate
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):

            # Learning rate multipliers
            if self.multipliers:
                multiplier = [
                    mult for mult in self.multipliers if mult in p.name
                ]
            else:
                multiplier = None
            if multiplier:
                new_lr_t = lr_t * self.multipliers[multiplier[0]]
                if self.debug_verbose:
                    print('Setting {} to learning rate {}'.format(
                        multiplier[0], new_lr_t))
                    print(K.get_value(new_lr_t))
            else:
                new_lr_t = lr_t
                if self.debug_verbose:
                    print('No change in learning rate {}'.format(p.name))
                    print(K.get_value(new_lr_t))
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - new_lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - new_lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
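The multiplier lookup above matches the keys of self.multipliers as substrings of each weight's name, so whole layers can be given their own learning-rate factor. A standalone trace of that matching (the names and factors below are illustrative):

multipliers = {'dense_1': 0.1, 'conv2d_3': 0.5}            # layer-name substrings -> factors
lr_t = 1e-3
for name in ('dense_1/kernel:0', 'dense_2/bias:0', 'conv2d_3/kernel:0'):
    matched = [key for key in multipliers if key in name]  # same test as in the loop above
    new_lr_t = lr_t * multipliers[matched[0]] if matched else lr_t
    print(name, new_lr_t)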
Example #8
    def update_1(self, params):
        """ First update on CPU or GPU """
        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        return ms, vs, vhats
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [state_ops.assign_add(self.iterations, 1)]

        t = math_ops.cast(self.iterations, K.floatx()) + 1
        
        lr_t = self.lr
        if self.initial_decay > 0:
            lr_t = lr_t * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))
            
        lr_t = lr_t * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                          (1. - K.pow(self.beta_1, t)))

        final_lr = self.final_lr * lr_t / self.base_lr
        lower_bound = final_lr * (1 - 1 / ((1-self.beta_2) * (t + 1)))
        upper_bound = final_lr * (1 + 1 / ((1-self.beta_2) * t))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsbound:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # Apply the weight decay to the gradient before the moment updates;
            # adding it afterwards would have no effect, since g is not reused.
            if self.weight_decay != 0.:
                g += self.weight_decay * K.stop_gradient(p)

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
                
            if self.amsbound:
                vhat_t = K.maximum(vhat, v_t)
                denom = K.sqrt(vhat_t) + self.epsilon
                self.updates.append(state_ops.assign(vhat, vhat_t))
            else:
                denom = K.sqrt(v_t) + self.epsilon

            eta_hat = tf.clip_by_value(lr_t / denom, lower_bound, upper_bound)
            # eta = eta_hat / K.sqrt(t)

            p_t = p - m_t * eta_hat

            self.updates.append(state_ops.assign(m, m_t))
            self.updates.append(state_ops.assign(v, v_t))
            
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(state_ops.assign(p, new_p))
        return self.updates
Example #10
    def _update_1(self, params):
        """ Perform the first update. Run under CPU context if running on Tensorflow
        and CPU mode is enabled, otherwise run on the default device. """
        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        return ms, vs, vhats
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        '''Bias corrections according to the Adam paper
        '''
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):

            ####################################################
            # Add a lr multiplier for vars outside excluded_vars
            if p.name in self.excluded_vars:
                multiplied_lr_t = lr_t
            else:
                multiplied_lr_t = lr_t * self.lr_mult
            ###################################################

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            '''Schedule multiplier eta_t = 1 for simple AdamW
            According to the AdamW paper, eta_t can be fixed, decay, or 
            also be used for warm restarts (AdamWR to come). 
            '''
            eta_t = 1.
            p_t = p - eta_t * (multiplied_lr_t * m_t /
                               (K.sqrt(v_t) + self.epsilon))
            if self.weight_decay != 0:
                '''Normalized weight decay according to the AdamW paper
                '''
                w_d = self.weight_decay * K.sqrt(
                    self.batch_size / (self.samples_per_epoch * self.epochs))
                p_t = p_t - eta_t * (w_d * p)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
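The w_d term above is the normalized weight decay from the AdamW paper: the raw decay coefficient is rescaled by the square root of the ratio of batch size to total number of updates. A quick numeric example (the hyperparameter values are illustrative):

import math

weight_decay, batch_size = 0.025, 32
samples_per_epoch, epochs = 50000, 10
w_d = weight_decay * math.sqrt(batch_size / (samples_per_epoch * epochs))
print(w_d)   # about 2e-4, applied each step as p_t = p_t - eta_t * w_d * p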
Example #12
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        self.weights = [self.iterations]
        lr = self.learning_rate

        for i, (p, g) in enumerate(zip(params, grads)):
            g2 = K.square(g) + self.epsilon1
            shape, dtype = K.int_shape(p), K.dtype(p)
            factored_shape = self.factored_shape(shape)
            if factored_shape is None:
                # Create the second-moment variable
                v = K.zeros(shape, dtype=dtype, name='v_' + str(i))
                self.weights.append(v)
                # Define its update
                v_t = self.beta2 * v + (1.0 - self.beta2) * g2
                self.updates.append(K.update(v, v_t))
            else:
                # Create the factored (row/column) variables
                shape1, axis1, shape2, axis2 = factored_shape
                vr = K.zeros(shape1, dtype=dtype, name='vr_' + str(i))
                vc = K.zeros(shape2, dtype=dtype, name='vc_' + str(i))
                self.weights.extend([vr, vc])
                # Define their updates
                g2r = K.mean(g2, axis=axis1, keepdims=True)
                g2c = K.mean(g2, axis=axis2, keepdims=True)
                vr_t = self.beta2 * vr + (1.0 - self.beta2) * g2r
                vc_t = self.beta2 * vc + (1.0 - self.beta2) * g2c
                self.updates.extend([K.update(vr, vr_t), K.update(vc, vc_t)])
                # Reconstruct the full second-moment matrix
                v_t = vr_t * vc_t / K.mean(vr_t, axis=axis2, keepdims=True)
            # Main update term
            u = g / K.sqrt(v_t)
            # Update clipping
            if self.clipping_threshold is not None:
                u_rms = self.rms(u)
                d = self.clipping_threshold
                u = u / K.maximum(1.0, u_rms / d)
            # Update smoothing (momentum)
            if self.beta1 > 0.0:
                # Create the momentum variable
                m = K.zeros(shape, dtype=dtype, name='m_' + str(i))
                self.weights.append(m)
                # Define its update
                m_t = self.beta1 * m + (1.0 - self.beta1) * u
                self.updates.append(K.update(m, m_t))
                u = m_t
            # Rescale the update by the parameter scale
            if self.multiply_by_parameter_scale:
                u = u * K.maximum(self.rms(p), self.epsilon2)
            # Apply the parameter update
            self.updates.append(K.update(p, p - lr * u))

        return self.updates
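For matrix-shaped weights, the branch above keeps only row and column statistics of g**2 and rebuilds the full second moment from them, which is Adafactor's factored estimate. A toy NumPy check of that reconstruction for a plain 2-D weight:

import numpy as np

g2 = np.random.rand(3, 4).astype('float32') + 1e-8
vr = g2.mean(axis=1, keepdims=True)               # per-row statistic, shape (3, 1)
vc = g2.mean(axis=0, keepdims=True)               # per-column statistic, shape (1, 4)
v = vr * vc / vr.mean(axis=0, keepdims=True)      # rank-1 reconstruction, shape (3, 4)
print(v.shape, np.isclose(v.sum(), g2.sum()))     # the totals agree exactly for this rebuild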
Example #13
    def rel_to_abs(self, x):
        shape = K.shape(x)
        shape = [shape[i] for i in range(3)]
        B, Nh, L = shape
        col_pad = K.zeros(K.stack([B, Nh, L, 1]))
        x = K.concatenate([x, col_pad], axis=3)
        flat_x = K.reshape(x, [B, Nh, L * 2 * L])
        flat_pad = K.zeros(K.stack([B, Nh, L - 1]))
        flat_x_padded = K.concatenate([flat_x, flat_pad], axis=2)
        final_x = K.reshape(flat_x_padded, [B, Nh, L + 1, 2 * L - 1])
        final_x = final_x[:, :, :L, L - 1:]
        return final_x
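rel_to_abs above is the padding-and-reshape trick used in relative self-attention (Music Transformer style): (B, Nh, L, 2L-1) relative logits are repacked into (B, Nh, L, L) absolute ones without any gather ops. A NumPy shape check of the same sequence of operations:

import numpy as np

B, Nh, L = 2, 4, 5
x = np.zeros((B, Nh, L, 2 * L - 1), dtype='float32')
col_pad = np.zeros((B, Nh, L, 1), dtype='float32')
x = np.concatenate([x, col_pad], axis=3)                 # (B, Nh, L, 2L)
flat_x = x.reshape(B, Nh, L * 2 * L)
flat_pad = np.zeros((B, Nh, L - 1), dtype='float32')
flat_x_padded = np.concatenate([flat_x, flat_pad], axis=2)
final_x = flat_x_padded.reshape(B, Nh, L + 1, 2 * L - 1)[:, :, :L, L - 1:]
print(final_x.shape)   # (2, 4, 5, 5)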
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [state_ops.assign_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (  # pylint: disable=g-no-augmented-assignment
                1. / (1. + self.decay *
                      math_ops.cast(self.iterations, K.dtype(self.decay))))

        t = math_ops.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - math_ops.pow(self.beta_2, t)) /
                     (1. - math_ops.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):

            ####################################################
            # Add a lr multiplier for vars outside excluded_vars
            if p.name in self.excluded_vars:
                multiplied_lr_t = lr_t
            else:
                multiplied_lr_t = lr_t * self.lr_mult
            ###################################################

            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
            if self.amsgrad:
                vhat_t = math_ops.maximum(vhat, v_t)
                p_t = p - multiplied_lr_t * m_t / (K.sqrt(vhat_t) +
                                                   self.epsilon)
                self.updates.append(state_ops.assign(vhat, vhat_t))
            else:
                p_t = p - multiplied_lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(state_ops.assign(m, m_t))
            self.updates.append(state_ops.assign(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(state_ops.assign(p, new_p))
        return self.updates
Example #15
    def call(self, inputs):
        initial_states = [
            K.zeros((K.shape(inputs)[0], self.units)),
            K.zeros((K.shape(inputs)[0], self.units))
        ]  # initial states (all zeros)
        outputs = K.rnn(self.one_step, inputs, initial_states)
        self.distance = 1 - K.mean(
            outputs[1][..., self.units:self.units + self.levels], -1)
        self.distance_in = K.mean(outputs[1][..., self.units + self.levels:],
                                  -1)
        if self.return_sequences:
            return outputs[1][..., :self.units]
        else:
            return outputs[0][..., :self.units]
    def get_updates(self, loss, params):
        # Mostly the same code as Adam class, with added multiplier variables.
        # Keras code from:
        # https://github.com/tensorflow/tensorflow/blob/r1.12/tensorflow/python/keras/optimizers.py#L456
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (
                1.0 / (1.0 + self.decay * K.cast(self.iterations, K.dtype(self.decay)))
            )

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (
            K.sqrt(1.0 - K.pow(self.beta_2, t)) / (1.0 - K.pow(self.beta_1, t))
        )

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            layername = p.name.split("/", 1)[0]
            mult = self.multipliers.get(layername, 1.0)

            m_t = (self.beta_1 * m) + (1.0 - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1.0 - self.beta_2) * K.square(g)

            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - mult * lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - mult * lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, "constraint", None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #17
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = []

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (  # pylint: disable=g-no-augmented-assignment
                    1. /
                    (1. +
                     self.decay * math_ops.cast(self.iterations, K.dtype(self.decay))))

        with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
            t = math_ops.cast(self.iterations, K.floatx())
        lr_t = lr * (
                K.sqrt(1. - math_ops.pow(self.beta_2, t)) /
                (1. - math_ops.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        ss = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        # if self.amsgrad is True:
        #     vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        # else:
        vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + ss + vhats

        for p, g, m, v, s, vhat in zip(params, grads, ms, vs, ss, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
            # if self.amsgrad is True:
            #     vhat_t = math_ops.maximum(vhat, v_t)
            #     miu_t = lr_t / (K.sqrt(vhat_t) + self.epsilon)
            #     p_t = p - miu_t * m_t
            #     self.updates.append(state_ops.assign(vhat, vhat_t))
            # else:
            miu_t = lr_t / (K.sqrt(v_t) + self.epsilon)
            s_t = self.beta_3 * s + (1 - self.beta_3) * miu_t
            miu_t_hat = math_ops.minimum(miu_t, s_t)
            p_t = p - miu_t_hat * m_t
            self.updates.append(state_ops.assign(m, m_t))
            self.updates.append(state_ops.assign(v, v_t))
            self.updates.append(state_ops.assign(s, s_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(state_ops.assign(p, new_p))
        return self.updates
Example #18
    def updated_get_updates(self, loss, params):
        self.accumulate_gradient_accumulators = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
        ]
        if ema_decay > 0:
            self.params_for_ema_tracking = params
            self.params_ema = [
                K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
            ]
        updates_accumulated_iterations = K.update_add(
            self.accumulated_iterations, 1)
        new_grads = orig_get_gradients(loss, params)
        if not accumulate_sum_or_mean:
            new_grads = [
                g / K.cast(self.update_params_frequency, K.dtype(g))
                for g in new_grads
            ]
        self.updated_grads = [
            K.update_add(p, g)
            for p, g in zip(self.accumulate_gradient_accumulators, new_grads)
        ]

        def update_function():
            with tensorflow.control_dependencies(orig_get_updates(
                    loss, params)):
                reset_grads = [
                    K.update(p, K.zeros(K.int_shape(p), dtype=K.dtype(p)))
                    for p in self.accumulate_gradient_accumulators
                ]
                if ema_decay > 0:
                    reset_grads += [K.update_add(self.total_iterations, 1)]
                    reset_grads += [
                        K.update(e_p, (e_p * ema_decay) + (1 - ema_decay) * p)
                        for e_p, p in zip(self.params_ema, params)
                    ]
            return tensorflow.group(*(reset_grads +
                                      [updates_accumulated_iterations]))

        def just_store_function():
            return tensorflow.group(*[updates_accumulated_iterations])

        update_switch = K.equal(
            (updates_accumulated_iterations) % self.update_params_frequency, 0)

        with tensorflow.control_dependencies(self.updated_grads):
            self.updates = [
                K.switch(update_switch, update_function, just_store_function)
            ]
            return self.updates
Example #19
    def get_updates(self, params, loss):
        grads = self.get_gradients(loss, params)
        shapes = [K.shape(p) for p in params]
        alphas = [
            K.variable(K.ones(shape) * self.init_alpha) for shape in shapes
        ]
        old_grads = [K.zeros(shape) for shape in shapes]
        prev_weight_deltas = [K.zeros(shape) for shape in shapes]
        self.weights = alphas + old_grads
        self.updates = []

        for param, grad, old_grad, prev_weight_delta, alpha in zip(
                params, grads, old_grads, prev_weight_deltas, alphas):
            # equation 4
            new_alpha = K.switch(
                K.greater(grad * old_grad, 0),
                K.minimum(alpha * self.scale_up, self.max_alpha),
                K.switch(K.less(grad * old_grad, 0),
                         K.maximum(alpha * self.scale_down, self.min_alpha),
                         alpha))

            # equation 5
            new_delta = K.switch(
                K.greater(grad, 0), -new_alpha,
                K.switch(K.less(grad, 0), new_alpha, K.zeros_like(new_alpha)))

            # equation 7
            weight_delta = K.switch(K.less(grad * old_grad, 0),
                                    -prev_weight_delta, new_delta)

            # equation 6
            new_param = param + weight_delta

            # reset gradient_{t-1} to 0 if gradient sign changed (so that we do
            # not "double punish", see paragraph after equation 7)
            grad = K.switch(K.less(grad * old_grad, 0), K.zeros_like(grad),
                            grad)

            # Apply constraints
            #if param in constraints:
            #    c = constraints[param]
            #    new_param = c(new_param)

            self.updates.append(K.update(param, new_param))
            self.updates.append(K.update(alpha, new_alpha))
            self.updates.append(K.update(old_grad, grad))
            self.updates.append(K.update(prev_weight_delta, weight_delta))

        return self.updates
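The K.switch chains above implement the classic Rprop rules: grow the per-weight step size alpha while the gradient keeps its sign, shrink it when the sign flips, and clamp it to [min_alpha, max_alpha]. A scalar trace of equation 4 (the constants are illustrative):

scale_up, scale_down = 1.2, 0.5
max_alpha, min_alpha = 1.0, 1e-6
alpha = 0.01
for sign_product in (+1., +1., +1., -1.):            # sign of grad * old_grad
    if sign_product > 0:
        alpha = min(alpha * scale_up, max_alpha)      # gradient kept its sign
    elif sign_product < 0:
        alpha = max(alpha * scale_down, min_alpha)    # gradient sign flipped
    print(alpha)   # 0.012, 0.0144, 0.01728, then halved to 0.00864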
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay *
                             K.cast(self.iterations, K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = [self.iterations] + ms + vs

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

            m_t_hat = m_t / (1. - K.pow(self.beta_1, t))
            v_t_hat = v_t / (1. - K.pow(self.beta_2, t))

            p_dash = m_t_hat / (K.sqrt(v_t_hat + self.epsilon))

            if self.weight_decay > 0.:
                wd = self.weight_decay * p
                p_dash = p_dash + wd

            r1 = K.sqrt(K.sum(K.square(p)))
            r2 = K.sqrt(K.sum(K.square(p_dash)))

            r = tf.where(tf.greater(r1, 0.),
                         tf.where(tf.greater(r2, 0.), r1 / r2, 1.0), 1.0)
            # r = r1 / r2
            eta = r * lr

            p_t = p - eta * p_dash

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
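The r1/r2 ratio above is a layer-wise trust ratio in the LARS/LAMB style: the Adam-like direction p_dash is rescaled so that the actual step is proportional to the norm of the layer's own weights. A small NumPy illustration:

import numpy as np

p = np.random.randn(256).astype('float32')                # a layer's weights
p_dash = np.random.randn(256).astype('float32') * 0.01    # its update direction
r1, r2 = np.linalg.norm(p), np.linalg.norm(p_dash)
r = (r1 / r2) if (r1 > 0. and r2 > 0.) else 1.0           # same guard as the tf.where above
lr = 1e-3
print(r, r * lr)   # eta = r * lr, so the step scales with ||p|| / ||update||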
Example #21
    def get_updates(self, params, loss):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.inital_decay > 0:
            lr *= (1. / (1. + self.decay * self.iterations))

        t = self.iterations + 1
        lr_t = lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (
            1. - K.pow(self.beta_1, t))

        shapes = [K.get_variable_shape(p) for p in params]
        ms = [K.zeros(shape) for shape in shapes]
        vs = [K.zeros(shape) for shape in shapes]
        f = K.variable(0)
        d = K.variable(1)
        self.weights = [self.iterations] + ms + vs + [f, d]

        cond = K.greater(t, K.variable(1))
        small_delta_t = K.switch(K.greater(loss, f), self.small_k + 1,
                                 1. / (self.big_K + 1))
        big_delta_t = K.switch(K.greater(loss, f), self.big_K + 1,
                               1. / (self.small_k + 1))

        c_t = K.minimum(K.maximum(small_delta_t, loss / (f + self.epsilon)),
                        big_delta_t)
        f_t = c_t * f
        r_t = K.abs(f_t - f) / (K.minimum(f_t, f))
        d_t = self.beta_3 * d + (1 - self.beta_3) * r_t

        f_t = K.switch(cond, f_t, loss)
        d_t = K.switch(cond, d_t, K.variable(1.))

        self.updates.append(K.update(f, f_t))
        self.updates.append(K.update(d, d_t))

        for p, g, m, v in zip(params, grads, ms, vs):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            p_t = p - lr_t * m_t / (d_t * K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            new_p = p_t
            self.updates.append(K.update(p, new_p))
        return self.updates
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        t = K.cast(self.iterations, K.floatx()) + 1
        lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                     (1. - K.pow(self.beta_1, t)))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            if self.amsgrad:
                vhat_t = K.maximum(vhat, v_t)
                p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
                self.updates.append(K.update(vhat, vhat_t))
            else:
                p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)
                
            clptrkey = set_pattern_find(p.name, self.clips.keys())
            if self.clips_val and clptrkey:
                c = K.eval(self.clips[clptrkey])
                if self.verbose > 0:
                    print("Clipping variable", p.name, "to", c)
                new_p = K.clip(new_p, c[0], c[1])

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #23
    def call(self, x):

        truncate = 4.0

        kernel_size = K.round(truncate * self.sigma[0] + 0.5)

        dim = K.eval(kernel_size)

        tensor_range = K.arange(dim[0], dtype='float32')

        x_distance = K.concatenate(
            [-1 * tensor_range, K.zeros(1), tensor_range])

        x_shape = K.int_shape(x_distance)

        y_distance = K.reshape(x_distance, (1, x_shape[0]))

        result = (K.square(x_distance) +
                  K.square(y_distance)) / (2.0 * K.square(self.sigma[0]))
        result = K.exp(-1 * result)
        result = result / K.sum(result)

        result = K.expand_dims(result, axis=-1)
        result = K.repeat_elements(result, 3, axis=-1)
        result = K.expand_dims(result)

        conv_result = tf.nn.depthwise_conv2d(x,
                                             result, (1, 1, 1, 1),
                                             padding='SAME')

        return x + (self.amount[0] * (x - conv_result))
    def update_function():
        with tf.control_dependencies(orig_get_updates(loss, params)):
            reset_grads = [
                K.update(p, K.zeros(K.int_shape(p), dtype=K.dtype(p)))
                for p in self.accumulated_grads
            ]
        return tf.group(*(reset_grads + [iters]))
    def new_get_updates(self, loss, params):
        self.accumulated_grads = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
        ]
        iters = K.update_add(self.accumulated_iters, 1)
        new_grads = orig_get_gradients(loss, params)
        if do_mean:
            new_grads = [g / K.cast(iter_size, K.dtype(g)) for g in new_grads]
        self.updated_grads = [
            K.update_add(p, g)
            for p, g in zip(self.accumulated_grads, new_grads)
        ]

        def update_function():
            with tf.control_dependencies(orig_get_updates(loss, params)):
                reset_grads = [
                    K.update(p, K.zeros(K.int_shape(p), dtype=K.dtype(p)))
                    for p in self.accumulated_grads
                ]
            return tf.group(*(reset_grads + [iters]))

        def just_store_function():
            return tf.group(*[iters])

        update_switch = K.equal(iters % iter_size, 0)
        with tf.control_dependencies(self.updated_grads):
            self.updates = [
                K.switch(update_switch, update_function, just_store_function)
            ]
            return self.updates
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.learning_rate
        if self.initial_decay > 0:
            lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))
        # momentum
        shapes = [K.int_shape(p) for p in params]
        moments = [K.zeros(shape) for shape in shapes]
        self.weights = [self.iterations] + moments
        for p, g, m in zip(params, grads, moments):

            matched_layer = [x for x in self.lr_multipliers.keys() if x in p.name]
            if matched_layer:
                new_lr = lr * self.lr_multipliers[matched_layer[0]]
            else:
                new_lr = lr

            v = self.momentum * m - new_lr * g  # velocity
            self.updates.append(K.update(m, v))

            if self.nesterov:
                new_p = p + self.momentum * v - new_lr * g
            else:
                new_p = p + v

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #27
def context_enhancement_module(x1, x2, x3, size, name='cem_block'):
    x1 = conv1x1(x1,
                 in_channels=x1.shape[3],
                 out_channels=245,
                 strides=1,
                 groups=1,
                 use_bias=True,
                 name='{}/c4_lat'.format(name))

    x2 = nn.Lambda(lambda img: tf.image.resize_bilinear(
        img, [20, 20], align_corners=True, name='{}/c5_resize'.format(name)))(
            x2)
    x2 = conv1x1(x2,
                 in_channels=x2.shape[3],
                 out_channels=245,
                 strides=1,
                 groups=1,
                 use_bias=True,
                 name='{}/c5_lat'.format(name))

    zero = K.zeros((1, size, size, 528))
    x3 = nn.Lambda(lambda img: nn.add([img, zero]))(x3)
    x3 = conv1x1(x3,
                 in_channels=x3.shape[3],
                 out_channels=245,
                 strides=1,
                 groups=1,
                 use_bias=True,
                 name='{}/c_glb_lat'.format(name))
    print(x1)
    return nn.add([x1, x2, x3])
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        self.weights = accumulators
        self.updates = [K.update_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                      K.dtype(self.decay))))

        for p, g, a in zip(params, grads, accumulators):
            # update accumulator
            new_a = self.rho * a + (1. - self.rho) * K.square(g)
            self.updates.append(K.update(a, new_a))
            new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)
            
            clptrkey = set_pattern_find(p.name, self.clips.keys())
            if self.clips_val and clptrkey:
                c = K.eval(self.clips[clptrkey])
                if self.verbose > 0:
                    print("Clipping variable", p.name, "to", c)
                new_p = K.clip(new_p, c[0], c[1])

            self.updates.append(K.update(p, new_p))
        return self.updates
Example #29
def test_DSSIM_channels_first():
    prev_data = K.image_data_format()
    K.set_image_data_format('channels_first')
    for input_dim, kernel_size in zip([32, 33], [2, 3]):
        input_shape = [3, input_dim, input_dim]
        X = np.random.random_sample(4 * input_dim * input_dim * 3)
        X = X.reshape([4] + input_shape)
        y = np.random.random_sample(4 * input_dim * input_dim * 3)
        y = y.reshape([4] + input_shape)

        model = Sequential()
        model.add(Conv2D(32, (3, 3), padding='same', input_shape=input_shape,
                         activation='relu'))
        model.add(Conv2D(3, (3, 3), padding='same', input_shape=input_shape,
                         activation='relu'))
        adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8)
        model.compile(loss=DSSIMObjective(kernel_size=kernel_size), metrics=['mse'],
                      optimizer=adam)
        model.fit(X, y, batch_size=2, epochs=1, shuffle='batch')

        # Test same
        x1 = K.constant(X, 'float32')
        x2 = K.constant(X, 'float32')
        dssim = DSSIMObjective(kernel_size=kernel_size)
        assert_allclose(0.0, K.eval(dssim(x1, x2)), atol=1e-4)

        # Test opposite
        x1 = K.zeros([4] + input_shape)
        x2 = K.ones([4] + input_shape)
        dssim = DSSIMObjective(kernel_size=kernel_size)
        assert_allclose(0.5, K.eval(dssim(x1, x2)), atol=1e-4)

    K.set_image_data_format(prev_data)
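The two assertions rely on DSSIM being (1 - SSIM) / 2, so identical inputs score 0.0 and maximally dissimilar inputs approach 0.5; a one-line restatement of that relation:

def dssim_from_ssim(ssim_value):
    # DSSIM as checked by the test: 0.0 for identical images, approaching 0.5 otherwise.
    return (1.0 - ssim_value) / 2.0

print(dssim_from_ssim(1.0), dssim_from_ssim(0.0))   # 0.0 and 0.5, matching the asserts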
Example #30
    def new_get_updates(self, loss, params):
        self.accumulated_grads = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
        ]
        update_iter = [K.update_add(self.accumulated_iterations, 1)]
        new_grads = self.orig_get_gradients(loss, params)
        if self.do_mean:
            new_grads = [
                g / K.cast(self.iter_size, K.dtype(g)) for g in new_grads
            ]
        update_grads = [
            K.update_add(p, g)
            for p, g in zip(self.accumulated_grads, new_grads)
        ]

        def update_func():
            with tf.control_dependencies(self.orig_get_updates(loss, params)):
                reset_grads = [
                    K.update(p, K.zeros(K.int_shape(p), dtype=K.dtype(p)))
                    for p in self.accumulated_grads
                ]
            return tf.group(*(reset_grads + update_iter))

        def just_iter_func():
            return tf.group(*update_iter)

        # do the original get_updates() computations only once every
        # 'iter_size' iterations
        update_switch = K.equal(self.accumulated_iterations % self.iter_size,
                                0)
        with tf.control_dependencies(update_grads):
            self.updates = [
                K.switch(update_switch, update_func, just_iter_func)
            ]
            return self.updates
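new_get_updates above wraps an existing optimizer so that orig_get_updates only runs once every iter_size iterations, with the intermediate gradients summed (or, with do_mean, averaged) into accumulated_grads. A scalar trace of that behaviour (iter_size is illustrative):

iter_size = 4
accumulated = 0.
for step in range(1, 9):
    grad = 1.0                        # pretend every mini-batch produces gradient 1.0
    accumulated += grad / iter_size   # the do_mean branch above
    if step % iter_size == 0:         # update_switch fires: apply and reset
        print(step, accumulated)      # the effective gradient the wrapped update sees
        accumulated = 0.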