Code example #1
        def get_updates(self, loss, params):
            # Update criterion: true once every grad_accum_steps iterations
            cond = K.equal(self.iterations % self.grad_accum_steps, 0)
            # Fetch the raw gradients
            grads = self.get_gradients(loss, params)
            self.accum_grads = [
                K.zeros(K.int_shape(p),
                        dtype=K.dtype(p),
                        name='accum_grad_%s' % i) for i, p in enumerate(params)
            ]

            old_update = K.update

            def new_update(x, new_x):
                # Gate every assignment: keep x unless this is an update step
                new_x = K.switch(cond, new_x, x)
                return old_update(x, new_x)

            # Temporarily patch K.update so the base optimizer's parameter
            # updates only take effect every grad_accum_steps iterations
            K.update = new_update
            updates = super(new_optimizer, self).get_updates(loss, params)
            K.update = old_update

            # Accumulate gradients: reset to g on update steps, otherwise add g
            with tf.control_dependencies(updates):
                accum_updates = [
                    K.update(ag, K.switch(cond, g, ag + g))
                    for g, ag in zip(grads, self.accum_grads)
                ]

            return accum_updates
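What makes this work: K.update is temporarily monkey-patched so that, inside the base optimizer's get_updates, every variable assignment is gated by cond and therefore only takes effect once every grad_accum_steps iterations. The base optimizer is assumed to read its gradients through an overridden get_gradients that returns the running averages of self.accum_grads (see code example #6), and the snippet assumes the usual "from keras import backend as K" and "import tensorflow as tf" imports at module level.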
Code example #2
    def call(self, inputs):
        """如果custom_position_ids,那么第二个输入为自定义的位置id
        """
        input_shape = K.shape(inputs)
        batch_size, seq_len = input_shape[0], input_shape[1]

        if self.custom_position_ids:
            inputs, position_ids = inputs
            if K.dtype(position_ids) != 'int32':
                position_ids = K.cast(position_ids, 'int32')
        else:
            position_ids = K.arange(0, seq_len, dtype='int32')[None]

        if self.hierarchical:
            alpha = 0.4 if self.hierarchical is True else self.hierarchical
            embeddings = self.embeddings - alpha * self.embeddings[:1]
            embeddings = embeddings / (1 - alpha)
            embeddings_x = K.gather(embeddings, position_ids // self.input_dim)
            embeddings_y = K.gather(embeddings, position_ids % self.input_dim)
            pos_embeddings = alpha * embeddings_x + (1 - alpha) * embeddings_y
        else:
            if self.custom_position_ids:
                pos_embeddings = K.gather(self.embeddings, position_ids)
            else:
                pos_embeddings = self.embeddings[None, :seq_len]

        if self.merge_mode == 'add':
            return inputs + pos_embeddings
        elif self.merge_mode == 'mul':
            return inputs * pos_embeddings
        else:
            if not self.custom_position_ids:
                pos_embeddings = K.tile(pos_embeddings, [batch_size, 1, 1])
            return K.concatenate([inputs, pos_embeddings])
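A minimal NumPy sketch of the hierarchical decomposition above (illustrative values; base stands in for the trained self.embeddings): a position p beyond the trained range input_dim is split into coordinates p // input_dim and p % input_dim, and embedded as the blend alpha * embeddings_x + (1 - alpha) * embeddings_y of the de-biased table. For in-range positions the blend reduces exactly to the original embedding.

    import numpy as np

    input_dim, output_dim, alpha = 512, 4, 0.4
    base = np.random.randn(input_dim, output_dim)    # trained position embeddings
    emb = (base - alpha * base[:1]) / (1 - alpha)    # de-biased table

    p = 1000                                         # position beyond input_dim
    u = alpha * emb[p // input_dim] + (1 - alpha) * emb[p % input_dim]
    # In-range positions are recovered exactly (x-coordinate is 0):
    assert np.allclose(alpha * emb[0] + (1 - alpha) * emb[5], base[5])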
Code example #3
    def call(self, inputs):
        """如果custom_position_ids,那么第二个输入为自定义的位置id
        """
        if self.custom_position_ids:
            seq_len = K.shape(inputs)[1]
            inputs, position_ids = inputs
            if 'float' not in K.dtype(position_ids):
                position_ids = K.cast(position_ids, K.floatx())
        else:
            input_shape = K.shape(inputs)
            batch_size, seq_len = input_shape[0], input_shape[1]
            position_ids = K.arange(0, seq_len, dtype=K.floatx())[None]

        indices = K.arange(0, self.output_dim // 2, dtype=K.floatx())
        indices = K.pow(10000.0, -2 * indices / self.output_dim)
        embeddings = tf.einsum('bn,d->bnd', position_ids, indices)
        embeddings = K.stack([K.sin(embeddings), K.cos(embeddings)], axis=-1)
        embeddings = K.reshape(embeddings, (-1, seq_len, self.output_dim))

        if self.merge_mode == 'add':
            return inputs + embeddings
        elif self.merge_mode == 'mul':
            return inputs * (embeddings + 1.0)
        else:
            if not self.custom_position_ids:
                embeddings = K.tile(embeddings, [batch_size, 1, 1])
            return K.concatenate([inputs, embeddings])
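A self-contained NumPy replay of the sinusoidal construction above (illustrative only): each position gets sin/cos pairs at geometrically decreasing frequencies, and the stack on a new last axis followed by reshape interleaves them as sin, cos, sin, cos, ... along the feature dimension.

    import numpy as np

    seq_len, output_dim = 3, 6
    position_ids = np.arange(seq_len, dtype='float32')[None]   # (1, seq_len)
    indices = np.arange(output_dim // 2, dtype='float32')
    indices = np.power(10000.0, -2 * indices / output_dim)     # (output_dim/2,)
    angles = np.einsum('bn,d->bnd', position_ids, indices)     # (1, seq_len, d/2)
    emb = np.stack([np.sin(angles), np.cos(angles)], axis=-1)  # (1, seq_len, d/2, 2)
    emb = emb.reshape((-1, seq_len, output_dim))
    print(emb[0, 1, :2])  # [sin(1.), cos(1.)] -- the lowest-frequency pair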
Code example #4
        def get_updates(self, loss, params):
            # Update criterion: a 0/1 float mask (1 every grad_accum_steps steps)
            cond = K.equal(self.iterations % self.grad_accum_steps, 0)
            cond = K.cast(cond, K.floatx())
            # Fetch the raw gradients
            grads = self.get_gradients(loss, params)
            self.accum_grads = [
                K.zeros(
                    K.int_shape(p), dtype=K.dtype(p), name='accum_grad_%s' % i
                ) for i, p in enumerate(params)
            ]

            old_update = K.update

            def new_update(x, new_x):
                # Apply the assignment only when cond == 1
                new_x = cond * new_x + (1 - cond) * x
                return old_update(x, new_x)

            K.update = new_update
            updates = super(NewOptimizer, self).get_updates(loss, params)
            K.update = old_update

            # Accumulate gradients: (1 - cond) drops the old sum on update steps
            with tf.control_dependencies(updates):
                accum_updates = [
                    K.update(ag, g + (1 - cond) * ag)
                    for g, ag in zip(grads, self.accum_grads)
                ]

            return accum_updates
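This variant does the same gating as code example #1 but with a 0/1 float mask instead of K.switch: inside new_update, cond * new_x + (1 - cond) * x applies the assignment only on update steps, and in the accumulator update g + (1 - cond) * ag the old sum is dropped exactly when cond == 1, so the accumulator restarts from the fresh gradient.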
Code example #5
    def call(self, inputs):
        # Recurse element-wise when a list of tensors is passed in
        if isinstance(inputs, list):
            return [o for i in inputs for o in self.call(i)]

        outputs = []

        batch_size = K.shape(inputs)[0]
        if np.ndim(self.parts) > 0:
            # Proportional split: boundaries from cumulative fractions of parts
            batch_size = K.cast(batch_size, 'float64')
            slices = [
                K.cast(p * batch_size / sum(self.parts), 'int32')
                for p in np.cumsum(self.parts).astype('float64')
            ]
        else:
            # Equal split into self.parts pieces with a rounded-up stride
            stride = K.cast(tf.math.ceil(batch_size / self.parts),
                            K.dtype(batch_size))
            slices = [stride * (i + 1) for i in range(self.parts)]

        for i, _ in enumerate(slices):
            if i == 0:
                outputs.append(inputs[:slices[0]])
            elif i == len(slices) - 1:
                outputs.append(inputs[slices[-2]:])
            else:
                outputs.append(inputs[slices[i - 1]:slices[i]])

        return outputs
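A quick NumPy check of the proportional boundaries (illustrative numbers): with parts = [1, 2, 1] and a batch of 8, the cumulative fractions give slice points [2, 6, 8], i.e. sub-batches of sizes 2, 4 and 2.

    import numpy as np

    parts, batch_size = [1, 2, 1], 8
    slices = [int(p * batch_size / sum(parts))
              for p in np.cumsum(parts).astype('float64')]
    print(slices)  # [2, 6, 8]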
Code example #6
        def get_gradients(self, loss, params):
            accum_grads = []
            for p in params:
                # Lazily create one accumulator per parameter
                if p not in self.accum_grads:
                    self.accum_grads[p] = K.zeros(K.int_shape(p),
                                                  dtype=K.dtype(p))
                accum_grads.append(self.accum_grads[p])

            # The wrapped optimizer sees the averaged accumulated gradients
            return [ag / self.grad_accum_steps for ag in accum_grads]
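Because get_gradients returns ag / self.grad_accum_steps, the wrapped optimizer always sees the average gradient of the current accumulation window. The accumulators live in the self.accum_grads dict, keyed by parameter, which is assumed to be initialized to an empty dict elsewhere in the class (e.g. in __init__).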
Code example #7
    def get_updates(self, loss, params):
        grads = self.get_gradients(loss, params)
        self.updates = [K.update_add(self.iterations, 1)]
        self.weights = [self.iterations]
        lr = self.learning_rate

        for i, (p, g) in enumerate(zip(params, grads)):
            g2 = K.square(g) + self.epsilon1
            shape, dtype = K.int_shape(p), K.dtype(p)
            factored_shape = self.factored_shape(shape)
            if factored_shape is None:
                # Define the second-moment accumulator
                v = K.zeros(shape, dtype=dtype, name='v_' + str(i))
                self.weights.append(v)
                # EMA of the squared gradients
                v_t = self.beta2 * v + (1.0 - self.beta2) * g2
                self.updates.append(K.update(v, v_t))
            else:
                # Define factored accumulators (row and column statistics)
                shape1, axis1, shape2, axis2 = factored_shape
                vr = K.zeros(shape1, dtype=dtype, name='vr_' + str(i))
                vc = K.zeros(shape2, dtype=dtype, name='vc_' + str(i))
                self.weights.extend([vr, vc])
                # EMAs of the row- and column-reduced squared gradients
                vr_t = self.beta2 * vr + (1.0 - self.beta2) * \
                    K.mean(g2, axis=axis1, keepdims=True)
                vc_t = self.beta2 * vc + (1.0 - self.beta2) * \
                    K.mean(g2, axis=axis2, keepdims=True)
                self.updates.extend([K.update(vr, vr_t), K.update(vc, vc_t)])
                # Reconstruct the full second moment from the factors
                v_t = vr_t * vc_t / K.mean(vr_t, axis=axis2, keepdims=True)
            # Core update
            u = g / K.sqrt(v_t)
            # Update clipping: cap the RMS of the update
            if self.clipping_threshold is not None:
                u_rms = K.sqrt(K.mean(K.square(u)))
                d = self.clipping_threshold
                u = u / K.maximum(1.0, u_rms / d)
            # Momentum (moving average) on the update
            if self.beta1 > 0.0:
                # Define the momentum variable
                m = K.zeros(shape, dtype=dtype, name='m_' + str(i))
                self.weights.append(m)
                # Define its update
                m_t = self.beta1 * m + (1.0 - self.beta1) * u
                self.updates.append(K.update(m, m_t))
                u = m_t
            # Scale the update by the parameter scale (RMS of p)
            if self.multiply_by_parameter_scale:
                u = u * K.maximum(K.sqrt(K.mean(K.square(p))), self.epsilon2)
            # Apply the parameter update
            self.updates.append(K.update(p, p - lr * u))

        return self.updates
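A NumPy sanity check of the factored reconstruction above (illustrative only, for a 2-D weight where the row statistic vr averages over columns, axis1, and the column statistic vc averages over rows, axis2): the full second moment is rebuilt as vr * vc / mean(vr), which is exact whenever the matrix of squared gradients is rank one.

    import numpy as np

    g2 = np.random.rand(3, 1) * np.random.rand(1, 5)   # rank-1 squared gradients
    vr = g2.mean(axis=1, keepdims=True)                # (3, 1) row statistics
    vc = g2.mean(axis=0, keepdims=True)                # (1, 5) column statistics
    v = vr * vc / vr.mean(axis=0, keepdims=True)       # (3, 5) reconstruction
    assert np.allclose(v, g2)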
Code example #8
    def get_updates(self, loss, params):
        # Fetch the raw gradients
        grads = self.get_gradients(loss, params)
        # Define the gradient accumulators
        self.accum_grads = [
            K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params
        ]
        # Reset to zero on update steps, otherwise keep summing
        accum_updates = [
            K.update(ag, K.switch(self.cond, ag * 0, ag + g))
            for g, ag in zip(grads, self.accum_grads)
        ]
        # Inherit the wrapped optimizer's updates, which now read gradients
        # through the overridden get_gradients
        self.optimizer.get_gradients = self.get_gradients
        super(GradientAccumulation, self).get_updates(loss, params)
        self.updates.extend(accum_updates)
        self.weights.extend(self.accum_grads)
        return self.updates
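Unlike the subclassing variants in code examples #1 and #4, this one wraps an existing optimizer instance: it injects its own get_gradients into self.optimizer so the inherited get_updates consumes the accumulated gradients, lets the parent call populate self.updates and self.weights, and then appends the accumulator updates. self.cond, the is-it-an-update-step predicate analogous to cond in code example #1, is assumed to be defined elsewhere in the class.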
Code example #9
    def call(self, inputs):
        """如果custom_position_ids,那么第二个输入为自定义的位置id
        """
        if self.custom_position_ids:
            inputs, position_ids = inputs
            if K.dtype(position_ids) != 'int32':
                position_ids = K.cast(position_ids, 'int32')
            pos_embeddings = K.gather(self.embeddings, position_ids)
        else:
            input_shape = K.shape(inputs)
            batch_size, seq_len = input_shape[0], input_shape[1]
            pos_embeddings = self.embeddings[:seq_len]
            pos_embeddings = K.expand_dims(pos_embeddings, 0)
            if self.merge_mode != 'add':
                pos_embeddings = K.tile(pos_embeddings, [batch_size, 1, 1])

        if self.merge_mode == 'add':
            return inputs + pos_embeddings
        else:
            return K.concatenate([inputs, pos_embeddings])
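This is the plain counterpart of the hierarchical layer in code example #2: embeddings are either gathered by explicit position ids or sliced to the sequence length, and tiling across the batch is only needed for the concatenate merge, since broadcasting already handles the additive one.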
Code example #10
        def get_updates(self, loss, params):
            updates = super(new_optimizer, self).get_updates(loss, params)

            k, alpha = self.steps_per_slow_update, self.slow_step_size
            # True every k-th iteration
            cond = K.equal(self.iterations % k, 0)
            # Slow copies of the parameters ("slow weights")
            slow_vars = [
                K.zeros(K.int_shape(p),
                        dtype=K.dtype(p),
                        name='slow_var_%s' % i) for i, p in enumerate(params)
            ]

            with tf.control_dependencies(updates):
                # Every k steps, move the slow weights toward the fast ones
                slow_updates = [
                    K.update(q, K.switch(cond, q + alpha * (p - q), q))
                    for p, q in zip(params, slow_vars)
                ]
                with tf.control_dependencies(slow_updates):
                    # ...then reset the fast weights to the slow ones
                    copy_updates = [
                        K.update(p, K.switch(cond, q, p))
                        for p, q in zip(params, slow_vars)
                    ]

            return copy_updates
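This appears to implement the Lookahead scheme: every steps_per_slow_update iterations the slow weights move a fraction slow_step_size toward the fast weights (q <- q + alpha * (p - q)), after which the fast weights are reset to the slow ones. Note that slow_vars start as zeros here, so the very first slow update pulls the parameters toward zero; initializing the slow copies from params (not shown in the source) is a common remedy.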
Code example #11
    def call(self, inputs):
        if K.dtype(inputs) != 'int32':
            inputs = K.cast(inputs, 'int32')
        # Look up low-dimensional embeddings, then project up to model width
        outputs = K.gather(self._embeddings, inputs)
        outputs = K.dot(outputs, self._project_kernel)
        return outputs
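A NumPy sketch of the factorized lookup above (shapes invented for illustration; the small-table-plus-projection pattern is reminiscent of ALBERT-style embedding factorization): token ids index a small table, whose rows are then projected up to the model width.

    import numpy as np

    vocab, k, hidden = 100, 8, 32
    _embeddings = np.random.randn(vocab, k)        # small lookup table
    _project_kernel = np.random.randn(k, hidden)   # up-projection
    ids = np.array([[3, 7, 7]], dtype='int32')     # (batch, seq_len)
    out = _embeddings[ids] @ _project_kernel
    print(out.shape)  # (1, 3, 32)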