Example #1
 def call(self, x, mask=None):
     if 0. < self.rate < 1.:
         noise_shape = self._get_noise_shape(x)
         if self.permanent:
             x = K.dropout(x, self.rate)
         else:       
             x = K.in_train_phase(K.dropout(x, self.rate), x)
     return x
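Example #1 is the call method of a dropout layer with a "permanent" flag: when the flag is set, dropout is applied at inference time as well (Monte-Carlo style) instead of being gated by the training phase; note that the computed noise_shape is never actually passed on to K.dropout. A minimal, self-contained sketch of such a layer, assuming tf.keras and a hypothetical class name:

import tensorflow as tf
from tensorflow.keras import backend as K

class PermanentDropout(tf.keras.layers.Layer):  # hypothetical name
    def __init__(self, rate, permanent=False, **kwargs):
        super().__init__(**kwargs)
        self.rate = rate
        self.permanent = permanent

    def call(self, x, mask=None):
        if 0. < self.rate < 1.:
            if self.permanent:
                x = K.dropout(x, self.rate)  # applied in training and at inference
            else:
                x = K.in_train_phase(K.dropout(x, self.rate), x)
        return x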
Example #2
 def call(self, inputs, **kwargs):
     main_input, embedding_matrix = inputs
     input_shape_tensor = K.shape(main_input)
     last_input_dim = K.int_shape(main_input)[-1]
     emb_input_dim, emb_output_dim = K.int_shape(embedding_matrix)
     projected = K.dot(K.reshape(main_input, (-1, last_input_dim)),
                       self.embedding_weights['projection'])
     if self.add_biases:
         projected = K.bias_add(projected,
                                self.embedding_weights['biases'],
                                data_format='channels_last')
     if 0 < self.projection_dropout < 1:
         projected = K.in_train_phase(
             lambda: K.dropout(projected, self.projection_dropout),
             projected,
             training=kwargs.get('training'))
     attention = K.dot(projected, K.transpose(embedding_matrix))
     if self.scaled_attention:
         # scaled dot-product attention, described in
         # "Attention is all you need" (https://arxiv.org/abs/1706.03762)
         sqrt_d = K.constant(math.sqrt(emb_output_dim), dtype=K.floatx())
         attention = attention / sqrt_d
     result = K.reshape(
         self.activation(attention),
         (input_shape_tensor[0], input_shape_tensor[1], emb_input_dim))
     return result
Example #3
  def call(self, inputs):
    # [batch_size, seq_length, embedding_dim]
    embed = self.dropout_embedding(inputs)
    if self.training:
      x = K.dropout(embed, level=self.input_dropout)
      for i in range(self.layer_num):
        x, state_h, state_c = self.rnn_layer[i](x, training=self.training)
      dropped_hidden = K.dropout(x, level=self.dropout)
    else:
      x = embed
      for i in range(self.layer_num):
        x, state_h, state_c = self.rnn_layer[i](x, training=self.training)
      dropped_hidden = x
    hidden = x
    x = self.output_layer(dropped_hidden)
    output = K.softmax(x)

    return output, hidden, dropped_hidden
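A note on K.dropout(x, level=...) as used above: the Keras backend implements inverted dropout, so the entries that survive are rescaled by 1 / (1 - level) and the expected activation stays the same. A quick check, assuming the TensorFlow backend with eager execution:

import numpy as np
from tensorflow.keras import backend as K

x = K.constant(np.ones((1000, 100), dtype='float32'))
dropped = K.dropout(x, level=0.5)
print(float(K.mean(dropped)))  # close to 1.0; individual entries are either 0.0 or 2.0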
Example #4
def _time_distributed_dense(x,
                            w,
                            b=None,
                            dropout=None,
                            input_dim=None,
                            output_dim=None,
                            timesteps=None,
                            training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: dropout rate to apply (the same dropout mask is
            used for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.int_shape(w)[1]

    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)

    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x
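A hypothetical call to the helper above, with made-up shapes purely for illustration (assumes "from tensorflow.keras import backend as K" and eager execution):

import numpy as np
from tensorflow.keras import backend as K

x = K.constant(np.random.rand(2, 5, 8).astype('float32'))  # (batch, timesteps, input_dim)
w = K.constant(np.random.rand(8, 3).astype('float32'))     # (input_dim, output_dim)
b = K.constant(np.zeros(3, dtype='float32'))

y = _time_distributed_dense(x, w, b, dropout=0.2,
                            input_dim=8, output_dim=3, timesteps=5,
                            training=True)
print(K.int_shape(y))  # (2, 5, 3)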
Example #5
 def dot_product_attention(self, x, mask=None, dropout=0.1, training=None):
     q, k, v = x
     logits = tf.matmul(q, k, transpose_b=True)  # [bs, 8, len, len]
     if self.bias:
         logits += self.b
     if mask is not None:  # [bs, len]
         mask = tf.expand_dims(mask, axis=1)
         mask = tf.expand_dims(mask, axis=1)  # [bs,1,1,len]
         logits = self.mask_logits(logits, mask)
     weights = tf.nn.softmax(logits, name="attention_weights")
     weights = K.in_train_phase(K.dropout(weights, dropout),
                                weights,
                                training=training)
     x = tf.matmul(weights, v)
     return x
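self.mask_logits is not shown in the snippet; a common definition in QANet-style attention code (an assumption here, not necessarily the author's) adds a large negative constant wherever the mask is zero, so those positions receive near-zero weight after the softmax:

import tensorflow as tf

def mask_logits(inputs, mask, mask_value=-1e30):
    # mask == 1 keeps a position; mask == 0 pushes its logit towards -1e30
    mask = tf.cast(mask, inputs.dtype)
    return inputs + mask_value * (1.0 - mask)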
Example #6
    def call(self, x):

        row = []
        col = []

        # form all pairwise combinations of the feature embeddings
        for r, c in combinations(x, 2):  # [field * (field - 1)] / 2
            row.append(r)
            col.append(c)

        p = K.concatenate(
            row,
            axis=1)  # [batch_size, [field * (field - 1)] / 2, embedding_size]
        q = K.concatenate(col, axis=1)

        inner_product = p * q  # element-wise product of each feature pair
        # add non-linearity via the ReLU activation
        attention_tmp = K.relu(
            K.bias_add(K.dot(inner_product, self.attention_W),
                       self.attention_b))
        # [batch_size, [field * (field - 1)] / 2, embedding_size] * [embedding_size, attention_units]  = > [batch_size, [field * (field - 1)] / 2, attention_units]

        # context vector
        attention_tmp_dot = K.dot(
            attention_tmp,
            self.projection_h)  # [batch_size, [field * (field - 1)] / 2, 1]

        # softmax is computed per sample, over all feature pairs of that sample
        attention_weight = K.softmax(
            attention_tmp_dot, axis=1
        )  # equivalent to K.exp(attention_tmp_dot) / K.sum(K.exp(attention_tmp_dot), axis=1, keepdims=True)
        # [batch_size, [field * (field - 1)] / 2, 1]

        # weight each inner product by its attention score and sum over the pairs
        attention_output = K.sum(inner_product * attention_weight,
                                 axis=1)  # [batch_size, embedding_size]

        # apply dropout (note: not wrapped in K.in_train_phase, so it is also active at inference)
        attention_output = K.dropout(
            attention_output,
            self.dropout_rate)  # [batch_size, embedding_size]

        # equivalent to a Dense layer producing one value per sample
        afm_out = K.dot(attention_output, self.projection_p)  # [batch_size, 1]

        return afm_out
Example #7
 def dropped_inputs():
     return K.dropout(ones, rate)
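Closures like this one (and the similar ones in the following examples) are typically handed to K.in_train_phase so that the dropout mask is only sampled during training and then reused for every timestep, as in Keras' recurrent layers. A self-contained sketch of that surrounding pattern; the concrete shapes and rate are assumptions:

import numpy as np
from tensorflow.keras import backend as K

inputs = K.constant(np.random.rand(2, 5, 8).astype('float32'))  # (batch, time, dim)
rate, input_dim, training = 0.3, 8, True

ones = K.ones_like(K.reshape(inputs[:, 0, :], (-1, input_dim)))

def dropped_inputs():
    # only evaluated in the training branch of K.in_train_phase
    return K.dropout(ones, rate)

dp_mask = K.in_train_phase(dropped_inputs, ones, training=training)
inputs = inputs * K.expand_dims(dp_mask, axis=1)  # same mask at every timestep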
Example #8
 def dropped_inputs():  # pylint: disable=function-redefined
   return K.dropout(ones, self.recurrent_dropout)
Example #9
 def dropped_inputs():
   return K.dropout(ones, self.dropout)
Example #10
 def dropped_weight_connections():
     return K.dropout(ones,
                      self.kernel_dropout) * (1 - self.kernel_dropout)
Example #11
 def drop_inputs():
     return K.dropout(inputs, self.unit_dropout)
Example #12
 def dropped_softmax():
     return K.dropout(attention_softmax, self.dropout)
Example #13
    # ... end of build()
    else:
      self.bias = None
    self.built = True

  def call(self, inputs, states, training=None):
    if self.in_dropout_mask is None and self.use_dropout_mask is True:
      self.in_dropout_mask = K.dropout(
          array_ops.ones_like(inputs),
          self.in_dropout)

    if self.recur_dropout_mask is None and self.use_recur is True:
      self.recur_dropout_mask = K.dropout(
          array_ops.ones_like(self.kern_3),
          self.recur_dropout)

    self.connectivity_kern_1 = K.dropout(
        array_ops.ones_like(self.kern_1),
        self.connectivity_1)
    self.connectivity_kern_2 = K.dropout(
        array_ops.ones_like(self.kern_2),
        self.connectivity_2)
    self.connectivity_kern_3 = K.dropout(
        array_ops.ones_like(self.kern_3),
        self.connectivity_3)

    K.set_value(self.kern_1,
                spec_normalize(self.kern_1) *
                self.connectivity_kern_1)