def call(self, x):
    if 0. < self.prob < 1.:
        self.layer.kernel = K.in_train_phase(
            K.dropout(self.layer.kernel, self.prob), self.layer.kernel)
        self.layer.bias = K.in_train_phase(
            K.dropout(self.layer.bias, self.prob), self.layer.bias)
    return self.layer.call(x)

def call(self, x, mask=None):
    # Note: the original dropped `self.kernel` but computed with `self.W`;
    # the weight names are unified here so the dropout actually takes effect.
    if 0. < self.prob < 1.:
        self.W = K.in_train_phase(K.dropout(self.W, self.prob), self.W)
        self.b = K.in_train_phase(K.dropout(self.b, self.prob), self.b)
    # Same as the original Dense forward pass
    output = K.dot(x, self.W)
    if self.bias:
        output += self.b
    return self.activation(output)

def SR_model(num_classes, dropout, mc_dropout, input_dim, training, pooling='avg'):
    inputs = Input(input_dim)
    base_model = EfficientNetB0(include_top=False,
                                weights='imagenet',
                                input_tensor=inputs)
    base_model.trainable = True
    x = base_model.output
    x = Dropout(dropout, name='top_dropout_1')(x, training=training)
    if pooling == 'avg':
        x = GlobalAveragePooling2D(name='avg_pool')(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D(name='max_pool')(x)
    x = Dropout(dropout, name='top_dropout_2')(x, training=training)
    x = Dense(512, activation='relu', name='dense_512')(x)
    x = BatchNormalization()(x)
    x = Dropout(dropout, name='top_dropout_3')(x, training=training)
    x = Lambda(lambda x: K.dropout(x, level=mc_dropout))(x)
    # classification head (f)
    sr = Dense(num_classes, activation='softmax', name='dense_f')(x)
    return Model(inputs=inputs, outputs=sr)

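# A minimal usage sketch (an assumption, not part of the original source): because
# the Lambda layer above applies K.dropout unconditionally, the head stays
# stochastic at inference, so Monte Carlo dropout estimates can be obtained by
# averaging several forward passes. `model`, `x_batch`, and `n_samples` are
# hypothetical names.
import numpy as np

def mc_predict(model, x_batch, n_samples=20):
    # Each predict() call samples a fresh dropout mask through the Lambda layer.
    preds = np.stack([model.predict(x_batch) for _ in range(n_samples)], axis=0)
    # Predictive mean and a simple per-class uncertainty estimate
    return preds.mean(axis=0), preds.std(axis=0)
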
def _time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None, timesteps=None):
    '''Apply y.w + b for every temporal slice y of x.'''
    if not input_dim:
        # won't work with TensorFlow
        input_dim = K.shape(x)[2]
    if not timesteps:
        # won't work with TensorFlow
        timesteps = K.shape(x)[1]
    if not output_dim:
        # won't work with TensorFlow
        output_dim = K.shape(w)[1]
    if dropout:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x *= expanded_dropout_matrix
    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = x + b
    # reshape to 3D tensor
    x = K.reshape(x, (-1, timesteps, output_dim))
    return x

def call(self, x):
    # Input is a 3-D or 4-D Tensor
    ndim = K.ndim(x)
    if ndim == 4:
        dims = K.int_shape(x)
        x = K.reshape(x, (-1, dims[1] * dims[2], 1, self.D))
    elif ndim != 3:
        raise ValueError('Encoding input should have shape BxNxD or BxHxWxD')
    # Residual vectors
    R = x - self.codes
    ''' OLD WAY
    _x_i = K.repeat_elements(x, self.K, 1)
    _c_k = K.tile(self.codes, (n, 1))
    R = K.reshape(_x_i - _c_k, (-1, n, self.K, self.D))
    '''
    # Assignment weights, optional dropout
    if self.dropout_rate is not None:
        W_ik = K.softmax(scaledL2(R, K.dropout(self.scale, self.dropout_rate)))
    else:
        W_ik = K.softmax(scaledL2(R, self.scale))
    # Aggregation
    E = tf.einsum('bik,bikd->bkd', W_ik, R)
    # Normalize encoding vectors
    if self.l2_normalize:
        E = tf.nn.l2_normalize(E, axis=-1)
    E = tf.layers.Flatten()(E)
    return E

def encoder(self, inputs):
    if K.dtype(inputs) != 'int32':
        inputs = K.cast(inputs, 'int32')
    masks = K.equal(inputs, 0)
    # Embeddings
    embeddings = K.gather(self.embeddings, inputs)
    embeddings *= self._model_dim**0.5  # Scale
    # Position encodings
    position_encodings = PositionEncoding(self._model_dim)(embeddings)
    # Embeddings + position encodings
    encodings = embeddings + position_encodings
    # Dropout
    encodings = K.dropout(encodings, self._dropout_rate)
    for i in range(self._encoder_stack):
        # Multi-head attention
        attention = MultiHeadAttention(self._n_heads,
                                       self._model_dim // self._n_heads)
        attention_input = [encodings, encodings, encodings, masks]
        attention_out = attention(attention_input)
        # Add & Norm
        attention_out += encodings
        attention_out = LayerNormalization()(attention_out)
        # Feed-forward
        ff = PositionWiseFeedForward(self._model_dim, self._feed_forward_size)
        ff_out = ff(attention_out)
        # Add & Norm
        ff_out += attention_out
        encodings = LayerNormalization()(ff_out)
    return encodings, masks

def build_model(self):
    # Build the VGG-style network for 10 classes with massive dropout and
    # weight decay, as described in the paper.
    weight_decay = self.weight_decay
    basic_dropout_rate = 0.3
    model_input = Input(shape=self.x_shape)
    curr = Dense(512, kernel_regularizer=regularizers.l2(weight_decay))(model_input)
    curr = Activation('relu')(curr)
    curr = BatchNormalization()(curr)
    curr = Dropout(basic_dropout_rate + 0.2)(curr)
    curr = Lambda(lambda x: K.dropout(x, level=self.mc_dropout_rate))(curr)

    # classification head (f)
    curr1 = Dense(self.num_classes, activation='softmax')(curr)

    # selection head (g)
    curr2 = Dense(512, kernel_regularizer=regularizers.l2(weight_decay))(curr)
    curr2 = Activation('relu')(curr2)
    curr2 = BatchNormalization()(curr2)
    # this normalization is identical to initializing the batchnorm gamma to 1/10
    curr2 = Lambda(lambda x: x / 10)(curr2)
    curr2 = Dense(1, activation='sigmoid')(curr2)

    # auxiliary head (h)
    selective_output = Concatenate(axis=1, name="selective_head")([curr1, curr2])
    auxiliary_output = Dense(self.num_classes, activation='softmax',
                             name="classification_head")(curr)
    model = Model(inputs=model_input, outputs=[selective_output, auxiliary_output])
    return model

def call(self, inputs):
    if self._masking:
        assert len(inputs) == 4, "inputs should be set [queries, keys, values, masks]."
        queries, keys, values, masks = inputs
    else:
        assert len(inputs) == 3, "inputs should be set [queries, keys, values]."
        queries, keys, values = inputs
    if K.dtype(queries) != 'float32':
        queries = K.cast(queries, 'float32')
    if K.dtype(keys) != 'float32':
        keys = K.cast(keys, 'float32')
    if K.dtype(values) != 'float32':
        values = K.cast(values, 'float32')
    matmul = K.batch_dot(queries, tf.transpose(keys, [0, 2, 1]))  # MatMul
    scaled_matmul = matmul / int(queries.shape[-1])**0.5  # Scale
    if self._masking:
        scaled_matmul = self.mask(scaled_matmul, masks)  # Mask (opt.)
    if self._future:
        scaled_matmul = self.future_mask(scaled_matmul)
    softmax_out = K.softmax(scaled_matmul)  # SoftMax
    # Dropout
    out = K.dropout(softmax_out, self._dropout_rate)
    outputs = K.batch_dot(out, values)
    return outputs

def call(self, inputs):
    values = inputs
    values_linear = K.dot(values, self._weights_values)
    # Dropout
    out = K.dropout(values_linear, self._dropout_rate)
    return out

def call(self, inputs):
    if random.random() > 0.5:
        kernel = B.dropout(self.kernel, 0.5) * random.uniform(-1, 1)
    else:
        kernel = self.kernel
    outputs = B.dot(inputs, kernel)
    return self.activation(outputs)

def encoder(self, inputs):
    if K.dtype(inputs) != 'int32':
        inputs = K.cast(inputs, 'int32')
    masks = K.equal(inputs, 0)
    # Embeddings
    embeddings = K.gather(self.embeddings, inputs)
    embeddings *= self._model_dim ** 0.5  # Scale
    # Position encodings
    position_encodings = self.EncoderPositionEncoding(embeddings)
    # Embeddings + position encodings
    encodings = embeddings + position_encodings
    # Dropout
    encodings = K.dropout(encodings, self._dropout_rate)
    for i in range(self._encoder_stack):
        # Multi-head attention
        attention = self.EncoderMultiHeadAttetions[i]
        attention_input = [encodings, encodings, encodings, masks]
        attention_out = attention(attention_input)
        # Add & Norm
        attention_out += encodings
        attention_out = self.EncoderLayerNorms0[i](attention_out)
        # Feed-forward
        ff = self.EncoderPositionWiseFeedForwards[i]
        ff_out = ff(attention_out)
        # Add & Norm
        ff_out += attention_out
        encodings = self.EncoderLayerNorms1[i](ff_out)
    return encodings, masks

def call(self, inputs, **kwargs):
    main_input, embedding_matrix = inputs
    input_shape_tensor = K.shape(main_input)
    last_input_dim = K.int_shape(main_input)[-1]
    emb_input_dim, emb_output_dim = K.int_shape(embedding_matrix)
    projected = K.dot(K.reshape(main_input, (-1, last_input_dim)),
                      self.projection)
    if self.add_biases:
        projected = K.bias_add(projected, self.biases,
                               data_format='channels_last')
    if 0 < self.projection_dropout < 1:
        projected = K.in_train_phase(
            lambda: K.dropout(projected, self.projection_dropout),
            projected,
            training=kwargs.get('training'))
    attention = K.dot(projected, K.transpose(embedding_matrix))
    if self.scaled_attention:
        # scaled dot-product attention, as described in
        # "Attention Is All You Need" (https://arxiv.org/abs/1706.03762)
        sqrt_d = K.constant(math.sqrt(emb_output_dim), dtype=K.floatx())
        attention = attention / sqrt_d
    result = K.reshape(
        self.activation(attention),
        (input_shape_tensor[0], input_shape_tensor[1], emb_input_dim))
    return result

def call(self, inputs):
    outputs = K.concatenate(inputs, axis=1)
    for i in range(self._n_layers):
        outputs = K.dot(outputs, self.weights[i])
        outputs = self._activation(outputs)
        outputs = K.dropout(outputs, self._dropout_rate)
    outputs = K.dot(outputs, self.output_weight)
    return outputs

def call(self, inputs):
    if 0. < self.rate < 1.:
        noise_shape = self._get_noise_shape(inputs)
        outputs = K.dropout(inputs, self.rate, noise_shape, seed=self.seed)
    else:
        outputs = inputs
    return outputs

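# Note that K.dropout is unconditional inverted dropout (kept units are rescaled
# by 1/(1 - rate)), so the call above also drops units at inference time, which
# is Monte Carlo dropout behaviour. A sketch of the conventional train-only
# variant, assuming the same `rate`, `_get_noise_shape`, and `seed` attributes:
def call(self, inputs, training=None):
    if 0. < self.rate < 1.:
        noise_shape = self._get_noise_shape(inputs)

        def dropped_inputs():
            # a fresh mask is sampled only in the training phase
            return K.dropout(inputs, self.rate, noise_shape, seed=self.seed)

        # identity at test time, dropout during training
        return K.in_train_phase(dropped_inputs, inputs, training=training)
    return inputs
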
def __init__(self, dropout=0.2, mc_dropout=0.2, num_classes=1, training=True,
             input_dim=(224, 224, 3), pooling="avg"):
    self.c = 0.75
    self.lamda = 32
    self.alpha = 0.5
    self.dropout = dropout
    self.mc_dropout = mc_dropout
    self.pooling = pooling
    self.input_dim = input_dim
    self.training = training
    self.num_classes = num_classes

    # create model
    inputs = Input(shape=self.input_dim)
    base_model = EfficientNetB0(include_top=False,
                                weights='imagenet',
                                input_tensor=inputs)
    base_model.trainable = True
    x = base_model.output
    x = Dropout(self.dropout, name='top_dropout_1')(x, training=self.training)
    if pooling == 'avg':
        x = GlobalAveragePooling2D(name='avg_pool')(x)
    elif pooling == 'max':
        x = GlobalMaxPooling2D(name='max_pool')(x)
    x = Dropout(self.dropout, name='top_dropout_2')(x, training=self.training)
    x = Dense(512, activation='relu', name='dense_512')(x)
    x = BatchNormalization()(x)
    x = Dropout(self.mc_dropout, name='top_dropout_3')(x, training=self.training)
    x = Lambda(lambda x: K.dropout(x, level=self.mc_dropout))(x)

    # classification head (f)
    f = Dense(self.num_classes, activation='softmax', name='f_head')(x)

    # selection head (g)
    g = Dense(512, activation='relu', name='dense_512_g')(x)
    g = BatchNormalization()(g)
    # this normalization is identical to initializing the batchnorm gamma to 1/10
    g = Lambda(lambda a: a / 10)(g)
    g = Dense(1, activation='sigmoid', name='g_head')(g)

    # auxiliary head (h)
    selective_output = Concatenate(axis=1, name="selective_head")([f, g])
    auxillary_output = Dense(self.num_classes, activation='softmax',
                             name='auxilary_head')(x)
    self.model = Model(inputs=inputs,
                       outputs=[selective_output, auxillary_output])

def decoder(self, inputs):
    decoder_inputs, encoder_encodings, encoder_masks = inputs
    if K.dtype(decoder_inputs) != 'int32':
        decoder_inputs = K.cast(decoder_inputs, 'int32')
    decoder_masks = K.equal(decoder_inputs, 0)
    # Embeddings
    embeddings = K.gather(self.embeddings, decoder_inputs)
    embeddings *= self._model_dim**0.5  # Scale
    # Position encodings
    position_encodings = PositionEncoding(self._model_dim)(embeddings)
    # Embeddings + position encodings
    encodings = embeddings + position_encodings
    # Dropout
    encodings = K.dropout(encodings, self._dropout_rate)
    for i in range(self._decoder_stack):
        # Masked multi-head attention
        masked_attention = MultiHeadAttention(self._n_heads,
                                              self._model_dim // self._n_heads,
                                              future=True)
        masked_attention_input = [encodings, encodings, encodings, decoder_masks]
        masked_attention_out = masked_attention(masked_attention_input)
        # Add & Norm
        masked_attention_out += encodings
        masked_attention_out = LayerNormalization()(masked_attention_out)
        # Multi-head attention over the encoder output
        attention = MultiHeadAttention(self._n_heads,
                                       self._model_dim // self._n_heads)
        attention_input = [masked_attention_out, encoder_encodings,
                           encoder_encodings, encoder_masks]
        attention_out = attention(attention_input)
        # Add & Norm
        attention_out += masked_attention_out
        attention_out = LayerNormalization()(attention_out)
        # Feed-forward
        ff = PositionWiseFeedForward(self._model_dim, self._feed_forward_size)
        ff_out = ff(attention_out)
        # Add & Norm
        ff_out += attention_out
        encodings = LayerNormalization()(ff_out)
    # The pre-softmax projection shares its parameters with the embeddings
    linear_projection = K.dot(encodings, K.transpose(self.embeddings))
    outputs = K.softmax(linear_projection)
    return outputs

def call(self, inputs, **kwargs):
    categorical_inputs, numerical_inputs = inputs
    outputs = K.concatenate(categorical_inputs + numerical_inputs, axis=-1)
    for i in range(self._n_layers):
        outputs = K.dot(outputs, self._kernel_weights[i])
        outputs = self._activation(outputs)
        outputs = K.in_train_phase(
            K.dropout(outputs, self._dropout_rate),
            outputs,
        )
    outputs = K.dot(outputs, self._output_weight)
    return outputs

def call(self, inputs):
    # queries: [None, n, k]
    # keys:    [None, n, k]
    # values:  [None, n, k]
    queries, keys, values = inputs
    score = K.batch_dot(queries, tf.transpose(keys, [0, 2, 1]))  # [None, n, n]
    score = score / int(queries.shape[-1])**0.5  # Scale
    score = K.softmax(score)  # SoftMax
    score = K.dropout(score, self._dropout)  # Dropout
    outputs = K.batch_dot(score, values)  # [None, n, k]
    return outputs

def dot_product_attention(self, x, mask=None, dropout=0.1, training=None):
    q, k, v = x
    logits = tf.matmul(q, k, transpose_b=True)  # [bs, 8, len, len]
    if self.bias:
        logits += self.b
    if mask is not None:  # [bs, len]
        mask = tf.expand_dims(mask, axis=1)
        mask = tf.expand_dims(mask, axis=1)  # [bs, 1, 1, len]
        logits = self.mask_logits(logits, mask)
    weights = tf.nn.softmax(logits, name="attention_weights")
    weights = K.in_train_phase(K.dropout(weights, dropout), weights,
                               training=training)
    x = tf.matmul(weights, v)
    return x

def _time_distributed_dense(x, w, b=None, dropout=None,
                            input_dim=None, output_dim=None,
                            timesteps=None, training=None):
    """Apply `y . w + b` for every temporal slice y of x.

    # Arguments
        x: input tensor.
        w: weight matrix.
        b: optional bias vector.
        dropout: whether to apply dropout (same dropout mask
            for every temporal slice of the input).
        input_dim: integer; optional dimensionality of the input.
        output_dim: integer; optional dimensionality of the output.
        timesteps: integer; optional number of timesteps.
        training: training phase tensor or boolean.

    # Returns
        Output tensor.
    """
    if not input_dim:
        input_dim = K.shape(x)[2]
    if not timesteps:
        timesteps = K.shape(x)[1]
    if not output_dim:
        output_dim = K.int_shape(w)[1]
    if dropout is not None and 0. < dropout < 1.:
        # apply the same dropout pattern at every timestep
        ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim)))
        dropout_matrix = K.dropout(ones, dropout)
        expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps)
        x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training)
    # collapse time dimension and batch dimension together
    x = K.reshape(x, (-1, input_dim))
    x = K.dot(x, w)
    if b is not None:
        x = K.bias_add(x, b)
    # reshape to 3D tensor
    if K.backend() == 'tensorflow':
        x = K.reshape(x, K.stack([-1, timesteps, output_dim]))
        x.set_shape([None, None, output_dim])
    else:
        x = K.reshape(x, (-1, timesteps, output_dim))
    return x

def decoder(self, inputs):
    decoder_inputs, encoder_encodings, encoder_masks = inputs
    if K.dtype(decoder_inputs) != 'int32':
        decoder_inputs = K.cast(decoder_inputs, 'int32')
    decoder_masks = K.equal(decoder_inputs, 0)
    # Embeddings
    embeddings = K.gather(self.embeddings, decoder_inputs)
    embeddings *= self._model_dim**0.5  # Scale
    # Position encodings
    position_encodings = self.DecoderPositionEncoding(embeddings)
    # Embeddings + position encodings
    encodings = embeddings + position_encodings
    # Dropout
    encodings = K.dropout(encodings, self._dropout_rate)
    for i in range(self._decoder_stack):
        # Masked multi-head attention
        masked_attention = self.DecoderMultiHeadAttetions0[i]
        masked_attention_input = [encodings, encodings, encodings, decoder_masks]
        masked_attention_out = masked_attention(masked_attention_input)
        # Add & Norm
        masked_attention_out += encodings
        masked_attention_out = self.DecoderLayerNorms0[i](masked_attention_out)
        # Multi-head attention over the encoder output
        attention = self.DecoderMultiHeadAttetions1[i]
        attention_input = [masked_attention_out, encoder_encodings,
                           encoder_encodings, encoder_masks]
        attention_out = attention(attention_input)
        # Add & Norm
        attention_out += masked_attention_out
        attention_out = self.DecoderLayerNorms1[i](attention_out)
        # Feed-forward
        ff = self.DecoderPositionWiseFeedForwards[i]
        ff_out = ff(attention_out)
        # Add & Norm
        ff_out += attention_out
        encodings = self.DecoderLayerNorms2[i](ff_out)
    # The pre-softmax projection shares its parameters with the embeddings
    linear_projection = K.dot(encodings, K.transpose(self.embeddings))
    outputs = K.softmax(linear_projection)
    return outputs

def build(self, input_shape):
    input_dim = input_shape[-1]
    self.kernel = self.add_weight(
        shape=(input_dim, self.units * 4 + self.levels * 2),
        name='kernel',
        initializer='glorot_uniform')
    self.recurrent_kernel = self.add_weight(
        shape=(self.units, self.units * 4 + self.levels * 2),
        name='recurrent_kernel',
        initializer='orthogonal')
    self.bias = self.add_weight(
        shape=(self.units * 4 + self.levels * 2,),
        name='bias',
        initializer='zeros')
    self.built = True
    if self.dropconnect:
        # DropConnect: use the dropped kernels only in the training phase
        self._kernel = K.dropout(self.kernel, self.dropconnect)
        self._kernel = K.in_train_phase(self._kernel, self.kernel)
        self._recurrent_kernel = K.dropout(self.recurrent_kernel,
                                           self.dropconnect)
        self._recurrent_kernel = K.in_train_phase(self._recurrent_kernel,
                                                  self.recurrent_kernel)
    else:
        self._kernel = self.kernel
        self._recurrent_kernel = self.recurrent_kernel

def call(self, x, training=None):
    # the first entry of every input is the selector
    sel = [xx[:, :, 0:1] for xx in x]
    sel_tensor = K.concatenate(sel)
    # dropout on the selector before the softmax
    sel_drop = K.dropout(sel_tensor, self.dropout)
    self.sel_drop_softmax = K.softmax(
        K.in_train_phase(sel_drop, sel_tensor, training=training))
    oo = [
        x[i][:, :, 1:] * self.sel_drop_softmax[:, :, i:i + 1]
        for i in range(len(x))
    ]
    # you don't need to explicitly define the custom gradient
    return [tf.add_n(oo), self.sel_drop_softmax]

def call(self, inputs, **kwargs):
    if self.masking:
        assert len(inputs) == 4, "inputs should be set [queries, keys, values, masks]"
        queries, keys, values, masks = inputs
    else:
        assert len(inputs) == 3, "inputs should be set [queries, keys, values]"
        queries, keys, values = inputs
    if K.dtype(queries) != 'float32':
        queries = K.cast(queries, 'float32')
    if K.dtype(keys) != 'float32':
        keys = K.cast(keys, 'float32')
    if K.dtype(values) != 'float32':
        values = K.cast(values, 'float32')
    # (batch_size*n_heads, max_len, head_dim)
    # (batch_size*n_heads, head_dim, max_len)
    # (batch_size*n_heads, max_len, max_len)
    matmul = K.batch_dot(queries, tf.transpose(keys, [0, 2, 1]))  # MatMul
    scaled_matmul = matmul / int(queries.shape[-1]) ** 0.5  # Scale
    if self.masking:
        scaled_matmul = self.mask(scaled_matmul, masks)
    if self.future:
        scaled_matmul = self.future_mask(scaled_matmul)
    softmax_out = K.softmax(scaled_matmul)  # SoftMax
    # TODO: what is this dropout for?
    # Dropout
    out = K.dropout(softmax_out, self.dropout_rate)
    # TODO: what does batch_dot actually do here?
    outputs = K.batch_dot(out, values)
    return outputs

def call(self, inputs):
    """
    Q: [h * batch, q_size, d_model]
    K: [h * batch, k_size, d_model]
    V: [h * batch, k_size, d_model]
    mask?: [h * batch, q_size, k_size]

    returns:
    - output: [h * batch, q_size, d_model]
    - attention weights: [h * batch, q_size, k_size]
    """
    Q, K, V = inputs[0], inputs[1], inputs[2]
    if self.use_mask:
        mask = inputs[3]
    out = tf.matmul(Q, tf.transpose(K, [0, 2, 1]))  # [h * batch, q_size, k_size]
    out = out / np.sqrt(self.d_k)
    if self.use_mask:
        # wherever mask is zero, replace the value in the tensor by -1e9
        out = tf.multiply(out, mask) + tf.multiply((1.0 - mask), -1e9)
    p_attn = tf.nn.softmax(out, name="attention_weights")
    # https://github.com/tensorflow/tensorflow/blob/r1.12/tensorflow/python/keras/layers/core.py#L136
    # TODO: figure out why `tf.cond` isn't used for implementing the `Dropout` layer.
    # NOTE: tf.cond seems to work without any visible difference, see the 2.0 branch.
    out = tf.contrib.framework.smart_cond(
        Backend.learning_phase(),
        lambda: Backend.dropout(p_attn, self.dropout),
        lambda: tf.identity(p_attn))
    # use the (possibly dropped-out) weights; the original multiplied p_attn
    # here, which silently discarded the dropout
    out = tf.matmul(out, V)  # [h * batch, q_size, d_model]
    return [out, p_attn]

def call(self, x, mask=None):
    if 0. < self.rate < 1.:
        noise_shape = self._get_noise_shape(x)
        x = K.dropout(x, self.rate, noise_shape)
    return x

def dropped_inputs():
    return K.dropout(ones, self.recurrent_dropout)

def dropped_inputs():
    return K.dropout(ones, self.dropout)

def x_prime():
    return K.dropout(x, p)

def dropped_inputs():
    return K.dropout(h, self.dropout_rate, K.shape(h))

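# For context, a hedged sketch of how closures like the four above are typically
# consumed (the surrounding code is not shown here; `ones` and `training` are
# hypothetical names): Keras recurrent layers pass the callable to
# K.in_train_phase so a fresh dropout mask is sampled only during training.
dp_mask = K.in_train_phase(dropped_inputs, ones, training=training)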