Example no. 1
def helper_test_coupling(my_activation, tf_activation, loss, inputs, y_true,
                         units):
    tf.random.set_seed(42)
    tf_layer = Dense(units, activation=tf_activation)
    tf_layer.build(inputs.shape)

    with tf.GradientTape(persistent=True) as tape:
        tape.watch([inputs, *tf_layer.trainable_weights])
        pred_tf = tf_layer(inputs)
        loss_value = loss(y_true, pred_tf)  # avoid shadowing the loss function

    *grads_tf, dY = tape.gradient(
        loss_value, [inputs, *tf_layer.trainable_weights, pred_tf])

    tf.random.set_seed(42)
    my_layer = NN.Layer(units, my_activation)
    my_layer.build(inputs.shape)

    pred_my = my_layer(inputs)

    dX, [dW, dB] = my_layer.backprop(dY)
    grads_my = [dX, dW, dB]

    assert np.allclose(pred_my, pred_tf)

    assert all(
        np.allclose(grad_my, grad_tf)
        for grad_my, grad_tf in zip(grads_my, grads_tf))
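A minimal usage sketch for the helper above; NN.relu is a hypothetical custom activation, and numpy/tensorflow plus the NN module are assumed to be in scope.

# Hedged usage sketch: NN.relu is a hypothetical custom activation.
inputs = tf.constant(np.random.randn(8, 4), dtype=tf.float32)   # batch of 8, 4 features
y_true = tf.constant(np.random.randn(8, 3), dtype=tf.float32)   # targets for 3 units
helper_test_coupling(
    my_activation=NN.relu,
    tf_activation='relu',
    loss=tf.keras.losses.MeanSquaredError(),
    inputs=inputs,
    y_true=y_true,
    units=3)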
Example no. 2
class FB_Classifier(Model):
    """Classifier that uses sentence embeddings to classify sentence -> DDC"""
    def __init__(self, input_dim=(1, None), output_dim=None):
        super().__init__()
        self.embed = hub.KerasLayer(PARAGRAPH_ENCODER, input_shape=(1,), trainable=TRAIN_ENCODER)
        # self.conv1 = Conv2D(32, 3, activation='relu')
        # self.flatten = Flatten()
        # TODO: the dense part could be its own layer (https://www.tensorflow.org/guide/keras/custom_layers_and_models#the_model_class)
        self.d1 = Dense(ENCODER_OUTDIM, activation='relu')
        self.d2 = Dense(output_dim)

    def call(self, inputs, training=True):
        # Override call() rather than __call__ so Keras' own bookkeeping
        # (building, masks, training flags) stays intact.
        x = self.embed(inputs)  # inputs shape: (batch,)
        x = self.d1(x)          # x shape: (batch, ENCODER_OUTDIM)
        return self.d2(x)       # output shape: (batch, output_dim)

    # TODO: custom loss - see https://www.tensorflow.org/guide/keras/custom_layers_and_models#the_add_loss_method

    def summary(self):
        # Build the sublayers explicitly so summary() works before the first
        # call, and delegate to Model.summary() instead of recursing.
        self.embed.build((None,))
        self.d1.build((None, ENCODER_OUTDIM))
        self.d2.build((None, ENCODER_OUTDIM))
        self.built = True
        super().summary()
Example no. 3
class SpanBegin(Layer):
    def __init__(self, **kwargs):
        super(SpanBegin, self).__init__(**kwargs)

    def build(self, input_shape):
        last_dim = input_shape[0][-1] + input_shape[1][-1]
        input_shape_dense_1 = input_shape[0][:-1] + (last_dim, )
        self.dense_1 = Dense(units=1)
        self.dense_1.build(input_shape_dense_1)
        self._trainable_weights = self.dense_1.trainable_weights  # trainable_weights is read-only in TF2
        super(SpanBegin, self).build(input_shape)

    def call(self, inputs):
        merged_context, modeled_passage = inputs
        span_begin_input = K.concatenate([merged_context, modeled_passage])
        span_begin_weights = TimeDistributed(self.dense_1)(span_begin_input)
        span_begin_probabilities = Softmax()(K.squeeze(span_begin_weights,
                                                       axis=-1))
        return span_begin_probabilities

    def compute_output_shape(self, input_shape):
        merged_context_shape, _ = input_shape
        return merged_context_shape[:-1]

    def get_config(self):
        config = super().get_config()
        return config
Example no. 4
    def vanilla_export(self):
        self._kwargs["name"] = self.name
        # Use keyword arguments rather than relying on Dense's positional
        # parameter order, which is easy to get wrong.
        layer = Dense(self.units,
                      activation=self.activation,
                      use_bias=self.use_bias,
                      kernel_initializer="glorot_uniform",
                      bias_initializer="zeros",
                      **self._kwargs)
        layer.build(self.input_shape)
        layer.kernel.assign(self.kernel.numpy() * self._get_coef())
        if self.use_bias:
            layer.bias.assign(self.bias.numpy())
        return layer
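A hedged sanity check for the export above; `constrained` is a hypothetical instance of the layer class this method belongs to, and we assume _get_coef() captures the full effect of the constraint on the kernel.

# Hedged sketch: `constrained` is a hypothetical instance of the layer above.
x = tf.random.normal((2, constrained.input_shape[-1]))
exported = constrained.vanilla_export()
# The exported vanilla Dense should reproduce the constrained layer's outputs.
assert np.allclose(constrained(x).numpy(), exported(x).numpy(), atol=1e-5)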
Example no. 5
class CumulativeSetAttentionLayer(tf.keras.layers.Layer):
    dense_options = {'activation': 'relu', 'kernel_initializer': 'he_uniform'}

    def __init__(self,
                 n_layers=2,
                 width=128,
                 latent_width=128,
                 aggregation_function='mean',
                 dot_prod_dim=64,
                 n_heads=4,
                 attn_dropout=0.3):
        super().__init__()
        assert aggregation_function == 'mean'
        self.width = width
        self.dot_prod_dim = dot_prod_dim
        self.attn_dropout = attn_dropout
        self.n_heads = n_heads
        self.psi = build_dense_dropout_model(n_layers, width, 0.,
                                             self.dense_options)
        self.psi.add(Dense(latent_width, **self.dense_options))
        self.rho = Dense(latent_width, **self.dense_options)

    def build(self, input_shape):
        self.psi.build(input_shape)
        encoded_shape = self.psi.compute_output_shape(input_shape)
        self.rho.build(encoded_shape)
        self.W_k = self.add_weight('W_k', (encoded_shape[-1] + input_shape[-1],
                                           self.dot_prod_dim * self.n_heads),
                                   initializer='he_uniform')
        self.W_q = self.add_weight('W_q', (self.n_heads, self.dot_prod_dim),
                                   initializer=tf.keras.initializers.Zeros())

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.n_heads)

    def call(self, inputs, segment_ids, training=None):
        if training is None:
            training = tf.keras.backend.learning_phase()

        encoded = self.psi(inputs)

        # cumulative mean aggregation
        agg = cumulative_segment_mean(encoded, segment_ids)
        agg = self.rho(agg)

        combined = tf.concat([inputs, agg], axis=-1)
        keys = tf.matmul(combined, self.W_k)
        keys = tf.stack(tf.split(keys, self.n_heads, -1), 1)
        keys = tf.expand_dims(keys, axis=2)
        # should have shape (el, heads, 1, dot_prod_dim)
        queries = tf.expand_dims(tf.expand_dims(self.W_q, -1), 0)
        # should have shape (1, heads, dot_prod_dim, 1)
        preattn = tf.matmul(keys, queries) / tf.sqrt(float(self.dot_prod_dim))
        preattn = tf.squeeze(tf.squeeze(preattn, -1), -1)
        return preattn
Example no. 6
    def build(config, classes, softmax=True, scale_adjust_wb=None):
        model = Sequential()
        input_shape = config.input_shape

        # 1. CONV => RELU => BN => POOL
        model.add(
            Conv2D(48, (7, 7), activation='relu', input_shape=input_shape))
        # model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))

        # 2. CONV => RELU => BN => POOL
        model.add(Conv2D(96, (5, 5), activation='relu'))
        # model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))

        # 3. CONV => RELU => BN => POOL
        model.add(Conv2D(128, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

        # 4. CONV => RELU => BN => POOL
        model.add(Conv2D(96, (3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

        # 5. CONV => RELU => BN => POOL
        model.add(Conv2D(64, (1, 1), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.3))
        model.add(Flatten())

        model.add(Dense(256, activation='relu'))
        # model.add(BatchNormalization())
        model.add(Dropout(0.5))

        model.add(Dense(16, activation='relu'))

        # final layer
        model.add(Dense(classes))
        if softmax:
            model.add(Activation("softmax"))
        if scale_adjust_wb is not None:
            # The Lambda below doesn't save/load well because it is a custom object, so it is replaced by a frozen Dense layer.
            # model.add(Lambda(lambda x: scale_adjust_wb[0] * x + scale_adjust_wb[1]))
            input_shape = (None, classes)
            scale_layer = Dense(
                classes,
                trainable=False,
                input_shape=input_shape,
            )
            scale_layer.build(input_shape=input_shape)
            scale_layer.set_weights(
                [np.diag(scale_adjust_wb[0]), scale_adjust_wb[1]])
            model.add(scale_layer)

        # return the constructed network architecture
        return model
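The frozen Dense above stands in for the commented-out Lambda: with kernel np.diag(w) and bias b, a Dense layer computes exactly the elementwise map w * x + b. A quick numpy sanity check of that identity:

# Diagonal-kernel Dense == elementwise affine map (sanity check).
import numpy as np
w = np.array([2.0, 0.5, 1.0], dtype=np.float32)
b = np.array([0.1, -0.2, 0.0], dtype=np.float32)
x = np.array([[1.0, 2.0, 3.0]], dtype=np.float32)
assert np.allclose(x @ np.diag(w) + b, w * x + b)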
Example no. 7
    def begin_insert_layer(self, layer_dim):
        # `self.layers[0].get_weights()` -> [weights, bias]
        next_units = self.layers[0].get_weights()[0].shape[0]
        layer = Dense(
            units=next_units,
            activation=tf.nn.relu,
            kernel_regularizer=regularizers.l1_l2(l1=self.l1, l2=self.l2),
            kernel_initializer=initializers.GlorotNormal(seed=self.seed),
            bias_initializer=initializers.Zeros())
        layer.build(input_shape=(None, layer_dim))
        self.layers.insert(0, layer)
Example no. 8
    def build(self, input_shape):
        """Creates the layer neurons and connections.

        Parameters
        ----------

        input_shape: Union[list, tuple, Any]
            Keras tensor (future input to layer) or list/tuple of Keras tensors
            to reference for weight shape computations.
        """

        Dense.build(self, input_shape)
        self.init_neurons(input_shape)
Example no. 9
    def _create_dense_layer(self, _, normalized_weights, num_classes):
        input_shape = tf.TensorShape([None, 512])
        dense_layer = Dense(
            input_shape=(512, ),
            units=num_classes,
            use_bias=False,
            name='fully_connected_to_softmax_crossentropy',
            dtype='float32',
            trainable=False,
        )
        dense_layer.build(input_shape)
        dense_layer.set_weights([normalized_weights.read_value()])
        return dense_layer
Example no. 10
class SpanEnd(Layer):
    def __init__(self, **kwargs):
        super(SpanEnd, self).__init__(**kwargs)

    def build(self, input_shape):
        emdim = input_shape[0][-1] // 2
        input_shape_bilstm_1 = input_shape[0][:-1] + (emdim * 14, )
        self.bilstm_1 = Bidirectional(LSTM(emdim, return_sequences=True))
        self.bilstm_1.build(input_shape_bilstm_1)
        input_shape_dense_1 = input_shape[0][:-1] + (emdim * 10, )
        self.dense_1 = Dense(units=1)
        self.dense_1.build(input_shape_dense_1)
        self._trainable_weights = self.bilstm_1.trainable_weights + self.dense_1.trainable_weights  # trainable_weights is read-only in TF2
        super(SpanEnd, self).build(input_shape)

    def call(self, inputs):
        encoded_passage, merged_context, modeled_passage, span_begin_probabilities = inputs
        weighted_sum = K.sum(
            K.expand_dims(span_begin_probabilities, axis=-1) * modeled_passage,
            -2)
        passage_weighted_by_predicted_span = K.expand_dims(weighted_sum,
                                                           axis=1)
        tile_shape = K.concatenate([[1], [K.shape(encoded_passage)[1]], [1]],
                                   axis=0)
        passage_weighted_by_predicted_span = K.tile(
            passage_weighted_by_predicted_span, tile_shape)
        multiply1 = modeled_passage * passage_weighted_by_predicted_span
        span_end_representation = K.concatenate([
            merged_context, modeled_passage,
            passage_weighted_by_predicted_span, multiply1
        ])

        span_end_representation = self.bilstm_1(span_end_representation)

        span_end_input = K.concatenate(
            [merged_context, span_end_representation])

        span_end_weights = TimeDistributed(self.dense_1)(span_end_input)

        span_end_probabilities = Softmax()(K.squeeze(span_end_weights,
                                                     axis=-1))
        return span_end_probabilities

    def compute_output_shape(self, input_shape):
        _, merged_context_shape, _, _ = input_shape
        return merged_context_shape[:-1]

    def get_config(self):
        config = super().get_config()
        return config
Example no. 11
    def build(self, input_shape):
        """Creates the layer neurons and connections.

        Parameters
        ----------

        input_shape: Union[list, tuple, Any]
            Keras tensor (future input to layer) or list/tuple of Keras tensors
            to reference for weight shape computations.
        """

        Dense.build(self, input_shape)
        self.init_neurons(input_shape.as_list())

        if self.config.getboolean('cell', 'bias_relaxation'):
            self.update_b()
Example no. 12
class Highway(Layer):

    activation = None
    transform_gate_bias = None

    def __init__(self, activation='relu', transform_gate_bias=-1, **kwargs):
        self.activation = activation
        self.transform_gate_bias = transform_gate_bias
        super(Highway, self).__init__(**kwargs)

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        dim = input_shape[-1]
        transform_gate_bias_initializer = Constant(self.transform_gate_bias)
        self.dense_1 = Dense(units=dim,
                             bias_initializer=transform_gate_bias_initializer)
        self.dense_1.build(input_shape)
        self.dense_2 = Dense(units=dim)
        self.dense_2.build(input_shape)
        self._trainable_weights = self.dense_1.trainable_weights + self.dense_2.trainable_weights  # trainable_weights is read-only in TF2

        super(Highway,
              self).build(input_shape)  # Be sure to call this at the end

    def call(self, x):
        dim = K.int_shape(x)[-1]
        transform_gate = self.dense_1(x)
        transform_gate = Activation("sigmoid")(transform_gate)
        carry_gate = Lambda(lambda x: 1.0 - x,
                            output_shape=(dim, ))(transform_gate)
        transformed_data = self.dense_2(x)
        transformed_data = Activation(self.activation)(transformed_data)
        transformed_gated = Multiply()([transform_gate, transformed_data])
        identity_gated = Multiply()([carry_gate, x])
        value = Add()([transformed_gated, identity_gated])
        return value

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        config = super().get_config()
        config['activation'] = self.activation
        config['transform_gate_bias'] = self.transform_gate_bias
        return config
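A minimal usage sketch for the Highway layer, assuming the Keras imports used above; the output shape always matches the input shape.

# Hedged usage sketch for the Highway layer above.
import tensorflow as tf
x = tf.keras.Input(shape=(64,))
y = Highway(activation='relu', transform_gate_bias=-1)(x)
model = tf.keras.Model(x, y)  # output shape (None, 64), same as the input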
Example no. 13
class FullyConnected(ModuleNative):
    def __init__(self, out_features: int, activation=None):
        """
        A simple fully connected layer (aka Linear Layer or Dense).

        It computes Wx+b with an optional activation function.

        :param out_features: The number of output features.
        :param activation: The activation function that should be added after the fc layer.
        """
        super().__init__()
        self.out_features = out_features
        self.activation = Activation(activation)

    @RunOnlyOnce
    def _build_pytorch(self, features):
        import torch
        from babilim.core.tensor_pt import Tensor as _Tensor
        in_features = features.shape[-1]
        self.linear = torch.nn.Linear(in_features, self.out_features)
        self.weight = _Tensor(data=None,
                              trainable=True,
                              native=self.linear.weight)
        self.bias = _Tensor(data=None, trainable=True, native=self.linear.bias)
        if torch.cuda.is_available():
            self.linear = self.linear.to(torch.device(
                "cuda"))  # FIXME shouldn't this be done automatically?

    def _call_pytorch(self, features):
        return self.activation(self.linear(features))

    @RunOnlyOnce
    def _build_tf(self, features):
        from tensorflow.keras.layers import Dense
        from babilim.core.tensor_tf import Tensor as _Tensor
        self.linear = Dense(self.out_features)
        self.linear.build(features.shape)
        self.weight = _Tensor(data=None,
                              trainable=True,
                              native=self.linear.kernel)
        self.bias = _Tensor(data=None, trainable=True, native=self.linear.bias)

    def _call_tf(self, features):
        return self.activation(self.linear(features))
Example no. 14
    def wider(self, added_size=1, pos_layer=None):
        layers_size = len(self.layers)
        if layers_size < 2:
            raise ValueError("Number of layers must be at least 2.")
        if pos_layer is None:
            pos_layer = max(layers_size - 2, 0)
        elif pos_layer >= layers_size - 1 or pos_layer < 0:
            raise ValueError(
                "pos_layer must lie in [0, layers_size - 2].")

        # TODO: get biggest value to divide for new weights
        weights, bias = self.layers[pos_layer].get_weights()
        weights_next_layer, bias_next_layer = self.layers[pos_layer +
                                                          1].get_weights()

        new_weights, new_bias, new_weights_next_layer = net2wider(
            weights, bias, weights_next_layer, added_size)

        src_units, des_units = weights.shape[0], weights.shape[1] + added_size
        next_des_units = weights_next_layer.shape[1]

        wider_layer = Dense(units=des_units,
                            activation=tf.nn.relu,
                            kernel_regularizer=regularizers.l1_l2(l1=self.l1,
                                                                  l2=self.l2))

        # input_shape = (batch_size, input_features).
        # input_features = number of units in layer = length(layer) = output of previous layer
        wider_layer.build(input_shape=(None, src_units))
        wider_layer.set_weights([new_weights, new_bias])

        next_layer = Dense(units=next_des_units,
                           activation=tf.nn.relu,
                           kernel_regularizer=regularizers.l1_l2(l1=self.l1,
                                                                 l2=self.l2))
        next_layer.build(input_shape=(None, des_units))
        next_layer.set_weights([new_weights_next_layer, bias_next_layer])

        self.layers[pos_layer] = wider_layer
        self.layers[pos_layer + 1] = next_layer
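For reference, a minimal sketch of what a function-preserving net2wider helper could look like, in the spirit of Net2Net (Chen et al., 2016); the actual net2wider used above may differ.

# Hedged sketch of a net2wider helper; names and details are assumptions.
import numpy as np

def net2wider(weights, bias, weights_next, added_size):
    n = weights.shape[1]
    idx = np.random.randint(0, n, size=added_size)        # units to duplicate
    # Widen this layer by copying the chosen columns and bias entries.
    new_weights = np.concatenate([weights, weights[:, idx]], axis=1)
    new_bias = np.concatenate([bias, bias[idx]])
    # Divide each outgoing row by its replication count so the function
    # computed by the network is preserved.
    counts = 1 + np.array([np.sum(idx == i) for i in range(n)])
    scaled = weights_next / counts[:, None]
    new_weights_next = np.concatenate([scaled, scaled[idx, :]], axis=0)
    return new_weights, new_bias, new_weights_next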
Example no. 15
class Attention(Layer):
    """
    Implementing attention Layer.
    References:
        1.  https://androidkt.com/text-classification-using-attention-mechanism-in-keras/
    """
    def __init__(self, units=256, **kwargs):
        self.W1 = Dense(units, use_bias=False)
        self.W2 = Dense(units, use_bias=False)
        self.V = Dense(1, use_bias=False)
        super(Attention, self).__init__(**kwargs)

    def build(self, input_shape):
        self.W1.build(input_shape[0])
        self.W2.build((input_shape[1][0], 1, input_shape[1][1]))
        self.V.build(input_shape[0])
        # trainable_weights is a read-only property in TF2; extend the private
        # list with each sublayer's weight list instead of appending lists.
        self._trainable_weights.extend(self.W1.trainable_weights)
        self._trainable_weights.extend(self.W2.trainable_weights)
        self._trainable_weights.extend(self.V.trainable_weights)
        super(Attention, self).build(input_shape)
        self.built = True

    def call(self, inputs, mask=None):
        features, hidden = inputs
        hidden_with_time_axis = tf.expand_dims(hidden, 1)
        score = tf.nn.tanh(self.W1(features) + self.W2(hidden_with_time_axis))
        attention_weights = tf.nn.softmax(self.V(score), axis=1)
        context_vector = attention_weights * features
        return tf.reshape(context_vector,
                          tf.shape(features)), attention_weights

    def compute_mask(self, inputs, mask=None):
        return mask
Example no. 16
    def build(self, input_shape) -> None:
        """Build the Graph Convolution layer.

        Parameters
        ------------------------------
        input_shape
            Shape of the output of the previous layer.
        """
        if len(input_shape) == 0:
            raise ValueError(
                "The provided input of the Graph Convolution layer "
                "is empty. It should contain exactly two elements, "
                "the adjacency matrix and the node features.")

        if len(input_shape) == 1:
            raise ValueError(
                "The provided input of the Graph Convolution layer "
                "has a single element. It should contain exactly two elements, "
                "the adjacency matrix and the node features.")
        for node_feature_shape in input_shape[1:]:
            dense_layer = Dense(
                units=self._units,
                activation=self._activation,
            )
            dense_layer.build(node_feature_shape)
            self._dense_layers.append(dense_layer)

        if self._dropout_rate is not None:
            self._dropout_layer = Dropout(self._dropout_rate)
            self._dropout_layer.build(node_feature_shape)
        else:
            self._dropout_layer = lambda x: x

        if self._apply_norm:
            self._l2_norm = L2Norm()
            self._l2_norm.build(node_feature_shape)
        else:
            self._l2_norm = lambda x: x

        super().build(input_shape)
Example no. 17
    def deeper(self, pos_layer=None):
        layers_size = len(self.layers)
        if pos_layer is None:
            pos_layer = max(layers_size - 2, 0)
        elif pos_layer >= layers_size - 1 or pos_layer < 0:
            raise ValueError(
                "pos_layer must lie in [0, layers_size - 2].")

        weights, bias = self.layers[pos_layer].get_weights()
        new_weights, new_bias = net2deeper(weights)
        des_units = weights.shape[1]
        # TODO: add initial kernel
        layer = Dense(
            units=des_units,
            activation=tf.nn.relu,
            kernel_regularizer=regularizers.l1_l2(l1=self.l1, l2=self.l2),
        )
        layer.build(input_shape=(None, des_units))
        layer.set_weights([new_weights, new_bias])

        self.layers.insert(pos_layer + 1, layer)
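Likewise, a minimal sketch of a net2deeper helper: the inserted layer starts as an identity map, which preserves the network function when the surrounding activations are ReLU; the actual net2deeper used above may differ.

# Hedged sketch of a net2deeper helper; details are assumptions.
import numpy as np

def net2deeper(weights):
    units = weights.shape[1]
    new_weights = np.eye(units, dtype=weights.dtype)  # identity kernel
    new_bias = np.zeros(units, dtype=weights.dtype)
    return new_weights, new_bias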
Example no. 18
    def last_insert_layer(self, layer_dim):
        prev_weights, prev_bias = self.layers[-1].get_weights()
        prev_units = prev_weights.shape[1]

        replace_prev_layer = Dense(
            units=prev_units,
            activation=tf.nn.relu,
            kernel_regularizer=regularizers.l1_l2(l1=self.l1, l2=self.l2),
        )
        replace_prev_layer.build(input_shape=(None, prev_weights.shape[0]))
        replace_prev_layer.set_weights([prev_weights, prev_bias])

        added_layer = Dense(
            units=layer_dim,
            activation=tf.nn.sigmoid,
            kernel_regularizer=regularizers.l1_l2(l1=self.l1, l2=self.l2),
            kernel_initializer=initializers.GlorotNormal(seed=self.seed),
            bias_initializer=initializers.Zeros())
        added_layer.build(input_shape=(None, prev_units))

        del self.layers[-1]
        self.layers.append(replace_prev_layer)
        self.layers.append(added_layer)
Example no. 19
    def __init__(self,
                 input_dim,
                 output_dim=2,
                 hidden_dims=None,
                 l1=0.01,
                 l2=0.01,
                 seed=6):
        super(PartCoder, self).__init__()
        self.l1 = l1
        self.l2 = l2
        self.seed = seed
        # self.layers = NoDependency([])
        # self.__dict__['layers'] = []
        self.layers = []

        _input_dim = input_dim
        for i, dim in enumerate(hidden_dims or []):  # tolerate hidden_dims=None
            layer = Dense(
                units=dim,
                activation=tf.nn.relu,
                kernel_regularizer=regularizers.l1_l2(l1=self.l1, l2=self.l2),
                kernel_initializer=initializers.GlorotNormal(seed=self.seed),
                bias_initializer=initializers.Zeros())
            layer.build(input_shape=(None, _input_dim))
            _input_dim = dim
            self.layers.append(layer)

        # Final, adding output_layer (latent/reconstruction layer)
        layer = Dense(units=output_dim,
                      activation=tf.nn.sigmoid,
                      kernel_regularizer=regularizers.l1_l2(l1=self.l1,
                                                            l2=self.l2),
                      kernel_initializer=initializers.GlorotNormal(seed=self.seed),
                      bias_initializer=initializers.Zeros())
        layer.build(input_shape=(None, _input_dim))
        self.layers.append(layer)
Example no. 20
class MDN(Layer):
    """A Mixture Density Network Layer for Keras.
    This layer has a few tricks to avoid NaNs in the loss function when training:
        - Activation for variances is ELU + 1 + 1e-8 (to avoid very small values)
        - Mixture weights (pi) are produced with a softmax activation, so they sum to 1.

    A loss function needs to be constructed with the same output dimension and number of mixtures.
    A sampling function is also provided to sample from distribution parametrised by the MDN outputs.
    """
    def __init__(self, output_dimension, num_mixtures, **kwargs):
        self.output_dim = output_dimension
        self.num_mix = num_mixtures

        with tf.name_scope('MDN'):
            self.mdn_mus = Dense(
                self.num_mix * self.output_dim,
                name='mdn_mus',
                activation='sigmoid')  # mix*output vals, sigmoid activation
            self.mdn_sigmas = Dense(
                self.num_mix * self.output_dim,
                activation=elu_plus_one_plus_epsilon,
                name='mdn_sigmas')  # mix*output vals, ELU+1+eps activation
            self.mdn_pi = Dense(self.num_mix,
                                name='mdn_pi',
                                activation='softmax')  # mix vals, softmax
        super(MDN, self).__init__(**kwargs)

    def build(self, input_shape):
        self.mdn_mus.build(input_shape)
        self.mdn_sigmas.build(input_shape)
        self.mdn_pi.build(input_shape)
        self._trainable_weights = self.mdn_mus.trainable_weights + self.mdn_sigmas.trainable_weights + self.mdn_pi.trainable_weights
        self._non_trainable_weights = self.mdn_mus.non_trainable_weights + self.mdn_sigmas.non_trainable_weights + self.mdn_pi.non_trainable_weights
        super(MDN, self).build(input_shape)

    def call(self, x, mask=None):
        with tf.name_scope('MDN'):
            mdn_out = keras.layers.concatenate(
                [self.mdn_mus(x),
                 self.mdn_sigmas(x),
                 self.mdn_pi(x)],
                name='mdn_outputs')
        return mdn_out

    def compute_output_shape(self, input_shape):
        """Returns output shape, showing the number of mixture parameters."""
        return (input_shape[0],
                (2 * self.output_dim * self.num_mix) + self.num_mix)

    def get_config(self):
        config = {
            "output_dimension": self.output_dim,
            "num_mixtures": self.num_mix
        }
        base_config = super(MDN, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
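When building the loss or sampling function the docstring mentions, the concatenated output has to be split back into its mixture parameters; a minimal sketch that mirrors the concatenation order in call() above:

# Hedged sketch: split an MDN output vector into (mus, sigmas, pis),
# following the order [mdn_mus, mdn_sigmas, mdn_pi] used in call().
def split_mixture_params(mdn_out, output_dim, num_mix):
    mus = mdn_out[..., :num_mix * output_dim]
    sigmas = mdn_out[..., num_mix * output_dim:2 * num_mix * output_dim]
    pis = mdn_out[..., -num_mix:]
    return mus, sigmas, pis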
Example no. 21
class SnailAttention(Layer):
    """
    Based on the work of Mishra et al., 2018: https://openreview.net/pdf?id=B1DmUzWAW
    Adapted from https://github.com/philipperemy/keras-snail-attention/blob/master/attention.py
    """
    def __init__(self, dims, k_size, v_size, seq_len=None, **kwargs):
        self.k_size = k_size
        self.seq_len = seq_len
        self.v_size = v_size
        self.dims = dims
        self.sqrt_k = math.sqrt(k_size)
        self.keys_fc = None
        self.queries_fc = None
        self.values_fc = None
        super(SnailAttention, self).__init__(**kwargs)

    def build(self, input_shape):
        # https://stackoverflow.com/questions/54194724/how-to-use-keras-layers-in-custom-keras-layer
        self.keys_fc = Dense(self.k_size, name="Keys_SnailAttn")
        self.keys_fc.build((None, self.dims))
        self._trainable_weights.extend(self.keys_fc.trainable_weights)

        self.queries_fc = Dense(self.k_size, name="Queries_SnailAttn")
        self.queries_fc.build((None, self.dims))
        self._trainable_weights.extend(self.queries_fc.trainable_weights)

        self.values_fc = Dense(self.v_size, name="Values_SnailAttn")
        self.values_fc.build((None, self.dims))
        self._trainable_weights.extend(self.values_fc.trainable_weights)
        super(SnailAttention, self).build(input_shape)

    def __call__(self, inputs, **kwargs):

        if not self.built:
            self._maybe_build(inputs)

        # Check that the implementation exactly matches the PyTorch version.
        keys = self.keys_fc(inputs)
        queries = self.queries_fc(inputs)
        values = self.values_fc(inputs)
        logits = K.batch_dot(queries, K.permute_dimensions(keys, (0, 2, 1)))
        mask = K.ones_like(logits) * np.triu(
            (-np.inf) * np.ones(logits.shape.as_list()[1:]), k=1)
        logits = mask + logits
        probs = Softmax(axis=-1,
                        name="Softmax_SnailAttn")(logits / self.sqrt_k)
        read = K.batch_dot(probs, values)
        output = K.concatenate([inputs, read], axis=-1)
        return output

    def compute_output_shape(self, input_shape):
        output_shape = list(input_shape)
        output_shape[-1] += self.v_size
        return tuple(output_shape)
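A minimal usage sketch, assuming the imports used by the layer above and a fixed sequence length (the causal mask is built from the static shape):

# Hedged usage sketch for the SnailAttention layer above.
import tensorflow as tf
x = tf.keras.Input(shape=(10, 64))  # (batch, seq_len, dims)
attn = SnailAttention(dims=64, k_size=32, v_size=32)
y = attn(x)                         # (batch, seq_len, dims + v_size)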
Example no. 22
class Attention(Layer):
    """
    Layer for implementing two common types of attention mechanisms, i) global (soft) attention
    and ii) local (hard) attention, for two types of sequence tasks, i) many-to-one and
    ii) many-to-many.

    The setting use_bias=False converts the Dense() layers into annotation weight matrices. Softmax
    activation ensures that all weights sum up to 1. Read more here to make more sense of the code
    and implementations:
    i)   https://www.tensorflow.org/beta/tutorials/text/nmt_with_attention
    ii)  https://github.com/philipperemy/keras-attention-mechanism/issues/14
    iii) https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html

    SUGGESTION: If the model doesn't converge or the test accuracy is lower than expected, try
    tuning the hidden size of the recurrent layers, the batch size used during training, or the
    param @window_width if using 'local' attention.

    NOTE: This implementation takes the hidden states associated with the last timestep of the input
    sequence as the target hidden state (h_t) as suggested by @felixhao28 in i) for many-to-one
    scenarios. Hence, when trying to predict what word (token) comes after sequence ['I', 'love',
    'biscuits', 'and'], we take h('and') with shape (1, H) as the target hidden state. For
    many-to-many scenarios, it takes the hidden state associated with the timestep that is being
    currently iterated in the target sequence, usually by a decoder-like architecture.

    @param (str) context: the context of the problem at hand, specify 'many-to-many' for
           sequence-to-sequence tasks such as machine translation and question answering, or
           specify 'many-to-one' for tasks such as sentiment classification and language modelling
    @param (str) alignment_type: type of attention mechanism to be applied, 'local-m' corresponds to
           monotonic alignment where we take the last @window_width timesteps, 'local-p' corresponds
           to having a Gaussian distribution around the predicted aligned position, whereas
           'local-p*' corresponds to the newly proposed method to adaptively learning the unique
           timesteps to give attention (currently only works for many-to-one scenarios)
    @param (int) window_width: width for set of source hidden states in 'local' attention
    @param (str) score_function: alignment score function config; current implementations include
           the 'dot', 'general', and 'location' both by Luong et al. (2015), 'concat' by Bahdanau et
           al. (2015), and 'scaled_dot' by Vaswani et al. (2017)
    @param (str) model_api: specify to use TF's Sequential OR Functional API, note that attention
           weights are not outputted with the former as it only accepts single-output layers
    """
    def __init__(self, context='many-to-many', alignment_type='global', window_width=None,
                 score_function='general', model_api='functional', **kwargs):
        if context not in ['many-to-many', 'many-to-one']:
            raise ValueError("Argument for param @context is not recognized")
        if alignment_type not in ['global', 'local-m', 'local-p', 'local-p*']:
            raise ValueError("Argument for param @alignment_type is not recognized")
        if alignment_type == 'global' and window_width is not None:
            raise ValueError("Can't use windowed approach with global attention")
        if context == 'many-to-many' and alignment_type == 'local-p*':
            raise ValueError("Can't use local-p* approach in many-to-many scenarios")
        if score_function not in ['dot', 'general', 'location', 'concat', 'scaled_dot']:
            raise ValueError("Argument for param @score_function is not recognized")
        if model_api not in ['sequential', 'functional']:
            raise ValueError("Argument for param @model_api is not recognized")
        super(Attention, self).__init__(**kwargs)
        self.context = context
        self.alignment_type = alignment_type
        self.window_width = window_width  # D
        self.score_function = score_function
        self.model_api = model_api

    def get_config(self):
        base_config = super(Attention, self).get_config()
        base_config['alignment_type'] = self.alignment_type
        base_config['window_width'] = self.window_width
        base_config['score_function'] = self.score_function
        base_config['model_api'] = self.model_api
        return base_config

    def build(self, input_shape):
        # Declare attributes for easy access to dimension values
        if self.context == 'many-to-many':
            self.input_sequence_length, self.hidden_dim = input_shape[0][1], input_shape[0][2]
            self.target_sequence_length = input_shape[1][1]
        elif self.context == 'many-to-one':
            self.input_sequence_length, self.hidden_dim = input_shape[0][1], input_shape[0][2]

        # Build weight matrices for different alignment types and score functions
        if 'local-p' in self.alignment_type:
            self.W_p = Dense(units=self.hidden_dim, use_bias=False)
            self.W_p.build(input_shape=(None, None, self.hidden_dim))                               # (B, 1, H)
            self._trainable_weights += self.W_p.trainable_weights

            self.v_p = Dense(units=1, use_bias=False)
            self.v_p.build(input_shape=(None, None, self.hidden_dim))                               # (B, 1, H)
            self._trainable_weights += self.v_p.trainable_weights

        if 'dot' not in self.score_function:  # weight matrix not utilized for 'dot' function
            self.W_a = Dense(units=self.hidden_dim, use_bias=False)
            self.W_a.build(input_shape=(None, None, self.hidden_dim))                               # (B, S*, H)
            self._trainable_weights += self.W_a.trainable_weights

        if self.score_function == 'concat':  # define additional weight matrices
            self.U_a = Dense(units=self.hidden_dim, use_bias=False)
            self.U_a.build(input_shape=(None, None, self.hidden_dim))                               # (B, 1, H)
            self._trainable_weights += self.U_a.trainable_weights

            self.v_a = Dense(units=1, use_bias=False)
            self.v_a.build(input_shape=(None, None, self.hidden_dim))                               # (B, S*, H)
            self._trainable_weights += self.v_a.trainable_weights

        super(Attention, self).build(input_shape)

    def call(self, inputs):
        # Pass decoder output (prev. timestep) alongside encoder output for all scenarios
        if not isinstance(inputs, list):
            raise ValueError("Pass a list=[encoder_out (Tensor), decoder_out (Tensor)," +
                             "current_timestep (int)] for all scenarios")

        # Specify source and target states (and timestep if applicable) for easy access
        if self.context == 'many-to-one':
            # Get h_t, the current (target) hidden state as the last timestep of input sequence
            target_hidden_state = inputs[1]                                                         # (B, H)
            source_hidden_states = inputs[0]                                                        # (B, S, H)
        elif self.context == 'many-to-many':
            # Get h_t, the current (target) hidden state from the previous decoded hidden state
            target_hidden_state = inputs[1]                                                         # (B, H)
            current_timestep = inputs[2]
            source_hidden_states = inputs[0]                                                        # (B, S, H)

        # Add time axis to h_t
        target_hidden_state = tf.expand_dims(input=target_hidden_state, axis=1)                     # (B, 1, H)

        # Get h_s, source hidden states through specified attention mechanism
        if self.alignment_type == 'global':                                                         # Global Approach
            source_hidden_states = source_hidden_states                                             # (B, S, H)

        elif 'local' in self.alignment_type:                                                        # Local Approach
            # Automatically set window width to default value (8 -> no real logic behind this value)
            self.window_width = 8 if self.window_width is None else self.window_width

            # Get aligned position (between inputs & targets) and derive a context window to focus
            if self.alignment_type == 'local-m':                                                    # Monotonic Alignment
                # Set alignment position
                if self.context == 'many-to-one':
                    aligned_position = self.input_sequence_length
                elif self.context == 'many-to-many':
                    aligned_position = current_timestep
                # Get window borders
                left = int(aligned_position - self.window_width
                           if aligned_position - self.window_width >= 0
                           else 0)
                right = int(aligned_position + self.window_width
                            if aligned_position + self.window_width <= self.input_sequence_length
                            else self.input_sequence_length)
                # Extract the window
                source_hidden_states = Lambda(lambda x: x[:, left:right, :])(source_hidden_states)  # (B, S*=(D, 2xD), H)

            elif self.alignment_type == 'local-p':                                                  # Predictive Alignment
                aligned_position = self.W_p(target_hidden_state)                                    # (B, 1, H)
                aligned_position = Activation('tanh')(aligned_position)                             # (B, 1, H)
                aligned_position = self.v_p(aligned_position)                                       # (B, 1, 1)
                aligned_position = Activation('sigmoid')(aligned_position)                          # (B, 1, 1)
                aligned_position = aligned_position * self.input_sequence_length                    # (B, 1, 1)

            elif self.alignment_type == 'local-p*':                                                 # Completely Predictive Alignment
                aligned_position = self.W_p(source_hidden_states)                                   # (B, S, H)
                aligned_position = Activation('tanh')(aligned_position)                             # (B, S, H)
                aligned_position = self.v_p(aligned_position)                                       # (B, S, 1)
                aligned_position = Activation('sigmoid')(aligned_position)                          # (B, S, 1)
                # Only keep top D values out of the sigmoid activation, and zero-out the rest
                aligned_position = tf.squeeze(aligned_position, axis=-1)                            # (B, S)
                top_probabilities = tf.nn.top_k(input=aligned_position,                             # (values:(B, D), indices:(B, D))
                                                k=self.window_width,
                                                sorted=False)
                onehot_vector = tf.one_hot(indices=top_probabilities.indices,
                                           depth=self.input_sequence_length)                        # (B, D, S)
                onehot_vector = tf.reduce_sum(onehot_vector, axis=1)                                # (B, S)
                aligned_position = Multiply()([aligned_position, onehot_vector])                    # (B, S)
                aligned_position = tf.expand_dims(aligned_position, axis=-1)                        # (B, S, 1)
                initial_source_hidden_states = source_hidden_states                                 # (B, S, H)
                source_hidden_states = Multiply()([source_hidden_states, aligned_position])         # (B, S*=S(D), H)
                # Scale back-to approximately original hidden state values
                aligned_position += tf.keras.backend.epsilon()                                      # (B, S, 1)
                source_hidden_states /= aligned_position                                            # (B, S*=S(D), H)
                source_hidden_states = initial_source_hidden_states + source_hidden_states          # (B, S, H)

        # Compute alignment score through specified function
        if 'dot' in self.score_function:                                                            # Dot Score Function
            attention_score = Dot(axes=[2, 2])([source_hidden_states, target_hidden_state])         # (B, S*, 1)
            if self.score_function == 'scaled_dot':
                attention_score *= 1 / np.sqrt(float(source_hidden_states.shape[2]))                # (B, S*, 1)

        elif self.score_function == 'general':                                                      # General Score Function
            weighted_hidden_states = self.W_a(source_hidden_states)                                 # (B, S*, H)
            attention_score = Dot(axes=[2, 2])([weighted_hidden_states, target_hidden_state])       # (B, S*, 1)

        elif self.score_function == 'location':                                                     # Location-based Score Function
            weighted_target_state = self.W_a(target_hidden_state)                                   # (B, 1, H)
            attention_score = Activation('softmax')(weighted_target_state)                          # (B, 1, H)
            attention_score = RepeatVector(source_hidden_states.shape[1])(attention_score)          # (B, S*, H)
            attention_score = tf.reduce_sum(attention_score, axis=-1)                               # (B, S*)
            attention_score = tf.expand_dims(attention_score, axis=-1)                              # (B, S*, 1)

        elif self.score_function == 'concat':                                                       # Concat Score Function
            weighted_hidden_states = self.W_a(source_hidden_states)                                 # (B, S*, H)
            weighted_target_state = self.U_a(target_hidden_state)                                   # (B, 1, H)
            weighted_sum = weighted_hidden_states + weighted_target_state                           # (B, S*, H)
            weighted_sum = Activation('tanh')(weighted_sum)                                         # (B, S*, H)
            attention_score = self.v_a(weighted_sum)                                                # (B, S*, 1)

        # Compute attention weights
        attention_weights = Activation('softmax')(attention_score)                                  # (B, S*, 1)

        # Distribute weights around aligned position for local-p approach only
        if self.alignment_type == 'local-p':                                                        # Gaussian Distribution
            gaussian_estimation = lambda s: tf.exp(-tf.square(s - aligned_position) /
                                                   (2 * tf.square(self.window_width / 2)))
            gaussian_factor = gaussian_estimation(0)
            for i in range(1, self.input_sequence_length):
                gaussian_factor = Concatenate(axis=1)([gaussian_factor, gaussian_estimation(i)])    # (B, S*, 1)
            attention_weights = attention_weights * gaussian_factor                                 # (B, S*, 1)

        # Derive context vector
        context_vector = source_hidden_states * attention_weights                                   # (B, S*, H)

        if self.model_api == 'functional':
            return context_vector, attention_weights
        elif self.model_api == 'sequential':
            return context_vector
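A minimal many-to-one usage sketch with the functional API, assuming the Keras imports used by the layer above:

# Hedged usage sketch for the Attention layer above (many-to-one).
import tensorflow as tf
inputs = tf.keras.Input(shape=(20, 100))
encoder_out, state_h, _ = tf.keras.layers.LSTM(
    128, return_sequences=True, return_state=True)(inputs)
context_vector, attention_weights = Attention(
    context='many-to-one',
    alignment_type='global',
    score_function='general',
    model_api='functional')([encoder_out, state_h])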
Example no. 23
class SRFR(Model):
    def __init__(
            self,
            num_filters: int = 62,
            depth: int = 50,
            categories: int = 512,
            num_gc: int = 32,
            num_blocks: int = 23,
            residual_scaling: float = 0.2,
            training: bool = True,
            input_shape=(28, 28, 3),
            num_classes_syn: int = None,
            both: bool = False,
            num_classes_nat: int = None,
            scale: int = 64,
        ):
        super(SRFR, self).__init__()
        self._training = training
        self.scale = scale
        if both:
            self._natural_input = Conv2D(
                input_shape=input_shape,
                filters=num_filters,
                kernel_size=(3, 3),
                strides=1,
                padding='same',
                name='natural_input',
                activation=mish,
            )
        self._synthetic_input = Conv2D(
            input_shape=input_shape,
            filters=num_filters,
            kernel_size=(3, 3),
            strides=1,
            padding='same',
            name='synthetic_input',
            activation=mish,
        )
        self._super_resolution = GeneratorNetwork(
            num_filters,
            num_gc,
            num_blocks,
            residual_scaling,
        )
        self._face_recognition = ResNet(
            depth,
            categories,
            training
        )
        if self._training:
            if both:
                self._fc_classification_nat = Dense(
                    input_shape=(categories,),
                    units=num_classes_nat,
                    activation=None,
                    use_bias=False,
                    dtype='float32',
                    name='fully_connected_to_softmax_crossentropy_nat',
                )
                self._fc_classification_nat.build(tf.TensorShape([None, 512]))
                self.net_type = 'nat'
            self._fc_classification_syn: Dense = Dense(
                input_shape=(categories,),
                units=num_classes_syn,
                activation=None,
                use_bias=False,
                dtype='float32',
                name='fully_connected_to_softmax_crossentropy_syn',
            )
            self._fc_classification_syn.build(tf.TensorShape([None, 512]))

    @tf.function
    def _call_evaluating(self, input_tensor, input_type: str = 'nat'):
        if input_type == 'syn':
            outputs = self._synthetic_input(input_tensor)
        else:
            outputs = self._natural_input(input_tensor)
        super_resolution_image = self._super_resolution(outputs)
        embeddings = self._face_recognition(super_resolution_image)
        return super_resolution_image, embeddings

    def _calculate_normalized_embeddings(self, embeddings,
                                         net_type: str = 'syn'):
        fc_weights = self.get_weights(net_type)
        normalized_weights = tf.Variable(
            normalize(fc_weights, name='weights_normalization'),
            aggregation=tf.VariableAggregation.NONE,
        )
        normalized_embeddings = normalize(
            embeddings, axis=1, name='embeddings_normalization') * self.scale
        replica = tf.distribute.get_replica_context()
        replica.merge_call(self.set_weights,
                           args=(normalized_weights, net_type))
        return self.call_fc_classification(normalized_embeddings, net_type)

    def _call_training(self, synthetic_images, natural_images=None):
        synthetic_outputs = self._synthetic_input(synthetic_images)
        synthetic_sr_images = self._super_resolution(synthetic_outputs)
        synthetic_embeddings = self._face_recognition(synthetic_sr_images)
        synthetic_embeddings = self._calculate_normalized_embeddings(
            synthetic_embeddings
        )
        if natural_images is not None:  # truth value of a tensor is ambiguous
            natural_outputs = self._natural_input(natural_images)
            natural_sr_images = self._super_resolution(natural_outputs)
            natural_embeddings = self._face_recognition(natural_sr_images)
            natural_embeddings = self._calculate_normalized_embeddings(
                natural_embeddings
            )
            return (
                synthetic_sr_images,
                synthetic_embeddings,
                natural_sr_images,
                natural_embeddings,
            )

        return synthetic_sr_images, synthetic_embeddings

    def call(self, input_tensor_01, input_tensor_02=None,
             training: bool = True, input_type: str = 'nat'):
        if training:
            return self._call_training(input_tensor_01, input_tensor_02)

        return self._call_evaluating(input_tensor_01, input_type)

    def get_weights(self, net_type: str = 'syn'):
        if net_type == 'nat':
            return self._fc_classification_nat.get_weights()
        return self._fc_classification_syn.get_weights()

    def set_weights(self, _, weights, net_type: str = 'syn') -> None:
        if net_type == 'nat':
            self._fc_classification_nat.set_weights([weights.read_value()])
        else:
            self._fc_classification_syn.set_weights([weights.read_value()])

    def call_fc_classification(self, input, net_type: str = 'syn'):
        if net_type == 'nat':
            return self._fc_classification_nat(input)
        return self._fc_classification_syn(input)
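The normalize helper used above is not shown; a minimal sketch of what it plausibly is (an L2 normalization along the given axis), though the actual project may differ:

# Hedged sketch of the normalize helper assumed by SRFR above.
import tensorflow as tf

def normalize(tensor, axis=None, name=None):
    return tf.nn.l2_normalize(tensor, axis=axis, name=name)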
Example no. 24
class LocationSensitiveAttentionLayer(Layer):
    def __init__(self):
        super(LocationSensitiveAttentionLayer, self).__init__()
        self.units = hps.LSA_dim
        self.filters = hps.LSA_filters
        self.kernel = hps.LSA_kernel
        self._cumulate = True

        self.location_convolution = Conv1D(filters=self.filters,
                                           kernel_size=self.kernel,
                                           padding='same',
                                           bias_initializer='zeros')
        self.location_layer = Dense(self.units, use_bias=False)
        self.query_layer = Dense(self.units, use_bias=False)
        self.memory_layer = Dense(self.units, use_bias=False)

        self.rnn_cell = Decoderlstm()

        self.values = None

        self.keys = None

    def build(self, input_shape):
        enc_out_seq, dec_out_seq = input_shape
        self.v_a = self.add_weight(name='V_a',
                                   shape=(self.units, ),
                                   initializer='uniform',
                                   trainable=True)
        self.b_a = self.add_weight(name='b_a',
                                   shape=(self.units, ),
                                   initializer='uniform',
                                   trainable=True)
        if self.memory_layer:
            self.memory_layer.build(enc_out_seq)
            self._trainable_weights += self.memory_layer._trainable_weights
        if self.query_layer:
            if not self.query_layer.built:
                if self.rnn_cell:
                    self.query_layer.build(
                        self.rnn_cell.compute_output_shape(dec_out_seq)[0])
                else:
                    self.query_layer.build(dec_out_seq)
            self._trainable_weights += self.query_layer._trainable_weights
        if self.rnn_cell:
            rnn_input_shape = (enc_out_seq[0], 1,
                               dec_out_seq[-1] + enc_out_seq[-1])
            self.rnn_cell.build(rnn_input_shape)
            self._trainable_weights += self.rnn_cell.weights

        conv_input_shape = (enc_out_seq[0], enc_out_seq[1], 1)
        location_input_shape = (enc_out_seq[0], enc_out_seq[1], self.filters)
        self.location_convolution.build(conv_input_shape)
        self.location_layer.build(location_input_shape)

        self._trainable_weights += self.location_convolution._trainable_weights
        self._trainable_weights += self.location_layer._trainable_weights

        super(LocationSensitiveAttentionLayer, self).build(input_shape)

    def call(self, inputs, verbose=False):

        encoder_out_seq, decoder_out_seq = inputs

        values = encoder_out_seq
        keys = self.memory_layer(values) if self.memory_layer else values

        def energy_step(query, states):
            previous_alignments = states[0]
            if self.rnn_cell:
                c_i = states[1]
                cell_state = states[2:]

                lstm_input = K.concatenate([query, c_i])
                lstm_input = K.expand_dims(lstm_input, 1)

                lstm_out = self.rnn_cell(lstm_input, initial_state=cell_state)
                lstm_output, new_cell_state = lstm_out[0], lstm_out[1:]
                query = lstm_output

            processed_query = self.query_layer(
                query) if self.query_layer else query

            expanded_alignments = K.expand_dims(previous_alignments, axis=2)

            f = self.location_convolution(expanded_alignments)

            processed_location_features = self.location_layer(f)

            e_i = K.sum(
                self.v_a * K.tanh(keys + processed_query +
                                  processed_location_features + self.b_a), [2])

            e_i = K.softmax(e_i)

            if self._cumulate:
                next_state = e_i + previous_alignments
            else:
                next_state = e_i

            if self.rnn_cell:
                new_c_i, _ = context_step(e_i, [c_i])

                return e_i, [next_state, new_c_i, *new_cell_state]
            return e_i, [next_state]

        def context_step(inputs, states):

            alignments = inputs
            expanded_alignments = K.expand_dims(alignments, 1)

            c_i = math_ops.matmul(expanded_alignments, values)
            c_i = K.squeeze(c_i, 1)

            return c_i, [c_i]

        def create_initial_state(inputs, hidden_size):
            fake_state = K.zeros_like(inputs)
            fake_state = K.sum(fake_state, axis=[1, 2])
            fake_state = K.expand_dims(fake_state)
            fake_state = K.tile(fake_state, [1, hidden_size])
            return fake_state

        def get_fake_cell_input(fake_state_c):
            fake_input = K.zeros_like(decoder_out_seq)[:, 0, :]
            fake_input = K.concatenate([fake_state_c, fake_input])
            fake_input = K.expand_dims(fake_input, 1)
            return fake_input

        fake_state_c = create_initial_state(values, values.shape[-1])
        fake_state_e = create_initial_state(values, K.shape(values)[1])
        if self.rnn_cell:
            cell_initial_state = self.rnn_cell.get_initial_state(
                get_fake_cell_input(fake_state_c))
            initial_states_e = [
                fake_state_e, fake_state_c, *cell_initial_state
            ]
        else:
            initial_states_e = [fake_state_e]

        last_out, e_outputs, _ = K.rnn(energy_step, decoder_out_seq,
                                       initial_states_e)

        c_outputs = math_ops.matmul(e_outputs, values)

        return [c_outputs, e_outputs]

    def compute_output_shape(self, input_shape):
        return [(input_shape[1][0], input_shape[1][1], input_shape[1][2]),
                (input_shape[1][0], input_shape[1][1], input_shape[0][1])]
Example no. 25
class PhasedLSTMCell(tf.keras.layers.Layer):
    """Phased LSTM recurrent network cell.

    https://arxiv.org/pdf/1610.09513v1.pdf
    """
    def __init__(self,
                 num_units,
                 use_peepholes=False,
                 leak=0.001,
                 ratio_on=0.1,
                 trainable_ratio_on=True,
                 period_init_min=0.5,
                 period_init_max=1000.0):
        """Initialize the Phased LSTM cell.

        Args:
          num_units: int, The number of units in the Phased LSTM cell.
          use_peepholes: bool, set True to enable peephole connections.
          leak: float or scalar float Tensor with value in [0, 1]. Leak applied
              during training.
          ratio_on: float or scalar float Tensor with value in [0, 1]. Ratio of
              the period during which the gates are open.
          trainable_ratio_on: bool, whether ratio_on is trainable.
          period_init_min: float or scalar float Tensor. With value > 0.
              Minimum value of the initialized period.
              The period values are initialized by drawing from the
              distribution: e^U(log(period_init_min), log(period_init_max))
              where U(.,.) is the uniform distribution.
          period_init_max: float or scalar float Tensor.
              With value > period_init_min. Maximum value of the initialized
              period.

        """
        super().__init__()
        self._num_units = num_units
        self._use_peepholes = use_peepholes
        self._leak = leak
        self._ratio_on = ratio_on
        self._trainable_ratio_on = trainable_ratio_on
        self._period_init_min = period_init_min
        self._period_init_max = period_init_max
        self.linear1 = Dense(2 * self._num_units,
                             use_bias=True,
                             activation='sigmoid',
                             name='MaskGates')
        self.linear2 = Dense(self._num_units, use_bias=True, activation='tanh')
        self.linear3 = Dense(self._num_units,
                             use_bias=True,
                             activation='sigmoid')

        self.period = self.add_weight('period',
                                      shape=[self._num_units],
                                      initializer=_random_exp_initializer(
                                          self._period_init_min,
                                          self._period_init_max))
        self.phase = self.add_weight(
            'phase',
            shape=[self._num_units],
            initializer=tf.initializers.random_uniform(
                0., self.period.initial_value))
        self.ratio_on = self.add_weight("ratio_on", [self._num_units],
                                        initializer=tf.constant_initializer(
                                            self._ratio_on),
                                        trainable=self._trainable_ratio_on)

    def build(self, input_shapes):
        time_shape, x_shape = input_shapes.times, input_shapes.x
        x_dim = x_shape[-1]

        if self._use_peepholes:
            mask_gate_and_output_gate_dims = 2 * self._num_units + x_dim
        else:
            mask_gate_and_output_gate_dims = self._num_units + x_dim

        self.linear1.build((time_shape[0], mask_gate_and_output_gate_dims))
        self.linear2.build((time_shape[0], self._num_units + x_dim))
        self.linear3.build((time_shape[0], mask_gate_and_output_gate_dims))
        super().build(input_shapes)

    @property
    def state_size(self):
        return tf.nn.rnn_cell.LSTMStateTuple(self._num_units, self._num_units)

    @property
    def output_size(self):
        return self._num_units

    def _mod(self, x, y):
        """Modulo function that propagates x gradients."""
        return tf.stop_gradient(tf.math.mod(x, y) - x) + x

    def _get_cycle_ratio(self, time):
        """Compute the cycle ratio in the dtype of the time."""
        phase = tf.cast(self.phase, dtype=time.dtype)
        period = tf.cast(self.period, dtype=time.dtype)
        shifted_time = time - phase
        cycle_ratio = self._mod(shifted_time, period) / period
        return tf.cast(cycle_ratio, dtype=tf.float32)

    def call(self, inputs, state):
        """Phased LSTM Cell.

        Args:
          inputs: A tuple of 2 Tensors.
             The first Tensor has shape [batch, 1], and type float32 or float64.
             It stores the time.
             The second Tensor has shape [batch, features_size], and type float32.
             It stores the features.
          state: rnn_cell_impl.LSTMStateTuple, state from previous timestep.
        Returns:
          A tuple containing:
          - A Tensor of float32, and shape [batch_size, num_units], representing the
            output of the cell.
          - A rnn_cell_impl.LSTMStateTuple, containing 2 Tensors of float32, shape
            [batch_size, num_units], representing the new state and the output.
        """
        (c_prev, h_prev) = state
        time, x = inputs.times, inputs.x

        if self._use_peepholes:
            input_mask_and_output_gate = tf.concat([x, h_prev, c_prev],
                                                   axis=-1)
        else:
            input_mask_and_output_gate = tf.concat([x, h_prev], axis=-1)

        mask_gates = self.linear1(input_mask_and_output_gate)

        input_gate, forget_gate = tf.split(mask_gates,
                                           axis=1,
                                           num_or_size_splits=2)

        new_input = self.linear2(tf.concat([x, h_prev], axis=-1))

        new_c = (c_prev * forget_gate + input_gate * new_input)

        output_gate = self.linear3(input_mask_and_output_gate)

        new_h = tf.tanh(new_c) * output_gate

        cycle_ratio = self._get_cycle_ratio(time)
        k_up = 2 * cycle_ratio / self.ratio_on
        k_down = 2 - k_up
        k_closed = self._leak * cycle_ratio

        k = tf.where(cycle_ratio < self.ratio_on, k_down, k_closed)
        k = tf.where(cycle_ratio < 0.5 * self.ratio_on, k_up, k)

        new_c = k * new_c + (1 - k) * c_prev
        new_h = k * new_h + (1 - k) * h_prev

        new_state = tf.nn.rnn_cell.LSTMStateTuple(new_c, new_h)
        return new_h, new_state
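
# _random_exp_initializer is referenced above but not defined in this snippet.
# A plausible sketch following the docstring's e^U(log(min), log(max)) period
# initialization (the original helper may differ in details):
import math

def _random_exp_initializer(minval, maxval, seed=None):
    """Initializer drawing values from exp(U(log(minval), log(maxval)))."""
    def _initializer(shape, dtype=tf.float32):
        return tf.exp(
            tf.random.uniform(shape,
                              minval=math.log(minval),
                              maxval=math.log(maxval),
                              dtype=dtype,
                              seed=seed))
    return _initializer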
Example n. 26
class CNNEncoder(tf.keras.layers.Layer):
    """ CNNEncoder is a combination of multiple convolutional layers and max
    pooling layers. This is defined as a single layer to be consistent with
    other encoders in terms of input and output specifications.

    Input shape: (batch_size, sequence_length, input_dim).
    Output shape: (batch_size, output_dim).

    The CNN has one convolution layer for each ngram filter size. Each
    convolution operation gives out a vector of size num_filters. The number
    of positions at which a convolution layer is applied depends on the ngram
    size: input_len - ngram_size + 1. The corresponding maxpooling layer
    aggregates all of these outputs from the convolution layer and keeps the max.

    This operation is repeated for every ngram size passed, and consequently
    the dimensionality of the output after maxpooling is
    len(ngram_filter_sizes) * num_filters.

    We then use a fully connected layer to project it back to the desired
    output_dim.

    References: "A Sensitivity Analysis of (and Practitioners’ Guide to)
    Convolutional Neural Networks for Sentence Classification",
    Zhang and Wallace 2016, particularly Figure 1.

    Args:
        filters: Integer, the output dim for each convolutional layer.
        kernel_sizes: An integer tuple or list, the kernel sizes of the
            convolutional layers.
        units: After doing convolutions, we'll project the collected features
            into a vector of this size. If this value is `None`, just return
            the result of the max pooling.
        conv_layer_activation: string of convolutional layer `Activation`.
        l1_regularization: float.
        l2_regularization: float.
    """

    def __init__(self, filters=100, kernel_sizes=(2, 3, 4, 5),
                 conv_layer_activation='relu',
                 l1_regularization=None, l2_regularization=None,
                 units=None,
                 **kwargs):
        self.filters = filters
        self.kernel_sizes = kernel_sizes
        self.units = units
        self.conv_layer_activation = conv_layer_activation
        self.l1_regularization = l1_regularization
        self.l2_regularization = l2_regularization
        self.regularizer = l1_l2(
            l1=l1_regularization if l1_regularization is not None else 0.0,
            l2=l2_regularization if l2_regularization is not None else 0.0)
        self.conv_layers = None
        self.projection_layer = None
        self.trainable_layers = None
        self.output_dim = None

        self.input_spec = [InputSpec(ndim=3)]
        super(CNNEncoder, self).__init__(**kwargs)

    def build(self, input_shape):
        self.conv_layers = [Conv1D(filters=self.filters,
                                   kernel_size=kernel_size,
                                   activation=self.conv_layer_activation,
                                   kernel_regularizer=self.regularizer,
                                   bias_regularizer=self.regularizer)
                            for kernel_size in self.kernel_sizes]
        for conv_layer in self.conv_layers:
            with K.name_scope(conv_layer.name):
                conv_layer.build(input_shape)
        maxpool_output_dim = self.filters * len(self.kernel_sizes)
        if self.units is not None:
            self.projection_layer = Dense(self.units)
            projection_input_shape = (input_shape[0], maxpool_output_dim)
            with K.name_scope(self.projection_layer.name):
                self.projection_layer.build(projection_input_shape)
            self.output_dim = self.units
            self.trainable_layers = self.conv_layers + [self.projection_layer]
        else:
            self.projection_layer = None
            self.output_dim = maxpool_output_dim
            self.trainable_layers = self.conv_layers

        super(CNNEncoder, self).build(input_shape)

    def call(self, inputs, mask=None):
        # Each convolution layer returns output of size (batch_size, conv_length, filters),
        # where `conv_length = num_words - kernel_size + 1`. We then max-pool
        # over the whole input sequence for each filter (using K.max), giving a
        # tensor of shape (batch_size, filters), which then gets projected by
        # the projection layer.
        filter_outputs = [K.max(conv_layer.call(inputs), axis=1)
                          for conv_layer in self.conv_layers]
        maxpool_output = Concatenate()(filter_outputs) \
            if len(filter_outputs) > 1 else filter_outputs[0]
        if self.projection_layer:
            result = self.projection_layer.call(maxpool_output)
        else:
            result = maxpool_output
        return result

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)

    def compute_mask(self, inputs, mask=None):
        # By default Keras propagates the mask from a layer that supports masking.
        # We don't need it past this point, so we stop propagating it here.
        return None

    def get_config(self):
        config = {"filters": self.filters,
                  "kernel_sizes": self.kernel_sizes,
                  "units": self.units,
                  "conv_layer_activation": self.conv_layer_activation,
                  "l1_regularization": self.l1_regularization,
                  "l2_regularization": self.l2_regularization
                  }
        base_config = super(CNNEncoder, self).get_config()
        config.update(base_config)
        return config

    @property
    def trainable_weights(self):
        trainable_weights = []
        for layer in self.trainable_layers:
            trainable_weights.extend(layer.trainable_weights)
        return trainable_weights
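
# A quick usage sketch for CNNEncoder (shapes and hyperparameters here are
# illustrative): encode a batch of embedded token sequences into fixed-size
# vectors.
import tensorflow as tf

encoder = CNNEncoder(filters=64, kernel_sizes=(2, 3, 4), units=128)
tokens = tf.random.normal((8, 50, 300))  # (batch, seq_len, input_dim)
encoded = encoder(tokens)                # -> (8, 128); (8, 192) if units=None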
Example n. 27
class Attention(Layer):
    """
    Layer for implementing two common types of attention mechanisms:
    i) global (soft) attention, and ii) local (hard) attention.

    The setting use_bias=False converts the Dense() layers into annotation weight matrices.
    Softmax activation ensures that all weights sum up to 1.
    Read more here to make more sense of the code and implementations:
    i)   https://www.tensorflow.org/beta/tutorials/text/nmt_with_attention
    ii)  https://github.com/philipperemy/keras-attention-mechanism/issues/14
    iii) https://lilianweng.github.io/lil-log/2018/06/24/attention-attention.html

    SUGGESTION: If the model doesn't converge, increase the hidden size of the RNN,
    the batch size, or the param @size. If test accuracy is low, decrease these
    hyperparameters instead.

    NOTE: This implementation takes the hidden states associated with the last timestep as
    the target hidden state (h_t) as suggested by @felixhao28 in i), whereas originally
    attention was proposed for MANY-TO-MANY sequence tasks like machine translation.
    Hence, when trying to predict what word (token) comes after sequence ['I', 'love',
    'biscuits', 'and'], we take h('and') with shape (1, H) as the target hidden state.

    @param size (int): size of attention vector or attention length; number of hidden
           units to decode the attention to with dense layer, presumably before being fed
           to the final softmax dense layer for next token prediction
    @param alignment_type (str): type of attention mechanism to be applied; 'local-m'
           corresponds to monotonic alignment where we take the last @window_width
           timesteps, 'local-p' corresponds to having a Gaussian distribution around
           the predicted aligned position, and 'local-p*' corresponds to the newly
           proposed method of adaptively learning the unique timesteps to attend to
    @param window_width (int): width for set of source hidden states in 'local' attention
    @param score_function (str): alignment score function config; current implementations
           include 'dot', 'general', and 'location', all by Luong et al. 2015,
           'concat' by Bahdanau et al. 2015, and 'scaled_dot' by Vaswani et al. 2017
    """
    def __init__(self,
                 size,
                 alignment_type='global',
                 window_width=None,
                 score_function='general',
                 **kwargs):
        if alignment_type not in ['global', 'local-m', 'local-p', 'local-p*']:
            raise ValueError(
                "Argument for param @alignment_type is not recognized")
        if alignment_type == 'global':
            if window_width is not None:
                raise ValueError(
                    "Can't use windowed approach with global attention")
        if score_function not in [
                'dot', 'general', 'location', 'concat', 'scaled_dot'
        ]:
            raise ValueError(
                "Argument for param @score_function is not recognized")
        super(Attention, self).__init__(**kwargs)
        self.size = size
        self.alignment_type = alignment_type
        self.window_width = window_width  # 2*D
        self.score_function = score_function

    def get_config(self):
        base_config = super(Attention, self).get_config()
        base_config['size'] = self.size
        base_config['alignment_type'] = self.alignment_type
        base_config['window_width'] = self.window_width
        base_config['score_function'] = self.score_function
        return base_config

    def build(self, input_shape
              ):  # Build weight matrices for trainable, adaptive parameters
        if 'local-p' in self.alignment_type:
            self.W_p = Dense(units=input_shape[2], use_bias=False)
            self.W_p.build(input_shape=(None, None,
                                        input_shape[2]))  # (B, 1, H)
            self._trainable_weights += self.W_p.trainable_weights

            self.v_p = Dense(units=1, use_bias=False)
            self.v_p.build(input_shape=(None, None,
                                        input_shape[2]))  # (B, 1, H)
            self._trainable_weights += self.v_p.trainable_weights

        if 'dot' not in self.score_function:  # weight matrix not utilized for 'dot' function
            self.W_a = Dense(units=input_shape[2], use_bias=False)
            self.W_a.build(input_shape=(None, None,
                                        input_shape[2]))  # (B, S*, H)
            self._trainable_weights += self.W_a.trainable_weights

        if self.score_function == 'concat':  # define additional weight matrices
            self.U_a = Dense(units=input_shape[2], use_bias=False)
            self.U_a.build(input_shape=(None, None,
                                        input_shape[2]))  # (B, 1, H)
            self._trainable_weights += self.U_a.trainable_weights

            self.v_a = Dense(units=1, use_bias=False)
            self.v_a.build(input_shape=(None, None,
                                        input_shape[2]))  # (B, S*, H)
            self._trainable_weights += self.v_a.trainable_weights

        self.attention_vector = Dense(units=self.size,
                                      activation='tanh',
                                      use_bias=False)
        self.attention_vector.build(input_shape=(None, 2 *
                                                 input_shape[2]))  # (B, 2*H)
        self._trainable_weights += self.attention_vector.trainable_weights

        super(Attention, self).build(input_shape)

    def call(self, inputs):
        sequence_length = inputs.shape[1]
        ## Get h_t, the current (target) hidden state ##
        target_hidden_state = Lambda(function=lambda x: x[:, -1, :])(
            inputs)  # (B, H)
        target_hidden_state_reshaped = Reshape(
            target_shape=(1,
                          inputs.shape[2]))(target_hidden_state)  # (B, 1, H)

        ## Get h_s, source hidden states through specified attention mechanism ##
        if self.alignment_type == 'global':  ## Global Approach ##
            source_hidden_states = inputs  # (B, S*=S, H)

        elif 'local' in self.alignment_type:  ## Local Approach ##
            if self.window_width is None:  ## Automatically set window width ##
                self.window_width = sequence_length // 2

            if self.alignment_type == 'local-m':  ## Monotonic Alignment ##
                aligned_position = sequence_length
                left_border = aligned_position - self.window_width if aligned_position - self.window_width >= 0 else 0
                source_hidden_states = Lambda(
                    function=lambda x: x[:, left_border:, :])(
                        inputs)  # (B, S*=D, H)

            elif self.alignment_type == 'local-p':  ## Predictive Alignment ##
                aligned_position = self.W_p(target_hidden_state)  # (B, H)
                aligned_position = Activation('tanh')(
                    aligned_position)  # (B, H)
                aligned_position = self.v_p(aligned_position)  # (B, 1)
                aligned_position = Activation('sigmoid')(
                    aligned_position)  # (B, 1)
                aligned_position = aligned_position * sequence_length  # (B, 1)
                source_hidden_states = inputs  # (B, S, H)

            elif self.alignment_type == 'local-p*':  ## Completely Predictive Alignment ##
                aligned_position = self.W_p(inputs)  # (B, S, H)
                aligned_position = Activation('tanh')(
                    aligned_position)  # (B, S, H)
                aligned_position = self.v_p(aligned_position)  # (B, S, 1)
                aligned_position = Activation('sigmoid')(
                    aligned_position)  # (B, S, 1)
                ## Only keep top D values out of the sigmoid activation, and zero-out the rest ##
                aligned_position = tf.squeeze(aligned_position,
                                              axis=-1)  # (B, S)
                top_probabilities = tf.nn.top_k(
                    input=aligned_position, k=self.window_width,
                    sorted=False)  # (values:(B, D), indices:(B, D))
                onehot_vector = tf.one_hot(indices=top_probabilities.indices,
                                           depth=sequence_length)  # (B, D, S)
                onehot_vector = tf.reduce_sum(onehot_vector, axis=1)  # (B, S)
                aligned_position = Multiply()(
                    [aligned_position, onehot_vector])  # (B, S)
                aligned_position = tf.expand_dims(aligned_position,
                                                  axis=-1)  # (B, S, 1)
                source_hidden_states = Multiply()([inputs, aligned_position
                                                   ])  # (B, S*=S(D), H)
                ## Scale back to approximately the original hidden state values ##
                aligned_position += 1  # (B, S, 1)
                source_hidden_states /= aligned_position  # (B, S*=S(D), H)

        ## Compute alignment score through specified function ##
        if 'dot' in self.score_function:
            attention_score = Dot(axes=[2, 1])(
                [source_hidden_states, target_hidden_state])  # (B, S*)
            if self.score_function == 'scaled_dot':
                attention_score = attention_score * (
                    1 / np.sqrt(float(inputs.shape[2])))  # (B, S*)

        elif self.score_function == 'general':
            weighted_hidden_states = self.W_a(
                source_hidden_states)  # (B, S*, H)
            attention_score = Dot(axes=[2, 1])(
                [weighted_hidden_states, target_hidden_state])  # (B, S*)

        elif self.score_function == 'location':
            weighted_target_state = self.W_a(target_hidden_state)  # (B, H)
            attention_score = Activation('softmax')(
                weighted_target_state)  # (B, H)
            attention_score = RepeatVector(
                n=inputs.shape[1])(attention_score)  # (B, S*, H)
            attention_score = tf.reduce_sum(attention_score,
                                            axis=-1)  # (B, S*)

        elif self.score_function == 'concat':
            weighted_hidden_states = self.W_a(
                source_hidden_states)  # (B, S*, H)
            weighted_target_state = self.U_a(
                target_hidden_state_reshaped)  # (B, 1, H)
            weighted_sum = weighted_hidden_states + weighted_target_state  # (B, S*, H)
            weighted_sum = Activation('tanh')(weighted_sum)  # (B, S*, H)
            attention_score = self.v_a(weighted_sum)  # (B, S*, 1)
            attention_score = attention_score[:, :, 0]  # (B, S*)

        attention_weights = Activation('softmax')(attention_score)  # (B, S*)
        if self.alignment_type == 'local-p':  ## Gaussian Distribution ##
            gaussian_estimation = lambda s: tf.exp(-tf.square(
                s - aligned_position) / (2 * tf.square(self.window_width / 2)))
            gaussian_factor = gaussian_estimation(0)
            for i in range(1, sequence_length):
                gaussian_factor = Concatenate()(
                    [gaussian_factor, gaussian_estimation(i)])
            # gaussian_factor: (B, S*)
            attention_weights = attention_weights * gaussian_factor  # (B, S*)

        context_vector = Dot(axes=[1, 1])(
            [source_hidden_states, attention_weights])  # (B, H)
        combined_information = Concatenate()(
            [context_vector, target_hidden_state])  # (B, 2*H)
        attention_vector = self.attention_vector(
            combined_information)  # (B, self.size)
        return attention_vector
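
# A hedged usage sketch (model dimensions illustrative): attend over the full
# LSTM hidden-state sequence, using the last timestep as the target state, to
# predict the next token.
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, LSTM

seq_in = Input(shape=(40,))
h = Embedding(input_dim=10000, output_dim=128)(seq_in)
h = LSTM(128, return_sequences=True)(h)     # (B, S, H): keep all timesteps
h = Attention(size=128, alignment_type='global',
              score_function='general')(h)  # (B, size)
next_token = Dense(10000, activation='softmax')(h)
model = tf.keras.Model(seq_in, next_token)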
Example n. 28
class SetAttentionLayer(tf.keras.layers.Layer):
    dense_options = {'activation': 'relu', 'kernel_initializer': 'he_uniform'}

    def __init__(self,
                 n_layers=2,
                 width=128,
                 latent_width=128,
                 aggregation_function='mean',
                 dot_prod_dim=64,
                 n_heads=4,
                 attn_dropout=0.3):
        super().__init__()
        self.width = width
        self.dot_prod_dim = dot_prod_dim
        self.attn_dropout = attn_dropout
        self.n_heads = n_heads
        self.psi = build_dense_dropout_model(n_layers, width, 0.,
                                             self.dense_options)
        self.psi.add(Dense(latent_width, **self.dense_options))
        self.psi_aggregation = SegmentAggregation(aggregation_function)
        self.rho = Dense(latent_width, **self.dense_options)

    def build(self, input_shape):
        self.psi.build(input_shape)
        encoded_shape = self.psi.compute_output_shape(input_shape)
        agg_shape = self.psi_aggregation.compute_output_shape(encoded_shape)
        self.rho.build(agg_shape)
        self.W_k = self.add_weight('W_k', (encoded_shape[-1] + input_shape[-1],
                                           self.dot_prod_dim * self.n_heads),
                                   initializer='he_uniform')
        self.W_q = self.add_weight('W_q', (self.n_heads, self.dot_prod_dim),
                                   initializer=tf.keras.initializers.Zeros())

    def call(self, inputs, segment_ids, lengths, training=None):
        if training is None:
            training = tf.keras.backend.learning_phase()

        def dropout_attn(input_tensor):
            if self.attn_dropout > 0:
                mask = (tf.random.uniform(tf.shape(input_tensor)[:-1]) <
                        self.attn_dropout)
                return (input_tensor +
                        tf.expand_dims(tf.cast(mask, tf.float32), -1) * -1e9)
            else:
                return tf.identity(input_tensor)

        encoded = self.psi(inputs)
        agg = self.psi_aggregation(encoded, segment_ids)
        agg = self.rho(agg)
        agg_scattered = tf.gather_nd(agg, tf.expand_dims(segment_ids, -1))
        combined = tf.concat([inputs, agg_scattered], axis=-1)
        keys = tf.matmul(combined, self.W_k)
        keys = tf.stack(tf.split(keys, self.n_heads, -1), 1)
        keys = tf.expand_dims(keys, axis=2)
        # should have shape (el, heads, 1, dot_prod_dim)
        queries = tf.expand_dims(tf.expand_dims(self.W_q, -1), 0)
        # should have shape (1, heads, dot_prod_dim, 1)
        preattn = tf.matmul(keys, queries) / tf.sqrt(float(self.dot_prod_dim))
        preattn = tf.squeeze(preattn, -1)
        preattn = smart_cond(training, lambda: dropout_attn(preattn),
                             lambda: tf.identity(preattn))

        per_head_preattn = tf.unstack(preattn, axis=1)
        attentions = []
        for pre_attn in per_head_preattn:
            attentions.append(segment_softmax(pre_attn, segment_ids))
        return attentions

    def compute_output_shape(self, input_shape):
        return list(chain(input_shape[:-1], (self.n_heads, )))
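
# segment_softmax is assumed above but not defined in this snippet. A plausible
# sketch (the original helper may differ): a numerically stable softmax
# computed independently within each segment of a flat batch.
def segment_softmax(logits, segment_ids):
    seg_max = tf.math.segment_max(logits, segment_ids)      # per-segment max
    exp = tf.exp(logits - tf.gather(seg_max, segment_ids))  # stabilized exp
    seg_sum = tf.math.segment_sum(exp, segment_ids)         # per-segment norm
    return exp / tf.gather(seg_sum, segment_ids)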
Example n. 29
class SRFR(Model):
    def __init__(
        self,
        num_filters: int = 62,
        depth: int = 50,
        categories: int = 512,
        num_gc: int = 32,
        num_blocks: int = 23,
        residual_scaling: float = 0.2,
        training: bool = True,
        input_shape=(28, 28, 3),
        num_classes_syn: int = 2,
        both: bool = False,
        num_classes_nat: int = None,
        scale: int = 64,
    ):
        super(SRFR, self).__init__()
        self._training = training
        self.scale = scale
        if both:
            self._natural_input = Conv2D(
                input_shape=input_shape,
                filters=num_filters,
                kernel_size=(3, 3),
                strides=1,
                padding="same",
                name="natural_input",
                activation=mish,
            )
        self._synthetic_input = Conv2D(
            input_shape=input_shape,
            filters=num_filters,
            kernel_size=(3, 3),
            strides=1,
            padding="same",
            name="synthetic_input",
            activation=mish,
        )
        self._super_resolution = GeneratorNetwork(
            num_filters,
            num_gc,
            num_blocks,
            residual_scaling,
        )
        self._face_recognition = ResNet(depth, categories, training, None)
        if self._training:
            if both:
                self._fc_classification_nat = Dense(
                    input_shape=(categories, ),
                    units=num_classes_nat,
                    activation=None,
                    use_bias=False,
                    dtype="float32",
                    name="fully_connected_to_softmax_crossentropy_nat",
                )
                self._fc_classification_nat.build(
                    tf.TensorShape([None, categories]))
                self.net_type = "nat"

            self._fc_classification_syn: Dense = Dense(
                input_shape=(categories, ),
                units=num_classes_syn,
                activation="softmax",
                use_bias=False,
                dtype="float32",
                name="fully_connected_to_softmax_crossentropy_syn",
            )
            self._fc_classification_syn.build(
                tf.TensorShape([None, categories]))

    @tf.function
    def _call_evaluating(self, input_tensor, input_type: str = "syn"):
        if input_type == "syn":
            outputs = self._synthetic_input(input_tensor)
        else:
            outputs = self._natural_input(input_tensor)

        super_resolution_image = self._super_resolution(outputs)
        embeddings = self._face_recognition(super_resolution_image)

        # if input_type == "syn":
        #    classification = self._fc_classification_syn(embeddings)
        # else:
        #    classification = self._fc_classification_nat(embeddings)

        return super_resolution_image, embeddings  # , classification

    # def _calculate_normalized_embeddings(self, embeddings, net_type: str = "syn"):
    #    fc_weights = self.get_weights(net_type)
    #    normalized_weights = tf.Variable(
    #        normalize(fc_weights, name="weights_normalization"),
    #        aggregation=tf.VariableAggregation.NONE,
    #    )
    #    normalized_embeddings = (
    #        normalize(embeddings, axis=1, name="embeddings_normalization") * self.scale
    #    )
    #    # replica = tf.distribute.get_replica_context()
    #    # replica.merge_call(self.set_weights,
    #    #                   args=(normalized_weights, net_type))
    #    self.set_weights(normalized_weights, net_type)
    #    return self.call_fc_classification(normalized_embeddings, net_type)

    def _call_training(self, synthetic_images, natural_images=None):
        synthetic_outputs = self._synthetic_input(synthetic_images)
        synthetic_sr_images = self._super_resolution(synthetic_outputs)
        synthetic_embeddings = self._face_recognition(synthetic_sr_images)
        # synthetic_embeddings = self._calculate_normalized_embeddings(
        #    synthetic_embeddings
        # )
        synthetic_classification = self._fc_classification_syn(
            synthetic_embeddings)
        if natural_images is not None:
            natural_outputs = self._natural_input(natural_images)
            natural_sr_images = self._super_resolution(natural_outputs)
            natural_embeddings = self._face_recognition(natural_sr_images)
            # natural_embeddings = self._calculate_normalized_embeddings(
            #    natural_embeddings
            # )
            natural_classification = self._fc_classification_nat(
                natural_embeddings)
            return (
                synthetic_sr_images,
                synthetic_embeddings,
                synthetic_classification,
                natural_sr_images,
                natural_embeddings,
                natural_classification,
            )

        return synthetic_sr_images, synthetic_embeddings, synthetic_classification

    def call(
        self,
        input_tensor_01,
        input_tensor_02=None,
        training: bool = True,
        input_type: str = "syn",
    ):
        if training:
            return self._call_training(input_tensor_01, input_tensor_02)

        return self._call_evaluating(input_tensor_01, input_type)
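
# mish is used as the Conv2D activation above but not defined in this snippet.
# A sketch of the standard Mish activation (Misra, 2019):
def mish(x):
    # mish(x) = x * tanh(softplus(x))
    return x * tf.math.tanh(tf.math.softplus(x))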
Example n. 30
class BAC(Layer):
    def __init__(self,
                 passage_len=200,
                 activation='softmax',
                 nn_units=300,
                 emb_dim=600,
                 **kwargs):
        self.activation = activation
        self.nn_units = nn_units
        self.emb_dim = emb_dim
        self.passage_len = passage_len
        super(BAC, self).__init__(**kwargs)

    def build(self, input_shape):
        passage_shape = (input_shape[0], self.passage_len, input_shape[-1])
        query_shape = (input_shape[0], input_shape[1] - self.passage_len,
                       input_shape[-1])
        self.dense_1 = Dense(self.nn_units, activation=relu, use_bias=True)
        self.dense_1.build(passage_shape)
        self.dense_2 = Dense(self.nn_units, activation=relu, use_bias=True)
        self.dense_2.build(query_shape)
        self.trainable_weights = self.dense_1.trainable_weights + self.dense_2.trainable_weights

        super(BAC, self).build(input_shape)  # Be sure to call this at the end

    def call(self, stack_input):
        # unstack_input = tf.unstack(stack_input)

        passage_input = stack_input[:, :self.passage_len, :]
        query_input = stack_input[:, self.passage_len:, :]

        passage_dense = self.dense_1(passage_input)
        query_dense = self.dense_2(query_input)
        affinity_matrix = tf.matmul(passage_dense,
                                    tf.transpose(query_dense, perm=[0, 2, 1]))
        affinity_matrix = 1 / np.sqrt(self.emb_dim) * affinity_matrix
        activation = Activation(softmax)
        aligned_p = activation(tf.transpose(affinity_matrix, perm=[0, 2, 1]))
        aligned_q = activation(affinity_matrix)
        passage_aligned = tf.matmul(aligned_p, passage_input)
        query_aligned = tf.matmul(aligned_q, query_input)

        passage_concat = tf.concat([query_aligned, passage_input], 2)
        query_concat = tf.concat([passage_aligned, query_input], 2)

        passage_diff = tf.subtract(query_aligned, passage_input)
        query_diff = tf.subtract(passage_aligned, query_input)

        passage_mul = tf.multiply(query_aligned, passage_input)
        query_mul = tf.multiply(passage_aligned, query_input)

        fm_1 = Factorization_machine(5, name='passage_concat_layer')
        fm_2 = Factorization_machine(5, name='query_concat_layer')
        fm_3 = Factorization_machine(5, name='passage_diff_layer')
        fm_4 = Factorization_machine(5, name='query_diff_layer')
        fm_5 = Factorization_machine(5, name='passage_mul_layer')
        fm_6 = Factorization_machine(5, name='query_mul_layer')

        connecter_1 = fm_1(passage_concat)
        connecter_2 = fm_2(query_concat)
        connecter_3 = fm_3(passage_diff)
        connecter_4 = fm_4(query_diff)
        connecter_5 = fm_5(passage_mul)
        connecter_6 = fm_6(query_mul)

        feature_p = [connecter_1, connecter_3, connecter_5]
        feature_q = [connecter_2, connecter_4, connecter_6]

        features_passage = tf.concat(feature_p, 2)
        features_query = tf.concat(feature_q, 2)

        return features_passage, features_query
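
# Factorization_machine is assumed above but not defined in this snippet. A
# plausible position-wise second-order FM sketch with factor dimension k,
# mapping (B, S, H) -> (B, S, k) via the usual FM identity
# 0.5 * ((x V)^2 - x^2 V^2); the original layer may differ.
class Factorization_machine(Layer):
    def __init__(self, factor_dim, **kwargs):
        self.factor_dim = factor_dim
        super(Factorization_machine, self).__init__(**kwargs)

    def build(self, input_shape):
        self.V = self.add_weight('V',
                                 shape=(input_shape[-1], self.factor_dim),
                                 initializer='glorot_uniform')
        super(Factorization_machine, self).build(input_shape)

    def call(self, x):
        square_of_sum = tf.square(tf.matmul(x, self.V))             # (B, S, k)
        sum_of_square = tf.matmul(tf.square(x), tf.square(self.V))  # (B, S, k)
        return 0.5 * (square_of_sum - sum_of_square)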