コード例 #1
0
class BERTPooler(Layer):
    """Pool a sequence of hidden states by transforming the first token.

    The hidden state at sequence position 0 is passed through a frozen,
    ``tanh``-activated ``Dense`` layer with ``config.hidden_size`` units.

    Args:
        config: Object exposing ``hidden_size``.
        **kwargs: Forwarded to ``Layer.__init__``. ``trainable`` defaults to
            ``False`` unless the caller passes it explicitly.
    """

    def __init__(self, config, **kwargs):
        # ``Layer.__init__`` takes ``trainable`` from its keyword arguments,
        # so assigning ``self.trainable`` before calling it has no lasting
        # effect. Supply the frozen default through ``kwargs`` instead.
        kwargs.setdefault('trainable', False)
        super().__init__(**kwargs)
        self.config = config
        self.dense = Dense(input_shape=[
            config.hidden_size,
        ],
                           units=config.hidden_size,
                           trainable=False,
                           activation='tanh')

    def build(self, input_shape):
        """Build the inner dense layer, then mark this layer as built.

        Args:
            input_shape: Either a tuple whose first entry is ``None`` (a
                batch-first shape) or any other value, which is handed to
                the dense layer unchanged as the trailing shape entry.
        """
        if isinstance(input_shape, tuple) and input_shape[0] is None:
            # The dense layer is applied to the feature (last) axis of the
            # selected token. Indexing with -1 instead of 1 is identical for
            # 2-D shapes and stays correct for (batch, seq, hidden) shapes.
            feature_size = input_shape[-1]
        else:
            feature_size = input_shape

        self.dense.build([self.config.hidden_size, feature_size])
        super(BERTPooler, self).build(input_shape)

    def call(self, hidden_states, **kwargs):
        """Return ``[pooled_output]`` for the first token of ``hidden_states``.

        ``hidden_states`` is indexed as ``[:, 0, :]``, so it must have three
        axes. The result is wrapped in a single-element list.
        """
        # We "pool" the model by simply taking the hidden state corresponding
        # to the first token.
        first_token_tensor = hidden_states[:, 0, :]
        pooled_output = self.dense(first_token_tensor)

        return [pooled_output]
コード例 #2
0
def wider_next_dense(layer, start_dim, total_dim, n_add):
    """Return a copy of a dense layer whose kernel has extra zero input rows.

    The kernel rows are treated as ``total_dim`` equally sized groups. A run
    of ``n_add`` all-zero groups is inserted after the first ``start_dim``
    groups; the bias, unit count and activation are taken from ``layer``.

    Args:
        layer: the dense layer to widen on its input side
        start_dim: number of leading row groups kept before the insertion
        total_dim: number of row groups the current kernel is split into
        n_add: number of zero-filled row groups to insert

    Returns:
        A newly built dense layer holding the widened kernel.
    """
    kernel, bias = layer.get_weights()
    rows_per_group = int(kernel.shape[0] / total_dim)
    insert_at = start_dim * rows_per_group
    last_row = total_dim * rows_per_group

    zero_rows = np.zeros((n_add * rows_per_group, kernel.shape[1]))
    widened_kernel = np.concatenate(
        (kernel[:insert_at], zero_rows, kernel[insert_at:last_row]))

    widened_layer = Dense(layer.units,
                          activation=layer.get_config()['activation'])
    widened_layer.build((None, widened_kernel.shape[0]))
    widened_layer.set_weights((widened_kernel, bias))
    return widened_layer
コード例 #3
0
class BERTIntermediate(Layer):
    """Frozen feed-forward expansion: dense projection followed by ``gelu``.

    Args:
        config: Object exposing ``hidden_size`` and ``intermediate_size``.
        **kwargs: Forwarded to ``Layer.__init__``. ``trainable`` defaults to
            ``False`` unless the caller passes it explicitly.
    """

    def __init__(self, config, **kwargs):
        # ``Layer.__init__`` takes ``trainable`` from its keyword arguments,
        # so assigning ``self.trainable`` before calling it has no lasting
        # effect. Supply the frozen default through ``kwargs`` instead.
        kwargs.setdefault('trainable', False)
        super().__init__(**kwargs)
        self.config = config
        self.dense = Dense(input_shape=(self.config.hidden_size, ),
                           units=self.config.intermediate_size,
                           trainable=False)
        self.intermediate_act_fn = gelu

    def build(self, input_shape):
        """Build the dense layer for ``hidden_size`` input features.

        The shape given to the dense layer comes from ``config`` only;
        ``input_shape`` is just forwarded to the base class.
        """
        self.dense.build(
            (self.config.intermediate_size, self.config.hidden_size))
        super(BERTIntermediate, self).build(input_shape)

    def call(self, x, **kwargs):
        """Project ``x`` with the dense layer and apply the activation.

        ``x`` is flattened to two axes for the dense layer and then reshaped
        back to three axes using its original second dimension.
        """
        hidden_states = x
        original_shape = hidden_states.shape
        hidden_states_r = K.reshape(hidden_states,
                                    (-1, hidden_states.shape[-1]))

        hidden_states = self.dense(hidden_states_r)
        hidden_states_r = K.reshape(
            hidden_states, (-1, original_shape[1], hidden_states.shape[-1]))
        hidden_states = self.intermediate_act_fn(hidden_states_r)
        return hidden_states
コード例 #4
0
class BERTOutput(Layer):
    """Frozen output block: dense projection, dropout, residual layer norm.

    Args:
        config: Object exposing ``hidden_size``, ``intermediate_size`` and
            ``hidden_dropout_prob``.
        **kwargs: Forwarded to ``Layer.__init__``. ``trainable`` defaults to
            ``False`` unless the caller passes it explicitly.
    """

    def __init__(self, config, **kwargs):
        # ``Layer.__init__`` takes ``trainable`` from its keyword arguments,
        # so assigning ``self.trainable`` before calling it has no lasting
        # effect. Supply the frozen default through ``kwargs`` instead.
        kwargs.setdefault('trainable', False)
        super().__init__(**kwargs)
        self.config = config
        self.dense = Dense(input_shape=(config.intermediate_size, ),
                           units=config.hidden_size,
                           trainable=False)
        self.LayerNorm = BERTLayerNorm(config, trainable=False)
        self.dropout = Dropout(config.hidden_dropout_prob, trainable=False)

    def build(self, input_shape):
        """Build the sub-layers from ``config``; ``input_shape`` is unused."""
        self.dense.build(
            (self.config.hidden_size, self.config.intermediate_size))
        self.LayerNorm.build(self.config.hidden_size)
        self.dropout.build(self.config.hidden_size)
        super(BERTOutput, self).build(self.config.hidden_size)

    def call(self, x, **kwargs):
        """Return ``LayerNorm(dropout(dense(hidden_states)) + input_tensor)``.

        Args:
            x: Pair ``(input_tensor, hidden_states)``. ``hidden_states`` is
                flattened to two axes for the dense layer and reshaped back
                to three axes before the residual addition.
        """

        input_tensor, hidden_states = x

        original_shape = hidden_states.shape
        hidden_states_r = K.reshape(hidden_states,
                                    (-1, hidden_states.shape[-1]))

        hidden_states = self.dense(hidden_states_r)
        hidden_states = self.dropout(hidden_states)
        hidden_states_r = K.reshape(
            hidden_states, (-1, original_shape[1], hidden_states.shape[-1]))

        hidden_states = self.LayerNorm(hidden_states_r + input_tensor)
        return hidden_states
コード例 #5
0
def wider_pre_dense(layer, n_add):
    """Return a wider copy of a dense layer by duplicating random units.

    ``n_add`` existing output units are drawn at random (with replacement);
    their kernel columns and bias entries are appended to copies of the
    original weights.

    Args:
        layer: the dense layer to widen on its output side
        n_add: number of units to append

    Returns:
        A newly built ``relu`` dense layer with ``layer.units + n_add`` units.
    """
    kernel, bias = layer.get_weights()
    n_inputs = kernel.shape[0]
    n_outputs = layer.units

    # Indices of the units to replicate, in the order they are appended.
    picked = np.random.randint(n_outputs, size=n_add)

    widened_kernel = np.concatenate((kernel, kernel[:, picked]), axis=1)
    widened_bias = np.append(bias, bias[picked])

    wider_layer = Dense(n_outputs + n_add,
                        input_shape=(n_inputs, ),
                        activation='relu')
    wider_layer.build((None, n_inputs))
    wider_layer.set_weights((widened_kernel, widened_bias))

    return wider_layer
コード例 #6
0
ファイル: layers.py プロジェクト: gajanlee/V-net
class AnswerProbability(Layer):
    """Score each answer encoding against the others and normalise.

    Every encoding is compared with all encodings by dot product, a
    softmax-weighted summary of the encodings is formed, and a single-unit
    dense layer followed by a softmax turns the combined features into one
    probability per encoding.
    """

    def __init__(self, **kwargs):
        super(AnswerProbability, self).__init__(**kwargs)

    def build(self, input_shape):
        # The original author noted an expected input_shape of (None, 5, 200).
        # The dense layer sees three concatenated feature blocks.
        dense_features = 3 * input_shape[-1]
        self.dense_1 = Dense(1, activation="relu")
        self.dense_1.build(input_shape[:-1] + (dense_features,))
        self.trainable_weights = self.dense_1.trainable_weights

        super(AnswerProbability, self).build(input_shape)

    def call(self, answer_encoding):
        transposed = K.permute_dimensions(answer_encoding, (0, 2, 1))
        score_matrix = tf.matmul(answer_encoding, transposed)

        # Mask that is 0 on the diagonal and 1 elsewhere.
        # NOTE(review): the masked scores become 0 rather than -inf, so each
        # encoding still receives weight from itself in the softmax below.
        identity = K.eye(Params.max_passage_count)
        off_diagonal = K.cast(K.equal(identity, K.zeros_like(identity)),
                              dtype="float32")
        score_matrix = Softmax(axis=-1)(score_matrix * off_diagonal)

        answer_encoding_hat = tf.matmul(score_matrix, answer_encoding)
        combined = K.concatenate([
            answer_encoding,
            answer_encoding_hat,
            answer_encoding * answer_encoding_hat,
        ])

        scores = K.squeeze(self.dense_1(combined), axis=-1)
        return Softmax(axis=-1)(scores)

    def compute_output_shape(self, input_shape):
        return (None, input_shape[1])
コード例 #7
0
    def build(self, input_shape):
        """Create the optional per-task weights, then the ``Dense`` weights.

        When ``use_task_bias`` / ``use_task_gain`` is set, one weight vector
        of length ``units`` is added per entry of ``task_features`` and stored
        in a dict keyed by that entry; otherwise the attribute is ``None``.
        Both groups use the bias initializer, regularizer and constraint.
        """

        def weights_per_task(name_template):
            # One weight per task feature, created in iteration order.
            created = {}
            for feature in self.task_features:
                created[feature] = self.add_weight(
                    shape=(self.units, ),
                    initializer=self.bias_initializer,
                    name=name_template % (str(feature)),
                    regularizer=self.bias_regularizer,
                    constraint=self.bias_constraint)
            return created

        if self.use_task_bias:
            self.task_bias = weights_per_task('task_bias%s')
        else:
            self.task_bias = None

        if self.use_task_gain:
            self.task_gain = weights_per_task('task_gain%s')
        else:
            self.task_gain = None

        Dense.build(self, input_shape)
コード例 #8
0
ファイル: layers.py プロジェクト: gajanlee/V-net
class SpanBegin(Layer):
    """Turn two concatenated inputs into a softmax over one score per step.

    The layer takes ``[merged_context, modeled_passage]``, concatenates them
    on the last axis, scores each step with a single-unit dense layer and
    normalises the scores with a softmax.
    """

    def __init__(self, **kwargs):
        super(SpanBegin, self).__init__(**kwargs)

    def build(self, input_shape):
        # The dense layer sees both inputs concatenated on the last axis.
        concat_dim = input_shape[0][-1] + input_shape[1][-1]
        self.dense_1 = Dense(units=1)
        self.dense_1.build(input_shape[0][:-1] + (concat_dim, ))
        self.trainable_weights = self.dense_1.trainable_weights
        super(SpanBegin, self).build(input_shape)

    def call(self, inputs):
        merged_context, modeled_passage = inputs
        features = K.concatenate([merged_context, modeled_passage])
        scores = TimeDistributed(self.dense_1)(features)
        return Softmax()(K.squeeze(scores, axis=-1))

    def compute_output_shape(self, input_shape):
        # Same as the first input's shape without its feature axis.
        merged_context_shape, _ = input_shape
        return merged_context_shape[:-1]

    def get_config(self):
        return super().get_config()
コード例 #9
0
ファイル: layers.py プロジェクト: gajanlee/V-net
class ContentIndice(Layer):
    """Two stacked dense layers that reduce each step to a single score.

    The first dense layer (``relu``) has ``embedding_dim`` units, the second
    (``linear``) has one; the trailing singleton axis is squeezed away. No
    softmax is applied to the result.
    """

    def __init__(self, **kwargs):
        super(ContentIndice, self).__init__(**kwargs)

    def build(self, input_shape):
        hidden_shape = input_shape[:-1] + (embedding_dim, )

        self.dense_1 = Dense(embedding_dim, activation="relu")
        self.dense_1.build(input_shape)
        self.dense_2 = Dense(1, activation="linear")
        self.dense_2.build(hidden_shape)

        self.trainable_weights = (self.dense_1.trainable_weights +
                                  self.dense_2.trainable_weights)

        super(ContentIndice, self).build(input_shape)

    def call(self, passage_modeling):
        scores = self.dense_2(self.dense_1(passage_modeling))
        return K.squeeze(scores, axis=-1)

    def compute_output_shape(self, input_shape):
        return input_shape[:-1]

    def get_config(self):
        return super().get_config()
コード例 #10
0
    def build(self, input_shape):
        """Build the ``Dense`` weights and optionally wrap them in dropout.

        When ``needs_drop`` is set, ``kernel`` (and ``bias`` when ``drop_bias``
        is set) is replaced by a tensor that applies dropout with rate
        ``prob`` in the training phase and is the plain weight otherwise.
        """
        Dense.build(self, input_shape)

        if not self.needs_drop:
            return

        def dropped_when_training(weight):
            return K.in_train_phase(
                K.dropout(weight, self.prob, self.drop_noise_shape), weight)

        self.kernel = dropped_when_training(self.kernel)

        if self.drop_bias:
            self.bias = dropped_when_training(self.bias)
コード例 #11
0
class Attention_local(Layer):
    """Local attention with a Gaussian window around a predicted position.

    Inputs are ``[source_hidden_state, target_hidden_state]``; the code's own
    shape comments give them as (B, S, H) and (B, H). The layer predicts an
    aligned position from the target state, weights the source states with
    attention scores damped by a Gaussian centred on that position, and sums
    over the sequence axis.

    Args:
        **kwargs: Standard ``Layer`` keyword arguments (``name``, ``dtype``,
            ...). Optional, so ``Attention_local()`` keeps working.
    """

    def __init__(self, **kwargs):
        super(Attention_local, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the three dense projections from the first input's shape."""
        self.sequence_length = input_shape[0][
            1]  # the number of words (max_len)
        self.output_dimensions = input_shape[0][
            2]  # output dim [hidden vec dimensions]
        self.W_p = Dense(self.output_dimensions)
        self.W_p.build(input_shape=(None, None,
                                    self.output_dimensions))  # (B, 1, H)

        self.W_a = Dense(self.output_dimensions)
        self.W_a.build(input_shape=(None, None,
                                    self.output_dimensions))  # (B, 1, H)

        self.V_p = tf.keras.layers.Dense(1)
        self.V_p.build(input_shape=(None, None, self.output_dimensions))
        self.window_width = WINDOW_WIDTH
        super(Attention_local, self).build(input_shape)

    def call(self, inputs):
        """Return the context vector for ``[source, target]`` hidden states."""
        target_hidden_state = inputs[1]  # (B , H)
        source_hidden_state = inputs[0]  # (B, S, H)
        hidden_with_time_axis = tf.expand_dims(input=target_hidden_state,
                                               axis=1)  # (B, 1, H)

        # p_t = S * sigmoid(V_p(tanh(W_p(h_t))))
        aligned_position = self.V_p(K.tanh(
            self.W_p(hidden_with_time_axis)))  # (B, 1, 1)
        aligned_position = K.sigmoid(aligned_position)  # (B, 1, 1)
        aligned_position = aligned_position * self.sequence_length  # (B, 1, 1)

        # NOTE(review): softmax is applied twice here, both times over the
        # last axis. Kept as in the original; confirm this is intended.
        attention_score = K.softmax(
            source_hidden_state * self.W_a(hidden_with_time_axis))  # (B, S, H)
        attention_weights = Activation('softmax')(attention_score)  # (B, S, H)

        # Gaussian window exp(-2 * ((s - p_t) / window_width)^2) for every
        # position s, computed in one broadcast instead of one concat op per
        # position: (1, S, 1) - (B, 1, 1) -> (B, S, 1).
        positions = tf.cast(tf.range(int(self.sequence_length)),
                            aligned_position.dtype)
        positions = tf.reshape(positions, (1, -1, 1))
        gaussian_factor = tf.exp(-2 * tf.square(
            (positions - aligned_position) / self.window_width))
        attention_weights = attention_weights * gaussian_factor  # (B, S, H)

        # C_t = sum over s of (alpha_t(s) * h_s)
        context_embedding = attention_weights * source_hidden_state  # (B, S, H)
        context_vector = tf.reduce_sum(context_embedding, axis=1)

        return context_vector
コード例 #12
0
class SentenceEncoderBlock(Layer):
    """Encoder block: multi-head attention plus a two-layer feed-forward net.

    Each sub-block is followed by dropout, a residual connection and a
    (shared) layer normalisation. ``call`` returns the encoded tensor and the
    attention weights produced by the multi-head attention layer.

    Args:
        output_dim: width of both feed-forward dense layers
        attention_dim: dimension passed to ``MultiHeadAttention`` for dq/dk/dv
        n_heads: number of attention heads
        dropout: dropout rate applied after each sub-block while training
        **kwargs: forwarded to ``Layer.__init__``
    """

    def __init__(self,
                 output_dim,
                 attention_dim,
                 n_heads,
                 dropout=0.3,
                 **kwargs):
        # Output dimension of the encoder after the fully connected layers.
        self.output_dim = output_dim
        self.n_heads = n_heads
        # Dimension used for dq/dk/dv in multi-head attention.
        self.attention_dim = attention_dim
        self.activation = "relu"
        self.dropout = dropout
        super(SentenceEncoderBlock, self).__init__(**kwargs)

    def build(self, input_shape):
        """Build all sub-layers and register their trainable weights."""

        # "Two linear transformations with a ReLU activation in between" #
        self.dense_1 = Dense(self.output_dim, activation=self.activation)
        self.dense_1.build(input_shape)
        self._trainable_weights += self.dense_1.trainable_weights

        self.dense_2 = Dense(self.output_dim)
        self.dense_2.build(input_shape)
        self._trainable_weights += self.dense_2.trainable_weights

        # MultiHeadAttention #
        self.multihead_attention = MultiHeadAttention(self.attention_dim,
                                                      self.n_heads)
        self.multihead_attention.build(input_shape)
        self._trainable_weights += self.multihead_attention.trainable_weights

        # LayerNorm #
        self.layer_normalization = LayerNormalization()
        self.layer_normalization.build(input_shape)
        self._trainable_weights += self.layer_normalization.trainable_weights

        super(SentenceEncoderBlock, self).build(input_shape)

    def compute_mask(self, inputs, mask=None):
        # Just pass the received mask from previous layer, to the next layer
        return mask

    def _apply_dropout(self, tensor, training=None):
        """Apply dropout in the training phase only; identity otherwise."""
        return K.in_train_phase(lambda: K.dropout(tensor, self.dropout),
                                tensor,
                                training=training)

    def call(self, x, mask=None, training=None):
        """Encode ``x``; returns ``[encoded, attention_weights]``.

        Args:
            x: input tensor
            mask: accepted for Keras compatibility; not used here
            training: optional explicit training flag. When ``None`` the
                backend learning phase decides whether dropout is active.
        """
        z, all_attns = self.multihead_attention(x)
        # A bare K.dropout call drops units on every forward pass, inference
        # included, so it is gated on the training phase.
        z = self._apply_dropout(z, training)
        xz = self.layer_normalization(x + z)
        h_xz = self.dense_1(xz)
        h_xz = self.dense_2(h_xz)
        h_xz = self._apply_dropout(h_xz, training)
        h_xz = self.layer_normalization(h_xz + xz)
        if training is None:
            # The ops after the dropout produce fresh tensors, so mark the
            # output as learning-phase dependent for Keras versions that
            # decide from this flag whether to feed the learning phase.
            h_xz._uses_learning_phase = True
        return [h_xz, all_attns]

    def compute_output_shape(self, input_shape):
        return [(input_shape[0], input_shape[1], self.output_dim),
                (input_shape[0], self.n_heads, input_shape[1], input_shape[1])]
コード例 #13
0
ファイル: ntm.py プロジェクト: text-machine-lab/entity-coref
def get_dense_controller(controller_output_dim,
                         controller_input_dim,
                         activation='relu',
                         batch_size=1):
    """Create and build a dense controller with a fixed batch shape.

    Args:
        controller_output_dim: number of units of the dense layer
        controller_input_dim: size of the input feature axis
        activation: activation passed to the dense layer
        batch_size: batch dimension used for the batch input shape

    Returns:
        The built dense layer.
    """
    batch_shape = (batch_size, controller_input_dim)
    dense_controller = Dense(controller_output_dim,
                             activation=activation,
                             batch_input_shape=batch_shape)
    dense_controller.build(input_shape=batch_shape)
    return dense_controller
コード例 #14
0
ファイル: ttfs.py プロジェクト: wang678/snn_toolbox
    def build(self, input_shape):
        """Creates the layer neurons and connections.

        First builds the ``Dense`` weights via ``Dense.build``, then calls
        ``self.init_neurons`` with the same argument.

        Parameters
        ----------

        input_shape: Union[list, tuple, Any]
            Keras tensor (future input to layer) or list/tuple of Keras tensors
            to reference for weight shape computations. It is passed unchanged
            to both ``Dense.build`` and ``init_neurons``.
        """

        # The Dense weights are created before the neuron state is set up.
        Dense.build(self, input_shape)
        self.init_neurons(input_shape)
コード例 #15
0
class SpanEnd(Layer):
    """Predict a softmax over one score per step from four inputs.

    Inputs are ``[encoded_passage, merged_context, modeled_passage,
    span_begin_probabilities]``. The modeled passage is summarised with the
    span-begin probabilities as weights, the summary is tiled along the
    passage axis, combined with the other features, run through a
    bidirectional LSTM and scored by a single-unit dense layer.
    """

    def __init__(self, **kwargs):
        super(SpanEnd, self).__init__(**kwargs)

    def build(self, input_shape):
        first_shape = input_shape[0]
        emdim = first_shape[-1] // 2
        leading_dims = first_shape[:-1]

        # Feature widths are fixed multiples of emdim.
        self.bilstm_1 = Bidirectional(LSTM(emdim, return_sequences=True))
        self.bilstm_1.build(leading_dims + (emdim * 14, ))

        self.dense_1 = Dense(units=1)
        self.dense_1.build(leading_dims + (emdim * 10, ))

        self.trainable_weights = (self.bilstm_1.trainable_weights +
                                  self.dense_1.trainable_weights)
        super(SpanEnd, self).build(input_shape)

    def call(self, inputs):
        (encoded_passage, merged_context, modeled_passage,
         span_begin_probabilities) = inputs

        # Summary of the modeled passage weighted by the begin probabilities.
        begin_weights = K.expand_dims(span_begin_probabilities, axis=-1)
        begin_summary = K.sum(begin_weights * modeled_passage, -2)
        begin_summary = K.expand_dims(begin_summary, axis=1)

        # Repeat the summary once per step of the encoded passage.
        repeats = K.concatenate([[1], [K.shape(encoded_passage)[1]], [1]],
                                axis=0)
        tiled_summary = K.tile(begin_summary, repeats)

        bilstm_input = K.concatenate([
            merged_context, modeled_passage, tiled_summary,
            modeled_passage * tiled_summary
        ])
        bilstm_output = self.bilstm_1(bilstm_input)

        dense_input = K.concatenate([merged_context, bilstm_output])
        scores = TimeDistributed(self.dense_1)(dense_input)

        return Softmax()(K.squeeze(scores, axis=-1))

    def compute_output_shape(self, input_shape):
        # Same as the second input's shape without its feature axis.
        _, merged_context_shape, _, _ = input_shape
        return merged_context_shape[:-1]

    def get_config(self):
        return super().get_config()
コード例 #16
0
def dense_to_deeper_layer(dense_layer):
    """Create a ``relu`` dense layer of the same width with identity weights.

    Args:
        dense_layer: the dense layer whose unit count is reused

    Returns:
        A built dense layer whose kernel is the identity matrix and whose
        bias is all zeros.
    """
    width = dense_layer.units
    identity_layer = Dense(width, activation='relu')
    identity_layer.build((None, width))
    identity_layer.set_weights((np.eye(width), np.zeros(width)))
    return identity_layer
コード例 #17
0
    def build(self, input_shape):
        """Creates the layer neurons and connections.

        Builds the ``Dense`` weights, initialises the neurons and, when the
        ``bias_relaxation`` option of the ``cell`` config section is enabled,
        stores a copy of the current bias in ``self.b0`` and registers an
        update of the bias computed by ``self.update_b()``.

        Parameters
        ----------

        input_shape: Union[list, tuple, Any]
            Keras tensor (future input to layer) or list/tuple of Keras tensors
            to reference for weight shape computations.
        """

        Dense.build(self, input_shape)
        self.init_neurons(input_shape)

        if not self.config.getboolean('cell', 'bias_relaxation'):
            return

        # b0 must exist before update_b() is evaluated.
        initial_bias = k.get_value(self.bias)
        self.b0 = k.variable(initial_bias)
        bias_update = (self.bias, self.update_b())
        self.add_update([bias_update])
コード例 #18
0
class Highway(Layer):
    """Highway layer: gated mix of a transformed input and the input itself.

    Computes ``T * H + (1 - T) * x`` where ``T`` is a sigmoid gate and ``H``
    an activated dense transform, both with as many units as the input's
    last axis.

    Code adapted from github:
    https://github.com/batikim09/Keras_highways/blob/master/src/conv2d_highway.py
    """
    activation = None
    transform_gate_bias = None

    def __init__(self, activation='relu', transform_gate_bias=-2, **kwargs):
        self.activation = activation
        self.transform_gate_bias = transform_gate_bias
        super(Highway, self).__init__(**kwargs)

    def build(self, input_shape):
        units = input_shape[-1]

        # dense_1 drives the transform gate, dense_2 the transformed data.
        gate_bias_init = Constant(self.transform_gate_bias)
        self.dense_1 = Dense(units=units, bias_initializer=gate_bias_init)
        self.dense_1.build(input_shape)
        self.dense_2 = Dense(units=units)
        self.dense_2.build(input_shape)

        self.trainable_weights = (self.dense_1.trainable_weights +
                                  self.dense_2.trainable_weights)
        # The base-class build is called last.
        super(Highway, self).build(input_shape)

    def call(self, x):
        dim = K.int_shape(x)[-1]

        transform_gate = Activation("sigmoid")(self.dense_1(x))
        carry_gate = Lambda(lambda gate: 1.0 - gate,
                            output_shape=(dim, ))(transform_gate)
        transformed_data = Activation(self.activation)(self.dense_2(x))

        transformed_gated = Multiply()([transform_gate, transformed_data])
        identity_gated = Multiply()([carry_gate, x])
        return Add()([transformed_gated, identity_gated])

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        config = super().get_config()
        config['activation'] = self.activation
        config['transform_gate_bias'] = self.transform_gate_bias
        return config
コード例 #19
0
class Discriminator(object):
    """LSTM followed by a single-unit dense layer, with clipped weights.

    Every trainable LSTM weight and the dense kernel get their own
    ``ClipConstraint(1e-2)``. ``weights`` and ``constraints`` collect the
    trainable weights and constraints of both layers.

    Args:
        x_k: stored on the instance as ``x_k``; not used by this class itself
        n_steps: sequence length used to build the LSTM
        hidden_dim: number of LSTM units
    """

    def __init__(self, x_k, n_steps, hidden_dim):
        self.x_k = x_k
        self.hidden_dim = hidden_dim

        def make_clip():
            # A fresh constraint object for every weight.
            return ClipConstraint(1e-2)

        self.lstm = LSTM(hidden_dim)
        self.lstm.build((None, n_steps, 1))
        for weight in self.lstm.trainable_weights:
            self.lstm.constraints[weight] = make_clip()

        self.dense = Dense(1, W_constraint=make_clip())
        self.dense.build((None, hidden_dim))

        self.weights = (self.lstm.trainable_weights +
                        self.dense.trainable_weights)
        self.constraints = self.lstm.constraints.copy()
        self.constraints.update(self.dense.constraints)

    def call(self, x):
        """Run ``x`` through the LSTM and then the dense layer."""
        lstm_output = self.lstm.call(x)
        return self.dense.call(lstm_output)
コード例 #20
0
class BERTSelfOutput(Layer):
    """Frozen self-attention output: dense, dropout, residual layer norm.

    Args:
        config: Object exposing ``hidden_size`` and ``hidden_dropout_prob``.
        **kwargs: Forwarded to ``Layer.__init__``. ``trainable`` defaults to
            ``False`` unless the caller passes it explicitly.
    """

    def __init__(self, config, **kwargs):
        # ``Layer.__init__`` takes ``trainable`` from its keyword arguments,
        # so assigning ``self.trainable`` before calling it has no lasting
        # effect. Supply the frozen default through ``kwargs`` instead.
        kwargs.setdefault('trainable', False)
        super().__init__(**kwargs)
        self.config = config
        self.dense = Dense(input_shape=(self.config.hidden_size, ),
                           units=self.config.hidden_size,
                           trainable=False)
        self.LayerNorm = BERTLayerNorm(self.config, trainable=False)
        self.dropout = Dropout(self.config.hidden_dropout_prob,
                               trainable=False)

    def build(self, input_shape):
        """Build the sub-layers, then mark this layer as built.

        Args:
            input_shape: Either a tuple whose first entry is ``None`` (a
                batch-first shape) or any other value, which is handed to
                the dense layer unchanged as the trailing shape entry.
        """
        if isinstance(input_shape, tuple) and input_shape[0] is None:
            # ``call`` flattens the hidden states onto their last axis before
            # the dense layer. Indexing with -1 instead of 1 is identical
            # for 2-D shapes and stays correct for (batch, seq, hidden).
            feature_size = input_shape[-1]
        else:
            feature_size = input_shape

        self.dense.build((self.config.hidden_size, feature_size))

        self.LayerNorm.build(self.config.hidden_size)
        self.dropout.build(self.config.hidden_size)
        super(BERTSelfOutput, self).build(input_shape)

    def call(self, x, **kwargs):
        """Return ``LayerNorm(dropout(dense(hidden_states)) + input_tensor)``.

        Args:
            x: Pair ``(input_tensor, hidden_states)``. ``hidden_states`` is
                flattened to two axes for the dense layer and reshaped back
                to its original second and third dimensions afterwards.
        """
        input_tensor, hidden_states = x

        original_shape = hidden_states.shape
        hidden_states_r = K.reshape(hidden_states,
                                    (-1, hidden_states.shape[-1]))

        hidden_states = self.dense(hidden_states_r)
        hidden_states = self.dropout(hidden_states)

        hidden_states_r = K.reshape(hidden_states,
                                    (-1, original_shape[1], original_shape[2]))
        hidden_states = self.LayerNorm(hidden_states_r + input_tensor)
        return hidden_states
コード例 #21
0
ファイル: layers.py プロジェクト: gajanlee/V-net
class SpanBegin(Layer):
    """Score every step of a single input and normalise with a softmax.

    A single-unit dense layer is applied per step through
    ``TimeDistributed``; the trailing singleton axis is squeezed before the
    softmax.
    """

    def __init__(self, **kwargs):
        super(SpanBegin, self).__init__(**kwargs)

    def build(self, input_shape):
        # The original author noted an expected input_shape of
        # (None, 200, embeddim*8+embeddim*2).
        # The dense layer is built from the first and last shape entries.
        dense_shape = (input_shape[0], input_shape[-1])
        self.dense_1 = Dense(units=1)
        self.dense_1.build(dense_shape)
        self.trainable_weights = self.dense_1.trainable_weights
        super(SpanBegin, self).build(input_shape)

    def call(self, span_begin_input):
        scores = TimeDistributed(self.dense_1)(span_begin_input)
        return Softmax()(K.squeeze(scores, axis=-1))

    def compute_output_shape(self, input_shape):
        return input_shape[:-1]

    def get_config(self):
        return super().get_config()
コード例 #22
0
ファイル: layers.py プロジェクト: gajanlee/V-net
class SpanEnd(Layer):
    """Score every step of a single input and normalise with a softmax.

    The single-unit dense layer is built for ``embedding_dim * 10`` input
    features and applied per step through ``TimeDistributed``.
    """

    def __init__(self, **kwargs):
        super(SpanEnd, self).__init__(**kwargs)

    def build(self, input_shape):
        dense_shape = (input_shape[0], embedding_dim*10)
        self.dense_1 = Dense(units=1)
        self.dense_1.build(dense_shape)
        self.trainable_weights = self.dense_1.trainable_weights
        super(SpanEnd, self).build(input_shape)

    def call(self, span_end_input):
        scores = TimeDistributed(self.dense_1)(span_end_input)
        return Softmax()(K.squeeze(scores, axis=-1))

    def compute_output_shape(self, input_shape):
        return input_shape[:-1]

    def get_config(self):
        return super().get_config()
コード例 #23
0
class MDN(Layer):
    """A Mixture Density Network layer for Keras.

    Three dense heads are applied to the same input and concatenated:
        - ``mdn_mus``: ``num_mixtures * output_dimension`` values with the
          ``elu_plus_one_plus_epsilon`` activation
        - ``mdn_sigmas``: ``num_mixtures * output_dimension`` values with the
          ``elu_plus_one_plus_epsilon`` activation
        - ``mdn_pi``: ``num_mixtures`` values with a ``softmax`` activation

    NOTE(review): the means share the variances' activation and the mixture
    weights are emitted after a softmax rather than as logits. Confirm that
    the loss and sampling functions used with this layer expect that.

    A loss function needs to be constructed with the same output dimension
    and number of mixtures.
    """

    def __init__(self, output_dimension, num_mixtures, **kwargs):
        self.output_dim = output_dimension
        self.num_mix = num_mixtures
        per_component_units = self.num_mix * self.output_dim
        with tf.name_scope('MDN'):
            self.mdn_mus = Dense(per_component_units,
                                 activation=elu_plus_one_plus_epsilon,
                                 name='mdn_mus')
            self.mdn_sigmas = Dense(per_component_units,
                                    activation=elu_plus_one_plus_epsilon,
                                    name='mdn_sigmas')
            self.mdn_pi = Dense(self.num_mix,
                                name='mdn_pi',
                                activation='softmax')
        super(MDN, self).__init__(**kwargs)

    def build(self, input_shape):
        heads = (self.mdn_mus, self.mdn_sigmas, self.mdn_pi)
        for head in heads:
            head.build(input_shape)

        # Expose the heads' weights as this layer's weights, in head order.
        self.trainable_weights = (self.mdn_mus.trainable_weights +
                                  self.mdn_sigmas.trainable_weights +
                                  self.mdn_pi.trainable_weights)
        self.non_trainable_weights = (self.mdn_mus.non_trainable_weights +
                                      self.mdn_sigmas.non_trainable_weights +
                                      self.mdn_pi.non_trainable_weights)
        super(MDN, self).build(input_shape)

    def call(self, x, mask=None):
        with tf.name_scope('MDN'):
            head_outputs = [
                self.mdn_mus(x),
                self.mdn_sigmas(x),
                self.mdn_pi(x),
            ]
            mdn_out = keras.layers.concatenate(head_outputs,
                                               name='mdn_outputs')
        return mdn_out

    def compute_output_shape(self, input_shape):
        """Returns output shape, showing the number of mixture parameters."""
        n_params = (2 * self.output_dim * self.num_mix) + self.num_mix
        return (input_shape[0], n_params)

    def get_config(self):
        merged = dict(super(MDN, self).get_config())
        merged.update({
            "output_dimension": self.output_dim,
            "num_mixtures": self.num_mix
        })
        return merged
コード例 #24
0
ファイル: mdn.py プロジェクト: YaleDHLab/dancing-with-data
class MDN(Layer):
    """Mixture density layer with one shared sigma per mixture component.

    Three dense heads are applied to the same input and concatenated on the
    last axis, in this order:
        - ``mdn_mus``: ``num_mixes * output_dim`` values, no activation
        - ``mdn_sigmas``: ``num_mixes`` values, ``exp`` activation
        - ``mdn_alphas``: ``num_mixes`` values, ``softmax`` activation

    Args:
        output_dim: dimensionality of each mixture mean
        num_mixes: number of mixture components
        kernel: name of the loss kernel; only ``'unigaussian'`` is handled
            by ``get_loss_func``
        **kwargs: forwarded to ``Layer.__init__``
    """

    def __init__(self, output_dim, num_mixes, kernel='unigaussian', **kwargs):
        self.output_dim = output_dim
        self.kernel = kernel
        self.num_mixes = num_mixes

        with tf.name_scope('MDN'):
            self.mdn_mus = Dense(self.num_mixes * self.output_dim,
                                 name='mdn_mus')
            self.mdn_sigmas = Dense(self.num_mixes,
                                    activation=K.exp,
                                    name='mdn_sigmas')
            self.mdn_alphas = Dense(self.num_mixes,
                                    activation=K.softmax,
                                    name='mdn_alphas')
        super(MDN, self).__init__(**kwargs)

    def build(self, input_shape):
        """Build the three heads and expose their weights as this layer's."""
        self.mdn_mus.build(input_shape)
        self.mdn_sigmas.build(input_shape)
        self.mdn_alphas.build(input_shape)
        self.trainable_weights = self.mdn_mus.trainable_weights + \
          self.mdn_sigmas.trainable_weights + \
          self.mdn_alphas.trainable_weights
        self.non_trainable_weights = self.mdn_mus.non_trainable_weights + \
          self.mdn_sigmas.non_trainable_weights + \
          self.mdn_alphas.non_trainable_weights
        self.built = True

    def call(self, x, mask=None):
        """Return the concatenated ``[mus, sigmas, alphas]`` for ``x``."""
        with tf.name_scope('MDN'):
            mdn_out = keras.layers.concatenate(
                [self.mdn_mus(x),
                 self.mdn_sigmas(x),
                 self.mdn_alphas(x)],
                name='mdn_outputs')
        return mdn_out

    def get_output_shape_for(self, input_shape):
        """Return the output shape (legacy hook name).

        The last axis holds every mixture parameter produced by ``call``:
        ``num_mixes * output_dim`` means plus ``num_mixes`` sigmas plus
        ``num_mixes`` alphas.
        """
        n_params = self.num_mixes * self.output_dim + 2 * self.num_mixes
        return (input_shape[0], n_params)

    def compute_output_shape(self, input_shape):
        """Return the output shape; same result as ``get_output_shape_for``."""
        return self.get_output_shape_for(input_shape)

    def get_config(self):
        config = {
            'output_dim': self.output_dim,
            'num_mixes': self.num_mixes,
            'kernel': self.kernel
        }
        base_config = super(MDN, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def get_loss_func(self):
        """Return the loss for ``self.kernel``.

        Returns:
            The negative log-likelihood loss when ``kernel`` is
            ``'unigaussian'``; ``None`` for any other kernel name.
        """
        def unigaussian_loss(y_true, y_pred):
            mix = tf.range(start=0, limit=self.num_mixes)
            # Split the layer output back into the three heads; the sizes
            # mirror the concatenation order used in ``call``.
            out_mu, out_sigma, out_alphas = tf.split(
                y_pred,
                num_or_size_splits=[
                    self.num_mixes * self.output_dim, self.num_mixes,
                    self.num_mixes
                ],
                axis=-1,
                name='mdn_coef_split')

            def loss_i(i):
                # Weighted density of y_true under mixture component i.
                batch_size = tf.shape(out_sigma)[0]
                sigma_i = tf.slice(out_sigma, [0, i], [batch_size, 1],
                                   name='mdn_sigma_slice')
                alpha_i = tf.slice(out_alphas, [0, i], [batch_size, 1],
                                   name='mdn_alpha_slice')
                mu_i = tf.slice(out_mu, [0, i * self.output_dim],
                                [batch_size, self.output_dim],
                                name='mdn_mu_slice')
                dist = tf.distributions.Normal(loc=mu_i, scale=sigma_i)
                loss = dist.prob(
                    y_true)  # find the pdf around each value in y_true
                loss = alpha_i * loss
                return loss

            result = tf.map_fn(loss_i,
                               mix,
                               dtype=tf.float32,
                               name='mix_map_fn')
            # Sum over components, then average the negative log-likelihood.
            result = tf.reduce_sum(result, axis=0, keep_dims=False)
            result = -tf.log(result)
            result = tf.reduce_mean(result)
            return result

        if self.kernel == 'unigaussian':
            with tf.name_scope('MDNLayer'):
                return unigaussian_loss
        # Unknown kernels keep the original behaviour of yielding no loss.
        return None
コード例 #25
0
class NSE(Layer):
    '''
    Simple Neural Semantic Encoder.

    The layer owns three sub-layers: a reader LSTM that encodes the whole input
    sequence, a Dense composer that merges each read output with a summary of
    the memory, and a writer LSTM whose step function is called once per
    timestep to produce the output and drive the memory update.
    '''
    def __init__(self,
                 output_dim,
                 input_length=None,
                 composer_activation='linear',
                 return_mode='last_output',
                 weights=None,
                 **kwargs):
        '''
        Arguments:
        output_dim (int)
        input_length (int)
        composer_activation (str): activation used in the MLP
        return_mode (str): One of last_output, all_outputs, output_and_memory
            This is analogous to the return_sequences flag in Keras' Recurrent.
            last_output returns only the last h_t
            all_outputs returns the whole sequence of h_ts
            output_and_memory returns the last output and the last memory concatenated
                (needed if this layer is followed by a MMA-NSE)
        weights (list): Initial weights

        Raises:
        Exception: if return_mode is not one of the three supported values.
        '''
        # Reject a bad mode before any sub-layer is created.
        if return_mode not in [
                "last_output", "all_outputs", "output_and_memory"
        ]:
            raise Exception("Unrecognized return mode: %s" % (return_mode))
        self.return_mode = return_mode
        self.output_dim = output_dim
        self.input_dim = output_dim  # Equation 2 in the paper makes this assumption.
        self.initial_weights = weights
        self.input_spec = [InputSpec(ndim=3)]
        self.input_length = input_length
        self.composer_activation = composer_activation
        super(NSE, self).__init__(**kwargs)
        # self.name is only available after the base initialiser has run.
        self.reader = LSTM(self.output_dim,
                           return_sequences=True,
                           name="{}_reader".format(self.name))
        # TODO: Let the writer use parameter dropout and any consume_less mode.
        # Setting dropout to 0 here to eliminate the need for constants.
        # Setting consume_less to mem to eliminate need for preprocessing
        self.writer = LSTM(self.output_dim,
                           dropout_W=0.0,
                           dropout_U=0.0,
                           consume_less="mem",
                           name="{}_writer".format(self.name))
        self.composer = Dense(self.output_dim * 2,
                              activation=self.composer_activation,
                              name="{}_composer".format(self.name))

    def get_output_shape_for(self, input_shape):
        '''
        Returns the output shape for the configured return_mode, given an
        input_shape of (batch_size, input_length, input_dim).
        '''
        input_length = input_shape[1]
        if self.return_mode == "last_output":
            return (input_shape[0], self.output_dim)
        elif self.return_mode == "all_outputs":
            return (input_shape[0], input_length, self.output_dim)
        else:
            # return_mode is output_and_memory. Output will be concatenated to memory.
            return (input_shape[0], input_length + 1, self.output_dim)

    def compute_mask(self, input, mask):
        '''
        Returns the mask that accompanies the output: None for last_output (or
        when no mask came in), the input mask for all_outputs, and the input
        mask with one extra leading zero column for output_and_memory.
        '''
        if mask is None or self.return_mode == "last_output":
            return None
        elif self.return_mode == "all_outputs":
            return mask  # (batch_size, input_length)
        else:
            # Return mode is output_and_memory
            # Mask memory corresponding to all the inputs that are masked, and do not mask the output
            # (batch_size, input_length + 1)
            return K.cast(K.concatenate([K.zeros_like(mask[:, :1]), mask]),
                          'uint8')

    def get_composer_input_shape(self, input_shape):
        '''
        Shape of the composer's input, built from the batch dimension of
        input_shape.
        '''
        # Takes concatenation of output and memory summary
        return (input_shape[0], self.output_dim * 2)

    def get_reader_input_shape(self, input_shape):
        '''
        Shape of the reader's input; this class passes input_shape through
        unchanged.
        '''
        return input_shape

    def build(self, input_shape):
        '''
        Builds the reader, writer and composer for the given input_shape,
        collects their trainable weights on this layer and applies the initial
        weights passed to the constructor, if any.
        '''
        self.input_spec = [InputSpec(shape=input_shape)]
        assert self.reader.return_sequences, "The reader has to return sequences!"
        reader_input_shape = self.get_reader_input_shape(input_shape)
        # sys.stderr.write is used instead of the Python 2 only
        # "print >> sys.stderr" statement so the diagnostics work on both
        # Python 2 and Python 3.
        sys.stderr.write("NSE reader input shape: %s\n" %
                         (reader_input_shape, ))
        writer_input_shape = (input_shape[0], 1, self.output_dim * 2
                              )  # Will process one timestep at a time
        sys.stderr.write("NSE writer input shape: %s\n" %
                         (writer_input_shape, ))
        composer_input_shape = self.get_composer_input_shape(input_shape)
        sys.stderr.write("NSE composer input shape: %s\n" %
                         (composer_input_shape, ))
        self.reader.build(reader_input_shape)
        self.writer.build(writer_input_shape)
        self.composer.build(composer_input_shape)

        # Aggregate weights of individual components for this layer.
        reader_weights = self.reader.trainable_weights
        writer_weights = self.writer.trainable_weights
        composer_weights = self.composer.trainable_weights
        self.trainable_weights = reader_weights + writer_weights + composer_weights

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights

    def read(self, nse_input, input_mask=None):
        '''
        This method produces the 'read' output (equation 1 in the paper) for all timesteps
        and initializes the memory slot mem_0.

        Input: nse_input (batch_size, input_length, input_dim)
        Outputs:
            o (batch_size, input_length, output_dim)
            [flattened_mem_0] (list with one tensor of shape (batch_size, input_length * output_dim))
            o_mask: the mask the reader computes for its output

        While this method simply copies input to mem_0, variants that inherit from this class can do
        something fancier.
        '''
        input_to_read = nse_input
        mem_0 = input_to_read
        flattened_mem_0 = K.batch_flatten(mem_0)
        o = self.reader.call(input_to_read, input_mask)
        o_mask = self.reader.compute_mask(input_to_read, input_mask)
        return o, [flattened_mem_0], o_mask

    @staticmethod
    def summarize_memory(o_t, mem_tm1):
        '''
        This method selects the relevant parts of the memory given the read output and summarizes the
        memory. Implements Equations 2-3 or 8-11 in the paper.
        '''
        # Selecting relevant memory slots, Equation 2
        z_t = K.softmax(K.sum(K.expand_dims(o_t, dim=1) * mem_tm1,
                              axis=2))  # (batch_size, input_length)
        # Summarizing memory, Equation 3
        m_rt = K.sum(K.expand_dims(z_t, dim=2) * mem_tm1,
                     axis=1)  # (batch_size, output_dim)
        return z_t, m_rt

    def compose_memory_and_output(self, output_memory_list):
        '''
        This method takes a list of tensors and applies the composition function on their concatenation.
        Implements equation 4 or 12 in the paper.
        '''
        # Composition, Equation 4
        c_t = self.composer.call(
            K.concatenate(output_memory_list))  # (batch_size, output_dim)
        return c_t

    def update_memory(self, z_t, h_t, mem_tm1):
        '''
        This method takes the attention vector (z_t), writer output (h_t) and previous timestep's memory (mem_tm1)
        and updates the memory. Implements equations 6, 14 or 15.
        '''
        tiled_z_t = K.tile(
            K.expand_dims(z_t),
            (self.output_dim))  # (batch_size, input_length, output_dim)
        input_length = K.shape(mem_tm1)[1]
        # (batch_size, input_length, output_dim)
        tiled_h_t = K.permute_dimensions(
            K.tile(K.expand_dims(h_t), (input_length)), (0, 2, 1))
        # Updating memory. First term in summation corresponds to selective forgetting and the second term to
        # selective addition. Equation 6.
        mem_t = mem_tm1 * (
            1 - tiled_z_t
        ) + tiled_h_t * tiled_z_t  # (batch_size, input_length, output_dim)
        return mem_t

    def compose_and_write_step(self, o_t, states):
        '''
        This method is a step function that updates the memory at each time step and produces
        a new output vector (Equations 2 to 6 in the paper).
        The memory_state is flattened because K.rnn requires all states to be of the same shape as the output,
        because it uses the same mask for the output and the states.
        Inputs:
            o_t (batch_size, output_dim)
            states (list[Tensor])
                flattened_mem_tm1 (batch_size, input_length * output_dim)
                writer_h_tm1 (batch_size, output_dim)
                writer_c_tm1 (batch_size, output_dim)

        Outputs:
            h_t (batch_size, output_dim)
            [flattened_mem_t, h_t, writer_c_t]: the new states, in the same
                order as the incoming states
        '''
        flattened_mem_tm1, writer_h_tm1, writer_c_tm1 = states
        input_mem_shape = K.shape(flattened_mem_tm1)
        # Floor division keeps the recovered input_length an integer; plain
        # "/" is true division on Python 3 and would yield a float dimension.
        mem_tm1_shape = (input_mem_shape[0],
                         input_mem_shape[1] // self.output_dim,
                         self.output_dim)
        mem_tm1 = K.reshape(
            flattened_mem_tm1,
            mem_tm1_shape)  # (batch_size, input_length, output_dim)
        z_t, m_rt = self.summarize_memory(o_t, mem_tm1)
        c_t = self.compose_memory_and_output([o_t, m_rt])
        # Collecting the necessary variables to directly call writer's step function.
        writer_constants = self.writer.get_constants(
            c_t)  # returns dropouts for W and U (all 1s, see init)
        writer_states = [writer_h_tm1, writer_c_tm1] + writer_constants
        # Making a call to writer's step function, Equation 5
        h_t, [_, writer_c_t] = self.writer.step(
            c_t, writer_states)  # h_t, writer_c_t: (batch_size, output_dim)
        mem_t = self.update_memory(z_t, h_t, mem_tm1)
        flattened_mem_t = K.batch_flatten(mem_t)
        return h_t, [flattened_mem_t, h_t, writer_c_t]

    def call(self, x, mask=None):
        '''
        Runs the reader over x, then steps compose_and_write_step over the read
        outputs with K.rnn and returns the tensor selected by return_mode.
        '''
        # input_shape = (batch_size, input_length, input_dim). This needs to be defined in build.
        read_output, initial_memory_states, output_mask = self.read(x, mask)
        initial_write_states = self.writer.get_initial_states(
            read_output)  # h_0 and c_0 of the writer LSTM
        initial_states = initial_memory_states + initial_write_states
        # last_output: (batch_size, output_dim)
        # all_outputs: (batch_size, input_length, output_dim)
        # last_states:
        #       last_memory_state: (batch_size, input_length, output_dim)
        #       last_output
        #       last_writer_ct
        last_output, all_outputs, last_states = K.rnn(
            self.compose_and_write_step,
            read_output,
            initial_states,
            mask=output_mask)
        last_memory = last_states[0]
        if self.return_mode == "last_output":
            return last_output
        elif self.return_mode == "all_outputs":
            return all_outputs
        else:
            # return mode is output_and_memory
            expanded_last_output = K.expand_dims(
                last_output, dim=1)  # (batch_size, 1, output_dim)
            # (batch_size, 1+input_length, output_dim)
            return K.concatenate([expanded_last_output, last_memory], axis=1)

    def get_config(self):
        '''
        Returns this layer's constructor settings merged with the parent
        class's configuration; parent entries win on a key clash.
        '''
        config = {
            'output_dim': self.output_dim,
            'input_length': self.input_length,
            'composer_activation': self.composer_activation,
            'return_mode': self.return_mode
        }
        base_config = super(NSE, self).get_config()
        config.update(base_config)
        return config
コード例 #26
0
def _pad_with_last(values, length):
    """Return a list copy of ``values`` extended to ``length`` by repeating its last item."""
    padded = list(values)
    shortfall = length - len(padded)
    if shortfall > 0:
        padded.extend([padded[-1]] * shortfall)
    return padded


def build_model(max_length: int,
                embedding_matrix: Union[np.ndarray, Tuple[int]],
                transformer_depth: int,
                transformer_heads: int,
                filters: List[int],
                kernel_size: List[int],
                pool_size: List[int],
                conv_padding: str,
                pool_padding: str,
                dense_size: List[int],
                loaded_model: Optional[str] = None,
                fine_tune_model: bool = False,
                l2_penalty: Optional[float] = None,
                embedding_dropout: float = 0.6,
                transformer_dropout: float = 0.1,
                conv_dropout: float = 0.1,
                dense_dropout: Union[float, List[float]] = 0.3,
                classifier_dropout: float = 0.1,
                train_lm=True) -> Model:
    """Build the transformer language model, optionally with a CNN classifier head.

    Args:
        max_length: Length of the integer-encoded input sequence.
        embedding_matrix: Either the embedding weight array or a
            ``(input_dim, output_dim)`` tuple giving only its shape. With a
            tuple and no ``loaded_model`` the embedding layer gets no initial
            weights.
        transformer_depth: Number of transformer blocks stacked after the
            position embedding.
        transformer_heads: ``num_heads`` of every transformer block.
        filters: Filter count of each ``Conv1D`` layer.
        kernel_size: Kernel size of each ``Conv1D`` layer.
        pool_size: Pool size of each ``MaxPooling1D`` layer.
        conv_padding: ``padding`` argument of the convolutions.
        pool_padding: ``padding`` argument of the max-pooling layers.
        dense_size: Unit count of each dense layer after flattening; may be
            empty.
        loaded_model: Path handed to ``load_model``. When given, weights of
            layers with matching names are copied from that model.
        fine_tune_model: When false, only the language model is built and
            returned; the CNN/dense/classifier head is skipped.
        l2_penalty: If truthy, L2 factor used for the embedding and output
            projection regularizers.
        embedding_dropout: ``projection_dropout`` of the tied output embedding.
        transformer_dropout: Residual and attention dropout of every block.
        conv_dropout: Rate of the dropout layer applied before each
            convolution.
        dense_dropout: A single rate, or one rate per dense layer (list or
            tuple); applied before the corresponding dense layer.
        classifier_dropout: Rate of the dropout before the classifier unit.
        train_lm: With ``fine_tune_model`` set, whether the returned model
            also outputs the language-model predictions.

    ``filters``/``kernel_size``/``pool_size`` and ``dense_size``/
    ``dense_dropout`` of unequal length are padded with their last value to
    the longest one, and a warning is logged.

    Returns:
        A model mapping the integer input to the language-model output, the
        classifier output, or both (language model first).

    Raises:
        ValueError: If ``filters``, ``kernel_size`` or ``pool_size`` is empty.
    """
    if not (len(filters) > 0 and len(kernel_size) > 0 and len(pool_size) > 0):
        message = (
            "There are no filters, kernel sizes or pool sizes specified for the CNN."
        )
        logger.error(message)
        raise ValueError(message)

    # Accept a bare rate as well as a list or tuple of rates. The previous
    # exact-type check wrapped a tuple into a one-element list, so the tuple
    # itself ended up as a Dropout rate.
    if isinstance(dense_dropout, (list, tuple)):
        dense_dropout = list(dense_dropout)
    else:
        dense_dropout = [dense_dropout]

    if len(dense_size) > 0 and len(dense_size) != len(dense_dropout):
        dense_count = max(len(dense_size), len(dense_dropout))
        dense_size = _pad_with_last(dense_size, dense_count)
        dense_dropout = _pad_with_last(dense_dropout, dense_count)
        logger.warning(
            "Lists given for dense layer sizes and dense layer dropout rates are not the same length. "
            "The shorter lists are padded using the last value to match the length of the longest."
        )

    if not (len(filters) == len(kernel_size) == len(pool_size)):
        conv_count = max(len(filters), len(kernel_size), len(pool_size))
        filters = _pad_with_last(filters, conv_count)
        kernel_size = _pad_with_last(kernel_size, conv_count)
        pool_size = _pad_with_last(pool_size, conv_count)
        logger.warning(
            "Lists given for convolutional filters, kernel sizes and pooling sizes had different lengths. "
            "The shorter lists are padded using the last value to match the length of the longest."
        )

    original_model = None
    if loaded_model:
        # load the specified model
        original_model = load_model(loaded_model,
                                    custom_objects={
                                        "perplexity": perplexity,
                                        "lm_accuracy": lm_accuracy
                                    })

    # regularizer for embedding layer
    l2_regularizer = l2(l2_penalty) if l2_penalty else None

    # input encoded as integers
    raw_input = Input(shape=(max_length, ), name="input")

    # embedding layer: weights come from the loaded model if there is one,
    # otherwise from the supplied matrix; a shape-only tuple means "no
    # initial weights"
    shape_only = isinstance(embedding_matrix, tuple)
    if loaded_model:
        embedding_weights = [
            original_model.get_layer(name="word_embedding").get_weights()[0]
        ]
    elif shape_only:
        embedding_weights = None
    else:
        embedding_weights = [embedding_matrix]
    embedding_layer = ReusableEmbedding(
        input_dim=(embedding_matrix[0]
                   if shape_only else embedding_matrix.shape[0]),
        output_dim=(embedding_matrix[1]
                    if shape_only else embedding_matrix.shape[1]),
        input_length=max_length,
        name="word_embedding",
        weights=embedding_weights,
        embeddings_regularizer=l2_regularizer)

    # "transpose" of embedding matrix to map back to vocabulary
    output_layer_kwargs = {}
    if loaded_model:
        output_layer_kwargs["weights"] = original_model.get_layer(
            name="word_prediction_logits").get_weights()
    output_layer = TiedOutputEmbedding(projection_regularizer=l2_regularizer,
                                       projection_dropout=embedding_dropout,
                                       name="word_prediction_logits",
                                       **output_layer_kwargs)

    # transformer as taken from here: https://github.com/kpot/keras-transformer/blob/master/example/models.py
    position_kwargs = {}
    if loaded_model:
        position_kwargs["weights"] = original_model.get_layer(
            name="position_embedding").get_weights()
    position_embedding = TransformerCoordinateEmbedding(
        max_transformer_depth=1, name="position_embedding", **position_kwargs)

    # The embedding layer returns both the embedded input and its weight
    # matrix; the latter gets its own name so the `embedding_matrix`
    # argument is not overwritten.
    transformer_input, tied_embedding_matrix = embedding_layer(raw_input)
    transformer_output = position_embedding(transformer_input, step=0)
    for i in range(transformer_depth):
        block_name = "transformer" + str(i)

        # define transformer block
        transformer_block = TransformerBlock(
            name=block_name,
            num_heads=transformer_heads,
            residual_dropout=transformer_dropout,
            attention_dropout=transformer_dropout,
            use_masking=True,
            vanilla_wiring=True)

        # the sub-layers have to be built by hand before their weights can be
        # set; shapes are taken from the matching layers of the loaded model
        if loaded_model:
            # the first block follows the position embedding, later blocks
            # follow the previous block's second normalization
            if i == 0:
                attention_source = "position_embedding"
            else:
                attention_source = "transformer{}_normalization2".format(i - 1)
            norm1_shape = original_model.get_layer(
                block_name + "_normalization1").output_shape
            transformer_block.attention_layer.build(
                original_model.get_layer(attention_source).output_shape)
            transformer_block.norm1_layer.build(
                original_model.get_layer(block_name +
                                         "_self_attention").output_shape)
            transformer_block.norm2_layer.build(norm1_shape)
            transformer_block.transition_layer.build(norm1_shape)

            # set weights for all the contained layers manually
            transformer_block.attention_layer.set_weights(
                original_model.get_layer(
                    name=(block_name + "_self_attention")).get_weights())
            transformer_block.norm1_layer.set_weights(
                original_model.get_layer(
                    name=(block_name + "_normalization1")).get_weights())
            transformer_block.norm2_layer.set_weights(
                original_model.get_layer(
                    name=(block_name + "_normalization2")).get_weights())
            transformer_block.transition_layer.set_weights(
                original_model.get_layer(name=(block_name +
                                               "_transition")).get_weights())

        # pass output of last layer through transformer
        transformer_output = transformer_block(transformer_output)

    # nothing special to load for softmax
    softmax_layer = Softmax(name="word_predictions")
    lm_output_logits = output_layer(
        [transformer_output, tied_embedding_matrix])
    lm_output = softmax_layer(lm_output_logits)

    if not fine_tune_model:
        return Model(inputs=raw_input, outputs=lm_output)

    loaded_layer_names = []
    if loaded_model:
        loaded_layer_names = [layer.name for layer in original_model.layers]

    # convolution layer(s); one dropout layer is shared by all of them
    conv_dropout_layer = Dropout(conv_dropout, name="conv_dropout")
    conv_output = transformer_output
    for i, (n_filters, conv_kernel,
            pool_width) in enumerate(zip(filters, kernel_size, pool_size)):
        # construct and possibly load convolutional layer
        conv_layer_name = "conv_{}".format(i)
        convolution = Conv1D(n_filters,
                             conv_kernel,
                             padding=conv_padding,
                             activation="relu",
                             name=conv_layer_name)
        if loaded_model and conv_layer_name in loaded_layer_names:
            layer = original_model.get_layer(name=conv_layer_name)
            convolution.build(layer.input_shape)
            convolution.set_weights(layer.get_weights())

        # construct max pooling, no weights to load
        pooling = MaxPooling1D(pool_width,
                               padding=pool_padding,
                               name="max_pool_{}".format(i))

        # get output/input of next layer
        conv_output = pooling(convolution(conv_dropout_layer(conv_output)))

    # dense layer(s)
    flatten = Flatten(name="flatten")
    dense_output = flatten(conv_output)
    for i, (units, rate) in enumerate(zip(dense_size, dense_dropout)):
        # construct and possibly load dense layer
        dense_layer_name = "dense_{}".format(i)
        dense = Dense(units, name=dense_layer_name)
        if loaded_model and dense_layer_name in loaded_layer_names:
            layer = original_model.get_layer(name=dense_layer_name)
            dense.build(layer.input_shape)
            dense.set_weights(layer.get_weights())

        # nothing to load for dropout
        dropout = Dropout(rate=rate, name="dense_dropout_{}".format(i))

        # get output
        dense_output = dense(dropout(dense_output))

    # classification layer
    classifier_dropout_layer = Dropout(classifier_dropout,
                                       name="classifier_dropout")
    classifier = Dense(1, name="classifier")
    classifier_prediction = Activation("sigmoid", name="classifier_prediction")
    classifier_output = classifier_prediction(
        classifier(classifier_dropout_layer(dense_output)))

    if train_lm:
        return Model(inputs=raw_input, outputs=[lm_output, classifier_output])
    return Model(inputs=raw_input, outputs=classifier_output)
コード例 #27
0
class AttLSTMCell(LSTMCell):
    """Attention cell class for the LSTM layer.

    At every step the cell replaces its input with an attention-weighted
    combination of a fixed ``context`` tensor, where the attention weights are
    computed from the context and the previous hidden state, and then runs the
    regular LSTM cell step on that combination.

    # Arguments
        units: Positive integer, dimensionality of the output space.
        context: Tensor attended over,
            [batch_size, sentence_length, embedding_dim].
        context_length: Number of context steps; used as the repeat count for
            the previous hidden state and as the size of the attention output.
        att_hidden_size: Number of units of the hidden attention layer.
        att_type: Attention variant, `"bahdanau"` or `"cosine"`.
            Only `"bahdanau"` is implemented.
        activation: Activation function to use
            (see [activations](../activations.md)).
            Default: hyperbolic tangent (`tanh`).
            If you pass `None`, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        recurrent_activation: Activation function to use
            for the recurrent step
            (see [activations](../activations.md)).
            Default: hard sigmoid (`hard_sigmoid`).
            If you pass `None`, no activation is applied
            (ie. "linear" activation: `a(x) = x`).
        use_bias: Boolean, whether the layer uses a bias vector.
        kernel_initializer: Initializer for the `kernel` weights matrix,
            used for the linear transformation of the inputs
            (see [initializers](../initializers.md)).
        recurrent_initializer: Initializer for the `recurrent_kernel`
            weights matrix,
            used for the linear transformation of the recurrent state
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        unit_forget_bias: Boolean.
            If True, add 1 to the bias of the forget gate at initialization.
            Setting it to true will also force `bias_initializer="zeros"`.
            This is recommended in [Jozefowicz et al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
        kernel_regularizer: Regularizer function applied to
            the `kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        recurrent_regularizer: Regularizer function applied to
            the `recurrent_kernel` weights matrix
            (see [regularizer](../regularizers.md)).
        bias_regularizer: Regularizer function applied to the bias vector
            (see [regularizer](../regularizers.md)).
        kernel_constraint: Constraint function applied to
            the `kernel` weights matrix
            (see [constraints](../constraints.md)).
        recurrent_constraint: Constraint function applied to
            the `recurrent_kernel` weights matrix
            (see [constraints](../constraints.md)).
        bias_constraint: Constraint function applied to the bias vector
            (see [constraints](../constraints.md)).
        dropout: Float between 0 and 1.
            Fraction of the units to drop for
            the linear transformation of the inputs.
        recurrent_dropout: Float between 0 and 1.
            Fraction of the units to drop for
            the linear transformation of the recurrent state.
        implementation: Implementation mode, either 1 or 2.
            Mode 1 will structure its operations as a larger number of
            smaller dot products and additions, whereas mode 2 will
            batch them into fewer, larger operations. These modes will
            have different performance profiles on different hardware and
            for different applications.
    """
    def __init__(self,
                 units,
                 context,
                 context_length,
                 att_hidden_size,
                 att_type="bahdanau",
                 activation='tanh',
                 recurrent_activation='hard_sigmoid',
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 recurrent_initializer='orthogonal',
                 bias_initializer='zeros',
                 unit_forget_bias=True,
                 kernel_regularizer=None,
                 recurrent_regularizer=None,
                 bias_regularizer=None,
                 kernel_constraint=None,
                 recurrent_constraint=None,
                 bias_constraint=None,
                 dropout=0.,
                 recurrent_dropout=0.,
                 implementation=1,
                 **kwargs):

        super(AttLSTMCell,
              self).__init__(units=units,
                             activation=activation,
                             recurrent_activation=recurrent_activation,
                             use_bias=use_bias,
                             kernel_initializer=kernel_initializer,
                             recurrent_initializer=recurrent_initializer,
                             bias_initializer=bias_initializer,
                             unit_forget_bias=unit_forget_bias,
                             kernel_regularizer=kernel_regularizer,
                             recurrent_regularizer=recurrent_regularizer,
                             bias_regularizer=bias_regularizer,
                             kernel_constraint=kernel_constraint,
                             recurrent_constraint=recurrent_constraint,
                             bias_constraint=bias_constraint,
                             dropout=dropout,
                             recurrent_dropout=recurrent_dropout,
                             implementation=implementation,
                             **kwargs)

        self.context = context
        self.context_length = context_length
        self.att_hidden_size = att_hidden_size
        self.att_type = att_type

    def reset_states(self):
        """Reset the states of the attention layers and of the parent cell."""
        # NOTE(review): relies on Dense and LSTMCell exposing reset_states();
        # confirm against the Keras version this file targets.
        self.att_hidden_layer.reset_states()
        self.att_output_layer.reset_states()
        super(AttLSTMCell, self).reset_states()

    def build(self, input_shape):
        """Create and build the attention sub-layers, then the LSTM cell.

        # Arguments
            input_shape: Shape of the cell input; only its last entry is used
                for the attention layers.
        """
        # The hidden attention layer is fed the context concatenated with the
        # repeated previous hidden state (see attention_bahdanau). The LSTM
        # kernel is built for input_shape[-1] and receives the attended
        # context, so the context feature size equals input_shape[-1].
        att_input_size = input_shape[-1] + self.units

        self.att_hidden_layer = Dense(self.att_hidden_size,
                                      activation='relu',
                                      name="att_hidden_layer")
        self.att_output_layer = Dense(self.context_length,
                                      activation='softmax',
                                      name="att_output_layer")
        self.att_reshape_layer = Reshape(
            (self.context_length * self.att_hidden_size, ))

        # Each sub-layer is built with the shape it receives in
        # attention_bahdanau rather than with the cell's own input shape;
        # otherwise the Dense kernels would not match their inputs.
        att_flat_size = self.context_length * self.att_hidden_size
        with K.name_scope(self.att_hidden_layer.name):
            self.att_hidden_layer.build(
                (None, self.context_length, att_input_size))

        with K.name_scope(self.att_output_layer.name):
            self.att_output_layer.build((None, att_flat_size))

        with K.name_scope(self.att_reshape_layer.name):
            self.att_reshape_layer.build(
                (None, self.context_length, self.att_hidden_size))

        super(AttLSTMCell, self).build(input_shape=input_shape)
        self.built = True

    def attention(self, inputs, s_tm1):
        """Return the attention-weighted context for the current step.

        :param inputs: [batch_size, input_dim]; not used by the computation,
            the attention is taken over ``self.context``
        :param s_tm1: [batch_size, units_num], previous hidden state
        :raises ValueError: if ``self.att_type`` is not a known attention type
        """
        if self.att_type == "bahdanau":
            return self.attention_bahdanau(self.context, s_tm1)
        if self.att_type == "cosine":
            return self.attention_cosine(self.context, s_tm1)
        # A bare string cannot be raised; use a real exception type.
        raise ValueError("unsupported attention type:{}".format(
            self.att_type))

    def attention_bahdanau(self, context, s_tm1):
        """Weight the context steps with an MLP over [context; s_tm1].

        The previous state is repeated ``context_length`` times, concatenated
        to the context, passed through the hidden layer, flattened and mapped
        to ``context_length`` softmax weights, which are then contracted with
        the context along its step axis.
        """
        s_tm1_seq = K.repeat(s_tm1, self.context_length)
        att_x = K.concatenate([context, s_tm1_seq])

        att_hidden = self.att_hidden_layer(att_x)
        att_hidden = self.att_reshape_layer(att_hidden)
        weights = self.att_output_layer(att_hidden)
        x = K.batch_dot(weights, context, axes=[1, 1])
        return x

    def attention_cosine(self, context, s_tm1):
        """Placeholder for cosine attention.

        :raises NotImplementedError: always; the previous body returned an
            undefined name and failed with a NameError.
        """
        raise NotImplementedError("cosine attention is not implemented")

    def call(self, inputs, states, training=None):
        '''
        Run one LSTM step on the attended context.

        inputs: [batch_size, embedding_dim]
        states: [h(t-1), c(t-1)]
        '''
        s_tm1 = states[0]
        inputs = self.attention(inputs, s_tm1)
        return super(AttLSTMCell, self).call(inputs, states, training)

    def _attention_layers(self):
        """Return the weighted attention sub-layers that exist so far.

        The layers are created in build(); before that the list is empty, so
        the weight properties can be read on an unbuilt cell.
        """
        candidates = (getattr(self, 'att_hidden_layer', None),
                      getattr(self, 'att_output_layer', None))
        return [layer for layer in candidates if layer is not None]

    def get_weights(self):
        """Return the weights of the hidden and output attention layers."""
        # NOTE(review): the LSTM cell's own weights are not part of this
        # list; kept as is because callers may rely on it.
        return self.att_hidden_layer.get_weights(
        ) + self.att_output_layer.get_weights()

    @property
    def trainable_weights(self):
        """Trainable weights of the LSTM cell followed by the attention layers'."""
        weights = []

        if hasattr(super(AttLSTMCell, self), 'trainable_weights'):
            weights += super(AttLSTMCell, self).trainable_weights

        for layer in self._attention_layers():
            if hasattr(layer, 'trainable_weights'):
                weights += layer.trainable_weights
        return weights

    @property
    def non_trainable_weights(self):
        """Non-trainable weights of the LSTM cell followed by the attention layers'."""
        weights = []
        if hasattr(super(AttLSTMCell, self), 'non_trainable_weights'):
            weights += super(AttLSTMCell, self).non_trainable_weights

        for layer in self._attention_layers():
            if hasattr(layer, 'non_trainable_weights'):
                weights += layer.non_trainable_weights
        return weights

    @property
    def updates(self):
        """Update ops of the LSTM cell followed by the attention layers'."""
        updates = []

        if hasattr(super(AttLSTMCell, self), 'updates'):
            # Previously appended to an undefined name and raised.
            updates += super(AttLSTMCell, self).updates

        for layer in self._attention_layers():
            if hasattr(layer, 'updates'):
                updates += layer.updates
        return updates

    def get_config(self):
        """Return the parent configuration plus the attention settings.

        ``context`` is a tensor and is not included, so it has to be supplied
        again when a cell is recreated from this configuration.
        """
        config = {
            'context_length': self.context_length,
            'att_hidden_size': self.att_hidden_size,
            'att_type': self.att_type
        }
        base_config = super(AttLSTMCell, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
コード例 #28
0
class CNNEncoder(Layer):
    '''
    Encodes a sequence of word embeddings into a single fixed-size vector using a bank of
    convolution layers followed by max pooling.  It is packaged as one layer so that it has the
    same input and output specification as the other encoders: the input has shape
    (batch_size, num_words, embedding_size) and the output has shape (batch_size, output_dim).

    One convolution layer is created for each ngram filter size, and each convolution produces
    vectors of size num_filters.  A convolution of a given ngram size is applied
    input_length - ngram_size + 1 times; the matching max pooling layer then reduces all of those
    outputs to their maximum.

    Doing this for every ngram size gives, after max pooling, a vector of dimensionality
    len(ngram_filter_sizes) * num_filters.

    A fully connected layer finally projects that vector to the requested output_dim.  For more
    details, refer to "A Sensitivity Analysis of (and Practitioners’ Guide to) Convolutional Neural
    Networks for Sentence Classification", Zhang and Wallace 2016, particularly Figure 1.
    '''
    def __init__(self, weights=None, **kwargs):
        self.supports_masking = True

        # Our own options are popped from kwargs so that only the remaining arguments are
        # forwarded to the base Layer constructor.

        # Output dim of each convolutional layer, i.e. the number of "filters" it learns.
        self.num_filters = kwargs.pop('num_filters')

        # Determines both how many convolutional layers are created and their sizes.
        # Must be a List[int].  With the default of (2, 3, 4, 5) there are four convolutional
        # layers, encoding ngrams of size 2 to 5 with some number of filters.
        self.ngram_filter_sizes = kwargs.pop('ngram_filter_sizes', (2, 3, 4, 5))

        self.output_dim = kwargs.pop('output_dim')
        self.conv_layer_activation = kwargs.pop('conv_layer_activation', 'relu')

        self.l1_regularization = kwargs.pop("l1_regularization", None)
        self.l2_regularization = kwargs.pop("l2_regularization", None)
        # A factory: every call produces a new regularizer from the stored coefficients.
        self.regularizer = lambda: l1l2(l1=self.l1_regularization,
                                        l2=self.l2_regularization)

        # Sub-layers are only created in self.build().
        self.convolution_layers = None
        self.max_pooling_layers = None
        self.projection_layer = None

        self.input_spec = [InputSpec(ndim=3)]
        self.initial_weights = weights
        super(CNNEncoder, self).__init__(**kwargs)

    def build(self, input_shape):
        num_words = input_shape[1]

        # Create the convolution, max pooling and dense sub-layers.
        conv_layers = []
        for ngram_size in self.ngram_filter_sizes:
            conv_layers.append(
                Convolution1D(nb_filter=self.num_filters,
                              filter_length=ngram_size,
                              activation=self.conv_layer_activation,
                              W_regularizer=self.regularizer(),
                              b_regularizer=self.regularizer()))
        self.convolution_layers = conv_layers

        pool_layers = []
        for ngram_size in self.ngram_filter_sizes:
            # Pooling over every position the convolution can be applied at.
            pool_layers.append(
                MaxPooling1D(pool_length=num_words - ngram_size + 1))
        self.max_pooling_layers = pool_layers

        self.projection_layer = Dense(self.output_dim)

        # The sub-layers are not explicitly part of the computational graph, so they have to
        # be built by hand.
        for conv, pool in zip(self.convolution_layers,
                              self.max_pooling_layers):
            conv.build(input_shape)
            conv_output_shape = conv.get_output_shape_for(input_shape)
            pool.build(conv_output_shape)

        pooled_dim = self.num_filters * len(self.ngram_filter_sizes)
        self.projection_layer.build((input_shape[0], pooled_dim))

        # The weights of this "layer" are the weights of all of its sub-layers.
        sub_layers = (self.convolution_layers + self.max_pooling_layers +
                      [self.projection_layer])
        self.trainable_weights = []
        for sub_layer in sub_layers:
            self.trainable_weights.extend(sub_layer.trainable_weights)

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights

        super(CNNEncoder, self).build(input_shape)

    def call(self, x, mask=None):
        # A convolution layer outputs (samples, pool_length, num_filters), where
        # pool_length = num_words - ngram_size + 1.  The matching max pooling layer outputs
        # (samples, 1, num_filters); flattening drops that middle dimension of length 1.
        filter_outputs = []
        for pool, conv in zip(self.max_pooling_layers,
                              self.convolution_layers):
            convolved = conv.call(x, mask)
            pooled = pool.call(convolved)
            filter_outputs.append(K.batch_flatten(pooled))

        if len(filter_outputs) > 1:
            maxpool_output = merge(filter_outputs, mode='concat')
        else:
            # A single filter size needs no concatenation.
            maxpool_output = filter_outputs[0]
        return self.projection_layer.call(maxpool_output)

    def get_output_shape_for(self, input_shape):
        batch_size = input_shape[0]
        return (batch_size, self.output_dim)

    def compute_mask(self, input, input_mask=None):  # pylint: disable=redefined-builtin
        # Keras would by default pass the mask on from a layer that supports masking.  It is
        # not needed downstream of this encoder, so it is dropped here.
        return None

    def get_config(self):
        own_config = {
            "output_dim": self.output_dim,
            "num_filters": self.num_filters,
            "ngram_filter_sizes": self.ngram_filter_sizes,
            "conv_layer_activation": self.conv_layer_activation,
            "l1_regularization": self.l1_regularization,
            "l2_regularization": self.l2_regularization,
        }
        # Entries from the base config take precedence on a key clash.
        own_config.update(super(CNNEncoder, self).get_config())
        return own_config
コード例 #29
0
class AdaptiveInstanceLayerNormalization(Layer):
    """Keras layer mixing instance and layer normalization with a learned ratio.

    The input is normalized twice, once over axes [1, 2] and once over axes
    [1, 2, 3]. The two results are blended per channel with the learned weight
    ``W``, then scaled and shifted by ``gamma`` and ``beta``, which two dense
    layers predict from the flattened input. The output has the input's shape.

    Args:
        smoothing: when true, the mixing weight is reduced by 0.1 and clipped
            to [0, 1] before use.
        light: stored on the instance; the visible code does not read it.
    """

    def __init__(self, smoothing=True, light=False, **kwargs):
        super(AdaptiveInstanceLayerNormalization, self).__init__(**kwargs)
        self.smoothing = smoothing
        self.light = light

    def build(self, input_shape):
        channels = input_shape[-1]

        # Per-channel mixing weight between the two normalizations, starting at 1.
        self.W = self.add_weight(shape=[channels],
                                 initializer=Ones(),
                                 name='weights',
                                 constraint=MinMaxNorm())
        self.latent_size = channels

        # TODO: (local)Conv2D with high stride before dense? Feeding the whole
        # flattened input into a dense layer is very inefficient.
        flat_dim = np.prod(input_shape[1:])
        self.fc_gamma = Dense(channels)
        self.fc_gamma.build((None, flat_dim))
        self.fc_beta = Dense(channels)
        self.fc_beta.build((None, flat_dim))
        self.flatten = Flatten()
        self.flatten.build(input_shape)

        # The dense sub-layers are built by hand, so their weights are
        # registered on this layer explicitly.
        for dense in (self.fc_beta, self.fc_gamma):
            self.trainable_weights.extend(dense.trainable_weights)

        self.built = True

    def call(self, inputs):
        # One scale and one shift value per channel, shaped to broadcast over
        # axes 1 and 2 of the input.
        per_channel_shape = (-1, 1, 1, self.latent_size)
        gamma = K.reshape(self.fc_gamma(self.flatten(inputs)),
                          per_channel_shape)
        beta = K.reshape(self.fc_beta(self.flatten(inputs)),
                         per_channel_shape)

        eps = 1e-5

        # Normalization with statistics taken over axes [1, 2].
        ins_mean, ins_var = tf.nn.moments(inputs, axes=[1, 2], keep_dims=True)
        x_ins = (inputs - ins_mean) / K.sqrt(ins_var + eps)

        # Normalization with statistics taken over axes [1, 2, 3].
        ln_mean, ln_var = tf.nn.moments(inputs,
                                        axes=[1, 2, 3],
                                        keep_dims=True)
        x_ln = (inputs - ln_mean) / K.sqrt(ln_var + eps)

        if self.smoothing:
            rho = K.clip(self.W - K.constant(0.1), 0.0, 1.0)
        else:
            rho = self.W

        blended = rho * x_ins + (1 - rho) * x_ln
        return blended * gamma + beta

    def compute_output_shape(self, input_shape):
        return input_shape
コード例 #30
0
class CNNEncoder(MaskedLayer):
    '''
    CNNEncoder is a combination of multiple convolution layers and max pooling layers. This is
    defined as a single layer to be consistent with the other encoders in terms of input and output
    specifications.  The input to this "layer" is of shape (batch_size, num_words, embedding_dim)
    and the output is of size (batch_size, output_dim).

    The CNN has one convolution layer for each ngram filter size. Each convolution operation gives
    out a vector of size num_filters. The number of times a convolution layer will be used
    depends on the ngram size: input_length - ngram_size + 1. The corresponding maxpooling layer
    aggregates all these outputs from the convolution layer and outputs the max.

    This operation is repeated for every ngram size passed, and consequently the dimensionality of
    the output after maxpooling is len(ngram_filter_sizes) * num_filters.

    We then use a fully connected layer to project it back to the desired output_dim.  For more
    details, refer to "A Sensitivity Analysis of (and Practitioners’ Guide to) Convolutional Neural
    Networks for Sentence Classification", Zhang and Wallace 2016, particularly Figure 1.

    Parameters
    ----------
    units: int
        After doing convolutions, we'll project the collected features into a vector of this size.
        This used to be ``output_dim``, but Keras changed it to ``units``.  I prefer the name
        ``output_dim``, so we'll leave the code using ``output_dim``, and just use the name
        ``units`` in the external API.
    num_filters: int
        This is the output dim for each convolutional layer, which is the same as the number of
        "filters" learned by that layer.
    ngram_filter_sizes: Tuple[int, ...], optional (default=(2, 3, 4, 5))
        This specifies both the number of convolutional layers we will create and their sizes.  The
        default of (2, 3, 4, 5) will have four convolutional layers, corresponding to encoding
        ngrams of size 2 to 5 with some number of filters.
    conv_layer_activation: str, optional (default='relu')
        Activation passed to every convolution layer.
    l1_regularization: float, optional (default=None)
        ``l1`` coefficient handed to ``l1_l2`` for the convolution kernels and biases.
    l2_regularization: float, optional (default=None)
        ``l2`` coefficient handed to ``l1_l2`` for the convolution kernels and biases.
    '''
    def __init__(self,
                 units: int,
                 num_filters: int,
                 ngram_filter_sizes: Tuple[int, ...] = (2, 3, 4, 5),
                 conv_layer_activation: str = 'relu',
                 l1_regularization: float = None,
                 l2_regularization: float = None,
                 **kwargs):
        self.num_filters = num_filters
        self.ngram_filter_sizes = ngram_filter_sizes
        self.output_dim = units
        self.conv_layer_activation = conv_layer_activation
        self.l1_regularization = l1_regularization
        self.l2_regularization = l2_regularization
        # A factory: every call produces a new regularizer from the stored coefficients.
        self.regularizer = lambda: l1_l2(l1=self.l1_regularization,
                                         l2=self.l2_regularization)

        # These are member variables that will be defined during self.build().
        self.convolution_layers = None
        self.max_pooling_layers = None
        self.projection_layer = None

        self.input_spec = [InputSpec(ndim=3)]
        super(CNNEncoder, self).__init__(**kwargs)

    @overrides
    def build(self, input_shape):
        input_length = input_shape[1]  # number of words
        # We define convolution, maxpooling and dense layers first.
        self.convolution_layers = [
            Convolution1D(filters=self.num_filters,
                          kernel_size=ngram_size,
                          activation=self.conv_layer_activation,
                          kernel_regularizer=self.regularizer(),
                          bias_regularizer=self.regularizer())
            for ngram_size in self.ngram_filter_sizes
        ]
        # `pool_size` is the Keras 2 name of this argument, matching the Keras 2 keywords used
        # for the convolutions above; `pool_length` was the Keras 1 spelling.
        self.max_pooling_layers = [
            MaxPooling1D(pool_size=input_length - ngram_size + 1)
            for ngram_size in self.ngram_filter_sizes
        ]
        self.projection_layer = Dense(self.output_dim)
        # Building all layers because these sub-layers are not explicitly part of the
        # computational graph.
        for convolution_layer, max_pooling_layer in zip(
                self.convolution_layers, self.max_pooling_layers):
            with K.name_scope(convolution_layer.name):
                convolution_layer.build(input_shape)
            with K.name_scope(max_pooling_layer.name):
                max_pooling_layer.build(
                    convolution_layer.compute_output_shape(input_shape))
        maxpool_output_dim = self.num_filters * len(self.ngram_filter_sizes)
        projection_input_shape = (input_shape[0], maxpool_output_dim)
        with K.name_scope(self.projection_layer.name):
            self.projection_layer.build(projection_input_shape)
        # Defining the weights of this "layer" as the set of weights from all convolution,
        # maxpooling and projection layers.
        self.trainable_weights = []
        for layer in self.convolution_layers + self.max_pooling_layers + [
                self.projection_layer
        ]:
            self.trainable_weights.extend(layer.trainable_weights)

        super(CNNEncoder, self).build(input_shape)

    @overrides
    def call(self, inputs, mask=None):  # pylint: disable=unused-argument
        # Each convolution layer returns output of size (samples, pool_length, num_filters),
        #       where pool_length = num_words - ngram_size + 1
        # Each maxpooling layer returns output of size (samples, 1, num_filters).
        # We need to flatten to remove the second dimension of length 1 from the maxpooled output.
        # TODO(matt): we need to use a convolutional layer here that supports masking.
        filter_outputs = [
            K.batch_flatten(
                max_pooling_layer.call(convolution_layer.call(inputs)))
            for max_pooling_layer, convolution_layer in zip(
                self.max_pooling_layers, self.convolution_layers)
        ]
        if K.backend() == 'theano':
            # Just using the `call` method on layers does not set the _keras_shape, which is
            # necessary with the theano backend.  So we set it manually here to what we expect the
            # shape to be.
            for filter_output in filter_outputs:
                filter_output._keras_shape = (None, self.num_filters)  # pylint: disable=protected-access
        # A single filter size needs no concatenation.
        maxpool_output = Concatenate()(
            filter_outputs) if len(filter_outputs) > 1 else filter_outputs[0]
        return self.projection_layer.call(maxpool_output)

    @overrides
    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)

    @overrides
    def compute_mask(self, inputs, mask=None):  # pylint: disable=unused-argument
        # By default Keras propagates the mask from a layer that supports masking. We don't need it
        # anymore. So eliminating it from the flow.
        return None

    @overrides
    def get_config(self):
        config = {
            "units": self.output_dim,
            "num_filters": self.num_filters,
            "ngram_filter_sizes": self.ngram_filter_sizes,
            "conv_layer_activation": self.conv_layer_activation,
            "l1_regularization": self.l1_regularization,
            "l2_regularization": self.l2_regularization,
        }
        # Entries from the base config take precedence on a key clash.
        base_config = super(CNNEncoder, self).get_config()
        config.update(base_config)
        return config