def MeanOverTime_depricated(mask_zero=True):
    """Build a Lambda layer that averages its input over the time axis.

    The input is expected to be a 3-D tensor (batch, time, features) and the
    output is 2-D (batch, features).

    Parameters
    ----------
    mask_zero : bool
        When True, timesteps whose feature vectors are entirely zero are
        excluded from the denominator of the average.  When False, a plain
        mean over all timesteps is taken, so the notion of sequence length
        is preserved even for all-zero timesteps.

    Returns
    -------
    A ``Lambda`` layer with masking support that propagates no mask
    (``compute_mask`` always returns ``None``).
    """
    # The time axis is reduced away: (batch, time, features) -> (batch, features).
    to_output_shape = lambda s: (s[0], s[2])

    if mask_zero:
        def masked_mean(x):
            # Count timesteps whose vectors are all zeros, per sample,
            # and subtract them from the sequence length so padded steps
            # do not dilute the mean.
            # NOTE(review): uses tensor attribute calls (.sum/.all/.shape) —
            # presumably Theano-backend style; confirm backend compatibility.
            n_all_zero = K.equal(x, 0).all(axis=2).sum(axis=1, keepdims=True)
            return K.cast(x.sum(axis=1) / (x.shape[1] - n_all_zero), K.floatx())

        layer = Lambda(masked_mean, output_shape=to_output_shape)
    else:
        layer = Lambda(lambda x: K.mean(x, axis=1), output_shape=to_output_shape)

    # Accept a mask from upstream layers but swallow it: downstream layers
    # receive no mask because the time axis no longer exists.
    layer.supports_masking = True
    layer.compute_mask = lambda input, mask: None
    return layer
# NOTE(review): this chunk starts mid-function.  The statements down to the
# first `return` are the tail of an `attn_merge(inputs, mask)` definition
# whose header lies outside this view — verify against the full file.
vectors = inputs[0]
logits = inputs[1]
# Flatten the logits and take a softmax
logits = K.squeeze(logits, axis=2)
# Masked-out timesteps get -inf so softmax assigns them zero weight.
pre_softmax = K.switch(mask[0], logits, -numpy.inf)
weights = K.expand_dims(K.softmax(pre_softmax))
# Attention-weighted sum over the time axis: (batch, time, dim) -> (batch, dim).
return K.sum(vectors * weights, axis=1)


def attn_merge_shape(input_shapes):
    # The time axis is reduced away; keep batch and feature dims of inputs[0].
    return (input_shapes[0][0], input_shapes[0][2])


# Wrap the merge function as a masking-aware Lambda that consumes the mask
# (compute_mask returns None: the output has no time axis to mask).
attn = Lambda(attn_merge, output_shape=attn_merge_shape)
attn.supports_masking = True
attn.compute_mask = lambda inputs, mask: None
content_flat = attn([seq_output, attention_2])
# seq_output / attention_2 / morph_seg are defined elsewhere in the file.
model = Model(inputs=morph_seg, outputs=content_flat)
model.load_weights("weights.h5")


def attn_weight(inputs, mask):
    """Return the attention weights themselves (not the attended vector).

    Mirrors attn_merge up to, but excluding, the final weighted sum.
    """
    vectors = inputs[0]  # NOTE(review): unused here — kept to mirror attn_merge.
    logits = inputs[1]
    # Flatten the logits and take a softmax
    logits = K.squeeze(logits, axis=2)
    pre_softmax = K.switch(mask[0], logits, -numpy.inf)
    weights = K.expand_dims(K.softmax(pre_softmax))
    return weights
def build(self, learn_context_weights=True):
    """Assemble the bidirectional character-LSTM + attention model.

    Builds a model that scores a (content, context-word) pair with a
    sigmoid (negative-sampling-style objective).  Uses the deprecated
    Keras 1.x API (`output_dim`, `bias=`, `merge`, `input=`/`output=`).

    Parameters
    ----------
    learn_context_weights : bool
        When True, context embeddings are trained from scratch.  When
        False, they are initialised from the word2vec output matrix
        (``syn1neg``) and frozen.

    Side effects: sets ``self._predict``, ``self._attention`` and
    ``self.model``.
    """
    # --- Inputs: variable-length int sequences plus a single context id ---
    content_forward = Input(shape=(None, ), dtype='int32', name='content_forward')
    content_backward = Input(shape=(None, ), dtype='int32', name='content_backward')
    context = Input(shape=(1, ), dtype='int32', name='context')

    # --- Context-word embedding (optionally pretrained and frozen) ---
    if learn_context_weights:
        context_weights = None
    else:
        # Initialise from word2vec's negative-sampling output matrix.
        context_weights = [self.word2vec_model.syn1neg]
    context_embedding = Embedding(input_dim=len(self.iterator.word_index),
                                  output_dim=256, input_length=1,
                                  weights=context_weights)
    if not learn_context_weights:
        context_embedding.trainable = False
    context_flat = Flatten()(context_embedding(context))

    # --- Character embedding shared by both directions; 0 is the pad id ---
    # NOTE(review): input_dim=29 presumably covers the character vocabulary
    # plus padding — confirm against the data iterator.
    char_embedding = Embedding(input_dim=29, output_dim=64, mask_zero=True)
    embed_forward = char_embedding(content_forward)
    embed_backward = char_embedding(content_backward)

    # --- Forward and backward LSTMs over the character sequence ---
    rnn_forward = LSTM(output_dim=256, return_sequences=True,
                       activation='tanh')(embed_forward)
    backwards_lstm = LSTM(output_dim=256, return_sequences=True,
                          activation='tanh', go_backwards=True)

    def reverse_tensor(inputs, mask):
        # Flip the time axis so the backward LSTM's outputs line up
        # step-for-step with the forward LSTM's outputs.
        return inputs[:, ::-1, :]

    def reverse_tensor_shape(input_shapes):
        # Reversal does not change the shape.
        return input_shapes

    reverse = Lambda(reverse_tensor, output_shape=reverse_tensor_shape)
    reverse.supports_masking = True
    rnn_backward = reverse(backwards_lstm(embed_backward))

    # --- Bidirectional combination and two-layer attention scorer ---
    rnn_bidi = TimeDistributed(Dense(output_dim=256))(merge(
        [rnn_forward, rnn_backward], mode='concat'))
    attention_1 = TimeDistributed(
        Dense(output_dim=256, activation='tanh', bias=False))(rnn_bidi)
    # One unnormalised attention logit per timestep, L2 activity-regularised.
    attention_2 = TimeDistributed(
        Dense(output_dim=1, activity_regularizer='activity_l2',
              bias=False))(attention_1)

    def attn_merge(inputs, mask):
        # inputs = [vectors (batch, time, dim), logits (batch, time, 1)]
        vectors = inputs[0]
        logits = inputs[1]
        # Flatten the logits and take a softmax
        logits = K.squeeze(logits, axis=2)
        # Masked timesteps get -inf so softmax gives them zero weight.
        pre_softmax = K.switch(mask[0], logits, -numpy.inf)
        weights = K.expand_dims(K.softmax(pre_softmax))
        return K.sum(vectors * weights, axis=1)

    def attn_merge_shape(input_shapes):
        # Time axis reduced away: (batch, dim) from inputs[0].
        return (input_shapes[0][0], input_shapes[0][2])

    attn = Lambda(attn_merge, output_shape=attn_merge_shape)
    attn.supports_masking = True
    # Output has no time axis, so propagate no mask downstream.
    attn.compute_mask = lambda inputs, mask: None
    content_flat = attn([rnn_bidi, attention_2])

    # --- Score: dot(content vector, context embedding) through a sigmoid ---
    output = Activation('sigmoid', name='output')(merge([content_flat,
                                                         context_flat],
                                                        mode='dot',
                                                        dot_axes=(1, 1)))
    model = Model(input=[content_forward, content_backward, context],
                  output=output)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])

    # Backend functions for inference: content vectors and per-timestep
    # attention logits, computed without the context branch.
    inputs = [content_forward, content_backward]
    self._predict = K.function(inputs, content_flat)
    self._attention = K.function(inputs, K.squeeze(attention_2, axis=2))
    self.model = model
# NOTE(review): this chunk starts mid-expression — the Dense(...) below is the
# argument of a call (presumably `... = TimeDistributed(`) whose opening lies
# outside this view; `attention_morpheme_nonlinear`, `attn_merge`,
# `attn_merge_shape`, `encoded_seg` and `number_of_segmentation` are also
# defined elsewhere in the file.
    Dense(units=400, activation='tanh', use_bias=False))
# Shared nonlinear attention projection applied to each segmentation's
# encoded sequence.
attn_nonlinear_seq = []
for i in range(number_of_segmentation):
    attn_nonlinear_seq.append(attention_morpheme_nonlinear(encoded_seg[i]))

# One attention logit per timestep, L1 activity-regularised to encourage
# sparse attention.
attention_morpheme_softmax = TimeDistributed(
    Dense(units=1, activity_regularizer=regularizers.l1(0.01),
          use_bias=False))
attn_soft_seq = []
for i in range(number_of_segmentation):
    attn_soft_seq.append(attention_morpheme_softmax(attn_nonlinear_seq[i]))

# Masking-aware attention merge shared across segmentations; it consumes
# the mask (compute_mask -> None) because the time axis is reduced away.
attn_morpheme = Lambda(attn_merge, output_shape=attn_merge_shape)
attn_morpheme.supports_masking = True
attn_morpheme.compute_mask = lambda inputs, mask: None
attn_morpheme_output_seq = []
for i in range(number_of_segmentation):
    attn_morpheme_output_seq.append(
        attn_morpheme([encoded_seg[i], attn_soft_seq[i]]))

# Stack the per-segmentation attended vectors back into a sequence:
# concatenate to (batch, number_of_segmentation * 400), then reshape to
# (batch, number_of_segmentation, 400) and re-apply masking.
concat_vector = concatenate(attn_morpheme_output_seq, axis=-1)
merge_vector = Reshape((number_of_segmentation, 400))(concat_vector)
masked_vector = Masking()(merge_vector)
seq_output = TimeDistributed(Dense(200))(masked_vector)

# Second-level attention scorer over the segmentation sequence.
attention_1 = TimeDistributed(
    Dense(units=200, activation='tanh', use_bias=False))(seq_output)