# Shared imports assumed by the Keras-1-style snippets below (merge-function
# API, init=/W_regularizer= keyword names); the AttentionLayer and
# ComparisonLayer variants further down use the Keras 2 keyword names.
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, TimeDistributed, merge
from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras import regularizers
from keras import backend as K


class _Attention(object):
    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=0.0,
                 activation='relu'):
        self.max_length = max_length
        # Feed-forward "F" network, applied independently to every timestep.
        self.model = Sequential()
        self.model.add(Dropout(dropout, input_shape=(nr_hidden,)))
        self.model.add(Dense(nr_hidden, name='attend1', init='he_normal',
                             W_regularizer=l2(L2), input_shape=(nr_hidden,),
                             activation=activation))
        self.model.add(Dropout(dropout))
        self.model.add(Dense(nr_hidden, name='attend2', init='he_normal',
                             W_regularizer=l2(L2), activation=activation))
        self.model = TimeDistributed(self.model)

    def __call__(self, sent1, sent2):
        def _outer(AB):
            # Unnormalised attention scores: e_ij = F(a_i) . F(b_j)
            att_ji = K.batch_dot(AB[1], K.permute_dimensions(AB[0], (0, 2, 1)))
            return K.permute_dimensions(att_ji, (0, 2, 1))

        return merge([self.model(sent1), self.model(sent2)],
                     mode=_outer,
                     output_shape=(self.max_length, self.max_length))
import theano.tensor as T


class _Attention(object):
    # Theano variant: computes the score matrix with T.batched_dot
    # instead of the Keras backend.
    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=1e-4,
                 activation='relu'):
        self.max_length = max_length
        self.model = Sequential()
        self.model.add(Dense(nr_hidden, name='attend1', init='he_normal',
                             W_regularizer=l2(L2), input_shape=(nr_hidden,),
                             activation=activation))
        self.model.add(Dropout(dropout))
        self.model.add(Dense(nr_hidden, name='attend2', init='he_normal',
                             W_regularizer=l2(L2), activation=activation))
        self.model = TimeDistributed(self.model)

    def __call__(self, sent1, sent2):
        def _outer(AB):
            A, B = AB  # projected sent1, projected sent2
            att_ji = T.batched_dot(B, A.dimshuffle((0, 2, 1)))
            return att_ji.dimshuffle((0, 2, 1))

        return merge([self.model(sent1), self.model(sent2)],
                     mode=_outer,
                     output_shape=(self.max_length, self.max_length))
class AttentionLayer(object):
    """F function => attention = transpose of F(a) * F(b)."""

    def __init__(self, max_length, hidden_units, dropout=0.0,
                 l2_weight_decay=0.0, activation='relu'):
        self.max_length = max_length
        self.model = Sequential()
        self.model.add(Dropout(dropout, input_shape=(hidden_units,)))
        self.model.add(Dense(hidden_units, activation=activation,
                             kernel_initializer='he_normal',
                             kernel_regularizer=regularizers.l2(l2_weight_decay),
                             name='attend1'))
        self.model.add(Dropout(dropout))
        self.model.add(Dense(hidden_units,
                             kernel_initializer='he_normal',
                             kernel_regularizer=regularizers.l2(l2_weight_decay),
                             activation=activation,
                             name='attend2'))
        self.model = TimeDistributed(self.model)  # Apply attention for each timestep

    def __call__(self, sent1, sent2):
        def _outer(AB):
            """Calculate unnormalized attention weights."""
            energy = K.batch_dot(x=AB[1],
                                 y=K.permute_dimensions(AB[0],
                                                        pattern=(0, 2, 1)))
            return K.permute_dimensions(energy, (0, 2, 1))

        return merge(inputs=[self.model(sent1), self.model(sent2)],
                     mode=_outer,
                     output_shape=(self.max_length, self.max_length))
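# ---------------------------------------------------------------------------
# The attend variants above return only the unnormalised score matrix; the
# `align` tensor consumed by the comparison classes below comes from a
# soft-alignment step that is absent from this listing. The following is a
# minimal sketch in the same Keras 1 merge idiom; the class name
# _SoftAlignment and its exact layout are assumptions, not taken from the
# snippets here.
class _SoftAlignment(object):
    def __init__(self, max_length, nr_hidden):
        self.max_length = max_length
        self.nr_hidden = nr_hidden

    def __call__(self, sentence, attention, transpose=False):
        def _normalize_attention(attmat):
            att, mat = attmat
            if transpose:
                att = K.permute_dimensions(att, (0, 2, 1))
            # Numerically stable softmax over the last axis ("3D softmax").
            e = K.exp(att - K.max(att, axis=-1, keepdims=True))
            sm_att = e / K.sum(e, axis=-1, keepdims=True)
            # Weighted sum of the other sentence's phrase vectors.
            return K.batch_dot(sm_att, mat)

        return merge([attention, sentence], mode=_normalize_attention,
                     output_shape=(self.max_length, self.nr_hidden))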
class _Comparison(object):
    def __init__(self, words, nr_hidden, L2=0.0, dropout=0.0):
        self.words = words
        # Feed-forward "G" network over [sentence; aligned sentence].
        self.model = Sequential()
        self.model.add(Dropout(dropout, input_shape=(nr_hidden * 2,)))
        self.model.add(Dense(nr_hidden, name='compare1', init='he_normal',
                             W_regularizer=l2(L2)))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(dropout))
        self.model.add(Dense(nr_hidden, name='compare2', init='he_normal',
                             W_regularizer=l2(L2)))
        self.model.add(Activation('relu'))
        self.model = TimeDistributed(self.model)

    def __call__(self, sent, align, **kwargs):
        result = self.model(merge([sent, align], mode='concat'))  # Shape: (i, n)
        avged = GlobalAveragePooling1D()(result)  # mask=self.words)
        maxed = GlobalMaxPooling1D()(result)  # mask=self.words)
        merged = merge([avged, maxed])  # default mode is 'sum'
        result = BatchNormalization()(merged)
        return result
class _Comparison(object):
    def __init__(self, words, nr_hidden, L2=1e-6, dropout=0.2):
        self.words = words
        self.model = Sequential()
        self.model.add(Dense(nr_hidden, name='compare1', init='he_normal',
                             W_regularizer=l2(L2),
                             input_shape=(nr_hidden * 2,)))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(dropout))
        self.model.add(Dense(nr_hidden, name='compare2', init='he_normal',
                             W_regularizer=l2(L2)))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(dropout))
        self.model = TimeDistributed(self.model)

    def __call__(self, sent, align, **kwargs):
        result = self.model(merge([sent, align], mode='concat'))  # Shape: (i, n)
        result = _GlobalSumPooling1D()(result, mask=self.words)
        return result
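# _GlobalSumPooling1D is referenced above but never defined in this listing.
# Below is a sketch of such a layer under Keras 1 conventions (sum the feature
# vectors over the time axis, zeroing out padded timesteps); the mask-handling
# detail is an assumption.
from keras.engine.topology import Layer, InputSpec


class _GlobalSumPooling1D(Layer):
    def __init__(self, **kwargs):
        super(_GlobalSumPooling1D, self).__init__(**kwargs)
        self.input_spec = [InputSpec(ndim=3)]

    def get_output_shape_for(self, input_shape):
        return (input_shape[0], input_shape[2])

    def call(self, x, mask=None):
        if mask is not None:
            # mask: (batch, time) -> (batch, time, 1) so it broadcasts
            # over the feature axis.
            x = x * K.cast(K.expand_dims(mask, -1), K.floatx())
        return K.sum(x, axis=1)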
class Attention(object):
    def __init__(self, hidden_len, dropout=0.0, L2=0.0):
        # Separate feed-forward networks for the utterance and the path.
        self.model_utter = Sequential()
        self.model_utter.add(Dropout(dropout, input_shape=(hidden_len,)))
        self.model_utter.add(Dense(hidden_len, name='attend1',
                                   init='he_normal', W_regularizer=l2(L2),
                                   input_shape=(hidden_len,),
                                   activation='relu'))
        self.model_utter.add(Dropout(dropout))
        self.model_utter.add(Dense(hidden_len, name='attend2',
                                   init='he_normal', W_regularizer=l2(L2),
                                   activation='relu'))
        self.model_utter = TimeDistributed(self.model_utter)

        self.model_path = Sequential()
        self.model_path.add(Dropout(dropout, input_shape=(hidden_len,)))
        self.model_path.add(Dense(hidden_len, name='attend3',
                                  init='he_normal', W_regularizer=l2(L2),
                                  input_shape=(hidden_len,),
                                  activation='relu'))
        self.model_path.add(Dropout(dropout))
        self.model_path.add(Dense(hidden_len, name='attend4',
                                  init='he_normal', W_regularizer=l2(L2),
                                  activation='relu'))
        self.model_path = TimeDistributed(self.model_path)

    def __call__(self, utter, path, max_len_utter, max_len_path):
        # Attend step that avoids the quadratic complexity of normal attention.
        def merge_mode(utter_path):
            bitwise_attention = K.batch_dot(
                utter_path[1],
                K.permute_dimensions(utter_path[0], (0, 2, 1)))
            return K.permute_dimensions(bitwise_attention, (0, 2, 1))

        utter_model = self.model_utter(utter)
        path_model = self.model_path(path)
        return merge([utter_model, path_model], mode=merge_mode,
                     output_shape=(max_len_utter, max_len_path))
class CompareAndAggregate(object):
    def __init__(self, hidden_len, L2=0.0, dropout=0.0):
        self.model_utt = Sequential()
        self.model_utt.add(Dropout(dropout, input_shape=(hidden_len * 2,)))
        self.model_utt.add(Dense(hidden_len, name='compare1',
                                 init='he_normal', W_regularizer=l2(L2)))
        self.model_utt.add(Activation('relu'))
        self.model_utt.add(Dropout(dropout))
        self.model_utt.add(Dense(hidden_len, name='compare2',
                                 init='he_normal', W_regularizer=l2(L2)))
        self.model_utt.add(Activation('relu'))
        self.model_utt = TimeDistributed(self.model_utt)

        self.model_path = Sequential()
        self.model_path.add(Dropout(dropout, input_shape=(hidden_len * 2,)))
        self.model_path.add(Dense(hidden_len, name='compare3',
                                  init='he_normal', W_regularizer=l2(L2)))
        self.model_path.add(Activation('relu'))
        self.model_path.add(Dropout(dropout))
        self.model_path.add(Dense(hidden_len, name='compare4',
                                  init='he_normal', W_regularizer=l2(L2)))
        self.model_path.add(Activation('relu'))
        self.model_path = TimeDistributed(self.model_path)

    def __call__(self, sent, align, max_len, is_model_utt, **kwargs):
        if is_model_utt:
            result = self.model_utt(merge([sent, align], mode='concat'))
        else:
            result = self.model_path(merge([sent, align], mode='concat'))
        avged = GlobalAveragePooling1D()(result, mask=max_len)
        maxed = GlobalMaxPooling1D()(result, mask=max_len)
        merged = merge([avged, maxed])  # default mode is 'sum'
        result = BatchNormalization()(merged)
        return result
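# Usage sketch for the utterance/path variant above: unlike the sentence-pair
# classes, the attend step here takes two (possibly different) sequence
# lengths. The sizes and encoded inputs below are illustrative placeholders,
# not taken from the snippets.
from keras.layers import Input

hidden_len, max_len_utter, max_len_path = 200, 30, 12
utter_enc = Input(shape=(max_len_utter, hidden_len))  # encoded utterance
path_enc = Input(shape=(max_len_path, hidden_len))    # encoded path

attend = Attention(hidden_len, dropout=0.1, L2=1e-6)
attention = attend(utter_enc, path_enc, max_len_utter, max_len_path)
# `attention` has shape (batch, max_len_utter, max_len_path); each side would
# be softmax-normalised and soft-aligned (cf. _SoftAlignment above) before
# CompareAndAggregate is applied with is_model_utt=True / False.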
class ComparisonLayer(object):
    """Separately compare the aligned phrases using a function "G"."""

    def __init__(self, words, hidden_units, l2_weight_decay=0.0, dropout=0.0):
        self.words = words
        self.model = Sequential()
        self.model.add(Dropout(dropout, input_shape=(hidden_units * 2,)))
        self.model.add(Dense(hidden_units, kernel_initializer='he_normal',
                             kernel_regularizer=regularizers.l2(l2_weight_decay),
                             name='compare1'))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(dropout))
        self.model.add(Dense(hidden_units, kernel_initializer='he_normal',
                             kernel_regularizer=regularizers.l2(l2_weight_decay),
                             name='compare2'))
        self.model.add(Activation('relu'))
        self.model = TimeDistributed(self.model)  # Apply comparison for each timestep

    def __call__(self, sent, align, **kwargs):
        # Shape: (batch, max_length, 2 * hidden_units)
        result = self.model(merge([sent, align], mode='concat'))
        avged = GlobalAveragePooling1D()(result)
        maxed = GlobalMaxPooling1D()(result)
        merged = merge([avged, maxed], mode='sum')
        result = BatchNormalization()(merged)
        return result
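# How these pieces might be wired into an entailment classifier, following
# the attend -> align -> compare -> aggregate pipeline. This is an
# illustrative sketch only: build_model, the frozen-embedding encoding stage,
# and the hyperparameters are assumptions, and the first argument passed to
# _Comparison is only used as a pooling mask by the sum-pooling variant.
from keras.models import Model
from keras.layers import Input, Embedding


def build_model(vectors, max_length, nr_hidden, nr_class,
                dropout=0.2, L2=1e-6):
    # vectors: pretrained (vocab_size, embed_dim) embedding matrix.
    input1 = Input(shape=(max_length,), dtype='int32')
    input2 = Input(shape=(max_length,), dtype='int32')

    embed = Embedding(vectors.shape[0], vectors.shape[1],
                      weights=[vectors], input_length=max_length)
    encode = TimeDistributed(Dense(nr_hidden, activation='relu'))
    sent1 = encode(embed(input1))
    sent2 = encode(embed(input2))

    attend = _Attention(max_length, nr_hidden, dropout=dropout, L2=L2)
    align = _SoftAlignment(max_length, nr_hidden)
    compare = _Comparison(max_length, nr_hidden, dropout=dropout, L2=L2)

    attention = attend(sent1, sent2)
    feats1 = compare(sent1, align(sent2, attention))
    feats2 = compare(sent2, align(sent1, attention, transpose=True))

    # Aggregate both directions and classify.
    hidden = Dense(nr_hidden, activation='relu')(
        merge([feats1, feats2], mode='concat'))
    out = Dense(nr_class, activation='softmax')(hidden)
    return Model(input=[input1, input2], output=out)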