예제 #1
0
class _Attention(object):
    """Score every position of one sequence against every position of another.

    A two-layer feed-forward network (Dropout -> Dense -> Dropout -> Dense)
    is wrapped in ``TimeDistributed`` and applied, with the same weights, to
    both inputs. The scores are the batched dot products of the projections.
    """

    def __init__(self,
                 max_length,
                 nr_hidden,
                 dropout=0.0,
                 L2=0.0,
                 activation='relu'):
        """Build the projection network.

        Args:
            max_length: Used for both dimensions of the output shape declared
                in ``__call__``.
            nr_hidden: Width of the network input and of both Dense layers.
            dropout: Rate passed to both Dropout layers.
            L2: Weight passed to ``l2(...)`` for each Dense layer's
                ``W_regularizer``.
            activation: Activation used by both Dense layers. Previously this
                argument was accepted but ignored (``'relu'`` was hard-coded);
                the default keeps the old behaviour.
        """
        self.max_length = max_length
        self.model = Sequential()
        self.model.add(Dropout(dropout, input_shape=(nr_hidden, )))
        self.model.add(
            Dense(nr_hidden,
                  name='attend1',
                  init='he_normal',
                  W_regularizer=l2(L2),
                  input_shape=(nr_hidden, ),
                  activation=activation))
        self.model.add(Dropout(dropout))
        self.model.add(
            Dense(nr_hidden,
                  name='attend2',
                  init='he_normal',
                  W_regularizer=l2(L2),
                  activation=activation))
        # Apply the same network independently at every timestep.
        self.model = TimeDistributed(self.model)

    def __call__(self, sent1, sent2):
        """Return the pairwise score tensor for ``sent1`` and ``sent2``.

        Both inputs are expected to be rank-3 tensors (the axis permutation
        below requires it); presumably (batch, max_length, nr_hidden) --
        confirm against the caller.
        """
        def _outer(AB):
            # batch_dot(B, A^T) is indexed [j, i]; permute the last two axes
            # back so the result is indexed [i, j] = A_i . B_j.
            att_ji = K.batch_dot(AB[1], K.permute_dimensions(AB[0], (0, 2, 1)))
            return K.permute_dimensions(att_ji, (0, 2, 1))

        return merge([self.model(sent1), self.model(sent2)],
                     mode=_outer,
                     output_shape=(self.max_length, self.max_length))
예제 #2
0
class _Attention(object):
    """Holder for a time-distributed two-layer feed-forward projection.

    Only the constructor is defined in this snippet.
    """

    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=0.0, activation='relu'):
        """Build Dropout -> Dense -> Dropout -> Dense wrapped in TimeDistributed.

        Args:
            max_length: Stored on the instance; not otherwise used here.
            nr_hidden: Width of the network input and of both Dense layers.
            dropout: Rate passed to both Dropout layers.
            L2: Weight passed to ``l2(...)`` for each Dense layer's
                ``W_regularizer``.
            activation: Activation used by both Dense layers. Previously this
                argument was accepted but ignored (``'relu'`` was hard-coded);
                the default keeps the old behaviour.
        """
        self.max_length = max_length
        self.model = Sequential()
        self.model.add(Dropout(dropout, input_shape=(nr_hidden,)))
        self.model.add(
            Dense(nr_hidden, name='attend1',
                init='he_normal', W_regularizer=l2(L2),
                input_shape=(nr_hidden,), activation=activation))
        self.model.add(Dropout(dropout))
        self.model.add(Dense(nr_hidden, name='attend2',
            init='he_normal', W_regularizer=l2(L2), activation=activation))
        # Apply the same network independently at every timestep.
        self.model = TimeDistributed(self.model)
class _Attention(object):
    """Score every position of one sequence against every position of another.

    A two-layer feed-forward network (Dense -> Dropout -> Dense) is wrapped
    in ``TimeDistributed`` and applied, with the same weights, to both
    inputs. The scores are the batched dot products of the projections.
    """

    def __init__(self,
                 max_length,
                 nr_hidden,
                 dropout=0.0,
                 L2=1e-4,
                 activation='relu'):
        """Build the projection network.

        Args:
            max_length: Used for both dimensions of the output shape declared
                in ``__call__``.
            nr_hidden: Width of the network input and of both Dense layers.
            dropout: Rate passed to the Dropout layer between the Dense layers.
            L2: Weight passed to ``l2(...)`` for each Dense layer's
                ``W_regularizer``.
            activation: Activation used by both Dense layers. Previously this
                argument was accepted but ignored (``'relu'`` was hard-coded);
                the default keeps the old behaviour.
        """
        self.max_length = max_length
        self.model = Sequential()
        self.model.add(
            Dense(nr_hidden,
                  name='attend1',
                  init='he_normal',
                  W_regularizer=l2(L2),
                  input_shape=(nr_hidden, ),
                  activation=activation))
        self.model.add(Dropout(dropout))
        self.model.add(
            Dense(nr_hidden,
                  name='attend2',
                  init='he_normal',
                  W_regularizer=l2(L2),
                  activation=activation))
        # Apply the same network independently at every timestep.
        self.model = TimeDistributed(self.model)

    def __call__(self, sent1, sent2):
        """Return the pairwise score tensor for ``sent1`` and ``sent2``.

        Both inputs are expected to be rank-3 tensors (the ``dimshuffle``
        pattern below requires it); presumably
        (batch, max_length, nr_hidden) -- confirm against the caller.
        """
        def _outer(AB):
            # Tuple parameters (``def f((A, B))``) are a SyntaxError on
            # Python 3 (PEP 3113); unpack explicitly instead.
            A, B = AB
            # batched_dot(B, A^T) is indexed [j, i]; swap the last two axes
            # back so the result is indexed [i, j] = A_i . B_j.
            att_ji = T.batched_dot(B, A.dimshuffle((0, 2, 1)))
            return att_ji.dimshuffle((0, 2, 1))

        return merge([self.model(sent1), self.model(sent2)],
                     mode=_outer,
                     output_shape=(self.max_length, self.max_length))
예제 #4
0
class AttentionLayer(object):
    """Unnormalized attention: transpose of F(a) * F(b).

    ``F`` is a two-layer feed-forward network
    (Dropout -> Dense -> Dropout -> Dense) wrapped in ``TimeDistributed``
    and applied, with the same weights, to both inputs.
    """

    def __init__(self,
                 max_length,
                 hidden_units,
                 dropout=0.0,
                 l2_weight_decay=0.0,
                 activation='relu'):
        """Build the projection network F.

        Args:
            max_length: Used for both dimensions of the output shape declared
                in ``__call__``.
            hidden_units: Width of the network input and of both Dense layers.
            dropout: Rate passed to both Dropout layers.
            l2_weight_decay: Weight passed to ``regularizers.l2(...)`` for
                each Dense layer's ``kernel_regularizer``.
            activation: Activation used by both Dense layers. Previously this
                argument was accepted but ignored (``'relu'`` was hard-coded);
                the default keeps the old behaviour.
        """
        self.max_length = max_length
        self.model = Sequential()
        self.model.add(Dropout(dropout, input_shape=(hidden_units, )))
        self.model.add(
            Dense(hidden_units,
                  activation=activation,
                  kernel_initializer='he_normal',
                  kernel_regularizer=regularizers.l2(l2_weight_decay),
                  name='attend1'))
        self.model.add(Dropout(dropout))
        self.model.add(
            Dense(hidden_units,
                  kernel_initializer='he_normal',
                  kernel_regularizer=regularizers.l2(l2_weight_decay),
                  activation=activation,
                  name='attend2'))
        self.model = TimeDistributed(
            self.model)  # Apply attention for each timestep

    def __call__(self, sent1, sent2):
        """Return the unnormalized attention weights for the two inputs.

        Both inputs are expected to be rank-3 tensors (the axis permutation
        below requires it); presumably (batch, max_length, hidden_units) --
        confirm against the caller.
        """
        def _outer(AB):
            """
            Calculate unnormalized attention weights
            """
            # batch_dot(B, A^T) is indexed [j, i]; permute the last two axes
            # back so the result is indexed [i, j] = A_i . B_j.
            energy = K.batch_dot(x=AB[1],
                                 y=K.permute_dimensions(AB[0],
                                                        pattern=(0, 2, 1)))
            return K.permute_dimensions(energy, (0, 2, 1))

        return merge(inputs=[self.model(sent1),
                             self.model(sent2)],
                     mode=_outer,
                     output_shape=(self.max_length, self.max_length))
class _Attention(object):
    """Score every position of one sequence against every position of another.

    A two-layer feed-forward network (Dropout -> Dense -> Dropout -> Dense)
    is wrapped in ``TimeDistributed`` and applied, with the same weights, to
    both inputs. The scores are the batched dot products of the projections.
    """

    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=0.0, activation='relu'):
        """Build the projection network.

        Args:
            max_length: Used for both dimensions of the output shape declared
                in ``__call__``.
            nr_hidden: Width of the network input and of both Dense layers.
            dropout: Rate passed to both Dropout layers.
            L2: Weight passed to ``l2(...)`` for each Dense layer's
                ``W_regularizer``.
            activation: Activation used by both Dense layers. Previously this
                argument was accepted but ignored (``'relu'`` was hard-coded);
                the default keeps the old behaviour.
        """
        self.max_length = max_length
        self.model = Sequential()
        self.model.add(Dropout(dropout, input_shape=(nr_hidden,)))
        self.model.add(
            Dense(nr_hidden, name='attend1',
                init='he_normal', W_regularizer=l2(L2),
                input_shape=(nr_hidden,), activation=activation))
        self.model.add(Dropout(dropout))
        self.model.add(Dense(nr_hidden, name='attend2',
            init='he_normal', W_regularizer=l2(L2), activation=activation))
        # Apply the same network independently at every timestep.
        self.model = TimeDistributed(self.model)

    def __call__(self, sent1, sent2):
        """Return the pairwise score tensor for ``sent1`` and ``sent2``.

        Both inputs are expected to be rank-3 tensors (the axis permutation
        below requires it); presumably (batch, max_length, nr_hidden) --
        confirm against the caller.
        """
        def _outer(AB):
            # batch_dot(B, A^T) is indexed [j, i]; permute the last two axes
            # back so the result is indexed [i, j] = A_i . B_j.
            att_ji = K.batch_dot(AB[1], K.permute_dimensions(AB[0], (0, 2, 1)))
            return K.permute_dimensions(att_ji, (0, 2, 1))
        return merge(
                [self.model(sent1), self.model(sent2)],
                mode=_outer,
                output_shape=(self.max_length, self.max_length))
class _Attention(object):
    """Score every position of one sequence against every position of another.

    A two-layer feed-forward network (Dense -> Dropout -> Dense) is wrapped
    in ``TimeDistributed`` and applied, with the same weights, to both
    inputs. The scores are the batched dot products of the projections.
    """

    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=1e-4, activation='relu'):
        """Build the projection network.

        Args:
            max_length: Used for both dimensions of the output shape declared
                in ``__call__``.
            nr_hidden: Width of the network input and of both Dense layers.
            dropout: Rate passed to the Dropout layer between the Dense layers.
            L2: Weight passed to ``l2(...)`` for each Dense layer's
                ``W_regularizer``.
            activation: Activation used by both Dense layers. Previously this
                argument was accepted but ignored (``'relu'`` was hard-coded);
                the default keeps the old behaviour.
        """
        self.max_length = max_length
        self.model = Sequential()
        self.model.add(
            Dense(nr_hidden, name='attend1',
                init='he_normal', W_regularizer=l2(L2),
                input_shape=(nr_hidden,), activation=activation))
        self.model.add(Dropout(dropout))
        self.model.add(Dense(nr_hidden, name='attend2',
            init='he_normal', W_regularizer=l2(L2), activation=activation))
        # Apply the same network independently at every timestep.
        self.model = TimeDistributed(self.model)

    def __call__(self, sent1, sent2):
        """Return the pairwise score tensor for ``sent1`` and ``sent2``.

        Both inputs are expected to be rank-3 tensors (the ``dimshuffle``
        pattern below requires it); presumably
        (batch, max_length, nr_hidden) -- confirm against the caller.
        """
        def _outer(AB):
            # Tuple parameters (``def f((A, B))``) are a SyntaxError on
            # Python 3 (PEP 3113); unpack explicitly instead.
            A, B = AB
            # batched_dot(B, A^T) is indexed [j, i]; swap the last two axes
            # back so the result is indexed [i, j] = A_i . B_j.
            att_ji = T.batched_dot(B, A.dimshuffle((0, 2, 1)))
            return att_ji.dimshuffle((0, 2, 1))

        return merge(
                [self.model(sent1), self.model(sent2)],
                mode=_outer,
                output_shape=(self.max_length, self.max_length))
예제 #7
0
class _Comparison(object):
    """Compare a sequence with its aligned counterpart and pool over time.

    The per-timestep network is
    Dropout -> Dense -> relu -> Dropout -> Dense -> relu, wrapped in
    ``TimeDistributed``.
    """

    def __init__(self, words, nr_hidden, L2=0.0, dropout=0.0):
        """Build the per-timestep comparison network.

        Args:
            words: Stored on the instance; not read by this class (the mask
                arguments in ``__call__`` are commented out).
            nr_hidden: Width of both Dense layers; the network input is
                declared as ``nr_hidden * 2`` wide.
            L2: Weight passed to ``l2(...)`` for each Dense layer's
                ``W_regularizer``.
            dropout: Rate passed to both Dropout layers.
        """
        self.words = words

        net = Sequential()
        stack = [
            Dropout(dropout, input_shape=(nr_hidden * 2, )),
            Dense(nr_hidden,
                  name='compare1',
                  init='he_normal',
                  W_regularizer=l2(L2)),
            Activation('relu'),
            Dropout(dropout),
            Dense(nr_hidden,
                  name='compare2',
                  W_regularizer=l2(L2),
                  init='he_normal'),
            Activation('relu'),
        ]
        for layer in stack:
            net.add(layer)

        # Run the same network independently at every timestep.
        self.model = TimeDistributed(net)

    def __call__(self, sent, align, **kwargs):
        """Concatenate ``sent`` and ``align``, compare, pool and normalize.

        Extra keyword arguments are accepted and ignored.
        """
        joined = merge([sent, align], mode='concat')
        compared = self.model(joined)

        mean_pooled = GlobalAveragePooling1D()(compared)
        max_pooled = GlobalMaxPooling1D()(compared)

        # No ``mode`` is given, so the library default applies (element-wise
        # sum in Keras 1.x -- confirm for the installed version).
        pooled = merge([mean_pooled, max_pooled])
        return BatchNormalization()(pooled)
class _Comparison(object):
    """Compare a sequence with its aligned counterpart and sum-pool over time.

    The per-timestep network is
    Dense -> relu -> Dropout -> Dense -> relu -> Dropout, wrapped in
    ``TimeDistributed``.
    """

    def __init__(self, words, nr_hidden, L2=1e-6, dropout=0.2):
        """Build the per-timestep comparison network.

        Args:
            words: Passed as ``mask`` to the sum-pooling layer in ``__call__``.
            nr_hidden: Width of both Dense layers; the network input is
                declared as ``nr_hidden * 2`` wide.
            L2: Weight passed to ``l2(...)`` for each Dense layer's
                ``W_regularizer``.
            dropout: Rate passed to both Dropout layers.
        """
        self.words = words

        net = Sequential()
        net.add(
            Dense(nr_hidden,
                  name='compare1',
                  init='he_normal',
                  W_regularizer=l2(L2),
                  input_shape=(nr_hidden * 2, )))
        net.add(Activation('relu'))
        net.add(Dropout(dropout))
        net.add(
            Dense(nr_hidden,
                  name='compare2',
                  W_regularizer=l2(L2),
                  init='he_normal'))
        net.add(Activation('relu'))
        net.add(Dropout(dropout))

        # Run the same network independently at every timestep.
        self.model = TimeDistributed(net)

    def __call__(self, sent, align, **kwargs):
        """Concatenate ``sent`` and ``align``, compare, then sum-pool.

        Extra keyword arguments are accepted and ignored.
        """
        joined = merge([sent, align], mode='concat')
        compared = self.model(joined)
        return _GlobalSumPooling1D()(compared, mask=self.words)
class Attention(object):
    """Pairwise scores between an utterance sequence and a path sequence.

    Each input has its own two-layer feed-forward network
    (Dropout -> Dense -> Dropout -> Dense, relu activations) wrapped in
    ``TimeDistributed``; the weights are not shared between the two.
    """

    def __init__(self, hidden_len, dropout=0.0, L2=0.0):
        """Build the utterance and path projection networks.

        Args:
            hidden_len: Width of each network's input and of every Dense layer.
            dropout: Rate passed to every Dropout layer.
            L2: Weight passed to ``l2(...)`` for every Dense layer's
                ``W_regularizer``.
        """
        def _tower(first_name, second_name):
            # One projection network; the Dense layers carry the given names.
            net = Sequential()
            net.add(Dropout(dropout, input_shape=(hidden_len, )))
            net.add(
                Dense(hidden_len,
                      name=first_name,
                      init='he_normal',
                      W_regularizer=l2(L2),
                      input_shape=(hidden_len, ),
                      activation='relu'))
            net.add(Dropout(dropout))
            net.add(
                Dense(hidden_len,
                      name=second_name,
                      init='he_normal',
                      W_regularizer=l2(L2),
                      activation='relu'))
            return TimeDistributed(net)

        self.model_utter = _tower('attend1', 'attend2')
        self.model_path = _tower('attend3', 'attend4')

    def __call__(self, utter, path, max_len_utter, max_len_path):
        """Return the utterance-by-path score tensor.

        The declared output shape is ``(max_len_utter, max_len_path)``. Both
        inputs are expected to be rank-3 tensors (the axis permutation below
        requires it).

        NOTE(review): the previous comment described this as an attend step
        that skips the quadratic cost of normal attention, but the code still
        builds a full pairwise score matrix -- confirm the intent.
        """
        def merge_mode(utter_path):
            projected_utter, projected_path = utter_path[0], utter_path[1]
            # batch_dot(path, utter^T) is indexed [path, utter]; permute the
            # last two axes so the result is indexed [utter, path].
            path_by_utter = K.batch_dot(
                projected_path,
                K.permute_dimensions(projected_utter, (0, 2, 1)))
            return K.permute_dimensions(path_by_utter, (0, 2, 1))

        projected = [self.model_utter(utter), self.model_path(path)]
        return merge(projected,
                     mode=merge_mode,
                     output_shape=(max_len_utter, max_len_path))
class CompareAndAggregate(object):
    """Compare a sequence with its alignment, then pool over time.

    Two separate per-timestep networks are held: one for utterances and one
    for paths. Each is Dropout -> Dense -> relu -> Dropout -> Dense -> relu,
    wrapped in ``TimeDistributed``.
    """

    def __init__(self, hidden_len, L2=0.0, dropout=0.0):
        """Build the utterance and path comparison networks.

        Args:
            hidden_len: Width of every Dense layer; each network's input is
                declared as ``hidden_len * 2`` wide.
            L2: Weight passed to ``l2(...)`` for every Dense layer's
                ``W_regularizer``.
            dropout: Rate passed to every Dropout layer.
        """
        def _tower(first_name, second_name):
            # One comparison network; the Dense layers carry the given names.
            net = Sequential()
            net.add(Dropout(dropout, input_shape=(hidden_len * 2, )))
            net.add(
                Dense(hidden_len,
                      name=first_name,
                      init='he_normal',
                      W_regularizer=l2(L2)))
            net.add(Activation('relu'))
            net.add(Dropout(dropout))
            net.add(
                Dense(hidden_len,
                      name=second_name,
                      init='he_normal',
                      W_regularizer=l2(L2)))
            net.add(Activation('relu'))
            return TimeDistributed(net)

        self.model_utt = _tower('compare1', 'compare2')
        self.model_path = _tower('compare3', 'compare4')

    def __call__(self, sent, align, max_len, is_model_utt, **kwargs):
        """Compare ``sent`` with ``align`` and return the pooled, normalized result.

        Args:
            sent: First tensor of the concatenation.
            align: Second tensor of the concatenation.
            max_len: Passed as ``mask`` to both pooling layers.
            is_model_utt: Truthy selects the utterance network, otherwise the
                path network.
            **kwargs: Accepted and ignored.
        """
        tower = self.model_utt if is_model_utt else self.model_path
        compared = tower(merge([sent, align], mode='concat'))

        mean_pooled = GlobalAveragePooling1D()(compared, mask=max_len)
        max_pooled = GlobalMaxPooling1D()(compared, mask=max_len)

        # No ``mode`` is given, so the library default applies (element-wise
        # sum in Keras 1.x -- confirm for the installed version).
        pooled = merge([mean_pooled, max_pooled])
        return BatchNormalization()(pooled)
class _Comparison(object):
    """Compare a sequence with its aligned counterpart and sum-pool over time.

    The per-timestep network is
    Dense -> relu -> Dropout -> Dense -> relu -> Dropout, wrapped in
    ``TimeDistributed``.
    """

    def __init__(self, words, nr_hidden, L2=1e-6, dropout=0.2):
        """Build the per-timestep comparison network.

        Args:
            words: Passed as ``mask`` to the sum-pooling layer in ``__call__``.
            nr_hidden: Width of both Dense layers; the network input is
                declared as ``nr_hidden * 2`` wide.
            L2: Weight passed to ``l2(...)`` for each Dense layer's
                ``W_regularizer``.
            dropout: Rate passed to both Dropout layers.
        """
        self.words = words

        net = Sequential()
        stack = [
            Dense(nr_hidden,
                  name='compare1',
                  init='he_normal',
                  W_regularizer=l2(L2),
                  input_shape=(nr_hidden * 2, )),
            Activation('relu'),
            Dropout(dropout),
            Dense(nr_hidden,
                  name='compare2',
                  W_regularizer=l2(L2),
                  init='he_normal'),
            Activation('relu'),
            Dropout(dropout),
        ]
        for layer in stack:
            net.add(layer)

        # Run the same network independently at every timestep.
        self.model = TimeDistributed(net)

    def __call__(self, sent, align, **kwargs):
        """Concatenate ``sent`` and ``align``, compare, then sum-pool.

        Extra keyword arguments are accepted and ignored.
        """
        compared = self.model(merge([sent, align], mode='concat'))
        return _GlobalSumPooling1D()(compared, mask=self.words)
예제 #12
0
class ComparisonLayer(object):
    """
    Separately compare the aligned phrases using a function "G".

    "G" is Dropout -> Dense -> relu -> Dropout -> Dense -> relu, wrapped in
    ``TimeDistributed``.
    """
    def __init__(self, words, hidden_units, l2_weight_decay=0.0, dropout=0.0):
        """
        Build the per-timestep comparison network.

        Args:
            words: Stored on the instance; not read by this class.
            hidden_units: Width of both Dense layers; the network input is
                declared as ``hidden_units * 2`` wide.
            l2_weight_decay: Weight passed to ``regularizers.l2(...)`` for
                each Dense layer's ``kernel_regularizer``.
            dropout: Rate passed to both Dropout layers.
        """
        self.words = words

        net = Sequential()
        net.add(Dropout(dropout, input_shape=(hidden_units * 2, )))
        net.add(
            Dense(hidden_units,
                  kernel_initializer='he_normal',
                  kernel_regularizer=regularizers.l2(l2_weight_decay),
                  name='compare1'))
        net.add(Activation('relu'))
        net.add(Dropout(dropout))
        net.add(
            Dense(hidden_units,
                  kernel_initializer='he_normal',
                  kernel_regularizer=regularizers.l2(l2_weight_decay),
                  name='compare2'))
        net.add(Activation('relu'))

        # Apply comparison for each timestep
        self.model = TimeDistributed(net)

    def __call__(self, sent, align, **kwargs):
        """
        Concatenate ``sent`` and ``align``, compare, pool and normalize.

        Extra keyword arguments are accepted and ignored.
        """
        # The concatenation feeds a network whose per-timestep input is
        # declared as 2 * hidden_units wide.
        joined = merge([sent, align], mode='concat')
        compared = self.model(joined)

        mean_pooled = GlobalAveragePooling1D()(compared)
        max_pooled = GlobalMaxPooling1D()(compared)
        pooled = merge([mean_pooled, max_pooled], mode='sum')

        return BatchNormalization()(pooled)