def QRNcell():
    xq = Input(batch_shape=(batch_size, embedding_dim * 2))
    # Split into context and query
    xt = Lambda(lambda x, dim: x[:, :dim],
                arguments={'dim': embedding_dim},
                output_shape=lambda s: (s[0], s[1] // 2))(xq)
    qt = Lambda(lambda x, dim: x[:, dim:],
                arguments={'dim': embedding_dim},
                output_shape=lambda s: (s[0], s[1] // 2))(xq)

    h_tm1 = Input(batch_shape=(batch_size, embedding_dim))

    # Update gate z_t (computed as a scalar, then repeated to embedding_dim)
    zt = Dense(1, activation='sigmoid',
               bias_initializer=Constant(2.5))(multiply([xt, qt]))
    zt = Lambda(lambda x, dim: K.repeat_elements(x, dim, axis=1),
                arguments={'dim': embedding_dim})(zt)
    # Candidate hidden state
    ch = Dense(embedding_dim, activation='tanh')(concatenate([xt, qt],
                                                             axis=-1))
    # Reset gate r_t (computed as a scalar, then repeated to embedding_dim)
    rt = Dense(1, activation='sigmoid')(multiply([xt, qt]))
    rt = Lambda(lambda x, dim: K.repeat_elements(x, dim, axis=1),
                arguments={'dim': embedding_dim})(rt)
    # h_t = z_t * r_t * candidate + (1 - z_t) * h_{t-1}
    ht = add([
        multiply([zt, ch, rt]),
        multiply(
            [Lambda(lambda x: 1 - x, output_shape=lambda s: s)(zt), h_tm1])
    ])
    return RecurrentModel(input=xq,
                          output=ht,
                          initial_states=[h_tm1],
                          final_states=[ht],
                          return_sequences=True)
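
# Usage sketch (added for illustration, not part of the original example). It
# assumes the same globals as QRNcell() above -- `batch_size`, `embedding_dim`,
# plus keras' Input -- and an arbitrary sequence length of 12 timesteps.
# Because the cell graph is built with a fixed batch size, the wrapping
# sequence input must also use `batch_shape`; each timestep carries the
# context embedding concatenated with the query embedding.
qrn = QRNcell()
story_query = Input(batch_shape=(batch_size, 12, embedding_dim * 2))
hidden_seq = qrn(story_query)  # (batch_size, 12, embedding_dim), since return_sequences=True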
Example #2
def sru(input, initial_state=None, depth=1, dropout=0.2, recurrent_dropout=0.2, return_sequences=False, **kwargs):
    units = K.int_shape(input)[-1]
    input_masked = Masking(mask_value=0.)(input)
    mask = Lambda(lambda x, mask: mask, output_shape=lambda s: s[:2])(input_masked)
    W = Dense(units * 3)
    def drop(x, p):
        shape = K.shape(x)
        noise_shape = (shape[0], 1, shape[2])
        return Dropout(p, noise_shape=noise_shape).call(x)
    input_dropped = Lambda(drop, arguments={'p': dropout}, output_shape=lambda s: s)(input_masked)
    ones = Lambda(lambda x: x * 0. + 1., output_shape=lambda s: s)(input)
    dropped_ones = Dropout(recurrent_dropout)(ones)
    xfr = W(input_dropped)
    ixfrd = concatenate([input, xfr, dropped_ones])
    ixfrd = Lambda(lambda x: x[0], mask=lambda x, _: x[1], output_shape=lambda s: s[0])([ixfrd, mask])
    recurrent_input = Input((units * 5,))
    def unpack(x, n):
        return [Lambda(lambda x, i: x[:,units * i : units * (i + 1)], arguments={'i': i}, output_shape=lambda s: (s[0], units))(x) for i in range(n)]
    x_t, x_p_t, f_t, r_t, drop_t = unpack(recurrent_input, 5)  # drop_t: recurrent dropout mask slice
    f_t = Activation('sigmoid')(f_t)
    r_t = Activation('sigmoid')(r_t)
    inv = Lambda(lambda x: 1. - x, output_shape=lambda s: s)
    c_tm1 = Input((units, ))
    c_t = c_tm1
    h_t = x_t
    for _ in range(depth):
        c_t = add([multiply([f_t, c_t]), multiply([inv(f_t), x_p_t])])
        c_t = multiply([c_t, drop_t])
        h_t = add([multiply([r_t, Activation('tanh')(c_t)]), multiply([inv(r_t), h_t])])
        xfr = W(h_t)
        x_p_t, f_t, r_t = unpack(xfr, 3)
    rnn = RecurrentModel(input=recurrent_input, output=h_t,
                         initial_states=c_tm1, final_states=c_t,
                         return_sequences=return_sequences, **kwargs)
    output = rnn(ixfrd, initial_state=initial_state)
    return output
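
# Usage sketch (added for illustration, not part of the original example;
# keras' Input and Model are assumed to be in scope, and the 20-timestep /
# 32-feature shapes are arbitrary). sru() is applied directly to a sequence
# tensor and returns a tensor, so it drops straight into the functional API.
seq_in = Input((20, 32))
seq_out = sru(seq_in, depth=2, return_sequences=True)
model = Model(seq_in, seq_out)
model.compile(loss='mse', optimizer='adam')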
Example #3
def RWA(input_dim, output_dim):
    x = Input((input_dim, ))
    h_tm1 = Input((output_dim, ))
    n_tm1 = Input((output_dim, ))
    d_tm1 = Input((output_dim, ))

    x_h = concatenate([x, h_tm1])

    u = Dense(output_dim)(x)
    g = Dense(output_dim, activation='tanh')(x_h)

    a = Dense(output_dim, use_bias=False)(x_h)
    e_a = Lambda(lambda x: K.exp(x))(a)

    z = multiply([u, g])
    nt = add([n_tm1, multiply([z, e_a])])
    dt = add([d_tm1, e_a])
    dt = Lambda(lambda x: 1.0 / x)(dt)
    ht = multiply([nt, dt])
    ht = Activation('tanh')(ht)

    return RecurrentModel(
        input=x,
        output=ht,
        initial_states=[h_tm1, n_tm1, d_tm1],
        final_states=[ht, nt, dt],
        state_initializer=[initializers.random_normal(stddev=1.0)])
Example #4
def RHN(input_dim, hidden_dim, depth):
    # Wrapped model
    inp = Input(batch_shape=(batch_size, input_dim))
    state = Input(batch_shape=(batch_size, hidden_dim))
    drop_mask = Input(batch_shape=(batch_size, hidden_dim))
    # Invert the mask so that an all-zero mask does not zero out the state
    # and cause the gradient to vanish
    inverted_drop_mask = Lambda(lambda x: 1.0 - x,
                                output_shape=lambda s: s)(drop_mask)
    drop_mask_2 = Lambda(lambda x: x + 0.,
                         output_shape=lambda s: s)(inverted_drop_mask)
    dropped_state = multiply([state, inverted_drop_mask])
    y, new_state = RHNCell(
        units=hidden_dim,
        recurrence_depth=depth,
        kernel_initializer=weight_init,
        kernel_regularizer=l2(weight_decay),
        kernel_constraint=max_norm(gradient_clip),
        bias_initializer=Constant(transform_bias),
        recurrent_initializer=weight_init,
        recurrent_regularizer=l2(weight_decay),
        recurrent_constraint=max_norm(gradient_clip))([inp, dropped_state])
    return RecurrentModel(input=inp,
                          output=y,
                          initial_states=[state, drop_mask],
                          final_states=[new_state, drop_mask_2])
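
# Usage sketch (added for illustration, not part of the original example). It
# assumes the same globals as RHN() above (batch_size, weight_init,
# weight_decay, gradient_clip, transform_bias, RHNCell) plus keras' Input,
# and hypothetical sizes: 128-dim inputs, a 256-unit hidden state, depth 4,
# 35 timesteps. Both states of the wrapped RecurrentModel -- the hidden state
# and the per-unit drop mask -- can be supplied through initial_state when
# the layer is called on a sequence.
rhn = RHN(input_dim=128, hidden_dim=256, depth=4)
seq = Input(batch_shape=(batch_size, 35, 128))
h0 = Input(batch_shape=(batch_size, 256))      # initial hidden state
mask0 = Input(batch_shape=(batch_size, 256))   # per-unit drop mask
y = rhn(seq, initial_state=[h0, mask0])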
Example #5
def test_model():
    x = Input((5, ))
    h_tm1 = Input((10, ))
    h = add([Dense(10)(x), Dense(10, use_bias=False)(h_tm1)])
    h = Activation('tanh')(h)
    a = Input((7, 5))

    rnn = RecurrentModel(input=x,
                         output=h,
                         initial_states=h_tm1,
                         final_states=h)
    b = rnn(a)
    model = Model(a, b)

    model.compile(loss='mse', optimizer='sgd')
    model.fit(np.random.random((32, 7, 5)), np.random.random((32, 10)))
    model.predict(np.zeros((32, 7, 5)))
Example #6
def test_readout():
    x = Input((5, ))
    y_tm1 = Input((5, ))
    h_tm1 = Input((5, ))
    h = add([Dense(5)(add([x, y_tm1])), Dense(5, use_bias=False)(h_tm1)])
    h = Activation('tanh')(h)

    rnn = RecurrentModel(input=x,
                         initial_states=h_tm1,
                         output=h,
                         final_states=h,
                         readout_input=y_tm1)

    a = Input((7, 5))
    b = rnn(a)
    model = Model(a, b)

    model.compile(loss='mse', optimizer='sgd')
    model.fit(np.random.random((32, 7, 5)), np.random.random((32, 5)))
    model.predict(np.zeros((32, 7, 5)))
Example #7
    def build(self):

        patch = Input((self.patch_size, self.patch_width), name="InputPatch")
        memory_tm1 = Input(batch_shape=self.memory_shape_batch, name="Memory")
        memory_t = memory_tm1

        # conv = self.combine_nodes(patch, working_width)
        # first_node = Lambda(lambda x: x[:,:self.patch_data_width])(flat_patch)
        patch_without_memory_addr = Lambda(
            lambda x: x[:, :, :self.patch_data_width])(patch)
        flat_patch = Reshape(
            [self.patch_size * self.patch_data_width])(patch_without_memory_addr)

        working_memory = Dense(self.working_width,
                               activation='relu')(flat_patch)
        # conv = self.combine_nodes(patch, self.working_width)
        # working_memory = concatenate([working_memory, conv])
        # working_memory = Dense(self.working_width, activation='relu')(working_memory)

        pre_memory = working_memory

        use_memory = False

        if use_memory:
            # ------- Memory operations --------- #

            primary_address = Lambda(
                lambda x: x[:, 3, self.patch_data_width:])(patch)
            print(primary_address)

            address = self.generate_address(primary_address,
                                            patch,
                                            name="address_read1")
            read1 = self.read(memory_t, address)

            # Turn batch dimension from None to batch_size
            batched_working_memory = Lambda(lambda x: K.reshape(
                x, [self.batch_size, self.working_width]))(working_memory)
            batched_working_memory = concatenate(
                [batched_working_memory, read1], batch_size=self.batch_size)

            batched_working_memory = Dense(
                self.working_width, activation='relu')(batched_working_memory)

            erase_word = Dense(self.word_size,
                               name="DenseEraseWord",
                               activation='relu')(batched_working_memory)
            # address = self.generate_address(batched_working_memory, patch, name="address_erase")
            erase_word = Lambda(lambda x: K.ones_like(x))(erase_word)
            memory_t = self.erase(memory_t, primary_address, erase_word)

            write_word = Dense(self.word_size,
                               name="DenseWriteWord",
                               activation='relu')(batched_working_memory)
            # address = self.generate_address(batched_working_memory, patch, name="address_write")
            memory_t = self.write(memory_t, primary_address, write_word)

            # address = self.generate_address(batched_working_memory, patch, name="address_read2")
            # read2 = self.read(memory_t, address)

            # working_memory = concatenate([batched_working_memory, read1])
            working_memory = Dense(self.working_width,
                                   activation="relu")(batched_working_memory)

        return RecurrentModel(
            input=patch,
            output=working_memory,
            return_sequences=True,
            stateful=True,
            initial_states=[memory_tm1],
            final_states=[memory_t],
            state_initializer=[initializers.random_normal(stddev=1.0)])
Example #8
from recurrentshop import *
from keras.layers import Input, Dense, Activation, add
import keras
import numpy as np

x_t = Input(shape=(5, ))  # The input to the RNN at time t
h_tm1 = Input(shape=(20, ))  # Previous hidden state

# Compute new hidden state
h_t = add([Dense(20)(x_t), Dense(20, use_bias=False)(h_tm1)])

# tanh activation
h_t = Activation('tanh')(h_t)

y_t = Dense(5, activation='softmax')(h_t)

# Build the RNN
# RecurrentModel is a standard Keras `Recurrent` layer.
# RecurrentModel also accepts arguments such as unroll, return_sequences, etc.
rnn = RecurrentModel(input=x_t,
                     initial_states=[h_tm1],
                     output=y_t,
                     final_states=[h_t])

# return_sequences is False by default,
# so it only returns the last h_t state.

# Build a Keras Model using our RNN layer
# input dimensions are (Time_steps, Depth)
x = Input(shape=(4, 5))
y = rnn(x)
model = keras.models.Model(x, y)

# Run the RNN over a random sequence
# Don't forget the batch shape when calling the model!
out = model.predict(np.random.random((1, 4, 5)))
Example #9
    def test_memory_rnn_gradient(self):

        # Data setup
        memory_size = 20
        word_size = 4
        batch_size = 1
        patch_size = 10
        patch_width = memory_size + 5
        sequence_length = 10

        header = ExperimentHeader(
            params={
                "word_size": word_size,
                "memory_size": memory_size,
                "patch_size": patch_size,
                "patch_width": patch_width
            })
        experiment = Experiment("test_memory_cell", header, Args(batch_size))

        pb = NTMBase(experiment)

        patch = Input((patch_size, patch_width), name="patch")
        memory_tm1 = Input((memory_size, word_size), name="memory")
        memory_t = memory_tm1

        flat_patch = Reshape((patch_size * patch_width, ))(patch)

        write_word = Dense(word_size)(flat_patch)
        erase_word = Dense(word_size)(flat_patch)

        ptr = Dense(patch_size)(flat_patch)
        address = pb.resolve_address(ptr, patch)
        memory_t = pb.erase(memory_t, address, erase_word)

        ptr = Dense(patch_size)(flat_patch)
        address = pb.resolve_address(ptr, patch)
        memory_t = pb.write(memory_t, address, write_word)

        ptr = Dense(patch_size)(flat_patch)
        address = pb.resolve_address(ptr, patch)
        read = pb.read(memory_t, address)

        out = Dense(3)(read)

        rnn = RecurrentModel(input=patch,
                             output=out,
                             initial_states=[memory_tm1],
                             final_states=[memory_t])
        a = Input((sequence_length, patch_size, patch_width), name="patch_seq")
        b = rnn(a)
        model = Model(a, b)
        model.compile(loss='mse', optimizer='sgd')

        model.fit(
            {
                "patch_seq":
                np.random.random(
                    (batch_size, sequence_length, patch_size, patch_width)),
                # "memory": np.random.random((batch_size, memory_size, word_size)),
            },
            np.random.random((batch_size, 3)))

        model.predict({
            "patch_seq":
            np.zeros((batch_size, sequence_length, patch_size, patch_width)),
            # "memory": np.zeros((batch_size, memory_size, word_size)),
        })
Example #10
att_scores = add([W_xt, W_ht])
att_mask = Activation(K.softmax, name='att_mask')(att_scores)

lstms_input = dot([att_mask, X_t], axes=(1, 1))

cells = [LSTMCell(attentive_lstm_dim) for _ in range(attentive_lstm_depth)]

lstms_output, h, c = lstms_input, h_tm1, c_tm1
for cell in cells:
    lstms_output, h, c = cell([lstms_output, h, c])

attentive_lstm = RecurrentModel(input=X_t,
                                output=lstms_output,
                                initial_states=[h_tm1, c_tm1],
                                final_states=[h, c],
                                readout_input=readout_input,
                                return_states=False,
                                return_sequences=True)

#--- Full Model ---#
fmap_seq = TimeDistributed(inception)(input_patche_seq)
lstm_out1 = attentive_lstm(fmap_seq)
lstm_out2 = LSTM(8, activation='tanh')(lstm_out1)
hazard = Dense(1, activation='linear')(lstm_out2)

model = Model(input_patche_seq, hazard)
model.compile(loss='mean_squared_error', optimizer='adadelta')
#model.compile(loss=partial_likelihood, optimizer='adadelta')
#----------------------------------------------------------------------
# Fit:
Example #11
# Imports and the per-timestep cell input are assumed as follows for this
# snippet; the (10,) shape matches the (50, 10) sequence input further down.
from keras import layers, optimizers
from keras.models import Model
from recurrentshop import RecurrentModel, LSTMCell

# Per-timestep input to the cell.
cell_input = layers.Input(shape=(10, ))

# Readout input.
readout_input = layers.Input(shape=(100, ))

# Internal inputs for the LSTM cell.
last_state = layers.Input(shape=(100, ))
last_output = layers.Input(shape=(100, ))

# Create the LSTM layer.
fused_inputs = layers.concatenate([cell_input, readout_input])
lstm1_o, lstm1_h, lstm1_c = LSTMCell(100)(
    [fused_inputs, last_state, last_output])

# Build the RNN.
rnn = RecurrentModel(input=cell_input,
                     output=lstm1_o,
                     initial_states=[last_state, last_output],
                     final_states=[lstm1_h, lstm1_c],
                     readout_input=readout_input)

# Main sequence input.
sequence_input = layers.Input(shape=(50, 10))
# Initial readout input.
initial_readout = layers.Input(shape=(100, ))

rnn_output = rnn(sequence_input, initial_readout=initial_readout)

# Build the Keras model.
model = Model(inputs=[sequence_input, initial_readout], outputs=rnn_output)
opt = optimizers.SGD(lr=0.001, momentum=0.9)
model.compile(loss="mean_squared_error", optimizer=opt)
Example #12
def assemble_model_recurrent(input_shape,
                             num_filters,
                             num_classes,
                             normalization=LayerNorm,
                             norm_kwargs=None,
                             weight_norm=False,
                             num_outputs=1,
                             weight_decay=0.0005,
                             init='he_normal'):
    from recurrentshop import RecurrentModel
    assert (num_outputs == 1)

    if norm_kwargs is None:
        norm_kwargs = {}

    # Inputs
    model_input = Input(batch_shape=input_shape, name='model_input')
    input_t = Input(batch_shape=(input_shape[0], ) + input_shape[2:])
    hidden_input_t = Input(batch_shape=(input_shape[0], num_filters) +
                           input_shape[3:])

    # Common convolution kwargs.
    convolution_kwargs = {
        'filters': num_filters,
        'kernel_size': 3,
        'ndim': 2,
        'padding': 'same',
        'weight_norm': weight_norm,
        'kernel_initializer': init
    }

    # GRU input.
    x_t = Convolution(**convolution_kwargs,
                      kernel_regularizer=_l2(weight_decay),
                      activation='relu',
                      name=_unique('conv_x'))(input_t)
    if normalization is not None:
        x_t = normalization(**norm_kwargs)(x_t)

    # GRU block.
    gate_replace_x = Convolution(**convolution_kwargs,
                                 kernel_regularizer=_l2(weight_decay),
                                 activation='sigmoid',
                                 name=_unique('conv_gate_replace'))(x_t)
    #if normalization is not None:
    #gate_replace_x = normalization(**norm_kwargs)(gate_replace_x)
    gate_replace_h = Convolution(**convolution_kwargs,
                                 kernel_regularizer=_l2(weight_decay),
                                 activation='sigmoid',
                                 name=_unique('conv_gate_replace'))(
                                     hidden_input_t)
    #if normalization is not None:
    #gate_replace_h = normalization(**norm_kwargs)(gate_replace_h)
    gate_replace = merge_add([gate_replace_x, gate_replace_h])

    gate_read_x = Convolution(**convolution_kwargs,
                              kernel_regularizer=_l2(weight_decay),
                              activation='sigmoid',
                              name=_unique('conv_gate_read'))(x_t)
    #if normalization is not None:
    #gate_read_x = normalization(**norm_kwargs)(gate_read_x)
    gate_read_h = Convolution(**convolution_kwargs,
                              kernel_regularizer=_l2(weight_decay),
                              activation='sigmoid',
                              name=_unique('conv_gate_read'))(hidden_input_t)
    #if normalization is not None:
    #gate_read_h = normalization(**norm_kwargs)(gate_read_h)
    gate_read = merge_add([gate_read_x, gate_read_h])

    hidden_read_t = merge_multiply([gate_read, hidden_input_t])
    #if normalization is not None:
    #hidden_read_t = normalization(**norm_kwargs)(hidden_read_t)

    mix_t_pre = merge_concatenate([x_t, hidden_read_t], axis=1)
    mix_t = Convolution(**convolution_kwargs,
                        kernel_regularizer=_l2(weight_decay),
                        activation='tanh',
                        name=_unique('conv_mix'))(mix_t_pre)
    #if normalization is not None:
    #mix_t = normalization(**norm_kwargs)(mix_t)

    lambda_inputs = [mix_t, hidden_input_t, gate_replace]
    hidden_t = Lambda(function=lambda ins: ins[2] * ins[0] +
                      (1 - ins[2]) * ins[1],
                      output_shape=lambda x: x[0])(lambda_inputs)

    # GRU output.
    out_t = Convolution(**convolution_kwargs,
                        kernel_regularizer=_l2(weight_decay),
                        activation='relu',
                        name=_unique('conv_out'))(hidden_t)
    class_convolution_kwargs = copy.copy(convolution_kwargs)
    class_convolution_kwargs['filters'] = num_classes
    out_t = Convolution(**class_convolution_kwargs,
                        kernel_regularizer=_l2(weight_decay),
                        activation='linear',
                        name=_unique('conv_out'))(hidden_t)
    #if normalization is not None:
    #out_t = normalization(**norm_kwargs)(out_t)

    # Classifier.
    out_t = Permute((2, 3, 1))(out_t)
    if num_classes == 1:
        out_t = Activation('sigmoid')(out_t)
    else:
        out_t = Activation(_softmax)(out_t)
    out_t = Permute((3, 1, 2))(out_t)

    # Make it a recurrent block.
    #
    # NOTE: a bidirectional 'stateful' GRU has states passed between blocks
    # of the reverse path in non-temporal order. Only the forward pass is
    # stateful in sequential/temporal order.
    cobject = {LayerNorm.__name__: LayerNorm}
    output_layer = Bidirectional_(RecurrentModel(
        input=input_t,
        initial_states=[hidden_input_t],
        output=out_t,
        final_states=[hidden_t],
        stateful=True,
        return_sequences=True),
                                  merge_mode='sum',
                                  custom_objects=cobject)
    output_layer.name = 'output_0'
    model = Model(inputs=model_input, outputs=output_layer(model_input))
    return model
Example #13
import numpy as np

from recurrentshop import RecurrentModel
from keras.models import Model
from keras.layers import *

x = Input((5, ))
h_tm1 = Input((10, ))
h = add([Dense(10)(x), Dense(10, use_bias=False)(h_tm1)])
h = Activation('tanh')(h)

a = Input((7, 5))

rnn = RecurrentModel(input=x, output=h, initial_states=h_tm1, final_states=h)
b = rnn(a)
model = Model(a, b)

model.compile(loss='mse', optimizer='sgd')
model.fit(np.random.random((32, 7, 5)), np.random.random((32, 10)))
model.predict(np.zeros((32, 7, 5)))

# The same RNN, now with random-normal state initialization.
rnn = RecurrentModel(input=x,
                     output=h,
                     initial_states=h_tm1,
                     final_states=h,
                     state_initializer='random_normal')
b = rnn(a)
model = Model(a, b)

model.compile(loss='mse', optimizer='sgd')
model.fit(np.random.random((32, 7, 5)), np.random.random((32, 10)))
model.predict(np.zeros((32, 7, 5)))