class _Attention(object):
    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=0.0, activation='relu'):
        self.max_length = max_length
        self.model = Sequential()
        self.model.add(Dropout(dropout, input_shape=(nr_hidden,)))
        self.model.add(
            Dense(nr_hidden, name='attend1',
                init='he_normal', W_regularizer=l2(L2),
                input_shape=(nr_hidden,), activation='relu'))
        self.model.add(Dropout(dropout))
        self.model.add(Dense(nr_hidden, name='attend2',
            init='he_normal', W_regularizer=l2(L2), activation='relu'))
        self.model = TimeDistributed(self.model)
    
    def __call__(self, sent1, sent2):
        def _outer(AB):
            att_ji = K.batch_dot(AB[1], K.permute_dimensions(AB[0], (0, 2, 1)))
            return K.permute_dimensions(att_ji,(0, 2, 1))
        return merge(
                [self.model(sent1), self.model(sent2)],
                mode=_outer,
                output_shape=(self.max_length, self.max_length))
class _Attention(object):
    def __init__(self, max_length, nr_hidden, dropout=0.0, L2=1e-4, activation='relu'):
        self.max_length = max_length
        self.model = Sequential()
        self.model.add(
            Dense(nr_hidden, name='attend1',
                init='he_normal', W_regularizer=l2(L2),
                input_shape=(nr_hidden,), activation='relu'))
        self.model.add(Dropout(dropout))
        self.model.add(Dense(nr_hidden, name='attend2',
            init='he_normal', W_regularizer=l2(L2), activation='relu'))
        self.model = TimeDistributed(self.model)
    
    def __call__(self, sent1, sent2):
        def _outer(AB):
            A, B = AB  # tuple-parameter unpacking is Python 2 only; unpack explicitly
            att_ji = T.batched_dot(B, A.dimshuffle((0, 2, 1)))
            return att_ji.dimshuffle((0, 2, 1))

        return merge(
                [self.model(sent1), self.model(sent2)],
                mode=_outer,
                output_shape=(self.max_length, self.max_length))
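# The two _Attention variants above differ only in backend ops (K.batch_dot vs
# T.batched_dot); both rely on the deprecated Keras 1 merge(mode=callable) API.
# A minimal Keras 2 sketch of the same alignment step (an addition, not part of
# the original source): `attend` stands for the TimeDistributed model built in
# __init__, and sent1/sent2 are (batch, max_length, nr_hidden) tensors.
from keras.layers import Dot

def attention_matrix(attend, sent1, sent2):
    a = attend(sent1)  # (batch, max_length, nr_hidden)
    b = attend(sent2)  # (batch, max_length, nr_hidden)
    # e[i, j] = a_i . b_j over the hidden axis -> (batch, max_length, max_length)
    return Dot(axes=-1)([a, b])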
    trainable=False,
)(sent_ints)

sent_wv_dr = Dropout(drop_rate)(sent_wv)
sent_wa = bidir_gru(sent_wv_dr, sent_n_units, is_GPU)
sent_att_vec = AttentionWithContext()(sent_wa)
sent_att_vec_dr = Dropout(drop_rate)(sent_att_vec)
# skip connection
sent_added = SkipConnection()([sent_att_vec_dr, sent_wv_dr])
sent_encoder = Model(sent_ints, sent_added)

doc_ints = Input(shape=(
    docs_train.shape[1],
    docs_train.shape[2],
))
sent_att_vecs_dr = TimeDistributed(sent_encoder)(doc_ints)
doc_sa = bidir_gru(sent_att_vecs_dr, doc_n_units, is_GPU)
doc_att_vec = AttentionWithContext()(doc_sa)
doc_att_vec_dr = Dropout(drop_rate)(doc_att_vec)

doc_att_vec_dr = LeakyReLU(alpha=0.01)(doc_att_vec_dr)
preds = Dense(units=1)(doc_att_vec_dr)
model = Model(doc_ints, preds)

model.compile(loss='mean_squared_error',
              optimizer=my_optimizer,
              metrics=['mae'])

print('model compiled')
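
# A hedged training call for the compiled model above; docs_train exists in
# this snippet, while y_train (a vector of regression targets) is an assumed
# placeholder name, not from the source.
model.fit(docs_train, y_train,
          batch_size=32,
          epochs=10,
          validation_split=0.1)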
def Seq2Seq(output_dim, output_length, batch_input_shape=None,
            input_shape=None, batch_size=None, input_dim=None, input_length=None,
            hidden_dim=None, depth=1, broadcast_state=True, unroll=False,
            stateful=False, inner_broadcast_state=True, teacher_force=False,
            peek=False, dropout=0.):

    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the decoder's
    hidden state (specified by the broadcast_state argument). Also, in deep models
    (depth > 1), the hidden state is propagated throughout the LSTM stack (specified by
    the inner_broadcast_state argument). You can switch between the [1]-based model and
    the [2]-based model using the peek argument (peek = True for [2], peek = False for [1]).
    When peek = True, the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); the context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); where s is the hidden state of the LSTM (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

        Encoder:
        X = Input sequence
        C = LSTM(X); the context vector

        Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        where s is the hidden state of the LSTM (h and c), and C is the context vector
        from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
            there will be 3 LSTMs on the encoding side and 3 LSTMs on the
            decoding side. You can also specify depth as a tuple. For example,
            if depth = (4, 5), 4 LSTMs will be added to the encoding side and
            5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder should be
                      transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be propagated
                            throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context vector
           at every timestep.
    dropout : Dropout probability in between layers.


    '''
    '''
        The block below computes the input shape: batch_input_shape = (batch_size, timesteps, data_dim).
        A concrete batch_size makes the LSTM stateful, while batch_size = None keeps it stateless.
    '''
    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size,) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size,) + (input_length,) + (input_dim,)
        else:
            shape = (batch_size,) + (None,) + (input_dim,)
    else:
        raise TypeError('Seq2Seq requires one of batch_input_shape, '
                        'input_shape, or input_dim')
    if hidden_dim is None:
        hidden_dim = output_dim

    '''
        RecurrentSequential (recurrentshop) :- https://keras.io/layers/recurrent/
        unroll - whether to unroll the recurrence (faster, but more memory-hungry)
        return_states - Boolean. Whether to return the last states in addition to the output.
    '''
    encoder = RecurrentSequential(readout=True, state_sync=inner_broadcast_state,
                                  unroll=unroll, stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    ''' 
        TimeDistributed :- https://keras.io/layers/wrappers/
    '''
    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    ''' 
        Readout lets you feed the output of your RNN from the previous time step back to the current time step.
    '''
    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state, decode=True,
                                  output_length=output_length, unroll=unroll,
                                  stateful=stateful, teacher_force=teacher_force)

    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim,
                                    batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]


    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq, initial_state=states)
    
    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
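
# A usage sketch (argument values are illustrative, in the style of the
# library's README): a depth-4 seq2seq model mapping length-7, dim-5 input
# sequences to length-8, dim-20 output sequences.
model = Seq2Seq(batch_input_shape=(16, 7, 5), hidden_dim=10,
                output_length=8, output_dim=20, depth=4, peek=True)
model.compile(loss='mse', optimizer='rmsprop')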
Example #7
    def create_contextual_attention_model(self, returnEpEh=False):
        # 0, (Optional) Set the upper limit of GPU memory
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        set_session(tf.Session(config=config))

        # 1, Embedding the input and project the embeddings
        premise = Input(shape=(self.SentMaxLen, ), dtype='int32')
        hypothesis = Input(shape=(self.SentMaxLen, ), dtype='int32')
        embed_p = self.Embed(premise)  # [batchsize, Psize, Embedsize]
        embed_h = self.Embed(hypothesis)  # [batchsize, Hsize, Embedsize]
        EmbdProject = TimeDistributed(
            Dense(200,
                  activation='relu',
                  kernel_regularizer=l2(self.L2Strength),
                  bias_regularizer=l2(self.L2Strength)))
        embed_p = Dropout(self.DropProb)(
            EmbdProject(embed_p))  # [batchsize, Psize, units]
        embed_h = Dropout(self.DropProb)(
            EmbdProject(embed_h))  # [batchsize, Hsize, units]

        # 2, Encoder words with its surrounding context
        Encoder = Bidirectional(
            LSTM(units=200, dropout=self.DropProb, return_sequences=True))
        embed_p = Encoder(embed_p)
        embed_h = Encoder(embed_h)

        # 3, Score each word and calculate the score matrix Eph.
        F_p, F_h = embed_p, embed_h
        for i in range(2):  # Applying Decomposable Score Function
            scoreF = TimeDistributed(
                Dense(200,
                      activation='relu',
                      kernel_regularizer=l2(self.L2Strength),
                      bias_regularizer=l2(self.L2Strength)))
            F_p = Dropout(self.DropProb)(
                scoreF(F_p))  # [batch_size, Psize, units]
            F_h = Dropout(self.DropProb)(
                scoreF(F_h))  # [batch_size, Hsize, units]
        Eph = keras.layers.Dot(axes=(2, 2))([F_h, F_p
                                             ])  # [batch_size, Hsize, Psize]
        Eh = Lambda(lambda x: keras.activations.softmax(x))(
            Eph)  # [batch_size, Hsize, Psize]
        Ep = keras.layers.Permute((2, 1))(Eph)  # [batch_size, Psize, Hsize]
        Ep = Lambda(lambda x: keras.activations.softmax(x))(
            Ep)  # [batch_size, Psize, Hsize]

        # 4, Normalize the score matrix, encode premises and get the alignment
        PremAlign = keras.layers.Dot((2, 1))([Ep, embed_h])
        HypoAlign = keras.layers.Dot((2, 1))([Eh, embed_p])
        PremAlign = keras.layers.Concatenate()(
            [embed_p, PremAlign])  # [batch_size, Psize, 2*unit]
        HypoAlign = keras.layers.Concatenate()(
            [embed_h, HypoAlign])  # [batch_size, Hsize, 2*unit]
        Compresser = TimeDistributed(Dense(
            200,
            kernel_regularizer=l2(self.L2Strength),
            bias_regularizer=l2(self.L2Strength)),
                                     name='Compresser')
        PremAlign = Compresser(PremAlign)
        HypoAlign = Compresser(HypoAlign)

        # 5, Final BiLSTM encoder
        Final = Bidirectional(LSTM(units=200, dropout=self.DropProb),
                              name='finaldecoer')  # [-1,2*units]
        final_p = Final(PremAlign)
        final_h = Final(HypoAlign)
        Final = keras.layers.Concatenate()([final_p, final_h])
        for i in range(2):
            Final = Dense(200, name='3dense_' + str(i),
                          activation='relu')(Final)
            Final = Dropout(self.DropProb)(Final)
            Final = BatchNormalization()(Final)

        # 6, Prediction by softmax
        Final = Dense(3, activation='softmax', name='judge')(Final)
        if returnEpEh:
            self.model = Model(inputs=[premise, hypothesis],
                               outputs=[Ep, Eh, Final])
        else:
            self.model = Model(inputs=[premise, hypothesis], outputs=Final)
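
# Hedged usage sketch (an addition, not from the source): `nli` stands for an
# instance of the enclosing class; train_p/train_h are padded index arrays and
# train_y is one-hot over the 3 SNLI classes.
nli.create_contextual_attention_model()
nli.model.compile(optimizer='rmsprop', loss='categorical_crossentropy',
                  metrics=['accuracy'])
nli.model.fit([train_p, train_h], train_y, batch_size=128, epochs=10)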
Example #8
    def _build_network(self, network_input, network_output,
                       additional_network_outputs):
        cluster_counts = list(self.data_provider.get_cluster_counts())

        # The simple loss cluster NN requires a specific output: a list of softmax distributions
        # First in this list are all softmax distributions for k=k_min for each object, then for k=k_min+1 for each
        # object etc. At the end, there is the cluster count output.

        # First we get an embedding for the network inputs
        embeddings = self._get_embedding(network_input)

        # Reshape all embeddings to 1d vectors
        # embedding_shape = self._embedding_nn.model.layers[-1].output_shape
        # embedding_size = np.prod(embedding_shape[1:])
        embedding_shape = embeddings[0].shape
        embedding_size = int(str(np.prod(embedding_shape[1:])))
        embedding_reshaper = self._s_layer(
            'embedding_reshape', lambda name: Reshape(
                (1, embedding_size), name=name))
        embeddings_reshaped = [
            embedding_reshaper(embedding) for embedding in embeddings
        ]

        # # We need now the internal representation of the embeddings. This means we have to resize them.
        # embedding_internal_resizer = self._s_layer('internal_embedding_resize', lambda name: Dense(internal_embedding_size, name=name))
        # embeddings_reshaped = [embedding_internal_resizer(embedding) for embedding in embeddings_reshaped]
        # embedding_internal_resizer_act = LeakyReLU()
        # embeddings_reshaped = [embedding_internal_resizer_act(embedding) for embedding in embeddings_reshaped]

        # Merge all embeddings to one tensor
        embeddings_merged = self._s_layer(
            'embeddings_merge',
            lambda name: Concatenate(axis=1, name=name))(embeddings_reshaped)

        processed = embeddings_merged
        for i in range(len(self.__lstm_block_state_sizes)):
            lstm_block_state_size = self.__lstm_block_state_sizes[i] // 2 * 2

            if i > 0:
                # For the initial layer we do not want a Dense layer (this should be included in the embedding network)
                processed = BatchNormalization()(processed)
                processed = self._s_layer(
                    'INTERNAL_STATE_CHANGE{}'.format(i), lambda name:
                    TimeDistributed(Dense(lstm_block_state_size)))(processed)
                processed = LeakyReLU()(processed)

            for j in range(self.__lstm_block_size):
                tmp = self._s_layer(
                    'LSTM_proc_{}_{}'.format(i, j),
                    lambda name: Bidirectional(LSTM(lstm_block_state_size // 2,
                                                    return_sequences=True),
                                               name=name))(processed)
                processed = Add()([processed, tmp])

        # Split the tensor into separate layers
        embeddings_processed = [
            self._s_layer('slice_{}'.format(i),
                          lambda name: slice_layer(processed, i, name))
            for i in range(len(network_input))
        ]

        # Create now two outputs: The cluster count and for each cluster count / object combination a softmax distribution.
        # These outputs are independent of each other, therefore it doesn't matter which is calculated first. Let us start
        # with the cluster count / object combinations.

        # First prepare some generally required layers
        layers = []
        for i in range(self.__output_dense_layers):
            layers += [
                self._s_layer(
                    'output_dense{}'.format(i),
                    lambda name: Dense(self.__output_dense_units, name=name)),
                self._s_layer('output_batch{}'.format(i),
                              lambda name: BatchNormalization(name=name)),
                LeakyReLU()
                # self._s_layer('output_relu'.format(i), lambda name: Activation(LeakyReLU(), name=name))
            ]
        cluster_softmax = {
            k: self._s_layer(
                'softmax_cluster_{}'.format(k),
                lambda name: Dense(k, activation='softmax', name=name))
            for k in cluster_counts
        }

        # Create now the outputs
        clusters_output = additional_network_outputs['clusters'] = {}
        for i in range(len(embeddings_processed)):
            embedding_proc = embeddings_processed[i]

            # Add the required layers
            for layer in layers:
                embedding_proc = layer(embedding_proc)

            input_clusters_output = clusters_output['input{}'.format(i)] = {}
            for k in cluster_counts:

                # Create now the required softmax distributions
                output_classifier = cluster_softmax[k](embedding_proc)
                input_clusters_output['cluster{}'.format(
                    k)] = output_classifier
                network_output.append(output_classifier)

        # Calculate the real cluster count
        assert self.__cluster_count_lstm_layers >= 1
        # Start from the merged embeddings so the chain is defined even when
        # only one LSTM layer is requested, and so stacked layers consume the
        # previous layer's output instead of always re-reading the embeddings.
        cluster_count = embeddings_merged
        for i in range(self.__cluster_count_lstm_layers - 1):
            cluster_count = self._s_layer(
                'cluster_count_LSTM{}'.format(i),
                lambda name: Bidirectional(LSTM(
                    self.__cluster_count_lstm_units, return_sequences=True),
                                           name=name)(cluster_count))
            cluster_count = self._s_layer(
                'cluster_count_LSTM{}_batch'.format(i),
                lambda name: BatchNormalization(name=name))(cluster_count)
        cluster_count = self._s_layer(
            'cluster_count_LSTM_merge',
            lambda name: Bidirectional(LSTM(self.__cluster_count_lstm_units),
                                       name=name)(cluster_count))
        cluster_count = self._s_layer(
            'cluster_count_LSTM_merge_batch',
            lambda name: BatchNormalization(name=name))(cluster_count)
        for i in range(self.__cluster_count_dense_layers):
            cluster_count = self._s_layer(
                'cluster_count_dense{}'.format(i),
                lambda name: Dense(self.__cluster_count_dense_units, name=name
                                   ))(cluster_count)
            cluster_count = self._s_layer(
                'cluster_count_batch{}'.format(i),
                lambda name: BatchNormalization(name=name))(cluster_count)
            cluster_count = LeakyReLU()(cluster_count)
            # cluster_count = self._s_layer('cluster_count_relu{}'.format(i), lambda name: Activation(LeakyReLU(), name=name))(cluster_count)

        # The next layer is an output-layer, therefore the name must not be formatted
        cluster_count = self._s_layer(
            'cluster_count_output',
            lambda name: Dense(
                len(cluster_counts), activation='softmax', name=name),
            format_name=False)(cluster_count)
        additional_network_outputs['cluster_count_output'] = cluster_count

        network_output.append(cluster_count)

        return True
Example #9
def melody_ResNet_joint_add(options):
    num_output = int(45 * 2**(math.log(options.resolution, 2)) + 2)
    input = Input(shape=(options.input_size, options.num_spec, 1))

    block_1 = Conv2D(
        64,
        (3, 3),
        name="conv1_1",
        padding="same",
        kernel_initializer="he_normal",
        use_bias=False,
        kernel_regularizer=l2(1e-5),
    )(input)
    block_1 = BatchNormalization()(block_1)
    block_1 = LeakyReLU(0.01)(block_1)
    block_1 = Conv2D(
        64,
        (3, 3),
        name="conv1_2",
        padding="same",
        kernel_initializer="he_normal",
        use_bias=False,
        kernel_regularizer=l2(1e-5),
    )(block_1)

    block_2 = ResNet_Block(input=block_1, block_id=2, filterNum=128)
    block_3 = ResNet_Block(input=block_2, block_id=3, filterNum=192)
    block_4 = ResNet_Block(input=block_3, block_id=4, filterNum=256)

    block_4 = BatchNormalization()(block_4)
    block_4 = LeakyReLU(0.01)(block_4)
    block_4 = MaxPooling2D((1, 4))(block_4)
    block_4 = Dropout(0.5)(block_4)

    numOutput_P = 2 * block_4.shape[3]
    output = Reshape((options.input_size, numOutput_P))(block_4)

    output = Bidirectional(
        LSTM(256, return_sequences=True, recurrent_dropout=0.3,
             dropout=0.3))(output)
    output = TimeDistributed(Dense(num_output))(output)
    output = TimeDistributed(Activation("softmax"), name="output")(output)

    block_1 = MaxPooling2D((1, 4**4))(block_1)
    block_2 = MaxPooling2D((1, 4**3))(block_2)
    block_3 = MaxPooling2D((1, 4**2))(block_3)

    joint = concatenate([block_1, block_2, block_3, block_4])
    joint = Conv2D(
        256,
        (1, 1),
        padding="same",
        kernel_initializer="he_normal",
        use_bias=False,
        kernel_regularizer=l2(1e-5),
    )(joint)
    joint = BatchNormalization()(joint)
    joint = LeakyReLU(0.01)(joint)
    joint = Dropout(0.5)(joint)

    num_V = joint.shape[3] * 2
    output_V = Reshape((options.input_size, num_V))(joint)

    output_V = Bidirectional(
        LSTM(
            32,
            return_sequences=True,
            stateful=False,
            recurrent_dropout=0.3,
            dropout=0.3,
        ))(output_V)
    output_V = TimeDistributed(Dense(2))(output_V)
    output_V = TimeDistributed(Activation("softmax"))(output_V)

    output_NS = Lambda(lambda x: x[:, :, 0])(output)
    output_NS = Reshape((options.input_size, 1))(output_NS)
    output_S = Lambda(lambda x: 1 - x[:, :, 0])(output)
    output_S = Reshape((options.input_size, 1))(output_S)
    output_VV = concatenate([output_NS, output_S])

    output_V = add([output_V, output_VV])
    output_V = TimeDistributed(Activation("softmax"),
                               name="output_V")(output_V)

    model = Model(inputs=input, outputs=[output, output_V])
    return model
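
# Hedged usage (an addition, not from the source): `options` is assumed to be
# a namespace with the fields used above (input_size, num_spec, resolution).
# Each of the two named heads gets its own categorical loss.
model = melody_ResNet_joint_add(options)
model.compile(optimizer='adam',
              loss={'output': 'categorical_crossentropy',
                    'output_V': 'categorical_crossentropy'})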
Example #10
# Build RNN model
model = Sequential()

# build LSTM RNN
model.add(
    LSTM(
        batch_input_shape=(BATCH_SIZE, TIMES_STEPS, INPUT_SIZE),
        output_dim=CELL_SIZE,
        # return_sequences: default is False (output only at the last time
        # step); when True the model returns an output at every time step
        return_sequences=True,
        # stateful: True if each batch continues from the previous batch
        stateful=True))

# output layer
model.add(TimeDistributed(Dense(OUTPUT_SIZE)))

# We add metrics to get more results you want to see
adam = Adam(LR)
model.compile(
    optimizer=adam,  # can also pass the string 'adam', but then the learning rate cannot be adjusted
    loss='mse',
)

# training
print("training---------------")
for step in range(4001):
    # batch processing slicing from X_Train and Y_Train
    X_batch, Y_batch, xs = get_batch()
    cost = model.train_on_batch(X_batch, Y_batch)
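
# For reference, a hypothetical get_batch() matching its use above (the names
# and the sine/cosine toy data are assumptions, not from the source): it
# returns an input batch, a target batch, and the time axis for plotting.
import numpy as np

BATCH_START = 0

def get_batch():
    global BATCH_START
    xs = np.arange(BATCH_START, BATCH_START + TIMES_STEPS * BATCH_SIZE) \
           .reshape((BATCH_SIZE, TIMES_STEPS)) / (10 * np.pi)
    seq = np.sin(xs)   # inputs:  (BATCH_SIZE, TIMES_STEPS)
    res = np.cos(xs)   # targets: (BATCH_SIZE, TIMES_STEPS)
    BATCH_START += TIMES_STEPS
    return seq[:, :, np.newaxis], res[:, :, np.newaxis], xs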
Example #11
                                       A_filt_sizes,
                                       Ahat_filt_sizes,
                                       R_filt_sizes,
                                       output_mode="error",
                                       return_sequences=True)
layer_config_base = prednet_base_dynamic.get_config()
layer_config_base["name"] = "prednet_dynamic"
prednet_dynamic = PredNet_dynamic(**layer_config_base)

errors_static = prednet_static(
    inputs_static)  # errors will be (batch_size, nt, nb_layers)
errors_dynamic = prednet_dynamic(inputs_dynamic)

# Error_static
errors_by_time_static = TimeDistributed(
    Dense(1, trainable=False),
    weights=[layer_loss_weights_static, np.zeros(1)],
    trainable=False)(errors_static)
errors_by_time_static = Flatten()(
    errors_by_time_static)  # will be (batch_size, nt)
final_errors_static = Dense(1,
                            weights=[time_loss_weights_static,
                                     np.zeros(1)],
                            trainable=False)(
                                errors_by_time_static)  # weight errors by time

# Error_dynamic
errors_by_time_dynamic = TimeDistributed(
    Dense(1, trainable=False),
    weights=[layer_loss_weights_dynamic,
             np.zeros(1)],
    trainable=False)(errors_dynamic)
Example #12
def acl_vgg(data, stateful):
    dcn = dcn_vgg()
    outs = TimeDistributed(dcn)(data)
    attention = TimeDistributed(
        MaxPooling2D((2, 2), strides=(2, 2), padding='same'))(outs)
    attention = TimeDistributed(
        Conv2D(64, (1, 1), padding='same', activation='relu'))(attention)
    attention = TimeDistributed(
        Conv2D(128, (3, 3), padding='same', activation='relu'))(attention)
    attention = TimeDistributed(
        MaxPooling2D((2, 2), strides=(2, 2), padding='same'))(attention)
    attention = TimeDistributed(
        Conv2D(64, (1, 1), padding='same', activation='relu'))(attention)
    attention = TimeDistributed(
        Conv2D(128, (3, 3), padding='same', activation='relu'))(attention)
    attention = TimeDistributed(
        Conv2D(1, (1, 1), padding='same', activation='sigmoid'))(attention)
    attention = TimeDistributed(UpSampling2D(4))(attention)

    # attention = TimeDistributed(Conv2D(256, (3, 3), padding='same', activation='relu'))(outs)
    # attention = TimeDistributed(Conv2D(128, (3, 3), padding='same', activation='relu'))(attention)
    # attention = TimeDistributed(Conv2D(1, (1, 1), padding='same', activation='sigmoid'))(attention)

    f_attention = TimeDistributed(Flatten())(attention)
    f_attention = TimeDistributed(RepeatVector(512))(f_attention)
    f_attention = TimeDistributed(Permute((2, 1)))(f_attention)
    f_attention = TimeDistributed(Reshape((32, 40, 512)))(f_attention)  #30
    m_outs = Multiply()([outs, f_attention])
    outs = Add()([outs, m_outs])

    outs = (ConvLSTM2D(filters=256,
                       kernel_size=(3, 3),
                       padding='same',
                       return_sequences=True,
                       stateful=stateful,
                       dropout=0.4))(outs)

    outs = TimeDistributed(
        Conv2D(1, (1, 1), padding='same', activation='sigmoid'))(outs)
    outs = TimeDistributed(UpSampling2D(4))(outs)
    attention = TimeDistributed(UpSampling2D(2))(attention)
    return [outs, outs, outs, attention, attention, attention]  #
Example #13
class _Comparison(object):
    def __init__(self, words, nr_hidden, L2=0.0, dropout=0.0):
        self.words = words
        self.model = Sequential()
        self.model.add(Dropout(dropout, input_shape=(nr_hidden * 2, )))
        self.model.add(
            Dense(nr_hidden,
                  name='compare1',
                  init='he_normal',
                  W_regularizer=l2(L2)))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(dropout))
        self.model.add(
            Dense(nr_hidden,
                  name='compare2',
                  W_regularizer=l2(L2),
                  init='he_normal'))
        self.model.add(Activation('relu'))
        self.model = TimeDistributed(self.model)

    def __call__(self, sent, align, **kwargs):
        result = self.model(merge([sent, align],
                                  mode='concat'))  # Shape: (i, n)
        avged = GlobalAveragePooling1D()(result, mask=self.words)
        maxed = GlobalMaxPooling1D()(result, mask=self.words)
        merged = merge([avged, maxed])
        result = BatchNormalization()(merged)
        return result
Example #14
# you can treat any model as if it were a layer, by calling it on a tensor.
x = Input(shape=(784, ))
y = model(x)

#This can allow, for instance, to quickly create models that can process *sequences* of inputs. You could turn an image classification model into a video classification model, in just one line.

from keras.layers import TimeDistributed

# input tensor for sequences of 20 timesteps,
# each containing a 784-dimensional vector
input_sequences = Input(shape=(20, 784))

# this applies our previous model to every timestep in the input sequences.
# the output of the previous model was a 10-way softmax,
# so the output of the layer below will be a sequence of 20 vectors of size 10.
processed_sequences = TimeDistributed(model)(input_sequences)

## Multi-input and multi-output models

#The main input will receive the headline, as a sequence of integers (each integer encodes a word).
#The integers will be between 1 and 10,000 (a vocabulary of 10,000 words) and the sequences will be 100 words long.

from keras.layers import Input, Embedding, LSTM, Dense, merge
from keras.models import Model

# headline input: meant to receive sequences of 100 integers, between 1 and 10000.
# note that we can name any layer by passing it a "name" argument.
main_input = Input(shape=(100, ), dtype='int32', name='main_input')
Example #15
########################
# Creating the items and the tokens
tokens_6 = [0, 0, 0, 0, 0, 1]

items_26 = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    1
]

#Botvinick and Plaut model
sr_input = Input(shape=(None, len(tokens_6) + 1), name="sr_input")
sr_lstm = LSTM(units=50, return_sequences=True, name="sr_lstm")(sr_input)
sr_output = TimeDistributed(
    Dense(
        units=len(tokens_6) + 1,
        activation="softmax",
        name="sr_output",
    ))(sr_lstm)

sr_model = Model(inputs=sr_input, outputs=[sr_output])

#Binding pool model
bp_item_input = Input(shape=(None, len(items_26)), name="bp_item_input")
bp_token_input = Input(shape=(None, len(tokens_6) + 1), name="bp_token_input")
bp_all_inputs = keras.layers.concatenate([bp_item_input, bp_token_input])
bp_lstm = LSTM(units=50, return_sequences=True, name="bp_lstm")(bp_all_inputs)
bp_output = TimeDistributed(
    Dense(units=len(items_26), activation="softmax",
          name="bp_output"), )(bp_lstm)

bp_model = Model(inputs=[bp_item_input, bp_token_input], outputs=[bp_output])
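
# Hedged compile step for the two models above (loss and optimizer choices
# are illustrative, not from the source).
sr_model.compile(optimizer='adam', loss='categorical_crossentropy')
bp_model.compile(optimizer='adam', loss='categorical_crossentropy')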
Example #16
print(len(train_X))
print(len(test_X))
train_X = np.array(train_X)
test_X = np.array(test_X)
train_label = np.array(train_label)
test_label = np.array(test_label)
# valid_label = np.array(valid_label)
# valid_X = np.array(valid_X)
train_X = train_X.reshape(train_X.shape[0], 1, 50, 1)
test_X = test_X.reshape(test_X.shape[0], 1, 50, 1)
model = Sequential()
#add model layers
model.add(
    # input_shape must go on the TimeDistributed wrapper; it is ignored
    # when set on the wrapped inner layer
    TimeDistributed(Conv1D(128, kernel_size=1, activation='relu'),
                    input_shape=(None, 50, 1)))
model.add(TimeDistributed(MaxPooling1D(2)))
model.add(TimeDistributed(Conv1D(256, kernel_size=1, activation='relu')))
model.add(TimeDistributed(MaxPooling1D(2)))
model.add(TimeDistributed(Conv1D(512, kernel_size=1, activation='relu')))
model.add(TimeDistributed(MaxPooling1D(2)))
model.add(TimeDistributed(Flatten()))
model.add(Bidirectional(LSTM(200, return_sequences=True)))
model.add(Dropout(0.25))
model.add(Bidirectional(LSTM(200, return_sequences=False)))
model.add(Dropout(0.5))
model.add(Dense(1, activation='linear'))
model.compile(optimizer='RMSprop', loss='mse')
model.fit(train_X,
Example #17
    def start_training(self):
        # data_en = self.load(self.english_train_file)
        # data_de = self.load(self.german_train_file)
        # val_data_en = self.load(self.english_val_file)
        # val_data_de = self.load(self.german_val_file)

        # train_input_data, train_target_data, val_input_data, val_target_data, embedding_matrix, vocab_size = self.preprocess_data(
        #    data_en, data_de, val_data_en, val_data_en)

        # if len(train_input_data) != len(train_target_data) or len(val_input_data) != len(val_target_data):
        #    print("length of input_data and target_data have to be the same")
        #    exit(-1)
        # num_samples = len(train_input_data)

        # print("Number of training data:", num_samples)
        # print("Number of validation data:", len(val_input_data))

        self.START_TOKEN_VECTOR = np.random.rand(self.params['EMBEDDING_DIM'])
        self.END_TOKEN_VECTOR = np.random.rand(self.params['EMBEDDING_DIM'])
        self.UNK_TOKEN_VECTOR = np.random.rand(self.params['EMBEDDING_DIM'])
        np.save(self.BASIC_PERSISTENCE_DIR + '/start_token_vector.npy', self.START_TOKEN_VECTOR)
        np.save(self.BASIC_PERSISTENCE_DIR + '/end_token_vector.npy', self.END_TOKEN_VECTOR)
        np.save(self.BASIC_PERSISTENCE_DIR + '/unk_token_vector.npy', self.UNK_TOKEN_VECTOR)

        self._split_count_data()

        M = Sequential()
        M.add(Embedding(self.params['MAX_WORDS'] + 3, self.params['EMBEDDING_DIM'], weights=[self.embedding_matrix],
                        mask_zero=True, trainable=False))

        M.add(LSTM(self.params['latent_dim'], return_sequences=True))

        M.add(Dropout(self.params['P_DENSE_DROPOUT']))

        M.add(
            LSTM(self.params['latent_dim'] * int(1 / self.params['P_DENSE_DROPOUT']), return_sequences=True))

        M.add(Dropout(self.params['P_DENSE_DROPOUT']))

        M.add(TimeDistributed(Dense(self.params['MAX_WORDS'] + 3,
                                    input_shape=(None, self.params['num_tokens'], self.params['MAX_WORDS'] + 3),
                                    activation='softmax')))

        print('compiling')

        M.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])
        M.summary()

        print('compiled')

        steps = 4
        mod_epochs = np.math.floor(self.num_samples / self.params['batch_size'] / steps * self.params['epochs'])
        tbCallBack = callbacks.TensorBoard(log_dir=self.GRAPH_DIR, histogram_freq=0, write_graph=True,
                                           write_images=True)
        modelCallback = callbacks.ModelCheckpoint(self.MODEL_CHECKPOINT_DIR + '/model.{epoch:02d}-{loss:.2f}.hdf5',
                                                  monitor='loss', verbose=1, save_best_only=False,
                                                  save_weights_only=True, mode='auto',
                                                  period=mod_epochs / self.params['epochs'])

        M.fit_generator(self.serve_batch(), steps, epochs=mod_epochs, verbose=2, max_queue_size=15,
                        callbacks=[tbCallBack, modelCallback])
        M.save(self.model_file)

        print('\n\n Test prediction:')
        print(self.input_texts[0])
        prediction = M.predict(self.input_texts[0])
        reverse_word_index = dict((i, word) for word, i in self.word_index.items())
        predicted_sentence = ''
        for sentence in prediction:
            for token in sentence:
                print(token)
                print(token.shape)
                max_idx = np.argmax(token)
                print(max_idx)
                if max_idx == 0:
                    print("id of max token = 0")
                    predicted_sentence += reverse_word_index[np.argmax(np.delete(token, max_idx))]
                else:
                    print(reverse_word_index[max_idx])
                    predicted_sentence += reverse_word_index[max_idx]
        print(predicted_sentence)
        print("\n\n")
        print(self.input_texts[10000])
        prediction = M.predict(self.input_texts[10000])
        reverse_word_index = dict((i, word) for word, i in self.word_index.items())
        predicted_sentence = ''
        for sentence in prediction:
            for token in sentence:
                print(token)
                print(token.shape)
                max_idx = np.argmax(token)
                print(max_idx)
                if max_idx == 0:
                    print("id of max token = 0")
                    predicted_sentence += reverse_word_index[np.argmax(np.delete(token, max_idx))]
                else:
                    print(reverse_word_index[max_idx])
                    predicted_sentence += reverse_word_index[max_idx]
        print(predicted_sentence)
Example #18
def create_model(X_vocab_len, X_max_len, y_vocab_len, y_max_len, y1, n1, y2, n2, y3, n3, y4, n4, y5, n5, y6, n6, hidden_size, num_layers):

	def smart_merge(vectors, **kwargs):
		return vectors[0] if len(vectors) == 1 else merge(vectors, **kwargs)
	
	current_word = Input(shape=(X_max_len,), dtype='int32')
	right_word1 = Input(shape=(X_max_len,), dtype='int32')
	right_word2 = Input(shape=(X_max_len,), dtype='int32')
	right_word3 = Input(shape=(X_max_len,), dtype='int32')
	left_word1 = Input(shape=(X_max_len,), dtype='int32')
	left_word2 = Input(shape=(X_max_len,), dtype='int32')
	left_word3 = Input(shape=(X_max_len,), dtype='int32')

	emb_layer = Embedding(X_vocab_len, EMBEDDING_DIM, 
				input_length=X_max_len,
				mask_zero=True) 
	
	current_word_embedding = emb_layer(current_word) # POSITION of layer
	right_word_embedding1 = emb_layer(right_word1) # these are the left shifted X by 1
	right_word_embedding2 = emb_layer(right_word2) # left shifted by 2
	right_word_embedding3 = emb_layer(right_word3)

	left_word_embedding1 = emb_layer(left_word1) # these are the right shifted X by 1, i.e. the left word is at current position
	left_word_embedding2 = emb_layer(left_word2)
	left_word_embedding3 = emb_layer(left_word3)

	BidireLSTM_curr= Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(current_word_embedding)
	BidireLSTM_right1 = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(right_word_embedding1)
	BidireLSTM_right2 = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(right_word_embedding2)
	BidireLSTM_right3 = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(right_word_embedding3)
	BidireLSTM_left1 = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(left_word_embedding1)
	BidireLSTM_left2 = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(left_word_embedding2)
	BidireLSTM_left3 = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(left_word_embedding3)

	#att = AttentionWithContext()(BidireLSTM_curr)
	#print(att.shape)
	RepLayer= RepeatVector(y_max_len)
	RepVec= RepLayer(BidireLSTM_curr)
	Emb_plus_repeat=[current_word_embedding]
	Emb_plus_repeat.append(RepVec)
	Emb_plus_repeat = smart_merge(Emb_plus_repeat, mode='concat')
	
	
	# chain the stacked BiLSTMs; previously each iteration re-read
	# Emb_plus_repeat, so only the last layer had any effect
	temp = Emb_plus_repeat
	for _ in range(num_layers):
		LtoR_LSTM = Bidirectional(LSTM(40, dropout=dropout, return_sequences=True))
		temp = LtoR_LSTM(temp)
	
	# for each time step in the input, we intend to output |y_vocab_len| time steps
	time_dist_layer = TimeDistributed(Dense(y_vocab_len))(temp)
	outputs = Activation('softmax')(time_dist_layer)
	
	# Only for the tags prediction, will we be requiring the context words
	concatenated_encodings = [BidireLSTM_curr]
	concatenated_encodings.append(BidireLSTM_left1)
	concatenated_encodings.append(BidireLSTM_right1)
	concatenated_encodings.append(BidireLSTM_left2)
	concatenated_encodings.append(BidireLSTM_right2)
	concatenated_encodings.append(BidireLSTM_left3)
	concatenated_encodings.append(BidireLSTM_right3)

	concatenated_encodings = smart_merge(concatenated_encodings, mode='concat')

	#att2 = AttentionWithContext()(concatenated_encodings)

	RepVec= RepLayer(concatenated_encodings)
	Emb_plus_repeat=[current_word_embedding]
	Emb_plus_repeat.append(RepVec)
	Emb_plus_repeat = smart_merge(Emb_plus_repeat, mode='concat')
	
	BidireLSTM_vector = Bidirectional(LSTM(40, dropout=dropout, return_sequences=False))(Emb_plus_repeat)
	
	out1 = Dense(n1, activation='softmax')(BidireLSTM_vector)	
	out2 = Dense(n2, activation='softmax')(BidireLSTM_vector)	
	out3 = Dense(n3, activation='softmax')(BidireLSTM_vector)	
	out4 = Dense(n4, activation='softmax')(BidireLSTM_vector)	
	out5 = Dense(n5, activation='softmax')(BidireLSTM_vector)	
	out6 = Dense(n6, activation='softmax')(BidireLSTM_vector)	

	all_inputs = [current_word, right_word1, right_word2, right_word3, left_word1, left_word2, left_word3]
	all_outputs = [outputs, out1, out2, out3, out4, out5, out6]

	model = Model(input=all_inputs, output=all_outputs)
	opt = Adam()
	model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'], 
		loss_weights=[1., 1., 1., 1., 1., 1., 1.])
	
	return model
Example #19
class _Comparison(object):
    def __init__(self, words, nr_hidden, L2=1e-6, dropout=0.2):
        self.words = words
        self.model = Sequential()
        self.model.add(Dense(nr_hidden, name='compare1',
            init='he_normal', W_regularizer=l2(L2),
            input_shape=(nr_hidden*2,)))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(dropout))
        self.model.add(Dense(nr_hidden, name='compare2',
                        W_regularizer=l2(L2), init='he_normal'))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(dropout))
        self.model = TimeDistributed(self.model)

    def __call__(self, sent, align, **kwargs):
        result = self.model(merge([sent, align], mode='concat')) # Shape: (i, n)
        result = _GlobalSumPooling1D()(result, mask=self.words)
        return result
Example #20
             activation=None,
             padding='same',
             dilation_rate=16)(od7)
bd8 = BatchNormalization()(cd8)
ad8 = Activation('relu')(bd8)
dd8 = Dropout(conv_dropout)(ad8)
od8 = concatenate([od7, dd8])

# Self-Attention Layer

# atn = MultiHead(AttentionDilated(attn_units, dilation_rate=attn_dilation), layer_num=attn_heads)(od4)
# dat = Dropout(attn_dropout)(atn)
# fat = TimeDistributed(Flatten())(dat)
# oat = concatenate([fat, od4])

out = TimeDistributed(Dense(3, activation='relu'))(od8)

# define model
model = Model(inputs=seqs, outputs=out)

seq_forward = Input(shape=(None, 4))
seq_revcomp = Lambda(rev_comp)(seq_forward)

output_forward = model(seq_forward)
output_revcomp = model(seq_revcomp)
output_back = Lambda(lambda x: K.reverse(x, -2))(output_revcomp)

model_bi = Model(inputs=seq_forward, outputs=[output_forward, output_back])

learning_rate = 0.002
beta_1 = 0.97
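
# The constants above suggest an Adam optimizer; a hedged completion of this
# truncated snippet (the loss choice is an assumption, not from the source).
from keras.optimizers import Adam

model_bi.compile(optimizer=Adam(lr=learning_rate, beta_1=beta_1),
                 loss='mse')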
Example #21
# Use a bidirectional LSTM in the model and place a CRF layer at the model's output
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense, TimeDistributed, Dropout, Bidirectional
from keras_contrib.layers import CRF

model = Sequential()
model.add(
    Embedding(input_dim=n_words,
              output_dim=20,
              input_length=max_len,
              mask_zero=True))
model.add(
    Bidirectional(LSTM(units=50, return_sequences=True,
                       recurrent_dropout=0.1)))
model.add(TimeDistributed(Dense(50, activation="relu")))
crf = CRF(n_labels)
model.add(crf)

from keras.utils import np_utils
y_train2 = np_utils.to_categorical(y_train)  # one-hot encoding

pd.set_option('max_colwidth', 800)

y_train2[0]
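
# Hedged training steps for the BiLSTM-CRF above; keras_contrib exposes the
# loss and accuracy on the CRF layer instance. X_train is assumed to be the
# padded index array matching y_train2; batch size and epochs are illustrative.
model.compile(optimizer='rmsprop', loss=crf.loss_function,
              metrics=[crf.accuracy])
model.fit(X_train, y_train2, batch_size=32, epochs=5)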
Example #22
words = Embedding(input_dim=wordEmbeddings.shape[0],
                  output_dim=wordEmbeddings.shape[1],
                  weights=[wordEmbeddings],
                  trainable=False)(words_input)
casing_input = Input(shape=(None, ), dtype='int32', name='casing_input')
casing = Embedding(output_dim=caseEmbeddings.shape[1],
                   input_dim=caseEmbeddings.shape[0],
                   weights=[caseEmbeddings],
                   trainable=False)(casing_input)
character_input = Input(shape=(
    None,
    52,
), name='char_input')
embed_char_out = TimeDistributed(Embedding(
    len(char2Idx),
    30,
    embeddings_initializer=RandomUniform(minval=-0.5, maxval=0.5)),
                                 name='char_embedding')(character_input)
dropout = Dropout(0.5)(embed_char_out)
conv1d_out = TimeDistributed(
    Conv1D(kernel_size=3,
           filters=30,
           padding='same',
           activation='tanh',
           strides=1))(dropout)
maxpool_out = TimeDistributed(MaxPooling1D(52))(conv1d_out)
char = TimeDistributed(Flatten())(maxpool_out)
char = Dropout(0.5)(char)
output = concatenate([words, casing, char])
output = Bidirectional(
    LSTM(200, return_sequences=True, dropout=0.50,
Example #23
                   strides=30,
                   padding='valid',
                   activation='relu')(main_input)
#main_path = Conv1D(filters=7, kernel_size=2, strides = 2, padding='valid', activation = 'relu')(main_input)
main_path = concatenate([main_path, status_input], axis=-1)
main_path = LSTM(units=30, return_sequences=True)(main_path)
main_path = LSTM(units=15, return_sequences=True)(main_path)

#s path
s_path = Dense(units=30, activation='relu')(s_input)
s_path = Dense(units=15, activation='relu')(s_path)

#d path
d_path = d_input
d_path = concatenate([d_path, status_input], axis=-1)
d_path = TimeDistributed(Dense(units=30, activation='relu'))(d_path)
d_path = TimeDistributed(Dense(units=15, activation='relu'))(d_path)

#merge path
s_path = Reshape((1, K.int_shape(s_path)[1]))(s_path)
s_paths = concatenate([s_path] * 24, axis=1)
merge_path = concatenate([main_path, s_paths, d_path], axis=-1)

merge_path = TimeDistributed(
    Dense(units=30, kernel_initializer='he_normal',
          activation='relu'))(merge_path)
merge_path = TimeDistributed(
    Dense(units=15, kernel_initializer='he_normal',
          activation='relu'))(merge_path)

output = TimeDistributed(
Example #24
    len(it_test) / (Nframes + 1)))
print('')
print('{} sequences = samples per batch'.format(batch_size))
print('{} batches per epoch'.format(spe))
print('{} epochs'.format(epochs))

#Defining the model
#ENCODER-DECODER + LSTM - TIME_DISTRIBUTED

#Initializing the CNN
model = Sequential()

#Adding layers.
#First conv layer
model.add(
    TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding='same'),
                    input_shape=(None, 128, 128, 1)))
model.add(BatchNormalization())
#Pooling
model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2))))

##Second conv layer
model.add(
    TimeDistributed(Conv2D(64, (3, 3), activation='relu', padding='same')))
model.add(BatchNormalization())
##Pooling
model.add(TimeDistributed(MaxPool2D(pool_size=(2, 2))))

##Third conv layer
model.add(
    TimeDistributed(Conv2D(128, (3, 3), activation='relu', padding='same')))
model.add(BatchNormalization())
Example #25
# define input sequence
in_seq1 = array([10, 20, 30, 40, 50, 60, 70, 80, 90])
in_seq2 = array([15, 25, 35, 45, 55, 65, 75, 85, 95])
out_seq = array([in_seq1[i]+in_seq2[i] for i in range(len(in_seq1))])
# convert to [rows, columns] structure
in_seq1 = in_seq1.reshape((len(in_seq1), 1))
in_seq2 = in_seq2.reshape((len(in_seq2), 1))
out_seq = out_seq.reshape((len(out_seq), 1))
# horizontally stack columns
dataset = hstack((in_seq1, in_seq2, out_seq))
# choose a number of time steps
n_steps_in, n_steps_out = 3, 2
# convert into input/output samples
X, y = split_sequences(dataset, n_steps_in, n_steps_out)
# the dataset knows the number of features, e.g. 3 here (in_seq1, in_seq2, out_seq)
n_features = X.shape[2]
# define model
model = Sequential()
model.add(LSTM(200, activation='relu', input_shape=(n_steps_in, n_features)))
model.add(RepeatVector(n_steps_out))
model.add(LSTM(200, activation='relu', return_sequences=True))
model.add(TimeDistributed(Dense(n_features)))
model.compile(optimizer='adam', loss='mse')
# fit model
model.fit(X, y, epochs=300, verbose=0)
# demonstrate prediction
x_input = array([[60, 65, 125], [70, 75, 145], [80, 85, 165], [70, 75, 145], [80, 85, 165], [90, 95, 185]])
x_input = x_input.reshape((2, n_steps_in, n_features))
yhat = model.predict(x_input, verbose=0)
print(yhat)
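
# For reference, a hypothetical split_sequences() consistent with its use
# above (an assumption, not from the source): it slides a window over the
# multivariate series, taking n_steps_in rows of all columns as input and the
# next n_steps_out rows as output. `array` is numpy's, as used above.
def split_sequences(sequences, n_steps_in, n_steps_out):
    X, y = list(), list()
    for i in range(len(sequences)):
        end_ix = i + n_steps_in
        out_end_ix = end_ix + n_steps_out
        if out_end_ix > len(sequences):
            break
        X.append(sequences[i:end_ix, :])
        y.append(sequences[end_ix:out_end_ix, :])
    return array(X), array(y)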
Example #26
    def create_model(self):
        self._set_model_params()
        act = 'relu'
        input_data = Input(name='the_input',
                           shape=self.input_shape,
                           dtype='float32')
        inner = Convolution2D(self.conv_num_filters,
                              self.filter_size,
                              self.filter_size,
                              border_mode='same',
                              activation=act,
                              name='conv1')(input_data)

        inner = MaxPooling2D(pool_size=(self.pool_size_1, self.pool_size_1),
                             name='max1')(inner)
        inner = Convolution2D(self.conv_num_filters,
                              self.filter_size,
                              self.filter_size,
                              border_mode='same',
                              activation=act,
                              name='conv2')(inner)
        inner = MaxPooling2D(pool_size=(self.pool_size_2, self.pool_size_2),
                             name='max2')(inner)
        conv_to_rnn_dims = (int(
            (self.img_h /
             (self.pool_size_1 * self.pool_size_2)) * self.conv_num_filters),
                            int(self.img_w /
                                (self.pool_size_1 * self.pool_size_2)))
        inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
        inner = Permute(dims=(2, 1), name='permute')(inner)

        # cuts down input size going into RNN:
        inner = TimeDistributed(
            Dense(self.time_dense_size, activation=act, name='dense1'))(inner)

        # Two layers of bidirectional GRUs
        # GRU seems to work as well, if not better than LSTM:
        gru_1 = GRU(self.rnn_size, return_sequences=True, name='gru1')(inner)
        gru_1b = GRU(self.rnn_size,
                     return_sequences=True,
                     go_backwards=True,
                     name='gru1_b')(inner)
        gru1_merged = merge([gru_1, gru_1b], mode='sum')
        gru_2 = GRU(self.rnn_size, return_sequences=True,
                    name='gru2')(gru1_merged)
        gru_2b = GRU(self.rnn_size, return_sequences=True,
                     go_backwards=True)(gru1_merged)

        # transforms RNN output to character activations:
        inner = TimeDistributed(Dense(self.output_size,
                                      name='dense2'))(merge([gru_2, gru_2b],
                                                            mode='concat'))

        y_pred = Activation('softmax', name='softmax')(inner)
        # Model(input=[input_data], output=y_pred).summary()
        labels = Input(name='the_labels',
                       shape=[self.absolute_max_string_len],
                       dtype='float32')
        input_length = Input(name='input_length', shape=[1], dtype='int64')
        label_length = Input(name='label_length', shape=[1], dtype='int64')
        # Keras doesn't currently support loss funcs with extra parameters
        # so CTC loss is implemented in a lambda layer
        loss_out = Lambda(ctc_lambda_func, output_shape=(1, ), name="ctc")(
            [y_pred, labels, input_length, label_length])
        lr = 0.03
        # clipnorm seems to speed up convergence
        clipnorm = 5
        sgd = SGD(lr=lr,
                  decay=3e-7,
                  momentum=0.9,
                  nesterov=True,
                  clipnorm=clipnorm)
        model = Model(input=[input_data, labels, input_length, label_length],
                      output=[loss_out])
        # model.summary()
        # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
        if self.weight_file is not None:
            model.load_weights(self.weight_file)

        model.compile(loss={
            'ctc': lambda y_true, y_pred: y_pred
        },
                      optimizer=sgd)
        self.model = model

        self._predictor = K.function([input_data], [y_pred])

        return model
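
# For reference, the standard CTC loss wrapper from the Keras image_ocr
# example, which the ctc_lambda_func used above presumably mirrors (an
# assumption): the first two RNN outputs are dropped because early frames
# tend to be garbage.
from keras import backend as K

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)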
Example #27
 n_in_seq_length = len(list(X_train[0]))
 n_out_seq_length = len(list(y_train[0]))
 # print(n_in_seq_length,n_out_seq_length) # 24 1
 print(np.shape(X_train), np.shape(y_train))
 # create LSTM
 model = Sequential()
 model.add(
     LSTM(150, batch_input_shape=(None, n_in_seq_length, encoded_length))
 )  # encoder: 150 is the number of hidden units (= the output dimension), encoded_length is the input dimension, n_in_seq_length is the input sequence length
 model.add(Dropout(0.2))
 model.add(RepeatVector(n_out_seq_length))
 model.add(LSTM(150, return_sequences=True))  #decoder
 model.add(Dropout(0.2))
 model.add(LSTM(150, return_sequences=True))  #decoder
 model.add(Dropout(0.3))
 model.add(TimeDistributed(Dense(encoded_length, activation='softmax')))
 model.compile(loss='categorical_crossentropy',
               optimizer='adam',
               metrics=['accuracy'])
 # show model
 print(model.summary())
 # train LSTM
 history = model.fit(X_train,
                     y_train,
                     epochs=50,
                     batch_size=50,
                     validation_split=0.05,
                     shuffle=False,
                     verbose=2)
 # save model
 model.save('../model/seq2seq_code.h5')
Example #28
from keras_contrib.layers import CRF

words_input = Input(shape=(None, ), dtype='int32', name='words_input')
words = Embedding(input_dim=wordEmbeddings.shape[0],
                  output_dim=wordEmbeddings.shape[1],
                  weights=[wordEmbeddings],
                  trainable=False)(words_input)
output = Bidirectional(
    LSTM(200, return_sequences=True, dropout=0.50,
         recurrent_dropout=0.25))(words)

outputx = Bidirectional(
    LSTM(200, return_sequences=True, dropout=0.50,
         recurrent_dropout=0.25))(output)

outputxx = TimeDistributed(Dense(50, activation="relu"))(
    outputx)  # a dense layer as suggested by neuralNer
crf = CRF(9)  # CRF layer
out = crf(outputxx)

model = Model(inputs=words_input, outputs=out)
model.compile(optimizer="adam", loss=crf.loss_function, metrics=[crf.accuracy])
model.summary()


def compute_f1(predictions, correct, idx2Label):
    label_pred = []
    for sentence in predictions:
        label_pred.append([idx2Label[element] for element in sentence])

    label_correct = []
    for sentence in correct:
Example #29
    conv_model_single_image = mlp_block(
        conv_model_single_image,
        number_of_neurons_per_layer_convolutional_model)

#Decide whether to add an Average layer on top of the convolutional model to effectively have
#the model predict optical flow (mutually exclusive with the dense output above!):
flag_use_average_layer_on_top_of_convolutional_model = 0
if flag_use_average_layer_on_top_of_convolutional_model == 1:
    conv_model_single_image = Average(conv_model_single_image)

#TimeDistributed:
#(1). take Conv2D model and actually make it a "Model" according to the functional API
conv_model_single_image_as_model = Model(inputs=[image_input],
                                         outputs=[conv_model_single_image])
#(2). wrap it with TimeDistributed (or Bidirectional) so it runs on every time step
conv_model_time_distributed = TimeDistributed(
    conv_model_single_image_as_model)(image_inputs)
#(3). after TimeDistributed we have a tensor of shape (batch_size,number_of_time_steps,single_model_output)
#     so i need to add a top layer to output something of desired shape:
conv_model_time_distributed = Flatten()(conv_model_time_distributed)
conv_model_time_distributed = Dense(2)(conv_model_time_distributed)
#(4). make the whole thing, after TimeDistributed, a Model according to the functional API:
#K.set_learning_phase(0)
conv_model_time_distributed = Model(inputs=[image_inputs],
                                    outputs=[conv_model_time_distributed])
conv_model_time_distributed._uses_learning_phase = True  #for learning=True, for testing = False

#Visualize Model:
if flag_plot_model == 1:
    keras.utils.plot_model(conv_model_single_image_as_model)
    keras.utils.vis_utils.plot_model(conv_model_single_image_as_model)
    from IPython.display import SVG
Example #30
    return word2ind[c]

all_x,X,y =createData(f_test)
X_test_enc = [[checkForUnk(c) for c in x] for x in X]
X_test = pad_sequences(X_test_enc, maxlen=maxlen)

# ------------------------------- defining the model -------------------------------------------------------------------
max_features = len(word2ind)
embedding_size = 500
hidden_size = 150
out_size = len(label2ind) + 1

model = Sequential()
model.add(Embedding(max_features, embedding_size, input_length=maxlen, mask_zero=True))
model.add(Bidirectional(LSTM(hidden_size, return_sequences=True)))
model.add(TimeDistributed(Dense(out_size)))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')  # softmax over one-hot labels calls for categorical, not binary, cross-entropy

# ------------------------------- training and saving the model -------------------------------------------------------------------
batch_size = 32
model.fit(X_train, y_train, batch_size=batch_size, epochs=15, validation_data=(X_val, y_val))
model.save("../model/bilstm", overwrite=True, include_optimizer=True)

# ------------------------------- evaluating the model -------------------------------------------------------------------
def score(yh, pr):
    coords = [np.where(yhh > 0)[0][0] for yhh in yh]
    yh = [yhh[co:] for yhh, co in zip(yh, coords)]
    ypr = [prr[co:] for prr, co in zip(pr, coords)]
    fyh = [c for row in yh for c in row]
    fpr = [c for row in ypr for c in row]
    return fyh, fpr
Example #31
    def model_init(self, args):
        """ Build a deep network for speech
        """
        # Main acoustic input
        self.inputs = Input(name='the_input',
                            shape=(None, self.args.num_features))
        # Specify the layers in your network
        if self.args.is_brnn == '123':
            if self.args.rnn_celltype == 'gru':
                for i in range(self.args.num_layers):
                    if i == 0:
                        bidir_rnn = Bidirectional(
                            GRU(self.args.hidden_size,
                                activation=self.args.activation,
                                return_sequences=True,
                                implementation=2,
                                name='bidir' + str(i)),
                            merge_mode='concat')(self.inputs)
                        bn_rnn = BatchNormalization()(bidir_rnn)
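                        # note: despite its name, args.keep_prob is used directly as the Dropout rate here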
                        dropout_rnn = Dropout(rate=self.args.keep_prob)(bn_rnn)
                    else:
                        bidir_rnn = Bidirectional(
                            GRU(self.args.hidden_size,
                                activation=self.args.activation,
                                return_sequences=True,
                                implementation=2,
                                name='bidir' + str(i)),
                            merge_mode='concat')(dropout_rnn)
                        bn_rnn = BatchNormalization()(bidir_rnn)
                        dropout_rnn = Dropout(rate=self.args.keep_prob)(bn_rnn)
            elif self.args.rnn_celltype == 'lstm':
                for i in range(self.args.num_layers):
                    if i == 0:
                        bidir_rnn = Bidirectional(
                            LSTM(self.args.hidden_size,
                                 return_sequences=True,
                                 name='bidir' + str(i)))(self.inputs)
                        bn_rnn = BatchNormalization()(bidir_rnn)
                        dropout_rnn = Dropout(rate=self.args.keep_prob)(bn_rnn)
                    else:
                        bidir_rnn = Bidirectional(
                            LSTM(self.args.hidden_size,
                                 return_sequences=True,
                                 name='bidir' + str(i)))(dropout_rnn)
                        bn_rnn = BatchNormalization()(bidir_rnn)
                        dropout_rnn = Dropout(rate=self.args.keep_prob)(bn_rnn)
        else:
            if self.args.rnn_celltype == 'gru':
                for i in range(self.args.num_layers):
                    if i == 0:
                        bidir_rnn = GRU(self.args.hidden_size,
                                        return_sequences=True,
                                        name='gru' + str(i))(self.inputs)
                        bn_rnn = BatchNormalization()(bidir_rnn)
                        dropout_rnn = Dropout(rate=self.args.keep_prob)(bn_rnn)
                    else:
                        bidir_rnn = GRU(self.args.hidden_size,
                                        return_sequences=True,
                                        name='gru' + str(i))(dropout_rnn)
                        bn_rnn = BatchNormalization()(bidir_rnn)
                        dropout_rnn = Dropout(rate=self.args.keep_prob)(bn_rnn)
            elif self.args.rnn_celltype == 'lstm':
                bidir_rnn = self.make_residual_lstm_layers(
                    self.inputs, self.args.hidden_size, self.args.num_layers,
                    self.args.keep_prob)

        # self.outputs = Dense(self.args.num_classes)(dropout_rnn)
        # Specify the model
        # self.model = Model(inputs= self.inputs, outputs=self.outputs)
        time_dense = TimeDistributed(Dense(self.args.num_classes))(bidir_rnn)
        # Add softmax activation layer
        y_pred = Activation('softmax', name='softmax')(time_dense)
        # Specify the model
        self.model_1 = Model(inputs=self.inputs, outputs=y_pred)
        # Specify model.output_length
        self.model_1.output_length = lambda x: x
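Since none of the recurrent branches above downsample along the time axis, output_length is simply the identity on the input length.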
Example #32
    def create_standard_attention_model(self, test_mode=False):
        ''' This model is largely based on [A Decomposable Attention Model, Parikh et al.] '''
        # 0, (Optional) Set the upper limit of GPU memory
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.2
        set_session(tf.Session(config=config))

        # 1, Embed the input and project the embeddings
        premise = Input(shape=(self.SentMaxLen, ), dtype='int32')
        hypothesis = Input(shape=(self.SentMaxLen, ), dtype='int32')
        embed_p = self.Embed(premise)  # [batchsize, Psize, Embedsize]
        embed_h = self.Embed(hypothesis)  # [batchsize, Hsize, Embedsize]
        EmbdProject = TimeDistributed(
            Dense(200,
                  activation='relu',
                  kernel_regularizer=l2(self.L2Strength),
                  bias_regularizer=l2(self.L2Strength)))
        embed_p = Dropout(self.DropProb)(
            EmbdProject(embed_p))  # [batchsize, Psize, units]
        embed_h = Dropout(self.DropProb)(
            EmbdProject(embed_h))  # [batchsize, Hsize, units]

        # 2, Score each embedding and compute the score matrix Eph.
        F_p, F_h = embed_p, embed_h
        for i in range(2):  # Applying Decomposable Score Function
            scoreF = TimeDistributed(
                Dense(200,
                      activation='relu',
                      kernel_regularizer=l2(self.L2Strength),
                      bias_regularizer=l2(self.L2Strength)))
            F_p = Dropout(self.DropProb)(
                scoreF(F_p))  # [batch_size, Psize, units]
            F_h = Dropout(self.DropProb)(
                scoreF(F_h))  # [batch_size, Hsize, units]
        Eph = keras.layers.Dot(axes=(2, 2))([F_p, F_h
                                             ])  # [batch_size, Psize, Hsize]

        # 3, Normalize score matrix and get alignment
        Ep = Lambda(lambda x: keras.activations.softmax(x))(
            Eph)  # [batch_size, Psize, Hsize]
        Eh = keras.layers.Permute((2, 1))(Eph)  # [batch_size, Hsize, Psize]
        Eh = Lambda(lambda x: keras.activations.softmax(x))(
            Eh)  # [batch_size, Hsize, Psize]
        PremAlign = keras.layers.Dot((2, 1))([Ep, embed_h])
        HypoAlign = keras.layers.Dot((2, 1))([Eh, embed_p])

        # 4, Concatenate the original and aligned representations, then score each pair
        PremAlign = keras.layers.concatenate(
            [embed_p, PremAlign])  # [batch_size, PreLen, 2*Size]
        HypoAlign = keras.layers.concatenate([embed_h, HypoAlign
                                              ])  # [batch_size, Hypo, 2*Size]
        for i in range(2):
            scoreG = TimeDistributed(
                Dense(200,
                      activation='relu',
                      kernel_regularizer=l2(self.L2Strength),
                      bias_regularizer=l2(self.L2Strength)))
            PremAlign = scoreG(PremAlign)  # [batch_size, Psize, units]
            HypoAlign = scoreG(HypoAlign)  # [batch_size, Hsize, units]
            PremAlign = Dropout(self.DropProb)(PremAlign)
            HypoAlign = Dropout(self.DropProb)(HypoAlign)

        # 5, Sum all these scores, and make the final judgment from the summed scores
        SumWords = Lambda(lambda X: K.reshape(K.sum(X, axis=1, keepdims=True),
                                              (-1, 200)))
        V_P = SumWords(PremAlign)  # [batch_size, 200]
        V_H = SumWords(HypoAlign)  # [batch_size, 200]
        final = keras.layers.concatenate([V_P, V_H])
        for i in range(2):
            final = Dense(200,
                          activation='relu',
                          kernel_regularizer=l2(self.L2Strength),
                          bias_regularizer=l2(self.L2Strength))(final)
            final = Dropout(self.DropProb)(final)
            final = BatchNormalization()(final)

        # 6, Prediction by softmax
        final = Dense(3, activation='softmax')(final)
        if test_mode:
            self.model = Model(inputs=[premise, hypothesis],
                               outputs=[Ep, Eh, final])
        else:
            self.model = Model(inputs=[premise, hypothesis], outputs=final)
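The heart of steps 2-3 above is plain soft alignment over a dot-product score matrix. A minimal NumPy sketch of the same computation (the sizes are made up, and for brevity it aligns against the scored features rather than the projected embeddings the model uses):

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

F_p = np.random.randn(1, 3, 200)               # batch 1, premise of 3 words
F_h = np.random.randn(1, 4, 200)               # batch 1, hypothesis of 4 words
Eph = np.einsum('bpd,bhd->bph', F_p, F_h)      # score matrix [1, 3, 4]
Ep = softmax(Eph, axis=-1)                     # per premise word, weights over hypothesis words
Eh = softmax(Eph.transpose(0, 2, 1), axis=-1)  # per hypothesis word, weights over premise words
PremAlign = np.einsum('bph,bhd->bpd', Ep, F_h) # aligned hypothesis summary per premise word
HypoAlign = np.einsum('bhp,bpd->bhd', Eh, F_p) # aligned premise summary per hypothesis word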
Example #33
def final_model(input_dim,
                filters=50,
                kernel_size=5,
                units=200,
                output_dim=29,
                recur_layers=3,
                activation='relu',
                dropout_rate=0.1):
    """ Build a deep network for speech
    """
    conv_stride = 1
    conv_border_mode = 'same'
    # Main acoustic input
    # we add a dimension, to be able to apply convolution1d to frequencies only
    input_data = Input(name='the_input', shape=(None, input_dim))
    # applying convolution along the frequency axis -> models spectral variance due to speaker change (better than fully connected because it preserves the order of the frequencies)

    conv_0 = Conv1D(filters,
                    1,
                    strides=1,
                    padding=conv_border_mode,
                    activation='relu')(input_data)
    conv_0 = BatchNormalization()(conv_0)
    conv_1 = Conv1D(filters,
                    3,
                    strides=1,
                    padding=conv_border_mode,
                    activation='relu')(conv_0)
    conv_1 = BatchNormalization()(conv_1)
    conv_2 = Conv1D(filters,
                    1,
                    strides=1,
                    padding=conv_border_mode,
                    activation='relu')(conv_1)
    conv_2 = BatchNormalization()(conv_2)

    rnn_input = conv_2
    #units = filters//4
    #rnn_input = input_data
    for num in range(recur_layers):
        rnn_name = 'rnn_{}'.format(num)
        # TODO: also try adding dropout on the input
        rnn_input = Dropout(dropout_rate)(rnn_input)
        # TODO: try with LSTM
        # TODO: try activation='elu' or tanh?
        # TODO: try with clipping
        simp_rnn = LSTM(units,
                        activation='tanh',
                        return_sequences=True,
                        implementation=2,
                        name=rnn_name)
        rnn = Bidirectional(simp_rnn)(rnn_input)
        output = BatchNormalization()(rnn)
        # feed the new output back in as rnn_input, so the recurrent layers chain
        rnn_input = output

    # and finally we add a TimeDistributed dense layer
    time_dense = TimeDistributed(Dense(output_dim))(rnn_input)

    # Add softmax activation layer
    y_pred = Activation('softmax', name='softmax')(time_dense)
    # Specify the model
    model = Model(inputs=input_data, outputs=y_pred)
    # Specify model.output_length
    model.output_length = lambda x: x
    #model.output_length = lambda x: cnn_output_length( x, kernel_size, conv_border_mode, conv_stride)
    print(model.summary())
    return model
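A quick smoke test of the builder above; the 161 input features below are just an assumed spectrogram size:

model = final_model(input_dim=161)  # builds the conv + 3x BiLSTM graph and prints its summary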
Example #34
    def create_enhanced_attention_model(self):
        # 0, (Optional) Set the upper limit of GPU memory
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        set_session(tf.Session(config=config))

        # 1, Embedding the input and project the embeddings
        premise = Input(shape=(self.SentMaxLen, ), dtype='int32')
        hypothesis = Input(shape=(self.SentMaxLen, ), dtype='int32')
        embed_p = self.Embed(premise)  # [batchsize, Psize, Embedsize]
        embed_h = self.Embed(hypothesis)  # [batchsize, Hsize, Embedsize]

        # 2, Encode each word with its surrounding context
        Encoder = Bidirectional(
            LSTM(units=300, dropout=self.DropProb, return_sequences=True))
        embed_p = Encoder(embed_p)
        embed_h = Encoder(embed_h)

        # 3, Score each word pair and compute the score matrix Eph.
        F_p, F_h = embed_p, embed_h
        Eph = keras.layers.Dot(axes=(2, 2))([F_h, F_p
                                             ])  # [batch_size, Hsize, Psize]
        Eh = Lambda(lambda x: keras.activations.softmax(x))(
            Eph)  # [batch_size, Hsize, Psize]
        Ep = keras.layers.Permute((2, 1))(Eph)  # [batch_size, Psize, Hsize]
        Ep = Lambda(lambda x: keras.activations.softmax(x))(
            Ep)  # [batch_size, Psize, Hsize]

        # 4, Use the normalized score matrix to align premise and hypothesis
        PremAlign = keras.layers.Dot((2, 1))([Ep, embed_h])  # [-1, Psize, dim]
        HypoAlign = keras.layers.Dot((2, 1))([Eh, embed_p])  # [-1, Hsize, dim]
        mm_1 = keras.layers.Multiply()([embed_p, PremAlign])
        mm_2 = keras.layers.Multiply()([embed_h, HypoAlign])

        # ReshapeLayer = Lambda(lambda x: K.reshape(x, (-1, self.SentMaxLen, 600))) # Reshape handles batch_size
        # sb_1 = ReshapeLayer(embed_p - PremAlign)
        # sb_2 = ReshapeLayer(embed_h - HypoAlign)
        sb_1 = Lambda(lambda x: tf.subtract(x, PremAlign))(embed_p)
        sb_2 = Lambda(lambda x: tf.subtract(x, HypoAlign))(embed_h)

        PremAlign = keras.layers.Concatenate()([
            embed_p,
            PremAlign,
            sb_1,
            mm_1,
        ])  # [batch_size, Psize, 2*unit]
        HypoAlign = keras.layers.Concatenate()(
            [embed_h, HypoAlign, sb_2, mm_2])  # [batch_size, Hsize, 2*unit]
        PremAlign = Dropout(self.DropProb)(PremAlign)
        HypoAlign = Dropout(self.DropProb)(HypoAlign)
        Compresser = TimeDistributed(Dense(
            300,
            kernel_regularizer=l2(self.L2Strength),
            bias_regularizer=l2(self.L2Strength)),
                                     name='Compresser')
        PremAlign = Compresser(PremAlign)
        HypoAlign = Compresser(HypoAlign)

        # 5, Final BiLSTM encoder + softmax classifier
        Final = Bidirectional(LSTM(units=300,
                                   dropout=self.DropProb,
                                   return_sequences=True),
                              name='finaldecoer')  # [-1,2*units]
        final_p = Final(PremAlign)
        final_h = Final(HypoAlign)

        AveragePooling = Lambda(lambda x: K.mean(x, axis=1))  # outs [-1, dim]
        MaxPooling = Lambda(lambda x: K.max(x, axis=1))  # outs [-1, dim]
        avg_p = AveragePooling(final_p)
        avg_h = AveragePooling(final_h)
        max_p = MaxPooling(final_p)
        max_h = MaxPooling(final_h)
        Final = keras.layers.Concatenate()([avg_p, max_p, avg_h, max_h])
        Final = Dropout(self.DropProb)(Final)
        Final = Dense(512, name='dense512', activation='relu')(Final)
        Final = Dropout(self.DropProb)(Final)
        Final = Dense(256, name='dense256', activation='relu')(Final)
        Final = Dropout(self.DropProb)(Final)
        Final = Dense(3, activation='softmax', name='judge256')(Final)
        self.model = Model(inputs=[premise, hypothesis], outputs=Final)
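The four-way concatenation above ([a; align(a); a - align(a); a * align(a)]) and the combined average/max pooling are the "enhancement" tricks of ESIM-style models. A shape-level sketch of the pooling, with hypothetical sizes:

import numpy as np

final_p = np.random.randn(8, 42, 600)  # [batch, Psize, 2*units] out of the final BiLSTM
avg_p = final_p.mean(axis=1)           # AveragePooling: [8, 600]
max_p = final_p.max(axis=1)            # MaxPooling:     [8, 600]
# doing the same for the hypothesis and concatenating all four gives a fixed-size
# [8, 2400] vector, independent of the two sentence lengths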
Example #35
def Seq2Seq(output_dim,
            output_length,
            rnncell_type,
            batch_input_shape=None,
            input_shape=None,
            batch_size=None,
            input_dim=None,
            input_length=None,
            hidden_dim=None,
            depth=1,
            broadcast_state=True,
            unroll=False,
            stateful=False,
            inner_broadcast_state=True,
            teacher_force=False,
            peek=False,
            dropout=0.):
    '''
    Seq2seq model based on [1] and [2].
    This model has the ability to transfer the encoder hidden state to the decoder's
    hidden state (specified by the broadcast_state argument). Also, in deep models
    (depth > 1), the hidden state is propagated throughout the LSTM stack (specified by
    the inner_broadcast_state argument). You can switch between the [1]-based model and
    the [2]-based model using the peek argument (peek = True for [2], peek = False for [1]).
    When peek = True, the decoder gets a 'peek' at the context vector at every timestep.

    [1] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
    y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)
    y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

            Encoder:
            X = Input sequence
            C = LSTM(X); The context vector

            Decoder:
    y(t) = LSTM(s(t-1), y(t-1), C)
    y(0) = LSTM(s0, C, C)
    Where s is the hidden state of the LSTM (h and c), and C is the context vector
    from the encoder.

    Arguments:

    output_dim : Required output dimension.
    hidden_dim : The dimension of the internal representations of the model.
    output_length : Length of the required output sequence.
    depth : Used to create a deep Seq2seq model. For example, if depth = 3,
                    there will be 3 LSTMs on the encoding side and 3 LSTMs on the
                    decoding side. You can also specify depth as a tuple. For example,
                    if depth = (4, 5), 4 LSTMs will be added to the encoding side and
                    5 LSTMs will be added to the decoding side.
    broadcast_state : Specifies whether the hidden state from the encoder should be
                      transferred to the decoder.
    inner_broadcast_state : Specifies whether hidden states should be propagated
                            throughout the LSTM stack in deep models.
    peek : Specifies if the decoder should be able to peek at the context vector
               at every timestep.
    dropout : Dropout probability in between layers.


    '''

    if isinstance(depth, int):
        depth = (depth, depth)
    if batch_input_shape:
        shape = batch_input_shape
    elif input_shape:
        shape = (batch_size, ) + input_shape
    elif input_dim:
        if input_length:
            shape = (batch_size, ) + (input_length, ) + (input_dim, )
        else:
            shape = (batch_size, ) + (None, ) + (input_dim, )
    else:
        raise TypeError('Specify one of batch_input_shape, input_shape, or input_dim.')
    if hidden_dim is None:
        hidden_dim = output_dim

    rnncell = rnncell_list[rnncell_type]
    encoder = RecurrentSequential(readout=True,
                                  state_sync=inner_broadcast_state,
                                  unroll=unroll,
                                  stateful=stateful,
                                  return_states=broadcast_state)
    for _ in range(depth[0]):
        encoder.add(
            rnncell(hidden_dim, batch_input_shape=(shape[0], hidden_dim)))
        encoder.add(Dropout(dropout))

    dense1 = TimeDistributed(Dense(hidden_dim))
    dense1.supports_masking = True
    dense2 = Dense(output_dim)

    decoder = RecurrentSequential(readout='add' if peek else 'readout_only',
                                  state_sync=inner_broadcast_state,
                                  decode=True,
                                  output_length=output_length,
                                  unroll=unroll,
                                  stateful=stateful,
                                  teacher_force=teacher_force)

    for _ in range(depth[1]):
        decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
        decoder.add(
            LSTMDecoderCell(output_dim=output_dim,
                            hidden_dim=hidden_dim,
                            batch_input_shape=(shape[0], output_dim)))

    _input = Input(batch_shape=shape)
    _input._keras_history[0].supports_masking = True
    encoded_seq = dense1(_input)
    encoded_seq = encoder(encoded_seq)
    if broadcast_state:
        assert type(encoded_seq) is list
        states = encoded_seq[-2:]
        encoded_seq = encoded_seq[0]
    else:
        states = None
    encoded_seq = dense2(encoded_seq)
    inputs = [_input]
    if teacher_force:
        truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
        truth_tensor._keras_history[0].supports_masking = True
        inputs += [truth_tensor]

    decoded_seq = decoder(encoded_seq,
                          ground_truth=inputs[1] if teacher_force else None,
                          initial_readout=encoded_seq,
                          initial_state=states)

    model = Model(inputs, decoded_seq)
    model.encoder = encoder
    model.decoder = decoder
    return model
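A minimal usage sketch of the builder above, assuming rnncell_list maps 'lstm' to a compatible recurrent cell (all shapes below are made up):

model = Seq2Seq(output_dim=64, output_length=10, rnncell_type='lstm',
                input_shape=(20, 32), hidden_dim=128, depth=(2, 2), peek=True)
model.compile(loss='mse', optimizer='rmsprop')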
Example #36
def Seq2Seq(output_dim, output_length, hidden_dim=None, depth=1, broadcast_state=True, inner_broadcast_state=True, teacher_force=False, peek=False, dropout=0., **kwargs):
	'''
	Seq2seq model based on [1] and [2].
	This model has the ability to transfer the encoder hidden state to the decoder's
	hidden state (specified by the broadcast_state argument). Also, in deep models
	(depth > 1), the hidden state is propagated throughout the LSTM stack (specified by
	the inner_broadcast_state argument). You can switch between the [1]-based model and
	the [2]-based model using the peek argument (peek = True for [2], peek = False for [1]).
	When peek = True, the decoder gets a 'peek' at the context vector at every timestep.

	[1] based model:

		Encoder:
		X = Input sequence
		C = LSTM(X); The context vector

		Decoder:
        y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c)
        y(0) = LSTM(s0, C); C is the context vector from the encoder.

    [2] based model:

		Encoder:
		X = Input sequence
		C = LSTM(X); The context vector

		Decoder:
        y(t) = LSTM(s(t-1), y(t-1), C)
        y(0) = LSTM(s0, C, C)
        Where s is the hidden state of the LSTM (h and c), and C is the context vector 
        from the encoder.

	Arguments:

	output_dim : Required output dimension.
	hidden_dim : The dimension of the internal representations of the model.
	output_length : Length of the required output sequence.
	depth : Used to create a deep Seq2seq model. For example, if depth = 3,
			there will be 3 LSTMs on the encoding side and 3 LSTMs on the
			decoding side. You can also specify depth as a tuple. For example,
			if depth = (4, 5), 4 LSTMs will be added to the encoding side and
			5 LSTMs will be added to the decoding side.
	broadcast_state : Specifies whether the hidden state from the encoder should be
					  transferred to the decoder.
	inner_broadcast_state : Specifies whether hidden states should be propagated
							throughout the LSTM stack in deep models.
	peek : Specifies if the decoder should be able to peek at the context vector
		   at every timestep.
	dropout : Dropout probability in between layers.


	'''
	if isinstance(depth, int):
		depth = [depth, depth]
	if 'batch_input_shape' in kwargs:
		shape = kwargs['batch_input_shape']
		del kwargs['batch_input_shape']
	elif 'input_shape' in kwargs:
		shape = (None,) + tuple(kwargs['input_shape'])
		del kwargs['input_shape']
	elif 'input_dim' in kwargs:
		if 'input_length' in kwargs:
			shape = (None, kwargs['input_length'], kwargs['input_dim'])
			del kwargs['input_length']
		else:
			shape = (None, None, kwargs['input_dim'])
		del kwargs['input_dim']
	if 'unroll' in kwargs:
		unroll = kwargs['unroll']
		del kwargs['unroll']
	else:
		unroll = False
	if 'stateful' in kwargs:
		stateful = kwargs['stateful']
		del kwargs['stateful']
	else:
		stateful = False
	if not hidden_dim:
		hidden_dim = output_dim
	encoder = RecurrentContainer(readout=True, state_sync=inner_broadcast_state, input_length=shape[1], unroll=unroll, stateful=stateful, return_states=broadcast_state)
	for i in range(depth[0]):
		encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim), **kwargs))
		encoder.add(Dropout(dropout))
	dense1 = TimeDistributed(Dense(hidden_dim))
	dense1.supports_masking = True
	dense2 = Dense(output_dim)
	decoder = RecurrentContainer(readout='add' if peek else 'readout_only', state_sync=inner_broadcast_state, output_length=output_length, unroll=unroll, stateful=stateful, decode=True, input_length=shape[1])
	for i in range(depth[1]):
		decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim)))
		decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim, batch_input_shape=(shape[0], output_dim), **kwargs))
	input = Input(batch_shape=shape)
	input._keras_history[0].supports_masking = True
	encoded_seq = dense1(input)
	encoded_seq = encoder(encoded_seq)
	if broadcast_state:
		states = encoded_seq[-2:]
		encoded_seq = encoded_seq[0]
	else:
		states = [None] * 2
	encoded_seq = dense2(encoded_seq)
	inputs = [input]
	if teacher_force:
		truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim))
		truth_tensor._keras_history[0].supports_masking = True
		inputs += [truth_tensor]
	decoded_seq = decoder({'input': encoded_seq, 'ground_truth': inputs[1] if teacher_force else None, 'initial_readout': encoded_seq, 'states': states})
	model = Model(inputs, decoded_seq)
	model.encoder = encoder
	model.decoder = decoder
	return model
Example #37
                              mask_zero=True,
                              input_length=step_length)(hash_index_input)

pos_input = Input(shape=(step_length, pos_length))
chunk_input = Input(shape=(step_length, chunk_length))
gazetteer_input = Input(shape=(step_length, gazetteer_length))

senna_hash_pos_chunk_gazetteer_merge = merge(
    [embedding, encoder_embedding, pos_input, chunk_input, gazetteer_input],
    mode='concat')
input_mask = Masking(mask_value=0)(senna_hash_pos_chunk_gazetteer_merge)
dp_1 = Dropout(0.5)(input_mask)
hidden_1 = Bidirectional(LSTM(128, return_sequences=True))(dp_1)
hidden_2 = Bidirectional(LSTM(64, return_sequences=True))(hidden_1)
dp_2 = Dropout(0.5)(hidden_2)
output = TimeDistributed(Dense(output_length, activation='softmax'))(dp_2)
model = Model(input=[
    embed_index_input, hash_index_input, pos_input, chunk_input,
    gazetteer_input
],
              output=output)

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

print(model.summary())

number_of_train_batches = int(math.ceil(float(train_samples) / batch_size))
number_of_dev_batches = int(math.ceil(float(dev_samples) / batch_size))