def eval_hier_att_sampled_softmax(self, stored_weights):
        def one_step_attention(a, s_prev):

            s_prev = self.repeat_vector_att(s_prev)
            concat = self.concatenator_att([a, s_prev])
            e = self.densor1(concat)
            energies = self.densor2(e)
            alphas = self.att_weights(energies)
            context = self.dotor([alphas, a])

            return context

        eval_softmax = SamplingLayer(self.num_samples,
                                     self.vocab_size,
                                     mode='eval')

        s0 = Input(shape=(self.rnn_dim, ), name='s0')
        s = [s0]

        eval_losses = []
        for t in range(self.decoder_length + 1):

            label_t = Lambda(lambda x: self.labels[:, t, :],
                             name='label-%s' % t)(self.labels)
            x_dec = Lambda(lambda x: self.dec_embedded_sequences[:, t, :],
                           name='dec_embedding-%s' % t)(
                               self.dec_embedded_sequences)
            x_dec = Reshape((1, self.embedding_dim))(x_dec)
            # --- one-step attention: compute the context vector for decoder step t ---

            context = one_step_attention(self.out_bidir_doc_encoder, s[0])
            context = Reshape((1, self.rnn_dim))(context)
            context_concat = concatenate([x_dec, context], axis=-1)
            # --- end of one-step attention ---

            #if t==0:
            #	s = self.out_bidir_doc_encoder
            s, _ = self.fwd_decoder(context_concat, initial_state=s)
            loss = eval_softmax([s, label_t])
            eval_losses.append(loss)
            s = [s]

        eval_model = Model(
            inputs=[self.in_document, self.in_decoder, s0, self.labels],
            outputs=eval_losses)
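        # the graph's outputs are already per-time-step losses from the SamplingLayer,
        # so compile with an identity "loss" that simply forwards them (y_true is a dummy)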
        eval_model.compile(loss=lambda y_true, loss: loss, optimizer='rmsprop')
        eval_model.load_weights(
            os.path.join(self.filepath, '%s' % (stored_weights)))
        eval_model.summary()

        self.eval_model = eval_model

        return self.eval_model
    def eval_sampled_softmax(self, stored_weights):

        eval_softmax = SamplingLayer(self.num_samples,
                                     self.vocab_size,
                                     mode='eval')

        s0 = Input(shape=(self.rnn_dim, ), name='s0')
        s = [s0]

        eval_losses = []
        for t in range(self.decoder_length + 1):

            label_t = Lambda(lambda x: self.labels[:, t, :],
                             name='label-%s' % t)(self.labels)
            x_dec = Lambda(lambda x: self.dec_embedded_sequences[:, t, :],
                           name='dec_embedding-%s' % t)(
                               self.dec_embedded_sequences)
            x_dec = Reshape((1, self.embedding_dim))(x_dec)

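            # at t == 0 the decoder is initialised from the concatenated bidirectional encoder
            # output, so this only works when rnn_dim == 2 * birnn_dim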
            if t == 0:
                s = self.out_bidir_encoder
            s, _ = self.fwd_decoder(x_dec, initial_state=s)
            loss = eval_softmax([s, label_t])
            eval_losses.append(loss)
            s = [s]

        eval_model = Model(
            inputs=[self.in_encoder, self.in_decoder, s0, self.labels],
            outputs=eval_losses)
        eval_model.compile(loss=lambda y_true, loss: loss, optimizer='rmsprop')

        eval_model.load_weights(
            os.path.join(self.filepath, '%s' % (stored_weights)))
        eval_model.summary()

        self.eval_model = eval_model

        return self.eval_model
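
A minimal sketch of how one of these evaluation models might be driven, assuming summarizer is an instance of the enclosing class and enc_ids, dec_ids, label_ids are already padded id arrays (all names here are hypothetical):

import numpy as np

_ = summarizer.train_sampled_softmax()          # eval_* methods reuse tensors created by the matching train_* builder
eval_model = summarizer.eval_sampled_softmax('model-weights.hdf5')    # hypothetical checkpoint file name

batch = enc_ids.shape[0]
s0 = np.zeros((batch, summarizer.rnn_dim), dtype='float32')           # dummy initial decoder state

# the model's outputs are the per-time-step losses produced by the SamplingLayer
step_losses = eval_model.predict([enc_ids, dec_ids, s0, label_ids])

# average the per-step losses and report a rough perplexity
mean_loss = float(np.mean([np.mean(l) for l in step_losses]))
print('mean loss %.4f, perplexity ~ %.2f' % (mean_loss, np.exp(mean_loss)))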
Example #3
    def train_att_sampled_softmax(self):

        # custom softmax for normalization w.r.t axis = 1
        def custom_softmax(x, axis=1):
            """Softmax activation function.
			# Arguments
				x : Tensor.
				axis: Integer, axis along which the softmax normalization is applied.
			# Returns
				Tensor, output of softmax transformation.
			# Raises
				ValueError: In case `dim(x) == 1`.
			"""
            ndim = K.ndim(x)
            if ndim == 2:
                return K.softmax(x)
            elif ndim > 2:
                e = K.exp(x - K.max(x, axis=axis, keepdims=True))
                s = K.sum(e, axis=axis, keepdims=True)
                return e / s
            else:
                raise ValueError('Cannot apply softmax to a tensor that is 1D')

        #### Shared layers for the one-step attention mechanism (stored as instance attributes)

        # RepeatVector copies the decoder hidden state from the previous time step, h_dec(t-1), across all encoder time steps so it can be paired with every encoder state
        self.repeator = RepeatVector(self.encoder_length, name='repeator_att')

        # Concatenate joins each encoder hidden state h_enc(t) with the repeated decoder state h_dec(t-1)
        self.concatenator = Concatenate(axis=-1, name='concator_att')

        # Small feed-forward network that turns each (encoder state, decoder state) pair into an attention energy
        # the first dense layer computes intermediate energies (a non-linear projection of the concatenated states)
        self.densor1 = Dense(self.decoder_length + 1,
                             activation="tanh",
                             name='densor1_att')

        # the second dense layer reduces the intermediate energies to a single energy per attended time step
        self.densor2 = Dense(1, activation="relu", name='densor2_att')

        # normalize the energies over the encoder time axis with the custom softmax (axis=1) defined above
        self.activator = Activation(custom_softmax, name='attention_weights')

        # dot product of the attention weights with the encoder hidden states yields the context vector for one decoder step
        self.dotor = Dot(axes=1, name='dotor_att')

        def one_step_attention(a, s_prev):
            """
			Performs one step of attention: Outputs a context vector computed as a dot product of the attention weights
			"alphas" and the hidden states "a" of the Bi-GRU encoder.
			
			Arguments:
			a -- hidden state output of the Bi-GRU encoder, numpy-array of shape (m, Tx, 2*n_a)
			s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s)
			
			Returns:
			context -- context vector, input of the next (post-attetion) LSTM cell
			"""

            # repeat s_prev to shape (m, Tx, n_s) so it can be concatenated with all hidden states "a"
            s_prev = self.repeator(s_prev)
            print("s_prev: %s" % s_prev.shape)
            sys.stdout.flush()

            # concatenate a and s_prev on the last axis
            concat = self.concatenator([a, s_prev])

            # densor1 projects the concatenation to the "intermediate energies" e
            e = self.densor1(concat)

            # densor2 reduces e to a single "energy" per attended time step
            energies = self.densor2(e)

            # the custom-softmax activator turns the energies into attention weights "alphas"
            alphas = self.activator(energies)

            # dot "alphas" with "a" to obtain the context vector passed on to the decoder GRU
            context = self.dotor([alphas, a])

            return context

        ### Encoder model

        in_encoder = Input(shape=(self.encoder_length, ),
                           dtype='int32',
                           name='encoder_input')
        embed_encoder = Embedding(self.vocab_size,
                                  self.embedding_dim,
                                  input_length=self.encoder_length,
                                  name='embedding_encoder')
        in_enc_embedded = embed_encoder(in_encoder)

        fwd_encoder = GRU(self.birnn_dim,
                          return_sequences=True,
                          name='fwd_encoder')
        bwd_encoder = GRU(self.birnn_dim,
                          return_sequences=True,
                          name='bwd_encoder',
                          go_backwards=True)
        out_encoder_1 = fwd_encoder(in_enc_embedded)
        out_encoder_2 = bwd_encoder(in_enc_embedded)
        out_bidir_encoder = concatenate([out_encoder_1, out_encoder_2],
                                        axis=-1,
                                        name='bidir_encoder')

        #encoder_model = Model(inputs=in_encoder, outputs=out_bidir_encoder)
        #self.encoder_model = encoder_model

        ### Decoder model

        in_decoder = Input(shape=(None, ), name='decoder_input', dtype='int32')
        embed_decoder = Embedding(self.vocab_size,
                                  self.embedding_dim,
                                  name='embedding_decoder')
        in_dec_embedded = embed_decoder(in_decoder)

        labels = Input((self.decoder_length + 1, 1),
                       dtype='int32',
                       name='labels_')

        fwd_decoder = GRU(self.rnn_dim, return_state=True)

        s0 = Input(shape=(self.rnn_dim, ), name='s0')
        s = [s0]

        sampling_softmax = SamplingLayer(self.num_samples,
                                         self.vocab_size,
                                         mode='train')

        losses = []
        for t in range(self.decoder_length + 1):

            label_t = Lambda(lambda x: labels[:, t, :],
                             name='label-%s' % t)(labels)
            x_dec = Lambda(lambda x: in_dec_embedded[:, t, :],
                           name='dec_embedding-%s' % t)(in_dec_embedded)
            x_dec = Reshape((1, self.embedding_dim))(x_dec)
            # --- one-step attention: compute the context vector for decoder step t ---

            context = one_step_attention(out_bidir_encoder, s[0])

            context = Reshape((1, self.rnn_dim))(context)

            context_concat = concatenate([x_dec, context], axis=-1)
            # --- end of one-step attention ---

            #if t==0:
            #	s = out_bidir_encoder

            s, _ = fwd_decoder(context_concat, initial_state=s)

            loss = sampling_softmax([s, label_t])
            losses.append(loss)
            s = [s]

        model = Model(inputs=[in_encoder, in_decoder, s0, labels],
                      outputs=losses)

        self.train_model = model
        self.in_encoder = in_encoder
        self.out_bidir_encoder = out_bidir_encoder
        self.in_decoder = in_decoder
        self.embed_decoder = embed_decoder
        self.in_dec_embedded = in_dec_embedded
        self.labels = labels
        self.fwd_decoder = fwd_decoder

        # store attention layers
        self.repeat_vector_att = model.get_layer("repeator_att")
        self.concatenator_att = model.get_layer("concator_att")
        self.densor1 = model.get_layer("densor1_att")
        self.densor2 = model.get_layer("densor2_att")
        self.att_weights = model.get_layer("attention_weights")
        self.dotor = model.get_layer("dotor_att")

        return self.train_model
    def train_hier_att_sampled_softmax(self, pretrained_embedding,
                                       oov_embedding):

        # custom softmax for normalization w.r.t axis = 1
        def custom_softmax(x, axis=1):
            """Softmax activation function.
			# Arguments
				x : Tensor.
				axis: Integer, axis along which the softmax normalization is applied.
			# Returns
				Tensor, output of softmax transformation.
			# Raises
				ValueError: In case `dim(x) == 1`.
			"""
            ndim = K.ndim(x)
            if ndim == 2:
                return K.softmax(x)
            elif ndim > 2:
                e = K.exp(x - K.max(x, axis=axis, keepdims=True))
                s = K.sum(e, axis=axis, keepdims=True)
                return e / s
            else:
                raise ValueError('Cannot apply softmax to a tensor that is 1D')

        #### Shared layers for the one-step attention mechanism (stored as instance attributes)

        # RepeatVector copies the decoder hidden state from the previous time step, h_dec(t-1), across the attended time steps (the attention below runs over the document's sentence axis, so the repeat length must match that axis)
        self.repeator = RepeatVector(self.encoder_length, name='repeator_att')

        # Concatenate joins each encoder hidden state h_enc(t) with the repeated decoder state h_dec(t-1)
        self.concatenator = Concatenate(axis=-1, name='concator_att')

        # Small feed-forward network that turns each (encoder state, decoder state) pair into an attention energy
        # the first dense layer computes intermediate energies (a non-linear projection of the concatenated states)
        self.densor1 = Dense(self.decoder_length + 1,
                             activation="tanh",
                             name='densor1_att')

        # the second dense layer reduces the intermediate energies to a single energy per attended time step
        self.densor2 = Dense(1, activation="relu", name='densor2_att')

        # normalize the energies over the attended time axis with the custom softmax (axis=1) defined above
        self.activator = Activation(custom_softmax, name='attention_weights')

        # dot product of the attention weights with the encoder hidden states yields the context vector for one decoder step
        self.dotor = Dot(axes=1, name='dotor_att')

        def one_step_attention(a, s_prev):
            """
			Performs one step of attention: Outputs a context vector computed as a dot product of the attention weights
			"alphas" and the hidden states "a" of the Bi-GRU encoder.
			
			Arguments:
			a -- hidden state output of the Bi-GRU encoder, numpy-array of shape (m, Tx, 2*n_a)
			s_prev -- previous hidden state of the (post-attention) LSTM, numpy-array of shape (m, n_s)
			
			Returns:
			context -- context vector, input of the next (post-attetion) LSTM cell
			"""

            # repeat s_prev to shape (m, Tx, n_s) so it can be concatenated with all hidden states "a"
            s_prev = self.repeator(s_prev)
            print("s_prev: %s" % s_prev.shape)
            sys.stdout.flush()

            # concatenate a and s_prev on the last axis
            concat = self.concatenator([a, s_prev])

            # densor1 projects the concatenation to the "intermediate energies" e
            e = self.densor1(concat)

            # densor2 reduces e to a single "energy" per attended time step
            energies = self.densor2(e)

            # the custom-softmax activator turns the energies into attention weights "alphas"
            alphas = self.activator(energies)

            # dot "alphas" with "a" to obtain the context vector passed on to the decoder GRU
            context = self.dotor([alphas, a])

            return context

        ### Encoder model

        self.vocab_size = pretrained_embedding.shape[0]
        self.oov_size = oov_embedding.shape[0]
        valid_words = self.vocab_size - self.oov_size

        # sentence input
        in_sentence = Input(shape=(self.encoder_length, ),
                            name='sent-input',
                            dtype='int32')

        oov_in_sentence = Lambda(lambda x: x - valid_words)(in_sentence)
        oov_in_sentence = Activation('relu')(oov_in_sentence)

        # document input
        in_document = Input(shape=(self.max_sents, self.encoder_length),
                            name='doc-input',
                            dtype='int32')

        # embedding layer
        embed_encoder = Embedding(self.vocab_size,
                                  self.embedding_dim,
                                  input_length=self.encoder_length,
                                  weights=[pretrained_embedding],
                                  trainable=False,
                                  name='embedding-encoder')

        oov_embed_encoder = Embedding(self.oov_size,
                                      self.embedding_dim,
                                      input_length=self.encoder_length,
                                      weights=[oov_embedding],
                                      trainable=True,
                                      name='oov_embedding_encoder')

        in_enc_embedded = embed_encoder(in_sentence)
        oov_in_enc_embedded = oov_embed_encoder(oov_in_sentence)

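        # note: in-vocabulary ids (id < valid_words) give relu(id - valid_words) = 0 and therefore pick up
        # row 0 of the trainable OOV table, while OOV ids select their own rows; presumably row 0 of
        # oov_embedding and the OOV rows of the frozen pretrained matrix are zero, so the Add() below
        # leaves each token with the single embedding that belongs to it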
        # add the pretrained and OOV embedding lookups element-wise
        enc_embedded_sequences = Add()([in_enc_embedded, oov_in_enc_embedded])

        # CNN block to capture n-gram features
        filter_length = [5, 3, 2]
        nb_filter = [16, 32, 64]
        pool_length = 2

        for i in range(len(nb_filter)):
            enc_embedded_sequences = Conv1D(filters=nb_filter[i],
                                            kernel_size=filter_length[i],
                                            padding='valid',
                                            activation='relu',
                                            kernel_initializer='glorot_normal',
                                            strides=1,
                                            name='conv_%s' %
                                            str(i + 1))(enc_embedded_sequences)

            enc_embedded_sequences = Dropout(
                0.1, name='dropout_%s' % str(i + 1))(enc_embedded_sequences)
            enc_embedded_sequences = MaxPooling1D(
                pool_size=pool_length,
                name='maxpool_%s' % str(i + 1))(enc_embedded_sequences)

        # Bidirectional GRU to capture sentence features from the CNN n-gram features
        fwd_encoder = GRU(self.birnn_dim, name='fwd-sent-encoder')
        bwd_encoder = GRU(self.birnn_dim,
                          name='bwd-sent-encoder',
                          go_backwards=True)
        out_encoder_1 = fwd_encoder(enc_embedded_sequences)
        out_encoder_2 = bwd_encoder(enc_embedded_sequences)
        out_bidir_encoder = concatenate([out_encoder_1, out_encoder_2],
                                        axis=-1,
                                        name='bidir-sent-encoder')

        #### 1. Sentence Encoder

        sent_encoder = Model(inputs=in_sentence, outputs=out_bidir_encoder)
        self.sent_encoder = sent_encoder

        #### 2. Document Encoder
        encoded = TimeDistributed(sent_encoder,
                                  name='sent-doc-encoded')(in_document)

        # Bidirectional GRU to capture document features from the encoded sentences
        fwd_doc_encoder = GRU(self.birnn_dim,
                              return_sequences=True,
                              name='fwd-doc-encoder')
        bwd_doc_encoder = GRU(self.birnn_dim,
                              return_sequences=True,
                              name='bwd-doc-encoder',
                              go_backwards=True)
        out_encoder_doc_1 = fwd_doc_encoder(encoded)
        out_encoder_doc_2 = bwd_doc_encoder(encoded)
        out_bidir_doc_encoder = concatenate(
            [out_encoder_doc_1, out_encoder_doc_2], axis=-1)

        #encoder_model = Model(inputs=in_document, outputs=out_bidir_doc_encoder)
        #self.encoder_model = encoder_model

        ### Decoder model

        # input placeholder for teacher forcing (link ground truth to decoder input)
        in_decoder = Input(shape=(None, ), name='decoder_input', dtype='int32')
        oov_lambda = Lambda(lambda x: x - valid_words)
        oov_activator = Activation('relu')

        oov_in_decoder = oov_lambda(in_decoder)
        oov_in_decoder = oov_activator(oov_in_decoder)

        embed_decoder = Embedding(self.vocab_size,
                                  self.embedding_dim,
                                  weights=[pretrained_embedding],
                                  trainable=False,
                                  name='embedding_decoder')
        oov_embed_decoder = Embedding(self.oov_size,
                                      self.embedding_dim,
                                      weights=[oov_embedding],
                                      trainable=True,
                                      name='oov_embedding_decoder')

        in_dec_embedded = embed_decoder(in_decoder)
        oov_in_dec_embedded = oov_embed_decoder(oov_in_decoder)

        # add the pretrained and OOV embedding lookups element-wise
        dec_embedded_sequences = Add()([in_dec_embedded, oov_in_dec_embedded])

        labels = Input((self.decoder_length + 1, 1),
                       dtype='int32',
                       name='labels_')

        fwd_decoder = GRU(self.rnn_dim, return_state=True)

        sampling_softmax = SamplingLayer(self.num_samples,
                                         self.vocab_size,
                                         mode='train')

        s0 = Input(shape=(self.rnn_dim, ), name='s0')
        s = [s0]

        losses = []
        for t in range(self.decoder_length + 1):

            label_t = Lambda(lambda x: labels[:, t, :],
                             name='label-%s' % t)(labels)
            x_dec = Lambda(lambda x: dec_embedded_sequences[:, t, :],
                           name='dec_embedding-%s' % t)(dec_embedded_sequences)
            x_dec = Reshape((1, self.embedding_dim))(x_dec)
            # --- one-step attention: compute the context vector for decoder step t ---

            context = one_step_attention(out_bidir_doc_encoder, s[0])
            context = Reshape((1, self.rnn_dim))(context)
            context_concat = concatenate([x_dec, context], axis=-1)
            # --- end of one-step attention ---

            #if t==0:
            #	s = out_bidir_doc_encoder

            s, _ = fwd_decoder(context_concat, initial_state=s)
            loss = sampling_softmax([s, label_t])
            losses.append(loss)
            s = [s]

        model = Model(inputs=[in_document, in_decoder, s0, labels],
                      outputs=losses)

        self.train_model = model
        self.in_document = in_document
        self.out_bidir_doc_encoder = out_bidir_doc_encoder
        self.in_decoder = in_decoder
        self.oov_lambda = oov_lambda
        self.oov_activator = oov_activator
        self.embed_decoder = embed_decoder
        self.oov_embed_decoder = oov_embed_decoder
        self.dec_embedded_sequences = dec_embedded_sequences
        self.labels = labels
        self.fwd_decoder = fwd_decoder

        # store attention layers
        self.repeat_vector_att = model.get_layer("repeator_att")
        self.concatenator_att = model.get_layer("concator_att")
        self.densor1 = model.get_layer("densor1_att")
        self.densor2 = model.get_layer("densor2_att")
        self.att_weights = model.get_layer("attention_weights")
        self.dotor = model.get_layer("dotor_att")

        return self.train_model
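
The train_* builders above return an uncompiled model whose outputs are already per-time-step losses, so training typically compiles with an identity loss and dummy targets. A rough sketch under that assumption (the instance, array and file names are hypothetical):

import numpy as np

train_model = summarizer.train_att_sampled_softmax()
train_model.compile(optimizer='rmsprop', loss=lambda y_true, loss: loss)

batch = enc_ids.shape[0]
s0 = np.zeros((batch, summarizer.rnn_dim), dtype='float32')
# one dummy target per output; its exact shape just has to match what SamplingLayer returns
dummy = [np.zeros((batch, 1), dtype='float32') for _ in range(summarizer.decoder_length + 1)]

train_model.fit([enc_ids, dec_ids, s0, label_ids], dummy, batch_size=32, epochs=10)
train_model.save_weights('model-weights.hdf5')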
Example #5
    def train_hier_sampled_softmax(self, pretrained_embedding, oov_embedding):

        ### Encoder model
        self.vocab_size = pretrained_embedding.shape[0]
        self.oov_size = oov_embedding.shape[0]
        valid_words = self.vocab_size - self.oov_size

        # sentence input
        in_sentence = Input(shape=(self.encoder_length, ),
                            name='sent-input',
                            dtype='int32')

        oov_in_sentence = Lambda(lambda x: x - valid_words)(in_sentence)
        oov_in_sentence = Activation('relu')(oov_in_sentence)

        # document input
        in_document = Input(shape=(self.max_sents, self.encoder_length),
                            name='doc-input',
                            dtype='int32')

        # embedding layer
        embed_encoder = Embedding(self.vocab_size,
                                  self.embedding_dim,
                                  input_length=self.encoder_length,
                                  weights=[pretrained_embedding],
                                  trainable=False,
                                  name='embedding-encoder')

        oov_embed_encoder = Embedding(self.oov_size,
                                      self.embedding_dim,
                                      input_length=self.encoder_length,
                                      weights=[oov_embedding],
                                      trainable=True,
                                      name='oov_embedding_encoder')

        in_enc_embedded = embed_encoder(in_sentence)
        oov_in_enc_embedded = oov_embed_encoder(oov_in_sentence)

        # add the pretrained and OOV embedding lookups element-wise
        enc_embedded_sequences = Add()([in_enc_embedded, oov_in_enc_embedded])

        # CNN block to capture n-gram features
        filter_length = [5, 3, 2]
        nb_filter = [16, 32, 64]
        pool_length = 2

        for i in range(len(nb_filter)):
            enc_embedded_sequences = Conv1D(filters=nb_filter[i],
                                            kernel_size=filter_length[i],
                                            padding='valid',
                                            activation='relu',
                                            kernel_initializer='glorot_normal',
                                            strides=1,
                                            name='conv_%s' %
                                            str(i + 1))(enc_embedded_sequences)

            enc_embedded_sequences = Dropout(
                0.1, name='dropout_%s' % str(i + 1))(enc_embedded_sequences)
            enc_embedded_sequences = MaxPooling1D(
                pool_size=pool_length,
                name='maxpool_%s' % str(i + 1))(enc_embedded_sequences)

        # Bidirectional GRU to capture sentence features from the CNN n-gram features
        fwd_encoder = GRU(self.birnn_dim, name='fwd-sent-encoder')
        bwd_encoder = GRU(self.birnn_dim,
                          name='bwd-sent-encoder',
                          go_backwards=True)
        out_encoder_1 = fwd_encoder(enc_embedded_sequences)
        out_encoder_2 = bwd_encoder(enc_embedded_sequences)
        out_bidir_encoder = concatenate([out_encoder_1, out_encoder_2],
                                        axis=-1,
                                        name='bidir-sent-encoder')

        #### 1. Sentence Encoder

        sent_encoder = Model(inputs=in_sentence, outputs=out_bidir_encoder)
        self.sent_encoder = sent_encoder

        #### 2. Document Encoder
        encoded = TimeDistributed(sent_encoder,
                                  name='sent-doc-encoded')(in_document)

        # Bidirectional GRU to capture document features from the encoded sentences
        fwd_doc_encoder = GRU(self.birnn_dim,
                              return_state=True,
                              name='fwd-doc-encoder')
        bwd_doc_encoder = GRU(self.birnn_dim,
                              return_state=True,
                              name='bwd-doc-encoder',
                              go_backwards=True)
        out_encoder_doc_1, doc_eh_1 = fwd_doc_encoder(encoded)
        out_encoder_doc_2, doc_eh_2 = bwd_doc_encoder(encoded)
        out_bidir_doc_encoder = concatenate(
            [out_encoder_doc_1, out_encoder_doc_2], axis=-1)

        encoder_model = Model(inputs=in_document,
                              outputs=out_bidir_doc_encoder)
        self.encoder_model = encoder_model

        ### Decoder model

        # input placeholder for teacher forcing (link ground truth to decoder input)
        in_decoder = Input(shape=(None, ), name='decoder_input', dtype='int32')

        oov_lambda = Lambda(lambda x: x - valid_words)
        oov_activator = Activation('relu')

        oov_in_decoder = oov_lambda(in_decoder)
        oov_in_decoder = oov_activator(oov_in_decoder)

        embed_decoder = Embedding(self.vocab_size,
                                  self.embedding_dim,
                                  weights=[pretrained_embedding],
                                  trainable=False,
                                  name='embedding_decoder')
        oov_embed_decoder = Embedding(self.oov_size,
                                      self.embedding_dim,
                                      weights=[oov_embedding],
                                      trainable=True,
                                      name='oov_embedding_decoder')

        in_dec_embedded = embed_decoder(in_decoder)
        oov_in_dec_embedded = oov_embed_decoder(oov_in_decoder)

        # add the pretrained and OOV embedding lookups element-wise
        dec_embedded_sequences = Add()([in_dec_embedded, oov_in_dec_embedded])

        labels = Input((self.decoder_length + 1, 1),
                       dtype='int32',
                       name='labels_')

        fwd_decoder = GRU(self.rnn_dim, return_state=True)

        sampling_softmax = SamplingLayer(self.num_samples,
                                         self.vocab_size,
                                         mode='train')

        s0 = Input(shape=(self.rnn_dim, ), name='s0')
        s = [s0]

        losses = []
        for t in range(self.decoder_length + 1):

            label_t = Lambda(lambda x: labels[:, t, :],
                             name='label-%s' % t)(labels)
            x_dec = Lambda(lambda x: dec_embedded_sequences[:, t, :],
                           name='dec_embedding-%s' % t)(dec_embedded_sequences)
            x_dec = Reshape((1, self.embedding_dim))(x_dec)

            if t == 0:
                s = out_bidir_doc_encoder
            s, _ = fwd_decoder(x_dec, initial_state=s)
            loss = sampling_softmax([s, label_t])
            losses.append(loss)
            s = [s]

        model = Model(inputs=[in_document, in_decoder, s0, labels],
                      outputs=losses)

        self.train_model = model
        self.in_document = in_document
        self.out_bidir_doc_encoder = out_bidir_doc_encoder
        self.in_decoder = in_decoder
        self.oov_lambda = oov_lambda
        self.oov_activator = oov_activator
        self.embed_decoder = embed_decoder
        self.oov_embed_decoder = oov_embed_decoder
        self.dec_embedded_sequences = dec_embedded_sequences
        self.labels = labels
        self.fwd_decoder = fwd_decoder

        return self.train_model
    def train_sampled_softmax(self, pretrained_embedding, oov_embedding):

        ### Encoder model
        self.vocab_size = pretrained_embedding.shape[0]
        self.oov_size = oov_embedding.shape[0]
        valid_words = self.vocab_size - self.oov_size

        in_encoder = Input(shape=(self.encoder_length, ),
                           dtype='int32',
                           name='encoder_input')

        oov_in_encoder = Lambda(lambda x: x - valid_words)(in_encoder)
        oov_in_encoder = Activation('relu')(oov_in_encoder)

        embed_encoder = Embedding(self.vocab_size,
                                  self.embedding_dim,
                                  input_length=self.encoder_length,
                                  weights=[pretrained_embedding],
                                  trainable=False,
                                  name='embedding_encoder')
        oov_embed_encoder = Embedding(self.oov_size,
                                      self.embedding_dim,
                                      input_length=self.encoder_length,
                                      weights=[oov_embedding],
                                      trainable=True,
                                      name='oov_embedding_encoder')

        in_enc_embedded = embed_encoder(in_encoder)
        oov_in_enc_embedded = oov_embed_encoder(oov_in_encoder)

        # add the pretrained and OOV embedding lookups element-wise
        enc_embedded_sequences = Add()([in_enc_embedded, oov_in_enc_embedded])

        fwd_encoder = GRU(self.birnn_dim,
                          return_state=True,
                          name='fwd_encoder')
        bwd_encoder = GRU(self.birnn_dim,
                          return_state=True,
                          name='bwd_encoder',
                          go_backwards=True)
        out_encoder_1, _eh1 = fwd_encoder(enc_embedded_sequences)
        out_encoder_2, _eh2 = bwd_encoder(enc_embedded_sequences)
        out_bidir_encoder = concatenate([out_encoder_1, out_encoder_2],
                                        axis=-1,
                                        name='bidir_encoder')

        encoder_model = Model(inputs=in_encoder, outputs=out_bidir_encoder)
        self.encoder_model = encoder_model

        ### Decoder model

        in_decoder = Input(shape=(None, ), name='decoder_input', dtype='int32')

        oov_lambda = Lambda(lambda x: x - valid_words)
        oov_activator = Activation('relu')

        oov_in_decoder = oov_lambda(in_decoder)
        oov_in_decoder = oov_activator(oov_in_decoder)

        embed_decoder = Embedding(self.vocab_size,
                                  self.embedding_dim,
                                  weights=[pretrained_embedding],
                                  trainable=False,
                                  name='embedding_decoder')
        oov_embed_decoder = Embedding(self.oov_size,
                                      self.embedding_dim,
                                      weights=[oov_embedding],
                                      trainable=True,
                                      name='oov_embedding_decoder')

        in_dec_embedded = embed_decoder(in_decoder)
        oov_in_dec_embedded = oov_embed_decoder(oov_in_decoder)

        # add the pretrained and OOV embedding lookups element-wise
        dec_embedded_sequences = Add()([in_dec_embedded, oov_in_dec_embedded])

        labels = Input((self.decoder_length + 1, 1),
                       dtype='int32',
                       name='labels_')

        fwd_decoder = GRU(self.rnn_dim, return_state=True)

        sampling_softmax = SamplingLayer(self.num_samples,
                                         self.vocab_size,
                                         mode='train')

        s0 = Input(shape=(self.rnn_dim, ), name='s0')
        s = [s0]

        losses = []
        for t in range(self.decoder_length + 1):

            label_t = Lambda(lambda x: labels[:, t, :],
                             name='label-%s' % t)(labels)
            x_dec = Lambda(lambda x: dec_embedded_sequences[:, t, :],
                           name='dec_embedding-%s' % t)(dec_embedded_sequences)
            x_dec = Reshape((1, self.embedding_dim))(x_dec)

            if t == 0:
                s = out_bidir_encoder
            s, _ = fwd_decoder(x_dec, initial_state=s)
            loss = sampling_softmax([s, label_t])
            losses.append(loss)
            s = [s]

        model = Model(inputs=[in_encoder, in_decoder, s0, labels],
                      outputs=losses)

        self.train_model = model
        self.in_encoder = in_encoder
        self.out_bidir_encoder = out_bidir_encoder
        self.in_decoder = in_decoder
        self.oov_lambda = oov_lambda
        self.oov_activator = oov_activator
        self.embed_decoder = embed_decoder
        self.oov_embed_decoder = oov_embed_decoder
        self.dec_embedded_sequences = dec_embedded_sequences
        self.labels = labels
        self.fwd_decoder = fwd_decoder

        return self.train_model
Example #7
    def train_sampled_softmax(self):

        ### Encoder model

        in_encoder = Input(shape=(self.encoder_length, ),
                           dtype='int32',
                           name='encoder_input')
        embed_encoder = Embedding(self.vocab_size,
                                  self.embedding_dim,
                                  input_length=self.encoder_length,
                                  name='embedding_encoder')
        in_enc_embedded = embed_encoder(in_encoder)

        fwd_encoder = GRU(self.birnn_dim,
                          return_state=True,
                          name='fwd_encoder')
        bwd_encoder = GRU(self.birnn_dim,
                          return_state=True,
                          name='bwd_encoder',
                          go_backwards=True)
        out_encoder_1, _eh1 = fwd_encoder(in_enc_embedded)
        out_encoder_2, _eh2 = bwd_encoder(in_enc_embedded)
        out_bidir_encoder = concatenate([out_encoder_1, out_encoder_2],
                                        axis=-1,
                                        name='bidir_encoder')

        encoder_model = Model(inputs=in_encoder, outputs=out_bidir_encoder)
        self.encoder_model = encoder_model

        ### Decoder model

        in_decoder = Input(shape=(None, ), name='decoder_input', dtype='int32')
        embed_decoder = Embedding(self.vocab_size,
                                  self.embedding_dim,
                                  name='embedding_decoder')
        in_dec_embedded = embed_decoder(in_decoder)

        labels = Input((self.decoder_length + 1, 1),
                       dtype='int32',
                       name='labels_')

        fwd_decoder = GRU(self.rnn_dim, return_state=True)

        sampling_softmax = SamplingLayer(self.num_samples,
                                         self.vocab_size,
                                         mode='train')

        s0 = Input(shape=(self.rnn_dim, ), name='s0')
        s = [s0]

        losses = []
        for t in range(self.decoder_length + 1):

            label_t = Lambda(lambda x: labels[:, t, :],
                             name='label-%s' % t)(labels)
            x_dec = Lambda(lambda x: in_dec_embedded[:, t, :],
                           name='dec_embedding-%s' % t)(in_dec_embedded)
            x_dec = Reshape((1, self.embedding_dim))(x_dec)

            if t == 0:
                s = out_bidir_encoder
            s, _ = fwd_decoder(x_dec, initial_state=s)
            loss = sampling_softmax([s, label_t])
            losses.append(loss)
            s = [s]

        model = Model(inputs=[in_encoder, in_decoder, s0, labels],
                      outputs=losses)

        self.train_model = model
        self.in_encoder = in_encoder
        self.out_bidir_encoder = out_bidir_encoder
        self.in_decoder = in_decoder
        self.embed_decoder = embed_decoder
        self.in_dec_embedded = in_dec_embedded
        self.labels = labels
        self.fwd_decoder = fwd_decoder

        return self.train_model
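
None of the snippets above include SamplingLayer itself. A plausible sketch of such a layer, a custom Keras layer that owns the output projection and returns a per-example loss, using TensorFlow's sampled softmax in 'train' mode and the full softmax in 'eval' mode, could look like the code below. This is only a guess at its structure, not the implementation the examples were written against:

import tensorflow as tf
from keras.layers import Layer


class SamplingLayer(Layer):
    """Sampled-softmax loss layer (sketch): inputs are [decoder_state, label_t]."""

    def __init__(self, num_samples, vocab_size, mode='train', **kwargs):
        self.num_samples = num_samples
        self.vocab_size = vocab_size
        self.mode = mode
        super(SamplingLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        rnn_dim = int(input_shape[0][-1])
        # output projection from the decoder state to vocabulary logits
        self.W = self.add_weight(name='proj_W', shape=(self.vocab_size, rnn_dim),
                                 initializer='glorot_uniform', trainable=True)
        self.b = self.add_weight(name='proj_b', shape=(self.vocab_size,),
                                 initializer='zeros', trainable=True)
        super(SamplingLayer, self).build(input_shape)

    def call(self, inputs):
        state, label = inputs                       # state: (batch, rnn_dim), label: (batch, 1)
        label = tf.cast(label, 'int64')
        if self.mode == 'train':
            # approximate the softmax with num_samples sampled negatives
            loss = tf.nn.sampled_softmax_loss(weights=self.W, biases=self.b,
                                              labels=label, inputs=state,
                                              num_sampled=self.num_samples,
                                              num_classes=self.vocab_size)
        else:
            # full softmax cross-entropy for evaluation
            logits = tf.matmul(state, tf.transpose(self.W)) + self.b
            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.squeeze(label, axis=-1), logits=logits)
        return tf.expand_dims(loss, -1)

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], 1)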