Esempio n. 1
0
 def encoder(self, inputs):
     '''Encode a sentence in MLE mode; return the latent mean and log-variance.

     An RNN (bidirectional when cfg.enc_bidirect is set) is run over `inputs`.
     Its per-step outputs are rearranged, pushed through
     linear -> highway -> linear, summed over axis 1, and finally mapped to
     `z_mean` and `z_logvar` by two separate linear layers.
     '''
     with tf.variable_scope("Encoder"):
         if not cfg.enc_bidirect:
             rnn = self.rnn_cell(cfg.num_layers,
                                 cfg.hidden_size,
                                 return_states=True)
             # NOTE(review): unlike the bidirectional branch, no
             # sequence_length is passed here -- confirm this is intended.
             single_direction, _ = tf.nn.dynamic_rnn(rnn,
                                                     inputs,
                                                     swap_memory=True,
                                                     dtype=tf.float32)
             # one-element tuple, to match bidirectional RNN's output format
             rnn_outputs = (single_direction, )
         else:
             fwd_cell = self.rnn_cell(cfg.num_layers,
                                      cfg.hidden_size,
                                      return_states=True)
             bwd_cell = self.rnn_cell(cfg.num_layers,
                                      cfg.hidden_size,
                                      return_states=True)
             rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                 fwd_cell,
                 bwd_cell,
                 inputs,
                 sequence_length=self.lengths,
                 swap_memory=True,
                 dtype=tf.float32)

         def project(tensor, scope):
             # every projection in this method shares the same signature
             return utils.linear(tensor,
                                 cfg.latent_size,
                                 True,
                                 0.0,
                                 scope=scope)

         reordered = []
         for direction in rnn_outputs:
             head = direction[:, :, :cfg.hidden_size]
             tail = direction[:, :, cfg.hidden_size:]
             # for GRU, we skipped the last layer states because they're the outputs
             reordered.append(tf.concat(2, [tail, head]))
         # concatenated states from fwd and bwd RNNs
         joined = tf.concat(2, reordered)
         flat = tf.reshape(joined,
                           [-1, cfg.hidden_size * len(rnn_outputs)])
         flat = project(flat, 'states_transform1')
         flat = utils.highway(flat, f=tf.nn.elu)
         flat = project(flat, 'states_transform2')
         per_step = tf.reshape(flat, [cfg.batch_size, -1, cfg.latent_size])
         latent = tf.nn.elu(tf.reduce_sum(per_step, [1])) * 1e-1
         z_mean = project(latent, 'Latent_mean')
         z_logvar = project(latent, 'Latent_logvar')
     return z_mean, z_logvar
Esempio n. 2
0
 def discriminator_energy(self, states):  # FIXME
     '''Energy-based discriminator: auto-encode `states` and return the result.

     Returns a pair `(reconstructed, ret_latent)`: the RNN reconstruction of
     `states`, and a non-trained tanh projection of `self.latent`.
     '''
     with tf.variable_scope("Discriminator"):
         enc_cell = self.rnn_cell(cfg.d_num_layers, cfg.hidden_size)
         _, final_state = tf.nn.dynamic_rnn(enc_cell,
                                            states,
                                            swap_memory=True,
                                            dtype=tf.float32,
                                            scope='discriminator_encoder')
         # XXX use BiRNN+convnet for the encoder
         # this latent is of size cfg.hidden_size since it needs a lot more capacity than
         # cfg.latent_size to reproduce the hidden states
         # TODO use all states instead of just the final state
         code = utils.highway(final_state, layer_size=1)
         code = utils.linear(code,
                             cfg.hidden_size,
                             True,
                             scope='discriminator_latent_transform')
         # TODO make initial state from latent, don't just use zeros
         start_frame = tf.zeros([2 * cfg.batch_size, 1, cfg.hidden_size])
         decoder_input = tf.concat(1, [start_frame, states])
         dec_cell = self.rnn_cell(cfg.d_num_layers, cfg.hidden_size, code)
         decoded, _ = tf.nn.dynamic_rnn(dec_cell,
                                        decoder_input,
                                        swap_memory=True,
                                        dtype=tf.float32,
                                        scope='discriminator_decoder')
         flat_decoded = tf.reshape(decoded, [-1, cfg.hidden_size])
         reconstructed = utils.linear(flat_decoded,
                                      cfg.hidden_size,
                                      True,
                                      0.0,
                                      scope='discriminator_reconst')
         reconstructed = tf.reshape(
             reconstructed, [2 * cfg.batch_size, -1, cfg.hidden_size])
         # don't train this projection, since the model can learn to zero out ret_latent to
         # minimize the reconstruction error
         projected_latent = utils.linear(self.latent,
                                         cfg.hidden_size,
                                         False,
                                         scope='discriminator_ret_latent',
                                         train=False)
         ret_latent = tf.nn.tanh(projected_latent)
     return reconstructed, ret_latent
Esempio n. 3
0
    def _embed(self):
        """Build the embedded `p` / `q` inputs and store them on the instance.

        Character ids (`self.ph`, `self.qh`) are looked up, passed through a
        shared conv layer and max-reduced over axis 1; word ids (`self.p`,
        `self.q`) are looked up in a frozen embedding table. The two are
        concatenated on axis 2 and fed to a shared highway layer, producing
        `self.p_emb` and `self.q_emb`.
        """
        with tf.device('/cpu:0'), tf.variable_scope('word_embedding'):
            # word vectors are initialised from the vocab and kept fixed
            self.word_embeddings = tf.get_variable(
                'word_embeddings',
                shape=(self.vocab.word_size(), self.vocab.word_embed_dim),
                initializer=tf.constant_initializer(
                    self.vocab.word_embeddings),
                trainable=False)

            # char vectors are initialised from the vocab but stay trainable
            self.char_embeddings = tf.get_variable(
                'char_embeddings',
                shape=(self.vocab.char_size(), self.vocab.char_embed_dim),
                initializer=tf.constant_initializer(
                    self.vocab.char_embeddings))

        def char_lookup(char_ids):
            # NOTE(review): reshapes with self.char_embed_dim while the table
            # is sized by self.vocab.char_embed_dim -- confirm they agree.
            looked_up = tf.nn.embedding_lookup(self.char_embeddings, char_ids)
            return tf.reshape(
                looked_up, [-1, self.max_char_len, self.char_embed_dim])

        def half_dropout(tensor):
            # keep probability is recomputed per call, as in each call site
            return tf.nn.dropout(tensor, 1.0 - 0.5 * self.dropout)

        p_chars = char_lookup(self.ph)
        q_chars = char_lookup(self.qh)
        p_chars = half_dropout(p_chars)
        q_chars = half_dropout(q_chars)

        # Bidaf style conv-highway encoder
        conv_options = dict(bias=True,
                            activation=tf.nn.relu,
                            kernel_size=3,
                            name="char_conv")
        p_chars = conv(p_chars, self.hidden_size, reuse=None, **conv_options)
        q_chars = conv(q_chars, self.hidden_size, reuse=True, **conv_options)

        p_chars = tf.reduce_max(p_chars, axis=1)
        q_chars = tf.reduce_max(q_chars, axis=1)

        p_chars = tf.reshape(p_chars,
                             [-1, self.max_p_len, p_chars.shape[-1]])
        q_chars = tf.reshape(q_chars,
                             [-1, self.max_q_len, q_chars.shape[-1]])

        p_words = half_dropout(
            tf.nn.embedding_lookup(self.word_embeddings, self.p))
        q_words = half_dropout(
            tf.nn.embedding_lookup(self.word_embeddings, self.q))

        p_joint = tf.concat([p_words, p_chars], axis=2)
        q_joint = tf.concat([q_words, q_chars], axis=2)

        # the second call reuses the variables created by the first
        self.p_emb = highway(p_joint,
                             size=self.hidden_size,
                             scope="highway",
                             dropout=self.dropout,
                             reuse=None)
        self.q_emb = highway(q_joint,
                             size=self.hidden_size,
                             scope="highway",
                             dropout=self.dropout,
                             reuse=True)
Esempio n. 4
0
    def __init__(self, vocab, training, generator=False):
        """Build the model graph: inputs, optional encoder, decoder and costs.

        Args:
            vocab: stored as `self.vocab`.
            training: stored as `self.training`; a real train op is only
                created when this is truthy and `generator` is falsy.
            generator: when truthy and `cfg.autoencoder` is set, `self.z` is a
                placeholder instead of the encoder output, and the KLD cost
                is fixed to zero.

        Besides the placeholders, the constructor sets `self.nll`,
        `self.perplexity`, `self.kld`, `self.kld_weight`, `self.mutinfo`,
        `self.cost`, `self.summaries` and `self.train_op`.
        """
        self.vocab = vocab
        self.training = training
        self.global_step = tf.get_variable('global_step',
                                           shape=[],
                                           initializer=tf.zeros_initializer,
                                           trainable=False)
        self.summary_op = None
        self.summaries = []

        with tf.name_scope('input'):
            # left-aligned data:  <sos> w1 w2 ... w_T <eos> <pad...>
            self.data = tf.placeholder(tf.int32, [cfg.batch_size, None],
                                       name='data')
            # sentences with word dropout
            self.data_dropped = tf.placeholder(tf.int32,
                                               [cfg.batch_size, None],
                                               name='data_dropped')
            # sentence lengths
            self.lengths = tf.placeholder(tf.int32, [cfg.batch_size],
                                          name='lengths')
            if cfg.use_labels:
                self.labels = tf.placeholder(tf.int32, [cfg.batch_size],
                                             name='labels')

        # the second lookup passes reuse=True so both share one embedding
        embs = self.word_embeddings(self.data)
        embs_dropped = self.word_embeddings(self.data_dropped, reuse=True)
        if cfg.use_labels:
            embs_labels = self.label_embeddings(self.labels)

        if cfg.use_labels:
            with tf.name_scope('expand-label-dims'):
                # Compensate for words being shifted by 1
                # (the label embedding is tiled along axis 1 to the length
                # of `embs`)
                embs_labels = tf.expand_dims(embs_labels, 1)
                self.embs_labels = tf.tile(embs_labels,
                                           [1, tf.shape(embs)[1], 1])

        if cfg.autoencoder:
            if generator:
                # in generator mode z is fed from outside, no encoder is built
                self.z = tf.placeholder(tf.float32,
                                        [cfg.batch_size, cfg.latent_size])
            else:
                with tf.name_scope('concat_words_and_labels'):
                    if cfg.use_labels:
                        embs_words_with_labels = tf.concat(
                            2, [embs, self.embs_labels])
                    else:
                        embs_words_with_labels = embs

                self.z_mean, z_logvar = self.encoder(embs_words_with_labels)

                if cfg.variational:
                    with tf.name_scope('reparameterize'):
                        # z = mean + sqrt(exp(logvar)) * eps, with eps drawn
                        # from a truncated normal
                        eps = tf.truncated_normal(
                            [cfg.batch_size, cfg.latent_size])
                        self.z = self.z_mean + tf.mul(
                            tf.sqrt(tf.exp(z_logvar)), eps)
                else:
                    self.z = self.z_mean

            with tf.name_scope('transform-z'):
                z = utils.highway(self.z,
                                  f=tf.nn.elu,
                                  bias=0,
                                  scope='transform_z_hw')
                self.z_transformed = utils.linear(z,
                                                  cfg.latent_size,
                                                  True,
                                                  scope='transform_z_lin')
        else:
            # no latent code: a zero placeholder value is handed to the decoder
            z = tf.zeros([cfg.batch_size, 1])

        with tf.name_scope('concat_words-labels-z'):
            # Concatenate dropped word embeddings, label embeddings and 'z'
            concat_list = []
            if cfg.decoder_inputs:
                concat_list.append(embs_dropped)
            else:
                # decoder gets no word inputs: use a single zero feature
                concat_list.append(
                    tf.zeros([cfg.batch_size,
                              tf.shape(embs_dropped)[1], 1]))
            if cfg.autoencoder:
                # tile the transformed z along axis 1 to match embs_dropped
                zt = tf.expand_dims(self.z_transformed, 1)
                zt = tf.tile(zt, [1, tf.shape(embs_dropped)[1], 1])
                concat_list.append(zt)
            if cfg.use_labels:
                concat_list.append(self.embs_labels)
            decode_embs = tf.concat(2, concat_list)

        # NOTE(review): in the autoencoder case `z` here is the highway
        # output, not self.z_transformed -- confirm this is intended.
        output = self.decoder(decode_embs, z)
        if cfg.autoencoder and cfg.mutual_info:
            # 1.0 where the token id is greater than 0, else 0.0
            mask = tf.expand_dims(
                tf.cast(tf.greater(self.data, 0), tf.float32), -1)
            if cfg.use_labels:
                pencoder_embs = tf.concat(2, [mask, self.embs_labels])
            else:
                pencoder_embs = mask
            zo_mean, zo_logvar = self.output_encoder(pencoder_embs, output)

        # shift left the input to get the targets
        with tf.name_scope('left-shift'):
            # a column of zeros is appended so the shape matches self.data
            targets = tf.concat(
                1, [self.data[:, 1:],
                    tf.zeros([cfg.batch_size, 1], tf.int32)])
        with tf.name_scope('mle-cost'):
            nll_per_word = self.mle_loss(output, targets)
            avg_lengths = tf.cast(tf.reduce_mean(self.lengths), tf.float32)
            # nll is the summed per-word loss divided by the batch size;
            # perplexity is exp(nll / mean sentence length)
            self.nll = tf.reduce_sum(nll_per_word) / cfg.batch_size
            self.perplexity = tf.exp(self.nll / avg_lengths)
            self.summaries.append(
                tf.scalar_summary('perplexity', self.perplexity))
            self.summaries.append(tf.scalar_summary('cost_mle', self.nll))
        with tf.name_scope('kld-cost'):
            if not cfg.autoencoder or not cfg.variational or generator:
                self.kld = tf.zeros([])
            else:
                self.kld = tf.reduce_sum(self.kld_loss(self.z_mean, z_logvar)) / \
                           cfg.batch_size
            self.summaries.append(
                tf.scalar_summary('cost_kld', tf.reduce_mean(self.kld)))
            # KLD weight: constant anneal_max when anneal_bias is ~0 (which
            # also avoids the division below), otherwise a sigmoid of
            # global_step centred at anneal_bias / 2 and scaled by anneal_max
            if np.isclose(cfg.anneal_bias, 0):
                self.kld_weight = tf.constant(cfg.anneal_max)
            else:
                self.kld_weight = cfg.anneal_max * tf.sigmoid(
                    (10 / cfg.anneal_bias) * (self.global_step -
                                              (cfg.anneal_bias / 2)))
            self.summaries.append(
                tf.scalar_summary('weight_kld', self.kld_weight))
        with tf.name_scope('mutinfo-cost'):
            if not cfg.autoencoder or not cfg.mutual_info:
                self.mutinfo = tf.zeros([])
            else:
                # zo_mean / zo_logvar were built above under the same
                # autoencoder-and-mutual_info condition
                self.mutinfo = tf.reduce_sum(self.mutinfo_loss(self.z,
                                                               zo_mean, zo_logvar)) / \
                               cfg.batch_size
            self.summaries.append(
                tf.scalar_summary('cost_mutinfo',
                                  tf.reduce_mean(self.mutinfo)))

        with tf.name_scope('cost'):
            # cost = nll + kld_weight * (kld + mutinfo_weight * mutinfo)
            self.cost = self.nll + (self.kld_weight * (self.kld + (cfg.mutinfo_weight * \
                                                                   self.mutinfo)))

        if training and not generator:
            self.train_op = self.train(self.cost)
        else:
            self.train_op = tf.no_op()
Esempio n. 5
0
 def encoder(self, inputs, scope=None):
     '''Encode a sentence; return `(z_mean, z_logvar)`.

     With cfg.convolutional set, a stack of conv1d + batch-norm layers is
     max-reduced over axis 1. Otherwise an RNN (bidirectional when
     cfg.encoder_birnn is set) is run and summarised according to
     cfg.encoder_summary ('laststate', 'attention' or 'mean'); any other
     value raises ValueError. The summary is then mapped to the mean and
     log-variance by two separate linear layers.
     '''
     with tf.variable_scope(scope or "Encoder"):
         if cfg.convolutional:
             widths = list(map(int, cfg.conv_width.split(',')))
             final_idx = len(widths) - 1
             feats = inputs
             for idx, width in enumerate(widths):
                 feats = utils.conv1d(feats,
                                      cfg.hidden_size,
                                      width,
                                      1,
                                      'VALID',
                                      scope='conv%d' % idx)
                 feats = tf.contrib.layers.batch_norm(
                     inputs=feats,
                     is_training=self.training,
                     scope='bn%d' % idx)
                 # no non-linearity after the last conv layer
                 if idx != final_idx:
                     feats = tf.nn.elu(feats)
             z = tf.reduce_max(feats, 1)
         else:

             def to_latent(tensor):
                 # shared tanh(linear(.)) head of the summary branches
                 return tf.nn.tanh(
                     utils.linear(tensor,
                                  cfg.latent_size,
                                  True,
                                  scope='outputs_transform'))

             if cfg.encoder_birnn:
                 # each direction gets half of the hidden size
                 fwd_cell = self.rnn_cell(cfg.num_layers,
                                          cfg.hidden_size // 2)
                 bwd_cell = self.rnn_cell(cfg.num_layers,
                                          cfg.hidden_size // 2)
                 outputs, fs = tf.nn.bidirectional_dynamic_rnn(
                     fwd_cell,
                     bwd_cell,
                     inputs,
                     sequence_length=self.lengths,
                     swap_memory=True,
                     dtype=tf.float32)
                 outputs = tf.concat(2, outputs)
                 # last states of fwd and bkwd
                 fs = tf.concat(1, fs[0] + fs[1])
             else:
                 if cfg.encoder_summary == 'laststate':
                     inputs = tf.reverse_sequence(inputs, self.lengths, 1)
                 uni_cell = self.rnn_cell(cfg.num_layers)
                 outputs, fs = tf.nn.dynamic_rnn(
                     uni_cell,
                     inputs,
                     sequence_length=self.lengths,
                     swap_memory=True,
                     dtype=tf.float32)
                 fs = tf.concat(1, fs)

             if cfg.encoder_summary == 'laststate':
                 fs = utils.highway(fs, scope='encoder_output_highway')
                 z = to_latent(fs)
             else:
                 # the remaining modes work on the flattened per-step outputs
                 flat_out = tf.reshape(outputs, [-1, cfg.hidden_size])
                 flat_out = utils.highway(flat_out,
                                          scope='encoder_output_highway')
                 if cfg.encoder_summary == 'attention':
                     flat_input = tf.reshape(
                         inputs, [-1, inputs.get_shape()[2].value])
                     scores = utils.linear(tf.concat(
                         1, [flat_input, flat_out]),
                                           cfg.hidden_size,
                                           True,
                                           scope='outputs_attention')
                     stepwise = tf.reshape(
                         flat_out, [cfg.batch_size, -1, cfg.hidden_size])
                     scores = tf.reshape(
                         scores, [cfg.batch_size, -1, cfg.hidden_size])
                     scores = tf.nn.softmax(scores, 1)
                     z = to_latent(tf.reduce_sum(stepwise * scores, [1]))
                 elif cfg.encoder_summary == 'mean':
                     projected = utils.linear(flat_out,
                                              cfg.latent_size,
                                              True,
                                              scope='outputs_transform')
                     projected = tf.reshape(
                         projected, [cfg.batch_size, -1, cfg.latent_size])
                     z = tf.nn.tanh(tf.reduce_mean(projected, [1]))
                 else:
                     raise ValueError(
                         'Invalid encoder_summary configuration.')
         z_mean = utils.linear(z,
                               cfg.latent_size,
                               True,
                               scope='encoder_z_mean')
         z_logvar = utils.linear(z,
                                 cfg.latent_size,
                                 True,
                                 scope='encoder_z_logvar')
     return z_mean, z_logvar
Esempio n. 6
0
 def decoder(self, inputs, mle_mode, reuse=None):
     '''Predict next words from the latent representation and word inputs.

     Returns `(output, states, generated)`. `generated` is None in MLE mode;
     otherwise it holds the word ids read back from the RNN output with an
     EOS column appended. In MLE mode the cell and its initial state are
     also stored as `self.decode_cell` / `self.decode_initial`.
     '''
     with tf.variable_scope("Decoder", reuse=reuse):
         latent = utils.highway(self.latent, layer_size=2, f=tf.nn.elu)
         latent = utils.linear(latent,
                               cfg.latent_size,
                               True,
                               0.0,
                               scope='Latent_transform')
         self.latent_transformed = latent

         def initial_for_layer(layer):
             # each layer's initial value is [tanh(preact), preact]
             preact = utils.linear(latent,
                                   cfg.hidden_size,
                                   True,
                                   0.0,
                                   scope='Latent_initial%d' % layer)
             return tf.concat(1, [tf.nn.tanh(preact), preact])

         def with_latent(sequence):
             # append the latent, tiled along axis 1, to every step
             tiled = tf.tile(tf.expand_dims(latent, 1),
                             tf.pack([1, tf.shape(sequence)[1], 1]))
             return tf.concat(2, [sequence, tiled])

         initial = [initial_for_layer(layer)
                    for layer in range(cfg.num_layers)]

         if mle_mode:
             inputs = with_latent(inputs)
             cell = self.rnn_cell(cfg.num_layers,
                                  cfg.hidden_size,
                                  return_states=True,
                                  pretanh=True)
             self.decode_cell = cell
         else:
             cell = self.rnn_cell(cfg.num_layers,
                                  cfg.hidden_size,
                                  latent,
                                  self.embedding,
                                  self.softmax_w,
                                  self.softmax_b,
                                  return_states=True,
                                  pretanh=True,
                                  get_embeddings=cfg.concat_inputs)
         initial_state = cell.initial_state(initial)
         if mle_mode:
             self.decode_initial = initial_state
         rnn_out, _ = tf.nn.dynamic_rnn(cell,
                                        inputs,
                                        initial_state=initial_state,
                                        swap_memory=True,
                                        dtype=tf.float32)
         hidden = cfg.hidden_size
         output = rnn_out[:, :, :hidden]
         if mle_mode:
             generated = None
             skip = 0
         else:
             # the channel right after the hidden units is read as word ids
             words = tf.squeeze(
                 tf.cast(rnn_out[:, :-1, hidden:hidden + 1], tf.int32),
                 [-1])
             eos_column = tf.constant(self.vocab.eos_index,
                                      shape=[cfg.batch_size, 1])
             generated = tf.stop_gradient(
                 tf.concat(1, [words, eos_column]))
             skip = 1
             if cfg.concat_inputs:
                 embeddings = rnn_out[:, :, hidden + 1:hidden + 1 +
                                      cfg.emb_size]
                 # shift by one step, starting from the first input frame
                 embeddings = tf.concat(
                     1, [inputs[:, :1, :], embeddings[:, :-1, :]])
                 embeddings = with_latent(embeddings)
                 skip += cfg.emb_size
         states = rnn_out[:, :, hidden + skip:]
         if cfg.concat_inputs:
             extra = inputs if mle_mode else embeddings
             states = tf.concat(2, [states, extra])
     return output, states, generated
Esempio n. 7
0
    def process_seq(self):
        """Build the embedding -> (batch norm) -> (highway) -> LSTM graph.

        Word-level embeddings are used when ``self._options.reps[0]`` is
        truthy and character-level ones when ``self._options.reps[1]`` is;
        when both are enabled they are concatenated on axis 2.

        Returns:
            A ``(mask, hidden)`` tuple: the mask returned by
            ``sequence_mask`` and the outputs of ``tf.nn.dynamic_rnn``.

        Raises:
            ValueError: if neither input representation is enabled.
        """
        opts = self._options
        use_words = opts.reps[0]
        use_chars = opts.reps[1]
        if not (use_words or use_chars):
            # Without this guard the method would only fail later with an
            # UnboundLocalError on `input_emb`, hiding the real cause.
            raise ValueError(
                'process_seq requires a word-level (reps[0]) or '
                'character-level (reps[1]) input representation.')

        def decayed_variable(name, shape, fan_in):
            # Create a truncated-normal variable with stddev 1/sqrt(fan_in)
            # and register its L2 norm in the 'losses' collection.
            variable = tf.get_variable(
                name=name,
                shape=shape,
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / math.sqrt(float(fan_in))))
            tf.add_to_collection('losses', tf.nn.l2_loss(variable))
            return variable

        # Getting input embeddings from inputs
        if use_words:
            self._examples = tf.cast(
                tf.verify_tensor_all_finite(tf.cast(self._examples, 'float32'),
                                            'Nan'), 'int64')

            self._wordemb = tf.get_variable(
                name='wordemb',
                shape=[opts.vocab_size, opts.w_emb_dim],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / math.sqrt(float(opts.vocab_size))))

            w_embeddings = tf.nn.embedding_lookup(
                self._wordemb, tf.reshape(self._examples, [-1]))
            w_input_emb = tf.reshape(
                w_embeddings,
                [opts.batch_size, opts.max_seq_length, opts.w_emb_dim])

            mask = sequence_mask(self._examples)

        if use_chars:
            # when both representations are on, this mask replaces the
            # word-level one computed above
            mask, mask_c = sequence_mask(self._examplesChar, char=True)

            if opts.positionEmbeddings:
                # one embedding per (character, position-in-word) pair; row 0
                # is a constant all-zero entry prepended to the trained rows
                self._charemb_trunc = tf.get_variable(
                    name='charemb',
                    shape=[
                        opts.char_vocab_size - 1, opts.max_word_length,
                        opts.c_emb_dim
                    ],
                    initializer=tf.truncated_normal_initializer(
                        stddev=1.0 /
                        math.sqrt(float(opts.char_vocab_size))))
                self._char_pad = tf.constant(
                    0., shape=[1, opts.max_word_length, opts.c_emb_dim])
                self._charemb = tf.concat(
                    0, [self._char_pad, self._charemb_trunc])

                position_indexes = tf.cast(tf.range(opts.max_word_length),
                                           'int64')
                indexes = tf.reshape(
                    self._examplesChar +
                    opts.char_vocab_size * position_indexes, [-1])
                c_embeddings = tf.gather(
                    tf.reshape(self._charemb, [
                        opts.char_vocab_size * opts.max_word_length,
                        opts.c_emb_dim
                    ]), indexes)
            else:
                # plain character table; row 0 is a constant all-zero entry
                self._charemb_trunc = tf.get_variable(
                    name='charemb',
                    shape=[opts.char_vocab_size - 1, opts.c_emb_dim],
                    initializer=tf.truncated_normal_initializer(
                        stddev=1.0 /
                        math.sqrt(float(opts.char_vocab_size))))
                self._char_pad = tf.constant(0., shape=[1, opts.c_emb_dim])
                self._charemb = tf.concat(
                    0, [self._char_pad, self._charemb_trunc])

                c_embeddings = tf.nn.embedding_lookup(
                    self._charemb, tf.reshape(self._examplesChar, [-1]))

            if opts.charLayer == "conv":
                self._convfilters = []
                for w, d in zip(opts.window_sizes, opts.filter_dims):
                    self._convfilters.append(
                        decayed_variable('filter%d' % w,
                                         [w, opts.c_emb_dim, d],
                                         w * opts.c_emb_dim))
                    # NOTE(review): CE is invoked on every iteration with the
                    # filters collected so far and only the last result is
                    # kept -- confirm whether this call was meant to sit
                    # after the loop.
                    c_input_emb = tf.reshape(
                        CE(
                            tf.reshape(c_embeddings, [
                                opts.batch_size * opts.max_seq_length,
                                opts.max_word_length, opts.c_emb_dim
                            ]), self._convfilters),
                        [opts.batch_size, opts.max_seq_length, -1])

            elif opts.charLayer == "LSTM":
                self.char_cell_fw = tf.nn.rnn_cell.LSTMCell(
                    opts.charLSTM_dim,
                    state_is_tuple=False,
                    activation=tf.nn.relu)
                self.char_cell_bw = tf.nn.rnn_cell.LSTMCell(
                    opts.charLSTM_dim,
                    state_is_tuple=False,
                    activation=tf.nn.relu)
                c_input_emb = tf.reshape(
                    CE_RNN(
                        tf.reshape(c_embeddings, [
                            opts.batch_size * opts.max_seq_length,
                            opts.max_word_length, opts.c_emb_dim
                        ]), self.char_cell_fw, self.char_cell_bw,
                        tf.reshape(tf.reduce_sum(mask_c, 2), [-1])),
                    [
                        opts.batch_size, opts.max_seq_length,
                        opts.charLSTM_dim * 2
                    ])

            else:
                # no character layer: flatten the raw character embeddings
                c_input_emb = tf.reshape(
                    c_embeddings,
                    [opts.batch_size, opts.max_seq_length, -1])

        # the guard at the top guarantees at least one representation is set
        if use_words and use_chars:
            input_emb = tf.concat(2, [w_input_emb, c_input_emb])
        elif use_words:
            input_emb = w_input_emb
        else:
            input_emb = c_input_emb

        input_emb = tf.verify_tensor_all_finite(input_emb, 'Nan')
        # Batch normalization
        if opts.batch_norm:
            self.batch_normalizer = batch_norm()
            input_emb = self.batch_normalizer(input_emb, self._training)

            input_emb = tf.verify_tensor_all_finite(input_emb, 'Nan')

        # Highway Layer
        if opts.highway_layers > 0:
            self._highway_w = []
            self._highway_wg = []
            self._highway_b = []
            self._highway_bg = []
            for i in range(opts.highway_layers):
                # creation order (w, b, wg, bg) is kept so the variables and
                # the 'losses' collection are populated in the same sequence
                self._highway_w.append(
                    decayed_variable('highway_w%d' % i, [opts.emb_dim] * 2,
                                     opts.emb_dim))
                self._highway_b.append(
                    decayed_variable('highway_b%d' % i, [opts.emb_dim],
                                     opts.emb_dim))
                self._highway_wg.append(
                    decayed_variable('highway_wg%d' % i, [opts.emb_dim] * 2,
                                     opts.emb_dim))
                self._highway_bg.append(
                    decayed_variable('highway_bg%d' % i, [opts.emb_dim],
                                     opts.emb_dim))
            input_emb = tf.reshape(
                highway(tf.reshape(input_emb, [-1, opts.emb_dim]),
                        self._highway_w, self._highway_b, self._highway_wg,
                        self._highway_bg),
                [opts.batch_size, opts.max_seq_length, opts.emb_dim])

        input_emb = tf.verify_tensor_all_finite(input_emb, 'Nan')
        # LSTM
        self.cell = tf.nn.rnn_cell.LSTMCell(opts.hidden_dim,
                                            state_is_tuple=False,
                                            activation=tf.nn.relu)
        if self._training and opts.dropout < 1.0:
            # opts.dropout is passed as the output keep probability
            self.cell = tf.nn.rnn_cell.DropoutWrapper(
                self.cell, output_keep_prob=opts.dropout)
        if opts.hidden_layers > 1:
            self.cell = tf.nn.rnn_cell.MultiRNNCell(
                [self.cell] * opts.hidden_layers)
        hidden, _ = tf.nn.dynamic_rnn(self.cell,
                                      input_emb,
                                      sequence_length=tf.reduce_sum(mask, 1),
                                      dtype='float32')

        hidden = tf.verify_tensor_all_finite(hidden, 'Nan')
        print(hidden.get_shape())
        return mask, hidden
Esempio n. 8
0
    def process_output_seq(self, allVoc=True):
        """Build the output-side embedding tensor.

        Depending on ``self._options.reps`` the result is a word-level lookup
        (``reps[2]``), a character-level representation (``reps[3]``), or a
        combination of both (gated when ``wordCharGate`` is set, otherwise
        concatenated along axis 1).  If ``output_highway_layers > 0`` the
        result is additionally passed through ``highway`` and reshaped back
        to its original shape.

        Args:
            allVoc: if true, embed the whole evaluation vocabulary
                (``eval_vocab_size`` entries); otherwise embed the ids held
                in ``self._labels`` / ``self._labelsChar``.

        Returns:
            The output embedding tensor.

        Raises:
            ValueError: if neither ``reps[2]`` nor ``reps[3]`` is enabled.
        """
        opts = self._options
        use_words = opts.reps[2]
        use_chars = opts.reps[3]
        if not (use_words or use_chars):
            # Without this guard no branch below assigns ``output_emb`` and
            # the method fails later with an unbound-local error.
            raise ValueError(
                "process_output_seq needs at least one output representation: "
                "enable reps[2] (words) and/or reps[3] (characters)")

        if use_words:
            w_output_embeddings = self._output_word_embeddings(allVoc)
        if use_chars:
            c_output_emb = self._output_char_embeddings(allVoc)

        if use_words and use_chars:
            if opts.wordCharGate:
                gate_dim = opts.hidden_dim
                self._gateWeight = self._l2_regularized_variable(
                    'wg', [2 * gate_dim, gate_dim], gate_dim)
                self._gateBias = self._l2_regularized_variable(
                    'bg', [gate_dim], gate_dim)
                output_emb = word_char_gate(w_output_embeddings, c_output_emb,
                                            self._gateWeight, self._gateBias)
            else:
                # Shape trace kept from the original implementation.
                print(w_output_embeddings.get_shape())
                print(c_output_emb.get_shape())
                output_emb = tf.concat(1, [w_output_embeddings, c_output_emb])
        elif use_words:
            output_emb = w_output_embeddings
        else:
            output_emb = c_output_emb

        # Highway Layer
        # NOTE(review): the highway weights are hidden_dim x hidden_dim, so
        # this presumably requires output_emb's last dimension to equal
        # hidden_dim -- confirm against the option validation.
        num_highway = opts.output_highway_layers
        if num_highway > 0:
            shape = tf.shape(output_emb)
            dim = opts.hidden_dim
            self._output_highway_w = []
            self._output_highway_wg = []
            self._output_highway_b = []
            self._output_highway_bg = []
            for i in range(num_highway):
                # Creation order (w, b, wg, bg) matches the original code.
                self._output_highway_w.append(
                    self._l2_regularized_variable('output_highway_w%d' % i,
                                                  [dim] * 2, dim))
                self._output_highway_b.append(
                    self._l2_regularized_variable('output_highway_b%d' % i,
                                                  [dim], dim))
                self._output_highway_wg.append(
                    self._l2_regularized_variable('output_highway_wg%d' % i,
                                                  [dim] * 2, dim))
                self._output_highway_bg.append(
                    self._l2_regularized_variable('output_highway_bg%d' % i,
                                                  [dim], dim))
            output_emb = tf.reshape(
                highway(output_emb, self._output_highway_w,
                        self._output_highway_b, self._output_highway_wg,
                        self._output_highway_bg), shape)
        return output_emb

    def _l2_regularized_variable(self, name, shape, fan):
        """Create a truncated-normal variable (stddev 1/sqrt(fan)) and add its L2 loss to 'losses'."""
        variable = tf.get_variable(
            name=name,
            shape=shape,
            initializer=tf.truncated_normal_initializer(
                stddev=1.0 / math.sqrt(float(fan))))
        tf.add_to_collection('losses', tf.nn.l2_loss(variable))
        return variable

    def _output_word_embeddings(self, allVoc):
        """Create ``self._output_wordemb`` and look up the requested word ids in it."""
        opts = self._options
        self._output_wordemb = tf.get_variable(
            name="output_wordemb",
            shape=[opts.vocab_size, opts.w_emb_out_dim],
            initializer=tf.truncated_normal_initializer(
                stddev=1.0 / math.sqrt(float(opts.w_emb_out_dim))))
        if allVoc:
            word_ids = restrict_voc(tf.range(opts.eval_vocab_size),
                                    opts.vocab_size)
        else:
            word_ids = tf.reshape(self._labels, [-1])
        return tf.nn.embedding_lookup(self._output_wordemb, word_ids)

    def _build_output_char_table(self):
        """Create the output-side character table with an all-zero first entry and return it."""
        opts = self._options
        # With position embeddings every character owns one vector per
        # position in the word; otherwise a single vector.
        if opts.positionEmbeddings:
            entry_shape = [opts.max_word_length, opts.c_emb_dim]
        else:
            entry_shape = [opts.c_emb_dim]
        self._output_charemb_trunc = tf.get_variable(
            name='output_charemb',
            shape=[opts.char_vocab_size - 1] + entry_shape,
            initializer=tf.truncated_normal_initializer(
                stddev=1.0 / math.sqrt(float(opts.char_vocab_size))))
        # Constant zero entry prepended at index 0 (not trainable).
        self._char_pad_out = tf.constant(0., shape=[1] + entry_shape)
        self._output_charemb = tf.concat(
            0, [self._char_pad_out, self._output_charemb_trunc])
        return self._output_charemb

    def _output_char_embeddings(self, allVoc):
        """Return the character-level output representation for the requested words."""
        opts = self._options
        # Either share the input-side character parameters (table, conv
        # filters, LSTM cells) or give the output side its own set.
        share_input_layer = opts.reps[1] and opts.reuse_character_layer
        if share_input_layer:
            char_table = self._charemb
        else:
            char_table = self._build_output_char_table()

        if allVoc:
            char_ids = tf.pack(opts.train_set.wid_to_charid)
            num_rows = opts.eval_vocab_size
        else:
            char_ids = self._labelsChar
            num_rows = (opts.batch_size * opts.max_seq_length +
                        opts.noise_length)

        if opts.positionEmbeddings:
            position_indexes = tf.cast(tf.range(opts.max_word_length),
                                       'int64')
            # NOTE(review): rows are addressed as id + char_vocab_size * pos,
            # while the table is flattened from [char, position, dim]; with
            # that layout only position 0 of id 0 lands on the zero entry.
            # Confirm this indexing is intended before relying on padding
            # being embedded as zeros.
            indexes = tf.reshape(
                char_ids + opts.char_vocab_size * position_indexes, [-1])
            c_output_embeddings = tf.gather(
                tf.reshape(char_table, [
                    opts.char_vocab_size * opts.max_word_length,
                    opts.c_emb_dim
                ]), indexes)
        else:
            c_output_embeddings = tf.nn.embedding_lookup(
                char_table, tf.reshape(char_ids, [-1]))

        if opts.charLayer == 'conv':
            if share_input_layer:
                conv_filters = self._convfilters
            else:
                self._output_convfilters = [
                    tf.get_variable(
                        name='output_filter%d' % w,
                        shape=[w, opts.c_emb_dim, d],
                        initializer=tf.truncated_normal_initializer(
                            stddev=1.0 /
                            math.sqrt(float(w * opts.c_emb_dim))))
                    for w, d in zip(opts.window_sizes, opts.filter_dims)
                ]
                conv_filters = self._output_convfilters
            return CE(
                tf.reshape(c_output_embeddings, [
                    num_rows, opts.max_word_length, opts.c_emb_dim
                ]), conv_filters)

        if opts.charLayer == 'LSTM':
            if share_input_layer:
                cell_fw = self.char_cell_fw
                cell_bw = self.char_cell_bw
            else:
                self.output_char_cell_fw = tf.nn.rnn_cell.LSTMCell(
                    opts.charLSTM_dim,
                    state_is_tuple=False,
                    activation=tf.nn.relu)
                self.output_char_cell_bw = tf.nn.rnn_cell.LSTMCell(
                    opts.charLSTM_dim,
                    state_is_tuple=False,
                    activation=tf.nn.relu)
                cell_fw = self.output_char_cell_fw
                cell_bw = self.output_char_cell_bw
            # sign() is 1 for positive ids, so the row sum counts them;
            # presumably id 0 is padding and this is the word length.
            mask_c = tf.sign(char_ids)
            return CE_RNN(tf.reshape(c_output_embeddings, [
                num_rows, opts.max_word_length, opts.c_emb_dim
            ]),
                          cell_fw,
                          cell_bw,
                          tf.reduce_sum(mask_c, 1),
                          name='biRNNOut')

        # No character layer: flatten each word's character vectors.
        return tf.reshape(c_output_embeddings, [num_rows, -1])
Esempio n. 9
0
    def _embed(self):
        """
        Build the embedding layer shared by the passage and the question.

        Creates ``self.word_embeddings`` (non-trainable) and
        ``self.char_embeddings``, both initialised from the vocabulary, and
        stores the final representations in ``self.p_emb`` / ``self.q_emb``.
        Passage and question go through the same ``char_conv`` and
        ``highway`` parameters: the passage call creates them, the question
        call reuses them.
        """
        vocab = self.vocab

        with tf.variable_scope('embeddings'):
            self.word_embeddings = tf.get_variable(
                'word_embeddings',
                shape=(vocab.word_size(), vocab.word_embed_dim),
                initializer=tf.constant_initializer(vocab.word_embeddings),
                trainable=False)
            self.char_embeddings = tf.get_variable(
                'char_embeddings',
                shape=(vocab.char_size(), vocab.char_embed_dim),
                initializer=tf.constant_initializer(vocab.char_embeddings))

            # Character channel.  Every stage handles the passage first and
            # the question second so that graph ops and variables are
            # created in a fixed order.
            char_shape = [-1, self.max_char_len, self.char_embed_dim]
            char_feats = [
                tf.reshape(
                    tf.nn.embedding_lookup(self.char_embeddings, char_ids),
                    char_shape) for char_ids in (self.ph, self.qh)
            ]
            char_feats = [
                tf.nn.dropout(feat, 1.0 - 0.5 * self.dropout)
                for feat in char_feats
            ]

            # Bidaf style conv - highway encoder
            char_feats = [
                conv(feat,
                     self.hidden_size,
                     bias=True,
                     activation=tf.nn.relu,
                     kernel_size=3,
                     name="char_conv",
                     reuse=reuse_flag)
                for feat, reuse_flag in zip(char_feats, (None, True))
            ]

            # Max over axis 1, then regroup the rows per sequence.
            char_feats = [tf.reduce_max(feat, axis=1) for feat in char_feats]
            ph_emb, qh_emb = [
                tf.reshape(feat, [-1, seq_len, feat.shape[-1]])
                for feat, seq_len in zip(char_feats, (self.max_p_len,
                                                      self.max_q_len))
            ]

            # NOTE: an analogous ``py_*`` embedding channel (same
            # lookup / dropout / conv / max / reshape pipeline, concatenated
            # to the word vectors) used to sit here and is currently disabled.

            # Word channel.
            p_emb, q_emb = [
                tf.nn.dropout(
                    tf.nn.embedding_lookup(self.word_embeddings, word_ids),
                    1.0 - 0.5 * self.dropout)
                for word_ids in (self.p, self.q)
            ]

            # Join word and character features on the last axis.
            p_emb = tf.concat([p_emb, ph_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

            self.p_emb = highway(p_emb,
                                 size=self.hidden_size,
                                 scope="highway",
                                 dropout=self.dropout,
                                 reuse=None)
            self.q_emb = highway(q_emb,
                                 size=self.hidden_size,
                                 scope="highway",
                                 dropout=self.dropout,
                                 reuse=True)
Esempio n. 10
0
    def process_seq(self):
        # Getting input embeddings from inputs
        # Mots ou lemmes
        if self._options.reps[0] or self._options.reps[1]:
            self._wordemb = tf.get_variable(
                name='wordemb',
                shape=[
                    self._options.input_vocab_size, self._options.w_emb_dim
                ],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 /
                    math.sqrt(float(self._options.input_vocab_size))))

            w_embeddings = tf.nn.embedding_lookup(
                self._wordemb, tf.reshape(self._examples, [-1]))
            w_input_emb = tf.reshape(
                w_embeddings,
                [-1, self._options.max_seq_length, self._options.w_emb_dim])

            mask = sequence_mask(self._examples)

        # Tags
        if self._options.reps[3]:
            self._tagEmbs = []
            tag_input_embs = []
            for i in range(self._options.max_tag_number):
                self._tagEmbs.append(
                    tf.get_variable(
                        name="tagEmbs%d" % i,
                        shape=[
                            self._options.tags_vocab_size[i],
                            self._options.t_emb_dim
                        ],
                        initializer=tf.truncated_normal_initializer(
                            stddev=1.0 /
                            math.sqrt(float(self._options.t_emb_dim)))))

                tag_embeddings = tf.nn.embedding_lookup(
                    self._tagEmbs[i],
                    tf.reshape(self._examplesTags[:, :, i], [-1]))
                tag_input_embs.append(
                    tf.reshape(tag_embeddings, [
                        -1, self._options.max_seq_length,
                        self._options.t_emb_dim
                    ]))

            if self._options.tagLayer == "LSTM":
                tag_embeddings = tf.concat(axis=2, values=tag_input_embs)
                self.tag_cell_fw = tf.contrib.rnn.LSTMCell(
                    self._options.tagLSTM_dim,
                    state_is_tuple=False,
                    activation=tf.nn.relu)
                self.tag_cell_bw = tf.contrib.rnn.LSTMCell(
                    self._options.tagLSTM_dim,
                    state_is_tuple=False,
                    activation=tf.nn.relu)
                if self._training and self._options.dropout < 1.0:
                    self.tag_cell_fw = tf.contrib.rnn.DropoutWrapper(
                        self.tag_cell_fw,
                        output_keep_prob=self._options.dropout)
                    self.tag_cell_bw = tf.contrib.rnn.DropoutWrapper(
                        self.tag_cell_bw,
                        output_keep_prob=self._options.dropout)
                size = int(self._training) * self._options.batch_size + int(
                    not self._training) * 32
                tag_input_emb = tf.reshape(
                    CE_RNN(
                        tf.reshape(tag_embeddings, [
                            -1, self._options.max_tag_number,
                            self._options.t_emb_dim
                        ]), self.tag_cell_fw, self.tag_cell_bw,
                        tf.constant(self._options.max_tag_number,
                                    dtype='int64',
                                    shape=[
                                        size * self._options.max_seq_length,
                                    ])), [
                                        -1, self._options.max_seq_length,
                                        self._options.charLSTM_dim * 2
                                    ])

            else:
                tag_input_emb = tf.concat(axis=2, values=tag_input_embs)

        # Caracteres
        if self._options.reps[2]:
            mask, mask_c = sequence_mask(self._examplesChar, char=True)

            self._charemb_trunc = tf.get_variable(
                name='charemb',
                shape=[
                    self._options.char_vocab_size - 1, self._options.c_emb_dim
                ],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 /
                    math.sqrt(float(self._options.char_vocab_size))))
            self._char_pad = tf.constant(0.,
                                         shape=[1, self._options.c_emb_dim])
            self._charemb = tf.concat(
                axis=0, values=[self._char_pad, self._charemb_trunc])

            c_embeddings = tf.nn.embedding_lookup(
                self._charemb, tf.reshape(self._examplesChar, [-1]))

            if self._options.charLayer == "conv":
                self._convfilters = []
                for w, d in zip(self._options.window_sizes,
                                self._options.filter_dims):
                    self._convfilters.append(
                        tf.get_variable(
                            name='filter%d' % w,
                            shape=[w, self._options.c_emb_dim, d],
                            initializer=tf.truncated_normal_initializer(
                                stddev=1.0 /
                                math.sqrt(float(w *
                                                self._options.c_emb_dim)))))
                    weight_decay = tf.nn.l2_loss(self._convfilters[-1])
                    tf.add_to_collection('losses', weight_decay)
                c_input_emb = tf.reshape(
                    CE(
                        tf.reshape(c_embeddings, [
                            -1, self._options.max_word_length,
                            self._options.c_emb_dim
                        ]), self._convfilters), [
                            -1, self._options.max_seq_length,
                            self._options.char_emb_dim
                        ])

            elif self._options.charLayer == "LSTM":
                self.char_cell_fw = tf.contrib.rnn.LSTMCell(
                    self._options.charLSTM_dim,
                    state_is_tuple=False,
                    activation=tf.nn.relu)
                self.char_cell_bw = tf.contrib.rnn.LSTMCell(
                    self._options.charLSTM_dim,
                    state_is_tuple=False,
                    activation=tf.nn.relu)
                if self._training and self._options.dropout < 1.0:
                    self.char_cell_fw = tf.contrib.rnn.DropoutWrapper(
                        self.char_cell_fw,
                        output_keep_prob=self._options.dropout)
                    self.char_cell_bw = tf.contrib.rnn.DropoutWrapper(
                        self.char_cell_bw,
                        output_keep_prob=self._options.dropout)
                c_input_emb = tf.reshape(
                    CE_RNN(
                        tf.reshape(c_embeddings, [
                            -1, self._options.max_word_length,
                            self._options.c_emb_dim
                        ]), self.char_cell_fw, self.char_cell_bw,
                        tf.reshape(tf.reduce_sum(mask_c, 2), [-1])), [
                            -1, self._options.max_seq_length,
                            self._options.charLSTM_dim * 2
                        ])

            else:
                c_input_emb = tf.reshape(c_embeddings, [
                    -1, self._options.max_seq_length,
                    self._options.char_emb_dim
                ])

        embs = []
        if (self._options.reps[0] or self._options.reps[1]):
            embs.append(w_input_emb)
        if self._options.reps[2]:
            embs.append(c_input_emb)
        if self._options.reps[3]:
            embs.append(tag_input_emb)
        input_emb = tf.concat(axis=2, values=embs)

        # Batch normalization
        if self._options.batch_norm:
            self.batch_normalizer = batch_norm()
            input_emb = self.batch_normalizer(input_emb, self._training)

        # Highway Layer
        if self._options.highway_layers > 0:
            self._highway_w = []
            self._highway_wg = []
            self._highway_b = []
            self._highway_bg = []
            for i in range(self._options.highway_layers):
                self._highway_w.append(
                    tf.get_variable(
                        name='highway_w%d' % i,
                        shape=[self._options.emb_dim] * 2,
                        initializer=tf.truncated_normal_initializer(
                            stddev=1.0 /
                            math.sqrt(float(self._options.emb_dim)))))
                weight_decay = tf.nn.l2_loss(self._highway_w[-1])
                tf.add_to_collection('losses', weight_decay)
                self._highway_b.append(
                    tf.get_variable(
                        name='highway_b%d' % i,
                        shape=[self._options.emb_dim],
                        initializer=tf.truncated_normal_initializer(
                            stddev=1.0 /
                            math.sqrt(float(self._options.emb_dim)))))
                weight_decay = tf.nn.l2_loss(self._highway_b[-1])
                tf.add_to_collection('losses', weight_decay)
                self._highway_wg.append(
                    tf.get_variable(
                        name='highway_wg%d' % i,
                        shape=[self._options.emb_dim] * 2,
                        initializer=tf.truncated_normal_initializer(
                            stddev=1.0 /
                            math.sqrt(float(self._options.emb_dim)))))
                weight_decay = tf.nn.l2_loss(self._highway_wg[-1])
                tf.add_to_collection('losses', weight_decay)
                self._highway_bg.append(
                    tf.get_variable(
                        name='highway_bg%d' % i,
                        shape=[self._options.emb_dim],
                        initializer=tf.truncated_normal_initializer(
                            stddev=1.0 /
                            math.sqrt(float(self._options.emb_dim)))))
                weight_decay = tf.nn.l2_loss(self._highway_bg[-1])
                tf.add_to_collection('losses', weight_decay)
            input_emb = tf.reshape(
                highway(tf.reshape(input_emb, [-1, self._options.emb_dim]),
                        self._highway_w, self._highway_b, self._highway_wg,
                        self._highway_bg),
                [-1, self._options.max_seq_length, self._options.emb_dim])

        # LSTM
        self.cell = tf.contrib.rnn.LSTMCell(self._options.hidden_dim,
                                            state_is_tuple=False,
                                            activation=tf.nn.relu)
        if self._training and self._options.dropout < 1.0:
            self.cell = tf.contrib.rnn.DropoutWrapper(
                self.cell, output_keep_prob=self._options.dropout)
        if self._options.hidden_layers > 1:
            self.cell = tf.contrib.rnn.MultiRNNCell(
                [self.cell] * self._options.hidden_layers)
        hidden, _ = tf.nn.dynamic_rnn(self.cell,
                                      input_emb,
                                      sequence_length=tf.reduce_sum(mask, 1),
                                      dtype='float32')

        print(hidden.get_shape())
        return mask, hidden
Esempio n. 11
0
    def process_output_seq(self, allVoc=True):
        """Build the output-side embedding matrix from the enabled representations.

        Up to three parts are built and concatenated along axis 1:

        * word embeddings, when ``reps[4]`` or ``reps[5]`` is set;
        * character-based embeddings, when ``reps[6]`` is set (through a
          'conv' or 'LSTM' character layer, or a plain reshape otherwise);
        * tag embeddings, when ``reps[7]`` is set and ``reps[8]`` is not.

        The concatenation is then optionally passed through
        ``output_highway_layers`` highway layers.

        Side effects: creates TensorFlow variables, stores several of them as
        attributes on ``self`` and adds L2 penalties to the ``'losses'``
        collection for the conv filters and the highway parameters.

        Args:
            allVoc: If True, ids come from the vocabulary-wide tables
                (``tf.range(eval_vocab_size)`` remapped with
                ``eval_word_map``, ``train_set.wid_to_charid`` and
                ``train_set.wid_to_tagsid``). If False, ids come from
                ``self._labels``, ``self._labelsChar`` and ``self._evalTags``.

        Returns:
            The concatenated (and, if enabled, highway-transformed) output
            embedding tensor.
        """
        opts = self._options
        use_words = opts.reps[4] or opts.reps[5]
        use_chars = opts.reps[6]
        use_tags = opts.reps[7] and not opts.reps[8]

        def _regularized_variable(name, shape, stddev):
            """Create a truncated-normal variable and register its L2 loss."""
            variable = tf.get_variable(
                name=name,
                shape=shape,
                initializer=tf.truncated_normal_initializer(stddev=stddev))
            tf.add_to_collection('losses', tf.nn.l2_loss(variable))
            return variable

        def _bi_lstm_cells(num_units):
            """Return a (forward, backward) pair of ReLU LSTM cells.

            Both cells are created first and only then wrapped, so the
            creation order matches a hand-written fw/bw construction.
            """
            cells = [
                tf.contrib.rnn.LSTMCell(num_units,
                                        state_is_tuple=False,
                                        activation=tf.nn.relu)
                for _ in range(2)
            ]
            if self._training and opts.dropout < 1.0:
                # The `dropout` option is passed as the output keep probability.
                cells = [
                    tf.contrib.rnn.DropoutWrapper(
                        cell, output_keep_prob=opts.dropout)
                    for cell in cells
                ]
            return cells[0], cells[1]

        # Words
        if use_words:
            self._output_wordemb = tf.get_variable(
                name="output_wordemb",
                shape=[opts.output_vocab_size, opts.w_emb_out_dim],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / math.sqrt(float(opts.w_emb_out_dim))))
            if allVoc:
                word_ids = restrict_voc_map(tf.range(opts.eval_vocab_size),
                                            opts.eval_word_map)
            else:
                word_ids = tf.reshape(self._labels, [-1])
            w_output_embeddings = tf.nn.embedding_lookup(
                self._output_wordemb, word_ids)

        # Characters
        if use_chars:
            # Either share the input-side character parameters or create a
            # dedicated set of output-side ones.
            reuse_input_layer = (opts.reps[2]
                                 and opts.reuse_character_layer)

            if reuse_input_layer:
                char_table = self._charemb
            else:
                self._output_charemb_trunc = tf.get_variable(
                    name='output_charemb',
                    shape=[opts.char_vocab_size - 1, opts.c_emb_dim],
                    initializer=tf.truncated_normal_initializer(
                        stddev=1.0 /
                        math.sqrt(float(opts.char_vocab_size))))
                # Row 0 of the table is a constant all-zero row, prepended to
                # the trainable rows.
                self._char_pad_out = tf.constant(
                    0., shape=[1, opts.c_emb_dim])
                self._output_charemb = tf.concat(
                    axis=0,
                    values=[self._char_pad_out, self._output_charemb_trunc])
                char_table = self._output_charemb

            if allVoc:
                char_ids = tf.stack(opts.train_set.wid_to_charid)
                leading_dim = opts.eval_vocab_size
            else:
                char_ids = self._labelsChar
                leading_dim = -1

            c_output_embeddings = tf.nn.embedding_lookup(
                char_table, tf.reshape(char_ids, [-1]))
            per_word_shape = [
                leading_dim, opts.max_word_length, opts.c_emb_dim
            ]

            if opts.charLayer == 'conv':
                if reuse_input_layer:
                    conv_filters = self._convfilters
                else:
                    self._output_convfilters = []
                    for w, d in zip(opts.window_sizes, opts.filter_dims):
                        self._output_convfilters.append(
                            _regularized_variable(
                                'output_filter%d' % w,
                                [w, opts.c_emb_dim, d],
                                1.0 / math.sqrt(float(w * opts.c_emb_dim))))
                    conv_filters = self._output_convfilters
                c_output_emb = CE(
                    tf.reshape(c_output_embeddings, per_word_shape),
                    conv_filters)

            elif opts.charLayer == 'LSTM':
                if reuse_input_layer:
                    cell_fw = self.char_cell_fw
                    cell_bw = self.char_cell_bw
                else:
                    (self.output_char_cell_fw,
                     self.output_char_cell_bw) = _bi_lstm_cells(
                         opts.charLSTM_dim)
                    cell_fw = self.output_char_cell_fw
                    cell_bw = self.output_char_cell_bw
                # sign() maps every non-zero id to 1, so summing along axis 1
                # counts the non-zero character ids of each row.
                mask_c = tf.sign(char_ids)
                c_output_emb = CE_RNN(
                    tf.reshape(c_output_embeddings, per_word_shape),
                    cell_fw,
                    cell_bw,
                    tf.reduce_sum(mask_c, 1),
                    name='biRNNOut')

            elif reuse_input_layer:
                # No character layer: flatten the character embeddings of
                # each row into one vector.
                if allVoc:
                    n_rows = opts.eval_vocab_size
                else:
                    n_rows = (opts.batch_size * opts.max_seq_length +
                              opts.noise_length)
                c_output_emb = tf.reshape(c_output_embeddings, [n_rows, -1])

            else:
                # Fixed: this branch used to read `filters_dims`, which does
                # not match the `filter_dims` option used for the conv
                # filters above.
                c_output_emb = tf.reshape(c_output_embeddings,
                                          [-1, sum(opts.filter_dims)])

        # Tags
        if use_tags:
            # Kept as an (empty) attribute for compatibility; the output side
            # looks tags up in the existing self._tagEmbs tables.
            self._output_tagEmbs = []
            tag_output_embs = []
            tags_map = tf.stack(opts.train_set.wid_to_tagsid)
            for i in range(opts.max_tag_number):
                tag_ids = tags_map[:, i] if allVoc else self._evalTags[:, i]
                tag_output_embeddings = tf.nn.embedding_lookup(
                    self._tagEmbs[i], tf.reshape(tag_ids, [-1]))
                tag_output_embs.append(
                    tf.reshape(tag_output_embeddings, [-1, opts.t_emb_dim]))

            tag_output_emb = tf.concat(axis=1, values=tag_output_embs)
            if opts.tagLayer == "LSTM":
                (self.output_tag_cell_fw,
                 self.output_tag_cell_bw) = _bi_lstm_cells(opts.tagLSTM_dim)
                # NOTE(review): the number of rows is chosen from
                # self._training while the ids above are chosen from allVoc;
                # presumably callers keep the two in sync -- confirm.
                if self._training:
                    size = (opts.batch_size * opts.max_seq_length +
                            opts.noise_length)
                else:
                    size = opts.eval_vocab_size
                # Every row is given the full length max_tag_number.
                tag_output_emb = CE_RNN(
                    tf.reshape(tag_output_emb,
                               [-1, opts.max_tag_number, opts.t_emb_dim]),
                    self.output_tag_cell_fw,
                    self.output_tag_cell_bw,
                    tf.constant(opts.max_tag_number,
                                dtype='int64',
                                shape=[size]),
                    name='tagBiRNNOut')

        output_embs = []
        if use_words:
            output_embs.append(w_output_embeddings)
        if use_chars:
            output_embs.append(c_output_emb)
        if use_tags:
            output_embs.append(tag_output_emb)
        output_emb = tf.concat(axis=1, values=output_embs)
        shape = tf.shape(output_emb)

        # Highway layers
        if opts.output_highway_layers > 0:
            hidden_dim = opts.hidden_dim
            stddev = 1.0 / math.sqrt(float(hidden_dim))
            self._output_highway_w = []
            self._output_highway_wg = []
            self._output_highway_b = []
            self._output_highway_bg = []
            for i in range(opts.output_highway_layers):
                # Creation order (w, b, wg, bg) is kept so that the order of
                # the entries in the 'losses' collection is unchanged.
                self._output_highway_w.append(
                    _regularized_variable('output_highway_w%d' % i,
                                          [hidden_dim] * 2, stddev))
                self._output_highway_b.append(
                    _regularized_variable('output_highway_b%d' % i,
                                          [hidden_dim], stddev))
                self._output_highway_wg.append(
                    _regularized_variable('output_highway_wg%d' % i,
                                          [hidden_dim] * 2, stddev))
                self._output_highway_bg.append(
                    _regularized_variable('output_highway_bg%d' % i,
                                          [hidden_dim], stddev))
            # Restore the pre-highway shape after the highway transform.
            output_emb = tf.reshape(
                highway(output_emb, self._output_highway_w,
                        self._output_highway_b, self._output_highway_wg,
                        self._output_highway_bg), shape)
        return output_emb