def encoder(self, inputs):
    '''Encode sentence and return a latent representation in MLE mode.

    Runs `inputs` through a (bi)directional RNN built by `self.rnn_cell`
    with `return_states=True`, transforms the per-timestep states, sums
    them over the time axis and projects the result to the latent mean and
    log-variance.

    Args:
        inputs: input tensor for the RNN; presumably embedded words of
            shape (batch, time, emb) -- TODO confirm against the caller.

    Returns:
        Tuple `(z_mean, z_logvar)`, each produced by a linear layer of
        width `cfg.latent_size`.
    '''
    with tf.variable_scope("Encoder"):
        if cfg.enc_bidirect:
            fcell = self.rnn_cell(cfg.num_layers, cfg.hidden_size,
                                  return_states=True)
            bcell = self.rnn_cell(cfg.num_layers, cfg.hidden_size,
                                  return_states=True)
            outputs, _ = tf.nn.bidirectional_dynamic_rnn(
                fcell, bcell, inputs,
                sequence_length=self.lengths,
                swap_memory=True,
                dtype=tf.float32)
        else:
            cell = self.rnn_cell(cfg.num_layers, cfg.hidden_size,
                                 return_states=True)
            # Pass sequence_length here as well, so both branches treat
            # padded timesteps the same way before the sum over time below.
            outputs, _ = tf.nn.dynamic_rnn(cell, inputs,
                                           sequence_length=self.lengths,
                                           swap_memory=True,
                                           dtype=tf.float32)
            outputs = (outputs, )  # to match bidirectional RNN's output format
        states = []
        for out in outputs:
            # The first hidden_size channels are taken as the RNN output,
            # the remaining channels as the extra returned states.
            output = out[:, :, :cfg.hidden_size]
            d_states = out[:, :, cfg.hidden_size:]
            # for GRU, we skipped the last layer states because they're the outputs
            states.append(tf.concat(2, [d_states, output]))
        states = tf.concat(2, states)  # concatenated states from fwd and bwd RNNs
        # Flatten so the linear/highway layers can be applied to 2-D input.
        states = tf.reshape(states, [-1, cfg.hidden_size * len(outputs)])
        states = utils.linear(states, cfg.latent_size, True, 0.0,
                              scope='states_transform1')
        states = utils.highway(states, f=tf.nn.elu)
        states = utils.linear(states, cfg.latent_size, True, 0.0,
                              scope='states_transform2')
        # Restore the (batch, time, latent) layout before summing over time.
        states = tf.reshape(states, [cfg.batch_size, -1, cfg.latent_size])
        latent = tf.nn.elu(tf.reduce_sum(states, [1])) * 1e-1
        z_mean = utils.linear(latent, cfg.latent_size, True, 0.0,
                              scope='Latent_mean')
        z_logvar = utils.linear(latent, cfg.latent_size, True, 0.0,
                                scope='Latent_logvar')
    return z_mean, z_logvar
def discriminator_energy(self, states):  # FIXME
    '''Energy-based discriminator: encode `states`, then try to rebuild them.

    Args:
        states: tensor fed to the discriminator RNNs; the decoder input is
            built with a leading dimension of `2 * cfg.batch_size`, so
            `states` presumably holds two stacked batches -- TODO confirm.

    Returns:
        Tuple `(reconstructed, ret_latent)`: the reconstruction reshaped to
        `[2 * cfg.batch_size, -1, cfg.hidden_size]`, and a tanh-squashed
        projection of `self.latent`.
    '''
    with tf.variable_scope("Discriminator"):
        # XXX use BiRNN+convnet for the encoder
        enc_cell = self.rnn_cell(cfg.d_num_layers, cfg.hidden_size)
        _, final_state = tf.nn.dynamic_rnn(enc_cell, states,
                                           swap_memory=True,
                                           dtype=tf.float32,
                                           scope='discriminator_encoder')

        # this latent is of size cfg.hidden_size since it needs a lot more
        # capacity than cfg.latent_size to reproduce the hidden states
        # TODO use all states instead of just the final state
        code = utils.highway(final_state, layer_size=1)
        code = utils.linear(code, cfg.hidden_size, True,
                            scope='discriminator_latent_transform')

        # TODO make initial state from latent, don't just use zeros
        leading_zeros = tf.zeros([2 * cfg.batch_size, 1, cfg.hidden_size])
        dec_in = tf.concat(1, [leading_zeros, states])
        dec_cell = self.rnn_cell(cfg.d_num_layers, cfg.hidden_size, code)
        dec_out, _ = tf.nn.dynamic_rnn(dec_cell, dec_in,
                                       swap_memory=True,
                                       dtype=tf.float32,
                                       scope='discriminator_decoder')

        # Project every decoder step back to hidden_size, then restore the
        # 3-D layout.
        flat_out = tf.reshape(dec_out, [-1, cfg.hidden_size])
        rebuilt = utils.linear(flat_out, cfg.hidden_size, True, 0.0,
                               scope='discriminator_reconst')
        rebuilt = tf.reshape(rebuilt,
                             [2 * cfg.batch_size, -1, cfg.hidden_size])

        # don't train this projection, since the model can learn to zero out
        # ret_latent to minimize the reconstruction error
        projected = utils.linear(self.latent, cfg.hidden_size, False,
                                 scope='discriminator_ret_latent',
                                 train=False)
        ret_latent = tf.nn.tanh(projected)
    return rebuilt, ret_latent
def _embed(self):
    """Build the word + character embedding layer for passage and question.

    Creates `self.word_embeddings` (non-trainable) and
    `self.char_embeddings`, both initialised from the vocabulary, and
    stores the highway-transformed inputs in `self.p_emb` / `self.q_emb`.
    The question side reuses the passage side's "char_conv" and "highway"
    variables.
    """
    with tf.device('/cpu:0'), tf.variable_scope('word_embedding'):
        word_table_shape = (self.vocab.word_size(), self.vocab.word_embed_dim)
        self.word_embeddings = tf.get_variable(
            'word_embeddings',
            shape=word_table_shape,
            initializer=tf.constant_initializer(self.vocab.word_embeddings),
            trainable=False)

        char_table_shape = (self.vocab.char_size(), self.vocab.char_embed_dim)
        self.char_embeddings = tf.get_variable(
            'char_embeddings',
            shape=char_table_shape,
            initializer=tf.constant_initializer(self.vocab.char_embeddings))

        # Character lookups, flattened to one row per token.
        p_chars = tf.nn.embedding_lookup(self.char_embeddings, self.ph)
        p_chars = tf.reshape(
            p_chars, [-1, self.max_char_len, self.char_embed_dim])
        q_chars = tf.nn.embedding_lookup(self.char_embeddings, self.qh)
        q_chars = tf.reshape(
            q_chars, [-1, self.max_char_len, self.char_embed_dim])

        p_chars = tf.nn.dropout(p_chars, 1.0 - 0.5 * self.dropout)
        q_chars = tf.nn.dropout(q_chars, 1.0 - 0.5 * self.dropout)

        # Bidaf style conv-highway encoder
        p_chars = conv(p_chars, self.hidden_size, bias=True,
                       activation=tf.nn.relu, kernel_size=3,
                       name="char_conv", reuse=None)
        q_chars = conv(q_chars, self.hidden_size, bias=True,
                       activation=tf.nn.relu, kernel_size=3,
                       name="char_conv", reuse=True)

        # Max-pool over the character axis, then restore the token axis.
        p_chars = tf.reduce_max(p_chars, axis=1)
        q_chars = tf.reduce_max(q_chars, axis=1)
        p_chars = tf.reshape(
            p_chars, [-1, self.max_p_len, p_chars.shape[-1]])
        q_chars = tf.reshape(
            q_chars, [-1, self.max_q_len, q_chars.shape[-1]])

        # Word lookups with the same dropout keep-probability as the chars.
        p_words = tf.nn.embedding_lookup(self.word_embeddings, self.p)
        p_words = tf.nn.dropout(p_words, 1.0 - 0.5 * self.dropout)
        q_words = tf.nn.embedding_lookup(self.word_embeddings, self.q)
        q_words = tf.nn.dropout(q_words, 1.0 - 0.5 * self.dropout)

        p_joint = tf.concat([p_words, p_chars], axis=2)
        q_joint = tf.concat([q_words, q_chars], axis=2)

        self.p_emb = highway(p_joint, size=self.hidden_size,
                             scope="highway", dropout=self.dropout,
                             reuse=None)
        self.q_emb = highway(q_joint, size=self.hidden_size,
                             scope="highway", dropout=self.dropout,
                             reuse=True)
def __init__(self, vocab, training, generator=False):
    '''Build the model graph: inputs, optional encoder, decoder and costs.

    Args:
        vocab: vocabulary object, stored as `self.vocab`.
        training: stored as `self.training`; a train op is only built when
            this is truthy and `generator` is falsy.
        generator: when truthy (and `cfg.autoencoder` is set), `self.z` is a
            placeholder instead of the encoder output, the KL term is zero
            and no train op is created.
    '''
    self.vocab = vocab
    self.training = training
    self.global_step = tf.get_variable('global_step', shape=[],
                                       initializer=tf.zeros_initializer,
                                       trainable=False)
    self.summary_op = None
    self.summaries = []

    with tf.name_scope('input'):
        # left-aligned data: <sos> w1 w2 ... w_T <eos> <pad...>
        self.data = tf.placeholder(tf.int32, [cfg.batch_size, None],
                                   name='data')
        # sentences with word dropout
        self.data_dropped = tf.placeholder(tf.int32, [cfg.batch_size, None],
                                           name='data_dropped')
        # sentence lengths
        self.lengths = tf.placeholder(tf.int32, [cfg.batch_size],
                                      name='lengths')
        if cfg.use_labels:
            self.labels = tf.placeholder(tf.int32, [cfg.batch_size],
                                         name='labels')

    # Both lookups go through self.word_embeddings; the second call passes
    # reuse=True.
    embs = self.word_embeddings(self.data)
    embs_dropped = self.word_embeddings(self.data_dropped, reuse=True)
    if cfg.use_labels:
        embs_labels = self.label_embeddings(self.labels)

    if cfg.use_labels:
        with tf.name_scope('expand-label-dims'):
            # Compensate for words being shifted by 1
            # Add an axis at position 1 and tile it to the length of embs'
            # axis 1.
            embs_labels = tf.expand_dims(embs_labels, 1)
            self.embs_labels = tf.tile(embs_labels,
                                       [1, tf.shape(embs)[1], 1])

    if cfg.autoencoder:
        if generator:
            # In generator mode the latent code is fed in directly.
            self.z = tf.placeholder(tf.float32,
                                    [cfg.batch_size, cfg.latent_size])
        else:
            with tf.name_scope('concat_words_and_labels'):
                if cfg.use_labels:
                    embs_words_with_labels = tf.concat(
                        2, [embs, self.embs_labels])
                else:
                    embs_words_with_labels = embs
            self.z_mean, z_logvar = self.encoder(embs_words_with_labels)
            if cfg.variational:
                with tf.name_scope('reparameterize'):
                    # z = mean + sqrt(exp(logvar)) * eps, with eps drawn
                    # from a truncated normal.
                    eps = tf.truncated_normal(
                        [cfg.batch_size, cfg.latent_size])
                    self.z = self.z_mean + tf.mul(
                        tf.sqrt(tf.exp(z_logvar)), eps)
            else:
                self.z = self.z_mean
        with tf.name_scope('transform-z'):
            z = utils.highway(self.z, f=tf.nn.elu, bias=0,
                              scope='transform_z_hw')
            self.z_transformed = utils.linear(z, cfg.latent_size, True,
                                              scope='transform_z_lin')
    else:
        # Without the autoencoder, a zero tensor is passed to the decoder
        # in place of z.
        z = tf.zeros([cfg.batch_size, 1])

    with tf.name_scope('concat_words-labels-z'):
        # Concatenate dropped word embeddings, label embeddings and 'z'
        concat_list = []
        if cfg.decoder_inputs:
            concat_list.append(embs_dropped)
        else:
            # Without decoder inputs, a single zero channel per timestep is
            # used instead of the word embeddings.
            concat_list.append(
                tf.zeros([cfg.batch_size, tf.shape(embs_dropped)[1], 1]))
        if cfg.autoencoder:
            # Repeat the transformed latent along axis 1 of embs_dropped.
            zt = tf.expand_dims(self.z_transformed, 1)
            zt = tf.tile(zt, [1, tf.shape(embs_dropped)[1], 1])
            concat_list.append(zt)
        if cfg.use_labels:
            concat_list.append(self.embs_labels)
        decode_embs = tf.concat(2, concat_list)

    output = self.decoder(decode_embs, z)

    if cfg.autoencoder and cfg.mutual_info:
        # 1.0 where the token id is > 0, else 0.0 (id 0 is presumably the
        # padding id -- TODO confirm against the vocabulary).
        mask = tf.expand_dims(
            tf.cast(tf.greater(self.data, 0), tf.float32), -1)
        if cfg.use_labels:
            pencoder_embs = tf.concat(2, [mask, self.embs_labels])
        else:
            pencoder_embs = mask
        zo_mean, zo_logvar = self.output_encoder(pencoder_embs, output)

    # shift left the input to get the targets
    with tf.name_scope('left-shift'):
        targets = tf.concat(
            1, [self.data[:, 1:],
                tf.zeros([cfg.batch_size, 1], tf.int32)])

    with tf.name_scope('mle-cost'):
        nll_per_word = self.mle_loss(output, targets)
        avg_lengths = tf.cast(tf.reduce_mean(self.lengths), tf.float32)
        # NLL is summed over all words and averaged over the batch;
        # perplexity divides that by the mean sentence length.
        self.nll = tf.reduce_sum(nll_per_word) / cfg.batch_size
        self.perplexity = tf.exp(self.nll / avg_lengths)
        self.summaries.append(
            tf.scalar_summary('perplexity', self.perplexity))
        self.summaries.append(tf.scalar_summary('cost_mle', self.nll))

    with tf.name_scope('kld-cost'):
        if not cfg.autoencoder or not cfg.variational or generator:
            self.kld = tf.zeros([])
        else:
            self.kld = tf.reduce_sum(self.kld_loss(self.z_mean, z_logvar)) / \
                       cfg.batch_size
        self.summaries.append(
            tf.scalar_summary('cost_kld', tf.reduce_mean(self.kld)))
        if np.isclose(cfg.anneal_bias, 0):
            # No annealing: use the maximum weight from the start.
            self.kld_weight = tf.constant(cfg.anneal_max)
        else:
            # Sigmoid schedule over global_step, centred at anneal_bias / 2.
            self.kld_weight = cfg.anneal_max * tf.sigmoid(
                (10 / cfg.anneal_bias) *
                (self.global_step - (cfg.anneal_bias / 2)))
        self.summaries.append(
            tf.scalar_summary('weight_kld', self.kld_weight))

    with tf.name_scope('mutinfo-cost'):
        if not cfg.autoencoder or not cfg.mutual_info:
            self.mutinfo = tf.zeros([])
        else:
            self.mutinfo = tf.reduce_sum(self.mutinfo_loss(self.z, zo_mean,
                                                           zo_logvar)) / \
                           cfg.batch_size
        self.summaries.append(
            tf.scalar_summary('cost_mutinfo', tf.reduce_mean(self.mutinfo)))

    with tf.name_scope('cost'):
        # Total cost: NLL plus the annealed (KL + weighted mutual-info) term.
        self.cost = self.nll + (self.kld_weight * (self.kld +
                                                   (cfg.mutinfo_weight * \
                                                    self.mutinfo)))

    if training and not generator:
        self.train_op = self.train(self.cost)
    else:
        self.train_op = tf.no_op()
def encoder(self, inputs, scope=None):
    '''Encode sentence and return a latent representation.

    Either a stack of 1-D convolutions (`cfg.convolutional`) or a
    (bi)directional RNN summarises `inputs` into a vector `z`; for the RNN
    path `cfg.encoder_summary` selects 'laststate', 'attention' or 'mean'.

    Args:
        inputs: input tensor for the encoder; presumably shaped
            (batch, time, features) -- TODO confirm against the caller.
        scope: optional variable scope name; defaults to "Encoder".

    Returns:
        Tuple `(z_mean, z_logvar)`, two linear projections of `z` of width
        `cfg.latent_size`.

    Raises:
        ValueError: in the RNN path, if `cfg.encoder_summary` is not one of
            the three supported values.
    '''
    with tf.variable_scope(scope or "Encoder"):
        if cfg.convolutional:
            kernel_widths = [int(w) for w in cfg.conv_width.split(',')]
            last_layer = len(kernel_widths) - 1
            feats = inputs
            for idx, kernel in enumerate(kernel_widths):
                feats = utils.conv1d(feats, cfg.hidden_size, kernel, 1,
                                     'VALID', scope='conv%d' % idx)
                feats = tf.contrib.layers.batch_norm(
                    inputs=feats, is_training=self.training,
                    scope='bn%d' % idx)
                # No non-linearity after the final convolution.
                if idx < last_layer:
                    feats = tf.nn.elu(feats)
            z = tf.reduce_max(feats, 1)
        else:
            use_last_state = cfg.encoder_summary == 'laststate'
            if cfg.encoder_birnn:
                fw_cell = self.rnn_cell(cfg.num_layers, cfg.hidden_size // 2)
                bw_cell = self.rnn_cell(cfg.num_layers, cfg.hidden_size // 2)
                outputs, final = tf.nn.bidirectional_dynamic_rnn(
                    fw_cell, bw_cell, inputs,
                    sequence_length=self.lengths,
                    swap_memory=True,
                    dtype=tf.float32)
                outputs = tf.concat(2, outputs)
                # last states of fwd and bkwd
                final = tf.concat(1, final[0] + final[1])
            else:
                if use_last_state:
                    inputs = tf.reverse_sequence(inputs, self.lengths, 1)
                uni_cell = self.rnn_cell(cfg.num_layers)
                outputs, final = tf.nn.dynamic_rnn(
                    uni_cell, inputs,
                    sequence_length=self.lengths,
                    swap_memory=True,
                    dtype=tf.float32)
                final = tf.concat(1, final)

            if use_last_state:
                final = utils.highway(final, scope='encoder_output_highway')
                projected = utils.linear(final, cfg.latent_size, True,
                                         scope='outputs_transform')
                z = tf.nn.tanh(projected)
            else:
                # Flatten to 2-D for the highway layer.
                outputs = tf.reshape(outputs, [-1, cfg.hidden_size])
                outputs = utils.highway(outputs,
                                        scope='encoder_output_highway')
                if cfg.encoder_summary == 'attention':
                    in_width = inputs.get_shape()[2].value
                    flat_input = tf.reshape(inputs, [-1, in_width])
                    attn_in = tf.concat(1, [flat_input, outputs])
                    weights = utils.linear(attn_in, cfg.hidden_size, True,
                                           scope='outputs_attention')
                    per_step = [cfg.batch_size, -1, cfg.hidden_size]
                    outputs = tf.reshape(outputs, per_step)
                    weights = tf.reshape(weights, per_step)
                    # softmax is called with 1 as its second argument
                    weights = tf.nn.softmax(weights, 1)
                    z = tf.reduce_sum(outputs * weights, [1])
                    z = tf.nn.tanh(
                        utils.linear(z, cfg.latent_size, True,
                                     scope='outputs_transform'))
                elif cfg.encoder_summary == 'mean':
                    outputs = utils.linear(outputs, cfg.latent_size, True,
                                           scope='outputs_transform')
                    outputs = tf.reshape(
                        outputs, [cfg.batch_size, -1, cfg.latent_size])
                    z = tf.nn.tanh(tf.reduce_mean(outputs, [1]))
                else:
                    raise ValueError(
                        'Invalid encoder_summary configuration.')

        z_mean = utils.linear(z, cfg.latent_size, True,
                              scope='encoder_z_mean')
        z_logvar = utils.linear(z, cfg.latent_size, True,
                                scope='encoder_z_logvar')
    return z_mean, z_logvar
def decoder(self, inputs, mle_mode, reuse=None):
    '''Use the latent representation and word inputs to predict next words.

    Args:
        inputs: decoder input tensor; in MLE mode the transformed latent is
            tiled along axis 1 and concatenated to it on axis 2.
        mle_mode: truthy for MLE decoding; otherwise the cell is built with
            the embedding and softmax parameters and the generated word ids
            are read back out of the RNN output channels.
        reuse: forwarded to the "Decoder" variable scope.

    Returns:
        Tuple `(output, states, generated)`: the first `cfg.hidden_size`
        output channels, the remaining state channels (optionally
        concatenated with inputs/embeddings when `cfg.concat_inputs`), and
        the generated ids (`None` in MLE mode).
    '''
    with tf.variable_scope("Decoder", reuse=reuse):
        latent = utils.highway(self.latent, layer_size=2, f=tf.nn.elu)
        latent = utils.linear(latent, cfg.latent_size, True, 0.0,
                              scope='Latent_transform')
        self.latent_transformed = latent

        def tile_latent_like(reference):
            # Repeat the latent along axis 1 to the length of `reference`.
            expanded = tf.expand_dims(latent, 1)
            multiples = tf.pack([1, tf.shape(reference)[1], 1])
            return tf.tile(expanded, multiples)

        def layer_initial(layer):
            # One initial-state entry per layer: [tanh(preact), preact].
            pre = utils.linear(latent, cfg.hidden_size, True, 0.0,
                               scope='Latent_initial%d' % layer)
            post = tf.nn.tanh(pre)
            return tf.concat(1, [post, pre])

        layer_inits = [layer_initial(i) for i in range(cfg.num_layers)]

        if mle_mode:
            inputs = tf.concat(2, [inputs, tile_latent_like(inputs)])
            cell = self.rnn_cell(cfg.num_layers, cfg.hidden_size,
                                 return_states=True, pretanh=True)
            self.decode_cell = cell
        else:
            cell = self.rnn_cell(cfg.num_layers, cfg.hidden_size, latent,
                                 self.embedding, self.softmax_w,
                                 self.softmax_b, return_states=True,
                                 pretanh=True,
                                 get_embeddings=cfg.concat_inputs)

        initial_state = cell.initial_state(layer_inits)
        if mle_mode:
            self.decode_initial = initial_state

        outputs, _ = tf.nn.dynamic_rnn(cell, inputs,
                                       initial_state=initial_state,
                                       swap_memory=True,
                                       dtype=tf.float32)
        hidden = cfg.hidden_size
        output = outputs[:, :, :hidden]

        if mle_mode:
            generated = None
            skip = 0
        else:
            # The channel right after the hidden units is read as the word
            # id; the last timestep is dropped and an <eos> column appended.
            word_channel = outputs[:, :-1, hidden:hidden + 1]
            words = tf.squeeze(tf.cast(word_channel, tf.int32), [-1])
            eos_column = tf.constant(self.vocab.eos_index,
                                     shape=[cfg.batch_size, 1])
            generated = tf.stop_gradient(tf.concat(1, [words, eos_column]))
            skip = 1
            if cfg.concat_inputs:
                emb_start = hidden + 1
                embeddings = outputs[:, :, emb_start:emb_start + cfg.emb_size]
                # Shift right by one step, using the first input as step 0.
                embeddings = tf.concat(
                    1, [inputs[:, :1, :], embeddings[:, :-1, :]])
                embeddings = tf.concat(
                    2, [embeddings, tile_latent_like(embeddings)])
                skip += cfg.emb_size

        states = outputs[:, :, hidden + skip:]
        if cfg.concat_inputs:
            extra = inputs if mle_mode else embeddings
            states = tf.concat(2, [states, extra])
    return output, states, generated
def process_seq(self):
    """Build the input pipeline: embeddings -> (batch norm) -> highway -> LSTM.

    `self._options.reps[0]` enables word embeddings and `reps[1]` enables
    character embeddings (optionally position-specific, then passed through
    a conv or bi-LSTM character layer). When both are enabled they are
    concatenated on axis 2.

    Returns:
        Tuple `(mask, hidden)`: the mask from `sequence_mask` and the output
        of the `dynamic_rnn` run over the input embeddings.

    Raises:
        ValueError: if neither `reps[0]` nor `reps[1]` is enabled, since no
            input representation (and no mask) could be built.
    """
    opts = self._options

    # Getting input embeddings from inputs
    if opts.reps[0]:
        # Round-trip through float32 so the finiteness check can be applied.
        self._examples = tf.cast(
            tf.verify_tensor_all_finite(
                tf.cast(self._examples, 'float32'), 'Nan'),
            'int64')
        self._wordemb = tf.get_variable(
            name='wordemb',
            shape=[opts.vocab_size, opts.w_emb_dim],
            initializer=tf.truncated_normal_initializer(
                stddev=1.0 / math.sqrt(float(opts.vocab_size))))
        w_embeddings = tf.nn.embedding_lookup(
            self._wordemb, tf.reshape(self._examples, [-1]))
        w_input_emb = tf.reshape(w_embeddings, [
            opts.batch_size, opts.max_seq_length, opts.w_emb_dim
        ])
        mask = sequence_mask(self._examples)

    if opts.reps[1]:
        mask, mask_c = sequence_mask(self._examplesChar, char=True)
        if opts.positionEmbeddings:
            # One embedding per (character, position) pair; a constant zero
            # block is prepended in place of a trainable row for index 0.
            self._charemb_trunc = tf.get_variable(
                name='charemb',
                shape=[
                    opts.char_vocab_size - 1, opts.max_word_length,
                    opts.c_emb_dim
                ],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / math.sqrt(float(opts.char_vocab_size))))
            self._char_pad = tf.constant(
                0., shape=[1, opts.max_word_length, opts.c_emb_dim])
            self._charemb = tf.concat(
                0, [self._char_pad, self._charemb_trunc])
            position_indexes = tf.cast(
                tf.range(opts.max_word_length), 'int64')
            # NOTE(review): the flat index is char + vocab * position, while
            # the table is reshaped from [vocab, position, dim]. That looks
            # like it does not line the zero block up with character 0 at
            # every position. Confirm before changing; process_output_seq
            # uses the same scheme on the same table.
            indexes = tf.reshape(
                self._examplesChar +
                opts.char_vocab_size * position_indexes, [-1])
            c_embeddings = tf.gather(
                tf.reshape(self._charemb, [
                    opts.char_vocab_size * opts.max_word_length,
                    opts.c_emb_dim
                ]), indexes)
        else:
            self._charemb_trunc = tf.get_variable(
                name='charemb',
                shape=[opts.char_vocab_size - 1, opts.c_emb_dim],
                initializer=tf.truncated_normal_initializer(
                    stddev=1.0 / math.sqrt(float(opts.char_vocab_size))))
            self._char_pad = tf.constant(0., shape=[1, opts.c_emb_dim])
            self._charemb = tf.concat(
                0, [self._char_pad, self._charemb_trunc])
            c_embeddings = tf.nn.embedding_lookup(
                self._charemb, tf.reshape(self._examplesChar, [-1]))

        if opts.charLayer == "conv":
            self._convfilters = []
            for w, d in zip(opts.window_sizes, opts.filter_dims):
                self._convfilters.append(
                    tf.get_variable(
                        name='filter%d' % w,
                        shape=[w, opts.c_emb_dim, d],
                        initializer=tf.truncated_normal_initializer(
                            stddev=1.0 /
                            math.sqrt(float(w * opts.c_emb_dim)))))
                # L2 penalty on each filter, collected under 'losses'.
                weight_decay = tf.nn.l2_loss(self._convfilters[-1])
                tf.add_to_collection('losses', weight_decay)
            c_input_emb = tf.reshape(
                CE(
                    tf.reshape(c_embeddings, [
                        opts.batch_size * opts.max_seq_length,
                        opts.max_word_length, opts.c_emb_dim
                    ]), self._convfilters),
                [opts.batch_size, opts.max_seq_length, -1])
        elif opts.charLayer == "LSTM":
            self.char_cell_fw = tf.nn.rnn_cell.LSTMCell(
                opts.charLSTM_dim,
                state_is_tuple=False,
                activation=tf.nn.relu)
            self.char_cell_bw = tf.nn.rnn_cell.LSTMCell(
                opts.charLSTM_dim,
                state_is_tuple=False,
                activation=tf.nn.relu)
            # Per-word lengths are obtained by summing the char mask on
            # axis 2.
            c_input_emb = tf.reshape(
                CE_RNN(
                    tf.reshape(c_embeddings, [
                        opts.batch_size * opts.max_seq_length,
                        opts.max_word_length, opts.c_emb_dim
                    ]), self.char_cell_fw, self.char_cell_bw,
                    tf.reshape(tf.reduce_sum(mask_c, 2), [-1])),
                [
                    opts.batch_size, opts.max_seq_length,
                    opts.charLSTM_dim * 2
                ])
        else:
            # No character layer: flatten the raw character embeddings.
            c_input_emb = tf.reshape(
                c_embeddings, [opts.batch_size, opts.max_seq_length, -1])

    if opts.reps[0] and not opts.reps[1]:
        input_emb = w_input_emb
    elif opts.reps[1] and not opts.reps[0]:
        input_emb = c_input_emb
    elif opts.reps[0] and opts.reps[1]:
        input_emb = tf.concat(2, [w_input_emb, c_input_emb])
    else:
        # Previously this fell through and failed later with an unbound
        # local variable; fail with a clear message instead.
        raise ValueError(
            'process_seq needs at least one input representation: '
            'enable reps[0] (words) and/or reps[1] (characters).')
    input_emb = tf.verify_tensor_all_finite(input_emb, 'Nan')

    # Batch normalization
    if opts.batch_norm:
        self.batch_normalizer = batch_norm()
        input_emb = self.batch_normalizer(input_emb, self._training)
        input_emb = tf.verify_tensor_all_finite(input_emb, 'Nan')

    # Highway Layer
    if opts.highway_layers > 0:
        self._highway_w = []
        self._highway_wg = []
        self._highway_b = []
        self._highway_bg = []
        for i in range(opts.highway_layers):
            # Each parameter also contributes an L2 term to 'losses'.
            self._highway_w.append(
                tf.get_variable(
                    name='highway_w%d' % i,
                    shape=[opts.emb_dim] * 2,
                    initializer=tf.truncated_normal_initializer(
                        stddev=1.0 / math.sqrt(float(opts.emb_dim)))))
            weight_decay = tf.nn.l2_loss(self._highway_w[-1])
            tf.add_to_collection('losses', weight_decay)
            self._highway_b.append(
                tf.get_variable(
                    name='highway_b%d' % i,
                    shape=[opts.emb_dim],
                    initializer=tf.truncated_normal_initializer(
                        stddev=1.0 / math.sqrt(float(opts.emb_dim)))))
            weight_decay = tf.nn.l2_loss(self._highway_b[-1])
            tf.add_to_collection('losses', weight_decay)
            self._highway_wg.append(
                tf.get_variable(
                    name='highway_wg%d' % i,
                    shape=[opts.emb_dim] * 2,
                    initializer=tf.truncated_normal_initializer(
                        stddev=1.0 / math.sqrt(float(opts.emb_dim)))))
            weight_decay = tf.nn.l2_loss(self._highway_wg[-1])
            tf.add_to_collection('losses', weight_decay)
            self._highway_bg.append(
                tf.get_variable(
                    name='highway_bg%d' % i,
                    shape=[opts.emb_dim],
                    initializer=tf.truncated_normal_initializer(
                        stddev=1.0 / math.sqrt(float(opts.emb_dim)))))
            weight_decay = tf.nn.l2_loss(self._highway_bg[-1])
            tf.add_to_collection('losses', weight_decay)
        input_emb = tf.reshape(
            highway(tf.reshape(input_emb, [-1, opts.emb_dim]),
                    self._highway_w, self._highway_b, self._highway_wg,
                    self._highway_bg),
            [opts.batch_size, opts.max_seq_length, opts.emb_dim])
        input_emb = tf.verify_tensor_all_finite(input_emb, 'Nan')

    # LSTM
    self.cell = tf.nn.rnn_cell.LSTMCell(opts.hidden_dim,
                                        state_is_tuple=False,
                                        activation=tf.nn.relu)
    # opts.dropout is passed as output_keep_prob, so values below 1.0
    # enable dropout during training.
    if self._training and opts.dropout < 1.0:
        self.cell = tf.nn.rnn_cell.DropoutWrapper(
            self.cell, output_keep_prob=opts.dropout)
    if opts.hidden_layers > 1:
        self.cell = tf.nn.rnn_cell.MultiRNNCell(
            [self.cell] * opts.hidden_layers)
    hidden, _ = tf.nn.dynamic_rnn(self.cell,
                                  input_emb,
                                  sequence_length=tf.reduce_sum(mask, 1),
                                  dtype='float32')
    hidden = tf.verify_tensor_all_finite(hidden, 'Nan')
    print(hidden.get_shape())
    return mask, hidden
def process_output_seq(self, allVoc=True):
    """Build the output-side embeddings, optionally followed by highway layers.

    `self._options.reps[2]` enables output word embeddings and `reps[3]`
    enables output character embeddings. The character side either reuses
    the input character table and layer (`reps[1]` and
    `reuse_character_layer`) or creates its own `output_*` variables.

    Args:
        allVoc: when truthy, embeddings are built for the evaluation
            vocabulary (`eval_vocab_size` rows, characters taken from
            `train_set.wid_to_charid`); otherwise for the ids in
            `self._labels` / `self._labelsChar`.

    Returns:
        The output embedding tensor (word, char, gated or concatenated).

    Raises:
        ValueError: if neither `reps[2]` nor `reps[3]` is enabled.
    """
    opts = self._options

    if opts.reps[2]:
        self._output_wordemb = tf.get_variable(
            name="output_wordemb",
            shape=[opts.vocab_size, opts.w_emb_out_dim],
            initializer=tf.truncated_normal_initializer(
                stddev=1.0 / math.sqrt(float(opts.w_emb_out_dim))))
        if allVoc:
            w_output_embeddings = tf.nn.embedding_lookup(
                self._output_wordemb,
                restrict_voc(tf.range(opts.eval_vocab_size),
                             opts.vocab_size))
        else:
            w_output_embeddings = tf.nn.embedding_lookup(
                self._output_wordemb, tf.reshape(self._labels, [-1]))

    if opts.reps[3]:
        if opts.reps[1] and opts.reuse_character_layer:
            # Share the character table and character layer created by the
            # input side (self._charemb, self._convfilters, char cells).
            if allVoc:
                if opts.positionEmbeddings:
                    position_indexes = tf.cast(
                        tf.range(opts.max_word_length), 'int64')
                    # NOTE(review): same char + vocab * position flat index
                    # as the input side; keep the two in sync if changed.
                    indexes = tf.reshape(
                        tf.pack(opts.train_set.wid_to_charid) +
                        opts.char_vocab_size * position_indexes, [-1])
                    c_output_embeddings = tf.gather(
                        tf.reshape(self._charemb, [
                            opts.char_vocab_size * opts.max_word_length,
                            opts.c_emb_dim
                        ]), indexes)
                else:
                    c_output_embeddings = tf.nn.embedding_lookup(
                        self._charemb,
                        tf.reshape(
                            tf.pack(opts.train_set.wid_to_charid), [-1]))
                if opts.charLayer == 'conv':
                    c_output_emb = CE(
                        tf.reshape(c_output_embeddings, [
                            opts.eval_vocab_size, opts.max_word_length,
                            opts.c_emb_dim
                        ]), self._convfilters)
                elif opts.charLayer == 'LSTM':
                    # sign() of the char ids is summed on axis 1 to get the
                    # per-word length passed to CE_RNN.
                    mask_c = tf.sign(
                        tf.pack(opts.train_set.wid_to_charid))
                    c_output_emb = CE_RNN(tf.reshape(
                        c_output_embeddings, [
                            opts.eval_vocab_size, opts.max_word_length,
                            opts.c_emb_dim
                        ]),
                                          self.char_cell_fw,
                                          self.char_cell_bw,
                                          tf.reduce_sum(mask_c, 1),
                                          name='biRNNOut')
                else:
                    c_output_emb = tf.reshape(
                        c_output_embeddings, [opts.eval_vocab_size, -1])
            else:
                if opts.positionEmbeddings:
                    position_indexes = tf.cast(
                        tf.range(opts.max_word_length), 'int64')
                    indexes = tf.reshape(
                        self._labelsChar +
                        opts.char_vocab_size * position_indexes, [-1])
                    c_output_embeddings = tf.gather(
                        tf.reshape(self._charemb, [
                            opts.char_vocab_size * opts.max_word_length,
                            opts.c_emb_dim
                        ]), indexes)
                else:
                    c_output_embeddings = tf.nn.embedding_lookup(
                        self._charemb, tf.reshape(self._labelsChar, [-1]))
                if opts.charLayer == 'conv':
                    c_output_emb = CE(
                        tf.reshape(c_output_embeddings, [
                            opts.batch_size * opts.max_seq_length +
                            opts.noise_length, opts.max_word_length,
                            opts.c_emb_dim
                        ]), self._convfilters)
                elif opts.charLayer == 'LSTM':
                    mask_c = tf.sign(self._labelsChar)
                    c_output_emb = CE_RNN(tf.reshape(
                        c_output_embeddings, [
                            opts.batch_size * opts.max_seq_length +
                            opts.noise_length, opts.max_word_length,
                            opts.c_emb_dim
                        ]),
                                          self.char_cell_fw,
                                          self.char_cell_bw,
                                          tf.reduce_sum(mask_c, 1),
                                          name='biRNNOut')
                else:
                    c_output_emb = tf.reshape(c_output_embeddings, [
                        opts.batch_size * opts.max_seq_length +
                        opts.noise_length, -1
                    ])
        else:
            # Dedicated output-side character table and character layer.
            if opts.positionEmbeddings:
                self._output_charemb_trunc = tf.get_variable(
                    name='output_charemb',
                    shape=[
                        opts.char_vocab_size - 1, opts.max_word_length,
                        opts.c_emb_dim
                    ],
                    initializer=tf.truncated_normal_initializer(
                        stddev=1.0 /
                        math.sqrt(float(opts.char_vocab_size))))
                self._char_pad_out = tf.constant(
                    0.,
                    shape=[1, opts.max_word_length, opts.c_emb_dim])
                self._output_charemb = tf.concat(
                    0, [self._char_pad_out, self._output_charemb_trunc])
                position_indexes = tf.cast(
                    tf.range(opts.max_word_length), 'int64')
                if allVoc:
                    indexes = tf.reshape(
                        tf.pack(opts.train_set.wid_to_charid) +
                        opts.char_vocab_size * position_indexes, [-1])
                    c_output_embeddings = tf.gather(
                        tf.reshape(self._output_charemb, [
                            opts.char_vocab_size * opts.max_word_length,
                            opts.c_emb_dim
                        ]), indexes)
                else:
                    indexes = tf.reshape(
                        self._labelsChar +
                        opts.char_vocab_size * position_indexes, [-1])
                    c_output_embeddings = tf.gather(
                        tf.reshape(self._output_charemb, [
                            opts.char_vocab_size * opts.max_word_length,
                            opts.c_emb_dim
                        ]), indexes)
            else:
                self._output_charemb_trunc = tf.get_variable(
                    name='output_charemb',
                    shape=[opts.char_vocab_size - 1, opts.c_emb_dim],
                    initializer=tf.truncated_normal_initializer(
                        stddev=1.0 /
                        math.sqrt(float(opts.char_vocab_size))))
                self._char_pad_out = tf.constant(
                    0., shape=[1, opts.c_emb_dim])
                self._output_charemb = tf.concat(
                    0, [self._char_pad_out, self._output_charemb_trunc])
                if allVoc:
                    c_output_embeddings = tf.nn.embedding_lookup(
                        self._output_charemb,
                        tf.reshape(
                            tf.pack(opts.train_set.wid_to_charid), [-1]))
                else:
                    c_output_embeddings = tf.nn.embedding_lookup(
                        self._output_charemb,
                        tf.reshape(self._labelsChar, [-1]))

            if opts.charLayer == 'conv':
                self._output_convfilters = []
                for w, d in zip(opts.window_sizes, opts.filter_dims):
                    self._output_convfilters.append(
                        tf.get_variable(
                            name='output_filter%d' % w,
                            shape=[w, opts.c_emb_dim, d],
                            initializer=tf.truncated_normal_initializer(
                                stddev=1.0 / math.sqrt(
                                    float(w * opts.c_emb_dim)))))
                if allVoc:
                    c_output_emb = CE(
                        tf.reshape(c_output_embeddings, [
                            opts.eval_vocab_size, opts.max_word_length,
                            opts.c_emb_dim
                        ]), self._output_convfilters)
                else:
                    c_output_emb = CE(
                        tf.reshape(c_output_embeddings, [
                            opts.batch_size * opts.max_seq_length +
                            opts.noise_length, opts.max_word_length,
                            opts.c_emb_dim
                        ]), self._output_convfilters)
            elif opts.charLayer == 'LSTM':
                self.output_char_cell_fw = tf.nn.rnn_cell.LSTMCell(
                    opts.charLSTM_dim,
                    state_is_tuple=False,
                    activation=tf.nn.relu)
                self.output_char_cell_bw = tf.nn.rnn_cell.LSTMCell(
                    opts.charLSTM_dim,
                    state_is_tuple=False,
                    activation=tf.nn.relu)
                if allVoc:
                    mask_c = tf.sign(
                        tf.pack(opts.train_set.wid_to_charid))
                    c_output_emb = CE_RNN(tf.reshape(
                        c_output_embeddings, [
                            opts.eval_vocab_size, opts.max_word_length,
                            opts.c_emb_dim
                        ]),
                                          self.output_char_cell_fw,
                                          self.output_char_cell_bw,
                                          tf.reduce_sum(mask_c, 1),
                                          name='biRNNOut')
                else:
                    mask_c = tf.sign(self._labelsChar)
                    c_output_emb = CE_RNN(tf.reshape(
                        c_output_embeddings, [
                            opts.batch_size * opts.max_seq_length +
                            opts.noise_length, opts.max_word_length,
                            opts.c_emb_dim
                        ]),
                                          self.output_char_cell_fw,
                                          self.output_char_cell_bw,
                                          tf.reduce_sum(mask_c, 1),
                                          name='biRNNOut')
            else:
                if allVoc:
                    c_output_emb = tf.reshape(
                        c_output_embeddings, [opts.eval_vocab_size, -1])
                else:
                    c_output_emb = tf.reshape(c_output_embeddings, [
                        opts.batch_size * opts.max_seq_length +
                        opts.noise_length, -1
                    ])

    # Combine the word and character representations.
    if opts.reps[2] and not opts.reps[3]:
        output_emb = w_output_embeddings
    elif opts.reps[3] and not opts.reps[2]:
        output_emb = c_output_emb
    elif opts.reps[2] and opts.reps[3] and opts.wordCharGate:
        # Gate between word and char embeddings; the gate parameters are
        # also L2-regularised through the 'losses' collection.
        self._gateWeight = tf.get_variable(
            name='wg',
            shape=[2 * opts.hidden_dim, opts.hidden_dim],
            initializer=tf.truncated_normal_initializer(
                stddev=1.0 / math.sqrt(float(opts.hidden_dim))))
        weight_decay = tf.nn.l2_loss(self._gateWeight)
        tf.add_to_collection('losses', weight_decay)
        self._gateBias = tf.get_variable(
            name='bg',
            shape=[opts.hidden_dim],
            initializer=tf.truncated_normal_initializer(
                stddev=1.0 / math.sqrt(float(opts.hidden_dim))))
        weight_decay = tf.nn.l2_loss(self._gateBias)
        tf.add_to_collection('losses', weight_decay)
        output_emb = word_char_gate(w_output_embeddings, c_output_emb,
                                    self._gateWeight, self._gateBias)
    elif opts.reps[2] and opts.reps[3]:
        print(w_output_embeddings.get_shape())
        print(c_output_emb.get_shape())
        output_emb = tf.concat(1, [w_output_embeddings, c_output_emb])
    else:
        # Previously this fell through and failed on the next line with an
        # unbound local variable; fail with a clear message instead.
        raise ValueError(
            'process_output_seq needs at least one output representation: '
            'enable reps[2] (words) and/or reps[3] (characters).')

    # Remember the shape so the highway output can be reshaped back to it.
    shape = tf.shape(output_emb)

    # Highway Layer
    if opts.output_highway_layers > 0:
        self._output_highway_w = []
        self._output_highway_wg = []
        self._output_highway_b = []
        self._output_highway_bg = []
        for i in range(opts.output_highway_layers):
            self._output_highway_w.append(
                tf.get_variable(
                    name='output_highway_w%d' % i,
                    shape=[opts.hidden_dim] * 2,
                    initializer=tf.truncated_normal_initializer(
                        stddev=1.0 / math.sqrt(float(opts.hidden_dim)))))
            weight_decay = tf.nn.l2_loss(self._output_highway_w[-1])
            tf.add_to_collection('losses', weight_decay)
            self._output_highway_b.append(
                tf.get_variable(
                    name='output_highway_b%d' % i,
                    shape=[opts.hidden_dim],
                    initializer=tf.truncated_normal_initializer(
                        stddev=1.0 / math.sqrt(float(opts.hidden_dim)))))
            weight_decay = tf.nn.l2_loss(self._output_highway_b[-1])
            tf.add_to_collection('losses', weight_decay)
            self._output_highway_wg.append(
                tf.get_variable(
                    name='output_highway_wg%d' % i,
                    shape=[opts.hidden_dim] * 2,
                    initializer=tf.truncated_normal_initializer(
                        stddev=1.0 / math.sqrt(float(opts.hidden_dim)))))
            weight_decay = tf.nn.l2_loss(self._output_highway_wg[-1])
            tf.add_to_collection('losses', weight_decay)
            self._output_highway_bg.append(
                tf.get_variable(
                    name='output_highway_bg%d' % i,
                    shape=[opts.hidden_dim],
                    initializer=tf.truncated_normal_initializer(
                        stddev=1.0 / math.sqrt(float(opts.hidden_dim)))))
            weight_decay = tf.nn.l2_loss(self._output_highway_bg[-1])
            tf.add_to_collection('losses', weight_decay)
        output_emb = tf.reshape(
            highway(output_emb, self._output_highway_w,
                    self._output_highway_b, self._output_highway_wg,
                    self._output_highway_bg), shape)
    return output_emb
def _embed(self):
    """
    The embedding layer, question and passage share embeddings.

    Word vectors (frozen) and character vectors (trainable) are looked up
    for both sides, the character vectors are convolved and max-pooled per
    token, and the concatenated result goes through a shared highway layer
    into `self.p_emb` / `self.q_emb`.
    """
    with tf.variable_scope('embeddings'):
        self.word_embeddings = tf.get_variable(
            'word_embeddings',
            shape=(self.vocab.word_size(), self.vocab.word_embed_dim),
            initializer=tf.constant_initializer(self.vocab.word_embeddings),
            trainable=False)
        self.char_embeddings = tf.get_variable(
            'char_embeddings',
            shape=(self.vocab.char_size(), self.vocab.char_embed_dim),
            initializer=tf.constant_initializer(self.vocab.char_embeddings))

        # Character path: lookup, then one row per token.
        per_token_chars = [-1, self.max_char_len, self.char_embed_dim]
        passage_chars = tf.reshape(
            tf.nn.embedding_lookup(self.char_embeddings, self.ph),
            per_token_chars)
        question_chars = tf.reshape(
            tf.nn.embedding_lookup(self.char_embeddings, self.qh),
            per_token_chars)

        passage_chars = tf.nn.dropout(passage_chars,
                                      1.0 - 0.5 * self.dropout)
        question_chars = tf.nn.dropout(question_chars,
                                       1.0 - 0.5 * self.dropout)

        # Bidaf style conv - highway encoder
        passage_chars = conv(passage_chars, self.hidden_size, bias=True,
                             activation=tf.nn.relu, kernel_size=3,
                             name="char_conv", reuse=None)
        question_chars = conv(question_chars, self.hidden_size, bias=True,
                              activation=tf.nn.relu, kernel_size=3,
                              name="char_conv", reuse=True)

        # Max over the character axis, then back to (batch, tokens, feat).
        passage_chars = tf.reduce_max(passage_chars, axis=1)
        question_chars = tf.reduce_max(question_chars, axis=1)
        passage_chars = tf.reshape(
            passage_chars, [-1, self.max_p_len, passage_chars.shape[-1]])
        question_chars = tf.reshape(
            question_chars, [-1, self.max_q_len, question_chars.shape[-1]])

        # NOTE: an optional `py_embeddings` feature path (same lookup /
        # dropout / conv / max-pool recipe as the character path) is
        # currently disabled and not built here.

        # Word path.
        passage_words = tf.nn.embedding_lookup(self.word_embeddings, self.p)
        passage_words = tf.nn.dropout(passage_words,
                                      1.0 - 0.5 * self.dropout)
        question_words = tf.nn.embedding_lookup(self.word_embeddings, self.q)
        question_words = tf.nn.dropout(question_words,
                                       1.0 - 0.5 * self.dropout)

        passage_joint = tf.concat([passage_words, passage_chars], axis=2)
        question_joint = tf.concat([question_words, question_chars], axis=2)

        self.p_emb = highway(passage_joint, size=self.hidden_size,
                             scope="highway", dropout=self.dropout,
                             reuse=None)
        self.q_emb = highway(question_joint, size=self.hidden_size,
                             scope="highway", dropout=self.dropout,
                             reuse=True)
def process_seq(self):
    """
    Build the input side of the graph: embeddings, optional batch
    normalisation and highway layers, then the sequence LSTM.

    Which representations are embedded is driven by ``self._options.reps``:
    index 0 or 1 enables word/lemma embeddings, index 2 enables character
    embeddings (optionally encoded by ``CE`` or ``CE_RNN``), and index 3
    enables tag embeddings (optionally encoded by ``CE_RNN``). The enabled
    representations are concatenated on axis 2.

    Returns:
        A ``(mask, hidden)`` pair: the mask produced by ``sequence_mask``
        (the character-level one when characters are enabled) and the
        outputs of ``tf.nn.dynamic_rnn`` over the embedded sequence.

    Raises:
        ValueError: if neither words/lemmas nor characters are enabled, in
            which case no mask would be available for the sequence lengths.
    """
    opts = self._options
    use_words = opts.reps[0] or opts.reps[1]
    use_chars = opts.reps[2]
    use_tags = opts.reps[3]

    # The mask is only produced by the word and character branches; fail
    # early with a clear message instead of an UnboundLocalError later on.
    if not (use_words or use_chars):
        raise ValueError(
            "process_seq needs word/lemma (reps[0] or reps[1]) or character "
            "(reps[2]) inputs to compute the sequence mask")

    def trunc_normal(fan):
        # Truncated normal initializer with stddev = 1 / sqrt(fan).
        return tf.truncated_normal_initializer(
            stddev=1.0 / math.sqrt(float(fan)))

    def decayed_variable(name, shape, fan):
        # Create a variable and register its L2 norm in the 'losses' collection.
        var = tf.get_variable(name=name,
                              shape=shape,
                              initializer=trunc_normal(fan))
        tf.add_to_collection('losses', tf.nn.l2_loss(var))
        return var

    def make_lstm(units):
        # ReLU LSTM cell, wrapped with output dropout while training.
        layer = tf.contrib.rnn.LSTMCell(units,
                                        state_is_tuple=False,
                                        activation=tf.nn.relu)
        if self._training and opts.dropout < 1.0:
            layer = tf.contrib.rnn.DropoutWrapper(
                layer, output_keep_prob=opts.dropout)
        return layer

    # Words or lemmas
    if use_words:
        self._wordemb = tf.get_variable(
            name='wordemb',
            shape=[opts.input_vocab_size, opts.w_emb_dim],
            initializer=trunc_normal(opts.input_vocab_size))
        w_embeddings = tf.nn.embedding_lookup(
            self._wordemb, tf.reshape(self._examples, [-1]))
        w_input_emb = tf.reshape(
            w_embeddings, [-1, opts.max_seq_length, opts.w_emb_dim])
        mask = sequence_mask(self._examples)

    # Tags
    if use_tags:
        self._tagEmbs = []
        tag_input_embs = []
        for i in range(opts.max_tag_number):
            self._tagEmbs.append(
                tf.get_variable(
                    name="tagEmbs%d" % i,
                    shape=[opts.tags_vocab_size[i], opts.t_emb_dim],
                    initializer=trunc_normal(opts.t_emb_dim)))
            tag_embeddings = tf.nn.embedding_lookup(
                self._tagEmbs[i],
                tf.reshape(self._examplesTags[:, :, i], [-1]))
            tag_input_embs.append(
                tf.reshape(tag_embeddings,
                           [-1, opts.max_seq_length, opts.t_emb_dim]))
        if opts.tagLayer == "LSTM":
            tag_embeddings = tf.concat(axis=2, values=tag_input_embs)
            self.tag_cell_fw = make_lstm(opts.tagLSTM_dim)
            self.tag_cell_bw = make_lstm(opts.tagLSTM_dim)
            # NOTE(review): 32 is a hard-coded row count used when not
            # training; presumably the evaluation batch size - confirm.
            size = opts.batch_size if self._training else 32
            tag_steps = tf.reshape(
                tag_embeddings,
                [-1, opts.max_tag_number, opts.t_emb_dim])
            # Every token carries exactly max_tag_number tag slots.
            tag_lengths = tf.constant(opts.max_tag_number,
                                      dtype='int64',
                                      shape=[size * opts.max_seq_length])
            # The tag cells have tagLSTM_dim units, so the width must come
            # from tagLSTM_dim (the original used charLSTM_dim here).
            # Assumes CE_RNN emits 2 * cell units per row, as the character
            # branch below does - TODO confirm.
            tag_input_emb = tf.reshape(
                CE_RNN(tag_steps, self.tag_cell_fw, self.tag_cell_bw,
                       tag_lengths),
                [-1, opts.max_seq_length, opts.tagLSTM_dim * 2])
        else:
            tag_input_emb = tf.concat(axis=2, values=tag_input_embs)

    # Characters
    if use_chars:
        # Overrides the word-level mask when both are enabled.
        mask, mask_c = sequence_mask(self._examplesChar, char=True)
        self._charemb_trunc = tf.get_variable(
            name='charemb',
            shape=[opts.char_vocab_size - 1, opts.c_emb_dim],
            initializer=trunc_normal(opts.char_vocab_size))
        # Row 0 is a constant all-zero vector prepended to the trained rows.
        self._char_pad = tf.constant(0., shape=[1, opts.c_emb_dim])
        self._charemb = tf.concat(
            axis=0, values=[self._char_pad, self._charemb_trunc])
        c_embeddings = tf.nn.embedding_lookup(
            self._charemb, tf.reshape(self._examplesChar, [-1]))
        if opts.charLayer == "conv":
            self._convfilters = []
            for w, d in zip(opts.window_sizes, opts.filter_dims):
                self._convfilters.append(
                    decayed_variable('filter%d' % w,
                                     [w, opts.c_emb_dim, d],
                                     w * opts.c_emb_dim))
            char_steps = tf.reshape(
                c_embeddings,
                [-1, opts.max_word_length, opts.c_emb_dim])
            c_input_emb = tf.reshape(
                CE(char_steps, self._convfilters),
                [-1, opts.max_seq_length, opts.char_emb_dim])
        elif opts.charLayer == "LSTM":
            self.char_cell_fw = make_lstm(opts.charLSTM_dim)
            self.char_cell_bw = make_lstm(opts.charLSTM_dim)
            char_steps = tf.reshape(
                c_embeddings,
                [-1, opts.max_word_length, opts.c_emb_dim])
            # Per-word lengths: sum of the character mask over axis 2.
            char_lengths = tf.reshape(tf.reduce_sum(mask_c, 2), [-1])
            c_input_emb = tf.reshape(
                CE_RNN(char_steps, self.char_cell_fw, self.char_cell_bw,
                       char_lengths),
                [-1, opts.max_seq_length, opts.charLSTM_dim * 2])
        else:
            c_input_emb = tf.reshape(
                c_embeddings,
                [-1, opts.max_seq_length, opts.char_emb_dim])

    embs = []
    if use_words:
        embs.append(w_input_emb)
    if use_chars:
        embs.append(c_input_emb)
    if use_tags:
        embs.append(tag_input_emb)
    input_emb = tf.concat(axis=2, values=embs)

    # Batch normalization
    if opts.batch_norm:
        self.batch_normalizer = batch_norm()
        input_emb = self.batch_normalizer(input_emb, self._training)

    # Highway layers
    if opts.highway_layers > 0:
        self._highway_w = []
        self._highway_wg = []
        self._highway_b = []
        self._highway_bg = []
        for i in range(opts.highway_layers):
            self._highway_w.append(
                decayed_variable('highway_w%d' % i, [opts.emb_dim] * 2,
                                 opts.emb_dim))
            self._highway_b.append(
                decayed_variable('highway_b%d' % i, [opts.emb_dim],
                                 opts.emb_dim))
            self._highway_wg.append(
                decayed_variable('highway_wg%d' % i, [opts.emb_dim] * 2,
                                 opts.emb_dim))
            self._highway_bg.append(
                decayed_variable('highway_bg%d' % i, [opts.emb_dim],
                                 opts.emb_dim))
        # highway() is applied to a 2-D view, then the sequence axis is restored.
        input_emb = tf.reshape(
            highway(tf.reshape(input_emb, [-1, opts.emb_dim]),
                    self._highway_w, self._highway_b, self._highway_wg,
                    self._highway_bg),
            [-1, opts.max_seq_length, opts.emb_dim])

    # LSTM
    if opts.hidden_layers > 1:
        # One cell object per layer: `[cell] * n` hands the same object to
        # every layer, which recent TensorFlow 1.x releases either reject or
        # turn into weight sharing across layers.
        self.cell = tf.contrib.rnn.MultiRNNCell(
            [make_lstm(opts.hidden_dim) for _ in range(opts.hidden_layers)])
    else:
        self.cell = make_lstm(opts.hidden_dim)

    hidden, _ = tf.nn.dynamic_rnn(self.cell,
                                  input_emb,
                                  sequence_length=tf.reduce_sum(mask, 1),
                                  dtype='float32')
    print(hidden.get_shape())
    return mask, hidden
def process_output_seq(self, allVoc=True):
    """
    Build the output-side embeddings.

    Which representations are embedded is driven by ``self._options.reps``:
    index 4 or 5 enables word embeddings, index 6 enables character
    embeddings, and index 7 (unless index 8 is set) enables tag embeddings.
    The enabled representations are concatenated on axis 1 and optionally
    passed through ``highway`` layers.

    Args:
        allVoc: when true, embed the evaluation vocabulary (word ids from
            ``restrict_voc_map``, characters from
            ``train_set.wid_to_charid``, tags from
            ``train_set.wid_to_tagsid``); otherwise embed ``self._labels``,
            ``self._labelsChar`` and ``self._evalTags``.

    Returns:
        The concatenated (and possibly highway-transformed) output
        embedding tensor.
    """
    opts = self._options

    def _trunc_normal(fan):
        # Truncated normal initializer with stddev = 1 / sqrt(fan).
        return tf.truncated_normal_initializer(
            stddev=1.0 / math.sqrt(float(fan)))

    def _decayed_variable(name, shape, fan):
        # Create a variable and register its L2 norm in the 'losses' collection.
        var = tf.get_variable(name=name,
                              shape=shape,
                              initializer=_trunc_normal(fan))
        tf.add_to_collection('losses', tf.nn.l2_loss(var))
        return var

    def _output_lstm(units):
        # ReLU LSTM cell, wrapped with output dropout while training.
        layer = tf.contrib.rnn.LSTMCell(units,
                                        state_is_tuple=False,
                                        activation=tf.nn.relu)
        if self._training and opts.dropout < 1.0:
            layer = tf.contrib.rnn.DropoutWrapper(
                layer, output_keep_prob=opts.dropout)
        return layer

    # Words
    if opts.reps[4] or opts.reps[5]:
        self._output_wordemb = tf.get_variable(
            name="output_wordemb",
            shape=[opts.output_vocab_size, opts.w_emb_out_dim],
            initializer=_trunc_normal(opts.w_emb_out_dim))
        if allVoc:
            word_ids = restrict_voc_map(tf.range(opts.eval_vocab_size),
                                        opts.eval_word_map)
        else:
            word_ids = tf.reshape(self._labels, [-1])
        w_output_embeddings = tf.nn.embedding_lookup(self._output_wordemb,
                                                     word_ids)

    # Characters
    if opts.reps[6]:
        # Either share the input-side character table/encoder or build a
        # dedicated output-side one.
        share_input_chars = opts.reps[2] and opts.reuse_character_layer
        if share_input_chars:
            char_table = self._charemb
        else:
            self._output_charemb_trunc = tf.get_variable(
                name='output_charemb',
                shape=[opts.char_vocab_size - 1, opts.c_emb_dim],
                initializer=_trunc_normal(opts.char_vocab_size))
            # Row 0 is a constant all-zero vector prepended to the trained rows.
            self._char_pad_out = tf.constant(0., shape=[1, opts.c_emb_dim])
            self._output_charemb = tf.concat(
                axis=0,
                values=[self._char_pad_out, self._output_charemb_trunc])
            char_table = self._output_charemb

        if allVoc:
            flat_char_ids = tf.reshape(
                tf.stack(opts.train_set.wid_to_charid), [-1])
        else:
            flat_char_ids = tf.reshape(self._labelsChar, [-1])
        c_output_embeddings = tf.nn.embedding_lookup(char_table,
                                                     flat_char_ids)

        if opts.charLayer == 'conv':
            if share_input_chars:
                filters = self._convfilters
            else:
                self._output_convfilters = []
                for width, depth in zip(opts.window_sizes,
                                        opts.filter_dims):
                    self._output_convfilters.append(
                        _decayed_variable('output_filter%d' % width,
                                          [width, opts.c_emb_dim, depth],
                                          width * opts.c_emb_dim))
                filters = self._output_convfilters
            rows = opts.eval_vocab_size if allVoc else -1
            per_word = tf.reshape(
                c_output_embeddings,
                [rows, opts.max_word_length, opts.c_emb_dim])
            c_output_emb = CE(per_word, filters)
        elif opts.charLayer == 'LSTM':
            if share_input_chars:
                cell_fw = self.char_cell_fw
                cell_bw = self.char_cell_bw
            else:
                self.output_char_cell_fw = _output_lstm(opts.charLSTM_dim)
                self.output_char_cell_bw = _output_lstm(opts.charLSTM_dim)
                cell_fw = self.output_char_cell_fw
                cell_bw = self.output_char_cell_bw
            # Sign of the character ids, summed over axis 1, gives the
            # per-row sequence length passed to CE_RNN.
            if allVoc:
                mask_c = tf.sign(tf.stack(opts.train_set.wid_to_charid))
            else:
                mask_c = tf.sign(self._labelsChar)
            rows = opts.eval_vocab_size if allVoc else -1
            per_word = tf.reshape(
                c_output_embeddings,
                [rows, opts.max_word_length, opts.c_emb_dim])
            c_output_emb = CE_RNN(per_word,
                                  cell_fw,
                                  cell_bw,
                                  tf.reduce_sum(mask_c, 1),
                                  name='biRNNOut')
        else:
            # No character encoder: just flatten the looked-up vectors.
            if not share_input_chars:
                # NOTE(review): `filters_dims` differs from the
                # `filter_dims` option read by the conv branch - confirm
                # that this attribute exists.
                flat_shape = [-1, sum(opts.filters_dims)]
            elif allVoc:
                flat_shape = [opts.eval_vocab_size, -1]
            else:
                flat_shape = [
                    opts.batch_size * opts.max_seq_length +
                    opts.noise_length, -1
                ]
            c_output_emb = tf.reshape(c_output_embeddings, flat_shape)

    # Tags
    if opts.reps[7] and not opts.reps[8]:
        self._output_tagEmbs = []
        tag_output_embs = []
        tags_map = tf.stack(opts.train_set.wid_to_tagsid)
        for i in range(opts.max_tag_number):
            # Dedicated output tag tables ("output_tagEmbs%d") are disabled;
            # the input-side tables in self._tagEmbs are used instead.
            if allVoc:
                tag_ids = tf.reshape(tags_map[:, i], [-1])
            else:
                tag_ids = tf.reshape(self._evalTags[:, i], [-1])
            tag_output_embeddings = tf.nn.embedding_lookup(
                self._tagEmbs[i], tag_ids)
            tag_output_embs.append(
                tf.reshape(tag_output_embeddings, [-1, opts.t_emb_dim]))
        tag_output_emb = tf.concat(axis=1, values=tag_output_embs)
        if opts.tagLayer == "LSTM":
            self.output_tag_cell_fw = _output_lstm(opts.tagLSTM_dim)
            self.output_tag_cell_bw = _output_lstm(opts.tagLSTM_dim)
            # Row count of the tag batch: labels plus noise samples while
            # training, the evaluation vocabulary otherwise.
            train_rows = (opts.batch_size * opts.max_seq_length +
                          opts.noise_length)
            size = (int(self._training) * train_rows +
                    int(not self._training) * opts.eval_vocab_size)
            tag_steps = tf.reshape(
                tag_output_emb,
                [-1, opts.max_tag_number, opts.t_emb_dim])
            tag_output_emb = CE_RNN(tag_steps,
                                    self.output_tag_cell_fw,
                                    self.output_tag_cell_bw,
                                    tf.constant(opts.max_tag_number,
                                                dtype='int64',
                                                shape=[size]),
                                    name='tagBiRNNOut')

    output_embs = []
    if opts.reps[4] or opts.reps[5]:
        output_embs.append(w_output_embeddings)
    if opts.reps[6]:
        output_embs.append(c_output_emb)
    if opts.reps[7] and not opts.reps[8]:
        output_embs.append(tag_output_emb)
    output_emb = tf.concat(axis=1, values=output_embs)
    shape = tf.shape(output_emb)

    # Highway layers
    if opts.output_highway_layers > 0:
        self._output_highway_w = []
        self._output_highway_wg = []
        self._output_highway_b = []
        self._output_highway_bg = []
        for i in range(opts.output_highway_layers):
            self._output_highway_w.append(
                _decayed_variable('output_highway_w%d' % i,
                                  [opts.hidden_dim] * 2, opts.hidden_dim))
            self._output_highway_b.append(
                _decayed_variable('output_highway_b%d' % i,
                                  [opts.hidden_dim], opts.hidden_dim))
            self._output_highway_wg.append(
                _decayed_variable('output_highway_wg%d' % i,
                                  [opts.hidden_dim] * 2, opts.hidden_dim))
            self._output_highway_bg.append(
                _decayed_variable('output_highway_bg%d' % i,
                                  [opts.hidden_dim], opts.hidden_dim))
        # Restore the pre-highway dynamic shape on the way out.
        output_emb = tf.reshape(
            highway(output_emb, self._output_highway_w,
                    self._output_highway_b, self._output_highway_wg,
                    self._output_highway_bg), shape)
    return output_emb