def build_word_decoder(self, word_vectors_3, char_ids_3):
    config = self.config
    with tf.variable_scope('word_condition_projection'):
        word_vectors_3 = layers.mlp(word_vectors_3, self.config['sentence_decoder_projection'])
    with tf.variable_scope('word_decoder'):
        spell_vector_len = config['spell_vector_len']
        spell_vector_size = spell_vector_len * config['char_embed_size']
        spell_vector_size *= 2  # TODO: make this factor configurable
        # Grab char embeds and concat them to spelling vector representations of words
        char_ids_3 = self.add_go(char_ids_3, axis=2)
        char_embeds_4 = layers.embedding(self.num_chars, config['char_embed_size'], char_ids_3)
        spell_vectors_3 = self.create_spell_vector(char_embeds_4, spell_vector_len)
        # Pass the spelling vector through a layer that can see previous chars, but can't see ahead
        with tf.variable_scope('future_masked_spelling'):
            spell_vectors_projected_3 = layers.feed_forward(
                spell_vectors_3, num_nodes=spell_vector_size,
                seq_len_for_future_mask=spell_vector_len)
        # Reshape the word representation into individual char representations
        batch_size, sentence_len, word_len = tf.unstack(tf.shape(char_ids_3))
        char_size = spell_vectors_projected_3.shape.as_list()[-1] // spell_vector_len
        char_vectors_4 = tf.reshape(
            spell_vectors_projected_3,
            [batch_size, sentence_len, spell_vector_len, char_size])
        char_vectors_4 = char_vectors_4[:, :, :word_len, :]
        # Project each char_vector up to the size of the conditioning word_vector
        with tf.variable_scope('char_projection'):
            word_depth = word_vectors_3.shape.as_list()[-1]
            char_vectors_4 = layers.feed_forward(char_vectors_4, num_nodes=word_depth)
        # Add the conditioning word_vector to each char and pass the result through an MLP
        char_vectors_4 += tf.expand_dims(word_vectors_3, axis=2)
        char_vectors_4 = layers.mlp(char_vectors_4, config['word_decoder_mlp'])
        with tf.variable_scope('logits'):
            char_logits_4 = layers.feed_forward(
                char_vectors_4, num_nodes=self.num_chars,
                noise_level=config['noise_level'])
    return char_logits_4
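# Hypothetical call sketch (not part of the original code): build_word_decoder takes one
# conditioning vector per word plus that word's character ids and returns one logit vector
# over the character vocabulary per character position. The names and shapes below are
# assumptions meant only to indicate the expected ranks of the tensors.
#
#   word_vectors_3: [batch_size, sentence_len, word_depth]   float conditioning vectors
#   char_ids_3:     [batch_size, sentence_len, word_len]     integer character ids
#
#   char_logits_4 = self.build_word_decoder(word_vectors_3, char_ids_3)
#   char_logits_4: [batch_size, sentence_len, word_len + 1, num_chars]
#                  (add_go prepends a GO step along the character axis)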
def build_positional_char_embeds(self, char_ids_3, char_embed_size, mlp_layer_specs, word_len_limit):
    """Embed chars and append each char's relative position (between 0 and 1) within its word."""
    char_ids_3 = char_ids_3[:, :, :word_len_limit]  # potentially trim long words
    batch_size, max_sentence_len, max_word_len = tf.unstack(tf.shape(char_ids_3))
    # Select char embeddings
    with tf.variable_scope('chars'):
        char_embeds_4 = layers.embedding(self.num_chars, char_embed_size, char_ids_3)
    # Create char position ids for every possible char position in the batch (including padding)
    position_ids_1 = tf.range(max_word_len)
    position_ids_3 = tf.expand_dims(tf.expand_dims(position_ids_1, 0), 0)
    position_ids_3 = tf.tile(position_ids_3, [batch_size, max_sentence_len, 1])
    # Mask position_ids for padding chars
    where = tf.equal(char_ids_3, -1)
    position_ids_3 = tf.where(where, char_ids_3, tf.cast(position_ids_3, char_ids_3.dtype))
    # Convert position_ids to relative positions (scalar between 0 and 1)
    word_lengths_3 = tf.reduce_max(position_ids_3, axis=2, keep_dims=True)
    word_lengths_3 = tf.where(tf.equal(word_lengths_3, 0), tf.ones_like(word_lengths_3), word_lengths_3)
    word_lengths_3 = tf.cast(word_lengths_3, char_embeds_4.dtype)
    position_ids_3 = tf.cast(position_ids_3, char_embeds_4.dtype)
    relative_positions_3 = position_ids_3 / word_lengths_3
    # Mask relative_positions for padding chars
    relative_positions_3 = tf.where(where, tf.zeros_like(relative_positions_3), relative_positions_3)
    # Combine char embeddings with their respective positions
    relative_positions_4 = tf.expand_dims(relative_positions_3, axis=3)
    positional_char_embeds_4 = tf.concat([char_embeds_4, relative_positions_4], axis=3)
    positional_char_embeds_4 = layers.mlp(positional_char_embeds_4, mlp_layer_specs)
    return positional_char_embeds_4
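# Illustration only (not part of the model): a small NumPy sketch of the relative-position
# computation above for a single word, assuming padding characters are marked with -1 as in
# the TensorFlow code. Real characters get a position in [0, 1]; padding gets 0.
import numpy as np

def _relative_positions(char_ids_1):
    """Return a relative position for each real char and 0.0 for padding chars."""
    char_ids_1 = np.asarray(char_ids_1)
    position_ids = np.arange(len(char_ids_1))
    padding = char_ids_1 == -1
    position_ids = np.where(padding, -1, position_ids)
    word_length = max(position_ids.max(), 1)  # index of the last real char, guarded against 0
    relative = position_ids / word_length
    return np.where(padding, 0.0, relative)

# A 4-char word padded out to length 6: positions 0, 1/3, 2/3, 1, then zeros for padding.
print(_relative_positions([7, 2, 9, 4, -1, -1]))  # [0.  0.333  0.667  1.  0.  0.]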
def build_model(t_params, n_dim_img, n_dim_txt, n_dim_enc, n_dim_dec, n_dim_vocab, optimizer):
    ''' Build the whole model for training '''
    x = tensor.tensor3('x', config.floatX)
    mask_x = tensor.matrix('mask_x', 'int8')
    # Encoder(s) and initialization of the hidden layer
    enc = gru(mask_x, dropout(x), t_params, n_dim_img, n_dim_enc, 'enc')[-1]
    init_h = tensor.tanh(dense(enc, t_params, n_dim_enc, n_dim_dec, 'init_h'))
    y = tensor.matrix('y', 'int32')
    mask_y = tensor.matrix('mask_y', 'int8')
    n_steps, n_samples = y.shape
    # Word embedding, shifted right by one step so the decoder predicts the next word
    emb = embedding(y, t_params, n_dim_vocab, n_dim_txt, 'emb').reshape(
        (n_steps, n_samples, n_dim_txt))[:-1]
    emb = tensor.concatenate([tensor.zeros((1, n_samples, n_dim_txt), config.floatX), emb])
    # Decoder(s)
    dec = gru(mask_y, emb, t_params, n_dim_txt, n_dim_dec, 'dec', init_h=init_h)
    # Fully-connected layer
    fc = dense(dropout(dec), t_params, n_dim_dec, n_dim_vocab, 'fc')
    # Classifier
    prob = tensor.nnet.softmax(fc.reshape((n_steps * n_samples, n_dim_vocab)))
    # Cost function: mean negative log-likelihood per sequence, ignoring padded steps
    cost = prob[tensor.arange(n_steps * n_samples), y.flatten()].reshape((n_steps, n_samples))
    cost = ((-tensor.log(cost + 1e-6) * mask_y).sum(0) / mask_y.astype(config.floatX).sum(0)).mean()
    grads = tensor.grad(cost, list(t_params.values()))
    f_cost, f_update = optimizer(tensor.scalar('lr'), t_params, grads, [x, mask_x, y, mask_y], cost)
    return f_cost, f_update
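# Hypothetical training-loop sketch (not part of the original code): it assumes the
# `optimizer` passed to build_model follows the common Theano convention where the first
# returned function computes the cost and caches the gradients for the given inputs, and
# the second applies the update for a learning rate. `batches` and `learning_rate` are
# illustrative names.
def train_epoch(f_cost, f_update, batches, learning_rate):
    """Run one pass over (x, mask_x, y, mask_y) minibatches and return the mean cost."""
    costs = []
    for x_batch, mask_x_batch, y_batch, mask_y_batch in batches:
        costs.append(f_cost(x_batch, mask_x_batch, y_batch, mask_y_batch))
        f_update(learning_rate)
    return sum(costs) / max(len(costs), 1)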
def build_dec(t_params, n_dim_txt, n_dim_enc, n_dim_dec, n_dim_vocab, beam_size):
    ''' Build the decoder for texts '''
    def _step(_prob):
        # Pick the currently most probable word, record its log-probability, then zero it
        # out so the next scan step picks the runner-up.
        _y = _prob.argmax(-1)
        _log_prob = tensor.log(_prob[tensor.arange(_y.shape[0]), _y] + 1e-6)
        _prob = tensor.set_subtensor(_prob[tensor.arange(_y.shape[0]), _y], 0)
        return _y, _log_prob, _prob

    y = tensor.vector('y', 'int32')
    init_h = tensor.matrix('init_h', config.floatX)
    n_samples = y.shape[0]
    # Word embedding (a negative id stands for the start-of-sequence step and maps to zeros)
    emb = tensor.switch(y[:, None] < 0,
                        tensor.zeros((n_samples, n_dim_txt), config.floatX),
                        embedding(y, t_params, n_dim_vocab, n_dim_txt, 'emb'))
    # Decoder(s) - hidden layer for the next step
    next_h = gru(tensor.ones_like(y, 'int8'), emb, t_params, n_dim_txt, n_dim_dec, 'dec', True, init_h)
    # Fully-connected layer
    fc = dense(0.5 * next_h, t_params, n_dim_dec, n_dim_vocab, 'fc')
    # Classifier
    prob = tensor.nnet.softmax(fc)
    # Hypothesis words: the top `beam_size` candidates per sample, extracted by repeated argmax.
    # Note that set_subtensor returns a new tensor, so the updated probabilities must be
    # threaded through the scan via outputs_info for the zeroing to take effect.
    [next_y, next_log_prob, _], _ = theano.scan(_step,
                                                outputs_info=[None, None, prob],
                                                n_steps=beam_size)
    return theano.function([y, init_h], [next_y, next_log_prob, next_h], name='f_dec')
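# Hypothetical single-step usage sketch (not part of the original code): given the previous
# word ids (or a vector of -1 for the start-of-sequence step) and the current hidden state,
# f_dec returns the `beam_size` best candidate words per sample with their log-probabilities,
# plus the next hidden state. The helper name and shape comments are assumptions.
def decode_step(f_dec, prev_y, prev_h):
    """Run one decoder step; prev_y has shape (n_samples,), prev_h (n_samples, n_dim_dec)."""
    next_y, next_log_prob, next_h = f_dec(prev_y, prev_h)
    # next_y:        (beam_size, n_samples) candidate word ids, best first
    # next_log_prob: (beam_size, n_samples) log-probabilities of those candidates
    # next_h:        (n_samples, n_dim_dec) hidden state shared by all candidates
    return next_y, next_log_prob, next_h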
def __init__(self, conv_dim, num_classes):
    super(Discriminator, self).__init__()
    self.conv_dim = conv_dim
    self.res_1 = ResidualBlock_D(3, conv_dim)
    self.res_2 = ResidualBlock_D(conv_dim, conv_dim * 2)
    self.attn = SelfAttn(conv_dim * 2)
    self.res_3 = ResidualBlock_D(conv_dim * 2, conv_dim * 4)
    self.res_4 = ResidualBlock_D(conv_dim * 4, conv_dim * 8)
    self.res_5 = ResidualBlock_D(conv_dim * 8, conv_dim * 16)
    self.lrelu = lrelu(inplace=True)
    self.linear = spectral_norm(linear(conv_dim * 16, 1))
    self.embed = embedding(num_classes, conv_dim * 16)
    self.apply(init_weights)
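# A minimal sketch of how a forward pass for this discriminator might look, written here
# only as an illustration: the original forward method is not shown, and this follows the
# common SAGAN-style projection-discriminator pattern (an unconditional logit plus a class
# projection term). The global sum-pooling step and the return shape are assumptions.
import torch

def forward(self, x, labels):
    h = self.res_1(x)
    h = self.res_2(h)
    h = self.attn(h)
    h = self.res_3(h)
    h = self.res_4(h)
    h = self.res_5(h)
    h = self.lrelu(h)
    h = torch.sum(h, dim=[2, 3])  # global sum pool: (N, conv_dim * 16)
    out = self.linear(h)          # unconditional logit: (N, 1)
    # Projection term: inner product between the class embedding and the pooled features
    out = out + torch.sum(self.embed(labels) * h, dim=1, keepdim=True)
    return out.squeeze(1)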
def build_dec(t_params, n_dim_txt, n_dim_enc, n_dim_dec, n_dim_vocab):
    ''' Build the decoder for texts '''
    y = tensor.vector('y', 'int32')
    prev_h = tensor.matrix('init_h', config.floatX)
    n_samples = y.shape[0]
    # Word embedding (a negative id stands for the start-of-sequence step and maps to zeros)
    emb = tensor.switch(y[:, None] < 0,
                        tensor.zeros((n_samples, n_dim_txt), config.floatX),
                        embedding(y, t_params, n_dim_vocab, n_dim_txt, 'emb'))
    # Decoder(s) - hidden layer for the next step
    next_h = gru(tensor.ones_like(y, 'int8'), emb, t_params, n_dim_txt, n_dim_dec, 'dec', True, prev_h)
    # Fully-connected layer
    fc = dense(0.5 * next_h, t_params, n_dim_dec, n_dim_vocab, 'fc')
    # Classifier
    prob = tensor.nnet.softmax(fc)
    return theano.function([y, prev_h], [prob.argmax(-1), next_h], name='f_dec')
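# Hypothetical greedy-decoding sketch (not part of the original code): start from a -1
# "start" token (which the switch above maps to a zero embedding) and the encoder-derived
# initial hidden state, then repeatedly feed the argmax word back in. The helper name,
# `encoder_init_h`, `max_len`, and `eos_id` are illustrative assumptions.
import numpy as np

def greedy_decode(f_dec, encoder_init_h, n_samples, max_len, eos_id):
    """Decode word ids step by step until eos_id or max_len is reached."""
    y = -1 * np.ones((n_samples,), dtype='int32')
    h = encoder_init_h
    words = []
    for _ in range(max_len):
        y, h = f_dec(y, h)
        y = y.astype('int32')  # argmax returns int64; the compiled input expects int32
        words.append(y)
        if np.all(y == eos_id):
            break
    return words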