import theano
from theano import config, tensor

# The layer helpers `gru`, `dense`, `dropout` and `embedding` are assumed to
# be defined elsewhere in this module.

def build_model(t_params, n_dim_img, n_dim_txt, n_dim_enc, n_dim_dec, n_dim_vocab, optimizer):
    ''' Build the whole model for training '''
    x = tensor.tensor3('x', config.floatX)
    mask_x = tensor.matrix('mask_x', 'int8')
    # Encoder(s) and initialization of the decoder's hidden layer
    enc = gru(mask_x, dropout(x), t_params, n_dim_img, n_dim_enc, 'enc')[-1]
    init_h = tensor.tanh(dense(enc, t_params, n_dim_enc, n_dim_dec, 'init_h'))
    y = tensor.matrix('y', 'int32')
    mask_y = tensor.matrix('mask_y', 'int8')
    n_steps, n_samples = y.shape
    # Word embedding, shifted right by one step (a zero vector stands in for
    # the start token) so the decoder predicts the word at each position
    emb = embedding(y, t_params, n_dim_vocab, n_dim_txt, 'emb').reshape((n_steps, n_samples, n_dim_txt))[:-1]
    emb = tensor.concatenate([tensor.zeros((1, n_samples, n_dim_txt), config.floatX), emb])
    # Decoder(s)
    dec = gru(mask_y, emb, t_params, n_dim_txt, n_dim_dec, 'dec', init_h=init_h)
    # Fully-connected layer
    fc = dense(dropout(dec), t_params, n_dim_dec, n_dim_vocab, 'fc')
    # Classifier
    prob = tensor.nnet.softmax(fc.reshape((n_steps * n_samples, n_dim_vocab)))
    # Cost: average negative log-likelihood per sample, masked over padding
    cost = prob[tensor.arange(n_steps * n_samples), y.flatten()].reshape((n_steps, n_samples))
    cost = ((-tensor.log(cost + 1e-6) * mask_y).sum(0) / mask_y.astype(config.floatX).sum(0)).mean()
    grads = tensor.grad(cost, list(t_params.values()))
    f_cost, f_update = optimizer(tensor.scalar('lr'), t_params, grads, [x, mask_x, y, mask_y], cost)
    return f_cost, f_update
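# A minimal training-loop sketch for the pair returned by `build_model`,
# assuming the optimizer follows the usual Theano split where `f_cost` runs
# the forward/backward pass and `f_update` applies the stored gradients.
# `init_params`, `adadelta` and `load_batches` are hypothetical helpers, and
# the dimensions are placeholders, not values taken from this code.
def train_sketch():
    t_params = init_params()  # hypothetical: OrderedDict of Theano shared variables
    f_cost, f_update = build_model(t_params, n_dim_img=4096, n_dim_txt=512,
                                   n_dim_enc=512, n_dim_dec=512,
                                   n_dim_vocab=10000, optimizer=adadelta)
    for epoch in range(20):
        for x, mask_x, y, mask_y in load_batches():  # hypothetical batch iterator
            cost = f_cost(x, mask_x, y, mask_y)      # forward pass; gradients are stored
            f_update(1.0)                            # apply the parameter update
        print(epoch, cost)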
def build_dec(t_params, n_dim_txt, n_dim_enc, n_dim_dec, n_dim_vocab, beam_size):
    ''' Build the decoder for texts '''
    def _step(_prob):
        # Pick the current best word and its log-probability, then zero that
        # entry so the next scan step yields the runner-up
        _y = _prob.argmax(-1)
        _log_prob = tensor.log(_prob[tensor.arange(_y.shape[0]), _y] + 1e-6)
        _prob = tensor.set_subtensor(_prob[tensor.arange(_y.shape[0]), _y], 0)
        return _y, _log_prob, _prob

    y = tensor.vector('y', 'int32')
    init_h = tensor.matrix('init_h', config.floatX)
    n_samples = y.shape[0]
    # Word embedding (negative indices mark the start token and map to zeros)
    emb = tensor.switch(y[:, None] < 0,
                        tensor.zeros((n_samples, n_dim_txt), config.floatX),
                        embedding(y, t_params, n_dim_vocab, n_dim_txt, 'emb'))
    # Decoder(s) - one GRU step; the result initializes the hidden layer of the next step
    next_h = gru(tensor.ones_like(y, 'int8'), emb, t_params, n_dim_txt, n_dim_dec, 'dec', True, init_h)
    # Fully-connected layer (the 0.5 rescaling replaces dropout at test time)
    fc = dense(0.5 * next_h, t_params, n_dim_dec, n_dim_vocab, 'fc')
    # Classifier
    prob = tensor.nnet.softmax(fc)
    # Hypo words: `prob` recurs through the scan so that entries zeroed in
    # earlier steps stay suppressed, giving the `beam_size` best words
    [next_y, next_log_prob, _], _ = theano.scan(_step, outputs_info=[None, None, prob], n_steps=beam_size)
    return theano.function([y, init_h], [next_y, next_log_prob, next_h], name='f_dec')
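# Sketch of how `f_dec` might drive a simple beam search over a single sample,
# assuming `f_enc` comes from `build_enc` below, the negative start index maps
# to the zero embedding (per the `tensor.switch` above), and word index 0 is
# the end-of-sentence token (an assumption, not taken from this code). Each
# call to `f_dec` returns the `beam_size` best next words per hypothesis; the
# driver keeps the `beam_size` highest-scoring extensions overall.
import numpy

def beam_search_sketch(f_enc, f_dec, x, mask_x, beam_size, max_len=30):
    init_h, = f_enc(x, mask_x)
    beams = [(0.0, [-1], init_h)]                 # (score, word indices, hidden state)
    finished = []
    for _ in range(max_len):
        candidates = []
        for score, words, h in beams:
            y = numpy.asarray([words[-1]], 'int32')
            next_y, next_log_prob, next_h = f_dec(y, h)
            for k in range(beam_size):
                candidates.append((score + float(next_log_prob[k, 0]),
                                   words + [int(next_y[k, 0])], next_h))
        candidates.sort(key=lambda c: c[0], reverse=True)
        beams = []
        for cand in candidates[:beam_size]:
            # Hypotheses ending in the assumed EOS index are set aside
            (finished if cand[1][-1] == 0 else beams).append(cand)
        if not beams:
            break
    best = max(finished + beams, key=lambda c: c[0])
    return best[1][1:]                            # drop the leading start index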
import tensorflow as tf  # written against the pre-1.0 API (tf.concat(dim, values), tf.sub)

# `network_arm`, `fork`, `gru`, `gru_last` and `batch_size` are assumed to be
# defined elsewhere in this module.

def prediction_network(x_h, x_m, test=False):
    with tf.variable_scope("network") as sc:
        if test:
            sc.reuse_variables()
        # Separate arms for the hand crop and the main frame
        with tf.variable_scope("hand"):
            flat_h = network_arm(x_h)
        with tf.variable_scope("main"):
            flat_m = network_arm(x_m)
        combined = tf.concat(2, [flat_h, flat_m])
        # `fork` presumably splits the sequence into the encoder input and the
        # past/future reconstruction targets
        combined, past, future = fork(combined)
        encoded = gru_last(combined, 512 * 2, 1, batch_size, "lstm_encoder")
        decoded_past = gru(combined, 512, 1, batch_size, "lstm_past")
        decoded_future = gru(combined, 512, 1, batch_size, "lstm_future")
        # Sum of L2 reconstruction losses over the past and future segments
        return tf.add(tf.nn.l2_loss(tf.sub(decoded_past, past)),
                      tf.nn.l2_loss(tf.sub(decoded_future, future)))
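# A minimal sketch of wiring `prediction_network` into a training op, using
# the same pre-1.0 TensorFlow API as above. The placeholder shapes are left
# unspecified because they depend on what `network_arm` expects; the optimizer
# and learning rate are illustrative choices, not taken from this code.
def train_graph_sketch():
    x_h = tf.placeholder(tf.float32, name='x_h')  # hand-crop input (shape set by the caller)
    x_m = tf.placeholder(tf.float32, name='x_m')  # main-frame input (shape set by the caller)
    loss = prediction_network(x_h, x_m)
    train_op = tf.train.AdamOptimizer(1e-4).minimize(loss)
    return x_h, x_m, loss, train_op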
def build_enc(t_params, n_dim_img, n_dim_enc, n_dim_dec):
    ''' Build the encoder for images '''
    x = tensor.tensor3('x', config.floatX)
    mask_x = tensor.matrix('mask_x', 'int8')
    # Encoder(s) and initialization of the decoder's hidden layer
    # (the 0.5 rescaling replaces dropout at test time)
    enc = gru(mask_x, 0.5 * x, t_params, n_dim_img, n_dim_enc, 'enc')[-1]
    init_h = tensor.tanh(dense(enc, t_params, n_dim_enc, n_dim_dec, 'init_h'))
    return theano.function([x, mask_x], [init_h], name='f_enc')
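# Usage sketch for `f_enc`: pad a batch of variable-length frame-feature
# sequences into the (n_frames, n_samples, n_dim_img) layout that `x` expects,
# with `mask_x` flagging real (1) versus padded (0) frames. Assumes
# floatX == 'float32'; `features` is a caller-supplied list, not from this code.
import numpy

def encode_sketch(f_enc, features):
    # `features`: list of (n_frames_i, n_dim_img) float32 arrays, one per clip
    n_frames = max(len(f) for f in features)
    x = numpy.zeros((n_frames, len(features), features[0].shape[1]), 'float32')
    mask_x = numpy.zeros((n_frames, len(features)), 'int8')
    for i, f in enumerate(features):
        x[:len(f), i] = f
        mask_x[:len(f), i] = 1
    init_h, = f_enc(x, mask_x)
    return init_h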
def build_dec(t_params, n_dim_txt, n_dim_enc, n_dim_dec, n_dim_vocab):
    ''' Build the decoder for texts '''
    y = tensor.vector('y', 'int32')
    prev_h = tensor.matrix('init_h', config.floatX)
    n_samples = y.shape[0]
    # Word embedding (negative indices mark the start token and map to zeros)
    emb = tensor.switch(y[:, None] < 0,
                        tensor.zeros((n_samples, n_dim_txt), config.floatX),
                        embedding(y, t_params, n_dim_vocab, n_dim_txt, 'emb'))
    # Decoder(s) - one GRU step; the result initializes the hidden layer of the next step
    next_h = gru(tensor.ones_like(y, 'int8'), emb, t_params, n_dim_txt, n_dim_dec, 'dec', True, prev_h)
    # Fully-connected layer (the 0.5 rescaling replaces dropout at test time)
    fc = dense(0.5 * next_h, t_params, n_dim_dec, n_dim_vocab, 'fc')
    # Classifier: return the greedily chosen word and the new hidden state
    prob = tensor.nnet.softmax(fc)
    return theano.function([y, prev_h], [prob.argmax(-1), next_h], name='f_dec')
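# Greedy decoding sketch combining `f_enc` and this single-step `f_dec`:
# start from the negative index (mapped to the zero embedding above) and feed
# each predicted word back in. Word index 0 as end-of-sentence is an
# assumption, not taken from this code.
import numpy

def greedy_decode_sketch(f_enc, f_dec, x, mask_x, max_len=30):
    h, = f_enc(x, mask_x)
    y = numpy.asarray([-1], 'int32')              # negative start index -> zero embedding
    words = []
    for _ in range(max_len):
        next_y, h = f_dec(y, h)
        y = next_y.astype('int32')                # argmax returns int64; `y` must be int32
        if int(y[0]) == 0:                        # assumed end-of-sentence index
            break
        words.append(int(y[0]))
    return words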