def beam_search(nodes, graph, beam_size, expand_size=None,
                compress_mask=False, model=None, max_calc_batch_size=4096):
    """Run beam search over TSP samples with the given model.

    Args:
        nodes: Forwarded unchanged to ``model.precompute_fixed`` and
            ``TSP.make_state``.
        graph: Forwarded unchanged to the same two calls.
        beam_size: Passed through to the delegated ``beam_search`` call.
        expand_size: Passed through to ``model.propose_expansions``.
        compress_mask: When truthy the state is built with
            ``visited_dtype=torch.int64``; otherwise ``torch.uint8``.
        model: Object providing ``precompute_fixed`` and
            ``propose_expansions``. Required despite the ``None`` default.
        max_calc_batch_size: Passed through to ``model.propose_expansions``.

    Returns:
        Whatever the delegated ``beam_search(state, beam_size, fn)`` returns.

    Raises:
        AssertionError: If ``model`` is ``None``.
    """
    assert model is not None, "Provide model"

    # Computed once up front and captured by the expansion callback below.
    precomputed = model.precompute_fixed(nodes, graph)

    def _expand(beam):
        return model.propose_expansions(
            beam,
            precomputed,
            expand_size,
            normalize=True,
            max_calc_batch_size=max_calc_batch_size,
        )

    if compress_mask:
        mask_dtype = torch.int64
    else:
        mask_dtype = torch.uint8
    initial_state = TSP.make_state(nodes, graph, visited_dtype=mask_dtype)

    # NOTE(review): `beam_search` here resolves through the enclosing module
    # scope -- presumably a generic routine imported there; confirm it is not
    # this wrapper itself.
    return beam_search(initial_state, beam_size, _expand)
def beam_search(input, beam_size, expand_size=None, compress_mask=False,
                model=None, max_calc_batch_size=4096):
    """Run beam search for a PDP instance batch with the given model.

    Args:
        input: Forwarded unchanged to ``model.precompute_fixed`` and
            ``PDP.make_state``.
        beam_size: Passed through to the delegated ``beam_search`` call.
        expand_size: Passed through to ``model.propose_expansions``.
        compress_mask: When truthy the state is built with
            ``visited_dtype=torch.int64``; otherwise ``torch.uint8``.
        model: Object providing ``precompute_fixed`` and
            ``propose_expansions``. Required despite the ``None`` default.
        max_calc_batch_size: Passed through to ``model.propose_expansions``.

    Returns:
        Whatever the delegated ``beam_search(state, beam_size, fn)`` returns.

    Raises:
        AssertionError: If ``model`` is ``None``.
    """
    assert model is not None, "Provide model"

    precomputed = model.precompute_fixed(input)

    # NOTE: the original carried a disabled variant that used torch.bool for
    # the uncompressed mask; torch.uint8 is what is actually in effect.
    if compress_mask:
        mask_dtype = torch.int64
    else:
        mask_dtype = torch.uint8
    initial_state = PDP.make_state(input, visited_dtype=mask_dtype)

    def _expand(beam):
        return model.propose_expansions(
            beam,
            precomputed,
            expand_size,
            normalize=True,
            max_calc_batch_size=max_calc_batch_size,
        )

    # NOTE(review): `beam_search` resolves through the enclosing module scope
    # -- presumably a generic routine imported there; confirm.
    return beam_search(initial_state, beam_size, _expand)
def test(self):
    """Caption the test set and write the results to ``./captions.txt``.

    For every sample one line is written in the form
    ``<image id>/<ground-truth words>/<greedy words>/<beam words>``, where
    each word is followed by a single space. Greedy and beam captions are
    cut at the first ``<EOS>`` token.

    The test data is processed in chunks of ``self.batchsize``. A final,
    shorter chunk is zero-padded up to the full batch size for the network
    and only its real rows are written out.
    """
    p = ProgressBar()
    num_samples = len(self.test_data)
    # Context manager so the file is closed even if decoding raises.
    with open('./captions.txt', 'w') as f:
        for i_ in p(range(0, num_samples, self.batchsize)):
            # Number of real samples in this chunk (smaller for the last one).
            valid = min(self.batchsize, num_samples - i_)
            data = np.zeros((self.batchsize, self.in_channel,
                             self.input_height, self.input_width),
                            dtype=np.float32)
            label = []
            first_words = np.zeros((self.batchsize), dtype=np.int32)
            for j in range(valid):
                sample = self.test_data[i_ + j]
                image = self.image_hash[sample[0]]
                data[j, :, :, :] = google_prepare(image)
                label.append(sample[1])
                first_words[j] = sample[1][0]

            genrated_sentence = []
            data = Variable(cuda.to_gpu(data))
            state = {name: Variable(self.xp.zeros((data.shape[0], 1024),
                                                  dtype=self.xp.float32))
                     for name in ('c1', 'h1')}
            h = self.enc(data, train=False, test=True)
            ### first LSTM ###
            state, _ = self.dec(h, state, train=False, test=True, image=True)
            ### input <SOS> ###
            state, y = self.dec(Variable(cuda.to_gpu(first_words)), state,
                                train=False, test=True)
            # The literal 20 is presumably the beam width -- confirm against
            # the beam_search helper's signature.
            genrated_sentence_beamed = beam_search(
                self.dec, state, y, data, 20, self.mydict_inv)

            # Greedy decoding; maximum sentence length is 50.
            # NOTE(review): the argmax of the <SOS> step's output is fed back
            # but never recorded, so the written greedy caption starts from
            # the following step's output -- confirm this is intended.
            for _ in range(50):
                y = Variable(self.xp.array(
                    np.argmax(y.data.get(), axis=1)).astype(self.xp.int32))
                state, y = self.dec(y, state, train=False, test=True)
                genrated_sentence.append(y.data)

            # Move each step's argmax to the host once per batch instead of
            # once per (sample, step) pair.
            greedy_ids = [cuda.to_cpu(step_out.argmax(1))
                          for step_out in genrated_sentence]

            for b in range(valid):
                f.write(str(self.test_data[i_ + b][0]) + '/')
                # GT caption (first and last token skipped)
                for index in label[b][1:-1]:
                    f.write(self.mydict_inv[index] + ' ')
                f.write("/")
                # Predicted caption
                for step_ids in greedy_ids:
                    index = step_ids[b]
                    if self.mydict_inv[index] == '<EOS>':
                        break
                    f.write(self.mydict_inv[index] + ' ')
                f.write("/")
                # beamed caption
                for index in genrated_sentence_beamed[b]:
                    if self.mydict_inv[index] == '<EOS>':
                        break
                    f.write(self.mydict_inv[index] + ' ')
                f.write("\n")
def testTPUBeam(self):
    """Check decoded ids and per-step state handling of the beam search.

    The state carried through the search is asserted inside the
    ``symbols_to_logits`` callback (it cannot be fetched out of the loop),
    and the decoded ids are compared against the expected beams after the
    graph has been run.
    """
    batch_size = 1
    beam_size = 2
    vocab_size = 3
    decode_length = 3

    initial_ids = tf.constant([0] * batch_size)  # GO
    probabilities = tf.constant([[[0.1, 0.1, 0.8], [0.1, 0.1, 0.8]],
                                 [[0.4, 0.5, 0.1], [0.2, 0.4, 0.4]],
                                 [[0.05, 0.9, 0.05], [0.4, 0.4, 0.2]]])

    # The top beam is always selected so we should see the top beam's state
    # at each position, which is the one that's getting 3 added to it each
    # step.
    expected_states = tf.constant([[[0.], [0.]],
                                   [[3.], [3.]],
                                   [[6.], [6.]]])

    def symbols_to_logits(_, i, states, kv_encdecs):  # pylint: disable=unused-argument
        # We have to assert the values of state inline here since we can't
        # fetch them out of the loop!
        with tf.control_dependencies(
                [tf.assert_equal(states["state"], expected_states[i])]):
            logits = tf.to_float(tf.log(probabilities[i, :]))

        states["state"] += tf.constant([[3.], [7.]])
        return logits, states

    states = {
        "state": tf.zeros((batch_size, 1)),
    }
    states["state"] = tf.placeholder_with_default(states["state"],
                                                  shape=(None, 1))

    # The positional 3.5 is presumably the length-penalty alpha -- confirm
    # against beam_search.beam_search's signature.
    final_ids, _ = beam_search.beam_search(symbols_to_logits,
                                           initial_ids,
                                           beam_size,
                                           decode_length,
                                           vocab_size,
                                           3.5,
                                           eos_id=1,
                                           states=states)

    with self.test_session() as sess:
        # Catch and fail so that the testing framework doesn't think it's an
        # error
        try:
            final_ids_value = sess.run(final_ids)
        except tf.errors.InvalidArgumentError as e:
            raise AssertionError(e.message)

    # Compare the fetched values, not the symbolic tensor.
    self.assertAllEqual([[[0, 2, 0, 1], [0, 2, 1, 0]]], final_ids_value)
def main(args):
    """Generate captions with a saved Seq2Seq model and write them to a file.

    Loads ``saved/<args.name>/<args.checkpoint>``, rebuilds the data loader
    and model from the checkpoint's config, decodes every batch (direct
    model call when ``args.beam_size == 1``, ``beam_search`` otherwise) and
    writes one ``<video id>,<caption>`` line per batch to ``args.output``.

    Args:
        args: Namespace providing ``name``, ``checkpoint``, ``data_dir``,
            ``no_cuda``, ``beam_size`` and ``output``.
    """
    checkpoint_path = os.path.join("saved/", args.name, args.checkpoint)
    # With CUDA disabled, remap all storages to the CPU so that checkpoints
    # saved from a GPU run still load on a CPU-only host.
    map_location = (lambda storage, loc: storage) if args.no_cuda else None
    checkpoint = torch.load(checkpoint_path, map_location=map_location)
    config = checkpoint['config']

    #if args.task.lower() == 'caption':
    # SECURITY NOTE: eval() executes whatever string the checkpoint's config
    # carries; only load checkpoints from trusted sources.
    embedder = eval(config['embedder']['type'])
    embedder_path = os.path.join("saved/", args.name, "embedder.pkl")
    data_loader = CaptionDataLoader(config, embedder, mode='test',
                                    path=args.data_dir,
                                    embedder_path=embedder_path)

    model = Seq2Seq(config, embedder=data_loader.embedder)
    model.load_state_dict(checkpoint['state_dict'])
    if not args.no_cuda:
        model.cuda()
    model.eval()
    model.summary()

    result = []
    # `video_ids` avoids shadowing the builtin `id`.
    for in_seq, video_ids in data_loader:
        in_seq = torch.FloatTensor(in_seq)
        in_seq = Variable(in_seq)
        if not args.no_cuda:
            in_seq = in_seq.cuda()
        if args.beam_size == 1:
            out_seq = model(in_seq, 24)
            out_seq = np.array([seq.data.cpu().numpy() for seq in out_seq])
            out_seq = np.transpose(out_seq, (1, 0, 2))
            out_seq = data_loader.embedder.decode_lines(out_seq)
        else:
            out_seq = beam_search(model, data_loader.embedder, in_seq,
                                  seq_len=24, beam_size=args.beam_size)
            out_seq = data_loader.embedder.decode_lines(out_seq)

        # Only the first id of each batch is recorded, as in the original.
        result.append((str(video_ids[0]), out_seq))

    with open(args.output, 'w') as f:
        for video_id, caption in result:
            caption = postprocess(caption)
            f.write(video_id + ',' + caption + '\n')
def beam_search(input, beam_size, expand_size=None, compress_mask=False,
                model=None, max_calc_batch_size=4096):
    """Run beam search for a PCTSP instance batch with the given model.

    Args:
        input: Forwarded unchanged to ``model.precompute_fixed`` and
            ``PCTSPDet.make_state``.
        beam_size: Passed through to the delegated ``beam_search`` call.
        expand_size: Passed through to ``model.propose_expansions``.
        compress_mask: When truthy the state is built with
            ``visited_dtype=torch.int64``; otherwise ``torch.uint8``.
        model: Object providing ``precompute_fixed`` and
            ``propose_expansions``. Required despite the ``None`` default.
        max_calc_batch_size: Passed through to ``model.propose_expansions``.

    Returns:
        Whatever the delegated ``beam_search(state, beam_size, fn)`` returns.

    Raises:
        AssertionError: If ``model`` is ``None``.
    """
    assert model is not None, "Provide model"

    precomputed = model.precompute_fixed(input)

    def _expand(beam):
        return model.propose_expansions(
            beam,
            precomputed,
            expand_size,
            normalize=True,
            max_calc_batch_size=max_calc_batch_size,
        )

    # Beam search always works on the deterministic problem variant, hence
    # PCTSPDet for the state.
    mask_dtype = torch.uint8
    if compress_mask:
        mask_dtype = torch.int64
    initial_state = PCTSPDet.make_state(input, visited_dtype=mask_dtype)

    # NOTE(review): `beam_search` resolves through the enclosing module scope
    # -- presumably a generic routine imported there; confirm.
    return beam_search(initial_state, beam_size, _expand)
def beam_search(input, beam_size, expand_size=None, compress_mask=False,
                model=None, max_calc_batch_size=4096):
    """Run beam search for an SDVRP instance batch with the given model.

    Args:
        input: Forwarded unchanged to ``model.precompute_fixed`` and
            ``SDVRP.make_state``.
        beam_size: Passed through to the delegated ``beam_search`` call.
        expand_size: Passed through to ``model.propose_expansions``.
        compress_mask: Must be falsy; mask compression is rejected here.
        model: Object providing ``precompute_fixed`` and
            ``propose_expansions``. Required despite the ``None`` default.
        max_calc_batch_size: Passed through to ``model.propose_expansions``.

    Returns:
        Whatever the delegated ``beam_search(state, beam_size, fn)`` returns.

    Raises:
        AssertionError: If ``model`` is ``None`` or ``compress_mask`` is
            truthy (checked in that order).
    """
    assert model is not None, "Provide model"
    assert not compress_mask, "SDVRP does not support compression of the mask"

    precomputed = model.precompute_fixed(input)

    def _expand(beam):
        return model.propose_expansions(
            beam,
            precomputed,
            expand_size,
            normalize=True,
            max_calc_batch_size=max_calc_batch_size,
        )

    initial_state = SDVRP.make_state(input)

    # NOTE(review): `beam_search` resolves through the enclosing module scope
    # -- presumably a generic routine imported there; confirm.
    return beam_search(initial_state, beam_size, _expand)
def main(params):
    """Build and train the sentence VAE, periodically sampling from it.

    Reads the corpus selected by ``params.input`` (``'GOT'`` or ``'PTB'``),
    builds the encoder/decoder graph, and runs ``params.num_epochs`` epochs
    of training. Along the way it prints progress every 100 iterations,
    generates a sample every 150 iterations (``online_inference`` or
    ``beam_search`` depending on ``params.beam_search``), writes merged
    summaries every 400 iterations and, when ``params.visualise`` is set,
    dumps and plots the KLD / annealing curves every 30000 iterations.

    Args:
        params: Run configuration object. It is also mutated: the
            ``is_training`` attribute is toggled during the loop.

    Raises:
        ValueError: If ``params.input`` is neither ``'GOT'`` nor ``'PTB'``.
    """
    if params.input == 'GOT':
        corpus_path = "/home/luoyy/datasets_small/got"
        data_raw = data_.got_read(corpus_path)
        data, labels_arr, embed_arr, data_dict = data_.prepare_data(
            data_raw, params)
    elif params.input == 'PTB':
        # data in form [data, labels]
        train_data_raw, valid_data_raw, test_data_raw = data_.ptb_read(
            './PTB_DATA/data')
        data, labels_arr, embed_arr, data_dict = data_.prepare_data(
            train_data_raw, params)
    else:
        # Fail here with a clear message instead of an unbound-name error
        # further down.
        raise ValueError(
            "Unsupported params.input: {!r} (expected 'GOT' or 'PTB')".format(
                params.input))

    with tf.Graph().as_default() as graph:
        inputs = tf.placeholder(shape=[None, None], dtype=tf.int32)
        d_inputs_ps = tf.placeholder(dtype=tf.int32, shape=[None, None])
        labels = tf.placeholder(shape=[None, None], dtype=tf.int32)
        # NOTE(review): the device scope is assumed to cover only the
        # embedding variable and lookup -- confirm against the original
        # layout.
        with tf.device("/cpu:0"):
            if not params.pre_trained_embed:
                embedding = tf.get_variable(
                    "embedding", [data_dict.vocab_size, params.embed_size],
                    dtype=tf.float32)
                vect_inputs = tf.nn.embedding_lookup(embedding, inputs)
            else:
                # [data_dict.vocab_size, params.embed_size]
                embedding = tf.Variable(
                    embed_arr,
                    trainable=params.fine_tune_embed,
                    name="embedding", dtype=tf.float32)
                vect_inputs = tf.nn.embedding_lookup(embedding, inputs)
        # inputs = tf.unstack(inputs, num=num_steps, axis=1)
        vocab_size = data_dict.vocab_size
        seq_length = tf.placeholder_with_default([0.0], shape=[None])
        d_seq_length = tf.placeholder(shape=[None], dtype=tf.float32)
        qz = q_net(vect_inputs, seq_length, params.batch_size)
        x_logits, _, _ = vae_lstm({'z': qz}, params.batch_size,
                                  d_seq_length, embedding,
                                  d_inputs_ps, vocab_size=vocab_size)
        # loss, masking <PAD>
        current_len = tf.placeholder_with_default(params.sent_max_size,
                                                  shape=())
        # tf.sequence_mask, tf.contrib.seq2seq.sequence_loss
        labels_flat = tf.reshape(labels, [-1])
        cross_entr = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=x_logits, labels=labels_flat)
        # Label id 0 is the padding value, so sign() yields a 0/1 mask.
        mask_labels = tf.sign(tf.to_float(labels_flat))
        masked_losses = mask_labels * cross_entr
        # reshape again
        masked_losses = tf.reshape(masked_losses, tf.shape(labels))
        mean_loss_by_example = tf.reduce_sum(
            masked_losses, reduction_indices=1) / d_seq_length
        rec_loss = tf.reduce_mean(mean_loss_by_example)
        perplexity = tf.exp(rec_loss)
        # kl divergence calculation
        kld = -0.5 * tf.reduce_mean(
            tf.reduce_sum(
                1 + tf.log(tf.square(qz.distribution.std) + 0.0001)
                - tf.square(qz.distribution.mean)
                - tf.square(qz.distribution.std), 1))
        tf.summary.scalar('kl_divergence', kld)
        # kld weight annealing
        anneal = tf.placeholder(tf.int32)
        annealing = (tf.tanh((tf.to_float(anneal) - 3500)/1000) + 1)/2
        # overall loss reconstruction loss - kl_regularization
        lower_bound = rec_loss + tf.multiply(
            tf.to_float(annealing), tf.to_float(kld)) / 10
        #lower_bound = rec_loss
        # Registered for their side effect; picked up by merge_all() below.
        tf.summary.scalar('lower_bound', lower_bound)
        tf.summary.scalar('kld_coeff', annealing)
        gradients = tf.gradients(lower_bound, tf.trainable_variables())
        opt = tf.train.AdamOptimizer(learning_rate=params.learning_rate)
        clipped_grad, _ = tf.clip_by_global_norm(gradients, 5)
        optimize = opt.apply_gradients(zip(clipped_grad,
                                           tf.trainable_variables()))
        #sample
        logits, states, smpl = vae_lstm({}, 1, d_seq_length, embedding,
                                        d_inputs_ps, vocab_size=vocab_size,
                                        gen_mode=True)
        init_state = states[0]
        fin_output = states[1]
        # merge summaries
        merged = tf.summary.merge_all()
        with tf.Session() as sess:
            sess.run([tf.global_variables_initializer(),
                      tf.local_variables_initializer()])
            if params.debug:
                sess = tf_debug.LocalCLIDebugWrapperSession(sess)
            summary_writer = tf.summary.FileWriter(params.LOG_DIR, sess.graph)
            summary_writer.add_graph(sess.graph)
            #ptb_data = PTBInput(params.batch_size, train_data)
            num_iters = len(data) // params.batch_size
            cur_it = 0
            iters, kld_arr, coeff = [], [], []
            for e in range(params.num_epochs):
                for it in range(num_iters):
                    params.is_training = True
                    batch = data[it * params.batch_size:
                                 (it + 1) * params.batch_size]
                    l_batch = labels_arr[it * params.batch_size:
                                         (it + 1) * params.batch_size]
                    # zero padding
                    pad = len(max(batch, key=len))
                    # not optimal!!
                    length_ = np.array([len(sent) for sent in batch]).reshape(
                        params.batch_size)
                    # prepare encoder and decoder inputs to feed
                    batch = np.array([sent + [0] * (pad - len(sent))
                                      for sent in batch])
                    l_batch = np.array([(sent + [0] * (pad - len(sent)))
                                        for sent in l_batch])
                    # encoder feed=[....<EOS>], decoder feed=[<BOS>....],
                    # labels=[.....<EOS>]
                    feed = {inputs: l_batch, d_inputs_ps: batch,
                            labels: l_batch, seq_length: length_,
                            d_seq_length: length_, anneal: cur_it,
                            current_len: pad}
                    lb, _, kld_, ann_, r_loss, perplexity_ = sess.run(
                        [lower_bound, optimize, kld, annealing, rec_loss,
                         perplexity],
                        feed_dict=feed)
                    cur_it += 1
                    iters.append(cur_it)
                    kld_arr.append(kld_)
                    coeff.append(ann_)
                    if cur_it % 100 == 0 and cur_it != 0:
                        print("VLB after {} ({}) iterations (epoch): {} KLD: "
                              "{} Annealing Coeff: {} CE: {}".format(
                                  cur_it, e, lb, kld_, ann_, r_loss))
                        print("Perplexity: {}".format(perplexity_))
                    if cur_it % 150 == 0:
                        if not params.beam_search:
                            params.is_training = False
                            online_inference(sess, data_dict,
                                             sample=smpl, seq=d_inputs_ps,
                                             in_state=init_state,
                                             out_state=fin_output,
                                             length=d_seq_length)
                        else:
                            gen_sentence = beam_search(
                                sess, data_dict, states, smpl,
                                (d_inputs_ps, d_seq_length), params,
                                beam_size=params.beam_size)
                            print(gen_sentence)
                    if cur_it % 400 == 0 and cur_it != 0:
                        # saver = tf.train.Saver()
                        summary = sess.run(merged, feed_dict=feed)
                        # Record the iteration as the global step so the
                        # summary points are not all written at the same step.
                        summary_writer.add_summary(summary, cur_it)
                        # saver.save(sess, os.path.join(params.LOG_DIR, "lstmlstm_model.ckpt"), cur_it)
                    if params.visualise:
                        if cur_it % 30000 == 0 and cur_it != 0:
                            import matplotlib.pyplot as plt
                            with open("./run_kld" + str(params.dec_keep_rate),
                                      'w') as wf:
                                # Three space-separated series; each value is
                                # followed by a single space.
                                wf.write(''.join(str(s) + ' ' for s in iters))
                                wf.write('\n')
                                wf.write(''.join(str(s) + ' '
                                                 for s in kld_arr))
                                wf.write('\n')
                                wf.write(''.join(str(s) + ' ' for s in coeff))
                            plt.plot(iters, kld_arr, label='KLD')
                            plt.xlabel('Iterations')
                            plt.legend(bbox_to_anchor=(1.05, 1), loc=1,
                                       borderaxespad=0.)
                            plt.show()
                            plt.plot(iters, coeff, 'r--', label='annealing')
                            plt.legend(bbox_to_anchor=(1.05, 1), loc=1,
                                       borderaxespad=0.)
                            plt.show()
def generate(self, source, source_mask, k, max_len):
    """Decode ``source`` by delegating to ``beam_search``.

    Args:
        source: Forwarded unchanged to ``beam_search``.
        source_mask: Forwarded unchanged to ``beam_search``.
        k: Forwarded unchanged; presumably the beam width -- confirm against
            ``beam_search``.
        max_len: Forwarded unchanged; presumably the maximum output length
            -- confirm against ``beam_search``.

    Returns:
        Whatever ``beam_search`` returns.
    """
    target_vocab = self.params["target_vocab"]
    return beam_search(self, source, source_mask, target_vocab, k, max_len)
def fast_decode_tpu(encoder_output,
                    symbols_to_logits_fn,
                    hparams,
                    decode_length,
                    vocab_size,
                    beam_size,
                    top_beams=1,
                    alpha=1.0,
                    sos_id=0,
                    eos_id=beam_search.EOS_ID,
                    batch_size=None,
                    scope_prefix="body/"):
    """Given encoder output and a symbols to logits function, does fast decoding.

    Implements beam search decoding for TPU.

    Args:
        encoder_output: A tensor, output from encoder, or None when there is
            no input. When given, its first dimension overrides `batch_size`
            and its dtype is used for the decoder cache.
        symbols_to_logits_fn: Incremental decoding, function mapping
            `(ids, step, cache)` to symbol logits. NOTE(review): the beam
            search is additionally handed `kv_encdecs`; confirm the expected
            callback arity against `beam_search.beam_search`.
        hparams: Run hyperparameters.
        decode_length: An integer, how many additional timesteps to decode.
        vocab_size: Output vocabulary size.
        beam_size: An integer, number of beams.
        top_beams: An integer, how many of the beams to return.
        alpha: A float that controls the length penalty. Larger the alpha,
            stronger the preference for longer translations.
        sos_id: Start-of-sequence symbol.
        eos_id: End-of-sequence symbol.
        batch_size: An integer, must be passed if there is no input.
        scope_prefix: str, prefix for decoder layer variable scopes.

    Returns:
        A dict of decoding results {
            "outputs": integer `Tensor` of decoded ids of shape
                [batch_size, top_beams, <= decode_length]
                (the beam axis is dropped when `top_beams == 1`)
            "scores": decoding log probs from the beam search.
        }.

    Raises:
        ValueError: If both `encoder_output` and `batch_size` are None.
    """
    if encoder_output is not None:
        batch_size = common_layers.shape_list(encoder_output)[0]
        cache_dtype = encoder_output.dtype
    else:
        if batch_size is None:
            raise ValueError(
                "batch_size must be passed when encoder_output is None")
        # No encoder output to take a dtype from; fall back to tf.zeros'
        # own default.
        cache_dtype = tf.float32

    key_channels = hparams.attention_key_channels or hparams.hidden_size
    value_channels = hparams.attention_value_channels or hparams.hidden_size
    num_layers = hparams.num_decoder_layers or hparams.num_hidden_layers

    # Per-layer self-attention cache, laid out as
    # [batch, heads, channels_per_head, decode_length].
    cache = {
        "layer_%d" % layer: {
            "k": tf.zeros([
                batch_size, hparams.num_heads,
                key_channels // hparams.num_heads, decode_length
            ], dtype=cache_dtype),
            "v": tf.zeros([
                batch_size, hparams.num_heads,
                value_channels // hparams.num_heads, decode_length
            ], dtype=cache_dtype),
        } for layer in range(num_layers)
    }

    # Encoder-decoder attention keys/values are computed once per layer and
    # expanded to the beam size; left empty when there is no encoder output.
    kv_encdecs = {"layer_%d" % layer: {} for layer in range(num_layers)}
    if encoder_output is not None:
        for layer in range(num_layers):
            layer_name = "layer_%d" % layer
            with tf.variable_scope(
                    "%sdecoder/%s/encdec_attention/multihead_attention" %
                    (scope_prefix, layer_name)):
                k_encdec = common_attention.compute_attention_component(
                    encoder_output, key_channels, hparams.num_heads, name="k")
                k_encdec = beam_search.merge_beam_dim(
                    beam_search.expand_to_beam_size(k_encdec, beam_size))
                v_encdec = common_attention.compute_attention_component(
                    encoder_output, value_channels, hparams.num_heads,
                    name="v")
                v_encdec = beam_search.merge_beam_dim(
                    beam_search.expand_to_beam_size(v_encdec, beam_size))
            kv_encdecs[layer_name]["k_encdec"] = k_encdec
            kv_encdecs[layer_name]["v_encdec"] = v_encdec

    initial_ids = sos_id * tf.ones([batch_size], dtype=tf.int32)
    decoded_ids, scores = beam_search.beam_search(
        symbols_to_logits_fn,
        initial_ids,
        beam_size,
        decode_length,
        vocab_size,
        alpha,
        states=cache,
        kv_encdecs=kv_encdecs,
        eos_id=eos_id,
        stop_early=(top_beams == 1))

    # Drop the initial id (position 0) from every returned sequence.
    if top_beams == 1:
        decoded_ids = decoded_ids[:, 0, 1:]
        scores = scores[:, 0]
    else:
        decoded_ids = decoded_ids[:, :top_beams, 1:]
        scores = scores[:, :top_beams]

    return {"outputs": decoded_ids, "scores": scores}
def _beam_decode(self, features, decode_length, beam_size, top_beams,
                 last_position_only, alpha, ensemble_num=1,
                 vocab_size=84000):
    """Beam search decoding, averaging logits over an ensemble of models.

    Args:
        features: an map of string to `Tensor`. `features["inputs"]` is
            temporarily replaced by a beam-expanded copy and restored before
            returning; `features["targets"]` is overwritten on every
            decoding step.
        decode_length: an integer. How many additional timesteps to decode;
            the length of the inputs' second dimension is added to it.
        beam_size: number of beams.
        top_beams: an integer. How many of the beams to return.
        last_position_only: a boolean, speed-up by computing last position
            only.
        alpha: Float that controls the length penalty. larger the alpha,
            stronger the preference for longer translations.
        ensemble_num: number of models whose logits are averaged. Model `i`
            is run with `self._hparams_list[i]`.
        vocab_size: output vocabulary size handed to the beam search and
            used for the logits accumulator. Defaults to the previously
            hard-coded 84000.

    Returns:
        A dict with "outputs" (the top decoded ids with the initial id
        removed; the beam axis is dropped when `top_beams == 1`) and
        "scores" (the scores as returned by the beam search).
    """
    tf.logging.info('we use this beam_search')

    def symbols_to_logits_fn(ids):
        """Go from ids to logits."""
        ids = tf.expand_dims(tf.expand_dims(ids, axis=2), axis=3)
        ids = tf.pad(ids[:, 1:], [[0, 0], [0, 1], [0, 0], [0, 0]])

        features["targets"] = ids
        self._coverage = None
        # Accumulator for the ensemble average.
        logits = tf.zeros([1, 1, 1, 1, vocab_size], dtype=tf.float32)
        for i in range(ensemble_num):
            tf.logging.info('the %dth model_fn' % (i + 1))
            #with tf.variable_scope("graph_%d" % (i+1)):
            sharded_logits, _, _ = self.model_fn(
                features,
                False,
                last_position_only=last_position_only,
                hparams=self._hparams_list[i],
                num=i)
            # now self._coverage is a coverage tensor for the first datashard.
            # it has shape [batch_size] and contains floats between 0 and
            # source_length.
            logits += sharded_logits[0]  # Assuming we have one shard.
        logits /= ensemble_num

        if last_position_only:
            return tf.squeeze(logits, axis=[1, 2, 3])
        current_output_position = tf.shape(ids)[1] - 1  # -1 due to the pad above.
        logits = logits[:, current_output_position, :, :]
        return tf.squeeze(logits, axis=[1, 2])

    batch_size = tf.shape(features["inputs"])[0]
    initial_ids = tf.zeros([batch_size], dtype=tf.int32)

    inputs_old = features["inputs"]
    features["inputs"] = tf.expand_dims(features["inputs"], 1)
    if len(features["inputs"].shape) < 5:
        features["inputs"] = tf.expand_dims(features["inputs"], 4)
    # Expand the inputs in to the beam size.
    features["inputs"] = tf.tile(features["inputs"], [1, beam_size, 1, 1, 1])
    s = tf.shape(features["inputs"])
    features["inputs"] = tf.reshape(features["inputs"],
                                    [s[0] * s[1], s[2], s[3], s[4]])

    # Setting decode length to input length + decode_length
    decode_length = tf.shape(
        features["inputs"])[1] + tf.constant(decode_length)
    ids, scores = beam_search.beam_search(symbols_to_logits_fn, initial_ids,
                                          beam_size, decode_length,
                                          vocab_size, alpha)

    # Set inputs back to the unexpanded inputs to not to confuse the Estimator!
    features["inputs"] = inputs_old

    # Return `top_beams` decodings (also remove initial id from the beam search)
    return_scores = True  # TODO(lukaszkaiser): make it work multi-problem.
    if top_beams == 1:
        outputs = ids[:, 0, 1:]
    else:
        outputs = ids[:, :top_beams, 1:]
    if return_scores:
        return {"outputs": outputs, "scores": scores}
    return outputs
def test(self):
    """Caption the test set and write the results to ``./captions.txt``.

    For every sample one line is written in the form
    ``<image id>/<ground-truth words>/<greedy words>/<beam words>``, where
    each word is followed by a single space. Greedy and beam captions are
    cut at the first ``<EOS>`` token.

    The test data is processed in chunks of ``self.batchsize``. A final,
    shorter chunk is zero-padded up to the full batch size for the network
    and only its real rows are written out.
    """
    p = ProgressBar()
    num_samples = len(self.test_data)
    # Context manager so the file is closed even if decoding raises.
    with open('./captions.txt', 'w') as f:
        for i_ in p(range(0, num_samples, self.batchsize)):
            # Number of real samples in this chunk (smaller for the last one).
            valid = min(self.batchsize, num_samples - i_)
            data = np.zeros((self.batchsize, self.in_channel,
                             self.input_height, self.input_width),
                            dtype=np.float32)
            label = []
            first_words = np.zeros((self.batchsize), dtype=np.int32)
            for j in range(valid):
                sample = self.test_data[i_ + j]
                image = self.image_hash[sample[0]]
                data[j, :, :, :] = google_prepare(image)
                label.append(sample[1])
                first_words[j] = sample[1][0]

            genrated_sentence = []
            data = Variable(cuda.to_gpu(data))
            state = {
                name: Variable(
                    self.xp.zeros((data.shape[0], 1024),
                                  dtype=self.xp.float32))
                for name in ('c1', 'h1')
            }
            h = self.enc(data, train=False, test=True)
            ### first LSTM ###
            state, _ = self.dec(h, state, train=False, test=True, image=True)
            ### input <SOS> ###
            state, y = self.dec(Variable(cuda.to_gpu(first_words)),
                                state,
                                train=False,
                                test=True)
            # The literal 20 is presumably the beam width -- confirm against
            # the beam_search helper's signature.
            genrated_sentence_beamed = beam_search(self.dec, state, y, data,
                                                   20, self.mydict_inv)

            # Greedy decoding; maximum sentence length is 50.
            # NOTE(review): the argmax of the <SOS> step's output is fed back
            # but never recorded, so the written greedy caption starts from
            # the following step's output -- confirm this is intended.
            for _ in range(50):
                y = Variable(
                    self.xp.array(np.argmax(y.data.get(),
                                            axis=1)).astype(self.xp.int32))
                state, y = self.dec(y, state, train=False, test=True)
                genrated_sentence.append(y.data)

            # Move each step's argmax to the host once per batch instead of
            # once per (sample, step) pair.
            greedy_ids = [
                cuda.to_cpu(step_out.argmax(1))
                for step_out in genrated_sentence
            ]

            for b in range(valid):
                f.write(str(self.test_data[i_ + b][0]) + '/')
                # GT caption (first and last token skipped)
                for index in label[b][1:-1]:
                    f.write(self.mydict_inv[index] + ' ')
                f.write("/")
                # Predicted caption
                for step_ids in greedy_ids:
                    index = step_ids[b]
                    if self.mydict_inv[index] == '<EOS>':
                        break
                    f.write(self.mydict_inv[index] + ' ')
                f.write("/")
                # beamed caption
                for index in genrated_sentence_beamed[b]:
                    if self.mydict_inv[index] == '<EOS>':
                        break
                    f.write(self.mydict_inv[index] + ' ')
                f.write("\n")