def search(self, context, beam_size=1, ignore_unk=False, min_length=1,
           max_length=100, normalize_by_length=True, verbose=False):
    if not self.compiled:
        self.compile()

    # Convert to column vector
    context = numpy.array(context, dtype='int32')[:, None]

    prev_hd = numpy.zeros((beam_size, self.qdim), dtype='float32')
    prev_hs = numpy.zeros((beam_size, self.sdim), dtype='float32')

    # Compute the context encoding and get the last hierarchical state
    h, hs = self.compute_encoding(context)
    prev_hs[:] = hs[-1]

    fin_beam_gen = []
    fin_beam_costs = []

    beam_gen = [[] for i in range(beam_size)]
    costs = [0.0 for i in range(beam_size)]

    for k in range(max_length):
        if len(fin_beam_gen) >= beam_size:
            break

        if verbose:
            logger.info("Beam search at step %d" % k)

        prev_words = (numpy.array([bg[-1] for bg in beam_gen])
                      if k > 0
                      else numpy.zeros(beam_size, dtype="int32") + self.eos_sym)

        outputs, hd = self.next_probs_predictor(prev_hs, prev_words, prev_hd)
        log_probs = numpy.log(outputs)

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, self.unk_sym] = -numpy.inf
        if k <= min_length:
            log_probs[:, self.eos_sym] = -numpy.inf

        next_costs = numpy.array(costs)[:, None] - log_probs

        # Pick only from the first row at the first step: all rows are
        # identical there, which avoids duplicate <s> hypotheses.
        if k == 0:
            flat_next_costs = next_costs[:1, :].flatten()
        else:
            # Set the next cost to infinity for finished sentences
            # (they will be replaced by other sentences in the beam)
            indices = [i for i, bg in enumerate(beam_gen)
                       if bg[-1] == self.eos_sym]
            next_costs[indices, :] = numpy.inf
            flat_next_costs = next_costs.flatten()

        best_costs_indices = numpy.argpartition(
            flat_next_costs, beam_size)[:beam_size]

        # Decipher flattened indices
        voc_size = log_probs.shape[1]
        trans_indices = best_costs_indices // voc_size
        word_indices = best_costs_indices % voc_size
        costs = flat_next_costs[best_costs_indices]

        new_beam_gen = [[] for i in range(beam_size)]
        new_costs = numpy.zeros(beam_size)
        new_prev_hd = numpy.zeros((beam_size, self.qdim), dtype="float32")

        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(trans_indices, word_indices, costs)):
            new_beam_gen[i] = beam_gen[orig_idx] + [next_word]
            new_costs[i] = next_cost
            new_prev_hd[i] = hd[orig_idx]

        # Save the previous hidden states
        prev_hd = new_prev_hd
        beam_gen = new_beam_gen
        costs = new_costs

        for i in range(beam_size):
            # Did we finish sampling?
            if beam_gen[i][-1] == self.eos_sym:
                if verbose:
                    logger.debug("Adding sentence {} from beam {}".format(
                        beam_gen[i], i))

                fin_beam_gen.append(beam_gen[i])
                if normalize_by_length:
                    costs[i] /= len(beam_gen[i])
                fin_beam_costs.append(costs[i])

    # If we have not sampled anything, force-include the current beam
    if len(fin_beam_gen) == 0:
        fin_beam_gen = beam_gen
        if normalize_by_length:
            costs = [costs[i] / len(beam_gen[i]) for i in range(len(beam_gen))]
        fin_beam_costs = costs

    # Here we could have more than beam_size samples, because we allow
    # beam_size new candidates per step even after a hypothesis in the
    # beam has already terminated with the end-of-sentence symbol.
    fin_beam_gen = numpy.array(fin_beam_gen)[numpy.argsort(fin_beam_costs)]
    fin_beam_costs = numpy.array(sorted(fin_beam_costs))

    return fin_beam_gen[:beam_size], fin_beam_costs[:beam_size]
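# The hypothesis-expansion step above (and in every variant below) selects
# the next beam with the same trick: flatten the (beam, vocab) cost matrix,
# grab the k smallest entries with numpy.argpartition (linear time, no full
# sort), and recover the source hypothesis and word via integer division
# and modulo. A minimal standalone sketch of just that step; the array
# values are made up for illustration:
import numpy

def top_k_candidates(costs, log_probs, k):
    # costs: (beam,) cumulative costs; log_probs: (beam, vocab) next-word
    # log probabilities. Returns (hypothesis indices, word indices, costs).
    next_costs = numpy.asarray(costs)[:, None] - log_probs
    flat = next_costs.flatten()
    best = numpy.argpartition(flat, k)[:k]
    voc_size = log_probs.shape[1]
    return best // voc_size, best % voc_size, flat[best]

log_probs = numpy.log(numpy.array([[0.7, 0.2, 0.1],
                                   [0.1, 0.6, 0.3]]))
print(top_k_candidates([0.0, 0.5], log_probs, 2))
# -> hypothesis 0 extended with word 0, hypothesis 1 extended with word 1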
def search(self, context, beam_size=1, ignore_unk=False, min_length=1,
           max_length=100, normalize_by_length=True, verbose=False):
    if not self.compiled:
        self.compile()

    # Convert to column vector
    context = numpy.array(context, dtype='int32')[:, None]

    prev_hd = numpy.zeros((beam_size, self.qdim), dtype='float32')
    prev_hs = numpy.zeros((beam_size, self.sdim), dtype='float32')

    # Compute the context encoding and get the last hierarchical state
    h, hs = self.compute_encoding(context)
    prev_hs[:] = hs[-1]

    fin_beam_gen = []
    fin_beam_costs = []
    fin_beam_ranks = []

    beam_gen = [[] for i in range(beam_size)]
    costs = [0.0 for i in range(beam_size)]

    for k in range(max_length):
        if len(fin_beam_gen) >= beam_size:
            break

        if verbose:
            logger.info("Beam search at step %d" % k)

        prev_words = (numpy.array([bg[-1] for bg in beam_gen])
                      if k > 0
                      else numpy.zeros(beam_size, dtype="int32") + self.eoq_sym)

        outputs, hd = self.next_probs_predictor(prev_hs, prev_words, prev_hd)
        log_probs = numpy.log(outputs)

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, self.unk_sym] = -numpy.inf
        if k <= min_length:
            log_probs[:, self.eoq_sym] = -numpy.inf

        next_costs = numpy.array(costs)[:, None] - log_probs

        # Pick only from the first row at the first step: all rows are
        # identical there, which avoids duplicate <s> hypotheses.
        if k == 0:
            flat_next_costs = next_costs[:1, :].flatten()
        else:
            # Set the next cost to infinity for finished sentences
            # (they will be replaced by other sentences in the beam)
            indices = [i for i, bg in enumerate(beam_gen)
                       if bg[-1] == self.eoq_sym]
            next_costs[indices, :] = numpy.inf
            flat_next_costs = next_costs.flatten()

        best_costs_indices = numpy.argpartition(
            flat_next_costs, beam_size)[:beam_size]

        # Decipher flattened indices
        voc_size = log_probs.shape[1]
        trans_indices = best_costs_indices // voc_size
        word_indices = best_costs_indices % voc_size
        costs = flat_next_costs[best_costs_indices]

        new_beam_gen = [[] for i in range(beam_size)]
        new_costs = numpy.zeros(beam_size)
        new_prev_hd = numpy.zeros((beam_size, self.qdim), dtype="float32")

        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(trans_indices, word_indices, costs)):
            new_beam_gen[i] = beam_gen[orig_idx] + [next_word]
            new_costs[i] = next_cost
            new_prev_hd[i] = hd[orig_idx]

        # Save the previous hidden states
        prev_hd = new_prev_hd
        beam_gen = new_beam_gen
        costs = new_costs

        for i in range(beam_size):
            # Did we finish sampling?
            if beam_gen[i][-1] == self.eoq_sym:
                if verbose:
                    logger.debug("Adding sentence {} from beam {}".format(
                        beam_gen[i], i))

                new_session = numpy.vstack([
                    context,
                    numpy.array(beam_gen[i], dtype='int32')[:, None]
                ])
                ranks = self.rank_prediction(new_session, len(new_session))
                fin_beam_ranks.append(numpy.ravel(ranks)[-1])

                fin_beam_gen.append(beam_gen[i])
                if normalize_by_length:
                    fin_beam_costs.append(costs[i] / len(beam_gen[i]))
                else:
                    fin_beam_costs.append(costs[i])

    # If we have not sampled anything, force-include the current beam
    if len(fin_beam_gen) == 0:
        fin_beam_gen = beam_gen
        fin_beam_ranks = [0] * len(beam_gen)
        if normalize_by_length:
            costs = [costs[i] / len(beam_gen[i]) for i in range(len(costs))]
        fin_beam_costs = costs

    # Here we could have more than beam_size samples, because we allow
    # beam_size new candidates per step even after a hypothesis in the
    # beam has already terminated with the end-of-query symbol.
    order = numpy.argsort(fin_beam_costs)
    fin_beam_ranks = numpy.array(fin_beam_ranks)[order]
    fin_beam_gen = numpy.array(fin_beam_gen)[order]
    fin_beam_costs = numpy.array(sorted(fin_beam_costs))

    return (fin_beam_gen[:beam_size],
            fin_beam_costs[:beam_size],
            fin_beam_ranks[:beam_size])
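# Why normalize_by_length matters: the beam cost is a sum of per-token
# negative log probabilities, so longer hypotheses accumulate larger costs
# and an unnormalized ranking systematically prefers short outputs.
# Dividing by the hypothesis length ranks by average per-token cost
# instead. A toy illustration (the probabilities are made up):
import numpy

short = [0.5, 0.5]            # 2 tokens
long = [0.6, 0.6, 0.6, 0.6]   # 4 tokens, each individually more likely

cost_short = -numpy.sum(numpy.log(short))  # ~1.39
cost_long = -numpy.sum(numpy.log(long))    # ~2.04

print(cost_short < cost_long)              # True: raw cost prefers `short`
print(cost_short / 2 > cost_long / 4)      # True: per-token cost (0.69 vs
                                           # 0.51) prefers `long`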
def search(self, seq, n_samples=1, ignore_unk=False, minlen=1,
           normalize_by_length=True, session=False):

    def _is_finished(beam_gen):
        # A hypothesis ends with </s> in session mode, with </q> otherwise
        if session and beam_gen[-1] == self.eos_sym:
            return True
        if not session and beam_gen[-1] == self.eoq_sym:
            return True
        return False

    # Make seq a column vector
    seq = numpy.array(seq)
    if seq.ndim == 1:
        seq = numpy.array([seq], dtype='int32').T
    else:
        seq = seq.T
    assert seq.ndim == 2

    h, hr, hs = self.compute_encoding(seq)

    # Initialize starting points with the last encoding of the sequence
    prev_words = numpy.zeros((seq.shape[1],), dtype='int32') + self.eoq_sym
    prev_hd = numpy.zeros((seq.shape[1], self.qdim), dtype='float32')
    prev_hs = numpy.zeros((seq.shape[1], self.sdim), dtype='float32')
    prev_hs[:] = hs[-1]

    fin_beam_gen = []
    fin_beam_costs = []
    fin_beam_ranks = []

    beam_gen = [[]]
    costs = [0.0]

    max_step = 30
    for k in range(max_step):
        logger.info("Beam search at step %d" % k)
        if n_samples == 0:
            break

        beam_size = len(beam_gen)
        prev_words = (numpy.array([bg[-1] for bg in beam_gen])
                      if k > 0
                      else numpy.zeros(1, dtype="int32") + self.eoq_sym)

        assert prev_hs.shape[0] == prev_hd.shape[0]
        assert prev_words.shape[0] == prev_hs.shape[0]

        # Re-encode the context together with each partial hypothesis
        repeat = numpy.repeat(seq, beam_size, axis=1)
        whole_context = numpy.vstack(
            [repeat, numpy.array(beam_gen, dtype='int32').T])
        h, hr, hs = self.compute_encoding(whole_context)

        outputs, hd = self.next_probs_predictor(hs[-1], prev_words, prev_hd)
        log_probs = numpy.log(outputs)

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, self.unk_sym] = -numpy.inf
        if k <= minlen:
            log_probs[:, self.eos_sym] = -numpy.inf
            log_probs[:, self.eoq_sym] = -numpy.inf

        # Hard constraint: forbid repeating a word already generated
        # within the same hypothesis
        if k > 0:
            for i in range(beam_size):
                log_probs[i, beam_gen[i][1:]] = -numpy.inf

        # Find the best options by calling argpartition on the flattened array
        next_costs = numpy.array(costs)[:, None] - log_probs
        flat_next_costs = next_costs.flatten()
        best_costs_indices = numpy.argpartition(
            flat_next_costs, n_samples)[:n_samples]

        # Decipher flattened indices
        voc_size = log_probs.shape[1]
        trans_indices = best_costs_indices // voc_size
        word_indices = best_costs_indices % voc_size
        costs = flat_next_costs[best_costs_indices]

        # Form a beam for the next iteration
        new_beam_gen = [[] for i in range(n_samples)]
        new_costs = numpy.zeros(n_samples)
        new_prev_hs = numpy.zeros((n_samples, self.sdim), dtype="float32")
        new_prev_hs[:] = hs[-1]
        new_prev_hd = numpy.zeros((n_samples, self.qdim), dtype="float32")

        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(trans_indices, word_indices, costs)):
            new_beam_gen[i] = beam_gen[orig_idx] + [next_word]
            new_costs[i] = next_cost
            new_prev_hd[i] = hd[orig_idx]

        beam_gen = []
        costs = []
        indices = []
        for i in range(n_samples):
            # Did we finish sampling?
            if not _is_finished(new_beam_gen[i]):
                beam_gen.append(new_beam_gen[i])
                costs.append(new_costs[i])
                indices.append(i)
            else:
                n_samples -= 1
                # Concatenate sequence and predict rank
                concat_seq = numpy.vstack(
                    [seq, numpy.array([new_beam_gen[i]], dtype='int32').T])
                ranks = self.rank_prediction(concat_seq)
                fin_beam_ranks.append(numpy.ravel(ranks)[-1])
                fin_beam_gen.append(new_beam_gen[i])
                if normalize_by_length:
                    fin_beam_costs.append(new_costs[i] / len(new_beam_gen[i]))
                else:
                    fin_beam_costs.append(new_costs[i])

        # Filter out the finished states
        prev_hd = new_prev_hd[indices]
        prev_hs = new_prev_hs[indices]

    order = numpy.argsort(fin_beam_costs)
    fin_beam_gen = numpy.array(fin_beam_gen)[order]
    fin_beam_ranks = numpy.array(fin_beam_ranks)[order]
    fin_beam_costs = numpy.array(sorted(fin_beam_costs))

    return fin_beam_gen, fin_beam_costs, fin_beam_ranks
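# Unlike the first two variants, which mask finished rows with +inf, this
# one shrinks the live beam: each hypothesis that just emitted an end
# symbol moves to the finished pool, n_samples is decremented, and only
# the surviving rows' recurrent states are kept. A minimal sketch of that
# bookkeeping in isolation; END and the toy arrays are hypothetical:
import numpy

END = 0  # hypothetical end-of-sequence token id
new_beam = [[4, 7, END], [4, 9], [5, 3, 8]]
new_costs = numpy.array([1.2, 0.9, 1.5])
new_states = numpy.random.rand(3, 8).astype('float32')  # one row per hypothesis

finished, fin_costs = [], []
live, live_costs, indices = [], [], []
for i, hyp in enumerate(new_beam):
    if hyp[-1] == END:
        finished.append(hyp)
        fin_costs.append(new_costs[i] / len(hyp))  # length-normalized
    else:
        live.append(hyp)
        live_costs.append(new_costs[i])
        indices.append(i)

states = new_states[indices]    # recurrent states follow the surviving rows
print(len(live), states.shape)  # 2 (2, 8)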
def _search(self, seq_origin, n_samples, ignore_unk=False, minlen=1,
            debug=False, training=False):
    # batch size 1
    seq = numpy.array(seq_origin)[:, None]
    mask = numpy.ones(seq.shape, dtype=config.floatX)

    # Compute the h and c of the encoder; their shape is (1, d),
    # where d = 4 * layers * dim
    h_, c_ = self.comp_repr_enc(seq, mask)
    if debug:
        print("self.enc_dec.layers", self.enc_dec.layers)

    # Wrap h and c into shape (1, 1, d)
    new_h_ = numpy.tile(h_[0], (1, 1, 1))
    new_c_ = numpy.tile(c_[0], (1, 1, 1))

    fin_trans = []
    fin_costs = []

    trans = [[]]  # the beam, a 2-D list
    costs = [0.0]

    for k in range(3 * len(seq)):
        if n_samples == 0:
            break

        # Compute probabilities of the next words for
        # all the elements of the beam.
        beam_size = len(trans)
        last_words = (numpy.array([t[-1] for t in trans])[None, :]
                      if k > 0
                      else numpy.zeros((1, beam_size), dtype="int64"))

        # Given h, c and the last words of the hypotheses in the beam,
        # compute the log probs with shape (n, v)
        mask = numpy.ones((1, len(last_words[0])), dtype=config.floatX)
        if debug:
            print('mask', mask)
            print("new_h_.shape", new_h_.shape)
            print("new_c_.shape", new_c_.shape)
            print("last_words, k", last_words, k)
            print(self.enc_dec.decoder.dbg(last_words, mask, new_h_[0],
                                           new_c_[0], 0.0))

        log_probs, h_, c_, proj_x = self.comp_next_probs_hc(
            last_words, mask, new_h_[0], new_c_[0], 1.0 if k > 0 else 0.0)
        log_probs = numpy.log(log_probs[0])

        if debug:
            print('new_h_[0,:,:5]', new_h_[0, :, :5])
            print('h_[0,:,:5]', h_[0, :, :5])
            print('proj_x', proj_x)
            print('log_probs', log_probs)
            # log_probs has shape (n, v), where v is the vocabulary size
            print('shape log_probs', log_probs.shape)

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, self.unk_id] = -numpy.inf
        # TODO: report me in the paper!!!
        if k < minlen:
            log_probs[:, self.eos_id] = -numpy.inf

        # Find the best options by calling argpartition on the flattened array
        next_costs = numpy.array(costs)[:, None] - log_probs
        if debug:
            print("next_costs.shape", next_costs.shape)
        flat_next_costs = next_costs.flatten()
        best_costs_indices = numpy.argpartition(
            flat_next_costs, n_samples)[:n_samples]

        # Decipher flattened indices; trans_indices identifies which
        # previous hypothesis each candidate extends
        voc_size = log_probs.shape[1]
        trans_indices = best_costs_indices // voc_size
        word_indices = best_costs_indices % voc_size
        if debug:
            print('best_costs_indices', best_costs_indices)
            print('trans_indices', trans_indices)
            print('word_indices', word_indices)
        costs = flat_next_costs[best_costs_indices]

        # Form a beam for the next iteration
        new_trans = [[] for i in range(n_samples)]
        new_costs = numpy.zeros(n_samples)
        inputs = numpy.zeros(n_samples, dtype="int64")
        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(trans_indices, word_indices, costs)):
            new_trans[i] = trans[orig_idx] + [next_word]
            new_costs[i] = next_cost
            inputs[i] = next_word

        # Filter out the sequences that end with the end-of-sequence symbol
        trans = []
        costs = []
        indices = []
        for i in range(n_samples):
            if new_trans[i][-1] != self.eos_id:
                trans.append(new_trans[i])
                costs.append(new_costs[i])
                indices.append(i)
            else:
                n_samples -= 1
                fin_trans.append(new_trans[i])
                fin_costs.append(new_costs[i])

        if debug:
            print('new_trans', new_trans)
            print('trans', trans)
            print('h_.shape', h_.shape)
            print('indices', indices)
            print('trans_indices', trans_indices)

        # Keep only the decoder states of the surviving hypotheses
        pre_t_indices = trans_indices[indices]
        new_h_ = h_[0][pre_t_indices][None, :]
        new_c_ = c_[0][pre_t_indices][None, :]
        if debug:
            print('new_h_.shape', new_h_.shape)

    if debug:
        print('fin_trans', fin_trans)

    # Dirty tricks to obtain any translation
    if not len(fin_trans):
        if ignore_unk:
            logger.warning("Did not manage without UNK")
            return self._search(seq_origin, n_samples, False, minlen)
        elif n_samples < 4 and not training:
            logger.warning("Still no translations: try beam size {}".format(
                n_samples * 2))
            return self._search(seq_origin, n_samples * 2, False, minlen)
        else:
            logger.warning("Translation failed: cannot end with EOS")
            if training:
                return [[]], [0.0], "NO TRANS"
            # At test time, output the partial translations in the beam
            # and append EOS to each heuristically
            logger.warning("Translation failed: cannot end with EOS, "
                           "but output the trans in the beam")
            for x in trans:
                x.append(self.eos_id)
            fin_trans = numpy.array(trans)[numpy.argsort(costs)][:self.beamsize]
            fin_costs = numpy.array(sorted(costs)[:self.beamsize])
            best_trans = fin_trans[0][:-1]
            if self.enc_dec.reverse_trg:
                best_trans = best_trans[::-1]
            best_trans = self.to_words(best_trans, self.t_index2word)
            return list(fin_trans), fin_costs, best_trans

    fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
    fin_costs = numpy.array(sorted(fin_costs))

    if debug:
        # Sanity check: recompute the static per-sentence cost of the
        # finished translations with the training cost function
        seqs = numpy.array([seq_origin] * len(fin_trans))
        x, x_mask, y, y_mask, _, _ = prepare_reorderdata_minibatch(
            seqs, fin_trans)
        cost = self.enc_dec.fn_sent_cost(x, x_mask, y, y_mask)
        print('static cost', cost)
        print('sent static cost', cost.sum(axis=0))

    best_trans = fin_trans[0][:-1]
    if self.enc_dec.reverse_trg:
        best_trans = best_trans[::-1]
    best_trans = self.to_words(best_trans, self.t_index2word)
    return list(fin_trans), fin_costs, best_trans
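# The "dirty tricks" fallback above retries a failed search with
# progressively relaxed settings: first allow UNK again, then double the
# beam until some hypothesis actually ends with EOS. The same policy in
# isolation, with a hypothetical run_search(seq, beam, ignore_unk)
# callable standing in for one search pass:
def search_with_fallback(run_search, seq, beam=12, ignore_unk=True,
                         max_beam=500):
    trans, costs = run_search(seq, beam, ignore_unk)
    while not len(trans):
        if ignore_unk:
            ignore_unk = False  # first retry: permit UNK tokens
        elif beam < max_beam:
            beam *= 2           # then escalate the beam size
        else:
            raise RuntimeError("translation failed")
        trans, costs = run_search(seq, beam, ignore_unk)
    return trans, costs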
def search(self, seq, n_samples=1, ignore_unk=False, minlen=1,
           normalize_by_length=True):
    # Make seq a column vector
    seq = numpy.array(seq)
    if seq.ndim == 1:
        seq = numpy.array([seq], dtype='int32').T
    else:
        seq = seq.T
    assert seq.ndim == 2

    h, hr, hs = self.compute_encoding(seq)

    # Initialize starting points with the last encoding of the sequence
    prev_words = numpy.zeros((seq.shape[1],), dtype='int32') + self.eoq_sym
    prev_hd = numpy.zeros((seq.shape[1], self.qdim), dtype='float32')
    prev_hs = numpy.zeros((seq.shape[1], self.sdim), dtype='float32')
    prev_hs[:] = hs[-1]

    fin_beam_gen = []
    fin_beam_costs = []
    fin_beam_ranks = []

    beam_gen = [[]]
    costs = [0.0]

    max_step = 50
    for k in range(max_step):
        logger.info("Beam search at step %d" % k)
        if n_samples == 0:
            break

        beam_size = len(beam_gen)
        prev_words = (numpy.array([bg[-1] for bg in beam_gen])
                      if k > 0
                      else numpy.zeros(1, dtype="int32") + self.eoq_sym)

        assert prev_hs.shape[0] == prev_hd.shape[0]
        assert prev_words.shape[0] == prev_hs.shape[0]

        # Re-encode the context together with each partial hypothesis
        repeat = numpy.repeat(seq, beam_size, axis=1)
        whole_context = numpy.vstack(
            [repeat, numpy.array(beam_gen, dtype='int32').T])
        h, hr, hs = self.compute_encoding(whole_context)

        outputs, hd = self.next_probs_predictor(hs[-1], prev_words, prev_hd)
        log_probs = numpy.log(outputs)

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, self.unk_sym] = -numpy.inf
        if k <= minlen:
            log_probs[:, self.eos_sym] = -numpy.inf
            log_probs[:, self.eoq_sym] = -numpy.inf

        # Find the best options by calling argpartition on the flattened array
        next_costs = numpy.array(costs)[:, None] - log_probs
        flat_next_costs = next_costs.flatten()
        best_costs_indices = numpy.argpartition(
            flat_next_costs, n_samples)[:n_samples]

        # Decipher flattened indices
        voc_size = log_probs.shape[1]
        trans_indices = best_costs_indices // voc_size
        word_indices = best_costs_indices % voc_size
        costs = flat_next_costs[best_costs_indices]

        # Form a beam for the next iteration
        new_beam_gen = [[] for i in range(n_samples)]
        new_costs = numpy.zeros(n_samples)
        new_prev_hs = numpy.zeros((n_samples, self.sdim), dtype="float32")
        new_prev_hs[:] = hs[-1]
        new_prev_hd = numpy.zeros((n_samples, self.qdim), dtype="float32")

        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(trans_indices, word_indices, costs)):
            new_beam_gen[i] = beam_gen[orig_idx] + [next_word]
            new_costs[i] = next_cost
            new_prev_hd[i] = hd[orig_idx]

        beam_gen = []
        costs = []
        indices = []
        for i in range(n_samples):
            # Did we finish sampling?
            if new_beam_gen[i][-1] != self.eos_sym:
                beam_gen.append(new_beam_gen[i])
                costs.append(new_costs[i])
                indices.append(i)
            else:
                n_samples -= 1
                # Concatenate sequence and predict rank
                concat_seq = numpy.vstack(
                    [seq, numpy.array([new_beam_gen[i]], dtype='int32').T])
                ranks = self.rank_prediction(concat_seq)
                fin_beam_ranks.append(numpy.ravel(ranks)[-1])
                fin_beam_gen.append(new_beam_gen[i])
                if normalize_by_length:
                    fin_beam_costs.append(new_costs[i] / len(new_beam_gen[i]))
                else:
                    fin_beam_costs.append(new_costs[i])

        # Filter out the finished states
        prev_hd = new_prev_hd[indices]
        prev_hs = new_prev_hs[indices]

    order = numpy.argsort(fin_beam_costs)
    fin_beam_gen = numpy.array(fin_beam_gen)[order]
    fin_beam_ranks = numpy.array(fin_beam_ranks)[order]
    fin_beam_costs = numpy.array(sorted(fin_beam_costs))

    return fin_beam_gen, fin_beam_costs, fin_beam_ranks
def search(self, seq, n_samples, ignore_unk=False, minlen=1):
    c = self.comp_repr(seq)[0]
    states = [x[None, :] for x in self.comp_init_states(c)]
    dim = states[0].shape[1]
    num_levels = len(states)

    fin_trans = []
    fin_costs = []

    trans = [[]]
    costs = [0.0]

    for k in range(3 * len(seq)):
        if n_samples == 0:
            break

        # Compute probabilities of the next words for
        # all the elements of the beam.
        beam_size = len(trans)
        last_words = (numpy.array([t[-1] for t in trans])
                      if k > 0
                      else numpy.zeros(beam_size, dtype="int64"))
        log_probs = numpy.log(
            self.comp_next_probs(c, k, last_words, *states)[0])

        # Adjust log probs according to search restrictions
        if ignore_unk:
            log_probs[:, self.unk_id] = -numpy.inf
        # TODO: report me in the paper!!!
        if k < minlen:
            log_probs[:, self.eos_id] = -numpy.inf

        # Find the best options by calling argpartition on the flattened array
        next_costs = numpy.array(costs)[:, None] - log_probs
        flat_next_costs = next_costs.flatten()
        best_costs_indices = numpy.argpartition(
            flat_next_costs, n_samples)[:n_samples]

        # Decipher flattened indices
        voc_size = log_probs.shape[1]
        trans_indices = best_costs_indices // voc_size
        word_indices = best_costs_indices % voc_size
        costs = flat_next_costs[best_costs_indices]

        # Form a beam for the next iteration
        new_trans = [[] for i in range(n_samples)]
        new_costs = numpy.zeros(n_samples)
        new_states = [numpy.zeros((n_samples, dim), dtype="float32")
                      for level in range(num_levels)]
        inputs = numpy.zeros(n_samples, dtype="int64")
        for i, (orig_idx, next_word, next_cost) in enumerate(
                zip(trans_indices, word_indices, costs)):
            new_trans[i] = trans[orig_idx] + [next_word]
            new_costs[i] = next_cost
            for level in range(num_levels):
                new_states[level][i] = states[level][orig_idx]
            inputs[i] = next_word
        new_states = self.comp_next_states(c, k, inputs, *new_states)

        # Filter out the sequences that end with the end-of-sequence symbol
        trans = []
        costs = []
        indices = []
        for i in range(n_samples):
            if new_trans[i][-1] != self.enc_dec.state['null_sym_target']:
                trans.append(new_trans[i])
                costs.append(new_costs[i])
                indices.append(i)
            else:
                n_samples -= 1
                fin_trans.append(new_trans[i])
                fin_costs.append(new_costs[i])
        states = [x[indices] for x in new_states]

    # Dirty tricks to obtain any translation
    if not len(fin_trans):
        if ignore_unk:
            logger.warning("Did not manage without UNK")
            return self.search(seq, n_samples, False, minlen)
        elif n_samples < 500:
            logger.warning("Still no translations: try beam size {}".format(
                n_samples * 2))
            return self.search(seq, n_samples * 2, False, minlen)
        else:
            logger.error("Translation failed")

    fin_trans = numpy.array(fin_trans)[numpy.argsort(fin_costs)]
    fin_costs = numpy.array(sorted(fin_costs))
    return fin_trans, fin_costs
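# Note why every variant re-sorts at the end with
# numpy.array(fin_trans)[numpy.argsort(fin_costs)]: numpy.argpartition
# only guarantees that the k smallest entries come first, not that they
# are in order, so the finished hypotheses must still be sorted by cost
# before returning. A runnable illustration:
import numpy

flat = numpy.array([3.0, 0.5, 2.0, 0.1, 9.0])
best = numpy.argpartition(flat, 2)[:2]
print(best, flat[best])                # the two smallest, in arbitrary order
order = numpy.argsort(flat[best])
print(best[order], flat[best][order])  # now sorted: [3 1] [0.1 0.5]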