def beamsearch(model, seq, mask=None, beamsize=10, normalize=False,
               maxlen=None, minlen=None, dtype=None):
    size = beamsize
    dtype = dtype or theano.config.floatX

    # get vocabulary from the first model
    vocab = model.option["vocabulary"][1][1]
    eosid = model.option["eosid"]
    bosid = model.option["bosid"]

    if maxlen == None:
        maxlen = seq.shape[0] * 3

    if minlen == None:
        minlen = seq.shape[0] / 2

    # encoding source
    if mask is None:
        mask = numpy.ones(seq.shape, dtype)

    annotation, states, mapped_annot = model.encode(seq, mask)

    initial_beam = beam(size)
    # bosid must be 0
    initial_beam.candidate = [[bosid]]
    initial_beam.score = numpy.zeros([1], dtype)

    hypo_list = []
    beam_list = [initial_beam]
    cond = lambda x: x[-1] == eosid

    for k in range(maxlen):
        # get previous results
        prev_beam = beam_list[-1]
        candidate = prev_beam.candidate
        num = len(candidate)
        last_words = numpy.array(map(lambda t: t[-1], candidate), "int32")

        # compute context first, then compute word distribution
        batch_mask = numpy.repeat(mask, num, 1)
        batch_annot = numpy.repeat(annotation, num, 1)
        batch_mannot = numpy.repeat(mapped_annot, num, 1)

        outputs = model.predict(last_words, states, batch_annot, batch_mannot,
                                batch_mask)
        prob_dists, contexts, alpha = outputs
        logprobs = numpy.log(prob_dists)

        # do not generate eos symbol
        if k < minlen:
            logprobs[:, eosid] = -numpy.inf

        # force to add eos symbol
        if k == maxlen - 1:
            # copy
            eosprob = logprobs[:, eosid].copy()
            logprobs[:, :] = -numpy.inf
            logprobs[:, eosid] = eosprob

        next_beam = beam(size)
        outputs = next_beam.prune(logprobs, cond, prev_beam)

        # translation complete
        hypo_list.extend(outputs[0])
        batch_indices, word_indices = outputs[1:]
        size -= len(outputs[0])

        if size == 0:
            break

        # generate next state
        candidate = next_beam.candidate
        num = len(candidate)
        last_words = numpy.array(map(lambda t: t[-1], candidate), "int32")

        states = select_nbest(states, batch_indices)
        contexts = select_nbest(contexts, batch_indices)

        states = model.generate(last_words, states, contexts)

        beam_list.append(next_beam)

    # postprocessing
    if len(hypo_list) == 0:
        score_list = [0.0]
        hypo_list = [[eosid]]
    else:
        score_list = [item[1] for item in hypo_list]
        # exclude bos symbol
        hypo_list = [item[0][1:] for item in hypo_list]

    for i, (trans, score) in enumerate(zip(hypo_list, score_list)):
        count = len(trans)
        if count > 0:
            if normalize:
                score_list[i] = score / count
            else:
                score_list[i] = score

    # sort
    hypo_list = numpy.array(hypo_list)[numpy.argsort(score_list)]
    score_list = numpy.array(sorted(score_list))

    output = []

    for trans, score in zip(hypo_list, score_list):
        trans = map(lambda x: vocab[x], trans)
        output.append((trans, score))

    return output
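# ---------------------------------------------------------------------------
# All of the decoders in this file lean on a small `beam` container and a
# `select_nbest` helper that are defined elsewhere.  The sketch below is an
# illustrative reconstruction inferred only from how they are used here: the
# attribute names match the single-model variants (`candidate`/`score`); the
# ensemble variant uses `candidates`/`scores` and a two-value `prune` instead.
# Treat it as an assumption about the interface, not the original code.
# ---------------------------------------------------------------------------
import numpy


class beam(object):
    # holds partial hypotheses together with their cumulative log-scores
    def __init__(self, size):
        self.size = size
        self.candidate = []                       # list of token-id lists
        self.score = numpy.zeros([0], "float32")  # cumulative log-probabilities

    def prune(self, logprobs, done, prev_beam):
        # add previous hypothesis scores to the per-word log-probabilities
        # and keep the overall top-k extensions
        num_words = logprobs.shape[1]
        scores = prev_beam.score[:, None] + logprobs
        flat_ids = numpy.argsort(-scores.flatten())[:self.size]

        finished = []
        batch_indices = []
        word_indices = []

        for idx in flat_ids:
            batch, word = idx // num_words, idx % num_words
            cand = prev_beam.candidate[batch] + [word]
            score = scores[batch, word]
            if done(cand):
                # completed hypothesis: take it out of the beam
                finished.append((cand, score))
            else:
                self.candidate.append(cand)
                self.score = numpy.append(self.score, score)
                batch_indices.append(batch)
                word_indices.append(word)

        return finished, numpy.array(batch_indices), numpy.array(word_indices)


def select_nbest(states, indices):
    # gather, along the batch axis (assumed to be the leading axis here), the
    # entries that belong to the surviving hypotheses; lists recurse
    if isinstance(states, (list, tuple)):
        return [select_nbest(item, indices) for item in states]
    return states[indices]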
def beamsearch(models, seq, mask=None, beamsize=10, normalize=False,
               maxlen=None, minlen=None, arithmetic=False, dtype=None):
    dtype = dtype or theano.config.floatX

    if not isinstance(models, (list, tuple)):
        models = [models]

    num_models = len(models)

    # get vocabulary from the first model
    option = models[0].option
    vocab = option["vocabulary"][1][1]
    eosid = option["eosid"]
    bosid = option["bosid"]

    if maxlen is None:
        maxlen = seq.shape[0] * 3

    if minlen is None:
        minlen = seq.shape[0] / 2

    # encoding source
    if mask is None:
        mask = numpy.ones(seq.shape, dtype)

    outputs = [model.encode(seq, mask) for model in models]
    states = [item[0] for item in outputs]
    annotations0 = [item[1] for item in outputs]
    annotations1 = [item[2] for item in outputs]
    mapped_annots0 = [item[3] for item in outputs]
    mapped_annots1 = [item[4] for item in outputs]
    soft_masks = [item[5] for item in outputs]

    # sys.stderr.write("l-src={}\nl-soft-tgt={}\n".format(numpy.sum(mask), numpy.sum(soft_masks[0])))
    # soft_prob = soft_probs[0]
    # soft_mask = soft_masks[0]
    # soft_y = numpy.argmax(soft_prob, 1)

    initial_beam = beam(beamsize)
    size = beamsize
    # bosid must be 0
    initial_beam.candidates = [[bosid]]
    initial_beam.scores = numpy.zeros([1], dtype)

    hypo_list = []
    beam_list = [initial_beam]
    done_predicate = lambda x: x[-1] == eosid

    for k in range(maxlen):
        # get previous results
        prev_beam = beam_list[-1]
        candidates = prev_beam.candidates
        num = len(candidates)
        last_words = numpy.array(map(lambda cand: cand[-1], candidates), "int32")

        # compute context first, then compute word distribution
        batch_mask = numpy.repeat(mask, num, 1)
        batch_annots0 = map(numpy.repeat, annotations0,
                            [num] * num_models, [1] * num_models)
        batch_annots1 = map(numpy.repeat, annotations1,
                            [num] * num_models, [1] * num_models)
        batch_mannots0 = map(numpy.repeat, mapped_annots0,
                             [num] * num_models, [1] * num_models)
        batch_mannots1 = map(numpy.repeat, mapped_annots1,
                             [num] * num_models, [1] * num_models)
        batch_soft_mask = map(numpy.repeat, soft_masks,
                              [num] * num_models, [1] * num_models)

        # predict returns [probs, context, alpha]
        outputs = [
            model.predict(last_words, state, annot0, mannot0, batch_mask,
                          annot1, mannot1, softmask)
            for model, state, annot0, annot1, mannot0, mannot1, softmask in
            zip(models, states, batch_annots0, batch_annots1,
                batch_mannots0, batch_mannots1, batch_soft_mask)
        ]
        prob_dists = [item[0] for item in outputs]

        # search nbest given word distribution
        if arithmetic:
            logprobs = numpy.log(sum(prob_dists) / num_models)
        else:
            # geometric mean
            logprobs = sum(numpy.log(prob_dists)) / num_models

        if k < minlen:
            logprobs[:, eosid] = -numpy.inf  # make sure eos won't be selected

        # force to add eos symbol
        if k == maxlen - 1:
            # copy
            eosprob = logprobs[:, eosid].copy()
            logprobs[:, :] = -numpy.inf
            logprobs[:, eosid] = eosprob  # make sure eos will be selected

        next_beam = beam(size)
        finished, remain_beam_indices = next_beam.prune(logprobs,
                                                        done_predicate,
                                                        prev_beam)

        hypo_list.extend(finished)  # completed translation
        size -= len(finished)

        if size == 0:
            # reach k completed translation before maxlen
            break

        # generate next state
        candidates = next_beam.candidates
        num = len(candidates)
        last_words = numpy.array(map(lambda t: t[-1], candidates), "int32")

        if option["decoder"] == "GruSimple":
            contexts = [item[1] for item in outputs]
            # select corresponding states for each model
            states = select_nbest(states, remain_beam_indices)
            contexts = select_nbest(contexts, remain_beam_indices)
            states = [
                model.generate(last_words, state, context)
                for model, state, context in zip(models, states, contexts)
            ]
        elif option["decoder"] == "GruCond":
            states = [item[1] for item in outputs]
            # select corresponding states for each model
            states = select_nbest(states, remain_beam_indices)

        beam_list.append(next_beam)

    # postprocessing
    if len(hypo_list) == 0:
        score_list = [0.0]
        hypo_list = [[eosid]]
    else:
        score_list = [item[1] for item in hypo_list]
        # exclude bos symbol
        hypo_list = [item[0][1:] for item in hypo_list]

    for i, (trans, score) in enumerate(zip(hypo_list, score_list)):
        count = len(trans)
        if count > 0:
            if normalize:
                score_list[i] = score / count
            else:
                score_list[i] = score

    # sort
    hypo_list = numpy.array(hypo_list)[numpy.argsort(score_list)]
    score_list = numpy.array(sorted(score_list))

    output = []

    for trans, score in zip(hypo_list, score_list):
        trans = map(lambda x: vocab[x], trans)
        output.append((trans, score))

    return output
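# ---------------------------------------------------------------------------
# Ensemble scoring used above, in isolation: with `arithmetic=True` the word
# distributions of the models are averaged in probability space, otherwise in
# log space (a geometric mean).  A minimal numpy illustration; the toy
# distributions below are made up, only the two formulas come from the code.
# ---------------------------------------------------------------------------
import numpy

p1 = numpy.array([[0.7, 0.2, 0.1]])   # model 1 word distribution
p2 = numpy.array([[0.5, 0.4, 0.1]])   # model 2 word distribution

arithmetic_logprobs = numpy.log((p1 + p2) / 2.0)            # log of the mean probability
geometric_logprobs = (numpy.log(p1) + numpy.log(p2)) / 2.0  # mean of the log-probabilities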
def beamsearch(model, seq, seqlen=None, beamsize=10, normalize=False,
               maxlen=None, minlen=None):
    size = beamsize

    vocabulary = model.option["vocabulary"]
    eos_symbol = model.option["eos"]
    hidden_size = model.option["hidden"]
    num_layers = model.option["num_layers"]

    encode = model.encode
    predict = model.predict

    vocab = vocabulary[1][1]
    eosid = vocabulary[1][0][eos_symbol]

    batch_dim = 0
    time_dim = 1

    if seqlen is None:
        seq_len = np.array([seq.shape[time_dim]])
    else:
        seq_len = seqlen

    if maxlen is None:
        maxlen = seq_len[0] * 3

    if minlen is None:
        minlen = seq_len[0] / 2

    annotation, enc_attn_mask = encode(seq, seq_len)

    batch = annotation.shape[batch_dim]

    state = {
        "layer_%d" % i: {
            "key": np.zeros([batch, 0, hidden_size], "float32"),
            "value": np.zeros([batch, 0, hidden_size], "float32")
        }
        for i in range(num_layers)
    }

    initial_beam = beam(size)
    initial_beam.candidate = [[eosid]]
    initial_beam.alignment = [[-1]]
    initial_beam.score = np.zeros([1], "float32")

    hypo_list = []
    beam_list = [initial_beam]

    for k in range(maxlen):
        if size == 0:
            break

        prev_beam = beam_list[-1]
        candidate = prev_beam.candidate
        num = len(prev_beam.candidate)

        # unlike the recurrent decoders, the whole partial translation is fed
        # at every step rather than only the last word
        # last_words = np.array(map(lambda t: t[-1], candidate), "int32")
        partial_translation = np.array(candidate, "int32")

        batch_annot = np.repeat(annotation, num, batch_dim)
        batch_mask = np.repeat(enc_attn_mask, num, batch_dim)

        prob_dist, state = predict(partial_translation, state, batch_annot,
                                   batch_mask)

        logprobs = np.log(prob_dist)

        # do not select eos before minlen
        if k < minlen:
            logprobs[:, eosid] = -np.inf

        # force to add eos symbol
        if k == maxlen - 1:
            # copy
            eosprob = logprobs[:, eosid].copy()
            logprobs[:, :] = -np.inf
            logprobs[:, eosid] = eosprob

        next_beam = beam(size)
        outputs = next_beam.prune(logprobs, lambda x: x[-1] == eosid,
                                  prev_beam)

        hypo_list.extend(outputs[0])
        batch_indices, word_indices = outputs[1:]
        size -= len(outputs[0])

        new_state = {}

        for key, value in state.items():
            state_k = select_nbest(value["key"], batch_indices)
            state_v = select_nbest(value["value"], batch_indices)
            new_state[key] = {"key": state_k, "value": state_v}

        state = new_state

        beam_list.append(next_beam)

    # postprocessing
    if len(hypo_list) == 0:
        score_list = [0.0]
        hypo_list = [[eosid]]
    else:
        score_list = [item[1] for item in hypo_list]
        # exclude bos symbol
        hypo_list = [item[0][1:] for item in hypo_list]

    for i, (trans, score) in enumerate(zip(hypo_list, score_list)):
        count = len(trans)
        if count > 0:
            if normalize:
                length_penalty = (float(5. + count)) ** 1.0 / (5. + 1.)
                score_list[i] = score / length_penalty
            else:
                score_list[i] = score

    # sort
    hypo_list = np.array(hypo_list)[np.argsort(score_list)]
    score_list = np.array(sorted(score_list))

    output = []

    for trans, score in zip(hypo_list, score_list):
        trans = map(lambda x: vocab[x], trans)
        output.append((trans, score))

    return output
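# ---------------------------------------------------------------------------
# The `normalize` branch above is the GNMT-style length penalty
# lp(Y) = ((5 + |Y|) / 6) ** alpha with alpha fixed to 1.0 (at alpha = 1 the
# expression in the code reduces to the same value).  Written with an explicit
# alpha (a hypothetical parameter, not present in the original):
# ---------------------------------------------------------------------------
def length_penalty(count, alpha=1.0):
    # divide the cumulative log-probability by this value before ranking
    return ((5.0 + count) / 6.0) ** alpha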
def beamsearch(model, seq, length=None, beamsize=10, normalize=False,
               maxlen=None, minlen=None):
    size = beamsize

    # get vocabulary from the first model
    vocab = model.option["vocabulary"][1][1]
    eosid = model.option["eosid"]
    bosid = model.option["bosid"]

    time_dim = 0
    batch_dim = 1

    if maxlen == None:
        maxlen = seq.shape[time_dim] * 3

    if minlen == None:
        minlen = seq.shape[time_dim] / 2

    if length is None:
        seq_len = np.array([seq.shape[time_dim]])
    else:
        seq_len = length

    annotation, initial_state = model.encode(seq, seq_len)
    mapped_states = model.precompute(annotation)

    initial_beam = beam(size)
    # </s>
    initial_beam.candidate = [[bosid]]
    initial_beam.score = np.zeros([1], "float32")

    hypo_list = []
    beam_list = [initial_beam]
    cond = lambda hypo: hypo[-1] == eosid

    state = initial_state

    for k in range(maxlen):
        prev_beam = beam_list[-1]
        candidate = prev_beam.candidate
        num = len(candidate)
        last_words = np.array(map(lambda t: t[-1], candidate), "int32")

        # prediction
        batch_seq_len = np.repeat(seq_len, num, 0)
        batch_annot = np.repeat(annotation, num, batch_dim)
        batch_mannot = np.repeat(mapped_states, num, batch_dim)

        alpha, context = model.align(state, batch_annot, batch_mannot,
                                     batch_seq_len)
        prob_dist = model.predict(last_words, state, context)

        # select nbest
        logprobs = np.log(prob_dist)

        if k < minlen:
            logprobs[:, eosid] = -np.inf

        # force to add eos symbol
        if k == maxlen - 1:
            # copy
            eosprob = logprobs[:, eosid].copy()
            logprobs[:, :] = -np.inf
            logprobs[:, eosid] = eosprob

        next_beam = beam(size)
        outputs = next_beam.prune(logprobs, cond, prev_beam)

        # translation complete
        hypo_list.extend(outputs[0])
        batch_indices, word_indices = outputs[1:]
        size -= len(outputs[0])

        if size == 0:
            break

        state = select_nbest(state, batch_indices)
        context = select_nbest(context, batch_indices)

        # generate next state
        candidate = next_beam.candidate
        num = len(candidate)
        current_words = np.array(map(lambda t: t[-1], candidate), "int32")

        state = model.generate(current_words, state, context)

        beam_list.append(next_beam)

    # postprocessing
    score_list = [item[1] for item in hypo_list]
    # remove BOS symbol
    hypo_list = [item[0][1:] for item in hypo_list]

    for i, (trans, score) in enumerate(zip(hypo_list, score_list)):
        count = len(trans) - 1
        if count > 0:
            if normalize:
                score_list[i] = score / count
            else:
                score_list[i] = score

    hypo_list = np.array(hypo_list)[np.argsort(score_list)]
    score_list = np.array(sorted(score_list))

    output = []

    for trans, score in zip(hypo_list, score_list):
        trans = map(lambda x: vocab[x], trans)
        output.append((trans, score))

    return output
def beamsearch(model, tvocab, itvocab, eos_symbol, seq, seqlen=None,
               beamsize=10, normalize=False, maxlen=None, minlen=None):
    size = beamsize

    encode = model.encode
    predict = model.predict

    eosid = tvocab[eos_symbol]

    time_dim = 0
    batch_dim = 1

    if seqlen is None:
        seq_len = np.array([seq.shape[time_dim]])
    else:
        seq_len = seqlen

    if maxlen is None:
        maxlen = seq_len[0] * 3

    if minlen is None:
        minlen = seq_len[0] / 2

    annotation, mapped_states, initial_state, attn_mask = encode(seq, seq_len)
    state = initial_state

    initial_beam = beam(size)
    initial_beam.candidate = [[eosid]]
    initial_beam.score = np.zeros([1], "float32")

    hypo_list = []
    beam_list = [initial_beam]

    for k in range(maxlen):
        if size == 0:
            break

        prev_beam = beam_list[-1]
        candidate = prev_beam.candidate
        num = len(prev_beam.candidate)
        last_words = np.array(map(lambda t: t[-1], candidate), "int32")

        batch_annot = np.repeat(annotation, num, batch_dim)
        batch_mannot = np.repeat(mapped_states, num, batch_dim)
        batch_mask = np.repeat(attn_mask, num, batch_dim)

        prob_dist, state, alpha = predict(last_words, state, batch_annot,
                                          batch_mannot, batch_mask)

        logprobs = np.log(prob_dist)

        if k < minlen:
            logprobs[:, eosid] = -np.inf

        # force to add eos symbol
        if k == maxlen - 1:
            # copy
            eosprob = logprobs[:, eosid].copy()
            logprobs[:, :] = -np.inf
            logprobs[:, eosid] = eosprob

        next_beam = beam(size)
        outputs = next_beam.prune(logprobs, lambda x: x[-1] == eosid,
                                  prev_beam)

        hypo_list.extend(outputs[0])
        batch_indices, word_indices = outputs[1:]
        size -= len(outputs[0])

        state = select_nbest(state, batch_indices)

        beam_list.append(next_beam)

    # postprocessing
    if len(hypo_list) == 0:
        score_list = [0.0]
        hypo_list = [[eosid]]
    else:
        score_list = [item[1] for item in hypo_list]
        # exclude bos symbol
        hypo_list = [item[0][1:] for item in hypo_list]

    for i, (trans, score) in enumerate(zip(hypo_list, score_list)):
        count = len(trans)
        if count > 0:
            if normalize:
                score_list[i] = score / count
            else:
                score_list[i] = score

    # sort
    hypo_list = np.array(hypo_list)[np.argsort(score_list)]
    score_list = np.array(sorted(score_list))

    output = []

    for trans, score in zip(hypo_list, score_list):
        trans = map(lambda x: itvocab[x], trans)
        output.append((trans, score))

    return output
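# ---------------------------------------------------------------------------
# Hypothetical call of the last variant above; the vocabularies and the input
# array are illustrative placeholders, and `model` stands for a trained model
# object with the `encode`/`predict` interface assumed by the function.
# ---------------------------------------------------------------------------
import numpy as np

tvocab = {"</s>": 0, "hello": 1, "world": 2}        # word -> id
itvocab = dict((i, w) for w, i in tvocab.items())   # id -> word

seq = np.array([[3], [7], [0]], "int32")            # source ids, shape (time, batch=1)

# translations = beamsearch(model, tvocab, itvocab, "</s>", seq,
#                           beamsize=10, normalize=True)
# for words, score in translations:
#     print(" ".join(words), score)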