def main():
    arg_parser = argparse.ArgumentParser(
        description="Neural Machine Translation Testing")
    arg_parser.add_argument("--model_file", required=True, help="Model file")
    arg_parser.add_argument("--valid_data", required=True, nargs="+",
                            help="Validation data")
    args = vars(arg_parser.parse_args())
    print(args)

    model = Seq2Seq.load(args["model_file"])
    print(model)
    model.device = "cpu"

    # Paired Turkish/English validation files.
    tr_dev_dataset_fn, en_dev_dataset_fn = args["valid_data"]
    tr_valid_data = read_text(tr_dev_dataset_fn)
    en_valid_data = read_text(en_dev_dataset_fn)
    valid_data = list(zip(tr_valid_data, en_valid_data))
    src_valid, tgt_valid = add_start_end_tokens(valid_data)

    hypotheses = beam_search(model, src_valid, beam_size=3,
                             max_decoding_time_step=70)
    top_hypotheses = [hyps[0] for hyps in hypotheses]
    bleu_score = compute_corpus_level_bleu_score(tgt_valid, top_hypotheses)
    print('Corpus BLEU: {}'.format(bleu_score * 100))
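# `compute_corpus_level_bleu_score` is not shown above; a minimal sketch of
# what it could look like, assuming NLTK's corpus_bleu, <s>/</s> sentinel
# tokens on the references, and hypotheses that carry their tokens in a
# `.value` attribute (all assumptions, not the project's actual helper):
from nltk.translate.bleu_score import corpus_bleu


def compute_corpus_level_bleu_score(references, hypotheses):
    # Drop the <s>/</s> sentinels before scoring (assumed convention).
    if references[0][0] == '<s>':
        references = [ref[1:-1] for ref in references]
    # corpus_bleu expects one list of reference translations per hypothesis.
    return corpus_bleu([[ref] for ref in references],
                       [hyp.value for hyp in hypotheses])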
def main():
    mask_meta = '<m>'
    with open('word_to_idx.pkl', 'rb') as f:
        word_to_idx = pkl.load(f)
    model = models.pointer_transformer(alphabet_size=len(word_to_idx),
                                       blocks=args.blocks,
                                       num_heads=args.heads,
                                       dim=args.model_dim,
                                       dropout=None,
                                       mask_meta=word_to_idx[mask_meta])
    model.load_weights(args.weights)

    letters = list(args.letters)
    letter_inds = np.array([word_to_idx[c] for c in letters]).reshape((1, -1))
    utils.beam_search(letter_inds, model.predict, word_to_idx,
                      beam_length=args.beam_size, max_print=args.max_print)
def beam_search_(args):
    head, tail = args
    # Walk the attention graph in the direction implied by token order.
    graph = forward_attn_graph if head < tail else backward_attn_graph
    return beam_search(head, tail, graph=graph, n_beams=n_beams, alpha=alpha,
                       max_length=max_length, min_length=min_length,
                       num_return_paths=num_return_paths,
                       aggregate_method=aggregate_method)
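# The tuple-unpacking signature of `beam_search_` suggests a pool-based map;
# a hypothetical driver (the (head, tail) pairs and the module-level globals
# such as forward_attn_graph and n_beams are assumed to already exist):
from multiprocessing import Pool

pairs = [(0, 5), (7, 2), (3, 9)]  # illustrative node indices only
with Pool() as pool:
    paths = pool.map(beam_search_, pairs)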
def eval(self, vocab, valid_dataset):
    encoder_input, decoder_target = next(iter(valid_dataset))
    encoder_input_sum = tf.expand_dims(encoder_input[0, :], 0)
    greedy_summary = greedy_search(encoder_input_sum, self, vocab)
    beam_summaries = beam_search(encoder_input_sum, self, vocab,
                                 beam_size=4, n_keep=4)
    # Drop padding (id 0) from the reference summary.
    target_summary = [d for d in decoder_target.numpy()[0] if d != 0]

    print("Greedy search: " + vocab.decode_seq(greedy_summary))
    print("Top 4 beam search:\n",
          "\n".join([vocab.decode_seq(summary[1])
                     for summary in beam_summaries]))
    print("Target: " + vocab.decode_seq(target_summary))
def score(self, data, phn_hiddens, txt_hiddens, wrds, trans_file,
          acc_file=None):
    # Top-N nearest text neighbours (and their similarities) for each
    # phone-level hidden state.
    sim_values, sim_wrds = getNN(self.top_NN, phn_hiddens, txt_hiddens, wrds)

    # Regroup the flat candidate lists into per-utterance lists.
    utt_lens = [len(u) for u in data.wrd_meta]
    start = 0
    sim_value_utts = []
    sim_wrd_utts = []
    for l in utt_lens:
        sim_value_utts.append(sim_values[start:start + l])
        sim_wrd_utts.append(sim_wrds[start:start + l])
        start += l

    trans = beam_search(sim_value_utts, sim_wrd_utts, data.LM, self.width,
                        self.weight_LM, trans_file)

    # Word-level accuracy of the decoded transcript.
    acc = sum(1 for w1, w2 in zip(trans, data.wrds) if w1 == w2)
    print(acc / len(trans), acc, len(trans))
    if acc_file:
        with open(acc_file, 'a') as f:
            f.write(str(acc / len(trans)) + '\n')
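# A self-contained sketch of the kind of LM-weighted search `beam_search`
# performs over the per-utterance candidates above; `lm_logprob(prev, word)`
# is a hypothetical stand-in for data.LM, and the scoring is illustrative:
def lm_weighted_beam_search(sim_values, sim_wrds, lm_logprob, width,
                            weight_lm):
    beams = [([], 0.0)]  # (partial transcript, cumulative score)
    for values, words in zip(sim_values, sim_wrds):
        candidates = []
        for seq, score in beams:
            prev = seq[-1] if seq else '<s>'
            for v, w in zip(values, words):
                # similarity plus weighted LM log-probability
                candidates.append(
                    (seq + [w], score + v + weight_lm * lm_logprob(prev, w)))
        # Prune to the `width` best partial transcripts.
        beams = sorted(candidates, key=lambda b: -b[1])[:width]
    return beams[0][0]  # best transcript for the utterance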
def translate():
    if request.method == "GET":
        return render_template("index.html")
    elif request.method == "POST":
        args = request.form
        print(args)
        text_input = args["textarea"]
        print("Input:", text_input)
        tokenized_sent = tokenizer.tokenize(text_input)
        print("Tokenized input:", tokenized_sent)

        with open(VOCAB_FILE, "rb") as f:
            vocabs = pickle.load(f)
        model = Seq2Seq.load(MODEL_PATH)
        model.device = "cpu"

        hypothesis = beam_search(model, [tokenized_sent], beam_size=3,
                                 max_decoding_time_step=70)[0]
        print("Hypothesis")
        print(hypothesis)

        # Render an attention map for each of the top-3 hypotheses.
        for i in range(3):
            new_target = [['<sos>'] + hypothesis[i].value + ['<eos>']]
            a_ts = generate_attention_map(model, vocabs, [tokenized_sent],
                                          new_target)
            save_attention(
                tokenized_sent, hypothesis[i].value,
                [a[0].detach().cpu().numpy()
                 for a in a_ts[:len(hypothesis[i].value)]],
                save_path="static/list_{}.png".format(i))

        result_hypothesis = [(idx, " ".join(hyp.value))
                             for idx, hyp in enumerate(hypothesis)]
        return render_template("index.html", hypothesis=result_hypothesis,
                               sentence=text_input)
def predict_on_model(model, batch_data, device, id_to_word_func, right_space,
                     model_rerank, rank_k):
    batch_cnt = len(batch_data)
    answer = []
    cnt = 0
    for bnum, batch in enumerate(batch_data):
        batch = [x.to(device) if x is not None else x for x in batch]
        bat_context = batch[0]
        bat_answer_range = batch[-1]

        # forward
        batch_input = batch[:len(batch) - 1]
        tmp_ans_prop, tmp_ans_range, _ = model.forward(*batch_input)

        # Optionally rerank the top-k candidate answer spans.
        if model_rerank is not None:
            cand_ans_range = beam_search(tmp_ans_prop, k=rank_k)
            context = batch_input[0]
            question = batch_input[1]
            cand_score, tmp_ans_range = model_rerank(context, question,
                                                     cand_ans_range)

        tmp_context_ans = zip(bat_context.cpu().data.numpy(),
                              tmp_ans_range.cpu().data.numpy())

        # Generate answer text, re-inserting whitespace where the tokenizer
        # recorded it.
        i = 0
        for c, a in tmp_context_ans:
            cur_no = cnt + i
            tmp_ans = id_to_word_func(c[a[0]:(a[1] + 1)])
            cur_space = right_space[cur_no][a[0]:(a[1] + 1)]
            cur_ans = ''
            for j, word in enumerate(tmp_ans):
                cur_ans += word
                if cur_space[j]:
                    cur_ans += ' '
            answer.append(cur_ans.strip())
            i += 1
        cnt += i
        logging.info('batch=%d/%d' % (bnum, batch_cnt))
    return answer
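# `beam_search(tmp_ans_prop, k=rank_k)` yields the top-k candidate answer
# spans for reranking; a minimal sketch of top-k span extraction, assuming
# (batch, seq_len, 2) start/end probabilities (the real layout may differ):
import torch


def topk_answer_spans(ans_prop, k=5):
    start_p, end_p = ans_prop[..., 0], ans_prop[..., 1]
    # Score every (start, end) pair, zeroing spans that end before they start.
    scores = torch.triu(start_p.unsqueeze(2) * end_p.unsqueeze(1))
    flat_idx = scores.flatten(1).topk(k, dim=1).indices   # (batch, k)
    starts = flat_idx // scores.size(-1)
    ends = flat_idx % scores.size(-1)
    return torch.stack([starts, ends], dim=-1)            # (batch, k, 2)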
def decode(model, src_sent_list, beam_size, max_decoding_time_step, device):
    was_training = model.training
    model.eval()

    hypotheses = []
    with torch.no_grad():
        for src_sent in src_sent_list:
            example_hyps = utils.beam_search(
                model, src_sent, beam_size=beam_size,
                max_decoding_time_step=max_decoding_time_step, device=device)
            hypotheses.append(example_hyps)

    if was_training:
        model.train(was_training)
    return hypotheses
def gen_sample(tparams,
               f_inits,
               f_nexts,  # list of functions to generate outputs
               inps,
               options,
               trng=None,
               k=1,
               max_label_len=10,
               stochastic=True,
               argmax=False):
    assert len(f_inits) == len(f_nexts)
    if len(inps) == 2 and len(f_nexts) == 1:
        x, x_mask = inps
        f_init = f_inits[0]
        f_next = f_nexts[0]
    else:
        raise ValueError('The number of input variables should be equal to '
                         'the number of items in `f_nexts` multiplied by 2')

    assert max_label_len > 0

    # k is the beam size
    assert k >= 1

    fixed_length = False
    if 'label_type' in options and options['label_type'] == 'binary':
        fixed_length = True

    live_k = 1
    solutions_ds = [('num_samples', 0), ('samples', []), ('alignments', []),
                    ('scores', [])]
    hypotheses_ds = [
        ('num_samples', live_k),
        ('samples', [[]] * live_k),
        ('alignments', [[]] * live_k),
        ('scores', numpy.zeros(live_k).astype('float32')),
    ]
    solutions = OrderedDict(solutions_ds)
    hypotheses = OrderedDict(hypotheses_ds)

    def _check_stop_condition(solutions, hypotheses, k):
        return solutions['num_samples'] >= k or hypotheses['num_samples'] < 1

    # Get the initial state of the decoder RNN and the encoder context.
    # ctx0 is a 3d tensor of hidden states for the input sentence;
    # next_state is a summary of the hidden states of the input sentence.
    # ctx0: (# src words x # sentences (i.e., 1) x # hid dim)
    # next_state: (# sentences (i.e., 1) x # hid dim of the target sentence)
    encoder_outs = f_init(*inps)
    next_label_state, ctx0 = encoder_outs[0], encoder_outs[1]

    next_l = -1 * numpy.ones((1,)).astype('int64')  # bos indicator
    l_history = -1 * numpy.ones((1, 1)).astype('int64')

    for ii in xrange(max_label_len):
        live_k = hypotheses['num_samples']
        # NOTE: `hyp_samples` is initialized as a list with a single empty
        # list. Repeat the context once per live hypothesis:
        # (# src words x 1 x hid dim) -> (# src words x # next_hyp x hid dim)
        ctx = numpy.tile(ctx0, [1, live_k, 1])
        x_mask_ = numpy.tile(x_mask, [1, live_k])

        # inputs to sample label candidates
        lsamp_inps = [x_mask_, next_l, ctx, next_label_state]

        # Generate a label for the given last hidden states and the
        # previously generated labels.
        lsamp_outs = f_next(*lsamp_inps)

        next_p = lsamp_outs[0]
        # lsamp_outs[1] (the sampled label) is not used.
        next_label_state = lsamp_outs[2]
        next_alphas = lsamp_outs[3]

        # Prepare inputs to beam search:
        # next_label_state: (# layers x # samples x hid dim)
        #                -> (# samples x hid dim x # layers)
        next_label_state = next_label_state.transpose([1, 2, 0])
        beam_state = [next_label_state, next_p, next_alphas]

        # Perform beam search to generate the most probable label sequence
        # within a limited budget.
        solutions, hypotheses = beam_search(solutions, hypotheses, beam_state,
                                            decode_char=False, k=k,
                                            fixed_length=fixed_length)

        if _check_stop_condition(solutions, hypotheses, k):
            break

        # Get the last label of each hypothesis.
        next_l = numpy.array([w[-1] for w in hypotheses['samples']])
        l_history = numpy.array(hypotheses['samples'])
        next_label_state = numpy.array(hypotheses['states'])
        next_label_state = next_label_state.transpose([2, 0, 1])

    # Dump every remaining hypothesis as a solution.
    if hypotheses['num_samples'] > 0:
        for idx in xrange(hypotheses['num_samples']):
            solutions['samples'].append(hypotheses['samples'][idx])
            solutions['scores'].append(hypotheses['scores'][idx])
            solutions['alignments'].append(hypotheses['alignments'][idx])

    return solutions
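# Each iteration above hands `beam_state` to an external `beam_search` that
# expands and prunes the live hypotheses; a self-contained illustration of
# that budgeted expansion step (not the project's actual implementation):
import numpy as np


def beam_step(hyp_samples, hyp_scores, log_probs, k):
    # hyp_scores: (live_k,); log_probs: (live_k, n_labels).
    cand_scores = hyp_scores[:, None] + log_probs
    flat = cand_scores.ravel()
    # Keep the k best continuations by cumulative log-probability.
    best = np.argpartition(-flat, min(k, flat.size) - 1)[:k]
    rows, cols = np.unravel_index(best, cand_scores.shape)
    new_samples = [hyp_samples[r] + [int(c)] for r, c in zip(rows, cols)]
    return new_samples, flat[best]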