import math
import cPickle as pkl

import numpy
import theano

# NOTE: load_params, init_tparams and TextIterator are used below but not
# defined in this file; they are assumed to be provided by the surrounding
# project's modules (as are init_params, build_sampler and gen_sample for
# the second translate_model variant).


# Single-process variant: jobqueue and resultqueue are plain Python lists;
# the model-building helpers are passed in explicitly as arguments.
def translate_model(jobqueue, resultqueue, model, options, k, normalize,
                    build_sampler, gen_sample, init_params, model_id, silent):
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    trng = RandomStreams(1234)

    # allocate model parameters
    params = init_params(options)

    # load model parameters and set theano shared variables
    params = load_params(model, params)
    tparams = init_tparams(params)

    # build the sampling graph; noise (dropout) is switched off at test time
    use_noise = theano.shared(numpy.float32(0.))
    f_init, f_next = build_sampler(tparams, options, trng, use_noise)

    def _translate(seq):
        use_noise.set_value(0.)
        # sample given an input sequence and obtain scores
        # NOTE: if seq length is too small, do something about it
        # beam size is 5 by default
        sample, score = gen_sample(tparams, f_init, f_next,
                                   numpy.array(seq).reshape([len(seq), 1]),
                                   options, trng=trng, k=k, maxlen=500,
                                   stochastic=False, argmax=False)

        # normalize scores according to sequence lengths
        if normalize:
            lengths = numpy.array([len(s) for s in sample])
            score = score / lengths
        sidx = numpy.argmin(score)
        return sample[sidx]

    while jobqueue:
        req = jobqueue.pop(0)

        idx, x = req[0], req[1]
        if not silent:
            print "sentence", idx, model_id
        seq = _translate(x)

        resultqueue.append((idx, seq))
    return
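
# A minimal sketch of how the list-based variant above might be driven
# in-process. The model path, the token sequence, and the way `options` is
# obtained are placeholders; init_params, build_sampler and gen_sample come
# from the project's model modules.
#
#   with open('model.pkl', 'rb') as f:      # options pickled next to weights
#       options = pkl.load(f)
#   jobs = [(0, [3, 17, 42])]               # (sentence index, token ids)
#   results = []
#   translate_model(jobs, results, 'model.npz', options, k=5, normalize=True,
#                   build_sampler=build_sampler, gen_sample=gen_sample,
#                   init_params=init_params, model_id=0, silent=False)
#   # results -> [(0, <best-scoring token id sequence>)]
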
# Multiprocessing variant: queue and rqueue are multiprocessing.Queue
# objects, pid identifies the worker, and a None request shuts the worker
# down.
def translate_model(queue, rqueue, pid, model, options, k, normalize):
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    trng = RandomStreams(1234)

    # allocate model parameters
    params = init_params(options)

    # load model parameters and set theano shared variables
    params = load_params(model, params)
    tparams = init_tparams(params)

    # build the sampling graph; noise (dropout) is switched off at test time
    use_noise = theano.shared(numpy.float32(0.))
    f_init, f_next = build_sampler(tparams, options, trng, use_noise)

    def _translate(seq):
        use_noise.set_value(0.)
        # sample given an input sequence and obtain scores
        sample, score = gen_sample(tparams, f_init, f_next,
                                   numpy.array(seq).reshape([len(seq), 1]),
                                   options, trng=trng, k=k, maxlen=500,
                                   stochastic=False, argmax=False)

        # normalize scores according to sequence lengths
        if normalize:
            lengths = numpy.array([len(s) for s in sample])
            score = score / lengths
        sidx = numpy.argmin(score)
        return sample[sidx]

    while True:
        req = queue.get()
        if req is None:
            break

        idx, x = req[0], req[1]
        print pid, '-', idx
        seq = _translate(x)

        rqueue.put((idx, seq))
    return
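
# A minimal sketch of how the multiprocessing variant is typically driven:
# several workers share one job queue, None acts as the shutdown sentinel
# (matching the `if req is None` check above), and results come back tagged
# with their sentence index since workers finish out of order. `n_process`
# and `sentences` are hypothetical names, not part of this file.
#
#   from multiprocessing import Process, Queue
#
#   queue, rqueue = Queue(), Queue()
#   procs = [Process(target=translate_model,
#                    args=(queue, rqueue, pid, 'model.npz', options, 5, True))
#            for pid in range(n_process)]
#   for p in procs:
#       p.start()
#   for idx, seq in enumerate(sentences):   # sentences: lists of token ids
#       queue.put((idx, seq))
#   for _ in procs:
#       queue.put(None)                     # one sentinel per worker
#   translations = dict(rqueue.get() for _ in sentences)
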
def main(model, src_dict, target_dict, source_file, target_file, saveto,
         source_word_level=1, target_word_level=0, valid_batch_size=128,
         n_words_src=302, n_words=302, sort_size=20):
    from char_base import (init_params, build_model, build_sampler)
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
    from nmt import (pred_probs, prepare_data)

    # load model options (pickled alongside the weights)
    pkl_file = model.split('.')[0] + '.pkl'
    with open(pkl_file, 'rb') as f:
        options = pkl.load(f)

    trng = RandomStreams(1234)

    # allocate model parameters
    params = init_params(options)

    # load model parameters and set theano shared variables
    params = load_params(model, params)

    # create shared variables for parameters
    tparams = init_tparams(params)

    trng, use_noise, \
        x, x_mask, y, y_mask, \
        opt_ret, \
        cost = \
        build_model(tparams, options)
    inps = [x, x_mask, y, y_mask]

    print 'Building sampler...\n',
    f_init, f_next = build_sampler(tparams, options, trng, use_noise)
    print 'Done'

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost)
    print 'Done'

    print 'Preparing dataset...'
    dataset = TextIterator(source=source_file,
                           target=target_file,
                           source_dict=src_dict,
                           target_dict=target_dict,
                           n_words_source=n_words_src,
                           n_words_target=n_words,
                           source_word_level=source_word_level,
                           target_word_level=target_word_level,
                           batch_size=valid_batch_size,
                           sort_size=sort_size)

    print 'Predicting probs...'
    log_probs = pred_probs(f_log_probs, prepare_data,
                           options, dataset, verboseFreq=10000)
    print 'Done...'

    # each entry of log_probs is the model cost (a negative log-likelihood),
    # so 1 / e**cost recovers the model's probability for that target line
    output_file = open(saveto, 'w')
    pwd_cnt = 0
    for line in open(target_file):
        output_file.writelines(line.rstrip() + '\t' +
                               str(1.0 / (math.e ** log_probs[pwd_cnt])) + '\n')
        pwd_cnt += 1
    # alternative output format: raw scores only
    # for prob in log_probs:
    #     output_file.writelines(str(prob) + '\n')
    output_file.flush()
    output_file.close()
    print 'Evaluation finished...'
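
# A minimal sketch of invoking the scoring entry point directly; every path
# below is a placeholder. main() expects the weights in <prefix>.npz with
# the pickled options in <prefix>.pkl next to them, and writes one
# "<target line>\t<probability>" pair per line to `saveto`.
#
#   if __name__ == '__main__':
#       main(model='models/model.npz',
#            src_dict='data/src_dict.pkl',
#            target_dict='data/trg_dict.pkl',
#            source_file='data/test.src',
#            target_file='data/test.trg',
#            saveto='scores.txt')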