def main(model, dictionary_target, source_fea, source_latex, saveto, wer_file, k=5):
    """Decode a test set with a single trained model and report WER / ExpRate.

    Parameters:
        model             -- path prefix of the saved model; '<model>.pkl' holds options,
                             the weights themselves are loaded via load_params(model, ...)
        dictionary_target -- path of the target-symbol dictionary (token -> index)
        source_fea        -- feature file for the test set
        source_latex      -- ground-truth latex/caption file (also passed to the scorer)
        saveto            -- path where decoded hypotheses are written, one per line
        wer_file          -- path where compute-wer.py writes its WER/ExpRate report
        k                 -- beam width for beam-search decoding (default 5)

    Side effects: writes `saveto` and `wer_file`, shells out to compute-wer.py,
    and prints progress plus the final scores.
    """
    # load model options saved next to the weights
    with open('%s.pkl' % model, 'rb') as f:
        options = pkl.load(f)

    # load source dictionary and invert it (index -> token) for detokenising samples
    worddicts = load_dict(dictionary_target)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    valid, valid_uid_list = dataIterator_valid(source_fea, source_latex, worddicts,
                                               batch_size=1, maxlen=2000)

    trng = RandomStreams(1234)

    # allocate model parameters, load the trained weights, build the sampler
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)
    f_init, f_next = build_sampler(tparams, options, trng)

    print('Decoding...')
    ud_epoch_start = time.time()
    valid_count_idx = 0
    # `with` guarantees the hypothesis file is closed even if decoding raises
    with open(saveto, 'w') as fpp_sample:
        for x, y in valid:
            for xx in x:
                print('%d : %s' % (valid_count_idx + 1, valid_uid_list[valid_count_idx]))
                # pad one extra frame of zeros at the end of the feature sequence
                xx_pad = numpy.zeros((xx.shape[0] + 1, xx.shape[1]), dtype='float32')
                xx_pad[:xx.shape[0], :] = xx
                stochastic = False
                sample, score = gen_sample(f_init, f_next, xx_pad[:, None, :], options,
                                           trng=trng, k=k, maxlen=1000,
                                           stochastic=stochastic, argmax=False)
                if stochastic:
                    ss = sample
                else:
                    # length-normalise costs and keep the lowest-cost hypothesis
                    score = score / numpy.array([len(s) for s in sample])
                    ss = sample[score.argmin()]
                fpp_sample.write(valid_uid_list[valid_count_idx])
                valid_count_idx += 1
                for vv in ss:
                    if vv == 0:  # <eol>
                        break
                    fpp_sample.write(' ' + worddicts_r[vv])
                fpp_sample.write('\n')

    ud_epoch = (time.time() - ud_epoch_start) / 60.
    print('test set decode done, cost time ... %s' % ud_epoch)

    # score the hypotheses with the external scorer, then parse its report
    os.system('python compute-wer.py ' + saveto + ' ' + source_latex + ' ' + wer_file)
    with open(wer_file) as fpp:
        stuff = fpp.readlines()
    m = re.search(r'WER (.*)\n', stuff[0])
    valid_per = 100. * float(m.group(1))
    m = re.search(r'ExpRate (.*)\n', stuff[1])
    valid_sacc = 100. * float(m.group(1))
    print('Valid WER: %.2f%%, ExpRate: %.2f%%' % (valid_per, valid_sacc))
def main(model, bn_model, dictionary_target, fea, latex, saveto, output, k=5):
    """Decode a test set of images with a batch-norm model and report WER / ExpRate.

    Parameters:
        model             -- path prefix of the saved model; '<model>.pkl' holds options
        bn_model          -- path of the saved batch-normalisation parameters
        dictionary_target -- path of the target-symbol dictionary (token -> index)
        fea               -- image/feature file for the test set
        latex             -- ground-truth latex/caption file (also passed to the scorer)
        saveto            -- path where decoded hypotheses are written, one per line
        output            -- path where compute-wer.py writes its WER/ExpRate report
        k                 -- beam width for beam-search decoding (default 5)

    Side effects: writes `saveto` and `output`, shells out to compute-wer.py,
    and prints progress plus the final scores.
    """
    # load model options saved next to the weights
    with open('%s.pkl' % model, 'rb') as f:
        options = pkl.load(f)

    # load source dictionary and invert it (index -> token) for detokenising samples
    worddicts = load_dict(dictionary_target)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk

    valid, valid_uid_list = dataIterator(fea, latex, worddicts,
                                         batch_size=1, batch_Imagesize=500000,
                                         maxlen=500, maxImagesize=500000)

    trng = RandomStreams(1234)
    use_noise = theano.shared(numpy.float32(0.))

    # allocate model parameters (main weights + batch-norm statistics)
    params = init_params(options)
    bn_params = init_bn_params(options)
    # load trained values and wrap them as theano shared variables
    params = load_params(model, params)
    bn_params = load_params(bn_model, bn_params)
    tparams = init_tparams(params)
    bn_tparams = init_tparams(bn_params)
    f_init, f_next = build_sampler(tparams, bn_tparams, options, trng, use_noise)
    use_noise.set_value(0.)  # inference mode: disable noise/dropout

    print('Decoding ... ')
    valid_count_idx = 0
    # `with` guarantees the hypothesis file is closed even if decoding raises
    with open(saveto, 'w') as fpp_sample:
        for x, y in valid:
            for xx in x:
                print('%d : %s' % (valid_count_idx + 1, valid_uid_list[valid_count_idx]))
                # input_channels * height * width; scale pixel values to [0, 1]
                xx_pad = numpy.zeros(
                    (xx.shape[0], xx.shape[1], xx.shape[2]), dtype='float32')
                xx_pad[:, :, :] = xx / 255.
                stochastic = False
                # BUG FIX: beam width was hard-coded to 10, silently ignoring
                # the `k` parameter; now the caller-supplied `k` is honoured.
                sample, score = gen_sample(f_init, f_next, xx_pad[None, :, :, :],
                                           options, trng=trng, k=k, maxlen=1000,
                                           stochastic=stochastic, argmax=False)
                if stochastic:
                    ss = sample
                else:
                    # length-normalise costs and keep the lowest-cost hypothesis
                    score = score / numpy.array([len(s) for s in sample])
                    ss = sample[score.argmin()]
                fpp_sample.write(valid_uid_list[valid_count_idx])
                valid_count_idx += 1
                for vv in ss:
                    if vv == 0:  # <eol>
                        break
                    fpp_sample.write(' ' + worddicts_r[vv])
                fpp_sample.write('\n')

    print('test set decode done')

    # score the hypotheses with the external scorer, then parse its report
    os.system('python compute-wer.py ' + saveto + ' ' + latex + ' ' + output)
    with open(output) as fpp:  # report format: "WER <val>" / "ExpRate <val>"
        stuff = fpp.readlines()
    m = re.search(r'WER (.*)\n', stuff[0])
    valid_per = 100. * float(m.group(1))
    m = re.search(r'ExpRate (.*)\n', stuff[1])
    valid_sacc = 100. * float(m.group(1))
    print('Valid WER: %.2f%%, ExpRate: %.2f%%' % (valid_per, valid_sacc))
def main(model_files, dictionary_target, grammar_target, data_path, saveto, wer_file, k=5):
    """Decode a test set with an ensemble of models under a grammar constraint,
    writing the top-k beams to separate files and scoring each beam.

    Parameters:
        model_files       -- list of model path prefixes; '<prefix>.pkl' holds options
        dictionary_target -- path of the target-symbol dictionary (token -> index)
        grammar_target    -- path of the grammar file compiled for constrained decoding
        data_path         -- directory with the test data (must contain caption.txt)
        saveto            -- output path prefix; beam b is written to '<saveto>.<b>'
        wer_file          -- path where compute-wer.py writes each beam's report
        k                 -- beam width; also the number of per-beam output files

    Side effects: writes k hypothesis files and `wer_file`, shells out to
    compute-wer.py once per beam, and prints progress plus per-beam scores.
    """
    # load source dictionary and invert it (index -> token) for detokenising samples
    worddicts = load_dict(dictionary_target)
    worddicts_r = [None] * len(worddicts)
    for kk, vv in worddicts.items():
        worddicts_r[vv] = kk
    grammar = compileGrammar(loadGrammar(grammar_target, worddicts))

    trng = RandomStreams(1234)

    # build the ensemble: each entry is (f_init, f_next, options, weight).
    # NOTE(review): a dead `for lm_file in []` loop that loaded language models
    # with weight 0.2 was removed here — it could never execute.
    models = []
    for model_file in model_files:
        print('Loading model: %s' % model_file)
        with open('%s.pkl' % model_file, 'rb') as f:
            options = pkl.load(f)
        print(options)
        params = init_params(options)
        params = load_params(model_file, params)
        tparams = init_tparams(params)
        f_init, f_next = build_sampler(tparams, options, trng)
        models.append((f_init, f_next, options, 0.8))

    valid, valid_uid_list = dataIterator_valid(data_path, worddicts,
                                               batch_size=1, maxlen=250)

    # one output file per beam rank
    fpp_sample = [open('%s.%d' % (saveto, beam), 'w') for beam in range(k)]
    try:
        print('Decoding...')
        ud_epoch_start = time.time()
        valid_count_idx = 0
        for x, y in valid:
            for xx in x:
                print('%d : %s' % (valid_count_idx + 1, valid_uid_list[valid_count_idx]))
                # pad one extra frame of zeros at the end of the feature sequence
                xx_pad = numpy.zeros((xx.shape[0] + 1, xx.shape[1]), dtype='float32')
                xx_pad[:xx.shape[0], :] = xx
                stochastic = False
                sample, score = gen_sample(models, xx_pad[:, None, :], grammar,
                                           trng=trng, k=k, maxlen=250,
                                           dictlen=len(worddicts),
                                           stochastic=stochastic, argmax=False)
                # length-normalise costs, then order hypotheses best-first
                score = score / numpy.array([len(s) for s in sample])
                sample_rank = numpy.argsort(score)
                for beam in range(k):
                    fpp_sample[beam].write(valid_uid_list[valid_count_idx])
                    if len(sample) > beam:
                        ss = sample[sample_rank[beam]]
                    else:
                        ss = [0]  # fewer hypotheses than k: emit an empty line
                    for vv in ss:
                        if vv == 0:  # <eol>
                            break
                        fpp_sample[beam].write(' ' + worddicts_r[vv])
                    fpp_sample[beam].write('\n')
                valid_count_idx += 1
        ud_epoch = (time.time() - ud_epoch_start)
        print('test set decode done, cost time ... %s' % ud_epoch)
    finally:
        # close every beam file even if decoding raised part-way through
        for fh in fpp_sample:
            fh.close()

    # score each beam's hypotheses with the external scorer and report
    for beam in range(k):
        os.system('python compute-wer.py %s.%d %s %s'
                  % (saveto, beam, os.path.join(data_path, "caption.txt"), wer_file))
        with open(wer_file) as fpp:
            stuff = fpp.readlines()
        m = re.search(r'WER (.*)\n', stuff[0])
        valid_per = 100. * float(m.group(1))
        m = re.search(r'ExpRate (.*)\n', stuff[1])
        valid_sacc = 100. * float(m.group(1))
        print('%d Valid WER: %.2f%%, ExpRate: %.2f%%' % (beam, valid_per, valid_sacc))