grads = T.grad(cost, params)
# add step clipping
if config['step_clipping'] > 0.:
    grads = step_clipping(params, grads, config['step_clipping'])
updates = adadelta(params, grads)

logger.info('begin to build translation model: tr_fn')
tr_fn = theano.function([source, source_mask, target, target_mask],
                        [cost], updates=updates)
logger.info('end build translation model: tr_fn')

logger.info('begin to build sample model: f_init, f_next')
f_init, f_next = trans.build_sample()
logger.info('end build sample model: f_init, f_next')

src_vocab = pickle.load(open(config['src_vocab'], 'rb'))
trg_vocab = pickle.load(open(config['trg_vocab'], 'rb'))
src_vocab = ensure_special_tokens(src_vocab, bos_idx=0,
                                  eos_idx=config['src_vocab_size'] - 1,
                                  unk_idx=config['unk_id'])
# eos_idx previously used config['src_vocab_size'] here, which looks like a
# copy-paste slip; the target vocabulary size is the right bound
trg_vocab = ensure_special_tokens(trg_vocab, bos_idx=0,
                                  eos_idx=config['trg_vocab_size'] - 1,
                                  unk_idx=config['unk_id'])
trg_vocab_reverse = {index: word for word, index in trg_vocab.iteritems()}
src_vocab_reverse = {index: word for word, index in src_vocab.iteritems()}
logger.info('load dict finished! src dict size: {}, trg dict size: {}.'.format(
    len(src_vocab), len(trg_vocab)))
tr_stream = get_tr_stream(**config)
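
# `step_clipping` is called above but defined elsewhere; below is a minimal
# sketch of the conventional rescale-by-global-norm rule it presumably
# implements. The signature is matched to the call site, `params` is unused
# in this sketch, and the body is an assumption, not this project's code.
def step_clipping(params, grads, threshold):
    # global L2 norm across all gradient tensors
    norm = T.sqrt(sum(T.sum(g ** 2) for g in grads))
    # shrink every gradient by the same factor when the norm exceeds threshold
    scale = T.switch(norm > threshold, threshold / norm, 1.)
    return [g * scale for g in grads]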
if config['step_clipping'] > 0.:
    grads = step_clipping(params, grads, config['step_clipping'])
updates = adadelta(params, grads)
log('Done')

log('Build translation model tr_fn ... ', nl=False)
# adadelta updates the parameters along with its running averages of
# squared gradients and squared deltas (deltax)
if config['use_mv']:
    inps = [source, source_mask, target, target_mask, v_part, v_true]
else:
    inps = [source, source_mask, target, target_mask]
tr_fn = theano.function(inps, [cost, log_norm], updates=updates)
log('Done')

log('Build sample model f_init f_nh f_na f_ns f_mo f_ws f_ps f_p ... ', nl=False)
fs = trans.build_sample()
log('Done')

k_batch_start_sample = config['k_batch_start_sample']
batch_size, sample_size = config['batch_size'], config['hook_samples']
if batch_size < sample_size:
    log('Batch size must be greater than or equal to sample size')
    sys.exit(1)
batch_start_sample = np.random.randint(2, k_batch_start_sample)  # [low, high)
log('will randomly generate {} samples at batch {}'.format(
    sample_size, batch_start_sample))
batch_count, sent_count, val_time, best_score = 0, 0, 0, 0.
model_name = ''
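
# `adadelta(params, grads)` above returns the Theano update pairs passed to
# theano.function; below is a minimal sketch following Zeiler (2012),
# "ADADELTA: An Adaptive Learning Rate Method". The accumulator names and
# the rho/eps defaults are assumptions, not this project's implementation.
def adadelta(params, grads, rho=0.95, eps=1e-6):
    updates = []
    for p, g in zip(params, grads):
        # running averages of the squared gradients and squared updates
        acc_g = theano.shared(np.zeros_like(p.get_value()))
        acc_dx = theano.shared(np.zeros_like(p.get_value()))
        new_acc_g = rho * acc_g + (1. - rho) * g ** 2
        # scale the gradient by the ratio of the two RMS terms
        dx = -T.sqrt(acc_dx + eps) / T.sqrt(new_acc_g + eps) * g
        new_acc_dx = rho * acc_dx + (1. - rho) * dx ** 2
        updates += [(acc_g, new_acc_g), (acc_dx, new_acc_dx), (p, p + dx)]
    return updates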