# Build the translation model, optionally reload saved weights, list the
# parameters and take the gradient of the training cost.
# NOTE(review): line breaks and block nesting below were reconstructed from a
# whitespace-collapsed excerpt -- confirm against the original layout.
one_model = config['one_model']
log('Build lookup table, bi-directional encoder and decoder ... ', nl=False)
trans = Translate(**config)
# transpose all the input matrices into shape (sent_len * batch_size)
if config['use_mv']:
    # v_part and v_true are passed only when 'use_mv' is set
    trans.apply(source.T, source_mask.T, target.T, target_mask.T, v_part, v_true)
else:
    trans.apply(source.T, source_mask.T, target.T, target_mask.T)
log('Done\n')

if config['reload']:
    log('Reload model {}'.format(config['one_model']))
    trans.load(one_model)

# actually the average cross entropy (cost) per sentence in a batch
cost = trans.mean_cost
log_norm = trans.mean_abs_log_norm

params = trans.params
# print the shape and name of every parameter in this rnn search model
for value in params:
    log('\t{:15}: {}'.format(value.get_value().shape, value.name))

log('Build grad ... ', nl=False)
# gradient of cost with respect to each entry of params
# (T is presumably theano.tensor -- confirm against the file's imports)
grade = T.grad(cost, params)
# add step clipping, L2-norm of grade to prevent over-fitting, make
# gradients (update) smaller, model simpler
# NOTE(review): the body of this branch lies beyond the visible excerpt.
if config['step_clipping'] > 0.:
# Decode the development set with the saved model and write the resulting
# translations to the file "trans" in the current working directory.
params = trans.params
# Sum of the first parameter's values, printed before the checkpoint is
# loaded further down. The single-argument call form prints the same text
# under Python 2 as the print statement did.
print(params[0].get_value().sum())

logger.info("begin to build sample model : f_init, f_next")
f_init, f_next = trans.build_sample()
logger.info("end build sample model : f_init, f_next")

# NOTE(review): pickle.load executes arbitrary code from the file it reads;
# only point config["src_vocab"] / config["trg_vocab"] at trusted files.
# The files are opened in context managers so the handles are closed even
# when unpickling fails.
with open(config["src_vocab"]) as src_vocab_file:
    src_vocab = pickle.load(src_vocab_file)
with open(config["trg_vocab"]) as trg_vocab_file:
    trg_vocab = pickle.load(trg_vocab_file)

src_vocab = ensure_special_tokens(
    src_vocab,
    bos_idx=0,
    eos_idx=config["src_vocab_size"] - 1,
    unk_idx=config["unk_id"],
)
# The target end-of-sentence index belongs to the target vocabulary, so it is
# taken from "trg_vocab_size". When that key is absent the previous behaviour
# (reusing "src_vocab_size") is kept, so existing configs still work.
trg_eos_idx = config.get("trg_vocab_size", config["src_vocab_size"]) - 1
trg_vocab = ensure_special_tokens(
    trg_vocab,
    bos_idx=0,
    eos_idx=trg_eos_idx,
    unk_idx=config["unk_id"],
)

# index -> word lookups
trg_vocab_reverse = {index: word for word, index in trg_vocab.iteritems()}
src_vocab_reverse = {index: word for word, index in src_vocab.iteritems()}
logger.info("load dict finished ! src dic size : {} trg dic size : {}.".format(len(src_vocab), len(trg_vocab)))

# val_set=sys.argv[1]
# config['val_set']=val_set
dev_stream = get_dev_stream(**config)

# NOTE(review): the message says "training" although this block only decodes;
# the text is left untouched in case logs are parsed elsewhere.
logger.info("start training!!!")
trans.load(config["saveto"] + "/params.npz")

# The output file is opened before sampling, as before, so "trans" is
# truncated up front; the context manager closes it even if sampling or
# writing raises.
with open("trans", "w") as val_save_file:
    data_iter = dev_stream.get_epoch_iterator()
    # NOTE: `trans` is rebound from the model object to the sampled output
    # here, exactly as in the original code.
    trans = multi_process_sample(
        data_iter,
        f_init,
        f_next,
        k=10,
        vocab=trg_vocab_reverse,
        process=1,
        normalize=False,
    )
    val_save_file.writelines(trans)
# {1: '<UNK>', 0: '<S>', trg_vocab_size-1: '</S>', 5: 'is', ...} debug('\t~done source vocab count: {}, target vocab count: {}'.format( len(src_vocab), len(trg_vocab))) lm = kenlm.Model(lmpath) if (lmpath and search_mode == 3) else None val_prefix = config['val_prefix'] config['val_prefix'] = valid_set config['val_set'] = config['val_tst_dir'] + config['val_prefix'] + '.src' if config['val_prefix'] == val_prefix: dev_stream = get_dev_stream(**config) else: dev_stream = get_tst_stream(**config) if args.use_valid: _log('start decoding ... {}'.format(config['val_set'])) trans.load(model_name) # this is change all weights of nmt, importance!!! np_params = trans.load2numpy(model_name) ''' for np_param in np_params.files: print type(np_param) print np_param ''' params = trans.params # _log('Weights in model {}'.format(model_name)) # for shared_var in params: # _log('{} : {} {} {}'.format(shared_var.name, shared_var.get_value().sum(), # type(shared_var), type(shared_var.get_value()))) translator = Translator( fs=fs, switchs=switchs,