def load_model(load_states=True):
    if utils.check_file(params):
        model.load_params(params, ctx=ctxs)
        logging.info("Loading parameters from : {}".format(params))
    if load_states and utils.check_file(trainingfile):
        trainer.set_learning_rate(
            float(utils.read_kvstore(trainingfile)['lr']))
        logging.info("Loading lr from : {}".format(trainingfile))
def gen_model(queue, rqueue, pid, model, options, k, normalize, word_idict,
              sampling):
    import theano
    from theano import tensor
    from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams

    trng = RandomStreams(1234)
    # zero indicates we are not using dropout in the graph
    use_noise = theano.shared(numpy.float32(0.), name='use_noise')

    # get the parameters
    params = init_params(options)
    params = load_params(model, params)
    tparams = init_tparams(params)

    # build the sampling computational graph
    # see model.py for more detailed explanations
    print "Starting to build sampler ..."
    f_init, f_next = build_sampler(tparams, options, use_noise, trng,
                                   sampling=sampling)

    def _gencap(cc0):
        sample, score = gen_sample(tparams, f_init, f_next, cc0, options,
                                   trng=trng, k=k, maxlen=200,
                                   stochastic=False)
        # adjust for length bias
        if normalize:
            lengths = numpy.array([len(s) for s in sample])
            score = score / lengths
        sidx = numpy.argmin(score)
        return sample[sidx]

    while True:
        req = queue.get()
        # exit signal
        if req is None:
            break

        idx, context = req[0], req[1]
        print pid, '-', idx
        seq = _gencap(context)
        rqueue.put((idx, seq))

    return
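# Hedged usage sketch (not part of the original file): gen_model is written as
# a queue-driven worker, so a driver typically spawns one process per worker,
# feeds (index, context) pairs, and sends one None per worker as the exit
# signal. The helper name and its arguments below are assumptions based only
# on gen_model's signature.
from multiprocessing import Process, Queue


def run_caption_workers(contexts, model_file, options, word_idict,
                        n_process=4, beam_k=5):
    queue, rqueue = Queue(), Queue()
    procs = []
    for pid in range(n_process):
        p = Process(target=gen_model,
                    args=(queue, rqueue, pid, model_file, options,
                          beam_k, True, word_idict, False))
        p.start()
        procs.append(p)

    for idx, cc in enumerate(contexts):  # feed work items
        queue.put((idx, cc))
    for _ in procs:                      # one None per worker = exit signal
        queue.put(None)

    results = [rqueue.get() for _ in contexts]  # (idx, sample), any order
    for p in procs:
        p.join()
    return [sample for _, sample in sorted(results)]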
def main(model, dictionary, dictionary_tag, source_file, target_file, saveto):
    # load model options
    with open('%s.pkl' % model, 'rb') as f:
        options = pkl.load(f)

    # load source dictionary and invert
    with open(dictionary, 'rb') as f:
        word_dict = pkl.load(f)
    word_idict = dict()
    for kk, vv in word_dict.iteritems():
        word_idict[vv] = kk
    word_idict[0] = '<eos>'
    word_idict[1] = 'UNK'

    # load tag dictionary and invert
    with open(dictionary_tag, 'rb') as f:
        tag_dict = pkl.load(f)
    tag_idict = dict()
    for kk, vv in tag_dict.iteritems():
        tag_idict[vv] = kk

    # allocate model parameters
    params = init_params(options)

    # load model parameters and set theano shared variables
    params = load_params(model, params)
    tparams = init_tparams(params)

    trng, use_noise, \
        x, x_mask, y, y_mask, \
        opt_ret, \
        cost, predicts = \
        build_model(tparams, options)

    print 'Building f_predicts...',
    f_predicts = theano.function([x, x_mask], predicts)
    print 'Done'

    use_noise.set_value(0.)

    valid_err = evaluation(f_predicts, options, tag_idict, word_dict,
                           source_file, saveto, target_file, 0,
                           options['n_words_src'],
                           back_file=target_file + ".back")

    print 'Test ', valid_err
def main(data_path, model_path, dict_path, save_path):
    print("Preparing Data...")

    # Load data and dictionary
    X = []
    with io.open(data_path, 'r', encoding='utf-8') as f:
        for line in f:
            X.append(line.rstrip('\n'))
    with open(dict_path, 'rb') as f:
        chardict = pkl.load(f)
    n_char = len(chardict.keys()) + 1

    # Prepare data for encoding
    batches = Batch(X)

    # Load model
    print("Loading model params...")
    params = load_params(model_path)

    # Build encoder
    print("Building encoder...")

    # Theano variables
    tweet = T.itensor3()
    t_mask = T.fmatrix()

    # Embeddings
    emb_t = tweet2vec(tweet, t_mask, params, n_char)[0]

    # Theano function
    f_enc = theano.function([tweet, t_mask], emb_t)

    # Encode
    print("Encoding data...")
    print("Input data {} samples".format(len(X)))
    features = np.zeros((len(X), WDIM), dtype='float32')
    it = 0
    for x, i in batches:
        if it % 100 == 0:
            print("Minibatch {}".format(it))
        it += 1

        xp, x_mask = prepare_data(x, chardict)
        ff = f_enc(xp, x_mask)
        for ind, idx in enumerate(i):
            features[idx] = ff[ind]

    # Save (np.save needs a binary-mode handle)
    with open(save_path, 'wb') as o:
        np.save(o, features)
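# Hedged usage sketch (not in the original file): the encoder expects one
# tweet per line in a UTF-8 text file plus the character dictionary saved at
# training time. The argument order below is an assumption based only on
# main()'s signature.
if __name__ == '__main__':
    import sys
    # e.g. python encode.py tweets.txt model.npz dict.pkl embeddings.npy
    main(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4])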
def train(dim_word=100,  # word vector dimensionality
          dim=1000,  # the number of LSTM units
          encoder='gru',
          decoder='gru_cond',
          patience=10,
          max_epochs=5000,
          dispFreq=100,
          decay_c=0.,
          alpha_c=0.,
          diag_c=0.,
          clip_c=-1.,
          lrate=0.01,
          n_words_src=100000,
          n_words=100000,
          maxlen=100,  # maximum length of the description
          optimizer='rmsprop',
          batch_size=16,
          valid_batch_size=16,
          saveto='model.npz',
          validFreq=1000,
          saveFreq=1000,  # save the parameters after every saveFreq updates
          sampleFreq=100,  # generate some samples after every sampleFreq updates
          datasets=[
              '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.en.tok',
              '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.fr.tok'
          ],
          valid_datasets=[
              '../data/dev/newstest2011.en.tok',
              '../data/dev/newstest2011.fr.tok'
          ],
          dictionaries=[
              '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.en.tok.pkl',
              '/data/lisatmp3/chokyun/europarl/europarl-v7.fr-en.fr.tok.pkl'
          ],
          use_dropout=False,
          reload_=False):

    # Model options
    model_options = locals().copy()

    # load dictionaries and invert them
    worddicts = [None] * len(dictionaries)
    worddicts_r = [None] * len(dictionaries)
    for ii, dd in enumerate(dictionaries):
        with open(dd, 'rb') as f:
            worddicts[ii] = pkl.load(f)
        worddicts_r[ii] = dict()
        for kk, vv in worddicts[ii].iteritems():
            worddicts_r[ii][vv] = kk

    # reload options
    if reload_ and os.path.exists(saveto):
        with open('%s.pkl' % saveto, 'rb') as f:
            model_options = pkl.load(f)

    print 'Loading data'
    train = TextIterator(datasets[0], datasets[1],
                         dictionaries[0], dictionaries[1],
                         n_words_source=n_words_src,
                         n_words_target=n_words,
                         batch_size=batch_size,
                         maxlen=maxlen)
    valid = TextIterator(valid_datasets[0], valid_datasets[1],
                         dictionaries[0], dictionaries[1],
                         n_words_source=n_words_src,
                         n_words_target=n_words,
                         batch_size=valid_batch_size,
                         maxlen=maxlen)

    print 'Building model'
    params = init_params(model_options)
    # reload parameters
    if reload_ and os.path.exists(saveto):
        params = load_params(saveto, params)

    tparams = init_tparams(params)

    trng, use_noise, \
        x, x_mask, y, y_mask, \
        opt_ret, \
        cost = \
        build_model(tparams, model_options)
    inps = [x, x_mask, y, y_mask]

    print 'Building sampler'
    f_init, f_next = build_sampler(tparams, model_options, trng)

    # before any regularizer
    print 'Building f_log_probs...',
    f_log_probs = theano.function(inps, cost, profile=profile)
    print 'Done'

    cost = cost.mean()

    # L2 weight decay
    if decay_c > 0.:
        decay_c = theano.shared(numpy.float32(decay_c), name='decay_c')
        weight_decay = 0.
        for kk, vv in tparams.iteritems():
            weight_decay += (vv ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay

    # regularize the attention weights
    if alpha_c > 0. and not model_options['decoder'].endswith('simple'):
        alpha_c = theano.shared(numpy.float32(alpha_c), name='alpha_c')
        alpha_reg = alpha_c * (
            (tensor.cast(y_mask.sum(0) // x_mask.sum(0), 'float32')[:, None] -
             opt_ret['dec_alphas'].sum(0)) ** 2).sum(1).mean()
        cost += alpha_reg

    # after any regularizer
    print 'Building f_cost...',
    f_cost = theano.function(inps, cost, profile=profile)
    print 'Done'

    print 'Computing gradient...',
    grads = tensor.grad(cost, wrt=itemlist(tparams))
    print 'Done'
    print 'Building f_grad...',
    f_grad = theano.function(inps, grads, profile=profile)
    print 'Done'

    # gradient clipping by global norm
    if clip_c > 0.:
        g2 = 0.
        for g in grads:
            g2 += (g ** 2).sum()
        new_grads = []
        for g in grads:
            new_grads.append(
                tensor.switch(g2 > (clip_c ** 2),
                              g / tensor.sqrt(g2) * clip_c,
                              g))
        grads = new_grads

    lr = tensor.scalar(name='lr')
    print 'Building optimizers...',
    f_grad_shared, f_update = eval(optimizer)(lr, tparams, grads, inps, cost)
    print 'Done'

    print 'Optimization'

    history_errs = []
    # reload history
    if reload_ and os.path.exists(saveto):
        history_errs = list(numpy.load(saveto)['history_errs'])
    best_p = None
    bad_counter = 0

    if validFreq == -1:
        validFreq = len(train[0]) / batch_size
    if saveFreq == -1:
        saveFreq = len(train[0]) / batch_size
    if sampleFreq == -1:
        sampleFreq = len(train[0]) / batch_size

    uidx = 0
    estop = False
    for eidx in xrange(max_epochs):
        n_samples = 0

        for x, y in train:
            n_samples += len(x)
            uidx += 1
            use_noise.set_value(1.)

            x, x_mask, y, y_mask = prepare_data(x, y, maxlen=maxlen,
                                                n_words_src=n_words_src,
                                                n_words=n_words)

            if x is None:
                print 'Minibatch with zero sample under length ', maxlen
                uidx -= 1
                continue

            ud_start = time.time()
            cost = f_grad_shared(x, x_mask, y, y_mask)
            f_update(lrate)
            ud = time.time() - ud_start

            if numpy.isnan(cost) or numpy.isinf(cost):
                print 'NaN detected'
                return 1., 1., 1.

            if numpy.mod(uidx, dispFreq) == 0:
                print 'Epoch ', eidx, 'Update ', uidx, 'Cost ', cost, 'UD ', ud

            if numpy.mod(uidx, saveFreq) == 0:
                print 'Saving...',
                if best_p is not None:
                    params = best_p
                else:
                    params = unzip(tparams)
                numpy.savez(saveto, history_errs=history_errs, **params)
                pkl.dump(model_options, open('%s.pkl' % saveto, 'wb'))
                print 'Done'

            if numpy.mod(uidx, sampleFreq) == 0:
                # FIXME: random selection?
                for jj in xrange(numpy.minimum(5, x.shape[1])):
                    stochastic = True
                    sample, score = gen_sample(tparams, f_init, f_next,
                                               x[:, jj][:, None],
                                               model_options, trng=trng, k=1,
                                               maxlen=30,
                                               stochastic=stochastic,
                                               argmax=False)
                    print 'Source ', jj, ': ',
                    for vv in x[:, jj]:
                        if vv == 0:
                            break
                        if vv in worddicts_r[0]:
                            print worddicts_r[0][vv],
                        else:
                            print 'UNK',
                    print
                    print 'Truth ', jj, ' : ',
                    for vv in y[:, jj]:
                        if vv == 0:
                            break
                        if vv in worddicts_r[1]:
                            print worddicts_r[1][vv],
                        else:
                            print 'UNK',
                    print
                    print 'Sample ', jj, ': ',
                    if stochastic:
                        ss = sample
                    else:
                        score = score / numpy.array([len(s) for s in sample])
                        ss = sample[score.argmin()]
                    for vv in ss:
                        if vv == 0:
                            break
                        if vv in worddicts_r[1]:
                            print worddicts_r[1][vv],
                        else:
                            print 'UNK',
                    print

            if numpy.mod(uidx, validFreq) == 0:
                use_noise.set_value(0.)
                valid_errs = pred_probs(f_log_probs, prepare_data,
                                        model_options, valid)
                valid_err = valid_errs.mean()
                history_errs.append(valid_err)

                if uidx == 0 or valid_err <= numpy.array(history_errs).min():
                    best_p = unzip(tparams)
                    bad_counter = 0
                if len(history_errs) > patience and valid_err >= numpy.array(
                        history_errs)[:-patience].min():
                    bad_counter += 1
                    if bad_counter > patience:
                        print 'Early Stop!'
                        estop = True
                        break

                if numpy.isnan(valid_err):
                    import ipdb
                    ipdb.set_trace()

                print 'Valid ', valid_err

        print 'Seen %d samples' % n_samples

        if estop:
            break

    if best_p is not None:
        zipp(best_p, tparams)

    use_noise.set_value(0.)
    valid_err = pred_probs(f_log_probs, prepare_data,
                           model_options, valid).mean()

    print 'Valid ', valid_err

    params = copy.copy(best_p)
    numpy.savez(saveto, zipped_params=best_p,
                history_errs=history_errs, **params)

    return valid_err
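# Hedged usage sketch (not part of the original file): train() is typically
# invoked from a small launcher that overrides the data paths and a few
# hyperparameters; every path and value below is a hypothetical placeholder.
if __name__ == '__main__':
    validerr = train(
        saveto='models/model_en-fr.npz',
        reload_=False,
        dim_word=256,
        dim=512,
        n_words_src=30000,
        n_words=30000,
        decay_c=0.,
        clip_c=1.,
        lrate=0.0001,
        maxlen=50,
        batch_size=32,
        valid_batch_size=32,
        dispFreq=100,
        validFreq=5000,
        saveFreq=5000,
        sampleFreq=1000,
        datasets=['data/train.en.tok', 'data/train.fr.tok'],
        valid_datasets=['data/dev.en.tok', 'data/dev.fr.tok'],
        dictionaries=['data/train.en.tok.pkl', 'data/train.fr.tok.pkl'],
        use_dropout=False)
    print 'Final validation error:', validerr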
def main(data_path, model_path):
    print("Loading data...")
    with open(data_path, 'r') as f:
        valX = pkl.load(f)

    print("Preparing data...")
    val_iter = batched_tweets.BatchedTweets(valX, batch_size=1024,
                                            maxlen=MAX_LENGTH)

    print("Loading dictionary...")
    with open('%s/dict.pkl' % model_path, 'rb') as f:
        chardict = pkl.load(f)
    n_char = len(chardict.keys()) + 1

    # check for model files
    files = sorted(glob.glob(model_path + 'model_*.npz'))
    print("Found {} model files".format(len(files)))

    for modelf in files:
        print("Computing validation cost on {}".format(modelf))

        print("Loading params...")
        params = load_params(modelf)

        print("Building network...")
        # Tweet variables
        tweet = T.itensor3()
        ptweet = T.itensor3()
        ntweet = T.itensor3()

        # masks
        t_mask = T.fmatrix()
        tp_mask = T.fmatrix()
        tn_mask = T.fmatrix()

        # Embeddings
        emb_t = tweet2vec(tweet, t_mask, params, n_char)[0]
        emb_tp = tweet2vec(ptweet, tp_mask, params, n_char)[0]
        emb_tn = tweet2vec(ntweet, tn_mask, params, n_char)[0]

        # batch cost
        D1 = 1 - T.batched_dot(emb_t, emb_tp) / (tnorm(emb_t) * tnorm(emb_tp))
        D2 = 1 - T.batched_dot(emb_t, emb_tn) / (tnorm(emb_t) * tnorm(emb_tn))
        gap = D1 - D2 + M
        loss = gap * (gap > 0)
        cost = T.mean(loss)

        # Theano function
        print("Compiling theano function...")
        inps = [tweet, t_mask, ptweet, tp_mask, ntweet, tn_mask]
        cost_val = theano.function(inps, cost)

        print("Testing...")
        uidx = 0
        try:
            validation_cost = 0.
            n_val_samples = 0

            for x, y, z in val_iter:
                if not x:
                    print("Validation: Minibatch with no valid triples")
                    continue

                n_val_samples += len(x)
                x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data(
                    x, y, z, chardict, maxlen=MAX_LENGTH, n_chars=n_char)

                if x is None:
                    print("Validation: Minibatch with zero samples under maxlength")
                    continue

                curr_cost = cost_val(x, x_m, y, y_m, z, z_m)
                validation_cost += curr_cost * len(x)

            print("Model {} Validation Cost {}".format(
                modelf, validation_cost / n_val_samples))
            print("Seen {} samples.".format(n_val_samples))

        except KeyboardInterrupt:
            pass
def main(data_path, model_path):
    print("Loading data...")
    with open(data_path, 'r') as f:
        valX = pkl.load(f)

    print("Preparing data...")
    val_iter = batched_tweets.BatchedTweets(valX, batch_size=512,
                                            maxlen=MAX_LENGTH)

    print("Loading dictionary...")
    with open('%s/dict.pkl' % model_path, 'rb') as f:
        chardict = pkl.load(f)
    n_char = len(chardict.keys()) + 1

    # check for model files
    files = sorted(glob.glob(model_path + 'model_*.npz'))
    print("Found {} model files".format(len(files)))

    for modelf in files:
        print("Computing validation cost on {}".format(modelf))

        print("Loading params...")
        params = load_params(modelf)

        print("Building network...")
        # Tweet variables
        tweet = T.itensor4()
        ptweet = T.itensor4()
        ntweet = T.itensor4()

        # masks
        t_mask = T.ftensor3()
        tp_mask = T.ftensor3()
        tn_mask = T.ftensor3()

        # Embeddings
        emb_t = char2word2vec(tweet, t_mask, params, n_char)[0]
        emb_tp = char2word2vec(ptweet, tp_mask, params, n_char)[0]
        emb_tn = char2word2vec(ntweet, tn_mask, params, n_char)[0]

        # batch cost
        D1 = 1 - T.batched_dot(emb_t, emb_tp) / (tnorm(emb_t) * tnorm(emb_tp))
        D2 = 1 - T.batched_dot(emb_t, emb_tn) / (tnorm(emb_t) * tnorm(emb_tn))
        gap = D1 - D2 + M
        loss = gap * (gap > 0)
        cost = T.mean(loss)

        # L2 penalty on the word-level and sequence-level network parameters
        reg = REGULARIZATION * lasagne.regularization.regularize_network_params(
            char2word2vec(tweet, t_mask, params, n_char)[1],
            lasagne.regularization.l2) + \
            REGULARIZATION * lasagne.regularization.regularize_network_params(
                char2word2vec(tweet, t_mask, params, n_char)[2],
                lasagne.regularization.l2)

        # Theano function
        print("Compiling theano function...")
        inps = [tweet, t_mask, ptweet, tp_mask, ntweet, tn_mask]
        cost_val = theano.function(inps, cost)
        reg_val = theano.function([], reg)

        print("Testing...")
        uidx = 0
        try:
            validation_cost = 0.
            reg_cost = 0.
            n_val_samples = 0

            for x, y, z in val_iter:
                if not x:
                    print("Validation: Minibatch with no valid triples")
                    continue

                n_val_samples += len(x)
                x, x_m, y, y_m, z, z_m = batched_tweets.prepare_data_c2w2s(
                    x, y, z, chardict, maxwordlen=MAX_WORD_LENGTH,
                    maxseqlen=MAX_SEQ_LENGTH, n_chars=n_char)

                if x is None:
                    print("Validation: Minibatch with zero samples under maxlength")
                    continue

                curr_cost = cost_val(x, x_m, y, y_m, z, z_m)
                validation_cost += curr_cost * len(x)
                reg_cost = reg_val()

            print("Model {} Validation Cost {} Regularization Cost {}".format(
                modelf, validation_cost / n_val_samples, reg_cost))
            print("Seen {} samples.".format(n_val_samples))

        except KeyboardInterrupt:
            pass
# Tail of train(): end of the per-batch loop, then per-epoch validation,
# checkpointing of the best model, and learning-rate annealing.
            trainer.step(1)
            total_L += mx.nd.sum(L).asscalar()

            if i % args.log_interval == 0 and i > 0:
                cur_L = total_L / args.log_interval
                print('[Epoch %d Batch %d] loss %.2f, ppl %.2f' % (
                    epoch, i, cur_L, math.exp(cur_L)))
                total_L = 0.0

        val_L = eval(val_data)

        print('[Epoch %d] time cost %.2fs, valid loss %.2f, valid ppl %.2f' % (
            epoch, time.time() - start_time, val_L, math.exp(val_L)))

        if val_L < best_val:
            best_val = val_L
            test_L = eval(test_data)
            model.save_params(args.save)
            print('test loss %.2f, test ppl %.2f' % (test_L, math.exp(test_L)))
        else:
            args.lr = args.lr * 0.25
            trainer.set_learning_rate(args.lr)


if __name__ == '__main__':
    train()
    model.load_params(args.save, context)
    test_L = eval(test_data)
    print('Best test loss %.2f, test ppl %.2f' % (test_L, math.exp(test_L)))
def __init__(self,
             model_dir,
             num_channels,
             shape_z,
             shape_y,
             shape_scale=5,
             num_maps=1,
             batch_size=1,
             tf_graph=None,
             tf_sess=None,
             debug_plot=False):
    """Setup model for inference.

    Args:
        model_dir: Directory with model files
        num_channels: Number of channels for input data
        shape_z: Shape of input data in Z
        shape_y: Shape of input data in Y
        shape_scale: Scale data with center k-space data
        num_maps: Number of sets of sensitivity maps
        batch_size: Batch size for inference
        tf_graph: Existing tf.Graph to build into (created if None)
        tf_sess: Existing tf.Session to use (created if None)
        debug_plot: Enable debug plotting
    """
    self.debug_plot = debug_plot

    self.tf_graph = tf_graph
    if self.tf_graph is None:
        self.tf_graph = tf.Graph()
    self.tf_sess = tf_sess
    if self.tf_sess is None:
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True  # pylint: disable=E1101
        session_config.allow_soft_placement = True
        self.tf_sess = tf.Session(graph=self.tf_graph,
                                  config=session_config)

    params = model.load_params(model_dir)

    with self.tf_graph.as_default():
        self.batch_size = batch_size
        self.tf_kspace_input = tf.placeholder(
            tf.complex64,
            (self.batch_size, shape_z, shape_y, num_channels))
        self.tf_sensemap_input = tf.placeholder(
            tf.complex64,
            (self.batch_size, shape_z, shape_y, num_maps, num_channels))

        # estimate a scale factor from the center of k-space
        if shape_scale > 0:
            scale = tf.image.resize_image_with_crop_or_pad(
                self.tf_kspace_input, shape_scale, shape_scale)
            scale = tf.reduce_mean(tf.square(tf.abs(scale)))
            scale *= shape_scale * shape_scale / shape_y / shape_z
        else:
            logger.info('Turning off scaling...')
            scale = 1.0
        scale = tf.cast(1.0 / tf.sqrt(scale), dtype=tf.complex64)
        tf_kspace_input_scaled = self.tf_kspace_input * scale

        tf_image_output_scaled, tf_kspace_output_scaled, self.iter_out = \
            model.unrolled_prox(
                tf_kspace_input_scaled,
                self.tf_sensemap_input,
                num_grad_steps=params['unrolled_steps'],
                resblock_num_features=params['unrolled_num_features'],
                resblock_num_blocks=params['unrolled_num_resblocks'],
                resblock_share=params['unrolled_share'],
                training=False,
                hard_projection=params['hard_projection'],
                scope=params['recon_scope'])

        # undo the scaling on the outputs
        self.tf_image_output = tf_image_output_scaled / scale
        self.tf_kspace_output = tf_kspace_output_scaled / scale

        if params['loss_adv'] > 0:
            adv_scope = 'Adversarial'
            tf_image_input_scaled = tfmri.model_transpose(
                tf_kspace_input_scaled, self.tf_sensemap_input)
            self.adv_output = model.adversarial(tf_image_input_scaled,
                                                training=False,
                                                scope=adv_scope)
        else:
            self.adv_output = None

        # restore weights from the latest checkpoint
        filename_latest_model = tf.train.latest_checkpoint(model_dir)
        logger.info('Loading model ({})...'.format(filename_latest_model))
        saver = tf.train.Saver()
        saver.restore(self.tf_sess, filename_latest_model)
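# Hedged usage sketch (not part of the original file): once the constructor
# above has built the graph and restored the checkpoint, inference amounts to
# feeding k-space data and sensitivity maps into the two placeholders and
# fetching tf_image_output. `recon` stands for an instance of the class this
# __init__ belongs to; the helper name is hypothetical.
import numpy as np


def run_inference(recon, kspace, sensemap):
    """Run one reconstruction.

    kspace:   [batch_size, shape_z, shape_y, num_channels], complex64
    sensemap: [batch_size, shape_z, shape_y, num_maps, num_channels], complex64
    Returns the reconstructed image batch as a numpy array.
    """
    kspace = np.asarray(kspace, dtype=np.complex64)
    sensemap = np.asarray(sensemap, dtype=np.complex64)
    return recon.tf_sess.run(
        recon.tf_image_output,
        feed_dict={recon.tf_kspace_input: kspace,
                   recon.tf_sensemap_input: sensemap})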