def simulate():
    """Roll out the infinite-horizon LQR policy on the stochastic dynamics.

    Solves the discrete-time Riccati equation once for the gain, then runs
    N = 100 episodes of T = 100 steps each, accumulating the discounted
    (gamma = 0.95) stage cost per episode.

    Returns:
        The mean discounted episode cost across all episodes (float).
    """
    # dyn = dynamics(stochastic=False)
    dyn = dynamics(stochastic=True)  # uncomment line above for deterministic dynamics
    stage_cost = cost()

    horizon = 100    # episode length
    episodes = 100   # number of episodes
    discount = 0.95  # discount factor

    # Infinite-horizon Riccati solution: gain for u = -gain @ x
    # (the value matrix is unused here).
    gain, _value = Riccati(dyn.A, dyn.B, stage_cost.Q, stage_cost.R)

    episode_costs = []
    for _ in range(episodes):
        state = dyn.reset()
        total = 0
        for step in range(horizon):
            # policy
            action = -gain @ state
            # accumulate discounted stage cost
            total += (discount ** step) * stage_cost.evaluate(state, action)
            # dynamics step
            state = dyn.step(action)
        episode_costs.append(total)

    return np.mean(episode_costs)
from model import dynamics, cost import numpy as np dynfun = dynamics(stochastic=False) # dynfun = dynamics(stochastic=True) # uncomment for stochastic dynamics costfun = cost() T = 100 # episode length N = 100 # number of episodes gamma = 0.95 # discount factor # Riccati recursion def Riccati(A,B,Q,R): # TODO implement infinite horizon riccati recursion #matlab code that works # P_current = Q; # P_new = P_current; # L_current = zeros(size(Q,1), size(R,2)); %general form, in ex. creates 4x1 zeros matrix # L_new = L_current; # firstIt = true; # while (norm(L_new - L_current, 2) >= 1e-4) || (firstIt == true) # if firstIt == true # firstIt = false; # end # L_current = L_new; # P_current = P_new; # L_new = -inv(R + B'*P_current*B)*(B'*P_current*A); # P_new = Q + L_new'*R*L_new + (A + B*L_new)'*P_current*(A + B*L_new);
valid_id_string = ' '.join([line.strip() for line in fin.readlines()]) # 实际上是id_list valid_set = [int(w) for w in valid_id_string.split()] # 将读取的单词编号转为整数 # 计算总的batch数量,每个batch包含的单词数量是batch_size * num_step valid_num_batches = (len(valid_set) - 1) // (config.batch_size * config.num_step) # input_data的类型一定要定义为tf.int32,tf.nn.embedding_lookup(embedding, ids=input_data)要求的 input_data = tf.placeholder(dtype=tf.int32, shape=[None, config.num_step], name="input_data") labels = tf.placeholder(dtype=tf.int32, shape=[None, config.num_step], name="labels") logits = NNLM(input_data, config) loss, train_op = cost(logits, labels, config) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) # 训练数据生成器 train_gen = MakeDataset(train_set, train_num_batches, config.batch_size, config.num_step) # 验证数据生成器 valid_gen = MakeDataset(valid_set, valid_num_batches, config.batch_size, config.num_step) for epoch in range(config.epoches): # 训练数据拟合 iters = 0 total_costs = 0 for i in range(train_num_batches): X, y = train_gen.__next__() cost, _ = sess.run([loss, train_op],
def main():
    """Train a Lasagne/Theano network on the configured dataset.

    Parses CLI options and config files, builds a (possibly multi-threaded or
    multi-process) mini-batch feed, compiles training/validation functions,
    runs the epoch loop with optional learning-rate decay/cycling and
    patience-based early stopping, and saves the final model (plus optional
    error curves and resumable training state).
    """
    # parse command line
    parser = opts_parser()
    options = parser.parse_args()
    modelfile = options.modelfile

    # read configuration files and immediate settings
    cfg = {}
    for fn in options.vars:
        cfg.update(config.parse_config_file(fn))
    cfg.update(config.parse_variable_assignments(options.var))

    # prepare dataset
    datadir = os.path.join(os.path.dirname(__file__), os.path.pardir,
                           'datasets', options.dataset)
    print("Preparing training data feed...")
    with io.open(os.path.join(datadir, 'filelists', 'train')) as f:
        filelist = [l.rstrip() for l in f if l.rstrip()]
    train_feed, train_formats = data.prepare_datafeed(filelist, datadir,
                                                      'train', cfg)

    # If told so, we plot some mini-batches on screen.
    if cfg.get('plot_datafeed'):
        import matplotlib.pyplot as plt
        for batch in data.run_datafeed(train_feed, cfg):
            # assumes batch['spect'] is (items, frames, bins) — TODO confirm
            plt.matshow(np.log(batch['spect'][0]).T,
                        aspect='auto', origin='lower', cmap='hot',
                        interpolation='nearest')
            plt.colorbar()
            plt.title(str(batch['label'][0]))
            plt.show()

    # We start the mini-batch generator and augmenter in one or more
    # background threads or processes (unless disabled).
    bg_threads = cfg['bg_threads']
    bg_processes = cfg['bg_processes']
    if not bg_threads and not bg_processes:
        # no background processing: just create a single generator
        batches = data.run_datafeed(train_feed, cfg)
    elif bg_threads:
        # multithreading: create a separate generator per thread
        batches = augment.generate_in_background([
            data.run_datafeed(feed, cfg)
            for feed in data.split_datafeed(train_feed, bg_threads, cfg)
        ], num_cached=bg_threads * 2)
    elif bg_processes:
        # multiprocessing: single generator is forked along with processes
        batches = augment.generate_in_background(
            [data.run_datafeed(train_feed, cfg)] * bg_processes,
            num_cached=bg_processes * 25,
            in_processes=True)

    # If told so, we benchmark the creation of a given number of mini-batches.
    if cfg.get('benchmark_datafeed'):
        print("Benchmark: %d mini-batches of %d items " %
              (cfg['benchmark_datafeed'], cfg['batchsize']), end='')
        if bg_threads:
            print("(in %d threads): " % bg_threads)
        elif bg_processes:
            print("(in %d processes): " % bg_processes)
        else:
            print("(in main thread): ")
        import time
        import itertools
        t0 = time.time()
        # islice with start == stop yields nothing but consumes exactly
        # cfg['benchmark_datafeed'] items; next(..., None) just drives it.
        next(
            itertools.islice(batches, cfg['benchmark_datafeed'],
                             cfg['benchmark_datafeed']), None)
        t1 = time.time()
        print(t1 - t0)
        return

    # - prepare validation data generator
    if options.validate:
        print("Preparing validation data feed...")
        with io.open(os.path.join(datadir, 'filelists', 'valid')) as f:
            filelist_val = [l.rstrip() for l in f if l.rstrip()]
        val_feed, val_formats = data.prepare_datafeed(filelist_val, datadir,
                                                      'valid', cfg)
        if bg_threads or bg_processes:
            multi = bg_threads or bg_processes
            val_feed = data.split_datafeed(val_feed, multi, cfg)

        def run_val_datafeed():
            # Fresh validation generator per epoch (single-threaded or
            # backgrounded to match the training feed configuration).
            if bg_threads or bg_processes:
                return augment.generate_in_background(
                    [data.run_datafeed(feed, cfg) for feed in val_feed],
                    num_cached=multi,
                    in_processes=bool(bg_processes))
            else:
                return data.run_datafeed(val_feed, cfg)

    print("Preparing training function...")
    # instantiate neural network: one symbolic input per data format entry
    input_vars = {
        name: T.TensorType(str(np.dtype(dtype)), (False, ) * len(shape))(name)
        for name, (dtype, shape) in train_formats.items()
    }
    input_shapes = {
        name: shape
        for name, (dtype, shape) in train_formats.items()
    }
    network = model.architecture(input_vars, input_shapes, cfg)
    print(
        "- %d layers (%d with weights), %f mio params" %
        (len(lasagne.layers.get_all_layers(network)),
         sum(hasattr(l, 'W') for l in lasagne.layers.get_all_layers(network)),
         lasagne.layers.count_params(network, trainable=True) / 1e6))
    print("- weight shapes: %r" % [
        l.W.get_value().shape
        for l in lasagne.layers.get_all_layers(network)
        if hasattr(l, 'W') and hasattr(l.W, 'get_value')
    ])
    cost_vars = dict(input_vars)

    # prepare for born-again-network, if needed: a frozen teacher network
    # loaded from cfg['ban'] supplies 'pseudo_label' targets
    if cfg.get('ban'):
        network2 = model.architecture(input_vars, input_shapes, cfg)
        with np.load(cfg['ban'], encoding='latin1') as f:
            lasagne.layers.set_all_param_values(
                network2, [f['param%d' % i] for i in range(len(f.files))])
        cost_vars['pseudo_label'] = lasagne.layers.get_output(
            network2, deterministic=True)

    # load pre-trained weights, if needed (colon-separated list of .npz files)
    if cfg.get('init_from'):
        param_values = []
        for fn in cfg['init_from'].split(':'):
            with np.load(fn, encoding='latin1') as f:
                param_values.extend(f['param%d' % i]
                                    for i in range(len(f.files)))
        lasagne.layers.set_all_param_values(network, param_values)
        del param_values

    # create cost expression (optionally with L2 weight decay)
    outputs = lasagne.layers.get_output(network, deterministic=False)
    cost = T.mean(model.cost(outputs, cost_vars, 'train', cfg))
    if cfg.get('l2_decay', 0):
        cost_l2 = lasagne.regularization.regularize_network_params(
            network, lasagne.regularization.l2) * cfg['l2_decay']
    else:
        cost_l2 = 0

    # prepare and compile training function
    params = lasagne.layers.get_all_params(network, trainable=True)
    initial_eta = cfg['initial_eta']
    eta_decay = cfg['eta_decay']
    eta_decay_every = cfg.get('eta_decay_every', 1)
    eta_cycle = tuple(map(float, str(cfg['eta_cycle']).split(':')))
    if eta_cycle == (0, ):
        eta_cycle = (1, )  # so eta_cycle=0 equals disabling it
    patience = cfg.get('patience', 0)
    trials_of_patience = cfg.get('trials_of_patience', 1)
    patience_criterion = cfg.get(
        'patience_criterion', 'valid_loss' if options.validate else
        'train_loss')
    momentum = cfg['momentum']
    # the first cfg['first_params'] parameters may get a scaled learning rate
    first_params = params[:cfg['first_params']]
    first_params_eta_scale = cfg['first_params_eta_scale']
    if cfg['learn_scheme'] == 'nesterov':
        learn_scheme = lasagne.updates.nesterov_momentum
    elif cfg['learn_scheme'] == 'momentum':
        # fixed: was `lasagne.update.momentum`, a typo — the module is
        # `lasagne.updates`, as used by the other two branches
        learn_scheme = lasagne.updates.momentum
    elif cfg['learn_scheme'] == 'adam':
        learn_scheme = lasagne.updates.adam
    else:
        raise ValueError('Unknown learn_scheme=%s' % cfg['learn_scheme'])
    eta = theano.shared(lasagne.utils.floatX(initial_eta))
    if not first_params or first_params_eta_scale == 1:
        # uniform learning rate for all parameters
        updates = learn_scheme(cost + cost_l2, params, eta, momentum)
    else:
        # separate update rules: scaled (or frozen, if scale == 0) learning
        # rate for the leading parameters, plain eta for the rest
        grads = theano.grad(cost + cost_l2, params)
        updates = learn_scheme(grads[len(first_params):],
                               params[len(first_params):], eta, momentum)
        if first_params_eta_scale > 0:
            updates.update(
                learn_scheme(grads[:len(first_params)], first_params,
                             eta * first_params_eta_scale, momentum))
    print("Compiling training function...")
    train_fn = theano.function(list(input_vars.values()),
                               cost,
                               updates=updates,
                               on_unused_input='ignore')

    # prepare and compile validation function, if requested
    if options.validate:
        print("Compiling validation function...")
        outputs_test = lasagne.layers.get_output(network, deterministic=True)
        cost_test = T.mean(model.cost(outputs_test, input_vars, 'valid', cfg))
        if isinstance(outputs_test, (list, tuple)):
            outputs_test = outputs_test[0]
        val_fn = theano.function([input_vars[k] for k in val_formats],
                                 [cost_test, outputs_test],
                                 on_unused_input='ignore')

    # restore previous training state, or create fresh training state
    state = {}
    if options.keep_state:
        statefile = modelfile[:-len('.npz')] + '.state'
        if os.path.exists(statefile):
            print("Restoring training state...")
            # NOTE(review): presumably a pickled dict readable via np.load —
            # verify against how the state is written below
            state = np.load(modelfile[:-len('.npz')] + '.state',
                            encoding='latin1')
            restore_state(network, updates, state['network'])
    epochs = cfg['epochs']
    epochsize = cfg['epochsize']
    batches = iter(batches)
    if options.save_errors:
        errors = state.get('errors', [])
    if first_params and cfg['first_params_log']:
        first_params_hist = []
        if options.keep_state and os.path.exists(modelfile[:-4] +
                                                 '.hist.npz'):
            with np.load(modelfile[:-4] + '.hist.npz') as f:
                first_params_hist = list(
                    zip(*(f['param%d' % i]
                          for i in range(len(first_params)))))
    if patience > 0:
        best_error = state.get('best_error', np.inf)
        best_state = state.get('best_state') or get_state(network, updates)
        patience = state.get('patience', patience)
        trials_of_patience = state.get('trials_of_patience',
                                       trials_of_patience)
    epoch = state.get('epoch', 0)
    del state

    # run training loop
    print("Training:")
    for epoch in range(epoch, epochs):
        # actual training
        err = 0
        for batch in progress(range(epochsize),
                              min_delay=.5,
                              desc='Epoch %d/%d: Batch ' %
                              (epoch + 1, epochs)):
            err += train_fn(**next(batches))
            if not np.isfinite(err):
                print("\nEncountered NaN loss in training. Aborting.")
                sys.exit(1)
            # optionally log the leading parameters every few batches
            if first_params and cfg['first_params_log'] and (
                    batch % cfg['first_params_log'] == 0):
                first_params_hist.append(
                    tuple(param.get_value() for param in first_params))
                np.savez(
                    modelfile[:-4] + '.hist.npz', **{
                        'param%d' % i: param
                        for i, param in enumerate(zip(*first_params_hist))
                    })

        # report training loss
        print("Train loss: %.3f" % (err / epochsize))
        if options.save_errors:
            errors.append(err / epochsize)

        # compute and report validation loss, if requested
        if options.validate:
            import time
            t0 = time.time()
            # predict in mini-batches
            val_err = 0
            val_batches = 0
            preds = []
            truth = []
            for batch in run_val_datafeed():
                e, p = val_fn(**batch)
                val_err += np.sum(e)
                val_batches += 1
                preds.append(p)
                truth.append(batch['label'])
            t1 = time.time()
            # join mini-batches
            preds = np.concatenate(preds) if len(preds) > 1 else preds[0]
            truth = np.concatenate(truth) if len(truth) > 1 else truth[0]
            # show results
            print("Validation loss: %.3f" % (val_err / val_batches))
            from eval import evaluate
            results = evaluate(preds, truth)
            print("Validation error: %.3f" % (1 - results['accuracy']))
            print("Validation MAP: %.3f" % results['map'])
            print("(took %.2f seconds)" % (t1 - t0))
            if options.save_errors:
                errors.append(val_err / val_batches)
                errors.append(1 - results['accuracy'])
                errors.append(results['map'])

        # update learning rate and/or apply early stopping, if needed
        if patience > 0:
            if patience_criterion == 'train_loss':
                cur_error = err / epochsize
            elif patience_criterion == 'valid_loss':
                cur_error = val_err / val_batches
            elif patience_criterion == 'valid_error':
                cur_error = 1 - results['accuracy']
            elif patience_criterion == 'valid_map':
                cur_error = 1 - results['map']
            if cur_error <= best_error:
                # improvement: remember it and reset patience
                best_error = cur_error
                best_state = get_state(network, updates)
                patience = cfg['patience']
            else:
                patience -= 1
                if patience == 0:
                    # out of patience: optionally decay eta, roll back to the
                    # best state so far, and spend one trial of patience
                    if eta_decay_every == 'trial_of_patience' and \
                            eta_decay != 1:
                        eta.set_value(eta.get_value() *
                                      lasagne.utils.floatX(eta_decay))
                    restore_state(network, updates, best_state)
                    patience = cfg['patience']
                    trials_of_patience -= 1
                    print("Lost patience (%d remaining trials)." %
                          trials_of_patience)
                    if trials_of_patience == 0:
                        break
        if eta_decay_every != 'trial_of_patience' and eta_decay != 1 and \
                (epoch + 1) % eta_decay_every == 0:
            eta.set_value(eta.get_value() * lasagne.utils.floatX(eta_decay))
        if eta_cycle[epoch % len(eta_cycle)] != 1:
            eta.set_value(
                eta.get_value() *
                lasagne.utils.floatX(eta_cycle[epoch % len(eta_cycle)]))

        # store current training state, if needed
        if options.keep_state:
            state = {}
            state['epoch'] = epoch + 1
            state['network'] = get_state(network, updates)
            if options.save_errors:
                state['errors'] = errors
            if patience > 0:
                state['best_error'] = best_error
                state['best_state'] = best_state
                state['patience'] = patience
                state['trials_of_patience'] = trials_of_patience
            with open(statefile, 'wb') as f:
                pickle.dump(state, f, -1)
            del state

        # for debugging: print memory use and break into debugger
        #import resource, psutil
        #print("Memory usage: %.3f MiB / %.3f MiB" %
        #      (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024.,
        #       psutil.Process().memory_info()[0] / float(1024**2)))
        #import pdb; pdb.set_trace()

    # save final network
    print("Saving final model")
    save_model(modelfile, network, cfg)
    if options.save_errors:
        np.savez(modelfile[:-len('.npz')] + '.err.npz',
                 np.asarray(errors).reshape(epoch + 1, -1))
def main():  # pylint: disable=too-many-locals, too-many-statements
    """Create the RNN model and train it, outputting the text results.

    Periodically: (1) the training/evaluation set cost and accuracies are
    printed, and (2) the RNN is given a random input feed to output its own
    self-generated output text for our amusement.
    """
    # load the corpus and build the character vocabulary
    text = utils.retrieve_text(params.TEXT_FILE)
    chars = set(text)
    chars_size = len(chars)
    # char <-> integer-id mappings
    dictionary, reverse_dictionary = utils.build_dataset(chars)
    train_one_hots, eval_one_hots = utils.create_one_hots(text, dictionary)

    # graph inputs: N_INPUT one-hot chars flattened per example, one-hot label
    x = tf.placeholder(tf.float32, [None, params.N_INPUT * chars_size])
    labels = tf.placeholder(tf.float32, [None, chars_size])
    logits = model.inference(x, chars_size)
    cost = model.cost(logits, labels)
    optimizer = model.optimizer(cost)
    accuracy = model.accuracy(logits, labels)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(
            params.SUMMARIES_DIR + '/train', sess.graph)
        eval_writer = tf.summary.FileWriter(params.SUMMARIES_DIR + '/eval')
        # running totals over one display window
        loss_total = 0
        accuracy_total = 0
        for epoch in range(params.EPOCHS):
            # slide over the training data in BATCH_SIZE strides, leaving
            # room for the N_INPUT-char context window at the end
            for index in range(0,
                               len(train_one_hots) - params.N_INPUT -
                               params.BATCH_SIZE,
                               params.BATCH_SIZE):
                input_x, output_y = utils.create_training_io(
                    train_one_hots, index, params.BATCH_SIZE, chars_size)
                _, acc, loss, summary = sess.run(
                    [optimizer, accuracy, cost, merged],
                    feed_dict={x: input_x, labels: output_y})
                # global step for the summary writers
                step = epoch * (len(train_one_hots) - params.N_INPUT) + index
                train_writer.add_summary(summary, step)
                loss_total += loss
                accuracy_total += acc
                # every TRAINING_DISPLAY_STEP batches (skipping index 0):
                # report window averages, reset the totals, and evaluate
                if index % params.TRAINING_DISPLAY_STEP == 0 and index:
                    print(
                        'Epoch: {} Training Step: {}\n'
                        'Training Set: Loss: {:.3f} '
                        'Accuracy: {:.3f}'.format(
                            epoch, index,
                            loss_total * params.BATCH_SIZE /
                            params.TRAINING_DISPLAY_STEP,
                            accuracy_total * params.BATCH_SIZE /
                            params.TRAINING_DISPLAY_STEP,
                        )
                    )
                    loss_total = accuracy_total = 0
                    evaluation.evaluation(sess, step, eval_one_hots, x,
                                          labels, accuracy, cost,
                                          eval_writer, chars_size, merged)
        # NOTE(review): placed inside the `with` so `sess` is still open;
        # original indentation was ambiguous — confirm against the repo
        utils.create_example_text(sess, x, logits, chars, dictionary,
                                  reverse_dictionary)
X = T.itensor4('X') X_hat, posteriors, priors = autoencoder(T.cast(X, 'float32') / 255.) latent_kls = [ T.mean(vae.kl_divergence(po_m, po_s, pr_m, pr_s), axis=0) for (po_m, po_s), (pr_m, pr_s) in zip(posteriors, priors) ] beta_start = 500 * (np.arange(len(latent_kls)) + 1) beta_lin = theano.shared(np.float32(0)) betas_ = (beta_lin - beta_start) / np.float32(500) betas_ = T.switch(betas_ < 0, 0, betas_) betas = T.switch(betas_ > 1, 1, betas_)[::-1] print betas.eval() train_latent_kl = sum(betas[i] * kl for i, kl in enumerate(latent_kls)) latent_kl = sum(latent_kls) recon_loss = model.cost(X_hat, X[:, :, 16:-16, 16:-16]) pprint(parameters) l2 = sum(T.sum(T.sqr(w)) for w in parameters) pretrain_loss = ( model.cost(inpaint(T.cast(X, 'float32') / np.float32(255.)), X[:, :, 16:-16, 16:-16]) + latent_kl + 1e-2 * l2) / (32**2) loss = (recon_loss + latent_kl + 1e-3 * l2) / (32**2) val_loss = (recon_loss + latent_kl) / (32**2) print "Calculating gradient...", gradients = updates.clip_deltas(T.grad(loss, wrt=parameters), 1) print "Done with gradients." chunk_X = theano.shared(np.empty((1, 3, 64, 64), dtype=np.int32))
import trainer # Data batcher = data.batcher() x, y = batcher.placeholders() # Model logits = mnist.logits(x) # logits # Accuracy y_hat = mnist.prediction(logits) # predicted label accuracy = tfutils.modeling.accuracy(y, y_hat) # Optimization step = tfutils.opt.global_step() cost = mnist.cost(y, logits, regularize=True) train_step = tf.train.MomentumOptimizer(0.0005, 0.9).minimize( cost, global_step=step) model_vars = { 'x': x, 'y': y, 'logits': logits, 'y_hat': y_hat, 'accuracy': accuracy, 'cost': cost, 'train_step': train_step, } # Training with batcher: # batcher has files open
def normalise_weights(updates): return [ ( p, weight_norm(u) if p.name.startswith('W') else u ) for p,u in updates ] if __name__ == "__main__": P = Parameters() extract,_ = model.build(P, "vrnn") X = T.tensor3('X') l = T.ivector('l') [Z_prior_mean, Z_prior_std, Z_mean, Z_std, X_mean, X_std] = extract(X,l) parameters = P.values() batch_cost = model.cost(X, Z_prior_mean, Z_prior_std, Z_mean, Z_std, X_mean, X_std,l) print "Calculating gradient..." print parameters batch_size = T.cast(X.shape[1],'float32') gradients = T.grad(batch_cost,wrt=parameters) gradients = [ g / batch_size for g in gradients ] gradients = clip(5,parameters,gradients) P_learn = Parameters() updates = updates.adam(parameters,gradients,learning_rate=0.00025,P=P_learn) updates = normalise_weights(updates) print "Compiling..." train = theano.function( inputs=[X,l],
if __name__ == "__main__": model.SAMPLED_LAYERS = [int(s) for s in sys.argv[1:]] print model.SAMPLED_LAYERS P = Parameters() autoencoder, inpaint = model.build(P) parameters = P.values() X = T.itensor4('X') X_hat, posteriors, priors = \ autoencoder(T.cast(X, 'float32') / np.float32(255.)) latent_kls = [ T.mean(vae.kl_divergence(po_m, po_s, pr_m, pr_s), axis=0) for (po_m, po_s), (pr_m, pr_s) in zip(posteriors, priors) ] recon_loss = model.cost(X_hat, X[:, :, 16:-16, 16:-16]) val_loss = (recon_loss + sum(latent_kls)) / (32**2) X_recon = inpaint(T.cast(X, 'float32') / np.float32(255.)) Y = model.predict(X_recon) fill = theano.function(inputs=[X], outputs=[Y, val_loss, recon_loss / (32**2)] + latent_kls) P.load('unval_model.pkl') stream = data_io.stream_file("data/val2014.pkl.gz") stream = data_io.buffered_random(stream) stream = data_io.chunks((x[0] for x in stream), buffer_items=10) for chunk in stream: pass fig = plt.figure(figsize=(20, 5)) fig.subplots_adjust(left=0,
display_step = 10 n_input = 300 n_steps = 20 n_hidden = 256 hidden_sizes = [512,512,256,256,128,128,64,64,20,20] ruler = 0.15 x1,x2,y,Tx1,Tx2,Ty = db.short_for_data() print("data over") x1p,x2p,yp,dropout1,dropout5 = model.placeholder() print ("have the placeholder") pre = model.model(x1p,x2p,dropout1,dropout5) print ("have got the pred") opt,cost = model.cost(yp,pre) print ("have the optimizer") init = tf.global_variables_initializer() saver = tf.train.Saver(max_to_keep=0) with tf.Session(config =config) as sess: print ("in the session") sess.run(init) print ("init over") step = 1 t_losses = 10 while ruler < t_losses: batch_x1, batch_x2,batch_y = db.get_batch(batch_size,x1,x2,y) #batch_x = batch_x.reshape((batch_size, n_steps, n_input)) sess.run(opt, feed_dict={x1p: batch_x1,x2p:batch_x2,yp:batch_y,dropout1:1.0,dropout5:0.5}) #print ("one circle")
def normalise_weights(updates):
    """Post-process an (param, update) list, weight-normalising W updates.

    Any parameter whose name starts with 'W' has its update expression
    wrapped in weight_norm(); all other updates pass through unchanged.
    """
    return [(p, weight_norm(u) if p.name.startswith('W') else u)
            for p, u in updates]


if __name__ == "__main__":
    # Build the VRNN model and its symbolic training graph (Python 2 script).
    P = Parameters()
    extract, _ = model.build(P, "vrnn")
    # X: (time, batch, features) presumably; l: per-sequence lengths —
    # TODO confirm against model.build
    X = T.tensor3('X')
    l = T.ivector('l')
    [Z_prior_mean, Z_prior_std, Z_mean, Z_std, X_mean,
     X_std] = extract(X, l)
    parameters = P.values()
    batch_cost = model.cost(X, Z_prior_mean, Z_prior_std, Z_mean, Z_std,
                            X_mean, X_std, l)
    print "Calculating gradient..."
    print parameters
    # normalise the summed cost gradient by the batch size (axis 1 of X)
    batch_size = T.cast(X.shape[1], 'float32')
    gradients = T.grad(batch_cost, wrt=parameters)
    gradients = [g / batch_size for g in gradients]
    gradients = clip(5, parameters, gradients)
    P_learn = Parameters()
    # NOTE(review): this rebinds the name of the `updates` module to the
    # Adam update list — the module is unusable below this point; confirm
    # nothing later needs it
    updates = updates.adam(parameters, gradients, learning_rate=0.00025,
                           P=P_learn)
    updates = normalise_weights(updates)