_ = r[-1] # set next inits att_w_init = att_w_np[-1] att_k_init = att_k_np[-1] att_h_init = att_h_np[-1] att_c_init = att_c_np[-1] h1_init = h1_np[-1] c1_init = c1_np[-1] h2_init = h2_np[-1] c2_init = c2_np[-1] stateful_args = [ att_w_init, att_k_init, att_h_init, att_c_init, h1_init, c1_init, h2_init, c2_init ] return l, None, stateful_args with tf.Session(graph=g) as sess: run_loop(sess, loop, itr, loop, itr, n_steps=30000, n_train_steps_per=1000, train_stateful_args=stateful_args, n_valid_steps_per=0, valid_stateful_args=stateful_args)
def loop(sess, itr, extras, stateful_args):
    """Run one training or validation step for run_loop.

    Parameters
    ----------
    sess : tf.Session used to execute the graph.
    itr : iterator exposing next_batch() -> (images, labels).
    extras : dict; extras["train"] selects train vs. validation mode.
    stateful_args : opaque state, passed through unchanged (this model
        carries no recurrent state between steps).

    Returns
    -------
    (loss, None, stateful_args) as expected by run_loop.
    """
    x, y = itr.next_batch()
    # The feed is identical in both modes; only the fetched ops differ,
    # so build it once instead of duplicating it per branch.
    feed = {vs.images: x, vs.labels: y}
    if extras["train"]:
        # vs.train_step is fetched purely for its side effect (the
        # parameter update); its returned value is intentionally unused.
        r = sess.run([vs.loss, vs.train_step], feed_dict=feed)
    else:
        r = sess.run([vs.loss], feed_dict=feed)
    l = r[0]
    return l, None, stateful_args


with tf.Session(graph=g) as sess:
    run_loop(sess,
             loop, train_itr,
             loop, val_itr,
             n_steps=100 * 1000,
             n_train_steps_per=5000,
             n_valid_steps_per=1000)

print("training done")
# Deliberate debug stop left by the author: drop into an IPython shell
# for post-training inspection, then halt so nothing below ever runs.
from IPython import embed
embed()
raise ValueError()
l = r[-2] _ = r[-1] # set next inits att_w_init = att_w_np[-1] att_k_init = att_k_np[-1] att_h_init = att_h_np[-1] att_c_init = att_c_np[-1] h1_init = h1_np[-1] c1_init = c1_np[-1] h2_init = h2_np[-1] c2_init = c2_np[-1] stateful_args = [ att_w_init, att_k_init, att_h_init, att_c_init, h1_init, c1_init, h2_init, c2_init ] return l, None, stateful_args run_loop(sess, loop, train_itr, loop, train_itr, continue_training=True, n_steps=1000000, n_train_steps_per=1000, train_stateful_args=stateful_args, n_valid_steps_per=0, valid_stateful_args=stateful_args)
init_q_h = np.zeros((batch_size, n_hid)).astype("float32") init_q_c = np.zeros((batch_size, n_hid)).astype("float32") if extras["train"]: feed = { vs.inputs: inps, vs.targets: targets, vs.init_hidden: init_h, vs.init_cell: init_c, vs.init_q_hidden: init_q_h, vs.init_q_cell: init_q_c } outs = [vs.rec_loss, vs.loss, vs.train_step] r = sess.run(outs, feed_dict=feed) l = r[0] t_l = r[1] step = r[2] else: raise ValueError("No valid, no cry") return l, None, stateful_args with tf.Session(graph=g) as sess: run_loop(sess, loop, copy_itr, loop, copy_itr, n_steps=200000, n_train_steps_per=10000, n_valid_steps_per=0)
vs.init_q_cell: init_q_c, vs.cell_dropout: 1. } outs = [vs.rec_loss, vs.hiddens, vs.cells, vs.q_hiddens, vs.q_cells] r = sess.run(outs, feed_dict=feed) l = r[0] hiddens = r[1] cells = r[2] q_hiddens = r[3] q_cells = r[4] init_h_t = hiddens[-1] init_c_t = cells[-1] init_q_h_t = q_hiddens[-1] init_q_c_t = q_cells[-1] stateful_args = [init_h_t, init_c_t, init_q_h_t, init_q_c_t] return l, None, stateful_args with tf.Session(graph=g) as sess: run_loop(sess, loop, train_itr, loop, valid_itr, train_stateful_args=train_stateful_args, valid_stateful_args=valid_stateful_args, n_steps=50000, n_train_steps_per=5000, n_valid_steps_per=500)
g, vs = create_graph()


def loop(sess, itr, extras, stateful_args):
    """Run one training or validation step for run_loop.

    Parameters
    ----------
    sess : tf.Session used to execute the graph.
    itr : iterator exposing next_batch() -> (images,) (single-element tuple).
    extras : dict; extras["train"] selects train vs. validation mode.
    stateful_args : opaque state, passed through unchanged (no recurrent
        state is carried between steps).

    Returns
    -------
    (rec_loss, None, stateful_args) as expected by run_loop.
    """
    x, = itr.next_batch()
    if extras["train"]:
        # bn_flag 0. selects the training-mode path of the graph; the
        # train_step op is fetched only for its side effect — the fetched
        # total loss and step values were never used, so they are no
        # longer bound to dead locals.
        feed = {vs.images: x, vs.bn_flag: 0.}
        r = sess.run([vs.rec_loss, vs.loss, vs.train_step], feed_dict=feed)
    else:
        # bn_flag 1. selects the inference-mode path of the graph.
        feed = {vs.images: x, vs.bn_flag: 1.}
        r = sess.run([vs.rec_loss], feed_dict=feed)
    l = r[0]
    return l, None, stateful_args


with tf.Session(graph=g) as sess:
    run_loop(sess,
             loop, train_itr,
             loop, valid_itr,
             n_steps=50000,
             n_train_steps_per=5000,
             n_valid_steps_per=250)
def main():
    """Build the attention/LSTM graph and run stateful training.

    Reads hyperparameters from the module-level `args`, allocates zero
    initial states for the attention window and both LSTM layers, and
    drives run_loop with a closure that threads those states from one
    batch to the next.
    """
    restore_model = args.restore
    seq_len = args.seq_len
    batch_size = args.batch_size
    num_epoch = args.epochs
    num_units = args.units
    batches_per_epoch = 1000
    g, vs = create_graph(vocabulary_size, speech_size, batch_size,
                         num_units=args.units,
                         lstm_layers=args.lstm_layers,
                         window_mixtures=args.window_mixtures)
    num_letters = vocabulary_size
    # Zero initial states: attention window (over letters), attention
    # mixture positions, and hidden/cell pairs for the attention cell and
    # two LSTM layers — one row per batch element.
    att_w_init = np.zeros((batch_size, num_letters))
    att_k_init = np.zeros((batch_size, window_mixtures))
    att_h_init = np.zeros((batch_size, num_units))
    att_c_init = np.zeros((batch_size, num_units))
    h1_init = np.zeros((batch_size, num_units))
    c1_init = np.zeros((batch_size, num_units))
    h2_init = np.zeros((batch_size, num_units))
    c2_init = np.zeros((batch_size, num_units))
    # Order matters: the loop closure unpacks these by position.
    stateful_args = [att_w_init, att_k_init, att_h_init, att_c_init,
                     h1_init, c1_init, h2_init, c2_init]
    loop_step = 0

    def loop(sess, itr, extras, stateful_args):
        """One training step; returns (loss, summary, next_states).

        `stateful_args` carries the recurrent state from the previous
        call in the fixed order [att_w, att_k, att_h, att_c, h1, c1,
        h2, c2].
        """
        # `reset` zeroes state rows for sequences that restarted this
        # batch; presumably 0/1 per batch element — TODO confirm against
        # the iterator.
        speech, seq, reset = itr.next_batch()
        att_w_init = stateful_args[0]
        att_k_init = stateful_args[1]
        att_h_init = stateful_args[2]
        att_c_init = stateful_args[3]
        h1_init = stateful_args[4]
        c1_init = stateful_args[5]
        h2_init = stateful_args[6]
        c2_init = stateful_args[7]
        # In-place masking: this mutates the arrays held in
        # stateful_args as well.
        att_w_init *= reset
        att_k_init *= reset
        att_h_init *= reset
        att_c_init *= reset
        h1_init *= reset
        c1_init *= reset
        h2_init *= reset
        c2_init *= reset
        # Additive Gaussian input noise (inputs only; targets stay clean).
        noise_pwr = 4.
        noise = noise_pwr * random_state.randn(*speech[:-1].shape)
        # Next-step prediction: inputs are speech[:-1], targets speech[1:].
        # Masks are all-ones built via `0. * x + 1.`; the [:, :, 0]
        # indexing suggests speech is (time, batch, features) — TODO
        # confirm.
        feed = {vs.in_speech: speech[:-1] + noise,
                vs.in_speech_mask: 0. * speech[:-1, :, 0] + 1.,
                vs.out_speech: speech[1:],
                vs.out_speech_mask: 0. * speech[1:, :, 0] + 1.,
                vs.sequence: seq,
                vs.sequence_mask: 0. * seq[:, :, 0] + 1.,
                vs.att_w_init: att_w_init,
                vs.att_k_init: att_k_init,
                vs.att_h_init: att_h_init,
                vs.att_c_init: att_c_init,
                vs.h1_init: h1_init,
                vs.c1_init: c1_init,
                vs.h2_init: h2_init,
                vs.c2_init: c2_init}
        # Fetch order is relied on below by positional indexing.
        outs = [vs.att_w, vs.att_k, vs.att_h, vs.att_c,
                vs.h1, vs.c1, vs.h2, vs.c2,
                vs.att_phi, vs.loss, vs.summary, vs.train_step]
        r = sess.run(outs, feed_dict=feed)
        att_w_np = r[0]
        att_k_np = r[1]
        att_h_np = r[2]
        att_c_np = r[3]
        h1_np = r[4]
        c1_np = r[5]
        h2_np = r[6]
        c2_np = r[7]
        att_phi_np = r[8]
        l = r[-3]   # vs.loss
        s = r[-2]   # vs.summary
        _ = r[-1]   # vs.train_step, run for its side effect only
        # set next inits
        # Carry the last timestep of each state sequence into the next
        # call's initial state.
        att_w_init = att_w_np[-1]
        att_k_init = att_k_np[-1]
        att_h_init = att_h_np[-1]
        att_c_init = att_c_np[-1]
        h1_init = h1_np[-1]
        c1_init = c1_np[-1]
        h2_init = h2_np[-1]
        c2_init = c2_np[-1]
        stateful_args = [att_w_init, att_k_init, att_h_init, att_c_init,
                         h1_init, c1_init, h2_init, c2_init]
        return l, s, stateful_args

    with tf.Session(graph=g) as sess:
        # Same iterator and loop for "train" and "valid" slots, but
        # validation is disabled (n_valid_steps_per=0).
        run_loop(sess,
                 loop, itr,
                 loop, itr,
                 n_steps=500000,
                 n_train_steps_per=1000,
                 train_stateful_args=stateful_args,
                 n_valid_steps_per=0,
                 valid_stateful_args=stateful_args)