def bidirectional_lstm(inputs, keep_prob, INPUT_SIZE, HIDDEN_SIZE, SEQ_LENGTH):
    """Run a forward/backward LSTM pair over a sequence of input tensors.

    Dropout is applied to every element of ``inputs`` before the sequence is
    handed to ``bidirectional_rnn``.

    Args:
        inputs: iterable of tensors; each element gets dropout applied.
        keep_prob: keep probability forwarded to ``tf.nn.dropout``.
        INPUT_SIZE: input size passed to both ``LSTMCell`` constructors.
        HIDDEN_SIZE: number of units passed to both ``LSTMCell`` constructors.
        SEQ_LENGTH: accepted but not used; ``sequence_length=None`` is what
            is passed to ``bidirectional_rnn``.

    Returns:
        Whatever ``bidirectional_rnn`` returns for the two cells.
    """
    # Both directions share one initializer object.
    weight_init = tf.random_uniform_initializer(-0.01, 0.01)
    cell_F = LSTMCell(HIDDEN_SIZE, INPUT_SIZE, initializer=weight_init)
    cell_B = LSTMCell(HIDDEN_SIZE, INPUT_SIZE, initializer=weight_init)

    dropped_inputs = []
    for step_input in inputs:
        dropped_inputs.append(tf.nn.dropout(step_input, keep_prob))

    return bidirectional_rnn(
        cell_F,
        cell_B,
        dropped_inputs,
        initial_state_fw=None,
        initial_state_bw=None,
        sequence_length=None,
        dtype=tf.float32,
    )
# Demo: build one LSTM over a fixed-length sequence and run it once on
# random data.
batch_size = 100
n_steps = 45
seq_width = 50

initializer = tf.random_uniform_initializer(-1, 1)

# Sequence we will provide at runtime.
seq_input = tf.placeholder(tf.float32, [n_steps, batch_size, seq_width])
# What timestep we want to stop at.
early_stop = tf.placeholder(tf.int32)

# The rnn call takes a list with one item per timestep, so the input is
# split along axis 0; split keeps the split dimension, hence the reshape.
inputs = [
    tf.reshape(i, (batch_size, seq_width))
    for i in tf.split(0, n_steps, seq_input)
]

# Set up the LSTM. `size` is not defined in this snippet and must already be
# in scope.
cell = LSTMCell(size, seq_width, initializer=initializer)
initial_state = cell.zero_state(batch_size, tf.float32)
outputs, states = rnn.rnn(
    cell,
    inputs,
    initial_state=initial_state,
    sequence_length=early_stop,
)

# Create the initialize op; it must be run by the session, otherwise the
# variables are reported as uninitialized.
iop = tf.initialize_all_variables()
session = tf.Session()
session.run(iop)

# Define the feeds. early_stop can be varied, but seq_input has to match the
# placeholder shape declared above.
# NOTE(review): the fed early_stop (100) is larger than n_steps (45) --
# confirm this is intended.
feed = {
    early_stop: 100,
    seq_input: np.random.rand(n_steps, batch_size, seq_width).astype('float32'),
}
outs = session.run(outputs, feed_dict=feed)
def __init__(self, model_dir, gpu=0, batch_size=1, num_units=256):
    """Build the graph, losses, optimizer and summaries, then restore weights.

    Args:
        model_dir: directory passed to the summary writer and to
            ``restore_model``.
        gpu: GPU index used for the ``tf.device`` scope.
        batch_size: first dimension of the LSTM state placeholder and of the
            zero state produced by ``reset_lstm_state``.
        num_units: number of LSTM units; the state placeholder is
            ``2 * num_units`` wide.
    """
    env_size = Env.FIELD_NUM * Env.FIELD_DEPTH
    args_size = Env.ARG_MAX_NUM * Env.ARG_DEPTH
    prog_size = ProgramManager.PG_NUM

    self.sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                                 log_device_placement=False))

    with tf.device("/gpu:%d" % gpu):
        # global step
        # (the original passed a dtype where get_variable expects the name)
        self.global_step = tf.Variable(0, name="global_step", trainable=False)

        # input place_holder
        self.input_env = tf.placeholder(tf.int16, shape=[None, env_size])
        self.input_arg = tf.placeholder(tf.int16, shape=[None, args_size])
        self.input_prog = tf.placeholder(tf.int16, shape=[None, prog_size])
        self.lstm_state = tf.placeholder(tf.float32,
                                         shape=[batch_size, 2 * num_units])

        # output place_holder
        self.out_end = tf.placeholder(tf.float32, shape=[None, 1])
        self.out_prog = tf.placeholder(tf.float32, shape=[None, prog_size])
        self.out_args = tf.placeholder(tf.float32,
                                       shape=[None, Env.ARG_MAX_NUM])

        # init variables
        # Every variable needs its own name: get_variable refuses to create
        # a second variable under a name that already exists in the scope.
        w_enc = tf.get_variable("w_enc", shape=[env_size + args_size, 128],
                                initializer=initial_weight)
        b_enc = tf.get_variable("b_enc", shape=[128],
                                initializer=initial_bias)
        w_end = tf.get_variable("w_end", shape=[num_units, 1],
                                initializer=initial_weight)
        b_end = tf.get_variable("b_end", shape=[1],
                                initializer=initial_bias)
        w_prog = tf.get_variable("w_prog", shape=[num_units, prog_size],
                                 initializer=initial_weight)
        b_prog = tf.get_variable("b_prog", shape=[prog_size],
                                 initializer=initial_bias)
        w_args = [tf.get_variable("w_arg_%d" % arg_i,
                                  shape=[num_units, Env.ARG_DEPTH],
                                  initializer=initial_weight)
                  for arg_i in range(Env.ARG_MAX_NUM)]
        b_args = [tf.get_variable("b_arg_%d" % arg_i,
                                  shape=[Env.ARG_DEPTH],
                                  initializer=initial_bias)
                  for arg_i in range(Env.ARG_MAX_NUM)]

        # networks
        h_concat_1 = tf.concat(1, [self.input_env, self.input_arg],
                               name="merge_env_arg")
        # The placeholders are int16 while the weights are float; matmul
        # needs both operands to share a dtype.
        h_concat_1 = tf.cast(h_concat_1, tf.float32)
        f_enc = tf.nn.relu(tf.matmul(h_concat_1, w_enc) + b_enc, name="f_enc")
        f_enc_reshape = tf.reshape(f_enc, shape=[-1, 1, 128])
        # NOTE(review): only one tensor is concatenated here, so this is
        # currently a pass-through; the op name suggests a program input was
        # meant to be merged in -- confirm.
        h_concat_2 = tf.concat(2, [f_enc_reshape, ], name="merge_state_prog")

        # LSTM layers
        # Sized from num_units so the cell agrees with the state placeholder.
        lstm_cell = LSTMCell(num_units)
        # NOTE(review): h_concat_2 is a single rank-3 tensor, while
        # tf.nn.rnn is documented to take a list of per-timestep tensors --
        # confirm.
        h_output, self.state_out = tf.nn.rnn(lstm_cell, h_concat_2,
                                             initial_state=self.lstm_state)
        f_lstm = tf.nn.relu(h_output[-1], name="f_lstm")

        # logits out
        f_end_logits = tf.matmul(f_lstm, w_end) + b_end
        self.f_end = tf.nn.sigmoid(f_end_logits, name="f_end")
        f_prog_logits = tf.matmul(f_lstm, w_prog) + b_prog
        self.f_prog = tf.nn.softmax(f_prog_logits, name="f_prog")
        f_args_logits, self.f_args = [], []
        for arg_i in range(Env.ARG_MAX_NUM):
            f_args_logits.append(
                tf.matmul(f_lstm, w_args[arg_i]) + b_args[arg_i])
            self.f_args.append(
                tf.nn.softmax(f_args_logits[-1], name="f_arg_%d" % arg_i))

        # loss (objective function)
        l2_loss = tf.add_n(
            [tf.nn.l2_loss(w) for w in [w_enc, w_end, w_prog] + w_args],
            name="l2_loss")
        f_end_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            f_end_logits, self.out_end, name="f_end_loss")
        # NOTE(review): the labels below come from float32 placeholders,
        # while sparse_softmax_cross_entropy_with_logits is documented to
        # take integer class indices -- confirm the label format.
        f_prog_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            f_prog_logits, self.out_prog, name="f_prog_loss")
        _out_args = tf.split(1, Env.ARG_MAX_NUM, self.out_args)
        f_args_loss = [
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                f_args_logits[i], _out_args[i], name="f_args_loss_%d" % i)
            for i in range(Env.ARG_MAX_NUM)
        ]
        # One tensor for all argument heads, shared by the objective and
        # the summary (the summary was previously handed the Python list).
        f_args_loss_sum = tf.add_n(f_args_loss)
        total_loss = f_prog_loss + f_end_loss + f_args_loss_sum + l2_loss

        # optimizer
        self.train_opt = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
            total_loss, global_step=self.global_step)

        # summary
        # NOTE(review): the loss tensors are not reduced before being passed
        # to scalar_summary -- confirm they are scalars at runtime.
        summaries = [
            tf.scalar_summary("out/l2_loss", l2_loss),
            tf.scalar_summary("out/f_end_loss", f_end_loss),
            tf.scalar_summary("out/f_prog_loss", f_prog_loss),
            tf.scalar_summary("out/f_args_loss", f_args_loss_sum),
            tf.scalar_summary("out/total_loss", total_loss),
        ]
        self.summary_op = tf.merge_summary(summaries)
        self.summary_writer = tf.train.SummaryWriter(model_dir)

    # init saver
    self.saver = tf.train.Saver()
    self.sess.run(tf.initialize_all_variables())
    restore_model(self.sess, model_dir, self.saver)

    # reset lstm state
    self.reset_lstm_state = lambda: np.zeros((batch_size, 2 * num_units),
                                             dtype=np.float32)
    self.lstm_state_init = self.reset_lstm_state()
# Two LSTM layers stacked by hand: the per-timestep outputs of the first rnn
# call are the inputs of the second. batch_size, input_dim, hidden_dim,
# output_dim, n_steps and seq_input must already be in scope.

# What timestep we want to stop at.
early_stop = tf.placeholder(tf.int32)

initializer = tf.random_uniform_initializer(-1, 1)

# The rnn call takes a list with one item per timestep, so the input is
# split along axis 0; split keeps the split dimension, hence the reshape.
inputs = [
    tf.reshape(i, (batch_size, input_dim))
    for i in tf.split(0, n_steps, seq_input)
]

with tf.device("/cpu:0"):
    # First layer: input_dim -> hidden_dim, variables under scope "RNN1".
    cell1 = LSTMCell(hidden_dim, input_dim, initializer=initializer)
    initial_state1 = cell1.zero_state(batch_size, tf.float32)
    outputs1, states1 = rnn.rnn(
        cell1,
        inputs,
        initial_state=initial_state1,
        sequence_length=early_stop,
        scope="RNN1",
    )

    # Second layer: hidden_dim -> output_dim, variables under scope "RNN2".
    cell2 = LSTMCell(output_dim, hidden_dim, initializer=initializer)
    initial_state2 = cell2.zero_state(batch_size, tf.float32)
    outputs2, states2 = rnn.rnn(
        cell2,
        outputs1,
        initial_state=initial_state2,
        sequence_length=early_stop,
        scope="RNN2",
    )
# Model hyper-parameters.
z_size = 10            # QSampler output size
T = 10                 # MNIST generation sequence length
batch_size = 100       # training minibatch size
train_iters = 10000
learning_rate = 1e-3   # learning rate for optimizer
eps = 1e-8             # epsilon for numerical stability

## BUILD MODEL ##

DO_SHARE = None  # workaround for variable_scope(reuse=True)

# img_size, enc_size, dec_size and read_size must already be in scope.
# input (batch_size * img_size)
x = tf.placeholder(tf.float32, shape=(batch_size, img_size))
# Qsampler noise
e = tf.random_normal((batch_size, z_size), mean=0, stddev=1)
# encoder Op
lstm_enc = LSTMCell(enc_size, read_size + dec_size)
# decoder Op
lstm_dec = LSTMCell(dec_size, z_size)


def linear(x, output_dim):
    """Affine transformation Wx+b.

    Assumes x.shape = (batch_size, num_features). The variables are fetched
    with ``tf.get_variable`` under the fixed names "w" and "b".
    """
    num_features = x.get_shape()[1]
    w = tf.get_variable("w", [num_features, output_dim])
    b = tf.get_variable("b", [output_dim],
                        initializer=tf.constant_initializer(0.0))
    return tf.matmul(x, w) + b


# def filterbank(gx, gy, sigma2, delta, N):
#     (header only -- the body of this definition is cut off in this excerpt)
# Model hyper-parameters.
z_size = 10  # QSampler output size
T = 10  # MNIST generation sequence length
# training minibatch size
# NOTE(review): this reads a private attribute of train_data -- check
# whether that object exposes a public equivalent.
batch_size = train_data._num_examples
train_iters = 10000
learning_rate = 1e-3  # learning rate for optimizer
eps = 1e-8  # epsilon for numerical stability

## BUILD MODEL ##

DO_SHARE = None  # workaround for variable_scope(reuse=True)

# img_size, enc_size, dec_size and rs must already be in scope.
# input (batch_size * img_size)
x = tf.placeholder(tf.float32, shape=(batch_size, img_size))
# Qsampler noise
e = tf.random_normal((batch_size, z_size), mean=0, stddev=1)
# encoder Op
# Floor division keeps the input size an integer even under true division
# (Python 3 or `from __future__ import division`); for integer rs on
# Python 2 the value is unchanged.
lstm_enc = LSTMCell(enc_size, (rs // 4) * (rs // 4) * 5 + dec_size)
# decoder Op
lstm_dec = LSTMCell(dec_size, z_size)
phase_train = tf.placeholder(tf.bool, name='phase_train')


def linear(x, output_dim):
    """Affine transformation Wx+b.

    Assumes x.shape = (batch_size, num_features). The variables are fetched
    with ``tf.get_variable`` under the fixed names "w" and "b".

    Args:
        x: input tensor; its second dimension sizes the weight matrix.
        output_dim: number of output features.

    Returns:
        The tensor ``tf.matmul(x, w) + b``.
    """
    w = tf.get_variable("w", [x.get_shape()[1], output_dim])
    b = tf.get_variable("b", [output_dim],
                        initializer=tf.constant_initializer(0.0))
    return tf.matmul(x, w) + b
def __init__(self, args):
    """Build the recurrent network graph described by ``args``.

    Args:
        args: object providing ``rnn_size``, ``n_steps``, ``batch_size``,
            ``input_dim``, ``num_layers``, ``cell_type`` (one of "srnn",
            "lstm", "lstmp", "cw") and ``rnn_type`` ("rnn" or "seq2seq").

    Raises:
        ValueError: if ``args.cell_type`` or ``args.rnn_type`` is not one of
            the supported values.
    """
    self.size = args.rnn_size
    self.n_steps = args.n_steps
    self.batch_size = args.batch_size
    self.input_dim = args.input_dim
    self.num_layers = args.num_layers

    # Only the "lstmp" cell below uses this initializer.
    initializer = tf.random_uniform_initializer(-0.8, 0.8)

    # Sequence we will provide at runtime.
    self.seq_input = tf.placeholder(
        tf.float32, [self.n_steps, self.batch_size, self.input_dim])
    # What timestep we want to stop at.
    self.early_stop = tf.placeholder(tf.int32)
    # The rnn call takes a list with one item per timestep, so the input is
    # split along axis 0; split keeps the split dimension, hence the reshape.
    self.inputs = [
        tf.reshape(i, (self.batch_size, self.input_dim))
        for i in tf.split(0, self.n_steps, self.seq_input)
    ]
    self.result = tf.placeholder(tf.float32, [None, self.input_dim])

    # Pick the cell; an unrecognised type used to fall through and fail
    # later with a NameError on `cell`.
    if args.cell_type == "srnn":
        cell = BasicRNNCell(self.size)
    elif args.cell_type == "lstm":
        cell = BasicLSTMCell(self.size, forget_bias=1.0)
    elif args.cell_type == "lstmp":
        cell = LSTMCell(self.size, self.input_dim, initializer=initializer)
    elif args.cell_type == "cw":
        cell = CWRNNCell(self.size, [1, 4, 16, 64])
    else:
        raise ValueError("unknown cell_type: %r" % (args.cell_type,))
    self.cell = cell = rnn_cell.MultiRNNCell([cell] * self.num_layers)

    # Random (not zero) initial state.
    self.initial_state = tf.random_uniform(
        [self.batch_size, self.cell.state_size], -0.1, 0.1)

    # Network type; an unrecognised type used to leave self.outputs and
    # self.states unset.
    if args.rnn_type == "rnn":
        self.outputs, self.states = rnn.rnn(
            self.cell,
            self.inputs,
            initial_state=self.initial_state,
            sequence_length=self.early_stop)
    elif args.rnn_type == "seq2seq":
        # The original expression `loop if False else None` always
        # evaluated to None.
        self.outputs, self.states = seq2seq.rnn_decoder(
            self.inputs,
            self.initial_state,
            self.cell,
            loop_function=None)
    else:
        raise ValueError("unknown rnn_type: %r" % (args.rnn_type,))
    self.final_state = self.states[-1]

    # Output projection from the cell size to a single value per timestep.
    self.W_o = tf.Variable(tf.random_normal([self.size, 1], stddev=0.01))
    self.b_o = tf.Variable(tf.random_normal([1], stddev=0.01))
    # Prints the same text as the former Python 2 print statement and also
    # parses under Python 3.
    print("type(outputs) %s" % (type(self.outputs),))

    self.output_cat = tf.reshape(tf.concat(1, self.outputs), [-1, self.size])
    self.output = tf.nn.xw_plus_b(self.output_cat, self.W_o, self.b_o)

    # The projection has one output column, so this reshape only matches
    # the element count when input_dim == 1.
    shaped = tf.reshape(
        self.output, [self.batch_size, self.n_steps, self.input_dim])
    noisy = shaped + tf.random_normal(
        [self.batch_size, self.n_steps, self.input_dim], stddev=0.05)
    # then transpose
    self.output2 = tf.transpose(noisy, [1, 0, 2])