def get_last_features(self, x, reuse):
    x_has_timesteps = (x.get_shape().ndims == 5)
    if x_has_timesteps:
        sh = tf.shape(x)
        x = flatten_two_dims(x)  # fold [batch, time] into one axis for the convnet
    with tf.variable_scope(self.scope + "_features", reuse=reuse):
        x = (tf.to_float(x) - self.ob_mean) / self.ob_std
        x = small_convnet(x, nl=self.nl, feat_dim=self.feat_dim, last_nl=None,
                          layernormalize=self.layernormalize)
    if x_has_timesteps:
        x = unflatten_first_dim(x, sh)
        x = tf.reshape(x, [-1, sh[1], self.feat_dim])  # sh is only defined on this branch
    with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE):
        init_1 = tf.contrib.rnn.LSTMStateTuple(self.last_c_in_1, self.last_h_in_1)
        if self.lstm2_size:
            init_2 = tf.contrib.rnn.LSTMStateTuple(self.last_c_in_2, self.last_h_in_2)
        if self.aux_input:
            # feed the previous reward as an extra feature
            prev_rews = tf.expand_dims(self.ph_last_rew, -1)
            x = tf.concat([x, prev_rews], -1)
        x, c_out_1, h_out_1 = lstm(self.lstm1_size)(x, initial_state=init_1)
        if self.lstm2_size:
            if self.aux_input:
                # feed the previous action (one-hot) and velocity as extra features
                prev_acs = tf.one_hot(self.ph_last_ac, depth=self.num_actions)
                x = tf.concat([x, tf.cast(prev_acs, tf.float32)], -1)
                x = tf.concat([x, self.ph_last_vel], -1)
            x, c_out_2, h_out_2 = lstm(self.lstm2_size)(x, initial_state=init_2)
    return x
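# get_last_features relies on flatten_two_dims / unflatten_first_dim to fold the
# [batch, time] axes together before the convnet and restore them afterwards.
# A minimal sketch of what these helpers are assumed to do (names taken from the
# call sites above; the real implementations may differ):
def flatten_two_dims(x):
    # [batch, time, ...] -> [batch * time, ...]
    return tf.reshape(x, [-1] + x.get_shape().as_list()[2:])

def unflatten_first_dim(x, sh):
    # [batch * time, ...] -> [batch, time, ...], where sh = tf.shape(original x)
    return tf.reshape(x, [sh[0], sh[1]] + x.get_shape().as_list()[1:])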
def __init__(self, sess, state_dim, n_actions, n_steps, n_lstm=256, reuse=False):
    self.obs_in = tf.placeholder(dtype=tf.float32, shape=[None, state_dim], name='obs_in')  # observations
    self.D = tf.placeholder(dtype=tf.float32, shape=[None], name='dones')  # dones
    self.LS = tf.placeholder(dtype=tf.float32, shape=[None, n_lstm * 2], name='lstm_s')  # cell and hidden states
    with tf.variable_scope("model", reuse=reuse):
        h1 = tf.layers.dense(self.obs_in, units=20, activation=tf.nn.relu)
        h2 = tf.layers.dense(h1, units=20, activation=tf.nn.relu)
        # LSTM cell
        h3, s_new = lstm(h2, self.D, self.LS, scope='lstm', n_lstm=n_lstm)
        # With activation=tf.nn.softmax the policy head already outputs
        # normalized action probabilities, so they can be sampled from directly.
        self.ap_out = tf.layers.dense(h3, units=n_actions, activation=tf.nn.softmax)
        self.vf_out = tf.layers.dense(h3, units=1, activation=None)
    # self.aps = tf.squeeze(tf.nn.softmax(self.ap_out))
    # a0 = np.random.choice(np.arange(n_actions), p=self.ap_out)
    v0 = self.vf_out[:, 0]
    # picked_action_prob = tf.gather(self.ap_out, a0)  # a0 are the labels for the cross-entropy computation
    self.initial_states = [np.zeros(shape=n_lstm * 2, dtype=np.float32)]

    def step(obs, dones, lstm_states):
        return sess.run([self.ap_out, v0, s_new],
                        {self.obs_in: obs, self.D: dones, self.LS: lstm_states})
        # return sess.run([a0, self.ap_out, v0, s_new, picked_action_prob],
        #                 {self.obs_in: obs, self.D: dones, self.LS: lstm_states})

    def value(obs, dones, lstm_states):
        return sess.run(v0, {self.obs_in: obs, self.D: dones, self.LS: lstm_states})
        # return sess.run([self.vf_out], {self.obs_in: obs, self.D: dones, self.LS: lstm_states})

    self.step = step
    self.value = value
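# Hypothetical rollout usage (not part of the original snippet; model, obs,
# dones, lstm_states and n_actions are assumed from the constructor above):
# run one step, then sample actions on the numpy side from the
# softmax-normalized probabilities.
action_probs, values, lstm_states = model.step(obs, dones, lstm_states)
actions = [np.random.choice(n_actions, p=p) for p in action_probs]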
def __init__(self, sess, state_dim, n_actions, n_steps, n_lstm=256, reuse=False):
    self.obs_in = tf.placeholder(dtype=tf.float32, shape=[None, state_dim], name='obs_in')  # observations
    self.D = tf.placeholder(dtype=tf.float32, shape=[None], name='dones')  # dones
    self.LS = tf.placeholder(dtype=tf.float32, shape=[None, n_lstm * 2], name='lstm_s')  # cell and hidden states
    with tf.variable_scope("model", reuse=reuse):
        h1 = tf.layers.dense(self.obs_in, units=20, activation=tf.nn.relu)
        h2 = tf.layers.dense(h1, units=20, activation=tf.nn.relu)
        # LSTM cell
        h3, s_new = lstm(h2, self.D, self.LS, scope='lstm', n_lstm=n_lstm)
        self.ap_out = tf.layers.dense(h3, units=n_actions, activation=None)
        self.vf_out = tf.layers.dense(h3, units=1, activation=None)
    # The network outputs unnormalized action logits. They are wrapped in a
    # probability distribution from which actions can be sampled.
    self.pd = CategoricalPd(self.ap_out)  # init the distribution with the logits
    # Sample one action per batch entry; sampling perturbs the logits with noise
    # derived from uniform samples before the argmax, so it is stochastic.
    a0 = self.pd.sample()
    v0 = self.vf_out[:, 0]
    neglogprob0 = self.pd.neglogprob(a0)  # a0 are the labels for the cross-entropy computation
    self.initial_states = [np.zeros(shape=n_lstm * 2, dtype=np.float32)]

    def step(obs, dones, lstm_states):
        return sess.run([a0, self.ap_out, v0, s_new, neglogprob0],
                        {self.obs_in: obs, self.D: dones, self.LS: lstm_states})

    def value(obs, dones, lstm_states):
        return sess.run(v0, {self.obs_in: obs, self.D: dones, self.LS: lstm_states})
        # return sess.run([self.vf_out], {self.obs_in: obs, self.D: dones, self.LS: lstm_states})

    self.step = step
    self.value = value
    self.a0 = a0
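# A minimal sketch of a CategoricalPd consistent with the comments above
# (an assumption; the baselines-style distribution behaves like this):
class CategoricalPd:
    def __init__(self, logits):
        self.logits = logits

    def sample(self):
        # Gumbel-max trick: perturb the logits with noise derived from uniform
        # samples, then take the argmax; equivalent to sampling the softmax.
        u = tf.random_uniform(tf.shape(self.logits))
        return tf.argmax(self.logits - tf.log(-tf.log(u)), axis=-1)

    def neglogprob(self, a):
        # Cross entropy between the sampled action and the softmax of the logits.
        return tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.logits, labels=a)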
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, nlstm=256, reuse=False):
    nenv = nbatch // nsteps
    self.pdtype = make_pdtype(ac_space)
    X, processed_x = observation_input(ob_space, nbatch)
    M = tf.placeholder(tf.float32, [nbatch])  # mask (done at t-1)
    S = tf.placeholder(tf.float32, [nenv, nlstm * 2])  # states
    with tf.variable_scope("model", reuse=reuse):
        h = nature_cnn(X)
        xs = batch_to_seq(h, nenv, nsteps)
        ms = batch_to_seq(M, nenv, nsteps)
        h5, snew = lstm(xs, ms, S, 'lstm1', nh=nlstm)
        h5 = seq_to_batch(h5)
        vf = fc(h5, 'v', 1)
        self.pd, self.pi = self.pdtype.pdfromlatent(h5)
    v0 = vf[:, 0]
    a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = np.zeros((nenv, nlstm * 2), dtype=np.float32)

    def step(ob, state, mask):
        return sess.run([a0, v0, snew, neglogp0], {X: ob, S: state, M: mask})

    def value(ob, state, mask):
        return sess.run(v0, {X: ob, S: state, M: mask})

    self.X = X
    self.M = M
    self.S = S
    self.vf = vf
    self.step = step
    self.value = value
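# batch_to_seq / seq_to_batch reshuffle between the flat [nenv * nsteps, ...]
# batch layout and the per-timestep list the unrolled LSTM consumes. A minimal
# sketch of the assumed (baselines-style) behavior:
def batch_to_seq(h, nbatch, nsteps):
    # [nbatch * nsteps, d] -> list of nsteps tensors, each [nbatch, d]
    h = tf.reshape(h, [nbatch, nsteps, -1])
    return [tf.squeeze(v, [1]) for v in tf.split(axis=1, num_or_size_splits=nsteps, value=h)]

def seq_to_batch(h):
    # list of nsteps tensors, each [nbatch, d] -> [nbatch * nsteps, d]
    nh = h[0].get_shape()[-1].value
    return tf.reshape(tf.concat(axis=1, values=h), [-1, nh])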
def body(i, sent, prev_c, prev_h, hidden_states):  # only 1 time step
    # get the embedding representation and infer through the LSTM function
    next_c, next_h = lstm(...)
    # trick for attention
    score = None
    if is_attention:
        # make use of enc_output
        next_h = next_h
    hidden_states = hidden_states.write(i, next_h)
    # tf.while_loop requires the returned values to match the loop variables
    # one-to-one, so score is not part of the return.
    return i + 1, sent, next_c, next_h, hidden_states
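# The attention branch above is a stub. One common completion is Luong-style
# dot-product attention against the encoder outputs; a hypothetical sketch,
# assuming enc_output has shape [batch, src_len, hidden] and next_h [batch, hidden]:
score = tf.matmul(enc_output, tf.expand_dims(next_h, -1))  # [batch, src_len, 1]
align = tf.nn.softmax(score, axis=1)                       # weights over source positions
context = tf.reduce_sum(align * enc_output, axis=1)        # [batch, hidden]
next_h = tf.layers.dense(tf.concat([context, next_h], -1),
                         units=next_h.get_shape()[-1].value, activation=tf.tanh)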
def __init__(self, sess, ob_space, ac_space, nbatch, nsteps, nlstm=256, reuse=False):
    nenv = nbatch // nsteps
    nh, nw, nc = ob_space.shape
    ob_shape = (nbatch, nh, nw, nc)
    nact = ac_space.n
    X = tf.placeholder(tf.uint8, ob_shape)  # obs
    M = tf.placeholder(tf.float32, [nbatch])  # mask (done at t-1)
    S = tf.placeholder(tf.float32, [nenv, nlstm * 2])  # states
    with tf.variable_scope("model", reuse=reuse):
        h = conv(tf.cast(X, tf.float32) / 255., 'c1', nf=32, rf=8, stride=4, init_scale=np.sqrt(2))
        h2 = conv(h, 'c2', nf=64, rf=4, stride=2, init_scale=np.sqrt(2))
        h3 = conv(h2, 'c3', nf=64, rf=3, stride=1, init_scale=np.sqrt(2))
        h3 = conv_to_fc(h3)
        h4 = fc(h3, 'fc1', nh=512, init_scale=np.sqrt(2))
        xs = batch_to_seq(h4, nenv, nsteps)
        ms = batch_to_seq(M, nenv, nsteps)
        h5, snew = lstm(xs, ms, S, 'lstm1', nh=nlstm)
        h5 = seq_to_batch(h5)
        pi = fc(h5, 'pi', nact, act=lambda x: x)
        vf = fc(h5, 'v', 1, act=lambda x: x)
    self.pdtype = make_pdtype(ac_space)
    self.pd = self.pdtype.pdfromflat(pi)
    v0 = vf[:, 0]
    a0 = self.pd.sample()
    neglogp0 = self.pd.neglogp(a0)
    self.initial_state = np.zeros((nenv, nlstm * 2), dtype=np.float32)

    def step(ob, state, mask):
        return sess.run([a0, v0, snew, neglogp0], {X: ob, S: state, M: mask})

    def value(ob, state, mask):
        return sess.run(v0, {X: ob, S: state, M: mask})

    self.X = X
    self.M = M
    self.S = S
    self.pi = pi
    self.vf = vf
    self.step = step
    self.value = value
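# The lstm(xs, ms, s, scope, nh) helper used by both policies above is assumed
# to be the baselines-style step-unrolled LSTM that zeroes its recurrent state
# wherever the done mask m is set, so episodes do not leak into each other.
# A condensed sketch under that assumption:
def lstm(xs, ms, s, scope, nh):
    nin = xs[0].get_shape()[-1].value
    with tf.variable_scope(scope):
        wx = tf.get_variable("wx", [nin, nh * 4])
        wh = tf.get_variable("wh", [nh, nh * 4])
        b = tf.get_variable("b", [nh * 4], initializer=tf.constant_initializer(0.0))
    c, h = tf.split(axis=1, num_or_size_splits=2, value=s)
    for idx, (x, m) in enumerate(zip(xs, ms)):
        c = c * (1 - m)  # reset cell state at episode boundaries
        h = h * (1 - m)  # reset hidden state at episode boundaries
        z = tf.matmul(x, wx) + tf.matmul(h, wh) + b
        i, f, o, u = tf.split(axis=1, num_or_size_splits=4, value=z)
        c = tf.nn.sigmoid(f) * c + tf.nn.sigmoid(i) * tf.tanh(u)
        h = tf.nn.sigmoid(o) * tf.tanh(c)
        xs[idx] = h
    return xs, tf.concat(axis=1, values=[c, h])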
conf = {
    'input_dim': 26,
    'hidden_size': 100,
    'learning_rate': 0.05,
    'scale': 1
}

print("Begin to load data")
reader = reader(conf)
reader.get_data()
# reader.padding()
features = reader.features
targets = reader.targets
print("Load data complete")

print("Begin to build Networks")
network = lstm(conf)
network.build_net()
print("Build Networks complete")

# print(network.fit(features, targets))
print(network.test(features, targets, reader.testset, reader.testtar))
# network.draw("model.png")

# #####TEST#####
# a = np.ones((20, 27))
# b = np.arange(0, 20).reshape(20,)
# c = []
# for i in range(20): c.append(a[i] * b[i])
# c = np.array(c)
log("The index of 'the' is:", word_to_index["the"], logfile=logpath, is_verbose=is_verbose) log("The word of index 20 is:", index_to_word[20], logfile=logpath, is_verbose=is_verbose) # lstm = LSTM(batch_size, embedding_size, vocab_size, hidden_size, max_size) x = tf.placeholder(tf.int32, (batch_size, max_size - 1), name="x") label = tf.placeholder(tf.int32, (batch_size, max_size - 1), name="label") teacher_forcing = tf.placeholder(tf.bool, (), name="teacher_forcing") output, softmax_output = lstm(x, label, vocab_size, hidden_size, max_size, batch_size, embedding_size, teacher_forcing) with tf.Session() as sess: onehot = tf.argmax(softmax_output, 1) with tf.variable_scope("optimizer", reuse=tf.AUTO_REUSE): optimizer, loss = optimize(output, label, learning_rate) # perplexity = tf.pow(2, loss) tf.summary.scalar('loss', loss) """Now let's execute the graph in the session. We ge a data batch with `dataloader.get_batch(batch_size)`. This fetches a batch of word sequences. We then need to transform that into a batch of word index. We can achieve this with the helper function `word_to_index_transform(word_to_index, word_batch)` defined before.
# load data
filename = 'pkl_data/' + str(sample_length) + '.pkl'
x_train, y_train, x_val, y_val, x_test, y_test, val_SNRs, test_SNRs = utils.radioml_IQ_data(
    filename, mod_name, swap_dim=swap_dim)

# callbacks
early_stopping = EarlyStopping(monitor='val_loss', patience=patience)
best_model_path = 'result/models/LSTM/' + str(sample_length) + '/' + str(mod_name) + 'best.h5'
checkpointer = ModelCheckpoint(best_model_path, verbose=1, save_best_only=True)
TB_dir = 'result/TB/' + str(mod_name) + '_' + str(sample_length)
tensorboard = TensorBoard(TB_dir)

model = utils.lstm(lr, input_dim)
history = model.fit(x_train, y_train,
                    epochs=max_epoch,
                    batch_size=batch_size,
                    verbose=1,
                    shuffle=True,
                    validation_data=(x_val, y_val),
                    callbacks=[early_stopping, checkpointer, tensorboard])
print('First stage finished, loss is stable')

pf_min = 6.0
pf_max = 7.9
pf_test = LambdaCallback(on_epoch_end=lambda epoch, logs: utils.get_pf(
def _init_actor_net(self, scope, trainable=True, is_inference=False):
    my_initializer = tf.contrib.layers.xavier_initializer()
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        last_output_dims = 0
        last_output = None
        last_hidden_state = None
        if USE_CNN:
            cnn_w_1 = tf.get_variable("cnn_w_1", [8, 8, C, 32], initializer=my_initializer)
            cnn_b_1 = tf.get_variable("cnn_b_1", [32], initializer=tf.constant_initializer(0.0))
            output1 = tf.nn.relu(tf.nn.bias_add(
                tf.nn.conv2d(self.s, cnn_w_1, strides=[1, 4, 4, 1], padding='SAME'), cnn_b_1))
            cnn_w_2 = tf.get_variable("cnn_w_2", [4, 4, 32, 64], initializer=my_initializer)
            cnn_b_2 = tf.get_variable("cnn_b_2", [64], initializer=tf.constant_initializer(0.0))
            output2 = tf.nn.relu(tf.nn.bias_add(
                tf.nn.conv2d(output1, cnn_w_2, strides=[1, 2, 2, 1], padding='SAME'), cnn_b_2))
            cnn_w_3 = tf.get_variable("cnn_w_3", [3, 3, 64, 64], initializer=my_initializer)
            cnn_b_3 = tf.get_variable("cnn_b_3", [64], initializer=tf.constant_initializer(0.0))
            output3 = tf.nn.relu(tf.nn.bias_add(
                tf.nn.conv2d(output2, cnn_w_3, strides=[1, 1, 1, 1], padding='SAME'), cnn_b_3))
            last_output_dims = np.prod([v.value for v in output3.get_shape()[1:]])
            last_output = tf.reshape(output3, [-1, last_output_dims])
        else:
            flat_output_size = INPUT_DIMENS_FLAT
            flat_output = tf.reshape(self.s, [-1, flat_output_size], name='flat_output')
            fc_W_1 = tf.get_variable(shape=[flat_output_size, self.layer_size], name='fc_W_1',
                                     trainable=trainable, initializer=my_initializer)
            fc_b_1 = tf.get_variable(shape=[1], name='fc_b_1', trainable=trainable)
            output1 = tf.nn.relu(tf.matmul(flat_output, fc_W_1) + fc_b_1)
            fc_W_2 = tf.get_variable(shape=[self.layer_size, self.layer_size], name='fc_W_2',
                                     trainable=trainable, initializer=my_initializer)
            fc_b_2 = tf.get_variable(shape=[1], name='fc_b_2', trainable=trainable)
            output2 = tf.nn.relu(tf.matmul(output1, fc_W_2) + fc_b_2)
            fc_W_3 = tf.get_variable(shape=[self.layer_size, self.layer_size], name='fc_W_3',
                                     trainable=trainable, initializer=my_initializer)
            fc_b_3 = tf.get_variable(shape=[1], name='fc_b_3', trainable=trainable)
            output3 = tf.nn.relu(tf.matmul(output2, fc_W_3) + fc_b_3)
            last_output_dims = self.layer_size
            last_output = output3

        # Add LSTM here, to convert last_output to lstm_output
        tf.summary.histogram("lstm_input", last_output)
        tf.summary.histogram("lstm_input_hidden_state", self.lstm_hidden)
        use_tensorflow_lstm = True
        if use_tensorflow_lstm:
            lstm_input = last_output
            use_keras = True
            if use_keras:
                lstm_layer = tf.keras.layers.LSTM(
                    HIDDEN_STATE_LEN,
                    return_state=True,
                    return_sequences=True,
                    kernel_initializer=my_initializer,
                    recurrent_initializer=my_initializer)
                # During inference the network sees one step at a time; during
                # training, TIME_STEP consecutive steps are folded into a sequence.
                fold_time_step = TIME_STEP
                if is_inference:
                    fold_time_step = 1
                reshaped_lstm_input = tf.reshape(lstm_input, [-1, fold_time_step, last_output_dims])
                reshaped_lstm_mask = tf.reshape(self.lstm_mask, [-1, fold_time_step])
                reshaped_lstm_mask = 1 - reshaped_lstm_mask
                reshaped_lstm_mask = tf.cast(reshaped_lstm_mask, dtype=tf.bool)
                c_old, h_old = tf.split(axis=1, num_or_size_splits=2, value=self.lstm_hidden)
                # Keep only the state at the start of each folded sequence.
                c_old = c_old[::fold_time_step, ...]
                h_old = h_old[::fold_time_step, ...]
                # Keras LSTM with return_state=True returns (outputs, state_h, state_c).
                reshaped_output, last_h, last_c = lstm_layer(
                    reshaped_lstm_input,
                    # mask=reshaped_lstm_mask,
                    initial_state=[h_old, c_old])
                '''
                layer_weights = lstm_layer.get_weights()
                for idx in range(len(layer_weights)):
                    new_tensor = tf.convert_to_tensor(layer_weights[idx])
                    tf.summary.histogram("lstm_layer_{}".format(idx), new_tensor)
                '''
                last_c = tf.reshape(last_c, [-1, HIDDEN_STATE_LEN])
                last_h = tf.reshape(last_h, [-1, HIDDEN_STATE_LEN])
                last_hidden_state = tf.concat(axis=1, values=[last_c, last_h])
                last_output = tf.reshape(reshaped_output, [-1, HIDDEN_STATE_LEN])
            else:
                lstm_cell = tf.nn.rnn_cell.LSTMCell(HIDDEN_STATE_LEN, name='lstm_cell', dynamic=True)
                c_old, h_old = tf.split(axis=1, num_or_size_splits=2, value=self.lstm_hidden)
                c_old, h_old = c_old[:, ::TIME_STEP], h_old[:, ::TIME_STEP]
                combined_hidden_states = tf.stack([c_old, h_old], axis=2)
                lstm_input = tf.reshape(lstm_input, (-1, TIME_STEP, *(lstm_input.get_shape()[1:])))
                lstm_input = tf.unstack(lstm_input, axis=1)
                last_output, last_hidden_state = tf.nn.static_rnn(
                    cell=lstm_cell, inputs=lstm_input, initial_state=combined_hidden_states)
                # last_output, last_hidden_state = lstm_cell(inputs=lstm_input, state=(c_old, h_old))
                # last_output, last_hidden_state = tf.nn.dynamic_rnn(cell=lstm_cell, inputs=lstm_input, initial_state=combined_hidden_states)
                # last_output = last_output[0]
        else:
            lstm_input = last_output
            last_output, last_hidden_state = utils.lstm(
                lstm_input, self.is_inference, self.lstm_hidden, self.lstm_mask,
                'lstm', HIDDEN_STATE_LEN, LSTM_CELL_COUNT, my_initializer)
        tf.summary.histogram("last_output", last_output)
        tf.summary.histogram("last_hidden_state", last_hidden_state)
        last_output_dims = HIDDEN_STATE_LEN

        a_logits_arr = []
        a_prob_arr = []
        # one policy head per entry in self.a_space_keys
        for k in self.a_space_keys:
            output_num = self.a_space[k]
            # actor network
            weight_layer_name = 'fc_W_{}'.format(k)
            bias_layer_name = 'fc_b_{}'.format(k)
            logit_layer_name = '{}_logits'.format(k)
            head_layer_name = '{}_head'.format(k)
            fc_W_a = tf.get_variable(shape=[last_output_dims, output_num], name=weight_layer_name,
                                     trainable=trainable, initializer=my_initializer)
            fc_b_a = tf.get_variable(shape=[1], name=bias_layer_name, trainable=trainable,
                                     initializer=tf.constant_initializer(0.0))
            a_logits = tf.matmul(last_output, fc_W_a) + fc_b_a
            a_logits_arr.append(a_logits)
            a_prob = stable_softmax(a_logits, head_layer_name)  # tf.nn.softmax(a_logits)
            a_prob_arr.append(a_prob)
            tf.summary.histogram("a_prob_{}".format(k), a_prob)

        # value network
        fc1_W_v = tf.get_variable(shape=[last_output_dims, 1], name='fc1_W_v',
                                  trainable=trainable, initializer=my_initializer)
        fc1_b_v = tf.get_variable(shape=[1], name='fc1_b_v', trainable=trainable,
                                  initializer=tf.constant_initializer(0.0))
        value = tf.matmul(last_output, fc1_W_v) + fc1_b_v
        value = tf.reshape(value, [-1], name="value_output")
        tf.summary.histogram("value", value)
        summary_merged = tf.summary.merge_all()
    return a_prob_arr, a_logits_arr, value, last_hidden_state, summary_merged
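# A plausible definition of the stable_softmax helper used above (an assumption;
# the original is not shown): subtract the row max before exponentiating so
# large logits cannot overflow in the exp.
def stable_softmax(logits, name):
    shifted = logits - tf.reduce_max(logits, axis=-1, keepdims=True)
    return tf.nn.softmax(shifted, name=name)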