def create_critic_network(self):
    """Build the recurrent critic graph.

    A state branch and an action branch (per-timestep dense layer, batch
    normalisation, ReLU) are each projected to 400 units, summed, passed
    through a ReLU and fed to an LSTM. A final dense layer maps every LSTM
    output step to a single value.

    Reads ``self.episode_length``, ``self.s_dim``, ``self.a_dim`` and
    ``self.lstm_num_cells``.

    Returns:
        A tuple ``(batch_state_x, batch_action_x, batch_y)``: the state
        input with batch shape ``[None, episode_length, s_dim]``, the
        action input with batch shape ``[None, episode_length, a_dim]``,
        and the output of the final ``Dense(1)`` layer applied to the LSTM
        output sequence.
    """
    ep_length = self.episode_length
    batch_state_x = Input(batch_shape=[None, ep_length, self.s_dim])
    batch_action_x = Input(batch_shape=[None, ep_length, self.a_dim])

    # state branch
    state_net = TimeDistributed(Dense(400))(batch_state_x)
    state_net = BatchNormalization()(state_net)
    state_net = Activation('relu')(state_net)

    # action branch
    action_net = TimeDistributed(Dense(400))(batch_action_x)
    action_net = BatchNormalization()(action_net)
    action_net = Activation('relu')(action_net)

    # merge branches
    # Sum the symbolic outputs of the two projection layers. Rebuilding the
    # affine maps from ``get_weights()`` would embed NumPy snapshots of the
    # initial kernels/biases as graph constants, so these layers would
    # receive no gradient. The sum below is the same expression
    # (x_s.W1 + b1 + x_a.W2 + b2) but stays wired to the layer variables.
    state_proj = TimeDistributed(Dense(400))(state_net)
    action_proj = TimeDistributed(Dense(400))(action_net)
    merged_net = tf.add(state_proj, action_proj)
    merged_net = Activation('relu')(merged_net)
    # Re-assert the [batch, ep_length, 400] layout before the RNN.
    merged_net = tf.reshape(merged_net, shape=[-1, ep_length, 400])

    # lstm cell
    rnn_cell = tf.nn.rnn_cell.LSTMCell(
        num_units=self.lstm_num_cells, state_is_tuple=True)
    # Only the per-step outputs are used; the final cell state is discarded.
    lstm_outputs, _ = tf.nn.dynamic_rnn(rnn_cell, merged_net, dtype=tf.float32)

    # final dense layer
    # Small uniform initial weights for the output layer; the initializer
    # has to be handed to the layer explicitly to have any effect.
    w_init = RandomUniform(minval=-0.003, maxval=0.003)
    batch_y = Dense(1, kernel_initializer=w_init)(lstm_outputs)

    return batch_state_x, batch_action_x, batch_y
def create_world_modeler_network(self):
    """Build the recurrent world-modeler graph.

    A state branch and an action branch (per-timestep dense layer with
    ReLU) are each projected to 400 units, summed, passed through a ReLU
    and fed to an LSTM. A final dense layer maps every LSTM output step to
    ``self.s_dim`` values, which are additionally returned multiplied by
    ``self.state_bound``.

    Reads ``self.episode_length``, ``self.s_dim``, ``self.a_dim``,
    ``self.lstm_num_cells`` and ``self.state_bound``.

    Returns:
        A tuple ``(batch_state_x, batch_action_x, batch_state_y,
        batch_state_y_scaled)``: the state input with batch shape
        ``[None, episode_length, s_dim]``, the action input with batch
        shape ``[None, episode_length, a_dim]``, the output of the final
        ``Dense(s_dim)`` layer, and that output multiplied element-wise by
        ``self.state_bound``.
    """
    ep_length = self.episode_length
    batch_state_x = Input(batch_shape=[None, ep_length, self.s_dim])
    batch_action_x = Input(batch_shape=[None, ep_length, self.a_dim])

    # state branch
    state_net = TimeDistributed(Dense(400, activation='relu'))(batch_state_x)

    # action branch
    action_net = TimeDistributed(Dense(400, activation='relu'))(batch_action_x)

    # merge branches
    # Sum the symbolic outputs of the two projection layers. Rebuilding the
    # affine maps from ``get_weights()`` would embed NumPy snapshots of the
    # initial kernels/biases as graph constants, so these layers would
    # receive no gradient. The sum below is the same expression
    # (x_s.W1 + b1 + x_a.W2 + b2) but stays wired to the layer variables.
    state_proj = TimeDistributed(Dense(400))(state_net)
    action_proj = TimeDistributed(Dense(400))(action_net)
    merged_net = tf.add(state_proj, action_proj)
    merged_net = Activation('relu')(merged_net)
    # Re-assert the [batch, ep_length, 400] layout before the RNN.
    merged_net = tf.reshape(merged_net, shape=[-1, ep_length, 400])

    # lstm cell
    rnn_cell = tf.nn.rnn_cell.LSTMCell(
        num_units=self.lstm_num_cells, state_is_tuple=True)
    # Only the per-step outputs are used; the final cell state is discarded.
    lstm_outputs, _ = tf.nn.dynamic_rnn(rnn_cell, merged_net, dtype=tf.float32)

    # final dense layer
    # Small uniform initial weights for the output layer; the initializer
    # has to be handed to the layer explicitly to have any effect.
    w_init = RandomUniform(minval=-0.005, maxval=0.005)
    batch_state_y = Dense(self.s_dim, kernel_initializer=w_init)(lstm_outputs)
    batch_state_y_scaled = tf.multiply(batch_state_y, self.state_bound)

    return batch_state_x, batch_action_x, batch_state_y, batch_state_y_scaled