def build_main_model(self):
    """Build the TF graph that approximates the policy and value functions.

    Creates the following attributes on ``self`` (TF1 graph tensors):
      - ``observation``: input placeholder — an NHWC image stack when
        ``obs_type == 'image'``, otherwise a flat ``[None, obs_size]``
        vector. For Atari it is rebound to the /255-normalized tensor.
      - ``feature_vector``: CNN features (convolutional) or the raw
        observation (otherwise).
      - ``hidden``: final hidden representation for the policy/value heads.
      - When ``self.recurrent``: ``prev_action``, ``prev_action_onehot``,
        ``seq_len``, ``state_in``, ``state_out``.

    NOTE(review): ``hidden`` is only bound when ``self.fully_connected`` or
    ``self.recurrent`` is set — with both False the final assignment would
    raise NameError. Presumably at least one flag is always set; confirm
    against the constructor.
    """
    if self.parameters['obs_type'] == 'image':
        # Stacked-frame image input: [batch, H, W, num_frames].
        self.observation = tf.placeholder(
            shape=[None,
                   self.parameters["frame_height"],
                   self.parameters["frame_width"],
                   self.parameters["num_frames"]],
            dtype=tf.float32,
            name='observation')
    else:
        # Flat feature-vector input.
        self.observation = tf.placeholder(
            shape=[None, self.parameters['obs_size']],
            dtype=tf.float32,
            name='observation')

    # Normalize Atari pixel input to [0, 1]. The placeholder is already
    # float32, so the original tf.cast(..., tf.float32) was a no-op and
    # has been dropped. This rebinds self.observation to the normalized
    # tensor; the underlying placeholder remains feedable by name
    # ('observation:0') in TF1.
    if self.parameters['env_type'] == 'atari':
        self.observation = self.observation / 255.

    if self.convolutional:
        self.feature_vector = net.cnn(self.observation,
                                      self.parameters["num_conv_layers"],
                                      self.parameters["num_filters"],
                                      self.parameters["kernel_sizes"],
                                      self.parameters["strides"],
                                      tf.nn.relu, False, 'cnn')
        network_input = c_layers.flatten(self.feature_vector)
    else:
        self.feature_vector = self.observation
        network_input = self.feature_vector

    if self.fully_connected:
        hidden = net.fcn(network_input,
                         self.parameters["num_fc_layers"],
                         self.parameters["num_fc_units"],
                         tf.nn.relu, 'fcn')

    if self.recurrent:
        # Previous-action one-hot is built but NOT concatenated to the
        # network input (the concat was deliberately disabled upstream);
        # the placeholder is kept so existing feed dicts remain valid.
        self.prev_action = tf.placeholder(shape=[None], dtype=tf.int32,
                                          name='prev_action')
        self.prev_action_onehot = c_layers.one_hot_encoding(
            self.prev_action, self.act_size)

        # LSTM state placeholders, fed between rollout steps.
        c_in = tf.placeholder(tf.float32,
                              [None, self.parameters['num_rec_units']],
                              name='c_state')
        h_in = tf.placeholder(tf.float32,
                              [None, self.parameters['num_rec_units']],
                              name='h_state')
        self.seq_len = tf.placeholder(shape=None, dtype=tf.int32,
                                      name='sequence_length')
        self.state_in = tf.contrib.rnn.LSTMStateTuple(c_in, h_in)
        # NOTE(review): the RNN consumes network_input directly, so when
        # both fully_connected and recurrent are set the fcn output above
        # is discarded — confirm this is intended.
        hidden, self.state_out = net.rnn(network_input, self.state_in,
                                         self.parameters['num_rec_units'],
                                         self.seq_len, 'rnn')

    self.hidden = hidden