def _build_net_critic(self, net_name):
    """Assemble the critic graph and return its un-normalized value output.

    The network inputs are the normalized state, the normalized goal when
    ``has_goal()`` is true, and, in the ``CWT_CNN_v1`` / ``CWT_CNN_v2``
    modes, the output of the ``MyCNN`` network built over ``self.wt_tf``.

    Args:
        net_name: Network identifier forwarded to ``NetBuilder.build_net``.

    Returns:
        The result of ``self.val_norm.unnormalize_tf`` applied to the
        flattened output of a single-unit linear dense head.
    """
    net_inputs = [self.s_norm.normalize_tf(self.s_tf)]
    if self.has_goal():
        net_inputs.append(self.g_norm.normalize_tf(self.g_tf))

    # Extra CNN input branch, only enabled in the two CWT_CNN modes.
    cnn_enabled = (Settings.mode() == Mode.CWT_CNN_v1
                   or Settings.mode() == Mode.CWT_CNN_v2)
    if cnn_enabled:
        net_inputs.append(
            NetBuilder.build_net(MyCNN.NAME, self.wt_tf,
                                 self.my_memory_buffer.channel_count))

    hidden = NetBuilder.build_net(net_name, net_inputs)
    value_head = tf.layers.dense(
        inputs=hidden,
        units=1,
        activation=None,
        kernel_initializer=TFUtil.xavier_initializer)

    # Collapse the dense output to one dimension before un-normalizing.
    flat_norm_value = tf.reshape(value_head, [-1])
    return self.val_norm.unnormalize_tf(flat_norm_value)
def _build_net_actor(self, net_name, init_output_scale):
    """Assemble the actor graph and return its un-normalized action output.

    The network inputs are the normalized state, the normalized goal when
    ``has_goal()`` is true, and, in the ``CWT_CNN_v1`` / ``CWT_CNN_v2``
    modes, the output of the ``MyCNN`` network built over ``self.wt_tf``.

    Args:
        net_name: Network identifier forwarded to ``NetBuilder.build_net``.
        init_output_scale: Bound of the symmetric uniform range
            ``[-init_output_scale, init_output_scale]`` used to initialize
            the output layer's kernel.

    Returns:
        The result of ``self.a_norm.unnormalize_tf`` applied to the linear
        output layer, which has ``self.get_action_size()`` units.
    """
    net_inputs = [self.s_norm.normalize_tf(self.s_tf)]
    if self.has_goal():
        net_inputs.append(self.g_norm.normalize_tf(self.g_tf))

    # Extra CNN input branch, only enabled in the two CWT_CNN modes.
    cnn_enabled = (Settings.mode() == Mode.CWT_CNN_v1
                   or Settings.mode() == Mode.CWT_CNN_v2)
    if cnn_enabled:
        net_inputs.append(
            NetBuilder.build_net(MyCNN.NAME, self.wt_tf,
                                 self.my_memory_buffer.channel_count))

    hidden = NetBuilder.build_net(net_name, net_inputs)

    action_dim = self.get_action_size()
    output_init = tf.random_uniform_initializer(
        minval=-init_output_scale, maxval=init_output_scale)
    norm_action = tf.layers.dense(
        inputs=hidden,
        units=action_dim,
        activation=None,
        kernel_initializer=output_init)

    return self.a_norm.unnormalize_tf(norm_action)
def _build_net_critic(self, net_name, input_tfs, reuse=False):
    """Build the critic head inside the ``'critic'`` variable scope.

    Args:
        net_name: Network identifier forwarded to ``NetBuilder.build_net``.
        input_tfs: Inputs forwarded to ``NetBuilder.build_net``.
        reuse: Passed to the variable scope, to ``NetBuilder.build_net``
            and to the dense output layer.

    Returns:
        The single-unit linear dense output with its last axis squeezed
        away.
    """
    value_units = 1

    # NOTE(review): scope extent reconstructed from flattened source;
    # confirm the squeeze belongs inside the variable scope.
    with tf.variable_scope('critic', reuse=reuse):
        hidden = NetBuilder.build_net(net_name, input_tfs, reuse)
        dense_out = tf.layers.dense(
            inputs=hidden,
            units=value_units,
            activation=None,
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            reuse=reuse)
        critic_out = tf.squeeze(dense_out, axis=-1)

    return critic_out
def _build_net_actor(self, net_name, init_output_scale):
    """Assemble the actor graph and return its un-normalized action output.

    The network inputs are the normalized state plus, when ``has_goal()``
    is true, the normalized goal.

    Args:
        net_name: Network identifier forwarded to ``NetBuilder.build_net``.
        init_output_scale: Bound of the symmetric uniform range
            ``[-init_output_scale, init_output_scale]`` used to initialize
            the output layer's kernel.

    Returns:
        The result of ``self.a_norm.unnormalize_tf`` applied to the linear
        output layer, which has ``self.get_action_size()`` units.
    """
    net_inputs = [self.s_norm.normalize_tf(self.s_tf)]
    if self.has_goal():
        net_inputs.append(self.g_norm.normalize_tf(self.g_tf))

    hidden = NetBuilder.build_net(net_name, net_inputs)

    action_dim = self.get_action_size()
    output_init = tf.random_uniform_initializer(
        minval=-init_output_scale, maxval=init_output_scale)
    norm_action = tf.layers.dense(
        inputs=hidden,
        units=action_dim,
        activation=None,
        kernel_initializer=output_init)

    return self.a_norm.unnormalize_tf(norm_action)
def _build_net_critic(self, net_name):
    """Assemble the critic graph and return its un-normalized value output.

    The network inputs are the normalized state plus, when ``has_goal()``
    is true, the normalized goal.

    Args:
        net_name: Network identifier forwarded to ``NetBuilder.build_net``.

    Returns:
        The result of ``self.val_norm.unnormalize_tf`` applied to the
        flattened output of a single-unit linear dense head.
    """
    net_inputs = [self.s_norm.normalize_tf(self.s_tf)]
    if self.has_goal():
        net_inputs.append(self.g_norm.normalize_tf(self.g_tf))

    hidden = NetBuilder.build_net(net_name, net_inputs)
    value_head = tf.layers.dense(
        inputs=hidden,
        units=1,
        activation=None,
        kernel_initializer=TFUtil.xavier_initializer)

    # Collapse the dense output to one dimension before un-normalizing.
    flat_norm_value = tf.reshape(value_head, [-1])
    return self.val_norm.unnormalize_tf(flat_norm_value)
def _build_disc_net(self, net_name, input_tfs, init_output_scale, reuse=False):
    """Build the discriminator network and return its logits tensor.

    Args:
        net_name: Network identifier forwarded to ``net_builder.build_net``.
        input_tfs: Inputs forwarded to ``net_builder.build_net``.
        init_output_scale: Bound of the symmetric uniform range
            ``[-init_output_scale, init_output_scale]`` used to initialize
            the logit layer's kernel.
        reuse: Passed to ``net_builder.build_net`` and to the logit layer.

    Returns:
        Output of the single-unit linear dense layer named
        ``self.DISC_LOGIT_NAME``.
    """
    logit_units = 1

    hidden = net_builder.build_net(net_name, input_tfs, reuse)
    logit_init = tf.random_uniform_initializer(
        minval=-init_output_scale, maxval=init_output_scale)

    return tf.layers.dense(
        inputs=hidden,
        units=logit_units,
        activation=None,
        reuse=reuse,
        kernel_initializer=logit_init,
        name=self.DISC_LOGIT_NAME)
def _build_net_critic(self, net_name):
    """Assemble the critic graph under the ``'cpu:0'`` device scope.

    The network inputs are the normalized state plus, when ``has_goal()``
    is true, the normalized goal.

    Args:
        net_name: Network identifier forwarded to ``NetBuilder.build_net``.

    Returns:
        The result of ``self.val_norm.unnormalize_tf`` applied to the
        flattened output of a single-unit linear dense head.
    """
    # NOTE(review): device-scope extent reconstructed from flattened source;
    # assumed to cover the whole graph construction -- confirm.
    with tf.device('cpu:0'):
        net_inputs = [self.s_norm.normalize_tf(self.s_tf)]
        if self.has_goal():
            net_inputs.append(self.g_norm.normalize_tf(self.g_tf))

        hidden = NetBuilder.build_net(net_name, net_inputs)
        value_head = tf.layers.dense(
            inputs=hidden,
            units=1,
            activation=None,
            kernel_initializer='glorot_uniform')

        # Collapse the dense output to one dimension before un-normalizing.
        flat_norm_value = tf.reshape(value_head, [-1])
        critic_value = self.val_norm.unnormalize_tf(flat_norm_value)

    return critic_value
def _build_net_actor(self, net_name, input_tfs, init_output_scale, reuse=False):
    """Build the Gaussian policy head inside the ``'actor'`` variable scope.

    Args:
        net_name: Network identifier forwarded to ``NetBuilder.build_net``.
        input_tfs: Inputs forwarded to ``NetBuilder.build_net``.
        init_output_scale: Bound of the symmetric uniform range
            ``[-init_output_scale, init_output_scale]`` used for both the
            mean and the log-std kernel initializers.
        reuse: Passed to the variable scope, to ``NetBuilder.build_net``
            and to ``TFDistributionGaussianDiag``.

    Returns:
        The ``TFDistributionGaussianDiag`` instance built on top of the
        network output, with ``dim`` equal to ``self.get_action_size()``.
    """
    # NOTE(review): scope extent reconstructed from flattened source;
    # confirm the distribution is meant to be created inside the scope.
    with tf.variable_scope('actor', reuse=reuse):
        hidden = NetBuilder.build_net(net_name, input_tfs, reuse)

        dist_std_type = TFDistributionGaussianDiag.StdType.Default
        action_dim = self.get_action_size()

        mean_w_init = tf.random_uniform_initializer(
            minval=-init_output_scale, maxval=init_output_scale)
        mean_b_init = tf.zeros_initializer()
        logstd_w_init = tf.random_uniform_initializer(
            minval=-init_output_scale, maxval=init_output_scale)

        # Log-std bias starts at log(noise) for every action dimension,
        # stored as float32.
        logstd_b_init = (
            np.log(self.exp_params_curr.noise) * np.ones(action_dim)
        ).astype(np.float32)

        policy_dist = TFDistributionGaussianDiag(
            input=hidden,
            dim=action_dim,
            std_type=dist_std_type,
            mean_kernel_init=mean_w_init,
            mean_bias_init=mean_b_init,
            logstd_kernel_init=logstd_w_init,
            logstd_bias_init=logstd_b_init,
            reuse=reuse)

    return policy_dist