def _tech_net(self, data, trainable=True):
    """Build the three-layer dense "tech" head.

    :param data: input feature tensor, shape [batch, features]
    :param trainable: whether the dense layers' variables are trainable
    :return: (output, scope) — the linear scalar output tensor and the
             name of the variable scope the head was built under
    """
    with tf.variable_scope("Tech_net"):
        # NOTE(review): an input batch-norm was tried here and disabled;
        # normalisation is instead delegated to the dense layers via norm=.
        hidden_1 = layer.dense_layer(data, 256, "DenseLayer1",
                                     is_training=self.is_training,
                                     trainable=trainable, norm=self.use_norm)
        hidden_2 = layer.dense_layer(hidden_1, 128, "DenseLayer2",
                                     is_training=self.is_training,
                                     trainable=trainable, norm=self.use_norm)
        # Linear output (func=None): one scalar per example.
        output = layer.dense_layer(hidden_2, 1, "DenseLayerOut", func=None,
                                   is_training=self.is_training,
                                   trainable=trainable, norm=self.use_norm)
        scope = tf.get_variable_scope().name
        return output, scope
def build_graph(self, obs_ph, acs_ph, reuse=False):
    """Build the network over (observation, action) pairs.

    :param obs_ph: observation placeholder, shape [batch, obs_dim]
    :param acs_ph: action placeholder, shape [batch, act_dim]
    :param reuse: reuse existing variables in self.scope when True
    :return: linear output tensor of shape [batch, 1]
    """
    with tf.variable_scope(self.scope):
        if reuse:
            tf.get_variable_scope().reuse_variables()

        # Concatenate observation and action -> one transition vector.
        transition = tf.concat([obs_ph, acs_ph], axis=1)
        if self.use_norm:
            transition = layer.batch_norm(transition, self.pop_batch_norm, 'BN')

        hidden_1 = layer.dense_layer(transition, 128, "DenseLayer1",
                                     is_training=self.pop_batch_norm,
                                     trainable=True, norm=self.use_norm)
        hidden_2 = layer.dense_layer(hidden_1, 32, "DenseLayer2",
                                     is_training=self.pop_batch_norm,
                                     trainable=True, norm=self.use_norm)
        # Output layer is linear and deliberately unnormalised (norm=None).
        logits = layer.dense_layer(hidden_2, 1, "DenseLayer3", func=None,
                                   is_training=self.pop_batch_norm,
                                   trainable=True, norm=None)
        return logits
def __init__(self, name: str, sess, ob_space, act_space_array, activation=tf.nn.relu):
    """Actor-critic network with a softmax policy head and a scalar value head.

    :param name: string, variable-scope name for this network
    :param sess: TensorFlow session
    :param ob_space: flat observation dimension
    :param act_space_array: number of discrete actions
    :param activation: activation for the hidden dense layers
    """
    self.sess = sess
    with tf.variable_scope(name):
        self.obs = tf.placeholder(dtype=tf.float32, shape=[None, ob_space], name='obs')

        with tf.variable_scope('policy_net'):
            with tf.variable_scope('controller'):
                hidden_1 = layer.dense_layer(self.obs, 64, "DenseLayer1", func=activation)
                hidden_2 = layer.dense_layer(hidden_1, 64, "DenseLayer2", func=activation)
                # Softmax over the discrete action set
                # (e.g. wait / build worker / build pylon).
                self.tech_probs = layer.dense_layer(hidden_2, act_space_array,
                                                    "tech_output", func=tf.nn.softmax)
                # Sample one action per batch row from the categorical distribution.
                self.tech_act = tf.multinomial(tf.log(self.tech_probs), num_samples=1)
                self.tech_act = tf.reshape(self.tech_act, shape=[-1])

        with tf.variable_scope('value_net'):
            hidden_1 = layer.dense_layer(self.obs, 64, "DenseLayer1", func=activation)
            hidden_2 = layer.dense_layer(hidden_1, 64, "DenseLayer2", func=activation)
            # Linear scalar state-value prediction.
            self.v_preds = layer.dense_layer(hidden_2, 1, "DenseLayer4", func=None)

        self.scope = tf.get_variable_scope().name
def __init__(self, name: str, sess, ob_space, add_ob_space, act_space_array,
             add_act_space, freeze_head=True, activation=tf.nn.relu):
    """Actor-critic network that fuses three observation branches.

    The policy head combines a (possibly frozen) base branch over `obs`,
    a dense branch over the additional observation `obs_add`, and a
    resnet branch over the spatial map `obs_map`, via a fixed-weight sum.

    :param name: string, variable-scope name for this network
    :param sess: TensorFlow session
    :param ob_space: flat observation dimension
    :param add_ob_space: flat additional-observation dimension
    :param act_space_array: base discrete-action count for the output layer
    :param add_act_space: additional discrete-action count for the output layer
    :param freeze_head: when True, the base obs branches are not trainable
    :param activation: activation for the hidden dense layers
    """
    self.sess = sess
    self.add_weight = 0.2                       # fusion weight for the add branches
    self.map_width = 32 if P.use_small_map else 64
    self.map_channel = 10
    with tf.variable_scope(name):
        self.obs = tf.placeholder(dtype=tf.float32, shape=[None, ob_space], name='obs')
        self.obs_add = tf.placeholder(dtype=tf.float32, shape=[None, add_ob_space],
                                      name='obs_add')
        self.obs_map = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.map_width, self.map_width, self.map_channel],
            name='obs_map')

        with tf.variable_scope('policy_net'):
            with tf.variable_scope('controller'):
                head_1 = layer.dense_layer(self.obs, 64, "DenseLayer1",
                                           func=activation, trainable=not freeze_head)
                self.layer_2 = layer.dense_layer(head_1, 64, "DenseLayer2",
                                                 func=activation,
                                                 trainable=not freeze_head)
                # Additional (non-spatial) observation branch.
                self.layer_3 = layer.dense_layer(self.obs_add, 64, "DenseLayer3",
                                                 func=activation,
                                                 initial_type='original')
                # Spatial map branch.
                self.layer_4, self.map_variable_scope = ops.simple_resnet(
                    self.obs_map, 18, 64, "Resnet")
                # Fixed fusion: base keeps (1 - w); each add branch gets w / 2.
                self.layer_5 = ((1. - self.add_weight) * self.layer_2
                                + self.add_weight / 2. * self.layer_3
                                + self.add_weight / 2. * self.layer_4)
                self.tech_probs = layer.output_layer(self.layer_5, act_space_array,
                                                     add_act_space, "tech_output",
                                                     func=tf.nn.softmax)
                # Sample one action per batch row from the categorical distribution.
                self.tech_act = tf.multinomial(tf.log(self.tech_probs), num_samples=1)
                self.tech_act = tf.reshape(self.tech_act, shape=[-1])

        with tf.variable_scope('value_net'):
            val_1 = layer.dense_layer(self.obs, 64, "DenseLayer1",
                                      func=activation, trainable=not freeze_head)
            val_2 = layer.dense_layer(val_1, 64, "DenseLayer2",
                                      func=activation, trainable=not freeze_head)
            # Linear scalar state-value prediction (base obs only).
            self.v_preds = layer.dense_layer(val_2, 1, "DenseLayer4", func=None)

        self.scope = tf.get_variable_scope().name
def sep_policy_value_net(self, name, hidden_units=64, resnet=None, activation=tf.nn.relu,
                         freeze_head=False, initial_type='original', norm=True,
                         is_training=True, reuse=False):
    """Build separate policy and value networks under one variable scope.

    The policy branch optionally fuses the additional observation
    (`self.obs_add`) and the spatial map (`self.obs_map`, via `resnet`)
    with the base branch, dispatching on `self.weighted_sum_type`.

    :param name: variable-scope name
    :param hidden_units: width of the hidden dense layers
    :param resnet: callable building the map branch; used only when
                   self.use_add_obs and self.add_image are both set
    :param activation: hidden-layer activation
    :param freeze_head: when True, the base obs branches are not trainable
    :param initial_type: weight-initialisation scheme passed to the layers
    :param norm: whether to normalise the first dense layer of each branch
    :param is_training: training-mode flag forwarded to the layers
    :param reuse: reuse existing variables in the scope when True
    :return: (probs, act, v_preds) — action probabilities, sampled action
             ids of shape [-1], and scalar value predictions
    """
    with tf.variable_scope(name, reuse=reuse):
        with tf.variable_scope('policy_net'):
            with tf.variable_scope('controller'):
                print('freeze_head:', freeze_head)
                pol_1 = layer.dense_layer(self.obs, hidden_units, "DenseLayer1",
                                          norm=norm, is_training=is_training,
                                          func=activation, initial_type=initial_type,
                                          trainable=not freeze_head)
                # Second base layer is deliberately unnormalised (norm=False).
                self.layer_2 = layer.dense_layer(pol_1, hidden_units, "DenseLayer2",
                                                 norm=False, is_training=is_training,
                                                 func=activation,
                                                 initial_type=initial_type,
                                                 trainable=not freeze_head)
                if self.use_add_obs:
                    # Additional (non-spatial) observation branch.
                    self.layer_3 = layer.dense_layer(
                        self.obs_add, hidden_units, "DenseLayer3",
                        norm=False, is_training=is_training,
                        func=activation, initial_type=initial_type)
                    if self.add_image:
                        # Spatial map branch built by the injected resnet.
                        self.layer_4, self.map_variable_scope = resnet(
                            self.obs_map, 18, hidden_units, "Resnet",
                            is_training=is_training)
                    else:
                        print('not add image')
                        # Reuse the add-obs branch so the fusion arithmetic
                        # below stays well-defined without a map input.
                        self.layer_4, self.map_variable_scope = self.layer_3, []
                    if self.weighted_sum_type == 'AttentionWeight':
                        raise NotImplementedError
                    elif self.weighted_sum_type == 'AdaptiveWeight':
                        raise NotImplementedError
                    elif self.weighted_sum_type == 'AddWeight':
                        # Fixed fusion: base keeps (1 - w); each add branch
                        # gets w / 2.
                        self.layer_5 = ((1. - self.add_weight) * self.layer_2
                                        + self.add_weight / 2. * self.layer_3
                                        + self.add_weight / 2. * self.layer_4)
                    elif self.weighted_sum_type == 'Add':
                        print('self.weighted_sum_type:', self.weighted_sum_type)
                        self.layer_5 = self.layer_2 + self.layer_3 + self.layer_4
                    else:
                        raise NotImplementedError
                else:
                    self.layer_5 = self.layer_2
                probs = layer.output_layer(self.layer_5, self.act_space_array,
                                           self.add_act_space, "output",
                                           is_training=is_training,
                                           initial_type=initial_type,
                                           func=tf.nn.softmax)
                # Sample one action per batch row from the categorical distribution.
                act = tf.multinomial(tf.log(probs), num_samples=1)
                act = tf.reshape(act, shape=[-1])

        with tf.variable_scope('value_net'):
            val_1 = layer.dense_layer(self.obs, hidden_units, "DenseLayer1",
                                      norm=norm, is_training=is_training,
                                      func=activation, initial_type=initial_type,
                                      trainable=not freeze_head)
            val_2 = layer.dense_layer(val_1, hidden_units, "DenseLayer2",
                                      norm=norm, is_training=is_training,
                                      func=activation, initial_type=initial_type,
                                      trainable=not freeze_head)
            # Linear scalar state-value prediction (base obs only).
            v_preds = layer.dense_layer(val_2, 1, "DenseLayer4",
                                        initial_type=initial_type,
                                        is_training=is_training, func=None)

        self.scope = tf.get_variable_scope().name
        return probs, act, v_preds
def sep_policy_value_net(self, name, hidden_units=64, activation=tf.nn.relu,
                         freeze_head=False, initial_type='original', norm=True,
                         is_training=True, reuse=False):
    """Build separate policy and value networks under one variable scope.

    The policy branch optionally fuses the additional observation
    (`self.obs_add`) with the base branch, dispatching on `self.weight_type`:
    learned attention gate, learned per-unit adaptive mix, fixed-weight mix,
    or a plain sum.

    :param name: variable-scope name
    :param hidden_units: width of the hidden dense layers
    :param activation: hidden-layer activation
    :param freeze_head: when True, the base obs branches are not trainable
    :param initial_type: weight-initialisation scheme passed to the layers
    :param norm: whether to normalise the dense layers
    :param is_training: training-mode flag forwarded to the layers
    :param reuse: reuse existing variables in the scope when True
    :return: (probs, act, v_preds) — action probabilities, sampled action
             ids of shape [-1], and scalar value predictions
    """
    with tf.variable_scope(name, reuse=reuse):
        with tf.variable_scope('policy_net'):
            with tf.variable_scope('controller'):
                pol_1 = layer.dense_layer(self.obs, hidden_units, "DenseLayer1",
                                          norm=norm, is_training=is_training,
                                          func=activation, initial_type=initial_type,
                                          trainable=not freeze_head)
                self.layer_2 = layer.dense_layer(pol_1, hidden_units, "DenseLayer2",
                                                 norm=norm, is_training=is_training,
                                                 func=activation,
                                                 initial_type=initial_type,
                                                 trainable=not freeze_head)
                if self.use_add_obs:
                    # Additional (non-spatial) observation branch.
                    self.layer_3 = layer.dense_layer(
                        self.obs_add, hidden_units, "DenseLayer3",
                        norm=norm, is_training=is_training,
                        func=activation, initial_type=initial_type)
                    if self.weight_type == 'AttentionWeight':
                        # Sigmoid gate computed from the add branch, applied
                        # multiplicatively to the base branch.
                        self.attention_weight = layer.dense_layer(
                            self.layer_3, hidden_units, "AttentionWeight",
                            initial_type='original', func=tf.nn.sigmoid)
                        self.layer_4 = self.layer_2 * self.attention_weight
                    elif self.weight_type == 'AdaptiveWeight':
                        # Learned per-unit mixing weight, zero-initialised so
                        # the network starts as the base branch alone.
                        self.adaptive_weight = tf.get_variable(
                            name="AdaptiveWeight", shape=[hidden_units],
                            initializer=tf.zeros_initializer(), trainable=True)
                        self.layer_4 = ((1 - self.adaptive_weight) * self.layer_2
                                        + self.adaptive_weight * self.layer_3)
                    elif self.weight_type == 'AddWeight':
                        # Fixed convex mix with self.add_weight.
                        self.layer_4 = ((1. - self.add_weight) * self.layer_2
                                        + self.add_weight * self.layer_3)
                    else:
                        self.layer_4 = self.layer_2 + self.layer_3
                else:
                    self.layer_4 = self.layer_2
                probs = layer.output_layer(self.layer_4, self.act_space_array,
                                           self.add_act_space, "output",
                                           is_training=is_training,
                                           initial_type=initial_type,
                                           func=tf.nn.softmax)
                # Sample one action per batch row from the categorical distribution.
                act = tf.multinomial(tf.log(probs), num_samples=1)
                act = tf.reshape(act, shape=[-1])

        with tf.variable_scope('value_net'):
            val_1 = layer.dense_layer(self.obs, hidden_units, "DenseLayer1",
                                      norm=norm, is_training=is_training,
                                      func=activation, initial_type=initial_type,
                                      trainable=not freeze_head)
            val_2 = layer.dense_layer(val_1, hidden_units, "DenseLayer2",
                                      norm=norm, is_training=is_training,
                                      func=activation, initial_type=initial_type,
                                      trainable=not freeze_head)
            # Linear scalar state-value prediction (base obs only).
            v_preds = layer.dense_layer(val_2, 1, "DenseLayer4",
                                        initial_type=initial_type,
                                        is_training=is_training, func=None)

        self.scope = tf.get_variable_scope().name
        return probs, act, v_preds