def _build_layers_v2(self, input_dict, num_outputs, options):
    # Hard deprecate this class. All Models should use the ModelV2
    # API from here on.
    deprecation_warning(
        "Model->VisionNetwork", "ModelV2->VisionNetwork", error=False)
    inputs = input_dict["obs"]
    filters = options.get("conv_filters")
    if not filters:
        filters = _get_filter_config(inputs.shape.as_list()[1:])
    activation = get_activation_fn(options.get("conv_activation"))

    with tf.name_scope("vision_net"):
        for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
            inputs = tf.layers.conv2d(
                inputs,
                out_size,
                kernel,
                stride,
                activation=activation,
                padding="same",
                name="conv{}".format(i))
        out_size, kernel, stride = filters[-1]

        # skip final linear layer
        if options.get("no_final_linear"):
            fc_out = tf.layers.conv2d(
                inputs,
                num_outputs,
                kernel,
                stride,
                activation=activation,
                padding="valid",
                name="fc_out")
            return flatten(fc_out), flatten(fc_out)

        fc1 = tf.layers.conv2d(
            inputs,
            out_size,
            kernel,
            stride,
            activation=activation,
            padding="valid",
            name="fc1")
        fc2 = tf.layers.conv2d(
            fc1,
            num_outputs, [1, 1],
            activation=None,
            padding="same",
            name="fc2")
        return flatten(fc2), flatten(fc1)
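# The deprecation warning above points to the ModelV2 API. Below is a
# minimal, hedged sketch of the same wiring as a TFModelV2 subclass; the
# class name and layer sizes are illustrative, not from the original source.
import tensorflow as tf
from ray.rllib.models.tf.tf_modelv2 import TFModelV2


class MyVisionNetV2(TFModelV2):
    def __init__(self, obs_space, action_space, num_outputs, model_config,
                 name):
        super(MyVisionNetV2, self).__init__(obs_space, action_space,
                                            num_outputs, model_config, name)
        inputs = tf.keras.layers.Input(shape=obs_space.shape, name="obs")
        x = tf.keras.layers.Conv2D(
            16, 4, strides=2, activation="relu", padding="same")(inputs)
        x = tf.keras.layers.Flatten()(x)
        logits = tf.keras.layers.Dense(num_outputs, name="logits")(x)
        value = tf.keras.layers.Dense(1, name="value")(x)
        self.base_model = tf.keras.Model(inputs, [logits, value])
        self.register_variables(self.base_model.variables)

    def forward(self, input_dict, state, seq_lens):
        logits, self._value_out = self.base_model(input_dict["obs"])
        return logits, state

    def value_function(self):
        return tf.reshape(self._value_out, [-1])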
def _build_layers_v2(self, input_dict, num_outputs, options):
    inputs = input_dict["obs"]
    hiddens = [32, 32]
    with tf.name_scope("custom_net"):
        inputs = slim.conv2d(
            inputs, 6, [3, 3], 1, activation_fn=tf.nn.relu, scope="conv")
        last_layer = flatten(inputs)
        for i, size in enumerate(hiddens, 1):
            last_layer = slim.fully_connected(
                last_layer,
                size,
                weights_initializer=normc_initializer(1.0),
                activation_fn=tf.nn.relu,
                scope="fc{}".format(i))
        output = slim.fully_connected(
            last_layer,
            num_outputs,
            weights_initializer=normc_initializer(0.01),
            activation_fn=None,
            scope="fc_out")
        return output, last_layer
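# A model like the one above is used by registering it with RLlib's
# ModelCatalog and naming it in the trainer config. Usage sketch; the
# "custom_net" key and the CustomNetModel class name are placeholders for
# whatever class defines the _build_layers_v2 above:
from ray.rllib.models import ModelCatalog

ModelCatalog.register_custom_model("custom_net", CustomNetModel)

config = {
    "model": {
        "custom_model": "custom_net",
    },
}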
def forward(self, input_dict, state, seq_lens):
    obs = input_dict['obs']['board']
    obs = tf.expand_dims(obs, -1) if self.is_conv else flatten(obs)
    # log(1) = 0 leaves valid logits untouched; log(0) = -inf is clamped
    # to float32.min so invalid actions get effectively zero probability.
    action_mask = tf.maximum(
        tf.log(input_dict['obs']['action_mask']), tf.float32.min)
    model_out, self._value_out = self.base_model(obs)
    return action_mask + model_out, state
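# Why tf.maximum(tf.log(mask), tf.float32.min): a mask entry of 1 becomes
# log(1) = 0 (logit unchanged), while an entry of 0 becomes log(0) = -inf,
# clamped to the most negative finite float32 so it stays safe to add but
# drives the action's softmax probability to zero. Illustrative numbers,
# not from the source:
import numpy as np

logits = np.array([1.0, 2.0, 3.0], dtype=np.float32)
mask = np.array([1.0, 0.0, 1.0], dtype=np.float32)
inf_mask = np.maximum(np.log(mask), np.finfo(np.float32).min)
masked = logits + inf_mask
probs = np.exp(masked) / np.exp(masked).sum()
# probs ~= [0.12, 0.0, 0.88] -- the masked action can never be sampled.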
def forward(self, input_dict, state, seq_lens):
    obs = input_dict['obs']['board']
    if self.is_conv:
        # Append one extra row to the board (constant value 3, presumably
        # a sentinel/wall marker) before adding the channel dimension.
        paddings = [[0, 0], [0, 1], [0, 0]]
        obs = tf.expand_dims(
            tf.pad(obs, paddings, mode='CONSTANT', constant_values=3), -1)
    else:
        obs = flatten(obs)
    action_mask = tf.maximum(
        tf.log(input_dict['obs']['action_mask']), tf.float32.min)
    model_out, self._value_out = self.base_model(obs)
    return action_mask + model_out, state
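# The paddings spec is [[before, after], ...] per dimension: no padding on
# the batch axis, one row appended to the board axis, none on the columns.
# A quick shape check (batch and board sizes here are assumed for
# illustration):
import tensorflow as tf

board = tf.zeros([32, 10, 10])  # (batch, rows, cols)
padded = tf.pad(
    board, [[0, 0], [0, 1], [0, 0]], mode="CONSTANT", constant_values=3)
print(padded.shape)  # (32, 11, 10)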
def forward(self, input_dict, state, seq_lens):
    obs = flatten(input_dict['obs']['board'])
    action_mask = tf.maximum(
        tf.log(input_dict['obs']['action_mask']), tf.float32.min)
    model_out, _ = self.mlp({'obs': obs})
    return action_mask + model_out, state
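# ModelV2 models also need a value_function(). Since this forward() wraps
# an inner model (self.mlp) and discards its value branch, the simplest
# companion is to delegate. A hedged sketch, assuming self.mlp is an RLlib
# FullyConnectedNetwork built in __init__:
def value_function(self):
    return self.mlp.value_function()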
def __init__(self, obs_space, action_space, num_outputs, *args, **kwargs):
    super(RobotModel, self).__init__(obs_space, action_space, num_outputs,
                                     *args, **kwargs)
    # NOTE: gym's Box requires low/high bounds; the original passed only a
    # shape, so unbounded placeholder bounds are assumed here.
    obs_space = {
        "maps": Box(-float("inf"), float("inf"), (48, 48)),
        "robot_theta": Box(-float("inf"), float("inf"), (1, )),
        "robot_velocity": Box(-float("inf"), float("inf"), (3, )),
        "target": Box(-float("inf"), float("inf"), (2, )),
        "ckpts": Box(-float("inf"), float("inf"), (4, 2)),
    }
    maps_input = tf.keras.layers.Input(
        shape=obs_space["maps"].shape, dtype="float32", name="map")
    target_input = tf.keras.layers.Input(
        shape=obs_space["target"].shape, dtype="float32", name="target")
    robot_theta_input = tf.keras.layers.Input(
        shape=obs_space["robot_theta"].shape,
        dtype="float32",
        name="robot_theta")
    robot_velocity_input = tf.keras.layers.Input(
        shape=obs_space["robot_velocity"].shape,
        dtype="float32",
        name="robot_velocity")
    ckpt_input = tf.keras.layers.Input(
        shape=obs_space["ckpts"].shape, dtype="float32", name="ckpts")
    inputs = [
        maps_input,
        target_input,
        robot_theta_input,
        robot_velocity_input,
        ckpt_input,
    ]

    # Normalize the map and add a channel dimension for the conv stack.
    x = (maps_input - MAP_MEAN) / MAP_STD
    x = tf.keras.backend.expand_dims(x, -1)

    # Convolutional block
    x = tf.keras.layers.Conv2D(
        16, (3, 3),
        strides=(1, 1),
        activation="relu",
        padding="same",
        name="conv1")(x)
    x = tf.keras.layers.BatchNormalization(momentum=0.999)(x)
    x = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = tf.keras.layers.Conv2D(
        32, (3, 3),
        strides=(2, 2),
        activation="relu",
        padding="same",
        name="conv2")(x)
    x = tf.keras.layers.Conv2D(
        32, (3, 3),
        strides=(1, 1),
        activation="relu",
        padding="same",
        name="conv3")(x)
    x = tf.keras.layers.BatchNormalization(momentum=0.999)(x)
    x = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = tf.keras.layers.Conv2D(
        32, (3, 3),
        strides=(2, 2),
        activation="relu",
        padding="same",
        name="conv4")(x)
    x = tf.keras.layers.BatchNormalization(momentum=0.999)(x)
    x = flatten(x)
    metrics = x

    # Concatenate all inputs together
    sensors = [
        target_input / DIST_STD,
        robot_theta_input / DIST_STD,
        robot_velocity_input / DIST_STD,
        flatten(ckpt_input) / DIST_STD,
    ]
    x = tf.keras.layers.Concatenate(
        axis=-1, name="sensor_concat")(sensors + [x])
    # The raw sensor features occupy 14 dims (2 + 1 + 3 + 4 * 2) of the
    # output, so the dense layer produces the remaining num_outputs - 14.
    x = tf.keras.layers.Dense(num_outputs - 14)(x)
    x = tf.keras.layers.BatchNormalization(
        center=False, scale=False, momentum=0.999)(x)
    output_layer = tf.keras.layers.Concatenate(
        axis=-1, name="robot_concat")(sensors + [x])

    self.base_model = tf.keras.Model(inputs, [output_layer, metrics])
    self.register_variables(self.base_model.variables)
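# A matching forward() would feed the dict observation through base_model,
# in the same order as the `inputs` list above. A hedged sketch: the exact
# keys of input_dict["obs"] depend on the environment's Dict space.
def forward(self, input_dict, state, seq_lens):
    obs = input_dict["obs"]
    model_out, self._metrics = self.base_model([
        obs["maps"],
        obs["target"],
        obs["robot_theta"],
        obs["robot_velocity"],
        obs["ckpts"],
    ])
    return model_out, state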