def _create_network_internal(self, observation_input=None, action_input=None):
    assert observation_input is not None and action_input is not None
    observation_input = self._process_layer(observation_input,
                                            scope_name="observation_input")
    action_input = self._process_layer(action_input,
                                       scope_name="action_input")
    with tf.variable_scope("conv_network"):
        observation_output, output_shape = conv_network(
            observation_input,
            self.input_shape,
            self.conv_filters,
            self.conv_filter_sizes,
            self.conv_strides,
            self.conv_pads,
            self.observation_hidden_sizes,
            self.hidden_nonlinearity,
            W_initializer=xavier_uniform_initializer(),
            b_initializer=tf.constant_initializer(0.),
            pre_nonlin_lambda=self._process_layer,
        )
    output_dim = np.prod(output_shape[1:])
    # tf.contrib.layers.flatten does not take a target shape; use reshape.
    observation_output = tf.reshape(observation_output, [-1, output_dim])
    with tf.variable_scope("mlp"):
        observation_output = mlp(
            observation_output,
            output_dim,
            self.observation_hidden_sizes,
            self.hidden_nonlinearity,
            W_initializer=xavier_uniform_initializer(),
            b_initializer=tf.constant_initializer(0.),
            pre_nonlin_lambda=self._process_layer,
        )
    embedded = tf.concat(1, [observation_output, action_input])
    embedded_dim = self.action_dim + self.observation_hidden_sizes[-1]
    with tf.variable_scope("fusion_mlp"):
        fused_output = mlp(
            embedded,
            embedded_dim,
            self.embedded_hidden_sizes,
            self.hidden_nonlinearity,
            W_initializer=self.hidden_W_init,
            b_initializer=self.hidden_b_init,
            pre_nonlin_lambda=self._process_layer,
        )
        fused_output = self._process_layer(fused_output)
    with tf.variable_scope("output"):
        # Feed the fused features (not the pre-fusion observation features)
        # into the final linear layer; its input size is
        # embedded_hidden_sizes[-1], which matches fused_output.
        return linear(
            fused_output,
            self.embedded_hidden_sizes[-1],
            1,
            W_initializer=xavier_uniform_initializer(),
            b_initializer=tf.constant_initializer(0.),
        )

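# The builders in this file all assume `mlp` and `linear` helpers with the
# signatures used above. The sketch below is a minimal reconstruction inferred
# from the call sites, NOT the actual tf_util implementation; the initializer
# defaults, scope names, and pre_nonlin_lambda placement are assumptions.
import tensorflow as tf


def _linear_sketch(last_layer, last_size, new_size,
                   W_initializer=None, b_initializer=None):
    # Affine layer y = xW + b, with variables created in the current scope.
    W = tf.get_variable("W", [last_size, new_size], initializer=W_initializer)
    b = tf.get_variable("b", [new_size], initializer=b_initializer)
    return tf.matmul(last_layer, W) + b


def _mlp_sketch(input_layer, input_size, hidden_sizes, nonlinearity,
                W_initializer=None, b_initializer=None,
                pre_nonlin_lambda=lambda x, scope_name=None: x):
    # Stack of affine layers; `pre_nonlin_lambda` is a hook applied to each
    # pre-activation (e.g. for batch norm) before the nonlinearity.
    last_layer, last_size = input_layer, input_size
    for i, size in enumerate(hidden_sizes):
        with tf.variable_scope("hidden%d" % i):
            pre = _linear_sketch(last_layer, last_size, size,
                                 W_initializer, b_initializer)
            last_layer = nonlinearity(
                pre_nonlin_lambda(pre, scope_name="pre_nonlin"))
            last_size = size
    return last_layer
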
def _create_network_internal(self, observation_input=None):
    assert observation_input is not None
    observation_input = self._process_layer(observation_input,
                                            scope_name="observation_input")
    with tf.variable_scope("mlp"):
        observation_output = mlp(
            observation_input,
            self.observation_dim,
            self.observation_hidden_sizes,
            self.hidden_nonlinearity,
            W_initializer=self.hidden_W_init,
            b_initializer=self.hidden_b_init,
            pre_nonlin_lambda=self._process_layer,
        )
    observation_output = self._process_layer(
        observation_output,
        scope_name="output_preactivations",
    )
    with tf.variable_scope("output"):
        return self.output_nonlinearity(linear(
            observation_output,
            self.observation_hidden_sizes[-1],
            self.output_dim,
            W_initializer=self.output_W_init,
            b_initializer=self.output_b_init,
        ))

def _create_network_internal(self, feature_input=None, action_input=None):
    assert feature_input is not None and action_input is not None
    feature_input = self._process_layer(feature_input,
                                        scope_name="feature_input")
    action_input = self._process_layer(action_input,
                                       scope_name="action_input")
    with tf.variable_scope("mlp"):
        embedded = tf.concat(1, [feature_input, action_input])
        embedded_dim = self.feature_dim + self.action_dim
        feature_output = mlp(
            embedded,
            embedded_dim,
            self.hidden_sizes,
            self.hidden_activation,
            # W_initializer=lambda shape, dtype, partition_info: tf.truncated_normal(shape),
            b_initializer=tf.constant_initializer(0.),
            pre_nonlin_lambda=self._process_layer,
        )
    with tf.variable_scope("output_linear"):
        return linear(
            feature_output,
            self.hidden_sizes[-1],
            self.feature_dim,
            # W_initializer=lambda shape, dtype, partition_info: tf.truncated_normal(shape),
            b_initializer=tf.constant_initializer(0.),
        )

def _create_network_internal(self, observation_input=None):
    assert observation_input is not None
    observation_input = self._process_layer(observation_input,
                                            scope_name="observation_input")
    if len(self.hidden_sizes) > 0:
        with tf.variable_scope("mlp"):
            observation_output = mlp(
                observation_input,
                self.observation_dim,
                self.hidden_sizes,
                self.hidden_activation,
                W_initializer=he_uniform_initializer(),
                b_initializer=tf.constant_initializer(0.),
                pre_nonlin_lambda=self._process_layer,
            )
        last_layer_size = self.hidden_sizes[-1]
    else:
        observation_output = observation_input
        last_layer_size = self.observation_dim
    with tf.variable_scope("output"):
        return linear(
            observation_output,
            last_layer_size,
            self.feature_dim,
            W_initializer=he_uniform_initializer(),
            b_initializer=tf.constant_initializer(0.),
        )

def _create_network_internal(self, observation_input, action_input):
    observation_input = self._process_layer(observation_input,
                                            scope_name="observation_input")
    action_input = self._process_layer(action_input,
                                       scope_name="action_input")
    with tf.variable_scope("observation_mlp"):
        observation_output = mlp(
            observation_input,
            self.observation_dim,
            self.observation_hidden_sizes,
            self.hidden_nonlinearity,
            W_initializer=self.hidden_W_init,
            b_initializer=self.hidden_b_init,
            pre_nonlin_lambda=self._process_layer,
        )
        observation_output = self._process_layer(
            observation_output,
            scope_name="observation_output")
    embedded = tf.concat(1, [observation_output, action_input])
    embedded_dim = self.action_dim + self.observation_hidden_sizes[-1]
    with tf.variable_scope("fusion_mlp"):
        fused_output = mlp(
            embedded,
            embedded_dim,
            self.embedded_hidden_sizes,
            self.hidden_nonlinearity,
            W_initializer=self.hidden_W_init,
            b_initializer=self.hidden_b_init,
            pre_nonlin_lambda=self._process_layer,
        )
        fused_output = self._process_layer(fused_output)
    with tf.variable_scope("output_linear"):
        return linear(
            fused_output,
            self.embedded_hidden_sizes[-1],
            1,
            W_initializer=self.output_W_init,
            b_initializer=self.output_b_init,
        )

def _create_network_internal(self, observation_input=None):
    assert observation_input is not None
    observation_input = self._process_layer(observation_input,
                                            scope_name="observation_input")
    with tf.variable_scope("conv_network"):
        observation_output, output_shape = conv_network(
            observation_input,
            self.input_shape,
            self.conv_filters,
            self.conv_filter_sizes,
            self.conv_strides,
            self.conv_pads,
            self.observation_hidden_sizes,
            self.hidden_nonlinearity,
            W_initializer=xavier_uniform_initializer(),
            b_initializer=tf.constant_initializer(0.),
            pre_nonlin_lambda=self._process_layer,
        )
    output_dim = np.prod(output_shape[1:])
    observation_output = tf.reshape(observation_output, [-1, output_dim],
                                    name="reshape")
    # observation_output = tf.contrib.layers.flatten(observation_output, [-1, output_dim])
    self.reshaped_observation_feature = observation_output
    # tf.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, observation_output)
    with tf.variable_scope("mlp"):
        observation_output = mlp(
            observation_output,
            output_dim,
            self.observation_hidden_sizes,
            self.hidden_nonlinearity,
            W_initializer=xavier_uniform_initializer(),
            b_initializer=tf.constant_initializer(0.),
            pre_nonlin_lambda=self._process_layer,
        )
    with tf.variable_scope("output"):
        return self.output_nonlinearity(linear(
            observation_output,
            self.observation_hidden_sizes[-1],
            self.output_dim,
            W_initializer=xavier_uniform_initializer(),
            b_initializer=tf.constant_initializer(0.),
        ))

def _create_network_internal(self, observation_input=None):
    assert observation_input is not None
    observation_input = self._process_layer(observation_input,
                                            scope_name="observation_input")
    with tf.variable_scope("conv_network"):
        observation_output, output_shape = conv_network(
            observation_input,
            self.input_shape,
            self.conv_filters,
            self.conv_filter_sizes,
            self.conv_strides,
            self.conv_pads,
            self.hidden_sizes,
            self.hidden_activation,
            W_initializer=xavier_uniform_initializer(),
            b_initializer=tf.constant_initializer(0.),
            pre_nonlin_lambda=self._process_layer,
        )
    output_dim = np.prod(output_shape[1:])
    # tf.contrib.layers.flatten does not take a target shape; use reshape.
    observation_output = tf.reshape(observation_output, [-1, output_dim])
    with tf.variable_scope("mlp"):
        observation_output = mlp(
            observation_output,
            output_dim,
            self.hidden_sizes,
            self.hidden_activation,
            W_initializer=xavier_uniform_initializer(),
            b_initializer=tf.constant_initializer(0.),
            pre_nonlin_lambda=self._process_layer,
        )
    if len(self.hidden_sizes) > 0:
        last_size = self.hidden_sizes[-1]
    else:
        last_size = output_dim
    with tf.variable_scope("output"):
        return linear(
            observation_output,
            last_size,
            self.feature_dim,
            W_initializer=xavier_uniform_initializer(),
            b_initializer=tf.constant_initializer(0.),
        )

def _create_network_internal(self, feature_input1=None, feature_input2=None):
    assert feature_input1 is not None and feature_input2 is not None
    feature_input1 = self._process_layer(feature_input1,
                                         scope_name="feature_input1")
    feature_input2 = self._process_layer(feature_input2,
                                         scope_name="feature_input2")
    with tf.variable_scope("mlp"):
        embedded = tf.concat(1, [feature_input1, feature_input2])
        action_output = mlp(
            embedded,
            2 * self.feature_dim,
            self.hidden_sizes,
            self.hidden_activation,
            # W_initializer=lambda shape, dtype, partition_info: tf.truncated_normal(shape),
            b_initializer=tf.constant_initializer(0.),
            pre_nonlin_lambda=self._process_layer,
        )
    # Predict each action dimension autoregressively: dimension i is
    # conditioned on the shared features and the previous dimension's output.
    outputs = []
    for i in range(self.action_dim):
        if i == 0:
            embedded = action_output
        else:
            embedded = tf.concat(1, [action_output, outputs[-1]])
        with tf.variable_scope("inverse_output%d" % i):
            linear_output = linear(
                embedded,
                embedded.get_shape().as_list()[1],
                1,
            )
        if self.output_activation is not None:
            outputs.append(self.output_activation(linear_output))
        else:
            outputs.append(linear_output)
    return tf.concat(1, outputs)

def _create_network_internal(self, feature_input):
    feature_output = mlp(
        feature_input,
        self.feature_dim,
        (self.output_dim, self.upsample_dim),
        self.hidden_activation,
        W_initializer=xavier_uniform_initializer(),
        b_initializer=tf.constant_initializer(0.),
        pre_nonlin_lambda=self._process_layer,
    )
    # The reshape below assumes upsample_dim == 32 * 32 * 4.
    # side = int(math.sqrt(feature_output.get_shape().as_list()[1]))
    reshaped_feature_output = tf.reshape(
        feature_output,
        [tf.shape(feature_output)[0], 32, 32, 4])
    upsampled_output = tf.image.resize_images(reshaped_feature_output,
                                              [64, 64])
    return upsampled_output

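# A standalone shape check for the decode pipeline above (assumes
# upsample_dim == 32 * 32 * 4; the batch size of 3 is arbitrary).
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 32 * 32 * 4])
img = tf.reshape(x, [tf.shape(x)[0], 32, 32, 4])
up = tf.image.resize_images(img, [64, 64])
with tf.Session() as sess:
    out = sess.run(up, feed_dict={x: np.zeros((3, 32 * 32 * 4), np.float32)})
    # Bilinear resize doubles the spatial dims and keeps the channel dim.
    assert out.shape == (3, 64, 64, 4)
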
def _create_network_internal(self, feature_input1=None, feature_input2=None):
    assert feature_input1 is not None and feature_input2 is not None
    feature_input1 = self._process_layer(feature_input1,
                                         scope_name="feature_input1")
    feature_input2 = self._process_layer(feature_input2,
                                         scope_name="feature_input2")
    with tf.variable_scope("mlp"):
        embedded = tf.concat(1, [feature_input1, feature_input2])
        action_output = mlp(
            embedded,
            2 * self.feature_dim,
            self.hidden_sizes,
            self.hidden_activation,
            # W_initializer=lambda shape, dtype, partition_info: tf.truncated_normal(shape),
            b_initializer=tf.constant_initializer(0.),
            pre_nonlin_lambda=self._process_layer,
        )
    with tf.variable_scope("output"):
        output = linear(
            action_output,
            self.hidden_sizes[-1],
            self.action_dim,
            # W_initializer=lambda shape, dtype, partition_info: tf.truncated_normal(shape),
            b_initializer=tf.constant_initializer(0.),
        )
        if self.output_activation is not None:
            output = self.output_activation(output)
        return output

def _create_network_internal(self, observation_input, action_input):
    observation_input = self._process_layer(observation_input,
                                            scope_name="observation_input")
    action_input = self._process_layer(action_input,
                                       scope_name="action_input")
    concat_input = tf.concat(1, [observation_input, action_input])
    hidden_output = tf_util.mlp(
        concat_input,
        self.observation_dim + self.action_dim,
        self.hidden_sizes,
        self.hidden_nonlinearity,
        W_initializer=self.hidden_W_init,
        b_initializer=self.hidden_b_init,
        pre_nonlin_lambda=self._process_layer,
    )
    return self.output_nonlinearity(tf_util.linear(
        hidden_output,
        self.hidden_sizes[-1],
        self.output_dim,
        W_initializer=self.output_W_init,
        b_initializer=self.output_b_init,
    ))

def _create_network_internal(self, observation_input):
    observation_input = self._process_layer(observation_input,
                                            scope_name="observation_input")
    with tf.variable_scope("hidden_mlp"):
        hidden_output = tf_util.mlp(
            observation_input,
            self.observation_dim,
            self.observation_hidden_sizes,
            self.hidden_nonlinearity,
            W_initializer=self.hidden_W_init,
            b_initializer=self.hidden_b_init,
            pre_nonlin_lambda=self._process_layer,
        )
        hidden_output = self._process_layer(hidden_output,
                                            scope_name="hidden_output")
    with tf.variable_scope("output"):
        return self.output_nonlinearity(tf_util.linear(
            hidden_output,
            self.observation_hidden_sizes[-1],
            self.output_dim,
            W_initializer=self.output_W_init,
            b_initializer=self.output_b_init,
        ))

def __init__(
        self,
        env_spec,
        mean_hidden_nonlinearity=tf.nn.relu,
        mean_hidden_sizes=(32, 32),
        std_hidden_nonlinearity=tf.nn.relu,
        std_hidden_sizes=(32, 32),
        min_std=1e-6,
):
    """
    :param env_spec:
    :param mean_hidden_nonlinearity: nonlinearity used for the mean hidden
        layers
    :param mean_hidden_sizes: list of hidden sizes for the fully-connected
        mean hidden layers
    :param std_hidden_nonlinearity: nonlinearity used for the std hidden
        layers
    :param std_hidden_sizes: list of hidden sizes for the fully-connected
        std hidden layers
    :param min_std: minimum value for the std, to avoid numerical issues
    :return:
    """
    Serializable.quick_init(self, locals())
    assert isinstance(env_spec.action_space, Box)
    super(GaussianMLPPolicy, self).__init__(env_spec)
    self.env_spec = env_spec
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)
    self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    # Create network
    observation_dim = self.env_spec.observation_space.flat_dim
    self.observations_input = tf.placeholder(tf.float32,
                                             shape=[None, observation_dim])
    action_dim = self.env_spec.action_space.flat_dim
    with tf.variable_scope('mean'):
        mlp_mean_output = tf_util.mlp(self.observations_input,
                                      observation_dim,
                                      mean_hidden_sizes,
                                      mean_hidden_nonlinearity)
        mlp_mean_output_size = mean_hidden_sizes[-1]
        self.mean = tf_util.linear(mlp_mean_output,
                                   mlp_mean_output_size,
                                   action_dim)
    with tf.variable_scope('log_std'):
        mlp_std_output = tf_util.mlp(self.observations_input,
                                     observation_dim,
                                     std_hidden_sizes,
                                     std_hidden_nonlinearity)
        mlp_std_output_size = std_hidden_sizes[-1]
        self.log_std = tf_util.linear(mlp_std_output,
                                      mlp_std_output_size,
                                      action_dim)
    self.std = tf.maximum(tf.exp(self.log_std), min_std)

    self._dist = DiagonalGaussian(action_dim)
    self.actions_output = tf.placeholder(tf.float32,
                                         shape=[None, action_dim])
    z = (self.actions_output - self.mean) / self.std
    # Per-dimension Gaussian log-density:
    # log N(x; mu, sigma^2) = -log(sigma) - 0.5 * z^2 - 0.5 * log(2*pi).
    # Note: -log(std**2) would double-count the log(sigma) term.
    self.log_likelihood = (- tf.log(self.std)
                           - z ** 2 * 0.5
                           - tf.log(2 * np.pi) * 0.5)

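# A quick numerical sanity check (standalone, not part of the policy class)
# for the per-dimension log-density used above; the test values are arbitrary.
import numpy as np
from scipy.stats import norm

x, mu, sigma = 0.3, -0.1, 0.7
z = (x - mu) / sigma
manual = -np.log(sigma) - 0.5 * z ** 2 - 0.5 * np.log(2 * np.pi)
assert np.isclose(manual, norm.logpdf(x, loc=mu, scale=sigma))
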
def create_network(in_size):
    hidden_sizes = (32, 4)
    nonlinearity = tf.nn.relu
    input_ph = tf.placeholder(tf.float32, shape=[None, in_size])
    last_layer = tf_util.mlp(input_ph, in_size, hidden_sizes, nonlinearity)
    return input_ph, last_layer
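
# A minimal usage sketch for create_network. It assumes a TF1-style graph and
# session and that tf_util is importable; in_size=10 and the batch of zeros
# are arbitrary illustrative values.
import numpy as np
import tensorflow as tf

input_ph, last_layer = create_network(in_size=10)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    features = sess.run(
        last_layer,
        feed_dict={input_ph: np.zeros((5, 10), dtype=np.float32)})
    # The last hidden size is 4, so the output has shape (batch, 4).
    assert features.shape == (5, 4)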