def test_output_with_identity_filter(self, filters, in_channels, strides):
    with tf.compat.v1.variable_scope('CNN'):
        self.cnn = cnn(input_var=self._input_ph,
                       filters=filters,
                       strides=strides,
                       name='cnn1',
                       padding='VALID',
                       hidden_w_init=tf.constant_initializer(1),
                       hidden_nonlinearity=self.hidden_nonlinearity)

    self.sess.run(tf.compat.v1.global_variables_initializer())
    result = self.sess.run(self.cnn,
                           feed_dict={self._input_ph: self.obs_input})

    # filter value after 3 layers of conv
    filter_sum = 1
    for filter_iter, in_channel in zip(filters, in_channels):
        filter_sum *= filter_iter[1][0] * filter_iter[1][1] * in_channel

    height_size = self.input_height
    width_size = self.input_width
    for filter_iter, stride in zip(filters, strides):
        height_size = int((height_size - filter_iter[1][0]) / stride) + 1
        width_size = int((width_size - filter_iter[1][1]) / stride) + 1
    flatten_shape = height_size * width_size * filters[-1][0]

    # flatten
    h_out = np.full((self.batch_size, flatten_shape),
                    filter_sum,
                    dtype=np.float32)
    np.testing.assert_array_equal(h_out, result)
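# Why the identity-filter test can predict an exact constant: with
# hidden_w_init=tf.constant_initializer(1) and zero bias, every conv output
# unit is the plain sum over its receptive field. Assuming the observation
# input is filled with ones (as the fixture's obs_input presumably is), each
# layer multiplies that constant by kernel_height * kernel_width *
# in_channels. A minimal NumPy sketch of the arithmetic; the filters,
# in_channels, and sizes below are hypothetical, chosen only for illustration.
import numpy as np

filters = ((32, (3, 3)), (64, (3, 3)))  # ((out_channels, (h, w)), ...)
in_channels = (3, 32)                   # input depth seen by each layer
input_height = input_width = 10
strides = (1, 1)

filter_sum = 1
for (_, (kh, kw)), in_c in zip(filters, in_channels):
    filter_sum *= kh * kw * in_c        # 3*3*3 * 3*3*32 = 7776

# VALID padding: out = floor((in - kernel) / stride) + 1, per dimension.
h, w = input_height, input_width
for (_, (kh, kw)), stride in zip(filters, strides):
    h = (h - kh) // stride + 1
    w = (w - kw) // stride + 1

print(filter_sum, h * w * filters[-1][0])  # constant value, flattened length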
def test_invalid_padding(self):
    with pytest.raises(ValueError):
        with tf.compat.v1.variable_scope('CNN'):
            self.cnn = cnn(input_var=self._input_ph,
                           filters=((32, (3, 3)), ),
                           strides=(1, ),
                           name='cnn',
                           padding='UNKNOWN')
def _build(self, state_input, name=None):
    """Build model given input placeholder(s).

    Args:
        state_input (tf.Tensor): Tensor input for state.
        name (str): Inner model name, also the variable scope of the
            inner model, if it exists.

    Returns:
        tf.Tensor: Tensor output of the model.

    """
    del name
    return cnn(input_var=state_input,
               filter_dims=self._filter_dims,
               hidden_nonlinearity=self._hidden_nonlinearity,
               hidden_w_init=self._hidden_w_init,
               hidden_b_init=self._hidden_b_init,
               num_filters=self._num_filters,
               strides=self._strides,
               padding=self._padding,
               name='cnn')
def test_output_shape_valid(self, filter_sizes, out_channels, strides):
    with tf.compat.v1.variable_scope('CNN'):
        self.cnn = cnn(input_var=self._input_ph,
                       filter_dims=filter_sizes,
                       num_filters=out_channels,
                       strides=strides,
                       name='cnn',
                       padding='VALID',
                       hidden_w_init=tf.constant_initializer(1),
                       hidden_nonlinearity=self.hidden_nonlinearity)

    self.sess.run(tf.compat.v1.global_variables_initializer())
    result = self.sess.run(self.cnn,
                           feed_dict={self._input_ph: self.obs_input})

    current_size = self.input_width
    for filter_size, stride in zip(filter_sizes, strides):
        current_size = int((current_size - filter_size) / stride) + 1
    flatten_shape = current_size * current_size * out_channels[-1]
    assert result.shape == (self.batch_size, flatten_shape)
def test_output_with_random_filter(self, filter_sizes, in_channels,
                                   out_channels, strides):
    # Build a cnn with random filter weights
    with tf.compat.v1.variable_scope('CNN'):
        self.cnn2 = cnn(input_var=self._input_ph,
                        filter_dims=filter_sizes,
                        num_filters=out_channels,
                        strides=strides,
                        name='cnn1',
                        padding='VALID',
                        hidden_nonlinearity=self.hidden_nonlinearity)

    self.sess.run(tf.compat.v1.global_variables_initializer())
    result = self.sess.run(self.cnn2,
                           feed_dict={self._input_ph: self.obs_input})

    two_layer = len(filter_sizes) == 2
    # get weight values
    with tf.compat.v1.variable_scope('CNN', reuse=True):
        h0_w = tf.compat.v1.get_variable('cnn1/h0/weight').eval()
        h0_b = tf.compat.v1.get_variable('cnn1/h0/bias').eval()
        if two_layer:
            h1_w = tf.compat.v1.get_variable('cnn1/h1/weight').eval()
            h1_b = tf.compat.v1.get_variable('cnn1/h1/bias').eval()
    filter_weights = (h0_w, h1_w) if two_layer else (h0_w, )
    filter_bias = (h0_b, h1_b) if two_layer else (h0_b, )

    # convolution according to TensorFlow's approach
    input_val = convolve(_input=self.obs_input,
                         filter_weights=filter_weights,
                         filter_bias=filter_bias,
                         strides=strides,
                         filter_sizes=filter_sizes,
                         in_channels=in_channels,
                         hidden_nonlinearity=self.hidden_nonlinearity)

    # flatten
    dense_out = input_val.reshape((self.batch_size, -1)).astype(np.float32)
    np.testing.assert_array_almost_equal(dense_out, result)
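# The convolve helper above replays TensorFlow's conv arithmetic in NumPy so
# the test has an independent reference. A hedged sketch of what such a
# reference can look like (an illustration, not the project's actual helper:
# it infers kernel shapes from the HWIO weights instead of taking
# filter_sizes/in_channels, and uses np.tanh as a stand-in nonlinearity):
import numpy as np


def convolve_ref(_input, filter_weights, filter_bias, strides,
                 hidden_nonlinearity=np.tanh):
    """VALID-padding NHWC convolution, one layer per (weight, bias, stride)."""
    out = _input
    for weight, bias, stride in zip(filter_weights, filter_bias, strides):
        kh, kw, _, out_c = weight.shape
        n, ih, iw, _ = out.shape
        oh = (ih - kh) // stride + 1
        ow = (iw - kw) // stride + 1
        result = np.empty((n, oh, ow, out_c))
        for i in range(oh):
            for j in range(ow):
                patch = out[:, i * stride:i * stride + kh,
                            j * stride:j * stride + kw, :]
                # contract the (kh, kw, in_c) receptive field against the
                # kernel for every output channel at once
                result[:, i, j, :] = np.tensordot(patch, weight,
                                                  axes=3) + bias
        out = hidden_nonlinearity(result)
    return out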
def _build(self, state_input, name=None):
    """Build model given input placeholder(s).

    Args:
        state_input (tf.Tensor): Tensor input for state.
        name (str): Inner model name, also the variable scope of the
            inner model, if it exists. One example is
            metarl.tf.models.Sequential.

    Returns:
        tf.Tensor: Tensor output of the model.

    """
    del name
    return cnn(input_var=state_input,
               filters=self._filters,
               hidden_nonlinearity=self._hidden_nonlinearity,
               hidden_w_init=self._hidden_w_init,
               hidden_b_init=self._hidden_b_init,
               strides=self._strides,
               padding=self._padding,
               name='cnn')
def test_output_shape_valid(self, filters, strides):
    with tf.compat.v1.variable_scope('CNN'):
        self.cnn = cnn(input_var=self._input_ph,
                       filters=filters,
                       strides=strides,
                       name='cnn',
                       padding='VALID',
                       hidden_w_init=tf.constant_initializer(1),
                       hidden_nonlinearity=self.hidden_nonlinearity)

    self.sess.run(tf.compat.v1.global_variables_initializer())
    result = self.sess.run(self.cnn,
                           feed_dict={self._input_ph: self.obs_input})

    height_size = self.input_height
    width_size = self.input_width
    for filter_iter, stride in zip(filters, strides):
        height_size = int((height_size - filter_iter[1][0]) / stride) + 1
        width_size = int((width_size - filter_iter[1][1]) / stride) + 1
    flatten_shape = height_size * width_size * filters[-1][0]
    assert result.shape == (self.batch_size, flatten_shape)
def test_output_shape_same(self, filters, strides):
    with tf.compat.v1.variable_scope('CNN'):
        self.cnn = cnn(input_var=self._input_ph,
                       filters=filters,
                       strides=strides,
                       name='cnn',
                       padding='SAME',
                       hidden_w_init=tf.constant_initializer(1),
                       hidden_nonlinearity=self.hidden_nonlinearity)

    self.sess.run(tf.compat.v1.global_variables_initializer())
    result = self.sess.run(self.cnn,
                           feed_dict={self._input_ph: self.obs_input})

    height_size = self.input_height
    width_size = self.input_width
    for stride in strides:
        height_size = int((height_size + stride - 1) / stride)
        width_size = int((width_size + stride - 1) / stride)
    flatten_shape = width_size * height_size * filters[-1][0]
    assert result.shape == (self.batch_size, flatten_shape)
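# With SAME padding the spatial size depends only on the stride:
# out = ceil(in / stride), regardless of kernel size, which is exactly what
# the (size + stride - 1) / stride loop above computes. A quick check with
# hypothetical numbers:
import math

input_size = 10
strides = (2, 2)

size = input_size
for stride in strides:
    size = (size + stride - 1) // stride   # integer ceiling division

assert size == math.ceil(math.ceil(input_size / 2) / 2)  # 10 -> 5 -> 3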
def _build(self, state_input, name=None):
    """Build model given input placeholder(s).

    Args:
        state_input (tf.Tensor): Placeholder for state input.
        name (str): Inner model name, also the variable scope of the
            inner model, if it exists.

    Returns:
        tf.Tensor: Sampled action.
        tf.Tensor: Mean.
        tf.Tensor: Parameterized log_std.
        tf.Tensor: log_std.
        metarl.tf.distributions.DiagonalGaussian: Policy distribution.

    """
    del name
    action_dim = self._output_dim

    with tf.compat.v1.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share a CNN
            b = np.concatenate([
                np.zeros(action_dim),
                np.full(action_dim, self._init_std_param)
            ], axis=0)  # yapf: disable

            mean_std_conv = cnn(
                input_var=state_input,
                filter_dims=self._filter_dims,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                num_filters=self._num_filters,
                strides=self._strides,
                padding=self._padding,
                name='mean_std_cnn')
            mean_std_network = mlp(
                mean_std_conv,
                output_dim=action_dim * 2,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=tf.constant_initializer(b),
                name='mean_std_network',
                layer_normalization=self._layer_normalization)
            with tf.compat.v1.variable_scope('mean_network'):
                mean_network = mean_std_network[..., :action_dim]
            with tf.compat.v1.variable_scope('log_std_network'):
                log_std_network = mean_std_network[..., action_dim:]
        else:
            # separate MLPs for mean and std networks
            # mean network
            mean_conv = cnn(input_var=state_input,
                            filter_dims=self._filter_dims,
                            hidden_nonlinearity=self._hidden_nonlinearity,
                            hidden_w_init=self._hidden_w_init,
                            hidden_b_init=self._hidden_b_init,
                            num_filters=self._num_filters,
                            strides=self._strides,
                            padding=self._padding,
                            name='mean_cnn')
            mean_network = mlp(
                mean_conv,
                output_dim=action_dim,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=self._output_b_init,
                name='mean_network',
                layer_normalization=self._layer_normalization)

            # std network
            if self._adaptive_std:
                log_std_conv = cnn(
                    input_var=state_input,
                    filter_dims=self._std_filter_dims,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    hidden_w_init=self._std_hidden_w_init,
                    hidden_b_init=self._std_hidden_b_init,
                    num_filters=self._std_num_filters,
                    strides=self._std_strides,
                    padding=self._std_padding,
                    name='log_std_cnn')
                log_std_network = mlp(
                    log_std_conv,
                    output_dim=action_dim,
                    hidden_sizes=self._std_hidden_sizes,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    hidden_w_init=self._std_hidden_w_init,
                    hidden_b_init=self._std_hidden_b_init,
                    output_nonlinearity=self._std_output_nonlinearity,
                    output_w_init=self._std_output_w_init,
                    output_b_init=tf.constant_initializer(
                        self._init_std_param),
                    name='log_std_network',
                    layer_normalization=self._layer_normalization)
            else:
                log_std_network = parameter(
                    input_var=state_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(
                        self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_network')

    mean_var = mean_network
    std_param = log_std_network

    with tf.compat.v1.variable_scope('std_limits'):
        if self._min_std_param is not None:
            std_param = tf.maximum(std_param, self._min_std_param)
        if self._max_std_param is not None:
            std_param = tf.minimum(std_param, self._max_std_param)

    with tf.compat.v1.variable_scope('std_parameterization'):
        # build std_var with std parameterization
        if self._std_parameterization == 'exp':
            log_std_var = std_param
        else:
            # we know it must be softplus here
            log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

    dist = DiagonalGaussian(self._output_dim)
    rnd = tf.random.normal(shape=mean_var.get_shape().as_list()[1:])
    action_var = rnd * tf.exp(log_std_var) + mean_var

    return action_var, mean_var, log_std_var, std_param, dist
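# How the two std parameterizations relate: under 'exp', std =
# exp(std_param), so log_std is the parameter itself; under 'softplus',
# std = log(1 + exp(std_param)), which is why _build takes the double log.
# A quick NumPy check with hypothetical parameter values:
import numpy as np

std_param = np.array([-1.0, 0.0, 2.0])

log_std_exp = std_param                          # 'exp' parameterization

std_softplus = np.log(1. + np.exp(std_param))    # softplus(std_param)
log_std_softplus = np.log(std_softplus)          # the double log in _build

# exp(log_std) recovers the softplus std, as the sampling code relies on
np.testing.assert_allclose(np.exp(log_std_softplus), std_softplus)
print(std_softplus)  # approx [0.3133 0.6931 2.1269]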
def _build(self, state_input, name=None):
    """Build model given input placeholder(s).

    Args:
        state_input (tf.Tensor): Placeholder for state input.
        name (str): Inner model name, also the variable scope of the
            inner model, if it exists. One example is
            metarl.tf.models.Sequential.

    Returns:
        tf.Tensor: Sampled action.
        tf.Tensor: Mean.
        tf.Tensor: Parameterized log_std.
        tf.Tensor: log_std.
        metarl.tf.distributions.DiagonalGaussian: Policy distribution.

    """
    del name
    action_dim = self._output_dim

    with tf.compat.v1.variable_scope('dist_params'):
        if self._std_share_network:
            # mean and std networks share a CNN
            b = np.concatenate([
                np.zeros(action_dim),
                np.full(action_dim, self._init_std_param)
            ], axis=0)  # yapf: disable

            mean_std_conv = cnn(
                input_var=state_input,
                filters=self._filters,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                strides=self._strides,
                padding=self._padding,
                name='mean_std_cnn')
            mean_std_network = mlp(
                mean_std_conv,
                output_dim=action_dim * 2,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=tf.constant_initializer(b),
                name='mean_std_network',
                layer_normalization=self._layer_normalization)
            with tf.compat.v1.variable_scope('mean_network'):
                mean_network = mean_std_network[..., :action_dim]
            with tf.compat.v1.variable_scope('log_std_network'):
                log_std_network = mean_std_network[..., action_dim:]
        else:
            # separate MLPs for mean and std networks
            # mean network
            mean_conv = cnn(input_var=state_input,
                            filters=self._filters,
                            hidden_nonlinearity=self._hidden_nonlinearity,
                            hidden_w_init=self._hidden_w_init,
                            hidden_b_init=self._hidden_b_init,
                            strides=self._strides,
                            padding=self._padding,
                            name='mean_cnn')
            mean_network = mlp(
                mean_conv,
                output_dim=action_dim,
                hidden_sizes=self._hidden_sizes,
                hidden_nonlinearity=self._hidden_nonlinearity,
                hidden_w_init=self._hidden_w_init,
                hidden_b_init=self._hidden_b_init,
                output_nonlinearity=self._output_nonlinearity,
                output_w_init=self._output_w_init,
                output_b_init=self._output_b_init,
                name='mean_network',
                layer_normalization=self._layer_normalization)

            # std network
            if self._adaptive_std:
                log_std_conv = cnn(
                    input_var=state_input,
                    filters=self._std_filters,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    hidden_w_init=self._std_hidden_w_init,
                    hidden_b_init=self._std_hidden_b_init,
                    strides=self._std_strides,
                    padding=self._std_padding,
                    name='log_std_cnn')
                log_std_network = mlp(
                    log_std_conv,
                    output_dim=action_dim,
                    hidden_sizes=self._std_hidden_sizes,
                    hidden_nonlinearity=self._std_hidden_nonlinearity,
                    hidden_w_init=self._std_hidden_w_init,
                    hidden_b_init=self._std_hidden_b_init,
                    output_nonlinearity=self._std_output_nonlinearity,
                    output_w_init=self._std_output_w_init,
                    output_b_init=tf.constant_initializer(
                        self._init_std_param),
                    name='log_std_network',
                    layer_normalization=self._layer_normalization)
            else:
                log_std_network = parameter(
                    input_var=state_input,
                    length=action_dim,
                    initializer=tf.constant_initializer(
                        self._init_std_param),
                    trainable=self._learn_std,
                    name='log_std_network')

    mean_var = mean_network
    std_param = log_std_network

    with tf.compat.v1.variable_scope('std_limits'):
        if self._min_std_param is not None:
            std_param = tf.maximum(std_param, self._min_std_param)
        if self._max_std_param is not None:
            std_param = tf.minimum(std_param, self._max_std_param)

    with tf.compat.v1.variable_scope('std_parameterization'):
        # build std_var with std parameterization
        if self._std_parameterization == 'exp':
            log_std_var = std_param
        else:
            # we know it must be softplus here
            log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

    dist = DiagonalGaussian(self._output_dim)
    rnd = tf.random.normal(shape=mean_var.get_shape().as_list()[1:])
    action_var = rnd * tf.exp(log_std_var) + mean_var

    return action_var, mean_var, log_std_var, std_param, dist
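# The last three lines of _build are the reparameterization trick: noise is
# drawn from a standard normal once, then scaled and shifted, so the sampled
# action stays differentiable with respect to mean and log_std. A NumPy
# sketch with hypothetical head outputs for a 3-dimensional action:
import numpy as np

rng = np.random.default_rng(0)

mean_var = np.array([0.5, -0.2, 1.0])       # hypothetical mean head output
log_std_var = np.array([-1.0, -1.0, 0.0])   # hypothetical log-std output

# mirrors `rnd * tf.exp(log_std_var) + mean_var` in _build
rnd = rng.standard_normal(mean_var.shape)
action = rnd * np.exp(log_std_var) + mean_var
print(action)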