def test_is_pickleable(self):
    """Check that predictions are identical after a pickle round trip.

    A hidden-layer bias of the mean network is overwritten with ones
    before the regressor is pickled; the unpickled copy, restored in a
    fresh graph and session, must predict the same values.
    """
    obs_shape = (28, 28, 3)
    regressor = GaussianCNNRegressor(input_shape=obs_shape,
                                     num_filters=(3, 6),
                                     filter_dims=(3, 3),
                                     strides=(1, 1),
                                     padding='SAME',
                                     hidden_sizes=(32, ),
                                     output_dim=1,
                                     adaptive_std=False,
                                     use_trust_region=False)

    model_scope = 'GaussianCNNRegressor/GaussianCNNRegressorModel'
    with tf.compat.v1.variable_scope(model_scope, reuse=True):
        hidden_bias = tf.compat.v1.get_variable(
            'dist_params/mean_network/hidden_0/bias')
    hidden_bias.load(tf.ones_like(hidden_bias).eval())

    prediction_before = regressor.predict([np.ones(obs_shape)])
    serialized = pickle.dumps(regressor)

    # Restore into a brand-new graph so nothing is shared with the original.
    with tf.compat.v1.Session(graph=tf.Graph()):
        restored = pickle.loads(serialized)
        prediction_after = restored.predict([np.ones(obs_shape)])

    assert np.array_equal(prediction_before, prediction_after)
def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        regressor_args=None,
        name='GaussianCNNBaseline',
):
    """Validate the observation space and build the underlying regressor.

    Args:
        env_spec: Environment specification; its ``observation_space``
            must be an ``akro.Box`` whose shape has 2 or 3 dimensions.
        subsample_factor (float): Forwarded to the regressor.
        regressor_args (dict): Extra keyword arguments forwarded to
            ``GaussianCNNRegressor``. ``None`` means no extra arguments.
        name (str): Name of the baseline, also used as the regressor name.

    Raises:
        ValueError: If the observation space is not an ``akro.Box`` or
            its shape does not have 2 or 3 dimensions.

    """
    obs_space = env_spec.observation_space
    if (not isinstance(obs_space, akro.Box)
            or len(obs_space.shape) not in (2, 3)):
        raise ValueError(
            '{} can only process 2D, 3D akro.Image or'
            ' akro.Box observations, but received an env_spec with '
            'observation_space of type {} and shape {}'.format(
                type(self).__name__,
                type(obs_space).__name__, obs_space.shape))

    super().__init__(env_spec)
    if regressor_args is None:
        regressor_args = {}

    self._regressor = GaussianCNNRegressor(
        input_shape=obs_space.shape,
        output_dim=1,
        subsample_factor=subsample_factor,
        name=name,
        **regressor_args)
    self.name = name
    self.env_spec = env_spec
def test_log_likelihood_sym(self, output_dim):
    """Compare the symbolic log-likelihood with the distribution's value.

    The op returned by ``log_likelihood_sym`` is evaluated on one sample
    and must match, within an absolute tolerance of 1e-5, the
    log-likelihood computed by the model's distribution from the
    regressor's predicted mean and log-std.

    Args:
        output_dim (int): Size of the label vector fed to the test.

    """
    obs_shape = (28, 28, 3)
    # NOTE(review): the regressor is built with output_dim=1 although the
    # label placeholder and label below use the ``output_dim`` argument;
    # confirm this is intended.
    regressor = GaussianCNNRegressor(input_shape=obs_shape,
                                     filters=((3, (3, 3)), (6, (3, 3))),
                                     strides=(1, 1),
                                     padding='SAME',
                                     hidden_sizes=(32, ),
                                     output_dim=1,
                                     adaptive_std=False,
                                     use_trust_region=False)

    input_ph = tf.compat.v1.placeholder(tf.float32,
                                        shape=(None, ) + obs_shape)
    label_ph = tf.compat.v1.placeholder(dtype=tf.float32,
                                        name='ys',
                                        shape=(None, output_dim))

    sample = np.full(obs_shape, 0.5)
    target = np.ones(output_dim)

    ll_op = regressor.log_likelihood_sym(input_ph, label_ph, name='ll_sym')
    feed = {input_ph: [sample], label_ph: [target]}
    ll_symbolic = self.sess.run(ll_op, feed_dict=feed)

    mean, log_std = regressor._f_pdists([sample])
    dist = regressor.model.networks['default'].dist
    ll_reference = dist.log_likelihood([target],
                                       dict(mean=mean, log_std=log_std))

    assert np.allclose(ll_reference, ll_symbolic, rtol=0, atol=1e-5)
def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        regressor_args=None,
        name='GaussianCNNBaseline',
):
    """Build the underlying regressor for this baseline.

    Args:
        env_spec: Environment specification; the shape of its
            ``observation_space`` is used as the regressor input shape.
        subsample_factor (float): Forwarded to the regressor.
        regressor_args (dict): Extra keyword arguments forwarded to
            ``GaussianCNNRegressor``. ``None`` means no extra arguments.
        name (str): Name of the baseline, also used as the regressor name.

    """
    super().__init__(env_spec)

    extra_kwargs = {} if regressor_args is None else regressor_args
    obs_shape = env_spec.observation_space.shape

    self._regressor = GaussianCNNRegressor(
        input_shape=obs_shape,
        output_dim=1,
        subsample_factor=subsample_factor,
        name=name,
        **extra_kwargs)
    self.name = name
def test_fit_unnormalized(self):
    """Fit with normalization disabled and check the normalizer state.

    After 20 fitting passes the mean absolute prediction error on the
    test data must be at most 0.1. Because both ``normalize_inputs`` and
    ``normalize_outputs`` are False, the stored means are expected to be
    zeros and the stored standard deviations ones.
    """
    gcr = GaussianCNNRegressor(input_shape=(10, 10, 3),
                               num_filters=(3, 6),
                               filter_dims=(3, 3),
                               strides=(1, 1),
                               padding='SAME',
                               hidden_sizes=(32, ),
                               output_dim=1,
                               adaptive_std=True,
                               normalize_inputs=False,
                               normalize_outputs=False)

    train_data, test_data = get_train_test_data()
    observations, returns = train_data

    for _ in range(20):
        gcr.fit(observations, returns)

    paths, expected = test_data
    prediction = gcr.predict(paths['observations'])

    average_error = 0.0
    for i, exp in enumerate(expected):
        average_error += np.abs(exp - prediction[i])
    average_error /= len(expected)
    assert average_error <= 0.1

    network = gcr.model.networks['default']

    x_mean = self.sess.run(network.x_mean)
    x_mean_expected = np.zeros_like(x_mean)
    x_std = self.sess.run(network.x_std)
    x_std_expected = np.ones_like(x_std)
    assert np.array_equal(x_mean, x_mean_expected)
    assert np.array_equal(x_std, x_std_expected)

    y_mean = self.sess.run(network.y_mean)
    y_mean_expected = np.zeros_like(y_mean)
    y_std = self.sess.run(network.y_std)
    y_std_expected = np.ones_like(y_std)
    assert np.allclose(y_mean, y_mean_expected)
    assert np.allclose(y_std, y_std_expected)
def test_fit_normalized(self):
    """Fit with a trust region and check the stored normalizer statistics.

    After 20 fitting passes the mean absolute prediction error on the
    test data must be at most 0.1, and the network's input/output means
    and standard deviations must match those computed with NumPy from
    the training observations and returns.
    """
    regressor = GaussianCNNRegressor(input_shape=(10, 10, 3),
                                     filters=((3, (3, 3)), (6, (3, 3))),
                                     strides=(1, 1),
                                     padding='SAME',
                                     hidden_sizes=(32, ),
                                     output_dim=1,
                                     adaptive_std=False,
                                     use_trust_region=True)

    (observations, returns), (paths, expected) = get_train_test_data()

    for _ in range(20):
        regressor.fit(observations, returns)

    prediction = regressor.predict(paths['observations'])

    # Accumulate from 0.0 in index order, then divide by the sample count.
    total_error = sum(
        (np.abs(exp - prediction[i]) for i, exp in enumerate(expected)),
        0.0)
    average_error = total_error / len(expected)
    assert average_error <= 0.1

    network = regressor.model.networks['default']

    x_mean = self.sess.run(network.x_mean)
    x_std = self.sess.run(network.x_std)
    assert np.allclose(x_mean, np.mean(observations, axis=0, keepdims=True))
    assert np.allclose(x_std, np.std(observations, axis=0, keepdims=True))

    y_mean = self.sess.run(network.y_mean)
    y_std = self.sess.run(network.y_std)
    assert np.allclose(y_mean, np.mean(returns, axis=0, keepdims=True))
    assert np.allclose(y_std, np.std(returns, axis=0, keepdims=True))
def test_fit_without_trusted_region(self):
    """Fit with ``use_trust_region=False`` and check prediction accuracy.

    After 20 fitting passes the mean absolute prediction error on the
    test data must be at most 0.1.
    """
    regressor = GaussianCNNRegressor(input_shape=(10, 10, 3),
                                     filters=((3, (3, 3)), (6, (3, 3))),
                                     strides=(1, 1),
                                     padding='SAME',
                                     hidden_sizes=(32, ),
                                     output_dim=1,
                                     adaptive_std=False,
                                     use_trust_region=False)

    (observations, returns), (paths, expected) = get_train_test_data()

    for _ in range(20):
        regressor.fit(observations, returns)

    prediction = regressor.predict(paths['observations'])

    # Accumulate from 0.0 in index order, then divide by the sample count.
    total_error = sum(
        (np.abs(exp - prediction[i]) for i, exp in enumerate(expected)),
        0.0)
    average_error = total_error / len(expected)
    assert average_error <= 0.1
def test_optimizer_args(self, mock_lbfgs):
    """Check that optimizer arguments reach the optimizer constructor.

    Args:
        mock_lbfgs: Mock whose ``return_value`` must end up as the
            regressor's optimizer and which must have been called with
            the supplied arguments (presumably injected by a patch
            decorator outside this view; verify).

    """
    optimizer_kwargs = {'max_opt_itr': 25}
    regressor = GaussianCNNRegressor(input_shape=(10, 10, 3),
                                     filters=((3, (3, 3)), (6, (3, 3))),
                                     strides=(1, 1),
                                     padding='SAME',
                                     hidden_sizes=(32, ),
                                     output_dim=1,
                                     optimizer=LbfgsOptimizer,
                                     optimizer_args=optimizer_kwargs,
                                     use_trust_region=True)

    built_optimizer = regressor._optimizer
    assert mock_lbfgs.return_value is built_optimizer

    mock_lbfgs.assert_called_with(max_opt_itr=25)
def test_fit_smaller_subsample_factor(self):
    """Fit with ``subsample_factor=0.9`` and check prediction accuracy.

    After 20 fitting passes the mean absolute prediction error on the
    test data must be at most 0.05.
    """
    gcr = GaussianCNNRegressor(input_shape=(10, 10, 3),
                               num_filters=(3, 6),
                               filter_dims=(3, 3),
                               strides=(1, 1),
                               padding='SAME',
                               hidden_sizes=(32, ),
                               output_dim=1,
                               subsample_factor=0.9,
                               adaptive_std=False)

    train_data, test_data = get_train_test_data()
    observations, returns = train_data

    for _ in range(20):
        gcr.fit(observations, returns)

    paths, expected = test_data
    prediction = gcr.predict(paths['observations'])

    average_error = 0.0
    for i, exp in enumerate(expected):
        average_error += np.abs(exp - prediction[i])
    average_error /= len(expected)
    assert average_error <= 0.05
def test_is_pickleable2(self):
    """Check that the stored input mean survives a pickle round trip.

    The ``x_mean`` normalization variable is overwritten with ones
    before the regressor is pickled; the unpickled copy, restored in a
    fresh graph and session, must expose the same ``x_mean`` value.
    """
    obs_shape = (28, 28, 3)
    regressor = GaussianCNNRegressor(input_shape=obs_shape,
                                     filters=((3, (3, 3)), (6, (3, 3))),
                                     strides=(1, 1),
                                     padding='SAME',
                                     hidden_sizes=(32, ),
                                     output_dim=1,
                                     adaptive_std=False,
                                     use_trust_region=False)

    model_scope = 'GaussianCNNRegressor/GaussianCNNRegressorModel'
    with tf.compat.v1.variable_scope(model_scope, reuse=True):
        x_mean_var = tf.compat.v1.get_variable('normalized_vars/x_mean')
    x_mean_var.load(tf.ones_like(x_mean_var).eval())

    x_mean_before = regressor.model.networks['default'].x_mean.eval()
    serialized = pickle.dumps(regressor)

    # Restore into a brand-new graph so nothing is shared with the original.
    with tf.compat.v1.Session(graph=tf.Graph()):
        restored = pickle.loads(serialized)
        x_mean_after = restored.model.networks['default'].x_mean.eval()

    assert np.array_equal(x_mean_before, x_mean_after)
class GaussianCNNBaseline(Baseline):
    """GaussianCNNBaseline With Model.

    It fits the input data to a gaussian distribution estimated by a CNN.

    Args:
        env_spec (metarl.envs.env_spec.EnvSpec): Environment specification.
        subsample_factor (float): The factor to subsample the data. By
            default it is 1.0, which means using all the data.
        regressor_args (dict): Arguments for regressor.
        name (str): Name of baseline.

    Raises:
        ValueError: If the observation space is not an akro.Box or its
            shape does not have 2 or 3 dimensions.

    """

    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            regressor_args=None,
            name='GaussianCNNBaseline',
    ):
        obs_space = env_spec.observation_space
        if (not isinstance(obs_space, akro.Box)
                or len(obs_space.shape) not in (2, 3)):
            raise ValueError(
                '{} can only process 2D, 3D akro.Image or'
                ' akro.Box observations, but received an env_spec with '
                'observation_space of type {} and shape {}'.format(
                    type(self).__name__,
                    type(obs_space).__name__, obs_space.shape))

        super().__init__(env_spec)
        if regressor_args is None:
            regressor_args = {}

        self._regressor = GaussianCNNRegressor(
            input_shape=obs_space.shape,
            output_dim=1,
            subsample_factor=subsample_factor,
            name=name,
            **regressor_args)
        self.name = name
        self.env_spec = env_spec

    def _preprocess_observations(self, observations):
        """Apply pixel normalization when the observation space is an image.

        Args:
            observations (numpy.ndarray): Batch of observations.

        Returns:
            numpy.ndarray: The result of ``normalize_pixel_batch`` if the
                observation space is an akro.Image, otherwise the input
                unchanged.

        """
        if isinstance(self.env_spec.observation_space, akro.Image):
            return normalize_pixel_batch(observations)
        return observations

    def fit(self, paths):
        """Fit regressor based on paths.

        Args:
            paths (list[dict[str, numpy.ndarray]]): Sample paths. Each path
                must provide 'observations' and 'returns' entries; they are
                concatenated across paths before fitting.

        """
        observations = self._preprocess_observations(
            np.concatenate([p['observations'] for p in paths]))
        returns = np.concatenate([p['returns'] for p in paths])
        # The regressor is fit on returns shaped as a single column.
        self._regressor.fit(observations, returns.reshape((-1, 1)))

    def predict(self, path):
        """Predict value based on a single path.

        Args:
            path (dict[str, numpy.ndarray]): Sample path; only its
                'observations' entry is read.

        Returns:
            numpy.ndarray: Predicted value, flattened to one dimension.

        """
        observations = self._preprocess_observations(path['observations'])
        return self._regressor.predict(observations).flatten()

    def get_param_values(self):
        """Get parameter values.

        Returns:
            List[np.ndarray]: A list of values of each parameter, as
                returned by the underlying regressor.

        """
        return self._regressor.get_param_values()

    def set_param_values(self, flattened_params):
        """Set param values.

        Args:
            flattened_params (np.ndarray): A numpy array of parameter values,
                forwarded to the underlying regressor.

        """
        self._regressor.set_param_values(flattened_params)

    def get_params_internal(self):
        """Get the params, which are the trainable variables.

        Returns:
            List[tf.Variable]: A list of trainable variables in the current
                variable scope, as returned by the underlying regressor.

        """
        return self._regressor.get_params_internal()
class GaussianCNNBaseline(Baseline):
    """Baseline backed by a Gaussian CNN regressor.

    It fits the input data to a gaussian distribution estimated by a CNN.

    Args:
        env_spec (metarl.envs.env_spec.EnvSpec): Environment specification.
        subsample_factor (float): The factor to subsample the data. By
            default it is 1.0, which means using all the data.
        regressor_args (dict): Arguments for regressor.
        name (str): Name of baseline.

    """

    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            regressor_args=None,
            name='GaussianCNNBaseline',
    ):
        super().__init__(env_spec)

        extra_kwargs = {} if regressor_args is None else regressor_args
        obs_shape = env_spec.observation_space.shape

        self._regressor = GaussianCNNRegressor(
            input_shape=obs_shape,
            output_dim=1,
            subsample_factor=subsample_factor,
            name=name,
            **extra_kwargs)
        self.name = name

    def fit(self, paths):
        """Fit the regressor on the given sample paths.

        Args:
            paths: Iterable of sample paths; the 'observations' and
                'returns' entries of every path are concatenated.

        """
        obs_batch = np.concatenate([path['observations'] for path in paths])
        return_batch = np.concatenate([path['returns'] for path in paths])
        # The regressor is fit on returns shaped as a single column.
        targets = return_batch.reshape((-1, 1))
        self._regressor.fit(obs_batch, targets)

    def predict(self, path):
        """Predict values for the observations of one path.

        Args:
            path: Sample path; only its 'observations' entry is read.

        Returns:
            numpy.ndarray: Predicted value, flattened to one dimension.

        """
        predicted = self._regressor.predict(path['observations'])
        return predicted.flatten()

    def get_param_values(self):
        """Return the parameter values of the underlying regressor.

        Returns:
            List[np.ndarray]: A list of values of each parameter.

        """
        regressor = self._regressor
        return regressor.get_param_values()

    def set_param_values(self, flattened_params):
        """Forward new parameter values to the underlying regressor.

        Args:
            flattened_params (np.ndarray): A numpy array of parameter values.

        """
        regressor = self._regressor
        regressor.set_param_values(flattened_params)

    def get_params_internal(self):
        """Return the trainable variables of the underlying regressor.

        Returns:
            List[tf.Variable]: A list of trainable variables in the current
                variable scope.

        """
        regressor = self._regressor
        return regressor.get_params_internal()