def test_get_action(self, obs_dim, task_num, latent_dim, action_dim):
    """Sample actions from every policy entry point and check their bounds.

    Actions are requested from a latent, from a one-hot task and from a
    task-augmented observation, first one at a time and then in batches
    of three. Every returned action must be contained in the
    environment's action space.
    """
    env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    embedding_spec = InOutSpec(
        input_space=akro.Box(low=np.zeros(task_num), high=np.ones(task_num)),
        output_space=akro.Box(low=np.zeros(latent_dim),
                              high=np.ones(latent_dim)))
    policy = GaussianMLPTaskEmbeddingPolicy(
        env_spec=env.spec, encoder=GaussianMLPEncoder(embedding_spec))

    env.reset()
    obs = env.step(1).observation
    latent = np.random.random((latent_dim, ))
    # One-hot encoding of the first task.
    task = np.zeros(task_num)
    task[0] = 1

    # Single-sample entry points.
    from_latent, _ = policy.get_action_given_latent(obs, latent)
    from_task, _ = policy.get_action_given_task(obs, task)
    from_aug_obs, _ = policy.get_action(np.concatenate([obs.flatten(), task]))
    for single_action in (from_latent, from_task, from_aug_obs):
        assert env.action_space.contains(single_action)

    # Batched entry points, each fed three copies of the same input.
    batch_size = 3
    obses = [obs] * batch_size
    latents = [latent] * batch_size
    tasks = [task] * batch_size
    aug_obses = [np.concatenate([obs.flatten(), task])] * batch_size
    batch_from_latent, _ = policy.get_actions_given_latents(obses, latents)
    batch_from_task, _ = policy.get_actions_given_tasks(obses, tasks)
    batch_from_aug_obs, _ = policy.get_actions(aug_obses)
    for action in chain(batch_from_latent, batch_from_task,
                        batch_from_aug_obs):
        assert env.action_space.contains(action)
def test_get_vars(self):
    """Check the policy's variable lists and the actions it then samples.

    The trainable and global variable lists must match and contain the
    expected number of variables. After assigning constant values to
    every variable, the sampled action (not the ``(action, info)`` tuple)
    is checked for non-zero entries.
    """
    obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 5, 2
    env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    embedding_spec = InOutSpec(
        input_space=akro.Box(low=np.zeros(task_num), high=np.ones(task_num)),
        output_space=akro.Box(low=np.zeros(latent_dim),
                              high=np.ones(latent_dim)))
    encoder = GaussianMLPEncoder(embedding_spec, hidden_sizes=[32, 32, 32])
    policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                            encoder=encoder,
                                            hidden_sizes=[32, 32, 32])

    vars1 = sorted(policy.get_trainable_vars(), key=lambda v: v.name)
    vars2 = sorted(policy.get_global_vars(), key=lambda v: v.name)
    assert vars1 == vars2
    # Two networks, each with 4 layers * (1 weight + 1 bias) + 1 log_std.
    assert len(vars1) == 2 * (4 * 2 + 1)

    obs = np.random.random(obs_dim)
    latent = np.random.random((latent_dim, ))

    for var in vars1:
        var.assign(np.ones(var.shape))
    # get_action_given_latent returns (action, info). Comparing the tuple
    # itself with 0 is always "not equal", which made the check vacuous,
    # so compare the action array only.
    action, _ = policy.get_action_given_latent(obs, latent)
    assert np.any(action != 0)

    for var in vars1:
        var.assign(np.zeros(var.shape))
    action, _ = policy.get_action_given_latent(obs, latent)
    assert not np.all(action == 0)
def get_env_spec(cls, env_spec, latent_dim, num_skills, module):
    """Build the input/output spec for one of the supported modules.

    Args:
        env_spec: Environment specification. Only the shapes of its
            ``observation_space`` and ``action_space`` are read.
        latent_dim (int): Latent dimension.
        num_skills (int): Number of skills.
        module (str): Module to build the spec for. One of ``'encoder'``,
            ``'vf'``, ``'controller_policy'`` or ``'qf'``.

    Returns:
        InOutSpec or EnvSpec: ``InOutSpec`` for ``'encoder'``, ``EnvSpec``
            for every other module. Both spaces are float32 ``akro.Box``
            spaces bounded by [-1, 1].

    Raises:
        ValueError: If ``module`` is not one of the supported names.

    """
    obs_dim = int(np.prod(env_spec.observation_space.shape))
    action_dim = int(np.prod(env_spec.action_space.shape))

    if module == 'encoder':
        in_dim = obs_dim + action_dim + num_skills + 1
        out_dim = latent_dim * 2
    elif module == 'vf':
        in_dim = obs_dim
        out_dim = latent_dim
    elif module in ('controller_policy', 'qf'):
        in_dim = obs_dim + latent_dim
        out_dim = num_skills
    else:
        # Previously an unknown name fell through every branch and failed
        # later with an unbound-local error; fail early and explicitly.
        raise ValueError(
            "Unknown module {!r}; expected 'encoder', 'vf', "
            "'controller_policy' or 'qf'.".format(module))

    in_space = akro.Box(low=-1, high=1, shape=(in_dim, ), dtype=np.float32)
    out_space = akro.Box(low=-1, high=1, shape=(out_dim, ), dtype=np.float32)

    if module == 'encoder':
        return InOutSpec(in_space, out_space)
    return EnvSpec(in_space, out_space)
def __init__(self,
             goal=np.array((1., 1.), dtype=np.float32),
             arena_size=5.,
             done_bonus=0.,
             never_done=False,
             max_episode_length=math.inf):
    """Store the task configuration and build the spaces and env spec.

    Args:
        goal: Goal position. It is copied into a float32 array, and every
            coordinate must lie within ``[-arena_size, arena_size]``.
        arena_size (float): Bound used to validate ``goal``.
        done_bonus (float): Stored on the instance as ``_done_bonus``.
        never_done (bool): Stored on the instance as ``_never_done``.
        max_episode_length: Stored on the instance and passed to the
            ``EnvSpec``.

    """
    goal = np.array(goal, dtype=np.float32)

    self._goal = goal
    self._done_bonus = done_bonus
    self._never_done = never_done
    self._arena_size = arena_size

    inside_arena = (goal >= -arena_size) & (goal <= arena_size)
    assert inside_arena.all()

    self._step_cnt = None
    self._max_episode_length = max_episode_length
    self._visualize = False

    # The point starts at the origin, with the same shape/dtype as the goal.
    self._point = np.zeros_like(self._goal)
    self._task = {'goal': self._goal}

    self._observation_space = akro.Box(low=-np.inf,
                                       high=np.inf,
                                       shape=(3, ),
                                       dtype=np.float32)
    self._action_space = akro.Box(low=-0.1,
                                  high=0.1,
                                  shape=(2, ),
                                  dtype=np.float32)
    # Built last: it reads the action/observation space attributes.
    self._spec = EnvSpec(action_space=self.action_space,
                         observation_space=self.observation_space,
                         max_episode_length=max_episode_length)
def test_new_time_step(sample_data):
    """Check that TimeStep keeps the exact objects it was built from.

    The step is built once from the fixture as-is, then rebuilt after
    replacing the env spec and sampling a new observation, next
    observation and action from wider Box spaces.
    """
    step = TimeStep(**sample_data)
    stored_fields = ('env_spec', 'observation', 'action', 'reward',
                     'step_type', 'env_info', 'agent_info')
    for field in stored_fields:
        assert getattr(step, field) is sample_data[field]
    del step

    obs_space = akro.Box(low=-1, high=10, shape=(4, 3, 2), dtype=np.float32)
    act_space = akro.Box(low=-1, high=10, shape=(4, 2), dtype=np.float32)
    sample_data['env_spec'] = EnvSpec(obs_space, act_space)

    # Sample from spaces with the same shapes but wider bounds.
    obs_space = akro.Box(low=-1000,
                         high=1000,
                         shape=(4, 3, 2),
                         dtype=np.float32)
    act_space = akro.Box(low=-1000, high=1000, shape=(4, 2), dtype=np.float32)
    sample_data['observation'] = obs_space.sample()
    sample_data['next_observation'] = obs_space.sample()
    sample_data['action'] = act_space.sample()

    step = TimeStep(**sample_data)
    for field in ('observation', 'next_observation', 'action'):
        assert getattr(step, field) is sample_data[field]
def get_env_spec(cls, env_spec, latent_dim, module):
    """Get the spec of a module, sized with the latent dimension.

    Args:
        env_spec (EnvSpec): Environment specification. Only the shapes of
            its observation and action spaces are read.
        latent_dim (int): Latent dimension.
        module (str): Module to get the spec for, ``'encoder'`` or
            ``'vf'``.

    Returns:
        InOutSpec or EnvSpec: ``InOutSpec`` for ``'encoder'`` and
            ``EnvSpec`` for ``'vf'``. Both spaces are float32
            ``akro.Box`` spaces bounded by [-1, 1].

    Raises:
        ValueError: If ``module`` is neither ``'encoder'`` nor ``'vf'``.

    """
    obs_dim = int(np.prod(env_spec.observation_space.shape))
    action_dim = int(np.prod(env_spec.action_space.shape))

    if module == 'encoder':
        in_dim = obs_dim + action_dim + 1
        out_dim = latent_dim * 2
    elif module == 'vf':
        in_dim = obs_dim
        out_dim = latent_dim
    else:
        # Previously an unknown name fell through both branches and failed
        # later with an unbound-local error; fail early and explicitly.
        raise ValueError(
            "Unknown module {!r}; expected 'encoder' or 'vf'.".format(module))

    in_space = akro.Box(low=-1, high=1, shape=(in_dim, ), dtype=np.float32)
    out_space = akro.Box(low=-1, high=1, shape=(out_dim, ), dtype=np.float32)

    if module == 'encoder':
        return InOutSpec(in_space, out_space)
    return EnvSpec(in_space, out_space)
def load_model(self, model_path=None):
    """Create the OpenSim model and the gym-compatible spec and spaces.

    Args:
        model_path: Optional model path. When truthy it replaces
            ``self.model_path`` before the model is created.

    """
    if model_path:
        self.model_path = model_path

    self.osim_model = OsimModel(self.model_path,
                                self.visualize,
                                integrator_accuracy=self.integrator_accuracy)

    # Create spec, action and observation space mocks for compatibility
    # with OpenAI gym.
    self.spec = Spec()
    self.spec.timestep_limit = self.time_limit
    action_shape = (self.osim_model.get_action_space_size(), )
    self.spec.action_space = akro.Box(low=0, high=1, shape=action_shape)
    obs_shape = (self.get_observation_space_size(), )
    self.spec.observation_space = akro.Box(low=-math.pi * 100,
                                           high=math.pi * 100,
                                           shape=obs_shape)

    # Plain (low, high) list pairs, converted to gym spaces below. Both
    # observation bounds are zero-filled here; the +/- 100*pi bounds are
    # only used for self.spec.observation_space above.
    action_low = [0.0] * self.osim_model.get_action_space_size()
    action_high = [1.0] * self.osim_model.get_action_space_size()
    self.action_space = (action_low, action_high)
    obs_low = [0] * self.get_observation_space_size()
    obs_high = [0] * self.get_observation_space_size()
    self.observation_space = (obs_low, obs_high)

    self.action_space = convert_to_gym(self.action_space)
    self.observation_space = convert_to_gym(self.observation_space)
def test_env_spec_pickleable():
    """An EnvSpec must survive a cloudpickle round trip with equal fields."""
    original = EnvSpec(akro.Box(-1, 1, (1, )), akro.Box(-2, 2, (2, )), 500)
    restored = cloudpickle.loads(cloudpickle.dumps(original))

    assert restored
    compared_fields = ('action_space', 'observation_space',
                       'max_episode_length')
    for field in compared_fields:
        assert getattr(restored, field) == getattr(original, field)
def test_auxiliary(self):
    """Check the built networks' output sizes and the policy's spaces."""
    obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 2, 2
    env = GymEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    task_space = akro.Box(low=np.zeros(task_num), high=np.ones(task_num))
    latent_space = akro.Box(low=np.zeros(latent_dim),
                            high=np.ones(latent_dim))
    embedding_spec = InOutSpec(input_space=task_space,
                               output_space=latent_space)
    encoder = GaussianMLPEncoder(embedding_spec)
    policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                            encoder=encoder)

    placeholder_shape = (None, None, 2)
    obs_input = tf.compat.v1.placeholder(tf.float32, shape=placeholder_shape)
    task_input = tf.compat.v1.placeholder(tf.float32,
                                          shape=placeholder_shape)
    networks = policy.build(obs_input, task_input)
    # The first network's distribution is compared against the action
    # space, the second against the latent dimension.
    dist = networks[0].dist
    encoder_dist = networks[1].dist

    assert dist.loc.get_shape().as_list()[-1] == env.action_space.flat_dim
    assert policy.encoder == encoder
    assert policy.latent_space.flat_dim == latent_dim
    assert policy.task_space.flat_dim == task_num
    expected_aug_dim = env.observation_space.flat_dim + task_num
    assert policy.augmented_observation_space.flat_dim == expected_aug_dim
    assert encoder_dist.loc.get_shape().as_list()[-1] == latent_dim
def test_log_multitask_performance_task_id():
    """Check per-task statistics logged by log_multitask_performance.

    Four trajectories carry task ids 1, 3, 1 and 4, while names are given
    for ids 1, 3, 4 and 5. The named task without any trajectory
    ('env4') is expected to log zero trajectories and NaN statistics.
    """
    lengths = np.array([10, 5, 1, 1])
    batch = TrajectoryBatch(
        EnvSpec(akro.Box(np.array([0., 0., 0.]), np.array([1., 1., 1.])),
                akro.Box(np.array([-1., -1.]), np.array([0., 0.]))),
        observations=np.ones((sum(lengths), 3), dtype=np.float32),
        last_observations=np.ones((len(lengths), 3), dtype=np.float32),
        actions=np.zeros((sum(lengths), 2), dtype=np.float32),
        rewards=np.array([
            0.34026529, 0.58263177, 0.84307509, 0.97651095, 0.81723901,
            0.22631398, 0.03421301, 0.97515046, 0.64311832, 0.65068933,
            0.17657714, 0.04783857, 0.73904013, 0.41364329, 0.52235551,
            0.24203526, 0.43328910
        ]),
        terminals=np.array(
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1],
            dtype=bool),
        env_infos={
            'success':
            np.array([0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1],
                     dtype=bool),
            'task_id':
            np.array([1] * 10 + [3] * 5 + [1] + [4])
        },
        agent_infos={},
        lengths=lengths)

    # The context manager closes (and thereby removes) the temporary file
    # even when logging or parsing fails, instead of leaking the handle.
    with tempfile.NamedTemporaryFile() as log_file:
        csv_output = dowel.CsvOutput(log_file.name)
        logger.add_output(csv_output)
        log_multitask_performance(7, batch, 0.8, {
            1: 'env1',
            3: 'env2',
            4: 'env3',
            5: 'env4'
        })
        logger.log(tabular)
        logger.dump_output_type(dowel.CsvOutput)
        with open(log_file.name, 'r') as file:
            rows = list(csv.DictReader(file))

    res = {k: float(r) for (k, r) in rows[0].items()}
    assert res['env1/Iteration'] == 7
    assert res['env2/Iteration'] == 7
    assert res['env3/Iteration'] == 7
    assert res['env4/Iteration'] == 7
    assert res['env1/NumTrajs'] == 2
    assert res['env2/NumTrajs'] == 1
    assert res['env3/NumTrajs'] == 1
    assert res['env4/NumTrajs'] == 0
    assert math.isclose(res['env1/SuccessRate'], 0.5)
    assert math.isclose(res['env2/SuccessRate'], 1.0)
    assert math.isclose(res['env3/SuccessRate'], 1.0)
    assert math.isnan(res['env4/SuccessRate'])
    assert math.isnan(res['env4/AverageReturn'])
def test_auxiliary(self):
    """Check variable counts, tensor shapes and the parameter helpers."""
    input_space = akro.Box(np.array([-1, -1]), np.array([1, 1]))
    latent_space = akro.Box(np.array([-2, -2, -2]), np.array([2, 2, 2]))
    embedding_spec = InOutSpec(input_space=input_space,
                               output_space=latent_space)
    embedding = GaussianMLPEncoder(embedding_spec, hidden_sizes=[32, 32, 32])
    task_input = tf.compat.v1.placeholder(tf.float32,
                                          shape=(None, None,
                                                 embedding.input_dim))
    embedding.build(task_input)

    # 9 variables: (3 hidden + 1 output) * (1 weight + 1 bias) + 1 log_std
    expected_var_count = 9
    assert len(embedding.get_params()) == expected_var_count
    assert len(embedding.get_global_vars()) == expected_var_count

    in_size = input_space.shape[0]
    latent_size = latent_space.shape[0]
    assert embedding.distribution.loc.get_shape().as_list()[-1] == latent_size
    assert embedding.input.shape.as_list() == [None, None, in_size]
    assert embedding.latent_mean.shape.as_list() == [None, None, latent_size]
    assert embedding.latent_std_param.shape.as_list() == [
        None, None, latent_size
    ]

    # To increase coverage in embeddings/base.py
    embedding.reset()

    assert embedding.input_dim == embedding_spec.input_space.flat_dim
    assert embedding.output_dim == embedding_spec.output_space.flat_dim

    # Weight and bias shapes of the four layers, followed by log_std.
    var_shapes = [
        (2, 32), (32, ),
        (32, 32), (32, ),
        (32, 32), (32, ),
        (32, 3), (3, ),
        (3, ),
    ]
    assert sorted(embedding.get_param_shapes()) == sorted(var_shapes)

    var_count = sum(np.prod(shape) for shape in var_shapes)
    embedding.set_param_values(np.ones(var_count))
    assert (embedding.get_param_values() == np.ones(var_count)).all()

    unflattened = embedding.flat_to_params(np.ones(var_count))
    assert sorted(map(np.shape, unflattened)) == sorted(var_shapes)
def observation_space(self):
    """Return the observation space.

    Returns:
        akro.Dict: Dict space with float32 Box entries bounded by
            [-200, 200]: 'achieved_goal' and 'desired_goal' of shape
            (3,) and 'observation' of shape (25,).

    """

    def bounded_box(size):
        # Every entry shares the same bounds and dtype; only size differs.
        return akro.Box(low=-200.,
                        high=200.,
                        shape=(size, ),
                        dtype=np.float32)

    return akro.Dict({
        'achieved_goal': bounded_box(3),
        'desired_goal': bounded_box(3),
        'observation': bounded_box(25),
    })
def test_set_output_size(kernel_sizes, hidden_channels, strides, pool_shape,
                         pool_stride):
    """A CNNModule with a (200,) output space must emit 200 features."""
    image_space = akro.Box(shape=[3, 19, 15], high=np.inf, low=-np.inf)
    feature_space = akro.Box(shape=[200], high=np.inf, low=-np.inf)
    model = CNNModule(InOutSpec(image_space, feature_space),
                      image_format='NCHW',
                      hidden_channels=hidden_channels,
                      kernel_sizes=kernel_sizes,
                      strides=strides,
                      pool_shape=[(pool_shape, pool_shape)],
                      pool_stride=[(pool_stride, pool_stride)],
                      layer_normalization=True)

    # Batch of 10 images laid out as (N, C, H, W).
    batch = torch.ones(10, 3, 19, 15)
    features = model(batch)
    assert features.shape == (10, 200)
def get_infer_spec(cls, env_spec, latent_dim, inference_window_size):
    """Get the embedding spec of the inference.

    Every `inference_window_size` timesteps in the trajectory will be
    used as the inference network input.

    Args:
        env_spec (garage.envs.EnvSpec): Environment spec.
        latent_dim (int): Latent dimension.
        inference_window_size (int): Length of inference window.

    Returns:
        garage.InOutSpec: Inference spec mapping the stacked observation
            window to the latent space.

    """
    latent_space = cls._get_latent_space(latent_dim)

    lower, upper = env_spec.observation_space.bounds
    flat_lower = env_spec.observation_space.flatten(lower)
    flat_upper = env_spec.observation_space.flatten(upper)

    # One row of flattened observation bounds per timestep in the window.
    window_lower = np.stack([flat_lower] * inference_window_size)
    window_upper = np.stack([flat_upper] * inference_window_size)

    return InOutSpec(akro.Box(window_lower, window_upper), latent_space)
def test_act_box_env_spec_mismatch_eps(eps_data):
    """EpisodeBatch must raise after the action space is swapped out.

    The fixture's env spec is replaced by one whose action space is a
    (4, 3, 2) Box; building the batch is then expected to raise a
    ValueError mentioning the action shape.
    """
    # Set up outside pytest.raises so that only the EpisodeBatch
    # construction is allowed to produce the expected error.
    eps_data['env_spec'] = EnvSpec(
        eps_data['env_spec'].observation_space,
        akro.Box(low=1, high=np.inf, shape=(4, 3, 2), dtype=np.float32))
    with pytest.raises(ValueError, match='Each action has shape'):
        EpisodeBatch(**eps_data)
def test_act_box_env_spec_mismatch_batch(batch_data):
    """TimeStepBatch must raise after the action space is swapped out.

    The fixture's env spec is replaced by one whose action space is a
    (4, 3, 2) Box; building the batch is then expected to raise a
    ValueError mentioning the actions.
    """
    # Set up outside pytest.raises so that only the TimeStepBatch
    # construction is allowed to produce the expected error.
    batch_data['env_spec'] = EnvSpec(
        batch_data['env_spec'].observation_space,
        akro.Box(low=1, high=np.inf, shape=(4, 3, 2), dtype=np.float32))
    with pytest.raises(ValueError, match='Each action has'):
        TimeStepBatch(**batch_data)
def test_log_performance():
    """Check the statistics log_performance writes for a four-episode batch.

    The logged values are dumped to a CSV file and the first row is
    compared against precomputed expectations.
    """
    lengths = np.array([10, 5, 1, 1])
    batch = EpisodeBatch(
        EnvSpec(akro.Box(np.array([0., 0., 0.]), np.array([1., 1., 1.])),
                akro.Box(np.array([-1., -1.]), np.array([0., 0.]))),
        observations=np.ones((sum(lengths), 3), dtype=np.float32),
        last_observations=np.ones((len(lengths), 3), dtype=np.float32),
        actions=np.zeros((sum(lengths), 2), dtype=np.float32),
        rewards=np.array([
            0.34026529, 0.58263177, 0.84307509, 0.97651095, 0.81723901,
            0.22631398, 0.03421301, 0.97515046, 0.64311832, 0.65068933,
            0.17657714, 0.04783857, 0.73904013, 0.41364329, 0.52235551,
            0.24203526, 0.43328910
        ]),
        step_types=np.array(
            [StepType.FIRST] + [StepType.MID] * (lengths[0] - 2) +
            [StepType.TERMINAL] + [StepType.FIRST] + [StepType.MID] *
            (lengths[1] - 2) + [StepType.TERMINAL] + [StepType.FIRST] +
            [StepType.FIRST],
            dtype=StepType),
        env_infos={
            'success':
            np.array([0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1],
                     dtype=bool)
        },
        agent_infos={},
        lengths=lengths)

    # The context manager closes (and thereby removes) the temporary file
    # even when logging or parsing fails, instead of leaking the handle.
    with tempfile.NamedTemporaryFile() as log_file:
        csv_output = dowel.CsvOutput(log_file.name)
        logger.add_output(csv_output)
        log_performance(7, batch, 0.8, prefix='test_log_performance')
        logger.log(tabular)
        logger.dump_output_type(dowel.CsvOutput)
        with open(log_file.name, 'r') as file:
            rows = list(csv.DictReader(file))

    res = {k: float(r) for (k, r) in rows[0].items()}
    assert res['test_log_performance/Iteration'] == 7
    assert res['test_log_performance/NumEpisodes'] == 4
    assert math.isclose(res['test_log_performance/SuccessRate'], 0.75)
    assert math.isclose(res['test_log_performance/TerminationRate'], 0.5)
    assert math.isclose(res['test_log_performance/AverageDiscountedReturn'],
                        1.1131040640673113)
    assert math.isclose(res['test_log_performance/AverageReturn'],
                        2.1659965525)
    assert math.isclose(res['test_log_performance/StdReturn'],
                        2.354067152038576)
def test_act_box_env_spec_mismatch_eps(eps_data):
    """EpisodeBatch must raise after the action space is overwritten.

    The fixture's env spec gets a (4, 3, 2) Box as its action space;
    building the batch is then expected to raise a ValueError mentioning
    the actions.
    """
    # Set up outside pytest.raises so that only the EpisodeBatch
    # construction is allowed to produce the expected error.
    eps_data['env_spec'].action_space = akro.Box(low=1,
                                                 high=np.inf,
                                                 shape=(4, 3, 2),
                                                 dtype=np.float32)
    with pytest.raises(ValueError, match='actions should have'):
        EpisodeBatch(**eps_data)
def concat_spaces(top, bottom):
    """Concatenate the bounds of two Box spaces into a single Box.

    Args:
        top (akro.Box): Space whose bounds come first.
        bottom (akro.Box): Space whose bounds are appended.

    Returns:
        akro.Box: Box built from the concatenated lower and upper bounds.

    """
    for space in (top, bottom):
        assert isinstance(space, akro.Box)

    top_low, top_high = top.bounds
    bottom_low, bottom_high = bottom.bounds
    lower = np.concatenate([top_low, bottom_low])
    upper = np.concatenate([top_high, bottom_high])
    return akro.Box(lower, upper)
def action_space(self):
    """Return the action space.

    Returns:
        akro.Box: One-dimensional float32 Box bounded by [-5, 5].

    """
    limit = 5.0
    return akro.Box(low=-limit, high=limit, shape=(1, ), dtype=np.float32)
def __init__(self, env, task_id, task_name, pad=False):
    """Wrap ``env`` and record its task identity.

    Args:
        env: Environment passed on to the parent initializer.
        task_id: Stored as ``task_id``.
        task_name: Stored as ``task_name``.
        pad (bool): When truthy and the env's observation space has
            fewer than 9 elements, the observation space is replaced by
            a Box bounded by [-1, 1] with shape (9,).

    """
    super().__init__(env)
    self.task_id = task_id
    self.task_name = task_name
    self.pad = pad

    needs_padding = pad and np.prod(env.observation_space.shape) < 9
    if needs_padding:
        self.observation_space = akro.Box(low=-1, high=1, shape=(9, ))
def observation_space(self):
    """Return the observation space.

    Returns:
        akro.Box: Float32 Box bounded by [-1, 1] whose shape is
            ``self._obs_dim``.

    """
    box_kwargs = dict(low=-1, high=1, shape=self._obs_dim, dtype=np.float32)
    return akro.Box(**box_kwargs)
def test_pickling(self):
    """A pickled policy must come back with its distribution functions."""
    obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 5, 2
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    task_space = akro.Box(low=np.zeros(task_num), high=np.ones(task_num))
    latent_space = akro.Box(low=np.zeros(latent_dim),
                            high=np.ones(latent_dim))
    embedding_spec = InOutSpec(input_space=task_space,
                               output_space=latent_space)
    encoder = GaussianMLPEncoder(embedding_spec)
    policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                            encoder=encoder)

    serialized = pickle.dumps(policy)
    # Restore under a separate variable scope.
    with tf.compat.v1.variable_scope('resumed'):
        restored = pickle.loads(serialized)

    for attr in ('_f_dist_obs_latent', '_f_dist_obs_task'):
        assert hasattr(restored, attr)
def test_act_box_env_spec_mismatch_batch(batch_data):
    """TimeStepBatch must raise after the action space is overwritten.

    The fixture's env spec gets a (4, 3, 2) Box as its action space;
    building the batch is then expected to raise a ValueError mentioning
    the actions.
    """
    # Set up outside pytest.raises so that only the TimeStepBatch
    # construction is allowed to produce the expected error.
    batch_data['env_spec'].action_space = akro.Box(low=1,
                                                   high=np.inf,
                                                   shape=(4, 3, 2),
                                                   dtype=np.float32)
    with pytest.raises(ValueError, match='actions should have'):
        TimeStepBatch(**batch_data)
def test_act_box_env_spec_mismatch_traj(traj_data):
    """TrajectoryBatch must raise after the action space is overwritten.

    The fixture's env spec gets a (4, 3, 2) Box as its action space;
    building the batch is then expected to raise a ValueError mentioning
    the actions.
    """
    # Set up outside pytest.raises so that only the TrajectoryBatch
    # construction is allowed to produce the expected error.
    traj_data['env_spec'].action_space = akro.Box(low=1,
                                                  high=np.inf,
                                                  shape=(4, 3, 2),
                                                  dtype=np.float32)
    with pytest.raises(ValueError, match='actions should have'):
        TrajectoryBatch(**traj_data)
def test_output_values(output_dim, kernel_sizes, hidden_channels, strides,
                       paddings):
    """DiscreteCNNModule must match a CNNModule followed by an MLPModule.

    All three modules are built with ones-initialised weights, so the
    combined module's output is compared for exact equality with the
    output of the manually chained CNN and MLP.
    """
    in_channel, input_height, input_width = 3, 32, 32
    input_shape = (in_channel, input_height, input_width)
    spec = InOutSpec(akro.Box(shape=input_shape, low=-np.inf, high=np.inf),
                     akro.Box(shape=(output_dim, ), low=-np.inf, high=np.inf))
    obs = torch.rand(input_shape)

    # Convolution settings shared by the combined module and the bare CNN.
    conv_kwargs = dict(image_format='NCHW',
                       hidden_channels=hidden_channels,
                       kernel_sizes=kernel_sizes,
                       strides=strides,
                       paddings=paddings,
                       padding_mode='zeros',
                       hidden_w_init=nn.init.ones_)

    module = DiscreteCNNModule(spec=spec,
                               hidden_sizes=hidden_channels,
                               output_w_init=nn.init.ones_,
                               **conv_kwargs)

    cnn_spec = InOutSpec(
        akro.Box(shape=input_shape, low=-np.inf, high=np.inf), None)
    cnn = CNNModule(spec=cnn_spec, **conv_kwargs)

    # Size of the flattened CNN features, used as the MLP's input size.
    flat_dim = torch.flatten(cnn(obs).detach(), start_dim=1).shape[1]
    mlp = MLPModule(
        flat_dim,
        output_dim,
        hidden_channels,
        hidden_w_init=nn.init.ones_,
        output_w_init=nn.init.ones_,
    )

    cnn_features = cnn(obs)
    expected = mlp(torch.flatten(cnn_features, start_dim=1))

    assert torch.all(torch.eq(expected.detach(), module(obs).detach()))
def observation_space(self):
    """Observation space for the wrapper's current mode.

    Returns:
        akro.Box: In 'vanilla' mode, the wrapped env's observation space.
            In 'add-onehot' mode, the stored observation bounds with the
            task-space bounds appended. In any other mode (i.e.
            'del-onehot'), the stored observation bounds with the last
            ``num_tasks`` entries removed.

    """
    if self._mode == 'vanilla':
        return self.env.observation_space

    if self._mode == 'add-onehot':
        task_low, task_high = self.task_space.bounds
        obs_low, obs_high = self._observation_space.bounds
        lower = np.concatenate([obs_low, task_low])
        upper = np.concatenate([obs_high, task_high])
        return akro.Box(lower, upper)

    # self._mode == 'del-onehot'
    obs_low, obs_high = self._observation_space.bounds
    without_onehot = slice(None, -self._num_tasks)
    return akro.Box(obs_low[without_onehot], obs_high[without_onehot])
def __init__(self, env, task_number, num_tasks, max_env_shape):
    """Wrap ``env`` and precompute the one-hot vector of its task.

    Args:
        env: Environment passed on to the parent initializer.
        task_number (int): Index of this task; the matching one-hot
            entry is set to the task space's upper bound at that index.
        num_tasks (int): Length of the one-hot vector.
        max_env_shape: Stored as ``max_env_shape``.

    """
    super().__init__(env)
    self._task_number = task_number
    self._num_tasks = num_tasks

    upper = np.ones(self._num_tasks)
    lower = np.zeros(self._num_tasks)
    task_space = akro.Box(lower, upper)

    self.one_hot = np.zeros(task_space.shape)
    self.one_hot[task_number] = task_space.high[task_number]

    self.max_env_shape = max_env_shape
def task_space(self):
    """Task space.

    Returns:
        akro.Box: Box with ``num_tasks`` entries, each bounded by [0, 1].

    """
    upper = np.ones(self.num_tasks)
    lower = np.zeros(self.num_tasks)
    return akro.Box(lower, upper)
def test_get_latent(self):
    """The latent and its info entries must have the latent shape."""
    obs_dim, action_dim, task_num, latent_dim = (2, ), (2, ), 5, 2
    env = GarageEnv(DummyBoxEnv(obs_dim=obs_dim, action_dim=action_dim))
    task_space = akro.Box(low=np.zeros(task_num), high=np.ones(task_num))
    latent_space = akro.Box(low=np.zeros(latent_dim),
                            high=np.ones(latent_dim))
    embedding_spec = InOutSpec(input_space=task_space,
                               output_space=latent_space)
    encoder = GaussianMLPEncoder(embedding_spec)
    policy = GaussianMLPTaskEmbeddingPolicy(env_spec=env.spec,
                                            encoder=encoder)

    # One-hot encoding of task 3.
    task_id = 3
    task_onehot = np.zeros(task_num)
    task_onehot[task_id] = 1

    latent, latent_info = policy.get_latent(task_onehot)

    expected_shape = (latent_dim, )
    assert latent.shape == expected_shape
    assert latent_info['mean'].shape == expected_shape
    assert latent_info['log_std'].shape == expected_shape