def test_slice():
    """Slicing an AgentAction keeps the requested rows of both action kinds."""
    # Three rows: one continuous dimension and two discrete branches per row.
    continuous = torch.tensor([[1.0], [1.0], [1.0]])
    discrete_branches = [torch.tensor([2, 1, 0]), torch.tensor([1, 2, 0])]
    full_action = AgentAction(continuous, discrete_branches)

    sliced = full_action.slice(0, 2)

    # Two rows survive; the branch/dimension counts are unchanged.
    assert sliced.continuous_tensor.shape == (2, 1)
    assert sliced.discrete_tensor.shape == (2, 2)
def test_actions_to_onehot():
    """Check that ``ModelUtils.actions_to_onehot`` one-hot encodes every branch.

    Two identical rows of branch indices ``[1, 0, 2]`` are encoded against
    branch sizes ``[2, 1, 3]`` and the result is compared branch by branch.
    """
    all_actions = torch.tensor([[1, 0, 2], [1, 0, 2]])
    action_size = [2, 1, 3]
    oh_actions = ModelUtils.actions_to_onehot(all_actions, action_size)
    expected_result = [
        torch.tensor([[0, 1], [0, 1]], dtype=torch.float),
        torch.tensor([[1], [1]], dtype=torch.float),
        torch.tensor([[0, 0, 1], [0, 0, 1]], dtype=torch.float),
    ]
    # zip() stops at the shorter sequence, so without this check a result with
    # missing branches (or an empty one) would pass the loop below vacuously.
    assert len(oh_actions) == len(expected_result)
    for res, exp in zip(oh_actions, expected_result):
        assert torch.equal(res, exp)
def test_to_flat():
    """Exercise ``AgentAction.to_flat`` for mixed, continuous-only and discrete-only actions."""
    # Mixed: the expected row holds the continuous values first, followed by
    # one one-hot block of width 3 per discrete branch.
    mixed = AgentAction(
        torch.tensor([[1.0, 1.0, 1.0]]),
        [torch.tensor([2]), torch.tensor([1])],
    )
    mixed_flat = mixed.to_flat([3, 3])
    expected_mixed = torch.tensor([[1, 1, 1, 0, 0, 1, 0, 1, 0]])
    assert torch.eq(mixed_flat, expected_mixed).all()

    # Continuous only: no discrete branches, so the branch size list is empty.
    continuous_only = AgentAction(torch.tensor([[1.0, 1.0, 1.0]]), None)
    continuous_flat = continuous_only.to_flat([])
    assert torch.eq(continuous_flat, torch.tensor([1, 1, 1])).all()

    # Discrete only: the continuous part is an empty tensor.
    discrete_only = AgentAction(
        torch.tensor([]), [torch.tensor([2]), torch.tensor([1])]
    )
    discrete_flat = discrete_only.to_flat([3, 3])
    assert torch.eq(discrete_flat, torch.tensor([0, 0, 1, 0, 1, 0])).all()
def test_categorical_dist_instance():
    """Sample from a CategoricalDistInstance and compare its log-probabilities."""
    torch.manual_seed(0)
    act_size = 4
    # Every action except the first gets 0.1; the first gets the remainder,
    # which makes it the most likely action.
    low_probs = [0.1] * (act_size - 1)
    test_prob = torch.tensor([[1.0 - 0.1 * (act_size - 1)] + low_probs])
    dist_instance = CategoricalDistInstance(test_prob)

    for _ in range(10):
        sampled = dist_instance.sample()
        assert sampled.shape == (1, 1)
        assert sampled < act_size

    # Make sure the first action has a higher probability than the others.
    first_log_prob = dist_instance.log_prob(torch.tensor([0]))
    for other in range(1, act_size):
        other_log_prob = dist_instance.log_prob(torch.tensor([other]))
        assert other_log_prob < first_log_prob
def test_list_to_tensor():
    """``ModelUtils.list_to_tensor`` must agree with ``torch.tensor`` for lists and arrays."""
    raw_rows = [[1.0, 2], [1, 3], [1, 4]]

    # Plain nested Python list.
    from_list = ModelUtils.list_to_tensor(raw_rows)
    assert torch.equal(from_list, torch.tensor(raw_rows))

    # A single numpy array built from the same data.
    as_array = np.asarray(raw_rows)
    from_array = ModelUtils.list_to_tensor(as_array)
    assert torch.equal(from_array, torch.tensor(raw_rows))

    # A Python list whose elements are numpy arrays; the reference here is
    # explicitly float32.
    array_rows = [np.asarray(row) for row in raw_rows]
    from_array_rows = ModelUtils.list_to_tensor(array_rows)
    assert torch.equal(
        from_array_rows, torch.tensor(raw_rows, dtype=torch.float32)
    )
def test_actor_critic(ac_type, lstm):
    """Check output shapes of an actor-critic built with a hybrid action spec.

    :param ac_type: Actor-critic class under test; it is called as
        ``ac_type(obs_spec, network_settings, action_spec, stream_names)``.
    :param lstm: When true, memory settings are enabled and sequence-shaped
        inputs are used.

    Both arguments are presumably supplied by test parametrization that is
    not visible from this function.
    """
    obs_size = 4
    network_settings = NetworkSettings(
        memory=NetworkSettings.MemorySettings() if lstm else None, normalize=True
    )
    obs_spec = create_observation_specs_with_shapes([(obs_size,)])
    act_size = 2
    mask = torch.ones([1, act_size * 2])
    stream_names = [f"stream_name{n}" for n in range(4)]
    # act_size continuous actions plus act_size discrete branches, each with
    # act_size options (matches the shape checks further down).
    action_spec = ActionSpec(act_size, tuple(act_size for _ in range(act_size)))
    actor = ac_type(obs_spec, network_settings, action_spec, stream_names)

    if lstm:
        sequence_length = network_settings.memory.sequence_length
        sample_obs = torch.ones((1, sequence_length, obs_size))
        memories = torch.ones((1, sequence_length, actor.memory_size))
        # One output row per step of the sequence. This replaces the literal
        # 64 that previously duplicated the default sequence length.
        expected_rows = sequence_length
    else:
        sample_obs = torch.ones((1, obs_size))
        memories = torch.tensor([])
        expected_rows = 1
    # memories isn't always set to None, the network should be able to
    # deal with that.

    # Test critic pass
    value_out, memories_out = actor.critic_pass([sample_obs], memories=memories)
    for stream in stream_names:
        assert value_out[stream].shape == (expected_rows,)
    if lstm:
        assert memories_out.shape == memories.shape

    # Test get_action_stats_and_value
    action, log_probs, entropies, value_out, mem_out = actor.get_action_stats_and_value(
        [sample_obs], memories=memories, masks=mask
    )
    assert action.continuous_tensor.shape == (expected_rows, act_size)
    assert len(action.discrete_list) == act_size
    for _disc in action.discrete_list:
        assert _disc.shape == (expected_rows, 1)

    if mem_out is not None:
        assert mem_out.shape == memories.shape
    for stream in stream_names:
        assert value_out[stream].shape == (expected_rows,)
def test_masked_mean():
    """Check ``ModelUtils.masked_mean`` with full, partial, empty and 2-D inputs."""
    values = torch.tensor([1, 2, 3, 4, 5])

    # Every element selected.
    all_on = torch.ones_like(values).bool()
    assert ModelUtils.masked_mean(values, masks=all_on) == 3.0

    # Only the last three elements selected.
    last_three = torch.tensor([False, False, True, True, True])
    assert ModelUtils.masked_mean(values, masks=last_three) == 4.0

    # Make sure it works if all masks are off.
    all_off = torch.tensor([False, False, False, False, False])
    assert ModelUtils.masked_mean(values, masks=all_off) == 0.0

    # Make sure it works with 2d arrays of shape (mask_length, N).
    stacked = torch.tensor([1, 2, 3, 4, 5]).repeat(2, 1).T
    last_three_again = torch.tensor([False, False, True, True, True])
    assert ModelUtils.masked_mean(stacked, masks=last_three_again) == 4.0
def test_continuous_action_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
    """After 200 updates on one buffer, the predicted action must match the stored one.

    :param behavior_spec: Spec used to build both the reward provider and the buffer.
    :param seed: Seed applied to numpy and torch before anything is created.
    """
    np.random.seed(seed)
    torch.manual_seed(seed)

    provider = CuriosityRewardProvider(behavior_spec, CuriositySettings(32, 0.1))
    buffer = create_agent_buffer(behavior_spec, 5)

    # Repeatedly update on the same buffer.
    for _ in range(200):
        provider.update(buffer)

    # Compare the first predicted action with the first stored action.
    predicted = provider._network.predict_action(buffer)[0]
    expected = torch.tensor(buffer["continuous_action"][0])
    mean_squared_error = torch.mean((predicted - expected) ** 2).item()
    assert mean_squared_error < 0.001
def compute_loss(
    self, policy_batch: AgentBuffer, expert_batch: AgentBuffer
) -> torch.Tensor:
    """
    Given a policy mini_batch and an expert mini_batch, computes the loss of the discriminator.

    :param policy_batch: The policy mini_batch.
    :param expert_batch: The expert mini_batch.
    :return: A ``(total_loss, stats_dict)`` pair: the summed loss tensor and a
        dict of scalar statistics keyed by their reporting name.

    NOTE(review): the return annotation says ``torch.Tensor`` although a 2-tuple
    is returned. It is left as-is here because correcting it requires
    ``typing.Tuple`` to be imported at file level.
    """
    total_loss = torch.zeros(1)
    # Every value stored below comes from ``.item()``, i.e. a plain Python float.
    stats_dict: Dict[str, float] = {}
    policy_estimate, policy_mu = self.compute_estimate(
        policy_batch, use_vail_noise=True
    )
    expert_estimate, expert_mu = self.compute_estimate(
        expert_batch, use_vail_noise=True
    )
    stats_dict["Policy/GAIL Policy Estimate"] = policy_estimate.mean().item()
    stats_dict["Policy/GAIL Expert Estimate"] = expert_estimate.mean().item()
    # EPSILON keeps both logarithms finite when an estimate reaches 0 or 1.
    discriminator_loss = -(
        torch.log(expert_estimate + self.EPSILON)
        + torch.log(1.0 - policy_estimate + self.EPSILON)
    ).mean()
    stats_dict["Losses/GAIL Loss"] = discriminator_loss.item()
    total_loss += discriminator_loss
    if self._settings.use_vail:
        # KL divergence loss (encourage latent representation to be normal)
        kl_loss = torch.mean(
            -torch.sum(
                1
                + (self._z_sigma ** 2).log()
                - 0.5 * expert_mu ** 2
                - 0.5 * policy_mu ** 2
                - (self._z_sigma ** 2),
                dim=1,
            )
        )
        # The loss term uses the current beta; beta itself is then adjusted
        # outside autograd and clamped at zero.
        vail_loss = self._beta * (kl_loss - self.mutual_information)
        with torch.no_grad():
            self._beta.data = torch.max(
                self._beta + self.alpha * (kl_loss - self.mutual_information),
                torch.tensor(0.0),
            )
        total_loss += vail_loss
        stats_dict["Policy/GAIL Beta"] = self._beta.item()
        stats_dict["Losses/GAIL KL Loss"] = kl_loss.item()
    if self.gradient_penalty_weight > 0.0:
        gradient_magnitude_loss = (
            self.gradient_penalty_weight
            * self.compute_gradient_magnitude(policy_batch, expert_batch)
        )
        stats_dict["Policy/GAIL Grad Mag Loss"] = gradient_magnitude_loss.item()
        total_loss += gradient_magnitude_loss
    return total_loss, stats_dict
def test_get_probs_and_entropy():
    """Check shapes and values returned by the action model's ``_get_probs_and_entropy``."""
    inp_size = 4
    act_size = 2
    action_model, _masks = create_action_model(inp_size, act_size)

    # One Gaussian over two continuous actions and two identical categoricals.
    continuous_dist = GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2)))
    test_prob = torch.tensor([[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)])
    discrete_dists = [CategoricalDistInstance(test_prob) for _ in range(2)]
    dist_tuple = DistInstances(continuous_dist, discrete_dists)

    # Continuous actions at zero; discrete choices 0 and 1 for the two branches.
    agent_action = AgentAction(
        torch.zeros((1, 2)), [torch.tensor([0]), torch.tensor([1])]
    )

    log_probs, entropies = action_model._get_probs_and_entropy(
        agent_action, dist_tuple
    )

    assert log_probs.continuous_tensor.shape == (1, 2)
    assert len(log_probs.discrete_list) == 2
    for branch_log_prob in log_probs.discrete_list:
        assert branch_log_prob.shape == (1,)
    assert len(log_probs.all_discrete_list) == 2
    for branch_all_log_probs in log_probs.all_discrete_list:
        assert branch_all_log_probs.shape == (1, 2)

    for continuous_log_prob in log_probs.continuous_tensor[0]:
        # Log prob of standard normal at 0
        assert continuous_log_prob == pytest.approx(-0.919, abs=0.01)

    # Action 0 was given the larger probability, so its log prob is larger.
    assert log_probs.discrete_list[0] > log_probs.discrete_list[1]

    expected_entropies = [1.4189, 0.6191, 0.6191]
    for entropy, expected in zip(entropies[0], expected_entropies):
        assert entropy == pytest.approx(expected, abs=0.01)
def test_get_probs_and_entropy():
    """Check ``ModelUtils.get_probs_and_entropy`` for Gaussian and categorical dists."""
    # --- Continuous ---
    # Two dists in the list. This isn't done in the code but we'd like to support it.
    gaussian_dists = [
        GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2)))
        for _ in range(2)
    ]
    zero_actions = [torch.zeros((1, 2)), torch.zeros((1, 2))]
    log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
        zero_actions, gaussian_dists
    )
    assert log_probs.shape == (1, 2, 2)
    assert entropies.shape == (1, 2, 2)
    assert all_probs is None

    for value in log_probs.flatten():
        # Log prob of standard normal at 0
        assert value == pytest.approx(-0.919, abs=0.01)
    for value in entropies.flatten():
        # Entropy of a standard normal
        assert value == pytest.approx(1.42, abs=0.01)

    # --- Discrete ---
    # Two identical categorical dists with a high probability for the first action.
    act_size = 2
    test_prob = torch.tensor(
        [[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)]
    )
    dist_list = [CategoricalDistInstance(test_prob) for _ in range(2)]
    chosen_actions = [torch.tensor([0]), torch.tensor([1])]
    log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
        chosen_actions, dist_list
    )
    assert all_probs.shape == (1, len(dist_list) * act_size)
    assert entropies.shape == (1, len(dist_list))

    # Make sure the first action has a higher probability than the others.
    flat_log_probs = log_probs.flatten()
    assert flat_log_probs[0] > flat_log_probs[1]
def test_break_into_branches():
    """``ModelUtils.break_into_branches`` splits a flat row into per-branch tensors."""
    flat_actions = torch.tensor([[1, 2, 3, 4, 5, 6]])

    # Normal multi-branch case: one tensor per branch, each as wide as its branch.
    branch_sizes = [2, 1, 3]
    branches = ModelUtils.break_into_branches(flat_actions, branch_sizes)
    assert len(branch_sizes) == len(branches)
    for branch, width in zip(branches, branch_sizes):
        assert branch.shape == (1, width)

    # Single-branch case: the whole row comes back as one tensor.
    single = ModelUtils.break_into_branches(flat_actions, [6])
    assert len(single) == 1
    assert single[0].shape == (1, 6)
def test_normalizer():
    """Update a Normalizer three times, then check its output and ``copy_from``."""
    input_size = 2
    norm = Normalizer(input_size)

    # Per the original note: with the step counter starting at 1, these three
    # inputs should leave the normalizer with mean 0.5 and variance 2.
    ones_a = torch.tensor([[1, 1]])
    ones_b = torch.tensor([[1, 1]])
    zeros = torch.tensor([[0, 0]])
    for sample in (ones_a, ones_b, zeros):
        norm.update(sample)

    # Test normalization
    for normalized in norm(ones_a)[0]:
        assert normalized == pytest.approx(0.707, abs=0.001)

    # Test copy normalization: a fresh normalizer differs until it copies.
    fresh = Normalizer(input_size)
    assert not compare_models(norm, fresh)
    fresh.copy_from(norm)
    assert compare_models(norm, fresh)
    for normalized in fresh(ones_a)[0]:
        assert normalized == pytest.approx(0.707, abs=0.001)
def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
    """Create the state encoder, joint encoder, optional VAIL layers and estimator.

    :param specs: Behavior spec; supplies the observation shapes and is handed
        to the action flattener.
    :param settings: GAIL settings; ``encoding_size``, ``use_actions`` and
        ``use_vail`` are read here.
    """
    super().__init__()
    self._policy_specs = specs
    self._use_vail = settings.use_vail
    self._settings = settings

    hidden_size = settings.encoding_size
    self._state_encoder = NetworkBody(
        specs.observation_shapes,
        NetworkSettings(
            normalize=False,
            hidden_units=hidden_size,
            num_layers=2,
            vis_encode_type=EncoderType.SIMPLE,
            memory=None,
        ),
    )
    self._action_flattener = ModelUtils.ActionFlattener(specs)

    # When actions are used, the flattened action and a done flag (+ 1) are
    # appended to the state encoding.
    extra_inputs = (
        self._action_flattener.flattened_size + 1 if settings.use_actions else 0
    )
    self.encoder = torch.nn.Sequential(
        linear_layer(hidden_size + extra_inputs, hidden_size),
        Swish(),
        linear_layer(hidden_size, hidden_size),
        Swish(),
    )

    if settings.use_vail:
        latent_size = self.z_size
        self._z_sigma = torch.nn.Parameter(
            torch.ones(latent_size, dtype=torch.float), requires_grad=True
        )
        self._z_mu_layer = linear_layer(
            hidden_size,
            latent_size,
            kernel_init=Initialization.KaimingHeNormal,
            kernel_gain=0.1,
        )
        # beta is stored as a parameter but excluded from gradient updates.
        self._beta = torch.nn.Parameter(
            torch.tensor(self.initial_beta, dtype=torch.float),
            requires_grad=False,
        )
        estimator_inputs = latent_size
    else:
        estimator_inputs = hidden_size

    self._estimator = torch.nn.Sequential(
        linear_layer(estimator_inputs, 1), torch.nn.Sigmoid()
    )
def forward(self, mini_batch: AgentBuffer) -> torch.Tensor:
    """Encode the observations of a mini-batch and update the encoder's normalization.

    :param mini_batch: Buffer read under ``"vector_obs"`` and, for each visual
        processor of the encoder, ``"visual_obs<i>"``.
    :return: The first value returned by the encoder's forward pass.
    """
    n_vis = len(self._encoder.visual_processors)
    # Convert the vector observations once and reuse the tensor for both the
    # forward pass and the normalization update. Previously the same buffer
    # entry was converted a second time with a raw torch.tensor() call.
    vec_obs = ModelUtils.list_to_tensor(mini_batch["vector_obs"], dtype=torch.float)
    vis_obs = [
        ModelUtils.list_to_tensor(mini_batch["visual_obs%d" % i], dtype=torch.float)
        for i in range(n_vis)
    ]
    hidden, _ = self._encoder.forward(vec_inputs=[vec_obs], vis_inputs=vis_obs)
    # As before, normalization statistics are updated after the forward pass.
    self._encoder.update_normalization(vec_obs)
    return hidden
def test_multi_categorical_distribution():
    """Fit a MultiCategoricalDistribution to target log-probs, then check masking."""
    torch.manual_seed(0)
    hidden_size = 16
    act_size = [3, 3, 4]
    sample_embedding = torch.ones((1, hidden_size))
    multi_dist = MultiCategoricalDistribution(hidden_size, act_size)
    # Make sure backprop works
    optimizer = torch.optim.Adam(multi_dist.parameters(), lr=3e-3)

    def create_test_prob(size: int) -> torch.Tensor:
        # High prob for first action, 0.01 for every other one.
        test_prob = torch.tensor([[1.0 - 0.01 * (size - 1)] + [0.01] * (size - 1)])
        return test_prob.log()

    all_allowed = sum(act_size)
    for _ in range(100):
        dist_insts = multi_dist(sample_embedding, masks=torch.ones((1, all_allowed)))
        loss = 0
        for branch_size, dist_inst in zip(act_size, dist_insts):
            assert isinstance(dist_inst, CategoricalDistInstance)
            predicted = dist_inst.all_log_prob()
            target = create_test_prob(branch_size)
            # Force log_probs to match the high probability for the first
            # action generated by create_test_prob
            loss += torch.nn.functional.mse_loss(predicted, target)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Check that the log probs are close to the fake ones that we generated.
    for dist_inst, size in zip(dist_insts, act_size):
        fitted = dist_inst.all_log_prob().flatten().tolist()
        wanted = create_test_prob(size).flatten().tolist()
        for _prob, _test_prob in zip(fitted, wanted):
            assert _prob == pytest.approx(_test_prob, abs=0.1)

    # Test masks: only the last action of every branch is allowed, so its
    # log prob must be (close to) zero.
    mask_bits = [bit for branch in act_size for bit in [0] * (branch - 1) + [1]]
    masks = torch.tensor([mask_bits])
    for dist_inst in multi_dist(sample_embedding, masks=masks):
        last_log_prob = dist_inst.all_log_prob().flatten()[-1]
        assert last_log_prob == pytest.approx(0, abs=0.001)
def test_actor_critic(ac_type, lstm):
    """Check critic and distribution outputs of a continuous-action actor-critic.

    :param ac_type: Actor-critic class under test.
    :param lstm: When true, memory settings are enabled and sequence-shaped
        inputs are used.
    """
    obs_size = 4
    act_size = [2]
    memory_settings = NetworkSettings.MemorySettings() if lstm else None
    network_settings = NetworkSettings(memory=memory_settings)
    obs_shapes = [(obs_size,)]
    stream_names = [f"stream_name{n}" for n in range(4)]
    action_spec = ActionSpec.create_continuous(act_size[0])
    actor = ac_type(obs_shapes, network_settings, action_spec, stream_names)

    if not lstm:
        sample_obs = torch.ones((1, obs_size))
        memories = torch.tensor([])
    else:
        steps = network_settings.memory.sequence_length
        sample_obs = torch.ones((1, steps, obs_size))
        memories = torch.ones((1, steps, actor.memory_size))
    # memories isn't always set to None, the network should be able to
    # deal with that.

    # Test critic pass
    value_out, memories_out = actor.critic_pass([sample_obs], [], memories=memories)
    for stream in stream_names:
        if not lstm:
            assert value_out[stream].shape == (1,)
            continue
        assert value_out[stream].shape == (network_settings.memory.sequence_length,)
        assert memories_out.shape == memories.shape

    # Test get_dist_and_value
    dists, value_out, mem_out = actor.get_dist_and_value(
        [sample_obs], [], memories=memories
    )
    if mem_out is not None:
        assert mem_out.shape == memories.shape
    for dist in dists:
        assert isinstance(dist, GaussianDistInstance)
    for stream in stream_names:
        if not lstm:
            assert value_out[stream].shape == (1,)
        else:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length,
            )
def test_simple_actor(use_discrete):
    """Check distributions, sampled actions and ``forward`` outputs of SimpleActor.

    :param use_discrete: Selects a discrete action spec (with masks) when
        true, a continuous one (without masks) otherwise.
    """
    obs_size = 4
    act_size = [2]
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size,)]
    if use_discrete:
        masks = torch.ones((1, 1))
        action_spec = ActionSpec.create_discrete(tuple(act_size))
    else:
        masks = None
        action_spec = ActionSpec.create_continuous(act_size[0])
    actor = SimpleActor(obs_shapes, network_settings, action_spec)
    sample_obs = torch.ones((1, obs_size))

    # Test get_dist
    expected_dist_type = (
        CategoricalDistInstance if use_discrete else GaussianDistInstance
    )
    dists, _ = actor.get_dists([sample_obs], [], masks=masks)
    for dist in dists:
        assert isinstance(dist, expected_dist_type)

    # Test sample_actions
    sampled_shape = (1, 1) if use_discrete else (1, act_size[0])
    actions = actor.sample_action(dists)
    for act in actions:
        assert act.shape == sampled_shape

    # Test forward
    actions, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
        [sample_obs], [], masks=masks
    )
    # This is different from above for ONNX export
    exported_shape = tuple(act_size) if use_discrete else (act_size[0], 1)
    for act in actions:
        assert act.shape == exported_shape

    assert mem_size == 0
    assert is_cont == int(not use_discrete)
    assert act_size_vec == torch.tensor(act_size)
def __init__(self, policy: TorchPolicy, trainer_settings: TrainerSettings):
    """Store the policy and settings, create reward signals and the optional BC module.

    :param policy: Policy stored on the instance and passed to the
        behavioral-cloning module when one is configured.
    :param trainer_settings: Settings whose ``reward_signals``,
        ``behavioral_cloning`` and ``hyperparameters`` are read here.
    """
    super().__init__()
    self.policy = policy
    self.trainer_settings = trainer_settings
    self.update_dict: Dict[str, torch.Tensor] = {}
    self.value_heads: Dict[str, torch.Tensor] = {}
    # Both memory slots start out as None, so the annotation must allow it.
    self.memory_in: Optional[torch.Tensor] = None
    self.memory_out: Optional[torch.Tensor] = None
    self.m_size: int = 0
    self.global_step = torch.tensor(0)
    self.bc_module: Optional[BCModule] = None
    self.create_reward_signals(trainer_settings.reward_signals)
    if trainer_settings.behavioral_cloning is not None:
        # The BC module takes its learning rate and batch size from the
        # trainer hyperparameters.
        self.bc_module = BCModule(
            self.policy,
            trainer_settings.behavioral_cloning,
            policy_learning_rate=trainer_settings.hyperparameters.learning_rate,
            default_batch_size=trainer_settings.hyperparameters.batch_size,
            default_num_epoch=3,
        )
def test_simple_actor(action_type):
    """Check distributions, sampled actions and ``forward`` outputs of SimpleActor.

    :param action_type: ``ActionType`` of the actor under test; masks are only
        supplied when it is not ``ActionType.CONTINUOUS``.
    """
    is_continuous = action_type == ActionType.CONTINUOUS
    obs_size = 4
    act_size = [2]
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size,)]
    masks = None if is_continuous else torch.ones((1, 1))
    actor = SimpleActor(obs_shapes, network_settings, action_type, act_size)
    sample_obs = torch.ones((1, obs_size))

    # Test get_dist
    dists, _ = actor.get_dists([sample_obs], [], masks=masks)
    expected_dist_type = (
        GaussianDistInstance if is_continuous else CategoricalDistInstance
    )
    for dist in dists:
        assert isinstance(dist, expected_dist_type)

    # Test sample_actions
    actions = actor.sample_action(dists)
    sampled_shape = (1, act_size[0]) if is_continuous else (1, 1)
    for act in actions:
        assert act.shape == sampled_shape

    # Test forward
    actions, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
        [sample_obs], [], masks=masks
    )
    # This is different from above for ONNX export
    exported_shape = (act_size[0], 1) if is_continuous else tuple(act_size)
    for act in actions:
        assert act.shape == exported_shape

    assert mem_size == 0
    assert is_cont == int(action_type == ActionType.CONTINUOUS)
    assert act_size_vec == torch.tensor(act_size)
def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
    """Create the encoder, optional VAIL layers and the estimator head.

    :param specs: Behavior spec; its ``action_spec`` and ``observation_specs``
        are read here.
    :param settings: GAIL settings; ``network_settings``, ``use_actions`` and
        ``use_vail`` are read here. Memory is not supported: if it is set in
        ``network_settings`` it is ignored with a warning.
    """
    import copy  # local import: only needed for the defensive copy below

    super().__init__()
    self._use_vail = settings.use_vail
    self._settings = settings

    encoder_settings = settings.network_settings
    if encoder_settings.memory is not None:
        # Disable memory on a shallow copy so the settings object owned by
        # the caller is not modified as a side effect of building this module.
        encoder_settings = copy.copy(encoder_settings)
        encoder_settings.memory = None
        logger.warning(
            "memory was specified in network_settings but is not supported by GAIL. It is being ignored."
        )

    self._action_flattener = ActionFlattener(specs.action_spec)
    unencoded_size = (
        self._action_flattener.flattened_size + 1 if settings.use_actions else 0
    )  # +1 is for dones
    self.encoder = NetworkBody(
        specs.observation_specs, encoder_settings, unencoded_size
    )

    estimator_input_size = encoder_settings.hidden_units
    if settings.use_vail:
        estimator_input_size = self.z_size
        self._z_sigma = torch.nn.Parameter(
            torch.ones((self.z_size), dtype=torch.float), requires_grad=True
        )
        self._z_mu_layer = linear_layer(
            encoder_settings.hidden_units,
            self.z_size,
            kernel_init=Initialization.KaimingHeNormal,
            kernel_gain=0.1,
        )
        # beta is stored as a parameter but excluded from gradient updates.
        self._beta = torch.nn.Parameter(
            torch.tensor(self.initial_beta, dtype=torch.float),
            requires_grad=False,
        )

    self._estimator = torch.nn.Sequential(
        linear_layer(estimator_input_size, 1, kernel_gain=0.2),
        torch.nn.Sigmoid(),
    )
def create_test_prob(size: int) -> torch.Tensor:
    """Return the log-probabilities of a ``(1, size)`` distribution peaked on action 0.

    Every action except the first gets probability 0.01; the first action
    gets the remaining mass.

    :param size: Number of actions in the distribution.
    :return: Tensor of shape ``(1, size)`` holding the log of each probability.
    :raises ValueError: If ``size`` is below 1, or so large that the first
        action would be left with no probability mass.
    """
    low_prob = 0.01
    if size < 1:
        raise ValueError(f"size must be at least 1, got {size}")
    first_prob = 1.0 - low_prob * (size - 1)
    if first_prob <= 0.0:
        # Without this guard the log below would silently produce -inf or NaN.
        raise ValueError(
            f"size {size} leaves no probability mass for the first action"
        )
    test_prob = torch.tensor([[first_prob] + [low_prob] * (size - 1)])
    return test_prob.log()
def __init__(self, vec_obs_size: int):
    """Register the step counter and the running mean/variance buffers.

    :param vec_obs_size: Length of the running mean and running variance vectors.
    """
    super().__init__()
    # Step counter starts at 1, the mean at zeros and the variance at ones.
    initial_buffers = (
        ("normalization_steps", torch.tensor(1)),
        ("running_mean", torch.zeros(vec_obs_size)),
        ("running_variance", torch.ones(vec_obs_size)),
    )
    for buffer_name, initial_value in initial_buffers:
        self.register_buffer(buffer_name, initial_value)