Example #1
import pytest
import torch

# Imports assumed from the ML-Agents codebase (module paths may vary by version)
from mlagents.trainers.torch.distributions import GaussianDistInstance


def test_gaussian_dist_instance():
    torch.manual_seed(0)
    act_size = 4
    dist_instance = GaussianDistInstance(torch.zeros(1, act_size),
                                         torch.ones(1, act_size))
    action = dist_instance.sample()
    assert action.shape == (1, act_size)
    for log_prob in dist_instance.log_prob(torch.zeros((1, act_size))).flatten():
        # Log prob of standard normal at 0
        assert log_prob == pytest.approx(-0.919, abs=0.01)

    for ent in dist_instance.entropy().flatten():
        # Entropy of a standard normal, from 1/2 + ln(sqrt(2*pi) * sigma) with sigma = 1
        assert ent == pytest.approx(1.42, abs=0.01)
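
The two constants asserted above follow directly from the standard normal closed forms: log p(0) = -ln(sqrt(2*pi)) ≈ -0.919 and H = 1/2 + ln(sqrt(2*pi)) ≈ 1.4189. A quick sanity check, as a minimal sketch using plain torch.distributions rather than ML-Agents' GaussianDistInstance:

import torch

std_normal = torch.distributions.Normal(0.0, 1.0)
print(std_normal.log_prob(torch.tensor(0.0)).item())  # ~-0.9189, i.e. -ln(sqrt(2*pi))
print(std_normal.entropy().item())                    # ~1.4189, i.e. 0.5 + ln(sqrt(2*pi))
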
Example #2
import pytest
import torch

# Imports assumed from the ML-Agents codebase (module paths may vary by version)
from mlagents.trainers.torch.distributions import (
    CategoricalDistInstance,
    GaussianDistInstance,
)
from mlagents.trainers.torch.utils import ModelUtils


def test_get_probs_and_entropy():
    # Test continuous
    # Add two dists to the list. This isn't done in the code but we'd like to support it.
    dist_list = [
        GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
        GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
    ]
    action_list = [torch.zeros((1, 2)), torch.zeros((1, 2))]
    log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
        action_list, dist_list)
    assert log_probs.shape == (1, 2, 2)
    assert entropies.shape == (1, 2, 2)
    assert all_probs is None

    for log_prob in log_probs.flatten():
        # Log prob of standard normal at 0
        assert log_prob == pytest.approx(-0.919, abs=0.01)

    for ent in entropies.flatten():
        # Entropy of a standard normal
        assert ent == pytest.approx(1.42, abs=0.01)

    # Test discrete
    # Add two dists to the list.
    act_size = 2
    test_prob = torch.tensor(
        [[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)]
    )  # High prob for first action
    dist_list = [
        CategoricalDistInstance(test_prob),
        CategoricalDistInstance(test_prob)
    ]
    action_list = [torch.tensor([0]), torch.tensor([1])]
    log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
        action_list, dist_list)
    assert all_probs.shape == (1, len(dist_list) * act_size)
    assert entropies.shape == (1, len(dist_list))
    # Make sure the first action has a higher log probability than the other.
    assert log_probs.flatten()[0] > log_probs.flatten()[1]
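
The all_probs width of len(dist_list) * act_size reflects each categorical branch contributing act_size per-action probabilities, concatenated along the last axis. A minimal shape sketch, assuming that concatenation layout (not the actual ModelUtils internals):

import torch

act_size = 2
branch_probs = [torch.rand(1, act_size) for _ in range(2)]  # one row of probs per dist
all_probs = torch.cat(branch_probs, dim=-1)
assert all_probs.shape == (1, 2 * act_size)  # (batch, num_dists * act_size)
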
Example #3
import pytest
import torch

# Imports assumed from the ML-Agents codebase (module paths may vary by version)
from mlagents.trainers.torch.action_model import DistInstances
from mlagents.trainers.torch.agent_action import AgentAction
from mlagents.trainers.torch.distributions import (
    CategoricalDistInstance,
    GaussianDistInstance,
)


def test_get_probs_and_entropy():
    inp_size = 4
    act_size = 2
    # create_action_model is a helper defined alongside this test; it builds an
    # ActionModel (and action masks) for the given input and action sizes.
    action_model, masks = create_action_model(inp_size, act_size)

    _continuous_dist = GaussianDistInstance(torch.zeros((1, 2)),
                                            torch.ones((1, 2)))
    act_size = 2
    test_prob = torch.tensor(
        [[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)]
    )
    _discrete_dist_list = [
        CategoricalDistInstance(test_prob),
        CategoricalDistInstance(test_prob),
    ]
    dist_tuple = DistInstances(_continuous_dist, _discrete_dist_list)

    agent_action = AgentAction(
        torch.zeros((1, 2)), [torch.tensor([0]), torch.tensor([1])]
    )

    log_probs, entropies = action_model._get_probs_and_entropy(
        agent_action, dist_tuple)

    assert log_probs.continuous_tensor.shape == (1, 2)
    assert len(log_probs.discrete_list) == 2
    for _disc in log_probs.discrete_list:
        assert _disc.shape == (1, )
    assert len(log_probs.all_discrete_list) == 2
    for _disc in log_probs.all_discrete_list:
        assert _disc.shape == (1, 2)

    for clp in log_probs.continuous_tensor[0]:
        # Log prob of standard normal at 0
        assert clp == pytest.approx(-0.919, abs=0.01)

    assert log_probs.discrete_list[0] > log_probs.discrete_list[1]

    # Expected: the Gaussian entropy (1.4189) followed by one entropy per categorical branch
    for ent, val in zip(entropies[0], [1.4189, 0.6191, 0.6191]):
        assert ent == pytest.approx(val, abs=0.01)
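
The expected entropies can be reproduced with plain torch.distributions. Note that 0.6191 matches Categorical(logits=test_prob) rather than Categorical(probs=test_prob), which suggests (an assumption here, but consistent with the asserted numbers) that CategoricalDistInstance treats its input tensor as logits:

import torch

print(torch.distributions.Normal(0.0, 1.0).entropy().item())  # ~1.4189
cat = torch.distributions.Categorical(logits=torch.tensor([0.9, 0.1]))
print(cat.probs)             # softmax([0.9, 0.1]) ~ [0.69, 0.31]
print(cat.entropy().item())  # ~0.6191
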