Ejemplo n.º 1
0
def test_entropy_categorical():
    """
    Check that ``CategoricalActionHead.entropy`` agrees with the entropy
    reported by a reference ``d.Categorical`` built from the same logits.
    """
    head = CategoricalActionHead(1, 5)

    # Normalised log-probabilities over five categories
    logits = F.log_softmax(torch.tensor([0.0, 1.0, 2.0, 3.0, 4.0]), dim=0)

    distrib = d.Categorical(logits=logits)

    entropy1 = distrib.entropy()
    # The head receives the same logits with an extra leading axis of size 1
    entropy2 = head.entropy(logits[None])

    # The tensors use torch's default floating dtype (float32 unless changed),
    # so the default rtol of 1e-7 sits at machine-epsilon level and is brittle.
    # Use the same 1e-5 relative tolerance as the other categorical tests.
    nt.assert_allclose(entropy1.item(), entropy2.item(), rtol=1e-5)
Ejemplo n.º 2
0
def test_kl_divergence_categorical():
    """
    Check that the head's ``kl_divergence`` agrees with ``d.kl_divergence``
    evaluated on the equivalent pair of ``d.Categorical`` distributions.
    """
    action_head = CategoricalActionHead(1, 5)

    raw_p = torch.tensor([0.0, 1.0, 2.0, 3.0, 4.0])
    raw_q = torch.tensor([-1.0, 0.2, 5.0, 2.0, 8.0])
    log_p = F.log_softmax(raw_p, dim=0)
    log_q = F.log_softmax(raw_q, dim=0)

    # Reference value from the distribution objects
    expected = d.kl_divergence(
        d.Categorical(logits=log_p),
        d.Categorical(logits=log_q),
    )
    # The head receives each set of logits with an extra leading axis of size 1
    actual = action_head.kl_divergence(log_p[None], log_q[None])

    nt.assert_allclose(expected.item(), actual.item(), rtol=1e-5)
Ejemplo n.º 3
0
def test_neglogp_categorical():
    """
    Check the per-action log-probabilities returned by the head's ``logprob``
    against ``log_prob`` of a reference ``d.Categorical`` distribution.
    """
    action_head = CategoricalActionHead(1, 5)

    log_probs = F.log_softmax(torch.tensor([0.0, 1.0, 2.0, 3.0, 4.0]), dim=0)
    actions = torch.tensor([0, 1, 2, 3, 4])

    # One copy of the same logits row for each of the five actions
    batched_logits = torch.stack([log_probs] * 5, dim=0)

    expected = d.Categorical(logits=log_probs).log_prob(actions)
    actual = action_head.logprob(actions, batched_logits)

    nt.assert_allclose(
        expected.detach().cpu().numpy(),
        actual.detach().cpu().numpy(),
        rtol=1e-5,
    )
Ejemplo n.º 4
0
def test_sample_categorical():
    """
    Check the mean of the values sampled by the head for two sets of logits.

    With all-zero logits the absolute sample mean must lie strictly between
    1.9 and 2.1. With logits of -10 for indices 0-3 and +10 for index 4 it
    must lie strictly between 3.9 and 4.1.
    """
    action_head = CategoricalActionHead(1, 5)

    # Case 1: identical (zero) logits for every category
    flat_logits = torch.from_numpy(np.zeros((10000, 5)))
    flat_draws = action_head.sample(flat_logits).detach().cpu().numpy()
    flat_mean = np.abs(flat_draws.mean(axis=0))

    nt.assert_array_less(flat_mean, 2.1)
    nt.assert_array_less(1.9, flat_mean)

    # Case 2: logits that strongly favour the last category
    peaked = np.full((10000, 5), -10.0)
    peaked[:, 4] = 10.0

    peaked_logits = F.log_softmax(torch.from_numpy(peaked), dim=1)
    peaked_draws = action_head.sample(peaked_logits).detach().cpu().numpy()
    peaked_mean = np.abs(peaked_draws.mean(axis=0))

    nt.assert_array_less(peaked_mean, 4.1)
    nt.assert_array_less(3.9, peaked_mean)