예제 #1
0
def test_networkbody_vector():
    torch.manual_seed(0)
    obs_size = 4
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size,)]

    networkbody = NetworkBody(
        create_observation_specs_with_shapes(obs_shapes),
        network_settings,
        encoded_act_size=2,
    )
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
    sample_obs = 0.1 * torch.ones((1, obs_size))
    sample_act = 0.1 * torch.ones((1, 2))

    for _ in range(300):
        encoded, _ = networkbody([sample_obs], sample_act)
        assert encoded.shape == (1, network_settings.hidden_units)
        # Try to force output to 1
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # In the last step, values should be close to 1
    for _enc in encoded.flatten().tolist():
        assert _enc == pytest.approx(1.0, abs=0.1)
예제 #2
0
def test_gaussian_distribution(conditional_sigma, tanh_squash):
    torch.manual_seed(0)
    hidden_size = 16
    act_size = 4
    sample_embedding = torch.ones((1, 16))
    gauss_dist = GaussianDistribution(
        hidden_size,
        act_size,
        conditional_sigma=conditional_sigma,
        tanh_squash=tanh_squash,
    )

    # Make sure backprop works
    force_action = torch.zeros((1, act_size))
    optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3)

    for _ in range(50):
        dist_inst = gauss_dist(sample_embedding)
        if tanh_squash:
            assert isinstance(dist_inst, TanhGaussianDistInstance)
        else:
            assert isinstance(dist_inst, GaussianDistInstance)
        log_prob = dist_inst.log_prob(force_action)
        loss = torch.nn.functional.mse_loss(log_prob,
                                            -2 * torch.ones(log_prob.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    for prob in log_prob.flatten().tolist():
        assert prob == pytest.approx(-2, abs=0.1)
예제 #3
0
def test_sac_optimizer_update(dummy_config, rnn, visual, discrete):
    torch.manual_seed(0)
    # Test evaluate
    optimizer = create_sac_optimizer_mock(dummy_config,
                                          use_rnn=rnn,
                                          use_discrete=discrete,
                                          use_visual=visual)
    # Test update
    update_buffer = mb.simulate_rollout(BUFFER_INIT_SAMPLES,
                                        optimizer.policy.behavior_spec,
                                        memory_size=24)
    # Mock out reward signal eval
    update_buffer["extrinsic_rewards"] = update_buffer["environment_rewards"]
    return_stats = optimizer.update(
        update_buffer,
        num_sequences=update_buffer.num_experiences //
        optimizer.policy.sequence_length,
    )
    # Make sure we have the right stats
    required_stats = [
        "Losses/Policy Loss",
        "Losses/Value Loss",
        "Losses/Q1 Loss",
        "Losses/Q2 Loss",
        "Policy/Entropy Coeff",
        "Policy/Learning Rate",
    ]
    for stat in required_stats:
        assert stat in return_stats.keys()
예제 #4
0
def test_multinetworkbody_num_agents(with_actions):
    torch.manual_seed(0)
    act_size = 2
    obs_size = 4
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size,)]
    action_spec = ActionSpec(act_size, tuple(act_size for _ in range(act_size)))
    networkbody = MultiAgentNetworkBody(
        create_observation_specs_with_shapes(obs_shapes), network_settings, action_spec
    )
    sample_obs = [[0.1 * torch.ones((1, obs_size))]]
    # simulate baseline in POCA
    sample_act = [
        AgentAction(
            0.1 * torch.ones((1, 2)), [0.1 * torch.ones(1) for _ in range(act_size)]
        )
    ]
    for n_agent, max_so_far in [(1, 1), (5, 5), (4, 5), (10, 10), (5, 10), (1, 10)]:
        if with_actions:
            encoded, _ = networkbody(
                obs_only=sample_obs * (n_agent - 1), obs=sample_obs, actions=sample_act
            )
        else:
            encoded, _ = networkbody(obs_only=sample_obs * n_agent, obs=[], actions=[])
        # look at the last value of the hidden units (the number of agents)
        target = (n_agent * 1.0 / max_so_far) * 2 - 1
        assert abs(encoded[0, -1].item() - target) < 1e-6
        assert encoded[0, -1].item() <= 1
        assert encoded[0, -1].item() >= -1
예제 #5
0
def test_networkbody_lstm():
    torch.manual_seed(0)
    obs_size = 4
    seq_len = 16
    network_settings = NetworkSettings(
        memory=NetworkSettings.MemorySettings(sequence_length=seq_len, memory_size=12)
    )
    obs_shapes = [(obs_size,)]

    networkbody = NetworkBody(
        create_observation_specs_with_shapes(obs_shapes), network_settings
    )
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
    sample_obs = torch.ones((1, seq_len, obs_size))

    for _ in range(200):
        encoded, _ = networkbody([sample_obs], memories=torch.ones(1, seq_len, 12))
        # Try to force output to 1
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # In the last step, values should be close to 1
    for _enc in encoded.flatten().tolist():
        assert _enc == pytest.approx(1.0, abs=0.1)
예제 #6
0
def test_networkbody_visual():
    torch.manual_seed(1)
    vec_obs_size = 4
    obs_size = (84, 84, 3)
    network_settings = NetworkSettings()
    obs_shapes = [(vec_obs_size,), obs_size]

    networkbody = NetworkBody(
        create_observation_specs_with_shapes(obs_shapes), network_settings
    )
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
    sample_obs = 0.1 * torch.ones((1, 84, 84, 3), dtype=torch.float32)
    sample_vec_obs = torch.ones((1, vec_obs_size), dtype=torch.float32)
    obs = [sample_vec_obs] + [sample_obs]
    loss = 1
    step = 0
    while loss > 1e-6 and step < 1e3:
        encoded, _ = networkbody(obs)
        assert encoded.shape == (1, network_settings.hidden_units)
        # Try to force output to 1
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        step += 1
    # In the last step, values should be close to 1
    for _enc in encoded.flatten().tolist():
        assert _enc == pytest.approx(1.0, abs=0.1)
예제 #7
0
def test_valuenetwork():
    torch.manual_seed(0)
    obs_size = 4
    num_outputs = 2
    network_settings = NetworkSettings()
    obs_spec = create_observation_specs_with_shapes([(obs_size,)])

    stream_names = [f"stream_name{n}" for n in range(4)]
    value_net = ValueNetwork(
        stream_names, obs_spec, network_settings, outputs_per_stream=num_outputs
    )
    optimizer = torch.optim.Adam(value_net.parameters(), lr=3e-3)

    for _ in range(50):
        sample_obs = torch.ones((1, obs_size))
        values, _ = value_net([sample_obs])
        loss = 0
        for s_name in stream_names:
            assert values[s_name].shape == (1, num_outputs)
            # Try to force output to 1
            loss += torch.nn.functional.mse_loss(
                values[s_name], torch.ones((1, num_outputs))
            )

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # In the last step, values should be close to 1
    for value in values.values():
        for _out in value.tolist():
            assert _out[0] == pytest.approx(1.0, abs=0.1)
예제 #8
0
def test_reward_decreases(demo_to_buffer: Any, use_actions: bool,
                          behavior_spec: BehaviorSpec, seed: int) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    buffer_expert = create_agent_buffer(behavior_spec, 1000)
    buffer_policy = create_agent_buffer(behavior_spec, 1000)
    demo_to_buffer.return_value = None, buffer_expert
    gail_settings = GAILSettings(demo_path="",
                                 learning_rate=0.005,
                                 use_vail=False,
                                 use_actions=use_actions)
    gail_rp = create_reward_provider(RewardSignalType.GAIL, behavior_spec,
                                     gail_settings)

    init_reward_expert = gail_rp.evaluate(buffer_expert)[0]
    init_reward_policy = gail_rp.evaluate(buffer_policy)[0]

    for _ in range(20):
        gail_rp.update(buffer_policy)
        reward_expert = gail_rp.evaluate(buffer_expert)[0]
        reward_policy = gail_rp.evaluate(buffer_policy)[0]
        assert reward_expert >= 0  # GAIL / VAIL reward always positive
        assert reward_policy >= 0
    reward_expert = gail_rp.evaluate(buffer_expert)[0]
    reward_policy = gail_rp.evaluate(buffer_policy)[0]
    assert reward_expert > reward_policy  # Expert reward greater than non-expert reward
    assert (reward_expert > init_reward_expert
            )  # Expert reward getting better as network trains
    assert (reward_policy < init_reward_policy
            )  # Non-expert reward getting worse as network trains
예제 #9
0
def test_reward_decreases_vail(demo_to_buffer: Any, use_actions: bool,
                               behavior_spec: BehaviorSpec, seed: int) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    buffer_expert = create_agent_buffer(behavior_spec, 1000)
    buffer_policy = create_agent_buffer(behavior_spec, 1000)
    demo_to_buffer.return_value = None, buffer_expert
    gail_settings = GAILSettings(demo_path="",
                                 learning_rate=0.005,
                                 use_vail=True,
                                 use_actions=use_actions)
    DiscriminatorNetwork.initial_beta = 0.0
    # we must set the initial value of beta to 0 for testing
    # If we do not, the kl-loss will dominate early and will block the estimator
    gail_rp = create_reward_provider(RewardSignalType.GAIL, behavior_spec,
                                     gail_settings)

    for _ in range(20):
        gail_rp.update(buffer_policy)
        reward_expert = gail_rp.evaluate(buffer_expert)[0]
        reward_policy = gail_rp.evaluate(buffer_policy)[0]
        assert reward_expert >= 0  # GAIL / VAIL reward always positive
        assert reward_policy >= 0
    reward_expert = gail_rp.evaluate(buffer_expert)[0]
    reward_policy = gail_rp.evaluate(buffer_policy)[0]
    assert reward_expert > reward_policy  # Expert reward greater than non-expert reward
예제 #10
0
def test_initialization_layer():
    torch.manual_seed(0)
    # Test Zero
    layer = linear_layer(
        3, 4, kernel_init=Initialization.Zero, bias_init=Initialization.Zero
    )
    assert torch.all(torch.eq(layer.weight.data, torch.zeros_like(layer.weight.data)))
    assert torch.all(torch.eq(layer.bias.data, torch.zeros_like(layer.bias.data)))
예제 #11
0
def test_tanh_gaussian_dist_instance():
    torch.manual_seed(0)
    act_size = 4
    dist_instance = TanhGaussianDistInstance(torch.zeros(1, act_size),
                                             torch.ones(1, act_size))
    for _ in range(10):
        action = dist_instance.sample()
        assert action.shape == (1, act_size)
        assert torch.max(action) < 1.0 and torch.min(action) > -1.0
예제 #12
0
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    rnd_settings = RNDSettings(32, 0.01)
    rnd_rp = RNDRewardProvider(behavior_spec, rnd_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    rnd_rp.update(buffer)
    reward_old = rnd_rp.evaluate(buffer)[0]
    for _ in range(100):
        rnd_rp.update(buffer)
        reward_new = rnd_rp.evaluate(buffer)[0]
    assert reward_new < reward_old
예제 #13
0
def test_next_state_prediction(behavior_spec: BehaviorSpec, seed: int) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    curiosity_settings = CuriositySettings(32, 0.1)
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    for _ in range(100):
        curiosity_rp.update(buffer)
    prediction = curiosity_rp._network.predict_next_state(buffer)[0]
    target = curiosity_rp._network.get_next_state(buffer)[0]
    error = float(ModelUtils.to_numpy(torch.mean((prediction - target)**2)))
    assert error < 0.001
예제 #14
0
def test_reward_decreases(behavior_spec: BehaviorSpec, seed: int) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    curiosity_settings = CuriositySettings(32, 0.01)
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    curiosity_rp.update(buffer)
    reward_old = curiosity_rp.evaluate(buffer)[0]
    for _ in range(20):
        curiosity_rp.update(buffer)
        reward_new = curiosity_rp.evaluate(buffer)[0]
    assert reward_new < reward_old
예제 #15
0
def test_lstm_layer():
    torch.manual_seed(0)
    # Test zero for LSTM
    layer = lstm_layer(
        4, 4, kernel_init=Initialization.Zero, bias_init=Initialization.Zero
    )
    for name, param in layer.named_parameters():
        if "weight" in name:
            assert torch.all(torch.eq(param.data, torch.zeros_like(param.data)))
        elif "bias" in name:
            assert torch.all(
                torch.eq(param.data[4:8], torch.ones_like(param.data[4:8]))
            )
예제 #16
0
def test_continuous_action_prediction(behavior_spec: BehaviorSpec,
                                      seed: int) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    curiosity_settings = CuriositySettings(32, 0.1)
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    for _ in range(200):
        curiosity_rp.update(buffer)
    prediction = curiosity_rp._network.predict_action(buffer)[0]
    target = torch.tensor(buffer["continuous_action"][0])
    error = torch.mean((prediction - target)**2).item()
    assert error < 0.001
예제 #17
0
def test_multinetworkbody_lstm(with_actions):
    torch.manual_seed(0)
    obs_size = 4
    act_size = 2
    seq_len = 16
    n_agents = 3
    network_settings = NetworkSettings(memory=NetworkSettings.MemorySettings(
        sequence_length=seq_len, memory_size=12))

    obs_shapes = [(obs_size, )]
    action_spec = ActionSpec(act_size,
                             tuple(act_size for _ in range(act_size)))
    networkbody = MultiAgentNetworkBody(
        create_observation_specs_with_shapes(obs_shapes), network_settings,
        action_spec)
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
    sample_obs = [[0.1 * torch.ones((seq_len, obs_size))]
                  for _ in range(n_agents)]
    # simulate baseline in POCA
    sample_act = [
        AgentAction(
            0.1 * torch.ones((seq_len, 2)),
            [0.1 * torch.ones(seq_len) for _ in range(act_size)],
        ) for _ in range(n_agents - 1)
    ]

    for _ in range(300):
        if with_actions:
            encoded, _ = networkbody(
                obs_only=sample_obs[:1],
                obs=sample_obs[1:],
                actions=sample_act,
                memories=torch.ones(1, 1, 12),
                sequence_length=seq_len,
            )
        else:
            encoded, _ = networkbody(
                obs_only=sample_obs,
                obs=[],
                actions=[],
                memories=torch.ones(1, 1, 12),
                sequence_length=seq_len,
            )
        # Try to force output to 1
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # In the last step, values should be close to 1
    for _enc in encoded.flatten().tolist():
        assert _enc == pytest.approx(1.0, abs=0.1)
예제 #18
0
def test_gaussian_dist_instance():
    torch.manual_seed(0)
    act_size = 4
    dist_instance = GaussianDistInstance(torch.zeros(1, act_size),
                                         torch.ones(1, act_size))
    action = dist_instance.sample()
    assert action.shape == (1, act_size)
    for log_prob in dist_instance.log_prob(torch.zeros(
        (1, act_size))).flatten():
        # Log prob of standard normal at 0
        assert log_prob == pytest.approx(-0.919, abs=0.01)

    for ent in dist_instance.entropy().flatten():
        # entropy of standard normal at 0, based on 1/2 + ln(sqrt(2pi)sigma)
        assert ent == pytest.approx(1.42, abs=0.01)
예제 #19
0
def test_lstm_class():
    torch.manual_seed(0)
    input_size = 12
    memory_size = 64
    batch_size = 8
    seq_len = 16
    lstm = LSTM(input_size, memory_size)

    assert lstm.memory_size == memory_size

    sample_input = torch.ones((batch_size, seq_len, input_size))
    sample_memories = torch.ones((1, batch_size, memory_size))
    out, mem = lstm(sample_input, sample_memories)
    # Hidden size should be half of memory_size
    assert out.shape == (batch_size, seq_len, memory_size // 2)
    assert mem.shape == (1, batch_size, memory_size)
예제 #20
0
def test_predict_minimum_training():
    # of 5 numbers, predict index of min
    np.random.seed(1336)
    torch.manual_seed(1336)
    n_k = 5
    size = n_k + 1
    embedding_size = 64
    entity_embeddings = EntityEmbeddings(size, [size],
                                         embedding_size, [n_k],
                                         concat_self=False)
    transformer = ResidualSelfAttention(embedding_size)
    l_layer = LinearEncoder(embedding_size, 2, n_k)
    loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(
        list(entity_embeddings.parameters()) + list(transformer.parameters()) +
        list(l_layer.parameters()),
        lr=0.001,
        weight_decay=1e-6,
    )

    batch_size = 200
    onehots = ModelUtils.actions_to_onehot(
        torch.range(0, n_k - 1).unsqueeze(1), [n_k])[0]
    onehots = onehots.expand((batch_size, -1, -1))
    losses = []
    for _ in range(400):
        num = np.random.randint(0, n_k)
        inp = torch.rand((batch_size, num + 1, 1))
        with torch.no_grad():
            # create the target : The minimum
            argmin = torch.argmin(inp, dim=1)
            argmin = argmin.squeeze()
            argmin = argmin.detach()
        sliced_oh = onehots[:, :num + 1]
        inp = torch.cat([inp, sliced_oh], dim=2)

        embeddings = entity_embeddings(inp, [inp])
        masks = EntityEmbeddings.get_masks([inp])
        prediction = transformer(embeddings, masks)
        prediction = l_layer(prediction)
        ce = loss(prediction, argmin)
        losses.append(ce.item())
        print(ce.item())
        optimizer.zero_grad()
        ce.backward()
        optimizer.step()
    assert np.array(losses[-20:]).mean() < 0.1
예제 #21
0
def test_multi_categorical_distribution():
    torch.manual_seed(0)
    hidden_size = 16
    act_size = [3, 3, 4]
    sample_embedding = torch.ones((1, 16))
    gauss_dist = MultiCategoricalDistribution(hidden_size, act_size)

    # Make sure backprop works
    optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3)

    def create_test_prob(size: int) -> torch.Tensor:
        test_prob = torch.tensor([[1.0 - 0.01 * (size - 1)] + [0.01] *
                                  (size - 1)])  # High prob for first action
        return test_prob.log()

    for _ in range(100):
        dist_insts = gauss_dist(sample_embedding,
                                masks=torch.ones((1, sum(act_size))))
        loss = 0
        for i, dist_inst in enumerate(dist_insts):
            assert isinstance(dist_inst, CategoricalDistInstance)
            log_prob = dist_inst.all_log_prob()
            test_log_prob = create_test_prob(act_size[i])
            # Force log_probs to match the high probability for the first action generated by
            # create_test_prob
            loss += torch.nn.functional.mse_loss(log_prob, test_log_prob)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    for dist_inst, size in zip(dist_insts, act_size):
        # Check that the log probs are close to the fake ones that we generated.
        test_log_probs = create_test_prob(size)
        for _prob, _test_prob in zip(
                dist_inst.all_log_prob().flatten().tolist(),
                test_log_probs.flatten().tolist(),
        ):
            assert _prob == pytest.approx(_test_prob, abs=0.1)

    # Test masks
    masks = []
    for branch in act_size:
        masks += [0] * (branch - 1) + [1]
    masks = torch.tensor([masks])
    dist_insts = gauss_dist(sample_embedding, masks=masks)
    for dist_inst in dist_insts:
        log_prob = dist_inst.all_log_prob()
        assert log_prob.flatten()[-1] == pytest.approx(0, abs=0.001)
예제 #22
0
def test_categorical_dist_instance():
    torch.manual_seed(0)
    act_size = 4
    test_prob = torch.tensor([[1.0 - 0.1 * (act_size - 1)] + [0.1] *
                              (act_size - 1)])  # High prob for first action
    dist_instance = CategoricalDistInstance(test_prob)

    for _ in range(10):
        action = dist_instance.sample()
        assert action.shape == (1, 1)
        assert action < act_size

    # Make sure the first action as higher probability than the others.
    prob_first_action = dist_instance.log_prob(torch.tensor([0]))

    for i in range(1, act_size):
        assert dist_instance.log_prob(torch.tensor([i])) < prob_first_action
예제 #23
0
def test_simple_transformer_training():
    np.random.seed(1336)
    torch.manual_seed(1336)
    size, n_k, = 3, 5
    embedding_size = 64
    entity_embeddings = EntityEmbeddings(size, [size], [n_k], embedding_size)
    transformer = ResidualSelfAttention(embedding_size, [n_k])
    l_layer = linear_layer(embedding_size, size)
    optimizer = torch.optim.Adam(list(transformer.parameters()) +
                                 list(l_layer.parameters()),
                                 lr=0.001)
    batch_size = 200
    point_range = 3
    init_error = -1.0
    for _ in range(250):
        center = torch.rand((batch_size, size)) * point_range * 2 - point_range
        key = torch.rand(
            (batch_size, n_k, size)) * point_range * 2 - point_range
        with torch.no_grad():
            # create the target : The key closest to the query in euclidean distance
            distance = torch.sum((center.reshape(
                (batch_size, 1, size)) - key)**2,
                                 dim=2)
            argmin = torch.argmin(distance, dim=1)
            target = []
            for i in range(batch_size):
                target += [key[i, argmin[i], :]]
            target = torch.stack(target, dim=0)
            target = target.detach()

        embeddings = entity_embeddings(center, [key])
        masks = EntityEmbeddings.get_masks([key])
        prediction = transformer.forward(embeddings, masks)
        prediction = l_layer(prediction)
        prediction = prediction.reshape((batch_size, size))
        error = torch.mean((prediction - target)**2, dim=1)
        error = torch.mean(error) / 2
        if init_error == -1.0:
            init_error = error.item()
        else:
            assert error.item() < init_error
        print(error.item())
        optimizer.zero_grad()
        error.backward()
        optimizer.step()
    assert error.item() < 0.3
예제 #24
0
def test_predict_closest_training():
    np.random.seed(1336)
    torch.manual_seed(1336)
    size, n_k, = 3, 5
    embedding_size = 64
    entity_embeddings = EntityEmbedding(size, n_k, embedding_size)
    entity_embeddings.add_self_embedding(size)
    transformer = ResidualSelfAttention(embedding_size, n_k)
    l_layer = linear_layer(embedding_size, size)
    optimizer = torch.optim.Adam(
        list(entity_embeddings.parameters()) + list(transformer.parameters()) +
        list(l_layer.parameters()),
        lr=0.001,
        weight_decay=1e-6,
    )
    batch_size = 200
    for _ in range(200):
        center = torch.rand((batch_size, size))
        key = torch.rand((batch_size, n_k, size))
        with torch.no_grad():
            # create the target : The key closest to the query in euclidean distance
            distance = torch.sum((center.reshape(
                (batch_size, 1, size)) - key)**2,
                                 dim=2)
            argmin = torch.argmin(distance, dim=1)
            target = []
            for i in range(batch_size):
                target += [key[i, argmin[i], :]]
            target = torch.stack(target, dim=0)
            target = target.detach()

        embeddings = entity_embeddings(center, key)
        masks = get_zero_entities_mask([key])
        prediction = transformer.forward(embeddings, masks)
        prediction = l_layer(prediction)
        prediction = prediction.reshape((batch_size, size))
        error = torch.mean((prediction - target)**2, dim=1)
        error = torch.mean(error) / 2
        print(error.item())
        optimizer.zero_grad()
        error.backward()
        optimizer.step()
    assert error.item() < 0.02
예제 #25
0
def test_all_masking(mask_value):
    # We make sure that a mask of all zeros or all ones will not trigger an error
    np.random.seed(1336)
    torch.manual_seed(1336)
    size, n_k, = 3, 5
    embedding_size = 64
    entity_embeddings = EntityEmbedding(size, n_k, embedding_size)
    entity_embeddings.add_self_embedding(size)
    transformer = ResidualSelfAttention(embedding_size, n_k)
    l_layer = linear_layer(embedding_size, size)
    optimizer = torch.optim.Adam(
        list(entity_embeddings.parameters()) + list(transformer.parameters()) +
        list(l_layer.parameters()),
        lr=0.001,
        weight_decay=1e-6,
    )
    batch_size = 20
    for _ in range(5):
        center = torch.rand((batch_size, size))
        key = torch.rand((batch_size, n_k, size))
        with torch.no_grad():
            # create the target : The key closest to the query in euclidean distance
            distance = torch.sum((center.reshape(
                (batch_size, 1, size)) - key)**2,
                                 dim=2)
            argmin = torch.argmin(distance, dim=1)
            target = []
            for i in range(batch_size):
                target += [key[i, argmin[i], :]]
            target = torch.stack(target, dim=0)
            target = target.detach()

        embeddings = entity_embeddings(center, key)
        masks = [torch.ones_like(key[:, :, 0]) * mask_value]
        prediction = transformer.forward(embeddings, masks)
        prediction = l_layer(prediction)
        prediction = prediction.reshape((batch_size, size))
        error = torch.mean((prediction - target)**2, dim=1)
        error = torch.mean(error) / 2
        optimizer.zero_grad()
        error.backward()
        optimizer.step()
예제 #26
0
def test_visual_encoder_trains(vis_class, size):
    torch.manual_seed(0)
    image_size = (size, size, 1)
    batch = 100

    inputs = torch.cat([
        torch.zeros((batch, ) + image_size),
        torch.ones((batch, ) + image_size)
    ],
                       dim=0)
    target = torch.cat([torch.zeros((batch, )), torch.ones((batch, ))], dim=0)
    enc = vis_class(image_size[0], image_size[1], image_size[2], 1)
    optimizer = torch.optim.Adam(enc.parameters(), lr=0.001)

    for _ in range(15):
        prediction = enc(inputs)[:, 0]
        loss = torch.mean((target - prediction)**2)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    assert loss.item() < 0.05
예제 #27
0
def test_multinetworkbody_visual(with_actions):
    torch.manual_seed(0)
    act_size = 2
    n_agents = 3
    obs_size = 4
    vis_obs_size = (84, 84, 3)
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size, ), vis_obs_size]
    action_spec = ActionSpec(act_size,
                             tuple(act_size for _ in range(act_size)))
    networkbody = MultiAgentNetworkBody(
        create_observation_specs_with_shapes(obs_shapes), network_settings,
        action_spec)
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
    sample_obs = [[0.1 * torch.ones(
        (1, obs_size))] + [0.1 * torch.ones((1, 84, 84, 3))]
                  for _ in range(n_agents)]
    # simulate baseline in POCA
    sample_act = [
        AgentAction(0.1 * torch.ones((1, 2)),
                    [0.1 * torch.ones(1) for _ in range(act_size)])
        for _ in range(n_agents - 1)
    ]
    for _ in range(300):
        if with_actions:
            encoded, _ = networkbody(obs_only=sample_obs[:1],
                                     obs=sample_obs[1:],
                                     actions=sample_act)
        else:
            encoded, _ = networkbody(obs_only=sample_obs, obs=[], actions=[])

        assert encoded.shape == (1, network_settings.hidden_units)
        # Try to force output to 1
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # In the last step, values should be close to 1
    for _enc in encoded.flatten().tolist():
        assert _enc == pytest.approx(1.0, abs=0.1)
예제 #28
0
def test_multi_head_attention_training():
    np.random.seed(1336)
    torch.manual_seed(1336)
    size, n_h, n_k, n_q = 3, 10, 5, 1
    embedding_size = 64
    mha = MultiHeadAttention(size, size, size, size, n_h, embedding_size)
    optimizer = torch.optim.Adam(mha.parameters(), lr=0.001)
    batch_size = 200
    point_range = 3
    init_error = -1.0
    for _ in range(50):
        query = torch.rand(
            (batch_size, n_q, size)) * point_range * 2 - point_range
        key = torch.rand(
            (batch_size, n_k, size)) * point_range * 2 - point_range
        value = key
        with torch.no_grad():
            # create the target : The key closest to the query in euclidean distance
            distance = torch.sum((query - key)**2, dim=2)
            argmin = torch.argmin(distance, dim=1)
            target = []
            for i in range(batch_size):
                target += [key[i, argmin[i], :]]
            target = torch.stack(target, dim=0)
            target = target.detach()

        prediction, _ = mha.forward(query, key, value)
        prediction = prediction.reshape((batch_size, size))
        error = torch.mean((prediction - target)**2, dim=1)
        error = torch.mean(error) / 2
        if init_error == -1.0:
            init_error = error.item()
        else:
            assert error.item() < init_error
        print(error.item())
        optimizer.zero_grad()
        error.backward()
        optimizer.step()
    assert error.item() < 0.5
예제 #29
0
def test_networkbody_visual():
    torch.manual_seed(0)
    vec_obs_size = 4
    obs_size = (84, 84, 3)
    network_settings = NetworkSettings()
    obs_shapes = [(vec_obs_size, ), obs_size]

    networkbody = NetworkBody(obs_shapes, network_settings)
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
    sample_obs = 0.1 * torch.ones((1, 84, 84, 3))
    sample_vec_obs = torch.ones((1, vec_obs_size))

    for _ in range(150):
        encoded, _ = networkbody([sample_vec_obs], [sample_obs])
        assert encoded.shape == (1, network_settings.hidden_units)
        # Try to force output to 1
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # In the last step, values should be close to 1
    for _enc in encoded.flatten():
        assert _enc == pytest.approx(1.0, abs=0.1)
예제 #30
0
def test_layer_norm():
    torch.manual_seed(0)
    torch_ln = torch.nn.LayerNorm(10, elementwise_affine=False)
    cust_ln = LayerNorm()

    sample_input = torch.rand(10)
    assert torch.all(
        torch.isclose(
            torch_ln(sample_input), cust_ln(sample_input), atol=1e-5, rtol=0.0
        )
    )
    sample_input = torch.rand((4, 10))
    assert torch.all(
        torch.isclose(
            torch_ln(sample_input), cust_ln(sample_input), atol=1e-5, rtol=0.0
        )
    )
    sample_input = torch.rand((7, 6, 10))
    assert torch.all(
        torch.isclose(
            torch_ln(sample_input), cust_ln(sample_input), atol=1e-5, rtol=0.0
        )
    )