Example #1
def test_gaussian_distribution(conditional_sigma, tanh_squash):
    torch.manual_seed(0)
    hidden_size = 16
    act_size = 4
    sample_embedding = torch.ones((1, 16))
    gauss_dist = GaussianDistribution(
        hidden_size,
        act_size,
        conditional_sigma=conditional_sigma,
        tanh_squash=tanh_squash,
    )

    # Make sure backprop works
    force_action = torch.zeros((1, act_size))
    optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3)

    for _ in range(50):
        dist_inst = gauss_dist(sample_embedding)[0]
        if tanh_squash:
            assert isinstance(dist_inst, TanhGaussianDistInstance)
        else:
            assert isinstance(dist_inst, GaussianDistInstance)
        log_prob = dist_inst.log_prob(force_action)
        loss = torch.nn.functional.mse_loss(log_prob,
                                            -2 * torch.ones(log_prob.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    for prob in log_prob.flatten():
        assert prob == pytest.approx(-2, abs=0.1)
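This test takes conditional_sigma and tanh_squash as arguments, so in its original context it is presumably driven by pytest.mark.parametrize; the decorators just aren't shown in the snippet. A minimal sketch of how such a parametrization could look (the decorator and its values here are assumptions, not the project's actual code):

import pytest

# Hypothetical parametrization; the real decorators are elided from the snippet.
@pytest.mark.parametrize("conditional_sigma", [True, False])
@pytest.mark.parametrize("tanh_squash", [True, False])
def test_gaussian_distribution(conditional_sigma, tanh_squash):
    ...  # body as in the example above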
Example #2
def test_networkbody_vector():
    torch.manual_seed(0)
    obs_size = 4
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size, )]

    networkbody = NetworkBody(
        create_observation_specs_with_shapes(obs_shapes),
        network_settings,
        encoded_act_size=2,
    )
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
    sample_obs = 0.1 * torch.ones((1, obs_size))
    sample_act = 0.1 * torch.ones((1, 2))

    for _ in range(300):
        encoded, _ = networkbody([sample_obs], sample_act)
        assert encoded.shape == (1, network_settings.hidden_units)
        # Try to force output to 1
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # In the last step, values should be close to 1
    for _enc in encoded.flatten():
        assert _enc == pytest.approx(1.0, abs=0.1)
Example #3
def test_valuenetwork():
    torch.manual_seed(0)
    obs_size = 4
    num_outputs = 2
    network_settings = NetworkSettings()
    obs_spec = create_observation_specs_with_shapes([(obs_size, )])

    stream_names = [f"stream_name{n}" for n in range(4)]
    value_net = ValueNetwork(stream_names,
                             obs_spec,
                             network_settings,
                             outputs_per_stream=num_outputs)
    optimizer = torch.optim.Adam(value_net.parameters(), lr=3e-3)

    for _ in range(50):
        sample_obs = torch.ones((1, obs_size))
        values, _ = value_net([sample_obs])
        loss = 0
        for s_name in stream_names:
            assert values[s_name].shape == (1, num_outputs)
            # Try to force output to 1
            loss += torch.nn.functional.mse_loss(values[s_name],
                                                 torch.ones((1, num_outputs)))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # In the last step, values should be close to 1
    for value in values.values():
        for _out in value.tolist():
            assert _out[0] == pytest.approx(1.0, abs=0.1)
Example #4
def test_min_visual_size():
    # Make sure each EncoderType has an entry in MIN_RESOLUTION_FOR_ENCODER
    assert set(ModelUtils.MIN_RESOLUTION_FOR_ENCODER.keys()) == set(EncoderType)

    for encoder_type in EncoderType:
        good_size = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[encoder_type]
        vis_input = torch.ones((1, good_size, good_size, 3))
        ModelUtils._check_resolution_for_encoder(good_size, good_size, encoder_type)
        enc_func = ModelUtils.get_encoder_for_type(encoder_type)
        enc = enc_func(good_size, good_size, 3, 1)
        enc.forward(vis_input)

        # Anything under the min size should raise an exception. If not, decrease the min size!
        with pytest.raises(Exception):
            bad_size = ModelUtils.MIN_RESOLUTION_FOR_ENCODER[encoder_type] - 1
            vis_input = torch.ones((1, bad_size, bad_size, 3))

            with pytest.raises(UnityTrainerException):
                # Make sure we'd hit a friendly error during model setup time.
                ModelUtils._check_resolution_for_encoder(
                    bad_size, bad_size, encoder_type
                )

            enc = enc_func(bad_size, bad_size, 3, 1)
            enc.forward(vis_input)
Example #5
def test_multinetworkbody_num_agents(with_actions):
    torch.manual_seed(0)
    act_size = 2
    obs_size = 4
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size,)]
    action_spec = ActionSpec(act_size, tuple(act_size for _ in range(act_size)))
    networkbody = MultiAgentNetworkBody(
        create_observation_specs_with_shapes(obs_shapes), network_settings, action_spec
    )
    sample_obs = [[0.1 * torch.ones((1, obs_size))]]
    # simulate baseline in POCA
    sample_act = [
        AgentAction(
            0.1 * torch.ones((1, 2)), [0.1 * torch.ones(1) for _ in range(act_size)]
        )
    ]
    for n_agent, max_so_far in [(1, 1), (5, 5), (4, 5), (10, 10), (5, 10), (1, 10)]:
        if with_actions:
            encoded, _ = networkbody(
                obs_only=sample_obs * (n_agent - 1), obs=sample_obs, actions=sample_act
            )
        else:
            encoded, _ = networkbody(obs_only=sample_obs * n_agent, obs=[], actions=[])
        # look at the last value of the hidden units (the number of agents)
        target = (n_agent * 1.0 / max_so_far) * 2 - 1
        assert abs(encoded[0, -1].item() - target) < 1e-6
        assert encoded[0, -1].item() <= 1
        assert encoded[0, -1].item() >= -1
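The target expression rescales the agent count from [0, max_so_far] to [-1, 1], which is why the last hidden unit is asserted to stay inside those bounds. The same arithmetic, stated standalone (illustrative only):

# Illustrative rescaling of the agent count into [-1, 1].
def scaled_agent_count(n_agent: int, max_so_far: int) -> float:
    return (n_agent * 1.0 / max_so_far) * 2 - 1

assert scaled_agent_count(10, 10) == 1.0  # the running maximum maps to 1
assert scaled_agent_count(5, 10) == 0.0   # half the maximum maps to 0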
Example #6
def test_networkbody_lstm():
    torch.manual_seed(0)
    obs_size = 4
    seq_len = 6
    network_settings = NetworkSettings(memory=NetworkSettings.MemorySettings(
        sequence_length=seq_len, memory_size=12))
    obs_shapes = [(obs_size, )]

    networkbody = NetworkBody(create_observation_specs_with_shapes(obs_shapes),
                              network_settings)
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-4)
    sample_obs = torch.ones((seq_len, obs_size))

    for _ in range(300):
        encoded, _ = networkbody([sample_obs],
                                 memories=torch.ones(1, 1, 12),
                                 sequence_length=seq_len)
        # Try to force output to 1
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # In the last step, values should be close to 1
    for _enc in encoded.flatten().tolist():
        assert _enc == pytest.approx(1.0, abs=0.1)
Example #7
def test_networkbody_visual():
    torch.manual_seed(0)
    vec_obs_size = 4
    obs_size = (84, 84, 3)
    network_settings = NetworkSettings()
    obs_shapes = [(vec_obs_size, ), obs_size]

    networkbody = NetworkBody(create_observation_specs_with_shapes(obs_shapes),
                              network_settings)
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
    sample_obs = 0.1 * torch.ones((1, 84, 84, 3))
    sample_vec_obs = torch.ones((1, vec_obs_size))
    obs = [sample_vec_obs] + [sample_obs]

    for _ in range(150):
        encoded, _ = networkbody(obs)
        assert encoded.shape == (1, network_settings.hidden_units)
        # Try to force output to 1
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # In the last step, values should be close to 1
    for _enc in encoded.flatten().tolist():
        assert _enc == pytest.approx(1.0, abs=0.1)
Example #8
def test_valueheads():
    stream_names = [f"reward_signal_{num}" for num in range(5)]
    input_size = 5
    batch_size = 4

    # Test default 1 value per head
    value_heads = ValueHeads(stream_names, input_size)
    input_data = torch.ones((batch_size, input_size))
    value_out = value_heads(input_data)  # Note: mean value will be removed shortly

    for stream_name in stream_names:
        assert value_out[stream_name].shape == (batch_size, )

    # Test that inputting the wrong size input will throw an error
    with pytest.raises(Exception):
        value_out = value_heads(torch.ones((batch_size, input_size + 2)))

    # Test multiple values per head (e.g. discrete Q function)
    output_size = 4
    value_heads = ValueHeads(stream_names, input_size, output_size)
    input_data = torch.ones((batch_size, input_size))
    value_out = value_heads(input_data)

    for stream_name in stream_names:
        assert value_out[stream_name].shape == (batch_size, output_size)
Example #9
def test_actor_critic(ac_type, lstm):
    obs_size = 4
    network_settings = NetworkSettings(
        memory=NetworkSettings.MemorySettings() if lstm else None,
        normalize=True)
    obs_spec = create_observation_specs_with_shapes([(obs_size, )])
    act_size = 2
    mask = torch.ones([1, act_size * 2])
    stream_names = [f"stream_name{n}" for n in range(4)]
    # action_spec = ActionSpec.create_continuous(act_size[0])
    action_spec = ActionSpec(act_size,
                             tuple(act_size for _ in range(act_size)))
    actor = ac_type(obs_spec, network_settings, action_spec, stream_names)
    if lstm:
        sample_obs = torch.ones(
            (1, network_settings.memory.sequence_length, obs_size))
        memories = torch.ones(
            (1, network_settings.memory.sequence_length, actor.memory_size))
    else:
        sample_obs = torch.ones((1, obs_size))
        memories = torch.tensor([])
        # memories isn't always set to None; the network should be able to
        # deal with that.
    # Test critic pass
    value_out, memories_out = actor.critic_pass([sample_obs],
                                                memories=memories)
    for stream in stream_names:
        if lstm:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length, )
            assert memories_out.shape == memories.shape
        else:
            assert value_out[stream].shape == (1, )

    # Test get_action_stats_and_value
    action, log_probs, entropies, value_out, mem_out = actor.get_action_stats_and_value(
        [sample_obs], memories=memories, masks=mask)
    if lstm:
        assert action.continuous_tensor.shape == (64, 2)
    else:
        assert action.continuous_tensor.shape == (1, 2)

    assert len(action.discrete_list) == 2
    for _disc in action.discrete_list:
        if lstm:
            assert _disc.shape == (64, 1)
        else:
            assert _disc.shape == (1, 1)

    if mem_out is not None:
        assert mem_out.shape == memories.shape
    for stream in stream_names:
        if lstm:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length, )
        else:
            assert value_out[stream].shape == (1, )
Example #10
def test_conditional_layer_initialization():
    b, input_size, goal_size, h, num_cond_layers, num_normal_layers = 7, 10, 8, 16, 2, 1
    conditional_enc = ConditionalEncoder(
        input_size, goal_size, h, num_normal_layers + num_cond_layers, num_cond_layers
    )

    input_tensor = torch.ones(b, input_size)
    goal_tensor = torch.ones(b, goal_size)

    output = conditional_enc.forward(input_tensor, goal_tensor)

    assert output.shape == (b, h)
Example #11
def test_multi_head_attention_initialization():
    n_h, emb_size = 4, 12
    n_k, n_q, b = 13, 14, 15
    mha = MultiHeadAttention(emb_size, n_h)

    query = torch.ones((b, n_q, emb_size))
    key = torch.ones((b, n_k, emb_size))
    value = torch.ones((b, n_k, emb_size))

    output, attention = mha.forward(query, key, value, n_q, n_k)

    assert output.shape == (b, n_q, emb_size)
    assert attention.shape == (b, n_h, n_q, n_k)
Example #12
def test_multi_head_attention_initialization():
    q_size, k_size, v_size, o_size, n_h, emb_size = 7, 8, 9, 10, 11, 12
    n_k, n_q, b = 13, 14, 15
    mha = MultiHeadAttention(q_size, k_size, v_size, o_size, n_h, emb_size)

    query = torch.ones((b, n_q, q_size))
    key = torch.ones((b, n_k, k_size))
    value = torch.ones((b, n_k, v_size))

    output, attention = mha.forward(query, key, value)

    assert output.shape == (b, n_q, o_size)
    assert attention.shape == (b, n_h, n_q, n_k)
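The (b, n_h, n_q, n_k) attention shape follows from each head comparing every query against every key. A shape-only sketch of scaled dot-product attention (illustrative, not the MultiHeadAttention implementation itself):

import torch

b, n_h, n_q, n_k, head_dim = 15, 11, 14, 13, 12
q = torch.ones(b, n_h, n_q, head_dim)
k = torch.ones(b, n_h, n_k, head_dim)
# Score every query against every key per head, then normalize over keys.
attention = torch.softmax(q @ k.transpose(-2, -1) / head_dim ** 0.5, dim=-1)
assert attention.shape == (b, n_h, n_q, n_k)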
Example #13
def test_lstm_class():
    torch.manual_seed(0)
    input_size = 12
    memory_size = 64
    batch_size = 8
    seq_len = 16
    lstm = LSTM(input_size, memory_size)

    assert lstm.memory_size == memory_size

    sample_input = torch.ones((batch_size, seq_len, input_size))
    sample_memories = torch.ones((1, batch_size, memory_size))
    out, mem = lstm(sample_input, sample_memories)
    # Hidden size should be half of memory_size
    assert out.shape == (batch_size, seq_len, memory_size // 2)
    assert mem.shape == (1, batch_size, memory_size)
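The memory_size // 2 output width suggests the wrapper packs the LSTM's hidden and cell states into a single memory tensor. A minimal sketch of that packing built on a plain torch.nn.LSTM (an assumption about the mechanism, not the library's code):

import torch

input_size, memory_size, batch_size, seq_len = 12, 64, 8, 16
hidden_size = memory_size // 2  # half hidden state, half cell state
lstm = torch.nn.LSTM(input_size, hidden_size, batch_first=True)

memories = torch.ones((1, batch_size, memory_size))
h0, c0 = torch.split(memories, hidden_size, dim=-1)  # unpack the two halves
out, (hn, cn) = lstm(
    torch.ones((batch_size, seq_len, input_size)),
    (h0.contiguous(), c0.contiguous()),
)
packed = torch.cat([hn, cn], dim=-1)  # repack into one memory tensor
assert out.shape == (batch_size, seq_len, hidden_size)
assert packed.shape == (1, batch_size, memory_size)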
Example #14
def export_model(network):
    vec_obs_size = 16
    num_vis_obs = 0
    dummy_vec_obs = [torch.zeros([1] + [vec_obs_size])]
    dummy_vis_obs = []
    dummy_var_len_obs = []
    dummy_masks = torch.ones([1] + [0])
    dummy_memories = torch.zeros([1] + [1] + [256])
    dummy_input = (
        dummy_vec_obs,
        dummy_vis_obs,
        dummy_var_len_obs,
        dummy_masks,
        dummy_memories,
    )
    input_names = ['vector_observation', 'action_masks', 'recurrent_in']
    dynamic_axes = {name: {0: "batch"} for name in input_names}
    output_names = [
        'version_number', 'memory_size', 'continuous_actions',
        'continuous_action_output_shape', 'action', 'is_continuous_control',
        'action_output_shape', 'recurrent_out'
    ]
    dynamic_axes.update({'continuous_actions': {0: "batch"}})
    dynamic_axes.update({'action': {0: "batch"}})

    torch.onnx.export(network,
                      dummy_input,
                      EXPORT_FILE,
                      opset_version=9,
                      input_names=input_names,
                      output_names=output_names,
                      dynamic_axes=dynamic_axes)
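As a follow-up, the exported file can be sanity-checked with the onnx package. This assumes EXPORT_FILE, a constant defined elsewhere in the module, points at the path written above:

import onnx

model = onnx.load(EXPORT_FILE)   # EXPORT_FILE is a module-level constant
onnx.checker.check_model(model)  # raises if the exported graph is invalid
print([inp.name for inp in model.graph.input])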
Example #15
def test_get_onnx_deterministic_tensors():
    inp_size = 4
    act_size = 2
    action_model, masks = create_action_model(inp_size, act_size)
    sample_inp = torch.ones((1, inp_size))
    out_tensors = action_model.get_action_out(sample_inp, masks=masks)
    (
        continuous_out,
        discrete_out,
        action_out_deprecated,
        deterministic_continuous_out,
        deterministic_discrete_out,
    ) = out_tensors
    assert continuous_out.shape == (1, 2)
    assert discrete_out.shape == (1, 2)
    assert deterministic_discrete_out.shape == (1, 2)
    assert deterministic_continuous_out.shape == (1, 2)

    # Second sampling from same distribution
    out_tensors2 = action_model.get_action_out(sample_inp, masks=masks)
    (
        continuous_out_2,
        discrete_out_2,
        action_out_2_deprecated,
        deterministic_continuous_out_2,
        deterministic_discrete_out_2,
    ) = out_tensors2
    assert not torch.all(torch.eq(continuous_out, continuous_out_2))
    assert torch.all(
        torch.eq(deterministic_continuous_out, deterministic_continuous_out_2)
    )
Example #16
def test_visual_encoder(vis_class, image_size):
    num_outputs = 128
    enc = vis_class(image_size[0], image_size[1], image_size[2], num_outputs)
    # Note: NCHW not NHWC
    sample_input = torch.ones((1, image_size[0], image_size[1], image_size[2]))
    encoding = enc(sample_input)
    assert encoding.shape == (1, num_outputs)
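The "NCHW not NHWC" note is worth spelling out: PyTorch convolutions expect channel-first tensors. An illustrative layout conversion (an aside, not part of the test):

import torch

nhwc = torch.ones((1, 84, 84, 3))  # batch, height, width, channels
nchw = nhwc.permute(0, 3, 1, 2)    # batch, channels, height, width
assert nchw.shape == (1, 3, 84, 84)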
Example #17
def test_multi_categorical_distribution():
    torch.manual_seed(0)
    hidden_size = 16
    act_size = [3, 3, 4]
    sample_embedding = torch.ones((1, 16))
    gauss_dist = MultiCategoricalDistribution(hidden_size, act_size)

    # Make sure backprop works
    optimizer = torch.optim.Adam(gauss_dist.parameters(), lr=3e-3)

    def create_test_prob(size: int) -> torch.Tensor:
        # High probability for the first action, 0.01 for each of the rest.
        test_prob = torch.tensor([[1.0 - 0.01 * (size - 1)] + [0.01] * (size - 1)])
        return test_prob.log()

    for _ in range(100):
        dist_insts = gauss_dist(sample_embedding,
                                masks=torch.ones((1, sum(act_size))))
        loss = 0
        for i, dist_inst in enumerate(dist_insts):
            assert isinstance(dist_inst, CategoricalDistInstance)
            log_prob = dist_inst.all_log_prob()
            test_log_prob = create_test_prob(act_size[i])
            # Force log_probs to match the high probability for the first action generated by
            # create_test_prob
            loss += torch.nn.functional.mse_loss(log_prob, test_log_prob)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    for dist_inst, size in zip(dist_insts, act_size):
        # Check that the log probs are close to the fake ones that we generated.
        test_log_probs = create_test_prob(size)
        for _prob, _test_prob in zip(
                dist_inst.all_log_prob().flatten().tolist(),
                test_log_probs.flatten().tolist(),
        ):
            assert _prob == pytest.approx(_test_prob, abs=0.1)

    # Test masks
    masks = []
    for branch in act_size:
        masks += [0] * (branch - 1) + [1]
    masks = torch.tensor([masks])
    dist_insts = gauss_dist(sample_embedding, masks=masks)
    for dist_inst in dist_insts:
        log_prob = dist_inst.all_log_prob()
        assert log_prob.flatten()[-1] == pytest.approx(0, abs=0.001)
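The final assertion relies on masking concentrating probability: when every action in a branch except the last is masked out, the surviving action gets probability 1 and log-probability 0. A standalone illustration of one common masked-softmax scheme (an assumption about the mechanism, not the library's exact code):

import torch

logits = torch.tensor([[1.0, 2.0, 3.0]])
mask = torch.tensor([[0.0, 0.0, 1.0]])  # 1 = allowed; only the last action survives
probs = torch.softmax(logits, dim=-1) * mask
probs = probs / probs.sum(dim=-1, keepdim=True)  # renormalize over allowed actions
assert torch.log(probs)[0, -1].item() == 0.0  # probability 1 -> log-prob 0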
Example #18
    def __init__(self, policy):
        # ONNX only supports input in NCHW (channel-first) format.
        # Barracuda also expects to get data in NCHW.
        # Any multi-dimensional input should follow that layout; otherwise it
        # will cause problems for the Barracuda import.
        self.policy = policy
        observation_specs = self.policy.behavior_spec.observation_specs
        batch_dim = [1]
        seq_len_dim = [1]
        num_obs = len(observation_specs)

        dummy_obs = [
            torch.zeros(batch_dim +
                        list(ModelSerializer._get_onnx_shape(obs_spec.shape)))
            for obs_spec in observation_specs
        ]

        dummy_masks = torch.ones(
            batch_dim +
            [sum(self.policy.behavior_spec.action_spec.discrete_branches)])
        dummy_memories = torch.zeros(batch_dim + seq_len_dim +
                                     [self.policy.export_memory_size])

        self.dummy_input = (dummy_obs, dummy_masks, dummy_memories)

        self.input_names = [
            TensorNames.get_observation_name(i) for i in range(num_obs)
        ]
        self.input_names += [
            TensorNames.action_mask_placeholder,
            TensorNames.recurrent_in_placeholder,
        ]

        self.dynamic_axes = {name: {0: "batch"} for name in self.input_names}

        self.output_names = [
            TensorNames.version_number, TensorNames.memory_size
        ]
        if self.policy.behavior_spec.action_spec.continuous_size > 0:
            self.output_names += [
                TensorNames.continuous_action_output,
                TensorNames.continuous_action_output_shape,
            ]
            self.dynamic_axes.update(
                {TensorNames.continuous_action_output: {
                    0: "batch"
                }})
        if self.policy.behavior_spec.action_spec.discrete_size > 0:
            self.output_names += [
                TensorNames.discrete_action_output,
                TensorNames.discrete_action_output_shape,
            ]
            self.dynamic_axes.update(
                {TensorNames.discrete_action_output: {
                    0: "batch"
                }})

        if self.policy.export_memory_size > 0:
            self.output_names += [TensorNames.recurrent_output]
Example #19
    def _extract_masks(self, decision_requests: DecisionSteps) -> np.ndarray:
        mask = None
        if self.behavior_spec.action_spec.discrete_size > 0:
            mask = torch.ones([len(decision_requests), np.sum(self.act_size)])
            if decision_requests.action_mask is not None:
                mask = torch.as_tensor(
                    1 - np.concatenate(decision_requests.action_mask, axis=1)
                )
        return mask
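The 1 - np.concatenate(...) inversion is the key step: the incoming action_mask marks blocked actions with 1, while the model wants 1 to mean allowed. A small illustration with hypothetical per-branch masks:

import numpy as np

# Hypothetical masks for two branches; 1 marks a *blocked* action.
branch_masks = [np.array([[0, 1]]), np.array([[1, 0, 0]])]
allowed = 1 - np.concatenate(branch_masks, axis=1)  # flip to 1 = allowed
assert allowed.tolist() == [[1, 0, 0, 1, 1]]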
Example #20
def test_tanh_gaussian_dist_instance():
    torch.manual_seed(0)
    act_size = 4
    dist_instance = TanhGaussianDistInstance(torch.zeros(1, act_size),
                                             torch.ones(1, act_size))
    for _ in range(10):
        action = dist_instance.sample()
        assert action.shape == (1, act_size)
        assert torch.max(action) < 1.0 and torch.min(action) > -1.0
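The bound check holds because a tanh-squashed Gaussian maps unbounded samples into (-1, 1). A minimal sketch of the usual sample-then-squash construction (an assumption suggested by the class name, not the library's code):

import torch

torch.manual_seed(0)
mean, std = torch.zeros(1, 4), torch.ones(1, 4)
unsquashed = torch.distributions.Normal(mean, std).sample()  # unbounded
action = torch.tanh(unsquashed)  # squashed strictly into (-1, 1)
assert action.abs().max().item() < 1.0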
Example #21
def test_actor_critic(ac_type, lstm):
    obs_size = 4
    network_settings = NetworkSettings(
        memory=NetworkSettings.MemorySettings() if lstm else None)
    obs_shapes = [(obs_size, )]
    act_size = [2]
    stream_names = [f"stream_name{n}" for n in range(4)]
    action_spec = ActionSpec.create_continuous(act_size[0])
    actor = ac_type(obs_shapes, network_settings, action_spec, stream_names)
    if lstm:
        sample_obs = torch.ones(
            (1, network_settings.memory.sequence_length, obs_size))
        memories = torch.ones(
            (1, network_settings.memory.sequence_length, actor.memory_size))
    else:
        sample_obs = torch.ones((1, obs_size))
        memories = torch.tensor([])
        # memories isn't always set to None; the network should be able to
        # deal with that.
    # Test critic pass
    value_out, memories_out = actor.critic_pass([sample_obs], [],
                                                memories=memories)
    for stream in stream_names:
        if lstm:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length, )
            assert memories_out.shape == memories.shape
        else:
            assert value_out[stream].shape == (1, )

    # Test get_dist_and_value
    dists, value_out, mem_out = actor.get_dist_and_value([sample_obs], [],
                                                         memories=memories)
    if mem_out is not None:
        assert mem_out.shape == memories.shape
    for dist in dists:
        assert isinstance(dist, GaussianDistInstance)
    for stream in stream_names:
        if lstm:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length, )
        else:
            assert value_out[stream].shape == (1, )
Example #22
def test_get_dists():
    inp_size = 4
    act_size = 2
    action_model, masks = create_action_model(inp_size, act_size)
    sample_inp = torch.ones((1, inp_size))
    dists = action_model._get_dists(sample_inp, masks=masks)
    assert isinstance(dists.continuous, GaussianDistInstance)
    assert len(dists.discrete) == 2
    for _dist in dists.discrete:
        assert isinstance(_dist, CategoricalDistInstance)
Example #23
def test_simple_actor(use_discrete):
    obs_size = 4
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size, )]
    act_size = [2]
    if use_discrete:
        masks = torch.ones((1, 1))
        action_spec = ActionSpec.create_discrete(tuple(act_size))
    else:
        masks = None
        action_spec = ActionSpec.create_continuous(act_size[0])
    actor = SimpleActor(obs_shapes, network_settings, action_spec)
    # Test get_dist
    sample_obs = torch.ones((1, obs_size))
    dists, _ = actor.get_dists([sample_obs], [], masks=masks)
    for dist in dists:
        if use_discrete:
            assert isinstance(dist, CategoricalDistInstance)
        else:
            assert isinstance(dist, GaussianDistInstance)

    # Test sample_actions
    actions = actor.sample_action(dists)
    for act in actions:
        if use_discrete:
            assert act.shape == (1, 1)
        else:
            assert act.shape == (1, act_size[0])

    # Test forward
    actions, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
        [sample_obs], [], masks=masks)
    for act in actions:
        # This is different from above for ONNX export
        if use_discrete:
            assert act.shape == tuple(act_size)
        else:
            assert act.shape == (act_size[0], 1)

    assert mem_size == 0
    assert is_cont == int(not use_discrete)
    assert act_size_vec == torch.tensor(act_size)
Example #24
def test_sample_action():
    inp_size = 4
    act_size = 2
    action_model, masks = create_action_model(inp_size, act_size)
    sample_inp = torch.ones((1, inp_size))
    dists = action_model._get_dists(sample_inp, masks=masks)
    agent_action = action_model._sample_action(dists)
    assert agent_action.continuous_tensor.shape == (1, 2)
    assert len(agent_action.discrete_list) == 2
    for _disc in agent_action.discrete_list:
        assert _disc.shape == (1, 1)
Example #25
def test_vector_encoder(mock_normalizer):
    mock_normalizer_inst = mock.Mock()
    mock_normalizer.return_value = mock_normalizer_inst
    input_size = 64
    normalize = False
    vector_encoder = VectorInput(input_size, normalize)
    output = vector_encoder(torch.ones((1, input_size)))
    assert output.shape == (1, input_size)

    normalize = True
    vector_encoder = VectorInput(input_size, normalize)
    new_vec = torch.ones((1, input_size))
    vector_encoder.update_normalization(new_vec)

    mock_normalizer.assert_called_with(input_size)
    mock_normalizer_inst.update.assert_called_with(new_vec)

    vector_encoder2 = VectorInput(input_size, normalize)
    vector_encoder.copy_normalization(vector_encoder2)
    mock_normalizer_inst.copy_from.assert_called_with(mock_normalizer_inst)
Example #26
    def _split_decision_step(
        self, decision_requests: DecisionSteps
    ) -> Tuple[SplitObservations, np.ndarray]:
        vec_vis_obs = SplitObservations.from_observations(decision_requests.obs)
        mask = None
        if not self.use_continuous_act:
            mask = torch.ones([len(decision_requests), np.sum(self.act_size)])
            if decision_requests.action_mask is not None:
                mask = torch.as_tensor(
                    1 - np.concatenate(decision_requests.action_mask, axis=1)
                )
        return vec_vis_obs, mask
Example #27
def test_visual_encoder_trains(vis_class, size):
    torch.manual_seed(0)
    image_size = (size, size, 1)
    batch = 100

    inputs = torch.cat(
        [torch.zeros((batch,) + image_size), torch.ones((batch,) + image_size)],
        dim=0,
    )
    target = torch.cat([torch.zeros((batch, )), torch.ones((batch, ))], dim=0)
    enc = vis_class(image_size[0], image_size[1], image_size[2], 1)
    optimizer = torch.optim.Adam(enc.parameters(), lr=0.001)

    for _ in range(15):
        prediction = enc(inputs)[:, 0]
        loss = torch.mean((target - prediction)**2)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    assert loss.item() < 0.05
Example #28
def test_multinetworkbody_visual(with_actions):
    torch.manual_seed(0)
    act_size = 2
    n_agents = 3
    obs_size = 4
    vis_obs_size = (84, 84, 3)
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size, ), vis_obs_size]
    action_spec = ActionSpec(act_size,
                             tuple(act_size for _ in range(act_size)))
    networkbody = MultiAgentNetworkBody(
        create_observation_specs_with_shapes(obs_shapes), network_settings,
        action_spec)
    optimizer = torch.optim.Adam(networkbody.parameters(), lr=3e-3)
    sample_obs = [
        [0.1 * torch.ones((1, obs_size))] + [0.1 * torch.ones((1, 84, 84, 3))]
        for _ in range(n_agents)
    ]
    # simulate baseline in POCA
    sample_act = [
        AgentAction(0.1 * torch.ones((1, 2)),
                    [0.1 * torch.ones(1) for _ in range(act_size)])
        for _ in range(n_agents - 1)
    ]
    for _ in range(300):
        if with_actions:
            encoded, _ = networkbody(obs_only=sample_obs[:1],
                                     obs=sample_obs[1:],
                                     actions=sample_act)
        else:
            encoded, _ = networkbody(obs_only=sample_obs, obs=[], actions=[])

        assert encoded.shape == (1, network_settings.hidden_units)
        # Try to force output to 1
        loss = torch.nn.functional.mse_loss(encoded, torch.ones(encoded.shape))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # In the last step, values should be close to 1
    for _enc in encoded.flatten().tolist():
        assert _enc == pytest.approx(1.0, abs=0.1)
Example #29
    def __init__(self, policy):
        # ONNX only supports input in NCHW (channel-first) format.
        # Barracuda also expects to get data in NCHW.
        # Any multi-dimensional input should follow that layout; otherwise it
        # will cause problems for the Barracuda import.
        self.policy = policy
        batch_dim = [1]
        seq_len_dim = [1]
        dummy_vec_obs = [torch.zeros(batch_dim + [self.policy.vec_obs_size])]
        # create input shape of NCHW
        # (It's NHWC in self.policy.behavior_spec.observation_shapes)
        dummy_vis_obs = [
            torch.zeros(batch_dim + [shape[2], shape[0], shape[1]])
            for shape in self.policy.behavior_spec.observation_shapes
            if len(shape) == 3
        ]
        dummy_masks = torch.ones(
            batch_dim + [sum(self.policy.behavior_spec.action_spec.discrete_branches)]
        )
        dummy_memories = torch.zeros(
            batch_dim + seq_len_dim + [self.policy.export_memory_size]
        )

        self.dummy_input = (dummy_vec_obs, dummy_vis_obs, dummy_masks, dummy_memories)

        self.input_names = (
            ["vector_observation"]
            + [f"visual_observation_{i}" for i in range(self.policy.vis_obs_size)]
            + ["action_masks", "memories"]
        )
        self.dynamic_axes = {name: {0: "batch"} for name in self.input_names}

        self.output_names = ["version_number", "memory_size"]
        if self.policy.behavior_spec.action_spec.continuous_size > 0:
            self.output_names += [
                "continuous_actions",
                "continuous_action_output_shape",
            ]
            self.dynamic_axes.update({"continuous_actions": {0: "batch"}})
        if self.policy.behavior_spec.action_spec.discrete_size > 0:
            self.output_names += ["discrete_actions", "discrete_action_output_shape"]
            self.dynamic_axes.update({"discrete_actions": {0: "batch"}})
        if (
            self.policy.behavior_spec.action_spec.continuous_size == 0
            or self.policy.behavior_spec.action_spec.discrete_size == 0
        ):
            self.output_names += [
                "action",
                "is_continuous_control",
                "action_output_shape",
            ]
            self.dynamic_axes.update({"action": {0: "batch"}})
Example #30
def test_deterministic_sample_action():
    inp_size = 4
    act_size = 8
    action_model, masks = create_action_model(inp_size, act_size, deterministic=True)
    sample_inp = torch.ones((1, inp_size))
    dists = action_model._get_dists(sample_inp, masks=masks)
    agent_action1 = action_model._sample_action(dists)
    agent_action2 = action_model._sample_action(dists)
    agent_action3 = action_model._sample_action(dists)

    assert torch.equal(agent_action1.continuous_tensor, agent_action2.continuous_tensor)
    assert torch.equal(agent_action1.continuous_tensor, agent_action3.continuous_tensor)
    assert torch.equal(agent_action1.discrete_tensor, agent_action2.discrete_tensor)
    assert torch.equal(agent_action1.discrete_tensor, agent_action3.discrete_tensor)

    action_model, masks = create_action_model(inp_size, act_size, deterministic=False)
    sample_inp = torch.ones((1, inp_size))
    dists = action_model._get_dists(sample_inp, masks=masks)
    agent_action1 = action_model._sample_action(dists)
    agent_action2 = action_model._sample_action(dists)
    agent_action3 = action_model._sample_action(dists)

    assert not torch.equal(
        agent_action1.continuous_tensor, agent_action2.continuous_tensor
    )

    assert not torch.equal(
        agent_action1.continuous_tensor, agent_action3.continuous_tensor
    )

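    # Independently drawn discrete samples can coincide by chance, so only
    # require that at least two of the three pairwise comparisons differ.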
    chance_counter = 0
    if not torch.equal(agent_action1.discrete_tensor, agent_action2.discrete_tensor):
        chance_counter += 1
    if not torch.equal(agent_action1.discrete_tensor, agent_action3.discrete_tensor):
        chance_counter += 1
    if not torch.equal(agent_action2.discrete_tensor, agent_action3.discrete_tensor):
        chance_counter += 1

    assert chance_counter > 1