Example #1
def test_slice():
    # Both continuous and discrete
    aa = AgentAction(
        torch.tensor([[1.0], [1.0], [1.0]]),
        [torch.tensor([2, 1, 0]), torch.tensor([1, 2, 0])],
    )
    saa = aa.slice(0, 2)
    assert saa.continuous_tensor.shape == (2, 1)
    assert saa.discrete_tensor.shape == (2, 2)
Example #2
def test_actions_to_onehot():
    all_actions = torch.tensor([[1, 0, 2], [1, 0, 2]])
    action_size = [2, 1, 3]
    oh_actions = ModelUtils.actions_to_onehot(all_actions, action_size)
    expected_result = [
        torch.tensor([[0, 1], [0, 1]], dtype=torch.float),
        torch.tensor([[1], [1]], dtype=torch.float),
        torch.tensor([[0, 0, 1], [0, 0, 1]], dtype=torch.float),
    ]
    for res, exp in zip(oh_actions, expected_result):
        assert torch.equal(res, exp)
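For reference, here is a minimal sketch of what a helper like actions_to_onehot computes; the function below is hypothetical and ML-Agents' actual implementation may differ:

import torch

def actions_to_onehot_sketch(discrete_actions, action_size):
    # Take one column of the (batch, num_branches) action tensor per branch
    # and one-hot encode it to that branch's size.
    return [
        torch.nn.functional.one_hot(discrete_actions[:, i], size).float()
        for i, size in enumerate(action_size)
    ]

# actions_to_onehot_sketch(torch.tensor([[1, 0, 2], [1, 0, 2]]), [2, 1, 3])
# reproduces the expected_result in the test above.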
Example #3
def test_to_flat():
    # Both continuous and discrete
    aa = AgentAction(torch.tensor([[1.0, 1.0, 1.0]]),
                     [torch.tensor([2]), torch.tensor([1])])
    flattened_actions = aa.to_flat([3, 3])
    assert torch.eq(flattened_actions,
                    torch.tensor([[1, 1, 1, 0, 0, 1, 0, 1, 0]])).all()

    # Just continuous
    aa = AgentAction(torch.tensor([[1.0, 1.0, 1.0]]), None)
    flattened_actions = aa.to_flat([])
    assert torch.eq(flattened_actions, torch.tensor([1, 1, 1])).all()

    # Just discrete
    aa = AgentAction(torch.tensor([]), [torch.tensor([2]), torch.tensor([1])])
    flattened_actions = aa.to_flat([3, 3])
    assert torch.eq(flattened_actions, torch.tensor([0, 0, 1, 0, 1, 0])).all()
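A hedged reconstruction of what AgentAction.to_flat appears to do in these three cases (the helper below is illustrative, not ML-Agents' code):

import torch

def to_flat_sketch(continuous, discrete_list, discrete_branches):
    # One-hot each discrete branch to its size, then concatenate the
    # continuous actions and the one-hot blocks along the last dim.
    parts = [continuous] if continuous.numel() > 0 else []
    parts += [
        torch.nn.functional.one_hot(branch.long(), size).float()
        for branch, size in zip(discrete_list or [], discrete_branches)
    ]
    return torch.cat(parts, dim=-1)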
Example #4
def test_categorical_dist_instance():
    torch.manual_seed(0)
    act_size = 4
    test_prob = torch.tensor([[1.0 - 0.1 * (act_size - 1)] + [0.1] *
                              (act_size - 1)])  # High prob for first action
    dist_instance = CategoricalDistInstance(test_prob)

    for _ in range(10):
        action = dist_instance.sample()
        assert action.shape == (1, 1)
        assert action < act_size

    # Make sure the first action has higher probability than the others.
    prob_first_action = dist_instance.log_prob(torch.tensor([0]))

    for i in range(1, act_size):
        assert dist_instance.log_prob(torch.tensor([i])) < prob_first_action
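For comparison, torch's built-in Categorical shows the same ordering the test asserts: the first action, carrying the largest probability mass, gets the largest log-probability. CategoricalDistInstance may normalize its inputs differently, so only the ordering is guaranteed to match:

import torch

d = torch.distributions.Categorical(probs=torch.tensor([0.7, 0.1, 0.1, 0.1]))
print(d.log_prob(torch.tensor(0)))  # log(0.7) ~= -0.357
print(d.log_prob(torch.tensor(1)))  # log(0.1) ~= -2.303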
Example #5
def test_list_to_tensor():
    # Test converting pure list
    unconverted_list = [[1.0, 2], [1, 3], [1, 4]]
    tensor = ModelUtils.list_to_tensor(unconverted_list)
    # Should be equivalent to torch.tensor conversion
    assert torch.equal(tensor, torch.tensor(unconverted_list))

    # Test converting pure numpy array
    np_list = np.asarray(unconverted_list)
    tensor = ModelUtils.list_to_tensor(np_list)
    # Should be equivalent to torch.tensor conversion
    assert torch.equal(tensor, torch.tensor(unconverted_list))

    # Test converting list of numpy arrays
    list_of_np = [np.asarray(_el) for _el in unconverted_list]
    tensor = ModelUtils.list_to_tensor(list_of_np)
    # Should be equivalent to torch.tensor conversion
    assert torch.equal(tensor, torch.tensor(unconverted_list, dtype=torch.float32))
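The point of such a helper is speed: converting a list of numpy arrays element by element through torch.tensor is slow, while going through a single numpy array is not. A minimal sketch, assuming this is roughly what ModelUtils.list_to_tensor does:

import numpy as np
import torch

def list_to_tensor_sketch(ndarray_list, dtype=torch.float32):
    # Collapse the list into one numpy array first, then convert once.
    return torch.as_tensor(np.asanyarray(ndarray_list), dtype=dtype)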
Example #6
def test_actor_critic(ac_type, lstm):
    obs_size = 4
    network_settings = NetworkSettings(
        memory=NetworkSettings.MemorySettings() if lstm else None,
        normalize=True)
    obs_spec = create_observation_specs_with_shapes([(obs_size, )])
    act_size = 2
    mask = torch.ones([1, act_size * 2])
    stream_names = [f"stream_name{n}" for n in range(4)]
    action_spec = ActionSpec(act_size,
                             tuple(act_size for _ in range(act_size)))
    actor = ac_type(obs_spec, network_settings, action_spec, stream_names)
    if lstm:
        sample_obs = torch.ones(
            (1, network_settings.memory.sequence_length, obs_size))
        memories = torch.ones(
            (1, network_settings.memory.sequence_length, actor.memory_size))
    else:
        sample_obs = torch.ones((1, obs_size))
        memories = torch.tensor([])
        # memories isn't always set to None; the network should be able to
        # deal with that.
    # Test critic pass
    value_out, memories_out = actor.critic_pass([sample_obs],
                                                memories=memories)
    for stream in stream_names:
        if lstm:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length, )
            assert memories_out.shape == memories.shape
        else:
            assert value_out[stream].shape == (1, )

    # Test get_action_stats_and_value
    action, log_probs, entropies, value_out, mem_out = actor.get_action_stats_and_value(
        [sample_obs], memories=memories, masks=mask)
    if lstm:
        assert action.continuous_tensor.shape == (64, 2)
    else:
        assert action.continuous_tensor.shape == (1, 2)

    assert len(action.discrete_list) == 2
    for _disc in action.discrete_list:
        if lstm:
            assert _disc.shape == (64, 1)
        else:
            assert _disc.shape == (1, 1)

    if mem_out is not None:
        assert mem_out.shape == memories.shape
    for stream in stream_names:
        if lstm:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length, )
        else:
            assert value_out[stream].shape == (1, )
Example #7
def test_masked_mean():
    test_input = torch.tensor([1, 2, 3, 4, 5])
    masks = torch.ones_like(test_input).bool()
    mean = ModelUtils.masked_mean(test_input, masks=masks)
    assert mean == 3.0

    masks = torch.tensor([False, False, True, True, True])
    mean = ModelUtils.masked_mean(test_input, masks=masks)
    assert mean == 4.0

    # Make sure it works if all masks are off
    masks = torch.tensor([False, False, False, False, False])
    mean = ModelUtils.masked_mean(test_input, masks=masks)
    assert mean == 0.0

    # Make sure it works with 2d arrays of shape (mask_length, N)
    test_input = torch.tensor([1, 2, 3, 4, 5]).repeat(2, 1).T
    masks = torch.tensor([False, False, True, True, True])
    mean = ModelUtils.masked_mean(test_input, masks=masks)
    assert mean == 4.0
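A minimal sketch consistent with all four assertions above (hypothetical; ML-Agents' masked_mean is likely vectorized differently):

import torch

def masked_mean_sketch(tensor, masks):
    # Keep only the rows selected by the (mask_length,) boolean mask and
    # average them; an all-False mask returns 0 instead of NaN.
    if not masks.any():
        return torch.tensor(0.0)
    return tensor[masks].float().mean()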
Example #8
def test_continuous_action_prediction(behavior_spec: BehaviorSpec,
                                      seed: int) -> None:
    np.random.seed(seed)
    torch.manual_seed(seed)
    curiosity_settings = CuriositySettings(32, 0.1)
    curiosity_rp = CuriosityRewardProvider(behavior_spec, curiosity_settings)
    buffer = create_agent_buffer(behavior_spec, 5)
    for _ in range(200):
        curiosity_rp.update(buffer)
    prediction = curiosity_rp._network.predict_action(buffer)[0]
    target = torch.tensor(buffer["continuous_action"][0])
    error = torch.mean((prediction - target)**2).item()
    assert error < 0.001
Example #9
 def compute_loss(
     self, policy_batch: AgentBuffer, expert_batch: AgentBuffer
 ) -> Tuple[torch.Tensor, Dict[str, float]]:
     """
     Given a policy mini_batch and an expert mini_batch, computes the loss of
     the discriminator along with a dictionary of logging stats.
     """
     total_loss = torch.zeros(1)
     stats_dict: Dict[str, float] = {}
     policy_estimate, policy_mu = self.compute_estimate(
         policy_batch, use_vail_noise=True
     )
     expert_estimate, expert_mu = self.compute_estimate(
         expert_batch, use_vail_noise=True
     )
     stats_dict["Policy/GAIL Policy Estimate"] = policy_estimate.mean().item()
     stats_dict["Policy/GAIL Expert Estimate"] = expert_estimate.mean().item()
     discriminator_loss = -(
         torch.log(expert_estimate + self.EPSILON)
         + torch.log(1.0 - policy_estimate + self.EPSILON)
     ).mean()
     stats_dict["Losses/GAIL Loss"] = discriminator_loss.item()
     total_loss += discriminator_loss
     if self._settings.use_vail:
         # KL divergence loss (encourage latent representation to be normal)
         kl_loss = torch.mean(
             -torch.sum(
                 1
                 + (self._z_sigma ** 2).log()
                 - 0.5 * expert_mu ** 2
                 - 0.5 * policy_mu ** 2
                 - (self._z_sigma ** 2),
                 dim=1,
             )
         )
         vail_loss = self._beta * (kl_loss - self.mutual_information)
         with torch.no_grad():
             self._beta.data = torch.max(
                 self._beta + self.alpha * (kl_loss - self.mutual_information),
                 torch.tensor(0.0),
             )
         total_loss += vail_loss
         stats_dict["Policy/GAIL Beta"] = self._beta.item()
         stats_dict["Losses/GAIL KL Loss"] = kl_loss.item()
     if self.gradient_penalty_weight > 0.0:
         gradient_magnitude_loss = (
             self.gradient_penalty_weight
             * self.compute_gradient_magnitude(policy_batch, expert_batch)
         )
         stats_dict["Policy/GAIL Grad Mag Loss"] = gradient_magnitude_loss.item()
         total_loss += gradient_magnitude_loss
     return total_loss, stats_dict
Example #10
def test_get_probs_and_entropy():
    inp_size = 4
    act_size = 2
    action_model, masks = create_action_model(inp_size, act_size)

    _continuous_dist = GaussianDistInstance(torch.zeros((1, 2)),
                                            torch.ones((1, 2)))
    act_size = 2
    test_prob = torch.tensor([[1.0 - 0.1 * (act_size - 1)] + [0.1] *
                              (act_size - 1)])
    _discrete_dist_list = [
        CategoricalDistInstance(test_prob),
        CategoricalDistInstance(test_prob),
    ]
    dist_tuple = DistInstances(_continuous_dist, _discrete_dist_list)

    agent_action = AgentAction(torch.zeros(
        (1, 2)), [torch.tensor([0]), torch.tensor([1])])

    log_probs, entropies = action_model._get_probs_and_entropy(
        agent_action, dist_tuple)

    assert log_probs.continuous_tensor.shape == (1, 2)
    assert len(log_probs.discrete_list) == 2
    for _disc in log_probs.discrete_list:
        assert _disc.shape == (1, )
    assert len(log_probs.all_discrete_list) == 2
    for _disc in log_probs.all_discrete_list:
        assert _disc.shape == (1, 2)

    for clp in log_probs.continuous_tensor[0]:
        # Log prob of standard normal at 0
        assert clp == pytest.approx(-0.919, abs=0.01)

    assert log_probs.discrete_list[0] > log_probs.discrete_list[1]

    for ent, val in zip(entropies[0], [1.4189, 0.6191, 0.6191]):
        assert ent == pytest.approx(val, abs=0.01)
Example #11
def test_get_probs_and_entropy():
    # Test continuous
    # Add two dists to the list. This isn't done in the code but we'd like to support it.
    dist_list = [
        GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
        GaussianDistInstance(torch.zeros((1, 2)), torch.ones((1, 2))),
    ]
    action_list = [torch.zeros((1, 2)), torch.zeros((1, 2))]
    log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
        action_list, dist_list
    )
    assert log_probs.shape == (1, 2, 2)
    assert entropies.shape == (1, 2, 2)
    assert all_probs is None

    for log_prob in log_probs.flatten():
        # Log prob of standard normal at 0
        assert log_prob == pytest.approx(-0.919, abs=0.01)

    for ent in entropies.flatten():
        # entropy of standard normal at 0
        assert ent == pytest.approx(1.42, abs=0.01)

    # Test discrete
    # Add two dists to the list.
    act_size = 2
    test_prob = torch.tensor(
        [[1.0 - 0.1 * (act_size - 1)] + [0.1] * (act_size - 1)]
    )  # High prob for first action
    dist_list = [CategoricalDistInstance(test_prob), CategoricalDistInstance(test_prob)]
    action_list = [torch.tensor([0]), torch.tensor([1])]
    log_probs, entropies, all_probs = ModelUtils.get_probs_and_entropy(
        action_list, dist_list
    )
    assert all_probs.shape == (1, len(dist_list) * act_size)
    assert entropies.shape == (1, len(dist_list))
    # Make sure the first action has higher probability than the others.
    assert log_probs.flatten()[0] > log_probs.flatten()[1]
Example #12
def test_break_into_branches():
    # Test normal multi-branch case
    all_actions = torch.tensor([[1, 2, 3, 4, 5, 6]])
    action_size = [2, 1, 3]
    broken_actions = ModelUtils.break_into_branches(all_actions, action_size)
    assert len(action_size) == len(broken_actions)
    for i, _action in enumerate(broken_actions):
        assert _action.shape == (1, action_size[i])

    # Test 1-branch case
    action_size = [6]
    broken_actions = ModelUtils.break_into_branches(all_actions, action_size)
    assert len(broken_actions) == 1
    assert broken_actions[0].shape == (1, 6)
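This is essentially a sized split along the action dimension; a one-line sketch, assuming break_into_branches behaves as the tests show:

import torch

def break_into_branches_sketch(concatenated, action_size):
    # torch.split accepts a list of chunk sizes, one per branch.
    return list(torch.split(concatenated, action_size, dim=1))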
Example #13
def test_normalizer():
    input_size = 2
    norm = Normalizer(input_size)

    # With the step count starting at 1, these three inputs should drive
    # the running mean to 0.5 and the running variance to 2
    vec_input1 = torch.tensor([[1, 1]])
    vec_input2 = torch.tensor([[1, 1]])
    vec_input3 = torch.tensor([[0, 0]])
    norm.update(vec_input1)
    norm.update(vec_input2)
    norm.update(vec_input3)

    # Test normalization
    for val in norm(vec_input1)[0]:
        assert val == pytest.approx(0.707, abs=0.001)

    # Test copy normalization
    norm2 = Normalizer(input_size)
    assert not compare_models(norm, norm2)
    norm2.copy_from(norm)
    assert compare_models(norm, norm2)
    for val in norm2(vec_input1)[0]:
        assert val == pytest.approx(0.707, abs=0.001)
Example #14
    def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
        super().__init__()
        self._policy_specs = specs
        self._use_vail = settings.use_vail
        self._settings = settings

        state_encoder_settings = NetworkSettings(
            normalize=False,
            hidden_units=settings.encoding_size,
            num_layers=2,
            vis_encode_type=EncoderType.SIMPLE,
            memory=None,
        )
        self._state_encoder = NetworkBody(specs.observation_shapes,
                                          state_encoder_settings)

        self._action_flattener = ModelUtils.ActionFlattener(specs)

        encoder_input_size = settings.encoding_size
        if settings.use_actions:
            encoder_input_size += self._action_flattener.flattened_size + 1  # + 1 is for done

        self.encoder = torch.nn.Sequential(
            linear_layer(encoder_input_size, settings.encoding_size),
            Swish(),
            linear_layer(settings.encoding_size, settings.encoding_size),
            Swish(),
        )

        estimator_input_size = settings.encoding_size
        if settings.use_vail:
            estimator_input_size = self.z_size
            self._z_sigma = torch.nn.Parameter(torch.ones((self.z_size),
                                                          dtype=torch.float),
                                               requires_grad=True)
            self._z_mu_layer = linear_layer(
                settings.encoding_size,
                self.z_size,
                kernel_init=Initialization.KaimingHeNormal,
                kernel_gain=0.1,
            )
            self._beta = torch.nn.Parameter(torch.tensor(self.initial_beta,
                                                         dtype=torch.float),
                                            requires_grad=False)

        self._estimator = torch.nn.Sequential(
            linear_layer(estimator_input_size, 1), torch.nn.Sigmoid())
Example #15
 def forward(self, mini_batch: AgentBuffer) -> torch.Tensor:
     n_vis = len(self._encoder.visual_processors)
     hidden, _ = self._encoder.forward(
         vec_inputs=[
             ModelUtils.list_to_tensor(mini_batch["vector_obs"],
                                       dtype=torch.float)
         ],
         vis_inputs=[
             ModelUtils.list_to_tensor(mini_batch["visual_obs%d" % i],
                                       dtype=torch.float)
             for i in range(n_vis)
         ],
     )
     self._encoder.update_normalization(
         torch.tensor(mini_batch["vector_obs"]))
     return hidden
Example #16
def test_multi_categorical_distribution():
    torch.manual_seed(0)
    hidden_size = 16
    act_size = [3, 3, 4]
    sample_embedding = torch.ones((1, 16))
    multi_cat_dist = MultiCategoricalDistribution(hidden_size, act_size)

    # Make sure backprop works
    optimizer = torch.optim.Adam(multi_cat_dist.parameters(), lr=3e-3)

    def create_test_prob(size: int) -> torch.Tensor:
        test_prob = torch.tensor([[1.0 - 0.01 * (size - 1)] + [0.01] *
                                  (size - 1)])  # High prob for first action
        return test_prob.log()

    for _ in range(100):
        dist_insts = multi_cat_dist(sample_embedding,
                                    masks=torch.ones((1, sum(act_size))))
        loss = 0
        for i, dist_inst in enumerate(dist_insts):
            assert isinstance(dist_inst, CategoricalDistInstance)
            log_prob = dist_inst.all_log_prob()
            test_log_prob = create_test_prob(act_size[i])
            # Force log_probs to match the high probability for the first action generated by
            # create_test_prob
            loss += torch.nn.functional.mse_loss(log_prob, test_log_prob)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    for dist_inst, size in zip(dist_insts, act_size):
        # Check that the log probs are close to the fake ones that we generated.
        test_log_probs = create_test_prob(size)
        for _prob, _test_prob in zip(
                dist_inst.all_log_prob().flatten().tolist(),
                test_log_probs.flatten().tolist(),
        ):
            assert _prob == pytest.approx(_test_prob, abs=0.1)

    # Test masks
    masks = []
    for branch in act_size:
        masks += [0] * (branch - 1) + [1]
    masks = torch.tensor([masks])
    dist_insts = multi_cat_dist(sample_embedding, masks=masks)
    for dist_inst in dist_insts:
        log_prob = dist_inst.all_log_prob()
        assert log_prob.flatten()[-1] == pytest.approx(0, abs=0.001)
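The mask check works because, with every branch's mask allowing only the last action, a properly masked categorical must put essentially all probability on it, so its log-probability approaches 0. A hedged illustration of that renormalization idea (not ML-Agents' exact masking code):

import torch

logits = torch.tensor([[2.0, 0.5, 1.0]])
mask = torch.tensor([[0.0, 0.0, 1.0]])  # only the last action is allowed
masked_probs = torch.softmax(logits, dim=-1) * mask
masked_probs = masked_probs / masked_probs.sum(dim=-1, keepdim=True)
print(masked_probs.log())  # last entry ~= 0, others -inf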
Example #17
def test_actor_critic(ac_type, lstm):
    obs_size = 4
    network_settings = NetworkSettings(
        memory=NetworkSettings.MemorySettings() if lstm else None)
    obs_shapes = [(obs_size, )]
    act_size = [2]
    stream_names = [f"stream_name{n}" for n in range(4)]
    action_spec = ActionSpec.create_continuous(act_size[0])
    actor = ac_type(obs_shapes, network_settings, action_spec, stream_names)
    if lstm:
        sample_obs = torch.ones(
            (1, network_settings.memory.sequence_length, obs_size))
        memories = torch.ones(
            (1, network_settings.memory.sequence_length, actor.memory_size))
    else:
        sample_obs = torch.ones((1, obs_size))
        memories = torch.tensor([])
        # memories isn't always set to None; the network should be able to
        # deal with that.
    # Test critic pass
    value_out, memories_out = actor.critic_pass([sample_obs], [],
                                                memories=memories)
    for stream in stream_names:
        if lstm:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length, )
            assert memories_out.shape == memories.shape
        else:
            assert value_out[stream].shape == (1, )

    # Test get_dist_and_value
    dists, value_out, mem_out = actor.get_dist_and_value([sample_obs], [],
                                                         memories=memories)
    if mem_out is not None:
        assert mem_out.shape == memories.shape
    for dist in dists:
        assert isinstance(dist, GaussianDistInstance)
    for stream in stream_names:
        if lstm:
            assert value_out[stream].shape == (
                network_settings.memory.sequence_length, )
        else:
            assert value_out[stream].shape == (1, )
Example #18
def test_simple_actor(use_discrete):
    obs_size = 4
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size, )]
    act_size = [2]
    if use_discrete:
        masks = torch.ones((1, 1))
        action_spec = ActionSpec.create_discrete(tuple(act_size))
    else:
        masks = None
        action_spec = ActionSpec.create_continuous(act_size[0])
    actor = SimpleActor(obs_shapes, network_settings, action_spec)
    # Test get_dist
    sample_obs = torch.ones((1, obs_size))
    dists, _ = actor.get_dists([sample_obs], [], masks=masks)
    for dist in dists:
        if use_discrete:
            assert isinstance(dist, CategoricalDistInstance)
        else:
            assert isinstance(dist, GaussianDistInstance)

    # Test sample_actions
    actions = actor.sample_action(dists)
    for act in actions:
        if use_discrete:
            assert act.shape == (1, 1)
        else:
            assert act.shape == (1, act_size[0])

    # Test forward
    actions, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
        [sample_obs], [], masks=masks)
    for act in actions:
        # This is different from above for ONNX export
        if use_discrete:
            assert act.shape == tuple(act_size)
        else:
            assert act.shape == (act_size[0], 1)

    assert mem_size == 0
    assert is_cont == int(not use_discrete)
    assert act_size_vec == torch.tensor(act_size)
Example #19
 def __init__(self, policy: TorchPolicy, trainer_settings: TrainerSettings):
     super().__init__()
     self.policy = policy
     self.trainer_settings = trainer_settings
     self.update_dict: Dict[str, torch.Tensor] = {}
     self.value_heads: Dict[str, torch.Tensor] = {}
     self.memory_in: Optional[torch.Tensor] = None
     self.memory_out: Optional[torch.Tensor] = None
     self.m_size: int = 0
     self.global_step = torch.tensor(0)
     self.bc_module: Optional[BCModule] = None
     self.create_reward_signals(trainer_settings.reward_signals)
     if trainer_settings.behavioral_cloning is not None:
         self.bc_module = BCModule(
             self.policy,
             trainer_settings.behavioral_cloning,
             policy_learning_rate=trainer_settings.hyperparameters.learning_rate,
             default_batch_size=trainer_settings.hyperparameters.batch_size,
             default_num_epoch=3,
         )
Example #20
def test_simple_actor(action_type):
    obs_size = 4
    network_settings = NetworkSettings()
    obs_shapes = [(obs_size, )]
    act_size = [2]
    masks = None if action_type == ActionType.CONTINUOUS else torch.ones(
        (1, 1))
    actor = SimpleActor(obs_shapes, network_settings, action_type, act_size)
    # Test get_dist
    sample_obs = torch.ones((1, obs_size))
    dists, _ = actor.get_dists([sample_obs], [], masks=masks)
    for dist in dists:
        if action_type == ActionType.CONTINUOUS:
            assert isinstance(dist, GaussianDistInstance)
        else:
            assert isinstance(dist, CategoricalDistInstance)

    # Test sample_actions
    actions = actor.sample_action(dists)
    for act in actions:
        if action_type == ActionType.CONTINUOUS:
            assert act.shape == (1, act_size[0])
        else:
            assert act.shape == (1, 1)

    # Test forward
    actions, ver_num, mem_size, is_cont, act_size_vec = actor.forward(
        [sample_obs], [], masks=masks)
    for act in actions:
        # This is different from above for ONNX export
        if action_type == ActionType.CONTINUOUS:
            assert act.shape == (act_size[0], 1)
        else:
            assert act.shape == tuple(act_size)

    assert mem_size == 0
    assert is_cont == int(action_type == ActionType.CONTINUOUS)
    assert act_size_vec == torch.tensor(act_size)
Example #21
    def __init__(self, specs: BehaviorSpec, settings: GAILSettings) -> None:
        super().__init__()
        self._use_vail = settings.use_vail
        self._settings = settings

        encoder_settings = settings.network_settings
        if encoder_settings.memory is not None:
            encoder_settings.memory = None
            logger.warning(
                "memory was specified in network_settings but is not supported by GAIL. It is being ignored."
            )

        self._action_flattener = ActionFlattener(specs.action_spec)
        unencoded_size = (
            self._action_flattener.flattened_size + 1 if settings.use_actions else 0
        )  # +1 is for dones
        self.encoder = NetworkBody(specs.observation_specs, encoder_settings,
                                   unencoded_size)

        estimator_input_size = encoder_settings.hidden_units
        if settings.use_vail:
            estimator_input_size = self.z_size
            self._z_sigma = torch.nn.Parameter(torch.ones((self.z_size),
                                                          dtype=torch.float),
                                               requires_grad=True)
            self._z_mu_layer = linear_layer(
                encoder_settings.hidden_units,
                self.z_size,
                kernel_init=Initialization.KaimingHeNormal,
                kernel_gain=0.1,
            )
            self._beta = torch.nn.Parameter(torch.tensor(self.initial_beta,
                                                         dtype=torch.float),
                                            requires_grad=False)

        self._estimator = torch.nn.Sequential(
            linear_layer(estimator_input_size, 1, kernel_gain=0.2),
            torch.nn.Sigmoid())
Example #22
 def create_test_prob(size: int) -> torch.Tensor:
     test_prob = torch.tensor([[1.0 - 0.01 * (size - 1)] + [0.01] *
                               (size - 1)])  # High prob for first action
     return test_prob.log()
Example #23
 def __init__(self, vec_obs_size: int):
     super().__init__()
     self.register_buffer("normalization_steps", torch.tensor(1))
     self.register_buffer("running_mean", torch.zeros(vec_obs_size))
     self.register_buffer("running_variance", torch.ones(vec_obs_size))