Esempio n. 1
0
    def __init__(self,
                 env_spec: EnvSpec,
                 batch_size: int,
                 reward_multiplier: float,
                 lr: float = 3e-3,
                 logger: StepLogger = None,
                 device: str = 'cuda' if to.cuda.is_available() else 'cpu'):
        """
        Store the hyper-parameters and build the discriminator network, its loss function, and its optimizer.

        :param env_spec: environment specification; its observation and action spaces determine the input space of
                         the discriminator
        :param batch_size: batch size for each update step
        :param reward_multiplier: factor for the predicted probability
        :param lr: learning rate passed to the Adam optimizer
        :param logger: logger for every step of the algorithm; it is stored exactly as passed, i.e. this constructor
                       does not create a default logger when `None` is given
        :param device: device identifier, by default `'cuda'` if CUDA is available and `'cpu'` otherwise; this
                       constructor only stores the value
        """
        self.device = device
        self.batch_size = batch_size
        self.reward_multiplier = reward_multiplier
        self.lr = lr

        # The discriminator input is the observation space twice followed by the action space
        # (presumably one transition, i.e. two observations and an action -- TODO confirm against the caller).
        discr_input_space = BoxSpace.cat([env_spec.obs_space, env_spec.obs_space, env_spec.act_space])
        # The discriminator output is one scalar in [0, 1], matching the sigmoid output layer used below.
        discr_output_space = BoxSpace(bound_lo=[0], bound_up=[1])
        discr_spec = EnvSpec(obs_space=discr_input_space, act_space=discr_output_space)

        # NOTE(review): a single hidden layer with 62 units is kept as found; confirm that 62 (not 64) is intended.
        self.discriminator = FNNPolicy(
            spec=discr_spec,
            hidden_nonlin=to.tanh,
            hidden_sizes=[62],
            output_nonlin=to.sigmoid,
        )

        # Binary cross-entropy is applied to the sigmoid output of the discriminator.
        self.loss_fcn = nn.BCELoss()
        adam_kwargs = dict(lr=lr, eps=1e-5)
        self.optimizer = to.optim.Adam(self.discriminator.parameters(), **adam_kwargs)
        self.logger = logger
Esempio n. 2
0
def test_cat_box_space(bs_list):
    """
    Check that concatenating box spaces yields a `BoxSpace` whose flat dimension is the sum of the parts.

    :param bs_list: iterable of spaces exposing `flat_dim`, passed on to `BoxSpace.cat`
    """
    concatenated = BoxSpace.cat(bs_list)
    assert isinstance(concatenated, BoxSpace)

    # The flat dimension of the concatenation must equal the summed flat dimensions of the inputs.
    expected_flat_dim = sum(space.flat_dim for space in bs_list)
    assert concatenated.flat_dim == expected_flat_dim
Esempio n. 3
0
 def obs_space(self):
     """
     Build the observation space of the wrapped environment extended by a box around the nominal values.

     :return: concatenation of the wrapped environment's observation space and a `BoxSpace` whose first two
              arguments are 0.5 and 1.5 times `self._nominal`
     """
     wrapped_obs_space = self.wrapped_env.obs_space
     nominal = self._nominal

     # The first two arguments are presumably the lower and upper bound, the last two presumably the shape and
     # the labels -- TODO confirm against the BoxSpace signature.
     # NOTE(review): for negative nominal values 0.5*nominal would be larger than 1.5*nominal; verify that the
     # nominal values are always non-negative.
     nominal_box = BoxSpace(0.5*nominal, 1.5*nominal, [nominal.shape[0]], self._params)
     return BoxSpace.cat((wrapped_obs_space, nominal_box))