def __init__(
    self,
    env_spec: EnvSpec,
    batch_size: int,
    reward_multiplier: float,
    lr: float = 3e-3,
    logger: StepLogger = None,
    device: str = 'cuda' if to.cuda.is_available() else 'cpu',
):
    """
    Set up the discriminator network together with its loss and optimizer.

    :param env_spec: environment specification; its observation and action spaces define the
                     discriminator's input
    :param batch_size: batch size for each update step
    :param reward_multiplier: factor for the predicted probability
    :param lr: learning rate, stored and also passed to the Adam optimizer
    :param logger: logger for every step of the algorithm; stored as given
    :param device: device identifier, stored on the instance; defaults to `'cuda'` if available,
                   else `'cpu'` (the default is evaluated once, when the function is defined)
    """
    # Plain hyper-parameters
    self.device = device
    self.batch_size = batch_size
    self.reward_multiplier = reward_multiplier
    self.lr = lr

    # The discriminator's input is the concatenation of the observation space (twice) and the
    # action space, presumably (obs, next obs, act) -- TODO confirm against the caller.
    discr_input_space = BoxSpace.cat([env_spec.obs_space, env_spec.obs_space, env_spec.act_space])
    # A single output bounded to [0, 1], matching the sigmoid output nonlinearity below
    discr_output_space = BoxSpace(bound_lo=[0], bound_up=[1])
    discr_spec = EnvSpec(obs_space=discr_input_space, act_space=discr_output_space)

    self.discriminator = FNNPolicy(
        spec=discr_spec,
        hidden_nonlin=to.tanh,
        hidden_sizes=[62],
        output_nonlin=to.sigmoid,
    )

    # Binary cross-entropy on the sigmoid output, optimized with Adam
    self.loss_fcn = nn.BCELoss()
    self.optimizer = to.optim.Adam(self.discriminator.parameters(), lr=lr, eps=1e-5)

    # NOTE(review): no default logger is created here when `logger` is `None`; confirm whether
    # that happens elsewhere.
    self.logger = logger
def test_cat_box_space(bs_list):
    """
    Check that concatenating box spaces yields a box space whose flat dimension is the sum of
    the flat dimensions of its parts.

    :param bs_list: iterable of spaces to concatenate (presumably supplied by a pytest fixture)
    """
    concatenated = BoxSpace.cat(bs_list)
    assert isinstance(concatenated, BoxSpace)

    expected_dim = sum(space.flat_dim for space in bs_list)
    assert concatenated.flat_dim == expected_dim
def obs_space(self):
    """
    Build the observation space of the wrapped environment extended by a box around the
    nominal values.

    :return: concatenation of the wrapped environment's observation space and a box space
             spanning 0.5 to 1.5 times `self._nominal`, labeled with `self._params`
    """
    wrapped_obs_space = self.wrapped_env.obs_space

    # Bounds of the appended part: +/- 50% around the nominal values
    lower = 0.5*self._nominal
    upper = 1.5*self._nominal
    shape = [self._nominal.shape[0]]
    nominal_box = BoxSpace(lower, upper, shape, self._params)

    return BoxSpace.cat((wrapped_obs_space, nominal_box))