def __init__(self, observation_space, action_space, net, rew_func,
             n_samples=1000, horizon=20,
             mean_obs=0., std_obs=1., mean_acs=0., std_acs=1.,
             rnn=False, normalize_ac=True):
    """Set up a policy that stores a reward function and sampling settings.

    Parameters
    ----------
    observation_space, action_space, net
        Forwarded positionally to ``BasePol.__init__``.
    rew_func
        Stored unchanged as ``self.rew_func``.
    n_samples : int
        Stored as ``self.n_samples``; also the number of times each
        normalisation statistic is tiled along the first dimension.
    horizon : int
        Stored as ``self.horizon``.
    mean_obs, std_obs, mean_acs, std_acs
        Normalisation statistics. Each one is converted to a float tensor
        and tiled with ``repeat(n_samples, 1)``.
    rnn, normalize_ac
        Forwarded by keyword to ``BasePol.__init__``.
    """
    BasePol.__init__(
        self,
        observation_space,
        action_space,
        net,
        rnn=rnn,
        normalize_ac=normalize_ac,
    )

    self.rew_func = rew_func
    self.n_samples = n_samples
    self.horizon = horizon

    device = get_device()
    self.to(device)

    def _tiled(stat):
        # One float copy of the statistic per sample row.
        return torch.tensor(stat, dtype=torch.float).repeat(n_samples, 1)

    # These tensors are built after the ``self.to(...)`` call above and are
    # not explicitly moved to ``device`` here.
    self.mean_obs = _tiled(mean_obs)
    self.std_obs = _tiled(std_obs)
    self.mean_acs = _tiled(mean_acs)
    self.std_acs = _tiled(std_acs)
def __init__(self, ob_space, ac_space, qfunc, rnn=False, normalize_ac=True,
             data_parallel=False, parallel_dim=0, eps=0.2):
    """Set up a policy that is driven by a Q-function instead of a net.

    Parameters
    ----------
    ob_space, ac_space
        Forwarded positionally to ``BasePol.__init__``.
    qfunc
        Stored as ``self.qfunc``.
    rnn, normalize_ac, data_parallel, parallel_dim
        Forwarded positionally to ``BasePol.__init__``.
    eps : float
        Stored as ``self.eps``.
    """
    # ``None`` fills the ``net`` position of the base constructor; the
    # Q-function is kept on its own attribute below.
    BasePol.__init__(
        self,
        ob_space,
        ac_space,
        None,
        rnn,
        normalize_ac,
        data_parallel,
        parallel_dim,
    )

    self.qfunc = qfunc
    self.eps = eps
    self.a_i_shape = (1,)

    device = get_device()
    self.to(device)
def __init__(self, observation_space, action_space, net, normalize_ac=True):
    """Set up a policy whose distribution object is a ``MixtureGaussianPd``.

    Parameters
    ----------
    observation_space, action_space
        Forwarded positionally to ``BasePol.__init__``.
    net
        Stored directly as ``self.net`` (not passed to the base
        constructor).
    normalize_ac
        Passed as the third positional argument of ``BasePol.__init__``.
    """
    # NOTE(review): other constructors in this file pass ``net`` as the third
    # positional argument of BasePol.__init__, whereas this one passes
    # ``normalize_ac`` there. Confirm against the BasePol signature this
    # class is paired with.
    BasePol.__init__(
        self,
        observation_space,
        action_space,
        normalize_ac,
    )

    self.net = net
    self.pd = MixtureGaussianPd()

    device = get_device()
    self.to(device)
def __init__(self, observation_space, action_space, net,
             rnn=False, normalize_ac=True):
    """Set up a policy whose distribution object is a ``GaussianPd``.

    Parameters
    ----------
    observation_space, action_space, net
        Forwarded positionally to ``BasePol.__init__``.
    rnn, normalize_ac
        Forwarded positionally to ``BasePol.__init__``.
    """
    BasePol.__init__(
        self,
        observation_space,
        action_space,
        net,
        rnn,
        normalize_ac,
    )

    self.pd = GaussianPd()

    device = get_device()
    self.to(device)
def __init__(self, observation_space, action_space, net,
             rnn=False, normalize_ac=True):
    """Set up a policy whose distribution object is a ``MultiCategoricalPd``.

    Parameters
    ----------
    observation_space, action_space, net
        Forwarded positionally to ``BasePol.__init__``.
    rnn, normalize_ac
        Forwarded positionally to ``BasePol.__init__``.
    """
    BasePol.__init__(
        self,
        observation_space,
        action_space,
        net,
        rnn,
        normalize_ac,
    )

    self.pd = MultiCategoricalPd()

    device = get_device()
    self.to(device)
def __init__(self, observation_space, action_space, net,
             rnn=False, normalize_ac=True,
             data_parallel=False, parallel_dim=0):
    """Set up a ``GaussianPd``-based policy with data-parallel options.

    Parameters
    ----------
    observation_space, action_space, net
        Forwarded positionally to ``BasePol.__init__``.
    rnn, normalize_ac, data_parallel, parallel_dim
        Forwarded positionally, in this order, to ``BasePol.__init__``.
    """
    BasePol.__init__(
        self,
        observation_space,
        action_space,
        net,
        rnn,
        normalize_ac,
        data_parallel,
        parallel_dim,
    )

    self.pd = GaussianPd()

    device = get_device()
    self.to(device)
def __init__(self, ob_space, ac_space, net,
             rnn=False, normalize_ac=True,
             data_parallel=False, parallel_dim=0):
    """Set up a ``MultiCategoricalPd``-based policy with data-parallel options.

    Parameters
    ----------
    ob_space, ac_space, net
        Forwarded positionally to ``BasePol.__init__``.
    rnn, normalize_ac, data_parallel, parallel_dim
        Forwarded positionally, in this order, to ``BasePol.__init__``.
    """
    BasePol.__init__(
        self,
        ob_space,
        ac_space,
        net,
        rnn,
        normalize_ac,
        data_parallel,
        parallel_dim,
    )

    self.pd = MultiCategoricalPd()

    device = get_device()
    self.to(device)
def __init__(self, observation_space, action_space, qfunc,
             rnn=False, normalize_ac=True, eps=0.2):
    """Set up a policy that is driven by a Q-function instead of a net.

    Parameters
    ----------
    observation_space, action_space
        Forwarded positionally to ``BasePol.__init__``.
    qfunc
        Stored as ``self.qfunc``.
    rnn, normalize_ac
        Forwarded positionally to ``BasePol.__init__``.
    eps : float
        Stored as ``self.eps``.
    """
    # The base constructor receives ``None`` in its ``net`` position; the
    # Q-function is held separately on ``self.qfunc``.
    BasePol.__init__(
        self,
        observation_space,
        action_space,
        None,
        rnn,
        normalize_ac,
    )

    self.qfunc = qfunc
    self.eps = eps
    self.a_i_shape = (1,)

    device = get_device()
    self.to(device)
def __init__(self, observation_space, action_space, net=None,
             rnn=False, normalize_ac=True,
             data_parallel=False, parallel_dim=0):
    """Initialise by delegating everything to ``BasePol.__init__``.

    Parameters
    ----------
    observation_space, action_space, net
        Forwarded positionally to ``BasePol.__init__``; ``net`` defaults
        to ``None``.
    rnn, normalize_ac, data_parallel, parallel_dim
        Forwarded by keyword to ``BasePol.__init__``.
    """
    base_options = {
        'rnn': rnn,
        'normalize_ac': normalize_ac,
        'data_parallel': data_parallel,
        'parallel_dim': parallel_dim,
    }
    BasePol.__init__(
        self, observation_space, action_space, net, **base_options)
def __init__(self, ob_space, ac_space, net, rew_func,
             n_samples=1000, horizon=20,
             mean_obs=0., std_obs=1., mean_acs=0., std_acs=1.,
             rnn=False, normalize_ac=True,
             data_parallel=False, parallel_dim=0):
    """Set up a policy that stores a reward function and sampling settings.

    Parameters
    ----------
    ob_space, ac_space, net
        Forwarded positionally to ``BasePol.__init__``.
    rew_func
        Stored unchanged as ``self.rew_func``.
    n_samples : int
        Stored as ``self.n_samples``; also the number of times each
        normalisation statistic is tiled along the first dimension.
    horizon : int
        Stored as ``self.horizon``.
    mean_obs, std_obs, mean_acs, std_acs
        Normalisation statistics. Each one is converted to a float tensor
        and tiled with ``repeat(n_samples, 1)``.
    rnn, normalize_ac, data_parallel, parallel_dim
        Forwarded by keyword to ``BasePol.__init__``.
    """
    BasePol.__init__(
        self,
        ob_space,
        ac_space,
        net,
        rnn=rnn,
        normalize_ac=normalize_ac,
        data_parallel=data_parallel,
        parallel_dim=parallel_dim,
    )

    self.rew_func = rew_func
    self.n_samples = n_samples
    self.horizon = horizon

    device = get_device()
    self.to(device)

    # Built after the ``self.to(...)`` call above; nothing here moves these
    # tensors to ``device`` explicitly. Assignment order matches the
    # original: mean_obs, std_obs, mean_acs, std_acs.
    statistics = (
        ('mean_obs', mean_obs),
        ('std_obs', std_obs),
        ('mean_acs', mean_acs),
        ('std_acs', std_acs),
    )
    for attr_name, raw_value in statistics:
        as_float = torch.tensor(raw_value, dtype=torch.float)
        setattr(self, attr_name, as_float.repeat(n_samples, 1))
def __init__(self, observation_space, action_space, net, noise=None,
             rnn=False, normalize_ac=True):
    """Set up a ``DeterministicPd``-based policy with an optional noise object.

    Parameters
    ----------
    observation_space, action_space, net
        Forwarded positionally to ``BasePol.__init__``.
    noise
        Stored as ``self.noise``; defaults to ``None``.
    rnn
        Must be falsy. Forwarded by keyword to ``BasePol.__init__``.
    normalize_ac
        Forwarded by keyword to ``BasePol.__init__``.

    Raises
    ------
    ValueError
        If ``rnn`` is truthy. The check runs before the base constructor
        is called.
    """
    if rnn:
        message = 'rnn with DeterministicActionNoisePol is not supported now'
        raise ValueError(message)

    BasePol.__init__(
        self,
        observation_space,
        action_space,
        net,
        rnn=rnn,
        normalize_ac=normalize_ac,
    )

    self.noise = noise
    self.pd = DeterministicPd()

    device = get_device()
    self.to(device)
def __init__(self, ob_space, ac_space, net, noise=None,
             rnn=False, normalize_ac=True,
             data_parallel=False, parallel_dim=0):
    """Set up a ``DeterministicPd``-based policy with an optional noise object.

    Parameters
    ----------
    ob_space, ac_space, net
        Forwarded positionally to ``BasePol.__init__``.
    noise
        Stored as ``self.noise``; defaults to ``None``.
    rnn
        Must be falsy. Forwarded by keyword to ``BasePol.__init__``.
    normalize_ac, data_parallel, parallel_dim
        Forwarded by keyword to ``BasePol.__init__``.

    Raises
    ------
    ValueError
        If ``rnn`` is truthy. The check runs before the base constructor
        is called.
    """
    if rnn:
        message = 'rnn with DeterministicActionNoisePol is not supported now'
        raise ValueError(message)

    base_options = {
        'rnn': rnn,
        'normalize_ac': normalize_ac,
        'data_parallel': data_parallel,
        'parallel_dim': parallel_dim,
    }
    BasePol.__init__(self, ob_space, ac_space, net, **base_options)

    self.noise = noise
    self.pd = DeterministicPd()

    device = get_device()
    self.to(device)
def __init__(self, observation_space, action_space, net=None,
             rnn=False, normalize_ac=True):
    """Initialise by delegating everything to ``BasePol.__init__``.

    Parameters
    ----------
    observation_space, action_space, net
        Forwarded positionally to ``BasePol.__init__``; ``net`` defaults
        to ``None``.
    rnn, normalize_ac
        Forwarded by keyword to ``BasePol.__init__``.
    """
    base_options = {
        'rnn': rnn,
        'normalize_ac': normalize_ac,
    }
    BasePol.__init__(
        self, observation_space, action_space, net, **base_options)