def __init__(self, prior: np.ndarray = None):
    """
    @ Args
        prior: a (n, 2) ndarray of Beta priors; prior[:, 0] is the first
            concentration parameter and prior[:, 1] the second. If given,
            `n` has to equal the number of valid actions.
    """
    if prior is not None:
        prior = torch.tensor(prior, dtype=torch.float32)
    save__init__args(locals())
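
# A minimal sketch, assuming the (n, 2) prior above parameterizes per-arm Beta
# posteriors for Thompson-sampling-style action selection. The function and
# argument names below are illustrative, not part of the class above.
import torch

def sample_action(prior: torch.Tensor, counts: torch.Tensor) -> int:
    """prior: (n, 2) concentrations; counts: (n, 2) observed successes/failures per arm."""
    posterior = prior + counts                           # conjugate Beta update
    dist = torch.distributions.Beta(posterior[:, 0], posterior[:, 1])
    return int(torch.argmax(dist.sample()).item())       # one draw per arm, pick the best
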
def __init__(self, win_probs: List[float]):
    """
    @ Args:
        win_probs: the winning probability of each arm; its length also gives
            the number of arms.
    """
    win_probs = np.array(win_probs)
    save__init__args(locals(), underscore=True)
    self._action_space = IntBox(0, len(self._win_probs))
    # The observation serves no purpose here; it only exists to meet the
    # environment interface.
    self._observation_space = IntBox(0, 1)
    self.BanditEnvInfo = namedtuple(
        "BanditEnvInfo",
        [*BanditEnvInfoBase._fields]
        + ["arm{}".format(i) for i in range(len(win_probs))],
    )
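
# For reference, a sketch of how a Bernoulli bandit with the `win_probs` above
# typically produces rewards (illustrative, not necessarily this env's step()):
import numpy as np

win_probs = np.array([0.1, 0.5, 0.9])            # 3-armed bandit
rng = np.random.default_rng(0)
action = 2
reward = int(rng.random() < win_probs[action])   # 1 with prob. win_probs[action], else 0
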
def __init__(
        self,
        EnvCls,
        env_kwargs,
        traj_len: int = 1,
        batch_size: int = 1,
        **kwargs,
        ):
    """
    @ Args:
        traj_len / T: maximum number of transitions in one sampled trajectory.
            If a trajectory ends before reaching this length, the rest of it
            is filled with zeros.
        batch_size / B: the batch size of one sample operation.
    """
    save__init__args(locals())
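
# For intuition, a sketch of the (T, B) layout the traj_len / batch_size
# arguments above describe: trajectories shorter than traj_len are padded with
# zeros. The array below is illustrative, not the sampler's actual buffer.
import numpy as np

T, B = 4, 2                                   # traj_len and batch_size
rewards = np.zeros((T, B), dtype=np.float32)  # pre-filled with zeros
rewards[:2, 0] = [1.0, 0.0]                   # trajectory 0 ended after 2 transitions
rewards[:, 1] = [0.0, 1.0, 1.0, 0.0]          # trajectory 1 used all T transitions
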
def __init__(
        self,
        algo: AlgoBase,
        agent: AgentBase,
        sampler: SamplerBase,
        affinity,
        max_train_epochs: int,
        log_interval: int = 1,
        **kwargs,
        ):
    """
    @ Args
        max_train_epochs: the maximum number of training epochs; one epoch
            corresponds to one call to algo.train()
        log_interval: the interval at which logs are actually written to file
        affinity: in case you run multiple experiments on one machine
    """
    self.algo = algo
    self.agent = agent
    self.sampler = sampler
    save__init__args(locals())

def __init__(self, c=1.0):
    """
    As described in the UCB algorithm, you need to provide an exploration
    factor c.
    """
    save__init__args(locals())
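
# For reference, the standard UCB1 selection rule in which a factor c like the
# one above appears; a sketch, not necessarily this agent's exact implementation.
import numpy as np

def ucb_select(q: np.ndarray, counts: np.ndarray, t: int, c: float = 1.0) -> int:
    """q: per-arm mean reward estimates; counts: per-arm pull counts; t: total pulls so far."""
    bonus = c * np.sqrt(np.log(max(t, 1)) / np.maximum(counts, 1e-8))  # huge bonus for unpulled arms
    return int(np.argmax(q + bonus))
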
def __init__(self, epsilon):
    save__init__args(locals())
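
# `epsilon` is presumably the exploration probability of an epsilon-greedy
# policy; a minimal sketch of that rule for reference (names are illustrative).
import numpy as np

def epsilon_greedy(q: np.ndarray, epsilon: float, rng: np.random.Generator) -> int:
    """With probability epsilon pick a random arm, otherwise the greedy arm."""
    if rng.random() < epsilon:
        return int(rng.integers(len(q)))
    return int(np.argmax(q))
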
def __init__(
        self,
        random_init=False,
        beta=1.0,         # coefficient for the likelihood term
        b: float = None,  # if None, the baseline is continually updated; otherwise it is kept constant at b
        ):
    save__init__args(locals())
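
# The baseline argument `b` above suggests a REINFORCE / gradient-bandit style
# update. As a reference point only, here is the standard gradient-bandit update
# with a baseline (Sutton & Barto); treating `beta` as the step size on the
# likelihood term is an assumption, and this agent's actual rule may differ.
import numpy as np

def gradient_bandit_update(h: np.ndarray, action: int, reward: float,
                           baseline: float, beta: float) -> np.ndarray:
    """h: per-arm preferences; returns updated preferences."""
    pi = np.exp(h - h.max())
    pi /= pi.sum()                      # softmax policy over arms
    grad = -pi
    grad[action] += 1.0                 # gradient of log pi(action) w.r.t. h
    return h + beta * (reward - baseline) * grad
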