Exemple #1
0
 def __init__(self, prior: np.ndarray= None):
     """ Store an optional beta prior, converted to a float32 torch tensor.

     @ Args
         prior: a (n, 2) ndarray with beta priors. prior[:,0] is the first concentration
             If given, `n` has to be the same as number of valid actions.
             If None (the default), it is passed through unchanged.
     """
     # torch.tensor() cannot convert None, so only convert a prior that was actually given
     if prior is not None:
         prior = torch.tensor(prior, dtype= torch.float32)
     # `prior` is rebound in place (no extra local is introduced) so the locals()
     # snapshot handed to the helper holds the converted value under the argument's name
     save__init__args(locals())
Exemple #2
0
 def __init__(self,
         win_probs: List[float],
     ):
     """ Build a bandit whose arms are described by their winning probabilities.

     @ Args:
         win_probs: the winning probability of each arm; its length also
             determines the number of arms
     """
     win_probs = np.array(win_probs)
     # With underscore=True the helper exposes the argument as `self._win_probs`,
     # which is read back right below.
     save__init__args(locals(), underscore=True)

     # One discrete action per arm.
     self._action_space = IntBox(0, len(self._win_probs))
     # The observation space carries no information; it only exists to satisfy the interface.
     self._observation_space = IntBox(0, 1)

     # Extend the base env-info fields with one extra field per arm.
     base_fields = list(BanditEnvInfoBase._fields)
     arm_fields = ["arm{}".format(arm_idx) for arm_idx in range(len(win_probs))]
     self.BanditEnvInfo = namedtuple("BanditEnvInfo", base_fields + arm_fields)
Exemple #3
0
 def __init__(
     self,
     EnvCls,
     env_kwargs,
     traj_len: int = 1,
     batch_size: int = 1,
     **kwargs,
 ):
     """ Record the sampling configuration; nothing else is done here.

     @ Args:
         EnvCls: presumably the environment class to instantiate -- TODO confirm against callers
         env_kwargs: presumably the keyword arguments used to build `EnvCls` -- TODO confirm
         traj_len / T: maximum transitions in one sample trajectory.
             If under some circumstance not reached, the rest of them will be filled with zeros.
         batch_size / B: the batch size of one sample operation.
         kwargs: extra keyword arguments; how they are consumed is not visible in this method
     """
     # Hands every local (including `self`) to the helper, which presumably stores
     # the constructor arguments on the instance -- verify against its definition.
     save__init__args(locals())
Exemple #4
0
 def __init__(
     self,
     algo: AlgoBase,
     agent: AgentBase,
     sampler: SamplerBase,
     affinity,
     max_train_epochs: int,
     log_interval: int = 1,
     **kwargs,
 ):
     """ Keep references to the training components and record the run settings.

     @ Args
         algo: the algorithm object, stored as `self.algo`
         agent: the agent object, stored as `self.agent`
         sampler: the sampler object, stored as `self.sampler`
         max_train_epochs: The maximum number of training epochs,
             One epoch is one call of algo.train()
         log_interval: The interval of actually logging into file
         affinity: In case you run multiple experiments on one machine
         kwargs: extra keyword arguments; how they are consumed is not visible in this method
     """
     # These three are assigned explicitly before the helper runs.
     # NOTE(review): whether the helper below then skips or re-assigns them
     # depends on its implementation -- keep this ordering unless verified.
     self.algo = algo
     self.agent = agent
     self.sampler = sampler
     # Hands every local to the helper, which presumably stores the remaining
     # constructor arguments on the instance -- verify against its definition.
     save__init__args(locals())
Exemple #5
0
 def __init__(self, c= 1.0):
     """ As the UCB algorithm describes, you need to provide a factor c.

     @ Args
         c: the factor `c` of the UCB algorithm, 1.0 by default
             (presumably the weight of the exploration term -- TODO confirm where it is used)
     """
     # Hands every local (including `self`) to the helper, which presumably stores
     # `c` on the instance -- verify against its definition.
     save__init__args(locals())
Exemple #6
0
 def __init__(self, epsilon):
     """ Record the `epsilon` setting.

     @ Args
         epsilon: required, no default. Presumably the exploration probability
             of an epsilon-greedy policy -- TODO confirm where it is used
     """
     # Hands every local (including `self`) to the helper, which presumably stores
     # `epsilon` on the instance -- verify against its definition.
     save__init__args(locals())
Exemple #7
0
 def __init__(self,
         random_init= False,
         beta= 1.0, # coefficient for likelihood
         b: float= None, # if None, constantly update baseline; or keep baseline constant
     ):
     """ Record the configuration; nothing else is done here.

     @ Args
         random_init: False by default. Presumably selects random initialisation
             of the learned quantities -- TODO confirm where it is used
         beta: coefficient for likelihood, 1.0 by default
         b: the baseline. If None, the baseline is constantly updated;
             otherwise the baseline is kept constant at this value
     """
     # Hands every local (including `self`) to the helper, which presumably stores
     # the constructor arguments on the instance -- verify against its definition.
     save__init__args(locals())