Beispiel #1
0
 def __init__(self, args):
     """Set up per-arm linear-model state for a simulated contextual policy.

     Contexts are generated artificially for simulation purposes.

     :param args: positional parameters. ``args[1]`` is stored as ``alpha``
         and ``args[2:]`` as the per-arm "travel" parameters. The complete
         sequence is also forwarded to ``Policy.__init__`` (presumably that
         is where the number of arms is read -- confirm against ``Policy``).
     """
     Policy.__init__(self, args)
     self.alpha = args[1]
     self.travel_args = args[2:]  # arm mean parameters carried over ("travel")
     dim = 3  # context dimensionality
     self.d = dim
     arm_count = self.n_bandits
     # One (d x d) identity matrix and one length-d zero vector per arm.
     self.A = np.array([np.eye(dim) for _ in range(arm_count)])
     self.b = np.array([np.zeros(dim) for _ in range(arm_count)])
     # No context is available until one is generated later.
     self.context = None
Beispiel #2
0
 def __init__(self, args):
     """Initialize the base policy and record the per-slot try count.

     :param args: positional parameters forwarded to ``Policy.__init__``;
         ``args[1]`` is converted with ``int`` and stored as ``try_perSlot``.
     """
     Policy.__init__(self, args)
     tries = int(args[1])
     self.try_perSlot = tries
Beispiel #3
0
 def __init__(self, args):
     """Initialize the base policy and store the temperature settings.

     :param args: positional parameters forwarded to ``Policy.__init__``;
         ``args[1]`` is the temperature and ``args[2]`` enables annealing
         when it is greater than zero.
     """
     Policy.__init__(self, args)
     # Annealing temperature: the higher it is, the more random the
     # "molecules" (gas-like); at low temperature they settle into an
     # ordered arrangement (solid-like).
     self.temperature = args[1]
     anneal_setting = args[2]
     self.anneal = anneal_setting > 0.0
Beispiel #4
0
 def __init__(self, args):
     """Initialize the policy by delegating entirely to ``Policy.__init__``.

     :param args: parameters forwarded unchanged to the base initializer.
     """
     Policy.__init__(self, args)
Beispiel #5
0
 def __init__(self, args):
     """Initialize the base policy and store the decay settings.

     :param args: positional parameters forwarded to ``Policy.__init__``;
         ``args[1]`` is stored as ``decay`` and ``args[2]`` enables annealing
         when it is greater than zero.
     """
     Policy.__init__(self, args)
     anneal_setting = args[2]
     decay_value = args[1]
     self.anneal = anneal_setting > 0.0
     self.decay = decay_value
Beispiel #6
0
 def __init__(self, args):
     """Initialize the base policy and the per-arm Beta parameters.

     :param args: positional parameters forwarded to ``Policy.__init__``;
         ``args[1]`` and ``args[2]`` form the initial two-element parameter
         pair given to every arm.
     """
     Policy.__init__(self, args)
     # Track the Beta-distribution parameters of each arm; every arm gets
     # its own independent two-element list.
     per_arm_params = []
     for _ in range(self.n_bandits):
         per_arm_params.append([args[1], args[2]])
     self.betaArgs = per_arm_params
Beispiel #7
0
 def __init__(self, args):
     """Initialize the base policy, the mixing parameter and per-arm weights.

     :param args: positional parameters forwarded to ``Policy.__init__``;
         ``args[1]`` is stored as ``gamma``.
     """
     Policy.__init__(self, args)
     self.gamma = args[1]
     # Weights start at 1.0 for every arm and are stored as floats. Building
     # them from a list of Python ints would yield an integer array, and
     # NumPy silently truncates fractional values assigned into an integer
     # array, which would corrupt any later in-place weight update.
     self._weights = np.ones(self.n_bandits, dtype=float)
     # No probabilities are stored yet; they are filled in elsewhere.
     self._probs = None
Beispiel #8
0
 def __init__(self, args):
     """Initialize the base policy and a zeroed squared-reward slot per arm.

     :param args: positional parameters forwarded to ``Policy.__init__``.
     """
     Policy.__init__(self, args)
     # One float slot per arm, all starting at zero.
     self.squared_reward = [0.0 for _ in range(self.n_bandits)]