def __init__(self, env, **kwargs):
    """UCB-flavored index agent: empirical mean plus a Hoeffding-style bonus."""

    def index(r, t):
        # log(t**2) == 2*log(t), so this is mean(r) + sqrt(log(t)/len(r)).
        bonus = np.sqrt(np.log(t**2) / (2 * len(r)))
        return np.mean(r) + bonus

    IndexAgent.__init__(self, env, index, **kwargs)
    # Record both the chosen action and the observed reward at each step.
    self.env = WriterWrapper(
        self.env, self.writer, write_scalar="action_and_reward"
    )
def __init__(self, env, **kwargs):
    """MOSS-style index agent.

    Bonus term sqrt(A/Na * max(0, log(T/(A*Na)))) depends on module-level
    constants ``A`` and ``T`` (presumably number of arms and horizon —
    defined outside this view, TODO confirm).
    """

    def index(r, t):
        Na = len(r)  # number of pulls of this arm so far
        log_term = max(0, np.log(T / (A * Na)))
        return np.mean(r) + np.sqrt(A / Na * log_term)

    IndexAgent.__init__(self, env, index, **kwargs)
    # Record both the chosen action and the observed reward at each step.
    self.env = WriterWrapper(
        self.env, self.writer, write_scalar="action_and_reward"
    )
def __init__(self, env, m=20, **kwargs):
    """Explore-Then-Commit agent: explore uniformly for m*A steps, then
    commit to the empirically best arm. ``A`` (presumably the number of
    arms) is a module-level constant defined outside this view.
    """

    def index(r, t):
        # Exploration phase: a negative pull count makes the least-pulled
        # arm have the highest index, yielding round-robin exploration.
        if t < m * A:
            return -len(r)
        # Commit phase: rank arms by empirical mean reward.
        return np.mean(r, axis=0)

    IndexAgent.__init__(self, env, index, **kwargs)
    # Record both the chosen action and the observed reward at each step.
    self.env = WriterWrapper(
        self.env, self.writer, write_scalar="action_and_reward"
    )
def __init__(self, env, B=1, **kwargs):
    """Recursive UCB agent: running empirical mean plus bonus scaled by B."""

    def stat_function(stat, Na, action, reward):
        # Statistic = empirical mean per arm, updated recursively in place
        # for the pulled arm only. Lazily initialized on the first call.
        if stat is None:
            stat = np.zeros(len(Na))
        stat[action] = (
            (Na[action] - 1) / Na[action] * stat[action]
            + reward / Na[action]
        )
        return stat

    def index(stat, Na, t):
        # UCB index: mean + B * sqrt(2*log(t^2)/Na) (log(t^2) == 2*log(t)).
        return stat + B * np.sqrt(2 * np.log(t**2) / Na)

    RecursiveIndexAgent.__init__(self, env, stat_function, index, **kwargs)
    # Record only the chosen action at each step.
    self.env = WriterWrapper(self.env, self.writer, write_scalar="action")
def __init__(self, env, **kwargs):
    """EXP3-style randomized agent: exponential weights with uniform mixing."""

    def index(r, p, t):
        # Importance-weighted cumulative estimate; rewritten as
        # sum(1 - (1 - r)/p) so unplayed rounds contribute nothing extra.
        return np.sum(1 - (1 - r) / p)

    def prob(indices, t):
        k = self.n_arms
        # Learning rate, capped at 1 for small t.
        eta = np.minimum(np.sqrt(k * np.log(k) / (t + 1)), 1.0)
        # Normalized exponential weights.
        w = np.exp(eta * indices)
        w /= w.sum()
        # Mix with the uniform distribution for forced exploration.
        uniform = np.ones(k) / k
        return (1 - eta) * w + eta * uniform

    RandomizedAgent.__init__(self, env, index, prob, **kwargs)
    # Record both the chosen action and the observed reward at each step.
    self.env = WriterWrapper(
        self.env, self.writer, write_scalar="action_and_reward"
    )
def __init__(self, env, **kwargs):
    """UCBVI agent whose environment is wrapped with a scalar writer.

    NOTE(review): write_scalar="invalid" is not one of the usual options
    ("action", "reward", "action_and_reward") — possibly a deliberate demo
    of WriterWrapper's error handling; confirm before changing.
    """
    UCBVIAgent.__init__(self, env, **kwargs)
    wrapped = WriterWrapper(self.env, self.writer, write_scalar="invalid")
    self.env = wrapped
def __init__(self, env, **kwargs):
    """Thompson Sampling agent with a Beta prior; logs the chosen action."""
    TSAgent.__init__(self, env, "beta", **kwargs)
    wrapped = WriterWrapper(self.env, self.writer, write_scalar="action")
    self.env = wrapped
def __init__(self, env, **kwargs):
    """UCBVI agent with a fixed horizon of 50; logs the observed reward."""
    UCBVIAgent.__init__(self, env, horizon=50, **kwargs)
    wrapped = WriterWrapper(self.env, self.writer, write_scalar="reward")
    self.env = wrapped
def __init__(self, env, **kwargs):
    """Recursive index agent with its defaults (UCB for Bernoulli rewards);
    logs the chosen action at each step."""
    RecursiveIndexAgent.__init__(self, env, **kwargs)
    wrapped = WriterWrapper(self.env, self.writer, write_scalar="action")
    self.env = wrapped