def playArm(self, env, t):
    """Choose one arm for round ``t``, pull it, and report the outcome.

    While ``t < env.nb_arms`` the arm is simply ``env.get_arm_idx(t)`` and
    no leader is computed. Afterwards the leader is whatever
    ``tools.best_arm`` selects from the ``(arm, arm.mu_hat)`` pairs; the
    leader itself is pulled when its ``nb_times_drawn`` is a multiple of
    ``self.draw_leader_every``, otherwise the arm is selected among the
    leader's neighbours using ``self.draw_theta`` as the score.

    Args:
        env: Environment; must be a ``UnimodalEnvironment``.
        t: Current round index.

    Returns:
        ``(arm_t, reward_t, leader_t)`` where ``leader_t`` is ``None``
        during the first ``env.nb_arms`` rounds.

    Raises:
        AssertionError: If ``env`` is not a ``UnimodalEnvironment``.
    """
    assert isinstance(env, UnimodalEnvironment), \
        "not unimodal environment for unimodal policy"

    # Initial rounds: no leader exists yet.
    if t < env.nb_arms:
        pulled = env.get_arm_idx(t)
        return pulled, pulled.draw(t), None

    leader = tools.best_arm([(arm, arm.mu_hat) for arm in env.list_of_arms])

    if leader.nb_times_drawn % self.draw_leader_every != 0:
        # Neighbours are filled in by the environment on first use.
        if not leader.neighbors:
            env.set_neighbors(leader)
        sampled = [(neighbor, self.draw_theta(neighbor))
                   for neighbor in leader.neighbors]
        pulled = tools.best_arm(sampled)
    else:
        pulled = leader

    return pulled, pulled.draw(t), leader
def playArm(self, env, t):
    """Choose one arm for round ``t``, pull it, and report the outcome.

    While ``t < env.nb_arms`` the arm is ``env.get_arm_idx(t)`` and no
    leader is computed. Afterwards the leader is whatever
    ``tools.best_arm`` selects from the ``(arm, arm.mu_hat)`` pairs. The
    leader is pulled when its ``dle_alpha`` counter is in ``self.list_dle``;
    otherwise the arm is selected among the leader's neighbours using
    ``self.draw_theta`` as the score. The leader's ``dle_alpha`` is then
    incremented and reset to 0 when it reaches ``self.y_gamma``.

    Args:
        env: Environment; must be a ``UnimodalEnvironment``.
        t: Current round index.

    Returns:
        ``(arm_t, reward_t, leader_t)`` where ``leader_t`` is ``None``
        during the first ``env.nb_arms`` rounds.

    Raises:
        AssertionError: If ``env`` is not a ``UnimodalEnvironment``.
    """
    assert isinstance(env, UnimodalEnvironment), \
        "not unimodal environment for unimodal policy"

    # Initial rounds: no leader exists yet.
    if t < env.nb_arms:
        pulled = env.get_arm_idx(t)
        return pulled, pulled.draw(t), None

    leader = tools.best_arm([(arm, arm.mu_hat) for arm in env.list_of_arms])

    if leader.dle_alpha not in self.list_dle:
        # Neighbours are filled in by the environment on first use.
        if not leader.neighbors:
            env.set_neighbors(leader)
        sampled = [(neighbor, self.draw_theta(neighbor))
                   for neighbor in leader.neighbors]
        pulled = tools.best_arm(sampled)
    else:
        pulled = leader

    # NOTE(review): the counter is advanced on every round that has a
    # leader, whether or not the leader itself was pulled - confirm this
    # is the intended schedule.
    leader.dle_alpha += 1
    if leader.dle_alpha == self.y_gamma:
        leader.dle_alpha = 0

    return pulled, pulled.draw(t), leader
def playArm(self, env, t):
    """Choose one arm for round ``t``, pull it, and report the outcome.

    While ``t < env.nb_arms`` the arm is ``env.get_arm_idx(t)`` and no
    leader is computed. Afterwards the leader is whatever
    ``tools.best_arm`` selects from the ``(arm, arm.mu_hat)`` pairs and its
    ``nb_times_leader`` counter is incremented. Unless
    ``self.draw_leader_every`` equals ``'no_leader'``, the leader is pulled
    whenever that counter is a multiple of ``self.draw_leader_every``;
    in every other case the arm is selected among the leader's neighbours
    using ``self.KLUCB_idx`` as the score.

    Args:
        env: Environment; must be a ``UnimodalEnvironment``.
        t: Current round index.

    Returns:
        ``(arm_t, reward_t, leader_t)`` where ``leader_t`` is ``None``
        during the first ``env.nb_arms`` rounds.

    Raises:
        AssertionError: If ``env`` is not a ``UnimodalEnvironment``.
    """
    assert isinstance(env, UnimodalEnvironment), "not unimodal environments"

    # Initial rounds: no leader exists yet.
    if t < env.nb_arms:
        pulled = env.get_arm_idx(t)
        return pulled, pulled.draw(t), None

    leader = tools.best_arm([(arm, arm.mu_hat) for arm in env.list_of_arms])
    leader.nb_times_leader += 1

    # The string check comes first so the modulo is never evaluated
    # against the 'no_leader' sentinel.
    pull_leader = (
        self.draw_leader_every != 'no_leader'
        and leader.nb_times_leader % self.draw_leader_every == 0
    )

    if pull_leader:
        pulled = leader
    else:
        # Neighbours are filled in by the environment on first use.
        if not leader.neighbors:
            env.set_neighbors(leader)
        indexed = [(neighbor, self.KLUCB_idx(neighbor, t))
                   for neighbor in leader.neighbors]
        pulled = tools.best_arm(indexed)

    return pulled, pulled.draw(t), leader
def playArm(self, env, t):
    """Choose one arm for round ``t``, pull it, and return the outcome.

    While ``t < env.nb_arms`` the arm is ``env.get_arm_idx(t)``. Afterwards
    every arm in ``env.list_of_arms`` is paired with ``self.UCB_idx(arm, t)``
    and ``tools.best_arm`` selects the arm to pull from those pairs.

    Args:
        env: Environment exposing ``nb_arms``, ``get_arm_idx`` and
            ``list_of_arms``.
        t: Current round index.

    Returns:
        ``(arm_t, reward_t)``: the pulled arm and the value of
        ``arm_t.draw(t)``.
    """
    # Initial rounds: the environment dictates the arm.
    if t < env.nb_arms:
        pulled = env.get_arm_idx(t)
        return pulled, pulled.draw(t)

    indexed = [(arm, self.UCB_idx(arm, t)) for arm in env.list_of_arms]
    pulled = tools.best_arm(indexed)
    return pulled, pulled.draw(t)
def playArm(self, env, t):
    """Choose one arm for round ``t``, pull it, and return the outcome.

    While ``t < env.nb_arms`` the arm is ``env.get_arm_idx(t)``. Afterwards
    every arm in ``env.list_of_arms`` is paired with
    ``self.draw_theta(arm)`` and ``tools.best_arm`` selects the arm to pull
    from those pairs.

    Args:
        env: Environment exposing ``nb_arms``, ``get_arm_idx`` and
            ``list_of_arms``.
        t: Current round index.

    Returns:
        ``(arm_t, reward_t, leader_t)``. This policy never computes a
        leader, so the third element is always ``None``.
    """
    if t < env.nb_arms:
        arm_t = env.get_arm_idx(t)
    else:
        list_arm_theta = [(arm, self.draw_theta(arm))
                          for arm in env.list_of_arms]
        arm_t = tools.best_arm(list_arm_theta)

    reward_t = arm_t.draw(t)

    # The return used to reference ``leader_t`` without ever assigning it,
    # which raised NameError on every call. The 3-tuple shape is kept and
    # the leader slot is filled with None.
    leader_t = None
    return arm_t, reward_t, leader_t
def playArm(self, env, t):
    """Choose one arm for round ``t``, pull it, and report the outcome.

    While ``t < env.nb_arms`` the arm is ``env.get_arm_idx(t)`` and no
    leader is computed. Afterwards the leader is whatever
    ``tools.best_arm`` selects from the ``(arm, arm.mu_hat)`` pairs; a
    trace line with the leader's ``idx`` and ``idx_pair`` is printed. The
    leader is pulled when its ``nb_times_drawn`` is a multiple of
    ``self.draw_leader_every``; otherwise the arm is selected among the
    leader's neighbours using ``self.KLUCB_idx`` as the score, printing one
    trace line per neighbour.

    Args:
        env: Environment; must be a ``UnimodalEnvironment``.
        t: Current round index.

    Returns:
        ``(arm_t, reward_t, leader_t)`` where ``leader_t`` is ``None``
        during the first ``env.nb_arms`` rounds.

    Raises:
        AssertionError: If ``env`` is not a ``UnimodalEnvironment``.
    """
    assert isinstance(env, UnimodalEnvironment), "not unimodal environments"

    # Initial rounds: no leader exists yet and nothing is printed.
    if t < env.nb_arms:
        pulled = env.get_arm_idx(t)
        return pulled, pulled.draw(t), None

    leader = tools.best_arm([(arm, arm.mu_hat) for arm in env.list_of_arms])
    print(f"Time {t}, leader is {leader.idx}, {leader.idx_pair}")

    if leader.nb_times_drawn % self.draw_leader_every != 0:
        # Neighbours are filled in by the environment on first use.
        if not leader.neighbors:
            env.set_neighbors(leader)
        indexed = []
        for neighbor in leader.neighbors:
            # NOTE(review): the trace is labelled "klucb" but prints
            # UCB1_idx, while selection below uses KLUCB_idx - confirm
            # which value the trace is meant to show.
            print("klucb", self.UCB1_idx(neighbor, t), "arm ", neighbor.idx)
            indexed.append((neighbor, self.KLUCB_idx(neighbor, t)))
        pulled = tools.best_arm(indexed)
    else:
        pulled = leader

    return pulled, pulled.draw(t), leader