def playArm(self, env, t):
        """Choose one arm for round ``t``, draw it, and report the leader.

        While ``t < env.nb_arms`` the arm returned by ``env.get_arm_idx(t)``
        is drawn and no leader is reported. Afterwards the leader is
        ``tools.best_arm`` applied to the ``(arm, arm.mu_hat)`` pairs
        (presumably the arm with the highest empirical mean -- verify against
        ``tools``). The leader itself is drawn whenever its
        ``nb_times_drawn`` is a multiple of ``self.draw_leader_every``;
        otherwise ``tools.best_arm`` chooses among the leader's neighbors
        using the values returned by ``self.draw_theta``.

        Args:
            env: Must be a ``UnimodalEnvironment``.
            t: Current round index.

        Returns:
            ``(arm_t, reward_t, leader_t)``; ``leader_t`` is ``None`` during
            the first ``env.nb_arms`` rounds.

        Raises:
            AssertionError: If ``env`` is not a ``UnimodalEnvironment``.
        """
        assert isinstance(
            env, UnimodalEnvironment
        ), "not unimodal environment for unimodal policy"

        # Initial rounds: the arm depends only on the round index.
        if t < env.nb_arms:
            chosen = env.get_arm_idx(t)
            return chosen, chosen.draw(t), None

        leader = tools.best_arm(
            [(arm, arm.mu_hat) for arm in env.list_of_arms])

        if leader.nb_times_drawn % self.draw_leader_every != 0:
            # Neighbors are filled in on first use of this leader.
            if not leader.neighbors:
                env.set_neighbors(leader)
            sampled = [(neighbor, self.draw_theta(neighbor))
                       for neighbor in leader.neighbors]
            chosen = tools.best_arm(sampled)
        else:
            chosen = leader

        return chosen, chosen.draw(t), leader
Esempio n. 2
0
    def playArm(self, env, t):
        """Choose one arm for round ``t`` using a per-leader draw schedule.

        While ``t < env.nb_arms`` the arm returned by ``env.get_arm_idx(t)``
        is drawn and no leader is reported. Afterwards the leader is
        ``tools.best_arm`` applied to the ``(arm, arm.mu_hat)`` pairs. The
        leader is drawn directly when its ``dle_alpha`` counter is contained
        in ``self.list_dle``; otherwise ``tools.best_arm`` chooses among the
        leader's neighbors using the values returned by ``self.draw_theta``.
        In both cases the leader's ``dle_alpha`` is then incremented and
        reset to 0 when it reaches ``self.y_gamma``.

        Args:
            env: Must be a ``UnimodalEnvironment``.
            t: Current round index.

        Returns:
            ``(arm_t, reward_t, leader_t)``; ``leader_t`` is ``None`` during
            the first ``env.nb_arms`` rounds.

        Raises:
            AssertionError: If ``env`` is not a ``UnimodalEnvironment``.
        """
        assert isinstance(
            env, UnimodalEnvironment
        ), "not unimodal environment for unimodal policy"

        if t < env.nb_arms:
            leader = None
            chosen = env.get_arm_idx(t)
        else:
            leader = tools.best_arm(
                [(arm, arm.mu_hat) for arm in env.list_of_arms])

            if leader.dle_alpha not in self.list_dle:
                # Neighbors are filled in on first use of this leader.
                if not leader.neighbors:
                    env.set_neighbors(leader)
                sampled = [(neighbor, self.draw_theta(neighbor))
                           for neighbor in leader.neighbors]
                chosen = tools.best_arm(sampled)
            else:
                chosen = leader

            # Advance the leader's schedule position, wrapping at y_gamma.
            leader.dle_alpha += 1
            if leader.dle_alpha == self.y_gamma:
                leader.dle_alpha = 0

        reward = chosen.draw(t)
        return chosen, reward, leader
Esempio n. 3
0
	def playArm(self, env, t):
		"""Choose one arm for round ``t`` among the leader's neighbors.

		While ``t < env.nb_arms`` the arm returned by ``env.get_arm_idx(t)``
		is drawn and no leader is reported. Afterwards the leader is
		``tools.best_arm`` applied to the ``(arm, arm.mu_hat)`` pairs and its
		``nb_times_leader`` counter is incremented. Unless
		``self.draw_leader_every`` equals ``'no_leader'``, the leader itself
		is drawn whenever that counter is a multiple of
		``self.draw_leader_every``; otherwise ``tools.best_arm`` chooses
		among the leader's neighbors using ``self.KLUCB_idx``.

		Args:
			env: Must be a ``UnimodalEnvironment``.
			t: Current round index.

		Returns:
			``(arm_t, reward_t, leader_t)``; ``leader_t`` is ``None`` during
			the first ``env.nb_arms`` rounds.

		Raises:
			AssertionError: If ``env`` is not a ``UnimodalEnvironment``.
		"""
		assert isinstance(env, UnimodalEnvironment), "not unimodal environments"

		# Initial rounds: the arm depends only on the round index.
		if t < env.nb_arms:
			chosen = env.get_arm_idx(t)
			return chosen, chosen.draw(t), None

		leader = tools.best_arm(
			[(arm, arm.mu_hat) for arm in env.list_of_arms])
		leader.nb_times_leader += 1

		# The sentinel is tested first so the modulo never sees a string.
		play_leader = (
			self.draw_leader_every != 'no_leader'
			and leader.nb_times_leader % self.draw_leader_every == 0
		)

		if play_leader:
			chosen = leader
		else:
			# Neighbors are filled in on first use of this leader.
			if not leader.neighbors:
				env.set_neighbors(leader)
			indexed = [(neighbor, self.KLUCB_idx(neighbor, t))
					   for neighbor in leader.neighbors]
			chosen = tools.best_arm(indexed)

		return chosen, chosen.draw(t), leader
Esempio n. 4
0
    def playArm(self, env, t):
        """Choose one arm for round ``t`` and draw it.

        While ``t < env.nb_arms`` the arm returned by ``env.get_arm_idx(t)``
        is drawn. Afterwards every arm in ``env.list_of_arms`` is paired
        with ``self.UCB_idx(arm, t)`` and ``tools.best_arm`` picks from
        those pairs.

        Args:
            env: Environment exposing ``nb_arms``, ``get_arm_idx`` and
                ``list_of_arms``.
            t: Current round index.

        Returns:
            ``(arm_t, reward_t)``: the drawn arm and the value returned by
            its ``draw(t)``.
        """
        # Initial rounds: the arm depends only on the round index.
        if t < env.nb_arms:
            chosen = env.get_arm_idx(t)
            return chosen, chosen.draw(t)

        scored = []
        for candidate in env.list_of_arms:
            scored.append((candidate, self.UCB_idx(candidate, t)))
        chosen = tools.best_arm(scored)

        return chosen, chosen.draw(t)
Esempio n. 5
0
	def playArm(self,
				env,
				t):
		"""Choose one arm for round ``t`` and draw it.

		While ``t < env.nb_arms`` the arm returned by ``env.get_arm_idx(t)``
		is drawn. Afterwards every arm in ``env.list_of_arms`` is paired
		with ``self.draw_theta(arm)`` and ``tools.best_arm`` picks from
		those pairs.

		Args:
			env: Environment exposing ``nb_arms``, ``get_arm_idx`` and
				``list_of_arms``.
			t: Current round index.

		Returns:
			``(arm_t, reward_t, leader_t)``. ``leader_t`` is always ``None``
			because this policy never computes a leader; the slot is kept
			so the tuple has the shape written in the return statement.
		"""
		# The original returned `leader_t` without ever assigning it, so
		# every call ended in a NameError. Bind it explicitly instead.
		leader_t = None

		if t < env.nb_arms:
			arm_t = env.get_arm_idx(t)
		else:
			list_arm_theta = [(arm, self.draw_theta(arm)) for arm in env.list_of_arms]
			arm_t = tools.best_arm(list_arm_theta)

		reward_t = arm_t.draw(t)

		return arm_t, reward_t, leader_t
    def playArm(self, env, t):
        """Choose one arm for round ``t`` among the leader's neighbors.

        While ``t < env.nb_arms`` the arm returned by ``env.get_arm_idx(t)``
        is drawn and no leader is reported. Afterwards the leader is
        ``tools.best_arm`` applied to the ``(arm, arm.mu_hat)`` pairs and a
        trace line naming it is printed. The leader itself is drawn whenever
        its ``nb_times_drawn`` is a multiple of ``self.draw_leader_every``;
        otherwise ``tools.best_arm`` chooses among the leader's neighbors
        using ``self.KLUCB_idx``, printing one trace line per neighbor.

        Args:
            env: Must be a ``UnimodalEnvironment``.
            t: Current round index.

        Returns:
            ``(arm_t, reward_t, leader_t)``; ``leader_t`` is ``None`` during
            the first ``env.nb_arms`` rounds.

        Raises:
            AssertionError: If ``env`` is not a ``UnimodalEnvironment``.
        """
        assert isinstance(env, UnimodalEnvironment), "not unimodal environments"

        # Initial rounds: the arm depends only on the round index.
        if t < env.nb_arms:
            chosen = env.get_arm_idx(t)
            return chosen, chosen.draw(t), None

        leader = tools.best_arm(
            [(arm, arm.mu_hat) for arm in env.list_of_arms])
        print(f"Time {t}, leader is {leader.idx}, {leader.idx_pair}")

        if leader.nb_times_drawn % self.draw_leader_every != 0:
            # Neighbors are filled in on first use of this leader.
            if not leader.neighbors:
                env.set_neighbors(leader)

            indexed = []
            for neighbor in leader.neighbors:
                # NOTE(review): the trace is labelled "klucb" but prints
                # UCB1_idx, while the selection below uses KLUCB_idx --
                # confirm which value was meant to be logged.
                print("klucb", self.UCB1_idx(neighbor, t), "arm ",
                      neighbor.idx)
                indexed.append((neighbor, self.KLUCB_idx(neighbor, t)))
            chosen = tools.best_arm(indexed)
        else:
            chosen = leader

        return chosen, chosen.draw(t), leader