Example 1
    def plot_suboptimal_arm(self):

        self.ph.initiate_figure("#Pulls of sub-optimal arms vs Time",
                                "Time T",
                                "#Pulls",
                                x_log=False,
                                y_log=True)

        for col in range(self.K):  # For each arm,
            theoretical_bound = self.theoretical_bounds_arm_pulls[:, col]

            if col != self.best_arm:  # the theoretical bound applies only to sub-optimal arms
                self.ph.add_curve(theoretical_bound,
                                  mh.stringify(self.true_means[col]) + " Theo",
                                  col, 0)

            for i in range(self.algo_count):  # For each bandit algorithm,

                empirical_pulls = self.cum_pulls[i][:, col]
                self.ph.add_curve(
                    empirical_pulls,
                    mh.stringify(self.true_means[col]) +
                    self.algorithms_to_run[i][0], col, i + 1)

        self.ph.plot_curves()
Example 2
    def analyse_suboptimal_arm_pulls(self):
        # Compute deltas and theoretical upper bound of playing each sub-optimal arm.
        self.best_arm = mh.get_maximum_index(self.true_means)
        mean_of_best_arm = self.true_means[self.best_arm]

        for i in range(self.K):
            self.deltas[i] = mean_of_best_arm - self.true_means[i]

        del_sq_invs = mh.get_instance_dependent_square_inverses(
            self.deltas, self.best_arm)

        addi_constant = rvh.func_of_pi(add=1, power=2, mult=1 / 3)

        time_series = np.arange(self.T + 1)

        logarithmic_time_series = rvh.natural_logarithm(time_series)

        del_sq_inv_row_matrix = np.reshape(np.array(del_sq_invs), (1, -1))
        logarithmic_time_series_column_matrix = np.reshape(
            logarithmic_time_series, (-1, 1))

        # Outer product: one row per time step, one column per arm.
        matrix = np.dot(logarithmic_time_series_column_matrix,
                        del_sq_inv_row_matrix)

        self.theoretical_bounds_arm_pulls = matrix + addi_constant
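
The constants above (the additive term 1 + pi^2/3 and the per-arm Delta_i^{-2} factors) match the standard UCB1 bound on sub-optimal arm pulls from Auer, Cesa-Bianchi and Fischer (2002). Assuming mh.get_instance_dependent_square_inverses returns 8 / Delta_i^2 for each arm i, the matrix computed above evaluates, for each time n and sub-optimal arm i:

    E[T_i(n)] \le \frac{8 \ln n}{\Delta_i^2} + 1 + \frac{\pi^2}{3}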
Example 3
    def play_arms(self):
        rewards = [0]
        n = 0

        # At time t = 0, pull each arm once to initialise its estimates.
        for arm_number in range(self.K):
            reward = super().pull_arm(arm_number)
            rewards.append(reward)
            n = n + 1

        # From time t = 1, play in phases of increasing length;
        # ceil(sqrt(N)) phases suffice to exhaust the budget of N pulls.
        for t in range(1, mh.ciel_root(self.N) + 1):
            self.revise_ucbs(n)

            # pull the arm with highest UCB 2t-1 times
            pulls_this_iteration = 2 * t - 1

            arm_with_highest_ucb = mh.get_maximum_index(
                self.upper_confidence_bound)

            for i in range(pulls_this_iteration):
                if n >= self.N:
                    break

                reward = super().pull_arm(arm_with_highest_ucb)
                rewards.append(reward)
                n = n + 1

        return rewards
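
The phase lengths 2t - 1 sum to a perfect square, which is why ceil(sqrt(N)) phases (on top of the K initial pulls) cover the whole budget. A quick standalone check of that arithmetic:

    import math

    # Phase t contributes 2t - 1 pulls, and sum over t = 1..m of (2t - 1)
    # equals m^2, so m = ceil(sqrt(N)) phases reach at least N pulls; the
    # n >= self.N guard above then truncates the final phase.
    N = 1000
    m = math.ceil(math.sqrt(N))
    assert sum(2 * t - 1 for t in range(1, m + 1)) == m * m
    assert m * m >= N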
Example 4
    def plot_regret(self):

        self.ph.clear_curves()
        true_means_string = "True means of arms: " + mh.stringify_list(
            self.true_means)

        self.ph.initiate_figure("Regret of algorithms vs Time\n" +
                                true_means_string,
                                "Time T",
                                "Regret",
                                x_log=False,
                                y_log=False)

        # ph.add_curve(self.cum_optimal_reward, "Optimal Reward", 1)
        # ph.add_curve(self.cum_reward_empirical, "Empirical Reward", 2)
        # ph.add_curve(self.cum_reward_empirical_incremental, "Empirical Reward incremental", 3)

        self.ph.add_curve(self.cum_regret_theo_bound,
                          "Theoretical Upper Bound", 4)

        for i in range(self.algo_count):  # For each bandit algorithm,
            self.ph.add_curve(self.cum_regret_empirical[i],
                              self.algorithms_to_run[i][0], 5 + i)

        self.ph.plot_curves()
Example 5
    def analyse_common_stats(self):
        # Compute deltas and theoretical upper bound of regret of UCB1.
        self.best_arm = mh.get_maximum_index(self.true_means)
        mean_of_best_arm = self.true_means[self.best_arm]

        for i in range(self.K):
            self.deltas[i] = mean_of_best_arm - self.true_means[i]

        sum_del_inv, sum_del = mh.get_instance_dependent_values(
            self.best_arm, self.deltas)

        mult_constant, addi_constant = mh.get_theoretical_constants(
            sum_del_inv, sum_del)

        time_series = np.arange(self.T + 1)
        self.cum_regret_theo_bound = mult_constant * rvh.natural_logarithm(
            time_series) + addi_constant
        self.cum_optimal_reward = time_series * mean_of_best_arm
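
Given the structure mult_constant * ln(T) + addi_constant, this appears to implement the finite-time UCB1 regret bound of Auer et al. (2002), with the constants assumed to come from mh.get_theoretical_constants:

    R(T) \le 8 \sum_{i:\,\Delta_i > 0} \frac{\ln T}{\Delta_i} + \left(1 + \frac{\pi^2}{3}\right) \sum_{j=1}^{K} \Delta_j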
Example 6
    def __init__(self, k=10, t=10**6):
        self.ph = PlotHelper()
        self.logger = LogHelper.get_logger(__name__)

        # Number of arms and time horizon (defaults chosen arbitrarily).
        self.K = k
        self.T = t

        self.deltas = [0] * self.K

        # Create the arms
        self.true_means, self.arms = mh.get_arms(self.K, self.T)
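
mh.get_arms is not shown in these snippets. Purely as an illustration, here is a minimal sketch of a helper with the same shape, assuming Bernoulli arms with uniformly drawn true means; BernoulliArm and its fields are hypothetical names, chosen to mirror the pull_count / empirical_mean attributes the other snippets read:

    import random

    class BernoulliArm:
        # Hypothetical arm type; pull_count and empirical_mean match the
        # attributes used by revise_ucbs below.
        def __init__(self, true_mean):
            self.true_mean = true_mean
            self.pull_count = 0
            self.empirical_mean = 0.0

        def pull(self):
            reward = 1 if random.random() < self.true_mean else 0
            self.pull_count += 1
            # Incremental running mean of observed rewards.
            self.empirical_mean += (reward - self.empirical_mean) / self.pull_count
            return reward

    def get_arms(k, t):
        # t (the horizon) is accepted for signature parity but unused here.
        true_means = [random.random() for _ in range(k)]
        return true_means, [BernoulliArm(m) for m in true_means]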
Example 7
    def play_arms(self):
        rewards = [0]

        # First, pull each arm once (times t = 1 through K).
        for arm_number in range(self.K):

            reward = super().pull_arm(arm_number)

            rewards.append(reward)

        for t in range(self.K + 1, self.T + 1):
            self.revise_ucbs(t)

            # pull the arm with highest UCB
            arm_with_highest_ucb = mh.get_maximum_index(self.upper_confidence_bound)

            reward = super().pull_arm(arm_with_highest_ucb)

            rewards.append(reward)

        return rewards
Example 8
    def revise_ucbs(self, t):
        for i in range(self.K):
            self.upper_confidence_bound[i] = (self.arms[i].empirical_mean +
                                              mh.textbook_radius(t, self.arms[i].pull_count))
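
mh.textbook_radius is likewise not shown. A minimal sketch, assuming it computes the textbook UCB1 confidence radius sqrt(2 ln t / n_i):

    import math

    def textbook_radius(t, pull_count):
        # Standard UCB1 exploration term; an unpulled arm gets an
        # infinite radius so it is tried before any exploitation.
        if pull_count == 0:
            return float("inf")
        return math.sqrt(2.0 * math.log(t) / pull_count)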
Example 9
    def test_get_arms(self):
        true_means, arms = mh.get_arms(10, 100)

        best_arm = mh.get_maximum_index(true_means)

        # One mean per arm, and the best arm carries the maximum true mean.
        self.assertEqual(len(true_means), 10)
        self.assertEqual(true_means[best_arm], max(true_means))