Example #1
    def batch_update(self, x: np.matrix, chosen_arm: int, reward: float) -> None:
        """Update the information about the arms with a new batch of data.

        :param x: observed context matrix.
        :param chosen_arm: index of the chosen arm.
        :param reward: reward from the chosen arm.
        """
        self.data_size += 1
        # split the observed context: the first z_dim rows form z, the rest form x
        z = x[:self.z_dim]
        x = x[self.z_dim:]

        self.counts[chosen_arm] += 1
        self.rewards += reward
        # undo the chosen arm's previous correction to the shared statistics;
        # it is re-applied with the refreshed A_inv, B and b below
        self._A_zero += self._B[chosen_arm].T.dot(self._A_inv[chosen_arm]).dot(self._B[chosen_arm])
        self._b_zero += self._B[chosen_arm].T.dot(self._A_inv[chosen_arm]).dot(self._b[chosen_arm])
        self._A_inv[chosen_arm] -= self._A_inv[chosen_arm].dot(x.dot(x.T.dot(self._A_inv[chosen_arm]))) / (1 + x.T.dot(self._A_inv[chosen_arm].dot(x)))  # Sherman-Morrison rank-one update
        self._B[chosen_arm] += x.dot(z.T)
        self._b[chosen_arm] += x * reward
        self._A_zero += z.dot(z.T) - self._B[chosen_arm].T.dot(self._A_inv[chosen_arm]).dot(self._B[chosen_arm])
        self._b_zero += z * reward - self._B[chosen_arm].T.dot(self._A_inv[chosen_arm]).dot(self._b[chosen_arm])

        if self.data_size % self.batch_size == 0:
            self.A_zero = self._A_zero.copy()  # a real copy: slicing with [:] only returns a view
            self.b_zero = self._b_zero.copy()
            self.A_inv = copy.deepcopy(self._A_inv)
            self.B = copy.deepcopy(self._B)
            self.b = copy.deepcopy(self._b)
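The _A_inv line is a Sherman-Morrison rank-one update: for an invertible A and a column vector x, inv(A + x x^T) = A_inv - A_inv x x^T A_inv / (1 + x^T A_inv x), so the inverse never has to be recomputed from scratch. A minimal standalone check of the identity (the names here are illustrative, not part of the class):

import numpy as np

rng = np.random.default_rng(0)
d = 4
m = rng.normal(size=(d, d))
A = np.eye(d) + m @ m.T          # symmetric positive definite, hence invertible
x = rng.normal(size=(d, 1))

A_inv = np.linalg.inv(A)
updated = A_inv - A_inv @ x @ x.T @ A_inv / (1.0 + x.T @ A_inv @ x)
assert np.allclose(updated, np.linalg.inv(A + x @ x.T))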
Example #2
def nipals1(x: np.matrix):

    max_it = 3
    # max_it = matrix_rank(x)
    # print("rank(x) = " + str(max_it))
    p = np.zeros(shape=(x.shape[1], max_it))
    t = np.zeros(shape=(x.shape[0], max_it))

    for j in range(0, max_it):
        # a = norm(x, axis=0)
        # idx = a.argmax()
        t_j = x[:, j]

        # # verify if the column is a zero vector
        # zero_col = np.zeros(shape=t_j.shape)
        # if norm(t_j - zero_col) <= epsilon:
        #     j += 1
        #     max_it += 1
        #     continue

        t_j1 = t_j - t_j  # zero vector of matching shape, so the refinement loop below runs at least once
        while norm(t_j1 - t_j) > epsilon:
            p_j = x.transpose().dot(t_j)
            p_j /= norm(p_j)
            t_j1 = t_j
            t_j = x.dot(p_j)

        p[:, j] = p_j
        t[:, j] = t_j
        x = x - np.dot(t_j[:, None], p_j[None, :])

    return t, p
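nipals1 relies on a module-level epsilon and on norm from numpy.linalg, neither of which appears in the excerpt; the column slice x[:, j] must also be 1-D, so a plain 2-D ndarray is assumed despite the np.matrix annotation. A minimal driver under those assumptions:

import numpy as np
from numpy.linalg import norm

epsilon = 1e-9   # assumed module-level tolerance; not defined in the excerpt

rng = np.random.default_rng(1)
x = rng.normal(size=(20, 5))
t, p = nipals1(x)
print(t.shape, p.shape)                    # (20, 3) scores, (5, 3) loadings for max_it = 3
assert np.allclose(norm(p, axis=0), 1.0)   # each extracted loading has unit norm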
Example #3
def eigen(matx: np.matrix, eps=1e-3):
    """
    Find the dominant eigenvalue and its eigenvector by the scalar-product (power-iteration) method.
    """
    tr_matrix = matx.transpose()
    y = np.zeros(matx.shape[0])
    z = np.zeros(matx.shape[0])
    y[0] = START_Y
    z[0] = y[0]
    eigenvalue = 0
    for j in range(ITERATION_LIMIT):
        if y.shape[0] == 1:
            y = y.transpose()
        if z.shape[0] == 1:
            z = z.transpose()
        next_y = np.array(matx.dot(y))
        next_z = np.array(tr_matrix.dot(z))

        tmp1 = sum1(next_y * next_z)
        tmp2 = sum1(y * next_z)

        tmp_res = tmp1 / tmp2
        if j == 0:
            eigenvalue = tmp_res
        elif abs(eigenvalue - tmp_res) < eps:
            break
        else:
            eigenvalue = tmp_res
        y = next_y
        z = next_z

    eigenvector = y / norm(y)
    return eigenvalue, eigenvector
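START_Y, ITERATION_LIMIT, sum1 and norm come from the surrounding module, which is not shown. A usage sketch with hypothetical stand-ins for them:

import numpy as np
from numpy.linalg import norm

START_Y = 1.0            # assumed initial value of the iteration vectors
ITERATION_LIMIT = 1000   # assumed iteration cap
sum1 = np.sum            # assumed: a plain sum of the elementwise products

a = np.matrix([[4.0, 1.0],
               [2.0, 3.0]])        # eigenvalues 5 and 2
value, vector = eigen(a, eps=1e-8)
print(round(value, 6))             # ~5.0
print((vector / vector[0]).T)      # proportional to the dominant eigenvector [1, 1]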
Example #4
    def iter_vertices(self, matrix: np.matrix) -> list:
        """
        Iterate through self.vertices and apply a matrix transformation to each vertex.
        """
        transformed_vertices = list()
        for vertex in self.vertices:
            # .A1 flattens the resulting single-column matrix into a 1-D array
            transformed_vertices.append(matrix.dot(vertex).A1)
        return transformed_vertices
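The owning class is not shown; a minimal hypothetical host with a vertices attribute, exercised with a 2-D rotation:

import numpy as np

class Shape:
    # hypothetical host class; only the vertices attribute is assumed
    def __init__(self, vertices):
        self.vertices = vertices   # list of column vectors (np.matrix)

    def iter_vertices(self, matrix: np.matrix) -> list:
        transformed_vertices = list()
        for vertex in self.vertices:
            transformed_vertices.append(matrix.dot(vertex).A1)
        return transformed_vertices

theta = np.pi / 2
rotation = np.matrix([[np.cos(theta), -np.sin(theta)],
                      [np.sin(theta),  np.cos(theta)]])
shape = Shape([np.matrix([[1.0], [0.0]]), np.matrix([[0.0], [1.0]])])
print(shape.iter_vertices(rotation))   # ~[array([0., 1.]), array([-1., 0.])]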
Example #5
    def update(self, x: np.matrix, chosen_arm: int, reward: float) -> None:
        """Update the information about the arms.

        :param x: observed context matrix.
        :param chosen_arm: index of the chosen arm.
        :param reward: reward from the chosen arm.
        """
        # split the observed context: the first z_dim rows form z, the rest form x
        z = x[:self.z_dim]
        x = x[self.z_dim:]

        self.counts[chosen_arm] += 1
        self.rewards += reward
        self.A_zero += self.B[chosen_arm].T.dot(self.A_inv[chosen_arm]).dot(self.B[chosen_arm])
        self.b_zero += self.B[chosen_arm].T.dot(self.A_inv[chosen_arm]).dot(self.b[chosen_arm])
        self.A_inv[chosen_arm] -= self.A_inv[chosen_arm].dot(x.dot(x.T.dot(self.A_inv[chosen_arm]))) / (1 + x.T.dot(self.A_inv[chosen_arm].dot(x)))
        self.B[chosen_arm] += x.dot(z.T)
        self.b[chosen_arm] += x * reward
        self.A_zero += z.dot(z.T) - self.B[chosen_arm].T.dot(self.A_inv[chosen_arm]).dot(self.B[chosen_arm])
        self.b_zero += z * reward - self.B[chosen_arm].T.dot(self.A_inv[chosen_arm]).dot(self.b[chosen_arm])
Example #6
def dominate_value(mx: matrix):
    a = 0  # iteration counter
    x1 = matrix(random.rand(mx.shape[0], 1))
    x2 = matrix([[0] for x in range(mx.shape[0])])
    while linalg.norm(x1 - x2) > 1e-7:
        a += 1
        x2 = x1
        x1 = mx.dot(x1)
        # rescale so the entry with the largest absolute value becomes 1
        x1 = x1 / max(x1, key=abs)
    print(x1 / linalg.norm(x1))  # unit-norm dominant eigenvector
    print(a)                     # number of iterations taken
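The bare names matrix, random and linalg suggest they were imported from numpy. A driver under that assumption (the function prints its results rather than returning them):

from numpy import matrix, random, linalg

a = matrix([[4.0, 1.0],
            [2.0, 3.0]])   # dominant eigenvalue 5, eigenvector along [1, 1]
dominate_value(a)          # prints the unit dominant eigenvector, then the iteration count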
Example #7
    def update(self, x: np.matrix, chosen_arm: int, reward: float) -> None:
        """Update the information about the arms.

        :param x: observed context matrix.
        :param chosen_arm: index of the chosen arm.
        :param reward: reward from the chosen arm.
        """
        self.counts[chosen_arm] += 1
        self.rewards += reward
        self.A_inv[chosen_arm] -= self.A_inv[chosen_arm].dot(x.dot(x.T.dot(self.A_inv[chosen_arm]))) / (1 + x.T.dot(self.A_inv[chosen_arm].dot(x)))  # Sherman-Morrison rank-one update, d * d
        self.b[chosen_arm] += x * reward  # d * 1
Example #8
    def batch_update(self, x: np.matrix, chosen_arm: int, reward: float) -> None:
        """Update the information about the arms with a new batch of data.

        :param x: observed context matrix.
        :param chosen_arm: index of the chosen arm.
        :param reward: reward from the chosen arm.
        """
        self.data_size += 1
        self.counts[chosen_arm] += 1
        self.rewards += reward
        self._A_inv[chosen_arm] -= self._A_inv[chosen_arm].dot(x.dot(x.T.dot(self._A_inv[chosen_arm]))) / (1 + x.T.dot(self._A_inv[chosen_arm].dot(x)))  # d * d
        self._b[chosen_arm] += x * reward  # d * 1
        if self.data_size % self.batch_size == 0:
            self.A_inv = copy.deepcopy(self._A_inv)  # d * d
            self.b = copy.deepcopy(self._b)  # d * 1
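The snapshot in the last three lines must copy rather than alias: _A_inv keeps being updated in place between batches, so publishing a view would leak unpublished updates into A_inv. A quick illustration of the view/copy distinction:

import numpy as np

a = np.eye(2)
view = a[:]        # basic slicing yields a view, not a snapshot
snap = a.copy()    # an independent copy

a += 1.0           # in-place update, like the Sherman-Morrison step above
print(view[0, 0])  # 2.0 -- the view tracked the in-place change
print(snap[0, 0])  # 1.0 -- the copy did not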
Example #9
def hotelling(x: np.matrix):
    s = x.sum(axis=1)
    alpha = s / np.amax(s)
    # print("alpha = " + str(alpha))

    alpha_old = alpha - alpha
    while (np.absolute(alpha - alpha_old)).sum() >= epsilon:
        # print("============================")
        beta = x.dot(alpha)
        # print("beta = " + str(beta))
        alpha_old = alpha
        alpha = beta / np.amax(beta)
        # print("alpha = " + str(alpha))
        # print("difference = " + str(np.absolute(alpha - alpha_old)))
        # print("diff_sum = " + str((np.absolute(alpha - alpha_old)).sum()))
    return alpha, np.amax(beta)
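hotelling also expects a module-level epsilon that the excerpt does not define. A driver with an assumed tolerance; the function returns the scaled eigenvector and the dominant-eigenvalue estimate:

import numpy as np

epsilon = 1e-10   # assumed module-level tolerance; not defined in the excerpt

a = np.matrix([[4.0, 1.0],
               [2.0, 3.0]])          # dominant eigenvalue 5
alpha, value = hotelling(a)
print(round(float(value), 6))        # ~5.0
print((alpha / np.amax(alpha)).T)    # proportional to the dominant eigenvector [1, 1]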
Example #10
    def fit(self, x: np.matrix, y: np.matrix):
        random_number_generator: RandomState = RandomState(
            self.weight_init_seed)
        self.weights = random_number_generator.normal(loc=0.0,
                                                      scale=0.01,
                                                      size=1 + x.shape[1])

        for i in range(self.num_epochs):
            output = self.activation(self.net_input(x))
            errors = (y - output)

            # update weights
            self.weights[1:] += self.learning_rate * x.T.dot(errors)
            self.weights[0] += self.learning_rate * errors.sum()

            # update cost
            self.cost.append(
                (-y.dot(np.log(output)) - ((1 - y).dot(np.log(1 - output)))))

        return self
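fit references activation, net_input, cost, num_epochs, learning_rate and weight_init_seed from the rest of the class, which is not shown. A hypothetical minimal host, assuming a logistic sigmoid activation and a linear net input, which makes the weight update the standard gradient step for logistic regression:

import numpy as np
from numpy.random import RandomState

class LogisticRegressionGD:
    """Hypothetical host class for the fit method above; all names are assumptions."""

    def __init__(self, learning_rate=0.05, num_epochs=1000, weight_init_seed=1):
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.weight_init_seed = weight_init_seed
        self.cost = []

    def net_input(self, x):
        return np.dot(x, self.weights[1:]) + self.weights[0]   # weights[0] is the bias

    def activation(self, z):
        return 1.0 / (1.0 + np.exp(-np.clip(z, -250, 250)))    # logistic sigmoid

    def fit(self, x, y):
        rng = RandomState(self.weight_init_seed)
        self.weights = rng.normal(loc=0.0, scale=0.01, size=1 + x.shape[1])
        for _ in range(self.num_epochs):
            output = self.activation(self.net_input(x))
            errors = y - output
            self.weights[1:] += self.learning_rate * x.T.dot(errors)
            self.weights[0] += self.learning_rate * errors.sum()
            self.cost.append(-y.dot(np.log(output)) - (1 - y).dot(np.log(1 - output)))
        return self

# a tiny linearly separable problem: the label is just the first feature
x = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
y = np.array([0.0, 0.0, 1.0, 1.0])
model = LogisticRegressionGD().fit(x, y)
print((model.activation(model.net_input(x)) >= 0.5).astype(int))   # expected: [0 0 1 1]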
Example #11
def variance(weights: np.matrix, sigma: np.ndarray):
    # returns the portfolio variance (NOT the standard deviation): w.dot(sigma).dot(w.T)
    return weights.dot(sigma).dot(weights.transpose())
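A usage sketch, reading weights as a row np.matrix and sigma as a covariance ndarray:

import numpy as np

weights = np.matrix([[0.6, 0.4]])        # portfolio weights as a row matrix
sigma = np.array([[0.040, 0.006],
                  [0.006, 0.090]])       # covariance matrix of asset returns
print(float(variance(weights, sigma)))   # w Sigma w^T = 0.03168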
Example #12
def sim_acts_norm(algo_list: list,
                  arms: np.matrix,
                  num_sims: int,
                  horizon: int,
                  algo_name: list,
                  acts_key: list,
                  base_type: str = "random",
                  base_error_scale: float = 1.0,
                  lift_error_scale: float = 1.0,
                  lin_base_loc: float = 0.0,
                  lin_base_scale: float = 1.0,
                  ran_base_loc: float = 0.0,
                  ran_base_scale: float = 1.0,
                  batch: bool = False,
                  batch_size: int = 200) -> list:
    """Run simulations of action-centered multi-armed bandit algorithms on rewards drawn from Gaussian distributions.

    :param algo_list: a list of simulated algorithms.
    :param arms: a matrix whose rows are the linear parameters of the arms.
    :param num_sims: the number of simulations.
    :param horizon: the number of trials in a simulation.
    :param algo_name: a list of names of the simulated algorithms.
    :param acts_key: a list of bools indicating whether each algorithm is action-centered (arm 0 meaning "take no action").
    :param base_type: how the baseline reward is generated, "linear" or "random".
    :param base_error_scale: standard deviation of the Gaussian noise added to the baseline reward.
    :param lift_error_scale: standard deviation of the Gaussian noise added to the lift.
    :param lin_base_loc: mean of the linear baseline parameters.
    :param lin_base_scale: standard deviation of the linear baseline parameters.
    :param ran_base_loc: mean of the random baseline reward.
    :param ran_base_scale: standard deviation of the random baseline reward.
    :param batch: whether simulations are run in the batch-update setting.
    :param batch_size: the number of reward observations between batch updates.

    :return: a list of simulation results (one DataFrame per algorithm).
    """
    sim_data_list = []
    for i, algo in enumerate(algo_list):

        n_arms = arms.shape[0]
        dim = arms.shape[1]
        chosen_arms = np.zeros(num_sims * horizon, dtype=int)
        successes = np.zeros(num_sims * horizon, dtype=int)
        cumulative_lifts = np.zeros(num_sims * horizon)
        base_rewards = np.zeros(num_sims * horizon)
        rewards = np.zeros(num_sims * horizon)
        cumulative_rewards = np.zeros(num_sims * horizon)
        regrets = np.zeros(num_sims * horizon)
        cumulative_regrets = np.zeros(num_sims * horizon)
        sim_nums = np.zeros(num_sims * horizon, dtype=int)
        times = np.zeros(num_sims * horizon, dtype=int)
        elapsed_time = np.zeros(num_sims)

        for sim in range(num_sims):
            a = copy.deepcopy(algo)
            if batch:
                a.batch_size = batch_size

            start = time.time()
            for t in range(1, horizon + 1):
                # flat index into the result arrays for (simulation sim, trial t)
                index = sim * horizon + t - 1
                sim_nums[index] = sim + 1
                times[index] = t

                x = np.matrix((np.random.randint(2, size=dim - 1))).T
                x = np.concatenate([x, np.matrix(np.array([1])).T])
                e1 = np.random.normal(loc=0, scale=base_error_scale)
                e2 = np.random.normal(loc=0, scale=lift_error_scale)

                chosen_arm = a.select_arm(x)
                chosen_arms[index] = chosen_arm

                if base_type == "linear":
                    base_reward = np.matrix([
                        np.random.normal(
                            loc=lin_base_loc, scale=lin_base_scale, size=dim)
                    ]).dot(x) + e1
                elif base_type == "random":
                    base_reward = np.matrix(
                        np.random.normal(loc=ran_base_loc,
                                         scale=ran_base_scale) + e1)
                base_rewards[index] = base_reward
                max_reward = np.max(arms.dot(x))
                if max_reward < 0:
                    i_max = 0
                    max_reward = 0
                else:
                    # convert the argmax over arms to a 1-based action id; id 0 means "take no action"
                    i_max = np.argmax(arms.dot(x)) + 1

                if acts_key[i]:
                    if chosen_arm == 0:
                        lift = 0
                        reward = base_reward
                    else:
                        lift = arms[chosen_arm - 1].dot(x) + e2
                        reward = base_reward + lift
                    rewards[index] = reward
                    regret = max_reward - lift
                    if chosen_arm == i_max:
                        successes[index] = 1

                else:
                    lift = arms[chosen_arm].dot(x) + e2
                    reward = lift + base_reward
                    rewards[index] = reward
                    regret = max_reward - lift
                    if (chosen_arm + 1) == i_max:
                        successes[index] = 1

                regrets[index] = regret

                if t == 1:
                    cumulative_lifts[index] = lift
                    cumulative_regrets[index] = regret
                    cumulative_rewards[index] = reward
                else:
                    cumulative_lifts[index] = cumulative_lifts[index -
                                                               1] + lift
                    cumulative_regrets[index] = cumulative_regrets[index -
                                                                   1] + regret
                    cumulative_rewards[index] = cumulative_rewards[index -
                                                                   1] + reward

                if batch:
                    a.batch_update(x, chosen_arm, reward[0][0])
                else:
                    a.update(x, chosen_arm, reward[0][0])

            elapsed_time[sim] = time.time() - start
        print(
            f"Avg Elapsed Time({horizon} iter) {algo_name[i]} : {round(np.mean(elapsed_time), 3)}s"
        )
        sim_data = [
            sim_nums, times, chosen_arms, base_rewards, rewards,
            cumulative_rewards, regrets, cumulative_regrets, cumulative_lifts,
            successes
        ]

        df = DataFrame({
            "sim_nums": sim_data[0],
            "times": sim_data[1],
            "chosen_arm": sim_data[2],
            "Base Rewards": sim_data[3],
            "Rewards": sim_data[4],
            "Cumulative Rewards": sim_data[5],
            "Regrets": sim_data[6],
            "Cumulative Regrets": sim_data[7],
            "Cumulative Lifts": sim_data[8],
            "Successes": sim_data[9]
        }).set_index(["sim_nums", "times"])

        sim_data_list.append(df)

    return sim_data_list
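sim_acts_norm only requires each algorithm to expose select_arm(x) and update(x, chosen_arm, reward) (or batch_update). A hypothetical do-nothing baseline, just to exercise the interface:

import numpy as np

class RandomActPolicy:
    """Hypothetical action-centered baseline; arm 0 stands for "take no action"."""

    def __init__(self, n_actions: int):
        self.n_actions = n_actions   # number of arms plus the "no action" arm 0

    def select_arm(self, x: np.matrix) -> int:
        return np.random.randint(self.n_actions)

    def update(self, x: np.matrix, chosen_arm: int, reward: float) -> None:
        pass   # a learning algorithm would refresh its per-arm statistics here

arms = np.matrix(np.random.normal(size=(3, 5)))   # 3 arms, contexts of dimension 5
results = sim_acts_norm([RandomActPolicy(4)], arms, num_sims=2, horizon=100,
                        algo_name=["random"], acts_key=[True])
print(results[0].head())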
Example #13
def sim_conmabs_norm(algo_list: list,
                     arms: np.matrix,
                     scale: float,
                     num_sims: int,
                     horizon: int,
                     algo_name: list,
                     context_key: list,
                     monitor: bool = False,
                     batch: bool = False,
                     batch_size: int = 200) -> list:
    """Run simulations of contextual multi-armed bandit algorithms on rewards drawn from Gaussian distributions.

    :param algo_list: a list of simulated algorithms.
    :param arms: a matrix whose rows are the linear parameters of the arms.
    :param scale: standard deviation of the Gaussian reward noise.
    :param num_sims: the number of simulations.
    :param horizon: the number of trials in a simulation.
    :param algo_name: a list of names of the simulated algorithms.
    :param context_key: a list of bools indicating whether each simulated algorithm is contextual.
    :param monitor: whether to monitor simulation progress.
    :param batch: whether simulations are run in the batch-update setting.
    :param batch_size: the number of reward observations between batch updates.

    :return: a list of simulation results (one DataFrame per algorithm).
    """
    sim_data_list = []
    for i, algo in enumerate(algo_list):
        chosen_arms = np.zeros(num_sims * horizon, dtype=int)
        successes = np.zeros(num_sims * horizon, dtype=int)
        rewards = np.zeros(num_sims * horizon)
        cumulative_rewards = np.zeros(num_sims * horizon)
        regrets = np.zeros(num_sims * horizon)
        cumulative_regrets = np.zeros(num_sims * horizon)
        sim_nums = np.zeros(num_sims * horizon, dtype=int)
        times = np.zeros(num_sims * horizon, dtype=int)
        elapsed_time = np.zeros(num_sims)

        for sim in range(num_sims):
            a = copy.deepcopy(algo)
            if batch:
                a.batch_size = batch_size

            start = time.time()
            for t in range(1, horizon + 1):
                # flat index into the result arrays for (simulation sim, trial t)
                index = sim * horizon + t - 1
                sim_nums[index] = sim + 1
                times[index] = t

                x = np.matrix(np.random.randint(2, size=arms.shape[1])).T
                e = np.random.normal(loc=0, scale=scale)

                if context_key[i]:
                    chosen_arm = a.select_arm(x)
                else:
                    chosen_arm = a.select_arm()
                chosen_arms[index] = chosen_arm

                reward = arms[chosen_arm].dot(x)
                rewards[index] = reward + e
                regret = np.max(arms.dot(x)) - reward
                regrets[index] = regret

                # if chosen_arm == np.argmax(arms.dot(x)):
                if regret < 1e-5:
                    successes[index] = 1

                if t == 1:
                    cumulative_regrets[index] = regret
                    cumulative_rewards[index] = reward
                else:
                    cumulative_regrets[index] = cumulative_regrets[index -
                                                                   1] + regret
                    cumulative_rewards[index] = cumulative_rewards[index -
                                                                   1] + reward

                if context_key[i]:
                    if batch:
                        a.batch_update(x, chosen_arm, reward[0][0])
                    else:
                        a.update(x, chosen_arm, reward[0][0])
                else:
                    if batch:
                        a.batch_update(chosen_arm, reward[0][0])
                    else:
                        a.update(chosen_arm, reward[0][0])

            elapsed_time[sim] = time.time() - start
        print(
            f"Avg Elapsed Time({horizon} iter) {algo_name[i]} : {round(np.mean(elapsed_time), 3)}s"
        )
        sim_data = [
            sim_nums, times, chosen_arms, rewards, cumulative_rewards, regrets,
            cumulative_regrets, successes
        ]

        df = DataFrame({
            "sim_nums": sim_data[0],
            "times": sim_data[1],
            "chosen_arm": sim_data[2],
            "Rewards": sim_data[3],
            "Cumulative Rewards": sim_data[4],
            "Regrets": sim_data[5],
            "Cumulative Regrets": sim_data[6],
            "Successes": sim_data[7]
        }).set_index(["sim_nums", "times"])

        sim_data_list.append(df)

    return sim_data_list
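sim_conmabs_norm likewise only needs select_arm and update, called without a context for non-contextual algorithms. A hypothetical uniform-random baseline as a smoke test:

import numpy as np

class RandomPolicy:
    """Hypothetical uniform-random baseline; it ignores the observed rewards."""

    def __init__(self, n_arms: int):
        self.n_arms = n_arms

    def select_arm(self) -> int:
        return np.random.randint(self.n_arms)

    def update(self, chosen_arm: int, reward: float) -> None:
        pass   # a learning algorithm would refresh its reward estimates here

arms = np.matrix(np.random.normal(size=(3, 5)))   # 3 arms over 5-dimensional contexts
results = sim_conmabs_norm([RandomPolicy(3)], arms, scale=0.1,
                           num_sims=2, horizon=100,
                           algo_name=["random"], context_key=[False])
print(results[0].head())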