import copy
import time

import numpy as np
from numpy import linalg, matrix, random
from numpy.linalg import matrix_rank, norm
from numpy.random import RandomState
from pandas import DataFrame

# Module-level names assumed defined elsewhere in the package:
# epsilon (convergence tolerance), START_Y, ITERATION_LIMIT, sum1.
# The update/batch_update functions below appear to be methods excerpted
# from hybrid and disjoint LinUCB policy classes.


def batch_update(self, x: np.matrix, chosen_arm: int, reward: float) -> None:
    """Update the information about the arms with a new batch of data.

    :param x: observed context matrix.
    :param chosen_arm: index of the chosen arm.
    :param reward: reward from the chosen arm.
    """
    self.data_size += 1
    z = x[:self.z_dim]   # shared (hybrid) features
    x = x[self.z_dim:]   # arm-specific features
    self.counts[chosen_arm] += 1
    self.rewards += reward

    # Remove the chosen arm's old contribution to the shared statistics.
    self._A_zero += self._B[chosen_arm].T.dot(self._A_inv[chosen_arm]).dot(self._B[chosen_arm])
    self._b_zero += self._B[chosen_arm].T.dot(self._A_inv[chosen_arm]).dot(self._b[chosen_arm])

    # Sherman-Morrison rank-one update of the arm's inverse Gram matrix.
    self._A_inv[chosen_arm] -= (
        self._A_inv[chosen_arm].dot(x.dot(x.T.dot(self._A_inv[chosen_arm])))
        / (1 + x.T.dot(self._A_inv[chosen_arm].dot(x)))
    )
    self._B[chosen_arm] += x.dot(z.T)
    self._b[chosen_arm] += x * reward

    # Re-add the updated contribution together with the new shared observation.
    self._A_zero += z.dot(z.T) - self._B[chosen_arm].T.dot(self._A_inv[chosen_arm]).dot(self._B[chosen_arm])
    self._b_zero += z * reward - self._B[chosen_arm].T.dot(self._A_inv[chosen_arm]).dot(self._b[chosen_arm])

    # Publish the staged statistics only at batch boundaries.
    if self.data_size % self.batch_size == 0:
        self.A_zero = self._A_zero[:]
        self.b_zero = self._b_zero[:]
        self.A_inv = copy.deepcopy(self._A_inv)
        self.B = copy.deepcopy(self._B)
        self.b = copy.deepcopy(self._b)
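
# A hedged standalone check (not part of the original module) of the
# Sherman-Morrison identity used above:
#     (A + x x^T)^{-1} = A^{-1} - (A^{-1} x x^T A^{-1}) / (1 + x^T A^{-1} x).
# All names below are local to the demo; d = 4 is an arbitrary choice.
def _demo_sherman_morrison():
    rng = np.random.default_rng(0)
    d = 4
    A_inv = np.eye(d)  # LinUCB-style initialisation: A = I_d
    x_vec = rng.normal(size=(d, 1))
    # Rank-one update, exactly as in batch_update above.
    updated = A_inv - A_inv.dot(x_vec).dot(x_vec.T).dot(A_inv) / (
        1 + x_vec.T.dot(A_inv).dot(x_vec))
    # Direct recomputation of (A + x x^T)^{-1} for comparison.
    direct = np.linalg.inv(np.linalg.inv(A_inv) + x_vec.dot(x_vec.T))
    assert np.allclose(updated, direct)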
def nipals1(x: np.ndarray):
    """Extract principal components of ``x`` with the NIPALS algorithm."""
    max_it = 3
    # max_it = matrix_rank(x)
    p = np.zeros(shape=(x.shape[1], max_it))
    t = np.zeros(shape=(x.shape[0], max_it))
    for j in range(max_it):
        # Start the score vector from the j-th column of the deflated matrix.
        t_j = x[:, j]
        # # Optional guard: skip (near-)zero columns.
        # if norm(t_j) <= epsilon:
        #     continue
        t_j1 = t_j - t_j  # zero vector of matching shape
        # Power iteration on the deflated matrix until the scores stabilise.
        while norm(t_j1 - t_j) > epsilon:
            p_j = x.transpose().dot(t_j)
            p_j /= norm(p_j)
            t_j1 = t_j
            t_j = x.dot(p_j)
        p[:, j] = p_j
        t[:, j] = t_j
        # Deflate: remove the extracted component before the next pass.
        x = x - np.dot(t_j[:, None], p_j[None, :])
    return t, p
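
# A hedged usage sketch for nipals1. `epsilon` is assumed to be a small
# module-level tolerance; the setdefault below supplies a demo-only value
# if it is not already defined. With max_it equal to the rank of the input,
# the scores and loadings reconstruct the data: X ~ T P^T.
def _demo_nipals():
    globals().setdefault("epsilon", 1e-9)
    rng = np.random.default_rng(1)
    X = rng.normal(size=(20, 3))
    t, p = nipals1(X)
    print(np.allclose(t.dot(p.T), X, atol=1e-4))  # expect True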
def eigen(matx: np.matrix, eps=1e-3):
    """Find the dominant eigenvalue and eigenvector by the scalar-product
    (power iteration) method."""
    tr_matrix = matx.transpose()
    y = np.zeros(matx.shape[0])
    z = np.zeros(matx.shape[0])
    y[0] = START_Y
    z[0] = y[0]
    eigenvalue = 0
    for j in range(ITERATION_LIMIT):
        # np.matrix.dot returns row matrices for 1-D inputs; re-orient them.
        if y.shape[0] == 1:
            y = y.transpose()
        if z.shape[0] == 1:
            z = z.transpose()
        next_y = np.array(matx.dot(y))
        next_z = np.array(tr_matrix.dot(z))
        # Scalar-product estimate: lambda ~ <y_{k+1}, z_{k+1}> / <y_k, z_{k+1}>.
        tmp1 = sum1(next_y * next_z)
        tmp2 = sum1(y * next_z)
        tmp_res = tmp1 / tmp2
        if j == 0:
            eigenvalue = tmp_res
        elif abs(eigenvalue - tmp_res) < eps:
            break
        else:
            eigenvalue = tmp_res
        y = next_y
        z = next_z
    eigenvector = y / norm(y)
    return eigenvalue, eigenvector
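
# A hedged usage sketch for eigen. START_Y, ITERATION_LIMIT, and sum1 are
# module-level names assumed to exist elsewhere; the setdefault calls below
# supply hypothetical demo-only values if they are missing.
def _demo_eigen():
    globals().setdefault("START_Y", 1.0)
    globals().setdefault("ITERATION_LIMIT", 1000)
    globals().setdefault("sum1", np.sum)
    A = np.matrix([[4.0, 1.0], [2.0, 3.0]])  # eigenvalues 5 and 2
    value, vector = eigen(A)
    print(value)                              # ~5.0
    print(np.linalg.eigvals(np.asarray(A)))   # cross-check: [5., 2.]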
def iter_vertices(self, matrix: np.matrix) -> list:
    """Iterate over self.vertices, applying a matrix transformation to each."""
    transformed_vertices = []
    for vertex in self.vertices:
        transformed_vertices.append(matrix.dot(vertex).A1)
    return transformed_vertices
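
# A hedged usage sketch: a hypothetical minimal Mesh class wiring the
# function above in as a method, then applying a 90-degree rotation.
class _DemoMesh:
    def __init__(self, vertices):
        self.vertices = vertices

    iter_vertices = iter_vertices  # reuse the function above as a method


def _demo_iter_vertices():
    theta = np.pi / 2
    R = np.matrix([[np.cos(theta), -np.sin(theta)],
                   [np.sin(theta),  np.cos(theta)]])
    mesh = _DemoMesh([np.matrix([[1.0], [0.0]]), np.matrix([[0.0], [1.0]])])
    print(mesh.iter_vertices(R))  # each vertex rotated by 90 degrees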
def update(self, x: np.matrix, chosen_arm: int, reward: float) -> None:
    """Update the information about the arms.

    :param x: observed context matrix.
    :param chosen_arm: index of the chosen arm.
    :param reward: reward from the chosen arm.
    """
    z = x[:self.z_dim]   # shared (hybrid) features
    x = x[self.z_dim:]   # arm-specific features
    self.counts[chosen_arm] += 1
    self.rewards += reward

    # Remove the chosen arm's old contribution to the shared statistics.
    self.A_zero += self.B[chosen_arm].T.dot(self.A_inv[chosen_arm]).dot(self.B[chosen_arm])
    self.b_zero += self.B[chosen_arm].T.dot(self.A_inv[chosen_arm]).dot(self.b[chosen_arm])

    # Sherman-Morrison rank-one update of the arm's inverse Gram matrix.
    self.A_inv[chosen_arm] -= (
        self.A_inv[chosen_arm].dot(x.dot(x.T.dot(self.A_inv[chosen_arm])))
        / (1 + x.T.dot(self.A_inv[chosen_arm].dot(x)))
    )
    self.B[chosen_arm] += x.dot(z.T)
    self.b[chosen_arm] += x * reward

    # Re-add the updated contribution together with the new shared observation.
    self.A_zero += z.dot(z.T) - self.B[chosen_arm].T.dot(self.A_inv[chosen_arm]).dot(self.B[chosen_arm])
    self.b_zero += z * reward - self.B[chosen_arm].T.dot(self.A_inv[chosen_arm]).dot(self.b[chosen_arm])
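
# A hedged sketch of the stacked context layout `update` expects: the first
# z_dim rows are the shared (hybrid) features z, the remaining rows are the
# arm-specific features. Dimensions below are hypothetical.
def _demo_hybrid_context():
    z_part = np.matrix(np.ones((2, 1)))         # z_dim = 2 shared features
    x_part = np.matrix(np.arange(3.0)).T        # 3 arm-specific features
    context = np.concatenate([z_part, x_part])  # shape (5, 1); passed as `x`
    print(context.shape)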
def dominate_value(mx: matrix):
    """Estimate the dominant eigenvalue and eigenvector of ``mx`` by power
    iteration with max-element scaling."""
    steps = 0
    x1 = matrix(random.rand(mx.shape[0], 1))
    x2 = matrix(np.zeros((mx.shape[0], 1)))
    eigenvalue = 0.0
    while linalg.norm(x1 - x2) > 1e-7:
        steps += 1
        x2 = x1
        x1 = mx.dot(x1)
        # The scaling factor converges to the dominant eigenvalue.
        eigenvalue = float(max(x1, key=abs))
        x1 = x1 / eigenvalue
    print(x1 / linalg.norm(x1))  # normalised dominant eigenvector
    print(steps)                 # iterations until convergence
    return eigenvalue
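
# A hedged usage sketch for dominate_value on a matrix with a known
# dominant eigenvalue.
def _demo_dominate_value():
    A = matrix([[2.0, 1.0], [1.0, 2.0]])  # eigenvalues 3 and 1
    value = dominate_value(A)             # prints eigenvector and step count
    print(value)                          # ~3.0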
def update(self, x: np.matrix, chosen_arm: int, reward: float) -> None:
    """Update the information about the arms.

    :param x: observed context matrix.
    :param chosen_arm: index of the chosen arm.
    :param reward: reward from the chosen arm.
    """
    self.counts[chosen_arm] += 1
    self.rewards += reward
    # Sherman-Morrison rank-one update of the inverse Gram matrix.
    self.A_inv[chosen_arm] -= (
        self.A_inv[chosen_arm].dot(x.dot(x.T.dot(self.A_inv[chosen_arm])))
        / (1 + x.T.dot(self.A_inv[chosen_arm].dot(x)))
    )  # d * d
    self.b[chosen_arm] += x * reward  # d * 1
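
# A hedged standalone sketch (hypothetical names) of what the statistics
# maintained above mean: with A initialised to the identity, A^{-1} b is the
# ridge-regression estimate of the arm's parameter vector.
def _demo_disjoint_estimate():
    rng = np.random.default_rng(2)
    theta_true = np.array([[0.5], [-0.3]])
    A = np.eye(2)
    b = np.zeros((2, 1))
    for _ in range(500):
        x_vec = rng.normal(size=(2, 1))
        r = float(theta_true.T.dot(x_vec)) + rng.normal(scale=0.1)
        A += x_vec.dot(x_vec.T)
        b += x_vec * r
    print(np.linalg.inv(A).dot(b))  # approaches theta_true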
def batch_update(self, x: np.matrix, chosen_arm: int, reward: float) -> None:
    """Update the information about the arms with a new batch of data.

    :param x: observed context matrix.
    :param chosen_arm: index of the chosen arm.
    :param reward: reward from the chosen arm.
    """
    self.data_size += 1
    self.counts[chosen_arm] += 1
    self.rewards += reward
    # Stage the Sherman-Morrison update; it is published at batch boundaries.
    self._A_inv[chosen_arm] -= (
        self._A_inv[chosen_arm].dot(x.dot(x.T.dot(self._A_inv[chosen_arm])))
        / (1 + x.T.dot(self._A_inv[chosen_arm].dot(x)))
    )  # d * d
    self._b[chosen_arm] += x * reward  # d * 1
    if self.data_size % self.batch_size == 0:
        self.A_inv = copy.deepcopy(self._A_inv)  # d * d
        self.b = copy.deepcopy(self._b)          # d * 1
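
# A hedged toy sketch of the staging pattern above: `_A_inv`/`_b` accumulate
# every round, while the public `A_inv`/`b` (used for arm selection) only
# move at batch boundaries, so decisions within a batch use frozen statistics.
def _demo_batch_staging():
    staged, published = [], []
    batch_size = 3
    for step in range(1, 8):
        staged.append(step)  # analogous to updating _A_inv/_b
        if step % batch_size == 0:
            published = copy.deepcopy(staged)  # analogous to the A_inv/b sync
    print(published)  # [1, ..., 6]: state as of the last full batch boundary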
def hotelling(x: np.matrix):
    """Find the dominant eigenvalue and eigenvector of ``x`` by Hotelling's
    power method with max-element scaling."""
    s = x.sum(axis=1)
    alpha = s / np.amax(s)
    alpha_old = alpha - alpha  # zero vector of matching shape
    while (np.absolute(alpha - alpha_old)).sum() >= epsilon:
        beta = x.dot(alpha)
        alpha_old = alpha
        alpha = beta / np.amax(beta)
    return alpha, np.amax(beta)
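
# A hedged usage sketch for hotelling. `epsilon` is assumed to be a small
# module-level tolerance; the setdefault supplies a demo-only value if needed.
def _demo_hotelling():
    globals().setdefault("epsilon", 1e-9)
    A = np.array([[2.0, 0.0], [1.0, 3.0]])  # dominant eigenvalue 3
    alpha, lam = hotelling(A)
    print(lam)    # ~3.0
    print(alpha)  # proportional to the dominant eigenvector [0, 1]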
def fit(self, x: np.ndarray, y: np.ndarray):
    """Fit the model to ``x`` and ``y`` by batch gradient descent on the
    logistic log-loss."""
    random_number_generator: RandomState = RandomState(self.weight_init_seed)
    # weights[0] is the bias unit; the rest match the input features.
    self.weights = random_number_generator.normal(
        loc=0.0, scale=0.01, size=1 + x.shape[1])
    for i in range(self.num_epochs):
        output = self.activation(self.net_input(x))
        errors = y - output
        # Update weights along the negative gradient of the log-loss.
        self.weights[1:] += self.learning_rate * x.T.dot(errors)
        self.weights[0] += self.learning_rate * errors.sum()
        # Track the log-loss for this epoch.
        self.cost.append(
            -y.dot(np.log(output)) - (1 - y).dot(np.log(1 - output)))
    return self
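
# A hedged sketch of the attributes `fit` assumes on self, wired into a
# hypothetical class, plus a toy run. The sigmoid clipping bound is an
# arbitrary safeguard against overflow.
class _DemoLogisticGD:
    def __init__(self, learning_rate=0.05, num_epochs=200, weight_init_seed=1):
        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        self.weight_init_seed = weight_init_seed
        self.cost = []

    def net_input(self, x):
        return x.dot(self.weights[1:]) + self.weights[0]

    def activation(self, z):
        return 1.0 / (1.0 + np.exp(-np.clip(z, -250, 250)))

    fit = fit  # reuse the gradient-descent routine above as a method


def _demo_fit():
    X = np.array([[0.0], [1.0], [2.0], [3.0]])
    y = np.array([0.0, 0.0, 1.0, 1.0])
    model = _DemoLogisticGD().fit(X, y)
    print(model.activation(model.net_input(X)))  # increases with x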
def variance(weights: np.matrix, sigma: np.ndarray):
    """Return the portfolio variance (NOT the standard deviation) given the
    weights and covariance matrix ``sigma``."""
    return weights.dot(sigma).dot(weights.transpose())
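
# A hedged usage sketch: variance of a two-asset portfolio with hypothetical
# weights and covariance matrix.
def _demo_variance():
    w = np.matrix([[0.6, 0.4]])
    cov = np.matrix([[0.04, 0.006],
                     [0.006, 0.09]])
    # 0.6^2*0.04 + 2*0.6*0.4*0.006 + 0.4^2*0.09 = 0.03168, as a 1x1 matrix.
    print(variance(w, cov))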
def sim_acts_norm(algo_list: list,
                  arms: np.matrix,
                  num_sims: int,
                  horizon: int,
                  algo_name: list,
                  acts_key: list,
                  base_type: str = "random",
                  base_error_scale: float = 1.0,
                  lift_error_scale: float = 1.0,
                  lin_base_loc: float = 0.0,
                  lin_base_scale: float = 1.0,
                  ran_base_loc: float = 0.0,
                  ran_base_scale: float = 1.0,
                  batch: bool = False,
                  batch_size: int = 200) -> list:
    """Run simulations of Action-Centered Multi-Armed Bandit algorithms on
    rewards drawn from Gaussian distributions.

    :param algo_list: a list of simulated algorithms.
    :param arms: a matrix whose rows are the linear lift parameters of each arm.
    :param num_sims: the number of simulations.
    :param horizon: the number of trials in a simulation.
    :param algo_name: a list of names of the simulated algorithms.
    :param acts_key: a list of bools indicating whether each algorithm is
        action-centered (action 0 is the do-nothing baseline).
    :param base_type: how the baseline reward is generated, "linear" or "random".
    :param base_error_scale: std of the Gaussian noise on the baseline reward.
    :param lift_error_scale: std of the Gaussian noise on the lift.
    :param lin_base_loc: mean of the linear baseline parameters.
    :param lin_base_scale: std of the linear baseline parameters.
    :param ran_base_loc: mean of the random baseline reward.
    :param ran_base_scale: std of the random baseline reward.
    :param batch: whether simulations are run in the batch update setting.
    :param batch_size: the number of rounds between batch updates.
    :return: a list of simulation results (one DataFrame per algorithm).
    """
    sim_data_list = []
    for i, algo in enumerate(algo_list):
        n_arms = arms.shape[0]
        dim = arms.shape[1]
        chosen_arms = np.zeros(num_sims * horizon, dtype=int)
        successes = np.zeros(num_sims * horizon, dtype=int)
        cumulative_lifts = np.zeros(num_sims * horizon)
        base_rewards = np.zeros(num_sims * horizon)
        rewards = np.zeros(num_sims * horizon)
        cumulative_rewards = np.zeros(num_sims * horizon)
        regrets = np.zeros(num_sims * horizon)
        cumulative_regrets = np.zeros(num_sims * horizon)
        sim_nums = np.zeros(num_sims * horizon, dtype=int)
        times = np.zeros(num_sims * horizon, dtype=int)
        elapsed_time = np.zeros(num_sims)
        for sim in range(num_sims):
            a = copy.deepcopy(algo)
            if batch:
                a.batch_size = batch_size
            start = time.time()
            for t in range(1, horizon + 1):
                index = sim * horizon + t - 1
                sim_nums[index] = sim + 1
                times[index] = t
                # Binary context with a trailing intercept term.
                x = np.matrix(np.random.randint(2, size=dim - 1)).T
                x = np.concatenate([x, np.matrix(np.array([1])).T])
                e1 = np.random.normal(loc=0, scale=base_error_scale)
                e2 = np.random.normal(loc=0, scale=lift_error_scale)
                chosen_arm = a.select_arm(x)
                chosen_arms[index] = chosen_arm
                if base_type == "linear":
                    base_reward = float(np.matrix([
                        np.random.normal(
                            loc=lin_base_loc, scale=lin_base_scale, size=dim)
                    ]).dot(x)) + e1
                elif base_type == "random":
                    base_reward = np.random.normal(
                        loc=ran_base_loc, scale=ran_base_scale) + e1
                base_rewards[index] = base_reward
                # The optimal action is the baseline when every lift is negative.
                max_reward = np.max(arms.dot(x))
                if max_reward < 0:
                    i_max = 0
                    max_reward = 0
                else:
                    # Offset by 1: action 0 is reserved for the baseline.
                    i_max = np.argmax(arms.dot(x)) + 1
                if acts_key[i]:
                    # Action 0 is "do nothing" in the action-centered setting.
                    if chosen_arm == 0:
                        lift = 0
                        reward = base_reward
                    else:
                        lift = float(arms[chosen_arm - 1].dot(x)) + e2
                        reward = base_reward + lift
                    rewards[index] = reward
                    regret = max_reward - lift
                    if chosen_arm == i_max:
                        successes[index] = 1
                else:
                    lift = float(arms[chosen_arm].dot(x)) + e2
                    reward = lift + base_reward
                    rewards[index] = reward
                    regret = max_reward - lift
                    if (chosen_arm + 1) == i_max:
                        successes[index] = 1
                regrets[index] = regret
                if t == 1:
                    cumulative_lifts[index] = lift
                    cumulative_regrets[index] = regret
                    cumulative_rewards[index] = reward
                else:
                    cumulative_lifts[index] = cumulative_lifts[index - 1] + lift
                    cumulative_regrets[index] = cumulative_regrets[index - 1] + regret
                    cumulative_rewards[index] = cumulative_rewards[index - 1] + reward
                if batch:
                    a.batch_update(x, chosen_arm, reward)
                else:
                    a.update(x, chosen_arm, reward)
            elapsed_time[sim] = time.time() - start
        print(
            f"Avg Elapsed Time({horizon} iter) {algo_name[i]} : "
            f"{round(np.mean(elapsed_time), 3)}s"
        )
        df = DataFrame({
            "sim_nums": sim_nums,
            "times": times,
            "chosen_arm": chosen_arms,
            "Base Rewards": base_rewards,
            "Rewards": rewards,
            "Cumulative Rewards": cumulative_rewards,
            "Regrets": regrets,
            "Cumulative Regrets": cumulative_regrets,
            "Cumulative Lifts": cumulative_lifts,
            "Successes": successes,
        }).set_index(["sim_nums", "times"])
        sim_data_list.append(df)
    return sim_data_list
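
# A hedged, minimal end-to-end run of sim_acts_norm using a hypothetical
# uniformly-random action-centered policy (defined here only for the demo).
# arms: one row of lift parameters per non-baseline arm; the context's
# trailing element is fixed to 1, so the last column acts as an intercept.
class _RandomActsPolicy:
    def __init__(self, n_actions):
        self.n_actions = n_actions  # includes the do-nothing action 0

    def select_arm(self, x):
        return np.random.randint(self.n_actions)

    def update(self, x, chosen_arm, reward):
        pass  # a random policy learns nothing


def _demo_sim_acts():
    arms = np.matrix([[0.2, -0.1, 0.05],
                      [-0.3, 0.4, 0.1]])
    results = sim_acts_norm([_RandomActsPolicy(3)], arms, num_sims=2,
                            horizon=100, algo_name=["Random"],
                            acts_key=[True])
    # Average cumulative regret at the end of the horizon.
    print(results[0]["Cumulative Regrets"].groupby(level="times").mean().iloc[-1])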
def sim_conmabs_norm(algo_list: list,
                     arms: np.matrix,
                     scale: float,
                     num_sims: int,
                     horizon: int,
                     algo_name: list,
                     context_key: list,
                     monitor: bool = False,
                     batch: bool = False,
                     batch_size: int = 200) -> list:
    """Run simulations of Contextual Multi-Armed Bandit algorithms on rewards
    drawn from Gaussian distributions.

    :param algo_list: a list of simulated algorithms.
    :param arms: a matrix whose rows are the linear reward parameters of each arm.
    :param scale: the std of the Gaussian reward noise.
    :param num_sims: the number of simulations.
    :param horizon: the number of trials in a simulation.
    :param algo_name: a list of names of the simulated algorithms.
    :param context_key: a list of bools indicating whether each algorithm is contextual.
    :param monitor: whether to monitor simulation progress (currently unused).
    :param batch: whether simulations are run in the batch update setting.
    :param batch_size: the number of rounds between batch updates.
    :return: a list of simulation results (one DataFrame per algorithm).
    """
    sim_data_list = []
    for i, algo in enumerate(algo_list):
        chosen_arms = np.zeros(num_sims * horizon, dtype=int)
        successes = np.zeros(num_sims * horizon, dtype=int)
        rewards = np.zeros(num_sims * horizon)
        cumulative_rewards = np.zeros(num_sims * horizon)
        regrets = np.zeros(num_sims * horizon)
        cumulative_regrets = np.zeros(num_sims * horizon)
        sim_nums = np.zeros(num_sims * horizon, dtype=int)
        times = np.zeros(num_sims * horizon, dtype=int)
        elapsed_time = np.zeros(num_sims)
        for sim in range(num_sims):
            a = copy.deepcopy(algo)
            if batch:
                a.batch_size = batch_size
            start = time.time()
            for t in range(1, horizon + 1):
                index = sim * horizon + t - 1
                sim_nums[index] = sim + 1
                times[index] = t
                x = np.matrix(np.random.randint(2, size=arms.shape[1])).T
                e = np.random.normal(loc=0, scale=scale)
                if context_key[i]:
                    chosen_arm = a.select_arm(x)
                else:
                    chosen_arm = a.select_arm()
                chosen_arms[index] = chosen_arm
                reward = float(arms[chosen_arm].dot(x))
                rewards[index] = reward + e
                regret = float(np.max(arms.dot(x))) - reward
                regrets[index] = regret
                # Success when the chosen arm is (numerically) optimal.
                if regret < 1e-5:
                    successes[index] = 1
                if t == 1:
                    cumulative_regrets[index] = regret
                    cumulative_rewards[index] = reward
                else:
                    cumulative_regrets[index] = cumulative_regrets[index - 1] + regret
                    cumulative_rewards[index] = cumulative_rewards[index - 1] + reward
                if context_key[i]:
                    if batch:
                        a.batch_update(x, chosen_arm, reward)
                    else:
                        a.update(x, chosen_arm, reward)
                else:
                    if batch:
                        a.batch_update(chosen_arm, reward)
                    else:
                        a.update(chosen_arm, reward)
            elapsed_time[sim] = time.time() - start
        print(
            f"Avg Elapsed Time({horizon} iter) {algo_name[i]} : "
            f"{round(np.mean(elapsed_time), 3)}s"
        )
        df = DataFrame({
            "sim_nums": sim_nums,
            "times": times,
            "chosen_arm": chosen_arms,
            "Rewards": rewards,
            "Cumulative Rewards": cumulative_rewards,
            "Regrets": regrets,
            "Cumulative Regrets": cumulative_regrets,
            "Successes": successes,
        }).set_index(["sim_nums", "times"])
        sim_data_list.append(df)
    return sim_data_list
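
# A hedged, minimal end-to-end run of sim_conmabs_norm with a hypothetical
# uniformly-random contextual policy (defined here only for the demo).
class _RandomConPolicy:
    def __init__(self, n_arms):
        self.n_arms = n_arms

    def select_arm(self, x):
        return np.random.randint(self.n_arms)

    def update(self, x, chosen_arm, reward):
        pass  # a random policy learns nothing


def _demo_sim_conmabs():
    arms = np.matrix([[0.1, 0.4, 0.3],
                      [0.4, 0.1, 0.2]])
    dfs = sim_conmabs_norm([_RandomConPolicy(2)], arms, scale=0.1,
                           num_sims=2, horizon=100,
                           algo_name=["Random"], context_key=[True])
    # Average cumulative regret at the end of the horizon.
    print(dfs[0]["Cumulative Regrets"].groupby(level="times").mean().iloc[-1])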