def mabp_random(self):
    """Run the uniform-random baseline policy over the profit table.

    Reads the pickled frame at ``../data/profit_df.pkl`` and, for every row
    (round), draws one column index (arm) uniformly at random with
    ``random.randrange``.

    A uniform draw does not depend on the stored values, so only the frame's
    shape is used. Earlier revisions also scaled the data and accumulated
    per-round reward/profit totals; those totals were never returned, and
    computing them crashed on a misspelled ``inverse_transform`` call, so
    that dead work has been removed.

    Returns:
        pandas.Series: relative selection frequency of each chosen column
        index (``value_counts(normalize=True)`` of the per-round picks).

    Raises:
        ValueError: from ``random.randrange`` if the frame has rows but no
            columns.
    """
    df = pd.read_pickle('../data/profit_df.pkl')
    n_rounds, n_arms = df.shape

    # One randrange call per round, in round order, so seeded runs stay
    # reproducible.
    selected = [random.randrange(n_arms) for _ in range(n_rounds)]
    return pd.Series(selected).value_counts(normalize=True)
def mapb_ucb(self):
    """Run an upper-confidence-bound (UCB) policy over the profit table.

    Reads the pickled frame at ``../data/profit_df.pkl``, fills missing
    values with the column mean, and scales the columns with
    ``RobustScaler``. Rows are treated as rounds and columns as arms; the
    scaled value at ``[round, arm]`` is the reward for picking that arm in
    that round.

    In each round the arm with the largest
    ``average_reward + sqrt(2 * ln(round) / pulls)`` is chosen, with rounds
    counted from 1. An arm that has never been pulled has an infinite bound,
    so every arm is tried before any is repeated. Ties go to the lowest arm
    index.

    Returns:
        pandas.Series: relative selection frequency of each chosen column
        index (``value_counts(normalize=True)`` of the per-round picks).
    """
    df = pd.read_pickle('../data/profit_df.pkl')
    # fillna returns a new frame; the result must be kept, otherwise NaNs
    # flow into the reward sums and break every later comparison.
    df = df.fillna(df.mean())
    rewards = RobustScaler().fit(df).transform(df)
    n_rounds, n_arms = rewards.shape

    selected = []
    pull_counts = [0] * n_arms
    reward_sums = [0.0] * n_arms

    for n in range(n_rounds):
        log_term = 2 * math.log(n + 1)
        item = 0
        # Start from -inf, not 0: scaled rewards are centred on the median,
        # so every bound can be negative and the best arm must still win.
        best_bound = -math.inf
        for arm in range(n_arms):
            pulls = pull_counts[arm]
            if pulls > 0:
                average_reward = reward_sums[arm] / pulls
                bound = average_reward + math.sqrt(log_term / pulls)
            else:
                # Unseen arm: force exploration.
                bound = math.inf
            # Strict '>' keeps the lowest index on ties.
            if bound > best_bound:
                best_bound = bound
                item = arm

        selected.append(item)
        pull_counts[item] += 1
        reward_sums[item] += rewards[n, item]

    return pd.Series(selected).value_counts(normalize=True)