def play():
    '''Play one BanditGame, giving each arm an equal slice of the starting money.

    Arms are visited in order 1..arm_num. Arm k keeps being pulled while the
    current money is at least (arm_num - k) / arm_num of the starting money,
    so the last arm's threshold is 0.

    Returns 0 as soon as a pull leaves money <= 0, and 1 as soon as a pull
    leaves money >= bandit.celling(). If the loops finish without either
    outcome (for example when there are no arms) the function falls through
    and returns None.
    '''
    game = BanditGame()
    starting_money = game.money()
    current_money = starting_money
    arm_count = game.arm_num()

    for arm in range(1, arm_count + 1):
        # Money level at which this arm's slice of the budget is used up.
        reserve = 1.0 * starting_money * (arm_count - arm) / arm_count
        while current_money >= reserve:
            # Only the third element of the play() result (money) is used.
            _, _, current_money = game.play(arm)
            if current_money <= 0:
                return 0
            if current_money >= game.celling():
                return 1
def __init__(self):
    '''Create a fresh BanditGame and an empty per-arm statistics table.

    The game's ceiling and arm count are read once and cached on the
    instance. Statistics are keyed by 1-based arm number.
    '''
    game = BanditGame()
    self.__bandit = game
    self.__celling = game.celling()
    self.__arm_num = game.arm_num()
    # arm -> [total, win, profit], all starting at zero
    self.__stat = dict(
        (arm_id, [0, 0, 0]) for arm_id in range(1, self.__arm_num + 1)
    )
def play():
    '''Play one BanditGame with Thompson sampling that locks onto proven arms.

    Each arm (0-based here) carries a Beta prior ``prior[arm] = [a, b]`` that
    starts at [1, 1]; ``a`` grows by the gain of a winning round and ``b`` by
    the amount spent. An arm with at least 10 winning rounds is "mature":

    * mature with a < b  -> never chosen again,
    * mature with a > b  -> exploited directly; the best such arm is chosen,
    * everything else    -> competes via a random draw from its Beta prior.

    Rounds are played through ``play_one_till_win`` (defined elsewhere; it
    returns ``(spend, gain)`` -- judging by its name it keeps pulling until
    the arm pays out, confirm against its definition).

    Returns 1 if the game ends with money at or above ``bandit.celling()``,
    otherwise 0.
    '''
    bandit = BanditGame()
    money = bandit.money()
    arm_num = bandit.arm_num()

    prior = [[1, 1] for _ in range(arm_num)]
    win = [0] * arm_num
    total = [0] * arm_num

    # The helpers close over prior/win/total, which are only mutated in place,
    # so they are defined once instead of on every loop iteration.
    def find_best_one(selected_arms):
        '''Return the arm with the highest (a - b) / total, or None if empty.'''
        best_e = float('-inf')
        best_arm = None
        for arm in selected_arms:
            # 1.0 forces float division on Python 2 as well.
            current_e = (prior[arm][0] - prior[arm][1]) * 1.0 / total[arm]
            if current_e > best_e:
                best_e = current_e
                best_arm = arm
        return best_arm

    def sample_an_arm():
        '''Pick the next arm: a mature winner if any, else the best Beta draw.'''
        max_sample = float('-inf')
        best_arm = None
        mature_ones = []
        for arm in range(arm_num):
            a, b = prior[arm]
            if win[arm] >= 10:
                if a < b:
                    # Proven loser: leave it out entirely.
                    continue
                if a > b:
                    mature_ones.append(arm)
                    continue
                # a == b: still undecided, keep sampling it below.
            r = beta.rvs(a, b)
            if r > max_sample:
                max_sample = r
                best_arm = arm
        if mature_ones:
            return find_best_one(mature_ones)
        if best_arm is not None:
            return best_arm
        # Every arm is a mature loser: fall back to the least bad one.
        return find_best_one(range(arm_num))

    while money > 0 and money < bandit.celling():
        arm_chosen = sample_an_arm()
        # play_one_till_win is given a 1-based arm number.
        spend, gain = play_one_till_win(arm_chosen + 1, bandit)
        money = bandit.money()
        prior[arm_chosen][1] += spend
        total[arm_chosen] += spend + 1
        if gain > 0:
            prior[arm_chosen][0] += gain
            win[arm_chosen] += 1

    # The loop also stops when money lands exactly on the ceiling, so the win
    # test must be >= (a strict > reported that case as a loss).
    if money >= bandit.celling():
        return 1
    return 0
def play():
    '''Play one BanditGame with plain Thompson sampling over Beta priors.

    Each arm (0-based here) carries a Beta prior ``prior[arm] = [a, b]`` that
    starts at [1, 1]. Before every round one value is drawn from each arm's
    prior and the arm with the largest draw is played through
    ``play_one_till_win`` (defined elsewhere; it returns ``(spend, gain)``).
    Afterwards ``b`` grows by the amount spent and, if the round had a
    positive gain, ``a`` grows by that gain.

    Returns 1 if the game ends with money at or above ``bandit.celling()``,
    otherwise 0.
    '''
    bandit = BanditGame()
    money = bandit.money()
    arm_num = bandit.arm_num()

    prior = [[1, 1] for _ in range(arm_num)]

    # Defined once; it reads `prior`, which is only mutated in place.
    def sample_an_arm():
        '''Return the arm whose Beta draw is largest (first one on ties).'''
        max_sample = float('-inf')
        best_arm = None
        for arm in range(arm_num):
            r = beta.rvs(prior[arm][0], prior[arm][1])
            if r > max_sample:
                max_sample = r
                best_arm = arm
        return best_arm

    while money > 0 and money < bandit.celling():
        arm_chosen = sample_an_arm()
        # play_one_till_win is given a 1-based arm number.
        spend, gain = play_one_till_win(arm_chosen + 1, bandit)
        money = bandit.money()
        prior[arm_chosen][1] += spend
        if gain > 0:
            prior[arm_chosen][0] += gain

    # The loop also stops when money lands exactly on the ceiling, so the win
    # test must be >= (a strict > reported that case as a loss).
    if money >= bandit.celling():
        return 1
    return 0
class PlayBandit(object):
    '''Explore-then-exploit player for a single BanditGame session.

    Per-arm statistics live in ``__stat[arm] = [total, win, profit]``, keyed
    by 1-based arm number: number of pulls, number of pulls with a positive
    gain, and the sum of the gains reported by ``BanditGame.play``.
    '''

    # Winning pulls an arm needs before its statistics are trusted.
    _MIN_WINS = 10

    def __init__(self):
        '''Create the game, cache its ceiling and arm count, zero the stats.'''
        self.__bandit = BanditGame()
        self.__celling = self.__bandit.celling()
        self.__arm_num = self.__bandit.arm_num()
        # arm -> [total, win, profit]
        self.__stat = dict(
            (arm, [0, 0, 0]) for arm in range(1, self.__arm_num + 1)
        )

    def _mean_profit(self, arm):
        '''Return profit per pull for *arm* as a float; -inf if never pulled.'''
        total, _, profit = self.__stat[arm]
        if total == 0:
            return float('-inf')
        # float() keeps this a true division on Python 2 with integer gains.
        return float(profit) / total

    def play_chosen(self, arm):
        '''Play the chosen arm till win or lose'''
        money = self.__bandit.money()
        while money > 0 and money < self.__celling:
            _, _, money = self.__bandit.play(arm)

    def find_best_and_play(self):
        '''Commit to the arm with the highest mean profit and play it out.

        Ties go to the lowest-numbered arm. Arms that were never pulled rank
        below every arm that was.
        '''
        best_arm = 1
        best_e = self._mean_profit(best_arm)
        for arm in range(1, self.__arm_num + 1):
            current_e = self._mean_profit(arm)
            if current_e > best_e:
                best_e = current_e
                best_arm = arm
        self.play_chosen(best_arm)

    def has_positive_ones(self):
        '''Return True if some trusted arm (enough wins) has positive mean profit.'''
        return any(
            self.__stat[arm][1] >= self._MIN_WINS and self._mean_profit(arm) > 0
            for arm in range(1, self.__arm_num + 1)
        )

    def find_insufficient_ones(self):
        '''Return the arms, in ascending order, that still lack enough wins.'''
        return [
            arm for arm in range(1, self.__arm_num + 1)
            if self.__stat[arm][1] < self._MIN_WINS
        ]

    def play_budget_on_insufficient_arms(self, insufficient_arms):
        '''Split the current money evenly over the given arms and explore them.

        Each arm is pulled until its share is spent, the ceiling is reached,
        or it has enough wins while its summed profit is negative. Statistics
        are updated after every pull. An empty list is a no-op.
        '''
        if not insufficient_arms:
            return
        money = self.__bandit.money()
        # True division: an integer budget could floor to 0, in which case no
        # arm would ever be pulled and play() would never make progress.
        budget_per_arm = float(money) / len(insufficient_arms)
        for arm in insufficient_arms:
            stop_level = max(money - budget_per_arm, 0)
            stat = self.__stat[arm]
            while money > stop_level and money < self.__celling:
                _, gain, money = self.__bandit.play(arm)
                stat[0] += 1
                stat[2] += gain
                if gain > 0:
                    stat[1] += 1
                if stat[1] >= self._MIN_WINS and stat[2] < 0:
                    # Enough evidence that this arm loses money.
                    break

    def play(self):
        '''Run the game to completion; return 1 on a win, 0 on a loss.

        While the game is open: exploit as soon as a trusted arm is
        profitable; otherwise keep exploring arms that lack wins; if every
        arm is trusted but none is profitable, play the least bad one.
        '''
        money = self.__bandit.money()
        while money > 0 and money < self.__celling:
            if self.has_positive_ones():
                self.find_best_and_play()
            else:
                insufficient_arms = self.find_insufficient_ones()
                if insufficient_arms:
                    self.play_budget_on_insufficient_arms(insufficient_arms)
                else:
                    self.find_best_and_play()
            money = self.__bandit.money()
        # Check win or lose and return
        if money <= 0:
            return 0
        return 1