class GBRTTrainer():
    """Tune AI parameters by repeatedly playing games through an optimizer.

    The original header comment describes this class as training "using
    Bayesian global optimization with GBRT against random play".
    NOTE(review): every ``Optimizer`` created here is given the string
    ``"GP"``; confirm which surrogate model is actually intended.

    Attributes written by ``__init__``:
        ai_config / opponent_config: configuration objects handed to the
            engine on every restart; ``ai_config`` is mutated in place with
            the parameters under test.
        param_ranges: search space handed to ``Optimizer`` (sliceable).
        n_iter: ``n_iter`` value passed to ``Optimizer.run``.
        n_games: number of games played per evaluation (must be > 0).
        params: current best parameters; overwritten by the train methods.
        score_history: win rate recorded by every ``objective`` call.
        the_good_ones: parameter sets whose win rate was at least 0.99.
    """

    # Number of consecutive parameters tuned together by train_batchwise().
    BATCH_SIZE = 10

    def __init__(self, max_moves, ai_config, opponent_config, param_ranges,
                 n_iter, n_games, init_params, n_batches=1):
        self.ai_config = ai_config
        self.opponent_config = opponent_config
        self.param_ranges = param_ranges
        self.n_iter = n_iter
        self.n_games = n_games
        self.max_moves = max_moves
        self.engine = Engine(self.max_moves, self.ai_config,
                             self.opponent_config)
        self.params = init_params
        self.n_batches = n_batches
        self.score_history = []
        self.the_good_ones = []

    def _win_rate(self, report_each=False):
        """Play ``n_games`` games and return the average score.

        An engine result of 1 scores 1.0, a result of 0 scores 0.5 and any
        other result scores nothing. When ``report_each`` is true the raw
        result of every game is printed.
        """
        score = 0.0
        for _ in range(self.n_games):
            self.engine.restart(self.max_moves, self.ai_config,
                                self.opponent_config)
            winner = self.engine.run()
            if winner == 1:
                score += 1.0
            elif winner == 0:
                score += 0.5
            if report_each:
                print(winner)
        return score / self.n_games

    def objective(self, params):
        """Objective minimised by ``train``.

        Installs ``params`` as the AI parameters, plays ``n_games`` games,
        prints and records the win rate, and remembers ``params`` when the
        win rate reaches 0.99.

        Returns:
            The negated ``engine.get_value(1, 100, -1, 500)`` taken after the
            last game only. NOTE(review): the win rate itself is not part of
            the returned value; an earlier variant returned
            ``1 - win_rate`` instead - confirm which signal is wanted.
        """
        self.ai_config[Settings.AI_PARAMS.value] = params
        win_rate = self._win_rate()
        print(win_rate)
        self.score_history.append(win_rate)
        if win_rate >= .99:
            self.the_good_ones.append(params)
        return -self.engine.get_value(1, 100, -1, 500)

    def train(self):
        """Optimise all parameters at once and store the result in ``params``."""
        opt = Optimizer(self.param_ranges, "GP")
        r = opt.run(self.objective, n_iter=self.n_iter)
        self.params = r.x

    def objective_batchwise(self, params):
        """Objective minimised by ``train_batchwise`` for one parameter slice.

        ``params`` replaces the slice of ``self.params`` described by
        ``self.training_param_range`` (``[start, stop]``, set by
        ``train_batchwise`` before each round); all other parameters keep
        their current values.

        Returns:
            The negated sum of ``engine.get_value(1, 100, -1, 500)`` over
            ``n_games`` games.
        """
        lo, hi = self.training_param_range
        # list() keeps this a concatenation even if the optimizer hands back
        # an array-like instead of a plain list.
        full_params = (list(self.params[:lo]) + list(params)
                       + list(self.params[hi:]))
        self.ai_config[Settings.AI_PARAMS.value] = full_params
        # The accumulator was previously never initialised (a stray
        # ``wins = 0`` stood in its place), so the first ``+=`` raised.
        res = 0
        for _ in range(self.n_games):
            self.engine.restart(self.max_moves, self.ai_config,
                                self.opponent_config)
            self.engine.run()
            res += self.engine.get_value(1, 100, -1, 500)
        print(res)
        return -res

    def train_batchwise(self):
        """Optimise the parameters ``BATCH_SIZE`` at a time.

        For each of ``n_batches`` passes, every consecutive slice of
        ``param_ranges`` is optimised on its own while the remaining
        parameters stay fixed, and the result is written back into
        ``self.params`` (which must therefore support slice assignment).
        """
        n_params = len(self.param_ranges)
        for _ in range(self.n_batches):
            print("starting a batch")
            for lo in range(0, n_params, self.BATCH_SIZE):
                print("starting a round of optimization")
                hi = min(lo + self.BATCH_SIZE, n_params)
                self.training_param_range = [lo, hi]
                opt = Optimizer(self.param_ranges[lo:hi], "GP")
                # Must be the batch-wise objective: the plain objective would
                # install only this slice as the complete parameter vector.
                r = opt.run(self.objective_batchwise, n_iter=self.n_iter)
                self.params[lo:hi] = r.x

    def evaluate(self):
        """Play ``n_games`` games with the current configuration.

        Prints the engine result of each game and returns the average score
        (1.0 per result of 1, 0.5 per result of 0).
        """
        return self._win_rate(report_each=True)
class Tournement:
    """Single-elimination playoff between AI configs and between start configs.

    ``__init__`` loads every configuration listed in ``config``; ``playoff``
    then narrows each pool down by pairing configurations against each other.
    """

    def __init__(self, config, max_moves):
        """Load the candidate configurations.

        Args:
            config: mapping with an ``AI_FIELD`` entry (itself keyed by the
                ``AIType`` values used below, each holding a list of file
                names) and a ``START_FIELD`` entry holding a list of file
                names.
            max_moves: move limit handed to the engine for regular games.
        """
        ai_sources = (
            (AIType.REACHABLE, reachable_config),
            (AIType.MODIFIED_REACHABLE, mod_reachable_config),
            (AIType.PIECE_BASED_ADD, piecebased_config_add),
            (AIType.PIECE_BASED_MUL, piecebased_config_mul),
        )
        self.ai_configs = []
        for ai_type, template in ai_sources:
            for filename in config[AI_FIELD][ai_type.value]:
                self.ai_configs += read_file_into_configs(filename, template)

        self.start_configs = []
        for filename in config[START_FIELD]:
            self.start_configs += read_file_into_configs(
                filename, flexible_start_config)

        self.engine = Engine(max_moves, None, None)
        self.max_moves = max_moves

    def play_against(self, config1, config2, n_games=5):
        """Play ``n_games`` games and report who won more of them.

        Returns:
            1 if ``config1`` won more games, 2 if ``config2`` did, 0 on an
            equal count (engine results other than 1 or 2 count for nobody).
        """
        player1_wins = 0
        player2_wins = 0
        for _ in range(n_games):
            self.engine.restart(self.max_moves, config1, config2)
            winner = self.engine.run()
            if winner == 1:
                player1_wins += 1
            elif winner == 2:
                player2_wins += 1
        if player1_wins > player2_wins:
            return 1
        if player1_wins < player2_wins:
            return 2
        return 0

    def _pick_winner(self, config1, config2):
        """Return whichever of the two configs advances to the next round."""
        winner = self.play_against(config1, config2)
        if winner == 0:
            # In the event of a tie, play again with twice the number of
            # moves to try to determine the winner.
            self.engine.restart(2 * self.max_moves, config1, config2)
            winner = self.engine.run()
        if winner == 1:
            return config1
        if winner == 2:
            return config2
        # A double tie indicates the configs are about the same, so it is
        # safe to just randomly pick one to move on to the next round.
        return config1 if random.random() < 0.5 else config2

    def playoff_helper(self, param_lst, num):
        """Run elimination rounds until at most ``num`` configs remain.

        Each round shuffles the survivors, pairs them off (0-1, 2-3, ...)
        and keeps the winner of every pair; with an odd count the last
        config advances without playing. ``param_lst`` itself is left
        untouched.

        Args:
            param_lst: configurations to compete.
            num: maximum number of configurations to return; values below 1
                are treated as 1, since a round cannot eliminate a lone
                survivor.

        Returns:
            A list with the surviving configurations.
        """
        # Work on a copy so shuffling does not reorder the caller's list.
        this_round = list(param_lst)
        target = max(num, 1)
        round_no = 1
        while len(this_round) > target:
            print("round {}, {} configurations left".format(
                round_no, len(this_round)))
            round_no += 1
            random.shuffle(this_round)
            next_round = []
            # Step by two so every config plays in exactly one pairing.
            for k in range(0, len(this_round) - 1, 2):
                next_round.append(
                    self._pick_winner(this_round[k], this_round[k + 1]))
            if len(this_round) % 2 != 0:
                next_round.append(this_round[-1])
            this_round = next_round
        return this_round

    def playoff(self, num=1):
        """Run the playoff for AI configs and for start configs.

        Returns:
            ``(best_ai, best_start)``, each a list as returned by
            ``playoff_helper``.
        """
        print('finding the best ai configurations')
        best_ai = self.playoff_helper(self.ai_configs, num)
        print('finding the best start state')
        best_start = self.playoff_helper(self.start_configs, num)
        return best_ai, best_start
class LSStartStateTrainer:
    """Local search (simulated annealing) over start-state layouts.

    Constructing an instance immediately runs the annealing, prints the
    final layout and appends the run's data to four files under ``train/``.
    """

    # Layouts are treated as a flat list of rows of this many cells.
    ROW_WIDTH = 10
    # Move limit handed to the engine for every evaluation game.
    MAX_MOVES = 1000

    def __init__(self, start=None, n_games=10, start_temp=1024, factor=2):
        """Run the search.

        Args:
            start: initial layout; ``get_random_start()`` is used when None.
            n_games: games played per candidate evaluation.
            start_temp: initial annealing temperature.
            factor: the temperature is divided by this after every step.
        """
        self.start = start
        if start is None:
            self.start = get_random_start()
        self.delta = 0.00001
        self.n_games = n_games
        # Both sides search randomly and use no AI; they differ only in the
        # start type.
        self.player_config = self._make_config(StartType.SIMPLE.value)
        self.opponent_config = self._make_config(StartType.CHAMPION.value)
        self.engine = Engine(self.MAX_MOVES)
        self.best, stats = self.simulated_annealing(start_temp, factor)
        print(self.best)
        # NOTE(review): the third file name is spelled "simuated"; it is kept
        # as-is so existing result files continue to be appended to.
        outputs = (
            ("train/simulated_annealing_results.txt", self.best),
            ("train/simulated_annealing_factors.txt", [start_temp, factor]),
            ("train/simuated_annealing_start_state.txt", self.start),
            ("train/simulated_annealing_stats.txt", stats),
        )
        for path, value in outputs:
            with open(path, "a") as handle:
                handle.write("{}\n".format(value))

    def _make_config(self, start_type):
        """Build a player configuration for a random-search, AI-less player."""
        return {
            Settings.AI.value: True,  # should this be AI or person
            Settings.START_TYPE.value: start_type,  # what kind of start state
            Settings.START_PARAMS.value: [],  # any parameters for the starter
            Settings.SEARCH_TYPE.value: SearchType.RANDOM.value,  # search kind
            Settings.SEARCH_PARAMS.value: [],  # any parameters for the search
            Settings.AI_TYPE.value: AIType.NONE.value,  # what is the AI
            Settings.AI_PARAMS.value: [],  # any params for the AI
        }

    def evaluate(self, last_start, next_start):
        """Play ``n_games`` games of ``next_start`` against ``last_start``.

        ``next_start`` is installed as the start parameters of
        ``player_config`` and ``last_start`` as those of ``opponent_config``.
        An engine result of 1 counts as a full win and a result of 0 as half
        a win.

        Returns:
            ``(n_games - wins) / n_games`` as a float, which is also printed.
        """
        wins = 0
        self.player_config[Settings.START_PARAMS.value] = next_start
        self.opponent_config[Settings.START_PARAMS.value] = last_start
        for _ in range(self.n_games):
            self.engine.restart(self.MAX_MOVES, self.player_config,
                                self.opponent_config)
            res = self.engine.run()
            if res == 0:
                wins += 0.5
            else:
                wins += (res == 1)
        score = float(self.n_games - wins) / self.n_games
        print(score)
        return score

    def get_random_next(self, last_start):
        """Return a copy of ``last_start`` with two distinct cells swapped.

        The two positions are chosen uniformly at random. A layout with fewer
        than two cells is returned as an unchanged copy.
        """
        next_start = copy.copy(last_start)
        if len(last_start) < 2:
            return next_start
        i, j = random.sample(range(len(last_start)), 2)
        next_start[i], next_start[j] = next_start[j], next_start[i]
        return next_start

    def get_similar_random_next(self, last_start):
        """Return a copy of ``last_start`` with two neighbouring cells swapped.

        A cell is chosen uniformly at random and swapped with one of its (up
        to eight) horizontal, vertical or diagonal neighbours on a grid of
        ``ROW_WIDTH`` columns, again chosen uniformly. A layout with fewer
        than two cells is returned as an unchanged copy.
        """
        size = len(last_start)
        next_start = copy.copy(last_start)
        if size < 2:
            return next_start
        width = self.ROW_WIDTH
        s1 = random.randrange(size)
        row, col = divmod(s1, width)
        neighbours = []
        for r in range(row - 1, row + 2):
            for c in range(col - 1, col + 2):
                if r == row and c == col:
                    continue
                # A column outside the row would alias a cell of the
                # adjacent row, which is not a neighbour.
                if not 0 <= c < width:
                    continue
                s2 = width * r + c
                if 0 <= s2 < size:
                    neighbours.append(s2)
        s2 = random.choice(neighbours)
        next_start[s1], next_start[s2] = next_start[s2], next_start[s1]
        return next_start

    def simulated_annealing(self, start_temp, factor):
        """Anneal from ``self.start`` until the temperature drops to ``delta``.

        Every step proposes a neighbouring layout, evaluates it against the
        current one, and moves to it when the evaluation exceeds 0.5 or,
        otherwise, with probability ``exp((e - 0.5) / temp)``.
        NOTE(review): ``evaluate`` returns the share of games *not* credited
        to engine result 1; confirm that accepting on ``e > 0.5`` is the
        intended direction.

        Returns:
            ``(layout, stats)`` where ``stats`` lists every evaluation.

        Raises:
            ValueError: if ``factor`` lies in ``(0, 1]`` while the start
                temperature is above ``delta``, as cooling would never end.
        """
        if start_temp > self.delta and 0 < factor <= 1:
            raise ValueError(
                "factor must be greater than 1 for the temperature to "
                "decrease, got {}".format(factor))
        cur = self.start
        # Float temperature so cooling does not truncate under integer
        # division.
        temp = float(start_temp)
        stats = []
        while temp > self.delta:
            candidate = self.get_similar_random_next(cur)
            e = self.evaluate(cur, candidate)
            stats.append(e)
            if e > 0.5 or random.random() < np.exp((e - 0.5) / temp):
                cur = candidate
            temp = temp / factor
        return cur, stats