Example #1
class GBRTTrainer:  #trains using Bayesian global optimization with GBRT against random play
    def __init__(self,max_moves,ai_config,opponent_config,param_ranges,n_iter,n_games,init_params,n_batches = 1):
        self.ai_config = ai_config
        self.opponent_config = opponent_config
        self.param_ranges = param_ranges
        self.n_iter = n_iter
        self.n_games = n_games
        self.max_moves = max_moves
        self.engine = Engine(self.max_moves,self.ai_config, self.opponent_config)
        self.params = init_params
        self.n_batches = n_batches
        self.score_history = []
        self.the_good_ones = []

    def objective(self,params):
        self.ai_config[Settings.AI_PARAMS.value] = params
        wins = 0
        for i in range(self.n_games):
            self.engine.restart(self.max_moves,self.ai_config,self.opponent_config)
            winner = self.engine.run()
            if winner == 1:
                wins += 1.0
            elif winner == 0:
                wins += 0.5
        print(wins / self.n_games)
        self.score_history.append(wins / self.n_games)
        if (wins / self.n_games) >= 0.99:
            #print(params)
            self.the_good_ones.append(params)
        #return 1 - (wins / self.n_games)
        return -self.engine.get_value(1, 100, -1, 500)
        #res = 0
        #for i in range(self.n_games):
        #    self.engine.restart(self.max_moves,self.ai_config,self.opponent_config)
        #    self.engine.run()
        #    val = self.engine.get_value(1,100,-1,500)
        #    res += val
        #print res
        #self.score_history.append(res)
        #return -res


    def train(self):
        opt = Optimizer(self.param_ranges,"GP")
        r = opt.run(self.objective,n_iter=self.n_iter)
        self.params = r.x
        #print("trained paramaters are: ")
        #print(r.x)

    def objective_batchwise(self,params):
        full_params = self.params[:self.training_param_range[0]] + params + self.params[self.training_param_range[1]:]
        self.ai_config[Settings.AI_PARAMS.value] = full_params
        res = 0
        for i in range(self.n_games):
            self.engine.restart(self.max_moves, self.ai_config, self.opponent_config)
            self.engine.run()
            val = self.engine.get_value(1, 100, -1, 500)
            res += val
        print(res)
        return -res

    def train_batchwise(self):
        for batch in range(self.n_batches):
            print("starting a batch")
            for start in range(10):
                print("starting a round of optimization")
                self.training_param_range = [start * 10, start * 10 + 10]
                opt = Optimizer(self.param_ranges[start * 10:start * 10 + 10], "GP")
                r = opt.run(self.objective_batchwise, n_iter=self.n_iter)
                self.params[start * 10:start * 10 + 10] = r.x

    def evaluate(self):
        wins = 0.0
        for i in range(self.n_games):
            self.engine.restart(self.max_moves,self.ai_config,self.opponent_config)
            winner = self.engine.run()
            if winner == 1:
                wins += 1.0
            elif winner == 0:
                wins += 0.5
            print(winner)
        return wins / (self.n_games)
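
For orientation, a minimal usage sketch follows. It is not taken from the project: the numbers are placeholders, the config dicts simply mirror the shape of the ones in Example #3, and param_ranges is assumed to hold one (low, high) pair per AI parameter as expected by the project's Optimizer wrapper.

# Hypothetical usage sketch -- all values here are placeholders, not from the project.
ai_config = {
    Settings.AI.value: True,
    Settings.START_TYPE.value: StartType.SIMPLE.value,
    Settings.START_PARAMS.value: [],
    Settings.SEARCH_TYPE.value: SearchType.RANDOM.value,
    Settings.SEARCH_PARAMS.value: [],
    Settings.AI_TYPE.value: AIType.REACHABLE.value,  # placeholder: whichever AI is being tuned
    Settings.AI_PARAMS.value: [],
}
opponent_config = dict(ai_config, **{Settings.AI_TYPE.value: AIType.NONE.value})  # random-play opponent

param_ranges = [(-1.0, 1.0)] * 4   # one (low, high) range per tunable AI parameter
init_params = [0.0] * 4

trainer = GBRTTrainer(max_moves=200,
                      ai_config=ai_config,
                      opponent_config=opponent_config,
                      param_ranges=param_ranges,
                      n_iter=50,
                      n_games=20,
                      init_params=init_params)
trainer.train()                                               # trainer.params now holds the optimized values
trainer.ai_config[Settings.AI_PARAMS.value] = trainer.params  # evaluate() reads ai_config as-is
print(trainer.evaluate())                                     # win rate over n_games against the opponent config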
Example #2
class Tournement:
    def __init__(self, config, max_moves):
        self.ai_configs = []
        for filename in config[AI_FIELD][AIType.REACHABLE.value]:
            self.ai_configs += read_file_into_configs(filename,
                                                      reachable_config)
        for filename in config[AI_FIELD][AIType.MODIFIED_REACHABLE.value]:
            self.ai_configs += read_file_into_configs(filename,
                                                      mod_reachable_config)
        for filename in config[AI_FIELD][AIType.PIECE_BASED_ADD.value]:
            self.ai_configs += read_file_into_configs(filename,
                                                      piecebased_config_add)
        for filename in config[AI_FIELD][AIType.PIECE_BASED_MUL.value]:
            self.ai_configs += read_file_into_configs(filename,
                                                      piecebased_config_mul)

        self.start_configs = []

        for filename in config[START_FIELD]:
            self.start_configs += read_file_into_configs(
                filename, flexible_start_config)

        self.engine = Engine(max_moves, None, None)

        self.max_moves = max_moves

    def play_against(self, config1, config2, n_games=5):
        player1_wins = 0
        player2_wins = 0
        for i in range(n_games):
            self.engine.restart(self.max_moves, config1, config2)
            winner = self.engine.run()
            if winner == 1:
                player1_wins += 1
            elif winner == 2:
                player2_wins += 1

        if player1_wins > player2_wins:
            return 1
        elif player1_wins < player2_wins:
            return 2
        else:
            return 0

    def playoff_helper(self, param_lst, num):
        this_round = param_lst
        next_round = []

        round_num = 1
        while len(this_round) > num:
            print("round {}, {} configurations left".format(
                round_num, len(this_round)))
            round_num += 1
            random.shuffle(this_round)
            for i in range(len(this_round) // 2):
                config1 = this_round[2 * i]
                config2 = this_round[2 * i + 1]
                # do a playoff and the winner goes to the next round
                winner = self.play_against(config1, config2)
                if winner == 1:
                    next_round.append(config1)
                elif winner == 2:
                    next_round.append(config2)
                else:
                    #in the event of a tie, play again with twice the number of
                    #moves to try to determine the winner
                    self.engine.restart(2 * self.max_moves, config1, config2)
                    winner = self.engine.run()
                    if winner == 1:
                        next_round.append(config1)
                    elif winner == 2:
                        next_round.append(config2)
                    else:
                        #double tie indicates the configs are about the same
                        #so its safe to just randomly pick one to move on to
                        #the next round
                        if random.random() < 0.5:
                            next_round.append(config1)
                        else:
                            next_round.append(config2)
            if (len(this_round) % 2 != 0):
                next_round.append(this_round[-1])

            this_round = next_round
            next_round = []

        return this_round

    def playoff(self, num=1):
        print('finding the best ai configurations')
        best_ai = self.playoff_helper(self.ai_configs, num)
        print('finding the best start state')
        best_start = self.playoff_helper(self.start_configs, num)
        return best_ai, best_start
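
A hedged usage sketch for the tournament class follows; the config layout mirrors what __init__ reads, but the filenames are hypothetical, and the field constants (AI_FIELD, START_FIELD), enums, and reader helpers are assumed to come from the surrounding project.

# Hypothetical usage sketch -- the filenames are placeholders, not real project files.
config = {
    AI_FIELD: {
        AIType.REACHABLE.value: ["configs/reachable.txt"],
        AIType.MODIFIED_REACHABLE.value: [],
        AIType.PIECE_BASED_ADD.value: ["configs/piece_add.txt"],
        AIType.PIECE_BASED_MUL.value: [],
    },
    START_FIELD: ["configs/start_states.txt"],
}

tournament = Tournement(config, max_moves=200)
best_ai, best_start = tournament.playoff(num=1)   # single-elimination bracket down to one winner of each kind
print(best_ai)
print(best_start)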
Example #3
class LSStartStateTrainer:
    def __init__(self, start=None, n_games=10, start_temp=1024, factor=2):
        self.start = start
        if start is None:
            self.start = get_random_start()

        self.delta = 0.00001
        self.n_games = n_games
        #both players use random search; the player gets the simple start type and the opponent the champion start type
        self.player_config = {
            Settings.AI.value: True,  #should this be Ai or person
            Settings.START_TYPE.value:
            StartType.SIMPLE.value,  #what kind of start state
            Settings.START_PARAMS.value: [],  #any parameters for the start state
            Settings.SEARCH_TYPE.value:
            SearchType.RANDOM.value,  #what kind of search is happening
            Settings.SEARCH_PARAMS.value: [],  #any parameters for the search
            Settings.AI_TYPE.value: AIType.NONE.value,  # what is the AI
            Settings.AI_PARAMS.value: []  #any params for the ai
        }
        self.opponent_config = {
            Settings.AI.value: True,  #should this be Ai or person
            Settings.START_TYPE.value:
            StartType.CHAMPION.value,  #what kind of start state
            Settings.START_PARAMS.value: [],  #any parameters for the start state
            Settings.SEARCH_TYPE.value:
            SearchType.RANDOM.value,  #what kind of search is happening
            Settings.SEARCH_PARAMS.value: [],  #any parameters for the search
            Settings.AI_TYPE.value: AIType.NONE.value,  # what is the AI
            Settings.AI_PARAMS.value: []  #any params for the ai
        }

        self.engine = Engine(1000)
        self.best, stats = self.simulated_annealing(start_temp, factor)
        print(self.best)
        with open("train/simulated_annealing_results.txt", "a") as f, \
             open("train/simulated_annealing_factors.txt", "a") as t, \
             open("train/simulated_annealing_start_state.txt", "a") as s, \
             open("train/simulated_annealing_stats.txt", "a") as g:
            f.write("{}\n".format(self.best))
            t.write("{}\n".format([start_temp, factor]))
            s.write("{}\n".format(self.start))
            g.write("{}\n".format(stats))

    def evaluate(self, last_start, next_start):
        wins = 0
        self.player_config[Settings.START_PARAMS.value] = next_start
        self.opponent_config[Settings.START_PARAMS.value] = last_start

        for i in range(self.n_games):
            self.engine.restart(1000, self.player_config, self.opponent_config)
            res = self.engine.run()
            if res == 0:
                wins += 0.5
            elif res == 1:
                wins += 1

        print(float(self.n_games - wins) / self.n_games)
        return float(self.n_games - wins) / self.n_games

    def get_random_next(self, last_start):
        #swap two distinct random positions in the start state
        i = int(random.random() * len(last_start))
        j = i
        while j == i:
            j = int(random.random() * len(last_start))

        next_start = copy.copy(last_start)
        next_start[i], next_start[j] = next_start[j], next_start[i]
        return next_start

    def get_similar_random_next(self, last_start):
        #want to swap two neighboring pieces: pick a random square, then one
        #of its (up to 8) neighbours that is still on the board
        s1 = int(random.random() * len(last_start))
        i1, j1 = s1 // 10, s1 % 10
        s2 = s1

        valid = False
        while not valid:
            n = int(random.random() * 8)
            count = 0
            for i2 in range(i1 - 1, i1 + 2):
                if valid: break
                for j2 in range(j1 - 1, j1 + 2):
                    if valid: break
                    if i1 == i2 and j1 == j2: continue
                    s2 = 10 * i2 + j2
                    if count == n:
                        #reject neighbours that fall off the board or wrap
                        #around a row edge
                        if 0 <= i2 and 0 <= j2 < 10 and s2 < len(last_start):
                            valid = True

                    count += 1

        next_start = copy.copy(last_start)

        #print("s1 : {}".format(s1))
        #print("s2 : {}".format(s2))

        next_start[s1], next_start[s2] = next_start[s2], next_start[s1]
        return next_start

    def simulated_annealing(self, start_temp, factor):
        cur = self.start
        temp = start_temp
        stats = []
        while True:
            if temp <= self.delta: return cur, stats
            next_start = self.get_similar_random_next(cur)
            e = self.evaluate(cur, next_start)
            stats.append(e)
            if (e > 0.5):
                cur = next_start
            else:
                should_use_anyways = random.random()
                p = np.exp((e - 0.5) / temp)
                if (should_use_anyways < p):
                    cur = next_start
            temp = temp / factor
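
Since LSStartStateTrainer kicks off the whole annealing run from its constructor, using it is just a matter of instantiating it. A minimal sketch follows; the schedule values are placeholders, and get_random_start, Engine, and the Settings/StartType/SearchType/AIType enums are assumed to be importable from the project.

# Hypothetical usage sketch -- the schedule values are placeholders.
trainer = LSStartStateTrainer(n_games=10, start_temp=1024, factor=2)  # anneals until temp drops below delta
print(trainer.best)   # best start state found; results are also appended to the train/*.txt files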