Exemple #1
0
 def get_move_list(self, current_game, mode='avg'):
     """Rank the legal directions of ``current_game`` by predicted value.

     For every legal direction, ``num_each_dir`` (10) candidate positions
     are scored with ``self.model.predict`` and folded into one number per
     direction:

     * ``"minmax"``: each candidate is a copy of ``current_game`` after
       moving in that direction; the direction's value is the smallest
       score (capped at 2.0).
     * ``"avg"``: candidates come from
       ``current_game.get_states_each_dir(num_each_dir)``; the value is
       the sum of the scores. This assumes the helper returns exactly
       ``num_each_dir`` rows per legal direction (TODO confirm).

     Args:
         current_game: position to move from; must expose
             ``direction_legal`` and, for ``"avg"``,
             ``get_states_each_dir``.
         mode: ``"avg"`` (default) or ``"minmax"``.

     Returns:
         The legal directions sorted by ascending value. ``None`` when no
         model is loaded; an empty list for an unrecognised mode or when
         no direction is legal.
     """
     if self.model is None:
         return None
     if mode not in ("minmax", "avg"):
         # Unknown modes have always produced an empty ranking.
         return []
     num_each_dir = 10
     legal_dirs = [d for d in Direction if current_game.direction_legal[d]]
     if not legal_dirs:
         # np.vstack raises ValueError on an empty sequence.
         return []

     calcs = []
     if mode == "minmax":
         for direction in legal_dirs:
             for _ in range(num_each_dir):
                 game = Game(current_game)
                 game.move(direction)
                 calcs.append(game.to_logarray())
     else:
         states = current_game.get_states_each_dir(num_each_dir)
         for direction in legal_dirs:
             calcs.extend(states[direction])

     # One batched prediction for all candidates of all directions.
     vals = self.model.predict(np.vstack(calcs))

     values = {}
     index = 0
     for direction in legal_dirs:
         # Rows of ``vals`` are grouped per direction, num_each_dir each.
         scores = [vals[index + k][0] for k in range(num_each_dir)]
         index += num_each_dir
         if mode == "minmax":
             values[direction] = min([2.0] + scores)
         else:
             values[direction] = sum(scores, 0.0)
     return sorted(values, key=lambda d: values[d])
Exemple #2
0
def play_a_game(total_num=100):
    """Play ``total_num`` games with a ``ValueLearner`` and show the states.

    A ``ValueLearner`` is created and its ``load_latest()`` is called. Each
    game is played by repeatedly applying ``vl.move(game)`` until
    ``game.end``. The move count of every game is printed, all visited
    board arrays are shown through ``Application2(...).mainloop()``, and
    the average move count is printed last.

    Args:
        total_num: number of games to play.
    """
    vl = ValueLearner()
    vl.load_latest()
    total_moves = 0
    states = []
    for _ in range(total_num):
        game = Game()
        num = 0
        while not game.end:
            states.append(game.to_array())
            game.move(vl.move(game))
            num += 1
        # Parenthesised single-argument print behaves identically on
        # Python 2 and Python 3.
        print(num)
        total_moves += num
    Application2(states).mainloop()
    print(float(total_moves) / total_num)
Exemple #3
0
 def train_on_played_games_lowest_2(self, histories, sums_back=10):
     """Build normalised training targets from played game histories.

     Histories are ordered by the ``pow2_sum`` of their final position.
     Positions whose sum lies in ``[min_game, max_game)`` are collected,
     where ``min_game`` is the lowest final sum minus ``sums_back`` and
     ``max_game`` is the final sum of the game at sorted index 10 (or of
     the highest game when fewer games are supplied).

     The raw target of a position is ``final_sum_of_its_game - its_sum``.
     Targets are divided by the largest raw target among positions with
     the same sum. Every symmetry of a position is emitted with the same
     target.

     Args:
         histories: list of games, each a list of board vectors.
         sums_back: how far below the weakest final sum to start
             collecting.

     Returns:
         ``(vecs, vals)``: log-arrays and their normalised targets, in
         matching order. Both are empty when ``histories`` is empty.
     """
     if not histories:
         return [], []
     histories = sorted(histories, key=lambda h: pow2_sum(h[-1]))
     # Clamp the reference index so batches of 10 games or fewer do not
     # raise IndexError.
     num_hist = min(10, len(histories) - 1)
     max_game = pow2_sum(histories[num_hist][-1])
     min_game = pow2_sum(histories[0][-1]) - sums_back
     act_arrays = {total: [] for total in range(min_game, max_game, 2)}
     for hist in histories:
         max_sum = pow2_sum(hist[-1])
         for vec in hist:
             total = pow2_sum(vec)
             if total < min_game:
                 continue
             if total >= max_game:
                 break
             act_arrays[total].append((vec, max_sum - total))
     vecs, vals = [], []
     # Normalise targets within each sum bucket.
     for total in sorted(act_arrays):
         pairs = act_arrays[total]
         if not pairs:
             continue
         max_val = max(pair[1] for pair in pairs)
         if max_val == 0:
             # Nothing to normalise against; skip to avoid dividing by 0.
             continue
         for vec, remaining in pairs:
             game = Game(board=list(vec))
             game_val = float(remaining) / max_val
             for sym in game.symmetries():
                 vecs.append(sym.to_logarray())
                 vals.append(game_val)
     return vecs, vals
Exemple #4
0
    def train_on_played_games_lowest(self, histories, min_game, params):
        """Collect training data just below ``min_game`` and train on it.

        ``min_game`` is first lowered by ``params["sums_back_from"]``.
        Positions whose ``pow2_sum`` lies in
        ``[min_game - 2 * sums_back, min_game)`` are bucketed by sum. The
        raw target of a position is ``final_sum_of_its_game - its_sum``,
        divided by the largest raw target in its bucket. Every symmetry of
        a position is stored in ``self.vecs[sum]`` / ``self.vals[sum]``
        and in the returned flat lists. Finally
        ``self.train_on_data_lowest(params, numrange)`` is called with the
        sums that produced data.

        Args:
            histories: list of games, each a list of board vectors.
            min_game: reference board sum (before the ``sums_back_from``
                shift).
            params: dict with at least ``"sums_back"`` and
                ``"sums_back_from"``; it is also forwarded to
                ``train_on_data_lowest``.

        Returns:
            ``(vecs, vals)``: log-arrays and normalised targets, in
            matching order.
        """
        sums_back = params["sums_back"]
        sums_back_from = params["sums_back_from"]
        min_game -= sums_back_from
        if min_game < 2 * sums_back:
            # Floor division keeps this an integer on Python 3; range()
            # below rejects floats.
            sums_back = min_game // 2
        lowest = min_game - 2 * sums_back
        act_arrays = {min_game - 2 * i: [] for i in range(sums_back + 1)}
        for hist in histories:
            max_sum = pow2_sum(hist[-1])
            # Iterating the history directly stops cleanly when a game
            # never reaches min_game; the old index loop ran off the end.
            for vec in hist:
                total = pow2_sum(vec)
                if total < lowest:
                    continue
                if total >= min_game:
                    break
                act_arrays[total].append((vec, max_sum - total))
        # Normalise targets within each sum bucket.
        numrange = []
        vecs, vals = [], []
        for total in sorted(act_arrays):
            pairs = act_arrays[total]
            if not pairs:
                continue
            max_val = max(pair[1] for pair in pairs)
            if max_val == 0:
                # Same guard as the sibling training methods: nothing to
                # normalise against, so skip instead of dividing by zero.
                continue
            numrange.append(total)
            self.vecs[total] = []
            self.vals[total] = []
            for vec, remaining in pairs:
                game = Game(board=list(vec))
                game_val = float(remaining) / max_val
                for sym in game.symmetries():
                    self.vecs[total].append(sym.to_logarray())
                    vecs.append(sym.to_logarray())
                    self.vals[total].append(game_val)
                    vals.append(game_val)

        self.train_on_data_lowest(params, numrange)
        return vecs, vals
Exemple #5
0
 def train_on_played_games(self, histories, sums_back=100, num_hist=5):
     """Build per-range training targets from played game histories.

     Histories are ordered by the ``pow2_sum`` of their final position.
     Positions whose sum lies in ``[min_game, max_game)`` are collected,
     where ``min_game`` is the lowest final sum minus ``sums_back`` and
     ``max_game`` is the final sum of the game at sorted index
     ``num_hist`` (clamped to the last game when fewer are supplied).

     The raw target of a position is ``final_sum_of_its_game - its_sum``,
     divided by the largest raw target among positions with the same sum.
     Results are split into groups: a new group starts each time the
     position sum exceeds the current entry of ``self.ranges``.

     Args:
         histories: list of games, each a list of board vectors.
         sums_back: how far below the weakest final sum to start
             collecting.
         num_hist: sorted index of the game that sets the upper bound.

     Returns:
         ``(vecs, vals)``: two parallel lists of lists, one inner list
         per range group. ``([[]], [[]])`` when ``histories`` is empty.
     """
     if not histories:
         return [[]], [[]]
     histories = sorted(histories, key=lambda h: pow2_sum(h[-1]))
     # Clamp so a small batch does not raise IndexError.
     num_hist = min(num_hist, len(histories) - 1)
     max_game = pow2_sum(histories[num_hist][-1])
     min_game = pow2_sum(histories[0][-1]) - sums_back
     act_arrays = {total: [] for total in range(min_game, max_game, 2)}
     for hist in histories:
         max_sum = pow2_sum(hist[-1])
         for vec in hist:
             total = pow2_sum(vec)
             if total < min_game:
                 continue
             if total >= max_game:
                 break
             act_arrays[total].append((vec, max_sum - total))
     vecs, vals = [[]], [[]]
     current_index = 0
     for total in sorted(act_arrays):
         # Open a new group for every range boundary this sum has passed.
         while self.ranges[current_index] < total:
             vecs.append([])
             vals.append([])
             current_index += 1
         pairs = act_arrays[total]
         if not pairs:
             continue
         max_val = max(pair[1] for pair in pairs)
         if max_val == 0:
             continue
         for vec, remaining in pairs:
             game = Game(board=list(vec))
             game_val = float(remaining) / max_val
             for sym in game.symmetries():
                 vecs[-1].append(sym.to_logarray())
                 vals[-1].append(game_val)
     return vecs, vals
Exemple #6
0
def _symmetry_evs(board, num_random_games, logarrays):
    """Score ``board``: append each symmetry's log-array to ``logarrays``
    and return the mean playout value repeated once per symmetry."""
    syms = Game(board=board).symmetries()
    if not syms:
        return []
    total = 0.0
    for g in syms:
        total += play_random_games(g, num_random_games)
        logarrays.append(g.to_logarray())
    # One entry per symmetry keeps the values aligned with ``logarrays``
    # whatever the number of symmetries is.
    return [total / len(syms)] * len(syms)


def random_EV(sum, num_games, shuffles=5, num_random_games=10):
    """Estimate normalised values for random boards with a given sum.

    For each of ``num_games`` boards from
    ``game_sort(random_game_board(sum))``, ``shuffles`` shuffled copies
    and then the sorted board itself are scored. A board's score is the
    mean of ``play_random_games`` over all of its symmetries. Scores are
    rescaled linearly so the smallest becomes 0 and the largest 1.

    Args:
        sum: board sum passed to ``random_game_board``.
        num_games: number of base boards to generate.
        shuffles: shuffled variants scored per base board.
        num_random_games: forwarded to ``play_random_games``.

    Returns:
        ``(logarrays, evs)`` of equal length. ``([], [])`` when nothing
        was scored or when all scores are identical.
    """
    logarrays = []
    expvals = []
    for _ in range(num_games):
        board = game_sort(random_game_board(sum))
        for _ in range(shuffles):
            b = copy.deepcopy(board)
            random.shuffle(b)
            expvals.extend(_symmetry_evs(b, num_random_games, logarrays))
        expvals.extend(_symmetry_evs(board, num_random_games, logarrays))
    if not expvals:
        # min()/max() raise on an empty sequence (e.g. num_games == 0).
        return [], []
    minval = min(expvals)
    maxval = max(expvals)
    dif = maxval - minval
    if dif == 0.0:
        return [], []
    return logarrays, [(ev - minval) / dif for ev in expvals]
Exemple #7
0
 def play_games(self, total_num):
     """Play ``total_num`` games with ``self.move`` and record every position.

     Each game's record holds the log-array of every position before a
     move, plus the final position. ``self.log`` receives the smallest
     final board sum and the mean record length.

     Returns:
         ``(histories, min_game)``: the list of per-game records and the
         smallest final ``np.sum(game.board)`` seen.
     """
     lowest_total = 999999999
     histories = []
     for _ in range(total_num):
         game = Game()
         record = []
         while not game.end:
             record.append(game.to_logarray())
             chosen = self.move(game)
             game.move(chosen)
         record.append(game.to_logarray())
         histories.append(record)
         lowest_total = min(lowest_total, np.sum(game.board))
     positions_seen = sum(len(record) for record in histories)
     self.log(lowest_total, float(positions_seen) / total_num)
     return histories, lowest_total
Exemple #8
0
 def play_games(self, total_num):
     """Play ``total_num`` games, switching models as the board sum grows.

     Every move is the last entry of
     ``self.get_move_list_exhaustive(game, model_index)``. After each
     move, ``model_index`` advances by one if the board sum exceeds
     ``self.ranges[model_index]``. Each game's history holds the
     log-array of every position before a move, plus the final position.
     ``self.log`` receives the mean history length.

     Returns:
         The list of per-game histories.
     """
     total_positions = 0
     histories = []
     for _ in range(total_num):
         history = []
         game = Game()
         model_index = 0
         while not game.end:
             history.append(game.to_logarray())
             direction = self.get_move_list_exhaustive(game, model_index)[-1]
             game.move(direction)
             # At most one step per move: the sum is compared against the
             # bound of the model that was just used.
             if np.sum(game.board) > self.ranges[model_index]:
                 model_index += 1
         history.append(game.to_logarray())
         total_positions += len(history)
         histories.append(history)
     self.log(float(total_positions) / total_num)
     return histories
Exemple #9
0
def make_eval(qlnode, model):
    """Score the pending game vectors of ``qlnode`` and evaluate the node.

    The vectors returned by ``qlnode.get_game_vectors()`` are stacked into
    one matrix and passed to ``model.predict`` in a single call. The first
    column of row ``i`` is stored as ``ids[i].value``. ``qlnode.evaluate()``
    is called afterwards, whether or not there were vectors to score.
    """
    vecs, ids = qlnode.get_game_vectors()
    if len(vecs) == 0:
        # Nothing to predict; only the evaluation step remains.
        qlnode.evaluate()
        return
    predictions = model.predict(np.vstack(vecs))
    for position, row in enumerate(predictions):
        ids[position].value = row[0]
    qlnode.evaluate()


if __name__ == '__main__':
    # Script entry point: build a QLNode around a new Game. The result is
    # not kept, so this only exercises construction.
    QLNode(Game())














Exemple #10
0
# ui objects
from interface import UI, UIElement, Padding, Margin
from pygame_gui.elements import UILabel, UIButton, UITextEntryLine
# handlers
from interface import exit_button_handler, \
    change_interface, quit_from_game, load_game, exit_event_handler
# game objects
from interface import Game

# Module-level game object that the interfaces below are registered on.
game = Game()
# Main menu, registered under the name 'main': a title label followed by
# PLAY and EXIT buttons. The ui_id values identify the two buttons.
game.add_interface('main',
                   interface=UI(elements=[
                       UIElement(UILabel, 300, 50, text='SPACESHIPS'),
                       UIElement(UIButton,
                                 200,
                                 75,
                                 text='PLAY',
                                 ui_id='play-button'),
                       UIElement(UIButton,
                                 200,
                                 75,
                                 text='EXIT',
                                 ui_id='exit-button')
                   ],
                                hor_layout=UI.Center,
                                ver_layout=UI.Top,
                                padding=Padding.all(200),
                                margin=Margin.only(bottom=30)))
# main menu buttons
game.add_handler(
Exemple #11
0
def controller(strategies,
               grid_size,
               candy_ratio=1.,
               max_iter=None,
               verbose=0,
               gui_active=False,
               game_speed=None):
    """Run one multiplayer snake game and return its final state.

    Args:
        strategies: agents; each must expose ``name``. An agent named
            ``"human"`` is driven by the arrow keys (space doubles the
            speed) and the game waits for a key press before starting.
        grid_size: forwarded to ``Game`` and to the GUI window.
        candy_ratio: forwarded to ``Game``.
        max_iter: forwarded to ``Game``.
        verbose: ``> 0`` prints the grid every step, ``> 1`` also prints
            the state and the chosen actions.
        gui_active: open a window and keep running until it is closed.
            Without a GUI the loop stops as soon as the game is over.
        game_speed: optional frame rate passed to ``clock.tick``.

    Returns:
        The last game state.
    """
    # Pygame Init
    pygame.init()

    clock = pygame.time.Clock()
    quit_game = False
    win = None
    if gui_active:
        gui_options = gui.Options()
        win = gui.Window(grid_size, 'Multiplayer Snake', gui_options)

    # Start Game
    game = Game(grid_size,
                len(strategies),
                candy_ratio=candy_ratio,
                max_iter=max_iter)
    state = game.start(strategies)
    prev_human_action = None
    game_over = False

    agent_names = [a.name for a in strategies]
    i_human = agent_names.index("human") if "human" in agent_names else None

    if i_human is not None:
        # Hold the game until the human presses any key.
        waiting = True
        while waiting:
            for event in pygame.event.get():
                if event.type == pygame.KEYDOWN:
                    waiting = False
                    break

    # Arrow key -> grid direction for the human player.
    arrow_directions = {
        pygame.K_LEFT: (-1, 0),
        pygame.K_RIGHT: (1, 0),
        pygame.K_UP: (0, -1),
        pygame.K_DOWN: (0, 1),
    }

    while not ((gui_active and quit_game) or ((not gui_active) and game_over)):
        # Print state
        if verbose > 0:
            state.printGrid(game.grid_size)

        # Get events. They are needed for the window's QUIT handling and
        # for the human's keys. Previously they were only fetched with
        # the GUI on, so a human agent without GUI hit a NameError.
        events = []
        if gui_active or i_human is not None:
            events = pygame.event.get()
        if gui_active and any(ev.type == pygame.QUIT for ev in events):
            quit_game = True
            continue

        # Compute the actions for each player following its strategy (except human)
        actions = game.agentActions()

        # Compute human strategy if necessary
        human_action = None
        if i_human is not None:
            keys_down = [ev.key for ev in events if ev.type == pygame.KEYDOWN]
            speed = 2. if pygame.K_SPACE in keys_down else 1.
            arrow_key = False
            for key in keys_down:
                # The last arrow key pressed in this frame wins.
                if key in arrow_directions:
                    human_action = move.Move(arrow_directions[key], speed)
                    arrow_key = True

            if not arrow_key:
                if prev_human_action is None:
                    # No input yet: start by moving up.
                    human_action = move.Move((0, -1), speed)
                else:
                    human_action = prev_human_action

        # Assign human action
        if i_human is not None and i_human in actions.keys():
            actions[i_human] = human_action
            prev_human_action = human_action

        if verbose > 1:
            print(state)
            print(actions)

        # Update the state
        if not game_over:
            state = game.succ(state, actions, copy=False)
        # Pause
        if game_speed:
            clock.tick(game_speed)

        # Check if game over
        game_over = game.isEnd(state)

        # Update gui
        if gui_active:
            win.updateSprites(state)
            win.refresh()

    if verbose > 0:
        state.printGrid(game.grid_size)

    return state
Exemple #12
0
 def train_random_moves(self,
                        min_length=300,
                        eps=0.03,
                        max_active_games=30):
     """Play one game and randomly fork it to collect several histories.

     Phase 1 replays a fresh game (up to ``min_length`` moves chosen by
     ``self.move``) until the opening has at least ``min_length``
     positions and ``self.eval`` of the reached position is at least 0.3.

     Phase 2 advances every active game with the last entry of
     ``self.get_move_list``. With probability ``eps`` a game is forked,
     provided more than one move is available and fewer than
     ``max_active_games`` games are active. The fork copies the history
     so far and continues with the second-to-last entry of the move list.
     Games are dropped once they end.

     Args:
         min_length: minimum number of positions in the opening.
         eps: per-move fork probability.
         max_active_games: no new forks at or above this many games.

     Returns:
         ``(histories, branches)``: one list of log-arrays per game
         (index 0 is the original game) and the board sums at which forks
         were made.
     """
     branches = []
     histories = [[]]
     active_games = []
     num_games = 0
     position_eval = 0
     while len(histories[0]) < min_length or position_eval < 0.3:
         histories[0] = []
         active_games = []
         game = Game()
         for _ in range(min_length):
             game.move(self.move(game))
             histories[0].append(game.to_logarray())
             if game.end:
                 break
         active_games.append((game, 0))
         position_eval = self.eval(game)
     while len(active_games) > 0:
         new_games = []
         for game, index in active_games:
             move_list = self.get_move_list(game)
             rand = random.random()
             if rand < eps and len(move_list) > 1 and len(
                     active_games) < max_active_games:
                 new_node = Game(game)
                 num_games += 1
                 new_games.append((new_node, num_games))
                 histories.append(copy.deepcopy(histories[index]))
                 branches.append(np.sum(game.board))
                 game.move(move_list[-1])
                 histories[index].append(game.to_logarray())
                 # The fork always takes the second-to-last ranked move.
                 new_node.move(move_list[-2])
                 histories[num_games].append(new_node.to_logarray())
             else:
                 game.move(move_list[-1])
                 histories[index].append(game.to_logarray())
         active_games.extend(new_games)
         active_games = [pair for pair in active_games if not pair[0].end]
     # Single-argument print calls produce the same output on Python 2
     # and Python 3.
     print(len(histories))
     histl = max(histories, key=len)
     print("%d %d" % (len(histl), len(histories[0])))
     if len(histl) > 1500:
         Application2(histl).mainloop()
     return histories, branches
Exemple #13
0
 def play_games_2(self, total_num):
     """Play ``total_num`` games and collect the alternative continuations.

     Every move is the last entry of ``self.get_move_list(game)``. At
     each position, a copy of the game is also advanced with the
     second-to-last entry and, when available, another copy with the
     third-to-last entry. Copies that are not finished are kept.
     ``self.log`` receives the smallest final board sum and the mean
     history length.

     Returns:
         ``(histories, min_game, seconds, thirds)``: per-game lists of
         log-arrays, the smallest final ``np.sum(game.board)``, and the
         games reached via the second-to-last and third-to-last ranked
         moves respectively.
     """
     min_game = 999999999
     total_positions = 0
     histories = []
     seconds = []
     thirds = []
     for _ in range(total_num):
         history = []
         game = Game()
         while not game.end:
             history.append(game.to_logarray())
             moves = self.get_move_list(game)
             # rank 2 feeds ``seconds``, rank 3 feeds ``thirds``. The
             # original appended both to ``seconds``, so ``thirds`` was
             # always empty.
             for rank, bucket in ((2, seconds), (3, thirds)):
                 if len(moves) >= rank:
                     newgame = Game(game)
                     newgame.move(moves[-rank])
                     if not newgame.end:
                         bucket.append(newgame)
             game.move(moves[-1])
         history.append(game.to_logarray())
         min_game = min(min_game, np.sum(game.board))
         total_positions += len(history)
         histories.append(history)
     self.log(min_game, float(total_positions) / total_num)
     return histories, min_game, seconds, thirds
Exemple #14
0
# Train one shared Q-table over a sweep of shooting radii. The table is
# carried over from game to game and from radius to radius, and a pickle
# snapshot is written after each radius.
universalQTable = dict()
gamesToPlay = 50000
radiiOfShooting = np.arange(0, 3, 0.5)
# allShotsTaken = np.zeros(gamesToPlay)
initialEpsilon = 0.9

# Create the output directory up front. Otherwise a missing directory is
# only discovered by open() after the first full training run.
os.makedirs("./rawTables", exist_ok=True)

for radiusOfShooting in radiiOfShooting:
    tic = time.perf_counter()
    for i in range(gamesToPlay):
        currentEpsilon = calculateEpsilon(initialEpsilon, i)
        # "\r" keeps the progress message on a single console line.
        print("Epsilon = {:0.4f}, Playing Game #{}".format(currentEpsilon, i),
              end="\r")
        currentGame = Game(qTable=universalQTable,
                           radiusOfShooting=radiusOfShooting,
                           epsilon=currentEpsilon)
        currentGame.playGame()
        universalQTable = currentGame.getQTable()
        # If you want to know how many shots were taken uncomment the following line
        # allShotsTaken[i] = currentGame.getShotsTaken()

    toc = time.perf_counter()
    print("It took {:0.2f}s to run {} simulations with radius {:0.2f}!".format(
        (toc - tic), gamesToPlay, radiusOfShooting))

    # Export the Q-Table to a pickle
    fileName = "rawTable_{:0.2f}.p".format(radiusOfShooting)
    fullPath = os.path.join("./rawTables", fileName)
    with open(fullPath, "wb") as filePointer:
        pickle.dump(universalQTable, filePointer)
Exemple #15
0
import pickle
from interface import Game, QPlayer

# Self-play training: two Q-learning players share one Game, and the
# resulting Q-table is pickled once all episodes have run.
epsilon = 0.9
player1 = QPlayer(mark="X", epsilon=epsilon)
player2 = QPlayer(mark="O", epsilon=epsilon)
game = Game(player1, player2, True)

N_episodes = 20000
for episodes in range(N_episodes):
    game.start()
    game.reset()

Q = game.Q

filename = "Q_training_Nepisodes_{}.p".format(N_episodes)
# A context manager guarantees the file is flushed and closed; the bare
# open() call left that to garbage collection.
with open(filename, "wb") as q_file:
    pickle.dump(Q, q_file)