def get_move_list(self, current_game, mode='avg'):
    if self.model is None:
        return
    num_each_dir = 10
    values = {}
    if mode == "minmax":
        # Score each legal direction by the worst predicted value over
        # num_each_dir sampled successor states.
        calcs = []
        for dir in list(Direction):
            if not current_game.direction_legal[dir]:
                continue
            values[dir] = 2.0  # sentinel above any model output
            for _ in range(num_each_dir):
                game = Game(current_game)
                game.move(dir)
                calcs.append(game.to_logarray())
        poss_arrays = np.vstack(calcs)
        vals = self.model.predict(poss_arrays)
        index = 0
        for dir in list(Direction):
            if dir not in values:
                continue
            for _ in range(num_each_dir):
                values[dir] = min(values[dir], vals[index][0])
                index += 1
    elif mode == "avg":
        # Score each legal direction by the sum (equivalently the mean)
        # of predicted values over num_each_dir sampled successors.
        calcs = []
        states = current_game.get_states_each_dir(num_each_dir)
        for dir in list(Direction):
            if not current_game.direction_legal[dir]:
                continue
            values[dir] = 0.0
            calcs.extend(states[dir])
        poss_arrays = np.vstack(calcs)
        vals = self.model.predict(poss_arrays)
        index = 0
        for dir in list(Direction):
            if dir not in values:
                continue
            for _ in range(num_each_dir):
                values[dir] += vals[index][0]
                index += 1
    # Ascending sort: the best direction is the last element.
    return sorted(values.keys(), key=lambda x: values[x])
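# Usage sketch (an assumption, not shown in the source): play_a_game and
# play_games below call self.move / vl.move, which plausibly just takes
# the last (highest-valued) entry of the sorted list, with a random legal
# fallback for the None returned before a model is loaded.
def move(self, current_game):
    move_list = self.get_move_list(current_game)
    if not move_list:
        legal = [d for d in list(Direction) if current_game.direction_legal[d]]
        return random.choice(legal)
    return move_list[-1]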
def play_a_game(total_num=100):
    vl = ValueLearner()
    vl.load_latest()
    avg = 0
    states = []
    for i in range(total_num):
        game = Game()
        num = 0
        while not game.end:
            states.append(game.to_array())
            game.move(vl.move(game))
            num += 1
        print(num)
        avg += num
    # Replay the collected states in the viewer.
    Application2(states).mainloop()
    print(float(avg) / total_num)
def train_on_played_games_lowest_2(self, histories, sums_back=10):
    # Sort histories by the tile sum of their final board, weakest first.
    histories = sorted(histories, key=lambda x: pow2_sum(x[-1]))
    num_hist = 10  # int(len(histories) * 0.5)
    max_game = pow2_sum(histories[num_hist][-1])
    min_game = pow2_sum(histories[0][-1]) - sums_back
    act_arrays = {i: [] for i in range(min_game, max_game, 2)}
    # Bucket every state by its tile sum, labelled with how much further
    # that game eventually got (max_sum - board_sum).
    for hist in histories:
        max_sum = pow2_sum(hist[-1])
        index = 0
        while index < len(hist):
            vec = hist[index]
            board_sum = pow2_sum(vec)
            index += 1
            if board_sum < min_game:
                continue
            if board_sum >= max_game:
                break
            act_arrays[board_sum].append((vec, max_sum - board_sum))
    vecs, vals = [], []
    # Normalize each label by the best outcome seen at the same tile sum.
    for board_sum in sorted(act_arrays.keys()):
        if len(act_arrays[board_sum]) == 0:
            continue
        max_val = max(act_arrays[board_sum], key=lambda x: x[1])[1]
        if max_val == 0:
            continue
        for pair in act_arrays[board_sum]:
            game = Game(board=list(pair[0]))
            game_val = float(pair[1]) / max_val
            # Alternative value shaping, kept for reference:
            # cut_num = 100
            # game_val = 1.0 if pair[1] > cut_num else float(pair[1]) / cut_num
            # rangel = 0.9
            # game_val = (1 - rangel) / 2 + rangel * game_val
            for sym in game.symmetries():
                vecs.append(sym.to_logarray())
                vals.append(game_val)
    return vecs, vals
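# pow2_sum is not defined in this section. A minimal sketch, assuming a
# stored state holds each cell as a log2 exponent (0 for empty), so the
# board's tile sum is the sum of 2**exponent over occupied cells:
def pow2_sum(logvec):
    return sum(2 ** int(v) for v in logvec if v > 0)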
def train_on_played_games_lowest(self, histories, min_game, params):
    sums_back = params["sums_back"]
    sums_back_from = params["sums_back_from"]
    min_game -= sums_back_from
    if min_game < 2 * sums_back:
        sums_back = min_game // 2
    act_arrays = {min_game - i * 2: [] for i in range(sums_back + 1)}
    for hist in histories:
        max_sum = pow2_sum(hist[-1])
        index = 0
        while index < len(hist):  # bounds check added; the original looped unguarded
            vec = hist[index]
            board_sum = pow2_sum(vec)
            index += 1
            if board_sum < min_game - 2 * sums_back:
                continue
            if board_sum >= min_game:
                break
            act_arrays[board_sum].append((vec, max_sum - board_sum))
    # Normalize labels per tile sum, cache the training pairs, and train.
    numrange = []
    vecs, vals = [], []
    for board_sum in sorted(act_arrays.keys()):
        if len(act_arrays[board_sum]) == 0:
            continue
        numrange.append(board_sum)
        self.vecs[board_sum] = []
        self.vals[board_sum] = []
        max_val = max(act_arrays[board_sum], key=lambda x: x[1])[1]
        if max_val == 0:  # guard added, mirroring the sibling trainers
            continue
        for pair in act_arrays[board_sum]:
            game = Game(board=list(pair[0]))
            game_val = float(pair[1]) / max_val
            for sym in game.symmetries():
                self.vecs[board_sum].append(sym.to_logarray())
                vecs.append(sym.to_logarray())
                self.vals[board_sum].append(game_val)
                vals.append(game_val)
    self.train_on_data_lowest(params, numrange)
    return vecs, vals
def train_on_played_games(self, histories, sums_back=100, num_hist=5):
    histories = sorted(histories, key=lambda x: pow2_sum(x[-1]))
    max_game = pow2_sum(histories[num_hist][-1])
    min_game = pow2_sum(histories[0][-1]) - sums_back
    act_arrays = {i: [] for i in range(min_game, max_game, 2)}
    for hist in histories:
        max_sum = pow2_sum(hist[-1])
        index = 0
        while index < len(hist):
            vec = hist[index]
            board_sum = pow2_sum(vec)
            index += 1
            if board_sum < min_game:
                continue
            if board_sum >= max_game:
                break
            act_arrays[board_sum].append((vec, max_sum - board_sum))
    # Split the training pairs into one bucket per entry of self.ranges
    # (ascending tile-sum thresholds, one model per range).
    vecs, vals = [[]], [[]]
    current_index = 0
    for board_sum in sorted(act_arrays.keys()):
        while self.ranges[current_index] < board_sum:
            vecs.append([])
            vals.append([])
            current_index += 1
        if len(act_arrays[board_sum]) == 0:
            continue
        max_val = max(act_arrays[board_sum], key=lambda x: x[1])[1]
        if max_val == 0:
            continue
        for pair in act_arrays[board_sum]:
            game = Game(board=list(pair[0]))
            game_val = float(pair[1]) / max_val
            for sym in game.symmetries():
                vecs[-1].append(sym.to_logarray())
                vals[-1].append(game_val)
    return vecs, vals
def random_EV(board_sum, num_games, shuffles=5, num_random_games=10):
    logarrays = []
    expvals = []
    for i1 in range(num_games):
        board = game_sort(random_game_board(board_sum))
        for i2 in range(shuffles):
            b = copy.deepcopy(board)
            random.shuffle(b)
            game = Game(board=b)
            syms = game.symmetries()
            ev = 0.0
            for g in syms:
                ev += play_random_games(g, num_random_games)
                logarrays.append(g.to_logarray())
            expvals.append(ev / 8)
        game = Game(board=board)
        syms = game.symmetries()
        ev = 0.0
        for g in syms:
            ev += play_random_games(g, num_random_games)
            logarrays.append(g.to_logarray())
        expvals.append(ev / 8)
    # Rescale expected values to [0, 1]; each value covers the 8
    # symmetric boards appended above, hence the 8-fold repeat below.
    minval = min(expvals)
    maxval = max(expvals)
    dif = maxval - minval
    if dif == 0.0:
        return [], []
    evs = []
    for ev in expvals:
        for i3 in range(8):
            evs.append((ev - minval) / dif)
    return logarrays, evs
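# play_random_games is assumed (not shown here) to score a position by
# random rollouts: finish num_games games with uniformly random legal
# moves and return the average number of moves survived. A sketch:
def play_random_games(start_game, num_games):
    total_moves = 0
    for _ in range(num_games):
        game = Game(start_game)  # copy constructor, as used above
        while not game.end:
            legal = [d for d in list(Direction) if game.direction_legal[d]]
            game.move(random.choice(legal))
            total_moves += 1
    return float(total_moves) / num_games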
def play_games(self, total_num):
    min_game = 999999999  # sentinel: smallest final tile sum seen
    avg = 0
    histories = []
    for i in range(total_num):
        history = []
        game = Game()
        while not game.end:
            history.append(game.to_logarray())
            game.move(self.move(game))
        history.append(game.to_logarray())  # include the terminal state
        min_game = min(min_game, np.sum(game.board))
        avg += len(history)
        histories.append(history)
    self.log(min_game, float(avg) / total_num)
    return histories, min_game
def play_games(self, total_num):
    avg = 0
    histories = []
    for i in range(total_num):
        history = []
        game = Game()
        current_sum = np.sum(game.board)
        model_index = 0
        while not game.end:
            history.append(game.to_logarray())
            dir = self.get_move_list_exhaustive(game, model_index)[-1]
            game.move(dir)
            # Switch to the next range's model once the tile sum
            # crosses the current threshold.
            current_sum = np.sum(game.board)
            if current_sum > self.ranges[model_index]:
                model_index += 1
        history.append(game.to_logarray())
        avg += len(history)
        histories.append(history)
    self.log(float(avg) / total_num)
    return histories
def make_eval(qlnode, model):
    # Batch-predict all leaf positions in one call, write the values
    # back onto their nodes, then propagate with qlnode.evaluate().
    vecs, ids = qlnode.get_game_vectors()
    if len(vecs) != 0:
        matrix = np.vstack(vecs)
        values = model.predict(matrix)
        for i in range(len(values)):
            ids[i].value = values[i][0]
    qlnode.evaluate()


if __name__ == '__main__':
    QLNode(Game())
# ui objects
from interface import UI, UIElement, Padding, Margin
from pygame_gui.elements import UILabel, UIButton, UITextEntryLine
# handlers
from interface import exit_button_handler, \
    change_interface, quit_from_game, load_game, exit_event_handler
# game objects
from interface import Game

game = Game()

# main menu
game.add_interface('main', interface=UI(
    elements=[
        UIElement(UILabel, 300, 50, text='SPACESHIPS'),
        UIElement(UIButton, 200, 75, text='PLAY', ui_id='play-button'),
        UIElement(UIButton, 200, 75, text='EXIT', ui_id='exit-button')
    ],
    hor_layout=UI.Center,
    ver_layout=UI.Top,
    padding=Padding.all(200),
    margin=Margin.only(bottom=30)))

# main menu buttons
game.add_handler(
def controller(strategies, grid_size, candy_ratio=1., max_iter=None,
               verbose=0, gui_active=False, game_speed=None):
    # Pygame init
    pygame.init()
    clock = pygame.time.Clock()
    if gui_active:
        gui_options = gui.Options()
        win = gui.Window(grid_size, 'Multiplayer Snake', gui_options)
    quit_game = False

    # Start game
    game = Game(grid_size, len(strategies), candy_ratio=candy_ratio,
                max_iter=max_iter)
    state = game.start(strategies)
    prev_human_action = None
    game_over = False

    agent_names = [a.name for a in strategies]
    if "human" in agent_names:
        # Wait for a key press before starting a game with a human player.
        waiting = True
        while waiting:
            for event in pygame.event.get():
                if event.type == pygame.KEYDOWN:
                    waiting = False
                    break

    i_human = None
    if "human" in agent_names:
        i_human = agent_names.index("human")

    while not ((gui_active and quit_game) or ((not gui_active) and game_over)):
        # Print state
        if verbose > 0:
            state.printGrid(game.grid_size)

        # Get events (human play therefore requires gui_active)
        if gui_active:
            events = pygame.event.get()
            if pygame.QUIT in [ev.type for ev in events]:
                quit_game = True
                continue

        # Compute the actions for each player following its strategy (except human)
        actions = game.agentActions()

        # Compute human strategy if necessary
        human_action = None
        if i_human is not None:
            speed = 2. if pygame.K_SPACE in [ev.key for ev in events
                                             if ev.type == pygame.KEYDOWN] else 1.
            arrow_key = False
            for event in events:
                if event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_LEFT:
                        human_action = move.Move((-1, 0), speed)
                        arrow_key = True
                    if event.key == pygame.K_RIGHT:
                        human_action = move.Move((1, 0), speed)
                        arrow_key = True
                    if event.key == pygame.K_UP:
                        human_action = move.Move((0, -1), speed)
                        arrow_key = True
                    if event.key == pygame.K_DOWN:
                        human_action = move.Move((0, 1), speed)
                        arrow_key = True
            if not arrow_key and prev_human_action is None:
                human_action = move.Move((0, -1), speed)
            elif not arrow_key:
                human_action = prev_human_action

        # Assign human action
        if i_human is not None and i_human in list(actions.keys()):
            actions[i_human] = human_action
            prev_human_action = human_action

        if verbose > 1:
            print(state)
            print(actions)

        # Update the state
        if not game_over:
            state = game.succ(state, actions, copy=False)

        # Pause
        if game_speed:
            clock.tick(game_speed)

        # Check if game over
        game_over = game.isEnd(state)
        # if game_over:
        #     win.print_message('GAME OVER')

        # Update gui
        if gui_active:
            win.updateSprites(state)
            win.refresh()

    if verbose > 0:
        state.printGrid(game.grid_size)
    return state
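# Hypothetical entry point (the strategy class and module are assumptions;
# controller only requires objects with a .name attribute and whatever
# Game expects): run a 500-step headless match between two bots.
if __name__ == '__main__':
    from strategies import RandomStrategy  # assumed module
    final_state = controller([RandomStrategy('bot1'), RandomStrategy('bot2')],
                             grid_size=20, max_iter=500, verbose=1)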
def train_random_moves(self, min_length=300, eps=0.03, max_active_games=30):
    branches = []
    histories = [[]]
    active_games = []
    num_games = 0
    ev = 0  # renamed from `eval` to avoid shadowing the builtin
    # Find a seed game that survives min_length moves and still looks
    # healthy to the value model.
    while len(histories[0]) < min_length or ev < 0.3:
        histories[0] = []
        active_games = []
        game = Game()
        for _ in range(min_length):
            game.move(self.move(game))
            histories[0].append(game.to_logarray())
            if game.end:
                break
        active_games.append((game, 0))
        ev = self.eval(game)
    # Play all active games to the end, occasionally forking a copy that
    # takes the second-best move instead of the best one.
    while len(active_games) > 0:
        new_games = []
        for game, index in active_games:
            move_list = self.get_move_list(game)
            rand = random.random()
            if rand < eps and len(move_list) > 1 and len(active_games) < max_active_games:
                new_node = Game(game)
                num_games += 1
                new_games.append((new_node, num_games))
                histories.append(copy.deepcopy(histories[index]))
                branches.append(np.sum(game.board))
                game.move(move_list[-1])
                histories[index].append(game.to_logarray())
                move = 2  # random.randint(2, min(len(move_list), 3))
                new_node.move(move_list[-move])
                histories[num_games].append(new_node.to_logarray())
            else:
                game.move(move_list[-1])
                histories[index].append(game.to_logarray())
        active_games.extend(new_games)
        active_games = [pair for pair in active_games if not pair[0].end]
    print(len(histories))
    histl = max(histories, key=lambda x: len(x))
    print(len(histl), len(histories[0]))
    if len(histl) > 1500:
        Application2(histl).mainloop()
    return histories, branches
def play_games_2(self, total_num):
    min_game = 999999999
    avg = 0
    histories = []
    seconds = []
    thirds = []
    for i in range(total_num):
        history = []
        game = Game()
        while not game.end:
            history.append(game.to_logarray())
            moves = self.get_move_list(game)
            # Also keep the positions reached by the second- and
            # third-best moves for later analysis.
            if len(moves) > 1:
                newgame = Game(game)
                newgame.move(moves[-2])
                if not newgame.end:
                    seconds.append(newgame)
            if len(moves) > 2:
                newgame = Game(game)
                newgame.move(moves[-3])
                if not newgame.end:
                    thirds.append(newgame)  # bug fix: was appended to seconds
            game.move(moves[-1])
        history.append(game.to_logarray())
        min_game = min(min_game, np.sum(game.board))
        avg += len(history)
        histories.append(history)
    self.log(min_game, float(avg) / total_num)
    return histories, min_game, seconds, thirds
# Create a universal Q-table shared by every training game.
universalQTable = dict()
gamesToPlay = 50000
radiiOfShooting = np.arange(0, 3, 0.5)
# allShotsTaken = np.zeros(gamesToPlay)
initialEpsilon = 0.9

for radiusOfShooting in radiiOfShooting:
    tic = time.perf_counter()
    for i in range(gamesToPlay):
        currentEpsilon = calculateEpsilon(initialEpsilon, i)
        print("Epsilon = {:0.4f}, Playing Game #{}".format(currentEpsilon, i), end="\r")
        currentGame = Game(qTable=universalQTable,
                           radiusOfShooting=radiusOfShooting,
                           epsilon=currentEpsilon)
        currentGame.playGame()
        universalQTable = currentGame.getQTable()
        # If you want to know how many shots were taken, uncomment the following line
        # allShotsTaken[i] = currentGame.getShotsTaken()
    toc = time.perf_counter()
    print("It took {:0.2f}s to run {} simulations with radius {:0.2f}!".format(
        (toc - tic), gamesToPlay, radiusOfShooting))

    # Export the Q-table to a pickle
    fileName = "rawTable_{:0.2f}.p".format(radiusOfShooting)
    fullPath = os.path.join("./rawTables", fileName)
    with open(fullPath, "wb") as filePointer:
        pickle.dump(universalQTable, filePointer)
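# calculateEpsilon is not shown above. A plausible sketch, assuming a
# linear decay from the initial value toward a small exploration floor
# over the 50000 games (the real schedule may differ):
def calculateEpsilon(initialEpsilon, gameIndex, totalGames=50000,
                     minimumEpsilon=0.01):
    fractionDone = float(gameIndex) / totalGames
    return max(minimumEpsilon, initialEpsilon * (1.0 - fractionDone))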
import pickle

from interface import Game, QPlayer

epsilon = 0.9
player1 = QPlayer(mark="X", epsilon=epsilon)
player2 = QPlayer(mark="O", epsilon=epsilon)
game = Game(player1, player2, True)

N_episodes = 20000
for episode in range(N_episodes):
    game.start()
    game.reset()

Q = game.Q
filename = "Q_training_Nepisodes_{}.p".format(N_episodes)
with open(filename, "wb") as f:
    pickle.dump(Q, f)
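# To reuse the table later, reload it and drop exploration. That QPlayer
# exposes a mutable epsilon and Game a mutable Q is an assumption about
# the interface module:
with open(filename, "rb") as f:
    Q = pickle.load(f)
player1.epsilon = 0.0
player2.epsilon = 0.0
game.Q = Q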