def test_final_position_should_be_less_than_or_equal_to_105(self):
    # The final position must never exceed 105, for any turn count or dice mode.
    for turns in (10, 20, 30, 40, 50):
        for dice in (True, False):
            with self.subTest(turns=turns, dice=dice):
                self.assertLessEqual(run_game(turns, dice), 105)
def simulate_config(**kwargs):
    # Force simulation mode, then time a single run.
    kwargs["simulate"] = True
    start_time = time.time()
    results = run_game(**kwargs)
    end_time = time.time()
    print("total seconds for the simulation:", int(end_time - start_time))
    return results
def performance_stats(model1, model2, verbose=False, N_RUNS=50):
    wins = loss = draw = 0
    model1._moves = []
    for _ in tqdm(range(N_RUNS), desc="Scoring"):
        result = run_game(alg0=model1, alg1=model2, verbose=False)
        if result == 0:
            wins += 1
        elif result == 1:
            loss += 1
        else:
            draw += 1
    p = wins / N_RUNS
    # 95% normal-approximation confidence interval for the win rate.
    ci = 1.96 * math.sqrt(p * (1 - p) / N_RUNS)
    print(
        f"As player 0: wins/draws/losses = "
        f"{100*wins/N_RUNS}/{100*draw/N_RUNS}/{100*loss/N_RUNS}% +/- {round(100*ci, 1)}%"
    )
    print("moves played:")
    print(pd.Series(model1._moves).value_counts(normalize=True).sort_index())

    win_rate = wins
    wins = loss = draw = 0
    model1._moves = []
    for _ in tqdm(range(N_RUNS), desc="Scoring"):
        result = run_game(alg0=model2, alg1=model1, verbose=False)
        if result == 1:
            wins += 1
        elif result == 0:
            loss += 1
        else:
            draw += 1
    p = wins / N_RUNS
    ci = 1.96 * math.sqrt(p * (1 - p) / N_RUNS)
    print(
        f"As player 1: wins/draws/losses = "
        f"{100*wins/N_RUNS}/{100*draw/N_RUNS}/{100*loss/N_RUNS}% +/- {round(100*ci, 1)}%"
    )
    print("moves played:")
    print(pd.Series(model1._moves).value_counts(normalize=True).sort_index())
    return (1.0 * (win_rate + wins)) / (2 * N_RUNS)
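# Usage sketch for performance_stats() (hypothetical model names; assumes both
# objects expose the move()/predict() interface used elsewhere in this module,
# plus the `_moves` list that performance_stats() resets):
#
#     baseline = RandomModel()
#     candidate = QLearningModel()
#     overall = performance_stats(candidate, baseline, N_RUNS=100)
#     print(f"candidate overall win rate: {overall:.1%}")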
def run_benchmark(pl1, pl2, iterations, winning_points):
    print(
        f'run_benchmark({pl1["name"]}, {pl2["name"]}, {iterations}, {winning_points})'
    )
    stats = {
        'victories': {'pl1': 0, 'pl2': 0},
        'points': {'pl1': 0, 'pl2': 0},
        'runtimes': {'pl1': 0, 'pl2': 0},
        'players': (pl1, pl2),
        'test_settings': {
            'iterations': iterations,
            'winning_points': winning_points,
        },
        'total_turns': 0,
    }
    for _ in range(iterations):
        try:
            encounter = run_game(pl1, pl2, winning_points, interactive=False)
        except RecursionError as exc:
            print(f'Game run failed to complete, ignoring results: {exc}')
        else:
            if encounter['winner'] == -1:
                stats['victories']['pl1'] += 1
            elif encounter['winner'] == 1:
                stats['victories']['pl2'] += 1
            else:
                raise ValueError(f'Unknown player id: {encounter["winner"]}')
            if 'scores' in encounter:
                stats['points']['pl1'] += encounter['scores'][0]
                stats['points']['pl2'] += encounter['scores'][1]
            if 'runtimes' in encounter:
                stats['runtimes']['pl1'] += encounter['runtimes'][0]
                stats['runtimes']['pl2'] += encounter['runtimes'][1]
            if 'total_turns' in encounter:
                stats['total_turns'] += encounter['total_turns']
    return stats
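# Usage sketch for run_benchmark() (hypothetical player dicts; assumes
# run_game() reports pl1 as winner -1 and pl2 as winner 1, matching the
# tallying above). Note that runs aborted by RecursionError are skipped, so
# victories may sum to fewer than `iterations`:
#
#     pl1 = {'name': 'greedy'}
#     pl2 = {'name': 'random'}
#     stats = run_benchmark(pl1, pl2, iterations=100, winning_points=50)
#     played = stats['victories']['pl1'] + stats['victories']['pl2']
#     print(f"{pl1['name']} won {stats['victories']['pl1']} of {played} games")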
def run_gui():
    pygame.init()

    # Set up the drawing window
    screen = pygame.display.set_mode([SCREEN_WIDTH, SCREEN_HEIGHT])
    pygame.display.set_caption('Amazing Game')
    clock = pygame.time.Clock()

    running = True
    while running:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False

        # run_game() yields successive game states; render each as one frame.
        for game in run_game():
            board = Board(game, (SCREEN_WIDTH, SCREEN_HEIGHT))
            screen.blit(board.surface, (0, 0))
            pygame.display.flip()
            clock.tick(60)

    pygame.quit()
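# Hypothetical sketch of the Board wrapper assumed by run_gui() above: the
# loop only requires a `.surface` attribute sized to the window, so a minimal
# stand-in could look like this (drawing logic omitted):
#
#     class Board:
#         def __init__(self, game, size):
#             self.surface = pygame.Surface(size)
#             self.surface.fill((255, 255, 255))  # blank background
#             # ... render the `game` state onto self.surface here ...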
def training_loop(training_model, opponent_model, verbose=False):
    winner = None

    # for tensorboard logging
    log_dir = ("logs/fit/" + training_model._name +
               datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    tensorboard = ModifiedTensorBoard(log_dir=log_dir)

    # now execute the q learning
    y = 0.9  # discount factor
    eps = 0.5  # initial exploration rate
    interval_size = 500
    num_episodes = interval_size * 1
    # ensures that eps = 0.001 after `num_episodes` episodes
    decay_factor = (1000 * eps) ** (-1 / num_episodes)

    r_avg_list = []
    sse_avg_list = []
    wins = []
    n_moves_list = []
    moves_played = [0] * BOARD_SIZE

    for i in tqdm(range(num_episodes), desc="Training"):
        as_player = random.choice([0, 1])
        eps *= decay_factor
        g = Game(verbose=verbose)
        if as_player == 1:
            # Training as player 1, so the opponent makes the first move
            winner, board = g.move(opponent_model.move(g._board, 0))
        else:
            board = g._board

        done = False
        r_sum = 0
        sse_sum = 0
        move_num = 0
        while not done:
            random_move = False
            move_num += 1
            preds = training_model.predict(board, as_player)
            # Epsilon-greedy move selection to encourage early exploration.
            # np.random.randint's upper bound is exclusive, so BOARD_SIZE
            # (not BOARD_SIZE - 1) keeps the last position reachable.
            if np.random.random() < eps:
                move = np.random.randint(0, BOARD_SIZE)
                random_move = True
            else:
                move = training_model.move(board, as_player)
            moves_played.append(move)
            winner, new_board = g.move(move)
            if winner is None:
                opponent_move = opponent_model.move(new_board, 1 - as_player)
                winner, new_board = g.move(opponent_move)

            # Calculate reward amount: faster wins earn more, faster losses cost more.
            if winner == as_player:
                done = True
                wins.append(1)
                r = 1000 - move_num ** 2
            elif winner == 1 - as_player:
                done = True
                wins.append(0)
                r = -(1000 - move_num ** 2)
            elif winner == -1:  # draw
                done = True
                wins.append(None)
                r = 1000
            else:
                r = move_num

            # Q-learning target: r + y * max_a Q(s', a) while the game is live.
            if winner is None:
                target = r + y * np.max(
                    training_model.predict(new_board, as_player))
            else:
                target = r
            target_vec = deepcopy(preds[0])
            target_vec[move] = target
            training_model.fit_one(
                as_player,
                board,
                np.array([target_vec]),
                epochs=1,
                verbose=0,
                callbacks=[tensorboard],
            )
            new_preds = training_model.predict(board, as_player)
            sse = sum((x - t) ** 2 for x, t in zip(preds[0], target_vec))
            new_sse = sum((x - t) ** 2 for x, t in zip(new_preds[0], target_vec))
            sse_sum += sse
            if verbose:
                print(f"""
                {training_model._name} training as player: {as_player},
                move: {move_num},
                eps: {round(eps, 2)},
                old preds: {[round(p, 2) for p in preds[0]]},
                rand move: {random_move},
                tgt preds: {[round(p, 2) for p in target_vec]},
                reward: {r},
                new preds: {[round(p, 2) for p in new_preds[0]]},
                average last 20 games: {round(sum(r_avg_list[-20:])/20, 2)}
                sse: {round(sse, 4)} >> {round(new_sse, 4)}
                """)
            board = new_board
            r_sum += r

        if verbose and ((i % interval_size == 0 and i > 0) or
                        (i == num_episodes - 1)):
            run_game(training_model, opponent_model, verbose=True)

        # Collect game level metrics
        r_avg_list.append(round(r_sum, 2))
        n_moves_list.append(move_num)
        tensorboard.update_stats(reward_sum=r_sum,
                                 wins=wins[-1],
                                 n_moves_avg=n_moves_list[-1])
        tensorboard.update_dist(moves_played=moves_played)
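# Sanity check for the epsilon-decay arithmetic used in training_loop() above
# (standalone, pure arithmetic, no project imports): starting from eps = 0.5,
# applying decay_factor once per episode lands on eps ~= 0.001, as the inline
# comment claims, since 0.5 * ((1000 * 0.5) ** (-1/n)) ** n == 0.5 / 500.
def _check_eps_decay(eps=0.5, num_episodes=500):
    decay_factor = (1000 * eps) ** (-1 / num_episodes)
    for _ in range(num_episodes):
        eps *= decay_factor
    return eps  # ~= 0.001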
def test_zero_turns_should_return_initial_position(self):
    self.assertEqual(run_game(0, True), 1)

def test_negative_turns_should_return_initial_position(self):
    self.assertEqual(run_game(-1, True), 1)

def test_zero_turns_does_not_depend_on_dice(self):
    self.assertEqual(run_game(0, False), 1)
#! /usr/bin/env python
import sys

sys.path.insert(0, "lib")

import main

main.run_game()