def main():
    # Initialize the game engine.
    pygame.init()

    while True:
        width = GameSize
        height = GameSize

        # Draw random starting positions and make sure they are distinct.
        x1, y1 = randomPosition(width, height)
        x2, y2 = randomPosition(width, height)
        while x1 == x2 and y1 == y2:
            x1, y1 = randomPosition(width, height)

        # Create a game from its size and its players.
        game = Game(width, height, [
            PositionPlayer(1, AiBasic(), [x1, y1]),
            PositionPlayer(2, Aisurvivor(), [x2, y2]),
        ])

        # Hide the mouse cursor.
        pygame.mouse.set_visible(False)

        # Run the game, with or without a window depending on the configuration.
        if VisibleScreen:
            window = Window(game, 40)
            # displayGameMenu(window, game)
            game.main_loop(window)
        else:
            game.main_loop()

        printGameResults(game)
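# The helpers used above are defined elsewhere in the project. As a rough,
# hypothetical sketch of the interface main() relies on, randomPosition could
# return a random cell of the board and printGameResults could simply read
# game.winner (names and signatures here are assumptions, not the actual code):
import random

def randomPosition(width, height):
    # Pick a uniformly random cell on the board.
    return random.randint(0, width - 1), random.randint(0, height - 1)

def printGameResults(game):
    # game.winner is None for a draw, otherwise the id of the winning player.
    if game.winner is None:
        print("It's a draw!")
    else:
        print('Player {} wins!'.format(game.winner))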
def main():
    # Prepare the size of the game.
    # These values are fine if you want to play, but they may not be ideal for
    # training your AI: decreasing them makes the learning faster.
    width = 10
    height = 10

    # Create a game from its size and its players.
    game = Game(width, height, [
        # Here we create two players with a constant direction.
        # It's not very interesting, but it's the basis of everything else.
        PositionPlayer(1, ConstantPlayer(Direction.RIGHT), [0, 0]),
        PositionPlayer(2, ConstantPlayer(Direction.LEFT), [width - 1, height - 1]),
    ])

    # Run the game.
    # Since no window is passed as a parameter, the game will not display
    # anything, which avoids useless computations, and it will not be limited
    # to a fixed framerate, which is only needed for human players.
    game.main_loop()

    # The game is done; you can inspect it and do whatever you want with it.
    if game.winner is None:
        print("It's a draw!")
    else:
        print('Player {} wins!'.format(game.winner))
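# If this example is saved as a standalone script, a standard entry point runs
# it directly (a sketch; the original file may already provide one):
if __name__ == '__main__':
    main()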
def play(width, height):
    # Initialize the players' positions.
    init_player_1 = init_player_position(width, height)
    init_player_2 = init_player_position(width, height)

    # Ensure the players do not start at the same position.
    while init_player_1[0] == init_player_2[0] and init_player_1[1] == init_player_2[1]:
        init_player_2 = init_player_position(width, height)

    # Create a game from its size and its players.
    game = Game(width, height, [
        # We create a PositionPlayer for each player of the game.
        PositionPlayer(1, Ai(), init_player_1),
        PositionPlayer(2, Ai(), init_player_2),
    ])

    # Run the game.
    game.main_loop()

    return game
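# A possible way to use play(): run a batch of games and tally the outcomes.
# This evaluate() helper is only a usage sketch; its name, the number of games
# and the 10x10 board size are arbitrary choices, not part of the project.
def evaluate(n_games=100):
    wins = {1: 0, 2: 0, None: 0}
    for _ in range(n_games):
        game = play(10, 10)
        wins[game.winner] += 1
    print('P1: {}, P2: {}, draws: {}'.format(wins[1], wins[2], wins[None]))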
def main():
    # Initialize the game engine.
    pygame.init()

    # Prepare the size of the game.
    # These values are fine if you want to play, but they may not be ideal for
    # training your AI: decreasing them makes the learning faster.
    width = 10
    height = 10

    # Create a game from its size and its players.
    game = Game(width, height, [
        # We create a PositionPlayer for each player of the game.
        # The first one has the id 1 and uses keyboard interaction with the
        # Z, Q, S and D keys, starting with a default direction to the right.
        # The last argument is the initial position of the player.
        PositionPlayer(1, KeyboardPlayer(Direction.RIGHT, Mode.ZQSD), [0, 0]),

        # The second player uses the arrow keys.
        PositionPlayer(2, KeyboardPlayer(Direction.LEFT, Mode.ARROWS), [width - 1, height - 1]),
    ])

    # Create a window for the game so the players can see what they're doing.
    window = Window(game, 10)

    # Hide the mouse cursor.
    pygame.mouse.set_visible(False)

    # Run the game.
    game.main_loop(window)

    # Once the game is finished, game.winner is None for a draw; otherwise it
    # holds the id of the player who won the game.
    if game.winner is None:
        print("It's a draw!")
    else:
        print('Player {} wins!'.format(game.winner))
def train(model):
    writer = SummaryWriter()

    # Initialize the optimizer for the network parameters.
    optimizer = optim.Adam(model.parameters())

    # Initialize the exploration rate.
    epsilon = EPSILON_START
    epsilon_temp = float(epsilon)

    # Initialize the replay memory.
    memory = ReplayMemory(MEM_CAPACITY)

    # Initialize the counters.
    game_counter = 0
    move_counter = 0
    vs_min_p1_win = 0
    minimax_game = 0

    while True:
        # Initialize the game cycle parameters.
        cycle_step = 0
        p1_victories = 0
        p2_victories = 0
        null_games = 0
        player_1 = Ai(epsilon)
        player_2 = Ai(epsilon)

        # Play a cycle of games.
        while cycle_step < GAME_CYCLE:
            # Increment the counters.
            game_counter += 1
            cycle_step += 1

            # Initialize the starting positions, making sure they are distinct.
            x1 = random.randint(0, MAP_WIDTH - 1)
            y1 = random.randint(0, MAP_HEIGHT - 1)
            x2 = random.randint(0, MAP_WIDTH - 1)
            y2 = random.randint(0, MAP_HEIGHT - 1)
            while x1 == x2 and y1 == y2:
                x1 = random.randint(0, MAP_WIDTH - 1)
                y1 = random.randint(0, MAP_HEIGHT - 1)

            # Initialize the game.
            player_1.epsilon = epsilon
            player_2.epsilon = epsilon
            game = Game(MAP_WIDTH, MAP_HEIGHT, [
                PositionPlayer(1, player_1, [x1, y1]),
                PositionPlayer(2, player_2, [x2, y2]),
            ])

            # Get the initial state for each player.
            old_state_p1 = game.map().state_for_player(1)
            old_state_p1 = np.reshape(old_state_p1, (1, 1, old_state_p1.shape[0], old_state_p1.shape[1]))
            old_state_p1 = torch.from_numpy(old_state_p1).float()
            old_state_p2 = game.map().state_for_player(2)
            old_state_p2 = np.reshape(old_state_p2, (1, 1, old_state_p2.shape[0], old_state_p2.shape[1]))
            old_state_p2 = torch.from_numpy(old_state_p2).float()

            game.main_loop(model)

            # Analyze the game.
            move_counter += len(game.history)
            terminal = False
            for historyStep in range(len(game.history) - 1):
                # Get the state for each player.
                new_state_p1 = game.history[historyStep + 1].map.state_for_player(1)
                new_state_p1 = np.reshape(new_state_p1, (1, 1, new_state_p1.shape[0], new_state_p1.shape[1]))
                new_state_p1 = torch.from_numpy(new_state_p1).float()
                new_state_p2 = game.history[historyStep + 1].map.state_for_player(2)
                new_state_p2 = np.reshape(new_state_p2, (1, 1, new_state_p2.shape[0], new_state_p2.shape[1]))
                new_state_p2 = torch.from_numpy(new_state_p2).float()

                # Get the action for each player.
                if game.history[historyStep].player_one_direction is not None:
                    action_p1 = torch.from_numpy(np.array([game.history[historyStep].player_one_direction.value - 1], dtype=np.float32)).unsqueeze(0)
                    action_p2 = torch.from_numpy(np.array([game.history[historyStep].player_two_direction.value - 1], dtype=np.float32)).unsqueeze(0)
                else:
                    action_p1 = torch.from_numpy(np.array([0], dtype=np.float32)).unsqueeze(0)
                    action_p2 = torch.from_numpy(np.array([0], dtype=np.float32)).unsqueeze(0)

                # Compute the reward for each player.
                reward_p1 = historyStep
                reward_p2 = historyStep
                if historyStep + 1 == len(game.history) - 1:
                    if game.winner is None:
                        null_games += 1
                        reward_p1 = 0
                        reward_p2 = 0
                    elif game.winner == 1:
                        reward_p1 = 100
                        reward_p2 = -25
                        p1_victories += 1
                    else:
                        reward_p1 = -25
                        reward_p2 = 100
                        p2_victories += 1
                    terminal = True
                reward_p1 = torch.from_numpy(np.array([reward_p1], dtype=np.float32)).unsqueeze(0)
                reward_p2 = torch.from_numpy(np.array([reward_p2], dtype=np.float32)).unsqueeze(0)

                # Save the transition for each player.
                memory.push(old_state_p1, action_p1, new_state_p1, reward_p1, terminal)
                memory.push(old_state_p2, action_p2, new_state_p2, reward_p2, terminal)

                # Update the old state for each player.
                old_state_p1 = new_state_p1
                old_state_p2 = new_state_p2

        # Update the exploration rate.
        nouv_epsilon = epsilon * DECAY_RATE
        if nouv_epsilon > ESPILON_END:
            epsilon = nouv_epsilon
        if epsilon == 0 and game_counter % 100 == 0:
            epsilon = epsilon_temp

        # Get a sample of transitions for training.
        transitions = memory.sample(min(len(memory), model.batch_size))
        batch = Transition(*zip(*transitions))
        old_state_batch = torch.cat(batch.old_state)
        action_batch = torch.cat(batch.action).long()
        new_state_batch = torch.cat(batch.new_state)
        reward_batch = torch.cat(batch.reward).to(device)

        # Compute the predicted Q-values for the actions that were taken.
        pred_q_values_batch = torch.sum(model(old_state_batch).gather(1, action_batch.to(device)), dim=1)
        pred_q_values_next_batch = model(new_state_batch)

        # Compute the target Q-value for the action performed.
        target_q_values_batch = torch.cat(
            tuple(reward_batch[i] if batch[4][i]
                  else reward_batch[i] + model.gamma * torch.max(pred_q_values_next_batch[i])
                  for i in range(len(reward_batch)))).to(device)

        # Zero the parameter gradients.
        model.zero_grad()

        # Compute the loss.
        target_q_values_batch = target_q_values_batch.detach()
        # loss = criterion(pred_q_values_batch, target_q_values_batch)
        loss = F.smooth_l1_loss(pred_q_values_batch, target_q_values_batch)

        # Do the backward pass and update the weights.
        loss.backward()
        optimizer.step()

        # Save a backup of the model weights.
        torch.save(model.state_dict(), 'save/DQN.bak')

        p1_winrate = p1_victories / GAME_CYCLE

        # Log training statistics to TensorBoard.
        if game_counter % DISPLAY_CYCLE == 0:
            loss_string = str(loss)
            loss_string = loss_string[7:len(loss_string)]
            loss_value = loss_string.split(',')[0]
            vis_loss = float(loss_value)
            writer.add_scalar('loss_tracker', vis_loss, game_counter)
            writer.add_scalar('duration_tracker', float(move_counter) / float(DISPLAY_CYCLE), game_counter)
            writer.add_scalar('ration_tracker', p1_winrate, game_counter)
            move_counter = 0
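# The training loop above relies on a Transition namedtuple and a ReplayMemory
# exposing push(), sample() and __len__(). A minimal sketch of what they could
# look like, in the style of the standard PyTorch DQN tutorial (the project's
# actual implementation may differ):
import random
from collections import namedtuple, deque

Transition = namedtuple('Transition',
                        ('old_state', 'action', 'new_state', 'reward', 'terminal'))

class ReplayMemory:
    def __init__(self, capacity):
        # Older transitions are discarded once capacity is reached.
        self.memory = deque(maxlen=capacity)

    def push(self, *args):
        # Store one transition.
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        # Draw a random mini-batch of transitions.
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)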
def train(model):
    # Initialize the optimizer and the loss criterion.
    optimizer = optim.Adam(model.parameters())
    criterion = nn.MSELoss()

    # Initialize the exploration rate.
    epsilon = EPSILON_START
    epsilon_temp = float(epsilon)

    # Initialize the replay memory.
    memory = ReplayMemory(MEM_CAPACITY)

    # Initialize the counters.
    game_counter = 0
    move_counter = 0

    # Start training.
    while True:
        # Initialize the game cycle parameters.
        cycle_step = 0
        p1_victories = 0
        p2_victories = 0
        null_games = 0
        player_1 = Ai(epsilon)
        player_2 = Ai(epsilon)
        otherOpponent = True

        # Play a cycle of games.
        while cycle_step < GAME_CYCLE:
            # Increment the counters.
            game_counter += 1
            cycle_step += 1

            # Initialize the starting positions, making sure they are distinct.
            x1 = random.randint(0, MAP_WIDTH - 1)
            y1 = random.randint(0, MAP_HEIGHT - 1)
            x2 = random.randint(0, MAP_WIDTH - 1)
            y2 = random.randint(0, MAP_HEIGHT - 1)
            while x1 == x2 and y1 == y2:
                x1 = random.randint(0, MAP_WIDTH - 1)
                y1 = random.randint(0, MAP_HEIGHT - 1)

            # Initialize the game.
            player_1.epsilon = epsilon
            player_2.epsilon = epsilon
            game = Game(MAP_WIDTH, MAP_HEIGHT, [
                PositionPlayer(1, player_1, [x1, y1]),
                PositionPlayer(2, player_2, [x2, y2]),
            ])

            # Get the initial state for each player.
            old_state_p1 = game.map().state_for_player(1)
            old_state_p1 = np.reshape(old_state_p1, (1, 1, old_state_p1.shape[0], old_state_p1.shape[1]))
            old_state_p1 = torch.from_numpy(old_state_p1).float()
            old_state_p2 = game.map().state_for_player(2)
            old_state_p2 = np.reshape(old_state_p2, (1, 1, old_state_p2.shape[0], old_state_p2.shape[1]))
            old_state_p2 = torch.from_numpy(old_state_p2).float()

            # Run the game.
            window = Window(game, 40)
            game.main_loop(window)
            # game.main_loop()

            # Analyze the game.
            move_counter += len(game.history)
            terminal = False
            for historyStep in range(len(game.history) - 1):
                # Get the state for each player.
                new_state_p1 = game.history[historyStep + 1].map.state_for_player(1)
                new_state_p1 = np.reshape(new_state_p1, (1, 1, new_state_p1.shape[0], new_state_p1.shape[1]))
                new_state_p1 = torch.from_numpy(new_state_p1).float()
                new_state_p2 = game.history[historyStep + 1].map.state_for_player(2)
                new_state_p2 = np.reshape(new_state_p2, (1, 1, new_state_p2.shape[0], new_state_p2.shape[1]))
                new_state_p2 = torch.from_numpy(new_state_p2).float()

                # Get the action for each player.
                if game.history[historyStep].player_one_direction is not None:
                    action_p1 = torch.from_numpy(np.array([game.history[historyStep].player_one_direction.value - 1], dtype=np.float32)).unsqueeze(0)
                    action_p2 = torch.from_numpy(np.array([game.history[historyStep].player_two_direction.value - 1], dtype=np.float32)).unsqueeze(0)
                else:
                    action_p1 = torch.from_numpy(np.array([0], dtype=np.float32)).unsqueeze(0)
                    action_p2 = torch.from_numpy(np.array([0], dtype=np.float32)).unsqueeze(0)

                # Compute the reward for each player.
                reward_p1 = +1
                reward_p2 = +1
                if historyStep + 1 == len(game.history) - 1:
                    if game.winner is None:
                        null_games += 1
                        reward_p1 = 0
                        reward_p2 = 0
                    elif game.winner == 1:
                        reward_p1 = 100
                        reward_p2 = -25
                        p1_victories += 1
                    else:
                        reward_p1 = -25
                        reward_p2 = 100
                        p2_victories += 1
                    terminal = True
                reward_p1 = torch.from_numpy(np.array([reward_p1], dtype=np.float32)).unsqueeze(0)
                reward_p2 = torch.from_numpy(np.array([reward_p2], dtype=np.float32)).unsqueeze(0)

                # Save the transition for each player (player 2 only when it is
                # not an external opponent).
                memory.push(old_state_p1, action_p1, new_state_p1, reward_p1, terminal)
                if not otherOpponent:
                    memory.push(old_state_p2, action_p2, new_state_p2, reward_p2, terminal)

                # Update the old state for each player.
                old_state_p1 = new_state_p1
                old_state_p2 = new_state_p2

        # Update the exploration rate.
        nouv_epsilon = epsilon * DECAY_RATE
        if nouv_epsilon > ESPILON_END:
            epsilon = nouv_epsilon
        if epsilon == 0 and game_counter % 100 == 0:
            epsilon = epsilon_temp

        # Get a sample of transitions for training.
        transitions = memory.sample(min(len(memory), model.batch_size))
        batch = Transition(*zip(*transitions))
        old_state_batch = torch.cat(batch.old_state)
        action_batch = torch.cat(batch.action).long()
        new_state_batch = torch.cat(batch.new_state)
        reward_batch = torch.cat(batch.reward)

        # Compute the predicted Q-values for the actions that were taken.
        pred_q_values_batch = torch.sum(model(old_state_batch).gather(1, action_batch), dim=1)
        pred_q_values_next_batch = model(new_state_batch)

        # Compute the target Q-value for the action performed.
        target_q_values_batch = torch.cat(
            tuple(reward_batch[i] if batch[4][i]
                  else reward_batch[i] + model.gamma * torch.max(pred_q_values_next_batch[i])
                  for i in range(len(reward_batch))))

        # Zero the parameter gradients.
        model.zero_grad()

        # Compute the loss.
        target_q_values_batch = target_q_values_batch.detach()
        loss = criterion(pred_q_values_batch, target_q_values_batch)

        # Do the backward pass and update the weights.
        loss.backward()
        optimizer.step()

        # Save a backup of the model weights.
        torch.save(model.state_dict(), 'ais/' + folderName + '/ai.bak')

        # Display results.
        if (game_counter % DISPLAY_CYCLE) == 0:
            loss_string = str(loss)
            loss_string = loss_string[7:len(loss_string)]
            loss_value = loss_string.split(',')[0]
            print("--- Match", game_counter, "---")
            print("Average duration :", float(move_counter) / float(DISPLAY_CYCLE))
            print("Loss =", loss_value)
            print("Epsilon =", epsilon)
            print("")
            with open('ais/' + folderName + '/data.txt', 'a') as myfile:
                myfile.write(str(game_counter) + ', ' +
                             str(float(move_counter) / float(DISPLAY_CYCLE)) + ', ' +
                             loss_value + '\n')
            move_counter = 0
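# Once training has produced a checkpoint, it can be reloaded for evaluation.
# A sketch assuming the network class is called Net and the checkpoint path
# matches the one used above (both are assumptions about your own setup):
model = Net()
model.load_state_dict(torch.load('ais/' + folderName + '/ai.bak'))
model.eval()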