def main():
    pygame.init()
    while True:
        width = GameSize
        height = GameSize

        # Pick distinct random starting positions for the two players.
        x1, y1 = randomPosition(width, height)
        x2, y2 = randomPosition(width, height)
        while x1 == x2 and y1 == y2:
            x1, y1 = randomPosition(width, height)

        game = Game(width, height, [
            PositionPlayer(1, AiBasic(), [x1, y1]),
            PositionPlayer(2, Aisurvivor(), [x2, y2]),
        ])

        pygame.mouse.set_visible(False)

        if VisibleScreen:
            window = Window(game, 40)
            # displayGameMenu(window, game)
            game.main_loop(window)
        else:
            game.main_loop()

        printGameResults(game)
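# NOTE: randomPosition is used above but not defined in this snippet. A minimal
# sketch consistent with how it is called (returning a uniformly random cell
# inside the grid) could look like this; the real helper may differ.
import random

def randomPosition(width, height):
    # Return a random (x, y) cell with 0 <= x < width and 0 <= y < height.
    return random.randint(0, width - 1), random.randint(0, height - 1)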
def main(args):
    pygame.init()
    rating = False
    iter = 30

    # Create the actor-critic network and its training brain.
    actor_critic = Net2()
    global_brain = Brain(actor_critic, args, acktr=True)
    # global_brain.actor_critic.load_state_dict(torch.load(folderName + '/ACKTR_player_test.bak'))
    # global_brain.actor_critic.eval()

    # Create a second actor-critic network for the opponent.
    actor_critic2 = Net2()
    global_brain2 = Brain(actor_critic2, args, acktr=True)
    # global_brain2.actor_critic.load_state_dict(torch.load(folderName + '/ACKTR_player_test.bak'))
    # global_brain2.actor_critic.eval()

    # DQN = DQNNET()
    # DQN.load_state_dict(torch.load(folderName + '/DDQN.bak'))
    # DQN.eval()

    if rating:
        # Rating mode: play a fixed number of headless games between the
        # actor-critic and the DQN, and tally the results. The DQN lines
        # above must be uncommented for this branch to run.
        nullgame = 0
        p1_win = 0
        p2_win = 0
        for i in range(iter):
            game = make_game(False, False, "fair")
            pygame.mouse.set_visible(False)
            window = None
            game.main_loop(global_brain.actor_critic, pop_up, window, DQN, ("AC", "DQN"))
            if game.winner is None:
                nullgame += 1
            elif game.winner == 1:
                p1_win += 1
            else:
                p2_win += 1
        print("Player 1: {}\nPlayer 2: {}\nDraws: {}\n".format(p1_win, p2_win, nullgame))
    else:
        # Display mode: play forever in a window, actor-critic versus actor-critic.
        while True:
            game = make_game(False, False, "fair")
            pygame.mouse.set_visible(False)
            window = Window(game, 40)
            game.main_loop(global_brain.actor_critic, pop_up, window, global_brain2.actor_critic)
            print_game_results(game)
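# The commented-out lines above show how saved weights would be restored. A
# hedged sketch of that load/eval pattern, assuming the .bak files were
# produced by torch.save(model.state_dict()); load_checkpoint is a
# hypothetical helper, not part of the project:
def load_checkpoint(brain, path):
    # Restore a state_dict saved with torch.save() and switch the network to
    # inference mode (relevant if it contains dropout/batch-norm layers).
    brain.actor_critic.load_state_dict(torch.load(path))
    brain.actor_critic.eval()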
def display_game_menu(window, game):
    # Draw the title screen, scaled to the current display size.
    window.screen.fill([0, 0, 0])
    myimage = pygame.image.load("asset/TronTitle.png")
    myimage = pygame.transform.scale(myimage, pygame.display.get_surface().get_size())
    imagerect = myimage.get_rect(center=window.screen.get_rect().center)
    window.screen.blit(myimage, imagerect)
    pygame.display.flip()

    # Wait until the player presses Return, then recreate the game window.
    # (Note: this rebinds the local name only; the caller's window is unchanged.)
    while True:
        event = pygame.event.poll()
        if event.type == pygame.KEYDOWN and event.key == pygame.K_RETURN:
            window = Window(game, 40)
            break
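# Since pygame.event.poll() returns immediately, the loop above spins at full
# CPU while idle. A sketch of an alternative using pygame.event.wait(), which
# blocks until an event arrives (wait_for_return is a hypothetical helper):
def wait_for_return():
    # Sleep between events instead of busy-polling; return once Return is hit.
    while True:
        event = pygame.event.wait()
        if event.type == pygame.KEYDOWN and event.key == pygame.K_RETURN:
            return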
def main():
    # Initialize the game engine
    pygame.init()

    # Prepare the size for the game.
    # Those values may be good if you want to play; they might not be so good
    # to train your AI. Decreasing them will make the learning faster.
    width = 10
    height = 10

    # Create a game from its size and its players
    game = Game(width, height, [
        # We create one PositionPlayer for each player of the game.
        # The first one has the id 1, and will use keyboard interaction, with a
        # default direction that will be to the right, and that will use the Z,
        # Q, S and D keys.
        # The last array defines the initial position of the player.
        PositionPlayer(1, KeyboardPlayer(Direction.RIGHT, Mode.ZQSD), [0, 0]),

        # We create a second player that will use the arrow keys.
        PositionPlayer(2, KeyboardPlayer(Direction.LEFT, Mode.ARROWS), [width - 1, height - 1]),
    ])

    # Create a window for the game so the players can see what they're doing.
    window = Window(game, 10)

    # Hide the mouse cursor.
    pygame.mouse.set_visible(False)

    # Run the game.
    game.main_loop(window)

    # Once the game is finished, game.winner is None on a draw; otherwise
    # game.winner tells us which player has won the game.
    if game.winner is None:
        print("It's a draw!")
    else:
        print('Player {} wins!'.format(game.winner))
def train(model):
    # Initialize the optimizer and the loss criterion
    optimizer = optim.Adam(model.parameters())
    criterion = nn.MSELoss()

    # Initialize the exploration rate
    epsilon = EPSILON_START
    epsilon_temp = float(epsilon)

    # Initialize the replay memory
    memory = ReplayMemory(MEM_CAPACITY)

    # Initialize the counters
    game_counter = 0
    move_counter = 0

    # Start training
    while True:
        # Initialize the game cycle parameters
        cycle_step = 0
        p1_victories = 0
        p2_victories = 0
        null_games = 0
        player_1 = Ai(epsilon)
        player_2 = Ai(epsilon)
        otherOpponent = True

        # Play a cycle of games
        while cycle_step < GAME_CYCLE:
            # Increment the counters
            game_counter += 1
            cycle_step += 1

            # Pick distinct random starting positions
            x1 = random.randint(0, MAP_WIDTH - 1)
            y1 = random.randint(0, MAP_HEIGHT - 1)
            x2 = random.randint(0, MAP_WIDTH - 1)
            y2 = random.randint(0, MAP_HEIGHT - 1)
            while x1 == x2 and y1 == y2:
                x1 = random.randint(0, MAP_WIDTH - 1)
                y1 = random.randint(0, MAP_HEIGHT - 1)

            # Initialize the game
            player_1.epsilon = epsilon
            player_2.epsilon = epsilon
            game = Game(MAP_WIDTH, MAP_HEIGHT, [
                PositionPlayer(1, player_1, [x1, y1]),
                PositionPlayer(2, player_2, [x2, y2]),
            ])

            # Get the initial state for each player
            old_state_p1 = game.map().state_for_player(1)
            old_state_p1 = np.reshape(old_state_p1, (1, 1, old_state_p1.shape[0], old_state_p1.shape[1]))
            old_state_p1 = torch.from_numpy(old_state_p1).float()
            old_state_p2 = game.map().state_for_player(2)
            old_state_p2 = np.reshape(old_state_p2, (1, 1, old_state_p2.shape[0], old_state_p2.shape[1]))
            old_state_p2 = torch.from_numpy(old_state_p2).float()

            # Run the game
            window = Window(game, 40)
            game.main_loop(window)
            # game.main_loop()

            # Analyze the game history
            move_counter += len(game.history)
            terminal = False
            for historyStep in range(len(game.history) - 1):
                # Get the next state for each player
                new_state_p1 = game.history[historyStep + 1].map.state_for_player(1)
                new_state_p1 = np.reshape(new_state_p1, (1, 1, new_state_p1.shape[0], new_state_p1.shape[1]))
                new_state_p1 = torch.from_numpy(new_state_p1).float()
                new_state_p2 = game.history[historyStep + 1].map.state_for_player(2)
                new_state_p2 = np.reshape(new_state_p2, (1, 1, new_state_p2.shape[0], new_state_p2.shape[1]))
                new_state_p2 = torch.from_numpy(new_state_p2).float()

                # Get the action taken by each player
                if game.history[historyStep].player_one_direction is not None:
                    action_p1 = torch.from_numpy(np.array(
                        [game.history[historyStep].player_one_direction.value - 1],
                        dtype=np.float32)).unsqueeze(0)
                    action_p2 = torch.from_numpy(np.array(
                        [game.history[historyStep].player_two_direction.value - 1],
                        dtype=np.float32)).unsqueeze(0)
                else:
                    action_p1 = torch.from_numpy(np.array([0], dtype=np.float32)).unsqueeze(0)
                    action_p2 = torch.from_numpy(np.array([0], dtype=np.float32)).unsqueeze(0)

                # Compute the reward for each player: +1 for surviving a step,
                # +100 for a win, -25 for a loss, 0 for a draw.
                reward_p1 = +1
                reward_p2 = +1
                if historyStep + 1 == len(game.history) - 1:
                    if game.winner is None:
                        null_games += 1
                        reward_p1 = 0
                        reward_p2 = 0
                    elif game.winner == 1:
                        reward_p1 = 100
                        reward_p2 = -25
                        p1_victories += 1
                    else:
                        reward_p1 = -25
                        reward_p2 = 100
                        p2_victories += 1
                    terminal = True
                reward_p1 = torch.from_numpy(np.array([reward_p1], dtype=np.float32)).unsqueeze(0)
                reward_p2 = torch.from_numpy(np.array([reward_p2], dtype=np.float32)).unsqueeze(0)

                # Save the transition for each player
                memory.push(old_state_p1, action_p1, new_state_p1, reward_p1, terminal)
                if not otherOpponent:
                    memory.push(old_state_p2, action_p2, new_state_p2, reward_p2, terminal)

                # Update the old state for each player
                old_state_p1 = new_state_p1
                old_state_p2 = new_state_p2

            # Decay the exploration rate, keeping it above EPSILON_END, and
            # periodically reset it to its starting value.
            nouv_epsilon = epsilon * DECAY_RATE
            if nouv_epsilon > EPSILON_END:
                epsilon = nouv_epsilon
            if epsilon == 0 and game_counter % 100 == 0:
                epsilon = epsilon_temp

            # Sample a batch of transitions for training
            transitions = memory.sample(min(len(memory), model.batch_size))
            batch = Transition(*zip(*transitions))
            old_state_batch = torch.cat(batch.old_state)
            action_batch = torch.cat(batch.action).long()
            new_state_batch = torch.cat(batch.new_state)
            reward_batch = torch.cat(batch.reward)

            # Compute the predicted Q-values for the actions performed
            pred_q_values_batch = torch.sum(model(old_state_batch).gather(1, action_batch), dim=1)
            pred_q_values_next_batch = model(new_state_batch)

            # Compute the target Q-value for each transition: the reward alone
            # for terminal transitions, otherwise reward + gamma * max Q(s', a')
            target_q_values_batch = torch.cat(tuple(
                reward_batch[i] if batch.terminal[i]
                else reward_batch[i] + model.gamma * torch.max(pred_q_values_next_batch[i])
                for i in range(len(reward_batch))))

            # Zero the parameter gradients
            model.zero_grad()

            # Compute the loss
            target_q_values_batch = target_q_values_batch.detach()
            loss = criterion(pred_q_values_batch, target_q_values_batch)

            # Do the backward pass and update the weights
            loss.backward()
            optimizer.step()

            # Save a backup of the weights
            torch.save(model.state_dict(), 'ais/' + folderName + '/ai.bak')

            # Display the results
            if (game_counter % DISPLAY_CYCLE) == 0:
                loss_value = str(loss.item())
                print("--- Match", game_counter, "---")
                print("Average duration :", float(move_counter) / float(DISPLAY_CYCLE))
                print("Loss =", loss_value)
                print("Epsilon =", epsilon)
                print("")
                with open('ais/' + folderName + '/data.txt', 'a') as myfile:
                    myfile.write(str(game_counter) + ', '
                                 + str(float(move_counter) / float(DISPLAY_CYCLE)) + ', '
                                 + loss_value + '\n')
                move_counter = 0
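# train() relies on a Transition tuple and a ReplayMemory that are not defined
# in this snippet. A minimal sketch consistent with the calls above (push with
# five fields, sample, len), following the standard PyTorch DQN tutorial
# pattern; the project's actual implementation may differ:
import random
from collections import namedtuple

# Field names match how train() unpacks batches: batch.old_state, batch.action,
# batch.new_state, batch.reward, batch.terminal.
Transition = namedtuple('Transition',
                        ('old_state', 'action', 'new_state', 'reward', 'terminal'))

class ReplayMemory:
    # Fixed-capacity ring buffer of transitions (assumed implementation).
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        # Overwrite the oldest transition once the buffer is full.
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)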