Example #1
def main():
	pygame.init()

	# GameSize, randomPosition, AiBasic, Aisurvivor, VisibleScreen and
	# printGameResults are assumed to be defined elsewhere in this module.
	while True:
		width = GameSize
		height = GameSize

		# Pick two distinct starting cells, re-rolling player 1 on a collision.
		x1, y1 = randomPosition(width, height)
		x2, y2 = randomPosition(width, height)
		while x1 == x2 and y1 == y2:
			x1, y1 = randomPosition(width, height)

		game = Game(width, height, [
			PositionPlayer(1, AiBasic(), [x1, y1]),
			PositionPlayer(2, Aisurvivor(), [x2, y2]),
		])

		pygame.mouse.set_visible(False)

		if VisibleScreen:
			window = Window(game, 40)
			# displayGameMenu(window, game)
			game.main_loop(window)
		else:
			game.main_loop()

		printGameResults(game)
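
Example #1 calls a randomPosition helper that is not shown above. A minimal sketch, assuming it simply returns a uniformly random cell inside the grid (the body here is a guess, not the project's own implementation):

import random

def randomPosition(width, height):
    # Hypothetical helper: pick a uniformly random cell on the board.
    return random.randint(0, width - 1), random.randint(0, height - 1)
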
Example #2
def main(args):
    pygame.init()
    rating = False  # set to True to play a fixed number of rating games instead of an endless loop
    n_games = 30    # number of rating games to play

    actor_critic = Net2()  # create the neural network
    global_brain = Brain(actor_critic, args, acktr=True)
    # global_brain.actor_critic.load_state_dict(torch.load(folderName + '/ACKTR_player_test.bak'))
    # global_brain.actor_critic.eval()

    actor_critic2 = Net2()  # create the second neural network
    global_brain2 = Brain(actor_critic2, args, acktr=True)
    # global_brain2.actor_critic.load_state_dict(torch.load(folderName + '/ACKTR_player_test.bak'))
    # global_brain2.actor_critic.eval()

    # DQN=DQNNET()
    # DQN.load_state_dict(torch.load(folderName+'/DDQN.bak'))
    # DQN.eval()

    if rating:
        nullgame = 0
        p1_win = 0
        p2_win = 0

        for i in range(n_games):
            game = make_game(False, False, "fair")
            pygame.mouse.set_visible(False)
            window = None

            # Note: DQN is created in the commented-out block above; it must be
            # loaded there before this rating branch can actually run.
            game.main_loop(global_brain.actor_critic, pop_up, window, DQN, ("AC", "DQN"))

            if game.winner is None:
                nullgame += 1
            elif game.winner == 1:
                p1_win += 1
            else:
                p2_win += 1

        print("Player 1: {}\nPlayer 2: {}\nDraws: {}".format(p1_win, p2_win, nullgame))
    else:
        while True:
            game = make_game(False, False, "fair")
            pygame.mouse.set_visible(False)

            window = Window(game, 40)

            game.main_loop(global_brain.actor_critic, pop_up, window, global_brain2.actor_critic)
            print_game_results(game)
Example #3
def display_game_menu(window, game):
    window.screen.fill([0, 0, 0])

    myimage = pygame.image.load("asset/TronTitle.png")
    myimage = pygame.transform.scale(myimage, pygame.display.get_surface().get_size())
    imagerect = myimage.get_rect(center=window.screen.get_rect().center)
    window.screen.blit(myimage, imagerect)

    pygame.display.flip()

    # Wait until the player presses Enter to start the game.
    while True:
        event = pygame.event.poll()
        if event.type == pygame.KEYDOWN and event.key == pygame.K_RETURN:
            window = Window(game, 40)
            break
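
The menu loop above polls pygame events in a tight loop, which keeps a CPU core busy while idle. A hedged alternative with the same effect, blocking on pygame.event.wait() until an event arrives:

    while True:
        event = pygame.event.wait()  # sleeps until the next event instead of busy-polling
        if event.type == pygame.KEYDOWN and event.key == pygame.K_RETURN:
            window = Window(game, 40)
            break
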
Example #4
def main():
    # Initialize the game engine
    pygame.init()

    # Prepare the size of the game board.
    # These values are fine for playing; smaller values make learning faster
    # if you are training an AI.
    width = 10
    height = 10

    # Create a game from its size and its players
    game = Game(
        width,
        height,
        [
            # We create two PositionPlayer for each player of the game.
            # The first one has the id 1, and will use keyboard interaction, with a
            # default direction that will be to the right, and that will use the Z,
            # Q, S and D keys.
            # The last array defines the initial position of the player.
            PositionPlayer(1, KeyboardPlayer(Direction.RIGHT, Mode.ZQSD),
                           [0, 0]),

            # We create a second player that will use the arrow keys.
            PositionPlayer(2, KeyboardPlayer(Direction.LEFT, Mode.ARROWS),
                           [width - 1, height - 1]),
        ])

    # Create a window for the game so the players can see what they're doing.
    window = Window(game, 10)

    # Hide mouse
    pygame.mouse.set_visible(False)

    # Run the game.
    game.main_loop(window)

    # Once the game is finished, if game.winner is None, it means it's a draw
    # Otherwise, game.winner will tell us which player has won the game.
    if game.winner is None:
        print("It's a draw!")
    else:
        print('Player {} wins!'.format(game.winner))
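
As Example #1 shows, the window argument to game.main_loop() is optional; dropping it runs the game headless, which is what the training code relies on:

    # Headless run (no Window), useful when training an AI where rendering only slows things down.
    game.main_loop()
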
Example #5
def train(model):

    # Initialize neural network parameters and optimizer
    optimizer = optim.Adam(model.parameters())
    criterion = nn.MSELoss()

    # Initialize exploration rate
    epsilon = EPSILON_START
    epsilon_temp = float(epsilon)

    # Initialize memory
    memory = ReplayMemory(MEM_CAPACITY)

    # Initialize the game counter
    game_counter = 0
    move_counter = 0

    # Start training
    while True:

        # Initialize the game cycle parameters
        cycle_step = 0
        p1_victories = 0
        p2_victories = 0
        null_games = 0
        player_1 = Ai(epsilon)
        player_2 = Ai(epsilon)
        otherOpponent = True  # when True, only player 1's transitions are stored in memory

        # Play a cycle of games
        while cycle_step < GAME_CYCLE:

            # Increment the counters
            game_counter += 1
            cycle_step += 1

            # Initialize the starting positions
            x1 = random.randint(0, MAP_WIDTH - 1)
            y1 = random.randint(0, MAP_HEIGHT - 1)
            x2 = random.randint(0, MAP_WIDTH - 1)
            y2 = random.randint(0, MAP_HEIGHT - 1)
            while x1 == x2 and y1 == y2:
                x1 = random.randint(0, MAP_WIDTH - 1)
                y1 = random.randint(0, MAP_HEIGHT - 1)

            # Initialize the game
            player_1.epsilon = epsilon
            player_2.epsilon = epsilon
            game = Game(MAP_WIDTH, MAP_HEIGHT, [
                PositionPlayer(1, player_1, [x1, y1]),
                PositionPlayer(2, player_2, [x2, y2]),
            ])

            # Get the initial state for each player
            old_state_p1 = game.map().state_for_player(1)
            old_state_p1 = np.reshape(
                old_state_p1,
                (1, 1, old_state_p1.shape[0], old_state_p1.shape[1]))
            old_state_p1 = torch.from_numpy(old_state_p1).float()
            old_state_p2 = game.map().state_for_player(2)
            old_state_p2 = np.reshape(
                old_state_p2,
                (1, 1, old_state_p2.shape[0], old_state_p2.shape[1]))
            old_state_p2 = torch.from_numpy(old_state_p2).float()

            # Run the game
            window = Window(game, 40)
            game.main_loop(window)
            #game.main_loop()

            # Analyze the game
            move_counter += len(game.history)
            terminal = False

            for historyStep in range(len(game.history) - 1):

                # Get the state for each player
                new_state_p1 = game.history[historyStep +
                                            1].map.state_for_player(1)
                new_state_p1 = np.reshape(
                    new_state_p1,
                    (1, 1, new_state_p1.shape[0], new_state_p1.shape[1]))
                new_state_p1 = torch.from_numpy(new_state_p1).float()
                new_state_p2 = game.history[historyStep +
                                            1].map.state_for_player(2)
                new_state_p2 = np.reshape(
                    new_state_p2,
                    (1, 1, new_state_p2.shape[0], new_state_p2.shape[1]))
                new_state_p2 = torch.from_numpy(new_state_p2).float()

                # Get the action for each player
                if game.history[historyStep].player_one_direction is not None:
                    action_p1 = torch.from_numpy(
                        np.array([
                            game.history[historyStep].player_one_direction.
                            value - 1
                        ],
                                 dtype=np.float32)).unsqueeze(0)
                    action_p2 = torch.from_numpy(
                        np.array([
                            game.history[historyStep].player_two_direction.
                            value - 1
                        ],
                                 dtype=np.float32)).unsqueeze(0)
                else:
                    action_p1 = torch.from_numpy(
                        np.array([0], dtype=np.float32)).unsqueeze(0)
                    action_p2 = torch.from_numpy(
                        np.array([0], dtype=np.float32)).unsqueeze(0)

                # Compute the reward for each player
                reward_p1 = +1
                reward_p2 = +1
                if historyStep + 1 == len(game.history) - 1:
                    if game.winner is None:
                        null_games += 1
                        reward_p1 = 0
                        reward_p2 = 0
                    elif game.winner == 1:
                        reward_p1 = 100
                        reward_p2 = -25
                        p1_victories += 1
                    else:
                        reward_p1 = -25
                        reward_p2 = 100
                        p2_victories += 1
                    terminal = True

                reward_p1 = torch.from_numpy(
                    np.array([reward_p1], dtype=np.float32)).unsqueeze(0)
                reward_p2 = torch.from_numpy(
                    np.array([reward_p2], dtype=np.float32)).unsqueeze(0)

                # Save the transition for each player
                memory.push(old_state_p1, action_p1, new_state_p1, reward_p1,
                            terminal)
                if not (otherOpponent):
                    memory.push(old_state_p2, action_p2, new_state_p2,
                                reward_p2, terminal)

                # Update old state for each player
                old_state_p1 = new_state_p1
                old_state_p2 = new_state_p2

            # Update exploration rate
            nouv_epsilon = epsilon * DECAY_RATE
            if nouv_epsilon > ESPILON_END:
                epsilon = nouv_epsilon
            # Periodically restore the saved exploration rate once it has decayed to zero
            if epsilon == 0 and game_counter % 100 == 0:
                epsilon = epsilon_temp

        # Get a sample for training
        transitions = memory.sample(min(len(memory), model.batch_size))
        batch = Transition(*zip(*transitions))
        old_state_batch = torch.cat(batch.old_state)
        action_batch = torch.cat(batch.action).long()
        new_state_batch = torch.cat(batch.new_state)
        reward_batch = torch.cat(batch.reward)

        # Compute predicted Q-values for each action
        pred_q_values_batch = torch.sum(model(old_state_batch).gather(
            1, action_batch),
                                        dim=1)
        pred_q_values_next_batch = model(new_state_batch)

        # Compute targeted Q-value for the action performed: terminal transitions
        # use the raw reward, the others bootstrap on the best next-state Q-value
        target_q_values_batch = torch.cat(
            tuple(reward_batch[i] if batch[4][i] else reward_batch[i] +
                  model.gamma * torch.max(pred_q_values_next_batch[i])
                  for i in range(len(reward_batch))))

        # zero the parameter gradients
        model.zero_grad()

        # Compute the loss
        target_q_values_batch = target_q_values_batch.detach()
        loss = criterion(pred_q_values_batch, target_q_values_batch)

        # Do backward pass
        loss.backward()
        optimizer.step()

        # Update bak
        torch.save(model.state_dict(), 'ais/' + folderName + '/ai.bak')

        # Display results
        if (game_counter % DISPLAY_CYCLE) == 0:
            loss_value = loss.item()
            print("--- Match", game_counter, "---")
            print("Average duration :",
                  float(move_counter) / float(DISPLAY_CYCLE))
            print("Loss =", loss_value)
            print("Epsilon =", epsilon)
            print("")
            with open('ais/' + folderName + '/data.txt', 'a') as myfile:
                myfile.write(
                    str(game_counter) + ', ' +
                    str(float(move_counter) / float(DISPLAY_CYCLE)) + ', ' +
                    str(loss_value) + '\n')
            move_counter = 0
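
Example #5 relies on a ReplayMemory class and a Transition namedtuple that are not shown. A minimal sketch, inferred from how they are used above (push, sample, len(), and five values per transition); the field names below are assumptions, only their order matches the push calls:

import random
from collections import namedtuple

Transition = namedtuple('Transition',
                        ('old_state', 'action', 'new_state', 'reward', 'terminal'))

class ReplayMemory:
    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0

    def push(self, *args):
        # Store a transition, overwriting the oldest entry once capacity is reached.
        if len(self.memory) < self.capacity:
            self.memory.append(None)
        self.memory[self.position] = Transition(*args)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)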