Example #1
def run():
    pygame.init()
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0

    while counter_games < 100:
        game = Game(width, height)
        pipe = game.pipe
        player = game.player
        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 60 - counter_games

            # get old state
            state_old = agent.get_state(game, player, pipe)

            # perform a random action based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 1), num_classes=2)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 6)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=2)

            # perform the new move and get the new state
            player.Move(game, final_move)
            pipe.move_pipe(160)
            # check_collision(player, pipe)
            check_score(pipe, game)

            reward = agent.set_reward(pipe, game.crash)
            state_new = agent.get_state(game, player, pipe)

            # train short-term memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)

            # store the new data in long-term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)

            display(game, player, pipe, record)
            if game.score == 20:
                game.crash = True
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        record = get_record(game.score, record)
        
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
    pygame.quit()
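
Note: these loops lean on small helper functions defined elsewhere in each repository. A minimal sketch of what get_record and plot_seaborn plausibly look like, assuming seaborn and matplotlib are available (Example #17 below inlines essentially the same plotting code):

import matplotlib.pyplot as plt
import seaborn as sns

def get_record(score, record):
    # Track the best score seen across games.
    return max(score, record)

def plot_seaborn(array_counter, array_score):
    # Plot score per game with a fitted regression line.
    sns.set(color_codes=True)
    ax = sns.regplot(x=array_counter, y=array_score)
    ax.set(xlabel='games', ylabel='score')
    plt.show()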
Example #2
def run():
    agent = DQNAgent(size)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < games:
        # Initialize classes
        game = Game(size, size)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            #agent.epsilon is set to give randomness to actions
            agent.epsilon = (games * 0.4) - counter_games

            #get old state
            state_old = agent.get_state(game, player1, food1)

            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, games) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, agent.size)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)

            #perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)

            #set reward for the new state
            reward = agent.set_reward(player1, game.crash)

            # train short-term memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)

            # store the new data in long-term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        print('Game', counter_games, ' Score:', game.score, 'Last 10 Avg:', np.mean(score_plot[-10:]))

    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
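
Note: the epsilon-greedy branch above recurs in nearly every example. A sketch of the same pattern factored into a helper; choose_action and epsilon_scale are hypothetical names generalizing the literal bounds (200, games) used in these snippets:

from random import randint

import numpy as np
from tensorflow.keras.utils import to_categorical

def choose_action(agent, state, num_classes=3, epsilon_scale=200):
    # Epsilon-greedy: explore with probability roughly
    # agent.epsilon / epsilon_scale (randint is inclusive),
    # otherwise act greedily on the model's predicted Q-values.
    if randint(0, epsilon_scale) < agent.epsilon:
        move_index = randint(0, num_classes - 1)
    else:
        prediction = agent.model.predict(state.reshape((1, -1)))
        move_index = int(np.argmax(prediction[0]))
    return to_categorical(move_index, num_classes=num_classes)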
Example #3
def run():
    pygame.init()
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < 150:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 80 - counter_games

            # get old state
            state_old = agent.get_state(game, player1, food1)

            # predict action based on the old state
            final_move = agent.predict(state_old)

            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)

            # train short-term memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     game.crash)

            # store the new data in long-term memory
            agent.remember(state_old, final_move, reward, state_new,
                           game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)

    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
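
Note: train_short_memory is defined inside the agent and not shown here. A sketch under the assumption that it performs the standard one-step Q-learning update these snake-ga-style projects use:

import numpy as np

GAMMA = 0.9  # discount factor (assumed)

def train_short_memory(model, state_old, action, reward, state_new, done):
    # One-step Q-learning: the target is r if the episode ended, otherwise
    # r + gamma * max_a' Q(s', a'). Only the Q-value of the action actually
    # taken is moved toward the target.
    target = reward
    if not done:
        target = reward + GAMMA * np.amax(
            model.predict(state_new.reshape((1, -1)))[0])
    target_f = model.predict(state_old.reshape((1, -1)))
    target_f[0][np.argmax(action)] = target
    model.fit(state_old.reshape((1, -1)), target_f, epochs=1, verbose=0)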
Example #4
def train(display_on, speed, params):
    pygame.init()
    pygame.font.init()

    agent = DQNAgent(params)

    counter_games = 0
    high_score = 0
    score_plot = []
    counter_plot = []

    while counter_games < params['episodes']:
        game = Game(440, 440, high_score)

        if display_on:
            game.update_display()

        while not game.crash:
            if handle_game_event(game):
                return

            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 1 - (counter_games *
                                 params['epsilon_decay_linear'])

            state = game.get_state()
            move = agent.get_move(state)
            game.do_move(move)

            new_state = game.get_state()
            reward = get_reward(game)

            # train short-term memory based on the new action and state
            agent.train_short_memory(state, move, reward, new_state,
                                     game.crash)

            # store the new data in long-term memory
            agent.remember(state, move, reward, new_state, game.crash)

            if display_on:
                game.update_display()
                pygame.time.wait(speed)

        counter_games += 1
        print(f'Game {counter_games}      Score: {game.score}')
        high_score = game.high_score

        score_plot.append(game.score)
        counter_plot.append(counter_games)

        agent.replay_memory(params['batch_size'])

    agent.model.save_weights(params['weights_path'])
    pygame.quit()
    plot_seaborn(counter_plot, score_plot)
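
Note: this variant decays epsilon linearly with the episode index via params['epsilon_decay_linear'] instead of a fixed 80 - counter_games schedule. A worked sketch of that schedule; the floor is an assumption (the code above simply lets epsilon go negative, which disables exploration):

def linear_epsilon(episode, decay, floor=0.0):
    # epsilon = 1 - episode * decay, clipped at a floor.
    return max(floor, 1 - episode * decay)

# With decay = 1/75: 1.0 at game 0, ~0.51 at game 37, 0.0 from game 75 on.
print([round(linear_epsilon(e, 1 / 75), 2) for e in (0, 37, 75, 100)])
# [1.0, 0.51, 0.0, 0.0]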
Example #5
def run():
    pygame.init()
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < 150:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            agent.epsilon = 80 - counter_games
            state_old = agent.get_state(game, player1, food1)
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)[0]
            else:
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)[0]
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)
            reward = agent.set_reward(player1, game.crash)
            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     game.crash)
            agent.remember(state_old, final_move, reward, state_new,
                           game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
Example #6
def train(epoch=10):
    pygame.init()
    agent = DQNAgent(output_dim=5)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < epoch:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player1
        player2 = game.player2
        field0 = game.field


        # Perform first move
        initialize_game(player1, player2, game, field0, agent)
        if display_option:
            display(player1, player2, field0, game, record)

        game_epoch = 0
        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 50 - game_epoch

            train_each_epoch(agent, game, field0, player1, [player2], game_epoch)
            train_each_epoch(agent, game, field0, player2, [player1], game_epoch)

            record = get_record(game.player1.score, game.player2.score, record)
            if display_option:
                display(player1, player2, field0, game, record)
                pygame.time.wait(speed)
            
            game_epoch += 1
            game.crash = not (game.player1.display or game.player2.display)

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.player1.score, game.player2.score)
        score_plot.append(game.player1.score)
        counter_plot.append(counter_games)
        print(counter_plot)
    agent.model.save_weights('weights_multi.hdf5')
    plot_seaborn(counter_plot, score_plot)
Example #7
def play(display_on, speed, params):
    pygame.init()
    pygame.font.init()

    agent = DQNAgent(params)
    agent.epsilon = 0

    counter_games = 0
    high_score = 0
    score_plot = []
    counter_plot = []

    while counter_games < params['episodes']:
        game = Game(440, 440, high_score)

        if display_on:
            game.update_display()

        while not game.crash:
            if handle_game_event(game):
                return

            state = game.get_state()
            move = agent.get_move(state)

            game.do_move(move)

            if display_on:
                game.update_display()
                pygame.time.wait(speed)

        counter_games += 1
        print(f'Game {counter_games}      Score: {game.score}')
        high_score = game.high_score

        score_plot.append(game.score)
        counter_plot.append(counter_games)

    pygame.quit()
    plot_seaborn(counter_plot, score_plot)
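
Note: in this play-only variant agent.epsilon is pinned to 0, so get_move reduces to a greedy argmax over the predicted Q-values. get_move itself is not shown; a hypothetical sketch consistent with how it is called:

import numpy as np

def get_move(agent, state, num_actions=3):
    # With agent.epsilon == 0 this is pure exploitation: always take the
    # action with the highest predicted Q-value.
    if np.random.rand() < agent.epsilon:
        return np.random.randint(num_actions)  # explore (never in play mode)
    q_values = agent.model.predict(state.reshape((1, -1)))
    return int(np.argmax(q_values[0]))         # exploit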
Example #8
def run(episodes, load_weights, display_option, speed):
    pygame.init()
    agent = DQNAgent()
    weights_filepath = os.path.join(os.getcwd(), WEIGHTS_FILENAME)
    if load_weights and os.path.isfile(weights_filepath):
        agent.model.load_weights(weights_filepath)

    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < episodes:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 80 - counter_games

            # get old state
            state_old = agent.get_state(game, player1, food1)

            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)

            # train short-term memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     game.crash)

            # store the new data in long-term memory
            agent.remember(state_old, final_move, reward, state_new,
                           game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        print(f'Game {counter_games}      Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    agent.model.save_weights(WEIGHTS_FILENAME)
    plot_seaborn(counter_plot, score_plot)
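
Note: remember and replay_new live in the agent and are not shown. A minimal sketch of the pair, assuming a deque-backed memory and the same one-step Q-learning target used for short-term training (capacity and GAMMA are assumptions):

import random
from collections import deque

import numpy as np

GAMMA = 0.9                     # discount factor (assumed)
memory = deque(maxlen=100_000)  # assumed capacity

def remember(state, action, reward, next_state, done):
    memory.append((state, action, reward, next_state, done))

def replay_new(model, memory, batch_size=1000):
    # Sample a minibatch from long-term memory and fit the model toward the
    # one-step Q-learning target for each stored transition.
    batch = random.sample(memory, min(batch_size, len(memory)))
    for state, action, reward, next_state, done in batch:
        target = reward
        if not done:
            target = reward + GAMMA * np.amax(
                model.predict(next_state.reshape((1, -1)))[0])
        target_f = model.predict(state.reshape((1, -1)))
        target_f[0][np.argmax(action)] = target
        model.fit(state.reshape((1, -1)), target_f, epochs=1, verbose=0)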
Example #9
def run(params):
    """
    Run the DQN algorithm, based on the parameters previously set.   
    """
    pygame.init()
    agent = DQNAgent(params)
    agent = agent.to(DEVICE)
    agent.optimizer = optim.Adam(agent.parameters(), weight_decay=0, lr=params['learning_rate'])
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    total_score = 0
    while counter_games < params['epoch']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if params['display']:
            display(player1, food1, game, record)
        
        steps = 0  # Moves since the last reward was collected
        while (not game.crash) and (steps < 100):
            if not params['train']:
                agent.epsilon = 0.01
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])

            # get old state
            state_old = agent.get_state(game, player1, food1)

            # perform random actions based on agent.epsilon, or choose the action
            if random.uniform(0, 1) < agent.epsilon:
                final_move = np.eye(3)[randint(0, 2)]
            else:
                # predict action based on the old state
                with torch.no_grad():
                    state_old_tensor = torch.tensor(state_old.reshape((1, 11)), dtype=torch.float32).to(DEVICE)
                    prediction = agent(state_old_tensor)
                    final_move = np.eye(3)[np.argmax(prediction.detach().cpu().numpy()[0])]

            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            
            # Reset the step counter whenever a reward is collected
            if reward > 0:
                steps = 0
                
            if params['train']:
                # train short-term memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # store the new data in long-term memory
                agent.remember(state_old, final_move, reward, state_new, game.crash)

            record = get_record(game.score, record)
            if params['display']:
                display(player1, food1, game, record)
                pygame.time.wait(params['speed'])
            steps += 1
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        total_score += game.score
        print(f'Game {counter_games}      Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    mean, stdev = get_mean_stdev(score_plot)
    if params['train']:
        model_weights = agent.state_dict()
        torch.save(model_weights, params["weights_path"])
    if params['plot_score']:
        plot_seaborn(counter_plot, score_plot, params['train'])
    return total_score, mean, stdev
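
Note: this variant is PyTorch; DQNAgent is an nn.Module moved to DEVICE and called directly on a state tensor of shape (1, 11), returning Q-values for 3 actions. A hypothetical skeleton consistent with those shapes (layer sizes are assumptions; the real DQNAgent also carries get_state, set_reward, remember, and replay logic):

import torch
import torch.nn as nn
import torch.nn.functional as F

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

class DQNAgentSketch(nn.Module):
    # 11 state features in, Q-values for 3 actions out.
    def __init__(self, hidden=50):
        super().__init__()
        self.f1 = nn.Linear(11, hidden)
        self.f2 = nn.Linear(hidden, hidden)
        self.f3 = nn.Linear(hidden, 3)

    def forward(self, x):
        x = F.relu(self.f1(x))
        x = F.relu(self.f2(x))
        return self.f3(x)  # raw Q-values; argmax picks the move

agent = DQNAgentSketch().to(DEVICE)
q = agent(torch.zeros(1, 11, device=DEVICE))  # -> tensor of shape (1, 3)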
Example #10
def main():
    # Initialize the network
    pygame.init()
    agent = DQNAgent()
    score_plot = []
    counter_plot = []
    record = 0
    taille = 30  # Number of cells = taille * taille
    init_ihm()  # Initialize the UI
    max_iteration = 10  # Number of games
    cpt_iteration = 0  # Game counter
    best_score = 0  # Best score
    rep_lines_bestScore = []  # Lines of the best-scoring game
    while cpt_iteration < max_iteration:
        game = Game(taille)  # Initialize the game
        nbr_lignes_crees = 0  # Number of lines created
        game.rep_lines.clear()  # Reset the list of created lines
        game.calculer_lignes_jouables()  # Compute the playable lines
        while not game.crash:  # While playable lines remain
            agent.epsilon = 80 - cpt_iteration
            state_old = agent.get_state(game)  # get old state
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:  # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 13)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)
            if np.array_equal(final_move, [1, 0, 0]):
                tmp_line = game.rep_playable_lines[random.randint(
                    0, int((len(game.rep_playable_lines) - 1) / 3))]
            elif np.array_equal(final_move, [0, 1, 0]):
                tmp_line = game.rep_playable_lines[random.randint(
                    int((len(game.rep_playable_lines) - 1) / 3),
                    int((len(game.rep_playable_lines) - 1) * 2 / 3))]
            elif np.array_equal(final_move, [0, 0, 1]):
                tmp_line = game.rep_playable_lines[random.randint(
                    int((len(game.rep_playable_lines) - 1) * 2 / 3),
                    int(len(game.rep_playable_lines) - 1))]
            game.jouer_ligne(tmp_line)  # Play the line tmp_line=[[cell * 5], direction]
            game.rep_lines.append(tmp_line)  # Add the line to the list of played lines
            game.calculer_lignes_jouables()  # Recompute the playable lines
            best = 0
            if len(game.rep_lines) > best_score:
                best_score = len(game.rep_lines)  # Update the best score
                # Save the list of lines from the best-scoring game
                rep_lines_bestScore = game.rep_lines
                best = 1
            if len(game.rep_playable_lines) == 0:
                game.crash = True
            else:
                state_new = agent.get_state(game)
                reward = agent.set_reward(game.crash, best)
                # train short-term memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data in long-term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)
        print("Score: ", len(game.rep_lines), " -  Best score: ", best_score)
        score_plot.append(len(game.rep_lines))
        counter_plot.append(cpt_iteration)
        cpt_iteration += 1
        agent.replay_new(agent.memory)
    score_ihm(max_iteration, best_score)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
    afficher_lignes_ihm(rep_lines_bestScore)
Example #11
def run():
    agent1 = DQNAgent()
    agent2 = DQNAgent()
    counter_games = 0
    game_engine = Game()

    while counter_games < GAMES_COUNT:
        board = game_engine.get_init_board()
        start = timer()
        #set player for agents
        agent1.player = randint(1, 2)
        agent2.player = 1 if agent1.player == 2 else 2

        while not game_engine.is_finished(board):
            #agent.epsilon is set to give randomness to actions
            agent1.epsilon = EPSILON - counter_games
            agent2.epsilon = EPSILON - counter_games

            #get state
            state1 = agent1.get_state(board)
            state2 = agent2.get_state(board)

            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, EPSILON * RANDOM_MOVES_PROPORTION) < agent1.epsilon:
                final_move1 = random.choice(agent1.possible_moves(state1))
            else:
                # predict action based on the state
                prediction = agent1.model.predict(state1)
                final_move1 = np.argmax(prediction[0])

            if randint(0, EPSILON * RANDOM_MOVES_PROPORTION) < agent2.epsilon:
                final_move2 = random.choice(agent2.possible_moves(state2))
            else:
                # predict action based on the state
                prediction = agent2.model.predict(state2)
                final_move2 = np.argmax(prediction[0])

            #perform new move and get new state
            board_new, changed_dir1 = game_engine.make_move(
                board, final_move1, agent1.player)
            board_new, changed_dir2 = game_engine.make_move(
                board_new, final_move2, agent2.player)
            board_new = game_engine.send_move(board_new)
            state_new1 = agent1.get_state(board_new)
            state_new2 = agent2.get_state(board_new)

            #set reward for the new state
            reward1 = agent1.get_reward(game_engine, board, board_new,
                                        changed_dir1)
            reward2 = agent2.get_reward(game_engine, board, board_new,
                                        changed_dir2)

            # train short-term memory based on the new action and state
            game_is_finished = game_engine.is_finished(board_new)
            agent1.train_short_memory(state1, final_move1, reward1, state_new1,
                                      game_is_finished)
            agent2.train_short_memory(state2, final_move2, reward2, state_new2,
                                      game_is_finished)

            # store the new data in long-term memory
            agent1.remember(state1, final_move1, reward1, state_new1,
                            game_is_finished)
            agent2.remember(state2, final_move2, reward2, state_new2,
                            game_is_finished)
            board = board_new

            if game_is_finished:
                if game_engine.is_win(board, agent1.player):
                    agent1.wins_count += 1
                if game_engine.is_win(board, agent2.player):
                    agent2.wins_count += 1
            print('.', end='', flush=True)

        agent1.replay_new()
        agent2.replay_new()
        counter_games += 1
        print('Finished')
        print('Game', counter_games)
        print('Time', timer() - start)
        print('Turns', board["turn"])
        print('Agent 1 wins', agent1.wins_count)
        print('Agent 2 wins', agent2.wins_count)

    # save trained model
    if agent1.wins_count > agent2.wins_count:
        agent1.model.save_weights('weights.hdf5')
    else:
        agent2.model.save_weights('weights.hdf5')
Example #12
def run(display_option, speed, params):
    pygame.init()
    agent1 = DQNAgent(params, 1)
    agent2 = DQNAgent(params, 2)
    weights_filepath1 = params['weights_path1']
    weights_filepath2 = params['weights_path2']

    counter_games = 0
    record1 = 0
    record2 = 0
    while counter_games < params['episodes']:
        if params['load_weights']:
            agent1.model.load_weights(weights_filepath1)
            agent2.model.load_weights(weights_filepath2)
            print("weights loaded")
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(900, 600)
        player1 = Player(game, 300, 300)
        player2 = Player(game, 600, 300)

        # Perform first move
        initialize_game(player1, player2, game, agent1, agent2,
                        params['batch_size'])
        if display_option:
            display(player1, player2, game, record1, record2)

        while not game.crash:
            if not params['train']:
                agent1.epsilon = 0
                agent2.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                agent1.epsilon = 1 - (counter_games *
                                      params['epsilon_decay_linear'])
                agent2.epsilon = 1 - (counter_games *
                                      params['epsilon_decay_linear'])

            # get old state
            state_old1 = agent1.get_state(game, player1, player2)
            state_old2 = agent2.get_state(game, player2, player1)

            # perform random actions based on agent.epsilon, or choose the action
            if random.uniform(0, 1) < agent1.epsilon:  # epsilon is a probability in [0, 1]
                final_move1 = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction1 = agent1.model.predict(state_old1.reshape((1, 7)))
                final_move1 = to_categorical(np.argmax(prediction1[0]),
                                             num_classes=3)

            if random.uniform(0, 1) < agent2.epsilon:
                final_move2 = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction2 = agent2.model.predict(state_old2.reshape((1, 7)))
                final_move2 = to_categorical(np.argmax(prediction2[0]),
                                             num_classes=3)

            # perform new move and get new state
            player1.do_move(final_move1, player1.x, player1.y, game, agent1,
                            player2)
            player2.do_move(final_move2, player2.x, player2.y, game, agent2,
                            player1)
            state_new1 = agent1.get_state(game, player1, player2)
            state_new2 = agent2.get_state(game, player2, player1)
            # set reward for the new state
            reward1 = agent1.set_reward(player1, player1.crash)
            reward2 = agent2.set_reward(player2, player2.crash)

            if params['train']:
                # train short-term memory based on the new action and state
                agent1.train_short_memory(state_old1, final_move1, reward1,
                                          state_new1, player1.crash)
                # store the new data in long-term memory
                agent1.remember(state_old1, final_move1, reward1, state_new1,
                                player1.crash)
                agent2.train_short_memory(state_old2, final_move2, reward2,
                                          state_new2, player2.crash)
                agent2.remember(state_old2, final_move2, reward2, state_new2,
                                player2.crash)
            game.score1 += reward1
            game.score2 += reward2
            record1 = get_record(game.score1, record1)
            record2 = get_record(game.score2, record2)
            if display_option:
                display(player1, player2, game, record1, record2)
                pygame.time.wait(speed)
            if player1.crash and player2.crash:
                game.crash = True

        counter_games += 1
        game.crash = False
        player1.crash = False
        player2.crash = False
        print("score1: " + str(game.score1) + "/n")
        print("score2: " + str(game.score2) + "/n")
        if params['train'] and counter_games % 10 == 0:
            agent1.model.save_weights(params['weights_path1'])
            agent2.model.save_weights(params['weights_path2'])
            print("weights saved")
    if params['train']:
        agent1.model.save_weights(params['weights_path1'])
        agent2.model.save_weights(params['weights_path2'])
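
Note: several variants (Examples #12, #13, #18) compare a fractional epsilon from the linear decay schedule against a random draw. Since epsilon lives in [0, 1], the draw has to be a uniform float; an integer randint(0, 1) draw would make the exploration rate a flat 50% whenever epsilon is positive. A one-line helper sketch making the intent explicit:

import random

def should_explore(epsilon):
    # Explore with probability epsilon, where epsilon is in [0, 1].
    return random.uniform(0, 1) < epsilon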
Example #13
def run(display_option, speed, params):
    pygame.init()
    agent = DQNAgent(params)
    weights_filepath = params['weights_path']
    if params['load_weights']:
        agent.model.load_weights(weights_filepath)
        print("weights loaded")

    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < params['episodes']:
        logger.info("===========================")
        logger.info(f"{info_string}")

        time_start = time.time()
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])

        if display_option:
            display(player1, food1, game, record)

        time_start_game_update = time.time()
        while not game.crash:
            time_start_game_update_pygame = time.time()
            if not params['train']:
                agent.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                if agent.epsilon <= params['min_epsilon']:
                    agent.epsilon = params['min_epsilon']
                else:
                    agent.epsilon = 1 - (counter_games *
                                         params['epsilon_decay_linear'])

            # get old state
            state_old, vision = agent.get_state(game, player1, food1)

            # perform random actions based on agent.epsilon, or choose the action
            if random.uniform(0, 1) < agent.epsilon:  # epsilon is a probability in [0, 1]
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(
                    state_old.reshape((1, params['num_input_features'])))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new, vision = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            time_end_game_update_pygame = time.time()

            time_start_game_update_train = time.time()

            if params['train']:
                # train short-term memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data in long-term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)

            time_end_game_update_train = time.time()

            time_start_game_update_record = time.time()
            record = get_record(game.score, record)
            time_end_game_update_record = time.time()

            logger.debug("Pygame update step: " +
                         str((time_end_game_update_pygame -
                              time_start_game_update_pygame)))
            logger.debug("Train short term update step: " +
                         str((time_end_game_update_train -
                              time_start_game_update_train)))
            logger.debug("Record score  step: " +
                         str((time_end_game_update_record -
                              time_start_game_update_record)))

            if display_option:
                cv2.imshow("Vision of the Snake", vision * 255.0)

                # detect any keypresses
                key = cv2.waitKey(1) & 0xFF
                # if the `q` key was pressed, break from the loop
                if key == ord("q"):
                    break
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        # # Pause visualisation if crash
        # if display_option==True:
        #     cv2.imshow("Vision of the Snake", vision * 255.0)
        #
        #     # detect any keypresses
        #     key = cv2.waitKey(1) & 0xFF
        #     # if the `q` key was pressed, break from the loop
        #     if key == ord("q"):
        #         break
        #     display(player1, food1, game, record)
        #     pygame.time.wait(5000)
        time_end_game_update = time.time()
        logger.info(
            "Time to play one game: " +
            str(round((time_end_game_update - time_start_game_update), 3)))

        time_start_long_term = time.time()
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        time_end_long_term = time.time()
        logger.info("Train long term update step: " +
                    str(round((time_end_long_term - time_start_long_term), 3)))

        if agent.epsilon <= params['min_epsilon']:
            agent.epsilon = params['min_epsilon']
        else:
            agent.epsilon = 1 - (counter_games *
                                 params['epsilon_decay_linear'])
        logger.info(f'The epsilon value is: {agent.epsilon}')

        logger.debug("===========================")

        counter_games += 1
        logger.info(f'Game {counter_games}      Score: {game.score}')
        logger.info(f'The agent memory length is: {len(agent.memory)}')

        score_plot.append(game.score)
        counter_plot.append(counter_games)
        if params['train'] and counter_games % 100 == 0:
            agent.model.save_weights(params['weights_path'])
            logger.info("===========SAVING THE MODEL================")
            with open(params['memory_path'], 'wb') as handle:
                pickle.dump(agent.memory, handle)
        logger.info("End Game Loop")
        time_end = time.time()
        epoch_timer = round((time_end - time_start), 3)
        logger.info(f"One epoch takes: {epoch_timer} seconds")
        eta_prediction = round(
            (params['episodes'] - counter_games) * epoch_timer / 60)
        logger.info(f"Time remaining is: {eta_prediction} minutes")

    if params['train']:
        agent.model.save_weights(params['weights_path'])
        with open(params['memory_path'], 'wb') as handle:
            pickle.dump(agent.memory, handle)
        params['counter_plot'] = counter_plot
        params['score_plot'] = score_plot
        with open(params['params_path'], 'wb') as handle:
            pickle.dump(params, handle)
Example #14
def main():
    # Initialize the network
    agent = DQNAgent()
    taille = 30  # Number of cells = taille * taille
    game = Game(taille)  # Initialize the game
    init_ihm(game)  # Initialize the UI
    max_iteration, cpt_iteration = 2, 0  # Number of games to play, game counter
    # Lines of the best score, scores, and game numbers
    rep_lines_bestScore, score_plot, counter_plot = [], [], []
    while cpt_iteration < max_iteration:
        game = Game(taille)  # Initialize the game
        game.calculer_lignes_jouables()  # Compute the playable lines
        while not game.crash:  # While playable lines remain
            agent.epsilon = 80 - cpt_iteration
            state_old = agent.get_state(game)  # get old state
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:  # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 13)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)
            if np.array_equal(final_move, [1, 0, 0]):
                tmp_line = game.rep_playable_lines[random.randint(
                    0, int((len(game.rep_playable_lines) - 1) / 3))]
            elif np.array_equal(final_move, [0, 1, 0]):
                tmp_line = game.rep_playable_lines[random.randint(
                    int((len(game.rep_playable_lines) - 1) / 3),
                    int((len(game.rep_playable_lines) - 1) * 2 / 3))]
            elif np.array_equal(final_move, [0, 0, 1]):
                tmp_line = game.rep_playable_lines[random.randint(
                    int((len(game.rep_playable_lines) - 1) * 2 / 3),
                    int(len(game.rep_playable_lines) - 1))]
            game.jouer_ligne(tmp_line)  # Play the line tmp_line=[[cell * 5], direction]
            game.rep_lines.append(tmp_line)  # Add the line to the list of played lines
            game.calculer_lignes_jouables()  # Recompute the playable lines
            best = 0
            if cpt_iteration == 0 or len(game.rep_lines) > max(score_plot):
                rep_lines_bestScore = game.rep_lines  # Save the lines of the best score
                best = 1
            if len(game.rep_playable_lines) == 0:  # no playable line remains
                game.crash = True
            else:
                state_new = agent.get_state(game)
                reward = agent.set_reward(game.crash, best)
                # train short-term memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data in long-term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)
        if cpt_iteration == 0 or len(game.rep_lines) > max(score_plot):
            # If this is the best score reached, save its list of lines
            rep_lines_bestScore = game.rep_lines
        score_plot.append(len(game.rep_lines))  # Add the score to the list of scores
        cpt_iteration += 1
        print(cpt_iteration, "- Score:", len(game.rep_lines),
              "- Best score:", max(score_plot))
        counter_plot.append(cpt_iteration)  # Add the game number
        agent.replay_new(agent.memory)
    agent.model.save_weights('weights.hdf5')
    afficher_lignes_ihm(rep_lines_bestScore, max_iteration, max(score_plot))
    plot_seaborn(counter_plot, score_plot)
    mainloop()
Example #15
def run(params):
    """
    Run the DQN algorithm, based on the parameters previously set.   
    """
    pygame.init()
    agent = DQNAgent(params)
    agent = agent.to(DEVICE)
    agent.optimizer = optim.Adam(agent.parameters(),
                                 weight_decay=0,
                                 lr=params['learning_rate'])
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    total_score = 0
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if params['display']:
            display(player1, food1, game, record)

        while not game.crash:
            if not params['train']:
                agent.epsilon = 0.01
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games *
                                     params['epsilon_decay_linear'])

            # get old state
            state_old = agent.get_state(game, player1, food1)

            # perform random actions based on agent.epsilon, or choose the action
            if random.uniform(0, 1) < agent.epsilon:
                final_move = np.eye(3)[randint(0, 2)]
            else:
                # predict action based on the old state
                with torch.no_grad():
                    state_old_tensor = torch.tensor(
                        state_old.reshape(
                            (1, 11)), dtype=torch.float32).to(DEVICE)
                    prediction = agent(state_old_tensor)
                    final_move = np.eye(3)[np.argmax(
                        prediction.detach().cpu().numpy()[0])]

            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)

            if params['train']:
                # train short-term memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data in long-term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)

            record = get_record(game.score, record)
            if params['display']:
                display(player1, food1, game, record)
                pygame.time.wait(params['speed'])
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        total_score += game.score
        print(f'Game {counter_games}      Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    mean, stdev = get_mean_stdev(score_plot)
    if params['train']:
        model_weights = agent.state_dict()
        torch.save(model_weights, params["weights_path"])
    if params['plot_score']:
        plot_seaborn(counter_plot, score_plot, params['train'])
    return total_score, mean, stdev
Example #16
def run(display_option, speed, params):
    if display_option:
        pygame.init()
    agent = DQNAgent(params)

    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < params['episodes']:
        # Initialize classes
        game = Game(440, 440, display_option)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if display_option:
            display(player1, food1, game, record)
        step_count = 0
        raw_data = [] if params['raw_output'] is not None else None
        while not game.crash:
            if display_option:
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        pygame.quit()
                        quit()
            if not params['train']:
                agent.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games *
                                     params['epsilon_decay_linear'])

            # get old state
            state_old = agent.get_state(game, player1, food1)

            # perform random actions based on agent.epsilon, or choose the action
            if random() < agent.epsilon or random() < (step_count - 300) / 300:
                prediction = [0, 0, 0]
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 11)))[0]
                final_move = to_categorical(np.argmax(prediction),
                                            num_classes=3)
            if raw_data is not None:
                step_data = {
                    'head_x': player1.x,
                    'head_y': player1.y,
                    'food_x': food1.x_food,
                    'food_y': food1.y_food,
                    'snake_position': copy.deepcopy(player1.position),
                    'snake_x_change': player1.x_change,
                    'snake_y_change': player1.y_change,
                    'action': final_move.tolist()
                }
            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            game.player.action = final_move
            state_new = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            if raw_data is not None:
                step_data['eaten'] = player1.eaten
                step_data['reward'] = reward
                step_data['crash'] = game.crash
                step_data['next_state'] = {
                    'head_x': player1.x,
                    'head_y': player1.y,
                    'food_x': food1.x_food,
                    'food_y': food1.y_food,
                    'snake_position': copy.deepcopy(player1.position),
                    'snake_x_change': player1.x_change,
                    'snake_y_change': player1.y_change,
                }
                raw_data.append(step_data)

            if params['verbose']:
                print(prediction, final_move, reward, step_count)

            if params['train']:
                # train short-term memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data in long-term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)

            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
            step_count += 1
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        print(
            f'Game {counter_games}/{step_count}/{agent.epsilon}      Score: {game.score}'
        )
        if raw_data is not None:
            fn_index = params['raw_output_index'] + counter_games
            with open(os.path.join(params['raw_output'], f'{fn_index}.json'),
                      'w') as out:
                json.dump(raw_data, out)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    if params['train']:
        agent.model.save_weights(params['weights_path'])
    plot_seaborn(counter_plot, score_plot)
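
Note: when params['raw_output'] is set, this loop dumps each game's transitions to <raw_output>/<index>.json. A hypothetical reader for those dumps:

import json
import os

def load_episode(raw_dir, index):
    # Each file is a JSON list of per-step dicts with keys such as
    # 'head_x', 'food_x', 'action', 'reward', 'crash', 'next_state'.
    with open(os.path.join(raw_dir, f'{index}.json')) as fh:
        return json.load(fh)

# steps = load_episode('raw_output', 1)
# print(steps[0]['action'], steps[0]['reward'])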
Example #17
def gameLoop():
    game_over = False
    games = 0
    games_to_play = 300
    highscore = 0
    score_plot = []
    games_plot = []
    agent = DQNAgent()

    while games < games_to_play:  # Play a total of x games
        xpos = dis_width / 2  # X Spawn point coordinate
        ypos = dis_height / 2  # Y Spawn point coordinate

        xdir = 0
        ydir = 0

        snake_List = []
        length_of_snake = 1

        food = random_food(snake_List)
        xfood = food[0]
        yfood = food[1]

        while not game_over:
            agent.epsilon = 80 - games
            # agent.epsilon = 0

            state_old = agent.get_state(xpos, ypos, xdir, ydir, snake_block,
                                        xfood, yfood, dis_width, dis_height,
                                        snake_List)

            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)
            # Do Action -------------------------------------------------------------
            if np.array_equal(final_move, [1, 0, 0]):
                pass  # keep going straight
            elif np.array_equal(final_move, [0, 1, 0]) and ydir == 0:
                # turn right while moving horizontally
                ydir = snake_block
                xdir = 0
            elif np.array_equal(final_move, [0, 1, 0]) and xdir == 0:
                # turn right while moving vertically
                xdir = snake_block
                ydir = 0
            elif np.array_equal(final_move, [0, 0, 1]) and ydir == 0:
                # turn left while moving horizontally
                ydir = -snake_block
                xdir = 0
            elif np.array_equal(final_move, [0, 0, 1]) and xdir == 0:
                # turn left while moving vertically
                xdir = -snake_block
                ydir = 0
            # Did Action -------------------------------------------------------------
            # Update Frame after Action ----------------------------------------------
            eaten = False
            # If colliding with the border
            if ((xpos == dis_width - snake_block and xdir > 0)
                    or (xpos == 0 and xdir < 0)
                    or (ypos == dis_height - snake_block and ydir > 0)
                    or (ypos == 0 and ydir < 0)):
                game_over = True
            xpos += xdir
            ypos += ydir
            dis.fill(blue)
            pygame.draw.rect(dis, green,
                             [xfood, yfood, snake_block, snake_block])
            snake_Head = [xpos, ypos]
            snake_List.append(snake_Head)
            if len(snake_List) > length_of_snake:
                del snake_List[0]

            for x in snake_List[:-1]:
                if x == snake_Head:
                    game_over = True

            draw_snake(snake_block, snake_List)
            highscore = get_highscore(highscore, length_of_snake - 1)
            your_score(highscore, length_of_snake - 1)

            if xpos == xfood and ypos == yfood:
                eaten = True
                food = random_food(snake_List)
                xfood = food[0]
                yfood = food[1]
                length_of_snake += 1
            # Updated Frame after Action ---------------------------------------------

            state_new = agent.get_state(xpos, ypos, xdir, ydir, snake_block,
                                        xfood, yfood, dis_width, dis_height,
                                        snake_List)
            reward = agent.set_reward(game_over, eaten)

            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     game_over)

            agent.remember(state_old, final_move, reward, state_new, game_over)

            pygame.display.update()
            clock.tick(snake_speed)

        print("Game:", games + 1, "Score:", length_of_snake - 1, "Highscore:",
              highscore)
        agent.replay_new(agent.memory)
        games += 1
        score_plot.append(length_of_snake - 1)
        games_plot.append(games)
        game_over = False

    agent.model.save_weights('weights10x10V3.hdf5')
    pygame.quit()
    # Plot stats of game:
    sns.set(color_codes=True)
    ax = sns.regplot(x=games_plot, y=score_plot)
    ax.set(xlabel='games', ylabel='score')
    plt.show()
    quit()
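
Note: here set_reward takes (game_over, eaten) rather than a player object. Its body is not shown; a hypothetical shaping consistent with the call sites, with the magnitudes as assumptions:

def set_reward(game_over, eaten):
    # Penalize dying, reward eating, stay neutral otherwise.
    if game_over:
        return -10
    if eaten:
        return 10
    return 0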
Example #18
def run(params):
    pygame.init()
    agent = DQNAgent(params)
    print_info(params)
    weights_filepath = params['weights_path']
    if params['load_weights']:
        agent.model.load_weights(weights_filepath)
        print("Weights Loaded")

    else:
        print("Training From Scratch...")
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Class Objects
        game = Game(params)
        player1 = game.player
        food1 = game.food

        total_reward = -150
        total_reward2 = 0
        # First Move
        initialize_game(player1, game, food1, agent, params['batch_size'],
                        counter_games)
        if params['display']:
            display(player1, food1, game, record, counter_games, total_reward,
                    params)
        while not game.crash:
            if not params['train']:
                agent.epsilon = 0

            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (
                    (counter_games) * params['epsilon_decay_linear'])

            # old State
            state_old = agent.get_state(game, player1, food1, params)

            # Random Actions or Choose
            if random.uniform(0, 1) < agent.epsilon and not params['load_weights']:
                final_move = to_categorical(randint(0, 2), num_classes=3)

            # Prediction
            else:
                prediction = agent.model.predict(state_old.reshape((1, 20)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1, params)
            reward = agent.set_reward(player1, game.crash, food1,
                                      counter_games, final_move)
            total_reward += reward
            total_reward2 += reward
            total_reward = round(total_reward, 2)
            total_reward2 = round(total_reward2, 2)

            if params['train']:
                # train short-term memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data in long-term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)

            record = get_record(game.score, record)
            if params['display']:
                display(player1, food1, game, record, counter_games,
                        total_reward, params)

        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        print(
            f'Game {counter_games}      Score: {game.score}    Reward: {total_reward2}'
        )
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        if counter_games % params['cf'] == 0:
            n = int(counter_games / params['cf'])
            agent.model.save_weights(params['cp'] + str(n) + ".hdf5")
            print("Checkpoint Saved...")

    plot_seaborn(counter_plot, score_plot)
    pygame.quit()
    quit()
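This example and Example #24 anneal exploration linearly through `params['epsilon_decay_linear']` rather than the `80 - counter_games` countdown used elsewhere. A sketch of the schedule; the decay constant below is illustrative:

def linear_epsilon(counter_games, decay=1.0 / 75):
    # 1.0 (fully random) at game 0, reaching 0 after 1/decay games
    return max(0.0, 1 - counter_games * decay)

# linear_epsilon(0) -> 1.0, linear_epsilon(75) -> 0.0, clipped at 0 afterwards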
Example #19
def main():
    # Initialize the network
    agent = DQNAgent()
    taille = 30  # number of cells = taille * taille
    game = Game(taille)  # initialize the game
    init_ihm(game)  # initialize the UI
    max_iteration, cpt_iteration = 200, 0  # number of games to play, games counter
    # lines of the best score, scores, game numbers
    rep_lines_bestScore, score_plot, counter_plot = [], [], []
    while cpt_iteration < max_iteration:
        game = Game(taille)  # initialize the game
        game.calculer_lignes_jouables()  # compute the lines that can be played
        if len(game.rep_playable_lines) > 1:
            # pick one of the playable lines at random
            game.choosed_line = game.rep_playable_lines[random.randint(
                0, len(game.rep_playable_lines) - 1)]
        elif len(game.rep_playable_lines) != 0:
            game.choosed_line = game.rep_playable_lines[0]
        # while there are still lines left to play
        while not game.crash and game.cpt_liberte < game.liberte:
            agent.epsilon = 120 - cpt_iteration
            state_old = agent.get_state(game)  # get old state
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 4), num_classes=8)
            else:  # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 80)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=8)
            game.move = final_move
            # the eight possible outputs and their (dx, dy) offsets
            vect_squar = [[-1, -1], [-1, 0], [-1, 1], [0, 1], [1, 1], [1, 0],
                          [1, -1], [0, -1]]
            vect_move = [[0] * 8 for _ in range(8)]  # initialize the outputs
            for i in range(len(vect_squar)):
                vect_move[i][i] = 1  # 8x8 identity matrix
            for i in range(len(vect_move)):
                # find which output final_move corresponds to and move the
                # player by the offset that output indicates
                if vect_move[i] == final_move.tolist():
                    game.x_player = int(game.x_player + vect_squar[i][0])
                    game.y_player = int(game.y_player + vect_squar[i][1])
                    game.move = vect_move[i]  # save the move
            game.found = False
            # if a playable line starts at the player's position
            for i in range(len(game.rep_playable_lines) - 1):
                if (len(game.rep_playable_lines) != 0
                        and game.rep_playable_lines[i][0][0].x == game.x_player
                        and game.rep_playable_lines[i][0][0].y == game.y_player):
                    game.found = True
                    # play the line; tmp_line = [[cellule * 5], direction]
                    game.jouer_ligne(game.rep_playable_lines[i])
                    # add the line to the list of played lines
                    game.rep_lines.append(game.rep_playable_lines[i])
                    game.calculer_lignes_jouables()  # recompute the playable lines
                    if cpt_iteration == 0 or len(game.rep_lines) > max(score_plot):
                        # save the list of lines of the best score
                        rep_lines_bestScore = game.rep_lines
                    state_new = agent.get_state(game)
                    reward = agent.set_reward(len(game.rep_playable_lines),
                                              game.found)
                    # train short memory based on the new action and state
                    agent.train_short_memory(state_old, final_move, reward,
                                             state_new, game.crash)
                    # store the new data into a long term memory
                    agent.remember(state_old, final_move, reward, state_new,
                                   game.crash)
                    game.cpt_liberte = 0
                    break
            if len(game.rep_playable_lines) == 0:  # no playable lines left
                game.crash = True
                state_new = agent.get_state(game)
                reward = agent.set_reward(0, game.found)
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)
            elif game.cpt_liberte > game.liberte - 2 and not game.found:
                game.x_player = 13  # return to the starting cell
                game.y_player = 13
                game.crash = True
                game.found = False
                state_new = agent.get_state(game)
                reward = agent.set_reward(0, game.found)
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)
            elif not game.found:
                game.cpt_liberte += 1
        if cpt_iteration == 0 or len(game.rep_lines) > max(score_plot):
            # best score so far: save its list of lines
            rep_lines_bestScore = game.rep_lines
        score_plot.append(len(game.rep_lines))  # add the score to the score list
        cpt_iteration += 1  # increment the games counter
        print(cpt_iteration, "-Score: ", len(game.rep_lines),
              " -  Best score: ", max(score_plot))
        counter_plot.append(cpt_iteration)  # record the game number
        agent.replay_new(agent.memory)
    # show the lines of the best score in the UI
    afficher_lignes_ihm(rep_lines_bestScore, max_iteration, max(score_plot))
    plot_seaborn(counter_plot, score_plot)  # plot scores against games played
    mainloop()
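The hand-built identity matrix and linear search above only map the 8-way one-hot move onto a (dx, dy) offset; `np.argmax` can do that lookup directly. A sketch using the same `vect_squar` ordering as the example (the `apply_move` helper is illustrative):

import numpy as np

VECT_SQUAR = [[-1, -1], [-1, 0], [-1, 1], [0, 1],
              [1, 1], [1, 0], [1, -1], [0, -1]]


def apply_move(x, y, final_move):
    i = int(np.argmax(final_move))  # index of the chosen direction
    dx, dy = VECT_SQUAR[i]
    return x + dx, y + dy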
Example #20
def main():
    # Initialize all pygame modules and create the window
    # (we get back a Surface that represents the screen).
    pygame.init()

    agent = DQNAgent()
    counter_games = 0
    record = 0
    while counter_games < 150:
        screen = pygame.display.set_mode((800, 600))

        # Set the window title, keep the mouse cursor visible,
        # and repeat key events while a key is held down.
        pygame.display.set_caption("Pygame-Tutorial: Animation")
        pygame.mouse.set_visible(1)
        pygame.key.set_repeat(1, 30)
        pygame.font.init()  # you have to call this at the start, if you want to use this module
        myfont = pygame.font.SysFont('Comic Sans MS', 30)

        # Create the Clock object we need to cap the frame rate.
        clock = pygame.time.Clock()

        # Create a tilemap.
        map = Tilemap.Tilemap()

        event = AutoInput.AutoInput()

        # The loop, and with it our game, runs as long as running == True.
        running = True
        max_steps_reached = False
        max_steps = 100
        step = 0
        max_score = map.player.pos_x
        max_score_evolution = []
        while running and not max_steps_reached:
            agent.epsilon = 80 - counter_games
            #get old state
            state_old = agent.get_state(map)
            map.player.pos_x_old = map.player.pos_x

            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 7)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            # Cap the frame rate at 30 frames per second.
            # Pygame waits if the program runs faster.
            clock.tick(30)

            # Fill the screen surface with light blue (RGB = 198, 209, 255).
            screen.fill((198, 209, 255))

            map.handle_input(final_move)

            # continue the jump animation if one is in progress
            if map.player.isjump:
                map.player.jump()

            # Render the tilemap onto the screen surface.
            map.render(screen)
            textsurface = myfont.render(
                "Game " + str(counter_games) + " Step " + str(step) +
                " Max Score " + str(max_score), False, (0, 0, 0))
            screen.blit(textsurface, (50, 50))

            # Draw the obstacle onto the map and check whether a new one is needed
            if not map.isThereHindernis:
                map.createNewHindernis()
                map.isThereHindernis = True

            map.hindernis.move()
            map.hindernis.render(screen)
            map.checkHindernisOnMap()

            state_new = agent.get_state(map)

            crash = map.collisionDetection()

            # set reward for the new state
            reward = agent.set_reward(map.player, crash)
            # train short memory based on the new action and state
            # (crash, not the always-true running flag, marks the episode end)
            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     crash)

            # Show the contents of screen
            pygame.display.flip()

            if map.player.pos_x > max_score:
                max_score = map.player.pos_x

            step += 1
            if step >= max_steps:
                max_steps_reached = True
                max_score_evolution.append(max_score)

        agent.remember(state_old, final_move, reward, state_new, crash)
        #record = get_record(map.player.pos_x, record)
        #if display_option:
        #    #display(player1, food1, game, record)
        #    pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1

    agent.model.save_weights('weights.hdf5')
    # seaborn has no plot() function; draw the curve with matplotlib
    # (assumes pyplot is imported as plt, as in the other examples)
    plt.plot(max_score_evolution)
    plt.show()
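For reference, the `to_categorical` call that every example uses to one-hot the chosen action is equivalent in values to indexing an identity matrix:

import numpy as np

i = 2
one_hot = np.eye(3)[i]  # same values as to_categorical(2, num_classes=3)
# array([0., 0., 1.])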
Example #21
    return s


if __name__ == '__main__':
    tetris = Tetris()
    genetic = None

    figure_values = {}  # avoid shadowing the built-in dict
    for e in Figure:
        figure_values[e.name] = e.value

    state_size = 200
    action_size = 38
    agent = DQNAgent(10, action_size)
    agent.load("mreza-dqn.h5")
    agent.epsilon = 0.12
    done = False
    episodes = 0

    app = TetrisApp()
    ###############################################################
    #################### Tetris-specific part ####################
    key_actions = {
        'ESCAPE': app.quit,
        'LEFT': lambda: app.move(-1),
        'RIGHT': lambda: app.move(+1),
        'DOWN': lambda: app.drop(True),
        'UP': app.rotate_stone,
        'p': app.toggle_pause,
        'RETURN': app.start_game,
        # wrapped in a lambda so the save runs on keypress, not at dict creation
        'SPACE': lambda: agent.save("mreza-dqn.h5"),
    }
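The values in `key_actions` must be zero-argument callables: the app looks up the pressed key and calls the stored handler, so an expression like `agent.save(...)` has to be wrapped in a lambda or it executes once while the dict is being built. A dispatch sketch (the `dispatch` helper is hypothetical, for illustration):

def dispatch(key_name, key_actions):
    # look the key up and invoke its handler; agent.save(...) runs only now
    if key_name in key_actions:
        key_actions[key_name]()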
Example #22
    # print(reward)
    agent.memoize(state_init1, action, reward, state_init2, game.game_over)
    agent.replay_new(agent.memory)


while num_games < 500:
    num_games += 1
    player = []
    game = Game(window_width, window_height)
    player.append(
        Player(game, 0.5 * game.window_width, 0.5 * game.window_height))
    food = Food(game)
    init(agent, game, player, food)
    while not game.game_over:
        # for random moves
        agent.epsilon = epsilon - num_games
        old_state = agent.get_state(game, player, food)

        if random.randint(0, 200) < agent.epsilon:
            new_direction = to_categorical(random.randint(0, 4), num_classes=5)
        else:
            predict = agent.model.predict(old_state.reshape((1, 13)))
            new_direction = to_categorical(np.argmax(predict[0]),
                                           num_classes=5)
        # perform move
        event_handler(player, np.argmax(np.array(new_direction)))

        update_screen()

        new_state = agent.get_state(game, player, food)
Example #23
def run_game():
    FPS = 60

    # Initialize game, settings and create a screen object.
    pygame.init()
    fps_clock = pygame.time.Clock()
    ai_settings = Settings()

    # FOR THE DQN #

    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0

    # FOR THE DQN #

    while counter_games < 150:

        # Create statistics.
        stats = GameStats(ai_settings)

        # Create game items.
        game_items = GameItems(ai_settings, stats)

        # Create a fleet of aliens.
        gf.create_fleet(ai_settings, game_items)
        played = False

        gf.start_new_game(ai_settings, stats, game_items)

        # Start the main loop for the game.
        while stats.game_active:
            stats.time_passed = fps_clock.tick(FPS) / 1000  # Time in seconds since previous loop.

            gf.check_events(ai_settings, stats, game_items)

            if stats.game_active:
                game_items.ship.update(stats)
                gf.update_bullets(ai_settings, stats, game_items)
                gf.update_aliens(ai_settings, stats, game_items)
                # FOR THE DQN #
                agent.epsilon = 80 - counter_games
                state_old = gf.get_state(ai_settings, stats, game_items)
                if randint(0, 200) < agent.epsilon:
                    final_move = to_categorical(randint(0, 3), num_classes=4)
                else:
                    # predict action based on the old state
                    prediction = agent.model.predict(state_old.reshape((1, 3536)))
                    final_move = to_categorical(np.argmax(prediction[0]), num_classes=4)

                # FOR THE DQN #

                # DQN #
                # perform new move and get new state
                gf.do_move(final_move, ai_settings, stats, game_items)


                state_new = gf.get_state(ai_settings, stats, game_items)

                # set reward for the new state
                reward = agent.set_reward(stats.score, stats.ships_left)

                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, stats.game_active)

                # store the new data into a long term memory
                # TODO: agent.remember(state_old, final_move, reward, state_new, game.crash)
                # Get value of played game
                # TODO: record = get_record(game.score, record)
                # DQN #
                
                
                played = True
            elif played:
                user = ask(game_items.screen)
                if len(user) > 0:
                    coll = connect_and_collect()
                    add_score(user, stats.score, coll)
                played = False

            # gf.update_screen(ai_settings, stats, game_items)


        # FOR THE DQN #
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', stats.score)
        score_plot.append(stats.score)
        counter_plot.append(counter_games)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
Example #24
def run(display_option, speed, params):
    pygame.init()
    agent = DQNAgent(params)
    weights_filepath = params['weights_path']
    if params['load_weights']:
        agent.model.load_weights(weights_filepath)
        print("weights loaded")

    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            if not params['train']:
                agent.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games *
                                     params['epsilon_decay_linear'])

            # get old state
            state_old = agent.get_state(game, player1, food1)

            # perform random actions based on agent.epsilon, or choose the action
            # (uniform draw in [0, 1) so exploration follows the schedule)
            if np.random.rand() < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)

            if params['train']:
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)

            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        print(f'Game {counter_games}      Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    if params['train']:
        agent.model.save_weights(params['weights_path'])
    plot_seaborn(counter_plot, score_plot)
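Keras models predict on batches, which is why each example reshapes its flat state vector to `(1, n)` before calling `predict` and then indexes `[0]` into the result. A sketch with the 11-feature state used here:

import numpy as np

state = np.zeros(11)             # one observation with 11 features
batch = state.reshape((1, 11))   # batch of size 1 for model.predict
# q_values = agent.model.predict(batch)[0]  # -> one score per action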
Example #25
def run():
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    agent = DQNAgent(TEST_MODE)
    if TEST_MODE:
        pygame.init()
        pygame.font.init()
        print('Start testing the trained model ...')
    else:
        print('Start training ...')
    while counter_games < 150:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent)
        if TEST_MODE:
            display(player1, food1, game, record)

        while not game.crash:
            #agent.epsilon is set to give randomness to actions
            agent.epsilon = 80 - counter_games

            #get old state
            state_old = agent.get_state(game, player1, food1)

            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon and not TEST_MODE:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old)
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            #perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)

            # train short memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     game.crash)

            # store the new data into a long term memory
            agent.remember(state_old, final_move, reward, state_new,
                           game.crash)
            record = get_record(game.score, record)
            if TEST_MODE:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
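Example #25 above gates the random branch on `not TEST_MODE` rather than zeroing epsilon the way Examples #18 and #24 do; either way, evaluation runs greedily. A minimal sketch of that selection logic (the `choose_action` helper and its signature are illustrative, not from the example):

from random import randint

import numpy as np
from tensorflow.keras.utils import to_categorical


def choose_action(model, state, epsilon, test_mode):
    # explore only while training; at test time always exploit
    if not test_mode and randint(0, 200) < epsilon:
        return to_categorical(randint(0, 2), num_classes=3)
    prediction = model.predict(state.reshape((1, -1)))
    return to_categorical(np.argmax(prediction[0]), num_classes=3)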
Example #26
def train(epoch=10):
    pygame.init()
    agent = DQNAgent(output_dim=3)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < epoch:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        field0 = game.field

        # Perform first move
        initialize_game(player1, game, field0, agent)
        if display_option:
            display(player1, field0, game, record)

        game_epoch = 0
        while not game.crash:
            #agent.epsilon is set to give randomness to actions
            agent.epsilon = 50 - game_epoch

            #get old state
            state_old = agent.get_state(game, player1, field0)

            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, 100) < agent.epsilon:
                final_move = randint(0, 2)
                # print("random with prob {}".format(agent.epsilon))
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old)
                final_move = np.argmax(prediction[0])
                print("prediction : {}".format(prediction))

            # print("move: {} to position ({}, {})".format(final_move, player1.x, player1.y))

            #perform new move and get new state
            player1.do_move(final_move, field0, game)

            if game_epoch >= 19:
                # get new state
                state_new = agent.get_state(game, player1, field0)

                # set reward for the new state
                reward = agent.set_reward(player1, game.crash, final_move)

                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)

                # store the new data into a long term memory
                if_remember = False
                if game.crash:
                    agent.remember(state_old, final_move, reward, state_new,
                                   game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                elif final_move == 0 and randint(1, 20) < 20:
                    agent.remember(state_old, final_move, reward, state_new,
                                   game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                elif final_move != 0 and randint(1, 20) < 20:
                    agent.remember(state_old, final_move, reward, state_new,
                                   game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                print(
                    "actual move {} to ({}, {}) gets reward {} - remember {}".
                    format(final_move, player1.x, player1.y, reward,
                           if_remember))

                # explore other move
                if final_move == 0:  # no
                    # 1 left
                    explore_moves(game, field0, agent, player1, state_old, 1,
                                  max(0, player1.x - 1), player1.y)
                    # 2 right
                    explore_moves(game, field0, agent, player1, state_old, 2,
                                  min(player1.x + 1, 21), player1.y)
                elif final_move == 1:  # left
                    # 0 no
                    explore_moves(game, field0, agent, player1, state_old, 0,
                                  min(player1.x + 1, 21), player1.y)
                    # 2 right
                    explore_moves(game, field0, agent, player1, state_old, 2,
                                  min(player1.x + 2, 21), player1.y)
                elif final_move == 2:  # right
                    # 0 no
                    explore_moves(game, field0, agent, player1, state_old, 0,
                                  max(0, player1.x - 1), player1.y)
                    # 1 left
                    explore_moves(game, field0, agent, player1, state_old, 1,
                                  max(0, player1.x - 2), player1.y)

            record = get_record(game.score, record)
            if display_option:
                display(player1, field0, game, record)
                pygame.time.wait(speed)

            game_epoch += 1

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)

        if game.score >= record:
            agent.model.save_weights(modelFile + '/weights.hdf5')
    agent.model.save_weights(modelFile + '/weightsFinal.hdf5')
    plot_seaborn(counter_plot, score_plot)
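`get_record` is called throughout these examples but never shown; judging by how it is used, it simply keeps the running best score, along these lines (a reconstruction, not the original definition):

def get_record(score, record):
    # keep the highest score seen so far
    return score if score >= record else record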
Example #27
def run():
    # Workaround for a current incompatibility between CUDA and TF: force CPU usage in code
    import os
    import tensorflow as tf

    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

    if tf.test.gpu_device_name():
        print('[DEBUG] GPU found')
    else:
        print("[DEBUG] No GPU found")
    # end of workaround

    pygame.init()
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    # while counter_games < 150:
    while counter_games < 100:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            #agent.epsilon is set to give randomness to actions
            agent.epsilon = 80 - counter_games

            #get old state
            state_old = agent.get_state(game, player1, food1)

            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
                print(f"final_move: {final_move}")

                # from here: placeholder random responses to start the learning

                final_response = {"type": "endRound"}

                # end of placeholder random responses

            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            #perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)

            # train short memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     game.crash)

            # store the new data into a long term memory
            agent.remember(state_old, final_move, reward, state_new,
                           game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)