Example #1
def run():
    pygame.init()
    
    
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    
    while counter_games < 100:
        game = Game(width, height)
        pipe = game.pipe
        player = game.player
        while not game.crash:
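            # one step per frame: epsilon-greedy action selection, environment
            # step, short-term training, then store the transition for replay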
            
            agent.epsilon = 60 - counter_games
            
            state_old = agent.get_state(game, player, pipe)
            
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 1), num_classes=2)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1,6)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=2)
              
            player.Move(game, final_move)
            pipe.move_pipe(160)
            # check_collision(player, pipe)

            check_score(pipe, game)
            
            reward = agent.set_reward(pipe, game.crash)
            
            state_new = agent.get_state(game, player, pipe)
            
            
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
            
            # store the new data into long-term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)

            display(game, player, pipe, record)
            if game.score == 20:
                game.crash = True
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        record = get_record(game.score, record)
        
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
    pygame.quit()
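Nearly every example ends by calling plot_seaborn(counter_plot, score_plot), a helper that is not shown in these listings. A minimal sketch of what it might look like (an assumption, not the original implementation; Example #17 below inlines the same seaborn calls, and the parameter names here are illustrative):

import matplotlib.pyplot as plt
import seaborn as sns

def plot_seaborn(array_counter, array_score):
    # scatter per-game scores against the game index, with a linear trend line
    sns.set(color_codes=True)
    ax = sns.regplot(x=array_counter, y=array_score)
    ax.set(xlabel='games', ylabel='score')
    plt.show()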
Example #2
def run():
    agent = DQNAgent(size)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < games:
        # Initialize classes
        game = Game(size, size)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            #agent.epsilon is set to give randomness to actions
            agent.epsilon = (games * 0.4) - counter_games

            #get old state
            state_old = agent.get_state(game, player1, food1)

            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, games) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, agent.size)))
                final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)

            #perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)

            #set reward for the new state
            reward = agent.set_reward(player1, game.crash)

            #train short-term memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)

            # store the new data into a long term memory
            agent.remember(state_old, final_move, reward, state_new, game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        print('Game', counter_games, ' Score:', game.score, 'Last 10 Avg:', np.mean(score_plot[-10:]))

    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
Example #3
def run():
    pygame.init()
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < 150:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 80 - counter_games

            # get old state
            state_old = agent.get_state(game, player1, food1)

            # predict action based on the old state
            final_move = agent.predict(state_old)

            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)

            # set the reward for the new state
            reward = agent.set_reward(player1, game.crash)

            # train short-term memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     game.crash)

            # store the new data into a long term memory
            agent.remember(state_old, final_move, reward, state_new,
                           game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)

    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
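Unlike its siblings, Example #3 hides the epsilon-greedy branch behind agent.predict(state_old). The original method is not shown; the sketch below is an assumption reconstructed from the explicit branches in the other examples (the (1, 11) input shape, the 0-200 epsilon scale, and the standalone-Keras import path are all carried over from them):

from random import randint

import numpy as np
from keras.utils import to_categorical

def predict(self, state):  # hypothetical DQNAgent method
    # explore: random one-hot move while epsilon is still high
    if randint(0, 200) < self.epsilon:
        return to_categorical(randint(0, 2), num_classes=3)
    # exploit: one-hot of the action with the highest predicted Q-value
    prediction = self.model.predict(state.reshape((1, 11)))
    return to_categorical(np.argmax(prediction[0]), num_classes=3)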
Example #4
class QLearningBehaviour(Behaviour):
    def __init__(self, visualRange=3):
        self.agent = DQNAgent(visualRange)
        self.age = 0
        self.current_move = None
        self.current_input = None
    
    #def on_init(self,visualRange):

    def decide(self, input):
        self.current_input = input

        final_move = None

        # epsilon-greedy: exploration probability decays with age, floored at 15%
        epsilon = 150 - self.age
        if epsilon < 15:
            epsilon = 15

        if randint(0, 100) < epsilon:
            final_move = randint(0, 5)
        else:
            #get old state
            #state_old = agent.get_state(game, player1, food1)
            state_old = np.asarray(input)

            # use the ndarray version of the input so .reshape is always available
            prediction = self.agent.model.predict(state_old.reshape((1, -1)))
            #final_move = to_categorical(np.argmax(prediction[0]), num_classes=5)
            final_move = np.argmax(prediction[0])

        self.current_move = final_move
        self.age += 1
        print(final_move)
        return final_move

        #perform new move and get new state
        #self.do_move(final_move, self.x,self.y,agent)
        #state_new = agent.get_state(game, player1, food1)

    def feedback(self, reward, state):
        #set the reward for the new state
        #reward = agent.set_reward(input, move,reward)
        state_new = np.asarray(state)
        #train short-term memory based on the new action and state
        state_old = self.current_input
        final_move = to_categorical(self.current_move, num_classes=6)
        self.agent.train_short_memory(state_old, final_move, reward, state_new)

        # store the new data into a long term memory
        self.agent.remember(state_old, final_move, reward, state_new)
        #record = get_record(game.score, record)
        #if display_option:
        #    display(player1, food1, game, record)
        #    pygame.time.wait(speed)

        self.agent.replay_new(self.agent.memory)  # replaying the full memory on every feedback call is costly; the other examples do this once per episode
Example #5
def train(display_on, speed, params):
    pygame.init()
    pygame.font.init()

    agent = DQNAgent(params)

    counter_games = 0
    high_score = 0
    score_plot = []
    counter_plot = []

    while counter_games < params['episodes']:
        game = Game(440, 440, high_score)

        if display_on:
            game.update_display()

        while not game.crash:
            if handle_game_event(game):
                return

            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 1 - (counter_games *
                                 params['epsilon_decay_linear'])

            state = game.get_state()
            move = agent.get_move(state)
            game.do_move(move)

            new_state = game.get_state()
            reward = get_reward(game)

            # train short-term memory based on the new action and state
            agent.train_short_memory(state, move, reward, new_state,
                                     game.crash)

            agent.remember(state, move, reward, new_state, game.crash)

            if display_on:
                game.update_display()
                pygame.time.wait(speed)

        counter_games += 1
        print(f'Game {counter_games}      Score: {game.score}')
        high_score = game.high_score

        score_plot.append(game.score)
        counter_plot.append(counter_games)

        agent.replay_memory(params['batch_size'])

    agent.model.save_weights(params['weights_path'])
    pygame.quit()
    plot_seaborn(counter_plot, score_plot)
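Example #5 drives the whole run from a params dict. The keys below are exactly the ones the code reads (episodes, epsilon_decay_linear, batch_size, weights_path); the values are illustrative assumptions:

params = {
    'episodes': 150,                  # number of games to play
    'epsilon_decay_linear': 1 / 100,  # per-game linear decay of the exploration rate
    'batch_size': 500,                # batch size for agent.replay_memory
    'weights_path': 'weights.hdf5',   # where the trained weights are saved
}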
Example #6
def run():
    pygame.init()
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < 150:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            agent.epsilon = 80 - counter_games
            state_old = agent.get_state(game, player1, food1)
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)[0]
            else:
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)[0]
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)
            reward = agent.set_reward(player1, game.crash)
            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     game.crash)
            agent.remember(state_old, final_move, reward, state_new,
                           game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
Example #7
class Simulation():
    def __init__(self):

        self.amountOfSimulations = 0
        self.maxSpeed = 4
        self.score = 0
        self.previousScore = 0
        self.highscore = 0
        self.highscoreTime = 0
        #self.delayCounter = 0
        #self.delay = randint(5,10)
        self.keepRunning = True
        self.agent = DQNAgent()

        self.mysystem = chrono.ChSystemNSC()
        self.ground = theBattleground.theBattleground(self.mysystem)
        self.createRobot(self.mysystem)
        self.createApplication()
        self.run()

    def createApplication(self):
        #  Create an Irrlicht application to visualize the system

        self.myapplication = chronoirr.ChIrrApp(
            self.mysystem, 'PyChrono example',
            chronoirr.dimension2du(1024, 768))

        self.myapplication.AddTypicalSky()
        self.myapplication.AddTypicalLogo()
        self.myapplication.AddTypicalCamera(chronoirr.vector3df(0.6, 0.6, 0.8))
        self.myapplication.AddLightWithShadow(
            chronoirr.vector3df(2, 4, 2),  # point
            chronoirr.vector3df(0, 0, 0),  # aimpoint
            9,  # radius (power)
            1,
            9,  # near, far
            30)  # angle of FOV

        # ==IMPORTANT!== Use this function for adding a ChIrrNodeAsset to all items
        # in the system. These ChIrrNodeAsset assets are 'proxies' to the Irrlicht meshes.
        # If you need a finer control on which item really needs a visualization proxy in
        # Irrlicht, just use application.AssetBind(myitem); on a per-item basis.

        self.myapplication.AssetBindAll()

        # ==IMPORTANT!== Use this function for 'converting' into Irrlicht meshes the assets
        # that you added to the bodies into 3D shapes, they can be visualized by Irrlicht!

        self.myapplication.AssetUpdateAll()
        self.myapplication.AddShadowAll()
        self.myapplication.SetShowInfos(True)

    def displayScore(self):
        print("Score: " + str(self.score) + "     Highscore: " +
              str(self.highscore))

    def checkIfDead(self):
        if (((self.robot.mbody1.GetRot()).Q_to_Rotv()).z < 2.5):
            print("tilt forward - DEAD")
            self.keepRunning = False

        if (((self.robot.mbody1.GetRot()).Q_to_Rotv()).z > 4):
            print("tilt backward - DEAD")
            self.keepRunning = False

    def doMove(self, prediction):
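        # map the two network outputs onto one signed wheel speed: the larger
        # output wins, and its magnitude scales the motor command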

        self.checkIfDead()

        print("pred is: ")
        print(prediction)
        print(" ")

        if prediction[0][0] > prediction[0][1]:
            speed = (prediction[0][0] * self.maxSpeed)
            self.robot.motor_R.SetMotorFunction(chrono.ChFunction_Const(speed))
            self.robot.motor_L.SetMotorFunction(chrono.ChFunction_Const(speed))

        elif prediction[0][1] > prediction[0][0]:
            speed = -(prediction[0][1] * self.maxSpeed)
            self.robot.motor_R.SetMotorFunction(chrono.ChFunction_Const(speed))
            self.robot.motor_L.SetMotorFunction(chrono.ChFunction_Const(speed))

        else:
            self.robot.motor_R.SetMotorFunction(chrono.ChFunction_Const(0))
            self.robot.motor_L.SetMotorFunction(chrono.ChFunction_Const(0))

    def createRobot(self, system):
        try:
            del self.robot
        except AttributeError:
            pass
        self.robot = theRobot.theRobot(system)

    def restart(self):
        print("restart called")

        del self.myapplication
        del self.mysystem
        self.mysystem = chrono.ChSystemNSC()
        self.ground = theBattleground.theBattleground(self.mysystem)
        self.createRobot(self.mysystem)
        self.createApplication()

        self.mysystem.SetChTime(0)
        self.keepRunning = True

        if self.score > self.highscore:
            self.highscore = self.score
            self.highscoreTime = self.amountOfSimulations

        self.previousScore = self.score
        self.score = 0

    def run(self):
        self.myapplication.SetTimestep(0.1)
        self.myapplication.SetTryRealtime(False)

        while self.amountOfSimulations <= 500:
            self.restart()

            while (self.myapplication.GetDevice().run() and self.keepRunning):
                self.agent.epsilon = 80 - self.amountOfSimulations

                #get old state
                state_old = self.agent.get_state(self.robot)

                self.myapplication.BeginScene()
                self.myapplication.DrawAll()
                self.myapplication.DoStep()
                self.myapplication.EndScene()
                self.checkIfDead()

                #perform random actions based on agent.epsilon, or choose the action
                if randint(0, 200) < self.agent.epsilon:
                    #prediction = to_categorical(random.random(), num_classes=2)
                    prediction = [[random.random(), random.random()]]
                    print("random action")
                else:
                    # predict action based on the old state
                    print("AI action")
                    prediction = self.agent.model.predict(
                        state_old.reshape((1, 2)))
                    #final_move = to_categorical(np.argmax(prediction[0]), num_classes=2)

                #perform new move and get new state
                self.doMove(prediction)
                state_new = self.agent.get_state(self.robot)

                #self.delay = randint(5, 10)
                self.delayCounter = 0

                self.displayScore()

                self.score += 1
                #self.delayCounter += 1

            #set reward for the new state
            reward = self.agent.set_reward(self.score, self.highscore,
                                           self.previousScore,
                                           self.amountOfSimulations,
                                           self.highscoreTime)

            #train short-term memory based on the new action and state
            self.agent.train_short_memory(state_old, prediction, reward,
                                          state_new, self.keepRunning)

            # store the new data into a long term memory
            self.agent.remember(state_old, prediction, reward, state_new,
                                self.keepRunning)

            self.amountOfSimulations += 1
            self.agent.replay_new(self.agent.memory)

        self.agent.model.save_weights('weights.hdf5')
Example #8
def main():
    # Initialize all Pygame modules and create the window
    # (we get back a Surface that represents the screen).
    pygame.init()

    agent = DQNAgent()
    counter_games = 0
    record = 0
    while counter_games < 150:
        screen = pygame.display.set_mode((800, 600))

        # Set the window title, keep the mouse cursor visible, and repeat held-down keys.
        pygame.display.set_caption("Pygame-Tutorial: Animation")
        pygame.mouse.set_visible(1)
        pygame.key.set_repeat(1, 30)
        pygame.font.init()  # you have to call this at the start,
        myfont = pygame.font.SysFont('Comic Sans MS', 30)

        # Create a Clock object, which we need to cap the frame rate.
        clock = pygame.time.Clock()

        # Create a tilemap.
        map = Tilemap.Tilemap()

        event = AutoInput.AutoInput()

        # The loop, and with it our game, runs as long as running == True.
        running = True
        max_steps_reached = False
        max_steps = 100
        step = 0
        max_score = map.player.pos_x
        max_score_evolution = []
        while running and not max_steps_reached:
            agent.epsilon = 80 - counter_games
            #get old state
            state_old = agent.get_state(map)
            map.player.pos_x_old = map.player.pos_x

            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 7)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            # Cap the frame rate at 30 frames per second.
            # Pygame waits if the program runs faster.
            clock.tick(30)

            # Fill the screen surface with light blue (RGB = 198, 209, 255).
            screen.fill((198, 209, 255))

            map.handle_input(final_move)

            #continue the jump animation on subsequent frames
            if map.player.isjump:
                map.player.jump()

            # Render the tilemap onto the screen surface.
            map.render(screen)
            textsurface = myfont.render(
                "Game " + str(counter_games) + " Step " + str(step) +
                " Max Score " + str(max_score), False, (0, 0, 0))
            screen.blit(textsurface, (50, 50))

            #Draw the Hindernis (obstacle) onto the map and check whether a new one is needed
            if not map.isThereHindernis:
                map.createNewHindernis()
                map.isThereHindernis = True

            map.hindernis.move()
            map.hindernis.render(screen)
            map.checkHindernisOnMap()

            state_new = agent.get_state(map)

            crash = map.collisionDetection()

            #set the reward for the new state
            reward = agent.set_reward(map.player, crash)
            #train short-term memory based on the new action and state
            #(pass crash, not running, as the done flag; running never changes inside the loop)
            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     crash)

            # Show the contents of screen
            pygame.display.flip()

            if map.player.pos_x > max_score:
                max_score = map.player.pos_x

            step += 1
            if step >= max_steps:
                max_steps_reached = True
                max_score_evolution.append(max_score)

        agent.remember(state_old, final_move, reward, state_new, crash)
        #record = get_record(map.player.pos_x, record)
        #if display_option:
        #    #display(player1, food1, game, record)
        #    pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1

    agent.model.save_weights('weights.hdf5')
    sns.lineplot(data=max_score_evolution)  # sns.plot does not exist; lineplot draws the score curve
Example #9
def run(episodes, load_weights, display_option, speed):
    pygame.init()
    agent = DQNAgent()
    weights_filepath = os.path.join(os.getcwd(), WEIGHTS_FILENAME)
    if load_weights and os.path.isfile(weights_filepath):
        agent.model.load_weights(weights_filepath)

    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < episodes:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            # agent.epsilon is set to give randomness to actions
            agent.epsilon = 80 - counter_games

            # get old state
            state_old = agent.get_state(game, player1, food1)

            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)

            # set the reward for the new state
            reward = agent.set_reward(player1, game.crash)

            # train short-term memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     game.crash)

            # store the new data into a long term memory
            agent.remember(state_old, final_move, reward, state_new,
                           game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        print(f'Game {counter_games}      Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    agent.model.save_weights(WEIGHTS_FILENAME)
    plot_seaborn(counter_plot, score_plot)
Example #10
File: snakeClass.py Project: s14965/NAI
def run(params):
    """
    Run the DQN algorithm, based on the parameters previously set.   
    """
    pygame.init()
    agent = DQNAgent(params)
    agent = agent.to(DEVICE)
    agent.optimizer = optim.Adam(agent.parameters(), weight_decay=0, lr=params['learning_rate'])
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    total_score = 0
    while counter_games < params['epoch']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # First move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if params['display']:
            display(player1, food1, game, record)

        steps = 0       # moves since the last reward received
        while (not game.crash) and (steps < 100):
            if not params['train']:
                agent.epsilon = 0.01
            else:
                # agent.epsilon gives the actions some randomness
                agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])

            # Get the old state
            state_old = agent.get_state(game, player1, food1)

            # Take a random action based on agent.epsilon, or choose one from the old state
            if random.uniform(0, 1) < agent.epsilon:
                final_move = np.eye(3)[randint(0, 2)]
            else:
                # Predict the action from the old state
                with torch.no_grad():
                    state_old_tensor = torch.tensor(state_old.reshape((1, 11)), dtype=torch.float32).to(DEVICE)
                    prediction = agent(state_old_tensor)
                    final_move = np.eye(3)[np.argmax(prediction.detach().cpu().numpy()[0])]

            # Perform the new move and get the new state
            player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
            state_new = agent.get_state(game, player1, food1)

            # Determine the reward for the new move
            reward = agent.set_reward(player1, game.crash)

            # Reset the step counter whenever the snake eats
            if reward > 0:
                steps = 0
                
            if params['train']:
                # Train short-term memory on the new move
                agent.train_short_memory(state_old, final_move, reward, state_new, game.crash)
                # Store the new transition in long-term memory
                agent.remember(state_old, final_move, reward, state_new, game.crash)

            record = get_record(game.score, record)
            if params['display']:
                display(player1, food1, game, record)
                pygame.time.wait(params['speed'])
            steps += 1
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        total_score += game.score
        print(f'Game {counter_games}      Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    mean, stdev = get_mean_stdev(score_plot)
    if params['train']:
        model_weights = agent.state_dict()
        torch.save(model_weights, params["weights_path"])
    if params['plot_score']:
        plot_seaborn(counter_plot, score_plot, params['train'])
    return total_score, mean, stdev
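Example #10 above is the PyTorch port of the same loop: the agent is moved to DEVICE, inference runs under torch.no_grad(), and np.eye(3)[i] replaces Keras' to_categorical(i, num_classes=3) for one-hot actions. The two encodings agree:

import numpy as np

i = 1
print(np.eye(3)[i])  # [0. 1. 0.] -- the same one-hot as to_categorical(1, num_classes=3)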
Example #11
def run(display_option, speed, params):
    pygame.init()
    agent = DQNAgent(params)
    weights_filepath = params['weights_path']
    if params['load_weights']:
        agent.model.load_weights(weights_filepath)
        print("weights loaded")

    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < params['episodes']:
        logger.info("===========================")
        logger.info(f"{info_string}")

        time_start = time.time()
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food
        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])

        if display_option:
            display(player1, food1, game, record)

        time_start_game_update = time.time()
        while not game.crash:
            time_start_game_update_pygame = time.time()
            if not params['train']:
                agent.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                if agent.epsilon <= params['min_epsilon']:
                    agent.epsilon = params['min_epsilon']
                else:
                    agent.epsilon = 1 - (counter_games *
                                         params['epsilon_decay_linear'])

            # get old state
            state_old, vision = agent.get_state(game, player1, food1)

            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 1) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(
                    state_old.reshape((1, params['num_input_features'])))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new, vision = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            time_end_game_update_pygame = time.time()

            time_start_game_update_train = time.time()

            if params['train']:
                # train short-term memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)

            time_end_game_update_train = time.time()

            time_start_game_update_record = time.time()
            record = get_record(game.score, record)
            time_end_game_update_record = time.time()

            logger.debug("Pygame update step: " +
                         str((time_end_game_update_pygame -
                              time_start_game_update_pygame)))
            logger.debug("Train short term update step: " +
                         str((time_end_game_update_train -
                              time_start_game_update_train)))
            logger.debug("Record score  step: " +
                         str((time_end_game_update_record -
                              time_start_game_update_record)))

            if display_option:
                cv2.imshow("Vision of the Snake", vision * 255.0)

                # detect any keypresses
                key = cv2.waitKey(1) & 0xFF
                # if the `q` key was pressed, break from the loop
                if key == ord("q"):
                    break
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        # # Pause visualisation if crash
        # if display_option==True:
        #     cv2.imshow("Vision of the Snake", vision * 255.0)
        #
        #     # detect any kepresses
        #     key = cv2.waitKey(1) & 0xFF
        #     # if the `q` key was pressed, break from the loop
        #     if key == ord("q"):
        #         break
        #     display(player1, food1, game, record)
        #     pygame.time.wait(5000)
        time_end_game_update = time.time()
        logger.info(
            "Time to play one game: " +
            str(round((time_end_game_update - time_start_game_update), 3)))

        time_start_long_term = time.time()
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        time_end_long_term = time.time()
        logger.info("Train long term update step: " +
                    str(round((time_end_long_term - time_start_long_term), 3)))

        if agent.epsilon <= params['min_epsilon']:
            agent.epsilon = params['min_epsilon']
        else:
            agent.epsilon = 1 - (counter_games *
                                 params['epsilon_decay_linear'])
        logger.info(f'The epsilon value is: {agent.epsilon}')

        logger.debug("===========================")

        counter_games += 1
        logger.info(f'Game {counter_games}      Score: {game.score}')
        logger.info(f'The agent memory length is: {len(agent.memory)}')

        score_plot.append(game.score)
        counter_plot.append(counter_games)
        if params['train'] and counter_games % 100 == 0:
            agent.model.save_weights(params['weights_path'])
            logger.info("===========SAVING THE MODEL================")
            with open(params['memory_path'], 'wb') as handle:
                pickle.dump(agent.memory, handle)
        logger.info("End Game Loop")
        time_end = time.time()
        epoch_timer = round((time_end - time_start), 3)
        logger.info(f"One epoch takes: {epoch_timer} seconds")
        eta_prediction = round(
            (params['episodes'] - counter_games) * epoch_timer / 60)
        logger.info(f"Time remaining is: {eta_prediction} minutes")

    if params['train']:
        agent.model.save_weights(params['weights_path'])
        with open(params['memory_path'], 'wb') as handle:
            pickle.dump(agent.memory, handle)
        params['counter_plot'] = counter_plot
        params['score_plot'] = score_plot
        with open(params['params_path'], 'wb') as handle:
            pickle.dump(params, handle)
Example #12
def main():
    # Initialize the network
    agent = DQNAgent()
    taille = 30  # number of cells = taille * taille
    game = Game(taille)  # initialize the game
    init_ihm(game)  # initialize the UI
    max_iteration, cpt_iteration = 2, 0  # number of games to play, and the game counter
    rep_lines_bestScore, score_plot, counter_plot = [], [], []  # best-score lines, scores, game numbers
    while cpt_iteration < max_iteration:
        game = Game(taille)  # initialize the game
        game.calculer_lignes_jouables()  # compute the playable lines
        while not game.crash:  # while there are still playable lines
            agent.epsilon = 80 - cpt_iteration
            state_old = agent.get_state(game)  # get old state
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:  # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 13)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)
            if np.array_equal(final_move, [1, 0, 0]):
                tmp_line = game.rep_playable_lines[random.randint(
                    0, int((len(game.rep_playable_lines) - 1) / 3))]
            elif np.array_equal(final_move, [0, 1, 0]):
                tmp_line = game.rep_playable_lines[random.randint(
                    int((len(game.rep_playable_lines) - 1) / 3),
                    int((len(game.rep_playable_lines) - 1) * 2 / 3))]
            elif np.array_equal(final_move, [0, 0, 1]):
                tmp_line = game.rep_playable_lines[random.randint(
                    int((len(game.rep_playable_lines) - 1) * 2 / 3),
                    int(len(game.rep_playable_lines) - 1))]
            game.jouer_ligne(tmp_line)  # play the line; tmp_line = [[cell * 5], direction]
            game.rep_lines.append(tmp_line)  # append the line to the list of played lines
            game.calculer_lignes_jouables()  # recompute the playable lines
            best = 0
            if cpt_iteration == 0 or len(game.rep_lines) > max(score_plot):
                rep_lines_bestScore = game.rep_lines  # save the lines of the best score
                best = 1
            if len(game.rep_playable_lines) == 0:  # if no playable lines remain
                game.crash = True
            else:
                state_new = agent.get_state(game)
                reward = agent.set_reward(game.crash, best)
                # train short-term memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)
        if cpt_iteration == 0 or len(game.rep_lines) > max(
                score_plot):  # if this is the best score reached so far
            rep_lines_bestScore = game.rep_lines  # save the lines of the best score
        score_plot.append(len(game.rep_lines))  # append the score to the list of scores
        cpt_iteration += 1
        print(cpt_iteration, "-Score: ", len(game.rep_lines),
              " -  Best score: ", max(score_plot))
        counter_plot.append(cpt_iteration)  # append the game number
        agent.replay_new(agent.memory)
    agent.model.save_weights('weights.hdf5')
    afficher_lignes_ihm(rep_lines_bestScore, max_iteration, max(score_plot))
    plot_seaborn(counter_plot, score_plot)
    mainloop()
Example #13
def main():
    # Initialize the network
    pygame.init()
    agent = DQNAgent()
    score_plot = []
    counter_plot = []
    record = 0
    taille = 30  # number of cells = taille * taille
    init_ihm()  # initialize the UI
    max_iteration = 10  # number of games
    cpt_iteration = 0  # game counter
    best_score = 0  # best score
    rep_lines_bestScore = []  # lines of the best score
    while cpt_iteration < max_iteration:
        game = Game(taille)  # initialize the game
        nbr_lignes_crees = 0  # number of lines created
        game.rep_lines.clear()  # reset the list of created lines
        game.calculer_lignes_jouables()  # compute the playable lines
        while not game.crash:  # while there are still playable lines
            agent.epsilon = 80 - cpt_iteration
            state_old = agent.get_state(game)  # get old state
            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:  # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 13)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)
            if np.array_equal(final_move, [1, 0, 0]):
                tmp_line = game.rep_playable_lines[random.randint(
                    0, int((len(game.rep_playable_lines) - 1) / 3))]
            elif np.array_equal(final_move, [0, 1, 0]):
                tmp_line = game.rep_playable_lines[random.randint(
                    int((len(game.rep_playable_lines) - 1) / 3),
                    int((len(game.rep_playable_lines) - 1) * 2 / 3))]
            elif np.array_equal(final_move, [0, 0, 1]):
                tmp_line = game.rep_playable_lines[random.randint(
                    int((len(game.rep_playable_lines) - 1) * 2 / 3),
                    int(len(game.rep_playable_lines) - 1))]
            game.jouer_ligne(tmp_line)  # play the line; tmp_line = [[cell * 5], direction]
            game.rep_lines.append(tmp_line)  # append the line to the list of played lines
            game.calculer_lignes_jouables()  # recompute the playable lines
            best = 0
            if len(game.rep_lines) > best_score:
                best_score = len(game.rep_lines)  # update the best score
                # save the list of lines of the best score
                rep_lines_bestScore = game.rep_lines
                best = 1
            if len(game.rep_playable_lines) == 0:
                game.crash = True
            else:
                state_new = agent.get_state(game)
                reward = agent.set_reward(game.crash, best)
                #train short-term memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)
        print("Score: ", len(game.rep_lines), " -  Best score: ", best_score)
        score_plot.append(len(game.rep_lines))
        counter_plot.append(cpt_iteration)
        cpt_iteration += 1
        agent.replay_new(agent.memory)
    score_ihm(max_iteration, best_score)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
    afficher_lignes_ihm(rep_lines_bestScore)
Example #14
def run(display_option, speed, params):
    if display_option:
        pygame.init()
    agent = DQNAgent(params)

    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < params['episodes']:
        # Initialize classes
        game = Game(440, 440, display_option)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if display_option:
            display(player1, food1, game, record)
        step_count = 0
        raw_data = [] if params['raw_output'] is not None else None
        while not game.crash:
            if display_option:
                for event in pygame.event.get():
                    if event.type == pygame.QUIT:
                        pygame.quit()
                        quit()
            if not params['train']:
                agent.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games *
                                     params['epsilon_decay_linear'])

            # get old state
            state_old = agent.get_state(game, player1, food1)

            # perform random actions based on agent.epsilon, or choose the action
            if random() < agent.epsilon or random() < (step_count - 300) / 300:
                prediction = [0, 0, 0]
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 11)))[0]
                final_move = to_categorical(np.argmax(prediction),
                                            num_classes=3)
            if raw_data is not None:
                step_data = {
                    'head_x': player1.x,
                    'head_y': player1.y,
                    'food_x': food1.x_food,
                    'food_y': food1.y_food,
                    'snake_position': copy.deepcopy(player1.position),
                    'snake_x_change': player1.x_change,
                    'snake_y_change': player1.y_change,
                    'action': final_move.tolist()
                }
            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            game.player.action = final_move
            state_new = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)
            if raw_data is not None:
                step_data['eaten'] = player1.eaten
                step_data['reward'] = reward
                step_data['crash'] = game.crash
                step_data['next_state'] = {
                    'head_x': player1.x,
                    'head_y': player1.y,
                    'food_x': food1.x_food,
                    'food_y': food1.y_food,
                    'snake_position': copy.deepcopy(player1.position),
                    'snake_x_change': player1.x_change,
                    'snake_y_change': player1.y_change,
                }
                raw_data.append(step_data)

            if params['verbose']:
                print(prediction, final_move, reward, step_count)

            if params['train']:
                # train short-term memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)

            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
            step_count += 1
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        print(
            f'Game {counter_games}/{step_count}/{agent.epsilon}      Score: {game.score}'
        )
        if raw_data is not None:
            fn_index = params['raw_output_index'] + counter_games
            with open(os.path.join(params['raw_output'], f'{fn_index}.json'),
                      'w') as out:
                json.dump(raw_data, out)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    if params['train']:
        agent.model.save_weights(params['weights_path'])
    plot_seaborn(counter_plot, score_plot)
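Example #14 can also dump every transition to per-game JSON files under params['raw_output']. Reading an episode back for offline analysis might look like this (the directory name and file index are assumptions matching the writer above):

import json
import os

with open(os.path.join('raw_output', '1.json')) as f:  # hypothetical path
    episode = json.load(f)
print(len(episode), 'steps; first action:', episode[0]['action'])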
Example #15
def run():
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    agent = DQNAgent(TEST_MODE)
    if TEST_MODE:
        pygame.init()
        pygame.font.init()
        print('Start testing the trained model ...')
    else:
        print('Start training ...')
    while counter_games < 150:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent)
        if TEST_MODE:
            display(player1, food1, game, record)

        while not game.crash:
            #agent.epsilon is set to give randomness to actions
            agent.epsilon = 80 - counter_games

            #get old state
            state_old = agent.get_state(game, player1, food1)

            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon and not TEST_MODE:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old)
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            #perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)

            #set the reward for the new state
            reward = agent.set_reward(player1, game.crash)

            #train short-term memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     game.crash)

            # store the new data into a long term memory
            agent.remember(state_old, final_move, reward, state_new,
                           game.crash)
            record = get_record(game.score, record)
            if TEST_MODE:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
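Example #15 switches between training and evaluating a saved model via a module-level TEST_MODE flag; speed is likewise read from module scope. Plausible definitions, assumed rather than shown in the original:

TEST_MODE = False  # True: render the game and run the trained model; False: train
speed = 50         # assumed delay in ms for pygame.time.wait while rendering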
Example #16
def run():
    # Workaround for a current incompatibility between CUDA and TF -> force CPU from within the code
    import os
    import tensorflow as tf

    # os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
    os.environ['CUDA_VISIBLE_DEVICES'] = ''

    if tf.test.gpu_device_name():
        print('[DEBUG] GPU found')
    else:
        print("[DEBUG] No GPU found")
    # end of workaround

    pygame.init()
    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    # while counter_games < 150:
    while counter_games < 100:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent)
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            #agent.epsilon is set to give randomness to actions
            agent.epsilon = 80 - counter_games

            #get old state
            state_old = agent.get_state(game, player1, food1)

            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
                print(f"final_move: {final_move}")

                # from here: random responses to bootstrap learning

                final_response = {"type": "endRound"}

                # to here: random responses to bootstrap learning

            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            #perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)

            #set the reward for the new state
            reward = agent.set_reward(player1, game.crash)

            #train short-term memory based on the new action and state
            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     game.crash)

            # store the new data into a long term memory
            agent.remember(state_old, final_move, reward, state_new,
                           game.crash)
            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
Example #17
File: snake.py Project: Dyzlee/DQN-Snake
def gameLoop():
    game_over = False
    games = 0
    games_to_play = 300
    highscore = 0
    score_plot = []
    games_plot = []
    agent = DQNAgent()

    while games < games_to_play:  # Play a total of x games
        xpos = dis_width / 2  # X Spawn point coordinate
        ypos = dis_height / 2  # Y Spawn point coordinate

        xdir = 0
        ydir = 0

        snake_List = []
        length_of_snake = 1

        food = random_food(snake_List)
        xfood = food[0]
        yfood = food[1]

        while not game_over:
            agent.epsilon = 80 - games
            # agent.epsilon = 0

            state_old = agent.get_state(xpos, ypos, xdir, ydir, snake_block,
                                        xfood, yfood, dis_width, dis_height,
                                        snake_List)

            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)
            # Do Action -------------------------------------------------------------
            if np.array_equal(final_move, [1, 0, 0]):
                pass  # keep going straight
            elif np.array_equal(final_move, [0, 1, 0]) and ydir == 0:
                # turn right while moving horizontally
                ydir = snake_block
                xdir = 0
            elif np.array_equal(final_move, [0, 1, 0]) and xdir == 0:
                # turn right while moving vertically
                xdir = snake_block
                ydir = 0
            elif np.array_equal(final_move, [0, 0, 1]) and ydir == 0:
                # turn left while moving horizontally
                ydir = -snake_block
                xdir = 0
            elif np.array_equal(final_move, [0, 0, 1]) and xdir == 0:
                # turn left while moving vertically
                xdir = -snake_block
                ydir = 0
            # Did Action -------------------------------------------------------------
            # Update Frame after Action ----------------------------------------------
            eaten = False
            # End the game on collision with the border
            if ((xpos == dis_width - snake_block and xdir > 0)
                    or (xpos == 0 and xdir < 0)
                    or (ypos == dis_height - snake_block and ydir > 0)
                    or (ypos == 0 and ydir < 0)):
                game_over = True
            xpos += xdir
            ypos += ydir
            dis.fill(blue)
            pygame.draw.rect(dis, green,
                             [xfood, yfood, snake_block, snake_block])
            snake_Head = [xpos, ypos]
            snake_List.append(snake_Head)
            if len(snake_List) > length_of_snake:
                del snake_List[0]

            for x in snake_List[:-1]:
                if x == snake_Head:
                    game_over = True

            draw_snake(snake_block, snake_List)
            highscore = get_highscore(highscore, length_of_snake - 1)
            your_score(highscore, length_of_snake - 1)

            if xpos == xfood and ypos == yfood:
                eaten = True
                food = random_food(snake_List)
                xfood = food[0]
                yfood = food[1]
                length_of_snake += 1
            # Updated Frame after Action ---------------------------------------------

            state_new = agent.get_state(xpos, ypos, xdir, ydir, snake_block,
                                        xfood, yfood, dis_width, dis_height,
                                        snake_List)
            reward = agent.set_reward(game_over, eaten)

            agent.train_short_memory(state_old, final_move, reward, state_new,
                                     game_over)

            agent.remember(state_old, final_move, reward, state_new, game_over)

            pygame.display.update()
            clock.tick(snake_speed)

        print("Game:", games + 1, "Score:", length_of_snake - 1, "Highscore:",
              highscore)
        agent.replay_new(agent.memory)
        games += 1
        score_plot.append(length_of_snake - 1)
        games_plot.append(games)
        game_over = False

    agent.model.save_weights('weights10x10V3.hdf5')
    pygame.quit()
    # Plot stats of game:
    sns.set(color_codes=True)
    ax = sns.regplot(x=games_plot, y=score_plot)
    ax.set(xlabel='games', ylabel='score')
    plt.show()
    quit()
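The loop above calls random_food to respawn the food but never defines it. A minimal sketch, assuming the same dis_width, dis_height and snake_block globals the loop uses: keep sampling grid-aligned cells until one is free of the snake.

from random import randrange

def random_food(snake_list):
    # sample grid-aligned cells until one is not occupied by the snake
    while True:
        x = randrange(0, dis_width - snake_block + 1, snake_block)
        y = randrange(0, dis_height - snake_block + 1, snake_block)
        if [x, y] not in snake_list:
            return x, y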
예제 #18
def run(params):
    pygame.init()
    agent = DQNAgent(params)
    print_info(params)
    weights_filepath = params['weights_path']
    if params['load_weights']:
        agent.model.load_weights(weights_filepath)
        print("Weights Loaded")

    else:
        print("Training From Scratch...")
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Class Objects
        game = Game(params)
        player1 = game.player
        food1 = game.food

        total_reward = -150
        total_reward2 = 0
        # First Move
        initialize_game(player1, game, food1, agent, params['batch_size'],
                        counter_games)
        if params['display']:
            display(player1, food1, game, record, counter_games, total_reward,
                    params)
        while not game.crash:
            if not params['train']:
                agent.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games *
                                     params['epsilon_decay_linear'])

            # old State
            state_old = agent.get_state(game, player1, food1, params)

            # Random Actions or Choose (a uniform float, because epsilon decays
            # through fractions here; randint(0, 1) only ever returns 0 or 1)
            if random.uniform(0, 1) < agent.epsilon and not params['load_weights']:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            # Prediction
            else:
                prediction = agent.model.predict(state_old.reshape((1, 20)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1, params)
            reward = agent.set_reward(player1, game.crash, food1,
                                      counter_games, final_move)
            total_reward += reward
            total_reward2 += reward
            total_reward = round(total_reward, 2)
            total_reward2 = round(total_reward2, 2)

            if params['train']:
                # train short memory base on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)

            record = get_record(game.score, record)
            if params['display']:
                display(player1, food1, game, record, counter_games,
                        total_reward, params)

        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        print(
            f'Game {counter_games}      Score: {game.score}    Reward: {total_reward2}'
        )
        score_plot.append(game.score)
        counter_plot.append(counter_games)
        if counter_games % params['cf'] == 0:
            n = int(counter_games / params['cf'])
            agent.model.save_weights(params['cp'] + str(n) + ".hdf5")
            print("Checkpoint Saved...")

    plot_seaborn(counter_plot, score_plot)
    pygame.quit()
    quit()
예제 #19
def main():
    # Initialize the network
    agent = DQNAgent()
    taille = 30  # number of cells = taille * taille
    game = Game(taille)  # initialize the game
    init_ihm(game)  # initialize the UI
    max_iteration, cpt_iteration = 200, 0  # number of games to play, games counter
    # lines of the best score, scores, game numbers
    rep_lines_bestScore, score_plot, counter_plot = [], [], []
    while cpt_iteration < max_iteration:
        game = Game(taille)  # re-initialize the game
        game.calculer_lignes_jouables()  # compute the lines that can be played
        # pick one of the playable lines at random
        if len(game.rep_playable_lines) > 1:
            game.choosed_line = game.rep_playable_lines[random.randint(
                0, len(game.rep_playable_lines) - 1)]
        elif len(game.rep_playable_lines) != 0:
            game.choosed_line = game.rep_playable_lines[0]
        # as long as some lines can still be played
        while not game.crash and game.cpt_liberte < game.liberte:
            agent.epsilon = 120 - cpt_iteration
            state_old = agent.get_state(game)  # get old state
            # perform random actions based on agent.epsilon, or choose the action
            if randint(0, 200) < agent.epsilon:
                final_move = to_categorical(randint(0, 4), num_classes=8)
            else:  # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 80)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=8)
            game.move = final_move
            # the eight (dx, dy) offsets around the player, one per network output
            vect_squar = [[-1, -1], [-1, 0], [-1, 1], [0, 1], [1, 1], [1, 0],
                          [1, -1], [0, -1]]
            # one-hot encodings of the eight outputs (an 8x8 identity matrix)
            vect_move = np.eye(len(vect_squar), dtype=int).tolist()
            for i in range(len(vect_move)):
                if vect_move[i] == final_move.tolist():
                    # move the player by the offset selected by final_move
                    game.x_player = int(game.x_player + vect_squar[i][0])
                    game.y_player = int(game.y_player + vect_squar[i][1])
                    game.move = vect_move[i]  # save the move
            game.found = False
            # if a playable line starts at the player's position, play it
            for i in range(len(game.rep_playable_lines) - 1):
                if (len(game.rep_playable_lines) != 0
                        and game.rep_playable_lines[i][0][0].x == game.x_player
                        and game.rep_playable_lines[i][0][0].y == game.y_player):
                    game.found = True
                    # play the line: tmp_line = [[cellule * 5], direction]
                    game.jouer_ligne(game.rep_playable_lines[i])
                    # add the line to the list of played lines
                    game.rep_lines.append(game.rep_playable_lines[i])
                    game.calculer_lignes_jouables()  # recompute the playable lines
                    if cpt_iteration == 0 or len(game.rep_lines) > max(score_plot):
                        # save the list of lines of the best score
                        rep_lines_bestScore = game.rep_lines
                    state_new = agent.get_state(game)
                    reward = agent.set_reward(len(game.rep_playable_lines),
                                              game.found)
                    # train short memory based on the new action and state
                    agent.train_short_memory(state_old, final_move, reward,
                                             state_new, game.crash)
                    # store the new data into a long term memory
                    agent.remember(state_old, final_move, reward, state_new,
                                   game.crash)
                    game.cpt_liberte = 0
                    break
            if len(game.rep_playable_lines) == 0:  # no playable lines are left
                game.crash = True
                state_new = agent.get_state(game)
                reward = agent.set_reward(0, game.found)
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)
            elif game.cpt_liberte > game.liberte - 2 and not game.found:
                game.x_player = 13  # return to the starting cell
                game.y_player = 13
                game.crash = True
                game.found = False
                state_new = agent.get_state(game)
                reward = agent.set_reward(0, game.found)
                # train short memory based on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)
            elif not game.found:
                game.cpt_liberte += 1
        if cpt_iteration == 0 or len(game.rep_lines) > max(score_plot):
            # best score so far: save the list of played lines
            rep_lines_bestScore = game.rep_lines
        score_plot.append(len(game.rep_lines))  # record the score
        cpt_iteration += 1  # one more game played
        print(cpt_iteration, "-Score: ", len(game.rep_lines),
              " -  Best score: ", max(score_plot))
        counter_plot.append(cpt_iteration)  # record the game number
        agent.replay_new(agent.memory)
    # show the lines of the best score in the UI
    afficher_lignes_ihm(rep_lines_bestScore, max_iteration, max(score_plot))
    plot_seaborn(counter_plot, score_plot)  # plot score per game
    mainloop()
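The per-index scan over vect_move above can be collapsed with np.argmax: the index of the 1 in the one-hot final_move selects the (dx, dy) offset directly. A behavior-equivalent sketch under the same offset table; apply_move is a name introduced here, not part of the original code.

import numpy as np

VECT_SQUAR = [[-1, -1], [-1, 0], [-1, 1], [0, 1],
              [1, 1], [1, 0], [1, -1], [0, -1]]  # eight offsets, one per output

def apply_move(game, final_move):
    # the index of the hot entry picks the offset for this one-hot move
    i = int(np.argmax(final_move))
    game.x_player += VECT_SQUAR[i][0]
    game.y_player += VECT_SQUAR[i][1]
    game.move = [int(v) for v in final_move]  # save the move as an int one-hot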
예제 #20
def run(params):
    """
    Run the DQN algorithm, based on the parameters previously set.   
    """
    pygame.init()
    agent = DQNAgent(params)
    agent = agent.to(DEVICE)
    agent.optimizer = optim.Adam(agent.parameters(),
                                 weight_decay=0,
                                 lr=params['learning_rate'])
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    total_score = 0
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if params['display']:
            display(player1, food1, game, record)

        while not game.crash:
            if not params['train']:
                agent.epsilon = 0.01
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games *
                                     params['epsilon_decay_linear'])

            # get old state
            state_old = agent.get_state(game, player1, food1)

            # perform random actions based on agent.epsilon, or choose the action
            if random.uniform(0, 1) < agent.epsilon:
                final_move = np.eye(3)[randint(0, 2)]
            else:
                # predict action based on the old state
                with torch.no_grad():
                    state_old_tensor = torch.tensor(
                        state_old.reshape(
                            (1, 11)), dtype=torch.float32).to(DEVICE)
                    prediction = agent(state_old_tensor)
                    final_move = np.eye(3)[np.argmax(
                        prediction.detach().cpu().numpy()[0])]

            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)

            if params['train']:
                # train short memory base on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)

            record = get_record(game.score, record)
            if params['display']:
                display(player1, food1, game, record)
                pygame.time.wait(params['speed'])
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        total_score += game.score
        print(f'Game {counter_games}      Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    mean, stdev = get_mean_stdev(score_plot)
    if params['train']:
        model_weights = agent.state_dict()
        torch.save(model_weights, params["weights_path"])
    if params['plot_score']:
        plot_seaborn(counter_plot, score_plot, params['train'])
    return total_score, mean, stdev
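Example #20 treats the agent itself as a torch.nn.Module (agent.to(DEVICE), agent(state_old_tensor), agent.state_dict()). A minimal skeleton consistent with that usage; the layer sizes, the 11-value state width, and the params keys are assumptions, not the original network.

import collections
import torch.nn as nn
import torch.nn.functional as F

class DQNAgent(nn.Module):
    def __init__(self, params):
        super().__init__()
        self.epsilon = 1.0
        self.gamma = params.get('gamma', 0.9)          # assumed key
        self.memory = collections.deque(maxlen=params.get('memory_size', 2500))
        self.f1 = nn.Linear(11, params.get('first_layer_size', 50))  # 11-value state
        self.f2 = nn.Linear(params.get('first_layer_size', 50), 3)   # 3 actions

    def forward(self, x):
        x = F.relu(self.f1(x))
        return self.f2(x)  # raw Q-values, one per action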
예제 #21
def run(display_option, speed, params):
    pygame.init()
    agent1 = DQNAgent(params, 1)
    agent2 = DQNAgent(params, 2)
    weights_filepath1 = params['weights_path1']
    weights_filepath2 = params['weights_path2']

    counter_games = 0
    record1 = 0
    record2 = 0
    # load once, before the loop: reloading on every episode would overwrite
    # anything learned since the last save
    if params['load_weights']:
        agent1.model.load_weights(weights_filepath1)
        agent2.model.load_weights(weights_filepath2)
        print("weights loaded")
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(900, 600)
        player1 = Player(game, 300, 300)
        player2 = Player(game, 600, 300)

        # Perform first move
        initialize_game(player1, player2, game, agent1, agent2,
                        params['batch_size'])
        if display_option:
            display(player1, player2, game, record1, record2)

        while not game.crash:
            if not params['train']:
                agent1.epsilon = 0
                agent2.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                agent1.epsilon = 1 - (counter_games *
                                      params['epsilon_decay_linear'])
                agent2.epsilon = 1 - (counter_games *
                                      params['epsilon_decay_linear'])

            # get old state
            state_old1 = agent1.get_state(game, player1, player2)
            state_old2 = agent2.get_state(game, player2, player1)

            # perform random actions based on agent.epsilon, or choose the action
            # (a uniform float, since epsilon decays through fractions)
            if random.uniform(0, 1) < agent1.epsilon:
                final_move1 = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction1 = agent1.model.predict(state_old1.reshape((1, 7)))
                final_move1 = to_categorical(np.argmax(prediction1[0]),
                                             num_classes=3)

            if random.uniform(0, 1) < agent2.epsilon:
                final_move2 = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction2 = agent2.model.predict(state_old2.reshape((1, 7)))
                final_move2 = to_categorical(np.argmax(prediction2[0]),
                                             num_classes=3)

            # perform new move and get new state
            player1.do_move(final_move1, player1.x, player1.y, game, agent1,
                            player2)
            player2.do_move(final_move2, player2.x, player2.y, game, agent2,
                            player1)
            state_new1 = agent1.get_state(game, player1, player2)
            state_new2 = agent2.get_state(game, player2, player1)
            # set reward for the new state
            reward1 = agent1.set_reward(player1, player1.crash)
            reward2 = agent2.set_reward(player2, player2.crash)

            if params['train']:
                # train short memory base on the new action and state
                agent1.train_short_memory(state_old1, final_move1, reward1,
                                          state_new1, player1.crash)
                # store the new data into a long term memory
                agent1.remember(state_old1, final_move1, reward1, state_new1,
                                player1.crash)
                agent2.train_short_memory(state_old2, final_move2, reward2,
                                          state_new2, player2.crash)
                # store the new data into a long term memory
                agent2.remember(state_old2, final_move2, reward2, state_new2,
                                player2.crash)
            game.score1 += reward1
            game.score2 += reward2
            record1 = get_record(game.score1, record1)
            record2 = get_record(game.score2, record2)
            if display_option:
                display(player1, player2, game, record1, record2)
                pygame.time.wait(speed)
            if player1.crash and player2.crash:
                game.crash = True

        counter_games += 1
        game.crash = False
        player1.crash = False
        player2.crash = False
        print("score1: " + str(game.score1) + "/n")
        print("score2: " + str(game.score2) + "/n")
        if params['train'] and counter_games % 10 == 0:
            agent1.model.save_weights(params['weights_path1'])
            agent2.model.save_weights(params['weights_path2'])
            print("weights saved")
    if params['train']:
        agent1.model.save_weights(params['weights_path1'])
        agent2.model.save_weights(params['weights_path2'])
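get_record, used by most of these loops, only has to track the best score seen so far; a one-line sketch:

def get_record(score, record):
    # best score across all games played so far
    return max(score, record)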
예제 #22
def run(display_option, speed, params):
    pygame.init()
    agent = DQNAgent(params)
    weights_filepath = params['weights_path']
    if params['load_weights']:
        agent.model.load_weights(weights_filepath)
        print("weights loaded")

    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < params['episodes']:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                quit()
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        food1 = game.food

        # Perform first move
        initialize_game(player1, game, food1, agent, params['batch_size'])
        if display_option:
            display(player1, food1, game, record)

        while not game.crash:
            if not params['train']:
                agent.epsilon = 0
            else:
                # agent.epsilon is set to give randomness to actions
                agent.epsilon = 1 - (counter_games *
                                     params['epsilon_decay_linear'])

            # get old state
            state_old = agent.get_state(game, player1, food1)

            # perform random actions based on agent.epsilon, or choose the action
            # (a uniform float, since epsilon decays through fractions)
            if random.uniform(0, 1) < agent.epsilon:
                final_move = to_categorical(randint(0, 2), num_classes=3)
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old.reshape((1, 11)))
                final_move = to_categorical(np.argmax(prediction[0]),
                                            num_classes=3)

            # perform new move and get new state
            player1.do_move(final_move, player1.x, player1.y, game, food1,
                            agent)
            state_new = agent.get_state(game, player1, food1)

            # set reward for the new state
            reward = agent.set_reward(player1, game.crash)

            if params['train']:
                # train short memory base on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)
                # store the new data into a long term memory
                agent.remember(state_old, final_move, reward, state_new,
                               game.crash)

            record = get_record(game.score, record)
            if display_option:
                display(player1, food1, game, record)
                pygame.time.wait(speed)
        if params['train']:
            agent.replay_new(agent.memory, params['batch_size'])
        counter_games += 1
        print(f'Game {counter_games}      Score: {game.score}')
        score_plot.append(game.score)
        counter_plot.append(counter_games)
    if params['train']:
        agent.model.save_weights(params['weights_path'])
    plot_seaborn(counter_plot, score_plot)
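plot_seaborn closes out most of the examples; a sketch matching the inline seaborn code shown near the top of this section (a regression plot of score against game number). Some examples pass a third train flag, accepted but unused here.

import seaborn as sns
import matplotlib.pyplot as plt

def plot_seaborn(array_counter, array_score, train=True):
    sns.set(color_codes=True)
    ax = sns.regplot(x=array_counter, y=array_score)  # score vs. game number
    ax.set(xlabel='games', ylabel='score')
    plt.show()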
예제 #23
def run():
    agent1 = DQNAgent()
    agent2 = DQNAgent()
    counter_games = 0
    game_engine = Game()

    while counter_games < GAMES_COUNT:
        board = game_engine.get_init_board()
        start = timer()
        #set player for agents
        agent1.player = randint(1, 2)
        agent2.player = 1 if agent1.player == 2 else 2

        while not game_engine.is_finished(board):
            #agent.epsilon is set to give randomness to actions
            agent1.epsilon = EPSILON - counter_games
            agent2.epsilon = EPSILON - counter_games

            #get state
            state1 = agent1.get_state(board)
            state2 = agent2.get_state(board)

            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, EPSILON * RANDOM_MOVES_PROPORTION) < agent1.epsilon:
                final_move1 = random.choice(agent1.possible_moves(state1))
            else:
                # predict action based on the state
                prediction = agent1.model.predict(state1)
                final_move1 = np.argmax(prediction[0])

            if randint(0, EPSILON * RANDOM_MOVES_PROPORTION) < agent2.epsilon:
                final_move2 = random.choice(agent2.possible_moves(state2))
            else:
                # predict action based on the state
                prediction = agent2.model.predict(state2)
                final_move2 = np.argmax(prediction[0])

            #perform new move and get new state
            board_new, changed_dir1 = game_engine.make_move(
                board, final_move1, agent1.player)
            board_new, changed_dir2 = game_engine.make_move(
                board_new, final_move2, agent2.player)
            board_new = game_engine.send_move(board_new)
            state_new1 = agent1.get_state(board_new)
            state_new2 = agent2.get_state(board_new)

            #set reward for the new state
            reward1 = agent1.get_reward(game_engine, board, board_new,
                                        changed_dir1)
            reward2 = agent2.get_reward(game_engine, board, board_new,
                                        changed_dir2)

            #train short memory base on the new action and state
            game_is_finished = game_engine.is_finished(board_new)
            agent1.train_short_memory(state1, final_move1, reward1, state_new1,
                                      game_is_finished)
            agent2.train_short_memory(state2, final_move2, reward2, state_new2,
                                      game_is_finished)

            # store the new data into a long term memory
            agent1.remember(state1, final_move1, reward1, state_new1,
                            game_is_finished)
            agent2.remember(state2, final_move2, reward2, state_new2,
                            game_is_finished)
            board = board_new

            if game_is_finished:
                if game_engine.is_win(board, agent1.player):
                    agent1.wins_count += 1
                if game_engine.is_win(board, agent2.player):
                    agent2.wins_count += 1
            print('.', end='', flush=True)

        agent1.replay_new()
        agent2.replay_new()
        counter_games += 1
        print('Finished')
        print('Game', counter_games)
        print('Time', timer() - start)
        print('Turns', board["turn"])
        print('Agent 1 wins', agent1.wins_count)
        print('Agent 2 wins', agent2.wins_count)

    # save trained model
    if agent1.wins_count > agent2.wins_count:
        agent1.model.save_weights('weights.hdf5')
    else:
        agent2.model.save_weights('weights.hdf5')
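replay_new is the long-term-memory counterpart of train_short_memory: refit the network on a minibatch of stored transitions with a one-step Bellman target. A Keras-style sketch under assumed attribute names (self.model, self.gamma) and the one-hot action encoding most of these examples use; the actual agents may differ.

import random
import numpy as np

def replay_new(self, memory, batch_size=1000):
    # sample a minibatch of (state, action, reward, next_state, done) tuples
    minibatch = random.sample(memory, min(len(memory), batch_size))
    for state, action, reward, next_state, done in minibatch:
        target = reward
        if not done:
            # one-step Bellman target: r + gamma * max_a' Q(s', a')
            target = reward + self.gamma * np.amax(
                self.model.predict(next_state.reshape((1, -1)))[0])
        target_f = self.model.predict(state.reshape((1, -1)))
        target_f[0][np.argmax(action)] = target  # update only the taken action
        self.model.fit(state.reshape((1, -1)), target_f, epochs=1, verbose=0)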
예제 #24
def train(epoch=10):
    pygame.init()
    agent = DQNAgent(output_dim=3)
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0
    while counter_games < epoch:
        # Initialize classes
        game = Game(440, 440)
        player1 = game.player
        field0 = game.field

        # Perform first move
        initialize_game(player1, game, field0, agent)
        if display_option:
            display(player1, field0, game, record)

        game_epoch = 0
        while not game.crash:
            #agent.epsilon is set to give randomness to actions
            agent.epsilon = 50 - game_epoch

            #get old state
            state_old = agent.get_state(game, player1, field0)

            #perform random actions based on agent.epsilon, or choose the action
            if randint(0, 100) < agent.epsilon:
                final_move = randint(0, 2)
                # print("random with prob {}".format(agent.epsilon))
            else:
                # predict action based on the old state
                prediction = agent.model.predict(state_old)
                final_move = np.argmax(prediction[0])
                print("prediction : {}".format(prediction))

            # print("move: {} to position ({}, {})".format(final_move, player1.x, player1.y))

            #perform new move and get new state
            player1.do_move(final_move, field0, game)

            if game_epoch >= 19:
                # get new state
                state_new = agent.get_state(game, player1, field0)

                #set reward for the new state
                reward = agent.set_reward(player1, game.crash, final_move)

                #train short memory base on the new action and state
                agent.train_short_memory(state_old, final_move, reward,
                                         state_new, game.crash)

                # store the new data into a long term memory
                if_remember = False
                if game.crash:
                    agent.remember(state_old, final_move, reward, state_new,
                                   game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                elif final_move == 0 and randint(1, 20) < 20:
                    agent.remember(state_old, final_move, reward, state_new,
                                   game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                elif final_move != 0 and randint(1, 20) < 20:
                    agent.remember(state_old, final_move, reward, state_new,
                                   game.crash)
                    if_remember = True
                    # print("remember this move with reward {}".format(reward))
                print(
                    "actual move {} to ({}, {}) gets reward {} - remember {}".
                    format(final_move, player1.x, player1.y, reward,
                           if_remember))

                # explore other move
                if final_move == 0:  # no
                    # 1 left
                    explore_moves(game, field0, agent, player1, state_old, 1,
                                  max(0, player1.x - 1), player1.y)
                    # 2 right
                    explore_moves(game, field0, agent, player1, state_old, 2,
                                  min(player1.x + 1, 21), player1.y)
                elif final_move == 1:  # left
                    # 0 no
                    explore_moves(game, field0, agent, player1, state_old, 0,
                                  min(player1.x + 1, 21), player1.y)
                    # 2 right
                    explore_moves(game, field0, agent, player1, state_old, 2,
                                  min(player1.x + 2, 21), player1.y)
                elif final_move == 2:  # right
                    # 0 no
                    explore_moves(game, field0, agent, player1, state_old, 0,
                                  max(0, player1.x - 1), player1.y)
                    # 1 left
                    explore_moves(game, field0, agent, player1, state_old, 1,
                                  max(0, player1.x - 2), player1.y)

            record = get_record(game.score, record)
            if display_option:
                display(player1, field0, game, record)
                pygame.time.wait(speed)

            game_epoch += 1

        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', game.score)
        score_plot.append(game.score)
        counter_plot.append(counter_games)

        if game.score >= record:
            agent.model.save_weights(modelFile + '/weights.hdf5')
    agent.model.save_weights(modelFile + '/weightsFinal.hdf5')
    plot_seaborn(counter_plot, score_plot)
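train_short_memory applies the same Bellman target to just the most recent transition, so the agent learns online between replay passes. A sketch under the same assumed names (self.model, self.gamma) and numpy import as the replay sketch above, again for a one-hot action encoding:

def train_short_memory(self, state, action, reward, next_state, done):
    target = reward
    if not done:
        # one-step Bellman target on the single latest transition
        target = reward + self.gamma * np.amax(
            self.model.predict(next_state.reshape((1, -1)))[0])
    target_f = self.model.predict(state.reshape((1, -1)))
    target_f[0][np.argmax(action)] = target  # update only the taken action
    self.model.fit(state.reshape((1, -1)), target_f, epochs=1, verbose=0)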
예제 #25
def run_game():
    FPS = 60

    # Initialize game, settings and create a screen object.
    pygame.init()
    fps_clock = pygame.time.Clock()
    ai_settings = Settings()

    # FOR THE DQN #

    agent = DQNAgent()
    counter_games = 0
    score_plot = []
    counter_plot = []
    record = 0

    # FOR THE DQN #

    while counter_games < 150:

        # Create statistics.
        stats = GameStats(ai_settings)

        # Create game items.
        game_items = GameItems(ai_settings, stats)

        # Create a fleet of aliens.
        gf.create_fleet(ai_settings, game_items)
        played = False

        gf.start_new_game(ai_settings, stats, game_items)

        # Start the main loop for the game.
        while stats.game_active:
            stats.time_passed = fps_clock.tick(FPS) / 1000  # Time in seconds since previous loop.

            gf.check_events(ai_settings, stats, game_items)

            if stats.game_active:
                game_items.ship.update(stats)
                gf.update_bullets(ai_settings, stats, game_items)
                gf.update_aliens(ai_settings, stats, game_items)
                # FOR THE DQN #
                agent.epsilon = 80 - counter_games
                state_old = gf.get_state(ai_settings, stats, game_items)
                if randint(0, 200) < agent.epsilon:
                    final_move = to_categorical(randint(0, 3), num_classes=4)
                else:
                    # predict action based on the old state
                    prediction = agent.model.predict(state_old.reshape((1, 3536)))
                    final_move = to_categorical(np.argmax(prediction[0]), num_classes=4)

                # FOR THE DQN #

                # DQN #
                # perform new move and get new state
                gf.do_move(final_move, ai_settings, stats, game_items)


                state_new = gf.get_state(ai_settings, stats, game_items)

                # set reward for the new state
                reward = agent.set_reward(stats.score, stats.ships_left)

                # train short memory base on the new action and state
                agent.train_short_memory(state_old, final_move, reward, state_new, stats.game_active)

                # store the new data into a long term memory
                # TODO: agent.remember(state_old, final_move, reward, state_new, game.crash)
                # Get value of played game
                # TODO: record = get_record(game.score, record)
                # DQN #
                
                
                played = True
            elif played:
                user = ask(game_items.screen)
                if len(user) > 0:
                    coll = connect_and_collect()
                    add_score(user, stats.score, coll)
                played = False

            # gf.update_screen(ai_settings, stats, game_items)


        # FOR THE DQN #
        agent.replay_new(agent.memory)
        counter_games += 1
        print('Game', counter_games, '      Score:', stats.score)
        score_plot.append(stats.score)
        counter_plot.append(counter_games)
    agent.model.save_weights('weights.hdf5')
    plot_seaborn(counter_plot, score_plot)
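The TODO in example #25 leaves agent.remember unwired; in the other examples it simply appends the transition to a bounded buffer. A sketch, assuming self.memory is a collections.deque created with a maxlen:

def remember(self, state, action, reward, next_state, done):
    # with a deque(maxlen=...), the oldest transitions fall out automatically
    # once the buffer is full
    self.memory.append((state, action, reward, next_state, done))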