Esempio n. 1
0
class App:
    windowDimY = 14
    windowDimX = 18
    windowWidth = 0
    windowHeight = 0
    toolbarWidth = 200
    player = 0
    apple = 0
    toolbar = 0
    dataCollect = 0
    displayq = False
 
    def __init__(self,dq,state,freq):
        self._running = True
        self._display_surf = None
        self._image_surf = None
        self._apple_surf = None

        self.game = Game()
        self.player = Player(3,self.windowDimX, self.windowDimY)
        self.windowWidth = self.windowDimX*self.player.step
        self.windowHeight = self.windowDimY*self.player.step 
        self.toolbar = Toolbar(self.toolbarWidth, self.windowWidth, self.windowHeight)
        self.apple = Apple(randint(0,self.windowDimX-1), randint(0,self.windowDimY-1))
        self.dataCollect = DataCollector()
        self.displayq=dq
        self.state_size = state
        self.frequency = freq

    def on_init(self):
        pygame.init()
        if(self.displayq):
            self._display_surf = pygame.display.set_mode((self.windowWidth+self.toolbarWidth,self.windowHeight), pygame.HWSURFACE)
            pygame.display.set_caption('Pygame Snake game!')
            self._image_surf = pygame.image.load("images/game_objects/smake.png").convert()
            self._apple_surf = pygame.image.load("images/game_objects/smapple.png").convert()
            self.toolbar.load_images()
        self._running = True
        # self.savepath = "frames/"+time.strftime("%Y%m%d-%H%M%S")
        # os.mkdir(self.savepath)
 
    def on_event(self, event):
        if event.type == QUIT:
            self._running = False
 
    def on_loop(self): 
        self.player.update()

        # does snake collide with itself?
        for i in range(2,self.player.length):
            if self.game.isCollision(self.player.x[0],self.player.y[0],self.player.x[i], self.player.y[i],self.player.step-1):
                print("You lose! Collision with yourself: ")
                print("x[0] (" + str(self.player.x[0]) + "," + str(self.player.y[0]) + ")")
                print("x[" + str(i) + "] (" + str(self.player.x[i]) + "," + str(self.player.y[i]) + ")")
                self.player.crashed = True
                
        # does snake eat apple?
        for i in range(0,self.player.length):
            if self.game.isCollision(self.apple.x,self.apple.y,self.player.x[i], self.player.y[i],self.player.step-1):
                self.apple.x = randint(0,self.windowDimX-1) * self.player.step
                self.apple.y = randint(0,self.windowDimY-1) * self.player.step
                print("apple x=",self.apple.x,"apple y=",self.apple.y)
                self.player.eatenApple = True
 
        #does snake collide with wall?
        if(self.player.x[0]<0 or self.player.x[0]>=self.windowWidth or self.player.y[0]<0 or self.player.y[0]>=self.windowHeight):
            print("You lose! Collision with wall: ")
            print("x[0] (" + str(self.player.x[0]) + "," + str(self.player.y[0]) + ")")
            self.player.crashed = True
        
        pass

    def on_render(self, state):
        self._display_surf.fill((0,0,0))
        self.player.draw(self._display_surf, self._image_surf)
        self.apple.draw(self._display_surf, self._apple_surf)
        self.toolbar.draw(self._display_surf, self.player.direction, state)
        pygame.display.flip()
 
    def on_cleanup(self):
        pygame.quit()

    # initializing agent before the 1st move
    def init_agent(self, agent):
        state_init1 = agent.get_state(game, self.player, self.food) #first state after random placement
        #first action
        
    def reset_player(self):
        self.player = Player(3,self.windowDimX, self.windowDimY)
        #print(self.player.x)
        #print(self.player.y)

    def on_execute(self,speed):
        print('starting execution!')
        params = parameters()
        agent = dqnagent(params,self.state_size) #initialize the agent!
        if(agent.load_weights): #load weights maybe
            agent.model.load_weights(agent.weights)
            print("loaded the weights")

        counter = 0 #how many games have been played/trials done
        record = 0 #highest score

        #---------------------------------------------------------------------------
        #------------------------- LOOP THRU EPISODES ------------------------------
        #---------------------------------------------------------------------------

        while counter<params['episodes']: #still have more trials to do
            counter += 1
            print(counter)

            print("\nEPISODE ", counter, "\n")

            if not params['train']: # if you're not training it, no need for exploration
                agent.epsilon = 0
            else:
                agent.epsilon = 1.0 - ((counter -1) * params['epsilon_decay_linear'])#exploration/randomness factor that decreases over time

            #print("EPSILON = ", agent.epsilon, "\n")

            if self.on_init() == False:
                self._running = False

            #print("PLAYER\tx : ", self.player.x,"\ty : ", self.player.y, "\n")
            duration = 0

            #---------------------------------------------------------------------------
            #--------------------------- INDIVIDUAL EPISODE ----------------------------
            #---------------------------------------------------------------------------
            # indexx = 0
            while(self._running):
                if(counter%self.frequency==0):
                    self.dataCollect.record(self.player.x, self.player.y, self.apple.x, self.apple.y)
                duration+=1
                #print("\nMOVE : ", duration, "\n")
                if(self.displayq):
                    pygame.event.pump()
                    keys = pygame.key.get_pressed() 
                    if (keys[K_ESCAPE]):
                        exit(0)
                oldstate = agent.get_state(self, self.player, self.apple)
                #print("\noldstate = ", oldstate)


                #--------------------------- GET AGENT ACTION ----------------------------

                if random() < agent.epsilon: #every so often random exploration
                    action = randint(0,3) #random action
                    #print("random action : ",action)
                else: #Actionprecited by agent
                    state = oldstate.reshape(1,self.state_size**2+8)
                    predictedq= agent.model.predict(state) # predicts the q values for the action in that state
                    action = np.argmax(predictedq[0]) #maximum (highest q) action
                    #print("predicted action : ", action, "\tq-values : ", predictedq)


                #---------------------------- EXECUTE ACTION -----------------------------

                print(action)
                self.player.do_move(action) #do the action
                self.on_loop()
                newstate = agent.get_state(self, self.player, self.apple) #new state from the action we've taken
                reward = agent.set_reward(self.player)
                #print("newstate = ", newstate)
                #print("reward = ", reward)
                #print("crashed = ", self.player.crashed, "\n")


                #---------------------------- SHORT TRAINING -----------------------------

                if(params['train']):
                    agent.train_short_memory(oldstate,action, reward, newstate, self.player.crashed)
                    agent.remember(oldstate,action, reward, newstate, self.player.crashed)


                #------------------------------ RENDER GAME ------------------------------

                self._running = not(self.player.crashed)
                if(self.displayq):
                    self.on_render(newstate)
                # if(counter%self.frequency==0):
                #     self.on_render(newstate)
                #     pygame.image.save(self._display_surf,savepath+str(indexx))
                time.sleep (speed/1000.0)
                # indexx +=1


            #---------------------------------------------------------------------------
            #----------------------- TRAINING & DATA COLLECTION ------------------------
            #--------------------------------------------------------------------------- 

            if(params['train']):
                agent.replay_new(agent.memory, params['batch_size'])
            
            
            # self.dataCollect.add(self.player.length,duration,agent.epsilon,agent.history.losses)
            self.dataCollect.add(self.player.length,duration,agent.epsilon, 0.0)
            self.dataCollect.save()
            #print(agent.history.losses.length())
            #agent.history.losses = []
            self.player.reset(3,self.windowDimX, self.windowDimY )
            self.on_cleanup()

        #---------------------------------------------------------------------------
        #------------------------------ DATA OUTPUT --------------------------------
        #--------------------------------------------------------------------------- 
        if(params['train']):
             os.mkdir(params['weights_path_save'])
             agent.model.save_weights(params['weights_path_save']+'/weights.hdf5')
        self.dataCollect.save()