class SnakeEnv(gym.Env):
    """OpenAI Gym environment that wraps a ``SnakeGame``.

    The environment forwards actions to the underlying game and turns the
    game's flags into a reward signal:

    * ``+1`` when the game reports that an apple was eaten,
    * ``-1`` when the snake is dead,
    * ``0`` otherwise.

    An episode ends when the snake dies or when more than
    ``max_without_eating`` consecutive non-eating steps have elapsed.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, size, max_without_eating=300, mode='standard'):
        """Build the environment.

        Args:
            size: Indexable pair; ``size[0]`` and ``size[1]`` give the first
                two dimensions of the observation space and are passed on
                to ``SnakeGame``.
            max_without_eating: Number of consecutive steps without eating
                an apple that may pass before the episode is cut off.
            mode: Control scheme. ``'standard'`` exposes 4 discrete actions,
                ``'pov'`` exposes 3. The value is also passed to
                ``SnakeGame`` as ``controls``.

        Raises:
            ValueError: If ``mode`` is neither ``'standard'`` nor ``'pov'``.
        """
        super(SnakeEnv, self).__init__()
        self.max_without_eating = max_without_eating
        self.steps_without_apple = 0

        if mode == 'standard':
            self.action_space = spaces.Discrete(4)
        elif mode == 'pov':
            self.action_space = spaces.Discrete(3)
        else:
            raise ValueError('Uknown mode: ' + str(mode))

        self.observation_space = spaces.Box(
            low=0,
            high=1,
            shape=(size[0], size[1], 3),
            dtype=np.uint8,
        )

        # reset() rebuilds this array with np.zeros_like(self.explored), so
        # it has to exist before the first reset(); previously it was never
        # created and reset() raised AttributeError.
        # NOTE(review): nothing in this class reads `explored`; the shape is
        # assumed to be the board grid -- confirm against intended use.
        self.explored = np.zeros((size[0], size[1]), dtype=np.uint8)

        self.game = SnakeGame(size, controls=mode)

    def step(self, action):
        """Advance the game by one action.

        Args:
            action: Action forwarded unchanged to ``SnakeGame.update``.

        Returns:
            A tuple ``(state, reward, done, info)`` where ``state`` is
            ``self.game.get_state()``, ``reward`` is 1, -1 or 0 as described
            on the class, ``done`` is the episode-termination flag and
            ``info`` is an empty dict.
        """
        self.game.update(action)

        if self.game.ate_apple:
            reward = 1
            self.steps_without_apple = 0
        elif self.game.snake.dead:
            reward = -1
        else:
            reward = 0
            self.steps_without_apple += 1

        # End the episode on death or when the snake has gone too long
        # without eating.
        done = (
            self.steps_without_apple > self.max_without_eating
            or self.game.snake.dead
        )
        return self.game.get_state(), reward, done, {}

    def reset(self):
        """Reset the game and the per-episode bookkeeping.

        Returns:
            The state reported by the game after the reset.
        """
        self.game.reset()
        self.explored = np.zeros_like(self.explored)
        self.steps_without_apple = 0
        return self.game.get_state()

    def render(self, mode='human', close=False):
        """Render the game through ``SnakeGame.render``.

        Args:
            mode: Render mode passed on to the game. In ``'human'`` mode the
                call first sleeps for 0.1 seconds.
            close: Accepted for interface compatibility; not used here.
        """
        if mode == 'human':
            # Throttle so a human viewer can follow the frames.
            time.sleep(0.1)
        self.game.render(mode=mode)

    def seed(self, seed=None):
        """Seed NumPy's global random number generator with ``seed``."""
        np.random.seed(seed)
from snake_game import SnakeGame from agent import Agent from Plot import plot if __name__ == '__main__': plot_scores = [] plot_mean_scores = [] total_score = 0 record = 0 agent = Agent() game = SnakeGame() #game_loop while True: # get state state_old = game.get_state() # get move action = agent.get_action(state_old) # perform move and get new state reward, done, score = game.play_step(action) state_new = game.get_state() # train short memory agent.train_short_memory(state_old, action, reward, state_new, done) # remember agent.remember(state_old, action, reward, state_new, done) if done: #게임이 끝났을때