def generate_data(DQN, min_epsilon, epsilon, copy_step):
    """Play one training episode on a fresh 9x7 maze and record every step.

    Each step is pushed into the agent through ``DQN.add_experience``. The
    episode ends once the maze reports completion, or as soon as the step
    counter hits a multiple of 40, whichever happens first.

    Args:
        DQN: Agent object providing ``get_action``, ``add_experience`` and
            ``train``.
        min_epsilon: Floor applied to ``epsilon`` when it is decayed.
        epsilon: Value passed to ``DQN.get_action`` at the start of the
            episode; decayed every 20 steps.
        copy_step: Accepted for interface compatibility; not read here.

    Returns:
        A tuple ``(fails, turn, loss, epsilon)`` where ``fails`` is 1 if the
        episode was cut off at the 40-step mark and 0 otherwise, ``turn`` is
        the number of steps taken, ``loss`` is the sum of the values returned
        by ``DQN.train()`` and ``epsilon`` is the (possibly decayed) rate.
    """
    env = game.MazeGame(9, 7)
    env.create_path(20)
    env.draw_path()
    env.clean_path()

    decay = 0.999995
    steps = 0
    total_loss = 0
    solved = False

    while not solved:
        state_before = env.return_state_2d()
        legal_moves = env.possible_actions()
        action, action_key = DQN.get_action(state_before, epsilon, legal_moves)
        env.move(action)
        # check() receives the step index *before* it is incremented.
        done, reward = env.check(steps)
        state_after = env.return_state_2d()
        steps += 1

        DQN.add_experience({
            'prev_obs': state_before,
            'a': action_key - 1,
            'r': reward,
            'obs': state_after,
            'done': done,
        })

        if done:
            env.reset()
            # The periodic checks below still run for this final step.
            solved = True

        if steps % 20 == 0:
            epsilon = max(epsilon * decay, min_epsilon)
        if steps % 8 == 0:
            total_loss += DQN.train()
        if steps % 40 == 0:
            # Step cap reached: report the episode as a failure.
            return 1, steps, total_loss, epsilon

    return 0, steps, total_loss, epsilon
def __init__(self, handle):
    """Create the Maze activity: toolbar, game canvas and sharing hooks.

    Args:
        handle: Activity handle forwarded to ``activity.Activity.__init__``.
    """
    activity.Activity.__init__(self, handle)

    self._busy_count = 0
    self._unbusy_idle_sid = None
    self.build_toolbar()

    self.pservice = PresenceService()
    self.owner = self.pservice.get_owner()

    # Reuse the JSON-encoded 'state' entry from the metadata when present.
    saved_state = (
        json.loads(self.metadata['state'])
        if 'state' in self.metadata
        else None
    )

    self.game = game.MazeGame(self, self.owner, saved_state)
    self.set_canvas(self.game)
    self.game.show()
    self.connect("key_press_event", self.game.key_press_cb)

    self.text_channel = None
    self.my_key = profile.get_pubkey()
    self._alert = None

    if not self.shared_activity:
        # We are creating the activity: wait for the 'shared' signal.
        self.connect('shared', self._shared_cb)
        return

    # We are joining an existing activity.
    self._add_alert(_('Joining a maze'), _('Connecting...'))
    self.connect('joined', self._joined_cb)
    if self.get_shared():
        # Already joined, so run the callback right away.
        self._joined_cb()
def __init__(self, maze):
    """Open a pygame window for ``maze`` and run the viewer loop forever.

    Initialises pygame, builds a ``MazeDQN`` and loads its weights from
    "mazenet", stores colours and window geometry on ``self``, then enters an
    event loop that never returns: the process leaves through ``exit()`` when
    the window is closed. Each mouse-button release lets the network pick one
    move; when the maze reports completion a new 9x7 maze is created.

    Args:
        maze: Maze object providing ``rows``, ``col``, ``board``,
            ``return_state_2d``, ``possible_actions``, ``game_move`` and
            ``check``.
    """
    # NOTE(review): nesting below was reconstructed from a whitespace-collapsed
    # source; confirm the indentation of the completion check in particular.
    pygame.init()
    pygame.font.init()
    self.DQN = MazeDQN.MazeDQN(63, 4, 9, 7)
    self.DQN.load_weights("mazenet")
    self.side_length = 100
    self.maze = maze
    self.done = False
    self.height = maze.rows
    self.width = maze.col
    self.imagerect = (0, 0)
    # RGB colour constants used when drawing.
    self.black = (0, 0, 0)
    self.white = (255, 255, 255)
    self.red = (255, 0, 0)
    self.green = (0, 255, 0)
    self.open = (125, 125, 125)
    self.blue = (0, 0, 125)
    # Window size: 100 px per cell plus margins derived from side_length.
    self.screen = pygame.display.set_mode([
        self.side_length * 2 + self.width * 100,
        self.side_length + 100 * self.height
    ])
    self.text = pygame.font.SysFont('Arial Black', 30)
    while True:
        # Re-read the current maze each frame; it is replaced after a finish.
        maze = self.maze
        self.mouse_pos = pygame.mouse.get_pos()
        self.render(self.white, self.black, self.red, self.green, self.white)
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                exit()
            if event.type == pygame.MOUSEBUTTONUP:
                # One click = one move chosen by the network.
                prev_observations = maze.return_state_2d()
                possible_actions = maze.possible_actions()
                # Second argument is 0 -- presumably epsilon, i.e. no
                # random exploration; confirm against MazeDQN.get_action.
                action, action_key = self.DQN.get_action(
                    prev_observations, 0, possible_actions)
                position = maze.game_move(action)
                print(maze.board)
                print(action)
                is_finished, reward = maze.check(0)
                if is_finished:
                    # Render with done=True set, pause 4 seconds, then start
                    # over on a fresh maze.
                    self.done = True
                    self.render(self.white, self.black, self.red,
                                self.green, self.white)
                    sleep(4)
                    maze = game.MazeGame(9, 7)
                    maze.create_path(20)
                    self.maze = maze
                    self.done = False
def play_game(DQN):
    """Play a single rendered game on a new 9x7 maze until it is finished.

    Actions come from ``DQN.get_action`` called with the raw ``maze.board``
    and a fixed second argument of 0.9. The maze is re-rendered after every
    move.

    Args:
        DQN: Agent object providing ``get_action``.
    """
    env = game.MazeGame(9, 7)
    env.create_path(20)
    env.draw_path()
    env.render()

    turn = 0
    while True:
        legal_moves = env.possible_actions()
        action, _action_key = DQN.get_action(env.board, 0.9, legal_moves)
        env.move(action)
        finished, _reward = env.check(turn)
        env.clean_render()
        turn += 1
        if finished:
            break
def dojo(DQN, iterations, min_epsilon, epsilon, copy_step):
    """Train ``DQN`` for ``iterations`` episodes and print progress reports.

    Every iteration plays one episode through ``generate_data`` and runs one
    extra ``DQN.train()`` step. Running totals are printed and reset whenever
    ``i`` is a non-zero multiple of 100. ``DQN.copy_weights()`` is called
    every 8 iterations, epsilon is decayed every 10 iterations, and a
    gradient-flow plot plus the weights are saved whenever ``i`` is a
    non-zero multiple of 1000.

    Args:
        DQN: Agent providing ``predict``, ``train``, ``copy_weights``,
            ``save_weights`` and a ``model`` with ``named_parameters()``.
        iterations: Number of episodes to play.
        min_epsilon: Floor applied to ``epsilon`` when it is decayed.
        epsilon: Initial value handed to ``generate_data``.
        copy_step: Only forwarded to ``generate_data``; the weight-copy
            interval used here is the literal 8.

    Returns:
        None.
    """
    total_loss = 0
    total_fails = 0
    total_turns = 0
    # Episodes since the last report. Starts at 0 (not 1) so that the first
    # report averages over the number of episodes actually played, matching
    # the reset value used for every later reporting window.
    games = 0
    decay = 0.99995

    # Fixed probe state: its prediction is printed to watch the network move.
    test_game = game.MazeGame(9, 7)
    test_state = test_game.return_state_2d()
    test_predict = DQN.predict(np.atleast_2d(test_state)).detach().numpy()
    print(test_predict)

    for i in range(iterations):
        fails, turns, loss, epsilon = generate_data(
            DQN, min_epsilon, epsilon, copy_step)
        loss += DQN.train()

        total_fails += fails
        total_loss += loss
        total_turns += turns
        games += 1

        if i % 100 == 0 and i != 0:
            print("total loss:", total_loss)
            print("average turns:", total_turns / games)
            print("average fails", total_fails / games)
            print("epsilon", epsilon)
            games = 0
            total_fails = 0
            total_loss = 0
            total_turns = 0
            print("games", i)

        if i % 8 == 0:
            DQN.copy_weights()
            test_predict = DQN.predict(
                np.atleast_2d(test_state)).detach().numpy()
            print(test_predict)

        if i % 10 == 0:
            epsilon = max(epsilon * decay, min_epsilon)

        if i % 1000 == 0 and i != 0:
            plot = plot_grad_flow(DQN.model.named_parameters())
            path = "plot" + str(i) + ".png"
            plot.savefig(path)
            # NOTE(review): nesting of this save was reconstructed from a
            # whitespace-collapsed source; confirm it belongs in this branch.
            DQN.save_weights("mazenet")
else: textsurface = self.text.render("FERDIG!", True, (0, 0, 0)) self.screen.blit(textsurface, (200, 0)) #vertical lines for line in range(self.width + 1): pygame.draw.line( self.screen, self.black, [line * 300 + self.side_length, 50], [line * 300 + self.side_length, self.height * 100 + 50], 3) for line in range(self.height + 1): pygame.draw.line( self.screen, self.black, [self.side_length, line * 300 + 50], [self.side_length + self.width * 100, line * 300 + 50], 3) for col in range(self.width): for row in range(self.height): space = (col, row) if self.task.board[space] == 1: pygame.draw.rect(self.screen, self.blue, (self.side_length + 1 + col * 100, 51 + row * 100, 98, 98)) if self.task.board[space] == -1: pygame.draw.rect(self.screen, self.red, (self.side_length + 1 + col * 100, 51 + row * 100, 98, 98)) pygame.event.pump() pygame.display.flip() task = game.MazeGame(9, 7) task.create_path(20) render(task)