return random.choice(maximals) def eval_one_step_prey(self, seeker, prey): if seeker.caught(prey): return prey else: choices = [ m for m in get_neighbourhood(prey) if self.is_valid_position([m]) ] + [prey] return self.greedy_move_prey(choices) if choices else prey if __name__ == "__main__": rows = 60 cols = 80 g = GameBoard(rows, cols, (4, 4), (rows - 1, cols - 1)) episode_len = int((g.rows * g.cols) / 2) episode_num = 50000 g.q_learning(0.7, 0.9, 0.5, episode_len, episode_num) g.seeker.reset() pos_list = g.eval_control(episode_len) ani = Draw.AnimateGameBoard(g) ani.show(pos_list) for i in range(5): seeker = g.seeker.get_random(g.rows, g.cols, g.is_valid_position) if seeker: g.seeker = seeker pos_list = g.eval_control(episode_len) ani.show(pos_list)
if __name__ == "__main__":
    # Script entry point: run Q-learning for one of the two agents on a
    # 30x40 board, print the table entry for that agent's current state and
    # animate the evaluated movement alongside a repeated copy of the other
    # agent.
    rows, cols = 30, 40
    rounds = 10  # cols*rows  (NOTE(review): not read anywhere in this script)
    episode_len, episode_num = 100, 5000

    # NOTE(review): (4, 4) and the board centre are presumably the seeker and
    # prey start cells - confirm against seeker.GameBoard.
    board_centre = (int(rows / 2), int(cols / 2))
    arena = seeker.GameBoard(rows, cols, (4, 4), board_centre)

    trace_seeker = False  # True: learn/animate the seeker; False: the prey
    show = None
    prey = shapes.Blob(arena.prey)

    if not trace_seeker:
        Q = q_learning_prey(
            arena, arena.seeker, arena.prey,
            0.6, 0.9, 0.5, episode_len, episode_num,
        )
        state_key = prey.get_hash()
        # Make sure the printed state has an entry even if it was never stored.
        if state_key not in Q:
            Q[state_key] = 0
        label = "Q" + str(prey.get_pos())
        print(label + " := " + str(Q[state_key]))

        movement = eval_prey(arena, Q, arena.seeker, arena.prey, episode_len)
        # The seeker is repeated once per prey step.
        seeker_frames = [arena.seeker] * len(movement)
        show = (seeker_frames, movement)
    else:
        Q = q_learning_seeker(
            arena, arena.seeker, arena.prey,
            0.6, 0.9, 0.5, episode_len, episode_num,
        )
        state_key = arena.seeker.get_hash()
        # Make sure the printed state has an entry even if it was never stored.
        if state_key not in Q:
            Q[state_key] = 0
        label = "Q" + str(arena.seeker.get_pos())
        print(label + " := " + str(Q[state_key]))

        movement = eval_seeker(arena, Q, arena.seeker, arena.prey, episode_len)
        # The prey blob is repeated once per seeker step.
        prey_frames = [prey] * len(movement)
        show = (movement, prey_frames)

    ani = Draw.AnimateGameBoard(arena)
    ani.show_exact(show)