def __init__(self, make_video=False, replay=False, live_preview=False):
    """Create the game world and the agents used by this wrapper.

    The world is built with one "user_agent" (created in training mode)
    followed by ``s.MAX_AGENTS - 1`` "peaceful_agent" opponents.  A
    separate rule-based agent with its own sequential backend is also
    created and stored on the instance.

    Args:
        make_video: Run with the GUI and request video recording.  The
            original author marked this mode as "not working yet"
            (ffmpeg issue).
        replay: When truthy, ask the world to save replays.
        live_preview: Run with the GUI and enable the per-action preview.
            Ignored when ``make_video`` is set.
    """
    # Local import so this block does not depend on the file's import section.
    from types import SimpleNamespace

    self._actions = self.ACTIONS
    self.ROWS, self.COLS = s.ROWS, s.COLS
    self._live_preview = False

    # Every field gets a real value up front.  The previous implementation
    # assigned attributes on a namedtuple *class*, so any field a branch did
    # not set stayed a (truthy) field descriptor instead of a usable value.
    # fps / update_interval / turn_based defaults mirror the CLI defaults.
    args = SimpleNamespace(
        no_gui=True,
        fps=15,
        log_dir="agent_code/koetherminator",
        turn_based=False,
        update_interval=0.1,
        save_replay=bool(replay),
        replay=None,
        make_video=False,
        continue_without_training=False,
    )

    if make_video:
        # NOTE: flagged as not working yet by the original author.
        args.no_gui = False
        args.make_video = True
    elif live_preview:
        self._live_preview = True
        args.no_gui = False
        # The preview uses a much longer update interval than the default.
        args.update_interval = 1

    agents = [("user_agent", True)] + [("peaceful_agent", False)] * (s.MAX_AGENTS - 1)

    # pygame is only needed (and only initialised) when a GUI is shown.
    if not args.no_gui:
        pygame.init()

    self._world = BombeRLeWorld(args, agents)
    # The first agent in the world is the one this wrapper reads events from.
    self._agent = self._world.agents[0]

    rb_name = "rule_based_agent"
    rb_agent_backend = SequentialAgentBackend(False, rb_name, rb_name)
    rb_agent_backend.start()
    self._rb_agent = Agent("blue", rb_name, rb_name, train=False,
                           backend=rb_agent_backend)
def main(argv=None):
    """Command-line entry point: parse arguments, build a world, run the rounds.

    Two sub-commands exist: ``play`` (live game between agents) and
    ``replay`` (play back a stored game, always with GUI and one round).
    The game logic runs in a daemon thread; this function drives rendering
    and keyboard input until all rounds are done or the window is closed.

    Args:
        argv: Argument list handed to ``ArgumentParser.parse_args``;
            ``None`` makes argparse read ``sys.argv``.

    Raises:
        ValueError: If a GUI is required but pygame was not loaded, or if
            the parsed command is neither ``play`` nor ``replay``.
    """
    cli = ArgumentParser()
    commands = cli.add_subparsers(dest='command_name', required=True)

    # ---- "play" sub-command -------------------------------------------
    play_cmd = commands.add_parser("play")
    lineup = play_cmd.add_mutually_exclusive_group()
    lineup.add_argument("--my-agent", type=str,
                        help="Play agent of name ... against three rule_based_agents")
    lineup.add_argument("--agents", type=str, nargs="+",
                        default=["rule_based_agent"] * s.MAX_AGENTS,
                        help="Explicitly set the agent names in the game")
    play_cmd.add_argument("--train", default=0, type=int, choices=[0, 1, 2, 3, 4],
                          help="First … agents should be set to training mode")
    play_cmd.add_argument("--continue-without-training", default=False,
                          action="store_true")
    play_cmd.add_argument("--n-rounds", type=int, default=10,
                          help="How many rounds to play")
    play_cmd.add_argument("--save-replay", const=True, default=False,
                          action='store', nargs='?',
                          help='Store the game as .pt for a replay')
    play_cmd.add_argument("--no-gui", default=False, action="store_true",
                          help="Deactivate the user interface and play as fast as possible.")

    # ---- "replay" sub-command -----------------------------------------
    replay_cmd = commands.add_parser("replay")
    replay_cmd.add_argument("replay", help="File to load replay from")

    # ---- options shared by both sub-commands --------------------------
    module_dir = os.path.dirname(os.path.abspath(__file__))
    for sub in (play_cmd, replay_cmd):
        sub.add_argument("--fps", type=int, default=15,
                         help="FPS of the GUI (does not change game)")
        sub.add_argument("--turn-based", default=False, action="store_true",
                         help="Wait for key press until next movement")
        sub.add_argument("--update-interval", type=float, default=0.1,
                         help="How often agents take steps (ignored without GUI)")
        sub.add_argument("--log_dir", type=str, default=module_dir + "/logs")
        sub.add_argument("--make-video", default=False, action="store_true",
                         help="Make a video from the game")

    args = cli.parse_args(argv)

    # A replay is always shown in the GUI and consists of exactly one round.
    if args.command_name == "replay":
        args.no_gui = False
        args.n_rounds = 1

    has_gui = not args.no_gui
    if has_gui:
        if not LOADED_PYGAME:
            raise ValueError("pygame could not loaded, cannot run with GUI")
        pygame.init()

    # Build the world for the chosen command.
    if args.command_name == "play":
        # Without any training agent there is nothing to train.
        if args.train == 0:
            args.continue_without_training = True
        if args.my_agent:
            args.agents = ["rule_based_agent"] * (s.MAX_AGENTS - 1)
            names = [args.my_agent, *args.agents]
        else:
            names = args.agents
        # The first ``args.train`` agents of the line-up are put in training mode.
        agents = [(name, slot < args.train) for slot, name in enumerate(names)]
        world = BombeRLeWorld(args, agents)
    elif args.command_name == "replay":
        world = ReplayWorld(args)
    else:
        raise ValueError(f"Unknown command {args.command_name}")

    user_inputs = []

    # The game logic advances the world in the background.
    logic_thread = threading.Thread(target=game_logic,
                                    args=(world, user_inputs, args),
                                    name="Game Logic", daemon=True)
    logic_thread.start()

    for _ in tqdm(range(args.n_rounds)):
        if not world.running:
            world.ready_for_restart_flag.wait()
            world.ready_for_restart_flag.clear()
            world.new_round()

        # Show the initial board right away.
        if has_gui:
            world.render()
            pygame.display.flip()

        round_finished = False
        last_frame = time()
        user_inputs.clear()

        while not round_finished:
            if not has_gui:
                if world.running:
                    # Headless: poll for the end of the round every millisecond.
                    sleep(0.001)
                else:
                    round_finished = True
                continue

            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    if world.running:
                        world.end_round()
                    world.end()
                    return
                if event.type != pygame.KEYDOWN:
                    continue

                pressed = event.key
                if pressed in (pygame.K_q, pygame.K_ESCAPE):
                    world.end_round()
                # With a GUI, a finished round is only left on a key press.
                if not world.running:
                    round_finished = True
                # Translate the key into a game action, if it maps to one.
                action = s.INPUT_MAP.get(pressed)
                if action:
                    if args.turn_based:
                        user_inputs.clear()
                    user_inputs.append(action)

            # Redraw at most ``args.fps`` times per second; otherwise wait
            # out the rest of the current frame.
            if time() - last_frame < 1 / args.fps:
                remaining = 1 / args.fps - (time() - last_frame)
                if remaining > 0:
                    sleep(remaining)
            else:
                world.render()
                pygame.display.flip()
                last_frame = time()

    world.end()
def main():
    """Run ``s.n_rounds`` rounds with a fixed agent line-up.

    The game logic runs in a daemon thread.  This function handles pygame
    events and keyboard input, and renders the world when ``s.gui`` is set.
    Closing the window ends the current round, shuts the world down and
    returns immediately.
    """
    pygame.init()

    # Emulate Windows process spawning behaviour under Unix (for testing)
    # mp.set_start_method('spawn')

    # Initialize environment and agents
    world = BombeRLeWorld([('TheImitator', False),
                           ('simple_agent', False),
                           ('simple_agent', False),
                           ('simple_agent', False)])
    # world = ReplayWorld('Replay 2019-01-30 16:57:42')
    user_inputs = []

    # Start game logic thread
    t = threading.Thread(target=game_logic, args=(world, user_inputs))
    t.daemon = True
    t.start()

    # Run one or more games
    for _ in range(s.n_rounds):
        if not world.running:
            world.ready_for_restart_flag.wait()
            world.ready_for_restart_flag.clear()
            world.new_round()

        # First render
        if s.gui:
            world.render()
            pygame.display.flip()

        round_finished = False
        last_frame = time()
        user_inputs.clear()

        # Main game loop
        while not round_finished:
            # Grab events
            for event in pygame.event.get():
                if event.type == QUIT:
                    world.end_round()
                    world.end()
                    return
                elif event.type == KEYDOWN:
                    key_pressed = event.key
                    if key_pressed in (K_q, K_ESCAPE):
                        world.end_round()
                    # With a GUI, a finished round is left on the next key press.
                    if not world.running:
                        round_finished = True
                    # Convert keyboard input into actions
                    action = s.input_map.get(key_pressed)
                    if action:
                        if s.turn_based:
                            user_inputs.clear()
                        user_inputs.append(action)

            # Without a GUI nobody presses a key: leave as soon as the round is over.
            if not world.running and not s.gui:
                round_finished = True

            # Rendering: redraw at most ``s.fps`` times per second, otherwise
            # sleep for the remainder of the frame.
            if s.gui and (time() - last_frame >= 1 / s.fps):
                world.render()
                pygame.display.flip()
                last_frame = time()
            else:
                sleep_time = 1 / s.fps - (time() - last_frame)
                if sleep_time > 0:
                    sleep(sleep_time)
                # Headless mode never renders, so restart the frame timer here.
                if not s.gui:
                    last_frame = time()

    world.end()
def main():
    """Run ``s.n_rounds`` rounds with 'cbt_agent' in training mode and time the run.

    Prints the wall-clock start time, the round index every 20 rounds, and
    the end time and total duration once all rounds are done.  Closing the
    window ends the current round, shuts the world down and returns
    immediately, without the closing summary.
    """
    pygame.init()

    # Emulate Windows process spawning behaviour under Unix (for testing)
    # mp.set_start_method('spawn')

    agent_list = [('cbt_agent', True),
                  ('simple_agent', False),
                  ('simple_agent', False),
                  ('simple_agent', False)]

    # Initialize environment and agents
    world = BombeRLeWorld(agent_list)
    # world = ReplayWorld('replay_3')  # ('Replay 2019-01-30 16:57:42')
    user_inputs = []

    # Start game logic thread
    t = threading.Thread(target=game_logic, args=(world, user_inputs))
    t.daemon = True
    t.start()

    start_time = time()
    # localtime() yields (year, month, day, hour, minute, second, ...).
    print('Start am {2}.{1}.{0}, um {3:02}:{4:02}:{5:02} Uhr.'.format(
        *time_.localtime(start_time)))

    # Run one or more games
    for i in range(s.n_rounds):
        # Progress indicator.
        if i % 20 == 0:
            print(i)

        if not world.running:
            world.ready_for_restart_flag.wait()
            world.ready_for_restart_flag.clear()
            world.new_round()

        # First render
        if s.gui:
            world.render()
            pygame.display.flip()

        round_finished = False
        last_frame = time()
        user_inputs.clear()

        # Main game loop
        while not round_finished:
            # Grab events
            for event in pygame.event.get():
                if event.type == QUIT:
                    world.end_round()
                    world.end()
                    return
                elif event.type == KEYDOWN:
                    key_pressed = event.key
                    if key_pressed in (K_q, K_ESCAPE):
                        world.end_round()
                    # With a GUI, a finished round is left on the next key press.
                    if not world.running:
                        round_finished = True
                    # Convert keyboard input into actions
                    action = s.input_map.get(key_pressed)
                    if action:
                        if s.turn_based:
                            user_inputs.clear()
                        user_inputs.append(action)

            # Without a GUI nobody presses a key: leave as soon as the round is over.
            if not world.running and not s.gui:
                round_finished = True

            # Rendering: redraw at most ``s.fps`` times per second, otherwise
            # sleep for the remainder of the frame.
            if s.gui and (time() - last_frame >= 1 / s.fps):
                world.render()
                pygame.display.flip()
                last_frame = time()
            else:
                sleep_time = 1 / s.fps - (time() - last_frame)
                if sleep_time > 0:
                    sleep(sleep_time)
                # Headless mode never renders, so restart the frame timer here.
                if not s.gui:
                    last_frame = time()

    world.end()

    end_time = time()
    print('Ende am {2}.{1}.{0}, um {3:02}:{4:02}:{5:02} Uhr.'.format(
        *time_.localtime(end_time)))
    print('Duration =', end_time - start_time, 's =',
          (end_time - start_time) / 60, 'min')
class BombermanGame:
    """Step-wise wrapper around a ``BombeRLeWorld`` for a single learning agent.

    The wrapper advances the world one action at a time, turns the agent's
    events into a scalar reward and encodes the game state as an array.
    """

    ACTIONS = ['UP', 'DOWN', 'LEFT', 'RIGHT', 'WAIT', 'BOMB']

    def __init__(self, make_video=False, replay=False, live_preview=False):
        """Create the game world and the agents used by this wrapper.

        The world is built with one "user_agent" (created in training mode)
        followed by ``s.MAX_AGENTS - 1`` "peaceful_agent" opponents.  A
        separate rule-based agent with its own sequential backend is also
        created and stored on the instance.

        Args:
            make_video: Run with the GUI and request video recording.  The
                original author marked this mode as "not working yet"
                (ffmpeg issue).
            replay: When truthy, ask the world to save replays.
            live_preview: Run with the GUI and render after every action.
                Ignored when ``make_video`` is set.
        """
        # Local import so this block does not depend on the file's import section.
        from types import SimpleNamespace

        self._actions = self.ACTIONS
        self.ROWS, self.COLS = s.ROWS, s.COLS
        self._live_preview = False

        # Every field gets a real value up front.  The previous implementation
        # assigned attributes on a namedtuple *class*, so any field a branch
        # did not set stayed a (truthy) field descriptor instead of a usable
        # value.  fps / update_interval / turn_based mirror the CLI defaults.
        args = SimpleNamespace(
            no_gui=True,
            fps=15,
            log_dir="agent_code/koetherminator",
            turn_based=False,
            update_interval=0.1,
            save_replay=bool(replay),
            replay=None,
            make_video=False,
            continue_without_training=False,
        )

        if make_video:
            # NOTE: flagged as not working yet by the original author.
            args.no_gui = False
            args.make_video = True
        elif live_preview:
            self._live_preview = True
            args.no_gui = False
            # The preview uses a much longer update interval than the default.
            args.update_interval = 1

        agents = [("user_agent", True)] + [("peaceful_agent", False)] * (s.MAX_AGENTS - 1)

        # pygame is only needed (and only initialised) when a GUI is shown.
        if not args.no_gui:
            pygame.init()

        self._world = BombeRLeWorld(args, agents)
        # The first agent in the world is the one this wrapper controls.
        self._agent = self._world.agents[0]

        rb_name = "rule_based_agent"
        rb_agent_backend = SequentialAgentBackend(False, rb_name, rb_name)
        rb_agent_backend.start()
        self._rb_agent = Agent("blue", rb_name, rb_name, train=False,
                               backend=rb_agent_backend)

    def actions(self):
        """Return the list of action names available in the game."""
        return self._actions

    def make_action(self, agent_action: str):
        """Advance the world by one step and return the resulting reward.

        Args:
            agent_action: Action to be taken by the controlled agent.

        Returns:
            The reward for the events of this step, as a 0-d float32 array.
        """
        self._world.do_step(agent_action)
        reward = self.reward(self._agent.events)

        if self._live_preview:
            self._world.render()
            self._world.gui.render_text(f"ACTION: {agent_action}", 800, 490,
                                        (255, 255, 255))
            # Green for a positive reward, red otherwise.
            reward_color = (50, 255, 50) if reward > 0 else (255, 50, 50)
            self._world.gui.render_text(f"REWARD: {reward}", 800, 520,
                                        reward_color)
            pygame.display.flip()
            sleep(0.03)

        return np.array(reward, dtype=np.float32)

    def get_world_state(self):
        """Return the world's state as seen by the controlled agent."""
        return self._world.get_state_for_agent(self._agent)

    def get_observation(self):
        """Return the current world state encoded as an observation array."""
        return self.get_observation_from_state(self.get_world_state())

    @staticmethod
    def reward(events: List[str]) -> float:
        """Sum the rewards of all events; events without an entry count as 0.

        Modify the table below to en/discourage certain behavior.
        """
        game_rewards = {
            e.COIN_COLLECTED: 1,
            e.KILLED_OPPONENT: 5,
            # positive auxiliary rewards
            # (COIN_FOUND and SURVIVED_ROUND are intentionally not rewarded)
            e.BOMB_DROPPED: 0.001,
            e.CRATE_DESTROYED: 0.1,
            e.MOVED_LEFT: 0.001,
            e.MOVED_RIGHT: 0.001,
            e.MOVED_UP: 0.001,
            e.MOVED_DOWN: 0.001,
            # negative auxiliary rewards
            e.INVALID_ACTION: -0.002,
            e.WAITED: -0.002,
            e.GOT_KILLED: -1,
            e.KILLED_SELF: -1,
        }
        return sum(game_rewards.get(event, 0) for event in events)

    @classmethod
    def get_observation_from_state(cls, state):
        """Encode a game state as a single-channel board array.

        The result has shape ``(rows, cols, 1)`` and is indexed ``[y, x, 0]``.
        Cell values, written in this order (later writes win):

            field value : copied from ``state['field']``
             2          : revealed coin
            -2          : bomb (its timer is not encoded)
             3          : the agent itself
            -3          : any other agent

        Args:
            state: Mapping with the keys 'field' (2-d array indexed
                ``[x, y]``), 'coins' (list of ``(x, y)``), 'bombs' (list of
                ``((x, y), timer)``), 'self' and 'others' (4-tuples whose
                last element is the ``(x, y)`` position).

        Returns:
            float32 array of shape ``(rows, cols, 1)``.
        """
        field = state['field']
        cols, rows = field.shape[0], field.shape[1]
        observation = np.zeros([rows, cols, 1], dtype=np.float32)

        # The field is indexed [x, y] while the observation is indexed [y, x].
        # Transposing keeps it aligned with the coin/bomb/agent writes below
        # and makes non-square boards work.
        observation[:, :, 0] = field.T

        # write revealed coins
        if state['coins']:
            coins_x, coins_y = zip(*state['coins'])
            observation[list(coins_y), list(coins_x), 0] = 2

        # write ticking bombs
        if state['bombs']:
            bombs_xy, _bombs_t = zip(*state['bombs'])
            bombs_x, bombs_y = zip(*bombs_xy)
            observation[list(bombs_y), list(bombs_x), 0] = -2

        # write agents
        if state['self']:
            _, _, _, (self_x, self_y) = state['self']
            observation[self_y, self_x, 0] = 3
        if state['others']:
            _, _, _, others_xy = zip(*state['others'])
            others_x, others_y = zip(*others_xy)
            observation[list(others_y), list(others_x), 0] = -3

        return observation

    def new_episode(self):
        """End the running round, if any, and start a new one."""
        # TODO: end the world/game properly.
        if self._world.running:
            self._world.end_round()
        if not self._world.running:
            self._world.ready_for_restart_flag.wait()
            self._world.ready_for_restart_flag.clear()
            self._world.new_round()

    def is_episode_finished(self):
        """Return whether the world reports that the round should stop."""
        return self._world.time_to_stop()

    def set_user_input(self, new_user_input):
        """Store ``new_user_input`` as the world's pending user input."""
        self._world.user_input = new_user_input
def main():
    """Run ``s.n_rounds`` rounds between four 'simple_agent' players.

    The game logic runs in a daemon thread.  This function handles pygame
    events and keyboard input, and renders the world when ``s.gui`` is set.
    Closing the window ends the current round, shuts the world down and
    returns immediately.
    """
    pygame.init()

    # Emulate Windows process spawning behaviour under Unix (for testing)
    # mp.set_start_method('spawn')

    # Initialize environment and agents
    world = BombeRLeWorld([('simple_agent', False),
                           ('simple_agent', False),
                           ('simple_agent', False),
                           ('simple_agent', False)])
    # world = ReplayWorld('Replay 2019-01-30 16:57:42')
    user_inputs = []

    # Start game logic thread
    t = threading.Thread(target=game_logic, args=(world, user_inputs))
    t.daemon = True
    t.start()

    # Run one or more games
    # NOTE: earlier experiments (disabled) switched the crate density or the
    # set of active agents between rounds via world.set_crate_density(...)
    # and world.change_agents(...), and printed every agent's total_score
    # after each round.
    for _ in range(s.n_rounds):
        if not world.running:
            world.ready_for_restart_flag.wait()
            world.ready_for_restart_flag.clear()
            world.new_round()

        # First render
        if s.gui:
            world.render()
            pygame.display.flip()

        round_finished = False
        last_frame = time()
        user_inputs.clear()

        # Main game loop
        while not round_finished:
            # Grab events
            for event in pygame.event.get():
                if event.type == QUIT:
                    world.end_round()
                    world.end()
                    return
                elif event.type == KEYDOWN:
                    key_pressed = event.key
                    if key_pressed in (K_q, K_ESCAPE):
                        world.end_round()
                    # With a GUI, a finished round is left on the next key press.
                    if not world.running:
                        round_finished = True
                    # Convert keyboard input into actions
                    action = s.input_map.get(key_pressed)
                    if action:
                        if s.turn_based:
                            user_inputs.clear()
                        user_inputs.append(action)

            # Without a GUI nobody presses a key: leave as soon as the round is over.
            if not world.running and not s.gui:
                round_finished = True

            # Rendering: redraw at most ``s.fps`` times per second, otherwise
            # sleep for the remainder of the frame.
            if s.gui and (time() - last_frame >= 1 / s.fps):
                world.render()
                pygame.display.flip()
                last_frame = time()
            else:
                sleep_time = 1 / s.fps - (time() - last_frame)
                if sleep_time > 0:
                    sleep(sleep_time)
                # Headless mode never renders, so restart the frame timer here.
                if not s.gui:
                    last_frame = time()

    world.end()