class GameManager:
    """Wraps an ``Environment`` and converts its raw observations into the
    [state-map image, rotation] pairs the agent consumes.

    NOTE(review): relies on module-level ``Config``, ``Environment``,
    ``StateMap``, ``map_action`` and ``np`` being in scope elsewhere in
    this file.
    """

    def __init__(self, id):
        # Visualize only the first agent of each world set, or always when
        # the configuration runs in play mode.
        self.visualize = False
        if Config.VISUALIZE and int(id / len(Config.PATH_TO_WORLD)) == 0:
            self.visualize = True
        elif Config.PLAY_MODE:
            self.visualize = True

        # Worlds are assigned round-robin over the configured world list.
        world_name = Config.PATH_TO_WORLD[id % len(Config.PATH_TO_WORLD)]
        self.env = Environment(world_name)
        print("Env {} for Agent {} started.".format(world_name, id))
        self.env.set_mode(Config.MODE, Config.TERMINATE_AT_END)
        self.env.set_observation_rotation_size(Config.OBSERVATION_ROTATION_SIZE)
        self.env.use_observation_rotation_size(Config.USE_OBSERVATION_ROTATION)
        self.env.set_cluster_size(Config.CLUSTER_SIZE)
        self.reset()

    def _make_obs(self, observation):
        # Shared post-processing (previously duplicated in reset() and both
        # step() branches): split the raw observation, rasterize the laser
        # part into a StateMap, and pack [image, rotation] for the agent.
        input_laser, rotation = self.process_observation(observation)
        state_map = StateMap(input_laser)
        return np.array([[state_map.S_image], [rotation]])

    def reset(self):
        """Reset the environment and return the initial processed observation."""
        observation, _, _, _ = self.env.reset()
        return self._make_obs(observation)

    def step(self, action):
        """Advance the environment by one step.

        ``action is None`` performs a zero-velocity step whose reward is
        forced to 0 and done flag to False; otherwise the discrete action is
        translated to (linear, angular) velocities via ``map_action``.

        Returns (obs, reward, done, info).
        """
        self._update_display()
        if action is None:
            observation, reward, done, info = self.env.step(0, 0, 20)
            reward = 0
            done = False
        else:
            linear, angular = map_action(action)
            observation, reward, done, info = self.env.step(linear, angular, 20)
        return self._make_obs(observation), reward, done, info

    def _update_display(self):
        # Render only when this manager owns a visualization window.
        if self.visualize:
            self.env.visualize()

    def observation_size(self):
        """Proxy to the environment's raw observation size."""
        return self.env.observation_size()

    def process_observation(self, observation):
        """Split a raw observation into (laser_scan, orientation) arrays.

        The first ``Config.OBSERVATION_SIZE`` entries are the laser scan;
        the remainder encodes the rotation/orientation.
        """
        laser_scan = np.array(observation[:Config.OBSERVATION_SIZE])
        orientation = np.array(observation[Config.OBSERVATION_SIZE:])
        return laser_scan, orientation
class GameManager:
    """Thin wrapper around an ``Environment``: selects a world, applies the
    configured settings, optionally visualizes, and passes observations
    through untouched.

    NOTE(review): relies on module-level ``Config``, ``Environment`` and
    ``map_action`` being in scope elsewhere in this file.
    """

    def __init__(self, id):
        # Show a window for the first agent of each world set, or whenever
        # the configuration runs in play mode.
        first_of_set = int(id / len(Config.PATH_TO_WORLD)) == 0
        self.visualize = bool(
            (Config.VISUALIZE and first_of_set) or Config.PLAY_MODE
        )

        # Worlds are handed out round-robin over the configured list.
        world_name = Config.PATH_TO_WORLD[id % len(Config.PATH_TO_WORLD)]
        self.env = Environment(world_name)
        print("Env {} for Agent {} started.".format(world_name, id))
        self.env.set_mode(Config.MODE, Config.TERMINATE_AT_END)
        self.env.set_observation_rotation_size(Config.OBSERVATION_ROTATION_SIZE)
        self.env.use_observation_rotation_size(Config.USE_OBSERVATION_ROTATION)
        self.env.set_cluster_size(Config.CLUSTER_SIZE)
        self.reset()

    def reset(self):
        """Reset the environment and return only its raw observation."""
        observation = self.env.reset()[0]
        return observation

    def step(self, action):
        """Advance one step; returns (observation, reward, done, info).

        A ``None`` action performs a zero-velocity step whose reward and
        done flag are cleared before being returned.
        """
        self._update_display()
        if action is not None:
            linear, angular = map_action(action)
            return self.env.step(linear, angular, 20)
        observation, _, _, info = self.env.step(0, 0, 20)
        return observation, 0, False, info

    def _update_display(self):
        # Render only when this manager owns a visualization window.
        if self.visualize:
            self.env.visualize()

    def observation_size(self):
        """Proxy to the environment's observation size."""
        return self.env.observation_size()
    # Processor-style hooks; the enclosing class header (evidently
    # DQNAgentProc, instantiated below) sits above this chunk.
    # NOTE(review): signatures match the keras-rl Processor API — confirm.

    def process_reward(self, reward):
        """Pass the reward through unchanged."""
        return reward

    def process_info(self, info):
        """Pass the info value through unchanged."""
        return info

    def process_action(self, action):
        """Pass the action through unchanged."""
        return action

    def process_state_batch(self, batch):
        """Drop the middle axis of a state batch: (batch, 1, obs) -> (batch, obs).

        NOTE(review): assumes the window/second axis has length 1 — confirm
        against the agent's memory configuration.
        """
        return batch[:, 0, :]


# --- Script-level setup ---
# NOTE(review): Environment, Mode, DQNAgentProc, action_mapper, DEBUG,
# ModelIntervalCheckpoint and FileLogger are defined/imported elsewhere
# in this file.
env = Environment("Simulation2d/svg/proto_4", 4)
env.use_observation_rotation_size(True)
env.set_observation_rotation_size(128)
env.set_mode(Mode.ALL_RANDOM)

processor = DQNAgentProc()
states = env.observation_size()
actions = action_mapper.ACTION_SIZE

if DEBUG:
    print('states: {0}'.format(states))
    print('actions: {0}'.format(actions))


def build_callbacks(env_name):
    """Build training callbacks for *env_name*: periodic weight checkpoints
    plus a JSON file logger."""
    # '{step}' is left literal here; the checkpoint callback fills it in at
    # save time.
    weights_filename = 'new_results/' + env_name + '{step}.h5f'
    log_filename = 'new_log/{}.json'.format(env_name)
    callbacks = [ModelIntervalCheckpoint(weights_filename, interval=10000)]
    callbacks += [FileLogger(log_filename, interval=1000)]
    return callbacks