def run_module(current_module):
    log.info('Running all tests')
    num = 0
    for attr in dir(current_module):
        if attr.startswith('test_'):
            num += 1
            log.info('Running ' + attr)
            getattr(current_module, attr)()
            log.success(f'Test: {attr} ran successfully')
    return num
def step(self, action):
    if self.total_steps == 0:
        log.info(self.env_config)
    self.start_step_time = time.time()
    agent = self.agents[self.agent_index]
    self.check_for_collisions()
    step_out = agent.step(action)
    if step_out == PARTIAL_PHYSICS_STEP:
        return step_out
    ret = self.finish_step()
    return ret
def _set_config(self, env_config):
    name_col_len = 45
    orig_config = deepcopy(self.env_config)
    self.env_config.update(env_config)
    for k, v in self.env_config.items():
        # A value is "custom" if the passed-in config added or changed it
        custom = k not in orig_config or orig_config[k] != v
        name_col = k.lower()
        padding = ' ' * (name_col_len - len(name_col))
        description = 'custom ' if custom else 'default'
        log.info(f'{name_col}{padding}{description} {v}')
def run_tests():
    current_module = sys.modules[__name__]
    if len(sys.argv) > 1:
        test_case = sys.argv[1]
        log.info('Running ' + test_case)
        getattr(current_module, test_case)()
        num = 1
        log.success(f'{test_case} ran successfully!')
    else:
        num = run_module(current_module)
        for module in MODULES_TO_TEST:
            num += run_module(module)
    log.success(f'{num} tests ran successfully!')
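# Hedged usage sketch: run_tests() reads sys.argv, so the test module is
# presumably executed directly (e.g. `python <test_module>.py [test_name]`).
# This __main__ guard is an assumption and is not part of the original snippet.
if __name__ == '__main__':
    run_tests()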
def step(self, action):
    if self.total_steps == 0:
        log.info(self.env_config)
    if IS_DEBUG_MODE:
        return self._step(action)
    else:
        # Fail gracefully when running so that long training runs are
        # not interrupted by transient errors
        try:
            return self._step(action)
        except Exception:
            log.exception('Caught exception in step, ending episode')
            obz = self.get_blank_observation()
            done = True
            if '--penalize-loss' in sys.argv:
                reward = GAME_OVER_PENALTY
            else:
                reward = 0
            info = {}
            return obz, reward, done, info
def __init__(
        self,
        px_per_m=PX_PER_M,
        add_rotational_friction=True,
        add_longitudinal_friction=True,
        return_observation_as_array=True,
        seed_value=0,
        expect_normalized_actions=True,
        expect_normalized_action_deltas=False,
        decouple_step_time=True,
        physics_steps_per_observation=6,
        is_one_waypoint_map=False,
        is_intersection_map=False,
        match_angle_only=False,
        incent_win=False,
        gamma=0.99,
        add_static_obstacle=False,
        disable_gforce_penalty=False,
        forbid_deceleration=False,
):
    self.logger = log

    log.info(f'{sys.executable} {sys.argv}')

    # All units in SI units (meters and radians) unless otherwise specified
    self.return_observation_as_array: bool = return_observation_as_array
    self.px_per_m: float = px_per_m
    self.expect_normalized_actions: bool = expect_normalized_actions
    self.expect_normalized_action_deltas: bool = expect_normalized_action_deltas
    self.seed_value: int = seed_value
    self.add_rotational_friction: bool = add_rotational_friction
    self.add_longitudinal_friction: bool = add_longitudinal_friction
    self.static_map: bool = '--static-map' in sys.argv
    self.physics_steps_per_observation: int = physics_steps_per_observation
    self.forbid_deceleration = forbid_deceleration
    self.disable_gforce_penalty = disable_gforce_penalty

    # The previous observation, reward, done, info for each agent
    # Useful for running / training the agents
    self.agent_step_outputs = []  # TODO: Use pre-allocated numpy array here

    # For faster / slower than real-time stepping
    self.decouple_step_time = decouple_step_time

    # Step properties
    self.episode_steps: int = 0
    self.num_episodes: int = 0
    self.total_steps: int = 0
    self.last_step_time: float = None
    self.wall_dt: float = None
    self.last_sleep_time: float = None
    self.start_step_time: float = None

    self.fps: int = 60

    # Actions per second
    # TODO: Try fine-tuning at higher FPS, or cyclic FPS
    self.aps = self.fps / self.physics_steps_per_observation

    self.target_dt: float = 1 / self.fps

    self.total_episode_time: float = 0

    self.match_angle_only: bool = match_angle_only
    self.is_one_waypoint_map: bool = is_one_waypoint_map
    self.is_intersection_map: bool = is_intersection_map
    self.incent_win: bool = incent_win
    self.gamma: float = gamma
    self.add_static_obstacle: bool = add_static_obstacle

    # max_one_waypoint_mult
    # Specifies distance to waypoint as ratio: distance / map_size
    # 0.22 m/s on 0.1
    # Less than 2.5 m/s on 0.1?
    self.max_one_waypoint_mult = 0.5

    if '--no-timeout' in sys.argv:
        max_seconds = 100000
    elif '--one_waypoint_map' in sys.argv:
        self.is_one_waypoint_map = True
        max_seconds = self.max_one_waypoint_mult * 200
    elif self.is_intersection_map:
        max_seconds = 60
    else:
        max_seconds = 60
    self._max_episode_steps = \
        max_seconds * 1 / self.target_dt * 1 / self.physics_steps_per_observation

    np.random.seed(self.seed_value)

    # TODO (research): Think about tree of neural nets for RL options
    # TODO: Change random seed on fine-tune to prevent overfitting

    self.player = None

    self.should_render = False
    self._has_enabled_render = False

    if self.is_intersection_map:
        self.num_agents = 2
    else:
        self.num_agents = 1
    self.dummy_accel_agent_indices: List[int] = []

    self.env_config = dict(
        jerk_penalty_coeff=0.10,
        gforce_penalty_coeff=0.031,
        lane_penalty_coeff=0.02,
        collision_penalty_coeff=0.31,
        speed_reward_coeff=0.50,
        win_coefficient=1,
        end_on_harmful_gs=True,
        constrain_controls=True,
        ignore_brake=False,
        forbid_deceleration=forbid_deceleration,
        expect_normalized_action_deltas=expect_normalized_action_deltas,
        incent_win=incent_win,
        dummy_accel_agent_indices=None,
        wait_for_action=False,
    )

    self.agents = None
    self.dummy_accel_agents = None
    self.all_agents = None  # agents + dummy_agents

    self.agent_index: int = 0  # Current agent we are stepping
    self.curr_reward = 0
def __init__(
        self,
        px_per_m=PX_PER_M,
        add_rotational_friction=True,
        add_longitudinal_friction=True,
        return_observation_as_array=True,
        seed_value=0,
        expect_normalized_actions=True,
        expect_normalized_action_deltas=False,
        decouple_step_time=True,
        physics_steps_per_observation=6,
        is_one_waypoint_map=False,
        is_intersection_map=False,
        match_angle_only=False,
        incent_win=False,
        gamma=0.99,
        add_static_obstacle=False,
        disable_gforce_penalty=False,
        forbid_deceleration=False,
        being_played=False,
):
    self.logger = log

    log.info(f'{sys.executable} {sys.argv}')

    # Env config -----------------------------------------------------------
    self.env_config = dict(
        jerk_penalty_coeff=0.10,
        gforce_penalty_coeff=0.031,
        lane_penalty_coeff=0.02,
        collision_penalty_coeff=0.31,
        speed_reward_coeff=0.50,
        win_coefficient=1,
        gforce_threshold=1,
        jerk_threshold=None,
        constrain_controls=False,
        ignore_brake=False,
        forbid_deceleration=forbid_deceleration,
        expect_normalized_action_deltas=expect_normalized_action_deltas,
        discrete_actions=None,
        incent_win=incent_win,
        dummy_accel_agent_indices=None,
        wait_for_action=False,
        incent_yield_to_oncoming_traffic=True,
        physics_steps_per_observation=physics_steps_per_observation,
        end_on_lane_violation=False,
        lane_margin=0,
    )

    # All units in SI units (meters and radians) unless otherwise specified
    self.return_observation_as_array: bool = return_observation_as_array
    self.px_per_m: float = px_per_m
    self.expect_normalized_actions: bool = expect_normalized_actions
    self.seed_value: int = seed_value
    self.add_rotational_friction: bool = add_rotational_friction
    self.add_longitudinal_friction: bool = add_longitudinal_friction
    self.static_map: bool = '--static-map' in sys.argv
    self.disable_gforce_penalty = disable_gforce_penalty

    # The previous observation, reward, done, info for each agent
    # Useful for running / training the agents
    self.agent_step_outputs = []  # TODO: Use pre-allocated numpy array here

    # For faster / slower than real-time stepping
    self.decouple_step_time = decouple_step_time

    self.fps: int = FPS
    self.target_dt: float = 1 / self.fps

    self.match_angle_only: bool = match_angle_only
    self.is_one_waypoint_map: bool = is_one_waypoint_map
    self.is_intersection_map: bool = is_intersection_map
    self.gamma: float = gamma
    self.add_static_obstacle: bool = add_static_obstacle

    # max_one_waypoint_mult
    # Specifies distance to waypoint as ratio: distance / map_size
    # 0.22 m/s on 0.1
    # Less than 2.5 m/s on 0.1?
    self.max_one_waypoint_mult = 0.5

    np.random.seed(self.seed_value)

    self.player = None

    self.should_render = False
    self._has_enabled_render = False

    if self.is_intersection_map:
        self.num_agents = 2
    else:
        self.num_agents = 1
    self.dummy_accel_agent_indices: List[int] = []

    self.agent_index: int = 0  # Current agent we are stepping
    self.discrete_actions = None
    self.being_played = being_played

    self.update_intermediate_physics = self.should_render or self.being_played
    self.render_choppy_but_realtime = False
    # End env config --------------------------------------------------------

    # Env state --------------------------------------------------------------
    # Step properties
    self.episode_steps: int = 0
    self.num_episodes: int = 0
    self.total_steps: int = 0
    self.last_step_time: float = None
    self.wall_dt: float = None
    self.last_sleep_time: float = None
    self.start_step_time: float = None
    self.total_episode_time: float = 0

    self.curr_reward = 0

    self.agents = None
    self.dummy_accel_agents = None
    self.all_agents = None  # agents + dummy_agents

    self.last_step_output = None
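# Hedged usage sketch (not part of the original snippet): the environment class
# name `Deepdrive2DEnv` and the reset() call follow the usual gym convention and
# are assumptions; the (obs, reward, done, info) return of step() is shown above.
#
#   env = Deepdrive2DEnv(is_intersection_map=True, physics_steps_per_observation=6)
#   obs = env.reset()
#   obs, reward, done, info = env.step(action)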