def __init__(self, params: BaseEnvironmentParams, display: BaseDisplay):
    """Set up episode statistics and placeholder component slots.

    The trainer, grid, rewards and physics objects are injected later by
    the concrete environment that builds this base.
    """
    self.stats = ModelStats(params.model_stats_params, display=display)
    self.display = display
    # Wired up after construction by the concrete environment.
    self.trainer = None
    self.grid = None
    self.rewards = None
    self.physics = None
    # Progress counters used by the training loop and for logging.
    self.episode_count = 0
    self.step_count = 0
def register_functions(self, stats: ModelStats):
    """Attach this environment's metric getters to the stats tracker.

    `get_cral` doubles as the evaluation value used to compare runs; the
    remaining getters are registered as named log-data callbacks.
    """
    stats.set_evaluation_value_callback(self.get_cral)
    # Table-driven registration keeps label/getter pairs in one place.
    log_entries = (
        ('cral', self.get_cral),
        ('cr', self.get_collection_ratio),
        ('successful_landing', self.has_landed),
        ('boundary_counter', self.get_boundary_counter),
        ('landing_attempts', self.get_landing_attempts),
        ('movement_ratio', self.get_movement_ratio),
    )
    for label, getter in log_entries:
        stats.add_log_data_callback(label, getter)
class BaseEnvironment:
    """Skeleton of a training environment.

    Wires a grid, physics model, reward model and trainer together and
    drives the training and evaluation loops. Subclasses must implement
    `step`, `test_episode` and `test_scenario`.
    """

    def __init__(self, params: BaseEnvironmentParams, display: BaseDisplay):
        """Create the stats tracker; component slots are filled in by subclasses."""
        self.stats = ModelStats(params.model_stats_params, display=display)
        # Injected later by the concrete environment.
        self.trainer = None
        self.grid = None
        self.rewards = None
        self.physics = None
        self.display = display
        # Progress counters for logging and loop control.
        self.episode_count = 0
        self.step_count = 0

    def fill_replay_memory(self):
        """Play episodes (optionally with random actions) until the trainer
        reports that its replay memory is sufficiently pre-filled."""
        while self.trainer.should_fill_replay_memory():
            state = copy.deepcopy(self.init_episode())
            # FIX: was `while not state.terminal` — unified with the
            # `state.is_terminal()` check used in train_episode for
            # consistency; confirm both forms are equivalent on the
            # project's state class.
            while not state.is_terminal():
                next_state = self.step(state, random=self.trainer.params.rm_pre_fill_random)
                state = copy.deepcopy(next_state)

    def train_episode(self):
        """Run one training episode, training the agent each step, then log stats."""
        state = copy.deepcopy(self.init_episode())
        self.stats.on_episode_begin(self.episode_count)
        while not state.is_terminal():
            state = self.step(state)
            self.trainer.train_agent()
        self.stats.on_episode_end(self.episode_count)
        self.stats.log_training_data(step=self.step_count)
        self.episode_count += 1

    def run(self):
        """Main training driver: pre-fill replay memory, then train until the
        configured number of steps, periodically evaluating and checkpointing."""
        self.fill_replay_memory()
        print('Running ', self.stats.params.log_file_name)
        bar = tqdm.tqdm(total=int(self.trainer.params.num_steps))
        last_step = 0
        while self.step_count < self.trainer.params.num_steps:
            bar.update(self.step_count - last_step)
            last_step = self.step_count
            self.train_episode()
            # Periodic evaluation every `eval_period` training episodes.
            if self.episode_count % self.trainer.params.eval_period == 0:
                self.test_episode()
            self.stats.save_if_best()
        self.stats.training_ended()

    def step(self, state, random=False):
        """Advance the environment by one action; implemented by subclasses."""
        pass

    def init_episode(self, init_state=None):
        """Reset grid, rewards and physics for a fresh episode.

        If `init_state` is given, the grid is initialized from that scenario
        instead of a random episode.
        """
        # FIX: was `if init_state:` — an explicit None check avoids silently
        # ignoring a valid-but-falsy initial state.
        if init_state is not None:
            state = copy.deepcopy(self.grid.init_scenario(init_state))
        else:
            state = copy.deepcopy(self.grid.init_episode())
        self.rewards.reset()
        self.physics.reset(state)
        return state

    def test_episode(self):
        """Run one evaluation episode; implemented by subclasses."""
        pass

    def test_scenario(self, scenario):
        """Evaluate a specific scenario; implemented by subclasses."""
        pass

    def _prompt_and_save(self, default_name):
        """Ask the user whether to save the last displayed episode and, if so,
        write the trajectory plot and episode stats under the chosen name.

        Extracted from the duplicated blocks in `eval` and `eval_scenario`.
        """
        resp = input('Save run? [y/N]\n')
        try:
            # NOTE(review): distutils is deprecated and removed in Python 3.12;
            # consider replacing strtobool with a small local yes/no parser.
            if distutils.util.strtobool(resp):
                save_as = input('Save as: [' + default_name + ']\n')
                if save_as == '':
                    save_as = default_name
                self.display.display_episode(self.grid.map_image, self.stats.trajectory,
                                             plot=False, save_path=save_as + '.png')
                self.stats.save_episode(save_as)
                # FIX: report the name actually used — eval() previously printed
                # 'run_<step>' even when the user had typed a custom name.
                print("Saved as", save_as)
        except ValueError:
            pass  # Unrecognized answer: treat as "don't save".

    def eval(self, episodes, show=False):
        """Run `episodes` evaluation episodes; with `show`, display each one
        and interactively offer to save it."""
        for _ in tqdm.tqdm(range(episodes)):
            self.test_episode()
            self.step_count += 1  # Increase step count so that logging works properly
            if show:
                self.display.display_episode(self.grid.map_image, self.stats.trajectory, plot=True)
                self._prompt_and_save('run_' + str(self.step_count))
        # FIX: removed stray debug output print("next then").

    def eval_scenario(self, init_state):
        """Evaluate a single scenario, display it and offer to save it."""
        self.test_scenario(init_state)
        self.display.display_episode(self.grid.map_image, self.stats.trajectory, plot=True)
        self._prompt_and_save('scenario')
def __init__(self, params: BaseGridParams, stats: ModelStats):
    """Load the configured map and register the map-image callback on stats.

    NOTE(review): assumes `params.map_path` points to a loadable map file —
    confirm against the caller.
    """
    loaded_map = Map.load_map(params.map_path)
    self.map_image = loaded_map
    # Grid dimensions come from the start/landing-zone layer of the map.
    self.shape = loaded_map.start_land_zone.shape
    self.starting_vector = loaded_map.get_starting_vector()
    # Allow the stats object to fetch the current map image for logging/plots.
    stats.set_env_map_callback(self.get_map_image)