def test_add_data(self):
    """Check that `add_entry` and `add_group` store rows as expected.

    Covers: a single entry, a group with an explicit group number, and a
    custom-column dataset where the default group number (0) is used.
    """
    ds = Dataset()
    # Three (1,)-shaped arrays: state, action, new state.
    s, a, s_ = np.array([0., 1., 2.])[:, np.newaxis]
    entry_args = [10, 1., s, a, s_]
    entry_kwargs = {'done': False, 'failed': True}
    ds.add_entry(*entry_args, **entry_kwargs)
    # Expected single row: group, reward, s, a, s_, failed, done.
    data = np.array([[10, 1., s, a, s_, True, False]])
    self._test_data_equal(ds, data)
    group = {
        ds.REWARD: [1.],
        ds.STATE: [s],
        ds.ACTION: [a],
        ds.NEW: [s_],
        ds.FAILED: [True],
        ds.DONE: [False]
    }
    ds.add_group(group, group_number=100)
    # Same row as before except for the group number column.
    new_data = data.copy()
    new_data[0, 0] = 100
    data = np.vstack((data, new_data))
    self._test_data_equal(ds, data)
    # Custom columns: group column defaults to 0 when no number is given.
    ds = Dataset('a', 'b', group_name='group')
    data = [[1., 10], ['b value', 'other b value']]
    group = dict(zip(['a', 'b'], data))
    ds.add_group(group)
    # Prepend the implicit group column (zeros) and transpose to rows.
    data = np.array([[0, 0]] + data, dtype=object).T
    self._test_data_equal(ds, data)
def test_load_preserves_ds(ds):
    """Save a dataset to /tmp, reload it, and check it is preserved.

    NOTE(review): `self` is referenced below but is not a parameter —
    this only works if the function is nested inside a test method;
    confirm the enclosing scope (see the nested copy in `test_load`).
    NOTE(review): the `ds` parameter is immediately shadowed by a fresh
    `Dataset(name='my_dataset')` after `spath` was derived from the
    original `ds.name`, so the dataset passed in is never the one saved
    or compared. This looks unintentional — confirm.
    """
    sdir = Path('/tmp')
    # Save path derived from the *original* ds name, before the rebinding.
    spath = sdir / (ds.name + '.csv')
    ds = Dataset(name='my_dataset')
    ds.save(sdir)
    load = Dataset.load(spath)
    # Reloaded dataset must preserve both columns and data.
    self._test_columns_equal(ds, load.columns, load.columns_wo_group)
    self._test_data_equal(ds, load.to_numpy())
def test_creation(self):
    """Check column layout of default and custom-column Datasets."""
    # A bare Dataset exposes the default columns.
    default_ds = Dataset()
    self._test_columns_equal(default_ds, Dataset.DEFAULT_COLUMNS,
                             Dataset.DEFAULT_COLUMNS_WO_EPISODE)
    # Custom columns: the group column is appended after the others.
    wo_group = ['hello', 'world']
    group = 'my_groupname'
    custom_ds = Dataset(*wo_group, group_name=group)
    self._test_columns_equal(custom_ds, list((*wo_group, group)), wo_group)
def test_average_performances(self):
    """Check `average_performances` on a dataset of several episodes.

    Builds 8 episodes in total (2 single-step entries + 6 two-step
    groups) and verifies the (mean total reward, failure rate) tuple,
    both on the full dataset and restricted to the last episodes of one
    group.
    """
    ds = Dataset(group_name='training')
    s, a, s_ = np.array([0., 1., 2.])[:, np.newaxis]
    # Two single-step episodes (group 10, episodes 0 and 1), reward 1 each.
    entry_args = [10, 0, 1., s, a, s_]
    entry_kwargs = {'done': False, 'failed': True}
    ds.add_entry(*entry_args, **entry_kwargs)
    entry_args[1] = 1
    entry_kwargs = {'done': False, 'failed': False}
    ds.add_entry(*entry_args, **entry_kwargs)
    ep = {
        ds.REWARD: [1., 2.],
        ds.STATE: [s, s_],
        ds.ACTION: [a, a],
        ds.NEW: [s_, s],
        ds.FAILED: [True, False],
        ds.DONE: [False, True]
    }
    # Six two-step episodes in group 11: total reward 2*n_ep + 1,
    # failed on even episode numbers.
    for n_ep in range(6):
        ep[ds.EPISODE] = [n_ep, n_ep]
        ep[ds.REWARD] = [n_ep, n_ep + 1.]
        ep[ds.FAILED] = [False, (n_ep % 2) == 0]
        ds.add_group(ep, group_number=11)
    perfs = average_performances(ds.df, ds.group_name, ds.EPISODE)
    # Episode totals: 1, 1 (entries) then 1, 3, 5, 7, 9, 11 (loop);
    # failures alternate True/False over the 8 episodes.
    truth = (np.mean([1, 1, 1, 3, 5, 7, 9, 11]),
             np.mean([True, False] * 4))
    self.assertTupleEqual(perfs, truth)
    # Restrict to group 11 and to its last 3 episodes (totals 7, 9, 11;
    # one failure out of three).
    df = ds.loc[ds.df[ds.group_name] == 11]
    perfs = average_performances(df, ds.group_name, ds.EPISODE,
                                 last_n_episodes=3)
    truth = (9, 1 / 3)
    self.assertTupleEqual(perfs, truth)
def __init__(self, output_directory, name, plotters):
    """Set up output directories, datasets, and logging for a simulation.

    :param output_directory: base directory; the simulation writes under
        `output_directory / name`
    :param name: simulation name, used as subdirectory name
    :param plotters: dict of plotters (or None for no plotting)
    """
    self.set_seed()
    self.output_directory = Path(output_directory) / name
    self.name = name
    self.plotters = plotters if plotters is not None else {}
    self.training_dataset = Dataset()
    # Standard layout: figs/, logs/ and data/ under the output directory.
    self.fig_path = self.output_directory / 'figs'
    self.log_path = self.output_directory / 'logs'
    self.data_path = self.output_directory / 'data'
    # parents=True only on the first mkdir: it creates the common root.
    self.fig_path.mkdir(parents=True, exist_ok=True)
    self.log_path.mkdir(parents=False, exist_ok=True)
    self.data_path.mkdir(parents=False, exist_ok=True)
    self.training_dataset_path = self.data_path / 'training_samples.csv'
    # Maps plotter name -> list of saved figure paths (for gif compilation).
    self.__saved_figures = {}
    self.setup_default_logging_configuration()
def test_load(self):
    """Check that `Dataset.save` followed by `Dataset.load` round-trips."""
    def test_load_preserves_ds(ds):
        # NOTE(review): the `ds` parameter is shadowed by a fresh
        # `Dataset(name='my_dataset')` after `spath` was computed from
        # the original `ds.name`, so the populated dataset built below
        # is never the one saved/compared. Looks unintentional — confirm.
        sdir = Path('/tmp')
        spath = sdir / (ds.name + '.csv')
        ds = Dataset(name='my_dataset')
        ds.save(sdir)
        load = Dataset.load(spath)
        # Reloading must preserve columns and data.
        self._test_columns_equal(ds, load.columns, load.columns_wo_group)
        self._test_data_equal(ds, load.to_numpy())

    # Build a small custom-column dataset with one entry and one group.
    ds = Dataset('a', 'b', group_name='group', name='my_dataset.csv')
    entry_args = [10, 1.]
    entry_kwargs = {'b': False}
    ds.add_entry(*entry_args, **entry_kwargs)
    group = {'a': [2., 3.], 'b': [False, True]}
    ds.add_group(group, group_number=100)
    test_load_preserves_ds(ds)
agent = RandomSafetyLearner.load( env=env, mpath=apath, gamma_cautious=gamma_cautious, lambda_cautious=lambda_cautious ) else: raise ValueError truth_path = here.parent.parent / 'data' / 'ground_truth' / 'from_vibly' / \ 'hover_map.pickle' ground_truth = SafetyTruth(env) ground_truth.from_vibly_file(truth_path) dataset_path = here / f'{args.nominal}_controller' / 'data' / 'train.csv' dataset = Dataset.load(dataset_path, group_name='Training') print(f"EVALUATING {args.nominal} AGENT AFTER BATCH #{args.nmodel}") n_samples = len(dataset.loc[dataset.df['Training'] <= args.nmodel]) print(f'Number of training samples: {n_samples}') optimistic_qv_ratio = learned_qv(agent, ground_truth, cautious=False) print(f"Q_opt / Q_V ratio: {optimistic_qv_ratio*100:.3f} %") cautious_qv_ratio = learned_qv(agent, ground_truth, cautious=True) print(f"Q_caut / Q_V ratio: {cautious_qv_ratio*100:.3f} %") if args.nominal == AFFINE: mean_diff, inf_diff = difference(agent, ground_truth) print(f"L2 difference with optimal controller (state average): " f"{mean_diff:.3f}") print(f"L_inf difference with optimal controller: " f"{inf_diff:.3f}")
def __init__(self, name, shape, gamma_cautious, lambda_cautious,
             gamma_optimistic, controller, reset_in_safe_state,
             n_episodes_train, n_episodes_test, n_train_test,
             plot_every=1):
    """Set up the hovership environment, safety learner, and datasets.

    :param name: simulation name (used for the output directory)
    :param shape: discretization shape of the environment, or None
    :param gamma_cautious: cautious safety threshold (or schedule)
    :param lambda_cautious: cautious exploration threshold (or schedule)
    :param gamma_optimistic: optimistic safety threshold (or schedule)
    :param controller: 'random' or 'affine' — selects the agent type
    :param reset_in_safe_state: whether episodes restart from a state the
        safety model currently believes viable
    :param n_episodes_train: training episodes per cycle
    :param n_episodes_test: testing episodes per cycle
    :param n_train_test: number of train/test cycles
    :param plot_every: save figures every `plot_every` steps
    """
    shapedict = {} if shape is None else {'shape': shape}
    self.env = LowGoalHovership(
        goal_state=False,
        initial_state=np.array([1.3]),
        **shapedict  # This matters for the GP
    )
    # Seed data and hyperprior for the safety GP.
    x_seed = np.array([[2, .1]])
    y_seed = np.array([.5])
    lengthscale_means = (0.2, 0.2)
    lengthscale_vars = (0.1, 0.1)
    lengthscale_prior = tuple(zip(lengthscale_means, lengthscale_vars))
    outputscale_prior = (1., 10.)
    noise_prior = (0.007, 0.1)
    gp_params = {
        'train_x': x_seed,
        'train_y': y_seed,
        'outputscale_prior': outputscale_prior,
        'lengthscale_prior': lengthscale_prior,
        'noise_prior': noise_prior,
        'mean_constant': None,
        'dataset_type': None,
        'dataset_params': None,
        # Other possible options:
        # 'dataset_type': 'downsampling',
        # 'dataset_params': {'append_every': 10},
        # 'dataset_type': 'neighborerasing',
        # 'dataset_params': {'radius': 0.01},
        'value_structure_discount_factor': None,
    }
    if controller == 'random':
        agent = RandomSafetyLearner(
            env=self.env,
            s_gp_params=gp_params.copy(),
            gamma_cautious=gamma_cautious,
            lambda_cautious=lambda_cautious,
            gamma_optimistic=gamma_optimistic,
        )
    elif controller == 'affine':
        agent = AffineSafetyLearner(
            env=self.env,
            offset=(np.array([2.0]), np.array([0.1])),
            # Slope chosen so the action goes from 0.7 to 0.1 as the
            # state goes from 0 to 2.
            jacobian=np.array([[(0.7 - 0.1) / (0. - 2.)]]),
            s_gp_params=gp_params.copy(),
            gamma_cautious=gamma_cautious,
            lambda_cautious=lambda_cautious,
            gamma_optimistic=gamma_optimistic,
        )
    else:
        raise ValueError('Invalid controller')
    self.agent = agent
    # Precomputed ground-truth viability map for evaluation.
    truth_path = Path(__file__).parent.parent.parent / 'data' / \
        'ground_truth' / 'from_vibly' / f'hover_map.pickle'
    self.ground_truth = SafetyTruth(self.env)
    self.ground_truth.from_vibly_file(truth_path)
    # The safety plotter only overlays a controller when one is fixed.
    ctrlr = None if controller == 'random' else self.agent.policy
    plotters = {
        'safety': SafetyPlotter(self.agent, ground_truth=self.ground_truth,
                                controller=ctrlr)
    }
    output_directory = Path(__file__).parent.resolve()
    super().__init__(output_directory, name, plotters)
    self.reset_in_safe_state = reset_in_safe_state
    self.n_episodes_train = n_episodes_train
    self.n_episodes_test = n_episodes_test
    self.n_train_test = n_train_test
    self.plot_every = plot_every
    # Datasets with extra columns for controller viability and whether
    # the controller was followed at each step.
    self.training_dataset = Dataset(*Dataset.DEFAULT_COLUMNS, CTRLR_VIAB,
                                    FLWD_CTRLR, group_name=GROUP_NAME,
                                    name='train')
    self.testing_dataset = Dataset(*Dataset.DEFAULT_COLUMNS, SAFETY_NAME,
                                   CTRLR_VIAB, FLWD_CTRLR,
                                   group_name=GROUP_NAME, name=f'test')
class FixedControllerLowdim(ModelLearningSimulation):
    """Safety-learning simulation on the low-dimensional hovership.

    Runs train/test cycles of a safety learner (random or affine
    controller), logs performance, and checkpoints datasets and the
    safety model.
    """

    @log_simulation_parameters
    def __init__(self, name, shape, gamma_cautious, lambda_cautious,
                 gamma_optimistic, controller, reset_in_safe_state,
                 n_episodes_train, n_episodes_test, n_train_test,
                 plot_every=1):
        """Set up the environment, agent, ground truth, and datasets.

        :param name: simulation name (used for the output directory)
        :param shape: discretization shape of the environment, or None
        :param gamma_cautious: cautious safety threshold (or schedule)
        :param lambda_cautious: cautious exploration threshold (or schedule)
        :param gamma_optimistic: optimistic safety threshold (or schedule)
        :param controller: 'random' or 'affine' — selects the agent type
        :param reset_in_safe_state: restart episodes from a state the
            safety model currently believes viable
        :param n_episodes_train: training episodes per cycle
        :param n_episodes_test: testing episodes per cycle
        :param n_train_test: number of train/test cycles
        :param plot_every: save figures every `plot_every` steps
        """
        shapedict = {} if shape is None else {'shape': shape}
        self.env = LowGoalHovership(
            goal_state=False,
            initial_state=np.array([1.3]),
            **shapedict  # This matters for the GP
        )
        # Seed data and hyperprior for the safety GP.
        x_seed = np.array([[2, .1]])
        y_seed = np.array([.5])
        lengthscale_means = (0.2, 0.2)
        lengthscale_vars = (0.1, 0.1)
        lengthscale_prior = tuple(zip(lengthscale_means, lengthscale_vars))
        outputscale_prior = (1., 10.)
        noise_prior = (0.007, 0.1)
        gp_params = {
            'train_x': x_seed,
            'train_y': y_seed,
            'outputscale_prior': outputscale_prior,
            'lengthscale_prior': lengthscale_prior,
            'noise_prior': noise_prior,
            'mean_constant': None,
            'dataset_type': None,
            'dataset_params': None,
            # Other possible options:
            # 'dataset_type': 'downsampling',
            # 'dataset_params': {'append_every': 10},
            # 'dataset_type': 'neighborerasing',
            # 'dataset_params': {'radius': 0.01},
            'value_structure_discount_factor': None,
        }
        if controller == 'random':
            agent = RandomSafetyLearner(
                env=self.env,
                s_gp_params=gp_params.copy(),
                gamma_cautious=gamma_cautious,
                lambda_cautious=lambda_cautious,
                gamma_optimistic=gamma_optimistic,
            )
        elif controller == 'affine':
            agent = AffineSafetyLearner(
                env=self.env,
                offset=(np.array([2.0]), np.array([0.1])),
                # Slope chosen so the action goes from 0.7 to 0.1 as the
                # state goes from 0 to 2.
                jacobian=np.array([[(0.7 - 0.1) / (0. - 2.)]]),
                s_gp_params=gp_params.copy(),
                gamma_cautious=gamma_cautious,
                lambda_cautious=lambda_cautious,
                gamma_optimistic=gamma_optimistic,
            )
        else:
            raise ValueError('Invalid controller')
        self.agent = agent
        # Precomputed ground-truth viability map for evaluation.
        truth_path = Path(__file__).parent.parent.parent / 'data' / \
            'ground_truth' / 'from_vibly' / 'hover_map.pickle'
        self.ground_truth = SafetyTruth(self.env)
        self.ground_truth.from_vibly_file(truth_path)
        # The safety plotter only overlays a controller when one is fixed.
        ctrlr = None if controller == 'random' else self.agent.policy
        plotters = {
            'safety': SafetyPlotter(self.agent,
                                    ground_truth=self.ground_truth,
                                    controller=ctrlr)
        }
        output_directory = Path(__file__).parent.resolve()
        super().__init__(output_directory, name, plotters)
        self.reset_in_safe_state = reset_in_safe_state
        self.n_episodes_train = n_episodes_train
        self.n_episodes_test = n_episodes_test
        self.n_train_test = n_train_test
        self.plot_every = plot_every
        # Datasets with extra columns for controller viability and whether
        # the controller was followed at each step.
        self.training_dataset = Dataset(*Dataset.DEFAULT_COLUMNS,
                                        CTRLR_VIAB, FLWD_CTRLR,
                                        group_name=GROUP_NAME, name='train')
        self.testing_dataset = Dataset(*Dataset.DEFAULT_COLUMNS,
                                       SAFETY_NAME, CTRLR_VIAB, FLWD_CTRLR,
                                       group_name=GROUP_NAME, name='test')

    def run_episode(self, n_episode, prefix=None):
        """Run one episode with the agent and collect per-step data.

        :param n_episode: episode number recorded in the episode dict
        :param prefix: if not None, figures are saved under this prefix
            every `plot_every` steps
        :return: dict mapping column name -> list of per-step values
        """
        episode = {
            cname: []
            for cname in self.training_dataset.columns_wo_group
        }
        done = self.env.done
        n = 0
        if prefix is not None:
            self.save_figs(prefix=f'{prefix}_{n}')
        while not done:
            old_state = self.agent.state
            new_state, reward, failed, done = self.agent.step()
            action = self.agent.last_action
            # Viability of the *controller's* action (not the one taken),
            # evaluated against the ground truth.
            ctrlr_action = self.agent.last_controller_action
            ctrlr_viab = self.ground_truth.is_viable(state=old_state,
                                                     action=ctrlr_action)
            flwd_ctrlr = self.agent.followed_controller
            append_to_episode(self.training_dataset, episode, old_state,
                              action, new_state, reward, failed, done,
                              ctrlr_viab, flwd_ctrlr)
            if self.agent.training_mode:
                marker = None
                # Red when the controller was followed, green otherwise.
                color = [1, 0, 0] if self.agent.followed_controller \
                    else [0, 1, 0]
                super().on_run_iteration(state=old_state, action=action,
                                         new_state=new_state, reward=reward,
                                         failed=failed, color=color,
                                         marker=marker)
            if prefix is not None:
                if (n + 1) % self.plot_every == 0:
                    self.save_figs(prefix=f'{prefix}_{n}')
            n += 1
        # Tag every step of the episode with its episode number.
        len_episode = len(episode[self.training_dataset.REWARD])
        episode[self.training_dataset.EPISODE] = [n_episode] * len_episode
        return episode

    def reset_agent_state(self):
        """Reset the agent, optionally into a state believed viable.

        NOTE(review): if `reset_in_safe_state` is False and the
        environment is not done after the reset path, `s` may be
        referenced before assignment — confirm against callers.
        """
        if self.reset_in_safe_state:
            is_viable = self.agent.safety_model.measure(
                slice(None, None, None),
                lambda_threshold=self.agent.lambda_cautious,
                gamma_threshold=self.agent.gamma_cautious) > 0
            if any(is_viable):
                viable_indexes = np.atleast_1d(
                    np.argwhere(is_viable).squeeze())
                state_index = viable_indexes[np.random.choice(
                    len(viable_indexes))]
                s = self.env.stateaction_space.state_space[state_index]
                self.agent.reset(s)
        while self.env.done:
            s = self.agent.reset()
        return s

    @timeit
    def train_agent(self, n_train):
        """Run `n_episodes_train` training episodes for cycle `n_train`."""
        self.agent.training_mode = True
        # self.save_figs(prefix=f'{n_train}ep{0}')
        for n in range(self.n_episodes_train):
            self.reset_agent_state()
            episode = self.run_episode(n, prefix=f'{n_train}ep{n+1}')
            self.training_dataset.add_group(episode, group_number=n_train)
            # if (n+1) % self.plot_every == 0:
            #     self.save_figs(prefix=f'{n_train}ep{n+1}')

    @timeit
    def test_agent(self, n_test):
        """Run `n_episodes_test` evaluation episodes for cycle `n_test`."""
        self.agent.training_mode = False
        for n in range(self.n_episodes_test):
            self.reset_agent_state()
            episode = self.run_episode(n)
            self.testing_dataset.add_group(episode, group_number=n_test)

    @timeit
    def log_performance(self, n_train, ds, name_in_log, duration=None,
                        header=True, limit_episodes=None):
        """Log average reward, failure rate, and step counts for `ds`.

        :param n_train: restrict to this group number, or None for all data
        :param ds: the Dataset to evaluate
        :param name_in_log: label written in the log message
        :param duration: optional computation time to report
        :param header: whether to prepend the performance banner
        :param limit_episodes: only consider the last N episodes, or None
        """
        df = ds.df
        if n_train is not None:
            train = df.loc[df[ds.group_name] == n_train, :]
        else:
            train = df
        r, f, xplo_steps, off_ctrlr = average_performances(
            train, ds.group_name, ds.EPISODE, limit_episodes)
        n_steps = len(train)
        caveat = '' if limit_episodes is None \
            else f'(last {limit_episodes} episodes) '
        header = '-------- Performance --------\n' if header else ''
        message = (f'--- {name_in_log} {caveat}\n'
                   f'Average total reward per episode: {r:.3f}\n'
                   f'Average number of failures: {f * 100:.3f} %\n'
                   f'Number of exploration steps: {xplo_steps} / {n_steps}\n'
                   f'Number of off-controller steps: '
                   f'{off_ctrlr} / {n_steps}')
        if duration is not None:
            message += f'\nComputation time: {duration:.3f} s'
        logging.info(header + message)

    def log_cautious_qv_ratio(self):
        """Log the proportion of the viable set labeled as cautious."""
        ratio = cautious_qv(self.agent, self.ground_truth)
        message = f'Proportion of Q_V labeled as cautious: {ratio*100:.3f} %'
        logging.info(message)

    def log_memory(self):
        """Log CUDA memory usage, if running on GPU."""
        if device == cuda:
            message = ('Memory usage\n' + torch.cuda.memory_summary())
            logging.info(message)

    def log_samples(self):
        """Log the current size of the safety GP's training set."""
        n_samples = self.agent.safety_model.gp.train_x.shape[0]
        logging.info(f'Training dataset size: {n_samples}')

    @timeit
    def checkpoint(self, n):
        """Save datasets and the safety model for cycle `n`."""
        self.training_dataset.save(self.data_path)
        self.testing_dataset.save(self.data_path)
        self.save_safety_model(f'safety_model_{n}')

    def save_safety_model(self, name):
        """Save the safety model (with its data) under `name`."""
        savepath = self.local_models_path / 'safety_model' / name
        savepath.mkdir(exist_ok=True, parents=True)
        self.agent.safety_model.save(savepath, save_data=True)

    def get_models_to_save(self):
        """Return the models this simulation checkpoints."""
        return {'safety_model': self.agent.safety_model}

    @timeit
    def run(self):
        """Run all train/test cycles, logging and checkpointing each."""
        for n in range(self.n_train_test):
            logging.info(f'========= CYCLE {n+1}/{self.n_train_test} '
                         '========')
            # Interpolation parameter for the safety-threshold schedule.
            t = 0 if self.n_train_test == 1 else n / (self.n_train_test - 1)
            self.agent.update_safety_params(t=t)
            # BUGFIX: the training call used to sit *before* a `try: pass`,
            # so the RuntimeError recovery below could never trigger. The
            # call now lives inside the try, mirroring the test block.
            try:
                train_t = self.train_agent(n)
            except RuntimeError as e:
                train_t = None
                logging.critical(f'train_agent({n}) failed:\n{str(e)}')
                self.log_memory()
                torch.cuda.empty_cache()
            finally:
                self.log_performance(n, self.training_dataset, 'Training',
                                     train_t, header=True,
                                     limit_episodes=self.n_episodes_train)
            self.log_samples()
            try:
                test_t = self.test_agent(n)
            except RuntimeError as e:
                test_t = None
                logging.critical(f'test_agent({n}) failed:\n{str(e)}')
                torch.cuda.empty_cache()
            finally:
                self.log_performance(n, self.testing_dataset, 'Testing',
                                     test_t, header=False,
                                     limit_episodes=None)
            chkpt_t = self.checkpoint(n)
            logging.info(f'Checkpointing time: {chkpt_t:.3f} s')
        # Full-dataset summaries after all cycles.
        self.log_performance(None, self.training_dataset,
                             'Training - Full dataset', duration=None,
                             header=False, limit_episodes=None)
        self.log_performance(None, self.testing_dataset,
                             'Testing - Full dataset', duration=None,
                             header=False, limit_episodes=None)
        self.log_cautious_qv_ratio()
class Simulation:
    """
    Base class for a Simulation. Takes care of defining the agent, the main
    loop, and saving the results and figures in the appropriate locations.
    """
    def __init__(self, output_directory, name, plotters):
        """Set up output directories, datasets, and logging.

        :param output_directory: base directory; the simulation writes
            under `output_directory / name`
        :param name: simulation name, used as subdirectory name
        :param plotters: dict of plotters (or None for no plotting)
        """
        self.set_seed()
        self.output_directory = Path(output_directory) / name
        self.name = name
        self.plotters = plotters if plotters is not None else {}
        self.training_dataset = Dataset()
        # Standard layout: figs/, logs/ and data/ under the output dir.
        self.fig_path = self.output_directory / 'figs'
        self.log_path = self.output_directory / 'logs'
        self.data_path = self.output_directory / 'data'
        # parents=True only on the first mkdir: it creates the common root.
        self.fig_path.mkdir(parents=True, exist_ok=True)
        self.log_path.mkdir(parents=False, exist_ok=True)
        self.data_path.mkdir(parents=False, exist_ok=True)
        self.training_dataset_path = self.data_path / 'training_samples.csv'
        # Maps plotter name -> list of saved figure paths (for gifs).
        self.__saved_figures = {}
        self.setup_default_logging_configuration()

    def set_seed(self, value=None):
        """Seed the numpy RNG (None lets numpy pick a random seed)."""
        npseed(value)

    def run(self):
        """Main loop — must be implemented by subclasses."""
        raise NotImplementedError

    def on_run_iteration(self, *args, **kwargs):
        """Forward an iteration event to every plotter that supports it."""
        for plotter in self.plotters.values():
            try:
                plotter.on_run_iteration(*args, **kwargs)
            except AttributeError as e:
                # The plotter does not have a on_run_iteration routine:
                # this is not a problem.
                pass

    def on_simulation_end(self, *args, **kwargs):
        """Save the training dataset and final figures.

        NOTE(review): this passes a *file* path to `Dataset.save`,
        whereas other call sites pass a directory — confirm which
        signature `Dataset.save` expects.
        """
        self.training_dataset.save(self.training_dataset_path)
        self.save_figs(prefix='final')

    def save_figs(self, prefix):
        """Save every plotter's current figure as `<prefix>_<name>.pdf`.

        Saved paths are remembered per plotter for later gif compilation.
        """
        for name, plotter in self.plotters.items():
            savename = prefix + '_' + name + '.pdf'
            savepath = self.fig_path / savename
            fig = plotter.get_figure()
            fig.savefig(str(savepath), format='pdf')
            if self.__saved_figures.get(name) is None:
                self.__saved_figures[name] = [str(savepath)]
            else:
                self.__saved_figures[name] += [str(savepath)]
        plt_close('all')

    def compile_gif(self):
        """Compile each plotter's saved figures into a gif.

        Relies on ImageMagick's `convert` being available on the system.
        """
        for name, figures in self.__saved_figures.items():
            figures_to_compile = ' '.join(figures)
            path = str(self.fig_path)
            gif_command = ("convert -delay 50 -loop 0 -density 300 "
                           f"{figures_to_compile} {path}/{name}.gif")
            try:
                os.system(gif_command)
            except Exception as e:
                print(f'Error: could not compile {name}.gif. '
                      f'Exception: {e}')

    def setup_default_logging_configuration(self):
        """Install file handlers (training/config) and a stdout handler.

        Config-tagged records go to config.log, everything else to
        training.log; both also stream to stdout.
        """
        self.__training_handler = logging.FileHandler(
            self.log_path / 'training.log')
        self.__training_handler.addFilter(ConfigFilter(log_if_match=False))
        self.__training_handler.setLevel(logging.INFO)
        self.__config_handler = logging.FileHandler(
            self.log_path / 'config.log')
        self.__config_handler.addFilter(ConfigFilter(log_if_match=True))
        self.__config_handler.setLevel(logging.INFO)
        self.__stdout_handler = logging.StreamHandler()
        self.__stdout_handler.setLevel(logging.INFO)
        root_logger = logging.getLogger()
        root_logger.setLevel(logging.INFO)
        root_logger.addHandler(self.__training_handler)
        root_logger.addHandler(self.__config_handler)
        root_logger.addHandler(self.__stdout_handler)

    def reset_default_logging_configuration(self):
        """Remove the handlers and filters installed by this simulation."""
        root_logger = logging.getLogger()
        root_logger.removeHandler(self.__training_handler)
        root_logger.removeHandler(self.__config_handler)
        root_logger.removeHandler(self.__stdout_handler)
        list(map(root_logger.removeFilter, root_logger.filters[:]))