def __init__(self,
             env_name,
             record_video=False,
             video_schedule=None,
             log_dir=None,
             record_log=False,
             force_reset=True):
    if log_dir is None:
        if logger.get_snapshot_dir() is None:
            logger.log("Warning: skipping Gym environment monitoring since "
                       "snapshot_dir not configured.")
        else:
            log_dir = os.path.join(logger.get_snapshot_dir(), "gym_log")
    Serializable.quick_init(self, locals())

    env = gym.envs.make(env_name)
    # HACK: Gets rid of the TimeLimit wrapper that sets 'done = True' when
    # the time limit specified for each environment has been passed and
    # therefore the environment is not Markovian (terminal condition depends
    # on time rather than state).
    env = env.env
    self.env = env
    self.env_id = env.spec.id

    # Recording video requires monitor logging to be enabled.
    assert not (not record_log and record_video)

    if log_dir is None or record_log is False:
        self.monitoring = False
    else:
        if not record_video:
            video_schedule = NoVideoSchedule()
        else:
            if video_schedule is None:
                video_schedule = CappedCubicVideoSchedule()
        self.env = gym.wrappers.Monitor(
            self.env, log_dir, video_callable=video_schedule, force=True)
        self.monitoring = True

    self._observation_space = convert_gym_space(env.observation_space)
    logger.log("observation space: {}".format(self._observation_space))
    self._action_space = convert_gym_space(env.action_space)
    logger.log("action space: {}".format(self._action_space))
    self._horizon = env.spec.tags[
        'wrapper_config.TimeLimit.max_episode_steps']
    self._log_dir = log_dir
    self._force_reset = force_reset
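# Hedged usage sketch: assuming this __init__ belongs to a GymEnv-style
# wrapper class (the name `GymEnv` and the env id below are illustrative
# assumptions, not from the original code). Per the assertion above,
# record_video=True requires record_log=True.
env = GymEnv(
    "CartPole-v1",           # any registered Gym id
    record_log=True,         # enable gym.wrappers.Monitor logging
    record_video=True,       # video_schedule defaults to CappedCubicVideoSchedule
    log_dir="/tmp/gym_log",  # otherwise derived from the logger's snapshot_dir
)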
def record_policy(self, env, policy, itr, n_rollout=1, path=None, postfix=""):
    # Resolve the output path: <snapshot_dir>/videos/itr_%05d<postfix>.mp4
    if path is None:
        path = logger.get_snapshot_dir().rstrip(os.sep) + os.sep + \
            "videos" + os.sep + "itr_%05d%s.mp4" % (itr, postfix)
    path_directory = path.rsplit(os.sep, 1)[0]
    if not os.path.exists(path_directory):
        os.makedirs(path_directory, exist_ok=True)
    # Rollout
    # NOTE: the recorder is reopened on the same `path` for every rollout,
    # so with n_rollout > 1 only the last video survives.
    for _ in range(n_rollout):
        obs = env.reset()
        recorder = VideoRecorder(env.env, path=path)
        while True:
            action, _ = policy.get_action(obs)
            obs, _, done, _ = env.step(action)
            recorder.capture_frame()
            if done:
                break
        recorder.close()
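# Hedged usage sketch (`env` and `policy` stand for any objects exposing
# reset/step and get_action; the call site is illustrative): record one
# evaluation rollout of the current policy at iteration 40.
self.record_policy(env, policy, itr=40, n_rollout=1, postfix="_eval")
# -> writes <snapshot_dir>/videos/itr_00040_eval.mp4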
def h5_prepare_file(self, filename, args):
    # Assumed structure / indexing of the H5 file:
    # teacher_info/
    #   [teacher_indx]/
    #     - description
    #     - params
    # traj_data/
    #   [teacher_indx] x [iter_indx] x traj_data

    # Resolve the filename and open the h5 file
    if filename is None:
        self.h5_filename = logger.get_snapshot_dir() + os.sep + "trajectories.h5"
    else:
        # Capability to store multiple teachers in a single file
        self.h5_filename = filename
    if self.h5_filename[-3:] != '.h5':
        self.h5_filename += '.h5'

    if os.path.exists(self.h5_filename):
        print("WARNING: output file %s already exists and will be appended"
              % self.h5_filename)
    self.hdf = h5py.File(self.h5_filename, "a")

    # Create the top-level groups if they are missing
    groups = list(self.hdf.keys())
    # Groups to create: tuples of (group_name, structure_description)
    create_groups = [
        ("teacher_info", "Runs indices (Teachers)"),
        ("traj_data", "Runs (Teachers) x Iterations x Trajectories x Data"),
    ]
    for group in create_groups:
        # BUG FIX: compare the group *name* against existing keys. The
        # original `if not group in groups` compared the whole
        # (name, description) tuple, which never matched, so re-opening an
        # existing file would try to re-create existing groups and fail.
        if group[0] not in groups:
            self.hdf.create_group(group[0])
            self.hdf[group[0]].attrs["structure"] = np.string_(group[1])

    # If other teachers' results already exist in the h5 file, append after
    # the highest existing index
    teacher_indices = list(self.hdf["traj_data"].keys())
    if not teacher_indices:
        self.teacher_indx = 0
    else:
        teacher_indices = np.sort([int(indx) for indx in teacher_indices])
        self.teacher_indx = teacher_indices[-1] + 1
    print("%s : Appended teacher index: " % self.__class__.__name__,
          self.teacher_indx)

    self.hdf.create_group(
        "traj_data/" + h5u.indx2str(self.teacher_indx))  # Teacher group

    # Saving info about the teacher
    teacher_info_group = "teacher_info/" + h5u.indx2str(self.teacher_indx) + "/"
    self.hdf.create_group(teacher_info_group)  # Teacher group
    h5u.add_dict(self.hdf, self.args, groupname=teacher_info_group)
    return self.hdf
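# Hedged read-back sketch for the layout documented above; assumes only the
# default "trajectories.h5" filename and the groups this method creates.
import h5py

with h5py.File("trajectories.h5", "r") as f:
    print(f["traj_data"].attrs["structure"])  # layout description string
    for teacher in f["traj_data"]:            # teacher indices as strings
        print("teacher", teacher, "->", list(f["traj_data"][teacher].keys()))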
def train(self, sess=None, snapshot_mode=None):
    if sess is None:
        sess = tf.Session()
        sess.__enter__()
    self._tf_sess = sess
    if snapshot_mode is not None:
        logger.set_snapshot_mode(snapshot_mode)
    last_average_return = super(AdaptiveSkillAcquisition, self).train(sess=sess)
    return {
        'last_average_return': last_average_return,
        'snapshot_dir': logger.get_snapshot_dir()
    }
def save_rendered_plot(self):
    # Mark the agent's end position
    plt.scatter(*self.agent_pos, marker='x', s=50, c='r')
    directory = logger.get_snapshot_dir()
    if directory is None:
        directory = '~/garage/data/local/asa/instant-run'
        directory = os.path.expanduser(directory)
    if not os.path.isdir(directory):
        os.makedirs(directory)
    base = 'demo_run_'
    try:
        # Next free index = 1 + highest index among existing demo_run_*.png
        i = 1 + max([int(f[len(base):f.find('.')])
                     for f in os.listdir(directory)
                     if f.startswith(base)])
    except ValueError:
        # max() of an empty sequence: no demo_run_* files yet
        i = 0
    plt.savefig(os.path.join(directory, '{}{}.png'.format(base, i)))
def save_samples(self, itr, samples_data):
    with open(osp.join(logger.get_snapshot_dir(),
                       'samples_%i.pkl' % itr), "wb") as fout:
        pickle.dump(samples_data, fout)
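# Hedged counterpart sketch: loading a saved batch back. `load_samples` is a
# hypothetical helper mirroring the path scheme of save_samples above.
import os.path as osp
import pickle

def load_samples(snapshot_dir, itr):
    with open(osp.join(snapshot_dir, 'samples_%i.pkl' % itr), "rb") as fin:
        return pickle.load(fin)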
# NOTE: argparse's `type=bool` treats any non-empty string (including
# "False") as True; ast.literal_eval parses the literal correctly
# (requires `import ast`).
parser.add_argument('--log_tabular_only', type=ast.literal_eval, default=False)
parser.add_argument('--log_dir', type=str, default='./Data/AST/GA/Test')
parser.add_argument('--args_data', type=str, default=None)
args = parser.parse_args()

# Create the logger
log_dir = args.log_dir
tabular_log_file = osp.join(log_dir, args.tabular_log_file)
text_log_file = osp.join(log_dir, args.text_log_file)
params_log_file = osp.join(log_dir, args.params_log_file)

logger.log_parameters_lite(params_log_file, args)
logger.add_tabular_output(tabular_log_file)
prev_snapshot_dir = logger.get_snapshot_dir()
prev_mode = logger.get_snapshot_mode()
logger.set_snapshot_dir(log_dir)
logger.set_snapshot_mode(args.snapshot_mode)
logger.set_snapshot_gap(args.snapshot_gap)
logger.set_log_tabular_only(args.log_tabular_only)
logger.push_prefix("[%s] " % args.exp_name)

seed = 0
top_k = 10
max_path_length = 100
top_paths = BPQ.BoundedPriorityQueue(top_k)

np.random.seed(seed)
tf.set_random_seed(seed)
def __init__(self,
             env,
             policy,
             baseline,
             scope=None,
             n_itr=500,
             max_samples=None,
             start_itr=0,
             batch_size=5000,
             max_path_length=500,
             discount=0.99,
             gae_lambda=1,
             plot=False,
             pause_for_plot=False,
             center_adv=True,
             positive_adv=False,
             store_paths=False,
             paths_h5_filename=None,
             whole_paths=True,
             fixed_horizon=False,
             sampler_cls=None,
             sampler_args=None,
             force_batch_sampler=False,
             play_every_itr=None,
             record_every_itr=None,
             record_end_ep_num=3,
             **kwargs):
    """
    :param env: Environment
    :param policy: Policy
    :type policy: Policy
    :param baseline: Baseline
    :param scope: Scope for identifying the algorithm. Must be specified if
     running multiple algorithms simultaneously, each using different
     environments and policies.
    :param n_itr: Maximum number of iterations.
    :param max_samples: If not None, exit once this many env samples have
     been collected (overrides n_itr).
    :param start_itr: Starting iteration.
    :param batch_size: Number of samples per iteration.
    :param max_path_length: Maximum length of a single rollout.
    :param discount: Discount.
    :param gae_lambda: Lambda used for generalized advantage estimation.
    :param plot: Plot evaluation run after each iteration.
    :param pause_for_plot: Whether to pause before continuing when plotting.
    :param center_adv: Whether to rescale the advantages so that they have
     mean 0 and standard deviation 1.
    :param positive_adv: Whether to shift the advantages so that they are
     always positive. When used in conjunction with center_adv, the
     advantages will be standardized before shifting.
    :param store_paths: Whether to save all paths data to the snapshot.
    :return:
    """
    # Capture all named constructor arguments (for HDF5 logging below),
    # then merge in the extra keyword arguments.
    self.args = locals()
    del self.args["kwargs"]
    del self.args["self"]
    self.args = {**self.args, **kwargs}

    self.env = env
    try:
        self.env.env.save_dyn_params(
            filename=logger.get_snapshot_dir().rstrip(os.sep) +
            os.sep + "dyn_params.yaml")
    except Exception:
        # A bare `except:` here would also swallow KeyboardInterrupt.
        print("WARNING: BatchPolOpt: couldn't save dynamics params")
    self.policy = policy
    self.baseline = baseline
    self.scope = scope
    self.n_itr = n_itr
    self.max_samples = max_samples
    self.start_itr = start_itr
    self.batch_size = batch_size
    self.max_path_length = max_path_length
    self.discount = discount
    self.gae_lambda = gae_lambda
    self.plot = plot
    self.pause_for_plot = pause_for_plot
    self.center_adv = center_adv
    self.positive_adv = positive_adv
    self.store_paths = store_paths
    self.whole_paths = whole_paths
    self.fixed_horizon = fixed_horizon
    self.play_every_itr = play_every_itr
    self.record_every_itr = record_every_itr
    self.record_end_ep_num = record_end_ep_num

    if sampler_cls is None:
        if self.policy.vectorized and not force_batch_sampler:
            sampler_cls = OnPolicyVectorizedSampler
        else:
            sampler_cls = BatchSampler
    if sampler_args is None:
        sampler_args = dict()
    self.sampler = sampler_cls(self, **sampler_args)
    self.init_opt()

    # Initialization of HDF5 logging of trajectories
    if self.store_paths:
        self.h5_prepare_file(filename=paths_h5_filename, args=self.args)

    # Clean up (close files etc.) on interpreter exit
    atexit.register(self.clean_at_exit)
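# The locals()-capture trick above deserves a minimal standalone sketch
# (the class below is illustrative, not from the repo): calling locals()
# first thing in __init__ snapshots every named hyperparameter, which is
# handy for parameter logging.
class Configurable:
    def __init__(self, lr=1e-3, batch_size=64, **kwargs):
        args = locals()   # {'self': ..., 'lr': ..., 'batch_size': ..., 'kwargs': ...}
        del args["self"]
        del args["kwargs"]
        self.args = {**args, **kwargs}  # merge named args with extra kwargs

c = Configurable(lr=3e-4, seed=7)
assert c.args == {"lr": 3e-4, "batch_size": 64, "seed": 7}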
def run_experiment(argv):
    default_log_dir = config.LOG_DIR
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')
    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=("Number of parallel workers to perform rollouts. "
              "0 => don't start any workers"))
    parser.add_argument(
        '--exp_name',
        type=str,
        default=default_exp_name,
        help='Name of the experiment.')
    parser.add_argument(
        '--log_dir',
        type=str,
        default=None,
        help='Path to save the log and iteration snapshot.')
    parser.add_argument(
        '--snapshot_mode',
        type=str,
        default='all',
        help='Mode to save the snapshot. Can be either "all" '
             '(all iterations will be saved), "last" (only '
             'the last iteration will be saved), "gap" (every '
             '`snapshot_gap` iterations are saved), or "none" '
             '(do not save snapshots)')
    parser.add_argument(
        '--snapshot_gap',
        type=int,
        default=1,
        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--tabular_log_file',
        type=str,
        default='progress.csv',
        help='Name of the tabular log file (in csv).')
    parser.add_argument(
        '--text_log_file',
        type=str,
        default='debug.log',
        help='Name of the text log file (in pure text).')
    parser.add_argument(
        '--tensorboard_step_key',
        type=str,
        default=None,
        help='Name of the step key in tensorboard_summary.')
    parser.add_argument(
        '--params_log_file',
        type=str,
        default='params.json',
        help='Name of the parameter log file (in json).')
    parser.add_argument(
        '--variant_log_file',
        type=str,
        default='variant.json',
        help='Name of the variant log file (in json).')
    parser.add_argument(
        '--resume_from',
        type=str,
        default=None,
        help='Name of the pickle file to resume experiment from.')
    parser.add_argument(
        '--plot',
        type=ast.literal_eval,
        default=False,
        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed', type=int, help='Random seed for numpy')
    parser.add_argument(
        '--args_data', type=str, help='Pickled data for objects')
    parser.add_argument(
        '--variant_data',
        type=str,
        help='Pickled data for variant configuration')
    parser.add_argument(
        '--use_cloudpickle', type=ast.literal_eval, default=False)

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        set_seed(args.seed)

    # SIGINT is blocked for all processes created in parallel_sampler to
    # avoid the creation of sleeping and zombie processes.
    #
    # If the user interrupts run_experiment, there's a chance some processes
    # won't die due to a deadlock condition where one of the children in the
    # parallel sampler exits without releasing a lock after it catches
    # SIGINT.
    #
    # Later the parent tries to acquire the same lock to proceed with its
    # cleanup, but it remains sleeping waiting for the lock to be released.
    # In the meantime, all the processes in the parallel sampler remain in
    # the zombie state since the parent cannot proceed with their cleanup.
    with mask_signals([signal.SIGINT]):
        if args.n_parallel > 0:
            parallel_sampler.initialize(n_parallel=args.n_parallel)
            if args.seed is not None:
                parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = osp.join(default_log_dir, args.exp_name)
    else:
        log_dir = args.log_dir
    tabular_log_file = osp.join(log_dir, args.tabular_log_file)
    text_log_file = osp.join(log_dir, args.text_log_file)
    params_log_file = osp.join(log_dir, args.params_log_file)

    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = osp.join(log_dir, args.variant_log_file)
        logger.log_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        logger.log_parameters_lite(params_log_file, args)

    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.set_tensorboard_dir(log_dir)
    prev_snapshot_dir = logger.get_snapshot_dir()
    prev_mode = logger.get_snapshot_mode()
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(args.snapshot_mode)
    logger.set_snapshot_gap(args.snapshot_gap)
    logger.set_log_tabular_only(args.log_tabular_only)
    logger.set_tensorboard_step_key(args.tensorboard_step_key)
    logger.push_prefix("[%s] " % args.exp_name)

    if args.resume_from is not None:
        data = joblib.load(args.resume_from)
        assert 'algo' in data
        algo = data['algo']
        algo.train()
    else:
        # read from stdin
        if args.use_cloudpickle:
            import cloudpickle
            method_call = cloudpickle.loads(base64.b64decode(args.args_data))
            try:
                method_call(variant_data)
            except BaseException:
                children = garage.plotter.Plotter.get_plotters()
                children += garage.tf.plotter.Plotter.get_plotters()
                if args.n_parallel > 0:
                    children += [parallel_sampler]
                child_proc_shutdown(children)
                raise
        else:
            data = pickle.loads(base64.b64decode(args.args_data))
            maybe_iter = concretize(data)
            if is_iterable(maybe_iter):
                for _ in maybe_iter:
                    pass

    logger.set_snapshot_mode(prev_mode)
    logger.set_snapshot_dir(prev_snapshot_dir)
    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
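# Hedged launcher-side sketch: how `--args_data` is typically produced when
# use_cloudpickle is enabled, matching the decode above (the task function
# and invocation are illustrative).
import base64
import cloudpickle

def my_task(variant):
    print("running with variant:", variant)

args_data = base64.b64encode(cloudpickle.dumps(my_task)).decode("ascii")
# invoke: python run_experiment.py --use_cloudpickle True --args_data "$args_data"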
def run_task(*_):
    # Configure TF session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config).as_default() as tf_session:
        ## Load data from itr_N.pkl
        with open(snapshot_file, 'rb') as file:
            saved_data = dill.load(file)

        ## Construct PathTrie and find the description of a missing skill
        # This is basically ASA.decide_new_skill
        min_length = 3
        max_length = 5
        action_map = {0: 's', 1: 'L', 2: 'R'}
        min_f_score = 1
        max_results = 10
        aggregations = []  # sublist of ['mean', 'most_freq', 'nearest_mean', 'medoid'] or 'all'
        paths = saved_data['paths']
        path_trie = PathTrie(saved_data['hrl_policy'].num_skills)
        for path in paths:
            actions = path['actions'].argmax(axis=1).tolist()
            observations = path['observations']
            path_trie.add_all_subpaths(
                actions,
                observations,
                min_length=min_length,
                max_length=max_length
            )
        logger.log('Searched {} rollouts'.format(len(paths)))

        frequent_paths = path_trie.items(
            action_map=action_map,
            min_count=10,  # len(paths) * 2
            min_f_score=min_f_score,
            max_results=max_results,
            aggregations=aggregations
        )
        logger.log('Found {} frequent paths: [index, actions, count, f-score]'
                   .format(len(frequent_paths)))
        for i, f_path in enumerate(frequent_paths):
            logger.log('    {:2}: {:{pad}}\t{}\t{:.3f}'.format(
                i, f_path['actions_text'], f_path['count'],
                f_path['f_score'], pad=max_length))

        top_subpath = frequent_paths[0]
        start_obss = top_subpath['start_observations']
        end_obss = top_subpath['end_observations']

        ## Prepare elements for training
        # Environment
        base_env = saved_data['env'].env.env  # <NormalizedEnv<MinibotEnv instance>>
        skill_learning_env = TfEnv(
            SkillLearningEnv(
                # base env that was wrapped in HierarchizedEnv
                # (not fully unwrapped - may be normalized!)
                env=base_env,
                start_obss=start_obss,
                end_obss=end_obss
            )
        )
        # Skill policy
        hrl_policy = saved_data['hrl_policy']
        new_skill_policy, new_skill_id = hrl_policy.create_new_skill(
            end_obss=end_obss
        )
        # Baseline - clone the baseline specified in low_algo_kwargs, or the
        # top algo's baseline
        low_algo_kwargs = dict(saved_data['low_algo_kwargs'])
        baseline_to_clone = low_algo_kwargs.get('baseline', saved_data['baseline'])
        baseline = Serializable.clone(  # to create a blank baseline
            obj=baseline_to_clone,
            name='{}Skill{}'.format(type(baseline_to_clone).__name__, new_skill_id)
        )
        low_algo_kwargs['baseline'] = baseline
        low_algo_cls = saved_data['low_algo_cls']
        # Set custom training params (should've been set in asa_basic_run)
        low_algo_kwargs['batch_size'] = 2500
        low_algo_kwargs['max_path_length'] = 50
        low_algo_kwargs['n_itr'] = 500

        # Algorithm
        algo = low_algo_cls(
            env=skill_learning_env,
            policy=new_skill_policy,
            **low_algo_kwargs
        )

        # Logger parameters
        logger_snapshot_dir_before = logger.get_snapshot_dir()
        logger_snapshot_mode_before = logger.get_snapshot_mode()
        logger_snapshot_gap_before = logger.get_snapshot_gap()
        # No need to change the snapshot dir in this script; that is done in
        # ASA-algo.create_and_train_new_skill()
        logger.set_snapshot_mode('none')
        logger.set_tensorboard_step_key('Iteration')

        ## Train new skill
        with logger.prefix('Skill {} | '.format(new_skill_id)):
            algo.train(sess=tf_session)

        ## Save the new policy and its end_obss (we'll construct the skill
        ## stopping function from them manually in asa_resume_with_new_skill.py)
        out_file = os.path.join(logger.get_snapshot_dir(), 'final.pkl')
        with open(out_file, 'wb') as file:
            out_data = {
                'policy': new_skill_policy,
                'subpath': top_subpath
            }
            dill.dump(out_data, file)

        # Restore logger parameters
        logger.set_snapshot_dir(logger_snapshot_dir_before)
        logger.set_snapshot_mode(logger_snapshot_mode_before)
        logger.set_snapshot_gap(logger_snapshot_gap_before)
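# Hedged companion sketch: how asa_resume_with_new_skill.py might load the
# artifact saved above (the keys match the out_data dict; the path depends
# on whatever snapshot dir was in effect).
import dill

with open('final.pkl', 'rb') as f:
    out_data = dill.load(f)
new_skill_policy = out_data['policy']
top_subpath = out_data['subpath']  # end_observations feed the skill stopping function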
def run_experiment(argv):
    default_log_dir = config.LOG_DIR
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')
    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=("Number of parallel workers to perform rollouts. "
              "0 => don't start any workers"))
    parser.add_argument(
        '--exp_name',
        type=str,
        default=default_exp_name,
        help='Name of the experiment.')
    parser.add_argument(
        '--log_dir',
        type=str,
        default=None,
        help='Path to save the log and iteration snapshot.')
    parser.add_argument(
        '--snapshot_mode',
        type=str,
        default='all',
        help='Mode to save the snapshot. Can be either "all" '
             '(all iterations will be saved), "last" (only '
             'the last iteration will be saved), "gap" (every '
             '`snapshot_gap` iterations are saved), or "none" '
             '(do not save snapshots)')
    parser.add_argument(
        '--snapshot_gap',
        type=int,
        default=1,
        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--tabular_log_file',
        type=str,
        default='progress.csv',
        help='Name of the tabular log file (in csv).')
    parser.add_argument(
        '--text_log_file',
        type=str,
        default='debug.log',
        help='Name of the text log file (in pure text).')
    parser.add_argument(
        '--tensorboard_step_key',
        type=str,
        default=None,
        help='Name of the step key in tensorboard_summary.')
    parser.add_argument(
        '--params_log_file',
        type=str,
        default='params.json',
        help='Name of the parameter log file (in json).')
    parser.add_argument(
        '--variant_log_file',
        type=str,
        default='variant.json',
        help='Name of the variant log file (in json).')
    parser.add_argument(
        '--resume_from',
        type=str,
        default=None,
        help='Name of the pickle file to resume experiment from.')
    parser.add_argument(
        '--plot',
        type=ast.literal_eval,
        default=False,
        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed', type=int, help='Random seed for numpy')
    parser.add_argument(
        '--args_data', type=str, help='Pickled data for stub objects')
    parser.add_argument(
        '--variant_data',
        type=str,
        help='Pickled data for variant configuration')
    parser.add_argument(
        '--use_cloudpickle', type=ast.literal_eval, default=False)

    args = parser.parse_args(argv[1:])

    assert os.environ.get("JOBLIB_START_METHOD", None) == "forkserver"

    if args.seed is not None:
        set_seed(args.seed)

    if args.n_parallel > 0:
        from garage.sampler import parallel_sampler
        parallel_sampler.initialize(n_parallel=args.n_parallel)
        if args.seed is not None:
            parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = osp.join(default_log_dir, args.exp_name)
    else:
        log_dir = args.log_dir
    tabular_log_file = osp.join(log_dir, args.tabular_log_file)
    text_log_file = osp.join(log_dir, args.text_log_file)
    params_log_file = osp.join(log_dir, args.params_log_file)

    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = osp.join(log_dir, args.variant_log_file)
        logger.log_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        logger.log_parameters_lite(params_log_file, args)

    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    logger.set_tensorboard_dir(log_dir)
    prev_snapshot_dir = logger.get_snapshot_dir()
    prev_mode = logger.get_snapshot_mode()
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(args.snapshot_mode)
    logger.set_snapshot_gap(args.snapshot_gap)
    logger.set_log_tabular_only(args.log_tabular_only)
    logger.set_tensorboard_step_key(args.tensorboard_step_key)
    logger.push_prefix("[%s] " % args.exp_name)

    if args.resume_from is not None:
        data = joblib.load(args.resume_from)
        assert 'algo' in data
        algo = data['algo']
        algo.train()
    else:
        # read from stdin
        if args.use_cloudpickle:
            import cloudpickle
            method_call = cloudpickle.loads(base64.b64decode(args.args_data))
            try:
                method_call(variant_data)
            except BaseException:
                if args.n_parallel > 0:
                    parallel_sampler.terminate()
                raise
        else:
            data = pickle.loads(base64.b64decode(args.args_data))
            maybe_iter = concretize(data)
            if is_iterable(maybe_iter):
                for _ in maybe_iter:
                    pass

    logger.set_snapshot_mode(prev_mode)
    logger.set_snapshot_dir(prev_snapshot_dir)
    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
def run_task_continue(task_param):
    """
    Wrap the PPO training task in the run_task function, continuing from an
    existing params.pkl snapshot.
    :param task_param: dict of experiment parameters
    """
    from garage.tf.baselines import GaussianMLPBaseline
    from garage.tf.envs import TfEnv
    from garage.tf.policies import GaussianMLPPolicy, DeterministicMLPPolicy, \
        GaussianGRUPolicy, GaussianLSTMPolicy
    from quad_train.algos.cem import CEM
    from quad_train.algos.cma_es import CMAES
    from quad_train.algos.ppo import PPO
    from quad_train.algos.trpo import TRPO
    import sys
    import os
    import garage.misc.logger as logger
    import joblib

    pkl_file = logger.get_snapshot_dir().rstrip(os.sep) + os.sep + "params.pkl"
    if os.path.isfile(pkl_file):
        print("WARNING: Loading and continuing from %s snapshot ..."
              % logger.get_snapshot_dir().rstrip(os.sep))
    else:
        raise ValueError("ERROR: params.pkl not found at %s" % pkl_file)

    import tensorflow as tf
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Unpack the snapshot
        snapshot = joblib.load(pkl_file)
        env = snapshot["env"]
        policy = snapshot["policy"]
        itr = snapshot["itr"]

        if args.new_env:
            from quad_sim.quadrotor import QuadrotorEnv
            env = TfEnv(QuadrotorEnv(**task_param["env_param"]))

        task_param["alg_param"]["start_itr"] = itr + 1

        del task_param["env"]
        del task_param["env_param"]
        del task_param["policy_class"]
        del task_param["policy_param"]

        # Look up the algorithm class by name; the function-level imports
        # above put CEM, CMAES, PPO and TRPO into locals().
        if task_param["alg_class"] != "CEM" and task_param["alg_class"] != "CMAES":
            baseline = snapshot["baseline"]
            del task_param["baseline_class"]
            del task_param["baseline_param"]
            algo = locals()[task_param["alg_class"]](
                env=env,
                policy=policy,
                baseline=baseline,
                **task_param["alg_param"])
        else:
            algo = locals()[task_param["alg_class"]](
                env=env,
                policy=policy,
                **task_param["alg_param"])

        del task_param["alg_class"]
        del task_param["alg_param"]

        # Check that we used all parameters. This helps reveal situations
        # where you thought you set a certain parameter but in fact made a
        # spelling mistake, so it was silently ignored.
        del task_param["exp_name"]  # This is probably generated by garage
        assert task_param == {}, \
            "ERROR: Some of the parameter values were not used: %s" % str(task_param)

        algo.train(sess=sess, step=itr + 1)
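# Hedged alternative to the locals() lookup above (sketch only, not the
# repo's code): an explicit registry makes the mapping from config string to
# class visible and fails loudly on typos. Relies on the same algo imports.
ALGO_REGISTRY = {"CEM": CEM, "CMAES": CMAES, "PPO": PPO, "TRPO": TRPO}
alg_cls = ALGO_REGISTRY[task_param["alg_class"]]  # KeyError names the bad value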
def plot_state(self, name='sensors', state=None):
    if state:
        self.wrapped_env.reset(state)

    structure = self.__class__.MAZE_STRUCTURE
    size_scaling = self.__class__.MAZE_SIZE_SCALING

    # duplicate cells to plot the maze
    structure_plot = np.zeros(
        ((len(structure) - 1) * 2, (len(structure[0]) - 1) * 2))
    for i in range(len(structure)):
        for j in range(len(structure[0])):
            cell = structure[i][j]
            if type(cell) is not int:
                cell = 0.3 if cell == 'r' else 0.7
            if i == 0:
                if j == 0:
                    structure_plot[i, j] = cell
                elif j == len(structure[0]) - 1:
                    structure_plot[i, 2 * j - 1] = cell
                else:
                    structure_plot[i, 2 * j - 1:2 * j + 1] = cell
            elif i == len(structure) - 1:
                if j == 0:
                    structure_plot[2 * i - 1, j] = cell
                elif j == len(structure[0]) - 1:
                    structure_plot[2 * i - 1, 2 * j - 1] = cell
                else:
                    structure_plot[2 * i - 1, 2 * j - 1:2 * j + 1] = cell
            else:
                if j == 0:
                    structure_plot[2 * i - 1:2 * i + 1, j] = cell
                elif j == len(structure[0]) - 1:
                    structure_plot[2 * i - 1:2 * i + 1, 2 * j - 1] = cell
                else:
                    structure_plot[2 * i - 1:2 * i + 1,
                                   2 * j - 1:2 * j + 1] = cell

    fig, ax = plt.subplots()
    im = ax.pcolor(-np.array(structure_plot), cmap='gray',
                   edgecolor='black', linestyle=':', lw=1)
    x_labels = list(range(len(structure[0])))
    y_labels = list(range(len(structure)))
    ax.grid(True)  # eliminate this to avoid inner lines
    ax.xaxis.set(ticks=2 * np.arange(len(x_labels)), ticklabels=x_labels)
    ax.yaxis.set(ticks=2 * np.arange(len(y_labels)), ticklabels=y_labels)

    obs = self.get_current_maze_obs()

    # the coordinates of this are wrt the init
    robot_xy = np.array(self.wrapped_env.get_body_com("torso")[:2])
    # for Ant this is computed with atan2, which gives [-pi, pi]
    ori = self.get_ori()

    # compute origin cell i_o, j_o coordinates and its center x_o, y_o
    # (with 0,0 in the top-right corner of the structure)
    # this is self.init_torso_x, self.init_torso_y: center of the cell xy!
    o_xy = np.array(self._find_robot())
    o_ij = (o_xy / size_scaling).astype(int)  # position in the grid

    o_xy_plot = o_xy / size_scaling * 2
    robot_xy_plot = o_xy_plot + robot_xy / size_scaling * 2

    plt.scatter(*robot_xy_plot)

    for ray_idx in range(self._n_bins):
        if obs[ray_idx]:
            length_wall = self._sensor_range - obs[ray_idx] * self._sensor_range
        else:
            length_wall = 1e-6
        ray_ori = ori - self._sensor_span * 0.5 + ray_idx / (
            self._n_bins - 1) * self._sensor_span
        if ray_ori > math.pi:
            ray_ori -= 2 * math.pi
        elif ray_ori < -math.pi:
            ray_ori += 2 * math.pi
        # find the end point of the wall ray
        end_xy = (robot_xy + length_wall *
                  np.array([math.cos(ray_ori), math.sin(ray_ori)]))
        end_xy_plot = (o_ij + end_xy / size_scaling) * 2
        plt.plot([robot_xy_plot[0], end_xy_plot[0]],
                 [robot_xy_plot[1], end_xy_plot[1]], 'r')

        if obs[ray_idx + self._n_bins]:
            length_goal = self._sensor_range - \
                obs[ray_idx + self._n_bins] * self._sensor_range
        else:
            length_goal = 1e-6
        ray_ori = ori - self._sensor_span * 0.5 + ray_idx / (
            self._n_bins - 1) * self._sensor_span
        # find the end point of the goal ray
        end_xy = (robot_xy + length_goal *
                  np.array([math.cos(ray_ori), math.sin(ray_ori)]))
        end_xy_plot = (o_ij + end_xy / size_scaling) * 2
        plt.plot([robot_xy_plot[0], end_xy_plot[0]],
                 [robot_xy_plot[1], end_xy_plot[1]], 'g')

    log_dir = logger.get_snapshot_dir()
    ax.set_title('sensors: ' + name)
    # saves the current figure
    plt.savefig(osp.join(log_dir, name + '_sensors.png'))
    plt.close()
def create_and_train_new_skill(self, skill_subpath):
    """
    Create and train a new skill based on the given subpath. The new skill
    policy and ID are returned, and also saved in self._hrl_policy.
    """
    ## Prepare elements for training
    # Environment
    skill_learning_env = TfEnv(
        SkillLearningEnv(
            # base env that was wrapped in HierarchizedEnv
            # (not fully unwrapped - may be normalized!)
            env=self.env.env.env,
            start_obss=skill_subpath['start_observations'],
            end_obss=skill_subpath['end_observations']
        )
    )

    # Skill policy: blank policy to be trained
    new_skill_pol, new_skill_id = self._hrl_policy.create_new_skill(
        skill_subpath['end_observations'])

    # Baseline - clone the baseline specified in low_algo_kwargs, or the top
    # algo's baseline. We need to clone it, as each skill policy must have
    # its own instance.
    la_kwargs = dict(self._low_algo_kwargs)
    baseline_to_clone = la_kwargs.get('baseline', self.baseline)
    baseline = Serializable.clone(  # to create a blank baseline
        obj=baseline_to_clone,
        name='{}Skill{}'.format(type(baseline_to_clone).__name__, new_skill_id)
    )
    la_kwargs['baseline'] = baseline

    # Algorithm
    algo = self._low_algo_cls(
        env=skill_learning_env,
        policy=new_skill_pol,
        **la_kwargs
    )

    # Logger parameters
    logger.dump_tabular(with_prefix=False)
    logger.log('Launching training of the new skill')
    logger_snapshot_dir_before = logger.get_snapshot_dir()
    logger_snapshot_mode_before = logger.get_snapshot_mode()
    logger_snapshot_gap_before = logger.get_snapshot_gap()
    logger.set_snapshot_dir(os.path.join(
        logger_snapshot_dir_before,
        'skill{}'.format(new_skill_id)
    ))
    logger.set_snapshot_mode('none')
    logger.push_tabular_prefix('Skill{}/'.format(new_skill_id))
    logger.set_tensorboard_step_key('Iteration')

    # Train new skill
    with logger.prefix('Skill {} | '.format(new_skill_id)):
        algo.train(sess=self._tf_sess)

    # Restore logger parameters
    logger.pop_tabular_prefix()
    logger.set_snapshot_dir(logger_snapshot_dir_before)
    logger.set_snapshot_mode(logger_snapshot_mode_before)
    logger.set_snapshot_gap(logger_snapshot_gap_before)
    logger.log('Training of the new skill finished')

    return new_skill_pol, new_skill_id
def _plot_visitations(self, paths, opts=None):
    """
    Plot visitation graphs, i.e. all paths in the batch stacked together.
    :param paths: paths statistics (dict)
    :param opts: plotting options:
            {'save': directory to save in, True for default directory,
                     or False to disable,
             'live': <boolean>,
             'alpha': <0..1> opacity of each plotted path,
             'noise': <0..1> amount of noise added to distinguish
                      individual paths}
    """
    if opts is None:
        opts = dict()
    if opts.get('live', False):
        plt.figure('Paths')
    else:
        plt.ioff()
    plt.clf()

    # Common plot opts
    m = self.map
    plt.tight_layout()
    plt.xlim(-0.5, self.n_col - 0.5)
    plt.ylim(-0.5, self.n_row - 0.5)
    plt.xticks([], [])
    plt.yticks([], [])
    plt.gca().set_aspect('equal')

    # Coins, holes, starts, goals and walls
    coins = self._get_pos_as_xy(np.argwhere(m == 'C').T)
    holes = self._get_pos_as_xy(np.argwhere(m == 'H').T)
    starts = self._get_pos_as_xy(np.argwhere(m == 'S').T)
    goals = self._get_pos_as_xy(np.argwhere(m == 'G').T)
    walls = self._get_pos_as_xy(np.argwhere(m == 'W').T)
    plt.scatter(*coins, c='gold', marker='o', s=150, zorder=10,
                edgecolors='black')
    plt.scatter(*holes, c='red', marker='X', s=100, zorder=10)
    plt.gca().add_collection(
        PatchCollection([Rectangle(xy - 0.5, 1, 1) for xy in starts.T],
                        color='navajowhite'))
    plt.gca().add_collection(
        PatchCollection([Rectangle(xy - 0.5, 1, 1) for xy in goals.T],
                        color='lightgreen'))
    plt.gca().add_collection(
        PatchCollection([Rectangle(xy - 0.5, 1, 1) for xy in walls.T],
                        color='navy'))

    # Plot paths
    alpha = opts.get('alpha', 0.1)
    noise = opts.get('noise', 0.1)
    for path in paths:
        data = path['env_infos']
        # Concat subpaths from HRL rollout
        if 'prev_pos_xy' not in data:
            data = SubpolicyPathInfo.concat_subpath_infos(
                path['env_infos']['subpath_infos'])['env_infos']
        # Starting position
        start_pos = data['prev_pos_xy'][:1].T
        # All others
        all_pos = data['next_pos_xy'].T
        all_pos = np.c_[start_pos, all_pos]
        all_pos = all_pos + np.random.normal(size=all_pos.shape, scale=noise)
        # Colorful line collection
        points = all_pos.T.reshape(-1, 1, 2)
        segments = np.concatenate([points[:-1], points[1:]], axis=1)
        lc = LineCollection(segments, cmap=plt.get_cmap('jet'), alpha=alpha)
        lc.set_array(np.arange(all_pos.shape[-1]))
        plt.gca().add_collection(lc)

    # Save paths figure
    folder = opts.get('save', False)
    if folder:
        if isinstance(folder, str):
            folder = os.path.expanduser(folder)
            if not os.path.isdir(folder):
                os.makedirs(folder)
        else:
            folder = logger.get_snapshot_dir()
        plt.savefig(
            os.path.join(
                folder,
                'visitation{:0>3d}.png'.format(self.visitation_plot_num)))
        self.visitation_plot_num += 1

    # Live plotting
    if opts.get('live', False):
        plt.gcf().canvas.draw()
        plt.waitforbuttonpress(timeout=0.001)
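# Hedged usage sketch: save the visitation plot of a finished batch to the
# default snapshot directory, with stronger jitter to separate paths
# (the option values are illustrative).
self._plot_visitations(paths, opts={'save': True, 'alpha': 0.2, 'noise': 0.15})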