def collect_training_trajectories(
        self,
        itr: int,
        load_initial_expertdata: Optional[str],
        collect_policy: BasePolicy,
        batch_size: int,
) -> Tuple[List[PathDict], int, Optional[List[PathDict]]]:
    """
    Load expert data on the first iteration, otherwise roll out `collect_policy`.

    :param itr: index of the current training iteration
    :param load_initial_expertdata: path to expert data pkl file, or None
    :param collect_policy: the current policy using which we collect data
    :param batch_size: the number of transitions we collect
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
            (0 when the paths were loaded from disk)
        train_video_paths: paths which also contain videos for visualization purposes
            (None unless `self.log_video` is set)
    """
    paths: List[PathDict]

    print("\nCollecting data to be used for training...")
    if itr == 0 and load_initial_expertdata is not None:
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(load_initial_expertdata, 'rb') as paths_file:
            paths = pickle.load(paths_file)
        envsteps_this_batch = 0
    else:
        ep_len = self.params['ep_len']
        envsteps_this_batch = 0
        paths = []
        # Strict `<`: once exactly `batch_size` steps are collected we stop,
        # instead of sampling one more (unrequested) rollout.
        while envsteps_this_batch < batch_size:
            remaining = batch_size - envsteps_this_batch
            new_paths = utils.sample_n_trajectories(
                self.env,
                collect_policy,
                max(remaining // ep_len, 1),  # always request at least one rollout
                max_path_length=ep_len,
            )
            paths.extend(new_paths)
            # Only count the new rollouts; earlier ones are already in the total.
            envsteps_this_batch += sum(
                path['observation'].shape[0] for path in new_paths)

    # collect more rollouts with the same policy, to be saved as videos in tensorboard
    # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
    train_video_paths = None
    if self.log_video:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(self, itr, load_initial_expertdata, collect_policy, batch_size):
    """
    Load expert data on the first iteration, otherwise roll out `collect_policy`.

    If `load_initial_expertdata` is None, new trajectories are collected at
    *every* iteration.

    :param itr: index of the current training iteration
    :param load_initial_expertdata: path to expert data pkl file, or None
    :param collect_policy: the policy used to collect rollouts
    :param batch_size: the number of transitions to collect
    :return: (paths, envsteps_this_batch, train_video_paths); the step count
        is 0 for loaded data and the video paths are None unless
        `self.log_video` is set
    """
    print("\nCollecting data to be used for training...")
    if itr == 0 and load_initial_expertdata is not None:
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(load_initial_expertdata, 'rb') as paths_file:
            paths = pickle.load(paths_file)
        envsteps_this_batch = 0
    else:
        ep_len = self.params['ep_len']
        envsteps_this_batch = 0
        paths = []
        # Strict `<` so that hitting the budget exactly ends the loop rather
        # than triggering one extra rollout.
        while envsteps_this_batch < batch_size:
            remaining = batch_size - envsteps_this_batch
            new_paths = utils.sample_n_trajectories(
                self.env,
                collect_policy,
                max(remaining // ep_len, 1),  # always request at least one rollout
                max_path_length=ep_len,
            )
            paths.extend(new_paths)
            envsteps_this_batch += sum(
                path['observation'].shape[0] for path in new_paths)

    train_video_paths = None
    if self.log_video:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample,
                                  save_expert_data_to_disk=False):
    """
    Load expert data on the first iteration, otherwise roll out `collect_policy`.

    :param itr: index of the current training iteration
    :param initial_expertdata: path to expert data pkl file, or None
    :param collect_policy: the current policy using which we collect data
    :param num_transitions_to_sample: the number of transitions we collect
    :param save_expert_data_to_disk: when true, extra video rollouts are collected
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    if itr == 0 and initial_expertdata is not None:
        # `with` closes the file even if unpickling raises.
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(initial_expertdata, 'rb') as handle:
            loaded_paths = pickle.load(handle)
        return loaded_paths, 0, None

    ep_len = self.params['ep_len']
    envsteps_this_batch = 0
    paths = []
    # Strict `<`: stop once the requested number of transitions is reached
    # instead of sampling one more batch.
    while envsteps_this_batch < num_transitions_to_sample:
        remaining = num_transitions_to_sample - envsteps_this_batch
        # NOTE(review): the third argument is a rollout count (remaining // ep_len);
        # presumably sample_trajectories reads it as a minimum timestep count - confirm.
        paths_this_batch, timesteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, max(remaining // ep_len, 1), ep_len)
        paths.extend(paths_this_batch)
        envsteps_this_batch += timesteps_this_batch

    train_video_paths = None
    # NOTE(review): video rollouts are gated on `save_expert_data_to_disk`
    # rather than a video-logging flag - confirm this is intended.
    if save_expert_data_to_disk:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(self, itr, load_initial_expertdata, collect_policy, batch_size):
    """
    Return training trajectories, either loaded from disk or freshly sampled.

    On iteration 0 with an expert-data path, the pickled paths are returned
    as-is with a step count of 0 and no video paths. In every other case
    (including a None path) new rollouts are sampled with `collect_policy`.

    :param itr: index of the current training iteration
    :param load_initial_expertdata: path to expert data pkl file, or None
    :param collect_policy: the current policy using which we collect data
    :param batch_size: the number of transitions we collect
    :return: (paths, envsteps_this_batch, train_video_paths)
    """
    first_iteration_with_expert = load_initial_expertdata is not None and itr == 0
    if first_iteration_with_expert:
        with open(load_initial_expertdata, "rb") as handle:
            return pickle.load(handle), 0, None

    print("\nCollecting data to be used for training...")
    sampled = utils.sample_trajectories(
        self.env,
        collect_policy,
        min_timesteps_per_batch=batch_size,
        max_path_length=self.params['ep_len'],
    )
    paths, envsteps_this_batch = sampled

    # Video rollouts are optional; without them the third slot is None.
    if not self.log_video:
        return paths, envsteps_this_batch, None

    print('\nCollecting train rollouts to be used for saving videos...')
    video_paths = utils.sample_n_trajectories(
        self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
    return paths, envsteps_this_batch, video_paths
def collect_training_trajectories(
        self,
        itr,
        load_initial_expertdata,
        collect_policy,
        batch_size,
):
    """
    Sample a fresh batch of training rollouts with `collect_policy`.

    `itr` and `load_initial_expertdata` are accepted but not read here: new
    trajectories are collected on every call.

    :param itr: index of the current training iteration (unused)
    :param load_initial_expertdata: path to expert data pkl file (unused)
    :param collect_policy: the current policy using which we collect data
    :param batch_size: the number of transitions we collect
    :return: (paths, envsteps_this_batch, train_video_paths); the video paths
        are None unless `self.log_video` is set
    """
    print("\nCollecting data to be used for training...")
    sampled = utils.sample_trajectories(
        self.env, collect_policy, batch_size, self.params['ep_len'])
    paths, envsteps_this_batch = sampled

    # Without video logging there is nothing more to collect.
    if not self.log_video:
        return paths, envsteps_this_batch, None

    # MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN, for tensorboard videos.
    print('\nCollecting train rollouts to be used for saving videos...')
    video_paths = utils.sample_n_trajectories(
        self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
    return paths, envsteps_this_batch, video_paths
def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample,
                                  save_expert_data_to_disk=False):
    """
    Return training trajectories, either loaded from disk or freshly sampled.

    :param itr: index of the current training iteration
    :param initial_expertdata: path to expert data pkl file, or None
    :param collect_policy: the current policy using which we collect data
    :param num_transitions_to_sample: the number of transitions we collect
    :param save_expert_data_to_disk: accepted but not read here
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    load_from_disk = itr == 0 and initial_expertdata is not None
    if load_from_disk:
        import pickle
        with open(initial_expertdata, "rb") as expert_file:
            return pickle.load(expert_file), 0, None

    sampled = utils.sample_trajectories(
        self.env, collect_policy, num_transitions_to_sample, self.params['ep_len'])
    paths, envsteps_this_batch = sampled

    # Video rollouts are only gathered when video logging is on.
    if not self.logvideo:
        return paths, envsteps_this_batch, None

    print('\nCollecting train rollouts to be used for saving videos...')
    video_paths = utils.sample_n_trajectories(
        self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
    return paths, envsteps_this_batch, video_paths
def collect_training_trajectories(self, itr, load_initial_expertdata, collect_policy, batch_size):
    """
    Return training trajectories, either loaded from disk or freshly sampled.

    On iteration 0 a truthy `load_initial_expertdata` path is unpickled and
    returned with a step count of 0. On iteration 0 without such a path, the
    requested size is replaced by `self.params['batch_size_initial']`.

    :param itr: index of the current training iteration
    :param load_initial_expertdata: path to expert data pkl file (may be falsy)
    :param collect_policy: the policy used to collect rollouts
    :param batch_size: the number of transitions to collect
    :return: (paths, envsteps_this_batch, train_video_paths)
    """
    first_iteration = itr == 0
    if first_iteration and load_initial_expertdata:
        with open(load_initial_expertdata, 'rb') as expert_file:
            return pickle.load(expert_file), 0, None
    if first_iteration:
        # No expert data to start from: use the initial batch size instead.
        batch_size = self.params['batch_size_initial']

    # Each collected rollout is capped at self.params['ep_len'] steps.
    print("\nCollecting data to be used for training...")
    sampled = utils.sample_trajectories(
        self.env, collect_policy, batch_size, self.params['ep_len'])
    paths, envsteps_this_batch = sampled

    if not self.logvideo:
        return paths, envsteps_this_batch, None

    # MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN, for tensorboard videos.
    print('\nCollecting train rollouts to be used for saving videos...')
    video_paths = utils.sample_n_trajectories(
        self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
    return paths, envsteps_this_batch, video_paths
def collect_training_trajectories(self, itr, load_initial_expertdata, collect_policy, batch_size):
    """
    Load expert data on the first iteration, otherwise roll out `collect_policy`.

    If `load_initial_expertdata` is None, new trajectories are collected at
    *every* iteration.

    :param itr: index of the current training iteration
    :param load_initial_expertdata: path to expert data pkl file, or None
    :param collect_policy: the policy used to collect rollouts
    :param batch_size: the number of transitions to collect
    :return: (paths, envsteps_this_batch, train_video_paths); loaded expert
        data is returned with 0 steps and no video paths
    """
    # Only the first iteration starts from expert data. Without the `itr`
    # check the expert file would be returned on every call and the policy
    # would never be rolled out.
    if itr == 0 and load_initial_expertdata is not None:
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(load_initial_expertdata, 'rb') as f:
            expert_data = pickle.load(f)
        return expert_data, 0, None

    print("\nCollecting data to be used for training...")
    paths, envsteps_this_batch = utils.sample_trajectories(
        self.env, collect_policy, batch_size, self.params['ep_len'])

    # collect more rollouts with the same policy, to be saved as videos in tensorboard
    # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
    train_video_paths = None
    if self.log_video:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample,
                                  save_expert_data_to_disk=False):
    """
    Load expert data on the first iteration, otherwise roll out `collect_policy`.

    :param itr: index of the current training iteration
    :param initial_expertdata: when not None on iteration 0, expert paths are
        loaded from disk instead of sampled
    :param collect_policy: the policy used to collect rollouts
    :param num_transitions_to_sample: the number of transitions to collect
    :param save_expert_data_to_disk: on iteration 0, collect
        `self.params['batch_size_initial']` transitions and pickle them to disk
    :return: (paths, envsteps_this_batch, train_video_paths)
    """
    if itr == 0:
        if initial_expertdata is not None:
            # NOTE(review): the file is read from self.params['expert_data'],
            # not from the `initial_expertdata` argument - confirm they agree.
            # NOTE: pickle can execute arbitrary code; only load trusted files.
            with open(self.params['expert_data'], 'rb') as expert_file:
                paths = pickle.load(expert_file)
            return paths, 0, None
        if save_expert_data_to_disk:
            num_transitions_to_sample = self.params['batch_size_initial']

    # collect data to be used for training
    print("\nCollecting data to be used for training...")
    paths, envsteps_this_batch = utils.sample_trajectories(
        self.env, collect_policy, num_transitions_to_sample, self.params['ep_len'])

    # collect more rollouts with the same policy, to be saved as videos in tensorboard
    train_video_paths = None
    if self.logvideo:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    if save_expert_data_to_disk and itr == 0:
        with open('expert_data_{}.pkl'.format(self.params['env_name']), 'wb') as file:
            pickle.dump(paths, file)

    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(self, itr, load_initial_expertdata, collect_policy, batch_size):
    """
    Return training trajectories, either loaded from disk or freshly sampled.

    :param itr: index of the current training iteration
    :param load_initial_expertdata: path to expert data pkl file, or None
    :param collect_policy: the current policy using which we collect data
    :param batch_size: the number of transitions we collect
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    expert_requested = load_initial_expertdata is not None
    if expert_requested and itr == 0:
        with open(load_initial_expertdata, 'rb') as expert_file:
            expert_paths = pickle.load(expert_file)
        return expert_paths, 0, None

    print("\nCollecting data to be used for training...")
    sampled = utils.sample_trajectories(
        self.env, collect_policy, batch_size, self.params['ep_len'])
    paths, envsteps_this_batch = sampled

    # Video rollouts are only gathered when video logging is on.
    if not self.log_video:
        return paths, envsteps_this_batch, None

    print('\nCollecting train rollouts to be used for saving videos...')
    video_paths = utils.sample_n_trajectories(
        self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
    return paths, envsteps_this_batch, video_paths
def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample,
                                  save_expert_data_to_disk=False):
    """
    Sample a fresh batch of training rollouts with `collect_policy`.

    Expert-data loading is disabled in this version, so `itr`,
    `initial_expertdata` and `save_expert_data_to_disk` are not read. Both
    sampling calls pass the extra arguments `True, "human"`.

    :param itr: index of the current training iteration (unused)
    :param initial_expertdata: path to expert data pkl file (unused)
    :param collect_policy: the current policy using which we collect data
    :param num_transitions_to_sample: the number of transitions we collect
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    print("\nCollecting data to be used for training...")
    sampled = utils.sample_trajectories(
        self.env,
        collect_policy,
        num_transitions_to_sample,
        self.params['ep_len'],
        True,
        "human",
    )
    paths, envsteps_this_batch = sampled

    if not self.logvideo:
        return paths, envsteps_this_batch, None

    # MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN, for tensorboard videos.
    print('\nCollecting train rollouts to be used for saving videos...')
    video_paths = utils.sample_n_trajectories(
        self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True, "human")
    return paths, envsteps_this_batch, video_paths
def collect_training_trajectories(self, itr, load_initial_expertdata, collect_policy, batch_size):
    """
    Load expert data on the first iteration, otherwise roll out `collect_policy`.

    If `load_initial_expertdata` is None, new trajectories are collected at
    *every* iteration.

    :param itr: index of the current training iteration
    :param load_initial_expertdata: path to expert data pkl file, or None
    :param collect_policy: the policy used for both training and video rollouts
    :param batch_size: the number of transitions to collect
    :return: (paths, envsteps_this_batch, train_video_paths)
    """
    if itr == 0 and load_initial_expertdata is not None:
        import pickle
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(load_initial_expertdata, 'rb') as f:
            loaded_paths = pickle.load(f)
        return loaded_paths, 0, None

    # Each collected rollout is capped at self.params['ep_len'] steps.
    print("\nCollecting data to be used for training...")
    print('batch size', batch_size)
    # Use the policy the caller handed in. The training rollouts previously
    # used self.agent.actor while the video rollouts used `collect_policy`.
    paths, envsteps_this_batch = utils.sample_trajectories(
        self.env, collect_policy, batch_size, self.params['ep_len'])

    # collect more rollouts with the same policy, to be saved as videos in tensorboard
    # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
    train_video_paths = None
    if self.log_video:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    return paths, envsteps_this_batch, train_video_paths
def perform_logging(self, itr, paths, eval_policy, train_video_paths, training_logs):
    """
    Roll out `eval_policy`, optionally log videos, and log scalar metrics.

    :param itr: iteration index passed to every logger call
    :param paths: training paths; each must provide a "reward" entry
    :param eval_policy: policy used for the evaluation rollouts
    :param train_video_paths: training video rollouts, or None to skip videos
    :param training_logs: list of dicts; only the last one is merged into the logs
    :return: None
    """
    print("\nCollecting data for eval...")
    eval_paths, _ = utils.sample_trajectories(
        self.env, eval_policy, self.params['eval_batch_size'], self.params['ep_len'])

    # Videos need both the flag and a set of training video rollouts.
    if self.log_video and train_video_paths is not None:
        print('\nCollecting video rollouts eval')
        eval_video_paths = utils.sample_n_trajectories(
            self.env, eval_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        print('\nSaving train rollouts as videos...')
        self.logger.log_paths_as_videos(
            train_video_paths, itr, fps=self.fps,
            max_videos_to_save=MAX_NVIDEO, video_title='train_rollouts')
        self.logger.log_paths_as_videos(
            eval_video_paths, itr, fps=self.fps,
            max_videos_to_save=MAX_NVIDEO, video_title='eval_rollouts')

    if not self.log_metrics:
        return

    # Per-episode returns and lengths for both sets of rollouts.
    train_returns = [trajectory["reward"].sum() for trajectory in paths]
    eval_returns = [trajectory["reward"].sum() for trajectory in eval_paths]
    train_ep_lens = [len(trajectory["reward"]) for trajectory in paths]
    eval_ep_lens = [len(trajectory["reward"]) for trajectory in eval_paths]

    # Insertion order is the order in which scalars are printed and logged.
    logs = OrderedDict([
        ("Eval_AverageReturn", np.mean(eval_returns)),
        ("Eval_StdReturn", np.std(eval_returns)),
        ("Eval_MaxReturn", np.max(eval_returns)),
        ("Eval_MinReturn", np.min(eval_returns)),
        ("Eval_AverageEpLen", np.mean(eval_ep_lens)),
        ("Train_AverageReturn", np.mean(train_returns)),
        ("Train_StdReturn", np.std(train_returns)),
        ("Train_MaxReturn", np.max(train_returns)),
        ("Train_MinReturn", np.min(train_returns)),
        ("Train_AverageEpLen", np.mean(train_ep_lens)),
        ("Train_EnvstepsSoFar", self.total_env_steps),
        ("TimeSinceStart", time.time() - self.start_time),
    ])
    # Only the most recent training log is merged in.
    logs.update(training_logs[-1])

    # The iteration-0 average return is stored and re-logged on later calls.
    if itr == 0:
        self.initial_return = np.mean(train_returns)
    logs["Initial_DataCollection_AverageReturn"] = self.initial_return

    for name, scalar in logs.items():
        print('{} : {}'.format(name, scalar))
        self.logger.log_scalar(scalar, name, itr)
    print('Done logging...\n\n')

    self.logger.flush()
def collect_training_trajectories(self, itr, load_initial_expertdata, collect_policy, batch_size):
    """
    Return training trajectories, either loaded from disk or freshly sampled.

    On iteration 0 with an expert-data path, every entry of every loaded path
    is truncated to its first `batch_size` items before being returned. If
    `load_initial_expertdata` is None, new trajectories are collected at
    *every* iteration.

    :param itr: index of the current training iteration
    :param load_initial_expertdata: path to expert data pkl file, or None
    :param collect_policy: the policy used to collect rollouts
    :param batch_size: the number of transitions to collect (also the
        truncation length for loaded expert data)
    :return: (paths, envsteps_this_batch, train_video_paths)
    """
    if load_initial_expertdata is not None and itr == 0:
        with open(load_initial_expertdata, 'rb') as expert_file:
            expert_paths = pickle.load(expert_file)
        # Keep only the first `batch_size` items of each per-path entry.
        for trajectory in expert_paths:
            trajectory.update(
                {key: values[:batch_size] for key, values in trajectory.items()})
        return list(expert_paths), 0, None

    print("\nCollecting data to be used for training...")
    sampled = utils.sample_trajectories(
        self.env, collect_policy, batch_size, self.params['ep_len'])
    paths, envsteps_this_batch = sampled

    if not self.log_video:
        return paths, envsteps_this_batch, None

    # MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN, for tensorboard videos.
    print('\nCollecting train rollouts to be used for saving videos...')
    video_paths = utils.sample_n_trajectories(
        self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
    return paths, envsteps_this_batch, video_paths
def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample,
                                  save_expert_data_to_disk=False):
    """
    Return training trajectories, either loaded from disk or freshly sampled.

    :param itr: index of the current training iteration
    :param initial_expertdata: path to expert data pkl file (may be falsy)
    :param collect_policy: the current policy using which we collect data
    :param num_transitions_to_sample: the number of transitions we collect
    :param save_expert_data_to_disk: accepted but not read here
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    first_iteration = itr == 0
    if first_iteration and initial_expertdata:
        with open(initial_expertdata, 'rb') as expert_file:
            return pickle.load(expert_file), 0, None

    print("\nCollecting data to be used for training...")
    sampled = utils.sample_trajectories(
        self.env, collect_policy, num_transitions_to_sample, self.params['ep_len'])
    paths, envsteps_this_batch = sampled

    if not self.logvideo:
        return paths, envsteps_this_batch, None

    # MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN, for tensorboard videos.
    print('\nCollecting train rollouts to be used for saving videos...')
    video_paths = utils.sample_n_trajectories(
        self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
    return paths, envsteps_this_batch, video_paths
def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample,
                                  save_expert_data_to_disk=False):
    """
    Load expert data on the first iteration, otherwise roll out `collect_policy`.

    :param itr: index of the current training iteration
    :param initial_expertdata: when not None on iteration 0, expert paths are
        loaded from disk instead of sampled
    :param collect_policy: the current policy using which we collect data
    :param num_transitions_to_sample: the number of transitions we collect
    :param save_expert_data_to_disk: on iteration 0, collect
        `self.params['batch_size_initial']` transitions and pickle them to disk
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    if itr == 0:
        if initial_expertdata is not None:
            # NOTE(review): the file is read from self.params['expert_data'],
            # not from the `initial_expertdata` argument - confirm they agree.
            # NOTE: pickle can execute arbitrary code; only load trusted files.
            with open(self.params['expert_data'], 'rb') as expert_file:
                paths = pickle.load(expert_file)
            return paths, 0, None
        if save_expert_data_to_disk:
            num_transitions_to_sample = self.params['batch_size_initial']

    # collect data to be used for training
    print("\nCollecting data to be used for training...")
    paths, envsteps_this_batch = utils.sample_trajectories(
        self.env, collect_policy, num_transitions_to_sample, self.params['ep_len'])

    # collect more rollouts with the same policy, to be saved as videos in tensorboard
    train_video_paths = None
    if self.logvideo:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    if save_expert_data_to_disk and itr == 0:
        with open('expert_data_{}.pkl'.format(self.params['env_name']), 'wb') as file:
            pickle.dump(paths, file)

    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample,
                                  save_expert_data_to_disk=False):
    """
    Sample a fresh batch of training rollouts with `collect_policy`.

    `itr`, `initial_expertdata` and `save_expert_data_to_disk` are accepted
    but not read here: new trajectories are collected on every call.

    :param itr: index of the current training iteration (unused)
    :param initial_expertdata: path to expert data pkl file (unused)
    :param collect_policy: the current policy using which we collect data
    :param num_transitions_to_sample: the number of transitions we collect
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    print("\nCollecting data to be used for training...")
    sampled = utils.sample_trajectories(
        env=self.env,
        policy=collect_policy,
        min_timesteps_per_batch=num_transitions_to_sample,
        max_path_length=self.params['ep_len'],
    )
    paths, envsteps_this_batch = sampled

    if not self.logvideo:
        return paths, envsteps_this_batch, None

    # MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN, for tensorboard videos.
    print('\nCollecting train rollouts to be used for saving videos...')
    video_paths = utils.sample_n_trajectories(
        self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
    return paths, envsteps_this_batch, video_paths
def collect_training_trajectories(self, itr, load_initial_expertdata, collect_policy, batch_size):
    """
    Load expert data on the first iteration, otherwise roll out `collect_policy`.

    :param itr: the current iteration number
    :param load_initial_expertdata: path to expert data pkl file
    :param collect_policy: the current policy using which we collect data
    :param batch_size: the number of transitions we collect
    :return:
        paths: a list of trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths.
            If just loading expert data, we didn't take any environment steps
        train_video_paths: paths which also contain videos for visualization purposes
    """
    print("\nCollecting data to be used for training...")

    # If it's the first iteration, just return the expert training data
    if itr == 0 and load_initial_expertdata is not None:
        # `with` closes the file handle, which the bare open() call never did.
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(load_initial_expertdata, 'rb') as expert_file:
            loaded_paths = pickle.load(expert_file)
        return loaded_paths, 0, None

    # Otherwise roll out the current policy to collect new observations.
    paths, envsteps_this_batch = utils.sample_trajectories(
        self.env, collect_policy, batch_size, self.params['ep_len'])

    # collect more rollouts with the same policy, to be saved as videos in tensorboard
    # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
    train_video_paths = None
    if self.log_video:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    # This return was missing, so the sampling path handed back None.
    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(
        self,
        itr,
        load_initial_expertdata,
        collect_policy,
        batch_size,
):
    """
    Load expert data on the first iteration, otherwise roll out `collect_policy`.

    :param itr: index of the current training iteration
    :param load_initial_expertdata: path to expert data pkl file, or None
    :param collect_policy: the current policy using which we collect data
    :param batch_size: the number of transitions we collect
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    # Without a path there is nothing to load, so iteration 0 falls through
    # to sampling instead of failing inside open(None).
    if itr == 0 and load_initial_expertdata is not None:
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(load_initial_expertdata, 'rb') as expert_file:
            loaded_paths = pickle.load(expert_file)
        return loaded_paths, 0, None

    print("\nCollecting data to be used for training...")
    ep_len = self.params['ep_len']
    # Request at least one rollout even when batch_size < ep_len.
    num_rollouts = max(batch_size // ep_len, 1)
    paths = utils.sample_n_trajectories(self.env, collect_policy, num_rollouts, ep_len)
    envsteps_this_batch = sum(utils.get_pathlength(path) for path in paths)

    # collect more rollouts with the same policy, to be saved as videos in tensorboard
    # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
    train_video_paths = None
    if self.log_video:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(
        self,
        itr,
        load_initial_expertdata,
        collect_policy,
        batch_size,
):
    """
    Load expert data on the first iteration, otherwise roll out `collect_policy`.

    :param itr: index of the current training iteration
    :param load_initial_expertdata: path to expert data pkl file, or None;
        it is joined onto "<cwd>/../../" before opening
    :param collect_policy: the current policy using which we collect data
    :param batch_size: the number of transitions we collect
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    # Without a path there is nothing to load, so iteration 0 falls through
    # to sampling instead of failing inside os.path.join.
    if itr == 0 and load_initial_expertdata is not None:
        # os.path.join keeps `load_initial_expertdata` as-is when it is absolute.
        expert_path = os.path.join(os.getcwd(), "../../", load_initial_expertdata)
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(expert_path, "rb") as f:
            expertdata = pickle.load(f)
        return expertdata, 0, None

    # Each collected rollout is capped at self.params['ep_len'] steps.
    print("\nCollecting data to be used for training...")
    paths, envsteps_this_batch = utils.sample_trajectories(
        self.env,
        collect_policy,
        min_timesteps_per_batch=batch_size,
        max_path_length=self.params['ep_len'],
    )

    # collect more rollouts with the same policy, to be saved as videos in tensorboard
    # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
    train_video_paths = None
    if self.log_video:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(
        self,
        itr,
        load_initial_expertdata,
        collect_policy,
        batch_size,
):
    """
    Load expert data on the first iteration, otherwise roll out `collect_policy`.

    Unlike an early-return design, video rollouts are still collected after
    loading expert data when `self.log_video` is set.

    :param itr: index of the current training iteration
    :param load_initial_expertdata: path to expert data pkl file, or None
    :param collect_policy: the current policy using which we collect data
    :param batch_size: the number of transitions we collect
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    # Without a path there is nothing to load, so iteration 0 falls through
    # to sampling instead of failing inside open(None).
    if itr == 0 and load_initial_expertdata is not None:
        # Load pickle (.pkl) of data.
        # NOTE: allow_pickle=True can execute arbitrary code; only load trusted files.
        with open(load_initial_expertdata, 'rb') as handle:
            paths = np.load(handle, allow_pickle=True)
        envsteps_this_batch = 0
    else:
        # Each collected rollout is capped at self.params['ep_len'] steps.
        print("Collecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env,
            policy=collect_policy,
            min_timesteps_per_batch=batch_size,
            max_path_length=self.params['ep_len'],
            render=False,
            render_mode=('rgb_array'))

    # collect more rollouts with the same policy, to be saved as videos in tensorboard
    train_video_paths = None
    if self.log_video:
        print('Collecting train rollouts to be used for saving videos...')
        # Using self.params['ep_len'] instead of MAX_VIDEO_LEN
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, self.params['ep_len'], True)

    print(envsteps_this_batch)
    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample,
                                  save_expert_data_to_disk=False):
    """
    Load expert data on the first iteration, otherwise roll out `collect_policy`.

    :param itr: index of the current training iteration
    :param initial_expertdata: path to expert data pkl file, or None
    :param collect_policy: the current policy using which we collect data
    :param num_transitions_to_sample: the number of transitions we collect
    :param save_expert_data_to_disk: accepted but not read here
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    print("\nCollecting data to be used for training...")
    if itr == 0 and initial_expertdata is not None:
        # NOTE: pickle can execute arbitrary code; only load trusted files.
        with open(initial_expertdata, 'rb') as paths_file:
            paths = pickle.load(paths_file)
        envsteps_this_batch = 0
    else:
        ep_len = self.params['ep_len']
        envsteps_this_batch = 0
        paths = []
        # Strict `<`: stop once the requested number of transitions is
        # reached instead of sampling one more rollout.
        while envsteps_this_batch < num_transitions_to_sample:
            remaining = num_transitions_to_sample - envsteps_this_batch
            new_paths = utils.sample_n_trajectories(
                self.env,
                collect_policy,
                max(remaining // ep_len, 1),  # always request at least one rollout
                ep_len,
            )
            paths.extend(new_paths)
            # Builtin sum: passing a generator to np.sum is deprecated.
            envsteps_this_batch += sum(
                path['observation'].shape[0] for path in new_paths)

    # collect more rollouts with the same policy, to be saved as videos in tensorboard
    # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
    train_video_paths = None
    if self.log_video:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(self, itr, load_initial_expertdata,
                                  collect_policy, batch_size):
    """
    Gather the trajectories used for one training iteration.

    On iteration 0 with an expert-data path supplied, the trajectories are
    unpickled from that file and zero environment steps are reported.
    Otherwise rollouts are sampled with `collect_policy` until the collected
    step count exceeds `batch_size`.

    :return: (paths, envsteps_this_batch, train_video_paths); the last item
        is None unless `self.log_video` is truthy
    """
    print("\nCollecting data to be used for training...")

    use_expert_data = itr == 0 and load_initial_expertdata is not None
    if use_expert_data:
        with open(load_initial_expertdata, 'rb') as expert_file:
            paths = pickle.load(expert_file)
        envsteps_this_batch = 0
    else:
        paths, envsteps_this_batch = [], 0
        while envsteps_this_batch <= batch_size:
            # Ask for about as many rollouts as are still missing, at least one.
            missing_steps = batch_size - envsteps_this_batch
            num_rollouts = max(missing_steps // self.params['ep_len'], 1)
            fresh_paths = utils.sample_n_trajectories(
                self.env,
                collect_policy,
                num_rollouts,
                max_path_length=self.params['ep_len'],
            )
            paths.extend(fresh_paths)
            envsteps_this_batch = sum(
                rollout['observation'].shape[0] for rollout in paths)

    # Without video logging there is nothing more to collect.
    if not self.log_video:
        return paths, envsteps_this_batch, None

    # MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN, saved as videos in tensorboard.
    print('\nCollecting train rollouts to be used for saving videos...')
    train_video_paths = utils.sample_n_trajectories(
        self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(
        self,
        itr,
        load_initial_expertdata,
        collect_policy,
        batch_size,
):
    """
    Load (and truncate) expert data on the first iteration, otherwise collect rollouts.

    :param itr: iteration index; expert data is only loaded when it is 0
    :param load_initial_expertdata: path to expert data pkl file, or None to
        always collect with `collect_policy`
    :param collect_policy: the current policy using which we collect data
    :param batch_size: the number of transitions we collect; also the maximum
        number of entries kept per field of each loaded expert path
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    if itr == 0 and load_initial_expertdata is not None:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(load_initial_expertdata, 'rb') as f:
            paths = pickle.load(f)
        # Keep at most `batch_size` leading entries of every field in every path.
        for path in paths:
            for key in path:
                path[key] = path[key][:batch_size]
        # Loading takes no environment steps and produces no videos.
        return list(paths), 0, None

    # Later iteration, or no expert data supplied: roll out the current
    # policy, with each rollout capped at self.params['ep_len'].
    print("\nCollecting data to be used for training...")
    paths, envsteps_this_batch = utils.sample_trajectories(
        self.env, collect_policy, batch_size, self.params['ep_len'])

    # collect more rollouts with the same policy, to be saved as videos in tensorboard
    # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
    train_video_paths = None
    if self.log_video:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(
        self,
        itr,
        load_initial_expertdata,
        collect_policy,
        batch_size,
):
    """
    Load expert trajectories on the first iteration, otherwise collect rollouts.

    :param itr: iteration index; expert data is only loaded when it is 0
    :param load_initial_expertdata: path to expert data pkl file; if None,
        new trajectories are collected at *every* iteration
    :param collect_policy: the current policy using which we collect data
    :param batch_size: the number of transitions we collect
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    if itr == 0 and load_initial_expertdata is not None:
        # pickle.load needs the file object itself, not the bytes from f.read().
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(load_initial_expertdata, 'rb') as f:
            paths = pickle.load(f)
        return paths, 0, None

    # Collect `batch_size` transitions, each rollout capped at self.params['ep_len'].
    print("\nCollecting data to be used for training...")
    paths, envsteps_this_batch = utils.sample_trajectories(
        env=self.env,
        policy=collect_policy,
        min_timesteps_per_batch=batch_size,
        max_path_length=self.params['ep_len'])

    # collect more rollouts with the same policy, to be saved as videos in tensorboard
    # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
    train_video_paths = None
    if self.log_video:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(
        self,
        itr,
        load_initial_expertdata,
        collect_policy,
        batch_size,
):
    """
    Choose between loading expert data (first iteration) and rolling out the policy.

    :param itr: the current iteration number
    :param load_initial_expertdata: path to expert data pkl file, or None to
        always collect with `collect_policy`
    :param collect_policy: the current policy using which we collect data
    :param batch_size: the number of transitions we collect
    :return:
        paths: a list of trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths.
            If just loading expert data, we didn't take any environment steps
        train_video_paths: paths which also contain videos for visualization purposes
    """
    print("\nCollecting data to be used for training...")

    # First iteration with expert data available: return it as-is.
    if itr == 0 and load_initial_expertdata is not None:
        # The context manager closes the file even if unpickling fails.
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(load_initial_expertdata, 'rb') as expert_file:
            loaded_paths = pickle.load(expert_file)
        return loaded_paths, 0, None

    # Otherwise roll out the current policy to collect new observations,
    # with each rollout capped at self.params['ep_len'].
    paths, envsteps_this_batch = utils.sample_trajectories(
        self.env, collect_policy, batch_size, self.params['ep_len'])

    # collect more rollouts with the same policy, to be saved as videos in tensorboard
    # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
    train_video_paths = None
    if self.log_video:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(self, itr, initial_expertdata, collect_policy,
                                  num_transitions_to_sample,
                                  save_expert_data_to_disk=False):
    """
    Produce the trajectories for one training iteration.

    :param itr: iteration index; expert data is only loaded when it is 0
    :param initial_expertdata: path to expert data pkl file, or None
    :param collect_policy: the current policy using which we collect data
    :param num_transitions_to_sample: the number of transitions we collect
    :param save_expert_data_to_disk: must be falsy (asserted below)
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    assert not save_expert_data_to_disk

    first_iteration_with_expert = itr == 0 and initial_expertdata is not None
    if first_iteration_with_expert:
        # Loading from disk takes no environment steps and yields no videos.
        with open(initial_expertdata, 'rb') as expert_file:
            expert_paths = pickle.load(expert_file)
        return expert_paths, 0, None

    # Sample `num_transitions_to_sample` transitions with the current policy,
    # each rollout capped at self.params['ep_len'].
    print("\nCollecting data to be used for training...")
    sampled = utils.sample_trajectories(
        self.env, collect_policy, num_transitions_to_sample, self.params['ep_len'])
    paths, envsteps_this_batch = sampled

    if not self.logvideo:
        return paths, envsteps_this_batch, None

    # MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN, saved as videos in tensorboard.
    print('\nCollecting train rollouts to be used for saving videos...')
    train_video_paths = utils.sample_n_trajectories(
        self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(self, itr, collect_policy, batch_size):
    """
    Sample training rollouts with `collect_policy`, plus optional video rollouts.

    :param itr: iteration index (not used in the body)
    :param collect_policy: the policy used to collect data
    :param batch_size: not read by the body; the sample size comes from
        self.params['batch_size'].
        NOTE(review): confirm callers always pass self.params['batch_size'].
    :return: (paths, envsteps_this_batch, train_video_paths); the last item
        is None unless `self.logvideo` is truthy
    """
    print("\nCollecting data to be used for training...")
    sampled = utils.sample_trajectories(
        self.env,
        collect_policy,
        self.params['batch_size'],
        self.params['ep_len'],
    )
    paths, envsteps_this_batch = sampled

    # Without video logging there is nothing more to collect.
    if not self.logvideo:
        return paths, envsteps_this_batch, None

    # Extra rollouts with the same policy, to be saved as videos in tensorboard.
    print('\nCollecting train rollouts to be used for saving videos...')
    train_video_paths = utils.sample_n_trajectories(
        self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(
        self,
        itr,
        initial_expertdata,
        collect_policy,
        batch_size,
):
    """
    Load expert trajectories on the first iteration, otherwise collect new ones.

    If itr == 0 and `initial_expertdata` is truthy, the trajectories are
    loaded from that file. If itr == 0 without expert data,
    self.params['batch_size_initial'] transitions are collected instead of
    `batch_size`. On every other iteration `batch_size` transitions are
    collected using `collect_policy`.

    :param itr: The iteration index. Starts at 0.
    :param initial_expertdata: Path to expert data pkl file.
    :param collect_policy: The current policy using which we collect data.
    :param batch_size: The number of transitions we collect.
    :return:
        paths: a list trajectories
        envsteps_this_batch: the sum over the numbers of environment steps in paths
        train_video_paths: paths which also contain videos for visualization purposes
    """
    if itr == 0:
        if initial_expertdata:
            # The context manager closes the file handle.
            # NOTE: unpickling can execute arbitrary code; only load trusted files.
            with open(initial_expertdata, "rb") as pickle_in:
                loaded_paths = pickle.load(pickle_in)
            return loaded_paths, 0, None
        # First iteration without expert data: use the initial batch size.
        batch_size = self.params['batch_size_initial']

    # Collect batch_size samples with collect_policy; each rollout is capped
    # at self.params['ep_len'].
    print("\nCollecting data to be used for training...")
    paths, envsteps_this_batch = utils.sample_trajectories(
        env=self.env,
        policy=collect_policy,
        min_timesteps_per_batch=batch_size,
        max_path_length=self.params['ep_len'])

    # Collect MAX_NVIDEO rollouts of length MAX_VIDEO_LEN with collect_policy,
    # to be saved as videos in tensorboard.
    train_video_paths = None
    if self.log_video:
        print('\nCollecting train rollouts to be used for saving videos...')
        train_video_paths = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

    return paths, envsteps_this_batch, train_video_paths
def collect_training_trajectories(self, itr, load_initial_expertdata,
                                  collect_policy, batch_size):
    """
    Return the trajectories for one training iteration.

    On iteration 0 with a truthy `load_initial_expertdata`, the data is read
    from that path with np.load and zero environment steps are reported.
    Otherwise `batch_size` transitions are sampled with `collect_policy`,
    each rollout capped at self.params['ep_len'].

    :return: (paths, envsteps_this_batch, train_video_paths); the last item
        is None unless `self.log_video` is truthy
    """
    load_expert = itr == 0 and load_initial_expertdata
    if load_expert:
        expert_paths = np.load(load_initial_expertdata, allow_pickle=True)
        return expert_paths, 0, None

    print("\nCollecting data to be used for training...")
    sampled = utils.sample_trajectories(
        self.env, collect_policy, batch_size, self.params['ep_len'])
    paths, envsteps_this_batch = sampled

    # Without video logging there is nothing more to collect.
    if not self.log_video:
        return paths, envsteps_this_batch, None

    # Extra rollouts with the same policy: MAX_NVIDEO of them, length MAX_VIDEO_LEN.
    print('\nCollecting train rollouts to be used for saving videos...')
    train_video_paths = utils.sample_n_trajectories(
        self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
    return paths, envsteps_this_batch, train_video_paths