Python sample_n_trajectories 예제들, cs285.infrastructure.utils.sample_n_trajectories Python 예제들

예제 #1

0

파일 보기

파일: rl_trainer.py 프로젝트: yzyvl/cs285-homework

    def collect_training_trajectories(
        self,
        itr: int,
        load_initial_expertdata: Optional[str],
        collect_policy: BasePolicy,
        batch_size: int,
    ) -> Tuple[List[PathDict], int, Optional[List[PathDict]]]:
        """
        Gather the trajectories used for one training iteration.

        On iteration 0, when an expert-data file is given, the trajectories are
        unpickled from that file and no environment steps are counted.
        Otherwise rollouts are sampled with ``collect_policy`` until at least
        ``batch_size`` environment steps have been gathered.

        :param itr: index of the current training iteration
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        paths: List[PathDict]

        print("\nCollecting data to be used for training...")
        if itr == 0 and load_initial_expertdata is not None:
            # NOTE(security): unpickling runs arbitrary code; only load expert
            # files that come from a trusted source.
            with open(load_initial_expertdata, 'rb') as paths_file:
                paths = pickle.load(paths_file)
            envsteps_this_batch = 0
        else:
            envsteps_this_batch = 0
            paths = []
            ep_len = self.params['ep_len']
            # Strict `<`: stop as soon as the target is reached. With `<=` an
            # extra rollout was sampled whenever the count landed exactly on
            # `batch_size`.
            while envsteps_this_batch < batch_size:
                # Ask for as many full-length rollouts as still fit, at least one.
                n_rollouts = max((batch_size - envsteps_this_batch) // ep_len, 1)
                new_paths = utils.sample_n_trajectories(
                    self.env,
                    collect_policy,
                    n_rollouts,
                    max_path_length=ep_len,
                )
                paths.extend(new_paths)
                # Count only the new rollouts instead of re-summing every path.
                envsteps_this_batch += sum(path['observation'].shape[0]
                                           for path in new_paths)

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
        train_video_paths = None
        if self.log_video:
            print(
                '\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths

예제 #2

0

파일 보기

    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """
        Gather the trajectories used for one training iteration.

        On iteration 0, when ``load_initial_expertdata`` is not None, the
        trajectories are unpickled from that path and zero environment steps
        are reported. Otherwise rollouts are sampled with ``collect_policy``
        until at least ``batch_size`` environment steps have been gathered.

        :param itr: index of the current training iteration
        :param load_initial_expertdata: path to a pickle file of trajectories, or None
        :param collect_policy: policy used to sample rollouts
        :param batch_size: target number of environment steps
        :return: ``(paths, envsteps_this_batch, train_video_paths)``;
            ``train_video_paths`` is None unless ``self.log_video`` is set
        """
        print("\nCollecting data to be used for training...")
        if itr == 0 and load_initial_expertdata is not None:
            # NOTE(security): unpickling runs arbitrary code; only load expert
            # files that come from a trusted source.
            with open(load_initial_expertdata, 'rb') as paths_file:
                paths = pickle.load(paths_file)
            envsteps_this_batch = 0
        else:
            envsteps_this_batch = 0
            paths = []
            ep_len = self.params['ep_len']
            # Strict `<`: with `<=` an extra rollout was sampled whenever the
            # count landed exactly on `batch_size`.
            while envsteps_this_batch < batch_size:
                # Ask for as many full-length rollouts as still fit, at least one.
                n_rollouts = max((batch_size - envsteps_this_batch) // ep_len, 1)
                new_paths = utils.sample_n_trajectories(
                    self.env,
                    collect_policy,
                    n_rollouts,
                    max_path_length=ep_len,
                )
                paths.extend(new_paths)
                # Count only the new rollouts instead of re-summing every path.
                envsteps_this_batch += sum(path['observation'].shape[0]
                                           for path in new_paths)

        train_video_paths = None
        if self.log_video:
            print(
                '\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths

예제 #3

0

파일 보기

    def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample, save_expert_data_to_disk=False):
        """
        Gather the trajectories used for one training iteration.

        :param itr: index of the current training iteration
        :param initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param num_transitions_to_sample:  the number of transitions we collect
        :param save_expert_data_to_disk: in this implementation, gates whether
            the extra video rollouts are collected
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        if itr == 0 and initial_expertdata is not None:
            # `with` closes the file even if unpickling raises.
            # NOTE(security): unpickling runs arbitrary code; only load expert
            # files that come from a trusted source.
            with open(initial_expertdata, 'rb') as expert_file:
                loaded_paths = pickle.load(expert_file)
            return loaded_paths, 0, None

        envsteps_this_batch = 0
        paths = []
        ep_len = self.params['ep_len']
        # Strict `<`: with `<=` an extra batch was sampled whenever the count
        # landed exactly on the requested number of transitions.
        while envsteps_this_batch < num_transitions_to_sample:
            # Request as many full-length episodes' worth of steps as remain, at least one.
            remaining = max(
                (num_transitions_to_sample - envsteps_this_batch) // ep_len, 1)
            paths_this_batch, timesteps_this_batch = utils.sample_trajectories(
                self.env, collect_policy, remaining, ep_len)
            paths.extend(paths_this_batch)
            envsteps_this_batch += timesteps_this_batch

        train_video_paths = None
        # NOTE(review): video collection is keyed on `save_expert_data_to_disk`
        # rather than a logging flag; confirm this is intended.
        if save_expert_data_to_disk:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths

예제 #4

0

파일 보기

    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """
        Return the training trajectories for iteration ``itr``.

        :param itr: index of the current training iteration
        :param load_initial_expertdata: path to expert data pkl file, or None
        :param collect_policy: the policy used to sample rollouts
        :param batch_size: passed to the sampler as ``min_timesteps_per_batch``
        :return: ``(paths, envsteps_this_batch, train_video_paths)``. When the
            expert file is used (first iteration only) this is
            ``(unpickled data, 0, None)``.
        """
        # Expert data is only served on the very first iteration; if no file is
        # given, new trajectories are collected at *every* iteration.
        if itr == 0 and load_initial_expertdata is not None:
            with open(load_initial_expertdata, "rb") as pkl_file:
                loaded = pickle.load(pkl_file)
            return loaded, 0, None

        print("\nCollecting data to be used for training...")
        sampled = utils.sample_trajectories(
            self.env,
            collect_policy,
            min_timesteps_per_batch=batch_size,
            max_path_length=self.params['ep_len'],
        )
        paths, envsteps_this_batch = sampled

        # Without video logging there is nothing more to collect.
        if not self.log_video:
            return paths, envsteps_this_batch, None

        print('\nCollecting train rollouts to be used for saving videos...')
        video_rollouts = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, video_rollouts

예제 #5

0

파일 보기

    def collect_training_trajectories(
        self,
        itr,
        load_initial_expertdata,
        collect_policy,
        batch_size,
    ):
        """
        Sample fresh training trajectories with ``collect_policy``.

        ``itr`` and ``load_initial_expertdata`` are accepted but not read by
        this implementation: new trajectories are collected on every call.

        :param itr: index of the current training iteration (unused here)
        :param load_initial_expertdata: path to expert data pkl file (unused here)
        :param collect_policy: the current policy using which we collect data
        :param batch_size: the number of transitions requested from the sampler
        :return:
            paths: a list trajectories
            envsteps_this_batch: step count reported by the sampler
            train_video_paths: extra rollouts for video logging, or None
        """
        print("\nCollecting data to be used for training...")
        sampled = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])
        paths, envsteps_this_batch = sampled

        # Video rollouts (MAX_NVIDEO of them, each capped at MAX_VIDEO_LEN) are
        # only gathered when video logging is switched on.
        if not self.log_video:
            return paths, envsteps_this_batch, None

        print('\nCollecting train rollouts to be used for saving videos...')
        video_rollouts = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, video_rollouts

예제 #6

0

파일 보기

파일: rl_trainer.py 프로젝트: JerryJohnsonLee/homework_fall2020

    def collect_training_trajectories(self,
                                      itr,
                                      initial_expertdata,
                                      collect_policy,
                                      num_transitions_to_sample,
                                      save_expert_data_to_disk=False):
        """
        Return the training trajectories for iteration ``itr``.

        :param itr: index of the current training iteration
        :param initial_expertdata: path to expert data pkl file, or None
        :param collect_policy: the current policy using which we collect data
        :param num_transitions_to_sample: the number of transitions requested
            from the sampler
        :param save_expert_data_to_disk: accepted but not read here
        :return:
            paths: a list trajectories
            envsteps_this_batch: step count reported by the sampler (0 when
                expert data is loaded)
            train_video_paths: extra rollouts for video logging, or None
        """
        first_iteration = itr == 0
        if initial_expertdata is not None and first_iteration:
            import pickle
            with open(initial_expertdata, "rb") as expert_file:
                expert_paths = pickle.load(expert_file)
            return expert_paths, 0, None

        sampled = utils.sample_trajectories(
            self.env, collect_policy, num_transitions_to_sample,
            self.params['ep_len'])
        paths, envsteps_this_batch = sampled

        # Nothing more to do unless videos are being logged.
        if not self.logvideo:
            return paths, envsteps_this_batch, None

        print('\nCollecting train rollouts to be used for saving videos...')
        video_rollouts = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, video_rollouts

예제 #7

0

파일 보기

파일: rl_trainer.py 프로젝트: yanlai00/homework_fall2020

    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """
        Return the training trajectories for iteration ``itr``.

        On iteration 0 a truthy ``load_initial_expertdata`` path is unpickled
        and returned as ``(loaded_paths, 0, None)``. If it is the first
        iteration and no data is loaded, ``batch_size`` is replaced by
        ``self.params['batch_size_initial']`` before sampling.

        :param itr: index of the current training iteration
        :param load_initial_expertdata: path to expert data pkl file, or a falsy value
        :param collect_policy: policy used to sample rollouts
        :param batch_size: number of transitions requested from the sampler
        :return: ``(paths, envsteps_this_batch, train_video_paths)``
        """
        first_iteration = itr == 0
        if first_iteration and load_initial_expertdata:
            with open(load_initial_expertdata, 'rb') as expert_file:
                return pickle.load(expert_file), 0, None
        if first_iteration:
            # First iteration without expert data uses its own batch size.
            batch_size = self.params['batch_size_initial']

        # Each collected rollout is capped at self.params['ep_len'] steps.
        print("\nCollecting data to be used for training...")
        sampled = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])
        paths, envsteps_this_batch = sampled

        # Extra rollouts with the same policy (MAX_NVIDEO of them, each of
        # length MAX_VIDEO_LEN) are only gathered for video logging.
        if not self.logvideo:
            return paths, envsteps_this_batch, None

        print('\nCollecting train rollouts to be used for saving videos...')
        video_rollouts = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, video_rollouts

예제 #8

0

파일 보기

파일: rl_trainer.py 프로젝트: hackmalife/homework_fall2020

    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """
        Return the training trajectories for iteration ``itr``.

        :param itr: index of the current training iteration
        :param load_initial_expertdata: path to expert data pkl file, or None
        :param collect_policy: policy used to sample rollouts
        :param batch_size: number of transitions requested from the sampler
        :return: ``(paths, envsteps_this_batch, train_video_paths)``; when the
            expert file is used this is ``(expert_data, 0, None)``
        """
        # Only the first iteration may be served from the expert file. Without
        # the `itr == 0` guard every later iteration returned the same expert
        # data and `collect_policy` was never rolled out.
        if itr == 0 and load_initial_expertdata is not None:
            # NOTE(security): unpickling runs arbitrary code; only load expert
            # files that come from a trusted source.
            with open(load_initial_expertdata, 'rb') as f:
                expert_data = pickle.load(f)
            return expert_data, 0, None

        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
        train_video_paths = None
        if self.log_video:
            print(
                '\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths

예제 #9

0

파일 보기

파일: rl_trainer.py 프로젝트: zhanyuanucb/homework_fall2020

    def collect_training_trajectories(self,
                                      itr,
                                      initial_expertdata,
                                      collect_policy,
                                      num_transitions_to_sample,
                                      save_expert_data_to_disk=False):
        """
        Return the training trajectories for iteration ``itr``.

        :param itr: index of the current training iteration
        :param initial_expertdata: path to expert data pkl file, or None
        :param collect_policy: policy used to sample rollouts
        :param num_transitions_to_sample: number of transitions requested from
            the sampler; on iteration 0 with ``save_expert_data_to_disk`` set it
            is replaced by ``self.params['batch_size_initial']``
        :param save_expert_data_to_disk: when true, the trajectories sampled on
            iteration 0 are pickled to ``expert_data_<env_name>.pkl``
        :return: ``(paths, envsteps_this_batch, train_video_paths)``; when the
            expert file is used this is ``(paths, 0, None)``
        """
        if itr == 0:
            if initial_expertdata is not None:
                # Load from the path that was passed in (previously the
                # argument was tested but `self.params['expert_data']` was
                # opened), and close the handle deterministically.
                # NOTE(security): unpickling runs arbitrary code; only load
                # expert files that come from a trusted source.
                with open(initial_expertdata, 'rb') as expert_file:
                    paths = pickle.load(expert_file)
                return paths, 0, None
            if save_expert_data_to_disk:
                num_transitions_to_sample = self.params['batch_size_initial']

        # collect data to be used for training
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, num_transitions_to_sample,
            self.params['ep_len'])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        train_video_paths = None
        if self.logvideo:
            print(
                '\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        if save_expert_data_to_disk and itr == 0:
            with open('expert_data_{}.pkl'.format(self.params['env_name']),
                      'wb') as file:
                pickle.dump(paths, file)

        return paths, envsteps_this_batch, train_video_paths

예제 #10

0

파일 보기

파일: rl_trainer.py 프로젝트: finlaymiller/homework_fall2020

    def collect_training_trajectories(self, itr, load_initial_expertdata, collect_policy, batch_size):
        """
        Return the training trajectories for iteration ``itr``.

        :param itr: index of the current training iteration
        :param load_initial_expertdata: path to expert data pkl file, or None
        :param collect_policy: the current policy using which we collect data
        :param batch_size: the number of transitions requested from the sampler
        :return:
            paths: a list trajectories
            envsteps_this_batch: step count reported by the sampler (0 when
                expert data is loaded)
            train_video_paths: extra rollouts for video logging, or None
        """
        serve_expert_data = load_initial_expertdata is not None and itr == 0
        if serve_expert_data:
            with open(load_initial_expertdata, 'rb') as expert_file:
                expert_paths = pickle.load(expert_file)
            return expert_paths, 0, None

        print("\nCollecting data to be used for training...")
        sampled = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])
        paths, envsteps_this_batch = sampled

        # Video rollouts are only gathered when video logging is enabled.
        if not self.log_video:
            return paths, envsteps_this_batch, None

        print('\nCollecting train rollouts to be used for saving videos...')
        video_rollouts = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, video_rollouts

예제 #11

0

파일 보기

    def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample, save_expert_data_to_disk=False):
        """
        Sample fresh training trajectories with ``collect_policy``.

        ``itr``, ``initial_expertdata`` and ``save_expert_data_to_disk`` are
        accepted but not read: loading of expert data is disabled in this
        implementation, so new trajectories are sampled on every call.

        :param itr: index of the current training iteration (unused here)
        :param initial_expertdata: path to expert data pkl file (unused here)
        :param collect_policy: the current policy using which we collect data
        :param num_transitions_to_sample: the number of transitions requested
            from the sampler
        :return:
            paths: a list trajectories
            envsteps_this_batch: step count reported by the sampler
            train_video_paths: extra rollouts for video logging, or None
        """
        # Each rollout is capped at self.params['ep_len'] steps. The trailing
        # `True, "human"` arguments are forwarded to the sampler unchanged
        # (presumably render flag and render mode; verify against utils).
        print("\nCollecting data to be used for training...")
        sampled = utils.sample_trajectories(
            self.env, collect_policy, num_transitions_to_sample,
            self.params['ep_len'], True, "human")
        paths, envsteps_this_batch = sampled

        # Extra rollouts with the same policy (MAX_NVIDEO of them, each of
        # length MAX_VIDEO_LEN) are only gathered for video logging.
        if not self.logvideo:
            return paths, envsteps_this_batch, None

        print('\nCollecting train rollouts to be used for saving videos...')
        video_rollouts = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True, "human")
        return paths, envsteps_this_batch, video_rollouts

예제 #12

0

파일 보기

    def collect_training_trajectories(self, itr, load_initial_expertdata, collect_policy, batch_size):
        """
        Return the training trajectories for iteration ``itr``.

        :param itr: index of the current training iteration
        :param load_initial_expertdata: path to expert data pkl file, or None;
            when None, new trajectories are collected at *every* iteration
        :param collect_policy: policy used to sample both the training rollouts
            and the video rollouts
        :param batch_size: number of transitions requested from the sampler
        :return: ``(paths, envsteps_this_batch, train_video_paths)``; when the
            expert file is used this is ``(loaded_paths, 0, None)``
        """
        if itr == 0 and load_initial_expertdata is not None:
            import pickle

            # NOTE(security): unpickling runs arbitrary code; only load expert
            # files that come from a trusted source.
            with open(load_initial_expertdata, 'rb') as f:
                loaded_paths = pickle.load(f)
            return loaded_paths, 0, None

        # Each collected rollout is capped at self.params['ep_len'] steps.
        print("\nCollecting data to be used for training...")
        print('batch size', batch_size)

        # Sample with the policy the caller passed in. Previously
        # `self.agent.actor` was hard-coded here, silently ignoring
        # `collect_policy` for training data while videos still used it.
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths

예제 #13

0

파일 보기

    def perform_logging(self, itr, paths, eval_policy, train_video_paths, training_logs):
        """
        Sample evaluation rollouts and log videos and scalar metrics.

        :param itr: iteration index used as the logging step
        :param paths: training trajectories; each is indexed with ``"reward"``
        :param eval_policy: policy used to sample the evaluation rollouts
        :param train_video_paths: training video rollouts, or None to skip
            video logging
        :param training_logs: sequence of dicts; only the last one is merged
            into the logged scalars
        """
        # Evaluation rollouts are always sampled; the step count is not used.
        print("\nCollecting data for eval...")
        eval_paths, _ = utils.sample_trajectories(
            self.env, eval_policy,
            self.params['eval_batch_size'],
            self.params['ep_len'])

        # Videos are written to the tensorboard event file only when video
        # logging is on and train video rollouts were supplied.
        if self.log_video and train_video_paths is not None:
            print('\nCollecting video rollouts eval')
            eval_video_paths = utils.sample_n_trajectories(
                self.env, eval_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

            print('\nSaving train rollouts as videos...')
            self.logger.log_paths_as_videos(
                train_video_paths, itr, fps=self.fps,
                max_videos_to_save=MAX_NVIDEO, video_title='train_rollouts')
            self.logger.log_paths_as_videos(
                eval_video_paths, itr, fps=self.fps,
                max_videos_to_save=MAX_NVIDEO, video_title='eval_rollouts')

        if not self.log_metrics:
            return

        # Per-trajectory returns and episode lengths.
        train_returns = [traj["reward"].sum() for traj in paths]
        eval_returns = [traj["reward"].sum() for traj in eval_paths]
        train_ep_lens = [len(traj["reward"]) for traj in paths]
        eval_ep_lens = [len(traj["reward"]) for traj in eval_paths]

        # Insertion order fixes the order in which scalars are printed/logged.
        logs = OrderedDict([
            ("Eval_AverageReturn", np.mean(eval_returns)),
            ("Eval_StdReturn", np.std(eval_returns)),
            ("Eval_MaxReturn", np.max(eval_returns)),
            ("Eval_MinReturn", np.min(eval_returns)),
            ("Eval_AverageEpLen", np.mean(eval_ep_lens)),
            ("Train_AverageReturn", np.mean(train_returns)),
            ("Train_StdReturn", np.std(train_returns)),
            ("Train_MaxReturn", np.max(train_returns)),
            ("Train_MinReturn", np.min(train_returns)),
            ("Train_AverageEpLen", np.mean(train_ep_lens)),
            ("Train_EnvstepsSoFar", self.total_env_steps),
            ("TimeSinceStart", time.time() - self.start_time),
        ])
        # Only the most recent training log is merged in.
        logs.update(training_logs[-1])

        # The iteration-0 training return is remembered and re-logged each time.
        if itr == 0:
            self.initial_return = np.mean(train_returns)
        logs["Initial_DataCollection_AverageReturn"] = self.initial_return

        for name in logs:
            scalar = logs[name]
            print('{} : {}'.format(name, scalar))
            self.logger.log_scalar(scalar, name, itr)
        print('Done logging...\n\n')

        self.logger.flush()

예제 #14

0

파일 보기

    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """
        Return the training trajectories for iteration ``itr``.

        On iteration 0 with an expert file, every entry of every loaded
        trajectory is truncated to its first ``batch_size`` items and
        ``(trajectories, 0, None)`` is returned. If no file is given, new
        trajectories are collected at *every* iteration.

        :param itr: index of the current training iteration
        :param load_initial_expertdata: path to expert data pkl file, or None
        :param collect_policy: policy used to sample rollouts
        :param batch_size: truncation length for expert data, or the number of
            transitions requested from the sampler
        :return: ``(paths, envsteps_this_batch, train_video_paths)``
        """
        if load_initial_expertdata is not None and itr == 0:
            with open(load_initial_expertdata, 'rb') as expert_file:
                expert_paths = pickle.load(expert_file)
                # Clip each field of each trajectory in place.
                for trajectory in expert_paths:
                    for field in trajectory:
                        clipped = trajectory[field][:batch_size]
                        trajectory[field] = clipped
                truncated = [trajectory for trajectory in expert_paths]
            return truncated, 0, None

        print("\nCollecting data to be used for training...")
        sampled = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])
        paths, envsteps_this_batch = sampled

        # Extra rollouts with the same policy (MAX_NVIDEO of them, each of
        # length MAX_VIDEO_LEN) are only gathered for video logging.
        if not self.log_video:
            return paths, envsteps_this_batch, None

        print('\nCollecting train rollouts to be used for saving videos...')
        video_rollouts = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, video_rollouts

예제 #15

0

파일 보기

파일: rl_trainer.py 프로젝트: ihooercom/cs285-homework

    def collect_training_trajectories(self,
                                      itr,
                                      initial_expertdata,
                                      collect_policy,
                                      num_transitions_to_sample,
                                      save_expert_data_to_disk=False):
        """
        Return the training trajectories for iteration ``itr``.

        :param itr: index of the current training iteration
        :param initial_expertdata: path to expert data pkl file; any falsy
            value disables loading
        :param collect_policy: the current policy using which we collect data
        :param num_transitions_to_sample: the number of transitions requested
            from the sampler
        :param save_expert_data_to_disk: accepted but not read here
        :return:
            paths: a list trajectories
            envsteps_this_batch: step count reported by the sampler (0 when
                expert data is loaded)
            train_video_paths: extra rollouts for video logging, or None
        """
        first_iteration = itr == 0
        if first_iteration and initial_expertdata:
            with open(initial_expertdata, 'rb') as expert_file:
                expert_paths = pickle.load(expert_file)
            return expert_paths, 0, None

        print("\nCollecting data to be used for training...")

        sampled = utils.sample_trajectories(
            self.env, collect_policy, num_transitions_to_sample,
            self.params['ep_len'])
        paths, envsteps_this_batch = sampled

        # Extra rollouts with the same policy (MAX_NVIDEO of them, each of
        # length MAX_VIDEO_LEN) are only gathered for video logging.
        if not self.logvideo:
            return paths, envsteps_this_batch, None

        print('\nCollecting train rollouts to be used for saving videos...')
        video_rollouts = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, video_rollouts

예제 #16

0

파일 보기

파일: rl_trainer.py 프로젝트: Max-Fu/homework_fall2020

    def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample, save_expert_data_to_disk=False):
        """
        Return the training trajectories for iteration ``itr``.

        :param itr: index of the current training iteration
        :param initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param num_transitions_to_sample:  the number of transitions we collect;
            on iteration 0 with ``save_expert_data_to_disk`` set it is replaced
            by ``self.params['batch_size_initial']``
        :param save_expert_data_to_disk: when true, the trajectories sampled on
            iteration 0 are pickled to ``expert_data_<env_name>.pkl``
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        if itr == 0:
            if initial_expertdata is not None:
                # Load from the path that was passed in (previously the
                # argument was tested but `self.params['expert_data']` was
                # opened), and close the handle deterministically.
                # NOTE(security): unpickling runs arbitrary code; only load
                # expert files that come from a trusted source.
                with open(initial_expertdata, 'rb') as expert_file:
                    paths = pickle.load(expert_file)
                return paths, 0, None
            if save_expert_data_to_disk:
                num_transitions_to_sample = self.params['batch_size_initial']

        # collect data to be used for training
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(self.env, collect_policy, num_transitions_to_sample, self.params['ep_len'])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        train_video_paths = None
        if self.logvideo:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        if save_expert_data_to_disk and itr == 0:
            with open('expert_data_{}.pkl'.format(self.params['env_name']), 'wb') as file:
                pickle.dump(paths, file)

        return paths, envsteps_this_batch, train_video_paths

예제 #17

0

파일 보기

파일: rl_trainer.py 프로젝트: tpvt99/rl_homework_2020

    def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample, save_expert_data_to_disk=False):
        """
        Sample fresh training trajectories with ``collect_policy``.

        ``itr``, ``initial_expertdata`` and ``save_expert_data_to_disk`` are
        accepted but not read by this implementation.

        :param itr: index of the current training iteration (unused here)
        :param initial_expertdata: path to expert data pkl file (unused here)
        :param collect_policy: the current policy using which we collect data
        :param num_transitions_to_sample: passed to the sampler as
            ``min_timesteps_per_batch``
        :return:
            paths: a list trajectories
            envsteps_this_batch: step count reported by the sampler
            train_video_paths: extra rollouts for video logging, or None
        """
        # Each rollout is capped at self.params['ep_len'] steps.
        print("\nCollecting data to be used for training...")
        sampled = utils.sample_trajectories(
            env=self.env,
            policy=collect_policy,
            min_timesteps_per_batch=num_transitions_to_sample,
            max_path_length=self.params['ep_len'],
        )
        paths, envsteps_this_batch = sampled

        # Extra rollouts with the same policy (MAX_NVIDEO of them, each of
        # length MAX_VIDEO_LEN) are only gathered for video logging.
        if not self.logvideo:
            return paths, envsteps_this_batch, None

        print('\nCollecting train rollouts to be used for saving videos...')
        video_rollouts = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, video_rollouts

예제 #18

0

파일 보기

파일: rl_trainer.py 프로젝트: henktillman/homework_fall2020

    def collect_training_trajectories(self, itr, load_initial_expertdata, collect_policy, batch_size):
        """
        :param itr: the current iteration number
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data (bcagent.actor = MLPPolicySL)
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list of trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths.
                If just loading expert data, we didn't take any environment steps :)
            train_video_paths: paths which also contain videos for visualization purposes
        """

        print("\nCollecting data to be used for training...")
        # If it's the first iteration, just return the expert training data
        if itr == 0 and load_initial_expertdata is not None:
            # `with` closes the file even if unpickling raises.
            # NOTE(security): unpickling runs arbitrary code; only load expert
            # files that come from a trusted source.
            with open(load_initial_expertdata, 'rb') as expert_file:
                loaded_paths = pickle.load(expert_file)
            return loaded_paths, 0, None

        # Otherwise we need to rollout our current policy to collect new observations
        # which we can later relabel using the expert policy.
        paths, envsteps_this_batch = utils.sample_trajectories(self.env, collect_policy, batch_size, self.params['ep_len'])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        # The collection path previously fell off the end of the function and
        # returned None instead of the documented 3-tuple.
        return paths, envsteps_this_batch, train_video_paths

예제 #19

0

파일 보기

    def collect_training_trajectories(
            self,
            itr,
            load_initial_expertdata,
            collect_policy,
            batch_size,
    ):
        """
        Load expert data on the first iteration, otherwise roll out `collect_policy`.

        :param itr: iteration index; expert data is only loaded when this is 0
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """

        # (1) First iteration with expert data available: load it and return directly.
        #     When no path is given we fall through and collect with the policy
        #     instead of crashing inside open(None).
        if itr == 0 and load_initial_expertdata is not None:
            # `with` closes the file even when unpickling raises.
            with open(load_initial_expertdata, 'rb') as expert_file:
                loaded_paths = pickle.load(expert_file)
            return loaded_paths, 0, None

        # (2) Collect roughly `batch_size` transitions as rollouts of length ep_len.
        print("\nCollecting data to be used for training...")
        ep_len = self.params['ep_len']
        # Always request at least one rollout: batch_size // ep_len is 0 whenever
        # batch_size < ep_len, which would otherwise yield no training data.
        n_rollouts = max(batch_size // ep_len, 1)
        paths = utils.sample_n_trajectories(self.env, collect_policy, n_rollouts, ep_len)
        envsteps_this_batch = sum(utils.get_pathlength(path) for path in paths)

        # Collect more rollouts with the same policy, to be saved as videos in tensorboard:
        # MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN.
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths

예제 #20

0

파일 보기

    def collect_training_trajectories(
        self,
        itr,
        load_initial_expertdata,
        collect_policy,
        batch_size,
    ):
        """
        Load expert data on the first iteration, otherwise roll out `collect_policy`.

        :param itr: iteration index; expert data is only loaded when this is 0
        :param load_initial_expertdata:  path to expert data pkl file; it is joined
            onto "<current working directory>/../../", so an absolute path is used as-is
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """

        # (1) First iteration with expert data available: load it and return directly.
        #     When no path is given we fall through and collect with the policy
        #     instead of failing inside os.path.join(..., None).
        if itr == 0 and load_initial_expertdata is not None:
            # Use a separate local so the caller-supplied argument is not rebound.
            expert_path = os.path.join(os.getcwd(), "../../",
                                       load_initial_expertdata)
            with open(expert_path, "rb") as f:
                expertdata = pickle.load(f)
            return expertdata, 0, None

        # (2) Collect `batch_size` transitions with rollouts capped at ep_len steps.
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env,
            collect_policy,
            min_timesteps_per_batch=batch_size,
            max_path_length=self.params['ep_len']
        )

        # Collect more rollouts with the same policy, to be saved as videos in tensorboard:
        # MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN.
        train_video_paths = None
        if self.log_video:
            print(
                '\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths

예제 #21

0

파일 보기

파일: rl_trainer.py 프로젝트: LecJackS/CS_285-Deep_Reinforcement_Learning

    def collect_training_trajectories(
        self,
        itr,
        load_initial_expertdata,
        collect_policy,
        batch_size,
    ):
        """
        Produce the training trajectories for iteration `itr`.

        On iteration 0 the trajectories are read from the expert data file and
        no environment steps are counted; on every other iteration they are
        sampled with `collect_policy`.

        :param itr: iteration index
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        if itr != 0:
            # Sample with the current policy; rollouts are capped at ep_len steps.
            print("Collecting data to be used for training...")
            sampled = utils.sample_trajectories(
                self.env,
                policy=collect_policy,
                min_timesteps_per_batch=batch_size,
                max_path_length=self.params['ep_len'],
                render=False,
                render_mode='rgb_array')
            paths, envsteps_this_batch = sampled
        else:
            # First iteration: read the pickled (.pkl) expert data through numpy.
            with open(load_initial_expertdata, 'rb') as expert_file:
                paths = np.load(expert_file, allow_pickle=True)
            envsteps_this_batch = 0

        # Without video logging there is nothing more to collect.
        if not self.log_video:
            return paths, envsteps_this_batch, None

        # Extra rollouts with the same policy, to be saved as videos in tensorboard.
        # MAX_NVIDEO rollouts are taken, each of length ep_len (not MAX_VIDEO_LEN).
        print('Collecting train rollouts to be used for saving videos...')
        video_rollouts = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, self.params['ep_len'], True)
        print(envsteps_this_batch)
        return paths, envsteps_this_batch, video_rollouts

예제 #22

0

파일 보기

파일: rl_trainer.py 프로젝트: Taekbum/cs285

    def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample, save_expert_data_to_disk=False):
        """
        Load expert data on the first iteration, otherwise roll out `collect_policy`.

        :param itr: iteration index; expert data is only loaded when this is 0
        :param initial_expertdata:  path to expert data pkl file (may be None)
        :param collect_policy:  the current policy using which we collect data
        :param num_transitions_to_sample:  the number of transitions we collect
        :param save_expert_data_to_disk:  accepted but not used by this method
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """

        print("\nCollecting data to be used for training...")
        if itr == 0 and initial_expertdata is not None:
            with open(initial_expertdata, 'rb') as paths_file:
                loaded_paths = pickle.load(paths_file)
            paths, envsteps_this_batch = loaded_paths, 0

        else:
            envsteps_this_batch = 0
            paths = []
            # NOTE(review): `<=` samples once more when the total equals the
            # target exactly -- confirm this is intended.
            while envsteps_this_batch <= num_transitions_to_sample:
                # Request enough rollouts to cover the remaining transitions,
                # and always at least one so the loop makes progress.
                remaining = num_transitions_to_sample - envsteps_this_batch
                paths.extend(utils.sample_n_trajectories(
                    self.env,
                    collect_policy,
                    max(remaining // self.params['ep_len'], 1),
                    self.params['ep_len'],
                ))
                # Builtin sum: calling np.sum on a generator is deprecated in NumPy.
                envsteps_this_batch = sum(path['observation'].shape[0] for path in paths)

        # Collect more rollouts with the same policy, to be saved as videos in tensorboard:
        # MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN.
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths

예제 #23

0

파일 보기

    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """
        Produce the training trajectories for iteration `itr`.

        :param itr: iteration index
        :param load_initial_expertdata:  path to expert data pkl file, or None
        :param collect_policy:  the policy used to sample new rollouts
        :param batch_size:  the number of transitions to collect
        :return:
            paths: the loaded or sampled trajectories
            envsteps_this_batch: 0 when expert data was loaded, otherwise the
                total number of observations across the sampled paths
            train_video_paths: video rollouts, or None when video logging is off
        """
        print("\nCollecting data to be used for training...")
        use_expert_data = itr == 0 and load_initial_expertdata is not None
        if use_expert_data:
            # First iteration with an expert file: no environment steps are taken.
            with open(load_initial_expertdata, 'rb') as expert_file:
                paths = pickle.load(expert_file)
            envsteps_this_batch = 0
        else:
            paths, envsteps_this_batch = [], 0
            # Keep sampling until the collected step count exceeds batch_size.
            while envsteps_this_batch <= batch_size:
                still_needed = batch_size - envsteps_this_batch
                # At least one rollout per pass so the loop always advances.
                n_rollouts = max(still_needed // self.params['ep_len'], 1)
                new_rollouts = utils.sample_n_trajectories(
                    self.env,
                    collect_policy,
                    n_rollouts,
                    max_path_length=self.params['ep_len'],
                )
                paths.extend(new_rollouts)
                # Recount over every path gathered so far.
                envsteps_this_batch = sum(
                    rollout['observation'].shape[0] for rollout in paths)

        # Extra rollouts with the same policy, to be saved as videos in tensorboard:
        # MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN.
        if self.log_video:
            print(
                '\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        else:
            train_video_paths = None

        return paths, envsteps_this_batch, train_video_paths

예제 #24

0

파일 보기

    def collect_training_trajectories(
        self,
        itr,
        load_initial_expertdata,
        collect_policy,
        batch_size,
    ):
        """
        Produce the training trajectories for iteration `itr`.

        On iteration 0 the expert trajectories are loaded and every entry of
        each trajectory is cut down to its first `batch_size` elements.
        On later iterations new rollouts are sampled with `collect_policy`.

        :param itr: iteration index
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        if itr == 0:
            with open(load_initial_expertdata, 'rb') as expert_file:
                expert_paths = pickle.load(expert_file)

            trimmed_paths = []
            for expert_path in expert_paths:
                # Truncate every field in place to at most batch_size entries.
                expert_path.update({
                    field: values[:batch_size]
                    for field, values in expert_path.items()
                })
                trimmed_paths.append(expert_path)
            return trimmed_paths, 0, None

        # Later iterations: sample batch_size transitions, rollouts capped at ep_len.
        print("\nCollecting data to be used for training...")
        sampled = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])
        paths, envsteps_this_batch = sampled

        # Without video logging there is nothing more to collect.
        if not self.log_video:
            return paths, envsteps_this_batch, None

        # Extra rollouts with the same policy, to be saved as videos in tensorboard:
        # MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN.
        print(
            '\nCollecting train rollouts to be used for saving videos...')
        video_rollouts = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, video_rollouts

예제 #25

0

파일 보기

파일: rl_trainer.py 프로젝트: MinWang1997/Policy-Gradient-Agent

    def collect_training_trajectories(
        self,
        itr,
        load_initial_expertdata,
        collect_policy,
        batch_size,
    ):
        """
        Load expert data on the first iteration, otherwise roll out `collect_policy`.

        :param itr: iteration index; expert data is only loaded when this is 0
        :param load_initial_expertdata:  path to expert data pkl file (may be None)
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        # (1) Load the expert data on the first iteration. If
        #     load_initial_expertdata is None, new trajectories are collected
        #     at *every* iteration instead.
        if itr == 0 and load_initial_expertdata is not None:
            with open(load_initial_expertdata, 'rb') as f:
                # pickle.load takes the file object itself; passing f.read()
                # (bytes) raises because bytes has no read()/readline().
                paths = pickle.load(f)
            return paths, 0, None

        # (2) Collect `batch_size` transitions with rollouts capped at ep_len steps.
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            env=self.env,
            policy=collect_policy,
            min_timesteps_per_batch=batch_size,
            max_path_length=self.params['ep_len'])

        # Collect more rollouts with the same policy, to be saved as videos in tensorboard:
        # MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN.
        train_video_paths = None
        if self.log_video:
            print(
                '\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths

예제 #26

0

파일 보기

파일: rl_trainer.py 프로젝트: henktillman/homework_fall2020

    def collect_training_trajectories(
            self,
            itr,
            load_initial_expertdata,
            collect_policy,
            batch_size,
    ):
        """
        Choose between loading expert data (behavior cloning) and rolling out
        the current policy (DAgger) for this iteration.

        :param itr: the current iteration number
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data (bcagent.actor = MLPPolicySL)
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list of trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths.
                If just loading expert data, we didn't take any environment steps :)
            train_video_paths: paths which also contain videos for visualization purposes
        """

        print("\nCollecting data to be used for training...")
        # On the first iteration, just return the expert training data. When no
        # path is given we fall through and collect with the policy instead of
        # crashing inside open(None).
        if itr == 0 and load_initial_expertdata is not None:
            # `with` guarantees the file is closed even if unpickling fails.
            with open(load_initial_expertdata, 'rb') as expert_file:
                loaded_paths = pickle.load(expert_file)
            return loaded_paths, 0, None

        # Otherwise roll out the current policy to collect new observations,
        # which can later be relabelled using the expert policy (DAgger).
        paths, envsteps_this_batch = utils.sample_trajectories(self.env, collect_policy, batch_size, self.params['ep_len'])

        # Collect more rollouts with the same policy, to be saved as videos in tensorboard:
        # MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN.
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths

예제 #27

0

파일 보기

    def collect_training_trajectories(self,
                                      itr,
                                      initial_expertdata,
                                      collect_policy,
                                      num_transitions_to_sample,
                                      save_expert_data_to_disk=False):
        """
        Produce the training trajectories for iteration `itr`.

        :param itr: iteration index
        :param initial_expertdata:  path to expert data pkl file, or None
        :param collect_policy:  the current policy using which we collect data
        :param num_transitions_to_sample:  the number of transitions we collect
        :param save_expert_data_to_disk:  must be falsy; the method asserts this
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """

        assert not save_expert_data_to_disk

        # First iteration with an expert file: hand back the loaded data as-is,
        # counting zero environment steps.
        load_expert = itr == 0 and initial_expertdata is not None
        if load_expert:
            with open(initial_expertdata, 'rb') as expert_file:
                expert_paths = pickle.load(expert_file)
            return expert_paths, 0, None

        # Otherwise sample num_transitions_to_sample transitions, with each
        # rollout capped at ep_len steps.
        print("\nCollecting data to be used for training...")
        sampled = utils.sample_trajectories(
            self.env, collect_policy, num_transitions_to_sample,
            self.params['ep_len'])
        paths, envsteps_this_batch = sampled

        # Without video logging there is nothing more to collect.
        if not self.logvideo:
            return paths, envsteps_this_batch, None

        # Extra rollouts with the same policy, to be saved as videos in tensorboard:
        # MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN.
        print(
            '\nCollecting train rollouts to be used for saving videos...')
        video_rollouts = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, video_rollouts

예제 #28

0

파일 보기

파일: rl_trainer.py 프로젝트: zhanyuanucb/homework_fall2020

    def collect_training_trajectories(self, itr, collect_policy, batch_size):
        """
        Sample training rollouts (and optionally video rollouts) with `collect_policy`.

        :param itr: iteration index (not used by this method)
        :param collect_policy: the policy used to sample rollouts
        :param batch_size: the number of transitions to request from
            utils.sample_trajectories
        :return:
            paths, envsteps_this_batch: the pair returned by utils.sample_trajectories
            train_video_paths: rollouts from utils.sample_n_trajectories when
                self.logvideo is set, otherwise None
        """
        # Collect data to be used for training.
        print("\nCollecting data to be used for training...")
        # Honour the caller's batch_size; the argument was previously ignored
        # in favour of self.params['batch_size'].
        paths, envsteps_this_batch = utils.sample_trajectories(self.env, collect_policy, batch_size, self.params['ep_len'])

        # Collect more rollouts with the same policy, to be saved as videos in tensorboard.
        train_video_paths = None
        if self.logvideo:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths

예제 #29

0

파일 보기

파일: rl_trainer.py 프로젝트: yuxiliu1995/homework_fall2020

    def collect_training_trajectories(
            self,
            itr,
            initial_expertdata,
            collect_policy,
            batch_size,
    ):
        """
        This function is called only in run_training_loop in this module.
        If itr == 0 and initial_expertdata is given, it simply loads the
        trajectories from initial_expertdata.
        Otherwise, it returns some new trajectories using collect_policy.

        :param itr: The iteration index. Starts at 0.
        :param initial_expertdata: Path to expert data pkl file.
        :param collect_policy: The current policy using which we collect data.
        :param batch_size: The number of transitions we collect. Replaced by
            self.params['batch_size_initial'] on iteration 0 when no expert
            data is given.

        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        if itr == 0:
            if initial_expertdata:
                # `with` closes the file; the handle was previously left open.
                with open(initial_expertdata, "rb") as pickle_in:
                    loaded_paths = pickle.load(pickle_in)
                return loaded_paths, 0, None
            else:
                # it's the first iteration, but you aren't loading expert data,
                # collect `self.params['batch_size_initial']`
                batch_size = self.params['batch_size_initial']

        # collect batch_size samples with collect_policy
        # each of these collected rollouts is of length self.params['ep_len']
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = \
            utils.sample_trajectories(env=self.env,
                                policy=collect_policy,
                                min_timesteps_per_batch=batch_size,
                                max_path_length=self.params['ep_len'])

        # collect more rollouts with collect_policy, to be saved as videos in tensorboard
        # collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN.
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths

예제 #30

0

파일 보기

파일: rl_trainer.py 프로젝트: hcost/homework_fall2020

    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """
        Produce the training trajectories for iteration `itr`.

        :param itr: iteration index
        :param load_initial_expertdata: path to the expert data file; when it
            is truthy and itr is 0 the file is read with np.load
        :param collect_policy: the policy used to sample new rollouts
        :param batch_size: the number of transitions to request from
            utils.sample_trajectories
        :return:
            paths: the loaded or sampled trajectories
            envsteps_this_batch: 0 when expert data was loaded, otherwise the
                count returned by utils.sample_trajectories
            train_video_paths: video rollouts, or None when nothing was recorded
        """
        use_expert_data = itr == 0 and load_initial_expertdata
        if use_expert_data:
            expert_paths = np.load(load_initial_expertdata, allow_pickle=True)
            return expert_paths, 0, None

        print("\nCollecting data to be used for training...")
        sampled = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])
        paths, envsteps_this_batch = sampled

        # Without video logging there is nothing more to collect.
        if not self.log_video:
            return paths, envsteps_this_batch, None

        # Extra rollouts with the same policy: MAX_NVIDEO of them, each of
        # length MAX_VIDEO_LEN.
        print(
            '\nCollecting train rollouts to be used for saving videos...')
        video_rollouts = utils.sample_n_trajectories(
            self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, video_rollouts