Example #1
    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """
        Load expert data on the first iteration, or roll out `collect_policy`.

        :param itr: index of the current training iteration
        :param load_initial_expertdata: path to a pickled list of paths; when
            truthy and `itr == 0`, the file is loaded instead of sampling
        :param collect_policy: policy handed to the sampling utilities
        :param batch_size: number of transitions requested from
            `utils.sample_trajectories`
        :return:
            paths: the loaded or sampled trajectories
            envsteps_this_batch: 0 when data was loaded, otherwise the count
                returned by `utils.sample_trajectories`
            train_video_paths: result of `utils.sample_n_trajectories` when
                `self.logvideo` is set, else None
        """
        if itr == 0:
            if load_initial_expertdata:
                # NOTE: pickle can execute arbitrary code; only load trusted files.
                with open(load_initial_expertdata, 'rb') as f:
                    loaded_paths = pickle.load(f)
                return loaded_paths, 0, None

            # First iteration without expert data: the initial batch size
            # replaces the requested one. (Previously this assignment sat in
            # the expert-data branch after its `return` and never ran.)
            batch_size = self.params['batch_size_initial']

        # Each collected rollout is capped at self.params['ep_len'] steps.
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # Collect MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN with the
        # same policy, to be saved as videos in tensorboard.
        train_video_paths = None
        if self.logvideo:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
    def collect_training_trajectories(self, itr, load_initial_expertdata, collect_policy, batch_size):
        """
        Load expert data on the first iteration, or roll out `collect_policy`.

        :param itr: index of the current training iteration
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        if itr == 0 and load_initial_expertdata is not None:
            # Expert data is loaded from disk, so no environment steps are taken.
            with open(load_initial_expertdata, 'rb') as f:
                loaded_paths = pickle.load(f)
            return loaded_paths, 0, None

        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # Extra rollouts are only gathered when video logging is enabled.
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """
        Load expert data on the first iteration, or roll out `collect_policy`.

        :param itr: index of the current training iteration
        :param load_initial_expertdata: path to a pickled list of paths, or
            None to collect new trajectories at every iteration
        :param collect_policy: policy handed to the sampling utilities
        :param batch_size: number of transitions requested from
            `utils.sample_trajectories`
        :return:
            paths: the loaded or sampled trajectories
            envsteps_this_batch: 0 when data was loaded, otherwise the count
                returned by `utils.sample_trajectories`
            train_video_paths: result of `utils.sample_n_trajectories` when
                `self.log_video` is set, else None
        """
        # Only the first iteration uses the expert file. Without the `itr`
        # check the same expert data was returned on every iteration and the
        # policy was never rolled out.
        if itr == 0 and load_initial_expertdata is not None:
            with open(load_initial_expertdata, 'rb') as f:
                expert_data = pickle.load(f)
            return expert_data, 0, None

        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # Collect MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN with the
        # same policy, to be saved as videos in tensorboard.
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
Example #4
    def collect_training_trajectories(self, itr, load_initial_expertdata, collect_policy, batch_size):
        """
        Load expert data on the first iteration, or roll out `collect_policy`.

        :param itr: index of the current training iteration
        :param load_initial_expertdata: path to a pickled list of paths, or
            None to collect new trajectories at every iteration
        :param collect_policy: policy handed to the sampling utilities
        :param batch_size: number of transitions requested from
            `utils.sample_trajectories`
        :return:
            paths: the loaded or sampled trajectories
            envsteps_this_batch: 0 when data was loaded, otherwise the count
                returned by `utils.sample_trajectories`
            train_video_paths: result of `utils.sample_n_trajectories` when
                `self.log_video` is set, else None
        """
        if itr == 0 and load_initial_expertdata is not None:
            import pickle

            with open(load_initial_expertdata, 'rb') as f:
                loaded_paths = pickle.load(f)
            return loaded_paths, 0, None

        # Each collected rollout is capped at self.params['ep_len'] steps.
        print("\nCollecting data to be used for training...")
        print('batch size', batch_size)
        # Sample with the policy the caller passed in. This used to hard-code
        # `self.agent.actor`, ignoring `collect_policy` for training data while
        # the video rollouts below already honoured it.
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # Collect MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN with the
        # same policy, to be saved as videos in tensorboard.
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, train_video_paths
Example #5
    def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample, save_expert_data_to_disk=False):
        """
        Roll out `collect_policy` to gather training data, rendering as it goes.

        :param itr: index of the current training iteration (unused here)
        :param initial_expertdata:  path to expert data pkl file (unused here:
            expert-data loading is disabled in this version)
        :param collect_policy:  the current policy using which we collect data
        :param num_transitions_to_sample:  the number of transitions we collect
        :param save_expert_data_to_disk: accepted for interface compatibility; unused here
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        # NOTE(review): loading `initial_expertdata` at itr == 0 was commented
        # out in the original; confirm that is intentional before re-enabling.

        # Each collected rollout is capped at self.params['ep_len'] steps; the
        # trailing `True, "human"` arguments are passed through to
        # utils.sample_trajectories unchanged.
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, num_transitions_to_sample,
            self.params['ep_len'], True, "human")

        # Collect MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN with the
        # same policy, to be saved as videos in tensorboard.
        train_video_paths = None
        if self.logvideo:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True, "human")

        return paths, envsteps_this_batch, train_video_paths
Example #6
    def run(self):
        # Prints a start message with self.name, samples trajectories into
        # self.paths / self.envsteps_this_batch, then prints an exit message.
        # The two printed strings are Chinese for "Starting thread:" and
        # "Exiting thread:".
        print("开始线程:" + self.name)
        # NOTE(review): the tail of this call (remaining arguments and the
        # closing parenthesis) is missing from this copy of the file and must
        # be restored from the original source before this can run.
        self.paths, self.envsteps_this_batch = utils.sample_trajectories(
            self.env, self.policy, self.min_timesteps_per_batch,

        print("退出线程:" + self.name)
    def collect_training_trajectories(self, itr, initial_expertdata, collect_policy,
                                      num_transitions_to_sample,
                                      save_expert_data_to_disk=False):
        """
        Load expert data on the first iteration, or roll out `collect_policy`.

        :param itr: index of the current training iteration
        :param initial_expertdata: when not None at `itr == 0`, expert paths are
            loaded from `self.params['expert_data']` instead of sampling
        :param collect_policy: the current policy using which we collect data
        :param num_transitions_to_sample: the number of transitions we collect
        :param save_expert_data_to_disk: when true at `itr == 0`, sample
            `self.params['batch_size_initial']` transitions and pickle them to
            `expert_data_<env_name>.pkl`
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        # NOTE(review): the parameter list and the tail of the sampling call
        # were cut off in this copy; both are restored to match the otherwise
        # identical variant of this method elsewhere in the file.
        if itr == 0:
            if initial_expertdata is not None:
                # `with` closes the handle that the bare open() used to leak.
                with open(self.params['expert_data'], 'rb') as f:
                    paths = pickle.load(f)
                return paths, 0, None
            if save_expert_data_to_disk:
                num_transitions_to_sample = self.params['batch_size_initial']

        # collect data to be used for training
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, num_transitions_to_sample,
            self.params['ep_len'])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        train_video_paths = None
        if self.logvideo:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        if save_expert_data_to_disk and itr == 0:
            with open('expert_data_{}.pkl'.format(self.params['env_name']),
                      'wb') as file:
                pickle.dump(paths, file)

        return paths, envsteps_this_batch, train_video_paths
Example #8
    def perform_logging(self, itr, paths, eval_policy, train_video_paths, training_logs):
        """
        Sample evaluation rollouts, then log videos and scalar metrics.

        :param itr: iteration index, used as the step for every logged value
        :param paths: training trajectories; each entry's "reward" array is
            summed and measured for the Train_* statistics
        :param eval_policy: policy handed to the sampling utilities for evaluation
        :param train_video_paths: training video rollouts; videos are logged only
            when this is not None and `self.log_video` is set
        :param training_logs: per-step training logs; only the last entry is used
        """
        # NOTE(review): three calls in this method were cut off in this copy of
        # the file. The eval batch arguments, the `video_title` values and the
        # `logs.update(last_log)` line are restored from the CS285 starter code
        # -- confirm against the original source.

        # collect eval trajectories, for logging
        print("\nCollecting data for eval...")
        eval_paths, eval_env_steps_this_batch = utils.sample_trajectories(
            self.env, eval_policy,
            self.params['eval_batch_size'], self.params['ep_len'])

        # save eval roll outs as videos in tensor board event file
        if self.log_video and train_video_paths is not None:
            print('\nCollecting video rollouts eval')
            eval_video_paths = utils.sample_n_trajectories(
                self.env, eval_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

            # save train/eval videos
            print('\nSaving train rollouts as videos...')
            self.logger.log_paths_as_videos(
                train_video_paths, itr, fps=self.fps,
                max_videos_to_save=MAX_NVIDEO, video_title='train_rollouts')
            self.logger.log_paths_as_videos(
                eval_video_paths, itr, fps=self.fps,
                max_videos_to_save=MAX_NVIDEO, video_title='eval_rollouts')

        # save eval metrics
        if self.log_metrics:
            # returns, for logging
            train_returns = [path["reward"].sum() for path in paths]
            eval_returns = [eval_path["reward"].sum() for eval_path in eval_paths]

            # episode lengths, for logging
            train_ep_lens = [len(path["reward"]) for path in paths]
            eval_ep_lens = [len(eval_path["reward"]) for eval_path in eval_paths]

            # decide what to log
            logs = OrderedDict()
            logs["Eval_AverageReturn"] = np.mean(eval_returns)
            logs["Eval_StdReturn"] = np.std(eval_returns)
            logs["Eval_MaxReturn"] = np.max(eval_returns)
            logs["Eval_MinReturn"] = np.min(eval_returns)
            logs["Eval_AverageEpLen"] = np.mean(eval_ep_lens)

            logs["Train_AverageReturn"] = np.mean(train_returns)
            logs["Train_StdReturn"] = np.std(train_returns)
            logs["Train_MaxReturn"] = np.max(train_returns)
            logs["Train_MinReturn"] = np.min(train_returns)
            logs["Train_AverageEpLen"] = np.mean(train_ep_lens)

            logs["Train_EnvstepsSoFar"] = self.total_env_steps
            logs["TimeSinceStart"] = time.time() - self.start_time
            last_log = training_logs[-1]  # Only use the last log for now
            logs.update(last_log)

            # Remember the first iteration's training return so that it can be
            # reported alongside every later iteration.
            if itr == 0:
                self.initial_return = np.mean(train_returns)
            logs["Initial_DataCollection_AverageReturn"] = self.initial_return

            # perform the logging
            for key, value in logs.items():
                print('{} : {}'.format(key, value))
                self.logger.log_scalar(value, key, itr)
            print('Done logging...\n\n')

Example #9
    def collect_training_trajectories(self, itr, initial_expertdata, collect_policy,
                                      num_transitions_to_sample,
                                      save_expert_data_to_disk=False):
        """
        Load expert data on the first iteration, or roll out `collect_policy`.

        :param itr: index of the current training iteration
        :param initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param num_transitions_to_sample:  the number of transitions we collect
        :param save_expert_data_to_disk: accepted for interface compatibility; unused here
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        # NOTE(review): the parameter list and the tail of the sampling call
        # were cut off in this copy; they are restored to match the sibling
        # variants of this method -- confirm against the original source.
        if itr == 0 and initial_expertdata:
            with open(initial_expertdata, 'rb') as f:
                paths = pickle.load(f)
            return paths, 0, None

        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, num_transitions_to_sample,
            self.params['ep_len'])

        # Collect MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN with the
        # same policy, to be saved as videos in tensorboard.
        train_video_paths = None
        if self.logvideo:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
    def collect_training_trajectories(self, itr, initial_expertdata, collect_policy,
                                      num_transitions_to_sample,
                                      save_expert_data_to_disk=False):
        """
        Load expert data on the first iteration, or roll out `collect_policy`.

        :param itr: index of the current training iteration
        :param initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param num_transitions_to_sample:  the number of transitions we collect
        :param save_expert_data_to_disk: accepted for interface compatibility; unused here
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        # NOTE(review): the parameter list and the tail of the sampling call
        # were cut off in this copy; they are restored to match the sibling
        # variants of this method -- confirm against the original source.
        if initial_expertdata is not None and itr == 0:
            import pickle
            with open(initial_expertdata, "rb") as handle:
                loaded_paths = pickle.load(handle)
            return loaded_paths, 0, None

        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, num_transitions_to_sample,
            self.params['ep_len'])

        # Extra rollouts are only gathered when video logging is enabled.
        train_video_paths = None
        if self.logvideo:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, train_video_paths
Example #11
    def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample, save_expert_data_to_disk=False):
        """
        Load expert data on the first iteration, or roll out `collect_policy`.

        :param itr: index of the current training iteration
        :param initial_expertdata: when not None at `itr == 0`, expert paths are
            loaded from `self.params['expert_data']` instead of sampling
        :param collect_policy:  the current policy using which we collect data
        :param num_transitions_to_sample:  the number of transitions we collect
        :param save_expert_data_to_disk: when true at `itr == 0`, sample
            `self.params['batch_size_initial']` transitions and pickle them to
            `expert_data_<env_name>.pkl`
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        if itr == 0:
            if initial_expertdata is not None:
                # `with` closes the handle that the bare open() used to leak.
                with open(self.params['expert_data'], 'rb') as f:
                    paths = pickle.load(f)
                return paths, 0, None
            if save_expert_data_to_disk:
                num_transitions_to_sample = self.params['batch_size_initial']

        # collect data to be used for training
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, num_transitions_to_sample,
            self.params['ep_len'])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        train_video_paths = None
        if self.logvideo:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        if save_expert_data_to_disk and itr == 0:
            with open('expert_data_{}.pkl'.format(self.params['env_name']), 'wb') as file:
                pickle.dump(paths, file)

        return paths, envsteps_this_batch, train_video_paths
    def collect_training_trajectories(self, itr, load_initial_expertdata, collect_policy, batch_size):
        """
        Load expert data on the first iteration, or roll out `collect_policy`.

        :param itr: the current iteration number
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data (bcagent.actor = MLPPolicySL)
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list of trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths.
                If just loading expert data, we didn't take any environment steps :)
            train_video_paths: paths which also contain videos for visualization purposes
        """
        print("\nCollecting data to be used for training...")
        # If it's the first iteration, just return the expert training data
        if itr == 0 and load_initial_expertdata is not None:
            # `with` closes the handle that the bare open() used to leak.
            with open(load_initial_expertdata, 'rb') as f:
                loaded_paths = pickle.load(f)
            return loaded_paths, 0, None

        # Otherwise roll out the current policy to collect new observations,
        # which can later be relabelled using the expert policy (DAgger).
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # Collect MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN with the
        # same policy, to be saved as videos in tensorboard.
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        # The sampling path previously fell off the end and returned None.
        return paths, envsteps_this_batch, train_video_paths
Example #13
    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """
        Load (truncated) expert data on the first iteration, or roll out `collect_policy`.

        :param itr: index of the current training iteration
        :param load_initial_expertdata: path to a pickled list of path dicts, or
            None to collect new trajectories at every iteration
        :param collect_policy: policy handed to the sampling utilities
        :param batch_size: number of transitions to sample; also the maximum
            number of entries kept per field of each loaded expert path
        :return:
            paths: the truncated expert paths or the sampled trajectories
            envsteps_this_batch: 0 when data was loaded, otherwise the count
                returned by `utils.sample_trajectories`
            train_video_paths: result of `utils.sample_n_trajectories` when
                `self.log_video` is set, else None
        """
        if itr == 0 and load_initial_expertdata is not None:
            with open(load_initial_expertdata, 'rb') as f:
                paths = pickle.load(f)
            # Keep at most `batch_size` entries of every field of every path.
            # The result list used to be created but never filled, so an empty
            # list was returned instead of the expert data.
            new_paths = [
                {key: value[:batch_size] for key, value in path.items()}
                for path in paths
            ]
            return new_paths, 0, None

        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # Collect MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN with the
        # same policy, to be saved as videos in tensorboard.
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, train_video_paths
Example #14
    def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample, save_expert_data_to_disk=False):
        """
        Roll out `collect_policy` to gather training data.

        :param itr: index of the current training iteration (unused here)
        :param initial_expertdata:  path to expert data pkl file (unused here)
        :param collect_policy:  the current policy using which we collect data
        :param num_transitions_to_sample:  the number of transitions we collect
        :param save_expert_data_to_disk: accepted for interface compatibility; unused here
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        # NOTE(review): the tail of this call was cut off in this copy; the
        # arguments are restored positionally to match the sibling variants of
        # this method -- confirm against the original source.
        # Each collected rollout is capped at self.params['ep_len'] steps.
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, num_transitions_to_sample,
            self.params['ep_len'])

        # Collect MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN with the
        # same policy, to be saved as videos in tensorboard.
        train_video_paths = None
        if self.logvideo:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
Example #15
    def collect_training_trajectories(self, itr, initial_expertdata, collect_policy, num_transitions_to_sample, save_expert_data_to_disk=False):
        """
        Load expert data on the first iteration, or roll out `collect_policy`.

        :param itr: index of the current training iteration
        :param initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param num_transitions_to_sample:  the number of transitions we collect
        :param save_expert_data_to_disk: when true, extra video rollouts are collected
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        if itr == 0 and initial_expertdata is not None:
            # `with` closes the handle that the bare open() used to leak.
            with open(initial_expertdata, 'rb') as file:
                loaded_paths = pickle.load(file)
            return loaded_paths, 0, None

        ep_len = self.params['ep_len']
        envsteps_this_batch = 0
        paths = []
        while envsteps_this_batch <= num_transitions_to_sample:
            remaining = num_transitions_to_sample - envsteps_this_batch
            paths_this_batch, timesteps_this_batch = utils.sample_trajectories(
                self.env, collect_policy, max(remaining // ep_len, 1), ep_len)
            # Keep what was sampled. These paths used to be discarded, so the
            # method always returned an empty list.
            paths.extend(paths_this_batch)
            envsteps_this_batch += timesteps_this_batch

        # NOTE(review): video collection is gated on `save_expert_data_to_disk`
        # here, whereas sibling variants gate it on a log-video flag -- confirm.
        train_video_paths = None
        if save_expert_data_to_disk:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
Example #16
    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """
        Load expert data on the first iteration, or roll out `collect_policy`.

        :param itr: index of the current training iteration
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        # if your load_initial_expertdata is None, then you need to collect new trajectories at *every* iteration
        if itr == 0 and load_initial_expertdata is not None:
            # load expert data
            with open(load_initial_expertdata, "rb") as f:
                expertdata = pickle.load(f)
            return expertdata, 0, None

        # NOTE(review): the argument list of this call was missing in this copy;
        # it is restored to match the sibling variants of this method. The
        # original carried an open question about also passing render /
        # render_mode -- confirm against the original source.
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # Extra rollouts are only gathered when video logging is enabled.
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
Example #17
    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """
        Roll out `collect_policy` to gather training data.

        :param itr: index of the current training iteration (unused here)
        :param load_initial_expertdata:  path to expert data pkl file (unused here)
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        # NOTE(review): the parameter list was cut off in this copy; it is
        # restored from the documented parameters and the names used below.
        # This variant never loads expert data, so new trajectories are
        # collected at every iteration.
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # Collect MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN with the
        # same policy, to be saved as videos in tensorboard.
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)
        return paths, envsteps_this_batch, train_video_paths
Example #18
    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """
        Load expert data on the first iteration, otherwise roll out `collect_policy`.

        :param itr: index of the current training iteration
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        # NOTE(review): the parameter list, the last argument of os.path.join
        # and the arguments of the sampling call were cut off in this copy.
        # They are restored from the surrounding code and the sibling variants
        # of this method -- confirm against the original source.
        if itr == 0:
            # Load expert data; the path is resolved two directories above the
            # current working directory (an absolute path is used unchanged).
            load_initial_expertdata = os.path.join(
                os.getcwd(), "../../", load_initial_expertdata)
            with open(load_initial_expertdata, "rb") as f:
                expertdata = pickle.load(f)
            return expertdata, 0, None

        # Each collected rollout is capped at self.params['ep_len'] steps.
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # Collect MAX_NVIDEO extra rollouts of length MAX_VIDEO_LEN with the
        # same policy, to be saved as videos in tensorboard.
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """
        Load expert data on the first iteration, otherwise roll out `collect_policy`.

        :param itr: index of the current training iteration
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        # NOTE(review): the parameter list, the `else:` between loading and
        # sampling, and the tails of both sampling calls were cut off in this
        # copy. They are restored from the surrounding code and the sibling
        # variants of this method -- confirm against the original source.
        if itr == 0:
            # Load pickle (.pkl) of data; no environment steps are taken.
            with open(load_initial_expertdata, 'rb') as handle:
                loaded_paths = np.load(handle, allow_pickle=True)

            paths = loaded_paths
            envsteps_this_batch = 0
        else:
            # Without this branch the loaded data was overwritten at itr 0 and
            # `paths` was unbound at later iterations.
            # Each collected rollout is capped at self.params['ep_len'] steps.
            print("Collecting data to be used for training...")
            paths, envsteps_this_batch = utils.sample_trajectories(
                self.env, collect_policy, batch_size, self.params['ep_len'],
                render_mode=('rgb_array'))

        # Collect MAX_NVIDEO extra rollouts with the same policy, to be saved
        # as videos in tensorboard. This variant deliberately uses
        # self.params['ep_len'] instead of MAX_VIDEO_LEN as the rollout length.
        train_video_paths = None
        if self.log_video:
            print('Collecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, self.params['ep_len'],
                True)
        return paths, envsteps_this_batch, train_video_paths
Example #20
    def perform_dqn_logging(self, all_logs):
        """
        Log DQN training statistics and fresh evaluation-rollout metrics.

        Updates `self.mean_episode_reward` and `self.best_mean_episode_reward`
        from the episode rewards recorded on `self.env`, samples evaluation
        trajectories from `self.eval_env`, and writes every scalar to
        `self.logger` at step `self.agent.t`.

        :param all_logs: list of training logs; only the last entry is used
        """
        # NOTE(review): two calls in this method were cut off in this copy of
        # the file. The "Monitor" wrapper lookup, the second argument of max()
        # and the `logs.update(last_log)` line are restored from the CS285
        # starter code -- confirm against the original source.
        last_log = all_logs[-1]

        episode_rewards = get_wrapper_by_name(
            self.env, "Monitor").get_episode_rewards()
        print("0: ", len(episode_rewards))  # added
        if len(episode_rewards) > 0:
            # Mean over the most recent (up to) 100 episodes.
            self.mean_episode_reward = np.mean(episode_rewards[-100:])
            print("1: ", self.mean_episode_reward)  # added
        if len(episode_rewards) > 100:
            self.best_mean_episode_reward = max(self.best_mean_episode_reward,
                                                self.mean_episode_reward)
            print("2: ", self.mean_episode_reward)  # added

        logs = OrderedDict()

        logs["Train_EnvstepsSoFar"] = self.agent.t
        print("Timestep %d" % (self.agent.t, ))
        # Rewards at or below -5000 are printed but not logged as returns.
        if self.mean_episode_reward > -5000:
            logs["Train_AverageReturn"] = np.mean(self.mean_episode_reward)
        print("mean reward (100 episodes) %f" % self.mean_episode_reward)
        if self.best_mean_episode_reward > -5000:
            logs["Train_BestReturn"] = np.mean(self.best_mean_episode_reward)
        print("best mean reward %f" % self.best_mean_episode_reward)

        if self.start_time is not None:
            time_since_start = (time.time() - self.start_time)
            print("running time %f" % time_since_start)
            logs["TimeSinceStart"] = time_since_start

        logs.update(last_log)

        eval_paths, eval_envsteps_this_batch = utils.sample_trajectories(
            self.eval_env, self.agent.eval_policy,
            self.params['eval_batch_size'], self.params['ep_len'])

        eval_returns = [eval_path["reward"].sum() for eval_path in eval_paths]
        eval_ep_lens = [len(eval_path["reward"]) for eval_path in eval_paths]

        logs["Eval_AverageReturn"] = np.mean(eval_returns)
        logs["Eval_StdReturn"] = np.std(eval_returns)
        logs["Eval_MaxReturn"] = np.max(eval_returns)
        logs["Eval_MinReturn"] = np.min(eval_returns)
        logs["Eval_AverageEpLen"] = np.mean(eval_ep_lens)

        logs['Buffer size'] = self.agent.replay_buffer.num_in_buffer

        for key, value in logs.items():
            print('{} : {}'.format(key, value))
            self.logger.log_scalar(value, key, self.agent.t)
        print('Done logging...\n\n')

Example #21
    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """Load expert trajectories on iteration 0, otherwise roll out the policy.

        :param itr: current iteration index
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        # (1) first iteration with expert data available: load it and return.
        if itr == 0 and load_initial_expertdata:
            # NOTE: pickle can execute arbitrary code; only load trusted files.
            with open(load_initial_expertdata, 'rb') as f:
                loaded_paths = pickle.load(f)
            # Keep only the first `batch_size` entries of every field of every
            # path. The truncated copies are what gets returned (previously
            # the result list was never filled, so nothing was returned).
            new_paths = [
                {key: value[:batch_size] for key, value in path.items()}
                for path in loaded_paths
            ]
            return new_paths, 0, None

        # (2) collect `batch_size` transitions with the current policy, each
        # rollout being at most self.params['ep_len'] steps long.
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
        train_video_paths = None
        if self.log_video:
            print(
                '\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """Load expert trajectories on iteration 0, otherwise roll out the policy.

        :param itr: current iteration index
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        # (1) load the expert data on the first iteration.
        # If load_initial_expertdata is None, new trajectories are collected
        # at *every* iteration instead.
        if itr == 0 and load_initial_expertdata is not None:
            # NOTE: pickle can execute arbitrary code; only load trusted files.
            with open(load_initial_expertdata, 'rb') as f:
                # pickle.load takes the file object itself, not its bytes.
                paths = pickle.load(f)
            return paths, 0, None

        # (2) collect `batch_size` transitions, each rollout being at most
        # self.params['ep_len'] steps long.
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
        train_video_paths = None
        if self.log_video:
            print(
                '\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
Example #23
    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """Load expert trajectories on iteration 0, otherwise roll out the policy.

        :param itr: current iteration index
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        # (1) first iteration with expert data available: load it and return.
        if itr == 0 and load_initial_expertdata:
            # NOTE: pickle can execute arbitrary code; only load trusted files.
            with open(load_initial_expertdata, 'rb') as f:
                data = pickle.load(f)
            return data, 0, None

        # (2) collect `batch_size` transitions with the current policy.
        # A single sampler call is used: the sampler is asked for `batch_size`
        # transitions, so calling it once per transition would over-collect.
        # NOTE(review): assumes utils.sample_trajectories(env, policy,
        # min_timesteps, max_path_length) as used by sibling variants -- confirm.
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
        train_video_paths = None
        if self.log_video:
            print(
                '\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """Choose between loading expert data (itr 0) and rolling out the policy.

        :param itr: the current iteration number
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list of trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths.
                If just loading expert data, we didn't take any environment steps :)
            train_video_paths: paths which also contain videos for visualization purposes
        """
        print("\nCollecting data to be used for training...")

        # If it's the first iteration, just return the expert training data.
        # Without an expert-data path we fall through and collect instead of
        # failing inside open().
        if itr == 0 and load_initial_expertdata:
            # NOTE: pickle can execute arbitrary code; only load trusted files.
            # The context manager closes the file handle after loading.
            with open(load_initial_expertdata, 'rb') as expert_file:
                loaded_paths = pickle.load(expert_file)
            return loaded_paths, 0, None

        # Otherwise roll out the current policy to collect new observations;
        # each rollout is at most self.params['ep_len'] steps long.
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
Example #25
    def collect_training_trajectories(self,
                                      itr,
                                      initial_expertdata,
                                      collect_policy,
                                      num_transitions_to_sample,
                                      save_expert_data_to_disk=False):
        """Load expert trajectories on iteration 0, otherwise roll out the policy.

        :param itr: current iteration index
        :param initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param num_transitions_to_sample:  the number of transitions we collect
        :param save_expert_data_to_disk: must be falsy; saving is not
            implemented in this method
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        assert not save_expert_data_to_disk

        # (1) first iteration with expert data available: load it and return.
        if itr == 0 and initial_expertdata is not None:
            # NOTE: pickle can execute arbitrary code; only load trusted files.
            with open(initial_expertdata, 'rb') as fd:
                loaded_paths = pickle.load(fd)
            return loaded_paths, 0, None

        # (2) collect `num_transitions_to_sample` transitions, each rollout
        # being at most self.params['ep_len'] steps long.
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, num_transitions_to_sample,
            self.params['ep_len'])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
        train_video_paths = None
        if self.logvideo:
            print(
                '\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
    def collect_training_trajectories(self, itr, collect_policy, batch_size):
        """Roll out ``collect_policy`` to gather training data and optional videos.

        :param itr: current iteration index (not used by this variant)
        :param collect_policy: the policy used to collect data
        :param batch_size: the number of transitions to collect
        :return:
            paths: the sampled trajectories
            envsteps_this_batch: environment-step count reported by the sampler
            train_video_paths: video rollouts, or None when video logging is off
        """
        # Honour the caller-supplied `batch_size` rather than silently
        # re-reading self.params['batch_size'].
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        train_video_paths = None
        if self.logvideo:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
    def collect_training_trajectories(self, itr, initial_expertdata,
                                      collect_policy, batch_size):
        """Return training trajectories for one iteration.

        This function is called only in run_training_loop in this module.
        If itr == 0 and initial_expertdata is given, it simply loads the
        trajectories from initial_expertdata.
        Otherwise, it returns some new trajectories using collect_policy.

        :param itr: The iteration index. Starts at 0.
        :param initial_expertdata: Path to expert data pkl file.
        :param collect_policy: The current policy using which we collect data.
        :param batch_size: The number of transitions we collect.

        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        if itr == 0:
            if initial_expertdata:
                # NOTE: pickle can execute arbitrary code; only load trusted
                # files. The context manager closes the file after loading.
                with open(initial_expertdata, "rb") as pickle_in:
                    loaded_paths = pickle.load(pickle_in)
                return loaded_paths, 0, None
            # it's the first iteration, but you aren't loading expert data,
            # collect `self.params['batch_size_initial']`
            batch_size = self.params['batch_size_initial']

        # collect batch_size samples with collect_policy
        # each of these collected rollouts is of length self.params['ep_len']
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # collect more rollouts with collect_policy, to be saved as videos in tensorboard
        # collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN.
        train_video_paths = None
        if self.log_video:
            print('\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
Example #28
    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """Load expert trajectories on iteration 0, otherwise roll out the policy.

        :param itr: current iteration index
        :param load_initial_expertdata: path to the expert data file; falsy to
            always collect with the policy
        :param collect_policy: the policy used to collect data
        :param batch_size: the number of transitions to collect
        :return:
            paths: loaded or sampled trajectories
            envsteps_this_batch: environment-step count (0 when loading)
            train_video_paths: video rollouts, or None
        """
        if itr == 0 and load_initial_expertdata:
            # NOTE: allow_pickle=True can execute arbitrary code; only load
            # trusted expert-data files.
            loaded_data = np.load(load_initial_expertdata, allow_pickle=True)
            return loaded_data, 0, None

        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params['ep_len'])

        # Extra rollouts with the same policy, kept for video logging.
        train_video_paths = None
        if self.log_video:
            print(
                '\nCollecting train rollouts to be used for saving videos...')
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
    def collect_training_trajectories(self, itr, load_initial_expertdata,
                                      collect_policy, batch_size):
        """Load expert trajectories on iteration 0, otherwise roll out the policy.

        :param itr: current iteration index
        :param load_initial_expertdata:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        # (1) load the data on the first iteration when a path is given
        if itr == 0 and load_initial_expertdata is not None:
            # NOTE: pickle can execute arbitrary code; only load trusted files.
            with open(load_initial_expertdata, "rb") as fin:
                loaded_paths = pickle.load(fin)
            return loaded_paths, 0, None

        # (2) collect `batch_size` transitions; each of these collected
        # rollouts is at most self.params['ep_len'] steps long
        print("\nCollecting data to be used for training...")
        paths, envsteps_this_batch = utils.sample_trajectories(
            self.env, collect_policy, batch_size, self.params["ep_len"])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
        train_video_paths = None
        if self.log_video:
            print(
                "\nCollecting train rollouts to be used for saving videos...")
            train_video_paths = utils.sample_n_trajectories(
                self.env, collect_policy, MAX_NVIDEO, MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths
    def collect_training_trajectories(self, itr, initial_expertdata_path,
                                      collect_policy, batch_size, log_video):
        """Load expert trajectories on iteration 0, otherwise roll out the policy.

        Unlike an early-return design, video rollouts are still sampled after
        loading expert data when ``log_video`` is set.

        :param itr: current iteration index
        :param initial_expertdata_path:  path to expert data pkl file
        :param collect_policy:  the current policy using which we collect data
        :param batch_size:  the number of transitions we collect
        :param log_video:  whether to sample a set of trajectories to be logged as videos
        :return:
            paths: a list trajectories
            envsteps_this_batch: the sum over the numbers of environment steps in paths
            train_video_paths: paths which also contain videos for visualization purposes
        """
        if itr == 0 and initial_expertdata_path is not None:
            # Loading expert data takes no environment steps.
            # NOTE: pickle can execute arbitrary code; only load trusted files.
            with open(initial_expertdata_path, 'rb') as fin:
                paths, envsteps_this_batch = pickle.load(fin), 0
        else:
            # Collect `batch_size` transitions, each rollout being at most
            # self.params['ep_len'] steps long.
            print("\nCollecting data to be used for training...")
            paths, envsteps_this_batch = sample_trajectories(
                self.env, collect_policy, batch_size, self.params['ep_len'])

        # collect more rollouts with the same policy, to be saved as videos in tensorboard
        # note: here, we collect MAX_NVIDEO rollouts, each of length MAX_VIDEO_LEN
        train_video_paths = None
        if log_video:
            print(
                '\nCollecting train rollouts to be used for saving videos...')
            # NOTE(review): no MAX_NVIDEO argument is passed although the
            # comment above mentions it -- confirm against the signature of
            # sample_n_trajectories.
            train_video_paths = sample_n_trajectories(self.env, collect_policy,
                                                      MAX_VIDEO_LEN, True)

        return paths, envsteps_this_batch, train_video_paths