Example #1
0
    def add_rollouts(self, rlist, max_to_add=None):
        """
        Distribute rollouts round-robin across the replay pools.

        :param rlist: iterable whose entries are either pkl filenames (str)
            or rollout dictionaries
        :param max_to_add: if not None, stop once the total number of stored
            timesteps reaches this cap, truncating the final rollout as needed
        :raises NotImplementedError: if an rlist entry is neither str nor dict
        """
        # current total number of timesteps already stored across all pools
        step = sum(len(replay_pool) for replay_pool in self._replay_pools)
        replay_pools = itertools.cycle(self._replay_pools)
        done_adding = False

        for rlist_entry in rlist:
            # isinstance is the idiomatic type check (and, like the original
            # issubclass(type(...)) form, accepts dict subclasses)
            if isinstance(rlist_entry, str):
                rollouts = mypickle.load(rlist_entry)['rollouts']
            elif isinstance(rlist_entry, dict):
                rollouts = [rlist_entry]
            else:
                raise NotImplementedError

            for rollout, replay_pool in zip(rollouts, replay_pools):
                r_len = len(rollout['dones'])
                if max_to_add is not None and step + r_len >= max_to_add:
                    # truncate so that exactly max_to_add timesteps end up stored
                    diff = max_to_add - step
                    for k in ('observations', 'actions', 'rewards', 'dones'):
                        rollout[k] = rollout[k][:diff]
                    done_adding = True
                    r_len = len(rollout['dones'])

                replay_pool.store_rollout(step, rollout)
                step += r_len

                if done_adding:
                    break

            if done_adding:
                break
Example #2
0
    def _write_tfrecord(pkl_fname, tfrecord_fname, obs_shape):
        """
        Convert a pkl of rollouts into a tfrecord of (image, right-lane mask)
        training pairs.

        :param pkl_fname: pkl file containing a 'rollouts' list whose
            env_infos hold 'rgb' and 'semantic' images
        :param tfrecord_fname: output tfrecord path
        :param obs_shape: target shape the images are resized to
        """
        rollouts = mypickle.load(pkl_fname)['rollouts']

        # use the writer as a context manager so the file is closed even if
        # an exception is raised mid-conversion (the original leaked it)
        with tf.python_io.TFRecordWriter(tfrecord_fname) as writer:
            for r in rollouts:
                images = r['env_infos']['rgb']
                semantics = r['env_infos']['semantic']

                for image, semantic in zip(images, semantics):
                    # binary mask of the right lane derived from the semantic image
                    rightlane_mask = CarlaCollSpeedRoadEnv.get_rightlane(
                        semantic)[0].astype(np.uint8)

                    image = utils.imresize(image, obs_shape)
                    # resize the single-channel mask, then drop the channel axis
                    rightlane_mask = utils.imresize(rightlane_mask[..., None],
                                                    obs_shape[:2] + [1],
                                                    PIL.Image.BILINEAR)[..., -1]

                    example = tf.train.Example(features=tf.train.Features(
                        feature={
                            'image':
                            RoadLabeller._bytes_feature(image.tostring()),
                            'label':
                            RoadLabeller._bytes_feature(rightlane_mask.tostring()),
                        }))
                    writer.write(example.SerializeToString())
Example #3
0
 def _load_rollouts(self, itr):
     """Return the rollouts saved for iteration *itr*, or [] if no file exists."""
     fname = self._rollouts_file_name(itr)
     if not os.path.exists(fname):
         return []
     return mypickle.load(fname)['rollouts']
Example #4
0
    def add_rollouts(self, rollout_filenames, max_to_add=None):
        """
        Load rollouts from pkl files and spread them round-robin over the
        replay pools.

        :param rollout_filenames: iterable of pkl filenames, each containing
            a 'rollouts' list
        :param max_to_add: if not None, stop once the total number of stored
            timesteps reaches this cap, truncating the final rollout as needed
        """
        # current total number of timesteps already stored across all pools
        # (removed a dead `itr` counter that was incremented but never read)
        step = sum(len(replay_pool) for replay_pool in self._replay_pools)
        replay_pools = itertools.cycle(self._replay_pools)
        done_adding = False

        for fname in rollout_filenames:
            rollouts = mypickle.load(fname)['rollouts']

            for rollout, replay_pool in zip(rollouts, replay_pools):
                r_len = len(rollout['dones'])
                if max_to_add is not None and step + r_len >= max_to_add:
                    # truncate so that exactly max_to_add timesteps end up stored
                    diff = max_to_add - step
                    for k in ('observations', 'actions', 'rewards', 'dones', 'logprobs'):
                        rollout[k] = rollout[k][:diff]
                    done_adding = True
                    r_len = len(rollout['dones'])

                replay_pool.store_rollout(step, rollout)
                step += r_len

                if done_adding:
                    break

            if done_adding:
                break
Example #5
0
    def _convert_rollout(self, i, pkl_fname, pkl_suffix):
        """
        Convert one pkl of rollouts into a SequenceExample tfrecord file.

        :param i: index used to name the output file
        :param pkl_fname: input pkl containing a 'rollouts' list
        :param pkl_suffix: suffix appended to the output filename
        """
        rollouts = mypickle.load(pkl_fname)['rollouts']

        tfrecord_fname = os.path.join(
            self._output_folder, '{0:05d}_{1}.tfrecord'.format(i, pkl_suffix))

        # context manager guarantees the writer is closed even if
        # _create_rollout or the assertions below raise (original leaked it)
        with tf.python_io.TFRecordWriter(tfrecord_fname) as writer:
            for r in rollouts:

                ### modify the rollout (in case the env is different)!
                r = self._create_rollout(r, self._labeller)

                # sanity-check the keys the tfrecord features are built from
                for k in ('observations_im', 'observations_vec', 'actions',
                          'dones', 'steps', 'goals'):
                    assert k in r.keys(), '{0} not in rollout!'.format(k)

                ex = tf.train.SequenceExample()
                for k, np_dtype in zip(
                        GCGPolicyTfrecord.tfrecord_feature_names,
                        GCGPolicyTfrecord.tfrecord_feature_np_types):
                    fl = ex.feature_lists.feature_list[k]
                    # one bytes feature per timestep, cast to the declared dtype
                    for feature in r[k]:
                        fl.feature.add().bytes_list.value.append(
                            feature.astype(np_dtype).tostring())

                writer.write(ex.SerializeToString())
Example #6
0
def plot_rw_rccar_var001_var016():
    """Box-plot eval-rollout lengths (timesteps survived) per start position
    for a set of real-world RC-car experiment variants (1, 5, 9, 12, 13),
    using the itr_0039 evaluation rollouts of each experiment.
    """
    # which experiment parameters appear in each legend label
    label_params = [
        # ['exp', ('exp_name',)],
        ['policy', ('policy', 'GCGPolicy', 'outputs', 0, 'name')],
        ['H', ('policy', 'H')],
        ['target', ('policy', 'use_target')],
        ['obs_shape', ('alg', 'env', 'params', 'obs_shape')]
    ]

    experiment_groups = [
        ExperimentGroup(os.path.join(DATA_DIR, 'rw_rccar/var{0:03d}'.format(num)),
                        label_params=label_params,
                        plot={
                        })
    for num in [1, 5, 9, 12, 13]]

    mec = MultiExperimentComparison(experiment_groups)

    # gather the rollout lengths (timesteps survived) for each experiment
    lengths_list = []
    for exp in mec.list:
        eval_folder = os.path.join(exp.folder, 'eval_itr_0039')
        eval_pkl_fname = os.path.join(eval_folder, 'itr_0039_eval_rollouts.pkl')
        rollouts = mypickle.load(eval_pkl_fname)['rollouts']

        # expects 24 rollouts; reshaped below as 8 start positions x 3 trials
        assert (len(rollouts) == 24)

        lengths = [len(r['dones']) for r in rollouts]
        lengths_list.append(lengths)

    f, ax = plt.subplots(1, 1)
    # NOTE(review): xs is never used below -- looks like a leftover from the
    # commented-out scatter plot
    xs = np.vstack((np.r_[0:8.] + 0.,
                    np.r_[0:8.] + 0.1,
                    np.r_[0:8.] + 0.2,)).T.ravel()
    legend_patches = []
    for i, (exp, lengths) in enumerate(zip(mec.list, lengths_list)):
        # rows: start positions, columns: trials
        lengths = np.reshape(lengths, (8, 3))
        width = 0.6 / float(len(lengths_list))
        color = cm.viridis(i / float(len(lengths_list)))
        label = 'median: {0}, {1}'.format(np.median(lengths), exp.plot['label'])

        # one box per start position, offset per experiment so boxes sit side by side
        bp = ax.boxplot(lengths.T, positions=np.arange(len(lengths)) + 1.2 * i * width, widths=width, patch_artist=True)
        for patch in bp['boxes']:
            patch.set_facecolor(color)
        legend_patches.append(mpatches.Patch(color=color, label=label))
        # ax.plot(xs, lengths, label=exp.plot['label'], linestyle='None', marker='o')
    ax.legend(handles=legend_patches)
    ax.xaxis.set_ticks(np.arange(8))
    ax.xaxis.set_ticklabels(np.arange(8))
    ax.set_xlim((-0.5, 8.5))
    ax.set_xlabel('Start Position Number')
    ax.set_ylabel('Timesteps survived')
    plt.show()

    # NOTE(review): debugging leftover -- drops into an interactive shell
    import IPython; IPython.embed()
Example #7
0
 def _get_itr_rollouts(self, itr, testing=False):
     """Load the saved rollouts for iteration *itr*.

     Uses the eval file when *testing* is True, the train file otherwise.
     Returns the rollouts list, or None if the file does not exist.
     """
     if testing:
         fname = "itr_{0:04d}_eval_rollouts.pkl".format(itr)
     else:
         fname = "itr_{0:04d}_train_rollouts.pkl".format(itr)

     path = os.path.join(self._data_dir, fname)
     if not os.path.exists(path):
         return None
     return mypickle.load(path)['rollouts']
Example #8
0
    def _load_rollouts(self, file_func):
        """Load rollouts for consecutive iterations 0, 1, ... until a file is missing.

        If self._clear_obs is set, observations are dropped to save memory.
        Returns a list with one rollouts list per iteration.
        """
        rollouts_itrs = []
        itr = 0
        fname = file_func(itr)
        while os.path.exists(fname):
            rollouts = mypickle.load(fname)['rollouts']
            if self._clear_obs:
                for rollout in rollouts:
                    rollout['observations'] = None
            rollouts_itrs.append(rollouts)
            itr += 1
            fname = file_func(itr)

        return rollouts_itrs
Example #9
0
 def _get_rollouts(self):
     """Extract (pos, hpr) trajectories and goal headings from the data file.

     Returns a (rollouts, goals) pair of parallel lists with one entry per
     trajectory; the final env_info of each trajectory is skipped.
     """
     data = mypickle.load(self._data_path)
     rollouts = []
     goals = []
     for traj in data['rollouts']:
         steps = traj['env_infos'][:-1]
         rollouts.append([(s['pos'], s['hpr']) for s in steps])
         goals.append([s['goal_h'] for s in steps])
     return rollouts, goals
Example #10
0
    def _get_inference_step(self):
        """Return the total number of environment steps across all saved
        train-rollout files (iterations 0, 1, ... until a file is missing)."""
        inference_step = 0
        inference_itr = 0
        fname = self._train_rollouts_file_name(inference_itr)
        while os.path.exists(fname):
            rollouts = mypickle.load(fname)['rollouts']
            inference_step += sum(len(r['dones']) for r in rollouts)
            inference_itr += 1
            fname = self._train_rollouts_file_name(inference_itr)

        return inference_step
Example #11
0
    def _load_data(self, folder):
        """
        Loads all .pkl files that can be found recursively from this folder
        and stores their rollouts in a (possibly bootstrapped) replay pool.

        :param folder: root folder searched recursively for .pkl files
        :return: a filled ReplayPool (or BootstrapReplayPool if
            self.num_bootstraps is set)
        """
        assert (os.path.exists(folder))

        rollouts = []
        num_load_success, num_load_fail = 0, 0
        for fname in glob.iglob('{0}/**/*.pkl'.format(folder), recursive=True):
            try:
                rollouts += mypickle.load(fname)['rollouts']
                num_load_success += 1
            except Exception:
                # narrowed from a bare `except:` so KeyboardInterrupt /
                # SystemExit are no longer swallowed; corrupt files are
                # still skipped best-effort
                num_load_fail += 1
        num_files = num_load_success + num_load_fail
        # guard against ZeroDivisionError when no pkl files were found
        if num_files > 0:
            logger.info('Files successfully loaded: {0:.2f}%'.format(
                100. * num_load_success / float(num_files)))
        else:
            logger.info('No pkl files found in {0}'.format(folder))

        num_bootstraps = self.num_bootstraps
        if num_bootstraps is not None:
            logger.info('Creating {0} bootstraps'.format(num_bootstraps))
            ReplayPoolClass = lambda **kwargs: BootstrapReplayPool(
                num_bootstraps, **kwargs)
        else:
            ReplayPoolClass = ReplayPool

        replay_pool = ReplayPoolClass(
            env_spec=self._env.spec,
            env_horizon=self._env.horizon,
            N=self._model.N,
            gamma=self._model.gamma,
            # 10% headroom over the exact number of loaded timesteps
            size=int(1.1 * sum([len(r['dones']) for r in rollouts])),
            obs_history_len=self._model.obs_history_len,
            sampling_method='uniform',
            save_rollouts=False,
            save_rollouts_observations=False,
            save_env_infos=True,
            replay_pool_params={})

        replay_pool.store_rollouts(0, rollouts)

        return replay_pool
Example #12
0
    def store_rollouts(self, rlist, max_to_add=None):
        """
        rlist can be a list of pkl filenames, or rollout dictionaries

        :param rlist: iterable of pkl filenames (str) or rollout dicts
        :param max_to_add: if not None, stop (without storing) as soon as the
            next rollout would push the total timestep count to this cap
        :raises NotImplementedError: if an rlist entry is neither str nor dict
        """
        step = len(self)

        for rlist_entry in rlist:
            # isinstance is the idiomatic type check (and, like the original
            # issubclass(type(...)) form, accepts dict subclasses)
            if isinstance(rlist_entry, str):
                rollouts = mypickle.load(rlist_entry)['rollouts']
            elif isinstance(rlist_entry, dict):
                rollouts = [rlist_entry]
            else:
                raise NotImplementedError

            for rollout in rollouts:
                r_len = len(rollout['dones'])
                # NOTE: unlike the truncating add_rollouts variant, this drops
                # the whole rollout that would cross the cap and stops entirely
                if max_to_add is not None and step + r_len >= max_to_add:
                    return

                self._store_rollout(step, rollout)
                step += r_len
Example #13
0
def extract_images_from_pkls(pkl_folder, save_folder, maxsaved, image_shape,
                             rescale, bordersize):
    """
    :param pkl_folder: folder containing pkls with training images
    :param save_folder: where to save the resulting images
    :param maxsaved: how many images to save
    :param image_shape: shape of the image
    :param rescale: make rescale times bigger, for ease of labelling
    :param bordersize: how much to pad with 0s, for ease of labelling
    """
    # fixed seed so the same images are selected on every run
    random.seed(0)

    fnames = glob.glob(os.path.join(pkl_folder, '*.pkl'))
    random.shuffle(fnames)
    logger.info('{0} files to read'.format(len(fnames)))
    fnames = itertools.cycle(fnames)

    height, width, channels = image_shape
    pad = (bordersize, bordersize)

    im_num = 0
    while im_num < maxsaved:
        # pick a random rollout from the next file, then a random observation
        rollout = random.choice(mypickle.load(next(fnames))['rollouts'])
        obs = random.choice(rollout['observations_im'])

        im = np.reshape(obs, image_shape)
        # enlarge and zero-pad the border to make hand-labelling easier
        im = utils.imresize(im, (rescale * height, rescale * width, channels))
        im = np.pad(im, (pad, pad, (0, 0)), 'constant')
        if im.shape[-1] == 1:
            # grayscale: drop the channel axis for PIL
            im = im[:, :, 0]
        save_path = os.path.join(save_folder,
                                 'image_{0:06d}.jpg'.format(im_num))
        Image.fromarray(im).save(save_path)
        im_num += 1

    logger.info('Saved {0} images'.format(im_num))
Example #14
0
    def __init__(self, env, steps, save_file, H, K):
        """Gather random data to sample start poses from, build a replay pool,
        and pre-sample K random action sequences of length H."""
        self._env = env
        self._steps = steps
        self._save_file = save_file
        self._H = H
        self._K = K

        # run a random-data gatherer just to harvest starting positions
        grd = GatherRandomData(env, int(100 * steps), '/tmp/tmp.pkl')
        grd.run()

        rollouts = mypickle.load('/tmp/tmp.pkl')['rollouts']
        self._start_poses = []
        while len(self._start_poses) < steps:
            rollout = random.choice(rollouts)
            env_info = random.choice(rollout['env_infos'])
            # fall back to the first env_info when the sampled one lacks a pose
            if 'pos' not in env_info:
                env_info = rollout['env_infos'][0]
            self._start_poses.append((env_info['pos'], env_info['hpr']))

        self._replay_pool = ReplayPool(self._env.spec,
                                       self._env.horizon,
                                       1,
                                       1,
                                       int(1e5),
                                       obs_history_len=1,
                                       sampling_method='uniform',
                                       save_rollouts=True,
                                       save_rollouts_observations=True,
                                       save_env_infos=True,
                                       replay_pool_params={})

        # K candidate action sequences, each H actions long
        self._action_sequences = np.array(
            [[self._env.action_space.sample() for _ in range(self._H)]
             for _ in range(self._K)])
Example #15
0
    def _eval_pred_all(self, eval_on_holdout):
        """Evaluate the model's collision predictions on an entire replay pool.

        Results are cached to a pkl file; if the cache exists it is loaded
        instead of re-running the evaluation.

        :param eval_on_holdout: evaluate on the holdout pool (True) or the
            train pool (False)
        :return: dict with keys 'coll_labels', 'coll_preds', 'env_infos',
            'dones', each a concatenated numpy array
        """
        pkl_file_name = self._eval_train_rollouts_file_name if not eval_on_holdout else self._eval_holdout_rollouts_file_name

        if os.path.exists(pkl_file_name):
            logger.info('Load evaluation rollouts for {0}'.format(
                'holdout' if eval_on_holdout else 'train'))
            d = mypickle.load(pkl_file_name)
        else:
            logger.info('Evaluating model on {0}'.format(
                'holdout' if eval_on_holdout else 'train'))

            replay_pool = self._replay_holdout_pool if eval_on_holdout else self._replay_pool

            # get collision idx in obs_vec
            vec_spec = self._env.observation_vec_spec
            obs_vec_start_idxs = np.cumsum(
                [space.flat_dim for space in vec_spec.values()]) - 1
            coll_idx = obs_vec_start_idxs[list(vec_spec.keys()).index('coll')]

            # model will be evaluated on 1e3 inputs at a time (accounting for bnn samples)
            batch_size = 1000 // self._num_bnn_samples
            assert (batch_size > 1)
            rp_gen = replay_pool.sample_all_generator(batch_size=batch_size,
                                                      include_env_infos=True)

            # keep everything in dict d
            d = defaultdict(list)
            for steps, (observations_im, observations_vec
                        ), actions, rewards, dones, env_infos in rp_gen:
                # the first obs_history_len timesteps are the model input
                observations = (
                    observations_im[:, :self._model.obs_history_len, :],
                    observations_vec[:, :self._model.obs_history_len, :])
                # label becomes 1 from the first future collision onward
                coll_labels = (np.cumsum(
                    observations_vec[:, self._model.obs_history_len:,
                                     coll_idx],
                    axis=1) >= 1.).astype(float)

                # repeat each input so every bnn sample sees every input
                observations_repeat = (np.repeat(observations[0],
                                                 self._num_bnn_samples,
                                                 axis=0),
                                       np.repeat(observations[1],
                                                 self._num_bnn_samples,
                                                 axis=0))
                actions_repeat = np.repeat(actions,
                                           self._num_bnn_samples,
                                           axis=0)

                yhats, bhats = self._model.get_model_outputs(
                    observations_repeat, actions_repeat)
                # regroup predictions to (batch, bnn samples, -1)
                coll_preds = np.reshape(
                    yhats['coll'], (len(steps), self._num_bnn_samples, -1))

                d['coll_labels'].append(coll_labels)
                d['coll_preds'].append(coll_preds)
                d['env_infos'].append(env_infos)
                d['dones'].append(dones)
                # Note: you can save more things (e.g. actions) if you want to do something with them later

            for k, v in d.items():
                d[k] = np.concatenate(v)

            # cache so subsequent calls skip the evaluation entirely
            mypickle.dump(d, pkl_file_name)

        return d
Example #16
0
    def plot_online_switching_std(self,
                                  H_avert=4,
                                  H_takeover=4,
                                  H_signal=4,
                                  save_dir=None):
        """Plot fraction autonomous vs fraction of crashes for a sweep of
        std thresholds; sweep results are cached to a pkl in save_dir.

        :param H_avert: horizon passed through to plot_online_switching
        :param H_takeover: horizon passed through to plot_online_switching
        :param H_signal: horizon passed through to plot_online_switching
        :param save_dir: required; where the cache pkl and figure are saved
        :return: the matplotlib figure
        """
        import matplotlib.pyplot as plt

        assert (save_dir is not None)
        pkl_fname = os.path.join(
            save_dir, 'plot_online_switching_std_Ha{0}_Ht{1}_Hs{2}.pkl'.format(
                H_avert, H_takeover, H_signal))

        if not os.path.exists(pkl_fname):
            # sweep std thresholds over [0, 0.3]
            stds = np.linspace(0, 0.3, 1000)
            stds_used, frac_autonomous_list, frac_crashes_averted_list = [], [], []
            for std in stds:
                frac_autonomous, frac_crashes_averted = self.plot_online_switching(
                    H_avert=H_avert,
                    H_takeover=H_takeover,
                    H_signal=H_signal,
                    std_thresh=std)

                # record only thresholds that noticeably change frac_autonomous
                if len(frac_autonomous_list) == 0 or abs(
                        frac_autonomous - frac_autonomous_list[-1]) > 1e-3:
                    stds_used.append(std)
                    frac_autonomous_list.append(frac_autonomous)
                    frac_crashes_averted_list.append(frac_crashes_averted)

            # cache the sweep so later calls only re-plot
            mypickle.dump(
                {
                    'stds_used': stds_used,
                    'frac_autonomous_list': frac_autonomous_list,
                    'frac_crashes_averted_list': frac_crashes_averted_list
                }, pkl_fname)
        else:
            d = mypickle.load(pkl_fname)
            stds_used = d['stds_used']
            frac_autonomous_list = d['frac_autonomous_list']
            frac_crashes_averted_list = d['frac_crashes_averted_list']

        # extend the curve to fully autonomous at the final threshold
        stds_used.append(stds_used[-1])
        frac_autonomous_list.append(1.0)
        frac_crashes_averted_list.append(frac_crashes_averted_list[-1])

        f, ax = plt.subplots(1, 1)

        # y axis is fraction of crashes NOT averted
        ax.plot(frac_autonomous_list,
                1 - np.array(frac_crashes_averted_list),
                color='r')

        ax.set_xlim((0, 1.05))
        ax.set_ylim((0, 1.05))
        plt.xticks(np.linspace(0., 1., 11))
        plt.yticks(np.linspace(0., 1., 11))

        # Customize the major grid
        ax.grid(which='major', linestyle=':', linewidth='0.5', color='black')

        ax.set_xlabel('Fraction autonomous')
        ax.set_ylabel('Fraction crashes')
        ax.set_title('Autonomy vs crashes averted with std thresholding')

        if save_dir:
            f.savefig(os.path.join(save_dir, 'plot_online_switching_std.png'),
                      dpi=200,
                      bbox_inches='tight')

        return f
Example #17
0
def plot_rw_rccar_var001_var016(ckpt_itr=None):
    """For each experiment, run the restored policy over saved rosbag
    rollouts and render per-timestep plots of the camera image alongside
    candidate action trajectories colored by predicted collision probability.

    :param ckpt_itr: checkpoint iteration to restore (None = latest,
        presumably -- TODO confirm restore_policy's default behavior)
    """
    label_params =[['exp', ('exp_name',)]]

    experiment_groups = [
        ExperimentGroup(os.path.join(DATA_DIR, 'rw_rccar/var{0:03d}'.format(i)),
                        label_params=label_params,
                        plot={

                        }) for i in [19]
    ]

    mec = MultiExperimentComparison(experiment_groups)
    exps = mec.list

    ### plot length of the rollouts
    # f, axes = plt.subplots(1, len(exps), figsize=(32, 6), sharex=True, sharey=True)
    # for i, (exp, ax) in enumerate(zip(exps, axes)):
    #     rollouts = list(itertools.chain(*exp.train_rollouts))
    #     lengths = [len(r['dones']) for r in rollouts][:16]
    #     assert (len(lengths) == 16)
    #     steps = np.arange(len(lengths))
    #
    #     label = '{0}, height: {1}, color: {2}, H: {3}'.format(
    #         exp.name,
    #         exp.params['alg']['env'].split("'obs_shape': (")[1].split(',')[0],
    #         exp.params['alg']['env'].split(',')[-1].split(')})')[0],
    #         exp.params['policy']['H']
    #     )
    #
    #     ax.scatter(steps, lengths, color=cm.magma(i / len(exps)))
    #     ax.set_title(label)
    #     ax.legend()
    #
    # plt.tight_layout()
    # f.savefig('plots/rw-rccar/var001_016.png', bbox_inches='tight', dpi=100)

    ### plot policy on the rollouts

    # candidate actions
    for exp in exps:
        # subsample to roughly 16 evenly spaced rollouts
        rollouts = mypickle.load(os.path.join(exp.folder, 'rosbag_rollouts_00.pkl'))['rollouts']
        rollouts = rollouts[::len(rollouts) // 16]
        # rollouts = list(itertools.chain(*exp.train_rollouts))[:16]

        tf_sess, tf_graph = GCGPolicy.create_session_and_graph(gpu_device=1, gpu_frac=0.6)

        with tf_sess.as_default(), tf_graph.as_default():
            exp.create_env()
            exp.create_policy()
            exp_ckpt_itr = exp.restore_policy(itr=ckpt_itr)

            # K random candidate action sequences of length H+1
            K = 2048
            actions = np.random.uniform(*exp.env.action_space.bounds,
                                        size=(K, exp.policy.H + 1, exp.env.action_space.flat_dim))

            replay_pool = ReplayPool(
                env_spec=exp.env.spec,
                env_horizon=exp.env.horizon,
                N=exp.policy.N,
                gamma=1,
                size=int(1.1 * sum([len(r['dones']) for r in rollouts])),
                obs_history_len=exp.policy.obs_history_len,
                sampling_method='uniform'
            )

            # replay each rollout through the pool to build observation
            # histories, evaluating the policy on all K candidates per step
            step = 0
            outputs = []
            for i, r in enumerate(rollouts):
                r_len = len(r['dones'])
                outputs_i = []
                for j in range(r_len):
                    # evaluate and get output
                    observation = (r['observations'][j][0], np.empty([exp.policy.obs_history_len, 0]))
                    replay_pool.store_observation(step, observation)

                    encoded_observation = replay_pool.encode_recent_observation()

                    observation_im, observation_vec = encoded_observation
                    observations = (np.tile(observation_im, (K, 1, 1)), np.tile(observation_vec, (K, 1, 1)))

                    probcolls = exp.policy.get_model_outputs(observations, actions)
                    outputs_i.append(probcolls)

                    step += 1
                    replay_pool.store_effect(
                        r['actions'][j],
                        r['rewards'][j],
                        r['dones'][j],
                        None,
                        r['est_values'][j],
                        r['logprobs'][j]
                    )

                outputs.append(outputs_i)

        # left axis: camera image, right axis: candidate trajectories
        f, axes = plt.subplots(1, 2, figsize=(12, 8))
        imshow = None

        plot_folder = os.path.join(exp.folder, 'plot', 'ckpt_{0:03d}'.format(exp_ckpt_itr))
        os.makedirs(plot_folder, exist_ok=True)

        for i, (r_i, output_i) in enumerate(zip(rollouts, outputs)):
            for j, (obs, cost) in enumerate(zip(r_i['observations'], output_i)):
                obs_im, obs_vec = obs
                # model outputs are negated costs
                probcoll = -cost

                # plot image
                im = np.reshape(obs_im, exp.env.observation_im_space.shape)
                is_gray = (im.shape[-1] == 1)
                if is_gray:
                    im = im[:, :, 0]
                    color = 'Greys_r'
                else:
                    color=None

                if imshow is None:
                    imshow = axes[0].imshow(im, cmap=color)
                else:
                    imshow.set_data(im)

                # plot probcolls
                # integrate steering angles into (x, y) trajectories
                steers = actions[:, :-1, 0]
                angle_const = 0.5 * np.pi / 2.
                angles = angle_const * steers
                ys = np.cumsum(np.cos(angles), axis=1)
                xs = np.cumsum(-np.sin(angles), axis=1)
                sort_idxs = np.argsort(probcoll)

                xlim = (min(xs.min(), 0), max(xs.max(), 0))
                ylim = (min(ys.min(), -0.5), max(ys.max(), 0.5))
                min_probcoll = probcoll.min()
                max_probcoll = probcoll.max()

                # subsample to `keep` trajectories spread across sorted probcoll
                keep = 10
                ys = ys[sort_idxs][::K//keep]
                xs = xs[sort_idxs][::K//keep]
                probcoll = probcoll[sort_idxs][::K//keep]
                steers = steers[sort_idxs][::K//keep]

                # prepend the origin to every trajectory
                ys = np.hstack((np.zeros((len(ys), 1)), ys))
                xs = np.hstack((np.zeros((len(xs), 1)), xs))

                # if lines is None:
                axes[1].cla()
                axes[1].plot(0, 0, 'rx', markersize=10)
                # lines = axes[1].plot(np.expand_dims(xs[:,-1], 0), np.expand_dims(ys[:,-1], 0),
                #                      marker='o', linestyle='', markersize=2)
                lines = axes[1].plot(xs.T, ys.T)
                axes[1].plot(xs[0,:], ys[0,:], 'b^', linestyle='', markersize=5)
                axes[1].arrow(0, 0, -2*np.sin(0.5*np.pi * steers[0,0]), 2*np.cos(0.5*np.pi * steers[0,0]), fc='b', ec='b')

                #normalize for color reasons
                # probcoll -= probcoll.min()
                # probcoll /= probcoll.max()
                for l, p in zip(lines, probcoll):
                    l.set_color(cm.viridis(1 - p))
                    l.set_markerfacecolor(cm.viridis(1 - p))

                axes[1].set_xlim(xlim)
                axes[1].set_ylim(ylim)
                axes[1].set_aspect('equal')

                axes[1].set_title('steer {0:.3f}, probcoll in [{1:.2f}, {2:.2f}]'.format(-steers[0, 0], min_probcoll, max_probcoll))

                # one png per (rollout, timestep)
                f.savefig(os.path.join(plot_folder, 'rollout_{0:03d}_t_{1:03d}.png'.format(i, j)),
                          bbox_inches='tight', dpi=200)
                # break
            # break

        plt.close(f)

        tf_sess.close()