def add_rollouts(self, rlist, max_to_add=None):
    """Distribute rollouts round-robin across the replay pools.

    :param rlist: iterable whose entries are pkl filenames (str) or
        rollout dicts
    :param max_to_add: optional cap on total stored timesteps; the rollout
        that reaches the cap is truncated to fit and adding stops
    :raises NotImplementedError: for rlist entries that are neither str nor dict
    """
    # resume step counting from whatever the pools already hold
    step = sum(len(replay_pool) for replay_pool in self._replay_pools)
    replay_pools = itertools.cycle(self._replay_pools)
    done_adding = False
    for rlist_entry in rlist:
        if isinstance(rlist_entry, str):
            rollouts = mypickle.load(rlist_entry)['rollouts']
        elif isinstance(rlist_entry, dict):
            rollouts = [rlist_entry]
        else:
            raise NotImplementedError
        for rollout, replay_pool in zip(rollouts, replay_pools):
            r_len = len(rollout['dones'])
            if max_to_add is not None and step + r_len >= max_to_add:
                # truncate this rollout so the total lands exactly on max_to_add
                diff = max_to_add - step
                for k in ('observations', 'actions', 'rewards', 'dones'):
                    rollout[k] = rollout[k][:diff]
                done_adding = True
                r_len = len(rollout['dones'])
            replay_pool.store_rollout(step, rollout)
            step += r_len
            if done_adding:
                break
        if done_adding:
            break
def _write_tfrecord(pkl_fname, tfrecord_fname, obs_shape):
    """Convert a pkl of rollouts into a tfrecord of (image, right-lane mask) pairs.

    :param pkl_fname: pkl file containing a 'rollouts' list; each rollout's
        env_infos must hold 'rgb' images and 'semantic' segmentation frames
    :param tfrecord_fname: output tfrecord path
    :param obs_shape: (height, width, channels) target size for the images
    """
    # load before opening the writer so a bad pkl doesn't leave an empty tfrecord behind
    rollouts = mypickle.load(pkl_fname)['rollouts']
    writer = tf.python_io.TFRecordWriter(tfrecord_fname)
    try:
        for r in rollouts:
            images = r['env_infos']['rgb']
            semantics = r['env_infos']['semantic']
            for image, semantic in zip(images, semantics):
                # binary right-lane mask derived from the semantic frame
                rightlane_mask = CarlaCollSpeedRoadEnv.get_rightlane(
                    semantic)[0].astype(np.uint8)
                image = utils.imresize(image, obs_shape)
                # resize the single-channel mask with bilinear interpolation
                rightlane_mask = utils.imresize(rightlane_mask[..., None],
                                                obs_shape[:2] + [1],
                                                PIL.Image.BILINEAR)[..., -1]
                example = tf.train.Example(features=tf.train.Features(
                    feature={
                        'image': RoadLabeller._bytes_feature(image.tostring()),
                        'label': RoadLabeller._bytes_feature(rightlane_mask.tostring()),
                    }))
                writer.write(example.SerializeToString())
    finally:
        # always close so the tfrecord is flushed even if conversion fails midway
        writer.close()
def _load_rollouts(self, itr):
    """Return the rollouts saved for iteration `itr`, or [] when no file exists."""
    fname = self._rollouts_file_name(itr)
    if not os.path.exists(fname):
        return []
    return mypickle.load(fname)['rollouts']
def add_rollouts(self, rollout_filenames, max_to_add=None):
    """Load rollouts from pkl files and distribute them round-robin across the replay pools.

    :param rollout_filenames: iterable of pkl filenames, each holding a
        'rollouts' list
    :param max_to_add: optional cap on total stored timesteps; the rollout
        that reaches the cap is truncated to fit and adding stops
    """
    # resume step counting from whatever the pools already hold
    step = sum(len(replay_pool) for replay_pool in self._replay_pools)
    replay_pools = itertools.cycle(self._replay_pools)
    done_adding = False
    for fname in rollout_filenames:
        rollouts = mypickle.load(fname)['rollouts']
        for rollout, replay_pool in zip(rollouts, replay_pools):
            r_len = len(rollout['dones'])
            if max_to_add is not None and step + r_len >= max_to_add:
                # truncate this rollout so the total lands exactly on max_to_add
                diff = max_to_add - step
                for k in ('observations', 'actions', 'rewards', 'dones', 'logprobs'):
                    rollout[k] = rollout[k][:diff]
                done_adding = True
                r_len = len(rollout['dones'])
            replay_pool.store_rollout(step, rollout)
            step += r_len
            if done_adding:
                break
        if done_adding:
            break
def _convert_rollout(self, i, pkl_fname, pkl_suffix):
    """Convert one pkl of rollouts into a tfrecord of tf SequenceExamples.

    :param i: index used to order the output filename
    :param pkl_fname: input pkl with a 'rollouts' list
    :param pkl_suffix: suffix appended to the output filename
    :raises AssertionError: if a converted rollout is missing a required key
    """
    rollouts = mypickle.load(pkl_fname)['rollouts']
    tfrecord_fname = os.path.join(
        self._output_folder, '{0:05d}_{1}.tfrecord'.format(i, pkl_suffix))
    writer = tf.python_io.TFRecordWriter(tfrecord_fname)
    try:
        for r in rollouts:
            ### modify the rollout (in case the env is different)!
            r = self._create_rollout(r, self._labeller)
            for k in ('observations_im', 'observations_vec', 'actions',
                      'dones', 'steps', 'goals'):
                assert k in r, '{0} not in rollout!'.format(k)
            ex = tf.train.SequenceExample()
            for k, np_dtype in zip(
                    GCGPolicyTfrecord.tfrecord_feature_names,
                    GCGPolicyTfrecord.tfrecord_feature_np_types):
                fl = ex.feature_lists.feature_list[k]
                for feature in r[k]:
                    fl.feature.add().bytes_list.value.append(
                        feature.astype(np_dtype).tostring())
            writer.write(ex.SerializeToString())
    finally:
        # always close so the tfrecord is flushed even if conversion fails midway
        writer.close()
def plot_rw_rccar_var001_var016():
    """Box-plot timesteps survived per start position for several rw_rccar variants.

    Loads the iteration-39 evaluation rollouts of each experiment variant,
    groups rollout lengths by start position (8 positions x 3 trials each),
    and draws one colour-coded box plot per experiment with a legend of medians.
    """
    label_params = [
        # ['exp', ('exp_name',)],
        ['policy', ('policy', 'GCGPolicy', 'outputs', 0, 'name')],
        ['H', ('policy', 'H')],
        ['target', ('policy', 'use_target')],
        ['obs_shape', ('alg', 'env', 'params', 'obs_shape')]
    ]
    experiment_groups = [
        ExperimentGroup(os.path.join(DATA_DIR, 'rw_rccar/var{0:03d}'.format(num)),
                        label_params=label_params,
                        plot={
                        })
        for num in [1, 5, 9, 12, 13]]
    mec = MultiExperimentComparison(experiment_groups)

    # gather rollout lengths (timesteps survived) for each experiment
    lengths_list = []
    for exp in mec.list:
        eval_folder = os.path.join(exp.folder, 'eval_itr_0039')
        eval_pkl_fname = os.path.join(eval_folder, 'itr_0039_eval_rollouts.pkl')
        rollouts = mypickle.load(eval_pkl_fname)['rollouts']
        assert (len(rollouts) == 24)  # presumably 8 start positions x 3 trials — matches the reshape below
        lengths = [len(r['dones']) for r in rollouts]
        lengths_list.append(lengths)

    f, ax = plt.subplots(1, 1)
    legend_patches = []
    for i, (exp, lengths) in enumerate(zip(mec.list, lengths_list)):
        lengths = np.reshape(lengths, (8, 3))
        # offset each experiment's boxes so they sit side by side per position
        width = 0.6 / float(len(lengths_list))
        color = cm.viridis(i / float(len(lengths_list)))
        label = 'median: {0}, {1}'.format(np.median(lengths), exp.plot['label'])
        bp = ax.boxplot(lengths.T,
                        positions=np.arange(len(lengths)) + 1.2 * i * width,
                        widths=width,
                        patch_artist=True)
        for patch in bp['boxes']:
            patch.set_facecolor(color)
        legend_patches.append(mpatches.Patch(color=color, label=label))

    ax.legend(handles=legend_patches)
    ax.xaxis.set_ticks(np.arange(8))
    ax.xaxis.set_ticklabels(np.arange(8))
    ax.set_xlim((-0.5, 8.5))
    ax.set_xlabel('Start Position Number')
    ax.set_ylabel('Timesteps survived')

    plt.show()

    # NOTE(review): debugging leftover — drops into an interactive shell after
    # the plot window closes; remove if running unattended
    import IPython; IPython.embed()
def _get_itr_rollouts(self, itr, testing=False):
    """Return the saved rollouts for iteration `itr`, or None when no file exists.

    :param itr: iteration number used to form the pkl filename
    :param testing: load the eval rollouts instead of the train rollouts
    """
    template = "itr_{0:04d}_eval_rollouts.pkl" if testing else "itr_{0:04d}_train_rollouts.pkl"
    path = os.path.join(self._data_dir, template.format(itr))
    if not os.path.exists(path):
        return None
    return mypickle.load(path)['rollouts']
def _load_rollouts(self, file_func):
    """Load rollouts for iterations 0, 1, 2, ... until a file is missing.

    :param file_func: maps an iteration number to its rollout pkl filename
    :return: list with one rollout-list per consecutive existing iteration
    """
    per_itr_rollouts = []
    itr = 0
    while os.path.exists(file_func(itr)):
        rollouts = mypickle.load(file_func(itr))['rollouts']
        if self._clear_obs:
            # drop the (large) observations to save memory
            for rollout in rollouts:
                rollout['observations'] = None
        per_itr_rollouts.append(rollouts)
        itr += 1
    return per_itr_rollouts
def _get_rollouts(self):
    """Extract per-trajectory (pos, hpr) poses and goal headings from the data file.

    The final env_info of each trajectory is skipped.

    :return: (rollouts, goals) — parallel lists, one entry per trajectory
    """
    data = mypickle.load(self._data_path)
    rollouts = []
    goals = []
    for traj in data['rollouts']:
        steps = traj['env_infos'][:-1]
        rollouts.append([(step['pos'], step['hpr']) for step in steps])
        goals.append([step['goal_h'] for step in steps])
    return rollouts, goals
def _get_inference_step(self):
    """Return the total number of environment steps across all saved train-rollout files."""
    total_steps = 0
    itr = 0
    fname = self._train_rollouts_file_name(itr)
    while os.path.exists(fname):
        rollouts = mypickle.load(fname)['rollouts']
        # each rollout contributes one step per 'dones' entry
        total_steps += sum(len(r['dones']) for r in rollouts)
        itr += 1
        fname = self._train_rollouts_file_name(itr)
    return total_steps
def _load_data(self, folder):
    """ Loads all .pkl files that can be found recursively from this folder
    and stores their rollouts into a freshly-built replay pool.

    Unreadable/corrupt pkl files are skipped (best-effort load).

    :param folder: root folder to search recursively for .pkl files
    :return: a ReplayPool (or BootstrapReplayPool when self.num_bootstraps
        is set) containing every successfully-loaded rollout
    """
    assert (os.path.exists(folder))

    rollouts = []
    num_load_success, num_load_fail = 0, 0
    for fname in glob.iglob('{0}/**/*.pkl'.format(folder), recursive=True):
        try:
            rollouts += mypickle.load(fname)['rollouts']
            num_load_success += 1
        except Exception:
            # best-effort: skip files that fail to load, count them below
            num_load_fail += 1
    num_attempted = num_load_success + num_load_fail
    # guard: avoid ZeroDivisionError when the folder contains no pkl files
    if num_attempted > 0:
        logger.info('Files successfully loaded: {0:.2f}%'.format(
            100. * num_load_success / float(num_attempted)))

    num_bootstraps = self.num_bootstraps
    if num_bootstraps is not None:
        logger.info('Creating {0} bootstraps'.format(num_bootstraps))
        ReplayPoolClass = lambda **kwargs: BootstrapReplayPool(
            num_bootstraps, **kwargs)
    else:
        ReplayPoolClass = ReplayPool

    # size with 10% headroom over the total number of loaded timesteps
    replay_pool = ReplayPoolClass(
        env_spec=self._env.spec,
        env_horizon=self._env.horizon,
        N=self._model.N,
        gamma=self._model.gamma,
        size=int(1.1 * sum(len(r['dones']) for r in rollouts)),
        obs_history_len=self._model.obs_history_len,
        sampling_method='uniform',
        save_rollouts=False,
        save_rollouts_observations=False,
        save_env_infos=True,
        replay_pool_params={})
    replay_pool.store_rollouts(0, rollouts)

    return replay_pool
def store_rollouts(self, rlist, max_to_add=None):
    """ rlist can be a list of pkl filenames, or rollout dictionaries

    :param rlist: iterable whose entries are pkl filenames (str) or
        rollout dicts
    :param max_to_add: optional cap on total stored timesteps; a rollout
        that would reach or exceed the cap is dropped (not truncated) and
        storing stops
    :raises NotImplementedError: for rlist entries that are neither str nor dict
    """
    # continue step counting from what this pool already holds
    step = len(self)
    for rlist_entry in rlist:
        if isinstance(rlist_entry, str):
            rollouts = mypickle.load(rlist_entry)['rollouts']
        elif isinstance(rlist_entry, dict):
            rollouts = [rlist_entry]
        else:
            raise NotImplementedError
        for rollout in rollouts:
            r_len = len(rollout['dones'])
            if max_to_add is not None and step + r_len >= max_to_add:
                return
            self._store_rollout(step, rollout)
            step += r_len
def extract_images_from_pkls(pkl_folder, save_folder, maxsaved, image_shape,
                             rescale, bordersize):
    """
    :param pkl_folder: folder containing pkls with training images
    :param save_folder: where to save the resulting images
    :param maxsaved: how many images to save
    :param image_shape: shape of the image
    :param rescale: make rescale times bigger, for ease of labelling
    :param bordersize: how much to pad with 0s, for ease of labelling
    """
    random.seed(0)  # deterministic file order / sampling across runs

    fnames = glob.glob(os.path.join(pkl_folder, '*.pkl'))
    random.shuffle(fnames)
    logger.info('{0} files to read'.format(len(fnames)))
    # guard: cycling an empty list would raise StopIteration on the first next()
    if not fnames:
        logger.info('Saved {0} images'.format(0))
        return
    fnames = itertools.cycle(fnames)

    height, width, channels = image_shape  # loop-invariant
    im_num = 0
    while im_num < maxsaved:
        fname = next(fnames)
        rollout = random.choice(mypickle.load(fname)['rollouts'])
        obs = random.choice(rollout['observations_im'])
        im = np.reshape(obs, image_shape)
        # enlarge and pad with a zero border to make manual labelling easier
        im = utils.imresize(im, (rescale * height, rescale * width, channels))
        im = np.pad(im, ((bordersize, bordersize), (bordersize, bordersize), (0, 0)),
                    'constant')
        if im.shape[-1] == 1:
            im = im[:, :, 0]  # PIL expects 2-D arrays for grayscale
        Image.fromarray(im).save(
            os.path.join(save_folder, 'image_{0:06d}.jpg'.format(im_num)))
        im_num += 1
    logger.info('Saved {0} images'.format(im_num))
def __init__(self, env, steps, save_file, H, K):
    """Gather random-data start poses and prepare candidate action sequences.

    :param env: environment to sample from
    :param steps: number of start poses to collect
    :param save_file: output file for this object's results
    :param H: planning horizon (length of each action sequence)
    :param K: number of candidate action sequences
    """
    self._env = env
    self._steps = steps
    self._save_file = save_file
    self._H = H
    self._K = K

    # get starting positions by running a random-data gatherer and sampling
    # poses from its rollouts. Use a unique temp file instead of a hard-coded
    # /tmp path so concurrent runs cannot clobber each other.
    import os
    import tempfile
    fd, tmp_pkl_fname = tempfile.mkstemp(suffix='.pkl')
    os.close(fd)
    try:
        grd = GatherRandomData(env, int(100 * steps), tmp_pkl_fname)
        grd.run()
        rollouts = mypickle.load(tmp_pkl_fname)['rollouts']
    finally:
        os.remove(tmp_pkl_fname)
    self._start_poses = []
    while len(self._start_poses) < steps:
        rollout = random.choice(rollouts)
        env_info = random.choice(rollout['env_infos'])
        if 'pos' not in env_info:
            # fall back to the first env_info of the rollout
            env_info = rollout['env_infos'][0]
        self._start_poses.append((env_info['pos'], env_info['hpr']))

    self._replay_pool = ReplayPool(self._env.spec,
                                   self._env.horizon,
                                   1,
                                   1,
                                   int(1e5),
                                   obs_history_len=1,
                                   sampling_method='uniform',
                                   save_rollouts=True,
                                   save_rollouts_observations=True,
                                   save_env_infos=True,
                                   replay_pool_params={})

    # K random action sequences of length H
    self._action_sequences = np.array(
        [[self._env.action_space.sample() for _ in range(self._H)]
         for _ in range(self._K)])
def _eval_pred_all(self, eval_on_holdout):
    """Evaluate the model's collision predictions over an entire replay pool.

    Results are cached: if the pkl for the requested split already exists it
    is loaded instead of recomputed, and a fresh evaluation is dumped to that
    pkl afterwards.

    :param eval_on_holdout: evaluate on the holdout pool instead of train
    :return: dict with concatenated arrays 'coll_labels', 'coll_preds'
        (shape (num_samples, num_bnn_samples, -1)), 'env_infos', 'dones'
    """
    # pick the cache file for the requested split
    pkl_file_name = self._eval_train_rollouts_file_name if not eval_on_holdout else self._eval_holdout_rollouts_file_name

    if os.path.exists(pkl_file_name):
        logger.info('Load evaluation rollouts for {0}'.format(
            'holdout' if eval_on_holdout else 'train'))
        d = mypickle.load(pkl_file_name)
    else:
        logger.info('Evaluating model on {0}'.format(
            'holdout' if eval_on_holdout else 'train'))
        replay_pool = self._replay_holdout_pool if eval_on_holdout else self._replay_pool

        # get collision idx in obs_vec
        vec_spec = self._env.observation_vec_spec
        obs_vec_start_idxs = np.cumsum(
            [space.flat_dim for space in vec_spec.values()]) - 1
        coll_idx = obs_vec_start_idxs[list(vec_spec.keys()).index('coll')]

        # model will be evaluated on 1e3 inputs at a time (accounting for bnn samples)
        batch_size = 1000 // self._num_bnn_samples
        assert (batch_size > 1)
        rp_gen = replay_pool.sample_all_generator(batch_size=batch_size,
                                                  include_env_infos=True)

        # keep everything in dict d
        d = defaultdict(list)
        for steps, (observations_im, observations_vec
                    ), actions, rewards, dones, env_infos in rp_gen:
            # assumes observations are (batch, time, dim) with the first
            # obs_history_len entries being history — TODO confirm
            observations = (
                observations_im[:, :self._model.obs_history_len, :],
                observations_vec[:, :self._model.obs_history_len, :])
            # label is 1 from the first future collision onwards (cumsum >= 1)
            coll_labels = (np.cumsum(
                observations_vec[:, self._model.obs_history_len:, coll_idx],
                axis=1) >= 1.).astype(float)

            # replicate each input num_bnn_samples times so one forward pass
            # yields all bnn samples
            observations_repeat = (np.repeat(observations[0],
                                             self._num_bnn_samples,
                                             axis=0),
                                   np.repeat(observations[1],
                                             self._num_bnn_samples,
                                             axis=0))
            actions_repeat = np.repeat(actions, self._num_bnn_samples, axis=0)

            yhats, bhats = self._model.get_model_outputs(
                observations_repeat, actions_repeat)
            # regroup predictions as (batch, num_bnn_samples, horizon)
            coll_preds = np.reshape(
                yhats['coll'], (len(steps), self._num_bnn_samples, -1))

            d['coll_labels'].append(coll_labels)
            d['coll_preds'].append(coll_preds)
            d['env_infos'].append(env_infos)
            d['dones'].append(dones)
            # Note: you can save more things (e.g. actions) if you want to do
            # something with them later

        # concatenate the per-batch lists into single arrays and cache to disk
        for k, v in d.items():
            d[k] = np.concatenate(v)
        mypickle.dump(d, pkl_file_name)

    return d
def plot_online_switching_std(self, H_avert=4, H_takeover=4, H_signal=4,
                              save_dir=None):
    """Plot fraction-autonomous vs fraction-of-crashes while sweeping the
    std threshold used for online human/autonomy switching.

    Sweep results are cached to a pkl in `save_dir` (keyed by the H
    parameters) and reloaded on subsequent calls.

    :param H_avert: horizon parameter forwarded to plot_online_switching
    :param H_takeover: horizon parameter forwarded to plot_online_switching
    :param H_signal: horizon parameter forwarded to plot_online_switching
    :param save_dir: required output directory for the cache pkl and figure
    :return: the matplotlib figure
    """
    import matplotlib.pyplot as plt
    assert (save_dir is not None)

    pkl_fname = os.path.join(
        save_dir, 'plot_online_switching_std_Ha{0}_Ht{1}_Hs{2}.pkl'.format(
            H_avert, H_takeover, H_signal))

    if not os.path.exists(pkl_fname):
        # sweep the std threshold, keeping only points where the autonomy
        # fraction actually changed (> 1e-3) to thin out the curve
        stds = np.linspace(0, 0.3, 1000)
        stds_used, frac_autonomous_list, frac_crashes_averted_list = [], [], []
        for std in stds:
            frac_autonomous, frac_crashes_averted = self.plot_online_switching(
                H_avert=H_avert,
                H_takeover=H_takeover,
                H_signal=H_signal,
                std_thresh=std)
            if len(frac_autonomous_list) == 0 or abs(
                    frac_autonomous - frac_autonomous_list[-1]) > 1e-3:
                stds_used.append(std)
                frac_autonomous_list.append(frac_autonomous)
                frac_crashes_averted_list.append(frac_crashes_averted)
        mypickle.dump(
            {
                'stds_used': stds_used,
                'frac_autonomous_list': frac_autonomous_list,
                'frac_crashes_averted_list': frac_crashes_averted_list
            }, pkl_fname)
    else:
        d = mypickle.load(pkl_fname)
        stds_used = d['stds_used']
        frac_autonomous_list = d['frac_autonomous_list']
        frac_crashes_averted_list = d['frac_crashes_averted_list']

    # presumably extends the curve to the fully-autonomous endpoint (1.0)
    # by repeating the last std/averted values — TODO confirm intent
    stds_used.append(stds_used[-1])
    frac_autonomous_list.append(1.0)
    frac_crashes_averted_list.append(frac_crashes_averted_list[-1])

    f, ax = plt.subplots(1, 1)
    # plot crashes NOT averted (1 - averted) against autonomy fraction
    ax.plot(frac_autonomous_list,
            1 - np.array(frac_crashes_averted_list),
            color='r')
    ax.set_xlim((0, 1.05))
    ax.set_ylim((0, 1.05))
    plt.xticks(np.linspace(0., 1., 11))
    plt.yticks(np.linspace(0., 1., 11))
    # Customize the major grid
    ax.grid(which='major', linestyle=':', linewidth='0.5', color='black')
    ax.set_xlabel('Fraction autonomous')
    ax.set_ylabel('Fraction crashes')
    ax.set_title('Autonomy vs crashes averted with std thresholding')

    if save_dir:
        f.savefig(os.path.join(save_dir, 'plot_online_switching_std.png'),
                  dpi=200,
                  bbox_inches='tight')

    return f
def plot_rw_rccar_var001_var016(ckpt_itr=None):
    """Visualize the trained policy's collision predictions on rosbag rollouts.

    For each experiment (currently var019): loads rollouts from
    rosbag_rollouts_00.pkl, restores the policy at `ckpt_itr`, evaluates K
    random action sequences at every timestep, and saves one figure per
    timestep (camera image + predicted trajectories coloured by collision
    probability) under <exp.folder>/plot/ckpt_<itr>/.

    :param ckpt_itr: checkpoint iteration to restore; forwarded to
        exp.restore_policy
    """
    label_params =[['exp', ('exp_name',)]]
    experiment_groups = [
        ExperimentGroup(os.path.join(DATA_DIR, 'rw_rccar/var{0:03d}'.format(i)),
                        label_params=label_params,
                        plot={
                        })
        for i in [19]
    ]
    mec = MultiExperimentComparison(experiment_groups)
    exps = mec.list

    ### plot length of the rollouts
    # f, axes = plt.subplots(1, len(exps), figsize=(32, 6), sharex=True, sharey=True)
    # for i, (exp, ax) in enumerate(zip(exps, axes)):
    #     rollouts = list(itertools.chain(*exp.train_rollouts))
    #     lengths = [len(r['dones']) for r in rollouts][:16]
    #     assert (len(lengths) == 16)
    #     steps = np.arange(len(lengths))
    #
    #     label = '{0}, height: {1}, color: {2}, H: {3}'.format(
    #         exp.name,
    #         exp.params['alg']['env'].split("'obs_shape': (")[1].split(',')[0],
    #         exp.params['alg']['env'].split(',')[-1].split(')})')[0],
    #         exp.params['policy']['H']
    #     )
    #
    #     ax.scatter(steps, lengths, color=cm.magma(i / len(exps)))
    #     ax.set_title(label)
    #     ax.legend()
    #
    # plt.tight_layout()
    # f.savefig('plots/rw-rccar/var001_016.png', bbox_inches='tight', dpi=100)

    ### plot policy on the rollouts
    # candidate actions
    for exp in exps:
        rollouts = mypickle.load(os.path.join(exp.folder, 'rosbag_rollouts_00.pkl'))['rollouts']
        # subsample down to ~16 evenly spaced rollouts
        rollouts = rollouts[::len(rollouts) // 16]
        # rollouts = list(itertools.chain(*exp.train_rollouts))[:16]

        tf_sess, tf_graph = GCGPolicy.create_session_and_graph(gpu_device=1, gpu_frac=0.6)
        with tf_sess.as_default(), tf_graph.as_default():
            exp.create_env()
            exp.create_policy()
            exp_ckpt_itr = exp.restore_policy(itr=ckpt_itr)

            # K candidate action sequences sampled uniformly from the action space
            K = 2048
            actions = np.random.uniform(*exp.env.action_space.bounds,
                                        size=(K, exp.policy.H + 1, exp.env.action_space.flat_dim))

            replay_pool = ReplayPool(
                env_spec=exp.env.spec,
                env_horizon=exp.env.horizon,
                N=exp.policy.N,
                gamma=1,
                size=int(1.1 * sum([len(r['dones']) for r in rollouts])),
                obs_history_len=exp.policy.obs_history_len,
                sampling_method='uniform'
            )

            # replay each rollout through the pool to get history-encoded
            # observations, and query the policy with all K action sequences
            step = 0
            outputs = []
            for i, r in enumerate(rollouts):
                r_len = len(r['dones'])
                outputs_i = []
                for j in range(r_len):
                    # evaluate and get output
                    observation = (r['observations'][j][0], np.empty([exp.policy.obs_history_len, 0]))
                    replay_pool.store_observation(step, observation)
                    encoded_observation = replay_pool.encode_recent_observation()
                    observation_im, observation_vec = encoded_observation
                    # tile the single observation across the K candidates
                    observations = (np.tile(observation_im, (K, 1, 1)), np.tile(observation_vec, (K, 1, 1)))
                    probcolls = exp.policy.get_model_outputs(observations, actions)
                    outputs_i.append(probcolls)
                    step += 1
                    replay_pool.store_effect(
                        r['actions'][j],
                        r['rewards'][j],
                        r['dones'][j],
                        None,
                        r['est_values'][j],
                        r['logprobs'][j]
                    )
                outputs.append(outputs_i)

            # one figure per (rollout, timestep): left = camera image,
            # right = candidate trajectories coloured by collision probability
            f, axes = plt.subplots(1, 2, figsize=(12, 8))
            imshow = None
            plot_folder = os.path.join(exp.folder, 'plot', 'ckpt_{0:03d}'.format(exp_ckpt_itr))
            os.makedirs(plot_folder, exist_ok=True)
            for i, (r_i, output_i) in enumerate(zip(rollouts, outputs)):
                for j, (obs, cost) in enumerate(zip(r_i['observations'], output_i)):
                    obs_im, obs_vec = obs
                    # NOTE(review): negation suggests the model output is a
                    # value/cost where lower means collision — confirm sign
                    probcoll = -cost

                    # plot image
                    im = np.reshape(obs_im, exp.env.observation_im_space.shape)
                    is_gray = (im.shape[-1] == 1)
                    if is_gray:
                        im = im[:, :, 0]
                        color = 'Greys_r'
                    else:
                        color=None
                    if imshow is None:
                        imshow = axes[0].imshow(im, cmap=color)
                    else:
                        # reuse the AxesImage instead of re-creating it each frame
                        imshow.set_data(im)

                    # plot probcolls
                    # steering commands are action dim 0; integrate headings to
                    # roll out (x, y) trajectories — assumes unit step length
                    steers = actions[:, :-1, 0]
                    angle_const = 0.5 * np.pi / 2.
                    angles = angle_const * steers
                    ys = np.cumsum(np.cos(angles), axis=1)
                    xs = np.cumsum(-np.sin(angles), axis=1)
                    sort_idxs = np.argsort(probcoll)
                    xlim = (min(xs.min(), 0), max(xs.max(), 0))
                    ylim = (min(ys.min(), -0.5), max(ys.max(), 0.5))
                    min_probcoll = probcoll.min()
                    max_probcoll = probcoll.max()
                    # keep only every K//keep-th trajectory (sorted by probcoll)
                    keep = 10
                    ys = ys[sort_idxs][::K//keep]
                    xs = xs[sort_idxs][::K//keep]
                    probcoll = probcoll[sort_idxs][::K//keep]
                    steers = steers[sort_idxs][::K//keep]
                    # prepend the origin to every trajectory
                    ys = np.hstack((np.zeros((len(ys), 1)), ys))
                    xs = np.hstack((np.zeros((len(xs), 1)), xs))
                    # if lines is None:
                    axes[1].cla()
                    axes[1].plot(0, 0, 'rx', markersize=10)
                    # lines = axes[1].plot(np.expand_dims(xs[:,-1], 0), np.expand_dims(ys[:,-1], 0),
                    #                      marker='o', linestyle='', markersize=2)
                    lines = axes[1].plot(xs.T, ys.T)
                    # highlight the best (lowest probcoll) trajectory and its first steer
                    axes[1].plot(xs[0,:], ys[0,:], 'b^', linestyle='', markersize=5)
                    axes[1].arrow(0, 0, -2*np.sin(0.5*np.pi * steers[0,0]), 2*np.cos(0.5*np.pi * steers[0,0]), fc='b', ec='b')
                    #normalize for color reasons
                    # probcoll -= probcoll.min()
                    # probcoll /= probcoll.max()
                    for l, p in zip(lines, probcoll):
                        l.set_color(cm.viridis(1 - p))
                        l.set_markerfacecolor(cm.viridis(1 - p))
                    axes[1].set_xlim(xlim)
                    axes[1].set_ylim(ylim)
                    axes[1].set_aspect('equal')
                    axes[1].set_title('steer {0:.3f}, probcoll in [{1:.2f}, {2:.2f}]'.format(-steers[0, 0], min_probcoll, max_probcoll))
                    f.savefig(os.path.join(plot_folder, 'rollout_{0:03d}_t_{1:03d}.png'.format(i, j)), bbox_inches='tight', dpi=200)
                    # break
                # break
            plt.close(f)
        tf_sess.close()