def _get_goal(self):
    """Return a constant goal: "not turning", with a turn goal of zero.

    Returns:
        AttrDict with ``is_turn`` and ``turn_goal``, each a one-element
        float32 array holding 0.
    """
    is_turn = np.ravel(False).astype(np.float32)
    turn_goal = np.ravel(0.).astype(np.float32)

    goal = AttrDict()
    goal.is_turn = is_turn
    goal.turn_goal = turn_goal
    return goal
def get_batch(self, indices=None, is_tf=True):
    """Cut windows of ``horizon + 1`` steps out of the dataset and split them
    into model inputs and outputs.

    Args:
        indices: window start indices. When None, ``self._batch_size`` starts
            are drawn with ``np.random.choice`` from
            ``self._valid_start_indices[:-self._horizon]``.
        is_tf: when True, every leaf of both results is passed through
            ``tf.convert_to_tensor``.

    Returns:
        (inputs, outputs): observations contribute only their first step to
        ``inputs``, actions contribute every step but the last, and output
        observations contribute every step but the first to ``outputs``.
        ``outputs.done`` is True from the first done flag onwards.
    """
    if indices is None:
        candidates = self._valid_start_indices[:-self._horizon]
        indices = np.random.choice(candidates, size=self._batch_size)

    window = self._horizon + 1

    def _stack_windows(arr):
        return np.stack([arr[start:start + window] for start in indices], axis=0)

    sampled = self._datadict.leaf_apply(_stack_windows)

    spec = self._env_spec
    inputs, outputs = AttrDict(), AttrDict()
    for key in spec.names:
        value = sampled[key]
        if key in spec.observation_names:
            inputs[key] = value[:, 0]
        elif key in spec.action_names:
            inputs[key] = value[:, :-1]
        # independent of the branch above: a key may be both input and output
        if key in spec.output_observation_names:
            outputs[key] = value[:, 1:]

    # cumulative sum makes every step after the first "done" count as done too
    outputs.done = sampled.done[:, 1:].cumsum(axis=1).astype(bool)

    if is_tf:
        inputs.leaf_modify(tf.convert_to_tensor)
        outputs.leaf_modify(tf.convert_to_tensor)

    return inputs, outputs
def warm_start(self, model, observation, goal):
    """Create the observation/goal placeholders and build the CEM graph on them.

    Must be called in graph (non-eager) mode with a default session set.

    Args:
        model: model handed to ``self._cem``.
        observation: not used here.
        goal: AttrDict whose leaves provide dtype and shape for the goal
            placeholders.
    """
    assert not tf.executing_eagerly()
    logger.debug('Setting up CEM graph....')

    self._session = tf.get_default_session()
    assert self._session is not None

    ### create placeholders
    spec = self._env_spec
    self._obs_placeholders = obs_phs = AttrDict()
    for obs_name in spec.observation_names:
        obs_shape = list(spec.names_to_shapes[obs_name])
        obs_dtype = tf.as_dtype(spec.names_to_dtypes[obs_name])
        obs_phs[obs_name] = tf.placeholder(obs_dtype, shape=obs_shape, name=obs_name)

    self._goal_placeholders = goal_phs = AttrDict()
    for goal_name, goal_value in goal.leaf_items():
        goal_phs[goal_name] = tf.placeholder(
            tf.as_dtype(goal_value.dtype),
            shape=goal_value.shape,
            name=goal_name,
        )

    self._get_action_outputs = self._cem(model, obs_phs, goal_phs)
    logger.debug('CEM graph setup complete')
def call(self, inputs, obs_lowd=None, training=False):
    """Return the low-dimensional observation embedding, plus kernels when training.

    Args:
        inputs: passed to ``self.get_obs_lowd`` when ``obs_lowd`` is not given.
        obs_lowd: precomputed embedding; computed from ``inputs`` when None.
        training: when True, ``kernels`` from ``tf_utils.get_kernels`` is
            added to the result.

    Returns:
        Frozen AttrDict with ``obs_lowd`` (and ``kernels`` in training).
    """
    if obs_lowd is None:
        obs_lowd = self.get_obs_lowd(inputs, training=training)

    outputs = AttrDict(obs_lowd=obs_lowd)
    if training:
        outputs.kernels = tf_utils.get_kernels(self.layers)
    return outputs.freeze()
def get_batch(self):
    """Return one batch of data; this base version must be overridden.

    Returns:
        inputs (AttrDict)
        outputs (AttrDict)

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    # The statements that used to follow the raise were unreachable and have
    # been removed; the return contract is kept in the docstring above.
    raise NotImplementedError
def __init__(self, params, names_shapes_limits_dtypes=()):
    """Record the shape, limits and dtype of every named quantity.

    Args:
        params: not used here.
        names_shapes_limits_dtypes: iterable of
            ``(name, shape, limit, dtype)`` tuples. It is copied, so the
            caller's sequence is never modified. A ``'done'`` entry with
            shape ``(1,)``, limits ``(0, 1)`` and dtype ``bool`` is always
            appended.
    """
    entries = list(names_shapes_limits_dtypes)
    # `np.bool` was only an alias of the builtin `bool` and is missing from
    # NumPy 1.24-1.26; the builtin is the same object on older releases.
    entries.append(('done', (1, ), (0, 1), bool))

    self._names_to_shapes = AttrDict()
    self._names_to_limits = AttrDict()
    self._names_to_dtypes = AttrDict()
    for name, shape, limit, dtype in entries:
        self._names_to_shapes[name] = shape
        self._names_to_limits[name] = limit
        self._names_to_dtypes[name] = dtype
def _get_outputs(self, inputs, rnn_outputs, training=False):
    """Pick, for each commanded turn, the collision logit of the bin it falls in.

    Args:
        inputs: provides ``inputs.commands.turn``; channel 0 of its last axis
            is binned.
        rnn_outputs: per-bin pre-logits, softmaxed over the last axis.
        training: not used here.

    Returns:
        AttrDict with ``turn_one_hot``, ``logits``, ``probcoll`` (sigmoid of
        ``logits``) and ``all_logits``.
    """
    num_bins = self._num_collision_bins

    # bin the actions over [-1, 1]; out-of-range values go to the end bins
    turn = inputs.commands.turn[..., 0]
    bin_edges = np.linspace(-1., 1., num_bins + 1).astype(np.float32)
    bin_ids = tfp.stats.find_bins(
        turn,
        bin_edges,
        extend_lower_interval=True,
        extend_upper_interval=True,
    )
    turn_one_hot = tf.one_hot(tf.cast(bin_ids, tf.int32), depth=num_bins, axis=2)

    # for training: softmax lies in [0, 1], so the rescaled value is in [-5, 5]
    squashed = tf.nn.softmax(rnn_outputs, axis=-1)
    all_logits = -5 + 10 * squashed
    selected = tf.reduce_sum(all_logits * turn_one_hot, axis=-1)
    logits = selected[..., tf.newaxis]  # trailing axis kept for compatibility

    # for planning
    probcoll = tf.nn.sigmoid(logits)

    return AttrDict(
        turn_one_hot=turn_one_hot,
        logits=logits,
        probcoll=probcoll,
        # debugging
        all_logits=all_logits,
    )
def _get_outputs(self, inputs, obs_lowd, training=False):
    """Score ``obs_lowd`` as categorical logits against the binned first turn command.

    Args:
        inputs: provides ``inputs.commands.turn``; only the first step along
            the second-to-last axis and channel 0 are used as the label.
        obs_lowd: used directly as the logits.
        training: not used here.

    Returns:
        AttrDict with ``logits``, ``log_prob`` of the true bin, ``acc``
        (1.0 where the argmax matches the true bin) and ``bin_edges``.
    """
    # labels: flatten the first-step turn command and bin it
    first_turn = inputs.commands.turn[..., :1, 0]
    flat_turn = tf.reshape(first_turn, (-1, ))
    true_bins = tfp.stats.find_bins(
        flat_turn,
        self._bin_edges,
        extend_lower_interval=True,
        extend_upper_interval=True,
    )

    # prediction
    logits = obs_lowd
    log_prob = tf.distributions.Categorical(logits).log_prob(true_bins)

    # accuracy
    predicted_bins = tf.argmax(logits, axis=-1)
    is_correct = tf.equal(predicted_bins, tf.cast(true_bins, tf.int64))
    acc = tf.cast(is_correct, tf.float32)

    return AttrDict(
        logits=logits,
        log_prob=log_prob,
        acc=acc,
        bin_edges=tf.convert_to_tensor(self._bin_edges),
    )
def _load_hdf5(self, fname):
    """Load every key in ``self.keys`` from an hdf5 file into ``self._d``.

    Entries whose key contains ``'image'`` are additionally passed through
    ``np_utils.uncompress_video`` and re-packed into an array.

    Args:
        fname: path of the hdf5 file to read.
    """
    loaded = AttrDict()
    self._d = loaded
    with h5py.File(fname, 'r') as f:
        for key in self.keys:
            loaded[key] = np.array(f[key])
            if 'image' not in key:
                continue
            frames = np_utils.uncompress_video(loaded[key])
            loaded[key] = np.array(list(frames))
def _init_setup(self):
    """Create a 4x1 figure and one pyblit group per panel.

    Panels, top to bottom: observation, policy, turn, model.
    """
    fig, axes = plt.subplots(4, 1, figsize=(8, 20))
    self._fig = fig
    ax_observation, ax_policy, ax_turn, ax_model = axes.ravel()

    # built in the same order the panels are listed
    observation = self._init_setup_observation(ax_observation)
    policy = self._init_setup_policy(ax_policy)
    turn = self._init_setup_turn(ax_turn)
    model = self._init_setup_model(ax_model)

    self._pyblit = AttrDict(
        observation=observation,
        policy=policy,
        turn=turn,
        model=model,
    )
    self._fig_shown = False
def _unwhiten_and_split(self, tensor_whitened, names):
    """Rescale a whitened tensor to the env-spec limits and split it per name.

    Args:
        tensor_whitened: tensor that is split along axis 2 after rescaling.
        names: names whose limits and dims are looked up in ``self._env_spec``.

    Returns:
        AttrDict mapping each name to its slice of the rescaled tensor.
    """
    lower, upper = self._env_spec.limits(names)
    center = 0.5 * (lower + upper)
    half_range = 0.5 * (upper - lower)
    unwhitened = tensor_whitened * half_range + center

    pieces = tf.split(unwhitened, self._env_spec.dims(names), axis=2)
    return AttrDict.from_dict(dict(zip(names, pieces)))
def _get_obs(self):
    """Read the current observation and resize/rectify the front image.

    Returns:
        AttrDict with one entry per observation name (first element of the
        horizon-1 read); ``images.front`` is replaced by the output of
        ``process_image`` for the shape declared in the env spec.
    """
    traverser = self._data_traverser
    obs = AttrDict()
    for obs_name in set(self._env_spec.observation_names):
        obs[obs_name] = traverser.get(obs_name, horizon=1)[0]

    raw_front = obs.images.front
    target_shape = self._env_spec.names_to_shapes.images.front
    obs.images.front = process_image(raw_front, target_shape, image_rectify=True)
    return obs
def _load_hdf5s(self):
    """Load all ``.hdf5`` rollouts under ``self._hdf5_folders`` into one dataset.

    Files that are empty, have leaves of differing lengths, or have
    zero-length leaves are skipped with a warning.

    Returns:
        (datadict, valid_start_indices): ``datadict`` holds one concatenated
        array per env-spec name plus ``done``, ``hdf5_fname`` and
        ``rollout_timestep``; ``valid_start_indices`` are the positions where
        ``done`` is False.
    """
    hdf5_fnames = file_utils.get_files_ending_with(self._hdf5_folders, '.hdf5')

    # start every entry as an empty list, one item appended per rollout
    datadict = AttrDict()
    for key in self._env_spec.names:
        datadict[key] = []
    datadict.done = []
    datadict.hdf5_fname = []
    datadict.rollout_timestep = []

    for hdf5_fname in hdf5_fnames:
        logger.debug('Loading ' + hdf5_fname)
        with h5py.File(hdf5_fname, 'r') as f:
            hdf5_names = file_utils.get_hdf5_leaf_names(f)
            hdf5_lens = np.array([len(f[name]) for name in hdf5_names])

            if len(hdf5_names) == 0:
                skip_reason = 'Empty hdf5, skipping!'
            elif not np.all(hdf5_lens == hdf5_lens[0]):
                skip_reason = 'data lengths not all the same, skipping!'
            elif hdf5_lens[0] == 0:
                skip_reason = 'data lengths are 0, skipping!'
            else:
                skip_reason = None

            if skip_reason is not None:
                logger.warning(skip_reason)
                continue

            for key in self._env_spec.names:
                assert key in f, '"{0}" not in env space names'.format(key)
                value = self._parse_hdf5(key, f[key])
                datadict[key].append(value)

            # length of the last parsed value stands for the rollout length
            num_steps = len(value)
            datadict.done.append([False] * (num_steps - 1) + [True])
            datadict.hdf5_fname.append([hdf5_fname] * num_steps)
            datadict.rollout_timestep.append(np.arange(num_steps))

    # merge the per-rollout pieces into a single numpy array per leaf
    def _concat(arr_list):
        return np.concatenate(arr_list, axis=0)

    datadict.leaf_modify(_concat)
    datadict_len = len(datadict.done)
    datadict.leaf_assert(lambda arr: len(arr) == datadict_len)
    logger.debug('Dataset length: {}'.format(datadict_len))

    # a window may start anywhere that is not the final step of a rollout
    not_done = np.logical_not(datadict.done)
    valid_start_indices = np.where(not_done)[0]

    return datadict, valid_start_indices
def get_action(self, model, observation, goal):
    """Evaluate the prebuilt action graph for one observation and goal.

    Args:
        model: not used here.
        observation: values fed to the observation placeholders.
        goal: values fed to the goal placeholders.

    Returns:
        AttrDict with the evaluated graph outputs plus ``cost_fn``.
    """
    assert self._session is not None

    feed_dict = self._get_action_feed_dict(observation, goal)
    fetches = {
        name: tensor
        for name, tensor in self._get_action_outputs.leaf_items()
    }
    fetched = self._session.run(fetches, feed_dict=feed_dict)

    get_action = AttrDict.from_dict(fetched)
    get_action.cost_fn = self._cost_fn
    return get_action
def _load_hdf5(self, fname):
    """Load non-image keys eagerly and hand the single image key to a background thread.

    The image entry in ``self._d`` starts as an empty list; the raw image
    data is kept in ``self._image_buffer`` and
    ``self._background_video_thread`` is started as a daemon thread.

    Args:
        fname: path of the hdf5 file to read.
    """
    self._d = AttrDict()
    self._image_buffer = None
    self._image_key = None

    with h5py.File(fname, 'r') as f:
        for key in self.keys:
            if 'image' not in key:
                self._d[key] = np.array(f[key])
                continue
            # exactly one image key is supported
            assert self._image_key is None
            self._image_key = key
            self._image_buffer = np.array(f[key])
            self._d[key] = []

    assert self._image_buffer is not None

    worker = threading.Thread(target=self._background_video_thread, daemon=True)
    worker.start()
def _init_setup_observation(self, ax):
    """Create the image artist for the observation panel.

    Returns:
        AttrDict with ``imshow`` and the ``ax`` wrapper that contains it.
    """
    image_artist = pyblit.Imshow(ax)
    axis = pyblit.Axis(ax, [image_artist])
    return AttrDict(imshow=image_artist, ax=axis)
def _init_setup_turn(self, ax):
    """Create the horizontal-bar artist for the turn panel.

    Returns:
        AttrDict with ``bar`` and the ``ax`` wrapper that contains it.
    """
    bar_artist = pyblit.Barh(ax)
    axis = pyblit.Axis(ax, [bar_artist])
    return AttrDict(bar=bar_artist, ax=axis)
def _init_setup_model(self, ax):
    """Create the image artist for the model panel.

    Returns:
        AttrDict with ``imshow`` and the ``ax`` wrapper that contains it.
    """
    image_artist = pyblit.Imshow(ax)
    axis = pyblit.Axis(ax, [image_artist])
    return AttrDict(imshow=image_artist, ax=axis)
def _init_setup_model(self, ax):
    """Create the batch line-collection artist for the model panel.

    Returns:
        AttrDict with ``batch_line`` and the ``ax`` wrapper that contains it.
    """
    lines_artist = pyblit.BatchLineCollection(ax)
    axis = pyblit.Axis(ax, [lines_artist])
    return AttrDict(batch_line=lines_artist, ax=axis)
def _cem(self, model, observation, goal):
    """Build the cross-entropy-method planning ops and return their outputs.

    Runs ``self._itrs`` sampling rounds. The first round draws
    ``self._M_init`` action sequences from a uniform distribution between the
    action-selection limits; later rounds draw ``self._M`` sequences from a
    Gaussian refit to the ``self._K`` lowest-cost samples of the previous
    round. Samples from all rounds are pooled and the lowest-cost one is
    reported.

    Args:
        model: must provide ``get_obs_lowd(inputs)`` and
            ``call(inputs, obs_lowd=...)``.
        observation: AttrDict of observations; its leaves are converted to
            tensors in place.
        goal: AttrDict of goals; its leaves are converted to tensors in place
            and it is passed through to ``self._cost_fn``.

    Returns:
        AttrDict with ``cost``, ``cost_per_timestep``, ``action``,
        ``action_sequence`` for the best sample, and ``all_costs``,
        ``all_costs_per_timestep``, ``all_actions``, ``all_model_outputs``
        for every sample of every round.
    """
    observation.leaf_modify(lambda v: tf.convert_to_tensor(v))
    goal.leaf_modify(lambda v: tf.convert_to_tensor(v))

    ### get obs lowd
    # add a leading axis of size 1 to every observation
    inputs = observation.leaf_apply(lambda value: value[tf.newaxis])
    obs_lowd = model.get_obs_lowd(inputs)

    ### CEM setup
    # keep only leaves of rank < 4 (presumably drops image tensors -- TODO confirm)
    inputs = inputs.leaf_filter(lambda key, value: len(value.shape) < 4)
    # per-step limits repeated for every step of the horizon
    action_selection_lower_limits = np.tile(self._action_selection_lower_limits, (self._horizon,))
    action_selection_upper_limits = np.tile(self._action_selection_upper_limits, (self._horizon,))
    action_distribution = tf.contrib.distributions.Uniform(
        action_selection_lower_limits,
        action_selection_upper_limits
    )

    # CEM params
    # Ms: samples drawn per round; Ks: samples kept per round (the last round keeps 1)
    Ms = [self._M_init] + [self._M] * (self._itrs - 1)
    Ks = [self._K] * (self._itrs - 1) + [1]

    ### keep track of
    all_costs = []
    all_costs_per_timestep = []
    all_actions = []
    all_model_outputs = []

    ### CEM loop
    for M, K in zip(Ms, Ks):
        concat_actions = tf.reshape(
            action_distribution.sample((M,)),
            (M, self._horizon, -1)
        )
        # Gaussian samples from later rounds can leave the allowed range
        concat_actions = tf.clip_by_value(
            concat_actions,
            np.reshape(action_selection_lower_limits, (self._horizon, self._action_dim)),
            np.reshape(action_selection_upper_limits, (self._horizon, self._action_dim))
        )
        actions = self._split_action(concat_actions)
        inputs_tiled = inputs.leaf_filter(lambda k, v: k in self._env_spec.output_observation_names)
        # repeat each remaining input M times along its first axis
        inputs_tiled = inputs_tiled.leaf_apply(lambda v: tf.tile(v, [M] + [1] * (len(v.shape) - 1)))
        for k, v in actions.leaf_items():
            inputs_tiled[k] = v
        obs_lowd_tiled = tf.tile(obs_lowd, (M, 1))

        ### call model and evaluate cost
        model_outputs = model.call(inputs_tiled, obs_lowd=obs_lowd_tiled)
        # drop outputs whose key starts with an underscore
        model_outputs = model_outputs.leaf_filter(lambda key, value: key[0] != '_')
        costs_per_timestep = self._cost_fn(inputs_tiled, model_outputs, goal, actions)
        costs = tf.reduce_mean(costs_per_timestep.total, axis=1)

        ### keep track
        all_costs.append(costs)
        all_costs_per_timestep.append(costs_per_timestep)
        all_actions.append(actions)
        all_model_outputs.append(model_outputs.leaf_filter(lambda k, v: tf.is_tensor(v)))

        ### get top K
        # top_k on the negated costs selects the K lowest costs
        _, top_indices = tf.nn.top_k(-costs, k=K)
        top_actions = tf.gather(
            tf.reshape(concat_actions, [M, self._horizon * self._action_dim]),
            indices=top_indices
        )

        ### set new distribution based on top k
        mean = tf.reduce_mean(top_actions, axis=0)
        # NOTE(review): top_actions is not mean-centred here, so this is the
        # second moment rather than the covariance -- confirm this is intended.
        covar = tf.matmul(tf.transpose(top_actions), top_actions) / float(K)
        # self._eps is added on the diagonal before the matrix is used as a covariance
        sigma = covar + self._eps * tf.eye(self._horizon * self._action_dim)
        action_distribution = tf.contrib.distributions.MultivariateNormalFullCovariance(
            loc=mean,
            covariance_matrix=sigma
        )

    # pool the samples of all rounds along the first axis
    all_costs = tf.concat(all_costs, axis=0)
    all_costs_per_timestep = AttrDict.leaf_combine_and_apply(all_costs_per_timestep, lambda arrs: tf.concat(arrs, axis=0))
    all_actions = AttrDict.leaf_combine_and_apply(all_actions, lambda arrs: tf.concat(arrs, axis=0))
    all_model_outputs = AttrDict.leaf_combine_and_apply(all_model_outputs, lambda arrs: tf.concat(arrs, axis=0))

    best_idx = tf.argmin(all_costs)
    best_cost = all_costs[best_idx]
    best_cost_per_timestep = all_costs_per_timestep.leaf_apply(lambda v: v[best_idx])
    best_action_sequence = all_actions.leaf_apply(lambda v: v[best_idx])
    # first step of the best sequence
    best_action = best_action_sequence.leaf_apply(lambda v: v[0])
    # best_model_outputs = all_model_outputs.leaf_apply(lambda v: v[best_idx])

    get_action_outputs = AttrDict(
        cost=best_cost,
        cost_per_timestep=best_cost_per_timestep,
        action=best_action,
        action_sequence=best_action_sequence,
        # model_outputs=best_model_outputs,

        all_costs=all_costs,
        all_costs_per_timestep=all_costs_per_timestep,
        all_actions=all_actions,
        all_model_outputs=all_model_outputs
    )

    return get_action_outputs
def _get_obs(self):
    """Read every observation name from the data traverser with horizon 1.

    Returns:
        AttrDict mapping each observation name to the traverser's result.
    """
    traverser = self._data_traverser
    obs = AttrDict()
    for obs_name in self._env_spec.observation_names:
        obs[obs_name] = traverser.get(obs_name, horizon=1)
    return obs
lambda arr: np.stack([arr[idx:idx+self._horizon+1] for idx in indices], axis=0)) inputs = AttrDict() outputs = AttrDict() for key in self._env_spec.names: value = sampled_datadict[key] if key in self._env_spec.observation_names: inputs[key] = value[:, 0] elif key in self._env_spec.action_names: inputs[key] = value[:, :-1] if key in self._env_spec.output_observation_names: outputs[key] = value[:, 1:] outputs.done = sampled_datadict.done[:, 1:].cumsum(axis=1).astype(bool) if is_tf: for d in (inputs, outputs): d.leaf_modify(lambda x: tf.convert_to_tensor(x)) return inputs, outputs def __len__(self): return len(self._datadict.done) if __name__ == '__main__': from sidewalk.envs.env_spec import EnvSpec d = Hdf5Dataset(AttrDict(), EnvSpec(AttrDict()))
def _setup_mppi_graph(self, model, goals):
    """Build the MPPI planning graph and return its placeholders and outputs.

    ``self._N`` action sequences are sampled around the previous MPPI mean
    (fed through a placeholder), scored with ``self._cost_fn``, and combined
    into a new mean using exponential weights on the negated costs.

    Args:
        model: must provide ``get_obs_lowd(inputs)`` and
            ``call(inputs, obs_lowd=...)``.
        goals: AttrDict whose leaves provide dtype and shape for the goal
            placeholders.

    Returns:
        Tuple ``(obs_placeholders, goal_placeholders, mppi_mean_placeholder,
        get_action_outputs)``; the last is a frozen AttrDict of result tensors.
    """
    ### create placeholders
    obs_placeholders = AttrDict()
    for name in self._env_spec.observation_names:
        shape = list(self._env_spec.names_to_shapes[name])
        dtype = tf.as_dtype(self._env_spec.names_to_dtypes[name])
        ph = tf.placeholder(dtype, shape=shape, name=name)
        obs_placeholders[name] = ph
    goal_placeholders = AttrDict()
    for name, value in goals.leaf_items():
        goal_placeholders[name] = tf.placeholder(tf.as_dtype(value.dtype), shape=value.shape, name=name)
    # previous MPPI mean, one row per horizon step
    mppi_mean_placeholder = tf.placeholder(
        tf.float32, name='mppi_mean', shape=[self._horizon, self._action_dim])

    ### get obs lowd
    # add a leading axis of size 1 to every observation
    inputs = obs_placeholders.leaf_apply(lambda value: value[tf.newaxis])
    obs_lowd = model.get_obs_lowd(inputs)

    past_mean = mppi_mean_placeholder[0]
    # drop the first row and repeat the last one so the length stays `horizon`
    shifted_mean = tf.concat(
        [mppi_mean_placeholder[1:], mppi_mean_placeholder[-1:]], axis=0)

    # sample through time
    # noise scale is self._sigma times half the action range
    delta_limits = 0.5 * (self._action_selection_upper_limits - self._action_selection_lower_limits)
    eps = tf.random_normal(mean=0, stddev=self._sigma * delta_limits, shape=(self._N, self._horizon, self._action_dim))
    actions = []
    for h in range(self._horizon):
        # each step blends (mean + noise) with the previous step, weighted by self._beta
        if h == 0:
            # action_h = self._beta * (shifted_mean[h, :] + eps[:, h, :]) + (1. - self._beta) * past_mean
            action_h = self._beta * (past_mean + eps[:, h, :]) + (
                1. - self._beta) * past_mean
        else:
            action_h = self._beta * (shifted_mean[h, :] + eps[:, h, :]) + (
                1. - self._beta) * actions[-1]
        actions.append(action_h)
    actions = tf.stack(actions, axis=1)
    actions = tf.clip_by_value(
        actions, self._action_selection_lower_limits[np.newaxis, np.newaxis],
        self._action_selection_upper_limits[np.newaxis, np.newaxis])

    # forward simulate
    actions_split = self._split_action(actions)
    # keep only output-observation inputs and repeat them self._N times along the first axis
    inputs_tiled = inputs.leaf_filter(
        lambda k, v: k in self._env_spec.output_observation_names
    ).leaf_apply(lambda v: tf.tile(v, [self._N] + [1] * (len(v.shape) - 1)))
    inputs_tiled.combine(actions_split)
    inputs_tiled.freeze()
    obs_lowd_tiled = tf.tile(obs_lowd, (self._N, 1))

    ### call model and evaluate cost
    model_outputs = model.call(inputs_tiled, obs_lowd=obs_lowd_tiled)
    costs_per_timestep = self._cost_fn(inputs_tiled, model_outputs,
                                       goal_placeholders, actions_split)
    costs = tf.reduce_mean(costs_per_timestep.total, axis=1)

    # MPPI update
    scores = -costs
    # subtracting the max score keeps the largest exponent at 0
    probs = tf.exp(self._gamma * (scores - tf.reduce_max(scores)))
    probs /= tf.reduce_sum(probs) + 1e-10
    new_mppi_mean = tf.reduce_sum(actions * probs[:, tf.newaxis, tf.newaxis],
                                  axis=0)

    # best_idx selects the reported costs; the reported actions come from the weighted mean
    best_idx = tf.argmin(costs)
    best_actions = self._split_action(new_mppi_mean)

    get_action_outputs = AttrDict(
        cost=costs[best_idx],
        cost_per_timestep=costs_per_timestep.leaf_apply(
            lambda v: v[best_idx]),
        action=best_actions.leaf_apply(lambda v: v[0]),
        action_sequence=best_actions,
        # model_outputs=model_outputs.leaf_filter(lambda k, v: tf.is_tensor(v)).leaf_apply(lambda v: v[best_idx]),
        all_costs=costs,
        all_costs_per_timestep=costs_per_timestep,
        all_actions=actions_split,
        all_model_outputs=model_outputs.leaf_filter(
            lambda k, v: tf.is_tensor(v)),
        mppi_mean=new_mppi_mean,
    ).freeze()

    return obs_placeholders, goal_placeholders, mppi_mean_placeholder, get_action_outputs
class StaticCEMPolicy(EagerCEMPolicy):
    """CEM policy whose planning graph is built in ``warm_start`` and evaluated
    through a TensorFlow session in ``get_action``."""

    @abstract.overrides
    def _init_setup(self):
        """Run the parent setup and reset the static-graph state to None."""
        super()._init_setup()

        # static graph
        self._session = None
        self._obs_placeholders = None
        self._goal_placeholders = None
        self._get_action_outputs = None

    @abstract.overrides
    def warm_start(self, model, observation, goal):
        """Create observation/goal placeholders and build the CEM graph on them.

        Must be called in graph (non-eager) mode with a default session set.
        ``observation`` is not used; ``goal`` supplies placeholder dtypes and
        shapes.
        """
        assert not tf.executing_eagerly()
        logger.debug('Setting up CEM graph....')

        self._session = tf.get_default_session()
        assert self._session is not None

        ### create placeholders
        spec = self._env_spec
        self._obs_placeholders = obs_phs = AttrDict()
        for obs_name in spec.observation_names:
            obs_shape = list(spec.names_to_shapes[obs_name])
            obs_dtype = tf.as_dtype(spec.names_to_dtypes[obs_name])
            obs_phs[obs_name] = tf.placeholder(obs_dtype, shape=obs_shape, name=obs_name)

        self._goal_placeholders = goal_phs = AttrDict()
        for goal_name, goal_value in goal.leaf_items():
            goal_phs[goal_name] = tf.placeholder(
                tf.as_dtype(goal_value.dtype),
                shape=goal_value.shape,
                name=goal_name,
            )

        self._get_action_outputs = self._cem(model, obs_phs, goal_phs)
        logger.debug('CEM graph setup complete')

    def _get_action_feed_dict(self, observation, goal):
        """Map every placeholder to the matching observation or goal value.

        Zero-dimensional observation values get a leading axis added.
        """
        feed_dict = {}
        for obs_name, obs_ph in self._obs_placeholders.leaf_items():
            obs_value = np.array(observation[obs_name])
            if obs_value.shape == ():
                obs_value = obs_value[np.newaxis]
            feed_dict[obs_ph] = obs_value
        for goal_name, goal_ph in self._goal_placeholders.leaf_items():
            feed_dict[goal_ph] = np.array(goal[goal_name])
        return feed_dict

    @abstract.overrides
    def get_action(self, model, observation, goal):
        """Run the prebuilt graph for one observation/goal.

        Returns:
            AttrDict with the evaluated graph outputs plus ``cost_fn``.
            ``model`` is not used.
        """
        assert self._session is not None

        feed_dict = self._get_action_feed_dict(observation, goal)
        fetches = {
            name: tensor
            for name, tensor in self._get_action_outputs.leaf_items()
        }
        fetched = self._session.run(fetches, feed_dict=feed_dict)

        get_action = AttrDict.from_dict(fetched)
        get_action.cost_fn = self._cost_fn
        return get_action
def _get_goal(self):
    """Read every goal name from the data traverser with horizon 1.

    Returns:
        AttrDict mapping each goal name to the traverser's result.
    """
    traverser = self._data_traverser
    goal = AttrDict()
    for goal_name in self._env_spec.goal_names:
        goal[goal_name] = traverser.get(goal_name, horizon=1)
    return goal
def _split_action(self, actions):
    """Split a concatenated action tensor along its last axis into named actions.

    Args:
        actions: tensor whose last axis holds all action dims back to back.

    Returns:
        AttrDict mapping each env-spec action name to its slice.
    """
    action_dims = self._env_spec.dims(self._env_spec.action_names)
    pieces = tf.split(actions, action_dims, axis=-1)

    split_actions = AttrDict()
    for action_name, piece in zip(self._env_spec.action_names, pieces):
        split_actions[action_name] = piece
    return split_actions