def plan_internal(self, planning_query: PlanningQuery) -> PlanningResult:
        start_planning_time = perf_counter()

        action_rng = np.random.RandomState(planning_query.seed)
        random_actions = self.scenario.sample_action_sequences(environment=planning_query.environment,
                                                               state=planning_query.start,
                                                               action_params=self.action_params,
                                                               n_action_sequences=self.n_samples,
                                                               action_sequence_length=1,
                                                               validate=True,
                                                               action_rng=action_rng)
        random_actions = [sequence_of_dicts_to_dict_of_tensors(a) for a in random_actions]
        random_actions = sequence_of_dicts_to_dict_of_tensors(random_actions)

        environment_batched = {k: tf.stack([v] * self.n_samples, axis=0) for k, v in planning_query.environment.items()}
        start_state_batched = {k: tf.expand_dims(tf.stack([v] * self.n_samples, axis=0), axis=1) for k, v in
                               planning_query.start.items()}
        mean_predictions, _ = self.fwd_model.propagate_differentiable_batched(environment=environment_batched,
                                                                              state=start_state_batched,
                                                                              actions=random_actions)

        final_states = {k: v[:, -1] for k, v in mean_predictions.items()}
        goal_state_batched = {k: tf.stack([v] * self.n_samples, axis=0) for k, v in planning_query.goal.items()}
        costs = self.scenario.trajopt_distance_to_goal_differentiable(final_states, goal_state_batched)
        costs = tf.squeeze(costs)
        min_idx = tf.math.argmin(costs, axis=0)
        # print(costs)
        # print('min idx', min_idx.numpy())
        best_indices = tf.argsort(costs)

        cmap = cm.Blues
        n_to_show = 5
        min_cost = costs[best_indices[0]]
        max_cost = costs[best_indices[n_to_show]]
        for j, i in enumerate(best_indices[:n_to_show]):
            s = numpify({k: v[i] for k, v in start_state_batched.items()})
            a = numpify({k: v[i][0] for k, v in random_actions.items()})
            c = (costs[i] - min_cost) / (max_cost - min_cost)
            self.scenario.plot_action_rviz(s, a, label='samples', color=cmap(c), idx1=2 * j, idx2=2 * j + 1)

        best_actions = {k: v[min_idx] for k, v in random_actions.items()}
        best_predictions = {k: v[min_idx] for k, v in mean_predictions.items()}

        best_actions = numpify(dict_of_sequences_to_sequence_of_dicts(best_actions))
        best_predictions = numpify(dict_of_sequences_to_sequence_of_dicts(best_predictions))

        planning_time = perf_counter() - start_planning_time
        planner_status = MyPlannerStatus.Solved
        return PlanningResult(status=planner_status, path=best_predictions, actions=best_actions, time=planning_time, tree={})
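
# Note: every example on this page leans on sequence_of_dicts_to_dict_of_tensors, which
# turns a list of {key: tensor} dicts into a single dict of stacked tensors. A minimal
# sketch of that conversion is below (assumed behavior, hypothetical `_sketch` name; the
# real helper also accepts an `axis` argument, as used in the `call` example that follows).
import tensorflow as tf

def sequence_of_dicts_to_dict_of_tensors_sketch(seq, axis=0):
    # stack a list of dicts that all share the same keys into one dict of tensors
    return {k: tf.stack([d[k] for d in seq], axis=axis) for k in seq[0]}
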
    def call(self, example, training, mask=None):
        actions = {k: example[k] for k in self.action_keys}
        input_sequence_length = actions[self.action_keys[0]].shape[1]
        s_0 = {k: example[k][:, 0] for k in self.state_keys}

        pred_states = [s_0]
        for t in range(input_sequence_length):
            s_t = pred_states[-1]
            action_t = {k: example[k][:, t] for k in self.action_keys}
            local_action_t = self.scenario.put_action_local_frame(
                s_t, action_t)

            s_t_local = self.scenario.put_state_local_frame(s_t)
            states_and_actions = list(s_t_local.values()) + list(
                local_action_t.values())

            # concat into one big state-action vector
            z_t = self.concat(states_and_actions)
            for dense_layer in self.dense_layers:
                z_t = dense_layer(z_t)

            delta_s_t = vector_to_dict(self.state_description, z_t)
            s_t_plus_1 = self.scenario.integrate_dynamics(s_t, delta_s_t)

            pred_states.append(s_t_plus_1)

        pred_states_dict = sequence_of_dicts_to_dict_of_tensors(pred_states,
                                                                axis=1)
        return pred_states_dict
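
# vector_to_dict above splits the flat network output back into per-state-variable
# tensors. A plausible sketch (assumed behavior, hypothetical name), assuming
# `description` maps each state key to its flattened dimension and `z` is [batch, total_dim]:
def vector_to_dict_sketch(description, z):
    start = 0
    out = {}
    for k, dim in description.items():
        out[k] = z[:, start:start + dim]
        start += dim
    return out
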
Example #3
    def from_example(self, example, training: bool = False):
        """ This is the function that all other functions eventually call """
        if 'batch_size' not in example:
            example['batch_size'] = self.get_batch_size(example)

        outputs = [
            net(net.preprocess_no_gradient(example, training),
                training=training) for net in self.nets
        ]
        outputs_dict = sequence_of_dicts_to_dict_of_tensors(outputs)

        outputs_dict = {
            k: flatten_after(outputs_dict[k], axis=self.get_num_batch_axes())
            for k in self.get_output_keys()
        }

        # axis 0 is the different networks
        mean = {
            k: tf.math.reduce_mean(outputs_dict[k], axis=0)
            for k in self.get_output_keys()
        }
        stdev = {
            k: tf.math.reduce_std(outputs_dict[k], axis=0)
            for k in self.get_output_keys()
        }

        # each output variable has its own vector of variances,
        # and here we sum all the elements of all the vectors to get a single scalar
        all_stdevs = tf.concat(list(stdev.values()), axis=-1)
        mean['stdev'] = tf.reduce_sum(all_stdevs, axis=-1, keepdims=True)
        return mean, stdev
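
# flatten_after above keeps the leading batch axes of a tensor and collapses everything
# after them into a single dimension. A minimal sketch of the assumed semantics
# (hypothetical name):
import tensorflow as tf

def flatten_after_sketch(x, axis):
    # keep the first `axis` dimensions, flatten the remaining ones into one
    return tf.reshape(x, tf.concat([tf.shape(x)[:axis], [-1]], axis=0))
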
 def check_constraint_tf(self,
                         environment: Dict,
                         states_sequence: List[Dict],
                         actions: List[Dict]):
     environment = add_batch(environment)
     states_sequence_dict = sequence_of_dicts_to_dict_of_tensors(states_sequence)
     states_sequence_dict = add_batch(states_sequence_dict)
     state_sequence_length = len(states_sequence)
     actions_dict = sequence_of_dicts_to_dict_of_tensors(actions)
     actions_dict = add_batch(actions_dict)
     mean_probabilities, stdev_probabilities = self.check_constraint_batched_tf(environment=environment,
                                                                                predictions=states_sequence_dict,
                                                                                actions=actions_dict,
                                                                                batch_size=1,
                                                                                state_sequence_length=state_sequence_length)
     mean_probabilities = remove_batch(mean_probabilities)
     stdev_probabilities = remove_batch(stdev_probabilities)
     return mean_probabilities, stdev_probabilities
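
# add_batch / remove_batch, used above, add or strip a leading batch dimension on every
# tensor in a dict. A minimal sketch for the dict case (assumed behavior, hypothetical
# names; the real helpers likely also accept bare tensors):
import tensorflow as tf

def add_batch_sketch(d):
    return {k: tf.expand_dims(tf.convert_to_tensor(v), axis=0) for k, v in d.items()}

def remove_batch_sketch(d):
    return {k: tf.squeeze(v, axis=0) for k, v in d.items()}
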
Example #5
def batch_stateless_sample_action(scenario: ExperimentScenario,
                                  environment: Dict, state: Dict,
                                  batch_size: int, n_action_samples: int,
                                  n_actions: int, action_params: Dict,
                                  action_rng: np.random.RandomState):
    # TODO: make the lowest level sample_action operate on batched state dictionaries
    action_sequences = scenario.sample_action_sequences(
        environment=environment,
        state=state,
        action_params=action_params,
        n_action_sequences=n_action_samples,
        action_sequence_length=n_actions,
        validate=False,
        action_rng=action_rng)
    action_sequences = [
        sequence_of_dicts_to_dict_of_tensors(a) for a in action_sequences
    ]
    action_sequences = sequence_of_dicts_to_dict_of_tensors(action_sequences)
    return {
        k: tf.tile(v[tf.newaxis], [batch_size, 1, 1, 1])
        for k, v in action_sequences.items()
    }
Example #6
 def propagate_differentiable(self, environment: Dict, start_state: Dict, actions: List[Dict]) -> Tuple[
     List[Dict], List[Dict]]:
     # add time dimension of size 1
     net_inputs = {k: tf.expand_dims(start_state[k], axis=0) for k in self.state_keys}
     net_inputs.update(environment)
     net_inputs.update(sequence_of_dicts_to_dict_of_tensors(actions))
     net_inputs = add_batch(net_inputs)
     net_inputs = make_dict_tf_float32(net_inputs)
     # the network returns a dictionary where each value is [T, n_state]
     # which is what you'd want for training, but for planning and execution and everything else
     # it is easier to deal with a list of states where each state is a dictionary
     mean_predictions, stdev_predictions = self.from_example(net_inputs, training=False)
     mean_predictions = remove_batch(mean_predictions)
     stdev_predictions = remove_batch(stdev_predictions)
     mean_predictions = dict_of_sequences_to_sequence_of_dicts_tf(mean_predictions)
     stdev_predictions = dict_of_sequences_to_sequence_of_dicts_tf(stdev_predictions)
     return mean_predictions, stdev_predictions
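
# dict_of_sequences_to_sequence_of_dicts_tf above inverts the dict-of-tensors layout back
# into a list of per-timestep dicts, which is easier to consume during planning and
# execution. A minimal sketch (assumed behavior, hypothetical name):
def dict_of_sequences_to_sequence_of_dicts_sketch(d):
    # invert {k: tensor of shape [T, ...]} into a list of T dicts of per-step tensors
    time = next(iter(d.values())).shape[0]
    return [{k: v[t] for k, v in d.items()} for t in range(time)]
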
Example #7
def predict(fwd_model: BaseDynamicsFunction,
            environment: Dict,
            start_states: List[Dict],
            actions: List[List[List[Dict]]],
            n_actions: int,
            n_actions_sampled: int,
            n_start_states: int) -> List[List[List[Dict]]]:
    # reformat the inputs to be efficiently batched
    actions_batched = batch_of_many_of_actions_sequences_to_dict(actions, n_actions_sampled, n_start_states, n_actions)

    start_states = sequence_of_dicts_to_dict_of_tensors(start_states)
    start_states = make_dict_tf_float32({k: tf.expand_dims(v, axis=1) for k, v in start_states.items()})
    # copy from start states for each random action
    start_states_tiled = {k: tf.concat([v] * n_actions_sampled, axis=0) for k, v in start_states.items()}

    # Actually do the predictions
    predictions_dict, _ = fwd_model.propagate_differentiable_batched(environment=environment,
                                                                     state=start_states_tiled,
                                                                     actions=actions_batched)

    # break out the num actions and num start states
    n_states = n_actions + 1
    predictions_dict = {k: tf.reshape(v, [n_start_states, n_actions_sampled, n_states, -1])
                        for k, v in predictions_dict.items()}

    # invert structure to List[List[List[Dict]]] and return
    predictions_list = []
    for i in range(n_start_states):
        predictions_i = []
        for j in range(n_actions_sampled):
            predictions_ij = []
            for t in range(n_actions + 1):
                prediction_ijt = {k: v[i, j, t] for k, v in predictions_dict.items()}
                predictions_ij.append(prediction_ijt)
            predictions_i.append(predictions_ij)
        predictions_list.append(predictions_i)
    return predictions_list
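
# make_dict_tf_float32, used above, coerces every value in a dict to a float32 tensor so
# that numpy inputs and TF tensors can be mixed freely. A minimal sketch (assumed
# behavior, hypothetical name):
import tensorflow as tf

def make_dict_tf_float32_sketch(d):
    return {k: tf.cast(tf.convert_to_tensor(v), tf.float32) for k, v in d.items()}
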
    def call(self, example, training, mask=None):
        batch_size = example['batch_size']
        time = example[self.action_keys[0]].shape[1] + 1

        s_0 = {k: example[k][:, 0] for k in self.state_keys}

        # Construct a [b, h, w, c, 3] grid of the indices which make up the local environment
        pixel_row_indices = tf.range(0,
                                     self.local_env_h_rows,
                                     dtype=tf.float32)
        pixel_col_indices = tf.range(0,
                                     self.local_env_w_cols,
                                     dtype=tf.float32)
        pixel_channel_indices = tf.range(0,
                                         self.local_env_c_channels,
                                         dtype=tf.float32)
        x_indices, y_indices, z_indices = tf.meshgrid(pixel_col_indices,
                                                      pixel_row_indices,
                                                      pixel_channel_indices)

        # Make batched versions for creating the local environment
        batch_y_indices = tf.cast(
            tf.tile(tf.expand_dims(y_indices, axis=0), [batch_size, 1, 1, 1]),
            tf.int64)
        batch_x_indices = tf.cast(
            tf.tile(tf.expand_dims(x_indices, axis=0), [batch_size, 1, 1, 1]),
            tf.int64)
        batch_z_indices = tf.cast(
            tf.tile(tf.expand_dims(z_indices, axis=0), [batch_size, 1, 1, 1]),
            tf.int64)

        # Convert for rastering state
        pixel_indices = tf.stack([y_indices, x_indices, z_indices], axis=3)
        pixel_indices = tf.expand_dims(pixel_indices, axis=0)
        pixel_indices = tf.tile(pixel_indices, [batch_size, 1, 1, 1, 1])

        # # DEBUG
        # # plot the occupancy grid
        # time_steps = np.arange(time)
        # anim = RvizAnimationController(time_steps)
        # b = 0
        # full_env_dict = {
        #     'env': example['env'][b],
        #     'origin': example['origin'][b],
        #     'res': example['res'][b],
        #     'extent': example['extent'][b],
        # }
        # self.scenario.plot_environment_rviz(full_env_dict)
        # # END DEBUG

        pred_states = [s_0]
        for t in range(time - 1):
            s_t = pred_states[-1]
            s_t_local = self.scenario.put_state_local_frame(s_t)
            action_t = {k: example[k][:, t] for k in self.action_keys}
            local_action_t = self.scenario.put_action_local_frame(
                s_t, action_t)

            local_env_center_t = self.scenario.local_environment_center_differentiable(
                s_t)

            # by converting to and from the frame of the full environment, we ensure the grids are aligned
            indices = batch_point_to_idx_tf_3d_in_batched_envs(
                local_env_center_t, example)
            local_env_center_t = batch_idx_to_point_3d_in_env_tf(
                *indices, example)

            local_env_t, local_env_origin_t = get_local_env(
                center_point=local_env_center_t,
                full_env=example['env'],
                full_env_origin=example['origin'],
                res=example['res'],
                local_h_rows=self.local_env_h_rows,
                local_w_cols=self.local_env_w_cols,
                local_c_channels=self.local_env_c_channels,
                batch_x_indices=batch_x_indices,
                batch_y_indices=batch_y_indices,
                batch_z_indices=batch_z_indices,
                batch_size=batch_size)

            local_voxel_grid_t_array = tf.TensorArray(tf.float32,
                                                      size=0,
                                                      dynamic_size=True)
            local_voxel_grid_t_array = local_voxel_grid_t_array.write(
                0, local_env_t)
            for i, state_component_t in enumerate(s_t.values()):
                state_component_voxel_grid = raster_3d(
                    state=state_component_t,
                    pixel_indices=pixel_indices,
                    res=example['res'],
                    origin=local_env_origin_t,
                    h=self.local_env_h_rows,
                    w=self.local_env_w_cols,
                    c=self.local_env_c_channels,
                    k=self.rope_image_k,
                    batch_size=batch_size)

                local_voxel_grid_t_array = local_voxel_grid_t_array.write(
                    i + 1, state_component_voxel_grid)
            local_voxel_grid_t = tf.transpose(local_voxel_grid_t_array.stack(),
                                              [1, 2, 3, 4, 0])
            # add channel dimension information because tf.function erases it somehow...
            local_voxel_grid_t.set_shape(
                [None, None, None, None,
                 len(self.state_keys) + 1])

            # # DEBUG
            # local_env_dict = {
            #     'env': tf.clip_by_value(tf.reduce_sum(local_voxel_grid_t[b], axis=-1), 0, 1),
            #     # 'env': local_voxel_grid_t[b],
            #     'origin': local_env_origin_t[b].numpy(),
            #     'res': example['res'][b].numpy(),
            # }
            # msg = environment_to_occupancy_msg(local_env_dict, frame='local_occupancy')
            # link_bot_sdf_utils.send_occupancy_tf(self.scenario.broadcaster, local_env_dict, frame='local_occupancy')
            # self.debug_pub.publish(msg)
            # self.scenario.plot_state_rviz(numpify(index_dict_of_batched_vectors_tf(s_t, b)), label='pred')
            # local_extent = compute_extent_3d(*local_voxel_grid_t[b].shape[:3], resolution=example['res'][b].numpy())
            # depth, width, height = extent_to_env_size(local_extent)
            # bbox_msg = BoundingBox()
            # bbox_msg.header.frame_id = 'local_occupancy'
            # bbox_msg.pose.position.x = width / 2
            # bbox_msg.pose.position.y = depth / 2
            # bbox_msg.pose.position.z = height / 2
            # bbox_msg.dimensions.x = width
            # bbox_msg.dimensions.y = depth
            # bbox_msg.dimensions.z = height
            # self.local_env_bbox_pub.publish(bbox_msg)

            # anim.step()
            # # END DEBUG

            # CNN
            z_t = local_voxel_grid_t
            for conv_layer, pool_layer in zip(self.conv_layers,
                                              self.pool_layers):
                z_t = conv_layer(z_t)
                z_t = pool_layer(z_t)
            conv_z_t = self.flatten_conv_output(z_t)
            for dense_layer in self.conv_only_dense_layers:
                conv_z_t = dense_layer(conv_z_t)

            # concat into one big state-action vector
            states_and_actions = list(s_t_local.values()) + list(
                local_action_t.values())
            state_action_t = self.concat2(states_and_actions)
            for dense_layer in self.state_action_dense_layers:
                state_action_t = dense_layer(state_action_t)

            # combine state/action vector with output of CNN
            full_z_t = self.combine_state_action_and_conv_output(
                [state_action_t, conv_z_t])

            # dense layers for combined vector
            for dense_layer in self.final_dense_layers:
                full_z_t = dense_layer(full_z_t)

            delta_s_t = self.vector_to_state_dict(full_z_t)
            s_t_plus_1 = self.scenario.integrate_dynamics(s_t, delta_s_t)

            pred_states.append(s_t_plus_1)

        pred_states_dict = sequence_of_dicts_to_dict_of_tensors(pred_states,
                                                                axis=1)
        return pred_states_dict
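
# numpify, used in several of these examples, recursively converts tensors (and dicts or
# lists of tensors) into numpy arrays before plotting. A minimal sketch (assumed
# behavior, hypothetical name):
def numpify_sketch(x):
    if isinstance(x, dict):
        return {k: numpify_sketch(v) for k, v in x.items()}
    if isinstance(x, (list, tuple)):
        return type(x)(numpify_sketch(v) for v in x)
    return x.numpy() if hasattr(x, 'numpy') else x
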
Example #9
def predict_and_classify(fwd_model: Ensemble,
                         classifier: NNClassifierWrapper,
                         environment: Dict,
                         start_states: List[Dict],
                         actions: List[List[List[Dict]]],
                         n_actions: int,
                         n_actions_sampled: int,
                         n_start_states: int):
    state_sequence_length = n_actions + 1

    # reformat the inputs to be efficiently batched
    actions_dict = {}
    for actions_for_start_state in actions:
        for action_sequence in actions_for_start_state:
            for action in action_sequence:
                for k, v in action.items():
                    if k not in actions_dict:
                        actions_dict[k] = []
                    actions_dict[k].append(v)

    environment_batched = {k: tf.stack([v] * n_actions_sampled * n_start_states, axis=0)
                           for k, v in environment.items()}
    actions_batched = {k: tf.reshape(v, [n_actions_sampled * n_start_states, n_actions, -1])
                       for k, v in actions_dict.items()}
    start_states = sequence_of_dicts_to_dict_of_tensors(start_states)
    start_states = make_dict_tf_float32({k: tf.expand_dims(v, axis=1) for k, v in start_states.items()})
    # copy from start states for each random action
    start_states_tiled = {k: tf.concat([v] * n_actions_sampled, axis=0) for k, v in start_states.items()}

    # Actually do the predictions
    predictions_dict = fwd_model.propagate_differentiable_batched(start_states=start_states_tiled,
                                                                  actions=actions_batched)

    # Run classifier
    accept_probabilities = classifier.check_constraint_batched_tf(environment=environment_batched,
                                                                  predictions=predictions_dict,
                                                                  actions=actions_batched,
                                                                  state_sequence_length=state_sequence_length,
                                                                  batch_size=n_start_states * n_actions_sampled)
    accept_probabilities = tf.reshape(
        accept_probabilities, [n_start_states, n_actions_sampled, state_sequence_length - 1, -1])

    # break out the num actions and num start states
    state_sequence_length = n_actions + 1
    predictions_dict = {k: tf.reshape(v, [n_start_states, n_actions_sampled, state_sequence_length, -1]) for k, v in
                        predictions_dict.items()}

    # invert structure to List[List[List[Dict]]] and return
    predictions_list = []
    accept_probabilities_list = []
    for i in range(n_start_states):
        predictions_i = []
        accept_probabilities_i = []
        for j in range(n_actions_sampled):
            predictions_ij = []
            accept_probabilities_ij = []
            for t in range(n_actions + 1):
                prediction_ijt = {k: v[i, j, t] for k, v in predictions_dict.items()}
                predictions_ij.append(prediction_ijt)
                if t < n_actions:
                    accept_probabilities_ij.append(accept_probabilities[i, j, t])
            predictions_i.append(predictions_ij)
            accept_probabilities_i.append(accept_probabilities_ij)
        predictions_list.append(predictions_i)
        accept_probabilities_list.append(accept_probabilities_i)

    return predictions_list, accept_probabilities_list
def test_classifier(classifier_model_dir: pathlib.Path,
                    fwd_model_dir: List[pathlib.Path],
                    n_actions: int,
                    saved_state: Optional[pathlib.Path],
                    generate_actions: Callable):
    fwd_model, _ = dynamics_utils.load_generic_model([pathlib.Path(p) for p in fwd_model_dir])
    classifier: NNClassifierWrapper = classifier_utils.load_generic_model([classifier_model_dir])

    service_provider = GazeboServices()
    service_provider.setup_env(verbose=0,
                               real_time_rate=0,
                               max_step_size=fwd_model.data_collection_params['max_step_size'],
                               play=False)
    if saved_state:
        service_provider.restore_from_bag(saved_state)

    scenario = fwd_model.scenario
    scenario.on_before_get_state_or_execute_action()

    # NOTE: perhaps it would make sense for a "fwd_model" to have an API for get_env, get_state, sample_action, etc.,
    #  because the fwd_model knows its scenario, and importantly it also knows its data_collection_params,
    #  which is what we're using here to pass to the scenario methods
    params = fwd_model.data_collection_params
    environment = numpify(scenario.get_environment(params))
    start_state = numpify(scenario.get_state())

    start_state_tiled = repeat(start_state, n_actions, axis=0, new_axis=True)
    start_states_tiled = add_time_dim(start_state_tiled)

    actions = generate_actions(environment, start_state_tiled, scenario, params, n_actions)

    environment_tiled = repeat(environment, n_actions, axis=0, new_axis=True)
    actions_dict = sequence_of_dicts_to_dict_of_tensors(actions)
    actions_dict = add_time_dim(actions_dict)
    predictions, _ = fwd_model.propagate_differentiable_batched(environment=environment_tiled,
                                                                state=start_states_tiled,
                                                                actions=actions_dict)

    # Run classifier
    state_sequence_length = 2
    accept_probabilities, _ = classifier.check_constraint_batched_tf(environment=environment_tiled,
                                                                     predictions=predictions,
                                                                     actions=actions_dict,
                                                                     state_sequence_length=state_sequence_length,
                                                                     batch_size=n_actions)
    # animate over the sampled actions
    anim = RvizAnimation(scenario=scenario,
                         n_time_steps=n_actions,
                         init_funcs=[init_viz_env],
                         t_funcs=[
                             lambda s, e, t: init_viz_env(s, e),
                             viz_transition_for_model_t({}, fwd_model),
                             ExperimentScenario.plot_accept_probability_t,
                         ],
                         )
    example = {
        'accept_probability': tf.squeeze(accept_probabilities, axis=1),
    }
    example.update(environment)
    example.update(predictions)
    example.update(actions_dict)
    anim.play(example)
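
# repeat and add_time_dim above are small dict helpers: repeat tiles every value n times
# along an axis (optionally inserting that axis first), and add_time_dim inserts a time
# axis of length 1. Plausible sketches (assumed behavior, hypothetical names):
import tensorflow as tf

def repeat_sketch(d, n, axis=0, new_axis=False):
    def _tile(v):
        v = tf.convert_to_tensor(v)
        if new_axis:
            v = tf.expand_dims(v, axis=axis)
        multiples = [1] * v.shape.rank
        multiples[axis] = n
        return tf.tile(v, multiples)
    return {k: _tile(v) for k, v in d.items()}

def add_time_dim_sketch(d):
    return {k: tf.expand_dims(v, axis=1) for k, v in d.items()}
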
 def check_constraint_from_example(self, example: Dict, training: Optional[bool] = False):
     predictions = [net(net.preprocess_no_gradient(example, training), training=training) for net in self.nets]
     predictions_dict = sequence_of_dicts_to_dict_of_tensors(predictions)
     mean_predictions = {k: tf.math.reduce_mean(v, axis=0) for k, v in predictions_dict.items()}
     stdev_predictions = {k: tf.math.reduce_std(v, axis=0) for k, v in predictions_dict.items()}
     return mean_predictions, stdev_predictions