def __call__(self, environment: Dict, state: Dict):
        # sample a number of candidate actions (could be batched) and pick the one with the highest predicted recovery probability
        max_unstuck_probability = -1
        best_action = None
        # anim = RvizAnimationController(np.arange(self.n_action_samples))
        # while not anim.done:
        for _ in range(self.n_action_samples):
            self.scenario.last_action = None
            action = self.scenario.sample_action(
                action_rng=self.action_rng,
                environment=environment,
                state=state,
                action_params=self.data_collection_params,
                validate=True)

            # TODO: use the unconstrained dynamics to predict the state resulting from (e, s, a)
            # then add that to the recovery_model_input

            recovery_model_input = dict(environment)  # copy so the caller's environment dict is not mutated
            # add a time dimension to the state and the action
            recovery_model_input.update(add_batch(state))
            recovery_model_input.update(add_batch(action))
            recovery_model_input = make_dict_tf_float32(
                add_batch(recovery_model_input))
            recovery_model_input.update({
                'batch_size': 1,
                'time': 2,
            })
            recovery_model_output = self.model(recovery_model_input,
                                               training=False)
            recovery_probability = recovery_model_output['probabilities']

            # self.scenario.plot_environment_rviz(environment)
            # self.scenario.plot_state_rviz(state, label='stuck state')
            # self.scenario.plot_recovery_probability(recovery_probability)
            # color_factor = log_scale_0_to_1(tf.squeeze(recovery_probability), k=100)
            # self.scenario.plot_action_rviz(state, action, label='proposed', color=cm.Greens(color_factor), idx=1)

            if recovery_probability > max_unstuck_probability:
                max_unstuck_probability = recovery_probability
                best_action = action
                # print(max_unstuck_probability)
                # self.scenario.plot_action_rviz(state, action, label='best_proposed', color='g', idx=2)
            # anim.step()
        best_action_noisy = self.scenario.add_noise(best_action,
                                                    self.noise_rng)
        return best_action_noisy
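
All of the examples on this page share the same round trip: add_batch to give every value a leading batch (or time) dimension, run the network, then remove_batch to strip that dimension off again. The helpers themselves are not shown here; the following is only a minimal sketch of what they might look like, assuming they expand or squeeze axis 0 and accept either a single tensor or a dict of tensors, with make_dict_tf_float32 sketched the same way. The real implementations may handle more cases.

import tensorflow as tf


def add_batch(x, axis=0):
    # assumed sketch: prepend a singleton batch (or time) dimension;
    # works on a single tensor or on every value of a dict
    if isinstance(x, dict):
        return {k: add_batch(v, axis=axis) for k, v in x.items()}
    return tf.expand_dims(tf.convert_to_tensor(x), axis=axis)


def remove_batch(x, axis=0):
    # assumed sketch: inverse of add_batch, squeezing the singleton dimension
    if isinstance(x, dict):
        return {k: remove_batch(v, axis=axis) for k, v in x.items()}
    return tf.squeeze(x, axis=axis)


def make_dict_tf_float32(d):
    # assumed sketch: cast every value of a dict to a float32 tensor
    return {k: tf.cast(tf.convert_to_tensor(v), tf.float32) for k, v in d.items()}
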
Example #2
 def sample(self, environment: Dict, state: Dict):
     input_dict = dict(environment)  # copy so the caller's environment dict is not mutated
     input_dict.update({add_predicted(k): tf.expand_dims(v, axis=0) for k, v in state.items()})
     input_dict = add_batch(input_dict)
     input_dict = {k: tf.cast(v, tf.float32) for k, v in input_dict.items()}
     output = self.net.sample(input_dict)
     output = remove_batch(output)
     output = numpify(output)
     return output
 def check_constraint_tf(self,
                         environment: Dict,
                         states_sequence: List[Dict],
                         actions: List[Dict]):
     environment = add_batch(environment)
     states_sequence_dict = sequence_of_dicts_to_dict_of_tensors(states_sequence)
     states_sequence_dict = add_batch(states_sequence_dict)
     state_sequence_length = len(states_sequence)
     actions_dict = sequence_of_dicts_to_dict_of_tensors(actions)
     actions_dict = add_batch(actions_dict)
     mean_probabilities, stdev_probabilities = self.check_constraint_batched_tf(environment=environment,
                                                                                predictions=states_sequence_dict,
                                                                                actions=actions_dict,
                                                                                batch_size=1,
                                                                                state_sequence_length=state_sequence_length)
     mean_probabilities = remove_batch(mean_probabilities)
     stdev_probabilities = remove_batch(stdev_probabilities)
     return mean_probabilities, stdev_probabilities
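
Two helpers used above are also not shown on this page. As a rough, assumed sketch only: add_predicted presumably namespaces a state key so predicted values don't collide with observed ones, and sequence_of_dicts_to_dict_of_tensors stacks a list of per-time-step dicts into a single dict of [T, ...] tensors.

import tensorflow as tf


def add_predicted(key: str) -> str:
    # assumed sketch: mark a state key as a prediction, e.g. 'rope' -> 'predicted/rope'
    return 'predicted/' + key


def sequence_of_dicts_to_dict_of_tensors(seq, axis=0):
    # assumed sketch: [{k: v_0}, {k: v_1}, ...] -> {k: tensor of shape [T, ...]}
    return {k: tf.stack([d[k] for d in seq], axis=axis) for k in seq[0]}
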
Example #4
    def filter_differentiable(self, environment: Dict, state: Optional[Dict], observation: Dict) -> Tuple[Dict, Dict]:
        net_inputs = {}
        net_inputs.update(environment)
        net_inputs.update(observation)
        if state is not None:
            net_inputs.update(state)
        net_inputs = add_batch(net_inputs)
        net_inputs = make_dict_tf_float32(net_inputs)

        mean_state, stdev_state = self.from_example(net_inputs, training=False)
        mean_state = remove_batch(mean_state)
        stdev_state = remove_batch(stdev_state)
        return mean_state, stdev_state
Example #5
 def propagate_differentiable(self, environment: Dict, start_state: Dict,
                              actions: List[Dict]) -> Tuple[List[Dict], List[Dict]]:
     # add time dimension of size 1
     net_inputs = {k: tf.expand_dims(start_state[k], axis=0) for k in self.state_keys}
     net_inputs.update(environment)
     net_inputs.update(sequence_of_dicts_to_dict_of_tensors(actions))
     net_inputs = add_batch(net_inputs)
     net_inputs = make_dict_tf_float32(net_inputs)
     # the network returns a dictionary where each value is [T, n_state], which is what you'd want for training;
     # for planning and execution it is easier to work with a list of per-time-step state dictionaries
     mean_predictions, stdev_predictions = self.from_example(net_inputs, training=False)
     mean_predictions = remove_batch(mean_predictions)
     stdev_predictions = remove_batch(stdev_predictions)
     mean_predictions = dict_of_sequences_to_sequence_of_dicts_tf(mean_predictions)
     stdev_predictions = dict_of_sequences_to_sequence_of_dicts_tf(stdev_predictions)
     return mean_predictions, stdev_predictions
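
dict_of_sequences_to_sequence_of_dicts_tf is the inverse conversion: the network's dict of [T, ...] tensors becomes a list of per-time-step dicts. Together with numpify (used in the test script further down), a hedged sketch might be:

import numpy as np
import tensorflow as tf


def dict_of_sequences_to_sequence_of_dicts_tf(d):
    # assumed sketch: {k: tensor of shape [T, ...]} -> list of T dicts {k: tensor of shape [...]}
    time_steps = next(iter(d.values())).shape[0]
    return [{k: v[t] for k, v in d.items()} for t in range(time_steps)]


def numpify(d):
    # assumed sketch: convert every value of a dict to a numpy array
    return {k: np.asarray(v) for k, v in d.items()}
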
Example #6
    def propagate_differentiable(self, full_env: np.ndarray,
                                 full_env_origin: np.ndarray, res: float,
                                 start_state: Dict[str, np.ndarray],
                                 actions: tf.Variable) -> List[Dict]:
        """
        :param full_env:        (H, W)
        :param full_env_origin: (2)
        :param res:             scalar
        :param start_state:          each value in the dictionary should be of shape (batch, n_state)
        :param actions:        (T, 2)
        :return: states:       each value in the dictionary should be of shape (batch, T+1, n_state)
        """
        test_x = {
            'action': tf.convert_to_tensor(actions, dtype=tf.float32),  # shape: T, 2
            'res': tf.convert_to_tensor(res, dtype=tf.float32),  # shape: 1
            'full_env/env': tf.convert_to_tensor(full_env, dtype=tf.float32),  # shape: H, W
            'full_env/origin': tf.convert_to_tensor(full_env_origin, dtype=tf.float32),  # shape: 2
            'full_env/res': tf.convert_to_tensor(res, dtype=tf.float32),  # scalar
        }

        for state_key, v in start_state.items():
            # handles conversion from double -> float and adds a time dimension of size 1
            state_t = tf.convert_to_tensor(v, dtype=tf.float32)
            test_x[state_key] = tf.expand_dims(state_t, axis=0)

        test_x = add_batch(test_x)
        predictions = self.net((test_x, False))
        predictions = remove_batch(predictions)
        predictions = dict_of_sequences_to_sequence_of_dicts_tf(predictions)

        return predictions
def test_as_inverse_model(filter_model, latent_dynamics_model, test_dataset,
                          test_tf_dataset):
    scenario = test_dataset.scenario
    shooting_method = ShootingMethod(fwd_model=latent_dynamics_model,
                                     classifier_model=None,
                                     scenario=scenario,
                                     params={'n_samples': 1000})
    trajopt = TrajectoryOptimizer(fwd_model=latent_dynamics_model,
                                  classifier_model=None,
                                  scenario=scenario,
                                  params={
                                      "iters": 100,
                                      "length_alpha": 0,
                                      "goal_alpha": 1000,
                                      "constraints_alpha": 0,
                                      "action_alpha": 0,
                                      "initial_learning_rate": 0.0001,
                                  })

    s_color_viz_pub = rospy.Publisher("s_state_color_viz",
                                      Image,
                                      queue_size=10,
                                      latch=True)
    s_next_color_viz_pub = rospy.Publisher("s_next_state_color_viz",
                                           Image,
                                           queue_size=10,
                                           latch=True)
    image_diff_viz_pub = rospy.Publisher("image_diff_viz",
                                         Image,
                                         queue_size=10,
                                         latch=True)

    action_horizon = 1
    initial_actions = []
    total_errors = []
    for example_idx, example in enumerate(test_tf_dataset):
        stepper = RvizAnimationController(
            n_time_steps=test_dataset.steps_per_traj)
        for t in range(test_dataset.steps_per_traj - 1):
            print(example_idx)
            environment = {}
            current_observation = remove_batch(
                scenario.index_observation_time_batched(add_batch(example), t))

            for j in range(action_horizon):
                left_gripper_position = [0, 0, 0]
                right_gripper_position = [0, 0, 0]
                initial_action = {
                    'left_gripper_position': left_gripper_position,
                    'right_gripper_position': right_gripper_position,
                }
                initial_actions.append(initial_action)
            goal_observation = {
                k: example[k][1]
                for k in filter_model.obs_keys
            }
            planning_query = PlanningQuery(start=current_observation,
                                           goal=goal_observation,
                                           environment=environment,
                                           seed=1)
            planning_result = shooting_method.plan(planning_query)
            actions = planning_result.actions
            planned_path = planning_result.latent_path
            true_action = numpify(
                {k: example[k][0]
                 for k in latent_dynamics_model.action_keys})

            for j in range(action_horizon):
                optimized_action = actions[j]
                # optimized_action = {
                #     'left_gripper_position': current_observation['left_gripper'],
                #     'right_gripper_position': current_observation['right_gripper'],
                # }
                true_action = numpify({
                    k: example[k][j]
                    for k in latent_dynamics_model.action_keys
                })

                # Visualize
                s = numpify(
                    remove_batch(
                        scenario.index_observation_time_batched(
                            add_batch(example), 0)))
                s.update(
                    numpify(
                        remove_batch(
                            scenario.index_observation_features_time_batched(
                                add_batch(example), 0))))
                s_next = numpify(
                    remove_batch(
                        scenario.index_observation_time_batched(
                            add_batch(example), 1)))
                s_next.update(
                    numpify(
                        remove_batch(
                            scenario.index_observation_features_time_batched(
                                add_batch(example), 1))))
                scenario.plot_state_rviz(s, label='t', color="#ff000055", id=1)
                scenario.plot_state_rviz(s_next,
                                         label='t+1',
                                         color="#aa222255",
                                         id=2)
                # scenario.plot_action_rviz(s, optimized_action, label='inferred', color='#00ff00', id=1)
                # scenario.plot_action_rviz(s, true_action, label='true', color='#ee770055', id=2)

                publish_color_image(s_color_viz_pub, s['rgbd'][:, :, :3])
                publish_color_image(s_next_color_viz_pub,
                                    s_next['rgbd'][:, :, :3])
                diff = s['rgbd'][:, :, :3] - s_next['rgbd'][:, :, :3]
                publish_color_image(image_diff_viz_pub, diff)

                # Metrics
                total_error = 0
                for v1, v2 in zip(optimized_action.values(),
                                  true_action.values()):
                    total_error += -np.dot(v1, v2)
                total_errors.append(total_error)

                stepper.step()

        if example_idx > 100:
            break
    print(np.min(total_errors))
    print(np.max(total_errors))
    print(np.mean(total_errors))
    plt.xlabel("total error (meter-ish)")
    plt.hist(total_errors, bins=np.linspace(0, 2, 20))
    plt.show()
def filter_no_reconverging(example):
    is_close = example['is_close']
    return tf.logical_not(remove_batch(is_reconverging(add_batch(is_close))))
def filter_only_reconverging(example):
    is_close = example['is_close']
    return remove_batch(is_reconverging(add_batch(is_close)))
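
Both predicates return a scalar boolean per example, which suggests they are meant as tf.data filters. A hedged usage sketch, with a made-up dataset variable:

# assumed usage: split a tf.data pipeline by whether the trajectory reconverges
reconverging_dataset = classifier_tf_dataset.filter(filter_only_reconverging)
non_reconverging_dataset = classifier_tf_dataset.filter(filter_no_reconverging)
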
 def _gen(e):
     example = next(goal_dataset_iterator)
     example_t = dataset.index_time_batched(example_batched=add_batch(example), t=1)
     goal = remove_batch(example_t)
     return goal
def index_time(e: Dict, time_indexed_keys: List[str], t: int):
    return remove_batch(index_time_batched(add_batch(e), time_indexed_keys, t))
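
index_time is a one-line summary of the whole pattern: add the batch dimension, call the batched helper, then remove it. A hypothetical call, with made-up keys, might look like:

import tensorflow as tf

# hypothetical example: 'position' is time-indexed, 'res' is a static scalar
example = {
    'position': tf.constant([[0.0, 0.0], [0.1, 0.0], [0.2, 0.1]]),  # [T, 2]
    'res': tf.constant(0.01),
}
state_t1 = index_time(example, time_indexed_keys=['position'], t=1)
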
 def add_time(self):
     self.dnames.insert(1, "time")
     self.data = add_batch(self.data)
     return self
 def remove_batch(self):
     self.dnames.remove("batch")
     self.data = remove_batch(self.data)
     return self
 def add_batch(self):
     self.dnames.insert(0, "batch")
     self.data = add_batch(self.data)
     return self