Example #1
def test_isnan_check():
    """
    Test _isnan_check function
    Returns
    -------

    """

    blocksize = 100

    input_vector = np.repeat(np.array([np.nan]), blocksize)
    correct_vector = np.repeat(np.array([np.True_]), blocksize)
    test_vector = _isnan_check(input_vector)

    assert_allclose(test_vector, correct_vector)
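The implementation of _isnan_check is not shown on this page. Judging from this test and from the boolean checks in the step() methods below, it reduces its input to a single NaN indicator; a minimal sketch under that assumption (not the library's actual code) is:

import numpy as np

def _isnan_check(array):
    # sketch only: True if any entry of the input array is NaN
    return np.isnan(np.asarray(array)).any()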
Example #2
    def step(self, action):
        """
        This method integrates the simulation number of steps given in num_steps_per_update, using the actions
        selected by the controller and returns state information, reward, and done boolean.

        Parameters
        ----------
        action :  numpy.ndarray
            1D (n_torque_directions * number_of_control_points,) array containing data with 'float' type.
            Action returns control points selected by control algorithm to the Elastica simulation. n_torque_directions
            is number of torque directions, this is controlled by the dim.

        Returns
        -------
        state : numpy.ndarray
            1D (number_of_states) array containing data with 'float' type.
            Size of the states depends on the problem.
        reward : float
            Reward after the integration.
        done: boolean
            Stops, simulation or training if done is true. This means, simulation reached final time or NaN is
            detected in the simulation.

        """

        # action contains the control points for actuation torques in different directions in range [-1, 1]
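        # layout of the action vector by dim (as read from the branches below):
        #   dim == 2.0: one block of normal-direction control points; binormal and tangent set to zero
        #   dim == 2.5: normal-direction block followed by a tangent-direction block; binormal set to zero
        #   dim == 3.0: normal-direction block followed by a binormal-direction block; tangent set to zero
        #   dim == 3.5: three equal blocks, in normal, binormal, tangent order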
        if self.dim == 2.0:
            self.spline_points_func_array_normal_dir[:] = action[:self.number_of_control_points]
            self.spline_points_func_array_binormal_dir[:] = action[:self.number_of_control_points] * 0.0
            self.spline_points_func_array_tangent_dir[:] = action[:self.number_of_control_points] * 0.0
        elif self.dim == 2.5:
            self.spline_points_func_array_normal_dir[:] = action[:self.number_of_control_points]
            self.spline_points_func_array_binormal_dir[:] = action[:self.number_of_control_points] * 0.0
            self.spline_points_func_array_tangent_dir[:] = action[self.number_of_control_points:]
        # apply binormal activations if solving 3D case
        elif self.dim == 3.0:
            self.spline_points_func_array_normal_dir[:] = action[:self.number_of_control_points]
            self.spline_points_func_array_binormal_dir[:] = action[self.number_of_control_points:]
            self.spline_points_func_array_tangent_dir[:] = action[:self.number_of_control_points] * 0.0
        elif self.dim == 3.5:
            self.spline_points_func_array_normal_dir[:] = action[:self.number_of_control_points]
            self.spline_points_func_array_binormal_dir[:] = action[self.number_of_control_points:2 * self.number_of_control_points]
            self.spline_points_func_array_tangent_dir[:] = action[2 * self.number_of_control_points:]

        # Store control points for this learning step to reproduce the results later on.
        if self.COLLECT_CONTROL_POINTS_DATA:
            self.control_point_history_array[self.current_step, :] = action[:]

        # Do multiple time steps of the simulation for <one learning step>
        for _ in range(self.num_steps_per_update):
            self.time_tracker = self.do_step(
                self.StatefulStepper,
                self.stages_and_updates,
                self.simulator,
                self.time_tracker,
                self.time_step,
            )

        if self.mode == 3:
            ##### (+1, 0, 0) -> (0, -1, 0) -> (-1, 0, 0) -> (0, +1, 0) -> (+1, 0, 0) #####
            if (self.current_step %
                (1.0 / (self.h_time_step * self.num_steps_per_update)) == 0):
                if self.dir_indicator == 1:
                    self.sphere.velocity_collection[..., 0] = [
                        0.0,
                        -self.sphere_initial_velocity,
                        0.0,
                    ]
                    self.dir_indicator = 2
                elif self.dir_indicator == 2:
                    self.sphere.velocity_collection[..., 0] = [
                        -self.sphere_initial_velocity,
                        0.0,
                        0.0,
                    ]
                    self.dir_indicator = 3
                elif self.dir_indicator == 3:
                    self.sphere.velocity_collection[..., 0] = [
                        0.0,
                        +self.sphere_initial_velocity,
                        0.0,
                    ]
                    self.dir_indicator = 4
                elif self.dir_indicator == 4:
                    self.sphere.velocity_collection[..., 0] = [
                        +self.sphere_initial_velocity,
                        0.0,
                        0.0,
                    ]
                    self.dir_indicator = 1
                else:
                    print("ERROR")

        self.current_step += 1

        # observe the current state (the sensed signal)
        state = self.get_state()

        dist = np.linalg.norm(self.shearable_rod.position_collection[..., -1] -
                              self.sphere.position_collection[..., 0])
        """ Reward Engineering """
        reward_dist = -np.square(dist).sum()
        reward = 1.0 * reward_dist
        """ Reward Engineering """
        """ Done is a boolean to reset the environment before episode is completed """
        done = False

        if np.isclose(dist, 0.0, atol=0.05 * 2.0).all():
            self.on_goal += self.time_step
            reward += 0.5
        # for this specific case, check on_goal parameter
        if np.isclose(dist, 0.0, atol=0.05).all():
            self.on_goal += self.time_step
            reward += 1.5
        else:
            self.on_goal = 0

        if self.current_step >= self.total_learning_steps:
            done = True
            if reward > 0:
                print(
                    " Reward greater than 0! Reward: %0.3f, Distance: %0.3f " %
                    (reward, dist))
            else:
                print(" Finished simulation. Reward: %0.3f, Distance: %0.3f " %
                      (reward, dist))
        """ Done is a boolean to reset the environment before episode is completed """

        # set previous_action = action
        self.previous_action = action

        invalid_values_condition_state = _isnan_check(state)
        if invalid_values_condition_state:
            print(" Nan detected in the state data, exiting simulation now")
            reward = -100
            # replace NaN entries with the corresponding values saved from the previous step
            nan_idx = np.argwhere(np.isnan(state))
            state[nan_idx] = self.state_buffer[nan_idx]
            done = True

        # hold onto the state data in case the simulation blows up next step
        self.state_buffer = state

        return state, reward, done, {"ctime": self.time_tracker}
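To make the reward shaping above easier to follow, here is a small self-contained re-statement of its distance terms. sketch_distance_reward is an illustrative helper, not part of the environment code, and it omits the NaN handling:

import numpy as np

def sketch_distance_reward(dist, on_goal, time_step):
    # mirrors the distance thresholds used in step() above
    reward = -np.square(dist).sum()
    if np.isclose(dist, 0.0, atol=0.05 * 2.0):
        on_goal += time_step
        reward += 0.5
    if np.isclose(dist, 0.0, atol=0.05):
        on_goal += time_step
        reward += 1.5
    else:
        on_goal = 0
    return reward, on_goal

print(sketch_distance_reward(0.3, 0.0, 1e-4))   # far from the target: distance penalty only
print(sketch_distance_reward(0.02, 0.0, 1e-4))  # inside both tolerance bands: both bonuses added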
Example #3
    def step(self, action):
        """
        This method integrates the simulation number of steps given in num_steps_per_update, using the actions
        selected by the controller and returns state information, reward, and done boolean.

        Parameters
        ----------
        action :  numpy.ndarray
            1D (n_torque_directions * number_of_control_points,) array containing data with 'float' type.
            Action returns control points selected by control algorithm to the Elastica simulation. n_torque_directions
            is number of torque directions, this is controlled by the dim.

        Returns
        -------
        state : numpy.ndarray
            1D (number_of_states) array containing data with 'float' type.
            Size of the states depends on the problem.
        reward : float
            Reward after the integration.
        done: boolean
            Stops, simulation or training if done is true. This means, simulation reached final time or NaN is
            detected in the simulation.

        """

        # action contains the control points for actuation torques in different directions in range [-1, 1]
        self.action = action

        # set binormal activations to 0 if solving 2D case
        if self.dim == 2.0:
            self.spline_points_func_array_normal_dir[:] = action[:self.number_of_control_points]
            self.spline_points_func_array_binormal_dir[:] = action[:self.number_of_control_points] * 0.0
            self.spline_points_func_array_twist_dir[:] = action[:self.number_of_control_points] * 0.0
        elif self.dim == 2.5:
            self.spline_points_func_array_normal_dir[:] = action[:self.number_of_control_points]
            self.spline_points_func_array_binormal_dir[:] = action[:self.number_of_control_points] * 0.0
            self.spline_points_func_array_twist_dir[:] = action[self.number_of_control_points:]
        # apply binormal activations if solving 3D case
        elif self.dim == 3.0:
            self.spline_points_func_array_normal_dir[:] = action[:self.number_of_control_points]
            self.spline_points_func_array_binormal_dir[:] = action[self.number_of_control_points:]
            self.spline_points_func_array_twist_dir[:] = action[:self.number_of_control_points] * 0.0
        elif self.dim == 3.5:
            self.spline_points_func_array_normal_dir[:] = action[:self.number_of_control_points]
            self.spline_points_func_array_binormal_dir[:] = action[self.number_of_control_points:2 * self.number_of_control_points]
            self.spline_points_func_array_twist_dir[:] = action[2 * self.number_of_control_points:]

        # Do multiple time steps of the simulation for <one learning step>
        for _ in range(self.num_steps_per_update):
            self.time_tracker = self.do_step(
                self.StatefulStepper,
                self.stages_and_updates,
                self.simulator,
                self.time_tracker,
                self.time_step,
            )

        if self.mode == 3:
            ##### (+1, 0, 0) -> (0, -1, 0) -> (-1, 0, 0) -> (0, +1, 0) -> (+1, 0, 0) #####
            if (self.current_step %
                (1.0 / (self.h_time_step * self.num_steps_per_update)) == 0):
                if self.dir_indicator == 1:
                    self.sphere.velocity_collection[..., 0] = [
                        0.0,
                        -self.sphere_initial_velocity,
                        0.0,
                    ]
                    self.dir_indicator = 2
                elif self.dir_indicator == 2:
                    self.sphere.velocity_collection[..., 0] = [
                        -self.sphere_initial_velocity,
                        0.0,
                        0.0,
                    ]
                    self.dir_indicator = 3
                elif self.dir_indicator == 3:
                    self.sphere.velocity_collection[..., 0] = [
                        0.0,
                        +self.sphere_initial_velocity,
                        0.0,
                    ]
                    self.dir_indicator = 4
                elif self.dir_indicator == 4:
                    self.sphere.velocity_collection[..., 0] = [
                        +self.sphere_initial_velocity,
                        0.0,
                        0.0,
                    ]
                    self.dir_indicator = 1
                else:
                    print("ERROR")

        if self.mode == 4:
            self.trajectory_iteration += 1
            if self.trajectory_iteration == 500:
                # print('changing direction')
                self.rand_direction_1 = np.pi * np.random.uniform(0, 2)
                if self.dim == 2.0 or self.dim == 2.5:
                    self.rand_direction_2 = np.pi / 2.0
                elif self.dim == 3.0 or self.dim == 3.5:
                    self.rand_direction_2 = np.pi * np.random.uniform(0, 2)

                self.v_x = (self.target_v * np.cos(self.rand_direction_1) *
                            np.sin(self.rand_direction_2))
                self.v_y = (self.target_v * np.sin(self.rand_direction_1) *
                            np.sin(self.rand_direction_2))
                self.v_z = self.target_v * np.cos(self.rand_direction_2)
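                # v_x, v_y, v_z above decompose target_v along the directions given by the two
                # random angles (a spherical-coordinate decomposition)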

                self.sphere.velocity_collection[..., 0] = [
                    self.v_x,
                    self.v_y,
                    self.v_z,
                ]
                self.trajectory_iteration = 0

        self.current_step += 1

        # observe the current state (the sensed signal)
        state = self.get_state()

        dist = np.linalg.norm(self.shearable_rod.position_collection[..., -1] -
                              self.sphere.position_collection[..., 0])
        """ Reward Engineering """
        reward_dist = -np.square(dist).sum()

        # distance between orientations, see https://math.stackexchange.com/questions/90081/quaternion-distance
        # for unit quaternions, 1 - <q1, q2>^2 is 0 when the two orientations coincide and
        # approaches 1 as they differ by a 180 degree rotation
        orientation_dist = (
            1.0 - np.dot(self.rod_tip_orientation, self.target_tip_orientation) ** 2
        )
        orientation_penalty = -(orientation_dist ** 2)

        reward = 1.0 * reward_dist + 0.5 * orientation_penalty
        """ Done is a boolean to reset the environment before episode is completed """
        done = False

        # The rod position must not contain NaN; if it does, the state is invalid, so stop the simulation
        invalid_values_condition = _isnan_check(
            self.shearable_rod.position_collection)

        if invalid_values_condition:
            print(" Nan detected in the position, exiting simulation now")
            self.shearable_rod.position_collection = np.zeros(
                self.shearable_rod.position_collection.shape)
            reward = -10000
            state = self.get_state()
            done = True

        if np.isclose(dist, 0.0, atol=0.05 * 2.0).all():
            reward += 0.5
            reward += 0.5 * (1 - orientation_dist)
            if np.isclose(orientation_dist, 0.0, atol=0.05 * 2.0).all():
                reward += 0.5

        # for this specific case, check on_goal parameter
        if np.isclose(dist, 0.0, atol=0.05).all():
            reward += 1.5
            reward += 1.5 * (1 - orientation_dist)
            if np.isclose(orientation_dist, 0.0, atol=0.05).all():
                reward += 1.5

        if self.current_step >= self.total_learning_steps:
            done = True
            if reward > 0:
                print(
                    " Reward greater than 0! Reward: %0.3f, Distance: %0.3f, Orientation: %0.3f -- %0.3f, %0.3f "
                    % (reward, dist, orientation_dist, reward_dist,
                       orientation_penalty))
            else:
                print(
                    " Finished simulation. Reward: %0.3f, Distance: %0.3f, Orientation: %0.3f -- %0.3f, %0.3f"
                    % (reward, dist, orientation_dist, reward_dist,
                       orientation_penalty))
        """ Done is a boolean to reset the environment before episode is completed """

        self.previous_action = action

        invalid_values_condition_state = _isnan_check(state)
        if invalid_values_condition_state:
            print(
                " Nan detected in the state other than position data, exiting simulation now"
            )
            reward = -10000
            state = np.zeros(state.shape)
            done = True

        return state, reward, done, {"ctime": self.time_tracker}
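The orientation term in this example uses the quaternion distance 1 - <q1, q2>^2 from the linked math.stackexchange answer. Below is a small self-contained check of that formula; the function name and the test quaternions are illustrative only, not taken from the environment code:

import numpy as np

def quaternion_orientation_distance(q1, q2):
    # 1 - <q1, q2>^2 for unit quaternions: 0 for identical orientations, 1 for a 180 degree difference
    return 1.0 - np.dot(q1, q2) ** 2

identity = np.array([1.0, 0.0, 0.0, 0.0])   # no rotation
half_turn = np.array([0.0, 0.0, 0.0, 1.0])  # 180 degree rotation about one axis
print(quaternion_orientation_distance(identity, identity))   # 0.0
print(quaternion_orientation_distance(identity, -identity))  # 0.0, since q and -q encode the same orientation
print(quaternion_orientation_distance(identity, half_turn))  # 1.0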