    def reset(self):
        """
        Reset The Robot to its initial Position and restart the Controllers

        1) Change Gravity to 0 ->That arm doesnt fall
        2) Turn Controllers off
        3) Pause Simulation
        4) Delete previous target object if randomly chosen object is set to True
        4) Reset Simulation
        5) Set Model Pose to desired one
        6) Unpause Simulation
        7) Turn on Controllers
        8) Restore Gravity
        9) Get Observations and return current State
        10) Check all Systems work
        11) Spawn new target
        12) Pause Simulation
        13) Write initial Position into Yaml File
        14) Create YAML Files for contact forces in order to get the average over 2 contacts
        15) Create YAML Files for collision to make shure to see a collision due to high noise in topic
        16) Unpause Simulation cause in next Step Sysrem must be running otherwise no data is seen by Subscribers
        17) Publish Episode Reward and set accumulated reward back to 0 and iterate the Episode Number
        18) Return State
        """

        self.gazebo.change_gravity(0, 0, 0)
        self.controllers_object.turn_off_controllers()
        self.gazebo.pauseSim()
        self.gazebo.resetSim()
        # initial joint configuration; wrist_3 could be randomized between
        # -90 and 90 degrees, but a fixed value is used here
        init_position = [1.5, -1.2, 1.4, -1.87, -1.57, 1.57]
        self.pickbot_joint_publisher_object.set_joints(init_position)
        self.set_target_object(random_object=self._random_object, random_position=self._random_position)
        self.gazebo.unpauseSim()
        self.controllers_object.turn_on_controllers()
        self.gazebo.change_gravity(0, 0, -9.81)
        self._check_all_systems_ready()
        # self.randomly_spawn_object()

        last_position = [1.5, -1.2, 1.4, -1.87, -1.57, 0]
        with open('last_position.yml', 'w') as yaml_file:
            yaml.dump(last_position, yaml_file, default_flow_style=False)
        with open('contact_1_force.yml', 'w') as yaml_file:
            yaml.dump(0.0, yaml_file, default_flow_style=False)
        with open('contact_2_force.yml', 'w') as yaml_file:
            yaml.dump(0.0, yaml_file, default_flow_style=False)
        with open('collision.yml', 'w') as yaml_file:
            yaml.dump(False, yaml_file, default_flow_style=False)
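        # These YAML files act as a simple on-disk scratchpad between steps:
        # last_position seeds the next action, the two contact-force files are
        # averaged over 2 contacts, and collision.yml latches collisions that
        # the noisy topic might otherwise drop (see the docstring above).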
        observation = self.get_obs()
        # get maximum distance to the object to calculate reward
        self.max_distance, _ = U.get_distance_gripper_to_object(self.object_height)
        self.min_distance = self.max_distance
        self.gazebo.pauseSim()
        state = U.get_state(observation)
        self._update_episode()
        self.gazebo.unpauseSim()
        return state
Example 2
    def step(self, action):
        """
        Given the action selected by the learning algorithm,
        we perform the corresponding movement of the robot
        return: the state of the robot, the corresponding reward for the step and if its done(terminal State)

        1) read last published joint from YAML
        2) define ne joints acording to chosen action
        3) Write joint position into YAML to save last published joints for next step
        4) Unpause, Move to that pos for defined time, Pause
        5) Get Observations and pause Simulation
        6) Convert Observations into State
        7) Unpause Simulation check if its done, calculate done_reward and pause Simulation again
        8) Calculate reward based on Observatin and done_reward
        9) Unpause that topics can be received in next step
        10) Return State, Reward, Done
        """

        self.old_obs = self.get_obs()

        print(
            "===================================================================="
        )
        # print("action: {}".format(action))

        # 1) take the last joint positions from the previous observation
        #    (the YAML read below is kept for reference)
        last_position = self.old_obs[:6]
        # with open("last_position.yml", 'r') as stream:
        #     try:
        #         last_position = (yaml.load(stream, Loader=yaml.Loader))
        #     except yaml.YAMLError as exc:
        #         print(exc)
        # 2) get the new joint positions according to chosen action
        if self._joint_increment_value is None:
            next_action_position = action
        else:
            next_action_position = self.get_action_to_position(
                np.clip(action, -self._joint_increment_value,
                        self._joint_increment_value), last_position)
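            # np.clip bounds each increment to +/- _joint_increment_value, so a
            # single step can never move a joint by more than the configured limit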
        print("next action position: {}".format(
            np.around(next_action_position, decimals=3)))

        # 3) write last_position into YAML File
        # with open('last_position.yml', 'w') as yaml_file:
        #     yaml.dump(next_action_position, yaml_file, default_flow_style=False)

        # 4) unpause, move to position for certain time
        self.gazebo.unpauseSim()
        self.pickbot_joint_publisher_object.move_joints(next_action_position)
        # time.sleep(self.running_step)

        # Busy-wait until all the joints reach next_action_position
        start_ros_time = rospy.Time.now()
        while True:
            # Check collision:
            invalid_collision = self.get_collisions()
            if invalid_collision:
                print(">>>>>>>>>> Collision: RESET <<<<<<<<<<<<<<<")
                observation = self.get_obs()
                print("joints after reset collision : {} ".format(
                    observation[:6]))

                # calculate reward immediately
                distance_error = observation[6:9] - observation[13:16]
                orientation_error = quaternion_multiply(
                    observation[9:13], quaternion_conjugate(observation[16:]))

                rewardDist = UMath.rmseFunc(distance_error)
                rewardOrientation = 2 * np.arccos(abs(orientation_error[0]))
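                # Assuming the scalar (w) part of the quaternion sits at index
                # 0 here: for unit quaternions, w = cos(theta / 2), so
                # 2 * arccos(|w|) is the shortest rotation angle between the
                # current and target orientations, in [0, pi].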

                reward = UMath.computeReward(rewardDist, rewardOrientation,
                                             invalid_collision)
                print("Reward this step after colliding {}".format(reward))
                self.accumulated_episode_reward += reward
                return U.get_state(observation), reward, True, {}

            elapsed_time = rospy.Time.now() - start_ros_time
            if np.isclose(next_action_position,
                          self.joints_state.position,
                          rtol=0.0,
                          atol=0.01).all():
                break
            elif elapsed_time > rospy.Duration(2):  # time out
                print("TIME OUT, joints haven't reach positions")
                break
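        # The loop exits once every joint is within 0.01 rad of its target
        # (np.isclose with atol=0.01) or after a 2 s timeout, which guards
        # against targets the controller cannot reach.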

        # 5) Get Observations and pause Simulation
        observation = self.get_obs()
        print("Observation in the step: {}".format(
            np.around(observation[:6], decimals=3)))
        print("Joints      in the step: {}".format(
            np.around(self.joints_state.position, decimals=3)))
        # if observation[0] < self.min_distance:
        #     self.min_distance = observation[0]
        self.gazebo.pauseSim()

        # 6) Convert Observations into state
        state = U.get_state(observation)

        # U.get_obj_orient()

        # 7) Unpause Simulation check if its done, calculate done_reward
        self.gazebo.unpauseSim()
        done, done_reward, invalid_collision = self.is_done(
            observation, last_position)
        self.gazebo.pauseSim()

        # 8) Calculate reward based on Observation and done_reward and update the accumulated Episode Reward
        # reward = self.compute_reward(observation, done_reward, invalid_contact)
        # reward = UMath.compute_reward_orient(observation, done_reward, invalid_contact)

        distance_error = observation[6:9] - observation[13:16]
        orientation_error = quaternion_multiply(
            observation[9:13], quaternion_conjugate(observation[16:]))

        rewardDist = UMath.rmseFunc(distance_error)
        rewardOrientation = 2 * np.arccos(abs(orientation_error[0]))

        reward = UMath.computeReward(rewardDist, rewardOrientation,
                                     invalid_collision) + done_reward
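        # The dense shaping term (distance RMSE plus orientation angle) is
        # combined with the sparse terminal reward returned by is_done().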
        print("Reward this step {}".format(reward))

        self.accumulated_episode_reward += reward

        # 9) Unpause that topics can be received in next step
        self.gazebo.unpauseSim()

        self.episode_steps += 1
        # 10) Return State, Reward, Done
        return state, reward, done, {}
Example 3
    def reset(self):
        """
        Reset The Robot to its initial Position and restart the Controllers

        1) Change Gravity to 0 -> That arm doesnt fall
        2) Turn Controllers off
        3) Pause Simulation
        4) Delete previous target object if randomly chosen object is set to True
        4) Reset Simulation
        5) Set Model Pose to desired one
        6) Unpause Simulation
        7) Turn on Controllers
        8) Restore Gravity
        9) Get Observations and return current State
        10) Check all Systems work
        11) Spawn new target
        12) Pause Simulation
        13) Write initial Position into Yaml File
        14) Create YAML Files for contact forces in order to get the average over 2 contacts
        15) Create YAML Files for collision to make sure to see a collision due to high noise in topic
        16) Unpause Simulation cause in next Step System must be running otherwise no data is seen by Subscribers
        17) Publish Episode Reward and set accumulated reward back to 0 and iterate the Episode Number
        18) Return State
        """

        # print("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Reset %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%")
        self.gazebo.change_gravity(0, 0, 0)
        self.controllers_object.turn_off_controllers()
        # turn off the gripper
        # U.turn_off_gripper()
        self.gazebo.resetSim()
        self.gazebo.pauseSim()
        self.gazebo.resetSim()
        time.sleep(0.1)

        # turn on the gripper
        # U.turn_on_gripper()

        if self._load_init_pos:
            # load sample from previous training result
            sample_ep = random.choice(self.init_samples)
            print("Joints from samples: {}".format(sample_ep[0:6]))
            # self.pickbot_joint_publisher_object.set_joints(sample_ep[0:6])
            self.set_target_object(sample_ep[-6:])
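            # Each saved sample is assumed to hold the six joint angles first
            # and the target-object pose in its last six entries.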
        else:
            self.pickbot_joint_publisher_object.set_joints()
            vg_geo = U.get_link_state("vacuum_gripper_link")
            to_geo = U.get_link_state("target")
            orientation_error = quaternion_multiply(
                vg_geo[3:], quaternion_conjugate(to_geo[3:]))
            # print("Orientation error {}".format(orientation_error))
            box_pos = (U.get_random_door_handle_pos()
                       if self._random_position else self.object_initial_position)
            U.change_object_position(self.object_name, box_pos)
        # The code above is hard-coded for the door handle; modify later.
        # TODO: modify reset w.r.t. the object type, as in the reach env

        self.gazebo.unpauseSim()
        self.controllers_object.turn_on_controllers()
        self.gazebo.change_gravity(0, 0, -9.81)
        self._check_all_systems_ready()

        # last_position = [1.5, -1.2, 1.4, -1.87, -1.57, 0]
        # last_position = [0, 0, 0, 0, 0, 0]
        # with open('last_position.yml', 'w') as yaml_file:
        #     yaml.dump(last_position, yaml_file, default_flow_style=False)
        # with open('contact_1_force.yml', 'w') as yaml_file:
        #     yaml.dump(0.0, yaml_file, default_flow_style=False)
        # with open('contact_2_force.yml', 'w') as yaml_file:
        #     yaml.dump(0.0, yaml_file, default_flow_style=False)
        with open('collision.yml', 'w') as yaml_file:
            yaml.dump(False, yaml_file, default_flow_style=False)
        observation = self.get_obs()
        # print("current joints {}".format(observation[:6]))
        # get maximum distance to the object to calculate reward
        # self.max_distance, _ = U.get_distance_gripper_to_object()
        # self.min_distance = self.max_distance
        self.gazebo.pauseSim()
        state = U.get_state(observation)
        self._update_episode()
        self.gazebo.unpauseSim()
        return state
Example 4
    def step(self, action):
        """
        Given the action selected by the learning algorithm,
        we perform the corresponding movement of the robot
        return: the state of the robot, the corresponding reward for the step and if its done(terminal State)
        
        1) read last published joint from YAML
        2) define ne joints acording to chosen action
        3) Write joint position into YAML to save last published joints for next step
        4) Unpause, Move to that pos for defined time, Pause
        5) Get Observations and pause Simulation
        6) Convert Observations into State
        7) Unpause Simulation check if its done, calculate done_reward and pause Simulation again
        8) Calculate reward based on Observatin and done_reward 
        9) Unpause that topics can be received in next step
        10) Return State, Reward, Done
        """

        # print("action: {}".format(action))

        # 1) get the last joint positions from the observation taken before
        #    acting (this supersedes the YAML file, which is only written
        #    below so the next step could read it if needed)
        old_observation = self.get_obs()
        last_position = old_observation[1:7]
        # 2) get the new joint positions according to chosen action
        if self._joint_increment is None:
            next_action_position = action
        else:
            next_action_position = self.get_action_to_position(
                np.clip(action, -self._joint_increment, self._joint_increment),
                last_position)
        # 3) write last_position into YAML File
        with open('last_position.yml', 'w') as yaml_file:
            yaml.dump(next_action_position,
                      yaml_file,
                      default_flow_style=False)

        # 4) unpause, move to position for certain time
        self.gazebo.unpauseSim()
        self.pickbot_joint_pubisher_object.move_joints(next_action_position)

        # Busy-wait until all the joints reach next_action_position
        start_ros_time = rospy.Time.now()
        while True:
            # Check collision:
            invalid_collision = self.get_collisions()
            if invalid_collision:
                print(">>>>>>>>>> Collision: RESET <<<<<<<<<<<<<<<")
                observation = self.get_obs()
                reward = UMath.compute_reward(observation, -200, True)
                print("Test Joint: {}".format(
                    np.around(observation[1:7], decimals=3)))
                return U.get_state(observation), reward, True, {}

            elapsed_time = rospy.Time.now() - start_ros_time
            if np.isclose(next_action_position,
                          self.joints_state.position,
                          rtol=0.0,
                          atol=0.01).all():
                break
            elif elapsed_time > rospy.Duration(2):  # time out
                print("TIME OUT, have not reached destination")
                break
        # time.sleep(self.running_step)
        """
        #execute action as long as the current position is close to the target position and there is no invalid collision and time spend in the while loop is below 1.2 seconds to avoid beeing stuck touching the object and not beeing able to go to the desired position     
        time1=time.time()
        while np.linalg.norm(np.asarray(self.joints_state.position)-np.asarray(next_action_position))>0.1 and self.get_collisions()==False and time.time()-time1<0.1:         
            rospy.loginfo("Not yet reached target position and no collision")
        """
        # 5) Get Observations, update the minimum distance, and pause Simulation
        observation = self.get_obs()
        if observation[0] < self.min_distance:
            self.min_distance = observation[0]
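        # Track the closest approach to the object this episode; min_distance
        # starts at max_distance (set in reset) and only ever decreases.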
        self.gazebo.pauseSim()

        # 6) Convert Observations into state
        state = U.get_state(observation)

        # 7) Unpause Simulation check if its done, calculate done_reward
        self.gazebo.unpauseSim()
        done, done_reward, invalid_contact = self.is_done(observation)
        self.gazebo.pauseSim()

        # 8) Calculate reward based on Observation and done_reward and update the accumulated Episode Reward
        reward = UMath.compute_reward(observation, done_reward,
                                      invalid_contact)
        self.accumulated_episode_reward += reward

        # 9) Unpause that topics can be received in next step
        self.gazebo.unpauseSim()

        self.episode_steps += 1
        # 10) Return State, Reward, Done
        return state, reward, done, {}
Example 5
    def reset(self):
        """
        Reset The Robot to its initial Position and restart the Controllers 

        1) Change Gravity to 0 ->That arm doesnt fall
        2) Turn Controllers off
        3) Pause Simulation
        4) Reset Simulation
        5) Set Model Pose to desired one 
        6) Unpause Simulation 
        7) Turn on Controllers
        8) Restore Gravity
        9) Get Observations and return current State
        10) Check all Systems work
        11) Pause Simulation
        12) Write initial Position into Yaml File 
        13) Create YAML Files for contact forces in order to get the average over 2 contacts 
        14) Create YAML Files for collision to make shure to see a collision due to high noise in topic
        15) Unpause Simulation cause in next Step Sysrem must be running otherwise no data is seen by Subscribers 
        16) Publish Episode Reward and set accumulated reward back to 0 and iterate the Episode Number
        17) Return State 
        """

        ###### TEST
        obs = self.get_obs()
        print("Before RESET Joint: {}".format(np.around(obs[1:7], decimals=3)))
        ###### END of TEST

        self.gazebo.change_gravity(0, 0, 0)
        self.controllers_object.turn_off_controllers()
        self.gazebo.resetSim()
        self.gazebo.pauseSim()

        ##### TEST #########
        # idx = 0
        # sys_exit = False
        # correction_ids = []
        # reset_target_pos = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
        # current_joint_pos = obs[1:7]
        #
        # for joint_pos in obs[1:7]:
        #     if np.abs(joint_pos - math.pi) < 0.1:
        #         sys_exit = True
        #         correction_ids.append(idx)
        #     idx += 1
        #
        # if sys_exit:
        #     for i in correction_ids:
        #         print("i:{}".format(i))
        #         reset_target_pos[i] = 2.0 if current_joint_pos[i] > 0 else -2.0
        #
        # self.pickbot_joint_pubisher_object.set_joints(reset_target_pos)
        # self.pickbot_joint_pubisher_object.set_joints([0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
        ####  END of TEST #####

        self.pickbot_joint_pubisher_object.set_joints()
        self.set_target_object(random_object=self._random_object,
                               random_position=self._random_position)
        self.gazebo.unpauseSim()
        self.controllers_object.turn_on_controllers()
        self.gazebo.change_gravity(0, 0, -9.81)
        self._check_all_systems_ready()

        ######  TEST  #########
        # init_position = [1.5, -1.2, 1.4, -1.87, -1.57, 0]
        # self.pickbot_joint_pubisher_object.move_joints(init_position)
        #
        # start_ros_time = rospy.Time.now()
        # while True:
        #     elapsed_time = rospy.Time.now() - start_ros_time
        #     if np.isclose(init_position, self.joints_state.position, rtol=0.0, atol=0.01).all():
        #         break
        #     elif elapsed_time > rospy.Duration(2):  # time out
        #         break
        ###### END of TEST ########

        last_position = [1.5, -1.2, 1.4, -1.87, -1.57, 0]
        with open('last_position.yml', 'w') as yaml_file:
            yaml.dump(last_position, yaml_file, default_flow_style=False)
        with open('contact_1_force.yml', 'w') as yaml_file:
            yaml.dump(0.0, yaml_file, default_flow_style=False)
        with open('contact_2_force.yml', 'w') as yaml_file:
            yaml.dump(0.0, yaml_file, default_flow_style=False)
        with open('collision.yml', 'w') as yaml_file:
            yaml.dump(False, yaml_file, default_flow_style=False)
        observation = self.get_obs()
        print("After  RESET Joint: {}".format(
            np.around(observation[1:7], decimals=3)))
        # if sys_exit:
        #     print("##################################################")
        #     print("############# Joint near Pi ######################")
        #     print("Reset_target_pos:   {}".format(reset_target_pos))
        #     print("##################################################")

        # get maximum distance to the object to calculate reward
        self.max_distance, _ = U.get_distance_gripper_to_object()
        self.min_distance = self.max_distance
        self.gazebo.pauseSim()
        state = U.get_state(observation)
        self._update_episode()
        self.gazebo.unpauseSim()
        return state
    def step(self, action):
        """
        Given the action selected by the learning algorithm,
        we perform the corresponding movement of the robot
        return: the state of the robot, the corresponding reward for the step and if its done(terminal State)

        1) read last published joint from YAML
        2) define ne joints acording to chosen action
        3) Write joint position into YAML to save last published joints for next step
        4) Unpause, Move to that pos for defined time, Pause
        5) Get Observations and pause Simulation
        6) Convert Observations into State
        7) Unpause Simulation check if its done, calculate done_reward and pause Simulation again
        8) Calculate reward based on Observatin and done_reward
        9) Unpause that topics can be received in next step
        10) Return State, Reward, Done
        """

        # 1) read last_position out of YAML File
        with open("last_position.yml", 'r') as stream:
            try:
                last_position = (yaml.load(stream, Loader=yaml.Loader))
            except yaml.YAMLError as exc:
                print(exc)
        # 2) get the new joint positions according to the chosen action
        next_action_position = self.get_action_to_position(action, last_position)

        # 3) write last_position into YAML File
        with open('last_position.yml', 'w') as yaml_file:
            yaml.dump(next_action_position, yaml_file, default_flow_style=False)

        # 4) unpause, move to position for certain time
        self.gazebo.unpauseSim()
        self.pickbot_joint_publisher_object.move_joints(next_action_position)
        time.sleep(self.running_step)

        # 5) Get Observations and pause Simulation
        observation = self.get_obs()
        if observation[0] < self.min_distance:
            self.min_distance = observation[0]
        self.gazebo.pauseSim()

        # 6) Convert Observations into state
        state = U.get_state(observation)

        # 7) Unpause Simulation check if its done, calculate done_reward
        self.gazebo.unpauseSim()
        done, done_reward, invalid_contact = self.is_done(observation, last_position)
        self.gazebo.pauseSim()

        # 8) Calculate reward based on Observation and done_reward and update the accumulated Episode Reward
        # reward = self.compute_reward(observation, done_reward, invalid_contact)
        reward = UMath.compute_reward(observation, done_reward, invalid_contact)
        self.accumulated_episode_reward += reward

        # 9) Unpause that topics can be received in next step
        self.gazebo.unpauseSim()

        self.episode_steps += 1
        # 10) Return State, Reward, Done
        return state, reward, done, {}
    def step(self, action):
        """
        Given the action selected by the learning algorithm,
        we perform the corresponding movement of the robot.
        return: the state of the robot, the corresponding reward for the step, and whether it is done (terminal state)

        1) Read the last joint positions by getting the observation before acting
        2) Get the new joint positions according to the chosen action (actions here are the joint increments)
        3) Publish the joint positions to MoveIt and busy-wait until the movement is complete
        4) Get a new observation after performing the action
        5) Convert observations into states
        6) Check whether the task is done or a crash happened, and calculate done_reward
        7) Calculate the reward based on the observation and done_reward
        8) Return state, reward, done
        """
        # print("############################")
        # print("action: {}".format(action))

        self.movement_complete.data = False

        # 1) Read last joint positions by getting the observation before acting
        old_observation = self.get_obs()

        # 2) Get the new joint positions according to chosen action (actions here are the joint increments)
        if self._joint_increment is None:
            next_action_position = action
        else:
            next_action_position = self.get_action_to_position(
                action, old_observation[1:7])

        # 3) Move to position and wait for moveit to complete the execution
        self.publisher_to_moveit_object.pub_joints_to_moveit(
            next_action_position)
        # rospy.wait_for_message("/pickbot/movement_complete", Bool)
        while not self.movement_complete.data:
            pass
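        # NOTE: the wait above polls at full CPU speed. A gentler sketch,
        # assuming a standard rospy setup, would sleep between polls:
        #   rate = rospy.Rate(100)  # hypothetical 100 Hz polling rate
        #   while not self.movement_complete.data:
        #       rate.sleep()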

        start_ros_time = rospy.Time.now()
        while True:
            # Check collision:
            # invalid_collision = self.get_collisions()
            # if invalid_collision:
            #     print(">>>>>>>>>> Collision: RESET <<<<<<<<<<<<<<<")
            #     observation = self.get_obs()
            #     reward = UMath.compute_reward(observation, -200, True)
            #     observation = self.get_obs()
            #     print("Test Joint: {}".format(np.around(observation[1:7], decimals=3)))
            #     return U.get_state(observation), reward, True, {}

            elapsed_time = rospy.Time.now() - start_ros_time
            if np.isclose(next_action_position,
                          self.joints_state.position,
                          rtol=0.0,
                          atol=0.01).all():
                break
            elif elapsed_time > rospy.Duration(2):  # time out
                break
        # time.sleep(self.running_step)
        """
        #execute action as long as the current position is close to the target position and there is no invalid collision and time spend in the while loop is below 1.2 seconds to avoid beeing stuck touching the object and not beeing able to go to the desired position     
        time1=time.time()
        while np.linalg.norm(np.asarray(self.joints_state.position)-np.asarray(next_action_position))>0.1 and self.get_collisions()==False and time.time()-time1<0.1:         
            rospy.loginfo("Not yet reached target position and no collision")
        """
        # 4) Get new observation and update min_distance after performing the action
        new_observation = self.get_obs()
        if new_observation[0] < self.min_distance:
            self.min_distance = new_observation[0]
        # print("observ: {}".format( np.around(new_observation[1:7], decimals=3)))

        # 5) Convert Observations into state
        state = U.get_state(new_observation)

        # 6) Check if its done, calculate done_reward
        done, done_reward, invalid_contact = self.is_done(new_observation)

        # 7) Calculate the reward based on the observation and done_reward, and update the accumulated episode reward
        reward = UMath.compute_reward(new_observation, done_reward,
                                      invalid_contact)

        ### TEST ###
        if done:
            joint_pos = self.joints_state.position
            print("Joint in step (done): {}".format(
                np.around(joint_pos, decimals=3)))
        ### END of TEST ###

        self.accumulated_episode_reward += reward

        self.episode_steps += 1

        return state, reward, done, {}
    def reset(self):
        """
        Reset The Robot to its initial Position and restart the Controllers 
        1) Publish the initial joint_positions to MoveIt
        2) Busy waiting until the movement is completed by MoveIt
        3) set target_object to random position
        4) Check all Systems work
        5) Create YAML Files for contact forces in order to get the average over 2 contacts
        6) Create YAML Files for collision to make shure to see a collision due to high noise in topic
        7) Get Observations and return current State
        8) Publish Episode Reward and set accumulated reward back to 0 and iterate the Episode Number
        9) Return State
        """
        # print("Joint (reset): {}".format(np.around(self.joints_state.position, decimals=3)))
        init_joint_pos = [1.5, -1.2, 1.4, -1.87, -1.57, 0]
        self.publisher_to_moveit_object.set_joints(init_joint_pos)

        # print(">>>>>>>>>>>>>>>>>>> RESET: waiting for the movement to complete")
        # rospy.wait_for_message("/pickbot/movement_complete", Bool)
        while not self.movement_complete.data:
            pass
        # print(">>>>>>>>>>>>>>>>>>> RESET: Waiting complete")

        start_ros_time = rospy.Time.now()
        while True:
            # Check collision:
            # invalid_collision = self.get_collisions()
            # if invalid_collision:
            #     print(">>>>>>>>>> Collision: RESET <<<<<<<<<<<<<<<")
            #     observation = self.get_obs()
            #     reward = UMath.compute_reward(observation, -200, True)
            #     observation = self.get_obs()
            #     print("Test Joint: {}".format(np.around(observation[1:7], decimals=3)))
            #     return U.get_state(observation), reward, True, {}

            elapsed_time = rospy.Time.now() - start_ros_time
            if np.isclose(init_joint_pos,
                          self.joints_state.position,
                          rtol=0.0,
                          atol=0.01).all():
                break
            elif elapsed_time > rospy.Duration(2):  # time out
                break

        self.set_target_object(random_object=self._random_object,
                               random_position=self._random_position)
        self._check_all_systems_ready()

        with open('contact_1_force.yml', 'w') as yaml_file:
            yaml.dump(0.0, yaml_file, default_flow_style=False)
        with open('contact_2_force.yml', 'w') as yaml_file:
            yaml.dump(0.0, yaml_file, default_flow_style=False)
        with open('collision.yml', 'w') as yaml_file:
            yaml.dump(False, yaml_file, default_flow_style=False)
        observation = self.get_obs()
        self.object_position = observation[9:12]

        # print("Joint (after): {}".format(np.around(observation[1:7], decimals=3)))

        # get maximum distance to the object to calculate reward
        self.max_distance, _ = U.get_distance_gripper_to_object()
        self.min_distance = self.max_distance
        state = U.get_state(observation)
        self._update_episode()
        return state