def _reset_(self):
        """Resets the environment episode.

        Moves the arm to either a fixed reference or a random position and
        resets the target and the target generators.
        """
        print("Resetting")
        if self._move_shape_ == 'circle':
            startpoint = np.zeros(3)
            startpoint[self._circle_plane_ - 2] += self._circle_radius_
            startpoint += self._circle_centrepoint_
            x_target = np.zeros(3)
            np.copyto(x_target, startpoint)
            self._move_generator_ = self._circle_generator_(self._circle_plane_)
        elif self._move_shape_ == 'line':
            x_target = self._line_midpoint_
            self._move_generator_ = self._line_generator_(self._line_dir_)
        np.copyto(self._x_target_, x_target)
        self._target_ = self._x_target_[self._end_effector_indices]

        self._action_ = self._rand_obj_.uniform(self._action_low, self._action_high)
        self._cmd_prev_ = np.zeros(len(self._action_low))  # to be used with derivative control of velocity
        if self._reset_type != 'none':
            if self._reset_type == 'random':
                reset_angles, _ = self._pick_random_angles_()
            elif self._reset_type == 'zero':
                reset_angles = self._q_ref[self._joint_indices]
            self._reset_arm(reset_angles)

        rand_state_array_type, rand_state_array_size, rand_state_array = utils.get_random_state_array(
            self._rand_obj_.get_state()
        )
        np.copyto(self._shared_rstate_array_, np.frombuffer(rand_state_array, dtype=rand_state_array_type))

        print("Reset done")
Example 2
    def _reset_(self):
        """Resets the environment episode.

        Moves the arm to either a fixed reference or a random position and
        generates a new target from _target_generator_.
        """
        print("Resetting")

        x_target = next(self._target_generator_)
        np.copyto(self._x_target_, x_target)
        self._target_ = self._x_target_[self._end_effector_indices]

        self._action_ = self._rand_obj_.uniform(self._action_low, self._action_high)
        self._cmd_prev_ = np.zeros(len(self._action_low))  # to be used with derivative control of velocity
        if self._reset_type != 'none':
            if self._reset_type == 'random':
                reset_angles, _ = self._pick_random_angles_()
            elif self._reset_type == 'zero':
                reset_angles = self._q_ref[self._joint_indices]
            self._reset_arm(reset_angles)

        rand_state_array_type, rand_state_array_size, rand_state_array = utils.get_random_state_array(
            self._rand_obj_.get_state()
        )
        np.copyto(self._shared_rstate_array_, np.frombuffer(rand_state_array, dtype=rand_state_array_type))

        print("Reset done")
Example 3
    def _reset_(self):
        """ Resets the environment episode.

        Moves the DXL to either fixed reference or random position and
        generates a new target within a bounding box.
        """
        print("Resetting")

        if self.reset_type == 'zero':
            self._reset_pos_.value = self.reset_pos_center
        elif self.reset_type == 'random':
            self._reset_pos_.value = self._rand_obj_.uniform(
                low=self.angle_low, high=self.angle_high)

        self._target_pos_.value = self._rand_obj_.uniform(low=self.angle_low,
                                                          high=self.angle_high)

        error_prior = 0
        integral = 0
        present_pos = 0.0

        # Once in the correct regime, the `present_pos` values can be trusted
        start_time = time.time()
        while time.time() - start_time < 5:
            if self._sensor_comms[self._comm_name].sensor_buffer.updated():
                sensor_window, timestamp_window, index_window = self._sensor_comms[
                    self._comm_name].sensor_buffer.read_update(1)
                present_pos = sensor_window[0][self.reg_index['present_pos']]
                current_temperature = sensor_window[0][
                    self.reg_index['temperature']]

                if current_temperature > self.cool_down_temperature:
                    print("Starting to overheat. sleep for a few seconds")
                    time.sleep(10)

                error = self._reset_pos_.value - present_pos
                if abs(error) > 0.017:  # ~1 deg
                    integral = integral + (error * self.gripper_dt)
                    derivative = (error - error_prior) / self.gripper_dt
                    action = self.kp * error + self.ki * integral + self.kd * derivative
                    error_prior = error
                else:
                    break

                self._actuator_comms[self._comm_name].actuator_buffer.write(
                    action)
                time.sleep(0.001)

        self._actuator_comms[self._comm_name].actuator_buffer.write(0)
        self.episode_steps = 0
        rand_state_array_type, rand_state_array_size, rand_state_array = utils.get_random_state_array(
            self._rand_obj_.get_state())
        np.copyto(self._shared_rstate_array_,
                  np.frombuffer(rand_state_array, dtype=rand_state_array_type))
        # Give the shared buffer time to get updated and prevent false episode-done conditions.
        time.sleep(0.1)
        print("Reset done. Gripper pos: {}".format(present_pos))
Example 4
    def _compute_actuation_(self, action, timestamp, index):
        if self.crash_flag.value == 2:
            raise Exception("Compute actuation crash triggered.")

        self._actuation_packet_['generic1'] = action
        self._actuation_packet_['generic2'] = action
        values = self._rand_obj_.uniform(-1, +1, 3)
        rand_state_array_type, rand_state_array_size, rand_state_array = utils.get_random_state_array(
            self._rand_obj_.get_state()
        )
        np.copyto(self._shared_rstate_array_, np.frombuffer(rand_state_array, dtype=rand_state_array_type))
        np.copyto(self._uniform_array_, values)
Example 5
    def _reset_(self, q_start=None, q_target=None):
        """Resets the environment episode.

        Moves the arm to either a fixed reference or a random position and
        generates a new target within a safety box.
        """
        print("Resetting")

        if q_target is None:
            self._q_target_, x_target = self._pick_random_angles_()
        else:
            self._q_target_ = self._q_ref.copy()
            self._q_target_[self._joint_indices] = q_target
            _, inside_buffer_bound, _, x_target = self._check_bound(self._q_target_)
            assert inside_buffer_bound, "Target joint angles must be within the safety boundary"

        np.copyto(self._x_target_, x_target)
        if self._target_type == 'position':
            self._target_ = self._x_target_[self._end_effector_indices]
        elif self._target_type == 'angle':
            self._target_ = self._q_target_
        self._action_ = self._rand_obj_.uniform(self._action_low, self._action_high)
        self._cmd_prev_ = np.zeros(len(self._action_low))  # to be used with derivative control of velocity

        if q_start is not None:
            reset_angles = self._q_ref.copy()
            reset_angles[self._joint_indices] = q_start
            _, inside_buffer_bound, _, _ = self._check_bound(reset_angles)
            assert inside_buffer_bound, "Invalid q start: {}".format(q_start)
            if self._safety_mode_ == ur_utils.SafetyModes.PROTECTIVE_STOP:
                print("Unlocking p-stop")
                self._actuator_comms['UR5'].actuator_buffer.write(self._pstop_unlock_packet)
                time.sleep(2.0)

            self._reset_arm(q_start)
        elif self._reset_type != 'none':
            if self._reset_type == 'random':
                reset_angles, _ = self._pick_random_angles_()
            elif self._reset_type == 'zero':
                reset_angles = self._q_ref[self._joint_indices]
            self._reset_arm(reset_angles)

        rand_state_array_type, rand_state_array_size, rand_state_array = utils.get_random_state_array(
            self._rand_obj_.get_state()
        )
        np.copyto(self._shared_rstate_array_, np.frombuffer(rand_state_array, dtype=rand_state_array_type))

        print("Reset done")
Example 6
    def test_random_state_array(self):
        rand_obj = np.random.RandomState(1)
        rand_state = rand_obj.get_state()
        original_uniform_values = rand_obj.uniform(-1, 1, 100)
        original_normal_values = rand_obj.randn(100)

        rand_state_array_type, rand_state_array_size, rand_state_array = get_random_state_array(rand_state)
        shared_rand_array = np.frombuffer(Array('b', rand_state_array_size).get_obj(), dtype=rand_state_array_type)
        np.copyto(shared_rand_array, np.frombuffer(rand_state_array, dtype=rand_state_array_type))

        new_rand_obj = np.random.RandomState()
        new_rand_obj.set_state(get_random_state_from_array(shared_rand_array))
        new_uniform_values = new_rand_obj.uniform(-1, 1, 100)
        new_normal_values = new_rand_obj.randn(100)

        assert np.all(original_uniform_values == new_uniform_values)
        assert np.all(original_normal_values == new_normal_values)
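The test above round-trips a NumPy `RandomState` through a shared-memory byte array. Below is a hedged sketch of what helpers like `get_random_state_array` and `get_random_state_from_array` might look like, assuming the MT19937 state tuple ('MT19937', keys, pos, has_gauss, cached_gaussian); the real senseact.utils implementation may differ in detail.

import numpy as np

def get_random_state_array(rand_state):
    # Pack the RandomState tuple into a single structured record so it can be
    # copied into a multiprocessing.Array of raw bytes.
    dtype = np.dtype([('keys', np.uint32, 624),
                      ('pos', np.int64),
                      ('has_gauss', np.int64),
                      ('cached_gaussian', np.float64)])
    packed = np.zeros(1, dtype=dtype)
    packed['keys'] = rand_state[1]
    packed['pos'] = rand_state[2]
    packed['has_gauss'] = rand_state[3]
    packed['cached_gaussian'] = rand_state[4]
    return dtype, packed.nbytes, packed.tobytes()

def get_random_state_from_array(shared_rand_array):
    # Rebuild the state tuple expected by numpy.random.RandomState.set_state().
    rec = shared_rand_array[0]
    return ('MT19937', np.asarray(rec['keys'], dtype=np.uint32),
            int(rec['pos']), int(rec['has_gauss']), float(rec['cached_gaussian']))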
Example 7
    def _reset_(self):
        """The required _reset_ interface.

        This method handles charging the Create2, repositioning it, and setting it to the correct mode.
        """
        logging.info("Resetting...")
        self._episode_step_.value = 0
        np.copyto(self._prev_action_, np.array([0, 0]))
        for d in self._observation_def:
            d.reset()

        # Wait for the Create2 to start up properly if it was just started
        # (i.e. wait until observations actually start arriving).
        while not self._sensor_comms[self._comm_name].sensor_buffer.updated():
            time.sleep(0.01)

        sensor_window, _, _ = self._sensor_comms[
            self._comm_name].sensor_buffer.read()

        if sensor_window[-1][0]['battery charge'] <= self._min_battery:
            logging.info("Waiting for Create2 to be manually docked.")
            self._wait_until_charged()

            sensor_window, _, _ = self._sensor_comms[
                self._comm_name].sensor_buffer.read()

        # Always switch to SAFE mode to run an episode, so that Create2 will switch to PASSIVE on the
        # charger.  If the Create2 is in any other mode on the charger, we will not be able to detect
        # the non-responsive sleep mode that happens at the 60-second mark.
        logging.info("Setting Create2 into safe mode.")
        self._write_opcode('safe')
        time.sleep(0.1)

        # after charging/docked, try to drive away from the dock if still on it
        if sensor_window[-1][0]['charging sources available'] > 0:
            logging.info("Undocking the Create2.")
            self._write_opcode('drive_direct', -300, -300)
            time.sleep(1.0)
            self._write_opcode('drive_direct', 0, 0)
            time.sleep(0.1)

        # drive backward for a fixed amount
        logging.info("Moving Create2 into position.")
        target_values = [-150, -150]
        self._write_opcode('drive_direct', *target_values)
        rand_state_array_type, rand_state_array_size, rand_state_array = utils.get_random_state_array(
            self._rand_obj_.get_state())
        np.copyto(self._shared_rstate_array_,
                  np.frombuffer(rand_state_array, dtype=rand_state_array_type))
        time.sleep(1.5)
        self._write_opcode('drive', 0, 0)

        # Accumulate the rotation angle in degrees:
        # (right wheel distance - left wheel distance) / wheel base (235 mm), converted from radians.
        self._total_rotation += (target_values[1] * 1.5 -
                                 target_values[0] * 1.5) / 235.0 * 180.0 / 3.14
        self._wait_until_unwinded()

        # Make sure we are in SAFE mode in case the drive caused a switch to PASSIVE, or the
        # Create2 got stuck somewhere and requires a human reset (we don't want an episode to
        # start until this is fixed; otherwise we get a series of one-step episodes).
        sensor_window, _, _ = self._sensor_comms[
            self._comm_name].sensor_buffer.read()
        while sensor_window[-1][0]['oi mode'] != 2:
            logging.warning(
                "Create2 not in SAFE mode, reattempting... (might require human intervention)."
            )
            self._write_opcode('full')
            time.sleep(0.2)
            self._write_opcode('drive_direct', -50, -50)
            time.sleep(0.5)
            self._write_opcode('drive', 0, 0)
            time.sleep(0.1)
            self._write_opcode('safe')
            time.sleep(0.2)

            # try another unwind since unwind could have failed if stuck in PASSIVE mode
            self._wait_until_unwinded()
            sensor_window, _, _ = self._sensor_comms[
                self._comm_name].sensor_buffer.read()

        # Don't let the state during reset pollute the first sensation.
        time.sleep(2 * self._internal_timing)

        logging.info("Reset completed.")
Example 8
    def __init__(self,
                 communicator_setups,
                 action_dim,
                 observation_dim,
                 run_mode='multiprocess',
                 dt=.1,
                 dt_tol=1e-5,
                 sleep_time=0.0001,
                 busy_loop=True,
                 random_state=None,
                 **kwargs
                ):

        """Inits RTRLBaseEnv object with task specific parameters.

        Args:
            communicator_setups: A dictionary containing configuration
                parameters for each physical device communicator.
                The form of this dictionary is as follows:
                config = {'name':{'Communicator': CommunicatorClass,
                                  'kwargs': dict()
                                 }
                }
            action_dim: An integer dimensionality of the action space.
            observation_dim: An integer dimensionality of the observation space.
            run_mode: A string specifying the method of parallelism between
                the agent and the environment, one of 'singlethread',
                'multithread', or 'multiprocess'.
            dt: A float timestep duration to maintain when calling 'step'
                or 'sense_wait'.
            dt_tol: A small float tolerance subtracted from dt to compensate for
                OS delays when exiting sleep() in 'step' or 'sense_wait'.
            sleep_time: A float lower bound on the time resolution of the
                sleep() function provided by the OS. For Linux-based OSes the
                resolution is typically ~0.001 s; for Windows-based OSes it is
                ~0.01 s.
            busy_loop: A boolean specifying whether to use busy loops instead
                of time.sleep() to accurately maintain short real-time intervals.
            random_state: A tuple containing the random state returned by
                numpy.random.RandomState().get_state(). This is used to ensure
                reproducibility by reusing the same random state externally from
                an experiment script.

        """
        assert run_mode in ['singlethread', 'multithread', 'multiprocess']
        self._run_mode = run_mode

        # Used for gym compatible step function
        self._dt = dt
        self._dt_tol = dt_tol
        self._sleep_time = sleep_time
        self._busy_loop = busy_loop

        # create random object based on passed random_state tuple
        self._rand_obj_ = np.random.RandomState()
        if random_state is None:
            random_state = self._rand_obj_.get_state()
        else:
            self._rand_obj_.set_state(random_state)
        rand_state_array_type, rand_state_array_size, rand_state_array = utils.get_random_state_array(random_state)
        # Ideally, the random state tuple of `_rand_obj_` needs to be copied to this `_shared_rstate_array_`
        # after every use of `_rand_obj_` for generating random numbers.
        self._shared_rstate_array_ = np.frombuffer(Array('b', rand_state_array_size).get_obj(),
                                                   dtype=rand_state_array_type)
        np.copyto(self._shared_rstate_array_, np.frombuffer(rand_state_array, dtype=rand_state_array_type))
        self._reset_flag = Value('i', 0)


        self._action_buffer = SharedBuffer(
            buffer_len=SharedBuffer.DEFAULT_BUFFER_LEN,
            array_len=action_dim,
            array_type='d',
            np_array_type='d',
        )

        # Contains the observation vector plus two trailing elements: the reward and the episode-done flag
        self._sensation_buffer = SharedBuffer(
            buffer_len=SharedBuffer.DEFAULT_BUFFER_LEN,
            array_len=observation_dim + 2,
            array_type='d',
            np_array_type='d',
        )

        # A dictionary of dictionaries, one for each communicator that is required
        self._communicator_setups = communicator_setups

        # Dictionary of all instantiated communicator processes
        # Both sensor_comms and actuator_comms would have a reference to the same
        # communicator when that communicator contains both sensors and actuators
        self._sensor_comms = {}
        self._actuator_comms = {}

        # Contains a reference to every communicator.
        # Used for terminating all communicators when the environment is closed.
        self._all_comms = {}

        # Dictionary containing actuation packets for each communicator
        self._actuation_packet_ = {}

        # Dictionary containing the number of sensor packets to read at a time
        # from each communicator
        self._num_sensor_packets = {}

        # Construct the communicators without starting
        for name, setup in communicator_setups.items():
            # Initialize communicator with the given parameters
            comm = setup['Communicator'](**setup['kwargs'])

            if comm.use_actuator:
                self._actuation_packet_[name] = np.zeros(
                    shape=comm.actuator_buffer.array_len,
                    dtype=comm.actuator_buffer.np_array_type,
                )
                self._actuator_comms[name] = comm

            if comm.use_sensor:
                if 'num_sensor_packets' in setup:
                    self._num_sensor_packets[name] = setup['num_sensor_packets']
                else:
                    self._num_sensor_packets[name] = 1
                self._sensor_comms[name] = comm

            self._all_comms[name] = comm

        self._running = False
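A hedged usage sketch of the `communicator_setups` dictionary documented in the docstring above. `FakeCommunicator` and its constructor arguments are hypothetical stand-ins for a real senseact Communicator subclass; the structure of the dictionary and the optional 'num_sensor_packets' key follow the constructor code above.

class FakeCommunicator:
    # Hypothetical placeholder for a device communicator class.
    def __init__(self, port='/dev/ttyUSB0', baudrate=1000000):
        self.port = port
        self.baudrate = baudrate

communicator_setups = {
    'gripper': {
        'Communicator': FakeCommunicator,
        'kwargs': dict(port='/dev/ttyUSB0', baudrate=1000000),
        'num_sensor_packets': 1,  # optional; __init__ above defaults this to 1
    },
}

# For each entry, __init__ above instantiates the communicator with
#     comm = setup['Communicator'](**setup['kwargs'])
# and sorts it into self._sensor_comms / self._actuator_comms based on
# comm.use_sensor and comm.use_actuator.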