Example 1
# Assumed context for this fragment: the zerocm Python bindings and the
# generated MsgTrafficLightSignal type (its module path is project-specific
# and hypothetical here).
import time
from zerocm import ZCM
from msg_traffic_light_signal import MsgTrafficLightSignal  # hypothetical path


class ZcmTransceiver(object):
    def __init__(self):
        self.transceiver = ZCM('ipc')
        if not self.transceiver.good():
            print("Unable to initialize ZeroCM")
            exit(-1)

    def __call__(self,
                 left: bool = True,
                 forward: bool = True,
                 right: bool = True):
        msg_traffic_light_signal = MsgTrafficLightSignal()
        msg_traffic_light_signal.timestamp = int(time.time())
        msg_traffic_light_signal.turn_signal = bytes(
            [int(left), int(forward), int(right)])
        self.transceiver.publish("msgTrafficLightSignal",
                                 msg_traffic_light_signal)
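
For reference, a minimal subscriber counterpart is sketched below. It assumes the same hypothetical MsgTrafficLightSignal type and uses only the zerocm calls already seen in these examples (ZCM, good, subscribe, start, stop).

import time
from zerocm import ZCM
from msg_traffic_light_signal import MsgTrafficLightSignal  # hypothetical path

def on_signal(channel, msg):
    # turn_signal packs the three flags as bytes, in the order the
    # publisher above writes them
    left, forward, right = msg.turn_signal
    print(channel, msg.timestamp, left, forward, right)

zcm = ZCM('ipc')
if not zcm.good():
    exit(-1)
zcm.subscribe("msgTrafficLightSignal", MsgTrafficLightSignal, on_signal)
zcm.start()
time.sleep(5)  # receive messages for a while
zcm.stop()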
Example 2
# Fragment from a ZCM pub/sub test: assumes `import sys`/`import time`,
# an initialized ZCM instance `zcm`, the generated `example_t` type, and a
# `handler` callback that sets `success` (see the sketch after this fragment).
subs = zcm.subscribe("EXAMPLE", example_t, handler)

msg = example_t()
msg.utime = 10
msg.position = [1, 2, 3.5]
msg.orientation = [1.1, 2.2, 3.3, 4.4]
msg.ranges = [10, 20]
msg.num_ranges = len(msg.ranges)
msg.name = "this is a test"
msg.ignore0 = 0
msg.ignore1 = 0
msg.ignore2 = 0
msg.enabled = True

assert (msg.test_const_8_max_hex == -1)
assert (msg.test_const_16_max_hex == -1)
assert (msg.test_const_32_max_hex == -1)
assert (msg.test_const_64_max_hex == -1)

assert (msg.test_const_float == 1e-20)
assert (msg.test_const_double == 12.1e200)

zcm.start()
for i in range(0, 5):
    zcm.publish("EXAMPLE", msg)
    time.sleep(0.2)
zcm.stop()

sys.exit(0 if success == "Success" else 1)
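
The fragment above relies on a handler that sets `success`; a minimal sketch of such a callback, checking just two of the fields populated above (the real test presumably compares every field):

success = "Failure"

def handler(channel, msg):
    global success
    if msg.utime == 10 and msg.name == "this is a test":
        success = "Success"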
Example 3
# Fragment from a quadrotor RL project: assumes `gym.Env`, `gym.spaces.Box`,
# numpy as np, typing's Dict/Union, the zerocm bindings, and the project's
# own classes (QuadrotorModel, ModelDelayWrapper, RewardFunction, etc.).
class QuadrotorEnvironment(Env):
    """
    The QuadrotorEnvironment is a Gym environment that simulates a quadrotor. The simulation has support for
        * action and observation delays with jitter
        * observation noise, and
        * state clipping.
    The environment is highly customizable, including
        * the (randomized) initial state of the quadrotor
        * the state to observation transformation, which can include a history of past observations
        * the action to propeller speed mapping including direct and differential drive, absolute speed clipping and
          propeller acceleration clipping.
        * the reward function
    Its design is modular: e.g., you can reuse the action to propeller speed mapping or the state to observation
    transformation when running learned policies on real hardware, without taking the environment apart.
    """

    __slots__ = "max_time", "enable_rendering", "simulation_model", "delayed_model", "noised_state", \
                "state_to_observation", "observation_history", "reward_base", "action_to_propeller_speed", "attitude_pd", \
                "initial_state_parameters", "zcm", "reward_fn", "action_space", "observation_space"

    def __str__(self):
        return "Quadrotor Environment"

    def __init__(self,
                 reward_function_parameters: Dict = dict(),
                 quadrotor_model_parameters: Dict = dict(),
                 delay_parameters: Dict = dict(),
                 observation_mapping_parameters: Dict = dict(),
                 noise_parameters: Dict = dict(),
                 observation_history_parameters: Dict = dict(),
                 state_clipping_parameters: Dict = dict(),
                 observe_prop_rate: bool = False,
                 reward_base: str = 'current_state',
                 propeller_speed_mapping_parameters: Dict = dict(),
                 pd_support_parameters: Union[None, Dict] = None,
                 initial_state_parameters: Dict = dict(box_side_length=2,
                                                       velocity=1,
                                                       angular_velocity=1),
                 max_time: float = 10,
                 controller_period: float = 0.01,
                 enable_rendering: bool = False,
                 quadrotor_model_parameters_for_preprocessing=None):
        """
        Constructs a new QuadrotorEnvironment with the given parameters.

        :param reward_function_parameters: The parameters to be passed to the reward function object.
        See the documentation for the RewardFunction class for details.
        :param quadrotor_model_parameters: The parameters to be passed to the simulated QuadrotorModel.
        See the documentation for the QuadrotorModel class for details.
        :param delay_parameters: The parameters to be passed to the model delay wrapper. Defaults to no delays or
        jitters. See the documentation for the ModelDelayWrapper class for details.
        :param observation_mapping_parameters: The parameters to be passed to the state to observation mapping object.
        Defaults to observe position, rotation, velocity and angular velocity with some balanced scaling factors. The
        rotation is observed as a (flattened) rotation matrix by default. See the documentation for the
        ToObservationMap class for details.
        :param noise_parameters: The parameters to be passed to the state noiser object, which applies noise to the
        system state before it is observed. By default there is no noise. See the documentation for the NoisedStateMap
        class for details.
        :param observation_history_parameters: Parameters to be passed to the ObservationHistory class. It provides a
        view on past state observations and actions. By default, just the current state is observed and no past
        actions are included. See the ObservationHistory documentation for details.
        :param state_clipping_parameters: Parameters to be passed to the StateClippingWrapper class. At runtime it
        constrains the state to the defined bounds. See the StateClippingWrapper documentation for details.
        :param observe_prop_rate: Flag that enables observing the current propeller rate. TODO: this one should go away
        :param reward_base: The reward is calculated based on a system state. Which system state (observed state,
        actual state, state with or without observation noise, etc.) is used to compute the reward is determined by
        this parameter. Can be one of 'current_state', 'next_state', 'observed_state', 'observed_state_with_noise'.
        :param propeller_speed_mapping_parameters: The parameters passed to the PropellerSpeedMapping class.
        By default, the propeller speed is controlled directly, clipped to up to three times the hovering speed without
        any constraints on the propeller acceleration.
        :param pd_support_parameters: Gains for an attitude proportional-derivative (PD) controller whose output is
        added to all actions. If None, there is no PD controller. See the AttitudePDController for details on the
        parameters.
        :param initial_state_parameters: Controls the distribution from which the initial state is drawn upon reset.
        Must be a dict with the members
            * box_side_length: for the size of the box around the origin in which the quadrotor will be placed
            * velocity: for the maximum initial velocity in x, y and z direction.
            * angular_velocity: for the maximum angular velocity around all three rotation axes.
        :param max_time: The maximum time after which an episode in this environment is done.
        :param controller_period: The duration (in seconds of simulated time) between two calls to the policy.
        :param enable_rendering: A flag controlling if the environment should be rendered or not.
        :param quadrotor_model_parameters_for_preprocessing: Some of the preprocessing steps such as the observation
        computation and the PropellerSpeedMapping use some parameters of the simulation such as the hovering thrust.
        If we want to base those preprocessing steps on different simulation parameters than the actual simulation, this
        parameter can be set. If None, the original simulation parameters are used.
        """
        assert reward_base in [
            'observed_state', 'observed_state_with_noise', 'current_state',
            'next_state'
        ]
        if quadrotor_model_parameters_for_preprocessing is None:
            quadrotor_model_parameters_for_preprocessing = quadrotor_model_parameters
        quadrotor_model_for_preprocessing = QuadrotorModel(
            **quadrotor_model_parameters_for_preprocessing)
        self.max_time = max_time
        self.enable_rendering = enable_rendering

        self.simulation_model = QuadrotorModel(**quadrotor_model_parameters)
        self.delayed_model = ModelDelayWrapper(
            StateClippingWrapper(self.simulation_model,
                                 **state_clipping_parameters),
            controller_period=controller_period,
            **delay_parameters)
        self.noised_state = NoisedStateMap(**noise_parameters)
        if observe_prop_rate:
            self.state_to_observation = ToObservationMap(
                propeller_speed_scale=0.1 /
                quadrotor_model_for_preprocessing.hovering_thrust,
                **observation_mapping_parameters)
        else:
            self.state_to_observation = ToObservationMap(
                **observation_mapping_parameters)
        self.observation_history = ObservationHistory(
            **observation_history_parameters)
        self.reward_base = reward_base
        self.action_to_propeller_speed = PropellerSpeedMapping(
            quadrotor_model_for_preprocessing.hovering_thrust,
            **propeller_speed_mapping_parameters)
        if pd_support_parameters is not None:
            self.attitude_pd = AttitudePDController(
                quadrotor_model_for_preprocessing, **pd_support_parameters)
        else:
            self.attitude_pd = None

        self.initial_state_parameters = initial_state_parameters
        self.zcm = None

        self.reward_fn = RewardFunction(**reward_function_parameters)

        self.action_space = Box(-1, 1, (4, ), dtype=np.float32)
        self.observation_space = Box(-np.inf,
                                     np.inf,
                                     self.reset().shape,
                                     dtype=np.float32)

    def step(self, action):
        """
        Computes the next observation, reward, done flag and info as the Environment interface of Gym requires.

        :param action: The action to be transformed to propeller speeds.
        :return: Tuple consisting of observation, reward, done, {}
        """
        # Compute rotor speeds
        propeller_speeds, relative_propeller_speed, relative_propeller_acceleration = self.action_to_propeller_speed(
            action, self.observation_history.dt_history[-1]
        )  # This is a hacky way to access the most recent dt

        if self.attitude_pd is not None:
            current_state = self.delayed_model.compute_current_state()
            propeller_speeds += self.attitude_pd(current_state)
            propeller_speeds = np.clip(
                propeller_speeds, 0,
                self.action_to_propeller_speed.hovering_speed * 3)

        if self.reward_base == 'current_state':
            reward = self.reward_fn(self.delayed_model.compute_current_state(),
                                    relative_propeller_speed,
                                    relative_propeller_acceleration)

        # Compute next state
        observed_state, dt, observation_age = self.delayed_model.step(
            propeller_speeds)
        if self.reward_base == 'observed_state':
            reward = self.reward_fn(observed_state, relative_propeller_speed,
                                    relative_propeller_acceleration)

        # Compute Observation
        observed_state_with_noise = self.noised_state(observed_state)

        if self.reward_base == 'observed_state_with_noise':
            reward = self.reward_fn(observed_state_with_noise,
                                    relative_propeller_speed,
                                    relative_propeller_acceleration)

        observation = self.observation_history.get_observation_with_history(
            self.state_to_observation(observed_state_with_noise, dt,
                                      observation_age),
            relative_propeller_speed, dt)

        if self.reward_base == 'next_state':
            reward = self.reward_fn(self.delayed_model.compute_current_state(),
                                    relative_propeller_speed,
                                    relative_propeller_acceleration)

        if self.enable_rendering:
            self.render(reward, self.delayed_model.compute_current_state(),
                        observed_state_with_noise, propeller_speeds)

        return observation, reward, self.delayed_model.time >= self.max_time, {}

    def random_state(self):
        """
        Computes a random state based on the initial state parameters.
        The initial propeller speed is always the hovering speed.

        :return: A new system state.
        """
        radius = self.initial_state_parameters['box_side_length']
        velocity = self.initial_state_parameters['velocity']
        angular_velocity = self.initial_state_parameters['angular_velocity']
        return SysState(
            np.random.uniform(-radius, radius, 3),
            np.random.uniform(-velocity, velocity, 3),
            sample_random_rotation(),
            np.random.uniform(-angular_velocity, angular_velocity, 3),
            np.ones(4) * self.simulation_model.hovering_thrust)

    def reset(self,
              initial_state: Union[SysState, None] = None,
              initial_propeller_speed: Union[np.ndarray, None] = None):
        """
        Resets the environment to either a random initial state and hovering speed for the propellers or the provided
        initial state and propeller speed.

        :param initial_state: The initial state. If None (default) a random state is generated.
        :param initial_propeller_speed: The initial propeller speed. If None (default), the hovering speed is used.
        :return: The initial observation.
        """
        if initial_propeller_speed is None:
            initial_propeller_speed = np.ones(
                4) * self.simulation_model.hovering_thrust
        if initial_state is None:
            initial_state = self.random_state()
        if initial_state.propeller_speed is None:
            initial_state.propeller_speed = initial_propeller_speed
        observed_state, initial_dt, initial_obs_age = self.delayed_model.reset(
            initial_propeller_speed, initial_state)

        initial_observation = self.state_to_observation(
            observed_state, initial_dt, initial_obs_age)

        self.observation_history.reset(
            initial_observation, initial_propeller_speed * 0.1 /
            self.simulation_model.hovering_thrust, initial_dt)
        initial_relative_propeller_speed = self.action_to_propeller_speed.reset(
            initial_propeller_speed)

        if self.enable_rendering:
            self.render(0, initial_state, initial_state,
                        initial_propeller_speed)

        return self.observation_history.get_observation_with_history(
            initial_observation, initial_relative_propeller_speed, initial_dt)

    def on_target_msg(self, channel, msg):
        # print("new target:", msg.values)
        assert len(msg.values) == 3
        self.state_to_observation.position_offset = msg.values

    def render(self, reward, state, observed_state, thrust):
        if self.zcm is None:
            self.zcm = ZCM("")
            if self.zcm.good():
                self.zcm.subscribe("target", timestamped_vector_double,
                                   self.on_target_msg)
                self.zcm.start()

        if self.zcm.good():
            msg = timestamped_vector_double()
            msg.ts = int(self.delayed_model.time * 1e9)

            def publish(channel, data):
                msg.values = data
                msg.len = len(data)
                self.zcm.publish(channel, msg)

            publish(
                "quadrotor_viz/quat_pose",
                np.concatenate([state.position, state.rotation.components]))
            publish(
                "quadrotor_viz/quat_pose_noised",
                np.concatenate([
                    observed_state.position, observed_state.rotation.components
                ]))
            publish("quadrotor_viz/velocity", state.velocity)
            publish("quadrotor_viz/rotation_rate", state.angular_velocity)
            publish("quadrotor_viz/rotatorspeed", state.propeller_speed)
            publish("quadrotor_viz/control", thrust)
            publish("quadrotor_viz/reward", [reward])

    @property
    def time(self):
        return self.delayed_model.time

    def get_current_state(self) -> SysState:
        return self.delayed_model.compute_current_state()
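
A minimal usage sketch of the environment above, assuming the collaborating classes (QuadrotorModel, RewardFunction, ModelDelayWrapper, etc.) are importable from the same project:

env = QuadrotorEnvironment(max_time=5.0, enable_rendering=False)
obs = env.reset()  # random initial state, propellers at hovering speed
done = False
episode_return = 0.0
while not done:
    action = env.action_space.sample()  # random policy, for illustration only
    obs, reward, done, info = env.step(action)
    episode_return += reward
print("episode return:", episode_return)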
Example 4
# Fragment from the ZCM examples: assumes `import time`, `from zerocm import
# ZCM`, the generated `example_t` type, and the three handler callbacks
# (sketched after this fragment) are defined elsewhere.
zcm = ZCM("")
if not zcm.good():
    print("Unable to initialize zcm")
    exit()

# declare a new msg and populate it
msg = example_t()
msg.timestamp = 10

# set up a subscription on channel "TEST"
subs1 = zcm.subscribe("TEST", example_t, handler)
subs2 = zcm.subscribe_raw("TEST", handler_raw)
subs3 = zcm.subscribe("TEST", example_t, handler_with_recv_timestamp)

# publish a message
zcm.publish("TEST", msg)

# wait a second
time.sleep(1)

# publish another
zcm.publish("TEST", msg)

# handle incoming message
zcm.handle()

# clean up
zcm.unsubscribe(subs1)
zcm.unsubscribe(subs2)
zcm.unsubscribe(subs3)
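
The three callbacks are defined elsewhere in the original example; the sketches below show the shapes this fragment implies. The three-argument form of handler_with_recv_timestamp is an assumption about the zerocm bindings.

def handler(channel, msg):
    # decoded example_t message
    print("decoded on", channel, "timestamp =", msg.timestamp)

def handler_raw(channel, data):
    # raw, undecoded message bytes
    print("raw bytes on", channel, "length =", len(data))

def handler_with_recv_timestamp(channel, msg, recv_utime):
    # assumed three-argument form that also receives the receive timestamp
    print("received at", recv_utime, "on", channel)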
Example 5
# Fragment from a ZCM bitfield test: assumes `import sys`/`import time`, an
# instance `b` of the generated bitfield type, an initialized ZCM instance
# `zcm`, and a `success` flag set by a subscription handler elsewhere.
assert (b.SIGN_TEST_25 == 1)
assert (b.SIGN_TEST_26 == 7)
assert (b.SIGN_TEST_27 == 9223372036854775807)

assert (b.SIGN_TEST_28 == 0x7f)
assert (b.SIGN_TEST_29 == 0xff)
assert (b.SIGN_TEST_30 == 0x7f)
assert (b.SIGN_TEST_31 == -1)
assert (b.SIGN_TEST_32 == 127)
assert (b.SIGN_TEST_33 == -1)
assert (b.SIGN_TEST_34 == 0x7fff)
assert (b.SIGN_TEST_35 == -1)
assert (b.SIGN_TEST_36 == 32767)
assert (b.SIGN_TEST_37 == -1)
assert (b.SIGN_TEST_38 == 0x7fffffff)
assert (b.SIGN_TEST_39 == -1)
assert (b.SIGN_TEST_40 == 2147483647)
assert (b.SIGN_TEST_41 == -1)
assert (b.SIGN_TEST_42 == 0x7fffffffffffffff)
assert (b.SIGN_TEST_43 == -1)
assert (b.SIGN_TEST_44 == 9223372036854775807)
assert (b.SIGN_TEST_45 == -1)

zcm.start()
for i in range(0, 5):
    zcm.publish("BITFIELD", b)
    time.sleep(0.2)
zcm.stop()

sys.exit(0 if success == "Success" else 1)
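
The SIGN_TEST asserts above illustrate that these bitfield constants read back sign-extended: a pattern like 0xff in a signed 8-bit field comes back as -1. A standalone sketch of the same two's-complement arithmetic in plain Python:

def sign_extend(value, bits):
    # interpret the low `bits` bits of `value` as a two's-complement integer
    mask = 1 << (bits - 1)
    return (value & (mask - 1)) - (value & mask)

assert sign_extend(0xff, 8) == -1    # all bits set -> -1
assert sign_extend(0x7f, 8) == 127   # sign bit clear -> value unchanged
assert sign_extend(0xffff, 16) == -1
assert sign_extend(0x7fffffffffffffff, 64) == 9223372036854775807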