def make_world(self):
        """Create and initialize the HazardousWorld for this scenario.

        Sets scenario-wide reward/communication parameters, builds the
        landmark list (two terminal landmarks followed by the module-level
        non-terminal landmarks), assigns per-landmark rendering properties,
        and delegates initial state to ``reset_world``.

        Returns:
            The fully constructed ``HazardousWorld`` instance.
        """
        world = HazardousWorld()

        # set scenario-specific world parameters
        world.collaborative = True
        world.systemic_rewards = True
        world.identical_rewards = False
        world.dim_c = 0  # observation-based communication (no explicit comm channel)
        world.max_communication_distance = _MAX_COMMUNICATION_DISTANCE
        world.distance_resistance_gain = _DISTANCE_RESISTANCE_GAIN
        world.distance_resistance_exponent = _DISTANCE_RESISTANCE_EXPONENT

        # add landmarks: terminals first, then the shared non-terminal set
        world.origin_terminal_landmark = RiskRewardLandmark(
            risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
        world.destination_terminal_landmark = RiskRewardLandmark(
            risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
        world.landmarks = [
            world.origin_terminal_landmark, world.destination_terminal_landmark
        ]
        # idiomatic extend instead of a manual append loop
        # NOTE(review): _NON_TERMINAL_LANDMARKS appear to be shared
        # module-level objects; the property loop below mutates them in
        # place — confirm that worlds are not expected to be independent.
        world.landmarks.extend(_NON_TERMINAL_LANDMARKS)

        for i, landmark in enumerate(world.landmarks):
            landmark.name = 'landmark_%d' % i
            landmark.collide = False
            landmark.movable = False
            landmark.size = _LANDMARK_SIZE
            # hazards are tinted red by their failure probability at the
            # origin; all other landmarks get a uniform gray
            if isinstance(landmark, RiskRewardLandmark) and landmark.is_hazard:
                # TODO: make colors heatmap of risk probability over all bounds
                landmark.color = np.array(
                    [landmark.risk_fn.get_failure_probability(0, 0) + .1, 0, 0])
            else:
                landmark.color = np.array([0.25, 0.25, 0.25])

        # make initial conditions
        self.reset_world(world)
        return world
    def make_world(self):
        """Build the HazardousWorld for this scenario.

        Creates the two terminal landmarks, ``self.num_hazards`` hazard
        landmarks, assigns shared landmark properties, and hands off initial
        placement to ``reset_world``.

        Returns:
            The constructed ``HazardousWorld`` instance.
        """
        world = HazardousWorld()

        # scenario-wide parameters
        world.collaborative = True
        world.systemic_rewards = True
        world.identical_rewards = self.identical_rewards
        world.dim_c = 0  # observation-based communication
        world.connection_reward = _CONNECTION_REWARD
        world.termination_reward = _TERMINATION_REWARD
        world.render_connections = True

        # terminals occupy the front of the landmark list; hazards follow
        world.origin_terminal_landmark = Landmark()
        world.destination_terminal_landmark = Landmark()
        world.landmarks = [
            world.origin_terminal_landmark, world.destination_terminal_landmark
        ]

        world.hazard_landmarks = []
        for _ in range(self.num_hazards):
            hazard = RiskRewardLandmark(
                risk_fn=RadialRisk(_HAZARD_SIZE, self.hazard_risk),
                reward_fn=RadialReward(_HAZARD_SIZE, 0.0))
            hazard.silent = True
            hazard.deaf = True
            hazard.ignore_connection_rendering = True
            world.hazard_landmarks.append(hazard)
            world.landmarks.append(hazard)

        for idx, lm in enumerate(world.landmarks):
            lm.name = 'landmark_%d' % idx
            lm.collide = False
            lm.movable = False
            lm.size = _LANDMARK_SIZE
            # hazards are tinted red by their central failure probability;
            # everything else is a uniform gray
            if isinstance(lm, RiskRewardLandmark) and lm.is_hazard:
                # TODO: make colors heatmap of risk probability over all bounds
                red_channel = lm.risk_fn.get_failure_probability(0, 0) + .1
                lm.color = np.array([red_channel, 0, 0])
            else:
                lm.color = np.array([0.25, 0.25, 0.25])

        # initial conditions
        self.reset_world(world)
        return world
    # Example #3
    def make_world(self):
        """Create the two-terminal HazardousWorld (no intermediate landmarks).

        Places a fixed origin terminal at (-0.75, -0.75) and a destination
        terminal at (0.75, 0.75), sets shared landmark properties, and
        delegates remaining initial state to ``reset_world``.

        Returns:
            The constructed ``HazardousWorld`` instance.
        """
        world = HazardousWorld()

        # set scenario-specific world parameters
        world.collaborative = True
        world.systemic_rewards = True
        world.identical_rewards = False
        world.dim_c = 0  # observation-based communication
        world.max_communication_distance = _MAX_COMMUNICATION_DISTANCE
        world.distance_resistance_gain = _DISTANCE_RESISTANCE_GAIN

        # create and add terminal landmarks
        # no intermediate landmarks in this scenario
        world.origin_terminal_landmark = RiskRewardLandmark(
            risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
        world.origin_terminal_landmark.name = 'origin'
        world.origin_terminal_landmark.state.p_pos = np.array([-0.75, -0.75])
        world.destination_terminal_landmark = RiskRewardLandmark(
            risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
        world.destination_terminal_landmark.name = 'destination'
        world.destination_terminal_landmark.state.p_pos = np.array(
            [0.75, 0.75])

        # create landmark list and set properties
        world.landmarks = [
            world.origin_terminal_landmark, world.destination_terminal_landmark
        ]
        for landmark in world.landmarks:
            # BUG FIX: velocity belongs on the landmark's state object — the
            # positions above are set via state.p_pos; the original wrote to
            # a stray landmark.p_vel attribute instead of state.p_vel.
            landmark.state.p_vel = np.zeros(world.dim_p)
            landmark.collide = False
            landmark.movable = False
            landmark.size = _LANDMARK_SIZE
            # NOTE(review): these terminals were built with risk_fn=None, yet
            # risk_fn.get_failure_probability is called unconditionally here.
            # Presumably RiskRewardLandmark substitutes a default risk
            # function — confirm, otherwise this raises AttributeError.
            landmark.color = np.array(
                [landmark.risk_fn.get_failure_probability(0, 0) + .1, 0, 0])

        # make initial conditions
        self.reset_world(world)
        return world