def make_world(self):
    world = HazardousWorld()
    world.dim_c = 2
    # add landmarks
    # TODO: decide on desired landmark properties instead of using a 'default' collection
    world.landmarks = []
    world.landmarks.append(
        RiskRewardLandmark(
            risk_fn=None,
            reward_fn=PolynomialRewardFunction2D(
                coefs=[10.0, 0.0, 0.0, -1.0, 0.0, -1.0],
                bounds={'xmin': -2.0, 'xmax': 2.0, 'ymin': -2.0, 'ymax': 2.0})))
    for i, landmark in enumerate(world.landmarks):
        landmark.name = 'landmark %d' % i
        landmark.collide = True
        landmark.movable = False
        landmark.size = DP.landmark_size
        # properties for landmarks
        if isinstance(landmark, RiskRewardLandmark) and landmark.is_hazard:
            landmark.color = np.array([landmark.risk(landmark.size) + .1, 0, 0])
        else:
            landmark.color = np.array([0.25, 0.25, 0.25])
    # make initial conditions
    self.reset_world(world)
    return world
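
# A minimal sketch of how PolynomialRewardFunction2D might evaluate the
# coefficients passed above. The ordering [c0, cx, cy, cxx, cxy, cyy] is an
# assumption, not confirmed by this repo; under it, coefs=[10, 0, 0, -1, 0, -1]
# gives the paraboloid r(x, y) = 10 - x^2 - y^2, which peaks at the landmark
# center and decays quadratically toward the bounds.
def _poly_reward_sketch(x, y, coefs, bounds):
    """Hypothetical stand-in for a PolynomialRewardFunction2D evaluation."""
    c0, cx, cy, cxx, cxy, cyy = coefs
    inside = (bounds['xmin'] <= x <= bounds['xmax']
              and bounds['ymin'] <= y <= bounds['ymax'])
    if not inside:
        return 0.0  # assume zero reward outside the declared bounds
    return c0 + cx * x + cy * y + cxx * x**2 + cxy * x * y + cyy * y**2

# e.g. the peak at the center:
# _poly_reward_sketch(0, 0, [10, 0, 0, -1, 0, -1],
#                     {'xmin': -2, 'xmax': 2, 'ymin': -2, 'ymax': 2}) -> 10.0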
def make_world(self):
    world = HazardousWorld(collision_termination_probability=0.0)
    # observation-based communication
    world.dim_c = 0
    world.max_communication_distance = _MAX_COMMUNICATION_DISTANCE
    # collaborative rewards
    world.collaborative = True
    world.systemic_rewards = False
    world.identical_rewards = False
    # add landmarks
    world.landmarks = []
    for lm in _LANDMARKS:
        world.landmarks.append(lm)
    for i, landmark in enumerate(world.landmarks):
        landmark.name = 'landmark %d' % i
        landmark.collide = False
        landmark.movable = False
        landmark.size = _LANDMARK_SIZE
        # properties for landmarks
        if isinstance(landmark, RiskRewardLandmark) and landmark.is_hazard:
            # TODO: make colors heatmap of risk probability over all bounds
            landmark.color = np.array([
                landmark.risk_fn.get_failure_probability(0, 0) + .1, 0, 0
            ])
        else:
            landmark.color = np.array([0.25, 0.25, 0.25])
    # make initial conditions
    self.reset_world(world)
    return world
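
# One way to approach the heatmap TODO above: instead of coloring by the
# failure probability at the origin only, sample the risk function over a grid
# and scale the red channel by the peak probability. A sketch only, using just
# the get_failure_probability(x, y) call already seen above; the explicit
# bounds argument is a hypothetical parameter, since the landmark's own extent
# isn't visible here.
import numpy as np

def risk_heatmap_color_sketch(risk_fn, bounds, n=25):
    """Color a landmark by the peak failure probability over its bounds."""
    xs = np.linspace(bounds['xmin'], bounds['xmax'], n)
    ys = np.linspace(bounds['ymin'], bounds['ymax'], n)
    peak = max(risk_fn.get_failure_probability(x, y) for x in xs for y in ys)
    # the +0.1 floor matches the coloring convention used above
    return np.array([min(peak + 0.1, 1.0), 0.0, 0.0])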
def make_world(self):
    world = HazardousWorld(
        collision_termination_probability=self.collision_risk,
        flyoff_termination_radius=10.0,
        flyoff_termination_speed=50.0,
        spontaneous_termination_probability=1.0 /
        (8.0 * 50.0 * self.num_agents))
    # set scenario-specific world parameters
    world.collaborative = True
    world.systemic_rewards = True
    world.identical_rewards = self.identical_rewards
    world.dim_c = 0  # observation-based communication
    world.termination_reward = _TERMINATION_REWARD
    world.render_connections = True
    # add landmarks to world
    world.landmarks = []
    for lm in self.scenario_landmarks:
        world.landmarks.append(lm)
    for i, landmark in enumerate(world.landmarks):
        landmark.name = 'landmark_%d' % i
        landmark.collide = False
        landmark.movable = False
        landmark.size = _LANDMARK_SIZE
        # properties for landmarks
        if isinstance(landmark, RiskRewardLandmark) and landmark.is_hazard:
            # TODO: make colors heatmap of risk probability over all bounds
            landmark.color = np.array([
                landmark.risk_fn.get_failure_probability(0, 0) + .1, 0, 0
            ])
        else:
            landmark.color = np.array([0.25, 0.25, 0.25])
    # make initial conditions
    self.reset_world(world)
    return world
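
# Back-of-envelope check on the spontaneous termination rate above. A per-step
# probability of 1 / (8 * 50 * num_agents) over a 50-step episode gives each
# agent roughly a 1 / (8 * num_agents) chance of spontaneous failure per
# episode, i.e. about one failure somewhere in the team every 8 episodes.
# Sketch only; the 50-steps-per-episode reading of the constant is an
# assumption.
num_agents = 4
p_step = 1.0 / (8.0 * 50.0 * num_agents)
p_agent_episode = 1.0 - (1.0 - p_step) ** 50           # ~1 / (8 * num_agents)
expected_team_failures = num_agents * p_agent_episode  # ~1/8 per episode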
def make_world(self):
    world = HazardousWorld()
    # observation-based communication
    world.dim_c = 0
    world.max_communication_distance = DP.max_communication_distance
    # add landmarks
    world.landmarks = [Landmark() for _ in range(self.num_landmarks)]
    for i, landmark in enumerate(world.landmarks):
        landmark.name = 'landmark %d' % i
        landmark.collide = True
        landmark.movable = False
        landmark.size = DP.landmark_size
    # add obstacles
    world.obstacles = [Obstacle() for _ in range(self.num_obstacles)]
    for i, obstacle in enumerate(world.obstacles):
        obstacle.name = 'obstacle %d' % i
        obstacle.collide = True
        obstacle.size = 0.05
    # make initial conditions
    self.reset_world(world)
    return world
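
# The obstacles above are fixed discs of radius 0.05 that collide with agents.
# A minimal sketch of the standard MPE-style overlap test between two circular
# entities, assuming positions are 2-D numpy arrays; this mirrors how
# particle-world physics typically decides contact, though the repo's exact
# collision code isn't shown here.
import numpy as np

def is_colliding_sketch(pos_a, size_a, pos_b, size_b):
    """True when two circular entities overlap (distance < sum of radii)."""
    return np.linalg.norm(pos_a - pos_b) < (size_a + size_b)

# e.g. an agent of radius 0.025 at the origin vs a 0.05-radius obstacle nearby
assert is_colliding_sketch(np.zeros(2), 0.025, np.array([0.06, 0.0]), 0.05)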
def make_world(self):
    world = HazardousWorld()
    # set scenario-specific world parameters
    world.collaborative = True
    world.systemic_rewards = True
    world.identical_rewards = False
    world.dim_c = 0  # observation-based communication
    world.max_communication_distance = _MAX_COMMUNICATION_DISTANCE
    world.distance_resistance_gain = _DISTANCE_RESISTANCE_GAIN
    # create and add terminal landmarks
    # no intermediate landmarks in this scenario
    world.origin_terminal_landmark = RiskRewardLandmark(
        risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
    world.origin_terminal_landmark.name = 'origin'
    world.origin_terminal_landmark.state.p_pos = np.array([-0.75, -0.75])
    world.destination_terminal_landmark = RiskRewardLandmark(
        risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
    world.destination_terminal_landmark.name = 'destination'
    world.destination_terminal_landmark.state.p_pos = np.array([0.75, 0.75])
    # create landmark list and set properties
    world.landmarks = [
        world.origin_terminal_landmark, world.destination_terminal_landmark
    ]
    for i, landmark in enumerate(world.landmarks):
        # zero the landmark's velocity (velocity lives on the entity state)
        landmark.state.p_vel = np.zeros(world.dim_p)
        landmark.collide = False
        landmark.movable = False
        landmark.size = _LANDMARK_SIZE
        # hazards are colored by failure probability; non-hazards (including
        # these risk_fn=None terminals) are gray, matching the other scenarios
        if isinstance(landmark, RiskRewardLandmark) and landmark.is_hazard:
            landmark.color = np.array(
                [landmark.risk_fn.get_failure_probability(0, 0) + .1, 0, 0])
        else:
            landmark.color = np.array([0.25, 0.25, 0.25])
    # make initial conditions
    self.reset_world(world)
    return world
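
# Geometry note for the scenario above: the origin and destination terminals
# sit 1.5 * sqrt(2) ~= 2.12 apart, so the number of relay agents needed to
# bridge them depends on _MAX_COMMUNICATION_DISTANCE. A sketch; the 0.8 value
# below is a placeholder assumption, since the constant's value isn't shown.
import math
import numpy as np

origin = np.array([-0.75, -0.75])
destination = np.array([0.75, 0.75])
span = np.linalg.norm(destination - origin)  # ~2.121
max_comm = 0.8                               # assumed placeholder value
links_needed = math.ceil(span / max_comm)    # hops along the chain (3 here)
relay_agents_needed = links_needed - 1       # agents between the terminals (2)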
def make_world(self):
    world = HazardousWorld()
    # set scenario-specific world parameters
    world.collaborative = True
    world.systemic_rewards = True
    world.identical_rewards = False
    world.dim_c = 0  # observation-based communication
    world.connection_reward = _CONNECTION_REWARD
    world.termination_reward = _TERMINATION_REWARD
    world.render_connections = True
    # add landmarks
    world.origin_terminal_landmark = RiskRewardLandmark(
        risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
    world.destination_terminal_landmark = RiskRewardLandmark(
        risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
    world.hazard_landmark = _NON_TERMINAL_LANDMARKS[0]
    world.hazard_landmark.ignore_connection_rendering = True
    world.landmarks = [
        world.origin_terminal_landmark, world.destination_terminal_landmark,
        world.hazard_landmark
    ]
    for i, landmark in enumerate(world.landmarks):
        landmark.name = 'landmark_%d' % i
        landmark.collide = False
        landmark.movable = False
        landmark.size = _LANDMARK_SIZE
        # properties for landmarks
        if isinstance(landmark, RiskRewardLandmark) and landmark.is_hazard:
            # TODO: make colors heatmap of risk probability over all bounds
            landmark.color = np.array([
                landmark.risk_fn.get_failure_probability(0, 0) + .1, 0, 0
            ])
        else:
            landmark.color = np.array([0.25, 0.25, 0.25])
    # make initial conditions
    self.reset_world(world)
    return world
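
# The connection_reward above suggests agents are rewarded for forming a
# communication chain between the two terminals. A sketch of the kind of
# connectivity test that implies: breadth-first search over nodes within
# communication range. All names here are illustrative assumptions; the repo's
# actual reward logic isn't shown.
from collections import deque
import numpy as np

def terminals_connected_sketch(origin, destination, agent_positions, comm_dist):
    """True if a chain of in-range hops links origin to destination."""
    nodes = [origin] + list(agent_positions) + [destination]
    n = len(nodes)
    adj = [[j for j in range(n)
            if j != i and np.linalg.norm(nodes[i] - nodes[j]) <= comm_dist]
           for i in range(n)]
    seen, queue = {0}, deque([0])
    while queue:
        i = queue.popleft()
        for j in adj[i]:
            if j not in seen:
                seen.add(j)
                queue.append(j)
    return n - 1 in seen  # destination reachable from origin

# e.g. two agents spaced along the diagonal bridge terminals ~2.12 apart
assert terminals_connected_sketch(
    np.array([-0.75, -0.75]), np.array([0.75, 0.75]),
    [np.array([-0.25, -0.25]), np.array([0.25, 0.25])], comm_dist=0.8)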
def make_world(self):
    world = HazardousWorld()
    # set scenario-specific world parameters
    world.collaborative = True
    world.systemic_rewards = True
    world.identical_rewards = False
    world.dim_c = 0  # observation-based communication
    world.max_communication_distance = _MAX_COMMUNICATION_DISTANCE
    world.distance_resistance_gain = _DISTANCE_RESISTANCE_GAIN
    world.distance_resistance_exponent = _DISTANCE_RESISTANCE_EXPONENT
    # add landmarks
    world.origin_terminal_landmark = RiskRewardLandmark(
        risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
    world.destination_terminal_landmark = RiskRewardLandmark(
        risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
    world.landmarks = [
        world.origin_terminal_landmark, world.destination_terminal_landmark
    ]
    for lm in _NON_TERMINAL_LANDMARKS:
        world.landmarks.append(lm)
    for i, landmark in enumerate(world.landmarks):
        landmark.name = 'landmark_%d' % i
        landmark.collide = False
        landmark.movable = False
        landmark.size = _LANDMARK_SIZE
        # properties for landmarks
        if isinstance(landmark, RiskRewardLandmark) and landmark.is_hazard:
            # TODO: make colors heatmap of risk probability over all bounds
            landmark.color = np.array([
                landmark.risk_fn.get_failure_probability(0, 0) + .1, 0, 0
            ])
        else:
            landmark.color = np.array([0.25, 0.25, 0.25])
    # make initial conditions
    self.reset_world(world)
    return world
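
# The resistance parameters above suggest communication quality decays with
# link length, something like resistance = gain * distance ** exponent. That
# functional form is an assumption inferred from the parameter names, not
# confirmed code; the sketch shows how such a resistance would penalize long
# links and so encourage agents to keep relay hops short.
def link_resistance_sketch(distance, gain=1.0, exponent=2.0):
    """Hypothetical distance-based resistance; gain/exponent are placeholders."""
    return gain * distance ** exponent

# a 0.5-length link is 4x "cheaper" than a 1.0-length link when exponent=2
assert link_resistance_sketch(1.0) / link_resistance_sketch(0.5) == 4.0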