def __init__(self, *, num_agents, num_hazards, identical_rewards,
             observation_type, hazard_risk=0.5, collision_risk=0.0):
    '''
    Args:
     - num_agents [int] number of agents in scenario
     - num_hazards [int] number of hazard landmarks in the scenario
     - identical_rewards [bool] true if all agents receive the exact same reward,
        false if rewards are "local" to agents
     - observation_type [str] "direct" if each entity is observed directly,
        "histogram" if entities are binned in a spatial grid
     - hazard_risk [float] max probability of failure caused by a hazard landmark
     - collision_risk [float] probability of failure caused by a collision
    '''

    # check inputs
    assert isinstance(num_agents, int)
    assert isinstance(num_hazards, int)
    assert num_hazards in (0, 1)
    assert isinstance(identical_rewards, bool)
    assert observation_type in ("direct", "histogram")
    assert 0.0 <= hazard_risk <= 1.0
    assert 0.0 <= collision_risk <= 1.0

    # set member vars
    self.num_agents = num_agents
    self.num_hazards = num_hazards
    self.identical_rewards = identical_rewards
    self.observation_type = observation_type
    self.hazard_risk = hazard_risk
    self.collision_risk = collision_risk

    # create list of landmarks
    # Note: the RadialReward function is not directly used for calculating reward
    # in this scenario, hence the peak value of 0.0; a non-zero radius is used
    # for numerical reasons
    landmarks = []
    for _ in range(self.num_agents):
        landmarks.append(
            RiskRewardLandmark(risk_fn=None, reward_fn=RadialReward(1.0, 0.0)))
    for _ in range(self.num_hazards):
        # hazard peak failure probability comes from the hazard_risk argument
        landmarks.append(
            RiskRewardLandmark(risk_fn=RadialRisk(_LANDMARK_SIZE, hazard_risk),
                               reward_fn=RadialReward(1.0, 0.0)))
    self.scenario_landmarks = landmarks
    self.n_landmarks = len(self.scenario_landmarks)
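# Hedged usage sketch: assumes this constructor belongs to a Scenario(BaseScenario)
# class like those in the sibling files below, and that make_world is defined as
# in those files; the argument values are illustrative only.
if __name__ == '__main__':
    scenario = Scenario(num_agents=3, num_hazards=1, identical_rewards=True,
                        observation_type="histogram", hazard_risk=0.5,
                        collision_risk=0.0)
    world = scenario.make_world()
    print(scenario.n_landmarks)  # one landmark per agent plus one hazard: 4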
def make_world(self):
    world = HazardousWorld()

    # set scenario-specific world parameters
    world.collaborative = True
    world.systemic_rewards = True
    world.identical_rewards = False
    world.dim_c = 0  # observation-based communication
    world.max_communication_distance = _MAX_COMMUNICATION_DISTANCE
    world.distance_resistance_gain = _DISTANCE_RESISTANCE_GAIN
    world.distance_resistance_exponent = _DISTANCE_RESISTANCE_EXPONENT

    # add landmarks
    world.origin_terminal_landmark = RiskRewardLandmark(
        risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
    world.destination_terminal_landmark = RiskRewardLandmark(
        risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
    world.landmarks = [world.origin_terminal_landmark,
                       world.destination_terminal_landmark]
    world.landmarks.extend(_NON_TERMINAL_LANDMARKS)
    for i, landmark in enumerate(world.landmarks):
        landmark.name = 'landmark_%d' % i
        landmark.collide = False
        landmark.movable = False
        landmark.size = _LANDMARK_SIZE
        # properties for landmarks: red intensity scaled by failure probability
        # at the hazard center, neutral gray otherwise
        if isinstance(landmark, RiskRewardLandmark) and landmark.is_hazard:
            # TODO: make colors heatmap of risk probability over all bounds
            landmark.color = np.array(
                [landmark.risk_fn.get_failure_probability(0, 0) + .1, 0, 0])
        else:
            landmark.color = np.array([0.25, 0.25, 0.25])

    # make initial conditions
    self.reset_world(world)
    return world
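# Hedged sketch toward the TODO above: color a hazard by its peak failure
# probability over a sampled region instead of only at the origin. Assumes
# risk_fn.get_failure_probability(x, y) is defined at arbitrary points (it is
# called at (0, 0) above); the [-1, 1]^2 sampling bounds are an assumption,
# not taken from this file.
def risk_heatmap_color(landmark, bound=1.0, n_samples=21):
    xs = np.linspace(-bound, bound, n_samples)
    peak = max(landmark.risk_fn.get_failure_probability(x, y)
               for x in xs for y in xs)
    # map peak probability to red intensity, keeping the +0.1 floor used above
    return np.array([min(peak + 0.1, 1.0), 0.0, 0.0])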
def make_world(self):
    world = HazardousWorld()

    # set scenario-specific world parameters
    world.collaborative = True
    world.systemic_rewards = True
    world.identical_rewards = False
    world.dim_c = 0  # observation-based communication
    world.connection_reward = _CONNECTION_REWARD
    world.termination_reward = _TERMINATION_REWARD
    world.render_connections = True

    # add landmarks
    world.origin_terminal_landmark = RiskRewardLandmark(
        risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
    world.destination_terminal_landmark = RiskRewardLandmark(
        risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
    world.hazard_landmark = _NON_TERMINAL_LANDMARKS[0]
    world.hazard_landmark.ignore_connection_rendering = True
    world.landmarks = [world.origin_terminal_landmark,
                       world.destination_terminal_landmark,
                       world.hazard_landmark]
    for i, landmark in enumerate(world.landmarks):
        landmark.name = 'landmark_%d' % i
        landmark.collide = False
        landmark.movable = False
        landmark.size = _LANDMARK_SIZE
        # properties for landmarks: red intensity scaled by failure probability
        # at the hazard center, neutral gray otherwise
        if isinstance(landmark, RiskRewardLandmark) and landmark.is_hazard:
            # TODO: make colors heatmap of risk probability over all bounds
            landmark.color = np.array(
                [landmark.risk_fn.get_failure_probability(0, 0) + .1, 0, 0])
        else:
            landmark.color = np.array([0.25, 0.25, 0.25])

    # make initial conditions
    self.reset_world(world)
    return world
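# Hedged illustration of the "connection" idea this scenario rewards: treat any
# two entities within _MAX_CONNECTION_DISTANCE as linked and check whether the
# agents relay a path between the two terminals. This is only one interpretation
# of connection_reward / render_connections for illustration; the repo's actual
# connection logic is defined elsewhere and may differ.
def terminals_connected(world):
    def _dist(a, b):
        return np.linalg.norm(a.state.p_pos - b.state.p_pos)

    goal = world.destination_terminal_landmark
    frontier = [world.origin_terminal_landmark]
    visited = set(frontier)
    while frontier:
        current = frontier.pop()
        if _dist(current, goal) <= _MAX_CONNECTION_DISTANCE:
            return True  # reached the destination terminal through the relay
        for agent in world.agents:
            if agent not in visited and _dist(current, agent) <= _MAX_CONNECTION_DISTANCE:
                visited.add(agent)
                frontier.append(agent)
    return False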
def make_world(self):
    world = HazardousWorld()

    # set scenario-specific world parameters
    world.collaborative = True
    world.systemic_rewards = True
    world.identical_rewards = False
    world.dim_c = 0  # observation-based communication
    world.max_communication_distance = _MAX_COMMUNICATION_DISTANCE
    world.distance_resistance_gain = _DISTANCE_RESISTANCE_GAIN

    # create and add terminal landmarks
    # no intermediate landmarks in this scenario
    world.origin_terminal_landmark = RiskRewardLandmark(
        risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
    world.origin_terminal_landmark.name = 'origin'
    world.origin_terminal_landmark.state.p_pos = np.array([-0.75, -0.75])
    world.destination_terminal_landmark = RiskRewardLandmark(
        risk_fn=None, reward_fn=RadialReward(1.0, 10.0))
    world.destination_terminal_landmark.name = 'destination'
    world.destination_terminal_landmark.state.p_pos = np.array([0.75, 0.75])

    # create landmark list and set properties
    world.landmarks = [world.origin_terminal_landmark,
                       world.destination_terminal_landmark]
    for i, landmark in enumerate(world.landmarks):
        # velocity lives on the landmark state, matching p_pos above
        landmark.state.p_vel = np.zeros(world.dim_p)
        landmark.collide = False
        landmark.movable = False
        landmark.size = _LANDMARK_SIZE
        # guard the risk-based color as in the sibling scenarios; the terminals
        # here have risk_fn=None, so calling get_failure_probability on them
        # unconditionally would fail
        if isinstance(landmark, RiskRewardLandmark) and landmark.is_hazard:
            landmark.color = np.array(
                [landmark.risk_fn.get_failure_probability(0, 0) + .1, 0, 0])
        else:
            landmark.color = np.array([0.25, 0.25, 0.25])

    # make initial conditions
    self.reset_world(world)
    return world
import numpy as np

from particle_environments.common import RadialPolynomialRewardFunction2D as RadialReward
from particle_environments.common import RadialBernoulliRiskFunction2D as RadialRisk
from rl_algorithms.scenariolearning import ScenarioHeuristicAgentTrainer

# Scenario Parameters
_MAX_COMMUNICATION_DISTANCE = np.inf
_AGENT_SIZE = 0.15
_LANDMARK_SIZE = 0.05
_AGENT_OBSERVATION_LEN = 5
_LANDMARK_OBSERVATION_LEN = 3
_NUM_AGENTS = 3

_LANDMARKS = []
_LANDMARKS.append(
    RiskRewardLandmark(risk_fn=RadialRisk(_LANDMARK_SIZE, 0.5),
                       reward_fn=RadialReward(1.0, 0.0)))
_LANDMARKS.append(
    RiskRewardLandmark(risk_fn=None, reward_fn=RadialReward(1.0, 0.0)))
_LANDMARKS.append(
    RiskRewardLandmark(risk_fn=None, reward_fn=RadialReward(1.0, 0.0)))
_LANDMARKS.append(
    RiskRewardLandmark(risk_fn=None, reward_fn=RadialReward(1.0, 0.0)))
_N_LANDMARKS = len(_LANDMARKS)


class Scenario(BaseScenario):
    # static class
    num_agents = _NUM_AGENTS

    def make_world(self):
        world = HazardousWorld(collision_termination_probability=0.0)
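# Hedged illustration: probe the hazard's failure-probability field, assuming
# RadialRisk(radius, peak_probability) peaks at the center and decays with
# distance (inferred from the constructor arguments; not confirmed here).
def _print_hazard_risk_profile():
    hazard = _LANDMARKS[0]
    for x in (0.0, 0.5 * _LANDMARK_SIZE, _LANDMARK_SIZE, 2.0 * _LANDMARK_SIZE):
        p = hazard.risk_fn.get_failure_probability(x, 0.0)
        print('failure probability at x=%.3f: %.3f' % (x, p))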
_MAX_CONNECTION_DISTANCE = 0.35
_MAX_OBSERVATION_DISTANCE = 1.0
_CONNECTION_REWARD = 1.0
_TERMINATION_REWARD = -0.0
_AGENT_SIZE = 0.01
_LANDMARK_SIZE = 0.025
_N_RADIAL_BINS = 4
_N_ANGULAR_BINS = 8
_N_OBSERVED_TERMINATIONS = 5
_N_TERMINALS = 2
_ZERO_THRESHOLD = 1e-6

_NON_TERMINAL_LANDMARKS = []
_NON_TERMINAL_LANDMARKS.append(
    RiskRewardLandmark(risk_fn=RadialRisk(0.1),
                       reward_fn=RadialReward(0.1, 10.0)))


class Scenario(BaseScenario):
    # static class
    num_agents = 20

    def make_world(self):
        world = HazardousWorld()

        # set scenario-specific world parameters
        world.collaborative = True
        world.systemic_rewards = True
        world.identical_rewards = False
        world.dim_c = 0  # observation-based communication
        world.connection_reward = _CONNECTION_REWARD
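# Hedged sketch of a radial-angular histogram observation, assuming the
# "histogram" observation type bins entity positions relative to the observing
# agent into _N_RADIAL_BINS x _N_ANGULAR_BINS cells out to
# _MAX_OBSERVATION_DISTANCE; the repo's actual binning (e.g. format_observation)
# may differ.
def histogram_observation(rel_positions):
    ''' rel_positions: iterable of 2D offsets from the observing agent '''
    hist = np.zeros((_N_RADIAL_BINS, _N_ANGULAR_BINS))
    r_edges = np.linspace(0.0, _MAX_OBSERVATION_DISTANCE, _N_RADIAL_BINS + 1)
    for dx, dy in rel_positions:
        r = np.hypot(dx, dy)
        if r > _MAX_OBSERVATION_DISTANCE:
            continue  # outside observation range
        r_idx = min(np.searchsorted(r_edges, r, side='right') - 1,
                    _N_RADIAL_BINS - 1)
        ang = np.arctan2(dy, dx) % (2.0 * np.pi)
        a_idx = min(int(ang // (2.0 * np.pi / _N_ANGULAR_BINS)),
                    _N_ANGULAR_BINS - 1)
        hist[r_idx, a_idx] += 1.0
    return hist.flatten()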
from particle_environments.mager.world import TemporarilyObservableRiskRewardLandmark as TORRLandmark
from particle_environments.mager.observation import format_observation
from particle_environments.common import is_collision, distance, delta_pos
from particle_environments.common import RadialPolynomialRewardFunction2D as RadialReward
from particle_environments.common import RadialBernoulliRiskFunction2D as RadialRisk
from particle_environments.common import DefaultParameters as DP

# Scenario Parameters
_MAX_COMMUNICATION_DISTANCE = 0.5
_AGENT_SIZE = 0.01
_LANDMARK_SIZE = 0.025

_LANDMARKS = []
_LANDMARKS.append(
    TORRLandmark(risk_fn=RadialRisk(0.1),
                 reward_fn=RadialReward(0.15, 10.0),
                 observe_duration=1.0))
_LANDMARKS.append(
    TORRLandmark(risk_fn=RadialRisk(0.1),
                 reward_fn=RadialReward(0.15, 10.0),
                 observe_duration=1.0))
_LANDMARKS.append(
    TORRLandmark(risk_fn=RadialRisk(0.1),
                 reward_fn=RadialReward(0.15, 10.0),
                 observe_duration=1.0))


class Scenario(BaseScenario):
    # static class
    num_agents = 10
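# Hedged sketch: gate inter-agent communication by range using the distance
# helper imported above, assuming distance(a, b) returns the Euclidean distance
# between two entities (inferred from its use as a helper; signature not
# confirmed here).
def communication_neighbors(agent, world):
    ''' agents within _MAX_COMMUNICATION_DISTANCE of the given agent, excluding itself '''
    return [other for other in world.agents
            if other is not agent and
            distance(agent, other) <= _MAX_COMMUNICATION_DISTANCE]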