def reset_world(self, world):
    """Recreate the agents and re-randomize all agent and landmark states.

    A fresh list of ``self.num_agents`` ``MortalAgent`` objects replaces
    ``world.agents``. Each agent is non-colliding, silent, not terminated,
    and starts at rest at a position drawn uniformly from [-1, 1) per axis.

    ``world.landmarks`` is shuffled in place and every landmark gets a new
    uniform random position. A landmark that is a ``RiskRewardLandmark``
    with a truthy ``is_hazard`` is tinted red and tagged ``1.0``; any other
    landmark is gray and tagged ``0.0``.

    Args:
        world: the world whose agents and landmarks are reset in place.
    """
    world.agents = [MortalAgent() for _ in range(self.num_agents)]
    for idx, mortal in enumerate(world.agents):
        # identity and behavior flags
        mortal.name = 'agent %d' % idx
        mortal.terminated = False
        mortal.collide = False
        mortal.silent = True
        mortal.previous_observation = None
        # appearance
        mortal.size = _AGENT_SIZE
        mortal.color = np.array([0.35, 0.35, 0.85])
        # physical and communication state
        mortal.state.p_pos = np.random.uniform(low=-1, high=+1, size=world.dim_p)
        mortal.state.p_vel = np.zeros(world.dim_p)
        mortal.state.c = np.zeros(world.dim_c)

    # shuffle in place so the hazard does not always sit at the same index
    shuffle(world.landmarks)
    for mark in world.landmarks:
        mark.state.p_pos = np.random.uniform(low=-1, high=+1, size=world.dim_p)
        mark.state.p_vel = np.zeros(world.dim_p)

        hazardous = isinstance(mark, RiskRewardLandmark) and mark.is_hazard
        if not hazardous:
            mark.color = np.array([0.25, 0.25, 0.25])
            mark.hazard_tag = 0.0
            continue

        # TODO: make colors heatmap of risk probability over all bounds
        # red channel is the value of get_failure_probability(0, 0) plus 0.1
        base_risk = mark.risk_fn.get_failure_probability(0, 0)
        mark.color = np.array([base_risk + .1, 0, 0])
        mark.hazard_tag = 1.0
def reset_world(self, world):
    """Recreate the agents, recolor all entities and randomize their states.

    ``world.agents`` is replaced by ``self.num_agents`` new ``MortalAgent``
    objects (colliding, silent, not terminated, sized ``DP.agent_size``).
    All landmarks are painted gray, then one landmark picked with
    ``np.random.choice`` is painted green as the goal; obstacles are painted
    red. Finally agents, landmarks and obstacles each get a uniform random
    position in [-1, 1) per axis and zero velocity; agents also get a zeroed
    communication state.

    Args:
        world: the world whose agents, landmarks and obstacles are reset
            in place.
    """
    world.agents = [MortalAgent() for _ in range(self.num_agents)]
    for idx, mortal in enumerate(world.agents):
        mortal.name = 'agent %d' % idx
        mortal.terminated = False
        mortal.collide = True
        mortal.silent = True
        mortal.size = DP.agent_size
        mortal.color = np.array([0.35, 0.35, 0.85])

    # landmark colors: everything gray, then one randomly chosen goal in green
    for mark in world.landmarks:
        mark.color = np.array([0.25, 0.25, 0.25])
    target = np.random.choice(world.landmarks)
    target.color = np.array([0.15, 0.65, 0.15])

    for blocker in world.obstacles:
        blocker.color = np.array([0.90, 0.40, 0.40])

    def _scatter(entity):
        # place the entity uniformly at random and bring it to rest
        entity.state.p_pos = np.random.uniform(low=-1, high=+1, size=world.dim_p)
        entity.state.p_vel = np.zeros(world.dim_p)

    # random initial states, drawn in the order agents -> landmarks -> obstacles
    for mortal in world.agents:
        _scatter(mortal)
        mortal.state.c = np.zeros(world.dim_c)
    for mark in world.landmarks:
        _scatter(mark)
    for blocker in world.obstacles:
        _scatter(blocker)
def reset_world(self, world):
    """Recreate the agents and re-randomize all agent and landmark states.

    Agent size scales with the number of agents as
    ``0.15 * sqrt(3 / num_agents)``, i.e. 0.15 for the 3-agent baseline
    (simple_spread). ``world.agents`` is replaced by ``self.num_agents`` new
    ``MortalAgent`` objects, each colliding, silent, not terminated, at rest
    at a position drawn uniformly from [-1, 1) per axis.

    ``world.landmarks`` is shuffled in place, then each landmark is renamed
    ``landmark_<k>`` where ``k`` is its index *after* the shuffle, given a
    new random position, zero velocity, a gray color and ``hazard_tag`` 0.0.

    Args:
        world: the world whose agents and landmarks are reset in place.

    Raises:
        ZeroDivisionError: if ``self.num_agents`` is 0 (from the size formula).
    """
    # find agent size as function of number of agents, baselined off the
    # simple_spread 3-agent case
    agent_size = 0.15 * np.sqrt(3.0 / float(self.num_agents))

    # add agents with random properties
    world.agents = [MortalAgent() for _ in range(self.num_agents)]
    for i, agent in enumerate(world.agents):
        agent.name = 'agent_%d' % i
        agent.collide = True
        agent.silent = True
        agent.terminated = False
        agent.size = agent_size
        agent.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
        agent.state.p_vel = np.zeros(world.dim_p)
        agent.state.c = np.zeros(world.dim_c)
        agent.color = np.array([0.35, 0.35, 0.85])
        agent.previous_observation = None

    # shuffle landmarks to make sure hazard is not in same index
    shuffle(world.landmarks)
    # Enumerate the landmarks so each gets its own index. Previously the name
    # reused the stale `i` left over from the agent loop, so every landmark
    # was named 'landmark_<num_agents - 1>'.
    for j, landmark in enumerate(world.landmarks):
        # rename landmarks to preserve label ordering in joint state
        # (see mager/environment.py:get_joint_state)
        landmark.name = 'landmark_%d' % j
        landmark.state.p_pos = np.random.uniform(-1, +1, world.dim_p)
        landmark.state.p_vel = np.zeros(world.dim_p)
        landmark.hazard_tag = 0.0
        landmark.color = np.array([0.25, 0.25, 0.25])
def reset_world(self, world):
    """Recreate the agents and randomize their initial states.

    ``world.agents`` is replaced by ``self.num_agents`` new ``MortalAgent``
    objects. Each one is colliding, silent, not terminated, sized
    ``_AGENT_SIZE``, and starts at rest at a position drawn uniformly from
    [-1, 1) per axis with a zeroed communication state.

    Args:
        world: the world whose agent list is rebuilt in place.
    """
    world.agents = [MortalAgent() for _ in range(self.num_agents)]
    for idx, mortal in enumerate(world.agents):
        # identity and behavior flags
        mortal.name = 'agent %d' % idx
        mortal.terminated = False
        mortal.collide = True
        mortal.silent = True
        # appearance
        mortal.size = _AGENT_SIZE
        mortal.color = np.array([0.35, 0.35, 0.85])
        # physical and communication state
        mortal.state.p_pos = np.random.uniform(low=-1, high=+1, size=world.dim_p)
        mortal.state.p_vel = np.zeros(world.dim_p)
        mortal.state.c = np.zeros(world.dim_c)
def reset_world(self, world):
    """Recreate the agents and re-randomize agent and landmark states.

    ``world.agents`` is replaced by ``self.num_agents`` new ``MortalAgent``
    objects (colliding, silent, not terminated, sized ``_AGENT_SIZE``, no
    previous observation), each at rest at a uniform random position in
    [-1, 1) per axis. Every landmark is then given a new uniform random
    position and zero velocity. The order of ``world.landmarks`` is left
    untouched.

    Args:
        world: the world whose agents and landmarks are reset in place.
    """
    world.agents = [MortalAgent() for _ in range(self.num_agents)]
    for idx, mortal in enumerate(world.agents):
        # identity and behavior flags
        mortal.name = 'agent %d' % idx
        mortal.terminated = False
        mortal.collide = True
        mortal.silent = True
        mortal.previous_observation = None
        # appearance
        mortal.size = _AGENT_SIZE
        mortal.color = np.array([0.35, 0.35, 0.85])
        # physical and communication state
        mortal.state.p_pos = np.random.uniform(low=-1, high=+1, size=world.dim_p)
        mortal.state.p_vel = np.zeros(world.dim_p)
        mortal.state.c = np.zeros(world.dim_c)

    # landmarks keep their list order; only their kinematic state is redrawn
    for mark in world.landmarks:
        mark.state.p_pos = np.random.uniform(low=-1, high=+1, size=world.dim_p)
        mark.state.p_vel = np.zeros(world.dim_p)
def reset_world(self, world):
    """Recreate the agents, randomize landmarks and place the two terminals.

    ``world.agents`` is replaced by ``self.num_agents`` new ``MortalAgent``
    objects (colliding, silent, not terminated, sized ``_AGENT_SIZE``, with
    ``max_observation_distance`` set to ``_MAX_OBSERVATION_DISTANCE``), each
    at rest at a uniform random position in [-1, 1) per axis. Every landmark
    gets a new uniform random position and zero velocity. Last, the pair
    returned by ``self.spawn_terminals(world)`` is written to the origin and
    destination terminal landmarks' positions.

    Args:
        world: the world whose agents, landmarks and terminals are reset
            in place.
    """
    world.agents = [MortalAgent() for _ in range(self.num_agents)]
    for idx, mortal in enumerate(world.agents):
        # identity and behavior flags
        mortal.name = 'agent %d' % idx
        mortal.terminated = False
        mortal.collide = True
        mortal.silent = True
        # appearance and sensing range
        mortal.size = _AGENT_SIZE
        mortal.max_observation_distance = _MAX_OBSERVATION_DISTANCE
        mortal.color = np.array([0.35, 0.35, 0.85])
        # physical and communication state
        mortal.state.p_pos = np.random.uniform(low=-1, high=+1, size=world.dim_p)
        mortal.state.p_vel = np.zeros(world.dim_p)
        mortal.state.c = np.zeros(world.dim_c)

    for mark in world.landmarks:
        mark.state.p_pos = np.random.uniform(low=-1, high=+1, size=world.dim_p)
        mark.state.p_vel = np.zeros(world.dim_p)

    # randomize terminal locations, but regularize to ensure consistent
    # distances; assigned after the landmark loop so these positions are final
    terminal_positions = self.spawn_terminals(world)
    origin_pos, destination_pos = terminal_positions
    world.origin_terminal_landmark.state.p_pos = origin_pos
    world.destination_terminal_landmark.state.p_pos = destination_pos