Example no. 1
    def benchmark_data(self, agent, world):
        collisions = 0
        occupied_landmarks = 0
        min_dists = 0
        for l in world.landmarks:
            dists = [np.linalg.norm(a.state.p_pos - l.state.p_pos) for a in world.agents]
            min_dists += min(dists)
            if min(dists) < 0.1:
                occupied_landmarks += 1
        if agent.collide:
            for a in world.agents:
                if is_collision(a, agent):
                    collisions += 1
        return (self.reward(agent, world), collisions, min_dists, occupied_landmarks)

    def reward(self, agent, world):
        # Agents are rewarded based on minimum agent distance to each landmark, penalized for collisions
        rew = 0
        for l in world.landmarks:
            dists = [
                np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos)))
                for a in world.agents
            ]
            rew -= min(dists)
        if agent.collide:
            for a in world.agents:
                if is_collision(a, agent):
                    rew -= 1
        return rew
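
The benchmark_data and reward methods above call an is_collision helper that is not included in the excerpt. A minimal sketch of what such a helper typically looks like in multi-agent particle environments, assuming each entity exposes a position state.p_pos and a collision radius size (assumptions, not confirmed by the excerpt):

    import numpy as np

    def is_collision(entity_a, entity_b):
        # Assumed helper: two entities collide when the distance between their
        # positions is smaller than the sum of their collision radii.
        dist = np.linalg.norm(entity_a.state.p_pos - entity_b.state.p_pos)
        return dist < (entity_a.size + entity_b.size)
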
    def reward(self, agent, world):
        # Agents are rewarded based on minimum agent distance to each landmark, penalized for collisions
        assert world.identical_rewards == True
        rew = 0
        for lm in [l for l in world.landmarks if not l.is_hazard]:
            # dists = [np.sqrt(np.sum(np.square(a.state.p_pos - l.state.p_pos))) for a in world.agents]
            dists = [distance(ag, lm) for ag in world.agents]
            rew -= min(dists)

        if agent.collide:
            for a in world.agents:
                if is_collision(a, agent):
                    rew -= 1
        return rew
    def reward(self, agent, world):
        # Agents are rewarded based on minimum agent distance to each landmark, penalized for collisions
        rew = 0
        for l in world.landmarks:
            dists = [
                np.linalg.norm(a.state.p_pos - l.state.p_pos)
                for a in world.agents
            ]
            rew -= min(dists)

        for o in world.obstacles:
            if o.known:
                dists = [
                    np.linalg.norm(a.state.p_pos - o.state.p_pos)
                    for a in world.agents
                ]
                rew += min(dists)
        if agent.collide:
            for a in world.agents:
                if is_collision(a, agent):
                    rew -= 1
        return rew
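
The hazard and obstacle reward variants above, as well as the step() method in Example no. 5 below, also reference distance(...) and delta_pos(...) helpers that are not shown. A minimal sketch under the same assumption that entities expose state.p_pos:

    import numpy as np

    def delta_pos(entity_a, entity_b):
        # Assumed helper: relative position vector pointing from entity_b to entity_a.
        return entity_a.state.p_pos - entity_b.state.p_pos

    def distance(entity_a, entity_b):
        # Assumed helper: Euclidean distance between two entities' positions.
        return np.linalg.norm(delta_pos(entity_a, entity_b))
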
Example no. 5
    def step(self):
        super().step()

        # decrement the observability clock (and, when enabled, the reward function) of certain landmarks
        for landmark in self.landmarks:
            if isinstance(landmark, TemporarilyObservableRiskRewardLandmark):
                landmark.decrement_observe_clock(self.dt)
                # landmark.update_reward_function()

        # check for casualties
        for i, agent in enumerate(self.agents):

            # skip terminated agents, since they are already casualties
            if agent.terminated:
                # self.render_geoms = None # force resetting of object rendering list
                continue

            # check for destruction by hazard
            for landmark in self.landmarks:
                if isinstance(landmark, RiskRewardLandmark):
                    rel_pos = delta_pos(agent, landmark)
                    agent_failure = landmark.risk_fn.sample_failure(
                        rel_pos[0], rel_pos[1])
                    if agent_failure:
                        agent.terminate_agent()

                        # If landmark caused a failure, make landmark observable
                        if isinstance(landmark,
                                      TemporarilyObservableRiskRewardLandmark):
                            landmark.set_observe_clock()
                        break

            # check for destruction by flyoff or excess velocity
            # NOTE: this is a hack. Certain scenarios caused early training to send agents
            #       to positions and velocities that overflowed floating-point operations,
            #       which made the policy return NaN actions. By using the termination
            #       state we avoid imposing "walls" on agent motion; agents that fly too
            #       far off are simply terminated instead.
            if (np.linalg.norm(agent.state.p_pos) > self.flyoff_termination_radius
                    or np.linalg.norm(agent.state.p_vel) > self.flyoff_termination_speed):
                agent.terminate_agent()

            # check for spontaneous destruction of the agent
            if np.random.rand() < self.spontaneous_termination_probability:
                agent.terminate_agent()

            # recheck for terminated agents and skip
            if agent.terminated:
                # self.render_geoms = None # force resetting of object rendering list
                continue

            # check for collisions
            for other_agent in self.agents[i + 1:]:

                if other_agent.terminated:
                    # skip if the other agent is already terminated
                    continue

                if is_collision(agent, other_agent):
                    if np.random.random() < self.collision_termination_probability:
                        # terminate the other agent
                        other_agent.terminate_agent()
                        # logging.info('fatal crash')
                    if np.random.random() < self.collision_termination_probability:
                        # terminate the current agent
                        agent.terminate_agent()
                        # logging.info('fatal crash')
                        break
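
The step() method above reads several attributes that are configured elsewhere in the environment: flyoff_termination_radius, flyoff_termination_speed, spontaneous_termination_probability, and collision_termination_probability. A minimal sketch of how such a world might be set up, assuming it extends the particle-environment World base class (which provides agents, landmarks, dt, and step()); the import path, class name, and numeric values are illustrative assumptions, not taken from the source:

    from multiagent.core import World  # assumed import path for the MPE base class

    class HazardousWorld(World):  # hypothetical subclass name
        def __init__(self,
                     flyoff_termination_radius=10.0,           # assumed value
                     flyoff_termination_speed=10.0,            # assumed value
                     spontaneous_termination_probability=0.0,  # assumed value
                     collision_termination_probability=0.5):   # assumed value
            super().__init__()
            self.flyoff_termination_radius = flyoff_termination_radius
            self.flyoff_termination_speed = flyoff_termination_speed
            self.spontaneous_termination_probability = spontaneous_termination_probability
            self.collision_termination_probability = collision_termination_probability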