Beispiel #1
0
    def _histogram_observation(self, agent, world):
        """Build the agent's observation from spatially binned entity counts.

        The current-step vector is laid out as: terminated flag, velocity,
        position, flattened landmark histogram, observed hazards
        (truncated/padded to ``2 * _N_OBSERVED_HAZARDS`` values), flattened
        agent histogram, observed terminations (truncated/padded to
        ``2 * _N_OBSERVED_TERMINATIONS`` values).

        The returned array is the current-step vector followed by the vector
        stored in ``agent.previous_observation`` (``0.0 * new_obs`` when none
        is stored yet). As a side effect ``agent.previous_observation`` is
        replaced with the current-step vector.
        """

        def _flatten(rows):
            # collapse exactly one level of nesting into a flat list
            return [item for row in rows for item in row]

        # landmarks: binned counts plus the hazardous landmarks that were seen
        landmark_bins, hazards_nested = landmark_histogram_observation(
            agent, world.landmarks, _MAX_HISTOGRAM_OBSERVATION_DISTANCE,
            _N_RADIAL_BINS, _N_ANGULAR_BINS)

        # other agents: binned counts plus the terminated agents that were seen
        agent_bins, terminations_nested = agent_histogram_observation(
            agent, world.agents, _MAX_HISTOGRAM_OBSERVATION_DISTANCE,
            _N_RADIAL_BINS, _N_ANGULAR_BINS)

        # histograms become plain 1d lists
        landmark_histogram = _flatten(landmark_bins)
        agent_histogram = _flatten(agent_bins)

        # hazard / termination lists are forced to a fixed length so the
        # overall observation size does not vary from step to step
        observed_hazards = truncate_or_pad(_flatten(hazards_nested),
                                           2 * _N_OBSERVED_HAZARDS)
        observed_terminations = truncate_or_pad(
            _flatten(terminations_nested), 2 * _N_OBSERVED_TERMINATIONS)

        # assemble the current-step vector
        own_state = ([agent.terminated] + agent.state.p_vel.tolist() +
                     agent.state.p_pos.tolist())
        surroundings = (landmark_histogram + observed_hazards +
                        agent_histogram + observed_terminations)
        new_obs = np.asarray(own_state + surroundings)

        # pair with the previous step's vector for velocity estimation
        previous = agent.previous_observation
        if previous is None:
            previous = 0.0 * new_obs
        stacked = np.append(new_obs, previous)
        agent.previous_observation = new_obs

        return stacked
    def _histogram_observation(self, agent, world):
        """Build the agent's observation from terminals and an agent histogram.

        The current-step vector is laid out as: terminated flag, velocity,
        position, origin-terminal position, destination-terminal position,
        flattened agent histogram, observed terminations (truncated/padded to
        ``2 * _N_OBSERVED_TERMINATIONS`` values).

        The returned array is the current-step vector followed by the vector
        stored in ``agent.previous_observation`` (``0.0 * new_obs`` when none
        is stored yet). As a side effect ``agent.previous_observation`` is
        replaced with the current-step vector.
        """

        def _flatten(rows):
            # collapse exactly one level of nesting into a flat list
            return [item for row in rows for item in row]

        # positions of the two communication terminals, origin first
        origin_pos = world.origin_terminal_landmark.state.p_pos.tolist()
        destination_pos = (
            world.destination_terminal_landmark.state.p_pos.tolist())
        terminals = origin_pos + destination_pos

        # other agents: binned counts plus the terminated agents that were seen
        agent_bins, terminations_nested = agent_histogram_observation(
            agent, world.agents, _MAX_OBSERVATION_DISTANCE, _N_RADIAL_BINS,
            _N_ANGULAR_BINS)

        # histogram becomes a plain 1d list
        agent_histogram = _flatten(agent_bins)

        # terminations are forced to a fixed length so the overall
        # observation size does not vary from step to step
        observed_terminations = truncate_or_pad(
            _flatten(terminations_nested), 2 * _N_OBSERVED_TERMINATIONS)

        # assemble the current-step vector
        own_state = ([agent.terminated] + agent.state.p_vel.tolist() +
                     agent.state.p_pos.tolist())
        new_obs = np.asarray(own_state + terminals + agent_histogram +
                             observed_terminations)

        # pair with the previous step's vector for velocity estimation
        previous = agent.previous_observation
        if previous is None:
            previous = 0.0 * new_obs
        stacked = np.append(new_obs, previous)
        agent.previous_observation = new_obs

        return stacked
    def _histogram_observation(self, agent, world):
        """Build the agent's observation from agent-count and reward histograms.

        Other agents are sorted into a polar grid of ``_N_RADIAL_BINS``
        log-spaced radial bins by ``_N_ANGULAR_BINS`` angular bins. Two
        histograms share that grid: the number of agents per bin, and the
        summed landmark reward value sampled at those agents' positions
        (a sum, not an average).

        Terminated agents are always recorded in the observed-terminations
        list, ordered by distance, and are additionally counted in the
        histograms like live agents when they fall inside the grid.

        The current-step vector is laid out as: terminated flag, velocity,
        position, landmark reward at the agent's own position, flattened
        agent histogram, flattened reward histogram, observed terminations
        (truncated/padded to ``2 * _N_OBSERVED_TERMINATIONS`` values).

        The returned array is the current-step vector followed by the vector
        stored in ``agent.previous_observation`` (``0.0 * new_obs`` when none
        is stored yet). As a side effect ``agent.previous_observation`` is
        replaced with the current-step vector.
        """
        # total landmark reward signal at this agent's own position
        landmark_sensor_reading = sum(
            (landmark.reward_fn.get_value(*agent.state.p_pos)
             for landmark in world.landmarks), 0.0)

        # upper edges of the radial bins, log-spaced from a tenth of the
        # maximum range up to the maximum range
        innermost_edge = _MAX_OBSERVATION_DISTANCE / 10.0
        radial_edges = np.logspace(np.log10(innermost_edge),
                                   np.log10(_MAX_OBSERVATION_DISTANCE),
                                   num=_N_RADIAL_BINS)

        # angular edges are offset by half a bin width; angles past the last
        # edge wrap around into bin 0
        angular_width = 2.0 * np.pi / float(_N_ANGULAR_BINS)
        angular_edges = np.linspace(angular_width / 2.0,
                                    2 * np.pi - angular_width / 2.0,
                                    num=_N_ANGULAR_BINS)

        agent_counts = np.zeros((_N_RADIAL_BINS, _N_ANGULAR_BINS), dtype=int)
        reward_totals = np.zeros((_N_RADIAL_BINS, _N_ANGULAR_BINS),
                                 dtype=float)

        # terminated agents seen so far, kept sorted by distance
        termination_offsets = []
        termination_distances = []

        for other in world.agents:
            separation = distance(other, agent)

            # an agent does not observe itself
            if other == agent:
                continue

            if other.terminated:
                slot = bisect(termination_distances, separation)
                termination_distances.insert(slot, separation)
                termination_offsets.insert(slot, delta_pos(other, agent))
                # no "continue": a terminated agent is still binned below

            # radial bin; an index equal to the bin count means the agent
            # lies beyond the outermost edge and is not stored
            radial_index = np.searchsorted(radial_edges, separation)
            if radial_index == _N_RADIAL_BINS:
                continue

            # bearing to the other agent, mapped into [0, 2*pi)
            dx, dy = delta_pos(other, agent)
            bearing = np.arctan2(dy, dx)
            if bearing < 0:
                bearing += 2 * np.pi

            # angular bin; the modulo folds the overflow index back to bin 0
            angular_index = (np.searchsorted(angular_edges, bearing) %
                             _N_ANGULAR_BINS)

            agent_counts[radial_index, angular_index] += 1

            # accumulate the landmark reward sampled at the other agent's
            # position; no averaging is done here, the per-bin agent count
            # is part of the observation alongside these sums
            for landmark in world.landmarks:
                reward_totals[radial_index, angular_index] += (
                    landmark.reward_fn.get_value(*other.state.p_pos))

        # both histograms become 1d lists, row by row
        agent_histogram = list(agent_counts.ravel())
        reward_histogram = list(reward_totals.ravel())

        # terminations are forced to a fixed length so the overall
        # observation size does not vary from step to step
        observed_terminations = truncate_or_pad(
            [coord for offset in termination_offsets for coord in offset],
            2 * _N_OBSERVED_TERMINATIONS)

        # assemble the current-step vector
        own_state = ([agent.terminated] + agent.state.p_vel.tolist() +
                     agent.state.p_pos.tolist() + [landmark_sensor_reading])
        new_obs = np.asarray(own_state + agent_histogram + reward_histogram +
                             observed_terminations)

        # pair with the previous step's vector for velocity estimation
        previous = agent.previous_observation
        if previous is None:
            previous = 0.0 * new_obs
        stacked = np.append(new_obs, previous)
        agent.previous_observation = new_obs

        return stacked
    def observation(self, agent, world):
        """Build the agent's observation from terminals and an agent histogram.

        Live, observable agents are counted in a polar grid of
        ``_N_RADIAL_BINS`` log-spaced radial bins by ``_N_ANGULAR_BINS``
        angular bins. Terminated agents are never counted in the grid;
        they are recorded, ordered by distance, in the observed-terminations
        list instead.

        The current-step vector is laid out as: velocity, position,
        origin-terminal position, destination-terminal position, flattened
        agent histogram, observed terminations (truncated/padded to
        ``2 * _N_OBSERVED_TERMINATIONS`` values). If ``agent.terminated`` is
        set the whole current-step vector is zeroed.

        The returned array is the current-step vector followed by the vector
        stored in ``agent.previous_observation`` (``0.0 * new_obs`` when none
        is stored yet). As a side effect ``agent.previous_observation`` is
        replaced with the current-step vector.
        """
        # Observe communication terminals
        terminals = (world.origin_terminal_landmark.state.p_pos.tolist() +
                     world.destination_terminal_landmark.state.p_pos.tolist())

        # Format agent histograms: radial edges are log-spaced from a tenth of
        # the maximum range up to the maximum range; angular edges are offset
        # by half a bin width so the last partial bin wraps into bin 0
        bin_depth = _MAX_OBSERVATION_DISTANCE / 10.0
        radial_bins = np.logspace(np.log10(bin_depth),
                                  np.log10(_MAX_OBSERVATION_DISTANCE),
                                  num=_N_RADIAL_BINS)
        bin_angle = 2.0 * np.pi / float(_N_ANGULAR_BINS)
        angular_bins = np.linspace(bin_angle / 2.0,
                                   2 * np.pi - bin_angle / 2.0,
                                   num=_N_ANGULAR_BINS)
        agent_histogram_2d = np.array([[0] * _N_ANGULAR_BINS] * _N_RADIAL_BINS)

        # establish observation of failures (kept sorted by distance)
        observed_terminations_2d = []
        observed_terminations_dists = []

        # count agents in each bin
        for a in world.agents:
            # skip if agent is agent
            if a == agent:
                continue

            dist = distance(a, agent)

            # record observed termination; terminated agents are not binned
            if a.terminated:
                insert_index = bisect(observed_terminations_dists, dist)
                observed_terminations_dists.insert(insert_index, dist)
                observed_terminations_2d.insert(insert_index,
                                                delta_pos(a, agent))
                continue

            # skip if outside of observation range
            if not agent.is_entity_observable(a):
                continue

            # find radial bin
            rad_bin = np.searchsorted(radial_bins, dist)
            if rad_bin == _N_RADIAL_BINS:
                # searchsorted returns the bin count when dist exceeds the
                # outermost edge. That can still happen for an "observable"
                # agent if the observability range differs from
                # _MAX_OBSERVATION_DISTANCE or the last log-spaced edge rounds
                # slightly below it; indexing with it would raise IndexError,
                # so the agent is not stored.
                continue

            # calculate angle, mapped into [0, 2*pi)
            dx, dy = delta_pos(a, agent)
            ang = np.arctan2(dy, dx)
            if ang < 0:
                ang += 2 * np.pi

            # find angular bin; angles past the last edge wrap into bin 0
            ang_bin = np.searchsorted(angular_bins, ang)
            if ang_bin == _N_ANGULAR_BINS:
                ang_bin = 0

            # add count to histogram
            agent_histogram_2d[rad_bin, ang_bin] += 1

        # flatten histogram to 1d list
        agent_histogram = [
            val for sublist in agent_histogram_2d for val in sublist
        ]

        # flatten, truncate/pad observed terminations to fixed length
        observed_terminations = [
            val for sublist in observed_terminations_2d for val in sublist
        ]
        observed_terminations = truncate_or_pad(observed_terminations,
                                                2 * _N_OBSERVED_TERMINATIONS)

        # package new observation
        new_obs = np.asarray(agent.state.p_vel.tolist() +
                             agent.state.p_pos.tolist() + terminals +
                             agent_histogram + observed_terminations)
        if agent.terminated:
            # if agent is terminated, return all zeros for observation
            # TODO: make this more efficient. Right now it does a lot of
            #   unnecessary calcs which are all then set to zero. Done this
            #   way to ensure consistent array size
            new_obs = 0.0 * new_obs

        # append previous observation for velocity estimation
        if agent.previous_observation is None:
            agent.previous_observation = 0.0 * new_obs
        obs = np.append(new_obs, agent.previous_observation)
        agent.previous_observation = new_obs

        return obs