Example #1
def create_reward_vector(agent, locations, move_actions):
    """
    Creates the default linear reward vector.
    :param Agent agent: the PsychSim agent capable of retrieving the features' values given a state.
    :param list[str] locations: the list of possible world locations.
    :param list[ActionSet] move_actions: the list of the agent's movement actions.
    :rtype: LinearRewardVector
    :return: the linear reward vector.
    """
    world = agent.world
    features = []

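    # time-based features relative to the middle mission phase boundary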
    features.append(ValueComparisonLinearRewardFeature(
        'Before Middle', world, get_mission_seconds_key(),
        MISSION_PHASE_END_TIMES[MISSION_PHASES.index(MIDDLE_STR)], '<'))
    features.append(ValueComparisonLinearRewardFeature(
        'After Middle', world, get_mission_seconds_key(),
        MISSION_PHASE_END_TIMES[MISSION_PHASES.index(MIDDLE_STR)] - 1, '>'))

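    # location-based features (the frequency-based feature is currently disabled)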
    # features.append(LocationFrequencyReward('Location Frequency', agent, locations, False))
    features.append(LocationVisitedReward('Location Visited', agent, locations))

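    # numeric features over the number of triaged victims of each color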
    features.append(NumericLinearRewardFeature('Triaged Green', get_triaged_key(agent, 'Green')))
    features.append(NumericLinearRewardFeature('Triaged Gold', get_triaged_key(agent, 'Gold')))

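    # field-of-view features: whether 'White' or 'Red' is currently in the agent's FOV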
    features.append(ValueComparisonLinearRewardFeature('See White', world, get_fov_key(agent), 'White', '=='))
    features.append(ValueComparisonLinearRewardFeature('See Red', world, get_fov_key(agent), 'Red', '=='))

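    # movement features, one per move action, labeled with the action's destination object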
    features.extend([ActionLinearRewardFeature(
        'Move ' + next(iter(action))['object'], agent, action) for action in move_actions])

    return LinearRewardVector(features)
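
A minimal usage sketch follows, assuming (hypothetically) that `agent`, `locations` and `move_actions` come from an already-built PsychSim scenario, and that the returned `LinearRewardVector` exposes a `names` list and a `set_rewards(agent, weights)` method, neither of which is shown above:

# hypothetical setup: agent, locations and move_actions come from an existing scenario
rwd_vector = create_reward_vector(agent, locations, move_actions)
# assumed API: weight each feature uniformly and install the resulting reward in the agent
weights = [1. / len(rwd_vector.names)] * len(rwd_vector.names)
rwd_vector.set_rewards(agent, weights)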
Example #2
    def collect_sub_trajectories(self, coordinates, locations, neighbors,
                                 trajectory):
        # only consider the first half of the mission (i.e., the first 5 minutes)
        idx = 0
        for idx in range(len(trajectory)):
            secs = trajectory[idx][0].getFeature(get_mission_seconds_key(),
                                                 unique=True)
            if secs >= 5 * 60:
                break

        # collect sub-trajectories from player's trajectory
        trajectories = sample_spread_sub_trajectories(trajectory[:idx],
                                                      self.num_trajectories,
                                                      self.length)

        logging.info(
            'Collected {} trajectories of length {} from original trajectory (length {}).'
            .format(self.num_trajectories, self.length, idx + 1))
        plot_trajectories(self.triage_agent,
                          trajectories,
                          locations,
                          neighbors,
                          os.path.join(
                              self._output_dir,
                              'sub-trajectories.{}'.format(self.img_format)),
                          coordinates,
                          title='Training Sub-Trajectories')

        return trajectories
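
A hedged usage sketch, where `exp` stands for an instance of the (unnamed here) class defining this method, with `triage_agent`, `num_trajectories`, `length`, `_output_dir` and `img_format` already set:

# hypothetical: exp is an instance of the class defining collect_sub_trajectories;
# the map data (coordinates, locations, neighbors) and the full player trajectory were loaded beforehand
sub_trajectories = exp.collect_sub_trajectories(coordinates, locations, neighbors, full_trajectory)
print('Got {} sub-trajectories for training.'.format(len(sub_trajectories)))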
Example #3
    def __init__(self, name, agent, all_locations, inverse=True, max_frequency=1):
        """
        Creates a new reward feature.
        :param str name: the label for this reward feature.
        :param Agent agent: the PsychSim agent capable of retrieving the feature's value given a state.
        :param list[str] all_locations: all the world's locations.
        :param bool inverse: whether to take the inverse frequency, i.e., `time - freq`.
        :param int max_frequency: the maximum frequency that the agent can achieve (either for any or all locations).
        """
        super().__init__(name, 1. / max_frequency)  # use max frequency as normalization factor to keep the value in [0, 1]
        self.agent = agent
        self.world = agent.world
        self.inverse = inverse
        self.all_locations = all_locations
        self.location_feat = get_location_key(agent)
        self.time_feat = get_mission_seconds_key()
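
This constructor appears to belong to the `LocationFrequencyReward` feature referenced (commented out) in Example #1; a short instantiation sketch, with hypothetical `agent` and `locations` objects:

# hypothetical: agent and locations as in Example #1
freq_feat = LocationFrequencyReward('Location Frequency', agent, locations, inverse=False)
# per the docstring above, with inverse=True (the default) the feature takes the inverse frequency, i.e. time - freq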
Example #4
def get_actions_durations(trajectories, agents):
    """
    Gets the average durations of actions according to the given trajectories.
    :param list[list[(World, Distribution)]] trajectories: the set of trajectories, containing sequences of state-action pairs.
    :param Agent or list[Agent] agents: an agent, or a list with one agent per trajectory, whose action durations we want to retrieve.
    :rtype: dict[str,(float,float)]
    :return: the mean and standard error of each action's duration.
    """
    if isinstance(agents, Agent):
        agents = [agents] * len(trajectories)
    assert len(trajectories) == len(agents), \
        'One agent per set of trajectories has to be provided'

    data = {}
    clock_key = get_mission_seconds_key()
    for i in range(len(trajectories)):
        trajectory = trajectories[i]
        for t in range(len(trajectory) - 1):

            # compute clock diff
            duration = trajectory[t + 1][0].getFeature(clock_key, unique=True) - \
                       trajectory[t][0].getFeature(clock_key, unique=True)

            # get action and register duration
            a_dist = trajectory[t][1]
            for a, p in a_dist.items():
                # get clean action name
                a = str(a).replace('{}-'.format(agents[i].name), '').replace('_', ' ')
                if a not in data:
                    data[a] = []
                data[a].append(p * duration)

    return OrderedDict({
        a: [np.mean(data[a]), np.std(data[a]) / np.sqrt(len(data[a]))]  # standard error = std / sqrt(n)
        for a in sorted(data)
    })
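
A minimal sketch of consuming the returned statistics, assuming `trajectories` and `agents` were gathered as in Example #5:

# hypothetical: trajectories and agents gathered as in Example #5
durations = get_actions_durations(trajectories, agents)
for action, (mean, err) in durations.items():
    print('{}: {:.1f} +/- {:.1f} secs'.format(action, mean, err))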
Example #5
def process_players_data(analyzer, output_dir, clear=False, verbosity=1):
    """
    Collects statistics regarding the players' behavior, namely mean location and action frequencies, from the collected trajectories.
    :param RewardModelAnalyzer analyzer: the reward model analyzer containing the necessary data.
    :param str output_dir: the directory in which to save the results.
    :param bool clear: whether to clear the directory before processing.
    :param int verbosity: the verbosity level of the log file.
    :return:
    """
    create_clear_dir(output_dir, clear)
    change_log_handler(os.path.join(output_dir, 'post-process.log'), verbosity)

    file_names = list(analyzer.trajectories)
    logging.info('\n=================================')
    logging.info('Analyzing mean player behavior for {} results...'.format(
        len(file_names)))

    # separates stats by map name
    map_files = {}
    for file_name in file_names:
        map_table = analyzer.map_tables[file_name]
        map_name = map_table.name.lower()
        if map_name not in map_files:
            map_files[map_name] = []
        map_files[map_name].append(file_name)

    for map_name, files in map_files.items():
        map_table = analyzer.map_tables[files[0]]
        locations = map_table.rooms_list
        trajectories = [analyzer.trajectories[filename] for filename in files]
        agents = [
            trajectories[i][-1][0].agents[analyzer.agent_names[files[i]]]
            for i in range(len(files))
        ]

        # saves mean location frequencies
        location_data = get_locations_frequencies(trajectories, agents,
                                                  locations)
        plot_bar(location_data,
                 'Mean Location Visitation Frequencies',
                 os.path.join(
                     output_dir,
                     '{}-loc-frequencies.{}'.format(map_name,
                                                    analyzer.img_format)),
                 y_label='Frequency')

        # saves mean action frequencies
        act_data = get_actions_frequencies(trajectories, agents)
        plot_bar(act_data,
                 'Mean Action Execution Frequencies',
                 os.path.join(
                     output_dir,
                     '{}-action-frequencies.{}'.format(map_name,
                                                       analyzer.img_format)),
                 y_label='Frequency')

        # saves mean action durations
        act_data = get_actions_durations(trajectories, agents)
        plot_bar(act_data,
                 'Mean Action Durations',
                 os.path.join(
                     output_dir,
                     '{}-action-durations.{}'.format(map_name,
                                                     analyzer.img_format)),
                 y_label='Duration (secs)')

        # saves all player trajectories
        plot_trajectories(agents,
                          trajectories,
                          locations,
                          map_table.adjacency,
                          os.path.join(
                              output_dir, '{}-trajectories.{}'.format(
                                  map_name, analyzer.img_format)),
                          map_table.coordinates,
                          title='Player Trajectories')

    # saves trajectory length
    traj_len_data = OrderedDict({
        analyzer.get_player_name(file_name):
        len(analyzer.trajectories[file_name])
        for file_name in file_names
    })
    traj_len_data = {
        name: traj_len_data[name]
        for name in sorted(traj_len_data)
    }
    plot_bar(
        traj_len_data, 'Player Trajectory Length',
        os.path.join(output_dir,
                     'trajectory-length.{}'.format(analyzer.img_format)))

    # saves game mission times
    mission_time_data = {}
    for file_name in file_names:
        mission_time_feat = get_mission_seconds_key()
        world = analyzer.trajectories[file_name][-1][0]
        state = copy.deepcopy(world.state)
        state.select(True)
        mission_time_data[analyzer.get_player_name(
            file_name)] = world.getFeature(mission_time_feat, state, True)
    mission_time_data = {
        name: mission_time_data[name]
        for name in sorted(mission_time_data)
    }
    plot_bar(
        mission_time_data, 'Player Mission Time (secs)',
        os.path.join(output_dir,
                     'mission-time.{}'.format(analyzer.img_format)))
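
A hedged driver sketch, assuming `analyzer` is a `RewardModelAnalyzer` that has already loaded the players' trajectories (the output path is illustrative):

# hypothetical: analyzer already holds the parsed trajectories and map tables
process_players_data(analyzer, os.path.join('output', 'player-stats'), clear=True, verbosity=1)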