예제 #1
0
    def replay(self, duration, logger):
        """
        Replays the parsed trajectory, writing its log to a dedicated output sub-directory.
        Nothing is done when `self._check_results()` returns a truthy value.
        :param duration: forwarded unchanged to the parent class' `replay`.
        :param logger: forwarded unchanged to the parent class' `replay`.
        :return:
        """
        # nothing to do if the results check succeeds, so the episode is not replayed again
        if self._check_results():
            return

        # the output sub-directory is named after the parsed file (without its extension)
        episode_name = get_file_name_without_extension(self.parser.filename)
        self._output_dir = os.path.join(self.output, episode_name)
        create_clear_dir(self._output_dir, self.clear)

        # from here on, log messages go to a file inside that sub-directory
        log_path = os.path.join(self._output_dir, 'learning.log')
        change_log_handler(log_path, self.verbosity)

        # delegates the actual replay to the parent implementation
        super().replay(duration, logger)
예제 #2
0
def process_players_data(analyzer, output_dir, clear=False, verbosity=1):
    """
    Plots summary charts of the players' behavior from the trajectories stored in the analyzer.
    Files are grouped by (lower-cased) map name; for each map, charts are saved for the location frequencies,
    action frequencies, action durations and the trajectories themselves. Two more charts covering all files
    report each player's trajectory length and mission time.
    :param RewardModelAnalyzer analyzer: the reward model analyzer containing the necessary data.
    :param str output_dir: the directory in which to save the results.
    :param bool clear: whether to clear the directory before processing.
    :param int verbosity: the verbosity level of the log file.
    :return:
    """
    create_clear_dir(output_dir, clear)
    change_log_handler(os.path.join(output_dir, 'post-process.log'), verbosity)

    def _chart_path(pattern, *fields):
        # fills the file-name pattern (its last placeholder is the image format) and roots it at output_dir
        return os.path.join(output_dir, pattern.format(*fields, analyzer.img_format))

    file_names = list(analyzer.trajectories)
    logging.info('\n=================================')
    logging.info('Analyzing mean player behavior for {} results...'.format(len(file_names)))

    # groups the files by the lower-cased name of their map
    files_by_map = {}
    for file_name in file_names:
        map_key = analyzer.map_tables[file_name].name.lower()
        files_by_map.setdefault(map_key, []).append(file_name)

    for map_name, files in files_by_map.items():
        # the first file of the group supplies the map information used for the whole group
        map_table = analyzer.map_tables[files[0]]
        locations = map_table.rooms_list
        trajectories = [analyzer.trajectories[filename] for filename in files]
        # each agent is looked up in the world found in the first slot of the trajectory's last entry
        agents = [
            trajectory[-1][0].agents[analyzer.agent_names[filename]]
            for filename, trajectory in zip(files, trajectories)
        ]

        # location visitation frequencies
        location_data = get_locations_frequencies(trajectories, agents, locations)
        plot_bar(location_data,
                 'Mean Location Visitation Frequencies',
                 _chart_path('{}-loc-frequencies.{}', map_name),
                 y_label='Frequency')

        # action execution frequencies
        frequency_data = get_actions_frequencies(trajectories, agents)
        plot_bar(frequency_data,
                 'Mean Action Execution Frequencies',
                 _chart_path('{}-action-frequencies.{}', map_name),
                 y_label='Frequency')

        # action durations
        duration_data = get_actions_durations(trajectories, agents)
        plot_bar(duration_data,
                 'Mean Action Durations',
                 _chart_path('{}-action-durations.{}', map_name),
                 y_label='Duration (secs)')

        # all player trajectories over the map
        plot_trajectories(agents,
                          trajectories,
                          locations,
                          map_table.adjacency,
                          _chart_path('{}-trajectories.{}', map_name),
                          map_table.coordinates,
                          title='Player Trajectories')

    # trajectory length of each player, charted in player-name order
    lengths = {}
    for file_name in file_names:
        lengths[analyzer.get_player_name(file_name)] = len(analyzer.trajectories[file_name])
    traj_len_data = {player: lengths[player] for player in sorted(lengths)}
    plot_bar(traj_len_data, 'Player Trajectory Length', _chart_path('trajectory-length.{}'))

    # mission time of each player, read from the world in the last entry of the trajectory
    mission_times = {}
    for file_name in file_names:
        mission_time_feat = get_mission_seconds_key()
        final_world = analyzer.trajectories[file_name][-1][0]
        # works on a deep copy so that select() does not touch the state stored in the trajectory
        state = copy.deepcopy(final_world.state)
        state.select(True)
        seconds = final_world.getFeature(mission_time_feat, state, True)
        mission_times[analyzer.get_player_name(file_name)] = seconds
    mission_time_data = {player: mission_times[player] for player in sorted(mission_times)}
    plot_bar(mission_time_data, 'Player Mission Time (secs)', _chart_path('mission-time.{}'))
예제 #3
0
        help=
        'Directory containing the replay logs or single replay file to process.'
    )
    parser.add_argument('-o',
                        '--output',
                        type=str,
                        default=OUTPUT_DIR,
                        help='Directory in which to save results.')
    args = parser.parse_args()

    # checks input files
    if os.path.isfile(args.replays):
        files = [args.replays]
    elif os.path.isdir(args.replays):
        files = list(get_files_with_extension(args.replays, 'csv'))
    else:
        raise ValueError(
            'Input path is not a valid file or directory: {}.'.format(
                args.replays))

    # create output and log file
    create_clear_dir(args.output, False)
    change_log_handler(os.path.join(args.output, 'parsing.log'))

    # create replayer and process all files
    analyzer = TrajectoryAnalyzer(files)
    analyzer.process_files()

    # prints some charts about data
    process_players_data(analyzer, args.output)
예제 #4
0
                        type=str,
                        default=MAP_NAME,
                        help='Name of the map for trajectory generation.')
    parser.add_argument(
        '-p',
        '--processes',
        type=none_or_int,
        default=PROCESSES,
        help=
        'Number of processes/cores to use. If unspecified, all available cores will be used'
    )
    args = parser.parse_args()

    # create output and log file
    create_clear_dir(args.output, False)
    change_log_handler(os.path.join(args.output, 'learning.log'))

    # saves args
    with open(os.path.join(args.output, 'args.json'), 'w') as fp:
        json.dump(vars(args), fp, indent=4)

    # checks input files
    files = []
    if args.replays is None:
        logging.info(
            'No replay file provided, skipping parsing benchmark.'.format(
                args.replays))
    elif os.path.isfile(args.replays):
        files = [args.replays]
    elif os.path.isdir(args.replays):
        files = list(get_files_with_extension(args.replays, 'csv'))
예제 #5
0
def cluster_reward_weights(analyzer, output_dir,
                           linkage='ward', dist_threshold=DEF_DIST_THRESHOLD, stds=DEF_STDS,
                           clear=False, verbosity=1):
    """
    Clusters the reward weight vectors stored in the analyzer's results and saves charts and tables about them.
    Outputs include the overall and per-cluster mean weights, the cluster assignments, the individual weights,
    the cluster sizes, the clustering dendrogram and distance plots, and an evaluation of the clustering against
    labels taken from the trial conditions (subject, map conditions, train condition and relative trial number).
    :param RewardModelAnalyzer analyzer: the reward model analyzer containing the necessary data.
    :param str output_dir: the directory in which to save the results.
    :param str linkage: the clustering linkage criterion.
    :param float dist_threshold: the distance above which clusters are not merged.
    :param float stds: the number of standard deviations above the gradient mean used for automatic cluster detection.
    :param bool clear: whether to clear the directory before processing.
    :param int verbosity: the verbosity level of the log file.
    :return:
    """
    create_clear_dir(output_dir, clear)
    change_log_handler(os.path.join(output_dir, 'post-process.log'), verbosity)

    def _out_path(name):
        # path of a file directly inside the output directory
        return os.path.join(output_dir, name)

    def _img_path(pattern, *fields):
        # fills the file-name pattern (its last placeholder is the image format) and roots it at output_dir
        return _out_path(pattern.format(*fields, analyzer.img_format))

    file_names = list(analyzer.results)
    logging.info('\n=================================')
    logging.info('Analyzing models\' reward weights for {} results...'.format(len(file_names)))

    # clusters the reward weights of all results
    results = [analyzer.results[filename] for filename in file_names]
    clustering, thetas = cluster_linear_rewards(results, linkage, dist_threshold, stds)

    # the reward feature names are taken from a vector built with the first file's agent and map
    first_file = file_names[0]
    agent_name = analyzer.agent_names[first_file]
    agent = analyzer.trajectories[first_file][-1][0].agents[agent_name]
    locations = analyzer.map_tables[first_file].rooms_list
    rwd_feat_names = create_reward_vector(agent, locations, WorldMap.get_move_actions(agent)).names

    # overall mean of each weight, paired with its std divided by the number of weight vectors
    overall_mean = np.mean(thetas, axis=0)
    overall_spread = np.std(thetas, axis=0) / len(thetas)
    overall_data = np.array([overall_mean, overall_spread]).T.tolist()
    plot_bar(OrderedDict(zip(rwd_feat_names, overall_data)),
             'Overall Mean Weights', _img_path('weights-mean.{}'),
             plot_mean=False)

    # mean weights within each cluster
    clusters, cluster_weights = get_clusters_means(clustering, thetas)
    logging.info('Found {} clusters at max. distance: {:.2f}'.format(
        clustering.n_clusters_, clustering.distance_threshold))
    for cluster in sorted(cluster_weights):
        members = clusters[cluster]
        cluster_data = cluster_weights[cluster]
        # NOTE: this writes into the object held by cluster_weights, so the values saved further below
        # to 'cluster-weights.csv' already include this division by the cluster size
        cluster_data[1] = cluster_data[1] / len(members)
        with np.printoptions(precision=2, suppress=True):
            logging.info('\tCluster {}: {}, \n\tmean: {}\n'.format(cluster, members, cluster_data[0]))
        plot_bar(OrderedDict(zip(rwd_feat_names, cluster_data.T.tolist())),
                 'Mean Weights for Cluster {}'.format(cluster),
                 _img_path('weights-mean-{}.{}', cluster),
                 plot_mean=False)

    # saves the clusters' weights and the cluster of each datapoint along with identifying information
    subject_ids = [analyzer.get_player_name(file_name) for file_name in file_names]
    player_names = [analyzer.agent_names[file_name] for file_name in file_names]
    save_mean_cluster_weights(cluster_weights, _out_path('cluster-weights.csv'), rwd_feat_names)
    extra_info = OrderedDict([
        ('Internal subject ID', subject_ids),
        ('File name', file_names),
        ('Game player name', player_names),
    ])
    save_clusters_info(clustering, extra_info, thetas, _out_path('clusters.csv'), rwd_feat_names)

    # individual reward weights, one row per file, read directly from the results' stats
    individual_thetas = np.array([result.stats[THETA_STR] for result in results])
    weight_columns = individual_thetas.T.tolist()
    rows = list(zip(file_names, *weight_columns))
    ind_df = pd.DataFrame(rows, columns=['File name'] + rwd_feat_names)
    ind_df.to_csv(_out_path('individual-weights.csv'), index=False)

    # cluster sizes
    cluster_sizes = OrderedDict(
        (str(cluster), len(clusters[cluster])) for cluster in sorted(clusters))
    plot_bar(cluster_sizes, 'Clusters Size', _img_path('sizes.{}'))

    # dendrogram and distances of the clustering
    plot_clustering_dendrogram(clustering, _img_path('weights-dendrogram.{}'))
    plot_clustering_distances(clustering, _img_path('weights-distance.{}'))

    # gets different data partitions according to maps, conditions, subjects, etc
    conditions = [analyzer.trial_conditions[file_name] for file_name in file_names]
    subjects = [condition[SUBJECT_ID_TAG] for condition in conditions]
    gt_labels = {
        'Subject': subjects,
        'Map Condition': [condition[COND_MAP_TAG][0] for condition in conditions],
        'Dynamic Map Cond.': [condition[COND_MAP_TAG][1] for condition in conditions],
        'Train Condition': [condition[COND_TRAIN_TAG] for condition in conditions],
    }

    # trial numbers are made relative to the lowest trial number seen for the same subject
    trials = [int(condition[TRIAL_TAG]) for condition in conditions]
    subject_min_trials = {}
    for subject, trial in zip(subjects, trials):
        subject_min_trials[subject] = min(trial, subject_min_trials.get(subject, trial))
    gt_labels['Trial'] = [trial - subject_min_trials[subject] for subject, trial in zip(subjects, trials)]

    # performs clustering evaluation according to the different gt partitions and combinations thereof
    evaluate_clustering(clustering, gt_labels, output_dir, analyzer.img_format, 3)
    if INCLUDE_RANDOM_MODEL:
        model_list.append({'name': RANDOM_MODEL, 'reward': {GREEN_VICTIM: 0, YELLOW_VICTIM: 0,
                                                            'rationality': MODEL_RATIONALITY,
                                                            'selection': MODEL_SELECTION}})

    set_player_models(world, observer.name, agent.name, victimsObj, model_list)
    return [m['name'] for m in model_list]


if __name__ == '__main__':
    # prepares the output directory before the log file is placed inside it
    create_clear_dir(OUTPUT_DIR)

    # logs to a file in the output directory, with level 2 when DEBUG is set and 1 otherwise
    log_level = 2 if DEBUG else 1
    change_log_handler(os.path.join(OUTPUT_DIR, 'inference.log'), log_level)

    # fails early when the selected experiment has no entry among the default maps
    maps = get_default_maps()
    if EXPT not in maps:
        raise NameError(f'Experiment "{EXPT}" is not implemented yet')
    map_data = maps[EXPT]

    # creates the world, the agent and the observer from the selected map's data
    world, agent, observer, victims, world_map = make_single_player_world(
        AGENT_NAME, map_data.init_loc, map_data.adjacency, map_data.victims, False, FULL_OBS)

    # configures the agent, then resets its belief passing the observer's model key in the `ignore` set
    agent.setAttribute('horizon', HORIZON)
    agent.setAttribute('selection', AGENT_SELECTION)
    agent.resetBelief(ignore={modelKey(observer.name)})

    # creates the mental models and keeps the names returned
    model_names = create_mental_models(world, agent, observer, victims)
예제 #7
0
def evaluate_reward_models(analyzer,
                           output_dir,
                           cluster_rwds_file=None,
                           datapoint_clusters_file=None,
                           clear=False,
                           verbosity=1):
    """
    Evaluates reward functions against the players' trajectories through cross-evaluation.
    Two evaluations are run: first, every player is evaluated against the nominal reward models in
    `REWARD_MODELS` plus, when a cluster weights file is available, the weights of each cluster, with one matrix
    chart saved per metric; second, every player is evaluated against the weights stored in its own result
    (self-evaluation), with one bar chart saved per metric.
    :param RewardModelAnalyzer analyzer: the reward model analyzer containing the necessary data.
    :param str output_dir: the directory in which to save the results.
    :param str cluster_rwds_file: the path to the file from which to load the clusters' reward weights.
    :param str datapoint_clusters_file: the path to the file from which to load the datapoints' clusters.
    :param bool clear: whether to clear the directory before processing.
    :param int verbosity: the verbosity level of the log file.
    :return:
    """
    create_clear_dir(output_dir, clear)
    change_log_handler(os.path.join(output_dir, 'post-process.log'), verbosity)
    file_names = list(analyzer.results)

    def _metric_path(pattern, metric):
        # file path for a metric's chart: the metric name is lower-cased and its spaces become dashes
        slug = metric.lower().replace(' ', '-')
        return os.path.join(output_dir, pattern.format(slug, analyzer.img_format))

    # when cluster info is available, orders the files by cluster (files without a cluster come first)
    if datapoint_clusters_file is not None and os.path.isfile(datapoint_clusters_file):
        clusters = load_datapoints_clusters(datapoint_clusters_file)

        def _cluster_of(file_name):
            return clusters[file_name] if file_name in clusters else -1

        file_names.sort(key=_cluster_of)

    logging.info('\n=================================')
    logging.info(
        'Performing cross-evaluation of reward functions for {} results...'.
        format(len(file_names)))

    # collects, per file, the trajectory, agent, map locations and reward vector needed for evaluation
    trajectories = [analyzer.trajectories[filename] for filename in file_names]
    agent_names = [analyzer.agent_names[filename] for filename in file_names]
    agents = [
        trajectory[-1][0].agents[agent_name]
        for trajectory, agent_name in zip(trajectories, agent_names)
    ]
    map_locs = [analyzer.map_tables[filename].rooms_list for filename in file_names]
    rwd_vectors = [
        create_reward_vector(agent, locations, WorldMap.get_move_actions(agent))
        for agent, locations in zip(agents, map_locs)
    ]

    # saves the nominal weight vectors/profiles, each reshaped into a single row
    nominal_weights = {name: weights.reshape(1, -1) for name, weights in REWARD_MODELS.items()}
    save_mean_cluster_weights(
        nominal_weights, os.path.join(output_dir, 'nominal-weights.csv'), rwd_vectors[0].names)

    # the reward functions to evaluate against: the nominal ones plus the clusters' ones when available
    num_states = analyzer.num_trajectories * analyzer.length
    rwd_weights = OrderedDict(REWARD_MODELS)
    if cluster_rwds_file is not None and os.path.isfile(cluster_rwds_file):
        for cluster, weights in load_cluster_reward_weights(cluster_rwds_file).items():
            rwd_weights['Cluster {}'.format(cluster)] = weights
    eval_matrix = cross_evaluation(trajectories, agent_names, rwd_vectors,
                                   list(rwd_weights.values()),
                                   AGENT_RATIONALITY, analyzer.horizon,
                                   analyzer.prune, analyzer.processes,
                                   num_states, analyzer.seed)

    # saves one matrix chart per metric, labeled with the player names and the reward function names
    x_labels = [analyzer.get_player_name(filename) for filename in file_names]
    y_labels = list(rwd_weights.keys())
    for metric_name, matrix in eval_matrix.items():
        plot_confusion_matrix(
            matrix, _metric_path('{}-cross-eval-matrix.{}', metric_name),
            x_labels, y_labels, CONF_MAT_COLOR_MAP,
            '{} Cross-Evaluation'.format(metric_name),
            'Agent Policy Using Player\'s Optimal Reward Function',
            'Player\'s Observed Policy', 0, 1)

    # self-evaluation: each player against the weights stored in its own result
    metrics_values = {}
    per_file = zip(file_names, trajectories, agent_names, rwd_vectors)
    for i, (filename, trajectory, agent_name, rwd_vector) in enumerate(per_file):
        # never asks for more states than the trajectory has entries
        n_states = min(num_states, len(trajectory))
        # the seed is offset by the file's index
        self_eval = cross_evaluation(
            [trajectory], [agent_name], [rwd_vector],
            [analyzer.results[filename].stats[THETA_STR]], AGENT_RATIONALITY,
            analyzer.horizon, analyzer.prune, analyzer.processes, n_states,
            analyzer.seed + i)

        # organizes by metric name and then by player; only one player and one reward, hence entry [0, 0]
        player_name = analyzer.get_player_name(filename)
        for metric_name, matrix in self_eval.items():
            metrics_values.setdefault(metric_name, {})[player_name] = matrix[0, 0]

    # plots the self-evaluation values of all players, one chart per metric
    for metric_name, metric_values in metrics_values.items():
        plot_bar(metric_values,
                 metric_name.title(),
                 _metric_path('{}-self-eval.{}', metric_name),
                 None,
                 y_label=metric_name)
예제 #8
0
        img_format=args.format
    )
    analyzer.process_files()

    logging.info('=================================')
    if analyzer.results is None or len(analyzer.results) == 0 or \
            analyzer.trajectories is None or len(analyzer.trajectories) == 0:
        logging.warning('Inexistent or incomplete results!')
        exit()

    if args.post_process:

        # performs post-processing of results
        output_dir = os.path.join(args.output, 'post-process')
        create_clear_dir(output_dir, False)
        change_log_handler(os.path.join(output_dir, 'post-process.log'), args.verbosity)

        logging.info('Post-processing IRL data for the following {} files:'.format(len(analyzer.results)))
        for filename in analyzer.results:
            logging.info('\t{}, player: "{}", agent: "{}", map: "{}", {} steps'.format(
                filename, analyzer.get_player_name(filename), analyzer.agent_names[filename],
                analyzer.map_tables[filename].name, len(analyzer.trajectories[filename])))

        logging.info('Saving post-process results in "{}"...'.format(output_dir))

        process_players_data(analyzer, os.path.join(output_dir, 'player_behavior'), args.clear, args.verbosity)
        cluster_reward_weights(analyzer, os.path.join(output_dir, 'rewards'),
                               clear=args.clear, verbosity=args.verbosity)
        evaluate_reward_models(analyzer, os.path.join(output_dir, 'evaluation'),
                               os.path.join(output_dir, 'rewards', 'cluster-weights.csv'),
                               os.path.join(output_dir, 'rewards', 'clusters.csv'),
예제 #9
0
# agents properties
# NOTE(review): none of these constants is used in the visible part of this script; the notes
# below are assumptions drawn from the names and should be confirmed against their usage.
AGENT_NAME = 'Player'  # presumably the name given to the created agent
SELECTION = 'distribution'  # presumably the agent's action-selection mode
RATIONALITY = 1 / 0.1  # inverse temperature
HORIZON = 2  # presumably the agent's planning horizon

# presumably the number of steps to run/generate — TODO confirm
NUM_STEPS = 100

if __name__ == '__main__':
    np.set_printoptions(precision=2, suppress=True)

    # create output
    create_clear_dir(OUTPUT_DIR)

    # sets up log to file
    change_log_handler(os.path.join(OUTPUT_DIR, 'output.log'),
                       2 if DEBUG else 1)

    # loads clusters
    cluster_weights = load_cluster_reward_weights(CLUSTERS_FILE)
    logging.info('Loaded {} clusters from {}:'.format(len(cluster_weights),
                                                      CLUSTERS_FILE))
    for cluster in sorted(cluster_weights):
        logging.info('\tCluster {}: {}'.format(cluster,
                                               cluster_weights[cluster]))

    # create world and agent
    loc_neighbors = MAP_TABLE.adjacency
    locations = MAP_TABLE.rooms_list
    coords = MAP_TABLE.coordinates

    world, agent, observer, victims, world_map = \