def replay(self, duration, logger):
    """
    Replays the trajectory after preparing the output directory and the log file for this episode.
    Nothing is done when `self._check_results()` reports a truthy value.
    :param duration: forwarded unchanged to the parent class' `replay`.
    :param logger: forwarded unchanged to the parent class' `replay`.
    :return:
    """
    if not self._check_results():
        # the output sub-directory is named after the parsed file, without its extension
        episode_name = get_file_name_without_extension(self.parser.filename)
        self._output_dir = os.path.join(self.output, episode_name)
        create_clear_dir(self._output_dir, self.clear)

        # redirects the log to a file inside the episode's output directory
        log_path = os.path.join(self._output_dir, 'learning.log')
        change_log_handler(log_path, self.verbosity)

        # delegates the actual replaying to the parent class
        super().replay(duration, logger)
help='Whether to clear output directories before generating results.', action='store_true') parser.add_argument('-v', '--verbosity', action='count', default=0, help='Verbosity level.') args = parser.parse_args() # sets up log to file log_level = logging.WARN if args.verbosity == 0 else logging.INFO if args.verbosity == 1 else logging.DEBUG logging.basicConfig(format='%(message)s', level=log_level) # create output output_dir = os.path.join(args.output, os.path.basename(args.replays)) create_clear_dir(output_dir, args.clear) # checks input files csv_files = get_files(args.replays, 'csv') meta_files = get_files(args.metadata, 'metadata') csv_to_meta = {} for csv_file in csv_files: csv_meta_file = get_file_name_without_extension( csv_file.replace(CSV_PREFIX, '')) + '.metadata' for meta_file in meta_files: if os.path.basename(meta_file) == csv_meta_file: csv_to_meta[csv_file] = meta_file break not_found_csv = set(csv_files) - set(csv_to_meta.keys()) logging.info('Could not find matching metadata file for:\n\t{}'.format(
type=int, required=True, help='Height of the map (approx. num rooms).') parser.add_argument('-v', '--verbosity', action='count', default=0, help='Verbosity level.') args = parser.parse_args() # sets up log to file log_level = logging.WARN if args.verbosity == 0 else logging.INFO if args.verbosity == 1 else logging.DEBUG logging.basicConfig(format='%(message)s', level=log_level) # create output create_clear_dir(args.output, args.clear) # checks input file if not os.path.isfile(args.input): raise ValueError('Input file is not valid: {}.'.format(args.input)) logging.info('Processing room file: "{}"...'.format(args.input)) df = pd.read_csv(args.input, index_col=0) # get rooms' means df['x'] = 0.5 * (df[Z0_COORD_COL] + df[Z1_COORD_COL]) df['y'] = 0.5 * (df[X0_COORD_COL] + df[X1_COORD_COL]) # min-max scale and invert Y-axis coords df['x'] = (df['x'] - df['x'].min()) / (df['x'].max() - df['x'].min()) df['y'] = 1 - ((df['y'] - df['y'].min()) / (df['y'].max() - df['y'].min()))
def process_players_data(analyzer, output_dir, clear=False, verbosity=1):
    """
    Plots statistics about the players' behavior found in the collected trajectories. For each map (files are
    grouped by lower-cased map name) it saves the mean location frequencies, mean action frequencies, mean action
    durations and the players' trajectories. Over all files it saves each player's trajectory length and mission
    time.
    :param RewardModelAnalyzer analyzer: the reward model analyzer containing the necessary data.
    :param str output_dir: the directory in which to save the results.
    :param bool clear: whether to clear the directory before processing.
    :param int verbosity: the verbosity level of the log file.
    :return:
    """
    create_clear_dir(output_dir, clear)
    change_log_handler(os.path.join(output_dir, 'post-process.log'), verbosity)

    file_names = list(analyzer.trajectories)
    logging.info('\n=================================')
    logging.info('Analyzing mean player behavior for {} results...'.format(len(file_names)))

    # groups the files by the (lower-cased) name of the map they were collected in
    files_by_map = {}
    for file_name in file_names:
        map_key = analyzer.map_tables[file_name].name.lower()
        files_by_map.setdefault(map_key, []).append(file_name)

    for map_name, files in files_by_map.items():
        # the map table of the first file of the group is used for the whole group
        map_table = analyzer.map_tables[files[0]]
        locations = map_table.rooms_list
        trajectories = [analyzer.trajectories[name] for name in files]
        # each agent is fetched from the world stored in the last step of its trajectory
        agents = [trajectory[-1][0].agents[analyzer.agent_names[name]]
                  for name, trajectory in zip(files, trajectories)]

        # (data getter, plot title, file name pattern, y-axis label) for each of the mean bar plots
        bar_plots = [
            (lambda: get_locations_frequencies(trajectories, agents, locations),
             'Mean Location Visitation Frequencies', '{}-loc-frequencies.{}', 'Frequency'),
            (lambda: get_actions_frequencies(trajectories, agents),
             'Mean Action Execution Frequencies', '{}-action-frequencies.{}', 'Frequency'),
            (lambda: get_actions_durations(trajectories, agents),
             'Mean Action Durations', '{}-action-durations.{}', 'Duration (secs)'),
        ]
        for get_data, title, name_pattern, y_label in bar_plots:
            plot_bar(get_data(),
                     title,
                     os.path.join(output_dir, name_pattern.format(map_name, analyzer.img_format)),
                     y_label=y_label)

        # all player trajectories of this map in a single plot
        plot_trajectories(agents,
                          trajectories,
                          locations,
                          map_table.adjacency,
                          os.path.join(output_dir, '{}-trajectories.{}'.format(map_name, analyzer.img_format)),
                          map_table.coordinates,
                          title='Player Trajectories')

    # trajectory length of each player, ordered by player name
    lengths = {}
    for file_name in file_names:
        player = analyzer.get_player_name(file_name)
        lengths[player] = len(analyzer.trajectories[file_name])
    traj_len_data = dict(sorted(lengths.items(), key=lambda item: item[0]))
    plot_bar(traj_len_data,
             'Player Trajectory Length',
             os.path.join(output_dir, 'trajectory-length.{}'.format(analyzer.img_format)))

    # mission time of each player, read from the world in the last step of the trajectory
    times = {}
    for file_name in file_names:
        mission_time_feat = get_mission_seconds_key()
        world = analyzer.trajectories[file_name][-1][0]
        # select() is applied to a deep copy, so the state stored in the world is left untouched
        state = copy.deepcopy(world.state)
        state.select(True)
        seconds = world.getFeature(mission_time_feat, state, True)
        times[analyzer.get_player_name(file_name)] = seconds
    mission_time_data = dict(sorted(times.items(), key=lambda item: item[0]))
    plot_bar(mission_time_data,
             'Player Mission Time (secs)',
             os.path.join(output_dir, 'mission-time.{}'.format(analyzer.img_format)))
'--map-name', type=str, default=MAP_NAME, help='Name of the map for trajectory generation.') parser.add_argument( '-p', '--processes', type=none_or_int, default=PROCESSES, help= 'Number of processes/cores to use. If unspecified, all available cores will be used' ) args = parser.parse_args() # create output and log file create_clear_dir(args.output, False) change_log_handler(os.path.join(args.output, 'learning.log')) # saves args with open(os.path.join(args.output, 'args.json'), 'w') as fp: json.dump(vars(args), fp, indent=4) # checks input files files = [] if args.replays is None: logging.info( 'No replay file provided, skipping parsing benchmark.'.format( args.replays)) elif os.path.isfile(args.replays): files = [args.replays] elif os.path.isdir(args.replays):
def cluster_reward_weights(analyzer, output_dir, linkage='ward', dist_threshold=DEF_DIST_THRESHOLD, stds=DEF_STDS,
                           clear=False, verbosity=1):
    """
    Clusters the reward weight vectors stored in the analyzer's results (one per player log file) and saves
    plots and CSV files describing the outcome: overall and per-cluster mean weights, individual weights,
    cluster sizes, dendrogram and distances. Finally evaluates the clustering against the partitions given by
    the trial conditions (subject, map, training and trial).
    :param RewardModelAnalyzer analyzer: the reward model analyzer containing the necessary data.
    :param str output_dir: the directory in which to save the results.
    :param str linkage: the clustering linkage criterion.
    :param float dist_threshold: the distance above which clusters are not merged.
    :param float stds: the number of standard deviations above the gradient mean used for automatic cluster
    detection.
    :param bool clear: whether to clear the directory before processing.
    :param int verbosity: the verbosity level of the log file.
    :return:
    """
    create_clear_dir(output_dir, clear)
    change_log_handler(os.path.join(output_dir, 'post-process.log'), verbosity)

    file_names = list(analyzer.results)
    logging.info('\n=================================')
    logging.info('Analyzing models\' reward weights for {} results...'.format(len(file_names)))

    img_name = lambda pattern, *args: os.path.join(output_dir, pattern.format(*args, analyzer.img_format))

    # clusters the reward weights of all results
    results = [analyzer.results[filename] for filename in file_names]
    clustering, thetas = cluster_linear_rewards(results, linkage, dist_threshold, stds)

    # the reward feature names are obtained from a vector built with the data of the first file
    first_file = file_names[0]
    agent_name = analyzer.agent_names[first_file]
    agent = analyzer.trajectories[first_file][-1][0].agents[agent_name]
    locations = analyzer.map_tables[first_file].rooms_list
    rwd_feat_names = create_reward_vector(agent, locations, WorldMap.get_move_actions(agent)).names

    # overall mean weights; the second value of each pair is the std. dev. divided by the number of vectors
    overall_mean = np.mean(thetas, axis=0)
    overall_spread = np.std(thetas, axis=0) / len(thetas)
    overall = np.array([overall_mean, overall_spread]).T.tolist()
    plot_bar(OrderedDict(zip(rwd_feat_names, overall)),
             'Overall Mean Weights',
             img_name('weights-mean.{}'),
             plot_mean=False)

    # mean weights within each cluster
    clusters, cluster_weights = get_clusters_means(clustering, thetas)
    logging.info('Found {} clusters at max. distance: {:.2f}'.format(
        clustering.n_clusters_, clustering.distance_threshold))
    for cluster in sorted(cluster_weights.keys()):
        members = clusters[cluster]
        stats = cluster_weights[cluster]
        # rescales the second row by the cluster size; this is written back into `cluster_weights`
        stats[1] = stats[1] / len(members)
        with np.printoptions(precision=2, suppress=True):
            logging.info('\tCluster {}: {}, \n\tmean: {}\n'.format(cluster, members, stats[0]))
        plot_bar(OrderedDict(zip(rwd_feat_names, stats.T.tolist())),
                 'Mean Weights for Cluster {}'.format(cluster),
                 img_name('weights-mean-{}.{}', cluster),
                 plot_mean=False)

    # saves the clusters' weights and the cluster assigned to each datapoint
    subject_ids = [analyzer.get_player_name(file_name) for file_name in file_names]
    player_names = [analyzer.agent_names[file_name] for file_name in file_names]
    save_mean_cluster_weights(cluster_weights, os.path.join(output_dir, 'cluster-weights.csv'), rwd_feat_names)
    extra_info = OrderedDict([
        ('Internal subject ID', subject_ids),
        ('File name', file_names),
        ('Game player name', player_names)])
    save_clusters_info(clustering, extra_info, thetas, os.path.join(output_dir, 'clusters.csv'), rwd_feat_names)

    # individual reward weights, one row per file, taken directly from the results' stats
    individual_thetas = np.array([result.stats[THETA_STR] for result in results])
    feature_columns = individual_thetas.T.tolist()
    rows = list(zip(file_names, *feature_columns))
    ind_df = pd.DataFrame(rows, columns=['File name'] + rwd_feat_names)
    ind_df.to_csv(os.path.join(output_dir, 'individual-weights.csv'), index=False)

    # number of datapoints in each cluster
    cluster_sizes = OrderedDict(
        (str(cluster), len(clusters[cluster])) for cluster in sorted(clusters.keys()))
    plot_bar(cluster_sizes, 'Clusters Size', img_name('sizes.{}'))

    # dendrogram and distances of the clustering
    plot_clustering_dendrogram(clustering, img_name('weights-dendrogram.{}'))
    plot_clustering_distances(clustering, img_name('weights-distance.{}'))

    # ground-truth partitions of the data according to subject, map and training conditions
    conditions = [analyzer.trial_conditions[file_name] for file_name in file_names]
    gt_labels = {
        'Subject': [condition[SUBJECT_ID_TAG] for condition in conditions],
        'Map Condition': [condition[COND_MAP_TAG][0] for condition in conditions],
        'Dynamic Map Cond.': [condition[COND_MAP_TAG][1] for condition in conditions],
        'Train Condition': [condition[COND_TRAIN_TAG] for condition in conditions]
    }

    # trial labels are relative to the lowest trial number found for the same subject
    trials = [int(condition[TRIAL_TAG]) for condition in conditions]
    subject_min_trials = {}
    for subject, trial in zip(gt_labels['Subject'], trials):
        subject_min_trials[subject] = min(trial, subject_min_trials.get(subject, trial))
    gt_labels['Trial'] = [trial - subject_min_trials[subject]
                          for subject, trial in zip(gt_labels['Subject'], trials)]

    # evaluates the clustering against the ground-truth partitions
    evaluate_clustering(clustering, gt_labels, output_dir, analyzer.img_format, 3)
'rationality': MODEL_RATIONALITY, 'selection': MODEL_SELECTION}, {'name': PREFER_YELLOW_MODEL, 'reward': {GREEN_VICTIM: LOW_VAL, YELLOW_VICTIM: HIGH_VAL}, 'rationality': MODEL_RATIONALITY, 'selection': MODEL_SELECTION}] if INCLUDE_RANDOM_MODEL: model_list.append({'name': RANDOM_MODEL, 'reward': {GREEN_VICTIM: 0, YELLOW_VICTIM: 0, 'rationality': MODEL_RATIONALITY, 'selection': MODEL_SELECTION}}) set_player_models(world, observer.name, agent.name, victimsObj, model_list) return [m['name'] for m in model_list] if __name__ == '__main__': # create output create_clear_dir(OUTPUT_DIR) # sets up log to file change_log_handler(os.path.join(OUTPUT_DIR, 'inference.log'), 2 if DEBUG else 1) maps = get_default_maps() if EXPT not in maps: raise NameError(f'Experiment "{EXPT}" is not implemented yet') # create world, agent and observer map_data = maps[EXPT] world, agent, observer, victims, world_map = \ make_single_player_world(AGENT_NAME, map_data.init_loc, map_data.adjacency, map_data.victims, False, FULL_OBS) agent.setAttribute('horizon', HORIZON) agent.setAttribute('selection', AGENT_SELECTION) agent.resetBelief(ignore={modelKey(observer.name)})
def evaluate_reward_models(analyzer, output_dir, cluster_rwds_file=None, datapoint_clusters_file=None,
                           clear=False, verbosity=1):
    """
    Evaluates the learned reward functions through cross-evaluation. First, every player's trajectory is
    evaluated against the nominal reward models (`REWARD_MODELS`) and, when available, the clusters' reward
    weights, and one confusion matrix per metric is saved. Then, each player's trajectory is evaluated against
    the reward weights stored in that player's own result (self-evaluation), and one bar plot per metric is
    saved.
    :param RewardModelAnalyzer analyzer: the reward model analyzer containing the necessary data.
    :param str output_dir: the directory in which to save the results.
    :param str cluster_rwds_file: the path to the file from which to load the clusters' reward weights.
    :param str datapoint_clusters_file: the path to the file from which to load the datapoints' clusters.
    :param bool clear: whether to clear the directory before processing.
    :param int verbosity: the verbosity level of the log file.
    :return:
    """
    create_clear_dir(output_dir, clear)
    change_log_handler(os.path.join(output_dir, 'post-process.log'), verbosity)

    file_names = list(analyzer.results)

    # when cluster info is available, orders the files by cluster (files without a cluster come first)
    if datapoint_clusters_file is not None and os.path.isfile(datapoint_clusters_file):
        clusters = load_datapoints_clusters(datapoint_clusters_file)

        def _cluster_of(name):
            return clusters[name] if name in clusters else -1

        file_names.sort(key=_cluster_of)

    logging.info('\n=================================')
    logging.info('Performing cross-evaluation of reward functions for {} results...'.format(len(file_names)))

    # collects, per file, the data needed by the cross-evaluation
    trajectories = [analyzer.trajectories[filename] for filename in file_names]
    agent_names = [analyzer.agent_names[filename] for filename in file_names]
    # each agent is fetched from the world stored in the last step of its trajectory
    agents = [trajectory[-1][0].agents[name] for trajectory, name in zip(trajectories, agent_names)]
    map_locs = [analyzer.map_tables[filename].rooms_list for filename in file_names]
    rwd_vectors = [create_reward_vector(agent, locs, WorldMap.get_move_actions(agent))
                   for agent, locs in zip(agents, map_locs)]

    # saves the nominal weight vectors/profiles, each reshaped into a single row
    nominal_profiles = {name: weights.reshape(1, -1) for name, weights in REWARD_MODELS.items()}
    nominal_path = os.path.join(output_dir, 'nominal-weights.csv')
    save_mean_cluster_weights(nominal_profiles, nominal_path, rwd_vectors[0].names)

    # reward weights to evaluate against: the nominal models plus, when available, the clusters' weights
    num_states = analyzer.num_trajectories * analyzer.length
    rwd_weights = OrderedDict(REWARD_MODELS)
    if cluster_rwds_file is not None and os.path.isfile(cluster_rwds_file):
        for cluster, weights in load_cluster_reward_weights(cluster_rwds_file).items():
            rwd_weights['Cluster {}'.format(cluster)] = weights

    eval_matrix = cross_evaluation(
        trajectories, agent_names, rwd_vectors, list(rwd_weights.values()),
        AGENT_RATIONALITY, analyzer.horizon, analyzer.prune, analyzer.processes, num_states, analyzer.seed)

    # saves one confusion matrix per evaluation metric
    x_labels = [analyzer.get_player_name(filename) for filename in file_names]
    y_labels = list(rwd_weights.keys())
    for metric_name, matrix in eval_matrix.items():
        metric_slug = metric_name.lower().replace(' ', '-')
        file_path = os.path.join(output_dir, '{}-cross-eval-matrix.{}'.format(metric_slug, analyzer.img_format))
        plot_confusion_matrix(
            matrix, file_path, x_labels, y_labels, CONF_MAT_COLOR_MAP,
            '{} Cross-Evaluation'.format(metric_name),
            'Agent Policy Using Player\'s Optimal Reward Function',
            'Player\'s Observed Policy',
            0, 1)

    # self-evaluation: each player's trajectory against the weights stored in that player's own result
    metrics_values = {}
    per_file = zip(file_names, trajectories, agent_names, rwd_vectors)
    for i, (filename, trajectory, agent_name, rwd_vector) in enumerate(per_file):
        # never asks for more states than the trajectory has; the seed is offset by the file index
        n_states = min(num_states, len(trajectory))
        self_eval = cross_evaluation(
            [trajectory], [agent_name], [rwd_vector], [analyzer.results[filename].stats[THETA_STR]],
            AGENT_RATIONALITY, analyzer.horizon, analyzer.prune, analyzer.processes, n_states,
            analyzer.seed + i)

        # organizes the values by metric name and then by player
        player_name = analyzer.get_player_name(filename)
        for metric_name, matrix in self_eval.items():
            metrics_values.setdefault(metric_name, {})[player_name] = matrix[0, 0]

    # one bar plot per metric with the self-evaluation value of each player
    for metric_name, metric_values in metrics_values.items():
        metric_slug = metric_name.lower().replace(' ', '-')
        plot_path = os.path.join(output_dir, '{}-self-eval.{}'.format(metric_slug, analyzer.img_format))
        plot_bar(metric_values, metric_name.title(), plot_path, None, y_label=metric_name)