def replay(self, duration, logger):
    # checks results and avoids replaying episode
    if self._check_results():
        return

    # create output
    self._output_dir = os.path.join(
        self.output, get_file_name_without_extension(self.parser.filename))
    create_clear_dir(self._output_dir, self.clear)

    # sets up log to file
    change_log_handler(os.path.join(self._output_dir, 'learning.log'), self.verbosity)

    # replays trajectory
    super().replay(duration, logger)
def process_players_data(analyzer, output_dir, clear=False, verbosity=1):
    """
    Collects statistics regarding the players' behavior, mean location and action frequencies
    from the collected trajectories.
    :param RewardModelAnalyzer analyzer: the reward model analyzer containing the necessary data.
    :param str output_dir: the directory in which to save the results.
    :param bool clear: whether to clear the directory before processing.
    :param int verbosity: the verbosity level of the log file.
    :return:
    """
    create_clear_dir(output_dir, clear)
    change_log_handler(os.path.join(output_dir, 'post-process.log'), verbosity)

    file_names = list(analyzer.trajectories)
    logging.info('\n=================================')
    logging.info('Analyzing mean player behavior for {} results...'.format(len(file_names)))

    # separates stats by map name
    map_files = {}
    for file_name in file_names:
        map_table = analyzer.map_tables[file_name]
        map_name = map_table.name.lower()
        if map_name not in map_files:
            map_files[map_name] = []
        map_files[map_name].append(file_name)

    for map_name, files in map_files.items():
        map_table = analyzer.map_tables[files[0]]
        locations = map_table.rooms_list
        trajectories = [analyzer.trajectories[filename] for filename in files]
        agents = [trajectories[i][-1][0].agents[analyzer.agent_names[files[i]]]
                  for i in range(len(files))]

        # saves mean location frequencies
        location_data = get_locations_frequencies(trajectories, agents, locations)
        plot_bar(location_data, 'Mean Location Visitation Frequencies',
                 os.path.join(output_dir, '{}-loc-frequencies.{}'.format(map_name, analyzer.img_format)),
                 y_label='Frequency')

        # saves mean action frequencies
        act_data = get_actions_frequencies(trajectories, agents)
        plot_bar(act_data, 'Mean Action Execution Frequencies',
                 os.path.join(output_dir, '{}-action-frequencies.{}'.format(map_name, analyzer.img_format)),
                 y_label='Frequency')

        # saves mean action durations
        act_data = get_actions_durations(trajectories, agents)
        plot_bar(act_data, 'Mean Action Durations',
                 os.path.join(output_dir, '{}-action-durations.{}'.format(map_name, analyzer.img_format)),
                 y_label='Duration (secs)')

        # saves all player trajectories
        plot_trajectories(agents, trajectories, locations, map_table.adjacency,
                          os.path.join(output_dir, '{}-trajectories.{}'.format(map_name, analyzer.img_format)),
                          map_table.coordinates, title='Player Trajectories')

    # saves trajectory length
    traj_len_data = OrderedDict({analyzer.get_player_name(file_name): len(analyzer.trajectories[file_name])
                                 for file_name in file_names})
    traj_len_data = {name: traj_len_data[name] for name in sorted(traj_len_data)}
    plot_bar(traj_len_data, 'Player Trajectory Length',
             os.path.join(output_dir, 'trajectory-length.{}'.format(analyzer.img_format)))

    # saves game mission times
    mission_time_data = {}
    for file_name in file_names:
        mission_time_feat = get_mission_seconds_key()
        world = analyzer.trajectories[file_name][-1][0]
        state = copy.deepcopy(world.state)
        state.select(True)
        mission_time_data[analyzer.get_player_name(file_name)] = world.getFeature(mission_time_feat, state, True)
    mission_time_data = {name: mission_time_data[name] for name in sorted(mission_time_data)}
    plot_bar(mission_time_data, 'Player Mission Time (secs)',
             os.path.join(output_dir, 'mission-time.{}'.format(analyzer.img_format)))
    help='Directory containing the replay logs or single replay file to process.')
parser.add_argument('-o', '--output', type=str, default=OUTPUT_DIR,
                    help='Directory in which to save results.')
args = parser.parse_args()

# checks input files
if os.path.isfile(args.replays):
    files = [args.replays]
elif os.path.isdir(args.replays):
    files = list(get_files_with_extension(args.replays, 'csv'))
else:
    raise ValueError('Input path is not a valid file or directory: {}.'.format(args.replays))

# create output and log file
create_clear_dir(args.output, False)
change_log_handler(os.path.join(args.output, 'parsing.log'))

# create replayer and process all files
analyzer = TrajectoryAnalyzer(files)
analyzer.process_files()

# prints some charts about data
process_players_data(analyzer, args.output)
    type=str, default=MAP_NAME,
    help='Name of the map for trajectory generation.')
parser.add_argument(
    '-p', '--processes', type=none_or_int, default=PROCESSES,
    help='Number of processes/cores to use. If unspecified, all available cores will be used.')
args = parser.parse_args()

# create output and log file
create_clear_dir(args.output, False)
change_log_handler(os.path.join(args.output, 'learning.log'))

# saves args
with open(os.path.join(args.output, 'args.json'), 'w') as fp:
    json.dump(vars(args), fp, indent=4)

# checks input files
files = []
if args.replays is None:
    logging.info('No replay file provided, skipping parsing benchmark.')
elif os.path.isfile(args.replays):
    files = [args.replays]
elif os.path.isdir(args.replays):
    files = list(get_files_with_extension(args.replays, 'csv'))
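# Note: 'none_or_int' above is an argparse type converter defined elsewhere in the repo.
# For reference, a minimal sketch of such a converter (an assumption about its behavior,
# not the repo's actual implementation): it maps 'none'/empty values to None, otherwise int.
def none_or_int(value):
    """Illustrative argparse type: returns None for 'none'/empty values, otherwise an int."""
    if value is None or str(value).strip().lower() in ('', 'none'):
        return None
    return int(value)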
def cluster_reward_weights(analyzer, output_dir, linkage='ward',
                           dist_threshold=DEF_DIST_THRESHOLD, stds=DEF_STDS,
                           clear=False, verbosity=1):
    """
    Analyzes the reward functions resulting from IRL optimization for each player log file.
    Performs clustering of reward functions based on the weight vectors and computes the mean
    rewards in each cluster.
    :param RewardModelAnalyzer analyzer: the reward model analyzer containing the necessary data.
    :param str output_dir: the directory in which to save the results.
    :param str linkage: the clustering linkage criterion.
    :param float dist_threshold: the distance above which clusters are not merged.
    :param float stds: the number of standard deviations above the gradient mean used for automatic cluster detection.
    :param bool clear: whether to clear the directory before processing.
    :param int verbosity: the verbosity level of the log file.
    :return:
    """
    create_clear_dir(output_dir, clear)
    change_log_handler(os.path.join(output_dir, 'post-process.log'), verbosity)

    file_names = list(analyzer.results)
    logging.info('\n=================================')
    logging.info('Analyzing models\' reward weights for {} results...'.format(len(file_names)))

    # performs clustering of reward weights
    results = [analyzer.results[filename] for filename in file_names]
    clustering, thetas = cluster_linear_rewards(results, linkage, dist_threshold, stds)

    # gets rwd feature names with dummy info
    agent_name = analyzer.agent_names[file_names[0]]
    agent = analyzer.trajectories[file_names[0]][-1][0].agents[agent_name]
    locations = analyzer.map_tables[file_names[0]].rooms_list
    rwd_feat_names = create_reward_vector(agent, locations, WorldMap.get_move_actions(agent)).names

    # overall weight mean
    data = np.array([np.mean(thetas, axis=0), np.std(thetas, axis=0) / len(thetas)]).T.tolist()
    plot_bar(OrderedDict(zip(rwd_feat_names, data)), 'Overall Mean Weights',
             os.path.join(output_dir, 'weights-mean.{}'.format(analyzer.img_format)),
             plot_mean=False)

    # mean weights within each cluster
    clusters, cluster_weights = get_clusters_means(clustering, thetas)
    logging.info('Found {} clusters at max. distance: {:.2f}'.format(
        clustering.n_clusters_, clustering.distance_threshold))
    for cluster in sorted(cluster_weights.keys()):
        idxs = clusters[cluster]
        data = cluster_weights[cluster]
        data[1] = data[1] / len(idxs)
        with np.printoptions(precision=2, suppress=True):
            logging.info('\tCluster {}: {}, \n\tmean: {}\n'.format(cluster, idxs, data[0]))
        plot_bar(OrderedDict(zip(rwd_feat_names, data.T.tolist())),
                 'Mean Weights for Cluster {}'.format(cluster),
                 os.path.join(output_dir, 'weights-mean-{}.{}'.format(cluster, analyzer.img_format)),
                 plot_mean=False)

    subject_ids = [analyzer.get_player_name(file_name) for file_name in file_names]
    player_names = [analyzer.agent_names[file_name] for file_name in file_names]
    save_mean_cluster_weights(cluster_weights,
                              os.path.join(output_dir, 'cluster-weights.csv'), rwd_feat_names)
    extra_info = OrderedDict({
        'Internal subject ID': subject_ids,
        'File name': file_names,
        'Game player name': player_names})
    save_clusters_info(clustering, extra_info, thetas,
                       os.path.join(output_dir, 'clusters.csv'), rwd_feat_names)

    # individual rwd weights
    thetas = np.array([result.stats[THETA_STR] for result in results])
    ind_df = pd.DataFrame(list(zip(file_names, *thetas.T.tolist())),
                          columns=['File name'] + rwd_feat_names)
    ind_df.to_csv(os.path.join(output_dir, 'individual-weights.csv'), index=False)

    # cluster sizes
    cluster_sizes = OrderedDict({str(cluster): len(clusters[cluster]) for cluster in sorted(clusters.keys())})
    plot_bar(cluster_sizes, 'Clusters Size',
             os.path.join(output_dir, 'sizes.{}'.format(analyzer.img_format)))

    # dendrogram
    plot_clustering_dendrogram(
        clustering,
        os.path.join(output_dir, 'weights-dendrogram.{}'.format(analyzer.img_format)))  # player_names)
    plot_clustering_distances(
        clustering,
        os.path.join(output_dir, 'weights-distance.{}'.format(analyzer.img_format)))

    # gets different data partitions according to maps, conditions, subjects, etc.
    gt_labels = {
        'Subject': [analyzer.trial_conditions[file_name][SUBJECT_ID_TAG] for file_name in file_names],
        'Map Condition': [analyzer.trial_conditions[file_name][COND_MAP_TAG][0] for file_name in file_names],
        'Dynamic Map Cond.': [analyzer.trial_conditions[file_name][COND_MAP_TAG][1] for file_name in file_names],
        'Train Condition': [analyzer.trial_conditions[file_name][COND_TRAIN_TAG] for file_name in file_names]
    }
    subject_min_trials = {}
    for i, file_name in enumerate(file_names):
        subj_label = gt_labels['Subject'][i]
        subj_trial = int(analyzer.trial_conditions[file_name][TRIAL_TAG])
        if subj_label not in subject_min_trials or subj_trial < subject_min_trials[subj_label]:
            subject_min_trials[subj_label] = subj_trial
    gt_labels['Trial'] = [
        int(analyzer.trial_conditions[file_name][TRIAL_TAG]) - subject_min_trials[gt_labels['Subject'][i]]
        for i, file_name in enumerate(file_names)]

    # performs clustering evaluation according to the different gt partitions and combinations thereof
    evaluate_clustering(clustering, gt_labels, output_dir, analyzer.img_format, 3)
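# For reference, a minimal sketch of the agglomerative-clustering pattern that
# cluster_linear_rewards appears to wrap: the fitted object used above exposes sklearn-style
# attributes (n_clusters_, distance_threshold). This is an illustrative stand-in under that
# assumption, not the repo's actual implementation.
import numpy as np
from sklearn.cluster import AgglomerativeClustering


def _cluster_weight_vectors(thetas, linkage='ward', dist_threshold=1.0):
    """Clusters reward weight vectors, merging clusters whose distance is below dist_threshold."""
    clustering = AgglomerativeClustering(
        n_clusters=None, linkage=linkage, distance_threshold=dist_threshold)
    clustering.fit(np.asarray(thetas))  # labels_ and n_clusters_ are set after fitting
    return clustering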
    if INCLUDE_RANDOM_MODEL:
        model_list.append({'name': RANDOM_MODEL,
                           'reward': {GREEN_VICTIM: 0, YELLOW_VICTIM: 0},
                           'rationality': MODEL_RATIONALITY,
                           'selection': MODEL_SELECTION})

    set_player_models(world, observer.name, agent.name, victimsObj, model_list)
    return [m['name'] for m in model_list]


if __name__ == '__main__':
    # create output
    create_clear_dir(OUTPUT_DIR)

    # sets up log to file
    change_log_handler(os.path.join(OUTPUT_DIR, 'inference.log'), 2 if DEBUG else 1)

    maps = get_default_maps()
    if EXPT not in maps:
        raise NameError(f'Experiment "{EXPT}" is not implemented yet')

    # create world, agent and observer
    map_data = maps[EXPT]
    world, agent, observer, victims, world_map = \
        make_single_player_world(AGENT_NAME, map_data.init_loc, map_data.adjacency,
                                 map_data.victims, False, FULL_OBS)
    agent.setAttribute('horizon', HORIZON)
    agent.setAttribute('selection', AGENT_SELECTION)
    agent.resetBelief(ignore={modelKey(observer.name)})

    model_names = create_mental_models(world, agent, observer, victims)
def evaluate_reward_models(analyzer, output_dir, cluster_rwds_file=None,
                           datapoint_clusters_file=None, clear=False, verbosity=1):
    """
    Evaluates the learned reward functions by using internal evaluation metrics. It mainly computes
    the mismatch between observed player policies and policies resulting from different reward
    functions, including the ones resulting from IRL for each player and the means for each reward cluster.
    :param RewardModelAnalyzer analyzer: the reward model analyzer containing the necessary data.
    :param str output_dir: the directory in which to save the results.
    :param str cluster_rwds_file: the path to the file from which to load the clusters' reward weights.
    :param str datapoint_clusters_file: the path to the file from which to load the datapoints' clusters.
    :param bool clear: whether to clear the directory before processing.
    :param int verbosity: the verbosity level of the log file.
    :return:
    """
    create_clear_dir(output_dir, clear)
    change_log_handler(os.path.join(output_dir, 'post-process.log'), verbosity)

    file_names = list(analyzer.results)

    # tries to load cluster info and sorts datapoints by cluster
    if datapoint_clusters_file is not None and os.path.isfile(datapoint_clusters_file):
        clusters = load_datapoints_clusters(datapoint_clusters_file)
        file_names.sort(key=lambda f: clusters[f] if f in clusters else -1)

    logging.info('\n=================================')
    logging.info('Performing cross-evaluation of reward functions for {} results...'.format(len(file_names)))

    # first gets data needed to compute players' "observed" policies
    trajectories = [analyzer.trajectories[filename] for filename in file_names]
    agent_names = [analyzer.agent_names[filename] for filename in file_names]
    agents = [trajectories[i][-1][0].agents[agent_names[i]] for i in range(len(trajectories))]
    map_locs = [analyzer.map_tables[filename].rooms_list for filename in file_names]
    rwd_vectors = [create_reward_vector(agents[i], map_locs[i], WorldMap.get_move_actions(agents[i]))
                   for i in range(len(agents))]

    # saves nominal weight vectors/profiles
    save_mean_cluster_weights({k: v.reshape(1, -1) for k, v in REWARD_MODELS.items()},
                              os.path.join(output_dir, 'nominal-weights.csv'), rwd_vectors[0].names)

    # calculates eval metrics for each player policy against nominal and cluster-based policies
    num_states = analyzer.num_trajectories * analyzer.length
    rwd_weights = OrderedDict(REWARD_MODELS)
    if cluster_rwds_file is not None and os.path.isfile(cluster_rwds_file):
        rwd_weights.update({'Cluster {}'.format(k): v
                            for k, v in load_cluster_reward_weights(cluster_rwds_file).items()})
    eval_matrix = cross_evaluation(trajectories, agent_names, rwd_vectors, list(rwd_weights.values()),
                                   AGENT_RATIONALITY, analyzer.horizon, analyzer.prune,
                                   analyzer.processes, num_states, analyzer.seed)

    # saves confusion matrix for cross-evaluation of each metric
    x_labels = [analyzer.get_player_name(filename) for filename in file_names]
    y_labels = list(rwd_weights.keys())
    for metric_name, matrix in eval_matrix.items():
        file_path = os.path.join(output_dir, '{}-cross-eval-matrix.{}'.format(
            metric_name.lower().replace(' ', '-'), analyzer.img_format))
        plot_confusion_matrix(matrix, file_path, x_labels, y_labels, CONF_MAT_COLOR_MAP,
                              '{} Cross-Evaluation'.format(metric_name),
                              'Agent Policy Using Player\'s Optimal Reward Function',
                              'Player\'s Observed Policy', 0, 1)

    # calculates eval metrics for each player policy against its optimal reward weights discovered via IRL (self-eval)
    metrics_values = {}
    for i, filename in enumerate(file_names):
        n_states = min(num_states, len(trajectories[i]))
        eval_matrix = cross_evaluation([trajectories[i]], [agent_names[i]], [rwd_vectors[i]],
                                       [analyzer.results[filename].stats[THETA_STR]],
                                       AGENT_RATIONALITY, analyzer.horizon, analyzer.prune,
                                       analyzer.processes, n_states, analyzer.seed + i)

        # organizes by metric name and then by player
        player_name = analyzer.get_player_name(filename)
        for metric_name, matrix in eval_matrix.items():
            if metric_name not in metrics_values:
                metrics_values[metric_name] = {}
            metrics_values[metric_name][player_name] = matrix[0, 0]

    # plots mean self-eval performance
    for metric_name, metric_values in metrics_values.items():
        plot_bar(metric_values, metric_name.title(),
                 os.path.join(output_dir, '{}-self-eval.{}'.format(
                     metric_name.lower().replace(' ', '-'), analyzer.img_format)),
                 None, y_label=metric_name)
    img_format=args.format)
analyzer.process_files()

logging.info('=================================')
if analyzer.results is None or len(analyzer.results) == 0 or \
        analyzer.trajectories is None or len(analyzer.trajectories) == 0:
    logging.warning('Nonexistent or incomplete results!')
    exit()

if args.post_process:
    # performs post-processing of results
    output_dir = os.path.join(args.output, 'post-process')
    create_clear_dir(output_dir, False)
    change_log_handler(os.path.join(output_dir, 'post-process.log'), args.verbosity)
    logging.info('Post-processing IRL data for the following {} files:'.format(len(analyzer.results)))
    for filename in analyzer.results:
        logging.info('\t{}, player: "{}", agent: "{}", map: "{}", {} steps'.format(
            filename, analyzer.get_player_name(filename), analyzer.agent_names[filename],
            analyzer.map_tables[filename].name, len(analyzer.trajectories[filename])))
    logging.info('Saving post-process results in "{}"...'.format(output_dir))

    process_players_data(analyzer, os.path.join(output_dir, 'player_behavior'), args.clear, args.verbosity)
    cluster_reward_weights(analyzer, os.path.join(output_dir, 'rewards'),
                           clear=args.clear, verbosity=args.verbosity)
    evaluate_reward_models(analyzer, os.path.join(output_dir, 'evaluation'),
                           os.path.join(output_dir, 'rewards', 'cluster-weights.csv'),
                           os.path.join(output_dir, 'rewards', 'clusters.csv'),
# agent properties
AGENT_NAME = 'Player'
SELECTION = 'distribution'
RATIONALITY = 1 / 0.1  # inverse temperature
HORIZON = 2
NUM_STEPS = 100

if __name__ == '__main__':
    np.set_printoptions(precision=2, suppress=True)

    # create output
    create_clear_dir(OUTPUT_DIR)

    # sets up log to file
    change_log_handler(os.path.join(OUTPUT_DIR, 'output.log'), 2 if DEBUG else 1)

    # loads clusters
    cluster_weights = load_cluster_reward_weights(CLUSTERS_FILE)
    logging.info('Loaded {} clusters from {}:'.format(len(cluster_weights), CLUSTERS_FILE))
    for cluster in sorted(cluster_weights):
        logging.info('\tCluster {}: {}'.format(cluster, cluster_weights[cluster]))

    # create world and agent
    loc_neighbors = MAP_TABLE.adjacency
    locations = MAP_TABLE.rooms_list
    coords = MAP_TABLE.coordinates
    world, agent, observer, victims, world_map = \