def __init__(self, experiment_dir, generation, timestamp, logger=None):
    """
    Constructor.

    :param experiment_dir: the directory where experiment results go
    :param generation: the generation number of the results dict
    :param timestamp: A double timestamp of when the error occurred.
    :param logger: A logger to send messaging to
    """
    # Error files land in the experiment-level "errors" directory
    experiment_filer = ExperimentFiler(experiment_dir)
    error_dir = experiment_filer.experiment_file("errors")

    # Human-readable time component of the file name
    time_string = datetime.fromtimestamp(timestamp).strftime(
        '%Y-%m-%d-%H:%M:%S')

    # Canonical "gen_NN" component of the file name
    generation_filer = GenerationFiler(experiment_dir, generation)
    gen_name = generation_filer.get_generation_name()

    basename = "experiment_host_error_{0}_{1}".format(gen_name, time_string)

    # Errors are persisted locally as plain text, passed through unmodified
    factory = PersistenceFactory(
        object_type="string",
        dictionary_converter=PassThroughDictionaryConverter(),
        logger=logger)
    self.dict_persistence = factory.create_persistence(
        error_dir, basename,
        persistence_mechanism=PersistenceMechanisms.LOCAL,
        serialization_format=SerializationFormats.TEXT,
        must_exist=False)
def __init__(self, experiment_dir, generation, candidate_id, timestamp, logger=None):
    """
    Constructor.

    :param experiment_dir: the directory where experiment results go
    :param generation: the generation number of the results dict
    :param candidate_id: The id of the candidate that had the error
    :param timestamp: A double timestamp of when the error occurred.
    :param logger: A logger to send messaging to
    """
    # Canonical "gen_NN" component of the file name
    gen_name = GenerationFiler(experiment_dir,
                               generation).get_generation_name()

    # Human-readable time component of the file name
    time_string = datetime.fromtimestamp(timestamp).strftime(
        '%Y-%m-%d-%H:%M:%S')

    base_name = "evaluation_error_{0}_candidate_{1}_{2}".format(
        gen_name, candidate_id, time_string)

    # Evaluation errors go to the experiment-level "errors" directory
    error_dir = ExperimentFiler(experiment_dir).experiment_file("errors")

    super(EvaluationErrorPersistence, self).__init__(
        base_name=base_name,
        folder=error_dir,
        dictionary_converter=CandidateDictionaryConverter(),
        logger=logger)
def write_training_runs(args):
    """
    Writes a training_runs.csv file with one row per candidate per
    generation: generation number, the alternate objective value,
    and fitness.

    :param args: parsed command-line args, using at least the
            experiment_dir, alt_objective and max_gen fields
    :return: the path of the csv file that was written
    """
    filer = ExperimentFiler(args.experiment_dir)
    csv_file = filer.experiment_file("training_runs.csv")

    # This file receives text, so open it in text mode.
    # ('wb' + str raises a TypeError under Python 3.)
    with open(csv_file, 'w') as my_file:
        my_file.write('Generation, %s, Fitness\n' % resolve_alt_objective(args))

    results_files = get_results_files(args)
    generation_filer = GenerationFiler(args.experiment_dir)
    for results_file in results_files:
        generation = generation_filer.get_generation_from_path(results_file)

        persistence = ResultsDictPersistence(args.experiment_dir, generation,
                                             logger=None)
        results_dict = persistence.restore()
        if len(results_dict) == 0:
            # File not found
            continue

        for key in results_dict.keys():
            result = results_dict[key]
            try:
                # XXX Use FitnessObjectives prepared from config?
                alt_objective = result['metrics'][args.alt_objective]
                fitness = result['metrics']['fitness']   # XXX not kosher
            except Exception:
                try:
                    alt_objective = result['metrics'][args.alt_objective]
                    fitness = result['fitness']          # XXX not kosher
                except Exception as exception:
                    if args.alt_objective == "num_params":
                        fitness = result['fitness']      # XXX not kosher
                        # XXX What generates this params file?
                        cache_file = generation_filer.get_generation_file(
                            "candidate_{0}.params".format(key))
                        if os.path.exists(cache_file):
                            # The cached value is text; read in text mode
                            with open(cache_file, 'r') as my_file:
                                alt_objective = my_file.read()
                        else:
                            # Was a leftover "k = undefined" placeholder that
                            # always printed 0; report the candidate id.
                            print("Extracting num params from network {}".format(key))
                            model = get_model(args.experiment_dir, key,
                                              generation)
                            alt_objective = str(model.count_params())
                            with open(cache_file, 'w') as my_file:
                                my_file.write(alt_objective)
                    else:
                        raise exception

            if args.alt_objective == 'training_time':
                # Report hours rather than seconds
                alt_objective = str(float(alt_objective) / 3600.0)

            with open(csv_file, 'a') as my_file:
                # Comma separators so rows match the header written above
                line = '%s, %s, %s\n' % (generation, alt_objective, fitness)
                my_file.write(line)

    return csv_file
def get_results_files(args):
    """
    Finds the per-generation results_dict.json files for an experiment.

    :param args: parsed command-line args, using the experiment_dir
            and max_gen fields
    :return: a sorted list of results_dict.json file paths, truncated
            to the first max_gen entries when max_gen is positive
    """
    spec = ExperimentFiler(args.experiment_dir).experiment_file(
        "*/results_dict.json")
    results_files = sorted(glob.glob(spec))
    if args.max_gen > 0:
        return results_files[:args.max_gen]
    return results_files
def run(self):
    """
    Entry point for the session task execution to take over.

    Gathers every generation's worker results, picks the candidate with
    the best fitness, restores it from its persistence file, and draws
    its results.
    """
    print("Running AnalyzeResultsSessionTask")
    # Read the results files for each generation.
    # These are written out by write_results_file()
    filer = ExperimentFiler(self.experiment_dir)
    glob_spec = filer.experiment_file("gen_*/results_dict.json")
    results_dicts = glob.glob(glob_spec)
    worker_results_files = sorted(results_dicts)
    if len(worker_results_files) <= 0:
        raise ValueError("No results_dicts.json files found in {0}".format(
            self.experiment_dir))
    # No generation number needed, we are only looking to
    # parse path components with it.
    generation_filer = GenerationFiler(self.experiment_dir)
    worker_results_dict = {}
    for worker_results_file in worker_results_files:
        generation = generation_filer.get_generation_from_path(
            worker_results_file)
        # This slurps in results information returned by workers from all
        # candidates of a specific generation
        results_dict_persistence = ResultsDictPersistence(
            self.experiment_dir, generation, logger=self.logger)
        one_worker_results_dict = results_dict_persistence.restore()
        # results_dict here will have one entry per candidate over all
        # generations
        worker_results_dict.update(one_worker_results_dict)
    fitness_objective = self.fitness_objectives.get_fitness_objectives(0)
    is_maximize = fitness_objective.is_maximize_fitness()
    # NOTE(review): items() yields (key, result) tuples, so the lambda
    # hands a tuple to get_candidate_fitness(); elsewhere that method is
    # given a candidate object.  Looks like this should be x[1] — confirm.
    best_result = sorted(list(worker_results_dict.items()), key=lambda \
        x: max(self.candidate_util.get_candidate_fitness(x)),
        reverse=is_maximize)[0]
    # NOTE(review): best_result is a (key, result) tuple; tuples have no
    # .get(), so this raises AttributeError at runtime.  Presumably the
    # intent is best_result[0] (the candidate id key) — confirm.
    best_id = best_result.get('id')
    # Open the file of the best candidate.
    best_candidate_persistence = BestFitnessCandidatePersistence(
        self.experiment_dir, best_id, logger=self.logger)
    best_candidate = best_candidate_persistence.restore()
    best_id = self.candidate_util.get_candidate_id(best_candidate)
    # NOTE(review): 'generation' here is whatever value the loop above
    # ended on (the last generation processed) — confirm that is intended.
    self.draw_best_candidate_results(best_candidate, generation, suffix='abs')
def __init__(self, experiment_dir, generation, fitness_objectives):
    """
    Constructor.

    :param experiment_dir: the directory where experiment results go
    :param generation: the generation number to report through
    :param fitness_objectives: the FitnessObjectives object
    """
    self.generation = generation
    self.fitness_objectives = fitness_objectives
    self.candidate_util = CandidateUtil(fitness_objectives)
    self.filer = ExperimentFiler(experiment_dir)
    # Output file name and the timestamp format used in its rows
    self.basename = 'fitness.csv'
    self.time_format = '%Y-%m-%d-%H:%M:%S'
def __init__(self, experiment_dir, generation=0):
    """
    Constructor.

    :param experiment_dir: The directory where experiment results go
    :param generation: The generation number of the experiment
    """
    # Generation folders are named "<prefix><NN>", e.g. "gen_07"
    self.prefix = "gen_"
    self.generation = generation
    self.experiment_filer = ExperimentFiler(experiment_dir)
def draw_best_candidate_results(self, best_candidate, generation=None, suffix=''):
    """
    Visualizes the best candidate of a generation, when the experiment
    config has visualization enabled.

    :param best_candidate: A candidate object comprising the best of
            a generation.
    :param generation: Default value is None
    :param suffix: Default value is an empty string
    """
    experiment_config = self.master_config.get('experiment_config')
    if not experiment_config.get('visualize'):
        return

    best_id = self.candidate_util.get_candidate_id(best_candidate)
    best_fitness = self.candidate_util.get_candidate_fitness(best_candidate)
    if best_fitness is None:
        fitness = best_fitness
    else:
        fitness = round(best_fitness, 4)

    # Determine the output file name basis
    # XXX Use fitness for now.
    #     Later on can address multi-objective goals.
    metric_name = "fitness"
    if generation is None:
        # We do not have a generation that we know about so write out
        # the old-school file name.
        # XXX Not entirely sure when this path would be taken
        base_name = "F{0}_ID-{1}_{2}best_{3}".format(
            fitness, best_id, suffix, metric_name)
        base_path = ExperimentFiler(self.experiment_dir).experiment_file(
            base_name)
    else:
        # Put the file in the gen_NN directory.
        # Call it best_candidate to match the best_candidate.json
        # that gets put there
        base_name = "best_{0}_candidate".format(metric_name)
        base_path = GenerationFiler(
            self.experiment_dir, generation).get_generation_file(base_name)

    # NetworkVisualizers use the build_training_model() which requires
    # a data_dict of file keys -> file paths to exist. Domains that
    # wish to visualize their networks that use the data_dict will
    # need to deal with a None value for data dict in the visualization
    # case.
    data_dict = None
    visualizer = NetworkMultiVisualizer(self.master_config, data_dict,
                                        base_path, logger=self.logger)
    visualizer.visualize(best_candidate)
def visualize_model(args):
    """
    Draws the top-level blueprint of a model plus one diagram for each
    layer that is itself a sub-model.

    :param args: parsed command-line args, using the visualize_model,
            experiment_dir and generation fields
    """
    model_name = args.visualize_model
    model = get_model(args.experiment_dir, model_name, args.generation)
    filer = ExperimentFiler(args.experiment_dir)

    # Top-level picture showing only the sub-models
    blueprint_file = filer.experiment_file("{0}_blueprint.png".format(
        model_name))
    # XXX Use a NetworkVisualizer
    plot_model(model, blueprint_file, show_layer_names=True,
               draw_submodels_only=True)

    # One detailed picture per layer; plain layers raise and are skipped
    for layer in model.layers:
        try:
            module_file = filer.experiment_file("{0}_module-{1}.png".format(
                model_name, layer.name))
            # XXX Use a NetworkVisualizer
            plot_model(layer, module_file, show_layer_names=False)
        except Exception:
            print("{} is not a model".format(layer))
class CleanUpArchive():
    """
    Class to assist with cleaning up persisted weights.
    """

    def __init__(self, experiment_dir):
        """
        Constructor.

        :param experiment_dir: the directory where experiment results go
        """
        self.filer = ExperimentFiler(experiment_dir)

    def clean_up(self, population):
        """
        Removes persisted weights of individuals that are no longer
        in the population.

        :param population: the list of candidates whose archived
                weights should be kept
        """
        base_path = self.filer.experiment_file("archive")
        candidate_util = CandidateUtil()
        # A set gives O(1) membership tests in the loop below
        candidate_ids = set(candidate_util.get_candidate_id(candidate)
                            for candidate in population)

        for filepath in glob.glob(base_path + "/*"):
            # Archive files are named "<candidate_id>.<type>...".
            # Use basename() for portability instead of splitting on '/'.
            parts = os.path.basename(filepath).split('.')
            if len(parts) < 2:
                # No extension: not a weights archive file
                # (the original code raised IndexError here).
                continue
            file_id = parts[0]
            file_type = parts[1]
            if file_type == 'h5a' and file_id not in candidate_ids:
                os.remove(filepath)
def _plot_hours_per_generation(values, title, num_generations, plot_file):
    """
    Plots one series of per-generation machine hours and saves it as png.

    :param values: list of hour values, one entry per generation
    :param title: the title string for the plot
    :param num_generations: the upper bound for the x axis
    :param plot_file: path of the png file to save
    """
    plt.plot(values)
    plt.title(title)
    plt.ylabel('Hours')
    plt.xlabel('Generation')
    plt.xlim(0, num_generations)
    plt.savefig(plot_file, bbox_inches='tight')
    plt.clf()


def visualize_training_runs(args):
    """
    Produces total/longest/mean training machine-hours-per-generation
    plots from the per-generation results dicts, and prints the total
    machine hours used.

    :param args: parsed command-line args, using the experiment_dir
            and max_gen fields
    """
    total_training_times = []
    longest_training_times = []
    average_training_times = []
    results_files = get_results_files(args)
    num_generations = len(results_files)
    generation_filer = GenerationFiler(args.experiment_dir)
    for results_file in results_files:
        generation = generation_filer.get_generation_from_path(results_file)
        persistence = ResultsDictPersistence(args.experiment_dir, generation,
                                             logger=None)
        results_dict = persistence.restore()
        if len(results_dict) == 0:
            # File not found
            continue

        # The original wrapped this lookup in a try/except whose handler
        # performed the identical lookup, so any KeyError escaped anyway;
        # the redundant handler has been removed.
        times = [results_dict[key]['metrics']['training_time']
                 for key in results_dict]

        # Seconds -> hours
        total_training_times.append(np.sum(times) / 3600)
        longest_training_times.append(np.max(times) / 3600)
        average_training_times.append(np.mean(times) / 3600)

    filer = ExperimentFiler(args.experiment_dir)
    _plot_hours_per_generation(
        total_training_times,
        'Total Training Machine Hours per Generation',
        num_generations,
        filer.experiment_file("training_runs_total.png"))
    _plot_hours_per_generation(
        longest_training_times,
        'Longest Training Machine Hours per Generation',
        num_generations,
        filer.experiment_file("training_runs_longest.png"))
    _plot_hours_per_generation(
        average_training_times,
        'Mean Training Machine Hours per Generation',
        num_generations,
        filer.experiment_file("training_runs_avg.png"))

    total_machine_hours = np.sum(total_training_times)
    print("Total machine hours used: {}".format(total_machine_hours))
def visualize_pareto_front(args, sorted_gens, sorted_accs, sorted_alt_objs,
                           experiment_names=None):
    """
    Draws a fitness-vs-alternate-objective scatter plot with a staircase
    line tracing the pareto front, and saves it as
    training_runs_pareto.png in the experiment directory.

    :param args: parsed command-line args, using the experiment_dir,
            min_x/max_x and min_y/max_y fields
    :param sorted_gens: generation labels per point (or a list of such
            lists, one per experiment, when experiment_names is given)
    :param sorted_accs: fitness values per point (same nesting rule)
    :param sorted_alt_objs: alternate-objective values per point
            (same nesting rule)
    :param experiment_names: optional list of experiment names; when
            given, each of the three data arguments must be a parallel
            list of per-experiment lists
    """
    plt.figure(figsize=(25, 15))
    plt.xlabel(resolve_alt_objective(args))
    plt.title('Fitness vs. %s Tradeoff' % resolve_alt_objective(args))
    plt.ylabel('Fitness')
    # Cycle through 6 colors; * 100 gives plenty for many experiments
    colors = ['g', 'b', 'r', 'm', 'c', 'y'] * 100
    if experiment_names is not None:
        assert len(experiment_names) == len(sorted_gens) == \
            len(sorted_accs) == len(sorted_alt_objs)
        num_experiments = len(experiment_names)
    else:
        # Single experiment: wrap the flat lists so the loop below can
        # treat both cases uniformly
        num_experiments = 1
        sorted_gens = [sorted_gens]
        sorted_accs = [sorted_accs]
        sorted_alt_objs = [sorted_alt_objs]
    for i in range(num_experiments):
        label = None if experiment_names is None else experiment_names[i]
        plt.scatter(sorted_alt_objs[i], sorted_accs[i], alpha=0.5,
                    marker='.', label=label, color=colors[i])
        min_fit = min(sorted_accs[i])
        max_alt = max(sorted_alt_objs[i])
        # Trace the staircase between consecutive points
        for j in range(len(sorted_gens[i])):
            # Get relevant values.
            if j == 0:
                prev_fit = min_fit
            else:
                prev_fit = sorted_accs[i][j-1]
            if j == len(sorted_gens[i]) - 1:
                next_alt = max_alt
            else:
                next_alt = sorted_alt_objs[i][j+1]
            curr_fit = sorted_accs[i][j]
            curr_alt = sorted_alt_objs[i][j]
            # Plot vertical line up to point.
            plt.plot([curr_alt, curr_alt], [prev_fit, curr_fit],
                     color=colors[i])
            # Plot horizontal line to next point.
            plt.plot([curr_alt, next_alt], [curr_fit, curr_fit],
                     color=colors[i])
            # Label the new point.
            label_x = 30
            plt.annotate(str(sorted_gens[i][j]),
                         (sorted_alt_objs[i][j], sorted_accs[i][j]),
                         color='black',
                         xytext=(label_x, -label_x),
                         textcoords='offset points',
                         arrowprops=dict(arrowstyle="->",
                                         color=colors[i], lw=0.5))
    # Only clamp an axis when both of its bounds were given
    if (args.min_x is not None) and (args.max_x is not None):
        plt.xlim(args.min_x, args.max_x)
    if (args.min_y is not None) and (args.max_y is not None):
        plt.ylim(args.min_y, args.max_y)
    if experiment_names is not None:
        plt.legend()
    # Turn on the minor TICKS, which are required for the minor GRID
    plt.minorticks_on()
    # Customize the major grid
    plt.grid(which='major', linestyle='-', alpha=0.5, linewidth='0.8')
    # Customize the minor grid
    plt.grid(which='minor', linestyle=':', alpha=0.5, linewidth='0.5')
    plt.tight_layout()
    filer = ExperimentFiler(args.experiment_dir)
    runs_pareto_file = filer.experiment_file("training_runs_pareto.png")
    plt.savefig(runs_pareto_file)
    plt.clf()
def __init__(self, experiment_dir):
    """
    Constructor.

    :param experiment_dir: the directory where experiment results go
    """
    self.filer = ExperimentFiler(experiment_dir)
class FitnessPersistor(Persistor):
    """
    This implementation of the Persistor interface creates the
    fitness.csv file: one row per generation with the best candidate's
    id and fitness plus the generation's average fitness.
    """

    def __init__(self, experiment_dir, generation, fitness_objectives):
        """
        Constructor.

        :param experiment_dir: the directory where experiment results go
        :param generation: the latest generation number to write rows for
        :param fitness_objectives: the FitnessObjectives object
        """
        self.filer = ExperimentFiler(experiment_dir)
        self.generation = generation
        self.fitness_objectives = fitness_objectives
        self.candidate_util = CandidateUtil(fitness_objectives)
        # Output file name and the timestamp format used in its rows
        self.basename = 'fitness.csv'
        self.time_format = '%Y-%m-%d-%H:%M:%S'

    def persist(self, obj):
        """
        Persists the object passed in.

        :param obj: an object to persist
            In this case we are expecting an advanced stats dictionary
            from the SoftOrderPersistor
        """
        advanced_stats = obj
        filename = self.filer.experiment_file(self.basename)
        self.write_csv_file(filename, advanced_stats)

    def write_csv_file(self, filename, advanced_stats):
        """
        Writes out the fitness.csv file

        :param filename: The filename to write to
        :param advanced_stats: The advanced_stats dict gathered by the
                    SoftOrderPersistor; per-generation lists are expected
                    under the 'time', 'best_candidate' and 'avg_fitness'
                    keys
        :return: Nothing
        """
        with open(filename, 'w') as csv_file:

            # Prepare dynamic column names based on the primary
            # objective's metric name (e.g. "Best fitness", "Avg fitness")
            primary_objective = self.fitness_objectives.get_fitness_objective(
                0)
            fitness_name = primary_objective.get_metric_name()
            best_fitness_field_name = 'Best ' + fitness_name
            best_fitness_id_field_name = best_fitness_field_name + ' id'
            avg_fitness_field_name = 'Avg ' + fitness_name

            field_names = [
                'Generation',
                'Timestamp',
                best_fitness_id_field_name,
                best_fitness_field_name,
                avg_fitness_field_name
            ]
            csv_writer = csv.DictWriter(csv_file, fieldnames=field_names,
                                        quoting=csv.QUOTE_MINIMAL,
                                        lineterminator="\n")
            csv_writer.writeheader()

            # One row per generation, from 0 through self.generation
            for gen in range(self.generation + 1):

                # Get timestamp in human-readable format
                timestamp = advanced_stats['time'][gen]
                ts_datetime = datetime.fromtimestamp(timestamp)
                time_string = ts_datetime.strftime(self.time_format)

                # Get best candidate
                # XXX multi-objective
                best_id = None
                best_fitness = None
                candidate = advanced_stats['best_candidate'][gen]
                if candidate is not None:
                    best_id = self.candidate_util.get_candidate_id(candidate)
                    best_fitness = self.candidate_util.get_candidate_fitness(
                        candidate)

                # Get average fitness
                # XXX multi-objective
                avg_fitness = advanced_stats['avg_fitness'][gen]

                row = {
                    'Generation': gen,
                    'Timestamp': time_string,
                    best_fitness_id_field_name: best_id,
                    best_fitness_field_name: best_fitness,
                    avg_fitness_field_name: avg_fitness
                }
                csv_writer.writerow(row)
def __init__(self, session, master_config, experiment_dir,
             fitness_objectives, generation, experiment_start_time,
             experiment_id, completion_service, initial_generation,
             population, checkpoint_id=None, novelty_policy=None,
             server_stats=None):
    """
    Constructor.

    :param session: The session with which the task can communicate
                with the service
    :param master_config: The master config for the task
    :param experiment_dir: The experiment directory for results
    :param fitness_objectives: The FitnessObjectives object
    :param generation: the generation number of the population
    :param experiment_start_time: the experiment start time in seconds
    :param experiment_id: the experiment id
                XXX Can this be derived from experiment_dir?
    :param completion_service: A handle to the CompletionService
                object for performing distributed evaluations.
    :param initial_generation: Flag saying whether or not this is the
                first generation.
    :param population: The list of candidates to evaluate
    :param checkpoint_id: The checkpoint id (if any) relevant to the task.
    :param novelty_policy: The NoveltyPolicy (if any) relevant to the task
    :param server_stats: Statistics from the ENN Service (if any)
    """
    super(EvaluatorSessionTask, self).__init__(session, master_config,
                                               experiment_dir,
                                               fitness_objectives,
                                               checkpoint_id)
    self.generation = generation
    self.experiment_start_time = experiment_start_time
    self.experiment_id = experiment_id
    self.completion_service = completion_service
    self.initial_generation = initial_generation
    self.novelty_policy = novelty_policy
    self.population = population
    # Filled in later by evaluation; None until then
    self.evaluated_population = None
    self.server_stats = server_stats
    self.candidate_util = CandidateUtil(self.fitness_objectives)
    # NOTE(review): units for these two are not evident from this code;
    # presumably results-per-progress-report and a timeout ceiling
    # used during evaluation — confirm before relying on them.
    self.result_update_frequency = 100
    self.timeout_max = 10000000

    # Set up the FitnessRegression policy
    filer = ExperimentFiler(self.experiment_dir)
    regression_archive_file = filer.experiment_file("regression_archive")
    regression_factory = FitnessRegressionFactory()
    experiment_config = self.master_config.get('experiment_config')
    self.fitness_regression = regression_factory.create_fitness_regression(
        experiment_config, self.fitness_objectives,
        regression_archive_file)

    # Set up the NoveltyPolicy if none was given
    if self.novelty_policy is None:
        # XXX We use the factory to look at the config, but if
        # we were not sent a novelty_policy, then we should probably
        # just use the NullNoveltyPolicy. But this is this way to be in
        # the spirit of the original implementation.
        novelty_factory = NoveltyPolicyFactory()
        self.novelty_policy = novelty_factory.create_novelty_policy(
            experiment_config, self.experiment_dir)
class GenerationFiler():
    """
    Class to handle creation of file names that go in generation folders.
    """

    def __init__(self, experiment_dir, generation=0):
        """
        Constructor.

        :param experiment_dir: The directory where experiment results go
        :param generation: The generation number of the experiment
        """
        self.experiment_filer = ExperimentFiler(experiment_dir)
        self.generation = generation
        self.prefix = "gen_"

    def get_generation_file(self, filename):
        """
        :param filename: A string filename which does not have any path
                    information associated with it.
        :return: A new string path to the filename in the appropriate
                generation folder, given the constructor arguments
        """
        gen_dir = self.get_generation_dir()
        gen_file = os.path.join(gen_dir, filename)
        return gen_file

    def get_generation_dir(self):
        """
        :return: A string path to the generation folder, given the
                constructor arguments
        """
        name = self.get_generation_name()
        gen_dir = self.experiment_filer.experiment_file(name)
        return gen_dir

    def get_generation_name(self):
        """
        :return: A canonical string for the generation.
                This is used as the primary component for the generation
                folder, but it can be used for other purposes as well.
        """
        # Zero-padded to two digits, e.g. "gen_07"
        name = "{0}{1:02d}".format(self.prefix, self.generation)
        return name

    def get_generation_from_path(self, path):
        """
        :param path: The path from which we will get generation information.
        :return: the generation number from the given path.
        :raises ValueError: if no path component starts with the prefix,
                or the component's number part does not parse as an int.
        """
        # Find the component of the path that starts with the prefix.
        # os.path.split() never returns None: it returns '' when the
        # path is exhausted, so test for the empty string -- testing for
        # None (as the original did) loops forever on non-matching paths.
        (head, component) = os.path.split(path)
        while component and not component.startswith(self.prefix):
            (head, component) = os.path.split(head)

        if not component:
            raise ValueError("Could not find prefix {0} in {1}".format(
                self.prefix, path))

        # Components look like "gen_XX". Find the number part of that string.
        number_part = component[len(self.prefix):]
        try:
            generation_number = int(number_part)
        except ValueError:
            # Original raised IndexError here: the message used "{1}"
            # with a single format argument.
            raise ValueError(
                "Could not find generation number in {0}".format(component))
        return generation_number