def __init__(self, experiment_dir, candidate_id=None, generation=None,
             base_name=None, logger=None):
    """
    Constructor.

    :param experiment_dir: the directory where experiment results go
    :param candidate_id: the id of the candidate
    :param generation: the generation number for the candidate
    :param base_name: a full base name to use (minus extension)
    :param logger: The logger to use for messaging
    """
    # When no explicit base name is given, derive one from the
    # candidate id and generation.
    resolved_name = base_name if base_name is not None \
        else self.get_base_name(candidate_id, generation)

    # Files land in the generation subdirectory when a generation is
    # known; otherwise directly in the experiment directory.
    if generation is not None:
        gen_filer = GenerationFiler(experiment_dir, generation)
        target_dir = gen_filer.get_generation_dir()
    else:
        target_dir = experiment_dir

    converter = CandidateDictionaryConverter(allow_restore_none=False)
    super(CandidatePersistence, self).__init__(
        base_name=resolved_name,
        folder=target_dir,
        dictionary_converter=converter,
        must_exist=True,
        logger=logger)
def write_training_runs(args):
    """
    Write a CSV file with one row per candidate result across all generations.

    Each row contains: generation number, the alternative objective value
    selected by args.alt_objective, and the candidate's fitness.

    :param args: parsed command-line arguments; must provide experiment_dir
                 and alt_objective
    :return: the path to the CSV file that was written
    """
    filer = ExperimentFiler(args.experiment_dir)
    csv_file = filer.experiment_file("training_runs.csv")

    # Text mode ('w', not 'wb'): we write str data. Binary mode with str
    # raises TypeError on Python 3.
    with open(csv_file, 'w') as my_file:
        my_file.write('Generation, %s, Fitness\n' % resolve_alt_objective(args))

    results_files = get_results_files(args)
    generation_filer = GenerationFiler(args.experiment_dir)
    for results_file in results_files:
        generation = generation_filer.get_generation_from_path(results_file)

        persistence = ResultsDictPersistence(args.experiment_dir, generation,
                                             logger=None)
        results_dict = persistence.restore()
        if len(results_dict) == 0:
            # File not found
            continue

        for key, result in results_dict.items():
            try:
                # XXX Use FitnessObjectives prepared from config?
                alt_objective = result['metrics'][args.alt_objective]
                fitness = result['metrics']['fitness']      # XXX not kosher
            except Exception:
                try:
                    # Older results layout: fitness lives at the top level.
                    alt_objective = result['metrics'][args.alt_objective]
                    fitness = result['fitness']             # XXX not kosher
                except Exception as exception:
                    if args.alt_objective == "num_params":
                        fitness = result['fitness']         # XXX not kosher
                        # XXX What generates this params file?
                        cache_file = generation_filer.get_generation_file(
                            "candidate_{0}.params".format(key))
                        if os.path.exists(cache_file):
                            # Text mode so alt_objective is a str, matching
                            # what is written to the CSV below.
                            with open(cache_file, 'r') as my_file:
                                alt_objective = my_file.read()
                        else:
                            # Was: printed a dummy 0 instead of the candidate
                            # key actually being processed.
                            print("Extracting num params from network {}".format(key))
                            model = get_model(args.experiment_dir, key,
                                              generation)
                            alt_objective = str(model.count_params())
                            with open(cache_file, 'w') as my_file:
                                my_file.write(alt_objective)
                    else:
                        raise exception

            if args.alt_objective == 'training_time':
                # Convert seconds to hours for readability.
                alt_objective = str(float(alt_objective) / 3600.0)

            # Append in text mode; comma-delimited to match the header row
            # (rows were previously space-delimited, contradicting the header).
            with open(csv_file, 'a') as my_file:
                line = '%s, %s, %s\n' % (generation, alt_objective, fitness)
                my_file.write(line)

    return csv_file
def run(self):
    """
    Entry point for the session task execution to take over.

    Gathers per-generation results dicts from disk, finds the candidate
    with the best fitness, restores that candidate, and draws its results.
    """
    print("Running AnalyzeResultsSessionTask")

    # Read the results files for each generation.
    # These are written out by write_results_file()
    filer = ExperimentFiler(self.experiment_dir)
    glob_spec = filer.experiment_file("gen_*/results_dict.json")
    results_dicts = glob.glob(glob_spec)
    worker_results_files = sorted(results_dicts)
    if len(worker_results_files) <= 0:
        raise ValueError("No results_dicts.json files found in {0}".format(
            self.experiment_dir))

    # No generation number needed, we are only looking to
    # parse path components with it.
    generation_filer = GenerationFiler(self.experiment_dir)

    worker_results_dict = {}
    for worker_results_file in worker_results_files:
        generation = generation_filer.get_generation_from_path(
            worker_results_file)

        # This slurps in results information returned by workers from all
        # candidates of a specific generation
        results_dict_persistence = ResultsDictPersistence(
            self.experiment_dir, generation, logger=self.logger)
        one_worker_results_dict = results_dict_persistence.restore()

        # results_dict here will have one entry per candidate over all
        # generations
        worker_results_dict.update(one_worker_results_dict)

    # Use the primary (index 0) fitness objective to decide sort direction.
    fitness_objective = self.fitness_objectives.get_fitness_objectives(0)
    is_maximize = fitness_objective.is_maximize_fitness()

    # NOTE(review): sorted(...items()) yields (key, value) tuples, so each
    # x passed to get_candidate_fitness below is a tuple, and best_result
    # is a tuple as well — confirm get_candidate_fitness accepts this.
    best_result = sorted(list(worker_results_dict.items()), key=lambda \
        x: max(self.candidate_util.get_candidate_fitness(x)),
        reverse=is_maximize)[0]

    # NOTE(review): best_result looks like a (key, value) tuple, which has
    # no .get() — this line would raise AttributeError if reached as-is.
    # Probably best_result[0] (the candidate id key) was intended; verify.
    best_id = best_result.get('id')

    # Open the file of the best candidate.
    best_candidate_persistence = BestFitnessCandidatePersistence(
        self.experiment_dir, best_id, logger=self.logger)
    best_candidate = best_candidate_persistence.restore()

    # Re-derive the id from the restored candidate (authoritative source).
    best_id = self.candidate_util.get_candidate_id(best_candidate)

    # NOTE(review): 'generation' here is whatever value the loop variable
    # held after the last iteration — confirm that is the intended
    # generation for the best candidate.
    self.draw_best_candidate_results(best_candidate, generation,
                                     suffix='abs')
def draw_best_candidate_results(self, best_candidate, generation=None,
                                suffix=''):
    """
    Visualize the network of the best candidate, if visualization is enabled.

    :param best_candidate: A candidate object comprising the best of a
            generation.
    :param generation: Default value is None
    :param suffix: Default value is an empty string
    """
    # Respect the experiment-level visualization switch.
    experiment_config = self.master_config.get('experiment_config')
    if not experiment_config.get('visualize'):
        return

    best_id = self.candidate_util.get_candidate_id(best_candidate)
    raw_fitness = self.candidate_util.get_candidate_fitness(best_candidate)
    fitness = None if raw_fitness is None else round(raw_fitness, 4)

    # Determine the output file name basis
    # XXX Use fitness for now.
    #     Later on can address multi-objective goals.
    metric_name = "fitness"

    if generation is None:
        # We do not have a generation that we know about so write out
        # the old-school file name.
        # XXX Not entirely sure when this path would be taken
        base_name = "F{0}_ID-{1}_{2}best_{3}".format(
            fitness, best_id, suffix, metric_name)
        base_path = ExperimentFiler(self.experiment_dir).experiment_file(
            base_name)
    else:
        # Put the file in the gen_NN directory.
        # Call it best_candidate to match the best_candidate.json
        # that gets put there
        base_name = "best_{0}_candidate".format(metric_name)
        base_path = GenerationFiler(
            self.experiment_dir, generation).get_generation_file(base_name)

    # NetworkVisualizers use the build_training_model() which requires
    # a data_dict of file keys -> file paths to exist. Domains that
    # wish to visualize their networks that use the data_dict will
    # need to deal with a None value for data dict in the visualization
    # case.
    data_dict = None
    visualizer = NetworkMultiVisualizer(self.master_config, data_dict,
                                        base_path, logger=self.logger)
    visualizer.visualize(best_candidate)
def __init__(self, experiment_dir, generation, candidate_id, timestamp,
             logger=None):
    """
    Constructor.

    :param experiment_dir: the directory where experiment results go
    :param generation: the generation number of the results dict
    :param candidate_id: The id of the candidate that had the error
    :param timestamp: A double timestamp of when the error occurred.
    :param logger: A logger to send messaging to
    """
    # Errors for all generations collect under a single "errors" dir.
    error_dir = ExperimentFiler(experiment_dir).experiment_file("errors")

    # Encode the error time into the file name for uniqueness.
    time_string = datetime.fromtimestamp(timestamp).strftime(
        '%Y-%m-%d-%H:%M:%S')

    gen_name = GenerationFiler(experiment_dir,
                               generation).get_generation_name()
    base_name = "evaluation_error_{0}_candidate_{1}_{2}".format(
        gen_name, candidate_id, time_string)

    super(EvaluationErrorPersistence, self).__init__(
        base_name=base_name,
        folder=error_dir,
        dictionary_converter=CandidateDictionaryConverter(),
        logger=logger)
def __init__(self, experiment_dir, generation, timestamp, logger=None):
    """
    Constructor.

    :param experiment_dir: the directory where experiment results go
    :param generation: the generation number of the results dict
    :param timestamp: A double timestamp of when the error occurred.
    :param logger: A logger to send messaging to
    """
    # Host errors for all generations collect under one "errors" dir.
    error_dir = ExperimentFiler(experiment_dir).experiment_file("errors")

    # Encode the error time into the file name for uniqueness.
    time_string = datetime.fromtimestamp(timestamp).strftime(
        '%Y-%m-%d-%H:%M:%S')

    gen_name = GenerationFiler(experiment_dir,
                               generation).get_generation_name()
    basename = "experiment_host_error_{0}_{1}".format(
        gen_name, time_string)

    # Persist the raw error text as-is; no dictionary conversion needed.
    factory = PersistenceFactory(
        object_type="string",
        dictionary_converter=PassThroughDictionaryConverter(),
        logger=logger)
    self.dict_persistence = factory.create_persistence(
        error_dir, basename,
        persistence_mechanism=PersistenceMechanisms.LOCAL,
        serialization_format=SerializationFormats.TEXT,
        must_exist=False)
def __init__(self, experiment_dir, generation, logger=None):
    """
    Constructor.

    :param experiment_dir: the directory where experiment results go
    :param generation: the generation number of the population results
    :param logger: A logger to send messaging to
    """
    # Population results live inside the generation's own directory.
    generation_dir = GenerationFiler(experiment_dir,
                                     generation).get_generation_dir()
    super(PopulationResultsPersistence, self).__init__(
        base_name="population_results",
        folder=generation_dir,
        dictionary_converter=PopulationResultsDictionaryConverter(),
        logger=logger)
def visualize_training_runs(args):
    """
    Plot per-generation training-time statistics for an experiment.

    Produces three PNG files in the experiment directory (total, longest,
    and mean training machine-hours per generation) and prints the total
    machine hours used.

    :param args: parsed command-line arguments; must provide experiment_dir
    """
    total_training_times = []
    longest_training_times = []
    average_training_times = []

    results_files = get_results_files(args)
    num_generations = len(results_files)

    generation_filer = GenerationFiler(args.experiment_dir)
    for results_file in results_files:
        generation = generation_filer.get_generation_from_path(results_file)

        persistence = ResultsDictPersistence(args.experiment_dir, generation,
                                             logger=None)
        results_dict = persistence.restore()
        if len(results_dict) == 0:
            # File not found
            continue

        # Was: a try/except whose handler repeated the identical lookup,
        # which could only re-raise the same error. A single lookup suffices.
        times = [result['metrics']['training_time']
                 for result in results_dict.values()]

        # Seconds -> hours.
        total_training_times.append(np.sum(times) / 3600)
        longest_training_times.append(np.max(times) / 3600)
        average_training_times.append(np.mean(times) / 3600)

    filer = ExperimentFiler(args.experiment_dir)

    def _plot_series(series, title, file_name):
        # One figure per series; clf() resets pyplot's implicit state
        # so the next plot starts clean.
        plt.plot(series)
        plt.title(title)
        plt.ylabel('Hours')
        plt.xlabel('Generation')
        plt.xlim(0, num_generations)
        out_file = filer.experiment_file(file_name)
        plt.savefig(out_file, bbox_inches='tight')
        plt.clf()

    _plot_series(total_training_times,
                 'Total Training Machine Hours per Generation',
                 "training_runs_total.png")
    _plot_series(longest_training_times,
                 'Longest Training Machine Hours per Generation',
                 "training_runs_longest.png")
    _plot_series(average_training_times,
                 'Mean Training Machine Hours per Generation',
                 "training_runs_avg.png")

    total_machine_hours = np.sum(total_training_times)
    print("Total machine hours used: {}".format(total_machine_hours))