def visualize(self, candidate):
    """
    Draws the given candidate with every NetworkVisualizer requested
    by the "network_visualization" entry of the experiment config.

    :param candidate: Dictionary representing the candidate to visualize
    :return: Nothing
    """
    candidate_id = CandidateUtil().get_candidate_id(candidate)

    # Look up the (possibly absent) description of how network
    # visualization is to be performed.
    experiment_config = self.master_config.get("experiment_config", {})
    raw_vis_spec = experiment_config.get("network_visualization", None)

    # Have the parser turn that description into a canonical list of
    # per-visualizer configurations.
    name_key = "name"
    config_parser = CanonicalMultiConfigParser(name_key=name_key,
                                               logger=self.logger)
    visualizer_configs = config_parser.parse(raw_vis_spec)

    factory = NetworkVisualizerFactory(self.master_config,
                                       self.data_dict,
                                       self.base_path,
                                       logger=self.logger)

    # One pass per requested visualizer.
    for one_config in visualizer_configs:

        # The factory is keyed by the name found in the config.
        visualizer_name = one_config.get(name_key, None)
        visualizer = factory.create_network_visualizer(visualizer_name,
                                                       one_config)

        if visualizer is None:
            # An unknown name (e.g. a typo) is reported, not fatal.
            print("Don't know network visualizer '{0}'. Skipping.".format(
                visualizer_name))
            continue

        print("Using {0} to draw candidate {1}".format(
            visualizer_name, candidate_id))
        visualizer.visualize(candidate)
def clean_up(self, population):
    """
    Removes persisted weights of individuals that are no longer
    in the population.

    Every entry directly inside the experiment's "archive" location is
    examined. An entry is deleted when the second dot-separated
    component of its file name is 'h5a' and the first component is not
    the id of any candidate in the population.

    :param population: iterable of candidates whose weights are kept
    :return: Nothing
    """
    archive_dir = self.filer.experiment_file("archive")

    candidate_util = CandidateUtil()
    # Kept as a list so ids only need to support ==, not hashing.
    candidate_ids = [candidate_util.get_candidate_id(candidate)
                     for candidate in population]

    for filepath in glob.glob(os.path.join(archive_dir, "*")):
        # basename() copes with whatever separator the platform uses,
        # unlike splitting on '/'.
        name_parts = os.path.basename(filepath).split('.')
        if len(name_parts) < 2:
            # No extension at all (e.g. a sub-directory), so this is
            # not a weights file. Indexing [1] here used to raise
            # IndexError.
            continue

        file_id = name_parts[0]
        file_type = name_parts[1]
        if file_type == 'h5a' and file_id not in candidate_ids:
            os.remove(filepath)
class SoftOrderPersistor():
    """
    A LEAF-ier ploy to separate out the files that are persisted by
    softorder_coevolution -- all except for checkpointing.
    We do this so that the session_server can be the one to do the
    persistence and the files can persist on the session_server machine.
    """

    def __init__(self, experiment_dir, fitness_objectives, save_best=True,
                 draw=True, logger=None):
        """
        Constructor.

        :param experiment_dir: The experiment directory handed to the
                persistence helpers
        :param fitness_objectives: Passed to CandidateUtil and to the
                FitnessPersistor
        :param save_best: When falsy, do_save() does nothing
        :param draw: When falsy, do_draw() does nothing
        :param logger: Passed to BestFitnessCandidatePersistence
        """
        self.experiment_dir = experiment_dir
        self.save_best = save_best
        self.draw = draw
        self.fitness_objectives = fitness_objectives
        self.candidate_util = CandidateUtil(fitness_objectives)
        # Parallel lists; gather_advanced_stats() appends one entry to
        # each list every time it is called.
        self.advanced_stats = {
            'best_candidate': [],
            'avg_fitness': [],
            'time': []
        }
        self.logger = logger

    def persist(self, population, generation):
        """
        Gather statistics and persist what we want to files

        :param population: the candidates of the generation
        :param generation: the generation number
        """
        best_candidate = self.gather_advanced_stats(population)
        self.do_save(generation, best_candidate)
        self.do_draw(generation)

        fitness_persistence = FitnessPersistor(self.experiment_dir,
                                               generation,
                                               self.fitness_objectives)
        fitness_persistence.persist(self.advanced_stats)

    def get_candidate_fitness(self, candidate):
        """
        :param candidate: the candidate to examine
        :return: whatever CandidateUtil reports as the candidate's fitness
        """
        return self.candidate_util.get_candidate_fitness(candidate)

    def average_fitness(self, population):
        """
        Returns the average raw fitness of population.

        Candidates whose fitness is None are left out of the average.
        When no candidate has a fitness the result is 0.0.
        """
        my_sum = 0.0
        count = 0
        for candidate in population:
            fitness = self.get_candidate_fitness(candidate)
            if fitness is not None:
                my_sum += fitness
                count += 1

        if count == 0:
            # Explicit guard instead of dividing by a tiny epsilon.
            return 0.0
        return my_sum / count

    def find_best_candidate(self, population):
        """
        :param population: a sequence of candidates
        :return: None for a None/empty population. For dictionary
                candidates, the candidate with the greatest fitness;
                a candidate with a None fitness never displaces one
                that has a real fitness. Otherwise max(population).
        """
        if population is None or len(population) == 0:
            return None

        if not isinstance(population[0], dict):
            # Candidates are ChromosomeData
            return max(population)

        # Candidates are dictionaries
        best = None
        best_fitness = None
        for candidate in population:
            fitness = self.get_candidate_fitness(candidate)
            if best_fitness is None:
                # Nothing with a real fitness seen yet, so the latest
                # candidate becomes the provisional best.
                best_fitness = fitness
                best = candidate
            elif fitness is not None and fitness > best_fitness:
                # The None check matters: None > number is a TypeError
                # on Python 3.
                best_fitness = fitness
                best = candidate

        return best

    def gather_advanced_stats(self, population):
        """
        Populates the advanced_stats member dictionary with info about
        the generation just evaluated.

        :param population: the candidates of the generation
        :return: the best candidate found (not a copy)
        """
        best_candidate = self.find_best_candidate(population)

        # A deep copy is stored; the original is returned to the caller.
        self.advanced_stats['best_candidate'].append(
            copy.deepcopy(best_candidate))
        self.advanced_stats['avg_fitness'].append(
            self.average_fitness(population))
        self.advanced_stats['time'].append(time.time())

        return best_candidate

    def do_save(self, generation, best_candidate):
        """
        Saves the best candidate from the current generation,
        unless saving is switched off or there is no best candidate.

        :param generation: the generation number
        :param best_candidate: the candidate to persist (may be None)
        """
        if not self.save_best:
            return
        if best_candidate is None:
            return

        candidate_id = self.candidate_util.get_candidate_id(best_candidate)
        best_persistence = BestFitnessCandidatePersistence(
            self.experiment_dir, candidate_id, generation,
            logger=self.logger)
        best_persistence.persist(best_candidate)

    def do_draw(self, generation):
        """
        Plots the best-candidate and average-fitness history when drawing
        is enabled and the generation number is at least 2.

        :param generation: the generation number
        """
        if self.draw and generation >= 2:
            stats = (self.advanced_stats['best_candidate'],
                     self.advanced_stats['avg_fitness'])
            visualize.plot_stats(stats, self.candidate_util,
                                 self.experiment_dir)
class FitnessPersistor(Persistor):
    """
    Persistor implementation responsible for producing the
    fitness.csv file of an experiment.
    """

    def __init__(self, experiment_dir, generation, fitness_objectives):
        """
        Constructor.

        :param experiment_dir: the experiment directory, handed to an
                ExperimentFiler
        :param generation: the last generation number to report on
        :param fitness_objectives: source of the primary metric name;
                also handed to CandidateUtil
        """
        self.filer = ExperimentFiler(experiment_dir)
        self.generation = generation
        self.fitness_objectives = fitness_objectives
        self.candidate_util = CandidateUtil(fitness_objectives)
        self.basename = 'fitness.csv'
        self.time_format = '%Y-%m-%d-%H:%M:%S'

    def persist(self, obj):
        """
        Persists the object passed in.

        :param obj: an object to persist
                In this case we are expecting an advanced stats dictionary
                from the SoftOrderPersistor
        """
        self.write_csv_file(self.filer.experiment_file(self.basename), obj)

    def write_csv_file(self, filename, advanced_stats):
        """
        Writes out the fitness.csv file: a header row, then one row for
        each generation from 0 through self.generation inclusive.

        :param filename: The filename to write to
        :param advanced_stats: The advanced_stats dict gathered by the
                SoftOrderPersistor
        :return: Nothing
        """
        with open(filename, 'w') as csv_file:

            # Column names depend on the name of the primary metric.
            metric = self.fitness_objectives.get_fitness_objective(0)
            metric_name = metric.get_metric_name()
            best_column = 'Best ' + metric_name
            best_id_column = best_column + ' id'
            avg_column = 'Avg ' + metric_name

            columns = ['Generation', 'Timestamp', best_id_column,
                       best_column, avg_column]
            writer = csv.DictWriter(csv_file,
                                    fieldnames=columns,
                                    quoting=csv.QUOTE_MINIMAL,
                                    lineterminator="\n")
            writer.writeheader()

            last_generation = self.generation
            gen = 0
            while gen <= last_generation:

                # Human-readable form of the recorded timestamp
                moment = datetime.fromtimestamp(advanced_stats['time'][gen])
                time_string = moment.strftime(self.time_format)

                # Best candidate of the generation, if there was one
                # XXX multi-objective
                candidate = advanced_stats['best_candidate'][gen]
                if candidate is None:
                    best_id = None
                    best_fitness = None
                else:
                    best_id = self.candidate_util.get_candidate_id(candidate)
                    best_fitness = \
                        self.candidate_util.get_candidate_fitness(candidate)

                # Average fitness of the generation
                # XXX multi-objective
                avg_fitness = advanced_stats['avg_fitness'][gen]

                writer.writerow({
                    'Generation': gen,
                    'Timestamp': time_string,
                    best_id_column: best_id,
                    best_column: best_fitness,
                    avg_column: avg_fitness
                })
                gen += 1
class ReevaluateBestSessionTask(SessionTask):
    """
    SessionTask that performs a re-evaluation of the best candidates
    from each generation.
    """

    # Tied for Public Enemy #5 for too-many-arguments
    # pylint: disable=too-many-arguments
    def __init__(self, session, master_config, experiment_dir,
                 fitness_objectives, generation,
                 experiment_start_time, experiment_id,
                 completion_service, initial_generation,
                 checkpoint_id=None):
        """
        Constructor.

        :param session: The session with which the task can communicate
                    with the service
        :param master_config: The master config for the task
        :param experiment_dir: The experiment directory for results
        :param fitness_objectives: The FitnessObjectives object
        :param generation: the generation number of the population
        :param experiment_start_time: the experiment start time in seconds
        :param experiment_id: the experiment id
                XXX Can this be derived from experiment_dir?
        :param completion_service: A handle to the CompletionService object
                for performing distributed evaluations.
        :param initial_generation: Flag saying whether or not this is the first
                generation.
        :param checkpoint_id: The checkpoint id (if any) relevant to the task.
        """
        super(ReevaluateBestSessionTask, self).__init__(session,
                                                        master_config,
                                                        experiment_dir,
                                                        fitness_objectives,
                                                        checkpoint_id)

        self.generation = generation
        self.experiment_start_time = experiment_start_time
        self.experiment_id = experiment_id
        self.completion_service = completion_service
        self.initial_generation = initial_generation
        self.candidate_util = CandidateUtil(fitness_objectives)
        self.population_response_util = PopulationResponseUtil()

        experiment_config = self.master_config.get('experiment_config')

        # These are fields to be populated by unpack_response()
        # NOTE(review): run() passes this task object to
        # unpack_response(); which of these it fills in is not visible
        # here -- confirm.
        self.persistor = SoftOrderPersistor(
            self.experiment_dir,
            self.fitness_objectives,
            draw=experiment_config.get('visualize'),
            logger=self.logger)
        self.server_stats = {}
        self.seen_checkpoint_ids = []

    def run(self):
        """
        Entry point for the session task execution to take over.

        Walks every checkpoint listed for 'reevaluate_checkpoint_dir',
        collects the recorded fitness of each candidate found there,
        picks the 'reevaluate_num' candidates with the highest mean
        fitness, persists them, prints a summary, and hands them to a
        ReevaluateCandidateSessionTask for evaluation and analysis.
        """
        experiment_config = self.master_config.get('experiment_config')
        reevaluate_dir = experiment_config.get('reevaluate_checkpoint_dir')
        reevaluate_num = experiment_config.get('reevaluate_num')

        assert os.path.exists(reevaluate_dir)

        print("Re-evaluating top %s chromosomes found from experiment %s" % \
            (reevaluate_num, reevaluate_dir))

        # Maps candidate id -> {'candidate': latest candidate seen with
        # that id, 'fit': list of one fitness per sighting}
        candidate_fit_dict = {}

        # Read in the contents of the checkpoint_ids.txt file which contains
        # all references to any checkpoint training has seen.
        # By convention reevalute_checkpoint_dir is where this file is coming
        # from, and self.checkpoint_dir is where new results are being
        # written to.
        restoring_checkpoint_persistence = CheckpointPersistence(
            folder=reevaluate_dir,
            logger=self.logger)
        self.seen_checkpoint_ids = restoring_checkpoint_persistence.restore()

        # The persisted Worker Results dictionary is looked up with
        # arguments that never change inside the loops below, so it is
        # restored once, on first need, instead of once per candidate.
        # NOTE(review): assumes restore() is a plain read whose result
        # does not change while this loop runs -- confirm.
        results_dict = None
        results_restored = False

        for checkpoint_id in self.seen_checkpoint_ids:

            print("Analyzing chromos in %s" % checkpoint_id)
            population_response = self.session.get_population(
                reevaluate_dir, checkpoint_id)
            pop = self.population_response_util.unpack_response(
                population_response, self)

            for candidate in pop:
                id_key = self.candidate_util.get_candidate_id(candidate)

                if not results_restored:
                    results_dict_persistence = ResultsDictPersistence(
                        reevaluate_dir,
                        self.generation,
                        logger=self.logger)
                    results_dict = results_dict_persistence.restore()
                    results_restored = True

                candidate_fitness = None
                if any(results_dict) and id_key in results_dict:
                    candidate_results_dict = results_dict[id_key]

                    # This is not quite a candidate, but the get-mechanism
                    # should be the same
                    candidate_fitness = \
                        self.candidate_util.get_candidate_fitness(
                            candidate_results_dict)

                # A candidate with no recorded fitness counts as 0.0
                if candidate_fitness is None:
                    candidate_fitness = 0.0

                entry = candidate_fit_dict.setdefault(
                    id_key, {'candidate': candidate, 'fit': []})
                entry['candidate'] = candidate
                entry['fit'].append(candidate_fitness)

        # Rank by mean fitness, highest first, and keep the top few.
        avg = [(x['candidate'], np.mean(x['fit']))
               for x in candidate_fit_dict.values()]
        best = sorted(avg, key=lambda x: x[1],
                      reverse=True)[:reevaluate_num]
        best_candidates = [x[0] for x in best]
        best_candidate_ids = [self.candidate_util.get_candidate_id(x[0])
                              for x in best]
        best_fit = [round(x[1], 4) for x in best]

        if len(best_candidates) == 0:
            print("No chromos found, doing nothing")
            return

        for candidate in best_candidates:
            candidate_id = self.candidate_util.get_candidate_id(candidate)
            best_candidate_persistence = BestFitnessCandidatePersistence(
                self.experiment_dir,
                candidate_id,
                logger=self.logger)
            best_candidate_persistence.persist(candidate)

        print("Best chromos:")
        print(list(zip(best_candidate_ids, best_fit)))
        print("Best chromo stats:")
        print("Min: %s Mean: %s Max: %s Std: %s" % \
            (round(np.min(best_fit), 4), round(np.mean(best_fit), 4),
             round(np.max(best_fit), 4), round(np.std(best_fit), 4)))

        # We use generation + 1 for reporting here because we are really
        # composing a population of the best candidates across many
        # different previous generations, and as such doesn't really
        # correspond to any generation number of the past.
        reevaluate_candidate_task = ReevaluateCandidateSessionTask(
            self.session,
            self.master_config,
            self.experiment_dir,
            self.fitness_objectives,
            self.generation,
            self.experiment_start_time,
            self.experiment_id,
            self.completion_service,
            self.initial_generation,
            self.checkpoint_id)
        reevaluate_candidate_task.evaluate_and_analyze_results(
            best_candidates, self.generation + 1)
class ReevaluateCandidateSessionTask(SessionTask):
    """
    SessionTask that performs a re-evaluation of a specific candidate
    given a candidate JSON file specified in the config (for now).

    This task doesn't actually use the Session object that talks to the
    server, but is used within the hierarchy of other SessionTasks that do.
    """

    # Tied for Public Enemy #5 for too-many-arguments
    # pylint: disable=too-many-arguments
    def __init__(self, session, master_config, experiment_dir,
                 fitness_objectives, generation,
                 experiment_start_time, experiment_id,
                 completion_service, initial_generation,
                 checkpoint_id=None):
        """
        Constructor.

        :param session: The session with which the task can communicate
                    with the service
        :param master_config: The master config for the task
        :param experiment_dir: The experiment directory for results
        :param fitness_objectives: The FitnessObjectives object
        :param generation: the generation number of the population
        :param experiment_start_time: the experiment start time in seconds
        :param experiment_id: the experiment id
                XXX Can this be derived from experiment_dir?
        :param completion_service: A handle to the CompletionService object
                for performing distributed evaluations.
        :param initial_generation: Flag saying whether or not this is the first
                generation.
        :param checkpoint_id: The checkpoint id (if any) relevant to the task.
        """
        super(ReevaluateCandidateSessionTask, self).__init__(
            session,
            master_config,
            experiment_dir,
            fitness_objectives,
            checkpoint_id)

        self.generation = generation
        self.experiment_start_time = experiment_start_time
        self.experiment_id = experiment_id
        self.completion_service = completion_service
        self.initial_generation = initial_generation
        self.candidate_util = CandidateUtil(fitness_objectives)

    def run(self):
        """
        Entry point for the session task execution to take over.

        Restores the candidate named by 'reevaluate_chromo', makes
        'reevaluate_num' deep copies of it (each with its own id),
        persists every copy, then evaluates and analyzes them.
        """
        experiment_config = self.master_config.get('experiment_config')
        reevaluate_file = experiment_config.get('reevaluate_chromo')
        reevaluate_num = experiment_config.get('reevaluate_num')

        assert os.path.exists(reevaluate_file)
        assert experiment_config.get('reevaluate_checkpoint_dir') is None
        assert reevaluate_num < 1000

        candidate_persistence = CandidatePersistence(self.experiment_dir,
                                                     reevaluate_file,
                                                     logger=self.logger)
        orig_candidate = candidate_persistence.restore()
        orig_candidate_id = self.candidate_util.get_candidate_id(
            orig_candidate)

        print("Re-evaluating chromo %s %s times" % \
            (orig_candidate_id, reevaluate_num))

        copies = []
        for index in range(1, reevaluate_num + 1):
            # The local must not be called 'copy': that would shadow the
            # copy module for the whole function and make the deepcopy()
            # call below raise UnboundLocalError.
            candidate_copy = copy.deepcopy(orig_candidate)

            # Suffix is 0.001, 0.002, ... computed from the index and
            # rounded, so repeated float addition cannot leak digits
            # such as 0.009000000000000001 into the id.
            suffix = round(index * 0.001, 3)
            candidate_copy['id'] = orig_candidate_id + "." + str(suffix)
            copies.append(candidate_copy)

        for candidate_copy in copies:
            candidate_id = self.candidate_util.get_candidate_id(
                candidate_copy)
            best_candidate_persistence = BestFitnessCandidatePersistence(
                self.experiment_dir,
                candidate_id,
                logger=self.logger)
            best_candidate_persistence.persist(candidate_copy)

        # XXX There is a mismatch here.
        #     We should not expect ids to always be integers
        use_generation = int(orig_candidate_id)
        self.evaluate_and_analyze_results(copies, use_generation)

    def evaluate_and_analyze_results(self, population, generation_count):
        """
        Runs a CompletionServiceEvaluatorSessionTask on the population,
        followed by an AnalyzeResultsSessionTask.

        :param population: the candidates to evaluate
        :param generation_count: the generation number handed to the
                evaluation task
        """
        # Ignore the population_results that is returned.
        # AnalyzeResultsSessionTask works on reading results_dict.json
        # files from the generation directory.
        evaluate_population_task = CompletionServiceEvaluatorSessionTask(
            self.session,
            self.master_config,
            self.experiment_dir,
            self.fitness_objectives,
            generation_count,
            self.experiment_start_time,
            self.experiment_id,
            self.completion_service,
            self.initial_generation,
            population,
            self.checkpoint_id)
        evaluate_population_task.run()

        analyze_results_task = AnalyzeResultsSessionTask(
            self.session,
            self.master_config,
            self.experiment_dir,
            self.fitness_objectives,
            self.checkpoint_id)
        analyze_results_task.run()
class AnalyzeResultsSessionTask(SessionTask):
    """
    SessionTask that performs the AnalyzeResults task.

    This task doesn't actually use the Session object that talks to the
    server, but instead takes all the results files created by a run
    and does some analysis on them.  XXX What?
    """

    def __init__(self, session, master_config, experiment_dir,
                 fitness_objectives, checkpoint_id=None):
        """
        Constructor.

        :param session: The session with which the task can communicate
                    with the service
        :param master_config: The master config for the task
        :param experiment_dir: The experiment directory for results
        :param fitness_objectives: The FitnessObjectives object
        :param checkpoint_id: The checkpoint id (if any) relevant to the task.
        """
        super(AnalyzeResultsSessionTask, self).__init__(session,
                                                        master_config,
                                                        experiment_dir,
                                                        fitness_objectives,
                                                        checkpoint_id)
        self.candidate_util = CandidateUtil(fitness_objectives)

    def run(self):
        """
        Entry point for the session task execution to take over.

        Merges the results_dict.json contents of every generation,
        finds the id with the best fitness according to the primary
        fitness objective, restores that candidate and draws it.

        :raises ValueError: when no results files, or no results
                entries, are found
        """
        print("Running AnalyzeResultsSessionTask")

        # Read the results files for each generation.
        # These are written out by write_results_file()
        filer = ExperimentFiler(self.experiment_dir)
        glob_spec = filer.experiment_file("gen_*/results_dict.json")
        worker_results_files = sorted(glob.glob(glob_spec))
        if len(worker_results_files) <= 0:
            raise ValueError("No results_dicts.json files found in {0}".format(
                self.experiment_dir))

        # No generation number needed, we are only looking to
        # parse path components with it.
        generation_filer = GenerationFiler(self.experiment_dir)

        worker_results_dict = {}
        generation = None
        for worker_results_file in worker_results_files:

            generation = generation_filer.get_generation_from_path(
                worker_results_file)

            # This slurps in results information returned by workers from all
            # candidates of a specific generation
            results_dict_persistence = ResultsDictPersistence(
                self.experiment_dir,
                generation,
                logger=self.logger)
            one_worker_results_dict = results_dict_persistence.restore()

            # results_dict here will have one entry per candidate over all
            # generations
            worker_results_dict.update(one_worker_results_dict)

        if not worker_results_dict:
            # Previously surfaced as an IndexError on an empty sort.
            raise ValueError("No candidate results found in {0}".format(
                self.experiment_dir))

        fitness_objective = self.fitness_objectives.get_fitness_objective(0)
        is_maximize = fitness_objective.is_maximize_fitness()
        best_id = self._find_best_id(worker_results_dict, is_maximize)

        # Open the file of the best candidate.
        best_candidate_persistence = BestFitnessCandidatePersistence(
            self.experiment_dir,
            best_id,
            logger=self.logger)
        best_candidate = best_candidate_persistence.restore()

        # 'generation' is the one parsed from the last results file.
        self.draw_best_candidate_results(best_candidate, generation,
                                         suffix='abs')

    def _find_best_id(self, worker_results_dict, is_maximize):
        """
        Picks the key of the results entry with the best fitness.

        :param worker_results_dict: non-empty dictionary whose values
                can be passed to CandidateUtil.get_candidate_fitness()
                NOTE(review): keys are presumed to be candidate ids --
                confirm against ResultsDictPersistence.
        :param is_maximize: True when a greater fitness is better,
                False when a smaller one is
        :return: the key of the best entry. Entries with a None fitness
                only win when no entry has a real fitness.
        """
        best_id = None
        best_fitness = None
        for candidate_id, results in worker_results_dict.items():
            fitness = self.candidate_util.get_candidate_fitness(results)

            if fitness is None:
                # Remember something so the caller always gets an id.
                if best_id is None:
                    best_id = candidate_id
                continue

            if best_fitness is None:
                is_better = True
            elif is_maximize:
                is_better = fitness > best_fitness
            else:
                is_better = fitness < best_fitness

            if is_better:
                best_fitness = fitness
                best_id = candidate_id

        return best_id

    def draw_best_candidate_results(self, best_candidate, generation=None,
                                    suffix=''):
        """
        Visualizes the given candidate with a NetworkMultiVisualizer,
        unless the experiment config's 'visualize' entry is falsy.

        :param best_candidate: A candidate object comprising the best of a
                        generation.
        :param generation: Default value is None. When given, the output
                        goes under that generation's directory.
        :param suffix: Default value is an empty string. Only used in
                        the file name when generation is None.
        """
        experiment_config = self.master_config.get('experiment_config')
        if not experiment_config.get('visualize'):
            return

        best_id = self.candidate_util.get_candidate_id(best_candidate)
        best_fitness = self.candidate_util.get_candidate_fitness(
            best_candidate)

        fitness = None if best_fitness is None else round(best_fitness, 4)

        # Determine the output file name basis

        # XXX Use fitness for now.
        #     Later on can address multi-objective goals.
        metric_name = "fitness"
        if generation is not None:
            # Put the file in the gen_NN directory.
            # Call it best_candidate to match the best_candidate.json
            # that gets put there
            base_name = "best_{0}_candidate".format(metric_name)
            filer = GenerationFiler(self.experiment_dir, generation)
            base_path = filer.get_generation_file(base_name)
        else:
            # We do not have a generation that we know about so write out
            # the old-school file name.
            # XXX Not entirely sure when this path would be taken
            base_name = "F{0}_ID-{1}_{2}best_{3}".format(
                fitness, best_id, suffix, metric_name)
            filer = ExperimentFiler(self.experiment_dir)
            base_path = filer.experiment_file(base_name)

        # NetworkVisualizers use the build_training_model() which requires
        # a data_dict of file keys -> file paths to exist.  Domains that
        # wish to visualize their networks that use the data_dict will
        # need to deal with a None value for data dict in the visualization
        # case.
        data_dict = None

        visualizer = NetworkMultiVisualizer(self.master_config, data_dict,
                                            base_path, logger=self.logger)
        visualizer.visualize(best_candidate)