def visualize(self, candidate):
        """
        Visualizes the given candidate using one or more
        NetworkVisualizers available from the Factory.

        :param candidate: Dictionary representing the candidate to visualize
        :return: Nothing
        """

        candidate_util = CandidateUtil()
        candidate_id = candidate_util.get_candidate_id(candidate)

        # Get the value for the key describing how network visualization
        # is to be performed.
        experiment_config = self.master_config.get("experiment_config", {})
        vis_value = experiment_config.get("network_visualization", None)

        # Parse the value into a canonical form: a list of
        # configurations for visualizers.
        name_key = "name"
        parser = CanonicalMultiConfigParser(name_key=name_key,
                                            logger=self.logger)
        vis_config_list = parser.parse(vis_value)
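
        # For illustration (hypothetical values), the raw config value
        # may be a single name, a list of names, or a list of dicts:
        #   "network_visualization": "default"
        #   "network_visualization": ["default", "other_vis"]
        #   "network_visualization": [{"name": "default", "format": "png"}]
        # parse() normalizes all of these into a list of dicts, each
        # carrying a "name" entry.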

        vis_factory = NetworkVisualizerFactory(self.master_config,
                                               self.data_dict,
                                               self.base_path,
                                               logger=self.logger)

        # Loop through the compiled vis_config_list to invoke all the desired
        # NetworkVisualizers.
        for vis_config in vis_config_list:

            # Get the name to use for the factory from the config
            vis_name = vis_config.get(name_key, None)

            # Create the visualizer
            visualizer = vis_factory.create_network_visualizer(
                vis_name, vis_config)

            if visualizer is not None:
                # We have a visualizer. Draw!
                print("Using {0} to draw candidate {1}".format(
                    vis_name, candidate_id))
                visualizer.visualize(candidate)

            else:
                # Do not fail just because of a typo.
                print("Don't know network visualizer '{0}'. Skipping.".format(
                    vis_name))
    def clean_up(self, population):
        """
        Removes persisted weights of individuals that are
        no longer in the population.
        """
        base_path = self.filer.experiment_file("archive")
        candidate_util = CandidateUtil()
        candidate_ids = [candidate_util.get_candidate_id(candidate)
                         for candidate in population]

        # Remove the persisted 'h5a' weights of any candidate that is
        # no longer in the population.
        for filepath in glob.glob(os.path.join(base_path, "*")):
            filename = os.path.basename(filepath)
            parts = filename.split('.')
            if len(parts) < 2:
                # Not in <id>.<extension> form; leave it alone.
                continue
            file_id, file_type = parts[0], parts[1]
            if file_id not in candidate_ids and file_type == 'h5a':
                os.remove(filepath)
class SoftOrderPersistor():
    """
    A LEAF-ier ploy to separate out the files that are persisted by
    softorder_coevolution -- all except for checkpointing.

    We do this so that the session_server can be the one to do the
    persistence and the files can persist on the session_server machine.
    """
    def __init__(self,
                 experiment_dir,
                 fitness_objectives,
                 save_best=True,
                 draw=True,
                 logger=None):

        self.experiment_dir = experiment_dir
        self.save_best = save_best
        self.draw = draw
        self.fitness_objectives = fitness_objectives
        self.candidate_util = CandidateUtil(fitness_objectives)
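        # Parallel per-generation lists: entry i of each list describes
        # generation i (best candidate, average fitness, timestamp).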
        self.advanced_stats = {
            'best_candidate': [],
            'avg_fitness': [],
            'time': []
        }
        self.logger = logger

    def persist(self, population, generation):
        """
        Gather statistics and persist what we want to files
        """

        best_candidate = self.gather_advanced_stats(population)
        self.do_save(generation, best_candidate)
        self.do_draw(generation)

        fitness_persistence = FitnessPersistor(self.experiment_dir, generation,
                                               self.fitness_objectives)
        fitness_persistence.persist(self.advanced_stats)

    def get_candidate_fitness(self, candidate):
        return self.candidate_util.get_candidate_fitness(candidate)

    def average_fitness(self, population):
        """
        Returns the average raw fitness of population
        """
        my_sum = 0.0
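        # Start the count at a tiny epsilon so a population with no
        # valid fitness values averages to ~0.0 instead of raising
        # ZeroDivisionError.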
        counter = 1e-308
        for candidate in population:
            fitness = self.get_candidate_fitness(candidate)
            if fitness is not None:
                my_sum += fitness
                counter += 1
        return my_sum / counter

    def find_best_candidate(self, population):

        if population is None or len(population) == 0:
            return None

        one = population[0]
        best = None

        if isinstance(one, dict):
            # Candidates are dictionaries
            best_fitness = None
            for candidate in population:
                fitness = self.get_candidate_fitness(candidate)
                if fitness is None:
                    # Candidates without a fitness cannot win.
                    continue
                if best_fitness is None or fitness > best_fitness:
                    best_fitness = fitness
                    best = candidate
        else:
            # Candidates are ChromosomeData
            best = max(population)

        return best

    def gather_advanced_stats(self, population):
        """
        Populates the advanced_stats member dictionary
        with info about the generation just evaluated.
        """
        best_candidate = self.find_best_candidate(population)
        self.advanced_stats['best_candidate'].append(
            copy.deepcopy(best_candidate))
        self.advanced_stats['avg_fitness'].append(
            self.average_fitness(population))
        self.advanced_stats['time'].append(time.time())
        return best_candidate

    def do_save(self, generation, best_candidate):

        # saves the best candidate from the current generation
        if not self.save_best:
            return

        if best_candidate is not None:
            candidate_id = self.candidate_util.get_candidate_id(best_candidate)
            best_persistence = BestFitnessCandidatePersistence(
                self.experiment_dir,
                candidate_id,
                generation,
                logger=self.logger)
            best_persistence.persist(best_candidate)

    def do_draw(self, generation):

        if self.draw:
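            # Skip the first couple of generations, presumably so the
            # plot has more than one data point to show.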
            if generation >= 2:
                stats = (self.advanced_stats['best_candidate'],
                         self.advanced_stats['avg_fitness'])
                visualize.plot_stats(stats, self.candidate_util,
                                     self.experiment_dir)
class FitnessPersistor(Persistor):
    """
    This implementation of the Persistor interface creates the
    fitness.csv file.
    """
    def __init__(self, experiment_dir, generation, fitness_objectives):
        """
        Constructor.

        """
        self.filer = ExperimentFiler(experiment_dir)
        self.generation = generation
        self.fitness_objectives = fitness_objectives
        self.candidate_util = CandidateUtil(fitness_objectives)
        self.basename = 'fitness.csv'
        self.time_format = '%Y-%m-%d-%H:%M:%S'

    def persist(self, obj):
        """
        Persists the object passed in.

        :param obj: an object to persist
                In this case we are expecting an advanced stats dictionary
                from the SoftOrderPersistor
        """
        advanced_stats = obj

        filename = self.filer.experiment_file(self.basename)
        self.write_csv_file(filename, advanced_stats)

    def write_csv_file(self, filename, advanced_stats):
        """
        Writes out the fitness.csv file

        :param filename: The filename to write to
        :param advanced_stats: The advanced_stats dict gathered by the
                            SoftOrderPersistor
        :return: Nothing
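
        Example output with hypothetical values, assuming the primary
        objective's metric is named "accuracy":

            Generation,Timestamp,Best accuracy id,Best accuracy,Avg accuracy
            0,2024-01-01-12:00:00,1234,0.9123,0.7548
            1,2024-01-01-12:05:00,1250,0.9301,0.7812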
        """
        with open(filename, 'w') as csv_file:

            # Prepare dynamic column names
            primary_objective = self.fitness_objectives.get_fitness_objective(
                0)
            fitness_name = primary_objective.get_metric_name()

            best_fitness_field_name = 'Best ' + fitness_name
            best_fitness_id_field_name = best_fitness_field_name + ' id'
            avg_fitness_field_name = 'Avg ' + fitness_name

            field_names = [
                'Generation', 'Timestamp', best_fitness_id_field_name,
                best_fitness_field_name, avg_fitness_field_name
            ]
            csv_writer = csv.DictWriter(csv_file,
                                        fieldnames=field_names,
                                        quoting=csv.QUOTE_MINIMAL,
                                        lineterminator="\n")
            csv_writer.writeheader()
            for gen in range(self.generation + 1):

                # Get timestamp in human-readable format
                timestamp = advanced_stats['time'][gen]
                ts_datetime = datetime.fromtimestamp(timestamp)
                time_string = ts_datetime.strftime(self.time_format)

                # Get best candidate
                # XXX multi-objective
                best_id = None
                best_fitness = None
                candidate = advanced_stats['best_candidate'][gen]
                if candidate is not None:
                    best_id = self.candidate_util.get_candidate_id(candidate)
                    best_fitness = self.candidate_util.get_candidate_fitness(
                        candidate)

                # Get average fitness
                # XXX multi-objective
                avg_fitness = advanced_stats['avg_fitness'][gen]

                row = {
                    'Generation': gen,
                    'Timestamp': time_string,
                    best_fitness_id_field_name: best_id,
                    best_fitness_field_name: best_fitness,
                    avg_fitness_field_name: avg_fitness
                }
                csv_writer.writerow(row)
class ReevaluateBestSessionTask(SessionTask):
    """
    SessionTask that performs a re-evaluation of the best candidates
    from each generation.
    """

    # Tied for Public Enemy #5 for too-many-arguments
    # pylint: disable=too-many-arguments
    def __init__(self,
                 session,
                 master_config,
                 experiment_dir,
                 fitness_objectives,
                 generation,
                 experiment_start_time,
                 experiment_id,
                 completion_service,
                 initial_generation,
                 checkpoint_id=None):
        """
        Constructor.

        :param session: The session with which the task can communicate
                    with the service
        :param master_config: The master config for the task
        :param experiment_dir: The experiment directory for results
        :param fitness_objectives: The FitnessObjectives object
        :param generation: the generation number of the population
        :param experiment_start_time: the experiment start time in seconds
        :param experiment_id: the experiment id
                XXX Can this be derived from experiment_dir?
        :param completion_service: A handle to the CompletionService object
                for performing distributed evaluations.
        :param initial_generation: Flag saying whether or not this is the first
                generation.
        :param checkpoint_id: The checkpoint id (if any) relevant to the task.
        """
        super(ReevaluateBestSessionTask,
              self).__init__(session, master_config, experiment_dir,
                             fitness_objectives, checkpoint_id)

        self.generation = generation
        self.experiment_start_time = experiment_start_time
        self.experiment_id = experiment_id
        self.completion_service = completion_service
        self.initial_generation = initial_generation

        self.candidate_util = CandidateUtil(fitness_objectives)
        self.population_response_util = PopulationResponseUtil()

        experiment_config = self.master_config.get('experiment_config')
        self.persistor = SoftOrderPersistor(
            self.experiment_dir,
            self.fitness_objectives,
            draw=experiment_config.get('visualize'),
            logger=self.logger)

        # These are fields to be populated by unpack_response()
        self.server_stats = {}
        self.seen_checkpoint_ids = []

    def run(self):
        """
        Entry point for the session task execution to take over.
        """

        experiment_config = self.master_config.get('experiment_config')
        assert os.path.exists(
            experiment_config.get('reevaluate_checkpoint_dir'))
        print("Re-evaluating top %s chromosomes found from experiment %s" % \
              (experiment_config.get('reevaluate_num'),
               experiment_config.get('reevaluate_checkpoint_dir')))

        candidate_fit_dict = {}

        # Read in the contents of the checkpoint_ids.txt file, which
        # contains references to every checkpoint training has seen.
        # By convention reevaluate_checkpoint_dir is where this file
        # comes from, and self.checkpoint_dir is where new results are
        # written.
        restoring_checkpoint_persistence = CheckpointPersistence(
            folder=experiment_config.get('reevaluate_checkpoint_dir'),
            logger=self.logger)
        self.seen_checkpoint_ids = restoring_checkpoint_persistence.restore()

        for checkpoint_id in self.seen_checkpoint_ids:

            print("Analyzing chromos in %s" % checkpoint_id)

            population_response = self.session.get_population(
                experiment_config.get('reevaluate_checkpoint_dir'),
                checkpoint_id)
            pop = self.population_response_util.unpack_response(
                population_response, self)

            # Get the persisted Worker Results dictionaries.  These do
            # not depend on the individual candidate, so restore them
            # once per checkpoint rather than once per candidate.
            results_dict_persistence = ResultsDictPersistence(
                experiment_config.get('reevaluate_checkpoint_dir'),
                self.generation,
                logger=self.logger)
            results_dict = results_dict_persistence.restore()

            for candidate in pop:
                id_key = self.candidate_util.get_candidate_id(candidate)

                candidate_fitness = None
                if id_key in results_dict:
                    candidate_results_dict = results_dict[id_key]
                    # This is not quite a candidate, but the get-mechanism
                    # should be the same
                    candidate_fitness = \
                        self.candidate_util.get_candidate_fitness(
                            candidate_results_dict)
                if candidate_fitness is None:
                    candidate_fitness = 0.0

                if id_key not in candidate_fit_dict:
                    candidate_fit_dict[id_key] = {
                        'candidate': candidate,
                        'fit': [candidate_fitness]
                    }
                else:
                    candidate_fit_dict[id_key]['candidate'] = candidate
                    candidate_fit_dict[id_key]['fit'].append(candidate_fitness)

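        # Average each candidate's fitness across every checkpoint in
        # which it appeared, then keep the reevaluate_num candidates
        # with the highest average.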
        avg = [(x['candidate'], np.mean(x['fit'])) \
                for x in list(candidate_fit_dict.values())]
        best = sorted(avg, key=lambda x: x[1],
                      reverse=True)[:experiment_config.get('reevaluate_num')]
        best_candidates = [x[0] for x in best]
        best_candidate_ids = [self.candidate_util.get_candidate_id(x[0]) \
                                for x in best]
        best_fit = [round(x[1], 4) for x in best]

        if len(best_candidates) == 0:
            print("No chromos found, doing nothing")
            return

        for candidate in best_candidates:
            candidate_id = self.candidate_util.get_candidate_id(candidate)
            best_candidate_persistence = BestFitnessCandidatePersistence(
                self.experiment_dir, candidate_id, logger=self.logger)
            best_candidate_persistence.persist(candidate)

        print("Best chromos:")
        print(list(zip(best_candidate_ids, best_fit)))
        print("Best chromo stats:")
        print("Min: %s Mean: %s Max: %s Std: %s" % \
              (round(np.min(best_fit), 4), round(np.mean(best_fit), 4),
               round(np.max(best_fit), 4), round(np.std(best_fit), 4)))

        # We use generation + 1 for reporting here because we are really
        # composing a population of the best candidates across many
        # different previous generations, so the result doesn't really
        # correspond to any single generation number of the past.
        reevaluate_candidate_task = ReevaluateCandidateSessionTask(\
            self.session,
            self.master_config,
            self.experiment_dir,
            self.fitness_objectives,
            self.generation,
            self.experiment_start_time,
            self.experiment_id,
            self.completion_service,
            self.initial_generation,
            self.checkpoint_id)
        reevaluate_candidate_task.evaluate_and_analyze_results(
            best_candidates, self.generation + 1)
class ReevaluateCandidateSessionTask(SessionTask):
    """
    SessionTask that performs a re-evaluation of a specific candidate
    given a candidate JSON file specified in the config (for now).

    This task doesn't actually use the Session object that talks
    to the server, but is used within the hierarchy of other SessionTasks
    that do.
    """

    # Tied for Public Enemy #5 for too-many-arguments
    # pylint: disable=too-many-arguments
    def __init__(self,
                 session,
                 master_config,
                 experiment_dir,
                 fitness_objectives,
                 generation,
                 experiment_start_time,
                 experiment_id,
                 completion_service,
                 initial_generation,
                 checkpoint_id=None):
        """
        Constructor.

        :param session: The session with which the task can communicate
                    with the service
        :param master_config: The master config for the task
        :param experiment_dir: The experiment directory for results
        :param fitness_objectives: The FitnessObjectives object
        :param generation: the generation number of the population
        :param experiment_start_time: the experiment start time in seconds
        :param experiment_id: the experiment id
                XXX Can this be derived from experiment_dir?
        :param completion_service: A handle to the CompletionService object
                for performing distributed evaluations.
        :param initial_generation: Flag saying whether or not this is the first
                generation.
        :param checkpoint_id: The checkpoint id (if any) relevant to the task.
        """
        super(ReevaluateCandidateSessionTask,
              self).__init__(session, master_config, experiment_dir,
                             fitness_objectives, checkpoint_id)

        self.generation = generation
        self.experiment_start_time = experiment_start_time
        self.experiment_id = experiment_id
        self.completion_service = completion_service
        self.initial_generation = initial_generation

        self.candidate_util = CandidateUtil(fitness_objectives)

    def run(self):
        """
        Entry point for the session task execution to take over.
        """
        experiment_config = self.master_config.get('experiment_config')

        reevaluate_file = experiment_config.get('reevaluate_chromo')

        assert os.path.exists(reevaluate_file)
        assert experiment_config.get('reevaluate_checkpoint_dir') is None
        assert experiment_config.get('reevaluate_num') < 1000

        candidate_persistence = CandidatePersistence(self.experiment_dir,
                                                     reevaluate_file,
                                                     logger=self.logger)
        orig_candidate = candidate_persistence.restore()

        orig_candidate_id = self.candidate_util.get_candidate_id(
            orig_candidate)
        print("Re-evaluating chromo %s %s times" % \
              (orig_candidate_id, experiment_config.get('reevaluate_num')))

        copies = []
        # Use a local name other than 'copy' so we do not shadow the
        # copy module, and compute each id suffix directly instead of
        # accumulating floats (which would drift, e.g. 0.0030000...).
        for index in range(experiment_config.get('reevaluate_num')):
            candidate_copy = copy.deepcopy(orig_candidate)
            candidate_copy['id'] = "{0}.{1}".format(orig_candidate_id,
                                                    (index + 1) / 1000.0)
            copies.append(candidate_copy)

        for candidate_copy in copies:
            candidate_id = self.candidate_util.get_candidate_id(
                candidate_copy)
            best_candidate_persistence = \
                BestFitnessCandidatePersistence(self.experiment_dir,
                                                candidate_id,
                                                logger=self.logger)
            best_candidate_persistence.persist(candidate_copy)

        # XXX There is a mismatch here.
        #     We should not expect ids to always be integers
        use_generation = int(orig_candidate_id)
        self.evaluate_and_analyze_results(copies, use_generation)

    def evaluate_and_analyze_results(self, population, generation_count):

        # Ignore the population_results that is returned.
        # AnalyzeResultsSessionTask works on reading results_dict.json
        # files from the generation directory.
        evaluate_population_task = CompletionServiceEvaluatorSessionTask(
            self.session, self.master_config, self.experiment_dir,
            self.fitness_objectives, generation_count,
            self.experiment_start_time, self.experiment_id,
            self.completion_service, self.initial_generation, population,
            self.checkpoint_id)
        evaluate_population_task.run()

        analyze_results_task = AnalyzeResultsSessionTask(
            self.session, self.master_config, self.experiment_dir,
            self.fitness_objectives, self.checkpoint_id)
        analyze_results_task.run()
class AnalyzeResultsSessionTask(SessionTask):
    """
    SessionTask that performs the AnalyzeResults task.

    This task doesn't actually use the Session object that talks
    to the server, but instead takes all the results files created
    by a run and does some analysis on them.

    XXX What?
    """
    def __init__(self,
                 session,
                 master_config,
                 experiment_dir,
                 fitness_objectives,
                 checkpoint_id=None):
        """
        Constructor.

        :param session: The session with which the task can communicate
                    with the service
        :param master_config: The master config for the task
        :param experiment_dir: The experiment directory for results
        :param fitness_objectives: The FitnessObjectives object
        :param checkpoint_id: The checkpoint id (if any) relevant to the task.
        """
        super(AnalyzeResultsSessionTask,
              self).__init__(session, master_config, experiment_dir,
                             fitness_objectives, checkpoint_id)

        self.candidate_util = CandidateUtil(fitness_objectives)

    def run(self):
        """
        Entry point for the session task execution to take over.
        """

        print("Running AnalyzeResultsSessionTask")

        # Read the results files for each generation.
        # These are written out by write_results_file()

        filer = ExperimentFiler(self.experiment_dir)
        glob_spec = filer.experiment_file("gen_*/results_dict.json")
        results_dicts = glob.glob(glob_spec)

        worker_results_files = sorted(results_dicts)
        if len(worker_results_files) <= 0:
            raise ValueError("No results_dicts.json files found in {0}".format(
                self.experiment_dir))

        # No generation number needed here; the filer is only used to
        # parse the generation out of each results file path.
        generation_filer = GenerationFiler(self.experiment_dir)

        worker_results_dict = {}
        for worker_results_file in worker_results_files:

            generation = generation_filer.get_generation_from_path(
                worker_results_file)

            # This slurps in results information returned by workers from all
            # candidates of a specific generation
            results_dict_persistence = ResultsDictPersistence(
                self.experiment_dir, generation, logger=self.logger)
            one_worker_results_dict = results_dict_persistence.restore()

            # results_dict here will have one entry per candidate over all
            # generations
            worker_results_dict.update(one_worker_results_dict)

        fitness_objective = self.fitness_objectives.get_fitness_objective(0)
        is_maximize = fitness_objective.is_maximize_fitness()

        # Each entry is a (candidate_id, results_dict) pair, so sort on
        # the fitness read from the results_dict value and take the id
        # from the key.
        best_result = sorted(
            list(worker_results_dict.items()),
            key=lambda item: self.candidate_util.get_candidate_fitness(
                item[1]),
            reverse=is_maximize)[0]
        best_id = best_result[0]

        # Open the file of the best candidate.
        best_candidate_persistence = BestFitnessCandidatePersistence(
            self.experiment_dir, best_id, logger=self.logger)
        best_candidate = best_candidate_persistence.restore()

        best_id = self.candidate_util.get_candidate_id(best_candidate)

        # Note: 'generation' here is whatever value remains from the
        # last iteration of the loop above.
        self.draw_best_candidate_results(best_candidate,
                                         generation,
                                         suffix='abs')

    def draw_best_candidate_results(self,
                                    best_candidate,
                                    generation=None,
                                    suffix=''):
        """
        :param best_candidate: A candidate object comprising the best of a
                        generation.
        :param generation: The generation number, if known. Default is None.
        :param suffix: Optional suffix for the output file basename.
                    Default is an empty string.
        """
        experiment_config = self.master_config.get('experiment_config')
        if not experiment_config.get('visualize'):
            return

        best_id = self.candidate_util.get_candidate_id(best_candidate)
        best_fitness = self.candidate_util.get_candidate_fitness(
            best_candidate)

        fitness = None if best_fitness is None else round(best_fitness, 4)

        # Determine the output file name basis

        # XXX Use fitness for now.
        #     Later on can address multi-objective goals.
        metric_name = "fitness"
        if generation is not None:
            # Put the file in the gen_NN directory.
            # Call it best_candidate to match the best_candidate.json
            # that gets put there

            base_name = "best_{0}_candidate".format(metric_name)
            filer = GenerationFiler(self.experiment_dir, generation)
            base_path = filer.get_generation_file(base_name)
        else:
            # We do not have a generation that we know about so write out
            # the old-school file name.
            # XXX Not entirely sure when this path would be taken
            base_name = "F{0}_ID-{1}_{2}best_{3}".format(
                fitness, best_id, suffix, metric_name)
            filer = ExperimentFiler(self.experiment_dir)
            base_path = filer.experiment_file(base_name)

        # NetworkVisualizers use build_training_model(), which requires
        # a data_dict of file keys -> file paths to exist.  Domains whose
        # network visualization relies on the data_dict will need to
        # handle a None value for it in the visualization case.
        data_dict = None

        visualizer = NetworkMultiVisualizer(self.master_config,
                                            data_dict,
                                            base_path,
                                            logger=self.logger)
        visualizer.visualize(best_candidate)