Beispiel #1
0
    def __init__(self, experiment_dir, generation, timestamp, logger=None):
        """
        Constructor.

        Creates the persistence object stored in self.dict_persistence.
        It is requested from a PersistenceFactory with the LOCAL
        persistence mechanism and the TEXT serialization format, and its
        base name is built from the generation name plus a formatted
        timestamp.

        :param experiment_dir: the directory where experiment results go
        :param generation: the generation number; used to build the
                generation name that is embedded in the base name
        :param timestamp: A double timestamp of when the error occurred.
        :param logger: A logger to send messaging to
        """

        # Location handed to the factory as the place for error files
        error_dir = ExperimentFiler(experiment_dir).experiment_file("errors")

        # Human-readable rendering of the timestamp for the file name
        time_string = datetime.fromtimestamp(timestamp).strftime(
            '%Y-%m-%d-%H:%M:%S')

        gen_name = GenerationFiler(experiment_dir,
                                   generation).get_generation_name()

        basename = "experiment_host_error_{0}_{1}".format(gen_name,
                                                          time_string)

        persistence_factory = PersistenceFactory(
            object_type="string",
            dictionary_converter=PassThroughDictionaryConverter(),
            logger=logger)
        self.dict_persistence = persistence_factory.create_persistence(
            error_dir,
            basename,
            persistence_mechanism=PersistenceMechanisms.LOCAL,
            serialization_format=SerializationFormats.TEXT,
            must_exist=False)
Beispiel #2
0
    def __init__(self,
                 experiment_dir,
                 generation,
                 candidate_id,
                 timestamp,
                 logger=None):
        """
        Constructor.

        Builds a base name from the generation name, the candidate id and
        a formatted timestamp, then delegates to the superclass
        constructor with that name, the "errors" location and a
        CandidateDictionaryConverter.

        :param experiment_dir: the directory where experiment results go
        :param generation: the generation number; used to build the
                generation name that is embedded in the base name
        :param candidate_id: The id of the candidate that had the error
        :param timestamp: A double timestamp of when the error occurred.
        :param logger: A logger to send messaging to
        """

        error_dir = ExperimentFiler(experiment_dir).experiment_file("errors")

        # Human-readable rendering of the timestamp for the file name
        time_string = datetime.fromtimestamp(timestamp).strftime(
            '%Y-%m-%d-%H:%M:%S')

        gen_name = GenerationFiler(experiment_dir,
                                   generation).get_generation_name()

        name_parts = (gen_name, candidate_id, time_string)
        base_name = "evaluation_error_{0}_candidate_{1}_{2}".format(
            *name_parts)

        super(EvaluationErrorPersistence, self).__init__(
            base_name=base_name,
            folder=error_dir,
            dictionary_converter=CandidateDictionaryConverter(),
            logger=logger)
def write_training_runs(args):
    """
    Writes the experiment's "training_runs.csv" file: a header line followed
    by one line per entry of every results dictionary that could be restored.

    Each data line holds the generation number, the alternate objective value
    and the fitness. When the alternate objective is 'training_time' the
    value is divided by 3600.0 before being written.

    :param args: arguments object. This function reads its experiment_dir
            and alt_objective attributes and also passes the whole object
            to resolve_alt_objective() and get_results_files().
    :return: the path of the file that was written
    """

    filer = ExperimentFiler(args.experiment_dir)
    csv_file = filer.experiment_file("training_runs.csv")

    # Text mode on purpose: everything written here is a str, and writing
    # a str to a file opened in binary mode raises TypeError on Python 3.
    with open(csv_file, 'w') as my_file:
        my_file.write('Generation, %s, Fitness\n' % resolve_alt_objective(args))

    results_files = get_results_files(args)

    # NOTE(review): this filer is built with the default generation, so the
    # "candidate_*.params" cache files below all resolve to that one
    # generation folder regardless of `generation` -- confirm that is intended.
    generation_filer = GenerationFiler(args.experiment_dir)
    for results_file in results_files:
        generation = generation_filer.get_generation_from_path(results_file)
        persistence = ResultsDictPersistence(args.experiment_dir, generation,
                                             logger=None)
        results_dict = persistence.restore()

        if not results_dict:
            # File not found
            continue

        lines = []
        for key, result in results_dict.items():
            try:
                # XXX Use FitnessObjectives prepared from config?
                alt_objective = result['metrics'][args.alt_objective]
            except Exception:
                # The only alternate objective that can be recovered when it
                # is missing from the metrics is the parameter count.
                if args.alt_objective != "num_params":
                    raise
                fitness = result['fitness'] # XXX not kosher
                alt_objective = _read_or_compute_num_params(
                    args, generation_filer, key, generation)
            else:
                # Fitness may be stored inside the metrics or directly on
                # the result; try the metrics first.
                try:
                    fitness = result['metrics']['fitness'] # XXX not kosher
                except Exception:
                    fitness = result['fitness'] # XXX not kosher

            if args.alt_objective == 'training_time':
                alt_objective = str(float(alt_objective) / 3600.0)

            # NOTE(review): the header is comma separated but the rows are
            # space separated; kept as-is because readers of this file are
            # not visible here.
            lines.append('%s %s %s\n' % (generation, alt_objective, fitness))

        # One append per generation instead of reopening the file per row.
        with open(csv_file, 'a') as my_file:
            my_file.writelines(lines)
    return csv_file


def _read_or_compute_num_params(args, generation_filer, key, generation):
    """
    Returns the parameter count for one candidate as a string, using a
    "candidate_<key>.params" cache file when it exists and creating that
    file from the model otherwise.

    :param args: arguments object whose experiment_dir attribute is read
    :param generation_filer: the GenerationFiler used to locate the cache file
    :param key: the results-dictionary key of the candidate
    :param generation: the generation number passed on to get_model()
    :return: the contents of the cache file, or str(model.count_params())
    """
    # XXX What generates this params file?
    cache_file = generation_filer.get_generation_file(
        "candidate_{0}.params".format(key))

    if os.path.exists(cache_file):
        # Text mode so the value formats as plain digits, not b'...'
        with open(cache_file, 'r') as my_file:
            return my_file.read()

    print("Extracting num params from network {}".format(key))
    model = get_model(args.experiment_dir, key, generation)
    num_params = str(model.count_params())
    with open(cache_file, 'w') as my_file:
        my_file.write(num_params)
    return num_params
def get_results_files(args):
    """
    Globs for "*/results_dict.json" relative to the path that
    ExperimentFiler builds for the experiment directory.

    :param args: arguments object whose experiment_dir and max_gen
            attributes are read
    :return: the matching paths in sorted (string) order, cut down to the
            first max_gen entries when max_gen is greater than 0
    """

    pattern = ExperimentFiler(args.experiment_dir).experiment_file(
        "*/results_dict.json")
    matches = glob.glob(pattern)
    matches.sort()
    if args.max_gen > 0:
        return matches[:args.max_gen]
    return matches
Beispiel #5
0
    def run(self):
        """
        Entry point for the session task execution to take over.

        Restores every "gen_*/results_dict.json" of the experiment, merges
        them into one dictionary, picks the entry that sorts first by
        fitness, restores the corresponding best candidate and passes it to
        draw_best_candidate_results().

        :raises ValueError: if no results files are found, or if the
                restored results dictionaries are all empty
        """

        print("Running AnalyzeResultsSessionTask")

        # Read the results files for each generation.
        # These are written out by write_results_file()

        filer = ExperimentFiler(self.experiment_dir)
        glob_spec = filer.experiment_file("gen_*/results_dict.json")
        worker_results_files = sorted(glob.glob(glob_spec))
        if not worker_results_files:
            raise ValueError("No results_dicts.json files found in {0}".format(
                self.experiment_dir))

        # No generation number needed, we are only looking to
        # parse path components with it.
        generation_filer = GenerationFiler(self.experiment_dir)

        worker_results_dict = {}
        generation = None
        for worker_results_file in worker_results_files:

            generation = generation_filer.get_generation_from_path(
                worker_results_file)

            # This slurps in results information returned by workers from all
            # candidates of a specific generation
            results_dict_persistence = ResultsDictPersistence(
                self.experiment_dir, generation, logger=self.logger)
            one_worker_results_dict = results_dict_persistence.restore()

            # results_dict here will have one entry per candidate over all
            # generations
            worker_results_dict.update(one_worker_results_dict)

        if not worker_results_dict:
            # Fail with a clear message instead of an IndexError below.
            raise ValueError("No results found in results_dict.json files "
                             "in {0}".format(self.experiment_dir))

        # NOTE(review): other code in this file calls
        # get_fitness_objective(0) (singular) -- confirm which spelling
        # the fitness objectives object actually provides.
        fitness_objective = self.fitness_objectives.get_fitness_objectives(0)
        is_maximize = fitness_objective.is_maximize_fitness()

        def fitness_key(item):
            # NOTE(review): `item` is a (key, result) tuple, exactly as in
            # the original code -- confirm get_candidate_fitness() accepts it.
            return max(self.candidate_util.get_candidate_fitness(item))

        ranked = sorted(worker_results_dict.items(),
                        key=fitness_key,
                        reverse=is_maximize)

        # Each ranked entry is a (key, result) tuple, so it has to be
        # unpacked before the result's 'id' can be looked up; calling .get()
        # on the tuple itself raises AttributeError.
        best_key, best_result = ranked[0]
        best_id = best_result.get('id')
        if best_id is None:
            # presumably the dictionary keys are the candidate ids; fall back
            # to the key when the result carries no 'id' -- TODO confirm
            best_id = best_key

        # Open the file of the best candidate.
        best_candidate_persistence = BestFitnessCandidatePersistence(
            self.experiment_dir, best_id, logger=self.logger)
        best_candidate = best_candidate_persistence.restore()

        # `generation` is the one parsed from the last results file above.
        self.draw_best_candidate_results(best_candidate,
                                         generation,
                                         suffix='abs')
    def __init__(self, experiment_dir, generation, fitness_objectives):
        """
        Constructor.

        :param experiment_dir: passed to ExperimentFiler, which is kept on
                the instance as self.filer
        :param generation: stored unchanged on the instance
        :param fitness_objectives: stored on the instance and also used to
                construct the CandidateUtil kept as self.candidate_util
        """
        # Fixed settings; these do not depend on the arguments
        self.basename = 'fitness.csv'
        self.time_format = '%Y-%m-%d-%H:%M:%S'

        # Values taken straight from the caller
        self.generation = generation
        self.fitness_objectives = fitness_objectives

        # Helper objects
        self.filer = ExperimentFiler(experiment_dir)
        self.candidate_util = CandidateUtil(fitness_objectives)
Beispiel #7
0
    def __init__(self, experiment_dir, generation=0):
        """
        Constructor.

        :param experiment_dir: The directory where experiment results go;
                wrapped in an ExperimentFiler kept as self.experiment_filer
        :param generation: The generation number of the experiment
                (defaults to 0)
        """

        # Prefix stored on the instance alongside the generation number
        self.prefix = "gen_"
        self.generation = generation
        self.experiment_filer = ExperimentFiler(experiment_dir)
Beispiel #8
0
    def draw_best_candidate_results(self,
                                    best_candidate,
                                    generation=None,
                                    suffix=''):
        """
        Visualizes the given candidate with a NetworkMultiVisualizer, unless
        the 'visualize' entry of the experiment config is falsy, in which
        case nothing happens.

        :param best_candidate: A candidate object comprising the best of a
                        generation.
        :param generation: When not None, the output base path is placed
                        in that generation's folder. Default value is None,
                        which selects the older experiment-level file name.
        :param suffix: Only used in the generation-is-None file name.
                        Default value is an empty string
        """
        visualize = self.master_config.get('experiment_config').get(
            'visualize')
        if not visualize:
            return

        candidate_id = self.candidate_util.get_candidate_id(best_candidate)
        raw_fitness = self.candidate_util.get_candidate_fitness(
            best_candidate)

        if raw_fitness is None:
            rounded_fitness = raw_fitness
        else:
            rounded_fitness = round(raw_fitness, 4)

        # Determine the output file name basis

        # XXX Use fitness for now.
        #     Later on can address multi-objective goals.
        metric_name = "fitness"
        if generation is None:
            # No generation is known, so fall back to the old-school
            # file name at the experiment level.
            # XXX Not entirely sure when this path would be taken
            file_stem = "F{0}_ID-{1}_{2}best_{3}".format(
                rounded_fitness, candidate_id, suffix, metric_name)
            output_base = ExperimentFiler(
                self.experiment_dir).experiment_file(file_stem)
        else:
            # Put the file in the gen_NN directory, named to match the
            # best_candidate.json that gets put there.
            file_stem = "best_{0}_candidate".format(metric_name)
            output_base = GenerationFiler(
                self.experiment_dir, generation).get_generation_file(
                    file_stem)

        # NetworkVisualizers use the build_training_model() which requires
        # a data_dict of file keys -> file paths to exist.  Domains that
        # wish to visualize their networks that use the data_dict will
        # need to deal with a None value for data dict in the visualization
        # case.
        no_data_dict = None

        network_visualizer = NetworkMultiVisualizer(self.master_config,
                                                    no_data_dict,
                                                    output_base,
                                                    logger=self.logger)
        network_visualizer.visualize(best_candidate)
def visualize_model(args):
    """
    Plots the model named by args.visualize_model to
    "<name>_blueprint.png", then attempts to plot each of its layers to
    "<name>_module-<layer name>.png". A layer whose plotting raises is
    reported with a printed message and skipped.

    :param args: arguments object whose visualize_model, experiment_dir and
            generation attributes are read
    """

    name = args.visualize_model
    model = get_model(args.experiment_dir, name, args.generation)

    filer = ExperimentFiler(args.experiment_dir)
    blueprint_name = "{0}_blueprint.png".format(name)
    blueprint_file = filer.experiment_file(blueprint_name)

    # XXX Use a NetworkVisualizer
    plot_model(model,
               blueprint_file,
               show_layer_names=True,
               draw_submodels_only=True)

    for layer in model.layers:
        try:
            module_name = "{0}_module-{1}.png".format(name, layer.name)
            module_file = filer.experiment_file(module_name)
            # XXX Use a NetworkVisualizer
            plot_model(layer, module_file, show_layer_names=False)
        except Exception:
            print("{} is not a model".format(layer))
class CleanUpArchive():
    """
    Class to assist with cleaning up persisted weights.
    """
    def __init__(self, experiment_dir):
        """
        Constructor.

        :param experiment_dir: passed to ExperimentFiler, which is kept on
                the instance and later used to locate the "archive" path
        """
        self.filer = ExperimentFiler(experiment_dir)

    def clean_up(self, population):
        """
        Removes persisted weights of individuals that are
        no longer in the population.

        Only entries of the "archive" path whose name looks like
        "<id>.h5a[...]" are considered, and they are deleted when <id> is
        not the id of any candidate in the population.

        :param population: iterable of candidates; each one is passed to
                CandidateUtil.get_candidate_id()
        """
        base_path = self.filer.experiment_file("archive")
        candidate_util = CandidateUtil()
        candidate_ids = [candidate_util.get_candidate_id(candidate)
                         for candidate in population]

        for filepath in glob.glob(base_path + "/*"):
            # The name is split on dots: first piece is the id, second piece
            # is the type.  basename() copes with either path separator.
            name_parts = os.path.basename(filepath).split('.')
            if len(name_parts) < 2:
                # No dot in the name, so it cannot be an 'h5a' weights file.
                # (Indexing the second piece here used to raise IndexError.)
                continue

            file_id, file_type = name_parts[0], name_parts[1]
            if file_type == 'h5a' and file_id not in candidate_ids:
                os.remove(filepath)
def visualize_training_runs(args):
    """
    Plots per-generation training-time statistics and prints the total.

    For every results dictionary that can be restored, the 'training_time'
    metric of each entry is collected, and the sum, maximum and mean (each
    divided by 3600) are recorded for that generation. The three series are
    saved as "training_runs_total.png", "training_runs_longest.png" and
    "training_runs_avg.png", and the grand total is printed.

    :param args: arguments object whose experiment_dir attribute is read;
            the whole object is also passed to get_results_files()
    :raises KeyError: if a result has no ['metrics']['training_time'] entry
            (assuming results are plain dictionaries)
    """
    total_training_times = []
    longest_training_times = []
    average_training_times = []

    results_files = get_results_files(args)
    num_generations = len(results_files)

    generation_filer = GenerationFiler(args.experiment_dir)
    for results_file in results_files:
        generation = generation_filer.get_generation_from_path(results_file)
        persistence = ResultsDictPersistence(args.experiment_dir, generation,
                                             logger=None)
        results_dict = persistence.restore()

        if not results_dict:
            # File not found
            continue

        # The original wrapped this lookup in a try/except whose handler
        # repeated the very same lookup, so a failure propagated anyway.
        times = [result['metrics']['training_time']
                 for result in results_dict.values()]
        total_training_times.append(np.sum(times) / 3600)
        longest_training_times.append(np.max(times) / 3600)
        average_training_times.append(np.mean(times) / 3600)

    filer = ExperimentFiler(args.experiment_dir)

    # (data series, plot title, output file name) for each figure
    plots = [
        (total_training_times,
         'Total Training Machine Hours per Generation',
         "training_runs_total.png"),
        (longest_training_times,
         'Longest Training Machine Hours per Generation',
         "training_runs_longest.png"),
        (average_training_times,
         'Mean Training Machine Hours per Generation',
         "training_runs_avg.png"),
    ]
    for series, title, basename in plots:
        plt.plot(series)
        plt.title(title)
        plt.ylabel('Hours')
        plt.xlabel('Generation')
        plt.xlim(0, num_generations)

        plt.savefig(filer.experiment_file(basename), bbox_inches='tight')
        # Clear the figure so the next series starts from a clean slate
        plt.clf()

    total_machine_hours = np.sum(total_training_times)
    print("Total machine hours used: {}".format(total_machine_hours))
def visualize_pareto_front(args, sorted_gens, sorted_accs, sorted_alt_objs,
                           experiment_names=None):
    """
    Draws a scatter plot of fitness against the alternate objective, joins
    consecutive points with a vertical-then-horizontal step line, annotates
    every point with its generation, and saves the figure as
    "training_runs_pareto.png".

    :param args: arguments object; its min_x, max_x, min_y, max_y and
            experiment_dir attributes are read, and the whole object is
            passed to resolve_alt_objective()
    :param sorted_gens: generation labels, one per plotted point
    :param sorted_accs: fitness values (plotted on the y axis)
    :param sorted_alt_objs: alternate-objective values (plotted on the
            x axis)
    :param experiment_names: when given, the three sequences above are
            treated as one sequence per experiment (all of the same length
            as experiment_names) and a legend is drawn. When None they
            describe a single experiment.
    """

    plt.figure(figsize=(25, 15))
    plt.xlabel(resolve_alt_objective(args))
    plt.title('Fitness vs. %s Tradeoff' % resolve_alt_objective(args))
    plt.ylabel('Fitness')
    palette = ['g', 'b', 'r', 'm', 'c', 'y'] * 100

    has_names = experiment_names is not None
    if has_names:
        assert len(experiment_names) == len(sorted_gens) == \
            len(sorted_accs) == len(sorted_alt_objs)
        num_experiments = len(experiment_names)
    else:
        # Single experiment: wrap so the loop below can index uniformly
        num_experiments = 1
        sorted_gens = [sorted_gens]
        sorted_accs = [sorted_accs]
        sorted_alt_objs = [sorted_alt_objs]

    # Offset (in points) of each annotation from its data point
    label_offset = 30

    for exp_index in range(num_experiments):
        legend_label = experiment_names[exp_index] if has_names else None
        alts = sorted_alt_objs[exp_index]
        accs = sorted_accs[exp_index]
        gens = sorted_gens[exp_index]
        color = palette[exp_index]

        plt.scatter(alts, accs, alpha=0.5, marker='.',
                    label=legend_label, color=color)
        lowest_fit = min(accs)
        highest_alt = max(alts)

        last_point = len(gens) - 1
        for point in range(len(gens)):

            # The first point starts from the lowest fitness; the last
            # point extends to the largest alternate-objective value.
            if point == 0:
                prev_fit = lowest_fit
            else:
                prev_fit = accs[point - 1]
            if point == last_point:
                next_alt = highest_alt
            else:
                next_alt = alts[point + 1]
            curr_fit = accs[point]
            curr_alt = alts[point]

            # Vertical segment up to the point
            plt.plot([curr_alt, curr_alt], [prev_fit, curr_fit], color=color)

            # Horizontal segment over to the next point
            plt.plot([curr_alt, next_alt], [curr_fit, curr_fit], color=color)

            # Label the point with its generation
            arrow_style = dict(arrowstyle="->", color=color, lw=0.5)
            plt.annotate(str(gens[point]),
                         (alts[point], accs[point]),
                         color='black',
                         xytext=(label_offset, -label_offset),
                         textcoords='offset points',
                         arrowprops=arrow_style)

    # Axis limits are only applied when both ends of a range are given
    if not (args.min_x is None or args.max_x is None):
        plt.xlim(args.min_x, args.max_x)
    if not (args.min_y is None or args.max_y is None):
        plt.ylim(args.min_y, args.max_y)

    if has_names:
        plt.legend()
    # Turn on the minor TICKS, which are required for the minor GRID
    plt.minorticks_on()
    # Customize the major grid
    plt.grid(which='major', linestyle='-', alpha=0.5, linewidth='0.8')
    # Customize the minor grid
    plt.grid(which='minor', linestyle=':', alpha=0.5, linewidth='0.5')

    plt.tight_layout()

    runs_pareto_file = ExperimentFiler(args.experiment_dir).experiment_file(
        "training_runs_pareto.png")
    plt.savefig(runs_pareto_file)
    plt.clf()
 def __init__(self, experiment_dir):
     """
     Constructor.

     :param experiment_dir: passed to ExperimentFiler; the resulting
             object is kept on the instance as self.filer
     """
     experiment_filer = ExperimentFiler(experiment_dir)
     self.filer = experiment_filer
class FitnessPersistor(Persistor):
    """
    This implementation of the Persistor interface creates the
    fitness.csv file.
    """
    def __init__(self, experiment_dir, generation, fitness_objectives):
        """
        Constructor.

        :param experiment_dir: passed to ExperimentFiler, which is later
                used to build the path of the csv file
        :param generation: the last generation number to write a row for
                (rows are written for 0 through this value, inclusive)
        :param fitness_objectives: object providing the primary fitness
                objective; also used to construct the CandidateUtil
        """
        # Fixed settings; these do not depend on the arguments
        self.basename = 'fitness.csv'
        self.time_format = '%Y-%m-%d-%H:%M:%S'

        self.generation = generation
        self.fitness_objectives = fitness_objectives

        self.filer = ExperimentFiler(experiment_dir)
        self.candidate_util = CandidateUtil(fitness_objectives)

    def persist(self, obj):
        """
        Persists the object passed in.

        :param obj: an object to persist
                In this case we are expecting an advanced stats dictionary
                from the SoftOrderPersistor
        """
        target = self.filer.experiment_file(self.basename)
        self.write_csv_file(target, obj)

    def write_csv_file(self, filename, advanced_stats):
        """
        Writes out the fitness.csv file: a header row, then one row for
        each generation from 0 through self.generation.

        :param filename: The filename to write to
        :param advanced_stats: The advanced_stats dict gathered by the
                            SoftOrderPersistor. Its 'time',
                            'best_candidate' and 'avg_fitness' entries are
                            indexed by generation number.
        :return: Nothing
        """
        with open(filename, 'w') as out_file:

            # Column names depend on the primary objective's metric name
            metric = self.fitness_objectives.get_fitness_objective(
                0).get_metric_name()

            best_col = 'Best ' + metric
            best_id_col = best_col + ' id'
            avg_col = 'Avg ' + metric

            columns = ['Generation', 'Timestamp', best_id_col, best_col,
                       avg_col]
            writer = csv.DictWriter(out_file,
                                    fieldnames=columns,
                                    quoting=csv.QUOTE_MINIMAL,
                                    lineterminator="\n")
            writer.writeheader()

            last_generation = self.generation
            for gen in range(last_generation + 1):

                # Timestamp in human-readable format
                stamp = datetime.fromtimestamp(advanced_stats['time'][gen])
                row = {
                    'Generation': gen,
                    'Timestamp': stamp.strftime(self.time_format),
                    # Both stay None when there is no best candidate
                    best_id_col: None,
                    best_col: None
                }

                # Best candidate
                # XXX multi-objective
                candidate = advanced_stats['best_candidate'][gen]
                if candidate is not None:
                    row[best_id_col] = self.candidate_util.get_candidate_id(
                        candidate)
                    row[best_col] = \
                        self.candidate_util.get_candidate_fitness(candidate)

                # Average fitness
                # XXX multi-objective
                row[avg_col] = advanced_stats['avg_fitness'][gen]

                writer.writerow(row)
    def __init__(self,
                 session,
                 master_config,
                 experiment_dir,
                 fitness_objectives,
                 generation,
                 experiment_start_time,
                 experiment_id,
                 completion_service,
                 initial_generation,
                 population,
                 checkpoint_id=None,
                 novelty_policy=None,
                 server_stats=None):
        """
        Constructor.

        Passes session, master_config, experiment_dir, fitness_objectives
        and checkpoint_id to the superclass constructor, stores the
        remaining arguments on the instance, and creates the fitness
        regression policy (plus a novelty policy when none is supplied).

        :param session: The session with which the task can communicate
                    with the service
        :param master_config: The master config for the task
        :param experiment_dir: The experiment directory for results
        :param fitness_objectives: The FitnessObjectives object
        :param generation: the generation number of the population
        :param experiment_start_time: the experiment start time in seconds
        :param experiment_id: the experiment id
                XXX Can this be derived from experiment_dir?
        :param completion_service: A handle to the CompletionService object
                for performing distributed evaluations.
        :param initial_generation: Flag saying whether or not this is the
                first generation.
        :param population: The list of candidates to evaluate
        :param checkpoint_id: The checkpoint id (if any) relevant to the
                task.
        :param novelty_policy: The NoveltyPolicy (if any) relevant to the
                task. When None, one is created via NoveltyPolicyFactory.
        :param server_stats: Statistics from the ENN Service (if any)
        """
        super(EvaluatorSessionTask, self).__init__(
            session, master_config, experiment_dir, fitness_objectives,
            checkpoint_id)

        # Fixed settings that do not come from the caller
        self.result_update_frequency = 100
        self.timeout_max = 10000000
        self.evaluated_population = None

        # Caller-supplied state
        self.generation = generation
        self.population = population
        self.initial_generation = initial_generation
        self.experiment_id = experiment_id
        self.experiment_start_time = experiment_start_time
        self.completion_service = completion_service
        self.server_stats = server_stats
        self.novelty_policy = novelty_policy

        self.candidate_util = CandidateUtil(self.fitness_objectives)

        # Set up the FitnessRegression policy
        archive_path = ExperimentFiler(self.experiment_dir).experiment_file(
            "regression_archive")
        regression_factory = FitnessRegressionFactory()

        experiment_config = self.master_config.get('experiment_config')
        self.fitness_regression = regression_factory.create_fitness_regression(
            experiment_config, self.fitness_objectives, archive_path)

        if self.novelty_policy is not None:
            # The caller supplied a policy; nothing more to set up.
            return

        # XXX We use the factory to look at the config, but if
        # we were not sent a novelty_policy, then we should probably
        # just use the NullNoveltyPolicy.  But this is this way to be in
        # the spirit of the original implementation.
        self.novelty_policy = NoveltyPolicyFactory().create_novelty_policy(
            experiment_config, self.experiment_dir)
Beispiel #16
0
class GenerationFiler():
    """
    Class to handle creation of file names that go in generation folders.
    """
    def __init__(self, experiment_dir, generation=0):
        """
        Constructor.

        :param experiment_dir: The directory where experiment results go
        :param generation: The generation number of the experiment
        """

        self.experiment_filer = ExperimentFiler(experiment_dir)
        self.generation = generation
        self.prefix = "gen_"

    def get_generation_file(self, filename):
        """
        :param filename: A string filename which does not have any path
                         information associated with it.
        :return: A new string path to the filename in the appropriate
                generation folder, given the constructor arguments
        """

        return os.path.join(self.get_generation_dir(), filename)

    def get_generation_dir(self):
        """
        :return: A string path to the generation folder,
                 given the constructor arguments
        """

        name = self.get_generation_name()
        return self.experiment_filer.experiment_file(name)

    def get_generation_name(self):
        """
        :return: A canonical string for the generation: the prefix followed
                 by the generation number zero-padded to at least 2 digits.
                 This is used as the primary component for the generation
                 folder, but it can be used for other purposes as well.
        """

        return "{0}{1:02d}".format(self.prefix, self.generation)

    def get_generation_from_path(self, path):
        """
        Finds the last component of the path that starts with the prefix
        and parses the rest of that component as the generation number.

        :param path: The path from which we will get generation information.
        :return: the generation number from the given path.
        :raises ValueError: if no component of the path starts with the
                 prefix, or if what follows the prefix is not an integer
        """

        # Walk the path from its last component towards the root.
        remaining = path
        while True:
            (head, component) = os.path.split(remaining)
            if component.startswith(self.prefix):
                break
            if head == remaining:
                # os.path.split() can no longer shorten the path, so every
                # component has been examined.  (It never returns None;
                # without this check the search would loop forever.)
                raise ValueError("Could not find prefix {0} in {1}".format(
                    self.prefix, path))
            remaining = head

        # Strings are "gen_XX".  Find the number part of that string.
        number_part = component[len(self.prefix):]

        try:
            return int(number_part)
        except ValueError:
            raise ValueError(
                "Could not find generation number in {0}".format(component))