def update_stats(individuals, end):
    """
    Update all stats in the stats dictionary.

    :param individuals: A population of individuals.
    :param end: Boolean flag for indicating the end of an evolutionary run.
    :return: Nothing.
    """

    if not end:
        # Time Stats
        trackers.time_list.append(time() - stats['time_adjust'])
        stats['time_taken'] = trackers.time_list[-1] - \
                              trackers.time_list[-2]
        stats['total_time'] = trackers.time_list[-1] - \
                              trackers.time_list[0]

    # Population Stats
    stats['total_inds'] = params['POPULATION_SIZE'] * (stats['gen'] + 1)
    stats['runtime_error'] = len(trackers.runtime_error_cache)
    if params['CACHE']:
        stats['unique_inds'] = len(trackers.unique_ind_tracker)
        stats['unused_search'] = 100 - stats['unique_inds'] / \
                                       stats['total_inds'] * 100

    # Genome Stats
    genome_lengths = [len(i.genome) for i in individuals]
    stats['max_genome_length'] = np.nanmax(genome_lengths)
    stats['ave_genome_length'] = np.nanmean(genome_lengths)
    stats['min_genome_length'] = np.nanmin(genome_lengths)

    # Used Codon Stats
    codons = [i.used_codons for i in individuals]
    stats['max_used_codons'] = np.nanmax(codons)
    stats['ave_used_codons'] = np.nanmean(codons)
    stats['min_used_codons'] = np.nanmin(codons)

    # Tree Depth Stats
    depths = [i.depth for i in individuals]
    stats['max_tree_depth'] = np.nanmax(depths)
    stats['ave_tree_depth'] = np.nanmean(depths)
    stats['min_tree_depth'] = np.nanmin(depths)

    # Tree Node Stats
    nodes = [i.nodes for i in individuals]
    stats['max_tree_nodes'] = np.nanmax(nodes)
    stats['ave_tree_nodes'] = np.nanmean(nodes)
    stats['min_tree_nodes'] = np.nanmin(nodes)

    # NOTE: per-generation novelty stats are intentionally not computed here
    # (too expensive); total novelty is computed once, at the end of the run.
    if params["NOVELTY"] and end:
        import random
        from representation.individual import Individual

        # Total Novelty Stats, computed over a random sample of the cache.
        n = novelty()
        total_output = 0
        total_geno = 0
        total_levi = 0
        total_ast = 0
        total_deriv = 0
        cache_size = len(trackers.cache)
        individual_dics = list(trackers.cache.values())

        # Sample at least 1000 individuals (or 10% of the cache, whichever
        # is larger), capped at 10000 and at the cache size itself.
        sample_size = min(max(1000, cache_size // 10), 10000)
        sample_size = min(cache_size, sample_size)

        # Guard: an empty cache would otherwise cause a ZeroDivisionError
        # in the averages below.
        if sample_size > 0:
            ind_sample = random.sample(individual_dics, sample_size)
            for ind_dic in ind_sample:
                # Rebuild a lightweight individual from its cached data.
                ind = Individual(ind_dic["genome"], None, False)
                ind.fitness = ind_dic["fitness"]
                ind.phenotype = ind_dic["phenotype"]
                ind.AST = ind_dic["AST"]
                ind.derivation = ind_dic["derivation"]
                ind.test_cases = ind_dic["output_cases"]

                # evaluate_distance caches its result on ind.novelty and
                # short-circuits when it is not NaN, so the attribute must
                # be reset before every metric.  (np.nan: the np.NaN alias
                # was removed in NumPy 2.0.)
                ind.novelty = np.nan
                total_output += n.evaluate_distance(ind, "output")
                ind.novelty = np.nan
                total_geno += n.evaluate_distance(ind, "genotype")
                ind.novelty = np.nan
                total_levi += n.evaluate_distance(ind, "levi")
                ind.novelty = np.nan
                total_ast += n.evaluate_distance(ind, "ast")
                ind.novelty = np.nan
                total_deriv += n.evaluate_distance(ind, "derivation")
                ind.novelty = np.nan

            stats["nov_output_total"] = total_output / sample_size
            stats["nov_genotype_total"] = total_geno / sample_size
            stats["nov_phenotype_total"] = total_levi / sample_size
            stats["nov_ast_total"] = total_ast / sample_size
            stats["nov_derivation_total"] = total_deriv / sample_size

            # Change the last generation stats in the stats list too.
            final_stats = trackers.stats_list[-1]
            for key in ("nov_output_total", "nov_genotype_total",
                        "nov_phenotype_total", "nov_ast_total",
                        "nov_derivation_total"):
                final_stats[key] = stats[key]

    if not hasattr(params['FITNESS_FUNCTION'], 'multi_objective'):
        # Fitness Stats
        fitnesses = [i.fitness for i in individuals]
        stats['ave_fitness'] = np.nanmean(fitnesses, axis=0)
        stats['best_fitness'] = trackers.best_ever.fitness
# Example #2
def load_population(target):
    """
    Given a target folder, read all files in the folder and load/parse
    solutions found in each file.

    :param target: A target folder stored in the "seeds" folder.
    :return: A list of all parsed individuals stored in the target folder.
    """
    # Genomes are stored as plain list literals; literal_eval parses them
    # without the arbitrary-code-execution risk of eval().
    from ast import literal_eval

    # Set path for seeds folder
    path_1 = path.join(getcwd(), "..", "seeds")

    if not path.isdir(path_1):
        # Seeds folder does not exist.

        s = "scripts.seed_PonyGE2.load_population\n" \
            "Error: `seeds` folder does not exist in root directory."
        raise Exception(s)

    path_2 = path.join(path_1, target)

    if not path.isdir(path_2):
        # Target folder does not exist.

        s = "scripts.seed_PonyGE2.load_population\n" \
            "Error: target folder " + target + \
            " does not exist in seeds directory."
        raise Exception(s)

    # Get list of all target individuals in the target folder.
    target_inds = [i for i in listdir(path_2) if i.endswith(".txt")]

    # Initialize empty list for seed individuals.
    seed_inds = []

    for ind in target_inds:
        # Loop over all target individuals.

        # Get full file path.
        file_name = path.join(path_2, ind)

        # Initialise None data for ind info.
        genotype, phenotype = None, None

        # Open and read the seed file.
        with open(file_name, "r") as f:
            raw_content = f.read()

        # Split into individual lines for marker lookup.
        content = raw_content.split("\n")

        # Check if genotype is already saved in file.
        if "Genotype:" in content:

            # The genotype is stored on the line after the marker; guard
            # against the marker being the last line of the file.
            gen_idx = content.index("Genotype:") + 1
            raw_genome = content[gen_idx] if gen_idx < len(content) else ""

            # Get the genotype.  literal_eval raises ValueError/SyntaxError
            # (never executes code) on anything that is not a literal;
            # catching those specifically instead of a bare except avoids
            # swallowing KeyboardInterrupt/SystemExit.
            try:
                genotype = literal_eval(raw_genome)
            except (ValueError, SyntaxError) as err:
                s = "scripts.seed_PonyGE2.load_population\n" \
                    "Error: Genotype from file " + file_name + \
                    " not recognized: " + raw_genome
                raise Exception(s) from err

        # Check if phenotype (target string) is already saved in file.
        if "Phenotype:" in content:

            # Get index location of phenotype.
            phen_idx = content.index("Phenotype:") + 1

            # Get the phenotype.
            phenotype = content[phen_idx]

            # TODO: Current phenotype is read in as single-line only. Split is performed on "\n", meaning phenotypes that span multiple lines will not be parsed correctly. This must be fixed in later editions.

        elif "Genotype:" not in content:
            # There is no explicit genotype or phenotype in the target
            # file, read in entire file as phenotype.
            phenotype = raw_content

        if genotype:
            # Generate individual from genome.
            ind = Individual(genotype, None)

            if phenotype and ind.phenotype != phenotype:
                s = "scripts.seed_PonyGE2.load_population\n" \
                    "Error: Specified genotype from file " + file_name + \
                    " doesn't map to same phenotype. Check the specified " \
                    "grammar to ensure all is correct: " + \
                    params['GRAMMAR_FILE']
                raise Exception(s)

        else:
            # Set target for GE LR Parser.
            params['REVERSE_MAPPING_TARGET'] = phenotype

            # Parse target phenotype.
            ind = GE_LR_parser.main()

        # Add new ind to the list of seed individuals.
        seed_inds.append(ind)

    return seed_inds
    def evaluate_distance(self,
                          ind: Individual,
                          novelty_alg: str = "levi",
                          max_comparisons: int = 100) -> float:
        """Compare current phenotype with phenotypes from other seen phenotypes:
        scales very poorly without a max number of comparisons, as the cache is constantly
        growing"

        :param ind: An individual to be evaluated
        :param novelty_alg: algorithm to be used
        :param max_comparisons: The upper bound on the number of comparisons to run
        :return: The novelty of the individual, larger number represents larger novelty
        """
        # ind.novelty doubles as a memo: NaN means "not yet computed".
        if not np.isnan(ind.novelty):
            return ind.novelty

        size_cache = len(cache)
        # Bound the number of comparisons.
        number_comparisons = min(size_cache, max_comparisons)
        total_novelty = 0
        if size_cache > 0:
            # random.sample requires a sequence (dict views are rejected
            # since Python 3.11), so materialise the keys first.
            choices = sample(list(cache), number_comparisons)
            for other_phenotype in choices:
                # If comparing to itself, don't count it
                if other_phenotype == ind.phenotype:
                    number_comparisons -= 1
                    continue

                # Want hamming distance of genotype
                if novelty_alg in ("geno", "genotype"):
                    other_geno = cache[other_phenotype]["genome"]
                    smaller_size = min(len(ind.genome), len(other_geno))
                    this_novelty = 0
                    for index in range(smaller_size):
                        if ind.genome[index] != other_geno[index]:
                            this_novelty += 1
                    total_novelty += this_novelty / smaller_size

                # Compute hamming distance of phenotype
                elif novelty_alg == 'hamming':
                    smaller_size = min(len(ind.phenotype),
                                       len(other_phenotype))
                    total_novelty += hdistance(ind.phenotype[:smaller_size],
                                               other_phenotype[:smaller_size])

                # Compute the normalized levenshtein distance
                elif novelty_alg in ("levi", "levenshtein", "pheno",
                                     "phenotype"):
                    total_novelty += ldistance(
                        ind.phenotype, other_phenotype) / max(
                            len(ind.phenotype), len(other_phenotype))

                # Compute distance of flat AST trees
                elif novelty_alg == "ast":
                    other_ind = cache[other_phenotype]
                    total_novelty += self.compare_tree_dicts(
                        ind.AST, other_ind["AST"])

                # Compute distance of flat derivation trees
                elif novelty_alg == "derivation":
                    other_ind = cache[other_phenotype]
                    total_novelty += self.compare_tree_dicts(
                        ind.derivation, other_ind["derivation"])

                elif novelty_alg == "fitness":
                    other_ind = cache[other_phenotype]
                    total_novelty += abs(ind.fitness - other_ind["fitness"])

                elif novelty_alg == "output":
                    # Count test cases where exactly one of the two
                    # individuals succeeds (XOR via mod 2).
                    other_ind = cache[other_phenotype]
                    count = 0
                    for tcase_ind in range(len(ind.test_cases)):
                        count += ((ind.test_cases[tcase_ind] +
                                   other_ind["output_cases"][tcase_ind]) % 2)
                    total_novelty += count

                else:
                    raise NotImplementedError(novelty_alg +
                                              " has not been implemented")

            # Guard: every sampled phenotype may have been the individual's
            # own, which would otherwise divide by zero here.
            if number_comparisons <= 0:
                ind.novelty = 0
                return ind.novelty

            ind.novelty = total_novelty / number_comparisons
            return ind.novelty
        # If cache is empty, doesn't matter what is returned since every individual will reach this point
        # and thus will all have the same novelty. Also, cache should never be empty.
        return 0