Ejemplo n.º 1
0
def test_mutate_randint1():
    """Check gene-value frequencies when one mutation is expected per genome.

    With two-gene genomes and ``expected_num_mutations=1``, each gene has,
    on average, a probability of 0.5 of being mutated.
    """
    N = 1000  # Number of independent genomes to sample
    operator = intrep_ops.mutate_randint(bounds=[(0, 1), (0, 1)],
                                         expected_num_mutations=1)
    counts = collect_two_gene_mutation_counts(operator, N)

    # Expected counts.  A gene escapes mutation with probability 1/L = 0.5 and
    # then keeps its original value; otherwise its value is drawn uniformly
    # from {0, 1}.  The original value is therefore seen 0.5 + 0.25 of the
    # time and the other value 0.25 of the time.
    # NOTE(review): presumably the helper mutates the genomes [0, 0] and
    # [1, 1] -- confirm against collect_two_gene_mutation_counts.
    mostly_zero = {0: 0.5 * N + 0.25 * N, 1: 0.25 * N}
    mostly_one = {0: 0.25 * N, 1: 0.5 * N + 0.25 * N}

    # Compare every locus against its expected distribution: individual 0 is
    # checked against `mostly_zero`, individual 1 against `mostly_one`.
    p = 0.001
    for ind_idx, expected in enumerate((mostly_zero, mostly_one)):
        for gene_idx in range(2):
            assert stat.stochastic_equals(expected,
                                          counts[ind_idx][gene_idx],
                                          p=p)
Ejemplo n.º 2
0
def test_create_int_vector():
    """Genomes from ``create_int_vector`` should be uniform over each gene's
    range."""
    N = 10000  # Number of independent genomes to sample

    # Two genes, each with a different range
    make_genome = create_int_vector(bounds=[(0, 1), (55, 64)])
    genomes = [make_genome() for _ in range(N)]

    # Tally how often each value was observed at each gene
    observed_gene0 = Counter([g[0] for g in genomes])
    observed_gene1 = Counter([g[1] for g in genomes])

    # Expected `value: expected_count` pairs.  Both genes are uniform, but
    # over ranges of different widths (2 values vs. the 10 values 55..64).
    expected_gene0 = {0: N / 2, 1: N / 2}
    expected_gene1 = {value: N / 10 for value in range(55, 65)}

    # The observed counts must not differ significantly from the expected
    # distributions at significance level p.
    p = 0.001
    assert stat.stochastic_equals(expected_gene0, observed_gene0, p=p)
    assert stat.stochastic_equals(expected_gene1, observed_gene1, p=p)
Ejemplo n.º 3
0
def test_mutate_randint1():
    """Check gene-value frequencies under the default mutation rate.

    Two parents with two genes each are mutated without overriding the
    mutation rate; on average each gene then has a probability of 0.5 of
    being mutated.
    """
    N = 1000  # Number of independent trials

    # samples[i][j] gathers the post-mutation values of gene j of individual i
    samples = [[[], []], [[], []]]

    for _ in range(N):
        # Two parents with fixed two-gene genomes, fed in as an iterator
        parents = iter([Individual([0, 0]), Individual([1, 1])])

        # Mutate, and materialize the operator's output
        mutated = list(ops.mutate_randint(parents, bounds=[(0, 1), (0, 1)]))

        for ind_idx in range(2):
            for gene_idx in range(2):
                samples[ind_idx][gene_idx].append(
                    mutated[ind_idx].genome[gene_idx])

    # Frequency of each value at each of the four loci
    counts = [[Counter(values) for values in per_individual]
              for per_individual in samples]

    # Expected counts.  A gene escapes mutation with probability 1/L = 0.5 and
    # then keeps its original value; otherwise its value is drawn uniformly
    # from {0, 1}.
    mostly_zero = {0: 0.5 * N + 0.25 * N, 1: 0.25 * N}
    mostly_one = {0: 0.25 * N, 1: 0.5 * N + 0.25 * N}

    # The first parent started at [0, 0], the second at [1, 1].
    p = 0.001
    for ind_idx, expected in enumerate((mostly_zero, mostly_one)):
        for gene_idx in range(2):
            assert stat.stochastic_equals(expected,
                                          counts[ind_idx][gene_idx],
                                          p=p)
Ejemplo n.º 4
0
def test_mutate_binomial_dist():
    """Repeated binomial mutation should reproduce the theoretical
    distribution of offspring values."""

    N = 5000  # Number of mutants to generate
    binom_n = 10000  # "coin flips" parameter for the binomial
    std = 2.5  # Standard deviation of the mutation distribution

    # Infinite bounds on both genes, so clipping never comes into play
    unbounded = (-float('inf'), float('inf'))
    operator = intrep_ops.mutate_binomial(std=std, expected_num_mutations=2,
                                          bounds=[unbounded, unbounded])

    # Any value could appear, but only a few values around each original
    # gene value are tallied individually (keys are the values as strings).
    genome = np.array([5, 10])
    gene0_observed = {str(value): 0 for value in range(3, 8)}
    gene1_observed = {str(value): 0 for value in range(8, 13)}

    # Tally the observed gene values over N independent mutations
    for _ in range(N):
        parent = Individual(genome.copy())
        child = next(operator(iter([parent])))
        value0, value1 = child.genome
        value0, value1 = str(int(value0)), str(int(value1))

        if value0 in gene0_observed:
            gene0_observed[value0] += 1

        if value1 in gene1_observed:
            gene1_observed[value1] += 1

    # Build the expected counts from SciPy's binomial PMF
    binom_p = intrep_ops._binomial_p_from_std(binom_n, std)
    binom = stats.binom(binom_n, binom_p)
    mu = binom_n * binom_p  # Mean of a binomial distribution is n*p

    def expected_counts(observed, original_value):
        # A gene value k is mapped to the binomial outcome
        # mu - (original_value - k), i.e. the mean shifted by the offset of k
        # from the original value.
        return {
            k: int(N * binom.pmf(int(mu - (original_value - int(k)))))
            for k in observed
        }

    # These must be computed before the 'other' bucket is added below,
    # because every key is parsed with int().
    gene0_expected = expected_counts(gene0_observed, genome[0])
    gene1_expected = expected_counts(gene1_observed, genome[1])

    # Lump every value that was not tallied individually into one bucket
    for dist in (gene0_observed, gene1_observed,
                 gene0_expected, gene1_expected):
        dist['other'] = N - sum(dist.values())

    p = 0.01
    assert stat.stochastic_equals(gene0_expected, gene0_observed, p=p)
    assert stat.stochastic_equals(gene1_expected, gene1_observed, p=p)
Ejemplo n.º 5
0
def test_segmented_initializer_variable_length():
    """Segmented sequences should have uniformly distributed segment counts.

    The number of segments per sequence is drawn from U(1, 5), so the
    observed distribution of sequence lengths should not differ
    significantly from a uniform distribution over {1, ..., 5}.
    """
    N = 10000

    # Draws the number of segments uniformly from [1, 5]
    distribution_func = functools.partial(random.randint, a=1, b=5)

    # Generate N segmented sequences, then record how many segments each has
    segments = [
        create_segmented_sequence(distribution_func, gen_sequence)
        for _ in range(N)
    ]
    segment_lengths = [len(sequence) for sequence in segments]

    distribution = Counter(segment_lengths)

    # TODO have a stat helper that can generate this conveniently
    # Each count in [1, 5] is expected equally often
    expected_distribution = {count: N / 5 for count in range(1, 6)}

    # NOTE(review): the observed distribution is passed first here -- confirm
    # this matches the parameter order of stat.stochastic_equals.
    assert stat.stochastic_equals(distribution, expected_distribution, p=0.001)
Ejemplo n.º 6
0
def test_random_selection1():
    """With a two-individual population, random selection should pick each
    individual (and hence the better one) with 50% probability."""
    N = 1000
    p_thresh = 0.1

    all_zeros = Individual(np.array([0, 0, 0]), problem=MaxOnes())
    all_ones = Individual(np.array([1, 1, 1]), problem=MaxOnes())

    # Tag each individual with a unique identifier so selections can be
    # tallied
    all_zeros.id = 0
    all_ones.id = 1

    # Individuals are evaluated before being handed to the selection operator
    pop = Individual.evaluate_population([all_zeros, all_ones])
    selector = ops.random_selection(pop)

    def draw_id():
        return next(selector).id

    observed_dist = statistical_helpers.collect_distribution(draw_id,
                                                             samples=N)
    expected_dist = {pop[0].id: 0.5 * N, pop[1].id: 0.5 * N}
    print(f"Observed: {observed_dist}")
    print(f"Expected: {expected_dist}")
    assert statistical_helpers.stochastic_equals(expected_dist,
                                                 observed_dist,
                                                 p=p_thresh)
Ejemplo n.º 7
0
def test_tournament_selection2():
    """With a two-individual population and ``select_worst=True``, binary
    tournament selection should pick the worse individual 75% of the time."""
    N = 1000
    p_thresh = 0.1

    # Under MaxOnes, the all-zeros genome is the worse of the two
    all_zeros = Individual(np.array([0, 0, 0]), problem=MaxOnes())
    all_ones = Individual(np.array([1, 1, 1]), problem=MaxOnes())

    # Tag each individual with a unique identifier so selections can be
    # tallied
    all_zeros.id = 0
    all_ones.id = 1

    # Selection compares fitnesses, so the population is evaluated first
    pop = Individual.evaluate_population([all_zeros, all_ones])
    selector = ops.tournament_selection(pop, select_worst=True)

    def draw_id():
        return next(selector).id

    observed_dist = statistical_helpers.collect_distribution(draw_id,
                                                             samples=N)
    expected_dist = {pop[0].id: 0.75 * N, pop[1].id: 0.25 * N}
    print(f"Observed: {observed_dist}")
    print(f"Expected: {expected_dist}")
    assert statistical_helpers.stochastic_equals(expected_dist,
                                                 observed_dist,
                                                 p=p_thresh)
Ejemplo n.º 8
0
def test_sus_selection_shuffle():
    ''' Test of a stochastic case of SUS selection '''
    N = 1000
    p_thresh = 0.1

    # Under MaxOnes these genomes contain 1 and 3 ones, so the expected
    # selection proportions are 1/4 and 3/4, respectively
    one_bit = Individual(np.array([0, 1, 0]), problem=MaxOnes())
    three_bits = Individual(np.array([1, 1, 1]), problem=MaxOnes())

    # Tag each individual with a unique identifier so selections can be
    # tallied
    one_bit.id = 0
    three_bits.id = 1

    # Selection compares fitnesses, so the population is evaluated first
    pop = Individual.evaluate_population([one_bit, three_bits])
    selector = ops.sus_selection(pop)

    def draw_id():
        return next(selector).id

    observed_dist = statistical_helpers.collect_distribution(draw_id,
                                                             samples=N)
    expected_dist = {pop[0].id: 0.25 * N, pop[1].id: 0.75 * N}
    print(f"Observed: {observed_dist}")
    print(f"Expected: {expected_dist}")
    assert statistical_helpers.stochastic_equals(expected_dist,
                                                 observed_dist,
                                                 p=p_thresh)
Ejemplo n.º 9
0
def test_mutate_randint2():
    """Check that every gene is resampled when 2 mutations are expected.

    With two-gene genomes and ``expected_num_mutations=2``, every gene is
    always mutated, so individuals are completely resampled from a uniform
    distribution.
    """
    N = 1000  # Number of independent trials

    # samples[i][j] gathers the post-mutation values of gene j of individual i
    samples = [[[], []], [[], []]]

    for _ in range(N):
        # Two parents with fixed two-gene genomes, fed in as an iterator
        parents = iter([Individual([0, 0]), Individual([1, 1])])

        # Mutate, and materialize the operator's output
        mutated = list(
            ops.mutate_randint(parents,
                               bounds=[(0, 1), (0, 1)],
                               expected_num_mutations=2))

        for ind_idx in range(2):
            for gene_idx in range(2):
                samples[ind_idx][gene_idx].append(
                    mutated[ind_idx].genome[gene_idx])

    # Frequency of each value at each of the four loci
    counts = [[Counter(values) for values in per_individual]
              for per_individual in samples]

    # Expected counts.  With only two genes the mutation probability is
    # 2/L = 1.0, so all four genes should be sampled uniformly from {0, 1}.
    expected = {0: 0.5 * N, 1: 0.5 * N}
    p = 0.001
    for ind_idx in range(2):
        for gene_idx in range(2):
            assert stat.stochastic_equals(expected,
                                          counts[ind_idx][gene_idx],
                                          p=p)
Ejemplo n.º 10
0
def test_mutate_randint2():
    """Check that every gene is resampled when 2 mutations are expected.

    With two-gene genomes and ``expected_num_mutations=2``, every gene is
    always mutated, so individuals are completely resampled from a uniform
    distribution.
    """
    N = 1000  # Number of independent genomes to sample
    operator = intrep_ops.mutate_randint(bounds=[(0, 1), (0, 1)],
                                         expected_num_mutations=2)
    counts = collect_two_gene_mutation_counts(operator, N)

    # Expected counts.  With only two genes the mutation probability is
    # 2/L = 1.0, so all four genes should be sampled uniformly from {0, 1}.
    expected = {0: 0.5 * N, 1: 0.5 * N}
    p = 0.001
    for ind_idx in range(2):
        for gene_idx in range(2):
            assert stat.stochastic_equals(expected,
                                          counts[ind_idx][gene_idx],
                                          p=p)
Ejemplo n.º 11
0
def test_cgp_mutate1(test_2layer_circuit):
    """Check per-gene value frequencies after applying ``cgp_mutate`` to N
    copies of a fixed genome.

    :param test_2layer_circuit: fixture unpacked as ``(genome, _, decoder)``
    """
    genome, _, decoder = test_2layer_circuit

    N = 1000
    mutator = cgp.cgp_mutate(decoder)
    # Mutation is destructive, so every parent is built from its own
    # `genome[:]` slice
    parents = (Individual(genome[:]) for _ in range(N))
    offspring = list(mutator(parents))

    # Value frequencies observed at each of the first seven genes
    observed = {
        gene: Counter([ind.genome[gene] for ind in offspring])
        for gene in range(7)
    }

    expected = {}
    # Genes 0, 3, 6, and 9 specify primitives.  Since there is only one
    # primitive, these genes will not change.  (An entry is recorded for
    # gene 9, although only genes 0-6 are checked below.)
    for gene in (0, 3, 6, 9):
        expected[gene] = {0: N}

    # The mutation chance is expected to be 1/L
    p_mut = 1 / len(genome)
    p_stay = 1 - p_mut

    # A gene shows its dominant value when it is left alone or resampled to
    # the same value, and the other value when it is resampled away from it.
    dominant = (p_stay + p_mut * 0.5) * N
    rare = p_mut * 0.5 * N

    def mostly_zero():
        return {0: floor(dominant), 1: ceil(rare)}

    def mostly_one():
        return {0: floor(rare), 1: ceil(dominant)}

    # Genes 1, 2, 4 and 5 may be mutated to one of the input nodes,
    # with probability 1/L and uniform sampling
    expected[1] = mostly_zero()
    expected[2] = mostly_one()
    expected[4] = mostly_one()
    expected[5] = mostly_zero()

    p = 0.001
    for i in range(7):
        print(f"Gene {i}, expected={expected[i]}, observed={observed[i]}")
        assert stat.stochastic_equals(expected[i], observed[i], p=p)
Ejemplo n.º 12
0
def test_mutate_randint4():
    """Check that a mutation probability of 1.0 resamples every gene.

    With two-gene genomes and ``probability=1.0``, all genes should be
    completely resampled from a uniform distribution.
    """
    N = 1000  # Number of independent genomes to sample
    operator = intrep_ops.mutate_randint(bounds=[(0, 1), (0, 1)],
                                         probability=1.0)
    counts = collect_two_gene_mutation_counts(operator, N)

    # Expected counts.  With a mutation probability of 1.0 no gene keeps its
    # original value; each one is sampled uniformly from {0, 1}, so both
    # values are expected half of the time at every locus.
    expected = {0: 0.5 * N, 1: 0.5 * N}

    # The observed gene-value counts must not differ significantly from the
    # expected distribution at significance level p.
    p = 0.001
    for ind_idx in range(2):
        for gene_idx in range(2):
            assert stat.stochastic_equals(expected,
                                          counts[ind_idx][gene_idx],
                                          p=p)
Ejemplo n.º 13
0
def test_n_ary_crossover_probability2():
    """Check swap frequencies of uniform crossover applied with
    ``p_xover=1.0``.

    Each gene is expected to be swapped with probability 0.2.
    NOTE(review): 0.2 is presumably the operator's default per-gene swap
    probability -- confirm against ``ops.uniform_crossover``.
    """
    N = 5000
    observed_dist = {
        'Unmodified': 0,
        'Only left swapped': 0,
        'Only right swapped': 0,
        'Both swapped': 0
    }

    def genomes_are(pair, first, second):
        # True when the two offspring carry exactly these genomes
        return (np.all(pair[0].genome == first)
                and np.all(pair[1].genome == second))

    # Run crossover N times on a fixed pair of two-gene individuals
    for _ in range(N):
        pop = [Individual(np.array([0, 0])), Individual(np.array([1, 1]))]
        parent_stream = ops.naive_cyclic_selection(pop)
        crossed = ops.uniform_crossover(parent_stream, p_xover=1.0)
        new_pop = list(itertools.islice(crossed, 2))

        # Exactly four outcomes are possible; count the one that occurred
        if genomes_are(new_pop, [0, 0], [1, 1]):
            observed_dist['Unmodified'] += 1
        elif genomes_are(new_pop, [1, 0], [0, 1]):
            observed_dist['Only left swapped'] += 1
        elif genomes_are(new_pop, [0, 1], [1, 0]):
            observed_dist['Only right swapped'] += 1
        elif genomes_are(new_pop, [1, 1], [0, 0]):
            observed_dist['Both swapped'] += 1
        else:
            assert False

    assert N == sum(observed_dist.values())

    p = 0.01
    p_swap = 0.2
    p_keep = 1 - p_swap
    # Expected count of each combination, with each locus swapping
    # independently with probability p_swap
    expected_dist = {
        'Unmodified': int(p_keep * p_keep * N),
        'Only left swapped': int(p_swap * p_keep * N),
        'Only right swapped': int(p_keep * p_swap * N),
        'Both swapped': int(p_swap**2 * N)
    }

    # The observed outcome counts must match the expected ones at
    # significance level p
    assert stat.stochastic_equals(expected_dist, observed_dist, p=p)
Ejemplo n.º 14
0
def test_stochastic_equals2():
    """Two identical distributions with a single outcome should compare as
    equal."""
    expected, observed = {0: 1000}, {0: 1000}
    is_equal = stat.stochastic_equals(expected, observed, p=0.001)
    assert is_equal
Ejemplo n.º 15
0
def test_stochastic_equals1():
    """Identical expected and observed distributions should compare as equal,
    even when their keys were inserted in a different order."""
    observed = {0: 1000, 1: 500}
    expected = {1: 500, 0: 1000}  # Same counts, keys in reverse order
    is_equal = stat.stochastic_equals(expected, observed, p=0.001)
    assert is_equal