def test_mutate_randint1():
    """With two-gene genomes and one expected mutation per genome, each
    gene should be mutated with probability 0.5."""
    N = 1000  # Number of independent genomes to sample

    operator = intrep_ops.mutate_randint(bounds=[(0, 1), (0, 1)],
                                         expected_num_mutations=1)
    counts = collect_two_gene_mutation_counts(operator, N)

    # Reasoning behind the expected counts: each gene has a 1/L = 0.5 chance
    # of being mutated.  An unmutated gene keeps its original value; a
    # mutated gene is resampled uniformly from {0, 1}, so it lands on either
    # value a quarter of the time overall.
    # NOTE(review): presumably the helper mutates fixed parents [0, 0] and
    # [1, 1] -- the expected counts below only make sense under that
    # assumption; confirm against collect_two_gene_mutation_counts.
    kept = 0.5 * N
    resampled = 0.25 * N
    expected_by_individual = (
        {0: kept + resampled, 1: resampled},   # individual 0
        {0: resampled, 1: kept + resampled},   # individual 1
    )

    # Chi-squared test: do the observed gene-value counts differ
    # significantly from the expected distribution?
    p = 0.001
    for ind in range(2):
        for gene in range(2):
            assert stat.stochastic_equals(expected_by_individual[ind],
                                          counts[ind][gene], p=p)
def test_create_int_vector():
    """The integer-vector initializer should sample each gene uniformly
    from that gene's own (inclusive) range."""
    N = 10000  # Number of independent genomes to sample

    # Two genes with two different ranges
    init = create_int_vector(bounds=[(0, 1), (55, 64)])
    population = [init() for _ in range(N)]

    # Expected `value: count` pairs.  Both genes are uniform, so every value
    # in a gene's range gets an equal share of the N samples.
    expected_dist0 = {value: N / 2 for value in (0, 1)}
    expected_dist1 = {value: N / 10 for value in range(55, 65)}

    # Tally the values actually observed at each locus
    dist0 = Counter(genome[0] for genome in population)
    dist1 = Counter(genome[1] for genome in population)

    # Chi-squared test: do the observed counts differ significantly from
    # the expected distributions?
    p = 0.001
    assert stat.stochastic_equals(expected_dist0, dist0, p=p)
    assert stat.stochastic_equals(expected_dist1, dist1, p=p)
def test_mutate_randint1():
    """Mutating two two-gene individuals with the default mutation rate
    should mutate each gene with probability 0.5 (the expected counts below
    assume the default works out to 1/L per gene)."""
    N = 1000  # Number of independent trials

    # One tally per (individual, gene) locus
    loci = [(0, 0), (0, 1), (1, 0), (1, 1)]
    observed = {locus: Counter() for locus in loci}

    for _ in range(N):
        # Fresh parents with fixed genomes for every trial
        parents = iter([Individual([0, 0]), Individual([1, 1])])

        # list() forces the lazy operator to actually run
        offspring = list(ops.mutate_randint(parents,
                                            bounds=[(0, 1), (0, 1)]))

        # Record the post-mutation value found at each locus
        for ind, gene in loci:
            observed[(ind, gene)][offspring[ind].genome[gene]] += 1

    # Expected distribution: each gene has a 1/L = 0.5 chance of not being
    # mutated, in which case it keeps its original value.  Otherwise its
    # value is sampled uniformly from {0, 1}.
    started_at_zero = {0: 0.5 * N + 0.25 * N, 1: 0.25 * N}
    started_at_one = {0: 0.25 * N, 1: 0.5 * N + 0.25 * N}
    expected = {
        (0, 0): started_at_zero,
        (0, 1): started_at_zero,
        (1, 0): started_at_one,
        (1, 1): started_at_one,
    }

    # Chi-squared test: do the observed counts differ significantly from
    # the expected distributions?
    p = 0.001
    for locus in loci:
        assert stat.stochastic_equals(expected[locus], observed[locus], p=p)
def test_mutate_binomial_dist():
    """Repeated binomial mutation of a fixed genome should produce offspring
    whose gene values follow the theoretical binomial distribution."""
    N = 5000  # Number of mutants to generate
    binom_n = 10000  # "coin flips" parameter for the binomial
    std = 2.5  # Standard deviation of the mutation distribution

    # Infinite bounds, so clipping never distorts the distribution
    unbounded = (-float('inf'), float('inf'))
    operator = intrep_ops.mutate_binomial(std=std,
                                          expected_num_mutations=2,
                                          bounds=[unbounded, unbounded])

    # Any value could appear, but we only tally a handful of values close
    # to each gene's starting point (keys are the values as strings).
    genome = np.array([5, 10])
    gene0_observed_dist = {str(value): 0 for value in range(3, 8)}
    gene1_observed_dist = {str(value): 0 for value in range(8, 13)}

    # Count the observed mutations in N trials
    for _ in range(N):
        mutated = next(operator(iter([Individual(genome.copy())])))
        gene0, gene1 = (str(int(gene)) for gene in mutated.genome)

        # Values outside the tracked window are folded into 'other' later
        if gene0 in gene0_observed_dist:
            gene0_observed_dist[gene0] += 1
        if gene1 in gene1_observed_dist:
            gene1_observed_dist[gene1] += 1

    # Build the expected distribution from SciPy's binomial PMF
    binom_p = intrep_ops._binomial_p_from_std(binom_n, std)
    binom = stats.binom(binom_n, binom_p)
    mu = binom_n * binom_p  # Mean of a binomial distribution is n*p

    def expected_counts(start, tracked):
        # A gene value k is an offset of (k - start) from the parent value;
        # the PMF is evaluated at the same offset from the binomial mean.
        # Counts are truncated to whole numbers.
        return {k: int(N * binom.pmf(int(mu - (start - int(k)))))
                for k in tracked}

    gene0_expected_dist = expected_counts(genome[0], gene0_observed_dist)
    gene1_expected_dist = expected_counts(genome[1], gene1_observed_dist)

    # Lump every untracked value into a single 'other' bucket so that each
    # distribution sums to N
    for dist in (gene0_observed_dist, gene1_observed_dist,
                 gene0_expected_dist, gene1_expected_dist):
        dist['other'] = N - sum(dist.values())

    p = 0.01
    assert stat.stochastic_equals(gene0_expected_dist, gene0_observed_dist,
                                  p=p)
    assert stat.stochastic_equals(gene1_expected_dist, gene1_observed_dist,
                                  p=p)
def test_segmented_initializer_variable_length():
    """Segmented sequences should honor a variable segment count.

    The number of segments in each generated sequence is drawn from
    U(1, 5) via `random.randint`, so over many samples the observed segment
    counts should not differ significantly from a uniform distribution
    over {1, ..., 5}.
    """
    distribution_func = functools.partial(random.randint, a=1, b=5)

    N = 10000  # Number of sequences to generate

    # Only the number of segments matters here, so tally it directly rather
    # than keeping every generated sequence alive.
    observed_distribution = Counter(
        len(create_segmented_sequence(distribution_func, gen_sequence))
        for _ in range(N))

    # TODO have a stat helper that can generate this conveniently
    # We expect the counts to be evenly distributed over [1, 5]
    expected_distribution = {count: N / 5 for count in range(1, 6)}

    # Expected counts go first, matching every other stochastic_equals call
    # in this file; the chi-squared statistic is not symmetric in its two
    # arguments, so the order matters.
    assert stat.stochastic_equals(expected_distribution,
                                  observed_distribution, p=0.001)
def test_random_selection1():
    """With only two individuals in the population, random selection should
    pick each of them (including the better one) about half of the time."""
    pop = [Individual(np.array(bits), problem=MaxOnes())
           for bits in ([0, 0, 0], [1, 1, 1])]

    # Tag each individual so that selections can be told apart
    for ident, ind in enumerate(pop):
        ind.id = ident

    # Evaluate everyone up front so fitnesses are available to the operator
    pop = Individual.evaluate_population(pop)
    selected = ops.random_selection(pop)

    N = 1000
    p_thresh = 0.1

    def draw_id():
        return next(selected).id

    observed_dist = statistical_helpers.collect_distribution(draw_id,
                                                             samples=N)
    expected_dist = {pop[0].id: 0.5 * N, pop[1].id: 0.5 * N}
    print(f"Observed: {observed_dist}")
    print(f"Expected: {expected_dist}")
    assert statistical_helpers.stochastic_equals(expected_dist,
                                                 observed_dist,
                                                 p=p_thresh)
def test_tournament_selection2():
    """With only two individuals and select_worst=True, binary tournament
    selection should return the worse individual 75% of the time."""
    # A two-member population in which the tournament outcome is obvious:
    # the all-zeros genome is the worse one under MaxOnes.
    worse = Individual(np.array([0, 0, 0]), problem=MaxOnes())
    better = Individual(np.array([1, 1, 1]), problem=MaxOnes())
    pop = [worse, better]

    # Tag each individual so that selections can be told apart
    worse.id = 0
    better.id = 1

    # Evaluate everyone up front so the operator has fitnesses to compare
    pop = Individual.evaluate_population(pop)
    selected = ops.tournament_selection(pop, select_worst=True)

    N = 1000
    p_thresh = 0.1

    def draw_id():
        return next(selected).id

    observed_dist = statistical_helpers.collect_distribution(draw_id,
                                                             samples=N)
    expected_dist = {pop[0].id: 0.75 * N, pop[1].id: 0.25 * N}
    print(f"Observed: {observed_dist}")
    print(f"Expected: {expected_dist}")
    assert statistical_helpers.stochastic_equals(expected_dist,
                                                 observed_dist,
                                                 p=p_thresh)
def test_sus_selection_shuffle():
    """Stochastic case of SUS selection: two individuals should be selected
    in proportions of 1/4 and 3/4, respectively."""
    # NOTE(review): the 1/4 vs 3/4 split presumably follows from MaxOnes
    # scoring these genomes as 1 and 3 -- confirm against the problem class.
    genomes = ([0, 1, 0], [1, 1, 1])
    pop = [Individual(np.array(bits), problem=MaxOnes()) for bits in genomes]

    # Tag each individual so that selections can be told apart
    for ident, ind in enumerate(pop):
        ind.id = ident

    # Evaluate everyone up front so the operator has fitnesses to work with
    pop = Individual.evaluate_population(pop)
    selected = ops.sus_selection(pop)

    N = 1000
    p_thresh = 0.1

    def draw_id():
        return next(selected).id

    observed_dist = statistical_helpers.collect_distribution(draw_id,
                                                             samples=N)
    expected_dist = {pop[0].id: 0.25 * N, pop[1].id: 0.75 * N}
    print(f"Observed: {observed_dist}")
    print(f"Expected: {expected_dist}")
    assert statistical_helpers.stochastic_equals(expected_dist,
                                                 observed_dist,
                                                 p=p_thresh)
def test_mutate_randint2():
    """With 2 expected mutations on 2-gene genomes every gene is always
    mutated, so individuals are fully resampled from a uniform
    distribution."""
    N = 1000  # Number of independent trials

    # One row per trial, holding the post-mutation values of the four loci
    # in the order (ind0 gene0, ind0 gene1, ind1 gene0, ind1 gene1)
    samples = []
    for _ in range(N):
        # Fresh parents with fixed genomes for every trial
        parents = iter([Individual([0, 0]), Individual([1, 1])])

        # list() forces the lazy operator to actually run
        children = list(ops.mutate_randint(parents,
                                           bounds=[(0, 1), (0, 1)],
                                           expected_num_mutations=2))

        samples.append((children[0].genome[0], children[0].genome[1],
                        children[1].genome[0], children[1].genome[1]))

    # Transpose the rows so that each locus gets its own tally
    locus_counts = [Counter(column) for column in zip(*samples)]

    # Expected distribution: with only two genes the per-gene mutation
    # probability is 2/L = 1.0, so all four loci should be sampled
    # uniformly from {0, 1}.
    expected = {0: 0.5 * N, 1: 0.5 * N}

    p = 0.001
    for counts in locus_counts:
        assert stat.stochastic_equals(expected, counts, p=p)
def test_mutate_randint2():
    """With 2 expected mutations on 2-gene genomes every gene is always
    mutated, so individuals are fully resampled from a uniform
    distribution."""
    N = 1000  # Number of independent genomes to sample

    operator = intrep_ops.mutate_randint(bounds=[(0, 1), (0, 1)],
                                         expected_num_mutations=2)
    counts = collect_two_gene_mutation_counts(operator, N)

    # Expected distribution: with only two genes the per-gene mutation
    # probability is 2/L = 1.0, so all four loci should be sampled
    # uniformly from {0, 1}.
    half = 0.5 * N
    expected = {0: half, 1: half}

    p = 0.001
    for ind in range(2):
        for gene in range(2):
            assert stat.stochastic_equals(expected, counts[ind][gene], p=p)
def test_cgp_mutate1(test_2layer_circuit):
    """CGP mutation applied to many copies of one genome should change each
    gene at a rate of 1/L, resampling input genes uniformly and leaving
    primitive genes untouched."""
    genome, _, decoder = test_2layer_circuit

    N = 1000
    mutator = cgp.cgp_mutate(decoder)

    # Mutation is destructive, so every parent gets its own copy
    parents = (Individual(genome[:]) for _ in range(N))
    offspring = list(mutator(parents))

    # Tally the values seen at each of the first seven loci
    observed = {locus: Counter(ind.genome[locus] for ind in offspring)
                for locus in range(7)}

    expected = {}

    # Genes 0, 3, 6, and 9 specify primitives.  Since there is only one
    # primitive, these genes cannot change.  (Only genes 0-6 are asserted
    # below; the entry for gene 9 is recorded but never checked.)
    for locus in (0, 3, 6, 9):
        expected[locus] = {0: N}

    # We expect the mutation chance to be 1/L
    p_mut = 1 / len(genome)
    p_stay = 1 - p_mut

    # The remaining genes may be mutated to one of the two input nodes with
    # probability 1/L and uniform sampling: the original value survives
    # with probability p_stay + p_mut/2, the other value appears with
    # probability p_mut/2.
    original_share = (p_stay + p_mut * 0.5) * N
    flipped_share = p_mut * 0.5 * N

    # NOTE(review): presumably genes 1 and 5 start at 0 while genes 2 and 4
    # start at 1 in the fixture -- confirm against test_2layer_circuit.
    expected[1] = {0: floor(original_share), 1: ceil(flipped_share)}
    expected[2] = {0: floor(flipped_share), 1: ceil(original_share)}
    expected[4] = {0: floor(flipped_share), 1: ceil(original_share)}
    expected[5] = {0: floor(original_share), 1: ceil(flipped_share)}

    p = 0.001
    for i in range(7):
        print(f"Gene {i}, expected={expected[i]}, observed={observed[i]}")
        assert stat.stochastic_equals(expected[i], observed[i], p=p)
def test_mutate_randint4():
    """With two two-gene individuals and a per-gene mutation probability of
    1.0, every gene should be completely resampled from a uniform
    distribution."""
    N = 1000  # Number of independent genomes to sample

    operator = intrep_ops.mutate_randint(bounds=[(0, 1), (0, 1)],
                                         probability=1.0)
    counts = collect_two_gene_mutation_counts(operator, N)

    # Expected distribution: with probability=1.0 every gene is always
    # mutated, so its value is sampled uniformly from {0, 1} regardless of
    # what it started as.
    half = 0.5 * N
    expected = {0: half, 1: half}

    # Chi-squared test: do the observed gene-value counts differ
    # significantly from the expected distribution?
    p = 0.001
    for ind in range(2):
        for gene in range(2):
            assert stat.stochastic_equals(expected, counts[ind][gene], p=p)
def test_n_ary_crossover_probability2():
    """Uniform crossover applied with p_xover=1.0 should swap each gene
    with the default per-gene swap probability of 0.2."""
    N = 5000

    observed_dist = {
        'Unmodified': 0,
        'Only left swapped': 0,
        'Only right swapped': 0,
        'Both swapped': 0
    }

    # The four possible outcomes, each given as
    # (label, genome of first child, genome of second child)
    outcomes = (
        ('Unmodified', [0, 0], [1, 1]),
        ('Only left swapped', [1, 0], [0, 1]),
        ('Only right swapped', [0, 1], [1, 0]),
        ('Both swapped', [1, 1], [0, 0]),
    )

    # Run crossover N times on a fixed pair of two-gene individuals
    for _ in range(N):
        pop = [Individual(np.array([0, 0])), Individual(np.array([1, 1]))]
        parents = ops.naive_cyclic_selection(pop)
        children = list(
            itertools.islice(ops.uniform_crossover(parents, p_xover=1.0), 2))

        # Classify the pair of children and count the occurrence
        for label, first_genome, second_genome in outcomes:
            if (np.all(children[0].genome == first_genome)
                    and np.all(children[1].genome == second_genome)):
                observed_dist[label] += 1
                break
        else:
            # The children matched none of the four possible outcomes
            assert False

    assert N == sum(observed_dist.values())

    p = 0.01
    p_swap = 0.2

    # Expected count of each combination: each locus swaps independently
    # with probability p_swap.  Counts are truncated to whole numbers.
    expected_dist = {
        'Unmodified': int((1 - p_swap) * (1 - p_swap) * N),
        'Only left swapped': int(p_swap * (1 - p_swap) * N),
        'Only right swapped': int((1 - p_swap) * p_swap * N),
        'Both swapped': int(p_swap**2 * N)
    }

    # Chi-squared test: does the experiment match what we expect?
    assert stat.stochastic_equals(expected_dist, observed_dist, p=p)
def test_stochastic_equals2():
    """Identical distributions must compare as equal even when they contain
    only a single outcome."""
    expected = {0: 1000}
    observed = dict(expected)  # separate dict with the same single outcome
    is_equal = stat.stochastic_equals(expected, observed, p=0.001)
    assert is_equal
def test_stochastic_equals1():
    """Identical expected and observed distributions must compare as equal."""
    # The two dicts hold the same counts but list their keys in opposite
    # order.
    expected = {1: 500, 0: 1000}
    observed = {0: 1000, 1: 500}
    is_equal = stat.stochastic_equals(expected, observed, p=0.001)
    assert is_equal