def main(): """ Run a genetic algorithm. TODO: - Allow arbitrary representations (binary, real, enum, ...) - Add crossover and mutation funcs for these representations - Allow stateful/changing mutation and crossover probabilities - Allow constraint handling (repairing infeasible solutions, penalty functions, time-variable penalty functions) - Maybe include constraints in representations (JSON?) """ args = setup_args() #--------------------------------------------------------------------------- # Print a short summary #--------------------------------------------------------------------------- print 'population size=%d, representation=%s, generations=%d, ' \ 'crossover probability=%f, mutation probability=%f, elite count=%d' \ % (args.population_size, args.representation, args.generations, args.crossover_probability, args.mutation_probability, args.elite_count) print 'selection scheme=%s, crossover scheme=%s, mutation scheme=%s, ' \ 'fitness function=%s, natural_fitness=%s' \ % (args.selection_scheme, args.crossover_scheme, args.mutation_scheme, args.fitness_function, args.natural_fitness) args.representation = {"type":"enum","length":50,"values":[1,2,3,4,5]} args.mutation_scheme = mutation.swap #--------------------------------------------------------------------------- # Generate the initial population #--------------------------------------------------------------------------- p = Population(representation=Representation(args.representation), size=args.population_size, fitness_func=args.fitness_function, selection_func=args.selection_scheme, crossover_func=args.crossover_scheme, mutation_func=args.mutation_scheme, natural_fitness=args.natural_fitness, crossover_probability=args.crossover_probability, mutation_probability=args.mutation_probability, elite_count=args.elite_count, tournament_size=args.tournament_size) p.gen_population() #--------------------------------------------------------------------------- # Run the GA 
#--------------------------------------------------------------------------- p.run(args.generations)
def main():
    """
    Run the GA on the TSPLIB instance ``tsplib/bays29.tsp``.

    Recorded results (as noted by the original author):

    tsplib: bays29  gen=500  cfunc=noop  mfunc=swap  sfunc=tournament
    ----------------------------------------------------------------------------
    size=50   cprob=0.5  mprob=0.5  ecnt=6  tsize=5   max=2333
    size=50   cprob=0.5  mprob=0.5  ecnt=6  tsize=10  max=2330
    size=50   cprob=0.5  mprob=0.5  ecnt=6  tsize=15  max=2455
    size=100  cprob=0.5  mprob=0.5  ecnt=6  tsize=5   max=2106
    size=100  cprob=0.5  mprob=0.5  ecnt=6  tsize=10  max=2319
    size=100  cprob=0.5  mprob=0.5  ecnt=6  tsize=15  max=2165
    size=100  cprob=0.5  mprob=0.7  ecnt=6  tsize=5   max=2352
    size=100  cprob=0.5  mprob=0.3  ecnt=6  tsize=5   max=

    NOTE: the table header says gen=500, but this function currently runs
    50 generations with a representation length of 5.
    """
    tsp = TSP('tsplib/bays29.tsp')

    #---------------------------------------------------------------------------
    # Generate the initial population
    #---------------------------------------------------------------------------
    tour_spec = {
        "length": 5,
        "type": "enum",
        "values": tsp.city_names,
        "duplicates": False,
    }
    generations = 50

    settings = {
        'representation': Representation(tour_spec),
        'size': 100,
        'fitness_func': tsp.calc_tour,
        'selection_func': tournament,
        'crossover_func': noop,
        'mutation_func': swap,
        'natural_fitness': False,
        'crossover_probability': 0.5,
        'mutation_probability': 0.3,
        'elite_count': 6,
        'tournament_size': 10,
    }
    population = Population(**settings)
    population.gen_population()

    #---------------------------------------------------------------------------
    # Run the GA
    #---------------------------------------------------------------------------
    population.run(generations)
def main(): data_file = 'classifier/data/bcw/breast-cancer-wisconsin.data.txt' data = list() #--------------------------------------------------------------------------- # Load the data #--------------------------------------------------------------------------- with open(data_file, 'r') as f: for line in f: data_line = list() # Split the line but throw away first number (ID number) line = line.split(',')[1:] # Store class label # data_line['class'] = line[-1] # Store data if '?' not in line: for item in line[:-1]: data_line.append(normalise(float(item))) data_line.append(int(line[-1].rstrip())) data.append(data_line) num_genes = 40 gene_length = 18 # lower + upper bound for each 9 data points classifier = BcwClassifier(data, num_genes, gene_length) #--------------------------------------------------------------------------- # Generate the initial population #--------------------------------------------------------------------------- generations = classifier.generations p = Population(representation=classifier.representation, size=classifier.population_size, fitness_func=classifier.fitness_func, selection_func=classifier.selection_func, crossover_func=classifier.crossover_func, mutation_func=classifier.mutation_func, natural_fitness=True, crossover_probability=classifier.crossover_prob, mutation_probability=classifier.mutation_prob, elite_count=classifier.elite_count, tournament_size=classifier.tournament_size) p.gen_population() #--------------------------------------------------------------------------- # Fiddle the population (ugly hack alert) #--------------------------------------------------------------------------- for i, individual in enumerate(p.population): new_genes = list() average_sigmas = list() individual.average_sigmas = list() for genes in classifier.batch_gen(individual.genes, classifier.gene_length): g = Gene(genes)#[normalise(float(gene)) for gene in genes]) g.class_label = 2 if random.random() < 0.5 else 4 g.mutation_step_sizes = [0.015 for _ 
in xrange(classifier.gene_length)] new_genes.append(g) # Update info for plotter average_sigmas.append(sum(g.mutation_step_sizes) / len(g.mutation_step_sizes)) individual.genes = new_genes individual.average_sigmas.append(sum(average_sigmas) / len(average_sigmas)) # Add strategy parameters individual.strategy_params = {'mutation_step_size': 0.05} #--------------------------------------------------------------------------- # Run the GA #--------------------------------------------------------------------------- p.run(generations) #--------------------------------------------------------------------------- # Validate the population #--------------------------------------------------------------------------- print avg = 0 for individual in p: avg += classifier.fitness_func(individual.genes, validate=True) print 'min individual: %d/%dt, %d/%dv (len=%d, num genes=%d) %s' % \ (classifier.fitness_func(p.min_individual().genes), len(classifier.training_set), classifier.fitness_func(p.min_individual().genes, validate=True), len(classifier.validation_set), len(p.min_individual()), len(p.min_individual()) / classifier.gene_length, p.min_individual()) print 'mean validation fitness:', avg / len(p) print 'max individual: %d/%dt, %d/%dv (len=%d, num genes=%d) %s' % \ (classifier.fitness_func(p.max_individual().genes), len(classifier.training_set), classifier.fitness_func(p.max_individual().genes, validate=True), len(classifier.validation_set), len(p.max_individual()), len(p.max_individual()) / classifier.gene_length, p.max_individual()) data = list() for chunk in classifier.batch_gen(p.average_sigmas, p.size): data.append(sum(chunk) / len(chunk)) p.add_to_plot(data, 'average sigma') for gene in p.max_individual().genes: for pair in classifier.batch_gen(gene.alleles, 2): print pair print gene.class_label print p.show_plot()
def main(): """ Run the classification GA on the data file given on the the command line """ if len(sys.argv) != 2: sys.exit('usage: classifier.py datafile') data_file = sys.argv[1] with open(data_file, 'r') as f: # Read the first (informational) line info_line = f.readline().split() # Set length of variables + class gene_length = int(info_line[3]) + 1 # Derive the length of an individual genome_length = (int(info_line[0]) * gene_length) # Binary data (data1.txt) if genome_length == 192: data = [list(line.rstrip().replace(' ', '')) for line in f] classifier = BinaryClassifier(data, gene_length, genome_length) # Binary data (data2.txt) elif genome_length == 448: data = [list(line.rstrip().replace(' ', '')) for line in f] classifier = VariableLengthBinaryClassifier(data, gene_length, genome_length) # Real-valued data elif genome_length == 14000: data = [map(float, line.rstrip().split()) for line in f] # 2 floats (upper, lower) per "bit", plus class gene_length = (gene_length * 2) - 2 genome_length = (int(info_line[0]) * gene_length) classifier = RealValueClassifier(data, gene_length, genome_length) else: raise IOError('unknown data file format') print '[i] loaded data file:', data_file #--------------------------------------------------------------------------- # Generate the initial population #--------------------------------------------------------------------------- generations = classifier.generations p = Population(representation=classifier.representation, size=classifier.population_size, fitness_func=classifier.fitness_func, selection_func=classifier.selection_func, crossover_func=classifier.crossover_func, mutation_func=classifier.mutation_func, natural_fitness=True, crossover_probability=classifier.crossover_prob, mutation_probability=classifier.mutation_prob, elite_count=classifier.elite_count, tournament_size=classifier.tournament_size) p.gen_population() #--------------------------------------------------------------------------- # Fiddle the population 
(ugly hack alert) #--------------------------------------------------------------------------- step = classifier.gene_length if isinstance(classifier, VariableLengthBinaryClassifier): for individual in p: # Fix a 0 or 1 in the class position for i in xrange(step - 1, len(individual.genes), step): if individual.genes[i] == '#': individual.genes[i] = '1' if random.random() < 0.5 else '0' classifier.genome_lengths.append(len(individual.genes)) if isinstance(classifier, RealValueClassifier): for i, individual in enumerate(p.population): new_genes = list() average_sigmas = list() individual.average_sigmas = list() for genes in classifier.batch_gen(individual.genes, classifier.gene_length): g = Gene(genes) g.class_label = 1 if random.random() < 0.5 else 0 g.mutation_step_sizes = [0.05 for _ in xrange(classifier.gene_length)] new_genes.append(g) # Update info for plotter average_sigmas.append(sum(g.mutation_step_sizes) / len(g.mutation_step_sizes)) individual.genes = new_genes individual.average_sigmas.append(sum(average_sigmas) / len(average_sigmas)) # Add strategy parameters individual.strategy_params = {'mutation_step_size': 0.05} print '[i] fiddled population' # if hasattr(classifier, 'genome_lengths'): # p.add_to_plot([len(i) for i in p], 'avg genome length') #--------------------------------------------------------------------------- # Run the GA #--------------------------------------------------------------------------- p.run(generations) #--------------------------------------------------------------------------- # Validate the population #--------------------------------------------------------------------------- print avg = 0 for individual in p: avg += classifier.fitness_func(individual.genes, validate=True) print 'min individual: %d/%dt, %d/%dv (len=%d, num genes=%d) %s' % \ (classifier.fitness_func(p.min_individual().genes), len(classifier.training_set), classifier.fitness_func(p.min_individual().genes, validate=True), len(classifier.validation_set), 
len(p.min_individual()), len(p.min_individual()) / classifier.gene_length, p.min_individual()) print 'mean validation fitness:', avg / len(p) print 'max individual: %d/%dt, %d/%dv (len=%d, num genes=%d) %s' % \ (classifier.fitness_func(p.max_individual().genes), len(classifier.training_set), classifier.fitness_func(p.max_individual().genes, validate=True), len(classifier.validation_set), len(p.max_individual()), len(p.max_individual()) / classifier.gene_length, p.max_individual()) # TODO: plot amount of generalisation if isinstance(classifier, VariableLengthBinaryClassifier): data = list() for chunk in classifier.chunker(classifier.genome_lengths, p.size): data.append(sum(chunk) / len(chunk)) p.add_to_plot(data, 'avg genome length') for gene in classifier.chunker(p.max_individual().genes, classifier.gene_length): print gene[:-1], gene[-1] elif isinstance(classifier, RealValueClassifier): data = list() for chunk in chunker(p.average_sigmas, p.size): data.append(sum(chunk) / len(chunk)) p.add_to_plot(data, 'average sigma') for gene in p.max_individual().genes: for pair in chunker(gene.alleles, 2): print pair print gene.class_label print p.show_plot()