def _read_dataset(path):
    """Parse a whitespace-separated regression data file.

    The first line of the file is skipped. On every other non-blank line
    the last field is taken as the target value and all preceding fields
    as the input vector.

    Args:
        path: Path of the file to read.

    Returns:
        A tuple ``(inputs, outputs)`` where ``inputs`` is a list of lists of
        floats and ``outputs`` is a list of floats, in file order.

    Raises:
        IOError/OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to ``float``.
    """
    inputs = []
    outputs = []
    # The context manager closes the handle even when parsing raises.
    with open(path) as handle:
        next(handle, None)  # drop the first line; tolerate an empty file
        for line in handle:
            fields = line.split()
            if not fields:
                # A blank (e.g. trailing) line has no target column.
                continue
            inputs.append([float(value) for value in fields[:-1]])
            outputs.append(float(fields[-1]))
    return inputs, outputs


def main(problem, job, data_dir='/home/pta/Dropbox/uci/regression/'):
    """Run one seeded GP job on ``problem`` and report the best individual.

    Loads ``<problem>.training.in`` and ``<problem>.testing.in`` from
    ``data_dir`` into the module-level ``trainingInputs``,
    ``trainingOutputs``, ``testingInputs`` and ``testingOutputs`` lists,
    builds a population of ``POPSIZE`` individuals, evolves it with
    ``algorithms.eaPTA`` and prints a one-line summary.

    Args:
        problem: Base name of the data files (without the
            ``.training.in`` / ``.testing.in`` suffix).
        job: Integer job index; the random seed is ``1000 + job``.
        data_dir: Directory that holds the data files. Defaults to the
            location that was previously hard-coded.

    Returns:
        A tuple ``(job, trainingError, testingError, size,
        best_fitness_each_gen, num_distinct_ind, avg_semantic_distance)``
        where ``size`` is ``len(hof[0])`` and the last three items are
        passed through unchanged from ``algorithms.eaPTA``.
    """
    import os  # local import: only needed to build the data file paths

    global trainingInputs, trainingOutputs, testingInputs, testingOutputs

    random.seed(1000 + job)

    train_path = os.path.join(data_dir, problem + ".training.in")
    test_path = os.path.join(data_dir, problem + ".testing.in")
    trainingInputs, trainingOutputs = _read_dataset(train_path)
    testingInputs, testingOutputs = _read_dataset(test_path)

    pop = toolbox.population(n=POPSIZE)
    hof = tools.HallOfFame(1)  # keeps only the single best individual

    stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
    stats_size = tools.Statistics(len)
    mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
    mstats.register("avg", numpy.mean)
    mstats.register("std", numpy.std)
    mstats.register("min", numpy.min)
    mstats.register("max", numpy.max)

    # The evolved population and the logbook are not used afterwards.
    (_, _, best_fitness_each_gen, num_distinct_ind,
     avg_semantic_distance) = algorithms.eaPTA(
        pop, toolbox, CROSSOVER_RATE, MUTATION_RATE, NUMGEN,
        stats=mstats, halloffame=hof, verbose=False)

    best = hof[0]
    trainingError = evalSymbReg(best)[0]
    # NOTE(review): the value of describe() is treated as the testing error
    # here; confirm against describe's definition.
    testingError = describe(best)

    # Single-argument print: same text under the Python 2 print statement
    # (space-separated items) and the Python 3 print function.
    print(' '.join(str(part) for part in (
        'job ', job, ': fitness=', trainingError,
        '; fittest=', testingError, '; size=', len(best))))

    return (job, trainingError, testingError, len(best),
            best_fitness_each_gen, num_distinct_ind, avg_semantic_distance)
def gp_run(pop, job=None):
    """Evolve an existing population with ``algorithms.eaPTA`` and report it.

    Args:
        pop: The initial population handed to ``algorithms.eaPTA``.
        job: Optional job identifier used in the printed summary and in the
            returned tuple. The original body read ``job`` without defining
            it, so it only worked when a module-level ``job`` existed. When
            this argument is omitted the module-level ``job`` is still used
            if present; otherwise ``None`` is reported instead of raising
            ``NameError`` after the whole run has completed.

    Returns:
        A tuple ``(job, trainingError, testingError, size,
        best_fitness_each_gen, num_distinct_ind, avg_semantic_distance)``
        where ``size`` is ``len(hof[0])`` and the last three items are
        passed through unchanged from ``algorithms.eaPTA``.
    """
    if job is None:
        # Backward compatibility for callers relying on a global ``job``.
        job = globals().get('job')

    hof = tools.HallOfFame(1)  # keeps only the single best individual

    stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
    stats_size = tools.Statistics(len)
    mstats = tools.MultiStatistics(fitness=stats_fit, size=stats_size)
    mstats.register("avg", numpy.mean)
    mstats.register("std", numpy.std)
    mstats.register("min", numpy.min)
    mstats.register("max", numpy.max)

    # The evolved population and the logbook are not used afterwards.
    (_, _, best_fitness_each_gen, num_distinct_ind,
     avg_semantic_distance) = algorithms.eaPTA(
        pop, toolbox, CROSSOVER_RATE, MUTATION_RATE, NUMGEN,
        stats=mstats, halloffame=hof, verbose=False)

    best = hof[0]
    trainingError = evalSymbReg(best)[0]
    # NOTE(review): the value of describe() is treated as the testing error
    # here; confirm against describe's definition.
    testingError = describe(best)

    # Single-argument print: same text under the Python 2 print statement
    # (space-separated items) and the Python 3 print function.
    print(' '.join(str(part) for part in (
        'job ', job, ': fitness=', trainingError,
        '; fittest=', testingError, '; size=', len(best),
        '; distinct ind= ', num_distinct_ind)))

    return (job, trainingError, testingError, len(best),
            best_fitness_each_gen, num_distinct_ind, avg_semantic_distance)