def coevo():
    """Co-evolve a population of policies against a population of z's.

    Runs 150 generations in which every policy is evaluated against every
    z value; z organisms are scored by the *variance* of the rewards they
    induce (so evolution favours z's that discriminate between policies).

    Returns:
        (avg_fitness, champ_fitness): two lists of length 150 with the
        policy pool's average fitness and the champion's fitness per
        generation.
    """
    # Create a pool for the policies
    pi_pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=8)
    # Create a pool of z's, starting around [0.5,0.5], should probably be better
    # NOTE(review): each z genome is a 1-element list sampled on [0, 0.5].
    z_list = [[x] for x in np.linspace(0, 0.5, 5)]
    genomes = BasicGenome.from_list(z_list, 5)
    org_list = [Organism(genome) for genome in genomes]
    z_pool = Pool(org_list)
    avg_fitness = []
    champ_fitness = []
    for i in xrange(150):
        # Advance both populations one generation (presumably epoch() also
        # resets each organism's evals — TODO confirm in eonn).
        pi_pool = eonn.epoch(pi_pool, len(pi_pool))
        z_pool = eonn.epoch(z_pool, len(z_pool))
        # Full cross-evaluation: every policy against every z.
        for pi_org, z_org in itertools.product(pi_pool, z_pool):
            reward = cliff(pi_org.genome, z=[z_org.weights[0]], verbose=False)
            pi_org.evals.append(reward)
            z_org.evals.append(reward)
        # A z's fitness is the variance of the rewards it produced across
        # all policies: high variance = more informative opponent.
        for org in z_pool:
            org.evals = [np.var(org.evals)]
        avg_fitness.append(pi_pool.fitness)
        champion = max(pi_pool)
        champ_fitness.append(champion.fitness)
    return avg_fitness, champ_fitness
def acquisition(GP, epochs):
    """ Select the best (pi,z)-pair to evaluate using GP and GA """
    # Policy population, seeded from the network structure file.
    pi_pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=8)
    # z population: a regular grid over [0, max_wind) with step 1/20.
    z_candidates = list(itertools.product(np.arange(0, max_wind, 1. / 20)))
    z_pool = Pool([Organism(g) for g in BasicGenome.from_list(z_candidates, 20)])
    for _ in xrange(epochs):
        # Evolve both pools one step and get GP predictions for the grid.
        pi_pool, z_pool, x_predict, reward_predict, MSE = do_evolution(
            pi_pool, z_pool, GP)
        grid_shape = (len(pi_pool), len(z_pool))
        reward_grid = np.reshape(reward_predict, grid_shape)
        # 95% upper-confidence half-width from the GP's predictive MSE.
        ucb_grid = np.reshape(1.96 * np.sqrt(MSE), grid_shape)
        # Score every policy and every z from the prediction grids, then
        # attach the scores to the corresponding organisms.
        add_pi_scores(pi_pool, x_predict, score_pi(reward_grid, ucb_grid))
        add_z_scores(z_pool, x_predict, score_z(reward_grid, ucb_grid))
    # Best current policy and best current z.
    return max(pi_pool), max(z_pool)