Example #1
0
def coevo():
    """Co-evolve a pool of policies and a pool of z values for 150 epochs.

    Every epoch both pools are advanced with ``eonn.epoch``; afterwards each
    (policy, z) combination is run through ``cliff`` and the resulting reward
    is appended to the ``evals`` of both organisms involved. Once all pairs
    are evaluated, the ``evals`` of every z organism are replaced by a
    one-element list holding the variance of the rewards it collected.

    Returns:
        A tuple ``(avg_fitness, champ_fitness)`` of two lists with one entry
        per epoch: ``pi_pool.fitness`` and the ``fitness`` of ``max(pi_pool)``.
    """
    # Policy population: 20 organisms spawned from the structure file.
    pi_pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=8)

    # z population: seeded from five evenly spaced values on [0, 0.5], each
    # wrapped in its own one-element list.
    z_seeds = [[value] for value in np.linspace(0, 0.5, 5)]
    z_pool = Pool(
        [Organism(z_genome) for z_genome in BasicGenome.from_list(z_seeds, 5)]
    )

    mean_history = []
    best_history = []
    for _ in xrange(150):
        # Advance both populations by one generation, keeping their sizes.
        pi_pool = eonn.epoch(pi_pool, len(pi_pool))
        z_pool = eonn.epoch(z_pool, len(z_pool))

        # Evaluate every policy against every z; both sides record the reward.
        pairings = itertools.product(pi_pool, z_pool)
        for policy, disturbance in pairings:
            outcome = cliff(
                policy.genome, z=[disturbance.weights[0]], verbose=False
            )
            policy.evals.append(outcome)
            disturbance.evals.append(outcome)

        # Collapse each z organism's rewards into their variance.
        # NOTE(review): presumably this makes high-variance z's the fittest;
        # confirm against how Organism derives fitness from evals.
        for disturbance in z_pool:
            disturbance.evals = [np.var(disturbance.evals)]

        mean_history.append(pi_pool.fitness)
        best_history.append(max(pi_pool).fitness)

    return mean_history, best_history
Example #2
0
def acquisition(GP, epochs):
    """
    Select the best (pi, z)-pair to evaluate using GP and GA.

    A policy pool and a z pool are built, then passed through
    ``do_evolution`` together with ``GP`` for ``epochs`` rounds. After each
    round the predicted rewards and ``1.96 * sqrt(MSE)`` are reshaped into
    (len(pi_pool), len(z_pool)) grids, scored with ``score_pi`` / ``score_z``
    and the scores are attached to the organisms.

    Args:
        GP: object forwarded unchanged to ``do_evolution``.
        epochs: number of evolution rounds to run.

    Returns:
        A tuple ``(pi_org, z_org)`` with the maximum organism of each pool.
    """
    # Policy population: 20 organisms spawned from the structure file.
    pi_pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=8)

    # z population: one 1-tuple per step of 1/20 on [0, max_wind).
    z_seeds = [(wind,) for wind in np.arange(0, max_wind, 1. / 20)]
    z_pool = Pool(
        [Organism(z_genome) for z_genome in BasicGenome.from_list(z_seeds, 20)]
    )

    for _ in xrange(epochs):
        evolved = do_evolution(pi_pool, z_pool, GP)
        pi_pool, z_pool, x_predict, reward_predict, MSE = evolved

        # Lay predictions out as one row per policy, one column per z.
        grid_shape = (len(pi_pool), len(z_pool))
        reward_grid = np.reshape(reward_predict, grid_shape)

        # NOTE(review): 1.96 * sqrt(MSE) looks like a 95% confidence
        # half-width on the prediction; confirm with the GP's definition
        # of MSE.
        bound_grid = np.reshape(1.96 * np.sqrt(MSE), grid_shape)

        pi_score = score_pi(reward_grid, bound_grid)
        z_score = score_z(reward_grid, bound_grid)

        # Attach the computed scores to the organisms of each pool.
        add_pi_scores(pi_pool, x_predict, pi_score)
        add_z_scores(z_pool, x_predict, z_score)

    # Current best policy and z.
    return max(pi_pool), max(z_pool)