Example #1
  def __init__(self):
    """ Initialize agent. """
    self.episode = 0
    # self.org = Organism(Genome.open(PREFIX + 'generic.net'))
    self.org = Organism(Genome.open(random.choice(POLICIES)))
    self.pool = [self.org]
    # Log to a .dat file with a random 10-character name.
    self.log = open('%s.dat' % ''.join(random.sample(letters + digits, 10)), 'w')
Example #2
def coevo():
    # Create a pool for the policies
    pi_pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=8)

    # Create a pool of z values spread evenly over [0, 0.5]
    z_list = [[x] for x in np.linspace(0, 0.5, 5)]

    genomes = BasicGenome.from_list(z_list, 5)
    org_list = [Organism(genome) for genome in genomes]
    z_pool = Pool(org_list)
    avg_fitness = []
    champ_fitness = []
    for i in xrange(150):
        # Evolve both populations for one generation.
        pi_pool = eonn.epoch(pi_pool, len(pi_pool))
        z_pool = eonn.epoch(z_pool, len(z_pool))
        # Evaluate every (policy, z) pair on the cliff task.
        for pi_org, z_org in itertools.product(pi_pool, z_pool):
            reward = cliff(pi_org.genome, z=[z_org.weights[0]], verbose=False)
            pi_org.evals.append(reward)
            z_org.evals.append(reward)
        # A z's fitness is the variance of the rewards it induced across policies.
        for org in z_pool:
            org.evals = [np.var(org.evals)]

        # Record average and champion fitness for this generation.
        avg_fitness.append(pi_pool.fitness)
        champion = max(pi_pool)
        champ_fitness.append(champion.fitness)
    return avg_fitness, champ_fitness
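
A minimal usage sketch for coevo() above, not part of the original module: run the coevolution loop and plot the two learning curves it returns. The matplotlib dependency is an assumption.

# Usage sketch (assumed): plot the average and champion fitness returned by coevo().
import matplotlib.pyplot as plt

avg_fitness, champ_fitness = coevo()
plt.plot(avg_fitness, label='average fitness')
plt.plot(champ_fitness, label='champion fitness')
plt.xlabel('generation')
plt.ylabel('fitness')
plt.legend()
plt.show()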
Example #3
def acquisition(GP, epochs):
    """
		Select the best (pi,z)-pair to evaluate using GP and GA
	"""

    # Create a pool for the policies
    pi_pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=8)

    # Create a pool of z values covering [0, max_wind) in steps of 1/20
    z_list = list(itertools.product(np.arange(0, max_wind, 1. / 20)))

    genomes = BasicGenome.from_list(z_list, 20)
    org_list = [Organism(genome) for genome in genomes]
    z_pool = Pool(org_list)

    for _ in xrange(epochs):
        pi_pool, z_pool, x_predict, reward_predict, MSE = do_evolution(
            pi_pool, z_pool, GP)

        # Reshape the GP's predicted rewards onto the (policy, z) grid.
        reward_predictGrid = np.reshape(reward_predict,
                                        (len(pi_pool), len(z_pool)))

        # Half-width of a 95% confidence interval from the GP's predictive MSE.
        ub = 1.96 * np.sqrt(MSE)

        ub_predictGrid = np.reshape(ub, (len(pi_pool), len(z_pool)))

        pi_score = score_pi(reward_predictGrid, ub_predictGrid)
        z_score = score_z(reward_predictGrid, ub_predictGrid)

        # add scores to organisms

        add_pi_scores(pi_pool, x_predict, pi_score)
        add_z_scores(z_pool, x_predict, z_score)

    # return current best pi and z
    pi_org = max(pi_pool)
    z_org = max(z_pool)

    return pi_org, z_org
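
The line ub = 1.96 * np.sqrt(MSE) above turns the GP's predictive MSE into the half-width of a 95% confidence interval. The toy sketch below illustrates that bound on a small made-up grid; score_pi and score_z are not shown in this example, so a plain argmax over an upper-confidence-bound grid stands in for them here.

import numpy as np

# Hypothetical GP output on a 2-policy x 3-z grid (made-up numbers).
reward_mean = np.array([[0.20, 0.50, 0.10],
                        [0.40, 0.15, 0.35]])
reward_mse = np.array([[0.010, 0.040, 0.020],
                       [0.020, 0.030, 0.005]])

# 95% upper confidence bound per (policy, z) cell.
ucb = reward_mean + 1.96 * np.sqrt(reward_mse)

# Pick the most promising (policy, z) pair under this bound.
pi_idx, z_idx = np.unravel_index(np.argmax(ucb), ucb.shape)
print(pi_idx, z_idx)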
Example #4
 def __init__(self):
     """ Initialize agent. """
     self.episode = 0
     self.org = Organism(Genome.open(PREFIX + 'generic.net'))
     self.log = open('log_%s.txt' % ''.join(random.sample(TOKENS, 10)), 'w')
Example #5
# Extend sys.path with relative paths so the local eonn package can be imported.
import os
import sys

dir = os.path.dirname(__file__)
prev = os.path.dirname(dir)

two = prev + '/site-packages/'
sys.path.append(prev)
sys.path.append(dir)
sys.path.append(two)
from eonn.genome import Genome
from eonn.organism import Organism
from apprentice import *
import numpy as np
import math

POLICIES = [Genome.open('policies/mdp%i.net' % i) for i in range(10)]
pol = Organism(POLICIES[2])


def normalise_vector(vector):
    """ Return the vector scaled to unit length. """
    return np.divide(vector, np.linalg.norm(vector))


def modify_mu(mu_apprentice, mu_modified, mu_expert):
    """ Move the running (modified) feature expectations toward the latest
    apprentice feature expectations by a projection-style factor and append
    the new point to mu_modified. """
    factor = np.dot(mu_apprentice[-1] - mu_modified[-1],
                    mu_expert - mu_apprentice[-1]) / np.dot(
                        mu_apprentice[-1] - mu_modified[-1],
                        mu_apprentice[-1] - mu_modified[-1])
    mu_new = mu_modified[-1] + factor * (mu_apprentice[-1] - mu_modified[-1])
    mu_modified.append(mu_new)
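
A toy call sketch for modify_mu() above; the feature-expectation vectors are made-up two-dimensional values, not output from the MDP policies, and are chosen so the denominator of the factor is non-zero.

# Toy example (made-up vectors): two apprentice estimates and one running
# "modified" estimate; modify_mu appends the updated estimate in place.
mu_expert = np.array([1.0, 0.0])
mu_apprentice = [np.array([0.2, 0.8]), np.array([0.6, 0.5])]
mu_modified = [np.array([0.2, 0.8])]

modify_mu(mu_apprentice, mu_modified, mu_expert)
print(mu_modified[-1])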

Example #6
 def crash_control(self, state):
   """ Flag a crash and fall back to the baseline policy when the normalized state exceeds the safety threshold. """
   if sum([normalize(v, SAFE_LIMITS[i]) for i, v in enumerate(state)]) > 0.15:
     self.crashed = True
     self.org.evals.append(100000)
     self.org = Organism(Genome.open(PREFIX + 'baseline.net'))
Example #7
 def __init__(self, policyNumber):
     """ Initialize expert. """
     self.episode = 0
     # self.org = [Genome.open('policies/mdp%i.net' % i)]
     self.pool = POLICIES[policyNumber]
     self.org = Organism(POLICIES[policyNumber])
Example #8
 def __init__(self):
     """ Initialize agent. """
     self.episode = 0
     self.pool = [Organism(genome) for genome in POLICIES]
     self.backup = Organism(Genome.open(PREFIX + 'generic.net'))
     self.log = open('log_%s.txt' % ''.join(random.sample(TOKENS, 10)), 'w')