def __init__(self): """ Initialize agent. """ self.episode = 0 # self.org = Organism(Genome.open(PREFIX + 'generic.net')) self.org = Organism(Genome.open(random.choice(POLICIES))) self.pool = [self.org] self.log = open('%s.dat' % ''.join(random.sample(letters + digits, 10)), 'w')
def coevo():
    """ Co-evolve a policy pool against an adversarial pool of z parameters. """
    # Create a pool for the policies
    pi_pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=8)
    # Create a pool of z's seeded on [0, 0.5]; this seeding could probably be improved
    z_list = [[x] for x in np.linspace(0, 0.5, 5)]
    genomes = BasicGenome.from_list(z_list, 5)
    org_list = [Organism(genome) for genome in genomes]
    z_pool = Pool(org_list)
    avg_fitness = []
    champ_fitness = []
    for i in xrange(150):
        pi_pool = eonn.epoch(pi_pool, len(pi_pool))
        z_pool = eonn.epoch(z_pool, len(z_pool))
        # Evaluate every (policy, z) pair on the cliff task
        for pi_org, z_org in itertools.product(pi_pool, z_pool):
            reward = cliff(pi_org.genome, z=[z_org.weights[0]], verbose=False)
            pi_org.evals.append(reward)
            z_org.evals.append(reward)
        # A z is fit when it discriminates between policies, so score it by reward variance
        for org in z_pool:
            org.evals = [np.var(org.evals)]
        avg_fitness.append(pi_pool.fitness)
        champion = max(pi_pool)
        champ_fitness.append(champion.fitness)
    return avg_fitness, champ_fitness
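# A minimal usage sketch for coevo(), assuming the eonn imports and the cliff
# task from this module are in scope; the printed report is illustrative only.
if __name__ == '__main__':
    avg, champ = coevo()
    for gen, (a, c) in enumerate(zip(avg, champ)):
        print 'gen %3d  avg %.3f  champ %.3f' % (gen, a, c)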
def acquisition(GP, epochs):
    """ Select the best (pi, z)-pair to evaluate, using the GP and a GA. """
    # Create a pool for the policies
    pi_pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=8)
    # Create a pool of z's covering [0, max_wind) in steps of 1/20
    z_list = list(itertools.product(np.arange(0, max_wind, 1. / 20)))
    genomes = BasicGenome.from_list(z_list, 20)
    org_list = [Organism(genome) for genome in genomes]
    z_pool = Pool(org_list)
    for _ in xrange(epochs):
        pi_pool, z_pool, x_predict, reward_predict, MSE = do_evolution(pi_pool, z_pool, GP)
        # Reshape the GP predictions onto the (pi, z) grid and form a 95% upper bound
        reward_predictGrid = np.reshape(reward_predict, (len(pi_pool), len(z_pool)))
        ub = 1.96 * np.sqrt(MSE)
        ub_predictGrid = np.reshape(ub, (len(pi_pool), len(z_pool)))
        pi_score = score_pi(reward_predictGrid, ub_predictGrid)
        z_score = score_z(reward_predictGrid, ub_predictGrid)
        # Add scores to organisms
        add_pi_scores(pi_pool, x_predict, pi_score)
        add_z_scores(z_pool, x_predict, z_score)
    # Return current best pi and z
    pi_org = max(pi_pool)
    z_org = max(z_pool)
    return pi_org, z_org
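# A hedged sketch of the outer loop acquisition() appears built for: fit a GP
# on the (pi, z) -> reward samples gathered so far, pick the next pair, run it,
# and refit. The legacy sklearn GaussianProcess (whose predict(..., eval_MSE=True)
# matches the MSE used above) is an assumption, as is the hypothetical
# to_features() helper for encoding a (pi, z) pair as a feature vector.
from sklearn.gaussian_process import GaussianProcess

def bayes_opt_loop(X, y, iterations=10, epochs=5):
    for _ in xrange(iterations):
        GP = GaussianProcess().fit(X, y)
        pi_org, z_org = acquisition(GP, epochs)
        reward = cliff(pi_org.genome, z=[z_org.weights[0]], verbose=False)
        X.append(to_features(pi_org, z_org))  # to_features is hypothetical
        y.append(reward)
    return X, y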
def __init__(self):
    """ Initialize agent. """
    self.episode = 0
    self.org = Organism(Genome.open(PREFIX + 'generic.net'))
    # Log to a file with a random 10-character suffix
    self.log = open('log_%s.txt' % ''.join(random.sample(TOKENS, 10)), 'w')
import os
import sys

# relative paths required
dir = os.path.dirname(__file__)
prev = os.path.dirname(dir)
two = prev + '/site-packages/'
sys.path.append(prev)
sys.path.append(dir)
sys.path.append(two)

from eonn.genome import Genome
from eonn.organism import Organism
from apprentice import *
import numpy as np
import math

POLICIES = [Genome.open('policies/mdp%i.net' % i) for i in range(10)]
pol = Organism(POLICIES[2])


def normalise_vector(vector):
    """ Return vector scaled to unit length. """
    return np.divide(vector, np.linalg.norm(vector))


def modify_mu(mu_apprentice, mu_modified, mu_expert):
    """ Project onto the line through the latest modified and apprentice
    feature expectations, and append the projection point. """
    factor = np.dot(mu_apprentice[-1] - mu_modified[-1], mu_expert - mu_apprentice[-1]) / np.dot(
        mu_apprentice[-1] - mu_modified[-1], mu_apprentice[-1] - mu_modified[-1])
    mu_new = mu_modified[-1] + factor * (mu_apprentice[-1] - mu_modified[-1])
    mu_modified.append(mu_new)
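# A minimal worked example of modify_mu(), assuming 2-D feature expectations
# stored as numpy arrays; the numbers are illustrative only.
mu_apprentice = [np.array([0.6, 0.2])]
mu_modified = [np.array([0.1, 0.1])]
mu_expert = np.array([0.9, 0.8])
modify_mu(mu_apprentice, mu_modified, mu_expert)
print mu_modified[-1]  # new point on the line through mu_modified[-1] and mu_apprentice[-1]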
def crash_control(self, state):
    """ Fall back to the baseline policy when the state nears its safe limits. """
    if sum([normalize(v, SAFE_LIMITS[i]) for i, v in enumerate(state)]) > 0.15:
        self.crashed = True
        self.org.evals.append(100000)  # heavy penalty for crashing
        self.org = Organism(Genome.open(PREFIX + 'baseline.net'))
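# normalize() is not shown in these snippets; a plausible definition, purely an
# assumption, would rescale a state value against its (low, high) safe limits:
def normalize(value, limits):
    low, high = limits
    return (value - low) / float(high - low)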
def __init__(self, policyNumber):
    """ Initialize expert. """
    self.episode = 0
    # self.org = [Genome.open('policies/mdp%i.net' % i)]
    self.pool = POLICIES[policyNumber]
    self.org = Organism(POLICIES[policyNumber])
def __init__(self):
    """ Initialize agent. """
    self.episode = 0
    self.pool = [Organism(genome) for genome in POLICIES]
    self.backup = Organism(Genome.open(PREFIX + 'generic.net'))
    self.log = open('log_%s.txt' % ''.join(random.sample(TOKENS, 10)), 'w')
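# These __init__ variants assume a module preamble along these lines. PREFIX
# and TOKENS are not defined in the snippets above, so their values here are
# assumptions; TOKENS in particular looks like string.letters + string.digits,
# matching the letters + digits used in the first variant.
import random
from string import letters, digits

PREFIX = 'policies/'   # assumed location of the .net genome files
TOKENS = letters + digits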