def evolve_policy(self):
    """ Evolve a specialized policy using the learned model. """
    pool = spawn(Genome.open(PREFIX + 'model.net'), 50)
    feval = functions.Evaluator(self.model)
    self.org = max(eonn.optimize(pool, feval.call, 2500, verbose=False))
    self.org.evals = []
    self.pool.append(self.org)
def evolve_policy(self):
    """ Learn a model from flight data and evolve specialized policies. """
    params = model.estimate_params(self.log.name)
    noise_std = model.estimate_std(self.log.name, params)
    heli = ghh.Helicopter(params, noise_std, 0.1)
    genome = Genome.open(PREFIX + 'baseline.net')
    self.org = functions.evolve(heli, genome, epochs=500)
def main():
    """ Main function. """
    pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=1)
    # Set evolutionary parameters
    eonn.samplesize = 5      # Sample size used for tournament selection
    eonn.keep = 5            # Nr. of organisms copied to the next generation (elitism)
    eonn.mutate_prob = 0.75  # Probability that offspring is being mutated
    eonn.mutate_frac = 0.2   # Fraction of genes that get mutated
    eonn.mutate_std = 0.1    # Std. dev. of mutation distribution (gaussian)
    eonn.mutate_repl = 0.25  # Probability that a gene gets replaced
    directory = "pics/" + ''.join(rand.sample(letters + digits, 5))
    os.makedirs(directory)
    # Evolve population
    for j in xrange(1, ROUNDS + 1):
        pool = eonn.optimize(pool, cliff, epochs=EPOCHS, evals=EVALS)
        print "AFTER EPOCH", j * EPOCHS
        print "average fitness %.1f" % pool.fitness
        champion = max(pool)
        print "champion fitness %.1f" % champion.fitness
        for i in xrange(10):
            cliff(champion.policy, verbose=True)
        plt.savefig(directory + "/" + str(j * EPOCHS) + ".png")
        plt.clf()
    with open(directory + '/best.net', 'w') as f:
        f.write('%s' % champion.genome)
    print "Done, everything saved in", directory
def __init__(self):
    """ Initialize agent. """
    self.episode = 0
    # self.org = Organism(Genome.open(PREFIX + 'generic.net'))
    self.org = Organism(Genome.open(random.choice(POLICIES)))
    self.pool = [self.org]
    self.log = open('%s.dat' % ''.join(random.sample(letters + digits, 10)), 'w')
def initGP():
    """ Do simulations with random pi, z and create GP, X, y. """
    poolsize = 68
    pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), poolsize, std=10)
    X = []
    for i, org in enumerate(pool):
        org.mutate()
        genome = org.genome
        w = genome.weights
        z = [np.random.uniform(0, 0.3)]
        reward = cliff(genome, z)
        while reward <= 0 and len(X) < poolsize / 2:  # Train input policies to reach the goal.
            org.mutate()
            genome = org.genome
            w = genome.weights
            reward = cliff(genome, z)
        if not len(X):
            X = np.atleast_2d(w + z)
            y = np.atleast_2d([reward])
        else:
            X = np.append(X, [w + z], axis=0)
            y = np.append(y, [reward])
    # Initialize GP with kernel parameters.
    GP = GaussianProcess(theta0=0.1, thetaL=.001, thetaU=1.)
    GP.fit(X, y)
    return GP, X, y
def coevo():
    # Create a pool for the policies
    pi_pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=8)
    # Create a pool of z's, starting around [0.5, 0.5]; should probably be better
    z_list = [[x] for x in np.linspace(0, 0.5, 5)]
    genomes = BasicGenome.from_list(z_list, 5)
    org_list = [Organism(genome) for genome in genomes]
    z_pool = Pool(org_list)
    avg_fitness = []
    champ_fitness = []
    for i in xrange(150):
        pi_pool = eonn.epoch(pi_pool, len(pi_pool))
        z_pool = eonn.epoch(z_pool, len(z_pool))
        for pi_org, z_org in itertools.product(pi_pool, z_pool):
            reward = cliff(pi_org.genome, z=[z_org.weights[0]], verbose=False)
            pi_org.evals.append(reward)
            z_org.evals.append(reward)
        for org in z_pool:
            org.evals = [np.var(org.evals)]
        avg_fitness.append(pi_pool.fitness)
        champion = max(pi_pool)
        champ_fitness.append(champion.fitness)
    return avg_fitness, champ_fitness
def evolve_policy(self, n=1):
    """ Learn a model from flight data and evolve specialized policies. """
    params = model.estimate_params(self.log.name)
    noise_std = model.estimate_std(self.log.name, params)
    heli = ghh.Helicopter(params, noise_std, 0.1)
    genome = Genome.open(PREFIX + 'baseline.net')
    for i in range(n):
        champion = functions.evolve(heli, genome, epochs=500)
        champion.evals = list()
        self.pool.append(champion)
def learn_genomeIRL(self):
    # Input: the reward function
    pool = Pool.spawn(Genome.open('policies/generic.net'), 20)
    # Set evolutionary parameters
    eonnIRL.keep = 15
    eonnIRL.mutate_prob = 0.4
    eonnIRL.mutate_frac = 0.1
    eonnIRL.mutate_std = 0.8
    eonnIRL.mutate_repl = 0.15
    # Evolve population (optimize is imported from EONNIRL)
    pool = eonnIRL.optimize(pool, self.percieved_eval, 400)
    champion = max(pool)
    # Print results
    print '\nerror:', math.exp(1 / self.percieved_eval(champion.policy))
    # print '\ngenome:\n%s' % champion.genome
    return champion.policy
def learnGenomeIRL(theta):
    # Input: the reward function
    pool = Pool.spawn(Genome.open('policies/generic.net'), 20)
    # Set evolutionary parameters
    eonnIRL.keep = 15
    eonnIRL.mutate_prob = 0.4
    eonnIRL.mutate_frac = 0.1
    eonnIRL.mutate_std = 0.8
    eonnIRL.mutate_repl = 0.15
    # Evolve population
    pool = eonnIRL.optimize(pool, hoverIRL, theta)
    champion = max(pool)
    # Print results
    print '\nerror:', math.exp(1 / hover(champion.policy))
    print '\nerror:', math.exp(1 / hoverIRL(champion.policy, theta))
    print '\ngenome:\n%s' % champion.genome
def main():
    """ Main function. """
    pool = Pool.spawn(Genome.open('mc.net'), 20, std=5.0)
    # Set evolutionary parameters
    eonn.keep = 5
    eonn.mutate_prob = 0.9
    eonn.mutate_frac = 0.2
    eonn.mutate_std = 8.0
    eonn.mutate_repl = 0.1
    # Evolve population
    pool = eonn.optimize(pool, mc)
    champion = max(pool)
    # Print results
    print '\ntrace:'
    mc(champion.policy, verbose=True)
    print '\ngenome:\n%s' % champion.genome
def find_best(GP, epochs=100):
    """ Find the best policy in the GP. """
    pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 50, std=8)
    all_z = list(np.linspace(0, max_wind, 10))
    for n in xrange(epochs):
        if n != 0:
            pool = eonn.epoch(pool, len(pool))
        weights = [np.append(org.weights, z) for org in pool for z in all_z]
        reward = GP.predict(weights)
        for i in xrange(len(pool)):
            pool[i].evals = list(reward[i * len(all_z):(i + 1) * len(all_z)])
    champion = max(pool)
    return champion
def main():
    """ Main function. """
    pool = Pool.spawn(Genome.open('mc.net'), 20, std=5.0)
    # Set evolutionary parameters
    eonn.KEEP = 5
    eonn.MUTATE_PROB = 0.9
    eonn.MUTATE_FRAC = 0.2
    eonn.MUTATE_STD = 8.0
    eonn.MUTATE_REPL = 0.1
    # Evolve population
    pool = eonn.optimize(pool, mc)
    champion = max(pool)
    # Print results
    print '\ntrace:'
    mc(champion.policy, verbose=True)
    print '\ngenome:\n%s' % champion.genome
def find_best_upper(GP, epochs=100):
    """ Find the policy with the highest upper bound. """
    pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 50, std=8)
    all_z = list(np.linspace(0, max_wind, 10))
    for n in xrange(epochs):
        if n != 0:
            pool = eonn.epoch(pool, len(pool))
        weights = [np.append(org.weights, z) for org in pool for z in all_z]
        reward, MSE = GP.predict(weights, eval_MSE=True)
        reward += 1.96 * np.sqrt(MSE)
        for i in xrange(len(pool)):
            pool[i].evals = list(reward[i * len(all_z):(i + 1) * len(all_z)])
    champion = max(pool)
    return champion
def readPolicies():
    """ Read in policies from the relative paths littleWindPath and muchWindPath. """
    policies = []
    for ptype in [regularPolicies, rarePolicies, GPSmartPolicies, GPExtremePolicies]:
        typePolicies = []
        for policy in os.walk(ptype).next()[1]:
            org = Network(Genome.open(ptype + "/" + policy + "/best.net"))
            typePolicies.append(org)
        policies.append(typePolicies)
    return policies
def acquisition(GP, epochs):
    """ Select the best (pi, z)-pair to evaluate using GP and GA. """
    # Create a pool for the policies
    pi_pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=8)
    # Create a pool of z's, starting around [0.5, 0.5]; should probably be better
    z_list = list(itertools.product(np.arange(0, max_wind, 1. / 20)))
    genomes = BasicGenome.from_list(z_list, 20)
    org_list = [Organism(genome) for genome in genomes]
    z_pool = Pool(org_list)
    for _ in xrange(epochs):
        pi_pool, z_pool, x_predict, reward_predict, MSE = do_evolution(pi_pool, z_pool, GP)
        # Get scores
        reward_predictGrid = np.reshape(reward_predict, (len(pi_pool), len(z_pool)))
        ub = 1.96 * np.sqrt(MSE)
        ub_predictGrid = np.reshape(ub, (len(pi_pool), len(z_pool)))
        pi_score = score_pi(reward_predictGrid, ub_predictGrid)
        z_score = score_z(reward_predictGrid, ub_predictGrid)
        # Add scores to organisms
        add_pi_scores(pi_pool, x_predict, pi_score)
        add_z_scores(z_pool, x_predict, z_score)
    # Return current best pi and z
    pi_org = max(pi_pool)
    z_org = max(z_pool)
    return pi_org, z_org
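# initGP(), acquisition(), cliff() and find_best() above are the pieces of a GP-based
# policy search: initGP() builds the initial training set, acquisition() proposes the
# next (pi, z)-pair, cliff() evaluates it on the real domain, and find_best() extracts
# a champion from the final model. Below is a minimal sketch (not part of the original
# source) of how they might be tied together; gp_policy_search, the number of outer
# iterations and the acquisition epochs are illustrative assumptions.
def gp_policy_search(iterations=25):
    """ Sketch: iteratively refine the GP on new rollouts, then return the predicted best policy. """
    GP, X, y = initGP()
    for _ in xrange(iterations):
        # Propose a promising policy/wind pair and evaluate it on the real domain.
        pi_org, z_org = acquisition(GP, epochs=10)
        z = [z_org.weights[0]]
        reward = cliff(pi_org.genome, z)
        # Grow the training set with the new sample and refit the GP.
        X = np.append(X, [list(pi_org.genome.weights) + z], axis=0)
        y = np.append(y, [reward])
        GP.fit(X, y)
    return find_best(GP)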
from eonn import eonn  # assumed import path for the eonn evolution module
from eonn.genome import Genome
from eonn.organism import Pool


def xor(policy, verbose=False):
    """ XOR evaluation function. """
    err = 0.0
    input = [(i, j) for i in range(2) for j in range(2)]
    for i in input:
        output = policy.propagate(i, 1)
        err += (output[0] - (i[0] ^ i[1])) ** 2
        if verbose:
            print i, '-> %.4f' % output[0]
    return 1.0 / err


if __name__ == '__main__':
    pool = Pool.spawn(Genome.open('xor.net'), 30)
    # Set evolutionary parameters
    eonn.keep = 1
    eonn.mutate_prob = 0.9
    eonn.mutate_frac = 0.25
    eonn.mutate_std = 0.8
    eonn.mutate_repl = 0.2
    # Evolve population
    pool = eonn.optimize(pool, xor)
    champion = max(pool)
    # Print results
    print '\noutput:'
    xor(champion.policy, True)
    print '\ngenome:\n%s' % champion.genome
import math

from eonn import eonn  # assumed import path for the eonn evolution module
from eonn.genome import Genome
from eonn.organism import Pool
from helicopter.helicopter import Helicopter, XcellTempest


def hover(policy):
    """ Helicopter evaluation function. """
    state, sum_error = heli.reset()
    while not heli.terminal:
        action = policy.propagate(state, 1)
        state, error = heli.update(action)
        sum_error += error
    return 1 / math.log(sum_error)


if __name__ == '__main__':
    heli = Helicopter(XcellTempest.params, XcellTempest.noise_std)
    pool = Pool.spawn(Genome.open('baseline.net'), 20)
    # Set evolutionary parameters
    eonn.keep = 15
    eonn.mutate_prob = 0.9
    eonn.mutate_frac = 0.1
    eonn.mutate_std = 0.8
    eonn.mutate_repl = 0.15
    # Evolve population
    pool = eonn.optimize(pool, hover)
    champion = max(pool)
    # Print results
    print '\nerror:', math.exp(1 / hover(champion.policy))
    print '\ngenome:\n%s' % champion.genome
import sys
import random

sys.path.append('/home/koppejan/projects/helicopter')

import functions
from string import letters, digits
from eonn.genome import Genome
from eonn.organism import Organism
from helicopter import ghh, model
from helicopter.quaternion import quaternion_from_orientation

TOKENS = letters + digits
PREFIX = '/home/koppejan/helicopter/helicopter/ghh09/policies/'
POLICIES = [Genome.open(PREFIX + 'mdp%i.net' % i) for i in (3, 6)]


class Agent:
    """ Hybrid agent, a combination of model-free and model-based learning. """

    def __init__(self):
        """ Initialize agent. """
        self.episode = 0
        self.pool = [Organism(genome) for genome in POLICIES]
        self.backup = Organism(Genome.open(PREFIX + 'generic.net'))
        self.log = open('log_%s.txt' % ''.join(random.sample(TOKENS, 10)), 'w')

    def start(self):
        """ Start a new episode. """
        self.set_policy()
        self.episode += 1
        self.reward = 0
def __init__(self):
    """ Initialize agent. """
    self.episode = 0
    self.org = Organism(Genome.open(PREFIX + 'generic.net'))
    self.log = open('log_%s.txt' % ''.join(random.sample(TOKENS, 10)), 'w')
from eonn import eonn  # assumed import path for the eonn evolution module
from eonn.genome import Genome
from eonn.organism import Pool


def xor(policy, verbose=False):
    """ XOR evaluation function. """
    err = 0.0
    input = [(i, j) for i in range(2) for j in range(2)]
    for i in input:
        output = policy.propagate(i, 1)
        err += (output[0] - (i[0] ^ i[1])) ** 2
        if verbose:
            print i, '-> %.4f (%d)' % (output[0], round(output[0]))
    return 1.0 / err


if __name__ == '__main__':
    pool = Pool.spawn(Genome.open('xor.net'), 30)
    # Set evolutionary parameters
    eonn.KEEP = 1
    eonn.MUTATE_PROB = 0.9
    eonn.MUTATE_FRAC = 0.25
    eonn.MUTATE_STD = 0.8
    eonn.MUTATE_REPL = 0.2
    # Evolve population
    pool = eonn.optimize(pool, xor)
    champion = max(pool)
    # Print results
    print '\noutput:'
    xor(champion.policy, True)
    print '\ngenome:\n%s' % champion.genome
def __init__(self):
    """ Initialize agent. """
    self.episode = 0
    self.pool = [Organism(genome) for genome in POLICIES]
    self.backup = Organism(Genome.open(PREFIX + 'generic.net'))
    self.log = open('log_%s.txt' % ''.join(random.sample(TOKENS, 10)), 'w')
from eonn.genome import Genome
from eonn.organism import Organism

POLICIES = [Genome.open('policies/mdp%i.net' % i) for i in range(10)]


class Expert:
    """ Model-free agent, chooses the best policy from a set of pre-trained policies. """

    def __init__(self, policyNumber):
        """ Initialize expert. """
        self.episode = 0
        # self.org = [Genome.open('policies/mdp%i.net' % i)]
        self.pool = POLICIES[policyNumber]
        self.org = Organism(POLICIES[policyNumber])

    def start(self):
        """ Start a new episode. """
        self.reward = 0
        self.steps = 0

    def step(self, state, reward):
        """ Choose an action based on the current state. """
        self.steps += 1
        self.reward += reward  # DEBUG
        action = self.org.policy.propagate(state, 1)
        return action

    def end(self, reward):
def crash_control(self, state):
    if sum([normalize(v, SAFE_LIMITS[i]) for i, v in enumerate(state)]) > 0.15:
        self.crashed = True
        self.org.evals.append(100000)
        self.org = Organism(Genome.open(PREFIX + 'baseline.net'))
import sys, os

# Relative paths required
dir = os.path.dirname(__file__)
prev = os.path.dirname(dir)
two = prev + '/site-packages/'
sys.path.append(prev)
sys.path.append(dir)
sys.path.append(two)

from eonn.genome import Genome
from eonn.organism import Organism
from apprentice import *
import numpy as np
import math

POLICIES = [Genome.open('policies/mdp%i.net' % i) for i in range(10)]
pol = Organism(POLICIES[2])


def normalise_vector(vector):
    return np.divide(vector, np.linalg.norm(vector))


def modify_mu(mu_apprentice, mu_modified, mu_expert):
    factor = (np.dot(mu_apprentice[-1] - mu_modified[-1], mu_expert - mu_apprentice[-1]) /
              np.dot(mu_apprentice[-1] - mu_modified[-1], mu_apprentice[-1] - mu_modified[-1]))
    mu_new = mu_modified[-1] + factor * (mu_apprentice[-1] - mu_modified[-1])
    mu_modified.append(mu_new)
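# A hypothetical numeric check (not part of the original source) of modify_mu, which
# moves the running estimate mu_modified along the direction from the latest modified
# point towards the latest apprentice point, in the spirit of the projection step used
# in apprenticeship learning. All values below are made up for illustration.
if __name__ == '__main__':
    mu_apprentice = [np.array([0.0, 0.0]), np.array([1.0, 0.0])]
    mu_modified = [np.array([0.0, 0.0])]
    mu_expert = np.array([2.0, 1.0])
    modify_mu(mu_apprentice, mu_modified, mu_expert)
    print mu_modified[-1]  # -> [ 1.  0.]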
from eonn.genome import Genome
from eonn.organism import Organism

# POLICIES = [Genome.open('policies/mdp%i.net' % i) for i in range(10)]
POLICIES = [Genome.open('policies/mdp0.net')]


class Agent:
    """ Model-free agent, chooses the best policy from a set of pre-trained policies. """

    def __init__(self):
        """ Initialize agent. """
        self.episode = 0
        self.pool = [Organism(genome) for genome in POLICIES]

    def start(self):
        """ Start a new episode. """
        self.set_policy()
        self.episode += 1
        self.reward = 0
        self.steps = 0

    def step(self, state, reward):
        """ Choose an action based on the current state. """
        self.steps += 1
        self.reward += reward  # DEBUG
        action = self.org.policy.propagate(state, 1)
        if self.steps == 1:
            print '%+.10f ' * 12 % tuple(state),
            print '%+.10f ' * 4 % tuple(action)