Ejemplo n.º 1
 def evolve_policy(self):
     """ Evolve a specialized policy against the learned model.

     A pool of 50 organisms is spawned from the genome stored in
     'model.net' and optimized with an evaluator built around self.model.
     The best organism of the resulting pool is kept in self.org, and its
     evaluation history is reset to an empty list.
     """
     seed_genome = Genome.open(PREFIX + "model.net")
     population = spawn(seed_genome, 50)
     evaluator = functions.Evaluator(self.model)
     # Third positional argument is presumably the number of epochs -- confirm
     # against the signature of eonn.optimize.
     evolved = eonn.optimize(population, evaluator.call, 2500, verbose=False)
     self.org = max(evolved)
     self.org.evals = []
Ejemplo n.º 3
 def evolve_policy(self):
     """ Fit a helicopter model to the logged flight data and evolve a policy.

     Model parameters and noise levels are estimated from the file behind
     self.log; a ghh.Helicopter built from them is then used to evolve the
     genome stored in 'baseline.net'. The result is stored in self.org.
     """
     estimated = model.estimate_params(self.log.name)
     estimated_std = model.estimate_std(self.log.name, estimated)
     simulator = ghh.Helicopter(estimated, estimated_std, 0.1)
     baseline = Genome.open(PREFIX + 'baseline.net')
     self.org = functions.evolve(simulator, baseline, epochs=500)
def main():
    """ Main function. """
    pool = Pool.spawn(Genome.open(NN_STRUCTURE_FILE), 20, std=1)

    # Set evolutionary parameters
    eonn.samplesize = 5  # Sample size used for tournament selection
    eonn.keep = 5  # Nr. of organisms copied to the next generation (elitism)
    eonn.mutate_prob = 0.75  # Probability that offspring is being mutated
    eonn.mutate_frac = 0.2  # Fraction of genes that get mutated
    eonn.mutate_std = 0.1  # Std. dev. of mutation distribution (gaussian)
    eonn.mutate_repl = 0.25  # Probability that a gene gets replaced

    directory = "pics/" + ''.join(rand.sample(letters + digits, 5))
    # Evolve population
    for j in xrange(1, ROUNDS + 1):
        pool = eonn.optimize(pool, cliff, epochs=EPOCHS, evals=EVALS)
        print "AFTER EPOCH", j * EPOCHS
        print "average fitness %.1f" % pool.fitness
        champion = max(pool)
        print "champion fitness %.1f" % champion.fitness
        for i in xrange(10):
            cliff(champion.policy, verbose=True)
        plt.savefig(directory + "/" + str(j * EPOCHS) + ".png")
    with open(directory + '/best.net', 'w') as f:
        f.write('%s' % champion.genome)
    print "Done, everything saved in ", directory
Archivo: heli.py Proyecto: afcarl/ATAA
from eonn.genome import Genome
from eonn.organism import Pool
from helicopter.helicopter import Helicopter, XcellTempest

def hover(policy):
    """ Score a policy on one helicopter episode.

    The module-level `heli` simulator is reset and then driven by the policy
    until it reports a terminal state, accumulating the error returned by
    every update. The score is the reciprocal of the natural log of that
    accumulated error.
    """
    observation, total_error = heli.reset()
    while True:
        if heli.terminal:
            break
        command = policy.propagate(observation, 1)
        observation, step_error = heli.update(command)
        total_error += step_error
    log_error = math.log(total_error)
    return 1 / log_error

if __name__ == '__main__':
    heli = Helicopter(XcellTempest.params, XcellTempest.noise_std)
    pool = Pool.spawn(Genome.open('baseline.net'), 20)
    # Set evolutionary parameters
    eonn.keep = 15
    eonn.mutate_prob = 0.9
    eonn.mutate_frac = 0.1
    eonn.mutate_std = 0.8
    eonn.mutate_repl = 0.15
    # Evolve population
    pool = eonn.optimize(pool, hover)
    champion = max(pool)
    # Print results
    print '\nerror:', math.exp(1 / hover(champion.policy))
    print '\ngenome:\n%s' % champion.genome
import sys
import random


import functions
from string import letters, digits
from eonn.genome import Genome
from eonn.organism import Organism
from helicopter import ghh, model
from helicopter.quaternion import quaternion_from_orientation

# Alphabet (ASCII letters and digits) sampled by Agent.__init__ to build the
# random part of its log file name.
TOKENS = letters + digits
# Absolute, machine-specific directory that holds the stored policy genomes.
PREFIX = '/home/koppejan/helicopter/helicopter/ghh09/policies/'
# Genomes 'mdp3.net' and 'mdp6.net', opened once at import time.
POLICIES = [Genome.open(PREFIX + 'mdp%i.net' % i) for i in (3, 6)]

class Agent:
    """ Hybrid agent, a combination of model-free and model-based learning. """
    def __init__(self):
        """ Set up the agent's initial state.

        Starts the episode counter at zero, wraps every genome in POLICIES in
        an Organism to form the pool, loads the 'generic.net' genome as the
        backup organism, and opens a log file for writing whose name carries
        a random 10-character suffix.
        """
        self.episode = 0
        self.pool = list(map(Organism, POLICIES))
        generic = Genome.open(PREFIX + 'generic.net')
        self.backup = Organism(generic)
        suffix = ''.join(random.sample(TOKENS, 10))
        self.log = open('log_%s.txt' % suffix, 'w')

    def start(self):
        """ Start a new episode """
        self.episode += 1
        self.reward = 0
