Example 1
    def train(self, episodes, maxSteps):

        avgReward = 0

        # set up environment and task
        self.env = InfoMaxEnv(self.objectNames, self.actionNames,
                              self.numCategories)
        self.task = InfoMaxTask(self.env, maxSteps=maxSteps,
                                do_decay_beliefs=True, uniformInitialBeliefs=True)

        # create neural net and learning agent
        self.params = buildNetwork(self.task.outdim, self.task.indim,
                                   bias=True, outclass=SoftmaxLayer)

        if self._PGPE:
            self.agent = OptimizationAgent(self.params,
                                           PGPE(minimize=False, verbose=False))
        elif self._CMAES:
            self.agent = OptimizationAgent(
                self.params, CMAES(minimize=False, verbose=False))

        # init and perform experiment
        exp = EpisodicExperiment(self.task, self.agent)

        for i in range(episodes):
            exp.doEpisodes(1)
            avgReward += self.task.getTotalReward()
            print "reward episode ", i, self.task.getTotalReward()

        # print average reward over training
        print "\naverage reward over training = ", avgReward / episodes

        # save trained network
        self._saveWeights()
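The _saveWeights helper called above is not part of the snippet. As a minimal, hypothetical sketch only, one way such a method could persist the trained controller is to pickle the flat weight vector of the network that train() stores in self.params (the filename is made up, and pickle is assumed to be imported at module level):

    def _saveWeights(self, filename='infomax_weights.pkl'):
        # self.params is the network built by buildNetwork() above;
        # its .params attribute is the flat array of connection weights
        with open(filename, 'wb') as f:
            pickle.dump(self.params.params, f)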
Example 2
env = None
for runs in range(numbExp):
    # create environment
    #Options: Bool(OpenGL), Bool(Realtime simu. while client is connected), ServerIP(default:localhost), Port(default:21560)
    if env is not None: env.closeSocket()
    env = ShipSteeringEnvironment()
    # create task
    task = GoNorthwardTask(env, maxsteps=500)
    # create controller network
    net = buildNetwork(task.outdim, task.indim, outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(
        net,
        PGPE(learningRate=0.3,
             sigmaLearningRate=0.15,
             momentum=0.0,
             epsilon=2.0,
             rprop=False,
             storeAllEvaluations=True))
    et.agent = agent
    #create experiment
    experiment = EpisodicExperiment(task, agent)

    #Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
#To view what the simulation is doing at the moment, set the environment's OpenGL option to True, go to pybrain/rl/environments/ode/ and start viewer.py (python-openGL must be installed, see the PyBrain documentation)
Example 3
from pybrain.optimization import ExactNES
from pybrain.rl.experiments import EpisodicExperiment

batch = 2  #number of samples per learning step
prnts = 100  #number of learning steps after which results are printed
epis = 4000 / batch / prnts  #number of rollouts
numbExp = 10  #number of experiments
et = ExTools(batch, prnts)  #tool for printing and plotting

for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()
    # create task
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, ExactNES(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    #Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        print "Epsilon   : ", agent.learner.sigma
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
Example 4
hiddenUnits = 4
batch=2 #number of samples per learning step
prnts=1 #number of learning steps after which results are printed
epis=5000000/batch/prnts #number of rollouts
numbExp=10 #number of experiments
et = ExTools(batch, prnts) #tool for printing and plotting

for runs in range(numbExp):
    # create environment
    #Options: Bool(OpenGL), Bool(Realtime simu. while client is connected), ServerIP(default:localhost), Port(default:21560)
    env = FlexCubeEnvironment()
    # create task
    task = WalkTask(env)
    # create controller network
    net = buildNetwork(len(task.getObservation()), hiddenUnits, env.actLen, outclass=TanhLayer)    
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, SimpleSPSA(storeAllEvaluations = True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    #Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
#To view what the simulation is doing at the moment, go to pybrain/rl/environments/flexcube/ and start renderer.py (python-openGL must be installed)
Example 5
# -*- coding: utf-8 -*-
"""
Created on Thu Mar  7 20:41:24 2013

Just to try and get things working with a GA


@author: david
"""

from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.parametercontainer import ParameterContainer
from pybrain.rl.environments.functions.unimodal import TabletFunction
from pybrain.rl.environments.cartpole.balancetask import BalanceTask, CartPoleEnvironment
from pybrain.optimization import GA
from pybrain.rl.agents import LearningAgent, OptimizationAgent

environment = CartPoleEnvironment()
task = BalanceTask()

nn = buildNetwork(task.outdim, 6, task.indim)

learning_agent = OptimizationAgent(nn, GA())
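The snippet ends right after the agent is created. A hedged sketch of how it could be wired into an experiment and run, following the same EpisodicExperiment pattern used in the other examples (the episode count here is arbitrary):

from pybrain.rl.experiments import EpisodicExperiment

# link the GA-based agent and the balancing task, then run a few episodes
experiment = EpisodicExperiment(task, learning_agent)
experiment.doEpisodes(10)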
Example 6
    rospy.loginfo('Using %d CPUs for experiments', num_cpus)

    exp_desciptions = []

    for i in range(num_experiments):
        # set up environment, task, neural net, agent, and experiment
        env = InfoMaxEnv(object_names, action_names, num_objects, False)
        task = InfoMaxTask(env, max_steps=max_steps)
        net = buildNetwork(task.outdim,
                           task.indim,
                           bias=True,
                           outclass=SoftmaxLayer)

        if algorithm == 'pgpe':
            agent = OptimizationAgent(
                net,
                PGPE(storeAllEvaluations=True, minimize=False, verbose=False))
        elif algorithm == 'cmaes':
            agent = OptimizationAgent(net, CMAES(minimize=False,
                                                 verbose=False))

        experiment = EpisodicExperiment(task, agent)

        exp_desciptions.append([i, agent, experiment, task])

    pool = Pool(processes=num_cpus)

    res = []
    if algorithm == 'pgpe':
        res = pool.map(run_experiment, exp_desciptions)
    elif algorithm == 'cmaes':
Example 7
def main():
    if len(sys.argv) != 2:
        print 'Please provide a path to a model data directory.'
        print ('The script will load the newest model data from the directory, '
               'then continue to improve that model')
        sys.exit(0)

    model_directory = sys.argv[1]
    existing_models = sorted(glob(os.path.join(model_directory, '*.rlmdl')))

    if existing_models:
        newest_model_name = existing_models[-1]
        iteration_count = int(newest_model_name[-12:-6]) + 1
        print 'Loading model {}'.format(newest_model_name)

        newest_model = open(newest_model_name, 'r')
        agent = pickle.load(newest_model)
    else:
        net = buildNetwork(Environment.outdim,
                           Environment.outdim + Environment.indim,
                           Environment.indim)
        agent = OptimizationAgent(net, PGPE())
        iteration_count = 1

    environment = Environment(LOCAL_HOST, PORT, PATH_TO_SCENE)
    task = Task(environment)


    experiment = EpisodicExperiment(task, agent)


    def signal_handler(signal, frame):
        print 'Exiting gracefully'
        environment.teardown()
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)


    while True:
        time.sleep(1)

        print '>>>>> Running iteration {}'.format(iteration_count)
        # NOTE this weird stuff is hacky, but we need it to plug in our autosave
        # stuff properly. Took a long time to figure this out.
        experiment.optimizer.maxEvaluations = (
            experiment.optimizer.numEvaluations + experiment.optimizer.batchSize)

        try:
            experiment.doEpisodes()
        except Exception as e:
            print 'ERROR RUNNING SIMULATION: \n{}'.format(e)
            environment.teardown()
        else:
            if iteration_count % AUTOSAVE_INTERVAL == 0:
                filename = str(iteration_count).zfill(6) + '.rlmdl'
                filename = os.path.join(model_directory, filename)
                f = open(filename, 'w+')
                print 'Saving model to {}'.format(filename)

                pickle.dump(agent, f)

            iteration_count += 1

        print 'Iteration finished <<<<<'
Example 8
from pybrain.rl.environments.cartpole.balancetask import BalanceTask
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import CMAES, HillClimber
from pybrain.utilities import fListToString
from pybrain.rl.experiments import EpisodicExperiment

# any episodic task
task = BalanceTask()

# any neural network controller
net = buildNetwork(task.outdim, 1, task.indim)

# any optimization algorithm to be plugged in, for example:
# learner = CMAES(storeAllEvaluations = True)
# or:
learner = HillClimber(storeAllEvaluations=True)

# in a non-optimization case the agent would be a LearningAgent:
# agent = LearningAgent(net, ENAC())
# here it is an OptimizationAgent:
agent = OptimizationAgent(net, learner)

# the agent and task are linked in an Experiment
# and everything else happens under the hood.
exp = EpisodicExperiment(task, agent)
exp.doEpisodes(100)

print('Episodes learned from:', len(learner._allEvaluations))
n, fit = learner._bestFound()
print('Best fitness found:', fit)
print('with this network:')
print(n)
print('containing these parameters:')
print(fListToString(n.params, 4))
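As an optional follow-up (not part of the original example), the best module returned by _bestFound() can be replayed for one more episode through the task's standard interface, assuming n is the best network found above:

# replay a single episode with the best parameters found by the optimizer
task.reset()
n.reset()
while not task.isFinished():
    task.performAction(n.activate(task.getObservation()))
print('Reward of this episode:', task.getTotalReward())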
Example 9
numbExp = 10  #number of experiments
et = ExTools(batch, prnts)  #tool for printing and plotting

env = None
for runs in range(numbExp):
    # create environment
    #Options: Bool(OpenGL), Bool(Realtime simu. while client is connected), ServerIP(default:localhost), Port(default:21560)
    if env is not None: env.closeSocket()
    env = JohnnieEnvironment()
    # create task
    task = StandingTask(env)
    # create controller network
    net = buildNetwork(len(task.getObservation()),
                       hiddenUnits,
                       env.actLen,
                       outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, PGPE(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    #Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
#To view what the simulation is doing at the moment, go to pybrain/rl/environments/ode/ and start viewer.py (python-openGL must be installed, see the PyBrain documentation)
Example 10
from pybrain.rl.experiments import EpisodicExperiment

batch=2 #number of samples per learning step
prnts=100 #number of learning steps after which results are printed
epis=int(4000/batch/prnts) #number of rollouts
numbExp=40 #number of experiments
et = ExTools(batch, prnts) #tool for printing and plotting
expList = ["PGPE(storeAllEvaluations = True)", "ExactNES(storeAllEvaluations = True)", "FEM(storeAllEvaluations = True)", "CMAES(storeAllEvaluations = True)"]
for e in expList:
    for runs in range(numbExp):
        # create environment
        env = CartPoleEnvironment()    
        # create task
        task = BalanceTask(env, 200, desiredValue=None)
        # create controller network
        net = buildNetwork(4, 1, bias=False)
        # create agent with controller and learner (and its options)
        agent = OptimizationAgent(net, eval(e))
        et.agent = agent
        # create the experiment
        experiment = EpisodicExperiment(task, agent)

        #Do the experiment
        for updates in range(epis):
            for i in range(prnts):
                experiment.doEpisodes(batch)
            et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
        et.addExps()
    et.nextExps()
et.showExps()
Example 11
from pybrain.rl.environments.cartpole.balancetask import BalanceTask
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.optimization import HillClimber

task = BalanceTask()
net = buildNetwork(task.outdim, 3, task.indim)
HillClimber(task, net, maxEvaluations=100).learn()
agent = OptimizationAgent(net, HillClimber())
exp = EpisodicExperiment(task, agent)
print(exp.doEpisodes(100))
Example 12
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import CMAES
from pybrain.rl.experiments import EpisodicExperiment

batch = 2  #number of samples per learning step
prnts = 100  #number of learning steps after which results are printed
epis = int(4000 / batch / prnts)  #number of rollouts
numbExp = 10  #number of experiments
et = ExTools(batch, prnts)  #tool for printing and plotting

for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()
    # create task
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, CMAES(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    #Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
Example 13
def run_experiment():
    # Create the controller network
    HIDDEN_NODES = 4

    RUNS = 2
    BATCHES = 1
    PRINTS = 1
    EPISODES = 500

    env = None
    start_state_net = None

    run_results = []

    # Set up plotting tools for the experiments
    tools = ExTools(BATCHES, PRINTS)

    # Run the experiment
    for run in range(RUNS):
        if run == 0:
            continue

        # If an environment already exists, shut it down
        if env:
            env.closeSocket()

        # Create the environment
        env = create_environment()

        # Create the task
        task = Pa10MovementTask(env)

        # Create the neural network. Only create the network once so it retains
        # the same starting values for each run.
        if start_state_net:
            net = start_state_net.copy()
        else:
            # Create the initial neural network
            net = create_network(
                    in_nodes=env.obsLen,
                    hidden_nodes=HIDDEN_NODES,
                    out_nodes=env.actLen
            )
            start_state_net = net.copy()

        # Create the learning agent
        learner = HillClimber(storeAllEvaluations=True)
        agent = OptimizationAgent(net, learner)
        tools.agent = agent

        # Create the experiment
        experiment = EpisodicExperiment(task, agent)

        # Perform all episodes in the run
        for episode in range(EPISODES):
            experiment.doEpisodes(BATCHES)

        # Calculate results
        all_results = agent.learner._allEvaluations
        max_result = np.max(all_results)
        min_result = np.min(all_results)
        avg_result = np.sum(all_results) / len(all_results)
        run_results.append((run, max_result, min_result, avg_result))

        # Make the results directory if it does not exist
        if not os.path.exists(G_RESULTS_DIR):
            os.mkdir(G_RESULTS_DIR)

        # Write all results to the results file
        with open(os.path.join(G_RESULTS_DIR, 'run_%d.txt' % run), 'w+') as f:
            # Store the calculated max, min, avg
            f.write('RUN, MAX, MIN, AVG\n')
            f.write('%d, %f, %f, %f\n' % (run, max_result, min_result, avg_result))

            # Store all results from this run
            f.write('EPISODE, REWARD\n')
            for episode, result in enumerate(all_results):
                f.write('%d, %f\n' % (episode, result))

    return
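create_environment() and create_network() are project-specific helpers that are not shown in this snippet. A hypothetical sketch of create_network, assuming it simply wraps buildNetwork the way the other examples do (the tanh output layer is a guess, chosen because the other motor-control examples use it):

from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure import TanhLayer

def create_network(in_nodes, hidden_nodes, out_nodes):
    # feed-forward controller: observation vector in, joint commands out
    return buildNetwork(in_nodes, hidden_nodes, out_nodes, outclass=TanhLayer)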
Example 14
batch = 2  #number of samples per learning step
prnts = 1  #number of learning steps after which results are printed
epis = 4000 / batch / prnts  #number of rollouts
numbExp = 10  #number of experiments
et = ExTools(batch, prnts)  #tool for printing and plotting

for runs in range(numbExp):
    # create environment
    #Options: Bool(OpenGL), Bool(Realtime simu. while client is connected), ServerIP(default:localhost), Port(default:21560)
    env = AcrobotEnvironment()
    # create task
    task = GradualRewardTask(env)
    # create controller network
    net = buildNetwork(len(task.getObservation()),
                       env.actLen,
                       outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, FiniteDifferences(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    #Do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
#To view what the simulation is doing at the moment, go to pybrain/rl/environments/ode/ and start viewer.py (python-openGL must be installed, see the PyBrain documentation)
best_params = []
best_reward = -1000

num_cpus = cpu_count()
rospy.loginfo('Using %d CPUs for experiments', num_cpus)

exp_desciptions = []

for i in range(num_experiments):
    # set up environment, task, neural net, agent, and experiment
    env = InfoMaxEnv(object_names, action_names, num_objects, False)
    task = InfoMaxTask(env, max_steps=max_steps)
    net = buildNetwork(task.outdim, task.indim, bias=True, outclass=SoftmaxLayer)

    if algorithm == 'pgpe':
        agent = OptimizationAgent(net, PGPE(storeAllEvaluations=True, minimize=False, verbose=False))
    elif algorithm == 'cmaes':
        agent = OptimizationAgent(net, CMAES(minimize=False, verbose=False))

    experiment = EpisodicExperiment(task, agent)

    exp_desciptions.append([i, agent, experiment, task])

pool = Pool(processes=num_cpus)

res = []
if algorithm == 'pgpe':
    res = pool.map(run_experiment, exp_desciptions)
elif algorithm == 'cmaes':
    for desc in exp_desciptions:
        res.append(run_experiment(desc))