Example #1
__version__ = '$Id$'

from pybrain.tools.example_tools import ExTools
from pybrain.structure.modules.tanhlayer import TanhLayer
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.shipsteer import ShipSteeringEnvironment
from pybrain.rl.environments.shipsteer import GoNorthwardTask
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import PGPE
from pybrain.rl.experiments import EpisodicExperiment

batch = 1  # number of samples per learning step
prnts = 50  # number of learning steps after which results are printed
epis = int(2000 / batch / prnts)  # number of rollouts
numbExp = 10  # number of experiments
et = ExTools(batch, prnts)  # tool for printing and plotting

env = None
for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(realtime simulation while a client is
    # connected), ServerIP (default: localhost), Port (default: 21560)
    if env is not None:
        env.closeSocket()
    env = ShipSteeringEnvironment()
    # create task
    task = GoNorthwardTask(env, maxsteps=500)
    # create controller network
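    # note: in PyBrain, task.outdim is the observation dimension (the network
    # input) and task.indim the action dimension (the network output)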
    net = buildNetwork(task.outdim, task.indim, outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(
        net,
        # the listing was truncated mid-call; the remaining PGPE options are a
        # minimal completion (storeAllEvaluations feeds printResults below)
        PGPE(learningRate=0.3,
             storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)
    # run the experiment; with an OptimizationAgent the learning step happens
    # inside doEpisodes
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
Example #2
__version__ = '$Id$' 

from pybrain.tools.example_tools import ExTools
from pybrain.structure.modules.tanhlayer import TanhLayer
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.flexcube import FlexCubeEnvironment, WalkTask
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import SimpleSPSA 
from pybrain.rl.experiments import EpisodicExperiment

hiddenUnits = 4
batch = 2  # number of samples per learning step
prnts = 1  # number of learning steps after which results are printed
epis = 5000000 / batch / prnts  # number of rollouts
numbExp = 10  # number of experiments
et = ExTools(batch, prnts)  # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(realtime simulation while a client is
    # connected), ServerIP (default: localhost), Port (default: 21560)
    env = FlexCubeEnvironment()
    # create task
    task = WalkTask(env)
    # create controller network
    net = buildNetwork(len(task.getObservation()), hiddenUnits, env.actLen, outclass=TanhLayer)    
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, SimpleSPSA(storeAllEvaluations = True))
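    # (SimpleSPSA is a gradient-free optimizer: Simultaneous Perturbation
    # Stochastic Approximation)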
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)
    # run the experiment (the listing was truncated here; this follows the
    # standard PyBrain pattern — with an OptimizationAgent the learning step
    # happens inside doEpisodes)
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
Example #3
__author__ = "Thomas Rueckstiess, Frank Sehnke"
__version__ = '$Id$'

from pybrain.tools.example_tools import ExTools
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Reinforce
from pybrain.rl.experiments import EpisodicExperiment

batch = 50  # number of samples per learning step
prnts = 4  # number of learning steps after which results are printed
epis = 4000 / batch / prnts  # number of rollouts
numbExp = 10  # number of experiments
et = ExTools(batch, prnts, kind="learner")  # tool for printing and plotting

for runs in range(numbExp):
    print 'run: ', runs
    # create environment
    env = CartPoleEnvironment()
    # create task
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options)
    agent = LearningAgent(net, Reinforce())
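    # (Reinforce implements Williams' episodic REINFORCE policy gradient)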
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)
    # do the experiment (the listing was truncated here; completed with the
    # standard PyBrain pattern for a LearningAgent: collect episodes, learn,
    # then clear the agent's history)
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        agent.learn()
        agent.reset()
Example #4
__author__ = "Thomas Rueckstiess, Frank Sehnke"


from pybrain.tools.example_tools import ExTools
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import ENAC
from pybrain.rl.experiments import EpisodicExperiment

batch = 50  # number of samples per learning step
prnts = 4  # number of learning steps after which results are printed
epis = 4000 / batch / prnts  # number of rollouts
numbExp = 10  # number of experiments
et = ExTools(batch, prnts, kind="learner")  # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    env = CartPoleEnvironment()    
    # create task
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options)
    agent = LearningAgent(net, ENAC())
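    # (ENAC is the episodic Natural Actor-Critic policy-gradient learner)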
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)

    # Do the experiment (the listing was truncated here; completed with the
    # same LearningAgent pattern as above)
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        agent.learn()
        agent.reset()

Example #5

import os

import numpy as np

from pybrain.optimization import HillClimber
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.tools.example_tools import ExTools

# NOTE: create_environment, create_network, Pa10MovementTask and
# G_RESULTS_DIR are project-specific helpers (a PA-10 robot-arm experiment)
# defined outside this snippet.

def run_experiment():
    # Create the controller network
    HIDDEN_NODES = 4

    RUNS = 2
    BATCHES = 1
    PRINTS = 1
    EPISODES = 500

    env = None
    start_state_net = None

    run_results = []

    # Set up plotting tools for the experiments
    tools = ExTools(BATCHES, PRINTS)

    # Run the experiment
    for run in range(RUNS):
        if run == 0:
            continue

        # If an environment already exists, shut it down
        if env:
            env.closeSocket()

        # Create the environment
        env = create_environment()

        # Create the task
        task = Pa10MovementTask(env)

        # Create the neural network. Only create the network once so it retains
        # the same starting values for each run.
        if start_state_net:
            net = start_state_net.copy()
        else:
            # Create the initial neural network
            net = create_network(
                    in_nodes=env.obsLen,
                    hidden_nodes=HIDDEN_NODES,
                    out_nodes=env.actLen
            )
            start_state_net = net.copy()

        # Create the learning agent
        learner = HillClimber(storeAllEvaluations=True)
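        # (HillClimber mutates the parameter vector and keeps a mutation only
        # when it improves the episodic return)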
        agent = OptimizationAgent(net, learner)
        tools.agent = agent

        # Create the experiment
        experiment = EpisodicExperiment(task, agent)

        # Perform all episodes in the run
        for episode in range(EPISODES):
            experiment.doEpisodes(BATCHES)

        # Calculate results
        all_results = agent.learner._allEvaluations
        max_result = np.max(all_results)
        min_result = np.min(all_results)
        avg_result = np.mean(all_results)
        run_results.append((run, max_result, min_result, avg_result))

        # Make the results directory if it does not exist
        if not os.path.exists(G_RESULTS_DIR):
            os.mkdir(G_RESULTS_DIR)

        # Write all results to the results file
        with open(os.path.join(G_RESULTS_DIR, 'run_%d.txt' % run), 'w+') as f:
            # Store the calculated max, min, avg
            f.write('RUN, MAX, MIN, AVG\n')
            f.write('%d, %f, %f, %f\n' % (run, max_result, min_result, avg_result))

            # Store all results from this run
            f.write('EPISODE, REWARD\n')
            for episode, result in enumerate(all_results):
                f.write('%d, %f\n' % (episode, result))

    return
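
# usage sketch (assumption: this module is meant to be run directly)
if __name__ == '__main__':
    run_experiment()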
Example #6
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from pybrain.optimization import PGPE
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.structure.modules import SoftmaxLayer
from pybrain.tools.example_tools import ExTools
from pybrain.tools.shortcuts import buildNetwork

# NOTE: MarketEnvironment and AssetAllocationTask belong to the surrounding
# asset-allocation project, not to PyBrain itself.

def main():
    """ Main program for automatic asset allocation problem.
    """
    # Directories
    input_data_dir = '../../Data/Input/'
    output_data_dir = '../../Data/Output/'

    # Experiment parameters
    batch = 1                      # Number of samples per learning step
    prnts = 100                    # Learning steps before printing results
    nEpisodes = 100 / batch / prnts  # Number of rollouts
    nExperiments = 1               # Number of experiments
    et = ExTools(batch, prnts)     # Tool for printing and plotting

    # Parameters
    X = 0.0 / 252     # Daily risk-free rate
    deltaP = 0.00     # Proportional transaction costs
    deltaF = 0.0      # Fixed transaction costs
    deltaS = 0.00     # Short-selling borrowing costs
    P = 5             # Number of past days the agent considers
    discount = 0.95   # Discount factor

    # Evaluation interval sizes
    start = P + 1
    trainingIntervalLength = 70
    testIntervalLength = 30

    # Initialize the market environment
    market = MarketEnvironment(input_data_dir + 'daily_returns.csv', X, P)
    nSamples = len(market.data)
    nPeriods = (nSamples - start + 1) / (trainingIntervalLength + testIntervalLength)

    # Initialize the asset allocation tasks
    task = AssetAllocationTask(market, deltaP, deltaF, deltaS, discount)

    # Initialize controller module
    module = buildNetwork(market.outdim,          # Input layer (observation size)
                          market.indim,           # Output layer (action size)
                          outclass=SoftmaxLayer)  # Output activation function

    # Initialize learner module
    learner = PGPE(storeAllEvaluations=True,
                   learningRate=0.01,
                   sigmaLearningRate=0.01,
                   batchSize=batch,
                   # momentum=0.05,
                   # epsilon=6.0,
                   rprop=False)
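    # (PGPE = Policy Gradients with Parameter-based Exploration: it explores
    # by sampling perturbations of the network weights instead of the actions)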

    # Initialize learning agent
    agent = OptimizationAgent(module, learner)
    et.agent = agent

    for period in xrange(5):  # or xrange(nPeriods) to sweep every window

        # Set initial and final time steps for training
        initialTimeStep = start
        finalTimeStep = start + trainingIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.trainingMode()

        # Initialize experiment
        experiment = EpisodicExperiment(task, agent)

        # Train the agent
        for episode in xrange(nEpisodes):
            for i in xrange(prnts):
                experiment.doEpisodes(batch)
            et.printResults((agent.learner._allEvaluations)[-50:-1],
                            1, episode)

        # Set initial and final time steps for testing
        initialTimeStep = start + trainingIntervalLength
        finalTimeStep = initialTimeStep + testIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.backtestMode()

        # Initialize experiment
        experiment = EpisodicExperiment(task, agent)

        # Test the agent
        experiment.doEpisodes(batch)

        # Slide evaluation window
        start += testIntervalLength

    # Plot allocations
    task.report.iloc[:, :-1].plot.area(title='Portfolio Allocation - PGPE')
    plt.ylim(0.0, 1.0)
    plt.xlabel('Date')
    plt.ylabel('Portfolio Allocation')
    plt.show()

    # Plot cumulative log-returns
    buyHold = market.data.loc[task.report.index, 'SPY']
    buyHoldCumLogReturns = np.log(buyHold + 1.0).cumsum(axis=0)
    ptfCumLogReturns = task.report['ptfLogReturn'].cumsum(axis=0)
    cumLogReturns = pd.DataFrame(index=task.report.index)
    cumLogReturns['Buy & Hold'] = buyHoldCumLogReturns
    cumLogReturns['PGPE'] = ptfCumLogReturns
    cumLogReturns.plot(title='Cumulative Log-Returns - PGPE',
                       lw=2, grid=True)
    plt.xlabel('Date')
    plt.ylabel('Cumulative Log-Returns')
    plt.show()
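
# usage sketch (assumption: the script is executed directly)
if __name__ == '__main__':
    main()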