# Ship-steering experiment: PGPE on the GoNorthwardTask, repeated numbExp times.
# Environment options: Bool(OpenGL), Bool(realtime simulation while a client is
# connected), ServerIP (default: localhost), Port (default: 21560).
for runs in range(numbExp):
    # Close the previous environment's socket before opening a fresh one.
    # NOTE(review): uses identity comparison with None (was `env != None`,
    # which is non-idiomatic); `env` is presumably initialised to None before
    # this loop — confirm against the lines above this chunk.
    if env is not None:
        env.closeSocket()
    env = ShipSteeringEnvironment()

    # Create the task (episode capped at 500 steps).
    task = GoNorthwardTask(env, maxsteps=500)

    # Create the controller network mapping observations to actions.
    net = buildNetwork(task.outdim, task.indim, outclass=TanhLayer)

    # Create the agent with controller and learner (and the learner's options).
    agent = OptimizationAgent(
        net,
        PGPE(learningRate=0.3,
             sigmaLearningRate=0.15,
             momentum=0.0,
             epsilon=2.0,
             rprop=False,
             storeAllEvaluations=True))
    et.agent = agent

    # Create the experiment.
    experiment = EpisodicExperiment(task, agent)

    # Run it: after every `prnts` learning steps, print the recent evaluations.
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
# To view what the simulation is doing at the moment, create the environment
# with True, go to pybrain/rl/environments/ode/ and start viewer.py
# (python-openGL must be installed, see PyBrain documentation).
# FlexCube walking experiment: SimpleSPSA optimisation, repeated numbExp times.
hiddenUnits = 4
batch = 2                       # number of samples per learning step
prnts = 1                       # number of learning steps after results are printed
epis = 5000000/batch/prnts      # number of roleouts
numbExp = 10                    # number of experiments
et = ExTools(batch, prnts)      # tool for printing and plotting

for runs in range(numbExp):
    # Build the environment.
    # Options: Bool(OpenGL), Bool(realtime simulation while a client is
    # connected), ServerIP (default: localhost), Port (default: 21560).
    env = FlexCubeEnvironment()

    # Build the task on top of it.
    task = WalkTask(env)

    # Build the controller network: observation -> hidden -> action.
    net = buildNetwork(len(task.getObservation()),
                       hiddenUnits,
                       env.actLen,
                       outclass=TanhLayer)

    # Wire controller and learner into an agent.
    agent = OptimizationAgent(net, SimpleSPSA(storeAllEvaluations = True))
    et.agent = agent

    # Assemble the experiment.
    experiment = EpisodicExperiment(task, agent)

    # Run it, printing the last evaluations every `prnts` learning steps.
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)
    et.addExps()
et.showExps()
# To watch the simulation live, go to pybrain/rl/environments/flexcube/ and
# start renderer.py (python-openGL must be installed).
from pybrain.rl.learners import ENAC
from pybrain.rl.experiments import EpisodicExperiment

# Cart-pole balancing with the ENAC policy-gradient learner.
batch = 50                      # number of samples per learning step
prnts = 4                       # number of learning steps after results are printed
epis = 4000/batch/prnts         # number of roleouts
numbExp = 10                    # number of experiments
et = ExTools(batch, prnts, kind = "learner")  # tool for printing and plotting

for runs in range(numbExp):
    # Fresh environment per experiment.
    env = CartPoleEnvironment()

    # Balancing task: 200 steps per episode, no fixed target value.
    task = BalanceTask(env, 200, desiredValue=None)

    # Linear controller: 4 state inputs, 1 action output, no bias.
    net = buildNetwork(4, 1, bias=False)

    # Learning agent combining controller and ENAC learner.
    agent = LearningAgent(net, ENAC())
    et.agent = agent

    # Assemble the experiment.
    experiment = EpisodicExperiment(task, agent)

    # Run it; after every `prnts` learning steps report the return of the
    # most recent episode in the learner's dataset.
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        state, action, reward = agent.learner.dataset.getSequence(
            agent.learner.dataset.getNumSequences()-1)
        et.printResults(reward.sum(), runs, updates)
    et.addExps()
et.showExps()
prnts = 4 #number of learning steps after results are printed epis = 4000 / batch / prnts #number of roleouts numbExp = 10 #number of experiments et = ExTools(batch, prnts, kind="learner") #tool for printing and plotting for runs in range(numbExp): print 'run: ', runs # create environment env = CartPoleEnvironment() # create task task = BalanceTask(env, 200, desiredValue=None) # create controller network net = buildNetwork(4, 1, bias=False) # create agent with controller and learner (and its options) agent = LearningAgent(net, Reinforce()) et.agent = agent # create the experiment experiment = EpisodicExperiment(task, agent) #Do the experiment for updates in range(epis): for i in range(prnts): experiment.doEpisodes(batch) state, action, reward = agent.learner.dataset.getSequence( agent.learner.dataset.getNumSequences() - 1) et.printResults(reward.sum(), runs, updates) et.addExps() et.showExps() print 'done'
def main():
    """ Main program for automatic asset allocation problem. """
    # Directories
    input_data_dir = '../../Data/Input/'
    output_data_dir = '../../Data/Output/'

    # Experiment parameters
    batch = 1                      # Number of samples per learning step
    prnts = 100                    # Learning steps before printing results
    nEpisodes = 100/batch/prnts    # Number of rollouts
    nExperiments = 1               # Number of experiments
    et = ExTools(batch, prnts)     # Tool for printing and plotting

    # Problem parameters
    X = 0.0 / 252                  # Daily risk-free rate
    deltaP = 0.00                  # Proportional transaction costs
    deltaF = 0.0                   # Fixed transaction costs
    deltaS = 0.00                  # Short-selling borrowing costs
    P = 5                          # Number of past days the agent considers
    discount = 0.95                # Discount factor

    # Sizes of the sliding train/test evaluation windows
    start = P + 1
    trainingIntervalLength = 70
    testIntervalLength = 30

    # Market environment built from historical daily returns.
    market = MarketEnvironment(input_data_dir + 'daily_returns.csv', X, P)
    nSamples = len(market.data)
    nPeriods = (nSamples - start + 1) / (trainingIntervalLength + testIntervalLength)

    # Asset-allocation task wrapping the market with costs and discounting.
    task = AssetAllocationTask(market, deltaP, deltaF, deltaS, discount)

    # Controller: single layer, softmax output so allocations sum to one.
    module = buildNetwork(market.outdim,           # Input layer
                          market.indim,            # Output layer
                          outclass=SoftmaxLayer)   # Output activation function

    # PGPE learner (momentum/epsilon options left disabled).
    learner = PGPE(storeAllEvaluations=True,
                   learningRate=0.01,
                   sigmaLearningRate=0.01,
                   batchSize=batch,
                   # momentum=0.05,
                   # epsilon=6.0,
                   rprop=False)

    # Black-box optimization agent pairing controller and learner.
    agent = OptimizationAgent(module, learner)
    et.agent = agent

    for period in xrange(5):  # nPeriods):
        # Training window for this period.
        initialTimeStep = start
        finalTimeStep = start + trainingIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.trainingMode()

        # Training experiment.
        experiment = EpisodicExperiment(task, agent)

        # Train the agent, printing recent evaluations as we go.
        for episode in xrange(nEpisodes):
            for i in xrange(prnts):
                experiment.doEpisodes(batch)
            et.printResults((agent.learner._allEvaluations)[-50:-1], 1, episode)

        # Test window immediately following the training window.
        initialTimeStep = start + trainingIntervalLength
        finalTimeStep = initialTimeStep + testIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.backtestMode()

        # Backtest experiment.
        experiment = EpisodicExperiment(task, agent)

        # Test the agent out-of-sample.
        experiment.doEpisodes(batch)

        # Slide the evaluation window forward.
        start += testIntervalLength

    # Plot the portfolio allocations over time.
    task.report.iloc[:, :-1].plot.area(title='Portfolio Allocation - PGPE')
    plt.ylim(0.0, 1.0)
    plt.xlabel('Date')
    plt.ylabel('Portfolio Allocation')
    plt.show()

    # Plot cumulative log-returns: learned strategy vs. buy & hold of SPY.
    buyHold = market.data.ix[task.report.index, 'SPY']
    buyHoldCumLogReturns = np.log(buyHold + 1.0).cumsum(axis=0)
    ptfCumLogReturns = task.report['ptfLogReturn'].cumsum(axis=0)
    cumLogReturns = pd.DataFrame(index=task.report.index)
    cumLogReturns['Buy & Hold'] = buyHoldCumLogReturns
    cumLogReturns['PGPE'] = ptfCumLogReturns
    cumLogReturns.plot(title='Cumulative Log-Returns - PGPE', lw=2, grid=True)
    plt.xlabel('Date')
    plt.ylabel('Cumulative Log-Returns')
    plt.show()