from pybrain.tools.example_tools import ExTools
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules.tanhlayer import TanhLayer
from pybrain.rl.environments.shipsteer import ShipSteeringEnvironment, GoNorthwardTask
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.optimization import PGPE

# Experiment parameters (not defined in the original snippet; the values below
# are illustrative and should be adjusted as needed)
batch = 2                       # number of samples per learning step
prnts = 100                     # number of learning steps between printed results
epis = 4000 // batch // prnts   # number of rollouts
numbExp = 10                    # number of experiments
et = ExTools(batch, prnts)      # tool for printing and plotting

env = None
for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(realtime simulation while client is connected),
    # ServerIP (default: localhost), Port (default: 21560)
    if env is not None:
        env.closeSocket()
    env = ShipSteeringEnvironment()
    # create task
    task = GoNorthwardTask(env, maxsteps=500)
    # create controller network
    net = buildNetwork(task.outdim, task.indim, outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, PGPE(learningRate=0.3,
                                        sigmaLearningRate=0.15,
                                        momentum=0.0,
                                        epsilon=2.0,
                                        rprop=False,
                                        storeAllEvaluations=True))
    et.agent = agent
    # create experiment
    experiment = EpisodicExperiment(task, agent)
    # do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults(agent.learner._allEvaluations[-50:-1], runs, updates)
    et.addExps()
et.showExps()
# To watch the simulation, create the environment with rendering enabled (first
# option True), then go to pybrain/rl/environments/ode/ and start viewer.py
# (python-openGL must be installed; see the PyBrain documentation).
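# The learner stores one fitness value per evaluated episode in
# _allEvaluations, so the [-50:-1] slice printed above is a sliding window
# over the most recent episodes. A minimal sketch (assuming only numpy) of
# turning that history into a smoothed learning curve for offline inspection;
# `window` is a free parameter here, not anything PyBrain prescribes:
import numpy as np

def smoothed_curve(evaluations, window=50):
    """Return the moving average of per-episode rewards."""
    rewards = np.asarray(evaluations, dtype=float)
    if rewards.size < window:
        return rewards
    kernel = np.ones(window) / window
    return np.convolve(rewards, kernel, mode='valid')

# Example: curve = smoothed_curve(agent.learner._allEvaluations)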
import os

import numpy as np

from pybrain.optimization import HillClimber
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.tools.example_tools import ExTools

# create_environment, create_network, Pa10MovementTask and G_RESULTS_DIR are
# project-specific helpers defined elsewhere in this module.


def run_experiment():
    # Experiment parameters
    HIDDEN_NODES = 4
    RUNS = 2
    BATCHES = 1
    PRINTS = 1
    EPISODES = 500

    env = None
    start_state_net = None
    run_results = []

    # Set up plotting tools for the experiments
    tools = ExTools(BATCHES, PRINTS)

    # Run the experiment
    for run in range(RUNS):
        if run == 0:
            continue

        # If an environment already exists, shut it down
        if env:
            env.closeSocket()

        # Create the environment
        env = create_environment()

        # Create the task
        task = Pa10MovementTask(env)

        # Create the neural network. Only create the network once so it
        # retains the same starting values for each run.
        if start_state_net:
            net = start_state_net.copy()
        else:
            # Create the initial neural network
            net = create_network(
                in_nodes=env.obsLen,
                hidden_nodes=HIDDEN_NODES,
                out_nodes=env.actLen
            )
            start_state_net = net.copy()

        # Create the learning agent
        learner = HillClimber(storeAllEvaluations=True)
        agent = OptimizationAgent(net, learner)
        tools.agent = agent

        # Create the experiment
        experiment = EpisodicExperiment(task, agent)

        # Perform all episodes in the run
        for episode in range(EPISODES):
            experiment.doEpisodes(BATCHES)

        # Calculate results
        all_results = agent.learner._allEvaluations
        max_result = np.max(all_results)
        min_result = np.min(all_results)
        avg_result = np.sum(all_results) / len(all_results)
        run_results.append((run, max_result, min_result, avg_result))

        # Make the results directory if it does not exist
        if not os.path.exists(G_RESULTS_DIR):
            os.mkdir(G_RESULTS_DIR)

        # Write all results to the results file
        with open(os.path.join(G_RESULTS_DIR, 'run_%d.txt' % run), 'w+') as f:
            # Store the calculated max, min, avg
            f.write('RUN, MAX, MIN, AVG\n')
            f.write('%d, %f, %f, %f\n' %
                    (run, max_result, min_result, avg_result))

            # Store all results from this run
            f.write('EPISODE, REWARD\n')
            for episode, result in enumerate(all_results):
                f.write('%d, %f\n' % (episode, result))

    return
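# run_experiment() writes a small mixed-format text file per run: a one-line
# summary followed by a per-episode table. A minimal reader sketch for that
# format (the function name is illustrative, not part of the project):
def read_run_results(path):
    """Parse a run_<n>.txt file into (summary dict, list of rewards)."""
    with open(path) as f:
        lines = [line.strip() for line in f if line.strip()]
    # lines[0] is the 'RUN, MAX, MIN, AVG' header; lines[1] holds the values
    run, max_r, min_r, avg_r = [float(v) for v in lines[1].split(',')]
    summary = {'run': int(run), 'max': max_r, 'min': min_r, 'avg': avg_r}
    # lines[2] is the 'EPISODE, REWARD' header; the rest are episode rows
    rewards = [float(row.split(',')[1]) for row in lines[3:]]
    return summary, rewards

# Example:
# summary, rewards = read_run_results(os.path.join(G_RESULTS_DIR, 'run_1.txt'))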
from pybrain.tools.example_tools import ExTools
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules.tanhlayer import TanhLayer
from pybrain.rl.environments.flexcube import FlexCubeEnvironment, WalkTask
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.optimization import SimpleSPSA

hiddenUnits = 4
batch = 2       # number of samples per learning step
prnts = 1       # number of learning steps between printed results
epis = 5000000 // batch // prnts  # number of rollouts
numbExp = 10    # number of experiments
et = ExTools(batch, prnts)  # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(realtime simulation while client is connected),
    # ServerIP (default: localhost), Port (default: 21560)
    env = FlexCubeEnvironment()
    # create task
    task = WalkTask(env)
    # create controller network
    net = buildNetwork(len(task.getObservation()), hiddenUnits, env.actLen,
                       outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, SimpleSPSA(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)
    # do the experiment
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults(agent.learner._allEvaluations[-50:-1], runs, updates)
    et.addExps()
et.showExps()
# To watch the simulation, go to pybrain/rl/environments/flexcube/ and start
# renderer.py (python-openGL must be installed).
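# The script above mirrors the ship-steering experiment: only the
# environment/task pair and the learner change. A minimal sketch (reusing the
# PyBrain objects already imported above) of factoring that shared loop out so
# learners such as SimpleSPSA or PGPE can be swapped in freely; `learner_factory`
# and `run_id` are illustrative names, not PyBrain API:
def run_learner(task, net, learner_factory, epis, prnts, batch, et, run_id):
    """Run one experiment with a freshly constructed learner."""
    agent = OptimizationAgent(net, learner_factory())
    et.agent = agent
    experiment = EpisodicExperiment(task, agent)
    for updates in range(epis):
        for _ in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults(agent.learner._allEvaluations[-50:-1], run_id, updates)

# Example:
# run_learner(task, net, lambda: SimpleSPSA(storeAllEvaluations=True),
#             epis, prnts, batch, et, runs)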
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from pybrain.tools.example_tools import ExTools
from pybrain.tools.shortcuts import buildNetwork
from pybrain.structure.modules.softmaxlayer import SoftmaxLayer
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.optimization import PGPE

# MarketEnvironment and AssetAllocationTask are project-specific modules
# defined elsewhere in this codebase.


def main():
    """ Main program for the automatic asset allocation problem. """
    # Directories
    input_data_dir = '../../Data/Input/'
    output_data_dir = '../../Data/Output/'

    # Experiment parameters
    batch = 1              # Number of samples per learning step
    prnts = 100            # Learning steps before printing results
    nEpisodes = 100 // batch // prnts  # Number of rollouts
    nExperiments = 1       # Number of experiments
    et = ExTools(batch, prnts)  # Tool for printing and plotting

    # Parameters
    X = 0.0 / 252          # Daily risk-free rate
    deltaP = 0.00          # Proportional transaction costs
    deltaF = 0.0           # Fixed transaction costs
    deltaS = 0.00          # Short-selling borrowing costs
    P = 5                  # Number of past days the agent considers
    discount = 0.95        # Discount factor

    # Evaluation interval sizes
    start = P + 1
    trainingIntervalLength = 70
    testIntervalLength = 30

    # Initialize the market environment
    market = MarketEnvironment(input_data_dir + 'daily_returns.csv', X, P)
    nSamples = len(market.data)
    nPeriods = (nSamples - start + 1) // (trainingIntervalLength +
                                          testIntervalLength)

    # Initialize the asset allocation task
    task = AssetAllocationTask(market, deltaP, deltaF, deltaS, discount)

    # Initialize the controller module
    module = buildNetwork(market.outdim,          # Input layer
                          market.indim,           # Output layer
                          outclass=SoftmaxLayer)  # Output activation function

    # Initialize the learner
    learner = PGPE(storeAllEvaluations=True,
                   learningRate=0.01,
                   sigmaLearningRate=0.01,
                   batchSize=batch,
                   # momentum=0.05,
                   # epsilon=6.0,
                   rprop=False)

    # Initialize the learning agent
    agent = OptimizationAgent(module, learner)
    et.agent = agent

    for period in xrange(5):  # xrange(nPeriods):

        # Set initial and final time steps for training
        initialTimeStep = start
        finalTimeStep = start + trainingIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.trainingMode()

        # Initialize the experiment
        experiment = EpisodicExperiment(task, agent)

        # Train the agent
        for episode in xrange(nEpisodes):
            for i in xrange(prnts):
                experiment.doEpisodes(batch)
            et.printResults(agent.learner._allEvaluations[-50:-1], 1, episode)

        # Set initial and final time steps for testing
        initialTimeStep = start + trainingIntervalLength
        finalTimeStep = initialTimeStep + testIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.backtestMode()

        # Initialize the experiment
        experiment = EpisodicExperiment(task, agent)

        # Test the agent
        experiment.doEpisodes(batch)

        # Slide the evaluation window
        start += testIntervalLength

    # Plot the portfolio allocations
    task.report.iloc[:, :-1].plot.area(title='Portfolio Allocation - PGPE')
    plt.ylim(0.0, 1.0)
    plt.xlabel('Date')
    plt.ylabel('Portfolio Allocation')
    plt.show()

    # Plot the cumulative log-returns
    buyHold = market.data.ix[task.report.index, 'SPY']
    buyHoldCumLogReturns = np.log(buyHold + 1.0).cumsum(axis=0)
    ptfCumLogReturns = task.report['ptfLogReturn'].cumsum(axis=0)

    cumLogReturns = pd.DataFrame(index=task.report.index)
    cumLogReturns['Buy & Hold'] = buyHoldCumLogReturns
    cumLogReturns['PGPE'] = ptfCumLogReturns
    cumLogReturns.plot(title='Cumulative Log-Returns - PGPE', lw=2, grid=True)
    plt.xlabel('Date')
    plt.ylabel('Cumulative Log-Returns')
    plt.show()
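# The loop in main() slides a window through the return series: each period
# trains on trainingIntervalLength steps, backtests on the following
# testIntervalLength steps, then advances by testIntervalLength so the test
# intervals tile the sample without overlap. A standalone sketch of that
# interval arithmetic (the function name is illustrative):
def evaluation_windows(n_samples, start, train_len, test_len):
    """Yield (train_start, train_end, test_end) index triples per period."""
    t = start
    while t + train_len + test_len <= n_samples:
        yield t, t + train_len, t + train_len + test_len
        t += test_len

# Example: list(evaluation_windows(500, 6, 70, 30))
# -> [(6, 76, 106), (36, 106, 136), (66, 136, 166), ...]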