__version__ = '$Id$'

from pybrain.tools.example_tools import ExTools
from pybrain.structure.modules.tanhlayer import TanhLayer
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.shipsteer import ShipSteeringEnvironment
from pybrain.rl.environments.shipsteer import GoNorthwardTask
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import PGPE
from pybrain.rl.experiments import EpisodicExperiment

batch = 1                          # number of samples per learning step
prnts = 50                         # number of learning steps after which results are printed
epis = int(2000 / batch / prnts)   # number of rollouts
numbExp = 10                       # number of experiments
et = ExTools(batch, prnts)         # tool for printing and plotting

env = None
for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(Realtime simu. while client is connected), ServerIP(default:localhost), Port(default:21560)
    if env is not None:
        env.closeSocket()
    env = ShipSteeringEnvironment()
    # create task
    task = GoNorthwardTask(env, maxsteps=500)
    # create controller network
    net = buildNetwork(task.outdim, task.indim, outclass=TanhLayer)
    # create agent with controller and learner (and its options);
    # evaluations are stored so they can be reported through ExTools
    agent = OptimizationAgent(net, PGPE(learningRate=0.3,
                                        storeAllEvaluations=True))
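    # A minimal sketch (assumed, not taken from this script) of the usual
    # ExTools/EpisodicExperiment training loop, mirroring the asset-allocation
    # example further below; it relies on storeAllEvaluations=True above.
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)
    # run the learning steps and print the most recent evaluations
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)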
__version__ = '$Id$'

from pybrain.tools.example_tools import ExTools
from pybrain.structure.modules.tanhlayer import TanhLayer
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.flexcube import FlexCubeEnvironment, WalkTask
from pybrain.rl.agents import OptimizationAgent
from pybrain.optimization import SimpleSPSA
from pybrain.rl.experiments import EpisodicExperiment

hiddenUnits = 4
batch = 2                          # number of samples per learning step
prnts = 1                          # number of learning steps after which results are printed
epis = 5000000 / batch / prnts     # number of rollouts
numbExp = 10                       # number of experiments
et = ExTools(batch, prnts)         # tool for printing and plotting

for runs in range(numbExp):
    # create environment
    # Options: Bool(OpenGL), Bool(Realtime simu. while client is connected), ServerIP(default:localhost), Port(default:21560)
    env = FlexCubeEnvironment()
    # create task
    task = WalkTask(env)
    # create controller network
    net = buildNetwork(len(task.getObservation()), hiddenUnits, env.actLen, outclass=TanhLayer)
    # create agent with controller and learner (and its options)
    agent = OptimizationAgent(net, SimpleSPSA(storeAllEvaluations=True))
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)
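    # Illustrative training loop (assumed; it follows the same
    # ExTools/EpisodicExperiment pattern used by the other examples here):
    # run the learning steps and print the most recent evaluations.
    for updates in range(epis):
        for i in range(prnts):
            experiment.doEpisodes(batch)
        et.printResults((agent.learner._allEvaluations)[-50:-1], runs, updates)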
#########################################################################
__author__ = "Thomas Rueckstiess, Frank Sehnke"
__version__ = '$Id$'

from pybrain.tools.example_tools import ExTools
from pybrain.tools.shortcuts import buildNetwork
from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Reinforce
from pybrain.rl.experiments import EpisodicExperiment

batch = 50                         # number of samples per learning step
prnts = 4                          # number of learning steps after which results are printed
epis = 4000 / batch / prnts        # number of rollouts
numbExp = 10                       # number of experiments
et = ExTools(batch, prnts, kind="learner")  # tool for printing and plotting

for runs in range(numbExp):
    print 'run: ', runs
    # create environment
    env = CartPoleEnvironment()
    # create task
    task = BalanceTask(env, 200, desiredValue=None)
    # create controller network
    net = buildNetwork(4, 1, bias=False)
    # create agent with controller and learner (and its options)
    agent = LearningAgent(net, Reinforce())
    et.agent = agent
    # create the experiment
    experiment = EpisodicExperiment(task, agent)
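    # Illustrative training loop (a sketch, not taken from this file): with a
    # LearningAgent, doEpisodes() returns one reward list per episode, and
    # agent.learn()/agent.reset() perform the REINFORCE update and clear the
    # episode history, as in the PyBrain RL tutorial.
    for updates in range(epis):
        for i in range(prnts):
            rewards = experiment.doEpisodes(batch)
            agent.learn()
            agent.reset()
        meanReturn = float(sum(sum(r) for r in rewards)) / len(rewards)
        print 'run %d, update %d, mean return: %f' % (runs, updates, meanReturn)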
__author__ = "Thomas Rueckstiess, Frank Sehnke" from pybrain.tools.example_tools import ExTools from pybrain.tools.shortcuts import buildNetwork from pybrain.rl.environments.cartpole import CartPoleEnvironment, BalanceTask from pybrain.rl.agents import LearningAgent from pybrain.rl.learners import ENAC from pybrain.rl.experiments import EpisodicExperiment batch=50 #number of samples per learning step prnts=4 #number of learning steps after results are printed epis=4000/batch/prnts #number of roleouts numbExp=10 #number of experiments et = ExTools(batch, prnts, kind = "learner") #tool for printing and plotting for runs in range(numbExp): # create environment env = CartPoleEnvironment() # create task task = BalanceTask(env, 200, desiredValue=None) # create controller network net = buildNetwork(4, 1, bias=False) # create agent with controller and learner (and its options) agent = LearningAgent(net, ENAC()) et.agent = agent # create the experiment experiment = EpisodicExperiment(task, agent) #Do the experiment
import os

import numpy as np

from pybrain.optimization import HillClimber
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.tools.example_tools import ExTools

# create_environment(), create_network(), Pa10MovementTask and G_RESULTS_DIR
# come from this project's own modules and are expected to be defined or
# imported elsewhere in this file.


def run_experiment():
    # Experiment constants
    HIDDEN_NODES = 4
    RUNS = 2
    BATCHES = 1
    PRINTS = 1
    EPISODES = 500

    env = None
    start_state_net = None
    run_results = []

    # Set up plotting tools for the experiments
    tools = ExTools(BATCHES, PRINTS)

    # Run the experiment
    for run in range(RUNS):
        if run == 0:
            continue

        # If an environment already exists, shut it down
        if env:
            env.closeSocket()

        # Create the environment
        env = create_environment()

        # Create the task
        task = Pa10MovementTask(env)

        # Create the neural network. Only create the network once so it retains
        # the same starting values for each run.
        if start_state_net:
            net = start_state_net.copy()
        else:
            # Create the initial neural network
            net = create_network(
                in_nodes=env.obsLen,
                hidden_nodes=HIDDEN_NODES,
                out_nodes=env.actLen
            )
            start_state_net = net.copy()

        # Create the learning agent
        learner = HillClimber(storeAllEvaluations=True)
        agent = OptimizationAgent(net, learner)
        tools.agent = agent

        # Create the experiment
        experiment = EpisodicExperiment(task, agent)

        # Perform all episodes in the run
        for episode in range(EPISODES):
            experiment.doEpisodes(BATCHES)

        # Calculate results
        all_results = agent.learner._allEvaluations
        max_result = np.max(all_results)
        min_result = np.min(all_results)
        avg_result = np.sum(all_results) / len(all_results)
        run_results.append((run, max_result, min_result, avg_result))

        # Make the results directory if it does not exist
        if not os.path.exists(G_RESULTS_DIR):
            os.mkdir(G_RESULTS_DIR)

        # Write all results to the results file
        with open(os.path.join(G_RESULTS_DIR, 'run_%d.txt' % run), 'w+') as f:
            # Store the calculated max, min, avg
            f.write('RUN, MAX, MIN, AVG\n')
            f.write('%d, %f, %f, %f\n' % (run, max_result, min_result, avg_result))

            # Store all results from this run
            f.write('EPISODE, REWARD\n')
            for episode, result in enumerate(all_results):
                f.write('%d, %f\n' % (episode, result))

    return
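
# Minimal entry point (assumed; the excerpt above only defines the function).
if __name__ == '__main__':
    run_experiment()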
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from pybrain.optimization import PGPE
from pybrain.rl.agents import OptimizationAgent
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.structure.modules import SoftmaxLayer
from pybrain.tools.example_tools import ExTools
from pybrain.tools.shortcuts import buildNetwork

# MarketEnvironment and AssetAllocationTask come from this project's own
# modules (their import paths are not shown here).


def main():
    """ Main program for the automatic asset allocation problem. """
    # Directories
    input_data_dir = '../../Data/Input/'
    output_data_dir = '../../Data/Output/'

    # Experiment parameters
    batch = 1                       # Number of samples per learning step
    prnts = 100                     # Learning steps before printing results
    nEpisodes = 100 / batch / prnts  # Number of rollouts
    nExperiments = 1                # Number of experiments
    et = ExTools(batch, prnts)      # Tool for printing and plotting

    # Parameters
    X = 0.0 / 252      # Daily risk-free rate
    deltaP = 0.00      # Proportional transaction costs
    deltaF = 0.0       # Fixed transaction costs
    deltaS = 0.00      # Short-selling borrowing costs
    P = 5              # Number of past days the agent considers
    discount = 0.95    # Discount factor

    # Evaluation interval sizes
    start = P + 1
    trainingIntervalLength = 70
    testIntervalLength = 30

    # Initialize the market environment
    market = MarketEnvironment(input_data_dir + 'daily_returns.csv', X, P)
    nSamples = len(market.data)
    nPeriods = (nSamples - start + 1) / (trainingIntervalLength + testIntervalLength)

    # Initialize the asset allocation task
    task = AssetAllocationTask(market, deltaP, deltaF, deltaS, discount)

    # Initialize controller module
    module = buildNetwork(market.outdim,          # Input layer
                          market.indim,           # Output layer
                          outclass=SoftmaxLayer)  # Output activation function

    # Initialize learner module
    learner = PGPE(storeAllEvaluations=True,
                   learningRate=0.01,
                   sigmaLearningRate=0.01,
                   batchSize=batch,
                   # momentum=0.05,
                   # epsilon=6.0,
                   rprop=False)

    # Initialize learning agent
    agent = OptimizationAgent(module, learner)
    et.agent = agent

    for period in xrange(5):  # a full run would use xrange(nPeriods)

        # Set initial and final time steps for training
        initialTimeStep = start
        finalTimeStep = start + trainingIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.trainingMode()

        # Initialize experiment
        experiment = EpisodicExperiment(task, agent)

        # Train the agent
        for episode in xrange(nEpisodes):
            for i in xrange(prnts):
                experiment.doEpisodes(batch)
            et.printResults((agent.learner._allEvaluations)[-50:-1], 1, episode)

        # Set initial and final time steps for testing
        initialTimeStep = start + trainingIntervalLength
        finalTimeStep = initialTimeStep + testIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.backtestMode()

        # Initialize experiment
        experiment = EpisodicExperiment(task, agent)

        # Test the agent
        experiment.doEpisodes(batch)

        # Slide evaluation window
        start += testIntervalLength

    # Plot allocations
    task.report.iloc[:, :-1].plot.area(title='Portfolio Allocation - PGPE')
    plt.ylim(0.0, 1.0)
    plt.xlabel('Date')
    plt.ylabel('Portfolio Allocation')
    plt.show()

    # Plot cumulative log-returns
    buyHold = market.data.ix[task.report.index, 'SPY']
    buyHoldCumLogReturns = np.log(buyHold + 1.0).cumsum(axis=0)
    ptfCumLogReturns = task.report['ptfLogReturn'].cumsum(axis=0)
    cumLogReturns = pd.DataFrame(index=task.report.index)
    cumLogReturns['Buy & Hold'] = buyHoldCumLogReturns
    cumLogReturns['PGPE'] = ptfCumLogReturns
    cumLogReturns.plot(title='Cumulative Log-Returns - PGPE', lw=2, grid=True)
    plt.xlabel('Date')
    plt.ylabel('Cumulative Log-Returns')
    plt.show()
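
# Minimal entry point (assumed; the excerpt above only defines main()).
if __name__ == '__main__':
    main()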