Example #1
def testAugmented():
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.mdpmap import MDPconverter
    from vgdl.agents import PolicyDrivenAgent
    from vgdl.interfaces import GameEnvironment, GameTask
    
    
    zelda_level2 = """
wwwwwwwwwwwww
wA wwk1ww   w
ww  ww    1 w
ww     wwww+w
wwwww1ww  www
wwwww  0  Gww
wwwwwwwwwwwww
"""

    
    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game
    g = VGDLParser().parseGame(rigidzelda_game)
    g.buildLevel(zelda_level2)
    env = GameEnvironment(g, visualize=False,
                          recordingEnabled=True, actionDelay=150)
    C = MDPconverter(g, env=env, verbose=True)
    Ts, R, _ = C.convert()
    print C.states
    print Ts[0]
    print R
    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    task = GameTask(env)    
    exper = EpisodicExperiment(task, agent)
    exper.doEpisodes(1)
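A note on Example #1: MDPconverter.convert() returns a list of per-action transition matrices Ts and a reward vector R, which can be solved directly. Below is a minimal value-iteration sketch, assuming Ts[a] is a row-stochastic matrix over the enumerated states and R is a per-state reward (the exact conventions used by MDPconverter are an assumption here); the helper name value_iteration is illustrative.

import numpy as np

def value_iteration(Ts, R, gamma=0.9, n_iters=200):
    # Ts: list of (nStates x nStates) transition matrices, one per action
    # R: length-nStates reward vector (assumed convention)
    V = np.zeros(len(R))
    for _ in range(n_iters):
        # Q[a, s]: value of taking action a in state s, then acting greedily
        Q = np.array([T.dot(R + gamma * V) for T in Ts])
        V = Q.max(axis=0)
    policy = Q.argmax(axis=0)  # greedy action per state
    return policy, V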
Example #2
def testRecordingToGif(human=False):
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from agents import PolicyDrivenAgent, InteractiveAgent
    from tools import makeGifVideo
    from interfaces import GameEnvironment, GameTask

    game_str, map_str = polarmaze_game, maze_level_2
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    env = GameEnvironment(g,
                          visualize=human,
                          recordingEnabled=True,
                          actionDelay=200)
    task = GameTask(env)
    if human:
        agent = InteractiveAgent()
    else:
        agent = PolicyDrivenAgent.buildOptimal(env)
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(1)
    print res

    actions = [a for _, a, _ in env._allEvents]
    print actions
    makeGifVideo(env, actions, initstate=env._initstate)
Example #3
def testAugmented():
    from core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from mdpmap import MDPconverter
    from agents import PolicyDrivenAgent
    from interfaces import GameEnvironment, GameTask

    zelda_level2 = """
wwwwwwwwwwwww
wA wwk1ww   w
ww  ww    1 w
ww     wwww+w
wwwww1ww  www
wwwww  0  Gww
wwwwwwwwwwwww
"""

    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game
    g = VGDLParser().parseGame(rigidzelda_game)
    g.buildLevel(zelda_level2)
    env = GameEnvironment(g,
                          visualize=False,
                          recordingEnabled=True,
                          actionDelay=150)
    C = MDPconverter(g, env=env, verbose=True)
    Ts, R, _ = C.convert()
    print C.states
    print Ts[0]
    print R
    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    task = GameTask(env)
    exper = EpisodicExperiment(task, agent)
    exper.doEpisodes(1)
Example #4
   def __init__(self, task, agent):
       '''
       Constructor
       '''
       EpisodicExperiment.__init__(self, task, agent)
Example #5
class QAlgorithm:
  def Pause(self):  # if menu says pause, pause execution
    while self.state == 1:
      time.sleep(.05)
    return True

  def Quit(self):#if menu says quit stop running
    self.process.terminate()
    return False

  def Start(self):#starts the Bot
    if self.process == None:
      self.runBot()
      #self.process = multiprocessing.Process(target=self.runBot, args= [])
      #self.process.start() 
    return True

  def CheckState(self):#checks to see what state the menu says to be in 
    if self.state == 0 :
      self.Start()
    elif self.state == 1:
      self.Pause()
    elif self.state == 2:
      self.Quit()

  def GameOver(self):#checks to see if state requires bot pause, quit or if the game is over
    return self.CheckState() or self.sr.checkEndGame(self.endBox,self.gameOver)

  def __init__(self,rewardBox,box,gameOver,endGame,scoreArea):
    self.reward = rewardBox
    self.bbox = box
    self.environment = TEnviroment(box)#Custom environment class
    if os.path.isfile("bot.txt"):
      self.controller  = pickle.load(open("bot.txt","rb")) 
    else:
      self.controller = ActionValueNetwork(50**2,4)#Arguments (framerate*maxPlaytime, Number of actions)
    self.learner = Q()
    gf = {0:self.GameOver}
    self.agent = LearningAgent(self.controller, self.learner)
    self.task = TTask(self.environment,scoreArea,gf)#needs custom task
    self.experiment = EpisodicExperiment(self.task, self.agent)
    self.process = None
    self.endBox = endGame

  def runBot(self):  # runs the bot for a single episode
      self.experiment.doEpisodes()
      self.agent.learn()
      self.agent.reset()
      file = open("bot.txt","wb+")
      pickle.dump(self.controller,file)
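The QAlgorithm class above pairs the tabular Q() learner with an ActionValueNetwork; in PyBrain the network controller is usually combined with NFQ (as in the later examples), while Q/SARSA are more commonly paired with a discrete ActionValueTable. A minimal sketch of that table-based setup, with illustrative sizes:

from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q

table = ActionValueTable(2500, 4)   # 2500 discrete states, 4 actions (illustrative)
table.initialize(0.0)
agent = LearningAgent(table, Q())
# this agent can be dropped into EpisodicExperiment(task, agent) exactly as above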
Example #6
def test4():
    """ Same thing, but animated. """
    from examples.gridphysics.mazes.windy import windy_stoch_game, windy_level
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameEnvironment, GameTask
    from vgdl.agents import PolicyDrivenAgent
    from vgdl.core import VGDLParser
    g = VGDLParser().parseGame(windy_stoch_game)
    g.buildLevel(windy_level)
    env = GameEnvironment(g, visualize=True, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(5)
    print res
Example #7
def test4():
    """ Same thing, but animated. """
    from examples.gridphysics.mazes.windy import windy_stoch_game, windy_level
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameEnvironment, GameTask
    from vgdl.agents import PolicyDrivenAgent
    from vgdl.core import VGDLParser
    g = VGDLParser().parseGame(windy_stoch_game)
    g.buildLevel(windy_level)
    env = GameEnvironment(g, visualize=True, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(5)
    print res
Example #8
 def playSubjectiveGame(game_str, map_str):
     from pybrain.rl.experiments.episodic import EpisodicExperiment
     from vgdl.interfaces import GameTask
     from vgdl.subjective import SubjectiveGame
     from vgdl.agents import InteractiveAgent, UserTiredException
     from vgdl.core import VGDLParser
     g = VGDLParser().parseGame(game_str)
     g.buildLevel(map_str)    
     senv = SubjectiveGame(g, actionDelay=100, recordingEnabled=True)
     task = GameTask(senv)    
     iagent = InteractiveAgent()
     exper = EpisodicExperiment(task, iagent)
     try:
         exper.doEpisodes(1)
     except UserTiredException:
         pass
Example #9
 def playSubjectiveGame(game_str, map_str):
     from pybrain.rl.experiments.episodic import EpisodicExperiment
     from interfaces import GameTask
     from subjective import SubjectiveGame
     from agents import InteractiveAgent, UserTiredException
     from core import VGDLParser
     g = VGDLParser().parseGame(game_str)
     g.buildLevel(map_str)
     senv = SubjectiveGame(g, actionDelay=100, recordingEnabled=True)
     task = GameTask(senv)
     iagent = InteractiveAgent()
     exper = EpisodicExperiment(task, iagent)
     try:
         exper.doEpisodes(1)
     except UserTiredException:
         pass
Example #10
def combinedScore(agent, task = None):
    """ Let the agent act on a number of levels of increasing difficulty. 
    Return the combined score."""
    if task == None:
        task = MarioTask(agentName = agent.name)
    exp = EpisodicExperiment(task, agent)
    res = 0
    for difficulty in range(1):
        for seed in range(1):
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty  
            exp.doEpisodes(1)
            print 'Difficulty: %d, Seed: %d, Fitness: %.2f' % (difficulty, seed, task.reward)
            res += task.reward
    return res
Example #11
def combinedScore(agent, task=None):
    """ Let the agent act on a number of levels of increasing difficulty. 
    Return the combined score."""
    if task == None:
        task = MarioTask(agentName=agent.name)
    exp = EpisodicExperiment(task, agent)
    res = 0
    for difficulty in range(1):
        for seed in range(1):
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty
            exp.doEpisodes(1)
            print 'Difficulty: %d, Seed: %d, Fitness: %.2f' % (
                difficulty, seed, task.reward)
            res += task.reward
    return res
Example #12
def testPolicyAgent():
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from vgdl.agents import PolicyDrivenAgent
    from vgdl.interfaces import GameEnvironment, GameTask
    game_str, map_str = polarmaze_game, maze_level_2
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    
    env = GameEnvironment(g, visualize=False, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(2)
    print res
Example #13
def testPolicyAgent():
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from agents import PolicyDrivenAgent
    from interfaces import GameEnvironment, GameTask
    game_str, map_str = polarmaze_game, maze_level_2
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)

    env = GameEnvironment(g, visualize=False, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(2)
    print res
Example #14
 def mlDriver(cv, stateTransfer, actionTransfer):
     #parameter setup
     #dimensionality of state argument (could be less than stateTransfer)
     stateDim = 352
     #Number of moves possible
     numMoves = 361
     env = SettleEnv(cv, stateTransfer, actionTransfer)
     task = SettleTask(env)
     controller = RestrictedActionValueNetwork(stateDim, numMoves, env)
     learner = NFQ()
     learner.explorer = EpsilonHackedExplorer(env)
     agent = LearningAgent(controller, learner)
     experiment = EpisodicExperiment(task, agent)
     while True:
         experiment.doEpisodes(10)
         print "Done with experiments"
         agent.learn()
         print "Learned"
         agent.reset()
         print "Cycled"
Example #15
 def mlDriver(cv, stateTransfer, actionTransfer):
     #parameter setup
     #dimensionality of state argument (could be less than stateTransfer)
     stateDim = 352
     #Number of moves possible
     numMoves = 361
     env = SettleEnv(cv, stateTransfer, actionTransfer)
     task = SettleTask(env)
     controller = RestrictedActionValueNetwork(stateDim, numMoves, env)
     learner = NFQ()
     learner.explorer = EpsilonHackedExplorer(env)
     agent = LearningAgent(controller, learner)
     experiment = EpisodicExperiment(task, agent)
     while True:
         experiment.doEpisodes(10)
         print "Done with experiments"
         agent.learn()
         print "Learned"
         agent.reset()
         print "Cycled"
Example #16
def test3():
    from examples.gridphysics.mazes import polarmaze_game
    from examples.gridphysics.mazes.simple import maze_level_1b
    from core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from interfaces import GameTask
    from agents import InteractiveAgent, UserTiredException
    from subjective import SubjectiveGame
    game_str, map_str = polarmaze_game, maze_level_1b
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    senv = SubjectiveGame(g, actionDelay=100, recordingEnabled=True)
    #senv = GameEnvironment(g, actionDelay=100, recordingEnabled=True, visualize=True)
    task = GameTask(senv)
    iagent = InteractiveAgent()
    exper = EpisodicExperiment(task, iagent)
    try:
        exper.doEpisodes(1)
    except UserTiredException:
        pass
    print senv._allEvents
Example #17
def test3():
    from examples.gridphysics.mazes import polarmaze_game
    from examples.gridphysics.mazes.simple import maze_level_1b
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameTask
    from vgdl.agents import InteractiveAgent, UserTiredException
    from vgdl.subjective import SubjectiveGame
    game_str, map_str = polarmaze_game, maze_level_1b
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)    
    senv = SubjectiveGame(g, actionDelay=100, recordingEnabled=True)
    #senv = GameEnvironment(g, actionDelay=100, recordingEnabled=True, visualize=True)
    task = GameTask(senv)    
    iagent = InteractiveAgent()
    exper = EpisodicExperiment(task, iagent)
    try:
        exper.doEpisodes(1)
    except UserTiredException:
        pass
    print senv._allEvents
Example #18
def testRecordingToGif(human=False):
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from vgdl.agents import PolicyDrivenAgent, InteractiveAgent
    from vgdl.tools import makeGifVideo
    from vgdl.interfaces import GameEnvironment, GameTask
    
    game_str, map_str = polarmaze_game, maze_level_2
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    env = GameEnvironment(g, visualize=human, recordingEnabled=True, actionDelay=200)
    task = GameTask(env)
    if human:
        agent = InteractiveAgent()
    else:
        agent = PolicyDrivenAgent.buildOptimal(env)
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(1)
    print res
    
    actions = [a for _, a, _ in env._allEvents]
    print actions
    makeGifVideo(env, actions, initstate=env._initstate)
Example #19
def testInteractions():
    from random import randint
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1    
    from pybrain.rl.agents.agent import Agent
    from vgdl.interfaces import GameEnvironment, GameTask
    
    class DummyAgent(Agent):
        total = 4
        def getAction(self):
            res = randint(0, self.total - 1)
            return res    
        
    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    
    env = GameEnvironment(g, visualize=True, actionDelay=100)
    task = GameTask(env)
    agent = DummyAgent()
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(2)
    print res
Example #20
def main():
    agent = SimpleMLPMarioAgent(10, inGridSize=3)
    print agent.name
    NetworkWriter.writeToFile(agent.module, "../temp/MarioNetwork-"+agent.name+".xml")
    
    task = MarioTask(agent.name, timeLimit = 200)
    exp = EpisodicExperiment(task, agent)
    res = 0
    cumul = 0
    for seed in [0]:
        for difficulty in [0,3,5,10]:
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty  
            
            exp.doEpisodes(1)
            print 'Difficulty: %d, Seed: %d, Fitness: %.2f' % (difficulty, seed, task.reward)
            cumul += task.reward
            if task.reward < 4000:
                break
            res += 1
    print res
    print agent.module.inputbuffer*1.
Example #21
def main():
    agent = ForwardAgent()
    task = MarioTask(agent.name, initMarioMode=1)
    exp = EpisodicExperiment(task, agent)
    print 'Task Ready'
    exp.doEpisodes(3)
    print 'mm 1:', task.reward

    task.env.initMarioMode = 2
    exp.doEpisodes(1)
    print 'mm 2:', task.reward

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print 'mm 0:', task.reward

    print "finished"
Example #22
 def __init__(self,rewardBox,box,gameOver,endGame,scoreArea):
   self.reward = rewardBox
   self.bbox = box
   self.environment = TEnviroment(box)#Custom environment class
   if os.path.isfile("bot.txt"):
     self.controller  = pickle.load(open("bot.txt","rb")) 
   else:
     self.controller = ActionValueNetwork(50**2,4)#Arguments (framerate*maxPlaytime, Number of actions)
   self.learner = Q()
   gf = {0:self.GameOver}
   self.agent = LearningAgent(self.controller, self.learner)
   self.task = TTask(self.environment,scoreArea,gf)#needs custom task
   self.experiment = EpisodicExperiment(self.task, self.agent)
   self.process = None
   self.endBox = endGame
Example #23
def testInteractions():
    from random import randint
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.rl.agents.agent import Agent
    from interfaces import GameEnvironment, GameTask

    class DummyAgent(Agent):
        total = 4

        def getAction(self):
            res = randint(0, self.total - 1)
            return res

    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)

    env = GameEnvironment(g, visualize=True, actionDelay=100)
    task = GameTask(env)
    agent = DummyAgent()
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(2)
    print res
Example #24
 def f(self, x):
     """ An episodic task can be used as an evaluation function of a module that produces actions
     from observations, or as an evaluator of an agent. """
     r = 0.
     for _ in range(self.batchSize):
         if isinstance(x, Module):
             x.reset()
             self.reset()
             while not self.isFinished():
                 self.performAction(x.activate(self.getObservation()))
         elif isinstance(x, Agent):
             EpisodicExperiment(self, x).doEpisodes()
         else:
             raise ValueError(self.__class__.__name__+' cannot evaluate the fitness of '+str(type(x)))
         r += self.getTotalReward()
     return r / float(self.batchSize)
Example #25
 def __call__(self, module):
     """ An episodic task can be used as an evaluation function of a module that produces actions 
     from observations, or as an evaluator of an agent. """
     if isinstance(module, Module):
         module.reset()
         self.reset()
         while not self.isFinished():
             self.performAction(module.activate(self.getObservation()))
         return self.getTotalReward()
     elif isinstance(module, Agent):
         EpisodicExperiment(self, module).doEpisodes(self.batchSize)
         return self.getTotalReward() / float(self.batchSize)
     else:
         raise NotImplementedError('Missing implementation for ' +
                                   module.__class__.__name__ +
                                   ' evaluation')
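Because an EpisodicTask is callable on a module (the f and __call__ methods above), the task itself can be handed to a black-box optimizer as its fitness function. A minimal sketch using PyBrain's BalanceTask and HillClimber (the hidden-layer size and maxEvaluations are illustrative):

from pybrain.optimization import HillClimber
from pybrain.rl.environments.cartpole.balancetask import BalanceTask
from pybrain.tools.shortcuts import buildNetwork

task = BalanceTask()
net = buildNetwork(task.outdim, 3, task.indim)
optimizer = HillClimber(task, net, maxEvaluations=100)  # the task acts as the evaluator
best, bestFitness = optimizer.learn()
print(bestFitness)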
Example #26
def main():
    agent = ForwardAgent()
    task = MarioTask(agent.name, initMarioMode=1)
    exp = EpisodicExperiment(task, agent)
    print "Task Ready"
    exp.doEpisodes(3)
    print "mm 1:", task.reward

    task.env.initMarioMode = 2
    exp.doEpisodes(1)
    print "mm 2:", task.reward

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print "mm 0:", task.reward

    print "finished"
Example #27
# create agent
learner = ENAC()
learner.gd.rprop = True
# only relevant for RP
learner.gd.deltamin = 0.0001
#agent.learner.gd.deltanull = 0.05
# only relevant for BP
learner.gd.alpha = 0.01
learner.gd.momentum = 0.9

agent = LearningAgent(net, learner)
agent.actaspg = False

# create experiment
experiment = EpisodicExperiment(task, agent)

# print weights at beginning
print(agent.module.params)

rewards = []
if useGraphics:
    figure()
    ion()
    pl = MultilinePlotter(autoscale=1.2, xlim=[0, 50], ylim=[0, 1])
    pl.setLineStyle(linewidth=2)

# queued version
# experiment._fillQueue(30)
# while True:
#     experiment._stepQueueLoop()
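The Example #27 snippet breaks off after constructing the experiment (only a commented-out queued variant remains). A typical continuation alternates a batch of episodes with a learning step and tracks the summed reward; this is a sketch, not code from the original script, and the episode count is illustrative:

for episode in range(100):
    batch_rewards = experiment.doEpisodes(number=1)   # one reward sequence per episode
    agent.learn()
    agent.reset()
    rewards.append(sum(sum(r) for r in batch_rewards))
print(rewards[-10:])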
Example #28
def main():
    """ Main program for automatic asset allocation problem.
    """
    # Directories
    input_data_dir = '../../Data/Input/'
    output_data_dir = '../../Data/Output/'

    # Experiment parameters
    batch = 1                      # Number of samples per learning step
    prnts = 100                    # Learning steps before printing results
    nEpisodes = 100/batch/prnts   # Number of rollouts
    nExperiments = 1               # Number of experiments
    et = ExTools(batch, prnts)     # Tool for printing and plotting

    # Parameters
    X = 0.0 / 252    # Daily risk-free rate
    deltaP = 0.00    # Proportional transaction costs
    deltaF = 0.0      # Fixed transaction costs
    deltaS = 0.00    # Short-selling borrowing costs
    P = 5             # Number of past days the agent considers
    discount = 0.95   # Discount factor

    # Evaluation interval sizes
    start = P + 1
    trainingIntervalLength = 70
    testIntervalLength = 30

    # Initialize the market environment
    market = MarketEnvironment(input_data_dir + 'daily_returns.csv', X, P)
    nSamples = len(market.data)
    nPeriods = (nSamples - start + 1) / (trainingIntervalLength + testIntervalLength)

    # Initialize the asset allocation tasks
    task = AssetAllocationTask(market, deltaP, deltaF, deltaS, discount)

    # Initialize controller module
    module = buildNetwork(market.outdim,  # Input layer
                          market.indim,   # Output layer
                          outclass=SoftmaxLayer)  # Output activation function

    # Initialize learner module
    learner = PGPE(storeAllEvaluations=True,
                   learningRate=0.01,
                   sigmaLearningRate=0.01,
                   batchSize=batch,
                   # momentum=0.05,
                   # epsilon=6.0,
                   rprop=False)

    # Initialize learning agent
    agent = OptimizationAgent(module, learner)
    et.agent = agent

    for period in xrange(5):  #  nPeriods):

        # Set initial and final time steps for training
        initialTimeStep = start
        finalTimeStep = start + trainingIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.trainingMode()

        # Initialize experiment
        experiment = EpisodicExperiment(task, agent)

        # Train the agent
        for episode in xrange(nEpisodes):
            for i in xrange(prnts):
                experiment.doEpisodes(batch)
            et.printResults((agent.learner._allEvaluations)[-50:-1],
                            1, episode)

        # Set initial and final time steps for training
        initialTimeStep = start + trainingIntervalLength
        finalTimeStep = initialTimeStep + testIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.backtestMode()

        # Initialize experiment
        experiment = EpisodicExperiment(task, agent)

        # Test the agent
        experiment.doEpisodes(batch)

        # Slide evaluation window
        start += testIntervalLength

    # Print allocations
    task.report.iloc[:, :-1].plot.area(title='Portfolio Allocation - PGPE')
    plt.ylim(0.0, 1.0)
    plt.xlabel('Date')
    plt.ylabel('Portfolio Allocation')
    plt.show()

    # Print cumulative log-returns
    buyHold = market.data.ix[task.report.index, 'SPY']
    buyHoldCumLogReturns = np.log(buyHold + 1.0).cumsum(axis=0)
    ptfCumLogReturns = task.report['ptfLogReturn'].cumsum(axis=0)
    cumLogReturns = pd.DataFrame(index=task.report.index)
    cumLogReturns['Buy & Hold'] = buyHoldCumLogReturns
    cumLogReturns['PGPE'] = ptfCumLogReturns
    cumLogReturns.plot(title='Cumulative Log-Returns - PGPE',
                       lw=2, grid=True)
    plt.xlabel('Date')
    plt.ylabel('Cumulative Log-Returns')
    plt.show()
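A side note on the rolling scheme in Example #28: start advances only by testIntervalLength each period, so successive training windows overlap while the backtest windows tile the data. A small pure-Python sketch of just the interval arithmetic (same variable names as above):

start = 6                       # P + 1 with P = 5
trainingIntervalLength = 70
testIntervalLength = 30
for period in range(5):
    trainWindow = (start, start + trainingIntervalLength)
    testWindow = (trainWindow[1], trainWindow[1] + testIntervalLength)
    print(period, trainWindow, testWindow)
    start += testIntervalLength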
Example #29
 def __init__(self, task, agent):
     '''
     Constructor
     '''
     EpisodicExperiment.__init__(self, task, agent)
Example #30
# create agent
learner = ENAC()
learner.gd.rprop = True
# only relevant for RP
learner.gd.deltamin = 0.0001
#agent.learner.gd.deltanull = 0.05
# only relevant for BP
learner.gd.alpha = 0.01
learner.gd.momentum = 0.9

agent = LearningAgent(net, learner)
agent.actaspg = False

# create experiment
experiment = EpisodicExperiment(task, agent)

# print weights at beginning
print(agent.module.params)

rewards = []
if useGraphics:
    figure()
    ion()
    pl = MultilinePlotter(autoscale=1.2, xlim=[0, 50], ylim=[0, 1])
    pl.setLineStyle(linewidth=2)

# queued version
# experiment._fillQueue(30)
# while True:
#     experiment._stepQueueLoop()
Example #31
from scipy import *
import sys, time

from pybrain.rl.learners.valuebased import ActionValueNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA, NFQ
from pybrain.rl.experiments.episodic import EpisodicExperiment
from pybrain.rl.environments import Task
from tasktest import TestTask
from envtest import TestEnv

env = TestEnv()
task = TestTask(env)

controller = ActionValueNetwork(200, 3)
learner = NFQ()
agent = LearningAgent(controller, learner)

experiment = EpisodicExperiment(task, agent)

i = 0
while True:
    experiment.doEpisodes(10)
    print "Learning"
    agent.learn()
    agent.reset()
    i += 1
    print "Cycle: %d" % i
    if i > 60:
        agent.learning = False
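If the loop in Example #31 were given an exit condition (for instance, a break once i exceeds 60), a short greedy evaluation could follow. A sketch, assuming NFQ's default epsilon-greedy explorer:

agent.learning = False
agent.learner.explorer.epsilon = 0.0   # assumes the default EpsilonGreedyExplorer; act greedily
eval_rewards = experiment.doEpisodes(5)
print(sum(sum(r) for r in eval_rewards) / 5.0)   # average return over 5 episodes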
Example #32
import sys, time

from pybrain.rl.learners.valuebased import ActionValueNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA, NFQ
from pybrain.rl.experiments.episodic import EpisodicExperiment
from pybrain.rl.environments import Task
from tasktest import TestTask
from envtest import TestEnv

env = TestEnv()
task = TestTask(env)

controller = ActionValueNetwork(200, 3)
learner = NFQ()
agent = LearningAgent(controller, learner)

experiment = EpisodicExperiment(task, agent)

i = 0
while True:
    experiment.doEpisodes(10)
    print "Learning"
    agent.learn()
    agent.reset()
    i += 1
    print "Cycle: %d" %i
    if i > 60:
        agent.learning = False

Example #33
import csv
import timeit
from pybrain.rl.learners import NFQ
from pybrain.rl.experiments.episodic import EpisodicExperiment

environment = EuphoriaGame()
controller = euActionValueNetwork(582, 113)
learner = NFQ()
agent = EuphoriaLearningAgent(controller, learner)
agentOp = EuphoriaRandomPlayer(environment)
task = EuphoriaTask(agentOp)
experiment = EpisodicExperiment(task, agent)

i = 0
reward = []
while i<1:
	tic=timeit.default_timer()

	r = experiment.doEpisodes(3)
	for ri in r:
		reward.append(ri[-1])

	with open('rewardList_'+str(i)+'.csv', 'wb') as f:
	    writer = csv.writer(f)
	    writer.writerows([reward])

	# print reward
Example #34
# coding=utf-8
from pybrain.optimization.hillclimber import HillClimber
from pybrain.rl.agents.optimization import OptimizationAgent

from pybrain.rl.environments.cartpole.balancetask import BalanceTask
from pybrain.rl.experiments.episodic import EpisodicExperiment
from pybrain.tools.shortcuts import buildNetwork


task = BalanceTask()

net = buildNetwork(task.outdim, 3, task.indim)

agent = OptimizationAgent(net, HillClimber())
exp = EpisodicExperiment(task, agent)
exp.doEpisodes(100)

print(exp)