def testAugmented():
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.mdpmap import MDPconverter
    from vgdl.agents import PolicyDrivenAgent
    from vgdl.interfaces import GameEnvironment, GameTask
    
    
    zelda_level2 = """
wwwwwwwwwwwww
wA wwk1ww   w
ww  ww    1 w
ww     wwww+w
wwwww1ww  www
wwwww  0  Gww
wwwwwwwwwwwww
"""

    
    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game
    g = VGDLParser().parseGame(rigidzelda_game)
    g.buildLevel(zelda_level2)
    env = GameEnvironment(g, visualize=False,
                          recordingEnabled=True, actionDelay=150)
    C = MDPconverter(g, env=env, verbose=True)
    Ts, R, _ = C.convert()
    print C.states
    print Ts[0]
    print R
    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    task = GameTask(env)    
    exper = EpisodicExperiment(task, agent)
    exper.doEpisodes(1)
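
A minimal sketch (assumed, not from the original source) of what the Ts/R matrices printed above can be used for: taking Ts as a list of per-action |S| x |S| transition matrices and R as a length-|S| state-reward vector, plain value iteration recovers the state values an optimal policy can be read from.

import numpy as np

def valueIteration(Ts, R, discount=0.9, iterations=100):
    # V(s) <- R(s) + discount * max_a sum_s' T_a(s, s') * V(s')
    V = np.zeros(len(R))
    for _ in range(iterations):
        V = R + discount * np.max([T.dot(V) for T in Ts], axis=0)
    return V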
Example #2
def testAugmented():
    from core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from mdpmap import MDPconverter
    from agents import PolicyDrivenAgent
    from interfaces import GameEnvironment, GameTask

    zelda_level2 = """
wwwwwwwwwwwww
wA wwk1ww   w
ww  ww    1 w
ww     wwww+w
wwwww1ww  www
wwwww  0  Gww
wwwwwwwwwwwww
"""

    from examples.gridphysics.mazes.rigidzelda import rigidzelda_game
    g = VGDLParser().parseGame(rigidzelda_game)
    g.buildLevel(zelda_level2)
    env = GameEnvironment(g,
                          visualize=False,
                          recordingEnabled=True,
                          actionDelay=150)
    C = MDPconverter(g, env=env, verbose=True)
    Ts, R, _ = C.convert()
    print C.states
    print Ts[0]
    print R
    env.reset()
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    task = GameTask(env)
    exper = EpisodicExperiment(task, agent)
    exper.doEpisodes(1)
def main():
    agent = SimpleMLPMarioAgent(
        10,
        inGridSize=3,
    )
    print agent.name
    NetworkWriter.writeToFile(agent.module,
                              "../temp/MarioNetwork-" + agent.name + ".xml")

    task = MarioTask(agent.name, timeLimit=200)
    exp = EpisodicExperiment(task, agent)
    res = 0
    cumul = 0
    for seed in [0]:
        for difficulty in [0, 3, 5, 10]:
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty

            exp.doEpisodes(1)
            print 'Difficulty: %d, Seed: %d, Fitness: %.2f' % (
                difficulty, seed, task.reward)
            cumul += task.reward
            if task.reward < 4000:
                break
            res += 1
    print res
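    # prints the module's input buffer (multiplying by 1. gives a plain float copy)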
    print agent.module.inputbuffer * 1.
import os
import pickle
import time

from pybrain.rl.learners.valuebased import ActionValueNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q
from pybrain.rl.experiments.episodic import EpisodicExperiment


class QAlgorithm:
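  # Q-learning bot driven by screen capture: the menu sets self.state
  # (0 = run, 1 = pause, 2 = quit), CheckState() dispatches on it, and
  # runBot() plays one episode, learns, and pickles the controller to bot.txt.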
  def Pause(self):  # if the menu says pause, pause execution
    while self.state == 1:
      time.sleep(.05)
    return True

  def Quit(self):  # if the menu says quit, stop running
    self.process.terminate()
    return False

  def Start(self):  # starts the bot
    if self.process is None:
      self.runBot()
      #self.process = multiprocessing.Process(target=self.runBot, args= [])
      #self.process.start() 
    return True

  def CheckState(self):#checks to see what state the menu says to be in 
    if self.state == 0 :
      self.Start()
    elif self.state == 1:
      self.Pause()
    elif self.state == 2:
      self.Quit()

  def GameOver(self):#checks to see if state requires bot pause, quit or if the game is over
    return self.CheckState() or self.sr.checkEndGame(self.endBox,self.gameOver)

  def __init__(self,rewardBox,box,gameOver,endGame,scoreArea):
    self.reward = rewardBox
    self.bbox = box
    self.environment = TEnviroment(box)#Custom environment class
    if os.path.isfile("bot.txt"):
      with open("bot.txt", "rb") as f:
        self.controller = pickle.load(f)
    else:
      self.controller = ActionValueNetwork(50**2, 4)  # arguments: (framerate * maxPlaytime, number of actions)
    self.learner = Q()
    gf = {0:self.GameOver}
    self.agent = LearningAgent(self.controller, self.learner)
    self.task = TTask(self.environment,scoreArea,gf)#needs custom task
    self.experiment = EpisodicExperiment(self.task, self.agent)
    self.process = None
    self.endBox = endGame
    self.gameOver = gameOver

  def runBot(self):  # runs the bot for a single episode
    self.experiment.doEpisodes()
    self.agent.learn()
    self.agent.reset()
    with open("bot.txt", "wb") as f:
      pickle.dump(self.controller, f)
Example #5
def playSubjectiveGame(game_str, map_str):
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from interfaces import GameTask
    from subjective import SubjectiveGame
    from agents import InteractiveAgent, UserTiredException
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    senv = SubjectiveGame(g, actionDelay=100, recordingEnabled=True)
    task = GameTask(senv)
    iagent = InteractiveAgent()
    exper = EpisodicExperiment(task, iagent)
    try:
        exper.doEpisodes(1)
    except UserTiredException:
        pass
Example #6
def playSubjectiveGame(game_str, map_str):
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from vgdl.interfaces import GameTask
    from vgdl.subjective import SubjectiveGame
    from vgdl.agents import InteractiveAgent, UserTiredException
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    senv = SubjectiveGame(g, actionDelay=100, recordingEnabled=True)
    task = GameTask(senv)
    iagent = InteractiveAgent()
    exper = EpisodicExperiment(task, iagent)
    try:
        exper.doEpisodes(1)
    except UserTiredException:
        pass
def combinedScore(agent, task = None):
    """ Let the agent act on a number of levels of increasing difficulty. 
    Return the combined score."""
    if task is None:
        task = MarioTask(agentName = agent.name)
    exp = EpisodicExperiment(task, agent)
    res = 0
    for difficulty in range(1):
        for seed in range(1):
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty  
            exp.doEpisodes(1)
            print 'Difficulty: %d, Seed: %d, Fitness: %.2f' % (difficulty, seed, task.reward)
            res += task.reward
    return res
Example #8
def testRecordingToGif(human=False):
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from agents import PolicyDrivenAgent, InteractiveAgent
    from tools import makeGifVideo
    from interfaces import GameEnvironment, GameTask

    game_str, map_str = polarmaze_game, maze_level_2
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    env = GameEnvironment(g,
                          visualize=human,
                          recordingEnabled=True,
                          actionDelay=200)
    task = GameTask(env)
    if human:
        agent = InteractiveAgent()
    else:
        agent = PolicyDrivenAgent.buildOptimal(env)
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(1)
    print res

    actions = [a for _, a, _ in env._allEvents]
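    # env._allEvents stores 3-tuples with the action as the middle element;
    # only the action sequence is needed to replay the episode for the gif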
    print actions
    makeGifVideo(env, actions, initstate=env._initstate)
def combinedScore(agent, task=None):
    """ Let the agent act on a number of levels of increasing difficulty. 
    Return the combined score."""
    if task is None:
        task = MarioTask(agentName=agent.name)
    exp = EpisodicExperiment(task, agent)
    res = 0
    for difficulty in range(1):
        for seed in range(1):
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty
            exp.doEpisodes(1)
            print 'Difficulty: %d, Seed: %d, Fitness: %.2f' % (
                difficulty, seed, task.reward)
            res += task.reward
    return res
Example #10
def mlDriver(cv, stateTransfer, actionTransfer):
    # parameter setup
    # dimensionality of state argument (could be less than stateTransfer)
    stateDim = 352
    # number of moves possible
    numMoves = 361
    env = SettleEnv(cv, stateTransfer, actionTransfer)
    task = SettleTask(env)
    controller = RestrictedActionValueNetwork(stateDim, numMoves, env)
    learner = NFQ()
    learner.explorer = EpsilonHackedExplorer(env)
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)
    while True:
        experiment.doEpisodes(10)
        print "Done with experiments"
        agent.learn()
        print "Learned"
        agent.reset()
        print "Cycled"
def test3():
    from examples.gridphysics.mazes import polarmaze_game
    from examples.gridphysics.mazes.simple import maze_level_1b
    from vgdl.core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameTask
    from vgdl.agents import InteractiveAgent, UserTiredException
    from vgdl.subjective import SubjectiveGame
    game_str, map_str = polarmaze_game, maze_level_1b
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)    
    senv = SubjectiveGame(g, actionDelay=100, recordingEnabled=True)
    #senv = GameEnvironment(g, actionDelay=100, recordingEnabled=True, visualize=True)
    task = GameTask(senv)    
    iagent = InteractiveAgent()
    exper = EpisodicExperiment(task, iagent)
    try:
        exper.doEpisodes(1)
    except UserTiredException:
        pass
    print senv._allEvents
Example #12
def mlDriver(cv, stateTransfer, actionTransfer):
    # parameter setup
    # dimensionality of state argument (could be less than stateTransfer)
    stateDim = 352
    # number of moves possible
    numMoves = 361
    env = SettleEnv(cv, stateTransfer, actionTransfer)
    task = SettleTask(env)
    controller = RestrictedActionValueNetwork(stateDim, numMoves, env)
    learner = NFQ()
    learner.explorer = EpsilonHackedExplorer(env)
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)
    while True:
        experiment.doEpisodes(10)
        print "Done with experiments"
        agent.learn()
        print "Learned"
        agent.reset()
        print "Cycled"
Example #13
def test3():
    from examples.gridphysics.mazes import polarmaze_game
    from examples.gridphysics.mazes.simple import maze_level_1b
    from core import VGDLParser
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from interfaces import GameTask
    from agents import InteractiveAgent, UserTiredException
    from subjective import SubjectiveGame
    game_str, map_str = polarmaze_game, maze_level_1b
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    senv = SubjectiveGame(g, actionDelay=100, recordingEnabled=True)
    #senv = GameEnvironment(g, actionDelay=100, recordingEnabled=True, visualize=True)
    task = GameTask(senv)
    iagent = InteractiveAgent()
    exper = EpisodicExperiment(task, iagent)
    try:
        exper.doEpisodes(1)
    except UserTiredException:
        pass
    print senv._allEvents
Example #14
def test4():
    """ Same thing, but animated. """
    from examples.gridphysics.mazes.windy import windy_stoch_game, windy_level
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameEnvironment, GameTask
    from vgdl.agents import PolicyDrivenAgent
    from vgdl.core import VGDLParser
    g = VGDLParser().parseGame(windy_stoch_game)
    g.buildLevel(windy_level)
    env = GameEnvironment(g, visualize=True, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(5)
    print res
def main():
    agent = SimpleMLPMarioAgent(10, inGridSize=3)
    print agent.name
    NetworkWriter.writeToFile(agent.module, "../temp/MarioNetwork-"+agent.name+".xml")
    
    task = MarioTask(agent.name, timeLimit = 200)
    exp = EpisodicExperiment(task, agent)
    res = 0
    cumul = 0
    for seed in [0]:
        for difficulty in [0,3,5,10]:
            task.env.levelSeed = seed
            task.env.levelDifficulty = difficulty  
            
            exp.doEpisodes(1)
            print 'Difficulty: %d, Seed: %d, Fitness: %.2f' % (difficulty, seed, task.reward)
            cumul += task.reward
            if task.reward < 4000:
                break
            res += 1
    print res
    print agent.module.inputbuffer*1.
Example #16
def test4():
    """ Same thing, but animated. """
    from examples.gridphysics.mazes.windy import windy_stoch_game, windy_level
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.interfaces import GameEnvironment, GameTask
    from vgdl.agents import PolicyDrivenAgent
    from vgdl.core import VGDLParser
    g = VGDLParser().parseGame(windy_stoch_game)
    g.buildLevel(windy_level)
    env = GameEnvironment(g, visualize=True, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(5)
    print res
def testPolicyAgent():
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from vgdl.agents import PolicyDrivenAgent
    from vgdl.interfaces import GameEnvironment, GameTask
    game_str, map_str = polarmaze_game, maze_level_2
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    
    env = GameEnvironment(g, visualize=False, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(2)
    print res
Example #18
def testPolicyAgent():
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from agents import PolicyDrivenAgent
    from interfaces import GameEnvironment, GameTask
    game_str, map_str = polarmaze_game, maze_level_2
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)

    env = GameEnvironment(g, visualize=False, actionDelay=100)
    task = GameTask(env)
    agent = PolicyDrivenAgent.buildOptimal(env)
    env.visualize = True
    env.reset()
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(2)
    print res
def main():
    agent = ForwardAgent()
    task = MarioTask(agent.name, initMarioMode=1)
    exp = EpisodicExperiment(task, agent)
    print 'Task Ready'
    exp.doEpisodes(3)
    print 'mm 1:', task.reward

    task.env.initMarioMode = 2
    exp.doEpisodes(1)
    print 'mm 2:', task.reward

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print 'mm 0:', task.reward

    print "finished"
Example #20
def main():
    agent = ForwardAgent()
    task = MarioTask(agent.name, initMarioMode=1)
    exp = EpisodicExperiment(task, agent)
    print "Task Ready"
    exp.doEpisodes(3)
    print "mm 1:", task.reward

    task.env.initMarioMode = 2
    exp.doEpisodes(1)
    print "mm 2:", task.reward

    task.env.initMarioMode = 0
    exp.doEpisodes(1)
    print "mm 0:", task.reward

    print "finished"
def testInteractions():
    from random import randint
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1    
    from pybrain.rl.agents.agent import Agent
    from vgdl.interfaces import GameEnvironment, GameTask
    
    class DummyAgent(Agent):
        total = 4
        def getAction(self):
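            # pick a uniformly random action index from 0 to total - 1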
            res = randint(0, self.total - 1)
            return res    
        
    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    
    env = GameEnvironment(g, visualize=True, actionDelay=100)
    task = GameTask(env)
    agent = DummyAgent()
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(2)
    print res
def testRecordingToGif(human=False):
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from vgdl.core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_2
    from vgdl.agents import PolicyDrivenAgent, InteractiveAgent
    from vgdl.tools import makeGifVideo
    from vgdl.interfaces import GameEnvironment, GameTask
    
    game_str, map_str = polarmaze_game, maze_level_2
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)
    env = GameEnvironment(g, visualize=human, recordingEnabled=True, actionDelay=200)
    task = GameTask(env)
    if human:
        agent = InteractiveAgent()
    else:
        agent = PolicyDrivenAgent.buildOptimal(env)
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(1)
    print res
    
    actions = [a for _, a, _ in env._allEvents]
    print actions
    makeGifVideo(env, actions, initstate=env._initstate)
Example #23
def testInteractions():
    from random import randint
    from pybrain.rl.experiments.episodic import EpisodicExperiment
    from core import VGDLParser
    from examples.gridphysics.mazes import polarmaze_game, maze_level_1
    from pybrain.rl.agents.agent import Agent
    from interfaces import GameEnvironment, GameTask

    class DummyAgent(Agent):
        total = 4

        def getAction(self):
            res = randint(0, self.total - 1)
            return res

    game_str, map_str = polarmaze_game, maze_level_1
    g = VGDLParser().parseGame(game_str)
    g.buildLevel(map_str)

    env = GameEnvironment(g, visualize=True, actionDelay=100)
    task = GameTask(env)
    agent = DummyAgent()
    exper = EpisodicExperiment(task, agent)
    res = exper.doEpisodes(2)
    print res
# coding=utf-8
from pybrain.optimization.hillclimber import HillClimber
from pybrain.rl.agents.optimization import OptimizationAgent

from pybrain.rl.environments.cartpole.balancetask import BalanceTask
from pybrain.rl.experiments.episodic import EpisodicExperiment
from pybrain.tools.shortcuts import buildNetwork


task = BalanceTask()

net = buildNetwork(task.outdim, 3, task.indim)
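# the network maps task observations (task.outdim) through 3 hidden units to actions (task.indim)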

agent = OptimizationAgent(net, HillClimber())
exp = EpisodicExperiment(task, agent)
exp.doEpisodes(100)

print(exp)
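
The same episodic setup works with other PyBrain black-box optimizers; a hedged sketch (assuming the usual pybrain.optimization package layout) swapping HillClimber for CMAES:

from pybrain.optimization import CMAES

agent = OptimizationAgent(net, CMAES())
exp = EpisodicExperiment(task, agent)
exp.doEpisodes(100)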
Example #25
def main():
    """ Main program for automatic asset allocation problem.
    """
    # Directories
    input_data_dir = '../../Data/Input/'
    output_data_dir = '../../Data/Output/'

    # Experiment parameters
    batch = 1                      # Number of samples per learning step
    prnts = 100                    # Learning steps before printing results
    nEpisodes = 100/batch/prnts   # Number of rollouts
    nExperiments = 1               # Number of experiments
    et = ExTools(batch, prnts)     # Tool for printing and plotting

    # Parameters
    X = 0.0 / 252    # Daily risk-free rate
    deltaP = 0.00    # Proportional transaction costs
    deltaF = 0.0      # Fixed transaction costs
    deltaS = 0.00    # Short-selling borrowing costs
    P = 5             # Number of past days the agent considers
    discount = 0.95   # Discount factor

    # Evaluation interval sizes
    start = P + 1
    trainingIntervalLength = 70
    testIntervalLength = 30

    # Initialize the market environment
    market = MarketEnvironment(input_data_dir + 'daily_returns.csv', X, P)
    nSamples = len(market.data)
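    # Python 2 integer division: number of full train+test windows that fit in the data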
    nPeriods = (nSamples - start + 1) / (trainingIntervalLength + testIntervalLength)

    # Initialize the asset allocation tasks
    task = AssetAllocationTask(market, deltaP, deltaF, deltaS, discount)

    # Initialize controller module
    module = buildNetwork(market.outdim,  # Input layer
                          market.indim,   # Output layer
                          outclass=SoftmaxLayer)  # Output activation function

    # Initialize learner module
    learner = PGPE(storeAllEvaluations=True,
                   learningRate=0.01,
                   sigmaLearningRate=0.01,
                   batchSize=batch,
                   # momentum=0.05,
                   # epsilon=6.0,
                   rprop=False)

    # Initialize learning agent
    agent = OptimizationAgent(module, learner)
    et.agent = agent

    for period in xrange(5):  # xrange(nPeriods) for the full run

        # Set initial and final time steps for training
        initialTimeStep = start
        finalTimeStep = start + trainingIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.trainingMode()

        # Initialize experiment
        experiment = EpisodicExperiment(task, agent)

        # Train the agent
        for episode in xrange(nEpisodes):
            for i in xrange(prnts):
                experiment.doEpisodes(batch)
            et.printResults((agent.learner._allEvaluations)[-50:-1],
                            1, episode)

        # Set initial and final time steps for testing
        initialTimeStep = start + trainingIntervalLength
        finalTimeStep = initialTimeStep + testIntervalLength
        task.setEvaluationInterval(initialTimeStep, finalTimeStep)
        task.backtestMode()

        # Initialize experiment
        experiment = EpisodicExperiment(task, agent)

        # Test the agent
        experiment.doEpisodes(batch)

        # Slide evaluation window
        start += testIntervalLength

    # Print allocations
    task.report.iloc[:, :-1].plot.area(title='Portfolio Allocation - PGPE')
    plt.ylim(0.0, 1.0)
    plt.xlabel('Date')
    plt.ylabel('Portfolio Allocation')
    plt.show()

    # Print cumulative log-returns
    buyHold = market.data.ix[task.report.index, 'SPY']
    buyHoldCumLogReturns = np.log(buyHold + 1.0).cumsum(axis=0)
    ptfCumLogReturns = task.report['ptfLogReturn'].cumsum(axis=0)
    cumLogReturns = pd.DataFrame(index=task.report.index)
    cumLogReturns['Buy & Hold'] = buyHoldCumLogReturns
    cumLogReturns['PGPE'] = ptfCumLogReturns
    cumLogReturns.plot(title='Cumulative Log-Returns - PGPE',
                       lw=2, grid=True)
    plt.xlabel('Date')
    plt.ylabel('Cumulative Log-Returns')
    plt.show()
Example #26
# queued version
# experiment._fillQueue(30)
# while True:
#     experiment._stepQueueLoop()
#     # rewards.append(mean(agent.history.getSumOverSequences('reward')))
#     print agent.module.getParameters(),
#     print mean(agent.history.getSumOverSequences('reward'))
#     clf()
#     plot(rewards)

# episodic version
x = 0
batch = 30  #number of samples per gradient estimate (was: 20; more here due to stochastic setting)
while x < 5000:
    #while True:
    experiment.doEpisodes(batch)
    x += batch
    reward = mean(
        agent.history.getSumOverSequences('reward')) * task.rewardscale
    if useGraphics:
        pl.addData(0, x, reward)
    print(agent.module.params)
    print(reward)
    #if reward > 3:
    #    pass
    agent.learn()
    agent.reset()
    if useGraphics:
        pl.update()

if len(sys.argv) > 2:
Example #27
import sys, time

from pybrain.rl.learners.valuebased import ActionValueNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA, NFQ
from pybrain.rl.experiments.episodic import EpisodicExperiment
from pybrain.rl.environments import Task
from tasktest import TestTask
from envtest import TestEnv

env = TestEnv()
task = TestTask(env)

controller = ActionValueNetwork(200, 3)
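# action-value network over a 200-dimensional state with 3 discrete actions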
learner = NFQ()
agent = LearningAgent(controller, learner)

experiment = EpisodicExperiment(task, agent)

i = 0
while True:
    experiment.doEpisodes(10)
    print "Learning"
    agent.learn()
    agent.reset()
    i += 1
    print "Cycle: %d" %i
    if i > 60:
        agent.learning = False

Example #28
from scipy import *
import sys, time

from pybrain.rl.learners.valuebased import ActionValueNetwork
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA, NFQ
from pybrain.rl.experiments.episodic import EpisodicExperiment
from pybrain.rl.environments import Task
from tasktest import TestTask
from envtest import TestEnv

env = TestEnv()
task = TestTask(env)

controller = ActionValueNetwork(200, 3)
learner = NFQ()
agent = LearningAgent(controller, learner)

experiment = EpisodicExperiment(task, agent)

i = 0
while True:
    experiment.doEpisodes(10)
    print "Learning"
    agent.learn()
    agent.reset()
    i += 1
    print "Cycle: %d" % i
    if i > 60:
        agent.learning = False
Example #29
learner = NFQ()

agent = EuphoriaLearningAgent(controller, learner)

agentOp = EuphoriaRandomPlayer(environment)

task = EuphoriaTask(agentOp)

experiment = EpisodicExperiment(task, agent)

i = 0
reward = []
while i < 1:
    tic = timeit.default_timer()

    r = experiment.doEpisodes(3)
    for ri in r:
        reward.append(ri[-1])

    with open('rewardList_' + str(i) + '.csv', 'wb') as f:
        writer = csv.writer(f)
        writer.writerows([reward])

    # print reward
    # agent.learn()
    # agent.reset()

    toc = timeit.default_timer()
    print toc - tic  # elapsed time in seconds

    i += 1
Example #30
# queued version
# experiment._fillQueue(30)
# while True:
#     experiment._stepQueueLoop()
#     # rewards.append(mean(agent.history.getSumOverSequences('reward')))
#     print agent.module.getParameters(),
#     print mean(agent.history.getSumOverSequences('reward'))
#     clf()
#     plot(rewards)

# episodic version
x = 0
batch = 30  # number of samples per gradient estimate (was: 20; more here due to stochastic setting)
while x < 5000:
    # while True:
    experiment.doEpisodes(batch)
    x += batch
    reward = mean(agent.history.getSumOverSequences('reward')) * task.rewardscale
    if useGraphics:
        pl.addData(0, x, reward)
    print(agent.module.params)
    print(reward)
    # if reward > 3:
    #     pass
    agent.learn()
    agent.reset()
    if useGraphics:
        pl.update()


if len(sys.argv) > 2: