def createExperimentInstance():
    gymRawEnv = gym.make('MountainCarContinuous-v0')

    # discretize the continuous observation and action spaces
    cartPositionGroup = Digitizer.buildBins(-1.2, 0.6, 16)
    cartVelocityGroup = Digitizer.buildBins(-0.07, 0.07, 4)
    actionDedigitizer = Digitizer.build(-1.0, 1.0, 5, True)
    # print("Cart position bins:", cartPositionGroup)
    # print("Cart velocity bins:", cartVelocityGroup)
    # print("Cart force bins:", actionDedigitizer.bins, actionDedigitizer.possibleValues())

    observationDigitizer = ArrayDigitizer([cartPositionGroup, cartVelocityGroup])
    transformation = EnvTransformation(observationDigitizer, actionDedigitizer)

    task = GymTask.createTask(gymRawEnv)
    env = task.env
    env.setTransformation(transformation)
    # env.setCumulativeRewardMode()

    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    ## alpha -- learning rate (preference of new information)
    ## gamma -- discount factor (importance of future reward)

    # create value table and initialize with zeros
    table = ActionValueTable(observationDigitizer.states, actionDedigitizer.states)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )

    agent = createAgent(table)

    experiment = Experiment(task, agent)
    experiment = ProcessExperiment(experiment, doSingleExperiment)
    return experiment
def createExperimentInstance():
    gymRawEnv = gym.make('MountainCar-v0')

    # discretize the continuous observation space
    cartPositionGroup = Digitizer.buildBins(-1.2, 0.6, 16)
    cartVelocityGroup = Digitizer.buildBins(-0.07, 0.07, 16)
    # print("Cart position bins:", cartPositionGroup)
    # print("Cart velocity bins:", cartVelocityGroup)

    observationDigitizer = ArrayDigitizer([cartPositionGroup, cartVelocityGroup])
    transformation = EnvTransformation(observationDigitizer)

    task = GymTask.createTask(gymRawEnv)
    env = task.env
    env.setTransformation(transformation)
    # env.setCumulativeRewardMode()

    # create value table and initialize with zeros
    table = ActionValueTable(observationDigitizer.states, env.numActions)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )

    agent = createAgent(table)

    experiment = Experiment(task, agent)
    experiment = ProcessExperiment(experiment, ExperimentIteration())
    return experiment
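
# A minimal driver for the factory functions above -- a sketch only: it assumes
# the ProcessExperiment wrapper still exposes the standard pybrain Experiment
# interface (doInteractions() and an .agent attribute); adapt it to whatever
# ProcessExperiment actually provides.
def runExperimentSketch(episodes=100, stepsPerEpisode=200):
    experiment = createExperimentInstance()
    agent = experiment.agent                        # assumed passthrough attribute
    for _ in range(episodes):
        experiment.doInteractions(stepsPerEpisode)  # gather one batch of steps
        agent.learn()                               # update the action-value table
        agent.reset()                               # clear the agent's history
    return experiment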
def setup_RL():
    # create the maze with walls (1)
    envmatrix = np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                          [1, 0, 0, 1, 0, 0, 0, 0, 1],
                          [1, 0, 0, 1, 0, 0, 1, 0, 1],
                          [1, 0, 0, 1, 0, 0, 1, 0, 1],
                          [1, 0, 0, 1, 0, 1, 1, 0, 1],
                          [1, 0, 0, 0, 0, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1, 1, 1, 0, 1],
                          [1, 0, 0, 0, 0, 0, 0, 0, 1],
                          [1, 1, 1, 1, 1, 1, 1, 1, 1]])
    env = Maze(envmatrix, (7, 7))
    # create task
    task = MDPMazeTask(env)
    # create value table (81 states x 4 actions) and initialize with zeros
    table = ActionValueTable(81, 4)
    table.initialize(0.)
    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    # learner = Q()
    learner = SARSA()
    # create agent
    agent = LearningAgent(table, learner)
    # create experiment
    experiment = Experiment(task, agent)
    return experiment, agent, table
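
# Typical use of setup_RL() with the standard pybrain training loop: interact
# in batches, learn from the collected history, then inspect the value table.
# The 81x4 reshape mirrors the table dimensions created above.
experiment, agent, table = setup_RL()
for _ in range(50):
    experiment.doInteractions(100)   # collect 100 state/action/reward triples
    agent.learn()                    # SARSA update from the stored history
    agent.reset()                    # wipe the history before the next batch
# greedy state values over the 9x9 maze: best action value for each cell
state_values = table.params.reshape(81, 4).max(1).reshape(9, 9)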
def initExperiment(alg, optimistic=True):
    env = Maze(envmatrix, (7, 7))
    # create task
    task = MDPMazeTask(env)
    # create value table; optimistic initialization (ones) encourages exploration
    table = ActionValueTable(81, 4)
    if optimistic:
        table.initialize(1.)
    else:
        table.initialize(0.)
    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    learner = alg()
    # standard exploration is e-greedy, but a different type can be chosen as well
    # learner.explorer = BoltzmannExplorer()
    agent = LearningAgent(table, learner)
    agent.batchMode = False
    experiment = Experiment(task, agent)
    experiment.allRewards = []
    return experiment
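
# Example call of initExperiment(): alg takes the learner *class*, so Q and
# SARSA runs can be prepared side by side. With batchMode = False the agent
# learns online, so doInteractions() alone advances the value table; the
# episode counts here are illustrative only.
from pybrain.rl.learners import Q, SARSA

for name, alg in (('Q', Q), ('SARSA', SARSA)):
    experiment = initExperiment(alg, optimistic=True)
    experiment.doInteractions(1000)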
def __init__(self, mode):
    self.mode = mode
    cu.mem('Reinforcement Learning Started')
    self.environment = RegionFilteringEnvironment(config.get(mode + 'Database'), mode)
    self.controller = QNetwork()
    cu.mem('QNetwork controller created')
    self.learner = None
    self.agent = RegionFilteringAgent(self.controller, self.learner)
    self.task = RegionFilteringTask(self.environment, config.get(mode + 'GroundTruth'))
    self.experiment = Experiment(self.task, self.agent)
def __init__(self):
    # action-value table: 2 states x 3 actions
    self.av_table = ActionValueTable(2, 3)
    self.av_table.initialize(0.1)
    # SARSA learner with a purely greedy explorer (epsilon = 0)
    learner = SARSA()
    learner._setExplorer(EpsilonGreedyExplorer(0.0))
    self.agent = LearningAgent(self.av_table, learner)
    env = HASSHEnv()
    task = HASSHTask(env)
    self.experiment = Experiment(task, self.agent)
def learn(self, number_of_iterations):
    from functools import reduce  # py3: reduce lives in functools

    # Q-learning with alpha=0.2, gamma=0.8
    learner = Q(0.2, 0.8)
    task = CartMovingTask(self.environment)
    # one table row per discretized state, one column per force level
    num_states = reduce(lambda x, y: x * y, map(len, self.ranges))
    self.controller = ActionValueTable(num_states, self.force_granularity)
    self.controller.initialize(1.)
    agent = LearningAgent(self.controller, learner)
    experiment = Experiment(task, agent)
    for i in range(number_of_iterations):
        experiment.doInteractions(1)
        agent.learn()
        agent.reset()
    # pickle requires a binary file handle
    with open("test.pcl", "wb") as f:
        pickle.dump(self.controller, f)
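
# Counterpart to the pickle dump above -- a sketch of restoring the trained
# table for greedy play; some_state is a hypothetical discretized state index.
import pickle

with open("test.pcl", "rb") as f:
    controller = pickle.load(f)
some_state = 0  # hypothetical: index of the current discretized state
best_force = controller.getMaxAction(some_state)  # greedy action, no exploration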
def initExperiment(learnalg='Q', history=None, binEdges='10s',
                   scriptfile='./rlRunExperiment_v2.pl',
                   resetscript='./rlResetExperiment.pl'):
    if binEdges == '10s':
        centerBinEdges = centerBinEdges_10s
    elif binEdges == '30s':
        centerBinEdges = centerBinEdges_30s
    elif binEdges == 'lessperturbed':
        centerBinEdges = centerBinEdges_10s_lessperturbed
    elif binEdges is None:
        centerBinEdges = None
    else:
        raise Exception("No bins for given binEdges setting")

    env = OmnetEnvironment(centerBinEdges, scriptfile, resetscript)
    if history is not None:
        env.data = history['data']

    task = OmnetTask(env, centerBinEdges)
    if history is not None:
        task.allrewards = history['rewards']

    if learnalg == 'Q':
        nstates = env.numSensorBins ** env.numSensors
        if history is None:
            av_table = ActionValueTable(nstates, env.numActions)
            av_table.initialize(1.)
        else:
            av_table = history['av_table']
        learner = Q(0.1, 0.9)  # alpha, gamma
        learner._setExplorer(EpsilonGreedyExplorer(0.05))  # epsilon
    elif learnalg == 'NFQ':
        av_table = ActionValueNetwork(env.numSensors, env.numActions)
        learner = NFQ()
    else:
        raise Exception("learnalg unknown")

    agent = LearningAgent(av_table, learner)
    experiment = Experiment(task, agent)
    if history is None:
        experiment.nruns = 0
    else:
        experiment.nruns = history['nruns']
    return experiment
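
# Sketch of the checkpoint/resume cycle initExperiment() supports through its
# history dict; the keys mirror the lookups above ('data', 'rewards',
# 'av_table', 'nruns'). The attribute paths are the standard pybrain ones
# (Experiment.task, Experiment.agent, LearningAgent.module) and are assumed
# to hold for the custom Omnet classes as well.
experiment = initExperiment(learnalg='Q')
experiment.doInteractions(500)
experiment.agent.learn()
history = {
    'data': experiment.task.env.data,
    'rewards': experiment.task.allrewards,
    'av_table': experiment.agent.module,
    'nruns': experiment.nruns + 1,
}
experiment = initExperiment(learnalg='Q', history=history)  # resumes where we left off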
def maze():
    # import sys, time
    pylab.gray()
    pylab.ion()
    # The goal appears to be in the upper right
    structure = [
        '!!!!!!!!!!',
        '! !  ! ! !',
        '! !! ! ! !',
        '!   !    !',
        '! !!!!!! !',
        '! ! !    !',
        '! ! !!!! !',
        '!        !',
        '! !!!!!  !',
        '!   !    !',
        '!!!!!!!!!!',
    ]
    structure = np.array([[ord(c) - ord(' ') for c in row] for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for i in range(100):
        experiment.doInteractions(100)
        agent.learn()
        agent.reset()
        # 4 actions, 110 locations/states (11x10 grid);
        # max(1) plots the largest action value for each square
        pylab.pcolor(controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape))
        pylab.draw()

    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down
    # to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list('NESW'))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(' #'))[structure])
    print('Maze map:')
    print('\n'.join(''.join(row) for row in maze))
    print('Greedy policy:')
    print('\n'.join(''.join(row) for row in greedy_policy))
def main():
    rospy.init_node("lauron_reinforcement_learning")
    environment = RLEnvironment()
    dim_state = environment.joint_states.shape[0]
    num_actions = len(environment.actions)
    controller = ActionValueNetwork(dim_state, num_actions)
    learner = SARSA()
    agent = LearningAgent(controller, learner)
    task = RLTask(environment)
    experiment = Experiment(task, agent)

    episode_counter = 0
    while True:
        print("Training episode {}".format(episode_counter))
        experiment.doInteractions(NUM_INTERACTIONS)
        agent.learn()
        agent.reset()
        episode_counter += 1
def test_maze():
    # simplified version of the reinforcement learning tutorial example
    structure = [
        list('!!!!!!!!!!'),
        list('! !  ! ! !'),
        list('! !! ! ! !'),
        list('!   !    !'),
        list('! !!!!!! !'),
        list('! ! !    !'),
        list('! ! !!!! !'),
        list('!        !'),
        list('! !!!!!  !'),
        list('!   !    !'),
        list('!!!!!!!!!!'),
    ]
    structure = np.array([[ord(c) - ord(' ') for c in row] for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for i in range(30):
        experiment.doInteractions(30)
        agent.learn()
        agent.reset()

    controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape)
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down
    # to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list('NESW'))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(' #'))[structure])
    print('Maze map:')
    print('\n'.join(''.join(row) for row in maze))
    print('Greedy policy:')
    print('\n'.join(''.join(row) for row in greedy_policy))
    assert '\n'.join(''.join(row) for row in greedy_policy) == \
        'NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN'
def createExperimentInstance():
    gymRawEnv = gym.make('Taxi-v2')

    transformation = EnvTransformation()

    task = GymTask.createTask(gymRawEnv)
    env = task.env
    env.setTransformation(transformation)
    ## env.setCumulativeRewardMode()

    ## create value table and initialize with zeros
    table = ActionValueTable(env.numStates, env.numActions)
    # table = ActionValueTableWrapper(table)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )

    agent = createAgent(table)

    experiment = Experiment(task, agent)
    experiment = ProcessExperiment(experiment, experimentIteration)
    return experiment
def run():
    """
    number of states: current hand value, 0-20
    number of actions: Stand=0, Hit=1
    """
    # define action value table
    av_table = ActionValueTable(MAX_VAL, MIN_VAL)
    av_table.initialize(0.)

    # define Q-learning agent with a purely greedy explorer (epsilon = 0)
    q_learner = Q(Q_ALPHA, Q_GAMMA)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(av_table, q_learner)

    # define the environment
    env = BlackjackEnv()

    # define the task
    task = BlackjackTask(env, verbosity=VERBOSE)

    # finally, define experiment
    experiment = Experiment(task, agent)

    # ready to go, start the process
    for _ in range(NB_ITERATION):
        experiment.doInteractions(1)
        if task.lastreward != 0:
            if VERBOSE:
                print("Agent learn")
            agent.learn()

    # dump the learned action values as a markdown table
    print('|First State|Choice 0 (Stand)|Choice 1 (Hit)|Relative value of Standing over Hitting|')
    print('|:-------:|:-------|:-----|:-----|')
    for i in range(MAX_VAL):
        print('| %s | %s | %s | %s |' % (
            (i + 1),
            av_table.getActionValues(i)[0],
            av_table.getActionValues(i)[1],
            av_table.getActionValues(i)[0] - av_table.getActionValues(i)[1]))
def createExperimentInstance():
    gymRawEnv = gym.make('FrozenLake-v0')

    transformation = EnvTransformation()

    task = GymTask.createTask(gymRawEnv)
    env = task.env
    env.setTransformation(transformation)
    ## env.setCumulativeRewardMode()

    # create value table and initialize with zeros
    table = ActionValueTable(gymRawEnv.observation_space.n, gymRawEnv.action_space.n)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )

    agent = createAgent(table)

    experiment = Experiment(task, agent)
    iterator = ExperimentIteration()
    quality = QualityFunctor()
    experiment = ProcessExperiment(experiment, iterator, quality)
    return experiment
def __init__(self, text_to_speech, speech_to_text):
    Feature.__init__(self)

    # setup AV table (13 states x 2 actions); reuse saved parameters if available
    self.av_table = GameTable(13, 2)
    if not self.av_table.loadParameters():
        self.av_table.initialize(0.)

    # setup a Q-learning agent with a purely greedy explorer
    learner = Q(0.5, 0.0)
    learner._setExplorer(EpsilonGreedyExplorer(0.0))
    self.agent = LearningAgent(self.av_table, learner)

    # setup game interaction
    self.game_interaction = GameInteraction(text_to_speech, speech_to_text)

    # setup environment
    environment = GameEnvironment(self.game_interaction)

    # setup task
    task = GameTask(environment, self.game_interaction)

    # setup experiment
    self.experiment = Experiment(task, self.agent)
# Initialize Reinforcement Learning
learner = Q(0.5, 0.0)
learner._setExplorer(EpsilonGreedyExplorer(0.0))
agent = LearningAgent(controller, learner)

# Setup the PyBrain and PyGame Environments
environment = Environment()
game = RunPacman(environment)

# Create the Task for the Pac-Man Agent to Accomplish and initialize the first Action
task = PacmanTask(environment, game)
task.performAction(np.array([1]))

# The Experiment is the PyBrain link between the task to be completed and the agent completing it
experiment = Experiment(task, agent)

currentGame = 1

# Continue to loop until the 'X' on the GUI is clicked
while True:
    # Let the agent interact with the environment (move in a direction), then learn from it
    experiment.doInteractions(1)
    agent.learn()

    # Check whether the current Pac-Man game ended and a new one needs to start
    if game.wonGame == 1 or game.wonGame == -1:
        currentGame += 1
        # Store the information the agent has learned in long-term memory and
        # clear the short-term memory to reduce any chance of overfitting
        agent.learn()
        agent.reset()
import sys, time
from numpy import array
from pybrain.rl.environments import Task
from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.environments.mazes import Maze, MDPMazeTask
from pybrain.rl.experiments import Experiment
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA

var_structure_arr_ = array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                            [1, 0, 0, 1, 0, 0, 0, 0, 1],
                            [1, 0, 0, 1, 0, 0, 1, 0, 1],
                            [1, 0, 0, 1, 0, 0, 1, 0, 1],
                            [1, 0, 0, 1, 0, 1, 1, 0, 1],
                            [1, 0, 0, 0, 0, 0, 1, 0, 1],
                            [1, 1, 1, 1, 1, 1, 1, 0, 1],
                            [1, 0, 0, 0, 0, 0, 0, 0, 1],
                            [1, 1, 1, 1, 1, 1, 1, 1, 1]])

# build the maze environment (goal field at (7, 7)) and the MDP task on top of it
var_environment_ = Maze(var_structure_arr_, (7, 7))

var_controller_ = ActionValueTable(81, 4)
var_controller_.initialize(1.0)

var_learner_ = Q()
var_Agent_ = LearningAgent(var_controller_, var_learner_)

var_task_ = MDPMazeTask(var_environment_)
experiment = Experiment(var_task_, var_Agent_)
import pickle
import time

# Create environment
sub_env = Environment(20, 20)
world = World(sub_env)

# Brain for the animat; we have already trained the network
with open('neuro.net', 'rb') as f:  # pickle needs a binary file handle
    trained_net = pickle.load(f)
brain = BrainController(trained_net)

# Learning method we use
# learner = PolicyGradientLearner()
learner = ENAC()
learner._setLearningRate(0.2)

# Create an animat
animat = StupidAnimat(trained_net, learner, sub_env)

# Establish a task
task = InteractTask(world, animat)
brain.validate_net()

experiment = Experiment(task, animat)
while True:
    experiment.doInteractions(10000)
    animat.learn()
    animat.reset()
    brain.validate_net()
    time.sleep(3)
def Py_Brain():
    ############################
    # pybrain
    ############################
    import math

    import numpy as np
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap
    import itertools
    from scipy import linalg
    from pybrain.rl.environments.mazes import Maze, MDPMazeTask
    from pybrain.rl.learners.valuebased import ActionValueTable
    from pybrain.rl.agents import LearningAgent
    from pybrain.rl.learners import Q, SARSA
    from pybrain.rl.experiments import Experiment
    from pybrain.rl.environments import Task
    import pylab

    #pylab.gray()
    #pylab.ion()

    '''
    structure = np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                          [1, 0, 0, 1, 0, 0, 0, 0, 1],
                          [1, 0, 0, 1, 0, 0, 1, 0, 1],
                          [1, 0, 0, 1, 0, 0, 1, 0, 1],
                          [1, 0, 0, 1, 0, 1, 1, 0, 1],
                          [1, 0, 0, 0, 0, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1, 1, 1, 0, 1],
                          [1, 0, 0, 0, 0, 0, 0, 0, 1],
                          [1, 1, 1, 1, 1, 1, 1, 1, 1]])
    '''
    structure = np.array([[1, 1, 1, 1, 1],
                          [1, 1, 0, 0, 1],
                          [1, 1, 0, 1, 1],
                          [1, 0, 0, 1, 1],
                          [1, 1, 1, 1, 1]])
    num_states = int(structure.shape[0] * structure.shape[1])
    SQRT = int(math.sqrt(num_states))

    #print(structure.item((1, 3)))
    #environment = Maze(structure, (7, 7))  # second parameter is goal field tuple
    environment = Maze(structure, (1, 3))  # second parameter is goal field tuple
    print(type(environment))
    print(environment)

    # Standard maze environment comes with the following 4 actions:
    # North, South, East, West
    controller = ActionValueTable(num_states, 4)  # [N, S, E, W]
    controller.initialize(1)

    learner = Q()
    agent = LearningAgent(controller, learner)
    np.not_equal(agent.lastobs, None)  # leftover sanity check (result unused)

    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    #while True:
    for x in range(4):
        print(x)
        experiment.doInteractions(10)
        agent.learn()
        agent.reset()
        pylab.pcolor(controller.params.reshape(num_states, 4).max(1).reshape(SQRT, SQRT))
        pylab.draw()
    #pylab.show()

    name = 'MAZE'
    plt.savefig(str(name) + '_PLOT.png')
    plt.close()