def initExperiment(alg, optimistic=True):
    env = Maze(envmatrix, (7, 7))

    # create task
    task = MDPMazeTask(env)

    # create value table and initialize it (optimistically with ones, or with zeros)
    table = ActionValueTable(81, 4)
    if optimistic:
        table.initialize(1.)
    else:
        table.initialize(0.)

    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    learner = alg()

    # standard exploration is e-greedy, but a different type can be chosen as well
    # learner.explorer = BoltzmannExplorer()

    agent = LearningAgent(table, learner)
    agent.batchMode = False

    experiment = Experiment(task, agent)
    experiment.allRewards = []
    return experiment
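Not part of the original snippet: a minimal sketch of driving this factory, assuming Q is importable from pybrain.rl.learners as in the other snippets here. Since the agent is put in online mode (batchMode = False), a driver only needs to run interactions; the counts are arbitrary.

experiment = initExperiment(Q, optimistic=True)
for _ in range(100):
    # batchMode is False, so the Q-update happens online inside each interaction
    experiment.doInteractions(100)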
def testMaze():
    # simplified version of the reinforcement learning tutorial example
    structure = np.array([[1, 1, 1, 1, 1],
                          [1, 0, 0, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1]])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for i in range(3):
        experiment.doInteractions(40)

    controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape)
    # (0, 0) is upper left and (0, N) is upper right,
    # so flip the matrix upside down to match the NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list('NESW'))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(' #'))[structure])
    print('Maze map:')
    print('\n'.join(''.join(row) for row in maze))
    print('Greedy policy:')
    print('\n'.join(''.join(row) for row in greedy_policy))
    assert '\n'.join(''.join(row) for row in greedy_policy) == 'NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN'
class SpadesPlayer:
    def __init__(self, game_deck, game_env):
        self.gameDeck = game_deck
        self.hand = SpadesDeckTest.SpadesDeckTest.draw_hand(self.gameDeck)
        self.gamesWon = 0
        self.gamesTied = 0
        self.av_table = ActionValueTable(4, 1)
        self.av_table.initialize(0.0)
        self.env = game_env
        self.task = SpadesTask.SpadesTask(game_env)
        self.agent = None
        self.learner = None

    def get_value(self):
        return self.hand

    def play_card(self, cardindex):
        print cardindex
        retCard = copy.copy(self.hand[cardindex])
        self.hand.remove(self.hand[cardindex])
        return retCard

    def get_new_hand(self):
        self.hand = SpadesDeckTest.SpadesDeckTest.draw_hand(self.gameDeck)
def run_bbox(verbose=False):
    n_features = n_actions = max_time = -1

    if bbox.is_level_loaded():
        bbox.reset_level()
    else:
        bbox.load_level("../levels/train_level.data", verbose=1)
        n_features = bbox.get_num_of_features()
        n_actions = bbox.get_num_of_actions()
        max_time = bbox.get_max_time()

    av_table = ActionValueTable(n_features, n_actions)
    av_table.initialize(0.2)
    print av_table._params

    learner = Q(0.5, 0.1)
    learner._setExplorer(EpsilonGreedyExplorer(0.4))
    agent = LearningAgent(av_table, learner)
    environment = GameEnvironment()
    task = GameTask(environment)
    experiment = Experiment(task, agent)

    while environment.finish_flag:
        experiment.doInteractions(1)
        agent.learn()

    bbox.finish(verbose=1)
def q_learning_table():
    controller = ActionValueTable(36, 4)
    learner = Q()
    controller.initialize(1.)
    agent = LearningAgent(controller, learner)

    score_list = []
    turn_list = []
    # +100 to match the training budget of the neural-network version
    for i in range(600):
        print_state(agent.module.getValue, 'table')
        score, turn = play(agent, 'table')
        score_list.append(score)
        turn_list.append(turn)
        agent.learn()
        agent.reset()
        print i, int(numpy.mean(score_list)), max(score_list), score, turn

    with open('./agent.dump', 'w') as f:
        pickle.dump(agent, f)
    with open('./score.dump', 'w') as f:
        pickle.dump([score_list, turn_list], f)
def createExperimentInstance():
    gymRawEnv = gym.make('MountainCarContinuous-v0')

    cartPositionGroup = Digitizer.buildBins(-1.2, 0.6, 16)
    cartVelocityGroup = Digitizer.buildBins(-0.07, 0.07, 4)
    actionDedigitizer = Digitizer.build(-1.0, 1.0, 5, True)
    # print("Cart position bins:", cartPositionGroup)
    # print("Cart velocity bins:", cartVelocityGroup)
    # print("Cart force bins:", actionDedigitizer.bins, actionDedigitizer.possibleValues())

    observationDigitizer = ArrayDigitizer([cartPositionGroup, cartVelocityGroup])
    transformation = EnvTransformation(observationDigitizer, actionDedigitizer)

    task = GymTask.createTask(gymRawEnv)
    env = task.env
    env.setTransformation(transformation)
    # env.setCumulativeRewardMode()

    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    ## alpha -- learning rate (preference of new information)
    ## gamma -- discount factor (importance of future reward)

    # create value table and initialize with zeros
    table = ActionValueTable(observationDigitizer.states, actionDedigitizer.states)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )

    agent = createAgent(table)
    experiment = Experiment(task, agent)
    experiment = ProcessExperiment(experiment, doSingleExperiment)
    return experiment
def createExperimentInstance():
    gymRawEnv = gym.make('MountainCar-v0')

    cartPositionGroup = Digitizer.buildBins(-1.2, 0.6, 16)
    cartVelocityGroup = Digitizer.buildBins(-0.07, 0.07, 16)
    # print("Cart position bins:", cartPositionGroup)
    # print("Cart velocity bins:", cartVelocityGroup)

    observationDigitizer = ArrayDigitizer([cartPositionGroup, cartVelocityGroup])
    transformation = EnvTransformation(observationDigitizer)

    task = GymTask.createTask(gymRawEnv)
    env = task.env
    env.setTransformation(transformation)
    # env.setCumulativeRewardMode()

    # create value table and initialize with zeros
    table = ActionValueTable(observationDigitizer.states, env.numActions)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )

    agent = createAgent(table)
    experiment = Experiment(task, agent)
    experiment = ProcessExperiment(experiment, ExperimentIteration())
    return experiment
def setup_RL():
    # create the maze with walls (1)
    envmatrix = np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                          [1, 0, 0, 1, 0, 0, 0, 0, 1],
                          [1, 0, 0, 1, 0, 0, 1, 0, 1],
                          [1, 0, 0, 1, 0, 0, 1, 0, 1],
                          [1, 0, 0, 1, 0, 1, 1, 0, 1],
                          [1, 0, 0, 0, 0, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1, 1, 1, 0, 1],
                          [1, 0, 0, 0, 0, 0, 0, 0, 1],
                          [1, 1, 1, 1, 1, 1, 1, 1, 1]])
    env = Maze(envmatrix, (7, 7))

    # create task
    task = MDPMazeTask(env)

    # create value table and initialize with zeros
    table = ActionValueTable(81, 4)
    table.initialize(0.)

    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    # learner = Q()
    learner = SARSA()

    # create agent
    agent = LearningAgent(table, learner)

    # create experiment
    experiment = Experiment(task, agent)
    return experiment, agent, table
def load_AV_Table(self):
    load_D = loadData(self._filename)
    if load_D[1]:
        self._av_table = load_D[0]
        print "Found Table!"
    else:
        self._av_table = ActionValueTable(self._number_of_states, self._actions)
        self._av_table.initialize(0.0)
        print "No training for this format. Creating new AV table"
class IntelligentAgent(Agent, LearningAgent):
    """An agent that learns through a value-based RL algorithm."""

    def __init__(self, name, num_states, num_actions,
                 epsilon=0.3, gamma=0.99, alpha=0.95):
        self.controller = ActionValueTable(num_states, num_actions)
        self.controller.initialize(np.random.rand(num_states * num_actions))
        self.learner = Q(gamma=gamma, alpha=alpha)
        self.learner.batchMode = False
        self.learner.explorer.epsilon = epsilon
        LearningAgent.__init__(self, self.controller, self.learner)
        Agent.__init__(self, name)

    def choose_action(self):
        return self.getAction()[0]
def initExperiment(learnalg='Q', history=None, binEdges='10s',
                   scriptfile='./rlRunExperiment_v2.pl',
                   resetscript='./rlResetExperiment.pl'):
    if binEdges == '10s':
        centerBinEdges = centerBinEdges_10s
    elif binEdges == '30s':
        centerBinEdges = centerBinEdges_30s
    elif binEdges == 'lessperturbed':
        centerBinEdges = centerBinEdges_10s_lessperturbed
    elif binEdges is None:
        centerBinEdges = None
    else:
        raise Exception("No bins for given binEdges setting")

    env = OmnetEnvironment(centerBinEdges, scriptfile, resetscript)
    if history is not None:
        env.data = history['data']

    task = OmnetTask(env, centerBinEdges)
    if history is not None:
        task.allrewards = history['rewards']

    if learnalg == 'Q':
        nstates = env.numSensorBins ** env.numSensors
        if history is None:
            av_table = ActionValueTable(nstates, env.numActions)
            av_table.initialize(1.)
        else:
            av_table = history['av_table']
        learner = Q(0.1, 0.9)  # alpha, gamma
        learner._setExplorer(EpsilonGreedyExplorer(0.05))  # epsilon
    elif learnalg == 'NFQ':
        av_table = ActionValueNetwork(env.numSensors, env.numActions)
        learner = NFQ()
    else:
        raise Exception("learnalg unknown")

    agent = LearningAgent(av_table, learner)

    experiment = Experiment(task, agent)
    if history is None:
        experiment.nruns = 0
    else:
        experiment.nruns = history['nruns']
    return experiment
def get_discrete_task_agent(generators, market, nStates, nOffer, markups,
                            withholds, maxSteps, learner, Pd0=None, Pd_min=0.0):
    """ Returns a tuple of task and agent for the given learner.
    """
    env = pyreto.discrete.MarketEnvironment(generators, market,
                                            numStates=nStates,
                                            numOffbids=nOffer,
                                            markups=markups,
                                            withholds=withholds,
                                            Pd0=Pd0, Pd_min=Pd_min)
    task = pyreto.discrete.ProfitTask(env, maxSteps=maxSteps)
    nActions = len(env._allActions)
    module = ActionValueTable(numStates=nStates, numActions=nActions)
    agent = LearningAgent(module, learner)
    return task, agent
def __init__(self, event_queue_name, hub_queue_name):
    super().__init__()

    # create environment
    self.conn = boto.sqs.connect_to_region(constants.REGION)
    self.event_queue = self.conn.get_queue(event_queue_name)
    self.event_queue.set_message_class(MHMessage)
    self.env = DogEnv(DogEnv.ALL_QUIET, DogEnv.ALL_QUIET,
                      self.event_queue, hub_queue_name)
    self.env.delay = (self.episodes == 1)

    # create task
    self.task = QuietDogTask(self.env)

    # create value table and initialize with ones
    # TODO: Get number of states from DogEnv
    self.table = ActionValueTable(2 * 5 * 4, 5 * 4)
    self.table.initialize(1.)

    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    self.learner = SARSA()

    # standard exploration is e-greedy, but a different type can be chosen as well
    self.learner.explorer = BoltzmannExplorer()

    # create agent
    self.agent = DogAgent(self.table, self.learner)

    # create experiment
    self.experiment = Experiment(self.task, self.agent)
def __init__(self, name, clientID, sensorHandle, bodyHandle):
    ''' Constructor '''
    self.resetParameters()
    controller = ActionValueTable(150, 5)  # pyBrain
    controller.initialize(1.)              # pyBrain
    learner = Q()                          # pyBrain
    self.__mind = AgentMind(controller, learner)  # with pyBrain
    self.__controller = controller
    self.__name = name
    self.__clientID = clientID          # client ID of the Dummy object
    self.__sensorHandle = sensorHandle  # proximity sensor handle of the V-Rep agent
    self.__bodyHandle = bodyHandle      # BubbleRob body handle
    self.__mind.setInput("name", name)
    self.__pybrainEnvironment = LocomotionEnvironment()
    self.__pybrainTask = LocomotionTask(self.__pybrainEnvironment)
def maze():
    # import sys, time
    pylab.gray()
    pylab.ion()

    # The goal appears to be in the upper right
    structure = [
        "!!!!!!!!!!",
        "! ! ! ! !",
        "! !! ! ! !",
        "! ! !",
        "! !!!!!! !",
        "! ! ! !",
        "! ! !!!! !",
        "! !",
        "! !!!!! !",
        "! ! !",
        "!!!!!!!!!!",
    ]
    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for i in range(100):
        experiment.doInteractions(100)
        agent.learn()
        agent.reset()
        # 4 actions, 81 locations/states (9x9 grid)
        # max(1) gives/plots the biggest objective function value for that square
        pylab.pcolor(controller.params.reshape(81, 4).max(1).reshape(9, 9))
        pylab.draw()

    # (0, 0) is upper left and (0, N) is upper right,
    # so flip the matrix upside down to match the NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
def test_maze():
    # simplified version of the reinforcement learning tutorial example
    structure = [
        list('!!!!!!!!!!'),
        list('! ! ! ! !'),
        list('! !! ! ! !'),
        list('! ! !'),
        list('! !!!!!! !'),
        list('! ! ! !'),
        list('! ! !!!! !'),
        list('! !'),
        list('! !!!!! !'),
        list('! ! !'),
        list('!!!!!!!!!!'),
    ]
    structure = np.array([[ord(c) - ord(' ') for c in row] for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for i in range(30):
        experiment.doInteractions(30)
        agent.learn()
        agent.reset()

    controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape)
    # (0, 0) is upper left and (0, N) is upper right,
    # so flip the matrix upside down to match the NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list('NESW'))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(' #'))[structure])
    print('Maze map:')
    print('\n'.join(''.join(row) for row in maze))
    print('Greedy policy:')
    print('\n'.join(''.join(row) for row in greedy_policy))
    assert '\n'.join(''.join(row) for row in greedy_policy) == 'NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN'
def initialize(self, grid):
    """ Initializes all the (s, a) pairs with the no-traffic travel time. """
    # not every action is possible from every state
    ActionValueTable.initialize(self, float("-inf"))

    for node, time in grid.all_shortest_path_lengths():
        in_edges = grid.grid.in_edges([node])
        for edge in in_edges:
            for period in xrange(const.PERIODS):
                # state involves the node previous to the current node
                s = task.get_state(g.node_number(edge[0]), period)
                a = g.action(edge)
                q = -time - grid.grid.get_edge_data(*edge)["weight"]
                self.updateValue(s, a, q)

    # Q(s_final, a) for all actions is 0
    for p in xrange(const.PERIODS):
        s = task.get_state(const.NODES - 1, p)
        for a in xrange(const.POSSIBLE_ACTIONS):
            self.updateValue(s, a, 0)
class RL:
    def __init__(self):
        self.av_table = ActionValueTable(4, 5)
        self.av_table.initialize(0.1)
        learner = SARSA()
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)
        env = HASSHEnv()
        task = HASSHTask(env)
        self.experiment = Experiment(task, self.agent)

    def go(self):
        global rl_params
        rassh.core.constants.rl_params = self.av_table.params.reshape(4, 5)[0]
        self.experiment.doInteractions(1)
        self.agent.learn()
def createExperimentInstance():
    gymRawEnv = gym.make('Taxi-v2')

    transformation = EnvTransformation()

    task = GymTask.createTask(gymRawEnv)
    env = task.env
    env.setTransformation(transformation)
    ## env.setCumulativeRewardMode()

    ## create value table and initialize with zeros
    table = ActionValueTable(env.numStates, env.numActions)
    # table = ActionValueTableWrapper(table)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )

    agent = createAgent(table)
    experiment = Experiment(task, agent)
    experiment = ProcessExperiment(experiment, experimentIteration)
    return experiment
class RL:
    def __init__(self):
        self.av_table = ActionValueTable(2, 3)
        self.av_table.initialize(0.1)
        learner = SARSA()
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)
        env = HASSHEnv()
        task = HASSHTask(env)
        self.experiment = Experiment(task, self.agent)

    def go(self):
        global rl_params
        kippo.core.constants.rl_params = self.av_table.params.reshape(2, 3)[0]
        self.experiment.doInteractions(1)
        self.agent.learn()
def testValueBased(self):
    """ Test value-based learner. """
    mkt = SmartMarket(self.case)
    exp = MarketExperiment([], [], mkt)
    for g in self.case.generators:
        env = DiscreteMarketEnvironment([g], mkt)
        dim_state, num_actions = (10, 10)
        exp.tasks.append(ProfitTask(env, dim_state, num_actions))
        module = ActionValueTable(dim_state, num_actions)
        module.initialize(1.0)
        # module = ActionValueNetwork(dimState=1, numActions=4)
        learner = SARSA()  # Q() QLambda()
        # learner.explorer = BoltzmannExplorer()  # default is e-greedy
        exp.agents.append(LearningAgent(module, learner))

    for _ in range(1000):
        exp.doInteractions(24)  # interact with the env in batch mode
        for agent in exp.agents:
            agent.learn()
            agent.reset()
def __init__(self):
    self.interactionscount = 0

    # define action-value table
    controller = ActionValueTable(DerivedConstants.NUM_STATES,
                                  DerivedConstants.NUM_ACTIONS)
    controller.initialize(INITIAL_ACTION_VALUE_TABLE_VALUE)

    # define Q-learning agent
    learner = Q(ALPHA, GAMMA)
    learner._setExplorer(EpsilonGreedyExplorer(EPSILON))
    self.agent = LearningAgent(controller, learner)

    # define the environment
    self.environment = BeaverEnv()

    # define the task
    self.task = BeaverTask(self.environment)

    # finally, define experiment
    self.experiment = Experiment(self.task, self.agent)
def explore_maze():
    # simplified version of the reinforcement learning tutorial example
    structure = [
        list("!!!!!!!!!!"),
        list("! ! ! ! !"),
        list("! !! ! ! !"),
        list("! ! !"),
        list("! !!!!!! !"),
        list("! ! ! !"),
        list("! ! !!!! !"),
        list("! !"),
        list("! !!!!! !"),
        list("! ! !"),
        list("!!!!!!!!!!"),
    ]
    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for i in range(30):
        experiment.doInteractions(30)
        agent.learn()
        agent.reset()

    controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape)
    # (0, 0) is upper left and (0, N) is upper right,
    # so flip the matrix upside down to match the NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
    assert "\n".join("".join(row) for row in greedy_policy) == "NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN"
def createExperimentInstance():
    gymRawEnv = gym.make('FrozenLake-v0')

    transformation = EnvTransformation()

    task = GymTask.createTask(gymRawEnv)
    env = task.env
    env.setTransformation(transformation)
    ## env.setCumulativeRewardMode()

    # create value table and initialize with zeros
    table = ActionValueTable(gymRawEnv.observation_space.n,
                             gymRawEnv.action_space.n)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )

    agent = createAgent(table)
    experiment = Experiment(task, agent)
    iterator = ExperimentIteration()
    quality = QualityFunctor()
    experiment = ProcessExperiment(experiment, iterator, quality)
    return experiment
def runMainProg():
    # define action value table
    av_table = ActionValueTable(32, 2)
    av_table.initialize(0.)
    for i in range(0, 32):
        print "The AV Value At ", i, " is: ", av_table.getActionValues(i)

    # define Q-learning agent
    learner = Q(0.5, 0.0)
    learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(av_table, learner)

    # define a blackjack deck
    theDeck = BlackjackCardDeck()

    # define the environment
    env = BlackjackEnv(theDeck)
    env.createHand()

    # define a dealer
    dealer = BlackjackDealer(theDeck)

    # define the task
    task = BlackjackTask(env)

    # define the experiment
    experiment = Experiment(task, agent)

    # run the game
    for i in range(0, 10000):
        playGame(dealer, task, env, experiment, agent)

    print "Games Agent Won: ", GamesAgentWon
    print "Games Dealer Won: ", GamesDealerWon
    print "Games Tied: ", GamesTied
    print "Total Games Played: ", TotalGames
    for i in range(0, 32):
        print "The AV Value At ", i, " is: ", av_table.getActionValues(i)
def run():
    """
    Number of states: current value 0-20.
    Number of actions: Stand=0, Hit=1.
    """
    # define action-value table
    av_table = ActionValueTable(MAX_VAL, MIN_VAL)
    av_table.initialize(0.)

    # define Q-learning agent
    q_learner = Q(Q_ALPHA, Q_GAMMA)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(av_table, q_learner)

    # define the environment
    env = BlackjackEnv()

    # define the task
    task = BlackjackTask(env, verbosity=VERBOSE)

    # finally, define experiment
    experiment = Experiment(task, agent)

    # ready to go, start the process
    for _ in range(NB_ITERATION):
        experiment.doInteractions(1)
        if task.lastreward != 0:
            if VERBOSE:
                print "Agent learn"
            agent.learn()

    print '|First State|Choice 0 (Stand)|Choice 1 (Hit)|Relative value of Standing over Hitting|'
    print '|:-------:|:-------|:-----|:-----|'
    for i in range(MAX_VAL):
        print '| %s | %s | %s | %s |' % (
            (i + 1),
            av_table.getActionValues(i)[0],
            av_table.getActionValues(i)[1],
            av_table.getActionValues(i)[0] - av_table.getActionValues(i)[1])
def initData(targetPlatform):
    global numActions, numStates, table, trainDataFile, tableFile

    if targetPlatform not in supportedPlatforms:
        sys.stderr.write("------------------------------------------\n")
        sys.stderr.write("ERROR: target platform '%s' not supported by RL in training set\n"
                         % (targetPlatform))
        sys.stderr.write("------------------------------------------\n")
        # sys.stderr.write("\n\n%s\n\n" % targetPlatform)

    defaultTarget = "maxj"
    if targetPlatform == "none":
        targetPlatform = defaultTarget

    replaceStr = "_%s.txt" % (targetPlatform)
    trainDataFile = trainDataFile.replace(".txt", replaceStr)
    tableFile = tableFile.replace(".txt", replaceStr)

    readTrainData(trainDataPath + trainDataFile)
    readActionValueTable(trainDataPath + tableFile)

    numActions = transitionTable.shape[1]
    numStates = transitionTable.shape[0]

    # create value table and initialize it from the stored action-value table
    table = ActionValueTable(numStates, numActions)
    # print(actionValueTable)
    for i in range(transitionTable.shape[0]):
        for j in range(transitionTable.shape[1]):
            table._params[i * transitionTable.shape[1] + j] = \
                actionValueTable[i * transitionTable.shape[1] + j]
def __init__(self):
    ActionValueTable.__init__(self, const.STATES, const.POSSIBLE_ACTIONS)
from pybrain.rl.experiments import Experiment
from pybrain.rl.explorers import EpsilonGreedyExplorer
from pacmanTask import PacmanTask
from pacmanAgent import PacmanAgent
from runPacman import RunPacman
from ghost import Ghost
from pacmanEnvironment import Environment

###############################################################
# The main function that begins running our Pacman-In-AI game #
###############################################################
if __name__ == "__main__":

    # Initialize our Action-Environment-Reward Table
    controller = ActionValueTable(196, 4)
    controller.initialize(0.)

    # Initialize Reinforcement Learning
    learner = Q(0.5, 0.0)
    learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(controller, learner)

    # Setup the PyBrain and PyGame Environments
    environment = Environment()
    game = RunPacman(environment)

    # Create the Task for the Pac-Man Agent to Accomplish and initialize the first Action
    task = PacmanTask(environment, game)
    task.performAction(np.array([1]))
pylab.ion()
pylab.hot()
pylab.show()

with CurrentController(3) as control:
    environment = ControllerEnvironment(control)
    task = MdpRedCubeTask(environment, False)
    control.cubes_x = 2
    control.cubes_y = 3
    control.cubes_size = 4
    task.max_samples = 500
    actions = len(environment.actions)

    actionValueNetwork = ActionValueTable(task.outdim, task.indim)
    actionValueNetwork.stdParams = 0.0001
    actionValueNetwork.randomize()
    # actionValueNetwork = ActionValueNetwork(task.outdim, task.indim)
    # if os.path.isfile("q/q_train.npy"):
    #     actionValueNetwork.param = np.load("q/q_train.npy")
    # else:
    actionValueNetwork.initialize(0.0001)
    # if os.path.isfile("nfq.xml"):
    actionValueNetwork.network = NetworkReader.readFrom('nfq.xml')

    pylab.pcolor(actionValueNetwork.params.reshape(32, actions).max(1).reshape(8, 4).T)
    pylab.pause(0.01)

    learner = Q()
    agent = LearningAgent(actionValueNetwork, learner)
    experiment = Experiment(task, agent)

    start = time()
def getObservation(self):
    return self.env.getSensors()


if __name__ == "__main__":
    # testing the environment and task
    from pybrain.rl.learners.valuebased import ActionValueTable
    from pybrain.rl.learners import Q
    from pybrain.rl.agents import LearningAgent
    from pybrain.rl.experiments import Experiment
    from pybrain.rl.explorers import EpsilonGreedyExplorer

    env = Chain()
    controller = ActionValueTable(env.outdim, env.indim)
    controller.initialize(1.)
    # controller.initialize(0.)

    # learner = Q(0.5, 0.8)  # alpha 0.5, gamma 0.8
    learner = Q()  # default alpha 0.5, gamma 0.99
    # learner._setExplorer(EpsilonGreedyExplorer(0.5))
    agent = LearningAgent(controller, learner)

    task = ChainTask(env)
    exp = Experiment(task, agent)

    reward = 0
    xs = []
    ys = []
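The snippet breaks off right after the bookkeeping lists are created. A plausible continuation, not from the source: run interaction batches and record the table's best value for the first chain state as a crude learning curve (the batch and episode counts are arbitrary).

    for episode in range(100):
        exp.doInteractions(10)   # act in the chain environment
        agent.learn()            # Q-update from the collected transitions
        agent.reset()            # clear the agent's history before the next batch
        xs.append(episode)
        ys.append(max(controller.getActionValues(0)))  # best Q-value in state 0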
def run(learning_rounds, test_rounds, player1_learn_file, player2_learn_file,
        player1_test_file, player2_test_file, alpha, gamma, epsilon, logs,
        interactive_test):
    """ Runs a learning process with the given parameters, then tests the
        agent's performance by playing the given number of test games and
        returns the percentage of games won.
    """
    # define the environment
    env = CowboyEnv(player1_learn_file, player2_learn_file,
                    player1_test_file, player2_test_file)

    # define the task
    task = CowboyTask(env)

    av_table = ActionValueTable(env.outdim, env.indim)
    av_table.initialize(0.)

    # define Q-learning agent
    learner = Q(alpha, gamma)
    learner._setExplorer(EpsilonGreedyExplorer(epsilon))
    agent = LearningAgent(av_table, learner)

    # finally, define experiment
    experiment = Experiment(task, agent)

    def play_one_game(learn):
        """ Orders the agent to play a single game and learn from it.
            Returns the number of rounds played.
        """
        # do interactions until the game finishes
        rounds_played = 0
        while not env.game_finished():
            experiment.doInteractions(1)
            if learn:
                agent.learn()
                agent.reset()
            rounds_played += 1
        env.reset()
        return rounds_played

    env.toggle_logs(False)

    # learn for the given number of rounds
    round_counter = 0
    while round_counter < learning_rounds:
        round_counter += play_one_game(True)
        if logs:
            sys.stdout.write("Learning progress: %d%% \r"
                             % (round_counter * 100.0 / learning_rounds))
            sys.stdout.flush()

    # test for the given number of rounds
    env.toggle_test(True)
    round_counter = 0
    game_counter = 0
    score = 0
    if interactive_test:
        env.toggle_logs(True)
    while round_counter < test_rounds:
        round_counter += play_one_game(False)
        game_counter += 1
        score += env.agent_score()
        if interactive_test:
            print("Testing progress: %d%%" % (round_counter * 100.0 / learning_rounds))
            raw_input('Score: {0} ->'.format(score))
        elif logs:
            if learning_rounds > 0:
                sys.stdout.write("Testing progress: %d%% \r"
                                 % (round_counter * 100.0 / learning_rounds))
                sys.stdout.flush()
    if logs:
        sys.stdout.write("                              \r")
        sys.stdout.flush()

    return score * 100.0 / game_counter
def table_print(table, nstates):
    print '\n'.join(
        str(get_color(i, nstates)) + str(a)
        for i, a in enumerate(np.array_split(table, nstates))
    )


################################################################################
### main

if __name__ == '__main__':
    world = WorldInteraction()

    predTable = ActionValueTable(
        PredatorInteraction.NSTATES,
        len(PredatorInteraction.ACTIONS)
    )
    predTable.initialize(0.)
    predLearner = Q(ALPHA, GAMMA)
    predLearner._setExplorer(EpsilonGreedyExplorer(EPSILON))
    predAgent = LearningAgent(predTable, predLearner)
    predEnv = PredatorEnvironment(world)
    predTask = PredatorTask(predEnv)
    predExp = Experiment(predTask, predAgent)

    try:
        for t in xrange(MAX_TIME):
            print 't = %d' % t
            world.t = t
def table_print(table, nstates):
    print '\n'.join(
        str(get_color(i, nstates)) + str(a)
        for i, a in enumerate(np.array_split(table, nstates))
    )


################################################################################
### main

if __name__ == '__main__':
    world = WorldInteraction()

    predTable = ActionValueTable(
        PredatorInteraction.NSTATES,
        len(PredatorInteraction.ACTIONS)
    )
    predTable.initialize(0.)
    predLearner = Q(ALPHA, GAMMA)
    predLearner._setExplorer(EpsilonGreedyExplorer(EPSILON))
    predAgent = LearningAgent(predTable, predLearner)
    predEnv = PredatorEnvironment(world)
    predTask = PredatorTask(predEnv)
    predExp = Experiment(predTask, predAgent)

    mimicTable = ActionValueTable(
        MimicryPreyInteraction.NSTATES,
        len(MimicryPreyInteraction.ACTIONS)
    )
def Py_Brain():
    ############################
    # pybrain
    ############################
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap
    import itertools
    from scipy import linalg
    from pybrain.rl.environments.mazes import Maze, MDPMazeTask
    from pybrain.rl.learners.valuebased import ActionValueTable
    from pybrain.rl.agents import LearningAgent
    from pybrain.rl.learners import Q, SARSA
    from pybrain.rl.experiments import Experiment
    from pybrain.rl.environments import Task
    import pylab

    # pylab.gray()
    # pylab.ion()

    '''
    structure = np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                          [1, 0, 0, 1, 0, 0, 0, 0, 1],
                          [1, 0, 0, 1, 0, 0, 1, 0, 1],
                          [1, 0, 0, 1, 0, 0, 1, 0, 1],
                          [1, 0, 0, 1, 0, 1, 1, 0, 1],
                          [1, 0, 0, 0, 0, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1, 1, 1, 0, 1],
                          [1, 0, 0, 0, 0, 0, 0, 0, 1],
                          [1, 1, 1, 1, 1, 1, 1, 1, 1]])
    '''
    structure = np.array([[1, 1, 1, 1, 1],
                          [1, 1, 0, 0, 1],
                          [1, 1, 0, 1, 1],
                          [1, 0, 0, 1, 1],
                          [1, 1, 1, 1, 1]])
    num_states = int(structure.shape[0] * structure.shape[1])
    SQRT = int(math.sqrt(num_states))

    # print structure.item((1, 3))
    # environment = Maze(structure, (7, 7))  # second parameter is the goal field tuple
    environment = Maze(structure, (1, 3))    # second parameter is the goal field tuple
    print type(environment)
    print environment

    # The standard maze environment comes with the following 4 actions:
    # North, South, East, West
    controller = ActionValueTable(num_states, 4)  # [N, S, E, W]
    controller.initialize(1)
    learner = Q()
    agent = LearningAgent(controller, learner)
    np.not_equal(agent.lastobs, None)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    # while True:
    for x in range(4):
        print x
        experiment.doInteractions(10)
        agent.learn()
        agent.reset()
        pylab.pcolor(controller.params.reshape(num_states, 4).max(1).reshape(SQRT, SQRT))
        pylab.draw()
        # pylab.show()

    name = 'MAZE'
    plt.savefig(str(name) + '_PLOT.png')
    plt.close()
a learner, which updates the controller parameters according to the interaction it had with the world, and an explorer, which adds some explorative behaviour to the actions. All standard agents already have a default explorer, so we don't need to take care of that in this tutorial.

The controller in PyBrain is a module that takes states as input and transforms them into actions. For value-based methods, like the Q-learning algorithm we will use here, we need a module that implements the ActionValueInterface. There are currently two modules in PyBrain that do this: the ActionValueTable for discrete actions and the ActionValueNetwork for continuous actions. Our maze uses discrete actions, so we need a table:

"""
controller = ActionValueTable(81, 4)
controller.initialize(1.)
"""

The table needs the number of states and actions as parameters. The standard maze environment comes with the following 4 actions: north, east, south, west. Then we initialize the table with 1 everywhere. This is not always necessary but will help it converge faster, because unvisited state-action pairs have a promising positive value and will be preferred over visited ones that didn't lead to the goal.

Each agent also has a learner component. Several classes of RL learners are currently implemented in PyBrain: black-box optimizers, direct search methods, and value-based learners. Classical reinforcement learning mostly consists of value-based learning, of which one of the most well-known algorithms is Q-learning. Let's now create
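The passage breaks off at this point; in the PyBrain tutorial flow it follows, the next step is constructing the learner and wrapping both into an agent. A minimal continuation consistent with the surrounding snippets:

"""
learner = Q()
agent = LearningAgent(controller, learner)
"""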
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q
from pybrain.rl.experiments import Experiment

# create a 2D two-room gridworld
structure = array([[1, 1, 1, 1, 1, 1, 1],
                   [1, 0, 0, 1, 0, 0, 1],
                   [1, 0, 0, 0, 0, 0, 1],
                   [1, 0, 0, 1, 0, 0, 1],
                   [1, 0, 0, 1, 0, 0, 1],
                   [1, 1, 1, 1, 1, 1, 1]])

# initialize an agent doing Q-learning
controller = ActionValueTable(49, 4)
controller.initialize(0.)
learner = Q()
agent = LearningAgent(controller, learner)

while True:
    # place a random goal for each walk
    [i, j] = structure.shape
    goal = (randint(0, i - 1), randint(0, j - 1))
    # place the goal in a field which is not a wall
    while structure[goal] != 0:
        goal = (randint(0, i - 1), randint(0, j - 1))
                      [1, 0, 0, 1, 0, 0, 0, 0, 1],
                      [1, 0, 0, 1, 0, 0, 1, 0, 1],
                      [1, 0, 0, 1, 0, 0, 1, 0, 1],
                      [1, 0, 0, 1, 0, 1, 1, 0, 1],
                      [1, 0, 0, 0, 0, 0, 1, 0, 1],
                      [1, 1, 1, 1, 1, 1, 1, 0, 1],
                      [1, 0, 0, 0, 0, 0, 0, 0, 1],
                      [1, 1, 1, 1, 1, 1, 1, 1, 1]])
env = Maze(envmatrix, (7, 7))

# create task
task = MDPMazeTask(env)

# create value table and initialize with ones
table = ActionValueTable(81, 4)
table.initialize(1.)

# create agent with controller and learner - use SARSA(), Q() or QLambda() here
learner = SARSA()
# standard exploration is e-greedy, but a different type can be chosen as well
# learner.explorer = BoltzmannExplorer()

# create agent
agent = LearningAgent(table, learner)

# create experiment
experiment = Experiment(task, agent)

# prepare plotting
explorative behaviour to the actions. All standard agents already have a default explorer, so we don't need to take care of that in this tutorial.

The controller in PyBrain is a module that takes states as input and transforms them into actions. For value-based methods, like the Q-learning algorithm we will use here, we need a module that implements the ActionValueInterface. There are currently two modules in PyBrain that do this: the ActionValueTable for discrete actions and the ActionValueNetwork for continuous actions. Our maze uses discrete actions, so we need a table (note: I will need to use a continuous-action network).

"""
controller = ActionValueTable(16, 3)
controller.initialize(0.0020)
"""

The table needs the number of states and actions as parameters. The standard market environment comes with the following 3 actions: long, short and wait. Then we initialize the table with the minimum gap everywhere. This is not always necessary but will help it converge faster, because unvisited state-action pairs have a promising positive value and will be preferred over visited ones that didn't lead to the goal.

Each agent also has a learner component. Several classes of RL learners are currently implemented in PyBrain: black-box optimizers, direct search methods, and value-based learners. Classical reinforcement learning mostly consists of value-based learning, of which one of the most well-known algorithms is Q-learning.
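As in the maze version of this passage, the natural next step is to attach a learner and agent to the controller above. A sketch consistent with the other snippets in this section (the exploration epsilon is a placeholder, not a value from the source):

"""
learner = Q()
learner._setExplorer(EpsilonGreedyExplorer(0.1))
agent = LearningAgent(controller, learner)
"""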
pylab.gray()
pylab.ion()

structure = array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                   [1, 0, 0, 1, 0, 0, 0, 0, 1],
                   [1, 0, 0, 1, 0, 0, 1, 0, 1],
                   [1, 0, 0, 1, 0, 0, 1, 0, 1],
                   [1, 0, 0, 1, 0, 1, 1, 0, 1],
                   [1, 0, 0, 0, 0, 0, 1, 0, 1],
                   [1, 1, 1, 1, 1, 1, 1, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 1, 1, 1, 1, 1, 1, 1, 1]])
environment = Maze(structure, (7, 7))

controller = ActionValueTable(81, 4)
controller.initialize(1.)

learner = Q()
agent = LearningAgent(controller, learner)

task = MDPMazeTask(environment)
experiment = Experiment(task, agent)

experiment.doInteractions(100)
agent.learn()
agent.reset()

pylab.pcolor(controller.params.reshape(81, 4).max(1).reshape(9, 9))
pylab.draw()
""" Reinforcement Learning to learn xor function """ # generic import import numpy as np import random # pybrain import from pybrain import SigmoidLayer, LinearLayer from pybrain.rl.explorers import EpsilonGreedyExplorer from pybrain.rl.agents import LearningAgent from pybrain.rl.learners import Q from pybrain.rl.learners.valuebased import ActionValueTable # The parameters of your algorithm av_table = ActionValueTable(4, 2) av_table.initialize(0.) # For Action Value Table learner = Q(0.5, 0.0) # define Q-learning agent learner._setExplorer(EpsilonGreedyExplorer(0.0)) agent = LearningAgent(av_table, learner) for x in xrange(1,100): # The training listxor = random.choice([[0, 0],[0, 1], [1, 0], [1, 1]]) qstate = listxor[0] + listxor[1]*2 resultxor = listxor[0]^listxor[1] agent.integrateObservation([qstate]) action = agent.getAction()
from blackjackenv import BlackjackEnv
from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q
from pybrain.rl.experiments import Experiment
from pybrain.rl.explorers import EpsilonGreedyExplorer

# define action-value table
# number of states: current value 1-21
# number of actions: Stand=0, Hit=1
av_table = ActionValueTable(21, 2)
av_table.initialize(0.)

# define Q-learning agent
learner = Q(0.5, 0.0)
learner._setExplorer(EpsilonGreedyExplorer(0.0))
agent = LearningAgent(av_table, learner)

# define the environment
env = BlackjackEnv()

# define the task
task = BlackjackTask(env)

# finally, define experiment
experiment = Experiment(task, agent)
from pybrain.rl.environments.blackjackenv import BlackjackEnv
from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q
from pybrain.rl.experiments import Experiment
from pybrain.rl.explorers import EpsilonGreedyExplorer

# define action-value table
# number of states: current value 1-21
# number of actions: Stand=0, Hit=1
av_table = ActionValueTable(21, 2)
av_table.initialize(0.)

# define Q-learning agent
learner = Q(0.5, 0.0)
learner._setExplorer(EpsilonGreedyExplorer(0.0))
agent = LearningAgent(av_table, learner)

# define the environment
env = BlackjackEnv()

# define the task
task = BlackjackTask(env)

# finally, define experiment
experiment = Experiment(task, agent)
# number of actions:
#   3 - the number of action values the environment accepts: Forward, Backward and Snooze

states = 165   # has to match class Env(Environment) - outdim in environment_01.py
actions = 2    # has to match class Env(Environment) - indim in environment_01.py

try:
    # open the action-value table from the .csv file
    arr = np.loadtxt('/home/pi/Desktop/ray_bot/ray_bot2.csv', delimiter=';')
except Exception as e:
    # print e
    # if the file does not exist (i.e. the first run), create and
    # initialize the table with zeros
    arr = np.zeros((states, actions))

av_table = ActionValueTable(states, actions)
av_table.initialize(arr.flatten())

# define Q-learning agent
learner = Q(0.1, 0.5)
learner._setExplorer(EpsilonGreedyExplorer(0.5))
agent = LearningAgent(av_table, learner)

# define the environment
env = Env()

# define the task
task = Task(env)

# define experiment
experiment = Experiment(task, agent)
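The load path above implies a matching save step; a sketch under the assumption that the same file and layout are reused between runs (the path is the one hard-coded above):

# persist the learned action-value table so the next run resumes from it;
# the reshape mirrors the flatten() used when loading
np.savetxt('/home/pi/Desktop/ray_bot/ray_bot2.csv',
           av_table.params.reshape(states, actions), delimiter=';')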
from pybrain.rl.experiments import Experiment
from pybrain.rl.experiments import EpisodicExperiment
from pybrain.rl.environments import Task, EpisodicTask

warnings.filterwarnings("ignore")

# create the maze with walls (1)
envmatrix = array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                   [1, 0, 0, 1, 0, 0, 0, 0, 1],
                   [1, 0, 0, 1, 0, 0, 1, 0, 1],
                   [1, 0, 0, 1, 0, 0, 1, 0, 1],
                   [1, 0, 0, 1, 0, 1, 1, 0, 1],
                   [1, 0, 0, 0, 0, 0, 1, 0, 1],
                   [1, 1, 1, 1, 1, 1, 1, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 1, 1, 1, 1, 1, 1, 1, 1]])
env = Maze(envmatrix, (7, 7))

# create task
task = MDPMazeTask(env)

# create value table and initialize with ones
table = ActionValueTable(81, 4)
table.initialize(1.)

# create agent with controller and learner - use SARSA(), Q() or QLambda() here
# learner = Q()
learner = SARSA()
# standard exploration is e-greedy, but a different type can be chosen as well
# learner.explorer = BoltzmannExplorer()

# create agent
agent = LearningAgent(table, learner)

# create experiment
# experiment = Experiment(task, agent)
experiment = EpisodicExperiment(task, agent)

# prepare plotting
pylab.gray()
pylab.ion()

for i in range(50):