def testNet(learner, moduleNet, env, maxPlaneStartDist, stepSize,numAngs,thermRadius):
    # Turn off exploration
    from pybrain.rl.explorers.discrete.egreedy import EpsilonGreedyExplorer
    learner._setExplorer(EpsilonGreedyExplorer(0))
    agent = LearningAgent(moduleNet, learner)      

    # Move the plane back to the start by constructing a fresh environment
    env = contEnv.contThermEnvironment(maxPlaneStartDist, stepSize,numAngs,thermRadius) 
    from simpleThermalTask import SimpThermTask
    task = SimpThermTask(env)
    from pybrain.rl.experiments import Experiment
    experiment = Experiment(task, agent)

    # Run 100 interactions, recording the plane's distance from the thermal center (it should move toward the high-reward area)
    testIter = 100
    trainResults = [env.distPlane()]
    for i in range(testIter):
        experiment.doInteractions(1) 
        trainResults.append(env.distPlane())  
        
    # Plot the training results
    import matplotlib.pyplot as plt
    plt.figure(1)
    plt.plot(trainResults,'o')
    plt.ylabel('Distance from center of thermal')
    plt.xlabel('Interaction iteration')
    plt.title('Test Results for Neural Fitted Q Learner')
    plt.show()        
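All of the examples on this page share the same PyBrain wiring: a value table (or network) is paired with a learner, wrapped in a LearningAgent, and driven against a Task by an Experiment. As a point of reference, here is a minimal sketch of that pattern (the helper name build_experiment is ours, not taken from any of the projects below; the imports are the standard PyBrain ones used in the later examples):

from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.learners import Q
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import Experiment

def build_experiment(task, num_states, num_actions):
    """Wire table -> learner -> agent -> experiment for a given task."""
    table = ActionValueTable(num_states, num_actions)  # tabular action values
    table.initialize(0.)
    learner = Q()  # SARSA() is wired the same way
    agent = LearningAgent(table, learner)
    experiment = Experiment(task, agent)
    return experiment, agent, table

# Typical training loop, as seen throughout the examples:
#     experiment.doInteractions(n)
#     agent.learn()
#     agent.reset()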
Example #2
def initExperiment(alg, optimistic=True):
    env = Maze(envmatrix, (7, 7))

    # create task
    task = MDPMazeTask(env)

    # create value table and initialize it (optimistically with ones, otherwise zeros)
    table = ActionValueTable(81, 4)
    if optimistic:
        table.initialize(1.)
    else:
        table.initialize(0.)

    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    learner = alg()

    # standard exploration is e-greedy, but a different type can be chosen as well
    # learner.explorer = BoltzmannExplorer()

    agent = LearningAgent(table, learner)
    agent.batchMode = False

    experiment = Experiment(task, agent)
    experiment.allRewards = []
    return experiment
Example #3
    def __init__(self, task, agent):
        Experiment.__init__(self, task, agent)
        
        agent.learner.explorer.experiment = self
        # agent.learner.module.getValue()
        
        self.screen = pygame.display.set_mode(((xsize+2)*MAGNIFY,(ysize+2)*MAGNIFY))
        pygame.display.set_caption('Policy Visualizer')
        self.clock = pygame.time.Clock()

        self.screenBuffer = pygame.Surface(self.screen.get_size())
        self.screenBuffer = self.screenBuffer.convert()
        self.screenBuffer.fill((64, 64, 64)) # Dark Gray
        
        self.bombImage = pygame.image.load("bomb_image.png")
        self.bombImage = pygame.transform.scale(self.bombImage, (MAGNIFY - 2, MAGNIFY - 2))

    
        self.isPaused = False
        self.isCrashed = False
        self.speed = 10
        self.num = 0
        self.robotXA = -1
        self.robotYA = -1
        self.bomb_counter = 0
        
        self.count = 0
        self.acc_reward = 0
        self.collect_data = False
        if collect_data_file != None:
            self.collect_data = True
            self.collect_episode_data_file = open(collect_data_file + "_episodelen.data", "w")
            self.collect_reward_data_file = open(collect_data_file + "_avg_reward.data", "w")
Example #4
def run_bbox(verbose=False):
    n_features = n_actions = max_time = -1

    if bbox.is_level_loaded():
        bbox.reset_level()
    else:
        bbox.load_level("../levels/train_level.data", verbose=1)
    # query level metadata once the level is available
    n_features = bbox.get_num_of_features()
    n_actions = bbox.get_num_of_actions()
    max_time = bbox.get_max_time()

    av_table = ActionValueTable(n_features, n_actions)
    av_table.initialize(0.2)
    print av_table._params
    learner = Q(0.5, 0.1)
    learner._setExplorer(EpsilonGreedyExplorer(0.4))
    agent = LearningAgent(av_table, learner)
    environment = GameEnvironment()
    task = GameTask(environment)
    experiment = Experiment(task, agent)

    while environment.finish_flag:
        experiment.doInteractions(1)
        agent.learn()
 
    bbox.finish(verbose=1)
Example #5
def testMaze():
    # simplified version of the reinforcement learning tutorial example
    structure = np.array([[1, 1, 1, 1, 1],
                          [1, 0, 0, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1]])
    shape = np.array(structure.shape)
    environment = Maze(structure,  tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for i in range(3):
        experiment.doInteractions(40)

    controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape)
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order 
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4),1)
    greedy_policy = np.flipud(np.array(list('NESW'))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(' #'))[structure])
    print('Maze map:')
    print('\n'.join(''.join(row) for row in maze))
    print('Greedy policy:')
    print('\n'.join(''.join(row) for row in greedy_policy))
    assert '\n'.join(''.join(row) for row in greedy_policy) == 'NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN'
Example #6
def initExperiment(alg, optimistic=True):
    env = Maze(envmatrix, (7, 7))

    # create task
    task = MDPMazeTask(env)

    # create value table and initialize it (optimistically with ones, otherwise zeros)
    table = ActionValueTable(81, 4)
    if optimistic:
        table.initialize(1.)
    else:
        table.initialize(0.)

    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    learner = alg()

    # standard exploration is e-greedy, but a different type can be chosen as well
    # learner.explorer = BoltzmannExplorer()

    agent = LearningAgent(table, learner)
    agent.batchMode = False

    experiment = Experiment(task, agent)
    experiment.allRewards = []
    return experiment
Example #7
 def __init__(self, mode):
     self.mode = mode
     cu.mem('Reinforcement Learning Started')
     self.environment = RegionFilteringEnvironment(
         config.get(mode + 'Database'), mode)
     self.controller = QNetwork()
     cu.mem('QNetwork controller created')
     self.learner = None
     self.agent = RegionFilteringAgent(self.controller, self.learner)
     self.task = RegionFilteringTask(self.environment,
                                     config.get(mode + 'GroundTruth'))
     self.experiment = Experiment(self.task, self.agent)
Example #8
    def __init__(self):
        self.av_table = ActionValueTable(2, 3)
        self.av_table.initialize(0.1)

        learner = SARSA()
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)

        env = HASSHEnv()

        task = HASSHTask(env)

        self.experiment = Experiment(task, self.agent)
Example #9
 def learn(self, number_of_iterations):
     learner = Q(0.2, 0.8)
     task = CartMovingTask(self.environment)
     self.controller = ActionValueTable(
         reduce(lambda x, y: x * y, map(lambda x: len(x), self.ranges)), self.force_granularity
     )
     self.controller.initialize(1.0)
     agent = LearningAgent(self.controller, learner)
     experiment = Experiment(task, agent)
     for i in range(number_of_iterations):
         experiment.doInteractions(1)
         agent.learn()
         agent.reset()
     with open("test.pcl", "w+") as f:
         pickle.dump(self.controller, f)
Example #10
 def learn(self, number_of_iterations):
     learner = Q(0.2, 0.8)
     task = CartMovingTask(self.environment)
     self.controller = ActionValueTable(
         reduce(lambda x, y: x * y, map(lambda x: len(x), self.ranges)),
         self.force_granularity)
     self.controller.initialize(1.)
     agent = LearningAgent(self.controller, learner)
     experiment = Experiment(task, agent)
     for i in range(number_of_iterations):
         experiment.doInteractions(1)
         agent.learn()
         agent.reset()
     with open("test.pcl", "w+") as f:
         pickle.dump(self.controller, f)
Example #11
def initExperiment(learnalg='Q',
                   history=None,
                   binEdges='10s',
                   scriptfile='./rlRunExperiment_v2.pl',
                   resetscript='./rlResetExperiment.pl'):

    if binEdges == '10s':
        centerBinEdges = centerBinEdges_10s
    elif binEdges == '30s':
        centerBinEdges = centerBinEdges_30s
    elif binEdges == 'lessperturbed':
        centerBinEdges = centerBinEdges_10s_lessperturbed
    elif binEdges is None:
        centerBinEdges = None
    else:
        raise Exception("No bins for given binEdges setting")

    env = OmnetEnvironment(centerBinEdges, scriptfile, resetscript)
    if history is not None:
        env.data = history['data']

    task = OmnetTask(env, centerBinEdges)
    if history is not None:
        task.allrewards = history['rewards']

    if learnalg == 'Q':
        nstates = env.numSensorBins**env.numSensors
        if history is None:
            av_table = ActionValueTable(nstates, env.numActions)
            av_table.initialize(1.)
        else:
            av_table = history['av_table']
        learner = Q(0.1, 0.9)  # alpha, gamma
        learner._setExplorer(EpsilonGreedyExplorer(0.05))  # epsilon
    elif learnalg == 'NFQ':
        av_table = ActionValueNetwork(env.numSensors, env.numActions)
        learner = NFQ()
    else:
        raise Exception("learnalg unknown")

    agent = LearningAgent(av_table, learner)

    experiment = Experiment(task, agent)
    if history is None:
        experiment.nruns = 0
    else:
        experiment.nruns = history['nruns']
    return experiment
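initExperiment only constructs and returns the wired-up Experiment; a caller is expected to drive it. A plausible driver is sketched below (the original run loop is not part of this example, so the interaction/learning cadence shown here is an assumption):

exp = initExperiment(learnalg='Q')
for _ in range(1000):
    exp.doInteractions(1)
    exp.agent.learn()
exp.nruns += 1  # bookkeeping field that initExperiment sets up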
Example #12
    def __init__(self, event_queue_name, hub_queue_name):
        super().__init__()
        # create environment
        self.conn = boto.sqs.connect_to_region(constants.REGION)
        self.event_queue = self.conn.get_queue(event_queue_name)
        self.event_queue.set_message_class(MHMessage)
        self.env = DogEnv(DogEnv.ALL_QUIET, DogEnv.ALL_QUIET, self.event_queue, hub_queue_name)
        self.env.delay = (self.episodes == 1)

        # create task
        self.task = QuietDogTask(self.env)

        # create value table and initialize with ones
        # TODO: Get number of states from DogEnv
        self.table = ActionValueTable(2*5*4, 5*4)
        self.table.initialize(1.)

        # create agent with controller and learner - use SARSA(), Q() or QLambda() here
        self.learner = SARSA()

        # standard exploration is e-greedy, but a different type can be chosen as well
        self.learner.explorer = BoltzmannExplorer()

        # create agent
        self.agent = DogAgent(self.table, self.learner)

        # create experiment
        self.experiment = Experiment(self.task, self.agent)
Example #13
def setup_RL():
    # create the maze with walls (1)
    envmatrix = np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                          [1, 0, 0, 1, 0, 0, 0, 0, 1],
                          [1, 0, 0, 1, 0, 0, 1, 0, 1],
                          [1, 0, 0, 1, 0, 0, 1, 0, 1],
                          [1, 0, 0, 1, 0, 1, 1, 0, 1],
                          [1, 0, 0, 0, 0, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1, 1, 1, 0, 1],
                          [1, 0, 0, 0, 0, 0, 0, 0, 1],
                          [1, 1, 1, 1, 1, 1, 1, 1, 1]])
    env = Maze(envmatrix, (7, 7))
    # create task
    task = MDPMazeTask(env)
    # create value table and initialize with zeros
    table = ActionValueTable(81, 4)
    table.initialize(0.)
    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    # learner = Q()
    learner = SARSA()
    # create agent
    agent = LearningAgent(table, learner)
    # create experiment
    experiment = Experiment(task, agent)
    return experiment, agent, table
Example #14
def createExperimentInstance():
    gymRawEnv = gym.make('MountainCarContinuous-v0')

    cartPositionGroup = Digitizer.buildBins(-1.2, 0.6, 16)
    cartVelocityGroup = Digitizer.buildBins(-0.07, 0.07, 4)
    actionDedigitizer = Digitizer.build(-1.0, 1.0, 5, True)

    #     print("Cart position bins:", cartPositionGroup)
    #     print("Cart velocity bins:", cartVelocityGroup)
    #     print("Cart force bins:", actionDedigitizer.bins, actionDedigitizer.possibleValues())

    observationDigitizer = ArrayDigitizer(
        [cartPositionGroup, cartVelocityGroup])
    transformation = EnvTransformation(observationDigitizer, actionDedigitizer)

    task = GymTask.createTask(gymRawEnv)
    env = task.env
    env.setTransformation(transformation)
    # env.setCumulativeRewardMode()

    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    ## alpha -- learning rate (preference of new information)
    ## gamma -- discount factor (importance of future reward)

    # create value table and initialize with zeros
    table = ActionValueTable(observationDigitizer.states,
                             actionDedigitizer.states)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )
    agent = createAgent(table)

    experiment = Experiment(task, agent)
    experiment = ProcessExperiment(experiment, doSingleExperiment)
    return experiment
Example #15
def createExperimentInstance():
    gymRawEnv = gym.make('MountainCar-v0')

    cartPositionGroup = Digitizer.buildBins(-1.2, 0.6, 16)
    cartVelocityGroup = Digitizer.buildBins(-0.07, 0.07, 16)

    #     print("Cart position bins:", cartPositionGroup)
    #     print("Cart velocity bins:", cartVelocityGroup)

    observationDigitizer = ArrayDigitizer(
        [cartPositionGroup, cartVelocityGroup])
    transformation = EnvTransformation(observationDigitizer)

    task = GymTask.createTask(gymRawEnv)
    env = task.env
    env.setTransformation(transformation)
    # env.setCumulativeRewardMode()

    # create value table and initialize with zeros
    table = ActionValueTable(observationDigitizer.states, env.numActions)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )
    agent = createAgent(table)

    experiment = Experiment(task, agent)
    experiment = ProcessExperiment(experiment, ExperimentIteration())
    return experiment
Example #16
def initExperiment(learnalg='Q', history=None, binEdges='10s',
        scriptfile='./rlRunExperiment_v2.pl',
        resetscript='./rlResetExperiment.pl'):

    if binEdges == '10s':
        centerBinEdges = centerBinEdges_10s
    elif binEdges == '30s':
        centerBinEdges = centerBinEdges_30s
    elif binEdges == 'lessperturbed':
        centerBinEdges = centerBinEdges_10s_lessperturbed
    elif binEdges is None:
        centerBinEdges = None
    else:
        raise Exception("No bins for given binEdges setting")

    env = OmnetEnvironment(centerBinEdges, scriptfile, resetscript)
    if history is not None:
        env.data = history['data']

    task = OmnetTask(env, centerBinEdges)
    if history is not None:
        task.allrewards = history['rewards']

    if learnalg == 'Q':
        nstates = env.numSensorBins ** env.numSensors
        if history is None:
            av_table = ActionValueTable(nstates, env.numActions)
            av_table.initialize(1.)
        else:
            av_table = history['av_table']
        learner = Q(0.1, 0.9) # alpha, gamma
        learner._setExplorer(EpsilonGreedyExplorer(0.05)) # epsilon
    elif learnalg == 'NFQ':
        av_table = ActionValueNetwork(env.numSensors, env.numActions)
        learner = NFQ()
    else:
        raise Exception("learnalg unknown")

    agent = LearningAgent(av_table, learner)

    experiment = Experiment(task, agent)
    if history is None:
        experiment.nruns = 0
    else:
        experiment.nruns = history['nruns']
    return experiment
Example #17
    def maze():
        # import sys, time
        pylab.gray()
        pylab.ion()
        # The goal appears to be in the upper right
        structure = [
            "!!!!!!!!!!",
            "! !  ! ! !",
            "! !! ! ! !",
            "!    !   !",
            "! !!!!!! !",
            "! ! !    !",
            "! ! !!!! !",
            "!        !",
            "! !!!!!  !",
            "!   !    !",
            "!!!!!!!!!!",
        ]
        structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
        shape = np.array(structure.shape)
        environment = Maze(structure, tuple(shape - 2))
        controller = ActionValueTable(shape.prod(), 4)
        controller.initialize(1.0)
        learner = Q()
        agent = LearningAgent(controller, learner)
        task = MDPMazeTask(environment)
        experiment = Experiment(task, agent)

        for i in range(100):
            experiment.doInteractions(100)
            agent.learn()
            agent.reset()
            # 4 actions, 81 locations/states (9x9 grid)
            # max(1) gives/plots the biggest objective function value for that square
            pylab.pcolor(controller.params.reshape(81, 4).max(1).reshape(9, 9))
            pylab.draw()

        # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
        greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
        greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
        maze = np.flipud(np.array(list(" #"))[structure])
        print("Maze map:")
        print("\n".join("".join(row) for row in maze))
        print("Greedy policy:")
        print("\n".join("".join(row) for row in greedy_policy))
Example #18
    def maze():
        # import sys, time
        pylab.gray()
        pylab.ion()
        # The goal appears to be in the upper right
        structure = [
            '!!!!!!!!!!',
            '! !  ! ! !',
            '! !! ! ! !',
            '!    !   !',
            '! !!!!!! !',
            '! ! !    !',
            '! ! !!!! !',
            '!        !',
            '! !!!!!  !',
            '!   !    !',
            '!!!!!!!!!!',
            ]
        structure = np.array([[ord(c)-ord(' ') for c in row] for row in structure])
        shape = np.array(structure.shape)
        environment = Maze(structure, tuple(shape - 2))
        controller = ActionValueTable(shape.prod(), 4)
        controller.initialize(1.)
        learner = Q()
        agent = LearningAgent(controller, learner)
        task = MDPMazeTask(environment)
        experiment = Experiment(task, agent)

        for i in range(100):
            experiment.doInteractions(100)
            agent.learn()
            agent.reset()
            # 4 actions, 81 locations/states (9x9 grid)
            # max(1) gives/plots the biggest objective function value for that square
            pylab.pcolor(controller.params.reshape(81, 4).max(1).reshape(9, 9))
            pylab.draw()

        # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
        greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
        greedy_policy = np.flipud(np.array(list('NESW'))[greedy_policy].reshape(shape))
        maze = np.flipud(np.array(list(' #'))[structure])
        print('Maze map:')
        print('\n'.join(''.join(row) for row in maze))
        print('Greedy policy:')
        print('\n'.join(''.join(row) for row in greedy_policy))
Example #19
def main():
    rospy.init_node("lauron_reinforcement_learning")
    environment = RLEnvironment()
    dim_state = environment.joint_states.shape[0]
    num_actions = len(environment.actions)
    controller = ActionValueNetwork(dim_state, num_actions)
    learner = SARSA()
    agent = LearningAgent(controller, learner)
    task = RLTask(environment)
    experiment = Experiment(task, agent)

    episode_counter = 0
    while True:
        print("Training episode {}".format(episode_counter))
        experiment.doInteractions(NUM_INTERACTIONS)
        agent.learn()
        agent.reset()
        episode_counter += 1
Example #20
def test_maze():
    # simplified version of the reinforcement learning tutorial example
    structure = [
        list('!!!!!!!!!!'),
        list('! !  ! ! !'),
        list('! !! ! ! !'),
        list('!    !   !'),
        list('! !!!!!! !'),
        list('! ! !    !'),
        list('! ! !!!! !'),
        list('!        !'),
        list('! !!!!!  !'),
        list('!   !    !'),
        list('!!!!!!!!!!'),
    ]
    structure = np.array([[ord(c) - ord(' ') for c in row]
                          for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for i in range(30):
        experiment.doInteractions(30)
        agent.learn()
        agent.reset()

    controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape)
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(
        np.array(list('NESW'))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(' #'))[structure])
    print('Maze map:')
    print('\n'.join(''.join(row) for row in maze))
    print('Greedy policy:')
    print('\n'.join(''.join(row) for row in greedy_policy))
    assert '\n'.join(
        ''.join(row)
        for row in greedy_policy) == 'NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN'
Example #21
 def __init__(self, mode):
   self.mode = mode
   cu.mem('Reinforcement Learning Started')
   self.environment = BoxSearchEnvironment(config.get(mode+'Database'), mode, config.get(mode+'GroundTruth'))
   self.controller = QNetwork()
   cu.mem('QNetwork controller created')
   self.learner = None
   self.agent = BoxSearchAgent(self.controller, self.learner)
   self.task = BoxSearchTask(self.environment, config.get(mode+'GroundTruth'))
   self.experiment = Experiment(self.task, self.agent)
Example #22
def run():
    """
    number of states is:
    current value: 0-20

    number of actions:
    Stand=0, Hit=1 """

    # define action value table
    av_table = ActionValueTable(MAX_VAL, MIN_VAL)
    av_table.initialize(0.)

    # define Q-learning agent
    q_learner = Q(Q_ALPHA, Q_GAMMA)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(av_table, q_learner)

    # define the environment
    env = BlackjackEnv()

    # define the task
    task = BlackjackTask(env, verbosity=VERBOSE)

    # finally, define experiment
    experiment = Experiment(task, agent)

    # ready to go, start the process
    for _ in range(NB_ITERATION):
        experiment.doInteractions(1)
        if task.lastreward != 0:
            if VERBOSE:
                print "Agent learn"
            agent.learn()

    print '|First State|Choice 0 (Stand)|Choice 1 (Hit)|Relative value of Standing over Hitting|'
    print '|:-------:|:-------|:-----|:-----|'
    for i in range(MAX_VAL):
        print '| %s | %s | %s | %s |' % (
            (i+1),
            av_table.getActionValues(i)[0],
            av_table.getActionValues(i)[1],
            av_table.getActionValues(i)[0] - av_table.getActionValues(i)[1]
        )
Example #23
class RL:
    def __init__(self):
        self.av_table = ActionValueTable(4, 5)
        self.av_table.initialize(0.1)

        learner = SARSA()
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)

        env = HASSHEnv()

        task = HASSHTask(env)

        self.experiment = Experiment(task, self.agent)

    def go(self):
        global rl_params
        rassh.core.constants.rl_params = self.av_table.params.reshape(4, 5)[0]
        self.experiment.doInteractions(1)
        self.agent.learn()
Example #24
class RL:
    def __init__(self):
        self.av_table = ActionValueTable(2, 3)
        self.av_table.initialize(0.1)

        learner = SARSA()
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)

        env = HASSHEnv()

        task = HASSHTask(env)

        self.experiment = Experiment(task, self.agent)

    def go(self):
        global rl_params
        kippo.core.constants.rl_params = self.av_table.params.reshape(2, 3)[0]
        self.experiment.doInteractions(1)
        self.agent.learn()
Example #25
    def _oneInteraction(self):

        resetInThisRound = False

        old = (self.XA, self.switch_state)
        (self.XA, self.switch_state) = reverseStateMapper[level.state]
        payoff = stateToRewardMapper[level.state]

        self.acc_reward += payoff * 10
        if self.collect_data:
            self.count += 1
            if payoff > 0:
                self.collect_episode_data_file.write(str(self.count) + "\n")
                self.count = 0
            if self.stepid % interval == 0:
                self.collect_reward_data_file.write(
                    str(self.acc_reward / float(interval)) + "\n")
                self.acc_reward = 0
            if self.stepid % 100000 == 0:
                pass

        if self.stepid % interval == 0:
            sys.stdout.write("\033[K")
            sys.stdout.write(
                "[{2}{3}] ({0}/{1}) | alpha = {4} | epsilon = {5}\n".format(
                    self.stepid, MAX_STEPS,
                    '#' * int(math.floor(self.stepid / float(MAX_STEPS) * 20)),
                    ' ' * int(
                        (20 -
                         math.floor(self.stepid / float(MAX_STEPS) * 20))),
                    learner.alpha, learner.explorer.exploration))
            sys.stdout.write("\033[F")

        if self.stepid >= MAX_STEPS:
            print("\nSimulation done!")

            sys.exit()

        if payoff > 0:
            # episode done
            if save_file != None:
                controller.params.reshape(
                    controller.numRows,
                    controller.numColumns).tofile(save_file)
            learner.alpha *= 0.999999
            learner.explorer.exploration *= 0.999999
        if level.state == errorState:
            level.reset()

        self.isCrashed = False
        if not self.isPaused:
            return Experiment._oneInteraction(self)
        else:
            return self.stepid
Example #26
def run():
    """
    number of states is:
    current value: 0-20

    number of actions:
    Stand=0, Hit=1 """

    # define action value table
    av_table = ActionValueTable(MAX_VAL, MIN_VAL)
    av_table.initialize(0.)

    # define Q-learning agent
    q_learner = Q(Q_ALPHA, Q_GAMMA)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(av_table, q_learner)

    # define the environment
    env = BlackjackEnv()

    # define the task
    task = BlackjackTask(env, verbosity=VERBOSE)

    # finally, define experiment
    experiment = Experiment(task, agent)

    # ready to go, start the process
    for _ in range(NB_ITERATION):
        experiment.doInteractions(1)
        if task.lastreward != 0:
            if VERBOSE:
                print "Agent learn"
            agent.learn()

    print '|First State|Choice 0 (Stand)|Choice 1 (Hit)|Relative value of Standing over Hitting|'
    print '|:-------:|:-------|:-----|:-----|'
    for i in range(MAX_VAL):
        print '| %s | %s | %s | %s |' % (
            (i + 1),
            av_table.getActionValues(i)[0], av_table.getActionValues(i)[1],
            av_table.getActionValues(i)[0] - av_table.getActionValues(i)[1])
Example #27
    def __init__(self, task, agent):
        Experiment.__init__(self, task, agent)

        agent.learner.explorer.experiment = self

        self.isPaused = False
        self.isCrashed = False
        self.speed = 10
        self.num = 0
        self.XA = 50
        self.switch_state = 1

        self.count = 0
        self.acc_reward = 0
        self.collect_data = False
        if collect_data_file != None:
            self.collect_data = True
            self.collect_episode_data_file = open(
                collect_data_file + "_episodelen.data", "w")
            self.collect_reward_data_file = open(
                collect_data_file + "_avg_reward.data", "w")
Example #28
File: bot.py Project: zmuda/iwium
    def __init__(self):
        self.environment = GameEnv()

        av_table = ActionValueTable(self.environment.outdim, self.environment.indim)
        av_table.initialize(0.)  # todo: save & restore agents state
        learner = Q()
        learner._setExplorer(EpsilonGreedyExplorer())
        agent = LearningAgent(av_table, learner)

        self.agent = agent
        self.task = GameTask(self.environment)
        self.experiment = Experiment(self.task, self.agent)
Example #29
def explore_maze():
    # simplified version of the reinforcement learning tutorial example
    structure = [
        list("!!!!!!!!!!"),
        list("! !  ! ! !"),
        list("! !! ! ! !"),
        list("!    !   !"),
        list("! !!!!!! !"),
        list("! ! !    !"),
        list("! ! !!!! !"),
        list("!        !"),
        list("! !!!!!  !"),
        list("!   !    !"),
        list("!!!!!!!!!!"),
    ]
    structure = np.array([[ord(c) - ord(" ") for c in row] for row in structure])
    shape = np.array(structure.shape)
    environment = Maze(structure, tuple(shape - 2))
    controller = ActionValueTable(shape.prod(), 4)
    controller.initialize(1.0)
    learner = Q()
    agent = LearningAgent(controller, learner)
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    for i in range(30):
        experiment.doInteractions(30)
        agent.learn()
        agent.reset()

    controller.params.reshape(shape.prod(), 4).max(1).reshape(*shape)
    # (0, 0) is upper left and (0, N) is upper right, so flip matrix upside down to match NESW action order
    greedy_policy = np.argmax(controller.params.reshape(shape.prod(), 4), 1)
    greedy_policy = np.flipud(np.array(list("NESW"))[greedy_policy].reshape(shape))
    maze = np.flipud(np.array(list(" #"))[structure])
    print("Maze map:")
    print("\n".join("".join(row) for row in maze))
    print("Greedy policy:")
    print("\n".join("".join(row) for row in greedy_policy))
    assert "\n".join("".join(row) for row in greedy_policy) == "NNNNN\nNSNNN\nNSNNN\nNEENN\nNNNNN"
Example #30
    def __init__(self):
        self.av_table = ActionValueTable(4, 5)
        self.av_table.initialize(0.1)

        learner = SARSA()
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)

        env = HASSHEnv()

        task = HASSHTask(env)

        self.experiment = Experiment(task, self.agent)
Example #31
class PlayYourCardsRight(Feature):
  
    def __init__(self, text_to_speech, speech_to_text):
        Feature.__init__(self)

        # setup AV Table
        self.av_table = GameTable(13, 2)
        if(self.av_table.loadParameters() == False):
            self.av_table.initialize(0.)
 
        # setup a Q-Learning agent
        learner = Q(0.5, 0.0)
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)
 
        # setup game interaction
        self.game_interaction = GameInteraction(text_to_speech, speech_to_text)

        # setup environment
        environment = GameEnvironment(self.game_interaction)
 
        # setup task
        task = GameTask(environment, self.game_interaction)
 
        # setup experiment
        self.experiment = Experiment(task, self.agent)
    
    @property
    def is_speaking(self):
        return self.game_interaction.is_speaking

    def _thread(self, args):
        # let's play our cards right!
        while not self.is_stop:
            self.experiment.doInteractions(1)
            self.agent.learn()
            self.av_table.saveParameters()
Example #32
    def __init__(self, text_to_speech, speech_to_text):
        Feature.__init__(self)

        # setup AV Table
        self.av_table = GameTable(13, 2)
        if (self.av_table.loadParameters() == False):
            self.av_table.initialize(0.)

        # setup a Q-Learning agent
        learner = Q(0.5, 0.0)
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)

        # setup game interaction
        self.game_interaction = GameInteraction(text_to_speech, speech_to_text)

        # setup environment
        environment = GameEnvironment(self.game_interaction)

        # setup task
        task = GameTask(environment, self.game_interaction)

        # setup experiment
        self.experiment = Experiment(task, self.agent)
Example #33
class PlayYourCardsRight(Feature):
    def __init__(self, text_to_speech, speech_to_text):
        Feature.__init__(self)

        # setup AV Table
        self.av_table = GameTable(13, 2)
        if (self.av_table.loadParameters() == False):
            self.av_table.initialize(0.)

        # setup a Q-Learning agent
        learner = Q(0.5, 0.0)
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)

        # setup game interaction
        self.game_interaction = GameInteraction(text_to_speech, speech_to_text)

        # setup environment
        environment = GameEnvironment(self.game_interaction)

        # setup task
        task = GameTask(environment, self.game_interaction)

        # setup experiment
        self.experiment = Experiment(task, self.agent)

    @property
    def is_speaking(self):
        return self.game_interaction.is_speaking

    def _thread(self, args):
        # let's play our cards right!
        while not self.is_stop:
            self.experiment.doInteractions(1)
            self.agent.learn()
            self.av_table.saveParameters()
Example #34
def createExperimentInstance():
    gymRawEnv = gym.make('Taxi-v2')
    
    transformation = EnvTransformation()
     
    task = GymTask.createTask(gymRawEnv)
    env = task.env
    env.setTransformation( transformation )
    ## env.setCumulativeRewardMode()
     
    ## create value table and initialize with zeros
    table = ActionValueTable(env.numStates, env.numActions)
#     table = ActionValueTableWrapper(table)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )
    agent = createAgent(table)
     
    experiment = Experiment(task, agent)
    experiment = ProcessExperiment( experiment, experimentIteration )
    return experiment
Example #35
def createExperimentInstance():
    gymRawEnv = gym.make('FrozenLake-v0')

    transformation = EnvTransformation()

    task = GymTask.createTask(gymRawEnv)
    env = task.env
    env.setTransformation(transformation)
    ## env.setCumulativeRewardMode()

    # create value table and initialize with zeros
    table = ActionValueTable(gymRawEnv.observation_space.n,
                             gymRawEnv.action_space.n)
    table.initialize(0.0)
    # table.initialize( np.random.rand( table.paramdim ) )
    agent = createAgent(table)

    experiment = Experiment(task, agent)
    iterator = ExperimentIteration()
    quality = QualityFunctor()
    experiment = ProcessExperiment(experiment, iterator, quality)
    return experiment
Example #36
    def __init__(self, text_to_speech, speech_to_text):
        Feature.__init__(self)

        # setup AV Table
        self.av_table = GameTable(13, 2)
        if(self.av_table.loadParameters() == False):
            self.av_table.initialize(0.)
 
        # setup a Q-Learning agent
        learner = Q(0.5, 0.0)
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)
 
        # setup game interaction
        self.game_interaction = GameInteraction(text_to_speech, speech_to_text)

        # setup environment
        environment = GameEnvironment(self.game_interaction)
 
        # setup task
        task = GameTask(environment, self.game_interaction)
 
        # setup experiment
        self.experiment = Experiment(task, self.agent)
Example #37
class RlOp(threading.Thread):
    episodes = 1
    epilen = 200
    def __init__(self, event_queue_name, hub_queue_name):
        super().__init__()
        # create environment
        self.conn = boto.sqs.connect_to_region(constants.REGION)
        self.event_queue = self.conn.get_queue(event_queue_name)
        self.event_queue.set_message_class(MHMessage)
        self.env = DogEnv(DogEnv.ALL_QUIET, DogEnv.ALL_QUIET, self.event_queue, hub_queue_name)
        self.env.delay = (self.episodes == 1)

        # create task
        self.task = QuietDogTask(self.env)

        # create value table and initialize with ones
        # TODO: Get number of states from DogEnv
        self.table = ActionValueTable(2*5*4, 5*4)
        self.table.initialize(1.)

        # create agent with controller and learner - use SARSA(), Q() or QLambda() here
        self.learner = SARSA()

        # standard exploration is e-greedy, but a different type can be chosen as well
        self.learner.explorer = BoltzmannExplorer()

        # create agent
        self.agent = DogAgent(self.table, self.learner)

        # create experiment
        self.experiment = Experiment(self.task, self.agent)

    def run(self):
        self.call_run()

    def call_run(self):
        print('RlOp: running')
        # prepare plotting
        pylab.gray()
        pylab.ion()

        for i in range(1000):

            # interact with the environment (here in batch mode)
            self.experiment.doInteractions(100)
            self.agent.learn()
            self.agent.reset()

            results0 = self.table.params.reshape(2, 4, 5, 20)[0]
            results1 = self.table.params.reshape(2, 4, 5, 20)[1]
            pp.pprint(results0.argmax(2))
            pp.pprint(results1.argmax(2))

            # and draw the table
            #ar=self.table.params.reshape(2,5,4,5,4)
            #for state1 in range(len(constants.SOUNDS)):
            #    for state2 in range(4):
            #        pylab.pcolor(ar[1][state1][state2])
            #        pylab.draw()

        results0 = self.table.params.reshape(2, 4, 5, 20)[0]
        results1 = self.table.params.reshape(2, 4, 5, 20)[1]
        while True:
            time.sleep(60)
            pp.pprint(results0.argmax(2))
            pp.pprint(results1.argmax(2))
Example #38
class ReinforcementLearningRunner():

  def __init__(self, mode):
    self.mode = mode
    cu.mem('Reinforcement Learning Started')
    self.environment = RegionFilteringEnvironment(config.get(mode+'Database'), mode)
    self.controller = QNetwork()
    cu.mem('QNetwork controller created')
    self.learner = None
    self.agent = RegionFilteringAgent(self.controller, self.learner)
    self.task = RegionFilteringTask(self.environment, config.get(mode+'GroundTruth'))
    self.experiment = Experiment(self.task, self.agent)

  def runEpoch(self, interactions, maxImgs):
    img = 0
    s = cu.tic()
    while img < maxImgs:
      self.experiment.doInteractions(interactions)
      self.agent.learn()
      self.agent.reset()
      self.environment.loadNextEpisode()
      img += 1
    s = cu.toc('Run epoch with ' + str(maxImgs) + ' episodes', s)

  def run(self):
    if self.mode == 'train':
      self.agent.persistMemory = True
      self.agent.startReplayMemory(len(self.environment.db.images), config.geti('trainInteractions'), config.geti('stateFeatures'))
      self.train()
    elif self.mode == 'test':
      self.agent.persistMemory = False
      self.test()

  def train(self):
    interactions = config.geti('trainInteractions')
    minEpsilon = config.getf('minTrainingEpsilon')
    epochSize = len(self.environment.db.images)/2
    epsilon = 1.0
    self.controller.setEpsilonGreedy(epsilon)
    print 'Epoch 0: Exploration'
    self.runEpoch(interactions, len(self.environment.db.images))
    self.learner = QLearning()
    self.agent.learner = self.learner
    epoch = 1
    egEpochs = config.geti('epsilonGreedyEpochs')
    while epoch <= egEpochs:
      epsilon = epsilon - (1.0-minEpsilon)/float(egEpochs) 
      if epsilon < minEpsilon: epsilon = minEpsilon
      self.controller.setEpsilonGreedy(epsilon)
      print 'Epoch',epoch ,'(epsilon-greedy:{:5.3f})'.format(epsilon)
      self.runEpoch(interactions, epochSize)
      epoch += 1
    epoch = 1
    maxEpochs = config.geti('exploitLearningEpochs')
    while epoch <= maxEpochs:
      print 'Epoch',epoch+egEpochs,'(exploitation mode: epsilon={:5.3f})'.format(epsilon)
      self.runEpoch(interactions, epochSize)
      epoch += 1

  def test(self):
    interactions = config.geti('testInteractions')
    self.controller.setEpsilonGreedy(config.getf('testEpsilon'))
    self.runEpoch(interactions, len(self.environment.db.images))
Example #39
    from pybrain.rl.agents import LearningAgent
    from pybrain.rl.experiments import Experiment
    from pybrain.rl.explorers import EpsilonGreedyExplorer

    env = Chain()
    controller = ActionValueTable(env.outdim, env.indim)
    controller.initialize(1.)
#    controller.initialize(0.)

#    learner = Q(0.5, 0.8) # alpha 0.5, gamma 0.8
    learner = Q() # default alpha 0.5, gamma 0.99
#    learner._setExplorer(EpsilonGreedyExplorer(0.5))
    agent = LearningAgent(controller, learner)

    task = ChainTask(env)
    exp = Experiment(task, agent)

    reward = 0
    xs = []
    ys = []

    import matplotlib.pyplot as plt
    for i in xrange(5000):
        exp.doInteractions(1)
        agent.learn()

        reward += agent.lastreward

        if i%100 == 0:
            xs.append(i)
            ys.append(reward)
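The fragment above collects (xs, ys) reward samples every 100 interactions but is cut off before anything is drawn; a plausible ending, assuming the intent was to plot cumulative reward with the matplotlib module it already imports, would be:

    # sketch of a possible ending for the fragment above (not from the original source)
    plt.plot(xs, ys)
    plt.xlabel('interaction')
    plt.ylabel('cumulative reward')
    plt.show()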
Example #40
from ObjectLocalizerEnvironment import ObjectLocalizerEnvironment
from DeepQNetwork import DeepQNetwork
from DeepQLearning import DeepQLearning
from MDPObjectLocalizerTask import MDPObjectLocalizerTask
from ObjectLocalizationAgent import ObjectLocalizationAgent

print 'Starting Environment'
epsilon = 1.0
environment = ObjectLocalizerEnvironment(config.get('imageDir'), config.get('candidatesFile'), 'Training')
print 'Initializing DeepQNetwork'
controller = DeepQNetwork()
controller.setEpsilonGreedy(epsilon)
print 'Initializing Q Learner'
learner = DeepQLearning()
print 'Preparing Agent'
agent = ObjectLocalizationAgent(controller, learner)
print 'Configuring Task'
task = MDPObjectLocalizerTask(environment, config.get('groundTruth'))
print 'Setting up Experiment'
experiment = Experiment(task, agent)
i = 0
print 'Main Loop'
while i < config.geti('maximumEpochs'):
  print 'Epoch',i,'(epsilon:{:5.3f})'.format(epsilon)
  experiment.doInteractions(int(config.get('numInteractions')))
  agent.learn()
  agent.reset()
  i += 1
  epsilon = adjustEpsilon(config.geti('maximumEpochs'), i, epsilon)
  controller.setEpsilonGreedy(epsilon)
Example #41
                       [0.3, 0.5, 0.2]])

env = BanditEnvironment(payouts, distrib)

task = BanditTask(env)

table = PropensityTable(payouts.shape[0])
table.initialize(500.0)

#learner = RothErev(experimentation=0.55, recency=0.3)
learner = VariantRothErev(experimentation=0.65, recency=0.3)
learner.explorer = BoltzmannExplorer(tau=100.0, decay=0.9995)

agent = LearningAgent(table, learner)

experiment = Experiment(task, agent)

epis = int(1e1)
batch = 2
avgRewards = scipy.zeros(epis)
allActions = scipy.zeros(epis * batch)
c = 0
for i in range(epis):
    experiment.doInteractions(batch)
    avgRewards[i] = scipy.mean(agent.history["reward"])
    allActions[c:c + batch] = agent.history["action"].flatten() + 1
    agent.learn()
    agent.reset()

    c += batch
Example #42
import numpy
env=HitTheGoalEnv(5)
task=HitTheGoalTask(env,[5,0,0])


net = buildNetwork(2, 1, bias=False)
    # create agent with controller and learner (and its options)
#agent=OptimizationAgent(net, CMAES())
#agent.learner.setEvaluator(task,agent.module)
agent = LearningAgent(net,Reinforce())
#agent.learner.explorer=EpsilonGreedyExplorer(0.0)
#agent.learner._setExplorer(EpsilonGreedyExplorer(0.0))
#agent.learner.explorer.sigma=[0.1]
#print agent.learner.explorer.sigma
#exit()
experiment = Experiment(task, agent)

itr=0
#task.performAction(numpy.array([36]))
while True:
    # print itr
    # agent.learner.maxEvaluations += 1
    # agent.learner.learn()
    experiment.doInteractions(50)
    agent.learn()
    agent.reset()
    task.reset()
    # env.reset()
    # itr = itr + 1

Example #43
table = ActionValueTable(matrix_size, 2)
#table = ActionValueTable(matrix_size, matrix_size)
table.initialize(1.)


# create agent with controller and learner - use SARSA(), Q() or QLambda() here
learner = Q()

# standard exploration is e-greedy, but a different type can be chosen as well
# learner.explorer = BoltzmannExplorer()

# create agent
agent = LearningAgent(table, learner)

# create experiment
experiment = Experiment(task, agent)

# prepare plotting
pylab.gray()
pylab.ion()

#for i in range(100):
while True:
    # interact with the environment (here in batch mode)
    experiment.doInteractions(matrix_size)
    agent.learn()
    agent.reset()

    # and draw the table
    print table.params.reshape(matrix_size,2)
    #print table.params.reshape(matrix_size,matrix_size)
Example #44
  world = WorldInteraction()

  predTable = ActionValueTable(
    PredatorInteraction.NSTATES,
    len(PredatorInteraction.ACTIONS)
  )
  predTable.initialize(0.)

  predLearner = Q(ALPHA, GAMMA)
  predLearner._setExplorer(EpsilonGreedyExplorer(EPSILON))
  predAgent = LearningAgent(predTable, predLearner)

  predEnv = PredatorEnvironment(world)
  predTask = PredatorTask(predEnv)
  predExp = Experiment(predTask, predAgent)

  mimicTable = ActionValueTable(
    MimicryPreyInteraction.NSTATES,
    len(MimicryPreyInteraction.ACTIONS)
  )
  mimicTable.initialize(0.)

  mimicLearner = Q(ALPHA, GAMMA)
  mimicLearner._setExplorer(EpsilonGreedyExplorer(EPSILON))
  mimicAgent = LearningAgent(mimicTable, mimicLearner)

  mimicEnv = MimicryPreyEnvironment(world)
  mimicTask = MimicryPreyTask(mimicEnv)
  mimicExp = Experiment(mimicTask, mimicAgent)
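The fragment above builds two independent experiments, one for the predator and one for the mimicry prey, but stops before the driving loop; a sketch of how they might be alternated follows (an assumption on our part, since the original loop is not shown):

  # hypothetical driver alternating the two experiments built above
  for _ in range(1000):
      predExp.doInteractions(1)
      mimicExp.doInteractions(1)
      predAgent.learn()
      mimicAgent.learn()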
Example #45
import sys, time
from scipy import *

from pybrain.rl.environments import Task
from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.environments.mazes import Maze, MDPMazeTask
from pybrain.rl.experiments import Experiment
from pybrain.rl.agents import LearningAgent
from pybrain.rl.learners import Q, SARSA

var_structure_arr_ = array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                            [1, 0, 0, 1, 0, 0, 0, 0, 1],
                            [1, 0, 0, 1, 0, 0, 1, 0, 1],
                            [1, 0, 0, 1, 0, 0, 1, 0, 1],
                            [1, 0, 0, 1, 0, 1, 1, 0, 1],
                            [1, 0, 0, 0, 0, 0, 1, 0, 1],
                            [1, 1, 1, 1, 1, 1, 1, 0, 1],
                            [1, 0, 0, 0, 0, 0, 0, 0, 1],
                            [1, 1, 1, 1, 1, 1, 1, 1, 1]])

var_env_ = Maze(var_structure_arr_, (7, 7))

var_controller_ = ActionValueTable(81, 4)
var_controller_.initialize(1.0)

var_learner_ = Q()
var_Agent_ = LearningAgent(var_controller_, var_learner_)

# wrap the maze environment in an MDP task for the experiment
var_task_ = MDPMazeTask(var_env_)

experiment = Experiment(var_task_, var_Agent_)
Example #46
    def _oneInteraction(self):
        global draw

        resetInThisRound = False

        # Process events
        for event in pygame.event.get():
            if event.type == pygame.locals.QUIT or (
                    event.type == pygame.locals.KEYDOWN and event.key
                    in [pygame.locals.K_ESCAPE, pygame.locals.K_q]):
                return
            if (event.type == pygame.locals.KEYDOWN
                    and event.key == pygame.locals.K_SPACE):
                print len(controller.params)
                print controller.params.reshape(controller.numRows,
                                                controller.numColumns)
                controller.params.reshape(
                    controller.numRows,
                    controller.numColumns).tofile("test.table")
                self.isPaused = not self.isPaused
            if (event.type == pygame.locals.KEYDOWN
                    and event.key == pygame.locals.K_r):
                resetInThisRound = True
            if (event.type == pygame.locals.KEYDOWN
                    and event.key == pygame.locals.K_PLUS):
                self.speed += 1
            if (event.type == pygame.locals.KEYDOWN
                    and event.key == pygame.locals.K_MINUS):
                self.speed = max(self.speed - 1, 1)
            if (event.type == pygame.locals.KEYDOWN
                    and event.key == pygame.locals.K_d):
                draw = not draw

        # if self.isCrashed:
#           self.isCrashed = False
#           # level.reset()
#
# Update
        if resetInThisRound:
            print "reset"
            level.reset()

        old = (self.robotXA, self.robotYA)
        (self.robotXA, self.robotYA, csf,
         payoff) = reverseStateMapper[level.state]

        if not self.isCrashed and enemies_enabled:
            enemy_handler.update(old)
            for e in enemy_handler.getEnemyPositions():
                if (self.robotXA, self.robotYA) == e:
                    self.isCrashed = True
                    level.penalty += 1
                    self.acc_reward -= 1
                    if shield_options > 0 and not args.huge_neg_reward:
                        print "Shields are not allowed to make errors!"
                        exit()
                    break

        if (self.robotXA + 1, self.robotYA + 1) in bombs:
            self.bomb_counter += 1
            if self.bomb_counter == 4:
                self.isCrashed = True
                level.penalty += 1
                self.acc_reward -= 1
                if shield_options > 0 and not args.huge_neg_reward:
                    print "Shields are not allowed to make errors!"
                    exit()
        else:
            self.bomb_counter = 0

        if draw:
            q_max = 0
            for state in range(len(reverseStateMapper) - 1):
                q_max = max(q_max, max(controller.getActionValues(state)))

            # Draw Field
            for x in xrange(0, xsize):
                for y in xrange(0, ysize):
                    paletteColor = imageData[y * xsize + x]
                    color = palette[paletteColor * 3:paletteColor * 3 + 3]
                    pygame.draw.rect(self.screenBuffer, color,
                                     ((x + 1) * MAGNIFY,
                                      (y + 1) * MAGNIFY, MAGNIFY, MAGNIFY), 0)

            # Draw boundary
            if self.robotXA == -1 or self.isCrashed:
                boundaryColor = (255, 0, 0)
            else:
                boundaryColor = (64, 64, 64)
            pygame.draw.rect(self.screenBuffer, boundaryColor,
                             (0, 0, MAGNIFY * (xsize + 2), MAGNIFY), 0)
            pygame.draw.rect(self.screenBuffer, boundaryColor,
                             (0, MAGNIFY, MAGNIFY, MAGNIFY * (ysize + 1)), 0)
            pygame.draw.rect(self.screenBuffer, boundaryColor,
                             (MAGNIFY * (xsize + 1), MAGNIFY, MAGNIFY,
                              MAGNIFY * (ysize + 1)), 0)
            pygame.draw.rect(self.screenBuffer, boundaryColor,
                             (MAGNIFY, MAGNIFY *
                              (ysize + 1), MAGNIFY * xsize, MAGNIFY), 0)
            # pygame.draw.rect(screenBuffer,boundaryColor,(0,0,MAGNIFY*(xsize+2),MAGNIFY),0)

            # Draw cell frames
            for x in xrange(0, xsize):
                for y in xrange(0, ysize):
                    pygame.draw.rect(self.screenBuffer, (0, 0, 0),
                                     ((x + 1) * MAGNIFY,
                                      (y + 1) * MAGNIFY, MAGNIFY, MAGNIFY), 1)
                    if (x + 1, y + 1) in bombs:
                        self.screenBuffer.blit(self.bombImage,
                                               ((x + 1) * MAGNIFY + 1,
                                                (y + 1) * MAGNIFY + 1))
            pygame.draw.rect(self.screenBuffer, (0, 0, 0),
                             (MAGNIFY - 1, MAGNIFY - 1, MAGNIFY * xsize + 2,
                              MAGNIFY * ysize + 2), 1)

            # Draw "Good" Robot
            if self.robotXA != -1:
                pygame.draw.circle(
                    self.screenBuffer, (192, 32, 32),
                    ((self.robotXA + 1) * MAGNIFY + MAGNIFY / 2,
                     (self.robotYA + 1) * MAGNIFY + MAGNIFY / 2),
                    MAGNIFY / 3 - 2, 0)
                pygame.draw.circle(
                    self.screenBuffer, (255, 255, 255),
                    ((self.robotXA + 1) * MAGNIFY + MAGNIFY / 2,
                     (self.robotYA + 1) * MAGNIFY + MAGNIFY / 2),
                    MAGNIFY / 3 - 1, 1)
                pygame.draw.circle(
                    self.screenBuffer, (0, 0, 0),
                    ((self.robotXA + 1) * MAGNIFY + MAGNIFY / 2,
                     (self.robotYA + 1) * MAGNIFY + MAGNIFY / 2), MAGNIFY / 3,
                    1)

            # Draw "Bad" Robots
            if enemies_enabled:
                for (e_x, e_y) in enemy_handler.getEnemyPositions():
                    pygame.draw.circle(self.screenBuffer, (32, 32, 192),
                                       ((e_x + 1) * MAGNIFY + MAGNIFY / 2,
                                        (e_y + 1) * MAGNIFY + MAGNIFY / 2),
                                       MAGNIFY / 3 - 2, 0)
                    pygame.draw.circle(self.screenBuffer, (255, 255, 255),
                                       ((e_x + 1) * MAGNIFY + MAGNIFY / 2,
                                        (e_y + 1) * MAGNIFY + MAGNIFY / 2),
                                       MAGNIFY / 3 - 1, 1)
                    pygame.draw.circle(self.screenBuffer, (0, 0, 0),
                                       ((e_x + 1) * MAGNIFY + MAGNIFY / 2,
                                        (e_y + 1) * MAGNIFY + MAGNIFY / 2),
                                       MAGNIFY / 3, 1)

            # zone_width = danger_zone[-1][0] - danger_zone[0][0] + 1

    #        zone_height = danger_zone[-1][1] - danger_zone[0][1] + 1
    # pygame.draw.rect(screenBuffer,(200,200,0),(MAGNIFY*(danger_zone[0][0]+1),MAGNIFY*(danger_zone[0][1]+1),MAGNIFY*zone_width,MAGNIFY*zone_height),5)

    # Flip!
            self.screen.blit(self.screenBuffer, (0, 0))
            pygame.display.flip()

            # Make the transition
            if not self.isPaused:
                # Done
                self.clock.tick(self.speed)
            else:
                self.clock.tick(3)

        self.acc_reward += payoff * 10
        if self.collect_data:
            self.count += 1
            if payoff > 0:
                self.collect_episode_data_file.write(str(self.count) + "\n")
                self.count = 0
            if self.stepid % 100 == 0:
                self.collect_reward_data_file.write(
                    str(self.acc_reward / 100.) + "\n")
                self.acc_reward = 0
            if self.stepid % 100000 == 0:
                pass
                # print learner.alpha
#                 print learner.explorer.exploration
#                 print self.stepid
#                 raw_input()

        if self.stepid % 100 == 0:
            sys.stdout.write("\033[K")
            sys.stdout.write(
                "[{2}{3}] ({0}/{1}) | alpha = {4} | epsilon = {5}\n".format(
                    self.stepid, MAX_STEPS,
                    '#' * int(math.floor(self.stepid / float(MAX_STEPS) * 20)),
                    ' ' * int(
                        (20 -
                         math.floor(self.stepid / float(MAX_STEPS) * 20))),
                    learner.alpha, learner.explorer.exploration))
            sys.stdout.write("\033[F")

        if self.stepid >= MAX_STEPS:
            print "\nSimulation done!"

            sys.exit()

        if payoff > 0:
            # episode done
            if save_file != None:
                controller.params.reshape(
                    controller.numRows,
                    controller.numColumns).tofile(save_file)
            learner.alpha *= 1.  #0.999
            learner.explorer.exploration *= 1.  #0.999

        self.isCrashed = False
        if not self.isPaused:
            return Experiment._oneInteraction(self)
        else:
            return self.stepid
Example #47
class ReinforcementLearningRunner():
    def __init__(self, mode):
        self.mode = mode
        cu.mem('Reinforcement Learning Started')
        self.environment = RegionFilteringEnvironment(
            config.get(mode + 'Database'), mode)
        self.controller = QNetwork()
        cu.mem('QNetwork controller created')
        self.learner = None
        self.agent = RegionFilteringAgent(self.controller, self.learner)
        self.task = RegionFilteringTask(self.environment,
                                        config.get(mode + 'GroundTruth'))
        self.experiment = Experiment(self.task, self.agent)

    def runEpoch(self, interactions, maxImgs):
        img = 0
        s = cu.tic()
        while img < maxImgs:
            self.experiment.doInteractions(interactions)
            self.agent.learn()
            self.agent.reset()
            self.environment.loadNextEpisode()
            img += 1
        s = cu.toc('Run epoch with ' + str(maxImgs) + ' episodes', s)

    def run(self):
        if self.mode == 'train':
            self.agent.persistMemory = True
            self.agent.startReplayMemory(len(self.environment.db.images),
                                         config.geti('trainInteractions'),
                                         config.geti('stateFeatures'))
            self.train()
        elif self.mode == 'test':
            self.agent.persistMemory = False
            self.test()

    def train(self):
        interactions = config.geti('trainInteractions')
        minEpsilon = config.getf('minTrainingEpsilon')
        epochSize = len(self.environment.db.images) / 2
        epsilon = 1.0
        self.controller.setEpsilonGreedy(epsilon)
        print 'Epoch 0: Exploration'
        self.runEpoch(interactions, len(self.environment.db.images))
        self.learner = QLearning()
        self.agent.learner = self.learner
        epoch = 1
        egEpochs = config.geti('epsilonGreedyEpochs')
        while epoch <= egEpochs:
            epsilon = epsilon - (1.0 - minEpsilon) / float(egEpochs)
            if epsilon < minEpsilon: epsilon = minEpsilon
            self.controller.setEpsilonGreedy(epsilon)
            print 'Epoch', epoch, '(epsilon-greedy:{:5.3f})'.format(epsilon)
            self.runEpoch(interactions, epochSize)
            epoch += 1
        epoch = 1
        maxEpochs = config.geti('exploitLearningEpochs')
        while epoch <= maxEpochs:
            print 'Epoch', epoch + egEpochs, '(exploitation mode: epsilon={:5.3f})'.format(
                epsilon)
            self.runEpoch(interactions, epochSize)
            epoch += 1

    def test(self):
        interactions = config.geti('testInteractions')
        self.controller.setEpsilonGreedy(config.getf('testEpsilon'))
        self.runEpoch(interactions, len(self.environment.db.images))
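
# Hedged sketch of the epsilon schedule implemented by train() above: epsilon is
# annealed linearly from 1.0 down to minTrainingEpsilon over epsilonGreedyEpochs
# epochs and then held at that floor (linear_epsilon is a hypothetical helper,
# not part of the original class):
def linear_epsilon(epoch, eg_epochs, min_epsilon):
    step = (1.0 - min_epsilon) / float(eg_epochs)
    return max(min_epsilon, 1.0 - epoch * step)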
Example #48
0
class BoxSearchRunner():
    def __init__(self, mode):
        self.mode = mode
        cu.mem('Reinforcement Learning Started')
        self.environment = BoxSearchEnvironment(
            config.get(mode + 'Database'), mode,
            config.get(mode + 'GroundTruth'))
        self.controller = QNetwork()
        cu.mem('QNetwork controller created')
        self.learner = None
        self.agent = BoxSearchAgent(self.controller, self.learner)
        self.task = BoxSearchTask(self.environment,
                                  config.get(mode + 'GroundTruth'))
        self.experiment = Experiment(self.task, self.agent)

    def runEpoch(self, interactions, maxImgs):
        img = 0
        s = cu.tic()
        while img < maxImgs:
            k = 0
            while not self.environment.episodeDone and k < interactions:
                self.experiment._oneInteraction()
                k += 1
            self.agent.learn()
            self.agent.reset()
            self.environment.loadNextEpisode()
            img += 1
        s = cu.toc('Run epoch with ' + str(maxImgs) + ' episodes', s)

    def run(self):
        if self.mode == 'train':
            self.agent.persistMemory = True
            self.agent.startReplayMemory(len(self.environment.imageList),
                                         config.geti('trainInteractions'))
            #self.agent.assignPriorMemory(self.environment.priorMemory)
            self.train()
        elif self.mode == 'test':
            self.agent.persistMemory = False
            self.test()

    def train(self):
        networkFile = config.get('networkDir') + config.get(
            'snapshotPrefix') + '_iter_' + config.get(
                'trainingIterationsPerBatch') + '.caffemodel'
        interactions = config.geti('trainInteractions')
        minEpsilon = config.getf('minTrainingEpsilon')
        epochSize = len(self.environment.imageList) / 1
        epsilon = 1.0
        self.controller.setEpsilonGreedy(epsilon,
                                         self.environment.sampleAction)
        epoch = 1
        exEpochs = config.geti('explorationEpochs')
        while epoch <= exEpochs:
            s = cu.tic()
            print 'Epoch', epoch, ': Exploration (epsilon=1.0)'
            self.runEpoch(interactions, len(self.environment.imageList))
            self.task.flushStats()
            s = cu.toc('Epoch done in ', s)
            epoch += 1
        self.learner = QLearning()
        self.agent.learner = self.learner
        egEpochs = config.geti('epsilonGreedyEpochs')
        while epoch <= egEpochs + exEpochs:
            s = cu.tic()
            epsilon = epsilon - (1.0 - minEpsilon) / float(egEpochs)
            if epsilon < minEpsilon: epsilon = minEpsilon
            self.controller.setEpsilonGreedy(epsilon,
                                             self.environment.sampleAction)
            print 'Epoch', epoch, '(epsilon-greedy:{:5.3f})'.format(epsilon)
            self.runEpoch(interactions, epochSize)
            self.task.flushStats()
            self.doValidation(epoch)
            s = cu.toc('Epoch done in ', s)
            epoch += 1
        maxEpochs = config.geti('exploitLearningEpochs') + exEpochs + egEpochs
        while epoch <= maxEpochs:
            s = cu.tic()
            print 'Epoch', epoch, '(exploitation mode: epsilon={:5.3f})'.format(
                epsilon)
            self.runEpoch(interactions, epochSize)
            self.task.flushStats()
            self.doValidation(epoch)
            s = cu.toc('Epoch done in ', s)
            shutil.copy(networkFile, networkFile + '.' + str(epoch))
            epoch += 1

    def test(self):
        interactions = config.geti('testInteractions')
        self.controller.setEpsilonGreedy(config.getf('testEpsilon'))
        self.runEpoch(interactions, len(self.environment.imageList))

    def doValidation(self, epoch):
        if epoch % config.geti('validationEpochs') != 0:
            return
        auxRL = BoxSearchRunner('test')
        auxRL.run()
        indexType = config.get('evaluationIndexType')
        category = config.get('category')
        if indexType == 'pascal':
            categories, catIndex = bse.get20Categories()
        elif indexType == 'relations':
            categories, catIndex = bse.getCategories()
        elif indexType == 'finetunedRelations':
            categories, catIndex = bse.getRelationCategories()
        catI = categories.index(category)
        scoredDetections = bse.loadScores(config.get('testMemory'), catI)
        groundTruthFile = config.get('testGroundTruth')
        ps, rs = bse.evaluateCategory(scoredDetections, 'scores',
                                      groundTruthFile)
        pl, rl = bse.evaluateCategory(scoredDetections, 'landmarks',
                                      groundTruthFile)
        line = lambda x, y, z: x + '\t{:5.3f}\t{:5.3f}\n'.format(y, z)
        print line('Validation Scores:', ps, rs)
        print line('Validation Landmarks:', pl, rl)
Example #49
0
File: bot.py Project: zmuda/iwium
class Player:
    def __init__(self):
        self.environment = GameEnv()

        av_table = ActionValueTable(self.environment.outdim, self.environment.indim)
        av_table.initialize(0.)  # todo: save & restore agents state
        learner = Q()
        learner._setExplorer(EpsilonGreedyExplorer())
        agent = LearningAgent(av_table, learner)

        self.agent = agent
        self.task = GameTask(self.environment)
        self.experiment = Experiment(self.task, self.agent)

    def name(self, index):
        self.me = index
        [self.opp1, self.opp2] = [i for i in range(3) if i != self.me]

    def hand(self, card):
        self.environment.reset()
        self.environment.setHand(card)
        self.environment.setStack(300)

    def bet1(self, min):
        self.environment.setPhase('bet-1')
        self.environment.setMinBet(min)
        self.experiment.doInteractions(1)
        bet = self.environment.getTranslatedAction()
        return bet

    def bet1_info(self, bets):
        opp1_bet = bets[self.opp1]
        opp2_bet = bets[self.opp2]
        self.environment.setOpponentsBets(opp1_bet, opp2_bet)

    def call1(self, current_bet):
        self.environment.setPhase('call-1')
        self.environment.setToCall(current_bet)
        self.experiment.doInteractions(1)
        is_calling = self.environment.getTranslatedAction()
        return is_calling

    def call1_info(self, in_game):
        opp1_in_game = in_game[self.opp1]
        opp2_in_game = in_game[self.opp2]
        self.environment.setOpponentsFolded(not opp1_in_game, not opp2_in_game)

    def bet2(self, min):
        self.environment.setPhase('bet-2')
        self.environment.setMinBet(min)
        self.experiment.doInteractions(1)
        bet = self.environment.getTranslatedAction()
        return bet

    def bet2_info(self, bets):
        opp1_bet = bets[self.opp1]
        opp2_bet = bets[self.opp2]
        self.environment.setOpponentsBets(opp1_bet, opp2_bet)

    def call2(self, current_bet):
        self.environment.setPhase('call-1')  # note: the second call round reuses the 'call-1' phase label
        self.environment.setToCall(current_bet)
        self.experiment.doInteractions(1)
        is_calling = self.environment.getTranslatedAction()
        return is_calling

    def call2_info(self, in_game):
        opp1_in_game = in_game[self.opp1]
        opp2_in_game = in_game[self.opp2]

    def showdown(self, hand):
        opp1_hand = hand[self.opp1]
        opp2_hand = hand[self.opp2]

    def result(self, winnings):
        my_winnings = winnings[self.me]
        opp1_winnings = winnings[self.opp1]
        opp2_winnings = winnings[self.opp2]

        self.environment.setPhase('results')
        self.task.setWinnings(my_winnings)
        self.experiment.doInteractions(1)

        self.agent.learn()
        self.agent.reset()
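
# Hedged usage sketch: the call order one dealt hand would drive through this
# class (assumes the surrounding project provides GameEnv/GameTask; _example_hand
# is a hypothetical wrapper, and the seat index, card, bet amounts and winnings
# below are illustrative values only):
def _example_hand():
    player = Player()
    player.name(0)                    # we sit in seat 0 of three players
    player.hand(7)                    # card dealt for this round
    first_bet = player.bet1(10)       # bet-1 phase: the agent chooses a bet
    keeps_playing = player.call1(25)  # call-1 phase: call or fold
    second_bet = player.bet2(10)      # bet-2 phase
    player.result([40, 0, 0])         # winnings per seat; triggers learn()/reset()
    return first_bet, keeps_playing, second_bet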
Example #50
0
def Py_Brain():
    ############################
    # pybrain
    ############################
    import matplotlib as mpl
    import matplotlib.pyplot as plt
    from matplotlib.colors import ListedColormap
    import itertools
    from scipy import linalg

    from pybrain.rl.environments.mazes import Maze, MDPMazeTask
    from pybrain.rl.learners.valuebased import ActionValueTable
    from pybrain.rl.agents import LearningAgent
    from pybrain.rl.learners import Q, SARSA
    from pybrain.rl.experiments import Experiment
    from pybrain.rl.environments import Task

    import pylab
    #pylab.gray()
    #pylab.ion()

    '''
    structure = np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                          [1, 0, 0, 1, 0, 0, 0, 0, 1],
                          [1, 0, 0, 1, 0, 0, 1, 0, 1],
                          [1, 0, 0, 1, 0, 0, 1, 0, 1],
                          [1, 0, 0, 1, 0, 1, 1, 0, 1],
                          [1, 0, 0, 0, 0, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1, 1, 1, 0, 1],
                          [1, 0, 0, 0, 0, 0, 0, 0, 1],
                          [1, 1, 1, 1, 1, 1, 1, 1, 1]])
    '''
    structure = np.array([[1, 1, 1, 1, 1],
                          [1, 1, 0, 0, 1],
                          [1, 1, 0, 1, 1],
                          [1, 0, 0, 1, 1],
                          [1, 1, 1, 1, 1]])

    num_states = int(structure.shape[0]*structure.shape[1])
    SQRT = int(math.sqrt(num_states))
    #print structure.item((1, 3))
    #environment = Maze(structure, (7, 7)) #second parameter is goal field tuple
    environment = Maze(structure, (1, 3)) #second parameter is goal field tuple
    print type(environment)
    print environment
    # Standard maze environment comes with the following 4 actions:
    # North, South, East, West
    controller = ActionValueTable(num_states, 4) #[N, S, E, W] 
    controller.initialize(1)

    learner = Q()
    agent = LearningAgent(controller, learner)
    np.not_equal(agent.lastobs, None)  # comparison result is discarded; this line has no effect
    task = MDPMazeTask(environment)
    experiment = Experiment(task, agent)

    #while True:
    for x in range(4):
        print x
        experiment.doInteractions(10)
        agent.learn()
        agent.reset()

        pylab.pcolor(controller.params.reshape(num_states,4).max(1).reshape(SQRT,SQRT))
        pylab.draw()
        #pylab.show()
        name='MAZE'
        plt.savefig(str(name)+'_PLOT.png')
    plt.close()
Example #51
0
  world = WorldInteraction()

  predTable = ActionValueTable(
    PredatorInteraction.NSTATES,
    len(PredatorInteraction.ACTIONS)
  )
  predTable.initialize(0.)

  predLearner = Q(ALPHA, GAMMA)
  predLearner._setExplorer(EpsilonGreedyExplorer(EPSILON))
  predAgent = LearningAgent(predTable, predLearner)

  predEnv = PredatorEnvironment(world)
  predTask = PredatorTask(predEnv)
  predExp = Experiment(predTask, predAgent)

  try:
    for t in xrange(MAX_TIME):
      print 't = %d' % t 
      world.t = t
      predExp.doInteractions(1)
      predAgent.learn()
      print 'Colors vs. Q-table:'
      table_print(predTable._params, PredatorInteraction.NSTATES)
      print

  except KeyboardInterrupt:
    pass

  finally:
Example #52
0
import pickle
import time

# Create environment
sub_env = Environment(20, 20)
world = World(sub_env)

# Brain for the animat, we have already trained the data
f = open('neuro.net', 'rb')  # binary mode for pickle
trained_net = pickle.load(f)
brain = BrainController(trained_net)

# Learning method we use
#learner = PolicyGradientLearner()
learner = ENAC()
learner._setLearningRate(0.2)
# Create an animat
animat = StupidAnimat(trained_net, learner, sub_env)

# Establish a task
task = InteractTask(world, animat)

brain.validate_net()
experiment = Experiment(task, animat)
while True:
    experiment.doInteractions(10000)
    animat.learn()
    animat.reset()
    brain.validate_net()
    time.sleep(3)
Example #53
0
class BoxSearchRunner():

  def __init__(self, mode):
    self.mode = mode
    cu.mem('Reinforcement Learning Started')
    self.environment = BoxSearchEnvironment(config.get(mode+'Database'), mode, config.get(mode+'GroundTruth'))
    self.controller = QNetwork()
    cu.mem('QNetwork controller created')
    self.learner = None
    self.agent = BoxSearchAgent(self.controller, self.learner)
    self.task = BoxSearchTask(self.environment, config.get(mode+'GroundTruth'))
    self.experiment = Experiment(self.task, self.agent)

  def runEpoch(self, interactions, maxImgs):
    img = 0
    s = cu.tic()
    while img < maxImgs:
      k = 0
      while not self.environment.episodeDone and k < interactions:
        self.experiment._oneInteraction()
        k += 1
      self.agent.learn()
      self.agent.reset()
      self.environment.loadNextEpisode()
      img += 1
    s = cu.toc('Run epoch with ' + str(maxImgs) + ' episodes', s)

  def run(self):
    if self.mode == 'train':
      self.agent.persistMemory = True
      self.agent.startReplayMemory(len(self.environment.imageList), config.geti('trainInteractions'))
      self.train()
    elif self.mode == 'test':
      self.agent.persistMemory = False
      self.test()

  def train(self):
    networkFile = config.get('networkDir') + config.get('snapshotPrefix') + '_iter_' + config.get('trainingIterationsPerBatch') + '.caffemodel'
    interactions = config.geti('trainInteractions')
    minEpsilon = config.getf('minTrainingEpsilon')
    epochSize = len(self.environment.imageList)/1
    epsilon = 1.0
    self.controller.setEpsilonGreedy(epsilon, self.environment.sampleAction)
    epoch = 1
    exEpochs = config.geti('explorationEpochs')
    while epoch <= exEpochs:
      s = cu.tic()
      print 'Epoch',epoch,': Exploration (epsilon=1.0)'
      self.runEpoch(interactions, len(self.environment.imageList))
      self.task.flushStats()
      self.doValidation(epoch)
      s = cu.toc('Epoch done in ',s)
      epoch += 1
    self.learner = QLearning()
    self.agent.learner = self.learner
    egEpochs = config.geti('epsilonGreedyEpochs')
    while epoch <= egEpochs + exEpochs:
      s = cu.tic()
      epsilon = epsilon - (1.0-minEpsilon)/float(egEpochs)
      if epsilon < minEpsilon: epsilon = minEpsilon
      self.controller.setEpsilonGreedy(epsilon, self.environment.sampleAction)
      print 'Epoch',epoch ,'(epsilon-greedy:{:5.3f})'.format(epsilon)
      self.runEpoch(interactions, epochSize)
      self.task.flushStats()
      self.doValidation(epoch)
      s = cu.toc('Epoch done in ',s)
      epoch += 1
    maxEpochs = config.geti('exploitLearningEpochs') + exEpochs + egEpochs
    while epoch <= maxEpochs:
      s = cu.tic()
      print 'Epoch',epoch,'(exploitation mode: epsilon={:5.3f})'.format(epsilon)
      self.runEpoch(interactions, epochSize)
      self.task.flushStats()
      self.doValidation(epoch)
      s = cu.toc('Epoch done in ',s)
      shutil.copy(networkFile, networkFile + '.' + str(epoch))
      epoch += 1

  def test(self):
    interactions = config.geti('testInteractions')
    self.controller.setEpsilonGreedy(config.getf('testEpsilon'))
    self.runEpoch(interactions, len(self.environment.imageList))

  def doValidation(self, epoch):
    if epoch % config.geti('validationEpochs') != 0:
      return
    auxRL = BoxSearchRunner('test')
    auxRL.run()
    indexType = config.get('evaluationIndexType')
    category = config.get('category')
    if indexType == 'pascal':
      categories, catIndex = bse.get20Categories()
    elif indexType == 'relations':
      categories, catIndex = bse.getCategories()
    elif indexType == 'finetunedRelations':
      categories, catIndex = bse.getRelationCategories()
    if category in categories:
      catI = categories.index(category)
    else:
      catI = -1
    scoredDetections = bse.loadScores(config.get('testMemory'), catI)
    groundTruthFile = config.get('testGroundTruth')
    #ps,rs = bse.evaluateCategory(scoredDetections, 'scores', groundTruthFile)
    pl,rl = bse.evaluateCategory(scoredDetections, 'landmarks', groundTruthFile)
    line = lambda x,y,z: x + '\t{:5.3f}\t{:5.3f}\n'.format(y,z)
    #print line('Validation Scores:',ps,rs)
    print line('Validation Landmarks:',pl,rl)
Example #54
0
    # Initialize Reinforcement Learning
    learner = Q(0.5, 0.0)
    learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(controller, learner)

    # Setup the PyBrain and PyGame Environments
    environment = Environment()
    game = RunPacman(environment)

    # Create the Task for the Pac-Man Agent to Accomplish and initialize the first Action
    task = PacmanTask(environment, game)
    task.performAction(np.array([1]))

    # The Experiment is the PyBrain link between the task to be completed and the agent completing it
    experiment = Experiment(task, agent)
    currentGame = 1

    # Continue to loop program until the 'X' on the GUI is clicked
    while True:

        # Allow the agent to interaction with the environment (Move in a direction) then learn from it.
        experiment.doInteractions(1)
        agent.learn()

        # Check if current pacman game ended and needs to start a new one
        if game.wonGame == 1 or game.wonGame == -1:
            currentGame += 1

            # Store the information the agent has learned in long term memory,
            # Clear the short term memory to reduce any chance of overfitting,
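            # Hedged sketch of that end-of-game bookkeeping (only the two calls
            # the comments above describe; restarting the board is omitted):
            agent.learn()    # fold the finished game into the value table
            agent.reset()    # clear the episodic (short-term) history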
Example #55
0
# define action-value table
# number of states is:
#
#    current value: 1-21
#
# number of actions:
#
#    Stand=0, Hit=1
av_table = ActionValueTable(21, 2)
av_table.initialize(0.)

# define Q-learning agent
learner = Q(0.5, 0.0)
learner._setExplorer(EpsilonGreedyExplorer(0.0))
agent = LearningAgent(av_table, learner)

# define the environment
env = BlackjackEnv()

# define the task
task = BlackjackTask(env)

# finally, define experiment
experiment = Experiment(task, agent)
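
# Hedged helper (hypothetical, not in the original script): with the table laid
# out as 21 states x 2 actions, the greedy Stand/Hit policy can be read off at
# any time, e.g. by calling this every few thousand interactions inside the loop
# below (state i is assumed to encode a hand value of i + 1):
def dump_policy():
    q = av_table.params.reshape(21, 2)
    for hand_value, best in enumerate(q.argmax(axis=1), start=1):
        print hand_value, ('Hit' if best == 1 else 'Stand')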

# ready to go, start the process
while True:
    experiment.doInteractions(1)
    agent.learn()
    agent.reset()
Example #56
0
from DeepQLearning import DeepQLearning
from MDPObjectLocalizerTask import MDPObjectLocalizerTask
from ObjectLocalizationAgent import ObjectLocalizationAgent

print 'Starting Environment'
epsilon = 1.0
environment = ObjectLocalizerEnvironment(config.get('imageDir'),
                                         config.get('candidatesFile'),
                                         'Training')
print 'Initializing DeepQNetwork'
controller = DeepQNetwork()
controller.setEpsilonGreedy(epsilon)
print 'Initializing Q Learner'
learner = DeepQLearning()
print 'Preparing Agent'
agent = ObjectLocalizationAgent(controller, learner)
print 'Configuring Task'
task = MDPObjectLocalizerTask(environment, config.get('groundTruth'))
print 'Setting up Experiment'
experiment = Experiment(task, agent)
i = 0
print 'Main Loop'
while i < config.geti('maximumEpochs'):
    print 'Epoch', i, '(epsilon:{:5.3f})'.format(epsilon)
    experiment.doInteractions(int(config.get('numInteractions')))
    agent.learn()
    agent.reset()
    i += 1
    epsilon = adjustEpsilon(config.geti('maximumEpochs'), i, epsilon)
    controller.setEpsilonGreedy(epsilon)
Example #57
0
table.initialize(0.0)
# table.initialize( np.random.rand( table.paramdim ) )

# create agent with controller and learner - use SARSA(), Q() or QLambda() here
## alpha -- learning rate (how strongly new information overrides old estimates)
## gamma -- discount factor (importance of future reward)

# learner = Q(0.5, 0.99)
learner = SARSA(0.5, 0.99)
# learner = QLambda(0.5, 0.99, 0.9)
explorer = learner.explorer
explorer.decay = 0.999992

agent = LearningAgent(table, learner)

experiment = Experiment(task, agent)

## prevents "ImportError: sys.meta_path is None, Python is likely shutting down"
atexit.register(task.close)

render_demo = False
render_steps = False
imax = 7000
period_print = 100
eval_periods = 100

print("\nStarting")

total_reward = 0
period_reward = 0
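
# Hedged sanity check: PyBrain's EpsilonGreedyExplorer multiplies its epsilon by
# `decay` after every action selection, so after n selections roughly
# epsilon_n = epsilon_0 * decay ** n.  Treating imax as a number of selections:
print("exploration left after %d selections: %.3f"
      % (imax, explorer.epsilon * explorer.decay ** imax))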
Example #58
0
# create value table and initialize with ones
table = ActionValueTable(numStates, numActions)
table.initialize(1.)

# create agent with controller and learner - use SARSA(), Q() or QLambda() here
# learner = QLambda()
learner = SARSA()
# learner = Q()
# standard exploration is e-greedy, but a different type can be chosen as well
# learner.explorer = BoltzmannExplorer()

# create agent
agent = LearningAgent(table, learner)

# create experiment
experiment = Experiment(task, agent)

# prepare plotting
# pylab.gray()
# pylab.ion()


# Learning phase
# Num iterations used for PROHA Workshop preliminary evaluation
# numIterations   = 1600
numIterations   = 1500
numInteractions = 600

# Num iterations used for PROHA and PROLE slides
# numIterations   = 10
# numInteractions = 3
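
# Hedged sketch of the learning loop these constants configure, following the
# doInteractions()/learn()/reset() pattern used in the other examples above
# (a sketch, not the project's own loop):
for _ in range(numIterations):
    experiment.doInteractions(numInteractions)
    agent.learn()
    agent.reset()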