Example #1
 def __init__(self, question, testDict):
     super(QLearningTest, self).__init__(question, testDict)
     self.discount = float(testDict['discount'])
     self.grid = gridworld.Gridworld(parseGrid(testDict['grid']))
     if 'noise' in testDict: self.grid.setNoise(float(testDict['noise']))
     if 'livingReward' in testDict:
         self.grid.setLivingReward(float(testDict['livingReward']))
     self.env = gridworld.GridworldEnvironment(self.grid)
     self.epsilon = float(testDict['epsilon'])
     self.learningRate = float(testDict['learningRate'])
     self.opts = {
         'actionFn': self.env.getPossibleActions,
         'epsilon': self.epsilon,
         'gamma': self.discount,
         'alpha': self.learningRate
     }
     numExperiences = int(testDict['numExperiences'])
     maxPreExperiences = 10
     self.numsExperiencesForDisplay = list(
         range(min(numExperiences, maxPreExperiences)))
     self.testOutFile = testDict['test_out_file']
     if maxPreExperiences < numExperiences:
         self.numsExperiencesForDisplay.append(numExperiences)
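For reference, a small worked example of the display schedule built above; the numbers are illustrative, not taken from any test file:

# With numExperiences = 25 and maxPreExperiences = 10, the first ten
# experiences are shown individually and the final count is appended.
numExperiences = 25
maxPreExperiences = 10
numsExperiencesForDisplay = list(range(min(numExperiences, maxPreExperiences)))
if maxPreExperiences < numExperiences:
    numsExperiencesForDisplay.append(numExperiences)
print(numsExperiencesForDisplay)  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 25]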
Example #2

if __name__ == '__main__':

    opts = parseOptions()

    ###########################
    # GET THE GRIDWORLD
    ###########################

    import gridworld
    mdpFunction = getattr(gridworld, "get" + opts.grid)
    mdp = mdpFunction()
    mdp.setLivingReward(opts.livingReward)
    mdp.setNoise(opts.noise)
    env = gridworld.GridworldEnvironment(mdp)

    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################
    import textGridworldDisplay
    display = textGridworldDisplay.TextGridworldDisplay(mdp)
    if not opts.textDisplay:
        import graphicsGridworldDisplay
        display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
            mdp, opts.gridSize, opts.speed)
    try:
        display.start()
    except KeyboardInterrupt:
        sys.exit(0)
Example #3
def main(myargs):
    sys.argv = myargs.split()
    opts = parseOptions()

    ###########################
    # GET THE GRIDWORLD
    ###########################

    if opts.grid == 'VerticalBridgeGrid':
        opts.gridSize = 120

    import gridworld
    mdpFunction = getattr(gridworld, "get"+opts.grid)
    mdp = mdpFunction()
    mdp.setLivingReward(opts.livingReward)
    mdp.setNoise(opts.noise)
    env = gridworld.GridworldEnvironment(mdp)


    ###########################
    # GET THE DISPLAY ADAPTER
    ###########################

    import textGridworldDisplay
    display = textGridworldDisplay.TextGridworldDisplay(mdp)
    if not opts.textDisplay:
        import graphicsGridworldDisplay
        display = graphicsGridworldDisplay.GraphicsGridworldDisplay(mdp, opts.gridSize, opts.speed)
    try:
        display.start()
    except KeyboardInterrupt:
        sys.exit(0)

    ###########################
    # GET THE AGENT
    ###########################

    import valueIterationAgents, qlearningAgents
    a = None
    if opts.agent == 'value':
        a = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, opts.iters)
    elif opts.agent == 'q':
        #env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        #simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        gridWorldEnv = gridworld.GridworldEnvironment(mdp)
        actionFn = lambda state: mdp.getPossibleActions(state)
        qLearnOpts = {'gamma': opts.discount,
                      'alpha': opts.learningRate,
                      'epsilon': opts.epsilon,
                      'actionFn': actionFn}
        a = qlearningAgents.QLearningAgent(**qLearnOpts)
    elif opts.agent == 'random':
        # # No reason to use the random agent without episodes
        if opts.episodes == 0:
            opts.episodes = 10
        class RandomAgent:
            def getAction(self, state):
                return random.choice(mdp.getPossibleActions(state))
            def getValue(self, state):
                return 0.0
            def getQValue(self, state, action):
                return 0.0
            def getPolicy(self, state):
                "NOTE: 'random' is a special policy value; don't use it in your code."
                return 'random'
            def update(self, state, action, nextState, reward):
                pass
        a = RandomAgent()
    else:
        if not opts.manual: raise Exception('Unknown agent type: ' + opts.agent)


    ###########################
    # RUN EPISODES
    ###########################
    # DISPLAY Q/V VALUES BEFORE SIMULATION OF EPISODES
    try:
        if not opts.manual and opts.agent == 'value':
            if opts.valueSteps:
                for i in range(opts.iters):
                    tempAgent = valueIterationAgents.ValueIterationAgent(mdp, opts.discount, i)
                    display.displayValues(tempAgent, message = "VALUES AFTER "+str(i)+" ITERATIONS")
                    display.pause()

            display.displayValues(a, message = "VALUES AFTER "+str(opts.iters)+" ITERATIONS")
            display.pause()
            display.displayQValues(a, message = "Q-VALUES AFTER "+str(opts.iters)+" ITERATIONS")
            display.pause()
    except KeyboardInterrupt:
        sys.exit(0)



    # FIGURE OUT WHAT TO DISPLAY EACH TIME STEP (IF ANYTHING)
    displayCallback = lambda x: None
    if not opts.quiet:
        if opts.manual and opts.agent is None:
            displayCallback = lambda state: display.displayNullValues(state)
        else:
            if opts.agent == 'random': displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
            if opts.agent == 'value': displayCallback = lambda state: display.displayValues(a, state, "CURRENT VALUES")
            if opts.agent == 'q': displayCallback = lambda state: display.displayQValues(a, state, "CURRENT Q-VALUES")

    messageCallback = lambda x: printString(x)
    if opts.quiet:
        messageCallback = lambda x: None

    # FIGURE OUT WHETHER TO WAIT FOR A KEY PRESS AFTER EACH TIME STEP
    pauseCallback = lambda : None
    if opts.pause:
        pauseCallback = lambda : display.pause()

    # FIGURE OUT WHETHER THE USER WANTS MANUAL CONTROL (FOR DEBUGGING AND DEMOS)
    if opts.manual:
        decisionCallback = lambda state : getUserAction(state, mdp.getPossibleActions)
    else:
        decisionCallback = a.getAction

    # RUN EPISODES
    if opts.episodes > 0:
        print()
        print("RUNNING", opts.episodes, "EPISODES")
        print()
    returns = 0
    for episode in range(1, opts.episodes+1):
        returns += runEpisode(a, env, opts.discount, decisionCallback, displayCallback, messageCallback, pauseCallback, episode)
    if opts.episodes > 0:
        print()
        print("AVERAGE RETURNS FROM START STATE: "+str((returns+0.0) / opts.episodes))
        print()
        print()

    # DISPLAY POST-LEARNING VALUES / Q-VALUES
    if opts.agent == 'q' and not opts.manual:
        try:
            display.displayQValues(a, message = "Q-VALUES AFTER "+str(opts.episodes)+" EPISODES")
            display.pause()
            display.displayValues(a, message = "VALUES AFTER "+str(opts.episodes)+" EPISODES")
            display.pause()
        except KeyboardInterrupt:
            sys.exit(0)
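A hedged usage sketch of main() above: the function overwrites sys.argv with the split string, so the first token stands in for the program name. The specific flags are assumptions modeled on the standard Berkeley gridworld options (-a agent, -k episodes, -g grid, -q quiet); they are not defined in this snippet.

# Hypothetical invocation; the flags are assumed, not shown in this snippet.
main("gridworld.py -a q -k 50 -g BookGrid -q")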
Example #4
    def __init__(self, grid_name='DiscountGrid', discount=0.9, learning_rate=0.5, living_reward=0.0
                 , noise=0, epsilon=0.3, display_speed=0.5
                 , grid_size=150, text_only=False, n_episodes=100
                 , agent_window_size=1
                 , agent_max_n_experiences=1000
                 , check_policy_converge=False
                 , optimal_policy=None
                 , expr_log_dir=None
                 , agent_type="qLearningAgent"
                 , init_temp=1024.0
                 , temp_decrease_rate=2.0
                 , is_asyn_input=True):
        """
        :param agent_type: "qLearningAgent" or "TamerAgent" or "preferenceTAMERAgent"
        """
        ###########################
        # GENERAL CONTROL
        ###########################

        self.text_only = text_only
        self.display_speed = display_speed
        self.n_episodes = n_episodes
        self.discount = discount
        self.check_policy_converge = check_policy_converge
        self.optimal_policy = optimal_policy
        self.expr_log_dir = expr_log_dir
        self.save_VDBE = False

        ###########################
        # GET THE INPUT MODULE
        ###########################
        if agent_type == qlearningAgents.QLearningAgent.getAgentType():
            self.user_input_module = None
        else:
            self.user_input_module = user_input.UserInputModule(is_asyn=is_asyn_input)

        ###########################
        # GET THE GRIDWORLD
        ###########################

        # noinspection PyUnresolvedReferences
        import gridworld
        mdp_function = getattr(gridworld, "get" + grid_name)
        self.mdp = mdp_function()
        self.mdp.setLivingReward(living_reward)
        self.mdp.setNoise(noise)
        self.env = gridworld.GridworldEnvironment(self.mdp)

        ###########################
        # Variables used to store parameters values
        ###########################

        # init VDBE values records
        global VDBE_RECORDS
        VDBE_RECORDS = dict()
        for state in self.env.getGridWorld().getNonTerminalStates():
            VDBE_RECORDS[state] = list()

        ###########################
        # GET THE DISPLAY ADAPTER
        ###########################

        import textGridworldDisplay
        self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
        if not text_only:
            import graphicsGridworldDisplay
            self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(self.mdp, grid_size, display_speed)
        try:
            self.display.start()
        except KeyboardInterrupt:
            sys.exit(0)

        ###########################
        # GET THE TAMER AGENT
        ###########################

        # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        self.gridWorldEnv = gridworld.GridworldEnvironment(self.mdp)
        action_function = lambda m_state: self.mdp.getPossibleActions(m_state)
        q_learn_opts = {
            'gamma': discount,
            'alpha': learning_rate,
            'epsilon': epsilon,
            'actionFn': action_function,
            'init_temp': init_temp,
            'temp_decrease_rate': temp_decrease_rate
        }

        if agent_type == qlearningAgents.QLearningAgent.getAgentType():
            self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
        elif agent_type == qlearningAgents.TamerQAgent.getAgentType():
            self.agent = qlearningAgents.TamerQAgent(max_n_experiences=agent_max_n_experiences
                                                     , window_size=agent_window_size
                                                     , is_asyn_input=is_asyn_input
                                                     , **q_learn_opts)
        elif agent_type == preferenceTamerAgent.PreferenceTAMERAgent.getAgentType():
            self.agent = preferenceTamerAgent.PreferenceTAMERAgent(max_n_experiences=agent_max_n_experiences
                                                     , window_size=agent_window_size
                                                     , is_asyn_input=is_asyn_input
                                                     , **q_learn_opts)
    def __init__(self, grid_name='DiscountGrid', discount=0.9, learning_rate=0.5, living_reward=0.0
                 , noise=0, epsilon=0.3, display_speed=0.5
                 , grid_size=150, text_only=False
                 , save_optimal_policy_file=None
                 , init_temp=1024.0
                 , temp_decrease_rate=2.0
                 , delta=0.02):

        ###########################
        # GENERAL CONTROL
        ###########################

        self.text_only = text_only
        self.display_speed = display_speed
        self.discount = discount
        self.delta = delta
        self.save_optimal_policy_file = save_optimal_policy_file

        ###########################
        # GET THE GRIDWORLD
        ###########################

        # noinspection PyUnresolvedReferences
        import gridworld
        mdp_function = getattr(gridworld, "get" + grid_name)
        self.mdp = mdp_function()
        self.mdp.setLivingReward(living_reward)
        self.mdp.setNoise(noise)
        self.env = gridworld.GridworldEnvironment(self.mdp)

        ###########################
        # GET THE DISPLAY ADAPTER
        ###########################

        import textGridworldDisplay
        self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
        if not text_only:
            import graphicsGridworldDisplay
            self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(self.mdp, grid_size, display_speed)
        try:
            self.display.start()
        except KeyboardInterrupt:
            sys.exit(0)

        ###########################
        # GET THE TAMER AGENT
        ###########################

        import qlearningAgents
        # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        self.gridWorldEnv = gridworld.GridworldEnvironment(self.mdp)
        action_function = lambda state: self.mdp.getPossibleActions(state)
        q_learn_opts = {
            'gamma': discount,
            'alpha': learning_rate,
            'epsilon': epsilon,
            'actionFn': action_function,
            'init_temp': init_temp,
            'temp_decrease_rate': temp_decrease_rate
        }

        self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
Example #6
    def do_turn(self, ants):
        # track all moves, prevent collisions
        orders = {}

        def do_move_direction(loc, direction):
            # ants.destination handles map wrap-around and returns the target square;
            # if that square is free, the move order is issued below
            new_loc = ants.destination(loc, direction)
            # orders maps each claimed destination square to the ant moving into it,
            # which prevents two ants from colliding on the same square
            if (ants.unoccupied(new_loc) and new_loc not in orders):
                ants.issue_order((loc, direction))
                orders[new_loc] = loc
                return True
            else:
                return False

        targets = {}

        #ROHAN added the variable directn
        def do_move_location(loc, dirctn):
            # ants.direction takes a location and a destination and returns the closest
            # directions "as the crow flies": up-and-left gives ['n', 'w'] (try either),
            # while directly down gives just ['s']. Here the caller passes the
            # directions in directly via dirctn.
            directions = dirctn
            for direction in directions:
                if do_move_direction(loc, direction):
                    #targets[dest] = loc
                    return True
            return False

# --------------------------------starts from here--------------------------------------
# find close

        self.turn = self.turn + 1

        #MY HILLLLSSS
        for hill_loc in ants.my_hills():
            x, y = hill_loc
            self.grid[x][y] = self.MYHILL
            #Rrr The dummy entry doesn't need a from location, so we just set the value to None.
            #prevent stepping on own hill
            orders[hill_loc] = None
        #ENEMY HILLLSSSS
        for hill_loc, hill_owner in ants.enemy_hills():
            hillrow, hillcol = hill_loc
            self.grid[hillrow][hillcol] = self.ENEMYHILL

        #LAND, water food
        for i in range(ants.rows):
            for j in range(ants.cols):
                #if ((ants.visible((i,j))==True) or (self.grid[i][j]==(self.FOOD or self.ENEMYANTS or self.BOUNDARY2 or self.ENEMYANTS2))):
                #    self.grid[i][j]=' '
                #    self.gridu[i][j]='v'
                if ants.visible((i, j)):
                    self.gridu[i][j] = 'v'
                    # clear visible squares unless they hold a hill or water
                    if self.grid[i][j] not in (self.MYHILL, self.MYHILL2,
                                               self.ENEMYHILL, self.WATER):
                        self.grid[i][j] = ' '

                elif self.grid[i][j] in (self.FOOD, self.ENEMYANTS,
                                         self.BOUNDARY2, self.ENEMYANTS2,
                                         self.MYANTS):
                    self.grid[i][j] = ' '

                if ants.map[i][j] == -3:
                    self.grid[i][j] = self.FOOD
                elif ants.map[i][j] == -4:
                    self.grid[i][j] = self.WATER

                # if my hill is not visible, retreat to it urgently
                if self.grid[i][j] == self.MYHILL:
                    if not ants.visible((i, j)):
                        print >> sys.stderr, 'hill retreat', i, j
                        sys.stderr.flush()
                        self.grid[i][j] = self.MYHILL2
                    else:
                        self.grid[i][j] = self.MYHILL

                if self.grid[i][j] == self.ENEMYHILL:
                    print >> sys.stderr, 'hill attack!!!!!!!!!!!!!!!!!!', i, j
                    sys.stderr.flush()

        #MY ANTSSSSSSSSSS S
        num_ants = 0
        sx = 0
        sy = 0
        for ant_loc in ants.my_ants():
            antrow, antcol = ant_loc
            sx = sx + antrow
            sy = sy + antcol
            #self.grid[antrow][antcol]=self.MYANTS
            num_ants = num_ants + 1

        sx = int(sx / num_ants)
        sy = int(sy / num_ants)
        self.grid[sx][sy] = self.MYANTS

        ## change MODE------------------------------------------what do i do here ?????????
        if num_ants >= 0:  ##(ants.rows*ants.cols/200):
            self.BOUNDARY = self.BOUNDARY2
        else:
            self.BOUNDARY = ' '

        #ENEMYYYYYYYY ANTSSSSSSS
        for enemy_loc, enemy_owner in ants.enemy_ants():
            enemyrow, enemycol = enemy_loc
            #TO DO, if own ant concentration is good near enemy ant (enemy ant conc in the area), then a positive reward
            self.grid[enemyrow][enemycol] = self.ENEMYANTS

            #if they're near my base, retreat to base
            for hill_loc in ants.my_hills():
                x, y = hill_loc
                if ants.distance(hill_loc, enemy_loc) < 9.0:
                    self.grid[enemyrow][enemycol] = self.ENEMYANTS2

            #if i can surround em attack :) TODO: also check the enemy density
            surround = 0
            for ant_loc in ants.my_ants():
                antrow, antcol = ant_loc
                if ants.distance(ant_loc, enemy_loc) < 6.0:
                    surround = surround + 1

            if surround >= 3:
                self.grid[enemyrow][enemycol] = self.ENEMYANTS2
                print >> sys.stderr, 'ursurrounded attack: ', enemyrow, enemycol
                sys.stderr.flush()

#BOUNDARY EXPANDING !!!!!!!!!
        for i in range(ants.rows):
            for j in range(ants.cols):
                if (self.gridu[i][j] == 'v' and self.grid[i][j] == ' '):
                    if (ants.visible(ants.destination((i, j), 'n')) == False
                            or ants.visible(ants.destination(
                                (i, j), 'e')) == False
                            or ants.visible(ants.destination(
                                (i, j), 'w')) == False
                            or ants.visible(ants.destination(
                                (i, j), 's')) == False):
                        self.grid[i][j] = self.BOUNDARY

#-----------------------------------------------------------------------------------------VALUE ITERATION

#opts={'agent': 'value', 'discount': 0.9, 'iters': 200, 'noise': 0.01, 'livingReward': 0.0, 'epsilon': 0.0, 'pause': False, 'manual': False, 'quiet': True, 'episodes': 100, 'learningRate': 0.5, 'grid': 'BookGrid', 'gridSize': 150, 'speed': 1000.0, 'textDisplay': False}
        opts = {
            'livingReward': 0.0,
            'discount': 0.9,
            'iters': 300,
            'noise': 0.05,
            'epsilon': 0.0,
            'manual': False,
            'quiet': True,
            'agent': 'value',
            'pause': False,
            'episodes': 100,
            'learningRate': 0.5,
            'grid': 'BookGrid',
            'gridSize': 150,
            'speed': 1000.0,
            'textDisplay': False
        }

        mdp = gridworld.Gridworld(self.grid)
        mdp.setLivingReward(opts['livingReward'])
        mdp.setNoise(opts['noise'])
        env = gridworld.GridworldEnvironment(mdp)

        ###########################
        # GET THE AGENT
        ###########################

        #time_to_spare = (ants.turntime/1000.0) - (0.00064286 + 0.0000547619*num_ants + 0.0000065476*(num_ants*num_ants)) - 0.01
        if num_ants <= 60:
            time_to_spare = (ants.turntime / 1000.0) - 0.03
        else:
            time_to_spare = (ants.turntime / 1000.0) - (
                -0.003512 + 0.00047632 * num_ants - 0.00000105286 *
                (num_ants * num_ants)) - 0.005

        a = None
        a = valueIterationAgents.ValueIterationAgent(ants.turn_start_time,
                                                     time_to_spare, mdp,
                                                     opts['discount'],
                                                     opts['iters'])

        #TIME TIME TIME TIME
        #t1 = time.time()

        for ant_loc in ants.my_ants():
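            # Swap the axes and flip the first coordinate vertically so the
            # ants-map location lines up with the gridworld orientation the
            # value-iteration agent was built on.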
            antcol, antrow = ant_loc
            antcol = ants.rows - antcol - 1
            inverted_ant_loc = (antrow, antcol)
            if (a.getQValue(inverted_ant_loc, 'north') == a.getQValue(
                    inverted_ant_loc, 'south') == a.getQValue(
                        inverted_ant_loc, 'east') == a.getQValue(
                            inverted_ant_loc, 'west')):
                direct = random.choice('sewn')
                do_move_location(ant_loc, direct)
            elif a.getPolicy(inverted_ant_loc) == 'north':
                direct = 'n'
                do_move_location(ant_loc, direct)

            elif a.getPolicy(inverted_ant_loc) == 'south':
                direct = 's'
                do_move_location(ant_loc, direct)
            elif a.getPolicy(inverted_ant_loc) == 'east':
                direct = 'e'
                do_move_location(ant_loc, direct)
            elif a.getPolicy(inverted_ant_loc) == 'west':
                direct = 'w'
                do_move_location(ant_loc, direct)
            else:
                direct = random.choice('sewn')
                do_move_location(ant_loc, direct)

        #TIME 2 TIME 2 TIME 2
#t2 = time.time() - t1
        print >> sys.stderr, 'turn: ', self.turn, 'ants :', num_ants, 'spare:', time_to_spare, 'time:', (
            time.time() - ants.turn_start_time)
        sys.stderr.flush()

        # unblock own hill
        for hill_loc in ants.my_hills():
            if hill_loc in ants.my_ants() and hill_loc not in orders.values():
                for direction in ('s', 'e', 'w', 'n'):
                    if do_move_direction(hill_loc, direction):
                        break
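The grid-update loop above relies on membership tests (x in (...)) rather than comparing a cell against an or-chain of constants; a minimal sketch of why the two are not equivalent:

# 'x == (A or B)' compares x against the first truthy constant only,
# whereas 'x in (A, B)' checks x against every constant.
FOOD, WATER = 'f', 'w'
cell = 'w'
print(cell == (FOOD or WATER))  # False: evaluates as cell == FOOD
print(cell in (FOOD, WATER))    # True: genuine membership test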
Example #7
    def __init__(self,
                 grid_name='DiscountGrid',
                 discount=0.9,
                 learning_rate=0.5,
                 living_reward=0.0,
                 noise=0,
                 epsilon=0.3,
                 display_speed=0.5,
                 grid_size=150,
                 text_only=False,
                 n_episodes=100,
                 agent_window_size=1,
                 agent_max_n_experiences=1000,
                 check_value_converge=False,
                 check_policy_converge=False,
                 optimal_policy=None,
                 expr_log_dir=None,
                 delta=0.02,
                 is_use_q_agent=False,
                 init_temp=1024.0,
                 temp_decrease_rate=2.0,
                 is_asyn_input=True):

        ###########################
        # GENERAL CONTROL
        ###########################

        self.text_only = text_only
        self.display_speed = display_speed
        self.n_episodes = n_episodes
        self.discount = discount
        self.check_value_converge = check_value_converge
        self.check_policy_converge = check_policy_converge
        self.optimal_policy = optimal_policy
        self.expr_log_dir = expr_log_dir
        self.delta = delta

        ###########################
        # GET THE INPUT MODULE
        ###########################

        if is_use_q_agent:
            self.user_input_module = None
        else:
            self.user_input_module = user_input.UserInputModule(
                is_asyn=is_asyn_input)

        ###########################
        # GET THE GRIDWORLD
        ###########################

        # noinspection PyUnresolvedReferences
        import gridworld
        mdp_function = getattr(gridworld, "get" + grid_name)
        self.mdp = mdp_function()
        self.mdp.setLivingReward(living_reward)
        self.mdp.setNoise(noise)
        self.env = gridworld.GridworldEnvironment(self.mdp)

        ###########################
        # GET THE DISPLAY ADAPTER
        ###########################

        import textGridworldDisplay
        self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
        if not text_only:
            import graphicsGridworldDisplay
            self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
                self.mdp, grid_size, display_speed)
        try:
            self.display.start()
        except KeyboardInterrupt:
            sys.exit(0)

        ###########################
        # GET THE TAMER AGENT
        ###########################

        import qlearningAgents
        # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        self.gridWorldEnv = gridworld.GridworldEnvironment(self.mdp)
        action_function = lambda state: self.mdp.getPossibleActions(state)
        q_learn_opts = {
            'gamma': discount,
            'alpha': learning_rate,
            'epsilon': epsilon,
            'actionFn': action_function,
            'init_temp': init_temp,
            'temp_decrease_rate': temp_decrease_rate
        }

        if is_use_q_agent:
            self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
        else:
            self.agent = qlearningAgents.TamerQAgent(
                max_n_experiences=agent_max_n_experiences,
                window_size=agent_window_size,
                is_asyn_input=is_asyn_input,
                **q_learn_opts)
Example #8
    def __init__(self, optimal_policy=None, expr_log_dir=None):

        ###########################
        # GENERAL CONTROL
        ###########################

        self.text_only = ExperimentConfigurator.experimentConfig['text_only']
        self.display_speed = ExperimentConfigurator.experimentConfig[
            'display_speed']
        self.n_episodes = ExperimentConfigurator.gridWorldConfig['n_episodes']
        self.discount = ExperimentConfigurator.gridWorldConfig['discount']
        self.check_policy_converge = ExperimentConfigurator.experimentConfig[
            'check_policy_converge']
        self.optimal_policy = optimal_policy
        self.expr_log_dir = expr_log_dir
        self.save_VDBE = ExperimentConfigurator.experimentConfig['save_VDBE']

        ###########################
        # GET THE INPUT MODULE
        ###########################
        if ExperimentConfigurator.experimentConfig[
                'agent_type'] == qlearningAgents.QLearningAgent.getAgentType():
            self.user_input_module = None
        else:
            self.user_input_module = user_input.UserInputModule(
                is_asyn=ExperimentConfigurator.TamerConfig['is_asyn_input'])

        self.auto_feedback = AutoFeedback()

        ###########################
        # GET THE GRIDWORLD
        ###########################

        # noinspection PyUnresolvedReferences
        import gridworld
        mdp_function = getattr(
            gridworld,
            "get" + ExperimentConfigurator.gridWorldConfig['grid_name'])
        self.mdp = mdp_function()
        self.mdp.setLivingReward(
            ExperimentConfigurator.gridWorldConfig['living_reward'])
        self.mdp.setNoise(ExperimentConfigurator.gridWorldConfig['noise'])
        self.env = gridworld.GridworldEnvironment(self.mdp)

        ###########################
        # Variables used to store parameters values
        ###########################

        # init VDBE values records
        global VDBE_RECORDS
        VDBE_RECORDS = dict()
        for state in self.env.getGridWorld().getNonTerminalStates():
            VDBE_RECORDS[state] = list()

        ###########################
        # GET THE DISPLAY ADAPTER
        ###########################

        import textGridworldDisplay
        self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
        if not self.text_only:
            import graphicsGridworldDisplay
            self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
                self.mdp, ExperimentConfigurator.gridWorldConfig['grid_size'],
                self.display_speed)
        try:
            self.display.start()
        except KeyboardInterrupt:
            sys.exit(0)

        ###########################
        # GET THE TAMER AGENT
        ###########################

        # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        self.gridWorldEnv = gridworld.GridworldEnvironment(self.mdp)
        action_function = lambda m_state: self.mdp.getPossibleActions(m_state)
        q_learn_opts = {'actionFn': action_function}

        agent_type = ExperimentConfigurator.experimentConfig['agent_type']
        tamer_config = ExperimentConfigurator.TamerConfig
        if agent_type == qlearningAgents.QLearningAgent.getAgentType():
            self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
        elif agent_type == qlearningAgents.TamerQAgent.getAgentType():
            self.agent = qlearningAgents.TamerQAgent(
                max_n_experiences=tamer_config['agent_max_n_experiences'],
                window_size=tamer_config['agent_window_size'],
                is_asyn_input=tamer_config['is_asyn_input'],
                **q_learn_opts)
        elif agent_type == preferenceTamerAgent.PreferenceTAMERAgent.getAgentType():
            self.agent = preferenceTamerAgent.PreferenceTAMERAgent(
                max_n_experiences=tamer_config['agent_max_n_experiences'],
                window_size=tamer_config['agent_window_size'],
                is_asyn_input=tamer_config['is_asyn_input'],
                **q_learn_opts)
Example #9
    def __init__(self,
                 grid_name='DiscountGrid',
                 discount=0.9,
                 learning_rate=0.5,
                 living_reward=0.0,
                 noise=0.2,
                 epsilon=0.3,
                 display_speed=0.5,
                 grid_size=150,
                 text_only=False,
                 n_episodes=100,
                 agent_window_size=1,
                 agent_max_n_experiences=1000,
                 is_use_q_agent=False):
        self.text_only = text_only
        self.display_speed = display_speed
        self.n_episodes = n_episodes
        self.discount = discount

        ###########################
        # GET THE INPUT MODULE
        ###########################

        if is_use_q_agent:
            self.user_input_module = None
        else:
            self.user_input_module = user_input.UserInputModule()

        ###########################
        # GET THE GRIDWORLD
        ###########################

        # noinspection PyUnresolvedReferences
        import gridworld
        mdp_function = getattr(gridworld, "get" + grid_name)
        self.mdp = mdp_function()
        self.mdp.setLivingReward(living_reward)
        self.mdp.setNoise(noise)
        self.env = gridworld.GridworldEnvironment(self.mdp)

        ###########################
        # GET THE DISPLAY ADAPTER
        ###########################

        import textGridworldDisplay
        self.display = textGridworldDisplay.TextGridworldDisplay(self.mdp)
        if not text_only:
            import graphicsGridworldDisplay
            self.display = graphicsGridworldDisplay.GraphicsGridworldDisplay(
                self.mdp, grid_size, display_speed)
        try:
            self.display.start()
        except KeyboardInterrupt:
            sys.exit(0)

        ###########################
        # GET THE TAMER AGENT
        ###########################

        import qlearningAgents
        # env.getPossibleActions, opts.discount, opts.learningRate, opts.epsilon
        # simulationFn = lambda agent, state: simulation.GridworldSimulation(agent,state,mdp)
        self.gridWorldEnv = gridworld.GridworldEnvironment(self.mdp)
        action_function = lambda state: self.mdp.getPossibleActions(state)
        q_learn_opts = {
            'gamma': discount,
            'alpha': learning_rate,
            'epsilon': epsilon,
            'actionFn': action_function
        }

        if is_use_q_agent:
            self.agent = qlearningAgents.QLearningAgent(**q_learn_opts)
        else:
            self.agent = qlearningAgents.TamerQAgent(
                max_n_experiences=agent_max_n_experiences,
                window_size=agent_window_size,
                **q_learn_opts)
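The TAMER and Q-learning examples above all construct their agents the same way: build the MDP, wrap it in a GridworldEnvironment, and hand QLearningAgent an actionFn plus gamma/alpha/epsilon. A minimal end-to-end sketch under those assumptions; the environment methods reset(), getCurrentState(), getPossibleActions() and doAction() follow the Berkeley gridworld interface relied on by runEpisode in Example #3 and are assumed rather than shown in these snippets:

import gridworld
import qlearningAgents

# Build and configure the MDP, then wrap it in an environment (as above).
mdp = gridworld.getBookGrid()
mdp.setNoise(0.2)
env = gridworld.GridworldEnvironment(mdp)

# Same keyword options used throughout the examples.
agent = qlearningAgents.QLearningAgent(
    gamma=0.9, alpha=0.5, epsilon=0.3,
    actionFn=lambda state: mdp.getPossibleActions(state))

# Assumed interface: doAction(action) returns (nextState, reward), and a
# terminal state has no possible actions, which ends the episode.
for episode in range(100):
    env.reset()
    state = env.getCurrentState()
    while env.getPossibleActions(state):
        action = agent.getAction(state)
        next_state, reward = env.doAction(action)
        agent.update(state, action, next_state, reward)
        state = next_state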