Example #1
    def agent_step(self, reward, observation):
        action = None

        self.window.erase()
        self.window.addstr('STATE: %s\n' % (observation.intArray))
        self.window.addstr('REWARD: %s\n' % (reward))
        self.window.addstr('HIT UP, DOWN, LEFT or RIGHT to move...\n')
        self.window.refresh()

        try:
            c = self.window.getch()
            if c == curses.KEY_UP:
                action = 'N'
            elif c == curses.KEY_DOWN:
                action = 'S'
            elif c == curses.KEY_LEFT:
                action = 'W'
            elif c == curses.KEY_RIGHT:
                action = 'E'

            self.window.refresh()

        except KeyboardInterrupt:
            RLGlue.RL_cleanup()

        a = Action()

        if action:
            a.charArray = [action]

        return a
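Example #1 is only the agent_step method of a curses-driven agent; the curses window and the surrounding class are created elsewhere. Below is a minimal sketch, not taken from the example, of how such an agent could be completed and started with the RL-Glue Python codec; the KeyboardAgent name and the curses initialization details are assumptions.

import curses

from rlglue.agent import AgentLoader
from rlglue.agent.Agent import Agent
from rlglue.types import Action


class KeyboardAgent(Agent):
    def agent_init(self, task_spec):
        # Assumed setup: create the curses window that agent_step prompts on.
        self.window = curses.initscr()
        self.window.keypad(1)
        curses.noecho()

    def agent_start(self, observation):
        # First action of an episode; a real agent would prompt the user
        # here as well, but an empty Action keeps the sketch short.
        return Action()

    # def agent_step(self, reward, observation): exactly as in Example #1.

    def agent_end(self, reward):
        pass

    def agent_cleanup(self):
        # Restore the terminal when the experiment shuts down.
        curses.endwin()

    def agent_message(self, message):
        return ""


if __name__ == '__main__':
    AgentLoader.loadAgent(KeyboardAgent())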
Example #2
    def do_step(self, state, reward=None):
        """
        Runs the actual learning algorithm.
        In a separate function so it can be called both on start and on step.
        """
        #self.debug('do_step(', state, ',', reward, ')')

        #if not state in self.Q:
            # State not yet visited, initialize randomly
        #    self.Q[state] = self.random_actions()

        # Run the Q update if this isn't the first step
        action = None

        if reward is not None:
            action = self.update_Q(self.last_state, self.last_action, reward, state)

        # Action object
        a_obj = Action()

        if action is None:
            # Query the policy to find the best action
            action = self.policy(state)

        a_obj.charArray = list(action)

        # Save the current state-action pair for the next step's Q update.
        self.last_state = state
        self.last_action = action

        # And we're done
        return a_obj
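Examples #2 and #4 delegate to self.update_Q and self.policy without showing them. Here is a hedged sketch of what a dict-backed tabular Q-learning update and an epsilon-greedy policy could look like; the class and attribute names (TabularQMixin, Q, actions, alpha, gamma, epsilon) are assumptions, and the real update_Q apparently can also return the next action, which is why do_step checks for None.

import random
from collections import defaultdict


class TabularQMixin(object):
    # Hypothetical helpers for the do_step shown above.
    def __init__(self, actions, alpha=0.1, gamma=0.9, epsilon=0.1):
        self.actions = actions
        self.alpha, self.gamma, self.epsilon = alpha, gamma, epsilon
        # Unseen states default to all-zero action values.
        self.Q = defaultdict(lambda: {a: 0.0 for a in actions})

    def update_Q(self, last_state, last_action, reward, state):
        # One-step Q-learning backup:
        # Q(s,a) <- Q(s,a) + alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
        best_next = max(self.Q[state].values())
        target = reward + self.gamma * best_next
        self.Q[last_state][last_action] += self.alpha * (
            target - self.Q[last_state][last_action])
        return None  # do_step then falls through to self.policy(state)

    def policy(self, state):
        # Epsilon-greedy over the table; actions are short strings such as
        # 'N', 'S', 'E', 'W', matching a_obj.charArray = list(action).
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        values = self.Q[state]
        return max(values, key=values.get)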
Example #3
    def agent_step(self, reward, observation):
        action = None
        
        self.window.erase()
        self.window.addstr('STATE: %s\n' % (observation.intArray))
        self.window.addstr('REWARD: %s\n' % (reward))
        self.window.addstr('HIT UP, DOWN, LEFT or RIGHT to move...\n')
        self.window.refresh()

        try:
            c = self.window.getch()
            if c == curses.KEY_UP:
                action = 'N'
            elif c == curses.KEY_DOWN:
                action = 'S'
            elif c == curses.KEY_LEFT:
                action = 'W'
            elif c == curses.KEY_RIGHT:
                action = 'E'
            
            self.window.refresh()
        
        except KeyboardInterrupt:
            RLGlue.RL_cleanup()
            
        
        a = Action()
        
        if action:
            a.charArray = [action]
        
        return a
Example #4
    def do_step(self, state, reward=None):
        """
        Runs the actual learning algorithm.
        In a separate function so it can be called both on start and on step.
        """
        #self.debug('do_step(', state, ',', reward, ')')

        #if not state in self.Q:
        # State not yet visited, initialize randomly
        #    self.Q[state] = self.random_actions()

        # Run the Q update if this isn't the first step
        action = None

        if reward is not None:
            action = self.update_Q(self.last_state, self.last_action, reward,
                                   state)

        # Action object
        a_obj = Action()

        if action is None:
            # Query the policy to find the best action
            action = self.policy(state)

        a_obj.charArray = list(action)

        # Save the current state-action pair for the next step's Q update.
        self.last_state = state
        self.last_action = action

        # And we're done
        return a_obj
Example #5
    def agent_step(self, reward, observation):
        self.stepCount = self.stepCount + 1
        action = Action()
        action.intArray = observation.intArray
        action.doubleArray = observation.doubleArray
        action.charArray = observation.charArray

        return action
Example #6
    def agent_start(self, observation):
        self.stepCount = 0
        action = Action()
        action.intArray = observation.intArray
        action.doubleArray = observation.doubleArray
        action.charArray = observation.charArray

        return action
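Examples #5 and #6 simply echo the observation back as the action, in the style of the RL-Glue skeleton agent. For context, here is a hedged sketch of the remaining callbacks the Python codec's Agent interface expects, so that the two methods above form a complete, loadable agent; SkeletonAgent and _echo are assumed names.

from rlglue.agent import AgentLoader
from rlglue.agent.Agent import Agent
from rlglue.types import Action


class SkeletonAgent(Agent):
    def agent_init(self, task_spec):
        # task_spec is the task specification string sent by RL-Glue.
        self.stepCount = 0

    def agent_start(self, observation):
        self.stepCount = 0
        return self._echo(observation)

    def agent_step(self, reward, observation):
        self.stepCount = self.stepCount + 1
        return self._echo(observation)

    def agent_end(self, reward):
        pass

    def agent_cleanup(self):
        pass

    def agent_message(self, message):
        return "SkeletonAgent does not respond to messages."

    def _echo(self, observation):
        # Copy the observation's arrays straight into the returned Action,
        # exactly as Examples #5 and #6 do.
        action = Action()
        action.intArray = observation.intArray
        action.doubleArray = observation.doubleArray
        action.charArray = observation.charArray
        return action


if __name__ == '__main__':
    AgentLoader.loadAgent(SkeletonAgent())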
Example #7
    def agent_step(self, reward, observation):
        self.stepCount = self.stepCount + 1
        action = Action()
        action.intArray = observation.intArray
        action.doubleArray = observation.doubleArray
        action.charArray = observation.charArray

        return action
Example #8
    def agent_start(self, observation):
        self.stepCount = 0
        action = Action()
        action.intArray = observation.intArray
        action.doubleArray = observation.doubleArray
        action.charArray = observation.charArray

        return action
Example #9
    def agent_step(self, reward, observation):
        hypstr = ''.join(observation.charArray)
        hyplist = hypstr.split('\t')
        hyp_prob_list = [(hyplist[i], observation.doubleArray[i]) for i in range(len(hyplist))]
        self.last_sys_act = self.dman.act_on(self.last_sys_act, hyp_prob_list)
        self.dman.train(reward)

        return_act = Action()
        return_act.charArray = list(self.last_sys_act.encode('ascii', 'replace'))
        return return_act
Example #10
    def agent_step(self, reward, observation):
        hypstr = ''.join(observation.charArray)
        hyplist = hypstr.split('\t')
        hyp_prob_list = [(hyplist[i], observation.doubleArray[i])
                         for i in range(len(hyplist))]
        self.last_sys_act = self.dman.act_on(self.last_sys_act,
                                             hyp_prob_list)
        self.dman.train(reward)

        return_act = Action()
        return_act.charArray = list(
            self.last_sys_act.encode('ascii', 'replace'))
        return return_act
Example #11
def getAction(dir, isJump, isSpeed):
    # -1, 0, 1 for direction; 1 is to the right
    # 0, 1 for jump
    # 0, 1 for speed
    action = Action()
    action.numInts = 3
    action.numDoubles = 0
    action.numChars = 0
    action.intArray = []
    action.doubleArray = []
    action.charArray = []
    action.intArray.append(dir)
    action.intArray.append(isJump)
    action.intArray.append(isSpeed)
    return action
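Example #11 packs a Mario-style command into intArray as (direction, jump, speed). A small hypothetical usage of that helper, assuming Action from the RL-Glue Python codec is importable as in the snippets above; the argument meanings follow the comments inside getAction itself.

# Hypothetical calls to getAction from Example #11.
run_right_jumping = getAction(1, 1, 1)   # move right, jump, at speed
stand_still = getAction(0, 0, 0)         # no movement

# The three flags land in intArray in the order they were appended.
assert run_right_jumping.intArray == [1, 1, 1]
assert stand_still.intArray == [0, 0, 0]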
Example #12
    def do_step(self, state, reward=None):
        """
        Runs the actual learning algorithm.
        In a separate function so it can be called both on start and on step.
        """
        #self.debug('do_step(', state, ',', reward, ')')
        a_obj = Action()

        # Query the policy to find the best action
        action = self.policy(state)
        a_obj.charArray = list(action)

        # Run the Q update if this isn't the first step
        if reward is not None:
            self.update_Q(self.last_state, self.last_action, reward, state)

        # Save the current state-action pair for the next step's Q update.
        self.last_state = state
        self.last_action = action

        # And we're done
        return a_obj
Example #13
    def do_step(self, state, reward=None):
        """
        Runs the actual learning algorithm.
        In a separate function so it can be called both on start and on step.
        """
        #self.debug('do_step(', state, ',', reward, ')')
        a_obj = Action()

        # Query the policy to find the best action
        action = self.policy(state)
        a_obj.charArray = list(action)

        # Run the Q update if this isn't the first step
        if reward is not None:
            self.update_Q(self.last_state, self.last_action, reward, state)

        # Save the current state-action pair for the next step's Q update.
        self.last_state = state
        self.last_action = action

        # And we're done
        return a_obj
Example #14
    def do_step(self, state, reward = None):
        """ Make an action from state, given an optional (previous) reward

        In a separate function so it can be called both on start and on step.
        """
        a_obj = Action()

        # Query the policy to find the best action
        action = self.policy(state)
        a_obj.charArray = list(action)

        #print 'action: ', action

        # Run the parameter update if this isn't the first step
        if reward is not None:
            self.update_delta(tuple(self.last_state), tuple(self.last_action), reward)

        # Save the current state-action pair for the next step update.
        self.last_state = state
        self.last_action = action

        # Actionify!
        return a_obj
Example #15
    def do_step(self, state, reward=None):
        """ Make an action from state, given an optional (previous) reward

        In a separate function so it can be called both on start and on step.
        """
        a_obj = Action()

        # Query the policy to find the best action
        action = self.policy(state)
        a_obj.charArray = list(action)

        #print 'action: ', action

        # Run the parameter update if this isn't the first step
        if reward is not None:
            self.update_delta(tuple(self.last_state), tuple(self.last_action),
                              reward)

        # Save the current state-action pair for the next step update.
        self.last_state = state
        self.last_action = action

        # Actionify!
        return a_obj