def agent_step(self, reward, observation):
    action = None
    # Show the current state and reward, then prompt for a key press.
    self.window.erase()
    self.window.addstr('STATE: %s\n' % (observation.intArray))
    self.window.addstr('REWARD: %s\n' % (reward))
    self.window.addstr('HIT UP, DOWN, LEFT or RIGHT to move...\n')
    self.window.refresh()
    try:
        # Map arrow keys to compass directions.
        c = self.window.getch()
        if c == curses.KEY_UP:
            action = 'N'
        elif c == curses.KEY_DOWN:
            action = 'S'
        elif c == curses.KEY_LEFT:
            action = 'W'
        elif c == curses.KEY_RIGHT:
            action = 'E'
        self.window.refresh()
    except KeyboardInterrupt:
        # On Ctrl-C, shut down the RL-Glue experiment.
        RLGlue.RL_cleanup()
    a = Action()
    if action:
        a.charArray = [action]
    return a
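# The step method above assumes self.window already exists. A minimal,
# hypothetical companion initializer (names assumed, not from the original
# source) could set it up with curses:
import curses

def agent_init(self, task_spec):
    # Create the curses screen and enable arrow-key reporting so that
    # window.getch() returns curses.KEY_UP / KEY_DOWN / KEY_LEFT / KEY_RIGHT.
    self.window = curses.initscr()
    curses.cbreak()           # react to keys without waiting for Enter
    self.window.keypad(True)  # translate escape sequences into KEY_* codes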
def do_step(self, state, reward=None):
    """
    Runs the actual learning algorithm.

    In a separate function so it can be called both on start and on step.
    """
    #self.debug('do_step(', state, ',', reward, ')')
    #if not state in self.Q:
    #    # State not yet visited, initialize randomly
    #    self.Q[state] = self.random_actions()

    # Run the Q update if this isn't the first step
    action = None
    if reward is not None:
        action = self.update_Q(self.last_state, self.last_action, reward, state)

    # Action object
    a_obj = Action()
    if action is None:
        # Query the policy to find the best action
        action = self.policy(state)
    a_obj.charArray = list(action)

    # Save the current state-action pair for the next step's Q update.
    self.last_state = state
    self.last_action = action

    # And we're done
    return a_obj
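# The snippet above relies on self.update_Q and self.policy, neither of
# which is shown. A minimal tabular sketch of update_Q, assuming self.Q
# maps each state to a dict of action-values (the name, signature and
# hyperparameters here are assumptions, not the original code):
def update_Q(self, s, a, r, s_next, alpha=0.1, gamma=0.9):
    # Initialize unseen states, mirroring the commented-out code above.
    if s not in self.Q:
        self.Q[s] = self.random_actions()
    if s_next not in self.Q:
        self.Q[s_next] = self.random_actions()
    # One-step Q-learning backup toward the best next action-value.
    best_next = max(self.Q[s_next].values())
    self.Q[s][a] += alpha * (r + gamma * best_next - self.Q[s][a])
    # Returning None makes the caller fall back to self.policy(state);
    # a SARSA-style variant could instead choose and return the next action.
    return None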
def agent_step(self, reward, observation):
    # Echo agent: mirror the observation straight back as the action.
    self.stepCount = self.stepCount + 1
    action = Action()
    action.intArray = observation.intArray
    action.doubleArray = observation.doubleArray
    action.charArray = observation.charArray
    return action

def agent_start(self, observation):
    # Reset the step counter and echo the first observation as the action.
    self.stepCount = 0
    action = Action()
    action.intArray = observation.intArray
    action.doubleArray = observation.doubleArray
    action.charArray = observation.charArray
    return action
def agent_step(self, reward, observation):
    # Rebuild the tab-separated hypothesis string from the char array.
    hypstr = ''.join(observation.charArray)
    hyplist = hypstr.split('\t')
    # Pair each hypothesis with its confidence score from doubleArray.
    hyp_prob_list = [(hyp, observation.doubleArray[i])
                     for i, hyp in enumerate(hyplist)]
    self.last_sys_act = self.dman.act_on(self.last_sys_act, hyp_prob_list)
    self.dman.train(reward)
    return_act = Action()
    return_act.charArray = list(self.last_sys_act.encode('ascii', 'replace'))
    return return_act
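# Illustrative only: the observation packs tab-separated recognition
# hypotheses into charArray, with parallel confidence scores in doubleArray
# (Observation here is the rlglue.types class; the values are made up):
from rlglue.types import Observation

obs = Observation()
obs.charArray = list('hello world\thello word')
obs.doubleArray = [0.7, 0.3]
# The parsing above then yields [('hello world', 0.7), ('hello word', 0.3)].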
def getAction(dir, isJump, isSpeed):
    # dir: -1, 0 or 1 for direction, where 1 is to the right
    # isJump: 0 or 1
    # isSpeed: 0 or 1
    action = Action()
    action.numInts = 3
    action.numDoubles = 0
    action.numChars = 0
    action.intArray = [dir, isJump, isSpeed]
    action.doubleArray = []
    action.charArray = []
    return action
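# Example call, following the parameter comments above (the variable name
# is ours): move right at full speed while jumping.
run_jump_right = getAction(1, 1, 1)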
def do_step(self, state, reward=None):
    """
    Runs the actual learning algorithm.

    In a separate function so it can be called both on start and on step.
    """
    #self.debug('do_step(', state, ',', reward, ')')
    a_obj = Action()

    # Query the policy to find the best action
    action = self.policy(state)
    a_obj.charArray = list(action)

    # Run the Q update if this isn't the first step
    if reward is not None:
        self.update_Q(self.last_state, self.last_action, reward, state)

    # Save the current state-action pair for the next step's Q update.
    self.last_state = state
    self.last_action = action

    # And we're done
    return a_obj
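# self.policy is not shown in any of these snippets. A plausible
# epsilon-greedy sketch, again assuming self.Q maps states to dicts of
# action-values (the name, signature and epsilon value are assumptions):
import random

def policy(self, state, epsilon=0.1):
    # Lazily initialize unseen states.
    if state not in self.Q:
        self.Q[state] = self.random_actions()
    # Explore with probability epsilon, otherwise exploit the best action.
    if random.random() < epsilon:
        return random.choice(list(self.Q[state]))
    return max(self.Q[state], key=self.Q[state].get)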
def do_step(self, state, reward=None):
    """
    Make an action from state, given an optional (previous) reward.

    In a separate function so it can be called both on start and on step.
    """
    a_obj = Action()

    # Query the policy to find the best action
    action = self.policy(state)
    a_obj.charArray = list(action)
    #print 'action: ', action

    # Run the parameter update if this isn't the first step
    if reward is not None:
        self.update_delta(tuple(self.last_state), tuple(self.last_action),
                          reward)

    # Save the current state-action pair for the next step update.
    self.last_state = state
    self.last_action = action

    # Actionify!
    return a_obj
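# update_delta is not shown; unlike the Q-learning variants above it takes
# no next state, which suggests a direct parameter update from the observed
# reward. A purely hypothetical sketch (names, storage and logic assumed):
def update_delta(self, state, action, reward, alpha=0.1):
    # Nudge a per-(state, action) preference toward the observed reward.
    key = (state, action)
    old = self.delta.get(key, 0.0)
    self.delta[key] = old + alpha * (reward - old)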