def env_step(self, thisAction):
    """Apply one agent action and report the resulting transition.

    Args:
        thisAction: Action object whose ``intArray`` holds exactly one
            integer in the range [0, 3].

    Returns:
        A ``Reward_observation_terminal`` whose ``r`` is the result of
        ``calculateReward`` for the action, whose ``o`` is the observation
        built after the position update, and whose ``terminal`` is the
        result of ``checkCurrentTerminal``.

    Raises:
        AssertionError: If ``intArray`` does not hold exactly one integer
            in [0, 3] (only while assertions are enabled).
    """
    # Make sure the action is valid.
    assert len(thisAction.intArray) == 1, "Expected 1 integer action."
    actionValue = thisAction.intArray[0]
    # The accepted range is 0..3 inclusive; the messages now say so.
    assert actionValue >= 0, "Expected action to be in [0,3]"
    assert actionValue < 4, "Expected action to be in [0,3]"

    self.updatePosition(actionValue)

    theObs = Observation()
    theObs.intArray = [self.calculateFlatState()]
    theObs.charArray = ["T", "T", "T", "T"]
    # Mark the options available in the agent's current cell.
    # NOTE(review): every slot already starts as "T", so this loop does not
    # change the array; it can only raise IndexError when 2 + option falls
    # outside the four slots. Confirm the intended default for option slots.
    for option in self.optionsArray[self.agentRow][self.agentCol]:
        theObs.charArray[2 + option] = "T"

    returnRO = Reward_observation_terminal()
    returnRO.r = self.calculateReward(actionValue)
    returnRO.o = theObs
    returnRO.terminal = self.checkCurrentTerminal()
    return returnRO
def env_start(self):
    """Initialise the agent's state for a new episode and observe it.

    When ``fixedStartState`` is set, the agent is placed at
    (``startRow``, ``startCol``) via ``setAgentState``; if that call reports
    the state as invalid, a warning is printed and ``setRandomState`` is
    used instead. Otherwise ``setRandomState`` is used directly.

    Returns:
        An ``Observation`` whose ``intArray`` holds the value of
        ``calculateFlatState()`` and whose ``charArray`` holds four flags.
    """
    if self.fixedStartState:
        stateValid = self.setAgentState(self.startRow, self.startCol)
        if not stateValid:
            # Report row AND column (the column used to repeat the row).
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("The fixed start state was NOT valid: "
                  + str(int(self.startRow)) + "," + str(int(self.startCol)))
            self.setRandomState()
    else:
        self.setRandomState()

    returnObs = Observation()
    returnObs.intArray = [self.calculateFlatState()]
    returnObs.charArray = ["T", "T", "T", "T"]

    # Now add characters based on the options present in the agent's cell.
    # The lookup uses agentRow/agentCol (as env_step does) instead of
    # startRow/startCol, because the configured start cell is not where the
    # agent is after a random placement.
    # NOTE(review): assumes setAgentState/setRandomState update
    # agentRow/agentCol -- confirm.
    # NOTE(review): the original comment listed five labels ("Up, Right,
    # Down, Option1, Option2") but the array has four slots, and env_step
    # uses offset 2 where this method uses 3; with offset 3 any option
    # value above 0 raises IndexError. Confirm the intended layout.
    for option in self.optionsArray[self.agentRow][self.agentCol]:
        returnObs.charArray[3 + option] = "T"

    return returnObs
def env_step(self, thisAction):
    """Advance the environment by one step using the given action.

    Args:
        thisAction: Action object whose ``intArray`` holds exactly one
            integer in the range [0, 3].

    Returns:
        A ``Reward_observation_terminal`` with ``r`` set from
        ``calculateReward``, ``o`` set to the observation built after the
        position update, and ``terminal`` set from ``checkCurrentTerminal``.

    Raises:
        AssertionError: If ``intArray`` does not hold exactly one integer
            in [0, 3] (only while assertions are enabled).
    """
    # Make sure the action is valid.
    assert len(thisAction.intArray) == 1, "Expected 1 integer action."
    actionValue = thisAction.intArray[0]
    # The check accepts 0..3 inclusive, so the messages state [0,3].
    assert actionValue >= 0, "Expected action to be in [0,3]"
    assert actionValue < 4, "Expected action to be in [0,3]"

    self.updatePosition(actionValue)

    theObs = Observation()
    theObs.intArray = [self.calculateFlatState()]
    theObs.charArray = ["T", "T", "T", "T"]
    # Flag the options listed for the cell the agent now occupies.
    # NOTE(review): all four slots are initialised to "T", so these writes
    # leave the array unchanged and can only raise IndexError when
    # 2 + option is out of range. Confirm the intended default value.
    for option in self.optionsArray[self.agentRow][self.agentCol]:
        theObs.charArray[2 + option] = "T"

    returnRO = Reward_observation_terminal()
    returnRO.r = self.calculateReward(actionValue)
    returnRO.o = theObs
    returnRO.terminal = self.checkCurrentTerminal()
    return returnRO
def env_start(self):
    """Set up the agent's state for a new episode and return an observation.

    With ``fixedStartState`` set, ``setAgentState(startRow, startCol)`` is
    tried first; if it reports the state as invalid, a warning is printed
    and ``setRandomState`` is called. Without it, ``setRandomState`` is
    called directly.

    Returns:
        An ``Observation`` whose ``intArray`` holds the value of
        ``calculateFlatState()`` and whose ``charArray`` holds four flags.
    """
    if self.fixedStartState:
        stateValid = self.setAgentState(self.startRow, self.startCol)
        if not stateValid:
            # The message previously printed startRow twice; the second
            # value is the column. Single-argument print(...) behaves the
            # same on Python 2 and 3.
            print("The fixed start state was NOT valid: "
                  + str(int(self.startRow)) + "," + str(int(self.startCol)))
            self.setRandomState()
    else:
        self.setRandomState()

    returnObs = Observation()
    returnObs.intArray = [self.calculateFlatState()]
    returnObs.charArray = ["T", "T", "T", "T"]

    # Now add characters based on options present. The options are read at
    # agentRow/agentCol, matching env_step, because after a random
    # placement startRow/startCol no longer describe the agent's cell.
    # NOTE(review): assumes setAgentState/setRandomState update
    # agentRow/agentCol -- confirm.
    # NOTE(review): the original comment named five slots ("Up, Right,
    # Down, Option1, Option2") while the array has four, and the offset
    # here (3) differs from env_step (2); an option value above 0 raises
    # IndexError with offset 3. Confirm the intended layout.
    for option in self.optionsArray[self.agentRow][self.agentCol]:
        returnObs.charArray[3 + option] = "T"

    return returnObs