def env_step(self, thisAction):
        """Apply one agent action and report what happened.

        ``thisAction.intArray`` must hold exactly one integer; that value is
        handed to ``updatePosition`` and then to ``calculateReward``.

        Returns a ``Reward_observation_terminal`` whose ``o`` is an
        ``Observation`` (flat state in ``intArray``, four "T" flags in
        ``charArray``), whose ``r`` comes from ``calculateReward`` and whose
        ``terminal`` comes from ``checkCurrentTerminal``.

        Raises AssertionError when the action is malformed or out of range.
        """
        actionValues = thisAction.intArray
        assert len(actionValues) == 1, "Expected 1 integer action."
        chosenAction = actionValues[0]
        # NOTE(review): the accepted range is 0..3 but the message says
        # [0,4] -- confirm which of the two is intended.
        assert 0 <= chosenAction < 4, "Expected action to be in [0,4]"

        self.updatePosition(chosenAction)

        observation = Observation()
        observation.intArray = [self.calculateFlatState()]

        # Every flag starts out as "T"; the loop re-marks the slots that
        # belong to the options listed for the agent's current cell.
        flags = ["T"] * 4
        for option in self.optionsArray[self.agentRow][self.agentCol]:
            flags[2 + option] = "T"
        observation.charArray = flags

        outcome = Reward_observation_terminal()
        outcome.r = self.calculateReward(chosenAction)
        outcome.o = observation
        outcome.terminal = self.checkCurrentTerminal()
        return outcome
    def env_start(self):
        """Place the agent for a new episode and return the first observation.

        With ``self.fixedStartState`` set, the agent is placed at
        (``self.startRow``, ``self.startCol``) via ``setAgentState``; if that
        call returns a false value a message is printed and
        ``setRandomState`` is used instead. Without it, ``setRandomState``
        is always used.

        Returns an ``Observation`` with the flat state index in ``intArray``
        and four "T" flags in ``charArray``.
        """
        if not self.fixedStartState:
            self.setRandomState()
        elif not self.setAgentState(self.startRow, self.startCol):
            # Single-argument parenthesised print behaves the same on
            # Python 2 and Python 3.
            print("The fixed start state was NOT valid: %d,%d"
                  % (int(self.startRow), int(self.startCol)))
            self.setRandomState()

        returnObs = Observation()
        returnObs.intArray = [self.calculateFlatState()]

        # Up, Right, Down, Option1, Option2
        # NOTE(review): that legend names five entries but only four flags
        # are created, and env_step offsets options by 2 rather than 3 --
        # confirm the intended layout.
        flags = ["T"] * 4

        # Now add characters based on options present. The lookup uses the
        # agent's position (the fields env_step reads) rather than the
        # configured start cell, so it stays correct after a random
        # placement. Presumably setAgentState/setRandomState keep
        # agentRow/agentCol up to date -- TODO confirm.
        for option in self.optionsArray[self.agentRow][self.agentCol]:
            flags[3 + option] = "T"
        returnObs.charArray = flags

        return returnObs
    def env_step(self,thisAction):
        """Advance the environment by one action.

        ``thisAction.intArray`` must contain a single integer, which is
        passed to ``updatePosition`` and afterwards to ``calculateReward``.

        Returns a ``Reward_observation_terminal``: ``o`` is an
        ``Observation`` carrying the flat state (``intArray``) and four "T"
        flags (``charArray``), ``r`` is the value from ``calculateReward``
        and ``terminal`` is the value from ``checkCurrentTerminal``.

        Raises AssertionError for a malformed or out-of-range action.
        """
        actionValues = thisAction.intArray
        assert len(actionValues) == 1, "Expected 1 integer action."
        chosenAction = actionValues[0]
        # NOTE(review): the check admits 0..3 whereas the message advertises
        # [0,4] -- confirm which one is correct.
        assert 0 <= chosenAction < 4, "Expected action to be in [0,4]"

        self.updatePosition(chosenAction)

        observation = Observation()
        observation.intArray = [self.calculateFlatState()]

        # All flags begin as "T"; slots for the options available in the
        # agent's current cell are then written again.
        flags = ["T"] * 4
        for option in self.optionsArray[self.agentRow][self.agentCol]:
            flags[2 + option] = "T"
        observation.charArray = flags

        outcome = Reward_observation_terminal()
        outcome.r = self.calculateReward(chosenAction)
        outcome.o = observation
        outcome.terminal = self.checkCurrentTerminal()
        return outcome
    def env_start(self):
        """Begin an episode: position the agent and build the first observation.

        If ``self.fixedStartState`` is true the agent is put at
        (``self.startRow``, ``self.startCol``) through ``setAgentState``;
        when that call returns a false value a message is printed and
        ``setRandomState`` takes over. If it is false, ``setRandomState`` is
        used directly.

        Returns an ``Observation`` holding the flat state index in
        ``intArray`` and four "T" flags in ``charArray``.
        """
        if not self.fixedStartState:
            self.setRandomState()
        elif not self.setAgentState(self.startRow, self.startCol):
            # A parenthesised single argument prints identically on
            # Python 2 and Python 3.
            print("The fixed start state was NOT valid: %d,%d"
                  % (int(self.startRow), int(self.startCol)))
            self.setRandomState()

        returnObs = Observation()
        returnObs.intArray = [self.calculateFlatState()]

        # Up, Right, Down, Option1, Option2
        # NOTE(review): the legend lists five entries while four flags are
        # allocated, and env_step uses an offset of 2 instead of 3 --
        # confirm the intended layout.
        flags = ["T"] * 4

        # Now add characters based on options present. Options are read at
        # the agent's position (the same fields env_step uses), not at the
        # configured start cell, so a random placement is described
        # correctly. Presumably setAgentState/setRandomState maintain
        # agentRow/agentCol -- TODO confirm.
        for option in self.optionsArray[self.agentRow][self.agentCol]:
            flags[3 + option] = "T"
        returnObs.charArray = flags

        return returnObs