def getAction(self, stateTime, stateDay, stateLocation, stateActivity,
                  stateLastNotification):
        """
        Check that an action may be requested now and that the observed state
        is valid.

        getAction() receives the five state elements (i.e., time, day,
        location, activity, and time elapsed since last notification). In
        iterative mode it also enforces the calling order: the agent has to be
        waiting for an action request, and after a successful call it waits
        for the reward.

        Args:
          stateTime: has to be one of utils.allTimeStates()
          stateDay: has to be one of utils.allDayStates()
          stateLocation: has to be one of utils.allLocationStates()
          stateActivity: has to be one of utils.allActivityStates()
          stateLastNotification: has to be one of
            utils.allLastNotificationStates()

        Returns:
          Nothing is returned by the code in this block.
          NOTE(review): the earlier docstring promised a bool indicating
          whether to send the notification or not, but no decision is computed
          here -- confirm whether that logic is supposed to follow.

        Raises:
          Exception: in iterative mode, when the agent is not in
            BaseAgent.STAGE_WAIT_ACTION; in any mode, when a state value is
            not among the allowed values of its dimension.
        """

        iterativeMode = self.operatingMode == BaseAgent.MODE_ITERATIVE

        # check stage
        if iterativeMode and self.stage != BaseAgent.STAGE_WAIT_ACTION:
            raise Exception("It is not in the stage of determining action")

        # check argument value
        # One (label, value, enumerator) entry per state dimension. Building
        # the message from the label keeps it from drifting away from the
        # argument that is actually being checked.
        stateChecks = (
            ("stateTime", stateTime, utils.allTimeStates),
            ("stateDay", stateDay, utils.allDayStates),
            ("stateLocation", stateLocation, utils.allLocationStates),
            ("stateActivity", stateActivity, utils.allActivityStates),
            ("stateLastNotification", stateLastNotification,
             utils.allLastNotificationStates),
        )
        for label, value, allowedStates in stateChecks:
            if value not in allowedStates():
                # %r rather than %d: a non-numeric value (None, a string, ...)
                # must produce this message instead of a formatting TypeError.
                raise Exception("Invalid %s value (got %r)" % (label, value))

        # Advance the stage only after the arguments were accepted, so that a
        # rejected call does not leave the agent waiting for a reward.
        if iterativeMode:
            self.stage = BaseAgent.STAGE_WAIT_REWARD
 def generateInitialModel(self):
     """
     Reset the Q-table to zeros and restart the step counter.

     self.qTable gets one entry per combination of (time, day, location,
     activity, last-notification) state. Each entry maps the two actions,
     True and False, to an initial value of 0.0. self.numSteps is set to 0.
     """
     # Imported locally so this method does not depend on the module's
     # import block.
     import itertools

     # product() yields the combinations in the same order as five nested
     # loops (rightmost dimension varies fastest), while every enumerator is
     # called only once instead of once per outer iteration.
     allStates = itertools.product(
         utils.allTimeStates(),
         utils.allDayStates(),
         utils.allLocationStates(),
         utils.allActivityStates(),
         utils.allLastNotificationStates(),
     )
     self.qTable = {state: {True: 0.0, False: 0.0} for state in allStates}
     self.numSteps = 0
 def __init__(self):
     """
     Assign a random boolean behavior to every possible state.

     self.behavior maps each (time, day, location, activity,
     last-notification) state tuple to True or False, decided by comparing
     one random.random() draw against 0.5.
     """
     # Imported locally so this method does not depend on the module's
     # import block.
     import itertools

     # product() yields the combinations in the same order as five nested
     # loops (rightmost dimension varies fastest), so the sequence of random
     # draws per state is unchanged; every enumerator is called only once.
     allStates = itertools.product(
         utils.allTimeStates(),
         utils.allDayStates(),
         utils.allLocationStates(),
         utils.allActivityStates(),
         utils.allLastNotificationStates(),
     )
     self.behavior = {state: random.random() < 0.5 for state in allStates}
 def __init__(self, deviationProb=0.1):
     """
     Assign a random boolean behavior to every possible state and store the
     deviation probabilities.

     self.behavior maps each (time, day, location, activity,
     last-notification) state tuple to True or False, decided by comparing
     one random.random() draw against 0.5.

     Args:
       deviationProb: stored as self.probNotTake; its complement
         (1 - deviationProb) is stored as self.probTake.
         NOTE(review): presumably the chance of deviating from / following
         the stored behavior -- confirm against the code that reads them.
     """
     # Imported locally so this method does not depend on the module's
     # import block.
     import itertools

     # product() yields the combinations in the same order as five nested
     # loops (rightmost dimension varies fastest), so the sequence of random
     # draws per state is unchanged; every enumerator is called only once.
     allStates = itertools.product(
         utils.allTimeStates(),
         utils.allDayStates(),
         utils.allLocationStates(),
         utils.allActivityStates(),
         utils.allLastNotificationStates(),
     )
     self.behavior = {state: random.random() < 0.5 for state in allStates}

     self.probTake = 1. - deviationProb
     self.probNotTake = deviationProb
 def generateInitialModel(self):
     """
     Reset the Q-table and clear the remembered last step.

     self.qTable gets one entry per combination of (time, day, location,
     activity, last-notification) state. Each entry maps action True to 1e-5
     and action False to 0.0. self.lastState, self.lastAction and
     self.lastReward are set to None, and self.numSteps to 0.
     """
     # Imported locally so this method does not depend on the module's
     # import block.
     import itertools

     # product() yields the combinations in the same order as five nested
     # loops (rightmost dimension varies fastest), while every enumerator is
     # called only once instead of once per outer iteration.
     allStates = itertools.product(
         utils.allTimeStates(),
         utils.allDayStates(),
         utils.allLocationStates(),
         utils.allActivityStates(),
         utils.allLastNotificationStates(),
     )
     # True starts marginally above False. An earlier variant drew small
     # random initial values (np.random.random() * 1e-3) for both actions.
     # NOTE(review): presumably the tiny edge settles initial ties in favor
     # of True -- confirm against the action-selection code.
     self.qTable = {state: {True: 1e-5, False: 0.0} for state in allStates}

     self.lastState = None
     self.lastAction = None
     self.lastReward = None
     self.numSteps = 0