Example #1
    def step(self, action):
        # This gym only controls basal insulin
        if self._normalize:
            _action = self.rescale_action(action)
            if self._discrete:
                _action = action / 5
        else:
            _action = action
        act = Action(basal=_action, bolus=0)
        if self.reward_fun is None:
            _obs, reward, done, info = self.env.step(act)
        else:
            _obs, reward, done, info = self.env.step(act, reward_fun=self.reward_fun)
        if not done and self.sequence > 0:
            for i in range(1, self.sequence - 1):
                # Remember: we pass unnormalized actions to the simglucose env
                act = Action(basal=0, bolus=0)
                _obs, r, d, info = self.env.step(act, reward_fun=self.reward_fun)
                reward += r
                # Make sure that one done=True is not replaced by a later done=False
                if d:
                    done = d
                    break
        if self.append_time:
            if self._normalize:
                obs = self.normalize_obs(np.array([_obs.CGM, self.tomin()]))
            else:
                obs = np.array([_obs.CGM, self.tomin()])
        else:
            if self._normalize:
                obs = self.normalize_obs(_obs.CGM)
            else:
                obs = _obs.CGM

        if self.save_callback is not None:
            go_on = self.save_callback.step(self.episode_steps)
            if not go_on:
                info['Time-limit truncated at callback'] = not done
                print('Time-limit truncated from callback at step', self.episode_steps)
                done = True
        self.episode_steps += 1
        return (np.array([obs]), reward, done, info)
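Note: the wrapper above relies on helpers that are not shown in this snippet (rescale_action, normalize_obs, tomin). The following is only a plausible sketch of what they could look like, assuming a [-1, 1] action Box and min-max scaled observations; the actual implementations in the source project may differ.

# Hypothetical helpers for the wrapper above (assumptions, not the original code)
import numpy as np

class BasalWrapperHelpers:
    max_basal = 0.5                    # assumed upper bound on the basal rate (U/min)
    cgm_low, cgm_high = 10.0, 600.0    # assumed CGM sensor range (mg/dL)

    def rescale_action(self, action):
        # map an agent action in [-1, 1] onto [0, max_basal]
        return float(np.clip((action + 1) / 2, 0, 1)) * self.max_basal

    def normalize_obs(self, obs):
        # min-max scale into [0, 1]; a real implementation would use per-feature bounds
        return (np.asarray(obs, dtype=float) - self.cgm_low) / (self.cgm_high - self.cgm_low)

    def tomin(self):
        # minutes since midnight of the simulator clock (same fields as env.time above)
        return self.env.time.hour * 60 + self.env.time.minute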
Example #2
    def _step(self, action):
        # This gym only controls basal insulin
        act = Action(basal=action, bolus=0)
        if self.reward_fun is None:
            return self.env.step(act)
        else:
            return self.env.step(act, reward_fun=self.reward_fun)
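For context, wrappers like the ones in these examples are usually driven through the standard Gym interface. A minimal driver, following the registration pattern from the simglucose README (the env id and patient name below are illustrative and depend on your installation), might look like:

import gym
from gym.envs.registration import register

# Register a simulated patient as a Gym environment (id and patient are illustrative)
register(
    id='simglucose-adolescent2-v0',
    entry_point='simglucose.envs:T1DSimEnv',
    kwargs={'patient_name': 'adolescent#002'},
)

env = gym.make('simglucose-adolescent2-v0')
obs = env.reset()
for _ in range(10):
    obs, reward, done, info = env.step(env.action_space.sample())
    if done:
        break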
    def _bb_policy(self, name, meal, glucose, env_sample_time):
        if any(self.quest.Name.str.match(name)):
            q = self.quest[self.quest.Name.str.match(name)]
            params = self.patient_params[self.patient_params.Name.str.match(
                name)]
            u2ss = np.asscalar(params.u2ss.values)
            BW = np.asscalar(params.BW.values)
        else:
            q = pd.DataFrame([['Average', 1 / 15, 1 / 50, 50, 30]],
                             columns=['Name', 'CR', 'CF', 'TDI', 'Age'])
            u2ss = 1.43
            BW = 57.0

        basal = u2ss * BW / 6000
        # basal = 0.0093
        if meal > 0:
            logger.info('Calculating bolus ...')
            logger.debug('glucose = {}'.format(glucose))
            bolus = np.asscalar(meal / q.CR.values + (glucose > 150) *
                                (glucose - self.target) / q.CF.values)
            # bolus = np.asscalar(meal /23 + (glucose > 150) * (glucose - self.target) / 33.5)
            # bolus = np.asscalar(meal /4 + (glucose > 150) * (glucose - self.target) / 12)

        else:
            bolus = 0

        bolus = bolus / env_sample_time
        # action = Action(basal=basal, bolus=bolus)
        action = Action(basal=0, bolus=bolus)
        # action = Action(basal=basal, bolus=0)
        return action
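As a quick sanity check of the basal-bolus rule used in _bb_policy, here is the same arithmetic with made-up patient numbers (aside from the fallback u2ss and BW above, none of these values come from PATIENT_PARA_FILE or CONTROL_QUEST):

# Illustrative basal-bolus arithmetic with hypothetical patient values
u2ss, BW = 1.43, 57.0            # the fallback values from the snippet above
CR, CF, target = 15.0, 50.0, 140.0
meal, glucose = 45.0, 190.0      # grams of carbohydrate and current CGM (mg/dL)

basal = u2ss * BW / 6000                                        # ~0.0136
bolus = meal / CR + (glucose > 150) * (glucose - target) / CF   # 3 + 1 = 4
# as in the examples, the bolus is then divided by the env sample time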
Example #4
    def step(self, action):
        # This gym only controls basal insulin
        if self._normalize:
            _action = self.rescale_action(action)
            if self._discrete:
                _action = action / 5
        else:
            _action = action
        act = Action(basal=_action, bolus=0)
        if self.reward_fun is None:
            _obs, reward, done, info = self.env.step(act)
        else:
            _obs, reward, done, info = self.env.step(act, reward_fun=self.reward_fun)
        # Use the full internal patient state as the observation
        obs = self.env.patient.state

        print('An=', action, 'On=', obs, 'A=', _action, 'O=', _obs, 'r=', reward, 's=', self.episode_steps)
        if self.save_callback is not None:
            go_on = self.save_callback.step(self.episode_steps)
            if not go_on:
                info['Time-limit truncated at callback'] = not done
                print('Time-limit truncated from callback at step', self.episode_steps)
                done = True
        self.episode_steps += 1
        return (np.array([obs]), reward, done, info)
    def _step(self, action):
        # This gym only controls basal insulin
        # act = Action(basal=action[0], bolus=action[1])
        act = Action(basal=action, bolus=0)
        if self.reward_fun is None:  # TODO: see why reward_fun is None
            return self.env.step(act)
        else:
            return self.env.step(act, reward_fun=self.reward_fun)
Example #6
    def step(self, action):
        # Goal is to estimate the correct CR and CF value for the patient
        CR, CF = action
        basal = 0

        # Optimal (for debugging)
        # Person A
        # CR, CF = 23, 33.5
        # CR, CF = 30, 25
        # basal = ?
        #
        # Person B
        # CR, CF = 4, 12
        # basal = ?


        obs, r, done, info = self.all_vars
        total_r = 0
        ctr = 0
        # temp = []
        while not done:
            meal = info['meal']
            glucose = obs[0]

            bolus = 0
            # Basal-Bolus controller
            # Note: Value of Bolus gets clipped to the desired range in the simulator
            if meal > 0:
                bolus = meal / CR + (glucose > 150) * (glucose - self.target) / CF
                # bolus = 1000

            # This gym only controls bolus insulin
            # Divide bolus by sample time because this action will be repeated 'sample time' times in the simulator
            bolus = bolus / info['sample_time']
            act = Action(basal=basal, bolus=bolus)
            # if self.reward_fun is None:
            #     obs, r, done, info = self.env.step(act)
            # else:
            obs, r, done, info = self.env.step(act, reward_fun=self.reward_fun)
            # print("r", r)

            total_r += r
            ctr += 1
            # temp.append(glucose)

        # print((total_r/ctr + 26.5) * 2)
        # reward = total_r/ctr                      # It is ensured that ctr is a fixed constant
        reward = (total_r/ctr + 26.5) * 2          # makes the return normalized to [-10, 10]
        # plt.plot(temp)
        # plt.axhspan(70, 180, alpha=0.3, color='limegreen', lw=0)
        # plt.axhspan(50, 70, alpha=0.3, color='red', lw=0)
        # plt.axhspan(0, 50, alpha=0.3, color='darkred', lw=0)
        # plt.axhspan(180, 250, alpha=0.3, color='red', lw=0)
        # plt.axhspan(250, 1000, alpha=0.3, color='darkred', lw=0)
        # plt.show()
        return [1], reward, done, info
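A quick check of the normalization used above: adding 26.5 and multiplying by 2 maps an average per-step reward of -31.5, -26.5 and -21.5 to -10, 0 and +10 respectively, so the return lands in [-10, 10] as long as total_r/ctr stays inside [-31.5, -21.5].

# Pure arithmetic check of the return normalization used above
for avg_r in (-31.5, -26.5, -21.5):
    print(avg_r, (avg_r + 26.5) * 2)    # -> -10.0, 0.0, 10.0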
    def step(self, action):
        # This gym only controls basal insulin
        if self._normalize:
            _action = self.rescale_action(action)
            if self._discrete:
                _action = action / 5
        else:
            _action = action
        act = Action(basal=_action, bolus=0)
        if self.reward_fun is None:
            _obs, reward, done, info = self.env.step(act)
        else:
            _obs, reward, done, info = self.env.step(act, reward_fun=self.reward_fun)
        if not done and self.sequence > 0:
            for i in range(1, self.sequence - 1):
                # Remember: we pass unnormalized actions to the simglucose env
                act = Action(basal=0, bolus=0)
                _obs, r, d, info = self.env.step(act, reward_fun=self.reward_fun)
                reward += r
                # Make sure that one done=True is not replaced by a later done=False
                if d:
                    done = d
                    break
        if self.append_time:
            if self._normalize:
                obs = self.normalize_obs(np.array([_obs.CGM, self.tomin()]))
            else:
                obs = np.array([_obs.CGM, self.tomin()])
        else:
            if self._normalize:
                obs = self.normalize_obs(_obs.CGM)
            else:
                obs = _obs.CGM

        if self.save_callback is not None:
            go_on = self.save_callback.step(self.episode_steps)
            if not go_on:
                info['Time-limit truncated at callback'] = not done
                done = True
        self.episode_steps += 1
        return (np.array([obs]), reward, done, info)
Example #8
    def policy(self, observation, reward, done, **kwargs):
        ''' define vars and solve optimization problem '''
        self.state = np.asscalar(observation.CGM)

        # state var
        x = cp.Variable((3, self.T+1))
        u = cp.Variable((1, self.T))
        # init cost and constraints
        cost = 0
        constraints = []
        # build costs, constraints across horizon
        for t in range(self.T):
            # quadratic cost away from target
            cost += cp.sum_squares(x[:,t] - self.targetBG) + cp.sum_squares(u[:,t])
            constraints += [
                x[:,t+1] == self.A @ x[:,t] + self.B @ u[:,t], # state dependence
                u[:,t] >= 0,    # dose is non-negative
                u[:,t] <= 1.0,  # single dose cannot be larger than 1u
                u[:,self.M:self.T] == 0 # no control action beyond control horizon
            ]
        # we add the constraint that we are starting from the observation.
        constraints += [
            x[:,0] == self.state
        ]
        # solve problem
        problem = cp.Problem(cp.Minimize(cost), constraints)
        problem.solve(solver='ECOS')

        '''logging'''
        logging.debug("Start BG: {}".format(self.state))
        logging.debug("Problem Status: {}".format(problem.status))
        logging.debug("\tSetup Time: {}".format(problem.solver_stats.setup_time))
        logging.debug("\tSolved in: {}".format(problem.solver_stats.solve_time))
        logging.debug("\tNumber of iterations: {}".format(problem.solver_stats.num_iters))

        if self.state >= self.lowBG:
            # apply only the first control action of the planned horizon
            bolus = u.value[0, 0]
            basal = self.patient_basal
        else:
            bolus = 0
            basal = 0
        # keep a history of the doses that were actually commanded
        self.prev_doses.append(bolus)
        return Action(basal=basal, bolus=bolus)
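The receding-horizon problem constructed in this policy can also be exercised on its own. The sketch below is a minimal, self-contained version with a made-up 3-state linear model; the matrices A and B, the horizons T and M, and the target are illustrative placeholders, not the author's tuned model.

import numpy as np
import cvxpy as cp

# Hypothetical 3-state linear glucose model (illustrative values only)
A = np.array([[0.98, 0.02, 0.00],
              [0.00, 0.95, 0.05],
              [0.00, 0.00, 0.90]])
B = np.array([[0.0], [0.0], [-0.5]])
T, M = 20, 10                  # prediction and control horizons
target, x0 = 140.0, 180.0      # target BG and current CGM reading (mg/dL)

x = cp.Variable((3, T + 1))
u = cp.Variable((1, T))
cost, constraints = 0, [x[:, 0] == x0]
for t in range(T):
    cost += cp.sum_squares(x[:, t] - target) + cp.sum_squares(u[:, t])
    constraints += [x[:, t + 1] == A @ x[:, t] + B @ u[:, t],
                    u[:, t] >= 0, u[:, t] <= 1.0]
constraints += [u[:, M:T] == 0]      # no control action beyond the control horizon

cp.Problem(cp.Minimize(cost), constraints).solve()
first_dose = u.value[0, 0]           # only the first planned dose would be applied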
Example #9
    def step(self, action):
        # Goal is to estimate the correct CR and CF value for the patient
        CR, CF = action
        basal = 0

        # Optimal (for debugging)
        # CR, CF = 5, 13.175
        # CR, CF = 15, 33.175
        # basal = 0.0153

        obs, r, done, info = self.all_vars
        total_r = 0
        ctr = 0
        # temp = []
        while not done:
            meal = info['meal']
            glucose = obs[0]

            bolus = 0
            # Basal-Bolus controller
            # Note: Value of Bolus gets clipped to the desired range in the simulator
            if meal > 0:
                bolus = meal / CR + (glucose > 150) * (glucose - self.target) / CF

            # This gym only controls bolus insulin
            bolus = bolus / info['sample_time']
            act = Action(basal=basal, bolus=bolus)
            if self.reward_fun is None:
                obs, r, done, info = self.env.step(act)
            else:
                obs, r, done, info = self.env.step(act, reward_fun=self.reward_fun)

            total_r += r
            ctr += 1
            # temp.append(glucose)

        reward = total_r/ctr        # It is ensured that ctr is a fixed constant
        # plt.plot(temp)
        # plt.axhspan(70, 180, alpha=0.3, color='limegreen', lw=0)
        # plt.axhspan(50, 70, alpha=0.3, color='red', lw=0)
        # plt.axhspan(0, 50, alpha=0.3, color='darkred', lw=0)
        # plt.axhspan(180, 250, alpha=0.3, color='red', lw=0)
        # plt.axhspan(250, 1000, alpha=0.3, color='darkred', lw=0)
        # plt.show()
        return [1], reward, done, info
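Several of these snippets forward an optional reward_fun into env.step. A custom reward can be supplied the same way; the sketch below assumes the callable receives the recent blood-glucose history and returns a scalar (this matches simglucose's built-in risk-difference reward, but check the exact signature in your installed version):

def in_range_reward(bg_hist):
    # Hypothetical reward: +1 while the latest BG is within 70-180 mg/dL, else -1
    return 1.0 if 70 <= bg_hist[-1] <= 180 else -1.0

# usage, as in the examples above:
# obs, r, done, info = env.step(act, reward_fun=in_range_reward)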
    def policy(self, observation, reward, done, **info):
        '''
        Every controller must have this implementation!
        ----
        Inputs:
        observation - a namedtuple defined in simglucose.simulation.env. For
                      now, it only has one entry: blood glucose level measured
                      by CGM sensor.
        reward      - current reward returned by environment
        done        - True, game over. False, game continues
        info        - additional information as key word arguments,
                      simglucose.simulation.env.T1DSimEnv returns patient_name
                      and sample_time
        ----
        Output:
        action - a namedtuple defined at the beginning of this file. The
                 controller action contains two entries: basal, bolus
        '''
        self.state = observation
        action = Action(basal=0, bolus=0)
        return action
    def _step(self, action):
        # This gym only controls basal insulin
        act = Action(basal=action, bolus=0)

        # ===========================================
        # This has been added by JONAS TODO
        # ===========================================
        cgm = []
        insulin = []
        reward = []
        for i in range(self.state_space_length):

            s, r, done, _ = self.env.step(act)
            cgm.append(s.CGM)
            insulin.append(act.basal)
            reward.append(r)

        # Updating state
        state = np.concatenate(
            [cgm, np.ravel(np.fliplr(self.env.insulin_hist[-4:]))])

        return np.array(state), np.mean(reward), done, {}
Example #12
    def step(self, action):
        if self._normalize:
            _action = self.rescale_action(action)
        else:
            _action = action
        act = Action(basal=_action, bolus=0)
        if self.reward_fun is None:
            _obs, reward, done, info = self.env.step(act)
        else:
            _obs, reward, done, info = self.env.step(act, reward_fun=self.reward_fun)
        if self.append_time:
            if self._normalize:
                obs = self.normalize_obs(np.append(_obs, self.tomin()))
            else:
                obs = np.append(_obs, self.tomin())
        else:
            if self._normalize:
                obs = self.normalize_obs(_obs)
            else:
                obs = _obs

        if self.save_callback is not None:
            go_on = self.save_callback.step(self.episode_steps)
            if not go_on:
                info['Time-limit truncated at callback'] = not done
                print('Time-limit truncated from callback at step', self.episode_steps)
                done = True
        self.episode_steps += 1
        if self.limit_time > 0:
            self.time_in_env += self.time_per_step
            # When limit_time is set, the time budget alone decides termination
            # (this overrides the done flag returned by the env)
            done = self.time_in_env > self.limit_time
        return (obs, reward, done, info)
Example #13
    def policy(self, observation, reward, done, **kwargs):
        self.state = observation

        # find and integrate error
        error = np.asscalar((observation.CGM - self.target))
        self.ierror += error

        # calculate derivative
        if 'sample_time' in kwargs:
            sample_time = kwargs.get('sample_time', 1)
        else:
            raise KeyError("sample_time not in arguments")
        deriv = (observation.CGM - self.prev_glucose) / sample_time

        # suspension if low
        if observation.CGM <= self.low:
            basal = 0
            bolus = 0
        else:
            basal = self.patient_basal
            bolus = self.kp * error + self.ki * self.ierror + self.kd * deriv

        action = Action(basal=basal, bolus=bolus)
        return action
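The same PID logic can be packaged as a small standalone helper. The sketch below is illustrative only: the gains, target, and low-glucose threshold are placeholder values, and suspending insulin below the threshold mirrors the snippet above.

class SimplePID:
    # Illustrative PID on the CGM error; gains and thresholds are placeholders
    def __init__(self, kp=1e-4, ki=1e-7, kd=1e-2, target=140.0, low=70.0):
        self.kp, self.ki, self.kd = kp, ki, kd
        self.target, self.low = target, low
        self.ierror = 0.0
        self.prev_glucose = target

    def dose(self, cgm, sample_time):
        error = cgm - self.target
        self.ierror += error                              # accumulate the error
        deriv = (cgm - self.prev_glucose) / sample_time   # finite-difference derivative
        self.prev_glucose = cgm
        if cgm <= self.low:                               # suspend insulin when low
            return 0.0
        return max(0.0, self.kp * error + self.ki * self.ierror + self.kd * deriv)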
Example #14
    def policy(self, observation, reward, done, **info):
        self.state = observation
        action = Action(basal=.03, bolus=0)
        return action
Example #15
        pterm = self.k_c * error
        iterm = self.k_c / self.tau_i * self.ierror
        dterm = self.k_c * self.tau_d * deriv
        bolus = pterm + iterm + dterm
        basal = self.patient_basal
        if bolus + basal < 0:
            bolus = -1 * basal
        return Action(basal=basal, bolus=bolus)

class MPCNaive(Controller):
    '''
    control params: tuple, (targetBG, lowBG)
    '''
    def __init__(self, controller_params, name):
        self.targetBG = controller_params[0]
        self.lowBG = controller_params[1]
        self.patient_params = pd.read_csv(PATIENT_PARA_FILE)
        self.quest = pd.read_csv(CONTROL_QUEST)
        if any(self.patient_params.Name.str.match(name)):
            params = self.patient_params[self.patient_params.Name.str.match(name)]
            # Patient Basal
            self.patient_basal = np.asscalar(params.u2ss.values) * np.asscalar(params.BW.values) / 6000
        else:
Example #16
    env9 = DummyVecEnv([lambda: env9])
    state[9] = env9.reset()

    print('Environment Created')
    model_name = 'ACKTR_MlpLSTM_' + group + '_' + args.reward
    MODEL_PATH = 'Saved_models'
    tr_model = ACKTR.load(MODEL_PATH + '/' + model_name)

    t = 480  ## number of time steps to evaluate. t = 480 is 1 day
    all_state = np.zeros((10, t))

    print('Simulation Started ... ...')
    for i in range(t):
        aa, _ = tr_model.predict(state)
        # print(aa)
        action = Action(basal=aa[0] / 6000, bolus=0)
        state[0], reward, done, _ = env0.step(action)
        action = Action(basal=aa[1] / 6000, bolus=0)
        state[1], reward, done, _ = env1.step(action)
        action = Action(basal=aa[2] / 6000, bolus=0)
        state[2], reward, done, _ = env2.step(action)
        action = Action(basal=aa[3] / 6000, bolus=0)
        state[3], reward, done, _ = env3.step(action)
        action = Action(basal=aa[4] / 6000, bolus=0)
        state[4], reward, done, _ = env4.step(action)
        action = Action(basal=aa[5] / 6000, bolus=0)
        state[5], reward, done, _ = env5.step(action)
        action = Action(basal=aa[6] / 6000, bolus=0)
        state[6], reward, done, _ = env6.step(action)
        action = Action(basal=aa[7] / 6000, bolus=0)
        state[7], reward, done, _ = env7.step(action)