def step(self, action):
    # This gym only controls basal insulin
    if self._normalize:
        _action = self.rescale_action(action)
        if self._discrete:
            _action = action / 5
    else:
        _action = action

    act = Action(basal=_action, bolus=0)
    if self.reward_fun is None:
        _obs, reward, done, info = self.env.step(act)
    else:
        _obs, reward, done, info = self.env.step(act, reward_fun=self.reward_fun)

    # One gym step can span several simulator steps; only the first
    # sub-step delivers insulin.
    if not done and self.sequence > 0:
        for _ in range(1, self.sequence):
            # Remember: we pass unnormalized actions to the simglucose env
            act = Action(basal=0, bolus=0)
            _obs, r, d, info = self.env.step(act, reward_fun=self.reward_fun)
            reward += r
            # Make sure that one done=True is not replaced by a later done=False
            if d:
                done = True
                break

    if self.append_time:
        if self._normalize:
            obs = self.normalize_obs(np.array([_obs.CGM, self.tomin()]))
        else:
            obs = np.array([_obs.CGM, self.tomin()])
    else:
        if self._normalize:
            obs = self.normalize_obs(_obs.CGM)
        else:
            obs = _obs.CGM

    if self.save_callback is not None:
        go_on = self.save_callback.step(self.episode_steps)
        if not go_on:
            info['Time-limit truncated at callback'] = not done
            print('Time-limit truncated from callback at step', self.episode_steps)
            done = True

    self.episode_steps += 1
    return np.array([obs]), reward, done, info
def _step(self, action):
    # This gym only controls basal insulin
    act = Action(basal=action, bolus=0)
    if self.reward_fun is None:
        return self.env.step(act)
    else:
        return self.env.step(act, reward_fun=self.reward_fun)
def _bb_policy(self, name, meal, glucose, env_sample_time):
    if any(self.quest.Name.str.match(name)):
        q = self.quest[self.quest.Name.str.match(name)]
        params = self.patient_params[self.patient_params.Name.str.match(name)]
        u2ss = params.u2ss.values.item()
        BW = params.BW.values.item()
    else:
        # Unknown patient: fall back to average quest parameters
        q = pd.DataFrame([['Average', 1 / 15, 1 / 50, 50, 30]],
                         columns=['Name', 'CR', 'CF', 'TDI', 'Age'])
        u2ss = 1.43
        BW = 57.0

    # Steady-state insulin rate times body weight, converted to U/min (1 U = 6000 pmol)
    basal = u2ss * BW / 6000

    if meal > 0:
        logger.info('Calculating bolus ...')
        logger.debug('glucose = {}'.format(glucose))
        bolus = (meal / q.CR.values
                 + (glucose > 150) * (glucose - self.target) / q.CF.values).item()
    else:
        bolus = 0

    # The bolus action is repeated over the whole sample interval in the
    # simulator, so divide the dose by the sample time.
    bolus = bolus / env_sample_time
    # This controller delivers the meal bolus only; basal is left to the agent.
    action = Action(basal=0, bolus=bolus)
    return action
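# For concreteness, the bolus arithmetic above worked through with the
# "Person A" values that appear in commented-out variants elsewhere in this
# repo (CR=23, CF=33.5); the meal, glucose, and target numbers below are
# illustrative assumptions, not defaults from any patient file.
meal, glucose, target = 50, 180, 140  # g CHO, mg/dL, mg/dL
CR, CF = 23, 33.5                     # carb ratio (g/U), correction factor (mg/dL per U)
bolus = meal / CR + (glucose > 150) * (glucose - target) / CF
# = 50/23 + 40/33.5 ≈ 2.17 + 1.19 ≈ 3.37 U, then divided by env_sample_time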
def step(self, action):
    # This gym only controls basal insulin
    if self._normalize:
        _action = self.rescale_action(action)
        if self._discrete:
            _action = action / 5
    else:
        _action = action

    act = Action(basal=_action, bolus=0)
    if self.reward_fun is None:
        _obs, reward, done, info = self.env.step(act)
    else:
        _obs, reward, done, info = self.env.step(act, reward_fun=self.reward_fun)

    # Observe the simulator's internal patient state instead of the CGM reading
    obs = self.env.patient.state

    print('An=', action, 'On=', obs, 'A=', _action, 'O=', _obs, 'r=', reward, 's=', self.episode_steps)

    if self.save_callback is not None:
        go_on = self.save_callback.step(self.episode_steps)
        if not go_on:
            info['Time-limit truncated at callback'] = not done
            print('Time-limit truncated from callback at step', self.episode_steps)
            done = True

    self.episode_steps += 1
    return np.array([obs]), reward, done, info
def _step(self, action):
    # This gym only controls basal insulin
    act = Action(basal=action, bolus=0)
    if self.reward_fun is None:  # TODO: see why reward_fun is None
        return self.env.step(act)
    else:
        return self.env.step(act, reward_fun=self.reward_fun)
def step(self, action):
    # Goal is to estimate the correct CR and CF values for the patient
    CR, CF = action
    basal = 0
    # Known-good values, for debugging:
    #   Person A: CR, CF = 23, 33.5
    #   Person B: CR, CF = 4, 12

    obs, r, done, info = self.all_vars
    total_r = 0
    ctr = 0
    while not done:
        meal = info['meal']
        glucose = obs[0]
        bolus = 0
        # Basal-bolus controller.
        # Note: the bolus value gets clipped to the pump's range in the simulator.
        if meal > 0:
            bolus = meal / CR + (glucose > 150) * (glucose - self.target) / CF
        # This gym only controls bolus insulin.
        # Divide the bolus by the sample time because this action will be
        # repeated 'sample_time' times in the simulator.
        bolus = bolus / info['sample_time']
        act = Action(basal=basal, bolus=bolus)
        obs, r, done, info = self.env.step(act, reward_fun=self.reward_fun)
        total_r += r
        ctr += 1

    # ctr is a fixed constant, so the mean reward is well defined;
    # shift and scale it so the return is normalized to [-10, 10].
    reward = (total_r / ctr + 26.5) * 2
    return [1], reward, done, info
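# Sanity check for the normalization above: mapping the mean step reward r_bar
# to (r_bar + 26.5) * 2 sends -31.5 to -10 and -21.5 to +10, i.e. the mean
# reward is evidently assumed to lie in [-31.5, -21.5] (inferred from the
# formula, not stated explicitly in the source).
assert (-31.5 + 26.5) * 2 == -10
assert (-21.5 + 26.5) * 2 == 10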
def step(self, action):
    # This gym only controls basal insulin
    if self._normalize:
        _action = self.rescale_action(action)
        if self._discrete:
            _action = action / 5
    else:
        _action = action

    act = Action(basal=_action, bolus=0)
    if self.reward_fun is None:
        _obs, reward, done, info = self.env.step(act)
    else:
        _obs, reward, done, info = self.env.step(act, reward_fun=self.reward_fun)

    if not done and self.sequence > 0:
        for _ in range(1, self.sequence):
            # Remember: we pass unnormalized actions to the simglucose env
            act = Action(basal=0, bolus=0)
            _obs, r, d, info = self.env.step(act, reward_fun=self.reward_fun)
            reward += r
            # Make sure that one done=True is not replaced by a later done=False
            if d:
                done = True
                break

    if self.append_time:
        if self._normalize:
            obs = self.normalize_obs(np.array([_obs.CGM, self.tomin()]))
        else:
            obs = np.array([_obs.CGM, self.tomin()])
    else:
        if self._normalize:
            obs = self.normalize_obs(_obs.CGM)
        else:
            obs = _obs.CGM

    if self.save_callback is not None:
        go_on = self.save_callback.step(self.episode_steps)
        if not go_on:
            info['Time-limit truncated at callback'] = not done
            done = True

    self.episode_steps += 1
    return np.array([obs]), reward, done, info
def policy(self, observation, reward, done, **kwargs):
    '''Define decision variables and solve the finite-horizon optimization problem.'''
    self.state = float(observation.CGM)

    # Decision variables: model state over the horizon and insulin doses
    x = cp.Variable((3, self.T + 1))
    u = cp.Variable((1, self.T))

    # Build cost and constraints across the prediction horizon
    cost = 0
    constraints = []
    for t in range(self.T):
        # Quadratic cost for deviating from the target BG, plus a control penalty
        cost += cp.sum_squares(x[:, t] - self.targetBG) + cp.sum_squares(u[:, t])
        constraints += [
            x[:, t + 1] == self.A @ x[:, t] + self.B @ u[:, t],  # linear dynamics
            u[:, t] >= 0,    # dose is non-negative
            u[:, t] <= 1.0,  # single dose cannot be larger than 1 U
        ]
    # No control action beyond the control horizon M
    constraints += [u[:, self.M:self.T] == 0]
    # Start from the current observation (the scalar CGM value is
    # broadcast across the three model states)
    constraints += [x[:, 0] == self.state]

    # Solve the problem
    problem = cp.Problem(cp.Minimize(cost), constraints)
    problem.solve(solver='ECOS')

    logging.debug("Start BG: {}".format(self.state))
    logging.debug("Problem Status: {}".format(problem.status))
    logging.debug("\tSetup Time: {}".format(problem.solver_stats.setup_time))
    logging.debug("\tSolved in: {}".format(problem.solver_stats.solve_time))
    logging.debug("\tNumber of iterations: {}".format(problem.solver_stats.num_iters))

    if self.state >= self.lowBG:
        # Apply only the first control action of the plan
        bolus = u.value[0, 0]
        basal = self.patient_basal
    else:
        bolus = 0
        basal = 0
    # Keep a log of previously administered doses
    self.prev_doses.append(bolus)
    return Action(basal=basal, bolus=bolus)
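# The controller above assumes a discrete-time linear model
#     x[t+1] = A @ x[t] + B @ u[t]
# with three states, but self.A and self.B are not defined in this snippet.
# A minimal sketch of how they could be supplied; the numbers below are
# placeholders for illustration only, not a validated glucose-insulin model.
import numpy as np

A = np.array([[1.0, 0.05, 0.0],
              [0.0, 0.95, 0.1],
              [0.0, 0.0, 0.9]])  # state transition over one sample interval
B = np.array([[0.0],
              [0.0],
              [-1.0]])           # an insulin dose pushes the third state down

# controller.A, controller.B = A, B   # e.g. assigned in MPCNaive.__init__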
def step(self, action):
    # Goal is to estimate the correct CR and CF values for the patient
    CR, CF = action
    basal = 0
    # Known-good values, for debugging:
    #   CR, CF = 5, 13.175
    #   CR, CF = 15, 33.175 with basal = 0.0153

    obs, r, done, info = self.all_vars
    total_r = 0
    ctr = 0
    while not done:
        meal = info['meal']
        glucose = obs[0]
        bolus = 0
        # Basal-bolus controller.
        # Note: the bolus value gets clipped to the pump's range in the simulator.
        if meal > 0:
            bolus = meal / CR + (glucose > 150) * (glucose - self.target) / CF
        # This gym only controls bolus insulin
        bolus = bolus / info['sample_time']
        act = Action(basal=basal, bolus=bolus)
        if self.reward_fun is None:
            obs, r, done, info = self.env.step(act)
        else:
            obs, r, done, info = self.env.step(act, reward_fun=self.reward_fun)
        total_r += r
        ctr += 1

    reward = total_r / ctr  # ctr is a fixed constant, so the mean is well defined
    return [1], reward, done, info
def policy(self, observation, reward, done, **info):
    '''
    Every controller must implement this method.

    Inputs:
    observation - a namedtuple defined in simglucose.simulation.env.
                  For now it has a single entry: the blood glucose
                  level measured by the CGM sensor.
    reward      - current reward returned by the environment
    done        - True if the episode is over, False otherwise
    info        - additional information as keyword arguments;
                  simglucose.simulation.env.T1DSimEnv returns
                  patient_name and sample_time

    Output:
    action - a namedtuple defined at the beginning of this file.
             The controller action contains two entries: basal, bolus
    '''
    self.state = observation
    action = Action(basal=0, bolus=0)
    return action
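# A controller with this interface is driven by the simulation loop roughly as
# sketched below (mirroring simglucose's controller/environment call pattern;
# 'env' and 'ctrl' are assumed to be an already-constructed T1DSimEnv and
# controller, so this is a sketch rather than the library's exact loop):
#
#     obs, reward, done, info = env.reset(), 0, False, {}
#     while not done:
#         act = ctrl.policy(obs, reward, done, **info)
#         obs, reward, done, info = env.step(act)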
def _step(self, action):
    # This gym only controls basal insulin
    act = Action(basal=action, bolus=0)

    # ===========================================
    # This has been added by JONAS  TODO
    # ===========================================
    # Repeat the action to collect a short history window for the state.
    cgm = []
    insulin = []
    reward = []
    for _ in range(self.state_space_length):
        s, r, done, _ = self.env.step(act)
        cgm.append(s.CGM)
        insulin.append(act.basal)
        reward.append(r)

    # State: the CGM history followed by the four most recent insulin doses
    state = np.concatenate(
        [cgm, np.ravel(np.fliplr(self.env.insulin_hist[-4:]))])
    return np.array(state), np.mean(reward), done, {}
def step(self, action):
    if self._normalize:
        _action = self.rescale_action(action)
    else:
        _action = action

    act = Action(basal=_action, bolus=0)
    if self.reward_fun is None:
        _obs, reward, done, info = self.env.step(act)
    else:
        _obs, reward, done, info = self.env.step(act, reward_fun=self.reward_fun)

    if self.append_time:
        if self._normalize:
            obs = self.normalize_obs(np.append(_obs, self.tomin()))
        else:
            obs = np.append(_obs, self.tomin())
    else:
        if self._normalize:
            obs = self.normalize_obs(_obs)
        else:
            obs = _obs

    if self.save_callback is not None:
        go_on = self.save_callback.step(self.episode_steps)
        if not go_on:
            info['Time-limit truncated at callback'] = not done
            print('Time-limit truncated from callback at step', self.episode_steps)
            done = True

    self.episode_steps += 1

    # Episode length is governed by the wall-clock limit: once limit_time is
    # exceeded the episode ends, otherwise done is forced back to False.
    if self.limit_time > 0:
        self.time_in_env += self.time_per_step
        if self.time_in_env > self.limit_time:
            done = True
        else:
            done = False

    return obs, reward, done, info
def policy(self, observation, reward, done, **kwargs):
    self.state = observation

    # Find and integrate the error
    error = float(observation.CGM - self.target)
    self.ierror += error

    # Calculate the derivative term
    if 'sample_time' in kwargs:
        sample_time = kwargs.get('sample_time', 1)
    else:
        raise KeyError("sample_time not in arguments")
    deriv = (observation.CGM - self.prev_glucose) / sample_time

    # Suspend insulin delivery when glucose is low
    if observation.CGM <= self.low:
        basal = 0
        bolus = 0
    else:
        basal = self.patient_basal
        bolus = self.kp * error + self.ki * self.ierror + self.kd * deriv

    action = Action(basal=basal, bolus=bolus)
    return action
def policy(self, observation, reward, done, **info):
    self.state = observation
    action = Action(basal=.03, bolus=0)
    return action
        pterm = self.k_c * error
        iterm = self.k_c / self.tau_i * self.ierror
        dterm = self.k_c * self.tau_d * deriv

        bolus = pterm + iterm + dterm
        basal = self.patient_basal
        # Never let the total insulin delivery go negative
        if bolus + basal < 0:
            bolus = -1 * basal
        return Action(basal=basal, bolus=bolus)


class MPCNaive(Controller):
    '''
    control params: tuple, (targetBG, lowBG)
    '''
    def __init__(self, controller_params, name):
        self.targetBG = controller_params[0]
        self.lowBG = controller_params[1]
        self.patient_params = pd.read_csv(PATIENT_PARA_FILE)
        self.quest = pd.read_csv(CONTROL_QUEST)
        if any(self.patient_params.Name.str.match(name)):
            params = self.patient_params[self.patient_params.Name.str.match(name)]
            # Patient basal: steady-state rate times body weight, converted to U/min
            self.patient_basal = params.u2ss.values.item() * params.BW.values.item() / 6000
        else:
            # Unknown patient: fall back to the average parameters used by the
            # basal-bolus controller (u2ss = 1.43, BW = 57.0)
            self.patient_basal = 1.43 * 57.0 / 6000
env9 = DummyVecEnv([lambda: env9])
state[9] = env9.reset()
print('Environment Created')

model_name = 'ACKTR_MlpLSTM_' + group + '_' + args.reward
MODEL_PATH = 'Saved_models'
tr_model = ACKTR.load(MODEL_PATH + '/' + model_name)

t = 480  # number of time steps to evaluate; 480 steps is 1 day
all_state = np.zeros((10, t))

print('Simulation Started ... ...')
for i in range(t):
    aa, _ = tr_model.predict(state)
    # One predicted basal rate per patient environment; the /6000 matches the
    # u2ss * BW / 6000 basal scaling used elsewhere in this repo.
    action = Action(basal=aa[0] / 6000, bolus=0)
    state[0], reward, done, _ = env0.step(action)
    action = Action(basal=aa[1] / 6000, bolus=0)
    state[1], reward, done, _ = env1.step(action)
    action = Action(basal=aa[2] / 6000, bolus=0)
    state[2], reward, done, _ = env2.step(action)
    action = Action(basal=aa[3] / 6000, bolus=0)
    state[3], reward, done, _ = env3.step(action)
    action = Action(basal=aa[4] / 6000, bolus=0)
    state[4], reward, done, _ = env4.step(action)
    action = Action(basal=aa[5] / 6000, bolus=0)
    state[5], reward, done, _ = env5.step(action)
    action = Action(basal=aa[6] / 6000, bolus=0)
    state[6], reward, done, _ = env6.step(action)
    action = Action(basal=aa[7] / 6000, bolus=0)
    state[7], reward, done, _ = env7.step(action)