def custom_reward(BG_last_hour):
    if len(BG_last_hour) < 2:
        return 0
    else:
        _, _, risk_current = risk_index([BG_last_hour[-1]], 1)
        _, _, risk_prev = risk_index([BG_last_hour[-2]], 1)
        return risk_prev - risk_current

def risk_diff(BG_last_hour):
    if len(BG_last_hour) < 2:
        return 0
    else:
        _, _, risk_current = risk_index([BG_last_hour[-1]], 1)
        _, _, risk_prev = risk_index([BG_last_hour[-2]], 1)
        return risk_prev - risk_current

def risk_diff(BG_last_hour, done=False):
    if len(BG_last_hour) < 2:
        reward = 0
    else:
        _, _, risk_current = risk_index([BG_last_hour[-1]], 1)
        _, _, risk_prev = risk_index([BG_last_hour[-2]], 1)
        reward = risk_prev - risk_current
    return reward

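# The reward functions above all lean on simglucose's risk_index(BG, horizon),
# which returns (LBGI, HBGI, RI) following Kovatchev's blood-glucose risk
# analysis. For reference, a minimal self-contained sketch of that computation;
# the name risk_index_sketch and the handling of empty branches are ours, not
# the library's.
import numpy as np

def risk_index_sketch(BG, horizon):
    """Kovatchev risk index over the last `horizon` BG samples (mg/dL)."""
    bg = np.asarray(BG[-horizon:], dtype=float)
    fBG = 1.509 * (np.log(bg) ** 1.084 - 5.381)  # symmetrizing transform
    rl = 10.0 * fBG[fBG < 0] ** 2                # hypoglycemic risk samples
    rh = 10.0 * fBG[fBG > 0] ** 2                # hyperglycemic risk samples
    LBGI = float(np.mean(rl)) if rl.size else 0.0
    HBGI = float(np.mean(rh)) if rh.size else 0.0
    return LBGI, HBGI, LBGI + HBGI
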
def reset(self):
    super(T1DSimEnvExtendedObs, self).reset()
    CGM = np.zeros(self._n_samples)
    BG = np.zeros(self._n_samples)
    insulin = np.zeros(self._n_samples)
    horizon = 1
    for i in range(self._n_samples):
        BG[i] = self.patient.observation.Gsub
        CGM[i] = self.sensor.measure(self.patient)
        insulin[i] = 0
        LBGI, HBGI, risk = risk_index([BG[i]], horizon)
        self.risk_hist.append(risk)
        self.LBGI_hist.append(LBGI)
        self.HBGI_hist.append(HBGI)
        self.CHO_hist.append(0)
        self.insulin_hist.append(insulin[i])
    self.CGM_hist = CGM.tolist()
    self.BG_hist = BG.tolist()
    if self.append_action:
        obs = np.concatenate((CGM, insulin), axis=None)
    else:
        obs = CGM
    return Step(observation=obs,
                reward=0,
                done=False,
                sample_time=self.sample_time,
                patient_name=self.patient.name,
                meal=0,
                patient_state=self.patient.state)

def call(self, trajectory):
    lbgi, hbgi, ri = risk_index(
        self.rescale_obs(trajectory.observation[0]), 1)
    if self._show:
        print("RI_metric obs", trajectory.observation[0])
        print("RI_metric reward=", trajectory.reward)
        print("RI_metric action=", trajectory.action)
        print("RI_metric info=", trajectory.policy_info)
        print("RI_metric ri=", ri)
        print("RI_metric is_first=", trajectory.is_first())
        print("RI_metric is_boundary=", trajectory.is_boundary())
    if not trajectory.is_boundary():
        self._np_state.risk = np.append(self._np_state.risk, ri)
        self._np_state.lbgi = np.append(self._np_state.lbgi, lbgi)
        self._np_state.hbgi = np.append(self._np_state.hbgi, hbgi)
    if trajectory.is_last():
        self._np_state.total = np.append(self._np_state.total,
                                         np.mean(self._np_state.risk))
        self._np_state.total_lbgi = np.append(self._np_state.total_lbgi,
                                              np.mean(self._np_state.lbgi))
        self._np_state.total_hbgi = np.append(self._np_state.total_hbgi,
                                              np.mean(self._np_state.hbgi))
        self._np_state.risk = np.array([], dtype=np.float32)
        self._np_state.lbgi = np.array([], dtype=np.float32)
        self._np_state.hbgi = np.array([], dtype=np.float32)
    return trajectory

def step(self, action, reward_fun=stepReward3):
    '''
    action is a namedtuple with keys: basal, bolus.
    reward_fun must accept (BG, CGM, CHO, insulin, done=...),
    e.g. stepReward3 below.
    '''
    CHO = 0.0
    insulin = 0.0
    BG = 0.0
    CGM = 0.0
    for _ in range(int(self.sample_time)):
        # Compute moving average as the sample measurements
        tmp_CHO, tmp_insulin, tmp_BG, tmp_CGM = self.mini_step(action)
        CHO += tmp_CHO / self.sample_time
        insulin += tmp_insulin / self.sample_time
        BG += tmp_BG / self.sample_time
        CGM += tmp_CGM / self.sample_time

    # Compute risk index
    horizon = 1
    LBGI, HBGI, risk = risk_index([BG], horizon)

    # Record current action
    self.CHO_hist.append(CHO)
    self.insulin_hist.append(insulin)

    # Record next observation
    self.time_hist.append(self.time)
    self.BG_hist.append(BG)
    self.CGM_hist.append(CGM)
    self.risk_hist.append(risk)
    self.LBGI_hist.append(LBGI)
    self.HBGI_hist.append(HBGI)

    # Episode terminates when BG leaves the controllable range
    done = BG < 70 or BG > 350

    if self.append_action:
        obs = np.concatenate(
            (np.array(self.CGM_hist[-self._n_samples:]),
             np.array(self.insulin_hist[-self._n_samples:],
                      dtype=np.float64)),
            axis=None)
    else:
        obs = np.array(self.CGM_hist[-self._n_samples:])

    reward = reward_fun(BG, CGM, CHO, insulin, done=done)
    return Step(observation=obs,
                reward=reward,
                done=done,
                sample_time=self.sample_time,
                patient_name=self.patient.name,
                meal=CHO,
                patient_state=self.patient.state)

def stepReward3(BG, CGM, CHO, insulin, done=False):
    bg_current = BG
    # Risk indices are computed for parity with stepReward3_eval's logging;
    # they do not affect the reward itself.
    LBGI, HBGI, RI = risk_index([bg_current], 1)
    mRI = magni_RI([bg_current], 1)
    # +1 for every step spent in the target range [70, 180] mg/dL
    if 70 <= bg_current <= 180:
        reward = 1
    else:
        reward = 0
    if done:
        reward = -100
    return reward

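# stepReward3 also calls magni_RI, which is not shown in this file. A plausible
# sketch using the Magni et al. risk function as popularized in glucose-control
# RL work; the constants 3.5506, 0.8353, 3.7932 come from that literature, and
# the actual magni_RI here may differ in signature or return shape.
import numpy as np

def magni_RI_sketch(BG, horizon):
    """Mean Magni risk over the last `horizon` BG samples (mg/dL)."""
    bg = np.asarray(BG[-horizon:], dtype=float)
    risk = 10.0 * (3.5506 * (np.log(bg) ** 0.8353 - 3.7932)) ** 2
    return float(np.mean(risk))
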
def step(self, action, reward_fun=neg_risk):
    '''
    action is a namedtuple with keys: basal, bolus
    '''
    CHO = 0.0
    insulin = 0.0
    BG = 0.0
    CGM = 0.0

    # This loop runs for n minutes of the patient's life.
    # n is determined by the sample time, which is in turn defined by
    # the quality of the sensor and the interval between measurements.
    for _ in range(int(self.sample_time)):
        # Compute moving average as the sample measurements
        tmp_CHO, tmp_insulin, tmp_BG, tmp_CGM = self.mini_step(action)
        CHO += tmp_CHO / self.sample_time
        insulin += tmp_insulin / self.sample_time
        BG += tmp_BG / self.sample_time
        CGM += tmp_CGM / self.sample_time

    # Compute risk index
    horizon = 1
    LBGI, HBGI, risk = risk_index([BG], horizon)

    # Record current action
    self.CHO_hist.append(CHO)
    self.insulin_hist.append(insulin)

    # Record next observation
    self.time_hist.append(self.time)
    self.BG_hist.append(BG)
    self.CGM_hist.append(CGM)
    self.risk_hist.append(risk)
    self.LBGI_hist.append(LBGI)
    self.HBGI_hist.append(HBGI)

    # Compute reward, and decide whether the episode is over
    window_size = int(60 / self.sample_time)
    BG_last_hour = self.CGM_hist[-window_size:]
    reward = reward_fun(BG_last_hour)

    # Stopping criteria
    # done = BG < 70 or BG > 350  # If the BG level goes out of control
    done = self.patient.t >= 1440  # At the end of the day (24 hrs = 1440 min)

    obs = Observation(CGM=CGM)
    return Step(observation=obs,
                reward=reward,
                done=done,
                sample_time=self.sample_time,
                patient_name=self.patient.name,
                meal=CHO,
                patient_state=self.patient.state)

def step(self, action, reward_fun=risk_diff):
    '''
    action is a namedtuple with keys: basal, bolus
    '''
    CHO = 0.0
    insulin = 0.0
    BG = 0.0
    CGM = 0.0
    for _ in range(int(self.sample_time)):
        # Compute moving average as the sample measurements
        tmp_CHO, tmp_insulin, tmp_BG, tmp_CGM = self.mini_step(action)
        CHO += tmp_CHO / self.sample_time
        insulin += tmp_insulin / self.sample_time
        BG += tmp_BG / self.sample_time
        CGM += tmp_CGM / self.sample_time

    # Compute risk index
    horizon = 1
    LBGI, HBGI, risk = risk_index([BG], horizon)

    # Record current action
    self.CHO_hist.append(CHO)
    self.insulin_hist.append(insulin)

    # Record next observation
    self.time_hist.append(self.time)
    self.BG_hist.append(BG)
    self.CGM_hist.append(CGM)
    self.risk_hist.append(risk)
    self.LBGI_hist.append(LBGI)
    self.HBGI_hist.append(HBGI)

    # Compute reward, and decide whether the episode is over
    window_size = int(60 / self.sample_time)
    BG_last_hour = self.CGM_hist[-window_size:]
    reward = reward_fun(BG_last_hour)
    done = BG < 70 or BG > 350
    obs = Observation(CGM=CGM)
    return Step(observation=obs,
                reward=reward,
                done=done,
                sample_time=self.sample_time,
                patient_name=self.patient.name,
                meal=CHO,
                patient_state=self.patient.state,
                time=self.time,
                bg=BG,
                lbgi=LBGI,
                hbgi=HBGI,
                risk=risk)

def stepReward3_eval(BG, CGM, CHO, insulin, done=False):
    bg_current = BG
    LBGI, HBGI, RI = risk_index([bg_current], 1)
    mRI = magni_RI([bg_current], 1)
    if 70 <= bg_current <= 180:
        reward = 1
    else:
        reward = 0
    if done:
        reward = -100
    print("Action:", insulin, ";CHO:", CHO, ";reward:", reward,
          ";BG:", BG, ";CGM:", CGM, ";RI:", RI, ";LBGI:", LBGI,
          ";HBGI:", HBGI, ";mRI:", mRI)
    return reward

def compute_reward(self, achieved_goal: np.ndarray,
                   desired_goal: np.ndarray, _info: dict) -> np.float32:
    """gym.GoalEnv-style reward: the Kovatchev risk of the achieved
    observation, zeroed out above a 0.5 threshold."""
    # Earlier experiments, kept for reference:
    # reward = np.linalg.norm(achieved_goal - desired_goal)
    # reward = np.abs(risk_current - risk_goal)
    _, _, risk_current = risk_index(
        self.ext_env.rescale_obs(achieved_goal), len(achieved_goal))
    reward = risk_current
    if reward > 0.5:
        reward = 0
    return reward

def call(self, trajectory):
    if trajectory.is_first():
        print("Starting episode ", self._np_state.episode)
        self._np_state.episode += 1
    print("\tStep", self._np_state.steps)
    self._np_state.steps += 1
    _, _, ri = risk_index(trajectory.observation[0], 1)
    print("\t\tobs=", trajectory.observation[0])
    print("\t\treward=", trajectory.reward[0])
    print("\t\tri=", ri)
    if trajectory.is_last():
        print("Ending episode ", self._np_state.episode)
        self._np_state.steps = np.int64(0)
    return trajectory

def _reset(self):
    self.sample_time = self.sensor.sample_time
    self.viewer = None
    BG = self.patient.observation.Gsub
    horizon = 1
    LBGI, HBGI, risk = risk_index([BG], horizon)
    CGM = self.sensor.measure(self.patient)
    self.time_hist = [self.scenario.start_time]
    self.BG_hist = [BG]
    self.CGM_hist = [CGM]
    self.risk_hist = [risk]
    self.LBGI_hist = [LBGI]
    self.HBGI_hist = [HBGI]
    self.CHO_hist = []
    self.insulin_hist = []

def step(self, action, reward_fun=risk_diff):
    '''
    action is a namedtuple with keys: basal, bolus
    '''
    CHO = 0.0
    insulin = 0.0
    BG = 0.0
    CGM = 0.0
    CHO_norm = 0.0      # accumulators for the disabled normalized variants
    insulin_norm = 0.0
    CGM_norm = 0.0
    for _ in range(int(self.sample_time)):
        # Compute moving average as the sample measurements
        tmp_CHO, tmp_insulin, tmp_BG, tmp_CGM = self.mini_step(action)
        if tmp_insulin < 0:
            print(tmp_insulin)
            print(self.patient.t)
        CHO += tmp_CHO / self.sample_time
        insulin += tmp_insulin / self.sample_time
        BG += tmp_BG / self.sample_time
        CGM += tmp_CGM / self.sample_time
        # Normalized variants (disabled):
        # CHO_norm += normalize_cho(tmp_CHO / self.sample_time)
        # CGM_norm += normalize_cgm(tmp_CGM / self.sample_time)
        # insulin_norm += normalize_ins(tmp_insulin / self.sample_time)

    # Compute risk index
    horizon = 1
    LBGI, HBGI, risk = risk_index([BG], horizon)

    # Record current action
    self.CHO_hist.append(CHO)
    self.insulin_hist.append(insulin)

    # Record next observation
    self.time_hist.append(self.time)
    self.BG_hist.append(BG)
    self.CGM_hist.append(CGM)
    self.risk_hist.append(risk)
    self.LBGI_hist.append(LBGI)
    self.HBGI_hist.append(HBGI)

    # Compute reward, and decide whether the episode is over
    window_size = int(60 / self.sample_time)  # Horizon
    BG_last_hour = self.CGM_hist[-window_size:]
    reward = reward_fun(BG_last_hour)
    # done = BG < 70 or BG > 350
    done = self.patient.t == (24 * 60) / self.sample_time - 1 * self.sample_time

    cgm_s = self.CGM_hist[-window_size:]
    ins_s = self.insulin_hist[-window_size:]
    cho_s = self.CHO_hist[-window_size:]
    if min(len(self.CGM_hist), len(self.insulin_hist)) < window_size:
        # Pad the histories so each observation window covers a full hour
        pad_size_cgm = max(window_size - len(self.CGM_hist), 0)
        pad_size_IN = max(window_size - len(self.insulin_hist), 0)
        pad_size_CHO = max(window_size - len(self.CHO_hist), 0)
        cgm_s = [self.CGM_hist[0]] * pad_size_cgm + self.CGM_hist        # CGM last hour
        ins_s = [self.insulin_hist[0]] * pad_size_IN + self.insulin_hist  # Insulin last hour
        cho_s = [self.CHO_hist[0]] * pad_size_CHO + self.CHO_hist        # CHO last hour

    obs = Observation(CGM=cgm_s, INSULIN=ins_s, CHO=cho_s)
    return Step(observation=obs,
                reward=reward,
                done=done,
                sample_time=float(self.sample_time),
                patient_name=self.patient.name,
                meal=CHO,
                patient_state=self.patient.state)

def neg_risk(BG_last_hour):
    if len(BG_last_hour) < 2:
        return 0
    else:
        _, _, risk_current = risk_index([BG_last_hour[-1]], 1)
        return -risk_current

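# Hypothetical usage of the custom reward functions with simglucose's base
# environment. The import paths and constructor names below follow
# simglucose's published examples and are assumptions about this project's
# setup; the extended env subclasses above may be instantiated differently.
from datetime import datetime

from simglucose.simulation.env import T1DSimEnv
from simglucose.patient.t1dpatient import T1DPatient
from simglucose.sensor.cgm import CGMSensor
from simglucose.actuator.pump import InsulinPump
from simglucose.simulation.scenario_gen import RandomScenario
from simglucose.controller.base import Action

patient = T1DPatient.withName('adolescent#001')
sensor = CGMSensor.withName('Dexcom', seed=1)
pump = InsulinPump.withName('Insulet')
scenario = RandomScenario(start_time=datetime(2018, 1, 1), seed=1)
env = T1DSimEnv(patient, sensor, pump, scenario)

env.reset()
# Any reward function taking BG_last_hour works here, e.g. neg_risk above
step = env.step(Action(basal=0.02, bolus=0.0), reward_fun=neg_risk)
print(step.observation, step.reward, step.done)
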
def step(self, action, reward_fun=risk_diff):
    '''
    action is a namedtuple with keys: basal, bolus
    '''
    CHO = 0.0
    insulin = 0.0
    BG = 0.0
    CGM = 0.0
    for _ in range(int(self.sample_time)):
        # Compute moving average as the sample measurements
        tmp_CHO, tmp_insulin, tmp_BG, tmp_CGM = self.mini_step(action)
        CHO += tmp_CHO / self.sample_time
        insulin += tmp_insulin / self.sample_time
        BG += tmp_BG / self.sample_time
        CGM += tmp_CGM / self.sample_time

    # Send the blood-glucose sample to Kafka.
    # NOTE: constructing a new KafkaProducer on every step is expensive;
    # reusing a single producer instance would be preferable.
    kafBG = round(BG, 2)
    producer = KafkaProducer(
        acks=0,
        compression_type='gzip',
        bootstrap_servers=["220.69.209.21%d:9092" % i for i in range(3)],
        value_serializer=lambda x: dumps(x).encode('utf-8'))
    data = {'Glucose': str(kafBG)}
    producer.send('BG', value=data)
    producer.flush()

    # Compute risk index
    horizon = 1
    LBGI, HBGI, risk = risk_index([BG], horizon)

    # Record current action
    self.CHO_hist.append(CHO)
    self.insulin_hist.append(insulin)

    # Record next observation
    self.time_hist.append(self.time)
    self.BG_hist.append(BG)
    self.CGM_hist.append(CGM)
    self.risk_hist.append(risk)
    self.LBGI_hist.append(LBGI)
    self.HBGI_hist.append(HBGI)

    # Compute reward, and decide whether the episode is over
    window_size = int(60 / self.sample_time)
    BG_last_hour = self.CGM_hist[-window_size:]
    reward = reward_fun(BG_last_hour)
    done = BG < 70 or BG > 350
    obs = Observation(CGM=CGM)
    return Step(observation=obs,
                reward=reward,
                done=done,
                sample_time=self.sample_time,
                patient_name=self.patient.name,
                meal=CHO,
                patient_state=self.patient.state)

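# Hedged counterpart to the producer above: a minimal kafka-python consumer
# reading the 'BG' topic that step() publishes to. Broker addresses and topic
# name mirror the producer's; everything else about the deployment is assumed.
from json import loads
from kafka import KafkaConsumer

consumer = KafkaConsumer(
    'BG',
    bootstrap_servers=["220.69.209.21%d:9092" % i for i in range(3)],
    value_deserializer=lambda m: loads(m.decode('utf-8')),
    auto_offset_reset='earliest')

for message in consumer:
    print("BG sample:", message.value['Glucose'])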