Example #1
def custom_reward(BG_last_hour):
    if len(BG_last_hour) < 2:
        return 0
    else:
        _, _, risk_current = risk_index([BG_last_hour[-1]], 1)
        _, _, risk_prev = risk_index([BG_last_hour[-2]], 1)
        return risk_prev - risk_current
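All of these rewards build on simglucose's risk_index, which computes a Kovatchev-style blood-glucose risk index and returns (LBGI, HBGI, RI) with RI = LBGI + HBGI. A minimal, hedged sketch of how it is called (the import path follows simglucose's simulation.env module; verify against your installed version):

from simglucose.simulation.env import risk_index

bg_trace = [160.0, 150.0, 140.0]                 # mg/dL, most recent last
_, _, risk_now = risk_index([bg_trace[-1]], 1)   # risk of the latest reading
_, _, risk_prev = risk_index([bg_trace[-2]], 1)  # risk of the reading before
print(risk_prev - risk_now)  # positive here: glucose moved toward range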
Example #2
def risk_diff(BG_last_hour):
    if len(BG_last_hour) < 2:
        return 0
    else:
        _, _, risk_current = risk_index([BG_last_hour[-1]], 1)
        _, _, risk_prev = risk_index([BG_last_hour[-2]], 1)
        return risk_prev - risk_current
Example #3
def risk_diff(BG_last_hour, done=False):

    if len(BG_last_hour) < 2:
        reward = 0
    else:
        _, _, risk_current = risk_index([BG_last_hour[-1]], 1)
        _, _, risk_prev = risk_index([BG_last_hour[-2]], 1)
        reward = risk_prev - risk_current
    return reward
Example #4
 def reset(self):
     super(T1DSimEnvExtendedObs, self).reset()
     CGM = np.zeros(self._n_samples)
     BG = np.zeros(self._n_samples)
     insulin = np.zeros(self._n_samples)
     horizon = 1
     for i in range(self._n_samples):
         BG[i] = self.patient.observation.Gsub
         CGM[i] = self.sensor.measure(self.patient)
         insulin[i] = 0
         LBGI, HBGI, risk = risk_index([BG[i]], horizon)
         self.risk_hist.append(risk)
         self.LBGI_hist.append(LBGI)
         self.HBGI_hist.append(HBGI)
         self.CHO_hist.append(0)
         self.insulin_hist.append(insulin[i])
     self.CGM_hist = CGM.tolist()
     self.BG_hist = BG.tolist()
     if self.append_action:
         obs = np.concatenate((CGM, insulin), axis=None)
     else:
         obs = CGM
     return Step(observation=obs,
                 reward=0,
                 done=False,
                 sample_time=self.sample_time,
                 patient_name=self.patient.name,
                 meal=0,
                 patient_state=self.patient.state)
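As a reference for the observation layout this reset returns, a small sketch assuming hypothetical values _n_samples = 4 and append_action = True:

import numpy as np

CGM = np.array([140.0, 138.0, 141.0, 139.0])  # _n_samples CGM readings
insulin = np.zeros(4)                         # matching insulin history
obs = np.concatenate((CGM, insulin), axis=None)
# obs[:4] holds the CGM samples, obs[4:] the insulin history
print(obs.shape)  # (8,)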
Example #5
 def call(self, trajectory):
     lbgi, hbgi, ri = risk_index(
         self.rescale_obs(trajectory.observation[0]), 1)
     if self._show:
         print("RI_metric obs", trajectory.observation[0])
         print("RI_metric reward=", trajectory.reward)
         print("RI_metric action=", trajectory.action)
         print("RI_metric info=", trajectory.policy_info)
         print("RI_metric ri=", ri)
         print("RI_metric is_first=", trajectory.is_first())
         print("RI_metric is_boundary=", trajectory.is_boundary())
     if not trajectory.is_boundary():
         self._np_state.risk = np.append(self._np_state.risk, ri)
         self._np_state.lbgi = np.append(self._np_state.lbgi, lbgi)
         self._np_state.hbgi = np.append(self._np_state.hbgi, hbgi)
     if trajectory.is_last():
         self._np_state.total = np.append(self._np_state.total,
                                          np.mean(self._np_state.risk))
         self._np_state.total_lbgi = np.append(self._np_state.total_lbgi,
                                               np.mean(self._np_state.lbgi))
         self._np_state.total_hbgi = np.append(self._np_state.total_hbgi,
                                               np.mean(self._np_state.hbgi))
         self._np_state.risk = np.array([], dtype=np.float32)
         self._np_state.lbgi = np.array([], dtype=np.float32)
         self._np_state.hbgi = np.array([], dtype=np.float32)
     return trajectory
Example #6
    def step(self, action, reward_fun=risk_diff):
        '''
        action is a namedtuple with keys: basal, bolus
        '''
        CHO = 0.0
        insulin = 0.0
        BG = 0.0
        CGM = 0.0

        for _ in range(int(self.sample_time)):
            # Compute moving average as the sample measurements
            tmp_CHO, tmp_insulin, tmp_BG, tmp_CGM = self.mini_step(action)
            CHO += tmp_CHO / self.sample_time
            insulin += tmp_insulin / self.sample_time
            BG += tmp_BG / self.sample_time
            CGM += tmp_CGM / self.sample_time

        # Compute risk index
        horizon = 1
        LBGI, HBGI, risk = risk_index([BG], horizon)

        # Record current action
        self.CHO_hist.append(CHO)
        self.insulin_hist.append(insulin)

        # Record next observation
        self.time_hist.append(self.time)
        self.BG_hist.append(BG)
        self.CGM_hist.append(CGM)
        self.risk_hist.append(risk)
        self.LBGI_hist.append(LBGI)
        self.HBGI_hist.append(HBGI)

        # Compute reward, and decide whether game is over
        window_size = int(60 / self.sample_time)
        BG_last_hour = self.CGM_hist[-window_size:]
        done = BG < 70 or BG > 350
        if self.append_action:
            obs = np.concatenate(
                (np.array(self.CGM_hist[-self._n_samples:]),
                 np.array(self.insulin_hist[-self._n_samples:],
                          dtype=np.float64)),
                axis=None)
        else:
            obs = np.array(self.CGM_hist[-self._n_samples:])
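        # Note: this call passes (BG, CGM, CHO, insulin, done), so the default
        # reward_fun=risk_diff (which expects BG_last_hour) does not match it;
        # callers are expected to pass a stepReward3-style function instead.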
        reward = reward_fun(BG, CGM, CHO, insulin, done=done)
        return Step(observation=obs,
                    reward=reward,
                    done=done,
                    sample_time=self.sample_time,
                    patient_name=self.patient.name,
                    meal=CHO,
                    patient_state=self.patient.state)
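The docstring says the action is a namedtuple with basal and bolus fields. A hedged usage sketch (env stands for an instance of this environment; simglucose ships its own Action namedtuple, so prefer importing that in real code):

from collections import namedtuple

Action = namedtuple('Action', ['basal', 'bolus'])
step = env.step(Action(basal=0.03, bolus=0.0))  # units follow the pump model
print(step.reward, step.done)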
Example #7
def stepReward3(BG, CGM, CHO, insulin, done=False):
    bg_current = BG
    LBGI, HBGI, RI = risk_index([bg_current], 1)
    mRI = magni_RI([bg_current], 1)
    if bg_current >= 70 and bg_current <= 180:
        reward = 1
    else:
        reward = 0
    if done:
        reward = -100
    return reward
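A quick check of the shaping stepReward3 encodes, with hypothetical inputs (assumes risk_index and magni_RI are importable as in the surrounding examples):

print(stepReward3(BG=120, CGM=120, CHO=0, insulin=0))           # 1: inside 70-180
print(stepReward3(BG=60, CGM=60, CHO=0, insulin=0))             # 0: out of range
print(stepReward3(BG=60, CGM=60, CHO=0, insulin=0, done=True))  # -100: terminal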
Example #8
    def step(self, action, reward_fun=neg_risk):
        '''
        action is a namedtuple with keys: basal, bolus
        '''
        CHO = 0.0
        insulin = 0.0
        BG = 0.0
        CGM = 0.0

        # This loop runs for n minutes of the patient's life;
        # n is determined by the sample time, which is in turn set by
        # the sensor quality and the interval between measurements
        for _ in range(int(self.sample_time)):
            # Compute moving average as the sample measurements
            tmp_CHO, tmp_insulin, tmp_BG, tmp_CGM = self.mini_step(action)
            CHO += tmp_CHO / self.sample_time
            insulin += tmp_insulin / self.sample_time
            BG += tmp_BG / self.sample_time
            CGM += tmp_CGM / self.sample_time

        # Compute risk index
        horizon = 1
        LBGI, HBGI, risk = risk_index([BG], horizon)

        # Record current action
        self.CHO_hist.append(CHO)
        self.insulin_hist.append(insulin)

        # Record next observation
        self.time_hist.append(self.time)
        self.BG_hist.append(BG)
        self.CGM_hist.append(CGM)
        self.risk_hist.append(risk)
        self.LBGI_hist.append(LBGI)
        self.HBGI_hist.append(HBGI)

        # Compute reward, and decide whether game is over
        window_size = int(60 / self.sample_time)
        BG_last_hour = self.CGM_hist[-window_size:]
        reward = reward_fun(BG_last_hour)

        # Stopping criteria
        # done = BG < 70 or BG > 350    # If the BG level go out of control
        done = self.patient.t >= 1440  # At the end of the day (24hrs = 1440 mins)

        obs = Observation(CGM=CGM)

        return Step(observation=obs,
                    reward=reward,
                    done=done,
                    sample_time=self.sample_time,
                    patient_name=self.patient.name,
                    meal=CHO,
                    patient_state=self.patient.state)
Example #9
    def step(self, action, reward_fun=risk_diff):
        '''
        action is a namedtuple with keys: basal, bolus
        '''
        CHO = 0.0
        insulin = 0.0
        BG = 0.0
        CGM = 0.0

        for _ in range(int(self.sample_time)):
            # Compute moving average as the sample measurements
            tmp_CHO, tmp_insulin, tmp_BG, tmp_CGM = self.mini_step(action)
            CHO += tmp_CHO / self.sample_time
            insulin += tmp_insulin / self.sample_time
            BG += tmp_BG / self.sample_time
            CGM += tmp_CGM / self.sample_time

        # Compute risk index
        horizon = 1
        LBGI, HBGI, risk = risk_index([BG], horizon)

        # Record current action
        self.CHO_hist.append(CHO)
        self.insulin_hist.append(insulin)

        # Record next observation
        self.time_hist.append(self.time)
        self.BG_hist.append(BG)
        self.CGM_hist.append(CGM)
        self.risk_hist.append(risk)
        self.LBGI_hist.append(LBGI)
        self.HBGI_hist.append(HBGI)

        # Compute reward, and decide whether game is over
        window_size = int(60 / self.sample_time)
        BG_last_hour = self.CGM_hist[-window_size:]
        reward = reward_fun(BG_last_hour)
        done = BG < 70 or BG > 350
        obs = Observation(CGM=CGM)

        return Step(observation=obs,
                    reward=reward,
                    done=done,
                    sample_time=self.sample_time,
                    patient_name=self.patient.name,
                    meal=CHO,
                    patient_state=self.patient.state,
                    time=self.time,
                    bg=BG,
                    lbgi=LBGI,
                    hbgi=HBGI,
                    risk=risk)
Example #10
def stepReward3_eval(BG, CGM, CHO, insulin, done=False):
    bg_current = BG
    LBGI, HBGI, RI = risk_index([bg_current], 1)
    mRI = magni_RI([bg_current], 1)
    if bg_current >= 70 and bg_current <= 180:
        reward = 1
    else:
        reward = 0
    if done:
        reward = -100
    print("Action:", insulin, ";CHO:", CHO, ";reward:", reward, ";BG:", BG,
          ";CGM:", CGM, ";RI:", RI, ";LBGI:", LBGI, ";HBGI:", HBGI, ";mRI:",
          mRI)
    return reward
Example #11
 def compute_reward(
     self, achieved_goal: np.ndarray, desired_goal: np.ndarray, _info: dict
 ) -> np.float32:
     # Risk index of the achieved observation, rescaled back to mg/dL
     _, _, risk_current = risk_index(
         self.ext_env.rescale_obs(achieved_goal), len(achieved_goal))
     # Alternative tried here: distance to the desired goal
     # reward = np.linalg.norm(achieved_goal - desired_goal)
     reward = risk_current
     # Treat low-risk states as goal-reaching (zero reward)
     if reward > 0.5:
         reward = 0
     return reward
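The signature mirrors the compute_reward hook of gym's goal-conditioned (GoalEnv) interface, which is what lets HER-style replay buffers recompute rewards for relabeled goals. A hedged sketch (the dict observation keys follow the usual GoalEnv convention, assumed here):

relabeled_reward = env.compute_reward(
    achieved_goal=obs['achieved_goal'],  # what the agent actually reached
    desired_goal=obs['desired_goal'],    # the (possibly relabeled) target
    _info={})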
Example #12
 def call(self, trajectory):
     if trajectory.is_first():
         print("Starting episode ", self._np_state.episode)
         self._np_state.episode += 1
     print("\tStep", self._np_state.steps)
     self._np_state.steps += 1
     _, _, ri = risk_index(trajectory.observation[0], 1)
     print("\t\tobs=", trajectory.observation[0])
     print("\t\treward=", trajectory.reward[0])
     print("\t\tri=", ri)
     if trajectory.is_last():
         print("Ending episode ", self._np_state.episode)
         self._np_state.steps = np.int64(0)
     return trajectory
Example #13
    def _reset(self):
        self.sample_time = self.sensor.sample_time
        self.viewer = None

        BG = self.patient.observation.Gsub
        horizon = 1
        LBGI, HBGI, risk = risk_index([BG], horizon)
        CGM = self.sensor.measure(self.patient)
        self.time_hist = [self.scenario.start_time]
        self.BG_hist = [BG]
        self.CGM_hist = [CGM]
        self.risk_hist = [risk]
        self.LBGI_hist = [LBGI]
        self.HBGI_hist = [HBGI]
        self.CHO_hist = []
        self.insulin_hist = []
    def step(self, action, reward_fun=risk_diff):
        '''
        action is a namedtuple with keys: basal, bolus
        '''
        CHO = 0.0
        insulin = 0.0
        BG = 0.0
        CGM = 0.0

        CHO_norm = 0.0
        insulin_norm = 0.0
        CGM_norm = 0.0

        for _ in range(int(self.sample_time)):
            # Compute moving average as the sample measurements
            tmp_CHO, tmp_insulin, tmp_BG, tmp_CGM = self.mini_step(action)
            if tmp_insulin < 0:
                print(tmp_insulin)
                print(self.patient.t)
            CHO += tmp_CHO / self.sample_time
            insulin += tmp_insulin / self.sample_time
            BG += tmp_BG / self.sample_time
            CGM += tmp_CGM / self.sample_time
            # CHO_norm += normalize_cho(tmp_CHO / self.sample_time)
            # CGM_norm += normalize_cgm(tmp_CGM / self.sample_time)
            # insulin_norm += normalize_ins(tmp_insulin / self.sample_time)

        # Compute risk index
        horizon = 1
        LBGI, HBGI, risk = risk_index([BG], horizon)

        # Record current action
        self.CHO_hist.append(CHO)
        # self.CHO_hist.append(CHO_norm)
        self.insulin_hist.append(insulin)
        # self.insulin_hist.append(insulin_norm)

        # Record next observation
        self.time_hist.append(self.time)
        self.BG_hist.append(BG)
        self.CGM_hist.append(CGM)
        # self.CGM_hist.append(CGM_norm)
        self.risk_hist.append(risk)
        self.LBGI_hist.append(LBGI)
        self.HBGI_hist.append(HBGI)

        # Compute reward, and decide whether game is over
        window_size = int(60 / self.sample_time)  # Horizon
        BG_last_hour = self.CGM_hist[-window_size:]
        reward = reward_fun(BG_last_hour)
        # done = BG < 70 or BG > 350
        done = (self.patient.t ==
                (24 * 60) / self.sample_time - 1 * self.sample_time)
        cgm_s = self.CGM_hist[-window_size:]
        ins_s = self.insulin_hist[-window_size:]
        cho_s = self.CHO_hist[-window_size:]
        # Left-pad each history with its first value until it spans the window
        if min(len(self.CGM_hist), len(self.insulin_hist)) < window_size:
            pad_size_cgm = max(window_size - len(self.CGM_hist), 0)
            pad_size_IN = max(window_size - len(self.insulin_hist), 0)
            pad_size_CHO = max(window_size - len(self.CHO_hist), 0)
            cgm_s = [self.CGM_hist[0]] * pad_size_cgm + self.CGM_hist  # Blood glucose last hour
            ins_s = [self.insulin_hist[0]] * pad_size_IN + self.insulin_hist  # Insulin last hour
            cho_s = [self.CHO_hist[0]] * pad_size_CHO + self.CHO_hist  # CHO last hour

        obs = Observation(CGM=cgm_s, INSULIN=ins_s, CHO=cho_s)

        return Step(observation=obs,
                    reward=reward,
                    done=done,
                    sample_time=float(self.sample_time),
                    patient_name=self.patient.name,
                    meal=CHO,
                    patient_state=self.patient.state)
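To make the left-padding in the step above concrete, a tiny worked example with a hypothetical window_size of 4:

CGM_hist = [140.0, 142.0]  # only two samples recorded so far
window_size = 4
pad = max(window_size - len(CGM_hist), 0)
cgm_s = [CGM_hist[0]] * pad + CGM_hist
print(cgm_s)  # [140.0, 140.0, 140.0, 142.0]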
def neg_risk(BG_last_hour):
    if len(BG_last_hour) < 2:
        return 0
    else:
        _, _, risk_current = risk_index([BG_last_hour[-1]], 1)
        return -risk_current
Example #16
    def step(self, action, reward_fun=risk_diff):
        '''
        action is a namedtuple with keys: basal, bolus
        '''
        CHO = 0.0
        insulin = 0.0
        BG = 0.0
        CGM = 0.0

        for _ in range(int(self.sample_time)):
            # Compute moving average as the sample measurements
            tmp_CHO, tmp_insulin, tmp_BG, tmp_CGM = self.mini_step(action)

            CHO += tmp_CHO / self.sample_time
            insulin += tmp_insulin / self.sample_time
            BG += tmp_BG / self.sample_time
            CGM += tmp_CGM / self.sample_time

            kafBG = round(BG, 2)

            # Send the glucose reading to Kafka
            producer = KafkaProducer(
                acks=0,
                compression_type='gzip',
                bootstrap_servers=[
                    "220.69.209.21%d:9092" % i for i in range(3)
                ],
                value_serializer=lambda x: dumps(x).encode('utf-8'))
            data = {'Glucose': str(kafBG)}
            producer.send('BG', value=data)
            producer.flush()

        # Compute risk index
        horizon = 1
        LBGI, HBGI, risk = risk_index([BG], horizon)

        # Record current action
        self.CHO_hist.append(CHO)
        self.insulin_hist.append(insulin)

        # Record next observation
        self.time_hist.append(self.time)
        self.BG_hist.append(BG)
        self.CGM_hist.append(CGM)
        self.risk_hist.append(risk)
        self.LBGI_hist.append(LBGI)
        self.HBGI_hist.append(HBGI)

        # Compute reward, and decide whether game is over
        window_size = int(60 / self.sample_time)
        BG_last_hour = self.CGM_hist[-window_size:]
        reward = reward_fun(BG_last_hour)
        done = BG < 70 or BG > 350
        obs = Observation(CGM=CGM)

        return Step(observation=obs,
                    reward=reward,
                    done=done,
                    sample_time=self.sample_time,
                    patient_name=self.patient.name,
                    meal=CHO,
                    patient_state=self.patient.state)
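One caveat in this example: a new KafkaProducer is constructed on every simulated minute. A sketch that builds the producer once and reuses it across steps (same kafka-python settings and broker addresses as above; where to hold the producer is left to the caller):

from json import dumps
from kafka import KafkaProducer

producer = KafkaProducer(
    acks=0,
    compression_type='gzip',
    bootstrap_servers=["220.69.209.21%d:9092" % i for i in range(3)],
    value_serializer=lambda x: dumps(x).encode('utf-8'))

def publish_bg(bg):
    # Reuse the single producer; send is asynchronous, flush forces delivery
    producer.send('BG', value={'Glucose': str(round(bg, 2))})
    producer.flush()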