예제 #1
0
 def read(self, p, episode, rem, flag):
     """Append point ``p`` after the current tail and score the old tail.

     The old tail ``m`` is linked forward to ``p`` in ``self.F_ward`` and
     ``p`` is linked back to ``m`` in ``self.B_ward``. The value returned by
     ``F.sed_op`` for ``m`` is stored in both tables and pushed on
     ``self.heap`` as ``(value, m)``. Finally ``self.link_tail`` becomes ``p``.

     Args:
         p: Index (into the original trajectory) of the point to append.
         episode: Index of the trajectory inside ``self.ori_traj_set``.
         rem: Index of an extra point; only used when ``flag`` is true.
         flag: When true, the point at ``rem`` is inserted between the
             predecessor and ``m`` in the sequence given to ``F.sed_op``.
     """
     m = self.link_tail
     self.F_ward[m] = [0.0, p]
     self.B_ward[p] = [0.0, m]
     s = self.B_ward[m][1]  # predecessor of the old tail
     traj = self.ori_traj_set[episode]
     if flag:
         points = [traj[s], traj[rem], traj[m], traj[p]]
     else:
         points = [traj[s], traj[m], traj[p]]
     # Evaluate the error once; both link tables hold the same value.
     value = F.sed_op(points)
     self.F_ward[m][0] = value
     self.B_ward[m][0] = value
     # save (state_value, point index of ori traj)
     heapq.heappush(self.heap, (value, m))
     self.link_tail = p
 def run_by_drop_value_2(self, episode, err_bounded, k, total_drop=0):
     """Grow the open window by one point or emit an observation.

     ``F.sed_op`` is evaluated on the trajectory slice from
     ``self.origin_index`` to ``self.e`` (inclusive).

     * Not above ``err_bounded``: the value is appended to
       ``self.observation_container``, ``self.e`` is advanced and ``[]`` is
       returned.
     * Above the bound with fewer than ``k`` points strictly between the
       two window ends: point ``self.e - 1`` is kept, the window restarts
       from it and ``[]`` is returned.
     * Otherwise: the last ``k`` in-between indices are stored in
       ``self.observation_index`` and the normalized tail of the container
       is returned with ``self.len_ * k`` columns.

     ``total_drop`` is accepted but not used here.
     """
     trajectory = self.ori_traj_set[episode]
     window_error = F.sed_op(trajectory[self.origin_index:self.e + 1])

     if not (window_error > err_bounded):
         # Still within the bound: remember the value and extend the window.
         self.observation_container.append(window_error)
         self.e += 1
         return []

     if self.e - self.origin_index - 1 < k:
         # Deterministic rule: too few candidates, keep the previous point.
         kept = self.e - 1
         self.simplified_index.append(kept)
         self.simplified_tra.append(trajectory[kept])
         self.origin_index = kept
         self.e = kept + 2
         self.observation_container = [0.0]
         return []

     self.conOpw = False
     self.observation_index = list(range(self.origin_index + 1, self.e))[-k:]
     width = self.len_ * k
     recent = self.observation_container[-width:]
     normalized = self.states_normalized(recent, self.len_)
     return np.array(normalized).reshape(-1, width)
    def run_by_drop_value(self, episode, err_bounded, k, total_drop=0):
        """Grow the open window by one point or emit a padded observation.

        ``F.sed_op`` is evaluated on the trajectory slice from
        ``self.origin_index`` to ``self.e`` (inclusive). While the value is
        not above ``err_bounded`` it is appended to
        ``self.observation_container``, ``self.e`` is advanced and ``[]`` is
        returned. Otherwise the last ``k`` in-between indices and the last
        ``self.len_ * k`` container values are taken; whichever is too short
        is passed through a ``RingBuffer`` of the target size and replaced by
        its ``view``. The values are returned with ``self.len_ * k`` columns.

        ``total_drop`` is accepted but not used here.
        """
        window_error = F.sed_op(
            self.ori_traj_set[episode][self.origin_index:self.e + 1])

        if not (window_error > err_bounded):
            self.observation_container.append(window_error)
            self.e += 1
            return []

        self.conOpw = False

        # Candidate indices, padded through a ring buffer when fewer than k.
        candidates = list(range(self.origin_index + 1, self.e))[-k:]
        self.observation_index = candidates
        if len(candidates) < k:
            index_ring = RingBuffer(k)
            while index_ring.append(candidates):
                pass
            self.observation_index = index_ring.view

        # Observation values, padded the same way.
        width = self.len_ * k
        observation = self.observation_container[-width:]
        if len(observation) < width:
            value_ring = RingBuffer(width)
            while value_ring.append(observation):
                pass
            observation = value_ring.view

        return np.array(observation).reshape(-1, width)
예제 #4
0
    def reset(self, episode, buffer_size):
        """Re-initialise all per-episode state and build the first observation.

        Points ``0`` and ``1`` seed the doubly linked buffer, then
        ``self.read`` is called for every index from ``2`` to
        ``buffer_size``. The observation is built from the smallest heap
        entries; when enough trajectory remains, two look-ahead
        ``F.sed_op`` values starting at ``buffer_size`` are appended.

        Args:
            episode: Index of the trajectory inside ``self.ori_traj_set``.
            buffer_size: Index of the last point loaded into the buffer.

        Returns:
            ``(steps, state)`` where ``steps`` is the trajectory length and
            ``state`` is a ``(1, 5)`` array.
        """
        # Reward / error bookkeeping.
        self.rw = 0.0
        self.INX = 0
        self.heap = []
        self.last_error = 0.0
        self.current = 0.0
        self.c_left = 0
        self.c_right = 0
        self.start = {}
        self.end = {}
        self.err_seg = {}
        self.err_record = {}
        self.steps = len(self.ori_traj_set[episode])

        # Linked buffer: F_ward holds (value, next point), B_ward holds
        # (value, previous point).
        self.F_ward = {0: [0.0, 1]}
        self.B_ward = {1: [0.0, 0]}
        self.link_head = 0
        self.link_tail = 1
        for point in range(2, buffer_size + 1):
            self.read(point, episode)

        smallest = heapq.nsmallest(self.n_features, self.heap)
        if len(smallest) < self.n_features:
            # Too few candidates: repeat the best one to fill the slots.
            self.check = [smallest[j][1] for j in (0, 0, 1)]
            self.state = [smallest[j][0] for j in (0, 0, 1, 0, 0)]
        else:
            self.check = [smallest[j][1] for j in (0, 1, 2)]
            if buffer_size + 4 <= self.steps:
                trajectory = self.ori_traj_set[episode]
                ahead_3 = F.sed_op(trajectory[buffer_size:buffer_size + 3])
                ahead_4 = F.sed_op(trajectory[buffer_size:buffer_size + 4])
                tail_values = [ahead_3, ahead_4]
            else:
                tail_values = [smallest[0][0], smallest[0][0]]
            self.state = [smallest[j][0] for j in (0, 1, 2)] + tail_values

        return self.steps, np.array(self.state).reshape(1, -1)
예제 #5
0
 def read(self, p, episode):
     """Append point ``p`` after the current tail and score the old tail.

     The old tail is linked forward to ``p`` and ``p`` back to it. The
     ``F.sed_op`` value of the trajectory slice from the old tail's
     predecessor to ``p`` (inclusive) is cached in ``self.err_record``,
     stored in both link tables and pushed on ``self.heap``.

     Args:
         p: Index (into the original trajectory) of the point to append.
         episode: Index of the trajectory inside ``self.ori_traj_set``.
     """
     tail = self.link_tail
     self.F_ward[tail] = [0.0, p]
     self.B_ward[p] = [0.0, tail]
     before_tail = self.B_ward[tail][1]

     error = F.sed_op(self.ori_traj_set[episode][before_tail:p + 1])
     self.err_record[(before_tail, p)] = error
     self.F_ward[tail][0] = error
     self.B_ward[tail][0] = error
     # save (state_value, point index of ori traj)
     heapq.heappush(self.heap, (error, tail))
     self.link_tail = p
 def run_by_drop_num(self, episode, err_bounded, k, total_drop=0):
     """Grow the open window or emit an observation of running drop counts.

     While ``F.sed_op`` of the slice from ``self.origin_index`` to
     ``self.e`` (inclusive) is not above ``err_bounded``, ``self.e`` is
     advanced and ``[]`` is returned. Otherwise the last ``k`` in-between
     indices go to ``self.observation_index`` and the observation is the
     last ``k`` consecutive integers counted from ``total_drop``. Both lists
     are padded by repeating their final element, and the observation is
     returned with ``k`` columns.
     """
     window = self.ori_traj_set[episode][self.origin_index:self.e + 1]
     if not (F.sed_op(window) > err_bounded):
         self.e += 1
         return []

     self.conOpw = False

     # A non-positive repeat count yields an empty list, so the padding
     # below is a no-op when k entries are already present.
     indices = list(range(self.origin_index + 1, self.e))[-k:]
     self.observation_index = indices
     indices.extend(indices[-1:] * (k - len(indices)))

     counts = list(
         range(total_drop, total_drop + self.e - self.origin_index - 1))[-k:]
     counts.extend(counts[-1:] * (k - len(counts)))
     return np.array(counts).reshape(-1, k)
    def run_by_skip_value_4(self, episode, J, err_bounded):
        """Score the next ``J`` window end points and return them shuffled.

        For each ``i`` in ``[self.e, self.e + J)`` the ``F.sed_op`` value of
        the slice from ``self.origin_index`` to ``i`` (inclusive) is appended
        to ``self.observation_container`` and cached in ``self.err_record``,
        and ``i`` is appended to ``self.observation_index``. The last ``J``
        values are normalized and shuffled together with their indices
        (``random_state=0``), then returned with ``J`` columns.

        ``err_bounded`` is accepted but not used here.
        """
        self.err_record = {}
        trajectory = self.ori_traj_set[episode]
        for end in range(self.e, self.e + J):
            error = F.sed_op(trajectory[self.origin_index:end + 1])
            self.observation_container.append(error)
            self.observation_index.append(end)
            self.err_record[end] = error

        self.observation_index = self.observation_index[-J:]
        values = self.observation_container[-J:]
        if J - len(self.observation_index) > 0:
            # Pad both lists by repeating their last element.
            self.observation_index.extend(
                self.observation_index[-1:] * (J - len(self.observation_index)))
            values.extend(values[-1:] * (J - len(values)))

        values = self.states_normalized(values, 1)
        column = np.array(values).reshape(-1, 1)
        column, self.observation_index = shuffle(
            column, self.observation_index, random_state=0)

        return np.array(column).reshape(-1, J)
    def run_by_skip_value_3(self, episode, J):
        """Score up to ``J`` upcoming points and return them shuffled.

        For each ``i`` in ``[self.e, self.e + J)`` that still has a successor
        in the trajectory, ``F.sed_op`` is evaluated on the three points at
        ``self.origin_index``, ``i`` and ``i + 1``. The value is appended to
        ``self.observation_container`` and ``i`` to
        ``self.observation_index``. The last ``J`` entries are padded by
        repeating their final element if needed, normalized, shuffled
        together with their indices (``random_state=0``) and returned with
        ``J`` columns.
        """
        trajectory = self.ori_traj_set[episode]
        # i + 1 must be a valid index, so stop before the final point.
        stop = min(self.e + J, len(trajectory) - 1)
        for i in range(self.e, stop):
            triple = [
                trajectory[self.origin_index],
                trajectory[i],
                trajectory[i + 1],
            ]
            self.observation_container.append(F.sed_op(triple))
            self.observation_index.append(i)

        self.observation_index = self.observation_index[-J:]
        values = self.observation_container[-J:]
        if J - len(self.observation_index) > 0:
            self.observation_index.extend(
                self.observation_index[-1:] * (J - len(self.observation_index)))
            values.extend(values[-1:] * (J - len(values)))

        values = self.states_normalized(values, 1)
        column = np.array(values).reshape(-1, 1)
        column, self.observation_index = shuffle(
            column, self.observation_index, random_state=0)

        return np.array(column).reshape(-1, J)
예제 #9
0
 def reward_update(self, episode, rem):
     """Update the tracked maximum segment error after point ``rem`` is dropped.

     Bookkeeping used here:

     * ``self.start`` maps a segment's left index to its right index and
       ``self.end`` maps the right index back to the left one.
     * ``self.err_seg`` maps ``(left, right)`` to the ``F.sed_op`` value of
       the trajectory slice ``left..right`` (inclusive).

     Depending on whether ``rem`` is currently a left and/or right end of a
     recorded segment, dropping it inserts a new segment, extends one, or
     merges two. All four cases share one code path: the segments ending or
     starting at ``rem`` are absorbed into a single ``(left, right)`` one.

     ``self.current`` / ``self.current_left`` / ``self.current_right`` are
     then refreshed:

     * if an absorbed segment's error is close to ``self.last_error`` and
       the new error is smaller, the maximum is recomputed over
       ``self.err_seg``;
     * otherwise the new segment takes over when its error is at least the
       absorbed error (or ``self.last_error`` when none matched).

     Args:
         episode: Index of the trajectory inside ``self.ori_traj_set``.
         rem: Index of the point being removed; it must still be linked in
             ``self.F_ward`` / ``self.B_ward``.
     """
     starts_segment = rem in self.start  # a segment (rem, start[rem]) exists
     ends_segment = rem in self.end      # a segment (end[rem], rem) exists

     # The new segment reaches past rem to the far end of any segment that
     # touches it, otherwise to rem's direct neighbour in the linked list.
     left = self.end[rem] if ends_segment else self.B_ward[rem][1]
     right = self.start[rem] if starts_segment else self.F_ward[rem][1]

     absorbed = []
     if ends_segment:
         absorbed.append((left, rem))
     if starts_segment:
         absorbed.append((rem, right))
     previous = [self.err_seg[key] for key in absorbed]

     now = F.sed_op(self.ori_traj_set[episode][left:right + 1])
     for key in absorbed:
         del self.err_seg[key]
     self.err_seg[(left, right)] = now

     # First absorbed error that matches the last reported maximum, if any.
     held_max = next(
         (value for value in previous
          if math.isclose(self.last_error, value)),
         None)
     threshold = self.last_error if held_max is None else held_max
     if now >= threshold:
         self.current = now
         self.current_left, self.current_right = left, right
     elif held_max is not None:
         # The old maximum shrank: rescan all recorded segments.
         (self.current_left, self.current_right) = max(
             self.err_seg, key=self.err_seg.get)
         self.current = self.err_seg[(self.current_left,
                                      self.current_right)]

     self.start[left] = right
     self.end[right] = left
     if starts_segment:
         del self.start[rem]
     if ends_segment:
         del self.end[rem]
예제 #10
0
    def step(self, episode, action, index, done, label='T'):
        """Drop the point chosen by ``action`` and return ``(state, reward)``.

        The removed point is ``self.check[action]``. Its two neighbours in
        the linked buffer are re-scored with ``F.sed_op`` over
        (predecessor, neighbour, removed point, successor), except when the
        neighbour is the buffer head or tail. The point is then unlinked and
        removed from the heap. The reward is ``self.last_error`` minus
        ``self.current`` as left by ``self.reward_update``.

        Args:
            episode: Index of the trajectory inside ``self.ori_traj_set``.
            action: Position in ``self.check`` of the point to drop.
            index: Index of the last point read so far; ``index + 1`` is
                read next unless ``done``.
            done: When true, no new point is read and the previous
                ``self.state`` is returned unchanged.
            label: ``self.reward_update`` is only called when this is 'T'.

        Returns:
            ``(state, rw)`` where ``state`` is ``self.state`` as a one-row
            array and ``rw`` is the reward.
        """
        traj = self.ori_traj_set[episode]
        rem = self.check[action]  # point index in the original trajectory

        next_p = self.F_ward[rem][1]
        next_v = self.B_ward[next_p][0]
        last_p = self.B_ward[rem][1]
        last_v = self.F_ward[last_p][0]

        if last_p > self.link_head:
            self.delete_heap(self.heap, (last_v, last_p))
            # Evaluate once; both link tables hold the same value.
            value = F.sed_op([
                traj[self.B_ward[last_p][1]], traj[last_p], traj[rem],
                traj[next_p]
            ])
            self.F_ward[last_p][0] = value
            self.B_ward[last_p][0] = value
            heapq.heappush(self.heap, (value, last_p))
        if next_p < self.link_tail:
            self.delete_heap(self.heap, (next_v, next_p))
            value = F.sed_op([
                traj[last_p], traj[rem], traj[next_p],
                traj[self.F_ward[next_p][1]]
            ])
            self.F_ward[next_p][0] = value
            self.B_ward[next_p][0] = value
            heapq.heappush(self.heap, (value, next_p))

        if label == 'T':
            self.reward_update(episode, rem)

        # Unlink the removed point.
        self.F_ward[last_p][1] = next_p
        self.B_ward[next_p][1] = last_p
        self.delete_heap(self.heap, (self.F_ward[rem][0], rem))
        del self.F_ward[rem]
        del self.B_ward[rem]

        rw = self.last_error - self.current
        self.last_error = self.current

        if not done:
            # Must be decided before read() moves self.link_tail.
            removed_before_tail = bool(next_p == self.link_tail)
            self.read(index + 1, episode, rem, removed_before_tail)
            if len(self.heap) < self.n_features:
                self.check = [
                    self.heap[0][1], self.heap[0][1], self.heap[1][1]
                ]
                self.state = [
                    self.heap[0][0], self.heap[0][0], self.heap[1][0]
                ]
            else:
                t = heapq.nsmallest(self.n_features, self.heap)
                self.check = [t[0][1], t[1][1], t[2][1]]
                self.state = [t[0][0], t[1][0], t[2][0]]

        return np.array(self.state).reshape(1, -1), rw
예제 #11
0
    def step(self, episode, action, index, done, label='T'):
        """Drop the point chosen by ``action`` and return ``(state, reward)``.

        The removed point is ``self.check[action]``. Each neighbour that is
        not the buffer head or tail is re-scored with ``F.sed_op`` over the
        trajectory slice between its own outer neighbours, and the value is
        cached in ``self.err_record``. ``self.reward_update`` is always
        called. The reward is ``self.last_error`` minus the updated
        ``self.current``.

        Args:
            episode: Index of the trajectory inside ``self.ori_traj_set``.
            action: Position in ``self.check`` of the point to drop.
            index: Index of the last point read so far; ``index + 1`` is
                read next unless ``done``.
            done: When true, no new point is read and ``self.state`` is
                returned as it was.
            label: Accepted but not used by this variant.

        Returns:
            ``(state, rw)`` with ``state`` as a one-row array.
        """
        trajectory = self.ori_traj_set[episode]
        rem = self.check[action]  # point index in the original trajectory

        next_p = self.F_ward[rem][1]
        next_v = self.B_ward[next_p][0]
        last_p = self.B_ward[rem][1]
        last_v = self.F_ward[last_p][0]

        if last_p > self.link_head:
            self.delete_heap(self.heap, (last_v, last_p))
            span = (self.B_ward[last_p][1], next_p)
            error = F.sed_op(trajectory[span[0]:span[1] + 1])
            self.err_record[span] = error
            self.F_ward[last_p][0] = error
            self.B_ward[last_p][0] = error
            heapq.heappush(self.heap, (error, last_p))

        if next_p < self.link_tail:
            self.delete_heap(self.heap, (next_v, next_p))
            span = (last_p, self.F_ward[next_p][1])
            error = F.sed_op(trajectory[span[0]:span[1] + 1])
            self.err_record[span] = error
            self.F_ward[next_p][0] = error
            self.B_ward[next_p][0] = error
            heapq.heappush(self.heap, (error, next_p))

        self.reward_update(episode, rem)

        # Unlink the removed point and drop its heap entry.
        self.F_ward[last_p][1] = next_p
        self.B_ward[next_p][1] = last_p
        self.delete_heap(self.heap, (self.F_ward[rem][0], rem))
        del self.F_ward[rem]
        del self.B_ward[rem]

        rw = self.last_error - self.current
        self.last_error = self.current

        if not done:
            self.read(index + 1, episode)
            smallest = heapq.nsmallest(self.n_features, self.heap)
            # With too few candidates the best one fills the first two slots.
            if len(smallest) < self.n_features:
                slots = (0, 0, 1)
            else:
                slots = (0, 1, 2)
            self.check = [smallest[j][1] for j in slots]
            self.state = [smallest[j][0] for j in slots]

        return np.array(self.state).reshape(1, -1), rw
예제 #12
0
 def reward_update(self, episode, rem, label=''):
     """Update the tracked maximum segment error after points are dropped.

     Bookkeeping used here:

     * ``self.start`` maps a segment's left index to its right index and
       ``self.end`` maps the right index back to the left one.
     * ``self.err_seg`` maps ``(left, right)`` to the ``F.sed_op`` value of
       the trajectory slice ``left..right`` (inclusive).

     With ``label == 'skip'`` the pair ``rem = [left, right]`` is recorded
     directly as a new segment. Otherwise ``rem`` is a single point index:
     depending on whether it is a left and/or right end of a recorded
     segment, dropping it inserts a new segment, extends one, or merges
     two. All cases share one code path.

     ``self.current`` / ``self.current_left`` / ``self.current_right`` are
     then refreshed:

     * if an absorbed segment's error is close to ``self.last_error`` and
       the new error is smaller, the maximum is recomputed over
       ``self.err_seg``;
     * otherwise the new segment takes over when its error is at least the
       absorbed error (or ``self.last_error`` when none matched).

     Args:
         episode: Index of the trajectory inside ``self.ori_traj_set``.
         rem: Point index being removed, or a ``[left, right]`` pair when
             ``label`` is 'skip'.
         label: 'skip' selects the pair form of ``rem``.
     """
     if label == 'skip':
         # rem is a pair here; membership tests on it would not be valid.
         left, right = rem[0], rem[1]
         starts_segment = ends_segment = False
     else:
         starts_segment = rem in self.start  # segment (rem, start[rem])
         ends_segment = rem in self.end      # segment (end[rem], rem)
         # Reach past rem to the far end of any segment touching it,
         # otherwise to rem's direct neighbour in the linked list.
         left = self.end[rem] if ends_segment else self.B_ward[rem][1]
         right = self.start[rem] if starts_segment else self.F_ward[rem][1]

     absorbed = []
     if ends_segment:
         absorbed.append((left, rem))
     if starts_segment:
         absorbed.append((rem, right))
     previous = [self.err_seg[key] for key in absorbed]

     now = F.sed_op(self.ori_traj_set[episode][left:right + 1])
     for key in absorbed:
         del self.err_seg[key]
     self.err_seg[(left, right)] = now

     # First absorbed error that matches the last reported maximum, if any.
     held_max = next(
         (value for value in previous
          if math.isclose(self.last_error, value)),
         None)
     threshold = self.last_error if held_max is None else held_max
     if now >= threshold:
         self.current = now
         self.current_left, self.current_right = left, right
     elif held_max is not None:
         # The old maximum shrank: rescan all recorded segments.
         (self.current_left, self.current_right) = max(
             self.err_seg, key=self.err_seg.get)
         self.current = self.err_seg[(self.current_left,
                                      self.current_right)]

     self.start[left] = right
     self.end[right] = left
     if starts_segment:
         del self.start[rem]
     if ends_segment:
         del self.end[rem]
예제 #13
0
    def step(self, episode, action, index, done, label = 'T'):
        """Drop one buffered point, optionally skip ahead, return ``(state, reward)``.

        An ``action`` inside ``self.check`` drops that candidate. A larger
        ``action`` drops the first candidate and additionally reads a point
        further ahead instead of ``index + 1``. Neighbours of the removed
        point that are not the buffer head or tail are re-scored with
        ``F.sed_op``. When ``label`` is 'T', ``self.rw`` is set from
        ``self.reward_update`` and, on a skip, increased by a second update
        for the skipped range.

        Args:
            episode: Index of the trajectory inside ``self.ori_traj_set``.
            action: Position in ``self.check``, or a value past its end to
                request a skip.
            index: Index of the last point read so far.
            done: When true, no new point is read.
            label: Rewards are only updated when this is 'T'.

        Returns:
            ``(state, self.rw)`` with ``state`` as a one-row array.
        """
        trajectory = self.ori_traj_set[episode]
        # Out-of-range actions fall back to the first candidate.
        rem = self.check[0] if action >= len(self.check) else self.check[action]

        next_p = self.F_ward[rem][1]
        next_v = self.B_ward[next_p][0]
        last_p = self.B_ward[rem][1]
        last_v = self.F_ward[last_p][0]

        if last_p > self.link_head:
            self.delete_heap(self.heap, (last_v, last_p))
            error = F.sed_op([
                trajectory[self.B_ward[last_p][1]], trajectory[last_p],
                trajectory[rem], trajectory[next_p]
            ])
            self.F_ward[last_p][0] = error
            self.B_ward[last_p][0] = error
            heapq.heappush(self.heap, (error, last_p))
        if next_p < self.link_tail:
            self.delete_heap(self.heap, (next_v, next_p))
            error = F.sed_op([
                trajectory[last_p], trajectory[rem], trajectory[next_p],
                trajectory[self.F_ward[next_p][1]]
            ])
            self.F_ward[next_p][0] = error
            self.B_ward[next_p][0] = error
            heapq.heappush(self.heap, (error, next_p))

        if label == 'T':
            self.reward_update(episode, rem)
            self.rw = self.last_error - self.current
            self.last_error = self.current

        # Unlink the removed point and drop its heap entry.
        self.F_ward[last_p][1] = next_p
        self.B_ward[next_p][1] = last_p
        self.delete_heap(self.heap, (self.F_ward[rem][0], rem))
        del self.F_ward[rem]
        del self.B_ward[rem]

        if not done:
            # Decide this before read() moves self.link_tail.
            removed_before_tail = bool(next_p == self.link_tail)
            if action >= len(self.check):
                # Skip ahead, clamped to the final trajectory index.
                self.INX = min(index + 2 + action - len(self.check),
                               len(self.ori_traj_set[episode]) - 1)
                self.read(self.INX, episode, rem, removed_before_tail)
                if label == 'T':
                    self.reward_update(episode, [index, self.INX], 'skip')
                    self.rw += self.last_error - self.current
                    self.last_error = self.current
            else:
                self.read(index + 1, episode, rem, removed_before_tail)

            if len(self.heap) < self.n_features:
                first, second = self.heap[0], self.heap[1]
                self.check = [first[1], first[1], second[1]]
                self.state = [first[0], first[0], second[0]]
            else:
                best = heapq.nsmallest(self.n_features, self.heap)
                self.check = [best[0][1], best[1][1], best[2][1]]
                self.state = [best[0][0], best[1][0], best[2][0]]

        return np.array(self.state).reshape(1, -1), self.rw
예제 #14
0
    def step(self, episode, action, index, done, label='T'):
        """Drop one buffered point, relink its neighbours and build the next state.

        ``self.F_ward[i]`` is ``[value, successor index]`` and ``self.B_ward[i]``
        is ``[value, predecessor index]``; together they form a doubly linked
        list over the points currently kept.  ``self.heap`` stores
        ``(value, point index)`` pairs for those points.

        Args:
            episode: Index into ``self.ori_traj_set`` selecting the trajectory.
            action: Position inside ``self.check`` of the point to drop.  Any
                value ``>= len(self.check)`` is a skip action: it drops
                ``self.check[0]`` and afterwards jumps ahead to
                ``index + 2 + action - len(self.check)`` (clamped to the last
                trajectory index) instead of reading ``index + 1``.
            index: Trajectory position the next read is derived from.
            done: When truthy, no new point is read and ``self.check`` /
                ``self.state`` are left as they were.
            label: Reward bookkeeping (``self.reward_update``, ``self.rw``,
                ``self.last_error``) only runs when this equals ``'T'``.

        Returns:
            ``(state, reward)`` where ``state`` is ``self.state`` as a NumPy
            array reshaped to a single row and ``reward`` is ``self.rw``.
        """
        # Pick the point to remove (index into the original trajectory).
        if action < len(self.check):
            removed = self.check[action]
        else:
            removed = self.check[0]

        # Neighbours of the removed point and the values they were pushed to
        # the heap with; needed to locate their stale heap entries.
        succ = self.F_ward[removed][1]
        succ_val = self.B_ward[succ][0]
        pred = self.B_ward[removed][1]
        pred_val = self.F_ward[pred][0]

        # The predecessor is re-scored over the widened span, unless it is the
        # list head (compared against self.link_head).
        if pred > self.link_head:
            self.delete_heap(self.heap, (pred_val, pred))
            before_pred = self.B_ward[pred][1]
            widened = F.sed_op(
                self.ori_traj_set[episode][before_pred:succ + 1])
            self.err_record[(before_pred, succ)] = widened
            self.F_ward[pred][0] = widened
            self.B_ward[pred][0] = widened
            heapq.heappush(self.heap, (widened, pred))

        # Same treatment for the successor, unless it is the list tail.
        if succ < self.link_tail:
            self.delete_heap(self.heap, (succ_val, succ))
            after_succ = self.F_ward[succ][1]
            widened = F.sed_op(
                self.ori_traj_set[episode][pred:after_succ + 1])
            self.err_record[(pred, after_succ)] = widened
            self.F_ward[succ][0] = widened
            self.B_ward[succ][0] = widened
            heapq.heappush(self.heap, (widened, succ))

        # Reward is the change in self.current produced by reward_update
        # (an earlier version recomputed the error from a copied trajectory).
        if label == 'T':
            self.reward_update(episode, removed)
            self.rw = self.last_error - self.current
            self.last_error = self.current

        # Unlink the removed point and forget it everywhere.
        self.F_ward[pred][1] = succ
        self.B_ward[succ][1] = pred
        self.delete_heap(self.heap, (self.F_ward[removed][0], removed))
        del self.F_ward[removed]
        del self.B_ward[removed]

        if done:
            return np.array(self.state).reshape(1, -1), self.rw

        if action >= len(self.check):
            # Skip action: jump ahead, never past the last trajectory point.
            final_index = len(self.ori_traj_set[episode]) - 1
            self.INX = min(index + 2 + action - len(self.check), final_index)
            self.read(self.INX, episode)
            if label == 'T':
                self.reward_update(episode, [index, self.INX], 'skip')
                self.rw += self.last_error - self.current
                self.last_error = self.current
        else:
            self.read(index + 1, episode)

        # Candidates for the next action are the smallest-valued heap entries.
        lowest = heapq.nsmallest(self.n_features, self.heap)
        if len(lowest) < self.n_features:
            # Too few candidates: the best entry is repeated to pad the state.
            first, second = lowest[0], lowest[1]
            self.check = [first[1], first[1], second[1]]
            self.state = [first[0], first[0], second[0], first[0], first[0]]
        else:
            first, second, third = lowest[0], lowest[1], lowest[2]
            self.check = [first[1], second[1], third[1]]
            # NOTE(review): self.INX is only refreshed by skip actions, so this
            # look-ahead uses the position of the most recent skip -- confirm
            # that this is intended.
            if self.INX + 4 <= self.steps:
                ahead_3 = F.sed_op(
                    self.ori_traj_set[episode][self.INX:self.INX + 3])
                ahead_4 = F.sed_op(
                    self.ori_traj_set[episode][self.INX:self.INX + 4])
                tail_features = [ahead_3, ahead_4]
            else:
                tail_features = [first[0], first[0]]
            self.state = [first[0], second[0], third[0]] + tail_features

        return np.array(self.state).reshape(1, -1), self.rw