Example #1
0
 def construct_RT(self):
     '''Fill self.RT, an n x 1 column holding one value per state index.

     For each state vector returned by self.sdic.get_state, elements 0-1
     are treated as the current position and elements 2-3 as the
     destination:
       * REWARD when the current position matches the destination;
       * COST when it matches the START entry paired (by list index)
         with that destination in DESTINATION;
       * 0.5 for every other state.
     '''
     self.RT = np.zeros((self.n, 1))
     for idx in range(self.n):
         vec = self.sdic.get_state(idx)
         cur_x, cur_y = vec[0], vec[1]
         dest_x, dest_y = vec[2], vec[3]

         # already at the destination: reward and move on
         if state.same_loc([cur_x, cur_y], [dest_x, dest_y]):
             self.RT[idx, 0] = REWARD
             continue

         # START and DESTINATION are parallel lists: the start that belongs
         # to this destination sits at the same index
         origin = START[DESTINATION.index([dest_x, dest_y])]
         at_origin = state.same_loc([cur_x, cur_y], [origin[0], origin[1]])
         # sitting on the paired start costs COST; anywhere else gets 0.5
         self.RT[idx, 0] = COST if at_origin else 0.5
Example #2
0
 def construct_RT(self):
     '''Fill self.RT (n x 1): REWARD where a state's position equals its
     destination, zero everywhere else.

     Elements 0-1 of each state vector are compared against elements 2-3
     via state.same_loc.
     '''
     self.RT = np.zeros((self.n, 1))
     for idx in range(self.n):
         vec = self.sdic.get_state(idx)
         position = [vec[0], vec[1]]
         target = [vec[2], vec[3]]
         # states that have not reached their destination keep the zero entry
         if not state.same_loc(position, target):
             continue
         self.RT[idx, 0] = REWARD
Example #3
0
 def construct_x0(self):
     '''Fill self.x0 (n x 1) with the initial density of each state.

     A state receives INIT_DENSITY_CORNER when its position (elements 0-1)
     is one of the START locations and its destination (elements 2-3)
     matches the DESTINATION entry at the same list index as that start.
     All other entries stay zero.
     '''
     self.x0 = np.zeros((self.n, 1))
     for idx in range(self.n):
         vec = self.sdic.get_state(idx)
         origin = [vec[0], vec[1]]
         # only states located on a start position can carry initial density
         if origin in START:
             goal = [vec[2], vec[3]]
             paired_goal = DESTINATION[START.index(origin)]
             if state.same_loc(paired_goal, goal):
                 self.x0[idx, 0] = INIT_DENSITY_CORNER
Example #4
0
 def construct_G(self):
     '''Build self.G, an A x n x n stack of 0/1 transition matrices.

     For the STAY action the slice is the n x n identity. For every other
     action, self.G[act, i, j] is set to 1 when
       * self.world.move_consq applied to state j's location (elements
         0-1) under ``act`` gives a location that state.same_loc matches
         with state i's location, and
       * elements 2, 3 and 4 of state i and state j are equal.
     All remaining entries are zero.
     '''
     self.G = np.zeros((self.A, self.n, self.n))

     # Fetch every state vector once instead of re-fetching it inside the
     # triple-nested loop.
     # NOTE(review): assumes self.sdic.get_state is a side-effect-free
     # lookup -- confirm against its definition.
     states = [self.sdic.get_state(k) for k in range(self.n)]

     for act in range(self.A):
         if act == STAY:
             self.G[act, :, :] = np.eye(self.n)  # stay results in an identity matrix
             continue

         # transition indicator from j (column) to i (row)
         G_act = np.zeros((self.n, self.n))
         for j, state_j in enumerate(states):
             # location reached when taking `act` from state j
             result_loc = self.world.move_consq([state_j[0], state_j[1]], act)
             for i, state_i in enumerate(states):
                 if (state.same_loc(result_loc, [state_i[0], state_i[1]])
                         and state_i[2] == state_j[2]
                         and state_i[3] == state_j[3]
                         and state_i[4] == state_j[4]):
                     G_act[i, j] = 1
         # slice assignment copies the values into self.G, so no separate
         # deep copy of G_act is needed
         self.G[act, :, :] = G_act