def load_batch_data(self, batch_list):
    """Decode a batch of stored transitions into CUDA tensors.

    Every entry of ``batch_list`` is a string of the form
    ``state_path#action#reward#next_state_path`` (the layout produced by
    ``store_trans``). For each entry the RGB and depth observations of
    both the state and the next state are loaded through ``my_utils``.

    Args:
        batch_list: Sequence of transition strings, one per sample.

    Returns:
        A 6-tuple of CUDA tensors, in this order:
        ``(state_rgb, state_depth, action, reward, next_state_rgb,
        next_state_depth)``. ``action`` is a ``LongTensor`` and ``reward``
        a ``FloatTensor``, each with one column per sample; the image
        tensors are ``FloatTensor``. Depth samples are reshaped to
        ``(3, DIM_STATES[0], DIM_STATES[1])``.
    """

    def rgb_path_of(depth_path):
        # The RGB image lives next to the depth file, with the directory
        # / file-name fragment and the extension swapped.
        return depth_path.replace('npy', 'png').replace('state_depth', 'state_image')

    def load_depth(rotation, depth_path):
        rotated = my_utils.get_rotate_depth(rotation, depth_path)
        stacked = my_utils.copy_depth_to_3_channel(rotated)
        return stacked.reshape((3, DIM_STATES[0], DIM_STATES[1]))

    batch_state_rgb = []
    batch_state_depth = []
    batch_action = []
    batch_reward = []
    batch_next_state_rgb = []
    batch_next_state_depth = []

    for item in batch_list:
        # Fields: state path, action id, reward, next-state path.
        data = item.split('#')
        # NOTE(review): the raw action id is used as the rotation argument
        # here, whereas store_trans passes the channel returned by
        # translate_actionID_to_XY_and_channel -- confirm which one
        # get_rotate_rgb / get_rotate_depth expect.
        action_id = int(data[1])

        batch_state_rgb.append(my_utils.get_rotate_rgb(action_id, rgb_path_of(data[0])))
        batch_state_depth.append(load_depth(action_id, data[0]))
        batch_action.append([action_id])
        batch_reward.append([float(data[2])])
        batch_next_state_rgb.append(my_utils.get_rotate_rgb(action_id, rgb_path_of(data[3])))
        batch_next_state_depth.append(load_depth(action_id, data[3]))

    # Stack every image batch into one ndarray before handing it to torch:
    # building a tensor from a Python list of arrays is far slower than
    # building it from a single contiguous array.
    batch_state_rgb = np.array(batch_state_rgb)
    batch_state_depth = np.array(batch_state_depth)
    batch_next_state_rgb = np.array(batch_next_state_rgb)
    batch_next_state_depth = np.array(batch_next_state_depth)

    return (
        torch.cuda.FloatTensor(batch_state_rgb),
        torch.cuda.FloatTensor(batch_state_depth),
        torch.cuda.LongTensor(batch_action),
        torch.cuda.FloatTensor(batch_reward),
        torch.cuda.FloatTensor(batch_next_state_rgb),
        torch.cuda.FloatTensor(batch_next_state_depth),
    )
def choose_action(self, state_path, EPSILON):
    """Pick an action for training, mixing greedy and exploratory choices.

    The observation at ``state_path`` is loaded once unrotated and once
    for every rotation index in ``1 .. Train_Configs.ROTATION_BINS - 1``.
    With probability ``EPSILON`` (clamped to ``[0, 1]``) the greedy action
    from ``self.eval_net`` is returned; otherwise an exploratory action is
    drawn, half of the time from ``my_utils.select_randpID_from_mask`` and
    half of the time uniformly from ``[0, DIM_ACTIONS)``.

    Args:
        state_path: Path of the depth file of the current state; the RGB
            image path is derived from it by string replacement.
        EPSILON: Probability of taking the greedy (network) action.

    Returns:
        ``(ac_ty, action)`` where ``ac_ty`` is ``'0'`` for the greedy
        action, ``'1'`` for the mask-based sample and ``'2'`` for the
        uniform sample, and ``action`` is the action id.
    """
    rgb_path = state_path.replace('npy', 'png').replace('state_depth', 'state_image')

    # Index 0 of each stack is the unrotated observation.
    state_rgb = [my_utils.trans_HWC_to_CHW(cv2.imread(rgb_path))]
    state_depth = [my_utils.copy_depth_to_3_channel(state_path)]
    for i in range(1, Train_Configs.ROTATION_BINS):
        state_rgb.append(my_utils.get_rotate_rgb(i, rgb_path))
        rotated_depth = my_utils.get_rotate_depth(i, state_path)
        state_depth.append(my_utils.copy_depth_to_3_channel(rotated_depth))

    state_rgb = np.array(state_rgb)
    state_depth = np.array(state_depth)

    # Min-max normalise over the whole rotation stack. A constant depth
    # stack has zero range; skip the division then so the network input is
    # all zeros instead of NaN.
    depth_min = np.min(state_depth)
    depth_range = np.max(state_depth) - depth_min
    state_depth = state_depth - depth_min
    if depth_range > 0:
        state_depth = state_depth / depth_range

    # numpy -> CUDA tensors (one entry per rotation along the first axis)
    state_rgb = torch.cuda.FloatTensor(state_rgb)
    state_depth = torch.cuda.FloatTensor(state_depth)

    # Clamp on both sides so an out-of-range schedule value cannot produce
    # an invalid probability vector.
    prob = float(np.clip(EPSILON, 0.0, 1.0))
    selected_ac_type = np.random.choice([0, 1], p=[prob, 1 - prob])

    if selected_ac_type == 0:
        # Greedy action from the evaluation network. Only the arg-max is
        # needed, so no autograd graph is recorded.
        with torch.no_grad():
            q_map = self.eval_net.forward(state_rgb, state_depth)
        action = my_utils.find_maxQ_in_qmap(q_map.cpu().detach().numpy())
        ac_ty = '0'
    elif np.random.rand() < 0.5:
        # Uniform draw in [0, 1): an even split between the two exploration
        # modes (a standard-normal draw would be <= 0.5 about 69% of the time).
        action = my_utils.select_randpID_from_mask(state_path)
        ac_ty = '1'
    else:
        # Uniformly random action id.
        action = np.random.randint(0, DIM_ACTIONS)
        ac_ty = '2'

    return ac_ty, action
def store_trans(self, state_path, action, reward, next_state_path, done):
    """Store one transition in ``self.memory`` with its TD error as priority.

    The transition is serialised as
    ``state_path#action#reward#next_state_path``. Its priority is
    ``|Q_eval(s)[x, y] - target|`` where ``target`` is ``reward`` for a
    terminal transition and ``reward + discount_factor * max Q_target(s')``
    otherwise. Only the single rotation selected by ``action`` is
    evaluated, not the full rotation stack.

    Args:
        state_path: Path of the depth file of the state.
        action: Action id; decoded into ``(x, y, channel)`` by
            ``my_utils.translate_actionID_to_XY_and_channel``.
        reward: Scalar reward of the transition.
        next_state_path: Path of the depth file of the next state.
        done: ``1`` when the transition is terminal.
    """
    x, y, c = my_utils.translate_actionID_to_XY_and_channel(action)
    trans = '#'.join((state_path, str(action), str(reward), next_state_path))

    single_shape = (1, 3, DIM_STATES[0], DIM_STATES[1])
    state_rgb_path = state_path.replace('npy', 'png').replace('state_depth', 'state_image')
    next_rgb_path = next_state_path.replace('npy', 'png').replace('state_depth', 'state_image')

    # Depth: channel 0 is the unrotated image, read straight from the path.
    if c > 0:
        state_d = my_utils.get_rotate_depth(c, state_path)
        next_state_d = my_utils.get_rotate_depth(c, next_state_path)
    else:
        state_d = state_path
        next_state_d = next_state_path
    state_depth = my_utils.copy_depth_to_3_channel(state_d).reshape(single_shape)
    next_state_depth = my_utils.copy_depth_to_3_channel(next_state_d).reshape(single_shape)

    # RGB: same rule, the unrotated image is read directly with OpenCV.
    if c == 0:
        state_rgb = my_utils.trans_HWC_to_CHW(cv2.imread(state_rgb_path))
        next_state_rgb = my_utils.trans_HWC_to_CHW(cv2.imread(next_rgb_path))
    else:
        state_rgb = my_utils.get_rotate_rgb(c, state_rgb_path)
        next_state_rgb = my_utils.get_rotate_rgb(c, next_rgb_path)
    state_rgb = state_rgb.reshape(single_shape)
    next_state_rgb = next_state_rgb.reshape(single_shape)

    # numpy -> CUDA tensors
    state_depth = torch.cuda.FloatTensor(state_depth)
    next_state_depth = torch.cuda.FloatTensor(next_state_depth)
    state_rgb = torch.cuda.FloatTensor(state_rgb)
    next_state_rgb = torch.cuda.FloatTensor(next_state_rgb)

    # The TD error is only used as a scalar priority, so the forward passes
    # do not need to record an autograd graph.
    with torch.no_grad():
        q_map = self.eval_net.forward(state_rgb, state_depth)
        # NOTE(review): the map is indexed as [x][y]; confirm that
        # translate_actionID_to_XY_and_channel returns x as the row index.
        old_val = q_map[0][0][x][y]

        # NOTE(review): the target network is evaluated even for terminal
        # transitions; it could be skipped when done == 1 if its forward
        # pass has no side effects.
        next_q_map = self.target_net.forward(next_state_rgb, next_state_depth)
        if done == 1:
            target_q = reward
        else:
            target_q = reward + self.discount_factor * torch.max(next_q_map)

    error = abs(old_val - target_q)
    self.memory.add(float(error), trans)
def choose_action_for_eval(self, state_path):
    """Return the greedy action for the observation at ``state_path``.

    The observation is loaded once unrotated and once for every rotation
    index in ``1 .. Train_Configs.ROTATION_BINS - 1``, the depth stack is
    normalised with the fixed ``Train_Configs.MIN_HEIGHTMAP_ARR`` /
    ``MAX_HEIGHTMAP_ARR`` bounds, and the arg-max of the Q-map predicted
    by ``self.eval_net`` is returned. No exploration is performed.

    Args:
        state_path: Path of the depth file of the current state; the RGB
            image path is derived from it by string replacement.

    Returns:
        The id of the selected action, as computed by
        ``my_utils.find_maxQ_in_qmap``.
    """
    rgb_path = state_path.replace('npy', 'png').replace('state_depth', 'state_image')

    # Index 0 of each stack is the unrotated observation.
    state_rgb = [my_utils.trans_HWC_to_CHW(cv2.imread(rgb_path))]
    state_depth = [my_utils.copy_depth_to_3_channel(state_path)]
    for i in range(1, Train_Configs.ROTATION_BINS):
        state_rgb.append(my_utils.get_rotate_rgb(i, rgb_path))
        rotated_depth = my_utils.get_rotate_depth(i, state_path)
        state_depth.append(my_utils.copy_depth_to_3_channel(rotated_depth))

    state_rgb = np.array(state_rgb)
    state_depth = np.array(state_depth)

    # Normalise with the configured height-map bounds.
    # NOTE(review): choose_action normalises with the per-stack min/max
    # instead -- confirm the two are meant to differ.
    depth_lo = Train_Configs.MIN_HEIGHTMAP_ARR
    depth_hi = Train_Configs.MAX_HEIGHTMAP_ARR
    state_depth = (state_depth - depth_lo) / (depth_hi - depth_lo)

    # numpy -> CUDA tensors (one entry per rotation along the first axis)
    state_rgb = torch.cuda.FloatTensor(state_rgb)
    state_depth = torch.cuda.FloatTensor(state_depth)

    # Pure inference: do not record an autograd graph.
    with torch.no_grad():
        q_map = self.eval_net.forward(state_rgb, state_depth)
    action = my_utils.find_maxQ_in_qmap(q_map.cpu().detach().numpy())
    return action