Example #1
    def load_batch_data(self, batch_list):  # batch_list dim: [batch_size]
        # print(batch_list)
        batch_state_rgb = []
        batch_state_depth = []
        batch_action = []
        batch_reward = []
        batch_next_state_rgb = []
        batch_next_state_depth = []

        for item in batch_list:
            data = item.split('#')  # format: state + '#' + str(action) + '#' + str(reward) + '#' + next_state
            action_id = int(data[1])
            batch_state_rgb.append(my_utils.get_rotate_rgb(action_id,data[0].replace('npy','png').replace('state_depth','state_image')))
            batch_state_depth.append(my_utils.copy_depth_to_3_channel(my_utils.get_rotate_depth(action_id,data[0])).reshape((3,DIM_STATES[0],DIM_STATES[1])))
            batch_action.append([action_id])
            batch_reward.append([float(data[2])])
            batch_next_state_rgb.append(my_utils.get_rotate_rgb(action_id, data[3].replace('npy','png').replace('state_depth', 'state_image')))
            batch_next_state_depth.append(my_utils.copy_depth_to_3_channel(my_utils.get_rotate_depth(action_id,data[3])).reshape((3,DIM_STATES[0],DIM_STATES[1])))

        batch_state_depth = np.array(batch_state_depth)
        batch_next_state_depth = np.array(batch_next_state_depth)
        # # normalize
        # batch_state_depth = (batch_state_depth - Train_Configs.MIN_DEPTH_ARR) / (Train_Configs.MAX_DEPTH_ARR - Train_Configs.MIN_DEPTH_ARR)
        # batch_next_state_depth = (batch_next_state_depth - Train_Configs.MIN_DEPTH_ARR) / (Train_Configs.MAX_DEPTH_ARR - Train_Configs.MIN_DEPTH_ARR)

        return (torch.cuda.FloatTensor(batch_state_rgb),
                torch.cuda.FloatTensor(batch_state_depth),
                torch.cuda.LongTensor(batch_action),
                torch.cuda.FloatTensor(batch_reward),
                torch.cuda.FloatTensor(batch_next_state_rgb),
                torch.cuda.FloatTensor(batch_next_state_depth))
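
    # Sketch (not from the original class) of the '#'-delimited transition string that
    # load_batch_data parses and store_trans (Example #3) builds, plus the file-name
    # convention used throughout to map a depth .npy path to its matching RGB .png:
    #   trans = state_path + '#' + str(action) + '#' + str(reward) + '#' + next_state_path
    #   state_path, action, reward, next_state_path = trans.split('#')
    #   action, reward = int(action), float(reward)
    #   rgb_path = state_path.replace('npy', 'png').replace('state_depth', 'state_image')
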
    def choose_action(self, state_path, EPSILON):
        state_rgb = []
        state_depth = []
        state_rgb.append(
            my_utils.trans_HWC_to_CHW(
                cv2.imread(
                    state_path.replace('npy',
                                       'png').replace('state_depth',
                                                      'state_image'))))
        state_depth.append(
            my_utils.copy_depth_to_3_channel(state_path)
        )  # dim: [3, DIM_STATES[0], DIM_STATES[1]]
        for i in range(1, Train_Configs.ROTATION_BINS):
            state_rotate_rgb = my_utils.get_rotate_rgb(
                i,
                state_path.replace('npy',
                                   'png').replace('state_depth',
                                                  'state_image'))
            state_rgb.append(state_rotate_rgb)
            #------------------------
            state_rotate_depth = my_utils.get_rotate_depth(i, state_path)
            state_rotate_3_depth = my_utils.copy_depth_to_3_channel(
                state_rotate_depth)
            state_depth.append(state_rotate_3_depth)

        state_rgb = np.array(state_rgb)
        state_depth = np.array(state_depth)
        # normalize
        state_depth = (state_depth - np.min(state_depth)) / (
            np.max(state_depth) - np.min(state_depth))
        # numpy to tensor
        state_rgb = torch.cuda.FloatTensor(
            state_rgb)  # dim:[INPUT_IMAGE,3,224,224]
        state_depth = torch.cuda.FloatTensor(
            state_depth)  #dim:[INPUT_IMAGE,3,224,224]

        # epsilon-greedy switch: exploit with probability min(EPSILON, 1), explore otherwise
        prob = np.min((EPSILON, 1))
        p_select = np.array([prob, 1 - prob])
        selected_ac_type = np.random.choice([0, 1], p=p_select.ravel())

        if selected_ac_type == 0:  # exploit: take the network-predicted action
            target_multiChannel_q_map = self.eval_net.forward(
                state_rgb, state_depth)  # dim: [INPUT_IMAGES, 1, 224, 224]
            action = my_utils.find_maxQ_in_qmap(
                target_multiChannel_q_map.cpu().detach().numpy())
            ac_ty = '0'
        else:
            if np.random.rand() < 0.5:  # 50/50: sample an action from the depth mask
                action = my_utils.select_randpID_from_mask(state_path)
                ac_ty = '1'
            else:  # uniform random action
                action = np.random.randint(0, DIM_ACTIONS)
                ac_ty = '2'

        return ac_ty, action  # action type ('0', '1' or '2') and the id of the action
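
The helpers my_utils.find_maxQ_in_qmap and my_utils.translate_actionID_to_XY_and_channel are not shown in these examples. A minimal sketch of one plausible reading, assuming an action id flattens (rotation channel, pixel row, pixel column) over a q-map of dim [ROTATION_BINS, 1, H, W] with H = W = 224 (illustrative only, not the project's actual my_utils code):

    import numpy as np

    def find_maxQ_in_qmap(q_map):
        # q_map dim: [ROTATION_BINS, 1, H, W]; locate the globally best Q value
        _, _, h, w = q_map.shape
        c, _, x, y = np.unravel_index(np.argmax(q_map), q_map.shape)
        return c * h * w + x * w + y  # flatten (c, x, y) into a single action id

    def translate_actionID_to_XY_and_channel(action_id, h=224, w=224):
        # inverse mapping: recover the pixel (x, y) and rotation channel c
        c, rest = divmod(action_id, h * w)
        x, y = divmod(rest, w)
        return x, y, c
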
Example #3
    def store_trans(self, state_path, action, reward, next_state_path, done):
        # the 'action' argument is an action id
        x, y, c = my_utils.translate_actionID_to_XY_and_channel(action)
        trans = state_path + '#' + str(action) + '#' + str(reward) + '#' + next_state_path
        # ----- compute the TD error of (s, a, r, s') for the prioritized replay memory,
        # using only the image rotated to the action's channel c, not all rotated copies
        state_d = state_path
        next_state_d = next_state_path
        if c > 0:
            state_d = my_utils.get_rotate_depth(c,state_d)
            next_state_d = my_utils.get_rotate_depth(c, next_state_d)
        state_depth = my_utils.copy_depth_to_3_channel(state_d).reshape(1, 3, DIM_STATES[0], DIM_STATES[1])
        next_state_depth = my_utils.copy_depth_to_3_channel(next_state_d).reshape(1, 3, DIM_STATES[0], DIM_STATES[1])

        if c == 0:
            state_rgb = my_utils.trans_HWC_to_CHW(cv2.imread(state_path.replace('npy','png').replace('state_depth','state_image'))).reshape(1, 3, DIM_STATES[0], DIM_STATES[1])
            next_state_rgb = my_utils.trans_HWC_to_CHW(cv2.imread(next_state_path.replace('npy','png').replace('state_depth', 'state_image'))).reshape(1, 3, DIM_STATES[0], DIM_STATES[1])
        else:
            state_rgb = my_utils.get_rotate_rgb(c,state_path.replace('npy','png').replace('state_depth','state_image')).reshape(1, 3, DIM_STATES[0], DIM_STATES[1])
            next_state_rgb = my_utils.get_rotate_rgb(c,next_state_path.replace('npy','png').replace('state_depth','state_image')).reshape(1, 3, DIM_STATES[0], DIM_STATES[1])

        # # normalize
        # state_depth = (state_depth - Train_Configs.MIN_DEPTH_ARR) / (Train_Configs.MAX_DEPTH_ARR - Train_Configs.MIN_DEPTH_ARR)
        # next_state_depth = (next_state_depth - Train_Configs.MIN_DEPTH_ARR) / (Train_Configs.MAX_DEPTH_ARR - Train_Configs.MIN_DEPTH_ARR)
        # numpy to tensor
        state_depth = torch.cuda.FloatTensor(state_depth)
        next_state_depth = torch.cuda.FloatTensor(next_state_depth)
        state_rgb = torch.cuda.FloatTensor(state_rgb)
        next_state_rgb = torch.cuda.FloatTensor(next_state_rgb)

        target_singleChannel_q_map = self.eval_net.forward(state_rgb, state_depth)  # dim: [1, 1, 224, 224], CHANNEL=1
        # x,y,c = my_utils.translate_actionID_to_XY_and_channel(action)
        old_val = target_singleChannel_q_map[0][0][x][y]
        # old_val = target[0][action]
        target_val_singleChannel_q_map = self.target_net.forward(next_state_rgb,next_state_depth)#dim:[1,1,224,224]

        if done == 1:
            target_q = reward
        else:
            target_q = reward + self.discount_factor * torch.max(target_val_singleChannel_q_map)

        error = abs(old_val - target_q)
        self.memory.add(float(error), trans)
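
    # Note (not part of the original class): the value passed to self.memory.add is the
    # absolute TD error |Q_eval(s, a) - (r + discount_factor * max_a' Q_target(s', a'))|,
    # so poorly predicted transitions get higher replay priority; the exact semantics of
    # add() depend on the prioritized replay-buffer implementation this agent uses.
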
    def choose_action_for_eval(self, state_path):
        state_rgb = []
        state_depth = []
        state_rgb.append(
            my_utils.trans_HWC_to_CHW(
                cv2.imread(
                    state_path.replace('npy',
                                       'png').replace('state_depth',
                                                      'state_image'))))
        state_depth.append(
            my_utils.copy_depth_to_3_channel(state_path)
        )  # dim: [3, DIM_STATES[0], DIM_STATES[1]]
        for i in range(1, Train_Configs.ROTATION_BINS):
            state_rotate_rgb = my_utils.get_rotate_rgb(
                i,
                state_path.replace('npy',
                                   'png').replace('state_depth',
                                                  'state_image'))
            state_rgb.append(state_rotate_rgb)
            # ------------------------
            state_rotate_depth = my_utils.get_rotate_depth(i, state_path)
            state_rotate_3_depth = my_utils.copy_depth_to_3_channel(
                state_rotate_depth)
            state_depth.append(state_rotate_3_depth)

        state_rgb = np.array(state_rgb)
        state_depth = np.array(state_depth)
        # normalize using the configured heightmap min/max
        state_depth = (state_depth - Train_Configs.MIN_HEIGHTMAP_ARR) / (
            Train_Configs.MAX_HEIGHTMAP_ARR - Train_Configs.MIN_HEIGHTMAP_ARR)
        # numpy to tensor
        state_rgb = torch.cuda.FloatTensor(
            state_rgb)  # dim:[INPUT_IMAGE,3,224,224]
        state_depth = torch.cuda.FloatTensor(
            state_depth)  # dim:[INPUT_IMAGE,3,224,224]

        target_multiChannel_q_map = self.eval_net.forward(
            state_rgb, state_depth)  # dim:[INPUT_IMAGES,1,224,224]
        action = my_utils.find_maxQ_in_qmap(
            target_multiChannel_q_map.cpu().detach().numpy())

        return action  # the id of action
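
The bootstrapped target used in store_trans for the TD error can also be written as a small standalone helper. This is only a sketch of the standard DQN target as it appears in the example above; td_target is an illustrative name, and discount_factor, done and the [1, 1, H, W] q-map shape follow the code in Example #3:

    import torch

    def td_target(reward, next_q_map, discount_factor, done):
        # next_q_map: the target network's q-map for s', dim [1, 1, H, W]
        if done == 1:
            return float(reward)
        return float(reward + discount_factor * torch.max(next_q_map))

    # priority for the replay memory: absolute TD error between the evaluated
    # Q(s, a) at the action's (x, y) location and the bootstrapped target, e.g.
    # error = abs(q_map_s[0, 0, x, y] - td_target(reward, q_map_s_next, gamma, done))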