def Csv_6D_pose(rgb_img, depth_img):
        iteration = 4
        bs = 1
        # knn = KNearestNeighbor(1)
        points, choose, img = testdataset.getitem_by_array(rgb_img, depth_img)
        if choose.ndim < 3:
                return []
        # print("choose.ndim =", choose.ndim)
        obj_id = torch.LongTensor([0]).unsqueeze(0)

        points, choose, img, obj_id = Variable(points).cuda(),  Variable(choose).cuda(), Variable(img).cuda(), Variable(obj_id).cuda()


        pred_r, pred_t, pred_c, emb = estimator(img, points, choose, obj_id)
        pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
        pred_c = pred_c.view(bs, num_points)
        how_max, which_max = torch.max(pred_c, 1)
        pred_t = pred_t.view(bs * num_points, 1, 3)

        my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
        my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
        my_pred = np.append(my_r, my_t)

        for ite in range(0, iteration):
                T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
                my_mat = quaternion_matrix(my_r)
                R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
                my_mat[0:3, 3] = my_t

                new_points = torch.bmm((points - T), R).contiguous()
                pred_r, pred_t = refiner(new_points, emb, obj_id)
                pred_r = pred_r.view(1, 1, -1)
                pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
                my_r_2 = pred_r.view(-1).cpu().data.numpy()
                my_t_2 = pred_t.view(-1).cpu().data.numpy()
                my_mat_2 = quaternion_matrix(my_r_2)
                my_mat_2[0:3, 3] = my_t_2

                my_mat_final = np.dot(my_mat, my_mat_2)
                my_r_final = copy.deepcopy(my_mat_final)
                my_r_final[0:3, 3] = 0
                my_r_final = quaternion_from_matrix(my_r_final, True)
                my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

                my_pred = np.append(my_r_final, my_t_final)
                my_r = my_r_final
                my_t = my_t_final

        print("final prediction: quaternion + translation")
        my_pred = np.asarray(my_pred, dtype='float')
        print(list(my_pred))
        return list(my_pred)
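The refinement loop above composes the current estimate with the network's predicted delta by lifting both to 4x4 homogeneous matrices and multiplying them. A minimal standalone sketch of that step, assuming Gohlke-style transformations helpers (quaternions in [w, x, y, z] order) are importable as transformations:

import numpy as np
from transformations import quaternion_matrix, quaternion_from_matrix

def compose_pose(q, t, delta_q, delta_t):
    """Compose the current estimate (q, t) with a predicted delta pose."""
    mat = quaternion_matrix(q)            # 4x4 homogeneous rotation
    mat[:3, 3] = t
    delta = quaternion_matrix(delta_q)
    delta[:3, 3] = delta_t
    final = np.dot(mat, delta)            # same composition as my_mat_final above
    return quaternion_from_matrix(final), final[:3, 3].copy()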
Example #2
    def update_PROJ(self, ned, yaw_rad, pitch_rad, roll_rad):
        cam_yaw, cam_pitch, cam_roll = self.get_ypr()
        body2cam = transformations.quaternion_from_euler(
            cam_yaw * d2r, cam_pitch * d2r, cam_roll * d2r, 'rzyx')

        # the transformations call below modifies the parameters you pass in,
        # so force copies to avoid our data being changed out from under us
        # (a = b wasn't sufficient, but a = float(b) forces a copy).
        tmp_yaw = float(yaw_rad)
        tmp_pitch = float(pitch_rad)
        tmp_roll = float(roll_rad)
        ned2body = transformations.quaternion_from_euler(
            tmp_yaw, tmp_pitch, tmp_roll, 'rzyx')
        #body2ned = transformations.quaternion_inverse(ned2body)

        #print 'ned2body(q):', ned2body
        ned2cam_q = transformations.quaternion_multiply(ned2body, body2cam)
        ned2cam = np.matrix(
            transformations.quaternion_matrix(np.array(ned2cam_q))[:3, :3]).T
        #print 'ned2cam:', ned2cam
        R = ned2proj.dot(ned2cam)
        rvec, jac = cv2.Rodrigues(R)
        tvec = -np.matrix(R) * np.matrix(ned).T
        R, jac = cv2.Rodrigues(rvec)
        # Rodrigues round trip: this R is the same rotation computed above, recovered from rvec
        self.PROJ = np.concatenate((R, tvec), axis=1)
        #print 'PROJ:', PROJ
        #print lat_deg, lon_deg, altitude, ref[0], ref[1], ref[2]
        #print ned

        return self.PROJ
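A hypothetical usage sketch of the returned 3x4 PROJ matrix (K is an assumed 3x3 camera intrinsic matrix, not part of the example): PROJ carries a homogeneous NED point into camera coordinates, and K then maps it to pixel coordinates.

import numpy as np

def project_ned_point(PROJ, K, p_ned):
    p = np.append(np.asarray(p_ned, dtype=float), 1.0).reshape(4, 1)
    cam = np.asarray(PROJ @ p)      # 3x1 point in camera coordinates
    uvw = K @ cam
    return uvw[0, 0] / uvw[2, 0], uvw[1, 0] / uvw[2, 0]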
Example #3
    def refine(self, obj_id, pose, emb, cloud, iterations=1):
        class_id = self.obj_list.index(obj_id)
        index = torch.LongTensor([class_id])
        index = Variable(index).cuda()

        pose_ref = pose.copy()
        for _ in range(iterations):
            # transform cloud according to pose estimate
            R = Variable(torch.from_numpy(pose_ref[:3, :3].astype(
                np.float32))).cuda().view(1, 3, 3)
            ts = Variable(torch.from_numpy(pose_ref[:3, 3].astype(np.float32))).cuda().view(1, 3)\
                .repeat(self.num_points, 1).contiguous().view(1, self.num_points, 3)
            new_cloud = torch.bmm((cloud - ts), R).contiguous()

            # predict delta pose
            pred_q, pred_t = self.refiner(new_cloud, emb, index)
            pred_q = pred_q.view(1, 1, -1)
            pred_q = pred_q / (torch.norm(pred_q, dim=2).view(1, 1, 1))
            pred_q = pred_q.view(-1).cpu().data.numpy()
            pred_t = pred_t.view(-1).cpu().data.numpy()

            # apply delta to get new pose estimate
            pose_delta = quaternion_matrix(pred_q)
            pose_delta[:3, 3] = pred_t
            pose_ref = pose_ref @ pose_delta

        return pose_ref
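The line pose_ref = pose_ref @ pose_delta applies the predicted delta in the current object frame, i.e. R_new = R @ dR and t_new = R @ dt + t. A small self-contained numpy check of that composition (the numbers are arbitrary placeholders):

import numpy as np

c, s = np.cos(np.pi / 4), np.sin(np.pi / 4)
pose = np.eye(4)
pose[:3, :3] = [[c, 0, s], [0, 1, 0], [-s, 0, c]]   # 45 deg rotation about Y
pose[:3, 3] = [0.1, 0.0, 0.8]
delta = np.eye(4)
delta[:3, 3] = [0.0, 0.02, 0.0]                     # pure translation delta
composed = pose @ delta
assert np.allclose(composed[:3, :3], pose[:3, :3] @ delta[:3, :3])
assert np.allclose(composed[:3, 3], pose[:3, :3] @ delta[:3, 3] + pose[:3, 3])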
Example #4
def run_epoch(model,
              loss_fn,
              loader,
              dtype,
              rot_repr,
              loss_type="regression",
              train=True,
              model_points=None,
              optimizer=None):
    """
    Train the model for one epoch.
    """
    total_distance, total_loss, num_samples = 0.0, 0.0, 0.0
    if train:
        model.train()
    else:
        model.eval()
        avg_dists = []

    for i, data in enumerate(loader, 0):
        img, depth, boxes2d, boxes2d_proj, boxes3d, label, pose_r, pose_t, pose, cam, idx = data

        x_var = Variable(img.type(dtype))
        scores = model(x_var)

        gts = (pose_r.type(dtype), pose_t.type(dtype), pose.type(dtype),
               boxes2d.type(dtype), boxes2d_proj.type(dtype),
               boxes3d.type(dtype))
        loss = calc_loss(scores,
                         loss_fn,
                         rot_repr,
                         dtype,
                         gts,
                         loss_type=loss_type)

        total_loss += loss.item()
        if train:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        else:
            preds_rot = rotReprToRotMat(scores, rot_repr)
            for j in range(preds_rot.shape[0]):
                rot1 = preds_rot[j].detach().cpu().numpy()
                rot1_4 = np.eye(4)
                rot1_4[:3, :3] = rot1
                rot2 = quaternion_matrix(pose_r[j])
                dist = compare_rotation(model_points, rot1_4[:3, :4],
                                        rot2[:3, :4])
                avg_dists.append(dist)
                num_samples += 1

    avg_loss = total_loss / len(loader)  # divide by the number of batches

    if train:
        return avg_loss
    else:
        avg_dist = np.sum(avg_dists) / len(avg_dists)
        return avg_dist, avg_loss
Example #5
def get_pose(root_dset,
             obj_category,
             item,
             art_index,
             frame_order,
             mode='train',
             num_parts=5):
    # pose infos
    parts_world_pos = [None] * num_parts
    parts_world_orn = [None] * num_parts
    parts_model2world = [None] * num_parts
    if mode == 'demo':
        sub_dir0 = root_dset + '/demo/' + obj_category + '/' + item + '/' + art_index
    else:
        sub_dir0 = root_dset + '/render/' + obj_category + '/' + item + '/' + art_index
    meta_file = open(sub_dir0 + '/gt.yml', 'r')
    meta_instance = yaml.load(meta_file, Loader=yaml.SafeLoader)
    pose_dict = meta_instance['frame_{}'.format(frame_order)]
    viewMat = np.array(pose_dict['viewMat']).reshape(4, 4).T
    projMat = np.array(pose_dict['projMat']).reshape(4, 4).T
    #
    parts_world_pos[0] = np.array([0, 0, 0])
    parts_world_orn[0] = np.array([0, 0, 0, 1])
    for link in range(0, num_parts):
        # parts_urdf_box[link] = np.array(urdf_dict['link']['box'][link])
        if link > 0:
            parts_world_pos[link] = np.array(
                pose_dict['obj'][link - 1][4]).astype(np.float32)
            parts_world_orn[link] = np.array(
                pose_dict['obj'][link - 1][5]).astype(np.float32)
    # manual correction
    if obj_category == 'bike':
        parts_world_pos[1], parts_world_pos[2] = parts_world_pos[
            2], parts_world_pos[1]
        parts_world_orn[1], parts_world_orn[2] = parts_world_orn[
            2], parts_world_orn[1]

    # target rotation, translation, and target points
    for k in range(num_parts):
        # matrix computation
        center_world_orn = parts_world_orn[k]
        center_world_orn = np.array([
            center_world_orn[3], center_world_orn[0], center_world_orn[1],
            center_world_orn[2]
        ])
        my_model2world_r = quaternion_matrix(
            center_world_orn)[:4, :4]  # [w, x, y, z]
        my_model2world_t = parts_world_pos[k]
        my_model2world_mat = my_model2world_r
        for m in range(3):
            my_model2world_mat[m, 3] = my_model2world_t[m]
        my_world2camera_mat = viewMat
        my_camera2clip_mat = projMat
        my_model2camera_mat = np.dot(my_world2camera_mat, my_model2world_mat)
        parts_model2world[k] = my_model2world_mat

    return parts_model2world, viewMat, projMat
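A hypothetical usage sketch of the returned matrices (the dataset path and item identifiers are placeholders): each parts_model2world[k] carries part-k model coordinates to world, and viewMat carries world coordinates on into the camera frame, matching my_model2camera_mat above.

import numpy as np

parts_model2world, viewMat, projMat = get_pose('/path/to/dataset', 'laptop', '0001',
                                               '0', 0, mode='train', num_parts=2)
p_model = np.array([0.0, 0.0, 0.0, 1.0])   # homogeneous model-frame point
p_world = parts_model2world[0] @ p_model
p_camera = viewMat @ p_world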
Example #6
    def refinePose(self,
                   emb,
                   cloud,
                   object_label,
                   init_t,
                   init_r,
                   iterations=2):
        init_t = init_t.cpu().data.numpy()
        init_r = init_r.cpu().data.numpy()

        for ite in range(0, iterations):
            T = Variable(torch.from_numpy(
                init_t.astype(np.float32))).cuda().view(1, 3).repeat(
                    self.num_points, 1).contiguous().view(1, self.num_points, 3)
            init_mat = quaternion_matrix(init_r)
            R = Variable(torch.from_numpy(init_mat[:3, :3].astype(
                np.float32))).cuda().view(1, 3, 3)
            init_mat[0:3, 3] = init_t

            new_cloud = torch.bmm((cloud - T), R).contiguous()
            pred_r, pred_t = self.refiner(new_cloud, emb, object_label)
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))

            delta_r = pred_r.view(-1).cpu().data.numpy()
            delta_t = pred_t.view(-1).cpu().data.numpy()
            delta_mat = quaternion_matrix(delta_r)

            delta_mat[0:3, 3] = delta_t

            refined_mat = np.dot(init_mat, delta_mat)
            refined_r = copy.deepcopy(refined_mat)
            refined_r[0:3, 3] = 0
            refined_r = quaternion_from_matrix(refined_r, True)
            refined_t = np.array(
                [refined_mat[0][3], refined_mat[1][3], refined_mat[2][3]])

            init_r = refined_r
            init_t = refined_t
        return refined_t, refined_r
Example #7
    def refine_posenet(self, refine_args):
        iteration = refine_args.iteration
        my_t, my_r = refine_args.t, refine_args.r
        num_points = refine_args.num_points
        cloud = refine_args.cloud
        refiner = refine_args.refiner_network
        emb = refine_args.emb
        index = refine_args.index

        for ite in range(0, iteration):
            T = Variable(torch.from_numpy(
                my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                    num_points, 1).contiguous().view(1, num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(
                np.float32))).cuda().view(1, 3, 3)
            my_mat[0:3, 3] = my_t

            new_cloud = torch.bmm((cloud - T), R).contiguous()
            time_refiner = time.time()
            pred_r, pred_t = refiner(new_cloud, emb, index)
            print("--- RE %s seconds ---" % (time.time() - time_refiner))
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
            my_r_2 = pred_r.view(-1).cpu().data.numpy()
            my_t_2 = pred_t.view(-1).cpu().data.numpy()
            my_mat_2 = quaternion_matrix(my_r_2)

            my_mat_2[0:3, 3] = my_t_2

            my_mat_final = np.dot(my_mat, my_mat_2)
            my_r_final = copy.deepcopy(my_mat_final)
            my_r_final[0:3, 3] = 0
            my_r_final = quaternion_from_matrix(my_r_final, True)
            my_t_final = np.array(
                [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

            my_pred = np.append(my_r_final, my_t_final)
            my_r = my_r_final
            my_t = my_t_final
Example #8
def quatToRotRepr(quat, rot_repr, input=None):
    # First generate
    rot = Rotation.from_quat(quat.reshape(-1))
    if rot_repr == "quat":
        return quat.reshape(-1).numpy()
    elif rot_repr == "mat":
        return quaternion_matrix(quat)[:3, :3].reshape(-1)
    elif rot_repr == "bbox":
        return input.reshape(-1).numpy()
    elif rot_repr == "rodr":
        rotvec = rot.as_rotvec().reshape(-1)
        return rotvec
    elif rot_repr == "euler":
        angles = rot.as_euler('xzy', degrees=False)
        angles = normalizeEulerAngle(angles)
        angles = angles.copy()
        return angles.reshape(-1)
    else:
        raise ValueError("Unknown rot_repr: %s" % rot_repr)
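A small usage sketch: quatToRotRepr expects a torch tensor (the quat and bbox branches call .numpy() on it), with the quaternion in [x, y, z, w] order for the scipy-backed branches.

import torch

q = torch.tensor([0.0, 0.0, 0.0, 1.0])   # identity rotation
print(quatToRotRepr(q, "quat"))          # [0. 0. 0. 1.]
print(quatToRotRepr(q, "rodr"))          # [0. 0. 0.]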
Example #9
        pred_c = pred_c.view(bs, num_points)
        how_max, which_max = torch.max(pred_c, 1)
        pred_t = pred_t.view(bs * num_points, 1, 3)
        points = cloud.view(bs * num_points, 1, 3)

        my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
        my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
        my_pred = np.append(my_r, my_t)
        my_result_wo_refine.append(my_pred.tolist())

        for ite in range(0, iteration):
            T = Variable(torch.from_numpy(
                my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                    num_points, 1).contiguous().view(1, num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(
                np.float32))).cuda().view(1, 3, 3)
            my_mat[0:3, 3] = my_t

            new_cloud = torch.bmm((cloud - T), R).contiguous()
            pred_r, pred_t = refiner(new_cloud, emb, index)
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
            my_r_2 = pred_r.view(-1).cpu().data.numpy()
            my_t_2 = pred_t.view(-1).cpu().data.numpy()
            my_mat_2 = quaternion_matrix(my_r_2)
            my_mat_2[0:3, 3] = my_t_2
            my_mat_final = np.dot(my_mat, my_mat_2)
            my_r_final = copy.deepcopy(my_mat_final)
            my_r_final[0:3, 3] = 0
Example #10
    def __preprocess_and_save__(self, index):
        obj_category = self.list_obj[index]
        ins = self.list_instance[index]
        obj = self.objnamelist.index(obj_category)
        art_status = self.list_status[index]
        frame_order = self.list_rank[index]
        label = self.list_label[index]
        h5_save_path = (self.root_dir + "/hdf5/" + obj_category + "/" + ins +
                        "/" + art_status)
        if not os.path.exists(h5_save_path):
            os.makedirs(h5_save_path)
        h5_save_name = h5_save_path + "/{}.h5".format(frame_order)
        num_parts = self.urdf_dict[obj_category][ins]["num_links"]
        new_num_parts = num_parts - 1

        model_offsets = self.urdf_dict[obj_category][ins]["link"]
        joint_offsets = self.urdf_dict[obj_category][ins]["joint"]

        parts_model_point = [None] * new_num_parts
        parts_world_point = [None] * new_num_parts
        parts_target_point = [None] * new_num_parts

        parts_cloud_cam = [None] * new_num_parts
        parts_cloud_world = [None] * new_num_parts
        parts_cloud_canon = [None] * new_num_parts
        parts_cloud_urdf = [None] * new_num_parts
        parts_cloud_norm = [None] * new_num_parts

        parts_world_pos = [None] * new_num_parts
        parts_world_orn = [None] * new_num_parts
        parts_urdf_pos = [None] * new_num_parts
        parts_urdf_orn = [None] * new_num_parts
        parts_urdf_box = [None] * new_num_parts

        parts_model2world = [None] * new_num_parts
        parts_canon2urdf = [None] * new_num_parts
        parts_target_r = [None] * new_num_parts
        parts_target_t = [None] * new_num_parts

        parts_mask = [None] * new_num_parts
        choose_x = [None] * new_num_parts
        choose_y = [None] * new_num_parts
        choose_to_whole = [None] * new_num_parts

        # rgb/depth/label
        print("current image: ", self.list_rgb[index])
        img = Image.open(self.list_rgb[index])
        img = np.array(img)  # .astype(np.uint8)
        depth = np.array(h5py.File(self.list_depth[index], "r")["data"])
        label = np.array(Image.open(self.list_label[index]))

        # pose infos
        pose_dict = self.meta_dict[obj_category][ins][art_status][
            "frame_{}".format(frame_order)]
        urdf_dict = self.urdf_dict[obj_category][ins]
        viewMat = np.array(pose_dict["viewMat"]).reshape(4, 4).T
        projMat = np.array(pose_dict["projMat"]).reshape(4, 4).T

        parts_world_pos[0] = np.array([0, 0, 0])
        parts_world_orn[0] = np.array([0, 0, 0, 1])
        # SAPIEN: skip the virtual base part
        for link in range(1, num_parts):
            parts_world_pos[link - 1] = np.array(
                pose_dict["obj"][link - 1][4]).astype(np.float32)
            parts_world_orn[link - 1] = np.array(
                pose_dict["obj"][link - 1][5]).astype(np.float32)

        for link in range(1, num_parts):
            if link == 1 and num_parts == 2:
                parts_urdf_pos[link - 1] = np.array(urdf_dict["joint"]["xyz"][
                    link -
                    1])  # TODO: accumulated joint offsets != link offsets
            else:
                # Only change this branch for SAPIEN data
                parts_urdf_pos[link - 1] = -np.array(
                    urdf_dict["link"]["xyz"][link][0])
            parts_urdf_orn[link - 1] = np.array(
                urdf_dict["link"]["rpy"][link][0])

        for k in range(1, num_parts):
            center_world_orn = parts_world_orn[k - 1]
            center_world_orn = np.array([
                center_world_orn[3],
                center_world_orn[0],
                center_world_orn[1],
                center_world_orn[2],
            ])
            my_model2world_r = quaternion_matrix(
                center_world_orn)[:4, :4]  # [w, x, y, z]
            my_model2world_t = parts_world_pos[k - 1]
            my_model2world_mat = np.copy(my_model2world_r)
            for m in range(3):
                my_model2world_mat[m, 3] = my_model2world_t[m]
            my_world2camera_mat = viewMat
            my_camera2clip_mat = projMat
            my_model2camera_mat = np.dot(my_world2camera_mat,
                                         my_model2world_mat)
            parts_model2world[k - 1] = my_model2world_mat

        # depth to cloud data
        # SAPIEN, throw away the label 0 for virtual base link
        mask = np.array((label[:, :] < num_parts) & (label[:, :] > 0)).astype(
            np.uint8)
        mask_whole = np.copy(mask)
        for n in range(1, num_parts):
            parts_mask[n - 1] = np.array((label[:, :] == (n))).astype(np.uint8)
            choose_to_whole[n - 1] = np.where(parts_mask[n - 1] > 0)

        # >>>>>>>>>>------- rendering target pcloud from depth image --------<<<<<<<<<#
        # first get projected map
        # ymap = self.ymap
        # xmap = self.xmap
        # h = self.height
        # w = self.width
        # u_map = ymap * 2 / w - 1
        # v_map = (512 - xmap) * 2 / h - 1
        # v1_map = xmap * 2 / h - 1
        # w_channel = -depth
        # projected_map = np.stack(
        #     [u_map * w_channel, v_map * w_channel, depth, w_channel]
        # ).transpose([1, 2, 0])
        # projected_map1 = np.stack(
        #     [u_map * w_channel, v1_map * w_channel, depth, w_channel]
        # ).transpose([1, 2, 0])
        for s in range(1, num_parts):
            x_set, y_set = choose_to_whole[s - 1]
            if len(x_set) < 10:
                print(ins, art_status, frame_order)
                print(
                    "data is empty, skipping!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
                )
                return None
            else:
                choose_x[s - 1] = x_set
                choose_y[s - 1] = y_set

            # ---------------> from projected map into target part_cloud
            # order: cam->world->canon)

            # MotionNet Modification
            point_num = np.shape(choose_x[s - 1])[0]
            cloud_cam = []
            cloud_cam_real = []
            for i in range(point_num):
                x = choose_x[s - 1][i]
                y = choose_y[s - 1][i]
                y_real = 512 - y
                cloud_cam.append([
                    (y + projMat[0, 2]) * depth[x, y] / projMat[0, 0],
                    -(x + projMat[1, 2]) * depth[x, y] / (-projMat[1, 1]),
                    -depth[x, y],
                ])
                cloud_cam_real.append([
                    (y + projMat[0, 2]) * depth[x, y] / projMat[0, 0],
                    -(x + projMat[1, 2]) * depth[x, y] / (-projMat[1, 1]),
                    -depth[x, y],
                ])
                # cloud_cam.append(
                #     [
                #         (x + projMat[0, 2]) * depth[x, y_real] / projMat[0, 0],
                #         -(y_real + projMat[1, 2]) * depth[x, y_real] / (-projMat[1, 1]),
                #         -depth[x, y_real],
                #     ]
                # )
                # pdb.set_trace()
                # print(cloud_cam)
                # break
            cloud_cam = np.array(cloud_cam)
            cloud_cam_real = np.array(cloud_cam_real)

            # pdb.set_trace()
            # MotionNet Modification
            # projected_points = projected_map[choose_x[s-1][:].astype(np.uint16), choose_y[s-1][:].astype(np.uint16), :]
            # projected_points = np.reshape(projected_points, [-1, 4])
            # depth_channel    = - projected_points[:, 3:4]
            # cloud_cam        = np.dot(projected_points[:, 0:2] - np.dot(depth_channel, projMat[0:2, 2:3].T), np.linalg.pinv(projMat[:2, :2].T))

            # projected_points1 = projected_map1[choose_x[s-1][:].astype(np.uint16), choose_y[s-1][:].astype(np.uint16), :]
            # projected_points1 = np.reshape(projected_points1, [-1, 4])
            # cloud_cam_real    = np.dot(projected_points1[:, 0:2] - np.dot(depth_channel, projMat[0:2, 2:3].T), np.linalg.pinv(projMat[:2, :2].T))
            # cloud_cam_real    = np.concatenate((cloud_cam_real, depth_channel), axis=1)

            # cloud_cam      = np.concatenate((cloud_cam, depth_channel), axis=1)

            cloud_cam_full = np.concatenate(
                (cloud_cam, np.ones((cloud_cam.shape[0], 1))), axis=1)

            # modify, todo
            camera_pose_mat = np.linalg.pinv(viewMat.T)
            # MotionNet modification
            # camera_pose_mat[:3, :] = -camera_pose_mat[:3, :]
            cloud_world = np.dot(cloud_cam_full, camera_pose_mat)
            cloud_canon = np.dot(cloud_world,
                                 np.linalg.pinv(parts_model2world[s - 1].T))

            # canon points should be points coordinates centered in the inertial frame
            parts_cloud_cam[s - 1] = cloud_cam_real[:, :3]
            parts_cloud_world[s - 1] = cloud_world[:, :3]
            parts_cloud_canon[s - 1] = cloud_canon[:, :3]

        for k in range(1, num_parts):
            center_joint_orn = parts_urdf_orn[k - 1]
            my_canon2urdf_r = euler_matrix(
                center_joint_orn[0], center_joint_orn[1],
                center_joint_orn[2])[:4, :4]  # [w, x, y, z]
            my_canon2urdf_t = parts_urdf_pos[k - 1]
            my_canon2urdf_mat = my_canon2urdf_r
            for m in range(3):
                my_canon2urdf_mat[m, 3] = my_canon2urdf_t[m]
            part_points_space = np.concatenate(
                (
                    parts_cloud_canon[k - 1],
                    np.ones((parts_cloud_canon[k - 1].shape[0], 1)),
                ),
                axis=1,
            )
            parts_cloud_urdf[k - 1] = np.dot(part_points_space,
                                             my_canon2urdf_mat.T)

        # >>>>>>>>>>>>>>> go to NPCS space
        # NPCS here is not stored into hdf5
        for link in range(1, num_parts):
            tight_w = max(parts_cloud_urdf[link - 1][:, 0]) - min(
                parts_cloud_urdf[link - 1][:, 0])
            tight_l = max(parts_cloud_urdf[link - 1][:, 1]) - min(
                parts_cloud_urdf[link - 1][:, 1])
            tight_h = max(parts_cloud_urdf[link - 1][:, 2]) - min(
                parts_cloud_urdf[link - 1][:, 2])
            norm_factor = np.sqrt(1) / np.sqrt(tight_w**2 + tight_l**2 +
                                               tight_h**2)
            base_p = np.array([
                min(parts_cloud_urdf[link - 1][:, 0]),
                min(parts_cloud_urdf[link - 1][:, 1]),
                min(parts_cloud_urdf[link - 1][:, 2]),
            ]).reshape(1, 3)
            extre_p = np.array([
                max(parts_cloud_urdf[link - 1][:, 0]),
                max(parts_cloud_urdf[link - 1][:, 1]),
                max(parts_cloud_urdf[link - 1][:, 2]),
            ]).reshape(1, 3)
            center_p = (extre_p - base_p) / 2 * norm_factor

            parts_cloud_norm[link - 1] = (
                (parts_cloud_urdf[link - 1][:, :3] - base_p) * norm_factor +
                np.array([0.5, 0.5, 0.5]).reshape(1, 3) -
                center_p.reshape(1, 3))

        x_set_pcloud = np.concatenate(choose_x, axis=0)
        y_set_pcloud = np.concatenate(choose_y, axis=0)

        # save into h5 for rgb_img, input_pts, mask, correpsonding urdf_points
        print("Writing to ", h5_save_name)
        hf = h5py.File(h5_save_name, "w")
        hf.create_dataset("rgb", data=img)
        hf.create_dataset("mask", data=mask)
        cloud_cam = hf.create_group("gt_points")
        for part_i, points in enumerate(parts_cloud_cam):
            cloud_cam.create_dataset(str(part_i), data=points)
        coord_gt = hf.create_group("gt_coords")
        for part_i, points in enumerate(parts_cloud_urdf):
            coord_gt.create_dataset(str(part_i), data=points)
        hf.close()

        ################# for debug only, let me know if you have questions #################
        if self.is_debug:
            figure = plt.figure(dpi=200)
            ax = plt.subplot(121)
            plt.imshow(img)
            plt.title('RGB image')
            ax1 = plt.subplot(122)
            plt.imshow(depth)
            plt.title('depth image')
            plt.show()
            plot3d_pts(
                [parts_cloud_cam],
                [['part {}'.format(i) for i in range(len(parts_cloud_cam))]],
                s=5,
                title_name=['camera coords'])
            plot3d_pts(
                [parts_cloud_world],
                [['part {}'.format(i) for i in range(len(parts_cloud_world))]],
                s=5,
                title_name=['world coords'])
            plot3d_pts(
                [parts_cloud_canon],
                [['part {}'.format(i) for i in range(len(parts_cloud_canon))]],
                s=5,
                title_name=['canon coords'])
            plot3d_pts(
                [parts_cloud_urdf],
                [['part {}'.format(i) for i in range(len(parts_cloud_urdf))]],
                s=5,
                title_name=['urdf coords'])

        return None
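The per-part normalization above (the NPCS block) shifts points by their minimum corner, scales by the diagonal of the tight bounding box, and re-centres around 0.5 so each part fits inside the unit cube. A minimal sketch of the same computation:

import numpy as np

def normalize_part(points):
    base = points.min(axis=0, keepdims=True)
    extent = points.max(axis=0, keepdims=True) - base
    norm_factor = 1.0 / np.linalg.norm(extent)      # 1 / bounding-box diagonal
    center = extent / 2 * norm_factor
    return (points - base) * norm_factor + 0.5 - center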
                                                         Variable(model_points).cuda(), \
                                                         Variable(idx).cuda()

        pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
        pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
        pred_c = pred_c.view(bs, num_points)
        how_max, which_max = torch.max(pred_c, 1)
        pred_t = pred_t.view(bs * num_points, 1, 3)

        my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
        my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
        my_pred = np.append(my_r, my_t)

        for ite in range(0, iteration):
            T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
            my_mat[0:3, 3] = my_t

            new_points = torch.bmm((points - T), R).contiguous()
            pred_r, pred_t = refiner(new_points, emb, idx)
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
            my_r_2 = pred_r.view(-1).cpu().data.numpy()
            my_t_2 = pred_t.view(-1).cpu().data.numpy()
            my_mat_2 = quaternion_matrix(my_r_2)
            my_mat_2[0:3, 3] = my_t_2

            my_mat_final = np.dot(my_mat, my_mat_2)
            my_r_final = copy.deepcopy(my_mat_final)
            my_r_final[0:3, 3] = 0
Example #12
def pose_predict(img, depth, rois):
    label_pub = rospy.Publisher('/label', Image, queue_size=10)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    class_list = [
        '002_master_chef_can', '003_cracker_box', '004_sugar_box',
        '005_tomato_soup_can', '006_mustard_bottle', '007_tuna_fish_can',
        '008_pudding_box', '009_gelatin_box', '010_potted_meat_can',
        '011_banana', '019_pitcher_base', '025_mug', '021_bleach_cleanser',
        '024_bowl', '035_power_drill', '036_wood_block', '037_scissors',
        '040_large_marker', '051_large_clamp', '052_extra_large_clamp',
        '061_foam_brick'
    ]
    try:
        object_number = len(rois)

        #lst = posecnn_rois[:,0:1].flatten()
        #lst = np.unique(label)
        my_result_wo_refine = []
        my_result = []
        for idx in range(object_number):
            #itemid = lst[idx]
            itemid = class_list.index(rois[idx].Class) + 1
            #itemid = class_list.index(rois[idx].Class) +3
            print(object_number, itemid, rois[idx])

            try:
                label, pub_label = seg_predict(img)
                pub_label = pub_label * 50
                label_pub.publish(bridge.cv2_to_imgmsg(pub_label, '8UC1'))
                ####################### with Detection algorithm #################################
                # rmin, rmax, cmin,cmax = get_bbox(rois,idx)
                #####################################################################################
                mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
                mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
                mask = mask_label * mask_depth
                rmin, rmax, cmin, cmax = get_bbox(mask_label)

                choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
                if len(choose) > num_points:
                    c_mask = np.zeros(len(choose), dtype=int)
                    c_mask[:num_points] = 1
                    np.random.shuffle(c_mask)
                    choose = choose[c_mask.nonzero()]
                else:
                    choose = np.pad(choose, (0, num_points - len(choose)),
                                    'wrap')

                depth_masked = depth[
                    rmin:rmax,
                    cmin:cmax].flatten()[choose][:,
                                                 np.newaxis].astype(np.float32)
                xmap_masked = xmap[
                    rmin:rmax,
                    cmin:cmax].flatten()[choose][:,
                                                 np.newaxis].astype(np.float32)
                ymap_masked = ymap[
                    rmin:rmax,
                    cmin:cmax].flatten()[choose][:,
                                                 np.newaxis].astype(np.float32)
                choose = np.array([choose])

                pt2 = depth_masked / cam_scale
                pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
                pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
                cloud = np.concatenate((pt0, pt1, pt2), axis=1)

                img_masked = np.array(img)[:, :, :3]
                img_masked = np.transpose(img_masked, (2, 0, 1))
                img_masked = img_masked[:, rmin:rmax, cmin:cmax]

                cloud = torch.from_numpy(cloud.astype(np.float32))
                choose = torch.LongTensor(choose.astype(np.int32))
                img_masked = norm(
                    torch.from_numpy(img_masked.astype(np.float32)))
                index = torch.LongTensor([itemid - 1])

                cloud = Variable(cloud).cuda()
                choose = Variable(choose).cuda()
                img_masked = Variable(img_masked).cuda()
                index = Variable(index).cuda()
                cloud = cloud.view(1, num_points, 3)
                img_masked = img_masked.view(1, 3,
                                             img_masked.size()[1],
                                             img_masked.size()[2])
                pred_r, pred_t, pred_c, emb = estimator(
                    img_masked, cloud, choose, index)
                pred_r = pred_r / torch.norm(pred_r, dim=2).view(
                    1, num_points, 1)
                pred_c = pred_c.view(bs, num_points)
                how_max, which_max = torch.max(pred_c, 1)
                pred_t = pred_t.view(bs * num_points, 1, 3)
                points = cloud.view(bs * num_points, 1, 3)
                my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
                my_t = (points +
                        pred_t)[which_max[0]].view(-1).cpu().data.numpy()
                my_pred = np.append(my_r, my_t)
                # making pose matrix from the current (unrefined) estimate
                dof = quaternion_matrix(my_r)
                dof[0:3, 3] = my_t
                rot_to_angle = rotationMatrixToEulerAngles(dof[:3, :3])
                rot_to_angle = rot_to_angle.reshape(1, 3)
                my_t = my_t.reshape(1, 3)
                rot_t = np.concatenate([rot_to_angle, my_t], axis=0)

                # cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy],
                #   [0, 0, 1]]))
                #tl = np.array([100,100,100])
                #cam_mat = cv2.UMat(np.matrix([[960.14238289, 0, 252.43270692], [0, 960.14238289, 317.39366696],
                #             [0, 0, 1]]))

                for ite in range(0, iteration):
                    T = Variable(torch.from_numpy(
                        my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                            num_points, 1).contiguous().view(1, num_points, 3)
                    my_mat = quaternion_matrix(my_r)
                    R = Variable(
                        torch.from_numpy(my_mat[:3, :3].astype(
                            np.float32))).cuda().view(1, 3, 3)
                    my_mat[0:3, 3] = my_t

                    new_cloud = torch.bmm((cloud - T), R).contiguous()
                    pred_r, pred_t = refiner(new_cloud, emb, index)
                    pred_r = pred_r.view(1, 1, -1)
                    pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
                    my_r_2 = pred_r.view(-1).cpu().data.numpy()
                    my_t_2 = pred_t.view(-1).cpu().data.numpy()
                    my_mat_2 = quaternion_matrix(my_r_2)

                    my_mat_2[0:3, 3] = my_t_2
                    my_mat_final = np.dot(my_mat, my_mat_2)
                    my_r_final = copy.deepcopy(my_mat_final)
                    my_r_final[0:3, 3] = 0
                    my_r_final = quaternion_from_matrix(my_r_final, True)

                    my_t_final = np.array([
                        my_mat_final[0][3], my_mat_final[1][3],
                        my_mat_final[2][3]
                    ])

                    my_pred = np.append(my_r_final, my_t_final)
                    my_r = my_r_final
                    my_t = my_t_final
                open_cv_image = img.copy()
                open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
                dof = quaternion_matrix(my_r)
                dof[0:3, 3] = my_t

                object_poses = {
                    'tx': my_t[0],
                    'ty': my_t[1],
                    'tz': my_t[2],
                    'qx': my_r[0],
                    'qy': my_r[1],
                    'qz': my_r[2],
                    'qw': my_r[3]
                }
                my_result.append(object_poses)
                open_cv_image = img.copy()
                open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
                imgpts, jac = cv2.projectPoints(cld[itemid], dof[0:3, 0:3],
                                                dof[0:3, 3], cam_mat,
                                                dist)  # 13 = mug
                open_cv_image = draw(open_cv_image, imgpts, itemid)

            except ZeroDivisionError:
                open_cv_image = None
                print('Fail')
    except CvBridgeError as e:
        print(e)
    return my_result, open_cv_image
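The cloud construction above is a standard pinhole back-projection of the masked depth pixels. A minimal per-pixel sketch (cam_fx, cam_fy, cam_cx, cam_cy and cam_scale are the same assumed module-level intrinsics used in the example):

import numpy as np

def backproject(depth_value, u, v, cam_fx, cam_fy, cam_cx, cam_cy, cam_scale):
    z = depth_value / cam_scale
    x = (u - cam_cx) * z / cam_fx    # u is the column index (ymap above)
    y = (v - cam_cy) * z / cam_fy    # v is the row index (xmap above)
    return np.array([x, y, z])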
Example #13
    def __preprocess_and_save__(self, index):
        obj_category = self.list_obj[index]
        ins = self.list_instance[index]
        obj = self.objnamelist.index(obj_category)
        art_status = self.list_status[index]
        frame_order = self.list_rank[index]
        label = self.list_label[index]
        h5_save_path = self.root_dir + '/hdf5/' + obj_category + '/' + ins + '/' + art_status
        if (not os.path.exists(h5_save_path)):
            os.makedirs(h5_save_path)
        h5_save_name = h5_save_path + '/{}.h5'.format(frame_order)
        num_parts = self.urdf_dict[obj_category][ins]['num_links']

        model_offsets = self.urdf_dict[obj_category][ins]['link']
        joint_offsets = self.urdf_dict[obj_category][ins]['joint']

        parts_model_point = [None] * num_parts
        parts_world_point = [None] * num_parts
        parts_target_point = [None] * num_parts

        parts_cloud_cam = [None] * num_parts
        parts_cloud_world = [None] * num_parts
        parts_cloud_canon = [None] * num_parts
        parts_cloud_urdf = [None] * num_parts
        parts_cloud_norm = [None] * num_parts

        parts_world_pos = [None] * num_parts
        parts_world_orn = [None] * num_parts
        parts_urdf_pos = [None] * num_parts
        parts_urdf_orn = [None] * num_parts
        parts_urdf_box = [None] * num_parts

        parts_model2world = [None] * num_parts
        parts_canon2urdf = [None] * num_parts
        parts_target_r = [None] * num_parts
        parts_target_t = [None] * num_parts

        parts_mask = [None] * num_parts
        choose_x = [None] * num_parts
        choose_y = [None] * num_parts
        choose_to_whole = [None] * num_parts

        # rgb/depth/label
        print('current image: ', self.list_rgb[index])
        img = Image.open(self.list_rgb[index])
        img = np.array(img)  #.astype(np.uint8)
        depth = np.array(h5py.File(self.list_depth[index], 'r')['data'])
        label = np.array(Image.open(self.list_label[index]))

        # pose infos
        pose_dict = self.meta_dict[obj_category][ins][art_status][
            'frame_{}'.format(frame_order)]
        urdf_dict = self.urdf_dict[obj_category][ins]
        viewMat = np.array(pose_dict['viewMat']).reshape(4, 4).T
        projMat = np.array(pose_dict['projMat']).reshape(4, 4).T

        parts_world_pos[0] = np.array([0, 0, 0])
        parts_world_orn[0] = np.array([0, 0, 0, 1])
        for link in range(0, num_parts):
            if link > 0:
                parts_world_pos[link] = np.array(
                    pose_dict['obj'][link - 1][4]).astype(np.float32)
                parts_world_orn[link] = np.array(
                    pose_dict['obj'][link - 1][5]).astype(np.float32)

        for link in range(num_parts):
            if link == 1 and num_parts == 2:
                parts_urdf_pos[link] = np.array(urdf_dict['joint']['xyz'][
                    link -
                    1])  # todo, accumulate joint offsets != link offsets
            else:
                parts_urdf_pos[link] = -np.array(
                    urdf_dict['link']['xyz'][link])
            parts_urdf_orn[link] = np.array(urdf_dict['link']['rpy'][link])

        for k in range(num_parts):
            center_world_orn = parts_world_orn[k]
            center_world_orn = np.array([
                center_world_orn[3], center_world_orn[0], center_world_orn[1],
                center_world_orn[2]
            ])
            my_model2world_r = quaternion_matrix(
                center_world_orn)[:4, :4]  # [w, x, y, z]
            my_model2world_t = parts_world_pos[k]
            my_model2world_mat = np.copy(my_model2world_r)
            for m in range(3):
                my_model2world_mat[m, 3] = my_model2world_t[m]
            my_world2camera_mat = viewMat
            my_camera2clip_mat = projMat
            my_model2camera_mat = np.dot(my_world2camera_mat,
                                         my_model2world_mat)
            parts_model2world[k] = my_model2world_mat

        # depth to cloud data
        mask = np.array((label[:, :] < num_parts) & (label[:, :] > -1)).astype(
            np.uint8)
        mask_whole = np.copy(mask)
        for n in range(num_parts):
            parts_mask[n] = np.array((label[:, :] == (n))).astype(np.uint8)
            choose_to_whole[n] = np.where(parts_mask[n] > 0)

        #>>>>>>>>>>------- rendering target pcloud from depth image --------<<<<<<<<<#
        # first get projected map
        ymap = self.ymap
        xmap = self.xmap
        h = self.height
        w = self.width
        u_map = ymap * 2 / w - 1
        v_map = (512 - xmap) * 2 / h - 1
        v1_map = xmap * 2 / h - 1
        w_channel = -depth
        projected_map = np.stack(
            [u_map * w_channel, v_map * w_channel, depth,
             w_channel]).transpose([1, 2, 0])
        projected_map1 = np.stack(
            [u_map * w_channel, v1_map * w_channel, depth,
             w_channel]).transpose([1, 2, 0])
        for s in range(num_parts):
            x_set, y_set = choose_to_whole[s]
            if len(x_set) < 10:
                print('data is empty, skipping!!!')
                return None
            else:
                choose_x[s] = x_set
                choose_y[s] = y_set

            # ---------------> from projected map into target part_cloud
            # order: cam->world->canon)
            projected_points = projected_map[
                choose_x[s][:].astype(np.uint16),
                choose_y[s][:].astype(np.uint16), :]
            projected_points = np.reshape(projected_points, [-1, 4])
            depth_channel = -projected_points[:, 3:4]
            cloud_cam = np.dot(
                projected_points[:, 0:2] -
                np.dot(depth_channel, projMat[0:2, 2:3].T),
                np.linalg.pinv(projMat[:2, :2].T))

            projected_points1 = projected_map1[
                choose_x[s][:].astype(np.uint16),
                choose_y[s][:].astype(np.uint16), :]
            projected_points1 = np.reshape(projected_points1, [-1, 4])
            cloud_cam_real = np.dot(
                projected_points1[:, 0:2] -
                np.dot(depth_channel, projMat[0:2, 2:3].T),
                np.linalg.pinv(projMat[:2, :2].T))
            cloud_cam_real = np.concatenate((cloud_cam_real, depth_channel),
                                            axis=1)

            cloud_cam = np.concatenate((cloud_cam, depth_channel), axis=1)
            cloud_cam_full = np.concatenate(
                (cloud_cam, np.ones((cloud_cam.shape[0], 1))), axis=1)

            # modify, todo
            camera_pose_mat = np.linalg.pinv(viewMat.T)
            camera_pose_mat[:3, :] = -camera_pose_mat[:3, :]
            cloud_world = np.dot(cloud_cam_full, camera_pose_mat)
            cloud_canon = np.dot(cloud_world,
                                 np.linalg.pinv(parts_model2world[s].T))

            # canon points should be points coordinates centered in the inertial frame
            parts_cloud_cam[s] = cloud_cam_real[:, :3]
            parts_cloud_world[s] = cloud_world[:, :3]
            parts_cloud_canon[s] = cloud_canon[:, :3]

        for k in range(num_parts):
            center_joint_orn = parts_urdf_orn[k]
            my_canon2urdf_r = euler_matrix(
                center_joint_orn[0], center_joint_orn[1],
                center_joint_orn[2])[:4, :4]  # [w, x, y, z]
            my_canon2urdf_t = parts_urdf_pos[k]
            my_canon2urdf_mat = my_canon2urdf_r
            for m in range(3):
                my_canon2urdf_mat[m, 3] = my_canon2urdf_t[m]
            part_points_space = np.concatenate(
                (parts_cloud_canon[k],
                 np.ones((parts_cloud_canon[k].shape[0], 1))),
                axis=1)
            parts_cloud_urdf[k] = np.dot(part_points_space,
                                         my_canon2urdf_mat.T)

        #>>>>>>>>>>>>>>> go to NPCS space
        for link in range(num_parts):
            tight_w = max(parts_cloud_urdf[link][:, 0]) - min(
                parts_cloud_urdf[link][:, 0])
            tight_l = max(parts_cloud_urdf[link][:, 1]) - min(
                parts_cloud_urdf[link][:, 1])
            tight_h = max(parts_cloud_urdf[link][:, 2]) - min(
                parts_cloud_urdf[link][:, 2])
            norm_factor = np.sqrt(1) / np.sqrt(tight_w**2 + tight_l**2 +
                                               tight_h**2)
            base_p = np.array([
                min(parts_cloud_urdf[link][:, 0]),
                min(parts_cloud_urdf[link][:, 1]),
                min(parts_cloud_urdf[link][:, 2])
            ]).reshape(1, 3)
            extre_p = np.array([
                max(parts_cloud_urdf[link][:, 0]),
                max(parts_cloud_urdf[link][:, 1]),
                max(parts_cloud_urdf[link][:, 2])
            ]).reshape(1, 3)
            center_p = (extre_p - base_p) / 2 * norm_factor

            parts_cloud_norm[link] = (parts_cloud_urdf[link][:, :3] -
                                      base_p) * norm_factor + np.array(
                                          [0.5, 0.5, 0.5]).reshape(
                                              1, 3) - center_p.reshape(1, 3)

        x_set_pcloud = np.concatenate(choose_x, axis=0)
        y_set_pcloud = np.concatenate(choose_y, axis=0)

        # save into h5 for rgb_img, input_pts, mask, correpsonding urdf_points
        print('Writing to ', h5_save_name)
        hf = h5py.File(h5_save_name, 'w')
        hf.create_dataset('rgb', data=img)
        hf.create_dataset('mask', data=mask)
        cloud_cam = hf.create_group('gt_points')
        for part_i, points in enumerate(parts_cloud_cam):
            cloud_cam.create_dataset(str(part_i), data=points)
        coord_gt = hf.create_group('gt_coords')
        for part_i, points in enumerate(parts_cloud_urdf):
            coord_gt.create_dataset(str(part_i), data=points)
        hf.close()

        ################# for debug only, let me know if you have questions #################
        if self.is_debug:
            figure = plt.figure(dpi=200)
            ax = plt.subplot(121)
            plt.imshow(img)
            plt.title('RGB image')
            ax1 = plt.subplot(122)
            plt.imshow(depth)
            plt.title('depth image')
            plt.show()
            plot3d_pts(
                [parts_cloud_cam],
                [['part {}'.format(i) for i in range(len(parts_cloud_cam))]],
                s=5,
                title_name=['camera coords'])
            plot3d_pts(
                [parts_cloud_world],
                [['part {}'.format(i) for i in range(len(parts_cloud_world))]],
                s=5,
                title_name=['world coords'])
            plot3d_pts(
                [parts_cloud_canon],
                [['part {}'.format(i) for i in range(len(parts_cloud_canon))]],
                s=5,
                title_name=['canon coords'])
            plot3d_pts(
                [parts_cloud_urdf],
                [['part {}'.format(i) for i in range(len(parts_cloud_urdf))]],
                s=5,
                title_name=['urdf coords'])

        return None
Example #14
    pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
    pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
    pred_c = pred_c.view(bs, num_points)
    how_max, which_max = torch.max(pred_c, 1)
    pred_t = pred_t.view(bs * num_points, 1, 3)

    my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
    my_t = (points.view(bs * num_points, 1, 3) +
            pred_t)[which_max[0]].view(-1).cpu().data.numpy()
    my_pred = np.append(my_r, my_t)
    #SACHIT TRY LEFT MULTIPLY
    T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(
        1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
    my_mat = quaternion_matrix(my_r)
    R = Variable(torch.from_numpy(my_mat[:3, :3].astype(
        np.float32))).cuda().view(1, 3, 3)
    my_mat[0:3, 3] = my_t
    my_r_unrefined = quaternion_matrix(my_r)[:3, :3]
    my_t_unrefined = my_t + 0

    if istracked:
        my_r = my_r_last
        my_t = my_t_last
        my_r_unrefined = quaternion_matrix(my_r)[:3, :3]
        my_t_unrefined = my_t + 0
        emb = last_emb

    ICP = False
    if ICP:
    def pose(self):

        # get mask and segmentation
        mask, bbox, viz = self.draw_seg(self.batch_predict())
        pred = mask
        pred = pred * 255
        pred = np.transpose(pred, (1, 2, 0))  # (CxHxW)->(HxWxC)

        # convert img into tensor
        rgb_original = self.rgb
        norm = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])
        self.rgb = Variable(norm(torch.from_numpy(self.rgb.astype(
            np.float32)))).cuda()

        all_masks = []
        mask_depth = ma.getmaskarray(ma.masked_not_equal(self.depth, 0))
        mask_label = ma.getmaskarray(ma.masked_equal(pred, np.array(255)))

        for b in range(len(bbox)):

            mask = mask_depth * mask_label[:, :, b]
            rmin = int(bbox[b, 0])
            rmax = int(bbox[b, 1])
            cmin = int(bbox[b, 2])
            cmax = int(bbox[b, 3])

            img = np.transpose(rgb_original, (0, 1, 2))  #CxHxW
            img_masked = img[:, rmin:rmax, cmin:cmax]
            choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]

            if len(choose) == 0:
                cc = torch.LongTensor([0])
                return (cc, cc, cc, cc, cc, cc)

            if len(choose) > num_points:
                c_mask = np.zeros(len(choose), dtype=int)
                c_mask[:num_points] = 1
                np.random.shuffle(c_mask)
                choose = choose[c_mask.nonzero()]
            else:
                choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')

            # visualize each masks
            # plt.imshow(mask), plt.show()

            depth_masked = self.depth[
                rmin:rmax,
                cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            xmap_masked = self.xmap[
                rmin:rmax,
                cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            ymap_masked = self.ymap[
                rmin:rmax,
                cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            choose = np.array([choose])

            cam_scale = 1.0
            pt2 = depth_masked / cam_scale
            pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx
            pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy
            cloud = np.concatenate((pt0, pt1, pt2), axis=1)
            cloud = cloud / 1000

            points = torch.from_numpy(cloud.astype(np.float32))
            choose = torch.LongTensor(choose.astype(np.int32))

            img_ = norm(torch.from_numpy(img_masked.astype(np.float32)))
            idx = torch.LongTensor([self.object_index])
            img_ = Variable(img_).cuda().unsqueeze(0)
            points = Variable(points).cuda().unsqueeze(0)
            choose = Variable(choose).cuda().unsqueeze(0)
            idx = Variable(idx).cuda().unsqueeze(0)

            pred_r, pred_t, pred_c, emb = self.estimator(
                img_, points, choose, idx)
            pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
            pred_c = pred_c.view(bs, num_points)
            how_max, which_max = torch.max(pred_c, 0)  #1
            pred_t = pred_t.view(bs * num_points, 1, 3)

            # print("max confidence", how_max)

            my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
            my_t = (points.view(bs * num_points, 1, 3) +
                    pred_t)[which_max[0]].view(-1).cpu().data.numpy()
            my_pred = np.append(my_r, my_t)

            for ite in range(0, iteration):

                T = Variable(torch.from_numpy(
                    my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                        num_points, 1).contiguous().view(1, num_points, 3)
                my_mat = quaternion_matrix(my_r)
                R = Variable(
                    torch.from_numpy(my_mat[:3, :3].astype(
                        np.float32))).cuda().view(1, 3, 3)
                my_mat[0:3, 3] = my_t

                new_points = torch.bmm((points - T), R).contiguous()
                pred_r, pred_t = self.refiner(new_points, emb, idx)
                pred_r = pred_r.view(1, 1, -1)
                pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
                my_r_2 = pred_r.view(-1).cpu().data.numpy()
                my_t_2 = pred_t.view(-1).cpu().data.numpy()
                my_mat_2 = quaternion_matrix(my_r_2)
                my_mat_2[0:3, 3] = my_t_2

                my_mat_final = np.dot(
                    my_mat,
                    my_mat_2)  # refine pose means two matrix multiplication
                my_r_final = copy.deepcopy(my_mat_final)
                my_r_final[0:3, 3] = 0
                my_r_final = quaternion_from_matrix(my_r_final, True)
                my_t_final = np.array([
                    my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]
                ])

                my_pred = np.append(my_r_final, my_t_final)
                my_r = my_r_final
                my_t = my_t_final

            # POSITION # ndds has cm units
            my_t = np.array(my_t)
            # my_t = np.array([my_t[0], my_t[1], 1-my_t[2]])
            # print('estimated translation is:{0}'.format(my_t))

            # ROTATION
            my_r = quaternion_matrix(my_r)[:3, :3]
            # my_r = np.dot(my_r, np.array([[1, 0, 0], [0, 0, -1], [0, -1, 0]]))
            # print('estimated rotation is\n:{0}'.format(my_r))

            # Draw estimated pose 3Dbox
            target = np.dot(self.scaled, my_r.T)  #my_r.T
            target = np.add(target, my_t)
            self.draw_cube(target, viz)

            # Norm pose
            NormPos = np.linalg.norm((my_t), ord=1)
            print("Pos:{0}".format(my_t))

        plt.figure(figsize=(10, 10)), plt.imshow(viz), plt.show()

        return viz
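The cube drawing above applies the estimated pose to model-frame points with target = scaled @ my_r.T + my_t (row-vector convention). A small standalone sketch with placeholder values:

import numpy as np

scaled = np.array([[0.05, 0.05, 0.05],
                   [0.05, 0.05, -0.05],
                   [-0.05, 0.05, 0.05]])   # placeholder 3D box corners (metres)
my_r = np.eye(3)                           # rotation from the refined pose
my_t = np.array([0.0, 0.0, 0.6])           # translation from the refined pose
target = np.dot(scaled, my_r.T) + my_t     # model-frame points carried into camera frame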
def pose_predict(img, depth, rois):
    class_list = [
        '002_master_chef_can', '003_cracker_box', '004_sugar_box',
        '005_tomato_soup_can', '006_mustard_bottle', '007_tuna_fish_can',
        '008_pudding_box', '009_gelatin_box', '010_potted_meat_can',
        '011_banana', '019_pitcher_base', '025_mug', '021_bleach_cleanser',
        '024_bowl', '035_power_drill', '036_wood_block', '037_scissors',
        '040_large_marker', '051_large_clamp', '052_extra_large_clamp',
        '061_foam_brick'
    ]
    try:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        object_number = len(rois)
        bridge = CvBridge()

        #lst = posecnn_rois[:,0:1].flatten()
        #lst = np.unique(label)
        my_result_wo_refine = []
        my_result = []
        for idx in range(object_number):
            #itemid = lst[idx]
            #itemid = class_list.index(rois[idx].Class) +1
            itemid = class_list.index(rois[idx].Class) + 3

            try:
                label = seg_predict(img)
                cv2.imwrite(
                    '/root/catkin_ws/src/dnsefusion/scripts/experiments/scripts/segmentation_image.png',
                    label)
                rmin, rmax, cmin, cmax = get_bbox(rois, idx)
                # bounding box cutting
                #label = seg_predict(img[rmin:rmax,cmin:cmax,:])
                #mask_depth = ma.getmaskarray(ma.masked_not_equal(depth[rmin:rmax, cmin:cmax], 0))
                #mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
                #mask = mask_label * mask_depth
                # only image
                mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
                mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
                mask = mask_label * mask_depth
                #rmin, rmax, cmin, cmax = get_bbox(mask_label)

                choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
                print(choose)
                if len(choose) > num_points:
                    c_mask = np.zeros(len(choose), dtype=int)
                    c_mask[:num_points] = 1
                    np.random.shuffle(c_mask)
                    choose = choose[c_mask.nonzero()]
                else:
                    choose = np.pad(choose, (0, num_points - len(choose)),
                                    'wrap')

                depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
                xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
                ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
                choose = np.array([choose])

                pt2 = depth_masked / cam_scale
                pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
                pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
                cloud = np.concatenate((pt0, pt1, pt2), axis=1)
                img_masked = np.array(img)[:, :, :3]
                img_masked = np.transpose(img_masked, (2, 0, 1))
                img_masked = img_masked[:, rmin:rmax, cmin:cmax]

                cloud = torch.from_numpy(cloud.astype(np.float32))
                choose = torch.LongTensor(choose.astype(np.int32))
                img_masked = norm(
                    torch.from_numpy(img_masked.astype(np.float32)))
                index = torch.LongTensor([itemid - 1])

                cloud = Variable(cloud).cuda()
                choose = Variable(choose).cuda()
                img_masked = Variable(img_masked).cuda()
                index = Variable(index).cuda()
                cloud = cloud.view(1, num_points, 3)
                img_masked = img_masked.view(1, 3,
                                             img_masked.size()[1],
                                             img_masked.size()[2])
                pred_r, pred_t, pred_c, emb = estimator(
                    img_masked, cloud, choose, index)
                pred_r = pred_r / torch.norm(pred_r, dim=2).view(
                    1, num_points, 1)
                pred_c = pred_c.view(bs, num_points)
                how_max, which_max = torch.max(pred_c, 1)
                pred_t = pred_t.view(bs * num_points, 1, 3)
                points = cloud.view(bs * num_points, 1, 3)
                my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
                my_t = (points +
                        pred_t)[which_max[0]].view(-1).cpu().data.numpy()
                my_pred = np.append(my_r, my_t)
                # making pose matrix
                dof = quaternion_matrix(my_r)
                dof[0:3, 3] = my_t
                rot_to_angle = rotationMatrixToEulerAngles(dof[:3, :3])
                rot_to_angle = rot_to_angle.reshape(1, 3)
                my_t = my_t.reshape(1, 3)
                rot_t = np.concatenate([rot_to_angle, my_t], axis=0)
                object_poses = {
                    'tx': my_t[0][0],
                    'ty': my_t[0][1],
                    'tz': my_t[0][2],
                    'qx': my_r[0],
                    'qy': my_r[1],
                    'qz': my_r[2],
                    'qw': my_r[3]
                }
                my_result.append(object_poses)
                open_cv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
                cam_mat = cv2.UMat(
                    np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy],
                               [0, 0, 1]]))
                imgpts, jac = cv2.projectPoints(cld[14], dof[0:3,
                                                             0:3], dof[0:3, 3],
                                                cam_mat, dist)  # 14 mugcup
                open_cv_image = draw(open_cv_image, imgpts.get(), itemid)
                my_result_wo_refine.append(my_pred.tolist())
            except ZeroDivisionError:
                # my_result_wo_refine.append([0.0 for i in range(7)])
                # my_result.append([0.0 for i in range(7)])
                open_cv_image = None
                print('Fail')
    except CvBridgeError as e:
        print(e)

    return my_result, open_cv_image
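
# Each refinement iteration above composes the current pose estimate with the refiner's
# residual by multiplying two homogeneous matrices. A minimal sketch of that composition,
# assuming the quaternion_matrix / quaternion_from_matrix helpers from the transformations
# module used throughout these examples:
import copy
import numpy as np
from transformations import quaternion_matrix, quaternion_from_matrix

def compose_pose(q, t, dq, dt):
    """Apply the residual pose (dq, dt) on top of the current pose (q, t)."""
    m = quaternion_matrix(q)      # 4x4 homogeneous transform from the quaternion
    m[0:3, 3] = t
    dm = quaternion_matrix(dq)
    dm[0:3, 3] = dt
    m_final = np.dot(m, dm)       # the refined pose is the product of the two transforms
    r_only = copy.deepcopy(m_final)
    r_only[0:3, 3] = 0            # drop translation before extracting the quaternion
    q_final = quaternion_from_matrix(r_only, True)
    t_final = m_final[0:3, 3].copy()
    return q_final, t_final
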
Example #17
0
    def callback(self, rgb, depth):
        if DEBUG:
            print('received depth image of type: ' + depth.encoding)
            print('received rgb image of type: ' + rgb.encoding)
        #https://answers.ros.org/question/64318/how-do-i-convert-an-ros-image-into-a-numpy-array/
        depth = np.frombuffer(depth.data,
                              dtype=np.uint16).reshape(depth.height,
                                                       depth.width, -1)
        rgb = np.frombuffer(rgb.data,
                            dtype=np.uint8).reshape(rgb.height, rgb.width, -1)
        rgb_original = rgb
        #cv2.imshow('depth', depth)

        #time1 = time.time()
        rgb = np.transpose(rgb, (2, 0, 1))
        rgb = norm(torch.from_numpy(rgb.astype(np.float32)))
        rgb = Variable(rgb).cuda()
        semantic = self.model(rgb.unsqueeze(0))
        _, pred = torch.max(semantic, dim=1)
        pred = pred * 255
        pred = np.transpose(pred.cpu().numpy(), (1, 2, 0))  # (CxHxW)->(HxWxC)
        #print(pred.shape)

        #ret, threshold = cv2.threshold(pred.cpu().numpy(), 1, 255, cv2.THRESH_BINARY)    #pred is already binary, therefore, this line is unnecessary
        contours, hierarchy = cv2.findContours(np.uint8(pred),
                                               cv2.RETR_EXTERNAL,
                                               cv2.CHAIN_APPROX_SIMPLE)
        cnt = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(cnt)
        rmin, rmax, cmin, cmax = get_bbox([x, y, w, h])
        #cv2.rectangle(rgb_original,(cmin,rmin), (cmax,rmax) , (0,255,0),2)
        #cv2.imwrite('depth.png', depth)          #save depth image

        mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
        mask_label = ma.getmaskarray(ma.masked_equal(pred, np.array(255)))
        mask = mask_depth * mask_label

        #print(rgb.shape)             #torch.Size([3, 480, 640])
        #print(rgb_original.shape)    #(480, 640, 3)
        img = np.transpose(rgb_original, (2, 0, 1))
        img_masked = img[:, rmin:rmax, cmin:cmax]

        choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]

        #print("length of choose is :{0}".format(len(choose)))
        if len(choose) == 0:
            cc = torch.LongTensor([0])
            return (cc, cc, cc, cc, cc, cc)

        if len(choose) > num_points:
            c_mask = np.zeros(len(choose), dtype=int)
            c_mask[:num_points] = 1  # if the object has more than num_points (500) pixels, keep a random 500
            np.random.shuffle(c_mask)
            choose = choose[c_mask.nonzero()]  # now len(choose) = 500
        else:
            choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')

        depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)

        choose = np.array([choose])

        pt2 = depth_masked
        #print(pt2)
        pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx
        pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy
        cloud = np.concatenate((pt0, pt1, pt2), axis=1)
        cloud = cloud / 1000

        points = torch.from_numpy(cloud.astype(np.float32))
        choose = torch.LongTensor(choose.astype(np.int32))
        img = norm(torch.from_numpy(img_masked.astype(np.float32)))
        idx = torch.LongTensor([self.object_index])

        img = Variable(img).cuda().unsqueeze(0)
        points = Variable(points).cuda().unsqueeze(0)
        choose = Variable(choose).cuda().unsqueeze(0)
        idx = Variable(idx).cuda().unsqueeze(0)

        pred_r, pred_t, pred_c, emb = self.estimator(img, points, choose, idx)
        pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
        pred_c = pred_c.view(bs, num_points)
        how_max, which_max = torch.max(pred_c, 1)
        pred_t = pred_t.view(bs * num_points, 1, 3)

        my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
        my_t = (points.view(bs * num_points, 1, 3) +
                pred_t)[which_max[0]].view(-1).cpu().data.numpy()
        my_pred = np.append(my_r, my_t)

        for ite in range(0, iteration):
            T = Variable(torch.from_numpy(
                my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                    num_points, 1).contiguous().view(1, num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(
                np.float32))).cuda().view(1, 3, 3)
            my_mat[0:3, 3] = my_t

            new_points = torch.bmm((points - T), R).contiguous()
            pred_r, pred_t = self.refiner(new_points, emb, idx)
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
            my_r_2 = pred_r.view(-1).cpu().data.numpy()
            my_t_2 = pred_t.view(-1).cpu().data.numpy()
            my_mat_2 = quaternion_matrix(my_r_2)
            my_mat_2[0:3, 3] = my_t_2

            my_mat_final = np.dot(my_mat, my_mat_2)  # pose refinement composes the two transforms
            my_r_final = copy.deepcopy(my_mat_final)
            my_r_final[0:3, 3] = 0
            my_r_final = quaternion_from_matrix(my_r_final, True)
            my_t_final = np.array(
                [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

            my_pred = np.append(my_r_final, my_t_final)
            my_r = my_r_final
            my_t = my_t_final

        my_r = quaternion_matrix(my_r)[:3, :3]
        #print(my_t.shape)
        my_t = np.array(my_t)
        #print(my_t.shape)
        #print(my_r.shape)

        target = np.dot(self.scaled, my_r.T)
        target = np.add(target, my_t)

        p0 = (int((target[0][0] / target[0][2]) * self.cam_fx + self.cam_cx),
              int((target[0][1] / target[0][2]) * self.cam_fy + self.cam_cy))
        p1 = (int((target[1][0] / target[1][2]) * self.cam_fx + self.cam_cx),
              int((target[1][1] / target[1][2]) * self.cam_fy + self.cam_cy))
        p2 = (int((target[2][0] / target[2][2]) * self.cam_fx + self.cam_cx),
              int((target[2][1] / target[2][2]) * self.cam_fy + self.cam_cy))
        p3 = (int((target[3][0] / target[3][2]) * self.cam_fx + self.cam_cx),
              int((target[3][1] / target[3][2]) * self.cam_fy + self.cam_cy))
        p4 = (int((target[4][0] / target[4][2]) * self.cam_fx + self.cam_cx),
              int((target[4][1] / target[4][2]) * self.cam_fy + self.cam_cy))
        p5 = (int((target[5][0] / target[5][2]) * self.cam_fx + self.cam_cx),
              int((target[5][1] / target[5][2]) * self.cam_fy + self.cam_cy))
        p6 = (int((target[6][0] / target[6][2]) * self.cam_fx + self.cam_cx),
              int((target[6][1] / target[6][2]) * self.cam_fy + self.cam_cy))
        p7 = (int((target[7][0] / target[7][2]) * self.cam_fx + self.cam_cx),
              int((target[7][1] / target[7][2]) * self.cam_fy + self.cam_cy))

        cv2.line(rgb_original, p0, p1, (255, 255, 255), 2)
        cv2.line(rgb_original, p0, p3, (255, 255, 255), 2)
        cv2.line(rgb_original, p0, p4, (255, 255, 255), 2)
        cv2.line(rgb_original, p1, p2, (255, 255, 255), 2)
        cv2.line(rgb_original, p1, p5, (255, 255, 255), 2)
        cv2.line(rgb_original, p2, p3, (255, 255, 255), 2)
        cv2.line(rgb_original, p2, p6, (255, 255, 255), 2)
        cv2.line(rgb_original, p3, p7, (255, 255, 255), 2)
        cv2.line(rgb_original, p4, p5, (255, 255, 255), 2)
        cv2.line(rgb_original, p4, p7, (255, 255, 255), 2)
        cv2.line(rgb_original, p5, p6, (255, 255, 255), 2)
        cv2.line(rgb_original, p6, p7, (255, 255, 255), 2)

        #print('estimated rotation is :{0}'.format(my_r))
        #print('estimated translation is :{0}'.format(my_t))

        #time2 = time.time()
        #print('inference time is :{0}'.format(time2-time1))
        cv2.imshow('rgb',
                   cv2.cvtColor(rgb_original,
                                cv2.COLOR_BGR2RGB))  # OpenCV uses BGR model
        cv2.waitKey(1)  # pass any integer except 0; 0 would block until a key is pressed
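
# The eight hard-coded p0..p7 projections above apply the same pinhole formula to every
# transformed box corner. A sketch of the same step written as a loop, under the same
# assumptions (corners already in camera coordinates, intrinsics as plain floats):
import numpy as np

def project_corners(corners_cam, cam_fx, cam_fy, cam_cx, cam_cy):
    """Project (N, 3) camera-frame points to integer pixel coordinates."""
    pts = []
    for x, y, z in np.asarray(corners_cam, dtype=np.float64):
        u = int((x / z) * cam_fx + cam_cx)
        v = int((y / z) * cam_fy + cam_cy)
        pts.append((u, v))
    return pts

# The cube edges drawn above connect these corner index pairs; drawing then becomes a
# single loop of cv2.line calls over this list:
CUBE_EDGES = [(0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (2, 3),
              (2, 6), (3, 7), (4, 5), (4, 7), (5, 6), (6, 7)]
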
Example #18
0
    def estimation_callback(self, rgb,depth,rois):
        try:
            img = self.bridge.imgmsg_to_cv2(rgb,'bgr8')
            depth = self.bridge.imgmsg_to_cv2(depth,'32SC1')
            rois = rois.bounding_boxes
            print(img, depth, rois)
            class_list = [
                '002_master_chef_can', '003_cracker_box', '004_sugar_box',
                '005_tomato_soup_can', '006_mustard_bottle', '007_tuna_fish_can',
                '008_pudding_box', '009_gelatin_box', '010_potted_meat_can',
                '011_banana',  # '019_pitcher_base',
                '025_mug', '021_bleach_cleanser', '024_bowl', '035_power_drill',
                '036_wood_block', '037_scissors', '040_large_marker',
                '051_large_clamp', '052_extra_large_clamp', '061_foam_brick'
            ]
            object_number = len(rois)
            #lst = posecnn_rois[:,0:1].flatten()
            #lst = np.unique(label)
            my_result_wo_refine = []
            my_result = []
            for idx in range(object_number):
                #itemid = lst[idx]
                itemid = class_list.index(rois[idx].Class) +1
                print(itemid, rois[idx])
            
                try:
                    label = seg_predict(img) 
                    rmin, rmax, cmin,cmax = get_bbox(rois,idx)
                    # bounding box cutting
                    #label = seg_predict(img[rmin:rmax,cmin:cmax,:]) 
                    #mask_depth = ma.getmaskarray(ma.masked_not_equal(depth[rmin:rmax, cmin:cmax], 0))
                    #mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
                    #mask = mask_label * mask_depth
                    # only image
                    mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
                    mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
                    mask = mask_label * mask_depth
                    #rmin, rmax, cmin, cmax = get_bbox(mask_label)


                    choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
                    if len(choose) > num_points:
                        c_mask = np.zeros(len(choose), dtype=int)
                        c_mask[:num_points] = 1
                        np.random.shuffle(c_mask)
                        choose = choose[c_mask.nonzero()]
                    else:
                        choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')

                    depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
                    xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
                    ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
                    choose = np.array([choose])

                    pt2 = depth_masked / cam_scale
                    pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
                    pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
                    cloud = np.concatenate((pt0, pt1, pt2), axis=1)

                    img_masked = np.array(img)[:, :, :3]
                    img_masked = np.transpose(img_masked, (2, 0, 1))
                    img_masked = img_masked[:, rmin:rmax, cmin:cmax]
    
                    cloud = torch.from_numpy(cloud.astype(np.float32))
                    choose = torch.LongTensor(choose.astype(np.int32))
                    img_masked = norm(torch.from_numpy(img_masked.astype(np.float32)))
                    index = torch.LongTensor([itemid - 1])

                    cloud = Variable(cloud).cuda()
                    choose = Variable(choose).cuda()
                    img_masked = Variable(img_masked).cuda()
                    index = Variable(index).cuda()
                    cloud = cloud.view(1, num_points, 3)
                    img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2])
                    pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index)
                    pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
                    pred_c = pred_c.view(bs, num_points)
                    how_max, which_max = torch.max(pred_c, 1)
                    pred_t = pred_t.view(bs * num_points, 1, 3)
                    points = cloud.view(bs * num_points, 1, 3)
                    my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
                    my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
                    my_pred = np.append(my_r, my_t)
                    # making pose matrix
                    dof = quaternion_matrix(my_r)
                    dof[0:3,3] = my_t
                    rot_to_angle = rotationMatrixToEulerAngles(dof[:3,:3])
                    rot_to_angle = rot_to_angle.reshape(1,3)
                    my_t = my_t.reshape(1,3)
                    rot_t = np.concatenate([rot_to_angle,my_t], axis= 0)
                    object_poses = {
                        'tx':my_t[0][0],
                        'ty':my_t[0][1],
                        'tz':my_t[0][2],
                        'qx':my_r[0],
                        'qy':my_r[1],
                        'qz':my_r[2],
                        'qw':my_r[3]}
                    my_result.append(object_poses)
                    open_cv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
                    cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy],
                                [0, 0, 1]]))
                    imgpts, jac = cv2.projectPoints(cld[13], dof[0:3,0:3],dof[0:3,3],cam_mat,dist)
                    open_cv_image = draw(open_cv_image,imgpts.get(), itemid)
                    my_result_wo_refine.append(my_pred.tolist())
                    pose_array = PoseArray()
                    pose_msg = Pose()
                    pose_pub.publish(pose_array)
                    pose_fit_image.publish(bridge.cv2_to_imgmsg(open_cv_image, 'bgr8'))

                    """
                    for ite in range(0, iteration):
                        T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
                        my_mat = quaternion_matrix(my_r)
                        R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
                        my_mat[0:3, 3] = my_t

                        new_cloud = torch.bmm((cloud - T), R).contiguous()
                        pred_r, pred_t = refiner(new_cloud, emb, index)
                        pred_r = pred_r.view(1, 1, -1)
                        pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
                        my_r_2 = pred_r.view(-1).cpu().data.numpy()
                        my_t_2 = pred_t.view(-1).cpu().data.numpy()
                        my_mat_2 = quaternion_matrix(my_r_2)


                        my_mat_2[0:3, 3] = my_t_2
                        my_mat_final = np.dot(my_mat, my_mat_2)
                        my_r_final = copy.deepcopy(my_mat_final)
                        my_r_final[0:3, 3] = 0
                        my_r_final = quaternion_from_matrix(my_r_final, True)
    
                        my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

                        my_pred = np.append(my_r_final, my_t_final)
                        my_r = my_r_final
                        my_t = my_t_final
                    """
                    # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation)
                    #my_result.append(my_pred.tolist())
                except ZeroDivisionError:
                    # my_result_wo_refine.append([0.0 for i in range(7)])
                    # my_result.append([0.0 for i in range(7)])
                    print('Fail')
        except CvBridgeError as e:
            print(e)
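
# Every example samples exactly num_points foreground pixels from the mask: a random
# subset when there are more, wrap-padding of the indices when there are fewer. A
# standalone sketch of that selection step:
import numpy as np

def sample_choose(mask_crop, num_points):
    """Return exactly num_points indices into the flattened mask crop, or None if empty."""
    choose = mask_crop.flatten().nonzero()[0]
    if len(choose) == 0:
        return None                       # caller must handle an empty mask
    if len(choose) > num_points:
        c_mask = np.zeros(len(choose), dtype=int)
        c_mask[:num_points] = 1
        np.random.shuffle(c_mask)         # keep a random subset of exactly num_points pixels
        choose = choose[c_mask.nonzero()]
    else:
        choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')
    return choose
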
Example #19
0
    def __getitem__(self, index):
        img = Image.open('{0}/{1}.png'.format(self.root, self.list[index]))
        depth = np.array(Image.open('{0}/{1}_depth.png'.format(self.root, self.list[index])))
        #label = np.array(Image.open('{0}/{1}-label.png'.format(self.root, self.list[index])))
        with open('{0}/{1}.yaml'.format(self.root, self.list[index]), 'r') as s:
            meta = yaml.safe_load(s)

        #mask_back = ma.getmaskarray(ma.masked_equal(label, 0))

        self.add_noise=False
        add_front = False
        if self.add_noise:
            for k in range(5):
                seed = random.choice(self.syn)
                front = np.array(self.trancolor(Image.open('{0}/{1}.png'.format(self.root, seed)).convert("RGB")))
                front = np.transpose(front, (2, 0, 1))
                f_label = np.array(Image.open('{0}/{1}-label.png'.format(self.root, seed)))
                front_label = np.unique(f_label).tolist()[1:]
                if len(front_label) < self.front_num:
                   continue
                front_label = random.sample(front_label, self.front_num)
                for f_i in front_label:
                    mk = ma.getmaskarray(ma.masked_not_equal(f_label, f_i))
                    if f_i == front_label[0]:
                        mask_front = mk
                    else:
                        mask_front = mask_front * mk
                t_label = label * mask_front
                if len(t_label.nonzero()[0]) > 1000:
                    label = t_label
                    add_front = True
                    break
        '''
        obj=[k for k in meta]
        n=[int(meta[k]['num_pixels']) for k in meta]
        labels,nb=np.unique(label,return_counts=True)
        labels=labels[1:]
        nb=nb[1:]
        l=list(np.where(nb>self.num_pt))
        idx = l[0][np.random.randint(0, len(l[0]))]
        it=obj[n.index(nb[idx])]
        mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
        mask_label = ma.getmaskarray(ma.masked_equal(label, labels[idx]))
        mask = mask_label * mask_depth
        '''
        it='sd'
        mask = ma.getmaskarray(ma.masked_not_equal(depth, 0))


        if self.add_noise:
            img = self.trancolor(img)
        #TODO
        #rmin, rmax, cmin, cmax = get_bbox(mask_label)
        rmin, rmax, cmin, cmax = [250, 250, 250, 250]  # placeholder crop; slice indices must be ints
        img = np.transpose(np.array(img)[:, :, :3], (2, 0, 1))[:, rmin:rmax, cmin:cmax]

        # if self.list[index][:8] == 'data_syn':
        #     seed = random.choice(self.real)
        #     back = np.array(self.trancolor(Image.open('{0}/{1}-color.png'.format(self.root, seed)).convert("RGB")))
        #     back = np.transpose(back, (2, 0, 1))[:, rmin:rmax, cmin:cmax]
        #     img_masked = back * mask_back[rmin:rmax, cmin:cmax] + img
        # else:
        img_masked = img

        if self.add_noise and add_front:
            img_masked = img_masked * mask_front[rmin:rmax, cmin:cmax] + front[:, rmin:rmax, cmin:cmax] * ~(mask_front[rmin:rmax, cmin:cmax])

        # if self.list[index][:8] == 'data_syn':
        #     img_masked = img_masked + np.random.normal(loc=0.0, scale=7.0, size=img_masked.shape)

        # p_img = np.transpose(img_masked, (1, 2, 0))
        # scipy.misc.imsave('temp/{0}_input.png'.format(index), p_img)
        # scipy.misc.imsave('temp/{0}_label.png'.format(index), mask[rmin:rmax, cmin:cmax].astype(np.int32))
        #print([k for k in meta[obj[idx]]])

        target_r = quaternion_matrix(meta[it]['pose'][1])[:3,:3]
        target_t = np.array([meta[it]['pose'][0]])

        add_t = np.array([random.uniform(-self.noise_trans, self.noise_trans) for i in range(3)])

        choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]

        if len(choose) > self.num_pt:
            c_mask = np.zeros(len(choose), dtype=int)
            c_mask[:self.num_pt] = 1
            np.random.shuffle(c_mask)
            choose = choose[c_mask.nonzero()]
        else:
            choose = np.pad(choose, (0, self.num_pt - len(choose)), 'wrap')
        
        depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        choose = np.array([choose])

        #cam_scale = meta['factor_depth'][0][0]
        #TODO
        cam_scale = 1.0
        pt2 = depth_masked / cam_scale
        pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx
        pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy
        cloud = np.concatenate((pt0, pt1, pt2), axis=1) #/ 1000.0
        if self.add_noise:
            cloud = np.add(cloud, add_t)

        # fw = open('temp/{0}_cld.xyz'.format(index), 'w')
        # for it in cloud:
        #    fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
        # fw.close()

        dellist = [j for j in range(0, len(self.cld[meta[it]['label']+1]))]
        if self.refine:
            dellist = random.sample(dellist, len(self.cld[meta[it]['label']+1]) - self.num_pt_mesh_large)
        else:
            dellist = random.sample(dellist, len(self.cld[meta[it]['label']+1]) - self.num_pt_mesh_small)
        model_points = np.delete(self.cld[meta[it]['label']+1], dellist, axis=0)

        # fw = open('temp/{0}_model_points.xyz'.format(index), 'w')
        # for it in model_points:
        #    fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
        # fw.close()
        target = np.dot(model_points, target_r.T)
        if self.add_noise:
            target = np.add(target, target_t + add_t) #target_t/1000.0
        else:
            target = np.add(target, target_t) #target_t/1000.0
        
        # fw = open('temp/{0}_tar.xyz'.format(index), 'w')
        # for it in target:
        #    fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
        # fw.close()
        
        return torch.from_numpy(cloud.astype(np.float32)), \
               torch.LongTensor(choose.astype(np.int32)), \
               self.norm(torch.from_numpy(img_masked.astype(np.float32))), \
               torch.from_numpy(target.astype(np.float32)), \
               torch.from_numpy(model_points.astype(np.float32)), \
               torch.LongTensor([int(meta[it]['label']+1)])
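
# PoseNet in these examples predicts one pose hypothesis per input point plus a confidence
# pred_c; the hypothesis that gets refined is the one at the most confident point. A sketch
# of that selection on already-computed outputs, assuming the shapes used throughout:
# pred_r (1, N, 4), pred_t (1, N, 3), pred_c (1, N), points (1, N, 3):
import torch

def select_best_hypothesis(pred_r, pred_t, pred_c, points):
    """Pick the per-point pose with the highest confidence score."""
    bs, num_points = pred_c.shape                               # bs is 1 in these examples
    pred_r = pred_r / torch.norm(pred_r, dim=2, keepdim=True)   # normalise to unit quaternions
    how_max, which_max = torch.max(pred_c, 1)                   # best confidence and its index
    pred_t = pred_t.view(bs * num_points, 1, 3)
    points = points.view(bs * num_points, 1, 3)
    my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
    my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()  # point + predicted offset
    return my_r, my_t, how_max[0].item()
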
def main():
    # g13: parameter setting -------------------
    batch_id = 1
    
    opt.dataset ='linemod'
    opt.dataset_root = './datasets/linemod/Linemod_preprocessed'
    estimator_path = 'trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth'
    refiner_path = 'trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth'
    opt.resume_posenet = estimator_path
    opt.resume_refinenet = refiner_path
    dataset_config_dir = 'datasets/linemod/dataset_config'
    output_result_dir = 'experiments/eval_result/linemod'
    bs = 1 #fixed because of the default setting in torch.utils.data.DataLoader
    opt.iteration = 2 #default is 4 in eval_linemod.py
    t1_idx = 0
    t1_total_eval_num = 3
    
    axis_range = 0.1   # the length of X, Y, and Z axis in 3D
    vimg_dir = 'verify_img'
    if not os.path.exists(vimg_dir):
        os.makedirs(vimg_dir)
    #-------------------------------------------
    
    if opt.dataset == 'ycb':
        opt.num_objects = 21 #number of object classes in the dataset
        opt.num_points = 1000 #number of points on the input pointcloud
        opt.outf = 'trained_models/ycb' #folder to save trained models
        opt.log_dir = 'experiments/logs/ycb' #folder to save logs
        opt.repeat_epoch = 1 #number of repeat times for one epoch training
    elif opt.dataset == 'linemod':
        opt.num_objects = 13
        opt.num_points = 500
        opt.outf = 'trained_models/linemod'
        opt.log_dir = 'experiments/logs/linemod'
        opt.repeat_epoch = 20
    else:
        print('Unknown dataset')
        return
    
    estimator = PoseNet(num_points = opt.num_points, num_obj = opt.num_objects)
    estimator.cuda()
    refiner = PoseRefineNet(num_points = opt.num_points, num_obj = opt.num_objects)
    refiner.cuda()

    if opt.resume_posenet != '':
        estimator.load_state_dict(torch.load(estimator_path))

    if opt.resume_refinenet != '':
        refiner.load_state_dict(torch.load(refiner_path))
        opt.refine_start = True
        opt.decay_start = True
        opt.lr *= opt.lr_rate
        opt.w *= opt.w_rate
        opt.batch_size = int(opt.batch_size / opt.iteration)
        optimizer = optim.Adam(refiner.parameters(), lr=opt.lr)
    else:
        opt.refine_start = False
        opt.decay_start = False
        optimizer = optim.Adam(estimator.parameters(), lr=opt.lr)


    if opt.dataset == 'ycb':
        test_dataset = PoseDataset_ycb('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start)
    elif opt.dataset == 'linemod':
        test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start)
    testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers)
    print('complete loading testing loader\n')
    opt.sym_list = test_dataset.get_sym_list()
    opt.num_points_mesh = test_dataset.get_num_points_mesh()

    print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\n\
        length of the testing set: {0}\nnumber of sample points on mesh: {1}\n\
        symmetry object list: {2}'\
        .format( len(test_dataset), opt.num_points_mesh, opt.sym_list))
    
    
    
    #load pytorch model
    estimator.eval()    
    refiner.eval()
    criterion = Loss(opt.num_points_mesh, opt.sym_list)
    criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list)
    fw = open('{0}/t1_eval_result_logs.txt'.format(output_result_dir), 'w')

    #Pose estimation
    for j, data in enumerate(testdataloader, 0):
        # g13: modify this part for evaluation target--------------------
        if j == t1_total_eval_num:
            break
        #----------------------------------------------------------------
        points, choose, img, target, model_points, idx = data
        if len(points.size()) == 2:
            print('No.{0} NOT Pass! Lost detection!'.format(j))
            fw.write('No.{0} NOT Pass! Lost detection!\n'.format(j))
            continue
        points, choose, img, target, model_points, idx = Variable(points).cuda(), \
                                                             Variable(choose).cuda(), \
                                                             Variable(img).cuda(), \
                                                             Variable(target).cuda(), \
                                                             Variable(model_points).cuda(), \
                                                             Variable(idx).cuda()
        pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
        _, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start)

        #if opt.refine_start: #iterative poserefinement
        #    for ite in range(0, opt.iteration):
        #        pred_r, pred_t = refiner(new_points, emb, idx)
        #        dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points)
        
        pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, opt.num_points, 1)
        pred_c = pred_c.view(bs, opt.num_points)
        how_max, which_max = torch.max(pred_c, 1)
        pred_t = pred_t.view(bs * opt.num_points, 1, 3)
    
        my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
        my_t = (points.view(bs * opt.num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
        my_pred = np.append(my_r, my_t)
    
        for ite in range(0, opt.iteration):
            T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(opt.num_points, 1).contiguous().view(1, opt.num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
            my_mat[0:3, 3] = my_t
            
            new_points = torch.bmm((points - T), R).contiguous()
            pred_r, pred_t = refiner(new_points, emb, idx)
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
            my_r_2 = pred_r.view(-1).cpu().data.numpy()
            my_t_2 = pred_t.view(-1).cpu().data.numpy()
            my_mat_2 = quaternion_matrix(my_r_2)
            my_mat_2[0:3, 3] = my_t_2
    
            my_mat_final = np.dot(my_mat, my_mat_2)
            my_r_final = copy.deepcopy(my_mat_final)
            my_r_final[0:3, 3] = 0
            my_r_final = quaternion_from_matrix(my_r_final, True)
            my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
    
            my_pred = np.append(my_r_final, my_t_final)
            my_r = my_r_final
            my_t = my_t_final

        # g13: start drawing pose on image------------------------------------
        # pick up image
        print("index {0}: {1}".format(j, test_dataset.list_rgb[j]))
        img = Image.open(test_dataset.list_rgb[j])
        
        # pick up center position by bbox
        meta_file = open('{0}/data/{1}/gt.yml'.format(opt.dataset_root, '%02d' % test_dataset.list_obj[j]), 'r')
        meta = {}
        meta = yaml.load(meta_file)
        which_item = test_dataset.list_rank[j]
        bbx = meta[which_item][0]['obj_bb']
        draw = ImageDraw.Draw(img) 
        
        # draw box (ensure this is the right object)
        draw.line((bbx[0],bbx[1], bbx[0], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        draw.line((bbx[0],bbx[1], bbx[0]+bbx[2], bbx[1]), fill=(255,0,0), width=5)
        draw.line((bbx[0],bbx[1]+bbx[3], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        draw.line((bbx[0]+bbx[2],bbx[1], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        
        #get center
        c_x = bbx[0]+int(bbx[2]/2)
        c_y = bbx[1]+int(bbx[3]/2)
        draw.point((c_x,c_y), fill=(255,255,0))
        
        #get the 3D position of center
        cam_intrinsic = np.zeros((3,3))
        cam_intrinsic.itemset(0, test_dataset.cam_fx)
        cam_intrinsic.itemset(4, test_dataset.cam_fy)
        cam_intrinsic.itemset(2, test_dataset.cam_cx)
        cam_intrinsic.itemset(5, test_dataset.cam_cy)
        cam_intrinsic.itemset(8, 1)
        cam_extrinsic = my_mat_final[0:3, :]
        cam2d_3d = np.matmul(cam_intrinsic, cam_extrinsic)
        cen_3d = np.matmul(np.linalg.pinv(cam2d_3d), [[c_x],[c_y],[1]])
        # replace img.show() with plt.imshow(img)
        
        #transpose three 3D axis point into 2D
        x_3d = cen_3d + [[axis_range],[0],[0],[0]]
        y_3d = cen_3d + [[0],[axis_range],[0],[0]]
        z_3d = cen_3d + [[0],[0],[axis_range],[0]]
        x_2d = np.matmul(cam2d_3d, x_3d)
        y_2d = np.matmul(cam2d_3d, y_3d)
        z_2d = np.matmul(cam2d_3d, z_3d)
        
        #draw the axis on 2D
        draw.line((c_x, c_y, x_2d[0], x_2d[1]), fill=(255,255,0), width=5)
        draw.line((c_x, c_y, y_2d[0], y_2d[1]), fill=(0,255,0), width=5)
        draw.line((c_x, c_y, z_2d[0], z_2d[1]), fill=(0,0,255), width=5)

        #g13: show image
        #img.show()
        
        #save file under file 
        img_file_name = '{0}/pred_obj{1}_pic{2}.png'.format(vimg_dir, test_dataset.list_obj[j], which_item)
        img.save( img_file_name, "PNG" )
        img.close()
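
# main() above visualises the pose by back-projecting the bbox centre with the pseudo-inverse
# of intrinsic @ extrinsic and then re-projecting three shifted axis endpoints. A sketch of
# that projection step (this version adds the perspective divide, which the code above skips):
import numpy as np

def project_axes(cam_intrinsic, cam_extrinsic, c_x, c_y, axis_range=0.1):
    """Return 2D endpoints of the X/Y/Z axes anchored at the back-projected bbox centre."""
    P = np.matmul(cam_intrinsic, cam_extrinsic)                    # 3x4 projection matrix
    center_3d = np.matmul(np.linalg.pinv(P), [[c_x], [c_y], [1]])  # least-squares 3D centre
    endpoints = []
    for offset in ([axis_range, 0, 0, 0], [0, axis_range, 0, 0], [0, 0, axis_range, 0]):
        p3d = center_3d + np.array(offset, dtype=np.float64).reshape(4, 1)
        p2d = np.matmul(P, p3d)
        p2d = p2d / p2d[2]                                         # perspective divide
        endpoints.append((float(p2d[0, 0]), float(p2d[1, 0])))
    return endpoints  # [(x_u, x_v), (y_u, y_v), (z_u, z_v)]
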
Example #21
0
def main():
    # g13: parameter setting -------------------
    '''
    posemodel is trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth
    refine model is trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth

    '''
    objlist = [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15]
    knn = KNearestNeighbor(1)
    opt.dataset ='linemod'
    opt.dataset_root = './datasets/linemod/Linemod_preprocessed'
    estimator_path = 'trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth'
    refiner_path = 'trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth'
    opt.model = estimator_path
    opt.refine_model = refiner_path
    dataset_config_dir = 'datasets/linemod/dataset_config'
    output_result_dir = 'experiments/eval_result/linemod'
    opt.refine_start = True
    bs = 1 #fixed because of the default setting in torch.utils.data.DataLoader
    opt.iteration = 2 #default is 4 in eval_linemod.py
    t1_start = True
    t1_idx = 0
    t1_total_eval_num = 3
    t2_start = False
    t2_target_list = [22, 30, 172, 187, 267, 363, 410, 471, 472, 605, 644, 712, 1046, 1116, 1129, 1135, 1263]
    #t2_target_list = [0, 1]
    axis_range = 0.1   # the length of X, Y, and Z axis in 3D
    vimg_dir = 'verify_img'
    diameter = []
    meta_file = open('{0}/models_info.yml'.format(dataset_config_dir), 'r')
    meta_d = yaml.load(meta_file)
    for obj in objlist:
        diameter.append(meta_d[obj]['diameter'] / 1000.0 * 0.1)
    print(diameter)
    if not os.path.exists(vimg_dir):
        os.makedirs(vimg_dir)
    #-------------------------------------------
    
    if opt.dataset == 'ycb':
        opt.num_objects = 21 #number of object classes in the dataset
        opt.num_points = 1000 #number of points on the input pointcloud
        opt.outf = 'trained_models/ycb' #folder to save trained models
        opt.log_dir = 'experiments/logs/ycb' #folder to save logs
        opt.repeat_epoch = 1 #number of repeat times for one epoch training
    elif opt.dataset == 'linemod':
        opt.num_objects = 13
        opt.num_points = 500
        opt.outf = 'trained_models/linemod'
        opt.log_dir = 'experiments/logs/linemod'
        opt.repeat_epoch = 20
    else:
        print('Unknown dataset')
        return
    
    estimator = PoseNet(num_points = opt.num_points, num_obj = opt.num_objects)
    estimator.cuda()
    refiner = PoseRefineNet(num_points = opt.num_points, num_obj = opt.num_objects)
    refiner.cuda()
  
    estimator.load_state_dict(torch.load(estimator_path))    
    refiner.load_state_dict(torch.load(refiner_path))
    opt.refine_start = True
    
    test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start)
    testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers)
    
    opt.sym_list = test_dataset.get_sym_list()
    opt.num_points_mesh = test_dataset.get_num_points_mesh()

    print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\n\
        length of the testing set: {0}\nnumber of sample points on mesh: {1}\n\
        symmetry object list: {2}'\
        .format( len(test_dataset), opt.num_points_mesh, opt.sym_list))
   
    
    #load pytorch model
    estimator.eval()    
    refiner.eval()
    criterion = Loss(opt.num_points_mesh, opt.sym_list)
    criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list)
    fw = open('{0}/t1_eval_result_logs.txt'.format(output_result_dir), 'w')

    #Pose estimation
    for j, data in enumerate(testdataloader, 0):
        # g13: modify this part for evaluation target--------------------
        if t1_start and j == t1_total_eval_num:
            break
        if t2_start and not (j in t2_target_list):
            continue
        #----------------------------------------------------------------
        points, choose, img, target, model_points, idx = data
        if len(points.size()) == 2:
            print('No.{0} NOT Pass! Lost detection!'.format(j))
            fw.write('No.{0} NOT Pass! Lost detection!\n'.format(j))
            continue
        points, choose, img, target, model_points, idx = Variable(points).cuda(), \
                                                             Variable(choose).cuda(), \
                                                             Variable(img).cuda(), \
                                                             Variable(target).cuda(), \
                                                             Variable(model_points).cuda(), \
                                                             Variable(idx).cuda()
        pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
        _, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start)

        #if opt.refine_start: #iterative poserefinement
        #    for ite in range(0, opt.iteration):
        #        pred_r, pred_t = refiner(new_points, emb, idx)
        #        dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points)
        
        pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, opt.num_points, 1)
        pred_c = pred_c.view(bs, opt.num_points)
        how_max, which_max = torch.max(pred_c, 1)
        pred_t = pred_t.view(bs * opt.num_points, 1, 3)
    
        my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
        my_t = (points.view(bs * opt.num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
        my_pred = np.append(my_r, my_t)
    
        for ite in range(0, opt.iteration):
            T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(opt.num_points, 1).contiguous().view(1, opt.num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
            my_mat[0:3, 3] = my_t
            
            new_points = torch.bmm((points - T), R).contiguous()
            pred_r, pred_t = refiner(new_points, emb, idx)
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
            my_r_2 = pred_r.view(-1).cpu().data.numpy()
            my_t_2 = pred_t.view(-1).cpu().data.numpy()
            my_mat_2 = quaternion_matrix(my_r_2)
            my_mat_2[0:3, 3] = my_t_2
    
            my_mat_final = np.dot(my_mat, my_mat_2)
            my_r_final = copy.deepcopy(my_mat_final)
            my_r_final[0:3, 3] = 0
            my_r_final = quaternion_from_matrix(my_r_final, True)
            my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
    
            my_pred = np.append(my_r_final, my_t_final)
            my_r = my_r_final
            my_t = my_t_final
            # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation)
        
        #g13: checking the dis value
        success_count = [0 for i in range(opt.num_objects)]
        num_count = [0 for i in range(opt.num_objects)]
        model_points = model_points[0].cpu().detach().numpy()
        my_r = quaternion_matrix(my_r)[:3, :3]
        pred = np.dot(model_points, my_r.T) + my_t
        target = target[0].cpu().detach().numpy()
    
        if idx[0].item() in opt.sym_list:
            pred = torch.from_numpy(pred.astype(np.float32)).cuda().transpose(1, 0).contiguous()
            target = torch.from_numpy(target.astype(np.float32)).cuda().transpose(1, 0).contiguous()
            inds = knn(target.unsqueeze(0), pred.unsqueeze(0))
            target = torch.index_select(target, 1, inds.view(-1) - 1)
            dis = torch.mean(torch.norm((pred.transpose(1, 0) - target.transpose(1, 0)), dim=1), dim=0).item()
        else:
            dis = np.mean(np.linalg.norm(pred - target, axis=1))
    
        if dis < diameter[idx[0].item()]:
            success_count[idx[0].item()] += 1
            print('No.{0} Pass! Distance: {1}'.format(j, dis))
            fw.write('No.{0} Pass! Distance: {1}\n'.format(j, dis))
        else:
            print('No.{0} NOT Pass! Distance: {1}'.format(j, dis))
            fw.write('No.{0} NOT Pass! Distance: {1}\n'.format(j, dis))
        num_count[idx[0].item()] += 1
        
        # g13: start drawing pose on image------------------------------------
        # pick up image
        print('{0}:\nmy_r is {1}\nmy_t is {2}\ndis:{3}'.format(j, my_r, my_t, dis))
        print("index {0}: {1}".format(j, test_dataset.list_rgb[j]))
        img = Image.open(test_dataset.list_rgb[j])
        
        # pick up center position by bbox
        meta_file = open('{0}/data/{1}/gt.yml'.format(opt.dataset_root, '%02d' % test_dataset.list_obj[j]), 'r')
        meta = {}
        meta = yaml.load(meta_file)
        which_item = test_dataset.list_rank[j]
        which_obj = test_dataset.list_obj[j]
        which_dict = 0
        dict_leng = len(meta[which_item])
        #print('get meta[{0}][{1}][obj_bb]'.format(which_item, which_obj))
        k_idx = 0
        while 1:
            if meta[which_item][k_idx]['obj_id'] == which_obj:
                which_dict = k_idx
                break
            k_idx = k_idx+1
        
        bbx = meta[which_item][which_dict]['obj_bb']
        draw = ImageDraw.Draw(img) 
        
        # draw box (ensure this is the right object)
        draw.line((bbx[0],bbx[1], bbx[0], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        draw.line((bbx[0],bbx[1], bbx[0]+bbx[2], bbx[1]), fill=(255,0,0), width=5)
        draw.line((bbx[0],bbx[1]+bbx[3], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        draw.line((bbx[0]+bbx[2],bbx[1], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        
        #get center
        c_x = bbx[0]+int(bbx[2]/2)
        c_y = bbx[1]+int(bbx[3]/2)
        draw.point((c_x,c_y), fill=(255,255,0))
        print('center:({0},{1})'.format(c_x, c_y))
        
        #get the 3D position of center
        cam_intrinsic = np.zeros((3,3))
        cam_intrinsic.itemset(0, test_dataset.cam_fx)
        cam_intrinsic.itemset(4, test_dataset.cam_fy)
        cam_intrinsic.itemset(2, test_dataset.cam_cx)
        cam_intrinsic.itemset(5, test_dataset.cam_cy)
        cam_intrinsic.itemset(8, 1)
        cam_extrinsic = my_mat_final[0:3, :]
        cam2d_3d = np.matmul(cam_intrinsic, cam_extrinsic)
        cen_3d = np.matmul(np.linalg.pinv(cam2d_3d), [[c_x],[c_y],[1]])
        # replace img.show() with plt.imshow(img)
        
        #transpose three 3D axis point into 2D
        x_3d = cen_3d + [[axis_range],[0],[0],[0]]
        y_3d = cen_3d + [[0],[axis_range],[0],[0]]
        z_3d = cen_3d + [[0],[0],[axis_range],[0]]
        x_2d = np.matmul(cam2d_3d, x_3d)
        y_2d = np.matmul(cam2d_3d, y_3d)
        z_2d = np.matmul(cam2d_3d, z_3d)
        
        #draw the axis on 2D
        draw.line((c_x, c_y, x_2d[0], x_2d[1]), fill=(255,255,0), width=5)
        draw.line((c_x, c_y, y_2d[0], y_2d[1]), fill=(0,255,0), width=5)
        draw.line((c_x, c_y, z_2d[0], z_2d[1]), fill=(0,0,255), width=5)
        
        #g13: draw the estimated object points (project model points with the camera intrinsics)
        for pti in pred:
            pti_2d = np.matmul(cam_intrinsic, pti)
            pti_2d = pti_2d / pti_2d[2]  # perspective divide to get pixel coordinates
            #print('({0},{1})\n'.format(int(pti_2d[0]),int(pti_2d[1])))
            draw.point([int(pti_2d[0]),int(pti_2d[1])], fill=(255,255,0))
            
        
        #g13: show image
        #img.show()
        
        #save file under file 
        img_file_name = '{0}/batch{1}_pred_obj{2}_pic{3}.png'.format(vimg_dir, j, test_dataset.list_obj[j], which_item)
        img.save( img_file_name, "PNG" )
        img.close()
        
        # plot ground true ----------------------------
        img = Image.open(test_dataset.list_rgb[j])
        draw = ImageDraw.Draw(img) 
        draw.line((bbx[0],bbx[1], bbx[0], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        draw.line((bbx[0],bbx[1], bbx[0]+bbx[2], bbx[1]), fill=(255,0,0), width=5)
        draw.line((bbx[0],bbx[1]+bbx[3], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        draw.line((bbx[0]+bbx[2],bbx[1], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5)        
        target_r = np.resize(np.array(meta[which_item][k_idx]['cam_R_m2c']), (3, 3))                
        target_t = np.array(meta[which_item][k_idx]['cam_t_m2c'])
        target_t = target_t[np.newaxis, :]               
        cam_extrinsic_GT = np.concatenate((target_r, target_t.T), axis=1)
        
        
        #get center 3D
        cam2d_3d_GT = np.matmul(cam_intrinsic, cam_extrinsic_GT)
        cen_3d_GT = np.matmul(np.linalg.pinv(cam2d_3d_GT), [[c_x],[c_y],[1]])
        
        #transpose three 3D axis point into 2D
        x_3d = cen_3d_GT + [[axis_range],[0],[0],[0]]
        y_3d = cen_3d_GT + [[0],[axis_range],[0],[0]]
        z_3d = cen_3d_GT + [[0],[0],[axis_range],[0]]
        
        x_2d = np.matmul(cam2d_3d_GT, x_3d)
        y_2d = np.matmul(cam2d_3d_GT, y_3d)
        z_2d = np.matmul(cam2d_3d_GT, z_3d)

        #draw the axis on 2D
        draw.line((c_x, c_y, x_2d[0], x_2d[1]), fill=(255,255,0), width=5)
        draw.line((c_x, c_y, y_2d[0], y_2d[1]), fill=(0,255,0), width=5)
        draw.line((c_x, c_y, z_2d[0], z_2d[1]), fill=(0,0,255), width=5)
      
       
        print('pred:\n{0}\nGT:\n{1}\n'.format(cam_extrinsic,cam_extrinsic_GT))
        print('pred 3D:{0}\nGT 3D:{1}\n'.format(cen_3d, cen_3d_GT))
        img_file_name = '{0}/batch{1}_pred_obj{2}_pic{3}_gt.png'.format(vimg_dir, j, test_dataset.list_obj[j], which_item)
        img.save( img_file_name, "PNG" )
        img.close()
        meta_file.close()
    print('\nplot_result_img.py completed the task\n')
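
# The pass/fail check above is the ADD metric (mean distance between corresponding model
# points under the predicted and ground-truth poses); symmetric objects use the ADD-S
# variant, matching each predicted point to its nearest ground-truth point. A NumPy/SciPy
# sketch of both, using scipy's cKDTree in place of the CUDA KNearestNeighbor used above:
import numpy as np
from scipy.spatial import cKDTree

def add_metric(model_points, R_pred, t_pred, R_gt, t_gt, symmetric=False):
    """ADD / ADD-S distance between the model transformed by two poses."""
    pred = np.dot(model_points, R_pred.T) + t_pred
    target = np.dot(model_points, R_gt.T) + t_gt
    if symmetric:
        dists, _ = cKDTree(target).query(pred, k=1)   # nearest-neighbour correspondences (ADD-S)
        return float(np.mean(dists))
    return float(np.mean(np.linalg.norm(pred - target, axis=1)))  # fixed correspondences (ADD)

# The evaluation above accepts a pose when this distance is under 10% of the object diameter.
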
                how_max, which_max = torch.max(pred_c, 1)

                pred_t = pred_t.view(bs * num_points, 1, 3)
                points = cloud.view(bs * num_points, 1, 3)

                my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
                my_t = (points +
                        pred_t)[which_max[0]].view(-1).cpu().data.numpy()
                my_pred = np.append(my_r, my_t)
                ### print("my_pred w/o refinement: \n", my_pred)

                for ite in range(0, iteration):
                    T = Variable(torch.from_numpy(
                        my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                            num_points, 1).contiguous().view(1, num_points, 3)
                    my_mat = quaternion_matrix(my_r)
                    R = Variable(
                        torch.from_numpy(my_mat[:3, :3].astype(
                            np.float32))).cuda().view(1, 3, 3)
                    my_mat[0:3, 3] = my_t

                    new_cloud = torch.bmm((cloud - T), R).contiguous()
                    pred_r, pred_t = refiner(new_cloud, emb, index)
                    pred_r = pred_r.view(1, 1, -1)
                    pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
                    my_r_2 = pred_r.view(-1).cpu().data.numpy()
                    my_t_2 = pred_t.view(-1).cpu().data.numpy()
                    my_mat_2 = quaternion_matrix(my_r_2)

                    my_mat_2[0:3, 3] = my_t_2
    #     my_mat_2 = quaternion_matrix(my_r_2)
    #     my_mat_2[0:3, 3] = my_t_2

    #     my_mat_final = np.dot(my_mat, my_mat_2)
    #     my_r_final = copy.deepcopy(my_mat_final)
    #     my_r_final[0:3, 3] = 0
    #     my_r_final = quaternion_from_matrix(my_r_final, True)
    #     my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

    #     my_pred = np.append(my_r_final, my_t_final)
    #     my_r = my_r_final
    #     my_t = my_t_final

    # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation)
    model_points = model_points[0].cpu().detach().numpy()
    my_r = quaternion_matrix(my_r)[:3, :3]
    pred = np.dot(model_points, my_r.T) + my_t
    target = target[0].cpu().detach().numpy()

    if idx[0].item() in sym_list:
        pred = torch.from_numpy(pred.astype(np.float32)).cuda().transpose(
            1, 0).contiguous()
        target = torch.from_numpy(target.astype(np.float32)).cuda().transpose(
            1, 0).contiguous()
        inds = knn(target.unsqueeze(0), pred.unsqueeze(0))
        target = torch.index_select(target, 1, inds.view(-1) - 1)
        dis = torch.mean(torch.norm(
            (pred.transpose(1, 0) - target.transpose(1, 0)), dim=1),
                         dim=0).item()
    else:
        dis = np.mean(np.linalg.norm(pred - target, axis=1))
Example #24
0
    def callback(self):

        time1 = time.time()

        rgb_original = self.rgb
        self.rgb = np.transpose(self.rgb, (2, 0, 1))
        norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.rgb = norm(torch.from_numpy(self.rgb.astype(np.float32)))
        
        self.rgb = Variable(self.rgb).cuda()
        semantic = self.model(self.rgb.unsqueeze(0))
        _, pred = torch.max(semantic, dim=1)
        pred = pred *255
        if IMGSAVE:
            torchvision.utils.save_image(pred, path + '/seg_result/out/' + 'torchpred.png')

        pred = np.transpose(pred.cpu().numpy(), (1, 2, 0)) # (CxHxW)->(HxWxC)
        if IMGSAVE:
            cv2.imwrite(path + '/seg_result/out/' + 'numpypred.png', pred)
        
        _, contours, _ = cv2.findContours(np.uint8(pred),cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnt = max(contours, key=cv2.contourArea)
        x,y,w,h = cv2.boundingRect(cnt)
        rmin, rmax, cmin, cmax = get_bbox([x,y,w,h ])
        print(get_bbox([x,y,w,h ]))

        if IMGSAVE:
            img_bbox = np.array(rgb_original.copy())
            cv2.rectangle(img_bbox, (cmin, rmin), (cmax, rmax), (255, 0, 0), 2)
            cv2.imwrite(path + '/seg_result/out/' + 'bbox.png', img_bbox)

        mask_depth = ma.getmaskarray(ma.masked_not_equal(self.depth,0))
        mask_label = ma.getmaskarray(ma.masked_equal(pred, np.array(255)))
        # print(mask_depth.shape, mask_label.shape)
        mask = mask_depth * mask_label.reshape(480, 640)

        img = np.transpose(rgb_original, (2, 0, 1))
        img_masked = img[:, rmin:rmax, cmin:cmax]
        choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]

        #print("length of choose is :{0}".format(len(choose))) 
        if len(choose) == 0:
            cc = torch.LongTensor([0])
            return(cc, cc, cc, cc, cc, cc)
        
        if len(choose) > num_points:
            c_mask = np.zeros(len(choose), dtype=int)
            c_mask[:num_points] = 1  # if the object has more than num_points (500) pixels, keep a random 500
            np.random.shuffle(c_mask)
            choose = choose[c_mask.nonzero()]  # now len(choose) = 500
        else:
            choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')

        depth_masked = self.depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)

        choose = np.array([choose])

        pt2 = depth_masked
        pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx
        pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy
        cloud = np.concatenate((pt0, pt1, pt2), axis=1)
        cloud = cloud / 1000
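        # The block above is the standard pinhole back-projection: a masked pixel with
        # depth z maps to x = (u - cx) * z / fx, y = (v - cy) * z / fy; the final /1000
        # presumably converts the depth units (mm) to metres.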

        points = torch.from_numpy(cloud.astype(np.float32))
        choose = torch.LongTensor(choose.astype(np.int32))
        img = norm(torch.from_numpy(img_masked.astype(np.float32)))
        idx = torch.LongTensor([self.object_index])

        img = Variable(img).cuda().unsqueeze(0)
        points = Variable(points).cuda().unsqueeze(0)
        choose = Variable(choose).cuda().unsqueeze(0)
        idx = Variable(idx).cuda().unsqueeze(0)
 
        pred_r, pred_t, pred_c, emb = self.estimator(img, points, choose, idx)
        pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
        pred_c = pred_c.view(bs, num_points)
        how_max, which_max = torch.max(pred_c, 1)
        pred_t = pred_t.view(bs * num_points, 1, 3)

        my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
        my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
        my_pred = np.append(my_r, my_t)

        for ite in range(0, iteration):
            T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
            my_mat[0:3, 3] = my_t
            
            new_points = torch.bmm((points - T), R).contiguous()
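            # Note: (points - T) @ R re-expresses the observed cloud in the frame of the
            # current pose estimate, so the refiner only has to predict a small residual correction.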
            pred_r, pred_t = self.refiner(new_points, emb, idx)
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
            my_r_2 = pred_r.view(-1).cpu().data.numpy()
            my_t_2 = pred_t.view(-1).cpu().data.numpy()
            my_mat_2 = quaternion_matrix(my_r_2)
            my_mat_2[0:3, 3] = my_t_2

            my_mat_final = np.dot(my_mat, my_mat_2)  # refinement = composing the current 4x4 pose with the predicted 4x4 correction
            my_r_final = copy.deepcopy(my_mat_final)
            my_r_final[0:3, 3] = 0
            my_r_final = quaternion_from_matrix(my_r_final, True)
            my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

            my_pred = np.append(my_r_final, my_t_final)
            my_r = my_r_final
            my_t = my_t_final

        my_r = quaternion_matrix(my_r)[:3, :3]
        my_t = np.array(my_t)
        
        print('estimated rotation is\n:{0}'.format(my_r))
        print('estimated translation is\n :{0}'.format(my_t))

        ## custom axis permutation for the 3D box corners
        col = [2, 0, 1]
        new_col = np.zeros((len(col), len(col)))
        for row, i in enumerate(col):  # renamed from 'idx' to avoid shadowing the object-index variable above
            new_col[row, i] = 1

        self.scaled = np.dot(self.scaled, new_col)
        target = np.dot(self.scaled, my_r.T)
        target = np.add(target, my_t)

        p0 = (int((target[0][0]/ target[0][2])*self.cam_fx + self.cam_cx),  int((target[0][1]/ target[0][2])*self.cam_fy + self.cam_cy))
        p1 = (int((target[1][0]/ target[1][2])*self.cam_fx + self.cam_cx),  int((target[1][1]/ target[1][2])*self.cam_fy + self.cam_cy))
        p2 = (int((target[2][0]/ target[2][2])*self.cam_fx + self.cam_cx),  int((target[2][1]/ target[2][2])*self.cam_fy + self.cam_cy))
        p3 = (int((target[3][0]/ target[3][2])*self.cam_fx + self.cam_cx),  int((target[3][1]/ target[3][2])*self.cam_fy + self.cam_cy))
        p4 = (int((target[4][0]/ target[4][2])*self.cam_fx + self.cam_cx),  int((target[4][1]/ target[4][2])*self.cam_fy + self.cam_cy))
        p5 = (int((target[5][0]/ target[5][2])*self.cam_fx + self.cam_cx),  int((target[5][1]/ target[5][2])*self.cam_fy + self.cam_cy))
        p6 = (int((target[6][0]/ target[6][2])*self.cam_fx + self.cam_cx),  int((target[6][1]/ target[6][2])*self.cam_fy + self.cam_cy))
        p7 = (int((target[7][0]/ target[7][2])*self.cam_fx + self.cam_cx),  int((target[7][1]/ target[7][2])*self.cam_fy + self.cam_cy))
        
        cv2.line(rgb_original, p0,p1,(0,0,255), 2)
        cv2.line(rgb_original, p0,p3,(0,0,255), 2)
        cv2.line(rgb_original, p0,p4,(0,0,255), 2)
        cv2.line(rgb_original, p1,p2,(0,0,255), 2)
        cv2.line(rgb_original, p1,p5,(0,0,255), 2)
        cv2.line(rgb_original, p2,p3,(0,0,255), 2)
        cv2.line(rgb_original, p2,p6,(0,0,255), 2)
        cv2.line(rgb_original, p3,p7,(0,0,255), 2)
        cv2.line(rgb_original, p4,p5,(0,0,255), 2)
        cv2.line(rgb_original, p4,p7,(0,0,255), 2)
        cv2.line(rgb_original, p5,p6,(0,0,255), 2)
        cv2.line(rgb_original, p6,p7,(0,0,255), 2)
        

        """ Do not support live-view like cv.imshow """
        plt.figure(figsize = (10,10))
        plt.imshow(rgb_original, cmap = 'gray', interpolation = 'nearest', aspect='auto')
        plt.xticks([]), plt.yticks([])  # to hide tick values on X and Y axis
        plt.show()
        
        """ need python3.x """
        ## https://stackoverflow.com/questions/14655969/opencv-error-the-function-is-not-implemented
        # cv2.imshow('rgb', cv2.cvtColor(rgb_original, cv2.COLOR_BGR2RGB))  # OpenCV uses BGR model
        # # cv2.waitKey(1)
        # key = cv2.waitKey(1) & 0xFF
        # if  key == 27:
        #     print("stopping streaming...")
        #     break

        time2 = time.time()
        print('inference time is :{0}'.format(time2-time1))
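The eight corner projections and twelve cv2.line calls above can be collapsed into a small helper. A sketch under the assumption that corners_cam is the (8, 3) array of camera-frame box corners (the target computed above) and fx, fy, cx, cy are the camera intrinsics:

import numpy as np
import cv2

# Box edges indexed into the 8 corners, in the same order as p0..p7 above.
BOX_EDGES = [(0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (2, 3),
             (2, 6), (3, 7), (4, 5), (4, 7), (5, 6), (6, 7)]

def draw_3d_box(img, corners_cam, fx, fy, cx, cy, color=(0, 0, 255)):
    # Project camera-frame corners to pixels with the pinhole model, then draw the edges.
    u = corners_cam[:, 0] / corners_cam[:, 2] * fx + cx
    v = corners_cam[:, 1] / corners_cam[:, 2] * fy + cy
    pts = [(int(px), int(py)) for px, py in zip(u, v)]
    for i, j in BOX_EDGES:
        cv2.line(img, pts[i], pts[j], color, 2)
    return img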
Example #25
0
    def forward(self, img, x, choose, obj, focal_length, principal_point,
                motion, is_train):
        if img.shape[1] == 0 and is_train:
            if self.last_R_total[int(obj)] is None:
                return None, None, None, None
            last_pose = torch.cat([
                torch.cat([
                    self.last_R_total[int(obj)],
                    (self.last_t_total[int(obj)] +
                     self.last_x_total[int(obj)]).reshape(1000, 3, 1)
                ],
                          dim=2),
                torch.as_tensor([[0, 0, 0, 1]], dtype=torch.float32).repeat(
                    1000, 1, 1).cuda()
            ],
                                  dim=1)
            init_pose = motion[0].cuda().repeat(1000, 1, 1)
            transformed_pose = torch.bmm(init_pose, last_pose)
            self.last_R_total[int(obj)] = transformed_pose[:, 0:3, 0:3]
            self.last_t_total[int(obj)] = transformed_pose[:, 0:3, 3].reshape(
                1, 1000, 3) - self.last_x_total[int(obj)]
            out_rx = self.last_R_total[int(obj)]
            out_tx = self.last_t_total[int(obj)]
            out_cx = self.last_c_total[int(obj)]
            out_x = self.last_x_total[int(obj)]
            return out_rx, out_tx, out_cx, out_x

        out_img = self.cnn(img)
        bs, di, _, _ = out_img.size()
        choose_label = choose.repeat(1, di, 1)
        label_img = out_img.view(bs, di, -1)
        x_label = torch.gather(label_img, 2, choose_label).transpose(1, 2)

        x_six = torch.cat([x, x_label], 2)

        pred_r_from_last = torch.as_tensor([1., 0, 0, 0]).view(1, 4, 1).cuda()
        pred_t_from_last = torch.as_tensor([1., 0, 0]).view(1, 3, 1).cuda()

        if self.last_R[int(obj)] is not None:

            init_pose = motion[0].cuda()
            last_R_matrix = quaternion_matrix(self.last_R[int(obj)])[0:3,
                                                                     0:3].T
            last_pose = torch.cat([
                torch.cat([
                    torch.as_tensor(last_R_matrix, dtype=torch.float32),
                    self.last_t[int(obj)].view(3, 1)
                ],
                          dim=1),
                torch.as_tensor([[0, 0, 0, 1]], dtype=torch.float32)
            ],
                                  dim=0)

            x_six, pred_r_from_last, pred_t_from_last = merge_pc(
                x_six, last_pose, init_pose)

        temporal_x = self.temporal_feat(x_six.transpose(1, 2),
                                        pred_r_from_last, pred_t_from_last)

        rx = F.relu(self.conv1_r(temporal_x))
        tx = F.relu(self.conv1_t(temporal_x))
        cx = F.relu(self.conv1_c(temporal_x))

        rx = F.relu(self.conv2_r(rx))
        tx = F.relu(self.conv2_t(tx))
        cx = F.relu(self.conv2_c(cx))

        rx = F.relu(self.conv3_r(rx))
        tx = F.relu(self.conv3_t(tx))
        cx = F.relu(self.conv3_c(cx))

        rx = self.conv4_r(rx).view(bs, self.num_obj, 4, -1)
        tx = self.conv4_t(tx).view(bs, self.num_obj, 3, -1)
        cx = torch.sigmoid(self.conv4_c(cx)).view(bs, self.num_obj, 1, -1)

        b = 0
        out_rx = torch.index_select(rx[b], 0, obj[b])
        out_tx = torch.index_select(tx[b], 0, obj[b])
        out_cx = torch.index_select(cx[b], 0, obj[b])

        out_rx = out_rx.contiguous().transpose(2, 1).contiguous()
        out_cx = out_cx.contiguous().transpose(2, 1).contiguous()
        out_tx = out_tx.contiguous().transpose(2, 1).contiguous()

        pred_r, pred_t = getMaxRt(out_rx, out_cx, out_tx, x_six)
        self.last_R[int(obj)] = pred_r
        self.last_t[int(obj)] = pred_t

        out_rx = self.Q2R(out_rx)
        out_x = x_six[:, :, 0:3]
        # save current state
        if self.last_R_total[int(obj)] is not None:

            last_pose = torch.cat([
                torch.cat([
                    self.last_R_total[int(obj)],
                    (self.last_t_total[int(obj)] +
                     self.last_x_total[int(obj)]).reshape(1000, 3, 1)
                ],
                          dim=2),
                torch.as_tensor([[0, 0, 0, 1]], dtype=torch.float32).repeat(
                    1000, 1, 1).cuda()
            ],
                                  dim=1)
            init_pose = motion[0].cuda().repeat(1000, 1, 1)
            transformed_pose = torch.bmm(init_pose, last_pose)
            self.last_R_total[int(obj)] = transformed_pose[:, 0:3, 0:3]
            self.last_t_total[int(obj)] = transformed_pose[:, 0:3, 3].reshape(
                1, 1000, 3) - self.last_x_total[int(obj)]

            lrt = torch.cat([self.last_R_total[int(obj)], out_rx], dim=0)
            ltt = torch.cat([self.last_t_total[int(obj)], out_tx], dim=1)
            lct = torch.cat([self.last_c_total[int(obj)], out_cx], dim=1)
            lxt = torch.cat([self.last_x_total[int(obj)], x_six[:, :, 0:3]],
                            dim=1)

            out_rx = torch.cat([self.last_R_total[int(obj)], out_rx], dim=0)
            out_tx = torch.cat([self.last_t_total[int(obj)], out_tx], dim=1)
            out_cx = torch.cat([self.last_c_total[int(obj)], out_cx], dim=1)
            out_x = torch.cat([self.last_x_total[int(obj)], x_six[:, :, 0:3]],
                              dim=1)

            mask = np.zeros(lrt.shape[0], dtype=int)
            mask[:1000] = 1
            np.random.shuffle(mask)
            lrt = lrt[mask.nonzero(), :, :][0]
            ltt = ltt[0, mask.nonzero(), :]
            lct = lct[0, mask.nonzero(), :]
            lxt = lxt[0, mask.nonzero(), :]
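            # The shuffled binary mask above keeps a random subset of exactly 1000 of the
            # accumulated points, so the per-object history pools stay a fixed size.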

            self.last_R_total[int(obj)] = lrt
            self.last_t_total[int(obj)] = ltt
            self.last_c_total[int(obj)] = lct
            self.last_x_total[int(obj)] = lxt
        else:
            self.last_R_total[int(obj)] = out_rx
            self.last_t_total[int(obj)] = out_tx
            self.last_c_total[int(obj)] = out_cx
            self.last_x_total[int(obj)] = x_six[:, :, 0:3]

        return out_rx, out_tx, out_cx, out_x
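A recurring step in the forward pass above is packing per-point rotations and translations into homogeneous 4x4 poses and compounding them with the inter-frame camera motion. A minimal sketch of that step, assuming R is (N, 3, 3), t is (N, 3) and motion is a single (4, 4) transform (the names are illustrative, not the module's API):

import torch

def compound_with_motion(R, t, motion):
    # Pack per-point poses into (N, 4, 4) homogeneous matrices ...
    n = R.shape[0]
    pose = torch.eye(4, device=R.device).repeat(n, 1, 1)
    pose[:, :3, :3] = R
    pose[:, :3, 3] = t
    # ... and left-multiply each of them by the inter-frame motion.
    new_pose = torch.bmm(motion.to(R.device).expand(n, 4, 4), pose)
    return new_pose[:, :3, :3], new_pose[:, :3, 3]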
Example #26
0
    # This function modifies the parameters you pass in, so to avoid having our
    # data changed out from under us, force copies (a = b wasn't sufficient,
    # but a = float(b) forced a copy).
    tmp_yaw = float(yaw_rad)
    tmp_pitch = float(pitch_rad)
    tmp_roll = float(roll_rad)    
    ned2body = transformations.quaternion_from_euler(tmp_yaw,
                                                     tmp_pitch,
                                                     tmp_roll,
                                                     'rzyx')
    body2ned = transformations.quaternion_inverse(ned2body)

    #print 'ned2body(q):', ned2body
    ned2cam_q = transformations.quaternion_multiply(ned2body, body2cam)
    ned2cam = np.matrix(transformations.quaternion_matrix(np.array(ned2cam_q))[:3,:3]).T
    #print 'ned2cam:', ned2cam
    R = ned2proj.dot( ned2cam )
    rvec, jac = cv2.Rodrigues(R)
    ned = navpy.lla2ned( lat_deg, lon_deg, filt_alt,
                         ref[0], ref[1], ref[2] )
    if args.correction:
        ned[0] += correction.north_interp(time)
        ned[1] += correction.east_interp(time)
        ned[2] += correction.down_interp(time)
    #print 'ned:', ned
    tvec = -np.matrix(R) * np.matrix(ned).T
    R, jac = cv2.Rodrigues(rvec)
    # is this R the same as the earlier R?
    PROJ = np.concatenate((R, tvec), axis=1)
    #print 'PROJ:', PROJ
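PROJ above is the 3x4 extrinsic matrix [R | tvec] mapping NED coordinates into the camera frame; combined with an intrinsic matrix it projects world points to pixels. A small sketch, assuming K is the 3x3 camera matrix (not shown in the fragment):

import numpy as np

def project_ned_point(K, PROJ, p_ned):
    # Homogeneous projection: pixel ~ K @ [R | t] @ [x, y, z, 1]^T
    uvw = K @ PROJ @ np.append(np.asarray(p_ned, dtype=float), 1.0)
    return uvw[:2] / uvw[2]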
Example #27
0
            pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index)
            pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)

            pred_c = pred_c.view(bs, num_points)
            how_max, which_max = torch.max(pred_c, 1)
            pred_t = pred_t.view(bs * num_points, 1, 3)
            points = cloud.view(bs * num_points, 1, 3)

            my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
            my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
            my_pred = np.append(my_r, my_t)
            my_result_wo_refine.append(my_pred.tolist())

            for ite in range(0, iteration):
                T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
                my_mat = quaternion_matrix(my_r)
                R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
                my_mat[0:3, 3] = my_t
                
                new_cloud = torch.bmm((cloud - T), R).contiguous()
                pred_r, pred_t = refiner(new_cloud, emb, index)
                pred_r = pred_r.view(1, 1, -1)
                pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
                my_r_2 = pred_r.view(-1).cpu().data.numpy()
                my_t_2 = pred_t.view(-1).cpu().data.numpy()
                my_mat_2 = quaternion_matrix(my_r_2)

                my_mat_2[0:3, 3] = my_t_2

                my_mat_final = np.dot(my_mat, my_mat_2)
                my_r_final = copy.deepcopy(my_mat_final)
Example #28
0
        # [0.03678917 0.85402822 0.51892423 0.]
        # [0.          0.          0.          1.]]
        # x=np.dot(my_rotation_matrix_from_euler,np.array([[1,0,0,1]]).transpose()).flatten()
        # y=np.dot(my_rotation_matrix_from_euler,np.array([[0,1,0,1]]).transpose()).flatten()
        # z=np.dot(my_rotation_matrix_from_euler,np.array([[0,0,1,1]]).transpose()).flatten()
        # newvs=[]
        # grasp_vector=[math.cos(my_euler_yaw),math.sin(my_euler_yaw),0]
        #
        # mx=my_rotation_matrix_from_euler[0,0:3]
        # my=my_rotation_matrix_from_euler[1,0:3]
        # mz=my_rotation_matrix_from_euler[2,0:3]
        # newvs.append([0,0,0,mx[0],mx[1],mx[2]])
        # newvs.append([0,0,0,my[0],my[1],my[2]])
        # newvs.append([0,0,0,mz[0],mz[1],mz[2]])
        # vector_ploter.addvs(newvs)


if __name__ == "__main__":
    posecnn = mask_cnn_segmentor()
    dfyw = DenseFusion_YW(posecnn=posecnn)

    #Notice: Change this to zed
    test_one_img, test_one_depth = dfyw.posecnn.get_an_test_img_and_depth()

    seg_res = posecnn.get_eval_res_by_name(TF.to_tensor(test_one_img),
                                           "banana")
    pred_r, pred_t = dfyw.DenseFusion(img=test_one_img,
                                      depth=test_one_depth,
                                      posecnn_res=seg_res)
    pred_r_matrix = quaternion_matrix(pred_r)
    print(pred_r, pred_t)
Example #29
0
def main():
    cfg = setup_config()
    pipeline = rs.pipeline()
    realsense_cfg = setup_realsense()
    pipeline.start(realsense_cfg)  # Start streaming
    visualizer = predictor.VisualizationDemo(cfg)

    ref_frame_axies = []
    ref_frame_label = []
    min_distance = 0.9  # IoU threshold for re-associating a detection with one from the previous frame
    label_cnt = 0
    frameth = 0

    my_t_pool = {}
    my_r_pool = {}

    while True:
        frameth += 1
        cur_frame_axies = []
        cur_frame_label = []
        my_t_per_frame = []
        my_r_per_frame = []

        align = rs.align(rs.stream.color)
        frames = pipeline.wait_for_frames()
        aligned_frames = align.process(frames)

        rgb = aligned_frames.get_color_frame()
        rgb = np.asanyarray(rgb.get_data())
        frame = rgb.copy()

        # Do instance segmentation
        start = time.time()
        segmentation, vis = visualizer.run_on_image(frame)
        #print("Time = " + str(time.time()-start))

        cv2.imshow('Mask', vis)
        cv2.waitKey(1)

        # Get segmentation mask
        ori_label = segmentation['instances'].pred_masks.cpu().numpy()
        label = np.sum(ori_label, axis=0).astype(np.uint8)
        label = np.where(label != 0, 255, label)
        label = Image.fromarray(label).convert("L")
        label = np.asarray(label.convert('RGB')).astype(np.uint8)

        bboxes = segmentation['instances'].pred_boxes.tensor.cpu().numpy()
        xyxy_bboxes = bboxes
        bboxes = bbox_convert(bboxes)

        if len(bboxes) > 0:
            #depth_frames = frames.get_depth_frame()
            depth_frames = aligned_frames.get_depth_frame()

            video_profile = depth_frames.profile.as_video_stream_profile()
            intr = video_profile.get_intrinsics()
            depth = np.asanyarray(depth_frames.get_data())
            #centers = segmentation['instances'].pred_boxes.get_centers()
            if len(my_t_pool) > 0:
                last_key = list(my_t_pool.keys())[-1]

            for i in range(0, len(bboxes)):
                bbox_xyxy = np.array(list(xyxy_bboxes[i]))
                bbox = list(bboxes[i])
                print("Bounding Box:" + str(bbox))
                #center = bboxes[i].get_centers()
                #center = centers[i].cpu().numpy()
                num_idx = float('nan')
                max_value = 0

                label_of_object = ori_label[i].astype(np.uint8)
                label_of_object = np.where(label_of_object != 0, 255,
                                           label_of_object)
                label_of_object = Image.fromarray(label_of_object).convert("L")
                label_of_object = np.asarray(
                    label_of_object.convert('RGB')).astype(np.uint8)

                if len(ref_frame_label) > 0:
                    iou_list = []
                    b = bbox_xyxy
                    a = np.array(ref_frame_axies)
                    for k in range(len(ref_frame_axies)):
                        iou = iou_score(a[k], b)
                        iou_list.append(iou)
                    iou_list = np.array(iou_list)
                    max_value = iou_list.max()
                    if (max_value > min_distance):
                        min_idx = np.where(iou_list == max_value)[0][0]
                        num_idx = ref_frame_label[min_idx]

                if (math.isnan(num_idx)):
                    num_idx = label_cnt
                    label_cnt += 1
                cur_frame_label.append(num_idx)
                cur_frame_axies.append(bbox_xyxy)

                print(max_value)
                if (frameth == 1) or (max_value < 0.9) or (
                        i > len(my_t_pool[last_key]) - 1) or (frameth % 20
                                                              == 0):
                    pos_text = (bbox[0], bbox[1])

                    class_id = segmentation['instances'].pred_classes[i].cpu(
                    ).data.numpy()
                    print("Class: " + str(class_id))
                    #idx = class_id
                    if class_id == 0:
                        idx = 0
                    if class_id == 2:
                        idx = 1

                    model_points = model_points_list[idx]

                    mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
                    #mask_label = ma.getmaskarray(ma.masked_equal(label, np.array(255)))
                    mask_label = ma.getmaskarray(
                        ma.masked_equal(label_of_object,
                                        np.array([255, 255, 255])))[:, :, 0]
                    mask = mask_label * mask_depth

                    rmin, rmax, cmin, cmax = posenet_deploy.get_bbox(bbox)

                    # choose
                    choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
                    if len(choose) == 0:
                        choose = torch.LongTensor([0])
                    if len(choose) > num_points:
                        c_mask = np.zeros(len(choose), dtype=int)
                        c_mask[:num_points] = 1
                        np.random.shuffle(c_mask)
                        choose = choose[c_mask.nonzero()]
                    else:
                        choose = np.pad(choose, (0, num_points - len(choose)),
                                        'wrap')

                    depth_masked = depth[
                        rmin:rmax,
                        cmin:cmax].flatten()[choose][:, np.newaxis].astype(
                            np.float32)
                    xmap_masked = xmap[
                        rmin:rmax,
                        cmin:cmax].flatten()[choose][:, np.newaxis].astype(
                            np.float32)
                    ymap_masked = ymap[
                        rmin:rmax,
                        cmin:cmax].flatten()[choose][:, np.newaxis].astype(
                            np.float32)
                    choose = np.array([choose])

                    # point cloud
                    pt2 = depth_masked / cam_scale
                    pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
                    pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
                    cloud = np.concatenate((pt0, pt1, pt2), axis=1)
                    cloud = cloud / 1000.0
                    # print(cloud.shape)

                    # cropped img
                    #img_masked = rgb[:, :, :3]
                    img_masked = rgb[:, :, ::-1]  # bgr to rgb
                    img_masked = np.transpose(img_masked, (2, 0, 1))
                    img_masked = img_masked[:, rmin:rmax, cmin:cmax]

                    my_mask = np.transpose(label_of_object, (2, 0, 1))
                    my_mask = my_mask[:, rmin:rmax, cmin:
                                      cmax]  ## Added by me to crop the mask
                    mask_img = np.transpose(my_mask, (1, 2, 0))
                    img_rgb = np.transpose(img_masked, (1, 2, 0))
                    croped_img_mask = cv2.bitwise_and(img_rgb, mask_img)
                    crop_image_to_check = croped_img_mask.copy()
                    cv2.imshow("mask_crop", croped_img_mask)
                    croped_img_mask = np.transpose(croped_img_mask, (2, 0, 1))

                    # Variables
                    cloud = torch.from_numpy(cloud.astype(
                        np.float32)).unsqueeze(0)
                    choose = torch.LongTensor(choose.astype(
                        np.int32)).unsqueeze(0)
                    #img_masked = torch.from_numpy(img_masked.astype(np.float32)).unsqueeze(0)
                    img_masked = torch.from_numpy(
                        croped_img_mask.astype(np.float32)).unsqueeze(0)
                    index = torch.LongTensor([idx]).unsqueeze(
                        0)  # Specify which object

                    cloud = Variable(cloud).cuda()
                    choose = Variable(choose).cuda()
                    img_masked = Variable(img_masked).cuda()
                    index = Variable(index).cuda()

                    # Deploy
                    with torch.no_grad():
                        pred_r, pred_t, pred_c, emb = estimator(
                            img_masked, cloud, choose, index)

                    pred_r = pred_r / torch.norm(pred_r, dim=2).view(
                        1, num_points, 1)
                    pred_c = pred_c.view(bs, num_points)
                    how_max, which_max = torch.max(pred_c, 1)
                    pred_t = pred_t.view(bs * num_points, 1, 3)
                    points = cloud.view(bs * num_points, 1, 3)

                    my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
                    my_t = (points.view(bs * num_points, 1, 3) +
                            pred_t)[which_max[0]].view(-1).cpu().data.numpy()
                    my_pred = np.append(my_r, my_t)

                    # Refinement
                    for ite in range(0, iteration):
                        T = Variable(torch.from_numpy(my_t.astype(
                            np.float32))).cuda().view(1, 3).repeat(
                                num_points,
                                1).contiguous().view(1, num_points, 3)
                        my_mat = quaternion_matrix(my_r)
                        R = Variable(
                            torch.from_numpy(my_mat[:3, :3].astype(
                                np.float32))).cuda().view(1, 3, 3)
                        my_mat[0:3, 3] = my_t

                        new_cloud = torch.bmm((cloud - T), R).contiguous()
                        pred_r, pred_t = refiner(new_cloud, emb, index)
                        pred_r = pred_r.view(1, 1, -1)
                        pred_r = pred_r / (torch.norm(pred_r, dim=2).view(
                            1, 1, 1))
                        my_r_2 = pred_r.view(-1).cpu().data.numpy()
                        my_t_2 = pred_t.view(-1).cpu().data.numpy()
                        my_mat_2 = quaternion_matrix(my_r_2)

                        my_mat_2[0:3, 3] = my_t_2
                        my_mat_final = np.dot(my_mat, my_mat_2)
                        my_r_final = copy.deepcopy(my_mat_final)
                        my_r_final[0:3, 3] = 0
                        my_r_final = quaternion_from_matrix(my_r_final, True)
                        my_t_final = np.array([
                            my_mat_final[0][3], my_mat_final[1][3],
                            my_mat_final[2][3]
                        ])

                        my_pred = np.append(my_r_final, my_t_final)
                        my_r = my_r_final
                        my_t = my_t_final

                        my_r_matrix = quaternion_matrix(my_r)[:3, :3]
                    #print("Time = " + str(time.time()-start))
                    my_t_per_frame.append(my_t)
                    my_r_per_frame.append(my_r_matrix)

                    #rotation = Rot.from_matrix(my_r_matrix)
                    #angle = rotation.as_euler('xyz', degrees=True)

                    my_t = np.around(my_t, 5)
                    #print("translation vector = " + str(my_t))
                    #print("rotation angles = " + str(my_r))

                    frame = posenet_deploy.get_3d_bbox(frame, model_points,
                                                       my_r_matrix, my_t)
                    frame = posenet_deploy.draw_axes(frame, my_r_matrix, my_t)

                    if check_inverted(crop_image_to_check):
                        cv2.putText(frame,
                                    str(num_idx) + "_inverted", pos_text,
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0),
                                    2, cv2.LINE_AA)
                    else:
                        cv2.putText(frame, str(num_idx), pos_text,
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0),
                                    2, cv2.LINE_AA)

                    #cv2.putText(frame, str(num_idx), pos_text, cv2.FONT_HERSHEY_SIMPLEX,
                    #            0.5, (0,255,0), 2, cv2.LINE_AA)

                    posenet_deploy.putText(frame, i, num_idx, class_id, my_t)
                    #cv2.imshow('Result', rgb)
                    #cv2.waitKey(1)

                else:
                    rmin, rmax, cmin, cmax = posenet_deploy.get_bbox(bbox)
                    img_masked = rgb[:, :, ::-1]  # bgr to rgb
                    img_masked = np.transpose(img_masked, (2, 0, 1))
                    img_masked = img_masked[:, rmin:rmax, cmin:cmax]

                    my_mask = np.transpose(label_of_object, (2, 0, 1))
                    my_mask = my_mask[:, rmin:rmax, cmin:
                                      cmax]  ## Added by me to crop the mask
                    mask_img = np.transpose(my_mask, (1, 2, 0))
                    img_rgb = np.transpose(img_masked, (1, 2, 0))
                    croped_img_mask = cv2.bitwise_and(img_rgb, mask_img)
                    crop_image_to_check = croped_img_mask.copy()

                    pos_text = (bbox[0], bbox[1])
                    last_key = list(my_t_pool.keys())[-1]

                    print("POOL: " + str(my_t_pool[last_key]))
                    class_id = segmentation['instances'].pred_classes[i].cpu(
                    ).data.numpy()

                    my_t = my_t_pool[last_key][min_idx]
                    my_r_matrix = my_r_pool[last_key][min_idx]

                    my_t_per_frame.append(my_t)
                    my_r_per_frame.append(my_r_matrix)

                    frame = posenet_deploy.get_3d_bbox(frame, model_points,
                                                       my_r_matrix, my_t)
                    frame = posenet_deploy.draw_axes(frame, my_r_matrix, my_t)

                    if check_inverted(crop_image_to_check):
                        cv2.putText(frame,
                                    str(num_idx) + "_inverted", pos_text,
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0),
                                    2, cv2.LINE_AA)
                    else:
                        cv2.putText(frame, str(num_idx), pos_text,
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0),
                                    2, cv2.LINE_AA)

                    #cv2.putText(frame, str(num_idx), pos_text, cv2.FONT_HERSHEY_SIMPLEX,
                    #            0.5, (0,255,0), 2, cv2.LINE_AA)

                    posenet_deploy.putText(frame, i, num_idx, class_id, my_t)

            if len(my_t_per_frame) > 0:
                my_t_pool[frameth] = my_t_per_frame
                my_r_pool[frameth] = my_r_per_frame

            ref_frame_label = cur_frame_label
            ref_frame_axies = cur_frame_axies

            end = time.time() - start
            cv2.putText(frame,
                        "Time processing: " + str(round(end, 3)) + " seconds",
                        (100, 700), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255),
                        2, cv2.LINE_AA)
            cv2.imshow('Result', frame)
            cv2.waitKey(1)

        else:
            # Show images
            #video_writer.write(rgb)
            cv2.imshow('Result', rgb)
            cv2.waitKey(1)

    pipeline.stop()
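The frame-to-frame association above keys each new detection to a previous one by bounding-box IoU; the iou_score helper is not shown in the snippet. A hypothetical implementation consistent with how it is called, for boxes in [x1, y1, x2, y2] form:

def iou_score(box_a, box_b):
    # Intersection-over-union of two axis-aligned boxes given as [x1, y1, x2, y2].
    x1 = max(box_a[0], box_b[0])
    y1 = max(box_a[1], box_b[1])
    x2 = min(box_a[2], box_b[2])
    y2 = min(box_a[3], box_b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / (area_a + area_b - inter + 1e-9)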
Example #30
0
    def DenseFusion(self, img, depth, posecnn_res):
        my_result_wo_refine = []

        itemid = 1  # simplified for single-label detection; if multiple labels are used, check DFYW3.py for more

        depth = np.array(depth)
        # img = img

        seg_res = posecnn_res

        x1, y1, x2, y2 = seg_res["box"]
        banana_bbox_draw = self.posecnn.get_box_rcwh(seg_res["box"])
        rmin, rmax, cmin, cmax = int(x1), int(x2), int(y1), int(y2)
        depth = depth[:, :, 1]  # the depth image has three identical channels, so keep only one
        mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))  # ok

        label_banana = np.squeeze(seg_res["mask"])
        label_banana = ma.getmaskarray(ma.masked_greater(label_banana, 0.5))
        label_banana_nonzeros = label_banana.flatten().nonzero()

        mask_label = ma.getmaskarray(ma.masked_equal(
            label_banana, itemid))  # label from banana label
        mask = mask_label * mask_depth

        mask_nonzeros = mask[:].flatten().nonzero()
        choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
        if len(choose) > self.num_points:
            c_mask = np.zeros(len(choose), dtype=int)
            c_mask[:self.num_points] = 1
            np.random.shuffle(c_mask)
            choose = choose[c_mask.nonzero()]
        else:
            print("len of choose is 0, check error")
            choose = np.pad(choose, (0, self.num_points - len(choose)), 'wrap')

        depth_masked = depth[rmin:rmax,
                             cmin:cmax].flatten()[choose][:,
                                                          np.newaxis].astype(
                                                              np.float32)
        xmap_masked = self.xmap[
            rmin:rmax,
            cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        ymap_masked = self.ymap[
            rmin:rmax,
            cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        choose = np.array([choose])
        pt2 = depth_masked / self.cam_scale
        pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx
        pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy
        cloud = np.concatenate((pt0, pt1, pt2), axis=1)
        img_np = np.array(img)
        img_masked = np.array(img)[:, :, :3]
        img_masked = np.transpose(img_masked, (2, 0, 1))
        img_masked = img_masked[:, rmin:rmax, cmin:cmax]

        cloud = torch.from_numpy(cloud.astype(np.float32))
        choose = torch.LongTensor(choose.astype(np.int32))
        img_masked = self.norm(torch.from_numpy(img_masked.astype(np.float32)))
        index = torch.LongTensor([itemid - 1])

        cloud = Variable(cloud).cuda()
        choose = Variable(choose).cuda()
        img_masked = Variable(img_masked).cuda()
        index = Variable(index).cuda()

        cloud = cloud.view(1, self.num_points, 3)
        img_masked = img_masked.view(1, 3,
                                     img_masked.size()[1],
                                     img_masked.size()[2])

        pred_r, pred_t, pred_c, emb = self.estimator(img_masked, cloud, choose,
                                                     index)
        pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, self.num_points, 1)

        pred_c = pred_c.view(self.bs, self.num_points)
        how_max, which_max = torch.max(pred_c, 1)
        pred_t = pred_t.view(self.bs * self.num_points, 1, 3)
        points = cloud.view(self.bs * self.num_points, 1, 3)

        my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
        my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
        my_pred = np.append(my_r, my_t)
        my_result_wo_refine.append(my_pred.tolist())

        my_result = []
        for ite in range(0, self.iteration):
            T = Variable(torch.from_numpy(
                my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                    self.num_points,
                    1).contiguous().view(1, self.num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(
                np.float32))).cuda().view(1, 3, 3)
            my_mat[0:3, 3] = my_t

            new_cloud = torch.bmm((cloud - T), R).contiguous()
            pred_r, pred_t = self.refiner(new_cloud, emb, index)
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
            my_r_2 = pred_r.view(-1).cpu().data.numpy()
            my_t_2 = pred_t.view(-1).cpu().data.numpy()
            my_mat_2 = quaternion_matrix(my_r_2)

            my_mat_2[0:3, 3] = my_t_2
            my_mat_final = np.dot(my_mat, my_mat_2)
            my_r_final = copy.deepcopy(my_mat_final)
            my_r_final[0:3, 3] = 0
            my_r_final = quaternion_from_matrix(my_r_final, True)
            my_t_final = np.array(
                [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

            my_pred = np.append(my_r_final, my_t_final)
            my_result.append(my_pred.tolist())
        my_result_np = np.array(my_result)
        my_result_mean = my_result_np.mean(axis=0)
        my_r = my_result_mean[:4]
        my_t = my_result_mean[4:]
        my_r_quaternion = my_r
        return my_r_quaternion, my_t
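The loop above averages the per-iteration refined quaternions component-wise, which is only well-behaved when they are nearly identical and share a sign convention. A hedged alternative sketch that sign-aligns the quaternions before averaging and renormalises the result:

import numpy as np

def average_close_quaternions(quats):
    # quats: (N, 4) unit quaternions that are assumed to be clustered together
    q = np.asarray(quats, dtype=float).copy()
    # Flip any quaternion lying in the opposite hemisphere of the first one (q and -q encode the same rotation).
    q[np.sum(q * q[0], axis=1) < 0] *= -1
    mean = q.mean(axis=0)
    return mean / np.linalg.norm(mean)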