Code Example #1
def sample_rotations_60():
    """ icosahedral_group: 60 rotations

    """
    phi = (1 + math.sqrt(5)) / 2
    R1 = np.array([[-phi/2, 1/(2*phi), -0.5], [-1/(2*phi), 0.5, phi/2], [0.5, phi/2, -1/(2*phi)]])
    R2 = np.array([[phi/2, 1/(2*phi), -0.5], [1/(2*phi), 0.5, phi/2], [0.5, -phi/2, 1/(2*phi)]])
    group = [np.eye(3, dtype=float)]
    n = 0
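    # Close the set under multiplication by the two generators R1 and R2 until no
    # new rotation appears; the icosahedral group has exactly 60 elements.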
    while len(group) > n:
        n = len(group)
        set_so_far = group
        for rot in set_so_far:
            for R in [R1, R2]:
                new_R = np.matmul(rot, R)
                new = True
                for item in set_so_far:
                    if np.sum(np.absolute(item - new_R)) < 1e-6:
                        new = False
                        break
                if new:
                    group.append(new_R)
                    break
            if new:
                break
    # return np.array(group)
    group = np.array(group)
    quaternion_group = np.zeros((60, 4))
    for i in range(60):
        quaternion_group[i] = quaternion_from_matrix(group[i])
    return quaternion_group.astype(float)
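The snippet above assumes `math`, `numpy`, and a `quaternion_from_matrix` helper (for example the `transformations` module that DenseFusion-style projects bundle) are already in scope. A minimal sketch, under those assumptions, of the missing imports plus a sanity check that the closure really yields 60 distinct rotations:

import math
import numpy as np
from transformations import quaternion_from_matrix  # assumed import path

quats = sample_rotations_60()
assert quats.shape == (60, 4)
# q and -q encode the same rotation, so compare rotations via |<q_i, q_j>| close to 1.
unique = []
for q in quats:
    if not any(abs(np.dot(q, u)) > 1 - 1e-6 for u in unique):
        unique.append(q)
print(len(unique))  # expected: 60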
Code Example #2
File: main_test.py Project: cuhkjpan/PlueckerNet
def evaluate_R_t(R_gt, t_gt, R_est, t_est, q_gt=None):
    t = t_est.flatten()
    t_gt = t_gt.flatten()
    eps = 1e-15

    if q_gt is None:
        q_gt = quaternion_from_matrix(R_gt)
    q = quaternion_from_matrix(R_est)
    q = q / (np.linalg.norm(q) + eps)
    q_gt = q_gt / (np.linalg.norm(q_gt) + eps)
    loss_q = np.maximum(eps, (1.0 - np.sum(q * q_gt)**2))
    err_q = np.arccos(1 - 2 * loss_q)
    # absolute distance error on t
    err_t = np.linalg.norm(t_gt - t)
    if np.sum(np.isnan(err_q)) or np.sum(np.isnan(err_t)):
        # This should never happen! Debug here
        err_q = np.pi
        err_t = np.inf
    return err_q, err_t
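A small usage sketch with hypothetical inputs (assuming `numpy` and `quaternion_from_matrix` are available as above): a 10 degree rotation about Z and a 5 cm translation offset should come back as roughly those errors.

import numpy as np

R_gt = np.eye(3)
t_gt = np.zeros(3)
theta = np.deg2rad(10.0)
R_est = np.array([[np.cos(theta), -np.sin(theta), 0.0],
                  [np.sin(theta),  np.cos(theta), 0.0],
                  [0.0, 0.0, 1.0]])
t_est = np.array([0.0, 0.0, 0.05])

err_q, err_t = evaluate_R_t(R_gt, t_gt, R_est, t_est)
print(np.rad2deg(err_q), err_t)  # roughly 10.0 degrees and 0.05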
Code Example #3
def Csv_6D_pose(rgb_img, depth_img):
        iteration = 4
        bs = 1
        # knn = KNearestNeighbor(1)
        points, choose, img = testdataset.getitem_by_array(rgb_img, depth_img)
        if choose.ndim < 3:
                return []
        # print("choose.ndim =", choose.ndim)
        obj_id = torch.LongTensor([0]).unsqueeze(0)

        points, choose, img, obj_id = Variable(points).cuda(),  Variable(choose).cuda(), Variable(img).cuda(), Variable(obj_id).cuda()


        pred_r, pred_t, pred_c, emb = estimator(img, points, choose, obj_id)
        pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
        pred_c = pred_c.view(bs, num_points)
        how_max, which_max = torch.max(pred_c, 1)
        pred_t = pred_t.view(bs * num_points, 1, 3)

        my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
        my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
        my_pred = np.append(my_r, my_t)

        for ite in range(0, iteration):
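                # Each pass re-centers the point cloud with the current pose estimate,
                # queries the refiner network for a small delta pose, and composes that
                # delta with the running 4x4 transform.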
                T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
                my_mat = quaternion_matrix(my_r)
                R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
                my_mat[0:3, 3] = my_t

                new_points = torch.bmm((points - T), R).contiguous()
                pred_r, pred_t = refiner(new_points, emb, obj_id)
                pred_r = pred_r.view(1, 1, -1)
                pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
                my_r_2 = pred_r.view(-1).cpu().data.numpy()
                my_t_2 = pred_t.view(-1).cpu().data.numpy()
                my_mat_2 = quaternion_matrix(my_r_2)
                my_mat_2[0:3, 3] = my_t_2

                my_mat_final = np.dot(my_mat, my_mat_2)
                my_r_final = copy.deepcopy(my_mat_final)
                my_r_final[0:3, 3] = 0
                my_r_final = quaternion_from_matrix(my_r_final, True)
                my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

                my_pred = np.append(my_r_final, my_t_final)
                my_r = my_r_final
                my_t = my_t_final

        print("final prediction: quaternion + translation")
        my_pred = np.asarray(my_pred, dtype='float')
        print(list(my_pred))
        return list(my_pred)
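The refinement loop above (repeated in several of the later examples) composes poses as 4x4 homogeneous matrices built from a quaternion and a translation. A minimal sketch of that composition step in isolation, assuming the same `transformations` helpers (`quaternion_matrix`, `quaternion_from_matrix`) are importable:

import copy
import numpy as np
from transformations import quaternion_matrix, quaternion_from_matrix  # assumed import path

def compose_pose(q, t, dq, dt):
    # Build 4x4 homogeneous transforms from quaternion + translation.
    base = quaternion_matrix(q)
    base[0:3, 3] = t
    delta = quaternion_matrix(dq)
    delta[0:3, 3] = dt
    # Chain the delta pose onto the base pose.
    final = np.dot(base, delta)
    # Split back into quaternion + translation; zero the translation column
    # before extracting the quaternion, as the snippets above do.
    rot_only = copy.deepcopy(final)
    rot_only[0:3, 3] = 0
    q_out = quaternion_from_matrix(rot_only, True)
    t_out = final[0:3, 3]
    return q_out, t_out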
Code Example #4
File: evaluate.py Project: r-pad/DenseFusion
    def refinePose(self,
                   emb,
                   cloud,
                   object_label,
                   init_t,
                   init_r,
                   iterations=2):
        init_t = init_t.cpu().data.numpy()
        init_r = init_r.cpu().data.numpy()

        for ite in range(0, iterations):
            T = Variable(torch.from_numpy(
                init_t.astype(np.float32))).cuda().view(1, 3).repeat(
                    self.num_points, 1).contiguous().view(1, self.num_points, 3)
            init_mat = quaternion_matrix(init_r)
            R = Variable(torch.from_numpy(init_mat[:3, :3].astype(
                np.float32))).cuda().view(1, 3, 3)
            init_mat[0:3, 3] = init_t

            new_cloud = torch.bmm((cloud - T), R).contiguous()
            pred_r, pred_t = self.refiner(new_cloud, emb, object_label)
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))

            delta_r = pred_r.view(-1).cpu().data.numpy()
            delta_t = pred_t.view(-1).cpu().data.numpy()
            delta_mat = quaternion_matrix(delta_r)

            delta_mat[0:3, 3] = delta_t

            refined_mat = np.dot(init_mat, delta_mat)
            refined_r = copy.deepcopy(refined_mat)
            refined_r[0:3, 3] = 0
            refined_r = quaternion_from_matrix(refined_r, True)
            refined_t = np.array(
                [refined_mat[0][3], refined_mat[1][3], refined_mat[2][3]])

            init_r = refined_r
            init_t = refined_t
        return refined_r, refined_t
Code Example #5
    def refine_posenet(self, refine_args):
        iteration = refine_args.iteration
        my_t, my_r = refine_args.t, refine_args.r
        num_points = refine_args.num_points
        cloud = refine_args.cloud
        refiner = refine_args.refiner_network
        emb = refine_args.emb
        index = refine_args.index

        for ite in range(0, iteration):
            T = Variable(torch.from_numpy(
                my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                    num_points, 1).contiguous().view(1, num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(
                np.float32))).cuda().view(1, 3, 3)
            my_mat[0:3, 3] = my_t

            new_cloud = torch.bmm((cloud - T), R).contiguous()
            time_refiner = time.time()
            pred_r, pred_t = refiner(new_cloud, emb, index)
            print("--- RE %s seconds ---" % (time.time() - time_refiner))
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
            my_r_2 = pred_r.view(-1).cpu().data.numpy()
            my_t_2 = pred_t.view(-1).cpu().data.numpy()
            my_mat_2 = quaternion_matrix(my_r_2)

            my_mat_2[0:3, 3] = my_t_2

            my_mat_final = np.dot(my_mat, my_mat_2)
            my_r_final = copy.deepcopy(my_mat_final)
            my_r_final[0:3, 3] = 0
            my_r_final = quaternion_from_matrix(my_r_final, True)
            my_t_final = np.array(
                [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

            my_pred = np.append(my_r_final, my_t_final)
            my_r = my_r_final
            my_t = my_t_final
Code Example #6
def sample_rotations_24():
    """ octahedral_group: 24 rotations

    """
    group = np.array([[[1, 0, 0], [0, 1, 0], [0, 0, 1]],
                      [[1, 0, 0], [0, -1, 0], [0, 0, -1]],
                      [[-1, 0, 0], [0, 1, 0], [0, 0, -1]],
                      [[-1, 0, 0], [0, -1, 0], [0, 0, 1]],

                      [[1, 0, 0], [0, 0, 1], [0, 1, 0]],
                      [[1, 0, 0], [0, 0, -1], [0, -1, 0]],
                      [[-1, 0, 0], [0, 0, 1], [0, -1, 0]],
                      [[-1, 0, 0], [0, 0, -1], [0, 1, 0]],

                      [[0, 1, 0], [1, 0, 0], [0, 0, 1]],
                      [[0, 1, 0], [-1, 0, 0], [0, 0, -1]],
                      [[0, -1, 0], [1, 0, 0], [0, 0, -1]],
                      [[0, -1, 0], [-1, 0, 0], [0, 0, 1]],

                      [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
                      [[0, 1, 0], [0, 0, -1], [-1, 0, 0]],
                      [[0, -1, 0], [0, 0, 1], [-1, 0, 0]],
                      [[0, -1, 0], [0, 0, -1], [1, 0, 0]],

                      [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
                      [[0, 0, 1], [-1, 0, 0], [0, -1, 0]],
                      [[0, 0, -1], [1, 0, 0], [0, -1, 0]],
                      [[0, 0, -1], [-1, 0, 0], [0, 1, 0]],

                      [[0, 0, 1], [0, 1, 0], [1, 0, 0]],
                      [[0, 0, 1], [0, -1, 0], [-1, 0, 0]],
                      [[0, 0, -1], [0, 1, 0], [-1, 0, 0]],
                      [[0, 0, -1], [0, -1, 0], [1, 0, 0]]])
    # return group.astype(float)
    quaternion_group = np.zeros((24, 4))
    for i in range(24):
        quaternion_group[i] = quaternion_from_matrix(group[i])
    return quaternion_group.astype(float)
Code Example #7
def sample_rotations_12():
    """ tetrahedral_group: 12 rotations

    """
    group = np.array([[[1, 0, 0], [0, 1, 0], [0, 0, 1]],
                      [[1, 0, 0], [0, -1, 0], [0, 0, -1]],
                      [[-1, 0, 0], [0, 1, 0], [0, 0, -1]],
                      [[-1, 0, 0], [0, -1, 0], [0, 0, 1]],

                      [[0, 1, 0], [0, 0, 1], [1, 0, 0]],
                      [[0, 1, 0], [0, 0, -1], [-1, 0, 0]],
                      [[0, -1, 0], [0, 0, 1], [-1, 0, 0]],
                      [[0, -1, 0], [0, 0, -1], [1, 0, 0]],

                      [[0, 0, 1], [1, 0, 0], [0, 1, 0]],
                      [[0, 0, 1], [-1, 0, 0], [0, -1, 0]],
                      [[0, 0, -1], [1, 0, 0], [0, -1, 0]],
                      [[0, 0, -1], [-1, 0, 0], [0, 1, 0]]])
    # return group.astype(float)
    quaternion_group = np.zeros((12, 4))
    for i in range(12):
        quaternion_group[i] = quaternion_from_matrix(group[i])
    return quaternion_group.astype(float)
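Because sample_rotations_24 and sample_rotations_12 hard-code their matrices, a small sanity check (a hypothetical helper, not part of the original code) can confirm every entry is a proper rotation before converting it to a quaternion:

import numpy as np

def is_proper_rotation(R, tol=1e-6):
    # Orthogonal (R R^T = I) with determinant +1.
    R = np.asarray(R, dtype=float)
    return np.allclose(R @ R.T, np.eye(3), atol=tol) and abs(np.linalg.det(R) - 1.0) < tol

# e.g. all(is_proper_rotation(R) for R in group) should hold for the 12- and 24-element groups above.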
Code Example #8
def upload_file():
    global refiner
    if flask.request.method == 'POST':
        file1 = flask.request.files['file1']
        file2 = flask.request.files['file2']
        if file1 and allowedFile(file1.filename) and file2 and allowedFile(
                file2.filename):
            # Gets filenames, paths, and saves them
            fname1 = wz.secure_filename(file1.filename)
            fpath1 = os.path.join(app.config['UPLOAD_FOLDER'], fname1)
            fname2 = wz.secure_filename(file2.filename)
            fpath2 = os.path.join(app.config['UPLOAD_FOLDER'], fname2)
            # print(fname1, fname2)
            file1.save(fpath1)
            file2.save(fpath2)

            # Gets labels, bbox, and masks
            retUrl = upload(FULLDOMAIN, fpath1)
            objDict = downloadZip(retUrl, UPLOAD_FOLDER)

            # DEBUG 1
            # print('objDict: \n', objDict)
            # retUrl = FULLDOMAIN + UPLOAD_FOLDER_REL + 'tmp.zip'
            # return retUrl

            # DEBUG 2
            # retCsv = createCSV(objDict)
            # retStr = str()
            # with open(os.path.join(UPLOAD_FOLDER, 'pose.csv'), 'w') as of:
            # 	for line in retCsv:
            # 		retStr += line + '\n'
            # 		of.write(line + '\n')
            # return retStr

            # Start pose estimation
            bbList, maskList, scoreList, labelList = getLists(objDict)
            img = Image.open(fpath1)
            depth = np.array(Image.open(fpath2))
            print('depth:\n', depth[:10, :10])
            print('max depth:', depth.max())
            my_result_wo_refine = []
            my_result = []
            itemid = 1

            # Original Network
            # posecnn_meta = scio.loadmat('mycode/samples/input/000000.mat')
            # label = np.array(posecnn_meta['labels'])
            # posecnn_rois = np.array(posecnn_meta['rois'])
            # lst = posecnn_rois[:, 1:2].flatten()
            # for idx in range(len(lst)):
            # 	itemid = lst[idx]
            # 	# try:
            # 	# cmin, rmin, cmax, rmax = int(posecnn_rois[idx][2]), int(posecnn_rois[idx][3]), int(posecnn_rois[idx][4]), int(posecnn_rois[idx][5])
            # 	rmin, rmax, cmin, cmax = get_bbox(posecnn_rois, idx)
            # 	print(cmin, rmin, cmax, rmax)
            # 	mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
            # 	mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
            # 	mask = mask_label * mask_depth

            for bb, mask, score, label in zip(bbList, maskList, scoreList,
                                              labelList):
                # cmin, rmin, cmax, rmax = bb
                # print(cmin, rmin, cmax, rmax)
                rmin, rmax, cmin, cmax = get_bbox(bb, None)
                # print(cmin, rmin, cmax, rmax)
                mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
                mask_label = ma.getmaskarray(ma.masked_equal(mask, 1))
                mask = mask_label * mask_depth

                choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
                # print(mask.shape)
                # print(len(choose))
                # for i in range(rmin, rmax):
                # 	for j in range(cmin, cmax):
                # 		val = mask[i,j]
                # 		print(val, end=' ')
                # 	print()
                # print(mask[rmin:rmax, cmin:cmax])
                if len(choose) >= num_points:
                    c_mask = np.zeros(len(choose), dtype=int)
                    c_mask[:num_points] = 1
                    np.random.shuffle(c_mask)
                    choose = choose[c_mask.nonzero()]
                else:
                    # print(choose)
                    choose = np.pad(choose, (0, num_points - len(choose)),
                                    'wrap')

                depth_masked = depth[
                    rmin:rmax,
                    cmin:cmax].flatten()[choose][:,
                                                 np.newaxis].astype(np.float32)
                xmap_masked = xmap[
                    rmin:rmax,
                    cmin:cmax].flatten()[choose][:,
                                                 np.newaxis].astype(np.float32)
                ymap_masked = ymap[
                    rmin:rmax,
                    cmin:cmax].flatten()[choose][:,
                                                 np.newaxis].astype(np.float32)
                choose = np.array([choose])
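                # The next block back-projects the chosen pixels into camera coordinates with
                # the pinhole model: z = depth / scale, x = (u - cx) * z / fx, y = (v - cy) * z / fy.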

                pt2 = depth_masked / cam_scale
                pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
                pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
                cloud = np.concatenate((pt0, pt1, pt2), axis=1)

                img_masked = np.array(img)[:, :, :3]
                img_masked = np.transpose(img_masked, (2, 0, 1))
                img_masked = img_masked[:, rmin:rmax, cmin:cmax]

                cloud = torch.from_numpy(cloud.astype(np.float32))
                choose = torch.LongTensor(choose.astype(np.int32))
                img_masked = norm(
                    torch.from_numpy(img_masked.astype(np.float32)))
                index = torch.LongTensor([itemid - 1])

                cloud = Variable(cloud).cuda()
                choose = Variable(choose).cuda()
                img_masked = Variable(img_masked).cuda()
                index = Variable(index).cuda()

                # print('DEBUG')
                cloud = cloud.view(1, num_points, 3)
                img_masked = img_masked.view(1, 3,
                                             img_masked.size()[1],
                                             img_masked.size()[2])

                pred_r, pred_t, pred_c, emb = estimator(
                    img_masked, cloud, choose, index)
                pred_r = pred_r / torch.norm(pred_r, dim=2).view(
                    1, num_points, 1)

                pred_c = pred_c.view(bs, num_points)
                how_max, which_max = torch.max(pred_c, 1)
                pred_t = pred_t.view(bs * num_points, 1, 3)
                points = cloud.view(bs * num_points, 1, 3)

                my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
                my_t = (points +
                        pred_t)[which_max[0]].view(-1).cpu().data.numpy()
                my_pred = np.append(my_r, my_t)
                my_result_wo_refine.append(my_pred.tolist())

                for ite in range(0, iteration):
                    T = Variable(torch.from_numpy(
                        my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                            num_points, 1).contiguous().view(1, num_points, 3)
                    my_mat = quaternion_matrix(my_r)
                    R = Variable(
                        torch.from_numpy(my_mat[:3, :3].astype(
                            np.float32))).cuda().view(1, 3, 3)
                    my_mat[0][3] = my_t[0]
                    my_mat[1][3] = my_t[1]
                    my_mat[2][3] = my_t[2]

                    new_cloud = torch.bmm((cloud - T), R).contiguous()
                    pred_r, pred_t = refiner(new_cloud, emb, index)
                    pred_r = pred_r.view(1, 1, -1)
                    pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
                    my_r_2 = pred_r.view(-1).cpu().data.numpy()
                    my_t_2 = pred_t.view(-1).cpu().data.numpy()
                    my_mat_2 = quaternion_matrix(my_r_2)

                    my_mat_2[0][3] = my_t_2[0]
                    my_mat_2[1][3] = my_t_2[1]
                    my_mat_2[2][3] = my_t_2[2]

                    my_mat_final = np.dot(my_mat, my_mat_2)
                    my_r_final = copy.deepcopy(my_mat_final)
                    my_r_final[0][3] = 0
                    my_r_final[1][3] = 0
                    my_r_final[2][3] = 0
                    my_r_final = quaternion_from_matrix(my_r_final, True)
                    my_t_final = np.array([
                        my_mat_final[0][3], my_mat_final[1][3],
                        my_mat_final[2][3]
                    ])

                    my_pred = np.append(my_r_final, my_t_final)
                    my_r = my_r_final
                    my_t = my_t_final

                my_result.append(my_pred.tolist())
                itemid += 1
                # except ZeroDivisionError:
                # 	# print("PoseCNN Detector Lost {0} at No.{1} keyframe".format(itemid, now))
                # 	print('divide by zero error')
                # 	# my_result_wo_refine.append([0.0 for i in range(7)])
                # 	my_result.append([0.0 for i in range(7)])

            # DEBUG
            # print(my_result)

            # Creates return csv
            retCsv = createCSV(objDict, my_result)
            retStr = str()
            with open(os.path.join(UPLOAD_FOLDER, 'pose.csv'), 'w') as of:
                for line in retCsv:
                    retStr += line + '\n'
                    of.write(line + '\n')

            # retStr = str()
            # with open(os.path.join(UPLOAD_FOLDER, 'pose.csv'), 'w') as of:
            # 	for line in my_result:
            # 		lineStr = ','.join([str(l) for l in line])
            # 		retStr += ','.join([str(l) for l in line]) + '\n'
            # 		of.write(lineStr + '\n')

            return retStr
Code Example #9
File: aligning.py Project: hiyyg/articulated-pose
            print(
                'scales, translation for part 0 and part {} is {}, {}'.format(
                    1, scales, translation))
            print(
                'ransac with coordinate descent takes {} seconds for part 0, {}'
                .format(tend - tstart, 1))
        aligned_RT = compose_rt(rotation[0], translation[0])
        rt_dict['pred_it'][0] = aligned_RT
        scale_dict['pred_it'][0] = scales
        aligned_RT = compose_rt(rotation[1], translation[1])
        rt_dict['pred_it'][1] = aligned_RT
        scale_dict['pred_it'][1] = scales

        # final evaluation per part
        for j in range(num_parts - 1):
            q_pred = quaternion_from_matrix(rt_dict['pred'][j][:3, :3])
            q_pred_it = quaternion_from_matrix(rt_dict['pred_it'][j][:3, :3])
            q_gt = quaternion_from_matrix(rt_dict['gt'][j][:3, :3])
            q_pred_list = [q_pred, q_pred_it, q_gt]

            # # how to deal with err
            rt_pred_list = [rt_dict['pred'][j], rt_dict['pred_it'][j]]
            methods = ['vanilla SVD', 'coords descent']
            for m in range(2):
                ang_dis = 2 * np.arccos(sum(
                    q_pred_list[m] * q_gt)) * 180 / np.pi
                xyz_dis = np.linalg.norm(rt_pred_list[m][:3, 3] -
                                         rt_dict['gt'][j][:3, 3])
                if args.verbose:
                    print(
                        'Angular distance is : {} for part {} with {}'.format(
Code Example #10
    def DenseFusion(self, img, depth, posecnn_res):
        my_result_wo_refine = []

        itemid = 1  # simplified for single-label detection; for multi-label handling, check DFYW3.py

        depth = np.array(depth)
        # img = img

        seg_res = posecnn_res

        x1, y1, x2, y2 = seg_res["box"]
        banana_bbox_draw = self.posecnn.get_box_rcwh(seg_res["box"])
        rmin, rmax, cmin, cmax = int(x1), int(x2), int(y1), int(y2)
        depth = depth[:, :, 1]  # the depth image has 3 identical channels, so keep just one
        mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))  # ok

        label_banana = np.squeeze(seg_res["mask"])
        label_banana = ma.getmaskarray(ma.masked_greater(label_banana, 0.5))
        label_banana_nonzeros = label_banana.flatten().nonzero()

        mask_label = ma.getmaskarray(ma.masked_equal(
            label_banana, itemid))  # label from banana label
        mask = mask_label * mask_depth

        mask_nonzeros = mask[:].flatten().nonzero()
        choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
        if len(choose) > self.num_points:
            c_mask = np.zeros(len(choose), dtype=int)
            c_mask[:self.num_points] = 1
            np.random.shuffle(c_mask)
            choose = choose[c_mask.nonzero()]
        else:
            print("len of choose is 0, check error")
            choose = np.pad(choose, (0, self.num_points - len(choose)), 'wrap')

        depth_masked = depth[rmin:rmax,
                             cmin:cmax].flatten()[choose][:,
                                                          np.newaxis].astype(
                                                              np.float32)
        xmap_masked = self.xmap[
            rmin:rmax,
            cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        ymap_masked = self.ymap[
            rmin:rmax,
            cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        choose = np.array([choose])
        pt2 = depth_masked / self.cam_scale
        pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx
        pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy
        cloud = np.concatenate((pt0, pt1, pt2), axis=1)
        img_np = np.array(img)
        img_masked = np.array(img)[:, :, :3]
        img_masked = np.transpose(img_masked, (2, 0, 1))
        img_masked = img_masked[:, rmin:rmax, cmin:cmax]

        cloud = torch.from_numpy(cloud.astype(np.float32))
        choose = torch.LongTensor(choose.astype(np.int32))
        img_masked = self.norm(torch.from_numpy(img_masked.astype(np.float32)))
        index = torch.LongTensor([itemid - 1])

        cloud = Variable(cloud).cuda()
        choose = Variable(choose).cuda()
        img_masked = Variable(img_masked).cuda()
        index = Variable(index).cuda()

        cloud = cloud.view(1, self.num_points, 3)
        img_masked = img_masked.view(1, 3,
                                     img_masked.size()[1],
                                     img_masked.size()[2])

        pred_r, pred_t, pred_c, emb = self.estimator(img_masked, cloud, choose,
                                                     index)
        pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, self.num_points, 1)

        pred_c = pred_c.view(self.bs, self.num_points)
        how_max, which_max = torch.max(pred_c, 1)
        pred_t = pred_t.view(self.bs * self.num_points, 1, 3)
        points = cloud.view(self.bs * self.num_points, 1, 3)

        my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
        my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
        my_pred = np.append(my_r, my_t)
        my_result_wo_refine.append(my_pred.tolist())

        my_result = []
        for ite in range(0, self.iteration):
            T = Variable(torch.from_numpy(
                my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                    self.num_points,
                    1).contiguous().view(1, self.num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(
                np.float32))).cuda().view(1, 3, 3)
            my_mat[0:3, 3] = my_t

            new_cloud = torch.bmm((cloud - T), R).contiguous()
            pred_r, pred_t = self.refiner(new_cloud, emb, index)
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
            my_r_2 = pred_r.view(-1).cpu().data.numpy()
            my_t_2 = pred_t.view(-1).cpu().data.numpy()
            my_mat_2 = quaternion_matrix(my_r_2)

            my_mat_2[0:3, 3] = my_t_2
            my_mat_final = np.dot(my_mat, my_mat_2)
            my_r_final = copy.deepcopy(my_mat_final)
            my_r_final[0:3, 3] = 0
            my_r_final = quaternion_from_matrix(my_r_final, True)
            my_t_final = np.array(
                [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

            my_pred = np.append(my_r_final, my_t_final)
            my_result.append(my_pred.tolist())
        my_result_np = np.array(my_result)
        my_result_mean = np.mean(my_result, axis=0)
        my_r = my_result_mean[:4]
        my_t = my_result_mean[4:]
        my_r_quaternion = my_r
        return my_r_quaternion, my_t
Code Example #11
    def callback(self):

        time1 = time.time()

        rgb_original = self.rgb
        self.rgb = np.transpose(self.rgb, (2, 0, 1))
        norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.rgb = norm(torch.from_numpy(self.rgb.astype(np.float32)))
        
        self.rgb = Variable(self.rgb).cuda()
        semantic = self.model(self.rgb.unsqueeze(0))
        _, pred = torch.max(semantic, dim=1)
        pred = pred * 255
        if IMGSAVE:
            torchvision.utils.save_image(pred, path + '/seg_result/out/' + 'torchpred.png')

        pred = np.transpose(pred.cpu().numpy(), (1, 2, 0)) # (CxHxW)->(HxWxC)
        if IMGSAVE:
            cv2.imwrite(path + '/seg_result/out/' + 'numpypred.png', pred)
        
        _, contours, _ = cv2.findContours(np.uint8(pred), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnt = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(cnt)
        rmin, rmax, cmin, cmax = get_bbox([x, y, w, h])
        print(get_bbox([x, y, w, h]))

        if IMGSAVE:
            img_bbox = np.array(rgb_original.copy())
            cv2.rectangle(img_bbox, (cmin, rmin), (cmax, rmax), (255, 0, 0), 2)
            cv2.imwrite(path + '/seg_result/out/' + 'bbox.png', img_bbox)

        mask_depth = ma.getmaskarray(ma.masked_not_equal(self.depth,0))
        mask_label = ma.getmaskarray(ma.masked_equal(pred, np.array(255)))
        # print(mask_depth.shape, mask_label.shape)
        mask = mask_depth * mask_label.reshape(480, 640)

        img = np.transpose(rgb_original, (2, 0, 1))
        img_masked = img[:, rmin:rmax, cmin:cmax]
        choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]

        #print("length of choose is :{0}".format(len(choose))) 
        if len(choose) == 0:
            cc = torch.LongTensor([0])
            return(cc, cc, cc, cc, cc, cc)
        
        if len(choose) > num_points:
            c_mask = np.zeros(len(choose), dtype=int)
            c_mask[:num_points] = 1  # if number of object pixels are bigger than 500, we select just 500
            np.random.shuffle(c_mask)
            choose = choose[c_mask.nonzero()]  # now len(choose) = 500
        else:
            choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')

        depth_masked = self.depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)

        choose = np.array([choose])

        pt2 = depth_masked
        pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx
        pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy
        cloud = np.concatenate((pt0, pt1, pt2), axis=1)
        cloud = cloud / 1000

        points = torch.from_numpy(cloud.astype(np.float32))
        choose = torch.LongTensor(choose.astype(np.int32))
        img = norm(torch.from_numpy(img_masked.astype(np.float32)))
        idx = torch.LongTensor([self.object_index])

        img = Variable(img).cuda().unsqueeze(0)
        points = Variable(points).cuda().unsqueeze(0)
        choose = Variable(choose).cuda().unsqueeze(0)
        idx = Variable(idx).cuda().unsqueeze(0)
 
        pred_r, pred_t, pred_c, emb = self.estimator(img, points, choose, idx)
        pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
        pred_c = pred_c.view(bs, num_points)
        how_max, which_max = torch.max(pred_c, 1)
        pred_t = pred_t.view(bs * num_points, 1, 3)

        my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
        my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
        my_pred = np.append(my_r, my_t)

        for ite in range(0, iteration):
            T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
            my_mat[0:3, 3] = my_t
            
            new_points = torch.bmm((points - T), R).contiguous()
            pred_r, pred_t = self.refiner(new_points, emb, idx)
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
            my_r_2 = pred_r.view(-1).cpu().data.numpy()
            my_t_2 = pred_t.view(-1).cpu().data.numpy()
            my_mat_2 = quaternion_matrix(my_r_2)
            my_mat_2[0:3, 3] = my_t_2

            my_mat_final = np.dot(my_mat, my_mat_2) # refine pose means two matrix multiplication
            my_r_final = copy.deepcopy(my_mat_final)
            my_r_final[0:3, 3] = 0
            my_r_final = quaternion_from_matrix(my_r_final, True)
            my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

            my_pred = np.append(my_r_final, my_t_final)
            my_r = my_r_final
            my_t = my_t_final

        my_r = quaternion_matrix(my_r)[:3, :3]
        my_t = np.array(my_t)
        
        print('estimated rotation is\n:{0}'.format(my_r))
        print('estimated translation is\n :{0}'.format(my_t))

        ## custom scaling for 3Dbox
        col = [2,0,1]
        new_col = np.zeros((len(col), len(col)))
        for idx, i in enumerate(col):
            new_col[idx, i] = 1

        self.scaled = np.dot(self.scaled, new_col)
        target = np.dot(self.scaled, my_r.T)
        target = np.add(target, my_t)
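        # Project the 8 transformed 3D box corners into the image plane:
        # u = (x / z) * fx + cx, v = (y / z) * fy + cy.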

        p0 = (int((target[0][0]/ target[0][2])*self.cam_fx + self.cam_cx),  int((target[0][1]/ target[0][2])*self.cam_fy + self.cam_cy))
        p1 = (int((target[1][0]/ target[1][2])*self.cam_fx + self.cam_cx),  int((target[1][1]/ target[1][2])*self.cam_fy + self.cam_cy))
        p2 = (int((target[2][0]/ target[2][2])*self.cam_fx + self.cam_cx),  int((target[2][1]/ target[2][2])*self.cam_fy + self.cam_cy))
        p3 = (int((target[3][0]/ target[3][2])*self.cam_fx + self.cam_cx),  int((target[3][1]/ target[3][2])*self.cam_fy + self.cam_cy))
        p4 = (int((target[4][0]/ target[4][2])*self.cam_fx + self.cam_cx),  int((target[4][1]/ target[4][2])*self.cam_fy + self.cam_cy))
        p5 = (int((target[5][0]/ target[5][2])*self.cam_fx + self.cam_cx),  int((target[5][1]/ target[5][2])*self.cam_fy + self.cam_cy))
        p6 = (int((target[6][0]/ target[6][2])*self.cam_fx + self.cam_cx),  int((target[6][1]/ target[6][2])*self.cam_fy + self.cam_cy))
        p7 = (int((target[7][0]/ target[7][2])*self.cam_fx + self.cam_cx),  int((target[7][1]/ target[7][2])*self.cam_fy + self.cam_cy))
        
        cv2.line(rgb_original, p0,p1,(0,0,255), 2)
        cv2.line(rgb_original, p0,p3,(0,0,255), 2)
        cv2.line(rgb_original, p0,p4,(0,0,255), 2)
        cv2.line(rgb_original, p1,p2,(0,0,255), 2)
        cv2.line(rgb_original, p1,p5,(0,0,255), 2)
        cv2.line(rgb_original, p2,p3,(0,0,255), 2)
        cv2.line(rgb_original, p2,p6,(0,0,255), 2)
        cv2.line(rgb_original, p3,p7,(0,0,255), 2)
        cv2.line(rgb_original, p4,p5,(0,0,255), 2)
        cv2.line(rgb_original, p4,p7,(0,0,255), 2)
        cv2.line(rgb_original, p5,p6,(0,0,255), 2)
        cv2.line(rgb_original, p6,p7,(0,0,255), 2)
        

        """ Do not support live-view like cv.imshow """
        plt.figure(figsize = (10,10))
        plt.imshow(rgb_original, cmap = 'gray', interpolation = 'nearest', aspect='auto')
        plt.xticks([]), plt.yticks([])  # to hide tick values on X and Y axis
        plt.show()
        
        """ need python3.x """
        ## https://stackoverflow.com/questions/14655969/opencv-error-the-function-is-not-implemented
        # cv2.imshow('rgb', cv2.cvtColor(rgb_original, cv2.COLOR_BGR2RGB))  # OpenCV uses BGR model
        # # cv2.waitKey(1)
        # key = cv2.waitKey(1) & 0xFF
        # if  key == 27:
        #     print("stopping streaming...")
        #     break

        time2 = time.time()
        print('inference time is :{0}'.format(time2-time1))
Code Example #12
File: aligning.py Project: hiyyg/articulated-pose
def getRANSACInliersCoords(SourceHom0, TargetHom0, \
                    SourceHom1, TargetHom1, joints=None, rt_ref=[None, None], rt_pre=[None, None], MaxIterations=100, PassThreshold=[200, 200], StopThreshold=[1, 1], \
                       viz=False, viz_ransac=False, viz_sample=False, viz_normal=False, verbose=False, \
                       use_jt_pts=False, use_ext_rot=False, \
                       eval_rts=False):
    """
    joints: [position, axis, pts]
            position: [1, 3]
            axis : 3
            pts  : [N, 3]
    """
    BestResidual0 = 1e10
    BestResidual1 = 1e10
    BestInlierRatio0 = 0
    BestInlierRatio1 = 0
    BestInlierIdx0 = np.arange(SourceHom0.shape[1])
    BestInlierIdx1 = np.arange(SourceHom1.shape[1])

    # if viz_ransac: # todo
    #     plot3d_pts([[SourceHom0[:3].transpose(), SourceHom1[:3].transpose(), TargetHom0[:3].transpose(), TargetHom1[:3].transpose()]], [['source0', 'source1', 'target0', 'target1']], s=5, title_name=['points to ransac'], color_channel=None, save_fig=False, sub_name='default')

    position, joint_axis, joint_pts = get_joint_features(joints)
    assert joint_pts.shape[0] == 4
    ang_dis_list = [[], []]
    inliers_ratio = [[], []]
    select_index = [0] * 2
    for i in range(0, MaxIterations):
        if i > 5:
            verbose = False
        RandIdx0 = np.random.randint(SourceHom0.shape[1], size=5)
        RandIdx1 = np.random.randint(SourceHom1.shape[1], size=5)

        scale, Rs, Ts, OutTrans = estimateSimilarityUmeyamaCoords(SourceHom0[:, RandIdx0], TargetHom0[:, RandIdx0],\
                         SourceHom1[:, RandIdx1], TargetHom1[:, RandIdx1], joint_axis, joint_pts=joint_pts, rt_ref=rt_ref, rt_pre=rt_pre, \
                         viz=viz, viz_ransac=viz_ransac, viz_sample=viz_sample, use_jt_pts=use_jt_pts, use_ext_rot=use_ext_rot, verbose=verbose, index=i+1)

        # evaluate per part pts
        if eval_rts:
            # print('evaluating inliers using rts for pair 0')
            Residual0, InlierRatio0, InlierIdx0 = evaluateModel(
                OutTrans[0], SourceHom0, TargetHom0, PassThreshold[0])
        else:
            Residual0, InlierRatio0, InlierIdx0 = evaluateModelRotation(
                Rs[0].T,
                SourceHom0,
                TargetHom0,
                0.05 * PassThreshold[0],
                rt_ref=rt_ref[0],
                viz_normal=viz_normal)

        # if Residual0 < BestResidual0: # todo
        # if InlierRatio0 > BestInlierRatio0 and Residual0 < BestResidual0:

        if eval_rts:
            # print('evaluating inliers using rts for pair 1')
            Residual1, InlierRatio1, InlierIdx1 = evaluateModel(
                OutTrans[1], SourceHom1, TargetHom1, PassThreshold[1])
        else:
            Residual1, InlierRatio1, InlierIdx1 = evaluateModelRotation(
                Rs[1].T,
                SourceHom1,
                TargetHom1,
                0.05 * PassThreshold[1],
                rt_ref=rt_ref[1],
                viz_normal=viz_normal)

        if viz_ransac:
            inliers_ratio[0].append(InlierRatio0)
            inliers_ratio[1].append(InlierRatio1)
            for j in range(2):
                q_gt = quaternion_from_matrix(rt_ref[j][:3, :3])
                q_iter = quaternion_from_matrix(Rs[j].T)
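                # Angular distance between unit quaternions: theta = 2 * arccos(<q_iter, q_gt>),
                # folded into [0, 180] degrees since q and -q encode the same rotation.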
                ang_dis = 2 * np.arccos(sum(q_iter * q_gt)) * 180 / np.pi
                if ang_dis > 180:
                    ang_dis = 360 - ang_dis
                ang_dis_list[j].append(ang_dis)

        if InlierRatio0 > BestInlierRatio0:
            select_index[0] = i
            BestResidual0 = Residual0
            BestInlierRatio0 = InlierRatio0
            BestInlierIdx0 = InlierIdx0

        # if Residual1 < BestResidual1: # todo
        # if InlierRatio1 > BestInlierRatio1 and Residual1 < BestResidual1:
        if InlierRatio1 > BestInlierRatio1:
            select_index[1] = i
            BestResidual1 = Residual1
            BestInlierRatio1 = InlierRatio1
            BestInlierIdx1 = InlierIdx1
        # print('Iteration: ', i, '\n Residual: ', [Residual0, Residual1], 'Inlier ratio: ', [InlierRatio0, InlierRatio1])

        if BestResidual0 < StopThreshold[0] and BestResidual1 < StopThreshold[
                1]:
            break

    # if viz_ransac:
    #     fig = plt.figure(dpi=200)
    #     for j in range(2):
    #         ax = plt.subplot(1, 2, j+1)
    #         plt.plot(range(len(ang_dis_list[j])), ang_dis_list[j], label='rotation err')
    #         plt.plot(range(len(inliers_ratio[j])), inliers_ratio[j], label='inliers ratio')
    #         plt.plot([select_index[j]], [ang_dis_list[j][select_index[j]]], 'bo')
    #         plt.plot([select_index[0]], [ang_dis_list[j][select_index[0]]], 'ro')
    #         plt.xlabel('Ransac sampling order')
    #         plt.ylabel('value')
    #         ax.text(0.55, 0.80, 'Select {0}th inliers with {1:0.4f} rotation error'.format(select_index[j], ang_dis_list[j][select_index[j]]), transform=ax.transAxes, color='blue', fontsize=6)
    #         plt.grid(True)
    #         plt.legend()
    #         plt.title('part {}'.format(j))
    #     plt.show()
    inliers = [
        SourceHom0[:, BestInlierIdx0], TargetHom0[:, BestInlierIdx0],
        BestInlierRatio0, SourceHom1[:, BestInlierIdx1],
        TargetHom1[:, BestInlierIdx1], BestInlierRatio1
    ]

    return inliers, [ang_dis_list, inliers_ratio, select_index]
Code Example #13
def main():
    # g13: parameter setting -------------------
    batch_id = 1
    
    opt.dataset ='linemod'
    opt.dataset_root = './datasets/linemod/Linemod_preprocessed'
    estimator_path = 'trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth'
    refiner_path = 'trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth'
    opt.resume_posenet = estimator_path
    opt.resume_refinenet = refiner_path
    dataset_config_dir = 'datasets/linemod/dataset_config'
    output_result_dir = 'experiments/eval_result/linemod'
    bs = 1 #fixed because of the default setting in torch.utils.data.DataLoader
    opt.iteration = 2 #default is 4 in eval_linemod.py
    t1_idx = 0
    t1_total_eval_num = 3
    
    axis_range = 0.1   # the length of X, Y, and Z axis in 3D
    vimg_dir = 'verify_img'
    if not os.path.exists(vimg_dir):
        os.makedirs(vimg_dir)
    #-------------------------------------------
    
    if opt.dataset == 'ycb':
        opt.num_objects = 21 #number of object classes in the dataset
        opt.num_points = 1000 #number of points on the input pointcloud
        opt.outf = 'trained_models/ycb' #folder to save trained models
        opt.log_dir = 'experiments/logs/ycb' #folder to save logs
        opt.repeat_epoch = 1 #number of repeat times for one epoch training
    elif opt.dataset == 'linemod':
        opt.num_objects = 13
        opt.num_points = 500
        opt.outf = 'trained_models/linemod'
        opt.log_dir = 'experiments/logs/linemod'
        opt.repeat_epoch = 20
    else:
        print('Unknown dataset')
        return
    
    estimator = PoseNet(num_points = opt.num_points, num_obj = opt.num_objects)
    estimator.cuda()
    refiner = PoseRefineNet(num_points = opt.num_points, num_obj = opt.num_objects)
    refiner.cuda()

    if opt.resume_posenet != '':
        estimator.load_state_dict(torch.load(estimator_path))

    if opt.resume_refinenet != '':
        refiner.load_state_dict(torch.load(refiner_path))
        opt.refine_start = True
        opt.decay_start = True
        opt.lr *= opt.lr_rate
        opt.w *= opt.w_rate
        opt.batch_size = int(opt.batch_size / opt.iteration)
        optimizer = optim.Adam(refiner.parameters(), lr=opt.lr)
    else:
        opt.refine_start = False
        opt.decay_start = False
        optimizer = optim.Adam(estimator.parameters(), lr=opt.lr)


    if opt.dataset == 'ycb':
        test_dataset = PoseDataset_ycb('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start)
    elif opt.dataset == 'linemod':
        test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start)
    testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers)
    print('complete loading testing loader\n')
    opt.sym_list = test_dataset.get_sym_list()
    opt.num_points_mesh = test_dataset.get_num_points_mesh()

    print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\n\
        length of the testing set: {0}\nnumber of sample points on mesh: {1}\n\
        symmetry object list: {2}'\
        .format( len(test_dataset), opt.num_points_mesh, opt.sym_list))
    
    
    
    #load pytorch model
    estimator.eval()    
    refiner.eval()
    criterion = Loss(opt.num_points_mesh, opt.sym_list)
    criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list)
    fw = open('{0}/t1_eval_result_logs.txt'.format(output_result_dir), 'w')

    #Pose estimation
    for j, data in enumerate(testdataloader, 0):
        # g13: modify this part for evaluation target--------------------
        if j == t1_total_eval_num:
            break
        #----------------------------------------------------------------
        points, choose, img, target, model_points, idx = data
        if len(points.size()) == 2:
            print('No.{0} NOT Pass! Lost detection!'.format(j))
            fw.write('No.{0} NOT Pass! Lost detection!\n'.format(j))
            continue
        points, choose, img, target, model_points, idx = Variable(points).cuda(), \
                                                             Variable(choose).cuda(), \
                                                             Variable(img).cuda(), \
                                                             Variable(target).cuda(), \
                                                             Variable(model_points).cuda(), \
                                                             Variable(idx).cuda()
        pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
        _, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start)

        #if opt.refine_start: #iterative poserefinement
        #    for ite in range(0, opt.iteration):
        #        pred_r, pred_t = refiner(new_points, emb, idx)
        #        dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points)
        
        pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, opt.num_points, 1)
        pred_c = pred_c.view(bs, opt.num_points)
        how_max, which_max = torch.max(pred_c, 1)
        pred_t = pred_t.view(bs * opt.num_points, 1, 3)
    
        my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
        my_t = (points.view(bs * opt.num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
        my_pred = np.append(my_r, my_t)
    
        for ite in range(0, opt.iteration):
            T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(opt.num_points, 1).contiguous().view(1, opt.num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
            my_mat[0:3, 3] = my_t
            
            new_points = torch.bmm((points - T), R).contiguous()
            pred_r, pred_t = refiner(new_points, emb, idx)
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
            my_r_2 = pred_r.view(-1).cpu().data.numpy()
            my_t_2 = pred_t.view(-1).cpu().data.numpy()
            my_mat_2 = quaternion_matrix(my_r_2)
            my_mat_2[0:3, 3] = my_t_2
    
            my_mat_final = np.dot(my_mat, my_mat_2)
            my_r_final = copy.deepcopy(my_mat_final)
            my_r_final[0:3, 3] = 0
            my_r_final = quaternion_from_matrix(my_r_final, True)
            my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
    
            my_pred = np.append(my_r_final, my_t_final)
            my_r = my_r_final
            my_t = my_t_final

        # g13: start drawing pose on image------------------------------------
        # pick up image
        print("index {0}: {1}".format(j, test_dataset.list_rgb[j]))
        img = Image.open(test_dataset.list_rgb[j])
        
        # pick up center position by bbox
        meta_file = open('{0}/data/{1}/gt.yml'.format(opt.dataset_root, '%02d' % test_dataset.list_obj[j]), 'r')
        meta = {}
        meta = yaml.load(meta_file)
        which_item = test_dataset.list_rank[j]
        bbx = meta[which_item][0]['obj_bb']
        draw = ImageDraw.Draw(img) 
        
        # draw box (ensure this is the right object)
        draw.line((bbx[0],bbx[1], bbx[0], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        draw.line((bbx[0],bbx[1], bbx[0]+bbx[2], bbx[1]), fill=(255,0,0), width=5)
        draw.line((bbx[0],bbx[1]+bbx[3], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        draw.line((bbx[0]+bbx[2],bbx[1], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        
        #get center
        c_x = bbx[0]+int(bbx[2]/2)
        c_y = bbx[1]+int(bbx[3]/2)
        draw.point((c_x,c_y), fill=(255,255,0))
        
        #get the 3D position of center
        cam_intrinsic = np.zeros((3,3))
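        # itemset uses flat indices: 0 -> K[0,0]=fx, 4 -> K[1,1]=fy, 2 -> K[0,2]=cx, 5 -> K[1,2]=cy, 8 -> K[2,2]=1.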
        cam_intrinsic.itemset(0, test_dataset.cam_fx)
        cam_intrinsic.itemset(4, test_dataset.cam_fy)
        cam_intrinsic.itemset(2, test_dataset.cam_cx)
        cam_intrinsic.itemset(5, test_dataset.cam_cy)
        cam_intrinsic.itemset(8, 1)
        cam_extrinsic = my_mat_final[0:3, :]
        cam2d_3d = np.matmul(cam_intrinsic, cam_extrinsic)
        cen_3d = np.matmul(np.linalg.pinv(cam2d_3d), [[c_x],[c_y],[1]])
        # replace img.show() with plt.imshow(img)
        
        #transpose three 3D axis point into 2D
        x_3d = cen_3d + [[axis_range],[0],[0],[0]]
        y_3d = cen_3d + [[0],[axis_range],[0],[0]]
        z_3d = cen_3d + [[0],[0],[axis_range],[0]]
        x_2d = np.matmul(cam2d_3d, x_3d)
        y_2d = np.matmul(cam2d_3d, y_3d)
        z_2d = np.matmul(cam2d_3d, z_3d)
        
        #draw the axis on 2D
        draw.line((c_x, c_y, x_2d[0], x_2d[1]), fill=(255,255,0), width=5)
        draw.line((c_x, c_y, y_2d[0], y_2d[1]), fill=(0,255,0), width=5)
        draw.line((c_x, c_y, z_2d[0], z_2d[1]), fill=(0,0,255), width=5)

        #g13: show image
        #img.show()
        
        #save file under file 
        img_file_name = '{0}/pred_obj{1}_pic{2}.png'.format(vimg_dir, test_dataset.list_obj[j], which_item)
        img.save( img_file_name, "PNG" )
        img.close()
Code Example #14
File: aligning.py Project: hiyyg/articulated-pose
def estimateSimilarityTransformCoords(source: np.array, target: np.array, source1=None, target1=None, joints=None, rt_ref=[None, None], rt_pre=[None, None],\
            viz=False, viz_ransac=False, viz_sample=False, viz_normal=False, use_jt_pts=False, eval_rts=False, use_ext_rot=False, verbose=False, index=0):
    nIter = 100
    # [4, N], [4, N]
    SourceHom, TargetHom, TargetNorm, SourceNorm, RatioTS, RatioST, PassT, StopT = set_config(
        source, target, verbose)
    SourceHom1, TargetHom1, TargetNorm1, SourceNorm1, RatioTS1, RatioST1, PassT1, StopT1 = set_config(
        source1, target1, verbose)

    # 1. find inliers
    inliers, records = getRANSACInliersCoords(SourceHom, TargetHom, SourceHom1, TargetHom1, joints=joints, rt_ref=rt_ref, rt_pre=rt_pre, \
                     MaxIterations=nIter, PassThreshold=[PassT, PassT1], StopThreshold=[StopT, StopT1], \
                     viz=viz, viz_ransac=viz_ransac, viz_sample=viz_sample, viz_normal=viz_normal, use_jt_pts=use_jt_pts, eval_rts=eval_rts, use_ext_rot=use_ext_rot, verbose=verbose)

    SourceInliersHom, TargetInliersHom, BestInlierRatio0, SourceInliersHom1, TargetInliersHom1, BestInlierRatio1 = inliers
    ang_dis_list, inliers_ratio, select_index = records

    if (BestInlierRatio0 < 0.05) or (BestInlierRatio1 < 0.05):
        print('[ WARN ] - Something is wrong. Small BestInlierRatio: ',
              [BestInlierRatio0, BestInlierRatio1])
        return None, None, None, None

    # 2. further use inlier points and joints to decide the final pose
    position, joint_axis, joint_pts = get_joint_features(joints)
    assert joint_pts.shape[0] == 4
    Scale, Rotations, Translations, OutTransforms = estimateSimilarityUmeyamaCoords(SourceInliersHom, TargetInliersHom, SourceInliersHom1, TargetInliersHom1, joint_axis, rt_ref=rt_ref, joint_pts=joint_pts, \
         viz=viz, viz_ransac=viz_ransac, viz_sample=viz_sample, use_jt_pts=use_jt_pts, use_ext_rot=use_ext_rot, verbose=verbose)

    if verbose:
        print('BestInlierRatio:', BestInlierRatio0)

    if viz_ransac:
        fig = plt.figure(dpi=200)
        for j in range(2):
            q_gt = quaternion_from_matrix(rt_ref[j][:3, :3])
            q_iter = quaternion_from_matrix(Rotations[j].T)
            ang_dis = 2 * np.arccos(sum(q_iter * q_gt)) * 180 / np.pi
            if ang_dis > 180:
                ang_dis = 360 - ang_dis
            ax = plt.subplot(1, 2, j + 1)
            plt.plot(range(len(ang_dis_list[j])),
                     ang_dis_list[j],
                     label='rotation err')
            plt.plot(range(len(inliers_ratio[j])),
                     inliers_ratio[j],
                     label='inliers ratio')
            plt.plot([select_index[j]], [ang_dis_list[j][select_index[j]]],
                     'bo')
            plt.plot([select_index[0]], [ang_dis_list[j][select_index[0]]],
                     'ro')
            plt.plot([select_index[j]], [ang_dis],
                     'yo',
                     label='final rotation error')
            plt.xlabel('Ransac sampling order')
            plt.ylabel('value')
            ax.text(0.55,
                    0.80,
                    'Select {0}th inliers with {1:0.4f} rotation error'.format(
                        select_index[j], ang_dis_list[j][select_index[j]]),
                    transform=ax.transAxes,
                    color='blue',
                    fontsize=6)
            plt.grid(True)
            plt.legend()
            plt.title('part {}'.format(j))
        plt.show()
        save_path = '/home/lxiaol9/Downloads/ARCwork/6DPOSE/results/test_pred/images'
        fig.savefig('{}/{}_{}.png'.format(save_path, index, 'coord_descent'),
                    pad_inches=0)

    return Scale, Rotations, Translations, OutTransforms
Code Example #15
    def pose(self):

        # get mask and segmentation
        mask, bbox, viz = self.draw_seg(self.batch_predict())
        pred = mask
        pred = pred * 255
        pred = np.transpose(pred, (1, 2, 0))  # (CxHxW)->(HxWxC)

        # convert img into tensor
        rgb_original = self.rgb
        norm = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])
        self.rgb = Variable(norm(torch.from_numpy(self.rgb.astype(
            np.float32)))).cuda()

        all_masks = []
        mask_depth = ma.getmaskarray(ma.masked_not_equal(self.depth, 0))
        mask_label = ma.getmaskarray(ma.masked_equal(pred, np.array(255)))

        for b in range(len(bbox)):

            mask = mask_depth * mask_label[:, :, b]
            rmin = int(bbox[b, 0])
            rmax = int(bbox[b, 1])
            cmin = int(bbox[b, 2])
            cmax = int(bbox[b, 3])

            img = np.transpose(rgb_original, (0, 1, 2))  #CxHxW
            img_masked = img[:, rmin:rmax, cmin:cmax]
            choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]

            if len(choose) == 0:
                cc = torch.LongTensor([0])
                return (cc, cc, cc, cc, cc, cc)

            if len(choose) > num_points:
                c_mask = np.zeros(len(choose), dtype=int)
                c_mask[:num_points] = 1
                np.random.shuffle(c_mask)
                choose = choose[c_mask.nonzero()]
            else:
                choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')

            # visualize each masks
            # plt.imshow(mask), plt.show()

            depth_masked = self.depth[
                rmin:rmax,
                cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            xmap_masked = self.xmap[
                rmin:rmax,
                cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            ymap_masked = self.ymap[
                rmin:rmax,
                cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            choose = np.array([choose])

            cam_scale = 1.0
            pt2 = depth_masked / cam_scale
            pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx
            pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy
            cloud = np.concatenate((pt0, pt1, pt2), axis=1)
            cloud = cloud / 1000

            points = torch.from_numpy(cloud.astype(np.float32))
            choose = torch.LongTensor(choose.astype(np.int32))

            img_ = norm(torch.from_numpy(img_masked.astype(np.float32)))
            idx = torch.LongTensor([self.object_index])
            img_ = Variable(img_).cuda().unsqueeze(0)
            points = Variable(points).cuda().unsqueeze(0)
            choose = Variable(choose).cuda().unsqueeze(0)
            idx = Variable(idx).cuda().unsqueeze(0)

            pred_r, pred_t, pred_c, emb = self.estimator(
                img_, points, choose, idx)
            pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
            pred_c = pred_c.view(bs, num_points)
            how_max, which_max = torch.max(pred_c, 1)  # select the most confident point
            pred_t = pred_t.view(bs * num_points, 1, 3)

            # print("max confidence", how_max)

            my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
            my_t = (points.view(bs * num_points, 1, 3) +
                    pred_t)[which_max[0]].view(-1).cpu().data.numpy()
            my_pred = np.append(my_r, my_t)

            for ite in range(0, iteration):

                T = Variable(torch.from_numpy(
                    my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                        num_points, 1).contiguous().view(1, num_points, 3)
                my_mat = quaternion_matrix(my_r)
                R = Variable(
                    torch.from_numpy(my_mat[:3, :3].astype(
                        np.float32))).cuda().view(1, 3, 3)
                my_mat[0:3, 3] = my_t

                new_points = torch.bmm((points - T), R).contiguous()
                pred_r, pred_t = self.refiner(new_points, emb, idx)
                pred_r = pred_r.view(1, 1, -1)
                pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
                my_r_2 = pred_r.view(-1).cpu().data.numpy()
                my_t_2 = pred_t.view(-1).cpu().data.numpy()
                my_mat_2 = quaternion_matrix(my_r_2)
                my_mat_2[0:3, 3] = my_t_2

                my_mat_final = np.dot(
                    my_mat,
                    my_mat_2)  # refining the pose amounts to composing the two 4x4 transforms
                my_r_final = copy.deepcopy(my_mat_final)
                my_r_final[0:3, 3] = 0
                my_r_final = quaternion_from_matrix(my_r_final, True)
                my_t_final = np.array([
                    my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]
                ])

                my_pred = np.append(my_r_final, my_t_final)
                my_r = my_r_final
                my_t = my_t_final

            # POSITION # ndds has cm units
            my_t = np.array(my_t)
            # my_t = np.array([my_t[0], my_t[1], 1-my_t[2]])
            # print('estimated translation is:{0}'.format(my_t))

            # ROTATION
            my_r = quaternion_matrix(my_r)[:3, :3]
            # my_r = np.dot(my_r, np.array([[1, 0, 0], [0, 0, -1], [0, -1, 0]]))
            # print('estimated rotation is\n:{0}'.format(my_r))

            # Draw estimated pose 3Dbox
            target = np.dot(self.scaled, my_r.T)  #my_r.T
            target = np.add(target, my_t)
            self.draw_cube(target, viz)

            # Norm pose
            NormPos = np.linalg.norm((my_t), ord=1)
            print("Pos:{0}".format(my_t))

        plt.figure(figsize=(10, 10)), plt.imshow(viz), plt.show()

        return viz
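
The refinement loop in pose() above composes the network's coarse pose with each refiner increment as a product of two 4x4 homogeneous transforms. A minimal standalone sketch of that composition step, assuming the same quaternion_matrix / quaternion_from_matrix helpers used throughout these examples (the import path below is a guess; adjust it to wherever the transformations module lives in your project):

import numpy as np
# assumption: adjust this import to wherever the quaternion helpers live (e.g. lib.transformations)
from transformations import quaternion_matrix, quaternion_from_matrix

def compose_pose(q_coarse, t_coarse, q_delta, t_delta):
    """Compose a coarse pose (quaternion, translation) with a refiner increment; return the refined pair."""
    mat = quaternion_matrix(q_coarse)        # 4x4 homogeneous matrix built from the quaternion
    mat[0:3, 3] = t_coarse
    mat_delta = quaternion_matrix(q_delta)
    mat_delta[0:3, 3] = t_delta
    mat_final = np.dot(mat, mat_delta)       # "refine" = one matrix multiplication
    rot_only = mat_final.copy()
    rot_only[0:3, 3] = 0                     # keep only the rotation block, as the loop above does
    q_final = quaternion_from_matrix(rot_only, True)
    t_final = mat_final[0:3, 3].copy()
    return q_final, t_final

With a helper like this, each refiner iteration reduces to q, t = compose_pose(q, t, my_r_2, my_t_2).
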
コード例 #16
0
ファイル: dataset.py プロジェクト: densechen/CASS
    def __getitem__(self, index):
        try:
            img = np.array(
                cv2.imread('{0}/{1}_color.png'.format(
                    self.root, self.list[index]))) / 255.
            depth = np.array(
                cv2.imread(
                    '{0}/{1}_depth.png'.format(self.root, self.list[index]),
                    -1))
            if len(depth.shape) == 3:
                depth = np.uint16(depth[:, :, 1] * 256) + \
                    np.uint16(depth[:, :, 2])
            label = np.array(
                cv2.imread('{0}/{1}_mask.png'.format(self.root,
                                                     self.list[index]))[:, :,
                                                                        2])

            meta = dict()
            with open("{0}/{1}_meta.txt".format(self.root, self.list[index]),
                      "r") as f:
                for line in f:
                    line = line.replace("\n", "")
                    line = line.split(" ")
                    if int(line[1]) == 0:  # mask out background
                        continue
                    d = {"cls_id": line[1], "inst_name": line[2]}
                    if "real_train" in self.list[index]:
                        d["inst_dir"] = os.path.join(
                            self.root, "obj_models", "real_train",
                            line[2] + "_{}.ply".format(self.num_pt))
                        d["ori_inst_dir"] = os.path.join(
                            self.root, "obj_models", "real_train",
                            line[2] + ".obj")
                    elif "real_test" in self.list[index]:
                        d["inst_dir"] = os.path.join(
                            self.root, "obj_models", "real_test",
                            line[2] + "_{}.ply".format(self.num_pt))
                        d["ori_inst_dir"] = os.path.join(
                            self.root, "obj_models", "real_test",
                            line[2] + ".obj")
                    else:
                        d["inst_dir"] = os.path.join(
                            self.root, "obj_models", "train", *line[2:],
                            "model_{}.ply".format(self.num_pt))
                        d["ori_inst_dir"] = os.path.join(
                            self.root, "obj_models", "train", *line[2:],
                            "model.obj")
                    meta[int(line[0])] = d

            if not self.list[index].startswith("real"):
                cam_cx = self.cam_cx_2
                cam_cy = self.cam_cy_2
                cam_fx = self.cam_fx_2
                cam_fy = self.cam_fy_2
            else:
                cam_cx = self.cam_cx_1
                cam_cy = self.cam_cy_1
                cam_fx = self.cam_fx_1
                cam_fy = self.cam_fy_1

            obj = list(meta.keys())
            iidx = np.arange(len(obj))
            np.random.shuffle(iidx)
            for idx in iidx:
                mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
                mask_label = ma.getmaskarray(ma.masked_equal(label, obj[idx]))
                mask = mask_label * mask_depth
                if len(mask.nonzero()[0]) > self.minimum_num_pt:
                    break
            else:
                print("Can't find any valid training object in {}".format(
                    self.list[index]))
                raise ValueError

            # A method to load target_r and target_t
            if os.path.isfile("{}/gts/{}_poses.txt".format(
                    self.root, self.list[index])) and os.path.isfile(
                        "{}/gts/{}_scales.txt".format(self.root,
                                                      self.list[index])):
                meta["poses"] = np.loadtxt("{}/gts/{}_poses.txt".format(
                    self.root, self.list[index])).reshape(-1, 4, 4)
                meta["scales"] = np.loadtxt("{}/gts/{}_scales.txt".format(
                    self.root, self.list[index])).reshape(-1, 3)
            else:
                coord = cv2.imread('{0}/{1}_coord.png'.format(
                    self.root, self.list[index]))[:, :, :3][:, :, (2, 1, 0)]
                coord = np.array(coord, dtype=np.float32) / 255.
                coord[:, :, 2] = 1.0 - coord[:, :, 2]
                intr = np.array([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy],
                                 [0., 0., 1.]])
                poses, scales = align(obj, label, coord, depth, intr)
                os.makedirs(os.path.dirname("{}/gts/{}_poses.txt".format(
                    self.root, self.list[index])),
                            exist_ok=True)
                np.savetxt(
                    "{}/gts/{}_poses.txt".format(self.root, self.list[index]),
                    poses.reshape(-1, 4))
                np.savetxt(
                    "{}/gts/{}_scales.txt".format(self.root, self.list[index]),
                    scales.reshape(-1, 3))
                meta["poses"] = poses
                meta["scales"] = scales
            rmin, rmax, cmin, cmax = get_bbox(mask_label)
            img_masked = np.transpose(img, (2, 0, 1))[:, rmin:rmax, cmin:cmax]
            target_r = meta['poses'][idx][:3, 0:3]
            target_t = np.array([meta['poses'][idx][:3, 3:4].flatten()])

            choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
            if len(choose) > self.num_pt:
                c_mask = np.zeros(len(choose), dtype=int)
                c_mask[:self.num_pt] = 1
                np.random.shuffle(c_mask)
                choose = choose[c_mask.nonzero()]
            else:
                choose = np.pad(choose, (0, self.num_pt - len(choose)), 'wrap')

            depth_masked = depth[
                rmin:rmax,
                cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            xmap_masked = self.xmap[
                rmin:rmax,
                cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            ymap_masked = self.ymap[
                rmin:rmax,
                cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            choose = np.array([choose])

            cam_scale = 1000.0
            pt2 = depth_masked / cam_scale
            pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
            pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
            cloud = np.concatenate((-pt0, -pt1, pt2), axis=1)

            model_points = load_obj(path=meta[obj[idx]]["inst_dir"],
                                    ori_path=meta[obj[idx]]["ori_inst_dir"],
                                    num_points=self.num_pt)

            model_points = model_points * meta["scales"][idx]

            target = np.dot(model_points, target_r.T)
            target = np.add(target, target_t)
            matrix = np.eye(4)
            matrix[:3, :3] = target_r
            quat = quaternion_from_matrix(matrix)

            return torch.from_numpy(cloud.astype(np.float32)), \
                torch.LongTensor(choose.astype(np.int32)), \
                self.norm(torch.from_numpy(img_masked.astype(np.float32))), \
                torch.from_numpy(target.astype(np.float32)), \
                torch.from_numpy(model_points.astype(np.float32)), \
                torch.LongTensor([int(meta[obj[idx]]["cls_id"])-1]), \
                torch.from_numpy(quat.astype(np.float32)), \
                torch.from_numpy(target_t.astype(np.float32))
        except Exception:
            # fall back to a different sample if anything in this item fails to load
            return self.__getitem__(index // 2)
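
The __getitem__ above back-projects the selected depth pixels into a camera-frame point cloud with the pinhole model. A minimal sketch of that step (function name illustrative), assuming the usual convention in these examples that xmap stores row indices and ymap stores column indices; note that this loader additionally negates the first two components, so treat the sign convention as dataset-specific:

import numpy as np

def backproject(depth_crop, xmap_crop, ymap_crop, choose,
                cam_cx, cam_cy, cam_fx, cam_fy, cam_scale=1000.0):
    """Back-project chosen pixels of a depth crop (rmin:rmax, cmin:cmax) to 3D camera coordinates.
    `choose` indexes the flattened crop, exactly as in the loader above."""
    z = depth_crop.flatten()[choose][:, np.newaxis].astype(np.float32) / cam_scale
    x = (ymap_crop.flatten()[choose][:, np.newaxis] - cam_cx) * z / cam_fx  # ymap holds column indices (u)
    y = (xmap_crop.flatten()[choose][:, np.newaxis] - cam_cy) * z / cam_fy  # xmap holds row indices (v)
    return np.concatenate((x, y, z), axis=1)  # (num_pt, 3) cloud, in meters when depth is in millimeters
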
コード例 #17
0
                                                 init_cloud,
                                                 max_iterations=20000,
                                                 tolerance=0.000001)
        t_itr.append(iterations)
        # pcd_src = o3d.geometry.PointCloud()
        # pcd_target = o3d.geometry.PointCloud()
        # pcd_src.points = o3d.utility.Vector3dVector(init_cloud)
        # pcd_target.points = o3d.utility.Vector3dVector(original_cloud)
        # t_itr.append(0)
        # reg_p2p = o3d.pipelines.registration.registration_icp(pcd_target, pcd_src, 0.2, np.eye(4),o3d.pipelines.registration.TransformationEstimationPointToPoint(), o3d.pipelines.registration.ICPConvergenceCriteria(max_iteration = 20000, relative_rmse = 1.0e-10, relative_fitness=1.000000e-10))
        # delta_T = reg_p2p.transformation

        my_mat_final = np.dot(my_mat, delta_T)
        my_r_final = copy.deepcopy(my_mat_final)
        my_r_final[0:3, 3] = 0
        my_r_final = quaternion_from_matrix(my_r_final, True)
        my_t_final = np.array(
            [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

        my_pred = np.append(my_r_final, my_t_final)

        my_r = my_r_final
        my_t = my_t_final
    else:
        for ite in range(0, iteration):
            T = Variable(torch.from_numpy(
                my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                    num_points, 1).contiguous().view(1, num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(
                np.float32))).cuda().view(1, 3, 3)
コード例 #18
0
def pose_predict(img, depth, rois):
    label_pub = rospy.Publisher('/label', Image, queue_size=10)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    class_list = [
        '002_master_chef_can', '003_cracker_box', '004_sugar_box',
        '005_tomato_soup_can', '006_mustard_bottle', '007_tuna_fish_can',
        '008_pudding_box', '009_gelatin_box', '010_potted_meat_can',
        '011_banana', '019_pitcher_base', '025_mug', '021_bleach_cleanser',
        '024_bowl', '035_power_drill', '036_wood_block', '037_scissors',
        '040_large_marker', '051_large_clamp', '052_extra_large_clamp',
        '061_foam_brick'
    ]
    try:
        object_number = len(rois)

        #lst = posecnn_rois[:,0:1].flatten()
        #lst = np.unique(label)
        my_result_wo_refine = []
        my_result = []
        for idx in range(object_number):
            #itemid = lst[idx]
            itemid = class_list.index(rois[idx].Class) + 1
            #itemid = class_list.index(rois[idx].Class) +3
            print(object_number, itemid, rois[idx])

            try:
                label, pub_label = seg_predict(img)
                pub_label = pub_label * 50
                label_pub.publish(bridge.cv2_to_imgmsg(pub_label, '8UC1'))
                ####################### with Detection algorithm #################################
                # rmin, rmax, cmin,cmax = get_bbox(rois,idx)
                #####################################################################################
                mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
                mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
                mask = mask_label * mask_depth
                rmin, rmax, cmin, cmax = get_bbox(mask_label)

                choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
                if len(choose) > num_points:
                    c_mask = np.zeros(len(choose), dtype=int)
                    c_mask[:num_points] = 1
                    np.random.shuffle(c_mask)
                    choose = choose[c_mask.nonzero()]
                else:
                    choose = np.pad(choose, (0, num_points - len(choose)),
                                    'wrap')

                depth_masked = depth[
                    rmin:rmax,
                    cmin:cmax].flatten()[choose][:,
                                                 np.newaxis].astype(np.float32)
                xmap_masked = xmap[
                    rmin:rmax,
                    cmin:cmax].flatten()[choose][:,
                                                 np.newaxis].astype(np.float32)
                ymap_masked = ymap[
                    rmin:rmax,
                    cmin:cmax].flatten()[choose][:,
                                                 np.newaxis].astype(np.float32)
                choose = np.array([choose])

                pt2 = depth_masked / cam_scale
                pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
                pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
                cloud = np.concatenate((pt0, pt1, pt2), axis=1)

                img_masked = np.array(img)[:, :, :3]
                img_masked = np.transpose(img_masked, (2, 0, 1))
                img_masked = img_masked[:, rmin:rmax, cmin:cmax]

                cloud = torch.from_numpy(cloud.astype(np.float32))
                choose = torch.LongTensor(choose.astype(np.int32))
                img_masked = norm(
                    torch.from_numpy(img_masked.astype(np.float32)))
                index = torch.LongTensor([itemid - 1])

                cloud = Variable(cloud).cuda()
                choose = Variable(choose).cuda()
                img_masked = Variable(img_masked).cuda()
                index = Variable(index).cuda()
                cloud = cloud.view(1, num_points, 3)
                img_masked = img_masked.view(1, 3,
                                             img_masked.size()[1],
                                             img_masked.size()[2])
                pred_r, pred_t, pred_c, emb = estimator(
                    img_masked, cloud, choose, index)
                pred_r = pred_r / torch.norm(pred_r, dim=2).view(
                    1, num_points, 1)
                pred_c = pred_c.view(bs, num_points)
                how_max, which_max = torch.max(pred_c, 1)
                pred_t = pred_t.view(bs * num_points, 1, 3)
                points = cloud.view(bs * num_points, 1, 3)
                my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
                my_t = (points +
                        pred_t)[which_max[0]].view(-1).cpu().data.numpy()
                my_pred = np.append(my_r, my_t)
                # making pose matrix
                rot_to_angle = rotationMatrixToEulerAngles(dof[:3, :3])
                rot_to_angle = rot_to_angle.reshape(1, 3)
                my_t = my_t.reshape(1, 3)
                rot_t = np.concatenate([rot_to_angle, my_t], axis=0)

                # cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy],
                #   [0, 0, 1]]))
                #tl = np.array([100,100,100])
                #cam_mat = cv2.UMat(np.matrix([[960.14238289, 0, 252.43270692], [0, 960.14238289, 317.39366696],
                #             [0, 0, 1]]))

                for ite in range(0, iteration):
                    T = Variable(torch.from_numpy(
                        my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                            num_points, 1).contiguous().view(1, num_points, 3)
                    my_mat = quaternion_matrix(my_r)
                    R = Variable(
                        torch.from_numpy(my_mat[:3, :3].astype(
                            np.float32))).cuda().view(1, 3, 3)
                    my_mat[0:3, 3] = my_t

                    new_cloud = torch.bmm((cloud - T), R).contiguous()
                    pred_r, pred_t = refiner(new_cloud, emb, index)
                    pred_r = pred_r.view(1, 1, -1)
                    pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
                    my_r_2 = pred_r.view(-1).cpu().data.numpy()
                    my_t_2 = pred_t.view(-1).cpu().data.numpy()
                    my_mat_2 = quaternion_matrix(my_r_2)

                    my_mat_2[0:3, 3] = my_t_2
                    my_mat_final = np.dot(my_mat, my_mat_2)
                    my_r_final = copy.deepcopy(my_mat_final)
                    my_r_final[0:3, 3] = 0
                    my_r_final = quaternion_from_matrix(my_r_final, True)

                    my_t_final = np.array([
                        my_mat_final[0][3], my_mat_final[1][3],
                        my_mat_final[2][3]
                    ])

                    my_pred = np.append(my_r_final, my_t_final)
                    my_r = my_r_final
                    my_t = my_t_final
                open_cv_image = img.copy()
                open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
                dof = quaternion_matrix(my_r)
                dof[0:3, 3] = my_t

                object_poses = {
                    'tx': my_t[0][0],
                    'ty': my_t[0][1],
                    'tz': my_t[0][2],
                    'qx': my_r[0],
                    'qy': my_r[1],
                    'qz': my_r[2],
                    'qw': my_r[3]
                }
                my_result.append(object_poses)
                open_cv_image = img.copy()
                open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
                imgpts, jac = cv2.projectPoints(cld[itemid], dof[0:3, 0:3],
                                                dof[0:3, 3], cam_mat,
                                                dist)  # 13 = mug
                open_cv_image = draw(open_cv_image, imgpts, itemid)

            except ZeroDivisionError:
                open_cv_image = None
                print('Fail')
    except CvBridgeError as e:
        print(e)
    return my_result, open_cv_image
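
The example above finally renders the model with cv2.projectPoints, passing the 3x3 rotation block of the pose matrix. A small illustrative sketch of the same projection step, written with an explicit Rodrigues conversion (the function and argument names are placeholders, not from the original code):

import cv2
import numpy as np

def project_model_points(points_3d, pose, cam_mat, dist):
    """Project (N, 3) model points into the image given a 4x4 object-to-camera pose,
    a 3x3 intrinsic matrix and the distortion coefficients."""
    rvec, _ = cv2.Rodrigues(pose[0:3, 0:3].astype(np.float64))   # rotation matrix -> Rodrigues vector
    tvec = pose[0:3, 3].reshape(3, 1).astype(np.float64)
    imgpts, _ = cv2.projectPoints(points_3d.astype(np.float64), rvec, tvec, cam_mat, dist)
    return imgpts.reshape(-1, 2)
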
コード例 #19
0
def tcplink(sock,addr):
    print("Accept a new connection from %s:%s..."%addr)
    sock.send(b'Welcome!')
    # flag = 1
    # cv2.namedWindow('color label')
    while True:
        length = recvall(sock,16)
        if not length:
            break
        stringData = recvall(sock,int(length))
        if not stringData:
            break
        data = np.frombuffer(stringData, dtype='uint8')  # np.fromstring is deprecated for binary buffers

        color_image = cv2.imdecode(data,cv2.IMREAD_COLOR)
        color_image = np.asanyarray(color_image)

############ depth image 
        length2 = recvall(sock,16)
        if not length2:
            break
        stringData2 = recvall(sock,int(length2))
        if not stringData2:  # check the depth payload, not the color one
            break
        data2 = np.frombuffer(stringData2, dtype='uint8')  # np.fromstring is deprecated for binary buffers
        depth_image = cv2.imdecode(data2,-1) 
        depth_image = np.asanyarray(depth_image)

        rgb2 =copy.deepcopy(color_image)
        rgb3 = Image.fromarray(rgb2.astype('uint8')).convert('RGB')
        rgb3 = ImageEnhance.Brightness(rgb3).enhance(1.4)
        # rgb3 = ImageEnhance.Contrast(rgb3).enhance(1.4)
        # rgb3 = rgb3.filter(ImageFilter.GaussianBlur(radius=2))
        # rgb3 = copy.deepcopy(rgb2)

        rgb = np.array(rgb2).astype(np.float32)

        rgb = torch.from_numpy(rgb).cuda().permute(2, 0, 1).contiguous()
        rgb = rgb_norm(rgb).view(1,3,480,640)
        semantic = model(rgb)
        semantic = semantic.view(4,480,640).permute(1,2,0).contiguous()
        max_values, labels = torch.max(semantic, 2)
        labels = labels.cpu().detach().numpy().astype(np.uint8)

        encode_labels = cv2.imencode('.jpg',labels)[1]
        # cv2.waitKey()
        label_encode = np.array(encode_labels)
        str_label = label_encode.tobytes()  # tostring() is a deprecated alias of tobytes()

        label_length = str.encode(str(len(str_label)).ljust(16))
        sock.send(label_length)
        sock.send(str_label)
        ######## pose prediction
        obj_ids = np.unique(labels)[1:]
        print(obj_ids)

        posenetlist = [1,2,3]
        zero_mat = np.zeros((4,4))
        pose_result = []

        for obj in posenetlist:
            arr = copy.deepcopy(labels)
            arr = np.where(arr != obj,   0, arr)
            arr = np.where(arr == obj, 255, arr)
            
            contours, hierarchy = cv2.findContours(arr, cv2.RETR_TREE, cv2.CHAIN_APPROX_NONE)
            contour = 0
            x,y,w,h = 0,0,0,0

            if len(contours)==0:
                continue
                
            continue_flag = 0

            for i in range(len(contours)):
                area = cv2.contourArea(contours[i])
                if area > 2500:
                    contour = contours[i]
                    x, y, w, h = cv2.boundingRect(contour)
                    continue_flag = 0
                    break
                else:
                    continue_flag = 1

            if (continue_flag==1):
                pose_result.append(zero_mat)
                continue
            idx = posenetlist.index(obj)

            bbx = []

            bbx.append(y)
            bbx.append(y+h)
            bbx.append(x)
            bbx.append(x+w)

            rmin, rmax, cmin, cmax = get_bbox(bbx)

            # img  = copy.deepcopy(color_image)

            img_masked = np.transpose(np.array(rgb3)[:, :, :3], (2, 0, 1))[:, rmin:rmax, cmin:cmax]

            img_masked_shape = img_masked.shape

            mask_label = ma.getmaskarray(ma.masked_equal(labels, np.array(obj)))

            choose = mask_label[rmin:rmax, cmin:cmax].flatten().nonzero()[0]

            if len(choose) > num_points:
                c_mask = np.zeros(len(choose), dtype=int)
                c_mask[:num_points] = 1
                np.random.shuffle(c_mask)
                choose = choose[c_mask.nonzero()]
            else:
                choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')

            choose_shape = choose.shape

            xmap = np.array([[j for i in range(640)] for j in range(480)])
            ymap = np.array([[i for i in range(640)] for j in range(480)])

            depth = copy.deepcopy(depth_image)
                    
            depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
            choose = np.array([choose])

            cam_scale = 1.0
            pt2 = depth_masked / cam_scale
            pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
            pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
            cloud = np.concatenate((pt0, pt1, pt2), axis=1)
            cloud /= 1000
            cloud_shape = cloud.shape
            # points = cloud.reshape((1,cloud_shape[0,cloud_shape[1]]))
            # print(cloud_shape)
            
            # points = cloud.view(1,cloud_shape[0],cloud_shape[1])
            # choose = choose.view(1,choose_shape[0],choose[1])
            # img_masked = img_masked.reshape((1,img_masked_shape[0],img_masked_shape[1],img_masked_shape[2]))

            if cloud.shape[0] < 2:
                print('Lost detection!')
            # fw.write('No.{0} NOT Pass! Lost detection!\n'.format(i))
            # continue
            points = torch.from_numpy(cloud.astype(np.float32)).cuda()
            choose = torch.LongTensor(choose.astype(np.int32)).cuda()
            img = rgb_norm(torch.from_numpy(img_masked.astype(np.float32))).cuda()
            idx = torch.LongTensor([idx]).cuda()

            points = points.view(1,cloud_shape[0],cloud_shape[1]).contiguous()
            img = img.view(1,img_masked_shape[0],img_masked_shape[1],img_masked_shape[2]).contiguous()

            pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
            pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
            pred_c = pred_c.view(bs, num_points)
            how_max, which_max = torch.max(pred_c, 1)
            pred_t = pred_t.view(bs * num_points, 1, 3)

            my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
            my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
            my_pred = np.append(my_r, my_t)

            for ite in range(0, iteration):
                T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
                my_mat = quaternion_matrix(my_r)
                R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
                my_mat[0:3, 3] = my_t

                new_points = torch.bmm((points - T), R).contiguous()
                pred_r, pred_t = refiner(new_points, emb, idx)
                pred_r = pred_r.view(1, 1, -1)
                pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
                my_r_2 = pred_r.view(-1).cpu().data.numpy()
                my_t_2 = pred_t.view(-1).cpu().data.numpy()
                my_mat_2 = quaternion_matrix(my_r_2)
                my_mat_2[0:3, 3] = my_t_2

                my_mat_final = np.dot(my_mat, my_mat_2)
                my_r_final = copy.deepcopy(my_mat_final)
                my_r_final[0:3, 3] = 0
                my_r_final = quaternion_from_matrix(my_r_final, True)
                my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

                my_pred = np.append(my_r_final, my_t_final)
                my_r = my_r_final
                my_t = my_t_final

            my_mat_final[:3,:3] = quaternion_matrix(my_r)[:3, :3] 
            my_mat_final[:3,3] = my_t
            # pose_mat[obj-1,3,:] = np.array([0,0,0,1])
            # pose_mat[:,:,obj-1]=my_mat_final
            pose_result.append(my_mat_final)
            if (obj == posenetlist[-1]):
                break

        pose_result = np.array(pose_result)
        print(pose_result)
        my_mat_str = pose_result.tobytes()  # tostring() is a deprecated alias of tobytes()
        length = str.encode(str(len(my_mat_str)).ljust(16))

        sock.send(length)
        sock.send(my_mat_str)
        print()
        print(my_mat_final)
        print()
    sock.close()
    print('connection from %s:%s  is closed'% addr)
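
The TCP handler above exchanges length-prefixed messages: every payload is preceded by a 16-byte, space-padded ASCII length (str.encode(str(len(data)).ljust(16))), and recvall is expected to read exactly that many bytes. The helper itself is not included in this snippet; a minimal sketch of what it is assumed to do:

def recvall(sock, count):
    """Read exactly `count` bytes from `sock`; return None if the peer closes the connection early."""
    buf = b''
    while count > 0:
        chunk = sock.recv(count)
        if not chunk:
            return None
        buf += chunk
        count -= len(chunk)
    return buf

int() applied to the returned length bytes works because Python's int() accepts ASCII digits with surrounding whitespace.
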
コード例 #20
0
def merge_pc(cur_cloud, last_pose, init_pose):
    pred_pose = torch.mm(init_pose.cpu(), last_pose)
    pred_r = torch.as_tensor(quaternion_from_matrix(pred_pose[0:3, 0:3]).T,
                             dtype=torch.float32).view(1, 4, 1).cuda()
    pred_t = pred_pose[0:3, 3].view(1, 3, 1).cuda()
    return cur_cloud[0].reshape(1, -1, 35), pred_r, pred_t
コード例 #21
0
def main():
    # g13: parameter setting -------------------
    '''
    pose model: trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth
    refine model: trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth

    '''
    objlist = [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15]
    knn = KNearestNeighbor(1)
    opt.dataset ='linemod'
    opt.dataset_root = './datasets/linemod/Linemod_preprocessed'
    estimator_path = 'trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth'
    refiner_path = 'trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth'
    opt.model = estimator_path
    opt.refine_model = refiner_path
    dataset_config_dir = 'datasets/linemod/dataset_config'
    output_result_dir = 'experiments/eval_result/linemod'
    opt.refine_start = True
    bs = 1 #fixed because of the default setting in torch.utils.data.DataLoader
    opt.iteration = 2 #default is 4 in eval_linemod.py
    t1_start = True
    t1_idx = 0
    t1_total_eval_num = 3
    t2_start = False
    t2_target_list = [22, 30, 172, 187, 267, 363, 410, 471, 472, 605, 644, 712, 1046, 1116, 1129, 1135, 1263]
    #t2_target_list = [0, 1]
    axis_range = 0.1   # the length of X, Y, and Z axis in 3D
    vimg_dir = 'verify_img'
    diameter = []
    meta_file = open('{0}/models_info.yml'.format(dataset_config_dir), 'r')
    meta_d = yaml.load(meta_file, Loader=yaml.SafeLoader)
    for obj in objlist:
        diameter.append(meta_d[obj]['diameter'] / 1000.0 * 0.1)
    print(diameter)
    if not os.path.exists(vimg_dir):
        os.makedirs(vimg_dir)
    #-------------------------------------------
    
    if opt.dataset == 'ycb':
        opt.num_objects = 21 #number of object classes in the dataset
        opt.num_points = 1000 #number of points on the input pointcloud
        opt.outf = 'trained_models/ycb' #folder to save trained models
        opt.log_dir = 'experiments/logs/ycb' #folder to save logs
        opt.repeat_epoch = 1 #number of repeat times for one epoch training
    elif opt.dataset == 'linemod':
        opt.num_objects = 13
        opt.num_points = 500
        opt.outf = 'trained_models/linemod'
        opt.log_dir = 'experiments/logs/linemod'
        opt.repeat_epoch = 20
    else:
        print('Unknown dataset')
        return
    
    estimator = PoseNet(num_points = opt.num_points, num_obj = opt.num_objects)
    estimator.cuda()
    refiner = PoseRefineNet(num_points = opt.num_points, num_obj = opt.num_objects)
    refiner.cuda()
  
    estimator.load_state_dict(torch.load(estimator_path))    
    refiner.load_state_dict(torch.load(refiner_path))
    opt.refine_start = True
    
    test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start)
    testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers)
    
    opt.sym_list = test_dataset.get_sym_list()
    opt.num_points_mesh = test_dataset.get_num_points_mesh()

    print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\n\
        length of the testing set: {0}\nnumber of sample points on mesh: {1}\n\
        symmetry object list: {2}'\
        .format( len(test_dataset), opt.num_points_mesh, opt.sym_list))
   
    
    #load pytorch model
    estimator.eval()    
    refiner.eval()
    criterion = Loss(opt.num_points_mesh, opt.sym_list)
    criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list)
    fw = open('{0}/t1_eval_result_logs.txt'.format(output_result_dir), 'w')

    #Pose estimation
    for j, data in enumerate(testdataloader, 0):
        # g13: modify this part for evaluation target--------------------
        if t1_start and j == t1_total_eval_num:
            break
        if t2_start and not (j in t2_target_list):
            continue
        #----------------------------------------------------------------
        points, choose, img, target, model_points, idx = data
        if len(points.size()) == 2:
            print('No.{0} NOT Pass! Lost detection!'.format(j))
            fw.write('No.{0} NOT Pass! Lost detection!\n'.format(j))
            continue
        points, choose, img, target, model_points, idx = Variable(points).cuda(), \
                                                             Variable(choose).cuda(), \
                                                             Variable(img).cuda(), \
                                                             Variable(target).cuda(), \
                                                             Variable(model_points).cuda(), \
                                                             Variable(idx).cuda()
        pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
        _, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start)

        #if opt.refine_start: #iterative poserefinement
        #    for ite in range(0, opt.iteration):
        #        pred_r, pred_t = refiner(new_points, emb, idx)
        #        dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points)
        
        pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, opt.num_points, 1)
        pred_c = pred_c.view(bs, opt.num_points)
        how_max, which_max = torch.max(pred_c, 1)
        pred_t = pred_t.view(bs * opt.num_points, 1, 3)
    
        my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
        my_t = (points.view(bs * opt.num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
        my_pred = np.append(my_r, my_t)
    
        for ite in range(0, opt.iteration):
            T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(opt.num_points, 1).contiguous().view(1, opt.num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
            my_mat[0:3, 3] = my_t
            
            new_points = torch.bmm((points - T), R).contiguous()
            pred_r, pred_t = refiner(new_points, emb, idx)
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
            my_r_2 = pred_r.view(-1).cpu().data.numpy()
            my_t_2 = pred_t.view(-1).cpu().data.numpy()
            my_mat_2 = quaternion_matrix(my_r_2)
            my_mat_2[0:3, 3] = my_t_2
    
            my_mat_final = np.dot(my_mat, my_mat_2)
            my_r_final = copy.deepcopy(my_mat_final)
            my_r_final[0:3, 3] = 0
            my_r_final = quaternion_from_matrix(my_r_final, True)
            my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
    
            my_pred = np.append(my_r_final, my_t_final)
            my_r = my_r_final
            my_t = my_t_final
            # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation)
        
        #g13: checking the dis value
        # note: these counters are re-initialised on every sample, so they only reflect the current one
        success_count = [0 for i in range(opt.num_objects)]
        num_count = [0 for i in range(opt.num_objects)]
        model_points = model_points[0].cpu().detach().numpy()
        my_r = quaternion_matrix(my_r)[:3, :3]
        pred = np.dot(model_points, my_r.T) + my_t
        target = target[0].cpu().detach().numpy()
    
        if idx[0].item() in opt.sym_list:
            pred = torch.from_numpy(pred.astype(np.float32)).cuda().transpose(1, 0).contiguous()
            target = torch.from_numpy(target.astype(np.float32)).cuda().transpose(1, 0).contiguous()
            inds = knn(target.unsqueeze(0), pred.unsqueeze(0))
            target = torch.index_select(target, 1, inds.view(-1) - 1)
            dis = torch.mean(torch.norm((pred.transpose(1, 0) - target.transpose(1, 0)), dim=1), dim=0).item()
        else:
            dis = np.mean(np.linalg.norm(pred - target, axis=1))
    
        if dis < diameter[idx[0].item()]:
            success_count[idx[0].item()] += 1
            print('No.{0} Pass! Distance: {1}'.format(j, dis))
            fw.write('No.{0} Pass! Distance: {1}\n'.format(j, dis))
        else:
            print('No.{0} NOT Pass! Distance: {1}'.format(j, dis))
            fw.write('No.{0} NOT Pass! Distance: {1}\n'.format(j, dis))
        num_count[idx[0].item()] += 1
        
        # g13: start drawing pose on image------------------------------------
        # pick up image
        print('{0}:\nmy_r is {1}\nmy_t is {2}\ndis:{3}'.format(j, my_r, my_t, dis.item()))    
        print("index {0}: {1}".format(j, test_dataset.list_rgb[j]))
        img = Image.open(test_dataset.list_rgb[j])
        
        # pick up center position by bbox
        meta_file = open('{0}/data/{1}/gt.yml'.format(opt.dataset_root, '%02d' % test_dataset.list_obj[j]), 'r')
        meta = {}
        meta = yaml.load(meta_file, Loader=yaml.SafeLoader)
        which_item = test_dataset.list_rank[j]
        which_obj = test_dataset.list_obj[j]
        which_dict = 0
        dict_leng = len(meta[which_item])
        #print('get meta[{0}][{1}][obj_bb]'.format(which_item, which_obj))
        k_idx = 0
        while 1:
            if meta[which_item][k_idx]['obj_id'] == which_obj:
                which_dict = k_idx
                break
            k_idx = k_idx+1
        
        bbx = meta[which_item][which_dict]['obj_bb']
        draw = ImageDraw.Draw(img) 
        
        # draw box (ensure this is the right object)
        draw.line((bbx[0],bbx[1], bbx[0], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        draw.line((bbx[0],bbx[1], bbx[0]+bbx[2], bbx[1]), fill=(255,0,0), width=5)
        draw.line((bbx[0],bbx[1]+bbx[3], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        draw.line((bbx[0]+bbx[2],bbx[1], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        
        #get center
        c_x = bbx[0]+int(bbx[2]/2)
        c_y = bbx[1]+int(bbx[3]/2)
        draw.point((c_x,c_y), fill=(255,255,0))
        print('center:({0},{1})'.format(c_x, c_y))
        
        #get the 3D position of center
        cam_intrinsic = np.zeros((3,3))
        cam_intrinsic.itemset(0, test_dataset.cam_fx)
        cam_intrinsic.itemset(4, test_dataset.cam_fy)
        cam_intrinsic.itemset(2, test_dataset.cam_cx)
        cam_intrinsic.itemset(5, test_dataset.cam_cy)
        cam_intrinsic.itemset(8, 1)
        cam_extrinsic = my_mat_final[0:3, :]
        cam2d_3d = np.matmul(cam_intrinsic, cam_extrinsic)
        cen_3d = np.matmul(np.linalg.pinv(cam2d_3d), [[c_x],[c_y],[1]])
        # replace img.show() with plt.imshow(img)
        
        #transpose three 3D axis point into 2D
        x_3d = cen_3d + [[axis_range],[0],[0],[0]]
        y_3d = cen_3d + [[0],[axis_range],[0],[0]]
        z_3d = cen_3d + [[0],[0],[axis_range],[0]]
        x_2d = np.matmul(cam2d_3d, x_3d)
        y_2d = np.matmul(cam2d_3d, y_3d)
        z_2d = np.matmul(cam2d_3d, z_3d)
        
        #draw the axis on 2D
        draw.line((c_x, c_y, x_2d[0], x_2d[1]), fill=(255,255,0), width=5)
        draw.line((c_x, c_y, y_2d[0], y_2d[1]), fill=(0,255,0), width=5)
        draw.line((c_x, c_y, z_2d[0], z_2d[1]), fill=(0,0,255), width=5)
        
        #g13: draw the estimate pred obj
        for pti in pred:
            pti.transpose()
            pti_2d = np.matmul(cam_intrinsic, pti)
            #print('({0},{1})\n'.format(int(pti_2d[0]),int(pti_2d[1])))
            draw.point([int(pti_2d[0]),int(pti_2d[1])], fill=(255,255,0))
            
        
        #g13: show image
        #img.show()
        
        #save file under file 
        img_file_name = '{0}/batch{1}_pred_obj{2}_pic{3}.png'.format(vimg_dir, j, test_dataset.list_obj[j], which_item)
        img.save( img_file_name, "PNG" )
        img.close()
        
        # plot ground true ----------------------------
        img = Image.open(test_dataset.list_rgb[j])
        draw = ImageDraw.Draw(img) 
        draw.line((bbx[0],bbx[1], bbx[0], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        draw.line((bbx[0],bbx[1], bbx[0]+bbx[2], bbx[1]), fill=(255,0,0), width=5)
        draw.line((bbx[0],bbx[1]+bbx[3], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5)
        draw.line((bbx[0]+bbx[2],bbx[1], bbx[0]+bbx[2], bbx[1]+bbx[3]), fill=(255,0,0), width=5)        
        target_r = np.resize(np.array(meta[which_item][k_idx]['cam_R_m2c']), (3, 3))                
        target_t = np.array(meta[which_item][k_idx]['cam_t_m2c'])
        target_t = target_t[np.newaxis, :]               
        cam_extrinsic_GT = np.concatenate((target_r, target_t.T), axis=1)
        
        
        #get center 3D
        cam2d_3d_GT = np.matmul(cam_intrinsic, cam_extrinsic_GT)
        cen_3d_GT = np.matmul(np.linalg.pinv(cam2d_3d_GT), [[c_x],[c_y],[1]])
        
        #transpose three 3D axis point into 2D
        x_3d = cen_3d_GT + [[axis_range],[0],[0],[0]]
        y_3d = cen_3d_GT + [[0],[axis_range],[0],[0]]
        z_3d = cen_3d_GT + [[0],[0],[axis_range],[0]]
        
        x_2d = np.matmul(cam2d_3d_GT, x_3d)
        y_2d = np.matmul(cam2d_3d_GT, y_3d)
        z_2d = np.matmul(cam2d_3d_GT, z_3d)

        #draw the axis on 2D
        draw.line((c_x, c_y, x_2d[0], x_2d[1]), fill=(255,255,0), width=5)
        draw.line((c_x, c_y, y_2d[0], y_2d[1]), fill=(0,255,0), width=5)
        draw.line((c_x, c_y, z_2d[0], z_2d[1]), fill=(0,0,255), width=5)
      
       
        print('pred:\n{0}\nGT:\n{1}\n'.format(cam_extrinsic,cam_extrinsic_GT))
        print('pred 3D:{0}\nGT 3D:{1}\n'.format(cen_3d, cen_3d_GT))
        img_file_name = '{0}/batch{1}_pred_obj{2}_pic{3}_gt.png'.format(vimg_dir, j, test_dataset.list_obj[j], which_item)
        img.save( img_file_name, "PNG" )
        img.close()
        meta_file.close()
    print('\nplot_result_img.py completed the task\n')
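
The visualization above builds the 3x4 projection matrix as cam_intrinsic @ cam_extrinsic, lifts the 2D box center to a 3D point with the pseudo-inverse, and then maps the offset axis endpoints back into the image. For reference, the conventional projection of a single object-frame point, including the explicit perspective division, can be sketched as:

import numpy as np

def project_point(p_obj, R, t, K):
    """Project one 3D point from object coordinates to pixel coordinates.
    p_obj: (3,) point, R: 3x3 rotation, t: (3,) translation, K: 3x3 intrinsics."""
    p_cam = R @ p_obj + t                    # object frame -> camera frame
    uvw = K @ p_cam                          # homogeneous pixel coordinates
    return uvw[0] / uvw[2], uvw[1] / uvw[2]  # perspective division
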
コード例 #22
0
    def callback(self, rgb, depth):
        if DEBUG:
            print('received depth image of type: ' + depth.encoding)
            print('received rgb image of type: ' + rgb.encoding)
        #https://answers.ros.org/question/64318/how-do-i-convert-an-ros-image-into-a-numpy-array/
        depth = np.frombuffer(depth.data,
                              dtype=np.uint16).reshape(depth.height,
                                                       depth.width, -1)
        rgb = np.frombuffer(rgb.data,
                            dtype=np.uint8).reshape(rgb.height, rgb.width, -1)
        rgb_original = rgb
        #cv2.imshow('depth', depth)

        #time1 = time.time()
        rgb = np.transpose(rgb, (2, 0, 1))
        rgb = norm(torch.from_numpy(rgb.astype(np.float32)))
        rgb = Variable(rgb).cuda()
        semantic = self.model(rgb.unsqueeze(0))
        _, pred = torch.max(semantic, dim=1)
        pred = pred * 255
        pred = np.transpose(pred, (1, 2, 0))  # (CxHxW)->(HxWxC)
        #print(pred.shape)

        #ret, threshold = cv2.threshold(pred.cpu().numpy(), 1, 255, cv2.THRESH_BINARY)    #pred is already binary, therefore, this line is unnecessary
        contours, hierarchy = cv2.findContours(np.uint8(pred),
                                               cv2.RETR_EXTERNAL,
                                               cv2.CHAIN_APPROX_SIMPLE)
        cnt = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(cnt)
        rmin, rmax, cmin, cmax = get_bbox([x, y, w, h])
        #cv2.rectangle(rgb_original,(cmin,rmin), (cmax,rmax) , (0,255,0),2)
        #cv2.imwrite('depth.png', depth)          #save depth image

        mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
        mask_label = ma.getmaskarray(ma.masked_equal(pred, np.array(255)))
        mask = mask_depth * mask_label

        #print(rgb.shape)             #torch.Size([3, 480, 640])
        #print(rgb_original.shape)    #(480, 640, 3)
        img = np.transpose(rgb_original, (2, 0, 1))
        img_masked = img[:, rmin:rmax, cmin:cmax]

        choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]

        #print("length of choose is :{0}".format(len(choose)))
        if len(choose) == 0:
            cc = torch.LongTensor([0])
            return (cc, cc, cc, cc, cc, cc)

        if len(choose) > num_points:
            c_mask = np.zeros(len(choose), dtype=int)
            c_mask[:num_points] = 1  # if there are more object pixels than num_points, keep a random subset of exactly num_points
            np.random.shuffle(c_mask)
            choose = choose[c_mask.nonzero()]  # now len(choose) == num_points
        else:
            choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')

        depth_masked = depth[rmin:rmax,
                             cmin:cmax].flatten()[choose][:,
                                                          np.newaxis].astype(
                                                              np.float32)
        xmap_masked = self.xmap[
            rmin:rmax,
            cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
        ymap_masked = self.ymap[
            rmin:rmax,
            cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)

        choose = np.array([choose])

        pt2 = depth_masked
        #print(pt2)
        pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx
        pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy
        cloud = np.concatenate((pt0, pt1, pt2), axis=1)
        cloud = cloud / 1000

        points = torch.from_numpy(cloud.astype(np.float32))
        choose = torch.LongTensor(choose.astype(np.int32))
        img = norm(torch.from_numpy(img_masked.astype(np.float32)))
        idx = torch.LongTensor([self.object_index])

        img = Variable(img).cuda().unsqueeze(0)
        points = Variable(points).cuda().unsqueeze(0)
        choose = Variable(choose).cuda().unsqueeze(0)
        idx = Variable(idx).cuda().unsqueeze(0)

        pred_r, pred_t, pred_c, emb = self.estimator(img, points, choose, idx)
        pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
        pred_c = pred_c.view(bs, num_points)
        how_max, which_max = torch.max(pred_c, 1)
        pred_t = pred_t.view(bs * num_points, 1, 3)

        my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
        my_t = (points.view(bs * num_points, 1, 3) +
                pred_t)[which_max[0]].view(-1).cpu().data.numpy()
        my_pred = np.append(my_r, my_t)

        for ite in range(0, iteration):
            T = Variable(torch.from_numpy(
                my_t.astype(np.float32))).cuda().view(1, 3).repeat(
                    num_points, 1).contiguous().view(1, num_points, 3)
            my_mat = quaternion_matrix(my_r)
            R = Variable(torch.from_numpy(my_mat[:3, :3].astype(
                np.float32))).cuda().view(1, 3, 3)
            my_mat[0:3, 3] = my_t

            new_points = torch.bmm((points - T), R).contiguous()
            pred_r, pred_t = self.refiner(new_points, emb, idx)
            pred_r = pred_r.view(1, 1, -1)
            pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
            my_r_2 = pred_r.view(-1).cpu().data.numpy()
            my_t_2 = pred_t.view(-1).cpu().data.numpy()
            my_mat_2 = quaternion_matrix(my_r_2)
            my_mat_2[0:3, 3] = my_t_2

            my_mat_final = np.dot(
                my_mat,
                my_mat_2)  # refining the pose amounts to composing the two 4x4 transforms
            my_r_final = copy.deepcopy(my_mat_final)
            my_r_final[0:3, 3] = 0
            my_r_final = quaternion_from_matrix(my_r_final, True)
            my_t_final = np.array(
                [my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])

            my_pred = np.append(my_r_final, my_t_final)
            my_r = my_r_final
            my_t = my_t_final

        my_r = quaternion_matrix(my_r)[:3, :3]
        #print(my_t.shape)
        my_t = np.array(my_t)
        #print(my_t.shape)
        #print(my_r.shape)

        target = np.dot(self.scaled, my_r.T)
        target = np.add(target, my_t)

        p0 = (int((target[0][0] / target[0][2]) * self.cam_fx + self.cam_cx),
              int((target[0][1] / target[0][2]) * self.cam_fy + self.cam_cy))
        p1 = (int((target[1][0] / target[1][2]) * self.cam_fx + self.cam_cx),
              int((target[1][1] / target[1][2]) * self.cam_fy + self.cam_cy))
        p2 = (int((target[2][0] / target[2][2]) * self.cam_fx + self.cam_cx),
              int((target[2][1] / target[2][2]) * self.cam_fy + self.cam_cy))
        p3 = (int((target[3][0] / target[3][2]) * self.cam_fx + self.cam_cx),
              int((target[3][1] / target[3][2]) * self.cam_fy + self.cam_cy))
        p4 = (int((target[4][0] / target[4][2]) * self.cam_fx + self.cam_cx),
              int((target[4][1] / target[4][2]) * self.cam_fy + self.cam_cy))
        p5 = (int((target[5][0] / target[5][2]) * self.cam_fx + self.cam_cx),
              int((target[5][1] / target[5][2]) * self.cam_fy + self.cam_cy))
        p6 = (int((target[6][0] / target[6][2]) * self.cam_fx + self.cam_cx),
              int((target[6][1] / target[6][2]) * self.cam_fy + self.cam_cy))
        p7 = (int((target[7][0] / target[7][2]) * self.cam_fx + self.cam_cx),
              int((target[7][1] / target[7][2]) * self.cam_fy + self.cam_cy))

        cv2.line(rgb_original, p0, p1, (255, 255, 255), 2)
        cv2.line(rgb_original, p0, p3, (255, 255, 255), 2)
        cv2.line(rgb_original, p0, p4, (255, 255, 255), 2)
        cv2.line(rgb_original, p1, p2, (255, 255, 255), 2)
        cv2.line(rgb_original, p1, p5, (255, 255, 255), 2)
        cv2.line(rgb_original, p2, p3, (255, 255, 255), 2)
        cv2.line(rgb_original, p2, p6, (255, 255, 255), 2)
        cv2.line(rgb_original, p3, p7, (255, 255, 255), 2)
        cv2.line(rgb_original, p4, p5, (255, 255, 255), 2)
        cv2.line(rgb_original, p4, p7, (255, 255, 255), 2)
        cv2.line(rgb_original, p5, p6, (255, 255, 255), 2)
        cv2.line(rgb_original, p6, p7, (255, 255, 255), 2)

        #print('estimated rotation is :{0}'.format(my_r))
        #print('estimated translation is :{0}'.format(my_t))

        #time2 = time.time()
        #print('inference time is :{0}'.format(time2-time1))
        cv2.imshow('rgb',
                   cv2.cvtColor(rgb_original,
                                cv2.COLOR_BGR2RGB))  # OpenCV uses BGR model
        cv2.waitKey(1)  # pass any integer except 0; 0 would block the display window indefinitely
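
The twelve cv2.line calls above draw the projected 3D bounding box edge by edge. An equivalent, more compact sketch, assuming the same corner ordering as self.scaled and the same edge connectivity:

import cv2

# Edge list of the cube, as index pairs into the 8 projected corners
# (same connectivity as the hand-written cv2.line calls above).
CUBE_EDGES = [(0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (2, 3),
              (2, 6), (3, 7), (4, 5), (4, 7), (5, 6), (6, 7)]

def draw_cube(image, corners_3d, cam_fx, cam_fy, cam_cx, cam_cy, color=(255, 255, 255)):
    """Project 8 cube corners given in camera coordinates (meters) and draw the 12 edges."""
    pts = [(int(x / z * cam_fx + cam_cx), int(y / z * cam_fy + cam_cy))
           for x, y, z in corners_3d]
    for a, b in CUBE_EDGES:
        cv2.line(image, pts[a], pts[b], color, 2)
    return image
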
コード例 #23
0
ファイル: aligning.py プロジェクト: hiyyg/articulated-pose
def estimateSimilarityUmeyamaCoords(SourceHom0, TargetHom0, SourceHom1, TargetHom1, joint_axis, joint_pts=None, rt_ref=[None, None], rt_pre=[None, None], \
                 viz=False, viz_ransac=False, viz_sample=False, use_jt_pts=False, use_ext_rot=False, verbose=False, index=0):
    """
    SourceHom0: [4, 5]
    joint_pts : [4, 5]
    joint_axis: [4, 1]
    """
    U, D0, Vh = svd_pts(SourceHom0, TargetHom0)  #
    R0 = np.matmul(U, Vh).T  # Transpose is the one that works
    U, D1, Vh = svd_pts(SourceHom1, TargetHom1)  #
    R1 = np.matmul(U, Vh).T  #
    # begin EM
    max_iter = 100
    # max_iter = 1 # todo
    StopThreshold = 2 * np.cos(0.5 / 180 * np.pi)
    if viz_sample:
        plot3d_pts([[
            SourceHom0[:3].transpose(), SourceHom1[:3].transpose(),
            TargetHom0[:3].transpose(), TargetHom1[:3].transpose(),
            joint_pts[:3].transpose()
        ]], [['source0', 'source1', 'target0', 'target1', 'joint_points']],
                   s=100,
                   title_name=['sampled points'],
                   color_channel=None,
                   save_fig=False,
                   sub_name='default')
    joint_axis_tiled0 = np.tile(joint_axis, (1, int(SourceHom0.shape[1] / 5)))
    joint_axis_tiled1 = np.tile(joint_axis, (1, int(SourceHom1.shape[1] / 5)))
    # joint_axis_tiled0 = np.tile(joint_axis, (1, int(SourceHom0.shape[1])))
    # joint_axis_tiled1 = np.tile(joint_axis, (1, int(SourceHom1.shape[1])))
    if use_ext_rot and rt_pre[0] is not None:
        # print('using external rotation')
        R0 = rt_pre[0][:3, :3].T
        R1 = rt_pre[1][:3, :3].T
    else:
        r_list = [[R0], [R1]]
        for i in range(max_iter):
            rotated_axis = np.matmul(R0.T, joint_axis_tiled1[:3])  # [3, 1]
            U, D1, Vh = svd_pts(SourceHom1,
                                TargetHom1,
                                joint_axis_tiled1,
                                rotated_axis,
                                viz_sample=viz_sample,
                                index=2 * i)
            R1_new = np.matmul(U, Vh).T
            rotated_axis = np.matmul(R1_new.T, joint_axis_tiled0[:3])
            U, D0, Vh = svd_pts(SourceHom0,
                                TargetHom0,
                                joint_axis_tiled0,
                                rotated_axis,
                                viz_sample=viz_sample,
                                index=2 * i + 1)
            R0_new = np.matmul(U, Vh).T
            eigen_sum0 = np.trace(np.matmul(R0_new.T, R0)) - 1
            eigen_sum1 = np.trace(np.matmul(R1_new.T, R1)) - 1
            R0 = R0_new
            R1 = R1_new
            r_list[0].append(R0)
            r_list[1].append(R1)
            if eigen_sum0 > StopThreshold and eigen_sum1 > StopThreshold:
                # if verbose:
                #     print('Algorithm converges at {}th iteration for Coordinate Descent'.format(i))
                break
    if viz_ransac and index < 10:  # and SourceHom0.shape[1]>5:
        ang_dis_list = [[], []]
        for j in range(2):
            q_gt = quaternion_from_matrix(rt_ref[j][:3, :3])
            for rot_iter in r_list[j]:
                q_iter = quaternion_from_matrix(rot_iter.T)
                ang_dis = 2 * np.arccos(sum(q_iter * q_gt)) * 180 / np.pi
                if ang_dis > 180:
                    ang_dis = 360 - ang_dis
                ang_dis_list[j].append(ang_dis)
        fig = plt.figure(dpi=200)
        for j in range(2):
            ax = plt.subplot(1, 2, j + 1)
            plt.plot(range(len(ang_dis_list[j])), ang_dis_list[j])
            plt.xlabel('iteration')
            plt.ylabel('rotation error')
            plt.title('{}th sampling part {}'.format(index, j))
        plt.show()
    Rs = [R0, R1]

    if use_jt_pts:
        if viz_sample:
            plot3d_pts([[
                SourceHom0[:3].transpose(), SourceHom1[:3].transpose(),
                TargetHom0[:3].transpose(), TargetHom1[:3].transpose(),
                joint_pts[:3].transpose()
            ]], [['source0', 'source1', 'target0', 'target1', 'joint_points']],
                       s=100,
                       title_name=['sampled points'],
                       color_channel=None,
                       save_fig=False,
                       sub_name='default')
        final_scale, Ts, OutTrans = compute_scale_translation(
            [SourceHom0, SourceHom1], [TargetHom0, TargetHom1], Rs, joint_pts)
        if verbose:
            print("scale by adding joints are \n: {}".format(final_scale))
    else:
        if viz_sample:
            plot3d_pts([[
                SourceHom0[:3].transpose(), SourceHom1[:3].transpose(),
                TargetHom0[:3].transpose(), TargetHom1[:3].transpose()
            ]], [['source0', 'source1', 'target0', 'target1']],
                       s=100,
                       title_name=['points after sampling'],
                       color_channel=None,
                       save_fig=False,
                       sub_name='default')
        final_scale0, T0, OutTrans0 = est_ST(SourceHom0, TargetHom0, D0, Rs[0])
        final_scale1, T1, OutTrans1 = est_ST(SourceHom1, TargetHom1, D1, Rs[1])
        final_scale = [final_scale0, final_scale1]
        Ts = [T0, T1]
        OutTrans = [OutTrans0, OutTrans1]
        if verbose:
            print("scale by direct solving per part are \n: {}".format(
                final_scale))

    return final_scale, Rs, Ts, OutTrans
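
In estimateSimilarityUmeyamaCoords above, svd_pts (not shown in this snippet) performs the rotation step of the Umeyama alignment, and the coordinate-descent loop alternates it between the two parts while feeding each part the joint axis rotated by the other part's current rotation estimate. Convergence is tested with trace(R_new.T @ R) - 1 = 2*cos(delta_theta), so StopThreshold = 2*cos(0.5 deg) stops the loop once both rotations change by less than half a degree between iterations. For reference, the standard SVD rotation fit that such a helper typically wraps (a generic sketch, not the project's own svd_pts, whose convention transposes the result):

import numpy as np

def fit_rotation(source, target):
    """Least-squares rotation aligning `source` onto `target` (both 3xN and already centered),
    via SVD of the cross-covariance matrix (the rotation step of the Umeyama method)."""
    cov = target @ source.T                   # 3x3 cross-covariance
    U, D, Vh = np.linalg.svd(cov)
    S = np.eye(3)
    S[2, 2] = np.sign(np.linalg.det(U @ Vh))  # guard against a reflection (det = -1)
    return U @ S @ Vh                         # proper rotation with det = +1
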
コード例 #24
0
def main():
    cfg = setup_config()
    pipeline = rs.pipeline()
    realsense_cfg = setup_realsense()
    pipeline.start(realsense_cfg)  # Start streaming
    visualizer = predictor.VisualizationDemo(cfg)

    ref_frame_axies = []
    ref_frame_label = []
    min_distance = 0.9
    label_cnt = 0
    frameth = 0

    my_t_pool = {}
    my_r_pool = {}

    while True:
        frameth += 1
        cur_frame_axies = []
        cur_frame_label = []
        my_t_per_frame = []
        my_r_per_frame = []

        align = rs.align(rs.stream.color)
        frames = pipeline.wait_for_frames()
        aligned_frames = align.process(frames)

        rgb = aligned_frames.get_color_frame()
        rgb = np.asanyarray(rgb.get_data())
        frame = rgb.copy()

        # Do instance segmentation
        start = time.time()
        segmentation, vis = visualizer.run_on_image(frame)
        #print("Time = " + str(time.time()-start))

        cv2.imshow('Mask', vis)
        cv2.waitKey(1)

        # Get segmentation mask
        ori_label = segmentation['instances'].pred_masks.cpu().numpy()
        label = np.sum(ori_label, axis=0).astype(np.uint8)
        label = np.where(label != 0, 255, label)
        label = Image.fromarray(label).convert("L")
        label = np.asarray(label.convert('RGB')).astype(np.uint8)

        bboxes = segmentation['instances'].pred_boxes.tensor.cpu().numpy()
        xyxy_bboxes = bboxes
        bboxes = bbox_convert(bboxes)

        if len(bboxes) > 0:
            #depth_frames = frames.get_depth_frame()
            depth_frames = aligned_frames.get_depth_frame()

            video_profile = depth_frames.profile.as_video_stream_profile()
            intr = video_profile.get_intrinsics()
            depth = np.asanyarray(depth_frames.get_data())
            #centers = segmentation['instances'].pred_boxes.get_centers()
            if len(my_t_pool) > 0:
                last_key = list(my_t_pool.keys())[-1]

            for i in range(0, len(bboxes)):
                bbox_xyxy = np.array(list(xyxy_bboxes[i]))
                bbox = list(bboxes[i])
                print("Bounding Box:" + str(bbox))
                #center = bboxes[i].get_centers()
                #center = centers[i].cpu().numpy()
                num_idx = float('nan')
                max_value = 0

                label_of_object = ori_label[i].astype(np.uint8)
                label_of_object = np.where(label_of_object != 0, 255,
                                           label_of_object)
                label_of_object = Image.fromarray(label_of_object).convert("L")
                label_of_object = np.asarray(
                    label_of_object.convert('RGB')).astype(np.uint8)

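                # Re-identify the detection: keep the label of the reference-frame box
                # with the highest IoU, or assign a fresh label below when no overlap
                # clears the threshold (min_distance).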
                if len(ref_frame_label) > 0:
                    iou_list = []
                    b = bbox_xyxy
                    a = np.array(ref_frame_axies)
                    for k in range(len(ref_frame_axies)):
                        iou = iou_score(a[k], b)
                        iou_list.append(iou)
                    iou_list = np.array(iou_list)
                    max_value = iou_list.max()
                    if (max_value > min_distance):
                        min_idx = np.where(iou_list == max_value)[0][0]
                        num_idx = ref_frame_label[min_idx]

                if (math.isnan(num_idx)):
                    num_idx = label_cnt
                    label_cnt += 1
                cur_frame_label.append(num_idx)
                cur_frame_axies.append(bbox_xyxy)

                print(max_value)
                if (frameth == 1) or (max_value < 0.9) or (
                        i > len(my_t_pool[last_key]) - 1) or (frameth % 20
                                                              == 0):
                    pos_text = (bbox[0], bbox[1])

                    class_id = segmentation['instances'].pred_classes[i].cpu(
                    ).data.numpy()
                    print("Class: " + str(class_id))
                    #idx = class_id
                    # map detector class ids to estimator object indices
                    # (only classes 0 and 2 are handled; any other id would leave idx unset)
                    if class_id == 0:
                        idx = 0
                    if class_id == 2:
                        idx = 1

                    model_points = model_points_list[idx]

                    mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
                    #mask_label = ma.getmaskarray(ma.masked_equal(label, np.array(255)))
                    mask_label = ma.getmaskarray(
                        ma.masked_equal(label_of_object,
                                        np.array([255, 255, 255])))[:, :, 0]
                    mask = mask_label * mask_depth

                    rmin, rmax, cmin, cmax = posenet_deploy.get_bbox(bbox)

                    # choose
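                    # indices (within the bbox crop) of pixels that lie inside the mask
                    # and have valid depth; subsample to num_points, or pad by wrapping
                    # when fewer pixels are available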
                    choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
                    if len(choose) == 0:
                        choose = torch.LongTensor([0])
                    if len(choose) > num_points:
                        c_mask = np.zeros(len(choose), dtype=int)
                        c_mask[:num_points] = 1
                        np.random.shuffle(c_mask)
                        choose = choose[c_mask.nonzero()]
                    else:
                        choose = np.pad(choose, (0, num_points - len(choose)),
                                        'wrap')

                    depth_masked = depth[
                        rmin:rmax,
                        cmin:cmax].flatten()[choose][:, np.newaxis].astype(
                            np.float32)
                    xmap_masked = xmap[
                        rmin:rmax,
                        cmin:cmax].flatten()[choose][:, np.newaxis].astype(
                            np.float32)
                    ymap_masked = ymap[
                        rmin:rmax,
                        cmin:cmax].flatten()[choose][:, np.newaxis].astype(
                            np.float32)
                    choose = np.array([choose])

                    # point cloud
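                    # back-project the chosen pixels with the pinhole model:
                    # z = depth / cam_scale, then (u - c) * z / f per axis
                    # (ymap is assumed to hold column indices, which is why it pairs
                    # with cam_cx/cam_fx); the final division presumably converts
                    # millimetres to metres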
                    pt2 = depth_masked / cam_scale
                    pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
                    pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
                    cloud = np.concatenate((pt0, pt1, pt2), axis=1)
                    cloud = cloud / 1000.0
                    # print(cloud.shape)

                    # cropped img
                    #img_masked = rgb[:, :, :3]
                    img_masked = rgb[:, :, ::-1]  # bgr to rgb
                    img_masked = np.transpose(img_masked, (2, 0, 1))
                    img_masked = img_masked[:, rmin:rmax, cmin:cmax]

                    my_mask = np.transpose(label_of_object, (2, 0, 1))
                    my_mask = my_mask[:, rmin:rmax, cmin:
                                      cmax]  ## Added by me to crop the mask
                    mask_img = np.transpose(my_mask, (1, 2, 0))
                    img_rgb = np.transpose(img_masked, (1, 2, 0))
                    croped_img_mask = cv2.bitwise_and(img_rgb, mask_img)
                    crop_image_to_check = croped_img_mask.copy()
                    cv2.imshow("mask_crop", croped_img_mask)
                    croped_img_mask = np.transpose(croped_img_mask, (2, 0, 1))

                    # Variables
                    cloud = torch.from_numpy(cloud.astype(
                        np.float32)).unsqueeze(0)
                    choose = torch.LongTensor(choose.astype(
                        np.int32)).unsqueeze(0)
                    #img_masked = torch.from_numpy(img_masked.astype(np.float32)).unsqueeze(0)
                    img_masked = torch.from_numpy(
                        croped_img_mask.astype(np.float32)).unsqueeze(0)
                    index = torch.LongTensor([idx]).unsqueeze(
                        0)  # Specify which object

                    cloud = Variable(cloud).cuda()
                    choose = Variable(choose).cuda()
                    img_masked = Variable(img_masked).cuda()
                    index = Variable(index).cuda()

                    # Deploy
                    with torch.no_grad():
                        pred_r, pred_t, pred_c, emb = estimator(
                            img_masked, cloud, choose, index)

                    pred_r = pred_r / torch.norm(pred_r, dim=2).view(
                        1, num_points, 1)
                    pred_c = pred_c.view(bs, num_points)
                    how_max, which_max = torch.max(pred_c, 1)
                    pred_t = pred_t.view(bs * num_points, 1, 3)
                    points = cloud.view(bs * num_points, 1, 3)

                    my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
                    my_t = (points.view(bs * num_points, 1, 3) +
                            pred_t)[which_max[0]].view(-1).cpu().data.numpy()
                    my_pred = np.append(my_r, my_t)

                    # Refinement
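                    # Each pass moves the cloud into the current estimated object frame
                    # ((cloud - T) @ R), asks the refiner for a small correction, and
                    # composes it with the running pose: my_mat_final = my_mat @ my_mat_2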
                    for ite in range(0, iteration):
                        T = Variable(torch.from_numpy(my_t.astype(
                            np.float32))).cuda().view(1, 3).repeat(
                                num_points,
                                1).contiguous().view(1, num_points, 3)
                        my_mat = quaternion_matrix(my_r)
                        R = Variable(
                            torch.from_numpy(my_mat[:3, :3].astype(
                                np.float32))).cuda().view(1, 3, 3)
                        my_mat[0:3, 3] = my_t

                        new_cloud = torch.bmm((cloud - T), R).contiguous()
                        pred_r, pred_t = refiner(new_cloud, emb, index)
                        pred_r = pred_r.view(1, 1, -1)
                        pred_r = pred_r / (torch.norm(pred_r, dim=2).view(
                            1, 1, 1))
                        my_r_2 = pred_r.view(-1).cpu().data.numpy()
                        my_t_2 = pred_t.view(-1).cpu().data.numpy()
                        my_mat_2 = quaternion_matrix(my_r_2)

                        my_mat_2[0:3, 3] = my_t_2
                        my_mat_final = np.dot(my_mat, my_mat_2)
                        my_r_final = copy.deepcopy(my_mat_final)
                        my_r_final[0:3, 3] = 0
                        my_r_final = quaternion_from_matrix(my_r_final, True)
                        my_t_final = np.array([
                            my_mat_final[0][3], my_mat_final[1][3],
                            my_mat_final[2][3]
                        ])

                        my_pred = np.append(my_r_final, my_t_final)
                        my_r = my_r_final
                        my_t = my_t_final

                        my_r_matrix = quaternion_matrix(my_r)[:3, :3]
                    #print("Time = " + str(time.time()-start))
                    my_t_per_frame.append(my_t)
                    my_r_per_frame.append(my_r_matrix)

                    #rotation = Rot.from_matrix(my_r_matrix)
                    #angle = rotation.as_euler('xyz', degrees=True)

                    my_t = np.around(my_t, 5)
                    #print("translation vector = " + str(my_t))
                    #print("rotation angles = " + str(my_r))

                    frame = posenet_deploy.get_3d_bbox(frame, model_points,
                                                       my_r_matrix, my_t)
                    frame = posenet_deploy.draw_axes(frame, my_r_matrix, my_t)

                    if check_inverted(crop_image_to_check):
                        cv2.putText(frame,
                                    str(num_idx) + "_inverted", pos_text,
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0),
                                    2, cv2.LINE_AA)
                    else:
                        cv2.putText(frame, str(num_idx), pos_text,
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0),
                                    2, cv2.LINE_AA)

                    #cv2.putText(frame, str(num_idx), pos_text, cv2.FONT_HERSHEY_SIMPLEX,
                    #            0.5, (0,255,0), 2, cv2.LINE_AA)

                    posenet_deploy.putText(frame, i, num_idx, class_id, my_t)
                    #cv2.imshow('Result', rgb)
                    #cv2.waitKey(1)

                else:
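                    # Detection matched a recently tracked object: reuse the pose cached
                    # in my_t_pool / my_r_pool instead of re-running the estimator and refiner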
                    rmin, rmax, cmin, cmax = posenet_deploy.get_bbox(bbox)
                    img_masked = rgb[:, :, ::-1]  # bgr to rgb
                    img_masked = np.transpose(img_masked, (2, 0, 1))
                    img_masked = img_masked[:, rmin:rmax, cmin:cmax]

                    my_mask = np.transpose(label_of_object, (2, 0, 1))
                    my_mask = my_mask[:, rmin:rmax, cmin:
                                      cmax]  ## Added by me to crop the mask
                    mask_img = np.transpose(my_mask, (1, 2, 0))
                    img_rgb = np.transpose(img_masked, (1, 2, 0))
                    croped_img_mask = cv2.bitwise_and(img_rgb, mask_img)
                    crop_image_to_check = croped_img_mask.copy()

                    pos_text = (bbox[0], bbox[1])
                    last_key = list(my_t_pool.keys())[-1]

                    print("POOL: " + str(my_t_pool[last_key]))
                    class_id = segmentation['instances'].pred_classes[i].cpu(
                    ).data.numpy()

                    my_t = my_t_pool[last_key][min_idx]
                    my_r_matrix = my_r_pool[last_key][min_idx]

                    my_t_per_frame.append(my_t)
                    my_r_per_frame.append(my_r_matrix)

                    frame = posenet_deploy.get_3d_bbox(frame, model_points,
                                                       my_r_matrix, my_t)
                    frame = posenet_deploy.draw_axes(frame, my_r_matrix, my_t)

                    if check_inverted(crop_image_to_check):
                        cv2.putText(frame,
                                    str(num_idx) + "_inverted", pos_text,
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0),
                                    2, cv2.LINE_AA)
                    else:
                        cv2.putText(frame, str(num_idx), pos_text,
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0),
                                    2, cv2.LINE_AA)

                    #cv2.putText(frame, str(num_idx), pos_text, cv2.FONT_HERSHEY_SIMPLEX,
                    #            0.5, (0,255,0), 2, cv2.LINE_AA)

                    posenet_deploy.putText(frame, i, num_idx, class_id, my_t)

            if len(my_t_per_frame) > 0:
                my_t_pool[frameth] = my_t_per_frame
                my_r_pool[frameth] = my_r_per_frame

            ref_frame_label = cur_frame_label
            ref_frame_axies = cur_frame_axies

            end = time.time() - start
            cv2.putText(frame,
                        "Time processing: " + str(round(end, 3)) + " seconds",
                        (100, 700), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255),
                        2, cv2.LINE_AA)
            cv2.imshow('Result', frame)
            cv2.waitKey(1)

        else:
            # Show images
            #video_writer.write(rgb)
            cv2.imshow('Result', rgb)
            cv2.waitKey(1)

    pipeline.stop()
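
The frame-to-frame identity matching above relies on an iou_score helper defined elsewhere; a minimal sketch of what such a function might look like for (x1, y1, x2, y2) boxes is shown below. The name iou_xyxy and its signature are illustrative stand-ins, not the real helper.

def iou_xyxy(box_a, box_b):
    """Intersection over union of two xyxy bounding boxes (hypothetical stand-in for iou_score)."""
    x1, y1 = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    x2, y2 = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0
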
Code example #25
File: match-yaw1.py  Project: sulijune/ImageAnalysis
def find_essential(i1, i2):
    # quick sanity checks
    if i1 == i2:
        return None
    if i2.name not in i1.match_list:
        return None
    if len(i1.match_list[i2.name]) == 0:
        return None

    if not i1.kp_list or not len(i1.kp_list):
        i1.load_features()
    if not i2.kp_list or not len(i2.kp_list):
        i2.load_features()

    # camera calibration
    K = camera.get_K()
    IK = np.linalg.inv(K)

    # set up data structures for the cv2 call
    uv1 = []
    uv2 = []
    indices = []
    for pair in i1.match_list[i2.name]:
        uv1.append(i1.kp_list[pair[0]].pt)
        uv2.append(i2.kp_list[pair[1]].pt)
    uv1 = np.float32(uv1)
    uv2 = np.float32(uv2)
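    # The essential matrix satisfies x2'^T E x1' = 0 in normalized (calibrated)
    # coordinates; recoverPose decomposes E into the relative rotation and a
    # unit-length translation direction (absolute scale is unobservable from two views).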
    E, mask = cv2.findEssentialMat(uv1, uv2, K, method=method)
    print(i1.name, 'vs', i2.name)
    print("E:\n", E)
    print()
    (n, R, tvec, mask) = cv2.recoverPose(E, uv1, uv2, K)
    print('  inliers:', n, 'of', len(uv1))
    print('  R:', R)
    print('  tvec:', tvec)

    # convert R to homogeneous
    #Rh = np.concatenate((R, np.zeros((3,1))), axis=1)
    #Rh = np.concatenate((Rh, np.zeros((1,4))), axis=0)
    #Rh[3,3] = 1
    # extract the equivalent quaternion, and invert
    q = transformations.quaternion_from_matrix(R)
    q_inv = transformations.quaternion_inverse(q)

    (ned1, ypr1, quat1) = i1.get_camera_pose()
    (ned2, ypr2, quat2) = i2.get_camera_pose()
    diff = np.array(ned2) - np.array(ned1)
    dist = np.linalg.norm(diff)
    dir = diff / dist
    print('dist:', dist, 'ned dir:', dir[0], dir[1], dir[2])
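    # course over ground implied by the GPS baseline: compass heading = atan2(east, north),
    # written below as 90 - atan2(north, east) in degrees, then wrapped to [0, 360)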
    crs_gps = 90 - math.atan2(dir[0], dir[1]) * r2d
    if crs_gps < 0: crs_gps += 360
    if crs_gps > 360: crs_gps -= 360
    print('crs_gps: %.1f' % crs_gps)

    Rbody2ned = i1.get_body2ned()
    cam2body = i1.get_cam2body()
    body2cam = i1.get_body2cam()
    est_dir = Rbody2ned.dot(cam2body).dot(R).dot(tvec)
    est_dir = est_dir / np.linalg.norm(est_dir)  # normalize
    print('est dir:', est_dir.tolist())
    crs_fit = 90 - math.atan2(-est_dir[0], -est_dir[1]) * r2d
    if crs_fit < 0: crs_fit += 360
    if crs_fit > 360: crs_fit -= 360
    print('est crs_fit: %.1f' % crs_fit)
    print("est yaw error: %.1f" % (crs_fit - crs_gps))