Esempio n. 1
0
    def test_argmin(self):
        rows = 420
        cols = 421
        d = 52
        d_step = 1
        arr = np.float32(np.random.uniform(size=(d, rows, cols)))
        arr = np.ascontiguousarray(arr, dtype=np.float32)

        compiler_constants = {
            'D_STEP': d_step,
            'D': d,
            'ARR_SIZE': math.floor(d / d_step),
            'P1': 5,
            'P2': 90000,
            'SHMEM_SIZE': 64
        }

        build_options = [format_compiler_constants(compiler_constants)]
        mod = SourceModule(open("../lib/sgbm_helper.cu").read(),
                           options=build_options)

        gpu_argmin = mod.get_function("__argmin_3d_mat")
        out = np.zeros((rows, cols), dtype=np.int32)
        out = np.ascontiguousarray(out, dtype=np.int32)
        gpu_argmin(drv.In(arr),
                   drv.Out(out),
                   np.int32(rows),
                   np.int32(cols),
                   block=(16, 16, 1),
                   grid=(1, 1))
        self.assertTrue(
            np.all(np.isclose(out, np.int32(np.argmin(arr, axis=0)))))
    d, rows, cols = cost_images.shape
    rows = np.int32(rows)
    cols = np.int32(cols)
    d = np.int32(d)

    d_step = 1

    compiler_constants = {
        'D_STEP': d_step,
        'D': d,
        'ARR_SIZE': math.floor(d / d_step),
        'P1': 5,
        'P2': 90000,
        'SHMEM_SIZE': 64
    }
    build_options = [format_compiler_constants(compiler_constants)]
    mod = SourceModule(open("../lib/sgbm_helper.cu").read(),
                       options=build_options)

    r_aggregate = mod.get_function('__r_aggregate')
    vertical_aggregate_down = mod.get_function('__vertical_aggregate_down')
    vertical_aggregate_up = mod.get_function('__vertical_aggregate_up')
    diagonal_br_tl_aggregate = mod.get_function('__diagonal_br_tl_aggregate')
    diagonal_tl_br_aggregate = mod.get_function('__diagonal_tl_br_aggregate')
    diagonal_tr_bl_aggregate = mod.get_function('__diagonal_tr_bl_aggregate')
    diagonal_bl_tr_aggregate = mod.get_function('__diagonal_bl_tr_aggregate')
    l_aggregate = mod.get_function('__l_aggregate')

    t1 = time()
    cost_images_ptr = drv.to_device(cost_images)
    dp_ptr = drv.mem_alloc(cost_images.nbytes)
Esempio n. 3
0
    def test_diagonal_bl_tr(self):

        IMAGE_DIR = "Backpack-perfect"
        im1 = cv2.imread(os.path.join("../data", IMAGE_DIR, "im1.png"))
        im2 = cv2.imread(os.path.join("../data", IMAGE_DIR, "im0.png"))

        stereo = SemiGlobalMatching(im1,
                                    im2,
                                    os.path.join("../data", IMAGE_DIR,
                                                 "calib.txt"),
                                    window_size=3,
                                    resize=(640, 480))

        params = {
            "p1": 5,
            "p2": 90000,
            "census_kernel_size": 7,
            "reversed": True
        }
        stereo.set_params(params)
        stereo.params['ndisp'] = 50
        t1 = time()

        assert stereo.p1 is not None, "parameters have not been set"
        t1 = time()
        cim1 = stereo.census_transform(stereo.im1)
        cim2 = stereo.census_transform(stereo.im2)
        #print(f"census transform time {time() - t1}")

        if not stereo.reversed:
            D = range(int(stereo.params['ndisp']))
        else:
            D = reversed(range(int(-stereo.params['ndisp']), 1))
        cost_images = stereo.compute_disparity_img(cim1, cim2, D)
        cost_images = np.float32(cost_images)
        m, n, D = cost_images.shape
        # direction == (1,0)
        stereo.directions = [(-1, 1)]
        t1 = time()
        L = stereo.aggregate_cost(cost_images)
        print("python aggregate cost %f" % (time() - t1))
        L = L.transpose((2, 0, 1))

        cost_images = cost_images.transpose((2, 0, 1))
        cost_images = np.ascontiguousarray(cost_images, dtype=np.float32)
        d, rows, cols = cost_images.shape
        d_step = 1

        compiler_constants = {
            'D_STEP': d_step,
            'D': d,
            'ARR_SIZE': math.floor(d / d_step),
            'P1': 5,
            'P2': 90000,
            'SHMEM_SIZE': 64
        }
        build_options = [format_compiler_constants(compiler_constants)]
        mod = SourceModule(open("../lib/sgbm_helper.cu").read(),
                           options=build_options)

        diagonal_aggregate = mod.get_function("diagonal_bl_tr_aggregate")
        out = np.zeros_like(L)
        out = np.ascontiguousarray(out, dtype=np.float32)
        t1 = time()
        diagonal_aggregate(drv.Out(out),
                           drv.In(cost_images),
                           np.int32(rows),
                           np.int32(cols),
                           block=(256, 1, 1),
                           grid=(1, 1))
        print("cuda aggregate cost %f" % (time() - t1))
        drv.stop_profiler()
        s1 = np.sum(np.float64(L))
        s2 = np.sum(np.float64(out))

        print("L sum: %f" % s1)
        print("out sum: %f" % s2)
        self.assertTrue(np.all(np.isclose(out, L)))
Esempio n. 4
0
    def test_two_directions(self):

        IMAGE_DIR = "Backpack-perfect"
        im1 = cv2.imread(os.path.join("../data", IMAGE_DIR, "im1.png"))
        im2 = cv2.imread(os.path.join("../data", IMAGE_DIR, "im0.png"))

        stereo = SemiGlobalMatching(im1,
                                    im2,
                                    os.path.join("../data", IMAGE_DIR,
                                                 "calib.txt"),
                                    window_size=3,
                                    resize=(640, 480))

        params = {
            "p1": 5,
            "p2": 90000,
            "census_kernel_size": 7,
            "reversed": True
        }
        stereo.set_params(params)
        stereo.params['ndisp'] = 50
        t1 = time()

        assert stereo.p1 is not None, "parameters have not been set"
        t1 = time()
        cim1 = stereo.census_transform(stereo.im1)
        cim2 = stereo.census_transform(stereo.im2)
        #print(f"census transform time {time() - t1}")

        if not stereo.reversed:
            D = range(int(stereo.params['ndisp']))
        else:
            D = reversed(range(int(-stereo.params['ndisp']), 1))
        cost_images = stereo.compute_disparity_img(cim1, cim2, D)
        cost_images = np.float32(cost_images)
        m, n, D = cost_images.shape
        # direction == (1,0)
        stereo.directions = [(1, 0), (-1, 0), (1, 1), (-1, 1), (1, -1),
                             (-1, -1)]
        #stereo.directions = [(0,1)]
        t1 = time()
        L = stereo.aggregate_cost(cost_images)
        print("python aggregate cost %f" % (time() - t1))
        L = L.transpose((2, 0, 1))

        cost_images = cost_images.transpose((2, 0, 1))
        cost_images = np.ascontiguousarray(cost_images, dtype=np.float32)
        d, rows, cols = cost_images.shape
        d_step = 1
        rows = np.int32(rows)
        cols = np.int32(cols)

        compiler_constants = {
            'D_STEP': d_step,
            'D': d,
            'ARR_SIZE': math.floor(d / d_step),
            'P1': 5,
            'P2': 90000,
            'SHMEM_SIZE': 64
        }
        build_options = [format_compiler_constants(compiler_constants)]
        mod = SourceModule(open("../lib/sgbm_helper.cu").read(),
                           options=build_options)

        shmem_size = 16
        vertical_blocks = int(math.ceil(rows / shmem_size))

        #r_aggregate = mod.get_function('r_aggregate')
        vertical_aggregate_down = mod.get_function('vertical_aggregate_down')
        vertical_aggregate_up = mod.get_function('vertical_aggregate_up')
        diagonal_br_tl_aggregate = mod.get_function('diagonal_br_tl_aggregate')
        diagonal_tl_br_aggregate = mod.get_function('diagonal_tl_br_aggregate')
        diagonal_tr_bl_aggregate = mod.get_function('diagonal_tr_bl_aggregate')
        diagonal_bl_tr_aggregate = mod.get_function('diagonal_bl_tr_aggregate')
        #l_aggregate = mod.get_function('l_aggregate')

        t1 = time()
        cost_images_ptr = drv.to_device(cost_images)
        dp_ptr = drv.mem_alloc(cost_images.nbytes)

        vertical_aggregate_down(dp_ptr,
                                cost_images_ptr,
                                rows,
                                cols,
                                block=(256, 1, 1),
                                grid=(1, 1))
        vertical_aggregate_up(dp_ptr,
                              cost_images_ptr,
                              rows,
                              cols,
                              block=(256, 1, 1),
                              grid=(1, 1))

        #r_aggregate(dp_ptr, cost_images_ptr, rows, cols, block = (shmem_size, shmem_size, 1), grid = (1, vertical_blocks))
        #l_aggregate(dp_ptr, cost_images_ptr, rows, cols, block = (shmem_size, shmem_size, 1), grid = (1, vertical_blocks))

        diagonal_tl_br_aggregate(dp_ptr,
                                 cost_images_ptr,
                                 rows,
                                 cols,
                                 block=(256, 1, 1),
                                 grid=(1, 1))
        diagonal_bl_tr_aggregate(dp_ptr,
                                 cost_images_ptr,
                                 rows,
                                 cols,
                                 block=(256, 1, 1),
                                 grid=(1, 1))

        diagonal_tr_bl_aggregate(dp_ptr,
                                 cost_images_ptr,
                                 rows,
                                 cols,
                                 block=(256, 1, 1),
                                 grid=(1, 1))
        diagonal_br_tl_aggregate(dp_ptr,
                                 cost_images_ptr,
                                 rows,
                                 cols,
                                 block=(256, 1, 1),
                                 grid=(1, 1))

        print("cuda aggregate cost %f" % (time() - t1))
        drv.stop_profiler()

        agg_image = drv.from_device(dp_ptr,
                                    cost_images.shape,
                                    dtype=np.float32)
        s1 = np.sum(np.float64(L))
        s2 = np.sum(np.float64(agg_image))

        print("L sum: %f" % s1)
        print("out sum: %f" % s2)
        self.assertTrue(np.all(np.isclose(agg_image, L)))
Esempio n. 5
0
    def test_horizontal_left(self):

        IMAGE_DIR = "Backpack-perfect"
        im1 = cv2.imread(os.path.join("../data", IMAGE_DIR, "im1.png"))
        im2 = cv2.imread(os.path.join("../data", IMAGE_DIR, "im0.png"))
        #im1 = im1[:32,:,:]
        #im2 = im2[:32,:,:]

        stereo = SemiGlobalMatching(im1,
                                    im2,
                                    os.path.join("../data", IMAGE_DIR,
                                                 "calib.txt"),
                                    window_size=3,
                                    resize=(640, 480))

        params = {
            "p1": 5,
            "p2": 90000,
            "census_kernel_size": 7,
            "reversed": True
        }
        stereo.set_params(params)
        stereo.params['ndisp'] = 50
        t1 = time()

        assert stereo.p1 is not None, "parameters have not been set"
        t1 = time()
        cim1 = stereo.census_transform(stereo.im1)
        cim2 = stereo.census_transform(stereo.im2)
        #print(f"census transform time {time() - t1}")
        #pdb.set_trace()
        if not stereo.reversed:
            D = range(int(stereo.params['ndisp']))
        else:
            D = reversed(range(int(-stereo.params['ndisp']), 1))
        cost_images = stereo.compute_disparity_img(cim1, cim2, D)
        cost_images = np.float32(cost_images)
        ##cost_images = cost_images[:,:,8:45]
        #cost_images = cost_images[:,:,9:50]
        #shape = (480,640,36)
        #cost_images = np.float32(np.random.normal(loc=100, size=shape))
        m, n, D = cost_images.shape
        # direction == (1,0)
        stereo.directions = [(0, -1)]
        t1 = time()
        L = stereo.aggregate_cost_optimization_test(cost_images)
        print("python aggregate cost %f" % (time() - t1))
        L = L.transpose((2, 0, 1))

        cost_images = cost_images.transpose((2, 0, 1))
        cost_images = np.ascontiguousarray(cost_images, dtype=np.float32)
        d, rows, cols = cost_images.shape
        d_step = 1

        shmem_size = 16
        compiler_constants = {
            'D_STEP': d_step,
            'D': d,
            'ARR_SIZE': math.floor(d / d_step),
            'P1': 5,
            'P2': 90000,
            'SHMEM_SIZE': shmem_size
        }
        build_options = [format_compiler_constants(compiler_constants)]
        mod = SourceModule(open("../lib/sgbm_helper.cu").read(),
                           options=build_options)

        l_aggregate = mod.get_function("l_aggregate")
        out = np.zeros_like(L)
        out = np.ascontiguousarray(out, dtype=np.float32)
        vertical_blocks = int(math.ceil(rows / shmem_size))
        t1 = time()
        # pycuda complains when block size is greater than 32 x 32
        l_aggregate(drv.Out(out),
                    drv.In(cost_images),
                    np.int32(rows),
                    np.int32(cols),
                    block=(shmem_size, shmem_size, 1),
                    grid=(1, vertical_blocks))
        print("cuda aggregate cost %f" % (time() - t1))
        drv.stop_profiler()
        s1 = np.sum(np.float64(L))
        s2 = np.sum(np.float64(out))

        print("L sum: %f" % s1)
        print("out sum: %f" % s2)

        #pdb.set_trace()
        self.assertTrue(np.all(np.isclose(out, L)))