Esempio n. 1
0
 def _compile_kernels(self):
     """Call the OpenCL compiler on every source registered in ``self.kernels``.

     For each entry the source is fetched with ``get_opencl_code`` and built
     with ``-D WORKGROUP_SIZE=<wg_size>``; when the registered size is a
     tuple, ``self.max_workgroup_size`` is used instead.  Each program that
     builds is stored in ``self.programs[kernel]`` and, for each of its
     functions, the value returned by ``kernel_workgroup_size`` is recorded
     in ``self.kernels`` under ``"<kernel>.<function_name>"``.

     A build failure of "keypoints_gpu2" or "keypoints_gpu1" is tolerated:
     it is logged, ``self.LOW_END`` is incremented and that kernel is skipped.

     :raises MemoryError: when pyopencl reports a memory error during the build
     :raises pyopencl.RuntimeError: when any other kernel fails to build
     """
     # Iterate over a snapshot: new keys are added to self.kernels in the loop.
     for kernel, wg_size in list(self.kernels.items()):
         kernel_src = get_opencl_code(kernel)
         if isinstance(wg_size, tuple):
             wg_size = self.max_workgroup_size
         try:
             program = pyopencl.Program(self.ctx, kernel_src).build("-D WORKGROUP_SIZE=%s" % wg_size)
         except pyopencl.MemoryError as error:
             raise MemoryError(error)
         except pyopencl.RuntimeError as error:
             if kernel == "keypoints_gpu2":
                 logger.warning(
                     "Failed compiling kernel '%s' with workgroup size %s: %s: use low_end alternative",
                     kernel,
                     wg_size,
                     error,
                 )
                 self.LOW_END += 1
             elif kernel == "keypoints_gpu1":
                 logger.warning(
                     "Failed compiling kernel '%s' with workgroup size %s: %s: use CPU alternative",
                     kernel,
                     wg_size,
                     error,
                 )
                 self.LOW_END += 1
             else:
                 logger.error("Failed compiling kernel '%s' with workgroup size %s: %s", kernel, wg_size, error)
                 raise
             # Nothing was built for this kernel: without skipping, `program`
             # would be unbound (first iteration) or still refer to the
             # previously built program and be registered under the wrong name.
             continue
         self.programs[kernel] = program
         for one_function in program.all_kernels():
             workgroup_size = kernel_workgroup_size(program, one_function)
             self.kernels[kernel + "." + one_function.function_name] = workgroup_size
Esempio n. 2
0
    def test_interpolation(self):
        """
        tests the keypoints interpolation kernel

        All inputs are obtained from ``interpolation_setup()``.  The
        "interp_keypoint" kernel is run on the device and its output is
        compared with ``my_interp_keypoint`` applied on the host to the first
        ``nb_keypoints`` rows.  Rows whose column 1 equals -1 are left out of
        the comparison on both sides.
        """
        (border_dist, peakthresh, EdgeThresh, EdgeThresh0, octsize, nb_keypoints,
         actual_nb_keypoints, width, height, DOGS, s, keypoints_prev, blur) = interpolation_setup()

        # actual_nb_keypoints is the number of keypoints returned by "local_maxmin".
        # After the interpolation, it will be reduced, but we can still use it as a boundary.
        maxwg = kernel_workgroup_size(self.program, "interp_keypoint")
        shape = calc_size((keypoints_prev.shape[0],), maxwg)
        gpu_dogs = pyopencl.array.to_device(self.queue, DOGS)
        gpu_keypoints1 = pyopencl.array.to_device(self.queue, keypoints_prev)
        start_keypoints = numpy.int32(0)
        actual_nb_keypoints = numpy.int32(actual_nb_keypoints)
        InitSigma = numpy.float32(1.6)  # warning: it must be the same in my_keypoints_interpolation
        t0 = time.time()
        k1 = self.program.interp_keypoint(self.queue, shape, (maxwg,),
                                          gpu_dogs.data, gpu_keypoints1.data, start_keypoints, actual_nb_keypoints,
                                          peakthresh, InitSigma, width, height)
        res = gpu_keypoints1.get()

        t1 = time.time()
        # Work on a copy: the rows are overwritten in place below while
        # keypoints_prev must stay untouched.
        ref = numpy.copy(keypoints_prev)
        for i, k in enumerate(ref[:nb_keypoints, :]):
            ref[i] = my_interp_keypoint(DOGS, s, k[1], k[2], 5, peakthresh, width, height)

        t2 = time.time()

        # we have to compare keypoints different from (-1,-1,-1,-1)
        res2 = res[res[:, 1] != -1]
        ref2 = ref[ref[:, 1] != -1]

        if PRINT_KEYPOINTS:
            logger.info("[s=%s]Keypoints before interpolation: %s", s, actual_nb_keypoints)
            logger.info("[s=%s]Keypoints after interpolation : %s", s, res2.shape[0])
            logger.info(res[0:actual_nb_keypoints])

        if self.maxwg < self.wg[0] * self.wg[1]:
            logger.info("Not testing result as the WG is too little %s", self.maxwg)
            return
        self.assertLess(abs(len(ref2) - len(res2)) / (len(ref2) + len(res2)), 0.33, "the number of keypoint is almost the same")

        delta = norm_L1(ref2, res2)
        # assertLess replaces the deprecated `assert_` alias (removed in Python 3.12).
        self.assertLess(delta, 0.43, "delta=%s" % delta)
        logger.info("delta=%s", delta)

        if self.PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.", 1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Keypoints interpolation took %.3fms", 1e-6 * (k1.profile.end - k1.profile.start))
Esempio n. 3
0
 def _compile_kernels(self):
     """Call the OpenCL compiler on every source registered in ``self.kernels``.

     For each entry the source is fetched with ``get_opencl_code`` from the
     "sift" directory and built with ``-D WORKGROUP_SIZE=<wg_size>``; when
     the registered size is a tuple, ``self.max_workgroup_size`` is used
     instead.  Each program that builds is stored in
     ``self.programs[kernel]`` and, for each of its functions, the value
     returned by ``kernel_workgroup_size`` is recorded in ``self.kernels``
     under ``"<kernel>.<function_name>"``.

     A build failure of "keypoints_gpu2" or "keypoints_gpu1" is tolerated:
     it is logged, ``self.LOW_END`` is incremented and that kernel is skipped.

     :raises MemoryError: when pyopencl reports a memory error during the build
     :raises pyopencl.RuntimeError: when any other kernel fails to build
     """
     # Iterate over a snapshot: new keys are added to self.kernels in the loop.
     for kernel, wg_size in list(self.kernels.items()):
         kernel_src = get_opencl_code(os.path.join("sift", kernel))
         if isinstance(wg_size, tuple):
             wg_size = self.max_workgroup_size
         try:
             program = pyopencl.Program(self.ctx, kernel_src).build(
                 '-D WORKGROUP_SIZE=%s' % wg_size)
         except pyopencl.MemoryError as error:
             raise MemoryError(error)
         except pyopencl.RuntimeError as error:
             if kernel == "keypoints_gpu2":
                 logger.warning(
                     "Failed compiling kernel '%s' with workgroup size %s: %s: use low_end alternative",
                     kernel, wg_size, error)
                 self.LOW_END += 1
             elif kernel == "keypoints_gpu1":
                 logger.warning(
                     "Failed compiling kernel '%s' with workgroup size %s: %s: use CPU alternative",
                     kernel, wg_size, error)
                 self.LOW_END += 1
             else:
                 logger.error(
                     "Failed compiling kernel '%s' with workgroup size %s: %s",
                     kernel, wg_size, error)
                 raise
             # Nothing was built for this kernel: without skipping, `program`
             # would be unbound (first iteration) or still refer to the
             # previously built program and be registered under the wrong name.
             continue
         self.programs[kernel] = program
         for one_function in program.all_kernels():
             workgroup_size = kernel_workgroup_size(program, one_function)
             self.kernels[kernel + "." +
                          one_function.function_name] = workgroup_size
Esempio n. 4
0
    def test_interpolation(self):
        """
        Check the "interp_keypoint" kernel against ``my_interp_keypoint``.

        Inputs come from ``interpolation_setup()``; rows whose column 1 is -1
        are excluded from the comparison on both sides.
        """
        (_border_dist, peakthresh, _edge_thresh, _edge_thresh0, _octsize,
         nb_keypoints, actual_nb_keypoints, width, height, DOGS, s,
         keypoints_prev, _blur) = interpolation_setup()

        # actual_nb_keypoints (as returned by "local_maxmin") only shrinks
        # during interpolation, so it remains a valid upper bound.
        wg_limit = kernel_workgroup_size(self.program, "interp_keypoint")
        global_size = calc_size((keypoints_prev.shape[0],), wg_limit)
        dogs_dev = pyopencl.array.to_device(self.queue, DOGS)
        keypoints_dev = pyopencl.array.to_device(self.queue, keypoints_prev)
        first_keypoint = numpy.int32(0)
        actual_nb_keypoints = numpy.int32(actual_nb_keypoints)
        # must be kept identical to the value used in my_keypoints_interpolation
        init_sigma = numpy.float32(1.6)

        gpu_begin = time.time()
        event = self.program.interp_keypoint(
            self.queue, global_size, (wg_limit,),
            dogs_dev.data, keypoints_dev.data,
            first_keypoint, actual_nb_keypoints,
            peakthresh, init_sigma, width, height)
        gpu_out = keypoints_dev.get()
        gpu_end = time.time()

        # The copy matters: rows are replaced in place by the reference values.
        cpu_out = numpy.copy(keypoints_prev)
        for row_idx, row in enumerate(cpu_out[:nb_keypoints]):
            cpu_out[row_idx] = my_interp_keypoint(DOGS, s, row[1], row[2], 5, peakthresh, width, height)
        cpu_end = time.time()

        # discard the (-1,-1,-1,-1) placeholders before comparing
        gpu_valid = gpu_out[gpu_out[:, 1] != -1]
        cpu_valid = cpu_out[cpu_out[:, 1] != -1]

        if PRINT_KEYPOINTS:
            logger.info("[s=%s]Keypoints before interpolation: %s", s, actual_nb_keypoints)
            logger.info("[s=%s]Keypoints after interpolation : %s", s, gpu_valid.shape[0])
            logger.info(gpu_out[0:actual_nb_keypoints])

        if self.maxwg < self.wg[0] * self.wg[1]:
            logger.info("Not testing result as the WG is too little %s", self.maxwg)
            return

        n_cpu = len(cpu_valid)
        n_gpu = len(gpu_valid)
        count_mismatch = abs(n_cpu - n_gpu) / (n_cpu + n_gpu)
        self.assertLess(count_mismatch, 0.33, "the number of keypoint is almost the same")

        delta = norm_L1(cpu_valid, gpu_valid)
        self.assertLess(delta, 0.43, "delta=%s" % delta)
        logger.info("delta=%s" % delta)

        if self.PROFILE:
            cpu_ms = 1000.0 * (cpu_end - gpu_end)
            gpu_ms = 1000.0 * (gpu_end - gpu_begin)
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." % (cpu_ms, gpu_ms))
            kernel_ms = 1e-6 * (event.profile.end - event.profile.start)
            logger.info("Keypoints interpolation took %.3fms" % kernel_ms)
Esempio n. 5
0
    def test_rgb(self):
        """
        tests the RGB (3 x uint8) kernel

        The input is replicated into three uint8 channels, then the
        "rgb_to_float", "max_min_global_stage1", "max_min_global_stage2" and
        "normalizes" kernels are run in sequence.  The device result is
        compared with ``normalize`` applied to the per-pixel channel maximum.
        """
        max_wg = kernel_workgroup_size(self.reduction, "max_min_global_stage1")
        if max_wg < self.red_size:
            # The label used to read "test_uint16" (copy-paste leftover).
            logger.warning(
                "test_rgb: Skipping test of WG=%s when maximum is %s (%s)",
                self.red_size, max_wg, self.max_wg)
            return

        channel = self.input.astype(numpy.uint8)
        lint = numpy.empty((self.input.shape[0], self.input.shape[1], 3),
                           dtype=numpy.uint8)
        lint[:, :, 0] = channel
        lint[:, :, 1] = channel
        lint[:, :, 2] = channel
        t0 = time.time()
        au8 = pyopencl.array.to_device(self.queue, lint)
        k1 = self.program.rgb_to_float(self.queue, self.shape, self.wg,
                                       au8.data, self.gpudata.data,
                                       self.IMAGE_W, self.IMAGE_H)
        # NOTE(review): no local-memory argument is passed to the two
        # reduction stages here -- confirm this matches the kernel signatures.
        k2 = self.reduction.max_min_global_stage1(
            self.queue, (self.red_size * self.red_size, ), (self.red_size, ),
            self.gpudata.data, self.buffers_max_min.data,
            (self.IMAGE_W * self.IMAGE_H))
        k3 = self.reduction.max_min_global_stage2(self.queue,
                                                  (self.red_size, ),
                                                  (self.red_size, ),
                                                  self.buffers_max_min.data,
                                                  self.buffers_max.data,
                                                  self.buffers_min.data)
        k4 = self.program.normalizes(self.queue, self.shape, self.wg,
                                     self.gpudata.data, self.buffers_min.data,
                                     self.buffers_max.data,
                                     self.twofivefive.data, self.IMAGE_W,
                                     self.IMAGE_H)
        res = self.gpudata.get()
        t1 = time.time()
        ref = normalize(lint.max(axis=-1))
        t2 = time.time()
        delta = abs(ref - res).max()
        if self.PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Conversion  RGB ->float took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))
            logger.info("Reduction stage1 took        %.3fms",
                        1e-6 * (k2.profile.end - k2.profile.start))
            logger.info("Reduction stage2 took        %.3fms",
                        1e-6 * (k3.profile.end - k3.profile.start))
            logger.info("Normalization                %.3fms",
                        1e-6 * (k4.profile.end - k4.profile.start))
            logger.info("--------------------------------------")
        # assertLess replaces the deprecated `assert_` alias (removed in Python 3.12).
        self.assertLess(delta, 1e-4, "delta=%s" % delta)
Esempio n. 6
0
    def test_uint8(self):
        """
        Check the uint8 preprocessing path.

        Runs the "u8_to_float", "max_min_global_stage1",
        "max_min_global_stage2" and "normalizes" kernels in sequence and
        compares the device result with ``normalize`` applied on the host.
        """
        wg_limit = kernel_workgroup_size(self.reduction, "max_min_global_stage1")
        if wg_limit < self.red_size:
            logger.warning(
                "test_uint8: Skipping test of WG=%s when maximum is %s (%s)",
                self.red_size, wg_limit, self.max_wg)
            return

        host_u8 = self.input.astype(numpy.uint8)
        gpu_begin = time.time()
        dev_u8 = pyopencl.array.to_device(self.queue, host_u8)
        evt_convert = self.program.u8_to_float(
            self.queue, self.shape, self.wg,
            dev_u8.data, self.gpudata.data,
            self.IMAGE_W, self.IMAGE_H)
        evt_stage1 = self.reduction.max_min_global_stage1(
            self.queue,
            (self.red_size * self.red_size, ),
            (self.red_size, ),
            self.gpudata.data,
            self.buffers_max_min.data,
            (self.IMAGE_W * self.IMAGE_H),
            pyopencl.LocalMemory(8 * self.red_size))
        evt_stage2 = self.reduction.max_min_global_stage2(
            self.queue,
            (self.red_size, ),
            (self.red_size, ),
            self.buffers_max_min.data,
            self.buffers_max.data,
            self.buffers_min.data,
            pyopencl.LocalMemory(8 * self.red_size))
        evt_normalize = self.program.normalizes(
            self.queue, self.shape, self.wg,
            self.gpudata.data,
            self.buffers_min.data,
            self.buffers_max.data,
            self.twofivefive.data,
            self.IMAGE_W, self.IMAGE_H)
        evt_normalize.wait()
        gpu_result = self.gpudata.get()
        gpu_end = time.time()
        cpu_result = normalize(host_u8)
        cpu_end = time.time()
        delta = abs(cpu_result - gpu_result).max()

        if self.PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." %
                        (1000.0 * (cpu_end - gpu_end), 1000.0 * (gpu_end - gpu_begin)))
            # one line per kernel event, in launch order
            per_kernel = (
                ("Conversion uint8->float took %.3fms", evt_convert),
                ("Reduction stage1 took        %.3fms", evt_stage1),
                ("Reduction stage2 took        %.3fms", evt_stage2),
                ("Normalization                %.3fms", evt_normalize),
            )
            for template, event in per_kernel:
                logger.info(template % (1e-6 * (event.profile.end - event.profile.start)))
            logger.info("--------------------------------------")

        self.assertLess(delta, 1e-4, "delta=%s" % delta)
Esempio n. 7
0
 def _compile_kernels(self):
     """
     Build every OpenCL source named in ``self.kernels``.

     The most recently built program is kept in ``self.program``; for each
     function of each program, the value returned by
     ``kernel_workgroup_size`` is stored in ``self.kernels`` under
     ``"<kernel>.<function_name>"``.

     :raises MemoryError: when pyopencl reports a memory error during the build
     """
     # Snapshot the keys first: the dictionary grows while we loop.
     for source_name in tuple(self.kernels):
         source_text = get_opencl_code(source_name)
         try:
             built = pyopencl.Program(self.ctx, source_text).build()
         except pyopencl.MemoryError as error:
             raise MemoryError(error)
         self.program = built
         for entry_point in built.all_kernels():
             size = kernel_workgroup_size(built, entry_point)
             key = ".".join((source_name, entry_point.function_name))
             self.kernels[key] = size
Esempio n. 8
0
 def test_v2_even(self):
     """
     test even kernel size

     Compares ``gaussian_cpu`` with ``self.gaussian_gpu_v2`` for sigma=3.0
     and size=28.  Returns without checking anything when the workgroup
     size reported for the "gaussian" kernel is smaller than ``size``.
     """
     sigma = 3.0
     size = 28
     ref = gaussian_cpu(sigma, size)
     max_wg = kernel_workgroup_size(self.kernels["gaussian"], "gaussian")
     if max_wg < size:
         logger.warning("Skipping test of WG=%s when maximum is %s", size, max_wg)
         return
     res = self.gaussian_gpu_v2(sigma, size)
     delta = abs(ref - res).max()
     # assertLess replaces the deprecated `assert_` alias (removed in Python 3.12);
     # the message now describes the failure instead of the success.
     self.assertLess(delta, 1e-6, "gaussian differ, max delta=%s" % delta)
Esempio n. 9
0
 def test_v2_even(self):
     """
     test even kernel size

     Compares ``gaussian_cpu`` with ``self.gaussian_gpu_v2`` for sigma=3.0
     and size=28.  Returns without checking anything when the workgroup
     size reported for the "gaussian" kernel is smaller than ``size``.
     """
     sigma = 3.0
     size = 28
     ref = gaussian_cpu(sigma, size)
     max_wg = kernel_workgroup_size(self.kernels["gaussian"], "gaussian")
     if max_wg < size:
         logger.warning("Skipping test of WG=%s when maximum is %s", size,
                        max_wg)
         return
     res = self.gaussian_gpu_v2(sigma, size)
     delta = abs(ref - res).max()
     # assertLess replaces the deprecated `assert_` alias (removed in Python 3.12);
     # the message now describes the failure instead of the success.
     self.assertLess(delta, 1e-6, "gaussian differ, max delta=%s" % delta)
Esempio n. 10
0
 def _compile_kernels(self):
     """
     Build every OpenCL source named in ``self.kernels`` (looked up under "sift").

     The most recently built program is kept in ``self.program``; for each
     function of each program, the value returned by
     ``kernel_workgroup_size`` is stored in ``self.kernels`` under
     ``"<kernel>.<function_name>"``.

     :raises MemoryError: when pyopencl reports a memory error during the build
     """
     # Snapshot the keys first: the dictionary grows while we loop.
     for source_name in tuple(self.kernels):
         source_path = os.path.join("sift", source_name)
         source_text = get_opencl_code(source_path)
         try:
             built = pyopencl.Program(self.ctx, source_text).build()
         except pyopencl.MemoryError as error:
             raise MemoryError(error)
         self.program = built
         for entry_point in built.all_kernels():
             size = kernel_workgroup_size(built, entry_point)
             key = ".".join((source_name, entry_point.function_name))
             self.kernels[key] = size
Esempio n. 11
0
    def test_uint8(self):
        """
        Check the uint8 preprocessing path.

        Runs the "u8_to_float", "max_min_global_stage1",
        "max_min_global_stage2" and "normalizes" kernels in sequence and
        compares the device result with ``normalize`` applied on the host.
        """
        wg_limit = kernel_workgroup_size(self.reduction, "max_min_global_stage1")
        if wg_limit < self.red_size:
            logger.warning("test_uint8: Skipping test of WG=%s when maximum is %s (%s)", self.red_size, wg_limit, self.max_wg)
            return

        host_u8 = self.input.astype(numpy.uint8)
        gpu_begin = time.time()
        dev_u8 = pyopencl.array.to_device(self.queue, host_u8)
        evt_convert = self.program.u8_to_float(
            self.queue, self.shape, self.wg,
            dev_u8.data, self.gpudata.data,
            self.IMAGE_W, self.IMAGE_H)
        evt_stage1 = self.reduction.max_min_global_stage1(
            self.queue,
            (self.red_size * self.red_size,),
            (self.red_size,),
            self.gpudata.data,
            self.buffers_max_min.data,
            (self.IMAGE_W * self.IMAGE_H),
            pyopencl.LocalMemory(8 * self.red_size))
        evt_stage2 = self.reduction.max_min_global_stage2(
            self.queue,
            (self.red_size,),
            (self.red_size,),
            self.buffers_max_min.data,
            self.buffers_max.data,
            self.buffers_min.data,
            pyopencl.LocalMemory(8 * self.red_size))
        evt_normalize = self.program.normalizes(
            self.queue, self.shape, self.wg,
            self.gpudata.data,
            self.buffers_min.data,
            self.buffers_max.data,
            self.twofivefive.data,
            self.IMAGE_W, self.IMAGE_H)
        evt_normalize.wait()
        gpu_result = self.gpudata.get()
        gpu_end = time.time()
        cpu_result = normalize(host_u8)
        cpu_end = time.time()
        delta = abs(cpu_result - gpu_result).max()

        if self.PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." % (1000.0 * (cpu_end - gpu_end), 1000.0 * (gpu_end - gpu_begin)))
            # one line per kernel event, in launch order
            per_kernel = (
                ("Conversion uint8->float took %.3fms", evt_convert),
                ("Reduction stage1 took        %.3fms", evt_stage1),
                ("Reduction stage2 took        %.3fms", evt_stage2),
                ("Normalization                %.3fms", evt_normalize),
            )
            for template, event in per_kernel:
                logger.info(template % (1e-6 * (event.profile.end - event.profile.start)))
            logger.info("--------------------------------------")

        self.assertLess(delta, 1e-4, "delta=%s" % delta)
Esempio n. 12
0
    def test_rgb(self):
        """
        tests the RGB (3 x uint8) kernel

        The input is replicated into three uint8 channels, then the
        "rgb_to_float", "max_min_global_stage1", "max_min_global_stage2" and
        "normalizes" kernels are run in sequence.  The device result is
        compared with ``normalize`` applied to the per-pixel channel maximum.
        """
        max_wg = kernel_workgroup_size(self.reduction, "max_min_global_stage1")
        if max_wg < self.red_size:
            # The label used to read "test_uint16" (copy-paste leftover).
            logger.warning("test_rgb: Skipping test of WG=%s when maximum is %s (%s)", self.red_size, max_wg, self.max_wg)
            return

        channel = self.input.astype(numpy.uint8)
        lint = numpy.empty((self.input.shape[0], self.input.shape[1], 3), dtype=numpy.uint8)
        lint[:, :, 0] = channel
        lint[:, :, 1] = channel
        lint[:, :, 2] = channel
        t0 = time.time()
        au8 = pyopencl.array.to_device(self.queue, lint)
        k1 = self.program.rgb_to_float(self.queue, self.shape, self.wg, au8.data, self.gpudata.data, self.IMAGE_W, self.IMAGE_H)
        # NOTE(review): no local-memory argument is passed to the two
        # reduction stages here -- confirm this matches the kernel signatures.
        k2 = self.reduction.max_min_global_stage1(self.queue, (self.red_size * self.red_size,), (self.red_size,),
                                                  self.gpudata.data,
                                                  self.buffers_max_min.data,
                                                  (self.IMAGE_W * self.IMAGE_H))
        k3 = self.reduction.max_min_global_stage2(self.queue, (self.red_size,), (self.red_size,),
                                                  self.buffers_max_min.data,
                                                  self.buffers_max.data,
                                                  self.buffers_min.data)
        k4 = self.program.normalizes(self.queue, self.shape, self.wg,
                                     self.gpudata.data,
                                     self.buffers_min.data,
                                     self.buffers_max.data,
                                     self.twofivefive.data,
                                     self.IMAGE_W, self.IMAGE_H)
        res = self.gpudata.get()
        t1 = time.time()
        ref = normalize(lint.max(axis=-1))
        t2 = time.time()
        delta = abs(ref - res).max()
        if self.PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.", 1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Conversion  RGB ->float took %.3fms", 1e-6 * (k1.profile.end - k1.profile.start))
            logger.info("Reduction stage1 took        %.3fms", 1e-6 * (k2.profile.end - k2.profile.start))
            logger.info("Reduction stage2 took        %.3fms", 1e-6 * (k3.profile.end - k3.profile.start))
            logger.info("Normalization                %.3fms", 1e-6 * (k4.profile.end - k4.profile.start))
            logger.info("--------------------------------------")
        # assertLess replaces the deprecated `assert_` alias (removed in Python 3.12).
        self.assertLess(delta, 1e-4, "delta=%s" % delta)
Esempio n. 13
0
 def setUp(self):
     """Build one program from "sift.cl" and "algebra.cl" and look up the "compact" workgroup size."""
     sources = []
     for filename in ("sift.cl", "algebra.cl"):
         sources.append(get_opencl_code(os.path.join("sift", filename)))
     # The sources are concatenated so both files end up in a single program.
     combined_src = os.linesep.join(sources)
     self.program = pyopencl.Program(self.ctx, combined_src).build()
     self.wg_compact = kernel_workgroup_size(self.program, "compact")
Esempio n. 14
0
    def test_transform(self):
        """
        tests transform kernel

        Two images are prepared (either a reference picture and an affine
        transform of it, or two frames read from disk), a correction is
        estimated with ``self.matching_correction`` and applied on the device
        by the "transform" kernel.  The same correction is applied on the
        host with ``scipy.ndimage.affine_transform`` for timing purposes.

        NOTE(review): this test only logs the maximum error and the MSE, it
        makes no assertion.
        """
        if USE_LENA:
            # original image
            if hasattr(scipy.misc, "ascent"):
                image = scipy.misc.ascent().astype(numpy.float32)
            else:
                image = scipy.misc.lena().astype(numpy.float32)

            image = numpy.ascontiguousarray(image[0:512, 0:512])

            # transformation
            matrix = numpy.array([[1.0, -0.75], [0.7, 0.5]], dtype=numpy.float32)
            offset_value = numpy.array([250.0, -150.0], dtype=numpy.float32)

            # scipy.ndimage.affine_transform is the public name; the
            # scipy.ndimage.interpolation namespace is deprecated.
            image2 = scipy.ndimage.affine_transform(image, matrix, offset=offset_value, order=1, mode="constant")

        else:  # use images of a stack
            # NOTE(review): hard-coded absolute paths, only valid on the
            # original author's machine.
            image = scipy.misc.imread("/home/paleo/Titanium/test/frame0.png")
            image2 = scipy.misc.imread("/home/paleo/Titanium/test/frame1.png")
            offset_value = numpy.array([0.0, 0.0], dtype=numpy.float32)
        image_height, image_width = image.shape
        image2_height, image2_width = image2.shape

        fill_value = numpy.float32(0.0)
        mode = numpy.int32(1)

        if IMAGE_RESHAPE:  # turns out that image should always be reshaped
            output_height, output_width = int(3000), int(3000)
            image, image_height, image_width = self.image_reshape(image, output_height, output_width, image_height, image_width)
            image2, image2_height, image2_width = self.image_reshape(image2, output_height, output_width, image2_height, image2_width)
        else:
            output_height, output_width = int(image_height * numpy.sqrt(2)), int(image_width * numpy.sqrt(2))
        logger.info("Image : (%s, %s) -- Output: (%s, %s)", image_height, image_width, output_height, output_width)

        # perform correction by least square
        sol, MSE = self.matching_correction(image, image2)
        logger.info(sol)

        correction_matrix = numpy.zeros((2, 2), dtype=numpy.float32)
        correction_matrix[0] = sol[0:2, 0]
        correction_matrix[1] = sol[3:5, 0]
        matrix_for_gpu = correction_matrix.reshape(4, 1)  # for float4 struct
        offset_value[0] = sol[2, 0]
        offset_value[1] = sol[5, 0]

        maxwg = kernel_workgroup_size(self.program, "transform")
        wg = maxwg, 1
        shape = calc_size((output_width, output_height), wg)
        gpu_image = pyopencl.array.to_device(self.queue, image2)
        gpu_output = pyopencl.array.empty(self.queue, (output_height, output_width), dtype=numpy.float32, order="C")
        gpu_matrix = pyopencl.array.to_device(self.queue, matrix_for_gpu)
        gpu_offset = pyopencl.array.to_device(self.queue, offset_value)
        image_height, image_width = numpy.int32((image_height, image_width))
        output_height, output_width = numpy.int32((output_height, output_width))

        t0 = time.time()
        k1 = self.program.transform(self.queue, shape, wg,
                                    gpu_image.data, gpu_output.data, gpu_matrix.data, gpu_offset.data,
                                    image_width, image_height, output_width, output_height, fill_value, mode)
        res = gpu_output.get()
        t1 = time.time()

        # Host-side equivalent; its result is not compared, only timed (t2 - t1).
        ref = scipy.ndimage.affine_transform(image2, correction_matrix,
                                             offset=offset_value,
                                             output_shape=(output_height, output_width),
                                             order=1,
                                             mode="constant",
                                             cval=fill_value)
        t2 = time.time()

        delta = abs(res - image)
        delta_max = delta.max()
        # Convert the flat argmax into (row, column).  The previous
        # `delta_arg / output_width` is a true division under Python 3 and
        # produced a float row index.
        at_0, at_1 = numpy.unravel_index(delta.argmax(), delta.shape)
        logger.info("Max error: %f at (%d, %d)", delta_max, at_0, at_1)
        logger.info("minimal MSE according to least squares : %f", MSE)

        if self.PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.", 1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Transformation took %.3fms", 1e-6 * (k1.profile.end - k1.profile.start))
    def test_descriptor(self):
        """
        tests keypoints descriptors creation kernel

        Inputs come from ``descriptor_setup()`` and are compacted with
        ``my_compact``.  The first kernel of ``self.program_keypoint`` is run
        on the device and its output is compared with ``my_descriptor``
        through ``descriptors_compare``; the number of matches is logged.
        Nothing is run when ``self.abort`` is set or when the kernel's
        workgroup limit is below ``self.wg_keypoint[0]``.
        """
        if self.abort:
            return

        # Descriptor_setup
        keypoints_o, nb_keypoints, actual_nb_keypoints, grad, ori, octsize = descriptor_setup()
        # keypoints should be a compacted vector of keypoints
        keypoints_o, compact_cnt = my_compact(numpy.copy(keypoints_o),
                                              nb_keypoints)
        actual_nb_keypoints = compact_cnt
        keypoints_start, keypoints_end = 0, actual_nb_keypoints
        # two extra rows, to check if we actually stop at keypoints_end
        keypoints = keypoints_o[keypoints_start:keypoints_end + 2]
        logger.info("Working on keypoints : [%s,%s] (octave = %s)",
                    keypoints_start, keypoints_end - 1, int(numpy.log2(octsize) + 1))

        # Prepare kernel call
        wg = self.wg_keypoint
        if len(wg) == 1:
            shape = keypoints.shape[0] * wg[0],
        else:
            shape = keypoints.shape[0] * wg[0], wg[1], wg[2]
        kernel = self.program_keypoint.all_kernels()[0]
        max_wg = kernel_workgroup_size(self.program_keypoint, kernel)
        if max_wg < wg[0]:
            logger.warning(
                "test_descriptor: Skipping test of WG=%s when maximum for this kernel is %s ",
                wg, max_wg)
            return
        gpu_keypoints = pyopencl.array.to_device(self.queue, keypoints)
        gpu_descriptors = pyopencl.array.zeros(
            self.queue, (keypoints_end - keypoints_start, 128),
            dtype=numpy.uint8,
            order="C")
        gpu_grad = pyopencl.array.to_device(self.queue, grad)
        gpu_ori = pyopencl.array.to_device(self.queue, ori)
        keypoints_start = numpy.int32(keypoints_start)
        keypoints_end = numpy.int32(keypoints_end)
        grad_height, grad_width = numpy.int32(grad.shape)
        counter = pyopencl.array.to_device(self.queue, keypoints_end)
        kargs = [
            gpu_keypoints.data, gpu_descriptors.data, gpu_grad.data,
            gpu_ori.data,
            numpy.int32(octsize), keypoints_start, counter.data, grad_width,
            grad_height
        ]

        # Call the kernel
        t0 = time.time()
        k1 = kernel(self.queue, shape, wg, *kargs)
        try:
            res = gpu_descriptors.get()
        except (pyopencl.LogicError, RuntimeError) as error:
            logger.warning("Segmentation fault like error (%s) on Descriptor for %s",
                           error, self.param)
            return
        t1 = time.time()

        # Reference Python implementation
        ref = my_descriptor(keypoints_o, grad, ori, octsize, keypoints_start,
                            keypoints_end)
        # Same ordering (by column 1 of the keypoints) for both results.
        order = numpy.argsort(keypoints[keypoints_start:keypoints_end, 1])
        ref_sort = ref[order]
        t2 = time.time()

        res_sort = res[order]
        logger.info(res_sort[5:10])
        logger.info(ref_sort[5:10])
        logger.info("Comparing descriptors (OpenCL and cpp) :")
        match, nulldesc = descriptors_compare(
            ref[keypoints_start:keypoints_end], res)
        # Pass the format string and its arguments separately: the previous
        # call wrapped them in a tuple, so the tuple repr was logged instead
        # of the formatted message.
        logger.info("%s/%s match found", match,
                    (keypoints_end - keypoints_start) - nulldesc)

        if self.PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Descriptors computation took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))
    def test_orientation(self):
        """
        Test the keypoints orientation assignment kernel.

        The kernel output is compared with the Python reference
        ``my_orientation`` over the keypoints found by both implementations.
        The test returns without failing when ``self.abort`` is set or when
        the requested workgroup size exceeds the maximum for this kernel.
        """
        if self.abort:
            return

        # Test data; the keypoints vector is compacted before use
        raw_keypoints, nb_keypoints, updated_nb_keypoints, grad, ori, octsize = orientation_setup()
        keypoints, updated_nb_keypoints = my_compact(numpy.copy(raw_keypoints), nb_keypoints)
        logger.info("Number of keypoints before orientation assignment : %s",
                    updated_nb_keypoints)

        # Select the kernel and check that it supports the requested workgroup
        workgroup = self.wg_orient
        kernel = self.program_orient.all_kernels()[0]
        largest_wg = kernel_workgroup_size(self.program_orient, kernel)
        if workgroup[0] > largest_wg:
            logger.warning(
                "test_orientation: Skipping test of WG=%s when maximum for this kernel is %s ",
                workgroup, largest_wg)
            return

        # Global size: number of keypoints times the workgroup size
        global_size = (keypoints.shape[0] * workgroup[0],)

        # Device buffers and scalar arguments
        gpu_keypoints = pyopencl.array.to_device(self.queue, keypoints)
        gpu_grad = pyopencl.array.to_device(self.queue, grad)
        gpu_ori = pyopencl.array.to_device(self.queue, ori)
        orisigma = numpy.float32(1.5)  # SIFT
        grad_height, grad_width = numpy.int32(grad.shape)
        keypoints_start = numpy.int32(0)
        keypoints_end = numpy.int32(updated_nb_keypoints)
        counter = pyopencl.array.to_device(self.queue, keypoints_end)
        kernel_args = [
            gpu_keypoints.data,
            gpu_grad.data,
            gpu_ori.data,
            counter.data,
            octsize,
            orisigma,
            nb_keypoints,
            keypoints_start,
            keypoints_end,
            grad_width,
            grad_height,
        ]
        if not self.USE_CPU:
            # Three extra local-memory buffers are passed when not using the CPU
            kernel_args.extend([
                pyopencl.LocalMemory(36 * 4),
                pyopencl.LocalMemory(128 * 4),
                pyopencl.LocalMemory(128 * 4),
            ])

        # Run the kernel and read back the results
        t0 = time.time()
        k1 = kernel(self.queue, global_size, workgroup, *kernel_args)
        res = gpu_keypoints.get()
        cnt = counter.get()
        t1 = time.time()

        # Python reference implementation
        ref, updated_nb_keypoints = my_orientation(
            keypoints, nb_keypoints, keypoints_start, keypoints_end,
            grad, ori, octsize, orisigma)
        t2 = time.time()

        # Only compare the keypoints that both implementations produced
        upbound = min(cnt, updated_nb_keypoints)
        d1, d2, d3, d4 = keypoints_compare(ref[0:upbound], res[0:upbound])
        checks = (
            (d1, 1e-4, "delta_cols=%s"),
            (d2, 1e-4, "delta_rows=%s"),
            (d3, 1e-4, "delta_sigma=%s"),
            (d4, 1e-1, "delta_angle=%s"),  # orientation has a poor precision
        )
        for delta, tolerance, template in checks:
            self.assertLess(delta, tolerance, template % (delta))
        for delta, _, template in checks:
            logger.info(template % delta)

        if self.PROFILE:
            cpu_ms = 1000.0 * (t2 - t1)
            gpu_ms = 1000.0 * (t1 - t0)
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." %
                        (cpu_ms, gpu_ms))
            logger.info("Orientation assignment took %.3fms" %
                        (1e-6 * (k1.profile.end - k1.profile.start)))
Esempio n. 17
0
    def test_descriptor(self):
        """
        Test the keypoints descriptors creation kernel.

        Descriptors computed on the device are compared with those of the
        Python reference ``my_descriptor`` and the number of matches is
        logged. The test returns without failing when ``self.abort`` is set,
        when the requested workgroup size exceeds the maximum for this kernel,
        or when reading the descriptors back from the device raises.
        """
        if self.abort:
            return

        # Descriptor_setup
        keypoints_o, nb_keypoints, actual_nb_keypoints, grad, ori, octsize = descriptor_setup()
        # keypoints should be a compacted vector of keypoints
        keypoints_o, compact_cnt = my_compact(numpy.copy(keypoints_o), nb_keypoints)
        actual_nb_keypoints = compact_cnt
        keypoints_start, keypoints_end = 0, actual_nb_keypoints
        # Two extra keypoints are sent to check that the kernel stops at keypoints_end
        keypoints = keypoints_o[keypoints_start:keypoints_end + 2]
        logger.info("Working on keypoints : [%s,%s] (octave = %s)",
                    keypoints_start, keypoints_end - 1, int(numpy.log2(octsize) + 1))

        # Prepare kernel call
        wg = self.wg_keypoint
        if len(wg) == 1:
            shape = keypoints.shape[0] * wg[0],
        else:
            shape = keypoints.shape[0] * wg[0], wg[1], wg[2]
        kernel = self.program_keypoint.all_kernels()[0]
        max_wg = kernel_workgroup_size(self.program_keypoint, kernel)
        if max_wg < wg[0]:
            logger.warning("test_descriptor: Skipping test of WG=%s when maximum for this kernel is %s ", wg, max_wg)
            return
        gpu_keypoints = pyopencl.array.to_device(self.queue, keypoints)
        # One 128-byte descriptor per keypoint in [keypoints_start, keypoints_end)
        gpu_descriptors = pyopencl.array.zeros(self.queue, (keypoints_end - keypoints_start, 128), dtype=numpy.uint8, order="C")
        gpu_grad = pyopencl.array.to_device(self.queue, grad)
        gpu_ori = pyopencl.array.to_device(self.queue, ori)
        keypoints_start, keypoints_end = numpy.int32(keypoints_start), numpy.int32(keypoints_end)
        grad_height, grad_width = numpy.int32(grad.shape)
        counter = pyopencl.array.to_device(self.queue, keypoints_end)
        kargs = [
            gpu_keypoints.data,
            gpu_descriptors.data,
            gpu_grad.data,
            gpu_ori.data,
            numpy.int32(octsize),
            keypoints_start,
            counter.data,
            grad_width,
            grad_height,
        ]

        # Call the kernel
        t0 = time.time()
        k1 = kernel(self.queue, shape, wg, *kargs)
        try:
            res = gpu_descriptors.get()
        except (pyopencl.LogicError, RuntimeError) as error:
            # A failing read-back is reported but does not fail the test
            logger.warning("Segmentation fault like error (%s) on Descriptor for %s", error, self.param)
            return
        t1 = time.time()

        # Reference Python implementation
        ref = my_descriptor(keypoints_o, grad, ori, octsize, keypoints_start, keypoints_end)
        sort_order = numpy.argsort(keypoints[keypoints_start:keypoints_end, 1])
        ref_sort = ref[sort_order]
        t2 = time.time()

        res_sort = res[sort_order]
        logger.info(res_sort[5:10])
        logger.info(ref_sort[5:10])
        logger.info("Comparing descriptors (OpenCL and cpp) :")
        match, nulldesc = descriptors_compare(ref[keypoints_start:keypoints_end], res)
        # Pass the format string and its arguments separately: wrapping them
        # in a tuple would log the tuple itself instead of the formatted text.
        logger.info("%s/%s match found", match, (keypoints_end - keypoints_start) - nulldesc)

        if self.PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.", 1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Descriptors computation took %.3fms", 1e-6 * (k1.profile.end - k1.profile.start))
Esempio n. 18
0
    def test_transform(self):
        """
        Test the "transform" kernel.

        A second image is obtained either by applying a known affine
        transformation to a test image (``USE_LENA``) or by reading two frames
        from disk. The correction returned by ``self.matching_correction`` is
        applied on the device and the maximum absolute difference with the
        first image is logged. No assertion is made on the result.
        """
        if USE_LENA:
            # original image
            if hasattr(scipy.misc, "ascent"):
                image = scipy.misc.ascent().astype(numpy.float32)
            else:
                image = scipy.misc.lena().astype(numpy.float32)
            image = numpy.ascontiguousarray(image[0:512, 0:512])

            # transformation
            # Alternative transformations tried previously:
            #   matrix = [[cos(angle), -sin(angle)], [sin(angle), cos(angle)]], offset = [1000.0, 100.0]
            #   matrix = [[0.9, 0.2], [-0.4, 0.9]], offset = [-20.0, 256.0]
            angle = 1.9  # numpy.pi/5.0
            matrix = numpy.array([[1.0, -0.75], [0.7, 0.5]], dtype=numpy.float32)
            offset_value = numpy.array([250.0, -150.0], dtype=numpy.float32)
            image2 = scipy.ndimage.interpolation.affine_transform(image, matrix, offset=offset_value, order=1, mode="constant")
        else:  # use images of a stack
            # NOTE(review): hard-coded absolute paths; this branch only works
            # on a machine where these files exist.
            image = scipy.misc.imread("/home/paleo/Titanium/test/frame0.png")
            image2 = scipy.misc.imread("/home/paleo/Titanium/test/frame1.png")
            offset_value = numpy.array([0.0, 0.0], dtype=numpy.float32)
        image_height, image_width = image.shape
        image2_height, image2_width = image2.shape

        fill_value = numpy.float32(0.0)
        mode = numpy.int32(1)

        if IMAGE_RESHAPE:  # turns out that image should always be reshaped
            output_height, output_width = int(3000), int(3000)
            image, image_height, image_width = self.image_reshape(image, output_height, output_width, image_height, image_width)
            image2, image2_height, image2_width = self.image_reshape(image2, output_height, output_width, image2_height, image2_width)
        else:
            output_height, output_width = int(image_height * numpy.sqrt(2)), int(image_width * numpy.sqrt(2))
        logger.info("Image : (%s, %s) -- Output: (%s, %s)", image_height, image_width, output_height, output_width)

        # perform correction by least square
        sol, MSE = self.matching_correction(image, image2)
        logger.info(sol)

        # Entries 0-1 and 3-4 of sol fill the rows of the 2x2 matrix,
        # entries 2 and 5 are the offsets.
        correction_matrix = numpy.zeros((2, 2), dtype=numpy.float32)
        correction_matrix[0] = sol[0:2, 0]
        correction_matrix[1] = sol[3:5, 0]
        matrix_for_gpu = correction_matrix.reshape(4, 1)  # for float4 struct
        offset_value[0] = sol[2, 0]
        offset_value[1] = sol[5, 0]

        maxwg = kernel_workgroup_size(self.program, "transform")
        wg = maxwg, 1
        shape = calc_size((output_width, output_height), wg)
        gpu_image = pyopencl.array.to_device(self.queue, image2)
        gpu_output = pyopencl.array.empty(self.queue, (output_height, output_width), dtype=numpy.float32, order="C")
        gpu_matrix = pyopencl.array.to_device(self.queue, matrix_for_gpu)
        gpu_offset = pyopencl.array.to_device(self.queue, offset_value)
        image_height, image_width = numpy.int32((image_height, image_width))
        output_height, output_width = numpy.int32((output_height, output_width))

        t0 = time.time()
        k1 = self.program.transform(self.queue, shape, wg,
                                    gpu_image.data, gpu_output.data, gpu_matrix.data, gpu_offset.data,
                                    image_width, image_height, output_width, output_height, fill_value, mode)
        res = gpu_output.get()
        t1 = time.time()

        # The scipy result is only computed to time the CPU implementation
        ref = scipy.ndimage.interpolation.affine_transform(image2, correction_matrix,
                                                           offset=offset_value,
                                                           output_shape=(output_height, output_width),
                                                           order=1,
                                                           mode="constant",
                                                           cval=fill_value)
        t2 = time.time()

        # Compare the corrected image with the original one
        delta = abs(res - image)
        delta_arg = delta.argmax()
        delta_max = delta.max()
        # Integer (row, column) of the flat argmax index; true division would
        # give a float row under Python 3.
        at_0, at_1 = divmod(delta_arg, output_width)
        logger.info("Max error: %f at (%d, %d)", delta_max, at_0, at_1)
        logger.info("minimal MSE according to least squares : %f", MSE)

        if self.PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.", 1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Transformation took %.3fms", 1e-6 * (k1.profile.end - k1.profile.start))
Esempio n. 19
0
    def test_orientation(self):
        """
        Test the keypoints orientation assignment kernel.

        The kernel output is compared either with the C++ SIFT implementation
        (``USE_CPP_SIFT``) or with the Python reference ``my_orientation``.
        The test returns without failing when ``self.abort`` is set or when
        the requested workgroup size exceeds the maximum for this kernel.
        """
        if self.abort:
            return
        # orientation_setup :
        keypoints, nb_keypoints, updated_nb_keypoints, grad, ori, octsize = orientation_setup()
        keypoints, compact_cnt = my_compact(numpy.copy(keypoints), nb_keypoints)
        updated_nb_keypoints = compact_cnt

        wg = self.wg_orient
        # Global size: number of keypoints times the workgroup size
        shape = keypoints.shape[0] * wg[0],

        gpu_keypoints = pyopencl.array.to_device(self.queue, keypoints)
        actual_nb_keypoints = numpy.int32(updated_nb_keypoints)
        logger.info("Number of keypoints before orientation assignment : %s",
                    actual_nb_keypoints)

        gpu_grad = pyopencl.array.to_device(self.queue, grad)
        gpu_ori = pyopencl.array.to_device(self.queue, ori)
        orisigma = numpy.float32(1.5)  # SIFT
        grad_height, grad_width = numpy.int32(grad.shape)
        keypoints_start = numpy.int32(0)
        keypoints_end = numpy.int32(actual_nb_keypoints)
        counter = pyopencl.array.to_device(self.queue, keypoints_end)

        max_wg = kernel_workgroup_size(self.program_orient,
                                       "orientation_assignment")
        if max_wg < wg[0]:
            logger.warning(
                "test_orientation: Skipping test of WG=%s when maximum for this kernel is %s ",
                wg, max_wg)
            return

        t0 = time.time()
        k1 = self.program_orient.orientation_assignment(
            self.queue, shape, wg, gpu_keypoints.data, gpu_grad.data,
            gpu_ori.data, counter.data, octsize, orisigma, nb_keypoints,
            keypoints_start, keypoints_end, grad_width, grad_height)
        res = gpu_keypoints.get()
        cnt = counter.get()
        t1 = time.time()

        if USE_CPP_SIFT:
            import feature
            sc = feature.SiftAlignment()
            # ref2 exposes x, y, scale, angle and desc
            ref2 = sc.sift(self.testdata)
            ref = ref2.angle
            kp_ref = numpy.empty((ref2.size, 4), dtype=numpy.float32)
            kp_ref[:, 0] = ref2.x
            kp_ref[:, 1] = ref2.y
            kp_ref[:, 2] = ref2.scale
            kp_ref[:, 3] = ref2.angle
        else:
            ref, updated_nb_keypoints = my_orientation(keypoints, nb_keypoints,
                                                       keypoints_start,
                                                       keypoints_end, grad,
                                                       ori, octsize, orisigma)

        t2 = time.time()

        # Only compare the keypoints that both implementations produced
        upbound = min(cnt, updated_nb_keypoints)
        d1, d2, d3, d4 = keypoints_compare(ref[0:upbound], res[0:upbound])
        # assertLess replaces the deprecated assert_ alias (removed in Python 3.12)
        self.assertLess(d1, 1e-4, "delta_cols=%s" % (d1))
        self.assertLess(d2, 1e-4, "delta_rows=%s" % (d2))
        self.assertLess(d3, 1e-4, "delta_sigma=%s" % (d3))
        self.assertLess(d4, 1e-1, "delta_angle=%s" %
                        (d4))  # orientation has a poor precision
        logger.info("delta_cols=%s" % d1)
        logger.info("delta_rows=%s" % d2)
        logger.info("delta_sigma=%s" % d3)
        logger.info("delta_angle=%s" % d4)

        if self.PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms." %
                        (1000.0 * (t2 - t1), 1000.0 * (t1 - t0)))
            logger.info("Orientation assignment took %.3fms" %
                        (1e-6 * (k1.profile.end - k1.profile.start)))
Esempio n. 20
0
    def test_descriptor(self):
        """
        Test the keypoints descriptors creation kernel.

        Descriptors computed on the device are compared either with the C++
        SIFT implementation (``USE_CPP_SIFT``) or with the Python reference
        ``my_descriptor``; the comparison is only logged, and only when
        ``PRINT_KEYPOINTS`` is set. The test returns without failing when
        ``self.abort`` is set, when the requested workgroup size exceeds the
        maximum for this kernel, or when reading the descriptors back raises.
        """
        if self.abort:
            return

        # descriptor_setup :
        keypoints_o, nb_keypoints, actual_nb_keypoints, grad, ori, octsize = descriptor_setup()
        # keypoints should be a compacted vector of keypoints
        keypoints_o, compact_cnt = my_compact(numpy.copy(keypoints_o),
                                              nb_keypoints)
        actual_nb_keypoints = compact_cnt
        keypoints_start, keypoints_end = 0, actual_nb_keypoints
        # Extra keypoints are sent to check that the kernel stops at keypoints_end
        keypoints = keypoints_o[keypoints_start:keypoints_end + 52]
        logger.info("Working on keypoints : [%s,%s] (octave = %s)",
                    keypoints_start, keypoints_end - 1,
                    int(numpy.log2(octsize) + 1))
        if not (USE_CPP_SIFT) and (100 < keypoints_end - keypoints_start):
            logger.info(
                "NOTE: Python implementation of descriptors is slow. Do not handle more than 100 keypoints, or grab a coffee..."
            )

        wg = self.wg_keypoint
        if len(wg) == 1:
            shape = keypoints.shape[0] * wg[0],
        else:
            shape = keypoints.shape[0] * wg[0], wg[1], wg[2]
        gpu_keypoints = pyopencl.array.to_device(self.queue, keypoints)
        # NOTE: for the following line, use pyopencl.array.empty instead of pyopencl.array.zeros if the keypoints are compacted
        gpu_descriptors = pyopencl.array.zeros(
            self.queue, (keypoints_end - keypoints_start, 128),
            dtype=numpy.uint8,
            order="C")
        gpu_grad = pyopencl.array.to_device(self.queue, grad)
        gpu_ori = pyopencl.array.to_device(self.queue, ori)

        keypoints_start, keypoints_end = numpy.int32(
            keypoints_start), numpy.int32(keypoints_end)
        grad_height, grad_width = numpy.int32(grad.shape)
        counter = pyopencl.array.to_device(self.queue, keypoints_end)

        max_wg = kernel_workgroup_size(self.program_keypoint, "descriptor")
        if max_wg < wg[0]:
            logger.warning(
                "test_descriptor: Skipping test of WG=%s when maximum for this kernel is %s ",
                wg, max_wg)
            return

        t0 = time.time()
        k1 = self.program_keypoint.descriptor(self.queue, shape, wg,
                                              gpu_keypoints.data,
                                              gpu_descriptors.data,
                                              gpu_grad.data, gpu_ori.data,
                                              numpy.int32(octsize),
                                              keypoints_start, counter.data,
                                              grad_width, grad_height)
        try:
            res = gpu_descriptors.get()
        except (pyopencl.LogicError, RuntimeError) as error:
            # A failing read-back is reported but does not fail the test
            logger.warning(
                "Segmentation fault like error (%s) on Descriptor for %s",
                error, self.param)
            return
        t1 = time.time()

        if USE_CPP_SIFT:
            import feature
            sc = feature.SiftAlignment()
            # ref2 exposes x, y, scale, angle and desc
            ref2 = sc.sift(self.testdata)
            ref = ref2.desc
            ref_sort = ref
        else:
            ref = my_descriptor(keypoints_o, grad, ori, octsize,
                                keypoints_start, keypoints_end)
            ref_sort = ref[numpy.argsort(
                keypoints[keypoints_start:keypoints_end, 1])]

        t2 = time.time()

        if PRINT_KEYPOINTS:
            res_sort = res[numpy.argsort(
                keypoints[keypoints_start:keypoints_end, 1])]
            logger.info(res_sort[5:10])
            logger.info(ref_sort[5:10])
            # With the C++ reference this comparison is only relevant for the
            # first keypoints (octsize == 1)
            if (USE_CPP_SIFT and octsize == 1) or not USE_CPP_SIFT:
                logger.info("Comparing descriptors (OpenCL and cpp) :")
                match, nulldesc = descriptors_compare(
                    ref[keypoints_start:keypoints_end], res)
                # Pass the format string and its arguments separately:
                # wrapping them in a tuple would log the tuple itself.
                logger.info("%s/%s match found", match,
                            (keypoints_end - keypoints_start) - nulldesc)

        # For now, the descriptor kernel is not precise enough to get exactly
        # the same descriptors values (we have several difference of 1, but it
        # is OK for the SIFT matching).
        # Use descriptors_compare(ref, res) to count how many descriptors are
        # exactly the same. The strict check that is disabled for this reason:
        #     delta = abs(res_sort - ref_sort).max()
        #     assert delta <= 1
        if self.PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Descriptors computation took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))
Esempio n. 21
0
    def test_transform(self):
        """
        Test the "transform" kernel against scipy.

        ``self.image`` is distorted with a known affine transformation, SIFT
        keypoints of both images are matched, and the correction derived from
        the matching is applied both on the device and with scipy. The maximum
        absolute difference between the two results is logged. No assertion is
        made on the result.
        """
        # Transformation
        # ---------------
        matrix = numpy.array([[1.0, -0.75], [0.7, 0.5]], dtype=numpy.float32)
        offset_value = numpy.array([250.0, -150.0], dtype=numpy.float32)
        image_transformed = scipy.ndimage.interpolation.affine_transform(
            self.image, matrix, offset=offset_value, order=1, mode="constant")

        fill_value = numpy.float32(0.0)
        mode = numpy.int32(1)

        # computing keypoints matching with SIFT
        sift_plan = SiftPlan(template=self.image, block_size=self.maxwg)
        kp1 = sift_plan.keypoints(self.image)
        kp2 = sift_plan.keypoints(image_transformed)  # image2 and image must have the same size
        match_plan = MatchPlan()  # cls.ctx
        matching = match_plan.match(kp2, kp1)

        # Retrieve the linear transformation from the matching pairs
        sol = matching_correction(matching)
        logger.info(sol)

        # Compute the correction matrix (inverse of transformation):
        # entries 0-1 and 3-4 of sol fill the rows of the 2x2 matrix,
        # entries 2 and 5 are the offsets.
        correction_matrix = numpy.zeros((2, 2), dtype=numpy.float32)
        correction_matrix[0] = sol[0:2, 0]
        correction_matrix[1] = sol[3:5, 0]
        matrix_for_gpu = correction_matrix.reshape(4, 1)  # for float4 struct
        offset_value[0] = sol[2, 0]
        offset_value[1] = sol[5, 0]

        # Prepare the arguments for the "transform" kernel call
        maxwg = kernel_workgroup_size(self.program, "transform")
        wg = maxwg, 1
        shape = calc_size(self.image.shape[::-1], wg)
        gpu_image = pyopencl.array.to_device(self.queue, image_transformed)
        gpu_output = pyopencl.array.empty(self.queue, self.image.shape, dtype=numpy.float32, order="C")
        gpu_matrix = pyopencl.array.to_device(self.queue, matrix_for_gpu)
        gpu_offset = pyopencl.array.to_device(self.queue, offset_value)
        image_height, image_width = numpy.int32(self.image.shape)
        output_height, output_width = numpy.int32(gpu_output.shape)
        kargs = [
            gpu_image.data,
            gpu_output.data,
            gpu_matrix.data,
            gpu_offset.data,
            image_width,
            image_height,
            output_width,
            output_height,
            fill_value,
            mode,
        ]

        # Call the kernel
        t0 = time.time()
        k1 = self.program.transform(self.queue, shape, wg, *kargs)
        res = gpu_output.get()

        # Reference result
        t1 = time.time()
        ref = scipy.ndimage.interpolation.affine_transform(image_transformed, correction_matrix,
                                                           offset=offset_value,
                                                           output_shape=(output_height, output_width),
                                                           order=1,
                                                           mode="constant",
                                                           cval=fill_value)
        t2 = time.time()

        # Compare the implementations
        delta = numpy.abs(res - ref)
        delta_arg = delta.argmax()
        delta_max = delta.max()
        # Integer (row, column) of the flat argmax index; true division would
        # give a float row under Python 3.
        at_0, at_1 = divmod(delta_arg, output_width)
        logger.info("Max difference wrt scipy : %f at (%d, %d)", delta_max, at_0, at_1)

        if self.PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.", 1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Transformation took %.3fms", 1e-6 * (k1.profile.end - k1.profile.start))
Esempio n. 22
0
    def test_transform(self):
        """
        Test the "transform" kernel against scipy.

        ``self.image`` is distorted with a known affine transformation, SIFT
        keypoints of both images are matched, and the correction derived from
        the matching is applied both on the device and with scipy. The maximum
        absolute difference between the two results is logged. No assertion is
        made on the result.
        """
        # Transformation
        # ---------------
        matrix = numpy.array([[1.0, -0.75], [0.7, 0.5]], dtype=numpy.float32)
        offset_value = numpy.array([250.0, -150.0], dtype=numpy.float32)
        image_transformed = scipy.ndimage.interpolation.affine_transform(
            self.image, matrix, offset=offset_value, order=1, mode="constant")

        fill_value = numpy.float32(0.0)
        mode = numpy.int32(1)

        # computing keypoints matching with SIFT
        sift_plan = SiftPlan(template=self.image,
                             max_workgroup_size=self.maxwg)
        kp1 = sift_plan.keypoints(self.image)
        kp2 = sift_plan.keypoints(
            image_transformed)  # image2 and image must have the same size
        match_plan = MatchPlan()  # cls.ctx
        matching = match_plan.match(kp2, kp1)

        # Retrieve the linear transformation from the matching pairs
        sol = matching_correction(matching)
        logger.info(sol)

        # Compute the correction matrix (inverse of transformation):
        # entries 0-1 and 3-4 of sol fill the rows of the 2x2 matrix,
        # entries 2 and 5 are the offsets.
        correction_matrix = numpy.zeros((2, 2), dtype=numpy.float32)
        correction_matrix[0] = sol[0:2, 0]
        correction_matrix[1] = sol[3:5, 0]
        matrix_for_gpu = correction_matrix.reshape(4, 1)  # for float4 struct
        offset_value[0] = sol[2, 0]
        offset_value[1] = sol[5, 0]

        # Prepare the arguments for the "transform" kernel call
        maxwg = kernel_workgroup_size(self.program, "transform")
        wg = maxwg, 1
        shape = calc_size(self.image.shape[::-1], wg)
        gpu_image = pyopencl.array.to_device(self.queue, image_transformed)
        gpu_output = pyopencl.array.empty(self.queue,
                                          self.image.shape,
                                          dtype=numpy.float32,
                                          order="C")
        gpu_matrix = pyopencl.array.to_device(self.queue, matrix_for_gpu)
        gpu_offset = pyopencl.array.to_device(self.queue, offset_value)
        image_height, image_width = numpy.int32(self.image.shape)
        output_height, output_width = numpy.int32(gpu_output.shape)
        kargs = [
            gpu_image.data, gpu_output.data, gpu_matrix.data, gpu_offset.data,
            image_width, image_height, output_width, output_height, fill_value,
            mode
        ]

        # Call the kernel
        t0 = time.time()
        k1 = self.program.transform(self.queue, shape, wg, *kargs)
        res = gpu_output.get()

        # Reference result
        t1 = time.time()
        ref = scipy.ndimage.interpolation.affine_transform(
            image_transformed,
            correction_matrix,
            offset=offset_value,
            output_shape=(output_height, output_width),
            order=1,
            mode="constant",
            cval=fill_value)
        t2 = time.time()

        # Compare the implementations
        delta = numpy.abs(res - ref)
        delta_arg = delta.argmax()
        delta_max = delta.max()
        # Integer (row, column) of the flat argmax index; true division would
        # give a float row under Python 3.
        at_0, at_1 = divmod(delta_arg, output_width)
        logger.info("Max difference wrt scipy : %f at (%d, %d)", delta_max,
                    at_0, at_1)

        if self.PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            logger.info("Transformation took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))
 def setUp(self):
     """Build the OpenCL program from the SIFT sources and query the "compact" workgroup size."""
     # Concatenate the kernel sources in order: sift.cl first, then algebra.cl
     source_parts = [
         get_opencl_code(os.path.join("sift", cl_file))
         for cl_file in ("sift.cl", "algebra.cl")
     ]
     combined_source = os.linesep.join(source_parts)
     self.program = pyopencl.Program(self.ctx, combined_source).build()
     self.wg_compact = kernel_workgroup_size(self.program, "compact")
Esempio n. 24
0
    def test_orientation(self):
        """Test the keypoints orientation assignment kernel.

        Runs the first kernel of ``self.program_orient`` on the data returned
        by ``orientation_setup()`` (after compaction with ``my_compact``) and
        compares the keypoints read back from the device with the Python
        reference ``my_orientation``.

        The test returns early, without asserting anything, when
        ``self.abort`` is set or when the requested workgroup size
        ``self.wg_orient[0]`` exceeds the maximum reported for the kernel.
        """
        if self.abort:
            return
        # orientation_setup :
        keypoints, nb_keypoints, updated_nb_keypoints, grad, ori, octsize = orientation_setup()
        keypoints, compact_cnt = my_compact(numpy.copy(keypoints), nb_keypoints)
        updated_nb_keypoints = compact_cnt
        logger.info("Number of keypoints before orientation assignment : %s", updated_nb_keypoints)

        # Prepare kernel call
        wg = self.wg_orient
        kernel = self.program_orient.all_kernels()[0]
        max_wg = kernel_workgroup_size(self.program_orient, kernel)
        if max_wg < wg[0]:
            logger.warning("test_orientation: Skipping test of WG=%s when maximum for this kernel is %s ", wg, max_wg)
            return
        # Global size: one workgroup of wg[0] work-items per keypoint row.
        shape = keypoints.shape[0] * wg[0],  # shape = calc_size(keypoints.shape, self.wg)
        gpu_keypoints = pyopencl.array.to_device(self.queue, keypoints)
        actual_nb_keypoints = numpy.int32(updated_nb_keypoints)
        gpu_grad = pyopencl.array.to_device(self.queue, grad)
        gpu_ori = pyopencl.array.to_device(self.queue, ori)
        orisigma = numpy.float32(1.5)  # SIFT
        grad_height, grad_width = numpy.int32(grad.shape)
        keypoints_start = numpy.int32(0)
        keypoints_end = numpy.int32(actual_nb_keypoints)
        # Device-side counter, initialised to the number of compacted keypoints.
        counter = pyopencl.array.to_device(self.queue, keypoints_end)  # actual_nb_keypoints)
        kargs = [
            gpu_keypoints.data,
            gpu_grad.data,
            gpu_ori.data,
            counter.data,
            octsize,
            orisigma,
            nb_keypoints,
            keypoints_start,
            keypoints_end,
            grad_width,
            grad_height
        ]
        if not self.USE_CPU:
            # The non-CPU kernel variant takes three extra local-memory
            # buffers (sizes in bytes).
            # NOTE(review): presumably a 36-bin float32 histogram plus two
            # 128-float scratch areas -- confirm against the kernel source.
            kargs += [pyopencl.LocalMemory(36 * 4),
                      pyopencl.LocalMemory(128 * 4),
                      pyopencl.LocalMemory(128 * 4)]

        # Call the kernel; t1 - t0 also includes the two read-backs below.
        t0 = time.time()
        k1 = kernel(self.queue, shape, wg, *kargs)
        res = gpu_keypoints.get()
        cnt = counter.get()
        t1 = time.time()

        # Reference Python implementation
        ref, updated_nb_keypoints = my_orientation(keypoints,
                                                   nb_keypoints,
                                                   keypoints_start,
                                                   keypoints_end, grad, ori,
                                                   octsize, orisigma)
        t2 = time.time()

        # sort to compare added keypoints
        # Only the rows that both implementations filled in are compared.
        upbound = min(cnt, updated_nb_keypoints)
        d1, d2, d3, d4 = keypoints_compare(ref[0:upbound], res[0:upbound])
        # assertLess replaces the ``assert_`` alias, which was removed from
        # unittest.TestCase in Python 3.12.
        self.assertLess(d1, 1e-4, "delta_cols=%s" % (d1))
        self.assertLess(d2, 1e-4, "delta_rows=%s" % (d2))
        self.assertLess(d3, 1e-4, "delta_sigma=%s" % (d3))
        self.assertLess(d4, 1e-1, "delta_angle=%s" % (d4))  # orientation has a poor precision
        logger.info("delta_cols=%s", d1)
        logger.info("delta_rows=%s", d2)
        logger.info("delta_sigma=%s", d3)
        logger.info("delta_angle=%s", d4)

        if self.PROFILE:
            logger.info("Global execution time: CPU %.3fms, GPU: %.3fms.",
                        1000.0 * (t2 - t1), 1000.0 * (t1 - t0))
            # Scaled by 1e-6 to match the "ms" label in the message.
            logger.info("Orientation assignment took %.3fms",
                        1e-6 * (k1.profile.end - k1.profile.start))