Example #1
0
    def loadKernel(self, device):
        #Load the kernel and initialize the device.
        self.context = cl.Context([device], None, None)

        # get the maximum worksize of the device
        maxWorkSize = self.device.get_info(cl.device_info.MAX_WORK_GROUP_SIZE)

        # If the user didn't specify their own worksize, use the maximum supported worksize of the device
        if self.WORKSIZE is None:
            self.interface.error('WORKSIZE not supplied, using HW max. of ' + str(maxWorkSize))
            self.WORKSIZE = maxWorkSize
        else:
            # If the worksize is larger than the maximum supported worksize of the device
            if (self.WORKSIZE > maxWorkSize):
                self.interface.error('WORKSIZE out of range, using HW max. of ' + str(maxWorkSize))
                self.WORKSIZE = maxWorkSize
            # If the worksize is not a power of 2
            if (self.WORKSIZE & (self.WORKSIZE - 1)) != 0:
                self.interface.error('WORKSIZE invalid, using HW max. of ' + str(maxWorkSize))
                self.WORKSIZE = maxWorkSize

        # These definitions are required for the kernel to function.
        self.defines += (' -DOUTPUT_SIZE=' + str(self.OUTPUT_SIZE))
        self.defines += (' -DOUTPUT_MASK=' + str(self.OUTPUT_SIZE - 1))
        self.defines += (' -DWORKSIZE=' + str(self.WORKSIZE))

        # If the user wants to mine with vectors, enable the appropriate code
        # in the kernel source.
        if self.VECTORS:
            self.defines += ' -DVECTORS'
            self.rateDivisor = 2
        elif self.VECTORS4:
            self.defines += ' -DVECTORS4'
            self.rateDivisor = 4
        else:
            self.rateDivisor = 1

        # Some AMD devices support a special "bitalign" instruction that makes
        # bitwise rotation (required for SHA-256) much faster.
        if (device.extensions.find('cl_amd_media_ops') != -1):
            self.defines += ' -DBITALIGN'
            #enable the expierimental BFI_INT instruction optimization
            if self.BFI_INT:
                self.defines += ' -DBFI_INT'

        # Locate and read the OpenCL source code in the kernel's directory.
        kernelFileDir, pyfile = os.path.split(__file__)
        kernelFilePath = os.path.join(kernelFileDir, 'kernel.cl')
        kernelFile = open(kernelFilePath, 'r')
        kernel = kernelFile.read()
        kernelFile.close()

        # For fast startup, we cache the compiled OpenCL code. The name of the
        # cache is determined as the hash of a few important,
        # compilation-specific pieces of information.
        m = md5()
        m.update(device.platform.name)
        m.update(device.platform.version)
        m.update(device.name)
        m.update(self.defines)
        m.update(kernel)
        cacheName = '%s.elf' % m.hexdigest()

        fileName = os.path.join(kernelFileDir, cacheName)

        # Finally, the actual work of loading the kernel...
        try:
            binary = open(fileName, 'rb')
        except IOError:
            binary = None

        try:
            if binary is None:
                self.kernel = cl.Program(
                    self.context, kernel).build(self.defines)

                #apply BFI_INT if enabled
                if self.BFI_INT:
                    #patch the binary output from the compiler
                    patcher = BFIPatcher(self.interface)
                    binaryData = patcher.patch(self.kernel.binaries[0])

                    self.interface.debug("Applied BFI_INT patch")

                    #reload the kernel with the patched binary
                    self.kernel = cl.Program(
                        self.context, [device],
                        [binaryData]).build(self.defines)

                #write the kernel binaries to file
                binaryW = open(fileName, 'wb')
                binaryW.write(self.kernel.binaries[0])
                binaryW.close()
            else:
                binaryData = binary.read()
                self.kernel = cl.Program(
                    self.context, [device], [binaryData]).build(self.defines)

        except cl.LogicError:
            self.interface.fatal("Failed to compile OpenCL kernel!")
            return
        except PatchError:
            self.interface.fatal('Failed to apply BFI_INT patch to kernel! '
                'Is BFI_INT supported on this hardware?')
            return
        finally:
            if binary: binary.close()

        #unload the compiler to reduce memory usage
        cl.unload_compiler()
Example #2
0
 def loadKernel(self, device):
     """Load the kernel and initialize the device."""
     self.context = cl.Context([device], None, None)
     
     # These definitions are required for the kernel to function.
     self.defines += (' -DOUTPUT_SIZE=' + str(self.OUTPUT_SIZE))
     self.defines += (' -DOUTPUT_MASK=' + str(self.OUTPUT_SIZE - 1))
     
     # If the user wants to mine with vectors, enable the appropriate code
     # in the kernel source.
     if self.VECTORS:
         self.defines += ' -DVECTORS'
     
     # Some AMD devices support a special "bitalign" instruction that makes
     # bitwise rotation (required for SHA-256) much faster.
     if (device.extensions.find('cl_amd_media_ops') != -1):
         self.defines += ' -DBITALIGN'
         #enable the expierimental BFI_INT instruction optimization
         if self.BFI_INT:
             self.defines += ' -DBFI_INT'
     else:
         #since BFI_INT requires cl_amd_media_ops, disable it
         if self.BFI_INT:
             self.BFI_INT = False
     
     # Locate and read the OpenCL source code in the kernel's directory.
     kernelFileDir, pyfile = os.path.split(__file__)
     kernelFilePath = os.path.join(kernelFileDir, 'kernel.cl')
     kernelFile = open(kernelFilePath, 'r')
     kernel = kernelFile.read()
     kernelFile.close()
     
     # For fast startup, we cache the compiled OpenCL code. The name of the
     # cache is determined as the hash of a few important,
     # compilation-specific pieces of information.
     m = md5()
     m.update(device.platform.name)
     m.update(device.platform.version)
     m.update(device.name)
     m.update(self.defines)
     m.update(kernel)
     cacheName = '%s.elf' % m.hexdigest()
     
     fileName = os.path.join(kernelFileDir, cacheName)
     
     # Finally, the actual work of loading the kernel...
     try:
         binary = open(fileName, 'rb')
     except IOError: 
         binary = None
     
     try:
         if binary is None:
             self.kernel = cl.Program(
                 self.context, kernel).build(self.defines)
              
             #apply BFI_INT if enabled
             if self.BFI_INT:
                 #patch the binary output from the compiler
                 patcher = BFIPatcher(self.interface)
                 binaryData = patcher.patch(self.kernel.binaries[0])
                 
                 self.interface.debug("Applied BFI_INT patch")
                 
                 #reload the kernel with the patched binary
                 self.kernel = cl.Program(
                     self.context, [device],
                     [binaryData]).build(self.defines)
             
             #write the kernel binaries to file
             binaryW = open(fileName, 'wb')
             binaryW.write(self.kernel.binaries[0])
             binaryW.close()
         else:
             binaryData = binary.read()
             self.kernel = cl.Program(
                 self.context, [device], [binaryData]).build(self.defines)
                 
     except cl.LogicError:
         self.interface.fatal("Failed to compile OpenCL kernel!")
         return
     except PatchError:
         self.interface.fatal('Failed to apply BFI_INT patch to kernel! '
             'Is BFI_INT supported on this hardware?')
         return
     finally:
         if binary: binary.close()
    
     cl.unload_compiler()
     
     # If the user didn't specify their own worksize, use the maxium
     # supported by the device.
     maxSize = self.kernel.search.get_work_group_info(
               cl.kernel_work_group_info.WORK_GROUP_SIZE, self.device)
     
     if self.WORKSIZE is None:
         self.WORKSIZE = maxSize
     else:
         if self.WORKSIZE > maxSize:
             self.interface.log('Warning: Worksize exceeds the maximum of '
                                 + str(maxSize) + ', using default.')
         if self.WORKSIZE < 1:
             self.interface.log('Warning: Invalid worksize, using default.')
         
         self.WORKSIZE = min(self.WORKSIZE, maxSize)
         self.WORKSIZE = max(self.WORKSIZE, 1)
         #if the worksize is not a power of 2, round down to the nearest one
         if (self.WORKSIZE & (self.WORKSIZE - 1)) != 0:   
             self.WORKSIZE = 1 << int(math.floor(math.log(X)/math.log(2)))
         
     self.interface.setWorkFactor(self.WORKSIZE)
Example #3
0
    def test_get_info(self, platform, device):
        failure_count = [0]

        CRASH_QUIRKS = [
            (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.0 CUDA 3.0.1"), [
                (cl.Event, cl.event_info.COMMAND_QUEUE),
            ]),
        ]
        QUIRKS = []

        plat_quirk_key = (platform.vendor, platform.name, platform.version)

        def find_quirk(quirk_list, cl_obj, info):
            for entry_plat_key, quirks in quirk_list:
                if entry_plat_key == plat_quirk_key:
                    for quirk_cls, quirk_info in quirks:
                        if (isinstance(cl_obj, quirk_cls)
                                and quirk_info == info):
                            return True

            return False

        def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
            if func is None:

                def func(info):
                    cl_obj.get_info(info)

            for info_name in dir(info_cls):
                if not info_name.startswith("_") and info_name != "to_string":
                    info = getattr(info_cls, info_name)

                    if find_quirk(CRASH_QUIRKS, cl_obj, info):
                        print "not executing get_info", type(cl_obj), info_name
                        print "(known crash quirk for %s)" % platform.name
                        continue

                    try:
                        func(info)
                    except:
                        msg = "failed get_info", type(cl_obj), info_name

                        if find_quirk(QUIRKS, cl_obj, info):
                            msg += ("(known quirk for %s)" % platform.name)
                        else:
                            failure_count[0] += 1

                    if try_attr_form:
                        try:
                            getattr(cl_obj, info_name.lower())
                        except:
                            print "failed attr-based get_info", type(
                                cl_obj), info_name

                            if find_quirk(QUIRKS, cl_obj, info):
                                print "(known quirk for %s)" % platform.name
                            else:
                                failure_count[0] += 1

        do_test(platform, cl.platform_info)

        do_test(device, cl.device_info)

        ctx = cl.Context([device])
        do_test(ctx, cl.context_info)

        props = 0
        if (device.queue_properties
                & cl.command_queue_properties.PROFILING_ENABLE):
            profiling = True
            props = cl.command_queue_properties.PROFILING_ENABLE
        queue = cl.CommandQueue(ctx, properties=props)
        do_test(queue, cl.command_queue_info)

        prg = cl.Program(
            ctx, """
            __kernel void sum(__global float *a)
            { a[get_global_id(0)] *= 2; }
            """).build()
        do_test(prg, cl.program_info)
        do_test(prg,
                cl.program_build_info,
                lambda info: prg.get_build_info(device, info),
                try_attr_form=False)

        cl.unload_compiler()  # just for the heck of it

        mf = cl.mem_flags
        n = 2000
        a_buf = cl.Buffer(ctx, 0, n * 4)

        do_test(a_buf, cl.mem_info)

        kernel = prg.sum
        do_test(kernel, cl.kernel_info)

        evt = kernel(queue, (n, ), None, a_buf)
        do_test(evt, cl.event_info)

        if profiling:
            evt.wait()
            do_test(evt,
                    cl.profiling_info,
                    lambda info: evt.get_profiling_info(info),
                    try_attr_form=False)

        if device.image_support:
            smp = cl.Sampler(ctx, True, cl.addressing_mode.CLAMP,
                             cl.filter_mode.NEAREST)
            do_test(smp, cl.sampler_info)

            img_format = cl.get_supported_image_formats(
                ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

            img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
            assert img.shape == (128, 256)

            img.depth
            img.image.depth
            do_test(img, cl.image_info, lambda info: img.get_image_info(info))

        if failure_count[0]:
            raise RuntimeError(
                "get_info testing had %d errors "
                "(If you compiled against OpenCL 1.1 but are testing a 1.0 "
                "implementation, you can safely ignore this.)" %
                failure_count[0])
Example #4
0
    def test_get_info(self, platform, device):
        failure_count = [0]

        CRASH_QUIRKS = [
                (("NVIDIA Corporation", "NVIDIA CUDA",
                    "OpenCL 1.0 CUDA 3.0.1"),
                    [
                    (cl.Event, cl.event_info.COMMAND_QUEUE),
                    ]),
                ]
        QUIRKS = []

        plat_quirk_key = (
                platform.vendor,
                platform.name,
                platform.version)

        def find_quirk(quirk_list, cl_obj, info):
            for entry_plat_key, quirks in quirk_list:
                if entry_plat_key == plat_quirk_key:
                    for quirk_cls, quirk_info in quirks:
                        if (isinstance(cl_obj, quirk_cls)
                                and quirk_info == info):
                            return True

            return False

        def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
            if func is None:
                def func(info):
                    cl_obj.get_info(info)

            for info_name in dir(info_cls):
                if not info_name.startswith("_") and info_name != "to_string":
                    info = getattr(info_cls, info_name)

                    if find_quirk(CRASH_QUIRKS, cl_obj, info):
                        print("not executing get_info", type(cl_obj), info_name)
                        print("(known crash quirk for %s)" % platform.name)
                        continue

                    try:
                        func(info)
                    except:
                        msg = "failed get_info", type(cl_obj), info_name

                        if find_quirk(QUIRKS, cl_obj, info):
                            msg += ("(known quirk for %s)" % platform.name)
                        else:
                            failure_count[0] += 1

                    if try_attr_form:
                        try:
                            getattr(cl_obj, info_name.lower())
                        except:
                            print("failed attr-based get_info", type(cl_obj), info_name)

                            if find_quirk(QUIRKS, cl_obj, info):
                                print("(known quirk for %s)" % platform.name)
                            else:
                                failure_count[0] += 1

        do_test(platform, cl.platform_info)

        do_test(device, cl.device_info)

        ctx = cl.Context([device])
        do_test(ctx, cl.context_info)

        props = 0
        if (device.queue_properties
                & cl.command_queue_properties.PROFILING_ENABLE):
            profiling = True
            props = cl.command_queue_properties.PROFILING_ENABLE
        queue = cl.CommandQueue(ctx,
                properties=props)
        do_test(queue, cl.command_queue_info)

        prg = cl.Program(ctx, """
            __kernel void sum(__global float *a)
            { a[get_global_id(0)] *= 2; }
            """).build()
        do_test(prg, cl.program_info)
        do_test(prg, cl.program_build_info,
                lambda info: prg.get_build_info(device, info),
                try_attr_form=False)

        cl.unload_compiler() # just for the heck of it

        mf = cl.mem_flags
        n = 2000
        a_buf = cl.Buffer(ctx, 0, n*4)

        do_test(a_buf, cl.mem_info)

        kernel = prg.sum
        do_test(kernel, cl.kernel_info)

        evt = kernel(queue, (n,), None, a_buf)
        do_test(evt, cl.event_info)

        if profiling:
            evt.wait()
            do_test(evt, cl.profiling_info,
                    lambda info: evt.get_profiling_info(info),
                    try_attr_form=False)

        if device.image_support:
            smp = cl.Sampler(ctx, True,
                    cl.addressing_mode.CLAMP,
                    cl.filter_mode.NEAREST)
            do_test(smp, cl.sampler_info)

            img_format = cl.get_supported_image_formats(
                    ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

            img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
            assert img.shape == (128, 256)

            img.depth
            img.image.depth
            do_test(img, cl.image_info,
                    lambda info: img.get_image_info(info))
Example #5
0
      else:
          binaryData = binary.read()
          self.kernel = cl.Program(
              self.context, [device], [binaryData]).build(self.defines)
              
  except cl.LogicError:
      self.interface.fatal("Failed to compile OpenCL kernel!")
      return
  except PatchError:
      self.interface.fatal('Failed to apply BFI_INT patch to kernel! '
          'Is BFI_INT supported on this hardware?')
      return
  finally:
      if binary: binary.close()
 
  cl.unload_compiler()
  
  # If the user didn't specify their own worksize, use the maxium
  # supported by the device.
  maxSize = self.kernel.search.get_work_group_info(
            cl.kernel_work_group_info.WORK_GROUP_SIZE, self.device)
  
  if self.WORKSIZE is None:
      self.WORKSIZE = maxSize
  else:
      if self.WORKSIZE > maxSize:
          self.interface.log('Warning: Worksize exceeds the maximum of '
                              + str(maxSize) + ', using default.')
      if self.WORKSIZE < 1:
          self.interface.log('Warning: Invalid worksize, using default.')
      
    def loadKernel(self, device):
        #Load the kernel and initialize the device.
        self.context = cl.Context([device], None, None)
        
        # If the user didn't specify their own worksize, use 256
        if self.WORKSIZE is None:
            self.WORKSIZE = 256
        else:
            #if the worksize is not a power of 2, round down to the nearest one
            if (self.WORKSIZE & (self.WORKSIZE - 1)) != 0:   
                self.WORKSIZE = 1 << int(math.floor(math.log(X)/math.log(2)))
        
        # These definitions are required for the kernel to function.
        self.defines += (' -DOUTPUT_SIZE=' + str(self.OUTPUT_SIZE))
        self.defines += (' -DOUTPUT_MASK=' + str(self.OUTPUT_SIZE - 1))
        self.defines += (' -DWORKSIZE=' + str(self.WORKSIZE))
                
        # If the user wants to mine with vectors, enable the appropriate code
        # in the kernel source.        
        if self.VECTORS:
            self.defines += ' -DVECTORS'
            self.rateDivisor = 2
        elif self.VECTORS4:
            self.defines += ' -DVECTORS4'
            self.rateDivisor = 4
        else:
            self.rateDivisor = 1
            
        # Some AMD devices support a special "bitalign" instruction that makes
        # bitwise rotation (required for SHA-256) much faster.
        if (device.extensions.find('cl_amd_media_ops') != -1):
            self.defines += ' -DBITALIGN'
            #enable the expierimental BFI_INT instruction optimization
            if self.BFI_INT:
                self.defines += ' -DBFI_INT'
        else:
            #Since phatk and phatk2 will error out on Nvidia GPUs
            #make sure the user knows that they need to use poclbm
            self.interface.fatal("GPU not supported! phatk2 is designed for "
                "ATI 5xxx and newer only. Try -k poclbm instead.")
            return
        
        # Locate and read the OpenCL source code in the kernel's directory.
        kernelFileDir, pyfile = os.path.split(__file__)
        kernelFilePath = os.path.join(kernelFileDir, 'kernel.cl')
        kernelFile = open(kernelFilePath, 'r')
        kernel = kernelFile.read()
        kernelFile.close()
        
        # For fast startup, we cache the compiled OpenCL code. The name of the
        # cache is determined as the hash of a few important,
        # compilation-specific pieces of information.
        m = md5()
        m.update(device.platform.name)
        m.update(device.platform.version)
        m.update(device.name)
        m.update(self.defines)
        m.update(kernel)
        cacheName = '%s.elf' % m.hexdigest()
        
        fileName = os.path.join(kernelFileDir, cacheName)
        
        # Finally, the actual work of loading the kernel...
        try:
            binary = open(fileName, 'rb')
        except IOError: 
            binary = None
        
        try:
            if binary is None:
                self.kernel = cl.Program(
                    self.context, kernel).build(self.defines)
                 
                #apply BFI_INT if enabled
                if self.BFI_INT:
                    #patch the binary output from the compiler
                    patcher = BFIPatcher(self.interface)
                    binaryData = patcher.patch(self.kernel.binaries[0])
                    
                    self.interface.debug("Applied BFI_INT patch")
                    
                    #reload the kernel with the patched binary
                    self.kernel = cl.Program(
                        self.context, [device],
                        [binaryData]).build(self.defines)
                
                #write the kernel binaries to file
                binaryW = open(fileName, 'wb')
                binaryW.write(self.kernel.binaries[0])
                binaryW.close()
            else:
                binaryData = binary.read()
                self.kernel = cl.Program(
                    self.context, [device], [binaryData]).build(self.defines)
                    
        except cl.LogicError:
            self.interface.fatal("Failed to compile OpenCL kernel!")
            return
        except PatchError:
            self.interface.fatal('Failed to apply BFI_INT patch to kernel! '
                'Is BFI_INT supported on this hardware?')
            return
        finally:
            if binary: binary.close()

        cl.unload_compiler()
        
        # Since this can't be run before compiling the kernel, all we can do is
        # check to make sure the selected size is not too large
        maxSize = self.kernel.search.get_work_group_info(
                  cl.kernel_work_group_info.WORK_GROUP_SIZE, self.device)
                  
        if self.WORKSIZE > maxSize:
            self.interface.fatal('Maximum WORKSIZE on the selected device is '
                    + str(maxSize))