예제 #1
0
 def loadKernel(self, device):
     """Load the kernel and initialize the device.

     Creates an OpenCL context for ``device``, assembles the compiler
     defines, then either builds ``kernel.cl`` from source or loads a
     previously cached binary stored next to this file. Finally the work
     size is validated against the limit reported for the built kernel.

     Args:
         device: the OpenCL device the kernel is built for.

     Side effects:
         Sets ``self.context``, ``self.kernel`` and ``self.WORKSIZE``,
         appends to ``self.defines``, may clear ``self.BFI_INT``, may write
         a ``<md5>.elf`` cache file into the kernel's directory and calls
         ``self.interface.setWorkFactor``.

     Returns:
         None. When compiling or patching fails the problem is reported
         through ``self.interface.fatal`` and the method returns early,
         before the work size is configured.
     """
     self.context = cl.Context([device], None, None)

     # These definitions are required for the kernel to function.
     self.defines += (' -DOUTPUT_SIZE=' + str(self.OUTPUT_SIZE))
     self.defines += (' -DOUTPUT_MASK=' + str(self.OUTPUT_SIZE - 1))

     # If the user wants to mine with vectors, enable the appropriate code
     # in the kernel source.
     if self.VECTORS:
         self.defines += ' -DVECTORS'

     # Some AMD devices support a special "bitalign" instruction that makes
     # bitwise rotation (required for SHA-256) much faster.
     if (device.extensions.find('cl_amd_media_ops') != -1):
         self.defines += ' -DBITALIGN'
         # Enable the experimental BFI_INT instruction optimization.
         if self.BFI_INT:
             self.defines += ' -DBFI_INT'
     elif self.BFI_INT:
         # BFI_INT requires cl_amd_media_ops, so disable it.
         self.BFI_INT = False

     # Locate and read the OpenCL source code in the kernel's directory.
     kernelFileDir = os.path.dirname(__file__)
     kernelFilePath = os.path.join(kernelFileDir, 'kernel.cl')
     with open(kernelFilePath, 'r') as kernelFile:
         kernel = kernelFile.read()

     # For fast startup, we cache the compiled OpenCL code. The name of the
     # cache is determined as the hash of a few important,
     # compilation-specific pieces of information.
     # NOTE(review): if md5 is hashlib's, update() needs bytes on Python 3;
     # the values are passed unchanged here -- confirm the target version.
     m = md5()
     m.update(device.platform.name)
     m.update(device.platform.version)
     m.update(device.name)
     m.update(self.defines)
     m.update(kernel)
     cacheName = '%s.elf' % m.hexdigest()

     fileName = os.path.join(kernelFileDir, cacheName)

     # Try the cache first; a missing or unreadable cache file simply means
     # the kernel has to be compiled from source.
     try:
         with open(fileName, 'rb') as cacheFile:
             cachedBinary = cacheFile.read()
     except IOError:
         cachedBinary = None

     # Finally, the actual work of loading the kernel...
     try:
         if cachedBinary is None:
             self.kernel = cl.Program(
                 self.context, kernel).build(self.defines)

             # Apply BFI_INT if enabled.
             if self.BFI_INT:
                 # Patch the binary output from the compiler.
                 patcher = BFIPatcher(self.interface)
                 binaryData = patcher.patch(self.kernel.binaries[0])

                 self.interface.debug("Applied BFI_INT patch")

                 # Reload the kernel with the patched binary.
                 self.kernel = cl.Program(
                     self.context, [device],
                     [binaryData]).build(self.defines)

             # Write the kernel binary to the cache; the context manager
             # closes the file even if the write fails.
             with open(fileName, 'wb') as cacheFile:
                 cacheFile.write(self.kernel.binaries[0])
         else:
             self.kernel = cl.Program(
                 self.context, [device],
                 [cachedBinary]).build(self.defines)

     except cl.LogicError:
         self.interface.fatal("Failed to compile OpenCL kernel!")
         return
     except PatchError:
         self.interface.fatal('Failed to apply BFI_INT patch to kernel! '
             'Is BFI_INT supported on this hardware?')
         return

     cl.unload_compiler()

     # If the user didn't specify their own worksize, use the maximum
     # supported by the device.
     maxSize = self.kernel.search.get_work_group_info(
               cl.kernel_work_group_info.WORK_GROUP_SIZE, self.device)

     if self.WORKSIZE is None:
         self.WORKSIZE = maxSize
     else:
         if self.WORKSIZE > maxSize:
             self.interface.log('Warning: Worksize exceeds the maximum of '
                                 + str(maxSize) + ', using default.')
         if self.WORKSIZE < 1:
             self.interface.log('Warning: Invalid worksize, using default.')

         # Clamp into the range [1, maxSize].
         self.WORKSIZE = min(self.WORKSIZE, maxSize)
         self.WORKSIZE = max(self.WORKSIZE, 1)
         # If the worksize is not a power of 2, round down to the nearest
         # one. bit_length() gives the position of the highest set bit, so
         # the result is exact (no floating point logarithms involved).
         if (self.WORKSIZE & (self.WORKSIZE - 1)) != 0:
             self.WORKSIZE = 1 << (int(self.WORKSIZE).bit_length() - 1)

     self.interface.setWorkFactor(self.WORKSIZE)
예제 #2
0
    def loadKernel(self, device):
        """Load the kernel and initialize the device.

        Creates an OpenCL context for ``device``, validates the work size
        against the device limit, assembles the compiler defines and then
        either builds ``kernel.cl`` from source or loads a previously cached
        binary stored next to this file.

        Args:
            device: the OpenCL device the kernel is built for.

        Side effects:
            Sets ``self.context``, ``self.kernel``, ``self.WORKSIZE`` and
            ``self.rateDivisor``, appends to ``self.defines``, may clear
            ``self.BFI_INT`` and may write a ``<md5>.elf`` cache file into
            the kernel's directory.

        Returns:
            None. When compiling or patching fails the problem is reported
            through ``self.interface.fatal`` and the method returns early.
        """
        self.context = cl.Context([device], None, None)

        # Get the maximum worksize of the device.
        maxWorkSize = self.device.get_info(cl.device_info.MAX_WORK_GROUP_SIZE)

        # If the user didn't specify their own worksize, use the maximum
        # supported worksize of the device.
        if self.WORKSIZE is None:
            self.interface.error('WORKSIZE not supplied, using HW max. of ' + str(maxWorkSize))
            self.WORKSIZE = maxWorkSize
        else:
            # If the worksize is larger than the maximum supported worksize
            # of the device.
            if (self.WORKSIZE > maxWorkSize):
                self.interface.error('WORKSIZE out of range, using HW max. of ' + str(maxWorkSize))
                self.WORKSIZE = maxWorkSize
            # If the worksize is not a positive power of 2. The explicit
            # "< 1" test is needed because 0 & -1 == 0, so zero would
            # otherwise slip through the bit trick below.
            if self.WORKSIZE < 1 or (self.WORKSIZE & (self.WORKSIZE - 1)) != 0:
                self.interface.error('WORKSIZE invalid, using HW max. of ' + str(maxWorkSize))
                self.WORKSIZE = maxWorkSize

        # These definitions are required for the kernel to function.
        self.defines += (' -DOUTPUT_SIZE=' + str(self.OUTPUT_SIZE))
        self.defines += (' -DOUTPUT_MASK=' + str(self.OUTPUT_SIZE - 1))
        self.defines += (' -DWORKSIZE=' + str(self.WORKSIZE))

        # If the user wants to mine with vectors, enable the appropriate code
        # in the kernel source.
        if self.VECTORS:
            self.defines += ' -DVECTORS'
            self.rateDivisor = 2
        elif self.VECTORS4:
            self.defines += ' -DVECTORS4'
            self.rateDivisor = 4
        else:
            self.rateDivisor = 1

        # Some AMD devices support a special "bitalign" instruction that makes
        # bitwise rotation (required for SHA-256) much faster.
        if (device.extensions.find('cl_amd_media_ops') != -1):
            self.defines += ' -DBITALIGN'
            # Enable the experimental BFI_INT instruction optimization.
            if self.BFI_INT:
                self.defines += ' -DBFI_INT'
        elif self.BFI_INT:
            # BFI_INT is only defined together with cl_amd_media_ops above,
            # so without that extension the binary must not be patched.
            self.BFI_INT = False

        # Locate and read the OpenCL source code in the kernel's directory.
        kernelFileDir = os.path.dirname(__file__)
        kernelFilePath = os.path.join(kernelFileDir, 'kernel.cl')
        with open(kernelFilePath, 'r') as kernelFile:
            kernel = kernelFile.read()

        # For fast startup, we cache the compiled OpenCL code. The name of the
        # cache is determined as the hash of a few important,
        # compilation-specific pieces of information.
        # NOTE(review): if md5 is hashlib's, update() needs bytes on Python 3;
        # the values are passed unchanged here -- confirm the target version.
        m = md5()
        m.update(device.platform.name)
        m.update(device.platform.version)
        m.update(device.name)
        m.update(self.defines)
        m.update(kernel)
        cacheName = '%s.elf' % m.hexdigest()

        fileName = os.path.join(kernelFileDir, cacheName)

        # Try the cache first; a missing or unreadable cache file simply
        # means the kernel has to be compiled from source.
        try:
            with open(fileName, 'rb') as cacheFile:
                cachedBinary = cacheFile.read()
        except IOError:
            cachedBinary = None

        # Finally, the actual work of loading the kernel...
        try:
            if cachedBinary is None:
                self.kernel = cl.Program(
                    self.context, kernel).build(self.defines)

                # Apply BFI_INT if enabled.
                if self.BFI_INT:
                    # Patch the binary output from the compiler.
                    patcher = BFIPatcher(self.interface)
                    binaryData = patcher.patch(self.kernel.binaries[0])

                    self.interface.debug("Applied BFI_INT patch")

                    # Reload the kernel with the patched binary.
                    self.kernel = cl.Program(
                        self.context, [device],
                        [binaryData]).build(self.defines)

                # Write the kernel binary to the cache; the context manager
                # closes the file even if the write fails.
                with open(fileName, 'wb') as cacheFile:
                    cacheFile.write(self.kernel.binaries[0])
            else:
                self.kernel = cl.Program(
                    self.context, [device],
                    [cachedBinary]).build(self.defines)

        except cl.LogicError:
            self.interface.fatal("Failed to compile OpenCL kernel!")
            return
        except PatchError:
            self.interface.fatal('Failed to apply BFI_INT patch to kernel! '
                'Is BFI_INT supported on this hardware?')
            return

        # Unload the compiler to reduce memory usage.
        cl.unload_compiler()
예제 #3
0
 
 # Finally, the actual work of loading the kernel...
 try:
     binary = open(fileName, 'rb')
 except IOError: 
     binary = None
 
 try:
     if binary is None:
         self.kernel = cl.Program(
             self.context, kernel).build(self.defines)
          
         #apply BFI_INT if enabled
         if self.BFI_INT:
             #patch the binary output from the compiler
             patcher = BFIPatcher(self.interface)
             binaryData = patcher.patch(self.kernel.binaries[0])
             
             self.interface.debug("Applied BFI_INT patch")
             
             #reload the kernel with the patched binary
             self.kernel = cl.Program(
                 self.context, [device],
                 [binaryData]).build(self.defines)
         
         #write the kernel binaries to file
         binaryW = open(fileName, 'wb')
         binaryW.write(self.kernel.binaries[0])
         binaryW.close()
     else:
         binaryData = binary.read()