def loadKernel(self, device):
    """Load the kernel and initialize the device.

    Creates an OpenCL context for ``device``, settles ``self.WORKSIZE``
    against the device's maximum work-group size, extends ``self.defines``
    with the options the kernel source needs, then builds ``kernel.cl``
    (or loads a previously cached binary) into ``self.kernel``.

    If the build raises ``cl.LogicError`` or the BFI_INT patcher raises
    ``PatchError``, the failure is reported through
    ``self.interface.fatal`` and the method returns early, before the
    compiler is unloaded.
    """
    self.context = cl.Context([device], None, None)

    # get the maximum worksize of the device
    # NOTE(review): this queries self.device rather than the ``device``
    # argument -- presumably the same object; confirm against the caller.
    maxWorkSize = self.device.get_info(cl.device_info.MAX_WORK_GROUP_SIZE)

    # If the user didn't specify their own worksize, use the maximum
    # supported worksize of the device
    if self.WORKSIZE is None:
        self.interface.error('WORKSIZE not supplied, using HW max. of '
                             + str(maxWorkSize))
        self.WORKSIZE = maxWorkSize
    else:
        # If the worksize is larger than the maximum supported worksize
        # of the device
        if self.WORKSIZE > maxWorkSize:
            self.interface.error('WORKSIZE out of range, using HW max. of '
                                 + str(maxWorkSize))
            self.WORKSIZE = maxWorkSize
        # If the worksize is not a power of 2
        if (self.WORKSIZE & (self.WORKSIZE - 1)) != 0:
            self.interface.error('WORKSIZE invalid, using HW max. of '
                                 + str(maxWorkSize))
            self.WORKSIZE = maxWorkSize

    # These definitions are required for the kernel to function.
    self.defines += (' -DOUTPUT_SIZE=' + str(self.OUTPUT_SIZE))
    self.defines += (' -DOUTPUT_MASK=' + str(self.OUTPUT_SIZE - 1))
    self.defines += (' -DWORKSIZE=' + str(self.WORKSIZE))

    # If the user wants to mine with vectors, enable the appropriate code
    # in the kernel source.
    if self.VECTORS:
        self.defines += ' -DVECTORS'
        self.rateDivisor = 2
    elif self.VECTORS4:
        self.defines += ' -DVECTORS4'
        self.rateDivisor = 4
    else:
        self.rateDivisor = 1

    # Some AMD devices support a special "bitalign" instruction that makes
    # bitwise rotation (required for SHA-256) much faster.
    if 'cl_amd_media_ops' in device.extensions:
        self.defines += ' -DBITALIGN'
        # enable the experimental BFI_INT instruction optimization
        # NOTE(review): nesting under the BITALIGN check was reconstructed
        # from a flattened source -- confirm against the upstream file.
        if self.BFI_INT:
            self.defines += ' -DBFI_INT'

    # Locate and read the OpenCL source code in the kernel's directory.
    kernelFileDir = os.path.dirname(__file__)
    kernelFilePath = os.path.join(kernelFileDir, 'kernel.cl')
    kernelFile = open(kernelFilePath, 'r')
    try:
        kernel = kernelFile.read()
    finally:
        # Release the handle even if the read fails.
        kernelFile.close()

    # For fast startup, we cache the compiled OpenCL code. The name of the
    # cache is determined as the hash of a few important,
    # compilation-specific pieces of information.
    m = md5()
    m.update(device.platform.name)
    m.update(device.platform.version)
    m.update(device.name)
    m.update(self.defines)
    m.update(kernel)
    cacheName = '%s.elf' % m.hexdigest()

    fileName = os.path.join(kernelFileDir, cacheName)

    # Finally, the actual work of loading the kernel...
    try:
        binary = open(fileName, 'rb')
    except IOError:
        # No readable cache entry: compile from source below.
        binary = None

    try:
        if binary is None:
            self.kernel = cl.Program(
                self.context, kernel).build(self.defines)

            # apply BFI_INT if enabled
            if self.BFI_INT:
                # patch the binary output from the compiler
                patcher = BFIPatcher(self.interface)
                binaryData = patcher.patch(self.kernel.binaries[0])

                self.interface.debug("Applied BFI_INT patch")

                # reload the kernel with the patched binary
                self.kernel = cl.Program(
                    self.context, [device],
                    [binaryData]).build(self.defines)

            # write the kernel binaries to file
            binaryW = open(fileName, 'wb')
            try:
                binaryW.write(self.kernel.binaries[0])
            finally:
                # Do not leak the handle if the write fails part-way.
                binaryW.close()
        else:
            binaryData = binary.read()
            self.kernel = cl.Program(
                self.context, [device], [binaryData]).build(self.defines)

    except cl.LogicError:
        self.interface.fatal("Failed to compile OpenCL kernel!")
        return
    except PatchError:
        self.interface.fatal('Failed to apply BFI_INT patch to kernel! '
                             'Is BFI_INT supported on this hardware?')
        return
    finally:
        if binary is not None:
            binary.close()

    # unload the compiler to reduce memory usage
    cl.unload_compiler()
# Finally, the actual work of loading the kernel... try: binary = open(fileName, 'rb') except IOError: binary = None try: if binary is None: self.kernel = cl.Program( self.context, kernel).build(self.defines) #apply BFI_INT if enabled if self.BFI_INT: #patch the binary output from the compiler patcher = BFIPatcher(self.interface) binaryData = patcher.patch(self.kernel.binaries[0]) self.interface.debug("Applied BFI_INT patch") #reload the kernel with the patched binary self.kernel = cl.Program( self.context, [device], [binaryData]).build(self.defines) #write the kernel binaries to file binaryW = open(fileName, 'wb') binaryW.write(self.kernel.binaries[0]) binaryW.close() else: binaryData = binary.read()
def loadKernel(self, device):
    """Load the kernel and initialize the device.

    Creates an OpenCL context for ``device``, extends ``self.defines``
    with the options the kernel source needs, builds ``kernel.cl`` (or
    loads a previously cached binary) into ``self.kernel``, and then
    settles ``self.WORKSIZE`` against the compiled kernel's maximum
    work-group size before passing it to ``self.interface.setWorkFactor``.

    If the build raises ``cl.LogicError`` or the BFI_INT patcher raises
    ``PatchError``, the failure is reported through
    ``self.interface.fatal`` and the method returns early, before the
    worksize is settled.
    """
    self.context = cl.Context([device], None, None)

    # These definitions are required for the kernel to function.
    self.defines += (' -DOUTPUT_SIZE=' + str(self.OUTPUT_SIZE))
    self.defines += (' -DOUTPUT_MASK=' + str(self.OUTPUT_SIZE - 1))

    # If the user wants to mine with vectors, enable the appropriate code
    # in the kernel source.
    if self.VECTORS:
        self.defines += ' -DVECTORS'

    # Some AMD devices support a special "bitalign" instruction that makes
    # bitwise rotation (required for SHA-256) much faster.
    if 'cl_amd_media_ops' in device.extensions:
        self.defines += ' -DBITALIGN'
        # enable the experimental BFI_INT instruction optimization
        if self.BFI_INT:
            self.defines += ' -DBFI_INT'
    else:
        # since BFI_INT requires cl_amd_media_ops, disable it
        if self.BFI_INT:
            self.BFI_INT = False

    # Locate and read the OpenCL source code in the kernel's directory.
    kernelFileDir = os.path.dirname(__file__)
    kernelFilePath = os.path.join(kernelFileDir, 'kernel.cl')
    kernelFile = open(kernelFilePath, 'r')
    try:
        kernel = kernelFile.read()
    finally:
        # Release the handle even if the read fails.
        kernelFile.close()

    # For fast startup, we cache the compiled OpenCL code. The name of the
    # cache is determined as the hash of a few important,
    # compilation-specific pieces of information.
    m = md5()
    m.update(device.platform.name)
    m.update(device.platform.version)
    m.update(device.name)
    m.update(self.defines)
    m.update(kernel)
    cacheName = '%s.elf' % m.hexdigest()

    fileName = os.path.join(kernelFileDir, cacheName)

    # Finally, the actual work of loading the kernel...
    try:
        binary = open(fileName, 'rb')
    except IOError:
        # No readable cache entry: compile from source below.
        binary = None

    try:
        if binary is None:
            self.kernel = cl.Program(
                self.context, kernel).build(self.defines)

            # apply BFI_INT if enabled
            if self.BFI_INT:
                # patch the binary output from the compiler
                patcher = BFIPatcher(self.interface)
                binaryData = patcher.patch(self.kernel.binaries[0])

                self.interface.debug("Applied BFI_INT patch")

                # reload the kernel with the patched binary
                self.kernel = cl.Program(
                    self.context, [device],
                    [binaryData]).build(self.defines)

            # write the kernel binaries to file
            binaryW = open(fileName, 'wb')
            try:
                binaryW.write(self.kernel.binaries[0])
            finally:
                # Do not leak the handle if the write fails part-way.
                binaryW.close()
        else:
            binaryData = binary.read()
            self.kernel = cl.Program(
                self.context, [device], [binaryData]).build(self.defines)

    except cl.LogicError:
        self.interface.fatal("Failed to compile OpenCL kernel!")
        return
    except PatchError:
        self.interface.fatal('Failed to apply BFI_INT patch to kernel! '
                             'Is BFI_INT supported on this hardware?')
        return
    finally:
        if binary is not None:
            binary.close()

    cl.unload_compiler()

    # If the user didn't specify their own worksize, use the maximum
    # supported by the device.
    maxSize = self.kernel.search.get_work_group_info(
        cl.kernel_work_group_info.WORK_GROUP_SIZE, self.device)

    if self.WORKSIZE is None:
        self.WORKSIZE = maxSize
    else:
        if self.WORKSIZE > maxSize:
            self.interface.log('Warning: Worksize exceeds the maximum of '
                               + str(maxSize) + ', using default.')
        if self.WORKSIZE < 1:
            self.interface.log('Warning: Invalid worksize, using default.')

        # Clamp the requested size into [1, maxSize].
        self.WORKSIZE = min(self.WORKSIZE, maxSize)
        self.WORKSIZE = max(self.WORKSIZE, 1)

        # if the worksize is not a power of 2, round down to the nearest
        # one. Integer doubling replaces the former math.log(X) expression,
        # which referenced an undefined name and raised NameError.
        if (self.WORKSIZE & (self.WORKSIZE - 1)) != 0:
            roundedSize = 1
            while (roundedSize << 1) <= self.WORKSIZE:
                roundedSize <<= 1
            self.WORKSIZE = roundedSize

    self.interface.setWorkFactor(self.WORKSIZE)
def loadKernel(self, device):
    """Load the kernel and initialize the device.

    Creates an OpenCL context for ``device``, settles ``self.WORKSIZE``
    (256 when unset, otherwise rounded down to a power of two), extends
    ``self.defines`` with the options the kernel source needs, then
    builds ``kernel.cl`` (or loads a previously cached binary) into
    ``self.kernel``.

    The method reports through ``self.interface.fatal`` and returns early
    when the device lacks the ``cl_amd_media_ops`` extension, when the
    build raises ``cl.LogicError``, or when the BFI_INT patcher raises
    ``PatchError``. After a successful build it also reports a fatal
    error if ``self.WORKSIZE`` exceeds the compiled kernel's maximum
    work-group size.
    """
    self.context = cl.Context([device], None, None)

    # If the user didn't specify their own worksize, use 256
    if self.WORKSIZE is None:
        self.WORKSIZE = 256
    else:
        # if the worksize is not a power of 2, round down to the nearest
        # one. Integer doubling replaces the former math.log(X) expression,
        # which referenced an undefined name and raised NameError.
        if (self.WORKSIZE & (self.WORKSIZE - 1)) != 0:
            roundedSize = 1
            while (roundedSize << 1) <= self.WORKSIZE:
                roundedSize <<= 1
            self.WORKSIZE = roundedSize

    # These definitions are required for the kernel to function.
    self.defines += (' -DOUTPUT_SIZE=' + str(self.OUTPUT_SIZE))
    self.defines += (' -DOUTPUT_MASK=' + str(self.OUTPUT_SIZE - 1))
    self.defines += (' -DWORKSIZE=' + str(self.WORKSIZE))

    # If the user wants to mine with vectors, enable the appropriate code
    # in the kernel source.
    if self.VECTORS:
        self.defines += ' -DVECTORS'
        self.rateDivisor = 2
    elif self.VECTORS4:
        self.defines += ' -DVECTORS4'
        self.rateDivisor = 4
    else:
        self.rateDivisor = 1

    # Some AMD devices support a special "bitalign" instruction that makes
    # bitwise rotation (required for SHA-256) much faster.
    if 'cl_amd_media_ops' in device.extensions:
        self.defines += ' -DBITALIGN'
        # enable the experimental BFI_INT instruction optimization
        if self.BFI_INT:
            self.defines += ' -DBFI_INT'
    else:
        # Since phatk and phatk2 will error out on Nvidia GPUs
        # make sure the user knows that they need to use poclbm
        self.interface.fatal("GPU not supported! phatk2 is designed for "
                             "ATI 5xxx and newer only. Try -k poclbm instead.")
        return

    # Locate and read the OpenCL source code in the kernel's directory.
    kernelFileDir = os.path.dirname(__file__)
    kernelFilePath = os.path.join(kernelFileDir, 'kernel.cl')
    kernelFile = open(kernelFilePath, 'r')
    try:
        kernel = kernelFile.read()
    finally:
        # Release the handle even if the read fails.
        kernelFile.close()

    # For fast startup, we cache the compiled OpenCL code. The name of the
    # cache is determined as the hash of a few important,
    # compilation-specific pieces of information.
    m = md5()
    m.update(device.platform.name)
    m.update(device.platform.version)
    m.update(device.name)
    m.update(self.defines)
    m.update(kernel)
    cacheName = '%s.elf' % m.hexdigest()

    fileName = os.path.join(kernelFileDir, cacheName)

    # Finally, the actual work of loading the kernel...
    try:
        binary = open(fileName, 'rb')
    except IOError:
        # No readable cache entry: compile from source below.
        binary = None

    try:
        if binary is None:
            self.kernel = cl.Program(
                self.context, kernel).build(self.defines)

            # apply BFI_INT if enabled
            if self.BFI_INT:
                # patch the binary output from the compiler
                patcher = BFIPatcher(self.interface)
                binaryData = patcher.patch(self.kernel.binaries[0])

                self.interface.debug("Applied BFI_INT patch")

                # reload the kernel with the patched binary
                self.kernel = cl.Program(
                    self.context, [device],
                    [binaryData]).build(self.defines)

            # write the kernel binaries to file
            binaryW = open(fileName, 'wb')
            try:
                binaryW.write(self.kernel.binaries[0])
            finally:
                # Do not leak the handle if the write fails part-way.
                binaryW.close()
        else:
            binaryData = binary.read()
            self.kernel = cl.Program(
                self.context, [device], [binaryData]).build(self.defines)

    except cl.LogicError:
        self.interface.fatal("Failed to compile OpenCL kernel!")
        return
    except PatchError:
        self.interface.fatal('Failed to apply BFI_INT patch to kernel! '
                             'Is BFI_INT supported on this hardware?')
        return
    finally:
        if binary is not None:
            binary.close()

    cl.unload_compiler()

    # Since this can't be run before compiling the kernel, all we can do is
    # check to make sure the selected size is not too large
    maxSize = self.kernel.search.get_work_group_info(
        cl.kernel_work_group_info.WORK_GROUP_SIZE, self.device)

    if self.WORKSIZE > maxSize:
        self.interface.fatal('Maximum WORKSIZE on the selected device is '
                             + str(maxSize))