Exemple #1
0
    def __init__(self, interface):
        platforms = cl.get_platforms()

        # Initialize object attributes and retrieve command-line options...)
        self.device = None
        self.kernel = None
        self.interface = interface
        self.core = self.interface.addCore()
        self.defines = ''
        self.loopExponent = 0

        # Set the initial number of nonces to run per execution
        # 2^(16 + aggression)
        self.AGGRESSION += 16
        self.AGGRESSION = min(32, self.AGGRESSION)
        self.AGGRESSION = max(16, self.AGGRESSION)
        self.size = 1 << self.AGGRESSION

        # We need a QueueReader to efficiently provide our dedicated thread
        # with work.
        self.qr = QueueReader(self.core, lambda nr: self.preprocess(nr),
                                lambda x,y: self.size * 1 << self.loopExponent)

        # The platform selection must be valid to mine.
        if self.PLATFORM >= len(platforms) or \
            (self.PLATFORM is None and len(platforms) > 1):
            self.interface.log(
                'Wrong platform or more than one OpenCL platform found, '
                'use PLATFORM=ID to select one of the following\n',
                False, True)

            for i,p in enumerate(platforms):
                self.interface.log('    [%d]\t%s' % (i, p.name), False, False)

            # Since the platform is invalid, we can't mine.
            self.interface.fatal()
            return
        elif self.PLATFORM is None:
            self.PLATFORM = 0

        devices = platforms[self.PLATFORM].get_devices()

        # The device selection must be valid to mine.
        if self.DEVICE >= len(devices) or \
            (self.DEVICE is None and len(devices) > 1):
            self.interface.log(
                'No device specified or device not found, '
                'use DEVICE=ID to specify one of the following\n',
                False, True)

            for i,d in enumerate(devices):
                self.interface.log('    [%d]\t%s' % (i, d.name), False, False)

            # Since the device selection is invalid, we can't mine.
            self.interface.fatal()
            return
        elif self.DEVICE is None:
            self.DEVICE = 0

        self.device = devices[self.DEVICE]

        # We need the appropriate kernel for this device...
        try:
            self.loadKernel(self.device)
        except Exception:
            self.interface.fatal("Failed to load OpenCL kernel!")
            return

        # Initialize a command queue to send commands to the device, and a
        # buffer to collect results in...
        self.commandQueue = cl.CommandQueue(self.context)
        self.output = np.zeros(self.OUTPUT_SIZE+1, np.uint32)
        self.output_buf = cl.Buffer(
            self.context, cl.mem_flags.WRITE_ONLY | cl.mem_flags.USE_HOST_PTR,
            hostbuf=self.output)

        self.applyMeta()
Exemple #2
0
class MiningKernel(object):
    #A Phoenix Miner-compatible OpenCL kernel created by Phateus

    PLATFORM = KernelOption(
        'PLATFORM', int, default=None,
        help='The ID of the OpenCL platform to use')
    DEVICE = KernelOption(
        'DEVICE', int, default=None,
        help='The ID of the OpenCL device to use')
    VECTORS = KernelOption(
        'VECTORS', bool, default=False, advanced=True,
        help='Enable vector support in the kernel?')
    VECTORS4 = KernelOption(
        'VECTORS4', bool, default=False, advanced=True,
        help='Enable vector uint4 support in the kernel?')
    FASTLOOP = KernelOption(
        'FASTLOOP', bool, default=True, advanced=True,
        help='Run iterative mining thread?')
    AGGRESSION = KernelOption(
        'AGGRESSION', int, default=5, advanced=True,
        help='Exponential factor indicating how much work to run '
        'per OpenCL execution')
    WORKSIZE = KernelOption(
        'WORKSIZE', int, default=None, advanced=True,
        help='The worksize to use when executing CL kernels.')
    BFI_INT = KernelOption(
        'BFI_INT', bool, default=True, advanced=True,
        help='Use the BFI_INT instruction for AMD/ATI GPUs.')
    OUTPUT_SIZE = WORKSIZE

    # This must be manually set for Git
    REVISION = 121

    def __init__(self, interface):
        platforms = cl.get_platforms()

        # Initialize object attributes and retrieve command-line options...)
        self.device = None
        self.kernel = None
        self.interface = interface
        self.core = self.interface.addCore()
        self.defines = ''
        self.loopExponent = 0

        # Set the initial number of nonces to run per execution
        # 2^(16 + aggression)
        self.AGGRESSION += 16
        self.AGGRESSION = min(32, self.AGGRESSION)
        self.AGGRESSION = max(16, self.AGGRESSION)
        self.size = 1 << self.AGGRESSION

        # We need a QueueReader to efficiently provide our dedicated thread
        # with work.
        self.qr = QueueReader(self.core, lambda nr: self.preprocess(nr),
                                lambda x,y: self.size * 1 << self.loopExponent)

        # The platform selection must be valid to mine.
        if self.PLATFORM >= len(platforms) or \
            (self.PLATFORM is None and len(platforms) > 1):
            self.interface.log(
                'Wrong platform or more than one OpenCL platform found, '
                'use PLATFORM=ID to select one of the following\n',
                False, True)

            for i,p in enumerate(platforms):
                self.interface.log('    [%d]\t%s' % (i, p.name), False, False)

            # Since the platform is invalid, we can't mine.
            self.interface.fatal()
            return
        elif self.PLATFORM is None:
            self.PLATFORM = 0

        devices = platforms[self.PLATFORM].get_devices()

        # The device selection must be valid to mine.
        if self.DEVICE >= len(devices) or \
            (self.DEVICE is None and len(devices) > 1):
            self.interface.log(
                'No device specified or device not found, '
                'use DEVICE=ID to specify one of the following\n',
                False, True)

            for i,d in enumerate(devices):
                self.interface.log('    [%d]\t%s' % (i, d.name), False, False)

            # Since the device selection is invalid, we can't mine.
            self.interface.fatal()
            return
        elif self.DEVICE is None:
            self.DEVICE = 0

        self.device = devices[self.DEVICE]

        # We need the appropriate kernel for this device...
        try:
            self.loadKernel(self.device)
        except Exception:
            self.interface.fatal("Failed to load OpenCL kernel!")
            return

        # Initialize a command queue to send commands to the device, and a
        # buffer to collect results in...
        self.commandQueue = cl.CommandQueue(self.context)
        self.output = np.zeros(self.OUTPUT_SIZE+1, np.uint32)
        self.output_buf = cl.Buffer(
            self.context, cl.mem_flags.WRITE_ONLY | cl.mem_flags.USE_HOST_PTR,
            hostbuf=self.output)

        self.applyMeta()

    def applyMeta(self):
        #Apply any kernel-specific metadata.
        self.interface.setMeta('kernel', 'phatk2 r%s' % self.REVISION)
        self.interface.setMeta('device', self.device.name.replace('\x00',''))
        self.interface.setMeta('cores', self.device.max_compute_units)

    def loadKernel(self, device):
        #Load the kernel and initialize the device.
        self.context = cl.Context([device], None, None)

        # get the maximum worksize of the device
        maxWorkSize = self.device.get_info(cl.device_info.MAX_WORK_GROUP_SIZE)

        # If the user didn't specify their own worksize, use the maximum supported worksize of the device
        if self.WORKSIZE is None:
            self.interface.error('WORKSIZE not supplied, using HW max. of ' + str(maxWorkSize))
            self.WORKSIZE = maxWorkSize
        else:
            # If the worksize is larger than the maximum supported worksize of the device
            if (self.WORKSIZE > maxWorkSize):
                self.interface.error('WORKSIZE out of range, using HW max. of ' + str(maxWorkSize))
                self.WORKSIZE = maxWorkSize
            # If the worksize is not a power of 2
            if (self.WORKSIZE & (self.WORKSIZE - 1)) != 0:
                self.interface.error('WORKSIZE invalid, using HW max. of ' + str(maxWorkSize))
                self.WORKSIZE = maxWorkSize

        # These definitions are required for the kernel to function.
        self.defines += (' -DOUTPUT_SIZE=' + str(self.OUTPUT_SIZE))
        self.defines += (' -DOUTPUT_MASK=' + str(self.OUTPUT_SIZE - 1))
        self.defines += (' -DWORKSIZE=' + str(self.WORKSIZE))

        # If the user wants to mine with vectors, enable the appropriate code
        # in the kernel source.
        if self.VECTORS:
            self.defines += ' -DVECTORS'
            self.rateDivisor = 2
        elif self.VECTORS4:
            self.defines += ' -DVECTORS4'
            self.rateDivisor = 4
        else:
            self.rateDivisor = 1

        # Some AMD devices support a special "bitalign" instruction that makes
        # bitwise rotation (required for SHA-256) much faster.
        if (device.extensions.find('cl_amd_media_ops') != -1):
            self.defines += ' -DBITALIGN'
            #enable the expierimental BFI_INT instruction optimization
            if self.BFI_INT:
                self.defines += ' -DBFI_INT'

        # Locate and read the OpenCL source code in the kernel's directory.
        kernelFileDir, pyfile = os.path.split(__file__)
        kernelFilePath = os.path.join(kernelFileDir, 'kernel.cl')
        kernelFile = open(kernelFilePath, 'r')
        kernel = kernelFile.read()
        kernelFile.close()

        # For fast startup, we cache the compiled OpenCL code. The name of the
        # cache is determined as the hash of a few important,
        # compilation-specific pieces of information.
        m = md5()
        m.update(device.platform.name)
        m.update(device.platform.version)
        m.update(device.name)
        m.update(self.defines)
        m.update(kernel)
        cacheName = '%s.elf' % m.hexdigest()

        fileName = os.path.join(kernelFileDir, cacheName)

        # Finally, the actual work of loading the kernel...
        try:
            binary = open(fileName, 'rb')
        except IOError:
            binary = None

        try:
            if binary is None:
                self.kernel = cl.Program(
                    self.context, kernel).build(self.defines)

                #apply BFI_INT if enabled
                if self.BFI_INT:
                    #patch the binary output from the compiler
                    patcher = BFIPatcher(self.interface)
                    binaryData = patcher.patch(self.kernel.binaries[0])

                    self.interface.debug("Applied BFI_INT patch")

                    #reload the kernel with the patched binary
                    self.kernel = cl.Program(
                        self.context, [device],
                        [binaryData]).build(self.defines)

                #write the kernel binaries to file
                binaryW = open(fileName, 'wb')
                binaryW.write(self.kernel.binaries[0])
                binaryW.close()
            else:
                binaryData = binary.read()
                self.kernel = cl.Program(
                    self.context, [device], [binaryData]).build(self.defines)

        except cl.LogicError:
            self.interface.fatal("Failed to compile OpenCL kernel!")
            return
        except PatchError:
            self.interface.fatal('Failed to apply BFI_INT patch to kernel! '
                'Is BFI_INT supported on this hardware?')
            return
        finally:
            if binary: binary.close()

        #unload the compiler to reduce memory usage
        cl.unload_compiler()

    def start(self):
        #Phoenix wants the kernel to start.

        self.qr.start()
        reactor.callInThread(self.mineThread)

    def stop(self):
        #Phoenix wants this kernel to stop. The kernel is not necessarily
        #reusable, so it's safe to clean up as well.

        self.qr.stop()

    def updateIterations(self):
        # Set up the number of internal iterations to run if FASTLOOP enabled
        rate = self.core.getRate()

        if not (rate <= 0):
            #calculate the number of iterations to run
            EXP = max(0, (math.log(rate)/math.log(2)) - (self.AGGRESSION - 8))
            #prevent switching between loop exponent sizes constantly
            if EXP > self.loopExponent + 0.54:
                EXP = round(EXP)
            elif EXP < self.loopExponent - 0.65:
                EXP = round(EXP)
            else:
                EXP = self.loopExponent

            self.loopExponent = int(max(0, EXP))

    def preprocess(self, nr):
        if self.FASTLOOP:
            self.updateIterations()

        kd = KernelData(nr, self.core, self.rateDivisor, self.AGGRESSION)
        return kd

    def postprocess(self, output, nr):
        #Scans over a single buffer produced as a result of running the
        #OpenCL kernel on the device. This is done outside of the mining thread
        #for efficiency reasons.

        # Iterate over only the first OUTPUT_SIZE items. Exclude the last item
        # which is a duplicate of the most recently-found nonce.
        for i in xrange(self.OUTPUT_SIZE):
            if output[i]:
                if not self.interface.foundNonce(nr, int(output[i])):
                    hash = self.interface.calculateHash(nr, int(output[i]))
                    if not hash.endswith('\x00\x00\x00\x00'):
                        self.interface.error('Unusual behavior from OpenCL. '
                            'Hardware problem?')

    def mineThread(self):
        for data in self.qr:
            for i in range(data.iterations):
                self.kernel.search(
                    self.commandQueue, (data.size, ), (self.WORKSIZE, ),
                    data.state[0], data.state[1], data.state[2], data.state[3],
                    data.state[4], data.state[5], data.state[6], data.state[7],
                    data.state2[1], data.state2[2], data.state2[3],
                    data.state2[5], data.state2[6], data.state2[7],
                    data.base[i],
                    data.f[1],data.f[2],
                    data.f[3],data.f[4],
                    data.f[5],data.f[6],
                    data.f[7],data.f[8],
                    self.output_buf)
                cl.enqueue_read_buffer(
                    self.commandQueue, self.output_buf, self.output)
                self.commandQueue.finish()

                # The OpenCL code will flag the last item in the output buffer
                # when it finds a valid nonce. If that's the case, send it to
                # the main thread for postprocessing and clean the buffer
                # for the next pass.
                if self.output[self.OUTPUT_SIZE]:
                    reactor.callFromThread(self.postprocess,
                    self.output.copy(), data.nr)

                    self.output.fill(0)
                    cl.enqueue_write_buffer(
                        self.commandQueue, self.output_buf, self.output)