def setup_opencl(data, cube_size):
    """Run the ``score_matrix_to_rms`` OpenCL kernel and return its scores.

    The program is built from the module-level ``SOURCE``. The ``in_r``,
    ``in_g``, ``in_b`` and ``out_r`` entries of ``data`` are copied into
    device buffers together with a zero-filled float array holding
    ``cube_size ** 3`` entries; the kernel is then enqueued with
    ``cube_size ** 3`` as its size argument and the score buffer is read
    back into a Python array shaped like the zero-filled one.

    :param data: mapping with the keys ``'in_r'``, ``'in_g'``, ``'in_b'``
        and ``'out_r'``.
    :param cube_size: edge length; passed to the kernel as an int.
    :return: the score array read back from the device.
    """
    import pycl

    blocking = True

    with timeify("Making context, loading kernel"):
        ctx = pycl.clCreateContext(devices=pycl.clGetDeviceIDs())
        queue = pycl.clCreateCommandQueue(ctx)

        built = pycl.clCreateProgramWithSource(ctx, SOURCE).build()
        score_matrix = built['score_matrix_to_rms']
        # five buffer arguments followed by two integer arguments
        score_matrix.argtypes = (pycl.cl_mem,) * 5 + (pycl.cl_int,) * 2

    def upload(values):
        # Copy a Python array to the device; yields a (buffer, event) pair.
        return pycl.buffer_from_pyarray(queue, values, blocking=blocking)

    sub_divisions = cube_size ** 3

    with timeify("Creating buffers"):
        in_r_buf, in_evt1 = upload(data['in_r'])
        in_g_buf, in_evt2 = upload(data['in_g'])
        in_b_buf, in_evt3 = upload(data['in_b'])

        out_r = data['out_r']
        out_r_buf, in_evt4 = upload(out_r)

        score = array.array('f', [0] * sub_divisions)
        score_buf, in_evt5 = upload(score)

    with timeify("Run kernel r"):
        # NOTE(review): out_r_buf is uploaded above, yet in_r_buf is what is
        # passed as the fourth kernel argument -- confirm this is intended.
        uploads_done = [in_evt1, in_evt2, in_evt3, in_evt4, in_evt5]
        bound_kernel = score_matrix(in_r_buf, in_g_buf, in_b_buf, in_r_buf,
                                    score_buf, len(data['in_r']), cube_size,
                                    wait_for=uploads_done)
        run_evt = bound_kernel.on(queue, sub_divisions)

    with timeify("Retrive data"):
        score_from_gpu, evt = pycl.buffer_to_pyarray(
            queue, score_buf, wait_for=run_evt, like=score)

    return score_from_gpu
Beispiel #2
0
 def __init__(self):
     """Set up an OpenCL device, context and command queue.

     The last device reported by ``cl.clGetDeviceIDs()`` is selected; the
     context and queue are stored on the instance so they can be reused
     across calls.
     """
     self.device = cl.clGetDeviceIDs()[-1]
     self.context = cl.clCreateContext([self.device])
     self.queue = cl.clCreateCommandQueue(self.context)
Beispiel #3
0
    def test_simple_cache(self):
        """Two lookups with the same device list must compare equal."""
        import pycl as cl
        from ctree.ocl import get_context_and_queue_from_devices

        last_device = cl.clGetDeviceIDs()[-1]
        first_lookup = get_context_and_queue_from_devices([last_device])
        second_lookup = get_context_and_queue_from_devices([last_device])
        self.assertEqual(first_lookup, second_lookup)
Beispiel #4
0
def get_gpu():
    """Pick an OpenCL device according to the ``[opencl]`` config section.

    When neither the ``gpu`` (device name) nor the ``gpu_id`` option is
    set (or both are empty), the first device of type GPU is returned.
    Otherwise all devices are scanned and the first one whose ``name``
    equals the configured name, or whose ``value`` equals the configured
    id, is returned.

    :return: the selected device, or ``None`` when no device matches or
        when ``pycl.DeviceNotFoundError`` / ``KeyError`` is raised.
    """
    try:
        config = ctree.CONFIG
        name = (config.get("opencl", "gpu")
                if config.has_option("opencl", "gpu") else None)
        gpu_id = (config.get("opencl", "gpu_id")
                  if config.has_option("opencl", "gpu_id") else None)

        if gpu_id or name:
            for candidate in pycl.clGetDeviceIDs():
                if candidate.name == name or candidate.value == gpu_id:
                    return candidate
        else:
            gpus = pycl.clGetDeviceIDs(device_type=pycl.CL_DEVICE_TYPE_GPU)
            return gpus[0]
    except (pycl.DeviceNotFoundError, KeyError):
        return None
    def __init__(self):
        """Acquire the OpenCL context/queue and reset per-call state.

        The context and queue come from
        ``get_context_and_queue_from_devices`` so they can be reused
        across calls to this function.
        """
        # TODO: Need dependency injection to control ocl device selection
        self.desired_ocl_device = -1
        chosen_device = cl.clGetDeviceIDs()[self.desired_ocl_device]
        context_and_queue = get_context_and_queue_from_devices(
            [chosen_device])
        self.context, self.queue = context_and_queue
        self.max_work_group_size = chosen_device.max_work_group_size

        # Placeholders, declared here so every attribute exists from
        # construction on (PEP-8).
        self.kernel = None
        self.output = None
        self._c_function = None
def setup_opencl(data, cube_size):
    """Build and run the ``score_matrix_to_rms`` kernel; return the scores.

    Steps, each wrapped in a ``timeify`` block:

    1. create a context and queue over all reported devices and build the
       program from the module-level ``SOURCE``;
    2. copy ``data['in_r']``, ``data['in_g']``, ``data['in_b']``,
       ``data['out_r']`` and a zeroed float array of ``cube_size ** 3``
       entries into device buffers;
    3. enqueue the kernel with ``cube_size ** 3`` as its size argument;
    4. read the score buffer back into a Python array.

    :param data: mapping providing ``'in_r'``, ``'in_g'``, ``'in_b'`` and
        ``'out_r'``.
    :param cube_size: edge length, forwarded to the kernel as an int.
    :return: the score array copied back from the device.
    """
    import pycl

    blocking = True
    sub_divisions = cube_size ** 3

    with timeify("Making context, loading kernel"):
        all_devices = pycl.clGetDeviceIDs()
        ctx = pycl.clCreateContext(devices=all_devices)
        queue = pycl.clCreateCommandQueue(ctx)

        program = pycl.clCreateProgramWithSource(ctx, SOURCE).build()

        score_matrix = program['score_matrix_to_rms']
        buffer_args = (pycl.cl_mem,) * 5
        integer_args = (pycl.cl_int, pycl.cl_int)
        score_matrix.argtypes = buffer_args + integer_args

    def to_device(host_array):
        # Returns the (buffer, event) pair produced by the copy.
        return pycl.buffer_from_pyarray(queue, host_array,
                                        blocking=blocking)

    with timeify("Creating buffers"):
        in_r_buf, in_evt1 = to_device(data['in_r'])
        in_g_buf, in_evt2 = to_device(data['in_g'])
        in_b_buf, in_evt3 = to_device(data['in_b'])

        out_r = data['out_r']
        out_r_buf, in_evt4 = to_device(out_r)

        score = array.array('f', [0] * sub_divisions)
        score_buf, in_evt5 = to_device(score)

    with timeify("Run kernel r"):
        # NOTE(review): the fourth argument is in_r_buf, not the uploaded
        # out_r_buf -- confirm that this is deliberate.
        copy_events = [in_evt1, in_evt2, in_evt3, in_evt4, in_evt5]
        call = score_matrix(in_r_buf, in_g_buf, in_b_buf, in_r_buf,
                            score_buf,
                            len(data['in_r']), cube_size,
                            wait_for=copy_events)
        run_evt = call.on(queue, sub_divisions)

    with timeify("Retrive data"):
        score_from_gpu, evt = pycl.buffer_to_pyarray(queue, score_buf,
                                                     wait_for=run_evt,
                                                     like=score)

    return score_from_gpu
Beispiel #7
0
    def get_tuning_driver(self):
        """Return the brute-force tuning driver for this function.

        The search space consists of five array parameters
        (``parallelize``, ``locs``, ``distribute``, ``fusion`` and
        ``reassociate``) and the objective is ``MinimizeTime``. ``locs``
        is built with ``0`` and the number of OpenCL devices as its last
        two arguments (presumably the value bounds -- confirm against
        ``ctree.tune.IntegerArrayParameter``).

        :return: a ``ctree.tune.BruteForceTuningDriver`` instance.
        """
        from ctree.tune import BruteForceTuningDriver as TuningDriver
        from ctree.tune import MinimizeTime
        from ctree.tune import BooleanArrayParameter
        from ctree.tune import IntegerArrayParameter

        # An OpenTuner-based alternative (ctree.opentuner.driver
        # .OpenTunerDriver fed through a ConfigurationManipulator) and a
        # ConstantTuningDriver with a fixed configuration used to sit here as
        # disabled / unreachable code; only the brute-force driver was ever
        # returned, so only that path is kept.

        nMemorySpaces = len(cl.clGetDeviceIDs())

        params = [
            BooleanArrayParameter("parallelize", 7),
            IntegerArrayParameter("locs", 7, 0, nMemorySpaces),
            BooleanArrayParameter("distribute", 4),
            BooleanArrayParameter("fusion", 7),
            BooleanArrayParameter("reassociate", 4),
        ]

        return TuningDriver(params, MinimizeTime())
Beispiel #8
0
def ocl_init(ocl_src):
    """Build ``ocl_src`` for the first GPU found and run the conv kernel.

    Platforms are scanned in the order reported by
    ``cl.clGetPlatformIDs()``; the first platform that reports GPU devices
    wins and only its first device is used. A context and command queue
    are created for that device, the program is built and printed, and
    ``run_conv`` is invoked with the program and queue.

    :param ocl_src: OpenCL program source.
    :raises ValueError: if no platform reports a GPU device.
    """
    use_devices = None
    for candidate_platform in cl.clGetPlatformIDs():
        try:
            devices = cl.clGetDeviceIDs(candidate_platform,
                                        device_type=cl.CL_DEVICE_TYPE_GPU)
        except cl.DeviceNotFoundError:
            continue  # no GPU on this platform, try the next one
        use_devices = devices[0:1]  # arbitrarily choose the first device
        break
    if use_devices is None:
        raise ValueError("no GPU openCL device found")
    print("OpenCL use_devices: " + str(use_devices))

    context = cl.clCreateContext(use_devices)
    queue = cl.clCreateCommandQueue(context)

    prog = cl.clCreateProgramWithSource(context, ocl_src).build()
    # print() with a single argument behaves the same on Python 2 and 3,
    # unlike the Python-2-only print statement.
    print(prog)
    # run_mxplusb(prog, queue)
    run_conv(prog, queue)
Beispiel #9
0
def ocl_init(ocl_src):
    """Compile ``ocl_src`` on the first available GPU and call ``run_conv``.

    Each platform from ``cl.clGetPlatformIDs()`` is asked for its GPU
    devices; the search stops at the first platform that does not raise
    ``cl.DeviceNotFoundError`` and only the first device of that platform
    is kept. The built program is printed before ``run_conv(prog, queue)``
    is called.

    :param ocl_src: OpenCL program source.
    :raises ValueError: if no platform reports a GPU device.
    """
    use_devices = None
    for platform in cl.clGetPlatformIDs():
        try:
            devices = cl.clGetDeviceIDs(
                platform, device_type=cl.CL_DEVICE_TYPE_GPU)
        except cl.DeviceNotFoundError:
            continue  # this platform has no GPU
        use_devices = devices[0:1]  # arbitrarily choose the first device
        break

    if use_devices is None:
        raise ValueError("no GPU openCL device found")
    print("OpenCL use_devices: " + str(use_devices))

    context = cl.clCreateContext(use_devices)
    queue = cl.clCreateCommandQueue(context)

    prog = cl.clCreateProgramWithSource(context, ocl_src).build()
    # Function-call form: valid on Python 3 and prints the same text on
    # Python 2 for a single argument.
    print(prog)
    # run_mxplusb(prog, queue)
    run_conv(prog, queue)
Beispiel #10
0
 def __init__(self):
     """Select the last reported OpenCL device and fetch its context/queue."""
     last_device = clGetDeviceIDs()[-1]
     self.device = last_device
     context_and_queue = get_context_and_queue_from_devices([last_device])
     self.context, self.queue = context_and_queue
Beispiel #11
0
 def __init__(self, array, output):
     """Create a context and queue on the last device; keep both arguments.

     :param array: stored unchanged as ``self.array``.
     :param output: stored unchanged as ``self.output``.
     """
     last_device = clGetDeviceIDs()[-1]
     self.device = last_device
     new_context = clCreateContext([last_device])
     self.context = new_context
     self.queue = clCreateCommandQueue(new_context)
     self.array, self.output = array, output
Beispiel #12
0
    def visit_FunctionDecl(self, node):
        """Turn ``node`` into an OpenCL kernel and build its C control function.

        The visited declaration is marked as a kernel, its parameters are
        retyped to float pointers, and it is appended to the project as an
        ``OclFile``. When ``self.is_copied`` is set, one extra boundary
        kernel per boundary handler is generated as well. The returned
        ``FunctionDecl`` named ``stencil_control`` sets the kernel arguments,
        enqueues the kernel(s) with ``clEnqueueNDRangeKernel``, calls
        ``clFinish`` and returns ``error_code``.

        :param node: the function declaration being visited; modified in
            place.
        :return: the ``stencil_control`` ``FunctionDecl``.
        """
        # This function grabs the input and output grid names which are used
        # to generate the proper array macros.
        # Fresh symbol for the extra local-memory parameter appended below.
        self.local_block = SymbolRef.unique()
        arg_cfg = self.arg_cfg

        global_size = arg_cfg[0].shape

        if self.testing:
            # NOTE(review): this branch does not assign
            # self.virtual_global_size, which is read further down when the
            # 'global' array is built -- it must already be set elsewhere;
            # confirm.
            local_size = (1, 1, 1)
        else:
            # -1 selects the last device returned by clGetDeviceIDs().
            desired_device_number = -1
            device = cl.clGetDeviceIDs()[desired_device_number]
            lcs = LocalSizeComputer(global_size, device)
            local_size = lcs.compute_local_size_bulky()
            virtual_global_size = lcs.compute_virtual_global_size(local_size)
            self.global_size = global_size
            self.local_size = local_size
            self.virtual_global_size = virtual_global_size

        super(StencilOclTransformer, self).visit_FunctionDecl(node)
        # Every parameter except the last: global, const float pointer.
        for index, param in enumerate(node.params[:-1]):
            # TODO: Transform numpy type to ctype
            param.type = ct.POINTER(ct.c_float)()
            param.set_global()
            param.set_const()
        node.set_kernel()
        # The last existing parameter: global float pointer, not const.
        node.params[-1].set_global()
        node.params[-1].type = ct.POINTER(ct.c_float)()
        # Append one more float-pointer parameter, marked local, named after
        # the unique symbol created above.
        node.params.append(SymbolRef(self.local_block.name,
                                     ct.POINTER(ct.c_float)()))
        node.params[-1].set_local()
        node.defn = node.defn[0]

        # if boundary handling is copy we have to generate a collection of
        # boundary kernels to handle the on-gpu boundary copy
        if self.is_copied:
            device = cl.clGetDeviceIDs()[-1]
            self.boundary_handlers = boundary_kernel_factory(
                self.ghost_depth, self.output_grid,
                node.params[0].name,
                node.params[-2].name,  # second last parameter is output
                device
            )
            # One kernel declaration per boundary handler; all of them share
            # the main kernel's parameter list.
            boundary_kernels = [
                FunctionDecl(
                    name=boundary_handler.kernel_name,
                    params=node.params,
                    defn=boundary_handler.generate_ocl_kernel_body(),
                )
                for boundary_handler in self.boundary_handlers
            ]

            self.project.files.append(OclFile('kernel', [node]))

            for dim, boundary_kernel in enumerate(boundary_kernels):
                boundary_kernel.set_kernel()
                self.project.files.append(OclFile(kernel_dim_name(dim),
                                                  [boundary_kernel]))

            self.boundary_kernels = boundary_kernels

            # Debug aid: ctree.browser_show_ast(node) /
            # ctree.browser_show_ast(boundary_kernels[0])
        else:
            self.project.files.append(OclFile('kernel', [node]))

        # Debug aid: print(self.project.files[0]); print(self.project.files[-1])

        # Body of the control function: size arrays and an error code first.
        defn = [
            ArrayDef(
                SymbolRef('global', ct.c_ulong()), arg_cfg[0].ndim,
                [Constant(d) for d in self.virtual_global_size]
            ),
            ArrayDef(
                SymbolRef('local', ct.c_ulong()), arg_cfg[0].ndim,
                [Constant(s) for s in local_size]
                # [Constant(s) for s in [512, 512]]  # use this line to force a
                # opencl local size error
            ),
            Assign(SymbolRef("error_code", ct.c_int()), Constant(0)),
        ]
        # One cl_mem argument per entry of arg_cfg plus one more, bound to
        # buf0 .. bufN.
        setargs = [clSetKernelArg(
            SymbolRef('kernel'), Constant(d),
            FunctionCall(SymbolRef('sizeof'), [SymbolRef('cl_mem')]),
            Ref(SymbolRef('buf%d' % d))
        ) for d in range(len(arg_cfg) + 1)]
        from functools import reduce
        import operator
        # sizeof(cl_float) times the product over all dimensions of
        # (local size + 2 * ghost depth).
        local_mem_size = reduce(
            operator.mul,
            (size + 2 * self.kernel.ghost_depth[index]
             for index, size in enumerate(local_size)),
            ct.sizeof(cl.cl_float())
        )
        # Final kernel argument: local_mem_size with a NULL value pointer.
        # NOTE(review): the kernel is passed here as the plain string
        # 'kernel' and the index as a bare int, whereas the calls above use
        # SymbolRef/Constant -- confirm clSetKernelArg accepts both forms.
        setargs.append(
            clSetKernelArg(
                'kernel', len(arg_cfg) + 1,
                local_mem_size,
                NULL()
            )
        )

        defn.extend(setargs)
        enqueue_call = FunctionCall(SymbolRef('clEnqueueNDRangeKernel'), [
            SymbolRef('queue'), SymbolRef('kernel'),
            Constant(self.kernel.dim), NULL(),
            SymbolRef('global'), SymbolRef('local'),
            Constant(0), NULL(), NULL()
        ])

        defn.extend(check_ocl_error(enqueue_call, "clEnqueueNDRangeKernel"))

        # Control-function parameters: queue and main kernel first, then any
        # boundary kernels, and finally the buffers (added at the end).
        params = [
            SymbolRef('queue', cl.cl_command_queue()),
            SymbolRef('kernel', cl.cl_kernel())
        ]
        if self.is_copied:
            for dim, boundary_kernel in enumerate(self.boundary_kernels):
                # Per-boundary global/local size arrays.
                defn.extend([
                    ArrayDef(
                        SymbolRef(global_for_dim_name(dim), ct.c_ulong()),
                        arg_cfg[0].ndim,
                        [Constant(d)
                         for d in self.boundary_handlers[dim].global_size]
                    ),
                    ArrayDef(
                        SymbolRef(local_for_dim_name(dim), ct.c_ulong()),
                        arg_cfg[0].ndim,
                        [Constant(s) for s in
                         self.boundary_handlers[dim].local_size]
                    )
                ])
                # Same argument layout as the main kernel.
                setargs = [clSetKernelArg(
                    SymbolRef(kernel_dim_name(dim)), Constant(d),
                    FunctionCall(SymbolRef('sizeof'), [SymbolRef('cl_mem')]),
                    Ref(SymbolRef('buf%d' % d))
                ) for d in range(len(arg_cfg) + 1)]
                setargs.append(
                    clSetKernelArg(
                        SymbolRef(kernel_dim_name(dim)), len(arg_cfg) + 1,
                        local_mem_size,
                        NULL()
                    )
                )
                defn.extend(setargs)

                enqueue_call = FunctionCall(
                    SymbolRef('clEnqueueNDRangeKernel'), [
                        SymbolRef('queue'), SymbolRef(kernel_dim_name(dim)),
                        Constant(self.kernel.dim), NULL(),
                        SymbolRef(global_for_dim_name(dim)),
                        SymbolRef(local_for_dim_name(dim)),
                        Constant(0), NULL(), NULL()
                    ]
                )
                # NOTE(review): unlike the main kernel's enqueue, this call
                # is appended without going through check_ocl_error --
                # confirm whether that is intentional.
                defn.append(enqueue_call)

                params.extend([
                    SymbolRef(kernel_dim_name(dim), cl.cl_kernel())
                ])

        # An earlier variant assigned the clFinish result to error_code and
        # printf'ed "OPENCL KERNEL RETURNED ERROR CODE %d" by hand; the call
        # now goes through check_ocl_error instead.

        finish_call = check_ocl_error(
            FunctionCall(SymbolRef('clFinish'), [SymbolRef('queue')]),
            "clFinish"
        )
        defn.extend(finish_call)
        defn.append(Return(SymbolRef("error_code")))

        params.extend(SymbolRef('buf%d' % d, cl.cl_mem())
                      for d in range(len(arg_cfg) + 1))

        control = FunctionDecl(ct.c_int32(), "stencil_control",
                               params=params,
                               defn=defn)

        return control
Beispiel #13
0
def main():
    """Collect CPU, GPU and system information and dump it as JSON.

    The module-level ``CL`` flag selects the data source: ``'openCL'``
    reads the OpenCL device list, ``'nonCL'`` uses the ``nonCL`` helpers
    (CPU information only). The result is written to ``info.json`` in the
    current directory and also printed.

    Sizes are computed with floor division so they are whole numbers on
    both Python 2 and Python 3.

    :raises RuntimeError: if the OpenCL device list is empty, or if the
        ``nonCL`` path runs on an operating system other than Windows or
        Linux.
    """
    json_db = {'CPUs': {}, 'GPUs': {}}

    # email = raw_input("What is your e-mail address?: ")
    email = "*****@*****.**"

    # Get operating system.
    system = platform.system()

    # Get RAM, bits, CPU and GPU information.
    CPUcount = 0
    GPUcount = 0
    gpuType = None
    gpuDriver = None

    def system_entry(raw_bits, ram, bits):
        # Build the 'System' record. The client ID is: OS initial, first
        # character of the word size, CPU count, GPU count, first character
        # of the GPU type ('N' when it is None), a dot, and a zero-padded
        # 4-digit random number. Counters are read when this is called.
        client_id = (system[0] + str(raw_bits)[0] + str(CPUcount) +
                     str(GPUcount) + str(gpuType)[0] + '.' +
                     str(random.randint(0, 9999)).rjust(4, '0'))
        return {
            "OS": str(system),
            "RAM": ram,
            "Bits": bits,
            "CPUs": CPUcount,
            "GPUs": GPUcount,
            "email": str(email),
            "ClientID": client_id,
        }

    if CL == 'openCL':
        CL_Devices = cl.clGetDeviceIDs()
        if not CL_Devices:
            # The system record below is derived from the last device.
            raise RuntimeError("no OpenCL devices found")

        for device in CL_Devices:
            device_kind = str(device.type)
            if 'CPU' in device_kind:
                CPUcount += 1
                json_db['CPUs']['CPU' + str(CPUcount)] = {
                    "DeviceName": str(device.name),
                    "DeviceVendor": str(device.vendor),
                    "DeviceBits": int(device.address_bits),
                    "DeviceSpeedMHz": int(device.max_clock_frequency),
                    "DeviceCores": int(device.max_compute_units),
                }
            elif 'GPU' in device_kind:
                GPUcount += 1
                gpu_vendor = str(device.vendor)
                if re.search('(AMD|Advanced Micro Device)', gpu_vendor):
                    gpuType = 'ocl'
                    # Verify GPU driver version is 13.1 or higher.
                    # Assumes the 4th whitespace-separated token of the
                    # version string is a parenthesised number -- TODO
                    # confirm for all supported drivers.
                    gpuDriverTest = float(
                        str(device.version).split()[3].strip('() '))
                    gpuDriver = (gpuDriverTest
                                 if gpuDriverTest >= 1084.4 else None)
                elif 'NV' in gpu_vendor:
                    gpuType = 'cuda'
                    # TODO: extract ###.# and test it against a cut-off.
                    gpuDriver = 'test'
                else:
                    gpuType = None
                    gpuDriver = None

                json_db['GPUs']['GPU' + str(GPUcount)] = {
                    "DeviceName": str(device.name),
                    "DeviceVendor": gpu_vendor,
                    "DeviceBits": int(device.address_bits),
                    "Device memory":
                        int(device.global_mem_size) // 1024 // 1024,
                    "DeviceSpeedMHz": int(device.max_clock_frequency),
                    "DeviceCores": int(device.max_compute_units),
                    "GpuType": str(gpuType),
                    "gpuDriver": gpuDriver,
                }
            else:
                print("Unknown device")

        # NOTE(review): "RAM" is taken from the last device's local_mem_size,
        # which may not be the host RAM -- confirm the intended source.
        json_db['System'] = system_entry(
            device.address_bits,
            int(device.local_mem_size) // 1024,
            int(device.address_bits))

    # Without OpenCL the GPUs cannot be used anyway, so only gather CPU info.
    if CL == 'nonCL':
        if system == 'Windows':
            cpus, cores, speed, cname, vendor = nonCL.windowsInfo.getCPUinfo()
            bits = int(nonCL.windowsInfo.getBits())
            ram = int(nonCL.windowsInfo.getRAMinfo())
        elif system == 'Linux':
            cpus, cores, speed, cname, vendor = nonCL.linuxInfo().getCPUinfo()
            bits = int(nonCL.linuxInfo().getBits())
            ram = int(nonCL.linuxInfo().getRAMinfo())
        else:
            raise RuntimeError(
                "non-OpenCL detection is not supported on " + str(system))

        # Every CPU gets the same record; only the key differs.
        for _ in cpus:
            CPUcount += 1
            json_db['CPUs']['CPU' + str(CPUcount)] = {
                "DeviceName": str(cname),
                "DeviceVendor": str(vendor),
                "DeviceBits": bits,
                "DeviceSpeedMHz": int(speed),
                "DeviceCores": int(cores),
            }

        json_db['System'] = system_entry(bits, ram, bits)

    # Write json_db to file in human readable format; the with-block closes
    # the file.
    report = json.dumps(json_db, sort_keys=True, indent=4)
    with open('info.json', 'w') as f:
        f.write(report)
    print(report)
Beispiel #14
0
# Backend selector, read from the HM_BACKEND environment variable
# (defaults to "ocl").
backend = os.environ.get("HM_BACKEND", "ocl")

# Number of kernel names handed out so far.
count = 0


def get_unique_kernel_name():
    """Return the next name in the sequence ``fn1``, ``fn2``, ...

    Each call increments the module-level ``count``.
    """
    global count
    count = count + 1
    return "fn" + str(count)


if backend in {"ocl", "opencl", "OCL"}:
    # Ask for GPU devices first; when none is found, fall back to the
    # unfiltered device list.
    # (Disabled alternative: devices = cl.clGetDeviceIDs(platforms[1]) with
    # platforms = cl.clGetPlatformIDs().)
    try:
        devices = cl.clGetDeviceIDs(device_type=cl.CL_DEVICE_TYPE_GPU)
    except cl.DeviceNotFoundError:
        devices = cl.clGetDeviceIDs()
    # The context covers only the last device of the list.
    context = cl.clCreateContext(devices[-1:])
    # A single queue when the TRAVIS environment variable is set, eight
    # otherwise.
    # (Disabled alternative: queues created with
    # properties=cl.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE.)
    queues = [
        cl.clCreateCommandQueue(context)
        for _ in range(1 if os.environ.get("TRAVIS") else 8)
    ]
    queue = queues[0]
    def __init__(self, shape, device=None):
        """Compute per-dimension upper bounds (``max_indices``) for a shape.

        Device limits are read from ``device``. When no device is given,
        the last device reported by ``pycl.clGetDeviceIDs()`` is queried;
        if that fails for any reason, fixed fallback limits are used
        (work-group size 512, per-dimension sizes 512, 40 compute units).

        :param shape: sequence of extents, one per dimension; a copy is
            stored as ``self.shape``.
        :param device: optional device object exposing
            ``max_work_item_sizes``, ``max_work_group_size`` and
            ``max_compute_units``.
        """
        self.shape = shape[:]
        self.dimensions = len(shape)
        if device is None:
            try:
                device = pycl.clGetDeviceIDs()[-1]
                self.max_local_group_sizes = pycl.clGetDeviceInfo(
                    device, pycl.cl_device_info.CL_DEVICE_MAX_WORK_ITEM_SIZES)
                self.max_work_group_size = pycl.clGetDeviceInfo(
                    device, pycl.cl_device_info.CL_DEVICE_MAX_WORK_GROUP_SIZE)
                self.compute_units = pycl.clGetDeviceInfo(
                    device, pycl.cl_device_info.CL_DEVICE_MAX_COMPUTE_UNITS)
            except Exception:
                # Best-effort fallback when no device can be queried.
                # ``Exception`` (not a bare except) so KeyboardInterrupt and
                # SystemExit still propagate.
                self.max_work_group_size = 512
                self.max_local_group_sizes = [512, 512, 512]
                self.compute_units = 40
        else:
            self.max_local_group_sizes = device.max_work_item_sizes
            self.max_work_group_size = device.max_work_group_size
            self.compute_units = device.max_compute_units

        overshoot = 1.5
        #
        # make a first estimate of the largest index to consider in each
        # dimension: the n-th root of the max work group size, in order to
        # minimize the surface area to volume ratio
        self.root_size = int(
            (self.max_work_group_size ** (1.0 / self.dimensions)) + 0.5)
        self.max_indices = [
            int(self.root_size * overshoot)
            for _ in range(self.dimensions)
        ]
        #
        # adjust each dimension downward if it exceeds the max_local_size for
        # that dimension; adjust the other dimensions upward if there is room
        for dim in range(self.dimensions):
            if self.max_indices[dim] > self.max_local_group_sizes[dim]:
                self.max_indices[dim] = self.max_local_group_sizes[dim]
                indices_to_fix = [
                    dim2 for dim2 in range(self.dimensions)
                    if dim2 != dim and
                    self.max_indices[dim2] < self.max_local_group_sizes[dim2]
                ]
                if indices_to_fix:
                    new_root = int(
                        int(self.max_work_group_size **
                            (1.0 / len(indices_to_fix)) + 0.5) * overshoot)
                    for dim2 in indices_to_fix:
                        self.max_indices[dim2] = min(
                            new_root, self.max_local_group_sizes[dim2])

        # if the indices selected so far are significantly larger than the
        # size of the target matrix, adjust that dimension's index downward
        # and adjust any trailing indices upward
        for dim in range(self.dimensions):
            if self.shape[dim] * overshoot < self.max_indices[dim]:
                self.max_indices[dim] = min(
                    self.max_local_group_sizes[dim],
                    int(self.shape[dim] * overshoot))
                if dim == 0:  # increase the size of the other dimensions
                    if self.dimensions == 2:
                        self.max_indices[1] = min(
                            int((self.max_work_group_size /
                                 float(self.max_indices[0])) * overshoot),
                            self.max_local_group_sizes[1]
                        )
                    if self.dimensions == 3:
                        temp_root = int(
                            math.sqrt(self.max_work_group_size /
                                      float(self.max_indices[0])) * overshoot)
                        self.max_indices[1] = min(
                            temp_root, self.max_local_group_sizes[1])
                        self.max_indices[2] = min(
                            temp_root, self.max_local_group_sizes[2])
                elif dim == 1:  # increase the size of the remaining direction
                    if self.dimensions == 3:
                        # NOTE(review): every other branch clamps with min();
                        # max() here never yields less than the device limit
                        # for dimension 2 -- confirm whether min() was
                        # intended. Behavior deliberately left unchanged.
                        self.max_indices[2] = max(
                            int((self.max_work_group_size /
                                 float(self.max_indices[0] *
                                       self.max_indices[1])) * overshoot),
                            self.max_local_group_sizes[2]
                        )
Beispiel #16
0
def main():
    """Gather hardware information and write it to ``info.json``.

    With the module-level ``CL`` flag set to ``'openCL'`` every OpenCL
    device is recorded as a CPU or GPU entry; with ``'nonCL'`` only CPU
    information from the ``nonCL`` helpers is recorded. A ``System`` entry
    with a generated client ID is added in both cases. The JSON document
    is written to ``info.json`` and printed.

    Sizes are computed with floor division so they are whole numbers on
    both Python 2 and Python 3.

    :raises RuntimeError: if the OpenCL device list is empty, or if the
        ``nonCL`` path runs on an operating system other than Windows or
        Linux.
    """
    json_db = {'CPUs': {}, 'GPUs': {}}

    # email = raw_input("What is your e-mail address?: ")
    email = "*****@*****.**"

    # Get operating system.
    system = platform.system()

    # Get RAM, bits, CPU and GPU information.
    CPUcount = 0
    GPUcount = 0
    gpuType = None
    gpuDriver = None

    def make_client_id(raw_bits):
        # OS initial + first character of the word size + CPU count + GPU
        # count + first character of the GPU type ('N' for None) + '.' +
        # zero-padded 4-digit random number.
        suffix = str(random.randint(0, 9999)).rjust(4, '0')
        return (system[0] + str(raw_bits)[0] + str(CPUcount) +
                str(GPUcount) + str(gpuType)[0] + '.' + suffix)

    def make_system_record(ram, bits, client_id):
        return {
            "OS": str(system),
            "RAM": ram,
            "Bits": bits,
            "CPUs": CPUcount,
            "GPUs": GPUcount,
            "email": str(email),
            "ClientID": client_id,
        }

    if CL == 'openCL':
        CL_Devices = cl.clGetDeviceIDs()
        if not CL_Devices:
            # The system record below needs at least one device.
            raise RuntimeError("no OpenCL devices found")

        for device in CL_Devices:
            kind = str(device.type)
            if 'CPU' in kind:
                CPUcount += 1
                json_db['CPUs']['CPU' + str(CPUcount)] = {
                    "DeviceName": str(device.name),
                    "DeviceVendor": str(device.vendor),
                    "DeviceBits": int(device.address_bits),
                    "DeviceSpeedMHz": int(device.max_clock_frequency),
                    "DeviceCores": int(device.max_compute_units),
                }
            elif 'GPU' in kind:
                GPUcount += 1
                gpu_vendor = str(device.vendor)
                if re.search('(AMD|Advanced Micro Device)', gpu_vendor):
                    gpuType = 'ocl'
                    # Verify GPU driver version is 13.1 or higher.
                    # Assumes the 4th whitespace-separated token of the
                    # version string is a parenthesised number -- TODO
                    # confirm for all supported drivers.
                    gpuDriverTest = float(
                        str(device.version).split()[3].strip('() '))
                    if gpuDriverTest >= 1084.4:
                        gpuDriver = gpuDriverTest
                    else:
                        gpuDriver = None
                elif 'NV' in gpu_vendor:
                    gpuType = 'cuda'
                    # TODO: extract ###.# and test it against a cut-off.
                    gpuDriver = 'test'
                else:
                    gpuType = None
                    gpuDriver = None

                json_db['GPUs']['GPU' + str(GPUcount)] = {
                    "DeviceName": str(device.name),
                    "DeviceVendor": gpu_vendor,
                    "DeviceBits": int(device.address_bits),
                    "Device memory":
                        int(device.global_mem_size) // 1024 // 1024,
                    "DeviceSpeedMHz": int(device.max_clock_frequency),
                    "DeviceCores": int(device.max_compute_units),
                    "GpuType": str(gpuType),
                    "gpuDriver": gpuDriver,
                }
            else:
                print("Unknown device")

        # System values come from the last device in the list.
        # NOTE(review): "RAM" uses that device's local_mem_size, which may
        # not be the host RAM -- confirm the intended source.
        clientID = make_client_id(device.address_bits)
        json_db['System'] = make_system_record(
            int(device.local_mem_size) // 1024,
            int(device.address_bits),
            clientID)

    # Without OpenCL the GPUs cannot be used anyway, so only gather CPU info.
    if CL == 'nonCL':
        if system == 'Windows':
            cpus, cores, speed, cname, vendor = nonCL.windowsInfo.getCPUinfo()
            bits = int(nonCL.windowsInfo.getBits())
            ram = int(nonCL.windowsInfo.getRAMinfo())
        elif system == 'Linux':
            cpus, cores, speed, cname, vendor = nonCL.linuxInfo().getCPUinfo()
            bits = int(nonCL.linuxInfo().getBits())
            ram = int(nonCL.linuxInfo().getRAMinfo())
        else:
            raise RuntimeError(
                "non-OpenCL detection is not supported on " + str(system))

        # One identical record per detected CPU.
        for _ in cpus:
            CPUcount += 1
            json_db['CPUs']['CPU' + str(CPUcount)] = {
                "DeviceName": str(cname),
                "DeviceVendor": str(vendor),
                "DeviceBits": bits,
                "DeviceSpeedMHz": int(speed),
                "DeviceCores": int(cores),
            }

        clientID = make_client_id(bits)
        json_db['System'] = make_system_record(ram, bits, clientID)

    # Write json_db to file in human readable format; leaving the with-block
    # closes the file.
    rendered = json.dumps(json_db, sort_keys=True, indent=4)
    with open('info.json', 'w') as f:
        f.write(rendered)
    print(rendered)
Beispiel #17
0
 def test_simple_cache(self):
     """Repeated lookups for the same device list must compare equal."""
     last_device = cl.clGetDeviceIDs()[-1]
     first_lookup = get_context_and_queue_from_devices([last_device])
     second_lookup = get_context_and_queue_from_devices([last_device])
     self.assertEqual(first_lookup, second_lookup)
Beispiel #18
0

# Backend name taken from the HM_BACKEND environment variable ("ocl" when
# unset).
backend = os.environ.get("HM_BACKEND", "ocl")

# Running counter behind get_unique_kernel_name().
count = 0
def get_unique_kernel_name():
    """Hand out ``fn1``, ``fn2``, ... -- one new name per call.

    The module-level ``count`` is incremented on every call.
    """
    global count
    count = count + 1
    return "fn%d" % count


if backend in {"ocl", "opencl", "OCL"}:
    try:
        # platforms = cl.clGetPlatformIDs()
        # devices = cl.clGetDeviceIDs(platforms[1])
        devices = cl.clGetDeviceIDs(device_type=cl.CL_DEVICE_TYPE_GPU)
    except cl.DeviceNotFoundError:
        devices = cl.clGetDeviceIDs()
    context = cl.clCreateContext(devices[-1:])
    if os.environ.get("TRAVIS"):
        queues = [cl.clCreateCommandQueue(context)]
    else:
        queues = [
            cl.clCreateCommandQueue(
                context
            ) for _ in range(8)
        ]
        # queues = [
        #     cl.clCreateCommandQueue(
        #         context,
        #         properties=cl.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
Beispiel #19
0
 def __init__(self):
     """Create a context and command queue on the last reported device."""
     last_device = clGetDeviceIDs()[-1]
     self.device = last_device
     new_context = clCreateContext([last_device])
     self.context = new_context
     self.queue = clCreateCommandQueue(new_context)