def bench_cpu1d_lut(self):
    """
    Benchmark the CPU look-up-table 1D integration (``ai.xrpd_LUT``).

    For every geometry file of ``ds_list`` one integration is run first to
    measure the initialisation cost, then the call is timed with ``timeit``
    in a fresh namespace.  The best time per call (in milliseconds) is
    recorded against the image size (in Mpixel) when the result agrees with
    the reference, i.e. when ``R < self.LIMIT``.  The points are finally
    stored in ``self.results`` under the label "1D_CPU_parallel_OpenMP".
    """
    self.update_mp()
    print("Working on processor: %s" % self.get_cpu())
    label = "1D_CPU_parallel_OpenMP"
    results = {}
    self.new_curve(results, label)
    for param in ds_list:
        self.update_mp()
        ref = self.get_ref(param)
        fn = datasets[param]
        ai = pyFAI.load(param)
        data = fabio.open(fn).data
        size = data.size
        N = min(data.shape)
        print("1D integration of %s %.1f Mpixel -> %i bins" % (op.basename(fn), size / 1e6, N))

        # First call: its duration is reported as the initialisation time.
        t0 = time.time()
        res = ai.xrpd_LUT(data, N)
        t1 = time.time()
        self.print_init(t1 - t0)
        # Single-argument print(): same output as the former print statement
        # on Python 2, and still valid syntax on Python 3.
        print("lut.shape= %s lut.nbytes (MB) %s" % (ai._lut_integrator.lut.shape,
                                                     ai._lut_integrator.size * 8 / 1e6))
        self.update_mp()
        # Drop the integrator and the image before timeit builds its own copies.
        del ai, data
        self.update_mp()

        # The setup code runs inside timeit's namespace, hence the lines at
        # column 0 and the explicit re-loading of geometry and image.
        setup = """
import pyFAI,fabio
ai=pyFAI.load(r"%s")
data = fabio.open(r"%s").data
N=min(data.shape)
out=ai.xrpd_LUT(data,N)""" % (param, fn)
        t = timeit.Timer("ai.xrpd_LUT(data,N,safe=False)", setup)
        per_call = [elapsed / self.nbr
                    for elapsed in t.repeat(repeat=self.repeat, number=self.nbr)]
        tmin = min(per_call)
        self.print_exec(tmin)

        R = utilstest.Rwp(res, ref)
        print("%sResults are bad with R=%.3f%s" % (self.WARNING, R, self.ENDC)
              if R > self.LIMIT else
              "%sResults are good with R=%.3f%s" % (self.OKGREEN, R, self.ENDC))
        self.update_mp()
        # NOTE(review): R == self.LIMIT is reported as "good" above but is
        # not recorded below; kept as in the original.
        if R < self.LIMIT:
            size /= 1e6  # pixels -> Mpixel
            tmin *= 1000.0  # seconds -> milliseconds
            results[size] = tmin
            self.new_point(size, tmin)
            self.update_mp()
    self.print_sep()
    self.meth.append(label)
    self.results[label] = results
    self.update_mp()
def bench_gpu1d(self, devicetype="gpu", useFp64=True, platformid=None, deviceid=None):
    """
    Benchmark the forward OpenCL 1D integration (``ai.xrpd_OpenCL``).

    Each dataset of ``ds_list`` is integrated once (initialisation time),
    compared with the result of ``ai.xrpd`` and then timed with ``timeit``.
    Timings of datasets whose R factor is below ``self.LIMIT`` are recorded
    in milliseconds against the image size in Mpixel.

    @param devicetype: device type given to ``ocl.select_device`` and ``xrpd_OpenCL``
    @param useFp64: select the "64 bits mode" when true, "32 bits mode" otherwise
    @param platformid: OpenCL platform identifier forwarded to ``xrpd_OpenCL``
    @param deviceid: OpenCL device identifier forwarded to ``xrpd_OpenCL``
    """
    self.update_mp()
    mode = "64 bits mode" if useFp64 else "32 bits mode"
    print("Working on %s, in " % devicetype + mode + "(%s.%s)" % (platformid, deviceid))
    if ocl is None or not ocl.select_device(devicetype):
        print("No pyopencl or no such device: skipping benchmark")
        return

    bits = "64" if useFp64 else "32"
    label = "Forward_OpenCL_%s_%s_bits" % (devicetype, bits)
    results = {}
    curve_started = False
    for param in ds_list:
        self.update_mp()
        image_path = datasets[param]
        integrator = pyFAI.load(param)
        image = fabio.open(image_path).data
        npix = image.size
        nbins = min(image.shape)
        print("1D integration of %s %.1f Mpixel -> %i bins (%s)" % (op.basename(image_path), npix / 1e6, nbins, mode))

        # Any failure of the first call skips this dataset.
        try:
            started = time.time()
            res = integrator.xrpd_OpenCL(image, nbins,
                                         devicetype=devicetype,
                                         useFp64=useFp64,
                                         platformid=platformid,
                                         deviceid=deviceid)
            stopped = time.time()
        except Exception as error:
            print("Failed to find an OpenCL GPU (useFp64:%s) %s" % (useFp64, error))
            continue
        self.print_init(stopped - started)
        self.update_mp()

        # Quality check against the reference integration.
        ref = integrator.xrpd(image, nbins)
        R = utilstest.Rwp(res, ref)
        if R > self.LIMIT:
            verdict = "%sResults are bad with R=%.3f%s" % (self.WARNING, R, self.ENDC)
        else:
            verdict = "%sResults are good with R=%.3f%s" % (self.OKGREEN, R, self.ENDC)
        print(verdict)

        # The setup code is executed by timeit in its own namespace.
        setup = """
import pyFAI,fabio
ai=pyFAI.load(r"%s")
data = fabio.open(r"%s").data
N=min(data.shape)
out=ai.xrpd_OpenCL(data,N, devicetype=r"%s", useFp64=%s, platformid=%s, deviceid=%s)""" % (
            param, image_path, devicetype, useFp64, platformid, deviceid)
        timer = timeit.Timer("ai.xrpd_OpenCL(data,N,safe=False)", setup)
        per_call = [elapsed / self.nbr
                    for elapsed in timer.repeat(repeat=self.repeat, number=self.nbr)]
        best = min(per_call)
        del timer
        self.update_mp()
        self.print_exec(best)
        print("")

        if R < self.LIMIT:
            mpix = npix / 1e6  # pixels -> Mpixel
            best_ms = best * 1000.0  # seconds -> milliseconds
            results[mpix] = best_ms
            if curve_started:
                self.new_point(mpix, best_ms)
            else:
                self.new_curve(results, label)
                curve_started = True
            self.update_mp()
    self.print_sep()
    self.meth.append(label)
    self.results[label] = results
    self.update_mp()
def bench_cpu1d_csr_ocl(self, devicetype="GPU", platformid=None, deviceid=None, padded=False, block_size=32):
    """
    Benchmark the OpenCL CSR 1D integration (``ai.xrpd_CSR_OCL``).

    Each dataset of ``ds_list`` is integrated once (initialisation time),
    then timed with ``timeit``; the loop stops at the first MemoryError.
    Timings of datasets whose R factor against the reference is below
    ``self.LIMIT`` are recorded in milliseconds against the size in Mpixel.

    @param devicetype: device type used to select a device and passed to ``xrpd_CSR_OCL``
    @param platformid: OpenCL platform index; selected from devicetype when None
    @param deviceid: OpenCL device index; selected from devicetype when None
    @param padded: forwarded to ``xrpd_CSR_OCL``
    @param block_size: forwarded to ``xrpd_CSR_OCL``
    """
    self.update_mp()
    if ocl is None:
        print("No pyopencl")
        return
    if platformid is None or deviceid is None:
        # Incomplete identification: let pyFAI choose from the device type.
        platdev = ocl.select_device(devicetype)
        if not platdev:
            print("No such OpenCL device: skipping benchmark")
            return
        platformid, deviceid = platdev
    print("Working on device: %s platform: %s device: %s padding: %s block_size= %s" % (
        devicetype,
        ocl.platforms[platformid],
        ocl.platforms[platformid].devices[deviceid],
        padded,
        block_size))

    label = "1D_%s_parallel_OpenCL, padded=%s, block_size=%s" % (devicetype, padded, block_size)
    results = {}
    curve_started = False
    for param in ds_list:
        self.update_mp()
        ref = self.get_ref(param)
        image_path = datasets[param]
        integrator = pyFAI.load(param)
        image = fabio.open(image_path).data
        npix = image.size
        nbins = min(image.shape)
        print("1D integration of %s %.1f Mpixel -> %i bins" % (op.basename(image_path), npix / 1e6, nbins))

        started = time.time()
        try:
            res = integrator.xrpd_CSR_OCL(image, nbins,
                                          devicetype=devicetype,
                                          platformid=platformid,
                                          deviceid=deviceid,
                                          padded=padded,
                                          block_size=block_size)
        except MemoryError as error:
            print(error)
            break
        stopped = time.time()
        self.print_init(stopped - started)
        self.update_mp()

        # Release the integrator and the image before timeit rebuilds them.
        integrator.reset()
        del integrator, image
        self.update_mp()

        # The setup code is executed by timeit in its own namespace.
        setup = """
import pyFAI,fabio
ai=pyFAI.load(r"%s")
data = fabio.open(r"%s").data
N=min(data.shape)
out=ai.xrpd_CSR_OCL(data,N,devicetype=r"%s",platformid=%s,deviceid=%s,padded=%s,block_size=%s)""" % (
            param, image_path, devicetype, platformid, deviceid, padded, block_size)
        statement = "ai.xrpd_CSR_OCL(data,N,safe=False,padded=%s,block_size=%s)" % (padded, block_size)
        timer = timeit.Timer(statement, setup)
        per_call = [elapsed / self.nbr
                    for elapsed in timer.repeat(repeat=self.repeat, number=self.nbr)]
        best = min(per_call)
        self.update_mp()
        del timer
        self.update_mp()
        self.print_exec(best)

        R = utilstest.Rwp(res, ref)
        if R > self.LIMIT:
            verdict = "%sResults are bad with R=%.3f%s" % (self.WARNING, R, self.ENDC)
        else:
            verdict = "%sResults are good with R=%.3f%s" % (self.OKGREEN, R, self.ENDC)
        print(verdict)

        if R < self.LIMIT:
            mpix = npix / 1e6  # pixels -> Mpixel
            best_ms = best * 1000.0  # seconds -> milliseconds
            results[mpix] = best_ms
            if curve_started:
                self.new_point(mpix, best_ms)
            else:
                self.new_curve(results, label)
                curve_started = True
            self.update_mp()
    self.print_sep()
    self.meth.append(label)
    self.results[label] = results
    self.update_mp()
def bench_1d_ocl_csr(self, check=False, opencl=None):
    """
    Benchmark the "ocl_csr" 1D integration for a series of block sizes.

    The dataset is fixed ("Pilatus1M.poni") and the block size takes the
    values 1, 2, 4, ... 256.  For each block size the kernel time is taken
    from the profiling events of ``ai._ocl_csr_integr`` whose name contains
    "integrate"; the best mean over ``self.repeat`` runs is kept, together
    with an estimate of the arithmetic throughput and of the memory
    bandwidth.  Results go to ``self.results``, ``self.flops`` and
    ``self.mem_band`` under a common label.

    @param check: compare the result with the reference and keep only the
                  points with R < self.LIMIT
    @param opencl: dict containing platformid, deviceid and devicetype;
                   missing ids are selected from devicetype and written
                   back into the dict
    """
    method = "ocl_csr"
    self.update_mp()
    if opencl:
        if ocl is None:
            print("No pyopencl")
            return
        if (opencl.get("platformid") is None) or (opencl.get("deviceid") is None):
            platdev = ocl.select_device(opencl.get("devicetype"))
            if not platdev:
                print("No such OpenCL device: skipping benchmark")
                return
            platformid, deviceid = opencl["platformid"], opencl["deviceid"] = platdev
        else:
            # Both ids were supplied by the caller: bind the locals used
            # below (they were left undefined before, raising NameError).
            platformid, deviceid = opencl["platformid"], opencl["deviceid"]
        devicetype = opencl["devicetype"] = ocl.platforms[platformid].devices[deviceid].type
        print("Working on device: %s platform: %s device: %s" % (
            devicetype,
            ocl.platforms[platformid],
            ocl.platforms[platformid].devices[deviceid]))
        label = "1D_" + method + "_" + devicetype
        method += "_%i,%i" % (opencl["platformid"], opencl["deviceid"])
    else:
        print("Working on processor: %s" % self.get_cpu())
        label = "1D_" + self.LABELS[method]

    results = {}
    flops = {}
    mem_band = {}
    first = True
    param = "Pilatus1M.poni"
    block_size_list = [1, 2, 4, 8, 16, 32, 64, 128, 256]
    for block_size in block_size_list:
        self.update_mp()
        fn = datasets[param]
        setup = self.setup_1d % (param, fn)
        # NOTE(review): bench_1d fills the same self.stmt_1d template with a
        # single argument; confirm which arity the template really expects.
        stmt = self.stmt_1d % (method, block_size)
        # Python 2 exec statement: the setup code binds ai, data and N as
        # locals of this method.  Keep list comprehensions (no generator
        # expressions or lambdas) in this function, Python 2 rejects an
        # unqualified exec next to nested scopes with free variables.
        exec(setup)
        size = data.size / 1.0e6
        print("1D integration of %s %.1f Mpixel -> %i bins" % (op.basename(fn), size, N))
        t0 = time.time()
        res = eval(stmt)
        self.print_init(time.time() - t0)
        self.update_mp()
        if check:
            if "csr" in method:
                # Float divisor so that the size in MB is not truncated by
                # Python 2 integer division.
                print("csr: size= %s \t nbytes %.3f MB " % (
                    ai._csr_integrator.data.size,
                    ai._csr_integrator.lut_nbytes / 2.0 ** 20))

        # Operation and memory-access estimates for this block size.
        bins = ai._csr_integrator.bins
        nnz = ai._csr_integrator.nnz
        parallel_reduction = sum([2 ** i for i in range(1, int(log2(block_size)))])
        FLOPs = 9 * nnz + 11 * parallel_reduction + 1 * bins
        mem_access = (2 * block_size * bins + 5 * nnz + 7 * bins) * 4

        del ai, data
        self.update_mp()
        t_repeat = []
        for _ in range(self.repeat):
            t = []
            exec(setup)
            for _ in range(self.nbr):
                eval(stmt)
            for e in ai._ocl_csr_integr.events:
                if "integrate" in e[0]:
                    # presumably profile timestamps are in ns, giving ms
                    # here -- TODO confirm
                    et = 1e-6 * (e[1].profile.end - e[1].profile.start)
                    t.append(et)
            exec(self.unsetup)
            t_repeat.append(numpy.mean(t))
        tmin = min(t_repeat)
        self.update_mp()
        self.print_exec(tmin)

        if check:
            ref = self.get_ref(param)
            R = utilstest.Rwp(res, ref)
            print("%sResults are bad with R=%.3f%s" % (self.WARNING, R, self.ENDC)
                  if R > self.LIMIT else
                  "%sResults are good with R=%.3f%s" % (self.OKGREEN, R, self.ENDC))
            self.update_mp()
            if R < self.LIMIT:
                results[block_size] = tmin
                flops[block_size] = (FLOPs / tmin) * 1e-6
                mem_band[block_size] = (mem_access / tmin) * 1e-6
                self.update_mp()
        else:
            # NOTE(review): unlike the checked branch, these values are not
            # scaled by 1e-6; kept as in the original, confirm the intended
            # unit with the consumers of self.flops / self.mem_band.
            results[block_size] = tmin
            flops[block_size] = FLOPs / tmin
            mem_band[block_size] = mem_access / tmin
        if first:
            self.new_curve(results, label)
            first = False
        else:
            self.new_point(block_size, tmin)
    self.print_sep()
    self.meth.append(label)
    self.results[label] = results
    self.flops[label] = flops
    self.mem_band[label] = mem_band
    self.update_mp()
def bench_1d(self, method="splitBBox", check=False, opencl=None):
    """
    Benchmark one 1D integration method over every dataset of ``ds_list``.

    Each dataset is integrated once (initialisation time), then timed with
    ``timeit``; the best time per call is recorded in milliseconds against
    the image size in Mpixel.  The loop stops at the first memory or
    runtime error.  Results are stored in ``self.results`` under a label
    built from ``self.LABELS[method]``.

    @param method: method to be benchmarked, a key of self.LABELS
    @param check: check results vs ref and keep only the points with
                  R < self.LIMIT; also report the LUT/CSR size
    @param opencl: dict containing platformid, deviceid and devicetype;
                   missing ids are selected from devicetype and written
                   back into the dict
    """
    self.update_mp()
    if opencl:
        if ocl is None:
            print("No pyopencl")
            return
        if (opencl.get("platformid") is None) or (opencl.get("deviceid") is None):
            platdev = ocl.select_device(opencl.get("devicetype"))
            if not platdev:
                print("No such OpenCL device: skipping benchmark")
                return
            platformid, deviceid = opencl["platformid"], opencl["deviceid"] = platdev
        else:
            platformid, deviceid = opencl["platformid"], opencl["deviceid"]
        devicetype = opencl["devicetype"] = ocl.platforms[platformid].devices[deviceid].type
        print("Working on device: %s platform: %s device: %s" % (
            devicetype,
            ocl.platforms[platformid],
            ocl.platforms[platformid].devices[deviceid]))
        label = "1D_" + (self.LABELS[method] % devicetype)
        method += "_%i,%i" % (opencl["platformid"], opencl["deviceid"])
        memory_error = (pyFAI.opencl.pyopencl.MemoryError, MemoryError,
                        pyFAI.opencl.pyopencl.RuntimeError, RuntimeError)
    else:
        print("Working on processor: %s" % self.get_cpu())
        label = "1D_" + self.LABELS[method]
        memory_error = (MemoryError, RuntimeError)

    results = {}
    first = True
    for param in ds_list:
        self.update_mp()
        fn = datasets[param]
        setup = self.setup_1d % (param, fn)
        stmt = self.stmt_1d % method
        # Python 2 exec statement: the setup code binds ai, data and N as
        # locals of this method.  Keep list comprehensions (no generator
        # expressions or lambdas) in this function, Python 2 rejects an
        # unqualified exec next to nested scopes with free variables.
        exec(setup)
        size = data.size / 1.0e6
        print("1D integration of %s %.1f Mpixel -> %i bins" % (op.basename(fn), size, N))
        try:
            t0 = time.time()
            res = eval(stmt)
            self.print_init(time.time() - t0)
        except memory_error as error:
            print(error)
            break
        self.update_mp()
        if check:
            # Float divisor so that the size in MB is not truncated by
            # Python 2 integer division.
            if "lut" in method:
                print("lut: shape= %s \t nbytes %.3f MB " % (
                    ai._lut_integrator.lut.shape,
                    ai._lut_integrator.lut_nbytes / 2.0 ** 20))
            elif "csr" in method:
                print("csr: size= %s \t nbytes %.3f MB " % (
                    ai._csr_integrator.data.size,
                    ai._csr_integrator.lut_nbytes / 2.0 ** 20))
        # Release the integrator and the image before timeit rebuilds them.
        del ai, data
        self.update_mp()
        try:
            # The statement is also appended to the setup code, so it runs
            # once in timeit's namespace before the timed calls.
            t = timeit.Timer(stmt, setup + stmt)
            tmin = min([elapsed / self.nbr
                        for elapsed in t.repeat(repeat=self.repeat, number=self.nbr)])
        except memory_error as error:
            print(error)
            break
        self.update_mp()
        self.print_exec(tmin)
        tmin *= 1000.0  # seconds -> milliseconds
        if check:
            ref = self.get_ref(param)
            R = utilstest.Rwp(res, ref)
            print("%sResults are bad with R=%.3f%s" % (self.WARNING, R, self.ENDC)
                  if R > self.LIMIT else
                  "%sResults are good with R=%.3f%s" % (self.OKGREEN, R, self.ENDC))
            self.update_mp()
            if R < self.LIMIT:
                results[size] = tmin
                self.update_mp()
                if first:
                    if opencl:
                        self.new_curve(results, label, style="--")
                    else:
                        self.new_curve(results, label, style="-")
                    first = False
                else:
                    self.new_point(size, tmin)
        else:
            results[size] = tmin
            if first:
                self.new_curve(results, label)
                first = False
            else:
                self.new_point(size, tmin)
    self.print_sep()
    self.meth.append(label)
    self.results[label] = results
    self.update_mp()
# Ad-hoc comparison of the Cython LUT integrator (ai._lut_integrator) with
# the OpenCL LUT integrator on the Pilatus1M test image.  Every print uses
# the single-argument print(...) form: same output as the former print
# statements on Python 2, and valid syntax on Python 3.
ai = pyFAI.load("testimages/Pilatus1M.poni")
data = fabio.open("testimages/Pilatus1M.edf").data
ref = ai.xrpd_LUT(data, 1000)
obt = ai.xrpd_LUT_OCL(data, 1000)
# Largest absolute difference between the second element of both results.
print(abs(obt[1] - ref[1]).max())

# Build a stand-alone OpenCL integrator from the look-up table computed above.
lut = ai._lut_integrator.lut
gpu = pyFAI.ocl_azim_lut.OCL_LUT_Integrator(lut, data.size, "GPU")
print(gpu.device)

# Count the pixels equal to -2 and -1 in the input image and in the "image"
# buffer read back from the device.
img = numpy.zeros(data.shape, dtype="float32")
print("ref %s %s" % ((data == -2).sum(), (data == -1).sum()))
pyopencl.enqueue_copy(gpu._queue, img, gpu._cl_mem["image"])  #.wait()
print("obt %s %s" % ((img == -2).sum(), (img == -1).sum()))

# Without any correction
out_cyt = ai._lut_integrator.integrate(data)
out_ocl = gpu.integrate(data)[0]
print("NoCorr R= %s" % (utilstest.Rwp((out_cyt[0], out_ocl), out_cyt[:2], "no corrections"),))
nodummy = out_cyt[1]
plot(nodummy + 1, label="no_corr")

# With dummy values
out_cyt = ai._lut_integrator.integrate(data, dummy=-2, delta_dummy=1.5)
out_ocl = gpu.integrate(data, dummy=-2, delta_dummy=1.5)[0]
print("Dummy R= %s" % (utilstest.Rwp((out_cyt[0], out_ocl), out_cyt[:2], "Dummy"),))
#print "nodummy/Dummy", utilstest.Rwp((out_cyt[0], out_cyt[1]), (out_cyt[0], nodummy), "nodummy/Dummy")

# With a random dark image
dark = numpy.random.random(data.shape)
out_cyt = ai._lut_integrator.integrate(data, dark=dark)
out_ocl = gpu.integrate(data, dark=dark)[0]
print("Dark R= %s" % (utilstest.Rwp((out_cyt[0], out_ocl), out_cyt[:2], "dark"),))

# With a constant flat image
flat = 2 * numpy.ones_like(data)
out_cyt = ai._lut_integrator.integrate(data, flat=flat)
out_ocl = gpu.integrate(data, flat=flat)[0]