def measure_cache_bandwidths():
    """Time a strided read-modify-write kernel over growing arrays and plot it.

    A C function ``go`` is compiled through codepy and called once for every
    array size ``2**10 .. 2**26`` ints.  Each call performs ``100*steps``
    increments, touching every 16th int and wrapping with a power-of-two
    mask.  The effective bandwidth (GB/s) per size is printed, plotted on
    log-log axes to ``bw.pdf`` (then cropped with ``pdfcrop``), and the C
    source is saved to ``bw.c``.

    Relies on the module-level names ``toolchain``, ``MODULE_CODE``,
    ``time``, ``pt``, ``np`` and ``os``.  Note that the timed region covers
    the whole ``go`` call, including its ``malloc``, final summation and
    ``free``.
    """
    FUNC_CODE = """
    int go(unsigned array_size, unsigned steps)
    {
      int *ary = (int *) malloc(sizeof(int) * array_size);
      unsigned asm1 = array_size - 1;

      for (unsigned i = 0; i < 100*steps;)
      {
        #define ONE ary[(i++*16) & asm1] ++;
        #define FIVE ONE ONE ONE ONE ONE
        #define TEN FIVE FIVE
        #define FIFTY TEN TEN TEN TEN TEN
        #define HUNDRED FIFTY FIFTY
        HUNDRED
      }

      int result = 0;

      for (unsigned i = 0; i < array_size; ++i)
          result += ary[i];

      free(ary);
      return result;
    }
    """

    from codepy.jit import extension_from_string
    cmod = extension_from_string(toolchain, "module", MODULE_CODE % FUNC_CODE)

    sizes = []
    bandwidths = []

    steps = 2**(26-7)
    # The amount of data moved is the same for every array size, so compute
    # it once: 2 for rw, 4 for sizeof(int).
    gb_transferred = 2*100*steps*4/1e9

    for array_size in [2**i for i in range(10, 27)]:
        start = time()
        cmod.go(array_size, steps)
        stop = time()

        sizes.append(array_size*4)  # array size in bytes
        elapsed = stop-start
        bandwidth = gb_transferred/elapsed
        bandwidths.append(bandwidth)

        # Same output as the Python 2 statement `print array_size, bandwidth`,
        # but also valid on Python 3.
        print("%s %s" % (array_size, bandwidth))

    pt.clf()
    pt.rc("font", size=20)
    # NOTE(review): `basex` was renamed to `base` in newer matplotlib
    # releases -- confirm against the matplotlib version in use.
    pt.loglog(sizes, bandwidths, "o-", basex=2)
    # Dashed curve: measured bandwidth scaled by the access stride of 16 ints
    # (presumably one full cache line moved per touched int -- TODO confirm).
    pt.loglog(sizes, 16*np.array(bandwidths), "--", basex=2)
    pt.xlabel("Array Size [Bytes]")
    pt.ylabel("Eff. Bandwidth [GB/s]")
    pt.grid()
    pt.subplots_adjust(bottom=0.15)
    pt.savefig("bw.pdf")

    # Use a context manager so the file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open("bw.c", "w") as c_file:
        c_file.write(FUNC_CODE)

    os.system("pdfcrop bw.pdf")
def measure_cache_bandwidths():
    """Measure effective memory bandwidth versus array size and plot it.

    Compiles the C function ``go`` with codepy, then calls it for array
    sizes of ``2**10`` through ``2**26`` ints.  Every call executes
    ``100 * steps`` increments at a stride of 16 ints, wrapped into the array
    by a power-of-two mask.  For each size the bandwidth in GB/s is printed;
    afterwards the results are plotted to ``bw.pdf`` (cropped via
    ``pdfcrop``) and the C source is written to ``bw.c``.

    Uses the module-level names ``toolchain``, ``MODULE_CODE``, ``time``,
    ``pt``, ``np`` and ``os``.  The measured time spans the complete ``go``
    call, i.e. it includes allocation, the final summation and ``free``.
    """
    FUNC_CODE = """
    int go(unsigned array_size, unsigned steps)
    {
      int *ary = (int *) malloc(sizeof(int) * array_size);
      unsigned asm1 = array_size - 1;

      for (unsigned i = 0; i < 100*steps;)
      {
        #define ONE ary[(i++*16) & asm1] ++;
        #define FIVE ONE ONE ONE ONE ONE
        #define TEN FIVE FIVE
        #define FIFTY TEN TEN TEN TEN TEN
        #define HUNDRED FIFTY FIFTY
        HUNDRED
      }

      int result = 0;

      for (unsigned i = 0; i < array_size; ++i)
          result += ary[i];

      free(ary);
      return result;
    }
    """

    from codepy.jit import extension_from_string
    cmod = extension_from_string(toolchain, "module", MODULE_CODE % FUNC_CODE)

    sizes = []
    bandwidths = []

    steps = 2**(26 - 7)
    # Loop-invariant: 2 for rw, 4 for sizeof(int).
    gb_transferred = 2 * 100 * steps * 4 / 1e9

    for array_size in [2**i for i in range(10, 27)]:
        start = time()
        cmod.go(array_size, steps)
        stop = time()

        sizes.append(array_size * 4)  # bytes occupied by the int array
        elapsed = stop - start
        bandwidth = gb_transferred / elapsed
        bandwidths.append(bandwidth)

        # Prints exactly what `print array_size, bandwidth` printed on
        # Python 2 while remaining valid syntax on Python 3.
        print("%s %s" % (array_size, bandwidth))

    pt.clf()
    pt.rc("font", size=20)
    # NOTE(review): newer matplotlib spells this keyword `base`; confirm
    # which matplotlib version this script targets.
    pt.loglog(sizes, bandwidths, "o-", basex=2)
    # Second, dashed curve is the measurement multiplied by the stride (16);
    # presumably models whole-cache-line traffic -- TODO confirm.
    pt.loglog(sizes, 16 * np.array(bandwidths), "--", basex=2)
    pt.xlabel("Array Size [Bytes]")
    pt.ylabel("Eff. Bandwidth [GB/s]")
    pt.grid()
    pt.subplots_adjust(bottom=0.15)
    pt.savefig("bw.pdf")

    # Close the handle explicitly; the original left it to the garbage
    # collector.
    with open("bw.c", "w") as c_file:
        c_file.write(FUNC_CODE)

    os.system("pdfcrop bw.pdf")
def compile(self, toolchain, **kwargs):
    """Build (if needed) and return the extension module described by *self*.

    The generated source is handed to
    :func:`codepy.jit.extension_from_string` together with a copy of
    *toolchain* to which Boost.Python has been added.  Any keyword arguments
    accepted by that function may be supplied through *kwargs*.
    """
    from codepy.libraries import add_boost_python
    from codepy.jit import extension_from_string

    # Boost.Python is added to a copy, not to the caller's toolchain object.
    boost_toolchain = toolchain.copy()
    add_boost_python(boost_toolchain)

    module_name = self.name
    # The generated code is stringified and terminated with a newline.
    source_text = str(self.generate()) + "\n"

    return extension_from_string(
        boost_toolchain, module_name, source_text, **kwargs)
def find_associativity():
    """Benchmark strided byte accesses over a grid of array sizes and strides.

    Compiles the C function ``go`` with codepy and times it for every
    combination of array size (1..24, passed as ``array_megs << 20``
    elements) and stride (1..639).  Each call performs ``steps`` (``2**20``)
    one-byte read-modify-write accesses, restarting at index 0 whenever the
    position runs past the end of the array.  The resulting
    ``{(array_megs, stride): bandwidth_in_GB_per_s}`` dictionary is pickled
    to ``assoc_result.dat`` and the C source is written to ``assoc.c``.

    Relies on the module-level names ``toolchain``, ``MODULE_CODE`` and
    ``time``.  The timed region covers the whole ``go`` call, including its
    ``malloc``, final summation and ``free``.
    """
    FUNC_CODE = """
    int go(unsigned array_size, unsigned stride, unsigned steps)
    {
      char *ary = (char *) malloc(sizeof(int) * array_size);

      unsigned p = 0;
      for (unsigned i = 0; i < steps; ++i)
      {
        ary[p] ++;
        p += stride;
        if (p >= array_size)
          p = 0;
      }

      int result = 0;

      for (unsigned i = 0; i < array_size; ++i)
          result += ary[i];

      free(ary);
      return result;
    }
    """

    from codepy.jit import extension_from_string
    cmod = extension_from_string(toolchain, "module", MODULE_CODE % FUNC_CODE)

    result = {}

    steps = 2**20

    from pytools import ProgressBar
    meg_range = range(1, 25)
    stride_range = range(1, 640)
    pb = ProgressBar("bench", len(meg_range) * len(stride_range))

    for array_megs in meg_range:
        for stride in stride_range:
            start = time()
            cmod.go(array_megs << 20, stride, steps)
            stop = time()

            elapsed = stop - start
            # 2 for rw; each access touches a single one-byte char, so there
            # is no sizeof factor here.
            gb_transferred = 2 * steps / 1e9
            bandwidth = gb_transferred / elapsed

            result[array_megs, stride] = bandwidth
            pb.progress()

    # cPickle only exists on Python 2; fall back to the standard pickle module.
    try:
        from cPickle import dump
    except ImportError:
        from pickle import dump

    # Pickles are binary data: open in binary mode, and close the handle
    # deterministically.
    with open("assoc_result.dat", "wb") as result_file:
        dump(result, result_file)

    with open("assoc.c", "w") as c_file:
        c_file.write(FUNC_CODE)
def compile(self, toolchain, **kwargs):
    """Return the extension module generated from the code *self* describes.

    The code is built, if necessary, by
    :func:`codepy.jit.extension_from_string` using a copy of *toolchain*
    extended with Boost.Python.  Keyword arguments accepted by that function
    may be passed in *kwargs*.
    """
    from codepy.libraries import add_boost_python
    from codepy.jit import extension_from_string

    # add_boost_python is applied to a copy rather than the caller's object.
    toolchain_with_boost = toolchain.copy()
    add_boost_python(toolchain_with_boost)

    module_name = self.name
    # Generated code followed by a trailing newline.
    module_source = "{}\n".format(self.generate())

    return extension_from_string(
        toolchain_with_boost, module_name, module_source, **kwargs)
def find_associativity():
    """Time strided one-byte accesses for many (array size, stride) pairs.

    The C function ``go`` is compiled through codepy and called for every
    array size ``array_megs << 20`` with ``array_megs`` in 1..24 and every
    stride in 1..639.  A call performs ``steps`` (``2**20``) read-modify-write
    accesses on single chars, wrapping back to index 0 once the position
    reaches the array size.  Bandwidths in GB/s are collected in a dictionary
    keyed by ``(array_megs, stride)``, pickled to ``assoc_result.dat``, and
    the C source is saved to ``assoc.c``.

    Uses the module-level names ``toolchain``, ``MODULE_CODE`` and ``time``.
    The measured time spans the entire ``go`` call, allocation and final
    summation included.
    """
    FUNC_CODE = """
    int go(unsigned array_size, unsigned stride, unsigned steps)
    {
      char *ary = (char *) malloc(sizeof(int) * array_size);

      unsigned p = 0;
      for (unsigned i = 0; i < steps; ++i)
      {
        ary[p] ++;
        p += stride;
        if (p >= array_size)
          p = 0;
      }

      int result = 0;

      for (unsigned i = 0; i < array_size; ++i)
          result += ary[i];

      free(ary);
      return result;
    }
    """

    from codepy.jit import extension_from_string
    cmod = extension_from_string(toolchain, "module", MODULE_CODE % FUNC_CODE)

    result = {}

    steps = 2**20

    from pytools import ProgressBar
    meg_range = range(1, 25)
    stride_range = range(1, 640)
    pb = ProgressBar("bench", len(meg_range)*len(stride_range))

    for array_megs in meg_range:
        for stride in stride_range:
            start = time()
            cmod.go(array_megs<<20, stride, steps)
            stop = time()

            elapsed = stop-start
            # 2 for rw; accesses are to one-byte chars, hence no sizeof
            # factor in this formula.
            gb_transferred = 2*steps/1e9
            bandwidth = gb_transferred/elapsed

            result[array_megs, stride] = bandwidth
            pb.progress()

    # Prefer the faster Python 2 cPickle when present, otherwise use pickle.
    try:
        from cPickle import dump
    except ImportError:
        from pickle import dump

    # Binary mode is the correct mode for pickle output; `with` guarantees
    # the data is flushed and the handle closed.
    with open("assoc_result.dat", "wb") as result_file:
        dump(result, result_file)

    with open("assoc.c", "w") as c_file:
        c_file.write(FUNC_CODE)
def measure_strides():
    """Time passes over a 64 Mi-int array at power-of-two strides and plot.

    Compiles the C function ``go`` with codepy.  For each stride in
    ``2**0 .. 2**10`` the function is called once with ``count`` (30)
    repetitions of a pass that multiplies every ``stride``-th int by 17.  The
    average wall time per pass is plotted against the stride on a semi-log
    axis to ``strides.pdf`` (then cropped with ``pdfcrop``), and the C source
    is written to ``strides.c``.

    Relies on the module-level names ``toolchain``, ``MODULE_CODE``,
    ``time``, ``pt`` and ``os``.  The timed region is the whole ``go`` call,
    so the per-pass figure also contains a share of the allocation and the
    final summation.
    """
    FUNC_CODE = """
    int go(unsigned count, unsigned stride)
    {
      const unsigned array_size = 64 * 1024 * 1024;
      int *ary = (int *) malloc(sizeof(int) * array_size);

      for (unsigned it = 0; it < count; ++it)
      {
        for (unsigned i = 0; i < array_size; i += stride)
          ary[i] *= 17;
      }

      int result = 0;

      for (unsigned i = 0; i < array_size; ++i)
          result += ary[i];

      free(ary);
      return result;
    }
    """

    from codepy.jit import extension_from_string
    cmod = extension_from_string(toolchain, "module", MODULE_CODE % FUNC_CODE)

    strides = []
    times = []

    count = 30
    for stride in [2**i for i in range(0, 11)]:
        start = time()
        cmod.go(count, stride)
        stop = time()

        strides.append(stride)
        times.append((stop - start) / count)  # average seconds per pass

    pt.clf()
    pt.rc("font", size=20)
    # NOTE(review): `basex` was renamed to `base` in newer matplotlib
    # releases -- confirm against the matplotlib version in use.
    pt.semilogx(strides, times, "o-", basex=2)
    pt.xlabel("Stride")
    pt.ylabel("Time [s]")
    pt.grid()
    pt.subplots_adjust(bottom=0.15)
    pt.savefig("strides.pdf")

    # Use a context manager so the file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open("strides.c", "w") as c_file:
        c_file.write(FUNC_CODE)

    os.system("pdfcrop strides.pdf")
def measure_strides():
    """Measure the time per array pass as a function of access stride.

    The C function ``go`` is compiled through codepy and invoked once per
    stride in ``2**0 .. 2**10``.  Every invocation runs ``count`` (30) passes
    over a 64 Mi-int array, multiplying each ``stride``-th element by 17.
    The mean wall time per pass is plotted against the stride to
    ``strides.pdf`` (cropped with ``pdfcrop``); the C source is saved to
    ``strides.c``.

    Uses the module-level names ``toolchain``, ``MODULE_CODE``, ``time``,
    ``pt`` and ``os``.  Because the timer wraps the complete ``go`` call, the
    allocation and the final summation are part of the measurement.
    """
    FUNC_CODE = """
    int go(unsigned count, unsigned stride)
    {
      const unsigned array_size = 64 * 1024 * 1024;
      int *ary = (int *) malloc(sizeof(int) * array_size);

      for (unsigned it = 0; it < count; ++it)
      {
        for (unsigned i = 0; i < array_size; i += stride)
          ary[i] *= 17;
      }

      int result = 0;

      for (unsigned i = 0; i < array_size; ++i)
          result += ary[i];

      free(ary);
      return result;
    }
    """

    from codepy.jit import extension_from_string
    cmod = extension_from_string(toolchain, "module", MODULE_CODE % FUNC_CODE)

    strides = []
    times = []

    count = 30
    for stride in [2**i for i in range(0, 11)]:
        start = time()
        cmod.go(count, stride)
        stop = time()

        strides.append(stride)
        times.append((stop-start)/count)  # mean seconds per pass

    pt.clf()
    pt.rc("font", size=20)
    # NOTE(review): newer matplotlib spells this keyword `base`; confirm
    # which matplotlib version this script targets.
    pt.semilogx(strides, times, "o-", basex=2)
    pt.xlabel("Stride")
    pt.ylabel("Time [s]")
    pt.grid()
    pt.subplots_adjust(bottom=0.15)
    pt.savefig("strides.pdf")

    # Close the handle explicitly; the original left it to the garbage
    # collector.
    with open("strides.c", "w") as c_file:
        c_file.write(FUNC_CODE)

    os.system("pdfcrop strides.pdf")
# Minimal codepy demo: compile a Boost.Python module exposing greet() from an
# in-memory C++ string, import it, and print the greeting.
MODULE_CODE = """
#include <boost/python.hpp>

namespace
{
  char const *greet()
  {
    return "hello world";
  }
}

BOOST_PYTHON_MODULE(module)
{
  boost::python::def("greet", &greet);
}
"""

from codepy.toolchain import guess_toolchain
toolchain = guess_toolchain()

# Add Boost.Python settings to the guessed toolchain.
from codepy.libraries import add_boost_python
add_boost_python(toolchain)

# The module name passed here must match the name given to
# BOOST_PYTHON_MODULE in the C++ source above.
from codepy.jit import extension_from_string
cmod = extension_from_string(toolchain, "module", MODULE_CODE)

# Parenthesized form: prints the same text on Python 2 (single argument) and
# is valid syntax on Python 3, unlike the bare print statement.
print(cmod.greet())
from codepy.toolchain import guess_toolchain
from codepy.libraries import add_boost_python
from codepy.jit import extension_from_string

# C++ source of a Boost.Python extension named "module" that exposes a single
# function, greet().
MODULE_CODE = """
#include <boost/python.hpp>

namespace
{
  char const *greet()
  {
    return "hello world";
  }
}

BOOST_PYTHON_MODULE(module)
{
  boost::python::def("greet", &greet);
}
"""

# Guess a toolchain and add the Boost.Python settings to it.
toolchain = guess_toolchain()
add_boost_python(toolchain)

# Build the extension from the source string, then call into it.
cmod = extension_from_string(toolchain, "module", MODULE_CODE)
print(cmod.greet())
def create_native(self):
    """Generate, compile and run a native module mirroring ``config.parameters``.

    For every entry of the module-level ``config.parameters`` mapping a
    member is added to a generated C struct ``Parameters`` along with a
    Boost.Python ``extract`` statement that fills it:

    * ``int`` values become an integer member,
    * ``float`` values become a ``float64`` member,
    * lists become fixed-length arrays whose element type is chosen from the
      type of the first element (``int`` or ``float``).

    Values of any other type are ignored.  An empty list raises
    ``IndexError`` because its first element is inspected.

    The struct and the extraction code are rendered into a Jinja2 template,
    compiled with an ``nvcc``-based toolchain through codepy, and the
    resulting module's ``copy_dict`` is called with ``config.parameters``.
    The toolchain keyword arguments are printed before compilation.

    *self* is not used.  Nothing is returned.
    """
    from cgen import (ArrayOf, POD, Block, For, Statement, Struct)
    from cgen import dtype_to_ctype
    import numpy

    members = []
    code = []

    # dict.items() works on both Python 2 and 3; iteritems() is Python 2 only.
    for pk, pv in config.parameters.items():
        if isinstance(pv, int):
            # The builtin `int` is what the removed `numpy.int` alias
            # referred to, so the generated type is unchanged.
            members.append(POD(int, pk))
            code.append(
                Statement("params.%s = extract<%s>(cppdict[\"%s\"])"
                          % (pk, dtype_to_ctype(int), pk)))
        elif isinstance(pv, float):
            members.append(POD(numpy.float64, pk))
            code.append(
                Statement("params.%s = extract<%s>(cppdict[\"%s\"])"
                          % (pk, dtype_to_ctype(numpy.float64), pk)))
        elif isinstance(pv, list):
            # The element type of the C array is decided by the first item.
            if isinstance(pv[0], int):
                members.append(ArrayOf(POD(int, pk), len(pv)))
                code.append(
                    Block([
                        Statement("list v = extract<%s>(cppdict[\"%s\"])"
                                  % (list.__name__, pk)),
                        For(
                            "unsigned int i = 0", "i<len(v)", "++i",
                            Statement("params.%s[i] = extract<%s>(v[i])"
                                      % (pk, dtype_to_ctype(int)))),
                    ]))
            elif isinstance(pv[0], float):
                members.append(ArrayOf(POD(numpy.float64, pk), len(pv)))
                code.append(
                    Block([
                        Statement("list v = extract<%s>(cppdict[\"%s\"])"
                                  % (list.__name__, pk)),
                        For(
                            "unsigned int i = 0", "i < len(v)", "++i",
                            Block([
                                Statement(
                                    "params.%s[i] = extract<%s>(v[i])"
                                    % (pk, dtype_to_ctype(numpy.float64))),
                                # Emitted as a C++ comment (disabled debug
                                # output) in the generated source.
                                Statement(
                                    "//std::cout << params.%s[i] << std::endl"
                                    % (pk))
                            ])),
                    ]))

    mystruct = Struct('Parameters', members)
    mycode = Block(code)

    from jinja2 import Template

    tpl = Template("""
    #include <boost/python.hpp>
    #include <boost/python/object.hpp>
    #include <boost/python/extract.hpp>
    #include <boost/python/list.hpp>
    #include <boost/python/dict.hpp>
    #include <boost/python/str.hpp>
    #include <stdexcept>
    #include <iostream>

    {{my_struct}}

    Parameters params;

    void CopyDictionary(boost::python::object pydict)
    {
        using namespace boost::python;

        extract< dict > cppdict_ext(pydict);
        if(!cppdict_ext.check()){
            throw std::runtime_error(
                "PassObj::pass_dict: type error: not a python dict.");
        }

        dict cppdict = cppdict_ext();
        list keylist = cppdict.keys();

        {{my_extractor}}
    }

    BOOST_PYTHON_MODULE({{my_module}})
    {
        boost::python::def("copy_dict", &CopyDictionary);
    }
    """)
    rendered_tpl = tpl.render(my_module="NativeParameters",
                              my_extractor=mycode, my_struct=mystruct)

    from codepy.toolchain import NVCCToolchain
    import codepy.toolchain

    # Start from codepy's guess for the running Python, then override the
    # compiler-specific settings for nvcc.
    kwargs = codepy.toolchain._guess_toolchain_kwargs_from_python_config()
    kwargs["cc"] = "nvcc"
    # Alternative used on some machines, pinning the host compiler:
    # ["-m64", "-x", "cu", "-Xcompiler", "-fPIC",
    #  "-ccbin", "/opt/local/bin/g++-mp-4.4"]
    kwargs["cflags"] = ["-m64", "-x", "cu", "-Xcompiler", "-fPIC"]
    kwargs["include_dirs"].append("/usr/local/cuda/include")
    kwargs["defines"] = []
    kwargs["ldflags"] = ["-shared"]
    # NOTE(review): the Python library is hard-coded ("python2.7" was the
    # other value tried); confirm it matches the interpreter in use.
    kwargs["libraries"] = ["python2.6"]
    # Parenthesized single-argument print: same output on Python 2, valid on
    # Python 3.
    print(kwargs)

    toolchain = NVCCToolchain(**kwargs)

    from codepy.libraries import add_boost_python
    add_boost_python(toolchain)

    from codepy.jit import extension_from_string
    mymod = extension_from_string(toolchain, "NativeParameters", rendered_tpl)

    mymod.copy_dict(config.parameters)