def flop_tests(n): """ Test the efficiency of floating point operations: one multiplication per pass in a loop 'for i in xrange(n)'. """ try: from call import loop, fempty, fwconsts, flops fempty.__name__ = 'empty_func in F77' fwconsts.__name__ = 'func_with_consts in F77' except: print 'Run f2py -c -m call call.f' sys.exit(1) def empty_loop(n): for i in xrange(n): pass def flops_py(n): b = 1.0000001 a = 1.1 for i in xrange(n): a = a*b return a print '\n\n*** Multiplication test ***\n' t1 = timeit.Timer('a*b*c', setup='a=1.01; b=0.98; c=0.99').timeit(n) print n, 'multiplications in a loop' t2 = timer(flops, (n,), repetitions=100, comment='F77:') # result in F77 is multiplication _with_ loop best = min(t1, t2) print 'multiplication: python=%.2f F77=%.2f' % (t1/best, t2/best)
def run(solvers, methods, data, datasets): results = {} # find largest data sets: maxsize = max([size(getattr(data, d)) for d in datasets \ if hasattr(data, d)]) print maxsize # combine all solvers, methods, and datasets: for s in solvers: for m in methods: for d in datasets: if hasattr(solver, m) and hasattr(data, d): f = getattr(solver, m) x = getattr(data, d) r = timer(f, (x,), repetitions=maxsize/size(x)) results[(m,d)] = r return results
def timing2(n=2000):
    """
    Time different implementations of the extension module.

    A Grid2Deff object with spacing 1.0/n in both directions is
    created, and its ext_gridloop1/ext_gridloop2 methods are timed
    with a Python callback function and with a string expression.
    If the module ext_gridloop offers 'gridloop_vec2' and/or
    'gridloop2_str', those variants are timed as well.

    n: the grid spacing is 1.0/n (the report labels the grid as n x n).

    The names sin, mathsin, basic_NumPy, ext_gridloop, myfuncf2 and
    timer are not defined in this function and are taken from the
    enclosing module.
    """
    dx = 1.0/n
    g = Grid2Deff(dx=dx, dy=dx)
    # here we use straight NumPy sin in a scalar context:
    def myfunc(x, y):
        return sin(x*y) + 8*x
    expression = 'sin(x*y) + 8*x'
    # here we use math.sin (=mathsin, global variable):
    # (these definitions replace the NumPy sin versions just above,
    # so only the mathsin variants are used in the timings below)
    def myfunc(x, y):
        return mathsin(x*y) + 8*x
    expression = 'mathsin(x*y) + 8*x'

    print 'basic NumPy module:', basic_NumPy
    t1 = timer(g.ext_gridloop1, (myfunc,), repetitions=1)
    t2 = timer(g.ext_gridloop2, (myfunc,), repetitions=1)
    # NOTE(review): t3 times ext_gridloop2, but the report below labels
    # it "gridloop1 (w/string expression)" - confirm which is intended
    t3 = timer(g.ext_gridloop2, (expression,), repetitions=1)
    print """
Results using an extension module (%dx%d grid),
with callback to Python for each point:
gridloop1 (w/func): %s
gridloop2 (w/func): %s
gridloop1 (w/string expression): %s
""" % (n, n, t1, t2, t3)
    # try the improved functions (works only for the F77 module):
    nrep = 20
    if 'gridloop_vec2' in dir(ext_gridloop):
        t4 = timer(g.ext_gridloop_vec2, (myfuncf2,), repetitions=nrep)
        print """\
gridloop_vec2 (w/func & NumPy): %s""" % t4
    if 'gridloop2_str' in dir(ext_gridloop):
        t5 = timer(g.ext_gridloop2_str, ('myfunc',), repetitions=nrep)
        print """\
gridloop2_str (no Py callback): %s""" % t5
        # NOTE(review): the statements below are assumed to belong to
        # this F77-only branch - confirm the intended nesting
        # try 'inline' F77 compiled callback too:
        # (give F77 source for core of callback function as argument)
        g.ext_gridloop2_fcb_compile('sin(x*y) + 8*x')
        t6 = timer(g.ext_gridloop2_fcb, (), repetitions=nrep)
        g.ext_gridloop2_compile('sin(x*y) + 8*x')
        # t6 and t7 are computed but neither printed nor returned here
        t7 = timer(g.ext_gridloop2_v2, (), repetitions=nrep)
def timing(n=2000): # timing: dx = 1.0/n g = Grid2D(xmin=0, xmax=1, dx=dx, ymin=0, ymax=1, dy=dx) expression = 'sin(x*y) + 8*x' print 'evaluating', expression def myfunc(x, y): return sin(x*y) + 8*x from py4cs.misc import timer t0 = time.clock() # vectorized expressions are so fast that we run the code # repeatedly rep=20 print 'vectorized code with eval... (%d calls)' % rep t1 = timer(g.__call__, (expression,), repetitions=rep, comment='eval(str)') print 'vectorized code with function call... (%d calls)' % rep t2 = timer(g.__call__, (myfunc,), repetitions=rep, comment='myfunc') print 'explicit loops with formula hardcoded...(1 call)' f = g.gridloop_hardcoded_func() t3 = timer(g.gridloop_hardcoded_func, (), repetitions=1, comment='') print 'explicit loops with eval...(1 call)' t4 = timer(g.gridloop, (expression,), repetitions=1, comment='eval(str)') print 'explicit loops with myfunc...(1 call)' t5 = timer(g.gridloop, (myfunc,), repetitions=1, comment='myfunc') print 'explicit loops with list and eval...(1 call)' t6 = timer(g.gridloop_list, (expression,), repetitions=1, comment='eval(str)') print 'explicit loops with list and myfunc...(1 call)' t7 = timer(g.gridloop_list, (myfunc,), repetitions=1, comment='myfunc') # The scalar computations above used sin from NumPy, which is # known to be slow for scalar arguments. 
Here we use math.sin # (stored in mathsin, could also use the slightly slower math.sin # explicitly) # taken globally so eval works: from math import sin as mathsin def myfunc_scalar(x, y): return mathsin(x*y) + 8*x expression_scalar = 'mathsin(x*y) + 8*x' print 'explicit loops with eval...(1 call) and math sin' t8 = timer(g.gridloop, (expression_scalar,), repetitions=1, comment='eval(str)') print 'explicit loops with myfunc...(1 call) and math sin' t9 = timer(g.gridloop, (myfunc_scalar,), repetitions=1, comment='myfunc') # report f = max(t1,t2) # fastest implementation print """ Basic NumPy module: %s vectorized with eval(expression): %.2f %.1f vectorized with myfunc call: %.2f %.1f scalar versions with NumPy sin function: loops with inline formula: %.2f %.1f loops with eval(expression): %.2f %.1f loops with myfunc call: %.2f %.1f loops with list and eval: %.2f %.1f loops with list and myfunc: %.2f %.1f scalar versions with math.sin function: loops with eval(expression_scalar): %.2f %.1f loops with myfunc_scalar: %.2f %.1f """ % (basic_NumPy, t1, t1/f, t2, t2/f, t3, t3/f, t4, t4/f, t5, t5/f, t6, t6/f, t7, t7/f, t8, t8/f, t9, t9/f)
else: r[i] = sin(x[i]) r.shape = x.shape return r def somefunc_NumPy2(x): """Vectorized version of somefunc.""" lt0_indices = less(x, 0) # find all indices where x<0 r = sin(x) # truncate, i.e., insert 0 for all indices where x<0: r = where(lt0_indices, 0.0, r) return r somefunc_list = [somefunc_NumPy, somefunc_NumPy2] try: import scipy.special somefunc_SciPy = scipy.special.general_function(somefunc) somefunc_SciPy.__name__ = somefunc.__name__ + '_SciPy_vectorized' somefunc_list.append(somefunc_SciPy) except: print 'unsuccessful scipy import, cannot use scipy' n = 1000000 x = sequence(0, 2, 1.0 / n, Float) from py4cs.misc import timer for f in somefunc_list: timer(f, (x, ), repetitions=1) timer(somefunc_NumPy2, (x, ), repetitions=10) print 'end of', sys.argv[0]
def allocate_tests(n): def list_append1(n): r = [] for i in xrange(n): r.append(i+2) return r def list_chunk1(n): r = [0.0]*n for i in xrange(n): r[i] = i+2 return r def list_append2(n): r = [] for i in xrange(n): r.append(random.gauss(0,1)) return r def list_chunk2(n): r = [0.0]*n for i in xrange(n): r[i] = random.gauss(0,1) return r def list_append3(n): g = random.gauss r = [] for i in xrange(n): r.append(g(0,1)) return r def list_append4(n): return [random.gauss(0,1) for i in xrange(n)] def list_chunk3(n): g = random.gauss r = [0.0]*n for i in xrange(n): r[i] = g(0,1) return r def NumPy_zeros(n): return zeros(n, Float) def NumPy_arange(n): return sequence(0,1,1.0/(n-1)) def NumPy_random(n): import RandomArray return RandomArray.normal(0,1,n) print 'allocate n numbers in an array/list: n =', n # few calls, use timer instead of timeit rep = 3 print timer(NumPy_zeros, (n,), repetitions=rep) print timer(NumPy_arange, (n,), repetitions=rep) print timer(NumPy_random, (n,), repetitions=rep) print timer(list_append1, (n,), repetitions=rep) print timer(list_chunk1, (n,), repetitions=rep) print timer(list_append2, (n,), repetitions=rep) print timer(list_append4, (n,), repetitions=rep) print timer(list_chunk2, (n,), repetitions=rep) print timer(list_append3, (n,), repetitions=rep) print timer(list_chunk3, (n,), repetitions=rep)
def matrixfill_tests(language, n): from math import sin, exp if language == 'F77': try: from matrix_f77 import makematrix, set, tonumpy, adump, \ fill1, fill2, lfill1, lfill2 except: print 'Run f2py -c m matrix matrix_f77.f' sys.exit(1) makematrix(n, n) # make matrix in F77 elif language == 'C++': try: from matrix_cpp import Matrix from _matrix_cpp import Matrix_set import _matrix_cpp # for efficiency comparison except: print 'run make_cpp.sh' sys.exit(1) m = Matrix(n) a = zeros((n, n), Float) def setmatrix1_py(): """Fill NumPy matrix in Python loop.""" for i in xrange(n): for j in xrange(n): a[i, j] = i*j-2 return a def setmatrix2_py(): """Fill NumPy matrix in Python loop; sin/exp formula.""" for i in xrange(n): x = i*0.1 for j in xrange(n): y = j*0.1 a[i, j] = sin(x)*sin(y)*exp(-x*y) return a #======== F77 functions ========= def setmatrix1_f_index(): """Fill F77 matrix in a Python loop with F77 indexing.""" for i in xrange(n): for j in xrange(n): set(i, j, i*j-2) r = tonumpy(n, n) # could perhaps tune the interface file such that tonumpy # doesn't need arguments return r def setmatrix2_f_index(): """Fill F77 matrix in a Python loop with F77 indexing; sin/exp.""" for i in xrange(n): x = 0.1*i for j in xrange(n): y = 0.1*j set(i, j, sin(x)*sin(y)*exp(-x*y)) r = tonumpy(n, n) return r def setmatrix1_f_loop1(): """Fill F77 matrix in Fortran loops.""" fill1() # all loops in F77 r = tonumpy(n, n) return r def setmatrix1_f_loop2(): """Fill NumPy matrix in Fortran loops.""" r = lfill1(a) # all loops in F77, fill NumPy array return r def setmatrix2_f_loop1(): """Fill F77 matrix in Fortran loops; sin/exp formula.""" fill2() # all loops in F77 r = tonumpy(n, n) return r def setmatrix2_f_loop2(): """Fill NumPy matrix in Fortran loops; sin/exp formula.""" r = lfill2(a) # all loops in F77, fill NumPy array return r #======== C++ functions ========= def setmatrix1_c_index1(): """Fill C++ matrix in a Python loop with F77 indexing.""" for i in xrange(n): for j in xrange(n): 
m.set(i, j, i*j-2) # could perhaps tune the interface file such that tonumpy # doesn't need arguments return m def setmatrix2_c_index1(): """Fill F77 matrix in a Python loop with F77 indexing; sin/exp.""" for i in xrange(n): x = 0.1*i for j in xrange(n): y = 0.1*j m.set(i, j, sin(x)*sin(y)*exp(-x*y)) return m def setmatrix1_c_index3(): """Avoid proxy class, call Matrix_set directly.""" for i in xrange(n): for j in xrange(n): Matrix_set(m, i, j, i*j-2) # could perhaps tune the interface file such that tonumpy # doesn't need arguments return m def setmatrix2_c_index3(): """Avoid proxy class, call Matrix_set directly; sin/exp formula.""" for i in xrange(n): x = 0.1*i for j in xrange(n): y = 0.1*j Matrix_set(m, i, j, sin(x)*sin(y)*exp(-x*y)) return m def setmatrix1_c_index4(): """Avoid proxy class, call _matrix_cpp.Matrix_set directly.""" for i in xrange(n): for j in xrange(n): _matrix_cpp.Matrix_set(m, i, j, i*j-2) # could perhaps tune the interface file such that tonumpy # doesn't need arguments return m def setmatrix2_c_index4(): """Avoid proxy class, call _matrix_cpp.Matrix_set directly.""" for i in xrange(n): x = 0.1*i for j in xrange(n): y = 0.1*j _matrix_cpp.Matrix_set(m, i, j, sin(x)*sin(y)*exp(-x*y)) return m def setmatrix1_c_loop1(): """Fill F77 matrix in Fortran loops.""" m.fill1() # all loops in F77 return m def setmatrix2_c_loop1(): """Fill F77 matrix in Fortran loops; sin/exp formula.""" m.fill2() # all loops in F77 return m #======== end of C++ functions ========= def sort(d): """Sort result dictionary: d[problem_description]=time.""" list = [(key, d[key]) for key in d] def s(a, b): """sort list of 2-tuples""" return cmp(a[1],b[1]) # a,b = (comment,time) list.sort(s) return list if language == 'F77': res = {} # one multiplication in Python for each matrix entry: t = timer(setmatrix1_py, (), repetitions=10) res['Python loop and NumPy array'] = t t = timer(setmatrix1_f_index , (), repetitions=10) res['Python loop and F77 indexing in F77 array'] = t t = 
timer(setmatrix1_f_loop1, (), repetitions=100) res['F77 loop over F77 array'] = t t = timer(setmatrix1_f_loop2, (), repetitions=100) res['F77 loop over NumPy array'] = t res = sort(res) print '\n\n\nTable: a[i,j] = i*j-2\n(one mult+sub per iteration)\n' for comment, time in res: print '%60s %7.3f' % (comment,time) print '\n\n' # a sin/exp function expression for each matrix entry: res = {} t = timer(setmatrix2_py, (), repetitions=10) res['Python loop and NumPy array'] = t t = timer(setmatrix2_f_index , (), repetitions=10) res['Python loop and F77 indexing in F77 array'] = t t = timer(setmatrix2_f_loop1, (), repetitions=100) res['F77 loop over F77 array'] = t t = timer(setmatrix2_f_loop2, (), repetitions=100) res['F77 loop over NumPy array'] = t res = sort(res) print '\n\n\nTable: a[i,j] = sin/exp expression\n' for comment, time in res: print '%60s %7.3f' % (comment,time) print '\n\n' elif language == 'C++': res = {} # one multiplication in Python for each matrix entry: t = timer(setmatrix1_py, (), repetitions=10) res['Python loop and NumPy array'] = t t = timer(setmatrix1_c_index1, (), repetitions=10) res['Indexing: m.set'] = t t = timer(setmatrix1_c_index3, (), repetitions=10) res['Indexing: Matrix_set'] = t t = timer(setmatrix1_c_index4, (), repetitions=10) res['Indexing: matrix_cpp.Matrix_set'] = t t = timer(setmatrix1_c_loop1, (), repetitions=100) res['C++ loop over C++ array'] = t res = sort(res) print '\n\n\nTable: a[i,j] = i*j-2\n(one mult+sub per iteration)\n' for comment, time in res: print '%60s %7.3f' % (comment,time) print '\n\n' # a sin/exp function expression for each matrix entry: res = {} t = timer(setmatrix2_py, (), repetitions=10) res['Python loop and NumPy array'] = t t = timer(setmatrix2_c_index1, (), repetitions=10) res['Indexing: m.set'] = t t = timer(setmatrix2_c_index3, (), repetitions=10) res['Indexing: Matrix_set'] = t t = timer(setmatrix2_c_index4, (), repetitions=10) res['Indexing: matrix_cpp.Matrix_set'] = t t = 
timer(setmatrix2_c_loop1, (), repetitions=100) res['C++ loop over C++ array'] = t res = sort(res) print '\n\n\nTable: a[i,j] = sin/exp-expression\n' for comment, time in res: print '%60s %7.3f' % (comment,time) print '\n\n'
def call_nargs_test(rep): for n in range(1,101,20): code = generate_func_with_many_args(n) exec code in globals(), globals() print timer(func_with_many_args, tuple(range(n)), repetitions=rep, comment='empty func with %d arguments:' % n)
def call_tests(rep, x, y, z):
    """
    Measure the cost of different kinds of function calls.

    Three groups of measurements are made:
    1. timeit runs of callables created by wrap2callable, by a class
       with __call__, and by a plain def; printed as ratios to the
       fastest timing.
    2. timer runs of the F77 routines from the module 'call' and of
       the local Python functions defined below.
    3. a timeit comparison of isinstance with a trivial function,
       followed by call_nargs_test(rep).

    rep:     number of executions/repetitions used in the timings
    x, y, z: arguments passed to the three-argument test functions

    Prints a build hint and calls sys.exit(1) if anything in the
    import block fails.
    """
    try:
        from call import loop, fempty, fwconsts, flops
        fempty.__name__ = 'empty_func in F77'
        fwconsts.__name__ = 'func_with_consts in F77'
    except:
        # bare except: any failure in the three statements above
        # (not only an import error) ends up here
        print 'Run f2py -c -m call call.f'
        sys.exit(1)

    def empty_func(x, y, z):
        pass

    # three ways of supplying the constants 0.3, 1.2 and 1.22E+02:
    def func_with_consts1(x, y, z):
        a = 0.3
        b = 1.2
        c = 1.22E+02
        q = a*x + b*y + c*z
        return q

    def func_with_consts2(x, y, z, a=0.3, b=1.2, c=1.22E+02):
        q = a*x + b*y + c*z
        return q

    def func_with_consts3(x, y, z):
        # hardcoded coefficients
        q = 0.3*x + 1.2*y + 1.22E+02*z
        return q

    def _help(x):
        return x+1

    # the same update r = r + 1, with and without a function call:
    def func_loop_with_call(n):
        r = 0.1
        for i in xrange(n):
            r = _help(r)

    def func_loop_with_inline(n):
        r = 0.1
        for i in xrange(n):
            r = r + 1

    # test NumPy vs math sin for scalar arguments:
    # see sin_comparison.py

    print '\n*** Testing function calls ***\n'
    # setup code for timeit: a function object returning a constant
    fclass1 = """
class MyFunc:
    def __call__(self, x):
        return 2.0
f = MyFunc()
fm = f.__call__
"""
    # setup code for timeit: a function object of three arguments
    fclass3 = """
from math import sin
class MyFunc:
    def __call__(self, x, y, z):
        return sin(x)*sin(y)*sin(z)
f = MyFunc()
"""
    # common setup prefix; the argument list of wrap2callable is
    # appended to this string in each Timer call below
    setup = """
from py4cs.numpytools import wrap2callable
from math import sin
f = wrap2callable"""
    t1 = timeit.Timer('f(0.9)', setup=setup+'(2.0)').timeit(rep)
    t2 = timeit.Timer('f(0.9)', setup=setup+'(lambda x: 2.0)').timeit(rep)
    t3 = timeit.Timer('f(0.9)', setup=setup+'("2.0")').timeit(rep)
    t4 = timeit.Timer('f(0.9)', setup=fclass1).timeit(rep)
    t4b = timeit.Timer('fm(0.9)', setup=fclass1).timeit(rep)
    t5 = timeit.Timer('f(0.9)', setup='def f(x): return 2.0').timeit(rep)
    t6 = timeit.Timer('f(0.9, 0.1, 1)', setup=setup+'(2.0)').timeit(rep)
    t7 = timeit.Timer('f(0.9, 0.1, 1)',
                      setup=setup+'(lambda x,y,z: 2.0)').timeit(rep)
    t8 = timeit.Timer('f(0.9, 0.1, 1)', setup=setup+'("2.0")').timeit(rep)
    t9 = timeit.Timer('f(1,1,1)',
         setup=setup+'(lambda x,y,z: sin(x)*sin(y)*sin(z))').timeit(rep)
    t10 = timeit.Timer('f(1,1,1)',
          setup=setup+'("sin(x)*sin(y)*sin(z)", ' + \
          'independent_variables=("x","y","z"))').timeit(rep)
    t11 = timeit.Timer('f(1,1,1)', setup=fclass3).timeit(rep)
    # NOTE(review): t4b is not among the arguments of min, so the
    # t4b/best ratio printed below can be smaller than 1 - confirm
    best = min(t1, t2, t3, t4, t5, t6, t7, t8, t9 ,t10, t11)
    # best among the three sin(x)*sin(y)*sin(z) variants only
    best3 = min(t9, t10, t11)
    print """
overhead with wrap2callable: constant function 2.0 (best=%f)
f = wrap2callable(2.0); f(0.9) %.2f const
f = wrap2callable(lambda x: 2.0); f(0.9) %.2f func
f = wrap2callable("2.0"); f(0.9) %.2f StringFunction
function object %.2f
%s
function object f: fm = f.__call__, fm(0.9) %.2f
def f(x): return 2.0 %.2f func
increasing the number of arguments:
f = wrap2callable(2.0); f(0.9, 0.1, 1) %.2f const
f = wrap2callable(lambda x,y,z: 2.0); f(0.9, 0.1, 1) %.2f func
f = wrap2callable("2.0"); f(0.9, 0.1, 1) %.2f StringFunction
f = wrap2callable(lambda x,y,z: sin(x)*sin(y)*sin(z)) %.2f %.2f
f = wrap2callable("sin(x)*sin(y)*sin(z)"); f(1,1,1) %.2f %.2f
function object %.2f %.2f
%s
""" % (best, t1/best, t2/best, t3/best, t4/best, fclass1, t4b/best,
       t5/best, t6/best, t7/best, t8/best,
       t9/best, t9/best3, t10/best, t10/best3, t11/best, t11/best3,
       fclass3)

    # F77 versions:
    # NOTE(review): the first timing uses the Python empty_func, not
    # the imported fempty - confirm whether fempty was intended
    print timer(empty_func, (x, y, z), repetitions=rep)
    f77rep = 100
    c = timer(loop, (f77rep*rep, 'fempty'), repetitions=1,
              comment='loop over fempty in F77:')
    print c/float(f77rep*rep)  # time for a single call
    print timer(fwconsts, (x, y, z), repetitions=rep)
    c = timer(loop, (f77rep*rep, 'fwconsts'), repetitions=1,
              comment='loop over fwconsts in F77:')
    print c/float(f77rep*rep)

    # Python versions:
    print timer(empty_func, (x, y, z), repetitions=rep,
                comment='no body:')
    print timer(func_with_consts1, (x, y, z), repetitions=rep,
                comment='constants in statements:')
    print timer(func_with_consts2, (x, y, z), repetitions=rep,
                comment='constants as default kwargs:')
    print timer(func_with_consts3, (x, y, z), repetitions=rep,
                comment='constants hardcoded:')
    print timer(func_loop_with_call, (rep,), repetitions=1,
                comment='loop with function call:')
    print timer(func_loop_with_inline, (rep,), repetitions=1,
                comment='loop with inline expression:')

    # cost of isinstance relative to calling a trivial function
    # (t1 and t2 are reused here; the earlier values are not needed)
    m = 10*rep
    t1 = timeit.Timer('myfunc(2)', setup='def myfunc(x): return x').timeit(m)
    t2 = timeit.Timer('isinstance(x,list)', setup='x=(1,2,3)').timeit(m)
    print 'isinstance is %.2f times slower than a trivial func' % (t2/t1)
    call_nargs_test(rep)
    # NOTE(review): the triple quote below opens a string literal whose
    # remainder is not part of this function's visible source
    """
print 'import error (from call import ...):', msg print 'Run f2py -c -m call call.f' sys.exit(1) # 2D grid: print '\n\n' m0 = sqrt(n) #m0 = 1600 for j in (4, 2, 1): m = m0/j print '\n\ninitializing a %dx%d array\n' % (m,m) x = seq(0, 1, 1/float(m-1)) y = x.copy() u = zeros((len(x), len(y)), Float) t1 = timer(py_loop3_2Dsincos, args=(x,y), repetitions=1*j) t1 = timer(py_loop2_2Dsincos, args=(x,y), repetitions=1*j) t1 = timer(py_loop1_2Dsincos, args=(x,y), repetitions=1*j) print 'pure Python loop:%d u[i,j]=sin(x[i])*cos(y[j]):' % size(u), t1 from py4cs.numpytools import sin, cos # ensure vectorized versions t2 = timer(NumPy_loop1_2Dsincos, args=(x,y), repetitions=20*j) print 'NumPy:%d u=sin(x)*cos(y):' % size(u), t2 # numarray does not work 100% with f2py: if not isinstance(u, NumArray): import call u = call.as_column_major_storage(u) t3a = timer(F77_loop1_2Dsincos, args=(u,x,y), repetitions=20*j, comment='call I1') t3b = timer(F77_loop2_2Dsincos, args=(u,x,y), repetitions=20*j, comment='inline') t3c = timer(F77_loop3_2Dsincos, args=(u,x,y), repetitions=20*j,