def flop_tests(n):
    """
    Test the efficiency of floating point operations: one multiplication
    per pass in a loop 'for i in xrange(n)'.
    """
    try:
        from call import loop, fempty, fwconsts, flops
        fempty.__name__ = 'empty_func in F77'
        fwconsts.__name__ = 'func_with_consts in F77'
    except:
        print 'Run f2py -c -m call call.f'
        sys.exit(1)


    def empty_loop(n):
        for i in xrange(n):
            pass
        
    def flops_py(n):
        b = 1.0000001
        a = 1.1
        for i in xrange(n):
            a = a*b
        return a

    print '\n\n*** Multiplication test ***\n'
    t1 = timeit.Timer('a*b*c', setup='a=1.01; b=0.98; c=0.99').timeit(n)
    print n, 'multiplications in a loop'
    t2 = timer(flops, (n,), repetitions=100, comment='F77:')
    # result in F77 is multiplication _with_ loop
    best = min(t1, t2)
    print 'multiplication: python=%.2f F77=%.2f' % (t1/best, t2/best)
# Example #2 (0)
def run(solvers, methods, data, datasets):
    results = {}
    # find largest data sets:
    maxsize = max([size(getattr(data, d)) for d in datasets \
                   if hasattr(data, d)])
    print maxsize
    # combine all solvers, methods, and datasets:
    for s in solvers:
        for m in methods:
            for d in datasets:
                if hasattr(solver, m) and hasattr(data, d):
                    f = getattr(solver, m)
                    x = getattr(data, d)
                    r = timer(f, (x,), repetitions=maxsize/size(x))
                    results[(m,d)] = r
    return results
def timing2(n=2000):
    """Time different implementations of the extension module."""
    dx = 1.0/n
    g = Grid2Deff(dx=dx, dy=dx)
    # here we use straight NumPy sin in a scalar context:
    def myfunc(x, y):
        return sin(x*y) + 8*x
    expression = 'sin(x*y) + 8*x'
    # here we use math.sin (=mathsin, global variable):
    def myfunc(x, y):
        return mathsin(x*y) + 8*x
    expression = 'mathsin(x*y) + 8*x'

    print 'basic NumPy module:', basic_NumPy
    t1 = timer(g.ext_gridloop1, (myfunc,), repetitions=1)
    t2 = timer(g.ext_gridloop2, (myfunc,), repetitions=1)
    t3 = timer(g.ext_gridloop2, (expression,), repetitions=1)
    print """
Results using an extension module (%dx%d grid),
with callback to Python for each point:
gridloop1 (w/func):               %s
gridloop2 (w/func):               %s
gridloop1 (w/string expression):  %s
""" % (n, n, t1, t2, t3)

    # try the improved functions (works only for the F77 module):
    nrep = 20
    if 'gridloop_vec2' in dir(ext_gridloop):
        t4 = timer(g.ext_gridloop_vec2, (myfuncf2,), repetitions=nrep)
        print """\
gridloop_vec2 (w/func & NumPy):    %s""" % t4
    if 'gridloop2_str' in dir(ext_gridloop):        
        t5 = timer(g.ext_gridloop2_str, ('myfunc',), repetitions=nrep)
        print """\
gridloop2_str (no Py callback):   %s""" % t5

        # try 'inline' F77 compiled callback too:
        # (give F77 source for core of callback function as argument)
        g.ext_gridloop2_fcb_compile('sin(x*y) + 8*x')
        t6 = timer(g.ext_gridloop2_fcb, (), repetitions=nrep)
        g.ext_gridloop2_compile('sin(x*y) + 8*x')
        t7 = timer(g.ext_gridloop2_v2, (), repetitions=nrep)
# Example #4 (0)
def timing(n=2000):
    # timing:
    dx = 1.0/n
    g = Grid2D(xmin=0, xmax=1, dx=dx,
               ymin=0, ymax=1, dy=dx)

    expression = 'sin(x*y) + 8*x'
    print 'evaluating', expression
    def myfunc(x, y):
        return sin(x*y) + 8*x

    from py4cs.misc import timer
    t0 = time.clock()

    # vectorized expressions are so fast that we run the code
    # repeatedly
    rep=20
    print 'vectorized code with eval... (%d calls)' % rep
    t1 = timer(g.__call__, (expression,), repetitions=rep, comment='eval(str)')
    print 'vectorized code with function call... (%d calls)' % rep
    t2 = timer(g.__call__, (myfunc,), repetitions=rep, comment='myfunc')

    print 'explicit loops with formula hardcoded...(1 call)'
    f = g.gridloop_hardcoded_func()
    t3 = timer(g.gridloop_hardcoded_func, (), repetitions=1, comment='')

    print 'explicit loops with eval...(1 call)'
    t4 = timer(g.gridloop, (expression,), repetitions=1, comment='eval(str)')
    print 'explicit loops with myfunc...(1 call)'
    t5 = timer(g.gridloop, (myfunc,), repetitions=1, comment='myfunc')

    print 'explicit loops with list and eval...(1 call)'
    t6 = timer(g.gridloop_list, (expression,), repetitions=1,
               comment='eval(str)')
    print 'explicit loops with list and myfunc...(1 call)'
    t7 = timer(g.gridloop_list, (myfunc,), repetitions=1, comment='myfunc')

    # The scalar computations above used sin from NumPy, which is
    # known to be slow for scalar arguments. Here we use math.sin
    # (stored in mathsin, could also use the slightly slower math.sin
    # explicitly)
    # taken globally so eval works: from math import sin as mathsin
    def myfunc_scalar(x, y):
        return mathsin(x*y) + 8*x
    expression_scalar = 'mathsin(x*y) + 8*x'
    print 'explicit loops with eval...(1 call) and math sin'
    t8 = timer(g.gridloop, (expression_scalar,), repetitions=1,
               comment='eval(str)')
    print 'explicit loops with myfunc...(1 call) and math sin'
    t9 = timer(g.gridloop, (myfunc_scalar,), repetitions=1, comment='myfunc')

    # report
    f = max(t1,t2)  # fastest implementation
    print """
Basic NumPy module: %s

vectorized with eval(expression):   %.2f  %.1f
vectorized with myfunc call:        %.2f  %.1f

scalar versions with NumPy sin function:
loops with inline formula:          %.2f  %.1f
loops with eval(expression):        %.2f  %.1f
loops with myfunc call:             %.2f  %.1f
loops with list and eval:           %.2f  %.1f
loops with list and myfunc:         %.2f  %.1f

scalar versions with math.sin function:
loops with eval(expression_scalar): %.2f  %.1f
loops with myfunc_scalar:           %.2f  %.1f
""" % (basic_NumPy, t1, t1/f, t2, t2/f, t3, t3/f, t4, t4/f, t5, t5/f,
       t6, t6/f, t7, t7/f, t8, t8/f, t9, t9/f)
# Example #5 (0)
        else:
            r[i] = sin(x[i])
    r.shape = x.shape
    return r


def somefunc_NumPy2(x):
    """Return sin(x) elementwise, with 0.0 inserted wherever x<0."""
    sine_values = sin(x)
    negative = less(x, 0)  # true at all positions where x<0
    # keep the sine where x>=0 and substitute 0.0 elsewhere:
    return where(negative, 0.0, sine_values)


# Script part: collect the implementations to be compared and time each
# of them on the same input array x.
somefunc_list = [somefunc_NumPy, somefunc_NumPy2]
try:
    import scipy.special
    # wrap the function somefunc with scipy.special.general_function and
    # give the wrapper a name that tells it apart from the others
    somefunc_SciPy = scipy.special.general_function(somefunc)
    somefunc_SciPy.__name__ = somefunc.__name__ + '_SciPy_vectorized'
    somefunc_list.append(somefunc_SciPy)
except:
    # NOTE(review): the bare except reports any failure in the block
    # above (not only a failed import) as an unsuccessful scipy import
    print 'unsuccessful scipy import, cannot use scipy'
n = 1000000
# presumably the points 0, 1/n, 2/n, ... up to 2 as a Float array --
# TODO confirm against the definition of sequence
x = sequence(0, 2, 1.0 / n, Float)
from py4cs.misc import timer
# one timed call for every implementation in the list ...
for f in somefunc_list:
    timer(f, (x, ), repetitions=1)
# ... and an additional run of somefunc_NumPy2 with 10 repetitions
timer(somefunc_NumPy2, (x, ), repetitions=10)
print 'end of', sys.argv[0]
def allocate_tests(n):
    # Time different ways of creating a list or an array holding n
    # numbers, and print what timer returns for each of them.
    # n: number of elements to create.
    # The nested functions are the benchmark subjects; the differences
    # in how they are coded are what is being measured.

    def list_append1(n):
        # start with an empty list and append the integers i+2
        r = []
        for i in xrange(n):
            r.append(i+2)
        return r

    def list_chunk1(n):
        # allocate a list of length n first, then assign i+2 by index
        r = [0.0]*n
        for i in xrange(n):
            r[i] = i+2
        return r

    def list_append2(n):
        # as list_append1, but each element is random.gauss(0,1)
        r = []
        for i in xrange(n):
            r.append(random.gauss(0,1))
        return r

    def list_chunk2(n):
        # as list_chunk1, but each element is random.gauss(0,1)
        r = [0.0]*n
        for i in xrange(n):
            r[i] = random.gauss(0,1)
        return r

    def list_append3(n):
        # as list_append2, with random.gauss bound to the local name g
        # before the loop
        g = random.gauss
        r = []
        for i in xrange(n):
            r.append(g(0,1))
        return r

    def list_append4(n):
        # as list_append2, written as a list comprehension
        return [random.gauss(0,1) for i in xrange(n)]

    def list_chunk3(n):
        # as list_chunk2, with random.gauss bound to the local name g
        # before the loop
        g = random.gauss
        r = [0.0]*n
        for i in xrange(n):
            r[i] = g(0,1)
        return r

    def NumPy_zeros(n):
        # array of length n and type Float created by zeros
        return zeros(n, Float)

    def NumPy_arange(n):
        # presumably n points from 0 to 1 with spacing 1.0/(n-1) --
        # TODO confirm against the definition of sequence
        # NOTE(review): n == 1 gives a division by zero here
        return sequence(0,1,1.0/(n-1))

    def NumPy_random(n):
        # presumably n normally distributed numbers with mean 0 and
        # standard deviation 1 -- TODO confirm with RandomArray.normal
        import RandomArray
        return RandomArray.normal(0,1,n)


    print 'allocate n numbers in an array/list: n =', n
    # few calls, use timer instead of timeit
    rep = 3
    print timer(NumPy_zeros,  (n,), repetitions=rep)
    print timer(NumPy_arange, (n,), repetitions=rep)
    print timer(NumPy_random, (n,), repetitions=rep)
    print timer(list_append1, (n,), repetitions=rep)
    print timer(list_chunk1,  (n,), repetitions=rep)
    print timer(list_append2, (n,), repetitions=rep)
    print timer(list_append4, (n,), repetitions=rep)
    print timer(list_chunk2,  (n,), repetitions=rep)
    print timer(list_append3, (n,), repetitions=rep)
    print timer(list_chunk3,  (n,), repetitions=rep)
def matrixfill_tests(language, n):

    from math import sin, exp
    if language == 'F77':
        try:
            from matrix_f77 import makematrix, set, tonumpy, adump, \
                 fill1, fill2, lfill1, lfill2
        except:
            print 'Run f2py -c m matrix matrix_f77.f'
            sys.exit(1)
        makematrix(n, n)  # make matrix in F77
    elif language == 'C++':
        try:
            from matrix_cpp import Matrix
            from _matrix_cpp import Matrix_set
            import _matrix_cpp  # for efficiency comparison
        except:
            print 'run make_cpp.sh'
            sys.exit(1)
        m = Matrix(n)

    a = zeros((n, n), Float)

    def setmatrix1_py():
        """Fill NumPy matrix in Python loop."""
        for i in xrange(n):
            for j in xrange(n):
                a[i, j] = i*j-2
        return a

    def setmatrix2_py():
        """Fill NumPy matrix in Python loop; sin/exp formula."""
        for i in xrange(n):
            x = i*0.1
            for j in xrange(n):
                y = j*0.1
                a[i, j] = sin(x)*sin(y)*exp(-x*y)
        return a

    #======== F77 functions =========

    def setmatrix1_f_index():
        """Fill F77 matrix in a Python loop with F77 indexing."""
        for i in xrange(n):
            for j in xrange(n):
                set(i, j, i*j-2)
        r = tonumpy(n, n)
        # could perhaps tune the interface file such that tonumpy
        # doesn't need arguments
        return r

    def setmatrix2_f_index():
        """Fill F77 matrix in a Python loop with F77 indexing; sin/exp."""
        for i in xrange(n):
            x = 0.1*i
            for j in xrange(n):
                y = 0.1*j
                set(i, j, sin(x)*sin(y)*exp(-x*y))
        r = tonumpy(n, n)
        return r

    def setmatrix1_f_loop1():
        """Fill F77 matrix in Fortran loops."""
        fill1()  # all loops in F77
        r = tonumpy(n, n)
        return r

    def setmatrix1_f_loop2():
        """Fill NumPy matrix in Fortran loops."""
        r = lfill1(a)  # all loops in F77, fill NumPy array
        return r

    def setmatrix2_f_loop1():
        """Fill F77 matrix in Fortran loops; sin/exp formula."""
        fill2()  # all loops in F77
        r = tonumpy(n, n)
        return r

    def setmatrix2_f_loop2():
        """Fill NumPy matrix in Fortran loops; sin/exp formula."""
        r = lfill2(a)  # all loops in F77, fill NumPy array
        return r

    #======== C++ functions =========

    def setmatrix1_c_index1():
        """Fill C++ matrix in a Python loop with F77 indexing."""
        for i in xrange(n):
            for j in xrange(n):
                m.set(i, j, i*j-2)
        # could perhaps tune the interface file such that tonumpy
        # doesn't need arguments
        return m

    def setmatrix2_c_index1():
        """Fill F77 matrix in a Python loop with F77 indexing; sin/exp."""
        for i in xrange(n):
            x = 0.1*i
            for j in xrange(n):
                y = 0.1*j
                m.set(i, j, sin(x)*sin(y)*exp(-x*y))
        return m

    def setmatrix1_c_index3():
        """Avoid proxy class, call Matrix_set directly."""
        for i in xrange(n):
            for j in xrange(n):
                Matrix_set(m, i, j, i*j-2)
        # could perhaps tune the interface file such that tonumpy
        # doesn't need arguments
        return m

    def setmatrix2_c_index3():
        """Avoid proxy class, call Matrix_set directly; sin/exp formula."""
        for i in xrange(n):
            x = 0.1*i
            for j in xrange(n):
                y = 0.1*j
                Matrix_set(m, i, j, sin(x)*sin(y)*exp(-x*y))
        return m

    def setmatrix1_c_index4():
        """Avoid proxy class, call _matrix_cpp.Matrix_set directly."""
        for i in xrange(n):
            for j in xrange(n):
                _matrix_cpp.Matrix_set(m, i, j, i*j-2)
        # could perhaps tune the interface file such that tonumpy
        # doesn't need arguments
        return m

    def setmatrix2_c_index4():
        """Avoid proxy class, call _matrix_cpp.Matrix_set directly."""
        for i in xrange(n):
            x = 0.1*i
            for j in xrange(n):
                y = 0.1*j
                _matrix_cpp.Matrix_set(m, i, j, sin(x)*sin(y)*exp(-x*y))
        return m


    def setmatrix1_c_loop1():
        """Fill F77 matrix in Fortran loops."""
        m.fill1()  # all loops in F77
        return m

    def setmatrix2_c_loop1():
        """Fill F77 matrix in Fortran loops; sin/exp formula."""
        m.fill2()  # all loops in F77
        return m


    #======== end of C++ functions =========

    def sort(d):
        """Sort result dictionary: d[problem_description]=time."""
        list = [(key, d[key]) for key in d]
        def s(a, b):
            """sort list of 2-tuples"""
            return cmp(a[1],b[1])  # a,b = (comment,time)
        list.sort(s)
        return list
        
    if language == 'F77':
        res = {}
        # one multiplication in Python for each matrix entry:
        t = timer(setmatrix1_py, (), repetitions=10)
        res['Python loop and NumPy array'] = t
        t = timer(setmatrix1_f_index , (), repetitions=10)
        res['Python loop and F77 indexing in F77 array'] = t
        t = timer(setmatrix1_f_loop1, (), repetitions=100)
        res['F77 loop over F77 array'] = t
        t = timer(setmatrix1_f_loop2, (), repetitions=100)
        res['F77 loop over NumPy array'] = t
        res = sort(res)
        print '\n\n\nTable: a[i,j] = i*j-2\n(one mult+sub per iteration)\n'
        for comment, time in res:
            print '%60s %7.3f' % (comment,time)
        print '\n\n'

        # a sin/exp function expression for each matrix entry:
        res = {}
        t = timer(setmatrix2_py, (), repetitions=10)
        res['Python loop and NumPy array'] = t
        t = timer(setmatrix2_f_index , (), repetitions=10)
        res['Python loop and F77 indexing in F77 array'] = t
        t = timer(setmatrix2_f_loop1, (), repetitions=100)
        res['F77 loop over F77 array'] = t
        t = timer(setmatrix2_f_loop2, (), repetitions=100)
        res['F77 loop over NumPy array'] = t
        res = sort(res)
        print '\n\n\nTable: a[i,j] = sin/exp expression\n'
        for comment, time in res:
            print '%60s %7.3f' % (comment,time)
        print '\n\n'

    elif language == 'C++':
        res = {}
        # one multiplication in Python for each matrix entry:
        t = timer(setmatrix1_py, (), repetitions=10)
        res['Python loop and NumPy array'] = t
        t = timer(setmatrix1_c_index1, (), repetitions=10)
        res['Indexing: m.set'] = t
        t = timer(setmatrix1_c_index3, (), repetitions=10)
        res['Indexing: Matrix_set'] = t
        t = timer(setmatrix1_c_index4, (), repetitions=10)
        res['Indexing: matrix_cpp.Matrix_set'] = t
        t = timer(setmatrix1_c_loop1, (), repetitions=100)
        res['C++ loop over C++ array'] = t
        res = sort(res)
        print '\n\n\nTable: a[i,j] = i*j-2\n(one mult+sub per iteration)\n'
        for comment, time in res:
            print '%60s %7.3f' % (comment,time)
        print '\n\n'

        # a sin/exp function expression for each matrix entry:
        res = {}
        t = timer(setmatrix2_py, (), repetitions=10)
        res['Python loop and NumPy array'] = t
        t = timer(setmatrix2_c_index1, (), repetitions=10)
        res['Indexing: m.set'] = t
        t = timer(setmatrix2_c_index3, (), repetitions=10)
        res['Indexing: Matrix_set'] = t
        t = timer(setmatrix2_c_index4, (), repetitions=10)
        res['Indexing: matrix_cpp.Matrix_set'] = t
        t = timer(setmatrix2_c_loop1, (), repetitions=100)
        res['C++ loop over C++ array'] = t
        res = sort(res)
        print '\n\n\nTable: a[i,j] = sin/exp-expression\n'
        for comment, time in res:
            print '%60s %7.3f' % (comment,time)
        print '\n\n'
def call_nargs_test(rep):
    for n in range(1,101,20):
        code = generate_func_with_many_args(n)
        exec code in globals(), globals()
        print timer(func_with_many_args, tuple(range(n)), repetitions=rep,
                comment='empty func with %d arguments:' % n)
def call_tests(rep, x, y, z):

    try:
        from call import loop, fempty, fwconsts, flops
        fempty.__name__ = 'empty_func in F77'
        fwconsts.__name__ = 'func_with_consts in F77'
    except:
        print 'Run f2py -c -m call call.f'
        sys.exit(1)

    def empty_func(x, y, z):
        pass

    def func_with_consts1(x, y, z):
        a = 0.3
        b = 1.2
        c = 1.22E+02
        q = a*x + b*y + c*z
        return q
    
    def func_with_consts2(x, y, z, a=0.3, b=1.2, c=1.22E+02):
        q = a*x + b*y + c*z
        return q

    def func_with_consts3(x, y, z):
        # hardcoded coefficients
        q = 0.3*x + 1.2*y + 1.22E+02*z
        return q

    def _help(x):
        return x+1
    
    def func_loop_with_call(n):
        r = 0.1
        for i in xrange(n):
            r = _help(r)

    def func_loop_with_inline(n):
        r = 0.1
        for i in xrange(n):
            r = r + 1

            
    # test NumPy vs math sin for scalar arguments:
    # see sin_comparison.py
    
    print '\n*** Testing function calls ***\n'

    fclass1 = """
class MyFunc:
    def __call__(self, x):
        return 2.0
f = MyFunc()
fm = f.__call__
"""

    fclass3 = """
from math import sin
class MyFunc:
    def __call__(self, x, y, z):
        return sin(x)*sin(y)*sin(z)
f = MyFunc()
"""

    setup = """
from py4cs.numpytools import wrap2callable
from math import sin
f = wrap2callable"""
    t1 = timeit.Timer('f(0.9)', setup=setup+'(2.0)').timeit(rep)
    t2 = timeit.Timer('f(0.9)', setup=setup+'(lambda x: 2.0)').timeit(rep)
    t3 = timeit.Timer('f(0.9)', setup=setup+'("2.0")').timeit(rep)
    t4 = timeit.Timer('f(0.9)', setup=fclass1).timeit(rep)
    t4b = timeit.Timer('fm(0.9)', setup=fclass1).timeit(rep)
    t5 = timeit.Timer('f(0.9)', setup='def f(x): return 2.0').timeit(rep)
    t6 = timeit.Timer('f(0.9, 0.1, 1)', setup=setup+'(2.0)').timeit(rep)
    t7 = timeit.Timer('f(0.9, 0.1, 1)', setup=setup+'(lambda x,y,z: 2.0)').timeit(rep)
    t8 = timeit.Timer('f(0.9, 0.1, 1)', setup=setup+'("2.0")').timeit(rep)
    t9 = timeit.Timer('f(1,1,1)',
         setup=setup+'(lambda x,y,z: sin(x)*sin(y)*sin(z))').timeit(rep)
    t10 = timeit.Timer('f(1,1,1)',
         setup=setup+'("sin(x)*sin(y)*sin(z)", ' + \
                      'independent_variables=("x","y","z"))').timeit(rep)
    t11 = timeit.Timer('f(1,1,1)', setup=fclass3).timeit(rep)
    best = min(t1, t2, t3, t4, t5, t6, t7, t8, t9 ,t10, t11)
    best3 = min(t9, t10, t11)
    print """
overhead with wrap2callable: constant function 2.0      (best=%f)
f = wrap2callable(2.0);  f(0.9)                         %.2f  const
f = wrap2callable(lambda x: 2.0);  f(0.9)               %.2f  func
f = wrap2callable("2.0");  f(0.9)                       %.2f  StringFunction
function object                                         %.2f 
%s
function object f: fm = f.__call__, fm(0.9)             %.2f 
def f(x): return 2.0                                    %.2f  func

increasing the number of arguments:
f = wrap2callable(2.0);  f(0.9, 0.1, 1)                 %.2f  const
f = wrap2callable(lambda x,y,z: 2.0);  f(0.9, 0.1, 1)   %.2f  func
f = wrap2callable("2.0");  f(0.9, 0.1, 1)               %.2f  StringFunction

f = wrap2callable(lambda x,y,z: sin(x)*sin(y)*sin(z))   %.2f  %.2f
f = wrap2callable("sin(x)*sin(y)*sin(z)");  f(1,1,1)    %.2f  %.2f
function object                                         %.2f  %.2f
%s
""" % (best, t1/best, t2/best, t3/best, t4/best, fclass1, t4b/best,
       t5/best, t6/best, t7/best, t8/best, t9/best, t9/best3, 
       t10/best, t10/best3, t11/best, t11/best3, fclass3)

    # F77 versions:
    print timer(empty_func, (x, y, z), repetitions=rep)
    f77rep = 100
    c = timer(loop, (f77rep*rep, 'fempty'), repetitions=1,
              comment='loop over fempty in  F77:')
    print c/float(f77rep*rep)  # time for a single call
    print timer(fwconsts, (x, y, z), repetitions=rep)
    c = timer(loop, (f77rep*rep, 'fwconsts'), repetitions=1,
              comment='loop over fwconsts in F77:')
    print c/float(f77rep*rep)

    print timer(empty_func, (x, y, z), repetitions=rep, comment='no body:')
    print timer(func_with_consts1, (x, y, z), repetitions=rep,
                comment='constants in statements:')
    print timer(func_with_consts2, (x, y, z), repetitions=rep,
                comment='constants as default kwargs:')
    print timer(func_with_consts3, (x, y, z), repetitions=rep,
                comment='constants hardcoded:')

    print timer(func_loop_with_call, (rep,), repetitions=1,
                comment='loop with function call:')

    print timer(func_loop_with_inline, (rep,), repetitions=1,
                comment='loop with inline expression:')


    m = 10*rep
    t1 = timeit.Timer('myfunc(2)', setup='def myfunc(x): return x').timeit(m)
    t2 = timeit.Timer('isinstance(x,list)', setup='x=(1,2,3)').timeit(m)
    print 'isinstance is %.2f times slower than a trivial func' % (t2/t1)
    
    call_nargs_test(rep)

    """
        print 'import error (from call import ...):', msg
        print 'Run f2py -c -m call call.f'
        sys.exit(1)


    # 2D grid:
    print '\n\n'
    m0 = sqrt(n)
    #m0 = 1600
    for j in (4, 2, 1):
        m = m0/j
        print '\n\ninitializing a %dx%d array\n' % (m,m)
        x = seq(0, 1, 1/float(m-1))
        y = x.copy()
        u = zeros((len(x), len(y)), Float)
        t1 = timer(py_loop3_2Dsincos, args=(x,y), repetitions=1*j)
        t1 = timer(py_loop2_2Dsincos, args=(x,y), repetitions=1*j)
        t1 = timer(py_loop1_2Dsincos, args=(x,y), repetitions=1*j)
        print 'pure Python loop:%d u[i,j]=sin(x[i])*cos(y[j]):' % size(u), t1
        from py4cs.numpytools import sin, cos  # ensure vectorized versions
        t2 = timer(NumPy_loop1_2Dsincos, args=(x,y), repetitions=20*j)
        print 'NumPy:%d u=sin(x)*cos(y):' % size(u), t2
        # numarray does not work 100% with f2py:
        if not isinstance(u, NumArray):
            import call
            u = call.as_column_major_storage(u)
            t3a = timer(F77_loop1_2Dsincos, args=(u,x,y), repetitions=20*j,
                        comment='call I1')
            t3b = timer(F77_loop2_2Dsincos, args=(u,x,y), repetitions=20*j,
                        comment='inline')
            t3c = timer(F77_loop3_2Dsincos, args=(u,x,y), repetitions=20*j,