def check_mask(self, expected, result):
    # There's no guarantee that TBB will use a full mask worth of
    # threads if it deems it inefficient to do so
    if threading_layer() == 'tbb':
        self.assertTrue(np.all(result <= expected))
    elif threading_layer() in ('omp', 'workqueue'):
        np.testing.assert_equal(expected, result)
    else:
        assert 0, 'unreachable'
Example #2
def numba_environment() -> Dict[str, Any]:
    """return information about the numba setup used

    Returns:
        (dict) information about the numba setup
    """
    # determine whether Nvidia Cuda is available
    try:
        from numba import cuda

        cuda_available = cuda.is_available()
    except ImportError:
        cuda_available = False

    # determine whether AMD ROC is available
    try:
        from numba import roc

        roc_available = roc.is_available()
    except ImportError:
        roc_available = False

    # determine threading layer
    try:
        threading_layer = nb.threading_layer()
    except ValueError:
        # threading layer was not initialized, so compile a mock function
        @nb.jit("i8()", parallel=True)
        def f():
            s = 0
            for i in nb.prange(4):
                s += i
            return s

        f()
        try:
            threading_layer = nb.threading_layer()
        except ValueError:  # cannot initialize threading
            threading_layer = None
    except AttributeError:  # old numba version
        threading_layer = None

    return {
        "version": nb.__version__,
        "parallel": NUMBA_PARALLEL,
        "fastmath": NUMBA_FASTMATH,
        "debug": NUMBA_DEBUG,
        "using_svml": nb.config.USING_SVML,
        "threading_layer": threading_layer,
        "omp_num_threads": os.environ.get("OMP_NUM_THREADS"),
        "mkl_num_threads": os.environ.get("MKL_NUM_THREADS"),
        "num_threads": nb.config.NUMBA_NUM_THREADS,
        "num_threads_default": nb.config.NUMBA_DEFAULT_NUM_THREADS,
        "cuda_available": cuda_available,
        "roc_available": roc_available,
    }
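A minimal standalone sketch of the initialization trick used above: numba only reports a threading layer once a parallel region has actually executed, so a tiny parallel function is compiled and run first (the helper name below is illustrative, not part of the example).

import numba as nb

@nb.njit(parallel=True)
def _touch_threading_layer():
    s = 0
    for i in nb.prange(4):
        s += i
    return s

_touch_threading_layer()        # forces numba to select a threading layer
print(nb.threading_layer())     # e.g. 'tbb', 'omp' or 'workqueue'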
Example #4
    def __init__(self,
                 *,
                 options: Options,
                 n_dims: (int, None) = None,
                 non_unit_g_factor: bool = False,
                 grid: (tuple, None) = None,
                 n_threads: (int, None) = None):
        self.options = options

        if n_dims is not None and grid is not None:
            raise ValueError()
        if n_dims is None and grid is None:
            raise ValueError()
        if grid is None:
            grid = tuple([-1] * n_dims)
        if n_dims is None:
            n_dims = len(grid)

        if n_threads is None:
            n_threads = numba.get_num_threads()
        self.n_threads = 1 if n_dims == 1 else n_threads
        if self.n_threads > 1 and numba.threading_layer() == 'workqueue':
            warnings.warn(
                "Numba is using the ``workqueue'' threading layer, switch"
                " to ``omp'' or ``tbb'' for higher parallel performance"
                " (see https://numba.pydata.org/numba-doc/latest/user/threading-layer.html)"
            )

        self.n_dims = n_dims
        self.__call = make_step_impl(options, non_unit_g_factor, grid,
                                     self.n_threads)
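The workqueue check above can be reproduced outside the class; a hedged sketch, assuming the threading layer may not have been selected yet (in which case numba.threading_layer() raises ValueError):

import warnings
import numba

try:
    layer = numba.threading_layer()   # raises ValueError before the first parallel run
except ValueError:
    layer = None

if layer == 'workqueue' and numba.get_num_threads() > 1:
    warnings.warn("Numba is using the 'workqueue' threading layer; "
                  "'omp' or 'tbb' usually give better parallel performance")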
Example #5
def test_numba_info():
    ni = d.numba_info()
    if numba.config.DISABLE_JIT:
        assert ni is None
    else:
        assert ni is not None
        assert ni.threading == numba.threading_layer()
        assert ni.threads == numba.get_num_threads()
Example #6
    def _test_nested_parallelism_3(self):
        if threading_layer() == 'workqueue':
            self.skipTest("workqueue is not threadsafe")

        # check that the right number of threads are present in nesting
        # this relies on there being a load of cores present
        BIG = 1000000

        @njit(parallel=True)
        def work(local_nt):  # arg is value 3
            tid = np.zeros(BIG)
            acc = 0
            set_num_threads(local_nt)  # set to 3 threads
            for i in prange(BIG):
                acc += 1
                tid[i] = _get_thread_id()
            return acc, np.unique(tid)

        @njit(parallel=True)
        def test_func_jit(nthreads):
            set_num_threads(nthreads)  # set to 2 threads
            lens = np.zeros(nthreads)
            total = 0
            for i in prange(nthreads):
                my_acc, tids = work(nthreads + 1)  # call with value 3
                lens[i] = len(tids)
                total += my_acc
            return total, np.unique(lens)

        NT = 2
        expected_acc = BIG * NT
        expected_thread_count = NT + 1

        got_acc, got_tc = test_func_jit(NT)
        self.assertEqual(expected_acc, got_acc)
        self.check_mask(expected_thread_count, got_tc)

        def test_guvectorize(nthreads):
            @guvectorize(['int64[:], int64[:]'],
                         '(n), (n)',
                         nopython=True,
                         target='parallel')
            def test_func_guvectorize(total, lens):
                my_acc, tids = work(nthreads + 1)
                lens[0] = len(tids)
                total[0] += my_acc

            total = np.zeros((nthreads, 1), dtype=np.int64)
            lens = np.zeros(nthreads, dtype=np.int64).reshape((nthreads, 1))

            test_func_guvectorize(total, lens)
            # vectorize does not reduce, so total is summed
            return total.sum(), np.unique(lens)

        got_acc, got_tc = test_guvectorize(NT)

        self.assertEqual(expected_acc, got_acc)
        self.check_mask(expected_thread_count, got_tc)
Example #7
def time_script():
    res_file_name_quad = 'quad.csv'    # output data file name (quad timings)
    res_file_name_total = 'total.csv'  # output data file name (total timings)
    max_input_size_mag = 6             # maximum input size (power of 10)
    num_points = 200                   # number of input sizes sampled
    trial = 5                          # number of timed trials per input size
    num_device = 1                     # number of GPUs used

    config.THREADING_LAYER = 'threadsafe'
    set_num_threads(12)                # numba: number of concurrent CPU threads 
    print("Threading layer chosen: %s" % threading_layer())
    
    ## Header:
    #  [run index, num inputs, cpu_result (ms), gpu_result (ms)]
    result_quad = np.zeros((num_points, 4))
    result_total= np.zeros((num_points, 4))

    this_result_cpu_total = np.zeros(trial)
    this_result_gpu_total = np.zeros(trial)
    this_result_cpu_quad = np.zeros(trial)
    this_result_gpu_quad = np.zeros(trial)

    # generate input sizes, spaced logarithmically between 10**1 and 10**max_input_size_mag
    for idx, in_size in enumerate(np.logspace(1, max_input_size_mag, num=num_points)):
    # for idx, in_size in enumerate(np.linspace(1e5, 1e6, num_points)):
        result_quad[idx, 0] = idx
        result_quad[idx, 1] = int(in_size)
        result_total[idx, 0] = idx
        result_total[idx, 1] = int(in_size)

        state, spacing = initialize(int(in_size))
        # output from GPU

        for i in range(0, trial, 1):
            # GPU time
            rhs, total_time, quad_time = single_advance_gpu(state, int(in_size), spacing)
            this_result_gpu_total[i] = total_time
            this_result_gpu_quad[i] = quad_time

            # numba time: 
            rhs, total_time, quad_time = single_advance_cpu(state, int(in_size), spacing)
            this_result_cpu_total[i] = total_time
            this_result_cpu_quad[i] = quad_time

        result_quad[idx, 2] = np.min(this_result_cpu_quad)
        result_quad[idx, 3] = np.min(this_result_gpu_quad)
        result_total[idx, 2] = np.min(this_result_cpu_total)
        result_total[idx, 3] = np.min(this_result_gpu_total)
    
        print("[{}/{}] running on {} inputs, CPU: {:4f}, GPU: {:4f}".format(
            idx, num_points, int(in_size), result_quad[idx, 2], result_quad[idx, 3]))


    np.savetxt(res_file_name_quad, result_quad, delimiter=',')
    np.savetxt(res_file_name_total, result_total, delimiter=',')
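The layer request in the script above must happen before the first parallel function is compiled; a minimal sketch of that ordering (the thread count of 4 is only illustrative and must not exceed numba.config.NUMBA_NUM_THREADS):

from numba import config, njit, prange, set_num_threads, threading_layer

config.THREADING_LAYER = 'threadsafe'   # request any thread-safe layer (tbb or omp)
set_num_threads(4)                      # illustrative value

@njit(parallel=True)
def _accumulate(n):
    s = 0.0
    for i in prange(n):
        s += i
    return s

_accumulate(10)
print("Threading layer chosen: %s" % threading_layer())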
Example #8
def numba_info():
    x = _par_test(100)
    _log.debug('sum: %d', x)

    try:
        layer = numba.threading_layer()
    except ValueError:
        _log.info('Numba threading not initialized')
        return None
    _log.info('numba threading layer: %s', layer)
    nth = numba.get_num_threads()
    return NumbaInfo(layer, nth)
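A self-contained sketch of the same pattern without the package-internal helpers (_par_test, NumbaInfo and _log belong to the module above); the field names follow the test in Example #5 (ni.threading, ni.threads):

from collections import namedtuple
import numba

NumbaInfoSketch = namedtuple('NumbaInfoSketch', 'threading threads')

def numba_info_sketch():
    try:
        layer = numba.threading_layer()
    except ValueError:
        return None                     # threading layer not initialized yet
    return NumbaInfoSketch(layer, numba.get_num_threads())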
Example #9
def check_threading_layer():
    """
    Check which numba threading_layer is active, and warn if it is "workqueue".
    """
    _dummy_numba(np.ones(1))
    try:
        if threading_layer() == "workqueue":
            warn(
                'Using `numba.threading_layer()=="workqueue"` can be devastatingly slow! See https://numba.pydata.org/numba-doc/latest/user/threading-layer.html for alternatives.',
                SliseWarning,
            )
    except ValueError as e:
        warn(f"Numba: {e}", SliseWarning)
Example #10
def time_script():
    res_file_name = 'chyqmom4_res_2.csv'  # output data file name
    max_input_size_mag = 6             # maximum input size (power of 10)
    num_points = 200                   # number of input sizes sampled
    trial = 5                          # number of timed trials per input size
    num_device = 1                     # number of GPUs used

    config.THREADING_LAYER = 'threadsafe'
    set_num_threads(12)                # numba: number of concurrent CPU threads 
    print("Threading layer chosen: %s" % threading_layer())
    
    ## Header:
    #  [run index, num inputs, cpu_result (ms), gpu_result (ms)]
    result = np.zeros((num_points, 4))

    this_result_cpu = np.zeros(trial)
    this_result_gpu = np.zeros(trial)

    # generate input sizes, spaced logarithmically between 10**1 and 10**max_input_size_mag
    for idx, in_size in enumerate(np.logspace(1, max_input_size_mag, num=num_points)):
    # for idx, in_size in enumerate(np.linspace(1e5, 1e6, num_points)):
        result[idx, 0] = idx
        result[idx, 1] = int(in_size)

        this_moment = init_moment_27(int(in_size))
        # output from GPU

        for i in range(0, trial, 1):
            # GPU time
            try:
                this_result_gpu[i], w, x, y, z = chyqmom27(this_moment, int(in_size))
            except Exception:
                pass
            # chyqmom27(this_moment, int(in_size))
            # numba time: 
            start_time = time.perf_counter()
            chyqmom27_cpu(this_moment.transpose(), int(in_size))
            stop_time = time.perf_counter()
            this_result_cpu[i] = (stop_time - start_time) * 1e3 #ms

            w.free()
            x.free()
            y.free()
            z.free()
        result[idx, 2] = np.min(this_result_cpu)   # keep the input size in column 1
        result[idx, 3] = np.min(this_result_gpu)
        print("[{}/{}] running on {} inputs, CPU: {:.4f}, GPU: {:.4f}".format(
            idx, num_points, int(in_size), result[idx, 2], result[idx, 3]))

    np.savetxt(res_file_name, result, delimiter=',')
Example #11
    def _test_nested_parallelism_1(self):
        if threading_layer() == 'workqueue':
            self.skipTest("workqueue is not threadsafe")

        # check that get_num_threads is ok in nesting
        mask = config.NUMBA_NUM_THREADS - 1

        N = config.NUMBA_NUM_THREADS
        M = 2 * config.NUMBA_NUM_THREADS

        @njit(parallel=True)
        def child_func(buf, fid):
            M, N = buf.shape
            for i in prange(N):
                buf[fid, i] = get_num_threads()

        def get_test(test_type):
            if test_type == 'njit':

                def test_func(nthreads, py_func=False):
                    @njit(parallel=True)
                    def _test_func(nthreads):
                        acc = 0
                        buf = np.zeros((M, N))
                        set_num_threads(nthreads)
                        for i in prange(M):
                            local_mask = 1 + i % mask
                            # set threads in parent function
                            set_num_threads(local_mask)
                            if local_mask < N:
                                child_func(buf, local_mask)
                            acc += get_num_threads()
                        return acc, buf

                    if py_func:
                        return _test_func.py_func(nthreads)
                    else:
                        return _test_func(nthreads)

            elif test_type == 'guvectorize':

                def test_func(nthreads, py_func=False):
                    def _test_func(acc, buf, local_mask):
                        set_num_threads(nthreads)
                        # set threads in parent function
                        set_num_threads(local_mask[0])
                        if local_mask[0] < N:
                            child_func(buf, local_mask[0])
                        acc[0] += get_num_threads()

                    buf = np.zeros((M, N), dtype=np.int64)
                    acc = np.zeros((M, 1), dtype=np.int64)
                    local_mask = (1 + np.arange(M) % mask).reshape((M, 1))
                    sig = ['void(int64[:], int64[:, :], int64[:])']
                    layout = '(p), (n, m), (p)'
                    if not py_func:
                        _test_func = guvectorize(sig,
                                                 layout,
                                                 nopython=True,
                                                 target='parallel')(_test_func)
                    else:
                        _test_func = guvectorize(sig, layout,
                                                 forceobj=True)(_test_func)
                    _test_func(acc, buf, local_mask)
                    return acc, buf

            return test_func

        for test_type in ['njit', 'guvectorize']:
            test_func = get_test(test_type)
            got_acc, got_arr = test_func(mask)
            exp_acc, exp_arr = test_func(mask, py_func=True)
            np.testing.assert_equal(exp_acc, got_acc)
            np.testing.assert_equal(exp_arr, got_arr)

            # check the maths reconciles, guvectorize does not reduce, njit does
            math_acc_exp = 1 + np.arange(M) % mask
            if test_type == 'guvectorize':
                math_acc = math_acc_exp.reshape((M, 1))
            else:
                math_acc = np.sum(math_acc_exp)

            np.testing.assert_equal(math_acc, got_acc)

            math_arr = np.zeros((M, N))
            for i in range(1, N):
                # there's branches on 1, ..., num_threads - 1
                math_arr[i, :] = i
            np.testing.assert_equal(math_arr, got_arr)
Example #12
tic = time()
pygbm_model = GradientBoostingClassifier(loss='binary_crossentropy',
                                         learning_rate=lr,
                                         max_iter=n_trees,
                                         max_bins=max_bins,
                                         max_leaf_nodes=n_leaf_nodes,
                                         random_state=0,
                                         scoring=None,
                                         verbose=1,
                                         validation_split=None)
pygbm_model.fit(data_train, target_train)
toc = time()
predicted_test = pygbm_model.predict(data_test)
roc_auc = roc_auc_score(target_test, predicted_test)
acc = accuracy_score(target_test, predicted_test)
print(f"done in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}")

if hasattr(numba, 'threading_layer'):
    print("Threading layer chosen: %s" % numba.threading_layer())

if not args.no_lightgbm:
    print("Fitting a LightGBM model...")
    tic = time()
    lightgbm_model = get_lightgbm_estimator(pygbm_model)
    lightgbm_model.fit(data_train, target_train)
    toc = time()
    predicted_test = lightgbm_model.predict(data_test)
    roc_auc = roc_auc_score(target_test, predicted_test)
    acc = accuracy_score(target_test, predicted_test)
    print(f"done in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc :.4f}")
Example #13
    rhof = p / (pc_r_d * t * (1.0 + pc_rvd_o * qv - qc - qi))
    return rhof


if __name__ == '__main__':
    shapes = [128 * 128 * 80]
    t = np.ones(shapes, dtype=np.float64)
    p = np.ones(shapes, dtype=np.float64)
    qv = np.ones(shapes, dtype=np.float64)
    qc = np.ones(shapes, dtype=np.float64)
    qi = np.ones(shapes, dtype=np.float64)
    rhof = np.ones(shapes, dtype=np.float64)

    start = time.time()
    rhof = rho(t, p, qv, qc, qi)
    #  rho.parallel_diagnostics(level=4)
    end = time.time()
    print("Elapsed (with compilation) = %s" % (end - start))

    times = np.empty(100)
    for count in range(100):
        start = time.time()
        rho(t, p, qv, qc, qi)
        end = time.time()
        times[count] = (end - start)

    print(times)
    print("Elapsed time = {0}, {1}".format(np.mean(times), np.std(times)))
    print("Threading layer chosen: %s" % threading_layer())
    print("Num threads: %s" % numba.get_num_threads())
Example #14
    def _test_nested_parallelism_2(self):
        if threading_layer() == 'workqueue':
            self.skipTest("workqueue is not threadsafe")

        # check that get_num_threads is ok in nesting

        N = config.NUMBA_NUM_THREADS + 1
        M = 4 * config.NUMBA_NUM_THREADS + 1

        def get_impl(child_type, test_type):

            if child_type == 'parallel':
                child_dec = njit(parallel=True)
            elif child_type == 'njit':
                child_dec = njit(parallel=False)
            elif child_type == 'none':

                def child_dec(x):
                    return x

            @child_dec
            def child(buf, fid):
                M, N = buf.shape
                set_num_threads(fid)  # set threads in child function
                for i in prange(N):
                    buf[fid, i] = get_num_threads()

            if test_type in ['parallel', 'njit', 'none']:
                if test_type == 'parallel':
                    test_dec = njit(parallel=True)
                elif test_type == 'njit':
                    test_dec = njit(parallel=False)
                elif test_type == 'none':

                    def test_dec(x):
                        return x

                @test_dec
                def test_func(nthreads):
                    buf = np.zeros((M, N))
                    set_num_threads(nthreads)
                    for i in prange(M):
                        local_mask = 1 + i % mask
                        # when the threads exit the child functions they should
                        # have a TLS slot value of the local mask as it was set
                        # in child
                        if local_mask < config.NUMBA_NUM_THREADS:
                            child(buf, local_mask)
                            assert get_num_threads() == local_mask
                    return buf
            else:
                if test_type == 'guvectorize':
                    test_dec = guvectorize(['int64[:,:], int64[:]'],
                                           '(n, m), (k)',
                                           nopython=True,
                                           target='parallel')
                elif test_type == 'guvectorize-obj':
                    test_dec = guvectorize(['int64[:,:], int64[:]'],
                                           '(n, m), (k)',
                                           forceobj=True)

                def test_func(nthreads):
                    @test_dec
                    def _test_func(buf, local_mask):
                        set_num_threads(nthreads)
                        # when the threads exit the child functions they should
                        # have a TLS slot value of the local mask as it was set
                        # in child
                        if local_mask[0] < config.NUMBA_NUM_THREADS:
                            child(buf, local_mask[0])
                            assert get_num_threads() == local_mask[0]

                    buf = np.zeros((M, N), dtype=np.int64)
                    local_mask = (1 + np.arange(M) % mask).reshape((M, 1))
                    _test_func(buf, local_mask)
                    return buf

            return test_func

        mask = config.NUMBA_NUM_THREADS - 1

        res_arrays = {}
        for test_type in [
                'parallel', 'njit', 'none', 'guvectorize', 'guvectorize-obj'
        ]:
            for child_type in ['parallel', 'njit', 'none']:
                if child_type == 'none' and test_type != 'none':
                    continue
                set_num_threads(mask)
                res_arrays[test_type,
                           child_type] = get_impl(child_type, test_type)(mask)

        py_arr = res_arrays['none', 'none']
        for arr in res_arrays.values():
            np.testing.assert_equal(arr, py_arr)

        # check the maths reconciles
        math_arr = np.zeros((M, N))
        # there's branches on modulo mask but only NUMBA_NUM_THREADS funcs
        for i in range(1, config.NUMBA_NUM_THREADS):
            math_arr[i, :] = i

        np.testing.assert_equal(math_arr, py_arr)
Example #15
def set_numba_threading():
    """Set the numba threading layer.

    For parallel numba jit blocks, the backend threading layer is selected at runtime
    based on an order set inside the numba package.  We would like to change the
    order of selection to prefer one of the thread-based backends (omp or tbb).  We also
    set the maximum number of threads used by numba to be the same as the number of
    threads used by TOAST.  Since TOAST does not use numba, it means that there will
    be a consistent maximum number of threads in use at all times and no
    oversubscription.

    Args:
        None

    Returns:
        None

    """
    global numba_threading_layer
    if numba_threading_layer is not None:
        # Already set.
        return

    # Get the number of threads used by TOAST at runtime.
    env = Environment.get()
    log = Logger.get()
    toastthreads = env.max_threads()
    print("max toast threads = ", toastthreads, flush=True)

    rank = 0
    if env.use_mpi4py():
        from .mpi import MPI

        rank = MPI.COMM_WORLD.rank

    threading = "default"
    have_numba_omp = False
    try:
        # New style package layout
        from numba.np.ufunc import omppool

        have_numba_omp = True
        if rank == 0:
            log.debug("Numba has OpenMP threading support")
    except ImportError:
        try:
            # Old style
            from numba.npyufunc import omppool

            have_numba_omp = True
            if rank == 0:
                log.debug("Numba has OpenMP threading support")
        except ImportError:
            # no OpenMP support
            if rank == 0:
                log.debug("Numba does not support OpenMP")
    have_numba_tbb = False
    try:
        # New style package layout
        from numba.np.ufunc import tbbpool

        have_numba_tbb = True
        if rank == 0:
            log.debug("Numba has TBB threading support")
    except ImportError:
        try:
            # Old style
            from numba.npyufunc import tbbpool

            have_numba_tbb = True
            if rank == 0:
                log.debug("Numba has TBB threading support")
        except ImportError:
            # no TBB
            if rank == 0:
                log.debug("Numba does not support TBB")

    # Prefer OMP backend
    if have_numba_omp:
        threading = "omp"
    elif have_numba_tbb:
        threading = "tbb"

    try:
        from numba import vectorize, config, threading_layer

        # Set threading layer and number of threads.  Note that this still
        # does not always work.  The conf structure is repopulated from the
        # environment on every compilation if any of the NUMBA_* variables
        # have changed.
        config.THREADING_LAYER = threading
        config.NUMBA_DEFAULT_NUM_THREADS = toastthreads
        config.NUMBA_NUM_THREADS = toastthreads
        os.environ["NUMBA_THREADING_LAYER"] = threading
        os.environ["NUMBA_DEFAULT_NUM_THREADS"] = "{:d}".format(toastthreads)
        os.environ["NUMBA_NUM_THREADS"] = "{:d}".format(toastthreads)

        # In order to get numba to actually select a threading layer, we must
        # trigger compilation of a parallel function.
        @vectorize("float64(float64)", target="parallel")
        def force_thread_launch(x):
            return x + 1

        force_thread_launch(np.zeros(1))

        # Log the layer that was selected
        numba_threading_layer = threading_layer()
        if rank == 0:
            log.debug("Numba threading layer set to {}".format(numba_threading_layer))
            log.debug(
                "Numba max threads now forced to {}".format(config.NUMBA_NUM_THREADS)
            )
    except ImportError:
        # Numba not available at all
        if rank == 0:
            log.debug("Cannot import numba- ignoring threading layer.")
Example #16
        z = T * sig_sig_two
        c = 0.25 * z
        y = 1./sqrt(z)

        w1 = (a - b + c) * y
        w2 = (a - b - c) * y

        d1 = 0.5 + 0.5 * erf(w1)
        d2 = 0.5 + 0.5 * erf(w2)

        Se = exp(b) * S

        r  = P * d1 - Se * d2
        call [i] = r
        put [i] = r - P + Se

@nb.guvectorize('(f8[::1],f8[::1],f8[::1],f8[:],f8[:],f8[::1],f8[::1])',
    '(a),(a),(a),(),()->(a),(a)', nopython=True, target="parallel")
def black_scholes_numba_vec(price, strike, t, mr, sig_sig_two, call, put):
    black_scholes_jit( price, strike, t, mr[0], sig_sig_two[0], call, put)

@nb.jit
def black_scholes(nopt, price, strike, t, rate, vol, call, put):
    sig_sig_two = vol*vol*2
    mr = -rate
    black_scholes_numba_vec(price.reshape((-1,512)), strike.reshape((-1,512)), t.reshape((-1,512)),
                                mr, sig_sig_two, call.reshape((-1,512)), put.reshape((-1,512)))

base_bs_erf.run("Numba@guvec-par-simd", black_scholes, pass_args=True)
print("Threading layer:", nb.threading_layer())
Example #17
    )
    exit()

#read input
C, dim = Q3AP_instance.read_input(sys.argv[1])
moves = generateNhood(dim)

tstart = time.time()

#call once to trigger compilation (Warmup)
sol = solution(np.arange(dim, dtype=np.int64), np.arange(dim, dtype=np.int64),
               0)
sol, nhood_evals = runILS(sol, 1)
elapsed_time = time.time() - tstart
print("Time (First LS/Compilation):\t", elapsed_time)
print("Using Threading layer: %s\n" % threading_layer())

#initial solutions (random)
sol = solution(np.random.permutation(dim), np.random.permutation(dim), 0)
sol.cost = eval(sol)

#nb iterations (ILS outer loop)
ils_iter = 100
ils_iter = int(sys.argv[2])

tstart = time.time()
sol, nhood_evals = runILS(sol, ils_iter)
elapsed_time = time.time() - tstart

print("Best Solution:\n\t", sol.perm1, sol.perm2, "\n\t Cost:\t", sol.cost)
print('nhood-eval:\t', nhood_evals)
Example #18
def main(nqubits,
         circuit_name,
         backend="custom",
         precision="double",
         nreps=1,
         nshots=None,
         transfer=False,
         fuse=False,
         device=None,
         accelerators=None,
         threadsafe=False,
         compile=False,
         get_branch=True,
         nlayers=None,
         gate_type=None,
         params={},
         filename=None):
    """Runs circuit simulation benchmarks for different circuits.

    See benchmark documentation for a description of arguments.
    """
    qibo.set_backend(backend)
    qibo.set_precision(precision)
    if device is not None:
        qibo.set_device(device)

    logs = BenchmarkLogger(filename)
    # Create log dict
    logs.append({
        "nqubits": nqubits,
        "circuit_name": circuit_name,
        "threading": "",
        "backend": qibo.get_backend(),
        "precision": qibo.get_precision(),
        "device": qibo.get_device(),
        "accelerators": accelerators,
        "nshots": nshots,
        "transfer": transfer,
        "fuse": fuse,
        "compile": compile,
    })
    if get_branch:
        logs[-1]["branch"] = get_active_branch_name()

    params = {k: v for k, v in params.items() if v is not None}
    kwargs = {"nqubits": nqubits, "circuit_name": circuit_name}
    if params: kwargs["params"] = params
    if nlayers is not None: kwargs["nlayers"] = nlayers
    if gate_type is not None: kwargs["gate_type"] = gate_type
    if accelerators is not None:
        kwargs["accelerators"] = accelerators
    logs[-1].update(kwargs)

    start_time = time.time()
    circuit = circuits.CircuitFactory(**kwargs)
    if nshots is not None:
        # add measurement gates
        circuit.add(qibo.gates.M(*range(nqubits)))
    if fuse:
        circuit = circuit.fuse()
    logs[-1]["creation_time"] = time.time() - start_time

    start_time = time.time()
    if compile:
        circuit.compile()
        # Try executing here so that compile time is not included
        # in the simulation time
        result = circuit(nshots=nshots)
        del (result)
    logs[-1]["compile_time"] = time.time() - start_time

    start_time = time.time()
    result = circuit(nshots=nshots)
    logs[-1]["dry_run_time"] = time.time() - start_time
    start_time = time.time()
    if transfer:
        result = result.numpy()
    logs[-1]["dry_run_transfer_time"] = time.time() - start_time
    del (result)

    simulation_times, transfer_times = [], []
    for _ in range(nreps):
        start_time = time.time()
        result = circuit(nshots=nshots)
        simulation_times.append(time.time() - start_time)
        start_time = time.time()
        if transfer:
            result = result.numpy()
        transfer_times.append(time.time() - start_time)
        logs[-1]["dtype"] = str(result.dtype)
        if nshots is None:
            del (result)

    logs[-1]["simulation_times"] = simulation_times
    logs[-1]["transfer_times"] = transfer_times
    logs[-1]["simulation_times_mean"] = np.mean(simulation_times)
    logs[-1]["simulation_times_std"] = np.std(simulation_times)
    logs[-1]["transfer_times_mean"] = np.mean(transfer_times)
    logs[-1]["transfer_times_std"] = np.std(transfer_times)

    start_time = time.time()
    if nshots is not None:
        freqs = result.frequencies()
    logs[-1]["measurement_time"] = time.time() - start_time

    if logs[-1]["backend"] == "qibojit" and qibo.K.get_platform() == "numba":
        from numba import threading_layer
        logs[-1]["threading"] = threading_layer()

    print()
    print(logs)
    print()
    logs.dump()