예제 #1
0
파일: tools.py 프로젝트: simudream/boxtree
def make_normal_particle_array(queue, nparticles, dims, dtype, seed=15):
    """Return an object array holding one normally distributed array per axis.

    A single ``RanluxGenerator`` seeded with *seed* is created on *queue*;
    ``normal`` is then drawn from it *dims* times, each draw requesting
    *nparticles* values of type *dtype*.
    """
    from pyopencl.clrandom import RanluxGenerator

    generator = RanluxGenerator(queue, seed=seed)

    coordinates = []
    for _axis in range(dims):
        coordinates.append(generator.normal(queue, nparticles, dtype=dtype))

    return make_obj_array(coordinates)
예제 #2
0
def test_random(ctx_factory):
    """Check that RANLUX uniform draws stay inside their requested interval.

    Floating-point draws are checked for the default interval and for
    ``a=4, b=7`` over several array sizes; a normal draw is issued as well
    (its values are not inspected).  Afterwards a large ``int32`` uniform
    draw is checked against ``[200, 300)``.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import RanluxGenerator

    # float64 is only exercised when the first device supports it
    float_dtypes = [np.float32]
    if has_double_support(context.devices[0]):
        float_dtypes.append(np.float64)

    generator = RanluxGenerator(queue, 5120)

    for length in (300, 301, 302, 303, 10007):
        for float_dtype in float_dtypes:
            samples = cl_array.zeros(queue, length, float_dtype)
            generator.fill_uniform(samples)
            assert (0 < samples.get()).all()
            assert (samples.get() < 1).all()

            generator.synchronize(queue)

            samples = cl_array.zeros(queue, length, float_dtype)
            generator.fill_uniform(samples, a=4, b=7)
            assert (4 < samples.get()).all()
            assert (samples.get() < 7).all()

            # drawn only to exercise the call; the values are not checked
            samples = generator.normal(
                queue, (10007,), float_dtype, mu=4, sigma=3)

    for int_dtype in [np.int32]:
        samples = generator.uniform(
            queue, (10000007,), int_dtype, a=200, b=300)
        assert (200 <= samples.get()).all()
        assert (samples.get() < 300).all()
예제 #3
0
def valueMonteCarloGPU(ctx, queue, S_init, nPaths, Exp_Time, dtMonte, Strike,
                       Int_Rate, Vol, PTYPE, nMonteLoops=1):
    """Estimate a discounted payoff by Monte Carlo path simulation on an OpenCL device.

    Every path starts at ``S_init`` and is advanced
    ``ceil(Exp_Time / dtMonte)`` times by multiplying it with
    ``exp((Int_Rate - 0.5 * Vol**2) * dt + Vol * sqrt(dt) * z)``, where ``z``
    is filled by ``RanluxGenerator.fill_normal``.  The final value of each
    path is replaced by ``exp(-Int_Rate * Exp_Time) * max(S - Strike, 0)``
    and the results are averaged.

    Paths are processed in batches of at most 512 work items.  When
    ``nPaths`` exceeds 512 it is rounded up to the next multiple of 512 and
    the average is taken over the rounded count.

    :arg ctx: OpenCL context used to build the kernels.
    :arg queue: command queue used for the generator, arrays and reduction.
    :arg PTYPE: accepted but never read by this function.
        NOTE(review): the payoff kernel always evaluates
        ``max(S - Strike, 0)``; confirm whether a payoff-type switch was
        intended here.
    :arg nMonteLoops: number of independent repetitions of the simulation.
    :returns: a tuple ``(mean, dtMonte, std)`` where *mean* and *std* are
        taken over the per-repetition averages.
    """
    step_kernel = ElementwiseKernel(
        ctx,
        "float *latestStep, float *ran, float Strike, float Int_Rate, float Exp_Time, float dt, float Vol",
        "float rval = exp((Int_Rate - 0.5f * Vol*Vol)*dt + Vol * sqrt(dt) * ran[i]); latestStep[i] *= rval;",
        "nextStepPathKernel")

    payoff_kernel = ElementwiseKernel(
        ctx,
        "float *latestStep, float Strike, float Int_Rate, float Exp_Time",
        "float rval = (latestStep[i]-Strike); latestStep[i] = exp(-Int_Rate*Exp_Time)  * max(rval,0.0f);",
        "excersisePriceKernel")

    sum_kernel = ReductionKernel(
        ctx, numpy.float32, neutral="0", reduce_expr="a+b",
        map_expr="x[i]", arguments="__global float *x")

    # Split the requested paths into equally sized batches.
    batch_size = 1*2**9
    batch_count = 1

    if nPaths > batch_size:
        batch_count = math.ceil(nPaths/batch_size)
        # the average below is taken over the rounded-up path count
        nPaths = batch_count * batch_size
    else:
        batch_size = nPaths

    step_count = math.ceil(Exp_Time/dtMonte)

    # set up random number generator (seeded from the wall clock)
    gen = RanluxGenerator(queue, batch_size, luxury=4, seed=time.time())

    # device arrays: one random draw and one path value per work item
    ran = cl.array.zeros(queue, batch_size, numpy.float32)
    latestStep = cl.array.zeros_like(ran)

    means = numpy.zeros(nMonteLoops)

    for loop in range(nMonteLoops):
        total = 0

        for _batch in range(batch_count):
            latestStep.fill(S_init)

            for _step in range(step_count):
                gen.fill_normal(ran)
                gen.synchronize(queue)
                step_kernel(latestStep, ran, Strike, Int_Rate, Exp_Time,
                            dtMonte, Vol)

            payoff_kernel(latestStep, Strike, Int_Rate, Exp_Time)

            # The reduction kernel takes its queue as a keyword argument;
            # the kernel itself declares only the single vector argument.
            total += sum_kernel(latestStep, queue=queue).get()

        means[loop] = total / nPaths

    return numpy.mean(means), dtMonte, numpy.std(means)
예제 #4
0
def test_random_int_in_range(ctx_factory, dtype):
    """Check that integer uniform draws of type *dtype* fall in ``[200, 300)``.

    The test is marked as an expected failure for ``np.int64`` when the
    platform vendor string of the first device starts with
    ``"Advanced Micro"``.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    from pyopencl.clrandom import RanluxGenerator
    generator = RanluxGenerator(queue, 5120)

    if dtype == np.int64:
        vendor = context.devices[0].platform.vendor
        if vendor.startswith("Advanced Micro"):
            pytest.xfail("AMD miscompiles 64-bit RNG math")

    lower, upper = 200, 300
    samples = generator.uniform(queue, (10000007,), dtype, a=lower, b=upper)
    assert (lower <= samples.get()).all()
    assert (samples.get() < upper).all()
예제 #5
0
def test_sort(ctx_factory, scan_kernel):
    """Check ``RadixSort`` against ``np.sort`` and print throughput figures.

    For every size in ``scan_test_counts`` except the last, random 16-bit
    keys are generated on the device, sorted with ``RadixSort`` (built with
    the given *scan_kernel*) and compared element-wise with ``np.sort`` of
    the same keys.  Sizes of 2000 and above are skipped when the debug scan
    kernel is in use.

    :arg ctx_factory: callable returning an OpenCL context.
    :arg scan_kernel: forwarded to ``RadixSort`` as its ``scan_kernel``
        argument.
    """
    from pytest import importorskip
    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    dtype = np.int32

    from pyopencl.algorithm import RadixSort
    sort = RadixSort(context,
                     "int *ary",
                     key_expr="ary[i]",
                     sort_arg_names=["ary"],
                     scan_kernel=scan_kernel)

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=15)

    from time import time

    # RadixSort uses *scan_kernel* as a constructor, so it is normally a
    # class; isinstance() on a class object is always False, which would
    # leave the size guard below permanently disabled.  Accept both a class
    # and an instance.
    scan_cls = scan_kernel if isinstance(scan_kernel, type) else type(scan_kernel)
    uses_debug_scan = issubclass(scan_cls, GenericDebugScanKernel)

    # intermediate arrays for largest size cause out-of-memory on low-end GPUs
    for n in scan_test_counts[:-1]:
        if n >= 2000 and uses_debug_scan:
            continue

        print(n)

        print("  rng")
        a_dev = rng.uniform(queue, (n, ), dtype=dtype, a=0, b=2**16)
        a = a_dev.get()

        dev_start = time()
        print("  device")
        (a_dev_sorted, ), evt = sort(a_dev, key_bits=16)
        # wait for the device so the timestamp covers the whole sort
        queue.finish()
        dev_end = time()
        print("  numpy")
        a_sorted = np.sort(a)
        numpy_end = time()

        numpy_elapsed = numpy_end - dev_end
        dev_elapsed = dev_end - dev_start
        print("  dev: %.2f MKeys/s numpy: %.2f MKeys/s ratio: %.2fx" %
              (1e-6 * n / dev_elapsed, 1e-6 * n / numpy_elapsed,
               numpy_elapsed / dev_elapsed))
        assert (a_dev_sorted.get() == a_sorted).all()
예제 #6
0
def test_sort(ctx_factory):
    """Compare ``RadixSort`` output with ``np.sort`` and report throughput.

    Runs over every entry of ``scan_test_counts`` except the last one,
    sorting random keys drawn from ``[0, 2**16)``.
    """
    from pytest import importorskip

    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    key_dtype = np.int32

    from pyopencl.algorithm import RadixSort

    radix_sort = RadixSort(
        context, "int *ary", key_expr="ary[i]", sort_arg_names=["ary"])

    from pyopencl.clrandom import RanluxGenerator

    generator = RanluxGenerator(queue, seed=15)

    from time import time as now

    # intermediate arrays for largest size cause out-of-memory on low-end GPUs
    for count in scan_test_counts[:-1]:
        print(count)

        print("  rng")
        keys_dev = generator.uniform(
            queue, (count,), dtype=key_dtype, a=0, b=2 ** 16)
        keys_host = keys_dev.get()

        t_start = now()
        print("  device")
        (sorted_dev,), _event = radix_sort(keys_dev, key_bits=16)
        queue.finish()
        t_device_done = now()
        print("  numpy")
        expected = np.sort(keys_host)
        t_numpy_done = now()

        numpy_seconds = t_numpy_done - t_device_done
        device_seconds = t_device_done - t_start

        device_rate = 1e-6 * count / device_seconds
        numpy_rate = 1e-6 * count / numpy_seconds
        speed_ratio = numpy_seconds / device_seconds
        print(
            "  dev: %.2f MKeys/s numpy: %.2f MKeys/s ratio: %.2fx"
            % (device_rate, numpy_rate, speed_ratio)
        )
        assert (sorted_dev.get() == expected).all()
예제 #7
0
def test_sort(ctx_factory):
    """Sort random 16-bit keys with ``RadixSort`` and compare to ``np.sort``.

    Every size in ``scan_test_counts`` is exercised; device and numpy
    throughput are printed for each size.
    """
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    key_dtype = np.int32

    from pyopencl.algorithm import RadixSort
    radix_sort = RadixSort(
        context, "int *ary", key_expr="ary[i]", sort_arg_names=["ary"])

    from pyopencl.clrandom import RanluxGenerator
    generator = RanluxGenerator(queue, seed=15)

    from time import time as now

    for size in scan_test_counts:
        print(size)

        print("  rng")
        unsorted_dev = generator.uniform(
            queue, (size,), dtype=key_dtype, a=0, b=2**16)
        unsorted_host = unsorted_dev.get()

        started = now()
        print("  device")
        sorted_dev, = radix_sort(unsorted_dev, key_bits=16)
        queue.finish()
        device_finished = now()
        print("  numpy")
        reference = np.sort(unsorted_host)
        numpy_finished = now()

        numpy_time = numpy_finished - device_finished
        device_time = device_finished - started

        device_mkeys = 1e-6*size/device_time
        numpy_mkeys = 1e-6*size/numpy_time
        ratio = numpy_time/device_time
        print("  dev: %.2f MKeys/s numpy: %.2f MKeys/s ratio: %.2fx" % (
            device_mkeys, numpy_mkeys, ratio))
        assert (sorted_dev.get() == reference).all()
예제 #8
0
def make_normal_particle_array(queue, nparticles, dims, dtype, seed=15):
    """Draw *dims* normally distributed arrays and wrap them in an object array.

    All draws come from one ``RanluxGenerator`` created on *queue* with the
    given *seed*; each draw requests *nparticles* values of type *dtype*.
    """
    from pyopencl.clrandom import RanluxGenerator

    def draw_axis(generator):
        # one coordinate array for a single axis
        return generator.normal(queue, nparticles, dtype=dtype)

    rng = RanluxGenerator(queue, seed=seed)
    per_axis = [draw_axis(rng) for _ in range(dims)]
    return make_obj_array(per_axis)
예제 #9
0
# STARTEXAMPLE
# Example script: draw random 2D particles on an OpenCL device, then build a
# boxtree tree and an FMM traversal from them.
import pyopencl as cl
import numpy as np
from six.moves import range

# OpenCL context and the command queue used by every call below
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

dims = 2
nparticles = 10**4

# -----------------------------------------------------------------------------
# generate some random particle positions
# -----------------------------------------------------------------------------
from pyopencl.clrandom import RanluxGenerator
rng = RanluxGenerator(queue, seed=15)

# one float64 array of normally distributed values per dimension
from pytools.obj_array import make_obj_array
particles = make_obj_array(
    [rng.normal(queue, nparticles, dtype=np.float64) for i in range(dims)])

# -----------------------------------------------------------------------------
# build tree and traversals (lists)
# -----------------------------------------------------------------------------
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
# the builder returns a pair; only the first element (the tree) is kept
tree, _ = tb(queue, particles, max_particles_in_box=30)

from boxtree.traversal import FMMTraversalBuilder
tg = FMMTraversalBuilder(ctx)
# likewise, only the first element of the returned pair is kept
trav, _ = tg(queue, tree)
예제 #10
0
def test_pyfmmlib_fmm(ctx_getter):
    """Compare an FMM-driven Helmholtz potential with pyfmmlib's direct sum.

    3000 sources and 1000 targets (shifted by ``[2, 0]``) are generated in
    two dimensions, a tree and traversal are built, and the potential from
    ``drive_fmm`` must agree with ``hpotgrad2dall_vec`` to a relative l2
    error below ``1e-5``.  Skipped when ``pyfmmlib`` cannot be imported.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip

    importorskip("pyfmmlib")

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    source_count = 3000
    target_count = 1000
    ndim = 2
    real_dtype = np.float64

    wave_number = 2

    src_dev = p_normal(queue, source_count, ndim, real_dtype, seed=15)
    tgt_dev = (
        p_normal(queue, target_count, ndim, real_dtype, seed=18)
        + np.array([2, 0]))

    src_host = particle_array_to_host(src_dev)
    tgt_host = particle_array_to_host(tgt_dev)

    from boxtree import TreeBuilder

    build_tree = TreeBuilder(ctx)

    dev_tree, _ = build_tree(
        queue, src_dev, targets=tgt_dev, max_particles_in_box=30, debug=True)

    from boxtree.traversal import FMMTraversalBuilder

    build_traversal = FMMTraversalBuilder(ctx)
    traversal, _ = build_traversal(queue, dev_tree, debug=True)

    # bring the traversal over to the host before driving the FMM
    traversal = traversal.get(queue=queue)

    from pyopencl.clrandom import RanluxGenerator

    generator = RanluxGenerator(queue, seed=20)

    charges = generator.uniform(queue, source_count, dtype=np.float64).get()

    logger.info("computing direct (reference) result")

    from pyfmmlib import hpotgrad2dall_vec

    direct_result = hpotgrad2dall_vec(
        ifgrad=False,
        ifhess=False,
        sources=src_host.T,
        charge=charges,
        targets=tgt_host.T,
        zk=wave_number,
    )
    reference_pot, _, _ = direct_result

    from boxtree.pyfmmlib_integration import Helmholtz2DExpansionWrangler

    expansion_wrangler = Helmholtz2DExpansionWrangler(
        traversal.tree, wave_number, nterms=10)

    from boxtree.fmm import drive_fmm

    fmm_pot = drive_fmm(traversal, expansion_wrangler, charges)

    relative_error = la.norm(fmm_pot - reference_pot) / la.norm(reference_pot)
    logger.info("relative l2 error: %g" % relative_error)
    assert relative_error < 1e-5
예제 #11
0
def test_fmm_completeness(
    ctx_getter, dims, nsources_req, ntargets_req, who_has_extent, source_gen, target_gen, filter_kind
):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    A "constant one" expansion wrangler is driven over the traversal, and the
    resulting potential is compared at every (optionally filtered) target with
    the sum of all source weights.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg dims: passed to *source_gen* / *target_gen* as the dimension count.
    :arg nsources_req: requested number of sources.
    :arg ntargets_req: requested number of targets, or *None* to reuse the
        sources as targets.
    :arg who_has_extent: string; sources get radii if it contains ``"s"``,
        targets if it contains ``"t"``.
    :arg source_gen: callable ``(queue, n, dims, dtype, seed=...)`` producing
        the source particles.
    :arg target_gen: same, for the target particles.
    :arg filter_kind: falsy for no target filtering, otherwise ``"user"`` or
        ``"tree"``; any other value raises :class:`ValueError`.
    """

    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    # The generators may raise ImportError, in which case the test is skipped.
    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array " "generation")

    from pyopencl.clrandom import RanluxGenerator

    rng = RanluxGenerator(queue, seed=13)
    # Radii are 2**u with u drawn uniformly with a=-10, b=0.
    if sources_have_extent:
        source_radii = 2 ** rng.uniform(queue, nsources, dtype=dtype, a=-10, b=0)
    else:
        source_radii = None

    if targets_have_extent:
        target_radii = 2 ** rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder

    tb = TreeBuilder(ctx)

    tree, _ = tb(
        queue,
        sources,
        targets=targets,
        max_particles_in_box=30,
        source_radii=source_radii,
        target_radii=target_radii,
        debug=True,
    )
    # Disabled debugging aid: plot the tree.
    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt

        pt.show()

    from boxtree.traversal import FMMTraversalBuilder

    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)
    if trav.sep_close_smaller_starts is not None:
        trav = trav.merge_close_lists(queue)

    # NOTE(review): weights come from numpy's global RNG, which is not seeded
    # in this function.
    weights = np.random.randn(nsources)
    # weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    # Pick the wrangler; with filtering, random 0/1 flags select the targets.
    if filter_kind:
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2).astype(np.int8)
        if filter_kind == "user":
            from boxtree.tree import filter_target_lists_in_user_order

            filtered_targets = filter_target_lists_in_user_order(queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                host_tree, filtered_targets.get(queue=queue)
            )
        elif filter_kind == "tree":
            from boxtree.tree import filter_target_lists_in_tree_order

            filtered_targets = filter_target_lists_in_tree_order(queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                host_tree, filtered_targets.get(queue=queue)
            )
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm

    pot = drive_fmm(host_trav, wrangler, weights)

    # {{{ build, evaluate matrix (and identify missing interactions)

    # Disabled debugging aid: build the full interaction matrix one unit
    # vector at a time and plot the source/target pairs whose entry is zero.
    if 0:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        logging.getLogger().setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 1:
            pt.spy(mat)
            pt.show()

        missing_tgts, missing_srcs = np.where(mat == 0)

        if 1 and len(missing_tgts):
            from boxtree.visualization import TreePlotter

            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_missing_tgts = host_tree.indices_to_tree_target_order(missing_tgts)
            tree_order_missing_srcs = host_tree.indices_to_tree_source_order(missing_srcs)

            src_boxes = [host_tree.find_box_nr_for_source(i) for i in tree_order_missing_srcs]
            tgt_boxes = [host_tree.find_box_nr_for_target(i) for i in tree_order_missing_tgts]
            print(src_boxes)
            print(tgt_boxes)

            pt.plot(host_tree.targets[0][tree_order_missing_tgts], host_tree.targets[1][tree_order_missing_tgts], "rv")
            pt.plot(host_tree.sources[0][tree_order_missing_srcs], host_tree.sources[1][tree_order_missing_srcs], "go")
            pt.gca().set_aspect("equal")

            pt.show()

    # }}}

    # With filtering, only the flagged entries of the potential are compared.
    if filter_kind:
        pot = pot[flags.get() > 0]

    # Every remaining potential entry is compared with the sum of all weights.
    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8
    # Disabled debugging aids: plot the error and the offending targets.
    if 0 and not good:
        import matplotlib.pyplot as pt

        pt.plot(pot - weights_sum)
        pt.show()

    if 0 and not good:
        import matplotlib.pyplot as pt

        filt_targets = [host_tree.targets[0][flags.get() > 0], host_tree.targets[1][flags.get() > 0]]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [filt_targets[0][bad], filt_targets[1][bad]]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
예제 #12
0
 def setup_rng(self):
     """Create a ``RanluxGenerator`` on ``self.cl_queue`` and store it as ``self.rng``."""
     queue = self.cl_queue
     self.rng = RanluxGenerator(queue)
예제 #13
0
파일: demo.py 프로젝트: inducer/boxtree
# STARTEXAMPLE
# Demo script: draw random 2D particles on an OpenCL device, build a boxtree
# tree from them and create an FMM traversal builder.
import pyopencl as cl
import numpy as np
from six.moves import range

# OpenCL context and the command queue used by every call below
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

dims = 2
nparticles = 500

# -----------------------------------------------------------------------------
# generate some random particle positions
# -----------------------------------------------------------------------------
from pyopencl.clrandom import RanluxGenerator
rng = RanluxGenerator(queue, seed=15)

# one float64 array of normally distributed values per dimension
from pytools.obj_array import make_obj_array
particles = make_obj_array([
    rng.normal(queue, nparticles, dtype=np.float64)
    for i in range(dims)])

# -----------------------------------------------------------------------------
# build tree and traversals (lists)
# -----------------------------------------------------------------------------
from boxtree import TreeBuilder
tb = TreeBuilder(ctx)
# the builder returns a pair; only the first element (the tree) is kept
tree, _ = tb(queue, particles, max_particles_in_box=5)

from boxtree.traversal import FMMTraversalBuilder
tg = FMMTraversalBuilder(ctx)
예제 #14
0
            if ((local_index & mask) == 0) {
                float other = scratch[local_index + offset];
                float mine = scratch[local_index];
                scratch[local_index] = (mine < other) ? mine : other;
            }
            
            barrier(CLK_LOCAL_MEM_FENCE);
        }
        
        if (local_index == 0) {
            result[get_group_id(0)] = scratch[0];
        }
    }
"""

# Random number generator sized for nPaths work items, seeded from the
# current wall-clock time.
gen = RanluxGenerator(queue, nPaths, luxury=4, seed=time.time())
#gen = RanluxGenerator(queue, nPaths, luxury=4)

# Device arrays with nPaths float32 entries each; `ran` starts zeroed,
# `latestStep` is left uninitialized here.
ran = cl.array.zeros(queue, nPaths, numpy.float32)
latestStep = cl.array.empty_like(ran)

# Host array with one entry per loop iteration.
averages = numpy.zeros(nLoops)
#averages = cl.array.zeros(queue, nLoops, numpy.float32)

# Timestamp taken before the simulation loop starts.
tStartMonte = time.time()
theSum = 0

for loop in range(0, nLoops):

    latestStep.fill(S_init)
예제 #15
0
def test_pyfmmlib_fmm(ctx_getter):
    """Validate the FMM driver against a direct pyfmmlib Helmholtz evaluation.

    Uses 3000 sources and 1000 targets in 2D (targets shifted by ``[2, 0]``)
    with uniformly distributed source weights.  The FMM potential has to
    match the direct result to a relative l2 error below ``1e-5``.  The test
    is skipped when ``pyfmmlib`` is not importable.
    """
    logging.basicConfig(level=logging.INFO)

    from pytest import importorskip
    importorskip("pyfmmlib")

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    n_src, n_tgt = 3000, 1000
    n_axes = 2
    fp_dtype = np.float64

    zk = 2

    dev_sources = p_normal(queue, n_src, n_axes, fp_dtype, seed=15)
    dev_targets = p_normal(queue, n_tgt, n_axes, fp_dtype, seed=18)
    dev_targets = dev_targets + np.array([2, 0])

    host_sources = particle_array_to_host(dev_sources)
    host_targets = particle_array_to_host(dev_targets)

    from boxtree import TreeBuilder
    tree_builder = TreeBuilder(ctx)

    tree_kwargs = dict(targets=dev_targets, max_particles_in_box=30,
                       debug=True)
    built_tree, _ = tree_builder(queue, dev_sources, **tree_kwargs)

    from boxtree.traversal import FMMTraversalBuilder
    traversal_builder = FMMTraversalBuilder(ctx)
    dev_trav, _ = traversal_builder(queue, built_tree, debug=True)

    # host-side copy of the traversal, used for everything below
    host_trav = dev_trav.get(queue=queue)

    from pyopencl.clrandom import RanluxGenerator
    weight_rng = RanluxGenerator(queue, seed=20)

    src_weights = weight_rng.uniform(queue, n_src, dtype=np.float64).get()

    logger.info("computing direct (reference) result")

    from pyfmmlib import hpotgrad2dall_vec
    direct_kwargs = dict(ifgrad=False,
                         ifhess=False,
                         sources=host_sources.T,
                         charge=src_weights,
                         targets=host_targets.T,
                         zk=zk)
    expected_pot, _, _ = hpotgrad2dall_vec(**direct_kwargs)

    from boxtree.pyfmmlib_integration import Helmholtz2DExpansionWrangler
    helmholtz_wrangler = Helmholtz2DExpansionWrangler(
        host_trav.tree, zk, nterms=10)

    from boxtree.fmm import drive_fmm
    computed_pot = drive_fmm(host_trav, helmholtz_wrangler, src_weights)

    err = la.norm(computed_pot - expected_pot) / la.norm(expected_pot)
    logger.info("relative l2 error: %g" % err)
    assert err < 1e-5
예제 #16
0
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
                          who_has_extent, source_gen, target_gen, filter_kind):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    A "constant one" expansion wrangler is driven over the traversal, and the
    resulting potential is compared at every (optionally filtered) target with
    the sum of all source weights.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg dims: passed to *source_gen* / *target_gen* as the dimension count.
    :arg nsources_req: requested number of sources.
    :arg ntargets_req: requested number of targets, or *None* to reuse the
        sources as targets.
    :arg who_has_extent: string; sources get radii if it contains ``"s"``,
        targets if it contains ``"t"``.
    :arg source_gen: callable ``(queue, n, dims, dtype, seed=...)`` producing
        the source particles.
    :arg target_gen: same, for the target particles.
    :arg filter_kind: falsy for no target filtering, otherwise ``"user"`` or
        ``"tree"``; any other value raises :class:`ValueError`.
    """

    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    # The generators may raise ImportError, in which case the test is skipped.
    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                    "generation")

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=13)
    # Radii are 2**u with u drawn uniformly with a=-10, b=0.
    if sources_have_extent:
        source_radii = 2**rng.uniform(queue, nsources, dtype=dtype, a=-10, b=0)
    else:
        source_radii = None

    if targets_have_extent:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=30,
                 source_radii=source_radii,
                 target_radii=target_radii,
                 debug=True)
    # Disabled debugging aid: plot the tree.
    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx)
    trav, _ = tbuild(queue, tree, debug=True)
    if trav.sep_close_smaller_starts is not None:
        trav = trav.merge_close_lists(queue)

    # NOTE(review): weights come from numpy's global RNG, which is not seeded
    # in this function.
    weights = np.random.randn(nsources)
    #weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    # Pick the wrangler; with filtering, random 0/1 flags select the targets.
    if filter_kind:
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2) \
                .astype(np.int8)
        if filter_kind == "user":
            from boxtree.tree import filter_target_lists_in_user_order
            filtered_targets = filter_target_lists_in_user_order(
                queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                host_tree, filtered_targets.get(queue=queue))
        elif filter_kind == "tree":
            from boxtree.tree import filter_target_lists_in_tree_order
            filtered_targets = filter_target_lists_in_tree_order(
                queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                host_tree, filtered_targets.get(queue=queue))
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        wrangler = ConstantOneExpansionWrangler(host_tree)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(
            wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    # {{{ build, evaluate matrix (and identify missing interactions)

    # Disabled debugging aid: build the full interaction matrix one unit
    # vector at a time and plot the source/target pairs whose entry is zero.
    if 0:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        logging.getLogger().setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 1:
            pt.spy(mat)
            pt.show()

        missing_tgts, missing_srcs = np.where(mat == 0)

        if 1 and len(missing_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            tree_order_missing_tgts = \
                    host_tree.indices_to_tree_target_order(missing_tgts)
            tree_order_missing_srcs = \
                    host_tree.indices_to_tree_source_order(missing_srcs)

            src_boxes = [
                host_tree.find_box_nr_for_source(i)
                for i in tree_order_missing_srcs
            ]
            tgt_boxes = [
                host_tree.find_box_nr_for_target(i)
                for i in tree_order_missing_tgts
            ]
            print(src_boxes)
            print(tgt_boxes)

            pt.plot(host_tree.targets[0][tree_order_missing_tgts],
                    host_tree.targets[1][tree_order_missing_tgts], "rv")
            pt.plot(host_tree.sources[0][tree_order_missing_srcs],
                    host_tree.sources[1][tree_order_missing_srcs], "go")
            pt.gca().set_aspect("equal")

            pt.show()

    # }}}

    # With filtering, only the flagged entries of the potential are compared.
    if filter_kind:
        pot = pot[flags.get() > 0]

    # Every remaining potential entry is compared with the sum of all weights.
    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8
    # Disabled debugging aids: plot the error and the offending targets.
    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [
            host_tree.targets[0][flags.get() > 0],
            host_tree.targets[1][flags.get() > 0],
        ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [
            filt_targets[0][bad],
            filt_targets[1][bad],
        ]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
예제 #17
0
def plot_traversal(ctx_getter, do_plot=False):
    """Build a tree and FMM traversal for random 2D particles and plot box lists.

    10**4 normally distributed particles are generated, a tree and traversal
    are built, and for a few randomly picked boxes the box itself is drawn in
    red with the boxes of one of its traversal lists in yellow.  Which list is
    shown is selected by the hard-coded ``if 0 / elif 1`` chain below.

    :arg ctx_getter: callable returning an OpenCL context.
    :arg do_plot: not read by the active code in this function (only
        referenced in a commented-out block).
    """
    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    #for dims in [2, 3]:
    for dims in [2]:
        nparticles = 10**4
        dtype = np.float64

        from pyopencl.clrandom import RanluxGenerator
        rng = RanluxGenerator(queue, seed=15)

        from pytools.obj_array import make_obj_array
        particles = make_obj_array([
            rng.normal(queue, nparticles, dtype=dtype)
            for i in range(dims)])

        #if do_plot:
            #pt.plot(particles[0].get(), particles[1].get(), "x")

        from boxtree import TreeBuilder
        tb = TreeBuilder(ctx)

        queue.finish()
        tree = tb(queue, particles, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        tg = FMMTraversalBuilder(ctx)
        trav = tg(queue, tree).get()

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        #plotter.draw_box_numbers()
        plotter.set_bounding_box()

        # fixed seed so the same boxes are picked on every run
        from random import randrange, seed
        seed(7)

        # {{{ generic box drawing helper

        def draw_some_box_lists(starts, lists, key_to_box=None,
                count=5):
            """Draw *count* randomly chosen boxes together with their list entries.

            ``lists[starts[key]:starts[key+1]]`` holds the entries for *key*.
            If *key_to_box* is given, it maps the key to a box number;
            otherwise the key is itself the box number.
            """
            # NOTE(review): keys with empty lists are re-drawn, so this loop
            # does not terminate if fewer than *count* draws ever hit a
            # non-empty list.
            actual_count = 0
            while actual_count < count:
                if key_to_box is not None:
                    key = randrange(len(key_to_box))
                    ibox = key_to_box[key]
                else:
                    key = ibox = randrange(tree.nboxes)

                start, end = starts[key:key+2]
                if start == end:
                    continue

                #print ibox, start, end, lists[start:end]
                for jbox in lists[start:end]:
                    plotter.draw_box(jbox, facecolor='yellow')

                plotter.draw_box(ibox, facecolor='red')

                actual_count += 1

        # }}}

        # Debug toggle: only the first branch whose condition is 1 runs.
        if 0:
            # colleagues
            draw_some_box_lists(
                    trav.colleagues_starts,
                    trav.colleagues_lists)
        elif 0:
            # near neighbors ("list 1")
            draw_some_box_lists(
                    trav.neighbor_leaves_starts,
                    trav.neighbor_leaves_lists,
                    key_to_box=trav.source_boxes)
        elif 0:
            # well-separated siblings (list 2)
            draw_some_box_lists(
                    trav.sep_siblings_starts,
                    trav.sep_siblings_lists)
        elif 1:
            # separated smaller (list 3)
            draw_some_box_lists(
                    trav.sep_smaller_starts,
                    trav.sep_smaller_lists,
                    key_to_box=trav.source_boxes)
        elif 1:
            # separated bigger (list 4)
            # (not reached while the branch above is enabled)
            draw_some_box_lists(
                    trav.sep_bigger_starts,
                    trav.sep_bigger_lists)

        import matplotlib.pyplot as pt
        pt.show()
예제 #18
0
def test_extent_tree(ctx_getter, dims, do_plot=False):
    """Build a tree over sources and targets that carry radii and check it.

    The test checks that the tree's sorted particles and radii match the
    user-supplied ones under the tree's index arrays, that every box lies
    inside the bounding box, that per-box particle counts are consistent with
    the children's counts, and that every particle (including its radius)
    stays within the box extent enlarged by the stick-out distance. Finally,
    point sources are generated around each source and linked into the
    device tree.

    :arg ctx_getter: zero-argument callable returning an OpenCL context.
    :arg dims: number of spatial dimensions of the particle arrays.
    :arg do_plot: not used anywhere in this function.
    """
    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    nsources = 100000
    ntargets = 200000
    dtype = np.float64
    npoint_sources_per_source = 16

    # Fixed, distinct seeds keep the source and target clouds reproducible.
    sources = make_normal_particle_array(queue, nsources, dims, dtype,
            seed=12)
    targets = make_normal_particle_array(queue, ntargets, dims, dtype,
            seed=19)

    from pyopencl.clrandom import RanluxGenerator
    rng = RanluxGenerator(queue, seed=13)
    # Radii are 2**u with u drawn by rng.uniform(a=-10, b=0).
    source_radii = 2**rng.uniform(queue, nsources, dtype=dtype,
            a=-10, b=0)
    target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype,
            a=-10, b=0)

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    queue.finish()
    dev_tree, _ = tb(queue, sources, targets=targets,
            source_radii=source_radii, target_radii=target_radii,
            max_particles_in_box=10, debug=True)

    logger.info("transfer tree, check orderings")

    # Host copy of the device tree; all checks below run on the host.
    tree = dev_tree.get(queue=queue)

    sorted_sources = np.array(list(tree.sources))
    sorted_targets = np.array(list(tree.targets))
    sorted_source_radii = tree.source_radii
    sorted_target_radii = tree.target_radii

    unsorted_sources = np.array([pi.get() for pi in sources])
    unsorted_targets = np.array([pi.get() for pi in targets])
    unsorted_source_radii = source_radii.get()
    unsorted_target_radii = target_radii.get()
    # Tree-order sources must equal the user sources indexed by
    # tree.user_source_ids.
    assert (sorted_sources
            == unsorted_sources[:, tree.user_source_ids]).all()
    assert (sorted_source_radii
            == unsorted_source_radii[tree.user_source_ids]).all()

    # {{{ test box structure, stick-out criterion

    logger.info("test box structure, stick-out criterion")

    # Targets only expose sorted_target_ids, so build the opposite mapping
    # by scattering 0..ntargets-1 through it.
    # NOTE(review): presumably sorted_target_ids is a permutation - confirm.
    user_target_ids = np.empty(tree.ntargets, dtype=np.intp)
    user_target_ids[tree.sorted_target_ids] = np.arange(tree.ntargets, dtype=np.intp)
    if ntargets:
        assert (sorted_targets
                == unsorted_targets[:, user_target_ids]).all()
        assert (sorted_target_radii
                == unsorted_target_radii[user_target_ids]).all()

    all_good_so_far = True

    # {{{ check sources, targets

    for ibox in range(tree.nboxes):
        extent_low, extent_high = tree.get_box_extent(ibox)

        # Half of the longest box side, scaled by the tree's stick-out
        # factor, is how far a particle's extent may reach beyond the box.
        box_radius = np.max(extent_high-extent_low) * 0.5
        stick_out_dist = tree.stick_out_factor * box_radius

        # Boxes must lie within the bounding box, up to a small tolerance
        # relative to the root extent.
        assert (extent_low >=
                tree.bounding_box[0] - 1e-12*tree.root_extent).all(), ibox
        assert (extent_high <=
                tree.bounding_box[1] + 1e-12*tree.root_extent).all(), ibox

        # Entries equal to 0 are filtered out as "no child".
        box_children = tree.box_child_ids[:, ibox]
        existing_children = box_children[box_children != 0]

        # Cumulative count == own (non-child) count + children's cumulative
        # counts, for sources and targets alike.
        assert (tree.box_source_counts_nonchild[ibox]
                + np.sum(tree.box_source_counts_cumul[existing_children])
                == tree.box_source_counts_cumul[ibox])
        assert (tree.box_target_counts_nonchild[ibox]
                + np.sum(tree.box_target_counts_cumul[existing_children])
                == tree.box_target_counts_cumul[ibox])

        for what, starts, counts, points, radii in [
                ("source", tree.box_source_starts, tree.box_source_counts_cumul,
                    sorted_sources, sorted_source_radii),
                ("target", tree.box_target_starts, tree.box_target_counts_cumul,
                    sorted_targets, sorted_target_radii),
                ]:
            # Particles of this box occupy one contiguous slice of the
            # tree-ordered arrays.
            bstart = starts[ibox]
            bslice = slice(bstart, bstart+counts[ibox])
            check_particles = points[:, bslice]
            check_radii = radii[bslice]

            # Per particle: center +/- radius must stay inside the box
            # extent enlarged by stick_out_dist along every axis.
            good = (
                    (check_particles + check_radii
                        < extent_high[:, np.newaxis] + stick_out_dist)
                    &
                    (extent_low[:, np.newaxis] - stick_out_dist
                        <= check_particles - check_radii)
                    ).all(axis=0)

            all_good_here = good.all()

            if not all_good_here:
                print("BAD BOX %s %d level %d" % (what, ibox, tree.box_levels[ibox]))

            all_good_so_far = all_good_so_far and all_good_here
            # Fails on the first offending box, right after reporting it.
            assert all_good_here

    # }}}

    assert all_good_so_far

    # }}}

    # {{{ create, link point sources

    logger.info("creating point sources")

    np.random.seed(20)

    from pytools.obj_array import make_obj_array
    # For each axis: an (nsources, npoint_sources_per_source) array of point
    # sources, offset from the source center by up to one radius.
    point_sources = make_obj_array([
            cl.array.to_device(queue,
                unsorted_sources[i][:, np.newaxis]
                + unsorted_source_radii[:, np.newaxis]
                * np.random.uniform(
                    -1, 1, size=(nsources, npoint_sources_per_source))
                 )
            for i in range(dims)])

    # nsources+1 start offsets in steps of npoint_sources_per_source, i.e.
    # each source owns a run of that many consecutive point sources.
    point_source_starts = cl.array.arange(queue,
            0, (nsources+1)*npoint_sources_per_source, npoint_sources_per_source,
            dtype=tree.particle_id_dtype)

    from boxtree.tree import link_point_sources
    dev_tree = link_point_sources(queue, dev_tree,
            point_source_starts, point_sources,
            debug=True)
예제 #19
0
def valueMonteCarloGPU(ctx,
                       queue,
                       S_init,
                       nPaths,
                       Exp_Time,
                       dtMonte,
                       Strike,
                       Int_Rate,
                       Vol,
                       PTYPE,
                       nMonteLoops=1):
    """Estimate a discounted option payoff by Monte Carlo on an OpenCL device.

    Every path starts at ``S_init`` and is advanced ``ceil(Exp_Time / dtMonte)``
    times by multiplying with
    ``exp((Int_Rate - Vol**2 / 2) * dt + Vol * sqrt(dt) * z)``, where ``z`` is
    drawn with ``RanluxGenerator.fill_normal``. The terminal value is replaced
    by ``exp(-Int_Rate * Exp_Time) * max(S - Strike, 0)`` and averaged over
    all paths. This is repeated ``nMonteLoops`` times.

    Paths are processed in batches of at most 512 work items. When ``nPaths``
    exceeds 512 it is rounded up to the next multiple of 512, and the average
    is taken over that rounded count.

    Args:
        ctx: OpenCL context used to build the kernels.
        queue: OpenCL command queue used for arrays, RNG and reduction.
        S_init: starting value of every path.
        nPaths: requested number of paths per loop.
        Exp_Time: total simulated time.
        dtMonte: time step.
        Strike: strike subtracted from the terminal value.
        Int_Rate: rate used for drift and discounting.
        Vol: volatility.
        PTYPE: accepted for interface compatibility but not used; the payoff
            is always ``max(S - Strike, 0)``.
        nMonteLoops: number of independent repetitions.

    Returns:
        Tuple ``(mean of the per-loop averages, dtMonte, standard deviation
        of the per-loop averages)``.
    """
    nextStepPathKernel = ElementwiseKernel(
        ctx,
        "float *latestStep, float *ran, float Strike, float Int_Rate, float Exp_Time, float dt, float Vol",
        "float rval = exp((Int_Rate - 0.5f * Vol*Vol)*dt + Vol * sqrt(dt) * ran[i]); latestStep[i] *= rval;",
        "nextStepPathKernel")

    excersisePriceKernel = ElementwiseKernel(
        ctx, "float *latestStep, float Strike, float Int_Rate, float Exp_Time",
        "float rval = (latestStep[i]-Strike); latestStep[i] = exp(-Int_Rate*Exp_Time)  * max(rval,0.0f);",
        "excersisePriceKernel")

    sumKernel = ReductionKernel(ctx,
                                numpy.float32,
                                neutral="0",
                                reduce_expr="a+b",
                                map_expr="x[i]",
                                arguments="__global float *x")

    # Batch size: at most 512 paths are simulated at once; larger requests
    # are split into `multiplier` batches of exactly that size.
    maxWorkItems = 1 * 2**9
    multiplier = 1

    if nPaths > maxWorkItems:
        multiplier = math.ceil(nPaths / maxWorkItems)
        nPaths = multiplier * maxWorkItems
    else:
        maxWorkItems = nPaths

    nTimeStepsMonte = math.ceil(Exp_Time / dtMonte)

    # Seeded from the wall clock, so results differ from run to run.
    gen = RanluxGenerator(queue, maxWorkItems, luxury=4, seed=time.time())

    # Device buffers reused by every batch.
    ran = cl.array.zeros(queue, maxWorkItems, numpy.float32)
    latestStep = cl.array.zeros_like(ran)

    means = numpy.zeros(nMonteLoops)

    for loop in range(nMonteLoops):

        theSum = 0

        for _ in range(multiplier):

            latestStep.fill(S_init)

            for _ in range(nTimeStepsMonte):
                gen.fill_normal(ran)
                gen.synchronize(queue)
                nextStepPathKernel(latestStep, ran, Strike, Int_Rate, Exp_Time,
                                   dtMonte, Vol)

            excersisePriceKernel(latestStep, Strike, Int_Rate, Exp_Time)

            # The reduction declares a single kernel argument (x); the queue
            # belongs in the `queue` keyword, not in the positional list.
            theSum += sumKernel(latestStep, queue=queue).get()
        means[loop] = theSum / nPaths

    monteAverage = numpy.mean(means)
    monteStdDeviation = numpy.std(means)

    return monteAverage, dtMonte, monteStdDeviation
예제 #20
0
def plot_traversal(ctx_getter, do_plot=False):
    """Plot a 2D box tree and highlight a few randomly chosen box lists.

    A tree is built over 10**4 particles drawn with ``rng.normal`` and an
    FMM traversal is generated for it. Then, for five randomly picked boxes
    with a non-empty "separated smaller" list (list 3), the list members are
    drawn in yellow and the box itself in red. The figure is shown with
    matplotlib.

    :arg ctx_getter: zero-argument callable returning an OpenCL context.
    :arg do_plot: accepted but never consulted; the plot is always shown.
    """
    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    for dims in [2]:
        nparticles = 10**4
        dtype = np.float64

        from pyopencl.clrandom import RanluxGenerator
        generator = RanluxGenerator(queue, seed=15)

        from pytools.obj_array import make_obj_array
        coordinate_arrays = []
        for _ in range(dims):
            coordinate_arrays.append(
                generator.normal(queue, nparticles, dtype=dtype))
        particles = make_obj_array(coordinate_arrays)

        from boxtree import TreeBuilder
        tree_builder = TreeBuilder(ctx)

        queue.finish()
        tree = tree_builder(
            queue, particles, max_particles_in_box=30, debug=True)

        from boxtree.traversal import FMMTraversalBuilder
        traversal_builder = FMMTraversalBuilder(ctx)
        trav = traversal_builder(queue, tree).get()

        from boxtree.visualization import TreePlotter
        plotter = TreePlotter(tree)
        plotter.draw_tree(fill=False, edgecolor="black")
        plotter.set_bounding_box()

        from random import randrange, seed
        seed(7)

        # {{{ generic box drawing helper

        def highlight_random_lists(starts, lists, key_to_box=None, count=5):
            """Draw *count* random boxes (red) with their list entries (yellow).

            Boxes whose list is empty are skipped and another one is drawn.
            """
            remaining = count
            while remaining > 0:
                if key_to_box is None:
                    key = ibox = randrange(tree.nboxes)
                else:
                    key = randrange(len(key_to_box))
                    ibox = key_to_box[key]

                start, end = starts[key:key + 2]
                if start != end:
                    for jbox in lists[start:end]:
                        plotter.draw_box(jbox, facecolor='yellow')

                    plotter.draw_box(ibox, facecolor='red')

                    remaining -= 1

        # }}}

        # Attribute prefix on the traversal -> whether the starts array is
        # indexed through trav.source_boxes instead of by box number.
        list_kinds = {
            "colleagues": False,
            "neighbor_leaves": True,   # near neighbors ("list 1")
            "sep_siblings": False,     # well-separated siblings (list 2)
            "sep_smaller": True,       # separated smaller (list 3)
            "sep_bigger": False,       # separated bigger (list 4)
        }

        # Edit this name to look at a different list.
        shown = "sep_smaller"

        highlight_random_lists(
            getattr(trav, shown + "_starts"),
            getattr(trav, shown + "_lists"),
            key_to_box=trav.source_boxes if list_kinds[shown] else None)

        import matplotlib.pyplot as pt
        pt.show()
예제 #21
0
            if ((local_index & mask) == 0) {
                float other = scratch[local_index + offset];
                float mine = scratch[local_index];
                scratch[local_index] = (mine < other) ? mine : other;
            }
            
            barrier(CLK_LOCAL_MEM_FENCE);
        }
        
        if (local_index == 0) {
            result[get_group_id(0)] = scratch[0];
        }
    }
"""

gen = RanluxGenerator(queue, nPaths, luxury=4, seed=time.time())
#gen = RanluxGenerator(queue, nPaths, luxury=4)

ran = cl.array.zeros(queue, nPaths, numpy.float32)
latestStep = cl.array.empty_like(ran)

averages = numpy.zeros(nLoops)
#averages = cl.array.zeros(queue, nLoops, numpy.float32)

tStartMonte = time.time()
theSum = 0

for loop in range(0,nLoops):
    
    latestStep.fill(S_init)
    
예제 #22
0
class GeneticAlgorithmOpenCL(GeneticAlgorithm):
    class Population:
        """A set of individuals (DNA rows) held in OpenCL device memory.

        The constructor only records its arguments and creates empty
        placeholders. ``setup_opencl`` allocates the device buffers and reads
        ``crossover_translation_mode``, ``crossover_rotation_mode``,
        ``crossover_probability`` and ``mutation_probability``, which this
        base class does not set itself.
        """

        # Crossover modes
        CM_SEPARATE = 0  # Separate probabilities for translation/rotation genes
        CM_COMBINE = 1   # Combined probability for translation/rotation genes

        def __init__(self, size=0, dna_size=0,
                     cl_ctx=None, cl_queue=None, rng=None, cl_prg=None):
            """Store sizes and OpenCL handles; no device memory is allocated."""
            self.size = size
            self.dna_size = dna_size

            # OpenCL
            self.cl_ctx = cl_ctx
            self.cl_queue = cl_queue
            self.rng = rng
            self.cl_prg = cl_prg

            # Matrix of i by j for individuals and genes (DNA) respectively
            self.individuals_np = None
            self.individuals_buf = None
            self.new_individuals_np = None
            self.new_individuals_buf = None

            # Host-side placeholders and their (not yet created) device copies
            self.crossover_translation_mode_np = np.array([], dtype=int)
            self.crossover_translation_mode_buf = None
            self.crossover_rotation_mode_np = np.array([], dtype=int)
            self.crossover_rotation_mode_buf = None
            self.crossover_probability_np = np.array([], dtype=float)
            self.crossover_probability_buf = None
            self.mutation_probability_np = np.array([], dtype=float)
            self.mutation_probability_buf = None

        def setup_opencl(self):
            """Allocate the individual matrices and upload the settings."""
            mf = cl.mem_flags
            matrix_shape = (self.size, self.dna_size)

            # Setup device buffers
            self.individuals_buf = cl.array.zeros(
                self.cl_queue, matrix_shape, dtype=float)
            self.new_individuals_buf = cl.array.zeros(
                self.cl_queue, matrix_shape, dtype=float)

            # Each setting becomes a one-element host array and a read-only
            # device buffer initialised from it.
            self.crossover_translation_mode_np = np.array(
                [self.crossover_translation_mode], dtype=int)
            self.crossover_translation_mode_buf = cl.Buffer(
                self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                hostbuf=self.crossover_translation_mode_np)

            self.crossover_rotation_mode_np = np.array(
                [self.crossover_rotation_mode], dtype=int)
            self.crossover_rotation_mode_buf = cl.Buffer(
                self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                hostbuf=self.crossover_rotation_mode_np)

            self.crossover_probability_np = np.array(
                [self.crossover_probability], dtype=float)
            self.crossover_probability_buf = cl.Buffer(
                self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                hostbuf=self.crossover_probability_np)

            self.mutation_probability_np = np.array(
                [self.mutation_probability], dtype=float)
            self.mutation_probability_buf = cl.Buffer(
                self.cl_ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                hostbuf=self.mutation_probability_np)

        def __repr__(self):
            """Format every individual on its own numbered line.

            Fetches the individuals from the device and caches them in
            ``individuals_np`` as a side effect. Genes 0-2 and 3-6 are
            printed as two groups, any remaining genes follow.
            """
            self.individuals_np = self.individuals_buf.get()

            pieces = ["Individuals:\n"]
            for number, genes in enumerate(self.individuals_np, 1):
                pieces.append("[%3d] " % number)
                pieces.append("%6.2f %6.2f %6.2f | "
                              % (genes[0], genes[1], genes[2]))
                pieces.append("%6.2f %6.2fi %6.2fj %6.2fk | "
                              % (genes[3], genes[4], genes[5], genes[6]))
                for torsion in genes[7:]:
                    pieces.append(" %5.2f" % torsion)
                pieces.append("\n")
            return "".join(pieces)

        def get_individual(self, idx=0):
            """Fetch all individuals from the device and return row *idx*."""
            fetched = self.individuals_buf.get()
            self.individuals_np = fetched
            return fetched[idx]

        def create(self, dna_size_buf=None, dock=None):
            """Fill the individuals with uniform random numbers, then run the
            ``construct_individuals`` kernel with one work item per individual.
            """
            self.rng.fill_uniform(self.individuals_buf)
            # Construct individuals
            self.cl_prg.construct_individuals(
                self.cl_queue, (self.size,), None,
                dock.lo_grid_buf,
                dock.dist_grid_buf,
                dna_size_buf,
                self.individuals_buf.data)

        def scoring(self, dock=None, cl_ctx=None, cl_queue=None):
            """Hand the individuals to *dock* as poses and compute energies."""
            dock.reset_poses(self.size, self.individuals_buf, cl_ctx, cl_queue)
            dock.calc_energy()

        def min_score(self, dock=None):
            """Return the minimum of ``dock.e_totals_buf`` fetched to the host."""
            return dock.e_totals_buf.get().min()

        def crossover(self, parents_idx, ttl_torsions, rng):
            """Placeholder; always returns ``None``."""
            return None

        def mutate(self, individual, mutation_chance,
                   lo_grid, hi_grid, ttl_torsions, rng):
            """Placeholder; always returns ``None``."""
            return None

    class Settler(Population):
        """Population preset: combined crossover mode for translation and
        rotation, crossover probability 0.5, mutation probability 0.25.
        """

        def __init__(self, size=0, dna_size=0,
                     cl_ctx=None, cl_queue=None, rng=None, cl_prg=None):
            """Initialise the base population, apply the preset, then allocate
            the device buffers.
            """
            base = GeneticAlgorithmOpenCL.Population
            base.__init__(self, size, dna_size, cl_ctx, cl_queue, rng, cl_prg)

            # setup_opencl() reads these values, so they are assigned first.
            self.mutation_probability = 0.25
            self.crossover_probability = 0.5
            self.crossover_rotation_mode = self.CM_COMBINE
            self.crossover_translation_mode = self.CM_COMBINE

            # OpenCL
            self.setup_opencl()

    class Nomad(Population):
        """Population preset: separate crossover mode for translation and
        rotation, crossover probability 0.5, mutation probability 0.75.
        """

        def __init__(self, size=0, dna_size=0,
                     cl_ctx=None, cl_queue=None, rng=None, cl_prg=None):
            """Initialise the base population, apply the preset, then allocate
            the device buffers.
            """
            base = GeneticAlgorithmOpenCL.Population
            base.__init__(self, size, dna_size, cl_ctx, cl_queue, rng, cl_prg)

            # setup_opencl() reads these values, so they are assigned first.
            self.mutation_probability = 0.75
            self.crossover_probability = 0.5
            self.crossover_rotation_mode = self.CM_SEPARATE
            self.crossover_translation_mode = self.CM_SEPARATE

            # OpenCL
            self.setup_opencl()

    def __init__(self, dock = None, cl_device_type = None):
        """Create the OpenCL context, queue and program for the algorithm.

        :arg dock: docking object, passed on to ``GeneticAlgorithm.__init__``.
        :arg cl_device_type: ``"gpu"`` or ``"cpu"`` to request a context of
            that device type; any other value falls back to
            ``cl.create_some_context()``.

        The kernel source is read from ``./OpenCL/GeneticAlgorithm.cl``
        (relative to the current working directory) and built immediately.
        All device buffers start out as ``None`` and the random number
        generator is left unset.
        """
        GeneticAlgorithm.__init__(self, dock)
        # OpenCL
        self.cl_device_type = cl_device_type
        if self.cl_device_type == "gpu":
            self.cl_ctx = cl.Context(dev_type = cl.device_type.GPU)
        elif self.cl_device_type == "cpu":
            self.cl_ctx = cl.Context(dev_type = cl.device_type.CPU)
        else: # manual selection
            self.cl_ctx = cl.create_some_context()
        self.cl_queue = cl.CommandQueue(self.cl_ctx)
        self.cl_filename = "./OpenCL/GeneticAlgorithm.cl"
        # Read the kernel source inside a context manager so the file handle
        # is closed promptly, even if reading raises.
        with open(self.cl_filename, 'r') as fh:
            cl_code = fh.read()
        self.cl_prg = cl.Program(self.cl_ctx, cl_code).build()
        self.rng = None
        # OpenCL buffer
        self.population_size_np = np.array([], dtype = int)
        self.population_size_buf =  None
        self.dna_size_np = np.array([], dtype = int)
        self.dna_size_buf = None
        self.max_inherited_prob_np = np.array([], dtype = int)
        self.max_inherited_prob_buf = None
        self.normalizer_np = np.array([], dtype = int)
        self.normalizer_buf = None
        self.chances_np = None
        self.chances_buf = None
        self.chances_sum_buf = None
        self.dna1_buf = None
        self.dna2_buf = None
        self.ttl_reproduction_rns_np = np.array([], dtype = int)
        self.ttl_reproduction_rns_buf = None
        self.reproduction_rns_buf = None
        self.mutation_chance_np = np.array([], dtype = float)
        self.mutation_chance_buf = None

    def setup_opencl(self):
        """Allocate every device buffer used by selection and reproduction.

        Scalar settings are uploaded as one-element read-only buffers; the
        work arrays (chances, DNA scratch space, random numbers) are created
        as zero-filled device arrays. ``self.dna_size`` is set here to
        ``3 + 4 + total torsions`` of the docking object.
        """
        # OpenCL setup
        self.dock.setup_opencl(self.cl_ctx, self.cl_queue)

        ctx = self.cl_ctx
        queue = self.cl_queue
        const_flags = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR

        def upload(host_array):
            # Read-only device buffer initialised from the host array.
            return cl.Buffer(ctx, const_flags, hostbuf=host_array)

        # Setup OpenCL device buffer
        self.population_size_np = np.array([self.population_size], dtype=int)
        self.population_size_buf = upload(self.population_size_np)

        self.dna_size = 3 + 4 + self.dock.get_total_torsions()
        self.dna_size_np = np.array([self.dna_size], dtype=int)
        self.dna_size_buf = upload(self.dna_size_np)

        self.max_inherited_prob_np = np.array(
            [self.max_inherited_prob], dtype=int)
        self.max_inherited_prob_buf = upload(self.max_inherited_prob_np)

        self.normalizer_np = np.array([self.ttl_ligand_atoms], dtype=int)
        self.normalizer_buf = upload(self.normalizer_np)

        self.chances_buf = cl.array.zeros(
            queue, (self.population_size), dtype=int)
        self.chances_sum_buf = cl.array.zeros(
            queue, (self.population_size), dtype=int)

        dna_shape = (self.population_size, self.dna_size)
        self.dna1_buf = cl.array.zeros(queue, dna_shape, dtype=float)
        self.dna2_buf = cl.array.zeros(queue, dna_shape, dtype=float)

        # Reproduction random numbers needed per individual:
        # - Selecting parents:  2
        # - Crossing over:      Use new_individuals_buf
        # - Mutation:           1 + 2 + total torsions
        # - Mutation pose:      3 + 4 + total torsions
        torsions = self.ttl_torsions
        for_parents = 2
        for_mutation = 1 + 2 + torsions
        for_mutation_pose = 3 + 4 + torsions
        ttl_reproduction_rns = for_parents + for_mutation + for_mutation_pose

        self.ttl_reproduction_rns_np = np.array(
            [ttl_reproduction_rns], dtype=int)
        self.ttl_reproduction_rns_buf = upload(self.ttl_reproduction_rns_np)
        self.reproduction_rns_buf = cl.array.zeros(
            queue, (self.population_size, ttl_reproduction_rns), dtype=float)

        self.mutation_chance_np = np.array([self.mutation_chance], dtype=float)
        self.mutation_chance_buf = upload(self.mutation_chance_np)

        # Setup OpenCL buffer for docking object
        self.dock.setup_opencl_buffer(self.population_size, ctx, queue)

    def setup_rng(self):
        """Create the device random number generator on this object's queue."""
        command_queue = self.cl_queue
        self.rng = RanluxGenerator(command_queue)

    def setup(self):
        """Run the parent class setup, then allocate the OpenCL buffers."""
        GeneticAlgorithm.setup(self)
        self.setup_opencl()

    def select(self, population):
        """Score *population* and launch the ``calc_chances`` kernel.

        The kernel runs with one work item per individual and receives the
        energy totals, the normalizer, the maximum inherited probability and
        the chances buffer.
        """
        # Get individual scores
        population.scoring(self.dock, self.cl_ctx, self.cl_queue)

        global_size = (self.population_size,)
        kernel_args = (
            self.dock.e_totals_buf.data,
            self.normalizer_buf,
            self.max_inherited_prob_buf,
            self.chances_buf.data,
        )
        self.cl_prg.calc_chances(self.cl_queue, global_size, None, *kernel_args)

    def reproduce(self, population):
        """Run one reproduction step for *population* on the device.

        The next-generation buffer and the reproduction random-number buffer
        are refilled with uniform random numbers. The ``reproduce`` kernel
        then runs with one work item per individual, and the next generation
        is copied over the current individuals.
        """
        self.rng.fill_uniform(population.new_individuals_buf)
        self.rng.fill_uniform(self.reproduction_rns_buf)

        # Kernel arguments, grouped the way the kernel call lists them.
        selection_args = [
            self.population_size_buf,
            self.chances_buf.data,
            self.ttl_reproduction_rns_buf,
            self.reproduction_rns_buf.data,
        ]
        individual_args = [
            self.dna_size_buf,
            population.individuals_buf.data,
        ]
        crossover_args = [
            population.crossover_translation_mode_buf,
            population.crossover_rotation_mode_buf,
            population.crossover_probability_buf,
        ]
        mutation_args = [
            self.mutation_chance_buf,
            population.mutation_probability_buf,
            self.dock.ttl_torsions_buf,
            self.dock.lo_grid_buf,
            self.dock.dist_grid_buf,
        ]
        scratch_args = [
            self.chances_sum_buf.data,
            self.dna1_buf.data,
            self.dna2_buf.data,
        ]
        output_args = [population.new_individuals_buf.data]

        kernel_args = (selection_args + individual_args + crossover_args
                       + mutation_args + scratch_args + output_args)
        self.cl_prg.reproduce(
            self.cl_queue, (self.population_size,), None, *kernel_args)

        # Destination first, then source.
        cl.enqueue_copy(self.cl_queue,
                        population.individuals_buf.data,
                        population.new_individuals_buf.data)

    def run(self):
        """Run the nomad/settler genetic algorithm for every community.

        For each of ``community_size`` communities, a nomad population is
        created and evolved for ``num_gen`` generations. Its individuals are
        then copied into the settler population, which is evolved for another
        ``num_gen`` generations. The minimum score of both populations and the
        elapsed time are printed per community, followed by the collected
        minimum scores of all communities.
        """
        self.setup()
        # Create the two populations; both share this object's context,
        # queue, random number generator and program.
        # NOTE(review): self.rng is None unless setup_rng() has run; this
        # method does not call it directly - presumably setup() does, confirm.
        self.nomad = self.Nomad(self.population_size, self.dna_size, \
                                self.cl_ctx, self.cl_queue, \
                                self.rng, self.cl_prg)
        self.settler = self.Settler(self.population_size, self.dna_size, \
                                    self.cl_ctx, self.cl_queue, \
                                    self.rng, self.cl_prg)
        population_min_scores = []
        for community_idx in xrange(self.community_size):
            tic = time()
            # Nomad portion
            # This initial value is overwritten after the generation loop.
            nomad_min_score = float("inf")
            self.nomad.create(self.dna_size_buf, self.dock)
            if VERBOSE: print self.nomad
            for gen_idx in xrange(self.num_gen):
                self.select(self.nomad)
                self.reproduce(self.nomad)
            nomad_min_score = self.nomad.min_score(self.dock)

            # Settler portion
            # This initial value is overwritten after the generation loop.
            settler_min_score = float("inf")
            # Seed the settlers with the evolved nomad individuals
            # (destination first, then source).
            cl.enqueue_copy(self.cl_queue, self.settler.individuals_buf.data, \
                            self.nomad.individuals_buf.data)
            if VERBOSE: print self.settler
            for gen_idx in xrange(self.num_gen):
                self.select(self.settler)
                self.reproduce(self.settler)
            if VERBOSE: print self.settler
            settler_min_score = self.settler.min_score(self.dock)

            population_min_scores.append([nomad_min_score, settler_min_score])
            toc = time()
            print "Elapsed time community %4d: %10.2f - Minimum Scores: %12.3f, %12.3f" \
                  % (community_idx + 1, toc - tic, \
                     nomad_min_score, settler_min_score)

        print "Community Minimum Scores: %s" % population_min_scores