Example #1
0
def find_associativity():
    """Benchmark strided byte accesses over arrays of varying size.

    JIT-compiles a small C kernel that walks a buffer with a given stride,
    times it for every (array size in MiB, stride) combination, and records
    the achieved bandwidth (GB/s) keyed by ``(array_megs, stride)``.
    Results are pickled to ``assoc_result.dat``; the C source is written to
    ``assoc.c``.

    Relies on module-level ``toolchain`` and ``MODULE_CODE`` being defined.
    """
    FUNC_CODE = """
    int go(unsigned array_size, unsigned stride, unsigned steps)
    {
      char *ary = (char *) malloc(sizeof(int) * array_size);

      unsigned p = 0;
      for (unsigned i = 0; i < steps; ++i)
      {
        ary[p] ++;
        p += stride;
        if (p >= array_size)
          p = 0;
      }

      int result = 0;
      for (unsigned i = 0; i < array_size; ++i)
          result += ary[i];

      free(ary);
      return result;
    }
    """
    from codepy.jit import extension_from_string
    from time import time
    cmod = extension_from_string(toolchain, "module", MODULE_CODE % FUNC_CODE)

    result = {}

    steps = 2**20
    from pytools import ProgressBar
    meg_range = range(1, 25)
    stride_range = range(1, 640)
    pb = ProgressBar("bench", len(meg_range) * len(stride_range))
    for array_megs in meg_range:
        for stride in stride_range:
            start = time()
            cmod.go(array_megs << 20, stride, steps)
            stop = time()

            elapsed = stop - start
            # each step does one 1-byte read and one 1-byte write (char
            # accesses), hence 2 bytes per step
            gb_transferred = 2 * steps / 1e9
            bandwidth = gb_transferred / elapsed

            result[array_megs, stride] = bandwidth
            pb.progress()

    from cPickle import dump
    # use context managers so both files are closed deterministically
    # (the original leaked both handles)
    with open("assoc_result.dat", "w") as outf:
        dump(result, outf)

    with open("assoc.c", "w") as outf:
        outf.write(FUNC_CODE)
Example #2
0
def build_matrix(op, dtype=None, shape=None):
    """Densify the linear operator *op* into a NumPy array.

    Applies *op* (via its ``matvec`` method, or ``__call__`` as a
    fallback) to each unit vector and stores the results column by
    column, displaying a progress bar along the way.

    :arg dtype: overrides ``op.dtype`` if given.
    :arg shape: overrides ``op.shape`` if given.
    """
    from pytools import ProgressBar

    if not dtype:
        dtype = op.dtype
    if not shape:
        shape = op.shape

    nrows, ncols = shape
    pb = ProgressBar("matrix", ncols)
    mat = np.zeros(shape, dtype)

    try:
        apply_op = op.matvec
    except AttributeError:
        apply_op = op.__call__

    for col in range(ncols):
        e_col = np.zeros(ncols, dtype=dtype)
        e_col[col] = 1
        mat[:, col] = apply_op(e_col)
        pb.progress()

    pb.finished()

    return mat
Example #3
0
def test_fmm_completeness(ctx_getter, dims, nsources_req, ntargets_req,
                          who_has_extent, source_gen, target_gen, filter_kind,
                          well_sep_is_n_away, extent_norm,
                          from_sep_smaller_crit):
    """Tests whether the built FMM traversal structures and driver completely
    capture all interactions.

    With unit source weights and a constant-one wrangler, every target
    potential should equal the total source count; any missed or duplicated
    interaction shows up as a nonzero relative error.
    """

    # who_has_extent is a string containing "s" and/or "t" (or empty),
    # selecting whether sources/targets carry radii
    sources_have_extent = "s" in who_has_extent
    targets_have_extent = "t" in who_has_extent

    logging.basicConfig(level=logging.INFO)

    ctx = ctx_getter()
    queue = cl.CommandQueue(ctx)

    dtype = np.float64

    # generate particle arrays; skip the test if the generators need loo.py
    try:
        sources = source_gen(queue, nsources_req, dims, dtype, seed=15)
        nsources = len(sources[0])

        if ntargets_req is None:
            # This says "same as sources" to the tree builder.
            targets = None
            ntargets = ntargets_req
        else:
            targets = target_gen(queue, ntargets_req, dims, dtype, seed=16)
            ntargets = len(targets[0])
    except ImportError:
        pytest.skip("loo.py not available, but needed for particle array "
                    "generation")

    from pyopencl.clrandom import PhiloxGenerator
    rng = PhiloxGenerator(queue.context, seed=12)
    if sources_have_extent:
        # radii drawn log-uniformly from 2**[-10, 0)
        source_radii = 2**rng.uniform(queue, nsources, dtype=dtype, a=-10, b=0)
    else:
        source_radii = None

    if targets_have_extent:
        target_radii = 2**rng.uniform(queue, ntargets, dtype=dtype, a=-10, b=0)
    else:
        target_radii = None

    from boxtree import TreeBuilder
    tb = TreeBuilder(ctx)

    tree, _ = tb(queue,
                 sources,
                 targets=targets,
                 max_particles_in_box=30,
                 source_radii=source_radii,
                 target_radii=target_radii,
                 debug=True,
                 stick_out_factor=0.25,
                 extent_norm=extent_norm)
    # debugging aid (disabled): plot the tree
    if 0:
        tree.get().plot()
        import matplotlib.pyplot as pt
        pt.show()

    from boxtree.traversal import FMMTraversalBuilder
    tbuild = FMMTraversalBuilder(ctx,
                                 well_sep_is_n_away=well_sep_is_n_away,
                                 from_sep_smaller_crit=from_sep_smaller_crit)
    trav, _ = tbuild(queue, tree, debug=True)

    if who_has_extent:
        # keep the unmerged traversal around for the visualization below
        pre_merge_trav = trav
        trav = trav.merge_close_lists(queue)

    #weights = np.random.randn(nsources)
    weights = np.ones(nsources)
    weights_sum = np.sum(weights)

    host_trav = trav.get(queue=queue)
    host_tree = host_trav.tree

    if who_has_extent:
        pre_merge_host_trav = pre_merge_trav.get(queue=queue)

    from boxtree.tree import ParticleListFilter
    plfilt = ParticleListFilter(ctx)

    if filter_kind:
        # randomly keep about half the targets (flags in {0, 1});
        # ntargets may be None ("same as sources"), hence the fallback
        flags = rng.uniform(queue, ntargets or nsources, np.int32, a=0, b=2) \
                .astype(np.int8)
        if filter_kind == "user":
            filtered_targets = plfilt.filter_target_lists_in_user_order(
                queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInUserOrder(
                host_tree, filtered_targets.get(queue=queue))
        elif filter_kind == "tree":
            filtered_targets = plfilt.filter_target_lists_in_tree_order(
                queue, tree, flags)
            wrangler = ConstantOneExpansionWranglerWithFilteredTargetsInTreeOrder(
                host_tree, filtered_targets.get(queue=queue))
        else:
            raise ValueError("unsupported value of 'filter_kind'")
    else:
        # no filtering: all targets participate
        wrangler = ConstantOneExpansionWrangler(host_tree)
        flags = cl.array.empty(queue, ntargets or nsources, dtype=np.int8)
        flags.fill(1)

    if ntargets is None and not filter_kind:
        # This check only works for targets == sources.
        assert (wrangler.reorder_potentials(
            wrangler.reorder_sources(weights)) == weights).all()

    from boxtree.fmm import drive_fmm
    pot = drive_fmm(host_trav, wrangler, weights)

    if filter_kind:
        # only compare potentials at targets that survived the filter
        pot = pot[flags.get() > 0]

    # every entry of pot should equal weights_sum if nothing was missed
    rel_err = la.norm((pot - weights_sum) / nsources)
    good = rel_err < 1e-8

    # {{{ build, evaluate matrix (and identify incorrect interactions)

    # debugging aid (disabled): build the full interaction matrix one unit
    # vector at a time and visualize entries that are not exactly 1
    if 0 and not good:
        mat = np.zeros((ntargets, nsources), dtype)
        from pytools import ProgressBar

        logging.getLogger().setLevel(logging.WARNING)

        pb = ProgressBar("matrix", nsources)
        for i in range(nsources):
            unit_vec = np.zeros(nsources, dtype=dtype)
            unit_vec[i] = 1
            mat[:, i] = drive_fmm(host_trav, wrangler, unit_vec)
            pb.progress()
        pb.finished()

        logging.getLogger().setLevel(logging.INFO)

        import matplotlib.pyplot as pt

        if 0:
            pt.imshow(mat)
            pt.colorbar()
            pt.show()

        incorrect_tgts, incorrect_srcs = np.where(mat != 1)

        if 1 and len(incorrect_tgts):
            from boxtree.visualization import TreePlotter
            plotter = TreePlotter(host_tree)
            plotter.draw_tree(fill=False, edgecolor="black")
            plotter.draw_box_numbers()
            plotter.set_bounding_box()

            # matrix indices are user-order; convert to tree order for plotting
            tree_order_incorrect_tgts = \
                    host_tree.indices_to_tree_target_order(incorrect_tgts)
            tree_order_incorrect_srcs = \
                    host_tree.indices_to_tree_source_order(incorrect_srcs)

            src_boxes = [
                host_tree.find_box_nr_for_source(i)
                for i in tree_order_incorrect_srcs
            ]
            tgt_boxes = [
                host_tree.find_box_nr_for_target(i)
                for i in tree_order_incorrect_tgts
            ]
            print(src_boxes)
            print(tgt_boxes)

            # plot all sources/targets
            if 0:
                pt.plot(host_tree.targets[0],
                        host_tree.targets[1],
                        "v",
                        alpha=0.9)
                pt.plot(host_tree.sources[0],
                        host_tree.sources[1],
                        "gx",
                        alpha=0.9)

            # plot offending sources/targets
            if 0:
                pt.plot(host_tree.targets[0][tree_order_incorrect_tgts],
                        host_tree.targets[1][tree_order_incorrect_tgts], "rv")
                pt.plot(host_tree.sources[0][tree_order_incorrect_srcs],
                        host_tree.sources[1][tree_order_incorrect_srcs], "go")
            pt.gca().set_aspect("equal")

            from boxtree.visualization import draw_box_lists
            draw_box_lists(
                plotter, pre_merge_host_trav if who_has_extent else host_trav,
                22)
            # from boxtree.visualization import draw_same_level_non_well_sep_boxes
            # draw_same_level_non_well_sep_boxes(plotter, host_trav, 2)

            pt.show()

    # }}}

    # debugging aid (disabled): plot the deviation of pot from the expected sum
    if 0 and not good:
        import matplotlib.pyplot as pt
        pt.plot(pot - weights_sum)
        pt.show()

    # debugging aid (disabled, 2D only): plot filtered targets, mark bad ones
    if 0 and not good:
        import matplotlib.pyplot as pt
        filt_targets = [
            host_tree.targets[0][flags.get() > 0],
            host_tree.targets[1][flags.get() > 0],
        ]
        host_tree.plot()
        bad = np.abs(pot - weights_sum) >= 1e-3
        bad_targets = [
            filt_targets[0][bad],
            filt_targets[1][bad],
        ]
        print(bad_targets[0].shape)
        pt.plot(filt_targets[0], filt_targets[1], "x")
        pt.plot(bad_targets[0], bad_targets[1], "v")
        pt.show()

    assert good
    def __init__(self,
                 quad_order,
                 method="gauss-legendre",
                 dim=2,
                 kernel_func=None,
                 kernel_type=None,
                 sumpy_kernel=None,
                 build_method=None,
                 source_box_extent=1,
                 dtype=np.float64,
                 inverse_droste=False,
                 progress_bar=True,
                 **kwargs):
        """Set up (but do not yet build) a near-field interaction table.

        kernel_type determines how the kernel is scaled w.r.t. box size.
        build_method can be "Transform" or "DrosteSum"; "Transform" is
        only implemented for dim == 2.

        The source box is [0, source_box_extent]^dim.

        :arg quad_order: number of quadrature points per dimension.
        :arg method: quadrature rule family; only "gauss-legendre" is
            implemented.
        :arg kernel_func: Python-level kernel function; in 2D it defaults
            to a constant-one kernel (unused by the "DrosteSum" build
            method).
        :arg sumpy_kernel: sumpy kernel object; asserted non-None in 2D
            when build_method == "DrosteSum".
        :arg inverse_droste: True if computing with the fractional
            Laplacian kernel.
        :arg progress_bar: if True, show a progress bar while building.
        :arg kwargs: may contain "queue" (passed on to the grid generator).
        """
        self.quad_order = quad_order
        self.dim = dim
        self.dtype = dtype
        self.inverse_droste = inverse_droste

        assert source_box_extent > 0
        self.source_box_extent = source_box_extent

        # geometric center of the source box
        self.center = np.ones(self.dim) * 0.5 * self.source_box_extent

        self.build_method = build_method

        if dim == 1:

            if build_method == "Transform":
                raise NotImplementedError("Use build_method=DrosteSum for 1d")

            self.kernel_func = kernel_func
            self.kernel_type = kernel_type
            self.integral_knl = sumpy_kernel

        elif dim == 2:

            # Constant kernel can be used for fun/testing
            if kernel_func is None:
                kernel_func = constant_one
                kernel_type = "const"
                # for DrosteSum kernel_func is unused
                if build_method == "Transform":
                    logger.warning("setting kernel_func to be constant.")

            # Kernel function differs from OpenCL's kernels
            self.kernel_func = kernel_func
            self.kernel_type = kernel_type
            self.integral_knl = sumpy_kernel

            if build_method == "DrosteSum":
                assert sumpy_kernel is not None

        elif dim == 3:

            if build_method == "Transform":
                raise NotImplementedError("Use build_method=DrosteSum for 3d")

            self.kernel_func = kernel_func
            self.kernel_type = kernel_type
            self.integral_knl = sumpy_kernel

        else:
            raise NotImplementedError

        # number of quad points per box
        # equals to the number of modes per box
        self.n_q_points = self.quad_order**dim

        # Normalizers for polynomial modes
        # Needed only when we want to rescale log type kernels
        self.mode_normalizers = np.zeros(self.n_q_points, dtype=self.dtype)

        # Exterior normalizers for hypersingular kernels
        self.kernel_exterior_normalizers = np.zeros(self.n_q_points,
                                                    dtype=self.dtype)

        # number of (source_mode, target_point) pairs between two boxes
        self.n_pairs = self.n_q_points**2

        # possible interaction cases
        self.interaction_case_vecs, self.case_encode, self.case_indices = \
                gallery.generate_list1_gallery(self.dim)
        self.n_cases = len(self.interaction_case_vecs)

        if method == "gauss-legendre":
            # quad points in [-1,1]
            import volumential.meshgen as mg

            if 'queue' in kwargs:
                queue = kwargs['queue']
            else:
                queue = None

            q_points, _, _ = mg.make_uniform_cubic_grid(degree=quad_order,
                                                        level=1,
                                                        dim=self.dim,
                                                        queue=queue)

            # map to source box
            mapped_q_points = np.array([
                0.5 * self.source_box_extent * (qp + np.ones(self.dim))
                for qp in q_points
            ])
            # sort in dictionary order, preserve only the leading
            # digits to prevent floating point errors from polluting
            # the ordering.
            q_points_ordering = sorted(
                range(len(mapped_q_points)),
                key=lambda i: list(np.floor(mapped_q_points[i] * 10000)),
            )
            self.q_points = mapped_q_points[q_points_ordering]

        else:
            raise NotImplementedError

        # table entries start out as NaN, i.e. "not yet computed"
        self.data = np.empty(self.n_pairs * self.n_cases, dtype=self.dtype)
        self.data.fill(np.nan)

        # one evaluation per table entry, plus n_q_points extra
        # (presumably for the mode normalizers — TODO confirm)
        total_evals = len(self.data) + self.n_q_points

        if progress_bar:
            from pytools import ProgressBar
            self.pb = ProgressBar("Building table:", total_evals)
        else:
            self.pb = None

        # filled in by the build step, not by this constructor
        self.is_built = False
Example #5
0
def optimize_plan(opt_name, plan_generator, target_func, maximize,
        debug_flags=frozenset(), occupancy_slack=0.5, log_filename=None):
    """Pick the best CUDA execution plan produced by *plan_generator*.

    Evaluates *target_func* on every valid plan whose occupancy reaches
    *occupancy_slack* times the best achievable occupancy and returns a
    tuple ``(plan, value)`` of the plan with the maximal (or minimal, if
    *maximize* is false) value.

    :arg opt_name: name of the optimization; used in debug-flag names and
        in debug output.
    :arg plan_generator: callable returning candidate plan objects.
    :arg target_func: callable evaluating a plan; may return None to skip
        the plan, or a tuple whose first entry is the value and whose
        remaining entries are extra features for logging.
    :arg maximize: if true, pick the plan with the largest value.
    :arg debug_flags: set of flag strings altering behavior (note: default
        is now an immutable frozenset, fixing the mutable-default smell).
    :arg occupancy_slack: fraction of the best occupancy a plan must reach
        to be benchmarked at all.
    :arg log_filename: suffix for an sqlite log database; only honored
        when "cuda_plan_log" is among *debug_flags*.
    """
    plans = [p for p in plan_generator() if p.invalid_reason() is None]

    debug = "cuda_%s_plan" % opt_name in debug_flags
    show_progress = ("cuda_plan_no_progress" not in debug_flags) and not debug

    if "cuda_plan_log" not in debug_flags:
        log_filename = None

    if not plans:
        # was Python-2-only `raise RuntimeError, "..."` syntax
        raise RuntimeError("no valid CUDA execution plans found")

    if {"cuda_no_plan", "cuda_no_plan_" + opt_name} & debug_flags:
        # short-circuit: skip benchmarking, just take the best-occupancy plan
        from pytools import argmax2
        return argmax2((plan, plan.occupancy_record().occupancy)
                for plan in plans), 0

    max_occup = max(plan.occupancy_record().occupancy for plan in plans)
    desired_occup = occupancy_slack*max_occup

    if log_filename is not None:
        from pytools import single_valued
        feature_columns = single_valued(p.feature_columns() for p in plans)
        feature_names = [fc.split()[0] for fc in feature_columns]

        try:
            import sqlite3 as sqlite
        except ImportError:
            from pysqlite2 import dbapi2 as sqlite

        db_conn = sqlite.connect("plan-%s.dat" % log_filename)

        try:
            db_conn.execute("""
                  create table data (
                    id integer primary key autoincrement,
                    %s,
                    value real)"""
                    % ", ".join(feature_columns))
        except sqlite.OperationalError:
            # table already exists from a previous run
            pass

    if show_progress:
        from pytools import ProgressBar
        pbar = ProgressBar("plan "+opt_name, len(plans))
    try:
        plan_values = []
        for p in plans:
            if show_progress:
                pbar.progress()

            # small epsilon guards against float round-off in the comparison
            if p.occupancy_record().occupancy >= desired_occup - 1e-10:
                if debug:
                    print("<---- trying %s:" % p)

                value = target_func(p)
                if isinstance(value, tuple):
                    extra_info = value[1:]
                    value = value[0]
                else:
                    extra_info = None

                if value is not None:
                    if debug:
                        print("----> yielded %g" % value)
                    plan_values.append(((len(plan_values), p), value))

                    if log_filename is not None:
                        db_conn.execute(
                                "insert into data (%s,value) values (%s)"
                                % (", ".join(feature_names),
                                    ",".join(["?"]*(1+len(feature_names)))),
                                p.features(*extra_info)+(value,))
    finally:
        if show_progress:
            pbar.finished()

    if log_filename is not None:
        db_conn.commit()
        # close the connection (the original leaked it)
        db_conn.close()

    from pytools import argmax2, argmin2
    if maximize:
        num_plan, plan = argmax2(plan_values)
    else:
        num_plan, plan = argmin2(plan_values)

    plan_value = plan_values[num_plan][1]

    if debug:
        print("----------------------------------------------")
        print("chosen: %s" % plan)
        print("value: %g" % plan_value)
        print("----------------------------------------------")

    return plan, plan_value
Example #6
0
def dump_couch_to_sqlite(couch_db, outfile, scan_max=None):
    """Export the "job" documents of *couch_db* to an sqlite ``data`` table.

    Makes two passes over the database: first to infer a column type for
    every field occurring in a job document, then to insert one row per
    document into a freshly created table in *outfile*.

    :arg couch_db: CouchDB database handle (iterated via
        ``generate_all_docs``).
    :arg outfile: path of the sqlite database file to create.
    :arg scan_max: if given, stop the type-scan pass after this many job
        documents.
    """
    import sqlite3 as sqlite

    # {{{ scan for types

    # maps field name -> Python type observed in the documents
    column_type_dict = {}

    from pytools import ProgressBar
    pb = ProgressBar("scan (pass 1/2)", len(couch_db))
    scan_count = 0
    for doc in generate_all_docs(couch_db):
        if "type" in doc and doc["type"] == "job":
            for k, v in six.iteritems(doc):
                if v is None:
                    # Bug fix: previously type(None) was recorded in
                    # column_type_dict, which broke get_sql_type() for
                    # all-None columns and caused spurious "ambiguous
                    # types" errors once a real value appeared later.
                    continue
                new_type = type(v)
                if k in column_type_dict and column_type_dict[k] != new_type:
                    old_type = column_type_dict[k]
                    if {old_type, new_type} == {float, int}:
                        # int/float conflicts widen to float
                        new_type = float
                    else:
                        raise RuntimeError("ambiguous types for '%s': %s, %s" %
                                           (k, new_type, old_type))
                column_type_dict[k] = new_type

            scan_count += 1
            if scan_max is not None and scan_count >= scan_max:
                break
        pb.progress()

    pb.finished()
    # }}}

    # the "type" marker field is not stored as a column
    del column_type_dict["type"]
    column_types = list(six.iteritems(column_type_dict))

    def get_sql_type(tp):
        # map an observed Python type to an sqlite column type
        if tp in (str, six.text_type):
            return "text"
        elif issubclass(tp, list):
            # lists are stored via their str() representation (see below)
            return "text"
        elif issubclass(tp, int):
            return "integer"
        elif issubclass(tp, (float, numpy.floating)):
            return "real"
        else:
            raise TypeError("No SQL type for %s" % tp)

    # NOTE(review): column names are interpolated straight from document
    # keys -- this assumes a trusted database; it is not safe against
    # hostile field names.
    create_stmt = ("create table data (%s)" %
                   ",".join("%s %s" % (name, get_sql_type(tp))
                            for name, tp in column_types))
    db_conn = sqlite.connect(outfile, timeout=30)
    db_conn.execute(create_stmt)
    db_conn.commit()

    # values themselves are passed as parameters, not interpolated
    insert_stmt = "insert into data values (%s)" % (",".join(
        ["?"] * len(column_types)))

    pb = ProgressBar("fill (pass 2/2)", len(couch_db))
    for doc in generate_all_docs(couch_db):
        data = [None] * len(column_types)
        for i, (col_name, col_tp) in enumerate(column_types):
            if "type" in doc and doc["type"] == "job":
                try:
                    if isinstance(doc[col_name], list):
                        data[i] = str(doc[col_name])
                    else:
                        data[i] = doc[col_name]
                except KeyError:
                    print("doc %s had no field %s" % (doc["_id"], col_name))

        db_conn.execute(insert_stmt, data)
        pb.progress()

    pb.finished()

    db_conn.commit()
    db_conn.close()