Example #1
0
    def get_next_step(self, available_names, done_insns):
        """Pick the next instruction to run and the variables that may be freed.

        :arg available_names: names of variables whose values are available.
        :arg done_insns: set of instructions that have already executed.
        :returns: a tuple ``(insn, discardable_vars)`` where *insn* is the
            runnable instruction of highest priority and *discardable_vars*
            is the set of available names no remaining instruction needs.
        :raises: ``self.NoInstructionAvailable`` if nothing can run yet.
        """
        from pytools import all, argmax2

        def is_runnable(insn):
            # An instruction is runnable once all its dependencies' names
            # have been computed.
            return all(dep.name in available_names
                       for dep in insn.get_dependencies())

        available_insns = [
            (insn, insn.priority)
            for insn in self.instructions
            if insn not in done_insns and is_runnable(insn)]

        if not available_insns:
            raise self.NoInstructionAvailable

        # Anything no pending instruction depends on can be discarded.
        still_needed = {
            dep.name
            for insn in self.instructions
            if insn not in done_insns
            for dep in insn.get_dependencies()}
        discardable_vars = set(available_names) - still_needed

        # {{{ make sure results do not get discarded

        dm = mappers.DependencyMapper(composite_leaves=False)

        def remove_result_variable(result_expr):
            # A dependency-mapper pass is needed because the result
            # expression may contain more than bare variables (for
            # instance, subscripts).
            for var in dm(result_expr):
                assert isinstance(var, Variable)
                discardable_vars.discard(var.name)

        obj_array_vectorize(remove_result_variable, self.result)

        # }}}

        return argmax2(available_insns), discardable_vars
Example #2
0
    def get_next_step(self, available_names, done_insns):
        """Pick the next instruction to run and the variables that may be freed.

        :arg available_names: names of variables whose values are available.
        :arg done_insns: set of instructions that have already executed.
        :returns: a tuple ``(insn, discardable_vars)`` where *insn* is the
            runnable instruction of highest priority and *discardable_vars*
            is the set of available names no remaining instruction needs.
        :raises: ``self.NoInstructionAvailable`` if nothing can run yet.
        """
        from pytools import all, argmax2
        available_insns = [
                (insn, insn.priority) for insn in self.instructions
                if insn not in done_insns
                and all(dep.name in available_names
                    for dep in insn.get_dependencies())]

        if not available_insns:
            raise self.NoInstructionAvailable

        # Variables that no not-yet-executed instruction depends on
        # may be discarded.
        needed_vars = {
            dep.name
            for insn in self.instructions
            if insn not in done_insns
            for dep in insn.get_dependencies()}
        discardable_vars = set(available_names) - needed_vars

        # {{{ make sure results do not get discarded
        from pytools.obj_array import with_object_array_or_scalar

        # Hoisted out of the per-variable loop: importing once is enough.
        from pymbolic.primitives import Variable

        from pytential.symbolic.mappers import DependencyMapper
        dm = DependencyMapper(composite_leaves=False)

        def remove_result_variable(result_expr):
            # The extra dependency mapper run is necessary
            # because, for instance, subscripts can make it
            # into the result expression, which then does
            # not consist of just variables.

            for var in dm(result_expr):
                assert isinstance(var, Variable)
                discardable_vars.discard(var.name)

        with_object_array_or_scalar(remove_result_variable, self.result)
        # }}}

        return argmax2(available_insns), discardable_vars
Example #3
0
    def get_next_step(self, available_names, done_insns):
        """Pick the next instruction to run and the variables that may be freed.

        :arg available_names: names of variables whose values are available.
        :arg done_insns: set of instructions that have already executed.
        :returns: a tuple ``(insn, discardable_vars)`` where *insn* is the
            runnable instruction of highest priority and *discardable_vars*
            is the set of available names no remaining instruction needs.
        :raises: ``self.NoInstructionAvailable`` if nothing can run yet.
        """
        from pytools import all, argmax2
        available_insns = [(insn, insn.priority) for insn in self.instructions
                           if insn not in done_insns and all(
                               dep.name in available_names
                               for dep in insn.get_dependencies())]

        if not available_insns:
            raise self.NoInstructionAvailable

        # Variables that no not-yet-executed instruction depends on
        # may be discarded.
        needed_vars = {
            dep.name for insn in self.instructions if insn not in done_insns
            for dep in insn.get_dependencies()}
        discardable_vars = set(available_names) - needed_vars

        # {{{ make sure results do not get discarded
        from pytools.obj_array import with_object_array_or_scalar

        # Hoisted out of the per-variable loop: importing once is enough.
        from pymbolic.primitives import Variable

        from pytential.symbolic.mappers import DependencyMapper
        dm = DependencyMapper(composite_leaves=False)

        def remove_result_variable(result_expr):
            # The extra dependency mapper run is necessary
            # because, for instance, subscripts can make it
            # into the result expression, which then does
            # not consist of just variables.

            for var in dm(result_expr):
                assert isinstance(var, Variable)
                discardable_vars.discard(var.name)

        with_object_array_or_scalar(remove_result_variable, self.result)
        # }}}

        return argmax2(available_insns), discardable_vars
Example #4
0
    def prepare_with_pointwise_projection_and_basis_reduction(self):
        """Prepare pointwise-projection deposition with per-element basis reduction.

        For each element, a scaled Vandermonde matrix mapping the basis modes
        to the points found inside the element is built.  If that matrix is
        under-determined (more modes than points) or badly conditioned
        (``s[0]/s[-1] > 10``), one mode of the highest total degree is removed
        and the process repeats.  The surviving basis subset is then used to
        build the element's pointwise interpolation matrix.

        Side effects: appends one entry per element to
        ``backend.elements_on_grid``, extends
        ``backend.extra_point_brick_starts``, and--when both the "depositor"
        and "vis_files" debug flags are set--writes a "rec-debug" Silo
        visualization file of the per-element diagnostics.
        """
        discr = self.method.discretization
        backend = self.backend

        # Pre-size storage: one entry per element across all groups.
        backend.elements_on_grid.reserve(
            sum(len(eg.members) for eg in discr.element_groups))

        # Per-element singular-value statistics (collected below).
        min_s_values = []
        max_s_values = []
        cond_s_values = []

        # Per-DOF diagnostic fields for the optional visualization.
        basis_len_vec = discr.volume_zeros()
        el_condition_vec = discr.volume_zeros()
        point_count_vec = discr.volume_zeros()

        # Iterate over all elements
        for eg in discr.element_groups:
            ldis = eg.local_discretization

            # Map each mode identifier to its index in the full basis, so
            # the reduced basis can be expressed as an index subset later.
            mode_id_to_index = dict(
                (bid, i)
                for i, bid in enumerate(ldis.generate_mode_identifiers()))

            for el in eg.members:
                # Start from the full basis as (mode id, basis function)
                # pairs; entries are removed below until the Vandermonde
                # system is acceptably conditioned.
                basis = list(
                    zip(ldis.generate_mode_identifiers(),
                        ldis.basis_functions()))

                eog, points = self.find_points_in_element(
                    el, self.el_tolerance)

                while True:
                    scaled_vdm = self.scaled_vandermonde(
                        el, eog, points, [bf for bid, bf in basis])

                    # Only modes of the currently highest total degree are
                    # candidates for removal.
                    max_bid_sum = max(sum(bid) for bid, bf in basis)
                    killable_basis_elements = [
                        (i, bid) for i, (bid, bf) in enumerate(basis)
                        if sum(bid) == max_bid_sum
                    ]

                    try:
                        u, s, vt = svd = la.svd(scaled_vdm)

                        # NOTE(review): thresh is computed but not used
                        # anywhere below -- possibly dead code.
                        thresh = (numpy.finfo(float).eps *
                                  max(scaled_vdm.shape) * s[0])

                        # numpy's svd returns singular values in
                        # descending order.
                        assert s[-1] == numpy.min(s)
                        assert s[0] == numpy.max(s)

                        # Imagine that--s can have negative entries.
                        # (AK: I encountered one negative zero.)

                        if len(basis) > len(points) or numpy.abs(
                                s[0] / s[-1]) > 10:
                            retry = True

                            # badly conditioned, kill a basis entry
                            vti = vt[-1]

                            # Remove the killable mode with the largest
                            # component in the right singular vector that
                            # belongs to the smallest singular value.
                            from pytools import argmax2
                            kill_idx, kill_bid = argmax2(
                                ((j, bid), abs(vti[j]))
                                for j, bid in killable_basis_elements)

                            assert kill_bid == basis[kill_idx][0]
                            basis.pop(kill_idx)
                        else:
                            retry = False

                    except la.LinAlgError:
                        # SVD likely didn't converge. Lacking an SVD, we don't have
                        # much guidance on what modes to kill. Any of the killable
                        # ones will do.

                        # Bang, you're dead.
                        basis.pop(killable_basis_elements[0][0])

                        retry = True

                    if not retry:
                        break

                    if len(basis) == 1:
                        raise RuntimeError(
                            "basis reduction has killed almost the entire basis on element %d"
                            % el.id)

                # Report how many modes were killed on this element.
                # (Python 2 print statement.)
                if ldis.node_count() > len(basis):
                    print "element %d: #nodes=%d, killed modes=%d" % (
                        el.id,
                        ldis.node_count(),
                        ldis.node_count() - len(basis),
                    )

                # Record diagnostics; s/svd stem from the last (successful)
                # SVD of the loop above.
                basis_len_vec[discr.find_el_range(el.id)] = len(basis)
                el_condition_vec[discr.find_el_range(el.id)] = s[0] / s[-1]
                point_count_vec[discr.find_el_range(el.id)] = len(points)

                min_s_values.append(min(s))
                max_s_values.append(max(s))
                cond_s_values.append(max(s) / min(s))

                self.make_pointwise_interpolation_matrix(
                    eog,
                    eg,
                    el,
                    ldis,
                    svd,
                    scaled_vdm,
                    basis_subset=[mode_id_to_index[bid] for bid, bf in basis])

                backend.elements_on_grid.append(eog)

        # visualize basis length for each element
        if set(["depositor", "vis_files"]) < self.method.debug:
            from hedge.visualization import SiloVisualizer
            vis = SiloVisualizer(discr)
            visf = vis.make_file("rec-debug")
            vis.add_data(visf, [
                ("basis_len", basis_len_vec),
                ("el_condition", el_condition_vec),
                ("point_count", point_count_vec),
            ])
            visf.close()

        # we don't need no stinkin' extra points
        backend.extra_point_brick_starts.extend([0] *
                                                (len(backend.bricks) + 1))

        # print some statistics
        self.generate_point_statistics()
Example #5
0
    def prepare_with_pointwise_projection_and_basis_reduction(self):
        """Prepare pointwise-projection deposition with per-element basis reduction.

        For each element, a scaled Vandermonde matrix mapping the basis modes
        to the points found inside the element is built.  If that matrix is
        under-determined (more modes than points) or badly conditioned
        (``s[0]/s[-1] > 10``), one mode of the highest total degree is removed
        and the process repeats.  The surviving basis subset is then used to
        build the element's pointwise interpolation matrix.

        Side effects: appends one entry per element to
        ``backend.elements_on_grid``, extends
        ``backend.extra_point_brick_starts``, and--when both the "depositor"
        and "vis_files" debug flags are set--writes a "rec-debug" Silo
        visualization file of the per-element diagnostics.
        """
        discr = self.method.discretization
        backend = self.backend

        # Pre-size storage: one entry per element across all groups.
        backend.elements_on_grid.reserve(
                sum(len(eg.members) for eg in discr.element_groups))

        # Per-element singular-value statistics (collected below).
        min_s_values = []
        max_s_values = []
        cond_s_values = []

        # Per-DOF diagnostic fields for the optional visualization.
        basis_len_vec = discr.volume_zeros()
        el_condition_vec = discr.volume_zeros()
        point_count_vec = discr.volume_zeros()

        # Iterate over all elements
        for eg in discr.element_groups:
            ldis = eg.local_discretization

            # Map each mode identifier to its index in the full basis, so
            # the reduced basis can be expressed as an index subset later.
            mode_id_to_index = dict(
                    (bid, i) for i, bid in enumerate(ldis.generate_mode_identifiers()))

            for el in eg.members:
                # Start from the full basis as (mode id, basis function)
                # pairs; entries are removed below until the Vandermonde
                # system is acceptably conditioned.
                basis = list(zip(
                    ldis.generate_mode_identifiers(), 
                    ldis.basis_functions()))

                eog, points = self.find_points_in_element(el, self.el_tolerance)

                while True:
                    scaled_vdm = self.scaled_vandermonde(el, eog, points, 
                            [bf for bid, bf in basis])

                    # Only modes of the currently highest total degree are
                    # candidates for removal.
                    max_bid_sum = max(sum(bid) for bid, bf in basis)
                    killable_basis_elements = [
                            (i, bid) for i, (bid, bf) in enumerate(basis)
                            if sum(bid) == max_bid_sum]

                    try:
                        u, s, vt = svd = la.svd(scaled_vdm)

                        # NOTE(review): thresh is computed but not used
                        # anywhere below -- possibly dead code.
                        thresh = (numpy.finfo(float).eps
                                * max(scaled_vdm.shape) * s[0])

                        # numpy's svd returns singular values in
                        # descending order.
                        assert s[-1] == numpy.min(s)
                        assert s[0] == numpy.max(s)

                        # Imagine that--s can have negative entries.
                        # (AK: I encountered one negative zero.)

                        if len(basis) > len(points) or numpy.abs(s[0]/s[-1]) > 10:
                            retry = True

                            # badly conditioned, kill a basis entry
                            vti = vt[-1]

                            # Remove the killable mode with the largest
                            # component in the right singular vector that
                            # belongs to the smallest singular value.
                            from pytools import argmax2
                            kill_idx, kill_bid = argmax2(
                                    ((j, bid), abs(vti[j])) 
                                    for j, bid in killable_basis_elements)

                            assert kill_bid == basis[kill_idx][0]
                            basis.pop(kill_idx)
                        else:
                            retry = False

                    except la.LinAlgError:
                        # SVD likely didn't converge. Lacking an SVD, we don't have 
                        # much guidance on what modes to kill. Any of the killable
                        # ones will do.

                        # Bang, you're dead.
                        basis.pop(killable_basis_elements[0][0])

                        retry = True

                    if not retry:
                        break

                    if len(basis) == 1:
                        raise RuntimeError(
                                "basis reduction has killed almost the entire basis on element %d"
                                % el.id)

                # Report how many modes were killed on this element.
                # (Python 2 print statement.)
                if ldis.node_count() > len(basis):
                    print "element %d: #nodes=%d, killed modes=%d" % (
                            el.id, ldis.node_count(), ldis.node_count()-len(basis),)

                # Record diagnostics; s/svd stem from the last (successful)
                # SVD of the loop above.
                basis_len_vec[discr.find_el_range(el.id)] = len(basis)
                el_condition_vec[discr.find_el_range(el.id)] = s[0]/s[-1]
                point_count_vec[discr.find_el_range(el.id)] = len(points)

                min_s_values.append(min(s))
                max_s_values.append(max(s))
                cond_s_values.append(max(s)/min(s))

                self.make_pointwise_interpolation_matrix(eog, eg, el, ldis, svd, scaled_vdm,
                        basis_subset=[mode_id_to_index[bid] for bid, bf in basis])

                backend.elements_on_grid.append(eog)

        # visualize basis length for each element
        if set(["depositor", "vis_files"]) < self.method.debug:
            from hedge.visualization import SiloVisualizer
            vis = SiloVisualizer(discr)
            visf = vis.make_file("rec-debug")
            vis.add_data(visf, [
                ("basis_len", basis_len_vec),
                ("el_condition", el_condition_vec),
                ("point_count", point_count_vec),
                ])
            visf.close()

        # we don't need no stinkin' extra points
        backend.extra_point_brick_starts.extend([0]*(len(backend.bricks)+1))

        # print some statistics
        self.generate_point_statistics()
Example #6
0
def optimize_plan(opt_name, plan_generator, target_func, maximize,
        debug_flags=frozenset(), occupancy_slack=0.5,
        log_filename=None):
    """Pick the best CUDA execution plan by benchmarking the candidates.

    :arg opt_name: name of the optimization, used in debug flags, the
        progress bar label, and the log file name.
    :arg plan_generator: callable returning an iterable of candidate plans.
    :arg target_func: callable evaluating a plan; may return a number, a
        tuple ``(value, *extra_info)``, or *None* to skip the plan.
    :arg maximize: if true, choose the plan with the largest value,
        otherwise the smallest.
    :arg debug_flags: collection of debug-flag strings. (The default is a
        frozenset to avoid the shared-mutable-default pitfall; only
        membership tests and intersections are performed on it.)
    :arg occupancy_slack: only benchmark plans whose occupancy reaches this
        fraction of the best achievable occupancy.
    :arg log_filename: basename for an sqlite log of measured values; only
        honored when "cuda_plan_log" is among *debug_flags*.
    :returns: a tuple ``(plan, plan_value)``.
    :raises RuntimeError: if no valid plan exists or none could be measured.
    """
    plans = [p for p in plan_generator() if p.invalid_reason() is None]

    debug = "cuda_%s_plan" % opt_name in debug_flags
    show_progress = ("cuda_plan_no_progress" not in debug_flags) and not debug

    if "cuda_plan_log" not in debug_flags:
        log_filename = None

    if not plans:
        raise RuntimeError("no valid CUDA execution plans found")

    if {"cuda_no_plan", "cuda_no_plan_"+opt_name} & debug_flags:
        # Planning disabled: skip benchmarking and return the plan with the
        # best occupancy, with a dummy value of 0.
        from pytools import argmax2
        return argmax2((plan, plan.occupancy_record().occupancy)
                for plan in plans), 0

    max_occup = max(plan.occupancy_record().occupancy for plan in plans)
    desired_occup = occupancy_slack*max_occup

    db_conn = None
    if log_filename is not None:
        from pytools import single_valued
        feature_columns = single_valued(p.feature_columns() for p in plans)
        feature_names = [fc.split()[0] for fc in feature_columns]

        try:
            import sqlite3 as sqlite
        except ImportError:
            from pysqlite2 import dbapi2 as sqlite

        db_conn = sqlite.connect("plan-%s.dat" % log_filename)

        try:
            db_conn.execute("""
                  create table data (
                    id integer primary key autoincrement,
                    %s,
                    value real)"""
                    % ", ".join(feature_columns))
        except sqlite.OperationalError:
            # Table already exists from a previous run; append to it.
            pass

    if show_progress:
        from pytools import ProgressBar
        pbar = ProgressBar("plan "+opt_name, len(plans))
    try:
        plan_values = []
        for p in plans:
            if show_progress:
                pbar.progress()

            # Skip plans whose occupancy falls below the target; the small
            # epsilon guards against float round-off.
            if p.occupancy_record().occupancy >= desired_occup - 1e-10:
                if debug:
                    print("<---- trying %s:" % p)

                value = target_func(p)
                if isinstance(value, tuple):
                    extra_info = value[1:]
                    value = value[0]
                else:
                    # Empty tuple (not None): extra_info is splatted into
                    # p.features() below, which would crash on None.
                    extra_info = ()

                if value is not None:
                    if debug:
                        print("----> yielded %g" % (value))
                    plan_values.append(((len(plan_values), p), value))

                    if db_conn is not None:
                        db_conn.execute(
                                "insert into data (%s,value) values (%s)"
                                % (", ".join(feature_names),
                                    ",".join(["?"]*(1+len(feature_names)))),
                                p.features(*extra_info)+(value,))
    finally:
        if show_progress:
            pbar.finished()

    if db_conn is not None:
        db_conn.commit()
        db_conn.close()

    if not plan_values:
        # Give a clear error instead of letting argmax2/argmin2 choke on
        # an empty sequence.
        raise RuntimeError(
                "no CUDA execution plan for '%s' could be measured"
                % opt_name)

    from pytools import argmax2, argmin2
    if maximize:
        num_plan, plan = argmax2(plan_values)
    else:
        num_plan, plan = argmin2(plan_values)

    plan_value = plan_values[num_plan][1]

    if debug:
        print("----------------------------------------------")
        print("chosen: %s" % plan)
        print("value: %g" % plan_value)
        print("----------------------------------------------")

    return plan, plan_value