def get_next_step(self, available_names, done_insns):
    from pytools import all, argmax2
    available_insns = [(insn, insn.priority) for insn in self.instructions
            if insn not in done_insns
            and all(
                dep.name in available_names
                for dep in insn.get_dependencies())]

    if not available_insns:
        raise self.NoInstructionAvailable

    from pytools import flatten
    discardable_vars = set(available_names) - set(flatten(
        [dep.name for dep in insn.get_dependencies()]
        for insn in self.instructions
        if insn not in done_insns))

    # {{{ make sure results do not get discarded

    dm = mappers.DependencyMapper(composite_leaves=False)

    def remove_result_variable(result_expr):
        # The extra dependency mapper run is necessary
        # because, for instance, subscripts can make it
        # into the result expression, which then does
        # not consist of just variables.

        for var in dm(result_expr):
            assert isinstance(var, Variable)
            discardable_vars.discard(var.name)

    obj_array_vectorize(remove_result_variable, self.result)

    # }}}

    return argmax2(available_insns), discardable_vars
def get_next_step(self, available_names, done_insns):
    from pytools import all, argmax2
    available_insns = [
            (insn, insn.priority)
            for insn in self.instructions
            if insn not in done_insns
            and all(dep.name in available_names
                for dep in insn.get_dependencies())]

    if not available_insns:
        raise self.NoInstructionAvailable

    needed_vars = set([
        dep.name
        for insn in self.instructions
        if insn not in done_insns
        for dep in insn.get_dependencies()
        ])
    discardable_vars = set(available_names) - needed_vars

    # {{{ make sure results do not get discarded

    from pytools.obj_array import with_object_array_or_scalar
    from pytential.symbolic.mappers import DependencyMapper
    dm = DependencyMapper(composite_leaves=False)

    def remove_result_variable(result_expr):
        # The extra dependency mapper run is necessary
        # because, for instance, subscripts can make it
        # into the result expression, which then does
        # not consist of just variables.

        for var in dm(result_expr):
            from pymbolic.primitives import Variable
            assert isinstance(var, Variable)
            discardable_vars.discard(var.name)

    with_object_array_or_scalar(remove_result_variable, self.result)

    # }}}

    return argmax2(available_insns), discardable_vars
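# The variants of get_next_step above all pick the runnable instruction with
# the highest priority via pytools.argmax2, which takes an iterable of
# (key, value) pairs and returns the key belonging to the largest value.
# Below is a minimal, self-contained sketch of that selection pattern;
# `FakeInsn` is a hypothetical stand-in, not the real instruction class.
from pytools import argmax2


class FakeInsn(object):
    def __init__(self, name, priority):
        self.name = name
        self.priority = priority


available_insns = [
        (FakeInsn("compute_potential", 5), 5),
        (FakeInsn("assign_temporary", 0), 0),
        ]

# returns the instruction paired with the largest priority value
best_insn = argmax2(available_insns)
assert best_insn.name == "compute_potential"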
def prepare_with_pointwise_projection_and_basis_reduction(self):
    discr = self.method.discretization
    backend = self.backend

    backend.elements_on_grid.reserve(
            sum(len(eg.members) for eg in discr.element_groups))

    min_s_values = []
    max_s_values = []
    cond_s_values = []

    basis_len_vec = discr.volume_zeros()
    el_condition_vec = discr.volume_zeros()
    point_count_vec = discr.volume_zeros()

    # Iterate over all elements
    for eg in discr.element_groups:
        ldis = eg.local_discretization

        mode_id_to_index = dict(
                (bid, i) for i, bid in
                enumerate(ldis.generate_mode_identifiers()))

        for el in eg.members:
            basis = list(zip(
                ldis.generate_mode_identifiers(),
                ldis.basis_functions()))

            eog, points = self.find_points_in_element(el, self.el_tolerance)

            while True:
                scaled_vdm = self.scaled_vandermonde(el, eog, points,
                        [bf for bid, bf in basis])

                max_bid_sum = max(sum(bid) for bid, bf in basis)
                killable_basis_elements = [
                        (i, bid) for i, (bid, bf) in enumerate(basis)
                        if sum(bid) == max_bid_sum]

                try:
                    u, s, vt = svd = la.svd(scaled_vdm)

                    thresh = (numpy.finfo(float).eps
                            * max(scaled_vdm.shape) * s[0])

                    assert s[-1] == numpy.min(s)
                    assert s[0] == numpy.max(s)

                    # Imagine that--s can have negative entries.
                    # (AK: I encountered one negative zero.)

                    if len(basis) > len(points) or numpy.abs(s[0]/s[-1]) > 10:
                        retry = True

                        # badly conditioned, kill a basis entry
                        vti = vt[-1]

                        from pytools import argmax2
                        kill_idx, kill_bid = argmax2(
                                ((j, bid), abs(vti[j]))
                                for j, bid in killable_basis_elements)

                        assert kill_bid == basis[kill_idx][0]
                        basis.pop(kill_idx)
                    else:
                        retry = False

                except la.LinAlgError:
                    # SVD likely didn't converge. Lacking an SVD, we don't
                    # have much guidance on what modes to kill. Any of the
                    # killable ones will do.

                    # Bang, you're dead.
                    basis.pop(killable_basis_elements[0][0])

                    retry = True

                if not retry:
                    break

                if len(basis) == 1:
                    raise RuntimeError(
                            "basis reduction has killed almost the entire "
                            "basis on element %d" % el.id)

            if ldis.node_count() > len(basis):
                print "element %d: #nodes=%d, killed modes=%d" % (
                        el.id, ldis.node_count(),
                        ldis.node_count()-len(basis),)

            basis_len_vec[discr.find_el_range(el.id)] = len(basis)
            el_condition_vec[discr.find_el_range(el.id)] = s[0]/s[-1]
            point_count_vec[discr.find_el_range(el.id)] = len(points)

            min_s_values.append(min(s))
            max_s_values.append(max(s))
            cond_s_values.append(max(s)/min(s))

            self.make_pointwise_interpolation_matrix(eog, eg, el, ldis,
                    svd, scaled_vdm,
                    basis_subset=[mode_id_to_index[bid] for bid, bf in basis])

            backend.elements_on_grid.append(eog)

    # visualize basis length for each element
    if set(["depositor", "vis_files"]) < self.method.debug:
        from hedge.visualization import SiloVisualizer
        vis = SiloVisualizer(discr)
        visf = vis.make_file("rec-debug")
        vis.add_data(visf, [
            ("basis_len", basis_len_vec),
            ("el_condition", el_condition_vec),
            ("point_count", point_count_vec),
            ])
        visf.close()

    # we don't need no stinkin' extra points
    backend.extra_point_brick_starts.extend(
            [0]*(len(backend.bricks)+1))

    # print some statistics
    self.generate_point_statistics()
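# The basis-reduction loop above rests on numpy.linalg.svd: when the scaled
# Vandermonde matrix is badly conditioned, the last row of vt (the right
# singular vector belonging to the smallest singular value) describes the
# near-null-space direction, and the killable basis column with the largest
# weight in that direction is dropped.  A minimal sketch of that criterion,
# with a made-up nearly rank-deficient matrix standing in for scaled_vdm:
import numpy
import numpy.linalg as la

scaled_vdm = numpy.array([
    [1.0, 0.5, 0.25],
    [1.0, 1.0, 1.0],
    [1.0, 1.00001, 1.00002],   # nearly dependent rows -> bad conditioning
    ])

u, s, vt = la.svd(scaled_vdm)

# numpy returns singular values in descending order
assert s[0] == numpy.max(s) and s[-1] == numpy.min(s)

if abs(s[0]/s[-1]) > 10:
    # among the columns allowed to be killed (here, hypothetically, the two
    # highest-order modes), pick the one contributing most to vt[-1]
    killable = [1, 2]
    kill_idx = max(killable, key=lambda j: abs(vt[-1][j]))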
def optimize_plan(opt_name, plan_generator, target_func, maximize,
        debug_flags=set(), occupancy_slack=0.5,
        log_filename=None):
    plans = [p for p in plan_generator() if p.invalid_reason() is None]

    debug = "cuda_%s_plan" % opt_name in debug_flags
    show_progress = ("cuda_plan_no_progress" not in debug_flags) and not debug

    if "cuda_plan_log" not in debug_flags:
        log_filename = None

    if not plans:
        raise RuntimeError, "no valid CUDA execution plans found"

    if set(["cuda_no_plan", "cuda_no_plan_"+opt_name]) & debug_flags:
        from pytools import argmax2
        return argmax2((plan, plan.occupancy_record().occupancy)
                for plan in plans), 0

    max_occup = max(plan.occupancy_record().occupancy for plan in plans)
    desired_occup = occupancy_slack*max_occup

    if log_filename is not None:
        from pytools import single_valued
        feature_columns = single_valued(p.feature_columns() for p in plans)
        feature_names = [fc.split()[0] for fc in feature_columns]

        try:
            import sqlite3 as sqlite
        except ImportError:
            from pysqlite2 import dbapi2 as sqlite

        db_conn = sqlite.connect("plan-%s.dat" % log_filename)

        try:
            db_conn.execute("""
                  create table data (
                    id integer primary key autoincrement,
                    %s,
                    value real)""" % ", ".join(feature_columns))
        except sqlite.OperationalError:
            pass

    if show_progress:
        from pytools import ProgressBar
        pbar = ProgressBar("plan "+opt_name, len(plans))

    try:
        plan_values = []

        for p in plans:
            if show_progress:
                pbar.progress()

            if p.occupancy_record().occupancy >= desired_occup - 1e-10:
                if debug:
                    print "<---- trying %s:" % p

                value = target_func(p)
                if isinstance(value, tuple):
                    extra_info = value[1:]
                    value = value[0]
                else:
                    extra_info = None

                if value is not None:
                    if debug:
                        print "----> yielded %g" % (value)

                    plan_values.append(((len(plan_values), p), value))

                    if log_filename is not None:
                        db_conn.execute(
                                "insert into data (%s,value) values (%s)"
                                % (", ".join(feature_names),
                                    ",".join(["?"]*(1+len(feature_names)))),
                                p.features(*extra_info)+(value,))
    finally:
        if show_progress:
            pbar.finished()

    if log_filename is not None:
        db_conn.commit()

    from pytools import argmax2, argmin2
    if maximize:
        num_plan, plan = argmax2(plan_values)
    else:
        num_plan, plan = argmin2(plan_values)

    plan_value = plan_values[num_plan][1]

    if debug:
        print "----------------------------------------------"
        print "chosen: %s" % plan
        print "value: %g" % plan_value
        print "----------------------------------------------"

    return plan, plan_value
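# Hypothetical usage sketch for optimize_plan: a toy plan class providing
# just the attributes the optimizer touches on the no-logging, no-debug path,
# and a target function returning the quantity to minimize (in practice a
# measured kernel runtime).  This is illustrative only; real plans come from
# the CUDA plan generators, and all names prefixed with _Toy are made up.
class _ToyOccupancy(object):
    occupancy = 0.5


class _ToyPlan(object):
    def __init__(self, block_size):
        self.block_size = block_size

    def invalid_reason(self):
        return None

    def occupancy_record(self):
        return _ToyOccupancy()


def _toy_target_func(plan):
    # pretend a block size of 192 is fastest
    return abs(plan.block_size - 192)


best_plan, best_value = optimize_plan(
        "toy",
        lambda: (_ToyPlan(bs) for bs in (64, 128, 192, 256)),
        _toy_target_func, maximize=False)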