def main():
    """Load every run from the data source, then run the solver over them.

    Environment:
        DATA_DIR: required; directory handed to ``DataSource``.
        LIMIT: optional; integer forwarded as ``max_events``.
    """
    max_events = None
    if 'LIMIT' in os.environ:
        max_events = int(os.environ['LIMIT'])

    # Note: DataSource doesn't seem to care about max_events when given
    # a filename.
    source = DataSource(exp='junk', run=1, dir=os.environ['DATA_DIR'],
                        max_events=max_events, det_name='spi_cspad')

    # Right now, we assume one run or a series of runs with the same
    # experimental configuration.
    live_runs = []  # Holds references so the runs are not garbage collected.
    for current_run in source.runs():
        # FIXME: must epoch launch
        data_collector.load_run_data(current_run)
        live_runs.append(current_run)

    solver.solve(len(live_runs))

    # Block to keep the runs in scope until solve completes.
    legion.execution_fence(block=True)
def run_smd0_task(run):
    """Launch one ``run_smd_task`` per chunk produced by ``smd_chunks(run)``.

    Each launch is given a consecutive ``point`` index starting at 0.
    """
    # The tunable is queried (and waited on) but its value is not used here.
    legion.Tunable.select(legion.Tunable.GLOBAL_PYS).get()

    point = 0
    for chunk_data in smd_chunks(run):
        run_smd_task(chunk_data, run, point=point)
        point += 1

    # Block before returning so that the caller can use this task's future for synchronization
    legion.execution_fence(block=True)
def run_smd0_task(run):
    """Launch one ``run_smd_task`` per chunk read by a ``SmdReaderManager``.

    The manager is built from ``run.smd_dm.fds`` and ``run.max_events``; only
    the first element of each ``(smd_chunk, update_chunk)`` pair is forwarded.
    """
    # The tunable is queried (and waited on) but its value is not used here.
    legion.Tunable.select(legion.Tunable.GLOBAL_PYS).get()

    reader = SmdReaderManager(run.smd_dm.fds, run.max_events)
    for point, pair in enumerate(reader.chunks()):
        smd_chunk, _update_chunk = pair
        run_smd_task(smd_chunk, run, point=point)

    # Block before returning so that the caller can use this task's future for synchronization
    legion.execution_fence(block=True)
def main():
    """Call ``hello2`` on a 4-way partitioned region using three launch styles."""
    n_tiles = 4
    region = legion.Region([n_tiles], {'x': legion.float64})
    tiles = legion.Partition.equal(region, [n_tiles])
    legion.fill(region, 'x', 0)

    # Single launch on the first piece.
    hello2(tiles[0], 0)

    # Loop-style index launch over all pieces.
    for point in legion.IndexLaunch([n_tiles]):
        hello2(tiles[point], point)

    # Call-style index launch over all pieces.
    legion.index_launch([n_tiles], hello2, tiles[ID], ID)

    # FIXME: This is needed in nopaint to avoid a race with region deletion
    legion.execution_fence()
def main():
    """Run ``hi`` on every Python processor twice and check ``global_var``.

    The first must-epoch launch uses the implicit launch domain, the second an
    explicit one. ``global_var`` is expected to be 4123 after the first launch;
    it is then reset to 456 and expected to be 4456 after the second.
    """
    global global_var
    n_procs = legion.Tunable.select(legion.Tunable.GLOBAL_PYS).get()

    # implicit launch domain
    with legion.MustEpochLaunch():
        for rank in range(n_procs):
            hi(rank, point=rank)
    legion.execution_fence(block=True)
    assert global_var == 4123

    global_var = 456

    # explicit launch domain
    with legion.MustEpochLaunch([n_procs]):
        for rank in range(n_procs):
            hi(rank, point=rank)
    legion.execution_fence(block=True)
    assert global_var == 4456
def solve(n_runs):
    """Iterate data collection and solver steps until all runs have been read.

    The loop runs for at least 10 iterations and continues until
    ``data_collector.get_num_runs_complete()`` equals ``n_runs``.

    Args:
        n_runs: number of runs the data collector is expected to complete.
    """
    n_procs = legion.Tunable.select(legion.Tunable.GLOBAL_PYS).get()
    print(f"Working with {n_procs} processes\n")

    # Allocate data structures.
    events_per_node = 1000
    raw_event_shape = (2, 3, 6)
    xpp_data = legion.Region.create(
        (events_per_node, *raw_event_shape), {'x': legion.uint16})
    legion.fill(xpp_data, 'x', 0)
    xpp_part = legion.Partition.create_equal(xpp_data, [n_procs])

    data = legion.Region.create((N_POINTS, N_POINTS, N_POINTS), {
        'amplitudes': legion.float32,
        'support': legion.uint8,  # should be bool
        'rho': legion.complex64})
    for field_name, initial in (('amplitudes', 0.), ('support', 0), ('rho', 0.)):
        legion.fill(data, field_name, initial)

    lookahead = 2
    pending = []
    iteration = 0
    complete = False
    while iteration < 10 or not complete:
        if not complete:
            # Obtain the newest copy of the data.
            with legion.MustEpochLaunch([n_procs]):
                # FIXME: index launch (legion.IndexLaunch([n_procs]))
                for proc in range(n_procs):
                    data_collector.fill_data_region(xpp_part[proc], point=proc)

            # Preprocess data.
            # FIXME: index launch (legion.IndexLaunch([n_procs]))
            # NOTE(review): the whole partition is passed on every iteration,
            # not xpp_part[idx] -- confirm this is intended.
            for _ in range(n_procs):
                preprocess(xpp_part, data)

        # Generate data on first run
        if iteration == 0:
            generate_data(data)

        # Run solver.
        solve_step(data, 0, iteration)

        if not complete:
            # Make sure we don't run more than 2 iterations ahead.
            pending.append(legion.execution_fence(future=True))
            if iteration >= lookahead:
                pending[iteration - lookahead].get()

            # Check that all runs have been read.
            complete = data_collector.get_num_runs_complete() == n_runs

        iteration += 1
def main():
    """Build the task graphs, run the main loop between two fences, report time."""
    app = app_create(legion.input_args())
    graphs = app_task_graphs(app)

    if once_only():
        c.app_display(app)

    num_fields = max_fields
    (result, primary, secondary,
     scratch, p_scratch, dset_max_args) = init_partitions(graphs, num_fields)

    # Fence before reading the clock so setup work is excluded from the timing.
    legion.execution_fence(block=True)
    t_begin = legion.c.legion_get_current_time_in_nanos()

    execute_main_loop(graphs, num_fields, result, primary, secondary,
                      scratch, p_scratch, dset_max_args)

    # Fence again so the stop timestamp is taken after the loop has finished.
    legion.execution_fence(block=True)
    t_end = legion.c.legion_get_current_time_in_nanos()

    elapsed_ns = t_end - t_begin
    total_time = elapsed_ns / 1e9  # nanoseconds -> seconds

    if once_only():
        c.app_report_timing(app, total_time)
def main():
    """Run the stencil benchmark and print the elapsed time of the timed steps.

    The grid is ``(conf.nx, conf.ny)`` including a border of ``RADIUS`` on each
    side and is split into ``(conf.ntx, conf.nty)`` tiles. The loop runs
    ``conf.tsteps + 2 * conf.tprune`` steps; the timer starts at step
    ``tprune`` and stops after step ``tsteps - tprune - 1``.
    """
    print_once('Running stencil.py')

    conf = parse_args(legion.input_args(True))

    nbloated = np.array([conf.nx, conf.ny])
    nt = np.array([conf.ntx, conf.nty])
    # Read once; used both for the fills and for the final check.
    init = conf.init

    n = nbloated - 2*RADIUS
    assert np.all(n >= nt), "grid too small"

    grid = Ispace(n + nt*2*RADIUS)
    tiles = Ispace(nt)

    point = Fspace(OrderedDict([
        ('input', DTYPE),
        ('output', DTYPE),
    ]))

    points = Region(grid, point)

    private = make_private_partition(points, tiles, n, nt)
    interior = make_interior_partition(points, tiles, n, nt)
    exterior = make_exterior_partition(points, tiles, n, nt)

    # Separate ghost regions for the minus/plus direction in x and y.
    xm = Region([nt[0]*RADIUS, n[1]], point)
    xp = Region([nt[0]*RADIUS, n[1]], point)
    ym = Region([n[0], nt[1]*RADIUS], point)
    yp = Region([n[0], nt[1]*RADIUS], point)
    pxm_in = make_ghost_x_partition(xm, tiles, n, nt, -1)
    pxp_in = make_ghost_x_partition(xp, tiles, n, nt, 1)
    pym_in = make_ghost_y_partition(ym, tiles, n, nt, -1)
    pyp_in = make_ghost_y_partition(yp, tiles, n, nt, 1)
    pxm_out = make_ghost_x_partition(xm, tiles, n, nt, 0)
    pxp_out = make_ghost_x_partition(xp, tiles, n, nt, 0)
    pym_out = make_ghost_y_partition(ym, tiles, n, nt, 0)
    pyp_out = make_ghost_y_partition(yp, tiles, n, nt, 0)

    for r in [points, xm, xp, ym, yp]:
        for f in ['input', 'output']:
            legion.fill(r, f, init)

    tsteps = conf.tsteps + 2 * conf.tprune
    tprune = conf.tprune

    trace = Trace()
    for t in range(tsteps):
        if t == tprune:
            # Fence so that earlier steps are finished before timing starts.
            legion.execution_fence(block=True)
            start_time = legion.c.legion_get_current_time_in_nanos()
        with trace:
            if _constant_time_launches:
                index_launch(tiles, stencil, private[ID], interior[ID], pxm_in[ID], pxp_in[ID], pym_in[ID], pyp_in[ID], False)
                index_launch(tiles, increment, private[ID], exterior[ID], pxm_out[ID], pxp_out[ID], pym_out[ID], pyp_out[ID], False)
            else:
                for i in IndexLaunch(tiles):
                    stencil(private[i], interior[i], pxm_in[i], pxp_in[i], pym_in[i], pyp_in[i], False)
                for i in IndexLaunch(tiles):
                    increment(private[i], exterior[i], pxm_out[i], pxp_out[i], pym_out[i], pyp_out[i], False)
        if t == tsteps - tprune - 1:
            # Fence so that the timed steps are finished before the clock is read.
            legion.execution_fence(block=True)
            stop_time = legion.c.legion_get_current_time_in_nanos()

    if _constant_time_launches:
        index_launch(tiles, check, private[ID], interior[ID], tsteps, init)
    else:
        for i in IndexLaunch(tiles):
            check(private[i], interior[i], tsteps, init)

    print_once('ELAPSED TIME = %7.3f s' % ((stop_time - start_time)/1e9))
def main():
    """Set up the pennant regions, run the cycle loop, and print the elapsed time.

    Steps, in order:
      1. Read the configuration and declare the zone/point/side field spaces.
      2. Create the regions and partition them into ``conf.npieces`` pieces.
      3. Launch the initialization tasks.
      4. Run the cycle loop until ``cycle`` reaches ``conf.cstop + 2 * conf.prune``
         or ``time`` reaches ``conf.tstop``.
      5. Optionally validate, then print the elapsed time.

    Every launch is written twice: as ``index_launch(...)`` when
    ``_constant_time_launches`` is set, and as a ``for i in IndexLaunch(...)``
    loop otherwise.
    """
    print_once('Running pennant.py')

    conf = read_config().get()

    # Fields stored per zone.
    zone = Fspace(
        OrderedDict([
            ('zxp_x', legion.float64),
            ('zxp_y', legion.float64),
            ('zx_x', legion.float64),
            ('zx_y', legion.float64),
            ('zareap', legion.float64),
            ('zarea', legion.float64),
            ('zvol0', legion.float64),
            ('zvolp', legion.float64),
            ('zvol', legion.float64),
            ('zdl', legion.float64),
            ('zm', legion.float64),
            ('zrp', legion.float64),
            ('zr', legion.float64),
            ('ze', legion.float64),
            ('zetot', legion.float64),
            ('zw', legion.float64),
            ('zwrate', legion.float64),
            ('zp', legion.float64),
            ('zss', legion.float64),
            ('zdu', legion.float64),
            ('zuc_x', legion.float64),
            ('zuc_y', legion.float64),
            ('z0tmp', legion.float64),
            ('znump', legion.uint8),
        ]))

    # Fields stored per point.
    point = Fspace(
        OrderedDict([
            ('px0_x', legion.float64),
            ('px0_y', legion.float64),
            ('pxp_x', legion.float64),
            ('pxp_y', legion.float64),
            ('px_x', legion.float64),
            ('px_y', legion.float64),
            ('pu0_x', legion.float64),
            ('pu0_y', legion.float64),
            ('pu_x', legion.float64),
            ('pu_y', legion.float64),
            ('pap_x', legion.float64),
            ('pap_y', legion.float64),
            ('pf_x', legion.float64),
            ('pf_y', legion.float64),
            ('pmaswt', legion.float64),
            ('has_bcx', legion.bool_),
            ('has_bcy', legion.bool_),
        ]))

    # Fields stored per side.
    side = Fspace(
        OrderedDict([
            ('mapsz', legion.int1d),
            ('mapsp1', legion.int1d),
            ('mapsp1_r', legion.uint8),
            ('mapsp2', legion.int1d),
            ('mapsp2_r', legion.uint8),
            ('mapss3', legion.int1d),
            ('mapss4', legion.int1d),
            ('sareap', legion.float64),
            ('sarea', legion.float64),
            ('svolp', legion.float64),
            ('svol', legion.float64),
            ('ssurfp_x', legion.float64),
            ('ssurfp_y', legion.float64),
            ('smf', legion.float64),
            ('sfp_x', legion.float64),
            ('sfp_y', legion.float64),
            ('sft_x', legion.float64),
            ('sft_y', legion.float64),
            ('sfq_x', legion.float64),
            ('sfq_y', legion.float64),
            ('exp_x', legion.float64),
            ('exp_y', legion.float64),
            ('ex_x', legion.float64),
            ('ex_y', legion.float64),
            ('elen', legion.float64),
            ('carea', legion.float64),
            ('cevol', legion.float64),
            ('cdu', legion.float64),
            ('cdiv', legion.float64),
            ('ccos', legion.float64),
            ('cqe1_x', legion.float64),
            ('cqe1_y', legion.float64),
            ('cqe2_x', legion.float64),
            ('cqe2_y', legion.float64),
        ]))

    zones = Region([conf.nz], zone)
    points = Region([conf.np], point)
    sides = Region([conf.ns], side)

    assert conf.seq_init or conf.par_init, 'enable one of sequential or parallel initialization'

    if conf.seq_init:
        # The returned colorings are not used below; the call is still waited on.
        colorings = read_input_sequential(zones, points, sides, conf).get()

    # Parallel initialization is mandatory from here on.
    assert conf.par_init
    partitions = read_partitions(zones, points, sides, conf).get()

    pieces = Ispace([conf.npieces])

    zones_part = create_partition(True, zones, partitions.rz_all_p, pieces)

    # Points are first split in two; index 0 is used as private, index 1 as ghost.
    points_part = create_partition(True, points, partitions.rp_all_p, [2])
    private = points_part[0]
    ghost = points_part[1]

    private_part = create_partition(True, private, partitions.rp_all_private_p, pieces)
    ghost_part = create_partition(False, ghost, partitions.rp_all_ghost_p, pieces)
    shared_part = create_partition(True, ghost, partitions.rp_all_shared_p, pieces)

    sides_part = create_partition(True, sides, partitions.rs_all_p, pieces)

    if conf.par_init:
        if _constant_time_launches:
            # The configuration is wrapped in a Future for the index launch.
            c = Future(conf, value_type=config)
            index_launch(pieces, initialize_topology, c, ID, zones_part[ID], private_part[ID], shared_part[ID], ghost_part[ID], sides_part[ID])
        else:
            for i in IndexLaunch(pieces):
                initialize_topology(conf, i, zones_part[i], private_part[i], shared_part[i], ghost_part[i], sides_part[i])

    # Initialization launches.
    if _constant_time_launches:
        index_launch(pieces, init_pointers, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID])
        index_launch(pieces, init_mesh_zones, zones_part[ID])
        index_launch(pieces, calc_centers_full, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], True)
        index_launch(pieces, calc_volumes_full, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], True)
        index_launch(pieces, init_side_fracs, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID])
        index_launch(pieces, init_hydro, zones_part[ID], conf.rinit, conf.einit, conf.rinitsub, conf.einitsub, conf.subregion[0], conf.subregion[1], conf.subregion[2], conf.subregion[3])
        index_launch(pieces, init_radial_velocity, private_part[ID], conf.uinitradial)
        index_launch(pieces, init_radial_velocity, shared_part[ID], conf.uinitradial)
    else:
        for i in IndexLaunch(pieces):
            init_pointers(zones_part[i], private_part[i], ghost_part[i], sides_part[i])
        for i in IndexLaunch(pieces):
            init_mesh_zones(zones_part[i])
        for i in IndexLaunch(pieces):
            calc_centers_full(zones_part[i], private_part[i], ghost_part[i], sides_part[i], True)
        for i in IndexLaunch(pieces):
            calc_volumes_full(zones_part[i], private_part[i], ghost_part[i], sides_part[i], True)
        for i in IndexLaunch(pieces):
            init_side_fracs(zones_part[i], private_part[i], ghost_part[i], sides_part[i])
        for i in IndexLaunch(pieces):
            init_hydro(zones_part[i], conf.rinit, conf.einit, conf.rinitsub, conf.einitsub, conf.subregion[0], conf.subregion[1], conf.subregion[2], conf.subregion[3])
        for i in IndexLaunch(pieces):
            init_radial_velocity(private_part[i], conf.uinitradial)
        for i in IndexLaunch(pieces):
            init_radial_velocity(shared_part[i], conf.uinitradial)

    cycle = 0
    # 2 * conf.prune extra cycles: conf.prune run before the timer starts and
    # conf.prune after it stops.
    cstop = conf.cstop + 2 * conf.prune
    time = 0.0
    dt = Future(conf.dtmax, legion.float64)
    dthydro = conf.dtmax
    # NOTE(review): start_time / stop_time are only assigned when the loop
    # reaches the matching cycle counts. If the loop exits early because
    # time >= conf.tstop, the final print would fail -- confirm that inputs
    # rule this out.
    while cycle < cstop and time < conf.tstop:
        if cycle == conf.prune:
            # Fence so that earlier work is finished before timing starts.
            legion.execution_fence(block=True)
            start_time = legion.c.legion_get_current_time_in_nanos()

        if _constant_time_launches:
            index_launch(pieces, init_step_points, private_part[ID], True)
            index_launch(pieces, init_step_points, shared_part[ID], True)
            index_launch(pieces, init_step_zones, zones_part[ID], True)
            dt = calc_global_dt(dt, conf.dtfac, conf.dtinit, conf.dtmax, dthydro, time, conf.tstop, cycle)
            index_launch(pieces, adv_pos_half, private_part[ID], dt, True)
            index_launch(pieces, adv_pos_half, shared_part[ID], dt, True)
            index_launch(pieces, calc_centers, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], True)
            index_launch(pieces, calc_volumes, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], True)
            index_launch(pieces, calc_char_len, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], True)
            index_launch(pieces, calc_rho_half, zones_part[ID], True)
            index_launch(pieces, sum_point_mass, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], True)
            index_launch(pieces, calc_state_at_half, zones_part[ID], conf.gamma, conf.ssmin, dt, True)
            index_launch(pieces, calc_force_pgas_tts, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], conf.alfa, conf.ssmin, True)
            index_launch(pieces, qcs_zone_center_velocity, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], True)
            index_launch(pieces, qcs_corner_divergence, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], True)
            index_launch(pieces, qcs_qcn_force, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], conf.gamma, conf.q1, conf.q2, True)
            index_launch(pieces, qcs_force, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], True)
            index_launch(pieces, qcs_vel_diff, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], conf.q1, conf.q2, True)
            index_launch(pieces, sum_point_force, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], True)
            index_launch(pieces, apply_boundary_conditions, private_part[ID], True)
            index_launch(pieces, apply_boundary_conditions, shared_part[ID], True)
            index_launch(pieces, adv_pos_full, private_part[ID], dt, True)
            index_launch(pieces, adv_pos_full, shared_part[ID], dt, True)
            index_launch(pieces, calc_centers_full, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], True)
            index_launch(pieces, calc_volumes_full, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], True)
            index_launch(pieces, calc_work, zones_part[ID], private_part[ID], ghost_part[ID], sides_part[ID], dt, True)
            index_launch(pieces, calc_work_rate_energy_rho_full, zones_part[ID], dt, True)
            # Per-piece results are reduced with 'min'; min_task then combines
            # that with conf.dtmax.
            future = index_launch(pieces, calc_dt_hydro, zones_part[ID], dt, conf.dtmax, conf.cfl, conf.cflv, True, reduce='min')
            dthydro = conf.dtmax
            dthydro = min_task(dthydro, future)
        else:
            for i in IndexLaunch(pieces):
                init_step_points(private_part[i], True)
            for i in IndexLaunch(pieces):
                init_step_points(shared_part[i], True)
            for i in IndexLaunch(pieces):
                init_step_zones(zones_part[i], True)
            dt = calc_global_dt(dt, conf.dtfac, conf.dtinit, conf.dtmax, dthydro, time, conf.tstop, cycle)
            for i in IndexLaunch(pieces):
                adv_pos_half(private_part[i], dt, True)
            for i in IndexLaunch(pieces):
                adv_pos_half(shared_part[i], dt, True)
            for i in IndexLaunch(pieces):
                calc_centers(zones_part[i], private_part[i], ghost_part[i], sides_part[i], True)
            for i in IndexLaunch(pieces):
                calc_volumes(zones_part[i], private_part[i], ghost_part[i], sides_part[i], True)
            for i in IndexLaunch(pieces):
                calc_char_len(zones_part[i], private_part[i], ghost_part[i], sides_part[i], True)
            for i in IndexLaunch(pieces):
                calc_rho_half(zones_part[i], True)
            for i in IndexLaunch(pieces):
                sum_point_mass(zones_part[i], private_part[i], ghost_part[i], sides_part[i], True)
            for i in IndexLaunch(pieces):
                calc_state_at_half(zones_part[i], conf.gamma, conf.ssmin, dt, True)
            for i in IndexLaunch(pieces):
                calc_force_pgas_tts(zones_part[i], private_part[i], ghost_part[i], sides_part[i], conf.alfa, conf.ssmin, True)
            for i in IndexLaunch(pieces):
                qcs_zone_center_velocity(zones_part[i], private_part[i], ghost_part[i], sides_part[i], True)
            for i in IndexLaunch(pieces):
                qcs_corner_divergence(zones_part[i], private_part[i], ghost_part[i], sides_part[i], True)
            for i in IndexLaunch(pieces):
                qcs_qcn_force(zones_part[i], private_part[i], ghost_part[i], sides_part[i], conf.gamma, conf.q1, conf.q2, True)
            for i in IndexLaunch(pieces):
                qcs_force(zones_part[i], private_part[i], ghost_part[i], sides_part[i], True)
            for i in IndexLaunch(pieces):
                qcs_vel_diff(zones_part[i], private_part[i], ghost_part[i], sides_part[i], conf.q1, conf.q2, True)
            for i in IndexLaunch(pieces):
                sum_point_force(zones_part[i], private_part[i], ghost_part[i], sides_part[i], True)
            for i in IndexLaunch(pieces):
                apply_boundary_conditions(private_part[i], True)
            for i in IndexLaunch(pieces):
                apply_boundary_conditions(shared_part[i], True)
            for i in IndexLaunch(pieces):
                adv_pos_full(private_part[i], dt, True)
            for i in IndexLaunch(pieces):
                adv_pos_full(shared_part[i], dt, True)
            for i in IndexLaunch(pieces):
                calc_centers_full(zones_part[i], private_part[i], ghost_part[i], sides_part[i], True)
            for i in IndexLaunch(pieces):
                calc_volumes_full(zones_part[i], private_part[i], ghost_part[i], sides_part[i], True)
            for i in IndexLaunch(pieces):
                calc_work(zones_part[i], private_part[i], ghost_part[i], sides_part[i], dt, True)
            for i in IndexLaunch(pieces):
                calc_work_rate_energy_rho_full(zones_part[i], dt, True)
            futures = []
            for i in IndexLaunch(pieces):
                futures.append(
                    calc_dt_hydro(zones_part[i], dt, conf.dtmax, conf.cfl, conf.cflv, True))

            # Wait on every per-piece future and take the minimum with conf.dtmax.
            dthydro = conf.dtmax
            dthydro = min(dthydro, *list(map(lambda x: x.get(), futures)))

        cycle += 1
        # dt.get() waits for this cycle's time step value.
        time += dt.get()

        if cycle == cstop - conf.prune:
            # Fence so that the timed cycles are finished before the clock is read.
            legion.execution_fence(block=True)
            stop_time = legion.c.legion_get_current_time_in_nanos()

    if conf.seq_init:
        validate_output_sequential(zones, points, sides, conf)
    else:
        print_once("Warning: Skipping sequential validation")

    print_once("ELAPSED TIME = %7.3f s" % ((stop_time - start_time) / 1e9))
def main():
    """Build the circuit regions, run the traced main loop, and report time and GFLOPS.

    Steps, in order:
      1. Parse the configuration and print the settings.
      2. Declare the node/wire field spaces and create the regions.
      3. Split nodes into private and shared sets, then partition everything
         by superpiece.
      4. Launch ``init_piece`` and ``init_pointers``.
      5. Run ``conf.num_loops + 2 * conf.prune`` loop iterations; the timer
         starts at iteration ``prune`` and stops after iteration
         ``num_loops - prune - 1``.
      6. Print the elapsed time and the derived GFLOPS figure.
    """
    print_once('Running circuit_sparse.py')

    conf = parse_args(legion.input_args(True))

    assert conf.num_pieces % conf.pieces_per_superpiece == 0, "pieces should be evenly distributed to superpieces"
    conf.shared_nodes_per_piece = int(
        math.ceil(conf.nodes_per_piece * conf.pct_shared_nodes / 100.0))
    # The second "nodes/piece" in the format string is filled with
    # conf.shared_nodes_per_piece (see the argument tuple).
    print_once(
        "circuit settings: loops=%d prune=%d pieces=%d (pieces/superpiece=%d) nodes/piece=%d (nodes/piece=%d) wires/piece=%d pct_in_piece=%d seed=%d"
        % (conf.num_loops, conf.prune, conf.num_pieces,
           conf.pieces_per_superpiece, conf.nodes_per_piece,
           conf.shared_nodes_per_piece, conf.wires_per_piece,
           conf.pct_wire_in_piece, conf.random_seed))

    num_pieces = conf.num_pieces
    num_superpieces = conf.num_pieces // conf.pieces_per_superpiece
    num_circuit_nodes = num_pieces * conf.nodes_per_piece
    num_circuit_wires = num_pieces * conf.wires_per_piece

    # Fields stored per circuit node.
    node = Fspace(
        OrderedDict([
            ('node_cap', legion.float32),
            ('leakage', legion.float32),
            ('charge', legion.float32),
            ('node_voltage', legion.float32),
        ]))

    # Fields stored per wire: fixed fields plus WIRE_SEGMENTS current fields
    # and WIRE_SEGMENTS - 1 voltage fields.
    wire = Fspace(
        OrderedDict([
            ('in_ptr', legion.int64),
            ('in_ptr_r', legion.uint8),
            ('out_ptr', legion.int64),
            ('out_ptr_r', legion.uint8),
            ('inductance', legion.float32),
            ('resistance', legion.float32),
            ('wire_cap', legion.float32),
        ] + [('current_%d' % i, legion.float32) for i in range(WIRE_SEGMENTS)] +
        [('voltage_%d' % i, legion.float32) for i in range(WIRE_SEGMENTS - 1)]))

    all_nodes = Region([num_circuit_nodes], node)
    all_wires = Region([num_circuit_wires], wire)

    # Per-element byte size, computed from an aligned NumPy struct dtype built
    # from the field types.
    node_size = np.dtype(list(
        map(lambda x: (x[0], x[1].numpy_type), node.field_types.items())),
        align=True).itemsize
    wire_size = np.dtype(list(
        map(lambda x: (x[0], x[1].numpy_type), wire.field_types.items())),
        align=True).itemsize

    print_once("Circuit memory usage:")
    print_once(" Nodes : %10d * %4d bytes = %12d bytes" %
               (num_circuit_nodes, node_size, num_circuit_nodes * node_size))
    print_once(" Wires : %10d * %4d bytes = %12d bytes" %
               (num_circuit_wires, wire_size, num_circuit_wires * wire_size))
    total = ((num_circuit_nodes * node_size) + (num_circuit_wires * wire_size))
    print_once(" Total %12d bytes" % total)

    snpp = conf.shared_nodes_per_piece  # shared nodes per piece
    pnpp = conf.nodes_per_piece - conf.shared_nodes_per_piece  # private nodes per piece
    pps = conf.pieces_per_superpiece
    num_shared_nodes = num_pieces * snpp

    # Two rectangles over the node index range:
    #   entry 0 -> [num_shared_nodes, num_circuit_nodes - 1] (used as private)
    #   entry 1 -> [0, num_shared_nodes - 1]                 (used as shared)
    privacy_coloring = Region([2], {'rect': legion.rect1d})
    np.copyto(privacy_coloring.rect,
              np.array([(num_shared_nodes, num_circuit_nodes - 1),
                        (0, num_shared_nodes - 1)],
                       dtype=privacy_coloring.rect.dtype),
              casting='no')
    privacy_part = Partition.restrict(privacy_coloring, [2], np.eye(1), [1],
                                      disjoint_complete)
    all_nodes_part = Partition.image(all_nodes, privacy_part, 'rect', [2],
                                     disjoint_complete)

    all_private = all_nodes_part[0]
    all_shared = all_nodes_part[1]

    launch_domain = Ispace([num_superpieces])

    # Private nodes start at index num_shared_nodes, hence the offset Domain.
    private_part = Partition.restrict(all_private, launch_domain,
                                      np.eye(1) * pnpp * pps,
                                      Domain([pnpp * pps], [num_shared_nodes]),
                                      disjoint_complete)
    shared_part = Partition.restrict(all_shared, launch_domain,
                                     np.eye(1) * snpp * pps, [snpp * pps],
                                     disjoint_complete)

    wires_part = Partition.equal(all_wires, launch_domain)

    # One rectangle per superpiece; used below to build ghost_part by image.
    ghost_ranges = Region([num_superpieces],
                          OrderedDict([('rect', legion.rect1d)]))
    ghost_ranges_part = Partition.equal(ghost_ranges, launch_domain)

    if _constant_time_launches:
        # The configuration is wrapped in a Future for the index launch.
        c = Future(conf[0], value_type=Config)
        index_launch(launch_domain, init_piece, ID, c, ghost_ranges_part[ID],
                     private_part[ID], shared_part[ID], all_shared,
                     wires_part[ID])
    else:
        for i in IndexLaunch(launch_domain):
            init_piece(i, conf[0], ghost_ranges_part[i], private_part[i],
                       shared_part[i], all_shared, wires_part[i])

    # Created only after init_piece has been launched on ghost_ranges_part.
    ghost_part = Partition.image(all_shared, ghost_ranges_part, 'rect',
                                 launch_domain)

    if _constant_time_launches:
        index_launch(launch_domain, init_pointers, private_part[ID],
                     shared_part[ID], ghost_part[ID], wires_part[ID])
    else:
        for i in IndexLaunch(launch_domain):
            init_pointers(private_part[i], shared_part[i], ghost_part[i],
                          wires_part[i])

    steps = conf.steps
    prune = conf.prune
    # 2 * prune extra iterations: prune run before the timer starts and prune
    # after it stops.
    num_loops = conf.num_loops + 2 * prune

    trace = Trace()
    for j in range(num_loops):
        if j == prune:
            # Fence so that earlier work is finished before timing starts.
            legion.execution_fence(block=True)
            start_time = legion.c.legion_get_current_time_in_nanos()
        with trace:
            if _constant_time_launches:
                index_launch(launch_domain, calculate_new_currents, False,
                             steps, private_part[ID], shared_part[ID],
                             ghost_part[ID], wires_part[ID])
                index_launch(launch_domain, distribute_charge,
                             private_part[ID], shared_part[ID], ghost_part[ID],
                             wires_part[ID])
                index_launch(launch_domain, update_voltages, False,
                             private_part[ID], shared_part[ID])
            else:
                for i in IndexLaunch(launch_domain):
                    calculate_new_currents(False, steps, private_part[i],
                                           shared_part[i], ghost_part[i],
                                           wires_part[i])
                for i in IndexLaunch(launch_domain):
                    distribute_charge(private_part[i], shared_part[i],
                                      ghost_part[i], wires_part[i])
                for i in IndexLaunch(launch_domain):
                    update_voltages(False, private_part[i], shared_part[i])
        if j == num_loops - prune - 1:
            # Fence so that the timed iterations are finished before the clock is read.
            legion.execution_fence(block=True)
            stop_time = legion.c.legion_get_current_time_in_nanos()

    sim_time = (stop_time - start_time) / 1e9  # nanoseconds -> seconds
    print_once('ELAPSED TIME = %7.3f s' % sim_time)

    # Compute the floating point operations per second
    num_circuit_nodes = conf.num_pieces * conf.nodes_per_piece
    num_circuit_wires = conf.num_pieces * conf.wires_per_piece
    # calculate currents
    operations = num_circuit_wires * (WIRE_SEGMENTS * 6 +
                                      (WIRE_SEGMENTS - 1) * 4) * conf.steps
    # distribute charge
    operations += (num_circuit_wires * 4)
    # update voltages
    operations += (num_circuit_nodes * 4)
    # multiply by the number of loops (conf.num_loops, i.e. without the
    # 2 * prune untimed iterations)
    operations *= conf.num_loops

    # Compute the number of gflops
    gflops = (1e-9 * operations) / sim_time
    print_once("GFLOPS = %7.3f GFLOPS" % gflops)
def main_task():
    """Enumerate events, launch ``analyze_chunk`` tasks over them, and print statistics.

    Behaviour is controlled by environment variables:
      REPEAT=1     cycle the event stream (requires ``_ds.config.limit``)
      EAGER=1      materialise the event list up front and time that step
      RANDOMIZE=1  shuffle the events with a fixed seed (requires EAGER=1)
      CHUNKSIZE    number of events per task (default 8)
      OVERCOMMIT   number of tasks per processor per launch (default 1)

    Events are grouped by calib cycle. When ``_ds.small_data`` is set, task
    results are buffered and appended to an HDF5 file each time the buffer
    reaches ``_ds.small_data.gather_interval`` entries.
    """
    assert _ds is not None

    events = _ds.smd().events()
    repeat = os.environ.get('REPEAT') == '1'
    if repeat:
        # An endless cycle is only usable when a limit truncates it below.
        assert _ds.config.limit
        events = itertools.cycle(events)
    if _ds.config.limit is not None:
        events = itertools.islice(events, _ds.config.limit)
    predicate = _ds.config.predicate
    if predicate is not None:
        # Generator expression instead of itertools.ifilter, which does not
        # exist on Python 3; still lazy on both Python 2 and 3.
        events = (event for event in events if predicate(event))

    eager = os.environ.get('EAGER') == '1'
    if eager:
        start = legion.c.legion_get_current_time_in_micros()
        events = list(events)
        stop = legion.c.legion_get_current_time_in_micros()

        print('Enumerating: Elapsed time: %e seconds' % ((stop - start) / 1e6))
        print('Enumerating: Number of events: %s' % len(events))
        print('Enumerating: Events per second: %e' %
              (len(events) / ((stop - start) / 1e6)))

    randomize = os.environ.get('RANDOMIZE') == '1'
    print('Randomize?', randomize)
    if randomize:
        # Shuffling needs the materialised list produced by EAGER=1.
        assert eager
        random.seed(123456789)  # Don't actually want this to be random
        random.shuffle(events)

    # Number of events per task
    chunksize = int(os.environ.get('CHUNKSIZE', 8))

    # Number of tasks per processor per launch
    overcommit = int(os.environ.get('OVERCOMMIT', 1))

    # Number of Python processors
    global_procs = legion.Tunable.select(legion.Tunable.GLOBAL_PYS).get()
    local_procs = legion.Tunable.select(legion.Tunable.LOCAL_PYS).get()

    # Number of tasks per launch
    launchsize = (max(global_procs - local_procs, local_procs)) * overcommit

    print('Chunk size %s' % chunksize)
    print('Launch size %s' % launchsize)

    start = legion.c.legion_get_current_time_in_micros()

    # Group events by calib cycle so that different cycles don't mix
    events = itertools.groupby(
        events, lambda e: e.get(psana.EventOffset).lastBeginCalibCycleDgram())

    if _ds.small_data is not None:
        # create HDF5 output file
        hdf5 = legion_HDF5.LegionHDF5(_ds.small_data.filepath)

    nevents = 0
    nlaunch = 0
    ncalib = 0
    file_buffer = []
    file_buffer_length = 0
    for calib, calib_events in events:
        calib = legion.Future(calib)
        for launch_events in chunk(chunk(calib_events, chunksize), launchsize):
            if nlaunch % 20 == 0:
                print('Processing event %s' % nevents)
                sys.stdout.flush()
            dictsBuffer = []
            for idx in legion.IndexLaunch([len(launch_events)]):
                # Build a real list: on Python 3 ``map`` returns a lazy
                # iterator, which is not a suitable task argument.
                locations = [Location(ev) for ev in launch_events[idx]]
                dicts = analyze_chunk(locations, calib)
                dictsBuffer.append(dicts)
                nevents += len(launch_events[idx])
            nlaunch += 1

            if _ds.small_data is not None:
                for dicts in dictsBuffer:
                    d = dicts.get()
                    # Extend in place rather than re-copying the buffer.
                    file_buffer.extend(d)
                    file_buffer_length += len(d)
                    if file_buffer_length >= _ds.small_data.gather_interval:
                        hdf5.append_to_file(file_buffer)
                        # Start a fresh list; the old one was handed to hdf5.
                        file_buffer = []
                        file_buffer_length = 0
        ncalib += 1
    # NOTE(review): entries still in file_buffer when the loop ends are not
    # appended here -- confirm whether LegionHDF5 handles the tail elsewhere.

    legion.execution_fence(block=True)
    stop = legion.c.legion_get_current_time_in_micros()

    if _ds.config.teardown is not None:
        # FIXME: Should be a must-epoch launch
        for idx in legion.IndexLaunch([global_procs]):
            teardown()

    print('Elapsed time: %e seconds' % ((stop - start) / 1e6))
    print('Number of calib cycles: %s' % ncalib)
    print('Number of launches: %s' % nlaunch)
    print('Number of events: %s' % nevents)
    print('Events per second: %e' % (nevents / ((stop - start) / 1e6)))

    # Hack: Estimate bandwidth used

    # total_events = 75522 * repeat
    # total_size = 875 * repeat # GB

    # fraction_events = float(nevents)/total_events
    # bw = fraction_events * total_size / ((stop - start)/1e6)
    # print('Estimated bandwidth used: %e GB/s' % bw)

    print('End of run')
    sys.stdout.flush()
def main():
    """Set up the pennant_fast regions, run the cycle loop, and print the elapsed time.

    Steps, in order:
      1. Read the configuration and declare the zone/point/side/span field spaces.
      2. Create the regions and span regions, partitioned into ``conf.npieces`` pieces.
      3. Launch the initialization tasks (parallel initialization is required).
      4. Run the cycle loop until ``cycle`` reaches ``conf.cstop + 2 * conf.prune``
         or ``time`` reaches ``conf.tstop``. The timer starts at cycle
         ``conf.prune`` and stops once ``cycle == cstop - conf.prune``.
      5. Optionally validate, then print the elapsed time.
    """
    print_once('Running pennant_fast.py')

    conf = read_config().get()

    # Fields stored per zone.
    zone = Fspace(
        OrderedDict([
            ('zxp_x', legion.float64),
            ('zxp_y', legion.float64),
            ('zx_x', legion.float64),
            ('zx_y', legion.float64),
            ('zareap', legion.float64),
            ('zarea', legion.float64),
            ('zvol0', legion.float64),
            ('zvolp', legion.float64),
            ('zvol', legion.float64),
            ('zdl', legion.float64),
            ('zm', legion.float64),
            ('zrp', legion.float64),
            ('zr', legion.float64),
            ('ze', legion.float64),
            ('zetot', legion.float64),
            ('zw', legion.float64),
            ('zwrate', legion.float64),
            ('zp', legion.float64),
            ('zss', legion.float64),
            ('zdu', legion.float64),
            ('zuc_x', legion.float64),
            ('zuc_y', legion.float64),
            ('z0tmp', legion.float64),
            ('znump', legion.uint8),
        ]))

    # Fields stored per point.
    point = Fspace(
        OrderedDict([
            ('px0_x', legion.float64),
            ('px0_y', legion.float64),
            ('pxp_x', legion.float64),
            ('pxp_y', legion.float64),
            ('px_x', legion.float64),
            ('px_y', legion.float64),
            ('pu0_x', legion.float64),
            ('pu0_y', legion.float64),
            ('pu_x', legion.float64),
            ('pu_y', legion.float64),
            ('pap_x', legion.float64),
            ('pap_y', legion.float64),
            ('pf_x', legion.float64),
            ('pf_y', legion.float64),
            ('pmaswt', legion.float64),
            ('has_bcx', legion.bool_),
            ('has_bcy', legion.bool_),
        ]))

    # Fields stored per side.
    side = Fspace(
        OrderedDict([
            ('mapsz', legion.int1d),
            ('mapsp1', legion.int1d),
            ('mapsp1_r', legion.uint8),
            ('mapsp2', legion.int1d),
            ('mapsp2_r', legion.uint8),
            ('mapss3', legion.int1d),
            ('mapss4', legion.int1d),
            ('sareap', legion.float64),
            ('sarea', legion.float64),
            ('svolp', legion.float64),
            ('svol', legion.float64),
            ('ssurfp_x', legion.float64),
            ('ssurfp_y', legion.float64),
            ('smf', legion.float64),
            ('sfp_x', legion.float64),
            ('sfp_y', legion.float64),
            ('sft_x', legion.float64),
            ('sft_y', legion.float64),
            ('sfq_x', legion.float64),
            ('sfq_y', legion.float64),
            ('exp_x', legion.float64),
            ('exp_y', legion.float64),
            ('ex_x', legion.float64),
            ('ex_y', legion.float64),
            ('elen', legion.float64),
            ('carea', legion.float64),
            ('cevol', legion.float64),
            ('cdu', legion.float64),
            ('cdiv', legion.float64),
            ('ccos', legion.float64),
            ('cqe1_x', legion.float64),
            ('cqe1_y', legion.float64),
            ('cqe2_x', legion.float64),
            ('cqe2_y', legion.float64),
        ]))

    # Fields stored per span.
    span = Fspace(
        OrderedDict([
            ('start', legion.int64),
            ('stop', legion.int64),
            ('internal', legion.bool_),
        ]))

    zones = Region([conf.nz], zone)
    points = Region([conf.np], point)
    sides = Region([conf.ns], side)

    assert conf.par_init, 'parallel initialization required'

    # Remember the requested setting; it still decides validation at the end.
    old_seq_init = conf.seq_init
    if conf.seq_init:
        print('Warning: Sequential initialization not supported, skipping')
        # Since we aren't actually doing sequential initialization, we
        # have to turn this off or the verification in parallel
        # initialization will fail.
        conf.seq_init = False

    assert conf.par_init
    partitions = read_partitions(zones, points, sides, conf).get()

    conf.nspans_zones = partitions.nspans_zones
    conf.nspans_points = partitions.nspans_points

    pieces = Ispace([conf.npieces])

    zones_part = create_partition(True, zones, partitions.rz_all_p, pieces)

    # Points are first split in two; index 0 is used as private, index 1 as ghost.
    points_part = create_partition(True, points, partitions.rp_all_p, [2])
    private = points_part[0]
    ghost = points_part[1]

    private_part = create_partition(True, private, partitions.rp_all_private_p, pieces)
    ghost_part = create_partition(False, ghost, partitions.rp_all_ghost_p, pieces)
    shared_part = create_partition(True, ghost, partitions.rp_all_shared_p, pieces)

    sides_part = create_partition(True, sides, partitions.rs_all_p, pieces)

    # Span regions, equally partitioned across pieces.
    zone_spans = Region([conf.npieces * conf.nspans_zones], span)
    zone_spans_part = Partition.equal(zone_spans, pieces)

    private_spans = Region([conf.npieces * conf.nspans_points], span)
    private_spans_part = Partition.equal(private_spans, pieces)

    shared_spans = Region([conf.npieces * conf.nspans_points], span)
    shared_spans_part = Partition.equal(shared_spans, pieces)

    side_spans = Region([conf.npieces * conf.nspans_zones], span)
    side_spans_part = Partition.equal(side_spans, pieces)

    for region in [zone_spans, private_spans, shared_spans, side_spans]:
        for field in ['start', 'stop']:
            legion.fill(region, field, 0)

    if old_seq_init:
        # FIXME: These fields are actually never used, fill them here
        # just to avoid validation errors later.
        legion.fill(points, 'pap_x', 0)
        legion.fill(points, 'pap_y', 0)
        legion.fill(sides, 'svolp', 0)
        legion.fill(sides, 'svol', 0)
        legion.fill(sides, 'ssurfp_x', 0)
        legion.fill(sides, 'ssurfp_y', 0)

    if conf.par_init:
        for i in IndexLaunch(pieces):
            initialize_topology(conf, int(i), zones_part[i], private_part[i], shared_part[i], ghost_part[i], sides_part[i])

        for i in IndexLaunch(pieces):
            initialize_spans(conf, int(i), zone_spans_part[i], private_spans_part[i], shared_spans_part[i], side_spans_part[i])

    # Initialization launches.
    for i in IndexLaunch(pieces):
        init_pointers(zones_part[i], private_part[i], ghost_part[i], sides_part[i], side_spans_part[i])

    for i in IndexLaunch(pieces):
        init_mesh_zones(zones_part[i], zone_spans_part[i])

    for i in IndexLaunch(pieces):
        calc_centers_full(zones_part[i], private_part[i], ghost_part[i], sides_part[i], side_spans_part[i], True)

    for i in IndexLaunch(pieces):
        calc_volumes_full(zones_part[i], private_part[i], ghost_part[i], sides_part[i], side_spans_part[i], True)

    for i in IndexLaunch(pieces):
        init_side_fracs(zones_part[i], private_part[i], ghost_part[i], sides_part[i], side_spans_part[i])

    for i in IndexLaunch(pieces):
        init_hydro(zones_part[i], zone_spans_part[i], conf.rinit, conf.einit, conf.rinitsub, conf.einitsub, conf.subregion[0], conf.subregion[1], conf.subregion[2], conf.subregion[3])

    for i in IndexLaunch(pieces):
        init_radial_velocity(private_part[i], private_spans_part[i], conf.uinitradial)

    for i in IndexLaunch(pieces):
        init_radial_velocity(shared_part[i], shared_spans_part[i], conf.uinitradial)

    cycle = 0
    # 2 * conf.prune extra cycles: conf.prune run before the timer starts and
    # conf.prune after it stops.
    cstop = conf.cstop + 2 * conf.prune
    time = 0.0
    dt = Future(conf.dtmax, legion.float64)
    dthydro = conf.dtmax
    # NOTE(review): start_time / stop_time are only assigned when the loop
    # reaches the matching cycle counts. If the loop exits early because
    # time >= conf.tstop, the final print would fail -- confirm that inputs
    # rule this out.
    while cycle < cstop and time < conf.tstop:
        if cycle == conf.prune:
            # Fence so that earlier work is finished before timing starts.
            legion.execution_fence(block=True)
            start_time = legion.c.legion_get_current_time_in_nanos()

        dt = calc_global_dt(dt, conf.dtfac, conf.dtinit, conf.dtmax, dthydro, time, conf.tstop, cycle)

        for i in IndexLaunch(pieces):
            adv_pos_half(private_part[i], private_spans_part[i], dt, True, False)
        for i in IndexLaunch(pieces):
            adv_pos_half(shared_part[i], shared_spans_part[i], dt, True, False)

        for i in IndexLaunch(pieces):
            calc_everything(zones_part[i], private_part[i], ghost_part[i], sides_part[i], zone_spans_part[i], side_spans_part[i], conf.alfa, conf.gamma, conf.ssmin, dt, conf.q1, conf.q2, True)

        for i in IndexLaunch(pieces):
            adv_pos_full(private_part[i], private_spans_part[i], dt, True)
        for i in IndexLaunch(pieces):
            adv_pos_full(shared_part[i], shared_spans_part[i], dt, True)

        for i in IndexLaunch(pieces):
            calc_everything_full(zones_part[i], private_part[i], ghost_part[i], sides_part[i], zone_spans_part[i], side_spans_part[i], dt, True)

        futures = []
        for i in IndexLaunch(pieces):
            futures.append(
                calc_dt_hydro(zones_part[i], zone_spans_part[i], dt, conf.dtmax, conf.cfl, conf.cflv, True, False))

        # Wait on every per-piece future and take the minimum with conf.dtmax.
        dthydro = conf.dtmax
        dthydro = min(dthydro, *list(map(lambda x: x.get(), futures)))

        cycle += 1
        # dt.get() waits for this cycle's time step value.
        time += dt.get()

        # Use the padded local cstop, not conf.cstop: the loop runs
        # conf.cstop + 2 * conf.prune cycles, so the timed window must end
        # conf.prune cycles before that bound. Testing conf.cstop - conf.prune
        # would stop the timer 2 * conf.prune cycles too early (possibly
        # before start_time is taken).
        if cycle == cstop - conf.prune:
            legion.execution_fence(block=True)
            stop_time = legion.c.legion_get_current_time_in_nanos()

    if old_seq_init:
        validate_output_sequential(zones, points, sides, conf)
    else:
        print_once("Warning: Skipping sequential validation")

    print_once("ELAPSED TIME = %7.3f s" % ((stop_time - start_time) / 1e9))
def solve(n_runs):
    """Drive the data-collection / preprocessing / solver loop.

    Allocates the image, orientation, activity, diffraction, reconstruction
    and pixel regions, then loops: while data is still arriving it launches
    ``data_collector.fill_data_region`` and ``preprocess`` across all
    processes, and on every iteration it launches one ``solve_step`` per
    reconstruction.  The loop ends once all events have been consumed *and*
    at least 50 iterations have run; the reconstructions and the diffraction
    volume are then saved.

    Args:
        n_runs: Number of runs expected; compared against
            ``data_collector.get_num_runs_complete()`` to detect that all
            runs have been read.
    """
    n_procs = Tunable.select(Tunable.GLOBAL_PYS).get()
    print(f"Working with {n_procs} processes\n")

    # Allocate data structures.
    n_events_per_node = 100
    event_raw_shape = (4, 512, 512)
    images = Region(
        (n_events_per_node * n_procs,) + event_raw_shape,
        {'image': legion.float64})
    orientations = Region(
        (n_events_per_node * n_procs, 4),
        {'orientation': legion.float32})
    active = Region((n_procs,), {'active': legion.uint32})
    legion.fill(images, 'image', 0)
    legion.fill(orientations, 'orientation', 0)
    legion.fill(active, 'active', 0)

    # One sub-region per process: blocks of n_events_per_node along the
    # first axis for images/orientations, a single element for `active`.
    images_part = Partition.restrict(
        images, [n_procs],
        numpy.eye(4, 1) * n_events_per_node,
        (n_events_per_node,) + event_raw_shape)
    orient_part = Partition.restrict(
        orientations, [n_procs],
        numpy.eye(2, 1) * n_events_per_node,
        (n_events_per_node, 4))
    active_part = Partition.restrict(
        active, [n_procs], numpy.eye(1, 1), (1,))

    volume_shape = (N_POINTS,) * 3
    diffraction = Region(volume_shape, {
        'accumulator': legion.float32,
        'weight': legion.float32})
    legion.fill(diffraction, 'accumulator', 0.)
    legion.fill(diffraction, 'weight', 0.)

    n_reconstructions = 4
    reconstructions = []
    for _ in range(n_reconstructions):
        reconstruction = Region(volume_shape, {
            'support': legion.bool_,
            'rho': legion.complex64})
        legion.fill(reconstruction, 'support', False)
        legion.fill(reconstruction, 'rho', 0.)
        reconstructions.append(reconstruction)

    # Load pixel momentum
    pixels = Region(event_raw_shape + (3,), {'momentum': legion.float64})
    legion.fill(pixels, 'momentum', 0.)
    max_pixel_dist = load_pixels(pixels).get()
    voxel_length = 2 * max_pixel_dist / (N_POINTS - 1)

    images_per_solve = n_events_per_node
    iterations_ahead = 2

    # Solver settings are loop-invariant.  Each tuple holds the two float
    # arguments passed to solve_step right after hio_loop and er_loop
    # respectively (their meaning is defined by solve_step).
    hio_loop = 100
    er_loop = hio_loop // 2
    solver_settings = [(.1, .14), (.05, .14), (.1, .16), (.05, .16)]
    assert len(solver_settings) == n_reconstructions

    complete = False
    iteration = 0
    fences = []
    n_events_ready = []
    while not complete or iteration < 50:
        if not complete:
            # Obtain the newest copy of the data.
            with MustEpochLaunch([n_procs]):
                index_launch(
                    [n_procs], data_collector.fill_data_region,
                    images_part[ID], orient_part[ID], active_part[ID],
                    images_per_solve)

            # Preprocess data.
            index_launch(
                [n_procs], preprocess,
                images_part[ID], orient_part[ID], active_part[ID],
                pixels, diffraction, voxel_length)

        # Run solver.
        for index, (reconstruction, (arg_after_hio, arg_after_er)) in enumerate(
                zip(reconstructions, solver_settings)):
            solve_step(diffraction, reconstruction, index, iteration,
                       hio_loop, arg_after_hio, er_loop, arg_after_er)

        if not complete:
            # Make sure we don't run more than N iterations ahead.
            # A fence is appended on every iteration up to completion, so
            # this list is aligned with the iteration counter.
            fences.append(legion.execution_fence(future=True))
            if iteration - iterations_ahead >= 0:
                fences[iteration - iterations_ahead].get()

            # Check that all runs have been read and that all events have
            # been consumed.
            if data_collector.get_num_runs_complete() == n_runs:
                n_events_ready.append(index_launch(
                    [n_procs], data_collector.get_num_events_ready,
                    active_part[ID], reduce='+'))
                # This list only starts growing once every run has been
                # read, so it must be indexed relative to its own length
                # rather than by the global iteration counter (which would
                # raise IndexError when runs finish at iteration >= 3).
                if len(n_events_ready) > iterations_ahead:
                    ready = n_events_ready[-1 - iterations_ahead].get()
                    print(f'All runs complete, {ready} events remaining',
                          flush=True)
                    complete = ready == 0

        iteration += 1

    ##### -------------------------------------------------------------- #####

    # for idx in range(n_procs):
    #     save_images(images_part[idx], idx, point=idx)
    for index, reconstruction in enumerate(reconstructions):
        save_rho(reconstruction, index)
    save_diffraction(diffraction, 0)