def main():
    """Build a region step by step, fill it, and run two tasks on it."""
    # A two-dimensional index space of size 4x4.
    index_space = legion.Ispace.create([4, 4])

    # A field space holding a single float64 field named x.
    field_space = legion.Fspace.create({'x': legion.float64})

    # The region is made from the index space and the field space.
    R = legion.Region.create(index_space, field_space)

    # Shorthand for the three steps above: when given a shape and a field
    # dictionary, Legion creates the index space and field space itself.
    R2 = legion.Region.create([4, 4], {'x': legion.float64})

    # Give field x of R an initial value.
    legion.fill(R, 'x', 101)

    # Two task launches. Both write R, so the second depends on the first.
    init(R)
    child_result = inc(R, 1)

    # A task launch returns a future, and reading its value blocks.
    # Idiomatic Legion code would more commonly hand the future to another
    # task instead of blocking on it.
    print("child task returned", child_result)
    print("child task future contains", child_result.get())

    print("main_task done")
def main():
    """Exercise index launches in loop form and in call form."""
    # One-dimensional launch: collect one future per launched point.
    results = []
    for idx in IndexLaunch(10):
        results.append(hi(idx))
    for expected, fut in enumerate(results):
        print("got %s" % fut.get())
        assert int(fut.get()) == expected

    # Same in 2 dimensions.
    results = []
    for pt in IndexLaunch([3, 3]):
        results.append(hi(pt))
    for position, pt in enumerate(Domain([3, 3])):
        assert results[position].get() == pt

    R = Region([4, 4], {'x': legion.float64})
    P = Partition.equal(R, [2, 2])
    legion.fill(R, 'x', 0)

    # Launch over the whole region, then over one subregion per point.
    for pt in IndexLaunch([2, 2]):
        hello(R, pt)

    for pt in IndexLaunch([2, 2]):
        hello(P[pt], pt)

    # Again, with a more explicit syntax.
    # ID is the name of the (implicit) loop variable.
    results = index_launch([3, 3], hi, ID)
    for pt in Domain([3, 3]):
        assert results[pt].get() == pt

    index_launch([2, 2], hello, R, ID)
    index_launch([2, 2], hello, P[ID], ID)
def main():
    """Create a region with one field per supported type and fill it.

    The region has 10 elements. ``do_local_fills`` is run on it first, then
    field ``c64`` is overwritten with ``legion.fill`` and element 0 is
    printed. Finally ``complex_plus_one`` is launched and its result printed.
    """
    R = legion.Region(
        [10],
        {
            'b': legion.bool_,
            'c64': legion.complex64,
            'c128': legion.complex128,
            'f32': legion.float32,
            'f64': legion.float64,
            'i8': legion.int8,
            'i16': legion.int16,
            'i32': legion.int32,
            'i64': legion.int64,
            'u8': legion.uint8,
            'u16': legion.uint16,
            'u32': legion.uint32,
            'u64': legion.uint64,
        })

    do_local_fills(R)

    legion.fill(R, 'c64', 5 + 6j)

    # The message names element 0, so read element 0 (the original read
    # element 1 while labelling it as element 0).
    print('value of R.c64[0] after remote fill %s' % R.c64[0])

    x = complex_plus_one(3 + 4j)
    print(x.get())
def make_region():
    """Create a 4x4 region with a float64 field x, zero it, and return it."""
    # Returning a region from a task automatically hands the privileges on
    # that region to the calling task.
    region = legion.Region.create([4, 4], {'x': legion.float64})
    legion.fill(region, 'x', 0)

    print('returning from make_region with', region)
    return region
def main():
    """Partition a 4x4 region equally, twice, and check subregion volumes."""
    R = Region([4, 4], {'x': legion.float64})
    legion.fill(R, 'x', 0)

    # Partition R into 2x2 pieces in a single call.
    P = Partition.equal(R, [2, 2])

    # The same partition built in two explicit steps: first the index
    # partition, then the region partition on top of it.
    IP2 = Ipartition.equal(R.ispace, [2, 2])
    P2 = Partition(R, IP2)

    assert P.color_space.volume == 4

    # Take one subregion out of P.
    R00 = P[0, 0]

    print('Parent region has volume %s' % R.ispace.volume)
    assert R.ispace.volume == 16
    assert check_subregion(R00).get() == 4

    # Partition that subregion once more and add up the piece volumes.
    P00 = Partition.equal(R00, [2, 2])
    total_volume = 0
    for row in range(2):
        for col in range(2):
            piece = P00[row, col]
            total_volume = total_volume + check_subregion(piece).get()
    assert total_volume == 4

    # Iterating a partition yields its subregions directly.
    for subregion in P:
        assert subregion.ispace.volume == 4
def main():
    """Build two restricted partitions of a 4x4 region and check volumes."""
    R = Region([4, 4], {'x': legion.float64})
    legion.fill(R, 'x', 0)

    # First partition: 2x2 colors, diagonal transform, 2x2 extent.
    colors = [2, 2]
    transform = [[2, 0],
                 [0, 2]]
    extent = [2, 2]
    P = Partition.restrict(R, colors, transform, extent)

    # Second partition: three colors, a 2x1 transform, and an extent given
    # as a Domain built from [2, 2] and [-1, -1].
    colors2 = [3]
    transform2 = [[1],
                  [2]]
    extent2 = Domain([2, 2], [-1, -1])
    P2 = Partition.restrict(R, colors2, transform2, extent2)

    assert P.color_space.volume == 4
    assert P2.color_space.volume == 3

    # Take one subregion out of P.
    R00 = P[0, 0]

    print('Parent region has volume %s' % R.ispace.volume)
    assert R.ispace.volume == 16
    assert check_subregion(R00).get() == 4
    for subregion in P:
        assert check_subregion(subregion).get() == 4

    # The pieces of P2 are expected to have volumes 1, 4 and 2.
    assert check_subregion(P2[0]).get() == 1
    assert check_subregion(P2[1]).get() == 4
    assert check_subregion(P2[2]).get() == 2
def make_region_dict():
    """Create a zero-filled 4x4 region and return it inside a dictionary."""
    # Returning the region as part of a larger data structure should work
    # just like returning it directly.
    region = legion.Region.create([4, 4], {'x': legion.float64})
    legion.fill(region, 'x', 0)

    result = {'asdf': region}
    print('returning from make_region_dict with', result)
    return result
def main():
    """Zero every field of a 4-element region, then run inc/mul and check."""
    R = Region([4], fspace)

    # Start every field of the region at zero.
    for name in R.keys():
        legion.fill(R, name, 0)

    inc(R, 20)
    check(R, 20)

    mul(R, 3)
    check_except_c128(R, 60)
def main():
    """Fill fields x and y of a 4x4 region and run the task sequence on it."""
    fields = {'x': legion.float64, 'y': legion.float64}
    R = Region([4, 4], fields)

    # Initial values for the two fields.
    legion.fill(R, 'x', 101)
    legion.fill(R, 'y', 102)

    # Task launches, in program order.
    init_x(R)
    init_y(R)
    inc(R, 1000)
    saxpy(R, 2)
    check(R)
def main():
    """Fill a float and an int field, increment both, and verify inline."""
    fields = {'x': legion.float64, 'y': legion.int32}
    R = Region([4, 4], fields)

    legion.fill(R, 'x', 1.25)
    legion.fill(R, 'y', 2)

    inc(R, 20)

    # Read the fields directly from this task.
    print(R.x)
    print(R.y)

    # Each field should now hold its fill value plus 20.
    assert R.x[0, 0] == 21.25
    assert R.y[0, 0] == 22
def main():
    """Run the stencil benchmark: set up regions, iterate, then check.

    Configuration comes from ``parse_args(legion.input_args(True))``. Each
    time step launches ``stencil`` and then ``increment`` over every tile;
    after the last step ``check`` is launched over every tile.
    """
    conf = parse_args(legion.input_args(True))

    nbloated = np.array([conf.nx, conf.ny])
    nt = np.array([conf.ntx, conf.nty])
    # Read once; the original re-assigned the same value again further down.
    init = conf.init

    n = nbloated - 2 * RADIUS
    assert np.all(n >= nt), "grid too small"

    grid = Ispace.create(n + nt * 2 * RADIUS)
    tiles = Ispace.create(nt)

    point = Fspace.create(OrderedDict([
        ('input', DTYPE),
        ('output', DTYPE),
    ]))

    points = Region.create(grid, point)

    private = make_private_partition(points, tiles, n, nt)
    interior = make_interior_partition(points, tiles, n, nt)
    exterior = make_exterior_partition(points, tiles, n, nt)

    # Separate regions for the ghost data in each direction.
    xm = Region.create([nt[0] * RADIUS, n[1]], point)
    xp = Region.create([nt[0] * RADIUS, n[1]], point)
    ym = Region.create([n[0], nt[1] * RADIUS], point)
    yp = Region.create([n[0], nt[1] * RADIUS], point)

    # The "in" partitions are built with a direction of -1 or +1, the "out"
    # partitions with 0.
    pxm_in = make_ghost_x_partition(xm, tiles, n, nt, -1)
    pxp_in = make_ghost_x_partition(xp, tiles, n, nt, 1)
    pym_in = make_ghost_y_partition(ym, tiles, n, nt, -1)
    pyp_in = make_ghost_y_partition(yp, tiles, n, nt, 1)
    pxm_out = make_ghost_x_partition(xm, tiles, n, nt, 0)
    pxp_out = make_ghost_x_partition(xp, tiles, n, nt, 0)
    pym_out = make_ghost_y_partition(ym, tiles, n, nt, 0)
    pyp_out = make_ghost_y_partition(yp, tiles, n, nt, 0)

    # Every field of every region starts at the configured initial value.
    for r in [points, xm, xp, ym, yp]:
        for f in ['input', 'output']:
            legion.fill(r, f, init)

    tsteps = conf.tsteps
    tprune = conf.tprune

    for t in range(tsteps):
        # The last argument is a flag that is True on exactly one step.
        for i in IndexLaunch(tiles):
            stencil(private[i], interior[i],
                    pxm_in[i], pxp_in[i], pym_in[i], pyp_in[i],
                    t == tprune)
        for i in IndexLaunch(tiles):
            increment(private[i], exterior[i],
                      pxm_out[i], pxp_out[i], pym_out[i], pyp_out[i],
                      t == tsteps - tprune - 1)

    for i in IndexLaunch(tiles):
        check(private[i], interior[i], tsteps, init)
def main():
    """Check that a task accepts a subregion but rejects a partition."""
    R = legion.Region([4, 4], {'x': legion.float64})
    legion.fill(R, 'x', 0)
    P = legion.Partition.equal(R, [2, 2])

    # Passing a subregion should work.
    hello_subregion(P[0, 0])

    # Passing the partition itself should fail with a TypeError.
    try:
        hello_subregion(P)
    except TypeError:
        print('Test passed')
        return
    assert False, 'Test failed'
def main():
    """Launch hello2 on subregions three ways, then issue a fence."""
    R = legion.Region([4], {'x': legion.float64})
    P = legion.Partition.equal(R, [4])
    legion.fill(R, 'x', 0)

    # A single launch on the first subregion.
    hello2(P[0], 0)

    # One launch per point, written as a loop.
    for point in legion.IndexLaunch([4]):
        hello2(P[point], point)

    # The same launch written as a single call.
    legion.index_launch([4], hello2, P[ID], ID)

    # FIXME: This is needed in nopaint to avoid a race with region deletion
    legion.execution_fence()
def init_partitions(graphs, num_fields):
    """Create the regions and partitions used for each task graph.

    Args:
        graphs: Iterable of task graphs. Each must expose ``max_width``,
            ``output_bytes_per_task`` and ``scratch_bytes_per_task``.
        num_fields: Number of uint8 fields; they are named "0", "1", ...

    Returns:
        A 6-tuple of lists with one entry per graph:
        ``(result, primary, secondary, scratch, p_scratch, dset_max_args)``.
        ``secondary[g][dset][arg]`` is a pending partition and
        ``dset_max_args[g][dset]`` is the largest dependency count seen for
        that dependence set. ``scratch[g]`` and ``p_scratch[g]`` are ``None``
        when the graph asks for no scratch bytes.
    """
    result, primary, secondary = [], [], []
    scratch, p_scratch, dset_max_args = [], [], []

    fspace = Fspace({str(x): legion.uint8 for x in range(num_fields)})

    for graph in graphs:
        colors = Ispace([graph.max_width])

        # Output region for this graph, equally partitioned and zero-filled.
        region = Region(
            [graph.max_width * graph.output_bytes_per_task], fspace)
        result.append(region)
        region_part = Partition.equal(region, colors)
        primary.append(region_part)
        for field in fspace.keys():
            legion.fill(region, field, 0)

        num_dsets = c.task_graph_max_dependence_sets(graph)
        graph_secondary = []
        graph_max_args = []
        secondary.append(graph_secondary)
        dset_max_args.append(graph_max_args)

        for dset in range(num_dsets):
            dset_parts = []
            graph_secondary.append(dset_parts)
            num_args = 0
            for arg in range(max_args):
                # One pending partition per argument slot. Each point gets
                # the primary subregion of its arg-th dependency, or nothing
                # when the point has fewer than arg + 1 dependencies.
                pending = Partition.pending(region, colors)
                dset_parts.append(pending)
                for point in range(graph.max_width):
                    deps = list(task_graph_dependencies(graph, dset, point))
                    num_args = max(num_args, len(deps))
                    if arg < len(deps):
                        sources = [region_part[deps[arg]]]
                    else:
                        sources = []
                    pending.union([point], sources)
            graph_max_args.append(num_args)

        if graph.scratch_bytes_per_task > 0:
            scratch_region = Region(
                [graph.max_width * graph.scratch_bytes_per_task], fspace)
            scratch.append(scratch_region)
            scratch_part = Partition.equal(scratch_region, colors)
            p_scratch.append(scratch_part)
            index_launch(colors, init_scratch_task, scratch_part[ID])
        else:
            scratch.append(None)
            p_scratch.append(None)

    return result, primary, secondary, scratch, p_scratch, dset_max_args
def main():
    """Replay the same pair of index launches five times under two traces."""
    R = Region([4, 4], {'x': legion.float64})
    P = Partition.equal(R, [2, 2])
    legion.fill(R, 'x', 0)

    # First trace: launches written as loops.
    loop_trace = Trace()
    for _ in range(5):
        with loop_trace:
            for point in IndexLaunch([2, 2]):
                look(R, point)

            for point in IndexLaunch([2, 2]):
                incr(P[point], point)

    # Second trace: the same launches written as single calls.
    call_trace = Trace()
    for _ in range(5):
        with call_trace:
            index_launch([2, 2], look, R, ID)
            index_launch([2, 2], incr, P[ID], ID)
def main():
    """Replay the same pair of index launches five times under two traces."""
    R = legion.Region.create([4, 4], {'x': legion.float64})
    P = legion.Partition.create_equal(R, [2, 2])
    legion.fill(R, 'x', 0)

    # First trace: launches written as loops.
    loop_trace = legion.Trace()
    for _ in range(5):
        with loop_trace:
            for point in legion.IndexLaunch([2, 2]):
                look(R, point)

            for point in legion.IndexLaunch([2, 2]):
                incr(P[point], point)

    # Second trace: the same launches written as single calls.
    call_trace = legion.Trace()
    for _ in range(5):
        with call_trace:
            legion.index_launch([2, 2], look, R, ID)
            legion.index_launch([2, 2], incr, P[ID], ID)
def solve(n_runs):
    """Run the collect / preprocess / solve loop.

    The loop runs until ``data_collector.get_num_runs_complete()`` has
    equalled ``n_runs`` and at least 10 iterations have been executed.

    Args:
        n_runs: Number of runs that must be reported complete by
            ``data_collector`` before data collection stops.
    """
    n_procs = legion.Tunable.select(legion.Tunable.GLOBAL_PYS).get()
    print(f"Working with {n_procs} processes\n")

    # Allocate data structures.
    n_xpp_events_per_node = 1000
    xpp_event_raw_shape = (2, 3, 6)
    xpp_data = legion.Region.create(
        (n_xpp_events_per_node,) + xpp_event_raw_shape,
        {'x': legion.uint16})
    legion.fill(xpp_data, 'x', 0)
    xpp_part = legion.Partition.create_equal(xpp_data, [n_procs])

    gen_data_shape = (N_POINTS,) * 3
    data = legion.Region.create(gen_data_shape, {
        'amplitudes': legion.float32,
        'support': legion.uint8,  # should be bool
        'rho': legion.complex64})
    legion.fill(data, 'amplitudes', 0.)
    legion.fill(data, 'support', 0)
    legion.fill(data, 'rho', 0.)

    complete = False
    iteration = 0
    fences = []
    while not complete or iteration < 10:
        if not complete:
            # Obtain the newest copy of the data.
            with legion.MustEpochLaunch([n_procs]):
                # FIXME: index launch (legion.IndexLaunch([n_procs]))
                for idx in range(n_procs):
                    data_collector.fill_data_region(xpp_part[idx], point=idx)

        # Preprocess data, one subregion per process. The original passed
        # the whole partition on every iteration and never used idx.
        # FIXME: index launch (legion.IndexLaunch([n_procs]))
        for idx in range(n_procs):
            preprocess(xpp_part[idx], data)

        # Generate data on first run
        if not iteration:
            generate_data(data)

        # Run solver.
        solve_step(data, 0, iteration)

        if not complete:
            # Make sure we don't run more than 2 iterations ahead.
            fences.append(legion.execution_fence(future=True))
            if iteration >= 2:
                fences[iteration - 2].get()

            # Check that all runs have been read.
            complete = data_collector.get_num_runs_complete() == n_runs

        iteration += 1
def main():
    """Call Regent tasks from Python: a scalar task, then SAXPY on a region."""
    print('hello from Python')
    result = hello(1234, 3.14)
    print('Python got result from Regent task: %s' % result.get())

    print('creating a field space with two fields')
    # Note: Need to use OrderedDict so that the field ordering matches Regent.
    field_order = OrderedDict([('x', legion.float64), ('y', legion.float64)])
    fs = Fspace(field_order)

    print('creating a region with 12 elements')
    r = Region([12], fs)

    legion.fill(r, 'x', 1)
    legion.fill(r, 'y', 2)

    scale = 1.5

    print('calling SAXPY task in Regent')
    saxpy(r, scale)

    check(r)
def main():
    """Fill every field type locally, then refill one field with legion.fill.

    A 10-element region is created with one field per supported type. Each
    field is filled through its own ``fill`` method, then ``c64`` is
    overwritten with ``legion.fill``. Element 0 of ``c64`` is printed after
    each step. Finally ``complex_plus_one`` is launched and its result
    printed.
    """
    R = legion.Region.create(
        [10],
        {
            'b': legion.bool_,
            'c64': legion.complex64,
            'c128': legion.complex128,
            'f32': legion.float32,
            'f64': legion.float64,
            'i8': legion.int8,
            'i16': legion.int16,
            'i32': legion.int32,
            'i64': legion.int64,
            'u8': legion.uint8,
            'u16': legion.uint16,
            'u32': legion.uint32,
            'u64': legion.uint64,
        })

    # Fill each field through the field object itself.
    R.b.fill(False)
    R.c64.fill(1+2j)
    R.c128.fill(3+4j)
    R.f32.fill(3.45)
    R.f64.fill(6.78)
    R.i8.fill(-1)
    R.i16.fill(-123)
    R.i32.fill(-123456)
    R.i64.fill(-123456789)
    R.u8.fill(1)
    R.u16.fill(123)
    R.u32.fill(123456)
    R.u64.fill(123456789)

    print('value of R.c64[0] after local fill %s' % R.c64[0])

    legion.fill(R, 'c64', 5+6j)

    # Read the element the message names (the original read element 1
    # while labelling it as element 0).
    print('value of R.c64[0] after remote fill %s' % R.c64[0])

    x = complex_plus_one(3+4j)
    print(x.get())
def main():
    """Exercise legion.copy with and without a reduction operator."""
    fields = {
        'x': legion.int32,
        'y': legion.int32,
        'z': legion.int32,
        'w': legion.int32,
    }
    R = Region([4, 4], fields)

    # Starting values: x = 1, y = 20, z = w = 100.
    legion.fill(R, 'x', 1)
    legion.fill(R, 'y', 20)
    legion.fill(R, ['z', 'w'], 100)

    # Two-field copy with '+', single-field copy with '+', then a plain
    # single-field copy.
    legion.copy(R, ['x', 'y'], R, ['z', 'w'], redop='+')
    legion.copy(R, 'x', R, 'y', redop='+')
    legion.copy(R, 'y', R, 'x')

    # Expected values after the three copies.
    assert R.x[0, 0] == 21
    assert R.y[0, 0] == 21
    assert R.z[0, 0] == 101
    assert R.w[0, 0] == 120
def main():
    """Create a region of shape [0, 0], fill it, and pass it to hello."""
    empty_shape = [0, 0]
    R = legion.Region(empty_shape, {'x': legion.float64})

    legion.fill(R, 'x', 3.14)

    hello(R)
def solve(n_runs):
    """Run the collect / preprocess / solve loop and save the results.

    The loop runs until ``complete`` has been set and at least 50
    iterations have been executed. ``complete`` is set once
    ``data_collector.get_num_runs_complete()`` equals ``n_runs`` and an
    events-ready reduction launched ``iterations_ahead`` checks earlier
    reports zero remaining events.

    Args:
        n_runs: Number of runs that must be reported complete by
            ``data_collector`` before the loop starts checking for
            remaining events.
    """
    n_procs = Tunable.select(Tunable.GLOBAL_PYS).get()
    print(f"Working with {n_procs} processes\n")

    # Allocate data structures.
    n_events_per_node = 100
    event_raw_shape = (4, 512, 512)
    images = Region(
        (n_events_per_node * n_procs,) + event_raw_shape,
        {'image': legion.float64})
    orientations = Region(
        (n_events_per_node * n_procs, 4),
        {'orientation': legion.float32})
    active = Region((n_procs,), {'active': legion.uint32})
    legion.fill(images, 'image', 0)
    legion.fill(orientations, 'orientation', 0)
    legion.fill(active, 'active', 0)

    # One piece per process along the first axis of each region.
    images_part = Partition.restrict(
        images, [n_procs],
        numpy.eye(4, 1) * n_events_per_node,
        (n_events_per_node,) + event_raw_shape)
    orient_part = Partition.restrict(
        orientations, [n_procs],
        numpy.eye(2, 1) * n_events_per_node,
        (n_events_per_node, 4))
    active_part = Partition.restrict(
        active, [n_procs], numpy.eye(1, 1), (1,))

    volume_shape = (N_POINTS,) * 3
    diffraction = Region(volume_shape, {
        'accumulator': legion.float32,
        'weight': legion.float32})
    legion.fill(diffraction, 'accumulator', 0.)
    legion.fill(diffraction, 'weight', 0.)

    n_reconstructions = 4
    reconstructions = []
    for i in range(n_reconstructions):
        reconstruction = Region(volume_shape, {
            'support': legion.bool_,
            'rho': legion.complex64})
        legion.fill(reconstruction, 'support', False)
        legion.fill(reconstruction, 'rho', 0.)
        reconstructions.append(reconstruction)

    # Load pixel momentum
    pixels = Region(event_raw_shape + (3,), {'momentum': legion.float64})
    legion.fill(pixels, 'momentum', 0.)
    max_pixel_dist = load_pixels(pixels).get()
    voxel_length = 2 * max_pixel_dist / (N_POINTS - 1)

    images_per_solve = n_events_per_node
    iterations_ahead = 2

    complete = False
    iteration = 0
    fences = []
    n_events_ready = []
    while not complete or iteration < 50:
        if not complete:
            # Obtain the newest copy of the data.
            with MustEpochLaunch([n_procs]):
                index_launch(
                    [n_procs], data_collector.fill_data_region,
                    images_part[ID], orient_part[ID], active_part[ID],
                    images_per_solve)

        # Preprocess data.
        index_launch(
            [n_procs], preprocess,
            images_part[ID], orient_part[ID], active_part[ID], pixels,
            diffraction, voxel_length)

        # Run solver.
        assert n_reconstructions == 4
        hio_loop = 100
        er_loop = hio_loop // 2
        solve_step(diffraction, reconstructions[0], 0, iteration,
                   hio_loop, .1, er_loop, .14)
        solve_step(diffraction, reconstructions[1], 1, iteration,
                   hio_loop, .05, er_loop, .14)
        solve_step(diffraction, reconstructions[2], 2, iteration,
                   hio_loop, .1, er_loop, .16)
        solve_step(diffraction, reconstructions[3], 3, iteration,
                   hio_loop, .05, er_loop, .16)

        if not complete:
            # Make sure we don't run more than N iterations ahead.
            fences.append(legion.execution_fence(future=True))
            if iteration - iterations_ahead >= 0:
                fences[iteration - iterations_ahead].get()

            # Check that all runs have been read and that all events have
            # been consumed.
            if data_collector.get_num_runs_complete() == n_runs:
                n_events_ready.append(
                    index_launch([n_procs],
                                 data_collector.get_num_events_ready,
                                 active_part[ID], reduce='+'))

                # n_events_ready only grows once all runs are complete, so
                # it is not aligned with the iteration counter. Index it
                # relative to its own end; indexing by iteration (as the
                # original did) can run past the end of the list.
                if len(n_events_ready) > iterations_ahead:
                    ready = n_events_ready[-1 - iterations_ahead].get()
                    print(f'All runs complete, {ready} events remaining',
                          flush=True)
                    complete = ready == 0

        iteration += 1

    ##### -------------------------------------------------------------- #####

    # for idx in range(n_procs):
    #     save_images(images_part[idx], idx, point=idx)
    for i, reconstruction in enumerate(reconstructions):
        save_rho(reconstruction, i)
    save_diffraction(diffraction, 0)
def main():
    """Run the traced, timed stencil benchmark and print the elapsed time.

    Configuration comes from ``parse_args(legion.input_args(True))``. The
    loop runs ``conf.tsteps + 2 * conf.tprune`` steps. The clock is read
    after a blocking fence before step ``tprune`` and again after a blocking
    fence at the end of step ``tsteps - tprune - 1``. Depending on
    ``_constant_time_launches`` the tasks are launched with ``index_launch``
    or with ``IndexLaunch`` loops.
    """
    print_once('Running stencil.py')

    conf = parse_args(legion.input_args(True))

    nbloated = np.array([conf.nx, conf.ny])
    nt = np.array([conf.ntx, conf.nty])
    # Read once; the original re-assigned the same value again further down.
    init = conf.init

    n = nbloated - 2*RADIUS
    assert np.all(n >= nt), "grid too small"

    grid = Ispace(n + nt*2*RADIUS)
    tiles = Ispace(nt)

    point = Fspace(OrderedDict([
        ('input', DTYPE),
        ('output', DTYPE),
    ]))

    points = Region(grid, point)

    private = make_private_partition(points, tiles, n, nt)
    interior = make_interior_partition(points, tiles, n, nt)
    exterior = make_exterior_partition(points, tiles, n, nt)

    # Separate regions for the ghost data in each direction.
    xm = Region([nt[0]*RADIUS, n[1]], point)
    xp = Region([nt[0]*RADIUS, n[1]], point)
    ym = Region([n[0], nt[1]*RADIUS], point)
    yp = Region([n[0], nt[1]*RADIUS], point)

    # The "in" partitions are built with a direction of -1 or +1, the "out"
    # partitions with 0.
    pxm_in = make_ghost_x_partition(xm, tiles, n, nt, -1)
    pxp_in = make_ghost_x_partition(xp, tiles, n, nt, 1)
    pym_in = make_ghost_y_partition(ym, tiles, n, nt, -1)
    pyp_in = make_ghost_y_partition(yp, tiles, n, nt, 1)
    pxm_out = make_ghost_x_partition(xm, tiles, n, nt, 0)
    pxp_out = make_ghost_x_partition(xp, tiles, n, nt, 0)
    pym_out = make_ghost_y_partition(ym, tiles, n, nt, 0)
    pyp_out = make_ghost_y_partition(yp, tiles, n, nt, 0)

    # Every field of every region starts at the configured initial value.
    for r in [points, xm, xp, ym, yp]:
        for f in ['input', 'output']:
            legion.fill(r, f, init)

    # tprune extra steps are added at each end of the run; the clock is
    # only read between them.
    tsteps = conf.tsteps + 2 * conf.tprune
    tprune = conf.tprune

    trace = Trace()
    for t in range(tsteps):
        if t == tprune:
            legion.execution_fence(block=True)
            start_time = legion.c.legion_get_current_time_in_nanos()
        with trace:
            if _constant_time_launches:
                index_launch(tiles, stencil,
                             private[ID], interior[ID],
                             pxm_in[ID], pxp_in[ID], pym_in[ID], pyp_in[ID],
                             False)
                index_launch(tiles, increment,
                             private[ID], exterior[ID],
                             pxm_out[ID], pxp_out[ID],
                             pym_out[ID], pyp_out[ID],
                             False)
            else:
                for i in IndexLaunch(tiles):
                    stencil(private[i], interior[i],
                            pxm_in[i], pxp_in[i], pym_in[i], pyp_in[i],
                            False)
                for i in IndexLaunch(tiles):
                    increment(private[i], exterior[i],
                              pxm_out[i], pxp_out[i], pym_out[i], pyp_out[i],
                              False)
        if t == tsteps - tprune - 1:
            legion.execution_fence(block=True)
            stop_time = legion.c.legion_get_current_time_in_nanos()

    if _constant_time_launches:
        index_launch(tiles, check, private[ID], interior[ID], tsteps, init)
    else:
        for i in IndexLaunch(tiles):
            check(private[i], interior[i], tsteps, init)

    # The clock values are in nanoseconds.
    print_once('ELAPSED TIME = %7.3f s' % ((stop_time - start_time)/1e9))
def main():
    """Run the pennant_fast benchmark: set up the mesh, iterate, report time.

    Configuration comes from ``read_config().get()``. The main loop runs for
    at most ``conf.cstop + 2 * conf.prune`` cycles, or until the simulated
    time reaches ``conf.tstop``. The clock is read after a blocking fence
    before cycle ``conf.prune`` and again after a blocking fence once
    ``cstop - conf.prune`` cycles have finished.
    """
    print_once('Running pennant_fast.py')

    conf = read_config().get()

    zone = Fspace(
        OrderedDict([
            ('zxp_x', legion.float64),
            ('zxp_y', legion.float64),
            ('zx_x', legion.float64),
            ('zx_y', legion.float64),
            ('zareap', legion.float64),
            ('zarea', legion.float64),
            ('zvol0', legion.float64),
            ('zvolp', legion.float64),
            ('zvol', legion.float64),
            ('zdl', legion.float64),
            ('zm', legion.float64),
            ('zrp', legion.float64),
            ('zr', legion.float64),
            ('ze', legion.float64),
            ('zetot', legion.float64),
            ('zw', legion.float64),
            ('zwrate', legion.float64),
            ('zp', legion.float64),
            ('zss', legion.float64),
            ('zdu', legion.float64),
            ('zuc_x', legion.float64),
            ('zuc_y', legion.float64),
            ('z0tmp', legion.float64),
            ('znump', legion.uint8),
        ]))

    point = Fspace(
        OrderedDict([
            ('px0_x', legion.float64),
            ('px0_y', legion.float64),
            ('pxp_x', legion.float64),
            ('pxp_y', legion.float64),
            ('px_x', legion.float64),
            ('px_y', legion.float64),
            ('pu0_x', legion.float64),
            ('pu0_y', legion.float64),
            ('pu_x', legion.float64),
            ('pu_y', legion.float64),
            ('pap_x', legion.float64),
            ('pap_y', legion.float64),
            ('pf_x', legion.float64),
            ('pf_y', legion.float64),
            ('pmaswt', legion.float64),
            ('has_bcx', legion.bool_),
            ('has_bcy', legion.bool_),
        ]))

    side = Fspace(
        OrderedDict([
            ('mapsz', legion.int1d),
            ('mapsp1', legion.int1d),
            ('mapsp1_r', legion.uint8),
            ('mapsp2', legion.int1d),
            ('mapsp2_r', legion.uint8),
            ('mapss3', legion.int1d),
            ('mapss4', legion.int1d),
            ('sareap', legion.float64),
            ('sarea', legion.float64),
            ('svolp', legion.float64),
            ('svol', legion.float64),
            ('ssurfp_x', legion.float64),
            ('ssurfp_y', legion.float64),
            ('smf', legion.float64),
            ('sfp_x', legion.float64),
            ('sfp_y', legion.float64),
            ('sft_x', legion.float64),
            ('sft_y', legion.float64),
            ('sfq_x', legion.float64),
            ('sfq_y', legion.float64),
            ('exp_x', legion.float64),
            ('exp_y', legion.float64),
            ('ex_x', legion.float64),
            ('ex_y', legion.float64),
            ('elen', legion.float64),
            ('carea', legion.float64),
            ('cevol', legion.float64),
            ('cdu', legion.float64),
            ('cdiv', legion.float64),
            ('ccos', legion.float64),
            ('cqe1_x', legion.float64),
            ('cqe1_y', legion.float64),
            ('cqe2_x', legion.float64),
            ('cqe2_y', legion.float64),
        ]))

    span = Fspace(
        OrderedDict([
            ('start', legion.int64),
            ('stop', legion.int64),
            ('internal', legion.bool_),
        ]))

    zones = Region([conf.nz], zone)
    points = Region([conf.np], point)
    sides = Region([conf.ns], side)

    assert conf.par_init, 'parallel initialization required'

    old_seq_init = conf.seq_init
    if conf.seq_init:
        print('Warning: Sequential initialization not supported, skipping')
        # Since we aren't actually doing sequential initialization, we
        # have to turn this off or the verification in parallel
        # initialization will fail.
        conf.seq_init = False

    assert conf.par_init
    partitions = read_partitions(zones, points, sides, conf).get()

    conf.nspans_zones = partitions.nspans_zones
    conf.nspans_points = partitions.nspans_points

    pieces = Ispace([conf.npieces])

    zones_part = create_partition(True, zones, partitions.rz_all_p, pieces)

    # The points are first split in two; piece 0 is used as the private
    # part and piece 1 as the ghost part.
    points_part = create_partition(True, points, partitions.rp_all_p, [2])
    private = points_part[0]
    ghost = points_part[1]

    private_part = create_partition(
        True, private, partitions.rp_all_private_p, pieces)
    ghost_part = create_partition(
        False, ghost, partitions.rp_all_ghost_p, pieces)
    shared_part = create_partition(
        True, ghost, partitions.rp_all_shared_p, pieces)

    sides_part = create_partition(True, sides, partitions.rs_all_p, pieces)

    zone_spans = Region([conf.npieces * conf.nspans_zones], span)
    zone_spans_part = Partition.equal(zone_spans, pieces)

    private_spans = Region([conf.npieces * conf.nspans_points], span)
    private_spans_part = Partition.equal(private_spans, pieces)

    shared_spans = Region([conf.npieces * conf.nspans_points], span)
    shared_spans_part = Partition.equal(shared_spans, pieces)

    side_spans = Region([conf.npieces * conf.nspans_zones], span)
    side_spans_part = Partition.equal(side_spans, pieces)

    for region in [zone_spans, private_spans, shared_spans, side_spans]:
        for field in ['start', 'stop']:
            legion.fill(region, field, 0)

    if old_seq_init:
        # FIXME: These fields are actually never used, fill them here
        # just to avoid validation errors later.
        legion.fill(points, 'pap_x', 0)
        legion.fill(points, 'pap_y', 0)
        legion.fill(sides, 'svolp', 0)
        legion.fill(sides, 'svol', 0)
        legion.fill(sides, 'ssurfp_x', 0)
        legion.fill(sides, 'ssurfp_y', 0)

    if conf.par_init:
        for i in IndexLaunch(pieces):
            initialize_topology(
                conf, int(i), zones_part[i],
                private_part[i], shared_part[i], ghost_part[i],
                sides_part[i])

        for i in IndexLaunch(pieces):
            initialize_spans(
                conf, int(i),
                zone_spans_part[i], private_spans_part[i],
                shared_spans_part[i], side_spans_part[i])

    for i in IndexLaunch(pieces):
        init_pointers(
            zones_part[i], private_part[i], ghost_part[i],
            sides_part[i], side_spans_part[i])

    for i in IndexLaunch(pieces):
        init_mesh_zones(zones_part[i], zone_spans_part[i])

    for i in IndexLaunch(pieces):
        calc_centers_full(
            zones_part[i], private_part[i], ghost_part[i],
            sides_part[i], side_spans_part[i],
            True)

    for i in IndexLaunch(pieces):
        calc_volumes_full(
            zones_part[i], private_part[i], ghost_part[i],
            sides_part[i], side_spans_part[i],
            True)

    for i in IndexLaunch(pieces):
        init_side_fracs(
            zones_part[i], private_part[i], ghost_part[i],
            sides_part[i], side_spans_part[i])

    for i in IndexLaunch(pieces):
        init_hydro(
            zones_part[i], zone_spans_part[i],
            conf.rinit, conf.einit, conf.rinitsub, conf.einitsub,
            conf.subregion[0], conf.subregion[1],
            conf.subregion[2], conf.subregion[3])

    for i in IndexLaunch(pieces):
        init_radial_velocity(
            private_part[i], private_spans_part[i], conf.uinitradial)

    for i in IndexLaunch(pieces):
        init_radial_velocity(
            shared_part[i], shared_spans_part[i], conf.uinitradial)

    cycle = 0
    # conf.prune extra cycles are added at each end of the run; the clock
    # is only read between them.
    cstop = conf.cstop + 2 * conf.prune
    time = 0.0
    dt = Future(conf.dtmax, legion.float64)
    dthydro = conf.dtmax
    while cycle < cstop and time < conf.tstop:
        if cycle == conf.prune:
            legion.execution_fence(block=True)
            start_time = legion.c.legion_get_current_time_in_nanos()

        dt = calc_global_dt(
            dt, conf.dtfac, conf.dtinit, conf.dtmax, dthydro,
            time, conf.tstop, cycle)

        for i in IndexLaunch(pieces):
            adv_pos_half(
                private_part[i], private_spans_part[i], dt, True, False)

        for i in IndexLaunch(pieces):
            adv_pos_half(
                shared_part[i], shared_spans_part[i], dt, True, False)

        for i in IndexLaunch(pieces):
            calc_everything(
                zones_part[i], private_part[i], ghost_part[i],
                sides_part[i], zone_spans_part[i], side_spans_part[i],
                conf.alfa, conf.gamma, conf.ssmin, dt, conf.q1, conf.q2,
                True)

        for i in IndexLaunch(pieces):
            adv_pos_full(
                private_part[i], private_spans_part[i], dt, True)

        for i in IndexLaunch(pieces):
            adv_pos_full(
                shared_part[i], shared_spans_part[i], dt, True)

        for i in IndexLaunch(pieces):
            calc_everything_full(
                zones_part[i], private_part[i], ghost_part[i],
                sides_part[i], zone_spans_part[i], side_spans_part[i],
                dt, True)

        futures = []
        for i in IndexLaunch(pieces):
            futures.append(
                calc_dt_hydro(
                    zones_part[i], zone_spans_part[i],
                    dt, conf.dtmax, conf.cfl, conf.cflv, True, False))

        # The next dthydro is the smallest per-piece value, capped at
        # conf.dtmax. Reading the futures blocks.
        dthydro = min(conf.dtmax, *[future.get() for future in futures])

        cycle += 1
        time += dt.get()

        # Compare against the extended cycle count. The original compared
        # against conf.cstop - conf.prune, which ignores the 2 * prune
        # cycles added to the loop bound and can stop the clock before it
        # was started.
        if cycle == cstop - conf.prune:
            legion.execution_fence(block=True)
            stop_time = legion.c.legion_get_current_time_in_nanos()

    if old_seq_init:
        validate_output_sequential(zones, points, sides, conf)
    else:
        print_once("Warning: Skipping sequential validation")

    # The clock values are in nanoseconds.
    print_once("ELAPSED TIME = %7.3f s" % ((stop_time - start_time) / 1e9))