Python execution_fenceの例、legion.execution_fence Pythonの例

コード例 #1

0

ファイルを表示

def main():
    limit = int(os.environ['LIMIT']) if 'LIMIT' in os.environ else None

    data_dir = os.environ['DATA_DIR']
    ds = DataSource(exp='junk',
                    run=1,
                    dir=data_dir,
                    max_events=limit,
                    det_name='spi_cspad')
    # Note: DataSource doesn't seem to care about max_events when given
    # a filename.

    n_runs = 0
    runs = []
    for run in ds.runs():
        # FIXME: must epoch launch
        data_collector.load_run_data(run)
        # Right now, we assume one run or a serie of runs with the same
        # experimental configuration.

        n_runs += 1

        runs.append(
            run)  # Keep run around to avoid having it be garbage collected.

    solver.solve(n_runs)

    legion.execution_fence(
        block=True)  # Block to keep runs in scope until solve completes.

コード例 #2

0

ファイルを表示

ファイル: legion_node.py プロジェクト: JBlaschke/lcls2

def run_smd0_task(run):
    global_procs = legion.Tunable.select(legion.Tunable.GLOBAL_PYS).get()

    for i, smd_chunk in enumerate(smd_chunks(run)):
        run_smd_task(smd_chunk, run, point=i)
    # Block before returning so that the caller can use this task's future for synchronization
    legion.execution_fence(block=True)

コード例 #3

0

ファイルを表示

def run_smd0_task(run):
    global_procs = legion.Tunable.select(legion.Tunable.GLOBAL_PYS).get()

    smdr_man = SmdReaderManager(run.smd_dm.fds, run.max_events)
    for i, (smd_chunk, update_chunk) in enumerate(smdr_man.chunks()):
        run_smd_task(smd_chunk, run, point=i)
    # Block before returning so that the caller can use this task's future for synchronization
    legion.execution_fence(block=True)

コード例 #4

0

ファイルを表示

ファイル: no_access.py プロジェクト: nerikhman/legion

def main():
    R = legion.Region([4], {'x': legion.float64})
    P = legion.Partition.equal(R, [4])
    legion.fill(R, 'x', 0)

    hello2(P[0], 0)

    for i in legion.IndexLaunch([4]):
        hello2(P[i], i)

    legion.index_launch([4], hello2, P[ID], ID)

    # FIXME: This is needed in nopaint to avoid a race with region deletion
    legion.execution_fence()

コード例 #5

0

ファイルを表示

ファイル: must_epoch.py プロジェクト: architectureofthings/legion

def main():
    global global_var

    global_procs = legion.Tunable.select(legion.Tunable.GLOBAL_PYS).get()

    with legion.MustEpochLaunch(): # implicit launch domain
        for i in range(global_procs):
            hi(i, point=i)
    legion.execution_fence(block=True)

    assert global_var == 4123

    global_var = 456

    with legion.MustEpochLaunch([global_procs]):
        for i in range(global_procs):
            hi(i, point=i)
    legion.execution_fence(block=True)

    assert global_var == 4456

コード例 #6

0

ファイルを表示

def solve(n_runs):
    n_procs = legion.Tunable.select(legion.Tunable.GLOBAL_PYS).get()
    print(f"Working with {n_procs} processes\n")

    # Allocate data structures.
    n_xpp_events_per_node = 1000
    xpp_event_raw_shape = (2, 3, 6)
    xpp_data = legion.Region.create((n_xpp_events_per_node,) + xpp_event_raw_shape, {'x': legion.uint16})
    legion.fill(xpp_data, 'x', 0)
    xpp_part = legion.Partition.create_equal(xpp_data, [n_procs])

    gen_data_shape = (N_POINTS,) * 3
    data = legion.Region.create(gen_data_shape, {
        'amplitudes': legion.float32,
        'support': legion.uint8,  # should be bool
        'rho': legion.complex64})

    legion.fill(data, 'amplitudes', 0.)
    legion.fill(data, 'support', 0)
    legion.fill(data, 'rho', 0.)

    complete = False
    iteration = 0
    fences = []
    while not complete or iteration < 10:
        if not complete:
            # Obtain the newest copy of the data.
            with legion.MustEpochLaunch([n_procs]):
                for idx in range(n_procs): # legion.IndexLaunch([n_procs]): # FIXME: index launch
                    data_collector.fill_data_region(xpp_part[idx], point=idx)

        # Preprocess data.
        for idx in range(n_procs): # legion.IndexLaunch([n_procs]): # FIXME: index launch
            preprocess(xpp_part, data)

        # Generate data on first run
        if not iteration:
            generate_data(data)

        # Run solver.
        solve_step(data, 0, iteration)

        if not complete:
            # Make sure we don't run more than 2 iterations ahead.
            fences.append(legion.execution_fence(future=True))
            if iteration - 2 >= 0:
                fences[iteration - 2].get()

            # Check that all runs have been read.
            complete = data_collector.get_num_runs_complete() == n_runs

        iteration += 1

コード例 #7

0

ファイルを表示

ファイル: main.py プロジェクト: yilerat19/task-bench

def main():
    app = app_create(legion.input_args())
    graphs = app_task_graphs(app)

    if once_only():
        c.app_display(app)

    num_fields = max_fields
    result, primary, secondary, scratch, p_scratch, dset_max_args = init_partitions(
        graphs, num_fields)

    legion.execution_fence(block=True)
    start_time = legion.c.legion_get_current_time_in_nanos()

    execute_main_loop(graphs, num_fields, result, primary, secondary, scratch,
                      p_scratch, dset_max_args)

    legion.execution_fence(block=True)
    stop_time = legion.c.legion_get_current_time_in_nanos()

    total_time = (stop_time - start_time) / 1e9

    if once_only():
        c.app_report_timing(app, total_time)

コード例 #8

0

ファイルを表示

ファイル: stencil.py プロジェクト: nerikhman/legion

def main():
    print_once('Running stencil.py')

    conf = parse_args(legion.input_args(True))

    nbloated = np.array([conf.nx, conf.ny])
    nt = np.array([conf.ntx, conf.nty])
    init = conf.init

    n = nbloated - 2*RADIUS
    assert np.all(n >= nt), "grid too small"

    grid = Ispace(n + nt*2*RADIUS)
    tiles = Ispace(nt)

    point = Fspace(OrderedDict([
        ('input', DTYPE),
        ('output', DTYPE),
    ]))

    points = Region(grid, point)

    private = make_private_partition(points, tiles, n, nt)
    interior = make_interior_partition(points, tiles, n, nt)
    exterior = make_exterior_partition(points, tiles, n, nt)

    xm = Region([nt[0]*RADIUS, n[1]], point)
    xp = Region([nt[0]*RADIUS, n[1]], point)
    ym = Region([n[0], nt[1]*RADIUS], point)
    yp = Region([n[0], nt[1]*RADIUS], point)
    pxm_in = make_ghost_x_partition(xm, tiles, n, nt, -1)
    pxp_in = make_ghost_x_partition(xp, tiles, n, nt,  1)
    pym_in = make_ghost_y_partition(ym, tiles, n, nt, -1)
    pyp_in = make_ghost_y_partition(yp, tiles, n, nt,  1)
    pxm_out = make_ghost_x_partition(xm, tiles, n, nt, 0)
    pxp_out = make_ghost_x_partition(xp, tiles, n, nt, 0)
    pym_out = make_ghost_y_partition(ym, tiles, n, nt, 0)
    pyp_out = make_ghost_y_partition(yp, tiles, n, nt, 0)

    init = conf.init

    for r in [points, xm, xp, ym, yp]:
        for f in ['input', 'output']:
            legion.fill(r, f, init)

    tsteps = conf.tsteps + 2 * conf.tprune
    tprune = conf.tprune

    trace = Trace()
    for t in range(tsteps):
        if t == tprune:
            legion.execution_fence(block=True)
            start_time = legion.c.legion_get_current_time_in_nanos()
        with trace:
            if _constant_time_launches:
                index_launch(tiles, stencil, private[ID], interior[ID], pxm_in[ID], pxp_in[ID], pym_in[ID], pyp_in[ID], False)
                index_launch(tiles, increment, private[ID], exterior[ID], pxm_out[ID], pxp_out[ID], pym_out[ID], pyp_out[ID], False)
            else:
                for i in IndexLaunch(tiles):
                    stencil(private[i], interior[i], pxm_in[i], pxp_in[i], pym_in[i], pyp_in[i], False)
                for i in IndexLaunch(tiles):
                    increment(private[i], exterior[i], pxm_out[i], pxp_out[i], pym_out[i], pyp_out[i], False)
        if t == tsteps - tprune - 1:
            legion.execution_fence(block=True)
            stop_time = legion.c.legion_get_current_time_in_nanos()

    if _constant_time_launches:
        index_launch(tiles, check, private[ID], interior[ID], tsteps, init)
    else:
        for i in IndexLaunch(tiles):
            check(private[i], interior[i], tsteps, init)

    print_once('ELAPSED TIME = %7.3f s' % ((stop_time - start_time)/1e9))

コード例 #9

0

ファイルを表示

ファイル: pennant.py プロジェクト: qyz96/Legion

def main():
    print_once('Running pennant.py')

    conf = read_config().get()

    zone = Fspace(
        OrderedDict([
            ('zxp_x', legion.float64),
            ('zxp_y', legion.float64),
            ('zx_x', legion.float64),
            ('zx_y', legion.float64),
            ('zareap', legion.float64),
            ('zarea', legion.float64),
            ('zvol0', legion.float64),
            ('zvolp', legion.float64),
            ('zvol', legion.float64),
            ('zdl', legion.float64),
            ('zm', legion.float64),
            ('zrp', legion.float64),
            ('zr', legion.float64),
            ('ze', legion.float64),
            ('zetot', legion.float64),
            ('zw', legion.float64),
            ('zwrate', legion.float64),
            ('zp', legion.float64),
            ('zss', legion.float64),
            ('zdu', legion.float64),
            ('zuc_x', legion.float64),
            ('zuc_y', legion.float64),
            ('z0tmp', legion.float64),
            ('znump', legion.uint8),
        ]))

    point = Fspace(
        OrderedDict([
            ('px0_x', legion.float64),
            ('px0_y', legion.float64),
            ('pxp_x', legion.float64),
            ('pxp_y', legion.float64),
            ('px_x', legion.float64),
            ('px_y', legion.float64),
            ('pu0_x', legion.float64),
            ('pu0_y', legion.float64),
            ('pu_x', legion.float64),
            ('pu_y', legion.float64),
            ('pap_x', legion.float64),
            ('pap_y', legion.float64),
            ('pf_x', legion.float64),
            ('pf_y', legion.float64),
            ('pmaswt', legion.float64),
            ('has_bcx', legion.bool_),
            ('has_bcy', legion.bool_),
        ]))

    side = Fspace(
        OrderedDict([
            ('mapsz', legion.int1d),
            ('mapsp1', legion.int1d),
            ('mapsp1_r', legion.uint8),
            ('mapsp2', legion.int1d),
            ('mapsp2_r', legion.uint8),
            ('mapss3', legion.int1d),
            ('mapss4', legion.int1d),
            ('sareap', legion.float64),
            ('sarea', legion.float64),
            ('svolp', legion.float64),
            ('svol', legion.float64),
            ('ssurfp_x', legion.float64),
            ('ssurfp_y', legion.float64),
            ('smf', legion.float64),
            ('sfp_x', legion.float64),
            ('sfp_y', legion.float64),
            ('sft_x', legion.float64),
            ('sft_y', legion.float64),
            ('sfq_x', legion.float64),
            ('sfq_y', legion.float64),
            ('exp_x', legion.float64),
            ('exp_y', legion.float64),
            ('ex_x', legion.float64),
            ('ex_y', legion.float64),
            ('elen', legion.float64),
            ('carea', legion.float64),
            ('cevol', legion.float64),
            ('cdu', legion.float64),
            ('cdiv', legion.float64),
            ('ccos', legion.float64),
            ('cqe1_x', legion.float64),
            ('cqe1_y', legion.float64),
            ('cqe2_x', legion.float64),
            ('cqe2_y', legion.float64),
        ]))

    zones = Region([conf.nz], zone)
    points = Region([conf.np], point)
    sides = Region([conf.ns], side)

    assert conf.seq_init or conf.par_init, 'enable one of sequential or parallel initialization'

    if conf.seq_init:
        colorings = read_input_sequential(zones, points, sides, conf).get()

    assert conf.par_init
    partitions = read_partitions(zones, points, sides, conf).get()

    pieces = Ispace([conf.npieces])

    zones_part = create_partition(True, zones, partitions.rz_all_p, pieces)

    points_part = create_partition(True, points, partitions.rp_all_p, [2])
    private = points_part[0]
    ghost = points_part[1]

    private_part = create_partition(True, private, partitions.rp_all_private_p,
                                    pieces)
    ghost_part = create_partition(False, ghost, partitions.rp_all_ghost_p,
                                  pieces)
    shared_part = create_partition(True, ghost, partitions.rp_all_shared_p,
                                   pieces)

    sides_part = create_partition(True, sides, partitions.rs_all_p, pieces)

    if conf.par_init:
        if _constant_time_launches:
            c = Future(conf, value_type=config)
            index_launch(pieces, initialize_topology, c, ID, zones_part[ID],
                         private_part[ID], shared_part[ID], ghost_part[ID],
                         sides_part[ID])
        else:
            for i in IndexLaunch(pieces):
                initialize_topology(conf, i, zones_part[i], private_part[i],
                                    shared_part[i], ghost_part[i],
                                    sides_part[i])

    if _constant_time_launches:
        index_launch(pieces, init_pointers, zones_part[ID], private_part[ID],
                     ghost_part[ID], sides_part[ID])

        index_launch(pieces, init_mesh_zones, zones_part[ID])

        index_launch(pieces, calc_centers_full, zones_part[ID],
                     private_part[ID], ghost_part[ID], sides_part[ID], True)

        index_launch(pieces, calc_volumes_full, zones_part[ID],
                     private_part[ID], ghost_part[ID], sides_part[ID], True)

        index_launch(pieces, init_side_fracs, zones_part[ID], private_part[ID],
                     ghost_part[ID], sides_part[ID])

        index_launch(pieces, init_hydro, zones_part[ID], conf.rinit,
                     conf.einit, conf.rinitsub, conf.einitsub,
                     conf.subregion[0], conf.subregion[1], conf.subregion[2],
                     conf.subregion[3])

        index_launch(pieces, init_radial_velocity, private_part[ID],
                     conf.uinitradial)

        index_launch(pieces, init_radial_velocity, shared_part[ID],
                     conf.uinitradial)
    else:
        for i in IndexLaunch(pieces):
            init_pointers(zones_part[i], private_part[i], ghost_part[i],
                          sides_part[i])

        for i in IndexLaunch(pieces):
            init_mesh_zones(zones_part[i])

        for i in IndexLaunch(pieces):
            calc_centers_full(zones_part[i], private_part[i], ghost_part[i],
                              sides_part[i], True)

        for i in IndexLaunch(pieces):
            calc_volumes_full(zones_part[i], private_part[i], ghost_part[i],
                              sides_part[i], True)

        for i in IndexLaunch(pieces):
            init_side_fracs(zones_part[i], private_part[i], ghost_part[i],
                            sides_part[i])

        for i in IndexLaunch(pieces):
            init_hydro(zones_part[i], conf.rinit, conf.einit, conf.rinitsub,
                       conf.einitsub, conf.subregion[0], conf.subregion[1],
                       conf.subregion[2], conf.subregion[3])

        for i in IndexLaunch(pieces):
            init_radial_velocity(private_part[i], conf.uinitradial)

        for i in IndexLaunch(pieces):
            init_radial_velocity(shared_part[i], conf.uinitradial)

    cycle = 0
    cstop = conf.cstop + 2 * conf.prune
    time = 0.0
    dt = Future(conf.dtmax, legion.float64)
    dthydro = conf.dtmax
    while cycle < cstop and time < conf.tstop:
        if cycle == conf.prune:
            legion.execution_fence(block=True)
            start_time = legion.c.legion_get_current_time_in_nanos()

        if _constant_time_launches:
            index_launch(pieces, init_step_points, private_part[ID], True)

            index_launch(pieces, init_step_points, shared_part[ID], True)

            index_launch(pieces, init_step_zones, zones_part[ID], True)

            dt = calc_global_dt(dt, conf.dtfac, conf.dtinit, conf.dtmax,
                                dthydro, time, conf.tstop, cycle)

            index_launch(pieces, adv_pos_half, private_part[ID], dt, True)

            index_launch(pieces, adv_pos_half, shared_part[ID], dt, True)

            index_launch(pieces, calc_centers, zones_part[ID],
                         private_part[ID], ghost_part[ID], sides_part[ID],
                         True)

            index_launch(pieces, calc_volumes, zones_part[ID],
                         private_part[ID], ghost_part[ID], sides_part[ID],
                         True)

            index_launch(pieces, calc_char_len, zones_part[ID],
                         private_part[ID], ghost_part[ID], sides_part[ID],
                         True)

            index_launch(pieces, calc_rho_half, zones_part[ID], True)

            index_launch(pieces, sum_point_mass, zones_part[ID],
                         private_part[ID], ghost_part[ID], sides_part[ID],
                         True)

            index_launch(pieces, calc_state_at_half, zones_part[ID],
                         conf.gamma, conf.ssmin, dt, True)

            index_launch(pieces, calc_force_pgas_tts, zones_part[ID],
                         private_part[ID], ghost_part[ID], sides_part[ID],
                         conf.alfa, conf.ssmin, True)

            index_launch(pieces, qcs_zone_center_velocity, zones_part[ID],
                         private_part[ID], ghost_part[ID], sides_part[ID],
                         True)

            index_launch(pieces, qcs_corner_divergence, zones_part[ID],
                         private_part[ID], ghost_part[ID], sides_part[ID],
                         True)

            index_launch(pieces, qcs_qcn_force, zones_part[ID],
                         private_part[ID], ghost_part[ID], sides_part[ID],
                         conf.gamma, conf.q1, conf.q2, True)

            index_launch(pieces, qcs_force, zones_part[ID], private_part[ID],
                         ghost_part[ID], sides_part[ID], True)

            index_launch(pieces, qcs_vel_diff, zones_part[ID],
                         private_part[ID], ghost_part[ID], sides_part[ID],
                         conf.q1, conf.q2, True)

            index_launch(pieces, sum_point_force, zones_part[ID],
                         private_part[ID], ghost_part[ID], sides_part[ID],
                         True)

            index_launch(pieces, apply_boundary_conditions, private_part[ID],
                         True)

            index_launch(pieces, apply_boundary_conditions, shared_part[ID],
                         True)

            index_launch(pieces, adv_pos_full, private_part[ID], dt, True)

            index_launch(pieces, adv_pos_full, shared_part[ID], dt, True)

            index_launch(pieces, calc_centers_full, zones_part[ID],
                         private_part[ID], ghost_part[ID], sides_part[ID],
                         True)

            index_launch(pieces, calc_volumes_full, zones_part[ID],
                         private_part[ID], ghost_part[ID], sides_part[ID],
                         True)

            index_launch(pieces, calc_work, zones_part[ID], private_part[ID],
                         ghost_part[ID], sides_part[ID], dt, True)

            index_launch(pieces, calc_work_rate_energy_rho_full,
                         zones_part[ID], dt, True)

            future = index_launch(pieces,
                                  calc_dt_hydro,
                                  zones_part[ID],
                                  dt,
                                  conf.dtmax,
                                  conf.cfl,
                                  conf.cflv,
                                  True,
                                  reduce='min')

            dthydro = conf.dtmax
            dthydro = min_task(dthydro, future)
        else:
            for i in IndexLaunch(pieces):
                init_step_points(private_part[i], True)

            for i in IndexLaunch(pieces):
                init_step_points(shared_part[i], True)

            for i in IndexLaunch(pieces):
                init_step_zones(zones_part[i], True)

            dt = calc_global_dt(dt, conf.dtfac, conf.dtinit, conf.dtmax,
                                dthydro, time, conf.tstop, cycle)

            for i in IndexLaunch(pieces):
                adv_pos_half(private_part[i], dt, True)

            for i in IndexLaunch(pieces):
                adv_pos_half(shared_part[i], dt, True)

            for i in IndexLaunch(pieces):
                calc_centers(zones_part[i], private_part[i], ghost_part[i],
                             sides_part[i], True)

            for i in IndexLaunch(pieces):
                calc_volumes(zones_part[i], private_part[i], ghost_part[i],
                             sides_part[i], True)

            for i in IndexLaunch(pieces):
                calc_char_len(zones_part[i], private_part[i], ghost_part[i],
                              sides_part[i], True)

            for i in IndexLaunch(pieces):
                calc_rho_half(zones_part[i], True)

            for i in IndexLaunch(pieces):
                sum_point_mass(zones_part[i], private_part[i], ghost_part[i],
                               sides_part[i], True)

            for i in IndexLaunch(pieces):
                calc_state_at_half(zones_part[i], conf.gamma, conf.ssmin, dt,
                                   True)

            for i in IndexLaunch(pieces):
                calc_force_pgas_tts(zones_part[i], private_part[i],
                                    ghost_part[i], sides_part[i], conf.alfa,
                                    conf.ssmin, True)

            for i in IndexLaunch(pieces):
                qcs_zone_center_velocity(zones_part[i], private_part[i],
                                         ghost_part[i], sides_part[i], True)

            for i in IndexLaunch(pieces):
                qcs_corner_divergence(zones_part[i], private_part[i],
                                      ghost_part[i], sides_part[i], True)

            for i in IndexLaunch(pieces):
                qcs_qcn_force(zones_part[i], private_part[i], ghost_part[i],
                              sides_part[i], conf.gamma, conf.q1, conf.q2,
                              True)

            for i in IndexLaunch(pieces):
                qcs_force(zones_part[i], private_part[i], ghost_part[i],
                          sides_part[i], True)

            for i in IndexLaunch(pieces):
                qcs_vel_diff(zones_part[i], private_part[i], ghost_part[i],
                             sides_part[i], conf.q1, conf.q2, True)

            for i in IndexLaunch(pieces):
                sum_point_force(zones_part[i], private_part[i], ghost_part[i],
                                sides_part[i], True)

            for i in IndexLaunch(pieces):
                apply_boundary_conditions(private_part[i], True)

            for i in IndexLaunch(pieces):
                apply_boundary_conditions(shared_part[i], True)

            for i in IndexLaunch(pieces):
                adv_pos_full(private_part[i], dt, True)

            for i in IndexLaunch(pieces):
                adv_pos_full(shared_part[i], dt, True)

            for i in IndexLaunch(pieces):
                calc_centers_full(zones_part[i], private_part[i],
                                  ghost_part[i], sides_part[i], True)

            for i in IndexLaunch(pieces):
                calc_volumes_full(zones_part[i], private_part[i],
                                  ghost_part[i], sides_part[i], True)

            for i in IndexLaunch(pieces):
                calc_work(zones_part[i], private_part[i], ghost_part[i],
                          sides_part[i], dt, True)

            for i in IndexLaunch(pieces):
                calc_work_rate_energy_rho_full(zones_part[i], dt, True)

            futures = []
            for i in IndexLaunch(pieces):
                futures.append(
                    calc_dt_hydro(zones_part[i], dt, conf.dtmax, conf.cfl,
                                  conf.cflv, True))

            dthydro = conf.dtmax
            dthydro = min(dthydro, *list(map(lambda x: x.get(), futures)))

        cycle += 1
        time += dt.get()

        if cycle == cstop - conf.prune:
            legion.execution_fence(block=True)
            stop_time = legion.c.legion_get_current_time_in_nanos()

    if conf.seq_init:
        validate_output_sequential(zones, points, sides, conf)
    else:
        print_once("Warning: Skipping sequential validation")

    print_once("ELAPSED TIME = %7.3f s" % ((stop_time - start_time) / 1e9))

コード例 #10

0

ファイルを表示

def main():
    print_once('Running circuit_sparse.py')

    conf = parse_args(legion.input_args(True))

    assert conf.num_pieces % conf.pieces_per_superpiece == 0, "pieces should be evenly distributed to superpieces"
    conf.shared_nodes_per_piece = int(
        math.ceil(conf.nodes_per_piece * conf.pct_shared_nodes / 100.0))
    print_once(
        "circuit settings: loops=%d prune=%d pieces=%d (pieces/superpiece=%d) nodes/piece=%d (nodes/piece=%d) wires/piece=%d pct_in_piece=%d seed=%d"
        % (conf.num_loops, conf.prune, conf.num_pieces,
           conf.pieces_per_superpiece, conf.nodes_per_piece,
           conf.shared_nodes_per_piece, conf.wires_per_piece,
           conf.pct_wire_in_piece, conf.random_seed))

    num_pieces = conf.num_pieces
    num_superpieces = conf.num_pieces // conf.pieces_per_superpiece
    num_circuit_nodes = num_pieces * conf.nodes_per_piece
    num_circuit_wires = num_pieces * conf.wires_per_piece

    node = Fspace(
        OrderedDict([
            ('node_cap', legion.float32),
            ('leakage', legion.float32),
            ('charge', legion.float32),
            ('node_voltage', legion.float32),
        ]))
    wire = Fspace(
        OrderedDict([
            ('in_ptr', legion.int64),
            ('in_ptr_r', legion.uint8),
            ('out_ptr', legion.int64),
            ('out_ptr_r', legion.uint8),
            ('inductance', legion.float32),
            ('resistance', legion.float32),
            ('wire_cap', legion.float32),
        ] + [('current_%d' % i, legion.float32)
             for i in range(WIRE_SEGMENTS)] +
                    [('voltage_%d' % i, legion.float32)
                     for i in range(WIRE_SEGMENTS - 1)]))

    all_nodes = Region([num_circuit_nodes], node)
    all_wires = Region([num_circuit_wires], wire)

    node_size = np.dtype(list(
        map(lambda x: (x[0], x[1].numpy_type), node.field_types.items())),
                         align=True).itemsize
    wire_size = np.dtype(list(
        map(lambda x: (x[0], x[1].numpy_type), wire.field_types.items())),
                         align=True).itemsize
    print_once("Circuit memory usage:")
    print_once("  Nodes : %10d * %4d bytes = %12d bytes" %
               (num_circuit_nodes, node_size, num_circuit_nodes * node_size))
    print_once("  Wires : %10d * %4d bytes = %12d bytes" %
               (num_circuit_wires, wire_size, num_circuit_wires * wire_size))
    total = ((num_circuit_nodes * node_size) + (num_circuit_wires * wire_size))
    print_once("  Total                             %12d bytes" % total)

    snpp = conf.shared_nodes_per_piece
    pnpp = conf.nodes_per_piece - conf.shared_nodes_per_piece
    pps = conf.pieces_per_superpiece
    num_shared_nodes = num_pieces * snpp

    privacy_coloring = Region([2], {'rect': legion.rect1d})
    np.copyto(privacy_coloring.rect,
              np.array([(num_shared_nodes, num_circuit_nodes - 1),
                        (0, num_shared_nodes - 1)],
                       dtype=privacy_coloring.rect.dtype),
              casting='no')
    privacy_part = Partition.restrict(privacy_coloring, [2], np.eye(1), [1],
                                      disjoint_complete)
    all_nodes_part = Partition.image(all_nodes, privacy_part, 'rect', [2],
                                     disjoint_complete)

    all_private = all_nodes_part[0]
    all_shared = all_nodes_part[1]

    launch_domain = Ispace([num_superpieces])

    private_part = Partition.restrict(all_private, launch_domain,
                                      np.eye(1) * pnpp * pps,
                                      Domain([pnpp * pps], [num_shared_nodes]),
                                      disjoint_complete)
    shared_part = Partition.restrict(all_shared, launch_domain,
                                     np.eye(1) * snpp * pps, [snpp * pps],
                                     disjoint_complete)

    wires_part = Partition.equal(all_wires, launch_domain)

    ghost_ranges = Region([num_superpieces],
                          OrderedDict([('rect', legion.rect1d)]))
    ghost_ranges_part = Partition.equal(ghost_ranges, launch_domain)

    if _constant_time_launches:
        c = Future(conf[0], value_type=Config)
        index_launch(launch_domain, init_piece, ID, c, ghost_ranges_part[ID],
                     private_part[ID], shared_part[ID], all_shared,
                     wires_part[ID])
    else:
        for i in IndexLaunch(launch_domain):
            init_piece(i, conf[0], ghost_ranges_part[i], private_part[i],
                       shared_part[i], all_shared, wires_part[i])

    ghost_part = Partition.image(all_shared, ghost_ranges_part, 'rect',
                                 launch_domain)

    if _constant_time_launches:
        index_launch(launch_domain, init_pointers, private_part[ID],
                     shared_part[ID], ghost_part[ID], wires_part[ID])
    else:
        for i in IndexLaunch(launch_domain):
            init_pointers(private_part[i], shared_part[i], ghost_part[i],
                          wires_part[i])

    steps = conf.steps
    prune = conf.prune
    num_loops = conf.num_loops + 2 * prune

    trace = Trace()
    for j in range(num_loops):
        if j == prune:
            legion.execution_fence(block=True)
            start_time = legion.c.legion_get_current_time_in_nanos()
        with trace:
            if _constant_time_launches:
                index_launch(launch_domain, calculate_new_currents, False,
                             steps, private_part[ID], shared_part[ID],
                             ghost_part[ID], wires_part[ID])
                index_launch(launch_domain, distribute_charge,
                             private_part[ID], shared_part[ID], ghost_part[ID],
                             wires_part[ID])
                index_launch(launch_domain, update_voltages, False,
                             private_part[ID], shared_part[ID])
            else:
                for i in IndexLaunch(launch_domain):
                    calculate_new_currents(False, steps, private_part[i],
                                           shared_part[i], ghost_part[i],
                                           wires_part[i])
                for i in IndexLaunch(launch_domain):
                    distribute_charge(private_part[i], shared_part[i],
                                      ghost_part[i], wires_part[i])
                for i in IndexLaunch(launch_domain):
                    update_voltages(False, private_part[i], shared_part[i])
        if j == num_loops - prune - 1:
            legion.execution_fence(block=True)
            stop_time = legion.c.legion_get_current_time_in_nanos()

    sim_time = (stop_time - start_time) / 1e9
    print_once('ELAPSED TIME = %7.3f s' % sim_time)

    # Compute the floating point operations per second
    num_circuit_nodes = conf.num_pieces * conf.nodes_per_piece
    num_circuit_wires = conf.num_pieces * conf.wires_per_piece
    # calculate currents
    operations = num_circuit_wires * (WIRE_SEGMENTS * 6 +
                                      (WIRE_SEGMENTS - 1) * 4) * conf.steps
    # distribute charge
    operations += (num_circuit_wires * 4)
    # update voltages
    operations += (num_circuit_nodes * 4)
    # multiply by the number of loops
    operations *= conf.num_loops

    # Compute the number of gflops
    gflops = (1e-9 * operations) / sim_time
    print_once("GFLOPS = %7.3f GFLOPS" % gflops)

コード例 #11

0

ファイルを表示

ファイル: psana_legion.py プロジェクト: monarin/psana-legion

def main_task():
    assert _ds is not None

    events = _ds.smd().events()
    repeat = 'REPEAT' in os.environ and os.environ['REPEAT'] == '1'
    if repeat:
        assert _ds.config.limit
        events = itertools.cycle(events)
    if _ds.config.limit is not None:
        events = itertools.islice(events, _ds.config.limit)
    if _ds.config.predicate is not None:
        events = itertools.ifilter(_ds.config.predicate, events)

    eager = 'EAGER' in os.environ and os.environ['EAGER'] == '1'
    if eager:
        start = legion.c.legion_get_current_time_in_micros()
        events = list(events)
        stop = legion.c.legion_get_current_time_in_micros()

        print('Enumerating: Elapsed time: %e seconds' % ((stop - start) / 1e6))
        print('Enumerating: Number of events: %s' % len(events))
        print('Enumerating: Events per second: %e' % (len(events) /
                                                      ((stop - start) / 1e6)))

    randomize = 'RANDOMIZE' in os.environ and os.environ['RANDOMIZE'] == '1'
    print('Randomize?', randomize)
    if randomize:
        assert eager
        random.seed(123456789)  # Don't actually want this to be random
        random.shuffle(events)

    # Number of events per task
    chunksize = int(
        os.environ['CHUNKSIZE']) if 'CHUNKSIZE' in os.environ else 8

    # Number of tasks per processor per launch
    overcommit = int(
        os.environ['OVERCOMMIT']) if 'OVERCOMMIT' in os.environ else 1

    # Number of Python processors
    global_procs = legion.Tunable.select(legion.Tunable.GLOBAL_PYS).get()
    local_procs = legion.Tunable.select(legion.Tunable.LOCAL_PYS).get()

    # Number of tasks per launch
    launchsize = (max(global_procs - local_procs, local_procs)) * overcommit

    print('Chunk size %s' % chunksize)
    print('Launch size %s' % launchsize)

    start = legion.c.legion_get_current_time_in_micros()

    # Group events by calib cycle so that different cycles don't mix
    events = itertools.groupby(
        events, lambda e: e.get(psana.EventOffset).lastBeginCalibCycleDgram())

    if _ds.small_data is not None:
        # create HDF5 output file
        hdf5 = legion_HDF5.LegionHDF5(_ds.small_data.filepath)

    nevents = 0
    nlaunch = 0
    ncalib = 0
    file_buffer = []
    file_buffer_length = 0

    for calib, calib_events in events:
        calib = legion.Future(calib)
        for launch_events in chunk(chunk(calib_events, chunksize), launchsize):
            if nlaunch % 20 == 0:
                print('Processing event %s' % nevents)
                sys.stdout.flush()
            dictsBuffer = []

            for idx in legion.IndexLaunch([len(launch_events)]):
                dicts = analyze_chunk(map(Location, launch_events[idx]), calib)
                dictsBuffer.append(dicts)
                nevents += len(launch_events[idx])
            nlaunch += 1

            if _ds.small_data is not None:
                for dicts in dictsBuffer:
                    d = dicts.get()
                    file_buffer = file_buffer + d
                    file_buffer_length = file_buffer_length + len(d)
                    if file_buffer_length >= _ds.small_data.gather_interval:
                        hdf5.append_to_file(file_buffer)
                        file_buffer = []
                        file_buffer_length = 0

        ncalib += 1

    legion.execution_fence(block=True)
    stop = legion.c.legion_get_current_time_in_micros()

    if _ds.config.teardown is not None:
        # FIXME: Should be a must-epoch launch
        for idx in legion.IndexLaunch([global_procs]):
            teardown()

    print('Elapsed time: %e seconds' % ((stop - start) / 1e6))
    print('Number of calib cycles: %s' % ncalib)
    print('Number of launches: %s' % nlaunch)
    print('Number of events: %s' % nevents)
    print('Events per second: %e' % (nevents / ((stop - start) / 1e6)))

    # Hack: Estimate bandwidth used

    # total_events = 75522 * repeat
    # total_size = 875 * repeat # GB

    # fraction_events = float(nevents)/total_events
    # bw = fraction_events * total_size / ((stop - start)/1e6)
    # print('Estimated bandwidth used: %e GB/s' % bw)

    print('End of run')
    sys.stdout.flush()

コード例 #12

0

ファイルを表示

ファイル: pennant_fast.py プロジェクト: nerikhman/legion

def main():
    print_once('Running pennant_fast.py')

    conf = read_config().get()

    zone = Fspace(
        OrderedDict([
            ('zxp_x', legion.float64),
            ('zxp_y', legion.float64),
            ('zx_x', legion.float64),
            ('zx_y', legion.float64),
            ('zareap', legion.float64),
            ('zarea', legion.float64),
            ('zvol0', legion.float64),
            ('zvolp', legion.float64),
            ('zvol', legion.float64),
            ('zdl', legion.float64),
            ('zm', legion.float64),
            ('zrp', legion.float64),
            ('zr', legion.float64),
            ('ze', legion.float64),
            ('zetot', legion.float64),
            ('zw', legion.float64),
            ('zwrate', legion.float64),
            ('zp', legion.float64),
            ('zss', legion.float64),
            ('zdu', legion.float64),
            ('zuc_x', legion.float64),
            ('zuc_y', legion.float64),
            ('z0tmp', legion.float64),
            ('znump', legion.uint8),
        ]))

    point = Fspace(
        OrderedDict([
            ('px0_x', legion.float64),
            ('px0_y', legion.float64),
            ('pxp_x', legion.float64),
            ('pxp_y', legion.float64),
            ('px_x', legion.float64),
            ('px_y', legion.float64),
            ('pu0_x', legion.float64),
            ('pu0_y', legion.float64),
            ('pu_x', legion.float64),
            ('pu_y', legion.float64),
            ('pap_x', legion.float64),
            ('pap_y', legion.float64),
            ('pf_x', legion.float64),
            ('pf_y', legion.float64),
            ('pmaswt', legion.float64),
            ('has_bcx', legion.bool_),
            ('has_bcy', legion.bool_),
        ]))

    side = Fspace(
        OrderedDict([
            ('mapsz', legion.int1d),
            ('mapsp1', legion.int1d),
            ('mapsp1_r', legion.uint8),
            ('mapsp2', legion.int1d),
            ('mapsp2_r', legion.uint8),
            ('mapss3', legion.int1d),
            ('mapss4', legion.int1d),
            ('sareap', legion.float64),
            ('sarea', legion.float64),
            ('svolp', legion.float64),
            ('svol', legion.float64),
            ('ssurfp_x', legion.float64),
            ('ssurfp_y', legion.float64),
            ('smf', legion.float64),
            ('sfp_x', legion.float64),
            ('sfp_y', legion.float64),
            ('sft_x', legion.float64),
            ('sft_y', legion.float64),
            ('sfq_x', legion.float64),
            ('sfq_y', legion.float64),
            ('exp_x', legion.float64),
            ('exp_y', legion.float64),
            ('ex_x', legion.float64),
            ('ex_y', legion.float64),
            ('elen', legion.float64),
            ('carea', legion.float64),
            ('cevol', legion.float64),
            ('cdu', legion.float64),
            ('cdiv', legion.float64),
            ('ccos', legion.float64),
            ('cqe1_x', legion.float64),
            ('cqe1_y', legion.float64),
            ('cqe2_x', legion.float64),
            ('cqe2_y', legion.float64),
        ]))

    span = Fspace(
        OrderedDict([
            ('start', legion.int64),
            ('stop', legion.int64),
            ('internal', legion.bool_),
        ]))

    zones = Region([conf.nz], zone)
    points = Region([conf.np], point)
    sides = Region([conf.ns], side)

    assert conf.par_init, 'parallel initialization required'

    old_seq_init = conf.seq_init
    if conf.seq_init:
        print('Warning: Sequential initialization not supported, skipping')
        # Since we aren't actually doing sequential intialization, we
        # have to turn this off or the verification in parallel
        # initialization will fail.
        conf.seq_init = False

    assert conf.par_init
    partitions = read_partitions(zones, points, sides, conf).get()

    conf.nspans_zones = partitions.nspans_zones
    conf.nspans_points = partitions.nspans_points

    pieces = Ispace([conf.npieces])

    zones_part = create_partition(True, zones, partitions.rz_all_p, pieces)

    points_part = create_partition(True, points, partitions.rp_all_p, [2])
    private = points_part[0]
    ghost = points_part[1]

    private_part = create_partition(True, private, partitions.rp_all_private_p,
                                    pieces)
    ghost_part = create_partition(False, ghost, partitions.rp_all_ghost_p,
                                  pieces)
    shared_part = create_partition(True, ghost, partitions.rp_all_shared_p,
                                   pieces)

    sides_part = create_partition(True, sides, partitions.rs_all_p, pieces)

    zone_spans = Region([conf.npieces * conf.nspans_zones], span)
    zone_spans_part = Partition.equal(zone_spans, pieces)

    private_spans = Region([conf.npieces * conf.nspans_points], span)
    private_spans_part = Partition.equal(private_spans, pieces)

    shared_spans = Region([conf.npieces * conf.nspans_points], span)
    shared_spans_part = Partition.equal(shared_spans, pieces)

    side_spans = Region([conf.npieces * conf.nspans_zones], span)
    side_spans_part = Partition.equal(side_spans, pieces)

    for region in [zone_spans, private_spans, shared_spans, side_spans]:
        for field in ['start', 'stop']:
            legion.fill(region, field, 0)

    if old_seq_init:
        # FIXME: These fields are actually never used, fill them here
        # just to avoid validation errors later.
        legion.fill(points, 'pap_x', 0)
        legion.fill(points, 'pap_y', 0)
        legion.fill(sides, 'svolp', 0)
        legion.fill(sides, 'svol', 0)
        legion.fill(sides, 'ssurfp_x', 0)
        legion.fill(sides, 'ssurfp_y', 0)

    if conf.par_init:
        for i in IndexLaunch(pieces):
            initialize_topology(conf, int(i), zones_part[i], private_part[i],
                                shared_part[i], ghost_part[i], sides_part[i])

        for i in IndexLaunch(pieces):
            initialize_spans(conf, int(i), zone_spans_part[i],
                             private_spans_part[i], shared_spans_part[i],
                             side_spans_part[i])

    for i in IndexLaunch(pieces):
        init_pointers(zones_part[i], private_part[i], ghost_part[i],
                      sides_part[i], side_spans_part[i])

    for i in IndexLaunch(pieces):
        init_mesh_zones(zones_part[i], zone_spans_part[i])

    for i in IndexLaunch(pieces):
        calc_centers_full(zones_part[i], private_part[i], ghost_part[i],
                          sides_part[i], side_spans_part[i], True)

    for i in IndexLaunch(pieces):
        calc_volumes_full(zones_part[i], private_part[i], ghost_part[i],
                          sides_part[i], side_spans_part[i], True)

    for i in IndexLaunch(pieces):
        init_side_fracs(zones_part[i], private_part[i], ghost_part[i],
                        sides_part[i], side_spans_part[i])

    for i in IndexLaunch(pieces):
        init_hydro(zones_part[i], zone_spans_part[i], conf.rinit, conf.einit,
                   conf.rinitsub, conf.einitsub, conf.subregion[0],
                   conf.subregion[1], conf.subregion[2], conf.subregion[3])

    for i in IndexLaunch(pieces):
        init_radial_velocity(private_part[i], private_spans_part[i],
                             conf.uinitradial)

    for i in IndexLaunch(pieces):
        init_radial_velocity(shared_part[i], shared_spans_part[i],
                             conf.uinitradial)

    cycle = 0
    cstop = conf.cstop + 2 * conf.prune
    time = 0.0
    dt = Future(conf.dtmax, legion.float64)
    dthydro = conf.dtmax
    while cycle < cstop and time < conf.tstop:
        if cycle == conf.prune:
            legion.execution_fence(block=True)
            start_time = legion.c.legion_get_current_time_in_nanos()

        dt = calc_global_dt(dt, conf.dtfac, conf.dtinit, conf.dtmax, dthydro,
                            time, conf.tstop, cycle)

        for i in IndexLaunch(pieces):
            adv_pos_half(private_part[i], private_spans_part[i], dt, True,
                         False)

        for i in IndexLaunch(pieces):
            adv_pos_half(shared_part[i], shared_spans_part[i], dt, True, False)

        for i in IndexLaunch(pieces):
            calc_everything(zones_part[i], private_part[i], ghost_part[i],
                            sides_part[i], zone_spans_part[i],
                            side_spans_part[i], conf.alfa, conf.gamma,
                            conf.ssmin, dt, conf.q1, conf.q2, True)

        for i in IndexLaunch(pieces):
            adv_pos_full(private_part[i], private_spans_part[i], dt, True)

        for i in IndexLaunch(pieces):
            adv_pos_full(shared_part[i], shared_spans_part[i], dt, True)

        for i in IndexLaunch(pieces):
            calc_everything_full(zones_part[i], private_part[i], ghost_part[i],
                                 sides_part[i], zone_spans_part[i],
                                 side_spans_part[i], dt, True)

        futures = []
        for i in IndexLaunch(pieces):
            futures.append(
                calc_dt_hydro(zones_part[i], zone_spans_part[i], dt,
                              conf.dtmax, conf.cfl, conf.cflv, True, False))

        dthydro = conf.dtmax
        dthydro = min(dthydro, *list(map(lambda x: x.get(), futures)))

        cycle += 1
        time += dt.get()

        if cycle == conf.cstop - conf.prune:
            legion.execution_fence(block=True)
            stop_time = legion.c.legion_get_current_time_in_nanos()

    if old_seq_init:
        validate_output_sequential(zones, points, sides, conf)
    else:
        print_once("Warning: Skipping sequential validation")

    print_once("ELAPSED TIME = %7.3f s" % ((stop_time - start_time) / 1e9))

コード例 #13

0

ファイルを表示

ファイル: solver.py プロジェクト: ExaFEL/minifel

def solve(n_runs):
    n_procs = Tunable.select(Tunable.GLOBAL_PYS).get()
    print(f"Working with {n_procs} processes\n")

    # Allocate data structures.
    n_events_per_node = 100
    event_raw_shape = (4, 512, 512)
    images = Region(
        (n_events_per_node * n_procs,) + event_raw_shape, {'image': legion.float64})
    orientations = Region(
        (n_events_per_node * n_procs, 4), {'orientation': legion.float32})
    active = Region((n_procs,), {'active': legion.uint32})
    legion.fill(images, 'image', 0)
    legion.fill(orientations, 'orientation', 0)
    legion.fill(active, 'active', 0)
    images_part = Partition.restrict(
        images, [n_procs], numpy.eye(4, 1) * n_events_per_node, (n_events_per_node,) + event_raw_shape)
    orient_part = Partition.restrict(
        orientations, [n_procs], numpy.eye(2, 1) * n_events_per_node, (n_events_per_node, 4))
    active_part = Partition.restrict(
        active, [n_procs], numpy.eye(1, 1), (1,))

    volume_shape = (N_POINTS,) * 3
    diffraction = Region(volume_shape, {
        'accumulator': legion.float32,
        'weight': legion.float32})
    legion.fill(diffraction, 'accumulator', 0.)
    legion.fill(diffraction, 'weight', 0.)

    n_reconstructions = 4
    reconstructions = []
    for i in range(n_reconstructions):
        reconstruction = Region(volume_shape, {
            'support': legion.bool_,
            'rho': legion.complex64})
        legion.fill(reconstruction, 'support', False)
        legion.fill(reconstruction, 'rho', 0.)
        reconstructions.append(reconstruction)

    # Load pixel momentum
    pixels = Region(event_raw_shape + (3,), {'momentum': legion.float64})
    legion.fill(pixels, 'momentum', 0.)
    max_pixel_dist = load_pixels(pixels).get()
    voxel_length = 2 * max_pixel_dist / (N_POINTS - 1)

    images_per_solve = n_events_per_node
    iterations_ahead = 2

    complete = False
    iteration = 0
    fences = []
    n_events_ready = []
    while not complete or iteration < 50:
        if not complete:
            # Obtain the newest copy of the data.
            with MustEpochLaunch([n_procs]):
                index_launch(
                    [n_procs], data_collector.fill_data_region,
                    images_part[ID], orient_part[ID], active_part[ID], images_per_solve)

            # Preprocess data.
            index_launch(
                [n_procs], preprocess,
                images_part[ID], orient_part[ID], active_part[ID], pixels, diffraction,
                voxel_length)

        # Run solver.
        assert n_reconstructions == 4
        hio_loop = 100
        er_loop = hio_loop // 2
        solve_step(diffraction, reconstructions[0], 0, iteration,
                   hio_loop, .1, er_loop, .14)
        solve_step(diffraction, reconstructions[1], 1, iteration,
                   hio_loop, .05, er_loop, .14)
        solve_step(diffraction, reconstructions[2], 2, iteration,
                   hio_loop, .1, er_loop, .16)
        solve_step(diffraction, reconstructions[3], 3, iteration,
                   hio_loop, .05, er_loop, .16)

        if not complete:
            # Make sure we don't run more than N iterations ahead.
            fences.append(legion.execution_fence(future=True))
            if iteration - iterations_ahead >= 0:
                fences[iteration - iterations_ahead].get()

            # Check that all runs have been read and that all events have been consumed.
            if data_collector.get_num_runs_complete() == n_runs:
                n_events_ready.append(index_launch([n_procs], data_collector.get_num_events_ready, active_part[ID], reduce='+'))
                if iteration - iterations_ahead >= 0:
                    ready = n_events_ready[iteration - iterations_ahead].get()
                    print(f'All runs complete, {ready} events remaining', flush=True)
                    complete = ready == 0

        iteration += 1

    ##### -------------------------------------------------------------- #####

    # for idx in range(n_procs):
    #     save_images(images_part[idx], idx, point=idx)
    for i in range(n_reconstructions):
        save_rho(reconstructions[i], i)
    save_diffraction(diffraction, 0)