Example #1
import dace
from dace.transformation import helpers as xfutil
from dace.transformation.dataflow import (AccumulateTransient, DoubleBuffering,
                                          InLocalStorage, MapCollapse,
                                          MapReduceFusion)


def find_map_by_param(sdfg: dace.SDFG, pname: str) -> dace.nodes.MapEntry:
    """ Finds the first map entry node by the given parameter name. """
    return next(n for n, _ in sdfg.all_nodes_recursive()
                if isinstance(n, dace.nodes.MapEntry) and pname in n.params)


def optimize_for_gpu(sdfg: dace.SDFG, m: int, n: int, k: int):
    """ Optimize the matrix multiplication example for GPUs. """
    # Ensure integers are 32-bit by default
    dace.Config.set('compiler', 'default_data_types', value='C')

    # Fuse the map and reduce nodes
    sdfg.apply_transformations(MapReduceFusion)

    # Apply GPU transformation
    sdfg.apply_gpu_transformations()

    # Find multiplication map
    entry = find_map_by_param(sdfg, 'k')

    # Create a tiling strategy
    divides_evenly = (m % 64 == 0) and (n % 64 == 0) and (k % 8 == 0)
    xfutil.tile(sdfg, entry, divides_evenly, True, i=64, j=64, k=8)
    xfutil.tile(sdfg, entry, divides_evenly, True, i=8, j=4)

    # Create kernel schedule by collapsing and reordering maps
    gtile_i = find_map_by_param(sdfg, 'tile_i')
    gtile_j = find_map_by_param(sdfg, 'tile_j')
    btile_i = find_map_by_param(sdfg, 'tile1_i')
    btile_j = find_map_by_param(sdfg, 'tile1_j')
    MapCollapse.apply_to(sdfg, outer_map_entry=gtile_i, inner_map_entry=gtile_j, permissive=True)
    MapCollapse.apply_to(sdfg, outer_map_entry=btile_i, inner_map_entry=btile_j, permissive=True)
    btile = find_map_by_param(sdfg, 'tile1_i')
    btile.map.schedule = dace.ScheduleType.GPU_ThreadBlock

    # Add local storage (shared memory) for A and B on GPU
    ktile = find_map_by_param(sdfg, 'tile_k')
    smem_a = InLocalStorage.apply_to(sdfg, dict(array='A'), node_a=ktile, node_b=btile)
    smem_b = InLocalStorage.apply_to(sdfg, dict(array='B'), node_a=ktile, node_b=btile)
    sdfg.arrays[smem_a.data].storage = dace.StorageType.GPU_Shared
    sdfg.arrays[smem_b.data].storage = dace.StorageType.GPU_Shared

    # Add local storage (registers) for A and B
    ttile = find_map_by_param(sdfg, 'k')
    warptile, ttile = xfutil.extract_map_dims(sdfg, ttile, [2])
    InLocalStorage.apply_to(sdfg, dict(array='trans_gpu_A'), node_a=warptile, node_b=ttile)
    InLocalStorage.apply_to(sdfg, dict(array='trans_gpu_B'), node_a=warptile, node_b=ttile)

    # Add local storage (registers) for C
    state = next(s for s in sdfg.nodes() if warptile in s.nodes())
    warptile_exit = state.exit_node(warptile)
    btile_exit = state.exit_node(btile)
    AccumulateTransient.apply_to(sdfg, map_exit=warptile_exit, outer_map_exit=btile_exit)
    # Set C tile to zero on allocation
    c_access = next(n for n in state.data_nodes() if n.data == 'trans_gpu_C')
    c_access.setzero = True

    # Unroll microkernel maps
    ttile.map.unroll = True

    # Apply double-buffering on shared memory
    DoubleBuffering.apply_to(sdfg, map_entry=ktile, transient=smem_a)
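
For context, a usage sketch (not part of the original listing): it builds the naive map-plus-reduce matrix multiplication in the style of DaCe's matmul optimization sample and hands its SDFG to optimize_for_gpu. The symbol names, kernel formulation, and sizes below are illustrative assumptions, and running the result requires a CUDA-capable GPU.

import numpy as np

# Illustrative symbolic sizes (assumption, mirroring the DaCe matmul sample)
M = dace.symbol('M')
N = dace.symbol('N')
K = dace.symbol('K')


@dace.program
def matmul(A: dace.float64[M, K], B: dace.float64[K, N], C: dace.float64[M, N]):
    # Multiply every pair of values into a 3D temporary, then reduce over k,
    # so MapReduceFusion and the 'k' map lookup above have a target
    tmp = np.ndarray([M, N, K], dtype=np.float64)
    for i, j, k in dace.map[0:M, 0:N, 0:K]:
        tmp[i, j, k] = A[i, k] * B[k, j]
    dace.reduce(lambda a, b: a + b, tmp, C, axis=2, identity=0)


if __name__ == '__main__':
    m, n, k = 1024, 1024, 1024  # sizes chosen so the tile sizes divide evenly
    sdfg = matmul.to_sdfg()
    optimize_for_gpu(sdfg, m, n, k)
    A, B = np.random.rand(m, k), np.random.rand(k, n)
    C = np.zeros((m, n))
    sdfg(A=A, B=B, C=C, M=m, N=n, K=k)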

Example #2

import inspect

import dace
from dace import SDFG, SDFGState, dtypes
from dace.frontend.python.parser import DaceProgram
# Assumed to be imported from daceml at module level: onnx_op,
# parse_variadic_param, in_desc_with_name, out_desc_with_name.


def program_for_node(program, sdfg: SDFG, state: SDFGState,
                     node: onnx_op.ONNXOp) -> SDFG:
    """ Expand a function to a dace program.

        The dtypes for the arguments will be extracted by matching the parameter names to edges.
    """
    input_names = node.schema.non_variadic_inputs()
    variadic_input_names = node.schema.variadic_inputs()

    output_names = node.schema.non_variadic_outputs()
    variadic_output_names = node.schema.variadic_outputs()

    # A name shared between inputs and outputs cannot be annotated unambiguously
    overlap = set(input_names).intersection(output_names)
    if overlap:
        # this is currently the case for only one onnx op
        raise ValueError(
            "program_for_node cannot be applied on nodes of this type;"
            " '{}' is both an input and an output".format(next(iter(overlap))))

    params = inspect.signature(program).parameters

    annotations = {}
    for name, param in params.items():
        if name in input_names or ("__" in name
                                   and parse_variadic_param(name)[0]
                                   in variadic_input_names):
            annotations[name] = in_desc_with_name(node, state, sdfg, name)
        elif name in output_names or ("__" in name
                                      and parse_variadic_param(name)[0]
                                      in variadic_output_names):
            annotations[name] = out_desc_with_name(node, state, sdfg, name)
        else:
            raise ValueError(
                "'{}' was not found as an input or output for {}".format(
                    name, node.schema.name))

    # Attach the derived annotations so the DaCe frontend can type the arguments
    program.__annotations__ = annotations

    # Wrap the function as a DaCe program (no auto-optimization, CPU device)
    result = DaceProgram(program, (), {}, False, dace.DeviceType.CPU)
    result.name = node.label + "_expansion"

    sdfg = result.to_sdfg()

    # Apply GPU transformations if the node itself is scheduled on the GPU
    if node.schedule in [dtypes.ScheduleType.GPU_Default] + dtypes.GPU_SCHEDULES:
        sdfg.apply_gpu_transformations()

    return sdfg
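
As a rough illustration of how this helper is typically used (a hedged sketch, simplified from daceml's pure ONNX op expansions; the operator choice and the expand_sqrt wrapper name are assumptions): the expansion defines a plain Python function whose parameter names match the operator's connector names, and program_for_node turns it into an SDFG whose arguments carry the matched data descriptors.

# Hedged sketch: expanding an ONNX Sqrt node. The wrapper name and exact
# callback signature are illustrative; in daceml this pattern appears in the
# "pure" forward implementations.
def expand_sqrt(node: onnx_op.ONNXOp, state: SDFGState, sdfg: SDFG) -> SDFG:
    def prog(X, Y):
        # X and Y match ONNX Sqrt's input/output connector names, so
        # program_for_node can annotate them from the node's edges
        Y[:] = dace.elementwise(lambda x: sqrt(x), X)

    return program_for_node(prog, sdfg, state, node)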