def generate_code_for_sched_index(kernel, sched_index, codegen_state):
    """Generate code for the schedule item at *sched_index* of *kernel*.

    The item ``kernel.schedule[sched_index]`` is dispatched on its type:

    * ``EnterLoop``: the tag of the loop's iname selects the unrolling,
      vectorizing or sequential loop generator, whose result is returned.
    * ``Barrier``: a ``GeneratedInstruction`` holding a
      ``barrier(CLK_LOCAL_MEM_FENCE)`` statement is returned; the barrier's
      comment, if non-empty, is appended as a C-style comment.
    * ``RunInstruction``: the instruction's code is generated through
      ``codegen_state.try_vectorized``.

    :raises RuntimeError: if an ``EnterLoop`` iname carries a tag that is not
        handled here, or if the schedule item is of any other type.
    """
    item = kernel.schedule[sched_index]

    if isinstance(item, EnterLoop):
        iname_tag = kernel.iname_to_tag.get(item.iname)

        from loopy.codegen.loop import (
                generate_unroll_loop,
                generate_vectorize_loop,
                generate_sequential_loop_dim_code)
        from loopy.kernel.data import (
                UnrolledIlpTag, UnrollTag, ForceSequentialTag,
                LoopedIlpTag, VectorizeTag)

        if isinstance(iname_tag, (UnrollTag, UnrolledIlpTag)):
            return generate_unroll_loop(kernel, sched_index, codegen_state)

        if isinstance(iname_tag, VectorizeTag):
            return generate_vectorize_loop(kernel, sched_index, codegen_state)

        # An untagged iname is treated like the explicitly sequential tags.
        if iname_tag is None or isinstance(
                iname_tag, (LoopedIlpTag, ForceSequentialTag)):
            return generate_sequential_loop_dim_code(
                    kernel, sched_index, codegen_state)

        raise RuntimeError(
                "encountered (invalid) EnterLoop for '%s', tagged '%s'"
                % (item.iname, iname_tag))

    if isinstance(item, Barrier):
        from loopy.codegen import GeneratedInstruction
        from cgen import Statement

        suffix = (" /* %s */" % item.comment) if item.comment else ""

        return GeneratedInstruction(
                ast=Statement("barrier(CLK_LOCAL_MEM_FENCE)%s" % suffix),
                implemented_domain=None)

    if isinstance(item, RunInstruction):
        insn = kernel.id_to_insn[item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code

        def generate_with(inner_cgs):
            return generate_instruction_code(kernel, insn, inner_cgs)

        return codegen_state.try_vectorized(
                "instruction %s" % insn.id, generate_with)

    raise RuntimeError("unexpected schedule item type: %s" % type(item))
def generate_code_for_sched_index(kernel, sched_index, codegen_state):
    """Generate code for the schedule item at *sched_index* of *kernel*.

    The item ``kernel.schedule[sched_index]`` is dispatched on its type:

    * ``EnterLoop``: the tag of the loop's iname selects the unrolling,
      vectorizing or sequential loop generator, whose result is returned.
    * ``Barrier``: the result of ``kernel.target.emit_barrier`` called with
      the barrier's kind and comment is returned.
    * ``RunInstruction``: the instruction's code is generated through
      ``codegen_state.try_vectorized``.

    :raises RuntimeError: if an ``EnterLoop`` iname carries a tag that is not
        handled here, or if the schedule item is of any other type.
    """
    item = kernel.schedule[sched_index]

    if isinstance(item, EnterLoop):
        iname_tag = kernel.iname_to_tag.get(item.iname)

        from loopy.codegen.loop import (
                generate_unroll_loop,
                generate_vectorize_loop,
                generate_sequential_loop_dim_code)
        from loopy.kernel.data import (
                UnrolledIlpTag, UnrollTag, ForceSequentialTag,
                LoopedIlpTag, VectorizeTag)

        if isinstance(iname_tag, (UnrollTag, UnrolledIlpTag)):
            return generate_unroll_loop(kernel, sched_index, codegen_state)

        if isinstance(iname_tag, VectorizeTag):
            return generate_vectorize_loop(kernel, sched_index, codegen_state)

        # An untagged iname is treated like the explicitly sequential tags.
        if iname_tag is None or isinstance(
                iname_tag, (LoopedIlpTag, ForceSequentialTag)):
            return generate_sequential_loop_dim_code(
                    kernel, sched_index, codegen_state)

        raise RuntimeError(
                "encountered (invalid) EnterLoop for '%s', tagged '%s'"
                % (item.iname, iname_tag))

    if isinstance(item, Barrier):
        return kernel.target.emit_barrier(item.kind, item.comment)

    if isinstance(item, RunInstruction):
        insn = kernel.id_to_insn[item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code

        def generate_with(inner_cgs):
            return generate_instruction_code(kernel, insn, inner_cgs)

        return codegen_state.try_vectorized(
                "instruction %s" % insn.id, generate_with)

    raise RuntimeError("unexpected schedule item type: %s" % type(item))
def generate_code_for_sched_index(codegen_state, sched_index):
    """Generate code for the schedule item at *sched_index*.

    The item is ``codegen_state.kernel.schedule[sched_index]`` and is
    dispatched on its type:

    * ``CallKernel``: only allowed while device code is not being generated.
      A copy of *codegen_state* flagged as generating device code is passed
      to ``generate_host_or_device_program``; that result is merged with the
      kernel call produced by ``codegen_state.ast_builder.get_kernel_call``.
    * ``EnterLoop``: the tag of the loop's iname selects the unrolling,
      vectorizing or sequential loop generator, whose result is returned.
    * ``Barrier``: the result of ``codegen_state.ast_builder.emit_barrier``
      called with the barrier's kind and comment is returned.
    * ``RunInstruction``: the instruction's code is generated through
      ``codegen_state.try_vectorized``.

    :raises RuntimeError: if an ``EnterLoop`` iname carries a tag that is not
        handled here, or if the schedule item is of any other type.
    """
    kernel = codegen_state.kernel
    item = kernel.schedule[sched_index]

    if isinstance(item, CallKernel):
        assert not codegen_state.is_generating_device_code

        from loopy.schedule import (
                gather_schedule_block, get_insn_ids_for_block_at)

        _, block_past_end = gather_schedule_block(kernel.schedule, sched_index)
        assert block_past_end <= codegen_state.schedule_index_end

        extra_args = synthesize_idis_for_extra_args(kernel, sched_index)

        device_codegen_state = codegen_state.copy(
                is_generating_device_code=True,
                gen_program_name=item.kernel_name,
                schedule_index_end=block_past_end - 1,
                implemented_data_info=(
                    codegen_state.implemented_data_info + extra_args))

        from loopy.codegen.result import generate_host_or_device_program
        device_result = generate_host_or_device_program(
                device_codegen_state, sched_index)

        block_insn_ids = get_insn_ids_for_block_at(kernel.schedule, sched_index)
        glob_grid, loc_grid = kernel.get_grid_sizes_for_insn_ids_as_exprs(
                block_insn_ids)

        kernel_call = codegen_state.ast_builder.get_kernel_call(
                codegen_state, item.kernel_name,
                glob_grid, loc_grid, extra_args)

        return merge_codegen_results(
                codegen_state, [device_result, kernel_call])

    if isinstance(item, EnterLoop):
        iname_tag = kernel.iname_to_tag.get(item.iname)

        from loopy.codegen.loop import (
                generate_unroll_loop,
                generate_vectorize_loop,
                generate_sequential_loop_dim_code)
        from loopy.kernel.data import (
                UnrolledIlpTag, UnrollTag, ForceSequentialTag,
                LoopedIlpTag, VectorizeTag)

        if isinstance(iname_tag, (UnrollTag, UnrolledIlpTag)):
            return generate_unroll_loop(codegen_state, sched_index)

        if isinstance(iname_tag, VectorizeTag):
            return generate_vectorize_loop(codegen_state, sched_index)

        # An untagged iname is treated like the explicitly sequential tags.
        if iname_tag is None or isinstance(
                iname_tag, (LoopedIlpTag, ForceSequentialTag)):
            return generate_sequential_loop_dim_code(
                    codegen_state, sched_index)

        raise RuntimeError(
                "encountered (invalid) EnterLoop for '%s', tagged '%s'"
                % (item.iname, iname_tag))

    if isinstance(item, Barrier):
        return codegen_state.ast_builder.emit_barrier(item.kind, item.comment)

    if isinstance(item, RunInstruction):
        insn = kernel.id_to_insn[item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code

        def generate_with(inner_cgs):
            return generate_instruction_code(inner_cgs, insn)

        return codegen_state.try_vectorized(
                "instruction %s" % insn.id, generate_with)

    raise RuntimeError("unexpected schedule item type: %s" % type(item))
def generate_code_for_sched_index(codegen_state, sched_index):
    """Generate code for the schedule item at *sched_index*.

    The item is ``codegen_state.kernel.schedule[sched_index]`` and is
    dispatched on its type:

    * ``CallKernel``: only allowed while device code is not being generated.
      A copy of *codegen_state* flagged as generating device code is passed
      to ``generate_host_or_device_program``; that result is merged with the
      kernel call produced by ``codegen_state.ast_builder.get_kernel_call``.
    * ``EnterLoop``: the tag of the loop's iname selects the unrolling,
      vectorizing or sequential loop generator, whose result is returned.
    * ``Barrier``: in device code, the AST from
      ``codegen_state.ast_builder.emit_barrier`` is returned, wrapped in a
      ``CodeGenerationResult`` when the barrier has an originating
      instruction id. In host code, ``"global"`` and ``"local"`` barriers
      yield an empty ``CodeGenerationResult``.
    * ``RunInstruction``: the instruction's code is generated through
      ``codegen_state.try_vectorized``.

    :raises RuntimeError: if an ``EnterLoop`` iname carries a tag that is not
        handled here, or if the schedule item is of any other type.
    :raises LoopyError: if a barrier with a synchronization kind other than
        ``"global"`` or ``"local"`` is encountered in host code.
    """
    kernel = codegen_state.kernel
    sched_item = kernel.schedule[sched_index]

    if isinstance(sched_item, CallKernel):
        assert not codegen_state.is_generating_device_code

        from loopy.schedule import (
                gather_schedule_block, get_insn_ids_for_block_at)
        _, past_end_i = gather_schedule_block(kernel.schedule, sched_index)
        assert past_end_i <= codegen_state.schedule_index_end

        extra_args = synthesize_idis_for_extra_args(kernel, sched_index)

        new_codegen_state = codegen_state.copy(
                is_generating_device_code=True,
                gen_program_name=sched_item.kernel_name,
                schedule_index_end=past_end_i - 1,
                implemented_data_info=(
                    codegen_state.implemented_data_info + extra_args))

        from loopy.codegen.result import generate_host_or_device_program
        codegen_result = generate_host_or_device_program(
                new_codegen_state, sched_index)

        glob_grid, loc_grid = kernel.get_grid_sizes_for_insn_ids_as_exprs(
                get_insn_ids_for_block_at(kernel.schedule, sched_index))

        return merge_codegen_results(codegen_state, [
            codegen_result,
            codegen_state.ast_builder.get_kernel_call(
                codegen_state,
                sched_item.kernel_name,
                glob_grid, loc_grid,
                extra_args),
            ])

    elif isinstance(sched_item, EnterLoop):
        tag = kernel.iname_to_tag.get(sched_item.iname)

        from loopy.codegen.loop import (
                generate_unroll_loop,
                generate_vectorize_loop,
                generate_sequential_loop_dim_code)
        from loopy.kernel.data import (
                UnrolledIlpTag, UnrollTag, ForceSequentialTag,
                LoopedIlpTag, VectorizeTag, InOrderSequentialSequentialTag)

        if isinstance(tag, (UnrollTag, UnrolledIlpTag)):
            func = generate_unroll_loop
        elif isinstance(tag, VectorizeTag):
            func = generate_vectorize_loop
        elif tag is None or isinstance(tag, (
                LoopedIlpTag, ForceSequentialTag,
                InOrderSequentialSequentialTag)):
            # An untagged iname is treated like the sequential tags.
            func = generate_sequential_loop_dim_code
        else:
            raise RuntimeError("encountered (invalid) EnterLoop "
                    "for '%s', tagged '%s'" % (sched_item.iname, tag))

        return func(codegen_state, sched_index)

    elif isinstance(sched_item, Barrier):
        # {{{ emit barrier code

        from loopy.codegen.result import CodeGenerationResult

        if codegen_state.is_generating_device_code:
            barrier_ast = codegen_state.ast_builder.emit_barrier(
                    sched_item.synchronization_kind,
                    sched_item.mem_kind,
                    sched_item.comment)

            if sched_item.originating_insn_id:
                # Attach the barrier to the instruction it originated from.
                return CodeGenerationResult.new(
                        codegen_state,
                        sched_item.originating_insn_id,
                        barrier_ast,
                        codegen_state.implemented_domain)
            else:
                return barrier_ast
        else:
            # host code
            if sched_item.synchronization_kind in ["global", "local"]:
                # host code is assumed globally and locally synchronous
                return CodeGenerationResult(
                        host_program=None,
                        device_programs=[],
                        implemented_domains={},
                        implemented_data_info=(
                            codegen_state.implemented_data_info))
            else:
                # The adjacent literals are concatenated verbatim, so the
                # separating space before "in host code" must be explicit.
                raise LoopyError("do not know how to emit code for barrier "
                        "synchronization kind '%s' "
                        "in host code"
                        % sched_item.synchronization_kind)

        # }}}

    elif isinstance(sched_item, RunInstruction):
        insn = kernel.id_to_insn[sched_item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code

        return codegen_state.try_vectorized(
                "instruction %s" % insn.id,
                lambda inner_cgs: generate_instruction_code(inner_cgs, insn))

    else:
        raise RuntimeError("unexpected schedule item type: %s"
                % type(sched_item))
def generate_code_for_sched_index(codegen_state, sched_index):
    """Generate code for the schedule item at *sched_index*.

    The item is ``codegen_state.kernel.schedule[sched_index]`` and is
    dispatched on its type:

    * ``CallKernel``: only allowed while device code is not being generated.
      A copy of *codegen_state* flagged as generating device code is passed
      to ``generate_host_or_device_program``; that result is merged with the
      kernel call produced by ``codegen_state.ast_builder.get_kernel_call``.
    * ``EnterLoop``: the tags of the loop's iname select the unrolling,
      vectorizing or sequential loop generator, whose result is returned.
      Unroll tags are checked first, then the vectorize tag, then the
      sequential tags.
    * ``Barrier``: in device code, the AST from
      ``codegen_state.ast_builder.emit_barrier`` is returned, wrapped in a
      ``CodeGenerationResult`` when the barrier has an originating
      instruction id. In host code, ``"global"`` and ``"local"`` barriers
      yield an empty ``CodeGenerationResult``.
    * ``RunInstruction``: the instruction's code is generated through
      ``codegen_state.try_vectorized``.

    :raises RuntimeError: if an ``EnterLoop`` iname carries only tags that
        are not handled here, or if the schedule item is of any other type.
    :raises LoopyError: if a barrier with a synchronization kind other than
        ``"global"`` or ``"local"`` is encountered in host code.
    """
    kernel = codegen_state.kernel
    sched_item = kernel.schedule[sched_index]

    if isinstance(sched_item, CallKernel):
        assert not codegen_state.is_generating_device_code

        from loopy.schedule import (
                gather_schedule_block, get_insn_ids_for_block_at)
        _, past_end_i = gather_schedule_block(kernel.schedule, sched_index)
        assert past_end_i <= codegen_state.schedule_index_end

        extra_args = synthesize_idis_for_extra_args(kernel, sched_index)

        new_codegen_state = codegen_state.copy(
                is_generating_device_code=True,
                gen_program_name=sched_item.kernel_name,
                schedule_index_end=past_end_i-1,
                implemented_data_info=(
                    codegen_state.implemented_data_info + extra_args))

        from loopy.codegen.result import generate_host_or_device_program
        codegen_result = generate_host_or_device_program(
                new_codegen_state, sched_index)

        glob_grid, loc_grid = kernel.get_grid_sizes_for_insn_ids_as_exprs(
                get_insn_ids_for_block_at(kernel.schedule, sched_index))

        return merge_codegen_results(codegen_state, [
            codegen_result,
            codegen_state.ast_builder.get_kernel_call(
                codegen_state,
                sched_item.kernel_name,
                glob_grid, loc_grid,
                extra_args),
            ])

    elif isinstance(sched_item, EnterLoop):
        tags = kernel.iname_tags(sched_item.iname)
        # Drop falsy entries so that "not tags" below means "untagged".
        tags = tuple(tag for tag in tags if tag)

        from loopy.codegen.loop import (
                generate_unroll_loop,
                generate_vectorize_loop,
                generate_sequential_loop_dim_code)
        from loopy.kernel.data import (
                UnrolledIlpTag, UnrollTag, ForceSequentialTag,
                LoopedIlpTag, VectorizeTag, InOrderSequentialSequentialTag,
                filter_iname_tags_by_type)

        if filter_iname_tags_by_type(tags, (UnrollTag, UnrolledIlpTag)):
            func = generate_unroll_loop
        elif filter_iname_tags_by_type(tags, VectorizeTag):
            func = generate_vectorize_loop
        elif not tags or filter_iname_tags_by_type(tags, (
                LoopedIlpTag, ForceSequentialTag,
                InOrderSequentialSequentialTag)):
            func = generate_sequential_loop_dim_code
        else:
            raise RuntimeError("encountered (invalid) EnterLoop "
                    "for '%s', tagged '%s'"
                    % (sched_item.iname,
                        ", ".join(str(tag) for tag in tags)))

        return func(codegen_state, sched_index)

    elif isinstance(sched_item, Barrier):
        # {{{ emit barrier code

        from loopy.codegen.result import CodeGenerationResult

        if codegen_state.is_generating_device_code:
            barrier_ast = codegen_state.ast_builder.emit_barrier(
                    sched_item.synchronization_kind,
                    sched_item.mem_kind,
                    sched_item.comment)

            if sched_item.originating_insn_id:
                # Attach the barrier to the instruction it originated from.
                return CodeGenerationResult.new(
                        codegen_state,
                        sched_item.originating_insn_id,
                        barrier_ast,
                        codegen_state.implemented_domain)
            else:
                return barrier_ast
        else:
            # host code
            if sched_item.synchronization_kind in ["global", "local"]:
                # host code is assumed globally and locally synchronous
                return CodeGenerationResult(
                        host_program=None,
                        device_programs=[],
                        implemented_domains={},
                        implemented_data_info=(
                            codegen_state.implemented_data_info))
            else:
                # The adjacent literals are concatenated verbatim, so the
                # separating space before "in host code" must be explicit.
                raise LoopyError("do not know how to emit code for barrier "
                        "synchronization kind '%s' "
                        "in host code"
                        % sched_item.synchronization_kind)

        # }}}

    elif isinstance(sched_item, RunInstruction):
        insn = kernel.id_to_insn[sched_item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code

        return codegen_state.try_vectorized(
                "instruction %s" % insn.id,
                lambda inner_cgs: generate_instruction_code(inner_cgs, insn))

    else:
        raise RuntimeError("unexpected schedule item type: %s"
                % type(sched_item))
def generate_code_for_sched_index(codegen_state, sched_index):
    """Generate code for the schedule item at *sched_index*.

    The item is ``codegen_state.kernel.schedule[sched_index]`` and is
    dispatched on its type:

    * ``CallKernel``: only allowed while device code is not being generated.
      A copy of *codegen_state* flagged as generating device code is passed
      to ``generate_host_or_device_program``; that result is merged with the
      kernel call produced by ``codegen_state.ast_builder.get_kernel_call``.
    * ``EnterLoop``: the tag of the loop's iname selects the unrolling,
      vectorizing or sequential loop generator, whose result is returned.
    * ``Barrier``: the result of ``codegen_state.ast_builder.emit_barrier``
      called with the barrier's kind and comment is returned.
    * ``RunInstruction``: the instruction's code is generated through
      ``codegen_state.try_vectorized``.

    :raises RuntimeError: if an ``EnterLoop`` iname carries a tag that is not
        handled here, or if the schedule item is of any other type.
    """
    kernel = codegen_state.kernel
    item = kernel.schedule[sched_index]

    if isinstance(item, CallKernel):
        assert not codegen_state.is_generating_device_code

        from loopy.schedule import (
                gather_schedule_block, get_insn_ids_for_block_at)

        _, block_past_end = gather_schedule_block(kernel.schedule, sched_index)
        assert block_past_end <= codegen_state.schedule_index_end

        extra_args = synthesize_idis_for_extra_args(kernel, sched_index)

        device_codegen_state = codegen_state.copy(
                is_generating_device_code=True,
                gen_program_name=item.kernel_name,
                schedule_index_end=block_past_end-1,
                implemented_data_info=(
                    codegen_state.implemented_data_info + extra_args))

        from loopy.codegen.result import generate_host_or_device_program
        device_result = generate_host_or_device_program(
                device_codegen_state, sched_index)

        block_insn_ids = get_insn_ids_for_block_at(kernel.schedule, sched_index)
        glob_grid, loc_grid = kernel.get_grid_sizes_for_insn_ids_as_exprs(
                block_insn_ids)

        kernel_call = codegen_state.ast_builder.get_kernel_call(
                codegen_state, item.kernel_name,
                glob_grid, loc_grid, extra_args)

        return merge_codegen_results(
                codegen_state, [device_result, kernel_call])

    if isinstance(item, EnterLoop):
        iname_tag = kernel.iname_to_tag.get(item.iname)

        from loopy.codegen.loop import (
                generate_unroll_loop,
                generate_vectorize_loop,
                generate_sequential_loop_dim_code)
        from loopy.kernel.data import (
                UnrolledIlpTag, UnrollTag, ForceSequentialTag,
                LoopedIlpTag, VectorizeTag)

        if isinstance(iname_tag, (UnrollTag, UnrolledIlpTag)):
            return generate_unroll_loop(codegen_state, sched_index)

        if isinstance(iname_tag, VectorizeTag):
            return generate_vectorize_loop(codegen_state, sched_index)

        # An untagged iname is treated like the explicitly sequential tags.
        if iname_tag is None or isinstance(
                iname_tag, (LoopedIlpTag, ForceSequentialTag)):
            return generate_sequential_loop_dim_code(
                    codegen_state, sched_index)

        raise RuntimeError(
                "encountered (invalid) EnterLoop for '%s', tagged '%s'"
                % (item.iname, iname_tag))

    if isinstance(item, Barrier):
        return codegen_state.ast_builder.emit_barrier(item.kind, item.comment)

    if isinstance(item, RunInstruction):
        insn = kernel.id_to_insn[item.insn_id]

        from loopy.codegen.instruction import generate_instruction_code

        def generate_with(inner_cgs):
            return generate_instruction_code(inner_cgs, insn)

        return codegen_state.try_vectorized(
                "instruction %s" % insn.id, generate_with)

    raise RuntimeError("unexpected schedule item type: %s" % type(item))