def main() -> None:
    parser = argparse.ArgumentParser(description="Generate ATen source files")
    parser.add_argument(
        "-s",
        "--source-path",
        help="path to source directory for ATen",
        default="caffe2/aten/src/ATen",
    )
    parser.add_argument(
        "-p",
        "--generated-ops-cpp-path",
        help="path to directory to generate op dispatcher .cpp file",
        default="caffe2/torch/csrc/jit/runtime/static/generated_ops.cpp",
    )
    parser.add_argument(
        "-t",
        "--generated-ops-test-cpp-path",
        help="path to directory to generate op dispatcher test .cc file",
        default="caffe2/benchmarks/static_runtime/test_generated_ops.cc",
    )
    options = parser.parse_args()

    native_yaml_path = os.path.join(options.source_path, "native/native_functions.yaml")
    tags_yaml_path = os.path.join(options.source_path, "native/tags.yaml")
    parsed_yaml = gen.parse_native_yaml(native_yaml_path, tags_yaml_path)
    native_functions, backend_indices = (
        parsed_yaml.native_functions,
        parsed_yaml.backend_indices,
    )

    grouped_native_functions = gen.get_grouped_native_functions(native_functions)
    structured_native_functions = [
        g for g in grouped_native_functions if isinstance(g, NativeFunctionsGroup)
    ]
    supported_function_groups = group_functions_by_op_name(structured_native_functions)

    gen_out_variant_dispatcher = generator.GenOutVariantDispatcher()
    result = [
        gen_out_variant_dispatcher(groups, backend_indices[DispatchKey.CPU])
        for groups in supported_function_groups
    ]

    gen_out_variant_dispatcher_test_case = generator.GenOutVariantDispatcherTestCase()
    test_result = [
        gen_out_variant_dispatcher_test_case(groups)
        for groups in supported_function_groups
    ]

    write_cpp(result, options.generated_ops_cpp_path)
    write_test_cpp(test_result, options.generated_ops_test_cpp_path)

    print("total grouped native ops: %d" % len(grouped_native_functions))
    print("structured grouped native ops: %d" % len(structured_native_functions))
    supported_grouped_functions = sum(
        len(groups) for groups in supported_function_groups
    )
    print("generated grouped native ops: %d" % supported_grouped_functions)
def assertUfuncErrorInline(self, yaml_str: str, expect: str) -> None:
    # parse a single structured group out of the yaml to g
    es = yaml.load(yaml_str, Loader=LineLoader)
    parsed_yaml = parse_native_yaml_struct(es, set())
    native_functions, backend_indices = (
        parsed_yaml.native_functions,
        parsed_yaml.backend_indices,
    )
    grouped_native_functions = gen.get_grouped_native_functions(native_functions)
    assert len(grouped_native_functions) == 1
    g = grouped_native_functions[0]
    assert isinstance(g, NativeFunctionsGroup)
    assert g.out.ufunc_inner_loop

    # this is not ufunc codegen per se, but it does some basic sanity tests for
    # ufunc generation
    gen.compute_meta_function_declaration(g)
    dest.compute_native_function_declaration(g, backend_indices[DispatchKey.CPU])
    dest.compute_native_function_declaration(g, backend_indices[DispatchKey.CUDA])

    try:
        # the real kahuna
        dest.compute_ufunc_cpu(g)
        dest.compute_ufunc_cpu_kernel(g)
        dest.compute_ufunc_cuda(g)
    except AssertionError as e:
        # hack to strip out the context
        msg, _ = str(e).split(" in ", 2)
        self.assertExpectedInline("\n".join(textwrap.wrap(msg)), expect, skip=1)
        return
    self.fail(msg="Did not raise when expected to")
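# A hedged usage sketch for assertUfuncErrorInline: callers pass a YAML snippet that
# parses to exactly one structured group carrying a ufunc_inner_loop entry, plus the
# assertion text the ufunc codegen is expected to raise. The op name, YAML body, and
# expected message below are illustrative placeholders, not entries copied from the
# real test suite.
def test_ufunc_error_example(self) -> None:
    yaml_str = """\
- func: my_op(Tensor self, Tensor other) -> Tensor
  structured_delegate: my_op.out
- func: my_op.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)
  structured: True
  ufunc_inner_loop:
    Generic: my_op (AllAndComplex, BFloat16, Half)
"""
    # The second argument is whatever assertion message the ufunc codegen produces
    # for this (intentionally malformed) group; shown here only as a placeholder.
    self.assertUfuncErrorInline(yaml_str, """<expected assertion text>""")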
def run_gen_lazy_tensor(
    aten_path: str,
    source_yaml: str,
    output_dir: str,
    dry_run: bool,
    impl_path: Optional[str],
    node_base: str = default_args.node_base,
    node_base_hdr: Optional[str] = default_args.node_base_hdr,
    tensor_class: str = default_args.tensor_class,
    tensor_class_hdr: str = default_args.tensor_class_hdr,
    shape_inference_hdr: str = default_args.shape_inference_hdr,
    lazy_ir_generator: Type[GenLazyIR] = default_args.lazy_ir_generator,
    # build_in_tree is true for the TS backend and affects include paths
    build_in_tree: bool = False,
    # per_operator_headers changes whether ATen/Functions.h or individual operator
    # headers are used; it must match how ATen was built
    per_operator_headers: bool = False,
    backend_name: str = default_args.backend_name,
    gen_forced_fallback_code: bool = False,
    # the following arguments are temporary customization points for the XLA backend
    # migration; do not rely on them otherwise, they should be removed once migration
    # is complete
    backend_namespace: str = "torch::lazy",
    get_tensorlist: str = "GetTensorList",
    get_tensor_or_wrap_number: str = "GetLtcTensorOrCreateForWrappedNumber",
    try_get_tensor: str = "TryGetLtcTensor",
    metrics_counter: str = 'TORCH_LAZY_FN_COUNTER("lazy::")',
    create_tensor: str = "LazyTensor::Create",
    create_from_first_tensor: bool = False,
    create_aten_from_ltc_tensor: str = "torch::lazy::CreateAtenFromLtcTensor",
    tuple_aten_from_ltc_tensors: str = "torch::lazy::TupleAtenFromLtcTensors",
    lazy_value_class: str = "torch::lazy::Value",
    lazy_tensor_ptr: str = "LazyTensorPtr",
    get_device_fn: str = "torch::lazy::GetBackendDevice",
) -> None:
    lv_tokens = lazy_value_class.split("::")
    lv_class = lv_tokens[-1]
    lv_ns = "::".join(lv_tokens[:-1])
    setValueT(BaseCppType(lv_ns, lv_class))

    template_dir = os.path.join(aten_path, "templates")

    def make_file_manager(install_dir: str) -> FileManager:
        return FileManager(
            install_dir=install_dir, template_dir=template_dir, dry_run=dry_run
        )

    fm = make_file_manager(output_dir)

    native_yaml_path = os.path.join(aten_path, "native/native_functions.yaml")
    tags_yaml_path = os.path.join(aten_path, "native/tags.yaml")
    parsed_yaml = parse_native_yaml(native_yaml_path, tags_yaml_path)
    native_functions, backend_indices = (
        parsed_yaml.native_functions,
        parsed_yaml.backend_indices,
    )
    grouped_native_functions = get_grouped_native_functions(native_functions)

    def sort_native_function(f: Union[NativeFunctionsGroup, NativeFunction]) -> str:
        """
        We sort the native functions because of the note in concat_map_codegen.
        TODO(alanwaketan): Remove this sorting hack once all ops are grouped properly.
        """
        func = f.functional.func if isinstance(f, NativeFunctionsGroup) else f.func
        return str(func.name.name)

    grouped_native_functions = sorted(
        grouped_native_functions, key=sort_native_function
    )

    parsed_backend_yaml = parse_backend_yaml(
        source_yaml, grouped_native_functions, backend_indices
    )
    backend_key = parsed_backend_yaml.backend_key
    autograd_key = parsed_backend_yaml.autograd_key
    cpp_namespace = parsed_backend_yaml.cpp_namespace
    backend_indices = parsed_backend_yaml.backend_indices
    full_codegen = parse_full_codegen_ops(source_yaml, grouped_native_functions)

    def concat_map_codegen(
        func: Callable[[NativeFunction], Sequence[str]],
        xs: Iterable[Union[NativeFunctionsGroup, NativeFunction]],
        *,
        codegenInplaceVariant: bool = False,
    ) -> Iterator[str]:
        """
        We code-gen for the functional variant, which is all we need for IR
        classes/lowerings/shape inferences, but we only code-gen additional entries
        for the inplace variant for the native functions.

        Note: If xs is not sorted, there may be an edge case when generating IR
        classes. Consider relu and relu_: if we encounter relu_ before relu, we will
        generate an IR class with op = at::aten::relu_ for both relu and relu_,
        which will cause problems for relu.
        TODO(alanwaketan): Once all ops are grouped properly, we should no longer
        need this hack.
        """
        generated = set()

        def gen_key(func: FunctionSchema) -> Tuple[str, str]:
            # we want to generate unique entries for overloads of functional variants,
            # but not for inplace variants unless explicitly told `codegenInplaceVariant`
            return (func.name.name.base, func.name.overload_name)

        for x in xs:
            f = x.functional if isinstance(x, NativeFunctionsGroup) else x
            # For the 'or'd terms:
            # 1. codegenInplaceVariant means we can generate the in-place variant's
            #    corresponding items.
            # 2. not f.func.name.name.inplace means the op is not an in-place variant,
            #    so we can generate the item.
            # 3. gen_key(f.func) not in generated means that even for in-place ops we
            #    still need to generate the item once, as if they were the functional
            #    variants.
            if f.func.name in full_codegen and (
                codegenInplaceVariant
                or not f.func.name.name.inplace
                or gen_key(f.func) not in generated
            ):
                generated.add(gen_key(f.func))
                for r in func(f):
                    yield r

    selector = SelectiveBuilder.get_nop_selector()

    assert backend_key is not None
    class_name = backend_indices[backend_key].native_function_class_name()

    if impl_path is not None:
        error_on_missing_kernels(
            native_functions,
            backend_indices,
            backend_key,
            autograd_key,
            class_name,
            impl_path,
            full_codegen,
        )

    """ Validate Shape Inference Definitions

    Generated lazy native functions all perform shape inference, by first using a
    meta:: kernel if available for that op, and otherwise using a 'compute_shape_{op}'
    function instead. The generator knows the call signature for compute_shape_{op}
    because it matches the nativefunction (and meta::) signature, so it just has to
    check whether the op is structured and generate a call for one or the other.
    It's up to the dev to supply the missing compute_shape_{op} function, but the
    codegen at least warns you about this and provides the expected signature which
    can be copy-pasted into shape_inference.h.

    compute_shape_{op} functions are handwritten and should be replaced over time as
    ops get ported to structured kernels.

    See torch/csrc/lazy/core/shape_inference.cpp #READ THIS! for more information.
    """
    if shape_inference_hdr is not None:
        expected_shape_infr_decls = list(
            concat_map_codegen(
                dest.GenLazyShapeInferenceDefinition(
                    backend_indices[backend_key], tensor_class
                ),
                grouped_native_functions,
                codegenInplaceVariant=True,
            )
        )

        validate_shape_inference_header(shape_inference_hdr, expected_shape_infr_decls)
    assert class_name is not None

    # Generate nativefunction declarations
    # Note, eager registrations is set to False for the lazy TS backend as another
    # LTC backend may want to register their own lazy kernels instead of registering
    # the TS ones. The registration will lazily happen when init_ts_backend is called.
    gen_dispatchkey_nativefunc_headers(
        fm,
        class_name,
        cpp_namespace,
        backend_indices,
        grouped_native_functions,
        backend_key,
        autograd_key,
        backend_name,
    )

    # Generate Dispatcher registrations which hook up the nativefunctions
    for dispatch_key in (
        [backend_key] if autograd_key is None else [backend_key, autograd_key]
    ):
        gen_dispatcher_registrations(
            fm,
            output_dir,
            class_name,
            cpp_namespace,
            backend_indices,
            grouped_native_functions,
            backend_key,
            dispatch_key,
            selector,
            build_in_tree=build_in_tree,
            per_operator_headers=per_operator_headers,
            backend_name=backend_name,
            eager_registration=False,
        )

    # Generate native function impls that build IR nodes
    ns_helper = NamespaceHelper(cpp_namespace)
    fm.write_with_template(
        f"{backend_key}NativeFunctions.cpp",
        "DispatchKeyNativeFunctions.cpp",
        lambda: {
            "includes": [
                f"#include <{path}>"
                for path in [
                    tensor_class_hdr,
                    shape_inference_hdr,
                    "ATen/Functions.h",
                    "ATen/MetaFunctions.h",
                    "ATen/Operators.h",
                    "ATen/native/CPUFallback.h",
                    "torch/csrc/lazy/core/ir_builder.h",
                    "torch/csrc/lazy/core/lazy_graph_executor.h",
                    "torch/csrc/lazy/core/metrics.h",
                    "torch/csrc/lazy/core/shape.h",
                    f"{output_dir}/{backend_key}NativeFunctions.h",
                    f"{output_dir}/LazyIr.h",
                ]
                + (
                    ["torch/csrc/lazy/ts_backend/ts_eager_fallback.h"]
                    if gen_forced_fallback_code
                    else []
                )
            ],
            "native_functions_include": "",
            "namespace_prologue": ns_helper.prologue,
            "namespace_epilogue": ns_helper.epilogue,
            "native_function_definitions": list(
                concat_map_codegen(
                    dest.GenLazyNativeFuncDefinition(
                        f"{backend_key}NativeFunctions",
                        backend_indices[backend_key],
                        tensor_class,
                        gen_forced_fallback_code,
                        backend_namespace,
                        get_tensorlist,
                        get_tensor_or_wrap_number,
                        try_get_tensor,
                        metrics_counter,
                        create_tensor,
                        create_from_first_tensor,
                        create_aten_from_ltc_tensor,
                        tuple_aten_from_ltc_tensors,
                        lazy_tensor_ptr,
                        get_device_fn,
                    ),
                    grouped_native_functions,
                    codegenInplaceVariant=True,
                )
            ),
        },
    )

    # Generate IR node classes
    fm.write_with_template(
        "LazyIr.h",
        "LazyIr.h",
        lambda: {
            "lazy_ir_sysinc": [
                f"#include <{path}>"
                for path in [
                    "ATen/core/Formatting.h",
                    "c10/core/ScalarType.h",
                    "c10/util/Optional.h",
                    "torch/csrc/lazy/core/hash.h",
                    "torch/csrc/lazy/core/ir.h",
                    "torch/csrc/lazy/core/shape.h",
                    "vector",
                ]
            ],
            "lazy_ir_inc": [
                f'#include "{path}"'
                for path in [node_base_hdr if node_base_hdr is not None else None]
                if path is not None
            ],
            "ir_declarations": list(
                concat_map_codegen(
                    lazy_ir_generator(backend_indices[backend_key], node_base),
                    grouped_native_functions,
                )
            ),
            "namespace_prologue": ns_helper.prologue,
            "namespace_epilogue": ns_helper.epilogue,
        },
    )
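# A minimal standalone sketch of the dedup rule described in concat_map_codegen's
# docstring above (the relu/relu_ case): functional and in-place variants share a
# key, so only the first op seen for a key is emitted unless in-place codegen is
# explicitly requested. The op names and the trailing-underscore heuristic are
# illustrative only; the real code keys on (base name, overload name).
def _dedup_sketch(op_names, codegen_inplace=False):
    emitted = []
    seen = set()
    for name in op_names:
        base = name.rstrip("_")  # treat "relu_" and "relu" as the same base
        if codegen_inplace or not name.endswith("_") or base not in seen:
            seen.add(base)
            emitted.append(name)
    return emitted


# With sorted input, "relu" is seen before "relu_", so the shared entry is keyed off
# the functional variant, which is the behavior the docstring above asks for.
assert _dedup_sketch(["relu", "relu_"]) == ["relu"]
assert _dedup_sketch(["relu", "relu_"], codegen_inplace=True) == ["relu", "relu_"]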
def run(
    source_yaml: str, output_dir: str, dry_run: bool, impl_path: Optional[str] = None
) -> None:
    # Assumes that this file lives at PYTORCH_ROOT/torchgen/gen_backend_stubs.py
    pytorch_root = pathlib.Path(__file__).parent.parent.absolute()
    template_dir = os.path.join(pytorch_root, "aten/src/ATen/templates")

    def make_file_manager(install_dir: str) -> FileManager:
        return FileManager(
            install_dir=install_dir, template_dir=template_dir, dry_run=dry_run
        )

    fm = make_file_manager(output_dir)

    native_yaml_path = os.path.join(
        pytorch_root, "aten/src/ATen/native/native_functions.yaml"
    )
    tags_yaml_path = os.path.join(pytorch_root, "aten/src/ATen/native/tags.yaml")
    parsed_yaml = parse_native_yaml(native_yaml_path, tags_yaml_path)
    native_functions, backend_indices = (
        parsed_yaml.native_functions,
        parsed_yaml.backend_indices,
    )
    grouped_native_functions = get_grouped_native_functions(native_functions)
    parsed_backend_yaml = parse_backend_yaml(
        source_yaml, grouped_native_functions, backend_indices
    )
    backend_key = parsed_backend_yaml.backend_key
    autograd_key = parsed_backend_yaml.autograd_key
    cpp_namespace = parsed_backend_yaml.cpp_namespace
    class_name = parsed_backend_yaml.class_name
    backend_indices = parsed_backend_yaml.backend_indices

    selector = SelectiveBuilder.get_nop_selector()

    if backend_key is None:
        # This could be useful if a backend wants to quickly set up a noop yaml file
        # but doesn't have any kernels ready yet.
        return

    if class_name is None:
        # class_name is an optional argument to the backend yaml file.
        # if specified it allows an external backend to override
        # the name of the class that all generated kernel definitions live under.
        # if not specified, its value is given as native_function_class_name.
        class_name = backend_indices[backend_key].native_function_class_name()
    assert class_name is not None

    if impl_path is not None:
        error_on_missing_kernels(
            native_functions,
            backend_indices,
            backend_key,
            autograd_key,
            class_name,
            impl_path,
        )

    gen_dispatchkey_nativefunc_headers(
        fm,
        class_name,
        cpp_namespace,
        backend_indices,
        grouped_native_functions,
        backend_key,
        autograd_key,
    )

    for dispatch_key in (
        [backend_key] if autograd_key is None else [backend_key, autograd_key]
    ):
        gen_dispatcher_registrations(
            fm,
            output_dir,
            class_name,
            backend_indices,
            grouped_native_functions,
            backend_key,
            dispatch_key,
            selector,
        )
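# A hedged illustration of the kind of backend yaml that run() consumes via
# parse_backend_yaml. The key names mirror the yaml emitted by
# generate_native_functions below (backend, cpp_namespace, supported); the backend
# name and op list here are placeholders, not a real configuration.
EXAMPLE_BACKEND_YAML = """\
backend: MyBackend
cpp_namespace: my_backend
supported:
- abs
- add.Tensor
"""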
def generate_native_functions(self):
    logging.info("Generating Native Functions Yaml")

    native_path = TORCHGEN_DIR.joinpath("packaged", "ATen", "native")
    native_yaml_path = native_path.joinpath("native_functions.yaml")
    tags_yaml_path = native_path.joinpath("tags.yaml")

    ts_native_yaml_path = TORCH_DIR.joinpath(
        "aten", "src", "ATen", "native", "ts_native_functions.yaml"
    )
    ts_native_yaml = None
    if ts_native_yaml_path.exists():
        ts_native_yaml = yaml.load(ts_native_yaml_path.read_text(), yaml.CLoader)

    parsed_yaml = parse_native_yaml(native_yaml_path, tags_yaml_path)
    self.native_functions = parsed_yaml.native_functions
    self.backend_indices = parsed_yaml.backend_indices
    self.grouped_native_functions = get_grouped_native_functions(
        self.native_functions
    )

    def get_native_function_name(f):
        func = f if hasattr(f, "func") else f.functional
        return str(func.func.name)

    self.native_functions = {
        get_native_function_name(f): f for f in self.native_functions
    }

    def get_opnames(ops):
        opnames = defaultdict(set)
        for op in ops:
            opname = op.split(".")[0]
            opnames[opname].add(op)
        return opnames

    aten_funcs = get_opnames(
        map(get_native_function_name, self.grouped_native_functions)
    )

    with self.config_path.open() as f:
        config = yaml.load(f, yaml.CLoader)

    # List of unsupported ops in LTC autogen because of some error
    blacklist = set(config.get("blacklist", []))

    # List of supported ops that we don't want to do the full codegen for,
    # primarily view ops
    supported = set(config.get("supported", []))

    # List of non-native ops to do IR codegen for
    non_native = config.get("non_native", [])

    # use ripgrep if available as it's much faster
    if which("rg") is not None:
        cmd = ["rg", "-o", "-N", r"aten::[0-9a-zA-Z_\.]+"]
    else:
        cmd = ["grep", "-o", r"aten::[0-9a-zA-Z_\.]\+"]

    torch_ops = set(
        op[6:]
        for op in subprocess.check_output(
            cmd + [str(self.torch_ops_file)],
            encoding="utf-8",
        )
        .strip()
        .split(os.linesep)
    )
    torch_opnames = get_opnames(torch_ops)

    # process ops list
    ops = set()
    composite_implicit = set()

    for op in torch_ops:
        if op not in self.native_functions:
            continue

        func = self.native_functions[op]
        base = func.func.name.name.base

        if base in blacklist or op in blacklist:
            continue
        if base in supported or op in supported:
            continue
        # Blacklist new_/_like ops since they are non-differentiable.
if any(o.startswith("new_") or o.endswith("_like") for o in (base, op)): continue if func.has_composite_implicit_autograd_kernel: composite_implicit.add(op) elif func.func.name.name.inplace: for autogen in func.autogen: if "functional" in autogen.overload_name: ops.add(str(autogen)) else: ops.add(op) skipped = set(torch_ops) - ops - supported - composite_implicit # List of ops autogen even if not explicitly supported by Torch-MLIR explicitly ops |= set(config.get("whitelist", [])) # Additional ops to support that are not supported by Torch-MLIR explicitly supported |= set(config.get("additional_ops", [])) self.ops = sorted(ops) with self.source_yaml.open("w") as f: source_yaml = { "backend": "Lazy", "cpp_namespace": "torch::lazy", "full_codegen": self.ops, "supported": sorted(supported), "non_native": non_native, } yaml.dump(source_yaml, f, default_flow_style=False) f.write( dedent( """ # Composite implicit ops (supported by Torch-MLIR but not differentiable) {composite_implicit} # Skipped ops (supported by Torch-MLIR but no equivalent native function) {skipped} """ ).format( composite_implicit=os.linesep.join( f"# - {op}" for op in sorted(composite_implicit) ), skipped=os.linesep.join(f"# - {op}" for op in sorted(skipped)), ) ) if ts_native_yaml: ts_full_codegen = set(ts_native_yaml["full_codegen"]) mlir_full_codegen = set(self.ops) if ts_full_codegen - mlir_full_codegen: logging.debug( "Full Codegen ops supported by the TorchScript backend " "but not by the Torch-MLIR backend:\n {}".format( "\n ".join(sorted(ts_full_codegen - mlir_full_codegen)) ) ) if mlir_full_codegen - ts_full_codegen: logging.debug( "Full Codegen ops supported by the Torch-MLIR backend " "but not by the TorchScript backend:\n {}".format( "\n ".join(sorted(mlir_full_codegen - ts_full_codegen)) ) )
def main() -> None:
    parser = argparse.ArgumentParser(description="Generate ATen source files")
    parser.add_argument(
        "-s",
        "--source-path",
        help="path to source directory for ATen",
        default="caffe2/aten/src/ATen",
    )
    parser.add_argument(
        "-p",
        "--generated-ops-cpp-path",
        help="path to directory to generate op dispatcher .cpp file",
        default="caffe2/torch/csrc/jit/runtime/static/generated_ops.cpp",
    )
    parser.add_argument(
        "-t",
        "--generated-ops-test-cpp-path",
        help="path to directory to generate op dispatcher test .cc file",
        default="caffe2/benchmarks/static_runtime/test_generated_ops.cc",
    )
    options = parser.parse_args()

    native_yaml_path = os.path.join(options.source_path, "native/native_functions.yaml")
    tags_yaml_path = os.path.join(options.source_path, "native/tags.yaml")
    parsed_yaml = gen.parse_native_yaml(native_yaml_path, tags_yaml_path)
    native_functions, backend_indices = (
        parsed_yaml.native_functions,
        parsed_yaml.backend_indices,
    )

    op_generator = generator.GenOpDispatcher()
    test_case_generator = generator.GenOpTestCase()

    native_functions_groups = [
        g
        for g in gen.get_grouped_native_functions(native_functions)
        if isinstance(g, NativeFunctionsGroup)
    ]
    supported_functions_groups = group_functions_by_op_name(native_functions_groups)

    out_variant_op_result = [
        op_generator.out_variant(groups, backend_indices[DispatchKey.CPU])
        for groups in supported_functions_groups
    ]
    out_variant_test_result = [
        test_case_generator.out_variant(groups)
        for groups in supported_functions_groups
    ]

    native_functions_view_groups = [
        g
        for g in gen.get_grouped_by_view_native_functions(native_functions)
        if isinstance(g, NativeFunctionsViewGroup)
    ]
    supported_functions_view_groups = group_functions_by_op_name(
        native_functions_view_groups
    )

    view_op_result = [
        op_generator.view(groups, backend_indices[DispatchKey.CPU])
        for groups in supported_functions_view_groups
    ]
    view_test_result = [
        test_case_generator.view(groups)
        for groups in supported_functions_view_groups
    ]

    op_result = out_variant_op_result + ["\n\n"] + view_op_result
    test_result = out_variant_test_result + ["\n\n"] + view_test_result

    write_cpp(op_result, options.generated_ops_cpp_path)
    write_test_cpp(test_result, options.generated_ops_test_cpp_path)

    print(
        "\ntotal grouped native ops: %d"
        % len(gen.get_grouped_native_functions(native_functions))
    )

    print("grouped native ops with out variant: %d" % len(native_functions_groups))
    supported_functions_num = sum(
        len(groups) for groups in supported_functions_groups
    )
    print("generated functions groups with out variant: %d" % supported_functions_num)

    print("\nview grouped native ops: %d" % len(native_functions_view_groups))
    supported_view_functions_num = sum(
        len(groups) for groups in supported_functions_view_groups
    )
    print("generated functions view groups: %d" % supported_view_functions_num)

    print(
        "\noverall generated: %d"
        % (supported_functions_num + supported_view_functions_num)
    )
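# Hedged entry-point sketch: the generators above are plain argparse scripts, so a
# standard __main__ guard is all that is needed to run them. The script name and
# paths in the comment below are placeholders; the real defaults are the
# caffe2-relative paths declared on the parser above.
#
#   python gen_static_runtime_ops.py -s <aten-source-path> \
#       -p <generated_ops.cpp path> -t <test_generated_ops.cc path>
if __name__ == "__main__":
    main()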