def process_function(f: NativeFunction) -> Optional[str]:
    """Generate a C++ variable-factory wrapper for a factory function.

    Returns None when *f* is not a function-variant factory (i.e. it neither
    takes TensorOptions nor is a ``*_like`` overload); otherwise returns the
    generated C++ source as a string.

    NOTE(review): the f-string template at the end of this function appears
    truncated in this view of the file — its body is not visible here.
    """
    name = cpp.name(f.func)
    has_tensor_options = python.has_tensor_options(f)
    # A "factory" op either accepts TensorOptions or is a *_like variant.
    is_factory = has_tensor_options or name.endswith("_like")
    if Variant.function not in f.variants or not is_factory:
        return None
    sig = CppSignatureGroup.from_native_function(f, method=False).signature
    formals: List[str] = []  # C++ parameter declarations, defaults included
    exprs: List[str] = []    # argument expressions forwarded to the kernel call
    requires_grad = "false"  # C++ expression for the result's requires_grad bit
    for arg in sig.arguments():
        qualified_type = fully_qualified_type(arg.type)
        if arg.default:
            formals.append(f"{qualified_type} {arg.name} = {arg.default}")
        else:
            formals.append(f"{qualified_type} {arg.name}")
        if isinstance(arg.argument, TensorOptionsArguments):
            # note: we remove the requires_grad setting from the TensorOptions because
            # it is ignored anyways (and we actually have an assertion that it isn't set
            # which would fail otherwise). We handle requires_grad explicitly here
            # instead of passing it through to the kernel.
            exprs.append(
                f"at::TensorOptions({arg.name}).requires_grad(c10::nullopt)")
            # Manually set the requires_grad bit on the result tensor.
            requires_grad = f"{arg.name}.requires_grad()"
        else:
            exprs.append(arg.name)
    # NOTE(review): template body not visible in this chunk — truncated below.
    return f"""\
def __call__(self, f: NativeFunction) -> str:
    """Generate registration code for a root operator's schema and arguments.

    Returns "" when *f* is not a root operator per the build selector.

    NOTE(review): the f-string template at the end of this method appears
    truncated in this view of the file — its body is not visible here.
    """
    if not self.selector.is_root_operator(f"aten::{f.func.name}"):
        return ""
    # We unconditionally generate function wrappers,
    sig_group = CppSignatureGroup.from_native_function(f, method=False)
    sig = sig_group.most_faithful_signature()
    # escape double quote in schema, get rid of extra double quotes
    schema = cpp_string(str(sig.func))[1:-1]
    # arguments
    args = sig.arguments()
    connector = ",\n\t\t"
    args_code = []
    for arg in args:
        # Map each argument's default (if any) to a C++ default expression:
        # no default -> nullopt IValue; "{...}" literal -> IntArrayRef;
        # anything else -> wrapped in an IValue.
        if not arg.default:
            arg_cpp = "c10::IValue(c10::nullopt)"
        elif arg.default.startswith("{"):
            arg_cpp = f"c10::IntArrayRef({arg.default})"
        else:
            arg_cpp = f"c10::IValue({arg.default})"
        args_code.append(
            f"""c10::Argument("{arg.name}", nullptr, c10::nullopt, {arg_cpp})"""
        )
    returns = f.func.returns
    returns_code = []
    for ret in returns:
        # Unnamed returns become empty-string argument names.
        returns_code.append(f"""c10::Argument("{ret.name if ret.name else ""}")""")
    # NOTE(review): template body not visible in this chunk — truncated below.
    return f"""
def signature_original(f: NativeFunction) -> str:
    """Render the original native signature of *f* as ``"name(type, ...)"``.

    The ``_out`` suffix of out-variants is kept; the inplace trailing
    underscore is appended only when the externally-defined ``pyi`` flag is
    truthy (presumably "generating .pyi stubs" — TODO confirm at call site).
    TensorOptionsArguments are skipped entirely, since deprecated.yaml has
    no notion of them.
    """
    # remove inplace suffix but keep outplace suffix
    op_name = str(f.func.name.name.base)
    if f.func.is_out_fn():
        op_name += "_out"
    if f.func.name.name.inplace and pyi:
        op_name += "_"
    bindings = CppSignatureGroup.from_native_function(
        f, method=False
    ).signature.arguments()
    # Simply ignore TensorOptionsArguments as it does not exist in deprecated.yaml.
    plain = [b.argument for b in bindings if isinstance(b.argument, Argument)]
    type_list = ", ".join(argument_type_str(a.type) for a in plain)
    return f"{op_name}({type_list})"
def __call__(self, f: NativeFunction) -> str:
    """Generate the unboxing declaration or definition for a root operator.

    Returns "" for non-root operators. For Target.DECLARATION emits the
    header prototype; otherwise builds the wrapper body (conversion code,
    call, and result push).

    NOTE(review): the final f-string template of the definition branch
    appears truncated in this view of the file — its body is not visible
    here. Whitespace inside reconstructed multi-line literals may differ
    from the original — TODO confirm against the real file.
    """
    if not self.selector.is_root_operator(f"aten::{f.func.name}"):
        return ""
    if self.target is Target.DECLARATION:
        # Note [The ATen Codegen Unboxing API]
        # Similar to the ATen Operators API, ATen Codegen Unboxing API lives in the at::unboxing namespace, and
        # will be used by codegen unboxing wrappers (CodegenUnboxingWrappers.cpp).
        # The Wrappers will be registered into torch::jit::OperatorRegistry using RegisterOperators API.
        #
        # Important characteristics about the Codegen Unboxing API:
        # (1) It follows the OperatorRegistry API.
        # This is kind of necessary to avoid overhead.
        # For example: if it followed the C++ API, then all of the faithful C++ factory functions
        # would need to wrap their arguments into TensorOptions only to unwrap them again.
        # (2) Under the hood it calls C++ API.
        return f"""
// aten::{f.func}
TORCH_API void {f.func.name.unambiguous_name()}(Stack & stack);
"""
    else:
        sig_group = CppSignatureGroup.from_native_function(
            f, method=(Variant.method in f.variants)
        )
        sig = sig_group.most_faithful_signature()
        # parse arguments into C++ code
        binding_list, code_list = convert_arguments(f)
        # for each C++ argument, generate the conversion code
        code_connector = "\n\t"
        arg_connector = ", "
        # function call and push back to stack
        prefix = "self_base." if sig.method else "at::"
        translated_args = translate(
            binding_list, sig.arguments(), method=sig.method
        )
        args_str = f"{arg_connector.join(e.expr for e in translated_args)}"
        if len(f.func.returns) == 0:
            # Void op: nothing to capture, nothing to push back on the stack.
            ret_str = ""
            push_str = ""
        else:
            ret_str = "auto result_ = "
            push_str = """
    pack(stack, std::move(result_));
"""
        # NOTE(review): template body not visible in this chunk — truncated below.
        return f"""
def process_function(f: NativeFunction) -> Optional[str]:
    """Emit inline variable-factory wrapper(s) for a factory function.

    A wrapper is generated for the plain C++ signature and, when present,
    for the symint signature as well. Each wrapper calls the underlying
    ``at::`` function below the ADInplaceOrView dispatch layer and wraps
    the result in an autograd Variable with the requested requires_grad.

    Returns None when *f* is not a function-variant factory (neither takes
    TensorOptions nor is a ``*_like`` overload).
    """
    op_name = cpp.name(f.func)
    takes_options = python.has_tensor_options(f)
    # A "factory" op either accepts TensorOptions or is a *_like variant.
    is_factory = takes_options or op_name.endswith("_like")
    if Variant.function not in f.variants or not is_factory:
        return None

    group = CppSignatureGroup.from_native_function(f, method=False)
    sigs = [group.signature]
    if group.symint_signature is not None:
        sigs.append(group.symint_signature)

    pieces: List[str] = []
    for sig in sigs:
        formal_decls: List[str] = []  # C++ parameter declarations
        call_exprs: List[str] = []    # expressions forwarded to at::{name}(...)
        requires_grad = "false"       # expression for the result's grad bit
        for binding in sig.arguments():
            full_type = fully_qualified_type(binding.type)
            if binding.default:
                formal_decls.append(f"{full_type} {binding.name} = {binding.default}")
            else:
                formal_decls.append(f"{full_type} {binding.name}")
            if isinstance(binding.argument, TensorOptionsArguments):
                # note: we remove the requires_grad setting from the TensorOptions because
                # it is ignored anyways (and we actually have an assertion that it isn't set
                # which would fail otherwise). We handle requires_grad explicitly here
                # instead of passing it through to the kernel.
                call_exprs.append(
                    f"at::TensorOptions({binding.name}).requires_grad(c10::nullopt)"
                )
                # Manually set the requires_grad bit on the result tensor.
                requires_grad = f"{binding.name}.requires_grad()"
            else:
                call_exprs.append(binding.name)
        pieces.append(f"""\
inline at::Tensor {sig.name()}({', '.join(formal_decls)}) {{
  at::AutoDispatchBelowADInplaceOrView guard;
  return autograd::make_variable(at::{sig.name()}({', '.join(call_exprs)}), /*requires_grad=*/{requires_grad});
}}
""")
    return "".join(pieces)
def __call__(self, f: NativeFunction) -> str:
    """Generate schema/argument registration code for a root operator.

    Returns "" when *f* is not a root operator per the build selector.
    Unlike the simpler variant of this wrapper, defaults are resolved
    through ``cpp.default_expr`` before being wrapped for C++.

    NOTE(review): the f-string template at the end of this method appears
    truncated in this view of the file — its body is not visible here.
    """
    if not self.selector.is_root_operator(f"aten::{f.func.name}"):
        return ""
    # We unconditionally generate function wrappers,
    sig_group = CppSignatureGroup.from_native_function(f, method=False)
    sig = sig_group.most_faithful_signature()
    # escape double quote in schema, get rid of extra double quotes
    schema = cpp_string(str(sig.func))[1:-1]
    # arguments
    args = sig.arguments()
    connector = ",\n\t\t"
    args_code = []
    for arg in args:
        # Using method=False faithful C++ API, so we should not see SelfArgument/TensorOptionsArgument
        assert isinstance(arg.argument, Argument)
        if not arg.argument.default:
            arg_cpp = "c10::IValue(c10::nullopt)"
        else:
            # The unboxing code uses the faithful C++ API to avoid the overhead
            # from wrapping/unwrapping TensorOptions.
            # However, we would like to include default args for schema parsing.
            # Default args only show up in the nonfaithful C++ API,
            arg_default = cpp.default_expr(arg.argument.default, arg.argument.type)
            if arg_default.startswith("{"):
                arg_cpp = f"c10::IntArrayRef({arg_default})"
            else:
                arg_cpp = f"c10::IValue({arg_default})"
        args_code.append(
            f"""c10::Argument("{arg.name}", nullptr, c10::nullopt, {arg_cpp})"""
        )
    returns = f.func.returns
    returns_code = []
    for ret in returns:
        # Unnamed returns become empty-string argument names.
        returns_code.append(
            f"""c10::Argument("{ret.name if ret.name else ""}")""")
    # NOTE(review): template body not visible in this chunk — truncated below.
    return f"""
def convert_arguments(f: NativeFunction) -> Tuple[List[Binding], List[str]]:
    """Build the IValue-to-C++ unboxing preamble for every argument of *f*.

    Returns a pair of:
      * bindings renamed to their unboxed C++ variable names, and
      * the C++ source lines that peek each IValue off the stack and
        convert it to its native type.

    Raises Exception if any argument is not a plain ``Argument``.
    """
    # we need the 'self' argument so method needs to be False
    bindings = (
        CppSignatureGroup.from_native_function(f, method=False)
        .most_faithful_signature()
        .arguments()
    )
    total = len(bindings)
    # One peek per stack slot, in declaration order, followed by a blank line.
    conversion_lines: List[str] = [
        f"c10::IValue {b.name} = std::move(peek(stack, {idx}, {total}));"
        for idx, b in enumerate(bindings)
    ]
    conversion_lines.append("")

    unboxed_bindings: List[Binding] = []
    for b in bindings:
        # expecting only Argument
        if not isinstance(b.argument, Argument):
            raise Exception(
                f"Unexpected argument type, expecting `Argument` but got {b}"
            )
        a: Argument = b.argument
        new_name, _, conv_code, conv_decl = argumenttype_ivalue_convert(
            a.type, a.name, mutable=a.is_write
        )
        conversion_lines += conv_decl
        conversion_lines += conv_code
        unboxed_bindings.append(b.with_name(new_name))
    return unboxed_bindings, conversion_lines
def gen_one(self, f: NativeFunction) -> Optional[str]:
    """Generate code for one structured operator at the current target.

    Depending on ``self.target`` this emits the namespaced declaration,
    the namespaced definition, the anonymous wrapper definition (which
    drives the structured meta/impl protocol), or the dispatcher
    registration line. Returns None when nothing should be generated.
    """
    assert not f.manual_kernel_registration
    if (self.target is Target.REGISTRATION
            and not self.selector.is_native_function_selected(f)):
        return None
    # TODO: Now, there is something interesting going on here. In the code below,
    # we generate CompositeExplicitAutograd implementations of functional and inplace
    # based on the out implementation. But in fact, out is definable by
    # functional too (just not very efficiently), and this is honestly the
    # MORE likely situation for a backend implementor. How do we pick?
    # Well, taking a page from Haskell type classes and default methods,
    # we could conceivably register a circular definition (out in terms
    # of functional, and functional in terms of out) and just require
    # someone to implement one or the other. We'd have to do a little bit
    # of work to not register one of these "weak" definitions unless there
    # is a strong definition somewhere in the DAG! So it's not implemented yet.
    if (self.backend_index.dispatch_key == DispatchKey.CompositeExplicitAutograd
            and f.func.kind() is SchemaKind.out):
        # Never generate a default implementation for out, that's what you
        # have to define as a backend implementor
        return None

    # Note [Direct dispatch bindings]
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    # Signature of the non-dispatched function we'll expose in a header
    # (e.g., at::cpu::add). We don't generate methods (TODO: do this
    # when CPUTensor class is a thing); nor do we generate fallback
    # bindings for manual_cpp_binding functions.
    cpp_sig_group = CppSignatureGroup.from_native_function(
        f, method=False, fallback_binding=False)

    # Signature of the wrapper function we'll register to the dispatcher
    sig = NativeSignature(f.func, prefix="wrapper_")

    if self.target is Target.NAMESPACED_DECLARATION:
        result = f"TORCH_API {cpp_sig_group.signature.decl()};\n"
        if cpp_sig_group.faithful_signature is not None:
            result += f"TORCH_API {cpp_sig_group.faithful_signature.decl()};\n"
        return result
    elif self.target is Target.NAMESPACED_DEFINITION:
        # Each namespaced definition just forwards to the wrapper function.
        def generate_defn(cpp_sig: CppSignature) -> str:
            return f"""
{cpp_sig.defn()} {{
return {sig.name()}({', '.join(e.expr for e in translate(cpp_sig.arguments(), sig.arguments()))});
}}
"""
        result = generate_defn(cpp_sig_group.signature)
        if cpp_sig_group.faithful_signature is not None:
            result += generate_defn(cpp_sig_group.faithful_signature)
        return result
    elif self.target is Target.ANONYMOUS_DEFINITION:
        k = f.func.kind()

        # Construct the body of the wrapper function with signature sig
        sig_body = []
        # We'll use context to keep track of any variables we've brought
        # into scope while generating code
        context: List[Union[Binding, Expr]] = list(sig.arguments())

        # Initialize the class corresponding to this structured
        # operator; feeding it the output argument(s) if it is known
        if self.backend_index.dispatch_key is DispatchKey.Meta:
            class_name = f"structured_{meta.name(self.g)}_meta_{k.name}"
            parent_class = f"at::meta::structured_{meta.name(self.g)}"
        elif (self.backend_index.dispatch_key is
              DispatchKey.CompositeExplicitAutograd):
            # TODO: dedup this branch
            class_name = f"structured_{meta.name(self.g)}_default_backend_{k.name}"
            parent_class = f"at::meta::structured_{meta.name(self.g)}"
        else:
            metadata = self.backend_index.get_kernel(self.g)
            assert metadata is not None
            class_name = f"structured_{metadata.kernel}_{k.name}"
            parent_class = f"{self.cpp_namespace}::structured_{metadata.kernel}"

        if self.backend_index.device_guard:
            device_check_args = itertools.chain(
                f.func.arguments.out,
                f.func.arguments.flat_positional)
            sig_body.append(
                RegisterDispatchKey.gen_device_check(
                    f.device_check, list(device_check_args), sig.name()))

        # The structured class is constructed differently per schema kind:
        # functional allocates outputs, inplace/out bind existing tensors.
        if k is SchemaKind.functional:
            sig_body.append(f"{class_name} op;")
        elif k is SchemaKind.inplace:
            sig_body.append(f"{class_name} op(self);")
        elif k is SchemaKind.out:
            out_args_str = ", ".join(a.name for a in f.func.arguments.out)
            sig_body.append(f"{class_name} op({out_args_str});")

        # Translate the input native arguments into structured
        # arguments for the meta call
        meta_exprs = ", ".join(e.expr for e in translate(
            context, structured.meta_arguments(self.g), method=False))

        if self.g.out.precomputed:
            # If this function group has precomputed elements, the meta function
            # returns a struct containing them which must be saved so that it
            # can be unpacked when generating code to call the impl.
            sig_body.append(f"auto precompute = op.meta({meta_exprs});")

            # Put all of the contents of the precompute struct into the context
            # so that translate will be able to return the correct args for the
            # call to the impl.
            precomputed_values = [
                *self.g.out.precomputed.replace.values(),
                self.g.out.precomputed.add,
            ]
            for precomputed_elems in precomputed_values:
                for arg in precomputed_elems:
                    context.append(
                        Expr(
                            expr=f"precompute.{arg.name}",
                            type=structured.argument_type(arg, binds=arg.name),
                        ))

            # Add a use of the precompute struct so FB internal compilers don't
            # complain that there is an unused variable.
            sig_body.append("(void)precompute;")
        else:
            sig_body.append(f"op.meta({meta_exprs});")

        # After running meta, op.outputs_ is guaranteed to be valid;
        # add it to the context
        out_args = structured.out_arguments(self.g)
        maybe_star = "*" if k is SchemaKind.functional else ""
        for i, out_arg in enumerate(out_args):
            assert ConstRefCType(BaseCType(tensorT)) == out_arg.nctype.type
            context.append(
                Expr(
                    expr=f"{maybe_star}op.outputs_[{i}]",
                    # TODO: Stop hardcoding that the output type is a Tensor. Note
                    # that for the codegen here this is fine because outputs_ is
                    # hardcoded to be tensor already
                    type=NamedCType(out_arg.nctype.name,
                                    MutRefCType(BaseCType(tensorT))),
                ))

        # With the expanded context, do the impl call (if not a meta
        # function)
        if self.backend_index.dispatch_key == DispatchKey.CompositeExplicitAutograd:
            # TODO: https://github.com/pytorch/pytorch/issues/53023
            out_sig_group = CppSignatureGroup.from_native_function(
                self.g.out, method=False, fallback_binding=f.manual_cpp_binding)
            out_sig = out_sig_group.most_faithful_signature()
            api_name = out_sig.name()
            out_exprs = ", ".join(e.expr for e in translate(
                context, out_sig.arguments(), method=False))
            # TODO: I think this means structured won't work with method
            # only functions (but maybe you're saved by faithful? iunno.)
            # NB: Originally I wrote this as an at::redispatch call, but
            # I got in trouble because that meant I needed a DispatchKeySet
            # in the wrapper function, which meant I needed a DispatchKeySet
            # in the DispatchKeyFunctions declarations, but the defined API
            # there does NOT permit a dispatch key set. I think you can
            # probably unwind this by calling some function to do the TLS
            # fetch and get the DispatchKeySet when you don't have it, but
            # I didn't do it for this version
            sig_body.append(f"at::{api_name}({out_exprs});")
        elif self.backend_index.dispatch_key != DispatchKey.Meta:
            impl_exprs = ", ".join(e.expr for e in translate(
                context, structured.impl_arguments(self.g), method=False))
            sig_body.append(f"op.impl({impl_exprs});")

        # Destructively return the final tensors
        # TODO: Do this in translate instead
        if k is SchemaKind.functional:
            if len(f.func.returns) == 1:
                ret_expr = "std::move(op.outputs_[0]).take()"  # small optimization
            else:
                moved = ", ".join(f"std::move(op.outputs_[{i}]).take()"
                                  for i in range(len(f.func.returns)))
                ret_expr = f"std::make_tuple({moved})"
        elif k is SchemaKind.inplace:
            ret_expr = "self"
        elif k is SchemaKind.out:
            if len(f.func.returns) == 1:
                ret_expr = f.func.arguments.out[0].name
            else:
                refs = ", ".join(a.name for a in f.func.arguments.out)
                ret_expr = f"std::forward_as_tuple({refs})"
        sig_body.append(f"return {ret_expr};")

        sig_body_str = "\n".join(sig_body)

        # For an overview of what this template code looks like, see
        # https://github.com/pytorch/rfcs/pull/9
        # NOTE(review): whitespace inside this reconstructed template may
        # differ from the original file — TODO confirm.
        return f"""\
{self.gen_class(
f, k,
class_name=class_name,
parent_class=parent_class,
generate_super=self.g.out.structured_inherits is not None
)}

{sig.defn()} {{
{sig_body_str}
}}
"""

    elif self.target is Target.REGISTRATION:
        return f'm.impl("{f.func.name}", TORCH_FN({sig.name()}));'
    else:
        assert_never(self.target)
        # Silence mypy's "Missing return statement" error
        return None
def gen_unstructured(
        self, f: NativeFunction,
        g: Optional[NativeFunctionsGroup] = None) -> Optional[str]:
    """Generate code for one unstructured operator at the current target.

    Emits the namespaced declaration/definition, the anonymous wrapper
    (with device check and device guard handling), or the dispatcher
    registration. Returns None when nothing should be generated for this
    backend/target/selector combination.
    """
    with native_function_manager(f):
        inplace_meta = False
        gets_out_inplace_wrapper = False
        if not self.backend_index.has_kernel(f):
            if (self.backend_index.dispatch_key == DispatchKey.Meta
                    and f.func.kind() is SchemaKind.inplace and
                    # Defer to composites for meta implementation
                    not f.has_composite_kernel and
                    # Inplace list operations are not supported
                    len(f.func.returns) == 1):
                inplace_meta = True
            elif (not self.backend_index.use_out_as_primary
                  and g is not None
                  and gets_generated_out_inplace_wrapper(
                      f, g, self.backend_index)):
                # We want to generate inplace/out wrappers, that don't have a kernel for the backend.
                gets_out_inplace_wrapper = True
            else:
                return None
        if f.manual_kernel_registration:
            return None
        if (self.target is Target.REGISTRATION
                and not self.selector.is_native_function_selected(f)):
            return None

        sig = self.wrapper_kernel_sig(f)

        name = sig.name()
        returns_type = sig.returns_type().cpp_type()
        args = sig.arguments()
        args_str = ", ".join(a.defn() for a in args)

        # See Note [Direct dispatch bindings]
        cpp_sig_group = CppSignatureGroup.from_native_function(
            f, method=False, fallback_binding=False)

        if self.target is Target.NAMESPACED_DECLARATION:
            result = f"TORCH_API {cpp_sig_group.signature.decl()};\n"
            if cpp_sig_group.faithful_signature is not None:
                result += f"TORCH_API {cpp_sig_group.faithful_signature.decl()};\n"
            return result
        elif self.target is Target.NAMESPACED_DEFINITION:
            # Each namespaced definition just forwards to the wrapper function.
            def generate_defn(cpp_sig: CppSignature) -> str:
                return f"""
{cpp_sig.defn()} {{
return {sig.name()}({', '.join(e.expr for e in translate(cpp_sig.arguments(), sig.arguments()))});
}}
"""
            result = generate_defn(cpp_sig_group.signature)
            if cpp_sig_group.faithful_signature is not None:
                result += generate_defn(cpp_sig_group.faithful_signature)
            return result
        elif self.target is Target.ANONYMOUS_DEFINITION:
            # short circuit for inplace_meta
            if inplace_meta:
                assert f.func.arguments.self_arg is not None
                self_arg_name = f.func.arguments.self_arg.argument.name
                # TODO: handle in place on tensor list
                # NOTE(review): whitespace inside this reconstructed
                # template may differ from the original file — TODO confirm.
                return f"""
{returns_type} {name}({args_str}) {{
  TORCH_CHECK_NOT_IMPLEMENTED({self_arg_name}.is_meta(),
    "Cannot inplace into non-meta tensor with meta tensor argument");
  return {self_arg_name};
}}
"""
            # short circuit for generated inplace/out wrappers
            if gets_out_inplace_wrapper:
                return self.gen_out_inplace_wrapper(f, g)

            metadata = self.backend_index.get_kernel(f)
            if metadata is None:
                return None
            if self.class_method_name is None:
                impl_name = f"{self.cpp_namespace}::{metadata.kernel}"
            else:
                impl_name = f"{self.cpp_namespace}::{self.class_method_name}::{metadata.kernel}"

            args_exprs_str = ", ".join(a.name for a in args)

            device_check = " // No device check\n"
            # Backends that require device guards presumably also require device checks.
            if self.backend_index.device_guard:
                device_check_args = itertools.chain(
                    f.func.arguments.out, f.func.arguments.flat_positional)
                device_check = RegisterDispatchKey.gen_device_check(
                    f.device_check, list(device_check_args), name)

            device_guard = "// DeviceGuard omitted"  # default
            if f.device_guard and self.backend_index.device_guard:
                has_tensor_options = any(
                    isinstance(a, TensorOptionsArguments)
                    for a in f.func.arguments.non_out)
                if has_tensor_options:
                    # kernel is creating a tensor
                    device_guard = """
  const DeviceGuard device_guard(device_or_default(device));"""
                    # CUDA requires special handling
                    if is_cuda_dispatch_key(
                            self.backend_index.dispatch_key):
                        device_guard = (
                            f"globalContext().lazyInitCUDA();\n{device_guard}"
                        )
                else:
                    # kernel is operating on existing tensors

                    # There is precedence for which argument we use to do
                    # device guard. This describes the precedence order.
                    self_arg = ([
                        f.func.arguments.self_arg.argument
                    ] if f.func.arguments.self_arg is not None else [])
                    candidate_args = itertools.chain(
                        self_arg,
                        f.func.arguments.out,
                        f.func.arguments.flat_positional,
                    )

                    # Only tensor like arguments are eligible
                    device_of = next(
                        (f"{a.name}"
                         for a in candidate_args if a.type.is_tensor_like()),
                        None,
                    )
                    if device_of is not None:
                        device_guard = f"const OptionalDeviceGuard device_guard(device_of({device_of}));"

            # NOTE(review): whitespace inside this reconstructed template
            # may differ from the original file — TODO confirm.
            return f"""\
namespace {{

{returns_type} {name}({args_str}) {{
{device_check}
{device_guard}
return {impl_name}({args_exprs_str});
}}

}} // anonymous namespace
"""
        elif self.target is Target.REGISTRATION:
            if f.manual_kernel_registration or self.skip_dispatcher_op_registration:
                return None
            else:
                payload = f"TORCH_FN({name})"
                return f'm.impl("{f.func.name}",\n{payload});\n'
        else:
            assert_never(self.target)
def cpp_arguments(f: NativeFunction) -> Sequence[Binding]:
    """Return the argument bindings of *f*'s function-variant C++ signature."""
    group = CppSignatureGroup.from_native_function(f, method=False)
    return group.signature.arguments()