def invocation(self, f: NativeFunction) -> str:
    """Render a C++ call expression invoking *f* under its most faithful name.

    Ops that have a function variant are called as ``at::name(args...)``;
    method-only ops are called on their first argument as the receiver.
    """
    op_name = self.most_faithful_name(f)
    arg_names = tuple(a.name for a in dispatcher.arguments(f.func))
    if Variant.function in f.variants:
        return f"at::{op_name}({', '.join(arg_names)})"
    # Method-only op: the first dispatcher argument is the receiver.
    receiver, rest = arg_names[0], arg_names[1:]
    return f"{receiver}.{op_name}({', '.join(rest)})"
def __init__(self, kernel_name: str, f: NativeFunction):
    """Capture what is needed to emit a lazy-IR kernel call for *f*.

    Stores the parsed schema, the comma-joined dispatcher parameter
    declarations, the comma-joined call-argument names (from the schema's
    filtered types), and the target kernel name.
    """
    schema = LazyIrSchema(f.func)
    self.__schema = schema
    self.__dispatch_args = ", ".join(a.decl() for a in dispatcher.arguments(f.func))
    self.__call_args = ", ".join(t.name for t in schema.filtered_types())
    self.__kernel_name = kernel_name
def compute_registration_declarations(f: NativeFunction) -> str:
    """Emit one RegistrationDeclarations.h line for *f*.

    The line is the dispatcher-level C++ signature followed by a JSON-like
    comment recording the op schema, whether the op has an explicit dispatch
    table, and whether that table contains a 'Math' (composite) entry.

    Fix: the return f-string was left unterminated; the closing quotes (with
    the original trailing newline) are restored.
    """
    name = dispatcher.name(f.func)
    returns_type = dispatcher.returns_type(f.func.returns)
    args = dispatcher.arguments(f.func)
    args_str = ', '.join(map(str, args))
    has_dispatch = f.dispatch is not None
    # A 'Math' entry marks a composite kernel implemented in terms of other ops.
    is_math = has_dispatch and 'Math' in f.dispatch  # type: ignore
    return f"""{returns_type} {name}({args_str}); // {{"schema": "aten::{f.func}", "dispatch": "{has_dispatch}", "math": "{is_math}"}}
"""
def go(f: NativeFunction) -> Optional[str]:
    """Generate BackendSelect glue for *f*, or None if it does not apply.

    NOTE(review): `target` and several helpers come from an enclosing scope
    not visible here; original newlines were lost, so string-literal line
    breaks below are reconstructed — confirm against generated output.
    """
    # Skip factory variants like *_like / new_* — they select their backend
    # from an existing tensor rather than from TensorOptions.
    if str(f.func.name.name).endswith('_like') or str(f.func.name.name).startswith('new_'):
        return None
    name = legacy_dispatcher.name(f.func)
    legacy_dispatcher_returns_type = legacy_dispatcher.returns_type(f.func.returns)
    legacy_dispatcher_args = legacy_dispatcher.arguments(f.func)
    # Only ops taking TensorOptions participate in backend selection.
    if not any(isinstance(a.argument, TensorOptionsArguments) for a in legacy_dispatcher_args):
        return None
    # Tensor-like (non-options) arguments also contribute dispatch keys.
    legacy_dispatcher_tensor_args = [
        a for a in legacy_dispatcher_args
        if isinstance(a.argument, Argument) and a.argument.type.is_tensor_like()
    ]
    dispatcher_returns_type = dispatcher.returns_type(f.func.returns)
    dispatcher_args = dispatcher.arguments(f.func)
    dispatcher_exprs = dispatcher.legacydispatcherarguments_exprs(legacy_dispatcher_args)
    if target is Target.DEFINITION:
        # See Note [Byte-for-byte compatibility]
        # I don't think there's actually a good reason to generate
        # these two cases differently
        if legacy_dispatcher_tensor_args:
            tensor_args = ', '.join(a.name for a in legacy_dispatcher_tensor_args)
            # Combine the options-derived key with keys from tensor args,
            # masked to everything after BackendSelect.
            compute_dk = f"""\
DispatchKeySet _dk_set = DispatchKeySet(options.computeDispatchKey()) | c10::detail::multi_dispatch_key_set({tensor_args});
  DispatchKeySet _dk_mask = c10::DispatchKeySet(DispatchKeySet::FULL_AFTER, DispatchKey::BackendSelect);
  DispatchKey _dk = c10::impl::dispatchTypeId(_dk_set, _dk_mask);"""
        else:
            compute_dk = "DispatchKey _dk = options.computeDispatchKey();"
        return f"""\
// aten::{f.func}
{legacy_dispatcher_returns_type} {name}({', '.join(a.str_with_default() for a in legacy_dispatcher_args)}) {{
  static auto op = c10::Dispatcher::singleton()
    .findSchemaOrThrow("aten::{f.func.name.name}", "{f.func.name.overload_name}")
    .typed<{dispatcher_returns_type} ({', '.join(a.type for a in dispatcher_args)})>();
  {compute_dk}
  DispatchKey _autograd_dk = c10::getAutogradKeyFromBackend(_dk);
  // This trick allows calling Autograd backend kernel first and then backend kernel,
  // without adding another AutogradBackendSelect dispatch key.
  DispatchKey _current_dk = at::impl::variable_excluded_from_dispatch() ? _dk : _autograd_dk;
  return op.callWithDispatchKey(_current_dk, {', '.join(a.expr for a in dispatcher_exprs)});
}}
"""
    elif target is Target.REGISTRATION:
        if local.use_c10_dispatcher() is UseC10Dispatcher.full:
            return f"""m.impl("aten::{f.func.name}",
          c10::impl::hacky_wrapper_for_legacy_signatures<{dispatcher_returns_type} ({', '.join(a.type for a in dispatcher_args)})>(
            TORCH_FN({name})));"""
        else:
            return f"""m.impl_UNBOXED("aten::{f.func.name}", {name});"""
    elif target is Target.DECLARATION:
        # Declarations are never requested for BackendSelect.
        raise AssertionError()
    else:
        assert_never(target)
def gen_definition(self, f: NativeFunction) -> str:
    """Emit the C++ definition of the unambiguous wrapper for *f*.

    The wrapper has the dispatcher signature under the unambiguous name and
    simply forwards to the faithful invocation rendered by self.invocation().

    Fix: removed the unused local ``args`` (dispatcher.arguments was computed
    and never read).
    """
    unambiguous_name = self.unambiguous_function_name(f)
    sig = DispatcherSignature.from_schema(f.func)
    return deindent(f"""\
    {sig.defn(unambiguous_name)} {{
        return {self.invocation(f)};
    }}\
    """)
def compute_registration_declarations(f: NativeFunction) -> str:
    """Emit one RegistrationDeclarations.h line for *f*.

    The dispatcher-level C++ signature is followed by a JSON comment
    (via json.dumps) with the op schema, whether it has an explicit
    dispatch table, and whether that table has a 'Math' entry.

    Fix: the return f-string was left unterminated; closing quotes (with the
    original trailing newline) are restored.
    """
    name = dispatcher.name(f.func)
    returns_type = dispatcher.returns_type(f.func.returns)
    args = dispatcher.arguments(f.func)
    args_str = ', '.join(map(str, args))
    comment_data: Dict[str, str] = {
        'schema': f'aten::{f.func}',
        'dispatch': str(f.dispatch is not None),
        'math': str(f.dispatch is not None and 'Math' in f.dispatch)
    }
    return f"""{returns_type} {name}({args_str}); // {json.dumps(comment_data)}
"""
def compute_registration_declarations(f: NativeFunction, backend_indices: Dict[DispatchKey, BackendIndex]) -> str:
    """Emit one RegistrationDeclarations.h line for *f*.

    The dispatcher-level C++ signature (registration-declarations flavor,
    defaults stripped) is followed by a JSON comment with the op schema,
    a 'dispatch' flag (true unless the only kernel is
    CompositeImplicitAutograd), and a 'default' flag (composite kernel,
    explicit or autogenerated).

    Fix: the return f-string was left unterminated; closing quotes (with the
    original trailing newline) are restored.
    """
    name = dispatcher.name(f.func)
    returns_type = dispatcher.returns_type(f.func.returns).cpp_type_registration_declarations()
    args = dispatcher.arguments(f.func)
    args_str = ', '.join(a.no_default().decl_registration_declarations() for a in args)
    comment_data: Dict[str, str] = {
        'schema': f'aten::{f.func}',
        # TODO: What exactly is the semantics of the 'dispatch' field?
        'dispatch': str({k for k, v in backend_indices.items() if v.has_kernel(f)} != {DispatchKey.CompositeImplicitAutograd}),
        'default': str(f.has_composite_kernel or dest.has_autogenerated_composite_kernel(f))
    }
    return f"""{returns_type} {name}({args_str}); // {json.dumps(comment_data)}
"""
def compute_registration_declarations(f: NativeFunction) -> str:
    """Emit one RegistrationDeclarations.h line for *f*.

    The dispatcher-level C++ signature is followed by a JSON comment with
    the op schema, a 'dispatch' flag (true unless the only dispatch entry is
    'Math'), and a 'default' flag (any generic dispatch key present).

    Fix: the return f-string was left unterminated; closing quotes (with the
    original trailing newline) are restored.
    """
    name = dispatcher.name(f.func)
    returns_type = dispatcher.returns_type(f.func.returns)
    args = dispatcher.arguments(f.func)
    args_str = ', '.join(map(str, args))
    comment_data: Dict[str, str] = {
        'schema': f'aten::{f.func}',
        # TODO: What exactly is the semantics of the 'dispatch' field?
        'dispatch': str(f.dispatch.keys() != {'Math'}),
        'default': str(any(is_generic_dispatch_key(k) for k in f.dispatch))
    }
    return f"""{returns_type} {name}({args_str}); // {json.dumps(comment_data)}
"""
def compute_registration_declarations(f: NativeFunction) -> str:
    """Emit one RegistrationDeclarations.h line for *f*.

    The dispatcher-level C++ signature (registration-declarations flavor,
    defaults stripped) is followed by a JSON comment with the op schema,
    a 'dispatch' flag (true unless the only dispatch entry is
    CompositeImplicitAutograd), and a 'default' flag (generic dispatch key
    present, or an autogenerated composite kernel).

    Fix: the return f-string was left unterminated; closing quotes (with the
    original trailing newline) are restored.
    """
    name = dispatcher.name(f.func)
    returns_type = dispatcher.returns_type(f.func.returns).cpp_type_registration_declarations()
    args = dispatcher.arguments(f.func)
    args_str = ', '.join(a.no_default().decl_registration_declarations() for a in args)
    comment_data: Dict[str, str] = {
        'schema': f'aten::{f.func}',
        # TODO: What exactly is the semantics of the 'dispatch' field?
        'dispatch': str(f.dispatch.keys() != {DispatchKey.CompositeImplicitAutograd}),
        'default': str(any(is_generic_dispatch_key(k) for k in f.dispatch) or dest.has_autogenerated_composite_kernel(f))
    }
    return f"""{returns_type} {name}({args_str}); // {json.dumps(comment_data)}
"""
def func(f: NativeFunction) -> Optional[str]:
    """Generate per-op Type code (declaration/definition/registration) for *f*.

    NOTE(review): `dispatch`, `target`, `op_registration_whitelist`,
    `def_only` and `local` come from an enclosing scope not visible here.
    Original newlines were lost; line breaks inside the emitted C++ string
    literals are reconstructed — confirm against generated output.
    """
    # Filter: when generating a backend-specific file, only ops with a kernel
    # for that backend; for the default file, only undispatched ops (except
    # registrations, which are also emitted for dispatched ops).
    if dispatch is not None:
        if f.dispatch is None or dispatch not in f.dispatch:
            return None
    else:
        if f.dispatch is not None and target is not Target.REGISTRATION:
            return None
    # Whitelist only restricts registrations, not declarations/definitions.
    if op_registration_whitelist is not None and \
            f"aten::{f.func.name.name}" not in op_registration_whitelist and target is Target.REGISTRATION:
        return None

    name = legacy_dispatcher.name(f.func)
    returns_type = legacy_dispatcher.returns_type(f.func.returns)
    args = legacy_dispatcher.arguments(f.func)
    args_str = ', '.join(map(str, args))

    if target is Target.DECLARATION:
        return f"{returns_type} {name}({args_str});"
    elif target is Target.DEFINITION:
        # Resolve the at::native kernel to forward to.
        if f.dispatch is None:
            cpp_name = cpp.name(f.func)
            impl_name = f"at::native::{cpp_name}"
        else:
            assert dispatch is not None
            impl_name = f"at::native::{f.dispatch[dispatch]}"

        args_exprs_str = ', '.join(map(lambda a: a.name, args))

        # See Note [Byte-for-byte compatibility]
        # (return void_func() is valid C++)
        return_kw = " return "
        if returns_type == "void":
            return_kw = " "

        cuda_guard = ""
        if dispatch is None or 'CUDA' in dispatch or 'Vulkan' == dispatch:
            self_args = (a for a in f.func.arguments if a.name == "self")

            # There is precedence for which argument we use to do
            # device guard.  This describes the precedence order.
            candidate_args = itertools.chain(self_args, f.func.out_arguments, f.func.arguments)

            # Only tensor like arguments are eligible
            device_of = next((f'{a.name}' for a in candidate_args if a.type.is_tensor_like()), None)

            # See Note [Byte-for-byte compatibility]
            # I wasn't able to figure out the internal logic for
            # these device guards
            if str(f.func.name) == "_thnn_fused_lstm_cell_backward":
                device_of = "cx"
            elif str(f.func.name) == "_thnn_differentiable_lstm_cell_backward":
                device_of = "input_gates"

            has_tensor_options = any(isinstance(a.argument, TensorOptionsArguments) for a in args)

            # TODO: There is probably a simpler version of this that
            # works just as well.
            if f.device_guard and (dispatch is None or 'Vulkan' == dispatch) and has_tensor_options:
                cuda_guard = """\
    const DeviceGuard device_guard(options.device());
"""
                # See Note [Byte-for-byte compatibility]
                if dispatch is not None:
                    cuda_guard = f"\n{cuda_guard}"
            elif f.device_guard and dispatch is not None and 'CUDA' in dispatch and has_tensor_options:
                # CUDA factory path additionally needs lazy context init.
                cuda_guard = """\
    globalContext().lazyInitCUDA();
    const DeviceGuard device_guard(options.device());
"""
            elif f.device_guard and device_of is not None:
                cuda_guard = f"""\
    const OptionalDeviceGuard device_guard(device_of({device_of}));
"""
                # See Note [Byte-for-byte compatibility]
                if dispatch is not None:
                    cuda_guard = f"\n{cuda_guard}"
            else:
                cuda_guard = """\
    // DeviceGuard omitted
"""
                # See Note [Byte-for-byte compatibility]
                if dispatch is not None:
                    cuda_guard = f"\n{cuda_guard}"

        return f"""\
{returns_type} {name}({args_str}) {{
{cuda_guard}{return_kw}{impl_name}({args_exprs_str});
}}
"""
    elif target is Target.REGISTRATION:
        assert returns_type == dispatcher.returns_type(f.func.returns)
        dispatcher_args = dispatcher.arguments(f.func)
        dispatcher_args_types_str = ', '.join(map(lambda a: a.type, dispatcher_args))
        if dispatch is None or dispatch == 'Math':
            type_name = f'TypeDefault::{name}'
        else:
            type_name = f'{dispatch}Type::{name}'

        # def registration only happens in TypeDefault
        def_registration = ""
        if dispatch is None:
            def_registration = f'm.def("{f.func}");\n'

        impl_registration = ""
        if not def_only and not f.manual_kernel_registration and (dispatch is not None or f.dispatch is None):
            # Figure out which signature the function is
            if local.use_c10_dispatcher() is UseC10Dispatcher.full:
                # See Note [Byte-for-byte compatibility]
                if dispatch is not None:
                    nl = "\n"
                else:
                    nl = ""

                payload = "c10::impl::hacky_wrapper_for_legacy_signatures<" \
                    f"{returns_type} ({dispatcher_args_types_str})>({nl}TORCH_FN({type_name}))"
            else:
                payload = f"torch::CppFunction::makeUnboxedOnly(&{type_name})"

            # Annotate it with dispatch information if necessary
            #
            # NB: In the ordinary, TypeDerived code generation work flow, specification
            # of the backend is handled by the enclosing block, so the torch::dispatch
            # invocation here is strictly unnecessary.  However, in the fbcode mobile
            # only workflow using per-op registration, these registrations will get dumped
            # in a TORCH_LIBRARY_FRAGMENT that does not have an ambient backend.  So
            # the torch::dispatch specification here is important!  See
            # Note [Redundancy in registration code is OK] for how we handle redundant info.
            if dispatch is not None:
                payload = f"torch::dispatch(DispatchKey::{dispatch},\n{payload})\n"

            impl_registration = f'm.impl("{f.func.name}",\n{payload});\n'

        return f"{def_registration}{impl_registration}"
    else:
        assert_never(target)
def go(f: NativeFunction) -> Optional[str]:
    """Generate BackendSelect glue for *f*, or None if it does not apply.

    Unlike the older variant, this one switches between the full c10
    dispatcher calling convention and the legacy one based on
    local.use_c10_dispatcher().

    NOTE(review): `target` and `local` come from an enclosing scope not
    visible here; original newlines were lost, so line breaks inside the
    emitted C++ string literals are reconstructed — confirm against
    generated output.
    """
    # Skip factory variants like *_like / new_* — they select their backend
    # from an existing tensor rather than from TensorOptions.
    if str(f.func.name.name).endswith('_like') or str(
            f.func.name.name).startswith('new_'):
        return None
    name = legacy_dispatcher.name(f.func)
    legacy_dispatcher_returns_type = legacy_dispatcher.returns_type(
        f.func.returns)
    legacy_dispatcher_args = legacy_dispatcher.arguments(f.func)
    # Only ops taking TensorOptions participate in backend selection.
    if not any(
            isinstance(a.argument, TensorOptionsArguments)
            for a in legacy_dispatcher_args):
        return None
    # Tensor-like (non-options) arguments also contribute dispatch keys.
    legacy_dispatcher_tensor_args = [
        a for a in legacy_dispatcher_args
        if isinstance(a.argument, Argument) and a.argument.type.is_tensor_like()
    ]
    dispatcher_returns_type = dispatcher.returns_type(f.func.returns)
    dispatcher_args = dispatcher.arguments(f.func)

    # Pick signature/exprs/key-computation per calling convention.
    args: Union[Sequence[DispatcherArgument], Sequence[LegacyDispatcherArgument]]
    if local.use_c10_dispatcher() is UseC10Dispatcher.full:
        returns_type = dispatcher_returns_type
        args = dispatcher_args
        exprs = dispatcher.exprs(dispatcher_args)
        dispatch_key = "c10::computeDispatchKey(dtype, layout, device)"
    else:
        returns_type = legacy_dispatcher_returns_type
        args = legacy_dispatcher_args
        exprs = dispatcher.legacydispatcherarguments_exprs(
            legacy_dispatcher_args)
        dispatch_key = "options.computeDispatchKey()"

    if target is Target.DEFINITION:
        # I don't think there's actually a good reason to generate
        # these two cases differently
        # The first case could probably be improved though- it calls dispatchTypeId(),
        # which looks at TLS dispatch keys- there should not be any by the time we reach backend select.
        if legacy_dispatcher_tensor_args:
            tensor_args = ', '.join(a.name for a in legacy_dispatcher_tensor_args)
            # Combine the options-derived key with keys from tensor args,
            # masked to everything after BackendSelect.
            compute_dk = f"""\
DispatchKeySet _dk_set = c10::DispatchKeySet({dispatch_key}) | c10::detail::multi_dispatch_key_set({tensor_args});
  DispatchKeySet _dk_mask = c10::DispatchKeySet(DispatchKeySet::FULL_AFTER, DispatchKey::BackendSelect);
  DispatchKey _dk = c10::impl::dispatchTypeId(_dk_set, _dk_mask);"""
        else:
            compute_dk = f"DispatchKey _dk = {dispatch_key};"
        return f"""\
// aten::{f.func}
{returns_type} {name}({', '.join(str(a) for a in args)}) {{
  static auto op = c10::Dispatcher::singleton()
    .findSchemaOrThrow("aten::{f.func.name.name}", "{f.func.name.overload_name}")
    .typed<{dispatcher_returns_type} ({', '.join(a.type for a in dispatcher_args)})>();
  {compute_dk}
  DispatchKey _autograd_dk = c10::getAutogradKeyFromBackend(_dk);
  // This trick allows calling Autograd backend kernel first and then backend kernel,
  // without adding another AutogradBackendSelect dispatch key.
  DispatchKey _current_dk = at::impl::variable_excluded_from_dispatch() ? _dk : _autograd_dk;
  return op.callWithDispatchKey(_current_dk, {', '.join(a.expr for a in exprs)});
}}
"""
    elif target is Target.REGISTRATION:
        if local.use_c10_dispatcher() is UseC10Dispatcher.full:
            return f"""m.impl("aten::{f.func.name}",
          c10::impl::hacky_wrapper_for_legacy_signatures<{dispatcher_returns_type} ({', '.join(a.type for a in dispatcher_args)})>(
            TORCH_FN({name})));"""
        else:
            return f"""m.impl_UNBOXED("aten::{f.func.name}", {name});"""
    elif target is Target.DECLARATION:
        # Declarations are never requested for BackendSelect.
        raise AssertionError()
    else:
        assert_never(target)