def go(f: NativeFunction) -> Optional[str]:
    if str(f.func.name.name).endswith('_like') or str(f.func.name.name).startswith('new_'):
        return None

    name = legacy_dispatcher.name(f.func)
    legacy_dispatcher_returns_type = legacy_dispatcher.returns_type(f.func.returns)
    legacy_dispatcher_args = legacy_dispatcher.arguments(f.func)

    if not any(isinstance(a.argument, TensorOptionsArguments) for a in legacy_dispatcher_args):
        return None

    legacy_dispatcher_tensor_args = [
        a for a in legacy_dispatcher_args
        if isinstance(a.argument, Argument) and a.argument.type.is_tensor_like()
    ]

    dispatcher_returns_type = dispatcher.returns_type(f.func.returns)
    dispatcher_args = dispatcher.arguments(f.func)
    dispatcher_exprs = dispatcher.legacydispatcherarguments_exprs(legacy_dispatcher_args)

    if target is Target.DEFINITION:
        # See Note [Byte-for-byte compatibility]
        # I don't think there's actually a good reason to generate
        # these two cases differently
        if legacy_dispatcher_tensor_args:
            tensor_args = ', '.join(a.name for a in legacy_dispatcher_tensor_args)
            compute_dk = f"""\
DispatchKeySet _dk_set = DispatchKeySet(options.computeDispatchKey()) | c10::detail::multi_dispatch_key_set({tensor_args});
  DispatchKeySet _dk_mask = c10::DispatchKeySet(DispatchKeySet::FULL_AFTER, DispatchKey::BackendSelect);
  DispatchKey _dk = c10::impl::dispatchTypeId(_dk_set, _dk_mask);"""
        else:
            compute_dk = "DispatchKey _dk = options.computeDispatchKey();"
        return f"""\
// aten::{f.func}
{legacy_dispatcher_returns_type} {name}({', '.join(a.str_with_default() for a in legacy_dispatcher_args)}) {{
  static auto op = c10::Dispatcher::singleton()
    .findSchemaOrThrow("aten::{f.func.name.name}", "{f.func.name.overload_name}")
    .typed<{dispatcher_returns_type} ({', '.join(a.type for a in dispatcher_args)})>();
  {compute_dk}
  DispatchKey _autograd_dk = c10::getAutogradKeyFromBackend(_dk);
  // This trick allows calling Autograd backend kernel first and then backend kernel,
  // without adding another AutogradBackendSelect dispatch key.
  DispatchKey _current_dk = at::impl::variable_excluded_from_dispatch() ? _dk : _autograd_dk;
  return op.callWithDispatchKey(_current_dk, {', '.join(a.expr for a in dispatcher_exprs)});
}}
"""
    elif target is Target.REGISTRATION:
        if local.use_c10_dispatcher() is UseC10Dispatcher.full:
            return f"""m.impl("aten::{f.func.name}",
          c10::impl::hacky_wrapper_for_legacy_signatures<{dispatcher_returns_type} ({', '.join(a.type for a in dispatcher_args)})>(
            TORCH_FN({name})));"""
        else:
            return f"""m.impl_UNBOXED("aten::{f.func.name}", {name});"""
    elif target is Target.DECLARATION:
        raise AssertionError()
    else:
        assert_never(target)
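# Illustrative only (hedged sketch, not part of this file's output): for a
# hypothetical factory op "aten::example" that takes a TensorOptions and no
# Tensor arguments, the Target.DEFINITION branch above would emit a
# BackendSelect wrapper of roughly this shape. The op name, argument list, and
# C++ types are assumptions; the real text depends on the schema and on the
# legacy_dispatcher/dispatcher helpers.
#
#   // aten::example
#   Tensor example(IntArrayRef size, const TensorOptions & options) {
#     static auto op = c10::Dispatcher::singleton()
#       .findSchemaOrThrow("aten::example", "")
#       .typed<Tensor (IntArrayRef, const TensorOptions &)>();
#     DispatchKey _dk = options.computeDispatchKey();
#     DispatchKey _autograd_dk = c10::getAutogradKeyFromBackend(_dk);
#     DispatchKey _current_dk = at::impl::variable_excluded_from_dispatch() ? _dk : _autograd_dk;
#     return op.callWithDispatchKey(_current_dk, size, options);
#   }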
def func(f: NativeFunction) -> Optional[str]:
    if dispatch is not None:
        if f.dispatch is None or dispatch not in f.dispatch:
            return None
    else:
        if f.dispatch is not None and target is not Target.REGISTRATION:
            return None
    if op_registration_whitelist is not None and \
            f"aten::{f.func.name.name}" not in op_registration_whitelist and target is Target.REGISTRATION:
        return None

    name = legacy_dispatcher.name(f.func)
    returns_type = legacy_dispatcher.returns_type(f.func.returns)
    args = legacy_dispatcher.arguments(f.func)
    args_str = ', '.join(map(str, args))

    if target is Target.DECLARATION:
        return f"{returns_type} {name}({args_str});"
    elif target is Target.DEFINITION:
        if f.dispatch is None:
            cpp_name = cpp.name(f.func)
            impl_name = f"at::native::{cpp_name}"
        else:
            assert dispatch is not None
            impl_name = f"at::native::{f.dispatch[dispatch]}"

        args_exprs_str = ', '.join(map(lambda a: a.name, args))

        # See Note [Byte-for-byte compatibility]
        # (return void_func() is valid C++)
        return_kw = "    return "
        if returns_type == "void":
            return_kw = "    "

        cuda_guard = ""
        if dispatch is None or 'CUDA' in dispatch or 'Vulkan' == dispatch:
            self_args = (a for a in f.func.arguments if a.name == "self")

            # There is precedence for which argument we use to do
            # device guard.  This describes the precedence order.
            candidate_args = itertools.chain(self_args, f.func.out_arguments, f.func.arguments)

            # Only tensor like arguments are eligible
            device_of = next((f'{a.name}' for a in candidate_args if a.type.is_tensor_like()), None)

            # See Note [Byte-for-byte compatibility]
            # I wasn't able to figure out the internal logic for
            # these device guards
            if str(f.func.name) == "_thnn_fused_lstm_cell_backward":
                device_of = "cx"
            elif str(f.func.name) == "_thnn_differentiable_lstm_cell_backward":
                device_of = "input_gates"

            has_tensor_options = any(isinstance(a.argument, TensorOptionsArguments) for a in args)

            # TODO: There is probably a simpler version of this that
            # works just as well.
            if f.device_guard and (dispatch is None or 'Vulkan' == dispatch) and has_tensor_options:
                cuda_guard = """\
    const DeviceGuard device_guard(options.device());
"""
                # See Note [Byte-for-byte compatibility]
                if dispatch is not None:
                    cuda_guard = f"\n{cuda_guard}"
            elif f.device_guard and dispatch is not None and 'CUDA' in dispatch and has_tensor_options:
                cuda_guard = """\
    globalContext().lazyInitCUDA();
    const DeviceGuard device_guard(options.device());
"""
            elif f.device_guard and device_of is not None:
                cuda_guard = f"""\
    const OptionalDeviceGuard device_guard(device_of({device_of}));
"""
                # See Note [Byte-for-byte compatibility]
                if dispatch is not None:
                    cuda_guard = f"\n{cuda_guard}"
            else:
                cuda_guard = """\
    // DeviceGuard omitted
"""
                # See Note [Byte-for-byte compatibility]
                if dispatch is not None:
                    cuda_guard = f"\n{cuda_guard}"

        return f"""\
{returns_type} {name}({args_str}) {{
{cuda_guard}{return_kw}{impl_name}({args_exprs_str});
}}
"""
    elif target is Target.REGISTRATION:
        assert returns_type == dispatcher.returns_type(f.func.returns)
        dispatcher_args = dispatcher.arguments(f.func)
        dispatcher_args_types_str = ', '.join(map(lambda a: a.type, dispatcher_args))
        if dispatch is None or dispatch == 'Math':
            type_name = f'TypeDefault::{name}'
        else:
            type_name = f'{dispatch}Type::{name}'

        # def registration only happens in TypeDefault
        def_registration = ""
        if dispatch is None:
            def_registration = f'm.def("{f.func}");\n'

        impl_registration = ""
        if not def_only and not f.manual_kernel_registration and (dispatch is not None or f.dispatch is None):
            # Figure out which signature the function is
            if local.use_c10_dispatcher() is UseC10Dispatcher.full:
                # See Note [Byte-for-byte compatibility]
                if dispatch is not None:
                    nl = "\n"
                else:
                    nl = ""

                payload = "c10::impl::hacky_wrapper_for_legacy_signatures<" \
                    f"{returns_type} ({dispatcher_args_types_str})>({nl}TORCH_FN({type_name}))"
            else:
                payload = f"torch::CppFunction::makeUnboxedOnly(&{type_name})"

            # Annotate it with dispatch information if necessary
            #
            # NB: In the ordinary, TypeDerived code generation work flow, specification
            # of the backend is handled by the enclosing block, so the torch::dispatch
            # invocation here is strictly unnecessary.  However, in the fbcode mobile
            # only workflow using per-op registration, these registrations will get dumped
            # in a TORCH_LIBRARY_FRAGMENT that does not have an ambient backend.  So
            # the torch::dispatch specification here is important!  See
            # Note [Redundancy in registration code is OK] for how we handle redundant info.
            if dispatch is not None:
                payload = f"torch::dispatch(DispatchKey::{dispatch},\n{payload})\n"
            impl_registration = f'm.impl("{f.func.name}",\n{payload});\n'

        return f"{def_registration}{impl_registration}"
    else:
        assert_never(target)
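# Illustrative only (hedged sketch): for a hypothetical op
# "aten::example(Tensor self) -> Tensor" generated with dispatch == 'CUDA' and a
# native kernel named "example_cuda", the Target.DEFINITION and
# Target.REGISTRATION branches above would emit roughly:
#
#   Tensor example(const Tensor & self) {
#
#       const OptionalDeviceGuard device_guard(device_of(self));
#       return at::native::example_cuda(self);
#   }
#
#   m.impl("example",
#   torch::dispatch(DispatchKey::CUDA,
#   torch::CppFunction::makeUnboxedOnly(&CUDAType::example))
#   );
#
# The op and kernel names are made up; the exact spacing and the blank line
# before the device guard come from the templates and the
# Note [Byte-for-byte compatibility] handling above.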
def go(f: NativeFunction) -> Optional[str]:
    if str(f.func.name.name).endswith('_like') or str(f.func.name.name).startswith('new_'):
        return None

    name = legacy_dispatcher.name(f.func)
    legacy_dispatcher_returns_type = legacy_dispatcher.returns_type(f.func.returns)
    legacy_dispatcher_args = legacy_dispatcher.arguments(f.func)

    if not any(isinstance(a.argument, TensorOptionsArguments) for a in legacy_dispatcher_args):
        return None

    legacy_dispatcher_tensor_args = [
        a for a in legacy_dispatcher_args
        if isinstance(a.argument, Argument) and a.argument.type.is_tensor_like()
    ]

    dispatcher_returns_type = dispatcher.returns_type(f.func.returns)
    dispatcher_args = dispatcher.arguments(f.func)

    args: Union[Sequence[DispatcherArgument], Sequence[LegacyDispatcherArgument]]
    if local.use_c10_dispatcher() is UseC10Dispatcher.full:
        returns_type = dispatcher_returns_type
        args = dispatcher_args
        exprs = dispatcher.exprs(dispatcher_args)
        dispatch_key = "c10::computeDispatchKey(dtype, layout, device)"
    else:
        returns_type = legacy_dispatcher_returns_type
        args = legacy_dispatcher_args
        exprs = dispatcher.legacydispatcherarguments_exprs(legacy_dispatcher_args)
        dispatch_key = "options.computeDispatchKey()"

    if target is Target.DEFINITION:
        # I don't think there's actually a good reason to generate
        # these two cases differently
        # The first case could probably be improved though - it calls dispatchTypeId(),
        # which looks at TLS dispatch keys - there should not be any by the time we reach backend select.
        if legacy_dispatcher_tensor_args:
            tensor_args = ', '.join(a.name for a in legacy_dispatcher_tensor_args)
            compute_dk = f"""\
DispatchKeySet _dk_set = c10::DispatchKeySet({dispatch_key}) | c10::detail::multi_dispatch_key_set({tensor_args});
  DispatchKeySet _dk_mask = c10::DispatchKeySet(DispatchKeySet::FULL_AFTER, DispatchKey::BackendSelect);
  DispatchKey _dk = c10::impl::dispatchTypeId(_dk_set, _dk_mask);"""
        else:
            compute_dk = f"DispatchKey _dk = {dispatch_key};"
        return f"""\
// aten::{f.func}
{returns_type} {name}({', '.join(str(a) for a in args)}) {{
  static auto op = c10::Dispatcher::singleton()
    .findSchemaOrThrow("aten::{f.func.name.name}", "{f.func.name.overload_name}")
    .typed<{dispatcher_returns_type} ({', '.join(a.type for a in dispatcher_args)})>();
  {compute_dk}
  DispatchKey _autograd_dk = c10::getAutogradKeyFromBackend(_dk);
  // This trick allows calling Autograd backend kernel first and then backend kernel,
  // without adding another AutogradBackendSelect dispatch key.
  DispatchKey _current_dk = at::impl::variable_excluded_from_dispatch() ? _dk : _autograd_dk;
  return op.callWithDispatchKey(_current_dk, {', '.join(a.expr for a in exprs)});
}}
"""
    elif target is Target.REGISTRATION:
        if local.use_c10_dispatcher() is UseC10Dispatcher.full:
            return f"""m.impl("aten::{f.func.name}",
          c10::impl::hacky_wrapper_for_legacy_signatures<{dispatcher_returns_type} ({', '.join(a.type for a in dispatcher_args)})>(
            TORCH_FN({name})));"""
        else:
            return f"""m.impl_UNBOXED("aten::{f.func.name}", {name});"""
    elif target is Target.DECLARATION:
        raise AssertionError()
    else:
        assert_never(target)
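# Illustrative only (hedged sketch): relative to the earlier go() above, the main
# difference is the UseC10Dispatcher.full path, where the wrapper is generated
# against the dispatcher signature (TensorOptions scattered into
# dtype/layout/device/pin_memory - an assumption about what dispatcher.arguments()
# produces) and the dispatch key is computed as
#
#   DispatchKey _dk = c10::computeDispatchKey(dtype, layout, device);
#
# instead of the legacy
#
#   DispatchKey _dk = options.computeDispatchKey();
#
# Only the two dispatch-key expressions are taken verbatim from the code above.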