Example #1
    def go(f: NativeFunction) -> Optional[str]:
        if str(f.func.name.name).endswith('_like') or str(f.func.name.name).startswith('new_'):
            return None

        name = legacy_dispatcher.name(f.func)
        legacy_dispatcher_returns_type = legacy_dispatcher.returns_type(f.func.returns)
        legacy_dispatcher_args = legacy_dispatcher.arguments(f.func)

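        # Only functions that take TensorOptions (i.e. factory-like functions) need a
        # BackendSelect wrapper; everything else is skipped.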
        if not any(isinstance(a.argument, TensorOptionsArguments) for a in legacy_dispatcher_args):
            return None

        legacy_dispatcher_tensor_args = [
            a for a in legacy_dispatcher_args
            if isinstance(a.argument, Argument) and a.argument.type.is_tensor_like()
        ]

        dispatcher_returns_type = dispatcher.returns_type(f.func.returns)
        dispatcher_args = dispatcher.arguments(f.func)
        dispatcher_exprs = dispatcher.legacydispatcherarguments_exprs(legacy_dispatcher_args)

        if target is Target.DEFINITION:
            # See Note [Byte-for-byte compatibility]
            # I don't think there's actually a good reason to generate
            # these two cases differently
            if legacy_dispatcher_tensor_args:
                tensor_args = ', '.join(a.name for a in legacy_dispatcher_tensor_args)
                compute_dk = f"""\
DispatchKeySet _dk_set = DispatchKeySet(options.computeDispatchKey()) | c10::detail::multi_dispatch_key_set({tensor_args});
  DispatchKeySet _dk_mask = c10::DispatchKeySet(DispatchKeySet::FULL_AFTER, DispatchKey::BackendSelect);
  DispatchKey _dk = c10::impl::dispatchTypeId(_dk_set, _dk_mask);"""
            else:
                compute_dk = "DispatchKey _dk = options.computeDispatchKey();"
            return f"""\
// aten::{f.func}
{legacy_dispatcher_returns_type} {name}({', '.join(a.str_with_default() for a in legacy_dispatcher_args)}) {{
  static auto op = c10::Dispatcher::singleton()
    .findSchemaOrThrow("aten::{f.func.name.name}", "{f.func.name.overload_name}")
    .typed<{dispatcher_returns_type} ({', '.join(a.type for a in dispatcher_args)})>();
  {compute_dk}
  DispatchKey _autograd_dk = c10::getAutogradKeyFromBackend(_dk);
  // This trick allows calling the Autograd backend kernel first and then the backend kernel,
  // without adding another AutogradBackendSelect dispatch key.
  DispatchKey _current_dk = at::impl::variable_excluded_from_dispatch() ? _dk : _autograd_dk;
  return op.callWithDispatchKey(_current_dk, {', '.join(a.expr for a in dispatcher_exprs)});
}}
"""
        elif target is Target.REGISTRATION:
            if local.use_c10_dispatcher() is UseC10Dispatcher.full:
                return f"""m.impl("aten::{f.func.name}",
          c10::impl::hacky_wrapper_for_legacy_signatures<{dispatcher_returns_type} ({', '.join(a.type for a in dispatcher_args)})>(
            TORCH_FN({name})));"""
            else:
                return f"""m.impl_UNBOXED("aten::{f.func.name}", {name});"""
        elif target is Target.DECLARATION:
            raise AssertionError()
        else:
            assert_never(target)
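
For a rough sense of what this template expands to, here is a sketch of the BackendSelect wrapper it could emit for a hypothetical factory operator aten::my_factory that takes a TensorOptions argument and no Tensor arguments. The operator, its schema, and the argument types are illustrative, not actual codegen output:

    // aten::my_factory(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    Tensor my_factory(IntArrayRef size, const TensorOptions & options={}) {
      static auto op = c10::Dispatcher::singleton()
        .findSchemaOrThrow("aten::my_factory", "")
        .typed<Tensor (IntArrayRef, const TensorOptions &)>();
      // No Tensor arguments, so the dispatch key comes from TensorOptions alone.
      DispatchKey _dk = options.computeDispatchKey();
      DispatchKey _autograd_dk = c10::getAutogradKeyFromBackend(_dk);
      DispatchKey _current_dk = at::impl::variable_excluded_from_dispatch() ? _dk : _autograd_dk;
      return op.callWithDispatchKey(_current_dk, size, options);
    }
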
Example #2
    def func(f: NativeFunction) -> Optional[str]:
        if dispatch is not None:
            if f.dispatch is None or dispatch not in f.dispatch:
                return None
        else:
            if f.dispatch is not None and target is not Target.REGISTRATION:
                return None

        if op_registration_whitelist is not None and \
                f"aten::{f.func.name.name}" not in op_registration_whitelist and target is Target.REGISTRATION:
            return None

        name = legacy_dispatcher.name(f.func)
        returns_type = legacy_dispatcher.returns_type(f.func.returns)
        args = legacy_dispatcher.arguments(f.func)
        args_str = ', '.join(map(str, args))

        if target is Target.DECLARATION:
            return f"{returns_type} {name}({args_str});"
        elif target is Target.DEFINITION:
            if f.dispatch is None:
                cpp_name = cpp.name(f.func)
                impl_name = f"at::native::{cpp_name}"
            else:
                assert dispatch is not None
                impl_name = f"at::native::{f.dispatch[dispatch]}"

            args_exprs_str = ', '.join(map(lambda a: a.name, args))

            # See Note [Byte-for-byte compatibility]
            # (return void_func() is valid C++)
            return_kw = "    return "
            if returns_type == "void":
                return_kw = " "

            cuda_guard = ""
            if dispatch is None or 'CUDA' in dispatch or 'Vulkan' == dispatch:
                self_args = (a for a in f.func.arguments if a.name == "self")

                # There is a precedence order for which argument we use to do the
                # device guard; candidate_args lists the candidates in that order.
                candidate_args = itertools.chain(self_args, f.func.out_arguments, f.func.arguments)

                # Only tensor like arguments are eligible
                device_of = next((f'{a.name}' for a in candidate_args if a.type.is_tensor_like()), None)

                # See Note [Byte-for-byte compatibility]
                # I wasn't able to figure out the internal logic for
                # these device guards
                if str(f.func.name) == "_thnn_fused_lstm_cell_backward":
                    device_of = "cx"
                elif str(f.func.name) == "_thnn_differentiable_lstm_cell_backward":
                    device_of = "input_gates"

                has_tensor_options = any(isinstance(a.argument, TensorOptionsArguments) for a in args)

                # TODO: There is probably a simpler version of this that
                # works just as well.
                if f.device_guard and (dispatch is None or 'Vulkan' == dispatch) and has_tensor_options:
                    cuda_guard = """\
    const DeviceGuard device_guard(options.device());
"""
                    # See Note [Byte-for-byte compatibility]
                    if dispatch is not None:
                        cuda_guard = f"\n{cuda_guard}"
                elif f.device_guard and dispatch is not None and 'CUDA' in dispatch and has_tensor_options:
                    cuda_guard = """\
    globalContext().lazyInitCUDA();
    const DeviceGuard device_guard(options.device());
"""
                elif f.device_guard and device_of is not None:
                    cuda_guard = f"""\
    const OptionalDeviceGuard device_guard(device_of({device_of}));
"""
                    # See Note [Byte-for-byte compatibility]
                    if dispatch is not None:
                        cuda_guard = f"\n{cuda_guard}"
                else:
                    cuda_guard = """\
    // DeviceGuard omitted
"""
                    # See Note [Byte-for-byte compatibility]
                    if dispatch is not None:
                        cuda_guard = f"\n{cuda_guard}"

            return f"""\
{returns_type} {name}({args_str}) {{
{cuda_guard}{return_kw}{impl_name}({args_exprs_str});
}}
"""

        elif target is Target.REGISTRATION:
            assert returns_type == dispatcher.returns_type(f.func.returns)
            dispatcher_args = dispatcher.arguments(f.func)
            dispatcher_args_types_str = ', '.join(map(lambda a: a.type, dispatcher_args))
            if dispatch is None or dispatch == 'Math':
                type_name = f'TypeDefault::{name}'
            else:
                type_name = f'{dispatch}Type::{name}'

            # def registration only happens in TypeDefault
            def_registration = ""
            if dispatch is None:
                def_registration = f'm.def("{f.func}");\n'

            impl_registration = ""
            if not def_only and not f.manual_kernel_registration and (dispatch is not None or f.dispatch is None):
                # Figure out which signature the function is
                if local.use_c10_dispatcher() is UseC10Dispatcher.full:
                    # See Note [Byte-for-byte compatibility]
                    if dispatch is not None:
                        nl = "\n"
                    else:
                        nl = ""

                    payload = "c10::impl::hacky_wrapper_for_legacy_signatures<" \
                        f"{returns_type} ({dispatcher_args_types_str})>({nl}TORCH_FN({type_name}))"

                else:
                    payload = f"torch::CppFunction::makeUnboxedOnly(&{type_name})"

                # Annotate it with dispatch information if necessary
                #
                # NB: In the ordinary TypeDerived code generation workflow, specification
                # of the backend is handled by the enclosing block, so the torch::dispatch
                # invocation here is strictly unnecessary.  However, in the fbcode mobile-only
                # workflow using per-op registration, these registrations will get dumped
                # in a TORCH_LIBRARY_FRAGMENT that does not have an ambient backend.  So
                # the torch::dispatch specification here is important!  See
                # Note [Redundancy in registration code is OK] for how we handle redundant info.
                if dispatch is not None:
                    payload = f"torch::dispatch(DispatchKey::{dispatch},\n{payload})\n"

                impl_registration = f'm.impl("{f.func.name}",\n{payload});\n'

            return f"{def_registration}{impl_registration}"
        else:
            assert_never(target)
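
To make the branches above concrete, here is a sketch of what the DEFINITION and REGISTRATION targets could emit for a hypothetical operator my_op(Tensor self) -> Tensor with no per-backend dispatch entry (dispatch=None, non-UseC10Dispatcher.full path). The operator and its schema are illustrative only:

    // Target.DEFINITION (TypeDefault): guard on the device of `self`, then
    // forward to the at::native implementation.
    Tensor my_op(const Tensor & self) {
        const OptionalDeviceGuard device_guard(device_of(self));
        return at::native::my_op(self);
    }

    // Target.REGISTRATION: a schema definition plus an unboxed-only kernel
    // registration (the non-UseC10Dispatcher.full branch).
    m.def("my_op(Tensor self) -> Tensor");
    m.impl("my_op",
    torch::CppFunction::makeUnboxedOnly(&TypeDefault::my_op));
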
Example #3
    def go(f: NativeFunction) -> Optional[str]:
        if str(f.func.name.name).endswith('_like') or str(
                f.func.name.name).startswith('new_'):
            return None

        name = legacy_dispatcher.name(f.func)
        legacy_dispatcher_returns_type = legacy_dispatcher.returns_type(
            f.func.returns)
        legacy_dispatcher_args = legacy_dispatcher.arguments(f.func)

        if not any(
                isinstance(a.argument, TensorOptionsArguments)
                for a in legacy_dispatcher_args):
            return None

        legacy_dispatcher_tensor_args = [
            a for a in legacy_dispatcher_args
            if isinstance(a.argument, Argument)
            and a.argument.type.is_tensor_like()
        ]

        dispatcher_returns_type = dispatcher.returns_type(f.func.returns)
        dispatcher_args = dispatcher.arguments(f.func)

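        # Under UseC10Dispatcher.full, TensorOptions is already scattered into separate
        # dtype/layout/device arguments, so both the wrapper signature and the dispatch
        # key computation differ from the legacy dispatcher path below.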
        args: Union[Sequence[DispatcherArgument],
                    Sequence[LegacyDispatcherArgument]]
        if local.use_c10_dispatcher() is UseC10Dispatcher.full:
            returns_type = dispatcher_returns_type
            args = dispatcher_args
            exprs = dispatcher.exprs(dispatcher_args)
            dispatch_key = "c10::computeDispatchKey(dtype, layout, device)"
        else:
            returns_type = legacy_dispatcher_returns_type
            args = legacy_dispatcher_args
            exprs = dispatcher.legacydispatcherarguments_exprs(
                legacy_dispatcher_args)
            dispatch_key = "options.computeDispatchKey()"

        if target is Target.DEFINITION:
            # I don't think there's actually a good reason to generate
            # these two cases differently
            # The first case could probably be improved, though: it calls dispatchTypeId(),
            # which looks at TLS dispatch keys; there should not be any by the time we reach backend select.
            if legacy_dispatcher_tensor_args:
                tensor_args = ', '.join(a.name
                                        for a in legacy_dispatcher_tensor_args)
                compute_dk = f"""\
DispatchKeySet _dk_set = c10::DispatchKeySet({dispatch_key}) | c10::detail::multi_dispatch_key_set({tensor_args});
  DispatchKeySet _dk_mask = c10::DispatchKeySet(DispatchKeySet::FULL_AFTER, DispatchKey::BackendSelect);
  DispatchKey _dk = c10::impl::dispatchTypeId(_dk_set, _dk_mask);"""
            else:
                compute_dk = f"DispatchKey _dk = {dispatch_key};"
            return f"""\
// aten::{f.func}
{returns_type} {name}({', '.join(str(a) for a in args)}) {{
  static auto op = c10::Dispatcher::singleton()
    .findSchemaOrThrow("aten::{f.func.name.name}", "{f.func.name.overload_name}")
    .typed<{dispatcher_returns_type} ({', '.join(a.type for a in dispatcher_args)})>();
  {compute_dk}
  DispatchKey _autograd_dk = c10::getAutogradKeyFromBackend(_dk);
  // This trick allows calling the Autograd backend kernel first and then the backend kernel,
  // without adding another AutogradBackendSelect dispatch key.
  DispatchKey _current_dk = at::impl::variable_excluded_from_dispatch() ? _dk : _autograd_dk;
  return op.callWithDispatchKey(_current_dk, {', '.join(a.expr for a in exprs)});
}}
"""
        elif target is Target.REGISTRATION:
            if local.use_c10_dispatcher() is UseC10Dispatcher.full:
                return f"""m.impl("aten::{f.func.name}",
          c10::impl::hacky_wrapper_for_legacy_signatures<{dispatcher_returns_type} ({', '.join(a.type for a in dispatcher_args)})>(
            TORCH_FN({name})));"""
            else:
                return f"""m.impl_UNBOXED("aten::{f.func.name}", {name});"""
        elif target is Target.DECLARATION:
            raise AssertionError()
        else:
            assert_never(target)
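
Relative to Example #1, the main change is the UseC10Dispatcher.full branch, where TensorOptions is already scattered into dtype/layout/device at the dispatcher level. Here is a sketch of the wrapper that branch could emit for the same hypothetical aten::my_factory; argument types are illustrative, not actual codegen output:

    // aten::my_factory(int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
    Tensor my_factory(IntArrayRef size, c10::optional<ScalarType> dtype, c10::optional<Layout> layout,
                      c10::optional<Device> device, c10::optional<bool> pin_memory) {
      static auto op = c10::Dispatcher::singleton()
        .findSchemaOrThrow("aten::my_factory", "")
        .typed<Tensor (IntArrayRef, c10::optional<ScalarType>, c10::optional<Layout>, c10::optional<Device>, c10::optional<bool>)>();
      // No TensorOptions object here: the key is computed from the scattered arguments.
      DispatchKey _dk = c10::computeDispatchKey(dtype, layout, device);
      DispatchKey _autograd_dk = c10::getAutogradKeyFromBackend(_dk);
      DispatchKey _current_dk = at::impl::variable_excluded_from_dispatch() ? _dk : _autograd_dk;
      return op.callWithDispatchKey(_current_dk, size, dtype, layout, device, pin_memory);
    }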