def gen_composite_view_copy_kernel(g: NativeFunctionsViewGroup) -> Optional[str]: if g.view_copy is None: return None # view_copy is a native signature, since we're generating an at::native:: kernel view_copy_sig = NativeSignature(g.view_copy.func) # view is a dispatcher signature, since we're calling into the at::_ops API view_sig = DispatcherSignature(g.view.func) view_api_name = g.view.func.name.unambiguous_name() exprs = ", ".join( [e.expr for e in translate(view_copy_sig.arguments(), view_sig.arguments())] ) # view ops today always return either a Tensor or a list of Tensors assert len(g.view.func.returns) == 1 assert g.view.func.returns[0].type == BaseType( BaseTy.Tensor ) or g.view.func.returns[0].type == ListType(BaseType(BaseTy.Tensor), None) if g.view.func.returns[0].type == BaseType(BaseTy.Tensor): return_cloned_output = """\ return output.clone();""" else: # If the return type is a list, we need to clone each tensor in the list. return_cloned_output = f"""\ {view_copy_sig.returns_type().cpp_type()} out_clone; for (const auto i : c10::irange(output.size())) {{ out_clone.push_back(output[i].clone()); }} return out_clone;""" # The default generated composite kernel for {view}_copy() operators just clones # the input tensor, and runs the underlying view on the clone. return f"""
def gen_composite_view_copy_kernel( g: NativeFunctionsViewGroup) -> Optional[str]: if g.view_copy is None: return None # For view_copy.SymInt overloads, # See gen_symint_view_copy_kernel. if g.view_copy.func.name.overload_name == "SymInt": return None # We can make view_copy work in more cases by using reshape() # when a normal view call would ordinarily fail. # This also makes LTC more efficient, because they don't need to include # clone() calls in their graph (which is normally needed by reshape). if str(g.view_copy.func.name) == "view_copy": return """\ at::Tensor view_copy(const at::Tensor & self, at::IntArrayRef size) { DimVector shape = infer_size_dv(size, self.numel()); if (!at::detail::computeStride(self.sizes(), self.strides(), shape).has_value()) { return self.reshape(size); } else { auto output = at::_ops::view::call(self, size); return output.clone(); } } """ # view_copy is a native signature, since we're generating an at::native:: kernel view_copy_sig = NativeSignature(g.view_copy.func) # view is a dispatcher signature, since we're calling into the at::_ops API view_sig = DispatcherSignature(g.view.func) view_api_name = g.view.func.name.unambiguous_name() exprs = ", ".join([ e.expr for e in translate(view_copy_sig.arguments(), view_sig.arguments()) ]) # view ops today always return either a Tensor or a list of Tensors assert len(g.view.func.returns) == 1 assert g.view.func.returns[0].type == BaseType( BaseTy.Tensor) or g.view.func.returns[0].type == ListType( BaseType(BaseTy.Tensor), None) if g.view.func.returns[0].type == BaseType(BaseTy.Tensor): return_cloned_output = """\ return output.clone();""" else: # If the return type is a list, we need to clone each tensor in the list. 
return_cloned_output = f"""\ {view_copy_sig.returns_type().cpp_type()} out_clone; for (const auto i : c10::irange(output.size())) {{ out_clone.push_back(output[i].clone()); }} return out_clone;""" # The default generated composite kernel for {view}_copy() operators just clones # the input tensor, and runs the underlying view on the clone. return f"""
def ufunc_type(t: Type, *, binds: ArgName, compute_t: CType) -> NamedCType:
    """Return the named C++ type used for argument ``binds`` of a ufunc.

    Types that ``cpp.valuetype_type`` can translate (called with
    ``symint=False``) are returned as-is.  Scalar and Tensor arguments are
    both bound as ``compute_t``.

    Raises:
        AssertionError: if ``t`` is neither a value type, Scalar, nor Tensor.
    """
    value_nctype = cpp.valuetype_type(t, binds=binds, symint=False)
    if value_nctype is not None:
        return value_nctype

    # Scalars and Tensors are treated identically here.
    is_scalar_or_tensor = t == BaseType(BaseTy.Scalar) or t == BaseType(BaseTy.Tensor)
    if not is_scalar_or_tensor:
        raise AssertionError(f"unrecognized type {repr(t)}")
    return NamedCType(binds, compute_t)
def ufunctor_ctor_type(t: Type, *, binds: ArgName, scalar_t: BaseCppType) -> NamedCType:
    """Return the named C++ type of a ufunctor constructor argument.

    Types that ``cpp.valuetype_type`` can translate (called with
    ``symint=False``) are returned as-is.  Scalar and Tensor arguments are
    both bound as ``BaseCType(opmath_type(scalar_t))``.

    Raises:
        AssertionError: if ``t`` is neither a value type, Scalar, nor Tensor.
    """
    value_nctype = cpp.valuetype_type(t, binds=binds, symint=False)
    if value_nctype is not None:
        return value_nctype

    # Scalars and Tensors are treated identically here.
    is_scalar_or_tensor = t == BaseType(BaseTy.Scalar) or t == BaseType(BaseTy.Tensor)
    if not is_scalar_or_tensor:
        raise AssertionError(f"unrecognized type {repr(t)}")
    return NamedCType(binds, BaseCType(opmath_type(scalar_t)))
def dispatchstub_type(t: Type, *, binds: ArgName) -> Optional[NamedCType]:
    """Return the named C++ type of a dispatch-stub argument, if it has one.

    Types that ``cpp.valuetype_type`` can translate are returned as-is and a
    Scalar becomes a const reference to ``scalarT``.  A Tensor yields
    ``None``, i.e. no binding is produced for it.

    Raises:
        AssertionError: if ``t`` is neither a value type, Scalar, nor Tensor.
    """
    value_nctype = cpp.valuetype_type(t, binds=binds)
    if value_nctype is not None:
        return value_nctype

    if t == BaseType(BaseTy.Scalar):
        return NamedCType(binds, ConstRefCType(BaseCType(scalarT)))
    if t == BaseType(BaseTy.Tensor):
        # Tensors get no binding from this function.
        return None
    raise AssertionError(f"unrecognized type {repr(t)}")
def ufunctor_apply_type(
    t: Type, *, binds: ArgName, scalar_t: BaseCppType
) -> NamedCType:
    """Return the named C++ type of a ufunctor apply argument.

    Only Tensor arguments are accepted; they are bound as
    ``BaseCType(scalar_t)``.

    Raises:
        AssertionError: if ``t`` is not a Tensor.
    """
    is_tensor = t == BaseType(BaseTy.Tensor)
    if not is_tensor:
        raise AssertionError(f"unrecognized type {repr(t)}")
    return NamedCType(binds, BaseCType(scalar_t))
def inner_arguments(func: FunctionSchema, is_reverse: bool) -> List[Binding]:
    """Return the bindings passed to the call inside a view lambda.

    The first schema argument must be a Tensor; it is replaced by the lambda
    argument ``base``.  Every remaining argument is converted with
    ``dispatcher.argument``, since both the forward call (at::_ops API) and
    the reverse call (view inverse API) follow the dispatcher convention.

    For the reverse lambda the leading bindings are ``base``,
    ``mutated_view`` and ``reapply_views``, followed by the index binding
    from ``inner_call_index(func)`` when that returns one (the calling
    convention for view ops with multiple tensor outputs).
    """
    all_args = func.arguments.flat_all
    assert all_args[0].type == BaseType(BaseTy.Tensor)
    trailing_bindings = [dispatcher.argument(a) for a in all_args[1:]]

    if not is_reverse:
        # Forward lambda: only the original tensor argument is swapped out.
        return [base_binding] + trailing_bindings

    leading_bindings = [base_binding, mutated_view_binding, reapply_views_binding]
    index_binding = inner_call_index(func)
    if index_binding is not None:
        leading_bindings.append(index_binding)
    return leading_bindings + trailing_bindings
def argumenttype_type(t: Type, *, mutable: bool, binds: ArgName) -> NamedCType:
    """Translate a schema argument type into its named C++ type.

    Value types are delegated to ``cpp.valuetype_type``.  Tensor and Scalar
    (plain, optional, or in a list) get dedicated reference-style C++ types;
    other optional and list types recurse on their element type and wrap the
    result in ``OptionalCType`` / ``ArrayRefCType``.

    Raises:
        AssertionError: for a base type that is not a value type, Tensor or
            Scalar, and for any type that is not a base, optional or list type.
    """
    value_nctype = cpp.valuetype_type(t, binds=binds)
    if value_nctype is not None:
        return value_nctype

    if isinstance(t, BaseType):
        if t.name == BaseTy.Tensor:
            ref_ctype = ConstRefCType(BaseCType(tensorT))
        elif t.name == BaseTy.Scalar:
            ref_ctype = ConstRefCType(BaseCType(scalarT))
        else:
            raise AssertionError(f"base type should have been value type {t}")
        return NamedCType(binds, ref_ctype)

    if isinstance(t, OptionalType):
        inner = t.elem
        if inner == BaseType(BaseTy.Tensor):
            return NamedCType(binds, BaseCType(optionalTensorRefT))
        if inner == BaseType(BaseTy.Scalar):
            return NamedCType(binds, BaseCType(optionalScalarRefT))
        if isinstance(inner, ListType) and str(inner.elem) == "int":
            return NamedCType(binds, BaseCType(optionalIntArrayRefT))
        inner_nctype = argumenttype_type(inner, mutable=mutable, binds=binds)
        return NamedCType(binds, OptionalCType(inner_nctype.type))

    if isinstance(t, ListType):
        item = t.elem
        if item == BaseType(BaseTy.Tensor):
            return NamedCType(binds, BaseCType(iTensorListRefT))
        if item == OptionalType(BaseType(BaseTy.Tensor)):
            return NamedCType(binds, BaseCType(iOptTensorListRefT))
        # TODO: delete these special cases; see torchgen.api.cpp--these
        # must be changed in tandem, but there are problems; see
        # https://github.com/pytorch/pytorch/pull/51485
        if str(item) == "int":
            return NamedCType(binds, BaseCType(intArrayRefT))
        if str(item) == "Dimname":
            return NamedCType(binds, BaseCType(dimnameListT))
        item_nctype = argumenttype_type(item, mutable=mutable, binds=binds)
        return NamedCType(binds, ArrayRefCType(item_nctype.type))

    raise AssertionError(f"unrecognized type {repr(t)}")
def assert_view_op_properties(func: FunctionSchema) -> None: def is_alias(a: Argument) -> bool: return a.annotation is not None args = func.arguments.flat_non_out # The first argument is a tensor with an alias semantics (annotations) assert len(args) > 0 and args[0].type == BaseType( BaseTy.Tensor ), f"""In the functionalization codegen, we expect the first argument of every view operator to be a tensor, but found an argument of type {str(args[0].type)} for operator: {str(func.name)}.""" # No other arguments have aliasing semantics assert is_alias(args[0]) and not any( is_alias(a) for a in args[1:] ), """In the functionalization codegen, we expect the first argument of every view operator to alias the output.
def capture_arguments(func: FunctionSchema, *, is_reverse: bool) -> List[Binding]:
    """Return the bindings captured by a view lambda.

    The capture list is ``reapply_views`` followed by every schema argument
    except the first (which must be a Tensor).  The arguments are converted
    with ``remove_non_owning_ref_types=True`` so that no C++ reference type
    (e.g. IntArrayRef) ends up in the capture, where it would dangle; such
    types become value types (e.g. vector<int64_t>) instead.

    ``is_reverse`` is accepted but not consulted by this function.
    """
    all_args = func.arguments.flat_all
    assert all_args[0].type == BaseType(BaseTy.Tensor)

    captured = [reapply_views_binding]
    captured.extend(
        dispatcher.argument(a, remove_non_owning_ref_types=True)
        for a in all_args[1:]
    )
    return captured
def node_ctor_arg_rvalue_string(arg: LazyArgument) -> str:
    """
    Given a LazyArgument,
    generate a c++ string for materializing an rvalue of that arg for passing into
    a lazy Node constructor.

    Value-typed arguments are referenced through their ``node_<name>`` /
    ``lazy_<name>`` locals (or ``GetSymIntValue``); everything else is passed
    by name, with vectors and optional vectors copied into owning containers.

    Raises:
        AssertionError: for a value type whose ``lazy_type`` is neither a
            ``BaseCType`` nor an ``OptionalCType``.
    """
    # TODO: Matching on CType seems wrong; should be matching on Type
    if isValueType(arg.lazy_type):
        if isinstance(arg.lazy_type, BaseCType):
            if arg.is_wrapped_scalar:
                return f"node_{arg.name}"
            elif arg.lazy_type.type is tensorListValueT:
                return f"lazy_{arg.name}_tensorlist"
            elif arg.is_symint_or_list:
                # The emitted expression only needs the argument name.
                return f"GetSymIntValue({arg.name})"
            return f"lazy_{arg.name}->GetIrValue()"
        elif isinstance(arg.lazy_type, OptionalCType):
            if arg.is_wrapped_scalar:
                return f"node_{arg.name}"
            return (
                f"lazy_{arg.name} ? "
                f"c10::make_optional(lazy_{arg.name}->GetIrValue()) : "
                "c10::nullopt"
            )
        else:
            raise AssertionError(
                f"TODO not sure if there are other valid types to handle here ({arg.lazy_type})"
            )
    else:
        # NB: this is here because right now we aren't treating SymInt[] as a
        # value type; when we do this needs to move above
        # NB: we cannot test arg.lazy_type as we've already specified it is an
        # int64_t and so we cannot distinguish between SymInt and int64_t
        if isinstance(arg.orig_type, ListType) and arg.orig_type.elem == BaseType(
            BaseTy.SymInt
        ):
            return f"GetSymIntArrayRefValue({arg.name})"
        elif isinstance(arg.lazy_type, VectorCType) and isinstance(
            arg.lazy_type.elem, BaseCType
        ):
            # Copy into an owning std::vector of the element type.
            return f"std::vector<{arg.lazy_type.elem.type}>({arg.name}.begin(), {arg.name}.end())"
        elif (
            isinstance(arg.lazy_type, OptionalCType)
            and isinstance(arg.lazy_type.elem, VectorCType)
            and isinstance(arg.lazy_type.elem.elem, BaseCType)
        ):
            return f"torch::lazy::ToOptionalVector<{arg.lazy_type.elem.elem.type}>({arg.name})"
        else:
            return f"{arg.name}"
def generate_out_args_from_schema(
    func: FunctionSchema,
) -> Tuple[List[Return], List[Argument]]:
    """Build the returns and out= arguments of an out= variant of ``func``.

    Every tensor-like return of ``func`` becomes a new mutable out argument,
    named ``out`` when ``func`` has a single return and ``out{i}`` otherwise,
    annotated with an alias letter not already used by any argument.

    Returns:
        ``(new_returns, new_out_args)``.  If every return is a plain Tensor,
        ``new_returns`` mirrors the old returns with the out= alias
        annotations added; otherwise it holds only the non-tensor-like
        returns.

    Raises:
        AssertionError: if a return carries a write annotation, or if there
            is no tensor-like return.
    """
    # More of a sanity check - our existing restrictions on schemas should enforce that
    # mutable schema kinds never return their mutable arguments.
    assert not any(
        r.annotation is not None and r.annotation.is_write for r in func.returns
    )

    tensorlike_rets = [r for r in func.returns if r.type.is_tensor_like()]
    assert len(tensorlike_rets) > 0

    # Materialize into a set: the result is probed once per candidate letter
    # below, and a lazily-evaluated iterable would be consumed by the first
    # few membership tests, making later letters look unused.
    used_annotations = set(
        concatMap(
            lambda a: [] if a.annotation is None else a.annotation.alias_set,
            func.arguments.flat_all,
        )
    )
    valid_annotations = [
        x for x in "abcdefghijklmnopqrstuvwxyz" if x not in used_annotations
    ]

    all_rets_are_tensors = all(r.type == BaseType(BaseTy.Tensor) for r in func.returns)

    new_out_args: List[Argument] = []
    # The end result of new_returns is that:
    # - If every return is a plain tensor, then the new returns == the old returns, but with the out= alias annotations added.
    # - Otherwise, none of the out arguments show up in the returns (and we're only left with non-tensor-like returns, if any).
    new_returns: List[Return] = []
    for i, r in enumerate(func.returns):
        if r.type.is_tensor_like():
            new_out = Argument(
                name="out" if len(func.returns) == 1 else f"out{i}",
                type=r.type,
                default=None,
                annotation=Annotation.parse(f"{valid_annotations[i]}!"),
            )
            new_out_args.append(new_out)
            if all_rets_are_tensors:
                # The convention for out= schemas is that they only return their out arguments
                # if the return is a plain Tensor (or if it's a tuple of plain Tensors)
                new_ret = Return(
                    name=None, type=new_out.type, annotation=new_out.annotation
                )
                new_returns.append(new_ret)
        else:
            new_returns.append(r)
    return new_returns, new_out_args
def compute_ufunc_cpu_dtype_body( g: NativeFunctionsGroup, dtype: ScalarType, inner_loops: Dict[UfuncKey, UfuncSignature], parent_ctx: Sequence[Binding], ) -> str: assert UfuncKey.CPUScalar in inner_loops, f"{dtype}, {inner_loops.keys()}" assert inner_loops.keys() <= {UfuncKey.CPUScalar, UfuncKey.CPUVector} scalar_loop = inner_loops[UfuncKey.CPUScalar] vec_loop = None if UfuncKey.CPUVector in inner_loops: vec_loop = inner_loops[UfuncKey.CPUVector] # NB: We DON'T use translate here, because translate is # incapable of CSE'ing the scalar accesses in case it is also # used by Vectorized; also, the unpacking here is very simple # and only affects Scalar; everything else is implicitly captured # by the lambda # Setup scalar in scope body = [] ctx = [] for b in parent_ctx: if isinstance(b.argument, Argument) and b.argument.type != BaseType(BaseTy.Scalar): continue body.append(f"auto _s_{b.name} = {b.name}.to<scalar_t>();") ctx.append( Expr(f"_s_{b.name}", NamedCType(b.nctype.name, BaseCType(scalar_t)))) if vec_loop is not None: for b in parent_ctx: if isinstance( b.argument, Argument) and b.argument.type != BaseType(BaseTy.Scalar): continue body.append( f"auto _v_{b.name} = at::vec::Vectorized<scalar_t>(_s_{b.name});" ) ctx.append( Expr( f"_v_{b.name}", NamedCType(b.nctype.name, VectorizedCType(BaseCType(scalar_t))), )) # Setup lambda signature # NB: simplified version of ufunctor_arguments scalar_bindings = [] vec_bindings = [] for a in g.functional.func.arguments.flat_non_out: if not a.type.is_tensor_like(): continue assert a.type == BaseType(BaseTy.Tensor) scalar_bindings.append( Binding( name=a.name, nctype=NamedCType(a.name, BaseCType(scalar_t)), argument=a, )) if vec_loop is not None: vec_bindings.append( Binding( name=a.name, nctype=NamedCType(a.name, VectorizedCType(BaseCType(scalar_t))), argument=a, )) def with_ctx(b: Sequence[Binding]) -> List[Union[Expr, Binding]]: r: List[Union[Expr, Binding]] = [] r.extend(ctx) r.extend(b) return r body_str = "\n".join(body) 
if vec_loop is not None: return f""" {body_str} cpu_kernel_vec(iter, [=]({', '.join(b.decl() for b in scalar_bindings)}) {{ return {scalar_loop.call(with_ctx(scalar_bindings))}; }}, [=]({', '.join(b.decl() for b in vec_bindings)}) {{ return {vec_loop.call(with_ctx(vec_bindings))}; }} ); """ else: return f"""
# - While the forward lambda just directly calls into the at::_ops API # (following the dispatcher convention), the logic here for the reverse lambda # is responsible for generating both the call-site, and the declarations # (which are implemented manually in the at::functionalization::impl namespace). # The lambdas generated for each view op in the functionalization pass are of the form # [capture_arguments](outer_arguments) -> returns_type { # return name(inner_arguments); # } # Define some specific lambda input arguments. base_binding = Binding( name="base", nctype=NamedCType(name="base", type=ConstRefCType(BaseCType(tensorT))), argument=Argument(name="base", type=BaseType(BaseTy.Tensor), default=None, annotation=None), default=None, ) mutated_view_binding = Binding( name="mutated_view", nctype=NamedCType(name="mutated_view", type=ConstRefCType(BaseCType(tensorT))), argument=Argument(name="base", type=BaseType(BaseTy.Tensor), default=None, annotation=None), default=None, ) mutated_view_idx_binding = Binding(
def mutable_to_out_signature(func: FunctionSchema) -> FunctionSchema:
    """Generate an out= schema from a mutable schema.

    The new out= schema has:
    - Any non-aliased tensor-like returns converted to mutable, aliased out=
      arguments named ``out{i}`` (if every return is a plain Tensor the out
      arguments are also returned for method chaining, otherwise only the
      non-tensor-like returns remain)
    - an "out" overload name (``<overload>_out`` if ``func`` already has an
      overload name), with the inplace marker removed from the name

    Note that:
    (1) This also means that we can *only* generate an out= variant from a mutable schema
        if the mutable schema has at least one tensor-like non-aliasing return.
    (2) The generated out= variant still has mutable positional arguments,
        but if necessary we could probably add another out= variant that also
        functionalizes the mutable arguments (a functional_out variant)

    Raises:
        AssertionError: if ``func`` is not a mutable schema, if a return
            carries a write annotation, or if there is no tensor-like return.
    """
    assert func.kind() == SchemaKind.mutable

    # More of a sanity check - our existing restrictions on schemas should enforce that
    # mutable schema kinds never return their mutable arguments.
    assert not any(
        r.annotation is not None and r.annotation.is_write for r in func.returns
    )

    tensorlike_rets = [r for r in func.returns if r.type.is_tensor_like()]
    assert len(tensorlike_rets) > 0

    # Materialize into a set: the result is probed once per candidate letter
    # below, and a lazily-evaluated iterable would be consumed by the first
    # few membership tests, making later letters look unused.
    used_annotations = set(
        concatMap(
            lambda a: [] if a.annotation is None else a.annotation.alias_set,
            func.arguments.flat_all,
        )
    )
    valid_annotations = [
        x for x in "abcdefghijklmnopqrstuvwxyz" if x not in used_annotations
    ]

    all_rets_are_tensors = all(r.type == BaseType(BaseTy.Tensor) for r in func.returns)

    new_out_args: List[Argument] = []
    # The end result of new_returns is that:
    # - If every return is a plain tensor, then the new returns == the old returns, but with the out= alias annotations added.
    # - Otherwise, none of the out arguments show up in the returns (and we're only left with non-tensor-like returns, if any).
    new_returns: List[Return] = []
    for i, r in enumerate(func.returns):
        if r.type.is_tensor_like():
            new_out = Argument(
                name=f"out{i}",
                type=r.type,
                default=None,
                annotation=Annotation.parse(f"{valid_annotations[i]}!"),
            )
            new_out_args.append(new_out)
            if all_rets_are_tensors:
                # The convention for out= schemas is that they only return their out arguments
                # if the return is a plain Tensor (or if it's a tuple of plain Tensors)
                new_ret = Return(
                    name=None, type=new_out.type, annotation=new_out.annotation
                )
                new_returns.append(new_ret)
        else:
            new_returns.append(r)

    return FunctionSchema(
        name=func.name.remove_inplace().with_overload(
            "out" if not func.name.overload_name else f"{func.name.overload_name}_out"
        ),
        arguments=func.arguments.with_out_args(new_out_args),
        returns=tuple(new_returns),
    )
def process_ir_type(
    typ: Type, properties: "LazyIrProperties"
) -> Union[BaseCType, VectorCType, OptionalCType, ListCType]:
    """
    Convert a NativeFunctions type into the CType used by lazy tensor codegen.

    The conversion currently amounts to
     (1) replacing at::Tensors with lazy::Values
     (2) wrapping everything in a BaseCType
     (3) turning cpp-reference types into cpp-value types (e.g. vector instead of IntArrayRef)

    For (1): lazy::Values wrap lazy::Nodes, which is how Lazy IR represents tensors.
    Optional[Tensor], List[Tensor], etc. receive special handling - hence 'tensor-like'.

    Coverage is incomplete - the assertions below mark where more types are expected to be
    added as the codegen is used with more operators.
    """
    if isinstance(typ, BaseType):
        base = typ.name
        if base == BaseTy.Tensor or base == BaseTy.SymInt:
            return BaseCType(getValueT())
        if base == BaseTy.Scalar:
            if properties.TreatScalarsAsConstants:
                return BaseCType(scalarT)
            # at::scalar has special handling,
            # and is wrapped in an lazy::Value just like at::tensor
            return BaseCType(getValueT())

        # Remaining base types map one-to-one onto a plain C++ type.
        plain_mappings = (
            (BaseTy.ScalarType, scalarTypeT),
            (BaseTy.int, longT),
            (BaseTy.bool, boolT),
            (BaseTy.float, doubleT),
            (BaseTy.str, stringT),
            (BaseTy.Device, deviceT),
            (BaseTy.Layout, layoutT),
            (BaseTy.MemoryFormat, memoryFormatT),
        )
        for base_ty, cpp_t in plain_mappings:
            if base == base_ty:
                return BaseCType(cpp_t)
        raise AssertionError(f"TODO add support for type {repr(typ)}")

    if isinstance(typ, OptionalType):
        return OptionalCType(process_ir_type(typ.elem, properties))

    if isinstance(typ, ListType):
        elem_str = str(typ.elem)
        if elem_str == "Tensor?":
            # TODO(whc) is this actually correct? or should it use a Vector like above
            return ListCType(OptionalCType(BaseCType(getValueT())))
        if elem_str == "Tensor":
            # this is a TensorList which comes in from GetTensorList as a Value
            return BaseCType(tensorListValueT)
        if typ.elem == BaseType(BaseTy.SymInt):
            # TODO: return a value type.  The problem here is analogous to
            # the problem with tensorListValueT: if you have SymInt[] you
            # cannot conveniently save the list of Value directly, as nodes
            # expect to save values as a vector for ALL arguments.  So you
            # need a separate IR node that represents all of the size nodes
            # assembled into a list.  I'm not an LTC dev so I don't want to
            # figure it out right now.  Y'all figure it out...
            return VectorCType(BaseCType(longT))
        return VectorCType(process_ir_type(typ.elem, properties))

    raise AssertionError(f"unrecognized type {repr(typ)}")