def wrapper_kernel_sig(
    self, f: NativeFunction
) -> Union[NativeSignature, DispatcherSignature]:
    """Return the signature used for this function's wrapper kernel.

    The generated name carries a prefix purely to guarantee uniqueness:
    the Dispatcher API does not guarantee unique kernel names on its own.
    """
    unique_prefix = f"wrapper_{f.func.name.overload_name}_"
    return kernel_signature(f, self.backend_index, prefix=unique_prefix)
def gen_unstructured(f: NativeFunction, backend_index: BackendIndex) -> Optional[str]:
    """Emit the kernel declaration for an unstructured op, or None.

    Returns None when the backend has no kernel registered for ``f`` or when
    the registered kernel is a legacy one.
    """
    metadata = backend_index.get_kernel(f)
    if metadata is None or "legacy::" in metadata.kernel:
        return None
    sig = kernel_signature(f, backend_index)
    # External backends don't get the TORCH_API export macro.
    decl_prefix = "" if backend_index.external else "TORCH_API "
    return f"{decl_prefix}{sig.decl(name=metadata.kernel)};"
def __call__(self, f: NativeFunction) -> List[str]:
    """Emit the shape/dtype inference function declaration for ``f``.

    Returns a one-element list containing the ``;``-terminated declaration,
    or an empty list when no declaration is needed.
    """
    metadata = self.backend_index.get_kernel(f)
    assert metadata is not None
    # Only generate shape/dtype fn for non-structured kernels,
    # since we just use the meta function for structured kernels
    if not f.structured and f.structured_delegate is None:
        shape_sig = ComputeShapeSignature(metadata.kernel, f)
        # NOTE: the original also computed kernel_signature, filtered_args,
        # lazy_tensor_decls and node_ctor_inputs here, but never used the
        # results; that dead code has been removed.
        return [f"{shape_sig.shape_decl};"]
    return []
def __call__(self, func: NativeFunction) -> List[str]:
    """Generate the C++ definition of the lazy native function for ``func``.

    The emitted body resolves the common backend device, unwraps the lazy
    tensor inputs, computes output shapes/dtypes (via the structured meta
    kernel when available, otherwise a hand-written shape function), builds
    the IR node, and bridges the result back to at::Tensor(s).

    Returns a one-element list with the full function definition.
    """
    sig = kernel_signature(func, self.backend_index)
    metadata = self.backend_index.get_kernel(func)
    assert metadata is not None
    schema = LazyIrSchema(func.func)
    all_types = schema.filtered_types()
    value_types = schema.filtered_types(values=True, scalars=False)
    # NOTE: the original also computed the scalar-only types here but never
    # used them; that dead local has been removed.
    returns_length = len(schema.returns)
    fallback_str = gen_fallback_code(
        schema, overload_name=func.func.name.overload_name
    )
    # Wrapped scalars are not real tensor inputs, so they can't contribute
    # a backend device.
    value_types_names = [
        f"{t.name}" for t in value_types if t.name not in schema.wrapped_scalar_names
    ]
    assert (
        len(value_types_names) > 0
    ), "Code below assumes there is at least one tensor arg"
    get_device_str = f"""auto common_device = torch::lazy::GetBackendDevice({', '.join(value_types_names)});
        TORCH_INTERNAL_ASSERT(common_device);
        """
    lazy_tensor_decls_str = lazy_tensor_decls(value_types, self.tensor_class, schema)
    node_ctor_input_str = node_ctor_inputs(schema)
    # call the meta kernel if it exists, to compute output shape/dtype for our IR
    if func.structured or func.structured_delegate is not None:
        meta_out = """std::vector<Shape> shapes{Shape(out_meta.scalar_type(), out_meta.sizes().vec())};"""
        if returns_length > 1:

            def this_shape(i: int) -> str:
                return f"Shape(std::get<{i}>(out_meta).scalar_type(), std::get<{i}>(out_meta).sizes().vec())"

            shapes_str = ",".join([this_shape(i) for i in range(returns_length)])
            meta_out = "std::vector<Shape> shapes{" + shapes_str + "};"
        meta_str = f"""auto out_meta = at::meta::{schema.aten_name}({', '.join(str(t.name) for t in all_types)});
        {meta_out}"""
    else:
        # Non-structured kernel: fall back to the hand-written shape function.
        shape_sig = ComputeShapeSignature(metadata.kernel, func)
        meta_str = f"""
        auto shapes = {shape_sig.shape_call};"""
    meta_str += f"""
        TORCH_INTERNAL_ASSERT(shapes.size() == {returns_length});"""
    node_str = f"""auto node = torch::lazy::MakeNode<ir::ops::{schema.node_name}>({node_ctor_input_str}, std::move(shapes));"""
    first_tensor_name = value_types_names[0]
    bridge_str = """auto result = torch::lazy::CreateAtenFromLtcTensor(
        torch::lazy::LazyTensor::Create(std::move(node), *common_device));"""
    if returns_length > 1:
        # Multiple outputs: each one views a distinct index of the node.
        bridge_str = f"""std::vector<{self.tensor_class}Ptr> lazy_tensors;
        for (int i = 0; i < {returns_length}; i++) {{
            lazy_tensors.push_back(torch::lazy::LazyTensor::Create(torch::lazy::Value(node, i), *common_device));
        }}
        auto result = torch::lazy::TupleAtenFromLtcTensors<{returns_length}>(lazy_tensors);"""
    if schema.name.name.inplace or func.func.is_out_fn():
        assert returns_length == 1, (
            "We assumed there was no such case where an op is an in-place variant "
            "and has tuple outputs."
        )
        # In-place/out variants mutate the first tensor arg instead of
        # creating a fresh result.
        bridge_str = f"""lazy_{first_tensor_name}->SetInPlaceIrValue(node);
        auto& result = {first_tensor_name};"""
    return [
        f"""\
    {sig.decl(name=f"{self.class_method_name}::{metadata.kernel}")} {{
        {fallback_str}
        TORCH_LAZY_FN_COUNTER("lazy::");
        {get_device_str}
        {lazy_tensor_decls_str}
        {meta_str}
        {node_str}
        {bridge_str}
        return result;
    }};\n
    """
    ]
def __call__(self, func: NativeFunction) -> List[str]:
    """Generate the C++ definition of the LTC native function for ``func``.

    Older LTC variant: resolves the device via ``bridge::GetBackendDevice``,
    computes output shapes via the meta kernel (structured ops) or a shape
    function, builds the IR node, and derives the result tensor(s) from the
    first lazy tensor input.

    Returns a one-element list with the full function definition.
    """
    sig = kernel_signature(func, self.backend_index)
    # Lazy IR stuff
    schema = LazyIrSchema(func.func)
    all_types = schema.filtered_types()
    value_types = schema.filtered_types(values=True, scalars=False)
    # NOTE: the original also computed the scalar-only types here but never
    # used them; that dead local has been removed.
    returns_length = len(schema.returns)
    value_types_names = ", ".join([f"{t.name}" for t in value_types])
    get_device_str = (
        f"""auto device = bridge::GetBackendDevice({value_types_names});"""
    )
    lazy_tensor_decls_str = lazy_tensor_decls(value_types, self.tensor_class)
    node_ctor_input_str = node_ctor_inputs(schema)
    # call the meta kernel if it exists, to compute output shape/dtype for our IR
    if func.structured or func.structured_delegate is not None:
        meta_out = """std::vector<Shape> shapes{Shape(out_meta.scalar_type(), out_meta.sizes().vec())};"""
        if returns_length > 1:

            def this_shape(i: int) -> str:
                return f"Shape(std::get<{i}>(out_meta).scalar_type(), std::get<{i}>(out_meta).sizes().vec())"

            shapes_str = ",".join([this_shape(i) for i in range(returns_length)])
            meta_out = "std::vector<Shape> shapes{" + shapes_str + "};"
        meta_str = f"""auto out_meta = at::meta::{schema.aten_name}({', '.join(str(t.name) for t in all_types)});
        {meta_out}"""
    else:
        # Non-structured kernel: fall back to the hand-written shape function.
        shape_sig = ComputeShapeSignature(func)
        meta_str = f"""
        auto shapes = {shape_sig.shape_call};"""
    meta_str += f"""
        TORCH_INTERNAL_ASSERT(shapes.size() == {returns_length});"""
    node_str = f"""auto node = torch::lazy::MakeNode<ir::ops::{schema.node_name}>({node_ctor_input_str}, std::move(shapes));"""
    assert len(value_types) > 0, f"Only supporting tensor ops so far, none found in {sig}"
    first_tensor = value_types[0]
    bridge_str = f"""auto result = CreateAtenFromLtcTensor(lazy_{first_tensor.name}.CreateFrom(node));"""
    if returns_length > 1:
        # Multiple outputs: each one views a distinct index of the node.
        bridge_str = f"""std::vector<{self.tensor_class}> lazy_tensors;
        for (int i = 0; i < {returns_length}; i++) {{
            lazy_tensors.push_back(lazy_{first_tensor.name}.CreateFrom(torch::lazy::Value(node, i)));
        }}
        auto result = TupleAtenFromLtcTensors<{returns_length}>(lazy_tensors);"""
    if schema.name.name.inplace:
        assert returns_length == 1, (
            "We assumed there was no such case where an op is an in-place variant "
            "and has tuple outputs."
        )
        # In-place variants mutate the first tensor arg in place.
        bridge_str = f"""lazy_{first_tensor.name}.SetInPlaceIrValue(node);
        auto& result = {first_tensor.name};"""
    return [
        f"""\
    // TODO(alanwaketan): Quite a lot inefficient copy-by-value there. Let's optimize it.
    {sig.decl(name=f"{self.class_method_name}::{schema.aten_name}")} {{
        LTC_FN_COUNTER("lazy::");
        {get_device_str}
        {lazy_tensor_decls_str}
        {meta_str}
        {node_str}
        {bridge_str}
        return result;
    }};\n
    """
    ]
def __call__(self, func: NativeFunction) -> List[str]:
    """Generate the C++ definition of the lazy native function for ``func``.

    Assembles, as strings of C++ code: an optional forced-fallback prologue,
    common-device resolution, lazy tensor declarations, a shape call, IR node
    construction, and the bridge back to at::Tensor(s); then splices them
    into one function definition.

    Returns a one-element list with the full function definition.
    """
    sig = kernel_signature(func, self.backend_index)
    metadata = self.backend_index.get_kernel(func)
    assert metadata is not None
    schema = LazyIrSchema(func.func)
    value_args = schema.filtered_args(values=True, scalars=False)
    returns_length = len(schema.returns)
    # Only emit the eager-fallback prologue when the generator is configured
    # to force fallbacks.
    fallback_str = ""
    if self.gen_forced_fallback_code:
        fallback_str = gen_fallback_code(
            schema, overload_name=func.func.name.overload_name
        )
    # Wrapped scalars are not real tensor inputs, so they can't contribute a
    # backend device.
    value_types_names = [f"{a.name}" for a in value_args if not a.is_wrapped_scalar]
    assert (len(value_types_names) > 0), "Code below assumes there is at least one tensor arg"
    get_device_str = f"""auto common_device = torch::lazy::GetBackendDevice({', '.join(value_types_names)});
        TORCH_INTERNAL_ASSERT(common_device);
        """
    lazy_tensor_decls_str = lazy_tensor_decls(value_args, self.tensor_class)
    node_ctor_input_str = node_ctor_inputs(schema)
    # Shape/dtype inference is delegated to gen_shape_call; the emitted code
    # is expected to define `shapes` used by node_str below.
    shape_str = self.gen_shape_call(func)
    node_str = f"""auto node = torch::lazy::MakeNode<{schema.node_name}>({node_ctor_input_str}, std::move(shapes));"""
    first_tensor_name = value_types_names[0]
    # Single-output default: wrap the node directly in a new lazy tensor.
    bridge_str = """auto result = torch::lazy::CreateAtenFromLtcTensor(
        torch::lazy::LazyTensor::Create(std::move(node), *common_device));"""
    if returns_length > 1:
        # Multiple outputs: each one views a distinct index of the node.
        bridge_str = f"""std::vector<{self.tensor_class}Ptr> lazy_tensors;
        for (int i = 0; i < {returns_length}; i++) {{
            lazy_tensors.push_back(torch::lazy::LazyTensor::Create(torch::lazy::Value(node, i), *common_device));
        }}
        auto result = torch::lazy::TupleAtenFromLtcTensors<{returns_length}>(lazy_tensors);"""
    if schema.name.name.inplace or func.func.is_out_fn():
        assert returns_length == 1, (
            "We assumed there was no such case where an op is an in-place variant "
            f"and has tuple outputs, but got tuple of len {returns_length}."
        )
        # In-place/out variants mutate the first tensor arg instead of
        # creating a fresh result.
        bridge_str = f"""lazy_{first_tensor_name}->SetInPlaceIrValue(node);
        auto& result = {first_tensor_name};"""
    return [
        f"""\
    {sig.decl(name=f"{self.class_method_name}::{metadata.kernel}")} {{
        {fallback_str}
        TORCH_LAZY_FN_COUNTER("lazy::");
        {get_device_str}
        {lazy_tensor_decls_str}
        {shape_str}
        {node_str}
        {bridge_str}
        return result;
    }};\n
    """
    ]