def emit_history() -> str:
    fn = 'rebase' if modifies_arguments(f) and view_info is None else 'set'
    output_names = [r.name for r in differentiable_outputs]
    # TODO: flatten allocates a std::vector, which could be expensive
    outs = CodeTemplate("flatten_tensor_args( ${outs} )").substitute(outs=output_names)
    return SET_HISTORY.substitute(fn=fn, differentiable_outputs=outs)
def gen_py_torch_functions(out, declarations, template_path):
    """
    Generate functions in the "torch" module.
    """
    PY_TORCH_FUNCTIONS_CPP = CodeTemplate.from_file(template_path + '/python_torch_functions.cpp')
    py_torch_functions = get_py_torch_functions(declarations)
    env = create_python_bindings(py_torch_functions, is_python_method=False, module="torch")
    write(out, 'python_torch_functions.cpp', PY_TORCH_FUNCTIONS_CPP, env)
def gen_py_variable_methods(out, declarations, template_path):
    """
    Generate Tensor methods.
    """
    PY_VARIABLE_METHODS_CPP = CodeTemplate.from_file(template_path + '/python_variable_methods.cpp')
    py_variable_methods = get_py_variable_methods(declarations)
    env = create_python_bindings(py_variable_methods, is_python_method=True, module=None)
    write(out, 'python_variable_methods.cpp', PY_VARIABLE_METHODS_CPP, env)
def write(dirname: str, name: str, template: CodeTemplate, env: Dict[str, List[str]]) -> None:
    env['generated_comment'] = GENERATED_COMMENT.substitute(filename=template.filename)
    path = os.path.join(dirname, name)
    # See Note [Unchanging results for ninja]
    try:
        with open(path, 'r') as f:
            old_val = f.read()
    except IOError:
        old_val = None
    new_val = template.substitute(env)
    if old_val != new_val:
        with open(path, 'w') as f:
            print("Writing {}".format(path))
            f.write(new_val)
    else:
        print("Skipped writing {}".format(path))
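# A quick, hypothetical illustration of the CodeTemplate semantics write()
# relies on (the template text below is made up): string values substitute in
# place, while list values expand one entry per line, which is why env maps
# str -> List[str].
_demo = CodeTemplate("// ${generated_comment}\n${decls}\n")
print(_demo.substitute(generated_comment='@generated', decls=['int a;', 'int b;']))
# // @generated
# int a;
# int b;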
def genCppH(hFilePath, cppFilePath, templateGlslPaths, tmpDirPath, env):
    print("hFilePath:{}".format(hFilePath))
    print("cppFilePath:{}".format(cppFilePath))

    h = "#pragma once\n"
    nsbegin = "\nnamespace at { namespace native { namespace vulkan { \n"
    nsend = "\n} } } //namespace at::native::vulkan\n"
    h += nsbegin

    cpp = "#include <ATen/native/vulkan/{}>".format(H_NAME)
    cpp += nsbegin

    for templateGlslPath in templateGlslPaths:
        name = getName(templateGlslPath)
        h += "extern const char* " + name + ";\n"
        cpp += "const char* " + name + " = \n"

        codeTemplate = CodeTemplate.from_file(templateGlslPath)
        srcPath = tmpDirPath + "/" + name + ".glsl"
        content = codeTemplate.substitute(env)

        lines = content.split("\n")
        for l in lines:
            if len(l) < 1:
                continue
            cpp += "\"" + l + "\\n\"\n"
        cpp += ";\n"

    cpp += nsend
    h += nsend

    with open(hFilePath, "w") as f:
        f.write(h)
    with open(cppFilePath, "w") as f:
        f.write(cpp)
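# Minimal sketch of the string-literal escaping loop above (the GLSL content
# is invented): each non-empty source line becomes one C string literal with
# an explicit '\n', and C's adjacent-literal concatenation glues them back
# into a single const char*.
_content = "#version 450\nvoid main() {}"
_lit = "".join('"' + l + '\\n"\n' for l in _content.split("\n") if len(l) >= 1)
print(_lit)
# "#version 450\n"
# "void main() {}\n"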
def gen_dispatcher_registrations(
        fm: FileManager,
        output_dir: str,
        class_name: str,
        cpp_namespace: str,
        backend_indices: Dict[DispatchKey, BackendIndex],
        grouped_native_functions: Sequence[Union[NativeFunction, NativeFunctionsGroup]],
        backend_dispatch_key: DispatchKey,
        dispatch_key: DispatchKey,
        selector: 'SelectiveBuilder',
        # build_in_tree is true for the lazy TS backend and affects include paths; not used for external backends
        build_in_tree: bool = False,
        per_operator_headers: bool = False,
        backend_name: str = "",
        eager_registration: bool = True) -> None:
    headers = [
        f"{output_dir}/{backend_dispatch_key}NativeFunctions.h",
    ]
    if build_in_tree:
        external_backend_headers_str = "\n".join(f'#include <{h}>' for h in headers)
    else:
        external_backend_headers_str = "\n".join(f'#include "{h}"' for h in headers)

    assert class_name is not None
    backend_index = backend_indices[dispatch_key]

    dispatch_registrations_body = list(concatMap(
        dest.RegisterDispatchKey(
            backend_index,
            Target.REGISTRATION,
            selector,
            rocm=False,
            cpp_namespace=cpp_namespace,
            class_method_name=f'{class_name}',
            skip_dispatcher_op_registration=False),
        grouped_native_functions))
    deferred_dispatch_registrations = ""
    static_init_dispatch_registrations = ""
    if eager_registration:
        static_template = CodeTemplate("""\
TORCH_LIBRARY_IMPL(aten, $dispatch_key, m) {
$dispatch_registrations_body
};""")
        static_init_dispatch_registrations = static_template.substitute(
            dispatch_key=dispatch_key,
            dispatch_registrations_body=dispatch_registrations_body)
    else:
        deferred_template = CodeTemplate("""\
TORCH_API void Register${backend_name}${dispatch_key}NativeFunctions() {
  static auto m = MAKE_TORCH_LIBRARY_IMPL(aten, $dispatch_key);
  $dispatch_registrations_body
}""")
        deferred_dispatch_registrations = deferred_template.substitute(
            backend_name=backend_name,
            dispatch_key=dispatch_key,
            dispatch_registrations_body=dispatch_registrations_body)

    fm.write_with_template(f'Register{dispatch_key}.cpp', 'RegisterDispatchKey.cpp', lambda: {
        'static_init_dispatch_registrations': static_init_dispatch_registrations,
        'deferred_dispatch_registrations': deferred_dispatch_registrations,
        'extra_cuda_headers': '',
        'external_backend_headers': external_backend_headers_str,
        'ops_headers': '#include <ATen/Functions.h>' if not per_operator_headers else '',
        'DispatchKey': dispatch_key,
        'dispatch_namespace': dispatch_key.lower(),
        'dispatch_headers': dest.gen_registration_headers(
            backend_index, per_operator_headers=per_operator_headers, rocm=False),
        'dispatch_helpers': dest.gen_registration_helpers(backend_index),
        'dispatch_namespaced_definitions': '',
        'dispatch_anonymous_definitions': list(concatMap(
            dest.RegisterDispatchKey(
                backend_index,
                Target.ANONYMOUS_DEFINITION,
                selector,
                rocm=False,
                cpp_namespace=cpp_namespace,
                class_method_name=f'{class_name}',
                skip_dispatcher_op_registration=False),
            grouped_native_functions)),
    })
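# Illustrative expansion of the eager branch (the dispatch key and the
# registration line are invented; real bodies come from RegisterDispatchKey):
_t = CodeTemplate("""\
TORCH_LIBRARY_IMPL(aten, $dispatch_key, m) {
$dispatch_registrations_body
};""")
print(_t.substitute(
    dispatch_key='XLA',
    dispatch_registrations_body=['m.impl("add.Tensor", TORCH_FN(wrapper_add_Tensor));']))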
}

# Some operators invalidate the grad_accumulator. Let's reset it.
RESET_GRAD_ACCUMULATOR = {'set', 'resize'}

# NOTE [ Invariant: TensorImpl and Storage Pointer Equality ]
#
# When a function modifies its input tensors (via inplace or out-variants), it
# should never change the input tensors' underlying c10::TensorImpl pointers
# or c10::Storage pointers.
#
# The following code templates implement the checks for this invariant:
SAVE_TENSOR_STORAGE = CodeTemplate("""\
c10::optional<Storage> ${tensor_name}_storage_saved =
  ${tensor_name}.has_storage() ? c10::optional<Storage>(${tensor_name}.storage()) : c10::nullopt;
""")

ENFORCE_SAME_TENSOR_STORAGE = CodeTemplate("""\
if (${tensor_name}_storage_saved.has_value())
  AT_ASSERT(${tensor_name}_storage_saved.value().is_alias_of(${tensor_name}.storage()));
""")

SAVE_TENSORLIST_STORAGE = CodeTemplate("""\
std::vector<c10::optional<Storage>> ${tensorlist_name}_storage_saved(${tensorlist_name}.size());
for (const Tensor& tensor : ${tensorlist_name})
  ${tensorlist_name}_storage_saved.push_back(
    tensor.has_storage() ? c10::optional<Storage>(tensor.storage()) : c10::nullopt);
""")

ENFORCE_SAME_TENSORLIST_STORAGE = CodeTemplate("""\
from tools.codegen.api.autograd import (Derivative, DifferentiabilityInfo,
                                        SavedAttribute, uses_retain_variables,
                                        uses_single_grad)
from tools.codegen.api.types import Binding
from tools.codegen.code_template import CodeTemplate
from tools.codegen.gen import FileManager
from tools.codegen.model import Argument

FUNCTION_DECLARATION = CodeTemplate("""\
struct TORCH_API ${op} : public ${superclass} {
  using ${superclass}::${superclass};
  variable_list apply(variable_list&& grads) override;
  std::string name() const override { return "${op}"; }
  void release_variables() override {
    ${thread_lock}
    ${release_variables}
  }
  ${will_release_variables}
  ${saved_variables}
  ${saved_list_sizes}
};
""")

WILL_RELEASE_VARIABLES = CodeTemplate("""\
bool retain_variables = true;
void will_release_variables() override {
  retain_variables = false;
}
""")

FUNCTION_DEFINITION = CodeTemplate("""\
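# A minimal sketch of stamping out the declaration template (all field values
# here are made up; the real env is assembled by process_function):
print(FUNCTION_DECLARATION.substitute(
    op='MulBackward0',
    superclass='TraceableFunction',
    thread_lock='std::lock_guard<std::mutex> lock(mutex_);',
    release_variables=['self_.reset_data();', 'other_.reset_data();'],
    will_release_variables='',
    saved_variables=['SavedVariable self_;', 'SavedVariable other_;'],
    saved_list_sizes=[]))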
def should_trace(f: NativeFunction) -> bool:
    # Operations involving Storage or Type are not traceable at the moment
    if any(str(arg.type) in {'Storage', 'Type', 'ConstQuantizerPtr'}
           for arg in f.func.schema_order_arguments()):
        return False
    # We can't trace functions which don't have any Tensor or TensorList returns
    if not any(r.type.is_tensor_like() for r in f.func.returns):
        return False
    return f.func.name.name.base not in DONT_RECORD_TRACE

SELECT = CodeTemplate("""\
if (${cond}) {
  ${true}
} else {
  ${false}
}
""")

OP_NAME = CodeTemplate("""\
op_name = jit::Symbol::fromQualString("aten::${trace_name}");
""")

# These functions are recorded in the trace under a renamed op name:
RENAME_TRACE = {
    'zero': 'zeros_like',  # replacing aten::zero_ with aten::zeros_like
    'fill': 'full_like',  # replacing aten::fill_ with aten::full_like
}
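# Hypothetical walk-through of the rename: an inplace op such as aten::zero_
# is recorded out-of-place, so its base name (without the trailing '_') is
# remapped through RENAME_TRACE before OP_NAME is emitted.
_trace_name = 'zero_'.rstrip('_')                         # 'zero'
_trace_name = RENAME_TRACE.get(_trace_name, _trace_name)  # 'zeros_like'
print(OP_NAME.substitute(trace_name=_trace_name))
# op_name = jit::Symbol::fromQualString("aten::zeros_like");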
GRADIENT_IMPLEMENTED_FOR_COMPLEX.update(GRADIENT_IMPLEMENTED_FOR_SPARSE_COMPLEX)

# Some operators invalidate the grad_accumulator. Let's reset it.
RESET_GRAD_ACCUMULATOR = {'set', 'resize'}

# NOTE [ Invariant: TensorImpl and Storage Pointer Equality ]
#
# When a function modifies its input tensors (via inplace or out-variants), it
# should never change the input tensors' underlying c10::TensorImpl pointers
# or c10::Storage pointers.
#
# The following code templates implement the checks for this invariant:
SAVE_TENSOR_STORAGE = CodeTemplate("""\
c10::optional<Storage> ${tensor_name}_storage_saved =
  ${tensor_name}.has_storage() ? c10::optional<Storage>(${tensor_name}.storage()) : c10::nullopt;
""")

ENFORCE_SAME_TENSOR_STORAGE = CodeTemplate("""\
if (${tensor_name}_storage_saved.has_value())
  AT_ASSERT(${tensor_name}_storage_saved.value().is_alias_of(${tensor_name}.storage()));
""")

SAVE_TENSORLIST_STORAGE = CodeTemplate("""\
std::vector<c10::optional<Storage>> ${tensorlist_name}_storage_saved(${tensorlist_name}.size());
for (const Tensor& tensor : ${tensorlist_name})
  ${tensorlist_name}_storage_saved.push_back(
    tensor.has_storage() ? c10::optional<Storage>(tensor.storage()) : c10::nullopt);
""")

ENFORCE_SAME_TENSORLIST_STORAGE = CodeTemplate("""\
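# Sketch of how the invariant check is stamped out for one argument
# ('self' here is just an example name):
print(SAVE_TENSOR_STORAGE.substitute(tensor_name='self'))
# c10::optional<Storage> self_storage_saved =
#   self.has_storage() ? c10::optional<Storage>(self.storage()) : c10::nullopt;
print(ENFORCE_SAME_TENSOR_STORAGE.substitute(tensor_name='self'))
# if (self_storage_saved.has_value())
#   AT_ASSERT(self_storage_saved.value().is_alias_of(self.storage()));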
instructions = 1
constants = 2
types = 3
operators = 4
register_size = 5

EXCLUDED_OP_SET = [
    "aten::full.names",
    "aten::full.out",
    "aten::full",
]

EXCLUDE_UPGRADER_SET = ["full_0_4", "full_out_0_4"]

ONE_INSTRUCTION = CodeTemplate("""
    Instruction{OpCode::${operator_name}, ${X}, ${N}},""")

INSTRUCTION_LIST = CodeTemplate("""std::vector<Instruction>({
        ${instruction_list}
    }), // instructions list""")

ONE_CONSTANT = CodeTemplate("""
    c10::IValue(${constant}),""")

CONSTANT_LIST = CodeTemplate("""std::vector<c10::IValue>({
        ${constant_list}
    }), // constants list""")

CONSTANTS_LIST_EMPTY = """std::vector<c10::IValue>(), // constants list"""

ONE_TYPE = CodeTemplate("""c10::parseType("${type_str}"),""")
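# Illustrative use of the bytecode templates (opcode and operands invented);
# the output is roughly:
#   std::vector<Instruction>({
#       Instruction{OpCode::STOREN, 1, 7},
#   }), // instructions list
_ins = ONE_INSTRUCTION.substitute(operator_name='STOREN', X=1, N=7)
print(INSTRUCTION_LIST.substitute(instruction_list=[_ins]))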
    'split_with_sizes',
}

# note: some VIEW_FUNCTIONS are just compositions of the view functions above
# this list contains both the root view functions and any that are purely
# composed of viewing functions, and is used by the JIT to determine when an
# operator may return a view of its inputs; however, they may sometimes
# return a copy. (e.g. `contiguous`)
RETURNS_VIEWS_OF_INPUT = set(VIEW_FUNCTIONS.keys()).union({
    'chunk', 'detach', 'contiguous', 'reshape', 'reshape_as', 'expand_as',
    'view_as', 'real', 'imag', 'narrow', 'movedim', 'tensor_split',
    'swapdims', 'swapaxes'
})

ARRAYREF_TO_VEC = CodeTemplate("""\
auto ${vec} = ${arg}.vec();
""")

OPTIONAL_TO_VAL = CodeTemplate("""\
auto ${val} = ${arg}.value_or(${default});
""")

CALL_DISPATCH_VIA_NAMESPACE = CodeTemplate("""\
at::${api_name}(${unpacked_args})""")

CALL_DISPATCH_VIA_METHOD = CodeTemplate("""\
${var}.${api_name}(${unpacked_method_args})""")

SETUP_REPLAY_VIEW_IF_NOT_SUPPORT_AS_STRIDED_OR_VIEW_WITH_METADATA_CHANGE = CodeTemplate("""\
std::function<at::Tensor(const at::Tensor&)> func=nullptr;
parser.add_argument("--output_prefix", default="", help="") parser.add_argument( "--install_dir", default=".", help="where to put generated file") parser.add_argument("--aten_root", default="", help="root directory of aten") args, _ = parser.parse_known_args() if args.aten_root: if not os.path.exists(args.aten_root): raise ValueError('aten_root ({}) does not exist'.format( args.aten_root)) sys.path.append(os.path.join(args.aten_root, '..')) # TODO: fix this from tools.codegen.code_template import CodeTemplate as CT else: from tools.codegen.code_template import CodeTemplate as CT # type: ignore[import,no-redef] OP_TEMPLATE = CT.from_file( os.path.join(args.template_dir, 'aten_op_template.h')) try: # use faster C loader if available from yaml import CLoader as Loader except ImportError: from yaml import Loader # type: ignore[misc] def write(filename, s): with open(filename, "w") as f: f.write(s) def read(filename):
To run this file by hand from the root of the PyTorch repository, run:

python -m tools.code_analyzer.op_deps_processor \
  --op-dependency build_code_analyzer/work/torch_result.yaml \
  --output pt_deps.bzl
"""

import argparse

import yaml

from tools.codegen.code_template import CodeTemplate

BAZEL_OUTPUT = CodeTemplate("""\
TORCH_DEPS = {
${ops}
}
""")

BAZEL_OP = CodeTemplate("""\
    "${op_name}": [
${op_deps}
    ],
""")

BAZEL_OP_DEP = CodeTemplate("""\
        "${dep_name}",
""")

DOT_OUTPUT = CodeTemplate("""\
digraph {
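# Sketch of how the Bazel templates nest, innermost first (op and dep names
# are invented):
_dep = BAZEL_OP_DEP.substitute(dep_name='aten::empty_like')
_op = BAZEL_OP.substitute(op_name='aten::zeros_like', op_deps=[_dep])
print(BAZEL_OUTPUT.substitute(ops=[_op]))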
    'view_as', 'real', 'imag', 'narrow', 'movedim', 'tensor_split',
    'swapdims', 'swapaxes'
})

# These are the functions we consider views for the purposes of validating
# StorageImpl and TensorImpl in gen_variable_type.
# `_unsafe_view` is not included in VIEW_FUNCTIONS above because it is not a
# view for the purposes of the ADInplaceOrView kernel, so we do not want to
# call as_view for it. See NOTE [Unsafe View] for more info.
ALL_VIEW_FUNCTIONS = {
    **VIEW_FUNCTIONS,
    '_unsafe_view': 'self',
}

ARRAYREF_TO_VEC = CodeTemplate("""\
auto ${vec} = ${arg}.vec();
""")

OPTIONAL_TO_VAL = CodeTemplate("""\
auto ${val} = ${arg}.value_or(${default});
""")

CALL_DISPATCH_VIA_NAMESPACE = CodeTemplate("""\
at::${api_name}(${unpacked_args})""")

CALL_DISPATCH_VIA_METHOD = CodeTemplate("""\
${var}.${api_name}(${unpacked_method_args})""")

SETUP_REPLAY_VIEW_IF_NOT_SUPPORT_AS_STRIDED_OR_VIEW_WITH_METADATA_CHANGE = CodeTemplate("""\
std::function<at::Tensor(const at::Tensor&)> func=nullptr;
    py_forwards.extend(forward_decls(name, overload_decls, is_python_method, module))

    return {
        'py_forwards': py_forwards,
        'py_methods': py_methods,
        'py_method_defs': py_method_defs,
    }

# handler for an output/no-output overload pair
# (plugged into PY_VARIABLE_CASE as ${call_dispatch})
PY_VARIABLE_OUT = CodeTemplate("""\
if (_r.isNone(${out_idx})) {
  ${call_dispatch}
} else {
  ${call_dispatch_out}
}
""")

# handler for a single parsed signature - may be a single overload or
# a pair of overloads whose signatures differ only in output params
PY_VARIABLE_CASE = CodeTemplate("""\
case ${i}: {
  ${body}
}
""")

def emit_dispatch_case(i, dictionary, is_python_method):
    """
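# Rough sketch of how a dispatch case is assembled (the index and dispatch
# bodies are invented):
_body = PY_VARIABLE_OUT.substitute(
    out_idx=2,
    call_dispatch='return wrap(dispatch_add(_r.tensor(0), _r.tensor(1)));',
    call_dispatch_out='return wrap(dispatch_add(_r.tensor(0), _r.tensor(1), _r.tensor(2)));')
print(PY_VARIABLE_CASE.substitute(i=0, body=_body))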
from typing import List, Sequence, Tuple, Optional

from tools.codegen.api.autograd import *
from tools.codegen.api.types import *
from tools.codegen.code_template import CodeTemplate
from tools.codegen.gen import FileManager
from tools.codegen.model import *
from tools.codegen.utils import *

FUNCTION_DECLARATION = CodeTemplate("""\
struct TORCH_API ${op} : public ${superclass} {
  using ${superclass}::${superclass};
  variable_list apply(variable_list&& grads) override;
  std::string name() const override { return "${op}"; }
  void release_variables() override {
    ${thread_lock}
    ${release_variables}
  }
  ${will_release_variables}
  ${saved_variables}
  ${saved_list_sizes}
};
""")

WILL_RELEASE_VARIABLES = CodeTemplate("""\
bool retain_variables = true;
void will_release_variables() override {
  retain_variables = false;
}
""")

FUNCTION_DEFINITION = CodeTemplate("""\
parser.add_argument("--install_dir", default=".", help="where to put generated file") parser.add_argument("--aten_root", default="", help="root directory of aten") args, _ = parser.parse_known_args() if args.aten_root: if not os.path.exists(args.aten_root): raise ValueError('aten_root ({}) does not exist'.format( args.aten_root)) sys.path.append(os.path.join(args.aten_root, '..')) # TODO: fix this from tools.codegen.code_template import CodeTemplate as CT else: from tools.codegen.code_template import CodeTemplate as CT # type: ignore[import,no-redef] OP_TEMPLATE = CT.from_file( os.path.join(args.template_dir, 'aten_op_template.h')) try: # use faster C loader if available from yaml import CLoader as Loader except ImportError: from yaml import Loader # type: ignore[misc] def write(filename, s): with open(filename, "w") as f: f.write(s) def read(filename): with open(filename, "r") as f:
from tools.codegen.code_template import CodeTemplate

# You should use these lines, rather than doing it manually.
# Especially if you see this error!
#
#     File "/usr/local/lib/python2.7/dist-packages/yaml/__init__.py", line 69, in load
#       loader = Loader(stream)
#     TypeError: 'module' object is not callable
try:
    # use faster C loader if available
    from yaml import CLoader as YamlLoader
except ImportError:
    from yaml import Loader as YamlLoader

GENERATED_COMMENT = CodeTemplate("@" + "generated from ${filename}")

# Matches "foo" in "foo, bar" but not "foobar". Used to search for the
# occurrence of a parameter in the derivative formula.
IDENT_REGEX = r'(^|\W){}($|\W)'

# TODO: Use a real parser here; this will get bamboozled
# by signatures that contain things like std::array<bool, 2> (note the space)
def split_name_params(prototype):
    name, overload_name, params = re.match(r'(\w+)(\.\w+)?\((.*)\)', prototype).groups()
    return name, params.split(', ')

# When tracing, we record inplace operations as out-of-place operations,
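# A quick check of both helpers (the signature is invented):
assert split_name_params('add.out(Tensor self, Tensor other)') == \
    ('add', ['Tensor self', 'Tensor other'])
assert re.search(IDENT_REGEX.format('other'), 'self + other')       # whole word: match
assert not re.search(IDENT_REGEX.format('other'), 'self + others')  # substring: no match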
#
#                          Method Impl Codegen
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#

# python binding for all overloads of a particular function/method
PY_VARIABLE_METHOD_VARARGS = CodeTemplate(r"""\
// ${name}
static PyObject * ${pycname}(PyObject* self_, PyObject* args, PyObject* kwargs)
{
  ${method_header}
  static PythonArgParser parser({
    ${signatures}
  }, /*traceable=*/${traceable});

  ParsedArgs<${max_args}> parsed_args;
  auto _r = parser.parse(${self_}, args, kwargs, parsed_args);
  ${check_has_torch_function}
  switch (_r.idx) {
    ${dispatch}
  }
  ${method_footer}
}
""")

# handler for a single parsed signature - may be a single overload or
# a pair of overloads whose signatures differ only in output params
# (plugged into PY_VARIABLE_METHOD_VARARGS as an item in ${dispatch})
PY_VARIABLE_CASE = CodeTemplate("""\
case ${overload_index}: {
instructions = 1
constants = 2
types = 3
operators = 4
register_size = 5

EXCLUDED_OP_SET = [
    "aten::full.names",
    "aten::full.out",
    "aten::full",
]

EXCLUDE_UPGRADER_SET = ["full_0_4", "full_out_0_4"]

ONE_INSTRUCTION = CodeTemplate("""
    Instruction{OpCode::${operator_name}, ${X}, ${N}},""")

INSTRUCTION_LIST = CodeTemplate("""std::vector<Instruction>({
        ${instruction_list}
    }), // instructions list""")

ONE_CONSTANT = CodeTemplate("""c10::IValue(${constant}),""")

CONSTANT_LIST = CodeTemplate("""std::vector<c10::IValue>({
        ${constant_list}
    }), // constants list""")

ONE_TYPE = CodeTemplate("""c10::parseType("${type_str}"),""")

TYPE_LIST = CodeTemplate("""std::vector<c10::TypePtr>({
    ${type_list}
def process_function(info: DifferentiabilityInfo, template: CodeTemplate) -> str:
    saved_variables: List[str] = []
    release_variables: List[str] = []
    saved_list_sizes: List[str] = []
    unpack: List[str] = []
    asserts: List[str] = []
    compute_index_ranges: List[str] = []
    getter_definitions: List[str] = []
    py_getsetdef_structs: List[str] = []

    for arg in info.args_with_derivatives:
        if arg.type == 'TensorList' or arg.type == 'const c10::List<c10::optional<Tensor>> &':
            size = f'{arg.name}_size_'
            saved_list_sizes.append(f'size_t {arg.name}_size_;')
        else:
            size = '1'
        compute_index_ranges.append(f'auto {arg.name}_ix = gen.range({size});')

    def save_var(var: SavedAttribute, is_output: bool) -> None:
        name = var.name
        should_append_getsetdef = True

        if var.type == 'Tensor' or var.type == 'c10::optional<Tensor>' or var.type == 'c10::optional<Tensor>&' or \
                (var.type == 'Scalar' and is_output):
            saved_variables.append(f'SavedVariable {name}_;')
            release_variables.append(f'{name}_.reset_data();')
            release_variables.append(f'{name}_.reset_grad_function();')
            ptr = 'shared_from_this()' if is_output else ''
            unpack.append(f'auto {name} = {name}_.unpack({ptr});')
            getter_definitions.append(GETTER_DEFINITION_SAVEDVAR.substitute(
                op=info.op, name=name, body=GETTER_BODY_SAVEDVAR))
        elif var.type == 'TensorList':
            saved_variables.append(f'std::vector<SavedVariable> {name}_;')
            saved_variables.append(f'bool {name}_released_ = false;')
            # Just clear() is sufficient, we don't need to loop and clear each variable.
            # Because the SavedVariable owns a tensor and a grad_fn, removing the
            # SavedVariable makes them go away as well.
            release_variables.append(f'{name}_.clear();')
            release_variables.append(f'{name}_released_ = true;')
            unpack.append(f'auto {name} = unpack_list({name}_);')
            asserts.append(f'TORCH_CHECK(!{name}_released_, ERR_BACKWARD_TWICE);')
            getter_definitions.append(GETTER_DEFINITION_SAVEDVAR.substitute(
                op=info.op, name=name, body=GETTER_BODY_VEC_SAVEDVAR))
        elif var.type == 'c10::List<c10::optional<Tensor>>':
            saved_variables.append(f'std::vector<SavedVariable> {name}_;')
            saved_variables.append(f'bool {name}_released_ = false;')
            # Just clear() is sufficient, we don't need to loop and clear each variable.
            # Because the SavedVariable owns a tensor and a grad_fn, removing the
            # SavedVariable makes them go away as well.
            release_variables.append(f'{name}_.clear();')
            release_variables.append(f'{name}_released_ = true;')
            unpack.append(f'auto {name} = unpack_opt_list({name}_);')
            asserts.append(f'TORCH_CHECK(!{name}_released_, ERR_BACKWARD_TWICE);')
            getter_definitions.append(GETTER_DEFINITION_SAVEDVAR.substitute(
                op=info.op, name=name, body=GETTER_BODY_VEC_SAVEDVAR))
        elif var.type == 'IntArrayRef':
            saved_variables.append(f'std::vector<int64_t> {name};')
            getter_definitions.append(GETTER_DEFINITION.substitute(
                op=info.op, name=name, body=GETTER_BODY_ARRAYREF_LONG))
        elif var.type == 'c10::optional<IntArrayRef>':
            saved_variables.append(f'c10::OptionalArray<int64_t> {name};')
            getter_definitions.append(GETTER_DEFINITION_OPT_ARRAYREF.substitute(
                op=info.op, name=name, body=GETTER_BODY_ARRAYREF_LONG))
        elif var.type == 'c10::optional<ArrayRef<double>>':
            saved_variables.append(f'c10::OptionalArray<double> {name};')
            getter_definitions.append(GETTER_DEFINITION_OPT_ARRAYREF.substitute(
                op=info.op, name=name, body=GETTER_BODY_ARRAYREF_DOUBLE))
        elif var.type == 'int64_t':
            saved_variables.append(f'{var.type} {name} = 0;')
            getter_definitions.append(GETTER_DEFINITION.substitute(
                op=info.op, name=name, body=GETTER_BODY_INT64_T))
        else:
            saved_variables.append(f'{var.type} {name};')
            if var.type in MISC_GETTER_DEFS:
                getter_def, body = MISC_GETTER_DEFS[var.type]
                getter_definitions.append(getter_def.substitute(op=info.op, name=name, body=body))
            else:
                # Types we don't expose python bindings to yet:
                #   TypeAndSize, ScalarType, TensorOptions, TensorGeometry,
                #   std::vector<std::vector<int64_t>>, std::vector<ScalarType>
                should_append_getsetdef = False

        if should_append_getsetdef:
            py_getsetdef_structs.append(PY_GETSETDEF_STRUCT.substitute(op=info.op, name=name))

    for var in info.all_saved_inputs:
        save_var(var, is_output=False)
    for var in info.all_saved_outputs:
        save_var(var, is_output=True)

    # lock the mutex when we release variables and in Node::apply to protect thread safety
    # see Note [Thread Safety on Autograd Node]
    if len(release_variables) > 0:
        thread_lock = 'std::lock_guard<std::mutex> lock(mutex_);'
    else:
        thread_lock = ''

    if uses_retain_variables(info):
        will_release_variables = WILL_RELEASE_VARIABLES.substitute()
    else:
        will_release_variables = ''

    body: List[str] = []

    if uses_single_grad(info):
        body.append('auto& grad = grads[0];')

    def emit_derivative(
        derivative: Derivative,
        args_with_derivatives: Sequence[Binding],
    ) -> Tuple[bool, str]:
        formula = derivative.formula
        var_names = derivative.var_names
        if len(var_names) == 1:
            checks_any_grad_defined = False
            if 'not_implemented' not in formula:
                matching_args = [arg for arg in args_with_derivatives
                                 if arg.name == var_names[0]]
                if len(matching_args) == 1:
                    # We can add undefined grad support if the input variable is a Tensor
                    arg = matching_args[0]
                    if isinstance(arg.argument, Argument) and str(arg.argument.type) == 'Tensor':
                        formula = 'any_grad_defined ? (' + formula + ') : Tensor()'
                        checks_any_grad_defined = True
            return (checks_any_grad_defined,
                    DERIVATIVE_SINGLE.substitute(name=var_names[0], derivative=formula))
        else:
            if 'grad_input_mask' in formula:
                masks = [f'should_compute_output({{ {n}_ix }}),' for n in var_names]
                grad_input_mask = GRAD_INPUT_MASK.substitute(masks=masks, n=len(var_names))
            else:
                grad_input_mask = ''
            idx_ranges = ', '.join(f'{n}_ix' for n in var_names)
            copy_ranges: List[str] = []
            for i, n in enumerate(var_names):
                copy_ranges.append(DERIVATIVE_MULTI_COPY_RANGE.substitute(name=n, i=i))
            return False, DERIVATIVE_MULTI.substitute(
                idx_ranges=idx_ranges,
                copy_ranges=copy_ranges,
                derivative=formula,
                grad_input_mask=grad_input_mask)

    body.extend(unpack)
    need_any_grad_defined_var = False
    for derivative in info.derivatives:
        checks_any_grad_defined, derivative_text = emit_derivative(derivative, info.args_with_derivatives)
        body.append(derivative_text)
        need_any_grad_defined_var |= checks_any_grad_defined
    # Since single-output derivative formulas need to check if grads are
    # defined, only perform the check once, before all the formulas
    if need_any_grad_defined_var:
        body.insert(-len(info.derivatives),
                    'bool any_grad_defined = any_variable_defined(grads);')

    if info.name in UNTRACEABLE_FUNCTIONS:
        superclass = 'Node'
    else:
        superclass = 'TraceableFunction'

    all_getsetdef_structs = ",\n".join(py_getsetdef_structs) + "," if len(py_getsetdef_structs) != 0 else ""
    all_getter_definitions = "\n".join(getter_definitions)

    return template.substitute(
        op=info.op,
        compute_index_ranges=compute_index_ranges,
        saved_variables=saved_variables,
        release_variables=release_variables,
        saved_list_sizes=saved_list_sizes,
        asserts=asserts,
        thread_lock=thread_lock,
        will_release_variables=will_release_variables,
        body=body,
        superclass=superclass,
        all_getter_definitions=all_getter_definitions,
        all_getsetdef_structs=all_getsetdef_structs)
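# Worked example (hypothetical op): for a saved Tensor input named 'self',
# the first branch of save_var above contributes
#   saved_variables   -> 'SavedVariable self_;'
#   release_variables -> 'self_.reset_data();' and 'self_.reset_grad_function();'
#   unpack            -> 'auto self = self_.unpack();'   (ptr is '' for inputs)
# and these lists are spliced into the declaration/definition templates by the
# final template.substitute call.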
def genCppH(hFilePath, cppFilePath, srcDirPath, glslcPath, tmpDirPath, env):
    print("hFilePath:{} cppFilePath:{} srcDirPath:{} glslcPath:{} tmpDirPath:{}".format(
        hFilePath, cppFilePath, srcDirPath, glslcPath, tmpDirPath))

    vexs = glob.glob(os.path.join(srcDirPath, '**', '*.glsl'), recursive=True)
    templateSrcPaths = []
    for f in vexs:
        if len(f) > 1:
            templateSrcPaths.append(f)
    templateSrcPaths.sort()
    print("templateSrcPaths:{}".format(templateSrcPaths))

    spvPaths = []
    for templateSrcPath in templateSrcPaths:
        print("templateSrcPath {}".format(templateSrcPath))
        name = getName(templateSrcPath).replace("_glsl", "")
        print("name {}".format(name))

        codeTemplate = CodeTemplate.from_file(templateSrcPath)
        srcPath = tmpDirPath + "/" + name + ".glsl"
        content = codeTemplate.substitute(env)
        with open(srcPath, 'w') as f:
            f.write(content)

        spvPath = tmpDirPath + "/" + name + ".spv"
        print("spvPath {}".format(spvPath))

        cmd = [
            glslcPath, "-fshader-stage=compute",
            srcPath, "-o", spvPath,
            "--target-env=vulkan1.0",
            "-Werror"
        ]
        print("\nglslc cmd:", cmd)
        subprocess.check_call(cmd)
        spvPaths.append(spvPath)

    h = "#pragma once\n"
    h += "#include <stdint.h>\n"
    nsbegin = "\nnamespace at { namespace native { namespace vulkan { \n"
    nsend = "\n} } } //namespace at::native::vulkan\n"
    h += nsbegin

    cpp = "#include <ATen/native/vulkan/{}>".format(H_NAME)
    cpp += nsbegin

    for spvPath in spvPaths:
        name = getName(spvPath)
        name_len = name + "_len"
        h += "extern const uint32_t {}[];\n".format(name)
        h += "extern const uint32_t {};\n".format(name_len)

        cpp += "const uint32_t " + name + "[] = {\n"
        sizeBytes = 0
        print("spvPath:{}".format(spvPath))
        with open(spvPath, 'rb') as f:
            for word in array.array('I', f.read()):
                cpp += "{},\n".format(word)
                sizeBytes += 4
            cpp += "};\n"
        cpp += "const uint32_t {} = {};\n".format(name_len, sizeBytes)

    cpp += nsend
    h += nsend

    with open(hFilePath, "w") as f:
        f.write(h)
    with open(cppFilePath, "w") as f:
        f.write(cpp)
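# Tiny sketch of the SPIR-V word embedding above: array.array('I') reads the
# blob as 32-bit words (platform-endian; SPIR-V binaries here are assumed
# little-endian).
import array
_words = array.array('I', b'\x03\x02\x23\x07')  # the SPIR-V magic number
print(",\n".join(str(w) for w in _words) + ",")
# 119734787,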
    'masked_scatter', 'masked_select', 'index_fill', 'trace', 'polar',
    'cumsum', 'eig', 'lerp', 'linalg_vector_norm', 'cumprod', 'prod'
}

# Some operators invalidate the grad_accumulator. Let's reset it.
RESET_GRAD_ACCUMULATOR = {'set', 'resize'}

# NOTE [ Invariant: TensorImpl and Storage Pointer Equality ]
#
# When a function modifies its input tensors (via inplace or out-variants), it
# should never change the input tensors' underlying c10::TensorImpl pointers
# or c10::Storage pointers.
#
# The following code templates implement the checks for this invariant:
SAVE_TENSOR_STORAGE = CodeTemplate("""\
c10::optional<Storage> ${tensor_name}_storage_saved =
  ${tensor_name}.has_storage() ? c10::optional<Storage>(${tensor_name}.storage()) : c10::nullopt;
""")

ENFORCE_SAME_TENSOR_STORAGE = CodeTemplate("""\
if (${tensor_name}_storage_saved.has_value())
  AT_ASSERT(${tensor_name}_storage_saved.value().is_alias_of(${tensor_name}.storage()));
""")

SAVE_TENSORLIST_STORAGE = CodeTemplate("""\
std::vector<c10::optional<Storage>> ${tensorlist_name}_storage_saved(${tensorlist_name}.size());
for (const Tensor& tensor : ${tensorlist_name})
  ${tensorlist_name}_storage_saved.push_back(
    tensor.has_storage() ? c10::optional<Storage>(tensor.storage()) : c10::nullopt);
""")

ENFORCE_SAME_TENSORLIST_STORAGE = CodeTemplate("""\
#!/usr/bin/env python3
import argparse
import os
from typing import Set

from tools.codegen.selective_build.selector import SelectiveBuilder
from tools.codegen.code_template import CodeTemplate

import yaml

if_condition_template_str = """if (kernel_tag_sv.compare("$kernel_tag_name") == 0) {
  return $dtype_checks;
}"""
if_condition_template = CodeTemplate(if_condition_template_str)

selected_kernel_dtypes_h_template_str = """#pragma once
#include <c10/core/ScalarType.h>
#include <c10/util/string_view.h>
#include <c10/macros/Macros.h>

namespace at {
inline constexpr bool should_include_kernel_dtype(
  const char *kernel_tag_str,
  at::ScalarType scalar_type
) {
  c10::string_view kernel_tag_sv C10_UNUSED = c10::string_view(kernel_tag_str);
  $body
  return false;
}
}
"""
selected_kernel_dtypes_h_template = CodeTemplate(selected_kernel_dtypes_h_template_str)
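# Sketch of the generated guard (the tag and dtype check are invented): each
# selected kernel contributes one if-branch to the header body.
_branch = if_condition_template.substitute(
    kernel_tag_name='cpu_index_kernel',
    dtype_checks='scalar_type == at::ScalarType::Long')
print(selected_kernel_dtypes_h_template.substitute(body=[_branch]))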
def _read_template(template_fn: str) -> CodeTemplate:
    return CodeTemplate.from_file(template_fn)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
#                          Method Impl Codegen
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#

# python binding for all overloads of a particular function/method
PY_VARIABLE_METHOD_VARARGS = CodeTemplate(r"""\
// ${name}
mp_obj_t ${uptname}(size_t n_args, const mp_obj_t* args, mp_map_t* kw_args) {
  ${method_header}
  static PythonArgParser parser({
    ${signatures}
  });
  ParsedArgs<${max_args}> parsed_args;
  auto _r = parser.parse(${self_}, n_args, args, kw_args, parsed_args);
  switch (_r.idx) {
    ${dispatch}
  }
  ${method_footer}
}
""")

# handler for a single parsed signature - may be a single overload or
# a pair of overloads whose signatures differ only in output params
# (plugged into PY_VARIABLE_METHOD_VARARGS as an item in ${dispatch})
PY_VARIABLE_CASE = CodeTemplate("""\
case ${overload_index}: {
  ${body}
}