def convert(
    fname: str,
    graph: Any,
    output_template: CodeTemplate,
    op_template: CodeTemplate,
    op_dep_template: CodeTemplate,
) -> None:
    """Render *graph* (a sequence of op dicts) into the file *fname*.

    Each op contributes one ``op_template`` instantiation built from its
    ``op_dep_template``-rendered dependency edges; ops with no outgoing
    edges (after dropping self-references) are omitted entirely.
    """
    rendered_ops = []
    for node in graph:
        name = node['name']
        edges = [
            op_dep_template.substitute(op_name=name, dep_name=dep['name'])
            for dep in node.get('depends', [])
            # a dependency on itself adds no useful edge — drop it
            if dep['name'] != name
        ]
        if not edges:
            # skip ops without any fanout
            continue
        rendered_ops.append(op_template.substitute(op_name=name, op_deps=edges))
    with open(fname, 'w') as out:
        out.write(output_template.substitute(ops=rendered_ops))
def write(dirname: str, name: str, template: CodeTemplate, env: Dict[str, List[str]]) -> None:
    """Instantiate *template* with *env* and write it to ``dirname/name``.

    The file is left untouched when its current content already equals the
    newly substituted text, so downstream build tools see a stable mtime.
    """
    env['generated_comment'] = GENERATED_COMMENT.substitute(
        filename=template.filename)
    path = os.path.join(dirname, name)

    # See Note [Unchanging results for ninja]
    try:
        with open(path, 'r') as f:
            old_val = f.read()
    except IOError:
        # First generation: no previous file to compare against.
        old_val = None

    new_val = template.substitute(env)
    if old_val == new_val:
        print("Skipped writing {}".format(path))
        return
    with open(path, 'w') as f:
        print("Writing {}".format(path))
        f.write(new_val)
def process_function(info: DifferentiabilityInfo, template: CodeTemplate) -> str:
    """Generate the C++ autograd ``Node`` subclass source for one op.

    Accumulates, from *info*'s saved inputs/outputs and derivative
    formulas, every fragment the node template needs — member
    declarations, release/unpack statements, Python getter bindings, and
    the derivative computation body — then substitutes them into
    *template* and returns the resulting C++ text.
    """
    saved_variables: List[str] = []        # C++ member declarations for saved state
    release_variables: List[str] = []      # statements that free saved state
    saved_list_sizes: List[str] = []       # size_t members for list-typed args
    unpack: List[str] = []                 # statements that unpack saved state in apply()
    asserts: List[str] = []                # runtime checks emitted into apply()
    compute_index_ranges: List[str] = []   # gradient index-range declarations
    getter_definitions: List[str] = []     # Python-binding getter function definitions
    py_getsetdef_structs: List[str] = []   # PyGetSetDef table entries

    # Each differentiable argument occupies a range of gradient slots:
    # one per tensor, so list-typed arguments need a runtime size member.
    for arg in info.args_with_derivatives:
        if arg.type == 'TensorList' or arg.type == 'const c10::List<c10::optional<Tensor>> &':
            size = f'{arg.name}_size_'
            saved_list_sizes.append(f'size_t {arg.name}_size_;')
        else:
            size = '1'
        compute_index_ranges.append(f'auto {arg.name}_ix = gen.range({size});')

    def save_var(var: SavedAttribute, is_output: bool) -> None:
        # Emit declaration, release, unpack, and getter code for a single
        # saved attribute, dispatching on its C++ type.
        name = var.name
        should_append_getsetdef = True
        if var.type == 'Tensor' or var.type == 'c10::optional<Tensor>' or var.type == 'c10::optional<Tensor>&' or \
                (var.type == 'Scalar' and is_output):
            saved_variables.append(f'SavedVariable {name}_;')
            release_variables.append(f'{name}_.reset_data();')
            release_variables.append(f'{name}_.reset_grad_function();')
            # Saved outputs keep a back-reference to this node when unpacked.
            ptr = 'shared_from_this()' if is_output else ''
            unpack.append(f'auto {name} = {name}_.unpack({ptr});')
            getter_definitions.append(
                GETTER_DEFINITION_SAVEDVAR.substitute(
                    op=info.op, name=name, body=GETTER_BODY_SAVEDVAR))
        elif var.type == 'TensorList':
            saved_variables.append(f'std::vector<SavedVariable> {name}_;')
            saved_variables.append(f'bool {name}_released_ = false;')
            # Just clear() is sufficient, we don't need to loop and clear each variable.
            # Because the SavedVariable owns a tensor and a grad_fn, removing the
            # SavedVariable makes them go away as well.
            release_variables.append(f'{name}_.clear();')
            release_variables.append(f'{name}_released_ = true;')
            unpack.append(f'auto {name} = unpack_list({name}_);')
            asserts.append(
                f'TORCH_CHECK(!{name}_released_, ERR_BACKWARD_TWICE);')
            getter_definitions.append(
                GETTER_DEFINITION_SAVEDVAR.substitute(
                    op=info.op, name=name, body=GETTER_BODY_VEC_SAVEDVAR))
        elif var.type == 'c10::List<c10::optional<Tensor>>':
            saved_variables.append(f'std::vector<SavedVariable> {name}_;')
            saved_variables.append(f'bool {name}_released_ = false;')
            # Just clear() is sufficient, we don't need to loop and clear each variable.
            # Because the SavedVariable owns a tensor and a grad_fn, removing the
            # SavedVariable makes them go away as well.
            release_variables.append(f'{name}_.clear();')
            release_variables.append(f'{name}_released_ = true;')
            # Optional-tensor lists need the optional-aware unpack helper.
            unpack.append(f'auto {name} = unpack_opt_list({name}_);')
            asserts.append(
                f'TORCH_CHECK(!{name}_released_, ERR_BACKWARD_TWICE);')
            getter_definitions.append(
                GETTER_DEFINITION_SAVEDVAR.substitute(
                    op=info.op, name=name, body=GETTER_BODY_VEC_SAVEDVAR))
        elif var.type == 'IntArrayRef':
            # ArrayRef views are saved by value as owned vectors.
            saved_variables.append(f'std::vector<int64_t> {name};')
            getter_definitions.append(
                GETTER_DEFINITION.substitute(op=info.op, name=name,
                                             body=GETTER_BODY_ARRAYREF_LONG))
        elif var.type == 'c10::optional<IntArrayRef>':
            saved_variables.append(f'c10::OptionalArray<int64_t> {name};')
            getter_definitions.append(
                GETTER_DEFINITION_OPT_ARRAYREF.substitute(
                    op=info.op, name=name, body=GETTER_BODY_ARRAYREF_LONG))
        elif var.type == 'c10::optional<ArrayRef<double>>':
            saved_variables.append(f'c10::OptionalArray<double> {name};')
            getter_definitions.append(
                GETTER_DEFINITION_OPT_ARRAYREF.substitute(
                    op=info.op, name=name, body=GETTER_BODY_ARRAYREF_DOUBLE))
        elif var.type == 'int64_t':
            # Zero-initialize scalar members.
            saved_variables.append(f'{var.type} {name} = 0;')
            getter_definitions.append(
                GETTER_DEFINITION.substitute(op=info.op, name=name,
                                             body=GETTER_BODY_INT64_T))
        else:
            saved_variables.append(f'{var.type} {name};')
            if var.type in MISC_GETTER_DEFS:
                getter_def, body = MISC_GETTER_DEFS[var.type]
                getter_definitions.append(
                    getter_def.substitute(op=info.op, name=name, body=body))
            else:
                # Types we don't expose python bindings to yet:
                #   TypeAndSize, ScalarType, TensorOptions, TensorGeometry,
                #   std::vector<std::vector<int64_t>>, std::vector<ScalarType>
                should_append_getsetdef = False
        if should_append_getsetdef:
            py_getsetdef_structs.append(
                PY_GETSETDEF_STRUCT.substitute(op=info.op, name=name))

    for var in info.all_saved_inputs:
        save_var(var, is_output=False)
    for var in info.all_saved_outputs:
        save_var(var, is_output=True)

    # lock the mutex when we release variables and in Node::apply to protect thread safety
    # see Note [Thread Safety on Autograd Node]
    if len(release_variables) > 0:
        thread_lock = 'std::lock_guard<std::mutex> lock(mutex_);'
    else:
        thread_lock = ''

    if uses_retain_variables(info):
        will_release_variables = WILL_RELEASE_VARIABLES.substitute()
    else:
        will_release_variables = ''

    body: List[str] = []

    if uses_single_grad(info):
        body.append('auto& grad = grads[0];')

    def emit_derivative(
        derivative: Derivative,
        args_with_derivatives: Sequence[Binding],
    ) -> Tuple[bool, str]:
        # Render one derivative formula; returns (needs any_grad_defined
        # guard, emitted C++ text).
        formula = derivative.formula
        var_names = derivative.var_names
        if len(var_names) == 1:
            checks_any_grad_defined = False
            if 'not_implemented' not in formula:
                matching_args = [
                    arg for arg in args_with_derivatives
                    if arg.name == var_names[0]]
                if len(matching_args) == 1:
                    # We can add undefined grad support if the input variable is a Tensor
                    arg = matching_args[0]
                    if isinstance(arg.argument, Argument) and str(arg.argument.type) == 'Tensor':
                        # Wrap the formula so an undefined incoming grad
                        # short-circuits to an undefined Tensor.
                        formula = 'any_grad_defined ? (' + formula + ') : Tensor()'
                        checks_any_grad_defined = True
            return (checks_any_grad_defined,
                    DERIVATIVE_SINGLE.substitute(name=var_names[0], derivative=formula))
        else:
            if 'grad_input_mask' in formula:
                masks = [f'should_compute_output({{ {n}_ix }}),' for n in var_names]
                grad_input_mask = GRAD_INPUT_MASK.substitute(masks=masks, n=len(var_names))
            else:
                grad_input_mask = ''
            idx_ranges = ', '.join(f'{n}_ix' for n in var_names)
            copy_ranges: List[str] = []
            for i, n in enumerate(var_names):
                copy_ranges.append(DERIVATIVE_MULTI_COPY_RANGE.substitute(name=n, i=i))
            return False, DERIVATIVE_MULTI.substitute(
                idx_ranges=idx_ranges, copy_ranges=copy_ranges,
                derivative=formula, grad_input_mask=grad_input_mask)

    body.extend(unpack)
    need_any_grad_defined_var = False
    for derivative in info.derivatives:
        checks_any_grad_defined, derivative_text = emit_derivative(
            derivative, info.args_with_derivatives)
        body.append(derivative_text)
        need_any_grad_defined_var |= checks_any_grad_defined
    # Since single-output derivative formulas need to check if grads are
    # defined, only perform the check once, before all the formulas
    if need_any_grad_defined_var:
        body.insert(-len(info.derivatives),
                    'bool any_grad_defined = any_variable_defined(grads);')

    if info.name in UNTRACEABLE_FUNCTIONS:
        superclass = 'Node'
    else:
        superclass = 'TraceableFunction'

    # Trailing comma is required after the last PyGetSetDef entry so the
    # generated table can be terminated by a sentinel in the template.
    all_getsetdef_structs = ",\n".join(py_getsetdef_structs) + "," if len(py_getsetdef_structs) != 0 else ""
    all_getter_definitions = "\n".join(getter_definitions)

    return template.substitute(op=info.op,
                               compute_index_ranges=compute_index_ranges,
                               saved_variables=saved_variables,
                               release_variables=release_variables,
                               saved_list_sizes=saved_list_sizes,
                               asserts=asserts,
                               thread_lock=thread_lock,
                               will_release_variables=will_release_variables,
                               body=body,
                               superclass=superclass,
                               all_getter_definitions=all_getter_definitions,
                               all_getsetdef_structs=all_getsetdef_structs)
'auto {} = peekSliceOptionals({}, InputSize() - {}, InputSize());' .format(arg['name'], real_inputs, static_tensor_inputs)) elif value_is_tensor_type(arg): # load tensor inputs from Caffe2 env['statements'].append('auto {} = peek({}, {});'.format( arg['name'], real_inputs, view_length)) real_inputs += 1 else: init = CT(ARGUMENT_MAP[arg['type']]).substitute( env, arg=arg['name']) env['initialization'].append(init) emit_assignments(o, env) if o['name'] in SPECIAL_IMPLEMENTATIONS: env['invocation'] = "{}({})".format( SPECIAL_IMPLEMENTATIONS[o['name']], ','.join(env['arguments'])) elif 'namespace' in o['method_of']: env['invocation'] = CT("at::${name}(${arguments})").substitute(env) else: assert ('Tensor' in o['method_of']) env['invocation'] = "self.{}({})".format( o['name'], ', '.join(env['arguments'][1:])) top_env['implementations'].append( IMPLEMENTATION_TEMPLATE.substitute(env)) top_env['cases'].append(CASE_TEMPLATE.substitute(env)) key += 1 write(os.path.join(args.install_dir, args.output_prefix + "aten_op.h"), OP_TEMPLATE.substitute(top_env))
elif arg['type'] == 'const c10::List<c10::optional<at::Tensor>> &': # NOTE: do not advance real_inputs here. After this we will # switch to indexing the "stack" from the end env['statements'].append( 'auto {} = peekSliceOptionals({}, InputSize() - {}, InputSize());' .format(arg['name'], real_inputs, static_tensor_inputs)) elif value_is_tensor_type(arg): # load tensor inputs from Caffe2 env['statements'].append( 'auto {} = peek({}, {});'.format(arg['name'], real_inputs, view_length)) real_inputs += 1 else: init = CT(ARGUMENT_MAP[arg['type']]).substitute(env, arg=arg['name']) env['initialization'].append(init) emit_assignments(o, env) if o['name'] in SPECIAL_IMPLEMENTATIONS: env['invocation'] = "{}({})".format(SPECIAL_IMPLEMENTATIONS[o['name']], ','.join(env['arguments'])) elif 'namespace' in o['method_of']: env['invocation'] = CT("at::${name}(${arguments})").substitute(env) else: assert('Tensor' in o['method_of']) env['invocation'] = "self.{}({})".format( o['name'], ', '.join(env['arguments'][1:])) top_env['implementations'].append(IMPLEMENTATION_TEMPLATE.substitute(env)) top_env['cases'].append(CASE_TEMPLATE.substitute(env)) key += 1 write(os.path.join(args.install_dir, args.output_prefix + "aten_op.h"), OP_TEMPLATE.substitute(top_env))
view_length = 'InputSize()' if has_tensorlist and i < tensorlist_idx else static_tensor_inputs if arg['type'] == 'TensorList': # NOTE: do not advance real_inputs here. After this we will # switch to indexing the "stack" from the end as if we only had env['statements'].append( 'auto {} = peekSlice({}, InputSize() - {}, InputSize());' .format(arg['name'], real_inputs, static_tensor_inputs)) elif value_is_tensor_type(arg): # load tensor inputs from Caffe2 env['statements'].append( 'auto {} = peek({}, {});'.format(arg['name'], real_inputs, view_length)) real_inputs += 1 else: init = CT(ARGUMENT_MAP[arg['type']]).substitute(env, arg=arg['name']) env['initialization'].append(init) emit_assignments(o, env) if o['name'] in SPECIAL_IMPLEMENTATIONS: env['invocation'] = "{}({})".format(SPECIAL_IMPLEMENTATIONS[o['name']], ','.join(env['arguments'])) elif 'namespace' in o['method_of']: env['invocation'] = CT("at::${name}(${arguments})").substitute(env) else: assert('Tensor' in o['method_of']) env['invocation'] = "self.{}({})".format( o['name'], ', '.join(env['arguments'][1:])) top_env['implementations'].append(OPTION_TEMPLATE.substitute(env)) key += 1 write(os.path.join(args.install_dir, args.output_prefix + "aten_op.h"), OP_TEMPLATE.substitute(top_env))
def gen_dispatcher_registrations(
        fm: FileManager,
        output_dir: str,
        class_name: str,
        cpp_namespace: str,
        backend_indices: Dict[DispatchKey, BackendIndex],
        grouped_native_functions: Sequence[Union[NativeFunction, NativeFunctionsGroup]],
        backend_dispatch_key: DispatchKey,
        dispatch_key: DispatchKey,
        selector: 'SelectiveBuilder',
        # build_in_tree is true for lazy TS backend and affects include paths, not used for external backends
        build_in_tree: bool = False,
        per_operator_headers: bool = False,
        backend_name: str = "",
        eager_registration: bool = True) -> None:
    """Emit ``Register<dispatch_key>.cpp``, the TU that registers this
    backend's kernels with the dispatcher.

    Registration is emitted either as a static initializer
    (``eager_registration=True``) or as a callable
    ``Register<backend><key>NativeFunctions()`` hook the backend invokes
    later.  Output goes through *fm* using the shared
    ``RegisterDispatchKey.cpp`` template.
    """
    headers = [
        f"{output_dir}/{backend_dispatch_key}NativeFunctions.h",
    ]
    # In-tree builds use angle-bracket includes; external backends quote them.
    if build_in_tree:
        external_backend_headers_str = "\n".join(f'#include <{h}>' for h in headers)
    else:
        external_backend_headers_str = "\n".join(f'#include "{h}"' for h in headers)

    assert class_name is not None
    backend_index = backend_indices[dispatch_key]

    # One m.impl(...) registration line per grouped native function.
    dispatch_registrations_body = list(concatMap(
        dest.RegisterDispatchKey(
            backend_index,
            Target.REGISTRATION,
            selector,
            rocm=False,
            cpp_namespace=cpp_namespace,
            class_method_name=f'{class_name}',
            skip_dispatcher_op_registration=False),
        grouped_native_functions))
    deferred_dispatch_registrations = ""
    static_init_dispatch_registrations = ""
    if eager_registration:
        # NOTE(review): interior newlines of these template literals were
        # lost upstream; reconstructed per the standard TORCH_LIBRARY_IMPL
        # block layout — confirm against the original file.
        static_template = CodeTemplate("""\
TORCH_LIBRARY_IMPL(aten, $dispatch_key, m) {
    $dispatch_registrations_body
};""")
        static_init_dispatch_registrations = static_template.substitute(
            dispatch_key=dispatch_key,
            dispatch_registrations_body=dispatch_registrations_body)
    else:
        deferred_template = CodeTemplate("""\
TORCH_API void Register${backend_name}${dispatch_key}NativeFunctions() {
    static auto m = MAKE_TORCH_LIBRARY_IMPL(aten, $dispatch_key);
    $dispatch_registrations_body
}""")
        deferred_dispatch_registrations = deferred_template.substitute(
            backend_name=backend_name,
            dispatch_key=dispatch_key,
            dispatch_registrations_body=dispatch_registrations_body)

    fm.write_with_template(f'Register{dispatch_key}.cpp', 'RegisterDispatchKey.cpp', lambda: {
        'static_init_dispatch_registrations': static_init_dispatch_registrations,
        'deferred_dispatch_registrations': deferred_dispatch_registrations,
        'extra_cuda_headers': '',
        'external_backend_headers': external_backend_headers_str,
        # Monolithic ATen header unless per-operator headers are requested.
        'ops_headers': '#include <ATen/Functions.h>' if not per_operator_headers else '',
        'DispatchKey': dispatch_key,
        'dispatch_namespace': dispatch_key.lower(),
        'dispatch_headers': dest.gen_registration_headers(
            backend_index, per_operator_headers=per_operator_headers, rocm=False),
        'dispatch_helpers': dest.gen_registration_helpers(backend_index),
        'dispatch_namespaced_definitions': '',
        # Anonymous-namespace kernel wrapper definitions for each function.
        'dispatch_anonymous_definitions': list(concatMap(
            dest.RegisterDispatchKey(
                backend_index,
                Target.ANONYMOUS_DEFINITION,
                selector,
                rocm=False,
                cpp_namespace=cpp_namespace,
                class_method_name=f'{class_name}',
                skip_dispatcher_op_registration=False),
            grouped_native_functions)),
    })