def postprocess_forward_derivatives(
    f: NativeFunction,
    defn_name: str,
    all_arg_names: List[str],
    derivatives: List[Derivative],
    forward_derivatives: List[ForwardDerivative],
    args_with_derivatives: Sequence[Binding],
) -> List[ForwardDerivative]:
    def find_required_inputs(formula: str, postfix: str) -> Tuple[str, ...]:
        required_inputs = set()
        for arg in args_with_derivatives:
            if arg.type == 'TensorList':
                # The functions taking TensorList handle everything internally
                continue
            arg_name = arg.name

            found = re.search(IDENT_REGEX.format(arg_name), formula)
            if found:
                raise RuntimeError(
                    f"The forward formula for {defn_name} is using the base name of the {arg_name} "
                    f"argument which is ambiguous. You should use {arg_name}_p to access the primal "
                    f"value and {arg_name}_t to access the tangent.")

            found = re.search(IDENT_REGEX.format(arg_name + postfix), formula)
            if found:
                required_inputs.add(arg_name)

        return tuple(required_inputs)

    updated_derivatives: List[ForwardDerivative] = []

    for defn in forward_derivatives:
        formula = defn.formula
        required_inputs_tangent = find_required_inputs(formula, "_t")
        if formula == "auto_element_wise":
            if len(args_with_derivatives) != 1 or len(forward_derivatives) > 1:
                raise RuntimeError(
                    f"Derivative definition of {defn_name} in derivatives.yaml defines the "
                    "forward definition of gradient as element_wise but this only "
                    "works for functions with a single differentiable input and a "
                    "single differentiable output.")
            if len(derivatives) != 1:
                raise RuntimeError(
                    f"Derivative definition of {defn_name} in derivatives.yaml defines the "
                    "forward definition of gradient as element_wise but it does not "
                    "define the gradient formula for its argument, which is required.")
            # This transformation is based on the observation that for element-wise functions, the Jacobian
            # matrix is diagonal and thus doing J * v or v * J gives the same result.
            # So here we are going to re-use the backward formula and replace two things:
            # 1) all occurrences of "grad" with "foo_t", where foo is the name of the unique differentiable input.
            # 2) all usage of an original input "foo" with its primal value "foo_p".
            # For example, for abs, the backward formula is:
            #   grad * self.sgn()
            # And this function generates a forward formula that is:
            #   self_t * self_p.sgn()

            backward_formula = derivatives[0].original_formula
            input_name = args_with_derivatives[0].name

            # Do replacement 1) of the grad
            def repl(m: Any) -> str:
                return f"{m.group(1)}{input_name}_t{m.group(2)}"
            fw_formula = re.sub(IDENT_REGEX.format("grad"), repl, backward_formula)

            # Do replacement 2) of the input variables
            for arg in args_with_derivatives:
                arg_name = arg.name

                def repl(m: Any) -> str:
                    return f"{m.group(1)}{arg_name}_p{m.group(2)}"
                fw_formula = re.sub(IDENT_REGEX.format(arg_name), repl, fw_formula)

            # Since there is a single differentiable input whose tangent we necessarily need,
            # we can simply require the tangent of every input.
            required_inputs_tangent = tuple(all_arg_names)
            formula = fw_formula
        elif formula == "auto_linear":
            if len(forward_derivatives) > 1:
                raise RuntimeError(
                    f"Derivative definition of {defn_name} in derivatives.yaml defines the "
                    "forward definition of gradient as linear but this only works "
                    "for functions with a single differentiable output.")
            # This transformation is based on the observation that linear functions can be written as:
            #   y = f(x) = A * x
            # for some matrix A, and the Jacobian of the function f is also A.
            # So doing J * v = A * v = f(v).
            # Hence, to do the jvp, we simply need to evaluate the function at the point v instead of x.
            # We do this by calling the forward again, replacing any occurrence of the differentiable
            # input "foo" by its tangent "foo_t".
            # Note that multiple inputs are not a problem as long as the function is truly linear wrt
            # the vector where all the differentiable inputs are stacked.

            diff_arg_names = [arg.name for arg in args_with_derivatives]
            assert len(diff_arg_names) > 0

            # Do replacement of input variables
            new_args = []
            for arg_name in all_arg_names:
                if arg_name in diff_arg_names:
                    arg_name = arg_name + "_t"
                new_args.append(arg_name)

            # Call into the forward again. We need two cases here to handle both Tensor methods and at:: functions.
            if Variant.function in f.variants:
                fw_formula = "at::{}({})".format(defn_name, ", ".join(new_args))
            else:
                assert f.func.kind() is not SchemaKind.inplace
                assert Variant.method in f.variants
                fw_formula = "{}.{}({})".format(new_args[0], defn_name, ", ".join(new_args[1:]))

            # All of the input tangents are always used so all of them are required here.
            required_inputs_tangent = tuple(diff_arg_names)
            formula = fw_formula

        # At this point, the formula is final and is not modified anymore.
        # In the forward formula, the primal value of an input is used instead of the input Tensor itself.
        # This call inspects the formula to find which inputs' primals are used.
        required_inputs_primal = find_required_inputs(formula, "_p")
        updated_derivatives.append(ForwardDerivative(
            formula=formula,
            var_name=defn.var_name,
            var_type=defn.var_type,
            required_inputs_fw_grad=required_inputs_tangent,
            required_inputs_primal=required_inputs_primal))

    return updated_derivatives
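# A minimal, self-contained sketch of the "auto_element_wise" rewrite above, for
# illustration only. The local _IDENT is an assumption about how IDENT_REGEX is
# defined (match a whole identifier, not a substring), and _demo_* names are not
# part of the codegen.
def _demo_auto_element_wise_rewrite() -> None:
    _IDENT = r'(^|\W){}($|\W)'  # assumed equivalent of IDENT_REGEX
    backward_formula = "grad * self.sgn()"  # backward formula for abs
    input_name = "self"
    # 1) replace "grad" with "self_t" (the tangent of the unique differentiable input)
    fw = re.sub(_IDENT.format("grad"),
                lambda m: f"{m.group(1)}{input_name}_t{m.group(2)}",
                backward_formula)
    # 2) replace the input "self" with its primal value "self_p"
    fw = re.sub(_IDENT.format(input_name),
                lambda m: f"{m.group(1)}{input_name}_p{m.group(2)}",
                fw)
    assert fw == "self_t * self_p.sgn()"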
def saved_variables(
    formula: str,
    nctypes: List[NamedCType],
    var_names: Tuple[str, ...],
) -> Tuple[str, Tuple[SavedAttribute, ...]]:

    def stride_expr(name: str) -> str:
        assert var_names == (name,), (
            'Replacement for ".strides()" is currently only supported for single derivatives of the same tensor '
            'that ".strides()" is being called on.')
        return f'strides_or_error({name}, "{name}")'

    REPLACEMENTS: List[Tuple[str, Dict[str, Any]]] = [
        # replace self.sizes() with self_sizes
        (r'{}.sizes\(\)', {
            'suffix': '_sizes',
            'nctype': lambda name: NamedCType(name, BaseCType(intArrayRefT)),
        }),
        # replace self.options() with self_options
        (r'{}.options\(\)', {
            'suffix': '_options',
            'nctype': lambda name: NamedCType(name, BaseCType(tensorOptionsT)),
        }),
        # replace zeros_like(self) with self_info
        (r'zeros_like\({}\)', {
            'suffix': '_info',
            'nctype': lambda name: NamedCType(name, BaseCType(typeAndSizeT)),
            'expr': lambda name: name,  # at save-time
            'res': lambda name: name + '_info.zeros()',  # at eval-time
        }),
        # replace self.size(2) with self_size_2
        (r'{}.size\((\w+)\)', {
            'suffix': lambda m: '_argsize_{}'.format(*m.groups()),
            'nctype': lambda name: NamedCType(name, BaseCType(intT)),
        }),
        # replace self.numel() with self_numel
        (r'{}.numel\(\)', {
            'suffix': '_numel',
            'nctype': lambda name: NamedCType(name, BaseCType(intT)),
        }),
        # replace to_args_sizes(self) with self_args_sizes
        (r'to_args_sizes\({}\)', {
            'suffix': '_args_sizes',
            'nctype': lambda name: NamedCType(name, VectorCType(VectorCType(BaseCType(intT)))),
        }),
        # replace to_args_scalartypes(self) with self_args_scalartypes
        (r'to_args_scalartypes\({}\)', {
            'suffix': '_args_scalartypes',
            'nctype': lambda name: NamedCType(name, VectorCType(BaseCType(scalarTypeT))),
        }),
        # replace TensorGeometry(self) with self_geometry
        (r'TensorGeometry\({}\)', {
            'suffix': '_geometry',
            'nctype': lambda name: NamedCType(name, BaseCType(tensorGeometryT)),
        }),
        # replace self.scalar_type() with self_scalar_type
        (r'{}.scalar_type\(\)', {
            'suffix': '_scalar_type',
            'nctype': lambda name: NamedCType(name, BaseCType(scalarTypeT)),
        }),
        # replace self.dim() with self_dim
        (r'{}.dim\(\)', {
            'suffix': '_dim',
            'nctype': lambda name: NamedCType(name, BaseCType(intT)),
        }),
        # replace self.strides() with self_strides
        (r'{}.strides\(\)', {
            'suffix': '_strides',
            'nctype': lambda name: NamedCType(name, BaseCType(intArrayRefT)),
            'expr': stride_expr,
        }),
    ]

    # find which arguments need to be saved
    saved: List[SavedAttribute] = []

    for nctype in nctypes:
        name = nctype.name.name if isinstance(nctype.name, SpecialArgName) else nctype.name
        # First search the formula for expressions which can be evaluated
        # when the autograd Function is created to avoid saving variables
        for regex, info in REPLACEMENTS:
            def repl(m: Match[str]) -> str:
                suffix: str = info['suffix'](m) if callable(info['suffix']) else info['suffix']
                expr: str = info['expr'](name) if 'expr' in info else m.group(0)
                saved.append(SavedAttribute(
                    nctype=info['nctype'](name + suffix),
                    expr=expr,
                ))
                if 'res' in info:
                    replacement: str = info['res'](name)
                    return replacement
                return name + suffix

            formula = re.sub(regex.format(name), repl, formula)

        # c10::optional<std::string> types stored in Backward nodes must be
        # converted to c10::optional<c10::string_view> before being passed into
        # the backward function
        if nctype.type == OptionalCType(BaseCType(stringT)):
            formula = re.sub(
                rf'\b{name}\b',
                f'{name}.has_value() ? c10::optional<c10::string_view>({name}.value()) : c10::nullopt',
                formula)

        # Find any variables which remain in the formula and save them
        if re.search(IDENT_REGEX.format(name), formula):
            saved.append(SavedAttribute(
                nctype=nctype,
                expr=name,
            ))

    return formula, tuple(saved)
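# A stripped-down sketch of the rewrite saved_variables performs, for illustration
# only: an expression that is computable when the autograd node is created (here
# "self.sizes()") is replaced in the formula by a generated name and recorded, so
# only that value, not the whole tensor, needs to be saved. The _demo_* name and
# formula are hypothetical.
def _demo_saved_variables_rewrite() -> None:
    formula = "grad * self.sizes()[0]"
    name = "self"
    saved_names: List[str] = []

    def repl(m: Match[str]) -> str:
        saved_names.append(name + "_sizes")  # record the attribute to save
        return name + "_sizes"               # and reference it in the formula

    formula = re.sub(r'{}.sizes\(\)'.format(name), repl, formula)
    assert formula == "grad * self_sizes[0]"
    assert saved_names == ["self_sizes"]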
def replace_self_with_original_self(formula: str, postfix: str) -> str:
    def repl(m: Match[str]) -> str:
        return f'{m.group(1)}original_self{postfix}{m.group(2)}'
    return re.sub(IDENT_REGEX.format(f'self{postfix}'), repl, formula)
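# Usage sketch (illustrative formula, not taken from derivatives.yaml): when an
# out-of-place forward formula is reused for an in-place op, "self_p"/"self_t"
# must instead refer to a saved clone of the original input. The expected output
# assumes IDENT_REGEX matches whole identifiers only, so "other_p"/"other_t" are
# left untouched.
def _demo_replace_self() -> None:
    formula = "self_p * other_t + self_t * other_p"
    formula = replace_self_with_original_self(formula, "_p")
    formula = replace_self_with_original_self(formula, "_t")
    assert formula == "original_self_p * other_t + original_self_t * other_p"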
def match_differentiability_info(
    native_functions: List[NativeFunction],
    differentiability_infos: Sequence[DifferentiabilityInfo],
) -> List[NativeFunctionWithDifferentiabilityInfo]:
    """Sets the "derivative" key on declarations to matching autograd function

    In-place functions will use the out-of-place derivative definition if there
    is no in-place specific derivative.
    """

    info_by_schema = {info.func.func: info for info in differentiability_infos}
    functional_info_by_signature = {
        info.func.func.signature(strip_default=True): info
        for info in differentiability_infos
        if info.func.func.kind() == SchemaKind.functional}

    def find_info(f: NativeFunction) -> Tuple[Optional[DifferentiabilityInfo], bool]:
        if f.func in info_by_schema:
            return info_by_schema[f.func], True

        # if there is no exact match look for the out-of-place signature.
        # i.e mul() for mul_() or mul_out()
        return functional_info_by_signature.get(f.func.signature(strip_default=True)), False

    result: List[NativeFunctionWithDifferentiabilityInfo] = []
    for f in native_functions:
        info, is_exact_match = find_info(f)

        # Currently, the '.strides()' to 'strides_or_error' replacement does not support
        # 'self' derivatives of an inplace function, so we must check for this case.
        if f.func.kind() == SchemaKind.inplace and (info is not None):
            for derivative in info.derivatives:
                if 'self' in derivative.var_names:
                    for saved_input in derivative.saved_inputs:
                        assert 'strides_or_error' not in saved_input.expr, (
                            "Calling '.strides()' in the 'self' derivative formula of an "
                            f"in-place function is not supported: {f.func}")

        # For functions that have a single def for out-of-place and inplace (like abs())
        if info and info.forward_derivatives:
            forward_derivatives = info.forward_derivatives
            if f.func.kind() == SchemaKind.inplace:
                # For inplace functions there is a little bit of work to do:
                #  1) Validate the formula and make sure the input that is modified is not used:
                #    - If there is a formula for the inplace variant of the function (is_exact_match == True) then
                #      we make sure that the original value of the input that is being modified inplace (self_p) is
                #      not used in the formula. Note that the formula can use "original_self_p" here and that would
                #      trigger a clone of the original input.
                #    - If we are re-using the out of place formula (is_exact_match == False) then we replace every
                #      occurrence of self_p and self_t by original_self_p and original_self_t. These will be
                #      populated by a cloned version of the original input (either the clone done by the backward AD
                #      logic if self is also used in a backward formula or a special clone that we add).
                #  2) At this point, there cannot be a self_p in the formula.
                #  3) Change "result" into "self_p" as, by design, in the inplace function codegen the result is
                #     simply called self (as it is modified inplace).
                #  4) Update the required primals data in case it used to contain "result" but should now contain
                #     "self".
                #  5) If it is not an exact match, the user formula is not modifying the existing forward grad
                #     inplace as it should. So add some code that makes sure that we do so if the forward grad
                #     already exists.

                assert len(info.forward_derivatives) == 1  # Only single output inplace should exist
                fw_info = info.forward_derivatives[0]
                formula = fw_info.formula

                if re.search(IDENT_REGEX.format("self_p"), formula):
                    if is_exact_match:
                        # For manually defined formulas, don't allow the original value to be used
                        raise RuntimeError(
                            f'The formula for "{f.func.name}" is using the original value of self '
                            'that is being modified inplace. This would lead to wrong forward gradients. '
                            'Please use "result" in the formula only.')
                    else:
                        # When the original formula is out of place, we save a clone of the primal
                        # value to be able to access this value if needed
                        # replace "self_p"/"self_t" from the formula by "original_self_p"/"original_self_t"
                        formula = replace_self_with_original_self(formula, "_p")
                        formula = replace_self_with_original_self(formula, "_t")

                # replace "result" from the formula by "self_p"
                def repl(m: Match[str]) -> str:
                    return f'{m.group(1)}self_p{m.group(2)}'
                formula = re.sub(IDENT_REGEX.format("result"), repl, formula)

                required_primals = fw_info.required_inputs_primal
                if re.search(IDENT_REGEX.format("self_p"), formula):
                    required_primals = required_primals + ("self",) if required_primals else ("self",)

                if not is_exact_match:
                    # NOTE [In-place forward AD formula Optimization]
                    #
                    # This optimization transforms the formula to directly do inplace, i.e.
                    # instead of self_t.copy_(self_t.op()) we do self_t.op_() when the following are met:
                    #
                    # 1) the formula satisfies the pattern: "self_t.op(*args)"
                    # 2) "op" in (1) needs to be the same as the op the derivative is for
                    #
                    # (2) may seem too strict, but currently the only ops that satisfy (1) also satisfy (2)
                    # If there is a need, we can relax (2) to allow any op that has an in-place variant
                    is_single_method_on_self_t = False
                    match = re.fullmatch(r'self_t.([\w]*)\((.*)\)', formula)
                    if match:
                        op_name, between_parens = match.group(1), match.group(2)

                        # We want to...
                        #   Match: self_t.op1(other_p.op2(arg))
                        #   Avoid: self_t.op1(args) + self_t.op2(args)
                        #   Avoid: self_t.op1(other_p.op2(arg)) + self_t.op2(args)
                        def check_parens_nest_level_gt_zero(s: str) -> bool:
                            level = 1
                            for ch in s:
                                if ch == ")":
                                    level -= 1
                                    if level == 0:
                                        return False
                                if ch == "(":
                                    level += 1
                            return True
                        is_single_method_on_self_t = check_parens_nest_level_gt_zero(between_parens)
                    directly_do_inplace = is_single_method_on_self_t and op_name == info.name

                    if directly_do_inplace:
                        formula = f"self_t_raw.defined() ? self_t_raw.{op_name}_({between_parens}) : {formula}"
                    else:
                        # Make sure that the forward grad is modified inplace when the original formula
                        # is out of place
                        formula = f"self_t_raw.defined() ? self_t_raw.copy_({formula}) : {formula}"

                required_original_self_value = bool(re.search(IDENT_REGEX.format("original_self_p"), formula))

                forward_derivatives = [ForwardDerivative(
                    formula=formula,
                    var_name="self",
                    var_type=fw_info.var_type,
                    required_inputs_fw_grad=fw_info.required_inputs_fw_grad,
                    required_inputs_primal=required_primals,
                    required_original_self_value=required_original_self_value,
                    is_reusing_outplace_formula=not is_exact_match), ]
        else:
            forward_derivatives = []

        result.append(NativeFunctionWithDifferentiabilityInfo(
            func=f,
            info=info,
            fw_derivatives=forward_derivatives,
        ))

    return result
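# A minimal sketch of the check behind NOTE [In-place forward AD formula
# Optimization] above, for illustration only: a formula qualifies for the direct
# in-place rewrite only when it is a single method call on self_t, i.e. the
# parenthesis opened by that call never closes before the end of the formula.
# The _demo_* name and example formulas are hypothetical.
def _demo_inplace_fw_ad_pattern() -> None:
    def is_single_method_on_self_t(formula: str) -> bool:
        match = re.fullmatch(r'self_t.([\w]*)\((.*)\)', formula)
        if not match:
            return False
        level = 1
        for ch in match.group(2):
            if ch == ")":
                level -= 1
                if level == 0:
                    return False  # the outer call closed early: not a single call
            if ch == "(":
                level += 1
        return True

    assert is_single_method_on_self_t("self_t.mul(other_p.conj())")
    assert not is_single_method_on_self_t("self_t.mul(x) + self_t.add(y)")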