Example #1
def convert(
    fname: str,
    graph: Any,
    output_template: CodeTemplate,
    op_template: CodeTemplate,
    op_dep_template: CodeTemplate,
) -> None:
    ops = []
    for op in graph:
        op_name = op['name']
        op_deps = []

        for dep in op.get('depends', []):
            dep_name = dep['name']
            if dep_name == op_name:
            # skip self-reference
                continue
            op_deps.append(
                op_dep_template.substitute(
                    op_name=op_name,
                    dep_name=dep_name))

        if not op_deps:
            # skip ops without any fanout
            continue

        ops.append(
            op_template.substitute(
                op_name=op_name,
                op_deps=op_deps))

    with open(fname, 'w') as out:
        out.write(output_template.substitute(ops=ops))
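
All of these examples render code through PyTorch's CodeTemplate class, which substitutes $identifier / ${identifier} placeholders and expands list-valued bindings one element per line at the placeholder's indentation. A minimal sketch of that behavior, assuming the torchgen.code_template import path (older trees ship the same class under tools.codegen):

from torchgen.code_template import CodeTemplate

template = CodeTemplate("""\
void f() {
    ${body}
}
""")

# List values expand line by line, keeping the placeholder's indentation:
print(template.substitute(body=['int a = 1;', 'int b = 2;']))
# void f() {
#     int a = 1;
#     int b = 2;
# }

This expansion is why convert() above can pass the op_deps and ops lists straight into substitute() and get one output line per entry.
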
Example #2
def write(dirname: str, name: str, template: CodeTemplate,
          env: Dict[str, List[str]]) -> None:
    env['generated_comment'] = GENERATED_COMMENT.substitute(
        filename=template.filename)
    path = os.path.join(dirname, name)
    # See Note [Unchanging results for ninja]
    try:
        with open(path, 'r') as f:
            old_val = f.read()
    except IOError:
        old_val = None
    new_val = template.substitute(env)
    if old_val != new_val:
        with open(path, 'w') as f:
            print("Writing {}".format(path))
            f.write(new_val)
    else:
        print("Skipped writing {}".format(path))
Example #3
def process_function(info: DifferentiabilityInfo,
                     template: CodeTemplate) -> str:
    saved_variables: List[str] = []
    release_variables: List[str] = []
    saved_list_sizes: List[str] = []
    unpack: List[str] = []
    asserts: List[str] = []
    compute_index_ranges: List[str] = []
    getter_definitions: List[str] = []
    py_getsetdef_structs: List[str] = []

    for arg in info.args_with_derivatives:
        if arg.type == 'TensorList' or arg.type == 'const c10::List<c10::optional<Tensor>> &':
            size = f'{arg.name}_size_'
            saved_list_sizes.append(f'size_t {arg.name}_size_;')
        else:
            size = '1'
        compute_index_ranges.append(f'auto {arg.name}_ix = gen.range({size});')

    def save_var(var: SavedAttribute, is_output: bool) -> None:
        name = var.name
        should_append_getsetdef = True

        if var.type == 'Tensor' or var.type == 'c10::optional<Tensor>' or var.type == 'c10::optional<Tensor>&' or \
                (var.type == 'Scalar' and is_output):
            saved_variables.append(f'SavedVariable {name}_;')
            release_variables.append(f'{name}_.reset_data();')
            release_variables.append(f'{name}_.reset_grad_function();')
            ptr = 'shared_from_this()' if is_output else ''
            unpack.append(f'auto {name} = {name}_.unpack({ptr});')
            getter_definitions.append(
                GETTER_DEFINITION_SAVEDVAR.substitute(
                    op=info.op, name=name, body=GETTER_BODY_SAVEDVAR))
        elif var.type == 'TensorList':
            saved_variables.append(f'std::vector<SavedVariable> {name}_;')
            saved_variables.append(f'bool {name}_released_ = false;')
            # clear() alone is sufficient; we don't need to loop over and clear
            # each element, because a SavedVariable owns its tensor and grad_fn,
            # so destroying the SavedVariable releases them as well.
            release_variables.append(f'{name}_.clear();')
            release_variables.append(f'{name}_released_ = true;')
            unpack.append(f'auto {name} = unpack_list({name}_);')
            asserts.append(
                f'TORCH_CHECK(!{name}_released_, ERR_BACKWARD_TWICE);')
            getter_definitions.append(
                GETTER_DEFINITION_SAVEDVAR.substitute(
                    op=info.op, name=name, body=GETTER_BODY_VEC_SAVEDVAR))
        elif var.type == 'c10::List<c10::optional<Tensor>>':
            saved_variables.append(f'std::vector<SavedVariable> {name}_;')
            saved_variables.append(f'bool {name}_released_ = false;')
            # clear() alone is sufficient; we don't need to loop over and clear
            # each element, because a SavedVariable owns its tensor and grad_fn,
            # so destroying the SavedVariable releases them as well.
            release_variables.append(f'{name}_.clear();')
            release_variables.append(f'{name}_released_ = true;')
            unpack.append(f'auto {name} = unpack_opt_list({name}_);')
            asserts.append(
                f'TORCH_CHECK(!{name}_released_, ERR_BACKWARD_TWICE);')
            getter_definitions.append(
                GETTER_DEFINITION_SAVEDVAR.substitute(
                    op=info.op, name=name, body=GETTER_BODY_VEC_SAVEDVAR))
        elif var.type == 'IntArrayRef':
            saved_variables.append(f'std::vector<int64_t> {name};')
            getter_definitions.append(
                GETTER_DEFINITION.substitute(op=info.op,
                                             name=name,
                                             body=GETTER_BODY_ARRAYREF_LONG))
        elif var.type == 'c10::optional<IntArrayRef>':
            saved_variables.append(f'c10::OptionalArray<int64_t> {name};')
            getter_definitions.append(
                GETTER_DEFINITION_OPT_ARRAYREF.substitute(
                    op=info.op, name=name, body=GETTER_BODY_ARRAYREF_LONG))
        elif var.type == 'c10::optional<ArrayRef<double>>':
            saved_variables.append(f'c10::OptionalArray<double> {name};')
            getter_definitions.append(
                GETTER_DEFINITION_OPT_ARRAYREF.substitute(
                    op=info.op, name=name, body=GETTER_BODY_ARRAYREF_DOUBLE))
        elif var.type == 'int64_t':
            saved_variables.append(f'{var.type} {name} = 0;')
            getter_definitions.append(
                GETTER_DEFINITION.substitute(op=info.op,
                                             name=name,
                                             body=GETTER_BODY_INT64_T))
        else:
            saved_variables.append(f'{var.type} {name};')

            if var.type in MISC_GETTER_DEFS:
                getter_def, body = MISC_GETTER_DEFS[var.type]
                getter_definitions.append(
                    getter_def.substitute(op=info.op, name=name, body=body))
            else:
                # Types we don't expose python bindings to yet:
                #   TypeAndSize, ScalarType, TensorOptions, TensorGeometry,
                #   std::vector<std::vector<int64_t>>, std::vector<ScalarType>
                should_append_getsetdef = False

        if should_append_getsetdef:
            py_getsetdef_structs.append(
                PY_GETSETDEF_STRUCT.substitute(op=info.op, name=name))

    for var in info.all_saved_inputs:
        save_var(var, is_output=False)
    for var in info.all_saved_outputs:
        save_var(var, is_output=True)

    # lock the mutex when we release variables and in Node::apply to protect thread safety
    # see Note [Thread Safety on Autograd Node]
    if len(release_variables) > 0:
        thread_lock = 'std::lock_guard<std::mutex> lock(mutex_);'
    else:
        thread_lock = ''

    if uses_retain_variables(info):
        will_release_variables = WILL_RELEASE_VARIABLES.substitute()
    else:
        will_release_variables = ''

    body: List[str] = []

    if uses_single_grad(info):
        body.append('auto& grad = grads[0];')

    def emit_derivative(
        derivative: Derivative,
        args_with_derivatives: Sequence[Binding],
    ) -> Tuple[bool, str]:
        formula = derivative.formula
        var_names = derivative.var_names
        if len(var_names) == 1:
            checks_any_grad_defined = False
            if 'not_implemented' not in formula:
                matching_args = [
                    arg for arg in args_with_derivatives
                    if arg.name == var_names[0]
                ]
                if len(matching_args) == 1:
                    # We can add undefined grad support if the input variable is a Tensor
                    arg = matching_args[0]
                    if isinstance(arg.argument, Argument) and str(
                            arg.argument.type) == 'Tensor':
                        formula = 'any_grad_defined ? (' + formula + ') : Tensor()'
                        checks_any_grad_defined = True
            return (checks_any_grad_defined,
                    DERIVATIVE_SINGLE.substitute(name=var_names[0],
                                                 derivative=formula))
        else:
            if 'grad_input_mask' in formula:
                masks = [
                    f'should_compute_output({{ {n}_ix }}),' for n in var_names
                ]
                grad_input_mask = GRAD_INPUT_MASK.substitute(masks=masks,
                                                             n=len(var_names))
            else:
                grad_input_mask = ''
            idx_ranges = ', '.join(f'{n}_ix' for n in var_names)
            copy_ranges: List[str] = []
            for i, n in enumerate(var_names):
                copy_ranges.append(
                    DERIVATIVE_MULTI_COPY_RANGE.substitute(name=n, i=i))
            return False, DERIVATIVE_MULTI.substitute(
                idx_ranges=idx_ranges,
                copy_ranges=copy_ranges,
                derivative=formula,
                grad_input_mask=grad_input_mask)

    body.extend(unpack)
    need_any_grad_defined_var = False
    for derivative in info.derivatives:
        checks_any_grad_defined, derivative_text = emit_derivative(
            derivative, info.args_with_derivatives)
        body.append(derivative_text)
        need_any_grad_defined_var |= checks_any_grad_defined
    # Since single-output derivative formulas need to check if grads are
    # defined, only perform the check once, before all the formulas
    if need_any_grad_defined_var:
        body.insert(-len(info.derivatives),
                    'bool any_grad_defined = any_variable_defined(grads);')

    if info.name in UNTRACEABLE_FUNCTIONS:
        superclass = 'Node'
    else:
        superclass = 'TraceableFunction'

    all_getsetdef_structs = (
        ",\n".join(py_getsetdef_structs) + "," if py_getsetdef_structs else "")
    all_getter_definitions = "\n".join(getter_definitions)

    return template.substitute(op=info.op,
                               compute_index_ranges=compute_index_ranges,
                               saved_variables=saved_variables,
                               release_variables=release_variables,
                               saved_list_sizes=saved_list_sizes,
                               asserts=asserts,
                               thread_lock=thread_lock,
                               will_release_variables=will_release_variables,
                               body=body,
                               superclass=superclass,
                               all_getter_definitions=all_getter_definitions,
                               all_getsetdef_structs=all_getsetdef_structs)
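
process_function() only assembles lists; the helper templates it references (DERIVATIVE_SINGLE, DERIVATIVE_MULTI, PY_GETSETDEF_STRUCT, the GETTER_* family) are defined alongside it in tools/autograd/gen_autograd_functions.py and are not shown in this excerpt. A sketch of the plausible shape of two of them, to make the substitutions above concrete (the exact strings in the repo may differ):

PY_GETSETDEF_STRUCT = CodeTemplate(
    '{(char*)"_saved_${name}", (getter)THP${op}_${name}_getter, nullptr, nullptr, nullptr}')

DERIVATIVE_SINGLE = CodeTemplate("""\
if (should_compute_output({ ${name}_ix })) {
  auto grad_result = ${derivative};
  copy_range(grad_inputs, ${name}_ix, grad_result);
}
""")

Each getter rendered into getter_definitions becomes a C++ function exposed through the py_getsetdef_structs table, which is why the two lists are appended in lockstep under the should_append_getsetdef flag.
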
Example #4
            elif arg['type'] == 'const c10::List<c10::optional<at::Tensor>> &':
                # NOTE: do not advance real_inputs here. After this we will
                # switch to indexing the "stack" from the end
                env['statements'].append(
                    'auto {} = peekSliceOptionals({}, InputSize() - {}, InputSize());'
                    .format(arg['name'], real_inputs, static_tensor_inputs))
            elif value_is_tensor_type(arg):
                # load tensor inputs from Caffe2
                env['statements'].append('auto {} = peek({}, {});'.format(
                    arg['name'], real_inputs, view_length))
                real_inputs += 1
            else:
                init = CT(ARGUMENT_MAP[arg['type']]).substitute(
                    env, arg=arg['name'])
                env['initialization'].append(init)

        emit_assignments(o, env)

        if o['name'] in SPECIAL_IMPLEMENTATIONS:
            env['invocation'] = "{}({})".format(
                SPECIAL_IMPLEMENTATIONS[o['name']], ','.join(env['arguments']))
        elif 'namespace' in o['method_of']:
            env['invocation'] = CT("at::${name}(${arguments})").substitute(env)
        else:
            assert ('Tensor' in o['method_of'])
            env['invocation'] = "self.{}({})".format(
                o['name'], ', '.join(env['arguments'][1:]))

        top_env['implementations'].append(
            IMPLEMENTATION_TEMPLATE.substitute(env))
        top_env['cases'].append(CASE_TEMPLATE.substitute(env))
        key += 1
    write(os.path.join(args.install_dir, args.output_prefix + "aten_op.h"),
          OP_TEMPLATE.substitute(top_env))
Example #5
            elif arg['type'] == 'const c10::List<c10::optional<at::Tensor>> &':
                # NOTE: do not advance real_inputs here. After this we will
                # switch to indexing the "stack" from the end
                env['statements'].append(
                    'auto {} = peekSliceOptionals({}, InputSize() - {}, InputSize());'
                    .format(arg['name'], real_inputs, static_tensor_inputs))
            elif value_is_tensor_type(arg):
                # load tensor inputs from Caffe2
                env['statements'].append(
                    'auto {} = peek({}, {});'.format(arg['name'], real_inputs, view_length))
                real_inputs += 1
            else:
                init = CT(ARGUMENT_MAP[arg['type']]).substitute(env, arg=arg['name'])
                env['initialization'].append(init)

        emit_assignments(o, env)

        if o['name'] in SPECIAL_IMPLEMENTATIONS:
            env['invocation'] = "{}({})".format(SPECIAL_IMPLEMENTATIONS[o['name']], ','.join(env['arguments']))
        elif 'namespace' in o['method_of']:
            env['invocation'] = CT("at::${name}(${arguments})").substitute(env)
        else:
            assert('Tensor' in o['method_of'])
            env['invocation'] = "self.{}({})".format(
                o['name'], ', '.join(env['arguments'][1:]))

        top_env['implementations'].append(IMPLEMENTATION_TEMPLATE.substitute(env))
        top_env['cases'].append(CASE_TEMPLATE.substitute(env))
        key += 1
    write(os.path.join(args.install_dir, args.output_prefix + "aten_op.h"), OP_TEMPLATE.substitute(top_env))
Example #6
            view_length = 'InputSize()' if has_tensorlist and i < tensorlist_idx else static_tensor_inputs
            if arg['type'] == 'TensorList':
                # NOTE: do not advance real_inputs here. After this we will
                # switch to indexing the "stack" from the end as if we only had
                # the static inputs.
                env['statements'].append(
                    'auto {} = peekSlice({}, InputSize() - {}, InputSize());'
                    .format(arg['name'], real_inputs, static_tensor_inputs))
            elif value_is_tensor_type(arg):
                # load tensor inputs from Caffe2
                env['statements'].append(
                    'auto {} = peek({}, {});'.format(arg['name'], real_inputs, view_length))
                real_inputs += 1
            else:
                init = CT(ARGUMENT_MAP[arg['type']]).substitute(env, arg=arg['name'])
                env['initialization'].append(init)

        emit_assignments(o, env)

        if o['name'] in SPECIAL_IMPLEMENTATIONS:
            env['invocation'] = "{}({})".format(SPECIAL_IMPLEMENTATIONS[o['name']], ','.join(env['arguments']))
        elif 'namespace' in o['method_of']:
            env['invocation'] = CT("at::${name}(${arguments})").substitute(env)
        else:
            assert('Tensor' in o['method_of'])
            env['invocation'] = "self.{}({})".format(
                o['name'], ', '.join(env['arguments'][1:]))

        top_env['implementations'].append(OPTION_TEMPLATE.substitute(env))
        key += 1
    write(os.path.join(args.install_dir, args.output_prefix + "aten_op.h"), OP_TEMPLATE.substitute(top_env))
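
Examples #4 through #6 are successive revisions of the same loop in caffe2/contrib/aten/gen_op.py. There, CT is an alias for CodeTemplate, and ARGUMENT_MAP maps each non-tensor argument type to an initialization template that reads the value from a Caffe2 operator attribute. A sketch of the assumed shape (the entries are illustrative, not the repo's exact table):

CT = CodeTemplate

ARGUMENT_MAP = {
    'bool': 'bool ${arg} = readAttribute<int64_t>("${arg}");',
    'int64_t': 'int64_t ${arg} = readAttribute<int64_t>("${arg}");',
    'double': 'double ${arg} = readAttribute<float>("${arg}");',
}

# Rendered once per non-tensor argument, e.g.:
init = CT(ARGUMENT_MAP['int64_t']).substitute({}, arg='dim')
# -> 'int64_t dim = readAttribute<int64_t>("dim");'

Tensor arguments, by contrast, are peeked off the input stack: inputs before a TensorList are indexed from the front (real_inputs advances), while everything at or after the list is indexed from the end via InputSize(), which is why the TensorList branch deliberately does not advance real_inputs.
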
Example #7
def gen_dispatcher_registrations(
        fm: FileManager,
        output_dir: str,
        class_name: str,
        cpp_namespace: str,
        backend_indices: Dict[DispatchKey, BackendIndex],
        grouped_native_functions: Sequence[Union[NativeFunction,
                                                 NativeFunctionsGroup]],
        backend_dispatch_key: DispatchKey,
        dispatch_key: DispatchKey,
        selector: 'SelectiveBuilder',
        # build_in_tree is true for lazy TS backend and affects include paths, not used for external backends
        build_in_tree: bool = False,
        per_operator_headers: bool = False,
        backend_name: str = "",
        eager_registration: bool = True) -> None:
    headers = [
        f"{output_dir}/{backend_dispatch_key}NativeFunctions.h",
    ]
    if build_in_tree:
        external_backend_headers_str = "\n".join(f'#include <{h}>' for h in headers)
    else:
        external_backend_headers_str = "\n".join(f'#include "{h}"' for h in headers)

    assert class_name is not None
    backend_index = backend_indices[dispatch_key]

    dispatch_registrations_body = list(
        concatMap(
            dest.RegisterDispatchKey(backend_index,
                                     Target.REGISTRATION,
                                     selector,
                                     rocm=False,
                                     cpp_namespace=cpp_namespace,
                                     class_method_name=f'{class_name}',
                                     skip_dispatcher_op_registration=False),
            grouped_native_functions))
    deferred_dispatch_registrations = ""
    static_init_dispatch_registrations = ""
    if eager_registration:
        static_template = CodeTemplate("""\
TORCH_LIBRARY_IMPL(aten, $dispatch_key, m) {
    $dispatch_registrations_body
};""")
        static_init_dispatch_registrations = static_template.substitute(
            dispatch_key=dispatch_key,
            dispatch_registrations_body=dispatch_registrations_body)
    else:
        deferred_template = CodeTemplate("""\
TORCH_API void Register${backend_name}${dispatch_key}NativeFunctions() {
    static auto m = MAKE_TORCH_LIBRARY_IMPL(aten, $dispatch_key);
    $dispatch_registrations_body
}""")
        deferred_dispatch_registrations = deferred_template.substitute(
            backend_name=backend_name,
            dispatch_key=dispatch_key,
            dispatch_registrations_body=dispatch_registrations_body)

    fm.write_with_template(
        f'Register{dispatch_key}.cpp', 'RegisterDispatchKey.cpp', lambda: {
            'static_init_dispatch_registrations': static_init_dispatch_registrations,
            'deferred_dispatch_registrations': deferred_dispatch_registrations,
            'extra_cuda_headers': '',
            'external_backend_headers': external_backend_headers_str,
            'ops_headers': '#include <ATen/Functions.h>' if not per_operator_headers else '',
            'DispatchKey': dispatch_key,
            'dispatch_namespace': dispatch_key.lower(),
            'dispatch_headers': dest.gen_registration_headers(
                backend_index, per_operator_headers=per_operator_headers, rocm=False),
            'dispatch_helpers': dest.gen_registration_helpers(backend_index),
            'dispatch_namespaced_definitions': '',
            'dispatch_anonymous_definitions': list(
                concatMap(
                    dest.RegisterDispatchKey(
                        backend_index,
                        Target.ANONYMOUS_DEFINITION,
                        selector,
                        rocm=False,
                        cpp_namespace=cpp_namespace,
                        class_method_name=f'{class_name}',
                        skip_dispatcher_op_registration=False),
                    grouped_native_functions)),
        })
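
The eager_registration flag selects between the two templates above: eager registration emits a TORCH_LIBRARY_IMPL block that runs during static initialization, while the deferred variant wraps the same registrations in a Register${backend_name}${dispatch_key}NativeFunctions() function the backend calls when it is ready. A hypothetical invocation for an out-of-tree backend (all argument values are placeholders):

gen_dispatcher_registrations(
    fm=fm,  # a torchgen FileManager rooted at the output directory
    output_dir='generated',
    class_name='XLANativeFunctions',
    cpp_namespace='torch_xla',
    backend_indices=backend_indices,
    grouped_native_functions=grouped_native_functions,
    backend_dispatch_key=DispatchKey.XLA,
    dispatch_key=DispatchKey.XLA,
    selector=SelectiveBuilder.get_nop_selector(),
    eager_registration=False,  # register explicitly at backend load time
)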