Example 1
def gen_while_gradient(op, g_output):
    """
    Generates gradient While operator
    """
    from caffe2.python.core import BlobReference
    assert op.type == "While", "Expected While op"
    assert len(op.input) > 0, "Expected at least one input in While op"

    assert len(op.output) == len(g_output), \
        "Different number of gradient blobs and While op outputs"

    grad_ops, deduped_g_output = dedupe_g_output(op, g_output)
    g_output = deduped_g_output

    init_grad_map = {}
    op_output = [str(o) for o in op.output]
    for output_name, grad_output_name in zip(op_output, g_output):
        if grad_output_name:
            init_grad_map[BlobReference(output_name)] = \
                BlobReference(grad_output_name)
    assert len(init_grad_map) > 0, "Empty initial gradient map for While op"

    loop_net = _get_net_argument(op, "loop_net")
    assert loop_net, "Expected loop subnet in While op"
    assert len(loop_net.op) == 1 and loop_net.op[0].type == "Do", \
        "Gradient While op requires single Do op as a loop body"
    do_op = loop_net.op[0]
    do_args = _get_do_arguments(do_op)
    assert "reuse_workspace" not in do_args or not do_args["reuse_workspace"], \
        "Gradient While op requires Do loop body op without reuse_workspace set"

    assert len(do_op.output) > 0, "Expected Do op with at least one output"
    workspace_blob = do_op.output[-1]

    loop_grad_net, loop_grad_map, loop_input_names, loop_output_names = \
        _gen_subnet_gradient(loop_net, init_grad_map)
    assert loop_grad_net, "Failed to get gradient net for loop body in While op"

    grad_ops += _prepare_gradient_while_ops(
        fwd_op=op,
        input_names=loop_input_names,
        output_names=loop_output_names,
        loop_grad_net=loop_grad_net,
        workspace_blob=workspace_blob,
        init_grad_map=init_grad_map,
        loop_grad_map=loop_grad_map)

    op_input = [str(i) for i in op.input]
    g_input = [loop_grad_map.get(i, None) for i in op_input]
    return grad_ops, g_input
Example 2
def _prepare_gradient_while_ops(
        fwd_op, input_names, output_names, loop_grad_net, workspace_blob,
        init_grad_map, loop_grad_map):
    gradient_while_def = caffe2_pb2.OperatorDef()
    gradient_while_def.CopyFrom(fwd_op)
    if gradient_while_def.name:
        gradient_while_def.name += "_grad"

    loop_net_arg = caffe2_pb2.Argument()
    loop_net_arg.name = "loop_net"
    loop_net_arg.n.CopyFrom(loop_grad_net)

    cond_net_arg = caffe2_pb2.Argument()
    cond_net_arg.name = "cond_net"
    from caffe2.python.core import Net, BlobReference
    # Construct the condition net - check that there are still forward
    # workspaces left, using the HasScope op
    cond_net = Net('gradient_loop_cond_net')
    cond_init_net = Net('gradient_loop_cond_net_init')
    cond_blob = cond_net.NextScopedBlob(cond_net.Name() + '/cond')
    cond_init_net.HasScope(workspace_blob, cond_blob)
    cond_net.HasScope(workspace_blob, cond_blob)
    for blob, init_grad_blob in init_grad_map.items():
        blob_name = str(blob)
        init_grad_blob_name = str(init_grad_blob)
        if blob_name in loop_grad_map and \
                loop_grad_map[blob_name] != init_grad_blob_name:
            cond_net.Copy(
                BlobReference(loop_grad_map[blob_name]), init_grad_blob)
            cond_init_net.Copy(
                init_grad_blob, BlobReference(loop_grad_map[blob_name]))
    cond_net_arg.n.CopyFrom(cond_net.Proto())

    del gradient_while_def.arg[:]
    gradient_while_def.arg.extend([loop_net_arg, cond_net_arg])

    del gradient_while_def.control_input[:]
    del gradient_while_def.input[:]
    gradient_while_def.input.extend(
        [str(cond_blob).encode('utf-8')] + list(input_names))
    del gradient_while_def.output[:]
    gradient_while_def.output.extend(output_names)
    gradient_while_def.is_gradient_op = True
    return [o for o in cond_init_net.Proto().op] + [gradient_while_def]
Example 3
def create_param(self, param_name, init_net, shape):
    if isinstance(param_name, BlobReference):
        param = BlobReference(str(param_name), init_net)
    elif isinstance(param_name, six.string_types):
        param = ScopedBlobReference(param_name, init_net)
    else:
        raise TypeError("Unsupported type for param_name")
    # TODO(amalevich): Add operator that will check param in the workspace
    return ParameterInfo(
        param_id=None,
        param=param,
        shape=shape,
    )
Example 4
def gen_if_gradient(op, g_output):
    """
    Generates gradient If operator, given the forward If op and a list
    of gradient blobs corresponding to the forward op's outputs.
    Returns gradient ops and a list of blobs corresponding to input gradients.
    """
    from caffe2.python.core import BlobReference
    assert op.type == "If", "Expected If op"
    # first input is the condition blob
    assert len(op.input) > 0, "Expected at least one input in If op"

    assert len(op.output) == len(g_output), \
        "Different number of gradient blobs and If op outputs"

    grad_ops, deduped_g_output = dedupe_g_output(op, g_output)
    g_output = deduped_g_output

    init_grad_map = {}  # map from if's output blob to output gradient blob
    op_input = [str(i) for i in op.input]
    op_output = [str(o) for o in op.output]
    for output_name, grad_output_name in zip(op_output, g_output):
        if grad_output_name:
            init_grad_map[BlobReference(output_name)] = \
                BlobReference(grad_output_name)
    # shouldn't call without at least one output gradient available
    assert len(init_grad_map) > 0, "Empty initial gradient map for If op"

    grad_map = {}  # map from blob to gradient blob
    then_net = _get_net_argument(op, "then_net")
    assert then_net, "Expected then subnet in If op"
    then_grad_net, then_grad_map, then_input_names, then_output_names = \
        _gen_subnet_gradient(then_net, init_grad_map)
    assert then_grad_net, "Failed to get gradient net for then in If op"
    grad_map.update(then_grad_map)

    else_input_names = set()
    else_output_names = set()
    else_grad_map = {}
    else_grad_net = None
    else_net = _get_net_argument(op, "else_net")
    if else_net:
        else_grad_net, else_grad_map, else_input_names, else_output_names = \
            _gen_subnet_gradient(else_net, init_grad_map)
        assert else_grad_net, "Failed to get gradient net for else in If op"
        # handle the case where one branch (e.g. else) doesn't update a blob's
        # gradient and keeps the original from init_grad_map, while the other
        # branch (then) does update it
        for else_blob, else_grad_blob in else_grad_map.items():
            if else_blob in then_grad_map:
                then_grad_blob = then_grad_map[else_blob]
                # if both then and else branches have grad blob name for the same
                # blob and grad names are different, then one of the branches
                # doesn't use the blob and has the original grad blob name in its grad map,
                # and the other branch uses the blob and has a <blob_name>_grad name
                # in its grad map (might be different from the original grad blob)
                if then_grad_blob != else_grad_blob:
                    init_grad_name = init_grad_map[else_blob] \
                        if else_blob in init_grad_map else None

                    if then_grad_blob == init_grad_name:
                        grad_map[else_blob] = else_grad_blob
                    elif else_grad_blob == init_grad_name:
                        grad_map[else_blob] = then_grad_blob
                    else:
                        raise "Unexpected grad blob name " + else_blob + ", " + \
                            else_grad_blob + ", " + then_grad_blob
            else:
                grad_map[else_blob] = else_grad_blob

    # make sure gradients of blobs that were not computed
    # by the selected if's branch are initialized with zeros
    then_other_output_names = \
        then_output_names - (then_output_names & else_output_names)
    then_other_grad_output_names = set(
        [o for o in then_other_output_names if o in then_grad_map.values()])
    zero_then = _gen_grad_zero_init_ops(
        init_grad_map, then_grad_map, then_other_grad_output_names)
    if else_grad_net:
        else_grad_net.op.extend(zero_then)
    elif len(zero_then) > 0:
        else_grad_net = caffe2_pb2.NetDef()
        else_grad_net.CopyFrom(then_grad_net)
        if else_grad_net.name:
            else_grad_net.name += "_auto_else_zero_blobs_"
        del else_grad_net.op[:]
        else_grad_net.op.extend(zero_then)
        del else_grad_net.external_input[:]
        del else_grad_net.external_output[:]

    else_other_output_names = \
        else_output_names - (then_output_names & else_output_names)
    else_other_grad_output_names = set(
        [o for o in else_other_output_names if o in else_grad_map.values()])
    zero_else = _gen_grad_zero_init_ops(
        init_grad_map, else_grad_map, else_other_grad_output_names)
    then_grad_net.op.extend(zero_else)

    output_names = list(then_output_names | else_output_names)
    input_names = then_input_names | else_input_names
    # make sure condition blob is the first in the list
    input_names = [op_input[0]] + list(input_names - {op_input[0]})
    gradient_if_def = _prepare_gradient_if_op(
        fwd_op=op,
        input_names=input_names,
        output_names=output_names,
        then_grad_net=then_grad_net,
        else_grad_net=else_grad_net)
    g_input = [grad_map.get(i, None) for i in op_input]
    return grad_ops + [gradient_if_def], g_input
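For orientation, here is a minimal, hypothetical sketch of how this generator could be exercised by hand. The blob names (cond, x, y, y_grad), the branch ops, and the net names are illustrative assumptions; in practice the function is invoked by caffe2's gradient machinery rather than called directly.

from caffe2.python import core

# Hypothetical forward If op: the then branch squares x, the else branch copies it.
then_net = core.Net("then_branch")
then_net.Mul(["x", "x"], "y")
else_net = core.Net("else_branch")
else_net.Copy("x", "y")

if_op = core.CreateOperator(
    "If", ["cond", "x"], ["y"],
    then_net=then_net.Proto(),
    else_net=else_net.Proto())

# One gradient blob per forward output; the call returns the gradient op(s)
# and gradients for ["cond", "x"] (None where no gradient is produced).
grad_ops, g_input = gen_if_gradient(if_op, ["y_grad"])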
Example 5
def gen_do_gradient(op, g_output):
    """
    Generates gradient Do operator, given the forward Do op and a list
    of gradient blobs corresponding to the forward op's outputs.
    Returns gradient ops and a list of blobs corresponding to input gradients.
    """
    from caffe2.python.core import BlobReference
    subnet, outer_to_inner_map, inner_to_outer_map, workspace_blob_name = \
        _do_op_sanity_check_and_process(op)

    assert len(g_output) == len(op.output), \
        "Different number of gradient blobs and Do op outputs"

    grad_ops, deduped_g_output = dedupe_g_output(op, g_output)
    g_output = deduped_g_output

    # From the outer net point of view:
    #  Do is an operator that has some number of inputs and outputs;
    #  we have to generate a gradient operator that writes into
    #  corresponding input gradient blobs and has access to inputs, outputs
    #  and gradient output blobs
    # From the inner net point of view:
    #  Do is an operator with a subnet and blob bindings,
    #  we need to forward Do's output blob gradients into inner workspace,
    #  use them to run backward pass generation and forward Do's input blob
    #  gradients back into outer workspace

    op_output = [str(o) for o in op.output]
    op_output = op_output[:-1]  # remove workspace pointer blob
    op_input = [str(i) for i in op.input]
    op_input = op_input[:-1]  # remove workspace pointer blob

    ordered_inner_output_blob_names = [outer_to_inner_map[o] for o in op_output]

    backward_pass_initial_grad_map = {}
    initial_grad_map = {}
    for inner_output_name, outer_grad_output_name in \
            zip(ordered_inner_output_blob_names, g_output):
        # link inner_output_name to corresponding inner_grad_output_name for
        # backward pass generation;
        if outer_grad_output_name:
            inner_grad_output_name = inner_output_name + "/_DO_OPERATOR_INNER_GRAD_"
            backward_pass_initial_grad_map[BlobReference(inner_output_name)] = \
                BlobReference(inner_grad_output_name)
            initial_grad_map[inner_grad_output_name] = str(outer_grad_output_name)
    assert len(initial_grad_map) > 0, "Empty initial gradient map for Do op"

    inner_grad_ops, inner_grad_names_map = _gen_subgradient_pass(
        subnet, backward_pass_initial_grad_map)

    if len(inner_grad_ops) == 0:
        return [], []

    grad_copy_ops = []
    g_input = []
    new_op_outputs = []
    new_blob_bindings = {}
    for outer_input_name in op_input:
        inner_input_name = outer_to_inner_map[outer_input_name]
        if inner_input_name in inner_grad_names_map:
            inner_grad_input_name = inner_grad_names_map[inner_input_name]
            outer_grad_input_name = outer_input_name + "_grad"

            # It is possible that inner_grad_input_name will need to be
            # linked to another outer blob. For example:
            #
            #    // y - param initialized in init_net
            #    x = ...
            #    z = ...
            #    with ops.IfNet(...):
            #        ops.Add([z, x], y) # inner Do block
            #    loss = f(..., y, ...)
            #
            # In this case x, y and z are external for the inner Do block,
            # the inputs of the Do block are z and x and the output is y.
            # When computing the gradient of input x given the gradient
            # of output y it's easy to see that they are equal.
            # During the generation of gradient Do operator, we link
            # external gradient y (y_grad) to the internal name
            # (y/_DO_OPERATOR_INNER_GRAD_) and generate the backward pass
            # for the internal Do net. As a result we get gradient operators
            # for the gradient Do and gradient map that maps internal Do
            # blobs to their computed gradients.
            # In this example, gradient map may have blob x linked to
            # gradient blob y/_DO_OPERATOR_INNER_GRAD_.
            # We should export gradient for x outside of Do, so
            # we add a blob mapping from inner gradient blob
            # (y/_DO_OPERATOR_INNER_GRAD_) to a new outer name (x_grad).
            #
            # (Note: since we use transparent blob mapping between outer and
            # inner (Do's) workspace, these operations do not involve copying
            # but are merely using blobs in outer workspace in the Do's operator
            # workspace under (possibly) different names)
            #
            # At the same time, we need to add a blob mapping from inner name
            # y/_DO_OPERATOR_INNER_GRAD_ to the outer blob y_grad
            # Hence, in this case we cannot use the existing blob mapping scheme
            # that requires a bijection between a subset of inner blob names and
            # the set of all (Do's input and output) outer blob names

            # TODO(iliacher): Remove unnecessary blob copying

            new_inner_grad_input_name = \
                inner_input_name + "/_DO_OPERATOR_INNER_GRAD_COPY_"
            grad_copy_ops.append(_prepare_blob_copy_op(
                inner_grad_input_name, new_inner_grad_input_name))

            new_blob_bindings[new_inner_grad_input_name] = outer_grad_input_name
            new_op_outputs.append(outer_grad_input_name)
            g_input.append(outer_grad_input_name)
        else:
            g_input.append(None)

    new_op_inputs = []
    overwritten_names = set()
    saved_local_blob_names = set()
    for grad_op in inner_grad_ops:
        grad_op_input = [str(i) for i in grad_op.input]
        grad_op_output = [str(o) for o in grad_op.output]
        for grad_op_input_name in grad_op_input:
            if grad_op_input_name in overwritten_names:
                continue
            # check if this is an external blob
            outer_name = inner_to_outer_map.get(grad_op_input_name, None)
            if not outer_name:
                # check if this is an external gradient blob
                outer_name = initial_grad_map.get(grad_op_input_name, None)
            if outer_name:
                outer_name = str(outer_name)
                if outer_name not in new_op_inputs:
                    new_op_inputs.append(outer_name)

                new_blob_bindings[grad_op_input_name] = outer_name
            else:
                # this is a local blob, we'll get its value from
                # a saved forward op workspace
                saved_local_blob_names.add(grad_op_input_name)
        overwritten_names.update(grad_op_output)

    # add inner gradient copy ops
    inner_grad_ops += grad_copy_ops

    gradient_do_def = _prepare_gradient_do_op(
        fwd_op=op,
        fwd_net=subnet,
        grad_ops=inner_grad_ops,
        inputs=new_op_inputs,
        outputs=new_op_outputs,
        blob_bindings=new_blob_bindings,
        saved_fwd_blobs=saved_local_blob_names,
        workspace_blob_name=workspace_blob_name)
    grad_ops.append(gradient_do_def)

    _do_op_sanity_check_and_process(gradient_do_def)

    return grad_ops, g_input
Example 6
def PyTorchModule(helper,
                  model,
                  sample_arguments,
                  caffe2_inputs,
                  prefix_name=None):
    """
    Embed an ONNX-exportable PyTorch Model into a Caffe2 model being built.

    Arguments:
        helper (caffe2.python.model_helper.ModelHelper): the model helper where
            this imported network should be inserted
        model (torch.nn.Module): the model to be exported
        sample_arguments (tuple of arguments): the inputs to
            the model, such that ``model(*sample_arguments)`` is a valid
            invocation of the model.  Any non-Variable arguments will
            be hard-coded into the exported model; any Variable arguments
            will become inputs of the exported model, in the order they
            occur in sample_arguments.  If sample_arguments is a single
            Variable, this is equivalent to passing a 1-ary tuple of that
            Variable.  (Note: passing keyword arguments to the model is not
            currently supported.  Give us a shout if you need it.)
        caffe2_inputs (list of str or caffe2.python.core.BlobReference): the
           caffe2 Blobs that should be inputs to this network. Must be
           the same length as sample_arguments
        prefix_name: prefix to add to each blob name in the imported network;
           if None, a fresh prefix pytorch_import_N/ is used
    Returns:
        A tuple of caffe2.python.core.BlobReference objects referring to the
        model's outputs.
    """
    if prefix_name is None:
        global _next_idx
        prefix_name = 'pytorch_import_' + str(_next_idx) + '/'
        _next_idx += 1

    # TODO: handle the case where model cannot be exported
    # and embed as a Python op in Caffe2
    f = io.BytesIO()
    torch.onnx.export(model, sample_arguments, f, export_params=True)
    onnx_model = onnx.load(io.BytesIO(f.getvalue()))
    init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model)

    initialized = set([x.name for x in onnx_model.graph.initializer])
    uninitialized_inputs = {
        x.name: i
        for i, x in enumerate(onnx_model.graph.input)
        if x.name not in initialized
    }

    if len(uninitialized_inputs) != len(caffe2_inputs):
        raise ValueError('Expected {} inputs but found {}'.format(
            len(uninitialized_inputs), len(caffe2_inputs)))

    def remap_blob_name(name):
        if name in uninitialized_inputs:
            idx = uninitialized_inputs[name]
            return str(caffe2_inputs[idx])
        return prefix_name + name

    predict_net = Net(predict_net).Clone('anon', _FakeDict(remap_blob_name))
    helper.net.AppendNet(predict_net)

    init_net = Net(init_net).Clone('anon', _FakeDict(remap_blob_name))
    helper.param_init_net.AppendNet(init_net)

    results = tuple([
        BlobReference(remap_blob_name(x.name), helper.net)
        for x in onnx_model.graph.output
    ])
    return results
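A minimal usage sketch, assuming caffe2 with its ONNX backend and PyTorch are installed. TinyModel, the blob name x, and the helper name are hypothetical illustrations, not part of the function above.

import torch
from caffe2.python import model_helper, workspace

class TinyModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(4, 2)

    def forward(self, x):
        return self.linear(x)

helper = model_helper.ModelHelper(name="embedded_pytorch")
x = helper.net.AddExternalInput("x")   # caffe2 blob that feeds the imported net
sample = torch.zeros(1, 4)             # defines the shape used for the ONNX export

(y,) = PyTorchModule(helper, TinyModel(), (sample,), [x])

workspace.FeedBlob(str(x), sample.numpy())
workspace.RunNetOnce(helper.param_init_net)
workspace.RunNetOnce(helper.net)
print(workspace.FetchBlob(str(y)))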
Example 7
def gen_if_gradient(op, g_output):
    """
    Generates gradient If operator, given the forward If op and a list
    of gradient blobs corresponding to the forward op's outputs.
    Returns a gradient op and a list of blobs corresponding to input gradients.
    """
    from caffe2.python.core import BlobReference
    assert op.type == "If", "Expected If op"
    # first input is the condition blob
    assert len(op.input) > 0, "Expected at least one input in If op"

    assert len(op.output) == len(g_output), \
        "Different number of gradient blobs and If op outputs"

    init_grad_map = {}  # map from if's output blob to output gradient blob
    op_input = [str(i) for i in op.input]
    op_output = [str(o) for o in op.output]
    for output_name, grad_output_name in zip(op_output, g_output):
        if grad_output_name:
            init_grad_map[BlobReference(output_name)] = \
                BlobReference(grad_output_name)
    # shouldn't call without at least one output gradient available
    assert len(init_grad_map) > 0, "Empty initial gradient map for If op"

    grad_map = {}  # map from blob to gradient blob
    then_net = _get_net_argument(op, "then_net")
    assert then_net, "Expected then subnet in If op"
    then_grad_net, then_grad_map, then_input_names, then_output_names = \
        _gen_if_branch_gradient(then_net, init_grad_map)
    assert then_grad_net, "Failed to get gradient net for then in If op"
    grad_map.update(then_grad_map)

    else_input_names = set()
    else_output_names = set()
    else_grad_map = {}
    else_grad_net = None
    else_net = _get_net_argument(op, "else_net")
    if else_net:
        else_grad_net, else_grad_map, else_input_names, else_output_names = \
            _gen_if_branch_gradient(else_net, init_grad_map)
        assert else_grad_net, "Failed to get gradient net for else in If op"
        grad_map.update(else_grad_map)

    # make sure gradients of blobs that were not computed
    # by the selected if's branch are initialized with zeros
    then_other_output_names = \
        then_output_names - (then_output_names & else_output_names)
    then_other_grad_output_names = set(
        [o for o in then_other_output_names if o in then_grad_map.values()])
    zero_then = _gen_grad_zero_init_ops(then_grad_map, then_other_grad_output_names)
    if else_grad_net:
        else_grad_net.op.extend(zero_then)
    elif len(zero_then) > 0:
        else_grad_net = caffe2_pb2.NetDef()
        else_grad_net.CopyFrom(then_grad_net)
        if else_grad_net.name:
            else_grad_net.name += "_auto_else_zero_blobs_"
        del else_grad_net.op[:]
        else_grad_net.op.extend(zero_then)
        del else_grad_net.external_input[:]
        del else_grad_net.external_output[:]

    else_other_output_names = \
        else_output_names - (then_output_names & else_output_names)
    else_other_grad_output_names = set(
        [o for o in else_other_output_names if o in else_grad_map.values()])
    zero_else = _gen_grad_zero_init_ops(else_grad_map, else_other_grad_output_names)
    then_grad_net.op.extend(zero_else)

    output_names = list(then_output_names | else_output_names)
    input_names = then_input_names | else_input_names
    # make sure condition blob is the first in the list
    input_names = [op_input[0]] + list(input_names - {op_input[0]})
    gradient_if_def = _prepare_gradient_if_op(
        fwd_op=op,
        input_names=input_names,
        output_names=output_names,
        then_grad_net=then_grad_net,
        else_grad_net=else_grad_net)
    g_input = [grad_map.get(i, None) for i in op_input]
    return [gradient_if_def], g_input
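As a closing note, these generators are not meant to be called directly in user code. The following is a hedged sketch of how the wiring works, based on how caffe2.python.core registers Python gradient generators for control-flow ops; it is a simplification stated as an assumption, not a verbatim excerpt.

from caffe2.python.core import GradientRegistry

# Register Python gradient generators for the control-flow ops.
GradientRegistry.RegisterGradient("Do")(gen_do_gradient)
GradientRegistry.RegisterGradient("If")(gen_if_gradient)
GradientRegistry.RegisterGradient("While")(gen_while_gradient)

# After registration, net.AddGradientOperators(...) on a net containing
# Do/If/While ops ends up dispatching to the functions shown in these examples.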