def gen_while_gradient(op, g_output):
    """
    Generates gradient While operator, given forward While op and a list
    of gradient blobs corresponding to forward op's outputs
    Returns gradient ops and a list of blobs corresponding to input gradients
    """
    from caffe2.python.core import BlobReference
    assert op.type == "While", "Expected While op"
    assert len(op.input) > 0, "Expected at least one input in While op"
    assert len(op.output) == len(g_output), \
        "Different number of gradient blobs and While op outputs"

    grad_ops, deduped_g_output = dedupe_g_output(op, g_output)
    g_output = deduped_g_output

    init_grad_map = {}
    op_output = [str(o) for o in op.output]
    for output_name, grad_output_name in zip(op_output, g_output):
        if grad_output_name:
            init_grad_map[BlobReference(output_name)] = \
                BlobReference(grad_output_name)
    assert len(init_grad_map) > 0, "Empty initial gradient map for While op"

    loop_net = _get_net_argument(op, "loop_net")
    assert loop_net, "Expected loop subnet in While op"
    assert len(loop_net.op) == 1 and loop_net.op[0].type == "Do", \
        "Gradient While op requires single Do op as a loop body"
    do_op = loop_net.op[0]
    do_args = _get_do_arguments(do_op)
    assert "reuse_workspace" not in do_args or not do_args["reuse_workspace"], \
        "Gradient While op requires Do loop body op without reuse_workspace set"

    assert len(do_op.output) > 0, "Expected Do op with at least one output"
    workspace_blob = do_op.output[-1]

    loop_grad_net, loop_grad_map, loop_input_names, loop_output_names = \
        _gen_subnet_gradient(loop_net, init_grad_map)
    assert loop_grad_net, "Failed to get gradient net for loop body in While op"

    grad_ops += _prepare_gradient_while_ops(
        fwd_op=op,
        input_names=loop_input_names,
        output_names=loop_output_names,
        loop_grad_net=loop_grad_net,
        workspace_blob=workspace_blob,
        init_grad_map=init_grad_map,
        loop_grad_map=loop_grad_map)

    op_input = [str(i) for i in op.input]
    g_input = [loop_grad_map.get(i, None) for i in op_input]
    return grad_ops, g_input
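
# Illustrative usage sketch (not part of the original module). The names
# `while_op` and `g_output` are hypothetical stand-ins for a forward While
# OperatorDef and a list with one upstream gradient blob name (or None) per
# While output.
def _example_gen_while_gradient(while_op, g_output):
    grad_ops, g_input = gen_while_gradient(while_op, g_output)
    # grad_ops: operators implementing the backward pass of the loop;
    # g_input: one gradient blob name (or None) per forward While input.
    return grad_ops, g_input
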
def _prepare_gradient_while_ops(
        fwd_op, input_names, output_names, loop_grad_net, workspace_blob,
        init_grad_map, loop_grad_map):
    gradient_while_def = caffe2_pb2.OperatorDef()
    gradient_while_def.CopyFrom(fwd_op)
    if gradient_while_def.name:
        gradient_while_def.name += "_grad"

    loop_net_arg = caffe2_pb2.Argument()
    loop_net_arg.name = "loop_net"
    loop_net_arg.n.CopyFrom(loop_grad_net)

    cond_net_arg = caffe2_pb2.Argument()
    cond_net_arg.name = "cond_net"
    from caffe2.python.core import Net, BlobReference
    # Construct the condition net: check that there are still saved forward
    # workspaces left, using the HasScope op
    cond_net = Net('gradient_loop_cond_net')
    cond_init_net = Net('gradient_loop_cond_net_init')
    cond_blob = cond_net.NextScopedBlob(cond_net.Name() + '/cond')
    cond_init_net.HasScope(workspace_blob, cond_blob)
    cond_net.HasScope(workspace_blob, cond_blob)
    for blob, init_grad_blob in init_grad_map.items():
        blob_name = str(blob)
        init_grad_blob_name = str(init_grad_blob)
        if blob_name in loop_grad_map and \
                loop_grad_map[blob_name] != init_grad_blob_name:
            cond_net.Copy(
                BlobReference(loop_grad_map[blob_name]), init_grad_blob)
            cond_init_net.Copy(
                init_grad_blob, BlobReference(loop_grad_map[blob_name]))
    cond_net_arg.n.CopyFrom(cond_net.Proto())

    del gradient_while_def.arg[:]
    gradient_while_def.arg.extend([loop_net_arg, cond_net_arg])

    del gradient_while_def.control_input[:]
    del gradient_while_def.input[:]
    gradient_while_def.input.extend(
        [str(cond_blob).encode('utf-8')] + list(input_names))
    del gradient_while_def.output[:]
    gradient_while_def.output.extend(output_names)
    gradient_while_def.is_gradient_op = True
    return [o for o in cond_init_net.Proto().op] + [gradient_while_def]
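
# Rough shape of the OperatorDef assembled above, assuming the forward op was
# named "my_while"; the blob names are illustrative, since actual names depend
# on Net name scoping:
#
#   name: "my_while_grad"   type: "While"   is_gradient_op: true
#   input: "gradient_loop_cond_net/cond"  # written by HasScope
#   input: ...                            # gradient loop's external inputs
#   arg { name: "loop_net" ... }  # the gradient loop body
#   arg { name: "cond_net" ... }  # re-checks HasScope each iteration, so the
#                                 # loop runs once per saved forward workspace
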
def create_param(self, param_name, init_net, shape):
    if isinstance(param_name, BlobReference):
        param = BlobReference(str(param_name), init_net)
    elif isinstance(param_name, six.string_types):
        param = ScopedBlobReference(param_name, init_net)
    else:
        raise TypeError("Unsupported type for param_name")
    # TODO(amalevich): Add operator that will check param in the workspace
    return ParameterInfo(
        param_id=None,
        param=param,
        shape=shape,
    )
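
# Hypothetical usage sketch: `model` stands for a ModelHelper-like object that
# exposes this method and owns a param_init_net; the name and shape below are
# illustrative only.
#
#   info = model.create_param('fc_w', model.param_init_net, shape=[64, 128])
#   # info.param is a (scoped) BlobReference; info.shape == [64, 128]
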
def gen_if_gradient(op, g_output):
    """
    Generates gradient If operator, given forward If op and a list
    of gradient blobs corresponding to forward op's outputs
    Returns a gradient op and a list of blobs corresponding to input gradients
    """
    from caffe2.python.core import BlobReference
    assert op.type == "If", "Expected If op"
    # first input is the condition blob
    assert len(op.input) > 0, "Expected at least one input in If op"
    assert len(op.output) == len(g_output), \
        "Different number of gradient blobs and If op outputs"

    grad_ops, deduped_g_output = dedupe_g_output(op, g_output)
    g_output = deduped_g_output

    init_grad_map = {}  # map from If's output blob to output gradient blob
    op_input = [str(i) for i in op.input]
    op_output = [str(o) for o in op.output]
    for output_name, grad_output_name in zip(op_output, g_output):
        if grad_output_name:
            init_grad_map[BlobReference(output_name)] = \
                BlobReference(grad_output_name)
    # shouldn't call without at least one output gradient available
    assert len(init_grad_map) > 0, "Empty initial gradient map for If op"

    grad_map = {}  # map from blob to gradient blob
    then_net = _get_net_argument(op, "then_net")
    assert then_net, "Expected then subnet in If op"
    then_grad_net, then_grad_map, then_input_names, then_output_names = \
        _gen_subnet_gradient(then_net, init_grad_map)
    assert then_grad_net, "Failed to get gradient net for then in If op"
    grad_map.update(then_grad_map)

    else_input_names = set()
    else_output_names = set()
    else_grad_map = {}
    else_grad_net = None
    else_net = _get_net_argument(op, "else_net")
    if else_net:
        else_grad_net, else_grad_map, else_input_names, else_output_names = \
            _gen_subnet_gradient(else_net, init_grad_map)
        assert else_grad_net, "Failed to get gradient net for else in If op"
        # consider case: else doesn't update blob's gradient and keeps original
        # from init_grad_map, but then updates the gradient
        for else_blob, else_grad_blob in else_grad_map.items():
            if else_blob in then_grad_map:
                then_grad_blob = then_grad_map[else_blob]
                # if both then and else branches have grad blob names for the
                # same blob and the grad names are different, then one of the
                # branches doesn't use the blob and has the original grad blob
                # name in its grad map, and the other branch uses the blob and
                # has <blob_name>_grad name in its grad map (might be
                # different from the original grad blob)
                if then_grad_blob != else_grad_blob:
                    init_grad_name = init_grad_map[else_blob] \
                        if else_blob in init_grad_map else None

                    if then_grad_blob == init_grad_name:
                        grad_map[else_blob] = else_grad_blob
                    elif else_grad_blob == init_grad_name:
                        grad_map[else_blob] = then_grad_blob
                    else:
                        raise Exception(
                            "Unexpected grad blob names: {}, {}, {}".format(
                                else_blob, else_grad_blob, then_grad_blob))
            else:
                grad_map[else_blob] = else_grad_blob

    # make sure gradients of blobs that were not computed
    # by the selected If branch are initialized with zeros
    then_other_output_names = then_output_names - else_output_names
    then_other_grad_output_names = set(
        [o for o in then_other_output_names if o in then_grad_map.values()])
    zero_then = _gen_grad_zero_init_ops(
        init_grad_map, then_grad_map, then_other_grad_output_names)
    if else_grad_net:
        else_grad_net.op.extend(zero_then)
    elif len(zero_then) > 0:
        # no else branch exists, but the then branch produced gradients that
        # must be zero-initialized when else is taken; synthesize an else
        # gradient net holding just the zero-init ops
        else_grad_net = caffe2_pb2.NetDef()
        else_grad_net.CopyFrom(then_grad_net)
        if else_grad_net.name:
            else_grad_net.name += "_auto_else_zero_blobs_"
        del else_grad_net.op[:]
        else_grad_net.op.extend(zero_then)
        del else_grad_net.external_input[:]
        del else_grad_net.external_output[:]

    else_other_output_names = else_output_names - then_output_names
    else_other_grad_output_names = set(
        [o for o in else_other_output_names if o in else_grad_map.values()])
    zero_else = _gen_grad_zero_init_ops(
        init_grad_map, else_grad_map, else_other_grad_output_names)
    then_grad_net.op.extend(zero_else)

    output_names = list(then_output_names | else_output_names)
    input_names = then_input_names | else_input_names
    # make sure the condition blob is the first in the list
    input_names = [op_input[0]] + list(input_names - {op_input[0]})

    gradient_if_def = _prepare_gradient_if_op(
        fwd_op=op,
        input_names=input_names,
        output_names=output_names,
        then_grad_net=then_grad_net,
        else_grad_net=else_grad_net)
    g_input = [grad_map.get(i, None) for i in op_input]
    return grad_ops + [gradient_if_def], g_input
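
# Illustrative usage sketch with hypothetical names: `if_op` is a forward If
# OperatorDef and `g_output` lists one gradient blob name (or None) per output.
def _example_gen_if_gradient(if_op, g_output):
    grad_ops, g_input = gen_if_gradient(if_op, g_output)
    # The last element of grad_ops is the gradient If operator itself; any
    # preceding ops come from dedupe_g_output.
    return grad_ops, g_input
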
def gen_do_gradient(op, g_output):
    """
    Generates gradient Do operator, given forward Do op and a list
    of gradient blobs corresponding to forward op's outputs
    Returns a gradient op and a list of blobs corresponding to input gradients
    """
    from caffe2.python.core import BlobReference
    subnet, outer_to_inner_map, inner_to_outer_map, workspace_blob_name = \
        _do_op_sanity_check_and_process(op)

    assert len(g_output) == len(op.output), \
        "Different number of gradient blobs and Do op outputs"

    grad_ops, deduped_g_output = dedupe_g_output(op, g_output)
    g_output = deduped_g_output

    # From the outer net point of view:
    #  Do is an operator that has some number of inputs and outputs;
    #  we have to generate a gradient operator that writes into
    #  corresponding input gradient blobs and has access to inputs, outputs
    #  and gradient output blobs
    # From the inner net point of view:
    #  Do is an operator with a subnet and blob bindings,
    #  we need to forward Do's output blob gradients into the inner workspace,
    #  use them to run backward pass generation and forward Do's input blob
    #  gradients back into the outer workspace

    op_output = [str(o) for o in op.output]
    op_output = op_output[:-1]  # remove workspace pointer blob
    op_input = [str(i) for i in op.input]
    op_input = op_input[:-1]  # remove workspace pointer blob

    ordered_inner_output_blob_names = [outer_to_inner_map[o] for o in op_output]

    backward_pass_initial_grad_map = {}
    initial_grad_map = {}
    for inner_output_name, outer_grad_output_name in \
            zip(ordered_inner_output_blob_names, g_output):
        # link inner_output_name to corresponding inner_grad_output_name for
        # backward pass generation
        if outer_grad_output_name:
            inner_grad_output_name = \
                inner_output_name + "/_DO_OPERATOR_INNER_GRAD_"
            backward_pass_initial_grad_map[BlobReference(inner_output_name)] = \
                BlobReference(inner_grad_output_name)
            initial_grad_map[inner_grad_output_name] = \
                str(outer_grad_output_name)
    assert len(initial_grad_map) > 0, "Empty initial gradient map for Do op"

    inner_grad_ops, inner_grad_names_map = _gen_subgradient_pass(
        subnet, backward_pass_initial_grad_map)

    if len(inner_grad_ops) == 0:
        return [], []

    grad_copy_ops = []
    g_input = []
    new_op_outputs = []
    new_blob_bindings = {}
    for outer_input_name in op_input:
        inner_input_name = outer_to_inner_map[outer_input_name]
        if inner_input_name in inner_grad_names_map:
            inner_grad_input_name = inner_grad_names_map[inner_input_name]
            outer_grad_input_name = outer_input_name + "_grad"

            # It is possible that inner_grad_input_name will need to be
            # linked to another outer blob. For example:
            #
            #    // y - param initialized in init_net
            #    x = ...
            #    z = ...
            #    with ops.IfNet(...):
            #        ops.Add([z, x], y)  # inner Do block
            #    loss = f(..., y, ...)
            #
            # In this case x, y and z are external for the inner Do block,
            # the inputs of the Do block are z and x and the output is y.
            # When computing the gradient of input x given the gradient
            # of output y, it's easy to see that they are equal.
            # During the generation of the gradient Do operator, we link
            # external gradient y (y_grad) to the internal name
            # (y/_DO_OPERATOR_INNER_GRAD_) and generate the backward pass
            # for the internal Do net. As a result we get gradient operators
            # for the gradient Do and a gradient map that maps internal Do
            # blobs to their computed gradients.
            # In this example, the gradient map may have blob x linked to
            # gradient blob y/_DO_OPERATOR_INNER_GRAD_.
            # We should export the gradient for x outside of Do, so
            # we add a blob mapping from the inner gradient blob
            # (y/_DO_OPERATOR_INNER_GRAD_) to a new outer name (x_grad).
            #
            # (Note: since we use transparent blob mapping between outer and
            # inner (Do's) workspaces, these operations do not involve copying
            # but are merely using blobs in the outer workspace in the Do
            # operator's workspace under (possibly) different names)
            #
            # At the same time, we need to add a blob mapping from inner name
            # y/_DO_OPERATOR_INNER_GRAD_ to the outer blob y_grad.
            # Hence in this case, we cannot use the existing blob mapping
            # scheme that requires a bijection between a subset of inner blob
            # names and a set of all (Do's input and output) outer blob names

            # TODO(iliacher): Remove unnecessary blob copying

            new_inner_grad_input_name = \
                inner_input_name + "/_DO_OPERATOR_INNER_GRAD_COPY_"
            grad_copy_ops.append(_prepare_blob_copy_op(
                inner_grad_input_name, new_inner_grad_input_name))

            new_blob_bindings[new_inner_grad_input_name] = \
                outer_grad_input_name
            new_op_outputs.append(outer_grad_input_name)
            g_input.append(outer_grad_input_name)
        else:
            g_input.append(None)

    new_op_inputs = []
    overwritten_names = set()
    saved_local_blob_names = set()
    for grad_op in inner_grad_ops:
        grad_op_input = [str(i) for i in grad_op.input]
        grad_op_output = [str(o) for o in grad_op.output]
        for grad_op_input_name in grad_op_input:
            if grad_op_input_name in overwritten_names:
                continue
            # check if this is an external blob
            outer_name = inner_to_outer_map.get(grad_op_input_name, None)
            if not outer_name:
                # check if this is an external gradient blob
                outer_name = initial_grad_map.get(grad_op_input_name, None)
            if outer_name:
                outer_name = str(outer_name)
                if outer_name not in new_op_inputs:
                    new_op_inputs.append(outer_name)
                new_blob_bindings[grad_op_input_name] = outer_name
            else:
                # this is a local blob, we'll get its value from
                # a saved forward op workspace
                saved_local_blob_names.add(grad_op_input_name)
        overwritten_names.update(grad_op_output)

    # add inner gradient copy ops
    inner_grad_ops += grad_copy_ops

    gradient_do_def = _prepare_gradient_do_op(
        fwd_op=op,
        fwd_net=subnet,
        grad_ops=inner_grad_ops,
        inputs=new_op_inputs,
        outputs=new_op_outputs,
        blob_bindings=new_blob_bindings,
        saved_fwd_blobs=saved_local_blob_names,
        workspace_blob_name=workspace_blob_name)
    grad_ops.append(gradient_do_def)

    _do_op_sanity_check_and_process(gradient_do_def)

    return grad_ops, g_input
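
# Illustrative usage sketch with hypothetical names: `do_op` is a forward Do
# OperatorDef whose last input and output are the workspace pointer blob, and
# `g_output` lists one gradient blob name (or None) per Do output.
def _example_gen_do_gradient(do_op, g_output):
    grad_ops, g_input = gen_do_gradient(do_op, g_output)
    # An empty result means the inner net produced no gradient operators.
    return grad_ops, g_input
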
def PyTorchModule(helper, model, sample_arguments, caffe2_inputs,
                  prefix_name=None):
    """
    Embed an ONNX-exportable PyTorch Model into a Caffe2 model being built

    Arguments:
        helper (caffe2.python.core.ModelHelper): the model helper where
            this imported network should be inserted
        model (torch.nn.Module): the model to be exported
        sample_arguments (tuple of arguments): the inputs to
            the model, e.g., such that ``model(*args)`` is a valid
            invocation of the model.  Any non-Variable arguments will
            be hard-coded into the exported model; any Variable arguments
            will become inputs of the exported model, in the order they
            occur in args.  If args is a Variable, this is equivalent
            to having called it with a 1-ary tuple of that Variable.
            (Note: passing keyword arguments to the model is not currently
            supported.  Give us a shout if you need it.)
        caffe2_inputs (list of str or caffe2.python.core.BlobReference): the
            caffe2 Blobs that should be inputs to this network.  Must be
            the same length as sample_arguments
        prefix_name: prefix name to add to each member of the blob; if None
            then a fresh prefix pytorch_import_N/ is used

    Returns:
        A tuple of caffe2.python.core.BlobReference objects referring to the
        model's outputs, or a single BlobReference when the model returns
        a single value.
    """
    if prefix_name is None:
        global _next_idx
        prefix_name = 'pytorch_import_' + str(_next_idx) + '/'
        _next_idx += 1

    # TODO: handle the case where model cannot be exported
    # and embed as a Python op in Caffe2
    f = io.BytesIO()
    torch.onnx.export(model, sample_arguments, f, export_params=True)
    onnx_model = onnx.load(io.BytesIO(f.getvalue()))
    init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(onnx_model)

    initialized = set([x.name for x in onnx_model.graph.initializer])
    uninitialized_inputs = {
        x.name: i for i, x in enumerate(onnx_model.graph.input)
        if x.name not in initialized
    }

    if len(uninitialized_inputs) != len(caffe2_inputs):
        raise ValueError('Expected {} inputs but found {}'.format(
            len(uninitialized_inputs), len(caffe2_inputs)))

    def remap_blob_name(name):
        if name in uninitialized_inputs:
            idx = uninitialized_inputs[name]
            return str(caffe2_inputs[idx])
        return prefix_name + name

    predict_net = Net(predict_net).Clone('anon', _FakeDict(remap_blob_name))
    helper.net.AppendNet(predict_net)

    init_net = Net(init_net).Clone('anon', _FakeDict(remap_blob_name))
    helper.param_init_net.AppendNet(init_net)

    results = tuple([
        BlobReference(remap_blob_name(x.name), helper.net)
        for x in onnx_model.graph.output
    ])
    return results
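
# Illustrative usage sketch, assuming a ModelHelper host model; the module,
# blob names, and shapes below are hypothetical.
#
#   import torch
#   from caffe2.python.model_helper import ModelHelper
#
#   helper = ModelHelper(name="host_model")
#   linear = torch.nn.Linear(3, 2)
#   (out,) = PyTorchModule(helper, linear, (torch.zeros(1, 3),), ["x"])
#   # `out` is a BlobReference into helper.net holding the Linear output;
#   # the blob "x" must be fed with a 1x3 tensor before running helper.net.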