def _common_onnx_node_to_caffe2_op(cls, init_model, pred_model, onnx_node, opset_version):
    """
    This translator performs the basic translation of ONNX nodes into
    Caffe2 operators.  Besides doing a straightforward marshalling from
    one format to another, it also does these extra things:

      - Renames operators based on '_renamed_operators'
      - Renames attributes based on '_global_renamed_attrs' and
        '_per_op_renamed_attrs'

    If you're writing a custom translator, consider calling this first,
    and then fixing things up further.
    """
    c2_op = caffe2_pb2.OperatorDef()

    c2_op.input.extend(onnx_node.inputs)
    c2_op.output.extend(onnx_node.outputs)
    c2_op.name = onnx_node.name

    onnx_op_type = onnx_node.op_type
    broken_version = cls._broken_operators.get(onnx_op_type, float('Inf'))
    if broken_version <= opset_version:
        raise ValueError(
            "Don't know how to translate op {} in ONNX operator set v{} (I only support prior to v{})"
            .format(onnx_op_type, opset_version, broken_version))
    c2_op.type = cls._renamed_operators.get(onnx_op_type, onnx_op_type)
    if not core.IsOperator(c2_op.type):
        raise ValueError(
            "Don't know how to translate op {}".format(onnx_op_type))

    def kmap(k):
        if (onnx_op_type in cls._per_op_renamed_attrs and
                k in cls._per_op_renamed_attrs[onnx_op_type]):
            return cls._per_op_renamed_attrs[onnx_op_type][k]
        if k in cls._global_renamed_attrs:
            return cls._global_renamed_attrs[k]
        return k
    c2_op.arg.extend(onnx_node.attrs.caffe2(kmap=kmap))

    if opset_version < 7:
        # onnx opset 7 and newest caffe2 have adopted full onnx broadcast
        # semantics, so we don't need this hack anymore
        if c2_op.type in cls._broadcast_operators:
            already_broadcast = False
            for arg in c2_op.arg:
                if arg.name == 'broadcast':
                    already_broadcast = True
            if not already_broadcast:
                c2_op.arg.extend([caffe2.python.utils.MakeArgument('broadcast', 1)])

    return c2_op
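# Illustrative sketch (not part of the original source): how the renaming
# tables above drive kmap(). The table entries shown here are examples of the
# kind of mappings the backend class defines, not an exhaustive copy of them.
_example_renamed_operators = {'Caffe2ConvTranspose': 'ConvTranspose'}
_example_global_renamed_attrs = {'kernel_shape': 'kernels'}
_example_per_op_renamed_attrs = {'Transpose': {'perm': 'axes'}}

def _example_resolve_attr_name(op_type, k):
    # Mirrors kmap(): a per-op rename wins over a global rename, and an
    # unknown attribute name passes through unchanged.
    per_op = _example_per_op_renamed_attrs.get(op_type, {})
    if k in per_op:
        return per_op[k]
    return _example_global_renamed_attrs.get(k, k)

assert _example_resolve_attr_name('Transpose', 'perm') == 'axes'
assert _example_resolve_attr_name('Conv', 'kernel_shape') == 'kernels'
assert _example_resolve_attr_name('Conv', 'pads') == 'pads'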
def onnx_graph_to_caffe2_net(cls, graph_def, device="CPU", opset_version=_known_opset_version):
    device_option = get_device_option(Device(device))

    cls._inplace_rewrite(graph_def)
    if graph_def.initializer:
        init_net = cls.onnx_initializer_to_caffe2_init_net(
            graph_def.initializer)
        initialized = {init.name for init in graph_def.initializer}
    else:
        init_net = caffe2_pb2.NetDef()
        initialized = set()

    dummy_name(cls._all_names_in_graph(graph_def) | initialized)

    predict_net = caffe2_pb2.NetDef()
    predict_net.name = graph_def.name
    for node in graph_def.node:
        predict_net.op.extend(cls._onnx_node_to_caffe2_op(node, opset_version))

    predict_net.external_input.extend(
        value_info.name for value_info in graph_def.input)
    predict_net.external_output.extend(
        value_info.name for value_info in graph_def.output)

    # The Caffe2 predictor requires all input blobs (including the
    # real model inputs) to be initialized in init_net.
    for value_info in graph_def.input:
        if value_info.name in initialized:
            continue

        op_def = caffe2_pb2.OperatorDef()
        op_def.output.extend([value_info.name])
        op_def.type = 'GivenTensorFill'

        shape = list(d.dim_value for d in value_info.type.tensor_type.shape.dim)
        # TODO: Putting this in the init net will make it run faster, but it
        # causes some tests to fail...
        # shape = (1,)

        shape_arg = op_def.arg.add()
        shape_arg.name = 'shape'
        shape_arg.ints.extend(shape)

        values_arg = op_def.arg.add()
        values_arg.name = 'values'
        values_arg.floats.extend(np.ones(shape).flatten().tolist())

        init_net.op.extend([op_def])

    # Set the device option for the init_net and predict_net.
    init_net.device_option.CopyFrom(device_option)
    predict_net.device_option.CopyFrom(device_option)

    return init_net, predict_net
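# Minimal usage sketch (assumptions: the method above is a classmethod of a
# backend class named Caffe2Backend, and "model.onnx" is a hypothetical path).
import onnx
from caffe2.python import workspace

onnx_model = onnx.load("model.onnx")
init_net, predict_net = Caffe2Backend.onnx_graph_to_caffe2_net(
    onnx_model.graph, device="CPU", opset_version=7)

workspace.RunNetOnce(init_net)            # creates weights and placeholder inputs
workspace.CreateNet(predict_net, overwrite=True)
# Feed real inputs before running; the GivenTensorFill ops added above for
# uninitialized graph inputs only exist to satisfy the predictor's init step.
workspace.RunNet(predict_net.name)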
def _onnx_node_to_caffe2_op(cls, init_model, pred_model, node_def, opset_version):
    cbackend = C.Caffe2Backend(cls._dummy_name)
    if cbackend.support_onnx_import(node_def.op_type):
        # extract value infos from pred model (value infos of
        # node's inputs that are in init model should be all
        # available in pred model)
        value_infos = []
        for name in node_def.input:
            if pred_model is not None:
                for vi in itertools.chain(pred_model.graph.input,
                                          pred_model.graph.output,
                                          pred_model.graph.value_info):
                    if vi.name == name:
                        value_infos.append(vi.SerializeToString())

        op_strs = cbackend.convert_node(node_def.SerializeToString(),
                                        value_infos, opset_version)
        init_ops = []
        for s in op_strs[0]:
            op = caffe2_pb2.OperatorDef()
            op.ParseFromString(s)
            init_ops.append(op)
        ops = []
        for s in op_strs[1]:
            op = caffe2_pb2.OperatorDef()
            op.ParseFromString(s)
            ops.append(op)
        return Caffe2Ops(ops, init_ops, [])

    if node_def.op_type in cls._special_operators:
        translator = getattr(cls, cls._special_operators[node_def.op_type])
    else:
        translator = cls._common_onnx_node_to_caffe2_op
    ops = translator(init_model, pred_model, OnnxNode(node_def), opset_version)
    if isinstance(ops, Caffe2Ops):
        return ops
    if not isinstance(ops, container_abcs.Iterable):
        ops = [ops]
    return Caffe2Ops(ops, [], [])
def _GetGradientForOpCC(cls, op_def, g_output):
    grad_defs_str, g_input = cc_GetGradientDefs(  # NOQA
        op_def.SerializeToString(), g_output)
    # The C++ side returns a tuple for sparse gradients; convert it to a
    # GradientSlice namedtuple here.
    g_input = [(GradientSlice(*g) if type(g) is tuple else g)
               for g in g_input]
    grad_defs = []
    for grad_def_str in grad_defs_str:
        grad_def = caffe2_pb2.OperatorDef()
        grad_def.ParseFromString(grad_def_str)
        grad_defs.append(grad_def)
    return grad_defs, g_input
def _create_tensor_filling_op(cls, onnx_tensor, name=None):
    """
    Given an ONNX TensorProto, translate it into a Caffe2 "GivenTensor*Fill"
    operator that produces a blob with the tensor's contents.
    """
    assert name or onnx_tensor.name
    name = name or onnx_tensor.name

    c2_op = caffe2_pb2.OperatorDef()

    c2_values = c2_op.arg.add()
    c2_values.name = "values"

    def tensor2list(onnx_tensor):
        # Use onnx.numpy_helper because the data may be stored in raw form
        return onnx.numpy_helper.to_array(onnx_tensor).flatten().tolist()

    if onnx_tensor.data_type in [TensorProto.FLOAT]:
        c2_op.type = 'GivenTensorFill'
        c2_values.floats.extend(tensor2list(onnx_tensor))
    elif onnx_tensor.data_type in [TensorProto.DOUBLE]:
        c2_op.type = 'GivenTensorDoubleFill'
        c2_values.floats.extend(tensor2list(onnx_tensor))
    elif onnx_tensor.data_type in [TensorProto.INT64,
                                   TensorProto.UINT32]:
        c2_op.type = 'GivenTensorInt64Fill'
        c2_values.ints.extend(tensor2list(onnx_tensor))
    elif onnx_tensor.data_type in [TensorProto.UINT8,
                                   TensorProto.INT8,
                                   TensorProto.UINT16,
                                   TensorProto.INT16,
                                   TensorProto.INT32]:
        c2_op.type = 'GivenTensorIntFill'
        c2_values.ints.extend(tensor2list(onnx_tensor))
    elif onnx_tensor.data_type == TensorProto.BOOL:
        c2_op.type = 'GivenTensorBoolFill'
        c2_values.ints.extend(tensor2list(onnx_tensor))
    elif onnx_tensor.data_type == TensorProto.STRING:
        c2_op.type = 'GivenTensorStringFill'
        c2_values.strings.extend(onnx_tensor.string_data)
    else:
        raise RuntimeError(
            "unrecognized tensor type {}".format(onnx_tensor.data_type))

    c2_shape = c2_op.arg.add()
    c2_shape.name = "shape"
    c2_shape.ints.extend(onnx_tensor.dims)

    c2_op.output.append(name)

    return c2_op
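# Usage sketch (assuming the method above is exposed on a backend class called
# Caffe2Backend): build an ONNX TensorProto from a numpy array and convert it.
import numpy as np
import onnx.numpy_helper

weights = np.arange(6, dtype=np.float32).reshape(2, 3)
onnx_tensor = onnx.numpy_helper.from_array(weights, name="W")

fill_op = Caffe2Backend._create_tensor_filling_op(onnx_tensor)
# fill_op.type == 'GivenTensorFill'; its 'values' arg holds the six floats,
# its 'shape' arg holds [2, 3], and running it produces a blob named "W".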
def NodeProtoToOperatorDef(node_proto):
    serialized_node_proto = None
    if hasattr(node_proto, 'SerializeToString') and callable(
            node_proto.SerializeToString):
        serialized_node_proto = node_proto.SerializeToString()
    elif isinstance(node_proto, bytes):
        serialized_node_proto = node_proto
    else:
        raise ValueError(
            'node_proto must either expose a SerializeToString method or be '
            'bytes; got type {}'.format(type(node_proto)))
    op_def = caffe2_pb2.OperatorDef()
    op_def.ParseFromString(C.node_proto_to_operator_def(serialized_node_proto))
    return op_def
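# Sketch of the two accepted input forms (a proto object or its serialized
# bytes); `node` here is assumed to come from convert.OperatorDefToNodeProto,
# as in the round-trip test further below.
op_from_proto = NodeProtoToOperatorDef(node)
op_from_bytes = NodeProtoToOperatorDef(node.SerializeToString())
assert op_from_proto == op_from_bytes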
def test_that_auto_ssa_gives_non_colliding_names(self):
    op1 = caffe2_pb2.OperatorDef()
    op1.output.extend(['foo'])
    op2 = caffe2_pb2.OperatorDef()
    op2.input.extend(['foo'])
    op2.output.extend(['foo'])
    op2.output.extend(['foo_1'])
    shapes = {'foo': [1], 'foo_1': [2]}
    blob_name_tracker = tb._get_blob_names([op1, op2])
    tb._convert_to_ssa(shapes, blob_name_tracker, [op1, op2])
    self.assertEqual(op1.output[0], 'foo')
    self.assertEqual(op2.input[0], 'foo')
    self.assertEqual(op2.output[0], 'foo_1')
    # Unfortunate name but we do not parse original `_` for now.
    self.assertEqual(op2.output[1], 'foo_1_1')
    self.assertEqual(len(shapes), 3)
    self.assertEqual(shapes['foo'], [1])
    self.assertEqual(shapes['foo_1'], [1])
    self.assertEqual(shapes['foo_1_1'], [2])
    self.assertEqual(len(blob_name_tracker), 3)
    self.assertEqual(blob_name_tracker['foo'], 'foo')
    self.assertEqual(blob_name_tracker['foo_1'], 'foo')
    self.assertEqual(blob_name_tracker['foo_1_1'], 'foo_1')
def _add_head_tail(self, pred_net, new_head, new_tail):
    orig_head = pred_net.external_input[0]
    orig_tail = pred_net.external_output[0]

    # Add head
    head = caffe2_pb2.OperatorDef()
    head.type = "Copy"
    head.input.append(new_head)
    head.output.append(orig_head)
    dummy = caffe2_pb2.NetDef()
    dummy.op.extend(pred_net.op)
    del pred_net.op[:]
    pred_net.op.extend([head])
    pred_net.op.extend(dummy.op)
    pred_net.external_input[0] = new_head

    # Add tail
    tail = caffe2_pb2.OperatorDef()
    tail.type = "Copy"
    tail.input.append(orig_tail)
    tail.output.append(new_tail)
    pred_net.op.extend([tail])
    pred_net.external_output[0] = new_tail
def run_node(cls, node, inputs, device='CPU', opset_version=_known_opset_version, outputs_info=None):
    super(Caffe2Backend, cls).run_node(node, inputs, device=device,
                                       outputs_info=outputs_info,
                                       opset_version=opset_version)

    value_infos = []
    device_option = get_device_option(Device(device))
    ws = Workspace()
    with core.DeviceScope(device_option):  # temporary!
        if isinstance(inputs, dict):
            for key, value in inputs.items():
                ws.FeedBlob(key, value)
                value_infos.append(onnx.helper.make_tensor_value_info(
                    name=key,
                    elem_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[value.dtype],
                    shape=value.shape).SerializeToString())
        else:
            assert len(node.input) == len(inputs), "{}: expected {} but got {}".format(
                node.op_type, len(node.input), len(inputs))
            for key, value in zip(node.input, inputs):
                ws.FeedBlob(key, value)
                value_infos.append(onnx.helper.make_tensor_value_info(
                    name=key,
                    elem_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[value.dtype],
                    shape=value.shape).SerializeToString())

        ops = []
        cbackend = C.Caffe2Backend(cls._dummy_name)
        ops_str = cbackend.convert_node(node.SerializeToString(), value_infos, opset_version)
        for s in ops_str[0] + ops_str[1]:
            op = caffe2_pb2.OperatorDef()
            op.ParseFromString(s)
            op.device_option.CopyFrom(device_option)
            ops.append(op)
        ws.RunOperatorsOnce(ops)
        output_values = [ws.FetchBlob(name) for name in node.output]
        return namedtupledict('Outputs', node.output)(*output_values)
def CreateOperator(
        operator_type,
        inputs,
        outputs,
        name='',
        control_input=None,
        device_option=None,
        arg=None,
        engine=None,
        **kwargs
):
    """A function wrapper that allows one to create operators based on the
    operator type. The type should be a string corresponding to an operator
    registered with Caffe2.
    """
    operator = caffe2_pb2.OperatorDef()
    operator.type = operator_type
    operator.name = name
    # Add rectified inputs and outputs
    inputs = _RectifyInputOutput(inputs)
    outputs = _RectifyInputOutput(outputs)
    operator.input.extend([str(i) for i in inputs])
    operator.output.extend([str(o) for o in outputs])
    if control_input:
        control_input = _RectifyInputOutput(control_input)
        operator.control_input.extend([str(i) for i in control_input])
    # Set device option:
    # (1) If device_option is explicitly set, use device_option.
    # (2) If not, but scope.DEVICESCOPE is set, then we use scope.DEVICESCOPE.
    # (3) Otherwise, do not set device option.
    if device_option is not None:
        operator.device_option.CopyFrom(device_option)
    elif scope.DEVICESCOPE is not None:
        operator.device_option.CopyFrom(scope.DEVICESCOPE)
    if engine is not None:
        operator.engine = engine
    # The random seed is defined in the device option, so it needs special
    # handling.
    if 'random_seed' in kwargs:
        operator.device_option.random_seed = kwargs['random_seed']
        del kwargs['random_seed']
    # Add given arguments that do not need parsing
    if arg is not None:
        operator.arg.extend(arg)
    # Add all other arguments
    for key, value in kwargs.items():
        operator.arg.add().CopyFrom(utils.MakeArgument(key, value))

    if workspace.IsImmediate():
        workspace.RunOperatorImmediate(operator)
    return operator
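# Minimal usage sketch: keyword arguments that are not recognized fields
# (kernel/stride/pad below) end up as Caffe2 Arguments on the op.
import numpy as np
from caffe2.python import core, workspace

pool = core.CreateOperator(
    "MaxPool", ["X"], ["Y"],
    kernel=2, stride=2, pad=0,
)
workspace.FeedBlob("X", np.random.rand(1, 3, 8, 8).astype(np.float32))
workspace.RunOperatorOnce(pool)
print(workspace.FetchBlob("Y").shape)  # (1, 3, 4, 4)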
def run_node(cls, node, inputs, device='CPU', opset_version=_known_opset_version, outputs_info=None):
    super(Caffe2Backend, cls).run_node(node, inputs, device=device,
                                       outputs_info=outputs_info,
                                       opset_version=opset_version)

    device_option = get_device_option(Device(device))
    ws = Workspace()
    with core.DeviceScope(device_option):  # temporary!
        if isinstance(inputs, dict):
            for key, value in inputs.items():
                ws.FeedBlob(key, value)
        else:
            assert len(node.input) == len(inputs), "{}: expected {} but got {}".format(
                node.op_type, len(node.input), len(inputs))
            for key, value in zip(node.input, inputs):
                ws.FeedBlob(key, value)

        ops = []
        cbackend = C.Caffe2Backend(cls._dummy_name)
        ops_str = cbackend.convert_node(node.SerializeToString(), opset_version)
        for s in ops_str[0] + ops_str[1]:
            op = caffe2_pb2.OperatorDef()
            op.ParseFromString(s)
            op.device_option.CopyFrom(device_option)
            ops.append(op)
        # For testing: compare the C++ conversion against the Python one.
        if "ONNX_CAFFE2_DEBUG" in os.environ:
            init_ops, ops2, _ = cls._onnx_node_to_caffe2_op(
                None, None, node, opset_version or cls._known_opset_version)
            ops2 = init_ops + ops2
            for op in ops2:
                op.device_option.CopyFrom(device_option)
            print("\nC++:\n{}\nPython:\n{}".format(ops, ops2))
        ws.RunOperatorsOnce(ops)
        output_values = [ws.FetchBlob(name) for name in node.output]
        return namedtupledict('Outputs', node.output)(*output_values)
def convert_onnx_model_to_trt_op(onnx_model,
                                 max_batch_size=50,
                                 max_workspace_size=2 * 1024 * 1024,
                                 verbosity=1,
                                 debug_builder=False):
    """
    Convert the whole ONNX model to a TensorRT C2 op
    """
    check_gpu_()
    trt_str = C.onnx_to_trt_op(onnx_model.SerializeToString(),
                               _get_output_shapes(onnx_model.graph.output),
                               max_batch_size,
                               max_workspace_size,
                               verbosity,
                               debug_builder)
    op = caffe2_pb2.OperatorDef()
    op.ParseFromString(trt_str)
    return op
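# Hedged usage sketch: the returned op wraps the whole model, so it can be
# dropped into a NetDef whose external inputs/outputs mirror the ONNX graph's.
# Assumes a CUDA build with TensorRT support; the model path is hypothetical.
import onnx
from caffe2.proto import caffe2_pb2

onnx_model = onnx.load("model.onnx")
trt_op = convert_onnx_model_to_trt_op(onnx_model, max_batch_size=32)

trt_net = caffe2_pb2.NetDef()
trt_net.op.extend([trt_op])
trt_net.external_input.extend(i.name for i in onnx_model.graph.input)
trt_net.external_output.extend(o.name for o in onnx_model.graph.output)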
def test_that_adding_gradient_scope_does_no_fancy_renaming(self):
    # because it cannot create collisions
    op = caffe2_pb2.OperatorDef()
    op.name = 'foo_grad'
    op.input.extend(['foo_grad', 'foo_grad_1'])
    shapes = {'foo_grad': [1]}
    blob_name_tracker = tb._get_blob_names([op])
    tb._add_gradient_scope(shapes, blob_name_tracker, [op])
    self.assertEqual(op.input[0], 'GRADIENTS/foo_grad')
    self.assertEqual(op.input[1], 'GRADIENTS/foo_grad_1')
    self.assertEqual(op.name, 'GRADIENTS/foo_grad')
    self.assertEqual(len(shapes), 1)
    self.assertEqual(shapes['GRADIENTS/foo_grad'], [1])
    self.assertEqual(len(blob_name_tracker), 2)
    self.assertEqual(blob_name_tracker['GRADIENTS/foo_grad'], 'foo_grad')
    self.assertEqual(blob_name_tracker['GRADIENTS/foo_grad_1'], 'foo_grad_1')
def onnx_graph_to_caffe2_net(cls, graph_def):
    cls._inplace_rewrite(graph_def)
    if graph_def.initializer:
        init_net = cls.onnx_initializer_to_caffe2_init_net(
            graph_def.initializer)
        initialized = {init.name for init in graph_def.initializer}
    else:
        init_net = caffe2_pb2.NetDef()
        initialized = set()

    dummy_name(cls._all_names_in_graph(graph_def) | initialized)

    predict_net = caffe2_pb2.NetDef()
    predict_net.name = graph_def.name
    for node in graph_def.node:
        predict_net.op.extend(cls._onnx_node_to_caffe2_op(node))
    predict_net.external_input.extend(
        value_info.name for value_info in graph_def.input)
    predict_net.external_output.extend(
        value_info.name for value_info in graph_def.output)

    # The Caffe2 predictor requires all input blobs (including the
    # real model inputs) to be initialized in init_net.
    for value_info in graph_def.input:
        if value_info.name in initialized:
            continue
        op_def = caffe2_pb2.OperatorDef()
        op_def.output.extend([value_info.name])
        op_def.type = 'GivenTensorFill'

        shape = list(d.dim_value for d in value_info.type.tensor_type.shape.dim)

        shape_arg = op_def.arg.add()
        shape_arg.name = 'shape'
        shape_arg.ints.extend(shape)

        values_arg = op_def.arg.add()
        values_arg.name = 'values'
        values_arg.floats.extend(np.ones(shape).flatten().tolist())

        init_net.op.extend([op_def])

    return init_net, predict_net
def test_that_replacing_colons_gives_non_colliding_names(self):
    # .. and update shapes
    op = caffe2_pb2.OperatorDef()
    op.name = 'foo:0'
    op.input.extend(['foo:0', 'foo$0'])
    shapes = {'foo:0': [1]}
    blob_name_tracker = tb._get_blob_names([op])
    tb._replace_colons(shapes, blob_name_tracker, [op], '$')
    self.assertEqual(op.input[0], 'foo$0')
    self.assertEqual(op.input[1], 'foo$0_1')
    # Collision but blobs and op names are handled later by
    # _fill_missing_operator_names.
    self.assertEqual(op.name, 'foo$0')
    self.assertEqual(len(shapes), 1)
    self.assertEqual(shapes['foo$0'], [1])
    self.assertEqual(len(blob_name_tracker), 2)
    self.assertEqual(blob_name_tracker['foo$0'], 'foo:0')
    self.assertEqual(blob_name_tracker['foo$0_1'], 'foo$0')
def _prepare_gradient_while_ops(
        fwd_op, input_names, output_names, loop_grad_net, workspace_blob,
        init_grad_map, loop_grad_map):
    gradient_while_def = caffe2_pb2.OperatorDef()
    gradient_while_def.CopyFrom(fwd_op)
    if gradient_while_def.name:
        gradient_while_def.name += "_grad"

    loop_net_arg = caffe2_pb2.Argument()
    loop_net_arg.name = "loop_net"
    loop_net_arg.n.CopyFrom(loop_grad_net)

    cond_net_arg = caffe2_pb2.Argument()
    cond_net_arg.name = "cond_net"
    from caffe2.python.core import Net, BlobReference
    # Construct condition net - check that there're still forward workspaces
    # left using HasScope op
    cond_net = Net('gradient_loop_cond_net')
    cond_init_net = Net('gradient_loop_cond_net_init')
    cond_blob = cond_net.NextScopedBlob(cond_net.Name() + '/cond')
    cond_init_net.HasScope(workspace_blob, cond_blob)
    cond_net.HasScope(workspace_blob, cond_blob)
    for blob, init_grad_blob in init_grad_map.items():
        blob_name = str(blob)
        init_grad_blob_name = str(init_grad_blob)
        if blob_name in loop_grad_map and \
                loop_grad_map[blob_name] != init_grad_blob_name:
            cond_net.Copy(BlobReference(loop_grad_map[blob_name]), init_grad_blob)
            cond_init_net.Copy(init_grad_blob, BlobReference(loop_grad_map[blob_name]))
    cond_net_arg.n.CopyFrom(cond_net.Proto())

    del gradient_while_def.arg[:]
    gradient_while_def.arg.extend([loop_net_arg, cond_net_arg])

    del gradient_while_def.control_input[:]
    del gradient_while_def.input[:]
    gradient_while_def.input.extend(
        [str(cond_blob).encode('utf-8')] + list(input_names))
    del gradient_while_def.output[:]
    gradient_while_def.output.extend(output_names)
    gradient_while_def.is_gradient_op = True
    return [o for o in cond_init_net.Proto().op] + [gradient_while_def]
def testOperatorDef2NodeProto(self):
    op_def = caffe2_pb2.OperatorDef()
    op_def.input.extend(["A", "B", "C"])
    op_def.output.extend(["X", "Y"])
    op_def.name = "TestOpName"
    op_def.type = "TestOp"
    arg1 = caffe2_pb2.Argument()
    arg1.name = "TestArg1"
    arg1.i = 1
    arg2 = caffe2_pb2.Argument()
    arg2.name = "TestArg2"
    arg2.s = "TestInfo".encode("utf-8")
    op_def.arg.extend([arg1, arg2])
    op_def.device_option.CopyFrom(caffe2_pb2.DeviceOption())
    op_def.engine = "TestEngine".encode("utf-8")
    op_def.control_input.extend(["input1", "input2"])
    op_def.is_gradient_op = True
    op_def.debug_info = "TestDebugInfo"

    node = convert.OperatorDefToNodeProto(op_def)

    self.assertEqual(node.input, op_def.input)
    self.assertEqual(node.output, op_def.output)
    self.assertEqual(node.name, op_def.name)
    self.assertEqual(node.op_type, op_def.type)
    self.assertEqual(node.attribute[0].name, op_def.arg[0].name)
    self.assertEqual(node.attribute[1].name, op_def.arg[1].name)
    self.assertEqual(node.device_option, op_def.device_option)
    node_engine = [
        a.s.decode("utf-8") for a in node.annotations if a.name == "engine"
    ][0]
    self.assertEqual(node_engine, op_def.engine)
    node_control_input = [
        a.strings for a in node.annotations if a.name == "control_input"
    ][0]
    self.assertEqual(len(node_control_input), len(op_def.control_input))
    for x, y in zip(node_control_input, op_def.control_input):
        self.assertEqual(x.decode("utf-8"), y)
    self.assertEqual(node.doc_string, op_def.debug_info)
    node_is_gradient_op = [
        a.i for a in node.annotations if a.name == "is_gradient_op"
    ][0]
    self.assertEqual(node_is_gradient_op, int(op_def.is_gradient_op))
def _common_onnx_node_to_caffe2_op(cls, init_model, pred_model, onnx_node, opset_version):
    """
    This translator performs the basic translation of ONNX nodes into
    Caffe2 operators.  Besides doing a straightforward marshalling from
    one format to another, it also does these extra things:

      - Renames operators based on '_renamed_operators'
      - Renames attributes based on '_global_renamed_attrs' and
        '_per_op_renamed_attrs'

    If you're writing a custom translator, consider calling this first,
    and then fixing things up further.
    """
    c2_op = caffe2_pb2.OperatorDef()

    c2_op.input.extend(onnx_node.inputs)
    c2_op.output.extend(onnx_node.outputs)
    c2_op.name = onnx_node.name

    onnx_op_type = onnx_node.op_type
    broken_version = cls._broken_operators.get(onnx_op_type, float('Inf'))
    if broken_version <= opset_version:
        raise ValueError(
            "Don't know how to translate op {} in ONNX operator set v{} (I only support prior to v{})"
            .format(onnx_op_type, opset_version, broken_version))
    c2_op.type = cls._renamed_operators.get(onnx_op_type, onnx_op_type)
    if not core.IsOperator(c2_op.type):
        raise ValueError(
            "Don't know how to translate op {}".format(onnx_op_type))

    def kmap(k):
        if (onnx_op_type in cls._per_op_renamed_attrs and
                k in cls._per_op_renamed_attrs[onnx_op_type]):
            return cls._per_op_renamed_attrs[onnx_op_type][k]
        if k in cls._global_renamed_attrs:
            return cls._global_renamed_attrs[k]
        return k
    c2_op.arg.extend(onnx_node.attrs.caffe2(kmap=kmap))

    return c2_op
def testRoundTrip(self):
    op_def = caffe2_pb2.OperatorDef()
    op_def.type = "Add"
    op_def.input.extend(["input1"])
    op_def.input.extend(["input2"])
    op_def.output.extend(["output1"])

    node = convert.OperatorDefToNodeProto(op_def)
    new_op_def = convert.NodeProtoToOperatorDef(node)

    input1 = np.random.randn(1, 3, 1, 5).astype(np.float32)
    input2 = np.random.randn(2, 1, 4, 1).astype(np.float32)
    ref_output1 = input1 + input2
    workspace.FeedBlob("input1", input1)
    workspace.FeedBlob("input2", input2)
    self.assertEqual(
        workspace.RunOperatorOnce(new_op_def.SerializeToString()), True)

    self.assertEqual(workspace.HasBlob("output1"), True)
    fetched_back = workspace.FetchBlob("output1")
    np.testing.assert_array_equal(fetched_back, ref_output1)
def _gen_grad_zero_init_ops(grad_map, grad_output_names):
    grad_zero_init_ops = []
    for grad_output in grad_output_names:
        # Find the forward output blob that corresponds to this gradient and
        # use it as ConstantFill's input, so that grad_output gets the same
        # shape.
        output_name = None
        for o, g in grad_map.items():
            if g == grad_output:
                output_name = o
                break
        assert output_name, "Unknown gradient output " + grad_output

        grad_zero_init_op = caffe2_pb2.OperatorDef()
        grad_zero_init_op.type = "ConstantFill"
        grad_zero_init_op.input.extend([output_name])
        grad_zero_init_op.output.extend([grad_output])
        value_arg = caffe2_pb2.Argument()
        value_arg.name = "value"
        value_arg.f = 0.0
        grad_zero_init_op.arg.extend([value_arg])
        grad_zero_init_ops.append(grad_zero_init_op)
    return grad_zero_init_ops
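# Illustrative call (the blob names are made up): each requested gradient
# output gets one ConstantFill op that borrows its shape from the matching
# forward output.
example_grad_map = {'fc_out': 'fc_out_grad', 'loss': 'loss_grad'}
zero_ops = _gen_grad_zero_init_ops(example_grad_map, ['fc_out_grad'])
assert len(zero_ops) == 1
assert zero_ops[0].type == "ConstantFill"
assert list(zero_ops[0].input) == ['fc_out']        # shape reference
assert list(zero_ops[0].output) == ['fc_out_grad']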
def ReallyCreate(inputs, outputs, name='', device_option=None,
                 arg=None, engine=None, **kwargs):
    operator = caffe2_pb2.OperatorDef()
    operator.type = operator_type
    operator.name = name
    if type(inputs) is str or type(inputs) is BlobReference:
        inputs = [inputs]
    elif type(inputs) is unicode:
        inputs = [str(inputs)]
    elif type(inputs) is not list:
        raise ValueError("Unknown input format: %s of type %s." %
                         (str(inputs), type(inputs)))
    if type(outputs) is str or type(outputs) is BlobReference:
        outputs = [outputs]
    elif type(outputs) is not list:
        raise ValueError("Unknown output format: %s of type %s." %
                         (str(outputs), type(outputs)))
    operator.input.extend([str(i) for i in inputs])
    operator.output.extend([str(o) for o in outputs])
    if device_option is not None:
        operator.device_option.CopyFrom(device_option)
    if engine is not None:
        operator.engine = engine
    # The random seed is defined in the device option, so it needs special
    # handling.
    if 'random_seed' in kwargs:
        operator.device_option.random_seed = kwargs['random_seed']
        del kwargs['random_seed']
    # Add given arguments that do not need parsing
    if arg is not None:
        operator.arg.extend(arg)
    # Add all other arguments
    for key, value in kwargs.iteritems():
        operator.arg.add().CopyFrom(utils.MakeArgument(key, value))
    return operator
def add_init_params(self, init_net):
    '''
    Adds layer initialization operators to the passed net.
    '''
    for param in self.params:
        # TODO(amalevich): Either return back to lambdas, that add
        # all params (looks a bit safer and breaking less
        # abstractions) or extend Net interface to this type of
        # operations better
        # TODO(xlwang) init_net._net.op has type google.protobuf.\
        # internal.containers.RepeatedCompositeFieldContainer, but
        # the version of protobuf in fbcode does not support append,
        # so extend is used
        init_op = param.initializer
        current_device_scope = scope.CurrentDeviceScope()

        if init_op:
            if not init_op.HasField('device_option') and \
                    current_device_scope:
                init_op = caffe2_pb2.OperatorDef()
                init_op.CopyFrom(param.initializer)
                init_op.device_option.CopyFrom(current_device_scope)
            init_net._net.op.extend([init_op])
def _create_transpose(cls, node_def, env):
    op_def = caffe2_pb2.OperatorDef()
    op_def.output.extend([env[o] for o in node_def.output])
    op_def.input.extend([env[i] for i in node_def.input])
    op_def.type = 'ConvTranspose'
    op_def.name = node_def.name

    def can_be_singular(values):
        if len(values) == 0:
            return False
        return all(values[0] == v for v in values)

    depluralizer = {
        'kernel_shape': 'kernel',
        'strides': 'stride',
        'pads': 'pad',
    }

    def map_attr(attr):
        if attr.name in depluralizer:
            # TODO: replace this with a version test
            if not can_be_singular(attr.ints):
                raise RuntimeError(
                    "Caffe2 doesn't support plural kernel_shape/strides/pads "
                    "prior to 6cb4d1ecb0dfb553f797f6a8a61dd6966909cb0b; if you "
                    "know your Caffe2 is recent enough, comment out this check")
            # In fact, this code is MANDATORY, because prior to
            # https://github.com/caffe2/caffe2/commit/6cb4d1ecb0dfb553f797f6a8a61dd6966909cb0b
            # the pluralized versions were not supported.
            # You'll get an error like
            # "[enforce fail at conv_transpose_unpool_op_base.h:54] kernel_h_ > 0"
            # if your Caffe2 is too old and you actually use the plural
            # version.
            singular_attr = AttributeProto()
            singular_attr.name = depluralizer[attr.name]
            singular_attr.i = attr.ints[0]
            return cls._onnx_arg_to_caffe2_arg(op_def.type, singular_attr)
        else:
            return cls._onnx_arg_to_caffe2_arg(op_def.type, attr)
    op_def.arg.extend([map_attr(attr) for attr in node_def.attribute])
    return op_def
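# Sketch of the depluralization rule applied by map_attr above: a plural ONNX
# attribute collapses to Caffe2's singular arg only when every entry is equal.
from onnx import helper

strides_attr = helper.make_attribute('strides', [2, 2])
# can_be_singular(strides_attr.ints) is True, so map_attr would emit a Caffe2
# arg named 'stride' with i = 2; strides of [2, 3] would raise instead.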
def _common_op_tranlator(cls, node_def, env):
    op_def = caffe2_pb2.OperatorDef()
    op_def.input.extend([env[i] for i in node_def.input])

    for output in node_def.output:
        env[output] = output

    # when consumed_inputs exist, we need to
    # rewrite the outputs to re-use these inputs to
    # support Caffe2-style in-place operators.
    for attr in node_def.attribute:
        if attr.name == "consumed_inputs":
            schema = onnx.defs.get_schema(node_def.op_type)
            for i, input in enumerate(node_def.input):
                if attr.ints[i] != 0:
                    # for each consumed input, the schema for the op
                    # tells us which output (output_idx) that
                    # this consumed input becomes
                    _, output_idx = schema.consumed(i)
                    # consumed outputs are not always present
                    # for instance batch norm in test mode
                    # does not return the consumed inputs
                    if output_idx < len(node_def.output):
                        # rather than use its ONNX name
                        # use the original input name for the blob
                        # that will be consumed
                        env[node_def.output[output_idx]] = env[input]

    op_def.output.extend([env[i] for i in node_def.output])
    op_def.name = node_def.name
    op_def.type = cls._renamed_operators.get(node_def.op_type, node_def.op_type)
    op_def.arg.extend(
        cls._onnx_arg_to_caffe2_arg(op_def.type, a)
        for a in node_def.attribute if a.name != "consumed_inputs")
    return op_def
def test_convolution_affch_folding(
        self, stride, pad, kernel, size,
        input_channels, output_channels,
        batch_size, use_bias, group,
        inplace, gc, dc):
    conv = core.CreateOperator(
        "Conv",
        ["X0", "w0", "b0"] if use_bias else ["X0", "w0"],
        ["X1"],
        stride=stride,
        pad=pad,
        kernel=kernel,
        group=group,
        device_option=dc[1]
    )
    affch = core.CreateOperator(
        "AffineChannel",
        ["X1", "scale", "bias"],
        ["X1" if inplace else "Y"],
        device_option=dc[1]
    )

    X = np.random.rand(
        batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
    w = np.random.rand(
            output_channels * group, input_channels, kernel, kernel) \
        .astype(np.float32) - 0.5
    b = np.random.rand(output_channels * group).astype(np.float32) - 0.5
    scale = np.random.rand(output_channels).astype(np.float32) + 0.5
    bias = np.random.rand(output_channels).astype(np.float32) - 0.5

    old_ws_name = workspace.CurrentWorkspace()
    workspace.SwitchWorkspace("_device_check_", True)

    workspace.FeedBlob('X0', X, dc[1])
    workspace.FeedBlob('w0', w, dc[1])
    workspace.FeedBlob('b0', b, dc[1])
    workspace.FeedBlob('scale', scale, dc[1])
    workspace.FeedBlob('bias', bias, dc[1])
    workspace.RunOperatorOnce(conv)
    workspace.RunOperatorOnce(affch)
    Y = workspace.FetchBlob('X1' if inplace else "Y")

    workspace.ResetWorkspace()

    old_net = caffe2_pb2.NetDef()
    conv_old = caffe2_pb2.OperatorDef()
    conv_old.CopyFrom(conv)
    conv_old.device_option.CopyFrom(dc[1])
    affch_old = caffe2_pb2.OperatorDef()
    affch_old.CopyFrom(affch)
    affch_old.device_option.CopyFrom(dc[1])
    old_net.op.extend([conv_old, affch_old])

    workspace.FeedBlob('X0', X, dc[1])
    workspace.FeedBlob('w0', w, dc[1])
    workspace.FeedBlob('b0', b, dc[1])
    workspace.FeedBlob('scale', scale, dc[1])
    workspace.FeedBlob('bias', bias, dc[1])
    net = core.Net("net")
    net.Proto().CopyFrom(old_net)
    optimizeForMKLDNN(net)
    self.assertTrue(len(net.Proto().op) == 1)
    self.assertTrue(net.Proto().op[0].type == "Conv")
    workspace.RunOperatorOnce(net.Proto().op[0])
    Y1 = workspace.FetchBlob('X1' if inplace else "Y")
    if not np.allclose(Y, Y1, atol=0.01, rtol=0.01):
        print(Y.flatten())
        print(Y1.flatten())
        print(np.max(np.abs(Y - Y1)))
        self.assertTrue(False)

    workspace.SwitchWorkspace(old_ws_name)
def test_convolution_grouped_sum_relu_fusion(self, stride, pad, kernel, size,
                                             input_channels, output_channels,
                                             batch_size, use_bias, group, gc, dc):
    conv_S0 = core.CreateOperator(
        "Conv",
        ["SX0", "Sw0", "Sb0"] if use_bias else ["SX0", "Sw0"],
        ["S0"],
        stride=stride,
        pad=pad,
        kernel=kernel,
        group=group,
        device_option=dc[0]
    )
    conv = core.CreateOperator(
        "Conv",
        ["X0", "w0", "b0"] if use_bias else ["X0", "w0"],
        ["Y0"],
        stride=stride,
        pad=pad,
        kernel=kernel,
        group=group,
        device_option=dc[0]
    )
    sum = core.CreateOperator(
        "Sum",
        ["S0", "Y0"],
        ["S0"],
        device_option=dc[0]
    )
    relu = core.CreateOperator(
        "Relu",
        ["S0"],
        ["S0"],
        device_option=dc[0]
    )

    SX = np.random.rand(
        batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
    Sw = np.random.rand(
            output_channels * group, input_channels, kernel, kernel) \
        .astype(np.float32) - 0.5
    Sb = np.random.rand(output_channels * group).astype(np.float32) - 0.5
    X = np.random.rand(
        batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
    w = np.random.rand(
            output_channels * group, input_channels, kernel, kernel) \
        .astype(np.float32) - 0.5
    b = np.random.rand(output_channels * group).astype(np.float32) - 0.5

    old_ws_name = workspace.CurrentWorkspace()
    workspace.SwitchWorkspace("_device_check_", True)

    workspace.FeedBlob('SX0', SX, dc[0])
    workspace.FeedBlob('Sw0', Sw, dc[0])
    workspace.FeedBlob('Sb0', Sb, dc[0])
    workspace.FeedBlob('X0', X, dc[0])
    workspace.FeedBlob('w0', w, dc[0])
    workspace.FeedBlob('b0', b, dc[0])
    workspace.RunOperatorOnce(conv_S0)
    workspace.RunOperatorOnce(conv)
    workspace.RunOperatorOnce(sum)
    workspace.RunOperatorOnce(relu)
    S0 = workspace.FetchBlob('S0')

    workspace.ResetWorkspace()

    old_net = caffe2_pb2.NetDef()
    conv_S0_old = caffe2_pb2.OperatorDef()
    conv_S0_old.CopyFrom(conv_S0)
    conv_S0_old.device_option.CopyFrom(dc[1])
    conv_old = caffe2_pb2.OperatorDef()
    conv_old.CopyFrom(conv)
    conv_old.device_option.CopyFrom(dc[1])
    sum_old = caffe2_pb2.OperatorDef()
    sum_old.CopyFrom(sum)
    sum_old.device_option.CopyFrom(dc[1])
    relu_old = caffe2_pb2.OperatorDef()
    relu_old.CopyFrom(relu)
    relu_old.device_option.CopyFrom(dc[1])
    old_net.op.extend([conv_S0_old, conv_old, sum_old, relu_old])

    workspace.FeedBlob('SX0', SX, dc[1])
    workspace.FeedBlob('Sw0', Sw, dc[1])
    workspace.FeedBlob('Sb0', Sb, dc[1])
    workspace.FeedBlob('X0', X, dc[1])
    workspace.FeedBlob('w0', w, dc[1])
    workspace.FeedBlob('b0', b, dc[1])
    net = core.Net("net")
    net.Proto().CopyFrom(old_net)
    optimizeForMKLDNN(net)
    workspace.RunNetOnce(net.Proto())
    # The output tensor name will be changed by optimization
    # sometimes when applying conv sum fusion
    S2 = workspace.FetchBlob(net.Proto().op[-1].output[0])
    if not np.allclose(S0, S2, atol=0.01, rtol=0.01):
        print(S2.flatten())
        print(S0.flatten())
        print(np.max(np.abs(S2 - S0)))
        self.assertTrue(False)

    workspace.SwitchWorkspace(old_ws_name)
def test_convolution_relu_fusion(self, stride, pad, kernel, size,
                                 input_channels, output_channels,
                                 batch_size, use_bias, group, gc, dc):
    conv = core.CreateOperator(
        "Conv",
        ["X0", "w0", "b0"] if use_bias else ["X0", "w0"],
        ["Y0"],
        stride=stride,
        pad=pad,
        kernel=kernel,
        group=group,
        device_option=dc[0]
    )
    relu = core.CreateOperator(
        "Relu",
        ["Y0"],
        ["Y0"],
        device_option=dc[0]
    )

    # Manual fusion for Conv + ReLU
    conv_fusion = core.CreateOperator(
        "ConvFusion",
        ["X1", "w1", "b1"] if use_bias else ["X1", "w1"],
        ["Y1"],
        stride=stride,
        pad=pad,
        kernel=kernel,
        group=group,
        fusion_type=1,
        device_option=dc[1]
    )
    X = np.random.rand(
        batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
    w = np.random.rand(
            output_channels * group, input_channels, kernel, kernel) \
        .astype(np.float32) - 0.5
    b = np.random.rand(output_channels * group).astype(np.float32) - 0.5

    old_ws_name = workspace.CurrentWorkspace()
    workspace.SwitchWorkspace("_device_check_", True)

    workspace.FeedBlob('X0', X, dc[0])
    workspace.FeedBlob('w0', w, dc[0])
    workspace.FeedBlob('b0', b, dc[0])
    workspace.RunOperatorOnce(conv)
    workspace.RunOperatorOnce(relu)
    Y0 = workspace.FetchBlob('Y0')

    workspace.ResetWorkspace()
    workspace.FeedBlob('X1', X, dc[1])
    workspace.FeedBlob('w1', w, dc[1])
    workspace.FeedBlob('b1', b, dc[1])
    workspace.RunOperatorOnce(conv_fusion)
    Y1 = workspace.FetchBlob('Y1')

    if not np.allclose(Y0, Y1, atol=0.01, rtol=0.01):
        print(Y1.flatten())
        print(Y0.flatten())
        print(np.max(np.abs(Y1 - Y0)))
        self.assertTrue(False)

    # Auto fusion for Conv + ReLU
    workspace.ResetWorkspace()
    old_net = caffe2_pb2.NetDef()
    conv_old = caffe2_pb2.OperatorDef()
    conv_old.CopyFrom(conv)
    conv_old.device_option.CopyFrom(dc[1])
    relu_old = caffe2_pb2.OperatorDef()
    relu_old.CopyFrom(relu)
    relu_old.device_option.CopyFrom(dc[1])
    old_net.op.extend([conv_old, relu_old])
    workspace.FeedBlob('X0', X, dc[1])
    workspace.FeedBlob('w0', w, dc[1])
    workspace.FeedBlob('b0', b, dc[1])
    net = core.Net("net")
    net.Proto().CopyFrom(old_net)
    optimizeForMKLDNN(net)
    self.assertTrue(len(net.Proto().op) == 1)
    self.assertTrue(net.Proto().op[0].type == "ConvFusion")
    workspace.RunOperatorOnce(net.Proto().op[0])
    Y2 = workspace.FetchBlob('Y0')
    if not np.allclose(Y0, Y2, atol=0.01, rtol=0.01):
        print(Y2.flatten())
        print(Y0.flatten())
        print(np.max(np.abs(Y2 - Y0)))
        self.assertTrue(False)

    workspace.SwitchWorkspace(old_ws_name)
def test_convolution_sum_fusion(self, stride, pad, kernel, size,
                                input_channels, output_channels,
                                batch_size, use_bias, group, sum_add, gc, dc):
    pool_S0 = core.CreateOperator(
        "MaxPool",
        ["SX0"],
        ["S0"],
        stride=2,
        pad=0,
        kernel=2,
        device_option=dc[0]
    )
    conv = core.CreateOperator(
        "Conv",
        ["X0", "w0", "b0"] if use_bias else ["X0", "w0"],
        ["Y0"],
        stride=stride,
        pad=pad,
        kernel=kernel,
        group=group,
        device_option=dc[0]
    )
    sum = core.CreateOperator(
        sum_add,
        ["S0", "Y0"],
        ["S0"],
        device_option=dc[0]
    )

    # Manual fusion for Conv + Sum
    pool_S1 = core.CreateOperator(
        "MaxPool",
        ["SX1"],
        ["S1"],
        stride=2,
        pad=0,
        kernel=2,
        group=group,
        device_option=dc[1]
    )
    conv_fusion = core.CreateOperator(
        "ConvFusion",
        ["X1", "w1", "b1", "S1"] if use_bias else ["X1", "w1", "S1"],
        ["S1"],
        stride=stride,
        pad=pad,
        kernel=kernel,
        group=group,
        fusion_type=2,
        device_option=dc[1]
    )
    pool_input_size = int(math.ceil(float(size + 2 * pad - kernel + 1) / stride)) * 2
    SX = np.random.rand(
        batch_size, output_channels * group, pool_input_size, pool_input_size) \
        .astype(np.float32) - 0.5
    X = np.random.rand(
        batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
    w = np.random.rand(
            output_channels * group, input_channels, kernel, kernel) \
        .astype(np.float32) - 0.5
    b = np.random.rand(output_channels * group).astype(np.float32) - 0.5

    old_ws_name = workspace.CurrentWorkspace()
    workspace.SwitchWorkspace("_device_check_", True)

    workspace.FeedBlob('SX0', SX, dc[0])
    workspace.FeedBlob('X0', X, dc[0])
    workspace.FeedBlob('w0', w, dc[0])
    workspace.FeedBlob('b0', b, dc[0])
    workspace.RunOperatorOnce(pool_S0)
    workspace.RunOperatorOnce(conv)
    workspace.RunOperatorOnce(sum)
    S0 = workspace.FetchBlob('S0')

    workspace.ResetWorkspace()
    workspace.FeedBlob('SX1', SX, dc[1])
    workspace.FeedBlob('X1', X, dc[1])
    workspace.FeedBlob('w1', w, dc[1])
    workspace.FeedBlob('b1', b, dc[1])
    workspace.RunOperatorOnce(pool_S1)
    workspace.RunOperatorOnce(conv_fusion)
    S1 = workspace.FetchBlob('S1')

    if not np.allclose(S0, S1, atol=0.01, rtol=0.01):
        print(S1.flatten())
        print(S0.flatten())
        print(np.max(np.abs(S1 - S0)))
        self.assertTrue(False)

    # Auto fusion for Conv + Sum
    workspace.ResetWorkspace()
    old_net = caffe2_pb2.NetDef()
    pool_S0_old = caffe2_pb2.OperatorDef()
    pool_S0_old.CopyFrom(pool_S0)
    pool_S0_old.device_option.CopyFrom(dc[1])
    conv_old = caffe2_pb2.OperatorDef()
    conv_old.CopyFrom(conv)
    conv_old.device_option.CopyFrom(dc[1])
    sum_old = caffe2_pb2.OperatorDef()
    sum_old.CopyFrom(sum)
    sum_old.device_option.CopyFrom(dc[1])
    old_net.op.extend([pool_S0_old, conv_old, sum_old])

    # Conv + Sum should be fused case: [PreNode, Conv, Sum]
    workspace.FeedBlob('SX0', SX, dc[1])
    workspace.FeedBlob('X0', X, dc[1])
    workspace.FeedBlob('w0', w, dc[1])
    workspace.FeedBlob('b0', b, dc[1])
    net = core.Net("net")
    net.Proto().CopyFrom(old_net)
    optimizeForMKLDNN(net)
    self.assertTrue(len(net.Proto().op) == 2)
    self.assertTrue(net.Proto().op[1].type == "ConvFusion")
    workspace.RunNetOnce(net.Proto())
    # The output tensor name will be changed by optimization
    # sometimes when applying conv sum fusion
    S2 = workspace.FetchBlob(net.Proto().op[-1].output[0])
    if not np.allclose(S0, S2, atol=0.01, rtol=0.01):
        print(S2.flatten())
        print(S0.flatten())
        print(np.max(np.abs(S2 - S0)))
        self.assertTrue(False)

    # Conv + Sum should be fused case: [Conv, PreNode, Sum]
    workspace.ResetWorkspace()
    old_net = caffe2_pb2.NetDef()
    workspace.FeedBlob('SX0', SX, dc[1])
    workspace.FeedBlob('X0', X, dc[1])
    workspace.FeedBlob('w0', w, dc[1])
    workspace.FeedBlob('b0', b, dc[1])
    old_net.op.extend([conv_old, pool_S0_old, sum_old])
    net = core.Net("net")
    net.Proto().CopyFrom(old_net)
    optimizeForMKLDNN(net)
    self.assertTrue(len(net.Proto().op) == 2)
    self.assertTrue(net.Proto().op[1].type == "ConvFusion")
    workspace.RunNetOnce(net.Proto())
    # The output tensor name will be changed by optimization
    # sometimes when applying conv sum fusion
    S2 = workspace.FetchBlob(net.Proto().op[-1].output[0])
    if not np.allclose(S0, S2, atol=0.01, rtol=0.01):
        print(S2.flatten())
        print(S0.flatten())
        print(np.max(np.abs(S2 - S0)))
        self.assertTrue(False)

    # Conv + Sum should not be fused case: [Conv, midOp, preNode, Sum]
    # Conv output is used by midOp
    dropout = core.CreateOperator(
        "Dropout",
        ["Y0"],
        ["Y_dropout"],
        ratio=0.5,
        is_test=True,
        device_option=dc[1]
    )

    workspace.ResetWorkspace()
    workspace.FeedBlob('SX0', SX, dc[1])
    workspace.FeedBlob('X0', X, dc[1])
    workspace.FeedBlob('w0', w, dc[1])
    workspace.FeedBlob('b0', b, dc[1])
    old_net = caffe2_pb2.NetDef()
    old_net.op.extend([conv_old, dropout, pool_S0_old, sum_old])
    net = core.Net("net")
    net.Proto().CopyFrom(old_net)
    optimizeForMKLDNN(net)
    self.assertTrue(len(net.Proto().op) == 4)
    workspace.RunNetOnce(net.Proto())
    S2 = workspace.FetchBlob(net.Proto().op[-1].output[0])
    if not np.allclose(S0, S2, atol=0.01, rtol=0.01):
        print(S2.flatten())
        print(S0.flatten())
        print(np.max(np.abs(S2 - S0)))
        self.assertTrue(False)

    # Conv + Sum should not be fused case: [Conv, preNode, Sum, midOp]
    # preNode output is used by midOp
    sum1 = core.CreateOperator(
        sum_add,
        ["S0", "Y0"],
        ["S3"],
        device_option=dc[1]
    )
    dropout = core.CreateOperator(
        "Dropout",
        ["S0"],
        ["Y_dropout"],
        ratio=0.5,
        is_test=True,
        device_option=dc[1]
    )

    workspace.ResetWorkspace()
    workspace.FeedBlob('SX0', SX, dc[1])
    workspace.FeedBlob('X0', X, dc[1])
    workspace.FeedBlob('w0', w, dc[1])
    workspace.FeedBlob('b0', b, dc[1])
    old_net = caffe2_pb2.NetDef()
    old_net.op.extend([conv_old, pool_S0_old, sum1, dropout])
    net = core.Net("net")
    net.Proto().CopyFrom(old_net)
    optimizeForMKLDNN(net)
    print("net={}\n".format(net.Proto()))
    self.assertTrue(len(net.Proto().op) == 4)
    workspace.RunNetOnce(net.Proto())
    S2 = workspace.FetchBlob(net.Proto().op[-2].output[0])
    if not np.allclose(S0, S2, atol=0.01, rtol=0.01):
        print(S2.flatten())
        print(S0.flatten())
        print(np.max(np.abs(S2 - S0)))
        self.assertTrue(False)

    # Conv + Sum should not be fused case: [Conv, midOp, preNode, Sum]
    # midOp output has the same name with that of the Conv input
    relu_0 = core.CreateOperator(
        "Relu",
        ["X0"],
        ["X1"],
        device_option=dc[0]
    )
    conv = core.CreateOperator(
        "Conv",
        ["X1", "w0", "b0"] if use_bias else ["X1", "w0"],
        ["Y0"],
        stride=1,
        pad=0,
        kernel=1,
        device_option=dc[0]
    )
    relu_1 = core.CreateOperator(
        "Relu",
        ["X1"],
        ["X1"],
        device_option=dc[0]
    )
    pool = core.CreateOperator(
        "MaxPool",
        ["X1"],
        ["S0"],
        stride=1,
        pad=0,
        kernel=1,
        device_option=dc[0]
    )
    sum = core.CreateOperator(
        "Sum",
        ["S0", "Y0"],
        ["S0"],
        device_option=dc[0]
    )

    X = np.random.rand(
        batch_size, input_channels, size, size).astype(np.float32) - 0.5
    w = np.random.rand(
        input_channels, input_channels, 1, 1).astype(np.float32) - 0.5
    b = np.random.rand(input_channels).astype(np.float32) - 0.5

    workspace.SwitchWorkspace(old_ws_name)
    workspace.ResetWorkspace()
    workspace.FeedBlob('X0', X, dc[0])
    workspace.FeedBlob('w0', w, dc[0])
    workspace.FeedBlob('b0', b, dc[0])
    workspace.RunOperatorOnce(relu_0)
    workspace.RunOperatorOnce(conv)
    workspace.RunOperatorOnce(relu_1)
    workspace.RunOperatorOnce(pool)
    workspace.RunOperatorOnce(sum)
    S0 = workspace.FetchBlob('S0')

    workspace.ResetWorkspace()
    workspace.FeedBlob('X0', X, dc[1])
    workspace.FeedBlob('w0', w, dc[1])
    workspace.FeedBlob('b0', b, dc[1])
    relu_0_old = caffe2_pb2.OperatorDef()
    relu_0_old.CopyFrom(relu_0)
    relu_0_old.device_option.CopyFrom(dc[1])
    conv_old = caffe2_pb2.OperatorDef()
    conv_old.CopyFrom(conv)
    conv_old.device_option.CopyFrom(dc[1])
    relu_1_old = caffe2_pb2.OperatorDef()
    relu_1_old.CopyFrom(relu_1)
    relu_1_old.device_option.CopyFrom(dc[1])
    pool_old = caffe2_pb2.OperatorDef()
    pool_old.CopyFrom(pool)
    pool_old.device_option.CopyFrom(dc[1])
    sum_old = caffe2_pb2.OperatorDef()
    sum_old.CopyFrom(sum)
    sum_old.device_option.CopyFrom(dc[1])

    old_net = caffe2_pb2.NetDef()
    old_net.op.extend([relu_0_old, conv_old, relu_1_old, pool_old, sum_old])
    net = core.Net("net")
    net.Proto().CopyFrom(old_net)
    optimizeForMKLDNN(net)
    self.assertTrue(len(net.Proto().op) == 5)
    workspace.RunNetOnce(net.Proto())
    S2 = workspace.FetchBlob(net.Proto().op[-1].output[0])
    if not np.allclose(S0, S2, atol=0.01, rtol=0.01):
        print(S2.flatten())
        print(S0.flatten())
        print(np.max(np.abs(S2 - S0)))
        self.assertTrue(False)
def _prepare_blob_copy_op(from_name, to_name):
    copy_op_def = caffe2_pb2.OperatorDef()
    copy_op_def.type = "Copy"
    copy_op_def.input.extend([from_name])
    copy_op_def.output.extend([to_name])
    return copy_op_def
def test_in_place(self, stride, pad, kernel, size,
                  input_channels, output_channels,
                  batch_size, use_bias, gc, dc):
    # To expose a potential issue with in-place fallback ops, the fallback op
    # following an ideep op must be run for at least two iterations.
    conv = core.CreateOperator(
        "Conv",
        ["X", "w", "b"] if use_bias else ["X", "w"],
        ["Y"],
        stride=stride,
        pad=pad,
        kernel=kernel,
        device_option=dc[0]
    )
    X = np.random.rand(
        batch_size, input_channels, size, size).astype(np.float32) - 0.5
    w = np.random.rand(output_channels, input_channels, kernel, kernel) \
        .astype(np.float32) - 0.5
    b = np.random.rand(output_channels).astype(np.float32) - 0.5

    old_ws_name = workspace.CurrentWorkspace()
    workspace.SwitchWorkspace("_device_check_", True)
    workspace.FeedBlob('X', X, dc[0])
    workspace.FeedBlob('w', w, dc[0])
    workspace.FeedBlob('b', b, dc[0])
    workspace.RunOperatorOnce(conv)
    Y = workspace.FetchBlob('Y')

    scale = np.random.randn(Y.shape[1]).astype(np.float32)
    bias = np.random.randn(Y.shape[1]).astype(np.float32)
    ac = core.CreateOperator(
        "AffineChannel",
        ["Y", "scale", "bias"],
        ["Y"],
        is_learnable=False,
        device_option=dc[0]
    )
    workspace.FeedBlob('scale', scale, dc[0])
    workspace.FeedBlob('bias', bias, dc[0])
    workspace.RunOperatorOnce(ac)
    workspace.RunOperatorOnce(conv)
    workspace.RunOperatorOnce(ac)
    Y0 = workspace.FetchBlob('Y')

    workspace.ResetWorkspace()
    dev_net = caffe2_pb2.NetDef()
    conv_dev = caffe2_pb2.OperatorDef()
    conv_dev.CopyFrom(conv)
    conv_dev.device_option.CopyFrom(dc[1])
    ac_dev = caffe2_pb2.OperatorDef()
    ac_dev.CopyFrom(ac)
    ac_dev.device_option.CopyFrom(dc[1])
    dev_net.op.extend([conv_dev, ac_dev])
    workspace.FeedBlob('X', X, dc[1])
    workspace.FeedBlob('w', w, dc[1])
    workspace.FeedBlob('b', b, dc[1])
    workspace.FeedBlob('scale', scale, dc[1])
    workspace.FeedBlob('bias', bias, dc[1])
    workspace.RunNetOnce(dev_net)
    workspace.RunNetOnce(dev_net)
    Y1 = workspace.FetchBlob('Y')

    if not np.allclose(Y0, Y1, atol=0.01, rtol=0.01):
        print(Y1.flatten())
        print(Y0.flatten())
        print(np.max(np.abs(Y1 - Y0)))
        self.assertTrue(False)

    workspace.SwitchWorkspace(old_ws_name)