예제 #1
0
    def _common_onnx_node_to_caffe2_op(cls, init_model, pred_model, onnx_node,
                                       opset_version):
        """
        Generic translation of one ONNX node into one Caffe2 operator.

        The inputs, outputs and name of ``onnx_node`` are copied onto a new
        ``OperatorDef``.  On top of that plain marshalling this also:

          - maps the operator type through '_renamed_operators'
          - maps attribute names through '_per_op_renamed_attrs' (consulted
            first) and then '_global_renamed_attrs'
          - for opset versions below 7, appends a 'broadcast' argument set
            to 1 to operators listed in '_broadcast_operators', unless the
            operator already carries an argument with that name

        ``init_model`` and ``pred_model`` are accepted but not read here.

        Raises ValueError when the op type is registered in
        '_broken_operators' with a version at or below ``opset_version``, or
        when ``core.IsOperator`` rejects the resulting Caffe2 type.

        Custom translators can call this first and then patch up the result.
        """
        translated = caffe2_pb2.OperatorDef()

        translated.input.extend(onnx_node.inputs)
        translated.output.extend(onnx_node.outputs)
        translated.name = onnx_node.name

        source_type = onnx_node.op_type

        # Ops missing from the table are never considered broken.
        broken_version = cls._broken_operators.get(source_type, float('Inf'))
        if broken_version <= opset_version:
            raise ValueError(
                "Don't know how to translate op {} in ONNX operator set v{} (I only support prior to v{})"
                .format(source_type, opset_version, broken_version))

        translated.type = cls._renamed_operators.get(source_type, source_type)
        if not core.IsOperator(translated.type):
            raise ValueError(
                "Don't know how to translate op {}".format(source_type))

        def kmap(k):
            # Per-operator renames take precedence over the global ones.
            per_op_renames = cls._per_op_renamed_attrs.get(source_type, {})
            if k in per_op_renames:
                return per_op_renames[k]
            return cls._global_renamed_attrs.get(k, k)

        translated.arg.extend(onnx_node.attrs.caffe2(kmap=kmap))

        # onnx opset 7 and newest caffe2 have adopted full onnx broadcast
        # semantics, so the explicit 'broadcast' flag is only a hack for
        # older opsets.
        wants_broadcast_flag = (
            opset_version < 7
            and translated.type in cls._broadcast_operators)
        if wants_broadcast_flag and not any(
                arg.name == 'broadcast' for arg in translated.arg):
            translated.arg.extend(
                [caffe2.python.utils.MakeArgument('broadcast', 1)])

        return translated
예제 #2
0
    def onnx_graph_to_caffe2_net(cls, graph_def, device="CPU", opset_version=_known_opset_version):
        """
        Build a Caffe2 ``(init_net, predict_net)`` pair from an ONNX graph.

        ``graph_def`` is first passed through ``cls._inplace_rewrite``.  The
        init net starts from the graph initializers (or is empty when there
        are none); every graph input that has no initializer additionally
        gets a 'GivenTensorFill' op that fills it with ones of the declared
        shape.  The predict net holds the translated nodes plus the graph's
        external inputs and outputs.  Both nets receive the device option
        derived from ``device``.
        """
        device_option = get_device_option(Device(device))
        cls._inplace_rewrite(graph_def)

        initializers = graph_def.initializer
        init_net = (cls.onnx_initializer_to_caffe2_init_net(initializers)
                    if initializers else caffe2_pb2.NetDef())
        initialized = {tensor.name for tensor in initializers}

        # NOTE(review): presumably reserves these names so generated dummy
        # names do not collide with them -- confirm against dummy_name.
        dummy_name(cls._all_names_in_graph(graph_def) | initialized)

        predict_net = caffe2_pb2.NetDef()
        predict_net.name = graph_def.name
        for onnx_node in graph_def.node:
            predict_net.op.extend(
                cls._onnx_node_to_caffe2_op(onnx_node, opset_version))

        predict_net.external_input.extend(
            graph_input.name for graph_input in graph_def.input)
        predict_net.external_output.extend(
            graph_output.name for graph_output in graph_def.output)

        # Caffe2 predictor requires all input blobs (including the
        # real model inputs) are initialized in init_net
        uninitialized_inputs = (
            graph_input for graph_input in graph_def.input
            if graph_input.name not in initialized)
        for graph_input in uninitialized_inputs:
            # TODO: Putting this in the init net will make it run faster, but
            # it causes some tests to fail (a fixed shape of (1,) was tried).
            dims = [dim.dim_value
                    for dim in graph_input.type.tensor_type.shape.dim]

            fill_op = caffe2_pb2.OperatorDef()
            fill_op.type = 'GivenTensorFill'
            fill_op.output.extend([graph_input.name])

            shape_arg = fill_op.arg.add()
            shape_arg.name = 'shape'
            shape_arg.ints.extend(dims)

            values_arg = fill_op.arg.add()
            values_arg.name = 'values'
            values_arg.floats.extend(np.ones(dims).flatten().tolist())

            init_net.op.extend([fill_op])

        # Set the device option for the init_net and predict_net.
        for net in (init_net, predict_net):
            net.device_option.CopyFrom(device_option)

        return init_net, predict_net
예제 #3
0
파일: backend.py 프로젝트: yytdfc/pytorch
    def _onnx_node_to_caffe2_op(cls, init_model, pred_model, node_def, opset_version):
        """
        Translate one ONNX ``NodeProto`` into a ``Caffe2Ops`` bundle.

        When the C++ backend reports support for the op type, the node is
        converted there; otherwise the translator registered in
        '_special_operators' (or the common translator) is used.  A
        translator may return a ``Caffe2Ops``, an iterable of operators or a
        single operator; the latter two are wrapped into ``Caffe2Ops``.
        """
        def _deserialize_ops(serialized_ops):
            # Rebuild OperatorDef messages from their serialized form.
            rebuilt = []
            for blob in serialized_ops:
                op = caffe2_pb2.OperatorDef()
                op.ParseFromString(blob)
                rebuilt.append(op)
            return rebuilt

        cbackend = C.Caffe2Backend(cls._dummy_name)
        if cbackend.support_onnx_import(node_def.op_type):
            # extract value infos from pred model (value infos of
            # node's inputs that are in init model should be all
            # available in pred model)
            value_infos = []
            if pred_model is not None:
                graph = pred_model.graph
                for input_name in node_def.input:
                    value_infos.extend(
                        vi.SerializeToString()
                        for vi in itertools.chain(graph.input,
                                                  graph.output,
                                                  graph.value_info)
                        if vi.name == input_name)

            op_strs = cbackend.convert_node(node_def.SerializeToString(), value_infos, opset_version)
            # Index 0 holds the init ops, index 1 the regular ops.
            init_ops = _deserialize_ops(op_strs[0])
            ops = _deserialize_ops(op_strs[1])
            return Caffe2Ops(ops, init_ops, [])

        op_type = node_def.op_type
        translator = (getattr(cls, cls._special_operators[op_type])
                      if op_type in cls._special_operators
                      else cls._common_onnx_node_to_caffe2_op)
        result = translator(init_model, pred_model, OnnxNode(node_def), opset_version)
        if isinstance(result, Caffe2Ops):
            return result
        if isinstance(result, container_abcs.Iterable):
            return Caffe2Ops(result, [], [])
        return Caffe2Ops([result], [], [])
예제 #4
0
파일: core.py 프로젝트: zxsted/caffe2
 def _GetGradientForOpCC(cls, op_def, g_output):
     """Fetch gradient operators for ``op_def`` from ``cc_GetGradientDefs``.

     Returns ``(grad_defs, g_input)``: the parsed gradient ``OperatorDef``
     messages and the input gradients, where every plain tuple entry has
     been converted to a ``GradientSlice``.
     """
     serialized_defs, raw_g_input = cc_GetGradientDefs(  # NOQA
         op_def.SerializeToString(), g_output)

     # C++ return tuple for sparse gradients, and we will convert it to
     # namedtuple here.
     g_input = []
     for grad in raw_g_input:
         g_input.append(GradientSlice(*grad) if type(grad) is tuple else grad)

     def _parse(serialized):
         parsed = caffe2_pb2.OperatorDef()
         parsed.ParseFromString(serialized)
         return parsed

     grad_defs = [_parse(serialized) for serialized in serialized_defs]
     return grad_defs, g_input
예제 #5
0
파일: backend.py 프로젝트: yytdfc/pytorch
    def _create_tensor_filling_op(cls, onnx_tensor, name=None):
        """
        Translate an ONNX TensorProto into the Caffe2 'GivenTensor*Fill'
        operator that produces the same tensor.

        The output blob is called ``name`` when given, otherwise
        ``onnx_tensor.name``; at least one of the two must be non-empty.
        Raises RuntimeError for tensor data types without a fill operator.
        """
        assert name or onnx_tensor.name
        name = name or onnx_tensor.name

        # data type -> (Caffe2 fill operator, Argument field taking values)
        fill_specs = {
            TensorProto.FLOAT: ('GivenTensorFill', 'floats'),
            TensorProto.DOUBLE: ('GivenTensorDoubleFill', 'floats'),
            TensorProto.INT64: ('GivenTensorInt64Fill', 'ints'),
            TensorProto.UINT32: ('GivenTensorInt64Fill', 'ints'),
            TensorProto.UINT8: ('GivenTensorIntFill', 'ints'),
            TensorProto.INT8: ('GivenTensorIntFill', 'ints'),
            TensorProto.UINT16: ('GivenTensorIntFill', 'ints'),
            TensorProto.INT16: ('GivenTensorIntFill', 'ints'),
            TensorProto.INT32: ('GivenTensorIntFill', 'ints'),
            TensorProto.BOOL: ('GivenTensorBoolFill', 'ints'),
            TensorProto.STRING: ('GivenTensorStringFill', 'strings'),
        }

        data_type = onnx_tensor.data_type
        if data_type not in fill_specs:
            raise RuntimeError(
                "unrecognized tensor type {}".format(onnx_tensor.data_type))
        fill_op_type, values_field = fill_specs[data_type]

        if data_type == TensorProto.STRING:
            payload = onnx_tensor.string_data
        else:
            # Use the onnx.numpy_helper because the data may be raw
            payload = onnx.numpy_helper.to_array(
                onnx_tensor).flatten().tolist()

        fill_op = caffe2_pb2.OperatorDef()
        fill_op.type = fill_op_type

        # The 'values' argument is added before 'shape', as before.
        values_arg = fill_op.arg.add()
        values_arg.name = "values"
        getattr(values_arg, values_field).extend(payload)

        shape_arg = fill_op.arg.add()
        shape_arg.name = "shape"
        shape_arg.ints.extend(onnx_tensor.dims)

        fill_op.output.append(name)

        return fill_op
예제 #6
0
def NodeProtoToOperatorDef(node_proto):
    """Convert a node proto into a Caffe2 ``OperatorDef``.

    ``node_proto`` may be an object with a callable ``SerializeToString``
    method or an already serialized ``bytes`` value.  The serialized form is
    handed to ``C.node_proto_to_operator_def`` and the result is parsed into
    a new ``OperatorDef``.

    Raises ValueError for any other kind of input.
    """
    serializer = getattr(node_proto, 'SerializeToString', None)
    if callable(serializer):
        payload = serializer()
    elif isinstance(node_proto, bytes):
        payload = node_proto
    else:
        raise ValueError('No SerializeToString method is detected. '
                         'neither node_proto is bytes.\ntype is {}'.format(
                             type(node_proto)))

    converted = caffe2_pb2.OperatorDef()
    converted.ParseFromString(C.node_proto_to_operator_def(payload))
    return converted
예제 #7
0
 def test_that_auto_ssa_gives_non_colliding_names(self):
     """SSA conversion must rename outputs without colliding with the
     pre-existing blob name 'foo_1', and keep shapes and tracker in sync."""
     producer = caffe2_pb2.OperatorDef()
     producer.output.extend(['foo'])
     consumer = caffe2_pb2.OperatorDef()
     consumer.input.extend(['foo'])
     consumer.output.extend(['foo', 'foo_1'])

     shapes = {'foo': [1], 'foo_1': [2]}
     blob_name_tracker = tb._get_blob_names([producer, consumer])
     tb._convert_to_ssa(shapes, blob_name_tracker, [producer, consumer])

     self.assertEqual(producer.output[0], 'foo')
     self.assertEqual(consumer.input[0], 'foo')
     self.assertEqual(consumer.output[0], 'foo_1')
     # Unfortunate name but we do not parse original `_` for now.
     self.assertEqual(consumer.output[1], 'foo_1_1')

     expected_shapes = (('foo', [1]), ('foo_1', [1]), ('foo_1_1', [2]))
     self.assertEqual(len(shapes), 3)
     for blob, shape in expected_shapes:
         self.assertEqual(shapes[blob], shape)

     expected_origins = (
         ('foo', 'foo'), ('foo_1', 'foo'), ('foo_1_1', 'foo_1'))
     self.assertEqual(len(blob_name_tracker), 3)
     for blob, origin in expected_origins:
         self.assertEqual(blob_name_tracker[blob], origin)
예제 #8
0
    def _add_head_tail(self, pred_net, new_head, new_tail):
        """Wrap ``pred_net`` with a leading and a trailing 'Copy' operator.

        The first external input is renamed to ``new_head`` and copied into
        the original input blob before all existing ops; the first external
        output is copied into ``new_tail`` after all existing ops and renamed
        accordingly.  ``pred_net`` is modified in place.
        """
        def _copy_op(source_blob, target_blob):
            op = caffe2_pb2.OperatorDef()
            op.type = "Copy"
            op.input.append(source_blob)
            op.output.append(target_blob)
            return op

        orig_head = pred_net.external_input[0]
        orig_tail = pred_net.external_output[0]

        # Add head: stash the existing ops in a scratch net so the Copy op
        # can be placed in front of them.
        head = _copy_op(new_head, orig_head)
        stash = caffe2_pb2.NetDef()
        stash.op.extend(pred_net.op)
        del pred_net.op[:]
        pred_net.op.extend([head])
        pred_net.op.extend(stash.op)
        pred_net.external_input[0] = new_head

        # Add tail
        pred_net.op.extend([_copy_op(orig_tail, new_tail)])
        pred_net.external_output[0] = new_tail
예제 #9
0
    def run_node(cls,
                 node,
                 inputs,
                 device='CPU',
                 opset_version=_known_opset_version,
                 outputs_info=None):
        """Run a single ONNX node through Caffe2 and return its outputs.

        ``inputs`` is either a dict mapping blob names to arrays or a
        sequence matched positionally against ``node.input``.  Every input
        is fed into a fresh ``Workspace`` and described to the C++ converter
        through a serialized tensor value info.  The converted operators run
        once on ``device``; the blobs named in ``node.output`` are returned
        as an 'Outputs' namedtuple.
        """
        super(Caffe2Backend, cls).run_node(node,
                                           inputs,
                                           device=device,
                                           outputs_info=outputs_info,
                                           opset_version=opset_version)

        value_infos = []
        device_option = get_device_option(Device(device))
        ws = Workspace()
        with core.DeviceScope(device_option):  # temporary!
            if isinstance(inputs, dict):
                named_inputs = inputs.items()
            else:
                assert len(node.input) == len(
                    inputs), "{}: expected {} but got {}".format(
                        node.op_type, len(node.input), len(inputs))
                named_inputs = zip(node.input, inputs)

            for blob_name, array in named_inputs:
                ws.FeedBlob(blob_name, array)
                value_info = onnx.helper.make_tensor_value_info(
                    name=blob_name,
                    elem_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[array.dtype],
                    shape=array.shape)
                value_infos.append(value_info.SerializeToString())

            cbackend = C.Caffe2Backend(cls._dummy_name)
            ops_str = cbackend.convert_node(node.SerializeToString(),
                                            value_infos, opset_version)

            def _to_device_op(serialized):
                op = caffe2_pb2.OperatorDef()
                op.ParseFromString(serialized)
                op.device_option.CopyFrom(device_option)
                return op

            # First group of converted ops runs before the second one.
            ops = [_to_device_op(s) for s in ops_str[0] + ops_str[1]]
            ws.RunOperatorsOnce(ops)

            output_values = [ws.FetchBlob(name) for name in node.output]
            return namedtupledict('Outputs', node.output)(*output_values)
예제 #10
0
파일: core.py 프로젝트: zxsted/caffe2
def CreateOperator(operator_type,
                   inputs,
                   outputs,
                   name='',
                   control_input=None,
                   device_option=None,
                   arg=None,
                   engine=None,
                   **kwargs):
    """Build an ``OperatorDef`` for the given operator type.

    The type should be a string corresponding to an operator registered
    with Caffe2.  ``inputs``, ``outputs`` and ``control_input`` are
    normalized with ``_RectifyInputOutput`` and stored as strings.  ``arg``
    entries are appended as-is; every remaining keyword argument becomes an
    argument via ``utils.MakeArgument``, except 'random_seed', which is
    stored on the device option.

    When ``workspace.IsImmediate()`` is true the operator is also run
    through ``workspace.RunOperatorImmediate`` before being returned.
    """
    operator = caffe2_pb2.OperatorDef()
    operator.type = operator_type
    operator.name = name

    # Add rectified inputs and outputs
    rectified_inputs = _RectifyInputOutput(inputs)
    rectified_outputs = _RectifyInputOutput(outputs)
    operator.input.extend(str(blob) for blob in rectified_inputs)
    operator.output.extend(str(blob) for blob in rectified_outputs)
    if control_input:
        operator.control_input.extend(
            str(blob) for blob in _RectifyInputOutput(control_input))

    # Set device option:
    # (1) If device_option is explicitly set, use device_option.
    # (2) If not, but scope.DEVICESCOPE is set, then we use scope.DEVICESCOPE.
    # (3) Otherwise, do not set device option.
    effective_device = (device_option if device_option is not None
                        else scope.DEVICESCOPE)
    if effective_device is not None:
        operator.device_option.CopyFrom(effective_device)

    if engine is not None:
        operator.engine = engine

    # random seed is defined in the device option, so we need to do special
    # care.
    if 'random_seed' in kwargs:
        operator.device_option.random_seed = kwargs.pop('random_seed')

    # Add given arguments that do not need parsing
    if arg is not None:
        operator.arg.extend(arg)
    # Add all other arguments
    operator.arg.extend(
        utils.MakeArgument(key, value) for key, value in kwargs.items())

    if workspace.IsImmediate():
        workspace.RunOperatorImmediate(operator)
    return operator
예제 #11
0
    def run_node(cls,
                 node,
                 inputs,
                 device='CPU',
                 opset_version=_known_opset_version,
                 outputs_info=None):
        """Run a single ONNX node through Caffe2 and return its outputs.

        ``inputs`` is either a dict mapping blob names to arrays or a
        sequence matched positionally against ``node.input``.  The inputs
        are fed into a fresh ``Workspace``, the node is converted by the C++
        backend, and the resulting operators run once on ``device``.  The
        blobs named in ``node.output`` are returned as an 'Outputs'
        namedtuple.

        When the ONNX_CAFFE2_DEBUG environment variable is set, the Python
        translation of the node is printed next to the C++ one.
        """
        print("run code......")
        print("node:", node)
        super(Caffe2Backend, cls).run_node(node,
                                           inputs,
                                           device=device,
                                           outputs_info=outputs_info,
                                           opset_version=opset_version)

        device_option = get_device_option(Device(device))
        ws = Workspace()
        with core.DeviceScope(device_option):  # temporary!
            if isinstance(inputs, dict):
                named_inputs = inputs.items()
            else:
                assert len(node.input) == len(
                    inputs), "{}: expected {} but got {}".format(
                        node.op_type, len(node.input), len(inputs))
                named_inputs = zip(node.input, inputs)
            for blob_name, array in named_inputs:
                ws.FeedBlob(blob_name, array)

            cbackend = C.Caffe2Backend(cls._dummy_name)
            ops_str = cbackend.convert_node(node.SerializeToString(),
                                            opset_version)

            def _to_device_op(serialized):
                op = caffe2_pb2.OperatorDef()
                op.ParseFromString(serialized)
                op.device_option.CopyFrom(device_option)
                return op

            ops = [_to_device_op(s) for s in ops_str[0] + ops_str[1]]

            # For testing
            if "ONNX_CAFFE2_DEBUG" in os.environ:
                init_ops, ops2, _ = cls._onnx_node_to_caffe2_op(
                    None, None, node, opset_version
                    or cls._known_opset_version)
                ops2 = init_ops + ops2
                for py_op in ops2:
                    py_op.device_option.CopyFrom(device_option)
                print("\nC++:\n{}\nPython:\n{}".format(ops, ops2))

            ws.RunOperatorsOnce(ops)
            output_values = [ws.FetchBlob(name) for name in node.output]
            return namedtupledict('Outputs', node.output)(*output_values)
예제 #12
0
def convert_onnx_model_to_trt_op(onnx_model,
                                 max_batch_size=50,
                                 max_workspace_size=2 * 1024 * 1024,
                                 verbosity=1,
                                 debug_builder=False):
    """
    Convert the whole ONNX model to a TensorRT C2 op.

    Calls ``check_gpu_()`` first, then passes the serialized model, the
    output shapes of its graph and the remaining options to
    ``C.onnx_to_trt_op``.  Returns the result parsed as an ``OperatorDef``.
    """
    check_gpu_()

    serialized_model = onnx_model.SerializeToString()
    output_shapes = _get_output_shapes(onnx_model.graph.output)
    serialized_op = C.onnx_to_trt_op(serialized_model, output_shapes,
                                     max_batch_size, max_workspace_size,
                                     verbosity, debug_builder)

    trt_op = caffe2_pb2.OperatorDef()
    trt_op.ParseFromString(serialized_op)
    return trt_op
예제 #13
0
 def test_that_adding_gradient_scope_does_no_fancy_renaming(self):
     """Adding the gradient scope only prefixes names with 'GRADIENTS/'."""
     # because it cannot create collisions
     grad_op = caffe2_pb2.OperatorDef()
     grad_op.name = 'foo_grad'
     grad_op.input.extend(['foo_grad', 'foo_grad_1'])
     shapes = {'foo_grad': [1]}

     blob_name_tracker = tb._get_blob_names([grad_op])
     tb._add_gradient_scope(shapes, blob_name_tracker, [grad_op])

     scoped_inputs = ('GRADIENTS/foo_grad', 'GRADIENTS/foo_grad_1')
     for position, scoped_name in enumerate(scoped_inputs):
         self.assertEqual(grad_op.input[position], scoped_name)
     self.assertEqual(grad_op.name, 'GRADIENTS/foo_grad')

     self.assertEqual(len(shapes), 1)
     self.assertEqual(shapes['GRADIENTS/foo_grad'], [1])

     expected_origins = (('GRADIENTS/foo_grad', 'foo_grad'),
                         ('GRADIENTS/foo_grad_1', 'foo_grad_1'))
     self.assertEqual(len(blob_name_tracker), 2)
     for scoped_name, origin in expected_origins:
         self.assertEqual(blob_name_tracker[scoped_name], origin)
예제 #14
0
    def onnx_graph_to_caffe2_net(cls, graph_def):
        """
        Build a Caffe2 ``(init_net, predict_net)`` pair from an ONNX graph.

        ``graph_def`` is first passed through ``cls._inplace_rewrite``.  The
        init net starts from the graph initializers (or is empty when there
        are none); every graph input that has no initializer additionally
        gets a 'GivenTensorFill' op that fills it with ones of the declared
        shape.  The predict net holds the translated nodes plus the graph's
        external inputs and outputs.
        """
        cls._inplace_rewrite(graph_def)

        initializers = graph_def.initializer
        init_net = (cls.onnx_initializer_to_caffe2_init_net(initializers)
                    if initializers else caffe2_pb2.NetDef())
        initialized = {tensor.name for tensor in initializers}

        # NOTE(review): presumably reserves these names so generated dummy
        # names do not collide with them -- confirm against dummy_name.
        dummy_name(cls._all_names_in_graph(graph_def) | initialized)

        predict_net = caffe2_pb2.NetDef()
        predict_net.name = graph_def.name
        for onnx_node in graph_def.node:
            predict_net.op.extend(cls._onnx_node_to_caffe2_op(onnx_node))

        predict_net.external_input.extend(
            graph_input.name for graph_input in graph_def.input)
        predict_net.external_output.extend(
            graph_output.name for graph_output in graph_def.output)

        # Caffe2 predictor requires all input blobs (including the
        # real model inputs) are initialized in init_net
        uninitialized_inputs = (
            graph_input for graph_input in graph_def.input
            if graph_input.name not in initialized)
        for graph_input in uninitialized_inputs:
            dims = [dim.dim_value
                    for dim in graph_input.type.tensor_type.shape.dim]

            fill_op = caffe2_pb2.OperatorDef()
            fill_op.type = 'GivenTensorFill'
            fill_op.output.extend([graph_input.name])

            shape_arg = fill_op.arg.add()
            shape_arg.name = 'shape'
            shape_arg.ints.extend(dims)

            values_arg = fill_op.arg.add()
            values_arg.name = 'values'
            values_arg.floats.extend(np.ones(dims).flatten().tolist())

            init_net.op.extend([fill_op])

        return init_net, predict_net
예제 #15
0
 def test_that_replacing_colons_gives_non_colliding_names(self):
     """Replacing ':' with '$' must not collide with an existing 'foo$0'
     blob, and the shapes dict must follow the renaming."""
     # .. and update shapes
     colon_op = caffe2_pb2.OperatorDef()
     colon_op.name = 'foo:0'
     colon_op.input.extend(['foo:0', 'foo$0'])
     shapes = {'foo:0': [1]}

     blob_name_tracker = tb._get_blob_names([colon_op])
     tb._replace_colons(shapes, blob_name_tracker, [colon_op], '$')

     renamed_inputs = ('foo$0', 'foo$0_1')
     for position, renamed in enumerate(renamed_inputs):
         self.assertEqual(colon_op.input[position], renamed)
     # Collision but blobs and op names are handled later by
     # _fill_missing_operator_names.
     self.assertEqual(colon_op.name, 'foo$0')

     self.assertEqual(len(shapes), 1)
     self.assertEqual(shapes['foo$0'], [1])

     expected_origins = (('foo$0', 'foo:0'), ('foo$0_1', 'foo$0'))
     self.assertEqual(len(blob_name_tracker), 2)
     for renamed, origin in expected_origins:
         self.assertEqual(blob_name_tracker[renamed], origin)
예제 #16
0
def _prepare_gradient_while_ops(
        fwd_op, input_names, output_names, loop_grad_net, workspace_blob,
        init_grad_map, loop_grad_map):
    """Build the gradient operator for a forward While op.

    The gradient op is a copy of ``fwd_op`` (with '_grad' appended to a
    non-empty name) whose arguments are replaced by a 'loop_net' holding
    ``loop_grad_net`` and a 'cond_net' built here.  Its inputs become the
    condition blob followed by ``input_names``, its outputs become
    ``output_names``, and its control inputs are cleared.

    Returns the ops of the condition-initialization net followed by the
    gradient While op.
    """
    from caffe2.python.core import Net, BlobReference

    def _net_argument(arg_name, net_proto):
        net_arg = caffe2_pb2.Argument()
        net_arg.name = arg_name
        net_arg.n.CopyFrom(net_proto)
        return net_arg

    grad_while = caffe2_pb2.OperatorDef()
    grad_while.CopyFrom(fwd_op)
    if grad_while.name:
        grad_while.name += "_grad"

    loop_net_arg = _net_argument("loop_net", loop_grad_net)

    # Construct condition net - check that there're still forward workspaces
    # left using HasScope op
    cond_net = Net('gradient_loop_cond_net')
    cond_init_net = Net('gradient_loop_cond_net_init')
    cond_blob = cond_net.NextScopedBlob(cond_net.Name() + '/cond')
    cond_init_net.HasScope(workspace_blob, cond_blob)
    cond_net.HasScope(workspace_blob, cond_blob)

    for blob, init_grad_blob in init_grad_map.items():
        blob_name = str(blob)
        init_grad_blob_name = str(init_grad_blob)
        if blob_name not in loop_grad_map:
            continue
        loop_grad_name = loop_grad_map[blob_name]
        if loop_grad_name != init_grad_blob_name:
            # Copies are only needed when the two gradient blobs have
            # different names.
            cond_net.Copy(BlobReference(loop_grad_name), init_grad_blob)
            cond_init_net.Copy(init_grad_blob, BlobReference(loop_grad_name))

    cond_net_arg = _net_argument("cond_net", cond_net.Proto())

    del grad_while.arg[:]
    grad_while.arg.extend([loop_net_arg, cond_net_arg])

    del grad_while.control_input[:]

    del grad_while.input[:]
    grad_while.input.extend(
        [str(cond_blob).encode('utf-8')] + list(input_names))

    del grad_while.output[:]
    grad_while.output.extend(output_names)

    grad_while.is_gradient_op = True
    return list(cond_init_net.Proto().op) + [grad_while]
예제 #17
0
    def testOperatorDef2NodeProto(self):
        """OperatorDefToNodeProto must carry every OperatorDef field over.

        Inputs, outputs, name, type, argument names and the device option
        are compared directly; engine, control inputs and the gradient flag
        are looked up in the node annotations, and debug_info is expected in
        the node's doc_string.
        """
        op_def = caffe2_pb2.OperatorDef()
        op_def.input.extend(["A", "B", "C"])
        op_def.output.extend(["X", "Y"])
        op_def.name = "TestOpName"
        op_def.type = "TestOp"
        arg1 = caffe2_pb2.Argument()
        arg1.name = "TestArg1"
        arg1.i = 1
        arg2 = caffe2_pb2.Argument()
        arg2.name = "TestArg2"
        # The string payload belongs to arg2; it was previously written to
        # arg1, which left arg2 without a value.
        arg2.s = "TestInfo".encode("utf-8")
        op_def.arg.extend([arg1, arg2])
        op_def.device_option.CopyFrom(caffe2_pb2.DeviceOption())
        op_def.engine = "TestEngine".encode("utf-8")
        op_def.control_input.extend(["input1", "input2"])
        op_def.is_gradient_op = True
        op_def.debug_info = "TestDebugInfo"

        node = convert.OperatorDefToNodeProto(op_def)

        self.assertEqual(node.input, op_def.input)
        self.assertEqual(node.output, op_def.output)
        self.assertEqual(node.name, op_def.name)
        self.assertEqual(node.op_type, op_def.type)
        self.assertEqual(node.attribute[0].name, op_def.arg[0].name)
        self.assertEqual(node.attribute[1].name, op_def.arg[1].name)
        self.assertEqual(node.device_option, op_def.device_option)
        # Fields without a direct NodeProto counterpart are read back from
        # the annotations, selected by name.
        node_engine = [
            a.s.decode("utf-8") for a in node.annotations if a.name == "engine"
        ][0]
        self.assertEqual(node_engine, op_def.engine)
        node_control_input = [
            a.strings for a in node.annotations if a.name == "control_input"
        ][0]
        self.assertEqual(len(node_control_input), len(op_def.control_input))
        for x, y in zip(node_control_input, op_def.control_input):
            self.assertEqual(x.decode("utf-8"), y)
        self.assertEqual(node.doc_string, op_def.debug_info)
        node_is_gradient_op = [
            a.i for a in node.annotations if a.name == "is_gradient_op"
        ][0]
        self.assertEqual(node_is_gradient_op, int(op_def.is_gradient_op))
예제 #18
0
    def _common_onnx_node_to_caffe2_op(cls, init_model, pred_model, onnx_node,
                                       opset_version):
        """
        Generic translation of one ONNX node into one Caffe2 operator.

        The inputs, outputs and name of ``onnx_node`` are copied onto a new
        ``OperatorDef``.  On top of that plain marshalling this also:

          - maps the operator type through '_renamed_operators'
          - maps attribute names through '_per_op_renamed_attrs' (consulted
            first) and then '_global_renamed_attrs'

        ``init_model`` and ``pred_model`` are accepted but not read here.

        Raises ValueError when the op type is registered in
        '_broken_operators' with a version at or below ``opset_version``, or
        when ``core.IsOperator`` rejects the resulting Caffe2 type.

        Custom translators can call this first and then patch up the result.
        """
        caffe2_op = caffe2_pb2.OperatorDef()

        caffe2_op.input.extend(onnx_node.inputs)
        caffe2_op.output.extend(onnx_node.outputs)
        caffe2_op.name = onnx_node.name

        op_type = onnx_node.op_type

        # Ops missing from the table are never considered broken.
        broken_version = cls._broken_operators.get(op_type, float('Inf'))
        if broken_version <= opset_version:
            raise ValueError(
                "Don't know how to translate op {} in ONNX operator set v{} (I only support prior to v{})"
                .format(op_type, opset_version, broken_version))

        caffe2_op.type = cls._renamed_operators.get(op_type, op_type)
        if not core.IsOperator(caffe2_op.type):
            raise ValueError(
                "Don't know how to translate op {}".format(op_type))

        def kmap(k):
            # Per-operator renames take precedence over the global ones.
            op_specific = cls._per_op_renamed_attrs.get(op_type, {})
            if k in op_specific:
                return op_specific[k]
            return cls._global_renamed_attrs.get(k, k)

        renamed_args = onnx_node.attrs.caffe2(kmap=kmap)
        caffe2_op.arg.extend(renamed_args)

        return caffe2_op
예제 #19
0
    def testRoundTrip(self):
        """An 'Add' op converted to a NodeProto and back must still run and
        produce the broadcast sum of its two inputs."""
        op_def = caffe2_pb2.OperatorDef()
        op_def.type = "Add"
        op_def.input.extend(["input1", "input2"])
        op_def.output.extend(["output1"])

        node = convert.OperatorDefToNodeProto(op_def)
        new_op_def = convert.NodeProtoToOperatorDef(node)

        # Shapes (1, 3, 1, 5) and (2, 1, 4, 1) differ in every axis, so the
        # reference sum relies on numpy broadcasting.
        lhs = np.random.randn(1, 3, 1, 5).astype(np.float32)
        rhs = np.random.randn(2, 1, 4, 1).astype(np.float32)
        expected = lhs + rhs
        for blob_name, value in (("input1", lhs), ("input2", rhs)):
            workspace.FeedBlob(blob_name, value)

        ran = workspace.RunOperatorOnce(new_op_def.SerializeToString())
        self.assertEqual(ran, True)

        self.assertEqual(workspace.HasBlob("output1"), True)
        actual = workspace.FetchBlob("output1")
        np.testing.assert_array_equal(actual, expected)
예제 #20
0
def _gen_grad_zero_init_ops(grad_map, grad_output_names):
    """Build ConstantFill operators that zero-initialize gradient blobs.

    For each name in ``grad_output_names`` the first key of ``grad_map``
    whose value equals that name is looked up.  That key becomes the input
    of a "ConstantFill" operator (so that the gradient blob has the same
    shape), the gradient name becomes its output, and a float argument
    ``value`` of 0.0 is attached.

    Returns:
        A list of operator definitions, one per entry of
        ``grad_output_names`` and in the same order.

    Raises:
        AssertionError: if no truthy key of ``grad_map`` maps to one of the
            requested gradient names.
    """

    def make_fill_op(grad_name):
        # Reverse lookup: which forward blob owns this gradient?
        source_blob = next(
            (blob for blob, grad in grad_map.items() if grad == grad_name),
            None)
        assert source_blob, "Unknown gradient output " + grad_name

        zero_value = caffe2_pb2.Argument()
        zero_value.name = "value"
        zero_value.f = 0.0

        fill_op = caffe2_pb2.OperatorDef()
        fill_op.type = "ConstantFill"
        fill_op.input.extend([source_blob])
        fill_op.output.extend([grad_name])
        fill_op.arg.extend([zero_value])
        return fill_op

    return [make_fill_op(grad_name) for grad_name in grad_output_names]
예제 #21
0
파일: core.py 프로젝트: sumodm/caffe2
 def ReallyCreate(inputs,
                  outputs,
                  name='',
                  device_option=None,
                  arg=None,
                  engine=None,
                  **kwargs):
     """Build an OperatorDef whose type is ``operator_type``.

     ``operator_type`` is not defined in this function; it is read from the
     enclosing scope.

     Args:
         inputs: a single blob (str, unicode or BlobReference) or a list of
             blobs.  Every entry is converted with ``str`` before use.
         outputs: same accepted forms as ``inputs``.
         name: value stored in the operator's ``name`` field.
         device_option: optional message copied into
             ``operator.device_option``.
         arg: optional iterable of ready-made arguments, appended as-is.
         engine: optional value stored in the operator's ``engine`` field.
         **kwargs: ``random_seed`` is stored on the device option; every
             other key/value pair is turned into an argument with
             ``utils.MakeArgument``.

     Returns:
         The populated ``caffe2_pb2.OperatorDef``.

     Raises:
         ValueError: if ``inputs`` or ``outputs`` is neither a single blob
             nor a list.
     """

     def as_blob_list(blobs, kind):
         # A lone blob is wrapped in a list.  Inputs and outputs share this
         # helper so that both accept exactly the same forms (previously a
         # unicode name was accepted as an input but rejected as an output).
         if isinstance(blobs, (str, unicode, BlobReference)):
             return [blobs]
         if not isinstance(blobs, list):
             raise ValueError("Unknown %s format: %s of type %s." %
                              (kind, str(blobs), type(blobs)))
         return blobs

     operator = caffe2_pb2.OperatorDef()
     operator.type = operator_type
     operator.name = name
     inputs = as_blob_list(inputs, "input")
     outputs = as_blob_list(outputs, "output")
     operator.input.extend([str(i) for i in inputs])
     operator.output.extend([str(o) for o in outputs])
     if device_option is not None:
         operator.device_option.CopyFrom(device_option)
     if engine is not None:
         operator.engine = engine
     # random seed is defined in the device option, so it must not end up
     # among the generic arguments below.
     if 'random_seed' in kwargs:
         operator.device_option.random_seed = kwargs.pop('random_seed')
     # Add given arguments that do not need parsing
     if arg is not None:
         operator.arg.extend(arg)
     # Add all other arguments
     for key, value in kwargs.iteritems():
         operator.arg.add().CopyFrom(utils.MakeArgument(key, value))
     return operator
예제 #22
0
 def add_init_params(self, init_net):
     '''
     Append the initializer operator of every parameter to ``init_net``.

     Parameters without an initializer are skipped.  When an initializer
     has no ``device_option`` set and a device scope is currently active,
     a copy carrying that scope is appended instead, so the operator stored
     on the parameter is not modified.
     '''
     for param in self.params:
         # TODO(amalevich): Either return back to lambdas, that add
         # all params (looks a bit safer and breaking less
         # abstractions) or extend Net interface to this type of
         # operations better
         initializer = param.initializer
         device_scope = scope.CurrentDeviceScope()
         if not initializer:
             continue

         lacks_device = not initializer.HasField('device_option')
         if lacks_device and device_scope:
             scoped_initializer = caffe2_pb2.OperatorDef()
             scoped_initializer.CopyFrom(param.initializer)
             scoped_initializer.device_option.CopyFrom(device_scope)
             initializer = scoped_initializer

         # TODO(xlwang) init_net._net.op has type google.protobuf.\
         # internal.containers.RepeatedCompositeFieldContainer, but
         # the version of protobuf in fbcode does not support append
         # so extend is used
         init_net._net.op.extend([initializer])
예제 #23
0
    def _create_transpose(cls, node_def, env):
        """Translate an ONNX node into a Caffe2 'ConvTranspose' OperatorDef.

        Input and output names are looked up through ``env``.  The plural
        ONNX attributes ``kernel_shape`` / ``strides`` / ``pads`` are
        rewritten to the singular Caffe2 arguments ``kernel`` / ``stride`` /
        ``pad``; every other attribute is converted unchanged with
        ``cls._onnx_arg_to_caffe2_arg``.

        Args:
            node_def: the ONNX node; its ``input``, ``output``, ``name`` and
                ``attribute`` fields are read.
            env: mapping from ONNX blob names to Caffe2 blob names.

        Returns:
            The populated ``caffe2_pb2.OperatorDef``.

        Raises:
            ValueError: if one of the plural attributes is empty or holds
                values that are not all equal, so that it cannot be
                expressed as a single number.
        """
        op_def = caffe2_pb2.OperatorDef()
        op_def.output.extend([env[o] for o in node_def.output])
        op_def.input.extend([env[i] for i in node_def.input])
        op_def.type = 'ConvTranspose'
        op_def.name = node_def.name

        def can_be_singular(values):
            # An empty attribute has no value it could collapse to.
            if len(values) == 0:
                return False
            return all(values[0] == v for v in values)

        depluralizer = {
            'kernel_shape': 'kernel',
            'strides': 'stride',
            'pads': 'pad'
        }

        def map_attr(attr):
            if attr.name not in depluralizer:
                return cls._onnx_arg_to_caffe2_arg(op_def.type, attr)

            # TODO: replace this with a version test
            if not can_be_singular(attr.ints):
                # A bare string cannot be raised; wrap the message in a real
                # exception so that callers actually get to see it.
                raise ValueError(
                    "Caffe2 doesn't support plural kernel_shape/strides/pads "
                    "prior to 6cb4d1ecb0dfb553f797f6a8a61dd6966909cb0b; "
                    "if you know your Caffe2 is recent enough, "
                    "comment out this test")
            # In fact, this code is MANDATORY, because prior to
            # https://github.com/caffe2/caffe2/commit/6cb4d1ecb0dfb553f797f6a8a61dd6966909cb0b
            # the pluralized versions were not supported.
            # You'll get an error like
            # "[enforce fail at conv_transpose_unpool_op_base.h:54] kernel_h_ > 0"
            # if your Caffe2 is too old and you actually use the plural
            # version
            singular_attr = AttributeProto()
            singular_attr.name = depluralizer[attr.name]
            singular_attr.i = attr.ints[0]
            return cls._onnx_arg_to_caffe2_arg(op_def.type, singular_attr)

        op_def.arg.extend([map_attr(attr) for attr in node_def.attribute])
        return op_def
예제 #24
0
    def _common_op_tranlator(cls, node_def, env):
        """Translate an ONNX node into a Caffe2 OperatorDef, updating ``env``.

        Inputs are resolved through ``env`` first.  Every output is then
        registered in ``env`` under its own name, except that outputs which
        the ONNX schema reports as produced from a consumed input are mapped
        to that input's blob, giving Caffe2-style in-place operators.  The
        operator type goes through ``cls._renamed_operators``, and every
        attribute other than "consumed_inputs" is converted with
        ``cls._onnx_arg_to_caffe2_arg``.

        Returns:
            The populated ``caffe2_pb2.OperatorDef``.
        """
        translated = caffe2_pb2.OperatorDef()
        # Inputs must be resolved before env is touched below.
        translated.input.extend([env[name] for name in node_def.input])

        for out_name in node_def.output:
            env[out_name] = out_name

        output_count = len(node_def.output)
        for attribute in node_def.attribute:
            if attribute.name != "consumed_inputs":
                continue
            schema = onnx.defs.get_schema(node_def.op_type)
            for position, in_name in enumerate(node_def.input):
                if attribute.ints[position] == 0:
                    continue
                # The schema says which output this consumed input turns
                # into.
                _, output_idx = schema.consumed(position)
                # Consumed outputs are not always present; batch norm in
                # test mode, for instance, does not return them.
                if output_idx < output_count:
                    # Reuse the blob of the consumed input instead of the
                    # ONNX output name.
                    env[node_def.output[output_idx]] = env[in_name]

        translated.output.extend([env[name] for name in node_def.output])
        translated.name = node_def.name
        translated.type = cls._renamed_operators.get(node_def.op_type,
                                                     node_def.op_type)
        translated.arg.extend(
            cls._onnx_arg_to_caffe2_arg(translated.type, attribute)
            for attribute in node_def.attribute
            if attribute.name != "consumed_inputs")
        return translated
예제 #25
0
    def test_convolution_affch_folding(
            self, stride, pad, kernel, size, input_channels,
            output_channels, batch_size, use_bias, group,
            inplace, gc, dc):
        """Check that optimizeForMKLDNN folds Conv + AffineChannel into Conv.

        Both operators are first run one after the other on ``dc[1]`` to
        obtain a reference result.  The optimized net must then consist of
        a single "Conv" operator whose output is close to that reference.
        """
        device = dc[1]
        result_blob = "X1" if inplace else "Y"
        conv_inputs = ["X0", "w0", "b0"] if use_bias else ["X0", "w0"]

        conv = core.CreateOperator(
            "Conv", conv_inputs, ["X1"],
            stride=stride, pad=pad, kernel=kernel, group=group,
            device_option=device)
        affch = core.CreateOperator(
            "AffineChannel", ["X1", "scale", "bias"], [result_blob],
            device_option=device)

        def centered(*shape):
            # Uniform float32 samples shifted to [-0.5, 0.5).
            return np.random.rand(*shape).astype(np.float32) - 0.5

        # The generation order is kept fixed: X, w, b, scale, bias.
        X = centered(batch_size, input_channels * group, size, size)
        w = centered(output_channels * group, input_channels, kernel, kernel)
        b = centered(output_channels * group)
        scale = np.random.rand(output_channels).astype(np.float32) + 0.5
        bias = centered(output_channels)
        blobs = (('X0', X), ('w0', w), ('b0', b),
                 ('scale', scale), ('bias', bias))

        def feed_blobs():
            for blob_name, data in blobs:
                workspace.FeedBlob(blob_name, data, device)

        def pinned_copy(op):
            # Independent copy of the operator with its device option set.
            clone = caffe2_pb2.OperatorDef()
            clone.CopyFrom(op)
            clone.device_option.CopyFrom(device)
            return clone

        old_ws_name = workspace.CurrentWorkspace()
        workspace.SwitchWorkspace("_device_check_", True)

        # Reference: run the two operators separately.
        feed_blobs()
        workspace.RunOperatorOnce(conv)
        workspace.RunOperatorOnce(affch)
        Y = workspace.FetchBlob(result_blob)

        # Optimized: same operators as a net passed through the optimizer.
        workspace.ResetWorkspace()
        old_net = caffe2_pb2.NetDef()
        old_net.op.extend([pinned_copy(conv), pinned_copy(affch)])
        feed_blobs()
        net = core.Net("net")
        net.Proto().CopyFrom(old_net)
        optimizeForMKLDNN(net)
        self.assertTrue(len(net.Proto().op) == 1)
        self.assertTrue(net.Proto().op[0].type == "Conv")
        workspace.RunOperatorOnce(net.Proto().op[0])
        Y1 = workspace.FetchBlob(result_blob)

        mismatch = not np.allclose(Y, Y1, atol=0.01, rtol=0.01)
        if mismatch:
            for diagnostic in (Y.flatten(), Y1.flatten(),
                               np.max(np.abs(Y - Y1))):
                print(diagnostic)
            self.assertTrue(False)

        workspace.SwitchWorkspace(old_ws_name)
예제 #26
0
    def test_convolution_grouped_sum_relu_fusion(self, stride, pad, kernel, size,
                             input_channels, output_channels,
                             batch_size, use_bias, group, gc, dc):
        """Compare Conv + Conv + Sum + Relu on ``dc[0]`` with the optimized net.

        The four operators are run one at a time on ``dc[0]`` to produce the
        reference blob "S0".  Copies retargeted to ``dc[1]`` are then passed
        through ``optimizeForMKLDNN`` and run as a net; the first output of
        the net's last operator must be close to the reference.
        """
        ref_dev = dc[0]
        opt_dev = dc[1]

        def grouped_conv(blob_names, out_name):
            # The bias blob (third name) is only wired in when use_bias is set.
            return core.CreateOperator(
                "Conv",
                blob_names if use_bias else blob_names[:2],
                [out_name],
                stride=stride,
                pad=pad,
                kernel=kernel,
                group=group,
                device_option=ref_dev
            )

        def centered(*shape):
            # Uniform float32 samples shifted to [-0.5, 0.5).
            return np.random.rand(*shape).astype(np.float32) - 0.5

        def retarget(op):
            # Independent copy of the operator moved to the second device.
            clone = caffe2_pb2.OperatorDef()
            clone.CopyFrom(op)
            clone.device_option.CopyFrom(opt_dev)
            return clone

        conv_S0 = grouped_conv(["SX0", "Sw0", "Sb0"], "S0")
        conv = grouped_conv(["X0", "w0", "b0"], "Y0")
        sum_op = core.CreateOperator(
            "Sum", ["S0", "Y0"], ["S0"], device_option=ref_dev)
        relu = core.CreateOperator(
            "Relu", ["S0"], ["S0"], device_option=ref_dev)
        pipeline = [conv_S0, conv, sum_op, relu]

        data_shape = (batch_size, input_channels * group, size, size)
        weight_shape = (output_channels * group, input_channels, kernel, kernel)
        bias_len = output_channels * group
        # The generation order is kept fixed: SX, Sw, Sb, X, w, b.
        SX = centered(*data_shape)
        Sw = centered(*weight_shape)
        Sb = centered(bias_len)
        X = centered(*data_shape)
        w = centered(*weight_shape)
        b = centered(bias_len)
        blobs = (('SX0', SX), ('Sw0', Sw), ('Sb0', Sb),
                 ('X0', X), ('w0', w), ('b0', b))

        def feed_blobs(device):
            for blob_name, data in blobs:
                workspace.FeedBlob(blob_name, data, device)

        old_ws_name = workspace.CurrentWorkspace()
        workspace.SwitchWorkspace("_device_check_", True)

        # Reference run, operator by operator.
        feed_blobs(ref_dev)
        for op in pipeline:
            workspace.RunOperatorOnce(op)
        S0 = workspace.FetchBlob('S0')

        # Optimized run on the second device.
        workspace.ResetWorkspace()
        old_net = caffe2_pb2.NetDef()
        old_net.op.extend([retarget(op) for op in pipeline])
        feed_blobs(opt_dev)
        net = core.Net("net")
        net.Proto().CopyFrom(old_net)
        optimizeForMKLDNN(net)
        workspace.RunNetOnce(net.Proto())
        # Conv/Sum fusion sometimes renames the output tensor, so the result
        # is taken from whatever the last operator writes.
        S2 = workspace.FetchBlob(net.Proto().op[-1].output[0])

        mismatch = not np.allclose(S0, S2, atol=0.01, rtol=0.01)
        if mismatch:
            for diagnostic in (S2.flatten(), S0.flatten(),
                               np.max(np.abs(S2 - S0))):
                print(diagnostic)
            self.assertTrue(False)

        workspace.SwitchWorkspace(old_ws_name)
예제 #27
0
    def test_convolution_relu_fusion(self, stride, pad, kernel, size,
                             input_channels, output_channels,
                             batch_size, use_bias, group, gc, dc):
        """Check manual and automatic Conv + Relu fusion against a reference.

        The reference is Conv followed by an in-place Relu on ``dc[0]``.
        It is compared with a hand-built "ConvFusion" operator
        (``fusion_type`` 1) on ``dc[1]``, and with the single "ConvFusion"
        operator that ``optimizeForMKLDNN`` must produce from the Conv/Relu
        pair retargeted to ``dc[1]``.
        """
        ref_dev = dc[0]
        opt_dev = dc[1]

        def with_optional_bias(blob_names):
            # The bias blob (third name) is only wired in when use_bias is set.
            return blob_names if use_bias else blob_names[:2]

        def retarget(op):
            # Independent copy of the operator moved to the second device.
            clone = caffe2_pb2.OperatorDef()
            clone.CopyFrom(op)
            clone.device_option.CopyFrom(opt_dev)
            return clone

        def report_if_different(reference, candidate):
            # Dump both tensors and the largest deviation, then fail.
            if np.allclose(reference, candidate, atol=0.01, rtol=0.01):
                return
            print(candidate.flatten())
            print(reference.flatten())
            print(np.max(np.abs(candidate - reference)))
            self.assertTrue(False)

        conv = core.CreateOperator(
            "Conv", with_optional_bias(["X0", "w0", "b0"]), ["Y0"],
            stride=stride, pad=pad, kernel=kernel, group=group,
            device_option=ref_dev)
        relu = core.CreateOperator(
            "Relu", ["Y0"], ["Y0"], device_option=ref_dev)

        # Manual fusion for Conv + ReLU
        conv_fusion = core.CreateOperator(
            "ConvFusion", with_optional_bias(["X1", "w1", "b1"]), ["Y1"],
            stride=stride, pad=pad, kernel=kernel, group=group,
            fusion_type=1,
            device_option=opt_dev)

        # The generation order is kept fixed: X, w, b.
        X = np.random.rand(
            batch_size, input_channels * group, size, size
        ).astype(np.float32) - 0.5
        w = np.random.rand(
            output_channels * group, input_channels, kernel, kernel
        ).astype(np.float32) - 0.5
        b = np.random.rand(output_channels * group).astype(np.float32) - 0.5

        def feed_blobs(suffix, device):
            for prefix, data in (('X', X), ('w', w), ('b', b)):
                workspace.FeedBlob(prefix + suffix, data, device)

        old_ws_name = workspace.CurrentWorkspace()
        workspace.SwitchWorkspace("_device_check_", True)

        # Reference: Conv then Relu, one operator at a time.
        feed_blobs('0', ref_dev)
        workspace.RunOperatorOnce(conv)
        workspace.RunOperatorOnce(relu)
        Y0 = workspace.FetchBlob('Y0')

        # Hand-built fused operator.
        workspace.ResetWorkspace()
        feed_blobs('1', opt_dev)
        workspace.RunOperatorOnce(conv_fusion)
        Y1 = workspace.FetchBlob('Y1')
        report_if_different(Y0, Y1)

        # Auto fusion for Conv + ReLU
        workspace.ResetWorkspace()
        old_net = caffe2_pb2.NetDef()
        old_net.op.extend([retarget(conv), retarget(relu)])
        feed_blobs('0', opt_dev)
        net = core.Net("net")
        net.Proto().CopyFrom(old_net)
        optimizeForMKLDNN(net)
        self.assertTrue(len(net.Proto().op) == 1)
        self.assertTrue(net.Proto().op[0].type == "ConvFusion")
        workspace.RunOperatorOnce(net.Proto().op[0])
        Y2 = workspace.FetchBlob('Y0')
        report_if_different(Y0, Y2)

        workspace.SwitchWorkspace(old_ws_name)
예제 #28
0
    def test_convolution_sum_fusion(self, stride, pad, kernel, size,
                             input_channels, output_channels,
                             batch_size, use_bias, group, sum_add, gc, dc):
        """Exercise Conv + Sum fusion by hand and through optimizeForMKLDNN.

        ``sum_add`` is the operator type used for the summation step.  The
        reference blob "S0" is produced on ``dc[0]`` by running MaxPool,
        Conv and the ``sum_add`` operator one at a time.  It is compared
        against:

          - a hand-built MaxPool + "ConvFusion" (``fusion_type`` 2) pair
            on ``dc[1]``;
          - the optimized net for [pool, conv, sum] and for
            [conv, pool, sum], each expected to have 2 operators with a
            "ConvFusion" at index 1;
          - two orderings that include a Dropout operator and are expected
            to keep all 4 operators;
          - a five-operator net (Relu, Conv, Relu, MaxPool, Sum), built
            from fresh operators and data, that is expected to keep all
            5 operators.
        """
        # Reference operators, all on dc[0].
        pool_S0 = core.CreateOperator(
            "MaxPool",
            ["SX0"],
            ["S0"],
            stride=2,
            pad=0,
            kernel=2,
            device_option=dc[0]
        )
        conv = core.CreateOperator(
            "Conv",
            ["X0", "w0", "b0"] if use_bias else ["X0", "w0"],
            ["Y0"],
            stride=stride,
            pad=pad,
            kernel=kernel,
            group=group,
            device_option=dc[0]
        )
        # The sum writes its result in place into "S0".
        sum = core.CreateOperator(
            sum_add,
            ["S0", "Y0"],
            ["S0"],
            device_option=dc[0]
        )

        # Manual fusion for Conv + Sum
        pool_S1 = core.CreateOperator(
            "MaxPool",
            ["SX1"],
            ["S1"],
            stride=2,
            pad=0,
            kernel=2,
            group=group,
            device_option=dc[1]
        )
        # "S1" is both the extra input and the output of the fused operator.
        conv_fusion = core.CreateOperator(
            "ConvFusion",
            ["X1", "w1", "b1", "S1"] if use_bias else ["X1", "w1", "S1"],
            ["S1"],
            stride=stride,
            pad=pad,
            kernel=kernel,
            group=group,
            fusion_type = 2,
            device_option=dc[1]
        )
        # Spatial size of the pooling input: the computed value is doubled
        # because the pooling above uses kernel 2 and stride 2.
        pool_input_size = int(math.ceil(float(size + 2 * pad - kernel + 1) / stride)) * 2;
        SX = np.random.rand(
            batch_size, output_channels * group, pool_input_size, pool_input_size).astype(np.float32) - 0.5
        X = np.random.rand(
            batch_size, input_channels * group, size, size).astype(np.float32) - 0.5
        w = np.random.rand(
                output_channels * group, input_channels, kernel, kernel) \
            .astype(np.float32) - 0.5
        b = np.random.rand(output_channels * group).astype(np.float32) - 0.5

        # Reference run on dc[0], operator by operator, in a scratch workspace.
        old_ws_name = workspace.CurrentWorkspace()
        workspace.SwitchWorkspace("_device_check_", True)
        workspace.FeedBlob('SX0', SX, dc[0])
        workspace.FeedBlob('X0', X, dc[0])
        workspace.FeedBlob('w0', w, dc[0])
        workspace.FeedBlob('b0', b, dc[0])
        workspace.RunOperatorOnce(pool_S0)
        workspace.RunOperatorOnce(conv)
        workspace.RunOperatorOnce(sum)
        S0 = workspace.FetchBlob('S0')

        # Hand-built fused pipeline on dc[1].
        workspace.ResetWorkspace()
        workspace.FeedBlob('SX1', SX, dc[1])
        workspace.FeedBlob('X1', X, dc[1])
        workspace.FeedBlob('w1', w, dc[1])
        workspace.FeedBlob('b1', b, dc[1])
        workspace.RunOperatorOnce(pool_S1)
        workspace.RunOperatorOnce(conv_fusion)
        S1 = workspace.FetchBlob('S1')

        if not np.allclose(S0, S1, atol=0.01, rtol=0.01):
            print(S1.flatten())
            print(S0.flatten())
            print(np.max(np.abs(S1 - S0)))
            self.assertTrue(False)

        # Auto fusion for Conv + Sum
        # Copies of the reference operators, retargeted to dc[1].
        workspace.ResetWorkspace()
        old_net = caffe2_pb2.NetDef()
        pool_S0_old = caffe2_pb2.OperatorDef()
        pool_S0_old.CopyFrom(pool_S0)
        pool_S0_old.device_option.CopyFrom(dc[1])
        conv_old = caffe2_pb2.OperatorDef()
        conv_old.CopyFrom(conv)
        conv_old.device_option.CopyFrom(dc[1])
        sum_old = caffe2_pb2.OperatorDef()
        sum_old.CopyFrom(sum)
        sum_old.device_option.CopyFrom(dc[1])
        old_net.op.extend([pool_S0_old, conv_old, sum_old])

        # Conv + Sum should be fused case: [PreNode, Conv, Sum]
        workspace.FeedBlob('SX0', SX, dc[1])
        workspace.FeedBlob('X0', X, dc[1])
        workspace.FeedBlob('w0', w, dc[1])
        workspace.FeedBlob('b0', b, dc[1])
        net = core.Net("net")
        net.Proto().CopyFrom(old_net)
        optimizeForMKLDNN(net)
        self.assertTrue(len(net.Proto().op) == 2)
        self.assertTrue(net.Proto().op[1].type == "ConvFusion")
        workspace.RunNetOnce(net.Proto())
        # The output tensor name will be changed by optimization
        # sometimes when applying conv sum fusion
        S2 = workspace.FetchBlob(net.Proto().op[-1].output[0])
        if not np.allclose(S0, S2, atol=0.01, rtol=0.01):
            print(S2.flatten())
            print(S0.flatten())
            print(np.max(np.abs(S2 - S0)))
            self.assertTrue(False)

        # Conv + Sum should be fused case: [Conv, PreNode, Sum]
        workspace.ResetWorkspace()
        old_net = caffe2_pb2.NetDef()
        workspace.FeedBlob('SX0', SX, dc[1])
        workspace.FeedBlob('X0', X, dc[1])
        workspace.FeedBlob('w0', w, dc[1])
        workspace.FeedBlob('b0', b, dc[1])
        old_net.op.extend([conv_old, pool_S0_old, sum_old])
        net = core.Net("net")
        net.Proto().CopyFrom(old_net)
        optimizeForMKLDNN(net)
        self.assertTrue(len(net.Proto().op) == 2)
        self.assertTrue(net.Proto().op[1].type == "ConvFusion")
        workspace.RunNetOnce(net.Proto())
        # The output tensor name will be changed by optimization
        # sometimes when applying conv sum fusion
        S2 = workspace.FetchBlob(net.Proto().op[-1].output[0])
        if not np.allclose(S0, S2, atol=0.01, rtol=0.01):
            print(S2.flatten())
            print(S0.flatten())
            print(np.max(np.abs(S2 - S0)))
            self.assertTrue(False)

        # Conv + Sum should not be fused case: [Conv, midOp, preNode, Sum] Conv output is used by midOp
        dropout = core.CreateOperator(
            "Dropout",
            ["Y0"],
            ["Y_dropout"],
            ratio=0.5,
            is_test=True,
            device_option=dc[1]
        )

        workspace.ResetWorkspace()
        workspace.FeedBlob('SX0', SX, dc[1])
        workspace.FeedBlob('X0', X, dc[1])
        workspace.FeedBlob('w0', w, dc[1])
        workspace.FeedBlob('b0', b, dc[1])
        old_net = caffe2_pb2.NetDef()
        old_net.op.extend([conv_old, dropout, pool_S0_old, sum_old])
        net = core.Net("net")
        net.Proto().CopyFrom(old_net)
        optimizeForMKLDNN(net)
        # All four operators must survive the optimization.
        self.assertTrue(len(net.Proto().op) == 4)
        workspace.RunNetOnce(net.Proto())
        S2 = workspace.FetchBlob(net.Proto().op[-1].output[0])
        if not np.allclose(S0, S2, atol=0.01, rtol=0.01):
            print(S2.flatten())
            print(S0.flatten())
            print(np.max(np.abs(S2 - S0)))
            self.assertTrue(False)

        # Conv + Sum should not be fused case: [Conv, preNode, Sum, midOp] preNode output is used by midOp
        # This sum writes to a separate blob "S3" instead of working in place.
        sum1 = core.CreateOperator(
            sum_add,
            ["S0", "Y0"],
            ["S3"],
            device_option=dc[1]
        )
        dropout = core.CreateOperator(
            "Dropout",
            ["S0"],
            ["Y_dropout"],
            ratio=0.5,
            is_test=True,
            device_option=dc[1]
        )

        workspace.ResetWorkspace()
        workspace.FeedBlob('SX0', SX, dc[1])
        workspace.FeedBlob('X0', X, dc[1])
        workspace.FeedBlob('w0', w, dc[1])
        workspace.FeedBlob('b0', b, dc[1])
        old_net = caffe2_pb2.NetDef()
        old_net.op.extend([conv_old, pool_S0_old, sum1, dropout])
        net = core.Net("net")
        net.Proto().CopyFrom(old_net)
        optimizeForMKLDNN(net)
        print("net={}\n".format(net.Proto()))
        self.assertTrue(len(net.Proto().op) == 4)
        workspace.RunNetOnce(net.Proto())
        # The sum is the second-to-last operator here; Dropout comes last.
        S2 = workspace.FetchBlob(net.Proto().op[-2].output[0])
        if not np.allclose(S0, S2, atol=0.01, rtol=0.01):
            print(S2.flatten())
            print(S0.flatten())
            print(np.max(np.abs(S2 - S0)))
            self.assertTrue(False)

        # Conv + Sum should not be fused case: [Conv, midOp, preNode, Sum]
        # midOp output has the same name with that of the Conv input
        # From here on conv, sum, X, w and b are rebound to new operators
        # and new data; the 1x1 convolution keeps the channel count.
        relu_0 = core.CreateOperator(
            "Relu",
            ["X0"],
            ["X1"],
            device_option=dc[0]
        )
        conv = core.CreateOperator(
            "Conv",
            ["X1", "w0", "b0"] if use_bias else ["X1", "w0"],
            ["Y0"],
            stride=1,
            pad=0,
            kernel=1,
            device_option=dc[0]
        )
        relu_1 = core.CreateOperator(
            "Relu",
            ["X1"],
            ["X1"],
            device_option=dc[0]
        )
        pool = core.CreateOperator(
            "MaxPool",
            ["X1"],
            ["S0"],
            stride=1,
            pad=0,
            kernel=1,
            device_option=dc[0]
        )
        sum = core.CreateOperator(
            "Sum",
            ["S0", "Y0"],
            ["S0"],
            device_option=dc[0]
        )

        X = np.random.rand(
            batch_size, input_channels, size, size).astype(np.float32) - 0.5
        w = np.random.rand(
            input_channels, input_channels, 1, 1).astype(np.float32) - 0.5
        b = np.random.rand(input_channels).astype(np.float32) - 0.5

        # NOTE(review): the test switches back to the workspace that was
        # current at entry and then resets it, so the rest of the test runs
        # in (and clears) that workspace - confirm this is intended.
        workspace.SwitchWorkspace(old_ws_name)
        workspace.ResetWorkspace()
        workspace.FeedBlob('X0', X, dc[0])
        workspace.FeedBlob('w0', w, dc[0])
        workspace.FeedBlob('b0', b, dc[0])
        workspace.RunOperatorOnce(relu_0)
        workspace.RunOperatorOnce(conv)
        workspace.RunOperatorOnce(relu_1)
        workspace.RunOperatorOnce(pool)
        workspace.RunOperatorOnce(sum)
        S0 = workspace.FetchBlob('S0')

        workspace.ResetWorkspace()
        workspace.FeedBlob('X0', X, dc[1])
        workspace.FeedBlob('w0', w, dc[1])
        workspace.FeedBlob('b0', b, dc[1])
        # Copies of the five operators, retargeted to dc[1].
        relu_0_old = caffe2_pb2.OperatorDef()
        relu_0_old.CopyFrom(relu_0)
        relu_0_old.device_option.CopyFrom(dc[1])
        conv_old = caffe2_pb2.OperatorDef()
        conv_old.CopyFrom(conv)
        conv_old.device_option.CopyFrom(dc[1])
        relu_1_old = caffe2_pb2.OperatorDef()
        relu_1_old.CopyFrom(relu_1)
        relu_1_old.device_option.CopyFrom(dc[1])
        pool_old = caffe2_pb2.OperatorDef()
        pool_old.CopyFrom(pool)
        pool_old.device_option.CopyFrom(dc[1])
        sum_old = caffe2_pb2.OperatorDef()
        sum_old.CopyFrom(sum)
        sum_old.device_option.CopyFrom(dc[1])

        old_net = caffe2_pb2.NetDef()
        old_net.op.extend([relu_0_old, conv_old, relu_1_old, pool_old, sum_old])
        net = core.Net("net")
        net.Proto().CopyFrom(old_net)
        optimizeForMKLDNN(net)
        # Nothing may be fused: all five operators must remain.
        self.assertTrue(len(net.Proto().op) == 5)
        workspace.RunNetOnce(net.Proto())
        S2 = workspace.FetchBlob(net.Proto().op[-1].output[0])
        if not np.allclose(S0, S2, atol=0.01, rtol=0.01):
            print(S2.flatten())
            print(S0.flatten())
            print(np.max(np.abs(S2 - S0)))
            self.assertTrue(False)
예제 #29
0
def _prepare_blob_copy_op(from_name, to_name):
    """Return a "Copy" OperatorDef reading ``from_name`` and writing ``to_name``."""
    op = caffe2_pb2.OperatorDef()
    op.type = "Copy"
    # Exactly one input and one output blob.
    op.input.append(from_name)
    op.output.append(to_name)
    return op
예제 #30
0
    def test_in_place(self, stride, pad, kernel, size, input_channels,
                      output_channels, batch_size, use_bias, gc, dc):
        """Compare Conv + in-place AffineChannel between ``dc[0]`` and ``dc[1]``.

        On ``dc[0]`` the pair is run twice, operator by operator.  On
        ``dc[1]`` copies of the two operators are run twice as a net.  The
        final "Y" blobs of both runs must be close.
        """
        # To expose fallback in-place potential issue, the fallback op
        # following ideep op must be run at least two iterations.
        ref_dev = dc[0]
        opt_dev = dc[1]

        def retarget(op):
            # Independent copy of the operator moved to the second device.
            clone = caffe2_pb2.OperatorDef()
            clone.CopyFrom(op)
            clone.device_option.CopyFrom(opt_dev)
            return clone

        conv_inputs = ["X", "w", "b"] if use_bias else ["X", "w"]
        conv = core.CreateOperator(
            "Conv", conv_inputs, ["Y"],
            stride=stride, pad=pad, kernel=kernel,
            device_option=ref_dev)

        # The generation order is kept fixed: X, w, b (then scale, bias below).
        X = np.random.rand(
            batch_size, input_channels, size, size
        ).astype(np.float32) - 0.5
        w = np.random.rand(
            output_channels, input_channels, kernel, kernel
        ).astype(np.float32) - 0.5
        b = np.random.rand(output_channels).astype(np.float32) - 0.5

        old_ws_name = workspace.CurrentWorkspace()
        workspace.SwitchWorkspace("_device_check_", True)
        for blob_name, data in (('X', X), ('w', w), ('b', b)):
            workspace.FeedBlob(blob_name, data, ref_dev)
        workspace.RunOperatorOnce(conv)
        Y = workspace.FetchBlob('Y')

        # scale and bias are sized from axis 1 of the convolution output.
        channel_count = Y.shape[1]
        scale = np.random.randn(channel_count).astype(np.float32)
        bias = np.random.randn(channel_count).astype(np.float32)
        ac = core.CreateOperator(
            "AffineChannel", ["Y", "scale", "bias"], ["Y"],
            is_learnable=False,
            device_option=ref_dev)
        workspace.FeedBlob('scale', scale, ref_dev)
        workspace.FeedBlob('bias', bias, ref_dev)
        # conv has already run once above; finish the first pass and run
        # the pair a second time.
        for op in (ac, conv, ac):
            workspace.RunOperatorOnce(op)
        Y0 = workspace.FetchBlob('Y')

        workspace.ResetWorkspace()
        dev_net = caffe2_pb2.NetDef()
        dev_net.op.extend([retarget(conv), retarget(ac)])
        for blob_name, data in (('X', X), ('w', w), ('b', b),
                                ('scale', scale), ('bias', bias)):
            workspace.FeedBlob(blob_name, data, opt_dev)
        for _ in range(2):
            workspace.RunNetOnce(dev_net)
        Y1 = workspace.FetchBlob('Y')

        mismatch = not np.allclose(Y0, Y1, atol=0.01, rtol=0.01)
        if mismatch:
            for diagnostic in (Y1.flatten(), Y0.flatten(),
                               np.max(np.abs(Y1 - Y0))):
                print(diagnostic)
            self.assertTrue(False)

        workspace.SwitchWorkspace(old_ws_name)