Example #1
0
 def test_lengths_ops(self):
     # Build the lengths-tensor strategy, then run the common reference suite.
     strategy = hu.lengths_tensor(
         dtype=np.float32, min_value=1, max_value=10, allow_empty=True
     )
     LengthsTester().test('Lengths', strategy, REFERENCES_ALL)(self)
Example #2
0
 def test_lengths_ops(self):
     # Exercise every reference op, including the lengths-only ones.
     refs = REFERENCES_ALL + REFERENCES_LENGTHS_ONLY
     strategy = hu.lengths_tensor(
         dtype=np.float32, min_value=1, max_value=5, allow_empty=True
     )
     LengthsTester()._test('Lengths', strategy, refs)(self)
Example #3
0
 def test_lengths_ops(self):
     # Run the lengths-based reference checks (lengths-only ops included).
     tester = LengthsTester()
     tester._test(
         'Lengths',
         hu.lengths_tensor(dtype=np.float32, min_value=1,
                           max_value=5, allow_empty=True),
         REFERENCES_ALL + REFERENCES_LENGTHS_ONLY,
     )(self)
Example #4
0
 def test_lengths_ops(self):
     # Validate the common reference ops against the Lengths variant.
     tester = LengthsTester()
     strategy = hu.lengths_tensor(
         dtype=np.float32, min_value=1, max_value=10, allow_empty=True
     )
     tester.test('Lengths', strategy, REFERENCES_ALL)(self)
Example #5
0
class TestLengthsPadOp(serial.SerializedTestCase):

    @serial.given(
        inputs=hu.lengths_tensor(
            dtype=np.float32,
            min_value=1,
            max_value=5,
            allow_empty=True,
        ),
        delta_length=st.integers(0, 10),
        padding_value=st.floats(-10.0, 10.0),
        **hu.gcs
    )
    def test_lengths_pad(self, inputs, delta_length, padding_value, gc, dc):
        """LengthsPad must match a NumPy reference that pads each segment
        of `data` (as described by `lengths`) out to `target_length` rows
        filled with `padding_value`."""
        data, lengths = inputs
        longest = np.max(lengths) if len(lengths) > 0 else 0
        # Pad at least to length 1 so a fully-empty input still has output.
        target_length = max(longest + delta_length, 1)

        def lengths_pad_op(data, lengths):
            num_segments = len(lengths)
            padded = np.full(
                (target_length * num_segments, ) + data.shape[1:],
                padding_value,
                dtype=np.float32,
            )
            src = 0
            for seg, seg_len in enumerate(lengths):
                dst = seg * target_length
                padded[dst:dst + seg_len] = data[src:src + seg_len]
                src += seg_len
            return [padded]

        op = core.CreateOperator(
            "LengthsPad",
            ["data", "lengths"],
            ["data_padded"],
            target_length=target_length,
            padding_value=padding_value,
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[data, lengths],
            reference=lengths_pad_op,
        )
class TestUtilityOps(serial.SerializedTestCase):
    """Reference, device and gradient checks for assorted utility operators
    (Slice, ResizeLike, Transpose, NanCheck, Max/Min (+grads), Sum,
    LengthsGather, LengthsToRanges, Size, Alias, Range)."""

    @given(X=hu.tensor(), args=st.booleans(), **hu.gcs)
    @settings(deadline=10000)
    def test_slice(self, X, args, gc, dc):
        """Slice along one random dim, passing starts/ends either as op
        arguments (args=True) or as input blobs (args=False)."""
        X = X.astype(dtype=np.float32)
        # Pick one dimension and a [slice_start, slice_end) range inside it.
        dim = random.randint(0, X.ndim - 1)
        slice_start = random.randint(0, X.shape[dim] - 1)
        slice_end = random.randint(slice_start, X.shape[dim] - 1)
        starts = np.array([0] * X.ndim).astype(np.int32)
        ends = np.array([-1] * X.ndim).astype(np.int32)  # -1 means "to end"
        starts[dim] = slice_start
        ends[dim] = slice_end

        if args:
            op = core.CreateOperator(
                "Slice", ["X"], ["Y"], starts=starts, ends=ends, device_option=gc
            )

            def slice_ref(X):
                slc = [slice(None)] * X.ndim
                slc[dim] = slice(slice_start, slice_end)
                # Index with a tuple: indexing with a *list* of slices is
                # deprecated since NumPy 1.15 and rejected by newer NumPy.
                return [X[tuple(slc)]]
            inputs = [X]
        else:
            op = core.CreateOperator(
                "Slice", ["X", "starts", "ends"], ["Y"], device_option=gc
            )

            def slice_ref(x, starts, ends):
                slc = [slice(None)] * x.ndim
                slc[dim] = slice(slice_start, slice_end)
                return [x[tuple(slc)]]
            inputs = [X, starts, ends]

        self.assertReferenceChecks(gc, op, inputs, slice_ref)
        self.assertDeviceChecks(dc, op, inputs, [0])
        self.assertGradientChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            outputs_to_check=0,
            outputs_with_grads=[0],
        )

    @given(ndims=st.integers(min_value=1, max_value=10), **hu.gcs)
    @settings(deadline=10000)
    def test_resize_like(self, ndims, gc, dc):
        """ResizeLike reshapes X to Y's shape (same total element count)."""
        X = np.zeros((ndims * 2, ))
        Y = np.zeros((ndims, 2))

        op = core.CreateOperator(
            "ResizeLike", ["X", "Y"], ["Z"],
        )

        def resize_like(X, Y):
            return [X.reshape(Y.shape)]

        self.assertDeviceChecks(dc, op, [X, Y], [0])
        self.assertReferenceChecks(gc, op, [X, Y], resize_like, ensure_outputs_are_inferred=True)

    @given(dtype=st.sampled_from([np.float32, np.int32]),
           ndims=st.integers(min_value=1, max_value=5),
           seed=st.integers(min_value=0, max_value=65536),
           null_axes=st.booleans(),
           engine=st.sampled_from(['CUDNN', None]),
           **hu.gcs)
    @settings(deadline=10000)
    def test_transpose(self, dtype, ndims, seed, null_axes, engine, gc, dc):
        """Transpose with either an explicit axes permutation or the
        default (axes omitted => full reversal, handled by np.transpose)."""
        if (gc.device_type == caffe2_pb2.CUDA and engine == "CUDNN"):
            # cudnn 5.1 does not support int.
            assume(workspace.GetCuDNNVersion() >= 6000 or dtype != np.int32)

        dims = (np.random.rand(ndims) * 16 + 1).astype(np.int32)
        X = (np.random.rand(*dims) * 16).astype(dtype)

        if null_axes:
            # axes stays None; np.transpose(x, None) reverses all dims,
            # matching the operator's default behavior.
            axes = None
            op = core.CreateOperator(
                "Transpose",
                ["input"], ["output"],
                engine=engine)
        else:
            np.random.seed(int(seed))
            axes = [int(v) for v in list(np.random.permutation(X.ndim))]
            op = core.CreateOperator(
                "Transpose",
                ["input"], ["output"],
                axes=axes,
                engine=engine)

        def transpose_ref(x, axes):
            return (np.transpose(x, axes),)

        # axes rides along as an extra "input" only so the reference
        # function can see it; the op itself takes a single input blob.
        self.assertReferenceChecks(gc, op, [X, axes],
                                   transpose_ref)

    @given(m=st.integers(5, 10), n=st.integers(5, 10),
           o=st.integers(5, 10), nans=st.booleans(), **hu.gcs)
    @settings(deadline=10000)
    def test_nan_check(self, m, n, o, nans, gc, dc):
        """NanCheck passes clean tensors through and must raise on NaN."""
        other = np.array([1, 2, 3]).astype(np.float32)
        X = np.random.rand(m, n, o).astype(np.float32)
        if nans:
            # Poison exactly one random element.
            x_nan = np.random.randint(0, m)
            y_nan = np.random.randint(0, n)
            z_nan = np.random.randint(0, o)
            X[x_nan, y_nan, z_nan] = float('NaN')

        def nan_reference(X, Y):
            if not np.isnan(X).any():
                return [X]
            else:
                # Dummy value; the op is expected to raise before comparing.
                return [np.array([])]

        op = core.CreateOperator(
            "NanCheck",
            ["X", "other"],
            ["Y"]
        )

        # The op must raise exactly when the input contains NaN.
        try:
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=[X, other],
                reference=nan_reference,
            )
            if nans:
                self.assertTrue(False, "Did not fail when presented with NaN!")
        except RuntimeError:
            self.assertTrue(nans, "No NaNs but failed")

        # NOTE(review): only X is fed here although the op declares two
        # inputs; any resulting failure is swallowed below unless clean.
        try:
            self.assertGradientChecks(
                device_option=gc,
                op=op,
                inputs=[X],
                outputs_to_check=0,
                outputs_with_grads=[0],
            )
            if nans:
                self.assertTrue(False, "Did not fail when gradient had NaN!")
        except RuntimeError:
            pass

    @serial.given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    def test_elementwise_max(self, n, m, d, gc, dc):
        """Max over three same-shaped tensors equals nested np.maximum."""
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        inputs = [X, Y, Z]

        def max_op(X, Y, Z):
            return [np.maximum(np.maximum(X, Y), Z)]

        op = core.CreateOperator(
            "Max",
            ["X", "Y", "Z"],
            ["mx"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=max_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0])

    @given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    @settings(deadline=10000)
    def test_elementwise_max_grad(self, n, m, d, gc, dc):
        """MaxGradient routes the output grad to each input where it
        equals the max (ties receive the gradient in full)."""
        go = np.random.rand(n, m, d).astype(np.float32)
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        mx = np.maximum(np.maximum(X, Y), Z)
        inputs = [mx, go, X, Y, Z]

        def max_grad_op(mx, go, X, Y, Z):
            def mx_grad(a):
                # Boolean mask promotes, so grad flows where a == mx.
                return go * (mx == a)

            return [mx_grad(a) for a in [X, Y, Z]]

        op = core.CreateOperator(
            "MaxGradient",
            ["mx", "go", "X", "Y", "Z"],
            ["gX", "gY", "gZ"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=max_grad_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0, 1, 2])

    @serial.given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    def test_elementwise_min(self, n, m, d, gc, dc):
        """Min over three same-shaped tensors equals nested np.minimum."""
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        inputs = [X, Y, Z]

        def min_op(X, Y, Z):
            return [np.minimum(np.minimum(X, Y), Z)]

        op = core.CreateOperator(
            "Min",
            ["X", "Y", "Z"],
            ["mx"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=min_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0])

    @given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    @settings(deadline=10000)
    def test_elementwise_min_grad(self, n, m, d, gc, dc):
        """MinGradient mirrors MaxGradient: grad flows where input == min."""
        go = np.random.rand(n, m, d).astype(np.float32)
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        mx = np.minimum(np.minimum(X, Y), Z)
        inputs = [mx, go, X, Y, Z]

        def min_grad_op(mx, go, X, Y, Z):
            def mx_grad(a):
                return go * (mx == a)

            return [mx_grad(a) for a in [X, Y, Z]]

        op = core.CreateOperator(
            "MinGradient",
            ["mx", "go", "X", "Y", "Z"],
            ["gX", "gY", "gZ"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=min_grad_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0, 1, 2])

    @given(
        n=st.integers(1, 8), m=st.integers(1, 10), d=st.integers(1, 4),
        in_place=st.booleans(), engine=st.sampled_from(["", "CUDNN"]),
        seed=st.integers(min_value=0, max_value=65535),
        dtype=st.sampled_from([np.int32, np.int64, np.float32]),
        **hu.gcs)
    @settings(deadline=10000)
    def test_sum(
            self, n, m, d, in_place, engine, seed, dtype, gc, dc):
        """Sum of m (n, d) tensors, optionally writing in place to X0."""
        input_names = []
        input_vars = []
        np.random.seed(seed)
        for i in range(m):
            # Blob names X0..X{m-1} paired positionally with input_vars.
            input_names.append('X' + str(i))
            input_vars.append(np.random.rand(n, d).astype(dtype))

        def sum_op_ref(*args):
            res = np.zeros((n, d))
            for i in range(m):
                res = res + args[i]
            return (res, )

        op = core.CreateOperator(
            "Sum",
            input_names,
            [input_names[0]] if in_place else ['Y'],
            engine=engine,
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=input_vars,
            reference=sum_op_ref,
        )
        self.assertDeviceChecks(dc, op, input_vars, [0])

    @given(
        inputs=hu.lengths_tensor().flatmap(
            lambda pair: st.tuples(
                st.just(pair[0]),
                st.just(pair[1]),
                hu.dims(max_value=len(pair[1])),
            )
        ).flatmap(
            lambda tup: st.tuples(
                st.just(tup[0]),
                st.just(tup[1]),
                hu.arrays(
                    tup[2], dtype=np.int32,
                    elements=st.integers(
                        min_value=0, max_value=len(tup[1]) - 1)),
            )
        ),
        **hu.gcs_cpu_only)
    @settings(deadline=1000)
    def test_lengths_gather(self, inputs, gc, dc):
        """LengthsGather concatenates whole segments selected by index."""
        items = inputs[0]
        lengths = inputs[1]
        indices = inputs[2]

        def lengths_gather_op(items, lengths, indices):
            # ends[i] is the exclusive end offset of segment i.
            ends = np.cumsum(lengths)
            return [np.concatenate(
                list(items[ends[i] - lengths[i]:ends[i]] for i in indices))]

        op = core.CreateOperator(
            "LengthsGather",
            ["items", "lengths", "indices"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[items, lengths, indices],
            reference=lengths_gather_op,
        )

    @given(
        inputs=hu.lengths_tensor(),
        **hu.gcs_cpu_only)
    @settings(deadline=1000)
    def test_lengths_to_ranges(self, inputs, gc, dc):
        """LengthsToRanges maps each length to its [offset, length] pair,
        and its shape/type inference must match the actual output."""
        _, lengths = inputs

        def lengths_to_ranges_op(lengths):
            # cumsum over [0, l0, l1, ...] yields each segment's offset.
            return [
                [[x, y] for x, y in zip(np.cumsum(np.append([0], lengths)),
                                        lengths)]
            ]

        op = core.CreateOperator(
            "LengthsToRanges",
            ["lengths"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[lengths],
            reference=lengths_to_ranges_op,
        )

        # Test shape inference logic
        net = core.Net("test_shape_inference")

        workspace.FeedBlob("lengths", lengths)
        output = net.LengthsToRanges(
            ["lengths"],
            ["output"]
        )
        (shapes, types) = workspace.InferShapesAndTypes([net])
        workspace.RunNetOnce(net)
        self.assertEqual(shapes[output], list(workspace.blobs[output].shape))
        self.assertEqual(shapes[output], list(lengths.shape) + [2])
        self.assertEqual(types[output], core.DataType.INT32)

    @given(**hu.gcs)
    @settings(deadline=None, max_examples=50)
    def test_size_op(self, gc, dc):
        """Size returns the total number of elements of its input."""
        X = np.array([[1, 2], [3, 4]]).astype(np.float32)

        def size_op(tensor):
            return [np.prod(tensor.shape)]

        op = core.CreateOperator(
            "Size",
            ["X"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X],
            reference=size_op,
        )

    def test_alias_op(self):
        """ Don't use hypothesis because there are only 2 cases to check"""
        for size in [0, 5]:
            X = np.arange(size).astype(np.float32)
            workspace.FeedBlob('X', X)

            op = core.CreateOperator(
                "Alias",
                ["X"],
                ["Y"]
            )
            workspace.RunOperatorOnce(op)
            Y = workspace.FetchBlob('Y')
            np.testing.assert_array_equal(X, Y)

    @given(**hu.gcs)
    @settings(deadline=10000)
    def test_range(self, gc, dc):
        """Range mirrors np.arange for 1-3 inputs, and a zero step raises."""
        names = [
            ('stop_',),
            ('start_', 'stop_'),
            ('start_', 'stop_', 'step_'),
        ]
        # Most random values aren't great here, so use a fixed set instead of
        # hypothesis.
        for inputs in (
            (10,),
            (np.float32(10.0),),
            (0,),
            (0, 0),
            (10., 5.0, -1.),
            (2, 10000),
            (2, 10000, 20000),
            (2, 10000, -1),
        ):
            inputs = [np.array(v) for v in inputs]
            op = core.CreateOperator(
                "Range",
                names[len(inputs) - 1],
                ["Y"]
            )

            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=inputs,
                reference=lambda *x: [np.arange(*x)],
            )
            self.assertDeviceChecks(dc, op, inputs, [0])

        # step == 0 must be rejected with a descriptive error.
        inputs = (np.array(0), np.array(10), np.array(0))
        op = core.CreateOperator(
            "Range",
            names[len(inputs) - 1],
            ["Y"]
        )
        with six.assertRaisesRegex(self, RuntimeError, 'Step size cannot be 0'):
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=inputs,
                reference=lambda *x: [np.arange(*x)],
            )
class TorchIntegration(hu.HypothesisTestCase):
    @given(roi_counts=st.lists(st.integers(0, 5), min_size=1, max_size=10),
           num_classes=st.integers(1, 10),
           rotated=st.booleans(),
           angle_bound_on=st.booleans(),
           clip_angle_thresh=st.sampled_from([-1.0, 1.0]),
           **hu.gcs_cpu_only)
    def test_bbox_transform(
        self,
        roi_counts,
        num_classes,
        rotated,
        angle_bound_on,
        clip_angle_thresh,
        gc,
        dc,
    ):
        """
        Check torch.ops._caffe2.BBoxTransform against the Caffe2 operator,
        with rois for multiple images in a batch.
        """
        rois, deltas, im_info = create_bbox_transform_inputs(
            roi_counts, num_classes, rotated)

        def bbox_transform_ref():
            # Run the Caffe2 operator through the global workspace and
            # return its "box_out" output as a numpy array.
            ref_op = core.CreateOperator(
                "BBoxTransform",
                ["rois", "deltas", "im_info"],
                ["box_out"],
                apply_scale=False,
                rotated=rotated,
                angle_bound_on=angle_bound_on,
                clip_angle_thresh=clip_angle_thresh,
            )
            workspace.FeedBlob("rois", rois)
            workspace.FeedBlob("deltas", deltas)
            workspace.FeedBlob("im_info", im_info)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("box_out")

        box_out = torch.tensor(bbox_transform_ref())
        # Positional args presumably mirror the op schema (weights,
        # apply_scale, rotated, angle_bound_on, angle bounds,
        # clip_angle_thresh) -- verify against the BBoxTransform schema.
        a, b = torch.ops._caffe2.BBoxTransform(
            torch.tensor(rois),
            torch.tensor(deltas),
            torch.tensor(im_info),
            [1.0, 1.0, 1.0, 1.0],
            False,
            rotated,
            angle_bound_on,
            -90,
            90,
            clip_angle_thresh,
            legacy_plus_one=True,
        )

        # Only the transformed boxes are compared; the second output (b)
        # is not checked here.
        torch.testing.assert_allclose(box_out, a)

    @given(roi_counts=st.lists(st.integers(0, 5), min_size=1, max_size=10),
           num_classes=st.integers(1, 10),
           rotated=st.booleans(),
           angle_bound_on=st.booleans(),
           clip_angle_thresh=st.sampled_from([-1.0, 1.0]),
           **hu.gcs_cpu_only)
    def test_box_with_nms_limits(
        self,
        roi_counts,
        num_classes,
        rotated,
        angle_bound_on,
        clip_angle_thresh,
        gc,
        dc,
    ):
        """Check torch.ops._caffe2.BoxWithNMSLimit against the Caffe2
        operator, using boxes produced by BBoxTransform as input."""
        rotated = False  # FIXME remove this after rotation is supported
        rois, deltas, im_info = create_bbox_transform_inputs(
            roi_counts, num_classes, rotated)
        # Produce the predicted boxes / batch splits the NMS step consumes.
        pred_bbox, batch_splits = [
            t.detach().numpy() for t in torch.ops._caffe2.BBoxTransform(
                torch.tensor(rois),
                torch.tensor(deltas),
                torch.tensor(im_info),
                [1.0, 1.0, 1.0, 1.0],
                False,
                rotated,
                angle_bound_on,
                -90,
                90,
                clip_angle_thresh,
                legacy_plus_one=True,
            )
        ]
        class_prob = np.random.randn(sum(roi_counts),
                                     num_classes).astype(np.float32)
        score_thresh = 0.5
        nms_thresh = 0.5
        # Keep roughly half of the rois (float here; cast to int below).
        topk_per_image = sum(roi_counts) / 2

        def box_with_nms_limit_ref():
            input_blobs = ["class_prob", "pred_bbox", "batch_splits"]
            output_blobs = [
                "score_nms",
                "bbox_nms",
                "class_nms",
                "batch_splits_nms",
                "keeps_nms",
                "keeps_size_nms",
            ]
            ref_op = core.CreateOperator(
                "BoxWithNMSLimit",
                input_blobs,
                output_blobs,
                score_thresh=float(score_thresh),
                nms=float(nms_thresh),
                detections_per_im=int(topk_per_image),
                soft_nms_enabled=False,
                soft_nms_method="linear",
                soft_nms_sigma=0.5,
                soft_nms_min_score_thres=0.001,
                rotated=rotated,
            )
            workspace.FeedBlob("class_prob", class_prob)
            workspace.FeedBlob("pred_bbox", pred_bbox)
            workspace.FeedBlob("batch_splits", batch_splits)
            workspace.RunOperatorOnce(ref_op)
            # NOTE(review): this is a generator, so FetchBlob only runs
            # when iterated in the zip below; safe here because the torch
            # call in between does not touch the workspace.
            return (workspace.FetchBlob(b) for b in output_blobs)

        output_refs = box_with_nms_limit_ref()
        # Same attributes as the reference op, plus the torch-binding-only
        # keyword arguments.
        outputs = torch.ops._caffe2.BoxWithNMSLimit(
            torch.tensor(class_prob),
            torch.tensor(pred_bbox),
            torch.tensor(batch_splits),
            score_thresh=float(score_thresh),
            nms=float(nms_thresh),
            detections_per_im=int(topk_per_image),
            soft_nms_enabled=False,
            soft_nms_method="linear",
            soft_nms_sigma=0.5,
            soft_nms_min_score_thres=0.001,
            rotated=rotated,
            cls_agnostic_bbox_reg=False,
            input_boxes_include_bg_cls=True,
            output_classes_include_bg_cls=True,
            legacy_plus_one=True,
        )

        # Compare each output pair in declaration order.
        for o, o_ref in zip(outputs, output_refs):
            torch.testing.assert_allclose(o, o_ref)

    @given(
        A=st.integers(min_value=4, max_value=4),
        H=st.integers(min_value=10, max_value=10),
        W=st.integers(min_value=8, max_value=8),
        img_count=st.integers(min_value=3, max_value=3),
    )
    def test_generate_proposals(self, A, H, W, img_count):
        """torch.ops._caffe2.GenerateProposals must agree with the Caffe2
        GenerateProposals operator on fixed-size synthetic inputs."""
        scores = np.ones((img_count, A, H, W)).astype(np.float32)
        n_deltas = img_count * 4 * A * H * W
        bbox_deltas = np.linspace(0, 10, num=n_deltas).reshape(
            (img_count, 4 * A, H, W)).astype(np.float32)
        im_info = np.ones((img_count, 3)).astype(np.float32) / 10
        anchors = np.ones((A, 4)).astype(np.float32)

        def generate_proposals_ref():
            # Run the reference Caffe2 operator through the workspace.
            ref_op = core.CreateOperator(
                "GenerateProposals",
                ["scores", "bbox_deltas", "im_info", "anchors"],
                ["rois", "rois_probs"],
                spatial_scale=2.0,
            )
            workspace.FeedBlob("scores", scores)
            workspace.FeedBlob("bbox_deltas", bbox_deltas)
            workspace.FeedBlob("im_info", im_info)
            workspace.FeedBlob("anchors", anchors)
            workspace.RunOperatorOnce(ref_op)
            return (workspace.FetchBlob("rois"),
                    workspace.FetchBlob("rois_probs"))

        expected_rois, expected_probs = generate_proposals_ref()
        expected_rois = torch.tensor(expected_rois)
        expected_probs = torch.tensor(expected_probs)
        actual_rois, actual_probs = torch.ops._caffe2.GenerateProposals(
            torch.tensor(scores),
            torch.tensor(bbox_deltas),
            torch.tensor(im_info),
            torch.tensor(anchors),
            2.0,
            6000,
            300,
            0.7,
            16,
            True,
            -90,
            90,
            1.0,
            legacy_plus_one=True,
        )
        torch.testing.assert_allclose(expected_rois, actual_rois)
        torch.testing.assert_allclose(expected_probs, actual_probs)

    @given(
        bsz=st.integers(1, 5),
        seq_lens=st.integers(1, 6),
        emb_lens=st.integers(5, 10),
        hidden_size=st.integers(3, 7),
        num_layers=st.integers(1, 4),
        has_biases=st.booleans(),
        is_bidirectional=st.booleans(),
        batch_first=st.booleans(),
    )
    def test_inference_lstm(
        self,
        bsz,
        seq_lens,
        emb_lens,
        hidden_size,
        num_layers,
        has_biases,
        is_bidirectional,
        batch_first,
    ):
        """torch.ops._caffe2.InferenceLSTM must match the Caffe2
        InferenceLSTM operator when both use the weights of the same
        torch.nn.LSTM."""
        num_directions = 2 if is_bidirectional else 1
        # Zero initial hidden state; reused for the cell state as well.
        hx = np.zeros((num_layers * num_directions, bsz, hidden_size),
                      dtype=np.float32)

        if batch_first:
            inputs = np.random.randn(bsz, seq_lens,
                                     emb_lens).astype(np.float32)
        else:
            inputs = np.random.randn(seq_lens, bsz,
                                     emb_lens).astype(np.float32)

        # Source of the weights fed to both implementations.
        torch_lstm = torch.nn.LSTM(
            emb_lens,
            hidden_size,
            batch_first=batch_first,
            bidirectional=is_bidirectional,
            bias=has_biases,
            num_layers=num_layers,
        )

        def inference_lstm_ref():
            # Feed inputs, both initial states, then every flat LSTM
            # weight as param_0, param_1, ... in torch's internal order.
            input_names = ["inputs", "hidden_0", "hidden_1"]
            workspace.FeedBlob("inputs", inputs)
            workspace.FeedBlob("hidden_0", hx)
            workspace.FeedBlob("hidden_1", hx)
            for i, param in enumerate(torch_lstm._flat_weights):
                input_names.append("param_{}".format(i))
                workspace.FeedBlob("param_{}".format(i),
                                   param.detach().numpy())

            ref_op = core.CreateOperator(
                "InferenceLSTM",
                input_names,
                ["output", "hidden", "cell"],
                num_layers=num_layers,
                has_biases=has_biases,
                batch_first=batch_first,
                bidirectional=is_bidirectional,
            )
            workspace.RunOperatorOnce(ref_op)
            return (workspace.FetchBlob("output"),
                    workspace.FetchBlob("hidden"), workspace.FetchBlob("cell"))

        output, hidden, cell = inference_lstm_ref()
        output = torch.tensor(output)
        hidden = torch.tensor(hidden)
        cell = torch.tensor(cell)
        # Same input/state/weight ordering as the reference blob list.
        lstm_in = [
            torch.from_numpy(inputs),
            torch.from_numpy(hx),
            torch.from_numpy(hx),
        ] + [param.detach() for param in torch_lstm._flat_weights]

        a, b, c = torch.ops._caffe2.InferenceLSTM(lstm_in, num_layers,
                                                  has_biases, batch_first,
                                                  is_bidirectional)
        torch.testing.assert_allclose(output, a)
        torch.testing.assert_allclose(hidden, b)
        torch.testing.assert_allclose(cell, c)

    # Test case is using workspace.has_cuda_support and not workspace.has_gpu_support
    # to exclude it from HIP because tensor interop doesn't work for HIP tensors yet
    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    @given(
        A=st.integers(min_value=4, max_value=4),
        H=st.integers(min_value=10, max_value=10),
        W=st.integers(min_value=8, max_value=8),
        img_count=st.integers(min_value=3, max_value=3),
    )
    def test_generate_proposals_cuda(self, A, H, W, img_count):
        """CUDA variant of test_generate_proposals: the reference runs on
        the (CPU) workspace, the torch op on CUDA tensors."""
        scores = np.ones((img_count, A, H, W)).astype(np.float32)
        bbox_deltas = (np.linspace(0, 10,
                                   num=img_count * 4 * A * H * W).reshape(
                                       (img_count, 4 * A, H,
                                        W)).astype(np.float32))
        im_info = np.ones((img_count, 3)).astype(np.float32) / 10
        anchors = np.ones((A, 4)).astype(np.float32)

        def generate_proposals_ref():
            # Run the reference Caffe2 operator through the workspace.
            ref_op = core.CreateOperator(
                "GenerateProposals",
                ["scores", "bbox_deltas", "im_info", "anchors"],
                ["rois", "rois_probs"],
                spatial_scale=2.0,
            )
            workspace.FeedBlob("scores", scores)
            workspace.FeedBlob("bbox_deltas", bbox_deltas)
            workspace.FeedBlob("im_info", im_info)
            workspace.FeedBlob("anchors", anchors)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("rois"), workspace.FetchBlob(
                "rois_probs")

        rois, rois_probs = generate_proposals_ref()
        rois = torch.tensor(rois)
        rois_probs = torch.tensor(rois_probs)
        a, b = torch.ops._caffe2.GenerateProposals(
            torch.tensor(scores).cuda(),
            torch.tensor(bbox_deltas).cuda(),
            torch.tensor(im_info).cuda(),
            torch.tensor(anchors).cuda(),
            2.0,
            6000,
            300,
            0.7,
            16,
            True,
            -90,
            90,
            1.0,
            legacy_plus_one=True,
        )
        # Compare on CPU.
        torch.testing.assert_allclose(rois, a.cpu())
        torch.testing.assert_allclose(rois_probs, b.cpu())

    @given(
        N=st.integers(min_value=1, max_value=2),
        C=st.integers(min_value=4, max_value=4),
        H=st.integers(min_value=10, max_value=10),
        W=st.integers(min_value=8, max_value=8),
    )
    def _test_roi_align(self, N, C, H, W, device):
        """Compare torch.ops._caffe2.RoIAlign on `device` with the Caffe2
        RoIAlign operator (helper shared by the cpu/cuda tests)."""
        def rand_roi():
            # [batch_idx, x1, y1, x2, y2] with x2 > x1 and y2 > y1.
            batch_idx = float(int(N * np.random.rand()))
            x1 = 0.5 * np.random.rand() * W
            y1 = 0.5 * np.random.rand() * H
            x2 = (0.5 + 0.5 * np.random.rand()) * W
            y2 = (0.5 + 0.5 * np.random.rand()) * H
            return np.array([batch_idx, x1, y1, x2, y2]).astype(np.float32)

        feature = np.random.randn(N, C, H, W).astype(np.float32)
        rois = np.array([rand_roi() for _ in range(10)])

        def roi_align_ref(_feature, _rois):
            # Run the reference Caffe2 operator through the workspace.
            ref_op = core.CreateOperator(
                "RoIAlign",
                ["feature", "rois"],
                ["roi_feature"],
                spatial_scale=1.0,
                pooled_h=3,
                pooled_w=3,
                sampling_ratio=0,
            )
            workspace.FeedBlob("feature", _feature)
            workspace.FeedBlob("rois", _rois)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("roi_feature")

        expected = roi_align_ref(feature, rois)
        actual = torch.ops._caffe2.RoIAlign(
            torch.Tensor(feature).to(device),
            torch.Tensor(rois).to(device),
            order="NCHW",
            spatial_scale=1.0,
            pooled_h=3,
            pooled_w=3,
            sampling_ratio=0,
            aligned=False,
        )
        torch.testing.assert_allclose(expected, actual.cpu())

    def test_roi_align_cpu(self):
        # Run the RoIAlign parity check on CPU tensors.
        self._test_roi_align(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_roi_align_cuda(self):
        # Run the RoIAlign parity check on CUDA tensors.
        self._test_roi_align(device="cuda")

    @given(roi_counts=st.lists(st.integers(0, 5), min_size=1, max_size=10))
    def test_collect_and_distribute_fpn_rpn_proposals_op(self, roi_counts):
        """CollectAndDistributeFpnRpnProposals must equal CollectRpnProposals
        followed by DistributeFpnProposals on the same inputs."""
        batch_size = len(roi_counts)
        im_dims = np.random.randint(100, 600, batch_size)
        # 5 FPN levels of rois followed by 5 levels of per-roi scores.
        rpn_rois_and_scores = []
        for i in range(5):
            rpn_rois_and_scores.append(
                torch.Tensor(generate_rois(roi_counts, im_dims)))
        for i in range(5):
            rpn_rois_and_scores.append(torch.rand(sum(roi_counts)))

        rois = torch.ops._caffe2.CollectRpnProposals(
            rpn_rois_and_scores,
            rpn_max_level=6,
            rpn_min_level=2,
            rpn_post_nms_topN=sum(roi_counts),
        )
        fpn_outputs = torch.ops._caffe2.DistributeFpnProposals(
            rois,
            roi_canonical_scale=224,
            roi_canonical_level=4,
            roi_max_level=5,
            roi_min_level=2,
            legacy_plus_one=True,
        )

        all_outputs = torch.ops._caffe2.CollectAndDistributeFpnRpnProposals(
            rpn_rois_and_scores,
            roi_canonical_scale=224,
            roi_canonical_level=4,
            roi_max_level=5,
            roi_min_level=2,
            rpn_max_level=6,
            rpn_min_level=2,
            rpn_post_nms_topN=sum(roi_counts),
            legacy_plus_one=True,
        )

        # [rois] + fpn_outputs should be equal to all_outputs.
        # (Removed two unused locals that sliced fpn_outputs; the full
        # sequence is compared element-wise below.)
        torch.testing.assert_allclose(rois, all_outputs[0])
        for x, y in zip(fpn_outputs, all_outputs[1:]):
            torch.testing.assert_allclose(x, y)

    @given(X=hu.tensor(), fast_gelu=st.booleans())
    def _test_gelu_op(self, X, fast_gelu, device):
        """Check the Gelu torch binding against a scipy-based reference.

        Fix: the input tensor is now moved to ``device`` before calling the
        op, so the CUDA variant actually exercises the GPU kernel.
        Previously ``device`` was accepted but ignored and both test paths
        ran on CPU.
        """
        def _gelu_ref(_X):
            # Exact GELU: x * Phi(x), with Phi the standard normal CDF.
            return (_X * norm.cdf(_X).astype(np.float32), )

        expected_output, = _gelu_ref(X)
        actual_output = torch.ops._caffe2.Gelu(
            torch.tensor(X).to(device), fast_gelu)

        # The tanh-based fast approximation needs a looser relative tolerance.
        rtol = 1e-3 if fast_gelu else 1e-4
        atol = 1e-5
        torch.testing.assert_allclose(expected_output,
                                      actual_output.cpu(),
                                      rtol=rtol,
                                      atol=atol)

    def test_gelu_op(self):
        """Gelu parity check on CPU (delegates to _test_gelu_op)."""
        self._test_gelu_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_gelu_op_cuda(self):
        """Gelu parity check on CUDA; skipped when CUDA is unavailable."""
        self._test_gelu_op(device="cuda")

    @given(inputs=hu.lengths_tensor(
        dtype=np.float32,
        min_value=1,
        max_value=5,
        allow_empty=True,
    ))
    def _test_lengths_op(self, inputs, ref_op_name, torch_op, device):
        """Compare a torch Lengths* binding with its Caffe2 reference op.

        NOTE(review): ``device`` is accepted but currently unused; inputs are
        always fed as CPU tensors — confirm whether that is intended.
        """
        values, seg_lengths = inputs

        def run_reference(data_in, lengths_in):
            # Ground truth comes from running the Caffe2 operator directly
            # through the workspace.
            workspace.FeedBlob("X", data_in)
            workspace.FeedBlob("Y", lengths_in)
            workspace.RunOperatorOnce(
                core.CreateOperator(ref_op_name, ["X", "Y"], "out"))
            return workspace.FetchBlob("out")

        expected = run_reference(values, seg_lengths)
        observed = torch_op(
            torch.tensor(values),
            torch.tensor(seg_lengths, dtype=torch.int32),
        )

        torch.testing.assert_allclose(expected, observed.cpu())

    def _test_lengths_sum_op(self, device):
        """Exercise LengthsSum on the given device."""
        self._test_lengths_op(
            ref_op_name="LengthsSum",
            torch_op=torch.ops._caffe2.LengthsSum,
            device=device,
        )

    def test_lengths_sum_op(self):
        """LengthsSum parity on CPU."""
        self._test_lengths_sum_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_lengths_sum_op_cuda(self):
        """LengthsSum parity on CUDA; skipped without CUDA support."""
        self._test_lengths_sum_op(device="cuda")

    def _test_lengths_mean_op(self, device):
        """Exercise LengthsMean on the given device."""
        self._test_lengths_op(
            ref_op_name="LengthsMean",
            torch_op=torch.ops._caffe2.LengthsMean,
            device=device,
        )

    def test_lengths_mean_op(self):
        """LengthsMean parity on CPU."""
        self._test_lengths_mean_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_lengths_mean_op_cuda(self):
        """LengthsMean parity on CUDA; skipped without CUDA support."""
        self._test_lengths_mean_op(device="cuda")

    def _test_lengths_max_op(self, device):
        """Exercise LengthsMax on the given device."""
        self._test_lengths_op(
            ref_op_name="LengthsMax",
            torch_op=torch.ops._caffe2.LengthsMax,
            device=device,
        )

    def test_lengths_max_op(self):
        """LengthsMax parity on CPU."""
        self._test_lengths_max_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_lengths_max_op_cuda(self):
        """LengthsMax parity on CUDA; skipped without CUDA support."""
        self._test_lengths_max_op(device="cuda")

    def _test_resize_nearest_op(self, device):
        """Compare the ResizeNearest torch binding with the Caffe2 operator
        on a small fixed-shape NCHW input."""
        sample = np.random.rand(1, 2, 3, 4).astype(np.float32)

        def run_reference(arr):
            # Ground truth via the Caffe2 ResizeNearest operator.
            workspace.FeedBlob("X", arr)
            workspace.RunOperatorOnce(core.CreateOperator(
                "ResizeNearest",
                ["X"],
                ["Y"],
                width_scale=2.0,
                height_scale=1.5,
                order="NCHW",
            ))
            return workspace.FetchBlob("Y")

        expected = run_reference(sample)
        observed = torch.ops._caffe2.ResizeNearest(
            torch.tensor(sample).to(device),
            order="NCHW",
            width_scale=2.0,
            height_scale=1.5,
        )

        torch.testing.assert_allclose(expected, observed.cpu())

    def test_resize_nearest_op_cpu(self):
        """ResizeNearest parity on CPU.

        Fix: dropped the pointless ``return`` of a None-returning helper,
        for consistency with every other test wrapper in this file (and to
        avoid pytest's warning about non-None test returns).
        """
        self._test_resize_nearest_op("cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_resize_nearest_op_cuda(self):
        """ResizeNearest parity on CUDA; skipped without CUDA support."""
        self._test_resize_nearest_op("cuda")
# ---- Beispiel #8 ----  (listing separator from the example aggregation; commented out so the module parses)
class TestSegmentOps(hu.HypothesisTestCase):
    """Hypothesis-driven tests for segment and length reduction operators.

    The *Tester helpers (defined elsewhere in this file) run each operator
    family (Sorted/Unsorted/Sparse segment ops, Lengths* ops) against numpy
    reference implementations; the remaining tests check individual ops with
    hand-written references plus device and gradient checks.
    """

    def test_sorted_segment_ops(self):
        """SortedSegment* ops, including the sorted-only reductions."""
        SegmentsTester()._test(
            'SortedSegment',
            hu.segmented_tensor(
                dtype=np.float32,
                is_sorted=True,
                allow_empty=True
            ),
            REFERENCES_ALL + REFERENCES_SORTED
        )(self)

    def test_unsorted_segment_ops(self):
        """UnsortedSegment* ops against the common reference set."""
        SegmentsTester()._test(
            'UnsortedSegment',
            hu.segmented_tensor(
                dtype=np.float32,
                is_sorted=False,
                allow_empty=True
            ),
            REFERENCES_ALL,
        )(self)

    def test_unsorted_segment_ops_gpu(self):
        """UnsortedSegment* ops on GPU when available; gradient checks are
        disabled for this variant."""
        SegmentsTester()._test(
            'UnsortedSegment',
            hu.segmented_tensor(
                dtype=np.float32,
                is_sorted=False,
                allow_empty=True,
            ),
            REFERENCES_ALL,
            gpu=workspace.has_gpu_support,
            grad_check=False,
        )(self)

    def test_sparse_sorted_segment_ops(self):
        """SparseSortedSegment* ops (indices gather + sorted segments)."""
        SegmentsTester()._test(
            'SparseSortedSegment',
            hu.sparse_segmented_tensor(
                dtype=np.float32,
                is_sorted=True,
                allow_empty=True
            ),
            REFERENCES_ALL
        )(self)

    def test_sparse_unsorted_segment_ops(self):
        """SparseUnsortedSegment* ops."""
        SegmentsTester()._test(
            'SparseUnsortedSegment',
            hu.sparse_segmented_tensor(
                dtype=np.float32,
                is_sorted=False,
                allow_empty=True
            ),
            REFERENCES_ALL
        )(self)

    def test_lengths_ops(self):
        """Lengths* ops, including the lengths-only reductions."""
        LengthsTester()._test(
            'Lengths',
            hu.lengths_tensor(
                dtype=np.float32,
                min_value=1,
                max_value=5,
                allow_empty=True
            ),
            REFERENCES_ALL + REFERENCES_LENGTHS_ONLY,
        )(self)

    def test_sparse_lengths_ops(self):
        """SparseLengths* ops with both int32 and int64 index types."""
        for itype in [np.int32, np.int64]:
            LengthsTester()._test(
                'SparseLengths',
                hu.sparse_lengths_tensor(
                    dtype=np.float32,
                    min_value=1,
                    max_value=5,
                    allow_empty=True,
                    itype=itype,
                ),
                REFERENCES_ALL,
            )(self)

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    @given(**hu.gcs)
    def test_unsorted_sums_large(self, gc, dc):
        """Device check for UnsortedSegmentSum on a large random input."""
        X = np.random.rand(10000, 32, 12).astype(np.float32)
        segments = np.random.randint(0, 10000, size=10000).astype(np.int32)
        op = core.CreateOperator("UnsortedSegmentSum", ["X", "segments"], "out")
        self.assertDeviceChecks(dc, op, [X, segments], [0])

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    @given(**hu.gcs)
    def test_sorted_segment_range_mean(self, gc, dc):
        """Device and gradient checks for SortedSegmentRangeMean."""
        X = np.random.rand(6, 32, 12).astype(np.float32)
        # Fixed, already-sorted segment ids covering 4 segments.
        segments = np.array([0, 0, 1, 1, 2, 3]).astype(np.int32)
        op = core.CreateOperator(
            "SortedSegmentRangeMean",
            ["X", "segments"],
            "out"
        )
        self.assertDeviceChecks(dc, op, [X, segments], [0])
        self.assertGradientChecks(gc, op, [X, segments], 0, [0])

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    @given(**hu.gcs)
    def test_sorted_segment_range_log_mean_exp(self, gc, dc):
        """Device and gradient checks for SortedSegmentRangeLogMeanExp."""
        X = np.random.rand(7, 32, 12).astype(np.float32)
        segments = np.array([0, 0, 1, 1, 2, 2, 3]).astype(np.int32)
        op = core.CreateOperator(
            "SortedSegmentRangeLogMeanExp",
            ["X", "segments"],
            "out"
        )
        self.assertDeviceChecks(dc, op, [X, segments], [0])
        self.assertGradientChecks(gc, op, [X, segments], 0, [0])

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    @given(**hu.gcs)
    def test_unsorted_means_large(self, gc, dc):
        """Device check for UnsortedSegmentMean on a large random input."""
        X = np.random.rand(10000, 31, 19).astype(np.float32)
        segments = np.random.randint(0, 10000, size=10000).astype(np.int32)
        op = core.CreateOperator("UnsortedSegmentMean", ["X", "segments"], "out")
        self.assertDeviceChecks(dc, op, [X, segments], [0])

    @given(
        inputs=hu.lengths_tensor(
            dtype=np.float32,
            min_value=1,
            max_value=5,
            allow_empty=True,
        ),
        **hu.gcs
    )
    def test_lengths_sum(self, inputs, gc, dc):
        """LengthsSum against an explicit per-segment accumulation."""
        X, Y = inputs
        op = core.CreateOperator("LengthsSum", ["X", "Y"], "out")

        def ref(D, L):
            # One output row per segment; walk D consuming L[g] rows each.
            R = np.zeros(shape=(L.size, ) + D.shape[1:], dtype=D.dtype)
            line = 0
            for g in range(L.size):
                for _ in range(L[g]):
                    if len(D.shape) > 1:
                        R[g, :] += D[line, :]
                    else:
                        R[g] += D[line]
                    line += 1
            return [R]

        self.assertReferenceChecks(gc, op, [X, Y], ref)
        self.assertDeviceChecks(dc, op, [X, Y], [0])
        self.assertGradientChecks(gc, op, [X, Y], 0, [0])

    @given(
        inputs=hu.sparse_lengths_tensor(
            dtype=np.float32,
            min_value=1,
            max_value=5,
            allow_empty=True
        ),
        **hu.gcs
    )
    def test_sparse_lengths_sum(self, inputs, gc, dc):
        """SparseLengthsSum: like LengthsSum but rows are gathered via I."""
        X, Y, Z = inputs
        op = core.CreateOperator("SparseLengthsSum", ["X", "Y", "Z"], "out")

        def ref(D, I, L):
            R = np.zeros(shape=(L.size, ) + D.shape[1:], dtype=D.dtype)
            line = 0
            for g in range(L.size):
                for _ in range(L[g]):
                    if len(D.shape) > 1:
                        R[g, :] += D[I[line], :]
                    else:
                        R[g] += D[I[line]]
                    line += 1
            return [R]

        self.assertReferenceChecks(gc, op, [X, Y, Z], ref)
        self.assertDeviceChecks(dc, op, [X, Y, Z], [0])
        self.assertGradientChecks(gc, op, [X, Y, Z], 0, [0])

    @given(
        inputs=hu.lengths_tensor(
            dtype=np.float32,
            min_value=1,
            max_value=5,
            allow_empty=True,
        ),
        **hu.gcs
    )
    def test_lengths_mean(self, inputs, gc, dc):
        """LengthsMean against explicit sum-then-divide per segment."""
        X, Y = inputs
        op = core.CreateOperator("LengthsMean", ["X", "Y"], "out")

        def ref(D, L):
            R = np.zeros(shape=(L.size, ) + D.shape[1:], dtype=D.dtype)
            line = 0
            for g in range(L.size):
                for _ in range(L[g]):
                    if len(D.shape) > 1:
                        R[g, :] += D[line, :]
                    else:
                        R[g] += D[line]
                    line += 1
                # Normalize by the segment length. Division is skipped for
                # lengths 0 and 1: dividing by 1 is a no-op, and empty
                # segments stay zero (presumably matching the op's empty-
                # segment semantics — TODO confirm).
                if L[g] > 1:
                    if len(D.shape) > 1:
                        R[g, :] = R[g, :] / L[g]
                    else:
                        R[g] = R[g] / L[g]

            return [R]

        self.assertReferenceChecks(gc, op, [X, Y], ref)
        self.assertDeviceChecks(dc, op, [X, Y], [0])
        self.assertGradientChecks(gc, op, [X, Y], 0, [0])

    @given(
        inputs=hu.sparse_lengths_tensor(
            dtype=np.float32,
            min_value=1,
            max_value=5,
            allow_empty=True
        ),
        **hu.gcs
    )
    def test_sparse_lengths_mean(self, inputs, gc, dc):
        """SparseLengthsMean: gathered rows, then per-segment mean."""
        X, Y, Z = inputs
        op = core.CreateOperator("SparseLengthsMean", ["X", "Y", "Z"], "out")

        def ref(D, I, L):
            R = np.zeros(shape=(L.size, ) + D.shape[1:], dtype=D.dtype)
            line = 0
            for g in range(L.size):
                for _ in range(L[g]):
                    if len(D.shape) > 1:
                        R[g, :] += D[I[line], :]
                    else:
                        R[g] += D[I[line]]
                    line += 1

                # Same normalization convention as the LengthsMean reference.
                if L[g] > 1:
                    if len(D.shape) > 1:
                        R[g, :] = R[g, :] / L[g]
                    else:
                        R[g] = R[g] / L[g]

            return [R]

        self.assertReferenceChecks(gc, op, [X, Y, Z], ref)
        self.assertDeviceChecks(dc, op, [X, Y, Z], [0])
        self.assertGradientChecks(gc, op, [X, Y, Z], 0, [0])

    @given(
        grad_on_weights=st.booleans(),
        inputs=hu.sparse_lengths_tensor(
            dtype=np.float32,
            min_value=1,
            max_value=5,
            allow_empty=True
        ),
        seed=st.integers(min_value=0, max_value=100),
        **hu.gcs
    )
    def test_sparse_lengths_weighted_sum(
            self, grad_on_weights, inputs, seed, gc, dc):
        """SparseLengthsWeightedSum against the module-level reference,
        including the gradient-w.r.t.-weights path when enabled."""
        D, I, L = inputs

        # Seed numpy so the weight draw is reproducible per hypothesis case.
        np.random.seed(int(seed))

        W = np.random.rand(I.size).astype(np.float32)
        op = core.CreateOperator(
            "SparseLengthsWeightedSum",
            ["D", "W", "I", "L"],
            "out",
            grad_on_weights=grad_on_weights)
        self.assertDeviceChecks(dc, op, [D, W, I, L], [0])
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[D, W, I, L],
            reference=sparse_lengths_weighted_sum_ref,
            threshold=1e-4,
            output_to_grad='out',
            grad_reference=partial(
                sparse_lengths_weighted_sum_grad_ref,
                grad_on_weights=grad_on_weights),
        )
        self.assertGradientChecks(gc, op, [D, W, I, L], 0, [0])
        if grad_on_weights:
            # Weights are input index 1; only checked when the op computes
            # their gradient.
            self.assertGradientChecks(gc, op, [D, W, I, L], 1, [0])

    @given(**hu.gcs)
    def test_sparse_lengths_indices_in_gradient_sum_gpu(self, gc, dc):
        """Device check for the indices-in-gradient Sum gradient op."""
        X = np.random.rand(3, 3, 4, 5).astype(np.float32)
        Y = np.asarray([3, 3, 2]).astype(np.int32)
        Z = np.random.randint(0, 50, size=8).astype(np.int64)
        op = core.CreateOperator(
            "SparseLengthsIndicesInGradientSumGradient", ["X", "Y", "Z"], "out"
        )
        self.assertDeviceChecks(dc, op, [X, Y, Z], [0])

    @given(**hu.gcs)
    def test_sparse_lengths_indices_in_gradient_mean_gpu(self, gc, dc):
        """Device check for the indices-in-gradient Mean gradient op."""
        X = np.random.rand(3, 3, 4, 5).astype(np.float32)
        Y = np.asarray([3, 3, 2]).astype(np.int32)
        Z = np.random.randint(0, 50, size=8).astype(np.int64)
        op = core.CreateOperator(
            "SparseLengthsIndicesInGradientMeanGradient", ["X", "Y", "Z"], "out"
        )
        self.assertDeviceChecks(dc, op, [X, Y, Z], [0])

    @given(**hu.gcs_cpu_only)
    def test_legacy_sparse_and_lengths_sum_gradient(self, gc, dc):
        """SparseLengthsSumGradient and LengthsSumGradient must agree on the
        same inputs (legacy-compatibility check)."""
        X = np.random.rand(3, 64).astype(np.float32)
        Y = np.asarray([20, 20, 10]).astype(np.int32)
        workspace.FeedBlob("X", X)
        workspace.FeedBlob("Y", Y)
        test_net = core.Net("test_net")
        test_net.SparseLengthsSumGradient(["X", "Y"], "out1")
        test_net.LengthsSumGradient(["X", "Y"], "out2")
        workspace.RunNetOnce(test_net)
        out1 = workspace.FetchBlob("out1")
        out2 = workspace.FetchBlob("out2")
        self.assertTrue((out1 == out2).all())

    @given(**hu.gcs)
    def test_sparse_lengths_sum_invalid_index(self, gc, dc):
        """Out-of-range indices (>= number of rows in D) must make the op
        raise rather than read out of bounds."""
        D = np.random.rand(50, 3, 4, 5).astype(np.float32)
        # All indices are >= 10000, far beyond D's 50 rows.
        I = (np.random.randint(0, 10000, size=10) + 10000).astype(np.int64)
        L = np.asarray([4, 4, 2]).astype(np.int32)
        op = core.CreateOperator(
            "SparseLengthsSum",
            ["D", "I", "L"],
            "out")
        workspace.FeedBlob('D', D)
        workspace.FeedBlob('I', I)
        workspace.FeedBlob('L', L)
        with self.assertRaises(RuntimeError):
            workspace.RunOperatorOnce(op)

    @given(**hu.gcs_cpu_only)
    def test_sparse_lengths_positional_weighted_sum(
            self, gc, dc):
        """SparseLengthsPositionalWeightedSum must equal gathering weights by
        within-segment position and running SparseLengthsWeightedSum."""
        D = np.random.rand(50, 3, 4, 5).astype(np.float32)
        W = np.random.rand(50).astype(np.float32)
        indices = np.random.randint(0, 50, size=10).astype(np.int64)
        L = np.asarray([4, 4, 2]).astype(np.int32)
        op = core.CreateOperator(
            "SparseLengthsPositionalWeightedSum",
            ["D", "W", "indices", "L"],
            "out")

        def ref_sparse(D, W, indices, L):
            # Build per-element positions within each segment
            # (0..L[g]-1 for each segment g).
            workspace.FeedBlob("L", L)
            lengths_range_fill_op = core.CreateOperator(
                "LengthsRangeFill", ["L"], ["L_pos_seq"])
            workspace.RunOperatorOnce(lengths_range_fill_op)

            # Gather the weight for each element by its position.
            workspace.FeedBlob("W", W)
            gather_op = core.CreateOperator(
                "Gather", ["W", "L_pos_seq"], ["W_gathered"])
            workspace.RunOperatorOnce(gather_op)

            # Plain weighted sum with the position-gathered weights.
            workspace.FeedBlob("D", D)
            workspace.FeedBlob("indices", indices)
            sparse_op = core.CreateOperator(
                "SparseLengthsWeightedSum",
                ["D", "W_gathered", "indices", "L"],
                "out_ref")
            workspace.RunOperatorOnce(sparse_op)

            return (workspace.FetchBlob("out_ref"),)

        self.assertReferenceChecks(
            gc, op, [D, W, indices, L], ref_sparse)
# ---- Beispiel #9 ----  (listing separator from the example aggregation; commented out so the module parses)
class TestConcatSplitOps(hu.HypothesisTestCase):
    """Tests for the Concat, Split and SplitByLengths operators."""

    @given(tensor_splits=_tensor_splits(), **hu.gcs)
    def test_concat(self, tensor_splits, gc, dc):
        """Concat must match numpy concatenate; its second output reports
        each input's size along the concat axis."""
        axis, _, splits = tensor_splits

        op = core.CreateOperator(
            "Concat", ['X_{}'.format(i) for i in range(len(splits))],
            ['concat_result', 'split_info'],
            axis=axis)

        self.assertReferenceChecks(
            gc, op, splits, lambda *splits:
            (np.concatenate(splits, axis=axis),
             np.array([a.shape[axis] for a in splits])))
        self.assertDeviceChecks(dc, op, splits, [0, 1])
        self.assertGradientChecks(gc, op, splits, 0, [0])

    @given(tensor_splits=_tensor_splits(add_axis=True), **hu.gcs)
    def test_concat_add_axis(self, tensor_splits, gc, dc):
        """Concat with add_axis=1 stacks inputs along a new dimension, so
        each input contributes size 1 on that axis."""
        axis, _, splits = tensor_splits

        op = core.CreateOperator(
            "Concat", ['X_{}'.format(i) for i in range(len(splits))],
            ['concat_result', 'split_info'],
            axis=axis,
            add_axis=1)

        self.assertReferenceChecks(
            gc, op, splits, lambda *splits:
            (np.concatenate([np.expand_dims(a, axis) for a in splits],
                            axis=axis), np.array([1] * len(splits))))
        self.assertDeviceChecks(dc, op, splits, [0, 1])
        # Check the gradient flowing into every individual input.
        for i in range(len(splits)):
            self.assertGradientChecks(gc, op, splits, i, [0])

    @given(tensor_splits=_tensor_splits(),
           split_as_arg=st.booleans(),
           **hu.gcs)
    def test_split(self, tensor_splits, split_as_arg, gc, dc):
        """Split must partition the concatenated input back into the
        original pieces, with sizes given either as an op argument or as a
        second input blob."""
        axis, split_info, splits = tensor_splits

        # NOTE(review): this pins the hypothesis-drawn `split_as_arg` to
        # True, so the split-as-second-input branch below is never
        # exercised. Looks like leftover debugging — confirm before
        # removing, in case the two-input path is known-broken somewhere.
        split_as_arg = True

        if split_as_arg:
            input_names = ['input']
            input_tensors = [np.concatenate(splits, axis=axis)]
            kwargs = dict(axis=axis, split=split_info)
        else:
            input_names = ['input', 'split']
            input_tensors = [np.concatenate(splits, axis=axis), split_info]
            kwargs = dict(axis=axis)

        op = core.CreateOperator(
            "Split", input_names,
            ['X_{}'.format(i) for i in range(len(split_info))], **kwargs)

        def split_ref(input, split=split_info):
            # Cumulative offsets delimit each output chunk along `axis`.
            s = np.cumsum([0] + list(split))
            return [
                np.array(input.take(np.arange(s[i], s[i + 1]), axis=axis))
                for i in range(len(split))
            ]

        outputs_with_grad = range(len(split_info))
        self.assertReferenceChecks(gc, op, input_tensors, split_ref)
        self.assertDeviceChecks(dc, op, input_tensors, outputs_with_grad)
        self.assertGradientChecks(gc, op, input_tensors, 0, outputs_with_grad)

    @given(inputs=hu.lengths_tensor(
        dtype=np.float32,
        min_value=1,
        max_value=5,
        allow_empty=True,
    ),
           **hu.gcs)
    def test_split_by_lengths(self, inputs, gc, dc):
        """SplitByLengths with num_output dividing len(lengths): each output
        receives an equal number of consecutive length-groups."""
        data, lengths = inputs
        len_len = len(lengths)

        def _find_factor_simple(x):
            # Smallest factor among 2, 3, 5; falls back to x itself (i.e. a
            # single-output split) when none divides it.
            for i in [2, 3, 5]:
                if x % i == 0:
                    return i
            return x

        num_output = _find_factor_simple(len_len)
        axis = 0
        op = core.CreateOperator(
            "SplitByLengths",
            ["data", "lengths"],
            ['X_{}'.format(i) for i in range(num_output)],
            axis=axis,
        )

        def split_by_lengths_ref(data, lengths, num_output=num_output, axis=0):
            # Element offsets at each group boundary; output i covers groups
            # [i * len_len/num_output, (i+1) * len_len/num_output).
            idxs = np.cumsum([0] + list(lengths)).astype(np.int32)
            return [
                np.array(
                    data.take(np.arange(idxs[i * len_len // num_output],
                                        idxs[(i + 1) * len_len // num_output]),
                              axis=axis)) for i in range(num_output)
            ]

        outputs_with_grad = range(num_output)
        input_tensors = [data, lengths]
        # Reference and gradient checks run on CPU; lengths must stay a CPU
        # blob even for the device check.
        self.assertReferenceChecks(hu.cpu_do, op, input_tensors,
                                   split_by_lengths_ref)
        self.assertDeviceChecks(dc, op, input_tensors, outputs_with_grad)
        self.assertGradientChecks(hu.cpu_do,
                                  op,
                                  input_tensors,
                                  0,
                                  outputs_with_grad,
                                  input_device_options={"lengths": hu.cpu_do})
# ---- Beispiel #10 ----  (listing separator from the example aggregation; commented out so the module parses)
class TestUtilityOps(hu.HypothesisTestCase):
    """Tests for assorted utility operators (Slice, Transpose, NanCheck,
    Max/Min, LengthsGather, Size, Alias, Range)."""

    @given(X=hu.tensor(), args=st.booleans(), **hu.gcs)
    def test_slice(self, X, args, gc, dc):
        """Slice along one random dimension, with starts/ends passed either
        as op arguments (args=True) or as input blobs.

        NOTE(review): dim/start/end come from the `random` module, not from
        hypothesis, so they are not controlled by hypothesis's seeding or
        shrinking — confirm this nondeterminism is acceptable.
        """
        X = X.astype(dtype=np.float32)
        dim = random.randint(0, X.ndim - 1)
        slice_start = random.randint(0, X.shape[dim] - 1)
        slice_end = random.randint(slice_start, X.shape[dim] - 1)
        starts = np.array([0] * X.ndim).astype(np.int32)
        # An end of -1 is presumably the op's "through the end of the dim"
        # sentinel for untouched dimensions — TODO confirm.
        ends = np.array([-1] * X.ndim).astype(np.int32)
        starts[dim] = slice_start
        ends[dim] = slice_end

        if args:
            op = core.CreateOperator("Slice", ["X"], ["Y"],
                                     starts=starts,
                                     ends=ends,
                                     device_option=gc)

            def slice_ref(X):
                slc = [slice(None)] * X.ndim
                slc[dim] = slice(slice_start, slice_end)
                # NOTE(review): indexing with a Python list of slices is
                # deprecated in NumPy (use tuple(slc)) — confirm against the
                # NumPy version in CI.
                return [X[slc]]

            inputs = [X]
        else:
            op = core.CreateOperator("Slice", ["X", "starts", "ends"], ["Y"],
                                     device_option=gc)

            def slice_ref(x, starts, ends):
                slc = [slice(None)] * x.ndim
                slc[dim] = slice(slice_start, slice_end)
                # NOTE(review): same list-of-slices indexing as above.
                return [x[slc]]

            inputs = [X, starts, ends]

        self.assertReferenceChecks(gc, op, inputs, slice_ref)
        self.assertDeviceChecks(dc, op, inputs, [0])
        self.assertGradientChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            outputs_to_check=0,
            outputs_with_grads=[0],
        )

    @given(dtype=st.sampled_from([np.float32, np.int32]),
           ndims=st.integers(min_value=1, max_value=5),
           seed=st.integers(min_value=0, max_value=65536),
           null_axes=st.booleans(),
           engine=st.sampled_from(['CUDNN', None]),
           **hu.gcs)
    def test_transpose(self, dtype, ndims, seed, null_axes, engine, gc, dc):
        """Transpose with either a random permutation or no axes argument
        (null_axes=True, meaning full reversal), against np.transpose."""
        dims = (np.random.rand(ndims) * 16 + 1).astype(np.int32)
        X = (np.random.rand(*dims) * 16).astype(dtype)

        if null_axes:
            axes = None
            op = core.CreateOperator("Transpose", ["input"], ["output"],
                                     engine=engine)
        else:
            np.random.seed(int(seed))
            axes = [int(v) for v in list(np.random.permutation(X.ndim))]
            op = core.CreateOperator("Transpose", ["input"], ["output"],
                                     axes=axes,
                                     engine=engine)

        def transpose_ref(x, axes):
            # np.transpose(x, None) reverses all axes, matching the op's
            # behavior with no axes argument.
            return (np.transpose(x, axes), )

        self.assertReferenceChecks(gc, op, [X, axes], transpose_ref)

    @unittest.skipIf(not workspace.has_gpu_support, "No gpu support")
    def test_gpu_transpose_minusones(self):
        '''
        Repro a problem with earlier version of CuDNN Transpose Op that
        casted ints to floats.
        '''
        X = -np.ones((2, 10)).astype(np.int32)
        with core.DeviceScope(core.DeviceOption(caffe2_pb2.CUDA, 0)):
            workspace.FeedBlob("X", X)
            print("X:\n{}\n".format(workspace.FetchBlob("X")))
            op = core.CreateOperator("Transpose", ["X"], ["Y"], engine='CUDNN')
            workspace.RunOperatorOnce(op)
            Y = workspace.FetchBlob("Y")
            print("Y:\n{}\n".format(Y))

            # Every output element must still be the int -1; a float cast
            # bug would corrupt the values.
            for j in list(Y.flatten()):
                self.assertEqual(-1, j)

    @given(m=st.integers(5, 10),
           n=st.integers(5, 10),
           o=st.integers(5, 10),
           nans=st.booleans(),
           **hu.gcs)
    def test_nan_check(self, m, n, o, nans, gc, dc):
        """NanCheck must pass clean tensors through and raise on NaN, both
        in the forward pass and in the gradient."""
        other = np.array([1, 2, 3]).astype(np.float32)
        X = np.random.rand(m, n, o).astype(np.float32)
        if nans:
            # Poison a single random element with NaN.
            x_nan = np.random.randint(0, m)
            y_nan = np.random.randint(0, n)
            z_nan = np.random.randint(0, o)
            X[x_nan, y_nan, z_nan] = float('NaN')

        # print('nans: {}'.format(nans))
        # print(X)

        def nan_reference(X, Y):
            # Reference output only matters in the clean case; the NaN case
            # is expected to raise before comparison.
            if not np.isnan(X).any():
                return [X]
            else:
                return [np.array([])]

        op = core.CreateOperator("NanCheck", ["X", "other"], ["Y"])

        # Expect success iff the input is NaN-free; a RuntimeError is the
        # correct outcome when nans=True.
        try:
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=[X, other],
                reference=nan_reference,
            )
            if nans:
                self.assertTrue(False, "Did not fail when presented with NaN!")
        except RuntimeError:
            self.assertTrue(nans, "No NaNs but failed")

        # Same expectation for the gradient pass.
        try:
            self.assertGradientChecks(
                device_option=gc,
                op=op,
                inputs=[X],
                outputs_to_check=0,
                outputs_with_grads=[0],
            )
            if nans:
                self.assertTrue(False, "Did not fail when gradient had NaN!")
        except RuntimeError:
            pass

    @given(n=st.integers(4, 5),
           m=st.integers(6, 7),
           d=st.integers(2, 3),
           **hu.gcs)
    def test_elementwise_max(self, n, m, d, gc, dc):
        """Three-input Max against chained np.maximum."""
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        inputs = [X, Y, Z]

        def max_op(X, Y, Z):
            return [np.maximum(np.maximum(X, Y), Z)]

        op = core.CreateOperator("Max", ["X", "Y", "Z"], ["mx"])

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=max_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0])

    @given(n=st.integers(4, 5),
           m=st.integers(6, 7),
           d=st.integers(2, 3),
           **hu.gcs)
    def test_elementwise_max_grad(self, n, m, d, gc, dc):
        """MaxGradient: upstream gradient flows to each input exactly where
        that input attains the elementwise maximum."""
        go = np.random.rand(n, m, d).astype(np.float32)
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        mx = np.maximum(np.maximum(X, Y), Z)
        inputs = [mx, go, X, Y, Z]

        def max_grad_op(mx, go, X, Y, Z):
            def mx_grad(a):
                # Boolean mask selects positions where `a` equals the max.
                return go * (mx == a)

            return [mx_grad(a) for a in [X, Y, Z]]

        op = core.CreateOperator("MaxGradient", ["mx", "go", "X", "Y", "Z"],
                                 ["gX", "gY", "gZ"])

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=max_grad_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0, 1, 2])

    @given(n=st.integers(4, 5),
           m=st.integers(6, 7),
           d=st.integers(2, 3),
           **hu.gcs)
    def test_elementwise_min(self, n, m, d, gc, dc):
        """Three-input Min against chained np.minimum."""
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        inputs = [X, Y, Z]

        def min_op(X, Y, Z):
            return [np.minimum(np.minimum(X, Y), Z)]

        op = core.CreateOperator("Min", ["X", "Y", "Z"], ["mx"])

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=min_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0])

    @given(n=st.integers(4, 5),
           m=st.integers(6, 7),
           d=st.integers(2, 3),
           **hu.gcs)
    def test_elementwise_min_grad(self, n, m, d, gc, dc):
        """MinGradient: mirror of MaxGradient for the elementwise minimum."""
        go = np.random.rand(n, m, d).astype(np.float32)
        X = np.random.rand(n, m, d).astype(np.float32)
        Y = np.random.rand(n, m, d).astype(np.float32)
        Z = np.random.rand(n, m, d).astype(np.float32)
        mx = np.minimum(np.minimum(X, Y), Z)
        inputs = [mx, go, X, Y, Z]

        def min_grad_op(mx, go, X, Y, Z):
            def mx_grad(a):
                return go * (mx == a)

            return [mx_grad(a) for a in [X, Y, Z]]

        op = core.CreateOperator("MinGradient", ["mx", "go", "X", "Y", "Z"],
                                 ["gX", "gY", "gZ"])

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=min_grad_op,
        )
        self.assertDeviceChecks(dc, op, inputs, [0, 1, 2])

    @given(inputs=hu.lengths_tensor().flatmap(lambda pair: st.tuples(
        st.just(pair[0]),
        st.just(pair[1]),
        hu.dims(max_value=len(pair[1])),
    )).flatmap(lambda tup: st.tuples(
        st.just(tup[0]),
        st.just(tup[1]),
        hu.arrays(tup[2],
                  dtype=np.int32,
                  elements=st.integers(min_value=0, max_value=len(tup[1]) - 1)
                  ),
    )),
           **hu.gcs_cpu_only)
    def test_lengths_gather(self, inputs, gc, dc):
        """LengthsGather: concatenate the item ranges selected by `indices`
        into the lengths-partitioned `items` tensor.

        The two chained flatmaps above derive `indices` whose values are
        valid positions into `lengths`.
        """
        items = inputs[0]
        lengths = inputs[1]
        indices = inputs[2]

        def lengths_gather_op(items, lengths, indices):
            # ends[i] is the exclusive end offset of group i within items.
            ends = np.cumsum(lengths)
            return [
                np.concatenate(
                    list(items[ends[i] - lengths[i]:ends[i]] for i in indices))
            ]

        op = core.CreateOperator("LengthsGather",
                                 ["items", "lengths", "indices"], ["output"])

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[items, lengths, indices],
            reference=lengths_gather_op,
        )

    @given(**hu.gcs)
    def test_size_op(self, gc, dc):
        """Size returns the total number of elements of its input."""
        X = np.array([[1, 2], [3, 4]]).astype(np.float32)

        def size_op(tensor):
            return [np.prod(tensor.shape)]

        op = core.CreateOperator("Size", ["X"], ["output"])

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X],
            reference=size_op,
        )

    def test_alias_op(self):
        """ Don't use hypothesis because there are only 2 cases to check"""
        # Cover both an empty and a non-empty tensor.
        for size in [0, 5]:
            X = np.arange(size).astype(np.float32)
            workspace.FeedBlob('X', X)

            op = core.CreateOperator("Alias", ["X"], ["Y"])
            workspace.RunOperatorOnce(op)
            Y = workspace.FetchBlob('Y')
            np.testing.assert_array_equal(X, Y)

    @given(**hu.gcs)
    def test_range(self, gc, dc):
        """Range with 1/2/3 inputs must match np.arange, and a zero step
        must raise."""
        names = [
            ('stop_', ),
            ('start_', 'stop_'),
            ('start_', 'stop_', 'step_'),
        ]
        # Most random values aren't great here, so use a fixed set instead of
        # hypothesis.
        for inputs in (
            (10, ),
            (np.float32(10.0), ),
            (0, ),
            (0, 0),
            (10., 5.0, -1.),
            (2, 10000),
            (2, 10000, 20000),
            (2, 10000, -1),
        ):
            inputs = [np.array(v) for v in inputs]
            # Select the input-name tuple matching the arity of this case.
            op = core.CreateOperator("Range", names[len(inputs) - 1], ["Y"])

            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=inputs,
                reference=lambda *x: [np.arange(*x)],
            )
            self.assertDeviceChecks(dc, op, inputs, [0])

        with self.assertRaisesRegexp(RuntimeError, 'Step size cannot be 0'):
            inputs = (np.array(0), np.array(10), np.array(0))
            op = core.CreateOperator("Range", names[len(inputs) - 1], ["Y"])
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=inputs,
                reference=lambda *x: [np.arange(*x)],
            )
class TorchIntegration(hu.HypothesisTestCase):
    @given(roi_counts=st.lists(st.integers(0, 5), min_size=1, max_size=10),
           num_classes=st.integers(1, 10),
           rotated=st.booleans(),
           angle_bound_on=st.booleans(),
           clip_angle_thresh=st.sampled_from([-1.0, 1.0]),
           **hu.gcs_cpu_only)
    def test_bbox_transform(
        self,
        roi_counts,
        num_classes,
        rotated,
        angle_bound_on,
        clip_angle_thresh,
        gc,
        dc,
    ):
        """Compare torch.ops._caffe2.BBoxTransform against the caffe2 op,
        using rois for multiple images in a batch.
        """
        rois, deltas, im_info = create_bbox_transform_inputs(
            roi_counts, num_classes, rotated)

        def run_caffe2_reference():
            # Run the plain caffe2 operator on the same inputs to get the
            # expected boxes.
            reference_op = core.CreateOperator(
                "BBoxTransform",
                ["rois", "deltas", "im_info"],
                ["box_out"],
                apply_scale=False,
                rotated=rotated,
                angle_bound_on=angle_bound_on,
                clip_angle_thresh=clip_angle_thresh,
            )
            for name, blob in (("rois", rois), ("deltas", deltas),
                               ("im_info", im_info)):
                workspace.FeedBlob(name, blob)
            workspace.RunOperatorOnce(reference_op)
            return workspace.FetchBlob("box_out")

        expected_boxes = torch.tensor(run_caffe2_reference())
        # Second return value (batch splits) is not checked here.
        actual_boxes, _ = torch.ops._caffe2.BBoxTransform(
            torch.tensor(rois),
            torch.tensor(deltas),
            torch.tensor(im_info),
            [1.0, 1.0, 1.0, 1.0],
            False,
            rotated,
            angle_bound_on,
            -90,
            90,
            clip_angle_thresh,
            legacy_plus_one=True,
        )

        torch.testing.assert_allclose(expected_boxes, actual_boxes)

    @given(roi_counts=st.lists(st.integers(0, 5), min_size=1, max_size=10),
           num_classes=st.integers(1, 10),
           rotated=st.booleans(),
           angle_bound_on=st.booleans(),
           clip_angle_thresh=st.sampled_from([-1.0, 1.0]),
           **hu.gcs_cpu_only)
    def test_box_with_nms_limits(
        self,
        roi_counts,
        num_classes,
        rotated,
        angle_bound_on,
        clip_angle_thresh,
        gc,
        dc,
    ):
        """Compare torch.ops._caffe2.BoxWithNMSLimit with the caffe2 op.

        Boxes are first produced via the BBoxTransform torch binding,
        then pushed through both the caffe2 BoxWithNMSLimit operator and
        its torch binding; all six outputs must match.
        """
        rotated = False  # FIXME remove this after rotation is supported
        rois, deltas, im_info = create_bbox_transform_inputs(
            roi_counts, num_classes, rotated)
        pred_bbox, batch_splits = [
            t.detach().numpy() for t in torch.ops._caffe2.BBoxTransform(
                torch.tensor(rois),
                torch.tensor(deltas),
                torch.tensor(im_info),
                [1.0, 1.0, 1.0, 1.0],
                False,
                rotated,
                angle_bound_on,
                -90,
                90,
                clip_angle_thresh,
                legacy_plus_one=True,
            )
        ]
        # Random per-RoI class scores; NMS settings below are shared by
        # both the reference run and the torch binding.
        class_prob = np.random.randn(sum(roi_counts),
                                     num_classes).astype(np.float32)
        score_thresh = 0.5
        nms_thresh = 0.5
        topk_per_image = sum(roi_counts) / 2

        def box_with_nms_limit_ref():
            # Run the plain caffe2 operator to obtain reference outputs.
            input_blobs = ["class_prob", "pred_bbox", "batch_splits"]
            output_blobs = [
                "score_nms",
                "bbox_nms",
                "class_nms",
                "batch_splits_nms",
                "keeps_nms",
                "keeps_size_nms",
            ]
            ref_op = core.CreateOperator(
                "BoxWithNMSLimit",
                input_blobs,
                output_blobs,
                score_thresh=float(score_thresh),
                nms=float(nms_thresh),
                detections_per_im=int(topk_per_image),
                soft_nms_enabled=False,
                soft_nms_method="linear",
                soft_nms_sigma=0.5,
                soft_nms_min_score_thres=0.001,
                rotated=rotated,
            )
            workspace.FeedBlob("class_prob", class_prob)
            workspace.FeedBlob("pred_bbox", pred_bbox)
            workspace.FeedBlob("batch_splits", batch_splits)
            workspace.RunOperatorOnce(ref_op)
            return (workspace.FetchBlob(b) for b in output_blobs)

        output_refs = box_with_nms_limit_ref()
        outputs = torch.ops._caffe2.BoxWithNMSLimit(
            torch.tensor(class_prob),
            torch.tensor(pred_bbox),
            torch.tensor(batch_splits),
            score_thresh=float(score_thresh),
            nms=float(nms_thresh),
            detections_per_im=int(topk_per_image),
            soft_nms_enabled=False,
            soft_nms_method="linear",
            soft_nms_sigma=0.5,
            soft_nms_min_score_thres=0.001,
            rotated=rotated,
            cls_agnostic_bbox_reg=False,
            input_boxes_include_bg_cls=True,
            output_classes_include_bg_cls=True,
            legacy_plus_one=True,
        )

        # Pairwise compare every output of the binding with the reference.
        for o, o_ref in zip(outputs, output_refs):
            torch.testing.assert_allclose(o, o_ref)

    @given(
        A=st.integers(min_value=4, max_value=4),
        H=st.integers(min_value=10, max_value=10),
        W=st.integers(min_value=8, max_value=8),
        img_count=st.integers(min_value=3, max_value=3),
    )
    def test_generate_proposals(self, A, H, W, img_count):
        """Compare the GenerateProposals torch binding with the caffe2 op
        on deterministic inputs of fixed shape.
        """
        scores = np.ones((img_count, A, H, W)).astype(np.float32)
        delta_count = img_count * 4 * A * H * W
        bbox_deltas = np.linspace(0, 10, num=delta_count).astype(
            np.float32).reshape(img_count, 4 * A, H, W)
        im_info = np.ones((img_count, 3)).astype(np.float32) / 10
        anchors = np.ones((A, 4)).astype(np.float32)

        def run_caffe2_reference():
            ref_op = core.CreateOperator(
                "GenerateProposals",
                ["scores", "bbox_deltas", "im_info", "anchors"],
                ["rois", "rois_probs"],
                spatial_scale=2.0,
            )
            for name, blob in (
                ("scores", scores),
                ("bbox_deltas", bbox_deltas),
                ("im_info", im_info),
                ("anchors", anchors),
            ):
                workspace.FeedBlob(name, blob)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("rois"), workspace.FetchBlob(
                "rois_probs")

        expected_rois, expected_probs = (
            torch.tensor(blob) for blob in run_caffe2_reference())
        # Positional arguments mirror the caffe2 op's scale / topN / NMS
        # settings — see the GenerateProposals op schema.
        actual_rois, actual_probs = torch.ops._caffe2.GenerateProposals(
            torch.tensor(scores),
            torch.tensor(bbox_deltas),
            torch.tensor(im_info),
            torch.tensor(anchors),
            2.0,
            6000,
            300,
            0.7,
            16,
            True,
            -90,
            90,
            1.0,
            legacy_plus_one=True,
        )
        torch.testing.assert_allclose(expected_rois, actual_rois)
        torch.testing.assert_allclose(expected_probs, actual_probs)

    @given(
        bsz=st.integers(1, 5),
        seq_lens=st.integers(1, 6),
        emb_lens=st.integers(5, 10),
        hidden_size=st.integers(3, 7),
        num_layers=st.integers(1, 4),
        has_biases=st.booleans(),
        is_bidirectional=st.booleans(),
        batch_first=st.booleans(),
    )
    def test_inference_lstm(
        self,
        bsz,
        seq_lens,
        emb_lens,
        hidden_size,
        num_layers,
        has_biases,
        is_bidirectional,
        batch_first,
    ):
        """Compare torch.ops._caffe2.InferenceLSTM with the caffe2 op.

        Both are given the weights of the same freshly-constructed
        torch.nn.LSTM, zero initial states, and one random input batch;
        output, hidden and cell tensors must match.
        """
        num_directions = 2 if is_bidirectional else 1
        # Zero initial hidden/cell state, fed twice (hidden_0/hidden_1).
        hx = np.zeros((num_layers * num_directions, bsz, hidden_size),
                      dtype=np.float32)

        if batch_first:
            inputs = np.random.randn(bsz, seq_lens,
                                     emb_lens).astype(np.float32)
        else:
            inputs = np.random.randn(seq_lens, bsz,
                                     emb_lens).astype(np.float32)

        # Only used as a weight container so both ops share parameters.
        torch_lstm = torch.nn.LSTM(
            emb_lens,
            hidden_size,
            batch_first=batch_first,
            bidirectional=is_bidirectional,
            bias=has_biases,
            num_layers=num_layers,
        )

        def inference_lstm_ref():
            # Run the plain caffe2 operator with the same inputs/weights.
            input_names = ["inputs", "hidden_0", "hidden_1"]
            workspace.FeedBlob("inputs", inputs)
            workspace.FeedBlob("hidden_0", hx)
            workspace.FeedBlob("hidden_1", hx)
            for i, param in enumerate(torch_lstm._flat_weights):
                input_names.append("param_{}".format(i))
                workspace.FeedBlob("param_{}".format(i),
                                   param.detach().numpy())

            ref_op = core.CreateOperator(
                "InferenceLSTM",
                input_names,
                ["output", "hidden", "cell"],
                num_layers=num_layers,
                has_biases=has_biases,
                batch_first=batch_first,
                bidirectional=is_bidirectional,
            )
            workspace.RunOperatorOnce(ref_op)
            return (workspace.FetchBlob("output"),
                    workspace.FetchBlob("hidden"), workspace.FetchBlob("cell"))

        output, hidden, cell = inference_lstm_ref()
        output = torch.tensor(output)
        hidden = torch.tensor(hidden)
        cell = torch.tensor(cell)
        lstm_in = [
            torch.from_numpy(inputs),
            torch.from_numpy(hx),
            torch.from_numpy(hx),
        ] + [param.detach() for param in torch_lstm._flat_weights]

        a, b, c = torch.ops._caffe2.InferenceLSTM(lstm_in, num_layers,
                                                  has_biases, batch_first,
                                                  is_bidirectional)
        torch.testing.assert_allclose(output, a)
        torch.testing.assert_allclose(hidden, b)
        torch.testing.assert_allclose(cell, c)

    # Test case is using workspace.has_cuda_support and not workspace.has_gpu_support
    # to exclude it from HIP because tensor interop doesn't work for HIP tensors yet
    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    @given(
        A=st.integers(min_value=4, max_value=4),
        H=st.integers(min_value=10, max_value=10),
        W=st.integers(min_value=8, max_value=8),
        img_count=st.integers(min_value=3, max_value=3),
    )
    def test_generate_proposals_cuda(self, A, H, W, img_count):
        """CUDA variant of test_generate_proposals.

        Identical inputs; the torch binding runs on CUDA tensors and its
        outputs are moved back to CPU before comparison with the caffe2
        reference.
        """
        scores = np.ones((img_count, A, H, W)).astype(np.float32)
        bbox_deltas = (np.linspace(0, 10,
                                   num=img_count * 4 * A * H * W).reshape(
                                       (img_count, 4 * A, H,
                                        W)).astype(np.float32))
        im_info = np.ones((img_count, 3)).astype(np.float32) / 10
        anchors = np.ones((A, 4)).astype(np.float32)

        def generate_proposals_ref():
            # CPU caffe2 run produces the expected values.
            ref_op = core.CreateOperator(
                "GenerateProposals",
                ["scores", "bbox_deltas", "im_info", "anchors"],
                ["rois", "rois_probs"],
                spatial_scale=2.0,
            )
            workspace.FeedBlob("scores", scores)
            workspace.FeedBlob("bbox_deltas", bbox_deltas)
            workspace.FeedBlob("im_info", im_info)
            workspace.FeedBlob("anchors", anchors)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("rois"), workspace.FetchBlob(
                "rois_probs")

        rois, rois_probs = generate_proposals_ref()
        rois = torch.tensor(rois)
        rois_probs = torch.tensor(rois_probs)
        a, b = torch.ops._caffe2.GenerateProposals(
            torch.tensor(scores).cuda(),
            torch.tensor(bbox_deltas).cuda(),
            torch.tensor(im_info).cuda(),
            torch.tensor(anchors).cuda(),
            2.0,
            6000,
            300,
            0.7,
            16,
            True,
            -90,
            90,
            1.0,
            legacy_plus_one=True,
        )
        torch.testing.assert_allclose(rois, a.cpu())
        torch.testing.assert_allclose(rois_probs, b.cpu())

    @given(
        N=st.integers(min_value=1, max_value=2),
        C=st.integers(min_value=4, max_value=4),
        H=st.integers(min_value=10, max_value=10),
        W=st.integers(min_value=8, max_value=8),
    )
    def _test_roi_align(self, N, C, H, W, device):
        """Compare torch.ops._caffe2.RoIAlign with the caffe2 operator.

        NOTE(review): @given supplies N, C, H, W while `device` comes
        from the wrapper's keyword call — hypothesis fills the remaining
        parameters; confirm this is the intended calling pattern.
        """
        def rand_roi():
            # Row layout appears to be (batch_index, x1, y1, x2, y2)
            # scaled to the feature map size — TODO confirm against the
            # RoIAlign op schema.
            return np.array([
                float(int(N * np.random.rand())),
                0.5 * np.random.rand() * W,
                0.5 * np.random.rand() * H,
                (0.5 + 0.5 * np.random.rand()) * W,
                (0.5 + 0.5 * np.random.rand()) * H,
            ]).astype(np.float32)

        feature = np.random.randn(N, C, H, W).astype(np.float32)
        rois = np.array([rand_roi() for _ in range(10)])

        def roi_align_ref(_feature, _rois):
            # Run the plain caffe2 operator to obtain the expected output.
            ref_op = core.CreateOperator(
                "RoIAlign",
                ["feature", "rois"],
                ["roi_feature"],
                spatial_scale=1.0,
                pooled_h=3,
                pooled_w=3,
                sampling_ratio=0,
            )
            workspace.FeedBlob("feature", _feature)
            workspace.FeedBlob("rois", _rois)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("roi_feature")

        roi_feature_ref = roi_align_ref(feature, rois)
        roi_feature = torch.ops._caffe2.RoIAlign(
            torch.Tensor(feature).to(device),
            torch.Tensor(rois).to(device),
            order="NCHW",
            spatial_scale=1.0,
            pooled_h=3,
            pooled_w=3,
            sampling_ratio=0,
            aligned=False,
        )
        torch.testing.assert_allclose(roi_feature_ref, roi_feature.cpu())

    def test_roi_align_cpu(self):
        # Hypothesis supplies N/C/H/W; only the device is pinned here.
        self._test_roi_align(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_roi_align_cuda(self):
        self._test_roi_align(device="cuda")

    @given(
        N=st.integers(min_value=1, max_value=2),
        C=st.integers(min_value=4, max_value=4),
        H=st.integers(min_value=10, max_value=10),
        W=st.integers(min_value=8, max_value=8),
    )
    def _test_roi_align_rotated(self, N, C, H, W, device):
        """Compare torch.ops._caffe2.RoIAlignRotated with the caffe2 op.

        NOTE(review): @given supplies N, C, H, W while `device` comes
        from the wrapper's keyword call — hypothesis fills the remaining
        parameters; confirm this is the intended calling pattern.
        """
        def rand_rotated_roi():
            # Six values per RoI; the last is an angle in [-180, 180).
            # Presumably (batch_index, ctr_x, ctr_y, w, h, angle) — TODO
            # confirm against the RoIAlignRotated op schema.
            return np.array([
                float(int(N * np.random.rand())),
                np.random.rand() * W,
                np.random.rand() * H,
                np.random.rand() * W,
                np.random.rand() * H,
                np.random.rand() * 360 - 180
            ]).astype(np.float32)

        feature = np.random.randn(N, C, H, W).astype(np.float32)
        rois = np.array([rand_rotated_roi() for _ in range(10)])

        def roi_align_ref(_feature, _rois):
            # Run the plain caffe2 operator to obtain the expected output.
            ref_op = core.CreateOperator(
                "RoIAlignRotated",
                ["feature", "rois"],
                ["roi_feature"],
                spatial_scale=1.0,
                pooled_h=3,
                pooled_w=3,
                sampling_ratio=0,
            )
            workspace.FeedBlob("feature", _feature)
            workspace.FeedBlob("rois", _rois)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("roi_feature")

        roi_feature_ref = roi_align_ref(feature, rois)
        roi_feature = torch.ops._caffe2.RoIAlignRotated(
            torch.Tensor(feature).to(device),
            torch.Tensor(rois).to(device),
            order="NCHW",
            spatial_scale=1.0,
            pooled_h=3,
            pooled_w=3,
            sampling_ratio=0,
            aligned=False,
        )
        torch.testing.assert_allclose(roi_feature_ref, roi_feature.cpu())

    def test_roi_align_rotated_cpu(self):
        self._test_roi_align_rotated(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_roi_align_rotated_cuda(self):
        self._test_roi_align_rotated(device="cuda")

    @given(roi_counts=st.lists(st.integers(0, 5), min_size=1, max_size=10))
    def test_collect_and_distribute_fpn_rpn_proposals_op(self, roi_counts):
        """Chained CollectRpnProposals + DistributeFpnProposals must equal
        the fused CollectAndDistributeFpnRpnProposals op.

        Five levels of RPN RoIs plus five matching score tensors are fed
        through both paths and every output is compared.
        """
        batch_size = len(roi_counts)
        im_dims = np.random.randint(100, 600, batch_size)
        # First 5 entries: per-level RoIs; next 5: per-level scores.
        rpn_rois_and_scores = []
        for _ in range(5):
            rpn_rois_and_scores.append(
                torch.Tensor(generate_rois(roi_counts, im_dims)))
        for _ in range(5):
            rpn_rois_and_scores.append(torch.rand(sum(roi_counts)))

        rois = torch.ops._caffe2.CollectRpnProposals(
            rpn_rois_and_scores,
            rpn_max_level=6,
            rpn_min_level=2,
            rpn_post_nms_topN=sum(roi_counts),
        )
        fpn_outputs = torch.ops._caffe2.DistributeFpnProposals(
            rois,
            roi_canonical_scale=224,
            roi_canonical_level=4,
            roi_max_level=5,
            roi_min_level=2,
            legacy_plus_one=True,
        )

        all_outputs = torch.ops._caffe2.CollectAndDistributeFpnRpnProposals(
            rpn_rois_and_scores,
            roi_canonical_scale=224,
            roi_canonical_level=4,
            roi_max_level=5,
            roi_min_level=2,
            rpn_max_level=6,
            rpn_min_level=2,
            rpn_post_nms_topN=sum(roi_counts),
            legacy_plus_one=True,
        )

        # [rois] + fpn_outputs should be equal to all_outputs
        torch.testing.assert_allclose(rois, all_outputs[0])
        for x, y in zip(fpn_outputs, all_outputs[1:]):
            torch.testing.assert_allclose(x, y)

    @given(X=hu.tensor(), fast_gelu=st.booleans())
    def _test_gelu_op(self, X, fast_gelu, device):
        """Compare the Gelu torch binding against an exact reference.

        NOTE(review): `device` is accepted but never used in the body —
        the op always runs on the default device; confirm intended.
        """
        # Exact GELU: x * Phi(x), with Phi the standard normal CDF.
        expected_output = X * norm.cdf(X).astype(np.float32)
        actual_output = torch.ops._caffe2.Gelu(torch.tensor(X), fast_gelu)

        # fast_gelu presumably selects an approximation, so it gets a
        # looser relative tolerance.
        if fast_gelu:
            rtol = 1e-3
        else:
            rtol = 1e-4
        torch.testing.assert_allclose(expected_output,
                                      actual_output.cpu(),
                                      rtol=rtol,
                                      atol=1e-5)

    def test_gelu_op(self):
        self._test_gelu_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_gelu_op_cuda(self):
        self._test_gelu_op(device="cuda")

    @given(inputs=hu.lengths_tensor(
        dtype=np.float32,
        min_value=1,
        max_value=5,
        allow_empty=True,
    ))
    def _test_lengths_op(self, inputs, ref_op_name, torch_op, device):
        """Compare a Lengths* torch binding with its caffe2 operator.

        NOTE(review): hypothesis supplies `inputs`; ref_op_name/torch_op/
        device come from the wrapper calls. `device` is never used — the
        tensors stay on CPU; confirm intended.
        """
        data, lengths = inputs

        def _lengths_ref(X, Y):
            # Run the plain caffe2 operator named by ref_op_name.
            ref_op = core.CreateOperator(ref_op_name, ["X", "Y"], "out")
            workspace.FeedBlob("X", X)
            workspace.FeedBlob("Y", Y)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("out")

        expected_output = _lengths_ref(data, lengths)
        actual_output = torch_op(torch.tensor(data),
                                 torch.tensor(lengths, dtype=torch.int32))

        torch.testing.assert_allclose(expected_output, actual_output.cpu())

    def _test_lengths_sum_op(self, device):
        self._test_lengths_op("LengthsSum", torch.ops._caffe2.LengthsSum,
                              device)

    def test_lengths_sum_op(self):
        self._test_lengths_sum_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_lengths_sum_op_cuda(self):
        self._test_lengths_sum_op(device="cuda")

    def _test_lengths_mean_op(self, device):
        self._test_lengths_op("LengthsMean", torch.ops._caffe2.LengthsMean,
                              device)

    def test_lengths_mean_op(self):
        self._test_lengths_mean_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_lengths_mean_op_cuda(self):
        self._test_lengths_mean_op(device="cuda")

    def _test_lengths_max_op(self, device):
        self._test_lengths_op("LengthsMax", torch.ops._caffe2.LengthsMax,
                              device)

    def test_lengths_max_op(self):
        self._test_lengths_max_op(device="cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_lengths_max_op_cuda(self):
        self._test_lengths_max_op(device="cuda")

    def _test_resize_nearest_op(self, device):
        """Compare the ResizeNearest torch binding with the caffe2 op on
        a single small NCHW tensor."""
        x = np.random.rand(1, 2, 3, 4).astype(np.float32)

        def _caffe2_resize(inp):
            ref_op = core.CreateOperator(
                "ResizeNearest",
                ["X"],
                ["Y"],
                width_scale=2.0,
                height_scale=1.5,
                order="NCHW",
            )
            workspace.FeedBlob("X", inp)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("Y")

        expected_output = _caffe2_resize(x)
        actual_output = torch.ops._caffe2.ResizeNearest(
            torch.tensor(x).to(device),
            order="NCHW",
            width_scale=2.0,
            height_scale=1.5,
        )
        torch.testing.assert_allclose(expected_output, actual_output.cpu())

    def test_resize_nearest_op_cpu(self):
        return self._test_resize_nearest_op("cpu")

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_resize_nearest_op_cuda(self):
        return self._test_resize_nearest_op("cuda")

    @given(input_data=hu.tensor(min_dim=2, max_dim=2))
    def test_Fused8BitRowwiseQuantizedToFloat(self, input_data):
        """Quantize with the caffe2 op, dequantize with the torch binding,
        and compare against the module-level reference helper.
        """
        QuantizeOp = core.CreateOperator(
            "FloatToFused8BitRowwiseQuantized",
            ["input_data"],
            ["quantized_data"],
        )

        workspace.FeedBlob("input_data", input_data)
        workspace.RunOperatorOnce(QuantizeOp)

        quantized_data = workspace.FetchBlob("quantized_data")

        dequantized_data = torch.ops._caffe2.Fused8BitRowwiseQuantizedToFloat(
            torch.tensor(quantized_data))

        # Reference round-trips the input through the same 8-bit rowwise
        # quantization scheme (helper defined elsewhere in this module).
        reference = fused_rowwise_8bit_quantize_dequantize_reference(
            input_data)
        np.testing.assert_array_almost_equal(dequantized_data.numpy(),
                                             reference)

    @given(binary_input=st.booleans())
    def test_piecewise_linear_op(self, binary_input):
        """Compare the PiecewiseLinearTransform torch binding with the
        caffe2 operator on random data.
        """
        if binary_input:
            num_dims = 1
        else:
            num_dims = 3
        data = np.random.rand(1024, num_dims).astype(np.float32)
        # 4 segments per dimension => 5 sorted bounds per dimension.
        slopes = np.zeros(4 * num_dims).astype(np.float32)
        bounds = np.sort(np.random.rand(5, num_dims).astype(np.float32),
                         axis=0).flatten('F')
        intercepts = np.random.rand(4 * num_dims).astype(np.float32)

        def _piecewise_linear_ref(X):
            # Run the plain caffe2 operator to obtain expected output.
            ref_op = core.CreateOperator(
                "PiecewiseLinearTransform",
                ["data", "bounds", "slopes", "intercepts"],
                ["calibrated"],
                binary=binary_input,
            )
            workspace.FeedBlob("data", X)
            workspace.FeedBlob("bounds", bounds)
            workspace.FeedBlob("slopes", slopes)
            workspace.FeedBlob("intercepts", intercepts)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("calibrated")

        expected_output = _piecewise_linear_ref(data)
        actual_output = torch.ops._caffe2.PiecewiseLinearTransform(
            torch.tensor(data), bounds.tolist(), slopes.tolist(),
            intercepts.tolist(), binary_input)

        torch.testing.assert_allclose(torch.tensor(expected_output),
                                      actual_output)

    def test_alias_with_name_is_in_place(self):
        """AliasWithName must share storage with its input tensor."""
        device = "cuda" if workspace.has_cuda_support else "cpu"
        original = torch.Tensor([3, 42]).to(device)
        alias = torch.ops._caffe2.AliasWithName(original, "new_name")
        # Mutating the source must be visible through the alias.
        original[1] = 6
        expected = torch.Tensor([3, 6]).to(device)
        torch.testing.assert_allclose(original, expected)
        torch.testing.assert_allclose(alias, expected)

    @unittest.skipIf(not workspace.has_cuda_support, "No cuda support")
    def test_copy_between_cpu_and_gpu(self):
        """CopyCPUToGPU and CopyGPUToCPU must round-trip a tensor."""
        x_cpu_ref = torch.Tensor([1, 2, 3])
        x_gpu_ref = x_cpu_ref.to("cuda")

        x_gpu = torch.ops._caffe2.CopyCPUToGPU(x_cpu_ref)
        torch.testing.assert_allclose(x_gpu, x_gpu_ref)
        x_cpu = torch.ops._caffe2.CopyGPUToCPU(x_gpu)
        torch.testing.assert_allclose(x_cpu, x_cpu_ref)

    def test_index_hash_op(self):
        """The IndexHash torch binding must agree with the caffe2 op."""
        values = np.random.randint(low=0, high=1000, size=(4, 4, 4))

        def run_caffe2(X):
            op = core.CreateOperator(
                "IndexHash", ["X"], ["Y"], seed=0, modulo=100)
            workspace.FeedBlob("X", X)
            workspace.RunOperatorOnce(op)
            return workspace.FetchBlob("Y")

        expected = run_caffe2(values)
        actual = torch.ops._caffe2.IndexHash(
            torch.tensor(values), seed=0, modulo=100)

        torch.testing.assert_allclose(expected, actual.cpu())

    def test_bucketize_op(self):
        """The Bucketize torch binding must agree with the caffe2 op."""
        values = np.random.rand(8, 10).astype(np.float32) * 1000
        boundaries = np.array([1, 10, 100, 1000, 100000]).astype(np.float32)

        def run_caffe2(X):
            op = core.CreateOperator("Bucketize", ["X"], ["Y"],
                                     boundaries=boundaries)
            workspace.FeedBlob("X", X)
            workspace.RunOperatorOnce(op)
            return workspace.FetchBlob("Y")

        expected = run_caffe2(values)
        actual = torch.ops._caffe2.Bucketize(torch.tensor(values),
                                             boundaries)
        torch.testing.assert_allclose(expected, actual.cpu())

    @given(
        X=hu.tensor(),
        eps=st.floats(min_value=1e-4, max_value=1e-2),
    )
    def test_logit(self, X, eps):
        """The Logit torch binding must agree with the caffe2 op."""
        def run_caffe2(X, eps):
            op = core.CreateOperator('Logit', ["X"], ["Y"], eps=eps)
            workspace.FeedBlob("X", X)
            workspace.RunOperatorOnce(op)
            return workspace.FetchBlob("Y")

        expected = run_caffe2(X, eps)
        actual = torch.ops._caffe2.Logit(torch.tensor(X), eps)
        torch.testing.assert_allclose(expected, actual.cpu())

    def test_percentile(self):
        """Compare the Percentile torch binding with the caffe2 op on a
        small hand-written value-to-percentile mapping.
        """
        original_values = np.array([[3., 5., 3], [5., 1.,
                                                  6.]]).astype(np.float32)
        # Rows of (value, percentile); `lengths` partitions them per column.
        value_to_pct = np.array([[3, 0.2], [5, 0.5], [1, 0.3],
                                 [3, 0.6]]).astype(np.float32)
        lengths = np.array([2, 1, 1]).astype(np.int32)

        def _percentile_ref(original_values, value_to_pct, lengths):
            # Run the plain caffe2 operator to obtain expected output.
            ref_op = core.CreateOperator(
                'Percentile', ["original_values", "value_to_pct", "lengths"],
                ["Y"])
            workspace.FeedBlob("original_values", original_values)
            workspace.FeedBlob("value_to_pct", value_to_pct)
            workspace.FeedBlob("lengths", lengths)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("Y")

        expected_output = _percentile_ref(original_values, value_to_pct,
                                          lengths)
        actual_output = torch.ops._caffe2.Percentile(
            torch.tensor(original_values), torch.Tensor(value_to_pct),
            torch.Tensor(lengths).int())
        torch.testing.assert_allclose(expected_output, actual_output.cpu())

    def test_batch_bucket_one_hot_op(self):
        """Compare the BatchBucketOneHot torch binding with the caffe2 op
        on fixed data / lengths / boundaries.
        """
        data = np.array([[2, 3], [4, 1], [2, 5]]).astype(np.float32)
        # `lengths` splits `boundaries` per input column (2 and 3 bounds).
        lengths = np.array([2, 3]).astype(np.int32)
        boundaries = np.array([0.1, 2.5, 1, 3.1, 4.5]).astype(np.float32)

        def _batch_bucket_one_hot_ref(data, lengths, boundaries):
            # Run the plain caffe2 operator to obtain expected output.
            ref_op = core.CreateOperator('BatchBucketOneHot',
                                         ["data", "lengths", "boundaries"],
                                         ["Y"])
            workspace.FeedBlob("data", data)
            workspace.FeedBlob("lengths", lengths)
            workspace.FeedBlob("boundaries", boundaries)
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("Y")

        expected_output = _batch_bucket_one_hot_ref(data, lengths, boundaries)
        actual_output = torch.ops._caffe2.BatchBucketOneHot(
            torch.tensor(data),
            torch.Tensor(lengths).int(), torch.Tensor(boundaries))
        torch.testing.assert_allclose(expected_output, actual_output.cpu())

    @given(lengths_0=st.integers(1, 10), lengths_1=st.integers(1, 10))
    @settings(deadline=1000)
    def test_merge_id_lists(self, lengths_0, lengths_1):
        """Compare the MergeIdLists torch binding with the caffe2 op on
        two single-segment id lists with disjoint value ranges.
        """
        def _merge_id_lists(lengths, values):
            # Run the plain caffe2 operator to obtain expected output.
            ref_op = core.CreateOperator(
                'MergeIdLists',
                ["lengths_0", "values_0", "lengths_1", "values_1"],
                ["merged_lengths", "merged_values"])
            workspace.FeedBlob("lengths_0", lengths[0])
            workspace.FeedBlob("values_0", values[0])
            workspace.FeedBlob("lengths_1", lengths[1])
            workspace.FeedBlob("values_1", values[1])
            workspace.RunOperatorOnce(ref_op)
            return workspace.FetchBlob("merged_lengths"), workspace.FetchBlob(
                "merged_values")

        lengths = [
            np.array([lengths_0]).astype(np.int32),
            np.array([lengths_1]).astype(np.int32)
        ]
        # Disjoint ranges (0..9 vs 10..19) keep the two lists distinguishable.
        values = [
            np.random.choice(np.arange(0, 10), size=lengths_0,
                             replace=False).astype(np.int32),
            np.random.choice(np.arange(10, 20), size=lengths_1,
                             replace=False).astype(np.int32)
        ]

        expected_merged_lengths, expected_merged_values = _merge_id_lists(
            lengths, values)
        output_merged_lengths, output_merged_values = torch.ops._caffe2.MergeIdLists(
            [
                torch.tensor(lengths[0]),
                torch.tensor(values[0]),
                torch.tensor(lengths[1]),
                torch.tensor(values[1])
            ])
        torch.testing.assert_allclose(expected_merged_lengths,
                                      output_merged_lengths)
        torch.testing.assert_allclose(expected_merged_values,
                                      output_merged_values)

    def test_learning_rate(self):
        """Check the LearningRate torch binding for the 'fixed' and
        'step' policies at iteration counts 0, 1 and 2.
        """
        base_lr = 0.05
        no_iter = torch.tensor([0])
        one_iter = torch.tensor([1])
        two_iter = torch.tensor([2])

        # Fixed policy
        self.assertEqual(
            base_lr,
            torch.ops._caffe2.LearningRate(iterations=no_iter,
                                           base_lr=base_lr,
                                           policy="fixed"),
        )
        self.assertEqual(
            base_lr,
            torch.ops._caffe2.LearningRate(iterations=one_iter,
                                           base_lr=base_lr,
                                           policy="fixed"),
        )

        # Step policy
        gamma = 0.99
        stepsize = 1

        self.assertEqual(
            base_lr,
            torch.ops._caffe2.LearningRate(
                iterations=no_iter,
                base_lr=base_lr,
                policy="step",
                stepsize=stepsize,
                gamma=gamma,
            ),
        )
        # Expected step decay: base_lr * gamma ** (iter / stepsize).
        self.assertAlmostEqual(
            base_lr * (gamma**(1.0 / stepsize)),
            torch.ops._caffe2.LearningRate(
                iterations=one_iter,
                base_lr=base_lr,
                policy="step",
                stepsize=stepsize,
                gamma=gamma,
            ),
        )
        self.assertAlmostEqual(
            base_lr * (gamma**(2.0 / stepsize)),
            torch.ops._caffe2.LearningRate(
                iterations=two_iter,
                base_lr=base_lr,
                policy="step",
                stepsize=stepsize,
                gamma=gamma,
            ),
        )
# Beispiel #12
# 0
class TestUtilityOps(hu.HypothesisTestCase):

    @given(X=hu.tensor(), neg=st.booleans(), **hu.gcs)
    def test_slice(self, X, neg, gc, dc):
        """Slice a random tensor along one random dim; check against numpy."""
        X = X.astype(dtype=np.float32)
        dim = random.randint(0, X.ndim - 1)
        slice_start = random.randint(0, X.shape[dim] - 1)
        slice_end = random.randint(slice_start, X.shape[dim] - 1)
        starts = np.array([0] * X.ndim).astype(np.int32)
        ends = np.array([-1] * X.ndim).astype(np.int32)
        starts[dim] = slice_start
        ends[dim] = slice_end

        op = core.CreateOperator(
            "Slice", ["X", "starts", "ends"], ["Y"], device_option=gc
        )

        def slice_ref(X, starts, ends):
            slc = [slice(None)] * X.ndim
            slc[dim] = slice(slice_start, slice_end)
            # Index with a tuple: indexing an ndarray with a *list* of slices
            # is deprecated and removed in modern NumPy (it would be treated
            # as fancy indexing and fail).
            return [X[tuple(slc)]]

        self.assertReferenceChecks(gc, op, [X, starts, ends], slice_ref)

        self.assertDeviceChecks(dc, op, [X, starts, ends], [0])

    @given(dtype=st.sampled_from([np.float32, np.int32, np.int64]),
           ndims=st.integers(min_value=1, max_value=5),
           seed=st.integers(min_value=0, max_value=65536),
           null_axes=st.booleans(),
           engine=st.sampled_from(['CUDNN', None]),
           **hu.gcs)
    def test_transpose(self, dtype, ndims, seed, null_axes, engine, gc, dc):
        """Transpose with random (or absent) axes, checked vs np.transpose."""
        shape = (np.random.rand(ndims) * 16 + 1).astype(np.int32)
        X = (np.random.rand(*shape) * 16).astype(dtype)

        if null_axes:
            # No axes argument: the op reverses all dimensions by default.
            axes = None
            op = core.CreateOperator(
                "Transpose",
                ["input"], ["output"],
                engine=engine)
        else:
            np.random.seed(int(seed))
            axes = [int(v) for v in np.random.permutation(X.ndim)]
            op = core.CreateOperator(
                "Transpose",
                ["input"], ["output"],
                axes=axes,
                engine=engine)

        def transpose_ref(x, axes):
            return (np.transpose(x, axes),)

        self.assertReferenceChecks(gc, op, [X, axes], transpose_ref)

    @given(m=st.integers(5, 10), n=st.integers(5, 10),
           o=st.integers(5, 10), nans=st.booleans(), **hu.gcs)
    def test_nan_check(self, m, n, o, nans, gc, dc):
        """NanCheck must pass clean tensors through and raise on NaNs."""
        other = np.array([1, 2, 3]).astype(np.float32)
        X = np.random.rand(m, n, o).astype(np.float32)
        if nans:
            # Poison a single random element of X.
            poison_idx = tuple(np.random.randint(0, d) for d in (m, n, o))
            X[poison_idx] = float('NaN')

        def nan_reference(X, Y):
            # Clean input passes through; NaN input makes the op fail, so the
            # reference value for that path is never actually compared.
            return [X] if not np.isnan(X).any() else [np.array([])]

        op = core.CreateOperator(
            "NanCheck",
            ["X", "other"],
            ["Y"]
        )

        try:
            self.assertReferenceChecks(
                device_option=gc,
                op=op,
                inputs=[X, other],
                reference=nan_reference,
            )
            if nans:
                self.assertTrue(False, "Did not fail when presented with NaN!")
        except RuntimeError:
            self.assertTrue(nans, "No NaNs but failed")

        try:
            self.assertGradientChecks(
                device_option=gc,
                op=op,
                inputs=[X],
                outputs_to_check=0,
                outputs_with_grads=[0],
            )
            if nans:
                self.assertTrue(False, "Did not fail when gradient had NaN!")
        except RuntimeError:
            pass

    @given(n=st.integers(4, 5), m=st.integers(6, 7),
           d=st.integers(2, 3), **hu.gcs)
    def test_elementwise_max(self, n, m, d, gc, dc):
        """Element-wise Max over three same-shaped inputs vs np.maximum."""
        tensors = [np.random.rand(n, m, d).astype(np.float32)
                   for _ in range(3)]

        def max_ref(X, Y, Z):
            return [np.maximum(np.maximum(X, Y), Z)]

        op = core.CreateOperator(
            "Max",
            ["X", "Y", "Z"],
            ["mx"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=tensors,
            reference=max_ref,
        )

    @given(
        inputs=hu.lengths_tensor(max_value=30).flatmap(
            lambda pair: st.tuples(
                st.just(pair[0]),
                st.just(pair[1]),
                hu.dims(max_value=len(pair[1])),
            )
        ).flatmap(
            lambda tup: st.tuples(
                st.just(tup[0]),
                st.just(tup[1]),
                hu.arrays(
                    tup[2], dtype=np.int32,
                    elements=st.integers(
                        min_value=0, max_value=len(tup[1]) - 1)),
            )
        ),
        **hu.gcs_cpu_only)
    def test_lengths_gather(self, inputs, gc, dc):
        """Gather whole length-delimited segments of `items` by segment index."""
        items, lengths, indices = inputs

        def lengths_gather_ref(items, lengths, indices):
            # Segment i spans items[starts[i]:ends[i]].
            ends = np.cumsum(lengths)
            starts = ends - lengths
            segments = [items[starts[i]:ends[i]] for i in indices]
            return [np.concatenate(segments)]

        op = core.CreateOperator(
            "LengthsGather",
            ["items", "lengths", "indices"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[items, lengths, indices],
            reference=lengths_gather_ref,
        )

    @given(**hu.gcs)
    def test_size_op(self, gc, dc):
        """Size returns the total number of elements of its input tensor."""
        X = np.array([[1, 2], [3, 4]]).astype(np.float32)

        def size_ref(tensor):
            return [np.prod(tensor.shape)]

        op = core.CreateOperator(
            "Size",
            ["X"],
            ["output"]
        )

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X],
            reference=size_ref,
        )