Example #1
    def test_spatialbn_train_mode(self, size, input_channels, batch_size, seed,
                                  order, epsilon, momentum, inplace, engine,
                                  gc, dc):
        # Currently HIP SpatialBN only supports NCHW
        if hiputl.run_in_hip(gc, dc):
            assume(order == "NCHW")

        op = core.CreateOperator(
            "SpatialBN",
            ["X", "scale", "bias", "running_mean", "running_var"],
            [
                "X" if inplace else "Y", "running_mean", "running_var",
                "saved_mean", "saved_var"
            ],
            order=order,
            is_test=False,
            epsilon=epsilon,
            momentum=momentum,
            engine=engine,
        )
        np.random.seed(1701)
        scale = np.random.rand(input_channels).astype(np.float32) + 0.5
        bias = np.random.rand(input_channels).astype(np.float32) - 0.5
        mean = np.random.randn(input_channels).astype(np.float32)
        var = np.random.rand(input_channels).astype(np.float32) + 0.5
        X = np.random.rand(batch_size, input_channels, size, size).astype(
            np.float32) - 0.5

        if order == "NHWC":
            X = X.swapaxes(1, 2).swapaxes(2, 3)

        self.assertDeviceChecks(dc, op, [X, scale, bias, mean, var],
                                [0, 1, 2, 3, 4])
Example #2
    def test_pooling_3d(self, stride, pad, kernel, size, input_channels,
                        batch_size, order, op_type, engine, gc, dc):
        assume(pad < kernel)
        assume(size + pad + pad >= kernel)
        # Currently MIOpen Pooling only supports pooling with NCHW order.
        if hiputl.run_in_hip(gc, dc) and (workspace.GetHIPVersion() < 303
                                          or order == "NHWC"):
            assume(engine != "CUDNN")
        # Some cases here could be computed with global pooling, but are
        # instead computed with the general implementation, which is slower
        # but should still be correct.
        op = core.CreateOperator(
            op_type,
            ["X"],
            ["Y"],
            strides=[stride] * 3,
            kernels=[kernel] * 3,
            pads=[pad] * 6,
            order=order,
            engine=engine,
        )
        X = np.random.rand(batch_size, size, size, size,
                           input_channels).astype(np.float32)
        if order == "NCHW":
            X = utils.NHWC2NCHW(X)

        self.assertDeviceChecks(dc, op, [X], [0], threshold=0.001)
        if 'MaxPool' not in op_type:
            self.assertGradientChecks(gc, op, [X], 0, [0], threshold=0.001)
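As a quick sanity check on the assume(size + pad + pad >= kernel) guard above, here is a small standalone sketch of the standard floor-mode pooling output-size formula (an assumption about the default padding mode; the test itself does not assert this formula):

# Sketch of the standard floor-mode pooling output size (an assumption about
# the default padding behaviour; not asserted by the test above).
def pool_output_size(size, pad, kernel, stride):
    return (size + 2 * pad - kernel) // stride + 1

# e.g. size=4, pad=1, kernel=3, stride=2 -> (4 + 2 - 3) // 2 + 1 = 2 per axis;
# assume(size + pad + pad >= kernel) keeps the numerator non-negative.
print(pool_output_size(4, 1, 3, 2))  # 2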
Example #3
    def test_spatialbn_train_mode_gradient_check_1d(self, size, input_channels,
                                                    batch_size, seed, order,
                                                    epsilon, momentum, engine,
                                                    gc, dc):
        # Currently MIOPEN SpatialBN only supports 2D
        if hiputl.run_in_hip(gc, dc):
            assume(engine != "CUDNN")
        op = core.CreateOperator(
            "SpatialBN",
            ["X", "scale", "bias", "mean", "var"],
            ["Y", "mean", "var", "saved_mean", "saved_var"],
            order=order,
            is_test=False,
            epsilon=epsilon,
            momentum=momentum,
            engine=engine,
        )
        np.random.seed(seed)
        scale = np.random.rand(input_channels).astype(np.float32) + 0.5
        bias = np.random.rand(input_channels).astype(np.float32) - 0.5
        mean = np.random.randn(input_channels).astype(np.float32)
        var = np.random.rand(input_channels).astype(np.float32) + 0.5
        X = np.random.rand(batch_size, input_channels, size).astype(
            np.float32) - 0.5
        if order == "NHWC":
            X = X.swapaxes(1, 2)

        for input_to_check in [0, 1, 2]:  # dX, dScale, dBias
            self.assertGradientChecks(gc,
                                      op, [X, scale, bias, mean, var],
                                      input_to_check, [0],
                                      stepsize=0.01)
Example #4
    def test_pooling(self, stride, pad, kernel, size,
                     input_channels, batch_size,
                     order, op_type, engine, gc, dc):
        assume(pad < kernel)
        if hiputl.run_in_hip(gc, dc) and engine == "CUDNN":
            assume(order == "NCHW" and op_type != "LpPool")

        op = core.CreateOperator(
            op_type,
            ["X"],
            ["Y"],
            stride=stride,
            kernel=kernel,
            pad=pad,
            order=order,
            engine=engine,
        )
        X = np.random.rand(
            batch_size, size, size, input_channels).astype(np.float32)
        if order == "NCHW":
            X = X.transpose((0, 3, 1, 2))

        self.assertDeviceChecks(dc, op, [X], [0])
        if 'MaxPool' not in op_type:
            self.assertGradientChecks(gc, op, [X], 0, [0])
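The transpose above is the NHWC-to-NCHW layout change that other examples perform via utils.NHWC2NCHW; a minimal NumPy-only sketch of the two conversions (assuming 4-D tensors):

import numpy as np

# Minimal NHWC <-> NCHW helpers; for 4-D tensors this appears to be the same
# axis permutation that utils.NHWC2NCHW performs in the other examples.
def nhwc2nchw(x):
    return x.transpose((0, 3, 1, 2))

def nchw2nhwc(x):
    return x.transpose((0, 2, 3, 1))

x = np.random.rand(2, 5, 5, 3).astype(np.float32)  # NHWC
assert np.array_equal(nchw2nhwc(nhwc2nchw(x)), x)   # round-trips exactly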
Example #5
    def test_convolution_transpose_layout(
            self, stride, pad, kernel, adj,
            size, input_channels,
            output_channels, batch_size,
            engine, shared_buffer, use_bias, gc, dc):
        assume(adj < stride)
        X = np.random.rand(
            batch_size, size, size, input_channels).astype(np.float32) - 0.5
        w = np.random.rand(
            input_channels, kernel, kernel, output_channels)\
            .astype(np.float32) - 0.5
        b = np.random.rand(output_channels).astype(np.float32) - 0.5
        outputs = {}
        for order in ["NCHW", "NHWC"]:
            if hiputl.run_in_hip(gc, dc) and order == "NHWC":
                # MIOpen doesn't work with NHWC; fall back to the normal HIP path
                tmp_engine = ""
            else:
                tmp_engine = engine
            op = core.CreateOperator(
                "ConvTranspose",
                ["X", "w", "b"] if use_bias else ["X", "w"],
                ["Y"],
                strides=[stride] * 2,
                kernels=[kernel] * 2,
                pads=[pad] * 4,
                adjs=[adj] * 2,
                order=order,
                engine=tmp_engine,
                shared_buffer=int(shared_buffer),
                device_option=gc,
            )
            if order == "NCHW":
                X_f = utils.NHWC2NCHW(X)
                w_f = utils.NHWC2NCHW(w)
            else:
                X_f = X
                w_f = w
            self.assertDeviceChecks(
                dc,
                op,
                [X_f, w_f, b] if use_bias else [X_f, w_f],
                [0])
            self.ws.create_blob("X").feed(X_f, device_option=gc)
            self.ws.create_blob("w").feed(w_f, device_option=gc)
            self.ws.create_blob("b").feed(b, device_option=gc)
            self.ws.run(op)
            outputs[order] = self.ws.blobs["Y"].fetch()
        output_size = (size - 1) * stride + kernel + adj - 2 * pad
        self.assertEqual(
            outputs["NCHW"].shape,
            (batch_size, output_channels, output_size, output_size))
        np.testing.assert_allclose(
            outputs["NCHW"],
            utils.NHWC2NCHW(outputs["NHWC"]),
            atol=1e-4,
            rtol=1e-4)
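The shape assertion at the end uses the transposed-convolution output-size formula from the test itself; a small worked example with illustrative numbers:

# Output size of ConvTranspose as computed in the test above.
def conv_transpose_output_size(size, stride, kernel, adj, pad):
    return (size - 1) * stride + kernel + adj - 2 * pad

# e.g. size=4, stride=2, kernel=3, adj=1, pad=1 -> (4 - 1) * 2 + 3 + 1 - 2 = 8
print(conv_transpose_output_size(4, 2, 3, 1, 1))  # 8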
Example #6
    def test_convolution_transpose_layout_legacy_args(
            self, stride, pad, kernel, adj,
            size, input_channels,
            output_channels, batch_size,
            engine, shared_buffer, use_bias, gc, dc):
        assume(adj < stride)
        X = np.random.rand(
            batch_size, size, size, input_channels).astype(np.float32) - 0.5
        w = np.random.rand(
            input_channels, kernel, kernel, output_channels)\
            .astype(np.float32) - 0.5
        b = np.random.rand(output_channels).astype(np.float32) - 0.5
        outputs = {}
        for order in ["NCHW", "NHWC"]:
            # NHWC is not supported by MIOpen, so fall back to plain HIP there
            if hiputl.run_in_hip(gc, dc) and engine == "CUDNN":
                tmp_engine = "CUDNN" if order == "NCHW" else ""
            else:
                tmp_engine = engine
            op = core.CreateOperator(
                "ConvTranspose",
                ["X", "w", "b"] if use_bias else ["X", "w"],
                ["Y"],
                stride=stride,
                kernel=kernel,
                pad=pad,
                adj=adj,
                order=order,
                engine=tmp_engine,
                shared_buffer=int(shared_buffer),
                device_option=gc,
            )
            if order == "NCHW":
                X_f = X.transpose((0, 3, 1, 2))
                w_f = w.transpose((0, 3, 1, 2))
            else:
                X_f = X
                w_f = w
            self.assertDeviceChecks(
                dc,
                op,
                [X_f, w_f, b] if use_bias else [X_f, w_f],
                [0])
            self.ws.create_blob("X").feed(X_f, device_option=gc)
            self.ws.create_blob("w").feed(w_f, device_option=gc)
            self.ws.create_blob("b").feed(b, device_option=gc)
            self.ws.run(op)
            outputs[order] = self.ws.blobs["Y"].fetch()
        output_size = (size - 1) * stride + kernel + adj - 2 * pad
        self.assertEqual(
            outputs["NCHW"].shape,
            (batch_size, output_channels, output_size, output_size))
        np.testing.assert_allclose(
            outputs["NCHW"],
            outputs["NHWC"].transpose((0, 3, 1, 2)),
            atol=1e-4,
            rtol=1e-4)
Example #7
    def test_sparse_lengths_weightedsum_cpu(
        self, batchsize, fptype, fp16asint, blocksize, empty_indices, gc, dc
    ):
        if fptype != np.float32:
            assume(gc.device_type == caffe2_pb2.CPU)
            assume(not hiputl.run_in_hip(gc, dc))
            assume(caffe2_pb2.CUDA not in {d.device_type for d in dc})

        print("<test_sparse_lengths_weightedsum_cpu>")

        tblsize = 300
        if fptype == np.float32:
            Tbl = np.random.rand(tblsize, blocksize).astype(np.float32)
            atol = 1e-5
        else:
            if fp16asint:
                Tbl = (
                    (10.0 * np.random.rand(tblsize, blocksize))
                    .round()
                    .astype(np.float16)
                )
                atol = 1e-3
            else:
                Tbl = np.random.rand(tblsize, blocksize).astype(np.float16)
                atol = 1e-1

        # lengths of each segment (number of indices per output row)
        if empty_indices:
            Lengths = np.zeros(batchsize, dtype=np.int32)
        else:
            Lengths = np.random.randint(1, 30, size=batchsize, dtype=np.int32)
        # flat indices
        Indices = np.random.randint(0, tblsize, size=sum(Lengths), dtype=np.int64)
        Weights = np.random.rand(sum(Lengths)).astype(np.float32)

        op = core.CreateOperator(
            "SparseLengthsWeightedSum", ["Tbl", "Weights", "Indices", "Lengths"], "out"
        )

        def sparse_lengths_weightedsum_ref(Tbl, Weights, Indices, Lengths):
            rptr = np.cumsum(np.insert(Lengths, [0], [0]))
            out = np.zeros((len(Lengths), blocksize))
            for i in range(0, len(rptr[0:-1])):
                w = Weights[rptr[i] : rptr[i + 1]]
                out[i] = (Tbl[Indices[rptr[i] : rptr[i + 1]]] * w[:, np.newaxis]).sum(
                    axis=0
                )
            return [out.astype(np.float32)]

        self.assertReferenceChecks(
            gc,
            op,
            [Tbl, Weights, Indices, Lengths],
            sparse_lengths_weightedsum_ref,
            threshold=1e-3,
            atol=atol,
        )
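To make the semantics of the reference concrete, here is a tiny hand-checkable run of the same NumPy logic as a standalone sketch (it does not call the Caffe2 operator):

import numpy as np

# Standalone version of sparse_lengths_weightedsum_ref with tiny inputs.
Tbl = np.array([[1.0, 0.0], [0.0, 1.0], [2.0, 2.0]], dtype=np.float32)
Lengths = np.array([2, 1], dtype=np.int32)     # segment sizes
Indices = np.array([0, 2, 1], dtype=np.int64)  # rows of Tbl per segment
Weights = np.array([0.5, 2.0, 1.0], dtype=np.float32)

rptr = np.cumsum(np.insert(Lengths, 0, 0))
out = np.zeros((len(Lengths), Tbl.shape[1]), dtype=np.float32)
for i in range(len(Lengths)):
    w = Weights[rptr[i]:rptr[i + 1]]
    out[i] = (Tbl[Indices[rptr[i]:rptr[i + 1]]] * w[:, np.newaxis]).sum(axis=0)

# Segment 0: 0.5*Tbl[0] + 2.0*Tbl[2] = [4.5, 4.0]; segment 1: 1.0*Tbl[1] = [0.0, 1.0]
print(out)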
Example #8
    def test_1d_convolution_nchw(self, input_channels, output_channels,
                                 batch_size, stride, size, kernel, dilation,
                                 pad, use_bias, engine, force_algo_fwd,
                                 force_algo_dgrad, force_algo_wgrad, gc, dc):
        if hiputl.run_in_hip(gc, dc):
            # currently miopen only supports 2d conv
            assume(engine != 'CUDNN')  # CUDNN is aliased to MIOPEN for HIP

        self._nd_convolution_nchw(1, input_channels, output_channels,
                                  batch_size, stride, size, kernel, dilation,
                                  pad, use_bias, engine, force_algo_fwd,
                                  force_algo_dgrad, force_algo_wgrad, gc, dc)
Example #9
    def test_max_pool_grad(self, op_type, dim, N, C, D, H, W, kernel, stride,
                           pad, order, engine, gc, dc):
        assume(pad < kernel)
        assume(dim > 1 or engine == "")
        if hiputl.run_in_hip(gc, dc):
            if dim != 2:
                assume(engine != "CUDNN")
            elif engine == "CUDNN":
                assume(order == "NCHW")

        if op_type.endswith("ND"):
            op_type = op_type.replace("N", str(dim))

        op = core.CreateOperator(
            op_type,
            ["X"],
            ["Y"],
            kernels=[kernel] * dim,
            strides=[stride] * dim,
            pads=[pad] * dim * 2,
            order=order,
            engine=engine,
        )

        if dim == 1:
            size = W
            dims = [N, C, W]
            axes = [0, 2, 1]
        elif dim == 2:
            size = H * W
            dims = [N, C, H, W]
            axes = [0, 2, 3, 1]
        else:
            size = D * H * W
            dims = [N, C, D, H, W]
            axes = [0, 2, 3, 4, 1]

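        # Each row of X is a shuffled permutation of distinct values, so every
        # pooling window has a unique maximum, which keeps the MaxPool gradient
        # check below well defined.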
        X = np.zeros((N * C, size)).astype(np.float32)
        for i in range(N * C):
            X[i, :] = np.arange(size, dtype=np.float32) / size
            np.random.shuffle(X[i, :])
        X = X.reshape(dims)
        if order == "NHWC":
            X = np.transpose(X, axes)

        self.assertDeviceChecks(dc, op, [X], [0])
        self.assertGradientChecks(gc,
                                  op, [X],
                                  0, [0],
                                  threshold=0.05,
                                  stepsize=0.005)
Example #10
    def test_group_convolution(self, stride, pad, kernel, size, group,
                               input_channels_per_group,
                               output_channels_per_group, batch_size, order,
                               engine, use_bias, gc, dc):
        assume(size >= kernel)

        if hiputl.run_in_hip(gc, dc):
            if order == "NHWC":
                assume(group == 1 and engine != "CUDNN")
        else:
            # TODO: Group conv in NHWC not implemented for GPU yet.
            assume(group == 1 or order == "NCHW"
                   or gc.device_type == caffe2_pb2.CPU)

            if group != 1 and order == "NHWC":
                dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]

        # Group conv not implemented with EIGEN engine.
        assume(group == 1 or engine != "EIGEN")

        input_channels = input_channels_per_group * group
        output_channels = output_channels_per_group * group

        op = core.CreateOperator(
            "Conv",
            ["X", "w", "b"] if use_bias else ["X", "w"],
            ["Y"],
            stride=stride,
            kernel=kernel,
            pad=pad,
            order=order,
            engine=engine,
            group=group,
        )
        X = np.random.rand(batch_size, size, size, input_channels).astype(
            np.float32) - 0.5
        w = np.random.rand(
            output_channels, kernel, kernel,
            input_channels_per_group).astype(np.float32)\
            - 0.5
        b = np.random.rand(output_channels).astype(np.float32) - 0.5
        if order == "NCHW":
            X = utils.NHWC2NCHW(X)
            w = utils.NHWC2NCHW(w)

        inputs = [X, w, b] if use_bias else [X, w]

        self.assertDeviceChecks(dc, op, inputs, [0])
        for i in range(len(inputs)):
            self.assertGradientChecks(gc, op, inputs, i, [0])
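A short standalone sketch of the channel bookkeeping in the grouped case above, using illustrative numbers (the weight layout shown is NHWC, before the optional NHWC2NCHW transpose):

# Channel bookkeeping for the grouped convolution above (NHWC weight layout).
group = 4
input_channels_per_group, output_channels_per_group = 3, 2
kernel = 3

input_channels = input_channels_per_group * group     # 12
output_channels = output_channels_per_group * group   # 8
w_shape = (output_channels, kernel, kernel, input_channels_per_group)
print(w_shape)  # (8, 3, 3, 3): each filter only sees its own group's input channels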
Example #11
    def test_1d_convolution(
        self,
        input_channels,
        output_channels,
        batch_size,
        stride,
        size,
        kernel,
        dilation,
        pad,
        group,
        order,
        use_bias,
        engine,
        force_algo_fwd,
        force_algo_dgrad,
        force_algo_wgrad,
        gc,
        dc,
    ):
        if hiputl.run_in_hip(gc, dc):
            # currently miopen only supports 2d conv
            assume(engine != "CUDNN")  # CUDNN is aliased to MIOPEN for HIP
        # TODO: 1D conv in NHWC not implemented for GPU yet.
        assume(order == "NCHW" or gc.device_type == caffe2_pb2.CPU)
        if order == "NHWC":
            dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]

        self._nd_convolution(
            1,
            input_channels,
            output_channels,
            batch_size,
            stride,
            size,
            kernel,
            dilation,
            pad,
            group,
            order,
            use_bias,
            engine,
            force_algo_fwd,
            force_algo_dgrad,
            force_algo_wgrad,
            gc,
            dc,
        )
Example #12
    def test_convolution_transpose_with_group(self, stride, pad, kernel, adj,
                                              size, input_channels,
                                              output_channels, batch_size,
                                              group, order, engine,
                                              shared_buffer, use_bias, gc, dc):
        assume(adj < stride)
        # TODO: Group conv_transpose in NHWC not implemented for GPU yet.
        assume(group == 1 or order == "NCHW"
               or gc.device_type == caffe2_pb2.CPU)
        if group != 1 and order == "NHWC":
            dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]

        if hiputl.run_in_hip(gc, dc) and order == "NHWC":
            engine = ""

        op = core.CreateOperator(
            "ConvTranspose",
            ["X", "w", "b"] if use_bias else ["X", "w"],
            ["Y"],
            stride=stride,
            kernel=kernel,
            pad=pad,
            adj=adj,
            group=group,
            order=order,
            engine=engine,
            shared_buffer=int(shared_buffer),
            device_option=gc,
        )

        input_channels *= group
        output_channels *= group

        X = np.random.rand(batch_size, size, size, input_channels).astype(
            np.float32) - 0.5
        w = np.random.rand(
            input_channels, kernel, kernel, int(output_channels / group)) \
            .astype(np.float32) - 0.5
        b = np.random.rand(output_channels).astype(np.float32) - 0.5
        if order == "NCHW":
            X = utils.NHWC2NCHW(X)
            w = utils.NHWC2NCHW(w)

        inputs = [X, w, b] if use_bias else [X, w]
        self.assertDeviceChecks(dc, op, inputs, [0])
        for i in range(len(inputs)):
            self.assertGradientChecks(gc, op, inputs, i, [0])
Example #13
    def test_convolution_transpose_gradients(self, stride, pad, kernel, adj,
                                             size, input_channels,
                                             output_channels, batch_size,
                                             order, engine, use_bias,
                                             compute_dX, gc, dc):
        assume(adj < stride)
        if hiputl.run_in_hip(gc, dc) and engine == "CUDNN":
            assume(order == "NCHW")
        X = np.random.rand(batch_size, size, size, input_channels).astype(
            np.float32) - 0.5
        w = np.random.rand(
            input_channels, kernel, kernel, output_channels)\
            .astype(np.float32) - 0.5
        b = np.random.rand(output_channels).astype(np.float32) - 0.5
        op = core.CreateOperator(
            "ConvTranspose",
            ["X", "w", "b"] if use_bias else ["X", "w"],
            ["Y"],
            stride=stride,
            kernel=kernel,
            pad=pad,
            adj=adj,
            order=order,
            engine=engine,
            no_gradient_to_input=not compute_dX,
        )
        if order == "NCHW":
            X = utils.NHWC2NCHW(X)
            w = utils.NHWC2NCHW(w)

        inputs = [X, w, b] if use_bias else [X, w]
        self.assertDeviceChecks(dc, op, inputs, [0])

        if use_bias and compute_dX:
            # w, b, X
            outputs_to_check = [1, 2, 0]
        elif use_bias:
            # w, b
            outputs_to_check = [1, 2]
        elif compute_dX:
            # w, X
            outputs_to_check = [1, 0]
        else:
            # w
            outputs_to_check = [1]
        for i in outputs_to_check:
            self.assertGradientChecks(gc, op, inputs, i, [0])
Example #14
    def test_avg_pool_count_include_pad(self, op_type, dim, N, C, D, H, W,
                                        kernel, stride, pad, count_include_pad,
                                        order, engine, gc, dc):
        assume(pad < kernel)
        if hiputl.run_in_hip(gc, dc):
            if dim != 2:
                assume(engine != "CUDNN")
            elif engine == "CUDNN":
                assume(order == "NCHW")

        if op_type.endswith("ND"):
            op_type = op_type.replace("N", str(dim))

        op = core.CreateOperator(
            op_type,
            ["X"],
            ["Y"],
            kernels=[kernel] * dim,
            strides=[stride] * dim,
            pads=[pad] * dim * 2,
            count_include_pad=count_include_pad,
            order=order,
            engine=engine,
        )

        if dim == 1:
            dims = [N, C, W]
            axes = [0, 2, 1]
        elif dim == 2:
            dims = [N, C, H, W]
            axes = [0, 2, 3, 1]
        else:
            dims = [N, C, D, H, W]
            axes = [0, 2, 3, 4, 1]
        X = np.random.randn(*dims).astype(np.float32)
        if order == "NHWC":
            X = np.transpose(X, axes)

        self.assertDeviceChecks(dc, op, [X], [0])
        self.assertGradientChecks(gc, op, [X], 0, [0])
Example #15
    def test_spatialbn_test_mode_3d(self, size, input_channels, batch_size,
                                    seed, order, epsilon, inplace, engine, gc,
                                    dc):
        # Currently MIOPEN SpatialBN only supports 2D
        if hiputl.run_in_hip(gc, dc):
            assume(engine != "CUDNN")
        op = core.CreateOperator(
            "SpatialBN",
            ["X", "scale", "bias", "mean", "var"],
            ["X" if inplace else "Y"],
            order=order,
            is_test=True,
            epsilon=epsilon,
            engine=engine,
        )

        def reference_spatialbn_test(X, scale, bias, mean, var):
            if order == "NCHW":
                scale = scale[np.newaxis, :, np.newaxis, np.newaxis,
                              np.newaxis]
                bias = bias[np.newaxis, :, np.newaxis, np.newaxis, np.newaxis]
                mean = mean[np.newaxis, :, np.newaxis, np.newaxis, np.newaxis]
                var = var[np.newaxis, :, np.newaxis, np.newaxis, np.newaxis]

            return ((X - mean) / np.sqrt(var + epsilon) * scale + bias, )

        np.random.seed(1701)
        scale = np.random.rand(input_channels).astype(np.float32) + 0.5
        bias = np.random.rand(input_channels).astype(np.float32) - 0.5
        mean = np.random.randn(input_channels).astype(np.float32)
        var = np.random.rand(input_channels).astype(np.float32) + 0.5
        X = np.random.rand(batch_size, input_channels, size, size, size)\
            .astype(np.float32) - 0.5

        if order == "NHWC":
            X = utils.NCHW2NHWC(X)
        self.assertReferenceChecks(gc, op, [X, scale, bias, mean, var],
                                   reference_spatialbn_test)
        self.assertDeviceChecks(dc, op, [X, scale, bias, mean, var], [0])
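For intuition, the inference-mode formula in reference_spatialbn_test reduces to a per-channel affine map; a scalar instance of the same arithmetic:

import numpy as np

# Scalar instance of the inference-mode SpatialBN formula used above.
x, mean, var, scale, bias, epsilon = 1.0, 0.0, 1.0, 2.0, 0.5, 1e-5
y = (x - mean) / np.sqrt(var + epsilon) * scale + bias
print(y)  # ~2.49999, i.e. 2.5 up to the epsilon term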
Example #16
    def test_global_pooling(self, size, input_channels, batch_size, order,
                            op_type, engine, gc, dc):
        # CuDNN 5 does not support deterministic max pooling.
        assume(workspace.GetCuDNNVersion() >= 6000 or op_type != "MaxPool")

        if hiputl.run_in_hip(gc, dc) and engine == "CUDNN":
            assume(order == "NCHW" and op_type != "LpPool")
        op = core.CreateOperator(
            op_type,
            ["X"],
            ["Y"],
            order=order,
            engine=engine,
            global_pooling=True,
        )
        X = np.random.rand(batch_size, size, size,
                           input_channels).astype(np.float32)
        if order == "NCHW":
            X = utils.NHWC2NCHW(X)

        self.assertDeviceChecks(dc, op, [X], [0])
        if 'MaxPool' not in op_type:
            self.assertGradientChecks(gc, op, [X], 0, [0])
Example #17
    def test_global_pooling_3d(self, kernel, size, input_channels, batch_size,
                               order, op_type, engine, gc, dc):
        # Currently MIOpen Pooling only supports 2d pooling
        if hiputl.run_in_hip(gc, dc):
            assume(engine != "CUDNN")
        # pad and stride are ignored because they are inferred when global_pooling is set
        op = core.CreateOperator(
            op_type,
            ["X"],
            ["Y"],
            kernels=[kernel] * 3,
            order=order,
            global_pooling=True,
            engine=engine,
        )
        X = np.random.rand(batch_size, size, size, size,
                           input_channels).astype(np.float32)
        if order == "NCHW":
            X = utils.NHWC2NCHW(X)

        self.assertDeviceChecks(dc, op, [X], [0], threshold=0.001)
        if 'MaxPool' not in op_type:
            self.assertGradientChecks(gc, op, [X], 0, [0], threshold=0.001)
Example #18
    def test_1x1_conv(self, op_type, N, G, DX, DY, H, W, use_bias, order,
                      force_algo_fwd, force_algo_dgrad, force_algo_wgrad, gc,
                      dc):
        if hiputl.run_in_hip(gc, dc):
            assume(order == "NCHW")
        if order == "NHWC":
            G = 1

        C = G * DX
        M = G * DY

        op = core.CreateOperator(
            op_type,
            ["X", "filter", "bias"] if use_bias else ["X", "filter"],
            ["Y"],
            stride_h=1,
            stride_w=1,
            pad_t=0,
            pad_l=0,
            pad_b=0,
            pad_r=0,
            kernel=1,
            order=order,
            group=G,
            force_algo_fwd=force_algo_fwd,
            force_algo_dgrad=force_algo_dgrad,
            force_algo_wgrad=force_algo_wgrad,
        )

        if order == "NCHW":
            X = np.random.randn(N, C, H, W).astype(np.float32)
            filter = np.random.randn(M, DX, 1, 1).astype(np.float32)
        else:
            X = np.random.randn(N, H, W, C).astype(np.float32)
            filter = np.random.randn(M, 1, 1, DX).astype(np.float32)
        bias = np.random.randn(M).astype(np.float32)
        inputs = [X, filter, bias] if use_bias else [X, filter]

        def conv_1x1_nchw_ref(X, filter, bias=None):
            X = X.reshape(N, G, DX, -1)
            filter = filter.reshape(G, DY, DX)
            Y = np.zeros(shape=(N, G, DY, H * W), dtype=np.float32)
            for i in range(N):
                for j in range(G):
                    Y[i, j, :, :] = np.dot(filter[j, :, :], X[i, j, :, :])
            Y = Y.reshape(N, M, H, W)
            if bias is not None:
                bias = bias.reshape(1, M, 1, 1)
                Y = np.add(Y, bias)
            return [Y]

        def conv_1x1_nhwc_ref(X, filter, bias=None):
            X = X.reshape(N, -1, G, DX)
            filter = filter.reshape(G, DY, DX)
            Y = np.zeros(shape=(N, H * W, G, DY), dtype=np.float32)
            for i in range(N):
                for j in range(G):
                    Y[i, :, j, :] = np.dot(X[i, :, j, :],
                                           filter[j, :, :].transpose())
            Y = Y.reshape(N, H, W, M)
            if bias is not None:
                bias = bias.reshape(1, 1, 1, M)
                Y = np.add(Y, bias)
            return [Y]

        if order == "NCHW":
            conv_1x1_ref = conv_1x1_nchw_ref
        else:
            conv_1x1_ref = conv_1x1_nhwc_ref
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=inputs,
            reference=conv_1x1_ref,
        )
        self.assertDeviceChecks(dc, op, inputs, [0])
        for i in range(len(inputs)):
            self.assertGradientChecks(gc, op, inputs, i, [0])
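The per-group matrix product in conv_1x1_nchw_ref can also be written as a single einsum; the sketch below restates the same computation that way (the einsum form is my shorthand, not part of the test):

import numpy as np

# 1x1 grouped conv in NCHW as per-group matrix products, mirroring
# conv_1x1_nchw_ref above; the einsum is just a compact restatement.
N, G, DX, DY, H, W = 2, 3, 4, 5, 6, 6
X = np.random.randn(N, G * DX, H, W).astype(np.float32)
filt = np.random.randn(G * DY, DX, 1, 1).astype(np.float32)

Xg = X.reshape(N, G, DX, H * W)
Fg = filt.reshape(G, DY, DX)
Y = np.einsum("gyx,ngxs->ngys", Fg, Xg).reshape(N, G * DY, H, W)

Y_loop = np.zeros((N, G, DY, H * W), dtype=np.float32)
for i in range(N):
    for j in range(G):
        Y_loop[i, j] = Fg[j] @ Xg[i, j]  # same per-group np.dot as the reference
np.testing.assert_allclose(Y, Y_loop.reshape(N, G * DY, H, W), rtol=1e-5, atol=1e-5)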
Example #19
    def test_convolution_layout(self, op_type, stride, pad, kernel, dilation,
                                size, input_channels, output_channels,
                                batch_size, use_bias, gc, dc):
        assume(size >= dilation * (kernel - 1) + 1)

        X = np.random.rand(batch_size, size, size, input_channels).astype(
            np.float32) - 0.5
        w = np.random.rand(output_channels, kernel, kernel,
                           input_channels).astype(np.float32) - 0.5
        b = np.random.rand(output_channels).astype(np.float32) - 0.5
        Output = collections.namedtuple("Output", ["Y", "engine", "order"])
        outputs = []

        for order in ["NCHW", "NHWC"]:
            engine_list = ['']
            if hiputl.run_in_hip(gc, dc):
                if order == 'NCHW':
                    engine_list.append('MIOPEN')
            else:
                if _cudnn_supports(dilation=(dilation > 1),
                                   nhwc=(order == 'NHWC')):
                    engine_list.append('CUDNN')

            for engine in engine_list:
                op = core.CreateOperator(
                    op_type,
                    ["X", "w", "b"] if use_bias else ["X", "w"],
                    ["Y"],
                    stride=stride,
                    kernel=kernel,
                    dilation=dilation,
                    pad=pad,
                    order=order,
                    engine=engine,
                    device_option=gc,
                    exhaustive_search=True,
                )
                if order == "NCHW":
                    X_f = X.transpose((0, 3, 1, 2))
                    w_f = w.transpose((0, 3, 1, 2))
                else:
                    X_f = X
                    w_f = w
                self.assertDeviceChecks(
                    dc, op, [X_f, w_f, b] if use_bias else [X_f, w_f], [0])
                self.ws.create_blob("X").feed(X_f, device_option=gc)
                self.ws.create_blob("w").feed(w_f, device_option=gc)
                self.ws.create_blob("b").feed(b, device_option=gc)
                self.ws.run(op)
                outputs.append(
                    Output(Y=self.ws.blobs["Y"].fetch(),
                           engine=engine,
                           order=order))

        def canonical(o):
            if o.order == "NHWC":
                return o.Y.transpose((0, 3, 1, 2))
            else:
                return o.Y

        for o in outputs:
            np.testing.assert_allclose(canonical(outputs[0]),
                                       canonical(o),
                                       atol=1e-4,
                                       rtol=1e-4)
Example #20
    def test_convolution_gradients(self, op_type, stride, pad, kernel,
                                   dilation, size, input_channels,
                                   output_channels, batch_size, group, order,
                                   engine, use_bias, force_algo_fwd,
                                   force_algo_dgrad, force_algo_wgrad, gc, dc):
        if order == "NHWC" or engine == "MKLDNN":
            group = 1

        input_channels *= group
        output_channels *= group
        dkernel = dilation * (kernel - 1) + 1

        if engine == 'CUDNN':
            if hiputl.run_in_hip(gc, dc):
                assume((order == "NCHW") and not (dilation > 1 and group > 1))
            else:
                assume(
                    _cudnn_supports(dilation=(dilation > 1),
                                    nhwc=(order == 'NHWC'),
                                    backward=True))

        assume(engine != "MKLDNN" or use_bias is True)

        op = core.CreateOperator(
            op_type,
            ["X", "w", "b"] if use_bias else ["X", "w"],
            ["Y"],
            stride=stride,
            kernel=kernel,
            dilation=dilation,
            pad=pad,
            group=group,
            order=order,
            engine=engine,
            force_algo_fwd=force_algo_fwd,
            force_algo_dgrad=force_algo_dgrad,
            force_algo_wgrad=force_algo_wgrad,
        )
        X = np.random.rand(batch_size, size, size, input_channels).astype(
            np.float32) - 0.5
        w = np.random.rand(output_channels, kernel, kernel,
                           int(input_channels / group)).astype(
                               np.float32) - 0.5
        b = np.random.rand(output_channels).astype(np.float32) - 0.5
        if order == "NCHW":
            X = X.transpose((0, 3, 1, 2))
            w = w.transpose((0, 3, 1, 2))

        inputs = [X, w, b] if use_bias else [X, w]
        # Error handling path.
        if size + pad + pad < dkernel:
            with self.assertRaises(RuntimeError):
                self.assertDeviceChecks(dc, op, inputs, [0])
            return

        try:
            self.assertDeviceChecks(dc, op, inputs, [0])
        except RuntimeError as e:
            es = str(e)
            # CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM should always have an
            # implementation
            if "status == CUDNN_STATUS_SUCCESS" not in es \
               or "CUDNN_STATUS_NOT_SUPPORTED" not in es \
               or force_algo_fwd == 0:
                raise e

        for i in range(len(inputs)):
            try:
                self.assertGradientChecks(gc, op, inputs, i, [0])
            except RuntimeError as e:
                es = str(e)
                if "status == CUDNN_STATUS_SUCCESS" not in es \
                   or "CUDNN_STATUS_NOT_SUPPORTED" not in es:
                    raise e
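The dkernel value used in the error-handling branch is the effective extent of a dilated kernel; a one-line check with illustrative numbers:

# Effective (dilated) kernel extent, matching the dkernel computed above.
def dilated_extent(kernel, dilation):
    return dilation * (kernel - 1) + 1

print(dilated_extent(3, 2))  # 5: a 3-tap kernel with dilation 2 spans 5 inputs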
Example #21
    def test_spatialbn_train_mode(self, size, input_channels, batch_size, seed,
                                  order, epsilon, momentum, inplace, engine,
                                  gc, dc):
        # Currently HIP SpatialBN only supports NCHW
        if hiputl.run_in_hip(gc, dc):
            assume(order == "NCHW")

        assume(batch_size == 0 or batch_size * size * size > 1)

        op = core.CreateOperator(
            "SpatialBN",
            ["X", "scale", "bias", "running_mean", "running_var"],
            [
                "X" if inplace else "Y", "running_mean", "running_var",
                "saved_mean", "saved_var"
            ],
            order=order,
            is_test=False,
            epsilon=epsilon,
            momentum=momentum,
            engine=engine,
        )
        np.random.seed(1701)
        scale = np.random.randn(input_channels).astype(np.float32)
        bias = np.random.rand(input_channels).astype(np.float32) - 0.5
        mean = np.random.randn(input_channels).astype(np.float32)
        var = np.random.rand(input_channels).astype(np.float32) + 0.5
        X = np.random.randn(batch_size, input_channels, size,
                            size).astype(np.float32)

        if order == "NHWC":
            X = np.transpose(X, (0, 2, 3, 1))

        def batch_norm_ref(X, scale, bias, running_mean, running_var):
            if batch_size == 0:
                Y = np.zeros(X.shape)
                saved_mean = np.zeros(running_mean.shape)
                saved_var = np.zeros(running_var.shape)
                return (Y, running_mean, running_var, saved_mean, saved_var)

            if order == "NHWC":
                X = np.transpose(X, (0, 3, 1, 2))

            C = X.shape[1]
            reduce_size = batch_size * size * size
            saved_mean = np.mean(X, (0, 2, 3))
            saved_var = np.var(X, (0, 2, 3))
            running_mean = momentum * running_mean + (1.0 -
                                                      momentum) * saved_mean
            running_var = momentum * running_var + (1.0 - momentum) * (
                reduce_size / (reduce_size - 1)) * saved_var
            std = np.sqrt(saved_var + epsilon)
            broadcast_shape = (1, C, 1, 1)
            Y = (X - np.reshape(saved_mean, broadcast_shape)) / np.reshape(
                std, broadcast_shape) * np.reshape(
                    scale, broadcast_shape) + np.reshape(
                        bias, broadcast_shape)

            if order == "NHWC":
                Y = np.transpose(Y, (0, 2, 3, 1))

            return (Y, running_mean, running_var, saved_mean, 1.0 / std)

        self.assertReferenceChecks(gc, op, [X, scale, bias, mean, var],
                                   batch_norm_ref)
        self.assertDeviceChecks(dc, op, [X, scale, bias, mean, var],
                                [0, 1, 2, 3, 4])
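The running-variance update in batch_norm_ref applies a Bessel-style correction factor of reduce_size / (reduce_size - 1); a small numeric sketch of that factor with illustrative sizes:

# Bessel correction factor applied to the running variance in batch_norm_ref.
batch_size, size = 4, 8
reduce_size = batch_size * size * size   # elements averaged per channel
print(reduce_size / (reduce_size - 1))   # 256 / 255 ~ 1.0039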
Example #22
    def test_sparse_lengths_sum_cpu(
        self,
        batchsize,
        fptype,
        fp16asint,
        blocksize,
        normalize_by_lengths,
        empty_indices,
        gc,
        dc,
    ):
        if fptype != np.float32:
            assume(gc.device_type == caffe2_pb2.CPU)
            assume(not hiputl.run_in_hip(gc, dc))
            assume(caffe2_pb2.CUDA not in {d.device_type for d in dc})

        if normalize_by_lengths:
            print("<test_sparse_lengths_sum_mean_cpu>")
        else:
            print("<test_sparse_lengths_sum_cpu>")

        tblsize = 300
        if fptype == np.float32:
            Tbl = np.random.rand(tblsize, blocksize).astype(np.float32)
            atol = 1e-5
        else:
            if fp16asint:
                Tbl = ((10.0 *
                        np.random.rand(tblsize, blocksize)).round().astype(
                            np.float16))
                atol = 1e-3
            else:
                Tbl = np.random.rand(tblsize, blocksize).astype(np.float16)
                atol = 1e-1

        # lengths of each segment (number of indices per output row)
        if empty_indices:
            Lengths = np.zeros(batchsize, dtype=np.int32)
        else:
            Lengths = np.random.randint(1, 30, size=batchsize, dtype=np.int32)
        # flat indices
        Indices = np.random.randint(0,
                                    tblsize,
                                    size=sum(Lengths),
                                    dtype=np.int64)

        op = core.CreateOperator(
            "SparseLengths" + ("Mean" if normalize_by_lengths else "Sum"),
            ["Tbl", "Indices", "Lengths"],
            "out",
        )

        def sparse_lengths_sum_ref(Tbl, Indices, Lengths):
            rptr = np.cumsum(np.insert(Lengths, [0], [0]))
            out = np.zeros((len(Lengths), blocksize))
            if normalize_by_lengths:
                for i in range(0, len(rptr[0:-1])):
                    if Lengths[i] != 0:
                        out[i] = (
                            Tbl[Indices[rptr[i]:rptr[i + 1]]].sum(axis=0) *
                            1.0 / float(Lengths[i]))
            else:
                for i in range(0, len(rptr[0:-1])):
                    out[i] = Tbl[Indices[rptr[i]:rptr[i + 1]]].sum(axis=0)

            return [out.astype(np.float32)]

        self.assertReferenceChecks(
            gc,
            op,
            [Tbl, Indices, Lengths],
            sparse_lengths_sum_ref,
            threshold=1e-3,
            atol=atol,
        )
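One detail worth noting from the reference above: for the Mean variant, a segment with length 0 is left as a zero row rather than dividing by zero. A tiny standalone check of that behaviour:

import numpy as np

# SparseLengthsMean behaviour for an empty segment, following the reference above:
# a zero-length segment stays a zero row instead of dividing by zero.
Tbl = np.array([[2.0, 4.0], [6.0, 8.0]], dtype=np.float32)
Lengths = np.array([2, 0], dtype=np.int32)
Indices = np.array([0, 1], dtype=np.int64)

rptr = np.cumsum(np.insert(Lengths, 0, 0))
out = np.zeros((len(Lengths), Tbl.shape[1]), dtype=np.float32)
for i in range(len(Lengths)):
    if Lengths[i] != 0:
        out[i] = Tbl[Indices[rptr[i]:rptr[i + 1]]].sum(axis=0) / float(Lengths[i])

print(out)  # [[4. 6.], [0. 0.]] -- the empty segment stays zero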
Example #23
    def test_convolution_gradients(
        self,
        op_type,
        stride,
        pad,
        kernel,
        dilation,
        size,
        input_channels,
        output_channels,
        batch_size,
        group,
        order,
        engine,
        use_bias,
        force_algo_fwd,
        force_algo_dgrad,
        force_algo_wgrad,
        gc,
        dc,
    ):
        # TODO: Group conv in NHWC not implemented for GPU yet.
        assume(
            group == 1
            or (order == "NCHW" or gc.device_type == caffe2_pb2.CPU)
            and engine != "MKLDNN"
        )
        if group != 1 and order == "NHWC":
            dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]

        input_channels *= group
        output_channels *= group
        dkernel = dilation * (kernel - 1) + 1

        if engine == "CUDNN":
            if hiputl.run_in_hip(gc, dc):
                assume((order == "NCHW") and not (dilation > 1 and group > 1))
            else:
                assume(
                    _cudnn_supports(
                        dilation=(dilation > 1), nhwc=(order == "NHWC"), backward=True
                    )
                )

        assume(engine != "MKLDNN" or use_bias is True)

        op = core.CreateOperator(
            op_type,
            ["X", "w", "b"] if use_bias else ["X", "w"],
            ["Y"],
            stride=stride,
            kernel=kernel,
            dilation=dilation,
            pad=pad,
            group=group,
            order=order,
            engine=engine,
            force_algo_fwd=force_algo_fwd,
            force_algo_dgrad=force_algo_dgrad,
            force_algo_wgrad=force_algo_wgrad,
        )
        X = (
            np.random.rand(batch_size, size, size, input_channels).astype(np.float32)
            - 0.5
        )
        w = (
            np.random.rand(
                output_channels, kernel, kernel, int(input_channels / group)
            ).astype(np.float32)
            - 0.5
        )
        b = np.random.rand(output_channels).astype(np.float32) - 0.5
        if order == "NCHW":
            X = utils.NHWC2NCHW(X)
            w = utils.NHWC2NCHW(w)

        inputs = [X, w, b] if use_bias else [X, w]
        # Error handling path.
        if size + pad + pad < dkernel:
            with self.assertRaises(RuntimeError):
                self.assertDeviceChecks(dc, op, inputs, [0])
            return

        try:
            self.assertDeviceChecks(dc, op, inputs, [0])
        except RuntimeError as e:
            es = str(e)
            # CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM should always have an
            # implementation
            if (
                "status == CUDNN_STATUS_SUCCESS" not in es
                or "CUDNN_STATUS_NOT_SUPPORTED" not in es
                or force_algo_fwd == 0
            ):
                raise e

        for i in range(len(inputs)):
            try:
                self.assertGradientChecks(gc, op, inputs, i, [0])
            except RuntimeError as e:
                es = str(e)
                if (
                    "status == CUDNN_STATUS_SUCCESS" not in es
                    or "CUDNN_STATUS_NOT_SUPPORTED" not in es
                ):
                    raise e