def test_spatialbn_train_mode(self, size, input_channels, batch_size, seed,
                              order, epsilon, momentum, inplace, engine,
                              gc, dc):
    # Currently HIP SpatialBN only supports NCHW
    if hiputl.run_in_hip(gc, dc):
        assume(order == "NCHW")
    op = core.CreateOperator(
        "SpatialBN",
        ["X", "scale", "bias", "running_mean", "running_var"],
        ["X" if inplace else "Y",
         "running_mean", "running_var", "saved_mean", "saved_var"],
        order=order,
        is_test=False,
        epsilon=epsilon,
        momentum=momentum,
        engine=engine,
    )
    np.random.seed(1701)
    scale = np.random.rand(input_channels).astype(np.float32) + 0.5
    bias = np.random.rand(input_channels).astype(np.float32) - 0.5
    mean = np.random.randn(input_channels).astype(np.float32)
    var = np.random.rand(input_channels).astype(np.float32) + 0.5
    X = np.random.rand(batch_size, input_channels, size, size).astype(
        np.float32) - 0.5

    if order == "NHWC":
        X = X.swapaxes(1, 2).swapaxes(2, 3)
    self.assertDeviceChecks(dc, op, [X, scale, bias, mean, var],
                            [0, 1, 2, 3, 4])
def test_pooling_3d(self, stride, pad, kernel, size, input_channels,
                    batch_size, order, op_type, engine, gc, dc):
    assume(pad < kernel)
    assume(size + pad + pad >= kernel)
    # Currently MIOpen pooling only supports the NCHW order.
    if hiputl.run_in_hip(gc, dc) and (workspace.GetHIPVersion() < 303
                                      or order == "NHWC"):
        assume(engine != "CUDNN")
    # Some cases here could be computed with global pooling, but are
    # instead computed with the general implementation, which is slower
    # but should still be correct.
    op = core.CreateOperator(
        op_type,
        ["X"],
        ["Y"],
        strides=[stride] * 3,
        kernels=[kernel] * 3,
        pads=[pad] * 6,
        order=order,
        engine=engine,
    )
    X = np.random.rand(
        batch_size, size, size, size, input_channels).astype(np.float32)
    if order == "NCHW":
        X = utils.NHWC2NCHW(X)
    self.assertDeviceChecks(dc, op, [X], [0], threshold=0.001)
    if 'MaxPool' not in op_type:
        self.assertGradientChecks(gc, op, [X], 0, [0], threshold=0.001)
def test_spatialbn_train_mode_gradient_check_1d(
        self, size, input_channels, batch_size, seed, order, epsilon,
        momentum, engine, gc, dc):
    # Currently MIOpen SpatialBN only supports 2D
    if hiputl.run_in_hip(gc, dc):
        assume(engine != "CUDNN")
    op = core.CreateOperator(
        "SpatialBN",
        ["X", "scale", "bias", "mean", "var"],
        ["Y", "mean", "var", "saved_mean", "saved_var"],
        order=order,
        is_test=False,
        epsilon=epsilon,
        momentum=momentum,
        engine=engine,
    )
    np.random.seed(seed)
    scale = np.random.rand(input_channels).astype(np.float32) + 0.5
    bias = np.random.rand(input_channels).astype(np.float32) - 0.5
    mean = np.random.randn(input_channels).astype(np.float32)
    var = np.random.rand(input_channels).astype(np.float32) + 0.5
    X = np.random.rand(batch_size, input_channels, size).astype(
        np.float32) - 0.5
    if order == "NHWC":
        X = X.swapaxes(1, 2)

    for input_to_check in [0, 1, 2]:  # dX, dScale, dBias
        self.assertGradientChecks(gc, op, [X, scale, bias, mean, var],
                                  input_to_check, [0], stepsize=0.01)
def test_pooling(self, stride, pad, kernel, size, input_channels,
                 batch_size, order, op_type, engine, gc, dc):
    assume(pad < kernel)
    if hiputl.run_in_hip(gc, dc) and engine == "CUDNN":
        assume(order == "NCHW" and op_type != "LpPool")
    op = core.CreateOperator(
        op_type,
        ["X"],
        ["Y"],
        stride=stride,
        kernel=kernel,
        pad=pad,
        order=order,
        engine=engine,
    )
    X = np.random.rand(
        batch_size, size, size, input_channels).astype(np.float32)
    if order == "NCHW":
        X = X.transpose((0, 3, 1, 2))
    self.assertDeviceChecks(dc, op, [X], [0])
    if 'MaxPool' not in op_type:
        self.assertGradientChecks(gc, op, [X], 0, [0])
def test_convolution_transpose_layout(
        self, stride, pad, kernel, adj, size, input_channels,
        output_channels, batch_size, engine, shared_buffer, use_bias,
        gc, dc):
    assume(adj < stride)
    X = np.random.rand(
        batch_size, size, size, input_channels).astype(np.float32) - 0.5
    w = np.random.rand(
        input_channels, kernel, kernel, output_channels)\
        .astype(np.float32) - 0.5
    b = np.random.rand(output_channels).astype(np.float32) - 0.5
    outputs = {}
    for order in ["NCHW", "NHWC"]:
        if hiputl.run_in_hip(gc, dc) and order == "NHWC":
            # MIOpen does not support NHWC; fall back to the plain HIP
            # implementation.
            tmp_engine = ""
        else:
            tmp_engine = engine
        op = core.CreateOperator(
            "ConvTranspose",
            ["X", "w", "b"] if use_bias else ["X", "w"],
            ["Y"],
            strides=[stride] * 2,
            kernels=[kernel] * 2,
            pads=[pad] * 4,
            adjs=[adj] * 2,
            order=order,
            engine=tmp_engine,
            shared_buffer=int(shared_buffer),
            device_option=gc,
        )
        if order == "NCHW":
            X_f = utils.NHWC2NCHW(X)
            w_f = utils.NHWC2NCHW(w)
        else:
            X_f = X
            w_f = w
        self.assertDeviceChecks(
            dc, op, [X_f, w_f, b] if use_bias else [X_f, w_f], [0])
        self.ws.create_blob("X").feed(X_f, device_option=gc)
        self.ws.create_blob("w").feed(w_f, device_option=gc)
        self.ws.create_blob("b").feed(b, device_option=gc)
        self.ws.run(op)
        outputs[order] = self.ws.blobs["Y"].fetch()
    output_size = (size - 1) * stride + kernel + adj - 2 * pad
    self.assertEqual(
        outputs["NCHW"].shape,
        (batch_size, output_channels, output_size, output_size))
    np.testing.assert_allclose(
        outputs["NCHW"],
        utils.NHWC2NCHW(outputs["NHWC"]),
        atol=1e-4,
        rtol=1e-4)
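# A quick numeric sanity check of the ConvTranspose output-size formula
# asserted above (a minimal sketch of our own, not part of the original
# suite; `_conv_transpose_output_size` is a hypothetical helper):
def _conv_transpose_output_size(size, stride, kernel, adj, pad):
    # ConvTranspose inverts the forward-conv size relation
    # out = (in + 2 * pad - kernel) // stride + 1; `adj` adds back the
    # rows/columns that the floor division in the forward pass discards.
    return (size - 1) * stride + kernel + adj - 2 * pad

# Example: size=4, stride=2, kernel=3, adj=1, pad=0
#   -> (4 - 1) * 2 + 3 + 1 - 0 = 10
assert _conv_transpose_output_size(4, 2, 3, 1, 0) == 10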
def test_convolution_transpose_layout_legacy_args(
        self, stride, pad, kernel, adj, size, input_channels,
        output_channels, batch_size, engine, shared_buffer, use_bias,
        gc, dc):
    assume(adj < stride)
    X = np.random.rand(
        batch_size, size, size, input_channels).astype(np.float32) - 0.5
    w = np.random.rand(
        input_channels, kernel, kernel, output_channels)\
        .astype(np.float32) - 0.5
    b = np.random.rand(output_channels).astype(np.float32) - 0.5
    outputs = {}
    for order in ["NCHW", "NHWC"]:
        # NHWC is not supported by MIOpen, so fall back to plain HIP.
        if hiputl.run_in_hip(gc, dc) and engine == "CUDNN":
            tmp_engine = "CUDNN" if order == "NCHW" else ""
        else:
            tmp_engine = engine
        op = core.CreateOperator(
            "ConvTranspose",
            ["X", "w", "b"] if use_bias else ["X", "w"],
            ["Y"],
            stride=stride,
            kernel=kernel,
            pad=pad,
            adj=adj,
            order=order,
            engine=tmp_engine,
            shared_buffer=int(shared_buffer),
            device_option=gc,
        )
        if order == "NCHW":
            X_f = X.transpose((0, 3, 1, 2))
            w_f = w.transpose((0, 3, 1, 2))
        else:
            X_f = X
            w_f = w
        self.assertDeviceChecks(
            dc, op, [X_f, w_f, b] if use_bias else [X_f, w_f], [0])
        self.ws.create_blob("X").feed(X_f, device_option=gc)
        self.ws.create_blob("w").feed(w_f, device_option=gc)
        self.ws.create_blob("b").feed(b, device_option=gc)
        self.ws.run(op)
        outputs[order] = self.ws.blobs["Y"].fetch()
    output_size = (size - 1) * stride + kernel + adj - 2 * pad
    self.assertEqual(
        outputs["NCHW"].shape,
        (batch_size, output_channels, output_size, output_size))
    np.testing.assert_allclose(
        outputs["NCHW"],
        outputs["NHWC"].transpose((0, 3, 1, 2)),
        atol=1e-4,
        rtol=1e-4)
def test_sparse_lengths_weightedsum_cpu(
        self, batchsize, fptype, fp16asint, blocksize, empty_indices,
        gc, dc):
    if fptype != np.float32:
        assume(gc.device_type == caffe2_pb2.CPU)
        assume(not hiputl.run_in_hip(gc, dc))
        assume(caffe2_pb2.CUDA not in {d.device_type for d in dc})

    print("<test_sparse_lengths_weightedsum_cpu>")

    tblsize = 300
    if fptype == np.float32:
        Tbl = np.random.rand(tblsize, blocksize).astype(np.float32)
        atol = 1e-5
    else:
        if fp16asint:
            Tbl = (
                (10.0 * np.random.rand(tblsize, blocksize))
                .round()
                .astype(np.float16)
            )
            atol = 1e-3
        else:
            Tbl = np.random.rand(tblsize, blocksize).astype(np.float16)
            atol = 1e-1

    # Array of each row's segment length.
    if empty_indices:
        Lengths = np.zeros(batchsize, dtype=np.int32)
    else:
        Lengths = np.random.randint(1, 30, size=batchsize, dtype=np.int32)
    # Flat indices into the table.
    Indices = np.random.randint(0, tblsize, size=sum(Lengths), dtype=np.int64)
    Weights = np.random.rand(sum(Lengths)).astype(np.float32)

    op = core.CreateOperator(
        "SparseLengthsWeightedSum",
        ["Tbl", "Weights", "Indices", "Lengths"],
        "out",
    )

    def sparse_lengths_weightedsum_ref(Tbl, Weights, Indices, Lengths):
        rptr = np.cumsum(np.insert(Lengths, [0], [0]))
        out = np.zeros((len(Lengths), blocksize))
        for i in range(0, len(rptr[0:-1])):
            w = Weights[rptr[i]:rptr[i + 1]]
            out[i] = (
                Tbl[Indices[rptr[i]:rptr[i + 1]]] * w[:, np.newaxis]
            ).sum(axis=0)
        return [out.astype(np.float32)]

    self.assertReferenceChecks(
        gc,
        op,
        [Tbl, Weights, Indices, Lengths],
        sparse_lengths_weightedsum_ref,
        threshold=1e-3,
        atol=atol,
    )
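# Tiny worked example of the SparseLengthsWeightedSum semantics checked
# above (a minimal NumPy sketch of our own, not part of the test suite):
# Lengths partitions Indices/Weights into consecutive segments, and each
# output row is the weighted sum of the gathered table rows in its segment.
def _weighted_segment_sum(tbl, weights, indices, lengths):
    offsets = np.cumsum(np.insert(lengths, 0, 0))
    return np.stack([
        (tbl[indices[offsets[i]:offsets[i + 1]]]
         * weights[offsets[i]:offsets[i + 1], np.newaxis]).sum(axis=0)
        for i in range(len(lengths))
    ])

# Two segments: rows {0, 2} with weights {1, 2}, then row {1} with weight 3.
_tbl = np.array([[1.0], [10.0], [100.0]])
assert np.allclose(
    _weighted_segment_sum(_tbl, np.array([1.0, 2.0, 3.0]),
                          np.array([0, 2, 1]), np.array([2, 1])),
    [[201.0], [30.0]])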
def test_1d_convolution_nchw(self, input_channels, output_channels,
                             batch_size, stride, size, kernel, dilation,
                             pad, use_bias, engine, force_algo_fwd,
                             force_algo_dgrad, force_algo_wgrad, gc, dc):
    if hiputl.run_in_hip(gc, dc):
        # Currently MIOpen only supports 2D conv.
        assume(engine != 'CUDNN')  # CUDNN is aliased to MIOPEN for HIP
    self._nd_convolution_nchw(
        1, input_channels, output_channels, batch_size, stride, size,
        kernel, dilation, pad, use_bias, engine, force_algo_fwd,
        force_algo_dgrad, force_algo_wgrad, gc, dc)
def test_max_pool_grad(self, op_type, dim, N, C, D, H, W, kernel, stride,
                       pad, order, engine, gc, dc):
    assume(pad < kernel)
    assume(dim > 1 or engine == "")
    if hiputl.run_in_hip(gc, dc):
        if dim != 2:
            assume(engine != "CUDNN")
        elif engine == "CUDNN":
            assume(order == "NCHW")
    if op_type.endswith("ND"):
        op_type = op_type.replace("N", str(dim))
    op = core.CreateOperator(
        op_type,
        ["X"],
        ["Y"],
        kernels=[kernel] * dim,
        strides=[stride] * dim,
        pads=[pad] * dim * 2,
        order=order,
        engine=engine,
    )
    if dim == 1:
        size = W
        dims = [N, C, W]
        axes = [0, 2, 1]
    elif dim == 2:
        size = H * W
        dims = [N, C, H, W]
        axes = [0, 2, 3, 1]
    else:
        size = D * H * W
        dims = [N, C, D, H, W]
        axes = [0, 2, 3, 4, 1]
    # Fill each image with a shuffled permutation of distinct values so
    # the max in every window is unique; ties would make the max-pool
    # gradient ambiguous and fail the numerical check.
    X = np.zeros((N * C, size)).astype(np.float32)
    for i in range(N * C):
        X[i, :] = np.arange(size, dtype=np.float32) / size
        np.random.shuffle(X[i, :])
    X = X.reshape(dims)
    if order == "NHWC":
        X = np.transpose(X, axes)
    self.assertDeviceChecks(dc, op, [X], [0])
    self.assertGradientChecks(gc, op, [X], 0, [0],
                              threshold=0.05, stepsize=0.005)
def test_group_convolution(self, stride, pad, kernel, size, group,
                           input_channels_per_group,
                           output_channels_per_group, batch_size, order,
                           engine, use_bias, gc, dc):
    assume(size >= kernel)

    if hiputl.run_in_hip(gc, dc):
        if order == "NHWC":
            assume(group == 1 and engine != "CUDNN")
    else:
        # TODO: Group conv in NHWC not implemented for GPU yet.
        assume(group == 1 or order == "NCHW"
               or gc.device_type == caffe2_pb2.CPU)
        if group != 1 and order == "NHWC":
            dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]

    # Group conv is not implemented with the EIGEN engine.
    assume(group == 1 or engine != "EIGEN")

    input_channels = input_channels_per_group * group
    output_channels = output_channels_per_group * group

    op = core.CreateOperator(
        "Conv",
        ["X", "w", "b"] if use_bias else ["X", "w"],
        ["Y"],
        stride=stride,
        kernel=kernel,
        pad=pad,
        order=order,
        engine=engine,
        group=group,
    )
    X = np.random.rand(
        batch_size, size, size, input_channels).astype(np.float32) - 0.5
    w = np.random.rand(
        output_channels, kernel, kernel,
        input_channels_per_group).astype(np.float32) - 0.5
    b = np.random.rand(output_channels).astype(np.float32) - 0.5
    if order == "NCHW":
        X = utils.NHWC2NCHW(X)
        w = utils.NHWC2NCHW(w)

    inputs = [X, w, b] if use_bias else [X, w]

    self.assertDeviceChecks(dc, op, inputs, [0])
    for i in range(len(inputs)):
        self.assertGradientChecks(gc, op, inputs, i, [0])
def test_1d_convolution(
        self, input_channels, output_channels, batch_size, stride, size,
        kernel, dilation, pad, group, order, use_bias, engine,
        force_algo_fwd, force_algo_dgrad, force_algo_wgrad, gc, dc):
    if hiputl.run_in_hip(gc, dc):
        # Currently MIOpen only supports 2D conv.
        assume(engine != "CUDNN")  # CUDNN is aliased to MIOPEN for HIP
    # TODO: 1D conv in NHWC not implemented for GPU yet.
    assume(order == "NCHW" or gc.device_type == caffe2_pb2.CPU)
    if order == "NHWC":
        dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]
    self._nd_convolution(
        1, input_channels, output_channels, batch_size, stride, size,
        kernel, dilation, pad, group, order, use_bias, engine,
        force_algo_fwd, force_algo_dgrad, force_algo_wgrad, gc, dc)
def test_convolution_transpose_with_group(self, stride, pad, kernel, adj,
                                          size, input_channels,
                                          output_channels, batch_size,
                                          group, order, engine,
                                          shared_buffer, use_bias, gc, dc):
    assume(adj < stride)
    # TODO: Group conv_transpose in NHWC not implemented for GPU yet.
    assume(group == 1 or order == "NCHW"
           or gc.device_type == caffe2_pb2.CPU)
    if group != 1 and order == "NHWC":
        dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]

    if hiputl.run_in_hip(gc, dc) and order == "NHWC":
        engine = ""

    op = core.CreateOperator(
        "ConvTranspose",
        ["X", "w", "b"] if use_bias else ["X", "w"],
        ["Y"],
        stride=stride,
        kernel=kernel,
        pad=pad,
        adj=adj,
        group=group,
        order=order,
        engine=engine,
        shared_buffer=int(shared_buffer),
        device_option=gc,
    )
    input_channels *= group
    output_channels *= group

    X = np.random.rand(
        batch_size, size, size, input_channels).astype(np.float32) - 0.5
    w = np.random.rand(
        input_channels, kernel, kernel, int(output_channels / group))\
        .astype(np.float32) - 0.5
    b = np.random.rand(output_channels).astype(np.float32) - 0.5
    if order == "NCHW":
        X = utils.NHWC2NCHW(X)
        w = utils.NHWC2NCHW(w)

    inputs = [X, w, b] if use_bias else [X, w]
    self.assertDeviceChecks(dc, op, inputs, [0])
    for i in range(len(inputs)):
        self.assertGradientChecks(gc, op, inputs, i, [0])
def test_convolution_transpose_gradients(self, stride, pad, kernel, adj,
                                         size, input_channels,
                                         output_channels, batch_size,
                                         order, engine, use_bias,
                                         compute_dX, gc, dc):
    assume(adj < stride)
    if hiputl.run_in_hip(gc, dc) and engine == "CUDNN":
        assume(order == "NCHW")
    X = np.random.rand(
        batch_size, size, size, input_channels).astype(np.float32) - 0.5
    w = np.random.rand(
        input_channels, kernel, kernel, output_channels)\
        .astype(np.float32) - 0.5
    b = np.random.rand(output_channels).astype(np.float32) - 0.5
    op = core.CreateOperator(
        "ConvTranspose",
        ["X", "w", "b"] if use_bias else ["X", "w"],
        ["Y"],
        stride=stride,
        kernel=kernel,
        pad=pad,
        adj=adj,
        order=order,
        engine=engine,
        no_gradient_to_input=not compute_dX,
    )
    if order == "NCHW":
        X = utils.NHWC2NCHW(X)
        w = utils.NHWC2NCHW(w)

    inputs = [X, w, b] if use_bias else [X, w]
    self.assertDeviceChecks(dc, op, inputs, [0])

    if use_bias and compute_dX:
        # w, b, X
        outputs_to_check = [1, 2, 0]
    elif use_bias:
        # w, b
        outputs_to_check = [1, 2]
    elif compute_dX:
        # w, X
        outputs_to_check = [1, 0]
    else:
        # w
        outputs_to_check = [1]
    for i in outputs_to_check:
        self.assertGradientChecks(gc, op, inputs, i, [0])
def test_avg_pool_count_include_pad(self, op_type, dim, N, C, D, H, W,
                                    kernel, stride, pad,
                                    count_include_pad, order, engine,
                                    gc, dc):
    assume(pad < kernel)
    if hiputl.run_in_hip(gc, dc):
        if dim != 2:
            assume(engine != "CUDNN")
        elif engine == "CUDNN":
            assume(order == "NCHW")
    if op_type.endswith("ND"):
        op_type = op_type.replace("N", str(dim))
    op = core.CreateOperator(
        op_type,
        ["X"],
        ["Y"],
        kernels=[kernel] * dim,
        strides=[stride] * dim,
        pads=[pad] * dim * 2,
        count_include_pad=count_include_pad,
        order=order,
        engine=engine,
    )
    if dim == 1:
        dims = [N, C, W]
        axes = [0, 2, 1]
    elif dim == 2:
        dims = [N, C, H, W]
        axes = [0, 2, 3, 1]
    else:
        dims = [N, C, D, H, W]
        axes = [0, 2, 3, 4, 1]
    X = np.random.randn(*dims).astype(np.float32)
    if order == "NHWC":
        X = np.transpose(X, axes)
    self.assertDeviceChecks(dc, op, [X], [0])
    self.assertGradientChecks(gc, op, [X], 0, [0])
def test_spatialbn_test_mode_3d(self, size, input_channels, batch_size,
                                seed, order, epsilon, inplace, engine,
                                gc, dc):
    # Currently MIOpen SpatialBN only supports 2D
    if hiputl.run_in_hip(gc, dc):
        assume(engine != "CUDNN")
    op = core.CreateOperator(
        "SpatialBN",
        ["X", "scale", "bias", "mean", "var"],
        ["X" if inplace else "Y"],
        order=order,
        is_test=True,
        epsilon=epsilon,
        engine=engine,
    )

    def reference_spatialbn_test(X, scale, bias, mean, var):
        if order == "NCHW":
            scale = scale[np.newaxis, :,
                          np.newaxis, np.newaxis, np.newaxis]
            bias = bias[np.newaxis, :, np.newaxis, np.newaxis, np.newaxis]
            mean = mean[np.newaxis, :, np.newaxis, np.newaxis, np.newaxis]
            var = var[np.newaxis, :, np.newaxis, np.newaxis, np.newaxis]
        return ((X - mean) / np.sqrt(var + epsilon) * scale + bias,)

    np.random.seed(1701)
    scale = np.random.rand(input_channels).astype(np.float32) + 0.5
    bias = np.random.rand(input_channels).astype(np.float32) - 0.5
    mean = np.random.randn(input_channels).astype(np.float32)
    var = np.random.rand(input_channels).astype(np.float32) + 0.5
    X = np.random.rand(batch_size, input_channels, size, size, size)\
        .astype(np.float32) - 0.5

    if order == "NHWC":
        X = utils.NCHW2NHWC(X)
    self.assertReferenceChecks(gc, op, [X, scale, bias, mean, var],
                               reference_spatialbn_test)
    self.assertDeviceChecks(dc, op, [X, scale, bias, mean, var], [0])
def test_global_pooling(self, size, input_channels, batch_size, order,
                        op_type, engine, gc, dc):
    # CuDNN 5 does not support deterministic max pooling.
    assume(workspace.GetCuDNNVersion() >= 6000 or op_type != "MaxPool")
    if hiputl.run_in_hip(gc, dc) and engine == "CUDNN":
        assume(order == "NCHW" and op_type != "LpPool")
    op = core.CreateOperator(
        op_type,
        ["X"],
        ["Y"],
        order=order,
        engine=engine,
        global_pooling=True,
    )
    X = np.random.rand(
        batch_size, size, size, input_channels).astype(np.float32)
    if order == "NCHW":
        X = utils.NHWC2NCHW(X)
    self.assertDeviceChecks(dc, op, [X], [0])
    if 'MaxPool' not in op_type:
        self.assertGradientChecks(gc, op, [X], 0, [0])
def test_global_pooling_3d(self, kernel, size, input_channels, batch_size,
                           order, op_type, engine, gc, dc):
    # Currently MIOpen pooling only supports 2D pooling.
    if hiputl.run_in_hip(gc, dc):
        assume(engine != "CUDNN")
    # Pad and stride are ignored because they are inferred when
    # global_pooling is set.
    op = core.CreateOperator(
        op_type,
        ["X"],
        ["Y"],
        kernels=[kernel] * 3,
        order=order,
        global_pooling=True,
        engine=engine,
    )
    X = np.random.rand(
        batch_size, size, size, size, input_channels).astype(np.float32)
    if order == "NCHW":
        X = utils.NHWC2NCHW(X)
    self.assertDeviceChecks(dc, op, [X], [0], threshold=0.001)
    if 'MaxPool' not in op_type:
        self.assertGradientChecks(gc, op, [X], 0, [0], threshold=0.001)
def test_1x1_conv(self, op_type, N, G, DX, DY, H, W, use_bias, order,
                  force_algo_fwd, force_algo_dgrad, force_algo_wgrad,
                  gc, dc):
    if hiputl.run_in_hip(gc, dc):
        assume(order == "NCHW")
    if order == "NHWC":
        G = 1
    C = G * DX
    M = G * DY
    op = core.CreateOperator(
        op_type,
        ["X", "filter", "bias"] if use_bias else ["X", "filter"],
        ["Y"],
        stride_h=1,
        stride_w=1,
        pad_t=0,
        pad_l=0,
        pad_b=0,
        pad_r=0,
        kernel=1,
        order=order,
        group=G,
        force_algo_fwd=force_algo_fwd,
        force_algo_dgrad=force_algo_dgrad,
        force_algo_wgrad=force_algo_wgrad,
    )
    if order == "NCHW":
        X = np.random.randn(N, C, H, W).astype(np.float32)
        filter = np.random.randn(M, DX, 1, 1).astype(np.float32)
    else:
        X = np.random.randn(N, H, W, C).astype(np.float32)
        filter = np.random.randn(M, 1, 1, DX).astype(np.float32)
    bias = np.random.randn(M).astype(np.float32)
    inputs = [X, filter, bias] if use_bias else [X, filter]

    def conv_1x1_nchw_ref(X, filter, bias=None):
        X = X.reshape(N, G, DX, -1)
        filter = filter.reshape(G, DY, DX)
        Y = np.zeros(shape=(N, G, DY, H * W), dtype=np.float32)
        for i in range(N):
            for j in range(G):
                Y[i, j, :, :] = np.dot(filter[j, :, :], X[i, j, :, :])
        Y = Y.reshape(N, M, H, W)
        if bias is not None:
            bias = bias.reshape(1, M, 1, 1)
            Y = np.add(Y, bias)
        return [Y]

    def conv_1x1_nhwc_ref(X, filter, bias=None):
        X = X.reshape(N, -1, G, DX)
        filter = filter.reshape(G, DY, DX)
        Y = np.zeros(shape=(N, H * W, G, DY), dtype=np.float32)
        for i in range(N):
            for j in range(G):
                Y[i, :, j, :] = np.dot(
                    X[i, :, j, :], filter[j, :, :].transpose())
        Y = Y.reshape(N, H, W, M)
        if bias is not None:
            bias = bias.reshape(1, 1, 1, M)
            Y = np.add(Y, bias)
        return [Y]

    if order == "NCHW":
        conv_1x1_ref = conv_1x1_nchw_ref
    else:
        conv_1x1_ref = conv_1x1_nhwc_ref
    self.assertReferenceChecks(
        device_option=gc,
        op=op,
        inputs=inputs,
        reference=conv_1x1_ref,
    )
    self.assertDeviceChecks(dc, op, inputs, [0])
    for i in range(len(inputs)):
        self.assertGradientChecks(gc, op, inputs, i, [0])
def test_convolution_layout(self, op_type, stride, pad, kernel, dilation,
                            size, input_channels, output_channels,
                            batch_size, use_bias, gc, dc):
    assume(size >= dilation * (kernel - 1) + 1)

    X = np.random.rand(
        batch_size, size, size, input_channels).astype(np.float32) - 0.5
    w = np.random.rand(
        output_channels, kernel, kernel,
        input_channels).astype(np.float32) - 0.5
    b = np.random.rand(output_channels).astype(np.float32) - 0.5
    Output = collections.namedtuple("Output", ["Y", "engine", "order"])
    outputs = []

    for order in ["NCHW", "NHWC"]:
        engine_list = ['']
        if hiputl.run_in_hip(gc, dc):
            if order == 'NCHW':
                engine_list.append('MIOPEN')
        else:
            if _cudnn_supports(dilation=(dilation > 1),
                               nhwc=(order == 'NHWC')):
                engine_list.append('CUDNN')

        for engine in engine_list:
            op = core.CreateOperator(
                op_type,
                ["X", "w", "b"] if use_bias else ["X", "w"],
                ["Y"],
                stride=stride,
                kernel=kernel,
                dilation=dilation,
                pad=pad,
                order=order,
                engine=engine,
                device_option=gc,
                exhaustive_search=True,
            )
            if order == "NCHW":
                X_f = X.transpose((0, 3, 1, 2))
                w_f = w.transpose((0, 3, 1, 2))
            else:
                X_f = X
                w_f = w
            self.assertDeviceChecks(
                dc, op, [X_f, w_f, b] if use_bias else [X_f, w_f], [0])
            self.ws.create_blob("X").feed(X_f, device_option=gc)
            self.ws.create_blob("w").feed(w_f, device_option=gc)
            self.ws.create_blob("b").feed(b, device_option=gc)
            self.ws.run(op)
            outputs.append(Output(
                Y=self.ws.blobs["Y"].fetch(), engine=engine, order=order))

    def canonical(o):
        if o.order == "NHWC":
            return o.Y.transpose((0, 3, 1, 2))
        else:
            return o.Y

    for o in outputs:
        np.testing.assert_allclose(
            canonical(outputs[0]),
            canonical(o),
            atol=1e-4,
            rtol=1e-4)
def test_convolution_gradients(self, op_type, stride, pad, kernel,
                               dilation, size, input_channels,
                               output_channels, batch_size, group, order,
                               engine, use_bias, force_algo_fwd,
                               force_algo_dgrad, force_algo_wgrad, gc, dc):
    if order == "NHWC" or engine == "MKLDNN":
        group = 1

    input_channels *= group
    output_channels *= group
    dkernel = dilation * (kernel - 1) + 1

    if engine == 'CUDNN':
        if hiputl.run_in_hip(gc, dc):
            assume((order == "NCHW") and not (dilation > 1 and group > 1))
        else:
            assume(_cudnn_supports(dilation=(dilation > 1),
                                   nhwc=(order == 'NHWC'),
                                   backward=True))

    assume(engine != "MKLDNN" or use_bias is True)

    op = core.CreateOperator(
        op_type,
        ["X", "w", "b"] if use_bias else ["X", "w"],
        ["Y"],
        stride=stride,
        kernel=kernel,
        dilation=dilation,
        pad=pad,
        group=group,
        order=order,
        engine=engine,
        force_algo_fwd=force_algo_fwd,
        force_algo_dgrad=force_algo_dgrad,
        force_algo_wgrad=force_algo_wgrad,
    )
    X = np.random.rand(
        batch_size, size, size, input_channels).astype(np.float32) - 0.5
    w = np.random.rand(
        output_channels, kernel, kernel,
        int(input_channels / group)).astype(np.float32) - 0.5
    b = np.random.rand(output_channels).astype(np.float32) - 0.5
    if order == "NCHW":
        X = X.transpose((0, 3, 1, 2))
        w = w.transpose((0, 3, 1, 2))

    inputs = [X, w, b] if use_bias else [X, w]

    # Error handling path: the dilated kernel does not fit in the
    # padded input.
    if size + pad + pad < dkernel:
        with self.assertRaises(RuntimeError):
            self.assertDeviceChecks(dc, op, inputs, [0])
        return

    try:
        self.assertDeviceChecks(dc, op, inputs, [0])
    except RuntimeError as e:
        es = str(e)
        # CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM should always have
        # an implementation.
        if "status == CUDNN_STATUS_SUCCESS" not in es \
                or "CUDNN_STATUS_NOT_SUPPORTED" not in es \
                or force_algo_fwd == 0:
            raise e

    for i in range(len(inputs)):
        try:
            self.assertGradientChecks(gc, op, inputs, i, [0])
        except RuntimeError as e:
            es = str(e)
            if "status == CUDNN_STATUS_SUCCESS" not in es \
                    or "CUDNN_STATUS_NOT_SUPPORTED" not in es:
                raise e
def test_spatialbn_train_mode(self, size, input_channels, batch_size,
                              seed, order, epsilon, momentum, inplace,
                              engine, gc, dc):
    # Currently HIP SpatialBN only supports NCHW
    if hiputl.run_in_hip(gc, dc):
        assume(order == "NCHW")
    assume(batch_size == 0 or batch_size * size * size > 1)
    op = core.CreateOperator(
        "SpatialBN",
        ["X", "scale", "bias", "running_mean", "running_var"],
        ["X" if inplace else "Y",
         "running_mean", "running_var", "saved_mean", "saved_var"],
        order=order,
        is_test=False,
        epsilon=epsilon,
        momentum=momentum,
        engine=engine,
    )
    np.random.seed(1701)
    scale = np.random.randn(input_channels).astype(np.float32)
    bias = np.random.rand(input_channels).astype(np.float32) - 0.5
    mean = np.random.randn(input_channels).astype(np.float32)
    var = np.random.rand(input_channels).astype(np.float32) + 0.5
    X = np.random.randn(
        batch_size, input_channels, size, size).astype(np.float32)
    if order == "NHWC":
        X = np.transpose(X, (0, 2, 3, 1))

    def batch_norm_ref(X, scale, bias, running_mean, running_var):
        if batch_size == 0:
            Y = np.zeros(X.shape)
            saved_mean = np.zeros(running_mean.shape)
            saved_var = np.zeros(running_var.shape)
            return (Y, running_mean, running_var, saved_mean, saved_var)

        if order == "NHWC":
            X = np.transpose(X, (0, 3, 1, 2))

        C = X.shape[1]
        reduce_size = batch_size * size * size
        saved_mean = np.mean(X, (0, 2, 3))
        saved_var = np.var(X, (0, 2, 3))
        running_mean = momentum * running_mean + (
            1.0 - momentum) * saved_mean
        running_var = momentum * running_var + (1.0 - momentum) * (
            reduce_size / (reduce_size - 1)) * saved_var
        std = np.sqrt(saved_var + epsilon)
        broadcast_shape = (1, C, 1, 1)
        Y = (X - np.reshape(saved_mean, broadcast_shape)) / np.reshape(
            std, broadcast_shape) * np.reshape(
            scale, broadcast_shape) + np.reshape(bias, broadcast_shape)

        if order == "NHWC":
            Y = np.transpose(Y, (0, 2, 3, 1))

        return (Y, running_mean, running_var, saved_mean, 1.0 / std)

    self.assertReferenceChecks(gc, op, [X, scale, bias, mean, var],
                               batch_norm_ref)
    self.assertDeviceChecks(dc, op, [X, scale, bias, mean, var],
                            [0, 1, 2, 3, 4])
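# Minimal numeric sketch of the running-stats update mirrored by
# batch_norm_ref above (our own illustration, not part of the test
# suite): training-mode BN normalizes with the biased batch variance,
# but folds Bessel's correction n / (n - 1) into the running variance.
_x = np.array([1.0, 2.0, 3.0, 4.0])            # one channel, n = 4
_saved_var = np.var(_x)                         # biased: 1.25
_unbiased = (len(_x) / (len(_x) - 1)) * _saved_var
assert np.isclose(_unbiased, np.var(_x, ddof=1))  # 5/3 ~= 1.6667
_momentum = 0.9                                 # example momentum
_running_var = _momentum * 1.0 + (1.0 - _momentum) * _unbiased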
def test_sparse_lengths_sum_cpu(
        self, batchsize, fptype, fp16asint, blocksize,
        normalize_by_lengths, empty_indices, gc, dc):
    if fptype != np.float32:
        assume(gc.device_type == caffe2_pb2.CPU)
        assume(not hiputl.run_in_hip(gc, dc))
        assume(caffe2_pb2.CUDA not in {d.device_type for d in dc})

    if normalize_by_lengths:
        print("<test_sparse_lengths_sum_mean_cpu>")
    else:
        print("<test_sparse_lengths_sum_cpu>")

    tblsize = 300
    if fptype == np.float32:
        Tbl = np.random.rand(tblsize, blocksize).astype(np.float32)
        atol = 1e-5
    else:
        if fp16asint:
            Tbl = ((10.0 * np.random.rand(tblsize, blocksize))
                   .round().astype(np.float16))
            atol = 1e-3
        else:
            Tbl = np.random.rand(tblsize, blocksize).astype(np.float16)
            atol = 1e-1

    # Array of each row's segment length.
    if empty_indices:
        Lengths = np.zeros(batchsize, dtype=np.int32)
    else:
        Lengths = np.random.randint(1, 30, size=batchsize, dtype=np.int32)
    # Flat indices into the table.
    Indices = np.random.randint(0, tblsize, size=sum(Lengths), dtype=np.int64)

    op = core.CreateOperator(
        "SparseLengths" + ("Mean" if normalize_by_lengths else "Sum"),
        ["Tbl", "Indices", "Lengths"],
        "out",
    )

    def sparse_lengths_sum_ref(Tbl, Indices, Lengths):
        rptr = np.cumsum(np.insert(Lengths, [0], [0]))
        out = np.zeros((len(Lengths), blocksize))
        if normalize_by_lengths:
            for i in range(0, len(rptr[0:-1])):
                if Lengths[i] != 0:
                    out[i] = (Tbl[Indices[rptr[i]:rptr[i + 1]]].sum(axis=0)
                              * 1.0 / float(Lengths[i]))
        else:
            for i in range(0, len(rptr[0:-1])):
                out[i] = Tbl[Indices[rptr[i]:rptr[i + 1]]].sum(axis=0)
        return [out.astype(np.float32)]

    self.assertReferenceChecks(
        gc,
        op,
        [Tbl, Indices, Lengths],
        sparse_lengths_sum_ref,
        threshold=1e-3,
        atol=atol,
    )
def test_convolution_gradients(
        self, op_type, stride, pad, kernel, dilation, size,
        input_channels, output_channels, batch_size, group, order,
        engine, use_bias, force_algo_fwd, force_algo_dgrad,
        force_algo_wgrad, gc, dc):
    # TODO: Group conv in NHWC not implemented for GPU yet.
    assume(
        group == 1
        or (order == "NCHW" or gc.device_type == caffe2_pb2.CPU)
        and engine != "MKLDNN"
    )
    if group != 1 and order == "NHWC":
        dc = [d for d in dc if d.device_type == caffe2_pb2.CPU]

    input_channels *= group
    output_channels *= group
    dkernel = dilation * (kernel - 1) + 1

    if engine == "CUDNN":
        if hiputl.run_in_hip(gc, dc):
            assume((order == "NCHW") and not (dilation > 1 and group > 1))
        else:
            assume(
                _cudnn_supports(
                    dilation=(dilation > 1),
                    nhwc=(order == "NHWC"),
                    backward=True,
                )
            )

    assume(engine != "MKLDNN" or use_bias is True)

    op = core.CreateOperator(
        op_type,
        ["X", "w", "b"] if use_bias else ["X", "w"],
        ["Y"],
        stride=stride,
        kernel=kernel,
        dilation=dilation,
        pad=pad,
        group=group,
        order=order,
        engine=engine,
        force_algo_fwd=force_algo_fwd,
        force_algo_dgrad=force_algo_dgrad,
        force_algo_wgrad=force_algo_wgrad,
    )
    X = np.random.rand(
        batch_size, size, size, input_channels).astype(np.float32) - 0.5
    w = np.random.rand(
        output_channels, kernel, kernel,
        int(input_channels / group)).astype(np.float32) - 0.5
    b = np.random.rand(output_channels).astype(np.float32) - 0.5
    if order == "NCHW":
        X = utils.NHWC2NCHW(X)
        w = utils.NHWC2NCHW(w)

    inputs = [X, w, b] if use_bias else [X, w]

    # Error handling path: the dilated kernel does not fit in the
    # padded input.
    if size + pad + pad < dkernel:
        with self.assertRaises(RuntimeError):
            self.assertDeviceChecks(dc, op, inputs, [0])
        return

    try:
        self.assertDeviceChecks(dc, op, inputs, [0])
    except RuntimeError as e:
        es = str(e)
        # CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM should always have
        # an implementation.
        if (
            "status == CUDNN_STATUS_SUCCESS" not in es
            or "CUDNN_STATUS_NOT_SUPPORTED" not in es
            or force_algo_fwd == 0
        ):
            raise e

    for i in range(len(inputs)):
        try:
            self.assertGradientChecks(gc, op, inputs, i, [0])
        except RuntimeError as e:
            es = str(e)
            if (
                "status == CUDNN_STATUS_SUCCESS" not in es
                or "CUDNN_STATUS_NOT_SUPPORTED" not in es
            ):
                raise e