# Consolidated imports assumed by the test cases below (the originals live in
# separate Caffe2 test modules; the aliases follow the usual Caffe2 test conventions).
import copy
import functools
import math
import unittest
from functools import partial

import hypothesis
import hypothesis.extra.numpy as hnp
import hypothesis.strategies as st
import numpy as np
import numpy.testing as npt
from hypothesis import HealthCheck, assume, given, settings
from scipy.stats import norm

from caffe2.python import core, workspace
import caffe2.python.hypothesis_test_util as hu
import caffe2.python.mkl_test_util as mu
import caffe2.python.serialized_test.serialized_test_util as serial


class TestNormalizeOp(hu.HypothesisTestCase):
    @given(X=hu.tensor(min_dim=1,
                       max_dim=5,
                       elements=hu.floats(min_value=0.5, max_value=1.0)),
           **hu.gcs)
    def test_normalize(self, X, gc, dc):
        def ref_normalize(X, axis):
            x_normed = X / np.maximum(
                np.sqrt((X**2).sum(axis=axis, keepdims=True)), 1e-12)
            return (x_normed, )

        for axis in range(-X.ndim, X.ndim):
            x = copy.copy(X)
            op = core.CreateOperator("Normalize", "X", "Y", axis=axis)
            self.assertReferenceChecks(
                gc, op, [x], functools.partial(ref_normalize, axis=axis))
            self.assertDeviceChecks(dc, op, [x], [0])
            self.assertGradientChecks(gc, op, [x], 0, [0])

    @given(X=hu.tensor(min_dim=1,
                       max_dim=5,
                       elements=hu.floats(min_value=0.5, max_value=1.0)),
           **hu.gcs)
    def test_normalize_L1(self, X, gc, dc):
        def ref(X, axis):
            norm = abs(X).sum(axis=axis, keepdims=True)
            return (X / norm, )

        for axis in range(-X.ndim, X.ndim):
            print("axis: ", axis)
            op = core.CreateOperator("NormalizeL1", "X", "Y", axis=axis)
            self.assertReferenceChecks(gc, op, [X],
                                       functools.partial(ref, axis=axis))
            self.assertDeviceChecks(dc, op, [X], [0])
class TestTrigonometricOp(serial.SerializedTestCase):
    @given(X=hu.tensor(elements=hu.floats(min_value=-0.7, max_value=0.7)),
           **hu.gcs)
    @settings(deadline=None, max_examples=50)
    def test_acos(self, X, gc, dc):
        self.assertTrigonometricChecks("Acos", X, lambda x: (np.arccos(X), ),
                                       gc, dc)

    @given(X=hu.tensor(elements=hu.floats(min_value=-0.7, max_value=0.7)),
           **hu.gcs)
    @settings(deadline=None, max_examples=50)
    def test_asin(self, X, gc, dc):
        self.assertTrigonometricChecks("Asin", X, lambda x: (np.arcsin(X), ),
                                       gc, dc)

    @given(X=hu.tensor(elements=hu.floats(min_value=-100, max_value=100)),
           **hu.gcs)
    @settings(deadline=None, max_examples=50)
    def test_atan(self, X, gc, dc):
        self.assertTrigonometricChecks("Atan", X, lambda x: (np.arctan(X), ),
                                       gc, dc)

    @given(X=hu.tensor(elements=hu.floats(min_value=-0.5, max_value=0.5)),
           **hu.gcs)
    @settings(deadline=None, max_examples=50)
    def test_tan(self, X, gc, dc):
        self.assertTrigonometricChecks("Tan", X, lambda x: (np.tan(X), ),
                                       gc, dc)

    def assertTrigonometricChecks(self, op_name, input, reference, gc, dc):
        op = core.CreateOperator(op_name, ["X"], ["Y"])
        self.assertReferenceChecks(gc, op, [input], reference)
        self.assertDeviceChecks(dc, op, [input], [0])
        self.assertGradientChecks(gc, op, [input], 0, [0])
class TestAPMeterOps(hu.HypothesisTestCase):
    @given(predictions=hu.arrays(dims=[10, 3],
                                 elements=hu.floats(allow_nan=False,
                                                    allow_infinity=False,
                                                    min_value=0.1,
                                                    max_value=1)),
           labels=hu.arrays(dims=[10, 3],
                            dtype=np.int32,
                            elements=st.integers(min_value=0, max_value=1)),
           **hu.gcs_cpu_only)
    def test_average_precision(self, predictions, labels, gc, dc):
        op = core.CreateOperator(
            "APMeter",
            ["predictions", "labels"],
            ["AP"],
            buffer_size=10,
        )

        def op_ref(predictions, labels):
            ap = calculate_ap(predictions, labels)
            return (ap, )

        self.assertReferenceChecks(device_option=gc,
                                   op=op,
                                   inputs=[predictions, labels],
                                   reference=op_ref)

    @given(predictions=hu.arrays(dims=[10, 3],
                                 elements=hu.floats(allow_nan=False,
                                                    allow_infinity=False,
                                                    min_value=0.1,
                                                    max_value=1)),
           labels=hu.arrays(dims=[10, 3],
                            dtype=np.int32,
                            elements=st.integers(min_value=0, max_value=1)),
           **hu.gcs_cpu_only)
    def test_average_precision_small_buffer(self, predictions, labels, gc, dc):
        op_small_buffer = core.CreateOperator(
            "APMeter",
            ["predictions", "labels"],
            ["AP"],
            buffer_size=5,
        )

        def op_ref(predictions, labels):
            # We can only hold the last 5 in the buffer
            ap = calculate_ap(predictions[5:], labels[5:])
            return (ap, )

        self.assertReferenceChecks(device_option=gc,
                                   op=op_small_buffer,
                                   inputs=[predictions, labels],
                                   reference=op_ref)
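# `calculate_ap` used above is defined elsewhere in the original test module and
# is not shown in this excerpt. Purely as an illustration of the quantity being
# checked (not the canonical helper; tie-breaking and weighting may differ), a
# column-wise average-precision reference could be sketched as follows:
def _sketch_calculate_ap(predictions, labels):
    """Average precision per class over a (num_samples, num_classes) batch."""
    num_classes = predictions.shape[1]
    ap = np.zeros(num_classes, dtype=np.float32)
    for c in range(num_classes):
        # Rank samples by descending score for this class.
        order = np.argsort(-predictions[:, c])
        ranked_labels = labels[order, c]
        hits = np.cumsum(ranked_labels)
        precision_at_k = hits / np.arange(1, len(ranked_labels) + 1)
        positives = ranked_labels.sum()
        if positives > 0:
            ap[c] = (precision_at_k * ranked_labels).sum() / positives
    return ap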
class TestEnsureClipped(hu.HypothesisTestCase):
    @given(X=hu.arrays(dims=[5, 10],
                       elements=hu.floats(min_value=-1.0, max_value=1.0)),
           in_place=st.booleans(),
           sparse=st.booleans(),
           indices=hu.arrays(dims=[5], elements=st.booleans()),
           **hu.gcs_cpu_only)
    def test_ensure_clipped(self, X, in_place, sparse, indices, gc, dc):
        if (not in_place) and sparse:
            return
        param = X.astype(np.float32)
        m, n = param.shape
        indices = np.array(np.nonzero(indices)[0], dtype=np.int64)
        grad = np.random.rand(len(indices), n)
        workspace.FeedBlob("indices", indices)
        workspace.FeedBlob("grad", grad)
        workspace.FeedBlob("param", param)
        input = ["param", "indices", "grad"] if sparse else ["param"]
        output = "param" if in_place else "output"
        op = core.CreateOperator("EnsureClipped", input, output, min=0.0)
        workspace.RunOperatorOnce(op)

        def ref():
            return (np.array([
                np.clip(X[i], 0, None) if i in indices else X[i]
                for i in range(m)
            ]) if sparse else np.clip(X, 0, None))

        npt.assert_allclose(workspace.blobs[output], ref(), rtol=1e-3)
class TestErfOp(serial.SerializedTestCase):
    @serial.given(
        X=hu.tensor(elements=hu.floats(min_value=-0.7, max_value=0.7)),
        **hu.gcs)
    def test_erf(self, X, gc, dc):
        op = core.CreateOperator('Erf', ["X"], ["Y"])
        self.assertReferenceChecks(gc, op, [X],
                                   lambda x: (np.vectorize(math.erf)(X), ))
        self.assertDeviceChecks(dc, op, [X], [0])
        self.assertGradientChecks(gc, op, [X], 0, [0])
class TestEnforceFinite(hu.HypothesisTestCase):
    @given(
        X=hu.tensor(
            # allow empty
            min_value=0,
            elements=hu.floats(allow_nan=True, allow_infinity=True),
        ),
        **hu.gcs
    )
    @settings(deadline=10000)
    def test_enforce_finite(self, X, gc, dc):
        def all_finite_value(X):
            if X.size <= 0:
                return True
            return np.isfinite(X).all()

        net = core.Net('test_net')
        net.Const(array=X, blob_out="X")
        net.EnforceFinite("X", [])
        if all_finite_value(X):
            self.assertTrue(workspace.RunNetOnce(net))
        else:
            with self.assertRaises(RuntimeError):
                workspace.RunNetOnce(net)

    @given(
        X=hu.tensor(
            elements=hu.floats(min_value=0,
                               max_value=10,
                               allow_nan=False,
                               allow_infinity=False),
        ),
        **hu.gcs
    )
    def test_enforce_finite_device_check(self, X, gc, dc):
        op = core.CreateOperator(
            "EnforceFinite",
            ["X"],
            [],
        )
        self.assertDeviceChecks(dc, op, [X], [])
class TestBucketizeOp(hu.HypothesisTestCase):
    @given(x=hu.tensor(min_dim=1,
                       max_dim=2,
                       dtype=np.float32,
                       elements=hu.floats(min_value=-5, max_value=5)),
           **hu.gcs)
    def test_bucketize_op(self, x, gc, dc):
        length = np.random.randint(low=1, high=5)
        boundaries = np.random.randn(length) * 5
        boundaries.sort()

        def ref(x, boundaries):
            bucket_idx = np.digitize(x, boundaries, right=True)
            return [bucket_idx]

        op = core.CreateOperator('Bucketize', ["X"], ["INDICES"],
                                 boundaries=boundaries)
        self.assertReferenceChecks(gc, op, [x, boundaries], ref)
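# A quick worked example of the `right=True` convention used by the reference
# above: values equal to a boundary land in the lower bucket, values above the
# last boundary get index len(boundaries). These toy numbers are illustrative
# only and are not part of the original test.
_example_bucket_idx = np.digitize([0.5, 1.0, 2.5], [1.0, 2.0], right=True)
assert _example_bucket_idx.tolist() == [0, 0, 2]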
def _inputs(draw):
    batch_size = draw(st.integers(2, 10))
    rows_num = draw(st.integers(1, 100))
    block_size = draw(st.integers(1, 2))
    index_num = draw(st.integers(1, 10))
    return (
        draw(
            hnp.arrays(
                np.float32,
                (batch_size, rows_num, block_size),
                elements=hu.floats(-10.0, 10.0),
            )),
        draw(
            hnp.arrays(
                np.int32,
                (index_num, 1),
                elements=st.integers(0, rows_num - 1),
            )),
    )
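# `_inputs` above is written in hypothesis' `draw` style, which only works once
# the function is wrapped with `@st.composite`; that decorator is not visible in
# this excerpt. A minimal, hypothetical usage sketch (names are illustrative):
#
#   _inputs_strategy = st.composite(_inputs)
#
#   @given(inputs=_inputs_strategy(), **hu.gcs_cpu_only)
#   def test_some_gather_like_op(self, inputs, gc, dc):
#       data, indices = inputs  # (batch, rows, block) floats and row indices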
def _data_and_scale(data_min_size=4,
                    data_max_size=10,
                    examples_min_number=1,
                    examples_max_number=4,
                    dtype=np.float32,
                    elements=None):
    params_ = st.tuples(
        st.integers(min_value=examples_min_number,
                    max_value=examples_max_number),
        st.integers(min_value=data_min_size, max_value=data_max_size),
        st.sampled_from([np.float32, np.int32, np.int64]))
    return params_.flatmap(lambda param_: st.tuples(
        hu.arrays([param_[0], param_[1]], dtype=dtype),
        hu.arrays(
            [param_[0]],
            dtype=param_[2],
            elements=(hu.floats(0.0, 10000.0)
                      if param_[2] in [np.float32] else st.integers(0, 10000)),
        ),
    ))
class TestElementwiseOps(hu.HypothesisTestCase): @given(X=hu.tensor(dtype=np.float32), **hu.gcs) @settings(deadline=10000) def test_abs(self, X, gc, dc): op = core.CreateOperator( "Abs", ["X"], ["Y"], ) def abs_ref(X): return [np.absolute(X)] self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=abs_ref, ensure_outputs_are_inferred=True, ) self.assertDeviceChecks(dc, op, [X], [0]) self.assertGradientChecks(gc, op, [X], 0, [0], ensure_outputs_are_inferred=True) @given(X=hu.tensor(dtype=np.float32), inplace=st.booleans(), **hu.gcs) @settings(deadline=10000) def test_exp(self, X, inplace, gc, dc): op = core.CreateOperator( "Exp", ["X"], ["X"] if inplace else ["Y"], ) def exp_ref(X): return [np.exp(X)] self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=exp_ref, ensure_outputs_are_inferred=True, ) self.assertDeviceChecks(dc, op, [X], [0]) self.assertGradientChecks(gc, op, [X], 0, [0], ensure_outputs_are_inferred=True) @given(n=st.integers(0, 6), m=st.integers(4, 6), seed=st.integers(0, 1000), **hu.gcs) @settings(deadline=1000) def test_log(self, n, m, gc, dc, seed): np.random.seed(seed) X = np.random.rand(n, m).astype(np.float32) + 1.0 def log_op(X): return [np.log(X)] op = core.CreateOperator("Log", ["X"], ["Z"]) self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=log_op, ensure_outputs_are_inferred=True, ) self.assertGradientChecks(gc, op, [X], 0, [0], stepsize=1e-4, threshold=1e-2, ensure_outputs_are_inferred=True) @given(n=st.integers(0, 10), m=st.integers(4, 6), d=st.integers(2, 3), seed=st.integers(0, 1000), **hu.gcs) @settings(deadline=10000) def test_powt(self, n, m, d, gc, dc, seed): np.random.seed(seed) X = np.random.rand(n, m, d).astype(np.float32) + 1.0 Y = np.random.rand(n, m, d).astype(np.float32) + 2.0 def powt_op(X, Y): return [np.power(X, Y)] #two gradients Y*X^(Y-1) and X^Y * ln(X) def powt_grad(g_out, outputs, fwd_inputs): [X, Y] = fwd_inputs Z = outputs[0] return ([Y * np.power(X, Y - 1), Z * np.log(X)] * g_out) op = core.CreateOperator("Pow", ["X", "Y"], ["Z"]) self.assertReferenceChecks(device_option=gc, op=op, inputs=[X, Y], reference=powt_op, output_to_grad="Z", grad_reference=powt_grad, ensure_outputs_are_inferred=True) @given(n=st.integers(0, 6), m=st.integers(4, 6), seed=st.integers(0, 1000), **hu.gcs) @settings(deadline=10000) def test_sqr(self, n, m, gc, dc, seed): np.random.seed(seed) X = np.random.rand(n, m).astype(np.float32) def sqr_op(X): return [np.square(X)] op = core.CreateOperator("Sqr", ["X"], ["Z"]) self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=sqr_op, ensure_outputs_are_inferred=True, ) self.assertGradientChecks(gc, op, [X], 0, [0], stepsize=1e-4, threshold=1e-2, ensure_outputs_are_inferred=True) @given( X=hu.tensor( elements=hu.floats(min_value=0.1, max_value=10), # allow empty tensor min_value=0), inplace=st.booleans(), **hu.gcs) @settings(deadline=10000) def test_sqrt(self, X, inplace, gc, dc): def sqrt_op(X): return [np.sqrt(X)] op = core.CreateOperator("Sqrt", ["X"], ["X"] if inplace else ["Y"]) self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=sqrt_op, ensure_outputs_are_inferred=True, ) self.assertDeviceChecks(dc, op, [X], [0]) # stepsize need to be smaller than the possible minimum X, so the # sqrt is well defined self.assertGradientChecks(gc, op, [X], 0, [0], stepsize=1e-2, ensure_outputs_are_inferred=True) @given(X=hu.tensor(dtype=np.float32), inplace=st.booleans(), **hu.gcs) @settings(deadline=10000) def test_softsign(self, 
X, inplace, gc, dc): op = core.CreateOperator( "Softsign", ["X"], ["X"] if inplace else ["Y"], ) def softsign_ref(X): return [X / (1.0 + np.absolute(X))] self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=softsign_ref, ensure_outputs_are_inferred=True, ) self.assertDeviceChecks(dc, op, [X], [0]) if not inplace: self.assertGradientChecks( gc, op, [X], 0, [0], ensure_outputs_are_inferred=True, ) @given(X=hu.tensor(elements=hu.floats(min_value=0.1, max_value=10.0), dtype=np.float32), inplace=st.booleans(), **hu.gcs) @settings(deadline=10000) def test_rsqrt(self, X, inplace, gc, dc): op = core.CreateOperator( "Rsqrt", ["X"], ["X"] if inplace else ["Y"], ) def rsqrt_ref(X): return [1.0 / np.sqrt(X)] self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=rsqrt_ref, ensure_outputs_are_inferred=True, ) self.assertDeviceChecks(dc, op, [X], [0]) self.assertGradientChecks( gc, op, [X], 0, [0], stepsize=5e-3, ensure_outputs_are_inferred=True, ) @given(X=hu.tensor(dtype=np.float32), **hu.gcs) @settings(deadline=10000) def test_cube(self, X, gc, dc): op = core.CreateOperator( "Cube", ["X"], ["Y"], ) def cube_ref(X): return [np.power(X, 3)] def cube_grad_ref(g_out, outputs, fwd_inputs): dY = g_out [X] = fwd_inputs return [dY * np.square(X) * 3] self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=cube_ref, output_to_grad="Y", grad_reference=cube_grad_ref, ensure_outputs_are_inferred=True, ) self.assertDeviceChecks(dc, op, [X], [0]) @given(X=hu.tensor(dtype=np.float32), in_place=st.booleans(), **hu.gcs) @settings(deadline=10000) def test_cbrt(self, X, in_place, gc, dc): op = core.CreateOperator( "Cbrt", ["X"], ["X"] if in_place else ["Y"], ) def cbrt_ref(X): return [np.cbrt(X)] self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=cbrt_ref, ensure_outputs_are_inferred=True, ) @given(X=hu.tensor(elements=hu.floats(min_value=1.0, max_value=10.0), dtype=np.float32), in_place=st.booleans(), **hu.gcs) @settings(deadline=10000) def test_cbrt_grad(self, X, in_place, gc, dc): op = core.CreateOperator( "Cbrt", ["X"], ["X"] if in_place else ["Y"], ) self.assertGradientChecks( gc, op, [X], 0, [0], ensure_outputs_are_inferred=True, ) self.assertGradientChecks( gc, op, [-X], 0, [0], ensure_outputs_are_inferred=True, ) @given(n=st.integers(0, 6), m=st.integers(4, 6), seed=st.integers(0, 1000), **hu.gcs_cpu_only) def test_mish(self, n, m, gc, dc, seed): np.random.seed(seed) X = np.random.rand(n, m).astype(np.float32) def mish(X): return [X * np.tanh(np.log(1 + np.exp(X)))] op = core.CreateOperator("Mish", ["X"], ["Z"]) self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=mish, ensure_outputs_are_inferred=True, ) self.assertGradientChecks(gc, op, [X], 0, [0], stepsize=1e-4, threshold=1e-2, ensure_outputs_are_inferred=True) @given(n=st.integers(0, 6), m=st.integers(4, 6), seed=st.integers(0, 1000), **hu.gcs_cpu_only) def test_mish_gradient_inplace(self, n, m, gc, dc, seed): np.random.seed(seed) def mish(X): return [X * np.tanh(np.log(1 + np.exp(X)))] def mish_gradient(X, Y, dY): w = np.exp(3 * X) + 4 * np.exp( 2 * X) + (6 + 4 * X) * np.exp(X) + 4 * (1 + X) sigma2 = np.square(np.square(np.exp(X) + 1) + 1) return [dY * np.exp(X) * w / sigma2] # return [dY * (Y + np.divide(1. - Y, 1. 
+ np.exp(-X)))] X = np.random.rand(n, m).astype(np.float32) Y = mish(X)[0] dY = np.random.rand(n, m).astype(np.float32) op = core.CreateOperator("MishGradient", ["X", "Y", "grad"], "grad") self.assertReferenceChecks( device_option=gc, op=op, inputs=[X, Y, dY], reference=mish_gradient, ) @given(n=st.integers(0, 6), m=st.integers(4, 6), seed=st.integers(0, 1000), **hu.gcs) @settings(deadline=10000) def test_swish(self, n, m, gc, dc, seed): np.random.seed(seed) X = np.random.rand(n, m).astype(np.float32) def swish(X): return [np.divide(X, (1. + np.exp(-X)))] op = core.CreateOperator("Swish", ["X"], ["Z"]) self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=swish, ensure_outputs_are_inferred=True, ) self.assertGradientChecks(gc, op, [X], 0, [0], stepsize=1e-4, threshold=1e-2, ensure_outputs_are_inferred=True) @given(n=st.integers(0, 6), m=st.integers(4, 6), seed=st.integers(0, 1000), **hu.gcs) @settings(deadline=1000) def test_swish_gradient_inplace(self, n, m, gc, dc, seed): np.random.seed(seed) def swish(X): return [np.divide(X, (1. + np.exp(-X)))] def swish_gradient(X, Y, dY): return [dY * (Y + np.divide(1. - Y, 1. + np.exp(-X)))] X = np.random.rand(n, m).astype(np.float32) Y = swish(X)[0] dY = np.random.rand(n, m).astype(np.float32) op = core.CreateOperator("SwishGradient", ["X", "Y", "grad"], "grad") self.assertReferenceChecks( device_option=gc, op=op, inputs=[X, Y, dY], reference=swish_gradient, ) @given(X=hu.tensor(dtype=np.float32), inplace=st.booleans(), engine=st.sampled_from(["", "CUDNN"]), **hu.gcs) @settings(deadline=1000) def test_sigmoid(self, X, inplace, engine, gc, dc): op = core.CreateOperator( "Sigmoid", ["X"], ["X"] if inplace else ["Y"], engine=engine, ) def sigmoid_ref(X): return [1.0 / (1.0 + np.exp(-X))] self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=sigmoid_ref, ensure_outputs_are_inferred=True, ) self.assertDeviceChecks(dc, op, [X], [0]) self.assertGradientChecks(gc, op, [X], 0, [0], ensure_outputs_are_inferred=True) @given(X=hu.tensor(dtype=np.float32), inplace=st.booleans(), alpha=hu.floats(min_value=-100.0, max_value=100.0), beta=hu.floats(min_value=-100.0, max_value=100.0), engine=st.sampled_from([""]), **hu.gcs) @settings(deadline=10000) def test_hard_sigmoid(self, X, inplace, alpha, beta, engine, gc, dc): # Prevent alpha and beta from mutually being 0 to avoid a division # error when adjusting our inputs assume(alpha != 0.0 or beta != 0.0) op = core.CreateOperator( "HardSigmoid", ["X"], ["X"] if inplace else ["Y"], alpha=alpha, beta=beta, engine=engine, ) def hard_sigmoid_ref(X): return [np.minimum(1.0, np.maximum(0.0, X * alpha + beta))] # Adjust inputs to avoid differentitating at inflection points if abs(alpha) > 0.001: Y = X * alpha + beta Y += 0.04 * np.sign(Y) Y[Y == 0.0] += 0.1 Y[Y == 1.0] -= 0.1 X = (Y - beta) / alpha self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=hard_sigmoid_ref, ensure_outputs_are_inferred=True, ) self.assertDeviceChecks(dc, op, [X], [0]) self.assertGradientChecks(gc, op, [X], 0, [0], stepsize=1e-4, threshold=1e-2, ensure_outputs_are_inferred=True) @given(n=st.integers(0, 6), m=st.integers(4, 6), **hu.gcs) @settings(deadline=10000) def test_eq(self, n, m, gc, dc): # Set broadcast and no axis, i.e. broadcasting last dimensions. 
X = np.random.randint(2, size=(n, m)) Y = np.random.randint(2, size=(n, m)) op = core.CreateOperator("EQ", ["X", "Y"], "out", broadcast=1) def eq(X, Y): return [X == Y] self.assertReferenceChecks( device_option=gc, op=op, inputs=[X, Y], reference=eq, ensure_outputs_are_inferred=True, ) workspace.FeedBlob('X', X) workspace.FeedBlob('Y', Y) net = core.Net("batch_bucket_one_hot_test") result = net.EQ(["X", "Y"], 1) (shapes, types) = workspace.InferShapesAndTypes([net]) workspace.RunNetOnce(net) self.assertEqual(shapes[result], list(workspace.blobs[result].shape)) self.assertEqual(shapes[result], list(X.shape)) self.assertEqual(types[result], core.DataType.BOOL) @given(n=st.integers(0, 6), m=st.integers(4, 6), **hu.gcs) @settings(deadline=10000) def test_eq_bcast(self, n, m, gc, dc): # Set broadcast and no axis, i.e. broadcasting last dimensions. X = np.random.randint(2, size=(n, m)) Y = np.random.randint(2, size=(m, )) op = core.CreateOperator("EQ", ["X", "Y"], "out", broadcast=1) def eq(X, Y): return [X == Y] self.assertReferenceChecks( device_option=gc, op=op, inputs=[X, Y], reference=eq, ensure_outputs_are_inferred=True, ) workspace.FeedBlob('X', X) workspace.FeedBlob('Y', Y) net = core.Net("eq_bast") result = net.EQ(["X", "Y"], 1, broadcast=1) (shapes, types) = workspace.InferShapesAndTypes([net]) workspace.RunNetOnce(net) self.assertTrue(str(result) in shapes) self.assertEqual(shapes[result], list(workspace.blobs[result].shape)) self.assertEqual(shapes[result], list(X.shape)) self.assertEqual(types[result], core.DataType.BOOL) net_2 = core.Net("eq_bast_invalid") result_2 = net_2.EQ(["X", "Y"], 1) (shapes, types) = workspace.InferShapesAndTypes([net]) self.assertTrue(str(result_2) not in shapes) def _run_single_test(self, op, ref, A, B, reverse_inputs, test_grad, gc, dc): inputs = [A, B] self.assertReferenceChecks( device_option=gc, op=op, inputs=inputs, reference=ref, ensure_outputs_are_inferred=True, ) self.assertDeviceChecks(dc, op, inputs, [0]) if test_grad: for i in range(len(inputs)): self.assertGradientChecks( gc, op, inputs, i, [0], ensure_outputs_are_inferred=True, ) if reverse_inputs: inputs = [B, A] self.assertReferenceChecks( device_option=gc, op=op, inputs=inputs, reference=ref, ensure_outputs_are_inferred=True, ) self.assertDeviceChecks(dc, op, inputs, [0]) if test_grad: for i in range(len(inputs)): self.assertGradientChecks( gc, op, inputs, i, [0], ensure_outputs_are_inferred=True, ) def _test_binary_op(self, op_name, np_ref, n, m, k, t, bias, test_grad, gc, dc): op = core.CreateOperator( op_name, ["A", "B"], ["C"], ) def ref(A, B): return [np_ref(A, B)] A = np.random.rand(n, m, k, t).astype(np.float32) + bias B = np.random.rand(n, m, k, t).astype(np.float32) + bias self._run_single_test(op, ref, A, B, True, test_grad, gc, dc) A = np.random.rand(1).astype(np.float32) + bias B = np.random.rand(n, m, k, t).astype(np.float32) + bias self._run_single_test(op, ref, A, B, True, test_grad, gc, dc) A = np.random.rand(k, t).astype(np.float32) + bias B = np.random.rand(n, m, k, t).astype(np.float32) + bias self._run_single_test(op, ref, A, B, True, test_grad, gc, dc) A = np.random.rand(n, m, 1, 1).astype(np.float32) + bias B = np.random.rand(n, m, k, t).astype(np.float32) + bias self._run_single_test(op, ref, A, B, True, test_grad, gc, dc) A = np.random.rand(1, m, k, 1).astype(np.float32) + bias B = np.random.rand(n, m, k, t).astype(np.float32) + bias self._run_single_test(op, ref, A, B, True, test_grad, gc, dc) A = np.random.rand(m, 1, t).astype(np.float32) + bias B = 
np.random.rand(n, m, k, t).astype(np.float32) + bias self._run_single_test(op, ref, A, B, True, test_grad, gc, dc) A = np.random.rand(1, m, 1, t).astype(np.float32) + bias B = np.random.rand(n, 1, k, 1).astype(np.float32) + bias self._run_single_test(op, ref, A, B, True, test_grad, gc, dc) def _test_binary_op_in_place(self, op_name, np_ref, n, m, bias, test_grad, in_place_2nd, gc, dc): def ref(A, B): return [np_ref(A, B)] op = core.CreateOperator( op_name, ["A", "B"], ["A"], ) A = np.random.rand(n, m).astype(np.float32) + bias B = np.random.rand(m).astype(np.float32) + bias self._run_single_test(op, ref, A, B, False, test_grad, gc, dc) A = np.random.rand(n, m).astype(np.float32) + bias B = np.random.rand(n, 1).astype(np.float32) + bias self._run_single_test(op, ref, A, B, False, test_grad, gc, dc) if in_place_2nd: op = core.CreateOperator( op_name, ["A", "B"], ["B"], ) A = np.random.rand(m).astype(np.float32) + bias B = np.random.rand(n, m).astype(np.float32) + bias self._run_single_test(op, ref, A, B, False, test_grad, gc, dc) A = np.random.rand(n, 1).astype(np.float32) + bias B = np.random.rand(n, m).astype(np.float32) + bias self._run_single_test(op, ref, A, B, False, test_grad, gc, dc) @given(n=st.integers(0, 5), m=st.integers(0, 5), k=st.integers(0, 5), t=st.integers(0, 5), **hu.gcs) @settings(deadline=None, max_examples=50) def test_add(self, n, m, k, t, gc, dc): self._test_binary_op("Add", np.add, n, m, k, t, -0.5, True, gc, dc) self._test_binary_op_in_place("Add", np.add, n, m, -0.5, True, True, gc, dc) @given(n=st.integers(0, 5), m=st.integers(0, 5), k=st.integers(0, 5), t=st.integers(0, 5), **hu.gcs) @settings(deadline=None, max_examples=50) def test_sub(self, n, m, k, t, gc, dc): self._test_binary_op("Sub", np.subtract, n, m, k, t, -0.5, True, gc, dc) self._test_binary_op_in_place("Sub", np.subtract, n, m, -0.5, True, True, gc, dc) @given(n=st.integers(0, 5), m=st.integers(0, 5), k=st.integers(0, 5), t=st.integers(0, 5), **hu.gcs) @settings(deadline=None, max_examples=50) def test_mul(self, n, m, k, t, gc, dc): self._test_binary_op("Mul", np.multiply, n, m, k, t, -0.5, True, gc, dc) @given(n=st.integers(0, 5), m=st.integers(0, 5), k=st.integers(0, 5), t=st.integers(0, 5), **hu.gcs) @settings(deadline=None, max_examples=50) def test_div(self, n, m, k, t, gc, dc): self._test_binary_op("Div", np.divide, n, m, k, t, 1.0, True, gc, dc) self._test_binary_op_in_place("Div", np.divide, n, m, 1.0, True, False, gc, dc) @given(n=st.integers(1, 5), m=st.integers(1, 5), broadcast=st.booleans(), **hu.gcs) @settings(deadline=10000) def test_div_legacy_grad(self, n, m, broadcast, gc, dc): op = core.CreateOperator( "DivGradient", ["B", "C", "dC"], ["dA", "dB"], ) def div_grad_ref(B, C, dC): dA = dC / B dB = -dC * C / B if broadcast: dB = np.sum(dB, axis=0) return [dA, dB] if broadcast: B = np.random.rand(m).astype(np.float32) + 1.0 else: B = np.random.rand(n, m).astype(np.float32) + 1.0 C = np.random.randn(n, m).astype(np.float32) dC = np.random.randn(n, m).astype(np.float32) inputs = [B, C, dC] self.assertReferenceChecks( device_option=gc, op=op, inputs=inputs, reference=div_grad_ref, ) self.assertDeviceChecks(dc, op, inputs, [0, 1]) def _test_bitwise_binary_op(self, op_name, np_ref, n, m, k, t, gc, dc): op = core.CreateOperator( op_name, ["A", "B"], ["C"], ) def ref(A, B): return [np_ref(A, B)] A = np.random.randint(128, size=(n, m, k, t)) B = np.random.randint(128, size=(n, m, k, t)) self._run_single_test(op, ref, A, B, True, False, gc, dc) A = np.random.randint(128, size=1) B = 
np.random.randint(128, size=(n, m, k, t)) self._run_single_test(op, ref, A, B, True, False, gc, dc) A = np.random.randint(128, size=(k, t)) B = np.random.randint(128, size=(n, m, k, t)) self._run_single_test(op, ref, A, B, True, False, gc, dc) A = np.random.randint(128, size=(n, m, 1, 1)) B = np.random.randint(128, size=(n, m, k, t)) self._run_single_test(op, ref, A, B, True, False, gc, dc) A = np.random.randint(128, size=(1, m, k, 1)) B = np.random.randint(128, size=(n, m, k, t)) self._run_single_test(op, ref, A, B, True, False, gc, dc) A = np.random.randint(128, size=(m, 1, t)) B = np.random.randint(128, size=(n, m, k, t)) self._run_single_test(op, ref, A, B, True, False, gc, dc) A = np.random.randint(128, size=(1, m, 1, t)) B = np.random.randint(128, size=(n, 1, k, 1)) self._run_single_test(op, ref, A, B, True, False, gc, dc) @given(n=st.integers(1, 5), m=st.integers(1, 5), k=st.integers(1, 5), t=st.integers(1, 5), **hu.gcs) @settings(deadline=10000) def test_bitwise_and(self, n, m, k, t, gc, dc): self._test_bitwise_binary_op("BitwiseAnd", np.bitwise_and, n, m, k, t, gc, dc) @given(n=st.integers(1, 5), m=st.integers(1, 5), k=st.integers(1, 5), t=st.integers(1, 5), **hu.gcs) @settings(deadline=10000) def test_bitwise_or(self, n, m, k, t, gc, dc): self._test_bitwise_binary_op("BitwiseOr", np.bitwise_or, n, m, k, t, gc, dc) @given(n=st.integers(1, 5), m=st.integers(1, 5), k=st.integers(1, 5), t=st.integers(1, 5), **hu.gcs) @settings(deadline=10000) def test_bitwise_xor(self, n, m, k, t, gc, dc): self._test_bitwise_binary_op("BitwiseXor", np.bitwise_xor, n, m, k, t, gc, dc) @given(X=hu.tensor(elements=hu.floats(min_value=0.5, max_value=2), dtype=np.float32), inplace=st.booleans(), **hu.gcs) @settings(deadline=10000) def test_reciprocal(self, X, inplace, gc, dc): def reciprocal_op(X): return [np.reciprocal(X)] op = core.CreateOperator("Reciprocal", ["X"], ["X"] if inplace else ["Y"]) self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=reciprocal_op, ensure_outputs_are_inferred=True, ) self.assertDeviceChecks(dc, op, [X], [0]) self.assertGradientChecks(gc, op, [X], 0, [0], stepsize=1e-3, threshold=0.05, ensure_outputs_are_inferred=True) @given(X=hu.tensor(dtype=np.bool), **hu.gcs) @settings(deadline=10000) def test_not(self, X, gc, dc): def not_op(X): return [np.logical_not(X)] op = core.CreateOperator( "Not", ["X"], ["Y"], ) self.assertReferenceChecks( device_option=gc, op=op, inputs=[X], reference=not_op, ensure_outputs_are_inferred=True, ) self.assertDeviceChecks(dc, op, [X], [0])
class TestSequenceOps(serial.SerializedTestCase): @given(start_pad_width=st.integers(min_value=1, max_value=2), end_pad_width=st.integers(min_value=0, max_value=2), args=_gen_test_add_padding(with_pad_data=True), ret_lengths=st.booleans(), **hu.gcs) @settings(deadline=1000) def test_add_padding(self, start_pad_width, end_pad_width, args, ret_lengths, gc, dc): lengths, data, start_padding, end_padding = args start_padding = np.array(start_padding, dtype=np.float32) end_padding = np.array(end_padding, dtype=np.float32) outputs = ['output', 'lengths_out'] if ret_lengths else ['output'] op = core.CreateOperator( 'AddPadding', ['data', 'lengths', 'start_padding', 'end_padding'], outputs, padding_width=start_pad_width, end_padding_width=end_pad_width) self.assertReferenceChecks( device_option=gc, op=op, inputs=[data, lengths, start_padding, end_padding], reference=partial(_add_padding_ref, start_pad_width, end_pad_width, ret_lengths)) @given(start_pad_width=st.integers(min_value=1, max_value=2), end_pad_width=st.integers(min_value=0, max_value=2), args=_gen_test_add_padding(with_pad_data=False), **hu.gcs) def test_add_zero_padding(self, start_pad_width, end_pad_width, args, gc, dc): lengths, data = args op = core.CreateOperator('AddPadding', ['data', 'lengths'], ['output', 'lengths_out'], padding_width=start_pad_width, end_padding_width=end_pad_width) self.assertReferenceChecks( gc, op, [data, lengths], partial(_add_padding_ref, start_pad_width, end_pad_width, True)) @given(start_pad_width=st.integers(min_value=1, max_value=2), end_pad_width=st.integers(min_value=0, max_value=2), data=hu.tensor(min_dim=1, max_dim=3), **hu.gcs) def test_add_padding_no_length(self, start_pad_width, end_pad_width, data, gc, dc): op = core.CreateOperator('AddPadding', ['data'], ['output', 'output_lens'], padding_width=start_pad_width, end_padding_width=end_pad_width) self.assertReferenceChecks( gc, op, [data], partial(_add_padding_ref, start_pad_width, end_pad_width, True, lengths=np.array([data.shape[0]]))) # Uncomment the following seed to make this fail. 
# @seed(302934307671667531413257853548643485645) # See https://github.com/caffe2/caffe2/issues/1547 @unittest.skip("flaky test") @given(start_pad_width=st.integers(min_value=1, max_value=2), end_pad_width=st.integers(min_value=0, max_value=2), args=_gen_test_add_padding(with_pad_data=False, is_remove=True), **hu.gcs) def test_remove_padding(self, start_pad_width, end_pad_width, args, gc, dc): lengths, data = args op = core.CreateOperator('RemovePadding', ['data', 'lengths'], ['output', 'lengths_out'], padding_width=start_pad_width, end_padding_width=end_pad_width) self.assertReferenceChecks(device_option=gc, op=op, inputs=[data, lengths], reference=partial(_remove_padding_ref, start_pad_width, end_pad_width)) @given(start_pad_width=st.integers(min_value=0, max_value=2), end_pad_width=st.integers(min_value=0, max_value=2), args=_gen_test_add_padding(with_pad_data=True), **hu.gcs) @settings(deadline=10000) def test_gather_padding(self, start_pad_width, end_pad_width, args, gc, dc): lengths, data, start_padding, end_padding = args padded_data, padded_lengths = _add_padding_ref(start_pad_width, end_pad_width, True, data, lengths, start_padding, end_padding) op = core.CreateOperator('GatherPadding', ['data', 'lengths'], ['start_padding', 'end_padding'], padding_width=start_pad_width, end_padding_width=end_pad_width) self.assertReferenceChecks(device_option=gc, op=op, inputs=[padded_data, padded_lengths], reference=partial(_gather_padding_ref, start_pad_width, end_pad_width)) @given(data=hu.tensor(min_dim=3, max_dim=3, dtype=np.float32, elements=hu.floats(min_value=-np.inf, max_value=np.inf), min_value=1, max_value=10), **hu.gcs) @settings(deadline=10000) def test_reverse_packed_segs(self, data, gc, dc): max_length = data.shape[0] batch_size = data.shape[1] lengths = np.random.randint(max_length + 1, size=batch_size) op = core.CreateOperator("ReversePackedSegs", ["data", "lengths"], ["reversed_data"]) def op_ref(data, lengths): rev_data = np.array(data, copy=True) for i in range(batch_size): seg_length = lengths[i] for j in range(seg_length): rev_data[j][i] = data[seg_length - 1 - j][i] return (rev_data, ) def op_grad_ref(grad_out, outputs, inputs): return op_ref(grad_out, inputs[1]) + (None, ) self.assertReferenceChecks(device_option=gc, op=op, inputs=[data, lengths], reference=op_ref, output_to_grad='reversed_data', grad_reference=op_grad_ref) @given(data=hu.tensor(min_dim=1, max_dim=3, dtype=np.float32, elements=hu.floats(min_value=-np.inf, max_value=np.inf), min_value=10, max_value=10), indices=st.lists(st.integers(min_value=0, max_value=9), min_size=0, max_size=10), **hu.gcs_cpu_only) @settings(deadline=10000) def test_remove_data_blocks(self, data, indices, gc, dc): indices = np.array(indices) op = core.CreateOperator("RemoveDataBlocks", ["data", "indices"], ["shrunk_data"]) def op_ref(data, indices): unique_indices = np.unique(indices) sorted_indices = np.sort(unique_indices) shrunk_data = np.delete(data, sorted_indices, axis=0) return (shrunk_data, ) self.assertReferenceChecks(device_option=gc, op=op, inputs=[data, indices], reference=op_ref) @given(elements=st.lists(st.integers(min_value=0, max_value=9), min_size=0, max_size=10), **hu.gcs_cpu_only) @settings(deadline=1000) def test_find_duplicate_elements(self, elements, gc, dc): mapping = { 0: "a", 1: "b", 2: "c", 3: "d", 4: "e", 5: "f", 6: "g", 7: "h", 8: "i", 9: "j" } data = np.array([mapping[e] for e in elements], dtype='|S') op = core.CreateOperator("FindDuplicateElements", ["data"], ["indices"]) def op_ref(data): unique_data = [] 
indices = [] for i, e in enumerate(data): if e in unique_data: indices.append(i) else: unique_data.append(e) return (np.array(indices, dtype=np.int64), ) self.assertReferenceChecks(device_option=gc, op=op, inputs=[data], reference=op_ref)
class TestAdadelta(serial.SerializedTestCase):
    @staticmethod
    def ref_adadelta(param_in, mom_in, mom_delta_in, grad, lr, epsilon,
                     decay, using_fp16=False):
        param_in_f32 = param_in
        mom_in_f32 = mom_in
        mom_delta_in_f32 = mom_delta_in
        if (using_fp16):
            param_in_f32 = param_in.astype(np.float32)
            mom_in_f32 = mom_in.astype(np.float32)
            mom_delta_in_f32 = mom_delta_in.astype(np.float32)

        mom_out = decay * mom_in_f32 + (1.0 - decay) * grad * grad
        new_grad = (np.sqrt(mom_delta_in_f32 + epsilon) /
                    np.sqrt(mom_out + epsilon)) * grad
        param_out = param_in_f32 + lr * new_grad
        mom_delta_out = decay * mom_delta_in_f32 + (
            1.0 - decay) * new_grad * new_grad
        if (using_fp16):
            return (param_out.astype(np.float16), mom_out.astype(np.float16),
                    mom_delta_out.astype(np.float16))
        else:
            return (param_out.astype(np.float32), mom_out.astype(np.float32),
                    mom_delta_out.astype(np.float32))

    @given(inputs=hu.tensors(n=4),
           lr=hu.floats(min_value=0.01, max_value=0.99,
                        allow_nan=False, allow_infinity=False),
           epsilon=hu.floats(min_value=0.01, max_value=0.99,
                             allow_nan=False, allow_infinity=False),
           decay=hu.floats(min_value=0.01, max_value=0.99,
                           allow_nan=False, allow_infinity=False),
           **hu.gcs)
    @settings(deadline=1000)
    def test_adadelta(self, inputs, lr, epsilon, decay, gc, dc):
        param, moment, moment_delta, grad = inputs
        moment = np.abs(moment)
        moment_delta = np.abs(moment_delta)
        lr = np.array([lr], dtype=np.float32)
        op = core.CreateOperator(
            "Adadelta",
            ["param", "moment", "moment_delta", "grad", "lr"],
            ["param", "moment", "moment_delta"],
            epsilon=epsilon,
            decay=decay,
            device_option=gc,
        )
        self.assertReferenceChecks(
            gc, op, [param, moment, moment_delta, grad, lr],
            functools.partial(self.ref_adadelta, epsilon=epsilon, decay=decay))

    # Suppress filter_too_much health check.
    # Likely caused by `assume` call falling through too often.
    @settings(suppress_health_check=[HealthCheck.filter_too_much],
              deadline=10000)
    @given(inputs=hu.tensors(n=4),
           lr=hu.floats(min_value=0.01, max_value=0.99,
                        allow_nan=False, allow_infinity=False),
           epsilon=hu.floats(min_value=0.01, max_value=0.99,
                             allow_nan=False, allow_infinity=False),
           decay=hu.floats(min_value=0.01, max_value=0.99,
                           allow_nan=False, allow_infinity=False),
           **hu.gcs)
    def test_sparse_adadelta(self, inputs, lr, epsilon, decay, gc, dc):
        param, moment, moment_delta, grad = inputs
        moment = np.abs(moment)
        moment_delta = np.abs(moment_delta)
        lr = np.array([lr], dtype=np.float32)

        # Create an indexing array containing values that are lists of indices,
        # which index into grad
        indices = np.random.choice(np.arange(grad.shape[0]),
                                   size=np.random.randint(grad.shape[0]),
                                   replace=False)

        # Sparsify grad
        grad = grad[indices]

        op = core.CreateOperator(
            "SparseAdadelta",
            ["param", "moment", "moment_delta", "indices", "grad", "lr"],
            ["param", "moment", "moment_delta"],
            epsilon=epsilon,
            decay=decay,
            device_option=gc)

        def ref_sparse(param, moment, moment_delta, indices, grad, lr, decay,
                       ref_using_fp16):
            param_out = np.copy(param)
            moment_out = np.copy(moment)
            moment_delta_out = np.copy(moment_delta)
            for i, index in enumerate(indices):
                param_out[index], moment_out[index], moment_delta_out[
                    index] = self.ref_adadelta(param[index], moment[index],
                                               moment_delta[index], grad[i],
                                               lr, epsilon, decay,
                                               ref_using_fp16)
            return (param_out, moment_out, moment_delta_out)

        ref_using_fp16_values = [False]
        if gc == hu.gpu_do:
            ref_using_fp16_values.append(True)

        for ref_using_fp16 in ref_using_fp16_values:
            moment_i = None
            moment_delta_i = None
            param_i = None
            if (ref_using_fp16):
                moment_i = moment.astype(np.float16)
                moment_delta_i = moment_delta.astype(np.float16)
                param_i = param.astype(np.float16)
            else:
                moment_i = moment.astype(np.float32)
                moment_delta_i = moment_delta.astype(np.float32)
                param_i = param.astype(np.float32)

            self.assertReferenceChecks(gc, op, [
                param_i, moment_i, moment_delta_i, indices, grad, lr, decay,
                ref_using_fp16
            ], ref_sparse)

    @given(inputs=hu.tensors(n=3),
           lr=st.floats(min_value=0.01, max_value=0.99,
                        allow_nan=False, allow_infinity=False),
           epsilon=st.floats(min_value=0.01, max_value=0.99,
                             allow_nan=False, allow_infinity=False),
           decay=st.floats(min_value=0.01, max_value=0.99,
                           allow_nan=False, allow_infinity=False),
           **hu.gcs)
    @settings(deadline=1000)
    def test_sparse_adadelta_empty(self, inputs, lr, epsilon, decay, gc, dc):
        param, moment, moment_delta = inputs
        moment = np.abs(moment)
        lr = np.array([lr], dtype=np.float32)

        grad = np.empty(shape=(0, ) + param.shape[1:], dtype=np.float32)
        indices = np.empty(shape=(0, ), dtype=np.int64)

        hypothesis.note('indices.shape: %s' % str(indices.shape))

        op = core.CreateOperator(
            "SparseAdadelta",
            ["param", "moment", "moment_delta", "indices", "grad", "lr"],
            ["param", "moment", "moment_delta"],
            epsilon=epsilon,
            decay=decay,
            device_option=gc)

        def ref_sparse_empty(param, moment, moment_delta, indices, grad, lr,
                             decay):
            param_out = np.copy(param)
            moment_out = np.copy(moment)
            moment_delta_out = np.copy(moment_delta)
            return (param_out, moment_out, moment_delta_out)

        ref_using_fp16_values = [False]
        if gc == hu.gpu_do:
            ref_using_fp16_values.append(True)

        for ref_using_fp16 in ref_using_fp16_values:
            moment_i = None
            moment_delta_i = None
            param_i = None
            if (ref_using_fp16):
                moment_i = moment.astype(np.float16)
                moment_delta_i = moment_delta.astype(np.float16)
                param_i = param.astype(np.float16)
            else:
                moment_i = moment.astype(np.float32)
                moment_delta_i = moment_delta.astype(np.float32)
                param_i = param.astype(np.float32)

            self.assertReferenceChecks(
                gc, op,
                [param_i, moment_i, moment_delta_i, indices, grad, lr, decay],
                ref_sparse_empty)
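# For readability, the update that `ref_adadelta` above implements is the
# standard Adadelta rule (added commentary, not new behavior):
#
#   E[g^2]_t   = decay * E[g^2]_{t-1} + (1 - decay) * g_t^2
#   delta_t    = sqrt(E[dx^2]_{t-1} + eps) / sqrt(E[g^2]_t + eps) * g_t
#   param_t    = param_{t-1} + lr * delta_t
#   E[dx^2]_t  = decay * E[dx^2]_{t-1} + (1 - decay) * delta_t^2
#
# The "+ lr * delta_t" sign presumably reflects that the gradient blob handed
# to these optimizer ops already encodes the update (descent) direction.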
class TestPairWiseLossOps(serial.SerializedTestCase):
    @given(X=hu.arrays(dims=[2, 1],
                       elements=hu.floats(min_value=0.0, max_value=10.0)),
           label=hu.arrays(dims=[2, 1],
                           elements=st.integers(min_value=0, max_value=1),
                           dtype=np.float32),
           **hu.gcs_cpu_only)
    def test_pair_wise_loss_predictions(self, X, label, gc, dc):
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('label', label)
        new_label = np.array([label[1], label[0]])
        new_x = np.array([X[1], X[0]])
        workspace.FeedBlob('new_x', new_x)
        workspace.FeedBlob('new_label', new_label)
        net = core.Net('net')
        net.PairWiseLoss(['X', 'label'], ['output'])
        net.PairWiseLoss(['new_x', 'new_label'], ['new_output'])
        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data', [net], num_iter=1))
        workspace.RunPlan(plan)
        output = workspace.FetchBlob('output')
        new_output = workspace.FetchBlob('new_output')
        sign = 1 if label[0] > label[1] else -1

        if label[0] == label[1]:
            self.assertEqual(np.asscalar(output), 0)
            return

        self.assertAlmostEqual(
            np.asscalar(output),
            np.asscalar(np.log(1 + np.exp(sign * (X[1] - X[0])))),
            delta=1e-4)

        # check swapping row order doesn't alter overall loss
        self.assertAlmostEqual(output, new_output)

    @given(X=hu.arrays(dims=[2, 1],
                       elements=hu.floats(min_value=0.0, max_value=10.0)),
           label=hu.arrays(dims=[2, 1],
                           elements=st.integers(min_value=0, max_value=1),
                           dtype=np.float32),
           dY=hu.arrays(dims=[1],
                        elements=hu.floats(min_value=1, max_value=10)),
           **hu.gcs_cpu_only)
    def test_pair_wise_loss_gradient(self, X, label, dY, gc, dc):
        workspace.FeedBlob('X', X)
        workspace.FeedBlob('dY', dY)
        workspace.FeedBlob('label', label)
        net = core.Net('net')
        net.PairWiseLossGradient(
            ['X', 'label', 'dY'],
            ['dX'],
        )
        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data', [net], num_iter=1))
        workspace.RunPlan(plan)
        dx = workspace.FetchBlob('dX')
        sign = 1 if label[0] > label[1] else -1

        if label[0] == label[1]:
            self.assertEqual(np.asscalar(dx[0]), 0)
            return

        self.assertAlmostEqual(
            np.asscalar(dx[0]),
            np.asscalar(-dY[0] * sign / (1 + np.exp(sign * (X[0] - X[1])))),
            delta=1e-2 * abs(np.asscalar(dx[0])))

        self.assertEqual(np.asscalar(dx[0]), np.asscalar(-dx[1]))

        delta = 1e-3
        up_x = np.array([[X[0] + delta], [X[1]]], dtype=np.float32)
        down_x = np.array([[X[0] - delta], [X[1]]], dtype=np.float32)
        workspace.FeedBlob('up_x', up_x)
        workspace.FeedBlob('down_x', down_x)
        new_net = core.Net('new_net')
        new_net.PairWiseLoss(['up_x', 'label'], ['up_output'])
        new_net.PairWiseLoss(['down_x', 'label'], ['down_output'])

        plan = core.Plan('predict_data')
        plan.AddStep(core.execution_step('predict_data', [new_net],
                                         num_iter=1))
        workspace.RunPlan(plan)
        down_output_pred = workspace.FetchBlob('down_output')
        up_output_pred = workspace.FetchBlob('up_output')
        np.testing.assert_allclose(
            np.asscalar(dx[0]),
            np.asscalar(0.5 * dY[0] *
                        (up_output_pred[0] - down_output_pred[0]) / delta),
            rtol=1e-2,
            atol=1e-2)

    @serial.given(n=st.integers(0, 10), k=st.integers(1, 5),
                  **hu.gcs_cpu_only)
    def test_pair_wise_loss_batch(self, n, k, gc, dc):
        lengths = np.random.randint(k, size=n).astype(np.int32) + 1
        X = np.random.rand(sum(lengths)).astype(np.float32)
        label = np.random.randint(k, size=sum(lengths)).astype(np.float32)

        def pair_wise_op(X, label, lengths):
            N = lengths.size
            output = np.zeros(N).astype(np.float32)

            def f(x):
                return np.log(1 + np.exp(x))

            offset = 0
            for idx in range(N):
                offset += lengths[idx - 1] if idx > 0 else 0
                count = 0
                for i in range(offset, offset + lengths[idx]):
                    for j in range(offset, i):
                        if label[i] == label[j]:
                            continue
                        sign = 1 if label[i] > label[j] else -1
                        output[idx] += f(sign * (X[j] - X[i]))
                        count += 1
                if count > 0:
                    output[idx] /= count
            return [output]

        op = core.CreateOperator('PairWiseLoss', ['X', 'label', 'lengths'],
                                 'out')

        # Check against numpy reference
        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X, label, lengths],
            reference=pair_wise_op,
        )
        # Check over multiple devices
        self.assertDeviceChecks(dc, op, [X, label, lengths], [0])
        # Gradient check
        self.assertGradientChecks(gc, op, [X, label, lengths], 0, [0])
class TestActivations(serial.SerializedTestCase): @given(X=hu.tensor(), in_place=st.booleans(), engine=st.sampled_from(["", "CUDNN"]), **mu.gcs) @settings(deadline=10000) def test_relu(self, X, in_place, engine, gc, dc): if gc == mu.mkl_do: in_place = False op = core.CreateOperator( "Relu", ["X"], ["X"] if in_place else ["Y"], engine=engine, ) def relu_ref(X): return [np.maximum(X, 0.0)] # go away from the origin point to avoid kink problems X += 0.02 * np.sign(X) X[X == 0.0] += 0.02 self.assertReferenceChecks(gc, op, [X], relu_ref, ensure_outputs_are_inferred=True) self.assertDeviceChecks(dc, op, [X], [0]) self.assertGradientChecks(gc, op, [X], 0, [0], ensure_outputs_are_inferred=True) @given(N=st.integers(1, 10), M=st.integers(1, 10), in_place=st.booleans(), **hu.gcs) def test_relu_empty_input(self, N, M, in_place, gc, dc): op = core.CreateOperator( "Relu", ["X"], ["X"] if in_place else ["Y"], ) def relu_ref(X): return [np.maximum(X, 0.0)] X = np.random.randn(0, N, M).astype(np.float32) self.assertReferenceChecks(gc, op, [X], relu_ref, ensure_outputs_are_inferred=True) self.assertDeviceChecks(dc, op, [X], [0]) self.assertGradientChecks(gc, op, [X], 0, [0], ensure_outputs_are_inferred=True) @unittest.skipIf(not workspace.has_gpu_support, "Relu for float16 can only run on GPU now.") @given(X=hu.tensor(dtype=np.float16), in_place=st.booleans(), engine=st.sampled_from(["", "CUDNN"]), **hu.gcs) def test_relu_fp16(self, X, in_place, engine, gc, dc): # fp16 is only supported on CUDA/HIP assume(core.IsGPUDeviceType(gc.device_type)) op = core.CreateOperator( "Relu", ["X"], ["X"] if in_place else ["Y"], engine=engine, ) def relu_ref(X): return [np.maximum(X, 0.0)] def relu_grad_ref(g_out, outputs, fwd_inputs): dY = g_out [Y] = outputs dX = dY dX[Y == 0] = 0 return [dX] # go away from the origin point to avoid kink problems X += 0.02 * np.sign(X) X[X == 0.0] += 0.02 self.assertReferenceChecks(gc, op, [X], relu_ref, output_to_grad="X" if in_place else "Y", grad_reference=relu_grad_ref) @serial.given(X=hu.tensor(elements=hu.floats(-3.0, 3.0)), n=hu.floats(min_value=0.5, max_value=2.0), in_place=st.booleans(), **hu.gcs) def test_relu_n(self, X, n, in_place, gc, dc): op = core.CreateOperator( "ReluN", ["X"], ["X"] if in_place else ["Y"], n=n, ) def relu_n_ref(X): return [np.minimum(np.maximum(X, 0.0), n)] # go away from 0 and n to avoid kink problems X += 0.04 * np.sign(X) X[X == 0.0] += 0.04 X -= n X += 0.02 * np.sign(X) X[X == 0.0] -= 0.02 X += n self.assertReferenceChecks(gc, op, [X], relu_n_ref, ensure_outputs_are_inferred=True) self.assertDeviceChecks(dc, op, [X], [0]) self.assertGradientChecks(gc, op, [X], 0, [0], stepsize=0.005, ensure_outputs_are_inferred=True) @serial.given(X=hu.tensor(), alpha=hu.floats(min_value=0.1, max_value=2.0), in_place=st.booleans(), engine=st.sampled_from(["", "CUDNN"]), **hu.gcs) def test_elu(self, X, alpha, in_place, engine, gc, dc): op = core.CreateOperator( "Elu", ["X"], ["X"] if in_place else ["Y"], alpha=alpha, engine=engine, ) def elu_ref(X): Y = X Y[X < 0] = alpha * (np.exp(X[X < 0]) - 1.0) return [Y] # go away from the origin point to avoid kink problems X += 0.04 * np.sign(X) X[X == 0.0] += 0.04 self.assertReferenceChecks(gc, op, [X], elu_ref, ensure_outputs_are_inferred=True) self.assertDeviceChecks(dc, op, [X], [0]) self.assertGradientChecks(gc, op, [X], 0, [0], stepsize=1e-2, ensure_outputs_are_inferred=True) @given(X=hu.tensor(min_dim=4, max_dim=4), alpha=hu.floats(min_value=0.1, max_value=2.0), inplace=st.booleans(), shared=st.booleans(), 
order=st.sampled_from(["NCHW", "NHWC"]), seed=st.sampled_from([20, 100]), **hu.gcs) @settings(deadline=10000) def test_prelu(self, X, alpha, inplace, shared, order, seed, gc, dc): np.random.seed(seed) W = np.random.randn(X.shape[1] if order == "NCHW" else X.shape[3]).astype(np.float32) if shared: W = np.random.randn(1).astype(np.float32) # go away from the origin point to avoid kink problems X += 0.04 * np.sign(X) X[X == 0.0] += 0.04 def prelu_ref(X, W): Y = X.copy() W = W.reshape(1, -1, 1, 1) if order == "NCHW" \ else W.reshape(1, 1, 1, -1) assert len(X.shape) == 4 neg_indices = X <= 0 assert len(neg_indices.shape) == 4 assert X.shape == neg_indices.shape Y[neg_indices] = (Y * W)[neg_indices] return (Y, ) op = core.CreateOperator("PRelu", ["X", "W"], ["Y" if not inplace else "X"], alpha=alpha, order=order) self.assertReferenceChecks(gc, op, [X, W], prelu_ref, ensure_outputs_are_inferred=True) # Check over multiple devices self.assertDeviceChecks(dc, op, [X, W], [0]) if not inplace: # Gradient check wrt X self.assertGradientChecks(gc, op, [X, W], 0, [0], stepsize=1e-2, ensure_outputs_are_inferred=True) # Gradient check wrt W self.assertGradientChecks(gc, op, [X, W], 1, [0], stepsize=1e-2, ensure_outputs_are_inferred=True) @serial.given(X=hu.tensor(), alpha=hu.floats(min_value=0.1, max_value=2.0), inplace=st.booleans(), **hu.gcs) def test_leaky_relu(self, X, alpha, inplace, gc, dc): # go away from the origin point to avoid kink problems X += 0.04 * np.sign(X) X[X == 0.0] += 0.04 def leaky_relu_ref(X): Y = X.copy() neg_indices = X <= 0 Y[neg_indices] = Y[neg_indices] * alpha return (Y, ) op = core.CreateOperator("LeakyRelu", ["X"], ["Y" if not inplace else "X"], alpha=alpha) self.assertReferenceChecks(gc, op, [X], leaky_relu_ref, ensure_outputs_are_inferred=True) # Check over multiple devices self.assertDeviceChecks(dc, op, [X], [0]) @given(X=hu.tensor(), inplace=st.booleans(), **hu.gcs) def test_leaky_relu_default(self, X, inplace, gc, dc): # go away from the origin point to avoid kink problems X += 0.04 * np.sign(X) X[X == 0.0] += 0.04 def leaky_relu_ref(X): Y = X.copy() neg_indices = X <= 0 Y[neg_indices] = Y[neg_indices] * 0.01 return (Y, ) op = core.CreateOperator("LeakyRelu", ["X"], ["Y" if not inplace else "X"]) self.assertReferenceChecks(gc, op, [X], leaky_relu_ref) # Check over multiple devices self.assertDeviceChecks(dc, op, [X], [0]) @given(X=hu.tensor(), fast_gelu=st.booleans(), **hu.gcs) @settings(deadline=1000) def test_gelu(self, X, fast_gelu, gc, dc): op = core.CreateOperator( "Gelu", ["X"], ["Y"], fast_gelu=fast_gelu, ) def gelu_ref(X): return (X * norm.cdf(X), ) tol = 1e-3 if fast_gelu else 1e-4 self.assertReferenceChecks(gc, op, [X], gelu_ref, threshold=tol, ensure_outputs_are_inferred=True) self.assertDeviceChecks(dc, op, [X], [0]) self.assertGradientChecks(gc, op, [X], 0, [0], ensure_outputs_are_inferred=True)
class TestBatchBucketize(serial.SerializedTestCase):
    @serial.given(**hu.gcs_cpu_only)
    def test_batch_bucketize_example(self, gc, dc):
        op = core.CreateOperator(
            'BatchBucketize',
            ["FEATURE", "INDICES", "BOUNDARIES", "LENGTHS"],
            ["O"])
        float_feature = np.array(
            [[1.42, 2.07, 3.19, 0.55, 4.32],
             [4.57, 2.30, 0.84, 4.48, 3.09],
             [0.89, 0.26, 2.41, 0.47, 1.05],
             [0.03, 2.97, 2.43, 4.36, 3.11],
             [2.74, 5.77, 0.90, 2.63, 0.38]],
            dtype=np.float32)
        indices = np.array([0, 1, 4], dtype=np.int32)
        lengths = np.array([2, 3, 1], dtype=np.int32)
        boundaries = np.array([0.5, 1.0, 1.5, 2.5, 3.5, 2.5],
                              dtype=np.float32)

        def ref(float_feature, indices, boundaries, lengths):
            output = np.array(
                [[2, 1, 1],
                 [2, 1, 1],
                 [1, 0, 0],
                 [0, 2, 1],
                 [2, 3, 0]],
                dtype=np.int32)
            return (output, )

        self.assertReferenceChecks(
            gc, op, [float_feature, indices, boundaries, lengths], ref)

    @given(x=hu.tensor(min_dim=2,
                       max_dim=2,
                       dtype=np.float32,
                       elements=hu.floats(min_value=0, max_value=5),
                       min_value=5),
           seed=st.integers(min_value=2, max_value=1000),
           **hu.gcs_cpu_only)
    def test_batch_bucketize(self, x, seed, gc, dc):
        op = core.CreateOperator(
            'BatchBucketize',
            ["FEATURE", "INDICES", "BOUNDARIES", "LENGTHS"],
            ['O'])
        np.random.seed(seed)
        d = x.shape[1]
        lens = np.random.randint(low=1, high=3, size=d - 3)
        indices = np.random.choice(range(d), d - 3, replace=False)
        indices.sort()
        boundaries = []
        for i in range(d - 3):
            # add [0, 0] as duplicated boundary for duplicated bucketization
            if lens[i] > 2:
                cur_boundary = np.append(
                    np.random.randn(lens[i] - 2) * 5, [0, 0])
            else:
                cur_boundary = np.random.randn(lens[i]) * 5
            cur_boundary.sort()
            boundaries += cur_boundary.tolist()
        lens = np.array(lens, dtype=np.int32)
        boundaries = np.array(boundaries, dtype=np.float32)
        indices = np.array(indices, dtype=np.int32)

        def ref(x, indices, boundaries, lens):
            output_dim = indices.shape[0]
            ret = np.zeros((x.shape[0], output_dim)).astype(np.int32)
            boundary_offset = 0
            for i, l in enumerate(indices):
                temp_bound = boundaries[boundary_offset:lens[i] +
                                        boundary_offset]
                for j in range(x.shape[0]):
                    for k, bound_val in enumerate(temp_bound):
                        if k == len(temp_bound) - 1 and x[j, l] > bound_val:
                            ret[j, i] = k + 1
                        elif x[j, l] > bound_val:
                            continue
                        else:
                            ret[j, i] = k
                            break
                boundary_offset += lens[i]
            return (ret, )

        self.assertReferenceChecks(gc, op, [x, indices, boundaries, lens], ref)
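# Sanity check of the hard-coded expectation in test_batch_bucketize_example
# (added commentary, not part of the original test): with lengths = [2, 3, 1],
# feature 0 uses boundaries [0.5, 1.0], feature 1 uses [1.5, 2.5, 3.5], and
# feature 4 uses [2.5]. For the first row of FEATURE:
#   feature 0 = 1.42 > 1.0          -> bucket 2
#   feature 1 = 2.07 in (1.5, 2.5]  -> bucket 1
#   feature 4 = 4.32 > 2.5          -> bucket 1
# which matches the first expected row [2, 1, 1].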
class TestBooleanMaskOp(serial.SerializedTestCase): @given(x=hu.tensor1d(min_len=1, max_len=100, elements=hu.floats(min_value=0.5, max_value=1.0)), **hu.gcs_cpu_only) @settings(deadline=1000) def test_boolean_mask_gradient(self, x, gc, dc): op = core.CreateOperator("BooleanMask", ["data", "mask"], "masked_data") mask = np.random.choice(a=[True, False], size=x.shape[0]) expected_gradient = np.copy(mask).astype(int) self.assertDeviceChecks(dc, op, [x, mask], [0]) self.assertGradientChecks(gc, op, [x, mask], 0, [0]) @given(x=hu.tensor1d(min_len=1, max_len=5, elements=hu.floats(min_value=0.5, max_value=1.0)), **hu.gcs) @settings(deadline=1000) def test_boolean_mask(self, x, gc, dc): op = core.CreateOperator("BooleanMask", ["data", "mask"], "masked_data") mask = np.random.choice(a=[True, False], size=x.shape[0]) def ref(x, mask): return (x[mask], ) self.assertReferenceChecks(gc, op, [x, mask], ref) self.assertDeviceChecks(dc, op, [x, mask], [0]) @given(x=hu.tensor1d(min_len=1, max_len=5, elements=hu.floats(min_value=0.5, max_value=1.0)), **hu.gcs) def test_boolean_mask_indices(self, x, gc, dc): op = core.CreateOperator("BooleanMask", ["data", "mask"], ["masked_data", "masked_indices"]) mask = np.random.choice(a=[True, False], size=x.shape[0]) def ref(x, mask): return (x[mask], np.where(mask)[0]) self.assertReferenceChecks(gc, op, [x, mask], ref) self.assertDeviceChecks(dc, op, [x, mask], [0]) @staticmethod def _dtype_conversion(x, dtype, gc, dc): """SequenceMask only supports fp16 with CUDA/ROCm.""" if dtype == np.float16: assume(core.IsGPUDeviceType(gc.device_type)) dc = [d for d in dc if core.IsGPUDeviceType(d.device_type)] x = x.astype(dtype) return x, dc @given(x=hu.tensor(min_dim=2, max_dim=5, elements=hu.floats(min_value=0.5, max_value=1.0)), dtype=st.sampled_from([np.float32, np.float16]), **hu.gcs) def test_sequence_mask_with_lengths(self, x, dtype, gc, dc): x, dc = self._dtype_conversion(x, dtype, gc, dc) # finite fill value needed for gradient check fill_val = 1e-3 if dtype == np.float16 else 1e-9 op = core.CreateOperator("SequenceMask", ["data", "lengths"], ["masked_data"], mode="sequence", axis=len(x.shape) - 1, fill_val=fill_val) elem_dim = x.shape[-1] leading_dim = 1 for dim in x.shape[:-1]: leading_dim *= dim lengths = np.random.randint(0, elem_dim, [leading_dim])\ .astype(np.int32) def ref(x, lengths): ref = np.reshape(x, [leading_dim, elem_dim]) for i in range(leading_dim): for j in range(elem_dim): if j >= lengths[i]: ref[i, j] = fill_val return [ref.reshape(x.shape)] self.assertReferenceChecks(gc, op, [x, lengths], ref) self.assertDeviceChecks(dc, op, [x, lengths], [0]) @given(x=hu.tensor(min_dim=2, max_dim=5, elements=hu.floats(min_value=0.5, max_value=1.0)), dtype=st.sampled_from([np.float32, np.float16]), **hu.gcs) @settings(deadline=10000) def test_sequence_mask_with_window(self, x, dtype, gc, dc): x, dc = self._dtype_conversion(x, dtype, gc, dc) # finite fill value needed for gradient check fill_val = 1e-3 if dtype == np.float16 else 1e-9 radius = 2 op = core.CreateOperator("SequenceMask", ["data", "centers"], ["masked_data"], mode="window", radius=radius, axis=len(x.shape) - 1, fill_val=fill_val) elem_dim = x.shape[-1] leading_dim = 1 for dim in x.shape[:-1]: leading_dim *= dim centers = np.random.randint(0, elem_dim, [leading_dim])\ .astype(np.int32) def ref(x, centers): ref = np.reshape(x, [leading_dim, elem_dim]) for i in range(leading_dim): for j in range(elem_dim): if j > centers[i] + radius or j < centers[i] - radius: ref[i, j] = fill_val return 
[ref.reshape(x.shape)] self.assertReferenceChecks(gc, op, [x, centers], ref) self.assertDeviceChecks(dc, op, [x, centers], [0]) # Gradient check with np.float16 is found to be flakey, disable for now # with high threshold (to repro, set threshold to 0.4). threshold = 1.0 if dtype == np.float16 else 0.005 self.assertGradientChecks(gc, op, [x, centers], 0, [0], threshold=threshold) @given(x=hu.tensor(min_dim=2, max_dim=5, elements=hu.floats(min_value=0.5, max_value=1.0)), mode=st.sampled_from(['upper', 'lower', 'upperdiag', 'lowerdiag']), dtype=st.sampled_from([np.float32, np.float16]), **hu.gcs) @settings(deadline=10000) def test_sequence_mask_triangle(self, x, mode, dtype, gc, dc): x, dc = self._dtype_conversion(x, dtype, gc, dc) # finite fill value needed for gradient check fill_val = 1e-3 if dtype == np.float16 else 1e-9 op = core.CreateOperator("SequenceMask", ["data"], ["masked_data"], mode=mode, axis=len(x.shape) - 1, fill_val=fill_val) elem_dim = x.shape[-1] leading_dim = 1 for dim in x.shape[:-1]: leading_dim *= dim if mode == 'upper': def compare(i, j): return j > i elif mode == 'lower': def compare(i, j): return j < i elif mode == 'upperdiag': def compare(i, j): return j >= i elif mode == 'lowerdiag': def compare(i, j): return j <= i def ref(x): ref = np.reshape(x, [leading_dim, elem_dim]) for i in range(leading_dim): for j in range(elem_dim): if compare(i, j): ref[i, j] = fill_val return [ref.reshape(x.shape)] self.assertReferenceChecks(gc, op, [x], ref) self.assertDeviceChecks(dc, op, [x], [0]) # Gradient check with np.float16 is found to be flakey, disable for now # with high threshold (to repro, set threshold to 0.4). threshold = 1.0 if dtype == np.float16 else 0.005 stepsize = 0.1 if dtype == np.float16 else 0.05 self.assertGradientChecks(gc, op, [x], 0, [0], threshold=threshold, stepsize=stepsize) @given(x=hu.tensor(min_dim=2, max_dim=5, elements=hu.floats(min_value=0.5, max_value=1.0)), dtype=st.sampled_from([np.float32, np.float16]), **hu.gcs) @settings(deadline=10000) def test_sequence_mask_batching_lengths(self, x, dtype, gc, dc): x, dc = self._dtype_conversion(x, dtype, gc, dc) # finite fill value needed for gradient check fill_val = 1e-3 if dtype == np.float16 else 1e-9 # choose _different_ batch and axis dimensions, w/ axis != 0. axis = 0 batch = 0 while axis == 0 or axis < batch: inds = np.arange(len(x.shape)) np.random.shuffle(inds) batch = inds[0] axis = inds[1] op = core.CreateOperator("SequenceMask", ["data", "lengths"], ["masked_data"], mode='sequence', axis=axis, fill_val=fill_val, batch=batch) before = int(np.prod(x.shape[:batch + 1])) between = int(np.prod(x.shape[batch + 1:axis])) after = int(np.prod(x.shape[axis:])) lengths = np.random.randint(0, after, [between])\ .astype(np.int32) def ref(z, l): w = np.reshape(z, [before, between, after]) for b in range(before): r = w[b, :, :] for i in range(between): for j in range(after): if j >= l[i]: r[i, j] = fill_val return [w.reshape(z.shape)] self.assertReferenceChecks(gc, op, [x, lengths], ref) self.assertDeviceChecks(dc, op, [x, lengths], [0]) # Gradient check with np.float16 is found to be flakey, disable for now # with high threshold (to repro, set threshold to 0.4). 
threshold = 1.0 if dtype == np.float16 else 0.005 self.assertGradientChecks(gc, op, [x, lengths], 0, [0], threshold=threshold) @given(x=hu.tensor(min_dim=4, max_dim=4, elements=hu.floats(min_value=0.5, max_value=1.0)), dtype=st.sampled_from([np.float32, np.float16]), **hu.gcs) @settings(deadline=10000) def test_sequence_mask_batching_window(self, x, dtype, gc, dc): x, dc = self._dtype_conversion(x, dtype, gc, dc) # finite fill value needed for gradient check fill_val = 1e-3 if dtype == np.float16 else 1e-9 radius = 1 # choose _different_ batch and axis dimensions, w/ axis != 0. axis = 0 batch = 0 while axis == 0 or axis < batch: inds = np.arange(len(x.shape)) np.random.shuffle(inds) batch = inds[0] axis = inds[1] op = core.CreateOperator("SequenceMask", ["data", "centers"], ["masked_data"], mode='window', radius=radius, axis=axis, fill_val=fill_val, batch=batch) before = int(np.prod(x.shape[:batch + 1])) between = int(np.prod(x.shape[batch + 1:axis])) after = int(np.prod(x.shape[axis:])) centers = np.random.randint(0, after, [between])\ .astype(np.int32) def ref(z, c): w = np.reshape(z, [before, between, after]) for b in range(before): r = w[b, :, :] for i in range(between): for j in range(after): if j > c[i] + radius or j < c[i] - radius: r[i, j] = fill_val return [w.reshape(z.shape)] self.assertReferenceChecks(gc, op, [x, centers], ref) self.assertDeviceChecks(dc, op, [x, centers], [0]) # Gradient check with np.float16 is found to be flakey, disable for now # with high threshold (to repro, set threshold to 0.4). threshold = 1.0 if dtype == np.float16 else 0.005 self.assertGradientChecks(gc, op, [x, centers], 0, [0], threshold=threshold) @given(x=hu.tensor(min_dim=3, max_dim=5, elements=hu.floats(min_value=0.5, max_value=1.0)), mode=st.sampled_from(['upper', 'lower', 'upperdiag', 'lowerdiag']), dtype=st.sampled_from([np.float32, np.float16]), **hu.gcs) @settings(deadline=10000) def test_sequence_mask_batching_triangle(self, x, mode, dtype, gc, dc): x, dc = self._dtype_conversion(x, dtype, gc, dc) # finite fill value needed for gradient check fill_val = 1e-3 if dtype == np.float16 else 1e-9 # choose _different_ batch and axis dimensions, w/ axis != 0. axis = 0 batch = 0 while axis == 0 or axis < batch: inds = np.arange(len(x.shape)) np.random.shuffle(inds) batch = inds[0] axis = inds[1] op = core.CreateOperator("SequenceMask", ["data"], ["masked_data"], mode=mode, axis=axis, fill_val=fill_val, batch=batch) if mode == 'upper': def compare(i, j): return j > i elif mode == 'lower': def compare(i, j): return j < i elif mode == 'upperdiag': def compare(i, j): return j >= i elif mode == 'lowerdiag': def compare(i, j): return j <= i def ref(z): before = int(np.prod(z.shape[:batch + 1])) between = int(np.prod(z.shape[batch + 1:axis])) after = int(np.prod(z.shape[axis:])) w = np.reshape(z, [before, between, after]) for b in range(before): r = w[b, :, :] for i in range(between): for j in range(after): if compare(i, j): r[i, j] = fill_val return [w.reshape(z.shape)] self.assertReferenceChecks(gc, op, [x], ref) self.assertDeviceChecks(dc, op, [x], [0]) # Gradient check with np.float16 is found to be flakey, disable for now # with high threshold (to repro, set threshold to 0.4). 
threshold = 1.0 if dtype == np.float16 else 0.005 stepsize = 0.1 if dtype == np.float16 else 0.05 self.assertGradientChecks(gc, op, [x], 0, [0], threshold=threshold, stepsize=stepsize) @given(x=hu.tensor(min_dim=3, max_dim=5, elements=hu.floats(min_value=0.5, max_value=1.0)), dtype=st.sampled_from([np.float32, np.float16]), **hu.gcs) def test_sequence_mask_repeated(self, x, dtype, gc, dc): x, dc = self._dtype_conversion(x, dtype, gc, dc) # finite fill value needed for gradient check fill_val = 1e-3 if dtype == np.float16 else 1e-9 op = core.CreateOperator("SequenceMask", ["data", "lengths"], ["masked_data"], mode="sequence", axis=len(x.shape) - 2, repeat_from_axis=-1, fill_val=fill_val) elem_dim = x.shape[-2] leading_dim = 1 for dim in x.shape[:-2]: leading_dim *= dim lengths = np.random.randint(0, elem_dim, [leading_dim])\ .astype(np.int32) def ref(x, lengths): ref = np.reshape(x, [leading_dim, elem_dim, -1]) for i in range(leading_dim): for j in range(elem_dim): if j >= lengths[i]: ref[i, j, :] = fill_val return [ref.reshape(x.shape)] self.assertReferenceChecks(gc, op, [x, lengths], ref) self.assertDeviceChecks(dc, op, [x, lengths], [0])
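# A compact, vectorized NumPy restatement of what the loop-based `ref`
# helpers in the SequenceMask tests above compute for "sequence" mode:
# along the last axis, positions at or beyond the row's length are set to
# fill_val. This is an illustrative sketch only (the helper name and
# signature below are made up here), not the operator implementation.
import numpy as np

def _sequence_mask_ref_sketch(x, lengths, fill_val=1e-9):
    elem_dim = x.shape[-1]
    flat = x.reshape(-1, elem_dim).copy()
    cols = np.arange(elem_dim)[None, :]            # [1, elem_dim]
    keep = cols < np.asarray(lengths)[:, None]     # [leading_dim, elem_dim]
    flat[~keep] = fill_val
    return flat.reshape(x.shape)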
class TestCrossEntropyOps(hu.HypothesisTestCase): @given(inputs=st.lists( elements=st.integers(min_value=1, max_value=5), min_size=1, max_size=2, ).flatmap(lambda shape: st.tuples( hu.arrays(dims=shape, elements=st.one_of( hu.floats(min_value=-1.0, max_value=-0.1), hu.floats(min_value=0.1, max_value=1.0), )), hu.arrays( dims=shape, elements=st.sampled_from([0.0, 1.0]), ), )), options=st.one_of(st.tuples(st.just(True), st.just(False)), st.tuples(st.just(False), st.just(True)), st.tuples(st.just(False), st.just(False))), **hu.gcs) def test_sigmoid_cross_entropy_with_logits(self, inputs, options, gc, dc): logits, targets = inputs log_D_trick, unjoined_lr_loss = options def sigmoid_xentr_logit_ref(logits, targets): if unjoined_lr_loss: s = unjoined_sigmoid_cross_entropy(logits, targets) else: s = (sigmoid_cross_entropy_with_logits(logits, targets) if not log_D_trick else sigmoid_cross_entropy_with_logits_with_log_D_trick( logits, targets)) m = np.mean(s, axis=len(logits.shape) - 1) return (m, ) def sigmoid_xentr_logit_grad_ref(g_out, outputs, fwd_inputs): fwd_logits, fwd_targets = fwd_inputs inner_size = fwd_logits.shape[-1] if unjoined_lr_loss: m = unjoined_sigmoid_cross_entropy_grad(logits, targets) else: m = (sigmoid_cross_entropy_with_logits_grad( fwd_logits, fwd_targets) if not log_D_trick else sigmoid_cross_entropy_with_logits_with_log_D_trick_grad( fwd_logits, fwd_targets)) # m = fwd_targets - sigmoid(fwd_logits) g_in = -np.expand_dims(g_out, axis=-1) * m / inner_size return (g_in, None) op = core.CreateOperator('SigmoidCrossEntropyWithLogits', ['logits', 'targets'], ['xentropy'], log_D_trick=log_D_trick, unjoined_lr_loss=unjoined_lr_loss) self.assertReferenceChecks(device_option=gc, op=op, inputs=[logits, targets], reference=sigmoid_xentr_logit_ref, output_to_grad='xentropy', grad_reference=sigmoid_xentr_logit_grad_ref) @given(log_D_trick=st.just(False), **hu.gcs_cpu_only) def test_cross_entropy_and_unjoied_cross_entropy_relation( self, log_D_trick, gc, dc): logits = np.array([ 1.4720, 0.3500, -0.6529, -1.1908, 0.8357, -1.0774, -0.3395, -0.2469, 0.6708, -1.8332 ], dtype='f') targets = np.array([1., 1., 1., 1., 1., 1., 0., 0., 0., 0.], dtype='f') lr_size = targets.size unjoined_lr_loss = False def sigmoid_xentr_logit_ref(logits, targets): if unjoined_lr_loss: s = unjoined_sigmoid_cross_entropy(logits, targets) else: s = sigmoid_cross_entropy_with_logits(logits, targets) m = np.mean(s, axis=len(logits.shape) - 1) return (m, ) def sigmoid_xentr_logit_grad_ref(g_out, outputs, fwd_inputs): fwd_logits, fwd_targets = fwd_inputs inner_size = fwd_logits.shape[-1] if unjoined_lr_loss: m = unjoined_sigmoid_cross_entropy_grad(logits, targets) else: m = sigmoid_cross_entropy_with_logits_grad( fwd_logits, fwd_targets) # m = fwd_targets - sigmoid(fwd_logits) g_in = -np.expand_dims(g_out, axis=-1) * m / inner_size return (g_in, None) op = core.CreateOperator('SigmoidCrossEntropyWithLogits', ['logits', 'targets'], ['xentropy'], log_D_trick=log_D_trick, unjoined_lr_loss=unjoined_lr_loss) output_lr = self.assertReferenceChecks( device_option=gc, op=op, inputs=[logits, targets], reference=sigmoid_xentr_logit_ref, output_to_grad='xentropy', grad_reference=sigmoid_xentr_logit_grad_ref) # Unjoined dataset where labels change later logits = np.array([ 1.4720, 0.3500, -0.6529, -1.1908, 0.8357, -1.0774, -0.3395, -0.2469, 0.6708, -1.8332, 1.4720, 0.3500, -0.6529, -1.1908, 0.8357, -1.0774 ], dtype='f') targets = np.array( [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1.], dtype='f') 
unjoined_lr_loss = True unjoined_lr_size = targets.size op = core.CreateOperator('SigmoidCrossEntropyWithLogits', ['logits', 'targets'], ['xentropy'], log_D_trick=log_D_trick, unjoined_lr_loss=unjoined_lr_loss) outputs_unjoined_lr = self.assertReferenceChecks( device_option=gc, op=op, inputs=[logits, targets], reference=sigmoid_xentr_logit_ref, output_to_grad='xentropy', grad_reference=sigmoid_xentr_logit_grad_ref) self.assertAlmostEqual(output_lr[0].item(0) * lr_size / unjoined_lr_size, outputs_unjoined_lr[0].item(0), delta=0.0001) @given(inputs=st.lists( elements=st.integers(min_value=1, max_value=5), min_size=1, max_size=2, ).flatmap(lambda shape: st.tuples( hu.arrays(dims=shape, elements=st.one_of( hu.floats(min_value=-1.0, max_value=-0.1), hu.floats(min_value=0.1, max_value=1.0), )), hu.arrays( dims=shape, elements=st.sampled_from([0.0, 1.0]), ), hu.arrays( dims=shape, elements=hu.floats(min_value=0.1, max_value=1.0), ), )), **hu.gcs) def test_weighted_sigmoid_cross_entropy_with_logits(self, inputs, gc, dc): logits, targets, weights = inputs def weighted_sigmoid_xentr_logit_ref(logits, targets, weights): s = sigmoid_cross_entropy_with_logits(logits, targets) s = np.multiply(s, weights) m = np.mean(s, axis=len(logits.shape) - 1) return (m, ) def weighted_sigmoid_xentr_logit_grad_ref(g_out, outputs, fwd_inputs): fwd_logits, fwd_targets, fwd_weights = fwd_inputs inner_size = fwd_logits.shape[-1] m = fwd_targets - sigmoid(fwd_logits) m = np.multiply(m, weights) g_in = -np.expand_dims(g_out, axis=-1) * m / inner_size return (g_in, None, None) op = core.CreateOperator('WeightedSigmoidCrossEntropyWithLogits', ['logits', 'targets', 'weights'], ['xentropy']) self.assertReferenceChecks( device_option=gc, op=op, inputs=[logits, targets, weights], reference=weighted_sigmoid_xentr_logit_ref, output_to_grad='xentropy', grad_reference=weighted_sigmoid_xentr_logit_grad_ref) @given(n=st.integers(2, 10), b=st.integers(1, 5), **hu.gcs_cpu_only) def test_soft_label_cross_entropy(self, n, b, gc, dc): # Initialize X and add 1e-2 for numerical stability X = np.random.rand(b, n).astype(np.float32) X = X + 1e-2 for i in range(b): X[i] = X[i] / np.sum(X[i]) # Initialize label label = np.random.rand(b, n).astype(np.float32) for i in range(b): label[i] = label[i] / np.sum(label[i]) # Reference implementation of cross entropy with soft labels def soft_label_xentr_ref(X, label): xent = [ np.sum((-label[j][i] * np.log(max(X[j][i], 1e-20)) for i in range(len(X[0])))) for j in range(b) ] return (xent, ) op = core.CreateOperator("CrossEntropy", ["X", "label"], ["Y"]) # TODO(surya) Once CrossEntropyOp is ported to GPU, add the respective # tests to this unit test. self.assertReferenceChecks( device_option=gc, op=op, inputs=[X, label], reference=soft_label_xentr_ref, ) self.assertGradientChecks(gc, op, [X, label], 0, [0], stepsize=1e-4, threshold=1e-2)
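# The cross-entropy tests above call reference helpers such as
# sigmoid_cross_entropy_with_logits that are defined elsewhere in this
# file. Purely as an illustration of the quantity being checked, one
# standard numerically stable per-element form of that loss is sketched
# below; the actual helper may differ in details.
import numpy as np

def _stable_sigmoid_xent_sketch(logits, targets):
    # max(x, 0) - x * z + log(1 + exp(-|x|)) is equivalent to
    # -(z * log(sigmoid(x)) + (1 - z) * log(1 - sigmoid(x)))
    # but avoids overflow for large |x|.
    return (np.maximum(logits, 0) - logits * targets
            + np.log1p(np.exp(-np.abs(logits))))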
class TestRegularizer(LayersTestCase): @given(X=hu.arrays(dims=[2, 5], elements=hu.floats(min_value=-1.0, max_value=1.0))) def test_log_barrier(self, X): param = core.BlobReference("X") workspace.FeedBlob(param, X) train_init_net, train_net = self.get_training_nets() reg = regularizer.LogBarrier(1.0) output = reg(train_net, train_init_net, param, by=RegularizationBy.ON_LOSS) reg( train_net, train_init_net, param, grad=None, by=RegularizationBy.AFTER_OPTIMIZER, ) workspace.RunNetOnce(train_init_net) workspace.RunNetOnce(train_net) def ref(X): return ( np.array(np.sum(-np.log(np.clip(X, 1e-9, None))) * 0.5).astype( np.float32 ), np.clip(X, 1e-9, None), ) for x, y in zip(workspace.FetchBlobs([output, param]), ref(X)): npt.assert_allclose(x, y, rtol=1e-3) @given( X=hu.arrays(dims=[2, 5], elements=hu.floats(min_value=-1.0, max_value=1.0)), left_open=st.booleans(), right_open=st.booleans(), eps=hu.floats(min_value=1e-6, max_value=1e-4), ub=hu.floats(min_value=-1.0, max_value=1.0), lb=hu.floats(min_value=-1.0, max_value=1.0), **hu.gcs_cpu_only ) def test_bounded_grad_proj(self, X, left_open, right_open, eps, ub, lb, gc, dc): if ub - (eps if right_open else 0.) < lb + (eps if left_open else 0.): return param = core.BlobReference("X") workspace.FeedBlob(param, X) train_init_net, train_net = self.get_training_nets() reg = regularizer.BoundedGradientProjection( lb=lb, ub=ub, left_open=left_open, right_open=right_open, epsilon=eps ) output = reg(train_net, train_init_net, param, by=RegularizationBy.ON_LOSS) reg( train_net, train_init_net, param, grad=None, by=RegularizationBy.AFTER_OPTIMIZER, ) workspace.RunNetOnce(train_init_net) workspace.RunNetOnce(train_net) def ref(X): return np.clip( X, lb + (eps if left_open else 0.), ub - (eps if right_open else 0.) ) assert output is None npt.assert_allclose(workspace.blobs[param], ref(X), atol=1e-7) @given( output_dim=st.integers(1, 10), input_num=st.integers(3, 30), reg_weight=st.integers(0, 10) ) def test_group_l1_norm(self, output_dim, input_num, reg_weight): """ 1. create a weight blob 2. create random group splits 3. run group_l1_nrom with the weight blob 4. run equivalent np operations to calculate group l1 norm 5. 
compare if the results from 3 and 4 are equal """ def compare_reference(weight, group_boundaries, reg_lambda, output): group_splits = np.hsplit(weight, group_boundaries[1:-1]) l2_reg = np.sqrt([np.sum(np.square(g)) for g in group_splits]) l2_normalized = np.multiply(l2_reg, np.array([np.sqrt(g.shape[1]) for g in group_splits])) result = np.multiply(np.sum(l2_normalized), reg_lambda) npt.assert_almost_equal(result, workspace.blobs[output], decimal=2) weight = np.random.rand(output_dim, input_num).astype(np.float32) feature_num = np.random.randint(low=1, high=input_num - 1) group_boundaries = [0] group_boundaries = np.append( group_boundaries, np.sort( np.random.choice(range(1, input_num - 1), feature_num, replace=False) ), ) group_boundaries = np.append(group_boundaries, [input_num]) split_info = np.diff(group_boundaries) weight_blob = core.BlobReference("weight_blob") workspace.FeedBlob(weight_blob, weight) train_init_net, train_net = self.get_training_nets() reg = regularizer.GroupL1Norm(reg_weight * 0.1, split_info.tolist()) output = reg( train_net, train_init_net, weight_blob, by=RegularizationBy.ON_LOSS ) workspace.RunNetOnce(train_init_net) workspace.RunNetOnce(train_net) compare_reference(weight, group_boundaries, reg_weight * 0.1, output) @given( param_dim=st.integers(10, 30), k=st.integers(5, 9), reg_weight=st.integers(0, 10) ) def test_l1_norm_trimmed(self, param_dim, k, reg_weight): weight = np.random.rand(param_dim).astype(np.float32) weight_blob = core.BlobReference("weight_blob") workspace.FeedBlob(weight_blob, weight) train_init_net, train_net = self.get_training_nets() reg = regularizer.L1NormTrimmed(reg_weight * 0.1, k) output = reg( train_net, train_init_net, weight_blob, by=RegularizationBy.ON_LOSS ) workspace.RunNetOnce(train_init_net) workspace.RunNetOnce(train_net) result = np.sum(np.sort(np.absolute(weight))[:(param_dim - k)]) * reg_weight * 0.1 npt.assert_almost_equal(result, workspace.blobs[output], decimal=2) @given( param_dim=st.integers(10, 30), k=st.integers(5, 9), l1=st.integers(0, 10), l2=st.integers(0, 10) ) def test_elastic_l1_norm_trimmed(self, param_dim, k, l1, l2): weight = np.random.rand(param_dim).astype(np.float32) weight_blob = core.BlobReference("weight_blob") workspace.FeedBlob(weight_blob, weight) train_init_net, train_net = self.get_training_nets() reg = regularizer.ElasticNetL1NormTrimmed(l1 * 0.1, l2 * 0.1, k) output = reg( train_net, train_init_net, weight_blob, by=RegularizationBy.ON_LOSS ) workspace.RunNetOnce(train_init_net) workspace.RunNetOnce(train_net) l1_norm = np.sum(np.sort(np.absolute(weight))[:(param_dim - k)]) l2_norm = np.sum(np.square(weight)) result = l1_norm * l1 * 0.1 + l2_norm * l2 * 0.1 npt.assert_almost_equal(result, workspace.blobs[output], decimal=2) @given( row_dim=st.integers(5, 10), norm=st.floats(min_value=1.0, max_value=4.0), data_strategy=st.data(), ) def test_fp16_max_norm(self, row_dim, norm, data_strategy): weight = np.random.rand(row_dim, 5).astype(np.float16) grad = np.random.rand(row_dim, 5).astype(np.float16) # generate indices that will be updated indices = data_strategy.draw( hu.tensor( dtype=np.int64, min_dim=1, max_dim=1, elements=st.sampled_from(np.arange(weight.shape[0])), ) ) indices = np.unique(indices) # compute expected result result = weight.copy() # prevent dived by zero eps = 1e-12 norms = np.sqrt(np.sum(result[indices, ] ** 2, axis=1, keepdims=True)) # if the norms are smaller than max_norm, then it doesn't need update desired = np.clip(norms, 0, norm) # apply max norm result[indices, ] *= 
desired / (eps + norms) weight_blob = core.BlobReference("weight_blob") workspace.FeedBlob(weight_blob, weight) grad_blob = core.BlobReference("grad_blob") workspace.FeedBlob(grad_blob, grad) indices_blob = core.BlobReference("indices") workspace.FeedBlob(indices_blob, indices) grad_blob_slice = core.GradientSlice(indices=indices_blob, values=grad_blob) train_init_net, train_net = self.get_training_nets() reg = regularizer.MaxNorm(norm, dtype='fp16') reg( train_net, train_init_net, weight_blob, grad_blob_slice, by=RegularizationBy.AFTER_OPTIMIZER ) workspace.RunNetOnce(train_init_net) workspace.RunNetOnce(train_net) npt.assert_almost_equal(result, workspace.FetchBlob('weight_blob'), decimal=2)
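# For readability, the expected-result computation in test_fp16_max_norm
# above can be restated as a small standalone helper: rows whose L2 norm
# exceeds max_norm are rescaled onto the norm ball, rows already inside it
# are left (almost) unchanged. Names here are illustrative only and not
# part of the regularizer API.
import numpy as np

def _project_rows_to_max_norm(weight, indices, max_norm, eps=1e-12):
    out = weight.copy()
    rows = out[indices, :]
    norms = np.sqrt(np.sum(rows ** 2, axis=1, keepdims=True))
    desired = np.clip(norms, 0, max_norm)   # no-op for rows within the ball
    out[indices, :] = rows * desired / (eps + norms)
    return out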
class TestSequenceOps(serial.SerializedTestCase): @given(start_pad_width=st.integers(min_value=1, max_value=2), end_pad_width=st.integers(min_value=0, max_value=2), args=_gen_test_add_padding(with_pad_data=True), ret_lengths=st.booleans(), **hu.gcs) @settings(deadline=10000) def test_add_padding(self, start_pad_width, end_pad_width, args, ret_lengths, gc, dc): lengths, data, start_padding, end_padding = args start_padding = np.array(start_padding, dtype=np.float32) end_padding = np.array(end_padding, dtype=np.float32) outputs = ['output', 'lengths_out'] if ret_lengths else ['output'] op = core.CreateOperator( 'AddPadding', ['data', 'lengths', 'start_padding', 'end_padding'], outputs, padding_width=start_pad_width, end_padding_width=end_pad_width) self.assertReferenceChecks( device_option=gc, op=op, inputs=[data, lengths, start_padding, end_padding], reference=partial(_add_padding_ref, start_pad_width, end_pad_width, ret_lengths)) def _local_test_add_padding_shape_and_type( self, data, start_pad_width, end_pad_width, ret_lengths, lengths=None, ): if ret_lengths and lengths is None: return workspace.ResetWorkspace() workspace.FeedBlob("data", data) if lengths is not None: workspace.FeedBlob("lengths", np.array(lengths).astype(np.int32)) op = core.CreateOperator( 'AddPadding', ['data'] if lengths is None else ['data', 'lengths'], ['output', 'lengths_out'] if ret_lengths else ['output'], padding_width=start_pad_width, end_padding_width=end_pad_width) add_padding_net = core.Net("add_padding_net") add_padding_net.Proto().op.extend([op]) assert workspace.RunNetOnce( add_padding_net), "Failed to run the add_padding_net" shapes, types = workspace.InferShapesAndTypes([add_padding_net], ) expected_shape = list(data.shape) expected_shape[0] += (1 if lengths is None else len(lengths)) * (start_pad_width + end_pad_width) self.assertEqual(shapes["output"], expected_shape) self.assertEqual(types["output"], core.DataType.FLOAT) if ret_lengths: if lengths is None: self.assertEqual(shapes["lengths_out"], [1]) else: self.assertEqual(shapes["lengths_out"], [len(lengths)]) self.assertEqual(types["lengths_out"], core.DataType.INT32) def test_add_padding_shape_and_type_3(self): for start_pad_width in range(3): for end_pad_width in range(3): for ret_lengths in [True, False]: self._local_test_add_padding_shape_and_type( data=np.random.rand(1, 2).astype(np.float32), lengths=None, start_pad_width=start_pad_width, end_pad_width=end_pad_width, ret_lengths=ret_lengths, ) def test_add_padding_shape_and_type_4(self): for start_pad_width in range(3): for end_pad_width in range(3): for ret_lengths in [True, False]: self._local_test_add_padding_shape_and_type( data=np.random.rand(3, 1, 2).astype(np.float32), lengths=[1, 1, 1], start_pad_width=start_pad_width, end_pad_width=end_pad_width, ret_lengths=ret_lengths, ) def test_add_padding_shape_and_type_5(self): for start_pad_width in range(3): for end_pad_width in range(3): for ret_lengths in [True, False]: self._local_test_add_padding_shape_and_type( data=np.random.rand(3, 2, 1).astype(np.float32), lengths=None, start_pad_width=start_pad_width, end_pad_width=end_pad_width, ret_lengths=ret_lengths, ) @given(start_pad_width=st.integers(min_value=0, max_value=3), end_pad_width=st.integers(min_value=0, max_value=3), num_dims=st.integers(min_value=1, max_value=4), num_groups=st.integers(min_value=0, max_value=4), ret_lengths=st.booleans(), **hu.gcs) @settings(deadline=1000) def test_add_padding_shape_and_type(self, start_pad_width, end_pad_width, num_dims, num_groups, ret_lengths, 
gc, dc): np.random.seed(666) lengths = [] for _ in range(num_groups): lengths.append(np.random.randint(0, 3)) if sum(lengths) == 0: lengths = [] data_shape = [] for _ in range(num_dims): data_shape.append(np.random.randint(1, 4)) if sum(lengths) > 0: data_shape[0] = sum(lengths) data = np.random.randn(*data_shape).astype(np.float32) self._local_test_add_padding_shape_and_type( data=data, lengths=lengths if len(lengths) else None, start_pad_width=start_pad_width, end_pad_width=end_pad_width, ret_lengths=ret_lengths, ) @given(start_pad_width=st.integers(min_value=1, max_value=2), end_pad_width=st.integers(min_value=0, max_value=2), args=_gen_test_add_padding(with_pad_data=False), **hu.gcs) def test_add_zero_padding(self, start_pad_width, end_pad_width, args, gc, dc): lengths, data = args op = core.CreateOperator('AddPadding', ['data', 'lengths'], ['output', 'lengths_out'], padding_width=start_pad_width, end_padding_width=end_pad_width) self.assertReferenceChecks( gc, op, [data, lengths], partial(_add_padding_ref, start_pad_width, end_pad_width, True)) @given(start_pad_width=st.integers(min_value=1, max_value=2), end_pad_width=st.integers(min_value=0, max_value=2), data=hu.tensor(min_dim=1, max_dim=3), **hu.gcs) def test_add_padding_no_length(self, start_pad_width, end_pad_width, data, gc, dc): op = core.CreateOperator('AddPadding', ['data'], ['output', 'output_lens'], padding_width=start_pad_width, end_padding_width=end_pad_width) self.assertReferenceChecks( gc, op, [data], partial(_add_padding_ref, start_pad_width, end_pad_width, True, lengths=np.array([data.shape[0]]))) # Uncomment the following seed to make this fail. # @seed(302934307671667531413257853548643485645) # See https://github.com/caffe2/caffe2/issues/1547 @unittest.skip("flaky test") @given(start_pad_width=st.integers(min_value=1, max_value=2), end_pad_width=st.integers(min_value=0, max_value=2), args=_gen_test_add_padding(with_pad_data=False, is_remove=True), **hu.gcs) def test_remove_padding(self, start_pad_width, end_pad_width, args, gc, dc): lengths, data = args op = core.CreateOperator('RemovePadding', ['data', 'lengths'], ['output', 'lengths_out'], padding_width=start_pad_width, end_padding_width=end_pad_width) self.assertReferenceChecks(device_option=gc, op=op, inputs=[data, lengths], reference=partial(_remove_padding_ref, start_pad_width, end_pad_width)) @given(start_pad_width=st.integers(min_value=0, max_value=2), end_pad_width=st.integers(min_value=0, max_value=2), args=_gen_test_add_padding(with_pad_data=True), **hu.gcs) @settings(deadline=10000) def test_gather_padding(self, start_pad_width, end_pad_width, args, gc, dc): lengths, data, start_padding, end_padding = args padded_data, padded_lengths = _add_padding_ref(start_pad_width, end_pad_width, True, data, lengths, start_padding, end_padding) op = core.CreateOperator('GatherPadding', ['data', 'lengths'], ['start_padding', 'end_padding'], padding_width=start_pad_width, end_padding_width=end_pad_width) self.assertReferenceChecks(device_option=gc, op=op, inputs=[padded_data, padded_lengths], reference=partial(_gather_padding_ref, start_pad_width, end_pad_width)) @given(data=hu.tensor(min_dim=3, max_dim=3, dtype=np.float32, elements=hu.floats(min_value=-np.inf, max_value=np.inf), min_value=1, max_value=10), **hu.gcs) @settings(deadline=10000) def test_reverse_packed_segs(self, data, gc, dc): max_length = data.shape[0] batch_size = data.shape[1] lengths = np.random.randint(max_length + 1, size=batch_size) op = core.CreateOperator("ReversePackedSegs", ["data", 
"lengths"], ["reversed_data"]) def op_ref(data, lengths): rev_data = np.array(data, copy=True) for i in range(batch_size): seg_length = lengths[i] for j in range(seg_length): rev_data[j][i] = data[seg_length - 1 - j][i] return (rev_data, ) def op_grad_ref(grad_out, outputs, inputs): return op_ref(grad_out, inputs[1]) + (None, ) self.assertReferenceChecks(device_option=gc, op=op, inputs=[data, lengths], reference=op_ref, output_to_grad='reversed_data', grad_reference=op_grad_ref) @given(data=hu.tensor(min_dim=1, max_dim=3, dtype=np.float32, elements=hu.floats(min_value=-np.inf, max_value=np.inf), min_value=10, max_value=10), indices=st.lists(st.integers(min_value=0, max_value=9), min_size=0, max_size=10), **hu.gcs_cpu_only) @settings(deadline=10000) def test_remove_data_blocks(self, data, indices, gc, dc): indices = np.array(indices) op = core.CreateOperator("RemoveDataBlocks", ["data", "indices"], ["shrunk_data"]) def op_ref(data, indices): unique_indices = np.unique(indices) sorted_indices = np.sort(unique_indices) shrunk_data = np.delete(data, sorted_indices, axis=0) return (shrunk_data, ) self.assertReferenceChecks(device_option=gc, op=op, inputs=[data, indices], reference=op_ref) @given(elements=st.lists(st.integers(min_value=0, max_value=9), min_size=0, max_size=10), **hu.gcs_cpu_only) @settings(deadline=10000) def test_find_duplicate_elements(self, elements, gc, dc): mapping = { 0: "a", 1: "b", 2: "c", 3: "d", 4: "e", 5: "f", 6: "g", 7: "h", 8: "i", 9: "j" } data = np.array([mapping[e] for e in elements], dtype='|S') op = core.CreateOperator("FindDuplicateElements", ["data"], ["indices"]) def op_ref(data): unique_data = [] indices = [] for i, e in enumerate(data): if e in unique_data: indices.append(i) else: unique_data.append(e) return (np.array(indices, dtype=np.int64), ) self.assertReferenceChecks(device_option=gc, op=op, inputs=[data], reference=op_ref)