Example #1
class OrderSwitchOpsTest(hu.HypothesisTestCase):
    @given(X=hu.tensor(min_dim=3, max_dim=5, min_value=1, max_value=5),
           engine=st.sampled_from(["", "CUDNN"]),
           **hu.gcs)
    def test_nchw2nhwc(self, X, engine, gc, dc):
        op = core.CreateOperator("NCHW2NHWC", ["X"], ["Y"], engine=engine)

        def nchw2nhwc_ref(X):
            X_reshaped = X.transpose((0, ) + tuple(range(2, X.ndim)) + (1, ))
            return (X_reshaped, )

        self.assertReferenceChecks(gc, op, [X], nchw2nhwc_ref)
        self.assertGradientChecks(gc, op, [X], 0, [0])
        self.assertDeviceChecks(dc, op, [X], [0])

    @given(X=hu.tensor(min_dim=3, max_dim=5, min_value=1, max_value=5),
           engine=st.sampled_from(["", "CUDNN"]),
           **hu.gcs)
    def test_nhwc2nchw(self, X, engine, gc, dc):
        op = core.CreateOperator("NHWC2NCHW", ["X"], ["Y"], engine=engine)

        def nhwc2nchw_ref(X):
            X_reshaped = X.transpose((0, X.ndim - 1) +
                                     tuple(range(1, X.ndim - 1)))
            return (X_reshaped, )

        self.assertReferenceChecks(gc, op, [X], nhwc2nchw_ref)
        self.assertGradientChecks(gc, op, [X], 0, [0])
        self.assertDeviceChecks(dc, op, [X], [0])
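As a quick standalone check of the two reference permutations above (plain NumPy, independent of Caffe2):

import numpy as np

X = np.zeros((2, 3, 4, 5))                        # NCHW layout: (N, C, H, W)
Y = X.transpose((0,) + tuple(range(2, X.ndim)) + (1,))
assert Y.shape == (2, 4, 5, 3)                    # NHWC layout: (N, H, W, C)
back = Y.transpose((0, Y.ndim - 1) + tuple(range(1, Y.ndim - 1)))
assert back.shape == X.shape                      # NHWC2NCHW inverts NCHW2NHWC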
Example #2
class TestHyperbolicOps(hu.HypothesisTestCase):
    def _test_hyperbolic_op(self, op_name, np_ref, X, in_place, gc, dc):
        op = core.CreateOperator(op_name, ["X"], ["X"] if in_place else ["Y"])

        def ref(X):
            return [np_ref(X)]

        self.assertReferenceChecks(
            device_option=gc,
            op=op,
            inputs=[X],
            reference=ref,
        )
        self.assertDeviceChecks(dc, op, [X], [0])
        self.assertGradientChecks(gc, op, [X], 0, [0])

    @given(X=hu.tensor(dtype=np.float32), in_place=st.booleans(), **hu.gcs)
    def test_tanh(self, X, in_place, gc, dc):
        self._test_hyperbolic_op("Tanh", np.tanh, X, in_place, gc, dc)

    @given(X=hu.tensor(dtype=np.float32), **hu.gcs)
    def test_sinh(self, X, gc, dc):
        self._test_hyperbolic_op("Sinh", np.sinh, X, False, gc, dc)

    @given(X=hu.tensor(dtype=np.float32), **hu.gcs)
    def test_cosh(self, X, gc, dc):
        self._test_hyperbolic_op("Cosh", np.cosh, X, False, gc, dc)
Example #3
class TestNormalizeOp(hu.HypothesisTestCase):
    @given(X=hu.tensor(min_dim=1,
                       max_dim=5,
                       elements=st.floats(min_value=0.5, max_value=1.0)),
           **hu.gcs)
    def test_normalize(self, X, gc, dc):
        def ref_normalize(X, axis):
            x_normed = X / np.maximum(
                np.sqrt((X**2).sum(axis=axis, keepdims=True)), 1e-12)
            return (x_normed, )

        for axis in range(-X.ndim, X.ndim):
            x = copy.copy(X)
            op = core.CreateOperator("Normalize", "X", "Y", axis=axis)
            self.assertReferenceChecks(
                gc, op, [x], functools.partial(ref_normalize, axis=axis))
            self.assertDeviceChecks(dc, op, [x], [0])
            self.assertGradientChecks(gc, op, [x], 0, [0])

    @given(X=hu.tensor(min_dim=1,
                       max_dim=5,
                       elements=st.floats(min_value=0.5, max_value=1.0)),
           **hu.gcs)
    def test_normalize_L1(self, X, gc, dc):
        def ref(X, axis):
            norm = abs(X).sum(axis=axis, keepdims=True)
            return (X / norm, )

        for axis in range(-X.ndim, X.ndim):
            print("axis: ", axis)
            op = core.CreateOperator("NormalizeL1", "X", "Y", axis=axis)
            self.assertReferenceChecks(gc, op, [X],
                                       functools.partial(ref, axis=axis))
            self.assertDeviceChecks(dc, op, [X], [0])
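Both reference functions above can be sanity-checked standalone (plain NumPy; the 1e-12 floor in the L2 case guards against division by zero):

import numpy as np

X = np.array([[3.0, 4.0]])
l2 = X / np.maximum(np.sqrt((X ** 2).sum(axis=1, keepdims=True)), 1e-12)
assert np.allclose(l2, [[0.6, 0.8]])              # rows have unit L2 norm
l1 = X / np.abs(X).sum(axis=1, keepdims=True)
assert np.allclose(l1, [[3.0 / 7.0, 4.0 / 7.0]])  # rows have unit L1 norm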
Example #4
class TestTrigonometricOp(serial.SerializedTestCase):
    @given(X=hu.tensor(elements=hu.floats(min_value=-0.7, max_value=0.7)),
           **hu.gcs)
    @settings(deadline=None, max_examples=50)
    def test_acos(self, X, gc, dc):
        self.assertTrigonometricChecks("Acos", X, lambda x: (np.arccos(X), ),
                                       gc, dc)

    @given(X=hu.tensor(elements=hu.floats(min_value=-0.7, max_value=0.7)),
           **hu.gcs)
    @settings(deadline=None, max_examples=50)
    def test_asin(self, X, gc, dc):
        self.assertTrigonometricChecks("Asin", X, lambda x: (np.arcsin(X), ),
                                       gc, dc)

    @given(X=hu.tensor(elements=hu.floats(min_value=-100, max_value=100)),
           **hu.gcs)
    @settings(deadline=None, max_examples=50)
    def test_atan(self, X, gc, dc):
        self.assertTrigonometricChecks("Atan", X, lambda x: (np.arctan(X), ),
                                       gc, dc)

    @given(X=hu.tensor(elements=hu.floats(min_value=-0.5, max_value=0.5)),
           **hu.gcs)
    @settings(deadline=None, max_examples=50)
    def test_tan(self, X, gc, dc):
        self.assertTrigonometricChecks("Tan", X, lambda x: (np.tan(X), ), gc,
                                       dc)

    def assertTrigonometricChecks(self, op_name, input, reference, gc, dc):
        op = core.CreateOperator(op_name, ["X"], ["Y"])
        self.assertReferenceChecks(gc, op, [input], reference)
        self.assertDeviceChecks(dc, op, [input], [0])
        self.assertGradientChecks(gc, op, [input], 0, [0])
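The bounded element ranges above keep the gradient checker away from steep or singular regions; for example d/dx arccos(x) = -1/sqrt(1 - x^2) diverges as |x| approaches 1. A standalone NumPy illustration (not part of the test):

import numpy as np

def d_acos(x):
    return -1.0 / np.sqrt(1.0 - x ** 2)

print(d_acos(0.7))    # ~ -1.4: tame, fine for finite differences
print(d_acos(0.999))  # ~ -22.4: steep enough to make numeric checks flaky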
Example #5
class TestMathOps(serial.SerializedTestCase):
    @given(X=hu.tensor(),
           exponent=st.floats(min_value=2.0, max_value=3.0),
           **hu.gcs)
    def test_elementwise_power(self, X, exponent, gc, dc):
        # a negative base raised to a non-integer exponent is a domain error
        X = np.abs(X)

        def powf(X):
            return (X**exponent, )

        def powf_grad(g_out, outputs, fwd_inputs):
            return (exponent * (fwd_inputs[0]**(exponent - 1)) * g_out, )

        op = core.CreateOperator("Pow", ["X"], ["Y"], exponent=exponent)

        self.assertReferenceChecks(gc,
                                   op, [X],
                                   powf,
                                   output_to_grad="Y",
                                   grad_reference=powf_grad)

    @serial.given(X=hu.tensor(),
                  exponent=st.floats(min_value=-3.0, max_value=3.0),
                  **hu.gcs)
    def test_sign(self, X, exponent, gc, dc):
        def signf(X):
            return [np.sign(X)]

        op = core.CreateOperator("Sign", ["X"], ["Y"])

        self.assertReferenceChecks(gc, op, [X], signf)
        self.assertDeviceChecks(dc, op, [X], [0])
Example #6
class TestBooleanMaskOp(hu.HypothesisTestCase):
    @given(x=hu.tensor(min_dim=1,
                       max_dim=5,
                       elements=st.floats(min_value=0.5, max_value=1.0)),
           **hu.gcs)
    def test_boolean_mask(self, x, gc, dc):
        op = core.CreateOperator("BooleanMask", ["data", "mask"],
                                 "masked_data")
        mask = np.random.choice(a=[True, False], size=x.shape[0])

        def ref(x, mask):
            return (x[mask], )

        self.assertReferenceChecks(gc, op, [x, mask], ref)
        self.assertDeviceChecks(dc, op, [x, mask], [0])

    @given(x=hu.tensor(min_dim=1,
                       max_dim=5,
                       elements=st.floats(min_value=0.5, max_value=1.0)),
           **hu.gcs)
    def test_boolean_mask_indices(self, x, gc, dc):
        op = core.CreateOperator("BooleanMask", ["data", "mask"],
                                 ["masked_data", "masked_indices"])
        mask = np.random.choice(a=[True, False], size=x.shape[0])

        def ref(x, mask):
            return (x[mask], np.where(mask)[0])

        self.assertReferenceChecks(gc, op, [x, mask], ref)
        self.assertDeviceChecks(dc, op, [x, mask], [0])
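The reference functions above correspond directly to NumPy boolean indexing (a minimal standalone sketch):

import numpy as np

x = np.array([10.0, 20.0, 30.0, 40.0])
mask = np.array([True, False, True, False])
assert np.array_equal(x[mask], [10.0, 30.0])      # masked_data
assert np.array_equal(np.where(mask)[0], [0, 2])  # masked_indices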
Example #7
class TestDropout(hu.HypothesisTestCase):

    @given(X=hu.tensor(),
           in_place=st.booleans(),
           ratio=st.floats(0, 0.999),
           engine=st.sampled_from(["", "CUDNN"]),
           **hu.gcs)
    def test_dropout_is_test(self, X, in_place, ratio, engine, gc, dc):
        """Test with is_test=True for a deterministic reference impl."""
        # TODO(lukeyeager): enable this path when the GPU path is fixed
        if in_place:
            # Skip if trying in-place on GPU
            assume(not (gc.device_type in {caffe2_pb2.CUDA, caffe2_pb2.HIP} and engine == ''))
            # If in-place on CPU, don't compare with GPU
            dc = dc[:1]

        op = core.CreateOperator("Dropout", ["X"],
                                 ["X" if in_place else "Y"],
                                 ratio=ratio, engine=engine, is_test=True)

        self.assertDeviceChecks(dc, op, [X], [0])
        # No sense in checking gradients for test phase

        def reference_dropout_test(x):
            return x, np.ones(x.shape, dtype=bool)
        self.assertReferenceChecks(
            gc, op, [X], reference_dropout_test,
            # The 'mask' output may be uninitialized
            outputs_to_check=[0])

    @given(X=hu.tensor(),
           in_place=st.booleans(),
           output_mask=st.booleans(),
           engine=st.sampled_from(["", "CUDNN"]),
           **hu.gcs)
    def test_dropout_ratio0(self, X, in_place, output_mask, engine, gc, dc):
        """Test with ratio=0 for a deterministic reference impl."""
        # TODO(lukeyeager): enable this path when the op is fixed
        if in_place:
            # Skip if trying in-place on GPU
            assume(gc.device_type not in {caffe2_pb2.CUDA, caffe2_pb2.HIP})
            # If in-place on CPU, don't compare with GPU
            dc = dc[:1]
        is_test = not output_mask
        op = core.CreateOperator("Dropout", ["X"],
                                 ["X" if in_place else "Y"] +
                                 (["mask"] if output_mask else []),
                                 ratio=0.0, engine=engine,
                                 is_test=is_test)

        self.assertDeviceChecks(dc, op, [X], [0])
        if not is_test:
            self.assertGradientChecks(gc, op, [X], 0, [0])

        def reference_dropout_ratio0(x):
            return (x,) if is_test else (x, np.ones(x.shape, dtype=bool))
        self.assertReferenceChecks(
            gc, op, [X], reference_dropout_ratio0,
            # Don't check the mask with cuDNN because it's packed data
            outputs_to_check=None if engine != 'CUDNN' else [0])
Example #8
class OrderSwitchOpsTest(hu.HypothesisTestCase):
    @given(X=hu.tensor(min_dim=3, max_dim=5, min_value=1, max_value=5),
           engine=st.sampled_from(["", "CUDNN"]),
           **hu.gcs)
    def test_nchw2nhwc(self, X, engine, gc, dc):
        op = core.CreateOperator("NCHW2NHWC", ["X"], ["Y"], engine=engine)

        def nchw2nhwc_ref(X):
            return (utils.NCHW2NHWC(X), )

        self.assertReferenceChecks(gc, op, [X], nchw2nhwc_ref)
        self.assertGradientChecks(gc, op, [X], 0, [0])
        self.assertDeviceChecks(dc, op, [X], [0])

    @given(X=hu.tensor(min_dim=3, max_dim=5, min_value=1, max_value=5),
           engine=st.sampled_from(["", "CUDNN"]),
           **hu.gcs)
    def test_nhwc2nchw(self, X, engine, gc, dc):
        op = core.CreateOperator("NHWC2NCHW", ["X"], ["Y"], engine=engine)

        def nhwc2nchw_ref(X):
            return (utils.NHWC2NCHW(X), )

        self.assertReferenceChecks(gc, op, [X], nhwc2nchw_ref)
        self.assertGradientChecks(gc, op, [X], 0, [0])
        self.assertDeviceChecks(dc, op, [X], [0])
Example #9
class TestThresholdedRelu(serial.SerializedTestCase):

    # test case 1 - default alpha - we do reference and dc checks.
    # test case 2 does dc and reference checks over range of alphas.
    # test case 3 does gc over range of alphas.
    @serial.given(input=hu.tensor(),
                  engine=st.sampled_from(["", "CUDNN"]),
                  **hu.gcs)
    def test_thresholded_relu_1(self, input, gc, dc, engine):
        X = input
        op = core.CreateOperator("ThresholdedRelu", ["X"], ["Y"],
                                 engine=engine)

        def defaultRef(X):
            Y = np.copy(X)
            Y[Y <= 1.0] = 0.0
            return (Y, )

        self.assertDeviceChecks(dc, op, [X], [0])
        self.assertReferenceChecks(gc, op, [X], defaultRef)

    @given(input=hu.tensor(),
           alpha=st.floats(min_value=1.0, max_value=5.0),
           engine=st.sampled_from(["", "CUDNN"]),
           **hu.gcs)
    def test_thresholded_relu_2(self, input, alpha, gc, dc, engine):
        X = input
        op = core.CreateOperator("ThresholdedRelu", ["X"], ["Y"],
                                 alpha=alpha,
                                 engine=engine)

        def ref(X):
            Y = np.copy(X)
            Y[Y <= alpha] = 0.0
            return (Y, )

        self.assertDeviceChecks(dc, op, [X], [0])
        self.assertReferenceChecks(gc, op, [X], ref)

    @given(input=hu.tensor(),
           alpha=st.floats(min_value=1.1, max_value=5.0),
           engine=st.sampled_from(["", "CUDNN"]),
           **hu.gcs)
    @settings(deadline=10000)
    def test_thresholded_relu_3(self, input, alpha, gc, dc, engine):
        X = TestThresholdedRelu.fix_input(input)
        op = core.CreateOperator("ThresholdedRelu", ["X"], ["Y"],
                                 alpha=float(alpha),
                                 engine=engine)
        self.assertGradientChecks(gc, op, [X], 0, [0])

    @staticmethod
    def fix_input(input):
        # go away from alpha to avoid derivative discontinuities
        input += 0.02 * np.sign(input)
        return input
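fix_input matters because ThresholdedRelu is non-differentiable at x == alpha: a finite-difference probe that straddles the threshold disagrees with the analytic gradient. A standalone sketch of the failure mode, assuming alpha = 1.1 and a 0.05 checker stepsize:

alpha, step = 1.1, 0.05

def f(x):
    return x * (x > alpha)                        # thresholded relu

x = 1.12                                          # within one step of alpha
numeric = (f(x + step) - f(x - step)) / (2 * step)
print(numeric)                                    # ~ 11.7, but the analytic gradient at x is 1.0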
Example #10
class TestTopK(hu.HypothesisTestCase):

    @given(X=hu.tensor(), **mu.gcs)
    def test_top_k(self, X, gc, dc):
        X = X.astype(dtype=np.float32)
        k = random.randint(1, X.shape[-1])
        op = core.CreateOperator(
            "TopK", ["X"], ["Values", "Indices"], k=k, device_option=gc
        )

        def top_k_ref(X):
            X_flat = X.reshape((-1, X.shape[-1]))
            indices_ref = np.ndarray(shape=X_flat.shape, dtype=np.int32)
            values_ref = np.ndarray(shape=X_flat.shape, dtype=np.float32)
            for i in range(X_flat.shape[0]):
                od = OrderedDict()
                for j in range(X_flat.shape[1]):
                    val = X_flat[i, j]
                    if val not in od:
                        od[val] = []
                    od[val].append(j)
                j = 0
                for val, idxs in sorted(od.items(), reverse=True):
                    for idx in idxs:
                        indices_ref[i, j] = idx
                        values_ref[i, j] = val
                        j = j + 1

            indices_ref = np.reshape(indices_ref, X.shape)
            values_ref = np.reshape(values_ref, X.shape)

            indices_ref = indices_ref.take(list(range(k)), axis=-1)
            values_ref = values_ref.take(list(range(k)), axis=-1)

            return (values_ref, indices_ref)

        self.assertReferenceChecks(hu.cpu_do, op, [X], top_k_ref)

    @given(X=hu.tensor(min_dim=2), **hu.gcs_cpu_only)
    def test_top_k_grad(self, X, gc, dc):
        X = X.astype(np.float32)
        k = random.randint(1, X.shape[-1])

        # This tries to ensure that perturbing X by the gradient checker's
        # stepsize (0.05) will not change the TopK selections, since
        # dims max_value = 5 as defined in
        # caffe2/caffe2/python/hypothesis_test_util.py
        for i in range(X.shape[-1]):
            X[..., i] = i * 1.0 / X.shape[-1]

        op = core.CreateOperator(
            "TopK", ["X"], ["Values", "Indices"], k=k, device_option=gc
        )

        self.assertGradientChecks(gc, op, [X], 0, [0])
Example #11
class TestTransposeOp(serial.SerializedTestCase):
    @serial.given(X=hu.tensor(dtype=np.float32),
                  use_axes=st.booleans(),
                  **hu.gcs)
    def test_transpose(self, X, use_axes, gc, dc):
        ndim = len(X.shape)
        axes = np.arange(ndim)
        np.random.shuffle(axes)

        if use_axes:
            op = core.CreateOperator("Transpose", ["X"], ["Y"],
                                     axes=axes,
                                     device_option=gc)
        else:
            op = core.CreateOperator("Transpose", ["X"], ["Y"],
                                     device_option=gc)

        def transpose_ref(X):
            if use_axes:
                return [np.transpose(X, axes=axes)]
            else:
                return [np.transpose(X)]

        self.assertReferenceChecks(gc, op, [X], transpose_ref)
        self.assertDeviceChecks(dc, op, [X], [0])
        self.assertGradientChecks(gc, op, [X], 0, [0])

    @unittest.skipIf(not workspace.has_cuda_support, "no cuda support")
    @given(X=hu.tensor(dtype=np.float32),
           use_axes=st.booleans(),
           **hu.gcs_cuda_only)
    def test_transpose_cudnn(self, X, use_axes, gc, dc):
        ndim = len(X.shape)
        axes = np.arange(ndim)
        np.random.shuffle(axes)

        if use_axes:
            op = core.CreateOperator("Transpose", ["X"], ["Y"],
                                     axes=axes,
                                     engine="CUDNN",
                                     device_option=hu.cuda_do)
        else:
            op = core.CreateOperator("Transpose", ["X"], ["Y"],
                                     engine="CUDNN",
                                     device_option=hu.cuda_do)

        def transpose_ref(X):
            if use_axes:
                return [np.transpose(X, axes=axes)]
            else:
                return [np.transpose(X)]

        self.assertReferenceChecks(hu.gpu_do, op, [X], transpose_ref)
        self.assertGradientChecks(hu.gpu_do, op, [X], 0, [0])
Example #12
class TestArgOps(hu.HypothesisTestCase):
    def argmax_ref(self, X, axis, keepdims):
        indices = np.argmax(X, axis=axis)
        if keepdims:
            out_dims = list(X.shape)
            out_dims[axis] = 1
            indices = indices.reshape(tuple(out_dims))
        return [indices]

    @given(X=hu.tensor(dtype=np.float32),
           axis=st.integers(-1, 5),
           keepdims=st.booleans(),
           **hu.gcs)
    def test_argmax(self, X, axis, keepdims, gc, dc):
        if axis >= len(X.shape):
            axis %= len(X.shape)
        op = core.CreateOperator("ArgMax", ["X"], ["Indices"],
                                 axis=axis,
                                 keepdims=keepdims,
                                 device_option=gc)

        def argmax_ref(X):
            indices = np.argmax(X, axis=axis)
            if keepdims:
                out_dims = list(X.shape)
                out_dims[axis] = 1
                indices = indices.reshape(tuple(out_dims))
            return [indices]

        self.assertReferenceChecks(gc, op, [X], argmax_ref)
        self.assertDeviceChecks(dc, op, [X], [0])

    @given(X=hu.tensor(dtype=np.float32),
           axis=st.integers(-1, 5),
           keepdims=st.booleans(),
           **hu.gcs)
    def test_argmin(self, X, axis, keepdims, gc, dc):
        if axis >= len(X.shape):
            axis %= len(X.shape)
        op = core.CreateOperator("ArgMin", ["X"], ["Indices"],
                                 axis=axis,
                                 keepdims=keepdims,
                                 device_option=gc)

        def argmin_ref(X):
            indices = np.argmin(X, axis=axis)
            if keepdims:
                out_dims = list(X.shape)
                out_dims[axis] = 1
                indices = indices.reshape(tuple(out_dims))
            return [indices]

        self.assertReferenceChecks(gc, op, [X], argmin_ref)
        self.assertDeviceChecks(dc, op, [X], [0])
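The manual reshape in both references emulates keepdims for np.argmax/np.argmin, which did not accept a keepdims argument in older NumPy releases (a quick standalone check):

import numpy as np

X = np.array([[1.0, 3.0], [5.0, 2.0]])
indices = np.argmax(X, axis=1)                    # shape (2,)
kept = indices.reshape((2, 1))                    # keepdims equivalent
assert np.array_equal(kept, np.expand_dims(indices, axis=1))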
Example #13
class DropoutTest(hu.HypothesisTestCase):
    @given(X=hu.tensor(),
           in_place=st.booleans(),
           ratio=st.floats(0, 0.999),
           **mu.gcs)
    def test_dropout_is_test(self, X, in_place, ratio, gc, dc):
        """Test with is_test=True for a deterministic reference impl."""
        op = core.CreateOperator('Dropout', ['X'], ['X' if in_place else 'Y'],
                                 ratio=ratio,
                                 is_test=True)

        self.assertDeviceChecks(dc, op, [X], [0])

        # No sense in checking gradients for test phase

        def reference_dropout_test(x):
            return x, np.ones(x.shape, dtype=bool)

        self.assertReferenceChecks(
            gc,
            op,
            [X],
            reference_dropout_test,
            # The 'mask' output may be uninitialized
            outputs_to_check=[0])

    @given(X=hu.tensor(),
           in_place=st.booleans(),
           output_mask=st.booleans(),
           **mu.gcs)
    @unittest.skipIf(True, "Skip duo to different rand seed.")
    def test_dropout_ratio0(self, X, in_place, output_mask, gc, dc):
        """Test with ratio=0 for a deterministic reference impl."""
        is_test = not output_mask
        op = core.CreateOperator('Dropout', ['X'], ['X' if in_place else 'Y'] +
                                 (['mask'] if output_mask else []),
                                 ratio=0.0,
                                 is_test=is_test)

        self.assertDeviceChecks(dc, op, [X], [0])

        def reference_dropout_ratio0(x):
            return (x, ) if is_test else (x, np.ones(x.shape, dtype=bool))

        self.assertReferenceChecks(gc,
                                   op, [X],
                                   reference_dropout_ratio0,
                                   outputs_to_check=[0])
Example #14
class TestSoftplus(hu.HypothesisTestCase):
    @given(X=hu.tensor(), **hu.gcs)
    @settings(deadline=1000)
    def test_softplus(self, X, gc, dc):
        op = core.CreateOperator("Softplus", ["X"], ["Y"])
        self.assertDeviceChecks(dc, op, [X], [0])
        self.assertGradientChecks(gc, op, [X], 0, [0])
Example #15
    def test_sparse_momentum_sgd(
        self, inputs, momentum, nesterov, lr, data_strategy, gc, dc
    ):
        w, grad, m = inputs

        # Create an indexing array containing values which index into grad
        indices = data_strategy.draw(
            hu.tensor(
                max_dim=1,
                min_value=1,
                max_value=grad.shape[0],
                dtype=np.int64,
                elements=st.sampled_from(np.arange(grad.shape[0])),
            ),
        )

        # Verify that the generated indices are unique
        hypothesis.assume(
            np.array_equal(
                np.unique(indices.flatten()),
                np.sort(indices.flatten())))

        # Sparsify grad
        grad = grad[indices]

        # Make momentum >= 0
        m = np.abs(m)

        # Convert lr to a numpy array
        lr = np.asarray([lr], dtype=np.float32)

        op = core.CreateOperator(
            "SparseMomentumSGDUpdate", ["grad", "m", "lr", "param", "indices"],
            ["adjusted_grad", "m", "param"],
            momentum=momentum,
            nesterov=int(nesterov),
            device_option=gc
        )

        # Reference
        def momentum_sgd(grad, m, lr):
            lr = lr[0]
            if not nesterov:
                adjusted_gradient = lr * grad + momentum * m
                return (adjusted_gradient, adjusted_gradient)
            else:
                m_new = momentum * m + lr * grad
                return ((1 + momentum) * m_new - momentum * m, m_new)

        def sparse(grad, m, lr, param, i):
            grad_new, m_new = momentum_sgd(grad, m[i], lr)
            m[i] = m_new
            param[i] -= grad_new
            return (grad_new, m, param)

        self.assertReferenceChecks(
            gc,
            op,
            [grad, m, lr, w, indices],
            sparse)
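A scalar sanity check of the reference math above (plain Python, with momentum mu = 0.9, lr = 0.1, g = 1.0, m = 0.5):

mu, lr, g, m = 0.9, 0.1, 1.0, 0.5
# standard momentum: the adjusted gradient doubles as the new momentum
adj = lr * g + mu * m                             # 0.55
assert abs(adj - 0.55) < 1e-12
# Nesterov: update the momentum first, then take the look-ahead step
m_new = mu * m + lr * g                           # 0.55
nesterov_adj = (1 + mu) * m_new - mu * m          # 1.9 * 0.55 - 0.45 = 0.595
assert abs(nesterov_adj - 0.595) < 1e-12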
Example #16
class TransposeTest(hu.HypothesisTestCase):
    @given(X=hu.tensor(min_dim=1, max_dim=5, dtype=np.float32),
           use_axes=st.booleans(),
           **mu.gcs)
    @settings(deadline=None, max_examples=50)
    def test_transpose(self, X, use_axes, gc, dc):
        ndim = len(X.shape)
        axes = np.arange(ndim)
        np.random.shuffle(axes)

        if use_axes:
            op = core.CreateOperator("Transpose", ["X"], ["Y"],
                                     axes=axes,
                                     device_option=gc)
        else:
            op = core.CreateOperator("Transpose", ["X"], ["Y"],
                                     device_option=gc)

        def transpose_ref(X):
            if use_axes:
                return [np.transpose(X, axes=axes)]
            else:
                return [np.transpose(X)]

        self.assertReferenceChecks(gc, op, [X], transpose_ref)
        self.assertDeviceChecks(dc, op, [X], [0])
        self.assertGradientChecks(gc, op, [X], 0, [0])
Example #17
def _tensor_splits(draw, add_axis=False):
    """Generates (axis, split_info, tensor_splits) tuples."""
    tensor = draw(hu.tensor(min_value=4))  # Each dim has at least 4 elements.
    axis = draw(st.integers(0, len(tensor.shape) - 1))
    if add_axis:
        # Simple case: get individual slices along one axis, where each of them
        # is (N-1)-dimensional. The axis will be added back upon concatenation.
        return (axis, np.ones(tensor.shape[axis], dtype=np.int32), [
            np.array(tensor.take(i, axis=axis))
            for i in range(tensor.shape[axis])
        ])
    else:
        # General case: pick some (possibly consecutive, even non-unique)
        # indices at which we will split the tensor, along the given axis.
        splits = sorted(
            draw(
                st.lists(elements=st.integers(0, tensor.shape[axis]),
                         max_size=4)) + [0, tensor.shape[axis]])
        return (
            axis,
            np.array(np.diff(splits), dtype=np.int32),
            [
                tensor.take(range(splits[i], splits[i + 1]), axis=axis)
                for i in range(len(splits) - 1)
            ],
        )
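A worked example of the general case: for an axis of length 5 and drawn split points [2, 2], padding with the endpoints and sorting gives [0, 2, 2, 5], so np.diff yields split sizes [2, 0, 3]; empty and adjacent splits are deliberately allowed:

import numpy as np

splits = sorted([2, 2] + [0, 5])                  # [0, 2, 2, 5]
sizes = np.diff(splits)                           # [2, 0, 3]
assert np.array_equal(sizes, [2, 0, 3]) and sizes.sum() == 5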
Example #18
    def test_sparse_adagrad(self, inputs, lr, epsilon,
                            data_strategy, gc, dc):
        param, momentum, grad = inputs
        momentum = np.abs(momentum)
        lr = np.array([lr], dtype=np.float32)

        # Create an indexing array containing values that index into grad
        indices = data_strategy.draw(
            hu.tensor(dtype=np.int64,
                      elements=st.sampled_from(np.arange(grad.shape[0]))),
        )
        hypothesis.note('indices.shape: %s' % str(indices.shape))

        # For now, the indices must be unique
        hypothesis.assume(np.array_equal(np.unique(indices.flatten()),
                                         np.sort(indices.flatten())))

        # Sparsify grad
        grad = grad[indices]

        op = core.CreateOperator(
            "SparseAdagrad",
            ["param", "momentum", "indices", "grad", "lr"],
            ["param", "momentum"],
            epsilon=epsilon,
            device_option=gc)

        def ref_sparse(param, momentum, indices, grad, lr, ref_using_fp16=False):
            param_out = np.copy(param)
            momentum_out = np.copy(momentum)
            for i, index in enumerate(indices):
                param_out[index], momentum_out[index] = self.ref_adagrad(
                    param[index],
                    momentum[index],
                    grad[i],
                    lr,
                    epsilon,
                    using_fp16=ref_using_fp16
                )
            return (param_out, momentum_out)

        ref_using_fp16_values = [False]
        if dc == hu.gpu_do:
            ref_using_fp16_values.append(True)

        for ref_using_fp16 in ref_using_fp16_values:
            if ref_using_fp16:
                print('test_sparse_adagrad with half precision embedding')
                momentum_i = momentum.astype(np.float16)
                param_i = param.astype(np.float16)
            else:
                print('test_sparse_adagrad with full precision embedding')
                momentum_i = momentum.astype(np.float32)
                param_i = param.astype(np.float32)

            self.assertReferenceChecks(
                gc, op, [param_i, momentum_i, indices, grad, lr, ref_using_fp16],
                ref_sparse
            )
Example #19
class TestEnsureCPUOutputOp(hu.HypothesisTestCase):

    @given(
        input=hu.tensor(dtype=np.float32),
        dev_options=_dev_options()
    )
    def test_ensure_cpu_output(self, input, dev_options):
        op_dev, input_blob_dev = dev_options
        net = core.Net('test_net')
        data = net.GivenTensorFill(
            [],
            ["data"],
            values=input,
            shape=input.shape,
            device_option=input_blob_dev
        )

        data_cpu = net.EnsureCPUOutput(
            [data],
            ["data_cpu"],
            device_option=op_dev
        )
        workspace.RunNetOnce(net)

        data_cpu_value = workspace.FetchBlob(data_cpu)
        np.testing.assert_allclose(input, data_cpu_value)
Example #20
class PythonOpTest(hu.HypothesisTestCase):
    @unittest.skipIf(not HAS_NUMBA, "")
    @given(x=hu.tensor(),
           n=st.integers(min_value=1, max_value=20),
           w=st.integers(min_value=1, max_value=20))
    def test_multithreaded_evaluation_numba_nogil(self, x, n, w):
        @numba.jit(nopython=True, nogil=True)
        def g(input_, output):
            output[...] = input_

        def f(inputs, outputs):
            outputs[0].reshape(inputs[0].shape)
            g(inputs[0].data, outputs[0].data)

        ops = [CreatePythonOperator(f, ["x"], [str(i)]) for i in range(n)]
        net = core.Net("net")
        net.Proto().op.extend(ops)
        net.Proto().type = "dag"
        net.Proto().num_workers = w
        iters = 100
        plan = core.Plan("plan")
        plan.AddStep(core.ExecutionStep("test-step", net, iters))
        workspace.FeedBlob("x", x)
        workspace.RunPlan(plan.Proto().SerializeToString())
        for i in range(n):
            y = workspace.FetchBlob(str(i))
            np.testing.assert_almost_equal(x, y)
Example #21
    def test_sparse_lengths_fp16(self, input, data_strategy, is_mean, gc, dc):
        m = input.shape[0]

        lengths = data_strategy.draw(
            hu.tensor(
                max_dim=1,
                max_value=input.shape[0],
                dtype=np.int32,
                elements=st.integers(min_value=0, max_value=27),
            ))
        lengths_sum = int(np.sum(lengths).item())

        indices = data_strategy.draw(
            hu.arrays([lengths_sum],
                      dtype=np.int64,
                      elements=st.sampled_from(np.arange(m))))
        if is_mean:
            op = core.CreateOperator("SparseLengthsMean",
                                     ["input", "indices", "lengths"], "out")
            self.assertReferenceChecks(gc, op, [input, indices, lengths],
                                       sparse_lengths_mean_ref)

        else:
            op = core.CreateOperator("SparseLengthsSum",
                                     ["input", "indices", "lengths"], "out")
            self.assertReferenceChecks(gc, op, [input, indices, lengths],
                                       sparse_lengths_sum_ref)
Example #22
class TestIndexHashOps(hu.HypothesisTestCase):
    @given(indices=st.sampled_from([
        np.int32, np.int64
    ]).flatmap(lambda dtype: hu.tensor(min_dim=1, max_dim=1, dtype=dtype)),
           seed=st.integers(min_value=0, max_value=10),
           modulo=st.integers(min_value=100000, max_value=200000),
           **hu.gcs_cpu_only)
    def test_index_hash_ops(self, indices, seed, modulo, gc, dc):
        op = core.CreateOperator("IndexHash", ["indices"], ["hashed_indices"],
                                 seed=seed,
                                 modulo=modulo)

        def index_hash(indices):
            dtype = np.array(indices).dtype
            assert dtype == np.int32 or dtype == np.int64
            hashed_indices = []
            for index in indices:
                hashed = dtype.type(0xDEADBEEF * seed)
                indices_bytes = np.array([index], dtype).view(np.int8)
                for b in indices_bytes:
                    hashed = dtype.type(hashed * 65537 + b)
                hashed = (modulo + hashed % modulo) % modulo
                hashed_indices.append(hashed)
            return [hashed_indices]

        self.assertDeviceChecks(dc, op, [indices], [0])
        self.assertReferenceChecks(gc, op, [indices], index_hash)
Example #23
class TestGlu(hu.HypothesisTestCase):
    @given(
        X=hu.tensor(),
        **hu.gcs
    )
    def test_glu(self, X, gc, dc):

        def glu_ref(X):
            ndim = X.ndim
            M = 1
            for i in range(ndim - 1):
                M *= X.shape[i]
            N = X.shape[ndim - 1]
            N2 = int(N / 2)
            yShape = list(X.shape)
            yShape[ndim - 1] = int(yShape[ndim - 1] / 2)
            Y = np.zeros(yShape)
            for i in range(0, M):
                for j in range(0, N2):
                    x1 = X.flat[i * N + j]
                    x2 = X.flat[i * N + j + N2]
                    Y.flat[i * N2 + j] = x1 * (1. / (1. + np.exp(-x2)))
            return [Y]

        # Test only valid tensors.
        assume(X.shape[X.ndim - 1] % 2 == 0)
        op = core.CreateOperator("Glu", ["X"], ["Y"])
        self.assertReferenceChecks(gc, op, [X], glu_ref)
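The nested loops in glu_ref compute GLU(x) = x1 * sigmoid(x2), where x1 and x2 are the two halves of the last axis. A vectorized NumPy equivalent (a standalone sketch, not the test's reference):

import numpy as np

def glu_vectorized(X):
    x1, x2 = np.split(X, 2, axis=-1)              # halves of the last axis
    return x1 / (1.0 + np.exp(-x2))               # x1 * sigmoid(x2)

X = np.array([[1.0, 2.0, 0.0, 0.0]])              # sigmoid(0) == 0.5
assert np.allclose(glu_vectorized(X), [[0.5, 1.0]])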
Example #24
class TestNegateGradient(hu.HypothesisTestCase):
    @given(X=hu.tensor(), inplace=st.booleans(), **hu.gcs)
    def test_forward(self, X, inplace, gc, dc):
        def neg_grad_ref(X):
            return (X, )

        op = core.CreateOperator("NegateGradient", ["X"],
                                 ["Y" if not inplace else "X"])
        self.assertReferenceChecks(gc, op, [X], neg_grad_ref)
        self.assertDeviceChecks(dc, op, [X], [0])

    @given(size=st.lists(st.integers(min_value=1, max_value=20),
                         min_size=1,
                         max_size=5))
    def test_grad(self, size):
        X = np.random.random_sample(size)
        workspace.ResetWorkspace()
        workspace.FeedBlob("X", X.astype(np.float32))

        net = core.Net("negate_grad_test")
        Y = net.NegateGradient(["X"], ["Y"])

        grad_map = net.AddGradientOperators([Y])
        workspace.RunNetOnce(net)

        # check X_grad == negate of Y_grad
        x_val, y_val = workspace.FetchBlobs(['X', 'Y'])
        x_grad_val, y_grad_val = workspace.FetchBlobs(
            [grad_map['X'], grad_map['Y']])
        np.testing.assert_array_equal(x_val, y_val)
        np.testing.assert_array_equal(x_grad_val, y_grad_val * (-1))
Example #25
class PythonOpTest(hu.HypothesisTestCase):
    @given(x=hu.tensor(),
           n=st.integers(min_value=1, max_value=20),
           w=st.integers(min_value=1, max_value=20))
    @settings(deadline=10000)
    def test_simple_python_op(self, x, n, w):
        def g(input_, output):
            output[...] = input_

        def f(inputs, outputs):
            outputs[0].reshape(inputs[0].shape)
            g(inputs[0].data, outputs[0].data)

        ops = [CreatePythonOperator(f, ["x"], [str(i)]) for i in range(n)]
        net = core.Net("net")
        net.Proto().op.extend(ops)
        net.Proto().type = "dag"
        net.Proto().num_workers = w
        iters = 100
        plan = core.Plan("plan")
        plan.AddStep(core.ExecutionStep("test-step", net, iters))
        workspace.FeedBlob("x", x)
        workspace.RunPlan(plan.Proto().SerializeToString())
        for i in range(n):
            y = workspace.FetchBlob(str(i))
            np.testing.assert_almost_equal(x, y)
Example #26
def _tensor_splits(draw, add_axis=False):
    """Generates (axis, split_info, tensor_splits) tuples."""
    tensor = draw(hu.tensor(min_value=4))  # Each dim has at least 4 elements.
    axis = draw(st.integers(-len(tensor.shape), len(tensor.shape) - 1))
    if add_axis:
        # Simple case: get individual slices along one axis, where each of them
        # is (N-1)-dimensional. The axis will be added back upon concatenation.
        return (
            axis,
            np.ones(tensor.shape[axis], dtype=np.int32),
            [
                np.array(tensor.take(i, axis=axis))
                for i in range(tensor.shape[axis])
            ]
        )
    else:
        # General case: pick some (possibly consecutive, even non-unique)
        # indices at which we will split the tensor, along the given axis.
        splits = sorted(draw(
            st.lists(elements=st.integers(0, tensor.shape[axis]), max_size=4)
        ) + [0, tensor.shape[axis]])
        return (
            axis,
            np.array(np.diff(splits), dtype=np.int32),
            [
                tensor.take(range(splits[i], splits[i + 1]), axis=axis)
                for i in range(len(splits) - 1)
            ],
        )
Example #27
class TestGivenTensorFillOps(hu.HypothesisTestCase):
    @given(X=hu.tensor(min_dim=1, max_dim=4, dtype=np.int32),
           t=st.sampled_from([
               (core.DataType.BOOL, np.bool_, "GivenTensorFill"),
               (core.DataType.INT32, np.int32, "GivenTensorFill"),
               (core.DataType.FLOAT, np.float32, "GivenTensorFill"),
               (core.DataType.INT16, np.int16, "GivenTensorInt16Fill"),
               (core.DataType.INT32, np.int32, "GivenTensorIntFill"),
               (core.DataType.INT64, np.int64, "GivenTensorInt64Fill"),
               (core.DataType.BOOL, np.bool_, "GivenTensorBoolFill"),
               (core.DataType.DOUBLE, np.double, "GivenTensorDoubleFill"),
               (core.DataType.INT32, np.double, "GivenTensorDoubleFill"),
           ]),
           **hu.gcs)
    def test_given_tensor_fill(self, X, t, gc, dc):
        X = X.astype(t[1])
        print('X: ', str(X))
        op = core.CreateOperator(
            t[2],
            [],
            ["Y"],
            shape=X.shape,
            dtype=t[0],
            values=X.reshape((1, X.size)),
        )

        def constant_fill(*args, **kw):
            return [X]

        self.assertReferenceChecks(gc, op, [], constant_fill)
        self.assertDeviceChecks(dc, op, [], [0])
Example #28
class TestEnforceFinite(hu.HypothesisTestCase):
    @given(
        X=hu.tensor(
            # allow empty
            min_value=0,
            elements=st.floats(allow_nan=True, allow_infinity=True),
        ),
        **hu.gcs
    )
    def test_enforce_finite(self, X, gc, dc):

        def all_finite_value(X):
            if X.size <= 0:
                return True

            return np.isfinite(X).all()

        net = core.Net('test_net')
        net.Const(array=X, blob_out="X")
        net.EnforceFinite("X", [])

        if all_finite_value(X):
            self.assertTrue(workspace.RunNetOnce(net))
        else:
            with self.assertRaises(RuntimeError):
                workspace.RunNetOnce(net)
Example #30
    def test_sparse_adam_output_grad(self, inputs, ITER, LR, beta1, beta2,
                                     epsilon, data_strategy, gc, dc):
        param, mom1, mom2, grad = inputs
        mom2 = np.absolute(mom2)
        ITER = np.array([ITER], dtype=np.int64)
        LR = np.array([LR], dtype=np.float32)

        # Create an indexing array containing values which index into grad
        indices = data_strategy.draw(
            hu.tensor(
                max_dim=1,
                min_value=1,
                max_value=grad.shape[0],
                dtype=np.int64,
                elements=st.sampled_from(np.arange(grad.shape[0])),
            ), )

        # Verify that the generated indices are unique
        hypothesis.assume(
            np.array_equal(np.unique(indices.flatten()),
                           np.sort(indices.flatten())))

        # Sparsify grad
        grad = grad[indices]

        op = core.CreateOperator(
            "SparseAdam",
            ["param", "mom1", "mom2", "indices", "grad", "lr", "iter"],
            ["param", "mom1", "mom2", "output_grad"],
            beta1=beta1,
            beta2=beta2,
            epsilon=epsilon)

        def ref_sparse_output_grad(param, mom1, mom2, indices, grad, LR, ITER,
                                   beta1, beta2, epsilon, output_grad):
            param_out = np.copy(param)
            mom1_out = np.copy(mom1)
            mom2_out = np.copy(mom2)
            grad_out = np.copy(grad)

            for i, index in enumerate(indices):
                param_out[index], mom1_out[index], mom2_out[index], grad_out[i] = \
                    self.ref_adam(param[index], mom1[index], mom2[index],
                                  grad[i], LR, ITER,
                                  beta1, beta2, epsilon, output_grad)
            return (param_out, mom1_out, mom2_out, grad_out)

        # Iter lives on the CPU
        input_device_options = {'iter': hu.cpu_do}

        self.assertReferenceChecks(
            gc,
            op, [param, mom1, mom2, indices, grad, LR, ITER],
            functools.partial(ref_sparse_output_grad,
                              beta1=beta1,
                              beta2=beta2,
                              epsilon=epsilon,
                              output_grad=True),
            input_device_options=input_device_options)
Example #31
def _data(draw):
    return draw(
        hu.tensor(
            dtype=np.int64,
            elements=st.integers(
                min_value=np.iinfo(np.int64).min,
                max_value=np.iinfo(np.int64).max,
            ),
        )
    )
Example #32
    def test_smart_decay_sparse_adam(self, inputs, ITER, LR, beta1, beta2, epsilon,
                                     data_strategy, gc, dc):
        param, mom1, mom2, grad = inputs

        mom2 = np.absolute(mom2)
        ITER = np.array([ITER], dtype=np.int64)
        # Define the last_seen tensor as randomly drawn from 0 to ITER
        # (the value of t to be tested will be ITER + 1)
        last_seen = np.random.randint(low=0, high=ITER + 1, size=param.shape, dtype=np.int64)
        LR = np.array([LR], dtype=np.float32)

        # Create an indexing array containing values which index into grad
        indices = data_strategy.draw(
            hu.tensor(
                max_dim=1,
                min_value=1,
                max_value=grad.shape[0],
                dtype=np.int64,
                elements=st.sampled_from(np.arange(grad.shape[0])),
            ),
        )

        # Verify that the generated indices are unique
        hypothesis.assume(
            np.array_equal(
                np.unique(indices.flatten()),
                np.sort(indices.flatten())))

        # Sparsify grad
        grad = grad[indices]

        op = core.CreateOperator(
            "SmartDecaySparseAdam",
            ["param", "mom1", "mom2", "last_seen", "indices", "grad", "lr", "iter"],
            ["param", "mom1", "mom2", "last_seen"],
            beta1=beta1, beta2=beta2, epsilon=epsilon)

        def ref_sparse(param, mom1, mom2, last_seen, indices, grad, LR, ITER):
            param_out = np.copy(param)
            mom1_out = np.copy(mom1)
            mom2_out = np.copy(mom2)
            last_seen_out = np.copy(last_seen)

            for i, index in enumerate(indices):
                param_out[index], mom1_out[index], mom2_out[index], last_seen_out[index] = \
                    self.ref_smart_decay_adam(param[index], mom1[index], mom2[index], last_seen[index],
                                              grad[i], LR, ITER,
                                              beta1, beta2, epsilon)
            return (param_out, mom1_out, mom2_out, last_seen_out)

        # Iter lives on the CPU
        input_device_options = {'iter': hu.cpu_do}

        self.assertReferenceChecks(
            gc, op,
            [param, mom1, mom2, last_seen, indices, grad, LR, ITER],
            ref_sparse,
            input_device_options=input_device_options)
Example #33
def adagrad_sparse_test_helper(parent_test, inputs, lr, epsilon, data_strategy,
                               engine, ref_adagrad, gc, dc):
    param, momentum, grad = inputs
    momentum = np.abs(momentum)
    lr = np.array([lr], dtype=np.float32)

    # Create an indexing array containing values that index into grad
    indices = data_strategy.draw(
        hu.tensor(dtype=np.int64,
                  elements=st.sampled_from(np.arange(grad.shape[0]))), )
    hypothesis.note('indices.shape: %s' % str(indices.shape))

    # For now, the indices must be unique
    hypothesis.assume(
        np.array_equal(np.unique(indices.flatten()),
                       np.sort(indices.flatten())))

    # Sparsify grad
    grad = grad[indices]

    op = core.CreateOperator("SparseAdagrad",
                             ["param", "momentum", "indices", "grad", "lr"],
                             ["param", "momentum"],
                             epsilon=epsilon,
                             engine=engine,
                             device_option=gc)

    def ref_sparse(param, momentum, indices, grad, lr, ref_using_fp16=False):
        param_out = np.copy(param)
        momentum_out = np.copy(momentum)
        for i, index in enumerate(indices):
            param_out[index], momentum_out[index] = ref_adagrad(
                param[index],
                momentum[index],
                grad[i],
                lr,
                epsilon,
                using_fp16=ref_using_fp16)
        return (param_out, momentum_out)

    ref_using_fp16_values = [False]
    if dc == hu.gpu_do:
        ref_using_fp16_values.append(True)

    for ref_using_fp16 in ref_using_fp16_values:
        if ref_using_fp16:
            print('test_sparse_adagrad with half precision embedding')
            momentum_i = momentum.astype(np.float16)
            param_i = param.astype(np.float16)
        else:
            print('test_sparse_adagrad with full precision embedding')
            momentum_i = momentum.astype(np.float32)
            param_i = param.astype(np.float32)

        parent_test.assertReferenceChecks(
            gc, op, [param_i, momentum_i, indices, grad, lr, ref_using_fp16],
            ref_sparse)
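ref_adagrad is passed in from elsewhere and not shown here; the standard dense AdaGrad update it is expected to implement looks roughly like this (a hedged sketch; the real helper also handles fp16 and may differ in sign convention):

import numpy as np

def adagrad_step(param, momentum, grad, lr, epsilon):
    # accumulate squared gradients, then scale the step per coordinate
    momentum_out = momentum + grad * grad
    param_out = param + lr * grad / (np.sqrt(momentum_out) + epsilon)
    return param_out, momentum_out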
Example #34
class TestErfOp(serial.SerializedTestCase):
    @serial.given(
        X=hu.tensor(elements=hu.floats(min_value=-0.7, max_value=0.7)),
        **hu.gcs)
    def test_erf(self, X, gc, dc):
        op = core.CreateOperator('Erf', ["X"], ["Y"])
        self.assertReferenceChecks(gc, op, [X], lambda x: (np.vectorize(math.erf)(x),))
        self.assertDeviceChecks(dc, op, [X], [0])
        self.assertGradientChecks(gc, op, [X], 0, [0])
Example #35
    def test_sparse_adam(self, inputs, ITER, LR, beta1, beta2, epsilon,
                         data_strategy, gc, dc):
        param, mom1, mom2, grad = inputs
        mom1 = np.absolute(mom1)
        mom2 = np.absolute(mom2)
        ITER = np.array([ITER], dtype=np.int64)
        LR = np.array([LR], dtype=np.float32)

        # Create an indexing array containing values which index into grad
        indices = data_strategy.draw(
            hu.tensor(
                max_dim=1,
                min_value=1,
                max_value=grad.shape[0],
                dtype=np.int64,
                elements=st.sampled_from(np.arange(grad.shape[0])),
            ),
        )

        # Verify that the generated indices are unique
        hypothesis.assume(
            np.array_equal(
                np.unique(indices.flatten()),
                np.sort(indices.flatten())))

        # Sparsify grad
        grad = grad[indices]

        op = core.CreateOperator(
            "SparseAdam",
            ["param", "mom1", "mom2", "indices", "grad", "lr", "iter"],
            ["param", "mom1", "mom2"],
            beta1=beta1, beta2=beta2, epsilon=epsilon)

        def ref_sparse(param, mom1, mom2, indices, grad, LR, ITER):
            param_out = np.copy(param)
            mom1_out = np.copy(mom1)
            mom2_out = np.copy(mom2)
            for i, index in enumerate(indices):
                param_out[index], mom1_out[index], mom2_out[index] = \
                    self.ref_adam(param[index], mom1[index], mom2[index],
                                  grad[i], LR, ITER,
                                  beta1, beta2, epsilon)
            return (param_out, mom1_out, mom2_out)

        # Iter lives on the CPU
        input_device_options = {'iter': hu.cpu_do}

        self.assertReferenceChecks(
            gc, op,
            [param, mom1, mom2, indices, grad, LR, ITER],
            ref_sparse,
            input_device_options=input_device_options)
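self.ref_adam is likewise defined outside this snippet; per index it is expected to apply the standard bias-corrected Adam update, roughly as follows (a hedged sketch under that assumption, with t the 1-based iteration count):

import numpy as np

def adam_step(param, m1, m2, grad, lr, t, beta1, beta2, epsilon):
    m1_out = beta1 * m1 + (1.0 - beta1) * grad
    m2_out = beta2 * m2 + (1.0 - beta2) * grad * grad
    correction = np.sqrt(1.0 - beta2 ** t) / (1.0 - beta1 ** t)
    param_out = param + lr * correction * m1_out / (np.sqrt(m2_out) + epsilon)
    return param_out, m1_out, m2_out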
Example #36
    def test_row_wise_sparse_adagrad(self, inputs, lr, epsilon,
                                     data_strategy, gc, dc):
        param, grad = inputs
        lr = np.array([lr], dtype=np.float32)

        # Create a 1D row-wise average sum of squared gradients tensor.
        momentum = data_strategy.draw(
            hu.tensor1d(min_len=param.shape[0], max_len=param.shape[0],
                        elements=hu.elements_of_type(dtype=np.float32))
        )
        momentum = np.abs(momentum)

        # Create an indexing array containing values which index into grad
        indices = data_strategy.draw(
            hu.tensor(dtype=np.int64,
                      elements=st.sampled_from(np.arange(grad.shape[0]))),
        )

        # Note that unlike SparseAdagrad, RowWiseSparseAdagrad uses a moment
        # tensor that is strictly 1-dimensional and equal in length to the
        # first dimension of the parameters, so indices must also be
        # 1-dimensional.
        indices = indices.flatten()

        hypothesis.note('indices.shape: %s' % str(indices.shape))

        # The indices must be unique
        hypothesis.assume(np.array_equal(np.unique(indices), np.sort(indices)))

        # Sparsify grad
        grad = grad[indices]

        op = core.CreateOperator(
            "RowWiseSparseAdagrad",
            ["param", "momentum", "indices", "grad", "lr"],
            ["param", "momentum"],
            epsilon=epsilon,
            device_option=gc)

        def ref_row_wise_sparse(param, momentum, indices, grad, lr):
            param_out = np.copy(param)
            momentum_out = np.copy(momentum)
            for i, index in enumerate(indices):
                param_out[index], momentum_out[index] = self.ref_row_wise_adagrad(
                    param[index], momentum[index], grad[i], lr, epsilon)
            return (param_out, momentum_out)

        self.assertReferenceChecks(
            gc, op,
            [param, momentum, indices, grad, lr],
            ref_row_wise_sparse)
Example #37
    def test_sparse_normalize(self, inputs, use_max_norm, norm,
                              data_strategy, gc, dc):
        param, grad = inputs
        param += 0.02 * np.sign(param)
        param[param == 0.0] += 0.02

        # Create an indexing array containing values that index into grad
        indices = data_strategy.draw(
            hu.tensor(dtype=np.int64, min_dim=1, max_dim=1,
                      elements=st.sampled_from(np.arange(grad.shape[0]))),
        )
        hypothesis.note('indices.shape: %s' % str(indices.shape))

        # For now, the indices must be unique
        hypothesis.assume(np.array_equal(np.unique(indices.flatten()),
                                         np.sort(indices.flatten())))

        # Sparsify grad
        grad = grad[indices]

        op = core.CreateOperator(
            "SparseNormalize",
            ["param", "indices", "grad"],
            ["param"],
            use_max_norm=use_max_norm,
            norm=norm,
        )

        def ref_sparse_normalize(param, indices, grad):
            param_out = np.copy(param)
            for _, index in enumerate(indices):
                param_out[index] = self.ref_normalize(
                    param[index],
                    use_max_norm,
                    norm,
                )
            return (param_out,)

        # self.assertDeviceChecks(dc, op, [param, indices, grad], [0])
        self.assertReferenceChecks(
            gc, op, [param, indices, grad],
            ref_sparse_normalize
        )
Example #38
    def test_sparse_adagrad(self, inputs, lr, epsilon,
                            data_strategy, gc, dc):
        param, momentum, grad = inputs
        momentum = np.abs(momentum)
        lr = np.array([lr], dtype=np.float32)

        # Create an indexing array containing values which index into grad
        indices = data_strategy.draw(
            hu.tensor(dtype=np.int64,
                      elements=st.sampled_from(np.arange(grad.shape[0]))),
        )
        hypothesis.note('indices.shape: %s' % str(indices.shape))

        # For now, the indices must be unique
        hypothesis.assume(np.array_equal(np.unique(indices.flatten()),
                                         np.sort(indices.flatten())))

        # Sparsify grad
        grad = grad[indices]

        op = core.CreateOperator(
            "SparseAdagrad",
            ["param", "momentum", "indices", "grad", "lr"],
            ["param", "momentum"],
            epsilon=epsilon,
            device_option=gc)

        def ref_sparse(param, momentum, indices, grad, lr):
            param_out = np.copy(param)
            momentum_out = np.copy(momentum)
            for i, index in enumerate(indices):
                param_out[index], momentum_out[index] = self.ref_adagrad(
                    param[index], momentum[index], grad[i], lr, epsilon)
            return (param_out, momentum_out)

        self.assertReferenceChecks(
            gc, op,
            [param, momentum, indices, grad, lr],
            ref_sparse)
Example #39
def _data(draw):
    dtype = draw(st.sampled_from([np.int32, np.int64]))
    return draw(hu.tensor(dtype=dtype))
Example #40
    def test_row_wise_sparse_adam(self, inputs, ITER, LR, beta1, beta2, epsilon,
                                  data_strategy, gc, dc):
        param, mom1, grad = inputs
        ITER = np.array([ITER], dtype=np.int64)
        LR = np.array([LR], dtype=np.float32)

        # Create a 1D row-wise average 2nd moment tensor.
        mom2 = data_strategy.draw(
            hu.tensor1d(min_len=param.shape[0], max_len=param.shape[0],
                        elements=hu.elements_of_type(dtype=np.float32))
        )
        mom2 = np.absolute(mom2)

        # Create an indexing array containing values which index into grad
        indices = data_strategy.draw(
            hu.tensor(
                max_dim=1,
                min_value=1,
                max_value=grad.shape[0],
                dtype=np.int64,
                elements=st.sampled_from(np.arange(grad.shape[0])),
            ),
        )

        # Note that unlike SparseAdam, RowWiseSparseAdam uses a moment
        # tensor that is strictly 1-dimensional and equal in length to the
        # first dimension of the parameters, so indices must also be
        # 1-dimensional.
        indices = indices.flatten()

        hypothesis.note('indices.shape: %s' % str(indices.shape))

        # Verify that the generated indices are unique
        hypothesis.assume(np.array_equal(np.unique(indices), np.sort(indices)))

        # Sparsify grad
        grad = grad[indices]

        op = core.CreateOperator(
            "RowWiseSparseAdam",
            ["param", "mom1", "mom2", "indices", "grad", "lr", "iter"],
            ["param", "mom1", "mom2"],
            beta1=beta1, beta2=beta2, epsilon=epsilon)

        def ref_row_wise_sparse(param, mom1, mom2, indices, grad, LR, ITER):
            param_out = np.copy(param)
            mom1_out = np.copy(mom1)
            mom2_out = np.copy(mom2)
            for i, index in enumerate(indices):
                param_out[index], mom1_out[index], mom2_out[index] = \
                    self.ref_row_wise_adam(param[index], mom1[index], mom2[index],
                                           grad[i], LR, ITER,
                                           beta1, beta2, epsilon)
            return (param_out, mom1_out, mom2_out)

        # Iter lives on the CPU
        input_device_options = {'iter': hu.cpu_do}

        self.assertReferenceChecks(
            gc, op,
            [param, mom1, mom2, indices, grad, LR, ITER],
            ref_row_wise_sparse,
            input_device_options=input_device_options)