Ejemplo n.º 1
0
class TestFusionReductionAndElementwise(unittest.TestCase):
    """Fusion tests mixing a ``sum`` reduction with elementwise operations.

    "premap" tests perform the elementwise work before the reduction and
    "postmap" tests perform it on the reduction's result.  Each test returns
    the callable to be checked; the ``check_fusion`` decorator is what
    consumes it.

    NOTE(review): ``self.axis`` is read by every test but is never assigned
    in this class; presumably a parameterize decorator outside this view
    supplies it -- confirm.
    """

    def generate_inputs(self, xp):
        """Build the two input arrays shared by every test in this class.

        Args:
            xp: Array module handed to ``testing.shaped_random``
                (presumably ``numpy`` or ``cupy`` -- confirm).

        Returns:
            A ``((x, y), {})`` pair holding two ``(3, 4)`` ``int64`` arrays
            and an empty dict.
        """
        x = testing.shaped_random((3, 4), xp, 'int64', scale=10, seed=0)
        # Seed ``y`` differently from ``x``: with identical seeds the two
        # operands would be equal, so ``x + y`` could not be told apart from
        # ``x + x`` and operand mix-ups would go unnoticed.
        y = testing.shaped_random((3, 4), xp, 'int64', scale=10, seed=1)
        return (x, y), {}

    # Elementwise scaling of one array feeds the reduction.
    @fusion_utils.check_fusion()
    def test_premap_one_array(self, xp):
        return lambda x, y: xp.sum(x * 3, self.axis)

    # Elementwise addition of both arrays feeds the reduction.
    @fusion_utils.check_fusion()
    def test_premap_two_arrays(self, xp):
        return lambda x, y: xp.sum(x + y, self.axis)

    # A scalar is added to the reduction's result.
    @fusion_utils.check_fusion()
    def test_postmap_one_array(self, xp):
        return lambda x, y: xp.sum(x, self.axis) + 3

    # The unreduced array ``y`` is added to the reduction's result.
    @unittest.skipUnless(fusion_utils.can_use_grid_synchronization(),
                         'Requires CUDA grid synchronization')
    @fusion_utils.check_fusion(accept_error=ValueError)
    def test_postmap_two_arrays(self, xp):
        return lambda x, y: xp.sum(x, self.axis) + y

    # Elementwise work both before (sqrt, add) and after (scale, add) the
    # reduction.
    @unittest.skipUnless(fusion_utils.can_use_grid_synchronization(),
                         'Requires CUDA grid synchronization')
    @fusion_utils.check_fusion(accept_error=ValueError)
    def test_premap_postmap(self, xp):
        return lambda x, y: xp.sum(xp.sqrt(x) + y, self.axis) * 2 + y

    # TODO(asi1024): Uncomment after replacing the fusion implementation.
    # @fusion_utils.check_fusion()
    # def test_premap_inplace(self, xp):
    #     def impl(x, y):
    #         x += 2
    #         y += x
    #         return xp.sum(y, self.axis)
    #     return impl

    # In-place updates of ``y`` on both sides of the reduction.
    @unittest.skipUnless(fusion_utils.can_use_grid_synchronization(),
                         'Requires CUDA grid synchronization')
    @fusion_utils.check_fusion(accept_error=ValueError)
    def test_postmap_inplace(self, xp):
        def impl(x, y):
            # ``impl`` has no return value; its only effect is the in-place
            # modification of ``y``.
            y += x
            res = xp.sum(x, self.axis)
            y += res

        return impl
Ejemplo n.º 2
0
class TestFusionMultistageReductions(unittest.TestCase):
    """Fusion tests in which one reduction consumes the result of another."""

    def generate_inputs(self, xp):
        """Return ``((x,), {})`` where ``x`` is a ``(3, 4, 5)`` int64 array.

        Args:
            xp: Array module handed to ``testing.shaped_random``
                (presumably ``numpy`` or ``cupy`` -- confirm).
        """
        inputs = (
            testing.shaped_random((3, 4, 5), xp, 'int64', scale=10, seed=0),
        )
        return inputs, {}

    # A product over axis 1 followed by a sum over axis 1 of that result.
    @unittest.skipUnless(
        fusion_utils.can_use_grid_synchronization(),
        'Requires CUDA grid synchronization')
    @fusion_utils.check_fusion()
    def test_multistage_reductions(self, xp):
        return lambda x: x.prod(axis=1).sum(axis=1)

    # Elementwise steps (sqrt, add, scale) interleaved with the two
    # reductions.
    @unittest.skipUnless(
        fusion_utils.can_use_grid_synchronization(),
        'Requires CUDA grid synchronization')
    @fusion_utils.check_fusion()
    def test_multistage_reductions_and_elementwise(self, xp):
        return lambda x: (xp.sqrt(x).prod(axis=0) + x).sum(axis=1) * 2
Ejemplo n.º 3
0
class TestFusionTuple(FusionTestBase):
    """Fusion tests covering tuples inside and returned from fused functions.

    Input generation is not defined in this class; it is presumably
    inherited from ``FusionTestBase`` (outside this view), with
    ``generate_inputs_args`` apparently selecting how many arrays are passed
    to ``func`` -- TODO confirm.
    """

    # Tuple packing/unpacking is used to swap two intermediates mid-function.
    @testing.for_all_dtypes(no_complex=True)
    @fusion_utils.check_fusion(generate_inputs_args=(3, ))
    def test_tuple(self, xp, dtype):
        def func(x, y, z):
            w = x * y + z
            (x, w) = (w, x)
            return z * w + y + x

        return func

    # The function returns a three-element tuple of results.
    @testing.for_all_dtypes(no_complex=True)
    @fusion_utils.check_fusion(generate_inputs_args=(3, ))
    def test_return_tuple(self, xp, dtype):
        def func(x, y, z):
            return x + y, y + z, z * x

        return func

    # Three additions whose operands have different dtypes after the casts.
    @testing.for_all_dtypes(no_complex=True)
    @fusion_utils.check_fusion(generate_inputs_args=(3, ))
    def test_multiple_outputdifferent_type_same_ufunc(self, xp, dtype):
        def func(x, y, z):
            x = x.astype('int32')
            # NOTE(review): ``y`` is rebuilt from the already-cast ``x``, so
            # the ``y`` argument is discarded -- confirm this is intended
            # rather than ``y.astype('float32')``.
            y = x.astype('float32')
            return x + y, y + z, z + x

        return func

    # The function ignores its input and returns an empty tuple.
    @testing.for_all_dtypes(no_complex=True)
    @fusion_utils.check_fusion(generate_inputs_args=(1, ))
    def test_return_empty_tuple(self, xp, dtype):
        def func(x):
            return ()

        return func

    # The function returns its input wrapped in a one-element tuple.
    @testing.for_all_dtypes(no_complex=True)
    @fusion_utils.check_fusion(generate_inputs_args=(1, ))
    def test_return_singleton_tuple(self, xp, dtype):
        def func(x):
            return (x, )

        return func

    # Returns reduced results (``c``, ``b``) together with unreduced ones
    # (``x``, ``y``, ``a``), after in-place updates of both inputs.
    @unittest.skipUnless(fusion_utils.can_use_grid_synchronization(),
                         'Requires CUDA grid synchronization')
    @testing.for_all_dtypes(no_bool=True, no_complex=True)
    @fusion_utils.check_fusion(generate_inputs_args=(2, ))
    def test_various_shape(self, xp, dtype):
        def func(x, y):
            a = x + y
            b = xp.sum(a, axis=0)
            # ``x`` is modified in place with the first reduction's result
            # and then reduced itself.
            x += b
            c = xp.sum(x, axis=0)
            y += c
            return c, b, x, y, a

        return func
Ejemplo n.º 4
0
class TestFusionMultipleReductions(unittest.TestCase):
    """Fusion tests with two independent ``sum`` reductions in one function.

    NOTE(review): ``self.axis1`` and ``self.axis2`` are read by the tests but
    never assigned in this class; presumably a parameterize decorator outside
    this view supplies them -- confirm.
    """

    def generate_inputs(self, xp):
        """Build the two input arrays shared by both tests.

        Args:
            xp: Array module handed to ``testing.shaped_random``
                (presumably ``numpy`` or ``cupy`` -- confirm).

        Returns:
            A ``((x, y), {})`` pair holding two ``(3, 4)`` ``int64`` arrays
            and an empty dict.
        """
        x = testing.shaped_random((3, 4), xp, 'int64', scale=10, seed=0)
        # Seed ``y`` differently from ``x``: with identical inputs the two
        # reductions give the same values whenever both axes coincide, so a
        # swapped or aliased output could not be detected.
        y = testing.shaped_random((3, 4), xp, 'int64', scale=10, seed=1)
        return (x, y), {}

    # Each array is reduced on its own and both results are returned.
    @unittest.skipUnless(fusion_utils.can_use_grid_synchronization(),
                         'Requires CUDA grid synchronization')
    @fusion_utils.check_fusion()
    def test_two_distinct_reductions(self, xp):
        return lambda x, y: (x.sum(self.axis1), y.sum(self.axis2))

    # The two reduction results are combined with an elementwise addition.
    @unittest.skipUnless(fusion_utils.can_use_grid_synchronization(),
                         'Requires CUDA grid synchronization')
    @fusion_utils.check_fusion(accept_error=ValueError)
    def test_two_reductions_and_elementwise(self, xp):
        return lambda x, y: x.sum(self.axis1) + y.sum(self.axis2)
Ejemplo n.º 5
0
class TestIndexingCombination(unittest.TestCase):
    """Fusion tests that combine integer indexing with elementwise addition."""

    def generate_inputs(self, xp, dtype1, dtype2):
        """Create the three arrays ``(x, y, z)`` handed to each test function.

        ``x`` has shape ``(3, 4)`` and ``z`` has shape ``(1,)``, both of
        ``dtype1``; ``y`` has shape ``(4,)`` and ``dtype2``.  Each array is
        generated with its own seed.

        Returns:
            A ``((x, y, z), {})`` pair.
        """
        def random_array(shape, dtype, seed):
            # Every input shares the same array module and scale.
            return testing.shaped_random(
                shape, xp, dtype, scale=10, seed=seed)

        inputs = (
            random_array((3, 4), dtype1, 0),
            random_array((4, ), dtype2, 1),
            random_array((1, ), dtype1, 2),
        )
        return inputs, {}

    # A single element of ``y`` is added to the whole of ``x``.
    @testing.for_all_dtypes_combination(
        names=['dtype1', 'dtype2'], no_bool=True)
    @fusion_utils.check_fusion()
    def test_indexing_and_add_1(self, xp, dtype1, dtype2):
        return lambda x, y, z: x + y[1]

    # A single element of ``z`` and the full ``y`` are added to ``x``.
    @testing.for_all_dtypes_combination(
        names=['dtype1', 'dtype2'], no_bool=True)
    @fusion_utils.check_fusion()
    def test_indexing_and_add_2(self, xp, dtype1, dtype2):
        return lambda x, y, z: x + z[0] + y

    # Two rows of ``x`` are added back onto ``x`` itself.
    @testing.for_all_dtypes_combination(
        names=['dtype1', 'dtype2'], no_bool=True)
    @fusion_utils.check_fusion()
    def test_indexing_and_add_3(self, xp, dtype1, dtype2):
        return lambda x, y, z: x + x[0] + x[1]

    # Mixes two-index element access, row access and the full array.
    @testing.for_all_dtypes_combination(
        names=['dtype1', 'dtype2'], no_bool=True)
    @fusion_utils.check_fusion()
    def test_indexing_and_add_4(self, xp, dtype1, dtype2):
        return lambda x, y, z: x + x[0, 1] + x[1] + x + x[2, 1]

    # Chained indexing: the result of one index is indexed again.
    @testing.for_all_dtypes_combination(
        names=['dtype1', 'dtype2'], no_bool=True)
    @fusion_utils.check_fusion()
    def test_indexing_twice_1(self, xp, dtype1, dtype2):
        return lambda x, y, z: x[0][1]

    # Sum of two chained-indexing results.
    @testing.for_all_dtypes_combination(
        names=['dtype1', 'dtype2'], no_bool=True)
    @fusion_utils.check_fusion()
    def test_indexing_twice_2(self, xp, dtype1, dtype2):
        return lambda x, y, z: x[0][1] + x[1][0]

    # Chained indexing mixed with row access, an element of ``y`` and the
    # full array.
    @unittest.skipUnless(
        fusion_utils.can_use_grid_synchronization(),
        'Requires CUDA grid synchronization')
    @testing.for_all_dtypes_combination(
        names=['dtype1', 'dtype2'], no_bool=True)
    @fusion_utils.check_fusion()
    def test_indexing_twice_3(self, xp, dtype1, dtype2):
        return lambda x, y, z: x[0][1] + x[1] + y[0] + x[1][0] + x