Beispiel #1
0
    def test_scan_int32(self):
        """Check ``MyScan.scan_gpu`` on random int32 input.

        The test expects ``scan_gpu`` to overwrite the device array with the
        exclusive prefix sum of the input, and to return a device array
        whose element 0 is the total sum of the input.
        """
        in_ary = np.random.randint(0, 10000, ARRAY_SIZE).astype(np.int32)
        in_ary_d = cuda.to_device(in_ary)

        output = MyScan.scan_gpu(in_ary_d)

        out_carry = output.getitem(0)
        out_ary = in_ary_d.copy_to_host()

        # Build the reference in int64 so the expected values themselves
        # cannot wrap around, whatever ARRAY_SIZE is.
        inclusive = np.cumsum(in_ary, dtype=np.int64)

        # check last carry: integer sums must match exactly; a relative
        # tolerance (np.isclose) would accept a carry that is off by
        # thousands once the total is large.
        np.testing.assert_equal(out_carry, in_ary.sum(dtype=np.int64),
                                err_msg='carry return is not sum')

        # check array: the exclusive prefix sum is the inclusive prefix sum
        # minus each element's own value (exact for integers).
        np.testing.assert_array_equal(out_ary, inclusive - in_ary,
                                      err_msg='output array not correct')
Beispiel #2
0
    def test_scan_fp64(self):
        """Check ``MyScan.scan_gpu`` on random float64 input.

        The test expects ``scan_gpu`` to overwrite the device array with the
        exclusive prefix sum of the input, and to return a device array
        whose element 0 is the total sum of the input.  Comparisons use a
        tolerance because floating-point addition is not associative, so a
        scan that adds in a different order than the sequential reference
        need not be bit-identical to it.
        """
        in_ary = np.random.rand(ARRAY_SIZE).astype(np.float64)
        in_ary_d = cuda.to_device(in_ary)

        output = MyScan.scan_gpu(in_ary_d)

        out_carry = output.getitem(0)
        out_ary = in_ary_d.copy_to_host()

        # check last carry
        self.assertTrue(np.isclose(out_carry, in_ary.sum()),
                        'carry return is not sum')

        # check array: expected[0] == 0 and expected[i] == sum(in_ary[:i])
        expected = np.zeros_like(in_ary)
        np.cumsum(in_ary[:-1], out=expected[1:])
        self.assertEqual(out_ary.shape, expected.shape,
                         'output array not correct')
        self.assertTrue(np.allclose(out_ary, expected),
                        'output array not correct')
Beispiel #3
0
def test_recursive_big_scan():
    """Compare ``MyScan.scan_gpu`` with ``MyScan.exprefixsumNumba``.

    Both routines are run over the same 2,000,000-element int32 ramp.  The
    sums they return and their wall-clock timings are printed, and the test
    fails if the array scanned on the device differs from the reference
    array.
    """
    print("running recursive scan test")

    n = int(2e6)

    a = np.arange(n).astype(np.int32)
    reference = np.empty_like(a)

    start = timer()
    sum_ref = MyScan.exprefixsumNumba(a, reference, init=0)
    end = timer()

    dA = cuda.to_device(a)

    start2 = timer()
    total_sum = MyScan.scan_gpu(dA)
    # Wait for any outstanding device work so the measured interval covers
    # execution and not merely the launch.
    cuda.synchronize()
    end2 = timer()

    # The scan result is read back into ``a``, replacing the input values.
    dA.copy_to_host(ary=a)
    sum_gpu = total_sum.copy_to_host()

    # Formatting reproduces the output of the former print statements while
    # remaining valid on both Python 2 and Python 3.
    print("sum_ref =  %s" % (sum_ref,))
    print("sum_gpu =  %s" % (sum_gpu,))

    print("CPU took:     %s  ms" % ((end - start) * 1000,))
    print("Kernel took:  %s  ms" % ((end2 - start2) * 1000,))

    print((a == reference).all())

    # Printing alone can never fail the test; enforce the comparison.
    np.testing.assert_array_equal(
        a, reference, err_msg="GPU scan does not match the reference scan")