예제 #1
0
    def test_bitlog(self):
        """Check ``bitlog2`` against a small table of known results.

        Every expected value in the table equals ``floor(log2(x))`` of the
        corresponding input.
        """
        from pycuda.tools import bitlog2

        known_values = (
            (17, 4),
            (0xAFFE, 15),
            (0x3AFFE, 17),
            (0xCC3AFFE, 27),
        )
        for argument, expected in known_values:
            assert bitlog2(argument) == expected
예제 #2
0
    def test_bitlog(self):
        """Verify ``bitlog2`` on four fixed inputs.

        The inputs and their expected results are kept in two parallel
        tuples; each expected result is the index of the input's highest
        set bit.
        """
        from pycuda.tools import bitlog2

        inputs = (17, 0xAFFE, 0x3AFFE, 0xCC3AFFE)
        results = (4, 15, 17, 27)
        for value, highest_bit in zip(inputs, results):
            assert bitlog2(value) == highest_bit
예제 #3
0
    def test_mempool(self):
        """Exercise ``DeviceMemoryPool`` with a rolling window of allocations.

        Two request sizes are used, ``1 << (e0 - 6)`` and ``1 << (e0 - 5)``,
        where ``e0`` is ``bitlog2`` of the free amount reported by
        ``drv.mem_get_info()``.  Each size is requested 100 times while only
        the ten most recent allocations are kept referenced.
        """
        from pycuda.tools import DeviceMemoryPool, bitlog2

        mem_pool = DeviceMemoryPool()
        free_mem, _total_mem = drv.mem_get_info()
        top_exponent = bitlog2(free_mem)

        held = []
        for exponent in range(top_exponent - 6, top_exponent - 4):
            request = 1 << exponent
            for _ in range(100):
                held.append(mem_pool.allocate(request))
                # Drop the oldest allocation once more than ten are held.
                # NOTE(review): presumably releasing the reference hands the
                # block back to the pool for reuse -- confirm in PyCUDA docs.
                if len(held) > 10:
                    del held[0]
        del held
        mem_pool.stop_holding()
예제 #4
0
    def test_mempool(self):
        """Exercise ``DeviceMemoryPool`` with a bounded queue of allocations.

        For each exponent ``e`` in ``[e0 - 6, e0 - 4)``, where ``e0`` is
        ``bitlog2`` of the free amount reported by ``drv.mem_get_info()``,
        the pool is asked 100 times for ``1 << e``.  At most ``maxlen``
        allocations stay referenced; the oldest one is dropped whenever the
        queue grows beyond that limit.
        """
        from pycuda.tools import bitlog2
        from pycuda.tools import DeviceMemoryPool

        pool = DeviceMemoryPool()
        maxlen = 10
        queue = []
        free, _total = drv.mem_get_info()

        e0 = bitlog2(free)

        for e in range(e0 - 6, e0 - 4):
            for _ in range(100):
                queue.append(pool.allocate(1 << e))
                # Use the named limit rather than a duplicated literal, so
                # changing ``maxlen`` actually changes the window size.
                if len(queue) > maxlen:
                    queue.pop(0)
        del queue
        pool.stop_holding()
예제 #5
0
 def test_bitlog(self):
     """Check ``bitlog2`` against known (input, result) pairs.

     Each expected result equals ``floor(log2(input))``.
     """
     from pycuda.tools import bitlog2
     cases = [(17, 4), (0xaffe, 15), (0x3affe, 17), (0xcc3affe, 27)]
     for number, expected in cases:
         assert bitlog2(number) == expected
예제 #6
0
def main():
    """Time element-wise float32 addition on the GPU and on the CPU.

    For every ``power`` in ``range(10, max_power)``, with ``max_power`` being
    ``bitlog2`` of the first value returned by ``drv.mem_get_info()`` minus
    two, the expression ``a + b`` is evaluated repeatedly on ``gpuarray``
    operands and on NumPy operands of ``1 << power`` elements.  Each size is
    printed as its round starts, and a summary table (time per operation,
    size/time ratio for both devices, and the ratio between the two) is
    printed at the end.
    """
    import pycuda.gpuarray as gpuarray
    from pycuda.tools import bitlog2
    from time import time

    problem_sizes = []
    gpu_times = []
    cpu_times = []

    free_mem = drv.mem_get_info()[0]
    max_power = bitlog2(free_mem) - 2
    # The operands are float32, i.e. 4 bytes per element.
    for power in range(10, max_power):
        size = 1 << power
        print(size)
        problem_sizes.append(size)

        a = gpuarray.zeros((size,), dtype=numpy.float32)
        b = gpuarray.zeros((size,), dtype=numpy.float32)
        b.fill(1)

        # Large problems get fewer repetitions.
        repetitions = 100 if power > 20 else 1000

        # gpu -----------------------------------------------------------------
        gpu_begin = drv.Event()
        gpu_finish = drv.Event()
        gpu_begin.record()

        for _ in range(repetitions):
            a + b

        gpu_finish.record()
        gpu_finish.synchronize()

        # The event interval is scaled by 1e-3 before being used as seconds.
        gpu_elapsed = gpu_begin.time_till(gpu_finish) * 1e-3
        gpu_times.append(gpu_elapsed / repetitions)

        # Drop the device operands before the host arrays are created.
        del a, b

        # cpu -----------------------------------------------------------------
        a_cpu = numpy.random.randn(size).astype(numpy.float32)
        b_cpu = numpy.random.randn(size).astype(numpy.float32)

        cpu_begin = time()
        for _ in range(repetitions):
            a_cpu + b_cpu
        cpu_elapsed = time() - cpu_begin

        cpu_times.append(cpu_elapsed / repetitions)

    # Pseudo flops: elements processed per second of a single operation.
    gpu_rates = [n / secs for n, secs in zip(problem_sizes, gpu_times)]
    cpu_rates = [n / secs for n, secs in zip(problem_sizes, cpu_times)]

    from pytools import Table

    header = (
        "Size",
        "Time GPU",
        "Size/Time GPU",
        "Time CPU",
        "Size/Time CPU",
        "GPU vs CPU speedup",
    )
    tbl = Table()
    tbl.add_row(header)
    for row in zip(problem_sizes, gpu_times, gpu_rates, cpu_times, cpu_rates):
        n, gpu_secs, gpu_rate, cpu_secs, cpu_rate = row
        tbl.add_row((n, gpu_secs, gpu_rate, cpu_secs, cpu_rate, gpu_rate / cpu_rate))
    print(tbl)
예제 #7
0
 def test_bitlog(self):
     """Verify ``bitlog2`` on four fixed inputs.

     The expected results are the indices of the highest set bit of each
     input.
     """
     from pycuda.tools import bitlog2
     samples = (17, 0xaffe, 0x3affe, 0xcc3affe)
     highest_bits = (4, 15, 17, 27)
     for sample, highest_bit in zip(samples, highest_bits):
         assert bitlog2(sample) == highest_bit
예제 #8
0
def main():
    """Benchmark element-wise float32 addition: GPU (PyCUDA) vs. CPU (NumPy).

    For every ``power`` in ``range(10, max_power)``, with ``max_power`` being
    ``bitlog2`` of the first value returned by ``drv.mem_get_info()`` minus
    two, ``a + b`` is evaluated ``count`` times on ``gpuarray`` operands and
    on NumPy operands of ``1 << power`` elements.  The size is printed as
    each round starts; a table with the per-operation times, the size/time
    ratios and the GPU/CPU ratio is printed at the end.

    Output uses single-argument ``print(...)`` calls, which produce the same
    text on Python 2 and Python 3 (the former ``print x`` statements were a
    syntax error on Python 3).
    """
    import pycuda.gpuarray as gpuarray
    # Imported once up front instead of on every loop iteration.
    from time import time

    sizes = []
    times_gpu = []
    flops_gpu = []
    flops_cpu = []
    times_cpu = []

    from pycuda.tools import bitlog2
    max_power = bitlog2(drv.mem_get_info()[0]) - 2
    # they're floats, i.e. 4 bytes each
    for power in range(10, max_power):
        size = 1 << power
        print(size)
        sizes.append(size)
        a = gpuarray.zeros((size,), dtype=numpy.float32)
        b = gpuarray.zeros((size,), dtype=numpy.float32)
        b.fill(1)

        # Large problems get fewer repetitions.
        if power > 20:
            count = 100
        else:
            count = 1000

        # gpu -----------------------------------------------------------------
        start = drv.Event()
        end = drv.Event()
        start.record()

        for i in range(count):
            a + b

        end.record()
        end.synchronize()

        # The event interval is scaled by 1e-3 before being used as seconds.
        secs = start.time_till(end) * 1e-3

        times_gpu.append(secs / count)
        flops_gpu.append(size)
        # Drop the device operands before the host arrays are created.
        del a
        del b

        # cpu -----------------------------------------------------------------
        a_cpu = numpy.random.randn(size).astype(numpy.float32)
        b_cpu = numpy.random.randn(size).astype(numpy.float32)

        # start timer
        start = time()
        for i in range(count):
            a_cpu + b_cpu
        secs = time() - start

        times_cpu.append(secs / count)
        flops_cpu.append(size)

    # calculate pseudo flops (elements per second of a single operation)
    flops_gpu = [f / t for f, t in zip(flops_gpu, times_gpu)]
    flops_cpu = [f / t for f, t in zip(flops_cpu, times_cpu)]

    from pytools import Table
    tbl = Table()
    tbl.add_row(("Size", "Time GPU", "Size/Time GPU",
        "Time CPU", "Size/Time CPU", "GPU vs CPU speedup"))
    for s, t, f, t_cpu, f_cpu in zip(sizes, times_gpu, flops_gpu, times_cpu, flops_cpu):
        tbl.add_row((s, t, f, t_cpu, f_cpu, f / f_cpu))
    print(tbl)