Esempio n. 1
0
    def reduce(self, context, x, out=None, initial=0.0, queue=None):
        '''
        Run ``reduce_kernel`` with ``self.device_func`` over ``x``.

        :param context: forwarded to ``__array_init__`` of the returned array.
        :param x: input data; anything that is not a ``cl.DeviceMemoryView``
            is first passed through ``cl.from_host``.
        :param out: optional output buffer; when omitted, a one-element
            buffer with the format of ``x`` is created with ``cl.empty``.
        :param initial: currently not read by this method.
        :param queue: queue to run on; defaults to ``x.queue``.
        :returns: ``out`` viewed through ``CLArray._view_as_this``.
        '''
        # NOTE(review): the default queue is read from ``x`` before any host
        # conversion, so host data presumably needs an explicit ``queue``.
        queue = x.queue if queue is None else queue

        if not isinstance(x, cl.DeviceMemoryView):
            x = cl.from_host(queue.context, x)

        n_items = x.size
        scratch = cl.local_memory(x.ctype, ndim=1, shape=[n_items])

        # Largest power of two not exceeding half the element count,
        # clamped to the range [1, 512].
        half = n_items // 2
        group_size = next(
            (bound // 2
             for bound in (2, 4, 8, 16, 32, 64, 128, 256, 512)
             if half < bound),
            512)

        if out is None:
            out = cl.empty(queue.context, [1], x.format)

        out_type = cl.global_memory(out.ctype, flat=True)
        in_type = cl.global_memory(x.ctype, flat=True)
        compiled = reduce_kernel.compile(
            queue.context,
            function=self.device_func,
            output=out_type,
            array=in_type,
            shared=scratch,
            group_size=cl.cl_uint,
            cly_meta=self.device_func.func_name)

        # Never request more work items than the kernel reports for this
        # device.
        device_limit = compiled.work_group_size(queue.device)
        group_size = min(device_limit, group_size)

        compiled(queue, out, out.array_info, x, x.array_info,
                 scratch, scratch.local_info, group_size)

        result = CLArray._view_as_this(out)
        result.__array_init__(context, queue)
        return result
Esempio n. 2
0
    def reduce(self, context, x, out=None, initial=0.0, queue=None):
        '''
        Compile ``reduce_kernel`` for ``self.device_func`` and run it on ``x``.

        :param context: handed to ``__array_init__`` on the returned array.
        :param x: input; values that are not ``cl.DeviceMemoryView``
            instances are converted with ``cl.from_host`` first.
        :param out: optional output buffer; defaults to a new one-element
            buffer from ``cl.empty`` using the format of ``x``.
        :param initial: not referenced in the body of this method.
        :param queue: queue used for the launch; falls back to ``x.queue``.
        :returns: the ``CLArray`` view of ``out``.
        '''
        if queue is None:
            queue = x.queue

        is_device_view = isinstance(x, cl.DeviceMemoryView)
        if not is_device_view:
            x = cl.from_host(queue.context, x)

        count = x.size
        local_buf = cl.local_memory(x.ctype, ndim=1, shape=[count])

        # Keep doubling while the result stays within half the element
        # count; the outcome is a power of two between 1 and 512.
        half = count // 2
        group_size = 1
        while group_size < 512 and group_size * 2 <= half:
            group_size *= 2

        if out is None:
            out = cl.empty(queue.context, [1], x.format)

        kernel = reduce_kernel.compile(
            queue.context,
            function=self.device_func,
            output=cl.global_memory(out.ctype, flat=True),
            array=cl.global_memory(x.ctype, flat=True),
            shared=local_buf,
            group_size=cl.cl_uint,
            cly_meta=self.device_func.func_name,
        )

        # Cap the group size at what the compiled kernel reports for the
        # queue's device.
        group_size = min(kernel.work_group_size(queue.device), group_size)

        kernel(
            queue,
            out, out.array_info,
            x, x.array_info,
            local_buf, local_buf.local_info,
            group_size,
        )

        wrapped = CLArray._view_as_this(out)
        wrapped.__array_init__(context, queue)
        return wrapped
Esempio n. 3
0
def reduce(queue, function, input, initial=0.0):
    '''
    reduce(queue, function, input[, initial]) -> output buffer

    Hand ``input`` to ``cl_reduce`` together with ``function`` and
    ``initial``, and return the buffer the result is written to.

    The call is given a local-memory descriptor with the format of ``input``
    and one slot per input element, a freshly created one-element output
    buffer of the same format, and a group size of half the input size
    (integer division).

    NOTE(review): presumably mirrors the builtin ``reduce`` (cumulative,
    left to right); the combining itself happens inside ``cl_reduce`` --
    confirm there.
    '''
    n_items = input.size

    scratch = cl.local_memory(input.format, [n_items])
    result = cl.empty(queue.context, [1], input.format)

    cl_reduce(queue, function, result, input, scratch, n_items // 2, initial)
    return result
Esempio n. 4
0
def reduce(queue, function, input, initial=0.0):
    '''
    reduce(queue, function, input[, initial]) -> output buffer

    Prepare the buffers for ``cl_reduce`` and invoke it on ``input``.

    A one-element output buffer with the format of ``input`` is created in
    ``queue.context``; ``cl_reduce`` receives it along with ``function``,
    ``input``, a local-memory descriptor sized to the input, a group size
    of ``input.size // 2`` and ``initial``.  The output buffer is returned.

    NOTE(review): the semantics of ``function`` and ``initial`` are defined
    by ``cl_reduce``; they look modelled on the builtin ``reduce`` --
    verify against that kernel.
    '''
    element_count = input.size
    fmt = input.format

    local_scratch = cl.local_memory(fmt, [element_count])
    destination = cl.empty(queue.context, [1], fmt)

    work_group = element_count // 2
    cl_reduce(
        queue,
        function,
        destination,
        input,
        local_scratch,
        work_group,
        initial,
    )

    return destination