Code example #1
    def __init__(self,image_shape,filter_array,boundry='clamp',queue=None):

        if(queue is None):
            self._queue = cl.CommandQueue(opencl_tools.get_a_context(),
                                   properties=opencl_tools.profile_properties)
        else:
            self._queue =  queue
        
        filter_array =filter_array.astype(np.float32)
            
        self._image_shape = image_shape
        self._filter_size = filter_array.shape
        self._kernal_size = (4,21)
        

        half_filter_sizes = (filter_array.shape[0]/2,filter_array.shape[1]/2)
        
        kernal_ratios = (int(np.ceil(2*filter_array.shape[0]/float(self._kernal_size[0]))),
                         int(np.ceil(2*filter_array.shape[1]/float(self._kernal_size[1]))))
        preamble = """
            #define ROWS %d
            #define COLS %d
            #define NUMBER_COLORS %d
        
            #define FILTER_HEIGHT %d
            #define FILTER_WIDTH %d

            #define KERNAL_HEIGHT %d
            #define KERNAL_WIDTH %d
            
            #define half_fheight %d
            #define half_fwidth %d
            
            #define FILTER_2_TO_KERNAL_HEIGHT %d
            #define FILTER_2_TO_KERNAL_WIDTH %d
        """ % (image_shape + 
                filter_array.shape + 
                self._kernal_size + 
                half_filter_sizes +
                kernal_ratios )

        if(boundry == 'clamp'):
            preamble += "#define BOUNDRY_CLAMP\n"
        elif(boundry == 'zero'):
            # Assumes the kernel source checks a BOUNDRY_ZERO define for zero padding
            preamble += "#define BOUNDRY_ZERO\n"
        else:
            raise ValueError('Unknown boundry value: %s' % boundry)
            
        if not isinstance(filter_array,cl_array.Array): 
            self._filter = cl_array.to_device(self._queue,filter_array)
        else:
            self._filter = filter_array
            
        preamble += gpu_algorithms.add_matrix_axses_for('filter',self._filter)
        preamble = opencl_tools.build_preamble_for_context\
                                        (self._queue.context,preamble)

        prg = cl.Program(self._queue.context,preamble + _blur_kernal).build(options=[])
        
        self._blur_kernal = prg.blur_kernal
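
A minimal usage sketch for this constructor. The enclosing class name is not shown in the snippet, so BlurKernal below is a hypothetical placeholder; only the arguments visible above (image_shape, filter_array, boundry, queue) are used, and image_shape is assumed to be (rows, cols, number_of_colors) to match the ROWS/COLS/NUMBER_COLORS defines.

import numpy as np

def gaussian_filter_2d(size=5, sigma=1.0):
    """Build a small normalized 2-D Gaussian filter as float32."""
    ax = np.arange(size) - (size - 1) / 2.0
    xx, yy = np.meshgrid(ax, ax)
    g = np.exp(-(xx ** 2 + yy ** 2) / (2.0 * sigma ** 2))
    return (g / g.sum()).astype(np.float32)

# Hypothetical construction call; BlurKernal is a placeholder name.
# blur = BlurKernal(image_shape=(480, 640, 3),
#                   filter_array=gaussian_filter_2d(5, 1.0),
#                   boundry='clamp')
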
Code example #2
 def __init__(self, points,point_offset=None):
     self._ctx = opencl_tools.get_a_context()
     self._queue = cl.CommandQueue(self._ctx,properties=opencl_tools.profile_properties)
     self._gpu_points = cl_array.to_device(self._queue,points.astype(np.float32))
     
     if point_offset is not None:
         self._point_offset \
             = cl_array.to_device(self._queue,point_offset.astype(np.float32))
     else:
         self._point_offset \
             = cl_array.zeros(self._queue,points.shape[0],np.float32)
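
A minimal construction sketch. The enclosing class name is not shown, so PointSet is a hypothetical placeholder; the shapes follow the constructor above, where the optional per-point offsets default to a zero vector of length points.shape[0].

import numpy as np

points = np.random.rand(1024, 2).astype(np.float32)          # one row per point (2-D here is only an example)
point_offset = np.zeros(points.shape[0], dtype=np.float32)   # optional; the constructor defaults to zeros

# Hypothetical construction call; PointSet is a placeholder name.
# ps = PointSet(points, point_offset=point_offset)
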
Code example #3
    def __init__(self,a,b_shape,b_order='C',c_order='C',queue=None):
        assert a.shape[1] == b_shape[0]
        assert a.dtype == np.float32
        
        if(queue is None):
            self._queue = cl.CommandQueue(opencl_tools.get_a_context(),properties=opencl_tools.profile_properties)
        else:
            self._queue =  queue
            
        self._block_size = 16

        self.shape = a.shape        
        
        if min(a.shape[0],a.shape[1],b_shape[0],b_shape[1]) < self._block_size:
            self._block_size = min(a.shape[0],a.shape[1],b_shape[0],b_shape[1])
            
        self._b_shape = b_shape
        self._kernel_params  = {"block_size": self._block_size,
                                "w_a":a.shape[1], 
                                "h_a":a.shape[0], 
                                "w_b":b_shape[1]}
        
                      
        self._a_order = gpu_algorithms._get_matrix_order(a)
        self._b_order = gpu_algorithms._get_matrix_order(b_order)
        self._c_order = gpu_algorithms._get_matrix_order(c_order)
        
        
        preamble = ""
        if(self._a_order == 'C'):
            preamble+= "#define A_ROW_MAJOR_ORDER\n"
        if(self._b_order == 'C'):
            preamble+= "#define B_ROW_MAJOR_ORDER\n"
        if(self._c_order == 'C'):
            preamble+= "#define C_ROW_MAJOR_ORDER\n"
            
        full_kernal = preamble + (KERNEL_CODE % self._kernel_params)
        prg = cl.Program(self._queue.context, full_kernal ).build()
                         
        self.kernel = prg.matrixMul
        self.kernel.set_scalar_arg_dtypes([ 
                                            None,#__global float* C, 
                                            None,#__global float* A, 
                                            None,#__global float* B,
                                            np.uint32,#uint x_offset,
                                            np.uint32,#uint y_offset
                                          ])
        #Transfer the matrix to the gpu, if it is not there already
        if isinstance(a,cl_array.Array):
            self.mat = a
        else:
            self.mat = cl_array.to_device(self._queue,a)
        
        self.max_batch_size = (2048,2048)
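
A minimal construction sketch. The wrapping class is not named in the snippet, so MatrixMulKernal is a hypothetical placeholder; only the constraints asserted above are used (a must be float32 and a.shape[1] must equal b_shape[0]), and how the compiled matrixMul kernel is later enqueued is not shown here.

import numpy as np

a = np.random.rand(512, 256).astype(np.float32)   # left operand, uploaded once by the constructor
b_shape = (256, 128)                              # shape of the right operand supplied later

# Hypothetical construction call; MatrixMulKernal is a placeholder name.
# mm = MatrixMulKernal(a, b_shape, b_order='C', c_order='C')
# mm.mat then holds `a` on the device and mm.kernel is the compiled matrixMul kernel.
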
Code example #4
 def test():
     import time
     queue = cl.CommandQueue(opencl_tools.get_a_context(),properties=opencl_tools.profile_properties)
     
     #test_sum
     cpu_array1 = np.random.rand(1000,1000).astype(np.float32)
     cpu_array2 = np.random.rand(1000,1000).astype(np.float32)
     
     true_val = []
     
     for axs in xrange(2):
         t0 =time.time()  
         true_val.append(np.sum(cpu_array1*cpu_array2,axis = axs))
         t1 =time.time()  
         print "Their time for axis ",axs,":", t1-t0
         
     
     
     
     for in_order in ['F','C']:
         for axs in xrange(2):
             print "Testing: In order",in_order, "with axis",axs
             
             t1 =time.time()
             
             
             
             gpu_array1 = cl_array.to_device(queue,
                                                np.asarray(cpu_array1,
                                                           order=in_order))
             gpu_array2 = cl_array.to_device(queue,
                                                np.asarray(cpu_array2,
                                                           order=in_order))                                                              
             test_sum=SumProductKernal(gpu_array1,axis = axs,queue=queue)
 
             t2 =time.time()
             gpu_val = test_sum(None,gpu_array2,gpu_array1)
             cl.enqueue_barrier(queue).wait()
             t3 =time.time()  
             sum_val=gpu_val.get()
             t4 = time.time()  
             
             
             print "\tOur time:", t3-t2
             print "\tOur time with transfers:", t4-t1
             err = true_val[axs]-sum_val
             print "\tMax error:",np.max(np.abs(err)/true_val[axs])            
             gpu_array1.data.release(); del gpu_array1
             gpu_array2.data.release(); del gpu_array2
             gpu_val.data.release(); del gpu_val
Code example #5
    def test():
        import time
        queue = cl.CommandQueue(opencl_tools.get_a_context(),properties=opencl_tools.profile_properties)
        
        #test_sum
        cpu_array = np.random.rand(7546*5,6425).astype(np.float32)
        
        true_val = []
        
        for axs in xrange(2):
            t0 =time.time()  
            true_val.append(np.sum(cpu_array,axis = axs))
            t1 =time.time()  
            print "Their time for axis ",axs,":", t1-t0
            
        
        
        
        for in_order in ['F','C']:
            for axs in xrange(2):
                precompute_test_sum = SumKernal(np.float32,axis = axs,queue=queue)
                for size_type in ['size at construction','size at run']:
                    print "Testing: In order",in_order, "with axis",axs, 'and', size_type
                    
                    t1 =time.time()
                    
                    
                    
                    gpu_array = cl_array.to_device(queue,
                                                       np.asarray(cpu_array,
                                                                  order=in_order))

                    if size_type == 'size at construction' :                                     
                        test_sum = SumKernal(gpu_array,axis = axs,queue=queue)
                    else:
                        test_sum = precompute_test_sum
                    t2 =time.time()
                    gpu_val = test_sum(matrix=gpu_array)
                    cl.enqueue_barrier(queue).wait()
                    t3 =time.time()  
                    sum_val=gpu_val.get()
                    t4 = time.time()  
                    
                    
                    print "\tOur time:", t3-t2
                    print "\tOur time with transfers\\construction:", t4-t1
                    err = true_val[axs]-sum_val
                    print "\tMax error:",np.max(np.abs(err)/true_val[axs])            
                    gpu_array.data.release(); del gpu_array
                    gpu_val.data.release(); del gpu_val
Code example #6
    def __init__(self,matrix,axis,queue=None):
        assert axis >= 0 and axis <= 1
        
        if(queue is None):
            self._queue = cl.CommandQueue(opencl_tools.get_a_context(),properties=opencl_tools.profile_properties)
        else:
            self._queue =  queue
            
        self._block_size = 32
        self._matrix = matrix
        self._axis = axis


        self._matrix_order = _get_matrix_order(matrix)
        #If the matrix is stored in C (row-major) order, the sum axis appears
        #as the opposite one in memory, so exchange the axis
        if(self._matrix_order == 'C'):
            self._effective_axis = 1 - axis
        else:
            self._effective_axis = axis
        
        preamble="""
            #define sum_size %d
            #define other_size %d
            #define blockSize %d 
            """ % (matrix.shape[axis],
                   matrix.shape[1-axis],
                   self._block_size)   
            
        preamble = opencl_tools.build_preamble_for_context\
                                        (self._queue.context,preamble)
        if(self._effective_axis == 0 ):
            preamble += "#define SUM_OVER_SLOW_CHANGING\n"
            
        prg = cl.Program(self._queue.context,preamble+_sum_product_code).build()
                         
        self.kernel = prg.sum_product_per_axis
Code example #7
    def matrix_to_np_array(self, mat):
        assert mat.size == 1
        assert mat.dtype == self.mat_struct

        mat_view = np.empty((self.n,self.vector_padded),self.element_type)
        mat_view.data[:] = mat.data[:]

        #mat_view = mat.view(self.element_type) 
        #mat_view = mat_view.reshape((self.n,self.vector_padded))
        
        return mat_view[:self.n,:self.n]
   


#        
if __name__ == "__main__":
    queue = cl.CommandQueue(opencl_tools.get_a_context(),
                                properties=opencl_tools.profile_properties)
                                
    test_code = Template("""
    __kernel void test_kernal(${matrix_type} mat_in,
                         ${vector_type} v_in,__global ${vector_type}* v_out) 
    {
        *v_out = mat_dot(mat_in,v_in);
    }
    
    __kernel void test_kernal2(${matrix_type} mat1_in,
                         ${matrix_type} mat2_in,__global ${matrix_type}* m_out) 
    {
        *m_out = mat_mul(mat1_in,mat2_in);
    }   
    
Code example #8
    def __init__(self,matrix,axis,queue=None):
        assert axis >= 0 and axis <= 1
        
        if(queue is None):
            self._queue = cl.CommandQueue(opencl_tools.get_a_context(),properties=opencl_tools.profile_properties)
        else:
            self._queue =  queue
           
        self._block_size = 32
        self._axis = axis
        # A dtype (e.g. np.float32) may be passed instead of a matrix; in that
        # case the matrix sizes are supplied as kernel arguments at call time.
        if np.issubdtype(matrix,type):
            self._sizes_given_as_arguments = True
            
            self._dtype = matrix
            self._ctype = cl.tools.dtype_to_ctype(self._dtype)
            
            preamble="""
                #define blockSize %d 
                #define DTYPE %s
                """ % (self._block_size,
                       self._ctype)   
            preamble = opencl_tools.build_preamble_for_context\
                                            (self._queue.context,preamble)
            preamble += "#define SIZE_AS_ARGUMENT\n"
        else:
            self._sizes_given_as_arguments = False
            self._matrix = matrix
            self._matrix_order = _get_matrix_order(matrix)


            #If the matrix is stored in C (row-major) order, the sum axis appears
            #as the opposite one in memory, so exchange the axis
            if(self._matrix_order == 'C'):
                self._effective_axis = 1 - axis
            else:
                self._effective_axis = axis
            
            self._dtype = matrix.dtype
            self._ctype = cl.tools.dtype_to_ctype(self._dtype)
            
            preamble="""
                #define sum_size %d
                #define other_size %d
                #define blockSize %d 
                #define DTYPE %s
                """ % (matrix.shape[axis],
                       matrix.shape[1-axis],
                       self._block_size,
                       self._ctype)   
            preamble = opencl_tools.build_preamble_for_context\
                                            (self._queue.context,preamble)
            if(self._effective_axis == 0 ):
                preamble += "#define SUM_OVER_SLOW_CHANGING\n"
            
        prg = cl.Program(self._queue.context,preamble+_sum_code).build()
                         
        self.kernel = prg.sum_per_axis
        
        if self._sizes_given_as_arguments:
            self.kernel.set_scalar_arg_dtypes([None,None,
                                                np.int32,np.int32,np.int32])
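
The two branches above correspond to the two construction modes exercised in code example #5: passing a concrete (GPU) matrix bakes its sizes into the kernel preamble, while passing a dtype such as np.float32 builds a reusable kernel that receives the sizes as scalar arguments at call time. A condensed sketch of that usage, assuming this repository's opencl_tools module and SumKernal class are importable:

import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array
# opencl_tools and SumKernal come from this codebase (imports omitted; module paths not shown in the snippets)

queue = cl.CommandQueue(opencl_tools.get_a_context(),
                        properties=opencl_tools.profile_properties)

cpu_array = np.random.rand(2048, 1024).astype(np.float32)
gpu_array = cl_array.to_device(queue, cpu_array)

# Sizes baked in at construction time:
fixed_sum = SumKernal(gpu_array, axis=0, queue=queue)

# Sizes supplied at call time (dtype-only construction, reusable across matrices):
generic_sum = SumKernal(np.float32, axis=0, queue=queue)

# Call signature as used in code example #5:
gpu_val = fixed_sum(matrix=gpu_array)
print(gpu_val.get())
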
Code example #9
    def matrix_to_np_array(self, mat):
        assert mat.size == 1
        assert mat.dtype == self.mat_struct

        mat_view = np.empty((self.n, self.vector_padded), self.element_type)
        mat_view.data[:] = mat.data[:]

        #mat_view = mat.view(self.element_type)
        #mat_view = mat_view.reshape((self.n,self.vector_padded))

        return mat_view[:self.n, :self.n]


#
if __name__ == "__main__":
    queue = cl.CommandQueue(opencl_tools.get_a_context(),
                            properties=opencl_tools.profile_properties)

    test_code = Template("""
    __kernel void test_kernal(${matrix_type} mat_in,
                         ${vector_type} v_in,__global ${vector_type}* v_out) 
    {
        *v_out = mat_dot(mat_in,v_in);
    }
    
    __kernel void test_kernal2(${matrix_type} mat1_in,
                         ${matrix_type} mat2_in,__global ${matrix_type}* m_out) 
    {
        *m_out = mat_mul(mat1_in,mat2_in);
    }   
    
Code example #10
    def __init__(self,image_shape,row_filter_array,
                     col_filter_array,boundry='clamp',queue=None):
        assert row_filter_array.shape[1] == 1
        assert col_filter_array.shape[0] == 1
        if(queue is None):
            self._queue = cl.CommandQueue(opencl_tools.get_a_context(),
                                   properties=opencl_tools.profile_properties)
        else:
            self._queue =  queue
        
        row_filter_array =row_filter_array.astype(np.float32)
        col_filter_array =col_filter_array.astype(np.float32)
        
        self._image_shape = image_shape
        self._filter_size = (row_filter_array.shape[0],col_filter_array.shape[1])
        self._kernal_size = (1,256)
        

        half_filter_sizes = (self._filter_size[0]/2,self._filter_size[1]/2)
        
        buffer_width = (self._kernal_size[1]+self._filter_size[1],)
        preamble = """
            #define ROWS %d
            #define COLS %d
            #define NUMBER_COLORS %d
        
            #define FILTER_HEIGHT %d
            #define FILTER_WIDTH %d

            #define KERNAL_HEIGHT %d
            #define KERNAL_WIDTH %d
            
            #define half_fheight %d
            #define half_fwidth %d
            
            #define local_buffer_width %d
        """ % (image_shape + 
                self._filter_size + 
                self._kernal_size + 
                half_filter_sizes +
                buffer_width )

        if(boundry == 'clamp'):
            preamble += "#define BOUNDRY_CLAMP\n"
        elif(boundry == 'zero'):
            # Assumes the kernel source checks a BOUNDRY_ZERO define for zero padding
            preamble += "#define BOUNDRY_ZERO\n"
        else:
            raise ValueError('Unknown boundry value: %s' % boundry)
            
        if not isinstance(row_filter_array,cl_array.Array): 
            self._row_filter_array = cl_array.to_device(self._queue,row_filter_array)
        else:
            self._row_filter_array = row_filter_array

        if not isinstance(col_filter_array,cl_array.Array): 
            self._col_filter_array = cl_array.to_device(self._queue,col_filter_array)
        else:
            self._col_filter_array = col_filter_array            
            
        preamble = opencl_tools.build_preamble_for_context\
                                        (self._queue.context,preamble)

        prg = cl.Program(self._queue.context,preamble + _separable_filter_kernal_float4).build(options=[])
        
        self._separable_filter_kernal = prg.separable_filter_kernal
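
A construction sketch for the separable case. The enclosing class name is not shown, so SeparableFilter is a hypothetical placeholder; it only reflects the shapes asserted above, i.e. the row filter is a column vector of shape (N, 1) and the column filter a row vector of shape (1, M).

import numpy as np

# 1-D Gaussian of width 9 (sigma = 2), normalized and converted to float32
ax = np.arange(9) - 4.0
g = np.exp(-ax ** 2 / (2.0 * 2.0 ** 2)).astype(np.float32)
g /= g.sum()

row_filter = g.reshape(-1, 1)   # shape (9, 1): filters along the rows
col_filter = g.reshape(1, -1)   # shape (1, 9): filters along the columns

# Hypothetical construction call; SeparableFilter is a placeholder name.
# sep = SeparableFilter(image_shape=(480, 640, 3),
#                       row_filter_array=row_filter,
#                       col_filter_array=col_filter,
#                       boundry='clamp')
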
Code example #11
    def __init__(self, arg1, shape=None, dtype=None, copy=False, queue=None):
        """Orginaly taken from scipi's implementation"""
        if queue is None:
            self._queue = cl.CommandQueue(opencl_tools.get_a_context(), properties=opencl_tools.profile_properties)
        else:
            self._queue = queue

        self.size = -1
        # Default running sizes
        self.rows_per_run = 1000
        self.cols_per_run = 100
        self.local_size = (16, 16, 1)

        if isinstance(arg1, self.__class__) or sp.isspmatrix(arg1):
            self._set_self(arg1, copy)

        elif isinstance(arg1, tuple):
            if sp.sputils.isshape(arg1):
                # It's a tuple of matrix dimensions (M, N)
                # create empty matrix
                self.shape = arg1  # spmatrix checks for errors here
                M, N = self.shape
                self.data = cl_array.zeros(self._queue, 0, sp.sputils.getdtype(dtype, default=np.float32))
                self.indices = cl_array.zeros(self._queue, 0, np.intc)
                self.indptr = cl_array.zeros(self._queue, self._swap((M, N))[0] + 1, dtype=np.intc)
            else:
                if len(arg1) == 2:
                    # (data, ij) format
                    other = sp.coo_matrix(arg1, shape=shape)
                    self._set_self(other)
                elif len(arg1) == 3:
                    # (data, indices, indptr) format
                    (data, indices, indptr) = arg1
                    self.indices = self._make_gpu_array(indices, copy=copy)
                    self.indptr = self._make_gpu_array(indptr, copy=copy)
                    self.data = self._make_gpu_array(data, copy=copy, dtype=sp.sputils.getdtype(dtype, data))
                else:
                    raise ValueError("unrecognized %s_matrix constructor usage" % self.format)

        else:
            # must be dense
            try:
                arg1 = np.asarray(arg1)
            except:
                raise ValueError("unrecognized %s_matrix constructor usage" % self.format)

            if dtype is None:
                dtype = np.float32

            self._set_self(sp.coo_matrix(arg1, dtype=dtype))

        # Read matrix dimensions given, if any
        if shape is not None:
            self.shape = shape  # spmatrix will check for errors
        else:
            if self.shape is None:
                # shape not already set, try to infer dimensions
                try:
                    major_dim = len(self.indptr) - 1
                    minor_dim = self.indices.max() + 1
                except:
                    raise ValueError("unable to infer matrix dimensions")
                else:
                    self.shape = self._swap((major_dim, minor_dim))

        if dtype is not None:
            if self.data.dtype != dtype:
                old_data = self.data
                self.data = self.data.astype(dtype)
                old_data.data.release()

        # self.check_format(full_check=False)
        self._old_shape = (-1, -1)
        self._expected_order = ("", "")

        self.dtype = self.data.dtype

        self._kernal_table = dict()
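
The constructor mirrors scipy's compressed sparse formats, so the natural inputs are another sparse matrix, a (data, ij) pair, a (data, indices, indptr) triple, a dense array, or a bare (M, N) shape tuple. A sketch of two of these forms; the GPU class itself is not named in the snippet, so gpu_sparse_matrix is a hypothetical placeholder:

import numpy as np
import scipy.sparse as sp

dense = np.random.rand(100, 50).astype(np.float32)
dense[dense < 0.9] = 0.0                 # zero out most entries to make it sparse
csr = sp.csr_matrix(dense)

# From an existing scipy sparse matrix (hypothetical class name):
# gpu_mat = gpu_sparse_matrix(csr)

# From a (data, indices, indptr) triple, as accepted above:
# gpu_mat = gpu_sparse_matrix((csr.data, csr.indices, csr.indptr),
#                             shape=csr.shape, dtype=np.float32)
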
Code example #12
    def __init__(self, arg1, shape=None, dtype=None, copy=False,queue=None):
        """Orginaly taken from scipi's implementation"""
        if(queue is None):
            self._queue = cl.CommandQueue(opencl_tools.get_a_context(),properties=opencl_tools.profile_properties)
        else:
            self._queue =  queue
            
        self.size =-1
        #Default running sizes
        self.rows_per_run = 1000
        self.cols_per_run = 100
        self.local_size = (16,16,1)
        

        if isinstance(arg1,self.__class__) or sp.isspmatrix(arg1):
            self._set_self( arg1,copy )

        elif isinstance(arg1, tuple):
            if sp.sputils.isshape(arg1):
                # It's a tuple of matrix dimensions (M, N)
                # create empty matrix
                self.shape = arg1   #spmatrix checks for errors here
                M, N = self.shape
                self.data    = cl_array.zeros(self._queue,0, sp.sputils.getdtype(dtype, default=np.float32))
                self.indices = cl_array.zeros(self._queue,0, np.intc)
                self.indptr  = cl_array.zeros(self._queue,self._swap((M,N))[0] + 1, dtype=np.intc)
            else:
                if len(arg1) == 2:
                    # (data, ij) format
                    other = sp.coo_matrix(arg1, shape=shape)
                    self._set_self( other )
                elif len(arg1) == 3:
                    # (data, indices, indptr) format
                    (data, indices, indptr) = arg1
                    self.indices = self._make_gpu_array(indices, copy=copy)
                    self.indptr  = self._make_gpu_array(indptr, copy=copy)
                    self.data    = self._make_gpu_array(data, copy=copy, dtype=sp.sputils.getdtype(dtype, data))
                else:
                    raise ValueError("unrecognized %s_matrix constructor usage" %
                            self.format)

        else:
            #must be dense
            try:
                arg1 = np.asarray(arg1)
            except:
                raise ValueError("unrecognized %s_matrix constructor usage" %
                        self.format)
                        
            if dtype is None:
                dtype = np.float32
                
            self._set_self(sp.coo_matrix(arg1, dtype=dtype) )

        # Read matrix dimensions given, if any
        if shape is not None:
            self.shape = shape   # spmatrix will check for errors
        else:
            if self.shape is None:
                # shape not already set, try to infer dimensions
                try:
                    major_dim = len(self.indptr) - 1
                    minor_dim = self.indices.max() + 1
                except:
                    raise ValueError('unable to infer matrix dimensions')
                else:
                    self.shape = self._swap((major_dim,minor_dim))

        if dtype is not None:
            if(self.data.dtype != dtype):
                old_data = self.data
                self.data = self.data.astype(dtype)
                old_data.data.release()

        #self.check_format(full_check=False)
        self._old_shape = (-1,-1)
        self._expected_order = ("","")   
        
        self.dtype = self.data.dtype
        
        self._kernal_table = dict()