예제 #1
0
    def test_csr2sertilp(self):
        # Converts a small 8x6 matrix with spf.csr2sertilp and compares each
        # of the four returned arrays with the expected contents listed below.

        dense = np.array([
            [1, 0, 2, 0, 3, 0],
            [4, 0, 5, 0, 0, 0],
            [0, 0, 0, 6, 7, 0],
            [0, 0, 0, 0, 0, 8],
            [21, 0, 22, 0, 23, 0],
            [24, 0, 25, 0, 0, 0],
            [0, 0, 0, 26, 27, 0],
            [0, 0, 0, 0, 0, 28],
        ])
        csr = sp.csr_matrix(dense)

        expected_row_len = np.array([1] * 8)
        expected_slice_start = np.array([0, 16, 32])
        expected_values = np.array([
            1.0, 2.0, 4.0, 5.0, 6.0, 7.0, 8.0, 0.0,
            3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
            21.0, 22.0, 24.0, 25.0, 26.0, 27.0, 28.0, 0.0,
            23.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
        ])

        # Column numbers as taken directly from the dataset (they start at 1).
        dataset_cols = np.array([
            1, 3, 1, 3, 4, 5, 6, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
            1, 3, 1, 3, 4, 5, 6, 0,
            5, 0, 0, 0, 0, 0, 0, 0,
        ])
        # The sparse format counts columns from 0, so shift everything down by
        # one; entries that were 0 would become -1 and are clamped back to 0.
        expected_cols = np.maximum(dataset_cols - 1, 0)

        values, cols, row_len, slice_start = spf.csr2sertilp(
            csr,
            threadsPerRow=2,
            prefetch=2,
            sliceSize=4,
            minAlign=2 * 4
        )

        # Checked in a fixed order so the first mismatch reported is the same
        # one a sequence of separate assertions would report.
        comparisons = (
            (row_len, expected_row_len,
             'sliced ellpack row length arrays are not equal'),
            (slice_start, expected_slice_start,
             'sliced ellpack slice start arrays are not equal'),
            (values, expected_values,
             'sliced ellpack values arrays are not equal'),
            (cols, expected_cols,
             'sliced ellpack collumns arrays are not equal'),
        )
        for actual, expected, message in comparisons:
            self.assertTrue(np.allclose(actual, expected), message)
예제 #2
0
 # Summarise the previous run stored in resultsEll: the sum of all entries,
 # the summed squared difference against kij (the message labels this as the
 # error relative to the CPU), and every skip-th entry of the first 1000.
 print 'Total sum:',resultsEll.sum()
 print "Error to CPU:",np.square(resultsEll-kij).sum()
 print resultsEll[0:1000:skip]
 #print results
 
 ##------------------------------------------
 # SERTILP gpu kernel
 
 
 # Parameters forwarded unchanged to spf.csr2sertilp below.
 sliceSize=64
 threadsPerRow=2
 prefetch=2
 minAlign=64 #8  NOTE(review): 8 looks like an alternative setting that was tried - confirm
 # Convert X with the parameters above; the call returns four objects.
 # NOTE(review): presumably values, column indices, row lengths and slice
 # starts, in that order - confirm against spf.csr2sertilp.
 v,c,r,ss=spf.csr2sertilp(X,
                          threadsPerRow=threadsPerRow, 
                          prefetch=prefetch, 
                          sliceSize=sliceSize,
                          minAlign=minAlign)
 
 # Two attributes read from the rbf object and kept under local names.
 # NOTE(review): their exact meaning is not visible here - confirm against rbf.
 sd=rbf.Diag
 self_dot = rbf.Xsquare
 # Zero-filled float32 array with 2*num_el entries.
 # NOTE(review): the factor 2 presumably matches the two-column kernel named
 # below - confirm.
 results = np.zeros(2*num_el,dtype=np.float32)
 
 # File holding the CUDA source that is compiled below.
 kernel_file = "sertilpMulti2Col.cu"
 
 # Read the entire kernel source into a single string.
 with open (kernel_file,"r") as CudaFile:
     data = CudaFile.read();
        
 #compile module
 # Alternative call that also passes a compiler cache directory (disabled):
 #module = SourceModule(data,cache_dir='./nvcc_cache',keep=True,no_extern_c=True)
 # NOTE(review): assumes SourceModule is PyCUDA's compiler entry point - confirm.
 # The extra option asks ptxas for verbose output.
 module = SourceModule(data,keep=True,no_extern_c=True,options=["--ptxas-options=-v"])