Example 1
 def __init__(self, source, b, a):
     # Automatically duplicate mono input to fit the desired output shape
     if b.shape[0]!=source.nchannels:
         if source.nchannels!=1:
             raise ValueError('Can only automatically duplicate source channels for mono sources, use RestructureFilterbank.')
         source = RestructureFilterbank(source, b.shape[0])
     Filterbank.__init__(self, source)
     # Weave version of filtering requires Fortran ordering of filter params
     if len(b.shape)==2 and len(a.shape)==2:
         b = reshape(b, b.shape+(1,))
         a = reshape(a, a.shape+(1,))
     self.filt_b = array(b, order='F')
     self.filt_a = array(a, order='F')
     self.filt_state = zeros((b.shape[0], b.shape[1]-1, b.shape[2]), order='F')
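The constructor above expects b and a with one row of m filter taps per output channel: a 2D (nchannels, m) pair is reshaped to (nchannels, m, 1), while a 3D (nchannels, m, p) pair describes a cascade of p filters per channel, and the filter state is allocated as (nchannels, m-1, p). A minimal sketch of preparing coefficients with those shapes, assuming the enclosing class is LinearFilterbank from brian.hears and that SciPy is available (both assumptions, not shown in the example):

 import numpy as np
 from scipy.signal import butter

 nchannels = 8        # number of filterbank channels (n)
 order = 4            # IIR order, so each filter has m = order + 1 taps

 # One low-pass filter per channel; shape (nchannels, m). A 2D pair like this
 # is reshaped internally to (nchannels, m, 1), i.e. a cascade depth of p = 1.
 cutoffs = np.linspace(0.05, 0.4, nchannels)   # normalized cutoff frequencies
 b = np.empty((nchannels, order + 1))
 a = np.empty((nchannels, order + 1))
 for i, wc in enumerate(cutoffs):
     b[i], a[i] = butter(order, wc)

 # fb = LinearFilterbank(source, b, a)   # hypothetical usage; 'source' is a
 #                                       # mono or nchannels-wide sound source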
Example 2
 def __init__(self, source, b, a):
     # Automatically duplicate mono input to fit the desired output shape
     if b.shape[0]!=source.nchannels:
         if source.nchannels!=1:
             raise ValueError('Can only automatically duplicate source channels for mono sources, use RestructureFilterbank.')
         source = RestructureFilterbank(source, b.shape[0])
     Filterbank.__init__(self, source)
     # Weave version of filtering requires Fortran ordering of filter params
     if len(b.shape)==2 and len(a.shape)==2:
         b = reshape(b, b.shape+(1,))
         a = reshape(a, a.shape+(1,))
     self.filt_b = array(b, order='F')
     self.filt_a = array(a, order='F')
     self.filt_state = zeros((b.shape[0], b.shape[1], b.shape[2]), order='F')
     self.use_weave = get_global_preference('useweave')
     if self.use_weave:
         log_info('brian.hears.filtering.linearfilterbank', 'Using weave')
         self.cpp_compiler = get_global_preference('weavecompiler')
         self.extra_compile_args = ['-O3']
         if self.cpp_compiler=='gcc':
             self.extra_compile_args += get_global_preference('gcc_options')
Example 3
 def __init__(self, source, b, a):
     # Automatically duplicate mono input to fit the desired output shape
     if b.shape[0] != source.nchannels:
         if source.nchannels != 1:
             raise ValueError(
                 'Can only automatically duplicate source channels for mono sources, use RestructureFilterbank.'
             )
         source = RestructureFilterbank(source, b.shape[0])
     Filterbank.__init__(self, source)
     # Weave version of filtering requires Fortran ordering of filter params
     if len(b.shape) == 2 and len(a.shape) == 2:
         b = reshape(b, b.shape + (1, ))
         a = reshape(a, a.shape + (1, ))
     self.filt_b = array(b, order='F')
     self.filt_a = array(a, order='F')
     self.filt_state = zeros((b.shape[0], b.shape[1], b.shape[2]),
                             order='F')
     self.use_weave = get_global_preference('useweave')
     if self.use_weave:
         log_info('brian.hears.filtering.linearfilterbank', 'Using weave')
         self.cpp_compiler = get_global_preference('weavecompiler')
         self.extra_compile_args = ['-O3']
         if self.cpp_compiler == 'gcc':
             self.extra_compile_args += get_global_preference('gcc_options')
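All of the constructors in this listing set up the same per-channel IIR recurrence; only the backend differs (weave-compiled C in the two examples above, CUDA in the examples below). A pure-NumPy sketch of one time step of that recurrence for a single cascade stage, written from the kernel code shown in the GPU examples (the function name and the assumption that a[:, 0] == 1 are mine, not part of the library):

 def filter_step(x, b, a, zi):
     """Advance every channel by one sample (direct form II transposed).

     x  : (n,)      current input sample for each of the n channels
     b  : (n, m)    feed-forward coefficients
     a  : (n, m)    feedback coefficients, normalized so that a[:, 0] == 1
     zi : (n, m-1)  filter state, updated in place
     """
     y = b[:, 0] * x + zi[:, 0]
     for i in range(b.shape[1] - 2):
         zi[:, i] = b[:, i + 1] * x + zi[:, i + 1] - a[:, i + 1] * y
     zi[:, -1] = b[:, -1] * x - a[:, -1] * y
     return y

 # With p cascaded stages the output y of one stage becomes the input x of the
 # next, which is what the generated GPU code below does with x(s,j) = y(s,j).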
Example 4
 def __init__(self, source, b, a, samplerate=None,
              precision='double', forcesync=True, pagelocked_mem=True, unroll_filterorder=None):
     # Automatically duplicate mono input to fit the desired output shape
     if b.shape[0]!=source.nchannels:
         if source.nchannels!=1:
             raise ValueError('Can only automatically duplicate source channels for mono sources, use RestructureFilterbank.')
         source = RestructureFilterbank(source, b.shape[0])
     Filterbank.__init__(self, source)
     if pycuda.context is None:
         set_gpu_device(0)
     self.precision=precision
     if self.precision=='double':
         self.precision_dtype=float64
     else:
         self.precision_dtype=float32
     self.forcesync=forcesync
     self.pagelocked_mem=pagelocked_mem
     n, m, p=b.shape
     self.filt_b=b
     self.filt_a=a
     filt_b_gpu=array(b, dtype=self.precision_dtype)
     filt_a_gpu=array(a, dtype=self.precision_dtype)
     filt_state=zeros((n, m-1, p), dtype=self.precision_dtype)
     if pagelocked_mem:
         filt_y=drv.pagelocked_zeros((n,), dtype=self.precision_dtype)
         self.pre_x=drv.pagelocked_zeros((n,), dtype=self.precision_dtype)
     else:
         filt_y=zeros(n, dtype=self.precision_dtype)
         self.pre_x=zeros(n, dtype=self.precision_dtype)
     self.filt_b_gpu=gpuarray.to_gpu(filt_b_gpu.T.flatten()) # transform to Fortran order for better GPU mem
     self.filt_a_gpu=gpuarray.to_gpu(filt_a_gpu.T.flatten()) # access speeds
     self.filt_state=gpuarray.to_gpu(filt_state.T.flatten())
     self.unroll_filterorder = unroll_filterorder
     if unroll_filterorder is None:
         if m<=32:
             unroll_filterorder = True
         else:
             unroll_filterorder = False
     # TODO: improve code, check memory access patterns, maybe use local memory
     code='''
     #define x(s,i) _x[(s)*n+(i)]
     #define y(s,i) _y[(s)*n+(i)]
     #define a(i,j,k) _a[(i)+(j)*n+(k)*n*m]
     #define b(i,j,k) _b[(i)+(j)*n+(k)*n*m]
     #define zi(i,j,k) _zi[(i)+(j)*n+(k)*n*(m-1)]
     __global__ void filt(SCALAR *_b, SCALAR *_a, SCALAR *_x, SCALAR *_zi, SCALAR *_y, int numsamples)
     {
         int j = blockIdx.x * blockDim.x + threadIdx.x;
         if(j>=n) return;
         for(int s=0; s<numsamples; s++)
         {
     '''
     for k in range(p):
         loopcode='''
         y(s,j) = b(j,0,k)*x(s,j) + zi(j,0,k);
         '''
         if unroll_filterorder:
             for i in range(m-2):
                 loopcode+=re.sub('\\bi\\b', str(i), '''
                 zi(j,i,k) = b(j,i+1,k)*x(s,j) + zi(j,i+1,k) - a(j,i+1,k)*y(s,j);
                 ''')
         else:
             loopcode+='''
             for(int i=0;i<m-2;i++)
                 zi(j,i,k) = b(j,i+1,k)*x(s,j) + zi(j,i+1,k) - a(j,i+1,k)*y(s,j);
             '''
         loopcode+='''
         zi(j,m-2,k) = b(j,m-1,k)*x(s,j) - a(j,m-1,k)*y(s,j);
         '''
         if k<p-1:
             loopcode+='''
             x(s,j) = y(s,j);
             '''
         loopcode=re.sub('\\bk\\b', str(k), loopcode)
         code+=loopcode
     code+='''
         }
     }
     '''
     code=code.replace('SCALAR', self.precision)
     code=re.sub("\\bp\\b", str(p), code) #replace the variable by their values
     code=re.sub("\\bm\\b", str(m), code)
     code=re.sub("\\bn\\b", str(n), code)
     #print code
     self.gpu_mod=pycuda.compiler.SourceModule(code)
     self.gpu_filt_func=self.gpu_mod.get_function("filt")
     blocksize=256
     if n<blocksize:
         blocksize=n
     if n%blocksize==0:
         gridsize=n/blocksize
     else:
         gridsize=n/blocksize+1
     self.block=(blocksize, 1, 1)
     self.grid=(gridsize, 1)
     self.gpu_filt_func.prepare((intp, intp, intp, intp, intp, int32), self.block)
     self._has_run_once=False
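The CUDA source in this example is assembled by plain text substitution: re.sub with \b word boundaries replaces the loop index i, the cascade index k and the sizes n, m, p by literal numbers, so identifiers such as zi stay untouched and the compiler sees a fully unrolled, constant-indexed kernel. A small standalone illustration of that substitution trick (the values 0 and 1 and the single line of kernel text are just a hypothetical stage):

 import re

 line = 'zi(j,i,k) = b(j,i+1,k)*x(s,j) + zi(j,i+1,k) - a(j,i+1,k)*y(s,j);'
 line = re.sub(r'\bi\b', '0', line)   # unrolled loop index i -> 0
 line = re.sub(r'\bk\b', '1', line)   # cascade stage k -> 1
 print(line)
 # zi(j,0,1) = b(j,0+1,1)*x(s,j) + zi(j,0+1,1) - a(j,0+1,1)*y(s,j);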
Example 5
 def __init__(self,
              source,
              b,
              a,
              samplerate=None,
              precision='double',
              forcesync=True,
              pagelocked_mem=True,
              unroll_filterorder=None):
     # Automatically duplicate mono input to fit the desired output shape
     if b.shape[0] != source.nchannels:
         if source.nchannels != 1:
             raise ValueError(
                 'Can only automatically duplicate source channels for mono sources, use RestructureFilterbank.'
             )
         source = RestructureFilterbank(source, b.shape[0])
     Filterbank.__init__(self, source)
     if pycuda.context is None:
         set_gpu_device(0)
     self.precision = precision
     if self.precision == 'double':
         self.precision_dtype = float64
     else:
         self.precision_dtype = float32
     self.forcesync = forcesync
     self.pagelocked_mem = pagelocked_mem
     n, m, p = b.shape
     self.filt_b = b
     self.filt_a = a
     filt_b_gpu = array(b, dtype=self.precision_dtype)
     filt_a_gpu = array(a, dtype=self.precision_dtype)
     filt_state = zeros((n, m - 1, p), dtype=self.precision_dtype)
     if pagelocked_mem:
         filt_y = drv.pagelocked_zeros((n, ), dtype=self.precision_dtype)
         self.pre_x = drv.pagelocked_zeros((n, ),
                                           dtype=self.precision_dtype)
     else:
         filt_y = zeros(n, dtype=self.precision_dtype)
         self.pre_x = zeros(n, dtype=self.precision_dtype)
     self.filt_b_gpu = gpuarray.to_gpu(filt_b_gpu.T.flatten(
     ))  # transform to Fortran order for better GPU mem
     self.filt_a_gpu = gpuarray.to_gpu(
         filt_a_gpu.T.flatten())  # access speeds
     self.filt_state = gpuarray.to_gpu(filt_state.T.flatten())
     self.unroll_filterorder = unroll_filterorder
     if unroll_filterorder is None:
         if m <= 32:
             unroll_filterorder = True
         else:
             unroll_filterorder = False
     # TODO: improve code, check memory access patterns, maybe use local memory
     code = '''
     #define x(s,i) _x[(s)*n+(i)]
     #define y(s,i) _y[(s)*n+(i)]
     #define a(i,j,k) _a[(i)+(j)*n+(k)*n*m]
     #define b(i,j,k) _b[(i)+(j)*n+(k)*n*m]
     #define zi(i,j,k) _zi[(i)+(j)*n+(k)*n*(m-1)]
     __global__ void filt(SCALAR *_b, SCALAR *_a, SCALAR *_x, SCALAR *_zi, SCALAR *_y, int numsamples)
     {
         int j = blockIdx.x * blockDim.x + threadIdx.x;
         if(j>=n) return;
         for(int s=0; s<numsamples; s++)
         {
     '''
     for k in range(p):
         loopcode = '''
         y(s,j) = b(j,0,k)*x(s,j) + zi(j,0,k);
         '''
         if unroll_filterorder:
             for i in range(m - 2):
                 loopcode += re.sub(
                     '\\bi\\b', str(i), '''
                 zi(j,i,k) = b(j,i+1,k)*x(s,j) + zi(j,i+1,k) - a(j,i+1,k)*y(s,j);
                 ''')
         else:
             loopcode += '''
             for(int i=0;i<m-2;i++)
                 zi(j,i,k) = b(j,i+1,k)*x(s,j) + zi(j,i+1,k) - a(j,i+1,k)*y(s,j);
             '''
         loopcode += '''
         zi(j,m-2,k) = b(j,m-1,k)*x(s,j) - a(j,m-1,k)*y(s,j);
         '''
         if k < p - 1:
             loopcode += '''
             x(s,j) = y(s,j);
             '''
         loopcode = re.sub('\\bk\\b', str(k), loopcode)
         code += loopcode
     code += '''
         }
     }
     '''
     code = code.replace('SCALAR', self.precision)
     code = re.sub("\\bp\\b", str(p),
                   code)  #replace the variable by their values
     code = re.sub("\\bm\\b", str(m), code)
     code = re.sub("\\bn\\b", str(n), code)
     #print code
     self.gpu_mod = pycuda.compiler.SourceModule(code)
     self.gpu_filt_func = self.gpu_mod.get_function("filt")
     blocksize = 256
     if n < blocksize:
         blocksize = n
     if n % blocksize == 0:
         gridsize = n / blocksize
     else:
         gridsize = n / blocksize + 1
     self.block = (blocksize, 1, 1)
     self.grid = (gridsize, 1)
     self.gpu_filt_func.prepare((intp, intp, intp, intp, intp, int32),
                                self.block)
     self._has_run_once = False
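Note that this is Python 2 code: gridsize = n/blocksize relies on integer division, and the prepare(..., block) call appears to target an older PyCUDA interface. A sketch of the same block/grid sizing written as an explicit ceil division, which behaves identically under Python 2 and 3 (the helper name is hypothetical):

 def launch_config(n, blocksize=256):
     """Return (block, grid) covering n channels, one thread per channel."""
     blocksize = min(blocksize, n)
     gridsize = (n + blocksize - 1) // blocksize   # ceil(n / blocksize)
     return (blocksize, 1, 1), (gridsize, 1)

 # e.g. launch_config(3000) == ((256, 1, 1), (12, 1))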