Example 1
    def compute_Gb(self, Dsfilename, lamb=0.0):
        """
        compute the G matrix using Dirichlet coefficients
        Dsfilename: generated by VTDM_prepb
        lamb: smoothing parameter \lambda
        
        """
        handle = la.cublashandle()
        import tables
        h5file = tables.openFile(Dsfilename)
        Ds = h5file.root.real.read()

        d_Ds = parray.to_gpu(Ds.reshape((Ds.shape[0], -1)))
        del Ds

        d_Dsw = parray.empty((d_Ds.shape[0], d_Ds.shape[0]), d_Ds.dtype)
        if d_Ds.dtype == np.float64:
            from scikits.cuda.cublas import cublasDgemm
            gemm = cublasDgemm
        else:
            from scikits.cuda.cublas import cublasSgemm
            gemm = cublasSgemm
        gemm(handle.handle, 't', 'n', d_Dsw.shape[0], d_Dsw.shape[0],
             d_Ds.shape[1], 1.0, d_Ds.gpudata, d_Ds.ld, d_Ds.gpudata, d_Ds.ld,
             0.0, d_Dsw.gpudata, d_Dsw.ld)
        Ds = h5file.root.imag.read()
        d_Ds.set(Ds)
        gemm(handle.handle, 't', 'n', d_Dsw.shape[0], d_Dsw.shape[0],
             d_Ds.shape[1], 1.0, d_Ds.gpudata, d_Ds.ld, d_Ds.gpudata, d_Ds.ld,
             1.0, d_Dsw.gpudata, d_Dsw.ld)
        del Ds
        h5file.close()

        norm_func = get_put_norm_kernel(d_Dsw.dtype)
        launch_kernel(norm_func, (256, 1, 1), (d_Dsw.shape[0], 1),
                      [d_Dsw, self.d_norm, d_Dsw.ld])

        self.d_G = parray.empty((self.size, self.size), self.dtype)

        G_func = get_G_kernel(self.dtype, d_Dsw.dtype)
        launch_kernel(G_func, (256, 1, 1), (self.d_G.shape[0], 1), [
            self.d_G, self.d_G.ld, self.d_tk1, self.d_tk2, self.Wt, self.Mt,
            d_Dsw, d_Dsw.ld, self.d_neuron_ind
        ],
                      timed="G matrix")

        if lamb != 0:
            lamb_func = get_diag_add_kernel(self.dtype)
            launch_kernel(
                lamb_func, (256, 1, 1),
                (6 * cuda.Context.get_device().MULTIPROCESSOR_COUNT, 1), [
                    self.d_G, self.d_G.ld, self.d_G.shape[0],
                    self.dtype.type(lamb)
                ])
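For reference, the two gemm calls above accumulate the real and the imaginary contributions into d_Dsw, so that it ends up holding the real part of Ds Ds^H. Below is a minimal host-side NumPy sketch of that identity; dsw_from_parts is a hypothetical helper, not part of the project.

import numpy as np

# Host-side sketch of what the two cuBLAS gemm calls accumulate: with
# Ds = real + 1j*imag (one flattened row per neuron),
# real.real^T + imag.imag^T equals the real part of Ds Ds^H.
def dsw_from_parts(real, imag):
    real = real.reshape(real.shape[0], -1)
    imag = imag.reshape(imag.shape[0], -1)
    return np.dot(real, real.T) + np.dot(imag, imag.T)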
Example 2
    def compute_G(self, Dswfilename, lamb=0.0):
        """
        compute the G matrix using the weighting between RFs
        Dswfilename: generated by VTDM_prep
        lamb: smoothing parameter \lambda
        
        """

        Dsw = read_file(Dswfilename)
        d_Dsw = parray.to_gpu(Dsw)
        del Dsw

        #norm_func = get_put_norm_kernel(d_Dsw.dtype)
        #launch_kernel(norm_func, (256, 1, 1), (d_Dsw.shape[0],1), [d_Dsw, self.d_norm, d_Dsw.ld])

        self.d_G = parray.empty((self.size, self.size), self.dtype)

        G_func = get_G_kernel(self.dtype, d_Dsw.dtype)
        launch_kernel(G_func, (256, 1, 1), (self.d_G.shape[0], 1), [
            self.d_G, self.d_G.ld, self.d_tk1, self.d_tk2, self.Wt, self.Mt,
            d_Dsw, d_Dsw.ld, self.d_neuron_ind
        ],
                      timed="G matrix")

        if lamb != 0:
            lamb_func = get_diag_add_kernel(self.dtype)
            launch_kernel(
                lamb_func, (256, 1, 1),
                (6 * cuda.Context.get_device().MULTIPROCESSOR_COUNT, 1), [
                    self.d_G, self.d_G.ld, self.d_G.shape[0],
                    self.dtype.type(lamb)
                ])
Example 3
    def filter(self, video_input):
        """
        Performs RF filtering on the input video
        for all the RFs
        """
        if len(video_input.shape) == 2:
            # if input has 2 dimensions
            assert video_input.shape[1] == self.size
        else:
            # if input has 3 dimensions
            assert (video_input.shape[1]*video_input.shape[2] ==
                    self.size)
        # rasterizing inputs
        video_input.resize((video_input.shape[0], self.size))

        d_video = parray.to_gpu(video_input)
        d_output = parray.empty((self.num_neurons, video_input.shape[0]),
                                self.dtype)
        free, total = cuda.mem_get_info()
        self.ONE_TIME_FILTERS = ((free // self.dtype.itemsize)
                                 * 3 // 4 // self.size)
        self.ONE_TIME_FILTERS -= self.ONE_TIME_FILTERS % 2
        self.ONE_TIME_FILTERS = min(self.ONE_TIME_FILTERS, self.num_neurons)
        handle = la.cublashandle()

        for i in np.arange(0, self.num_neurons, self.ONE_TIME_FILTERS):
            Nfilters = min(self.ONE_TIME_FILTERS, self.num_neurons - i)
            self.generate_filters(startbias=i, N_filters=Nfilters)
            la.dot(self.filters, d_video, opb='t',
                   C=d_output[i: i+Nfilters],
                   handle=handle)
        del self.filters
        return d_output.T()
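The loop above streams the filter bank through GPU memory in chunks sized from cuda.mem_get_info; what it computes overall is the dot product of every RF with every rasterized frame. A hedged NumPy sketch of that end result (filter_reference is a hypothetical helper, not project code):

import numpy as np

# Host-side reference for the batched loop above: each row of `filters` is
# dotted with each rasterized frame, and the method returns the transposed
# (num_frames, num_neurons) response matrix.
def filter_reference(filters, video_input):
    video = video_input.reshape(video_input.shape[0], -1)
    return np.dot(filters, video.T).T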
Example 4
 def reconstruct(self, dirichfilename, time_frame, dt):
     """
     reconstruct video from c = (G)^{+}q
     
     dirichfilename: generated by either VTDM_prep or VTDM_prepb
     time_frame: a tuple or list of 2, in format [start_time, end_time]
     dt: interval between two consecutive frames in reconstruction
     
     Important Note:
     assumes the solution c is stored in self.q
     
     """
 
     t = np.arange(time_frame[0], time_frame[1], dt)
     d_t = parray.to_gpu(t)
     
     dirich = read_file(dirichfilename)
     d_dirich = parray.to_gpu(dirich)
     del dirich
     
     rec_fun = get_reconstruct_kernel(d_dirich.dtype, self.d_q.dtype)
     u_rec = parray.empty((d_t.size, d_dirich.shape[1], d_dirich.shape[2]), np.float64)
     launch_kernel(rec_fun, (128, 1, 1),
                   ((d_dirich.shape[1]*d_dirich.shape[2]-1) / 128+1,
                    d_t.size),
                   [u_rec, u_rec.ld, d_dirich, d_dirich.ld, self.d_tk1,
                    self.d_tk2, self.d_q, d_t, self.d_neuron_ind, self.d_norm,
                    self.Mt, self.Wt/self.Mt, self.size])
     
     return u_rec
Example 5
File: vrf.py Project: bionet/vtem
    def filter(self, V):
        """
        Filter a video V
        Must set up parameters of CS RF first
        
        Parameters
        ----------
        V : 3D ndarray, with shape (num_frames, Px, Py)
           
        Returns
        -------
        the filtered output by the gabor filters specified in self
        output is a PitchArray with shape (num_neurons, num_frames),
        jth row of which is the output of jth gabor filter

        """
        d_output = parray.empty((self.num_neurons, V.shape[0]), self.dtype)
        d_video = parray.to_gpu(V.reshape(V.shape[0], V.shape[1]*V.shape[2]))
    
        free,total = cuda.mem_get_info()
        self.ONE_TIME_FILTERS = (free / self.dtype.itemsize) * 3/4 / self.Pxall / self.Pyall
        
        handle = la.cublashandle()
        for i in np.arange(0,self.num_neurons,self.ONE_TIME_FILTERS):
            Nfilters =  min(self.ONE_TIME_FILTERS, self.num_neurons - i)
            self.generate_visual_receptive_fields(startbias = i, N_filters = Nfilters)
            cublasDgemm(handle.handle, 't', 'n', V.shape[0], int(Nfilters),
                        self.Pxall*self.Pyall, self.dx*self.dy,
                        d_video.gpudata, d_video.ld,
                        self.filters.gpudata, self.filters.ld, 0,
                        int(int(d_output.gpudata) +
                            int(d_output.ld*i*d_output.dtype.itemsize)),
                        d_output.ld)
        return d_output.T()
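The cublasDgemm call above writes each chunk of rows straight into d_output through a raw pointer offset and scales by the pixel area dx*dy. A hedged NumPy view of what one iteration produces (filter_chunk_reference is a hypothetical helper, not project code):

import numpy as np

# NumPy equivalent of one gemm call in the loop above: rows
# [i, i+Nfilters) of d_output receive dx*dy * filters_chunk . video^T.
def filter_chunk_reference(filters_chunk, V, dx, dy):
    video = V.reshape(V.shape[0], -1)
    chunk = filters_chunk.reshape(filters_chunk.shape[0], -1)
    return dx * dy * np.dot(chunk, video.T)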
Example 6
File: vrf.py Project: bionet/vtem
 def compute_Dsw(self, d_Ds, Mx, My, h_norm):
     """
     Compute the weighting matrix of the "correlation" between each two RFs
     
     Parameters
     ----------
      d_Ds : PitchArray
          containing Dirichlet coefficients, most likely created by compute_Ds
      Mx : integer
          order in the x dimension
      My : integer
          order in the y dimension
      h_norm : ndarray
          norms associated with each RF, applied by the put_norm kernel
     
     Returns
     -------
     PitchArray with shape (num_neurons, num_neurons)
     """
     
     if self.dtype == np.complex128:
         gemm = cublasZgemm
     else:
         gemm = cublasCgemm
     
     d_weight = parray.empty((self.num_neurons, self.num_neurons), self.dtype)
     
     handle = la.cublashandle()
     
      gemm(handle.handle, 'c', 'n', self.num_neurons, self.num_neurons,
           (2*Mx+1)*(2*My+1), 1.0, d_Ds.gpudata, d_Ds.ld,
           d_Ds.gpudata, d_Ds.ld, 0, d_weight.gpudata, d_weight.ld)
     d_Dsw = d_weight.real()
     
     norm_func = get_put_norm_kernel(d_Dsw.dtype)
      launch_kernel(norm_func, (256, 1, 1), (d_Dsw.shape[0], 1),
                    [d_Dsw, parray.to_gpu(h_norm.astype(np.float64)),
                     d_Dsw.ld])
     
     
     return d_Dsw
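The 'c' (conjugate-transpose) gemm above forms the Hermitian inner product between the Dirichlet-coefficient vectors of every pair of RFs, and only the real part is kept before the norm kernel rescales it. A hedged NumPy sketch of the un-normalized weight matrix (dsw_reference is a hypothetical helper, not project code):

import numpy as np

# Reference for the weighting matrix before normalization: entry (i, j) is
# the real part of the Hermitian inner product between the flattened
# Dirichlet coefficients of RF i and RF j.
def dsw_reference(Ds):
    D = Ds.reshape(Ds.shape[0], -1)
    return np.real(np.dot(D, D.conj().T))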
Example 7
    def reconstruct(self, dirichfilename, time_frame, dt):
        """
        reconstruct video from c = (G)^{+}q
        
        dirichfilename: generated by either VTDM_prep or VTDM_prepb
        time_frame: a tuple or list of 2, in format [start_time, end_time]
        dt: interval between two consecutive frames in reconstruction
        
        Important Note:
        assumes the solution c is stored in self.q
        
        """

        t = np.arange(time_frame[0], time_frame[1], dt)
        d_t = parray.to_gpu(t)

        dirich = read_file(dirichfilename)
        d_dirich = parray.to_gpu(dirich)
        del dirich

        rec_fun = get_reconstruct_kernel(d_dirich.dtype, self.d_q.dtype)
        u_rec = parray.empty((d_t.size, d_dirich.shape[1], d_dirich.shape[2]),
                             np.float64)
        launch_kernel(
            rec_fun, (128, 1, 1),
            ((d_dirich.shape[1] * d_dirich.shape[2] - 1) / 128 + 1, d_t.size),
            [
                u_rec, u_rec.ld, d_dirich, d_dirich.ld, self.d_tk1, self.d_tk2,
                self.d_q, d_t, self.d_neuron_ind, self.d_norm, self.Mt,
                self.Wt / self.Mt, self.size
            ])

        return u_rec
Example 8
File: vrf.py Project: bionet/vtem
 def compute_Ds(self, Mx, My):
     """
     Parameters
     ----------
     Mx : integer
         Order in the x dimension
     My : integer
         Order in the y dimension
     
     Returns
     -------
      The Dirichlet coefficients of all Gabor filters with orders Mx, My,
      in the format of a PitchArray with shape (num_neurons, 2*My+1, 2*Mx+1)
     """
     
     import scikits.cuda.cufft as cufft
     d_Ds = parray.empty((self.num_neurons, 2*My+1, 2*Mx+1), self.dtype)
     ONE_TIME_FILTER = min(1024**3 / (self.Px * self.Py * d_Ds.dtype.itemsize) / 2, self.num_neurons)
     
     n = np.asarray((self.Py, self.Px) ,np.int32)
     
     if self.dtype == np.complex128:
         plan = cufft.cufftPlanMany(2, n.ctypes.data, None, 1, 0, None, 1, 0, cufft.CUFFT_Z2Z, ONE_TIME_FILTER)
         fftfunc = cufft.cufftExecZ2Z
     else:
         plan = cufft.cufftPlanMany(2, n.ctypes.data, None, 1, 0, None, 1, 0, cufft.CUFFT_C2C, ONE_TIME_FILTER)
         fftfunc = cufft.cufftExecC2C
     
     fft2Dsfun = get_fft2Ds_kernel(dtype = self.dtype)
     
     
     for i in range(0, self.num_neurons, ONE_TIME_FILTER):
         N_filters = min(ONE_TIME_FILTER, self.num_neurons - i)
         self.generate_visual_receptive_fields(startbias = i, N_filters = N_filters)
         
         
         if N_filters < ONE_TIME_FILTER:
             cufft.cufftDestroy(plan)
             if self.dtype == np.complex128:
                 plan = cufft.cufftPlanMany(2, n.ctypes.data, None, 1, 0, None, 1, 0, cufft.CUFFT_Z2Z, N_filters)
                 
             else:
                 plan = cufft.cufftPlanMany(2, n.ctypes.data, None, 1, 0, None, 1, 0, cufft.CUFFT_C2C, N_filters)
             
         
         
         #be careful with the side-by-side constraint
         fftfunc(plan, int(self.filters.gpudata), int(self.filters.gpudata), cufft.CUFFT_FORWARD)
         
          launch_kernel(fft2Dsfun, (256, 1, 1), (Mx*2+1, My*2+1),
                        [[d_Ds, i * d_Ds.ld], self.filters, Mx, My,
                         self.Px, self.Py, N_filters, d_Ds.ld,
                         self.dx*self.dy])
     
     cufft.cufftDestroy(plan)
     
     return d_Ds
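Conceptually, the Dirichlet coefficients are the low-order 2D Fourier coefficients of each receptive field: the batched cuFFT computes the full spectrum and get_fft2Ds_kernel gathers the (2*Mx+1) by (2*My+1) relevant bins, whose exact ordering is not shown here. A rough NumPy sketch of that idea, under the assumption that the gathered bins are the lowest positive and negative frequencies scaled by the pixel area (dirichlet_coeffs_reference is a hypothetical helper):

import numpy as np

# Rough sketch; the exact bin ordering used by get_fft2Ds_kernel may differ.
def dirichlet_coeffs_reference(rf, Mx, My, dx, dy):
    F = np.fft.fft2(rf) * dx * dy              # rf has shape (Py, Px)
    my = np.concatenate((np.arange(0, My + 1), np.arange(-My, 0)))
    mx = np.concatenate((np.arange(0, Mx + 1), np.arange(-Mx, 0)))
    return F[np.ix_(my, mx)]                   # shape (2*My+1, 2*Mx+1)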
Example 9
File: vrf.py Project: bionet/vtem
    def compute_dirich_space(self, d_Ds, Mx, My, Px, Py, Sx, Sy, Wx, Wy, x_start = None, y_start = None):
        """
        Compute the spatial reconstruction functions
        
        Parameters
        ----------

        d_Ds : PitchArray 
             containing Dirichlet coefficients, most likely created by compute_Ds
        Mx : integer
             Order in the x dimension
        My : integer
             Order in the y dimension
        Px : integer
             number of pixels in reconstruction functions in the x dimension 
        Py : integer
             number of pixels in reconstruction functions in the y dimension
        Sx : float
             spatial domain in degree of reconstruction functions
             in x direction
        Sy : float 
             spatial domain in degree of reconstruction functions
             in y direction
        Wx : float
             spatial bandwidth in x direction 
        Wy : float
             spatial bandwidth in y direction
        x_start : float 
             indicating the starting degree in x direction
        y_start : float
             indicating the starting degree in y direction

        output: PitchArray with shape (num_neurons, Py, Px)
        """
        
        if self.dtype == np.complex128:
            typef = np.dtype(np.float64)
        else:
            typef = np.dtype(np.float32)
                
        dirich_fun = get_dirich_space_kernel(self.dtype, typef)
                
        d_dirich = parray.empty((self.num_neurons, Py, Px),typef)
            
        if x_start is None:
            x_start = - Sx/ 2

        if y_start is None:
            y_start = - Sy/2
		
        BLOCKSIZE = 16
        launch_kernel(dirich_fun, (BLOCKSIZE, BLOCKSIZE, 1),
                      (((Px-1) / BLOCKSIZE+1) * ((Py-1) / BLOCKSIZE+1),
                       self.num_neurons),
                      [d_dirich, d_dirich.ld, d_Ds, d_Ds.ld, Px, Py, Mx, My,
                       Sx, Sy, x_start, y_start, Wx / Mx, Wy / My],
                      shared=d_Ds.dtype.itemsize * (2*Mx+1), timed="dirich")
		
        return d_dirich
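The kernel above evaluates each spatial reconstruction function on a Px-by-Py grid from its Dirichlet coefficients. Below is a heavily hedged NumPy sketch of the presumed synthesis, inferred only from the kernel's argument list (frequency spacings Wx/Mx and Wy/My, grid start (x_start, y_start)); dirich_space_reference is a hypothetical helper and the real kernel's coefficient ordering and normalization may differ.

import numpy as np

# Heavily hedged sketch: a truncated Fourier series over orders -Mx..Mx and
# -My..My, assuming Ds has shape (2*My+1, 2*Mx+1) in that order.
def dirich_space_reference(Ds, Mx, My, Px, Py, Sx, Sy, Wx, Wy,
                           x_start, y_start):
    x = x_start + Sx * np.arange(Px) / float(Px)
    y = y_start + Sy * np.arange(Py) / float(Py)
    mx = np.arange(-Mx, Mx + 1)
    my = np.arange(-My, My + 1)
    ex = np.exp(1j * (Wx / Mx) * np.outer(mx, x))   # (2*Mx+1, Px)
    ey = np.exp(1j * (Wy / My) * np.outer(my, y))   # (2*My+1, Py)
    return np.real(np.dot(ey.T, np.dot(Ds, ex)))    # (Py, Px)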
Example 10
    def compute_q(self):
        """ compute q """
        self.d_q = parray.empty((self.size, 1), self.dtype)

        q_func = get_compute_q_kernel(self.dtype)
        launch_kernel(
            q_func, (256, 1, 1),
            (6 * cuda.Context.get_device().MULTIPROCESSOR_COUNT, 1), [
                self.d_q, self.d_tk1, self.d_tk2, self.d_neuron_ind,
                self.d_kappa, self.d_delta, self.d_bias, self.d_norm, self.size
            ])
Example 11
    def generate_filters(self, N_filters=None, startbias=0):
        """
        Generate a batch of filters from parameters set in self

        startbias: start from the (startbias)th filter
        N_filters: generate N_filters filters
        """
        assert self.gpu_loaded
        if N_filters is None:
            N_filters = self.num_neurons - startbias

        if hasattr(self, 'filters'):
            if N_filters != self.filters.shape[0]:
                delattr(self, 'filters')
                self.filters = parray.empty(
                    (N_filters, self.size), self.dtype)
        else:
            self.filters = parray.empty(
                (N_filters, self.size), self.dtype)

        self._call_filter_func(N_filters, startbias)
Example 12
    def compute_Gb(self, Dsfilename, lamb=0.0):
        """
        compute the G matrix using Dirichlet coefficients
        Dsfilename: generated by VTDM_prepb
        lamb: smoothing parameter \lambda
        
        """
        handle = la.cublashandle()
        import tables
        h5file = tables.openFile(Dsfilename)
        Ds = h5file.root.real.read()
        
        d_Ds = parray.to_gpu(Ds.reshape((Ds.shape[0],-1)))
        del Ds
 
        d_Dsw = parray.empty((d_Ds.shape[0], d_Ds.shape[0]), d_Ds.dtype)
        if d_Ds.dtype == np.float64:
            from scikits.cuda.cublas import cublasDgemm
            gemm = cublasDgemm
        else:
            from scikits.cuda.cublas import cublasSgemm
            gemm = cublasSgemm
        gemm(handle.handle, 't', 'n', d_Dsw.shape[0], d_Dsw.shape[0],
             d_Ds.shape[1], 1.0, d_Ds.gpudata, d_Ds.ld, d_Ds.gpudata, d_Ds.ld,
             0.0, d_Dsw.gpudata, d_Dsw.ld)
        Ds = h5file.root.imag.read()
        d_Ds.set(Ds)
        gemm(handle.handle, 't', 'n', d_Dsw.shape[0], d_Dsw.shape[0],
             d_Ds.shape[1], 1.0, d_Ds.gpudata, d_Ds.ld, d_Ds.gpudata, d_Ds.ld,
             1.0, d_Dsw.gpudata, d_Dsw.ld)
        del Ds
        h5file.close()

        norm_func = get_put_norm_kernel(d_Dsw.dtype)
        launch_kernel(norm_func, (256, 1, 1), (d_Dsw.shape[0], 1),
                      [d_Dsw, self.d_norm, d_Dsw.ld])

        self.d_G = parray.empty((self.size, self.size), self.dtype)

        G_func = get_G_kernel(self.dtype, d_Dsw.dtype)
        launch_kernel(G_func, (256, 1, 1), (self.d_G.shape[0], 1),
                      [self.d_G, self.d_G.ld, self.d_tk1, self.d_tk2, self.Wt,
                       self.Mt, d_Dsw, d_Dsw.ld, self.d_neuron_ind],
                      timed="G matrix")

        if lamb != 0:
            lamb_func = get_diag_add_kernel(self.dtype)
            launch_kernel(lamb_func, (256, 1, 1),
                          (6 * cuda.Context.get_device().MULTIPROCESSOR_COUNT,
                           1),
                          [self.d_G, self.d_G.ld, self.d_G.shape[0],
                           self.dtype.type(lamb)])
Example 13
File: vrf.py Project: bionet/vtem
    def generate_visual_receptive_fields(self, startbias = 0, N_filters = None, x_start = None, y_start = None):
        """
        Generate a batch of centre surround filters from parameters set in self
        
        Parameters
        ----------
        startbias : integer, optional
            start from the (startbias)th filter
        N_filters : integer, optional 
            generate N_filters filters
        x_start : float
            indicating the starting degree in x direction
        y_start : float 
            indicating the starting degree in y direction

        
        """
        if N_filters is None:
            N_filters = self.num_neurons - startbias
        
        
        try:
            if N_filters > self.filters.shape[0]:
                del self.filters
                self.filters = parray.empty(
                    (N_filters, self.Pyall, self.Pxall), self.dtype)
        except AttributeError:
            self.filters = parray.empty(
                (N_filters, self.Pyall, self.Pxall), self.dtype)
        

        if x_start is None:
            x_start = - self.Sxall/ 2

        if y_start is None:
            y_start = -self.Syall/2
        
        BLOCK_SIZE = 16
        
        launch_kernel(self.func, (BLOCK_SIZE, BLOCK_SIZE, 1),
                      (((self.Pxall-1)/BLOCK_SIZE+1) *
                       ((self.Pyall-1)/BLOCK_SIZE+1), int(N_filters)),
                      [self.filters, self.filters.ld,
                       [self.d_alpha, startbias], [self.d_x0, startbias],
                       [self.d_y0, startbias], self.Pxall, self.Pyall,
                       self.Sxall, self.Syall, x_start, y_start,
                       self.sigma_center**2, self.sigma_surround**2])
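The kernel receives the squared widths of the center and surround Gaussians together with each filter's center (d_x0, d_y0). Below is a hedged difference-of-Gaussians sketch of what a single centre-surround RF presumably looks like; dog_rf_reference is a hypothetical helper, and the actual kernel's normalization and the role of d_alpha are not visible in this snippet.

import numpy as np

# Hedged sketch of one centre-surround (difference-of-Gaussians) RF on the
# same grid the kernel uses; details may differ from the CUDA kernel.
def dog_rf_reference(Pxall, Pyall, Sxall, Syall, x_start, y_start,
                     x0, y0, sigma_center, sigma_surround):
    x = x_start + Sxall * np.arange(Pxall) / float(Pxall)
    y = y_start + Syall * np.arange(Pyall) / float(Pyall)
    X, Y = np.meshgrid(x, y)
    r2 = (X - x0) ** 2 + (Y - y0) ** 2
    return (np.exp(-r2 / (2 * sigma_center ** 2))
            - np.exp(-r2 / (2 * sigma_surround ** 2)))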
Example 14
    def __init__(self, num_gpot_neurons, gpot_delay_steps,
                 rest, num_spike_neurons, spike_delay_steps):

        self.num_gpot_neurons = num_gpot_neurons
        if num_gpot_neurons > 0:
            self.dtype = np.double
            self.gpot_delay_steps = gpot_delay_steps
            self.gpot_buffer = parray.empty((gpot_delay_steps, num_gpot_neurons),np.double)

            self.gpot_current = 0

            for i in range(gpot_delay_steps):
                cuda.memcpy_dtod(
                    int(self.gpot_buffer.gpudata) +
                    self.gpot_buffer.ld * i * self.gpot_buffer.dtype.itemsize,
                    rest.gpudata, rest.nbytes)

        self.num_spike_neurons = num_spike_neurons
        if num_spike_neurons > 0:
            self.spike_delay_steps = spike_delay_steps
            self.spike_buffer = parray.zeros((spike_delay_steps,num_spike_neurons),np.int32)
            self.spike_current = 0
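Here gpot_buffer acts as a circular buffer of delayed membrane potentials, initialized so that every delay slot starts at the resting values. A hedged sketch of how such a buffer is typically indexed (the advance and read logic is not part of this snippet, and delayed_row is a hypothetical helper):

# With a circular buffer like gpot_buffer, the state delayed by d steps lives
# at row (gpot_current - d) % gpot_delay_steps, and each simulation step
# writes the newest potentials into the next row before advancing
# gpot_current.
def delayed_row(gpot_current, d, gpot_delay_steps):
    return (gpot_current - d) % gpot_delay_steps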
Example 15
    def compute_G(self, Dswfilename, lamb=0.0):
        """
        compute the G matrix using the weighting between RFs
        Dswfilename: generated by VTDM_prep
        lamb: smoothing parameter \lambda
        
        """
        
        
        Dsw = read_file(Dswfilename)
        d_Dsw = parray.to_gpu(Dsw)
        del Dsw

        #norm_func = get_put_norm_kernel(d_Dsw.dtype)
        #launch_kernel(norm_func, (256, 1, 1), (d_Dsw.shape[0],1), [d_Dsw, self.d_norm, d_Dsw.ld])

        self.d_G = parray.empty((self.size, self.size), self.dtype)

        G_func = get_G_kernel(self.dtype, d_Dsw.dtype)
        launch_kernel(G_func, (256, 1, 1), (self.d_G.shape[0], 1),
                      [self.d_G, self.d_G.ld, self.d_tk1, self.d_tk2, self.Wt,
                       self.Mt, d_Dsw, d_Dsw.ld, self.d_neuron_ind],
                      timed="G matrix")

        if lamb != 0:
            lamb_func = get_diag_add_kernel(self.dtype)
            launch_kernel(lamb_func, (256, 1, 1),
                          (6 * cuda.Context.get_device().MULTIPROCESSOR_COUNT,
                           1),
                          [self.d_G, self.d_G.ld, self.d_G.shape[0],
                           self.dtype.type(lamb)])
Example 16
    def filter_image(self, image_input):
        """
        Performs RF filtering on the input image
        for all the RFs
        """
        # video dimensions should match screen dimensions
        # the numpy resize operation doesn't make any checks
        if len(image_input.shape) == 2:
            # if input has 2 dimensions
            assert image_input.shape[1] == self.size
        else:
            # if input has 3 dimensions
            assert (image_input.shape[1]*image_input.shape[2] ==
                    self.size)

        # rasterizing inputs
        image_input.resize((1, self.size))

        d_image = parray.to_gpu(image_input)
        d_output = parray.empty((self.num_neurons, image_input.shape[0]),
                                self.dtype)
        free, total = cuda.mem_get_info()
        self.ONE_TIME_FILTERS = ((free // self.dtype.itemsize)
                                 * 3 // 4 // self.size)
        self.ONE_TIME_FILTERS -= self.ONE_TIME_FILTERS % 2
        self.ONE_TIME_FILTERS = min(self.ONE_TIME_FILTERS, self.num_neurons)
        handle = la.cublashandle()

        for i in np.arange(0, self.num_neurons, self.ONE_TIME_FILTERS):
            Nfilters = min(self.ONE_TIME_FILTERS, self.num_neurons - i)
            self.generate_filters(startbias=i, N_filters=Nfilters)
            la.dot(self.filters, d_image, opb='t',
                   C=d_output[i: i+Nfilters],
                   handle=handle)
        del self.filters
        return d_output.T()
Example 17
    def __init__(self, num_gpot_neurons,  gpot_delay_steps,
                 rest, num_spike_neurons, spike_delay_steps):

        self.num_gpot_neurons = num_gpot_neurons
        if num_gpot_neurons > 0:
            self.dtype = np.double
            self.gpot_delay_steps = gpot_delay_steps
            self.gpot_buffer = parray.empty(
                (gpot_delay_steps, num_gpot_neurons), np.double)

            self.gpot_current = 0

            for i in range(gpot_delay_steps):
                cuda.memcpy_dtod(
                    int(self.gpot_buffer.gpudata) +
                    self.gpot_buffer.ld * i * self.gpot_buffer.dtype.itemsize,
                    rest.gpudata, rest.nbytes)

        self.num_spike_neurons = num_spike_neurons
        if num_spike_neurons > 0:
            self.spike_delay_steps = spike_delay_steps
            self.spike_buffer = parray.zeros(
                (spike_delay_steps, num_spike_neurons), np.int32)
            self.spike_current = 0
Example 18
 def compute_q(self):
     """ compute q """
     self.d_q = parray.empty((self.size, 1), self.dtype)

     q_func = get_compute_q_kernel(self.dtype)
     launch_kernel(q_func, (256, 1, 1),
                   (6 * cuda.Context.get_device().MULTIPROCESSOR_COUNT, 1),
                   [self.d_q, self.d_tk1, self.d_tk2, self.d_neuron_ind,
                    self.d_kappa, self.d_delta, self.d_bias, self.d_norm,
                    self.size])
Example 19
def rnn3(G, q, dt=1e-6, alpha=5000, steps=4000, XOUTPUTSTEPS=None):
    """
    Solving the decoding problem using a recurrent neural network.
    
    Parameters
    ----------
    G: PitchArray
       Must be real and positive semidefinite.
    q: PitchArray
       The measurements from spikes
    dt: float (optional)
        the time step in simulating the continuous network
    alpha: float (optional)
           scaling factor
    steps: int (optional)
           the number of steps to run the network
    XOUTPUTSTEPS: int (optional)
           The number of steps that are returned.
           If using default None, only return the final result.
    
    Return
    ------
    c: PitchArray
       The approximate solution to the decoding problem
    output: PitchArray (optional)
            If XOUTPUTSTEPS is not None, the full output specified
    """
    if G.dtype != q.dtype:
        raise TypeError("matrix multiplication must have same dtype")

    if np.iscomplexobj(G):
        raise TypeError("RNN currently only solves real types")

    if (len(G.shape) != 2) | (len(q.shape) != 2):
        raise TypeError("G, q must both be matrices")

    if XOUTPUTSTEPS is None:
        XOUTPUTSTEPS = min(20, steps)
        x_steps = steps / XOUTPUTSTEPS
        fullout = False
    else:
        fullout = True
        x_steps = steps / int(XOUTPUTSTEPS)
        output = parray.empty((XOUTPUTSTEPS, q.size), q.dtype)

    c = parray.zeros_like(q)
    update_func = get_rnn3_update_func(G.dtype)

    dt = float(dt)
    alpha = float(alpha)

    y = parray.empty_like(q)

    if y.dtype == np.float64:
        normfunc = cublasDnrm2
    else:
        normfunc = cublasSnrm2

    grid = (6 * cuda.Context.get_device().MULTIPROCESSOR_COUNT, 1)

    handle = la.cublashandle()

    start = time.time()
    for i in range(0, steps + 1):
        Gc = la.dot(G, c, handle=handle)
        launch_kernel(update_func, (256, 1, 1),
                      grid, [c, dt * alpha, q, Gc, y, c.size, 1],
                      prepared=True)

        if i % x_steps == 0:
            ynorm = normfunc(handle.handle, y.size, y.gpudata, 1)
            print "%d, norm = %.10f, time=%f(ms)" % (i / x_steps, ynorm,
                                                     (time.time() - start) *
                                                     1000)
            if fullout:
                cuda.memcpy_dtod(
                    int(output.gpudata) +
                    output.dtype.itemsize * output.ld * int(i / x_steps - 1),
                    c.gpudata, c.dtype.itemsize * c.size)

    #cuda.memcpy_dtod(q.gpudata, c.gpudata, c.dtype.itemsize*c.size)

    if fullout:
        return c, output
    else:
        return c
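The per-step kernel receives c, dt*alpha, q, G@c and y, which is consistent with the update y = q - Gc, c <- c + dt*alpha*y; for positive semidefinite G this relaxes toward a solution of G c = q. A hedged NumPy sketch of that presumed iteration (rnn3_reference is a hypothetical helper; the CUDA kernel body itself is not shown and may differ in detail):

import numpy as np

# Hedged sketch of the presumed rnn3 fixed-point iteration.
def rnn3_reference(G, q, dt=1e-6, alpha=5000, steps=4000):
    c = np.zeros_like(q)
    for _ in range(steps):
        y = q - np.dot(G, c)       # residual of G c = q
        c = c + dt * alpha * y     # relax c along the residual
    return c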
Example 20
    filter_func = _get_gaussian_cylinder(dtype)

    # Constants
    S1 = 128
    S2 = 128
    PHOTORECEPTORS = 8
    M_size = S1*S2  # same as grid[0].size
    N_filters = PHOTORECEPTORS

    RAD = 1
    KAPPA = 20
    SIGMA = 1  # or angle
    NTHREADS = (128, 1, 1)
    NBLOCKS = ((M_size-1) // NTHREADS[0] + 1, 1)

    d_filters = parray.empty((N_filters, M_size), dtype)
    grid = np.meshgrid(np.linspace(-1, 1, num=S1),
                       np.linspace(-np.pi, np.pi, num=S2))
    d_grid = [parray.to_gpu(grid[i].flatten()) for i in range(len(grid))]

    dxy = np.diff(grid[0][0, :2])*np.diff(grid[1][:2, 0])[0]

    ref_z = 2*random.rand(PHOTORECEPTORS)-1  # -1 to 1
    d_refz = parray.to_gpu(ref_z)

    ref_theta = np.pi*random.rand(PHOTORECEPTORS)  # half cylinder
    d_reftheta = parray.to_gpu(ref_theta)
    filter_func.prepared_call(
        NBLOCKS,
        NTHREADS,
        d_filters.gpudata,
Example 21
File: vrf.py Project: bionet/vtem
 def compute_dirich_space_fft(self, d_Ds, Mx, My, Px, Py, Sx, Sy, Wx, Wy):
     import scikits.cuda.cufft as cufft
     
     dx = Sx / Px
     dy = Sy / Py
     
     Px1 = int(np.round(self.Sx / dx))
     Py1 = int(np.round(self.Sy / dy))
     
     
     if self.dtype == np.complex128:
         typef = np.dtype(np.float64)
     else:
         typef = np.dtype(np.float32)
     
     d_dirich = parray.empty((self.num_neurons, Py, Px),typef)
     
     freemem,totalmem = cuda.mem_get_info()
     
     ONE_TIME_FILTER = int(min(freemem / (Px1 * Py1 * d_Ds.dtype.itemsize) / 4, self.num_neurons))
     
     
     n = np.asarray((Py1, Px1) ,np.int32)
     
     
     if self.dtype == np.complex128:
         plan = cufft.cufftPlanMany(2, n.ctypes.data, None, 1, 0, None, 1, 0, cufft.CUFFT_Z2Z, ONE_TIME_FILTER)
         fftfunc = cufft.cufftExecZ2Z
     else:
         plan = cufft.cufftPlanMany(2, n.ctypes.data, None, 1, 0, None, 1, 0, cufft.CUFFT_C2C, ONE_TIME_FILTER)
         fftfunc = cufft.cufftExecC2C
     
     
     Ds2fftfun = get_Ds2fft_kernel(self.dtype)
     d_filter_complex = parray.empty((ONE_TIME_FILTER, Px1*Py1), self.dtype)
     filter2recfun = get_filter2rec_kernel(self.dtype)
     
     for i in range(0, self.num_neurons, ONE_TIME_FILTER):
         N_filters = min(ONE_TIME_FILTER, self.num_neurons - i)
         d_filter_complex.fill(0)
         
          launch_kernel(Ds2fftfun, (256, 1, 1), (Mx*2+1, My*2+1),
                        [[d_Ds, i * d_Ds.ld], d_Ds.ld, d_filter_complex,
                         d_filter_complex.ld, Mx, My, Px1, Py1, N_filters])
         
         if N_filters < ONE_TIME_FILTER:
             cufft.cufftDestroy(plan)
             if self.dtype == np.complex128:
                 plan = cufft.cufftPlanMany(2, n.ctypes.data, None, 1, 0, None, 1, 0, cufft.CUFFT_Z2Z, N_filters)
             
             else:
                 plan = cufft.cufftPlanMany(2, n.ctypes.data, None, 1, 0, None, 1, 0, cufft.CUFFT_C2C, N_filters)
         
         
         
         #be careful with the side-by-side constraint
         fftfunc(plan, int(d_filter_complex.gpudata), int(d_filter_complex.gpudata), cufft.CUFFT_INVERSE)
         
         
         BLOCK_SIZE = 16
          launch_kernel(filter2recfun, (BLOCK_SIZE, BLOCK_SIZE, 1),
                        (((Px-1) / BLOCK_SIZE + 1) * ((Py-1) / BLOCK_SIZE + 1),
                         N_filters),
                        [[d_dirich, i * d_dirich.ld], d_dirich.ld,
                         d_filter_complex, d_filter_complex.ld, N_filters,
                         Px, Py, Px1, Py1])
     
     cufft.cufftDestroy(plan)
     
     return d_dirich
Example 22
 def encode(self, neural_inputs, startbias = 0, avg_rate = 0.1):
     """
         Encode with IAFs with random thresholds
         
         Parameters
         ----------
         neural_inputs : PitchArray 
              PitchArray of shape (num_samples, num_neurons) containing inputs to all neurons
         startbias : integer 
              the neuron index corresponding to first column of neural_inputs
          avg_rate : float
               average spiking rate assumed for the neurons; a buffer of
               num_samples/avg_rate entries per neuron is allocated for
               storing spikes
         
         Returns
         -------
         spikes : ndarray of self.dtype 
              stores the spikes for one neuron after another
         spike_count : ndarray of int32 of size num_neurons 
              indicates the number of spikes generated by each neuron
         
         Notes
         -----
         spikes for neuron j can be accessed by
         ::
         
              cum_count = np.concatenate((np.zeros(1,np.int32),np.cumsum(spike_count)))
              tk = spikes[cum_count[j]:cum_count[j+1]]
         
         """
     
     neuron_per_block=64
     
     if self.num_neurons != neural_inputs.shape[1]:
         raise ValueError("input size should match number of neurons")
     
     Ntimesteps = neural_inputs.shape[0]
     
     d_spikecount = parray.empty((1, self.num_neurons), np.int32)
     
     randnum = np.random.normal(size = ( int(np.ceil(Ntimesteps / avg_rate)), self.num_neurons)).astype(self.dtype)
     #d_spike = parray.empty( ( int(np.ceil(Ntimesteps / avg_rate)), self.num_neurons), self.dtype) 
     d_spike = parray.to_gpu(randnum)
     
     
     if neural_inputs.__class__ is np.ndarray:
         d_neural_inputs = parray.to_gpu(neural_inputs)
     else:
         d_neural_inputs = neural_inputs
     
     
      launch_kernel(self.func, (neuron_per_block, 1, 1),
                    (int(np.ceil(np.float64(self.num_neurons)
                                 / neuron_per_block)), 1),
                    [d_neural_inputs, d_neural_inputs.ld, self.num_neurons,
                     Ntimesteps, d_spike, d_spike.ld, [self.d_v0, startbias],
                     [self.d_kappa, startbias], [self.d_bias, startbias],
                     [self.d_delta, startbias],
                     [self.d_time_count, startbias], d_spikecount,
                     int(np.ceil(Ntimesteps / avg_rate)), self.dt,
                     [self.d_delta_value, startbias],
                     [self.d_sigma, startbias]],
                    shared=self.dtype.itemsize * neuron_per_block)
     
     
     spike_count = d_spikecount.get()
     spike_count.resize((self.num_neurons,))
     
     if spike_count.max() >= np.ceil(Ntimesteps / avg_rate):
         raise ValueError("number of spikes exceeded the limit of buffer")
     
     spike = rearrange_spikes(d_spike, spike_count, self.num_neurons)
     
     return spike, spike_count
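The kernel's argument list (kappa, bias, delta, sigma, dt and a buffer of Gaussian random numbers) matches an integrate-and-fire encoder with threshold noise. Below is a heavily hedged single-neuron sketch of that scheme, inferred only from those arguments; iaf_encode_reference is a hypothetical helper, and the actual CUDA kernel's update order, reset rule and noise model may differ.

import numpy as np

# Heavily hedged single-neuron sketch of threshold-noise IAF encoding.
def iaf_encode_reference(u, dt, kappa, bias, delta, sigma):
    v, threshold, spikes = 0.0, delta, []
    for k in range(len(u)):
        v += dt * (u[k] + bias) / kappa        # integrate the biased input
        if v >= threshold:
            spikes.append((k + 1) * dt)        # record the spike time
            v -= threshold                     # reset by the threshold
            threshold = delta + sigma * np.random.randn()
    return np.asarray(spikes)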
Example 23
def rnn3(G, q, dt = 1e-6, alpha = 5000, steps = 4000, XOUTPUTSTEPS = None):
    """
    Solving the decoding problem using a recurrent neural network.
    
    Parameters
    ----------
    G: PitchArray
       Must be real and positive semidefinite.
    q: PitchArray
       The measurements from spikes
    dt: float (optional)
        the time step in simulating the continuous network
    alpha: float (optional)
           scaling factor
    steps: int (optional)
           the number of steps to run the network
    XOUTPUTSTEPS: int (optional)
           The number of steps that are returned.
           If using default None, only return the final result.
    
    Return
    ------
    c: PitchArray
       The approximate solution to the decoding problem
    output: PitchArray (optional)
            If XOUTPUTSTEPS is not None, the full output specified
    """
    if G.dtype != q.dtype:
        raise TypeError("matrix multiplication must have same dtype")

    if np.iscomplexobj(G):
        raise TypeError("RNN currently only solves real types")

    if (len(G.shape) != 2) | (len(q.shape) != 2):
        raise TypeError("G, q must both be matrices")

    if XOUTPUTSTEPS is None:
        XOUTPUTSTEPS = min(20, steps)
        x_steps = steps / XOUTPUTSTEPS
        fullout = False
    else:
        fullout = True
        x_steps = steps / int(XOUTPUTSTEPS)
        output = parray.empty((XOUTPUTSTEPS, q.size), q.dtype)

    c = parray.zeros_like(q)
    update_func = get_rnn3_update_func(G.dtype)

    dt = float(dt)
    alpha = float(alpha)

    y = parray.empty_like(q)

    if y.dtype == np.float64:
        normfunc = cublasDnrm2
    else:
        normfunc = cublasSnrm2

    grid = (6 * cuda.Context.get_device().MULTIPROCESSOR_COUNT, 1)
    
    handle = la.cublashandle()
    
    start = time.time()
    for i in range(0,steps+1):
        Gc = la.dot(G, c, handle = handle)
        launch_kernel(update_func, (256,1,1), grid, 
                      [c, dt*alpha, q, Gc, y, c.size, 1],
                      prepared = True)
        
        if i%x_steps == 0:
            ynorm = normfunc(handle.handle, y.size, y.gpudata, 1)
            print "%d, norm = %.10f, time=%f(ms)" % (i / x_steps, ynorm,
                                                     (time.time()-start)*1000);
            if fullout:
                cuda.memcpy_dtod(
                    int(output.gpudata) + 
                    output.dtype.itemsize*output.ld*int(i/x_steps-1), 
                    c.gpudata, c.dtype.itemsize * c.size)

    #cuda.memcpy_dtod(q.gpudata, c.gpudata, c.dtype.itemsize*c.size)

    if fullout:
        return c,output
    else:
        return c