def filter(self, video_input):
    """Apply every receptive-field filter to an input video on the GPU.

    Parameters
    ----------
    video_input : ndarray
        Either a 2D array whose second axis equals self.size, or a 3D
        array whose last two axes multiply to self.size.  It is
        rasterized in place to shape (frames, self.size).

    Returns
    -------
    PitchArray
        Filter responses, transposed so frames index the first axis
        and filters the second.
    """
    shape = video_input.shape
    if len(shape) == 2:
        assert shape[1] == self.size
    else:
        assert shape[1] * shape[2] == self.size
    # flatten each frame into one row of pixels
    video_input.resize((shape[0], self.size))

    video_gpu = parray.to_gpu(video_input)
    result_gpu = parray.empty((self.num_neurons, video_input.shape[0]),
                              self.dtype)

    # batch as many filters as fit in 3/4 of the free device memory,
    # rounded down to an even count and capped at num_neurons
    free_bytes, _ = cuda.mem_get_info()
    capacity = (free_bytes // self.dtype.itemsize) * 3 // 4 // self.size
    capacity -= capacity % 2
    self.ONE_TIME_FILTERS = min(capacity, self.num_neurons)

    handle = la.cublashandle()
    for offset in np.arange(0, self.num_neurons, self.ONE_TIME_FILTERS):
        batch = min(self.ONE_TIME_FILTERS, self.num_neurons - offset)
        self.generate_filters(startbias=offset, N_filters=batch)
        la.dot(self.filters, video_gpu, opb='t',
               C=result_gpu[offset: offset + batch], handle=handle)
        # free the filter bank before the next chunk is generated
        del self.filters
    return result_gpu.T()
def filter_image(self, image_input):
    """Apply every receptive-field filter to one input image on the GPU.

    Parameters
    ----------
    image_input : ndarray
        Either a 2D array whose second axis equals self.size, or a 3D
        array whose last two axes multiply to self.size.  It is
        rasterized in place to shape (1, self.size).

    Returns
    -------
    PitchArray
        Filter responses for the image, transposed so filters index
        the second axis.
    """
    # image dimensions must match the screen dimensions up front:
    # numpy's in-place resize performs no such consistency check.
    shape = image_input.shape
    if len(shape) == 2:
        assert shape[1] == self.size
    else:
        assert shape[1] * shape[2] == self.size
    image_input.resize((1, self.size))

    image_gpu = parray.to_gpu(image_input)
    result_gpu = parray.empty((self.num_neurons, image_input.shape[0]),
                              self.dtype)

    # batch as many filters as fit in 3/4 of the free device memory,
    # rounded down to an even count and capped at num_neurons
    free_bytes, _ = cuda.mem_get_info()
    capacity = (free_bytes // self.dtype.itemsize) * 3 // 4 // self.size
    capacity -= capacity % 2
    self.ONE_TIME_FILTERS = min(capacity, self.num_neurons)

    handle = la.cublashandle()
    for offset in np.arange(0, self.num_neurons, self.ONE_TIME_FILTERS):
        batch = min(self.ONE_TIME_FILTERS, self.num_neurons - offset)
        self.generate_filters(startbias=offset, N_filters=batch)
        la.dot(self.filters, image_gpu, opb='t',
               C=result_gpu[offset: offset + batch], handle=handle)
        # free the filter bank before the next chunk is generated
        del self.filters
    return result_gpu.T()
def rnn3(G, q, dt = 1e-6, alpha = 5000, steps = 4000, XOUTPUTSTEPS = None): """ Solving the decoding problem using a recurrent neural network. Parameters ---------- G: PitchArray Must be real and positive semidefinite. q: PitchArray The measurements from spikes dt: float (optional) the time step in simulating the continuous network alpha: float (optional) scaling factor steps: int (optional) the number of steps to run the network XOUTPUTSTEPS: int (optional) The number of steps that are returned. If using default None, only return the final result. Return ------ c: PitchArray The approximate solution to the decoding problem output: PitchArray (optional) If XOUTPUTSTEPS is not None, the full output specified """ if G.dtype != q.dtype: raise TypeError("matrix multiplication must have same dtype") if np.iscomplexobj(G): raise TypeError("RNN currently only solves real types") if (len(G.shape) != 2) | (len(q.shape) != 2): raise TypeError("G, q must both be matrices") if XOUTPUTSTEPS is None: XOUTPUTSTEPS = min(20, steps) x_steps = steps / XOUTPUTSTEPS fullout = False else: fullout = True x_steps = steps / int(XOUTPUTSTEPS) output = parray.empty((XOUTPUTSTEPS, q.size), q.dtype) c = parray.zeros_like(q) update_func = get_rnn3_update_func(G.dtype) dt = float(dt) alpha = float(alpha) y = parray.empty_like(q) if y.dtype == np.float64: normfunc = cublasDnrm2 else: normfunc = cublasSnrm2 grid = (6 * cuda.Context.get_device().MULTIPROCESSOR_COUNT, 1) handle = la.cublashandle() start = time.time() for i in range(0,steps+1): Gc = la.dot(G, c, handle = handle) launch_kernel(update_func, (256,1,1), grid, [c, dt*alpha, q, Gc, y, c.size, 1], prepared = True) if i%x_steps == 0: ynorm = normfunc(handle.handle, y.size, y.gpudata, 1) print "%d, norm = %.10f, time=%f(ms)" % (i / x_steps, ynorm, (time.time()-start)*1000); if fullout: cuda.memcpy_dtod( int(output.gpudata) + output.dtype.itemsize*output.ld*int(i/x_steps-1), c.gpudata, c.dtype.itemsize * c.size) #cuda.memcpy_dtod(q.gpudata, 
c.gpudata, c.dtype.itemsize*c.size) if fullout: return c,output else: return c
def rnn3(G, q, dt=1e-6, alpha=5000, steps=4000, XOUTPUTSTEPS=None): """ Solving the decoding problem using a recurrent neural network. Parameters ---------- G: PitchArray Must be real and positive semidefinite. q: PitchArray The measurements from spikes dt: float (optional) the time step in simulating the continuous network alpha: float (optional) scaling factor steps: int (optional) the number of steps to run the network XOUTPUTSTEPS: int (optional) The number of steps that are returned. If using default None, only return the final result. Return ------ c: PitchArray The approximate solution to the decoding problem output: PitchArray (optional) If XOUTPUTSTEPS is not None, the full output specified """ if G.dtype != q.dtype: raise TypeError("matrix multiplication must have same dtype") if np.iscomplexobj(G): raise TypeError("RNN currently only solves real types") if (len(G.shape) != 2) | (len(q.shape) != 2): raise TypeError("G, q must both be matrices") if XOUTPUTSTEPS is None: XOUTPUTSTEPS = min(20, steps) x_steps = steps / XOUTPUTSTEPS fullout = False else: fullout = True x_steps = steps / int(XOUTPUTSTEPS) output = parray.empty((XOUTPUTSTEPS, q.size), q.dtype) c = parray.zeros_like(q) update_func = get_rnn3_update_func(G.dtype) dt = float(dt) alpha = float(alpha) y = parray.empty_like(q) if y.dtype == np.float64: normfunc = cublasDnrm2 else: normfunc = cublasSnrm2 grid = (6 * cuda.Context.get_device().MULTIPROCESSOR_COUNT, 1) handle = la.cublashandle() start = time.time() for i in range(0, steps + 1): Gc = la.dot(G, c, handle=handle) launch_kernel(update_func, (256, 1, 1), grid, [c, dt * alpha, q, Gc, y, c.size, 1], prepared=True) if i % x_steps == 0: ynorm = normfunc(handle.handle, y.size, y.gpudata, 1) print "%d, norm = %.10f, time=%f(ms)" % (i / x_steps, ynorm, (time.time() - start) * 1000) if fullout: cuda.memcpy_dtod( int(output.gpudata) + output.dtype.itemsize * output.ld * int(i / x_steps - 1), c.gpudata, c.dtype.itemsize * c.size) 
#cuda.memcpy_dtod(q.gpudata, c.gpudata, c.dtype.itemsize*c.size) if fullout: return c, output else: return c