def test_fftconvolve(self, num_samps, mode="full"):
    """Check the GPU autocorrelation against the CPU reference.

    A random signal of ``num_samps`` samples is convolved with its own
    reversal, once with ``signal.fftconvolve`` on the host and once with
    ``cusignal.fftconvolve`` on the device. The device result is copied
    back to the host and the two are compared with ``array_equal``.

    Args:
        num_samps: Length of the random test signal.
        mode: Convolution mode forwarded unchanged to both
            implementations.
    """
    host_signal = np.random.rand(num_samps)
    device_signal = cp.asarray(host_signal)

    # Reference result computed on the host.
    expected = signal.fftconvolve(host_signal, host_signal[::-1], mode=mode)

    # Same operation on the device, then copied back for comparison.
    device_result = cusignal.fftconvolve(
        device_signal, device_signal[::-1], mode=mode
    )
    actual = cp.asnumpy(device_result)

    assert array_equal(expected, actual)
def filter_morlet_gpu(data, sr, omega, morlet_frequency):
    """Convolve each row of ``data`` with a Morlet wavelet on the GPU.

    Args:
        data: Two-dimensional array; each row is convolved independently.
        sr: First argument passed to ``mne.time_frequency.morlet``
            (presumably the sampling rate -- confirm against callers).
        omega: Third argument passed to ``mne.time_frequency.morlet``.
        morlet_frequency: Single frequency for which the wavelet is built.

    Returns:
        A CuPy ``complex64`` array with the same shape as ``data``
        holding the ``'same'``-mode convolution of every row.
    """
    # Unpacking into exactly two names also rejects non-2D input.
    channel_count, _ = data.shape
    device_data = cp.asarray(data)

    # morlet() is given one frequency; only its first wavelet is used.
    wavelets = mne.time_frequency.morlet(sr, [morlet_frequency], omega)
    kernel = cp.array(wavelets[0])

    filtered = cp.zeros_like(device_data, dtype=cp.complex64)
    for row in range(channel_count):
        filtered[row] = cusignal.fftconvolve(device_data[row], kernel, 'same')

    return filtered
def routine_gpu(data, sr, omega, morlet_frequency):
    """Compute inner-product matrices for filtered data and a surrogate.

    Each row of ``data`` is convolved with a Morlet wavelet and divided
    by its own magnitude. A surrogate copy is built by circularly
    shifting every row by an independent random offset. For both the
    normalised data and the surrogate, the inner product with the
    complex conjugate is taken and divided by the number of samples.

    Args:
        data: Two-dimensional array of shape ``(n_chans, n_ts)``.
        sr: First argument passed to ``mne.time_frequency.morlet``
            (presumably the sampling rate -- confirm against callers).
        omega: Third argument passed to ``mne.time_frequency.morlet``.
        morlet_frequency: Single frequency for which the wavelet is built.

    Returns:
        A tuple ``(plv, plv_surr)`` of NumPy arrays: the result for the
        normalised data and for the randomly shifted surrogate.
    """
    channel_count, sample_count = data.shape
    device_data = cp.asarray(data)

    wavelets = mne.time_frequency.morlet(sr, [morlet_frequency], omega)
    kernel = cp.array(wavelets[0])

    normalised = cp.zeros_like(device_data, dtype=cp.complex64)
    surrogate = cp.zeros_like(normalised)

    for ch in range(channel_count):
        normalised[ch] = cusignal.fftconvolve(device_data[ch], kernel, 'same')
        # Divide by the magnitude so only the phase term remains.
        normalised[ch] /= cp.abs(normalised[ch])
        # One random circular shift per channel, drawn on the host.
        shift = np.random.randint(sample_count - 1)
        surrogate[ch] = cp.roll(normalised[ch], shift)

    plv = cp.inner(normalised, cp.conj(normalised)) / sample_count
    plv_surr = cp.inner(surrogate, cp.conj(surrogate)) / sample_count

    return cp.asnumpy(plv), cp.asnumpy(plv_surr)
def go(signal, gpuR, gpuW):
    """Run demodulation on the GPU.

    The reference and window data must already be stored on the GPU
    (using the init_gpu() function); the objects it returns are the
    ``gpuR`` and ``gpuW`` arguments required here.

    Args:
        signal: Two-dimensional host array of shape ``(N, k)``.
        gpuR: Device array whose first dimension indexes the ``M``
            demodulation references; row ``i`` is multiplied with the
            signal.
        gpuW: Device window, tiled ``N`` times along the first axis
            before being used as the convolution kernel.

    Returns:
        A ``(M, N)`` numpy array (np.float64) holding the average of the
        convolution result along the second dimension of the signal
        data. This can be considered the demodulation result for each
        demodulation channel.
    """
    # Two-name unpacking requires a 2D signal; the second extent is unused.
    row_count, _ = signal.shape
    reference_count = gpuR.shape[0]

    device_signal = cp.asarray(signal)
    # One copy of the window per signal row, built once for all references.
    tiled_window = cp.tile(gpuW, (row_count, 1))

    results = np.zeros((reference_count, row_count))
    for ref in range(reference_count):
        mixed = cp.multiply(device_signal, gpuR[ref, :])
        filtered = cusignal.fftconvolve(
            mixed, tiled_window, mode='same', axes=1
        )
        averaged = cp.mean(filtered, axis=1)
        results[ref, :] = cp.asnumpy(averaged)

    return results
def _filter_data(self, frequency: float):
    """Filter every channel at ``frequency`` and derive amplitude labels.

    For each row of ``self.data`` this method:

    1. convolves it with a Morlet wavelet and stores the result in
       ``self.data_preprocessed``;
    2. takes the magnitude as the envelope and divides the filtered
       signal by it, so that PLV can later be computed as an inner
       product with the conjugate;
    3. divides the envelope by its median so envelopes are comparable
       between contacts;
    4. marks in ``self.data_thresholded`` the samples whose normalised
       envelope does not exceed twice the (recomputed) median.

    The envelope values selected by that mask give ``self.n_bins + 1``
    percentile edges, which ``digitize_cupy`` uses to write labels into
    ``self.data_amplitude_labels``; the labels are then decremented by
    one. Finally the envelope is kept on ``self.data_envelope`` and the
    conjugate of the filtered data is written to ``self.data_conj``.

    Args:
        frequency: Single frequency for which the wavelet is built.
    """
    channel_count = self.data.shape[0]
    percentile_edges = cp.linspace(0, 100, self.n_bins + 1)

    wavelets = mne.time_frequency.morlet(
        self.sfreq, [frequency], self.omega)
    kernel = cp.array(wavelets[0])
    envelope = cp.zeros_like(self.data)

    for ch in range(channel_count):
        self.data_preprocessed[ch] = cusignal.fftconvolve(
            self.data[ch], kernel, 'same')
        envelope[ch] = cp.abs(self.data_preprocessed[ch])

        # Unit-magnitude signal: lets PLV be computed through an inner
        # product with the conjugate.
        self.data_preprocessed[ch] /= envelope[ch]

        # Median-normalised envelope: comparable between contacts.
        envelope[ch] /= cupy_median(envelope[ch])

        # The median is recomputed on the already-normalised envelope.
        upper_limit = cupy_median(envelope[ch]) * 2
        self.data_thresholded[ch] = envelope[ch] <= upper_limit

    # Percentile edges are taken only over samples that passed the threshold.
    retained = envelope[self.data_thresholded]
    bin_edges = cp.percentile(retained, percentile_edges)
    digitize_cupy(envelope, bin_edges, out=self.data_amplitude_labels)
    # Presumably shifts 1-based bin indices to 0-based -- confirm against
    # digitize_cupy.
    self.data_amplitude_labels -= 1

    # The conjugate is not needed during preprocessing and the envelope is
    # not needed afterwards, so the envelope could be freed here to save
    # memory; it is currently retained on the instance instead.
    self.data_envelope = envelope

    self.data_conj = cp.zeros_like(self.data_preprocessed)
    cp.conj(self.data_preprocessed, out=self.data_conj)
def gpu_version(self, sig, mode):
    """Convolve ``sig`` with its reversal on the GPU and wait for it.

    Args:
        sig: Input array passed to ``cusignal.fftconvolve`` together
            with its reversed view.
        mode: Convolution mode forwarded to ``cusignal.fftconvolve``.

    Returns:
        The result of ``cusignal.fftconvolve``, returned after the null
        stream has been synchronized.
    """
    null_stream = cp.cuda.Stream.null
    with null_stream:
        result = cusignal.fftconvolve(sig, sig[::-1], mode=mode)
    # Block until the work queued on the null stream has finished.
    null_stream.synchronize()
    return result