def generate_analytic_subbands(signal, filters, padding_size=None,
                               fft_mode='auto'):
    """Compute the analytic (Hilbert-transformed) subbands of a signal.

    The signal is put into canonical form, optionally padded, transformed
    with ``utils.fft``, multiplied by the filterbank, and the product is
    handed to ``utils.fhilbert``. For full cochleagram generation, see
    generate_subband_envelopes.

    Args:
      signal (array): The sound signal (waveform) in the time domain.
      filters (array): The filterbank, in frequency space, used to generate
        the cochleagram. This should be the full filter-set output of
        erbFilter.make_erb_cos_filters_nx, or similar.
      padding_size (int, optional): Padding is applied only when this is not
        None and strictly greater than 1; the value is forwarded to
        ``pad_signal``, which decides how much padding is added. The padded
        region is sliced off the result again before returning.
      fft_mode ({'auto', 'fftw', 'np'}, optional): Forwarded to the ``utils``
        FFT helpers to select the FFT implementation. Per the existing
        documentation, 'auto' tries pyfftw and falls back to numpy.

    Returns:
      array: **analytic_subbands**: The output of ``utils.fhilbert`` for the
        filtered spectrum. When padding was applied, the second axis is
        trimmed by the amount reported by ``pad_signal``.
    """
    use_padding = padding_size is not None and padding_size > 1

    samples = reshape_signal_canonical(signal)
    if use_padding:
        samples, padding = pad_signal(samples, padding_size)

    spectrum = utils.fft(samples, mode=fft_mode)
    analytic_subbands = utils.fhilbert(filters * spectrum, mode=fft_mode)

    if not use_padding:
        return analytic_subbands

    # NOTE(review): assumes `padding` is the number of samples appended by
    # pad_signal, so this restores the pre-padding length -- the original
    # author flagged this trim as unverified.
    keep = samples.shape[0] - padding
    return analytic_subbands[:, :keep]
def collapse_subbands(subbands, filters, fft_mode='auto'):
    """Collapse subbands into a single waveform by re-applying the filterbank.

    Each subband is transformed with ``utils.fft``, multiplied by the
    filterbank, transformed back with ``utils.ifft``, reduced to its real
    part, and the results are summed over the first axis.

    Args:
      subbands (array): The subband decomposition (i.e., cochleagram) to
        collapse.
      filters (array): The filterbank, in frequency space, that was used to
        create `subbands`. This should be the full filter-set output of
        erbFilter.make_erb_cos_filters_nx, or similar.
      fft_mode ({'auto', 'fftw', 'np'}, optional): Forwarded to the ``utils``
        FFT helpers to select the FFT implementation. Per the existing
        documentation, 'auto' tries pyfftw and falls back to numpy.

    Returns:
      array: **signal**: The signal resulting from collapsing the subbands.
    """
    spectra = utils.fft(subbands, mode=fft_mode)
    refiltered = utils.ifft(filters * spectra, mode=fft_mode)
    # Only the real part is kept before summing across the subband axis.
    return np.real(refiltered).sum(axis=0)
def generate_subband_envelopes_fast(signal, filters, padding_size=None,
                                    fft_mode='auto', debug_ret_all=False):
    """Compute the subband envelopes (i.e., the cochleagram) of a signal.

    Only the envelopes (magnitude of the analytic subbands) are returned.
    For real-valued input the spectrum is computed with ``utils.rfft`` and
    written into the leading columns of a zero-initialised complex array of
    the filterbank's shape before being passed to ``utils.fhilbert``;
    otherwise the full ``utils.fft`` is used. See utils.rfft and
    utils.fhilbert for details on the speed-up.

    Args:
      signal (array): The sound signal (waveform) in the time domain.
      filters (array): The filterbank, in frequency space, used to generate
        the cochleagram. This should be the full filter-set output of
        erbFilter.make_erb_cos_filters_nx, or similar.
      padding_size (int, optional): Padding is applied only when this is not
        None and strictly greater than 1; the value is forwarded to
        ``pad_signal``, which decides how much padding is added. The padded
        region is sliced off the results again before returning.
      fft_mode ({'auto', 'fftw', 'np'}, optional): Forwarded to the ``utils``
        FFT helpers to select the FFT implementation. Per the existing
        documentation, 'auto' tries pyfftw and falls back to numpy.
      debug_ret_all (bool, optional): When this is exactly ``True``, a dict
        of every local variable of this function (keyed by name, in sorted
        order) is returned instead of the envelopes.

    Returns:
      array: **subband_envelopes**: The subband envelopes (i.e., cochleagram)
        resulting from the subband decomposition, or the debug dict described
        above. When padding was applied, the second axis is trimmed by the
        amount reported by ``pad_signal``.
    """
    # Local names are deliberately unchanged: they become the keys of the
    # dict returned when debug_ret_all is True.
    signal_flat = reshape_signal_canonical(signal)

    if padding_size is not None and padding_size > 1:
        signal_flat, padding = pad_signal(signal_flat, padding_size)

    if not np.isrealobj(signal_flat):
        fft_sample = utils.fft(signal_flat, mode=fft_mode)
        subbands = filters * fft_sample
    else:
        # Real input: use the rfft and leave the remaining columns at zero
        # as preparation for the Hilbert transform.
        fft_sample = utils.rfft(signal_flat, mode=fft_mode)
        nr = fft_sample.shape[0]
        subbands = np.zeros(filters.shape, dtype=complex)
        subbands[:, :nr] = _real_freq_filter(fft_sample, filters)

    analytic_subbands = utils.fhilbert(subbands, mode=fft_mode)
    subband_envelopes = np.abs(analytic_subbands)

    if padding_size is not None and padding_size > 1:
        # NOTE(review): assumes `padding` is the number of samples appended
        # by pad_signal -- the original author flagged this trim as
        # unverified.
        analytic_subbands = analytic_subbands[
            :, :signal_flat.shape[0] - padding]
        subband_envelopes = subband_envelopes[
            :, :signal_flat.shape[0] - padding]

    if debug_ret_all is not True:
        return subband_envelopes

    # locals() is evaluated in this function's scope (it is the outermost
    # iterable of the comprehension), so this captures every local by name
    # in sorted order.
    return {name: value for name, value in sorted(locals().items())}
def generate_subbands(signal, filters, padding_size=None, fft_mode='auto',
                      debug_ret_all=False):
    """Generate the subband decomposition of the signal with the given filters.

    The signal is put into canonical form, optionally padded, filtered in
    the frequency domain, and transformed back to the time domain. Real
    signals of even length go through ``utils.rfft``/``utils.irfft``; every
    other input (complex or odd-length) goes through the full
    ``utils.fft``/``utils.ifft`` and only the real part is kept.

    Args:
      signal (array): The sound signal (waveform) in the time domain.
      filters (array): The filterbank, in frequency space, used to generate
        the cochleagram. This should be the full filter-set output of
        erbFilter.make_erb_cos_filters_nx, or similar.
      padding_size (int, optional): Padding is applied only when this is not
        None and strictly greater than 1; the value is forwarded to
        ``pad_signal``, which decides how much padding is added. The padded
        region is sliced off the result again before returning.
      fft_mode ({'auto', 'fftw', 'np'}, optional): Forwarded to the ``utils``
        FFT helpers to select the FFT implementation. Per the existing
        documentation, 'auto' tries pyfftw and falls back to numpy.
      debug_ret_all (bool, optional): When this is exactly ``True``, a dict
        of every local variable of this function (keyed by name, in sorted
        order) is returned instead of the subbands.

    Returns:
      array: **subbands**: The subbands resulting from the subband
        decomposition, or the debug dict described above. When padding was
        applied, the second axis is trimmed by the amount reported by
        ``pad_signal``.
    """
    # Local names are deliberately unchanged: they become the keys of the
    # dict returned when debug_ret_all is True.
    signal_flat = reshape_signal_canonical(signal)

    if padding_size is not None and padding_size > 1:
        signal_flat, padding = pad_signal(signal_flat, padding_size)

    is_signal_even = signal_flat.shape[0] % 2 == 0
    if np.isrealobj(signal_flat) and is_signal_even:
        # rfft speed-up; only taken for real, even-length input.
        fft_sample = utils.rfft(signal_flat, mode=fft_mode)
        subbands = _real_freq_filter(fft_sample, filters)
        subbands = utils.irfft(subbands, mode=fft_mode)
    else:
        # Complex or odd-length input silently uses the full FFT. The
        # previous nested even-length re-check could never fail, so its
        # warning branch was unreachable and has been removed.
        fft_sample = utils.fft(signal_flat, mode=fft_mode)
        subbands = filters * fft_sample
        subbands = np.real(utils.ifft(subbands, mode=fft_mode))

    if padding_size is not None and padding_size > 1:
        # NOTE(review): assumes `padding` is the number of samples appended
        # by pad_signal -- the original author flagged this trim as
        # unverified.
        subbands = subbands[:, :signal_flat.shape[0] - padding]

    if debug_ret_all is True:
        # locals() is evaluated in this function's scope (it is the
        # outermost iterable of the comprehension), so this captures every
        # local by name in sorted order.
        return {name: value for name, value in sorted(locals().items())}

    return subbands