def test_error_checking(self):
    self.assertRaises(
        ValueError,
        lambda: segment_axis(np.arange(7), length=0, shift=0))
    self.assertRaises(
        ValueError,
        lambda: segment_axis(np.arange(7), length=3, shift=0))
def test_ending(self):
    assert_equal(
        segment_axis(np.arange(6), length=3, shift=2, end='cut'),
        np.array([[0, 1, 2], [2, 3, 4]]))
    assert_equal(
        segment_axis(np.arange(6) + 10, length=3, shift=2, end='pad',
                     pad_mode='wrap'),
        [[10, 11, 12], [12, 13, 14], [14, 15, 10]])
    assert_equal(
        segment_axis(np.arange(6), length=3, shift=2, end='pad',
                     pad_value=-17),
        np.array([[0, 1, 2], [2, 3, 4], [4, 5, -17]]))
def test_simple(self):
    assert_equal(
        segment_axis(np.arange(6), length=3, shift=3),
        np.array([[0, 1, 2], [3, 4, 5]]))
    assert_equal(
        segment_axis(np.arange(7), length=3, shift=2),
        np.array([[0, 1, 2], [2, 3, 4], [4, 5, 6]]))
    assert_equal(
        segment_axis(np.arange(7), length=3, shift=1),
        np.array([[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 6]]))
    assert_equal(
        segment_axis(np.arange(7), length=3, shift=-1),
        [[4, 5, 6], [3, 4, 5], [2, 3, 4], [1, 2, 3], [0, 1, 2]])
def tbf_to_tbchw(x, left_context, right_context, step_width,
                 pad_mode='symmetric', pad_kwargs=None):
    """Transforms data from TxBxF format to TxBxCxHxW format.

    This is only relevant for training a neural network in frames mode.

    The abbreviations stand for:

    T: Time frames
    B: Batch size
    F: Feature size
    C: Channel (almost always 1)
    H: Height of the convolution filter
    W: Width of the convolution filter

    :param x: Data to be transformed
    :param left_context: Context size left of the current frame
    :param right_context: Context size right of the current frame
    :param step_width: Step width for the window
    :param pad_mode: Mode for padding. See ``numpy.pad`` for details
    :param pad_kwargs: Kwargs for the pad call
    :return: Transformed data
    """
    if pad_kwargs is None:
        pad_kwargs = dict()
    x = np.pad(x, ((left_context, right_context), (0, 0), (0, 0)),
               mode=pad_mode, **pad_kwargs)
    window_size = left_context + right_context + 1
    return segment_axis(
        x, window_size, step_width, axis=0,
        end='cut').transpose(0, 2, 3, 1)[:, :, None, :, :]
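
# A minimal usage sketch for tbf_to_tbchw (an illustration added here, not
# part of the library tests). It assumes numpy is available as np and that
# segment_axis behaves as exercised in the tests above. With left_context=2,
# right_context=2 and step_width=1, each output step carries a 5-frame
# context window, so the result follows the TxBxCxHxW layout with C=1, H=F
# and W = left_context + right_context + 1.
if __name__ == '__main__':
    import numpy as np
    T, B, F = 10, 4, 40
    x = np.random.uniform(size=(T, B, F))
    y = tbf_to_tbchw(x, left_context=2, right_context=2, step_width=1)
    assert y.shape == (T, B, 1, F, 5), y.shape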
def est_time_shift(sig, ref_sig, seg_size, seg_shift):
    """Estimate the time shift between two signals.

    The time shift is estimated based on the generalized cross correlation
    with phase transform (GCC-PhaT).

    Args:
        sig (numpy.ndarray): Vector corresponding to a signal
        ref_sig (numpy.ndarray): Vector corresponding to the signal being
            used as reference
        seg_size (int): Size of the segments used in the GCC-PhaT algorithm
        seg_shift (int): Shift of the segments used in the GCC-PhaT algorithm

    Returns:
        shifts (numpy.ndarray): Vector corresponding to the estimated time
            shifts
    """
    def _get_gcpsd(seg, seg_ref):
        """Calculate the generalized cross power spectral density (GCPSD)
        for the given signal segments.

        Args:
            seg (array-like): Vector corresponding to a segment of a signal
            seg_ref (array-like): Vector corresponding to the segment of the
                reference signal

        Returns:
            gcpsd (numpy.ndarray): Vector corresponding to the GCPSD
        """
        fft_seg = np.fft.fft(seg)
        fft_ref_seg = np.fft.fft(seg_ref)
        cpsd = np.conj(fft_ref_seg) * fft_seg
        gcpsd = cpsd / (np.abs(fft_seg) * np.abs(fft_ref_seg) + 1e-18)
        return gcpsd

    segments = segment_axis(sig, seg_size, seg_shift, end='cut')
    segments_ref = segment_axis(ref_sig, seg_size, seg_shift, end='cut')
    shifts = np.zeros(len(segments))
    for seg_idx, (seg, ref_seg) in enumerate(zip(segments, segments_ref)):
        shifts[seg_idx] = max_time_lag_search(_get_gcpsd(seg, ref_seg))
    return shifts
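
# A self-contained sketch of the GCC-PhaT idea used above, for illustration
# only. `max_time_lag_search` is not defined in this file, so the lag search
# below is an assumed stand-in: it takes the inverse FFT of the GCPSD and
# returns the signed lag of its maximum. Requires only numpy as np.
def _example_gcc_phat_lag(seg, ref_seg):
    # GCPSD as in _get_gcpsd above.
    fft_seg = np.fft.fft(seg)
    fft_ref_seg = np.fft.fft(ref_seg)
    gcpsd = np.conj(fft_ref_seg) * fft_seg / (
        np.abs(fft_seg) * np.abs(fft_ref_seg) + 1e-18)
    cross_corr = np.fft.ifft(gcpsd).real
    lag = int(np.argmax(cross_corr))
    # Map lags in the upper half of the FFT buffer to negative shifts.
    if lag > len(seg) // 2:
        lag -= len(seg)
    return lag


if __name__ == '__main__':
    rng = np.random.RandomState(0)
    ref = rng.normal(size=1024)
    sig = np.roll(ref, 8)  # the signal lags the reference by 8 samples
    print(_example_gcc_phat_lag(sig, ref))  # expected: 8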
def test_multidimensional(self):
    assert_equal(
        segment_axis(np.ones((2, 3, 4, 5, 6)), axis=3,
                     length=3, shift=2).shape,
        (2, 3, 4, 2, 3, 6))
    assert_equal(
        segment_axis(np.ones((2, 3, 4, 5, 6)), axis=2,
                     length=3, shift=2, end='cut').shape,
        (2, 3, 1, 3, 5, 6))
    assert_equal(
        segment_axis(np.ones((2, 3, 4, 5, 6)), axis=2,
                     length=3, shift=2, end='pad', pad_mode='wrap').shape,
        (2, 3, 2, 3, 5, 6))
    assert_equal(
        segment_axis(np.ones((2, 3, 4, 5, 6)), axis=2,
                     length=3, shift=2, end='pad').shape,
        (2, 3, 2, 3, 5, 6))
def maxfilt(x, n, axis=-1):
    """Running maximum filter of width ``n`` along ``axis``.

    Args:
        x: Input array.
        n: Filter length, must be odd.
        axis: Axis along which the maximum filter is applied.

    Returns:
        Array with the same shape as ``x``, where each entry is the maximum
        over a centered window of ``n`` values along ``axis``.

    >>> x = np.ones((2, 5, 3)).cumsum(1)
    >>> x[0] **= 2
    >>> maxfilt(x, 3, axis=1).shape
    (2, 5, 3)
    """
    assert n % 2 == 1, n
    if axis < 0:
        axis = x.ndim + axis
    pad_width = [[0, 0] for _ in range(x.ndim)]
    pad_width[axis] = [(n - 1) // 2, (n - 1) // 2]
    x = np.pad(x, pad_width, mode="constant")
    # `end='cut'` matches the segment_axis signature used elsewhere in this
    # file; the zero padding above already guarantees an exact fit.
    x = segment_axis(x, n, shift=1, axis=axis, end="cut").max(axis + 1)
    return x
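
# A quick 1-D illustration of maxfilt (a sketch added for clarity, assuming
# numpy is available as np): each value is replaced by the maximum of itself
# and its direct neighbours; the edges are compared against the constant
# zero padding.
if __name__ == '__main__':
    y = np.array([0., 3., 1., 0., 2.])
    print(maxfilt(y, 3))  # -> [3. 3. 3. 2. 2.]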
def SRMR(signal: np.ndarray, sample_rate: int = 16000, n: int = 23,
         low_freq: int = 125) -> float:
    """Python implementation of the SRMR metric.

    Matlab reference implementation: https://github.com/MuSAELab/SRMRToolbox

    Because the results of other openly available SRMR Python packages
    deviate significantly from the original evaluation tool, this function
    reimplements the Matlab functionality. An ASL adjustment is not
    implemented, so the calculated values still differ slightly from the
    Matlab implementation. For an exact reproduction of the Matlab results,
    an ASL adjustment is required. However, the deviation of this
    implementation from the Matlab version is typically not larger than 1e-3.

    :param signal: signal on which the SRMR is calculated
    :param sample_rate: sample rate of the signal
    :param n: number of gammatone filters used
    :param low_freq: lowest center frequency of the gammatone filterbank,
        the highest frequency is half the sample rate
    :return: SRMR metric for the given signal
    """
    # Preprocessing of the signal (voice activity detection)
    signal = _preprocessing_vad(signal, sample_rate)
    signal = signal - np.mean(signal)
    signal /= np.std(signal, keepdims=True)

    # Gammatone filterbank (with n filters)
    signal = gammatone_filterbank(signal, sample_rate=sample_rate, n=n,
                                  low_freq=low_freq)

    # Calculate the temporal envelope of the signal
    for i in range(len(signal)):
        signal[i] = np.abs(sp.signal.hilbert(signal[i]))

    # Frequencies of the modulation filters
    modulation_filter_frequencies = [
        4.0, 6.5, 10.7, 17.6, 28.9, 47.5, 78.1, 128.0
    ]

    # Apply 8 modulation filters to the output of the gammatone filters
    E = []
    for j in range(len(signal)):
        E.append([])
        for k in range(8):
            W0 = math.tan(2 * math.pi * modulation_filter_frequencies[k]
                          / (2 * sample_rate))
            B0 = W0 / 2
            b = np.ndarray(
                (3,), dtype=float,
                buffer=np.array(
                    [B0 / (1 + B0 + W0**2), 0, -B0 / (1 + B0 + W0**2)]))
            a = np.ndarray(
                (3,), dtype=float,
                buffer=np.array([
                    1,
                    (2 * W0**2 - 2) / (1 + B0 + W0**2),
                    (1 - B0 + W0**2) / (1 + B0 + W0**2)
                ]))
            E[j].append(sp.signal.lfilter(b, a, signal[j], axis=0))

    # Calculate the energy of the single bands
    energy = []
    for j in range(len(E)):
        energy.append([])
        for k in range(len(E[j])):
            energy[j].append([])
            # Segmentation of the signal
            temp = segment_axis(E[j][k], int(sample_rate / 1000) * 256,
                                int(sample_rate / 1000) * 64)
            # Multiply each segment with a Hamming window and sum the result
            hamm_window = sp.signal.hamming(int(sample_rate / 1000) * 256,
                                            sym=True)
            for window in temp:
                energy[j][k].append(np.sum(np.square(hamm_window * window)))

    # Calculate the center frequencies (cfs) and the corresponding ERBs
    cfs = calculate_cfs(low_freq, sample_rate / 2, n)
    ERBs = []
    for i in range(len(cfs)):
        ERBs.append(cfs[i] / 9.26449 + 24.7)

    # Calculate the means of the single bands
    means = np.ndarray((len(energy), len(energy[0])))
    for j in range(len(energy)):
        for k in range(len(energy[j])):
            means[j][k] = np.mean(energy[j][k])

    # Calculate the bandwidth BW: the ERB at which the cumulative energy
    # share exceeds 90 %
    total_energy = np.sum(np.sum(means))
    AC_energy = np.sum(means, axis=1)
    AC_perc = AC_energy * 100 / total_energy
    cum_perc = 0.0
    BW = 0.0
    for i in range(len(AC_perc)):
        cum_perc += AC_perc[i]
        if cum_perc > 90:
            BW = ERBs[i]
            break

    # Calculate the cutoffs of the modulation filters
    cutoffs = []
    for cf in modulation_filter_frequencies:
        w0 = 2 * math.pi * cf / sample_rate
        B0 = math.tan(w0 / 2) / 2
        cutoffs.append(cf - (B0 * sample_rate / (2 * math.pi)))

    # Ratio of the energy in the first four modulation bands to the energy
    # in the higher bands, limited by the cutoff corresponding to BW
    numerator = np.sum(np.sum(means, axis=0)[:4])
    denominator = np.sum(means, axis=0)[4]
    for i in range(5, 8):
        denominator += np.sum(means, axis=0)[i]
        if cutoffs[i - 1] < BW < cutoffs[i]:
            break
    return numerator / denominator
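
# A small self-contained check of the modulation filter design used in SRMR
# above (a sketch for illustration, not part of the metric): for each
# modulation frequency, the second-order bandpass defined by (b, a) should
# peak close to that frequency. The helper name below is hypothetical; only
# numpy, scipy and math are required.
import math
import numpy as np
import scipy.signal


def _modulation_filter_coefficients(f_mod, sample_rate):
    # Same coefficient formulas as in SRMR above.
    W0 = math.tan(2 * math.pi * f_mod / (2 * sample_rate))
    B0 = W0 / 2
    b = np.array([B0 / (1 + B0 + W0**2), 0, -B0 / (1 + B0 + W0**2)])
    a = np.array([1,
                  (2 * W0**2 - 2) / (1 + B0 + W0**2),
                  (1 - B0 + W0**2) / (1 + B0 + W0**2)])
    return b, a


if __name__ == '__main__':
    sample_rate = 16000
    for f_mod in [4.0, 6.5, 10.7, 17.6, 28.9, 47.5, 78.1, 128.0]:
        b, a = _modulation_filter_coefficients(f_mod, sample_rate)
        w, h = scipy.signal.freqz(b, a, worN=2 ** 16)
        peak_freq = w[np.argmax(np.abs(h))] * sample_rate / (2 * math.pi)
        print(f'{f_mod:6.1f} Hz filter peaks at {peak_freq:6.1f} Hz')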