Esempio n. 1
0
 def test_error_checking(self):
     """Expect ``ValueError`` from ``segment_axis`` when ``shift`` is zero.

     Two segment lengths are tried (0 and 3); both calls use ``shift=0``
     and both must raise.
     """
     for segment_length in (0, 3):
         with self.assertRaises(ValueError):
             segment_axis(np.arange(7), length=segment_length, shift=0)
Esempio n. 2
0
 def test_ending(self):
     """Check the ``end`` option on a signal whose last segment is incomplete.

     Six samples with ``length=3`` and ``shift=2`` leave a partial third
     segment; the expected results below cover ``end='cut'`` and two
     variants of ``end='pad'``.
     """
     # end='cut': only the two complete segments are expected.
     cut_result = segment_axis(np.arange(6), length=3, shift=2, end='cut')
     assert_equal(cut_result, np.array([[0, 1, 2], [2, 3, 4]]))

     # end='pad' with pad_mode='wrap': the missing sample is expected to be
     # taken from the start of the signal (10).
     wrapped_result = segment_axis(
         np.arange(6) + 10, length=3, shift=2, end='pad', pad_mode='wrap')
     assert_equal(wrapped_result,
                  [[10, 11, 12], [12, 13, 14], [14, 15, 10]])

     # end='pad' with an explicit pad_value: the missing sample is expected
     # to be filled with that value.
     padded_result = segment_axis(
         np.arange(6), length=3, shift=2, end='pad', pad_value=-17)
     assert_equal(padded_result,
                  np.array([[0, 1, 2], [2, 3, 4], [4, 5, -17]]))
Esempio n. 3
0
    def test_simple(self):
        """Segment 1-D ranges with ``length=3`` and several shifts.

        Each case is ``(number of samples, shift, expected segments)``.
        The last case uses a negative shift, for which the segments are
        expected in reverse order.
        """
        cases = [
            (6, 3, np.array([[0, 1, 2], [3, 4, 5]])),
            (7, 2, np.array([[0, 1, 2], [2, 3, 4], [4, 5, 6]])),
            (7, 1, np.array([[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5],
                             [4, 5, 6]])),
            (7, -1, [[4, 5, 6], [3, 4, 5], [2, 3, 4], [1, 2, 3], [0, 1, 2]]),
        ]
        for num_samples, shift, expected in cases:
            segments = segment_axis(np.arange(num_samples), length=3,
                                    shift=shift)
            assert_equal(segments, expected)
Esempio n. 4
0
def tbf_to_tbchw(x,
                 left_context,
                 right_context,
                 step_width,
                 pad_mode='symmetric',
                 pad_kwargs=None):
    """ Transforms data from TxBxF format to TxBxCxHxW format

    This is only relevant for training a neural network in frames mode.

    The abbreviations stand for:

    T: Time frames
    B: Batch size
    F: Feature size
    C: Channel (almost always 1)
    H: Height of the convolution filter
    W: Width of the convolution filter

    The time axis is padded with ``left_context`` frames in front and
    ``right_context`` frames at the end, then cut into windows of
    ``left_context + right_context + 1`` frames. The window axis is moved to
    the last position and a singleton channel axis is inserted at index 2.

    :param x: Data to be transformed (three axes are padded, so a 3-D array
        is expected)
    :param left_context: Context size left to current frame
    :param right_context: Context size right to current frame
    :param step_width: Step width for window
    :param pad_mode: Mode for padding. See :func:`numpy.pad` for details
    :param pad_kwargs: Extra keyword arguments forwarded to the pad call
    :return: Transformed data
    """
    extra_pad_args = {} if pad_kwargs is None else pad_kwargs

    # Only the first (time) axis receives context padding.
    time_padding = (left_context, right_context)
    padded = np.pad(x, (time_padding, (0, 0), (0, 0)),
                    mode=pad_mode,
                    **extra_pad_args)

    window_size = left_context + right_context + 1
    windows = segment_axis(padded, window_size, step_width, axis=0,
                           end='cut')

    # Move axis 1 of the segmented array to the end, then add the channel
    # axis.
    window_last = windows.transpose(0, 2, 3, 1)
    return window_last[:, :, None, :, :]
Esempio n. 5
0
def est_time_shift(sig, ref_sig, seg_size, seg_shift):
    """Estimate the segment-wise time shift between two signals.

    Both signals are cut into segments, and for each pair of segments the
    generalized cross correlation with phase transform (GCC-PhaT) is used:
    the phase-normalized cross power spectrum is handed to
    ``max_time_lag_search`` to obtain the shift.

    Args:
        sig (numpy.ndarray):
            Vector corresponding to a signal
        ref_sig (numpy.ndarray):
            Vector corresponding to the signal being used as reference
        seg_size (int):
            Size of the segments used in the GCC-PhaT algorithm
        seg_shift:
            Shift of the segments used in the GCC-PhaT algorithm
    Returns:
        shifts (numpy.ndarray):
            Vector with one estimated time shift per segment of ``sig``
    """
    def _get_gcpsd(seg, seg_ref):
        """Return the generalized cross power spectral density (GCPSD).

        Args:
            seg (array-like):
                Vector corresponding to a segment of a signal
            seg_ref (array-like):
                Vector corresponding to the segment of the reference signal
        Returns:
            gcpsd (numpy.ndarray):
                Cross power spectrum divided by the product of both
                magnitude spectra
        """
        spectrum = np.fft.fft(seg)
        ref_spectrum = np.fft.fft(seg_ref)
        cross_spectrum = np.conj(ref_spectrum) * spectrum
        # The small constant keeps the division finite for empty bins.
        magnitude = np.abs(spectrum) * np.abs(ref_spectrum) + 1e-18
        return cross_spectrum / magnitude

    sig_frames = segment_axis(sig, seg_size, seg_shift, end='cut')
    ref_frames = segment_axis(ref_sig, seg_size, seg_shift, end='cut')

    # One entry per segment of `sig`; entries without a matching reference
    # segment keep their initial value of zero.
    shifts = np.zeros(len(sig_frames))
    for frame_idx, frame_pair in enumerate(zip(sig_frames, ref_frames)):
        shifts[frame_idx] = max_time_lag_search(_get_gcpsd(*frame_pair))
    return shifts
Esempio n. 6
0
    def test_multidimensional(self):
        """Check output shapes when segmenting one axis of a 5-D array.

        Each case pairs the keyword arguments passed to ``segment_axis``
        with the expected output shape: the segmented axis is replaced by
        two axes (number of segments, segment length).
        """
        input_shape = (2, 3, 4, 5, 6)
        cases = [
            (dict(axis=3, length=3, shift=2),
             (2, 3, 4, 2, 3, 6)),
            (dict(axis=2, length=3, shift=2, end='cut'),
             (2, 3, 1, 3, 5, 6)),
            (dict(axis=2, length=3, shift=2, end='pad', pad_mode='wrap'),
             (2, 3, 2, 3, 5, 6)),
            (dict(axis=2, length=3, shift=2, end='pad'),
             (2, 3, 2, 3, 5, 6)),
        ]
        for segment_kwargs, expected_shape in cases:
            segmented = segment_axis(np.ones(input_shape), **segment_kwargs)
            assert_equal(segmented.shape, expected_shape)
Esempio n. 7
0
def maxfilt(x, n, axis=-1):
    """Apply a sliding maximum filter of odd width ``n`` along ``axis``.

    Every output element is the maximum over a window of ``n`` input
    elements centered on the same position. At the borders the window is
    completed by repeating the edge element, which never changes the
    maximum, so negative inputs are handled correctly as well.

    Args:
        x: numpy array to be filtered.
        n: Width of the filter window. Must be odd.
        axis: Axis along which to filter. Negative values count from the
            last axis.

    Returns:
        The filtered array.

    >>> x = np.ones((2, 5, 3)).cumsum(1)
    >>> x[0] **= 2
    >>> maxfilt(x, 3, axis=1).shape
    (2, 5, 3)
    """
    assert n % 2 == 1, n
    if axis < 0:
        axis = x.ndim + axis

    half_width = (n - 1) // 2
    pad_width = [[0, 0] for _ in range(x.ndim)]
    pad_width[axis] = [half_width, half_width]
    # Edge padding instead of zero padding: a zero would win the maximum
    # against negative samples and corrupt the border values.
    x = np.pad(x, pad_width, mode="edge")

    # After segmenting, the window samples live on the axis right behind
    # `axis`; reduce over it. 'cut' is a value of `end`, not of `pad_mode`.
    return segment_axis(x, n, shift=1, axis=axis, end="cut").max(axis + 1)
Esempio n. 8
0
def SRMR(signal: np.ndarray,
         sample_rate: int = 16000,
         n: int = 23,
         low_freq: int = 125) -> float:
    """Python implementation of the SRMR metric.

    Matlab reference implementation: https://github.com/MuSAELab/SRMRToolbox
    Because results of other openly available SRMR python packages
    significantly deviate from the original evaluation tool, this function
    reimplements the Matlab functionality.
    An ASL adjustment is not implemented, so the calculated values still
    differ slightly from the Matlab implementation. For an exact reproduction
    of the Matlab results an ASL adjustment is required. However, the
    deviation of this implementation from the Matlab version is typically
    not larger than 1e-3.

    Processing steps: voice activity detection and normalization, gammatone
    filterbank, Hilbert envelope, eight second-order modulation band-pass
    filters per gammatone band, Hamming-windowed frame energies, and finally
    the ratio of the energy in the four lowest modulation bands to the
    energy in the higher modulation bands.

    :param signal: signal on which the SRMR is calculated
    :param sample_rate: sample rate of signal
    :param n: number of gammatone filters used
    :param low_freq: lowest center frequency of the gammatone filterbank,
        highest frequency is half the sample rate
    :return: SRMR metric for given signal
    """
    # Preprocessing of the signal (voice activity detection), followed by
    # mean removal and scaling to unit standard deviation.
    signal = _preprocessing_vad(signal, sample_rate)
    signal = signal - np.mean(signal)
    signal /= np.std(signal, keepdims=True)

    # Gammatone filterbank (with n filters)
    signal = gammatone_filterbank(signal,
                                  sample_rate=sample_rate,
                                  n=n,
                                  low_freq=low_freq)

    # Temporal envelope of every gammatone band
    for band_idx in range(len(signal)):
        signal[band_idx] = np.abs(sp.signal.hilbert(signal[band_idx]))

    # Center frequencies of the modulation filters
    modulation_filter_frequencies = [
        4.0, 6.5, 10.7, 17.6, 28.9, 47.5, 78.1, 128.0
    ]

    # Coefficients of the eight second-order modulation filters. They only
    # depend on the sample rate, so they are computed once for all bands.
    modulation_filters = []
    for mod_freq in modulation_filter_frequencies:
        W0 = math.tan(2 * math.pi * mod_freq / (2 * sample_rate))
        B0 = W0 / 2
        norm = 1 + B0 + W0**2
        b = np.array([B0 / norm, 0, -B0 / norm])
        a = np.array([1, (2 * W0**2 - 2) / norm, (1 - B0 + W0**2) / norm])
        modulation_filters.append((b, a))

    # Framing parameters and analysis window are identical for all bands.
    # `scipy.signal.windows.hamming` is used because the `scipy.signal.hamming`
    # alias no longer exists in recent SciPy releases.
    frame_length = int(sample_rate / 1000) * 256
    frame_shift = int(sample_rate / 1000) * 64
    hamm_window = sp.signal.windows.hamming(frame_length, sym=True)

    # Mean frame energy per (gammatone band, modulation band)
    means = np.ndarray((len(signal), len(modulation_filters)))
    for j in range(len(signal)):
        for k, (b, a) in enumerate(modulation_filters):
            modulated = sp.signal.lfilter(b, a, signal[j], axis=0)

            # Segmentation of the signal; each frame is weighted with the
            # Hamming window and its energy is summed.
            frames = segment_axis(modulated, frame_length, frame_shift)
            frame_energies = [
                np.sum(np.square(hamm_window * frame)) for frame in frames
            ]
            means[j][k] = np.mean(frame_energies)

    # Center frequencies of the gammatone bands and their ERBs
    cfs = calculate_cfs(low_freq, sample_rate / 2, n)
    ERBs = [center_freq / 9.26449 + 24.7 for center_freq in cfs]

    # Bandwidth: ERB of the first gammatone band at which the accumulated
    # share of the total energy exceeds 90 percent.
    total_energy = np.sum(np.sum(means))
    AC_energy = np.sum(means, axis=1)
    AC_perc = AC_energy * 100 / total_energy

    cumulative_perc = 0.0
    BW = 0.0
    for band_idx, band_perc in enumerate(AC_perc):
        cumulative_perc += band_perc
        if cumulative_perc > 90:
            BW = ERBs[band_idx]
            break

    # Cutoff of every modulation filter
    cutoffs = []
    for mod_freq in modulation_filter_frequencies:
        w0 = 2 * math.pi * mod_freq / sample_rate
        B0 = math.tan(w0 / 2) / 2
        cutoffs.append(mod_freq - (B0 * sample_rate / (2 * math.pi)))

    # Energy per modulation band, summed over all gammatone bands
    modulation_energy = np.sum(means, axis=0)
    numerator = np.sum(modulation_energy[:4])
    denominator = modulation_energy[4]

    # Higher modulation bands are accumulated until the band whose cutoff
    # interval contains the bandwidth has been added.
    # NOTE(review): band i is added before the interval test, so it is part
    # of the denominator when the loop stops - confirm against the Matlab
    # reference.
    for i in range(5, 8):
        denominator += modulation_energy[i]
        if cutoffs[i - 1] < BW < cutoffs[i]:
            break

    return numerator / denominator