Ejemplo n.º 1
def plotGraph(vocalsName, beatsName):
    vocalsFile = './spedUpVocals/{}.mp3'.format(vocalsName)
    beatsFile = './spedUpBeats/{}.mp3'.format(beatsName)

    # Compute local onset autocorrelation
    # y, sr = librosa.load('./output/funk/accompaniment.wav', duration=60)
    # track 1 (vocals)
    y, sr = librosa.load(vocalsFile, duration=60)
    hop_length = 512
    oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    tempogram = librosa.feature.tempogram(onset_envelope=oenv,
                                          sr=sr,
                                          hop_length=hop_length)
    # Compute global onset autocorrelation
    ac_global = librosa.autocorrelate(oenv, max_size=tempogram.shape[0])
    ac_global = librosa.util.normalize(ac_global)
    # Estimate the global tempo for display purposes
    tempo = librosa.beat.tempo(onset_envelope=oenv,
                               sr=sr,
                               hop_length=hop_length)[0]

    fig, ax = plt.subplots(nrows=1, figsize=(120, 15))
    times = librosa.times_like(oenv, sr=sr, hop_length=hop_length)
    # ax.plot(times, oenv, label='Onset strength')
    ax.plot(times, oenv, label='Vocal')
    ax.label_outer()
    ax.legend(frameon=True)

    # y, sr = librosa.load('./output/recairei/vocals.wav', duration=60)
    # track 2 (beats)
    y, sr = librosa.load(beatsFile, duration=60)
    hop_length = 512
    oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    tempogram = librosa.feature.tempogram(onset_envelope=oenv,
                                          sr=sr,
                                          hop_length=hop_length)
    # Compute global onset autocorrelation
    ac_global = librosa.autocorrelate(oenv, max_size=tempogram.shape[0])
    ac_global = librosa.util.normalize(ac_global)
    # Estimate the global tempo for display purposes
    tempo = librosa.beat.tempo(onset_envelope=oenv,
                               sr=sr,
                               hop_length=hop_length)[0]

    times = librosa.times_like(oenv, sr=sr, hop_length=hop_length)
    ax.plot(times, oenv, 'C2', label='Accompaniment')
    # ax.plot(times, oenv, 'C2',label='Onset strength')
    ax.label_outer()
    ax.legend(frameon=True)

    graphFileName = './graphs/{}-{}'.format(vocalsName, beatsName)
    os.makedirs(os.path.dirname(graphFileName), exist_ok=True)
    plt.savefig(graphFileName)
    plt.close()
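The snippet assumes librosa, matplotlib.pyplot as plt, and os are already imported at module level. A minimal usage sketch, with hypothetical file names:

import os
import librosa
import matplotlib.pyplot as plt

# Hypothetical usage: expects ./spedUpVocals/track01.mp3 and
# ./spedUpBeats/track01.mp3 to exist; the figure is written to
# ./graphs/track01-track01.png
plotGraph('track01', 'track01')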
Ejemplo n.º 2
    def get_defects_echo(self):
        """
        Получение маркеров дефекта echo.
        """

        # Strong echo is detected from the value of the autocorrelation function over a short window.
        # Using this feature alone produces too many false positives on music,
        # which is characterized by repeating sounds (tempo, drums, etc.).
        # To filter music out, the global value of the autocorrelation function of the
        # tempogram is computed: this value staying high indicates that the recording
        # keeps a steady tempo.
        # For speed, the tempogram global-autocorrelation gate is applied to a single
        # channel only.

        # get the list of segments to process, plus each segment's start time
        segs = self.separator(self.Settings.Echo.Sep)

        # load settings
        setting = self.Settings.Echo

        # process the file segment by segment
        for s in segs:

            if self.FlagChannel == 2:

                # process each channel of the segment
                for n, x in enumerate(s[0]):

                    # Rely on channel 0 to determine a suitable tempogram.
                    # Building spectrograms is not required for this.
                    oenv = librosa.onset.onset_strength(y=x,
                                                        sr=self.SampleRate)
                    tempogram = librosa.feature.tempogram(onset_envelope=oenv,
                                                          sr=self.SampleRate)
                    acg = librosa.autocorrelate(oenv,
                                                max_size=tempogram.shape[0])
                    acg = librosa.util.normalize(acg)
                    if (acg[:len(acg) // 2].mean()) > setting.GlobNormCorrThr:
                        return

                    # No spectrograms are built for the echo analysis either.
                    ln = int(self.SampleRate * setting.LocCorrWin)
                    parts = len(x) // ln
                    for i in range(parts):
                        yp = x[i * ln:(i + 1) * ln]
                        ac = librosa.autocorrelate(yp)
                        ac = ac[ln // 5:]
                        if max(ac) > setting.LocCorrThr:
                            print(
                                f'root path file: {self.FileName}, channel number {n}, name of defect: echo,'
                                f' time mark: {s[1] + i * (ln / self.SampleRate)}, '
                                f'{s[1] + (i + 1) * (ln / self.SampleRate)}')
Ejemplo n.º 3
    def compute_BSpectrum(self, ret):
        """ Compute beat spectrogram of the audio signal
        returns: 1D beat spectrum of the audio signal & 2D beat spectrogram
        """
        assert isinstance(ret, str), \
            "Please specify a string 'spectrum'|'spectrogram'|'both'"

        self.pspec = lbr.magphase(self.spec)[0] ** 2  # power spectrogram (magnitude is already non-negative)

        # Beat spectrogram
        for i in range(self.pspec.shape[0]):
            self.bspec_[i] = lbr.autocorrelate(self.pspec[i], max_size=self.pspec[i].size)

        # Beat spectrum
        for j in range(self.pspec.shape[0]):
            self.bspec += self.bspec_[j,:]

        # Normalize the beat spectrum
        self.bspec /= self.bspec[0]
        # Zero every bin equal to 1 (in practice the lag-0 bin), removing the
        # DC term while preserving the length
        self.bspec[self.bspec == 1] = 0
        # Return what has been asked for
        if ret == 'spectrum':
            return self.bspec
        elif ret == 'spectrogram':
            return self.bspec_
        elif ret == 'both':
            return self.bspec, self.bspec_
        else:
            print("Please specify \'spectrum\'|\'spectrogram\'|\'both\'")
Ejemplo n.º 4
    def getFeatures(self, path):
        # e.g., features = getFeatures('noise_data/user/5.wav')
        signal, samplingRate = lbr.load(path)

        # Compute MFCC features from the raw signal
        frame_ms = 30
        mfcc = lbr.feature.mfcc(y=signal, sr=samplingRate, hop_length=int(samplingRate*frame_ms/1000), n_mfcc=13)

        # Compute energy of each frame (librosa.feature.rmse was renamed to rms in librosa 0.7)
        energy = lbr.feature.rms(y=signal, hop_length=int(samplingRate*frame_ms/1000))

        # Compute pitch using autocorrelation
        autocorrelation = lbr.autocorrelate(signal)

        # The first-order differences (delta features)
        #mfcc_delta = lbr.feature.delta(mfcc)

        # Zero-crossing rate
        #zerorate = lbr.feature.zero_crossing_rate(signal)

        # Roll-off frequency
        #rolloff = lbr.feature.spectral_rolloff(y=signal, sr=samplingRate)

        # Spectral bandwidth
        #bandwidth = lbr.feature.spectral_bandwidth(y=signal, sr=samplingRate)

        #TODO: More features

        return (mfcc, energy, autocorrelation)
Ejemplo n.º 5
def linear_predictive_coding(frames, n_coeff=20):
    """Return linear predictive coding coefficients for each audio frame.

    frames  : 2-D numpy.ndarray where each line represents an audio frame
    n_coeff : number of LPC coefficients to generate (also equal to the
              maximum autocorrelation lag order considered)
    """
    # Check arguments validity. Adjust the number of coefficients.
    check_type_validity(frames, np.ndarray, 'frames')
    if frames.ndim != 2:
        raise ValueError('`frames` should be a 2-D np.array.')
    check_type_validity(n_coeff, int, 'n_coeff')
    if n_coeff < 1:
        raise ValueError('`n_coeff` should be a strictly positive int.')
    n_coeff = min(n_coeff, frames.shape[1] - 1)
    # Compute the frame-wise LPC coefficients.
    autocorrelations = librosa.autocorrelate(frames, max_size=n_coeff + 1)
    lpc = np.array([
        # Levinson-Durbin recursion. False positive pylint: disable=no-member
        scipy.linalg.solve_toeplitz(autocorr[:-1], autocorr[1:])
        for autocorr in autocorrelations
    ])
    # Compute the frame-wise root mean squared prediction errors.
    frame_wise_errors = np.array([
        frames[:, i] - np.sum(lpc * frames[:, i - n_coeff:i][:, ::-1], axis=1)
        for i in range(n_coeff, frames.shape[1])
    ])
    frames_rmse = np.sqrt(np.mean(np.square(frame_wise_errors), axis=0))
    # Return the LPC coefficients and error terms.
    return lpc, frames_rmse
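A usage sketch under assumed inputs: frames come from librosa.util.frame (transposed so each row is one frame), and check_type_validity plus scipy are available in the surrounding module. The audio file name is hypothetical.

import librosa

y, sr = librosa.load('speech.wav')
# One 512-sample frame per row, hopped by 256 samples
frames = librosa.util.frame(y, frame_length=512, hop_length=256).T
lpc, rmse = linear_predictive_coding(frames, n_coeff=12)
print(lpc.shape)    # (n_frames, 12): one coefficient vector per frame
print(rmse.shape)   # (n_frames,): one prediction-error RMS per frame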
Ejemplo n.º 6
def test_tempogram_odf_equiv(tempo, center):
    sr = 22050
    hop_length = 512
    duration = 8

    odf = np.zeros(duration * sr // hop_length)
    spacing = sr * 60.0 // (hop_length * tempo)
    odf[::int(spacing)] = 1

    odf_ac = librosa.autocorrelate(odf)

    tempogram = librosa.feature.tempogram(
        onset_envelope=odf,
        sr=sr,
        hop_length=hop_length,
        win_length=len(odf),
        window=np.ones,
        center=center,
        norm=None,
    )

    idx = 0
    if center:
        idx = len(odf) // 2

    assert np.allclose(odf_ac, tempogram[:, idx])
Ejemplo n.º 7
def estimate_tempo(ose):
    """
    This function uses the precomputed global tempo information parameters to estimate the tempo
    of one piece
    :param ose: The onset strength envelope
    :return: The tactus estimate, the estimated tempo expressed in terms of OSE frames
                and whether duple (True) or triple (False) tempo is assumed
    """
    # The list of tempo period strengths; TPS[0] is a zero placeholder so that
    # TPS[tau] lines up with autocorrelation lag tau
    TPS = [0.0]

    # Calculate autocorrelation of onset strength envelope
    ac = librosa.autocorrelate(ose)

    # For each frame of the autocorrelated onset strength envelope save the
    # weighted value (as seen in the Ellis paper)
    for tau in range(1, ose.size):
        res = autocorrelation_weighting(tau, Globals.TAU_0) * ac[tau]
        TPS.append(res)
    # The index of the largest TPS value indicates the most likely tempo period
    tau_index = np.argmax(TPS)
    # Express in terms of samples
    index_samples = tau_index * Globals.FFT_HOP
    # Express in terms of seconds
    tau_est = index_samples / Globals.OSE_SAMPLE_RATE
    tempo_est = 60 / tau_est

    # Helper functions for calculating the probabilities of duple and triple tempos
    def get_TPS2(tau):
        return TPS[tau] + 0.5 * TPS[2 * tau] + 0.25 * TPS[2 * tau - 1] + 0.25 * TPS[2 * tau + 1]

    def get_TPS3(tau):
        return TPS[tau] + 0.33 * TPS[3 * tau] + 0.33 * TPS[3 * tau - 1] + 0.33 * TPS[3 * tau + 1]

    TPS2 = []
    TPS3 = []
    search_range = 2000  # corresponds to the first 8 seconds of the song
    for tau in range(1, search_range):
        TPS2.append(get_TPS2(tau))
        TPS3.append(get_TPS3(tau))
    tau2 = np.argmax(TPS2)
    tau3 = np.argmax(TPS3)

    max_vals = [TPS[tau_index], TPS2[tau2], TPS3[tau3]]
    metre = np.argmax(max_vals)

    if metre == 0:
        # Duple tempo normal time
        tau_samples = tau_index * Globals.FFT_HOP
        tactus = tau_samples / Globals.OSE_SAMPLE_RATE
        return tactus, tau_index, True
    elif metre == 1:
        # Duple tempo double time
        tau_samples = tau2 * Globals.FFT_HOP
        tactus = (1 / 2) * tau_samples / Globals.OSE_SAMPLE_RATE
        return tactus, tau2, True
    elif metre == 2:
        # Triplet tempo
        tau_samples = tau3 * Globals.FFT_HOP
        tactus = (1 / 3) * tau_samples / Globals.OSE_SAMPLE_RATE
        return tactus, tau3, False
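The helper autocorrelation_weighting is not shown above. A plausible sketch, following the log-Gaussian lag weighting from the Ellis (2007) beat-tracking paper that the comment cites; the spread of 1.0 octaves is an assumption, not taken from the source:

import numpy as np

def autocorrelation_weighting(tau, tau_0, sigma=1.0):
    # Emphasize lags near the preferred tempo period tau_0, falling off
    # as a Gaussian in log2 (octave) distance.
    return np.exp(-0.5 * (np.log2(tau / tau_0) / sigma) ** 2)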
Ejemplo n.º 8
    def get_pitch(self, fmin=50.0, fmax=2000.0):
        freqs = []
        sr = self._sampling_rate
        x = self._audio_array
        onset_samples = librosa.onset.onset_detect(y=x, sr=sr, units='samples',
                                                   hop_length=self._hop_length,
                                                   backtrack=False,
                                                   pre_max=20,
                                                   post_max=20,
                                                   pre_avg=100,
                                                   post_avg=100,
                                                   delta=0.2,
                                                   wait=0)
        onset_boundaries = np.concatenate([[0], onset_samples, [len(x)]])

        for i in range(len(onset_boundaries) - 1):
            n0 = onset_boundaries[i]
            n1 = onset_boundaries[i + 1]
            r = librosa.autocorrelate(x[n0:n1])
            i_min = sr / fmax
            i_max = sr / fmin
            r[:int(i_min)] = 0
            r[int(i_max):] = 0

            # Find the location of the maximum autocorrelation.
            lag = r.argmax()
            f0 = float(sr) / lag
            freqs.append(f0)
        freqs1 = np.array(freqs)
        return np.median(freqs1), freqs1.mean()
Ejemplo n.º 9
def plot_static_beat(tempo, onset_env, sampling_rate):
    # Convert to a scalar (np.asscalar was removed in NumPy 1.23)
    tempo = np.asarray(tempo).item()
    # Compute 2-second windowed autocorrelation
    hop_length = 512
    auto_correlation = librosa.autocorrelate(onset_env,
                                             2 * sampling_rate // hop_length)
    freqs = librosa.tempo_frequencies(len(auto_correlation),
                                      sr=sampling_rate,
                                      hop_length=hop_length)

    # Plot on a BPM axis.  We skip the first (0-lag) bin.
    fig = plt.figure(figsize=(8, 4))
    ax = fig.add_subplot(111)
    ax.semilogx(freqs[1:],
                librosa.util.normalize(auto_correlation)[1:],
                label='Onset autocorrelation',
                base=2)  # `basex` was renamed to `base` in Matplotlib 3.3
    ax.axvline(tempo,
               0,
               1,
               color='r',
               alpha=0.75,
               linestyle='--',
               label='Tempo: {:.2f} BPM'.format(tempo))
    ax.grid()
    ax.axis('tight')
    return fig
Ejemplo n.º 10
    def __test(y, max_size):

        ac = librosa.autocorrelate(y, max_size=max_size)

        if max_size is None or max_size > len(y):
            eq_(len(ac), len(y))

        else:
            eq_(len(ac), max_size)
Ejemplo n.º 11
 def plot_correlation1(self, onset_env, sr):
     hop_length = 512
     ac = librosa.autocorrelate(onset_env, 2 * sr // hop_length)
     freqs = librosa.tempo_frequencies(len(ac),
                                       sr=sr,
                                       hop_length=hop_length)
     self.li.set_xdata(freqs[1:])
     self.li.set_ydata(librosa.util.normalize(ac)[1:])
     plt.pause(0.001)
Ejemplo n.º 13
 def estimate_pitch(self, segment, sr, fmin=50.0, fmax=2000.0):
     global hop_length
     r = librosa.autocorrelate(segment)
     i_min = sr / fmax
     i_max = sr / fmin
     r[:int(i_min)] = 0
     r[int(i_max):] = 0
     i = r.argmax()
     f0 = float(sr) / i
     return int(f0)
Ejemplo n.º 14
def onset_estimate_bpm(onsets, start_bpm, fft_res):
    """Estimate the BPM from an onset envelope

    Arguments:
      onsets     -- (ndarray)   time-series of onset strengths
      start_bpm  -- (float)     initial guess of the BPM
      fft_res    -- (float)     resolution of FFT (sample rate / hop length)

    Returns bpm:
      bpm       -- (float)  estimated BPM

    """

    ac_size     = 4.0
    duration    = 90.0
    end_time    = 90.0
    bpm_std     = 1.0

    # Chop onsets to X[(upper_limit - duration):upper_limit]
    # or as much as will fit
    maxcol      = int(min(len(onsets)-1, np.round(end_time * fft_res)))
    mincol      = int(max(0,    maxcol - np.round(duration * fft_res)))

    # Use auto-correlation out of 4 seconds (empirically set??)
    ac_window   = int(np.round(ac_size * fft_res))

    # Compute the autocorrelation (the indices above must be ints for slicing)
    x_corr      = librosa.autocorrelate(onsets[mincol:maxcol], ac_window)


    #   FIXME:  2013-01-25 08:55:40 by Brian McFee <*****@*****.**>
    #   this fails if ac_window > length of song   
    # re-weight the autocorrelation by log-normal prior
    bpms    = 60.0 * fft_res / (np.arange(1, ac_window+1))

    # Smooth the autocorrelation by a log-normal distribution
    x_corr  = x_corr * np.exp(-0.5 * ((np.log2(bpms / start_bpm)) / bpm_std)**2)

    # Get the local maxima of the weighted correlation
    # (librosa.localmax later moved to librosa.util.localmax)
    x_peaks = librosa.localmax(x_corr)

    # Zero out all peaks before the first negative
    x_peaks[:np.argmax(x_corr < 0)] = False


    # Choose the best peak out of .33, .5, 1, 2, 3 * start_period
    candidates      = np.multiply(  np.argmax(x_peaks * x_corr), 
                                    [1.0/3, 1.0/2, 1.0, 2.0, 3.0])

    candidates      = candidates.astype(int)
    candidates      = candidates[candidates < ac_window]

    best_period     = np.argmax(x_corr[candidates])

    return 60.0 * fft_res / candidates[best_period]
Ejemplo n.º 15
def animate(i):
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True,
                    frames_per_buffer=CHUNK)

    data = np.frombuffer(stream.read(CHUNK), dtype=np.int16)  # np.fromstring is deprecated; frombuffer is the replacement
    abs_data = np.abs(data)
    mean_abs_data = np.mean(abs_data)
    #print(mean_abs_data)
    if mean_abs_data > 1000:  # skip the computation while no piano sound is heard

        n = len(data)
        # Autocorrelation of the current buffer (computed here but overwritten by the FFT below)
        y = librosa.autocorrelate(data.astype(np.float32), max_size=512)
        x1 = np.linspace(0, 44100 / 2, n)
        # plt.plot(data)
        # plt.show()
        y = np.fft.fft(data) / n

        y = np.absolute(y)
        sum_y = np.sum(y)
        sum_y_list.append(sum_y)
        print(sum_y)
        y = y[range(int(n / 2))]
        y1 = copy.copy(y)

        # Establish a baseline so the peak-detection threshold can adapt
        max_peak = 0
        std_peaks, _ = find_peaks(y, height=1500)
        #print('std_peak : ', std_peaks)
        if len(std_peaks) > 0:
            max_peak = np.max(y[std_peaks])
            std_threshold = max_peak * 0.4
            #print('max_peak threshold : ', std_threshold)
            peaks, _ = find_peaks(y, height=std_threshold)

            gye_name = scale(peaks * x_interval)

            if not gye_name[0]:
                print(1)
            else:
                print('y:', y)
                multiple_freq_decrease(y, peaks)
                peaks1, _ = find_peaks(y, height=std_threshold)
                print('peaks :  ', peaks1)
                gye_name1 = scale(peaks1 * x_interval)
                print(gye_name1)

        line.set_data(x, y)

    # Release the stream on every call, even when the input was too quiet.
    stream.stop_stream()
    print('exited')
    stream.close()
    p.terminate()
    return line,
Ejemplo n.º 16
    def __test(y, truth, max_size, axis):

        ac = librosa.autocorrelate(y, max_size=max_size, axis=axis)

        my_slice = [slice(None)] * truth.ndim
        if max_size is not None and max_size <= y.shape[axis]:
            my_slice[axis] = slice(min(max_size, y.shape[axis]))

        if not np.iscomplexobj(y):
            assert not np.iscomplexobj(ac)

        assert np.allclose(ac, truth[tuple(my_slice)])  # index with a tuple; list indexing is an error in modern NumPy
Ejemplo n.º 18
def estimate_pitch(segment, sr, fmin=50.0, fmax=2000.0):
    # Compute the autocorrelation of the input segment.
    r = librosa.autocorrelate(segment)
    # Define the lower and upper limits for the autocorrelation argmax.
    i_min = sr / fmax
    i_max = sr / fmin
    r[:int(i_min)] = 0
    r[int(i_max):] = 0
    # Find the location of the maximum autocorrelation.
    i = r.argmax()
    f0 = float(sr) / i
    return f0
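A quick sanity check for the estimator above (hypothetical usage): a pure 440 Hz tone should come back close to 440 Hz, since the autocorrelation peaks at the period lag sr/440 ≈ 50 samples.

import numpy as np

sr = 22050
t = np.arange(sr) / sr
tone = np.sin(2 * np.pi * 440.0 * t)
print(estimate_pitch(tone, sr))   # expect roughly 441.0 (22050 / 50)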
Ejemplo n.º 19
 def autocorrelate_bps(self, amount=1):
     if self.sr:
         print('sr = ', self.sr)
         # lag = bps * amount
         lag = self.bps * amount
         ac = librosa.autocorrelate(self.y, max_size=int(lag * self.sr / 512))  # max_size must be an int
         self.autocor_bps = [ac, lag]
         return [ac, lag]
     else:
         print(
             "[ autocorrelate_bps ] Error happend: no sr, y, bps... \n"
             "[ autocorrelate_bps ] Try to use find_beat_per_second() and then repeat !"
         )
Ejemplo n.º 20
def estimate_pitch_ac(segment, sr=global_sr, fmin=50.0, fmax=2000.0):
    # Compute autocorrelation of input segment.
    r = librosa.autocorrelate(segment)

    # Define lower and upper limits for the autocorrelation argmax.
    i_min = sr / fmax
    i_max = sr / fmin
    r[:int(i_min)] = 0
    r[int(i_max):] = 0

    # Find the location of the maximum autocorrelation.
    i = r.argmax()
    f0 = float(sr) / i
    return f0
Ejemplo n.º 21
def _print_image(file, n, onset_env, hi_lag_1, lo_lag_1, hi_lag_6, lo_lag_6,
                 hi_lag_8, lo_lag_8):
    """
    Print PDF of RLAC.
    """
    ac = librosa.autocorrelate(onset_env)

    for i in range(ac.shape[0]):
        ac[i] /= ac.shape[0] - i

    plt.rc('font', family='Times New Roman')

    ax = plt.gca()
    plt.gcf().set_size_inches(4, 2)
    ax.get_xaxis().set_major_formatter(
        matplotlib.ticker.FuncFormatter(lambda x, p: format(int(x) // 1000)))
    plt.plot(ac)
    plt.ylim(bottom=0.04)
    plt.xlim(left=0, right=len(ac))

    ac_8 = ac.copy()
    ac_8[:hi_lag_8] = 0
    ac_8[lo_lag_8:] = 0
    x_8 = hi_lag_8 + (lo_lag_8 - hi_lag_8) // 2
    plt.axvline(x=x_8, color='black', linestyle='dashed')
    plt.plot(ac_8, label='8 IBIs±4%')

    ac_6 = ac.copy()
    ac_6[:hi_lag_6] = 0
    ac_6[lo_lag_6:] = 0
    x_6 = hi_lag_6 + (lo_lag_6 - hi_lag_6) // 2
    plt.axvline(x=x_6, color='black', linestyle='dashed')
    plt.plot(ac_6, label='6 IBIs±4%')

    ac_1 = ac.copy()
    ac_1[:hi_lag_1] = 0
    ac_1[lo_lag_1:] = 0
    x_1 = hi_lag_1 + (lo_lag_1 - hi_lag_1) // 2
    plt.axvline(x=x_1, color='black', linestyle='dashed')
    plt.plot(ac_1, label='1 IBI±4%')

    plt.ylabel('Autocorrelation')
    plt.xlabel('Lag in 1,000 Frames')
    plt.legend(loc='lower right')
    os.makedirs('figures', exist_ok=True)
    plt.savefig('figures/' +
                basename(file).replace('.wav', '_{}.pdf'.format(n)),
                bbox_inches='tight')
    plt.close()
Ejemplo n.º 22
def tempo_track(x, sr):
    hop_length = 200  # samples per frame
    onset_env = librosa.onset.onset_strength(y=x,
                                             sr=sr,
                                             hop_length=hop_length,
                                             n_fft=2048)
    S = librosa.stft(onset_env, hop_length=1, n_fft=512)
    fourier_tempogram = np.absolute(S)
    # n0 and n1 are assumed to be externally defined frame bounds; the
    # log-compressed autocorrelation is computed but not used below.
    tmp = np.log1p(onset_env[n0:n1])
    r = librosa.autocorrelate(tmp)
    tempo = librosa.beat.tempo(y=x, sr=sr)
    T = len(x) / float(sr)
    seconds_per_beat = 60.0 / tempo[0]
    beat_times = np.arange(0, T, seconds_per_beat)
    return beat_times
Ejemplo n.º 23
def detection(filename, fmin, fmax, verify=False):
    """
    Detects whether a mosquito is present in 100ms snippets of a given audio file
    :param filename: Name of audio file; String
    :param fmin: Minimum frequency to look for; int
    :param fmax: Maximum frequency to look for; int
    :param verify: Whether or not to make plots for human verification of the result; Bool
    :return: Mosquito audio snippets AND binary Labels for all 100ms snippets of the audio AND sampling rate; 
             np.array of floats AND np.array of ints AND int
    """
    audio_parts = []
    # Load and apply bandpass filter
    audio, sr = librosa.load(filename, sr=None)
    b, a = scipy.signal.butter(N=2, Wn=[300, 2048], btype='bandpass', fs=sr)
    audio = scipy.signal.lfilter(b, a, audio)

    # Calculate windows for signal
    ms100_window = sr // 10  # 100ms
    num_windows = audio.size // ms100_window
    start = 0

    # Store binary labels for mosquito presence, one per 100ms window
    labels = np.zeros(shape=num_windows, dtype=int)
    for i in range(num_windows):
        # Autocorrelation
        window = audio[start:start+ms100_window]
        r = librosa.autocorrelate(window)

        periodic, first_distance = is_periodic(r, sr//1000, sr)
        if periodic:
            # Pitch Detection
            T = (first_distance / sr)  # Calculate period in seconds
            f0 = round(1 / T, 2)  # Physics, f = 1/T in Hz

            if fmin <= f0 <= fmax:  # Roughly mosquito frequency range (across species)
                labels[i] = 1
                audio_parts.append(window)

        if verify:
            peaks, _ = scipy.signal.find_peaks(r, distance=sr//1000)
            plt.plot(r)
            plt.plot(peaks[:-1], r[peaks[:-1]], marker="x")
            plt.savefig(f"{i}.png")
            plt.close()

        start += ms100_window

    return audio_parts, labels, sr
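The helper is_periodic is not shown. A plausible sketch matching how it is called above, returning a flag plus the lag of the first autocorrelation peak; the 10% spacing-evenness tolerance is an assumption:

import numpy as np
import scipy.signal

def is_periodic(r, distance, sr):
    # sr is accepted for signature compatibility but unused in this sketch.
    # A window counts as periodic when the autocorrelation has several
    # roughly evenly spaced peaks beyond lag 0.
    peaks, _ = scipy.signal.find_peaks(r, distance=distance)
    if len(peaks) < 3:
        return False, 0
    spacings = np.diff(peaks)
    periodic = spacings.std() < 0.1 * spacings.mean()
    return periodic, peaks[0]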
Ejemplo n.º 24
def guess_note(y, sr):
    r = librosa.autocorrelate(y, max_size=5000)

    midi_hi = 120.0
    midi_lo = 12.0
    f_hi = 700
    f_lo = 75
    t_lo = sr / f_hi
    t_hi = sr / f_lo

    r[:int(t_lo)] = 0
    r[int(t_hi):] = 0

    t_max = r.argmax()

    note = librosa.hz_to_note(float(sr) / t_max)

    return note
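Hypothetical usage: an A4 sine at 440 Hz falls inside the 75-700 Hz search band, so its period lag (sr / 440 ≈ 50 samples) wins the argmax and maps back to the note name.

import numpy as np

sr = 22050
y = np.sin(2 * np.pi * 440.0 * np.arange(sr) / sr)
print(guess_note(y, sr))   # expect 'A4'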
Ejemplo n.º 25
def getVoicingActivity(audioSegment,
                       hopLength,
                       method='rmsEnergy',
                       threshold=0.0):
    """
	Description:
		Given an audio file as an array (obtain from audio=librosa.load(filename.wav)) this returns a binary array
		[1,0,0,1,1,1,1,...] where each element indicates the voicing nature of a frame of audio (determined by hopLength)

	"""

    if method == 'rmsEnergy':
        if threshold == 0.0:
            print(
                f"ERROR:Threshold was NOT calculated before utils.getVoicingActivity()."
            )

        # librosa.feature.rmse was renamed to librosa.feature.rms in 0.7
        rmsEnergy = librosa.feature.rms(y=audioSegment,
                                        frame_length=2 * hopLength,
                                        hop_length=hopLength,
                                        center=True)

        voicingActivity = [1] * rmsEnergy.shape[1]

        for k in range(rmsEnergy.shape[1]):
            if rmsEnergy[0][k] < threshold:
                voicingActivity[k] = 0

        # DEBUG plots:
        #plt.plot(rmsEnergy[0])
        #plt.hlines(thres, 0, len(rmsEnergy[0]))

    elif method == 'periodicity':
        # This branch is unfinished in the source: it computes frame-wise
        # autocorrelations but never derives voicing decisions from them,
        # so falling through would hit an undefined `voicingActivity`.
        frames = librosa.util.frame(audioSegment,
                                    frame_length=2 * hopLength,
                                    hop_length=hopLength)
        for indx in range(len(frames)):
            rxx = librosa.autocorrelate(frames[indx], max_size=10)
        raise NotImplementedError(
            "periodicity-based voicing detection is incomplete")

    else:
        raise ValueError(
            f"ERROR:Voicing Activity detection Method: {method} INVALID.")

    return voicingActivity
Ejemplo n.º 26
 def setTempo(self, plot=False, force=False):
     if plot or force or np.isnan(self.tempo):
         hop_length = self.hopLength
         # librosa.beat.estimate_tempo and librosa.display.time_ticks (used below)
         # were removed in librosa 0.6; newer versions use librosa.beat.tempo and
         # manual tick formatting instead.
         self.tempo = librosa.beat.estimate_tempo(self.onsetEnv,
                                                  sr=self.sr,
                                                  hop_length=hop_length)
         ac = librosa.util.normalize(
             librosa.autocorrelate(self.onsetEnv,
                                   3 * self.sr // hop_length))
         tempo_frames = (60 * self.sr / hop_length) / self.tempo
         self.autocorrelationStd = np.std(ac[int(tempo_frames):])
         self.autocorrelationMean = np.mean(ac[int(tempo_frames):])
         if plot:
             fig = plt.figure(figsize=(20, 10))
             ax = fig.add_subplot(111)
             ax.plot(ac, label='Onset autocorrelation')
             ax.vlines([tempo_frames],
                       0,
                       1,
                       color='r',
                       alpha=0.75,
                       linestyle='--',
                       label='Tempo: {:.2f} BPM'.format(self.tempo))
             ax.axhline(y=self.autocorrelationMean,
                        color='k',
                        linestyle=':',
                        label='Mean+-Std {:.3f}'.format(
                            self.autocorrelationStd))
             ax.axhline(y=self.autocorrelationMean +
                        self.autocorrelationStd,
                        color='k',
                        linestyle=':')
             ax.axhline(y=self.autocorrelationMean -
                        self.autocorrelationStd,
                        color='k',
                        linestyle=':')
             librosa.display.time_ticks(
                 librosa.frames_to_time(np.arange(len(ac)), sr=self.sr))
             plt.title(self.fileName)
             plt.xlabel('Lag')
             plt.legend()
             plt.axis('tight')
             plt.savefig(self.fileName + '.png', format='png', dpi=300)
             plt.close(fig)
Ejemplo n.º 27
def estimate_pitch(segment,
                   sr,
                   fmin=librosa.note_to_hz('C3'),
                   fmax=librosa.note_to_hz('C6')):

    # Compute autocorrelation of input segment.

    r = librosa.autocorrelate(segment)

    # Define lower and upper limits for the autocorrelation argmax.
    i_min = sr / fmax
    i_max = sr / fmin
    r[:int(i_min)] = 0
    r[int(i_max):] = 0

    # Find the location of the maximum autocorrelation.
    i = r.argmax()
    f0 = float(sr) / i
    return f0
Ejemplo n.º 28
def make_file_data(y, sr, hop, nfft, win_len):
    chroma = librosa.feature.chroma_stft(y=y, sr=sr, hop_length=hop)
    spectral_contrast = librosa.feature.spectral_contrast(y=y,
                                                          sr=sr,
                                                          hop_length=hop)
    onset_env = librosa.onset.onset_strength(y=y,
                                             sr=sr,
                                             hop_length=hop,
                                             n_fft=nfft)
    zcr = librosa.feature.zero_crossing_rate(y,
                                             frame_length=win_len,
                                             hop_length=hop)
    onsetLog = np.log1p(onset_env)
    ac = librosa.autocorrelate(onsetLog)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)  # keyword args; positional y/sr are rejected by newer librosa

    data = np.vstack([chroma, spectral_contrast, mfcc, onset_env, zcr,
                      ac]).transpose(1, 0)
    return data
Ejemplo n.º 29
    def __test_equiv(tempo, center):
        odf = np.zeros(duration * sr // hop_length)
        spacing = sr * 60. // (hop_length * tempo)
        odf[::int(spacing)] = 1

        odf_ac = librosa.autocorrelate(odf)

        tempogram = librosa.feature.tempogram(onset_envelope=odf,
                                              sr=sr,
                                              hop_length=hop_length,
                                              win_length=len(odf),
                                              window=np.ones,
                                              center=center,
                                              norm=None)

        idx = 0
        if center:
            idx = len(odf)//2

        assert np.allclose(odf_ac, tempogram[:, idx])
Ejemplo n.º 30
def estimate_pitch(n0,
                   n1,
                   fmin=50.0,
                   fmax=2000.0):  #F0 ESTIMATION OF A GIVEN SEGMENT

    # Compute autocorrelation of input segment.
    # `x`, `sr`, and `f0s` are assumed to be module-level globals.
    segment = x[n0:n1]
    r = librosa.autocorrelate(segment)

    # Define lower and upper limits for the autocorrelation argmax.
    i_min = sr / fmax
    i_max = sr / fmin
    r[:int(i_min)] = 0
    r[int(i_max):] = 0

    # Find the location of the maximum autocorrelation.
    i = r.argmax()
    f0 = float(sr) / i
    f0s.append(f0)
    return f0
Ejemplo n.º 31
def tempoVSautoCorrelation(tempo, hop_length):
    tempo = np.asarray(tempo).item()  # np.asscalar was removed in NumPy 1.23
    ac = librosa.autocorrelate(onset_env, 2 * sr // hop_length)
    freqs = librosa.tempo_frequencies(len(ac), sr=sr, hop_length=hop_length)
    plt.figure(figsize=(8, 4))
    plt.semilogx(freqs[1:],
                 librosa.util.normalize(ac)[1:],
                 label='Onset autocorrelation',
                 base=2)  # `basex` was renamed to `base` in Matplotlib 3.3
    plt.axvline(tempo,
                0,
                1,
                color='r',
                alpha=0.75,
                linestyle='--',
                label='Tempo: {:.2f} BPM'.format(tempo))
    plt.xlabel('Tempo (BPM)')
    plt.grid()
    plt.title('Static tempo estimation')
    plt.legend(frameon=True)
    plt.axis('tight')
Ejemplo n.º 32
def estimate_pitch(segment, sr, fmin=50.0, fmax=2000.0):

    # Compute autocorrelation of input segment.
    print('1')  # debug traces from the original, converted from Python 2 syntax
    r = librosa.autocorrelate(segment)
    print('1')
    # Define lower and upper limits for the autocorrelation argmax.
    i_min = sr / fmax
    print('1')
    i_max = sr / fmin
    print('1')
    r[:int(i_min)] = 0
    print('1')
    r[int(i_max):] = 0
    print('1')
    # Find the location of the maximum autocorrelation.
    i = r.argmax()
    print('1')
    f0 = float(sr) / i
    print('1')
    return f0
Ejemplo n.º 33
 def generate(self):
     rhythm_bpm = np.empty((len(self.data), 8))
     for i, song in enumerate(self.data):
         if self.verbose and i % 100 == 0:
             print("Got rhythm data for {0} songs".format(i))
         oenv = librosa.onset.onset_strength(y=song, sr=self.sr)
         # tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=self.sr)
         # dtempo = librosa.beat.tempo(
         #     onset_envelope=onset_env, sr=self.sr, aggregate=None)
         tempogram = librosa.feature.tempogram(onset_envelope=oenv,
                                               sr=self.sr)
         tempogram_features = self.util.vector_to_features(tempogram)
         ac_global = librosa.autocorrelate(oenv,
                                           max_size=tempogram.shape[0])
         ac_global = librosa.util.normalize(ac_global)
         tempo = librosa.beat.tempo(onset_envelope=oenv, sr=self.sr)
         rhythm_bpm[i] = np.hstack(
             [tempo,
              ac_global.mean(),
              ac_global.std(), tempogram_features])
     return rhythm_bpm
Ejemplo n.º 34
def compute_stm(counts, nbins=300, ncycles=5):    
    start_ind = 0
    nsamples = nbins*ncycles
    end_ind = start_ind + nsamples
    n_acf = 1500
    n_stm = 750 #int(nsamples/4.)
    ac_all, scale_all = [], []

    while end_ind <= len(counts):
        ac = librosa.autocorrelate(counts[start_ind:end_ind], max_size=n_acf)
        ac = librosa.util.normalize(ac, norm=np.inf)
        ac_all.append(ac)

        scale = librosa.fmt(ac, n_fmt=n_stm)  # librosa.fmt (fast Mellin transform) was removed in librosa 0.10
        scale_all.append(scale)

        start_ind += nsamples
        end_ind += nsamples

    ac_all = np.array(ac_all)
    scale_all = np.array(scale_all)
    
    return ac_all, scale_all
Ejemplo n.º 35
 def setTempo(self, plot=False, force=False):
     if plot or force or np.isnan(self.tempo):
         hop_length = self.hopLength
         self.tempo = librosa.beat.estimate_tempo(self.onsetEnv, sr=self.sr, hop_length=hop_length)  # removed in librosa 0.6; use librosa.beat.tempo
         ac = librosa.util.normalize(librosa.autocorrelate(self.onsetEnv, 3 * self.sr // hop_length))
         tempo_frames = (60 * self.sr / hop_length) / self.tempo
         self.autocorrelationStd = np.std(ac[int(tempo_frames) :])
         self.autocorrelationMean = np.mean(ac[int(tempo_frames) :])
         if plot:
             fig = plt.figure(figsize=(20, 10))
             ax = fig.add_subplot(111)
             ax.plot(ac, label="Onset autocorrelation")
             ax.vlines(
                 [tempo_frames],
                 0,
                 1,
                 color="r",
                 alpha=0.75,
                 linestyle="--",
                 label="Tempo: {:.2f} BPM".format(self.tempo),
             )
             ax.axhline(
                 y=self.autocorrelationMean,
                 color="k",
                 linestyle=":",
                 label="Mean+-Std {:.3f}".format(self.autocorrelationStd),
             )
             ax.axhline(y=self.autocorrelationMean + self.autocorrelationStd, color="k", linestyle=":")
             ax.axhline(y=self.autocorrelationMean - self.autocorrelationStd, color="k", linestyle=":")
             librosa.display.time_ticks(librosa.frames_to_time(np.arange(len(ac)), sr=self.sr))
             plt.title(self.fileName)
             plt.xlabel("Lag")
             plt.legend()
             plt.axis("tight")
             plt.savefig(self.fileName + ".png", format="png", dpi=300)
             plt.close(fig)
Ejemplo n.º 36
def get_features5(file):
    fp = FeaturePlan(sample_rate=22050)
    
    fp.addFeature('scfp: SpectralCrestFactorPerBand FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256')#19

    fp.addFeature('sfp: SpectralFlatnessPerBand FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256') #19
   
    fp.addFeature('loudness: Loudness FFTLength=0  FFTWindow=Hanning  LMode=Relative  blockSize=512  stepSize=256')#24
    #fp.addFeature('ms: MagnitudeSpectrum FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256') #257

    engine = Engine()
    engine.load(fp.getDataFlow())
    afp = AudioFileProcessor()
    afp.processFile(engine,file)
    feats = engine.readAllOutputs()
    
    

    y, sr = librosa.load(file)
    print(y.shape)
    print(sr)

    hop_length = 256
    oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr, hop_length=hop_length)
    ac_global = librosa.autocorrelate(oenv, max_size=tempogram.shape[0])#384
    ac_global = librosa.util.normalize(ac_global)
    print(ac_global.shape)
    # librosa.beat.estimate_tempo was removed in librosa 0.6; use librosa.beat.tempo instead
    tempo = librosa.beat.estimate_tempo(oenv, sr=sr, hop_length=hop_length)  # 1
    print("tempo", tempo)
    print("tempogram", tempogram.shape)  # 384

    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    #print "tempo", tempo
    #print "beat_frames", beat_frames.shape
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)
    #print "beat_times" , beat_times.shape
    #print beat_times
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    # Compute MFCC features from the raw signal
    

    chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr) #12

    c = np.mean(chromagram,axis=1)
    print "c", c.shape
    print "chromagram" , chromagram.shape
    r = calc_statistical_features(chromagram)
    print r.shape

    a1 = calc_statistical_features(feats['scfp'].transpose())   #19*7 = 133
    a1 = a1.reshape(a1.shape[0]*a1.shape[1])
    print(a1.shape)
    a2 = calc_statistical_features(feats['sfp'].transpose())    #19*7 = 133
    a2 = a2.reshape(a2.shape[0]*a2.shape[1])
    print(a2.shape)
    a3 = calc_statistical_features(feats['loudness'].transpose())  #24*7 = 168
    a3 = a3.reshape(a3.shape[0]*a3.shape[1])
    print(a3.shape)
    a4 = calc_statistical_features(tempogram) #384*7 = 2688 
    a4 = a4.reshape(a4.shape[0]*a4.shape[1])
    print(a4.shape)
    a5 = calc_statistical_features(chromagram)   #12*7 = 84
    a5 = a5.reshape(a5.shape[0]*a5.shape[1])
    print(a5.shape)
    feature5_set = np.hstack((a1,a2,a3,a4,a5))   #266+168+84+2688 = 3206
    print "feature5_set",feature5_set.shape
    return feature5_set
Ejemplo n.º 37
def test_feature():
    file = "/mnt/hgfs/vmfiles/genres/pop/pop.00002.wav"
    fp = FeaturePlan(sample_rate=22050)
    fp.addFeature('mfcc: MFCC blockSize=512 stepSize=256')#13
    fp.addFeature('sr: SpectralRolloff blockSize=512 stepSize=256')#1
    fp.addFeature('sf: SpectralFlux blockSize=512 stepSize=256')#1
    fp.addFeature('scfp: SpectralCrestFactorPerBand FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256')#19
    fp.addFeature('sf1: SpectralFlatness FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256')#1
    fp.addFeature('sc: SpectralShapeStatistics FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256') #4
    fp.addFeature('sfp: SpectralFlatnessPerBand FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256') #19
    fp.addFeature('energy: Energy blockSize=512  stepSize=256')#1
    fp.addFeature('loudness: Loudness FFTLength=0  FFTWindow=Hanning  LMode=Relative  blockSize=512  stepSize=256')#24
    fp.addFeature('ms: MagnitudeSpectrum FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256') #257
    fp.addFeature('ps: PerceptualSharpness FFTLength=0  FFTWindow=Hanning  blockSize=512  stepSize=256')#1
    fp.addFeature('zcr:ZCR blockSize=512  stepSize=256')#1
    engine = Engine()
    engine.load(fp.getDataFlow())
    afp = AudioFileProcessor()
    afp.processFile(engine,file)
    feats = engine.readAllOutputs()
    ceps = feats['zcr']
    #print ceps.shape
    #num_ceps = len(ceps)
    
    y, sr = librosa.load(file)
    print(y.shape)
    print(sr)

    hop_length = 256
    oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr, hop_length=hop_length)
    ac_global = librosa.autocorrelate(oenv, max_size=tempogram.shape[0])#384
    ac_global = librosa.util.normalize(ac_global)
    print(ac_global.shape)
    # librosa.beat.estimate_tempo was removed in librosa 0.6; use librosa.beat.tempo instead
    tempo = librosa.beat.estimate_tempo(oenv, sr=sr, hop_length=hop_length)  # 1
    print("tempo", tempo)
    print("tempogram", tempogram.shape)  # 384

    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    print "tempo", tempo
    print "beat_frames", beat_frames.shape
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)
    print "beat_times" , beat_times.shape
    print beat_times
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    # Compute MFCC features from the raw signal
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
    print "mfcc" , mfcc.shape
    # And the first-order differences (delta features)
    mfcc_delta = librosa.feature.delta(mfcc)
    print "mfcc_delta" , mfcc_delta.shape
    # Stack and synchronize between beat events
    # This time, we'll use the mean value (default) instead of median
    beat_mfcc_delta = librosa.feature.sync(np.vstack([mfcc, mfcc_delta]), beat_frames)  # later moved to librosa.util.sync

    print("beat_mfcc_delta", beat_mfcc_delta.shape)

    chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr) #12

    c = np.mean(chromagram,axis=1)
    print "c", c.shape
    print "chromagram" , chromagram.shape
    r = calc_statistical_features(chromagram)
    print r.shape

    
    beat_chroma = librosa.feature.sync(chromagram, beat_frames, aggregate=np.median)
    print "beat_chroma" , beat_chroma.shape
    #print beat_chroma
    # Finally, stack all beat-synchronous features together
    beat_features = np.vstack([beat_chroma, beat_mfcc_delta])
    print "beat_features" , beat_features.shape
    beat_feature_set = np.mean(beat_features,axis =1)
    print beat_feature_set.shape
    print beat_feature_set