Example #1
0
def extract(args):
    """Extract a bank of spectral and rhythmic features from one audio file.

    Parameters
    ----------
    args : tuple
        ``(audio_directory, output_directory, af, overwrite)`` where ``af``
        is the path of the audio file to process.  Packed as a single tuple
        so the function can be mapped over a ``multiprocessing.Pool``.

    Side effects
    ------------
    Writes ``<output_directory>/<basename>.npz`` containing every computed
    feature; skips the file when it already exists and ``overwrite`` is
    falsy.
    """
    audio_directory, output_directory, af, overwrite = args
    output_file = os.path.splitext(os.path.basename(af))[0]
    output_file = os.path.join(output_directory, output_file)

    # np.savez_compressed appends '.npz' to the target name, so the
    # existence check must include that suffix (checking the bare name,
    # as the original did, could never find the saved file).
    if os.path.exists(output_file + '.npz') and not overwrite:
        print('Skipping {}. Already exists.'.format(output_file))
        return

    output = dict()

    try:
        # Fast path: decode with soundfile, then downmix and resample
        # to the fixed 22050 Hz analysis rate.
        y, _sr = soundfile.read(af)
        y = to_mono(y)
        sr = 22050
        y = resample(y, _sr, sr)
    except Exception:
        # soundfile cannot decode every container; fall back to load(),
        # which returns mono audio at its own sample rate.
        y, sr = load(af)

    # low-level spectral features
    output['linspec_mag'], output['linspec_phase'] = linspec(y)
    output['melspec'] = melspec(y, sr=sr)
    output['logspec'] = logspec(y, sr=sr)
    output['hcqt_mag'], output['hcqt_phase'] = hcqt(y, sr=sr)
    output['vggish_melspec'] = vggish_melspec(y, sr=sr)

    # high-level rhythmic / energy features
    output['percussive_ratio'], output['percussive_rms'], output[
        'total_rms'] = percussive_ratio(y, margin=3.0)
    output['onset_strength'] = onset_strength(y, detrend=True)
    output['tempogram'] = tempogram(y)
    output['onset_patterns'] = onset_patterns(y, sr=sr)

    np.savez_compressed(output_file, **output)
def findRhythmic(wave):  # 3 dimensions
    """Summarise the rhythmic content of *wave* as a single scalar.

    Computes the onset-strength envelope, builds a tempogram from it
    (using the module-level ``hop_size``), and returns a dict holding the
    tempogram's element sum under the key ``'tempo_sum'``.
    """
    onset_env = onset_strength(wave)
    tgram = feature.tempogram(onset_envelope=onset_env, hop_length=hop_size)
    return {'tempo_sum': np.sum(tgram)}
def tempo(y=None, sr=22050, onset_envelope=None, hop_length=512, start_bpm=120,
          std_bpm=1.0, ac_size=8.0, max_tempo=320.0, aggregate=np.mean):
    """Estimate the two strongest tempi of a signal.

    Same pipeline as ``librosa.beat.tempo`` (tempogram -> time aggregation
    -> log-normal prior centred on ``start_bpm`` -> peak picking), extended
    to also report the runner-up tempo.

    Returns
    -------
    (second_tempo, best_tempo) : tuple of float
        The second-best and best BPM estimates for the first frame.

    Raises
    ------
    ParameterError
        If ``start_bpm`` is not strictly positive.
    """
    if start_bpm <= 0:
        raise ParameterError('start_bpm must be strictly positive')

    # np.asscalar() was removed in NumPy 1.23; .item() is the supported
    # way to extract a Python scalar.
    win_length = core.time_to_frames(ac_size, sr=sr,
                                     hop_length=hop_length).item()

    tg = tempogram(y=y, sr=sr,
                   onset_envelope=onset_envelope,
                   hop_length=hop_length,
                   win_length=win_length)

    # Eventually, we want this to work for time-varying tempo
    # (keepdims keeps tg 2-d so the indexing below is uniform).
    if aggregate is not None:
        tg = aggregate(tg, axis=1, keepdims=True)

    # Get the BPM values for each bin, skipping the 0-lag bin
    bpms = core.tempo_frequencies(tg.shape[0], hop_length=hop_length, sr=sr)

    # Weight the autocorrelation by a log-normal distribution
    prior = np.exp(-0.5 * ((np.log2(bpms) - np.log2(start_bpm)) / std_bpm)**2)

    # Kill everything above the max tempo (bpms is sorted descending, so
    # the leading bins hold the fastest tempi)
    if max_tempo is not None:
        max_idx = np.argmax(bpms < max_tempo)
        prior[:max_idx] = 0

    # Really, instead of multiplying by the prior, we should set up a
    # probabilistic model for tempo and add log-probabilities.
    # This would give us a chance to recover from null signals and
    # rely on the prior.
    # it would also make time aggregation much more natural

    # Get the best and second-best lags, weighted by the prior
    weighted = tg * prior[:, np.newaxis]
    best_period = np.argmax(weighted, axis=0)
    second_period = np.argsort(weighted, axis=0)[-2]

    tempi = bpms[best_period]
    tempi2 = bpms[second_period]

    # Wherever the best tempo is index 0 (no detectable peak), return
    # start_bpm instead
    tempi[best_period == 0] = start_bpm
    tempi2[second_period == 0] = start_bpm
    return (tempi2.astype(float)[0].item(), tempi.astype(float)[0].item())
Example #4
0
 def _compute_tempo(self, audio_buffer):
     """Return the unnormalised tempogram of *audio_buffer*.

     The buffer is cast to float and analysed at a fixed 8 kHz sample
     rate; ``norm=None`` keeps the raw autocorrelation values.
     """
     samples = audio_buffer.astype(float)
     return tempogram(y=samples, sr=8000, norm=None)
Example #5
0
def plot_tempograms(filepaths):
    """Accepts a list of filepaths, and plots a tempogram for each associated audio file.

    One subplot row per file.  (The original hard-coded 3 rows, which
    raised IndexError for longer lists and left blank axes for shorter
    ones.)  Each panel shows the tempogram with the scalar tempo estimate
    overlaid as a dashed white line.
    """
    # squeeze=False keeps `axes` 2-d, so indexing works even for one file.
    fig, axes = plt.subplots(len(filepaths), 1, squeeze=False)
    for k, path in enumerate(filepaths):
        data, rate = librosa.load(path)
        gram = tempogram(data, rate)
        temp = tempo(data, rate)
        print('Tempogram dimensions:', gram.shape)
        ax = axes[k, 0]
        display.specshow(gram,
                         sr=rate,
                         x_axis='time',
                         y_axis='tempo',
                         cmap='magma',
                         ax=ax)
        # Mark the estimated global tempo.
        ax.axhline(temp, color='w', linestyle='--', alpha=1)
        # Trim the leading directory prefix from the title.
        ax.set_title(str(path[15:]))
    plt.tight_layout()
    plt.show()
Example #6
0
    def transform_audio(self, y):
        '''Compute the tempogram

        Parameters
        ----------
        y : np.ndarray
            Audio buffer

        Returns
        -------
        data : dict
            data['tempogram'] : np.ndarray, shape=(n_frames, win_length)
                The tempogram
        '''
        # Number of output frames implied by the clip's duration.
        target_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        tg = tempogram(
            y=y,
            sr=self.sr,
            hop_length=self.hop_length,
            win_length=self.win_length,
        ).astype(np.float32)

        # Pad/trim the time axis so every clip yields the same frame count.
        tg = fix_length(tg, target_frames)

        return {'tempogram': tg.T[self.idx]}
Example #7
0
def ac_peaks(data, rate, plot=False):
    """Return the three highest peaks in the autocorrelation (tempo) array.  Plot if needed."""

    # Onset-strength envelope: captures spectral-flux variation over time,
    # which is what carries the tempo information.
    oenv = librosa.onset.onset_strength(y=data, sr=rate)

    # Tempogram, truncated at time index 1000.
    gram = tempogram(data, rate)[:, :1000]

    # Global autocorrelation of the envelope, and the BPM estimate that
    # corresponds to each lag bin.
    ac_global = librosa.autocorrelate(oenv, max_size=gram.shape[0])
    freqs = librosa.tempo_frequencies(gram.shape[0], sr=rate)

    # Keep the three peaks with the largest autocorrelation value, in
    # ascending order of strength.
    peaks, _ = find_peaks(ac_global)
    order = np.argsort(ac_global[peaks])
    peaks = peaks[order][-3:]

    # Plot the stuff if requested
    if plot:
        plt.semilogx(freqs, ac_global, ':', base=2)
        plt.semilogx(freqs[peaks],
                     ac_global[peaks],
                     marker='o',
                     linestyle='',
                     base=2)
        plt.xlabel('BPM')
        plt.ylabel('Autocorrelation')
        plt.legend(['Global Autocorrelation', 'Three Highest Peaks'])
        plt.show()

    # Strongest tempo first; a NaN triple signals fewer than three peaks.
    if len(freqs[peaks]) == 3:
        return np.array(freqs[peaks])[::-1]
    return np.array([float('NaN'), float('NaN'), float('NaN')])
Example #8
0
        # Interior of a per-example loop (header outside this view): append
        # each time-averaged librosa feature to the current feature row.
        # NOTE(review): every call below but one slices `thing1[:-1]` --
        # presumably audio samples with the class label in the last slot;
        # confirm against the enclosing loop.
        row = np.concatenate((row, spcent))
        flatness = np.mean(lf.spectral_flatness(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, flatness))
        rolloff = np.mean(lf.spectral_rolloff(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, rolloff))
        mspec = np.mean(lf.melspectrogram(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, mspec))
        mfcc = np.mean(lf.mfcc(thing1[:-1], n_mfcc=30).T, axis=0)
        row = np.concatenate((row, mfcc))
        tonnetz = np.mean(lf.tonnetz(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, tonnetz))
        # NOTE(review): lf.rmse was renamed lf.rms in librosa 0.8 -- this
        # line requires an older librosa.
        rmse = np.mean(lf.rmse(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, rmse))
        contrast = np.mean(lf.spectral_contrast(thing1[:-1]).T, axis=0)
        row = np.concatenate((row, contrast))
        # NOTE(review): `thing` (not `thing1`) -- looks like a typo; verify
        # that `thing` is actually defined in the enclosing scope.
        tempo = np.mean(lf.tempogram(thing[:-1], win_length=88).T, axis=0)
        row = np.concatenate((row, tempo))
        # The final slot of thing1 is appended as the class label.
        row = np.append(row, thing1[-1])
        #print(len(row))

        train_data = np.append(train_data, row)
        counter += 1

# Build an empty DataFrame with 299 feature columns plus a class column.
columns = ["feat_" + str(i) for i in range(299)]
columns.append("class")
df_train2 = pd.DataFrame(columns=columns)

# Re-chunk the flat train_data vector into rows of 300 values
# (299 features + label), printing percentage progress.
# NOTE(review): 6325 is a hard-coded example count -- confirm it matches
# len(train_data) / 300.  (Loop body continues past this view.)
for i in range(6325):
    print(float(i) / 6325. * 100)
    row = train_data[300 * i:300 * (i + 1)]
    #print(pd.Series(row))