def analyse(filename, resample_to=2756, bt_hop_length=128,
            chroma_hop_length=512, chroma_n_fft=1024):
    samples, sampleRate = librosa.load(filename)
    length = float(len(samples))/sampleRate
    # Optionally resample to a lower rate to speed up the analysis.
    if resample_to:
        samples = librosa.resample(samples, sampleRate, resample_to)
        sampleRate = resample_to
    tempo, beats = librosa.beat.beat_track(samples, sampleRate,
                                           hop_length=bt_hop_length)
    beat_times = librosa.frames_to_time(beats, sampleRate,
                                        hop_length=bt_hop_length)
    chromagram = librosa.feature.chromagram(samples, sampleRate,
                                            hop_length=chroma_hop_length,
                                            n_fft=chroma_n_fft)
    chromagram = numpy.transpose(chromagram)
    distances = scipy.spatial.distance.cdist(chromagram, CHORDS, "cosine")
    chords = distances.argmin(axis=1)
    chords = scipy.signal.medfilt(chords, 11)
    chord_frames = numpy.array(numpy.where(numpy.diff(chords) != 0))
    chords = chords[chord_frames][0].astype(int)
    chord_times = librosa.frames_to_time(chord_frames, sampleRate,
                                         hop_length=chroma_hop_length,
                                         n_fft=chroma_n_fft)[0]
    chord_names = CHORD_NAMES[chords]
    return {"beats": list(beat_times),
            "chords": [{"chord": chord_name, "time": chord_time} for chord_name, chord_time in zip(chord_names, chord_times)],
            "tempo": tempo}
Example #2
File: main.py Project: beckgom/msaf
def features(filename):
    # print '\t[1/5] loading audio'
    y, sr = librosa.load(filename, sr=SR)

    # print '\t[2/5] Separating harmonic and percussive signals'
    y_perc, y_harm = hp_sep(y)

    # print '\t[3/5] detecting beats'
    bpm, beats = get_beats(y=y_perc, sr=sr, hop_length=HOP_LENGTH)

    # print '\t[4/5] generating CQT'
    M1 = np.abs(
        librosa.cqt(y=y_harm, sr=sr, hop_length=HOP_LENGTH, bins_per_octave=12, fmin=librosa.midi_to_hz(24), n_bins=72)
    )

    M1 = librosa.logamplitude(M1 ** 2.0, ref_power=np.max)

    # print '\t[5/5] generating MFCC'
    S = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=HOP_LENGTH, n_mels=N_MELS)
    M2 = librosa.feature.mfcc(S=librosa.logamplitude(S), n_mfcc=N_MFCC)

    n = min(M1.shape[1], M2.shape[1])

    beats = beats[beats < n]

    beats = np.unique(np.concatenate([[0], beats]))

    times = librosa.frames_to_time(beats, sr=sr, hop_length=HOP_LENGTH)

    times = np.concatenate([times, [float(len(y)) / sr]])
    M1 = librosa.feature.sync(M1, beats, aggregate=np.median)
    M2 = librosa.feature.sync(M2, beats, aggregate=np.mean)
    return (M1, M2), times
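The helpers hp_sep and get_beats, and the SR, HOP_LENGTH, N_MELS and N_MFCC constants, are defined elsewhere in the msaf project. A minimal sketch, assuming they are thin wrappers around librosa (values below are illustrative, not the project's actual settings):

import librosa

SR = 22050          # assumed analysis sample rate
HOP_LENGTH = 512    # assumed hop size in samples
N_MELS = 128        # assumed number of mel bands
N_MFCC = 14         # assumed number of MFCC coefficients

def hp_sep(y):
    # Harmonic-percussive separation; returns (percussive, harmonic) to match the call above.
    y_harm, y_perc = librosa.effects.hpss(y)
    return y_perc, y_harm

def get_beats(y, sr, hop_length):
    # Beat tracking on the percussive signal; returns (tempo, beat frame indices).
    return librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_length)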
Example #3
def logcqt_onsets(x, fs, pre_max=0, post_max=1, pre_avg=0,
                  post_avg=1, delta=0.05, wait=50):
    """
    Parameters
    ----------
    x : np.ndarray
        Audio signal

    fs : scalar
        Samplerate of the audio signal.

    pre_max, post_max, pre_avg, post_avg, delta, wait
        See `librosa.util.peak_pick` for details.

    Returns
    -------
    onsets : np.ndarray, ndim=1
        Times in seconds for splitting.
    """
    hop_length = 1024
    x_noise = x + np.random.normal(scale=10.**-3, size=x.shape)
    cqt = librosa.cqt(x_noise.flatten(),
                      sr=fs, hop_length=hop_length, fmin=27.5,
                      n_bins=24*8, bins_per_octave=24, tuning=0,
                      sparsity=0, real=False, norm=1)
    cqt = np.abs(cqt)
    lcqt = np.log1p(5000*cqt)

    c_n = utils.canny(51, 3.5, 1)
    onset_strength = sig.lfilter(c_n, np.ones(1), lcqt, axis=1).mean(axis=0)

    peak_idx = librosa.onset.onset_detect(
        onset_envelope=onset_strength, delta=delta, wait=wait)
    return librosa.frames_to_time(peak_idx, hop_length=hop_length)
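utils.canny here is a project-specific helper, not part of librosa. A common way to build such an edge-detection kernel is the first derivative of a Gaussian; a hypothetical canny(length, sigma, order) along those lines (an assumption, not the project's actual implementation) could be:

import numpy as np

def canny(length, sigma, order=1):
    # Derivative-of-Gaussian kernel: order 1 responds to rising edges in the log-CQT.
    t = np.arange(length) - (length - 1) / 2.0
    gaussian = np.exp(-0.5 * (t / sigma) ** 2)
    return np.gradient(gaussian) if order == 1 else gaussian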
Example #4
def get_beat(y, PARAMETERS):
    '''Estimate beat times and tempo'''
    # Compute a log-power mel spectrogram on the percussive component
    S_p = librosa.feature.melspectrogram(y=y, 
                                         sr=PARAMETERS['load']['sr'], 
                                         n_fft=PARAMETERS['stft']['n_fft'], 
                                         hop_length=PARAMETERS['beat']['hop_length'],
                                         n_mels=PARAMETERS['mel']['n_mels'],
                                         fmax=PARAMETERS['mel']['fmax'])
    
    S_p = librosa.logamplitude(S_p, ref_power=S_p.max())
    
    # Compute the median onset aggregation
    odf = librosa.onset.onset_strength(S=S_p, aggregate=np.median)
    
    # Get beats
    tempo, beats = librosa.beat.beat_track(onset_envelope=odf, 
                                           sr=PARAMETERS['load']['sr'], 
                                           hop_length=PARAMETERS['beat']['hop_length'])
      
    beat_times = librosa.frames_to_time(beats, 
                                        sr=PARAMETERS['load']['sr'], 
                                        hop_length=PARAMETERS['beat']['hop_length'])
    
    return tempo, beat_times, odf
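PARAMETERS is a nested configuration dict built elsewhere; a minimal sketch showing only the keys this function actually reads (the values are illustrative defaults, not the original configuration):

PARAMETERS = {
    'load': {'sr': 22050},
    'stft': {'n_fft': 2048},
    'beat': {'hop_length': 512},
    'mel':  {'n_mels': 128, 'fmax': 8000},
}

# tempo, beat_times, odf = get_beat(y, PARAMETERS)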
Example #5
def compute_beats(y_percussive, sr=22050):
    """Computes the beats using librosa.

    Parameters
    ----------
    y_percussive: np.array
        Percussive part of the audio signal in samples.
    sr: int
        Sample rate.

    Returns
    -------
    beats_idx: np.array
        Indices (in frames) of the estimated beats.
    beats_times: np.array
        Time of the estimated beats.
    """
    logging.info("Estimating Beats...")
    tempo, beats_idx = librosa.beat.beat_track(y=y_percussive, sr=sr,
                                               hop_length=msaf.Anal.hop_size)

    # Add first and last beat
    beats_idx = np.concatenate(([0], beats_idx,
                                [len(y_percussive) / msaf.Anal.hop_size])).\
        astype(int)

    # To times
    times = librosa.frames_to_time(beats_idx, sr=sr,
                                   hop_length=msaf.Anal.hop_size)

    return beats_idx, times
Example #6
    def analyse_bpm(self, y, sr):
        """
        determine le bpm d'une musique
        exemple de test:
                        analyse1 = analyse("/home/bettini/Musique/Deorro.wav", "fichier_csv")
                        y, sr = analyse1.extrairedatamusic()
                        analyse1.analyse_bpm(y, sr)

        :param pathtofile: chemin absolue du fichier audio dont on veut analyser le bpm
        :param fichier_csv: fichier csv dans lequel sera enregistre les bpms du morceau (nom de la playlist en cours)
        :Comment ecrit dans le fichier csv a la fin

        """
        # build the list that will be exported to the CSV file
        ElemCsv = []

        # run the default beat tracker
        tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)

        # convert the beat frame indices into timestamps of the energy pulses over the course of the track
        beat_times = librosa.frames_to_time(beat_frames, sr=sr)

        # accumulate inter-beat intervals at the start and at the end of the track, in case the tempo changes partway through
        bpm_d = 0
        bpm_f = 0
        for i in range(100):
            bpm_d = bpm_d + (beat_times[i + 1] - beat_times[i])
            bpm_f = bpm_f + (beat_times[len(beat_times) - i - 1] - beat_times[len(beat_times) - i - 2])

        # fill the list that will be stored in the database
        ElemCsv.append(tempo)
        ElemCsv.append(60 / (bpm_d / 100))
        ElemCsv.append(60 / (bpm_f / 100))

        return ElemCsv  # [overall tempo, start BPM, end BPM]
Example #7
def segment_audio_timeit(signal, sr):

    start_time = timeit.default_timer()
    silence_threshold = get_silence_threshold(signal, sr)
    print("getsilencethreshold: ")
    print(timeit.default_timer() - start_time)

    start_time = timeit.default_timer()
    o_env = librosa.onset.onset_strength(y=signal, sr=sr, centering=False, hop_length=HOP_LENGTH)
    onset_frames = librosa.onset.onset_detect(onset_envelope=o_env, sr=sr, hop_length=HOP_LENGTH)
    onset_times = librosa.frames_to_time(onset_frames, sr=sr, hop_length=HOP_LENGTH)
    print("librosa.onset_detect: ")
    print(timeit.default_timer() - start_time)

    segments = []

    overalltime = timeit.default_timer()
    for i in range(len(onset_times)):
        segment_start = int(onset_times[i]*sr)  # integer sample index for slicing
        if i != len(onset_times)-1:
            segment_end = int(onset_times[i+1]*sr)-HOP_LENGTH
        else:
            segment_end = len(signal)-1
        segment_end = find_segment_end(segment_start, segment_end, signal, silence_threshold)

        if (segment_end - segment_start >= MIN_SOUND_LEN*sr) and (onset_times[i] > START_TIME)\
                and (onset_times[i] < (len(signal)/sr-END_TIME)):
            segments.append((signal[segment_start: segment_end], onset_times[i]))

    print('all segments')
    print(timeit.default_timer() - overalltime)

    return segments
Example #8
def envelope_onsets(x, fs, wait=100):
    """
    Parameters
    ----------
    x : np.ndarray
        Audio signal to split.

    fs : scalar
        Samplerate of the audio signal.

    Returns
    -------
    onsets : np.ndarray, ndim=1
        Times in seconds for splitting.
    """

    log_env = 10 * np.log10(10. ** -4.5 + np.power(x.flatten()[:], 2.0))
    w_n = np.hanning(100)
    w_n /= w_n.sum()
    log_env_lpf = sig.filtfilt(w_n, np.ones(1), log_env)

    n_hop = 100
    kernel = utils.canny(100, 3.5, 1)
    kernel /= np.abs(kernel).sum()
    onsets_forward = sig.lfilter(
        kernel, np.ones(1),
        log_env_lpf[::n_hop] - log_env_lpf.min(), axis=0)

    onsets_pos = onsets_forward * (onsets_forward > 0)
    peak_idx = librosa.util.peak_pick(onsets_pos,
                                      pre_max=500, post_max=500, pre_avg=10,
                                      post_avg=10, delta=0.025, wait=wait)
    return librosa.frames_to_time(peak_idx, hop_length=n_hop)
Example #9
def ellis_bpm(fname, start_bpm, hpss=True, hop_length=512, tightness=100.0, plot=False, sound=False):
    y, sr = librosa.load(fname, sr=None)
    log.debug(u'Estimating tempo: {}'.format(TERM.cyan(fname)))
    if hpss:
        log.debug(TERM.magenta("Getting percussive elements"))
        y_harmonic, y_percussive = librosa.effects.hpss(y)
        chunks = np.array_split(y_percussive, PLOT_SPLIT)
        log.debug(TERM.magenta("Estimating beats per minute"))
        bpm, beat_frames = librosa.beat.beat_track(y=y_percussive, sr=sr, start_bpm=start_bpm, hop_length=hop_length, tightness=tightness)
    else:
        log.debug(TERM.magenta("Estimating beats per minute"))
        bpm, beat_frames = librosa.beat.beat_track(y=y, sr=sr, start_bpm=start_bpm, hop_length=hop_length, tightness=tightness)
        chunks = np.array_split(y, PLOT_SPLIT)

    log.debug(u'Tempo: {:6.2f} bpm'.format(bpm))
    if plot:
        plt.figure(figsize=(16,10))

        curr_frame = 0
        for i in range(PLOT_SPLIT):
            plt.subplot(PLOT_SPLIT * 100 + 11 + i)
            plt.plot(curr_frame + np.arange(len(chunks[i])), chunks[i], 'g')
            for b in beat_frames:
                plt.axvline(x=b*hop_length, color='k')
            plt.xlim([curr_frame, len(chunks[i]) + curr_frame])
            curr_frame += len(chunks[i])
        plt.show(block=False)
    if sound:
        beat_times = librosa.frames_to_time(beat_frames, sr=sr, hop_length=hop_length)
        clicks = mir_eval.sonify.clicks(beat_times, sr, length=len(y))
        sd.play(y + clicks, sr)
        input('Press Return key to stop sound')
        sd.stop()
    return bpm
Example #10
File: base.py Project: wangsix/msaf
    def estimate_beats(self):
        """Estimates the beats using librosa.

        Returns
        -------
        times: np.array
            Times of estimated beats in seconds.
        frames: np.array
            Frame indices of estimated beats.
        """
        # Compute harmonic-percussive source separation if needed
        if self._audio_percussive is None:
            self._audio_harmonic, self._audio_percussive = self.compute_HPSS()

        # Compute beats
        tempo, frames = librosa.beat.beat_track(
            y=self._audio_percussive, sr=self.sr,
            hop_length=self.hop_length)

        # To times
        times = librosa.frames_to_time(frames, sr=self.sr,
                                       hop_length=self.hop_length)

        # TODO: Is this really necessary?
        if len(times) > 0 and times[0] == 0:
            times = times[1:]
            frames = frames[1:]

        return times, frames
Example #11
def beat_track(input_file, output_csv):
    '''Beat tracking function

    :parameters:
      - input_file : str
          Path to input audio file (wav, mp3, m4a, flac, etc.)

      - output_csv : str
          Path to save beat event timestamps as a CSV file
    '''

    print('Loading ', input_file)
    y, sr = librosa.load(input_file, sr=22050)

    # Use a default hop size of 512 samples @ 22KHz ~= 23ms
    hop_length = 512

    # This is the window length used by default in stft
    print('Tracking beats')
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_length)

    print('Estimated tempo: {:0.2f} beats per minute'.format(tempo))

    # save output
    # 'beats' will contain the frame numbers of beat events.
    beat_times = librosa.frames_to_time(beats, sr=sr, hop_length=hop_length)

    print('Saving output to ', output_csv)
    librosa.output.times_csv(output_csv, beat_times)
    print('done!')
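librosa.output.times_csv was removed in later librosa releases (the output module is gone as of 0.8). To run this example on a current install, a minimal stand-in that writes one timestamp per row (an assumption about the expected CSV layout) is:

import numpy as np

def times_csv(path, times):
    # Write beat times in seconds, one value per line.
    np.savetxt(path, np.asarray(times), fmt='%0.3f', delimiter=',')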
Example #12
 def analyze(self):
     audio_path = self.path
     y, sr = librosa.load(audio_path, sr=None)
     tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
     self.tempo = tempo
     self.beats = list(beats)
     self.times = list(librosa.frames_to_time(beats, sr=sr))
Example #13
def process_file(input_file, **kwargs):

    output_file = os.path.basename(input_file)
    output_file = os.path.splitext(output_file)[0]
    output_file = os.path.extsep.join([output_file, "log"])

    if kwargs["median"]:
        output_file = os.path.extsep.join([output_file, "med"])
    else:
        output_file = os.path.extsep.join([output_file, "sum"])

    output_file = os.path.extsep.join([output_file, kwargs["spectrogram"]])
    output_file = os.path.extsep.join([output_file, "csv"])
    output_file = os.path.join(kwargs["destination"], output_file)

    with open(input_file, "rb") as f:  # pickled data must be read in binary mode
        S = pickle.load(f)[SPECMAP[kwargs["spectrogram"]]].astype(np.float32)

    if kwargs["median"]:
        odf = librosa.onset.onset_strength(S=S, sr=SR, hop_length=HOP, n_fft=N_FFT, aggregate=np.median)
    else:
        odf = librosa.onset.onset_strength(S=S, sr=SR, hop_length=HOP, n_fft=N_FFT, aggregate=np.mean)

    tempo, beats = librosa.beat.beat_track(onsets=odf, sr=SR, hop_length=HOP, tightness=TIGHTNESS)

    times = librosa.frames_to_time(beats, sr=SR, hop_length=HOP, n_fft=N_FFT)
    librosa.output.times_csv(output_file, times)
Example #14
def compute_beats(y_percussive, sr=22050):
    """Computes the beats using librosa.

    Parameters
    ----------
    y_percussive: np.array
        Percussive part of the audio signal in samples.
    sr: int
        Sample rate.

    Returns
    -------
    beats_idx: np.array
        Indices (in frames) of the estimated beats.
    beats_times: np.array
        Time of the estimated beats.
    """
    logging.info("Estimating Beats...")
    tempo, beats_idx = librosa.beat.beat_track(y=y_percussive, sr=sr,
                                               hop_length=msaf.Anal.hop_size)
    times = librosa.frames_to_time(beats_idx, sr=sr,
                                   hop_length=msaf.Anal.hop_size)

    # Remove first beat time if 0
    if times[0] == 0:
        times = times[1:]
        beats_idx = beats_idx[1:]
    return beats_idx, times
Example #15
def libroRMS(filepath, kRatio):
    y, sr = librosa.load(filepath) # Load the waveform as y, sr is sample rate
    clipLength = librosa.get_duration(y=y, sr=sr)
    kValue = int(clipLength/kRatio +1) #sets up relative ratio of samples

    ### get the RMS of the audio sample ###
    data = librosa.feature.rmse(y=y, hop_length=2048)
    boundaries = librosa.segment.agglomerative(data, k=kValue) # Agglomeration
    boundary_times = librosa.frames_to_time(boundaries, hop_length=2048) # ~.1s
    intervals = np.hstack([boundary_times[:-1, np.newaxis], boundary_times[1:, np.newaxis]])
    get_rms = librosa.feature.sync(data, boundaries, aggregate=np.max)

    nkValue = kValue-1 #because, for some reason, the intervals above leave out the last one
    fixedN = np.delete(get_rms, nkValue, axis=1)
    npsTurn = np.concatenate((intervals, fixedN.T), axis=1)

    #transform from np array to regular list
    flatnps = npsTurn.tolist()
    slice_value = int(kValue//3)
    rmsOut1 = sorted(flatnps, key = lambda x: int(x[2]), reverse=True)
    #rmsOut2 = slice(rmsOut1[0: slice_value])
    rmsOut2 = rmsOut1[0 : slice_value]
    rmsOut3 = sorted(rmsOut2, key = lambda x: int(x[0]))

    return rmsOut3
Example #16
def logcqt_onsets(x, fs, pre_max=0, post_max=1, pre_avg=0,
                  post_avg=1, delta=0.05, wait=50, hop_length=1024):
    """
    Parameters
    ----------
    x : np.ndarray
        Audio signal

    fs : scalar
        Samplerate of the audio signal.

    pre_max, post_max, pre_avg, post_avg, delta, wait
        See `librosa.util.peak_pick` for details.

    Returns
    -------
    onsets : np.ndarray, ndim=1
        Times in seconds for splitting.
    """
    lcqt = logcqt(x, fs, hop_length)
    c_n = utils.canny(51, 3.5, 1)
    onset_strength = sig.lfilter(c_n, np.ones(1), lcqt, axis=1).mean(axis=0)

    peak_idx = librosa.onset.onset_detect(
        onset_envelope=onset_strength, delta=delta, wait=wait)
    return librosa.frames_to_time(peak_idx, hop_length=hop_length)
Example #17
def extract_cqt(audio_data):
    '''
    CQT routine with default parameters filled in, and some post-processing.

    Parameters
    ----------
    audio_data : np.ndarray
        Audio data to compute CQT of

    Returns
    -------
    cqt : np.ndarray
        CQT of the supplied audio data.
    frame_times : np.ndarray
        Times, in seconds, of each frame in the CQT
    '''
    # Compute CQT
    cqt = librosa.cqt(audio_data, sr=FS, fmin=librosa.midi_to_hz(NOTE_START),
                      n_bins=N_NOTES, hop_length=HOP_LENGTH, tuning=0.)
    # Compute the time of each frame
    times = librosa.frames_to_time(
        np.arange(cqt.shape[1]), sr=FS, hop_length=HOP_LENGTH)
    # Use float32 for the cqt to save space/memory
    cqt = cqt.astype(np.float32)
    return cqt, times
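FS, NOTE_START, N_NOTES, and HOP_LENGTH are module-level constants not shown here. Plausible values consistent with the call above (assumptions for illustration only):

FS = 22050        # assumed sample rate of audio_data
NOTE_START = 36   # assumed lowest MIDI note of the CQT (C2)
N_NOTES = 48      # assumed number of CQT bins (four octaves at 12 bins each)
HOP_LENGTH = 512  # assumed hop size in samples

# cqt, frame_times = extract_cqt(audio_data)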
Example #18
def hpss_beats(input_file, output_csv):
    '''HPSS beat tracking
    
    :parameters:
      - input_file : str
          Path to input audio file (wav, mp3, m4a, flac, etc.)

      - output_csv : str
          Path to save beat event timestamps as a CSV file
    '''

    # Load the file
    print 'Loading  ', input_file
    y, sr = librosa.load(input_file)

    # Do HPSS
    print 'Harmonic-percussive separation ... '
    y = percussive(y)

    # Construct onset envelope from percussive component
    print 'Tracking beats on percussive component'
    onsets = librosa.onset.onset_strength(y=y, sr=sr, hop_length=HOP_LENGTH, n_fft=N_FFT, aggregate=np.median)

    # Track the beats
    tempo, beats = librosa.beat.beat_track( onsets=onsets, 
                                            sr=sr, 
                                            hop_length=HOP_LENGTH)

    beat_times  = librosa.frames_to_time(beats, 
                                         sr=sr, 
                                         hop_length=HOP_LENGTH,
                                         n_fft=N_FFT)
    # Save the output
    print 'Saving beats to ', output_csv
    librosa.output.times_csv(output_csv, beat_times)
Example #19
    def filter_out(self,nob,song2):
        song2.change_temp(self.tempo)
        song2.cut_song(self.length_of_songs)
        l=scipy.signal.firwin( numtaps=10, cutoff=300, nyq=self.sr/2)
        h=-l
        h[10//2]=h[10//2]+1
        fader_l=self.audio_left[int(self.bars[-nob-1][1]*self.sr):]
        fader_r=self.audio_right[int(self.bars[-nob-1][1]*self.sr):]
        fader=np.arange(float(len(fader_l)))/float(len(fader_l))
        fader_l=scipy.signal.lfilter(l,1.0,fader_l*fader[::-1])
        fader_r=scipy.signal.lfilter(l,1.0,fader_r*fader[::-1])
        haha=scipy.signal.lfilter(h,1.0,(song2.audio_left[int(song2.beat_times[0]*self.sr):int(song2.beat_times[0]*self.sr)+len(self.audio_left[int(self.bars[-nob-1][1]*self.sr):])]*fader))

        self.audio_left[int(self.bars[-nob-1][1]*self.sr):]=fader_l+haha

        self.audio_left=np.concatenate((self.audio_left,song2.audio_left[len(haha):]))

        haha=scipy.signal.lfilter(h,1.0,(song2.audio_right[int(song2.beat_times[0]*self.sr):int(song2.beat_times[0]*self.sr)+len(self.audio_right[int(self.bars[-nob-1][1]*self.sr):])]*fader))

        self.audio_right[int(self.bars[-nob-1][1]*self.sr):]=fader_r+haha

        self.audio_right=np.concatenate((self.audio_right,song2.audio_right[len(haha):]))
        tempo, beats = librosa.beat.beat_track(y=self.audio_left, sr=self.sr)
        self.beat_times=librosa.frames_to_time(beats, sr=self.sr)
        bars=[]
        for i in range(len(self.beat_times)//4-1):
            bars.append([self.beat_times[i*4],self.beat_times[(i+1)*4]])
        self.bars=np.array(bars)
Example #20
    def fade_out(self,nob,song2):
        song2.change_temp(self.tempo)
        song2.cut_song(self.length_of_songs)
        fader_l=self.audio_left[int(self.bars[-nob-1][1]*self.sr):]
        fader_r=self.audio_right[int(self.bars[-nob-1][1]*self.sr):]
        fader=np.arange(float(len(fader_l)))/float(len(fader_l))
        fader_l=fader_l*fader[::-1]
        fader_r=fader_r*fader[::-1]
        haha=song2.audio_left[int(song2.beat_times[0]*self.sr):int(song2.beat_times[0]*self.sr)+len(self.audio_left[int(self.bars[-nob-1][1]*self.sr):])]*fader

        self.audio_left[int(self.bars[-nob-1][1]*self.sr):]=fader_l+haha

        self.audio_left=np.concatenate((self.audio_left,song2.audio_left[len(haha):]))

        haha=song2.audio_right[int(song2.beat_times[0]*self.sr):int(song2.beat_times[0]*self.sr)+len(self.audio_right[int(self.bars[-nob-1][1]*self.sr):])]*fader

        self.audio_right[int(self.bars[-nob-1][1]*self.sr):]=fader_r+haha

        self.audio_right=np.concatenate((self.audio_right,song2.audio_right[len(haha):]))
        tempo, beats = librosa.beat.beat_track(y=self.audio_left, sr=self.sr)
        self.beat_times=librosa.frames_to_time(beats, sr=self.sr)
        bars=[]
        for i in range(len(self.beat_times)//4-1):
            bars.append([self.beat_times[i*4],self.beat_times[(i+1)*4]])
        self.bars=np.array(bars)
Example #21
def beat_track(input_file, output_csv):
    '''Beat tracking function
    
    :parameters:
      - input_file : str
          Path to input audio file (wav, mp3, m4a, flac, etc.)

      - output_csv : str
          Path to save beat event timestamps as a CSV file
    '''

    print 'Loading ', input_file
    y, sr         = librosa.load(input_file, sr=22050)

    # Use a default hop size of 64 samples @ 22 kHz ~= 2.9ms
    HOP_LENGTH  = 64

    # This is the window length used by default in stft
    N_FFT       = 2048

    print 'Tracking beats'
    tempo, beats    = librosa.beat.beat_track(y=y, sr=sr, hop_length=HOP_LENGTH)

    print 'Estimated tempo: %0.2f beats per minute' % tempo

    # 3. save output
    # 'beats' will contain the frame numbers of beat events.

    beat_times = librosa.frames_to_time(beats, sr=sr, hop_length=HOP_LENGTH, n_fft=N_FFT)

    print 'Saving output to ', output_csv
    librosa.output.times_csv(output_csv, beat_times)
    print 'done!'
Example #22
def main():
	""" 
	main() - Main function for feature extraction

		Inputs: 
			None

		Outputs:
			Pickle file with feature data
	"""

	vocalData = loadmat('../../Data/firstVerseTimes.mat')
	audioPath = '../../Audio/Vocals/'
	assert isdir(audioPath), "Audio path does not exist"		# Make sure directory of audio exists

	fileList = [ join(audioPath, 'Vocals_' + str(vocalData['firstVerseTimes'][i][3][0])) for i in range(len(vocalData['firstVerseTimes'])) ]
	numFiles = len(fileList)
	vocalFeatures = np.zeros((numFiles, 8))

	for i in range(numFiles):

		print 'Working on file {} of {}'.format(i, numFiles)
		# Read in audio 
		audio, sr = librosa.load(fileList[i], sr=44100)
		S = librosa.stft(audio, n_fft = 1024, hop_length = 512)
		spec = np.abs(S)

		# Extract features
		centroids = centroid(spec, sr)														# Spectral centroid
		contrasts = contrast(spec, sr, 0.05)												# Spectral contrast
		onset_frames    = librosa.onset.onset_detect(y=audio, sr=sr, hop_length=64)			# Calculate frames of onsets
		onset_times     = librosa.frames_to_time(onset_frames, sr, hop_length=64)			# Calculate times of onsets

		# Extract feature statistics
		vocalFeatures[i,0] =  np.mean(np.diff(onset_times))						# Mean of onset durations
		vocalFeatures[i,1] = np.var(np.diff(onset_times))						# Variance of onset durations
		vocalFeatures[i,2], beats = librosa.beat.beat_track(audio, sr )			# Get beats and tempo
		vocalFeatures[i,3] = np.mean(centroids)									# Mean of centroids
		vocalFeatures[i,4] = np.var(centroids)									# Variance of centroids
		vocalFeatures[i,5] = np.mean(contrasts)									# Mean of spectral contrast
		vocalFeatures[i,6] = np.var(contrasts)									# Variance of spectral contrast
		vocalFeatures[i,7] = onset_times.shape[0] / (audio.shape[0] / float(sr))# Onset density

	# Create dictionary for features
	dataDict = {'ID': np.array([vocalData['firstVerseTimes'][i][0][0][0] for i in range(len(vocalData['firstVerseTimes']))]), 
				'onsetMean': vocalFeatures[:,0],
				'onsetVar': vocalFeatures[:,1],
				'tempo': vocalFeatures[:,2],
				'centroidMean': vocalFeatures[:,3],
				'centroidVar': vocalFeatures[:,4],
				'contrastMean': vocalFeatures[:,5],
				'contrastVar': vocalFeatures[:,6],
				'onsetDensity': vocalFeatures[:,7],
				'artist': [vocalData['firstVerseTimes'][i][1][0] for i in range(len(vocalData['firstVerseTimes']))],
				'song': np.array([vocalData['firstVerseTimes'][i][2][0] for i in range(len(vocalData['firstVerseTimes']))])}

	dump(dataDict, open('vocalFeatureData.p', 'wb'))  # pickle requires binary mode

	print ('Done')
Example #23
def extract_timing_data(filename, samplerate=22050, channels=1, hop_length=64):
    x_n, fs = marl.audio.read(filename, samplerate, channels)
    onset_env = librosa.onset.onset_strength(
        x_n.squeeze(), fs, hop_length=hop_length, aggregate=np.median)
    tempo, beat_frames = librosa.beat.beat_track(
        onset_envelope=onset_env, sr=fs, hop_length=hop_length)
    beat_times = librosa.frames_to_time(
        beat_frames, sr=fs, hop_length=hop_length)
    onset_frames = librosa.onset.onset_detect(
        onset_envelope=onset_env, sr=fs, hop_length=hop_length)
    onset_times = librosa.frames_to_time(
        onset_frames, sr=fs, hop_length=hop_length)
    duration = len(x_n) / fs
    return dict(onset_times=onset_times.tolist(),
                beat_times=beat_times.tolist(),
                tempo=tempo,
                duration=duration)
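marl.audio.read is an external dependency; for the mono case a rough librosa-only substitute (the trailing channel axis is an assumption about the expected return shape, since the caller squeezes it) could be:

import librosa
import numpy as np

def read(filename, samplerate=22050, channels=1):
    # Mono-only stand-in for marl.audio.read: channels is accepted for
    # signature compatibility, but only mono loading is handled here.
    x, fs = librosa.load(filename, sr=samplerate, mono=True)
    return x[:, np.newaxis], fs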
Example #24
def analyze_frames(y, sr, debug=False):
    A = {}
    
    hop_length = 128

    # First, get the track duration
    A['duration'] = float(len(y)) / sr

    # Then, get the beats
    if debug: print "> beat tracking"
    tempo, beats = librosa.beat.beat_track(y, sr, hop_length=hop_length)

    # Push the last frame as a phantom beat
    A['tempo'] = tempo
    A['beats'] = librosa.frames_to_time(beats, sr, hop_length=hop_length).tolist()

    if debug: print "beats count: ", len(A['beats'])

    if debug: print "> spectrogram"
    S = librosa.feature.melspectrogram(y, sr,   n_fft=2048, 
                                                hop_length=hop_length, 
                                                n_mels=80, 
                                                fmax=8000)
    S = S / S.max()

    # A['spectrogram'] = librosa.logamplitude(librosa.feature.sync(S, beats)**2).T.tolist()

    # Let's make some beat-synchronous mfccs
    if debug: print "> mfcc"
    S = librosa.feature.mfcc(librosa.logamplitude(S), n_mfcc=40)
    A['timbres'] = librosa.feature.sync(S, beats).T.tolist()

    if debug: print "timbres count: ", len(A['timbres'])

    # And some chroma
    if debug: print "> chroma"
    S = N.abs(librosa.stft(y, hop_length=hop_length))

    # Grab the harmonic component
    H = librosa.decompose.hpss(S)[0]
    # H = librosa.hpss.hpss_median(S, win_P=31, win_H=31, p=1.0)[0]
    A['chroma'] = librosa.feature.sync(librosa.feature.chromagram(S=H, sr=sr),
                                        beats,
                                        aggregate=N.median).T.tolist()

    # Relative loudness
    S = S / S.max()
    S = S**2

    if debug: print "> dists"
    dists = structure(N.vstack([N.array(A['timbres']).T, N.array(A['chroma']).T]))
    A['dense_dist'] = dists

    edge_lens = [A["beats"][i] - A["beats"][i - 1]
                 for i in xrange(1, len(A["beats"]))]
    A["avg_beat_duration"] = N.mean(edge_lens)

    return A
Example #25
    def __test(units, hop_length, y, sr):

        b1 = librosa.onset.onset_detect(y=y, sr=sr, hop_length=hop_length)
        b2 = librosa.onset.onset_detect(y=y, sr=sr, hop_length=hop_length,
                                        units=units)

        t1 = librosa.frames_to_time(b1, sr=sr, hop_length=hop_length)

        if units == 'time':
            t2 = b2

        elif units == 'samples':
            t2 = librosa.samples_to_time(b2, sr=sr)

        elif units == 'frames':
            t2 = librosa.frames_to_time(b2, sr=sr, hop_length=hop_length)

        assert np.allclose(t1, t2)
Example #26
    def __test(units, hop_length, y, sr):

        tempo, b1 = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_length)
        _, b2 = librosa.beat.beat_track(y=y, sr=sr, hop_length=hop_length,
                                        units=units)

        t1 = librosa.frames_to_time(b1, sr=sr, hop_length=hop_length)

        if units == 'time':
            t2 = b2

        elif units == 'samples':
            t2 = librosa.samples_to_time(b2, sr=sr)

        elif units == 'frames':
            t2 = librosa.frames_to_time(b2, sr=sr, hop_length=hop_length)

        assert np.allclose(t1, t2)
Example #27
    def __test(infile):
        DATA    = load(infile)
        
        (bpm, beats) = librosa.beat.beat_track(y=None, sr=8000, hop_length=32,
                                               onsets=DATA['onsetenv'][0], n_fft=None)

        print beats
        print DATA['beats']
        assert numpy.allclose(librosa.frames_to_time(beats, sr=8000, hop_length=32), DATA['beats'])
        pass
Example #28
	def beat_analysis(self):
		"""runs the analysis on the song to determine where the beats are, and adds a beat channel"""
		self.tempo, self.beat_frames = librosa.beat.beat_track(self.waveform,self.sample_rate)
		self.beat_times = librosa.frames_to_time(self.beat_frames, self.sample_rate)
		self.beat_channel=Channel('Beat',False)
		for second in self.beat_times:
			#rounds time to 1/10 of a second
			second = round(second, 1)
			time=datetime.timedelta(0,second)
			#saves beat in channel
			self.beat_channel.update(time, True)
Example #29
    def __test(infile):

        DATA = load(infile)

        (bpm, beats) = librosa.beat.beat_track(y=None,
                                               sr=8000,
                                               hop_length=32,
                                               onset_envelope=DATA['onsetenv'][0])

        beat_times = librosa.frames_to_time(beats, sr=8000, hop_length=32)
        assert np.allclose(beat_times, DATA['beats'])
Example #30
 def direct(self,song2):
     song2.change_temp(self.tempo)
     song2.cut_song(self.length_of_songs)
     self.audio_left=np.concatenate((self.audio_left[:int(self.beat_times[-1]*self.sr)],song2.audio_left[int(song2.beat_times[0]*self.sr):]))
     self.audio_right=np.concatenate((self.audio_right[:int(self.beat_times[-1]*self.sr)],song2.audio_right[int(song2.beat_times[0]*self.sr):]))
     tempo, beats = librosa.beat.beat_track(y=self.audio_left, sr=self.sr)
     self.beat_times=librosa.frames_to_time(beats, sr=self.sr)
     bars=[]
     for i in range(len(self.beat_times)//4-1):
         bars.append([self.beat_times[i*4],self.beat_times[(i+1)*4]])
     self.bars=np.array(bars)