Example #1
 def __test(y, sr, S, n_fft, hop_length, fmin, fmax, threshold):
     pitches, mags = librosa.piptrack(y=y,
                                      sr=sr,
                                      S=S,
                                      n_fft=n_fft,
                                      hop_length=hop_length,
                                      fmin=fmin,
                                      fmax=fmax,
                                      threshold=threshold)
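The stub above only exercises the call signature. Most of the examples below reduce the (pitches, mags) pair the same way: for each frame, keep the frequency of the bin with the largest magnitude. A minimal, self-contained sketch of that pattern on a synthetic 440 Hz tone (the tone and the fmin/fmax values are stand-ins, not part of the original test):

import numpy as np
import librosa

sr = 22050
t = np.arange(sr) / sr
y = 0.5 * np.sin(2 * np.pi * 440.0 * t)   # one second of a 440 Hz tone

pitches, mags = librosa.piptrack(y=y, sr=sr, fmin=100.0, fmax=2000.0)

# For each frame, keep the frequency of the strongest bin (0.0 where nothing was tracked).
frames = np.arange(pitches.shape[1])
f0 = pitches[mags.argmax(axis=0), frames]
print(f0[5:10])   # interior frames should land close to 440 Hz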
Example #2
def get_librosa_pitch(signal, fs, window):
    pitches, magnitudes = librosa.piptrack(y=signal.astype('float'), sr=fs, n_fft=int(window),
                                           hop_length=int(window/10))
    pitch_pos = np.argmax(magnitudes, axis=0)
    pitches_final = []
    for i in range(len(pitch_pos)):
        pitches_final.append(pitches[pitch_pos[i], i])
    pitches_final = np.array(pitches_final)
    pitches_final[pitches_final > 2000] = 0  # cut high pitches
    return mf(pitches_final, 3)              # use medfilt for smoothing
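`mf` is not defined in this excerpt; from the comment it reads like an alias for SciPy's median filter. A hedged guess at the imports the function needs, plus a call on a synthetic tone (the 330 Hz tone and window size are arbitrary stand-ins):

import numpy as np
import librosa
from scipy.signal import medfilt as mf   # assumption: mf is scipy.signal.medfilt

sr = 16000
signal = 0.5 * np.sin(2 * np.pi * 330.0 * np.arange(sr) / sr)   # one second at 330 Hz
print(get_librosa_pitch(signal, sr, window=1024)[:5])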
Example #3
def get_pitch(filename=read_and_write_audio()):
    x, sr_ = librosa.load(filename)
    pitches, mags = librosa.piptrack(y=x, sr=sr_)
    index = mags[:,128].argmax()
    pitch = pitches[index, 128]
    note_name = librosa.hz_to_note(pitch)
    
    if len(note_name) > 2:
        return note_name[0:2]
    return note_name[0]
Example #4
def get_pitch(filename):
    x, sr_ = librosa.load(filename)
    pitches, mags = librosa.piptrack(y=x, sr=sr_)
    index = mags[:, 128].argmax()
    pitch = pitches[index, 128]
    note_name = librosa.hz_to_note(pitch)

    if len(note_name) > 2:
        return f'{note_name[0]}#'
    return note_name[0]
Example #5
    def __test(S, freq):
        pitches, mags = librosa.piptrack(S=S, fmin=100)

        idx = (mags > 0)

        assert len(idx) > 0

        recovered_pitches = pitches[idx]

        # We should be within one cent of the target
        assert np.all(np.abs(np.log2(recovered_pitches) - np.log2(freq)) <= 1e-2)
Example #6
def baseline_tracking(audio_file, result_file=None):
    start = time.clock()
    y, sr = librosa.load(audio_file, sr=database_sr)
    print('Audio file loaded: ' + audio_file)
    print('{:f}s for loading the audio file.'.format(time.clock() - start))

    start = time.clock()
    pitches, magnitudes = librosa.piptrack(y=y, sr=database_sr)
    mag_thresh = 2 * np.mean(magnitudes) / 3
    d_range, time_range = pitches.shape
    pitches_max, magnitudes_max = \
     extract_pitch_max(pitches, magnitudes, time_range)
    ret_pitch = []
    if result_file:
        file = open(result_file, 'w+')
    # print out tracked notes over time
    for t in range(time_range):
        # filter out frequencies with zero value
        pitch_t = pitches_max[t]
        # filter out notes that are too weak
        mag_t = magnitudes_max[t]
        if debug:
            # NOTE: the original debug prints referenced pitch_idx/mag_idx/merged_idx,
            # which are not defined in this function; report the arrays that do exist.
            print("pitches_max shape:")
            print(np.shape(pitches_max))
            print("magnitudes_max shape:")
            print(np.shape(magnitudes_max))

        # only print at time t if there are notes present
        # if pitch_t != 0 and mag_t > mag_thresh:
        if True:
            ret_pitch.append(pitch_t)
            if result_file:
                file.write(str(t) + ',' + str(pitch_t) + '\n')
            elif debug:
                print(t)
                print(pitch_t)
                print()
    if result_file:
        print("Saved in " + result_file)
        file.close()

    print("Stat:")
    print('{:f}s for piptrack'.format(time.clock() - start))
    print("len(pitches): {:d}".format(len(pitches)))
    print("pitch shape:")
    print(pitches.shape)
    print("magnitudes shape:")
    print(magnitudes.shape)
    print("sampling rates:")
    print(sr)

    return pitches, ret_pitch
Example #7
    def __test(S, freq):
        pitches, mags = librosa.piptrack(S=S, fmin=100)

        idx = (mags > 0)

        assert len(idx) > 0

        recovered_pitches = pitches[idx]

        # We should be within one cent of the target
        assert np.all(np.abs(np.log2(recovered_pitches) - np.log2(freq)) <= 1e-2)
Example #8
def onlyPitch(X, sample_rate):
    stft = np.abs(librosa.stft(X))
    pitches, magnitudes = librosa.piptrack(X,
                                           sr=sample_rate,
                                           S=stft,
                                           fmin=70,
                                           fmax=400)
    pitch = []
    for i in range(magnitudes.shape[1]):
        index = magnitudes[:, i].argmax()
        pitch.append(pitches[index, i])
    return np.asarray(pitch)
Example #9
def get_pitches_by_beats(y, starts, ends):
    pitches = []
    for b in range(len(starts)):
        seg1 = y[int(starts[b] * global_sr):int(ends[b] * global_sr)]
        pitches1, magnitudes1 = librosa.piptrack(y=seg1,
                                                 sr=global_sr,
                                                 fmin=20,
                                                 fmax=8000,
                                                 hop_length=2048)
        max_p = np.median(extract_max(pitches1))
        pitches.append(max_p)
    return np.array(pitches)
Example #10
def load_music(filepath):
    print("=========load music %s=========" % (filepath))

    # pitch at each beat
    beat_pitches = []
    # chroma (12 pitch-class) energies at each beat
    beat_chroma = []
    # index of the strongest chroma bin at each beat
    beat_chroma_max_index = []

    y, sr = librosa.load(filepath, sr=None)
    S = np.abs(librosa.stft(y))

    # onset strength (note onsets / key presses)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    # beats (frame indices)
    tempo, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    # beats (time points, in seconds)
    beat_times = librosa.frames_to_time(beats, sr=sr)

    # pitch frequencies and magnitudes (rows: frequency bins, columns: frames)
    pitches, magnitudes = librosa.piptrack(S=S, sr=sr)
    # chroma (12 pitch-class) energies
    chroma_stft = librosa.feature.chroma_stft(S=S, sr=sr)

    for beat in beats:
        index = magnitudes[:, beat].argmax()
        pitch = pitches[index, beat]
        beat_pitches.append(float(pitch))

        chromas = []
        max_chroma = 0
        max_chroma_index = 0
        chr_st_index = 0
        for chr_st in chroma_stft:
            chromas.append(chr_st[beat])

            if chr_st[beat] > max_chroma:
                max_chroma = chr_st[beat]
                max_chroma_index = chr_st_index
            chr_st_index += 1

        beat_chroma.append(chromas)
        beat_chroma_max_index.append(max_chroma_index)

    return {
        "beat_times": beat_times.tolist(),
        "beat_pitches": beat_pitches,
        "beat_chroma_max_index": beat_chroma_max_index
    }
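The loop above samples chroma at the single frame sitting on each beat. If aggregating over the whole inter-beat segment is acceptable, librosa.util.sync does the bookkeeping in one call; a hedged sketch of that alternative ('song.wav' is a hypothetical input):

import numpy as np
import librosa

y, sr = librosa.load('song.wav', sr=None)   # hypothetical file
S = np.abs(librosa.stft(y))

tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
chroma = librosa.feature.chroma_stft(S=S, sr=sr)

# Median-aggregate chroma between consecutive beat frames instead of
# sampling the one frame that falls on each beat.
beat_chroma = librosa.util.sync(chroma, beats, aggregate=np.median)
beat_chroma_max_index = beat_chroma.argmax(axis=0)
print(beat_chroma.shape, beat_chroma_max_index[:8])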
Example #11
def pitch_wave(path):
    """
    sr, audio = wavfile.read(path)
    time, frequency, confidence, activation = crepe.predict(audio, sr, viterbi=True)
    print(time, frequency, confidence, activation)
    return(time, frequency, confidence, activation)
    """
    print("-*-*-*-*-pitch : wave.ver-*-*-*-*-")

    y, sr = librosa.load(path)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    plt.title("pitches")
    plt.plot(pitches)
    plt.show()
Example #12
def bank(indata):
    # segment cycle into quarter-notes.
    #   llen / 4 sized segments.
    # try to find the pitch of each quarter note.
    # save the audio to bank - marked with its pitch

    l, _ = indata.shape
    #q_size = int(l/4)
    q_size = int(l / 8)
    q = np.zeros((q_size, 2), dtype='float32')
    count = 0

    for x in range(1, 8):
        q[:] = indata[count:count + q_size]
        pitches, magnetude = librosa.piptrack(y=q[:, 1],
                                              sr=SAMPLE_RATE,
                                              fmin=250.0,
                                              fmax=1050.0)
        _, ts = magnetude.shape
        print(ts)
        print(pitches.shape)
        index = max(magnetude[:, i].argmax() for i in range(ts))
        prev = magnetude[:, 0].argmax()
        best_ts = 0
        for i in range(ts):
            if magnetude[:, i].argmax() > prev:
                best_ts = i

            prev = magnetude[:, i].argmax()

        pitch = pitches[index, best_ts]
        if pitch != 0.0:
            note_info = librosa.hz_to_note(pitch, cents=True)

            # if you're pretty close (< ~40 cents) it's added to the note bank

            print(note_info)
            cents = int(note_info[0].replace('+', '-').split('-', 1)[-1])
            print("Cents: ", cents)
            if cents < 25:
                note = librosa.hz_to_note(pitch, octave=False)
                print("Bank: ", note)
                keyId = note_names.index(note[0])
                #if keyId in noteBank:
                #noteBank[keyId].append(q)
                #else:
                #noteBank[keyId] = list(q)
                noteBank[keyId] = q

        count += q_size
Example #13
def cal_pitch(audio_path='/data/pytong/wav/itg_0603/EE1811306983070-interview-iOS-2020-05-29-09-06-23.wav',
              sample_rate=16000, hop=160):
  signal, rate = librosa.load(audio_path, sr=sample_rate, mono=True)
  pitches, magnitudes = librosa.piptrack(y=signal, sr=rate,
                                         hop_length=hop)
  pitch_result = []
  pitches_reverted = list(zip(*pitches))
  magnitudes_reverted = list(zip(*magnitudes))
  for cur_pitch_list, cur_magnitude_list in zip(pitches_reverted, magnitudes_reverted):
    index = cur_magnitude_list.index(max(cur_magnitude_list))
    pitch = cur_pitch_list[index]
    pitch_result.append(pitch)
  print(len(pitch_result), pitch_result[0:2])
  return pitch_result
Example #14
 def extract_features(self, filepath, filename, offset):
     full_path = filepath + '/' + filename
     if not os.path.isfile(full_path):
         raise ExtractionError
     samples, sample_rate = librosa.load(full_path,
                                         offset=offset,
                                         sr=None,
                                         duration=1)
     tempogram = librosa.feature.tempogram(y=samples, sr=sample_rate)
     mfcc = librosa.feature.mfcc(y=samples, sr=sample_rate)
     pitches, magnitudes = librosa.piptrack(y=samples, sr=sample_rate)
     return np.concatenate([
         tempogram.flatten()[:MAX_TEMPOGRAM_WEIGHT],
         mfcc.flatten()[:MAX_MFCC_WEIGHT],
         magnitudes.flatten()[:MAX_PITCH_WEIGHT]
     ])
Example #15
def findpitch(file):
    y, sr = librosa.load(file)
    stft = np.abs(librosa.stft(y))
    pitches, magnitudes = librosa.piptrack(y, sr=sr, S=stft, fmin=70, fmax=400)
    pitch = []
    for i in range(magnitudes.shape[1]):
        index = magnitudes[:, i].argmax()
        pitch.append(pitches[index, i])

    # pitch_tuning_offset = librosa.pitch_tuning(pitches)
    pitch = [p for p in pitch if p > 0]
    # pitchmean = np.mean(pitch)
    #     pitchstd = np.std(pitch)
    #     pitchmax = np.max(pitch)
    pitchmin = np.min(pitch)
    return pitchmin
Example #16
def find_songKey(indata):
    # find key of recent layer
    global song_key
    
    pitches, magnetude = librosa.piptrack(y=indata[:, 1], sr=SAMPLE_RATE,
                                          fmin=250.0, fmax=1050.0)
    time_slice = 1
    index = magnetude[:, time_slice].argmax()
    pitch = pitches[index, time_slice]
    while pitch == 0.0:
        # wrap around the number of frames, not the number of frequency bins
        time_slice = (time_slice + 10) % pitches.shape[1]
        index = magnetude[:, time_slice].argmax()
        pitch = pitches[index, time_slice]

    key = librosa.hz_to_note(pitch, octave=False)
    print("findKey: ",key)
    keyId = note_names.index(key[0])
    song_key = keyId
Example #17
def extract_features(filename):
    # load the audio file
    y, sr = librosa.load(filename)
    # compute the mel spectrogram with fmax=8000, then transpose for the expected dimension order
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    S = np.transpose(S)
    # piptrack algorithm
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    pitches = np.transpose(pitches)
    magnitudes = np.transpose(magnitudes)
    # collapse into 1-D sequences of the most significant values
    pit_seq = pip_to_pitch(pitches)
    mag_seq = pip_to_pitch(magnitudes)
    # proportion of silence over the whole utterance (delay_p)
    delay_p, mag_index = find_delay(mag_seq, np.std(mag_seq))
    # mean and standard deviation of pitch over the valid frames
    mean, error = effective_analysis(pit_seq, mag_index)
    return delay_p, mean, error
Example #18
def f0_estimate(f, S, fs, fmin=50, fmax=500, threshold=0.2):
    '''
    :param f: STFT bin centre frequencies
    :param S: STFT spectrogram
    :param fs: sampling rate
    :param fmin:
    :param fmax:
    :param threshold:
    :return: f0, shape=(t,)
    '''
    y = np.abs(S)
    pitches, mag = lib.piptrack(S=y,
                                sr=fs,
                                fmin=fmin,
                                fmax=fmax,
                                threshold=threshold)
    f0 = f[np.argmax(mag, axis=0)]
    return f0
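f0_estimate expects f to line up with the rows of S, i.e. the STFT bin centre frequencies, which librosa.fft_frequencies provides. A small usage sketch on a synthetic 200 Hz tone (the tone, n_fft and sample rate are stand-ins chosen for the sketch):

import numpy as np
import librosa as lib

sr = 16000
n_fft = 1024
y = 0.3 * np.sin(2 * np.pi * 200.0 * np.arange(sr) / sr)   # one second at 200 Hz

S = lib.stft(y, n_fft=n_fft)
f = lib.fft_frequencies(sr=sr, n_fft=n_fft)   # len(f) == S.shape[0]

f0 = f0_estimate(f, S, sr)   # should hover near the 200 Hz component
print(f0[5:10])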
Example #19
def aaa():
    p = pyaudio.PyAudio()

    stream = p.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True,
                    frames_per_buffer=CHUNK)

    print("* recording")

    frames = []

    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)

    print("* done recording")

    stream.stop_stream()
    stream.close()
    p.terminate()

    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()

    y, sr = librosa.load(r'C:\Users\PIYUSH\Desktop\output.wav', sr=32050)
    # import librosa
    s = np.abs(librosa.stft(y))
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    s_mean = np.mean(s)
    s_pitch = np.mean(magnitudes)
    result = (s_mean + s_pitch) / 2
    return result
Example #20
def get_audio_analysis(song_url):
    if song_url is None:
        return None, None, None, None
    response = urlopen(song_url)
    urllib.request.urlretrieve(song_url, "current.mp3")
    y, sr = librosa.load("./current.mp3")

    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
    #Tempo = beats/minute


    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    #pitch = Frequency
    pitch_ave = np.average(pitches)

    harm = np.sum(librosa.effects.harmonic(y))
    perc = np.sum(librosa.effects.percussive(y))


    return tempo, pitch_ave, harm, perc
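np.average(pitches) above averages the full piptrack matrix, which is mostly zeros (untracked bins), so the result sits far below the actual pitch content. A hedged variant that averages only the tracked bins, following the mags > 0 selection used in the test examples above ("./current.mp3" as in the function):

import numpy as np
import librosa

y, sr = librosa.load("./current.mp3")
pitches, magnitudes = librosa.piptrack(y=y, sr=sr)

# Average only the bins piptrack actually tracked (nonzero magnitude).
tracked = pitches[magnitudes > 0]
pitch_ave = tracked.mean() if tracked.size else 0.0
print(pitch_ave)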
Example #21
def get_freq_mag(y,sr,window_size=None, window_shift=None):
    '''
    default values:
    - 25 ms windows
    - 10 ms window shifts
    '''
    if window_size is None:
        n_fft = int(0.025*sr)
    else:
        n_fft = int(window_size*0.001*sr)
    if window_shift is None:
        hop_length = int(0.010*sr)
    else:
        hop_length = int(window_shift*0.001*sr)
    # collect the frequencies present and their magnitudes
    frequencies, magnitudes = librosa.piptrack(y, sr, hop_length=hop_length, n_fft=n_fft)
    frequencies = np.transpose(frequencies)
    magnitudes = np.transpose(magnitudes)
    
    return frequencies, magnitudes
Example #22
def long_term_info(y, sr):
    interval = 0.01
    window = 0.256
    start_stft = time.time()
    stft = librosa.stft(y,
                        hop_length=int(interval * sr),
                        n_fft=int(window * sr))
    end_stft = time.time()
    stft = np.transpose(stft)
    power = np.abs(stft)**2
    start_pitch = time.time()
    pitch, mag = librosa.piptrack(y=y,
                                  sr=sr,
                                  hop_length=int(interval * sr),
                                  n_fft=int(window * sr))
    end_pitch = time.time()
    duration_stft = end_stft - start_stft
    duration_pitch = end_pitch - start_pitch
    pitch = np.transpose(pitch)
    return stft, power, pitch
Example #23
def get_freq_mag(y,sr,window_size=None, window_shift=None):
    '''
    default values:
    - windows of 25ms 
    - window shifts of 10ms
    '''
    if window_size is None:
        n_fft = int(0.025*sr)
    else:
        n_fft = int(window_size*0.001*sr)
    if window_shift is None:
        hop_length = int(0.010*sr)
    else:
        hop_length = int(window_shift*0.001*sr)
    #collect frequencies present and their magnitudes
    frequencies,magnitudes = librosa.piptrack(y,sr,hop_length=hop_length,n_fft=n_fft)
    frequencies = np.transpose(frequencies)
    magnitudes = np.transpose(magnitudes)
    
    return frequencies, magnitudes
Example #24
def getVar(path):

    # pitches, magnitudes = lib.piptrack(y=wf, sr=sr, fmin=50, fmax=500)

    # pitches = pitches[np.nonzero(pitches)]

    # # pitches = np.asarray(pitches)

    # # duration = np.array(list(range(0, duration)))

    # plt.subplot(211)
    # plt.plot(wf)
    # plt.title('wave')
    # plt.subplot(212)
    # plt.imshow(pitches, aspect="auto", interpolation="nearest", origin="bottom")
    # plt.plot(pitches)

    # plt.title("pitches")
    # plt.show()

    y, sr = librosa.load(path)
    # D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max, top_db=10)
    duration = float(lib.get_duration(y=y, sr=sr))
    drf = lib.time_to_frames(duration, sr=sr)

    pits, mags = lib.piptrack(y=y, sr=sr, fmin=50, fmax=500)
    print("pits type === ", type(pits), end='\n')
    print("mags len === ", len(mags[0]), end='\n')
    print(np.shape(mags))
    plt.subplot(211)
    librosa.display.specshow(
        pits, x_axis='time', y_axis='log', fmax=500)
    plt.title('pits')
    plt.subplot(212)
    librosa.display.specshow(mags, x_axis='time', y_axis='log')
    plt.title('mags')
    # plt.plot(duration)
    # plt.title("check duration")
    plt.show()
Example #25
def findKey_arbitrary(indata):
    # find key of recent layer
    # find the notes for the appropriate chord in the note bank.

    pitches, magnetude = librosa.piptrack(y=indata[:, 1],
                                          sr=SAMPLE_RATE,
                                          fmin=250.0,
                                          fmax=1050.0)
    time_slice = 1
    index = magnetude[:, time_slice].argmax()
    pitch = pitches[index, time_slice]
    while pitch == 0.0:
        # wrap around the number of frames, not the number of frequency bins
        time_slice = (time_slice + 10) % pitches.shape[1]
        index = magnetude[:, time_slice].argmax()
        pitch = pitches[index, time_slice]

    key = librosa.hz_to_note(pitch, octave=False)
    print("findKey: ", key)
    keyId = note_names.index(key[0])
    seq = [keyId % 12, (keyId + 4) % 12,
           (keyId + 7) % 12]  # Major chord always
    construct_chord(seq)
Example #26
def calc_pitches(audio_file):
    with tempfile.TemporaryDirectory() as tmpdirname:
        #conversion to wav 44100 s16 mono
        in_path = tmpdirname + '/in.mp3'
        out_path = tmpdirname + '/out.wav'
        in_file = open(in_path, 'wb')
        in_file.write(audio_file.read())
        audio_file.seek(0)
        subprocess.call([
            'ffmpeg', '-i', in_path, '-acodec', 'pcm_s16le', '-ac', '1', '-ar',
            '44100', out_path
        ])

        #pitch detection
        y, sr = librosa.load(out_path, sr=44100)
        duration = librosa.get_duration(y=y, sr=sr)
        pitches, magnitudes = librosa.piptrack(y=y,
                                               sr=sr,
                                               fmin=min_freq,
                                               fmax=max_freq,
                                               hop_length=int(duration * sr /
                                                              10))
        #for i in range(len(pitches)):
        # print(pitches[i])
        # print(magnitudes[i])
        # print('\n')

        #get unique freq bins
        max_mag = np.amax(magnitudes, axis=1)
        freqs = list(
            set([
                min_freq + freq_bin_width * round(
                    (f[1] - min_freq) / freq_bin_width)
                for f in list(enumerate(np.amax(pitches, axis=1)))
                if f[1] >= min_freq and max_mag[f[0]] > 7
            ]))
        return freqs
    return []
Example #27
 def run(self):
     logging.info("Starting Pitch detector")
     # This loop condition have to be checked frequently, so the code inside may not be blocking
     while not self.terminated:
         new_frame = self.audio_frames.get()  # Get new frame (blocking)
         if self.counter == 0:
             self.frames = new_frame
             self.counter += 1
         elif self.counter >= BUFFER_SIZE:
             self.frames = np.append(self.frames, new_frame)
             pitches, magnitudes = librosa.piptrack(self.frames,
                                                    SAMPLE_RATE)
             # Select out pitches with high energy
             pitches = pitches[magnitudes > np.median(magnitudes)]
             new_tuning = int(50 + 100 * librosa.pitch_tuning(pitches))
             if np.abs(self.last_pitch -
                       new_tuning) > PITCH_CHANGE_THRESHOLD:
                 self.last_pitch = new_tuning
                 self.manager.new_tuning(new_tuning)
             self.counter = 0
         else:
             self.frames = np.append(self.frames, new_frame)
             self.counter += 1
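The thread above folds librosa.pitch_tuning into a streaming loop. The same tuning estimate can be checked offline on a file; a minimal sketch (the file name and SAMPLE_RATE value are stand-ins, and the 0-100 cent mapping mirrors the thread's formula):

import numpy as np
import librosa

SAMPLE_RATE = 22050                               # stand-in for the stream's rate
y, _ = librosa.load('take.wav', sr=SAMPLE_RATE)   # hypothetical recording

pitches, magnitudes = librosa.piptrack(y=y, sr=SAMPLE_RATE)
pitches = pitches[magnitudes > np.median(magnitudes)]   # keep high-energy bins

tuning = librosa.pitch_tuning(pitches)   # fractional semitone offset in [-0.5, 0.5)
print(int(50 + 100 * tuning))            # same cent mapping as new_tuning above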
Example #28
def audioAnalyze(y, sr):
    # librosa.piptrack to extract pitch
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    # the first ~10 columns are often meaningless
    pitches_candidate = []
    for time in range(10, len(pitches[0])):
        pitches_candidate.append(detect_pitch(pitches, time))
    # onset detection; assumption: chords usually happen just after an onset
    onset_frames = librosa.onset.onset_detect(y, sr)
    chord_note_candidate = []
    for x in onset_frames:
        if x <= len(pitches_candidate):
            # so that the frequencies can be conveniently rounded to exact frequencies for excluding the harmonics
            chord_note_candidate.append(
                librosa.note_to_hz(librosa.hz_to_note(pitches_candidate[x])))
    chord_note_no_harmonics = []

    for x in chord_note_candidate:
        # x is a list of list (...), so x[0] corresponding to freq in an onset
        temp = []
        for i in range(0, len(x[0])):
            if i == 0:
                temp.append(x[0][i])
            for j in range(0, i):
                if j == i - 1:
                    temp.append(x[0][i])
                    break
                elif checkHarmonics(x[0][j], x[0][i]):
                    break
        chord_note_no_harmonics.append(temp)

    # https://stackoverflow.com/questions/3724551/python-uniqueness-for-list-of-lists
    chord_note_no_harmonics = [
        list(x) for x in set(tuple(x) for x in chord_note_no_harmonics)
    ]

    return chord_note_no_harmonics
Example #29
 def __test(y, sr, S, n_fft, hop_length, fmin, fmax, threshold):
     pitches, mags = librosa.piptrack(
         y=y, sr=sr, S=S, n_fft=n_fft, hop_length=hop_length, fmin=fmin,
         fmax=fmax, threshold=threshold)
Example #30
import librosa
import librosa.display  # needed for librosa.display.specshow below
import matplotlib.pyplot as plt

filename = 'temp.wav'
y, sr = librosa.load(filename)
print(sr)
pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
mfccs = librosa.feature.mfcc(y=y, sr=sr)
print('mfcc calc done')

#visualize mfccs

librosa.display.specshow(mfccs, x_axis='time')
#plt.imshow(mfccs)
plt.colorbar()
plt.title('MFCC')
plt.tight_layout()
plt.show()
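The script computes pitches and magnitudes but never uses them. If a quick look at the piptrack output is wanted, the same specshow call used for the MFCCs works on the magnitude matrix too; a hedged follow-on that reuses magnitudes, sr and plt from the script above:

import numpy as np

librosa.display.specshow(librosa.amplitude_to_db(magnitudes, ref=np.max),
                         sr=sr, x_axis='time', y_axis='log')
plt.colorbar(format='%+2.0f dB')
plt.title('piptrack magnitudes')
plt.tight_layout()
plt.show()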
Example #31
import librosa
import librosa.display

print("Now Loading file...")
s_n = "WAV_CUT/BornToBeNN_song3.wav"
y, sr = librosa.load(s_n)

print("Now Dividing file...")
y_h, y_p = librosa.effects.hpss(y)
print("Now Pitching file...")
pitches, magnitudes = librosa.piptrack(y=y, sr=sr)

s_h = "WAV_CUT/BornToBeNN_song3_har.wav"
s_p = "WAV_CUT/BornToBeNN_song3_per.wav"

print("Now Saving file...")
librosa.output.write_wav(s_h, y_h, sr)
librosa.output.write_wav(s_p, y_p, sr)

print(len(pitches))

for j in range(0, len(pitches[0]) - 1):
    for i in range(0, len(pitches) - 1):
        if magnitudes[i, j] != 0:
            print("At " + str(0.3 * j) + " ~ " + str(0.3 * (j + 1)) +
                  " Seconds.")
            print("Hz : " + str(pitches[i, j]) + "\tdB : " +
                  str(magnitudes[i, j]))
# pitches[f, t]   : frequency of bin f at frame t
# magnitudes[f, t]: magnitude of bin f at frame t
Example #32
def features(X, sample_rate):

    stft = np.abs(librosa.stft(X))

    # fmin and fmax correspond to the minimum and maximum fundamental frequencies of human speech
    pitches, magnitudes = librosa.piptrack(X, sr=sample_rate, S=stft, fmin=70, fmax=400)
    pitch = []
    for i in range(magnitudes.shape[1]):
        index = magnitudes[:, i].argmax()
        pitch.append(pitches[index, i])

    pitch_tuning_offset = librosa.pitch_tuning(pitches)
    pitchmean = np.mean(pitch)
    pitchstd = np.std(pitch)
    pitchmax = np.max(pitch)
    pitchmin = np.min(pitch)

    # spectral centroid
    cent = librosa.feature.spectral_centroid(y=X, sr=sample_rate)
    cent = cent / np.sum(cent)
    meancent = np.mean(cent)
    stdcent = np.std(cent)
    maxcent = np.max(cent)

    # spectral flatness
    flatness = np.mean(librosa.feature.spectral_flatness(y=X))

    # MFCC features with 50 coefficients
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=50).T, axis=0)
    mfccsstd = np.std(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=50).T, axis=0)
    mfccmax = np.max(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=50).T, axis=0)

    # chromagram
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)

    # mel spectrogram
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T, axis=0)

    # spectral (octave-based) contrast
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)

    # zero-crossing rate
    zerocr = np.mean(librosa.feature.zero_crossing_rate(X))

    S, phase = librosa.magphase(stft)
    meanMagnitude = np.mean(S)
    stdMagnitude = np.std(S)
    maxMagnitude = np.max(S)

    # root-mean-square (RMS) energy
    rmse = librosa.feature.rmse(S=S)[0]
    meanrms = np.mean(rmse)
    stdrms = np.std(rmse)
    maxrms = np.max(rmse)

    ext_features = np.array([
        flatness, zerocr, meanMagnitude, maxMagnitude, meancent, stdcent,
        maxcent, stdMagnitude, pitchmean, pitchmax, pitchstd,
        pitch_tuning_offset, meanrms, maxrms, stdrms
    ])

    ext_features = np.concatenate((ext_features, mfccs, mfccsstd, mfccmax, chroma, mel, contrast))

    return ext_features
Example #33
                                                   win_length=win_length)
            times = librosa.frames_to_time(beats, sr=sr)
            frames = librosa.time_to_frames(
                times,
                sr=sr,
                hop_length=hop_length,
                n_fft=n_fft,
                win_length=win_length,
            )
            # file=open((os.path.join(args.outdir, name))+'_beats.txt', "w+")
            # for beat in beats:
            #    file.write(str(beat)+' ')
            # file.close()
            np.save((os.path.join(args.outdir, name)) + "_beats",
                    np.array(beats))
            """extract pitches"""
            pitches, magnitudes = librosa.piptrack(
                y=y,
                sr=sr,
                n_fft=n_fft,
                hop_length=hop_length,
                win_length=win_length,
            )
            pitches = pitches.T
            # file=open((os.path.join(args.outdir, name))+'_pitches.txt',"w+")
            pitch = np.zeros((pitches.shape[0]))
            for i in range(pitches.shape[0]):
                pitch[i] = max(pitches[i])
            # file.close()
            np.save((os.path.join(args.outdir, name)) + "_pitch", pitch)
Example #34
def get_pitch(wavefile):
    y, sr = librosa.load(wavefile)
    if len(y) % 2 != 0:
        y = y[:-1]
    pitches, mag = librosa.piptrack(y=y, sr=sr)
    return pitches, mag