def test_pitchgram_on_single_tone_should_have_peak_at_that_tone():
    """A pitchgram of a single sine tone must peak in the bin of that tone.

    Synthesizes a sine at the frequency ``Tuning`` assigns to a known pitch,
    computes its pitchgram with one bin per pitch step, and checks both the
    output shape and the position of the strongest (frame-averaged) bin.
    """
    sample_rate = 44100
    block_len, hop_len = 4096, 2048
    lowest_bin, highest_bin = -48, 67

    # NOTE(review): the original comment labelled this pitch "G5"; which note
    # it really is depends on Tuning's reference pitch -- TODO confirm.
    tone_pitch = 12 + 7
    tone_freq = Tuning().pitch_to_freq(tone_pitch)
    tone = sine(sample_time(0, 1, fs=sample_rate), freq=tone_freq)

    frames = SignalFrames(
        tone, block_len, hop_len, sample_rate=sample_rate, mono_mix=True)
    # The output frame size is deliberately the same as the hop size.
    x_pitchgram = pitchgram(
        frames,
        hop_len,
        magnitudes='power_db',
        bin_range=[lowest_bin, highest_bin],
        bin_division=1)

    # Bin indices start at the lower end of the range, so the expected column
    # is the pitch's offset from that lower bound.
    expected_peak = tone_pitch - lowest_bin
    frame_average = x_pitchgram.mean(axis=0)
    actual_peak = frame_average.argmax()

    assert x_pitchgram.shape == (21, 115), x_pitchgram.shape
    assert actual_peak == expected_peak
def test_reassigned_pitchgram_values_should_be_in_proper_range():
    """Pitchgram values in 'power_db' magnitudes must lie within [-120, 0].

    Loads a sample recording from ``DATA_DIR``, computes its pitchgram and
    verifies that no value falls outside the expected range.
    """
    block_len = 4096
    wav_path = os.path.join(DATA_DIR, 'she_brings_to_me.wav')

    # Hop size equals the frame size here (presumably non-overlapping frames).
    frames = SignalFrames(wav_path, block_len, block_len, mono_mix=True)
    X_r = pitchgram(frames, 1024, magnitudes='power_db')

    lower_bound_ok = np.all(X_r >= -120)
    assert lower_bound_ok, 'min value: %f should be >= -120' % X_r.min()

    upper_bound_ok = np.all(X_r <= 0)
    assert upper_bound_ok, 'max value: %f should be <= 0' % X_r.max()
# load data song = "The_Beatles/03_-_A_Hard_Day's_Night/05_-_And_I_Love_Her" audio_file = data_dir + '/beatles/audio-cd/' + song + '.wav' block_size = 4096 hop_size = 2048 print('loading audio:', audio_file) signal_frames = tfr.SignalFrames(audio_file, frame_size=block_size, hop_size=hop_size) print('computing chromagram') # normalized to [0.; 1.] X_orig = tfr.pitchgram(signal_frames) ### Features print(X_orig.shape) frame_count, feature_count = X_orig.shape target_count = 12 # we'll cut the datasets into small sequences of frames max_seq_size = 100 def pad_sequences(a, max_seq_size): """ Cuts the list of frames into fixed-length sequences.
# load data song = "05_-_And_I_Love_Her" audio_file = data_dir + '/beatles/' + song + '.wav' block_size = 4096 hop_size = 2048 print('loading audio:', audio_file) signal_frames = tfr.SignalFrames(audio_file, frame_size=block_size, hop_size=hop_size) print('computing chromagram') # normalized to [0.; 1.] X_orig = tfr.pitchgram(signal_frames, magnitudes='power_db_normalized') ### Features print(X_orig.shape) frame_count, feature_count = X_orig.shape target_count = 12 # we'll cut the datasets into small sequences of frames max_seq_size = 100 def pad_sequences(a, max_seq_size): """ Cuts the list of frames into fixed-length sequences.