Beispiel #1
0
def test_pitchgram_on_single_tone_should_have_peak_at_that_tone():
    """Check that a single sine tone peaks in its own pitchgram bin.

    A sine at the frequency of the chosen pitch is framed and passed to
    ``pitchgram`` with one bin per pitch step. The time-averaged output
    must have its maximum at ``pitch - lowest_bin``, i.e. the pitch
    re-expressed as an offset from the start of the bin range.
    """
    # Analysis parameters: the hop is half a frame, and the output frame
    # size handed to pitchgram() equals the hop size.
    sample_rate = 44100
    block_len = 4096
    step = 2048
    lowest_bin, highest_bin = -48, 67

    # Labelled "G5" in the original test.
    # NOTE(review): the note name depends on Tuning's reference pitch - confirm.
    tone_pitch = 12 + 7
    tone_freq = Tuning().pitch_to_freq(tone_pitch)
    tone = sine(sample_time(0, 1, fs=sample_rate), freq=tone_freq)

    frames = SignalFrames(
        tone,
        block_len,
        step,
        sample_rate=sample_rate,
        mono_mix=True,
    )
    tone_pitchgram = pitchgram(
        frames,
        step,
        magnitudes='power_db',
        bin_range=[lowest_bin, highest_bin],
        bin_division=1,
    )

    # Average over axis 0, then locate the strongest bin.
    expected_peak = tone_pitch - lowest_bin
    observed_peak = tone_pitchgram.mean(axis=0).argmax()

    # 115 matches the width of the requested bin range: 67 - (-48).
    assert tone_pitchgram.shape == (21, 115), tone_pitchgram.shape
    assert observed_peak == expected_peak
Beispiel #2
0
def test_reassigned_pitchgram_values_should_be_in_proper_range():
    """Check that 'power_db' pitchgram values of a real recording lie in [-120, 0]."""
    block_len = 4096
    wav_path = os.path.join(DATA_DIR, 'she_brings_to_me.wav')

    # The hop size equals the frame size here, so both arguments are block_len.
    frames = SignalFrames(wav_path, block_len, block_len, mono_mix=True)
    db_values = pitchgram(frames, 1024, magnitudes='power_db')

    above_floor = np.all(db_values >= -120)
    assert above_floor, 'min value: %f should be >= -120' % db_values.min()

    below_ceiling = np.all(db_values <= 0)
    assert below_ceiling, 'max value: %f should be <= 0' % db_values.max()
Beispiel #3
0
# Load the recording and turn it into a pitchgram feature matrix.

song = "The_Beatles/03_-_A_Hard_Day's_Night/05_-_And_I_Love_Her"
audio_file = data_dir + '/beatles/audio-cd/' + song + '.wav'

# Framing parameters: the hop is half a block.
block_size, hop_size = 4096, 2048

print('loading audio:', audio_file)
signal_frames = tfr.SignalFrames(
    audio_file,
    frame_size=block_size,
    hop_size=hop_size,
)

print('computing chromagram')
# The original note says the result is normalized to [0, 1].
# NOTE(review): no `magnitudes` argument is passed here - confirm that the
# library default actually yields normalized values.
X_orig = tfr.pitchgram(signal_frames)

### Features

print(X_orig.shape)

# The feature matrix is unpacked as (frames, features per frame).
frame_count, feature_count = X_orig.shape
target_count = 12

# Length of the fixed-size frame sequences the dataset is cut into.
max_seq_size = 100


def pad_sequences(a, max_seq_size):
    """
    Cuts the list of frames into fixed-length sequences.
Beispiel #4
0
# Load the recording and turn it into a pitchgram feature matrix.

song = "05_-_And_I_Love_Her"
audio_file = data_dir + '/beatles/' + song + '.wav'

# Framing parameters: the hop is half a block.
block_size, hop_size = 4096, 2048

print('loading audio:', audio_file)
signal_frames = tfr.SignalFrames(
    audio_file,
    frame_size=block_size,
    hop_size=hop_size,
)

print('computing chromagram')
# 'power_db_normalized' is requested explicitly; the original note says the
# values are normalized to [0, 1] - presumably via this option, confirm
# against the tfr documentation.
X_orig = tfr.pitchgram(
    signal_frames,
    magnitudes='power_db_normalized',
)

### Features

print(X_orig.shape)

# The feature matrix is unpacked as (frames, features per frame).
frame_count, feature_count = X_orig.shape
target_count = 12

# Length of the fixed-size frame sequences the dataset is cut into.
max_seq_size = 100


def pad_sequences(a, max_seq_size):
    """
    Cuts the list of frames into fixed-length sequences.