-
Notifications
You must be signed in to change notification settings - Fork 1
/
sampler_stft_overlap.py
348 lines (290 loc) · 11.5 KB
/
sampler_stft_overlap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
#UPDATED FREQUENCIES
import numpy
import sys
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import waveIO
# Sample rate in samples per second; used below to convert sample counts to
# seconds and FFT bins to Hz.
sr = 44100.
# Tempo in beats per minute. NOTE(review): not read by any code visible in this
# file other than as the name of compute_dt's parameter - confirm it is still needed.
bpm = 148
# NOTE(review): module-level dt is not read by the visible code (build_song
# receives its own dt argument) - confirm it is still needed.
dt=0.5
# A spectrum bin must have a normalised magnitude above this value for
# store_note to treat it as a note.
threshold = 25
#threshold = 1067
# Number of samples in one analysis window / stored note sample.
WINDOW_LENGTH = 4500
# Fraction (0.5 means 50%) of each window that is shared with the next one.
OVERLAP_PERCENT = 0.5 # 0 <= OVERLAP PERCENT < 1
# Reference ("ideal") frequency in Hz of every note of the fourth octave.
# A frequency is taken to be a given note when it lies within 1% of the value
# in this table after the value has been scaled to the right octave
# (each octave up doubles the frequency, each octave down halves it).
note_freqs = {
    "A": 440.,
    "A#": 466.16,
    "B": 493.88,
    "C": 261.63,
    "C#": 277.18,
    "D": 293.66,
    "D#": 311.13,
    "E": 329.63,
    "F": 349.23,
    "F#": 369.99,
    "G": 391.99,
    "G#": 415.31,
}
# Index of every stored chunk, grouped by note name and octave.
# Each note name maps to a list of 6 sublists, one per octave:
# element 0 is octave 1, element 5 is octave 6.
# For example, notes_db["C"][2] lists the chunks whose spectrum contains a C3,
# that is, a C in the 3rd octave.
# The sublists hold positions into all_notes rather than the chunks themselves.
notes_db = {
    note_name: [[] for _ in range(6)]
    for note_name in ("A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#")
}
# Every chunk that has been stored, in the order it was stored.
all_notes = []
# Turn a tempo into the length of time each chunk of wave should last
def compute_dt(bpm):
    """Return the duration in seconds of one chunk at ``bpm`` beats per minute.

    A chunk is assumed to be an eighth note, which is taken to be half a beat.
    """
    beats_per_second = bpm / 60.
    seconds_per_beat = 1. / beats_per_second
    return seconds_per_beat / 2.
# Cut a long wave into consecutive pieces of note_size elements each.
# note_size is a number of list elements, so it must be computed beforehand.
def split_wave(wave, note_size):
    """Return the consecutive ``note_size``-element slices of ``wave``.

    The final slice is shorter when ``len(wave)`` is not a multiple of
    ``note_size``.
    """
    chunk_starts = range(0, len(wave), note_size)
    return [wave[start:start + note_size] for start in chunk_starts]
# Places a wave chunk in the correct note bin(s)
def store_note(chunk):
    """File one wave chunk under every note that is loud enough in its spectrum.

    The chunk's one-sided spectrum is scanned from its loudest bin downwards.
    Scanning stops at the first bin whose normalised magnitude is not above the
    module-level ``threshold`` or whose frequency lies outside 27.5-4187 Hz.
    If at least one bin qualifies, the chunk is appended to ``all_notes`` once
    and its position there is recorded in ``notes_db[note][octave_bin]`` for
    every qualifying bin that lies within 1% of a note in octaves 1-6.
    Positions are stored rather than chunks so that a chunk representing a
    chord is not stored several times.

    Args:
        chunk: sequence of samples taken at ``sr`` samples per second.

    Returns:
        None. ``all_notes`` and ``notes_db`` are updated in place.
    """
    if len(chunk) == 0:
        # An empty chunk has no spectrum to analyse.
        return

    # Octave multiplier (relative to the fourth-octave values in note_freqs)
    # -> position of that octave's sublist inside notes_db[note].
    octave_bins = {0.125: 0, 0.25: 1, 0.5: 2, 1: 3, 2: 4, 4: 5}

    magnitudes = numpy.abs(numpy.fft.rfft(chunk) / len(chunk))
    # rfft returns only the non-negative-frequency bins; rfftfreq yields the
    # frequency of each of those bins in Hz when given the sample spacing.
    bin_freqs = numpy.fft.rfftfreq(len(chunk), d=1. / sr)

    # Visit bins from loudest to quietest. The sort is stable, so among bins of
    # equal magnitude the lowest frequency is visited first.
    loudest_first = numpy.argsort(-magnitudes, kind='mergesort')

    note_index = None
    for idx in loudest_first:
        amplitude = magnitudes[idx]
        frequency = bin_freqs[idx]
        if not (amplitude > threshold and 27.5 <= frequency <= 4187):
            break

        if note_index is None:
            # First qualifying bin: store the chunk itself exactly once.
            all_notes.append(chunk)
            note_index = len(all_notes) - 1
            print("Max amplitude is {!s} at frequency {!s}".format(amplitude, frequency))

        # Doubling a frequency raises the note by one octave, so halve or
        # double the table's octave until the frequency falls inside it.
        octave_multiplier = 1
        while frequency < (note_freqs["C"] * octave_multiplier) * 0.99:
            octave_multiplier = octave_multiplier / 2.
        while frequency > (note_freqs["B"] * octave_multiplier) * 1.01:
            octave_multiplier = octave_multiplier * 2

        octave_bin = octave_bins.get(octave_multiplier)
        if octave_bin is None:
            # Octave outside the six that notes_db keeps track of.
            continue

        for note, note_frequency in note_freqs.items():
            octave_frequency = note_frequency * octave_multiplier
            if octave_frequency * 0.99 <= frequency <= octave_frequency * 1.01:
                if note_index not in notes_db[note][octave_bin]:
                    notes_db[note][octave_bin].append(note_index)
def print_notes():
    """Print, octave by octave, how many chunks are filed under each note.

    Octaves are reported as 1-6 and, within an octave, notes run from C up to B.
    """
    chromatic_scale = ("C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B")
    for octave_bin in range(6):
        for note in chromatic_scale:
            stored = len(notes_db[note][octave_bin])
            print("{!s}{!s}: {!s}".format(note, octave_bin + 1, stored))
def _dominant_frequency(chunk):
    """Return the frequency in Hz of the strongest bin in ``chunk``'s spectrum."""
    spectrum = numpy.fft.rfft(chunk)
    # rfftfreq gives the frequency in Hz of each rfft bin for samples spaced
    # 1/sr seconds apart.
    bin_freqs = numpy.fft.rfftfreq(len(chunk), d=1. / sr)
    return bin_freqs[numpy.argmax(numpy.abs(spectrum))]


# Tests the sample wave created by a separate function.
# The first quarter of the file is just a ~440hz sin wave
# The second quarter is a roughly 554hz sin wave
# The third quarter is a roughly 659hz sin wave
def test_sample_wave(chunks):
    """Check that each chunk of the sample wave peaks at the expected frequency.

    Args:
        chunks: the sample wave already split into equally long chunks, in order.

    Raises:
        AssertionError: if a chunk's dominant frequency is more than 1% away
            from the frequency expected for its quarter of the file.
    """
    quarter = len(chunks) // 4
    half = len(chunks) // 2
    three_quarters = 3 * len(chunks) // 4

    # note_freqs holds fourth-octave values, so the ~554 Hz and ~659 Hz tones
    # described above are the C# and E one octave higher.
    # NOTE(review): this follows the description of the sample wave given
    # above - confirm against the function that generates it.
    expected = (
        (range(0, quarter), note_freqs["A"]),
        (range(quarter, half), 2 * note_freqs["C#"]),
        (range(half, three_quarters), 2 * note_freqs["E"]),
    )
    for positions, expected_frequency in expected:
        for i in positions:
            numpy.testing.assert_allclose(
                _dominant_frequency(chunks[i]), expected_frequency, rtol=0.01)
    print("Tests passed for sample wave!")
def build_song(musicfile, dt):
    """Assemble a song wave by overlap-adding stored note samples.

    ``musicfile`` is read line by line:
      * blank lines and lines that are exactly ``#`` are skipped;
      * a line that is exactly ``%`` is a rest;
      * any other line is a comma-separated list of notes to sound together,
        each written as a note name followed by a one-digit octave
        (for example ``C4,E4,G4``).

    Every line is laid down four times. Each repetition first lengthens the
    song by half a window of silence (once the song is at least one window
    long) and then adds a randomly chosen stored sample of every note on the
    line onto the last ``WINDOW_LENGTH`` samples of the song.

    Args:
        musicfile: path of the text file describing the song.
        dt: currently unused; kept so existing callers keep working.

    Returns:
        A numpy array holding the samples of the assembled song.

    Raises:
        KeyError: if a note name is not a key of ``notes_db``.
        ValueError: if no sample has been stored for a requested note.
    """
    half_window = WINDOW_LENGTH // 2
    song_wave = numpy.array([])
    with open(musicfile, 'r') as music:
        for line in music:
            line = line.strip()
            if not line or line == '#':
                continue
            for _ in range(4):
                # increase the length of song wave by half a window
                if len(song_wave) >= WINDOW_LENGTH:
                    song_wave = numpy.append(song_wave, numpy.zeros(half_window))
                if line == '%':
                    # A rest only needs explicit silence while the song is
                    # still shorter than one window; afterwards the half
                    # window of zeros appended above is the rest.
                    if len(song_wave) < WINDOW_LENGTH:
                        song_wave = numpy.append(song_wave, numpy.zeros(WINDOW_LENGTH))
                    continue
                for note in line.split(','):
                    note = note.strip()
                    if not note:
                        # Tolerate a stray or trailing comma.
                        continue
                    note_name = note[:-1]
                    note_octave = note[-1]
                    note_sample_bin = notes_db[note_name][int(note_octave) - 1]
                    if not note_sample_bin:
                        raise ValueError(
                            "no stored sample for note {!s}".format(note))
                    note_sample_index = numpy.random.choice(note_sample_bin)
                    note_sample = all_notes[note_sample_index]
                    print(len(song_wave))
                    if len(song_wave) < WINDOW_LENGTH:
                        song_wave = numpy.append(song_wave, note_sample)
                    else:
                        # Overlap-add the sample onto the last window.
                        song_wave[-WINDOW_LENGTH:] = song_wave[-WINDOW_LENGTH:] + note_sample
    return song_wave
def apply_window_function(func, data, tau):
    """Multiply every sample of ``data`` by a window function shifted by ``tau``.

    Args:
        func: callable taking one number and returning the window's multiplier.
        data: sequence of samples.
        tau: offset subtracted from each sample's position before ``func`` is
            called, i.e. the sample at position ``t`` is scaled by
            ``func(t - tau)``.

    Returns:
        A numpy array with one scaled value per sample of ``data``.
    """
    # Iterate over positions and samples together: the window is evaluated at
    # the sample's position, never at the sample's value.
    scaled = [sample * func(t - tau) for t, sample in enumerate(data)]
    return numpy.append(numpy.array([]), scaled)
# The window function is only ever applied to one window-sized piece at a time,
# so the rectangular window simply hands that piece back intact.
def window_rectangular(wave_window):
    """Return ``wave_window`` multiplied by one, leaving every sample as it was."""
    unchanged = wave_window * 1
    return unchanged
def window_hanning(wave_window):
    """Return ``wave_window`` scaled sample by sample by a Hanning curve.

    The curve has as many points as the window has samples. The result is a
    plain list of the same length as ``wave_window``.
    """
    # One Hanning multiplier per sample of the window.
    hanning_curve = numpy.hanning(len(wave_window))
    return [sample * multiplier
            for sample, multiplier in zip(wave_window, hanning_curve)]
def plot_notes():
    """Show a bar chart of how many chunks are stored per note in octaves 4-6."""
    chromatic_scale = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    labels = []
    counts = []
    # notes_db positions 3, 4 and 5 are labelled as octaves 4, 5 and 6.
    for octave_bin in range(3, 6):
        for note in chromatic_scale:
            labels.append('{!s}{!s}'.format(note, octave_bin + 1))
            counts.append(len(notes_db[note][octave_bin]))

    bar_positions = numpy.arange(len(labels))
    plt.bar(bar_positions, counts, align='center', width=0.75)
    plt.title("notes pulled from wave by quantity, stft")
    plt.xlabel("Note names and octaves")
    plt.ylabel("quantity")
    plt.xticks(bar_positions, labels, fontsize=10)
    plt.show()
if __name__ == '__main__':
    # Command line: [input.wav ...] musicfile output.wav
    # Every argument before the last two is an input wave to harvest notes
    # from; the last two are the music file to play and the wave to write.
    if len(sys.argv) < 3:
        sys.exit("usage: {!s} [input.wav ...] musicfile output.wav".format(sys.argv[0]))

    WINDOW_FUNC = window_hanning
    # Stepping by WINDOW_LENGTH*(1-OVERLAP_PERCENT) sets how much of each
    # window overlaps the next one:
    #   OVERLAP_PERCENT = 0   -> the next window starts where the previous ended
    #   OVERLAP_PERCENT = 0.5 -> the next window starts halfway through the previous
    # Overlap is necessary with nonrectangular window functions, to prevent
    # loss of data.
    hop = int(WINDOW_LENGTH * (1 - OVERLAP_PERCENT))

    # read and parse each input wave
    for wave_path in sys.argv[1:-2]:
        # read in the wave file and unpack its data
        wave_data = waveIO.unpack(waveIO.read_wav_file(wave_path))
        # Instead of multiplying the entire wave by the windowing function,
        # examine one window-sized piece of the wave at a time.
        for start in range(0, len(wave_data), hop):
            window = list(wave_data[start:start + WINDOW_LENGTH])
            if len(window) != WINDOW_LENGTH:
                # Partial window at the end of the wave.
                continue
            store_note(WINDOW_FUNC(window))

    print_notes()
    plot_notes()

    # read and parse the music file
    musicfile = sys.argv[-2]
    song = build_song(musicfile, WINDOW_LENGTH / float(sr))
    waveIO.write_wav_file(sys.argv[-1], waveIO.pack(song))