/
MAIN.py
587 lines (457 loc) · 18.7 KB
/
MAIN.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
"""
guitar_sonification.py - Guitar Sonification: Audio to MIDI Conversion
By Sahand ShahRiari
"""
### SEGMENTING AUDIO
print("\n<<Guitar Sonification: Importing libraries>>\nLoading...\n")
# Ask until the user answers 'y'; 'n' exits, anything else re-prompts.
# On 'y', `question2` (recording length in seconds) is guaranteed to be set,
# which the module-level RECORD_SECONDS below depends on.
question1 = 'random'
while question1 != 'y':
    question1 = input('do you want to start recording? (y/n)')
    if question1 == 'y':
        # Bug fix: a non-numeric answer used to crash with ValueError;
        # re-prompt instead of aborting the whole program.
        try:
            question2 = int(input('How may seconds do you want to Record?'))
        except ValueError:
            print('Please enter a whole number of seconds.')
            question1 = 'random'
    if question1 == 'n':
        print("Sorry to hear that\nBye!")
        exit()
### IMPORTING LIBRARIES
import librosa, librosa.display, numpy, scipy
import matplotlib.pyplot as plt
import sounddevice as sd
from audiolazy import freq2str
import math
import wave
import curses
import pyaudio
import struct
from numpy.fft import rfft
from numpy import argmax, mean, diff, log, polyfit, arange
from matplotlib.mlab import find
from scipy.signal import blackmanharris, fftconvolve
from pylab import subplot, plot, log, copy, show
import sys, soundfile
# 'curses' configuration (used by noise() to poll keypresses while streaming)
stdscr = curses.initscr()
stdscr.nodelay(True)  # make getch() non-blocking so the audio loop never stalls
curses.noecho()
curses.cbreak()
# PyAudio object variable, shared by readin() and torn down in noise()
pa = pyaudio.PyAudio()
# Size of each read-in chunk (in frames) for the noise-cancelling loop
CHUNK = 1
# Set how often data for the result will be saved (every nth CHUNK)
NTH_ITERATION = 1
BUFFER_SIZE = 1024 # Increase this if playback becomes choppy, decrease to reduce latency
CHANNELS = 2
# Duration entered by the user at the prompt above (already an int)
RECORD_SECONDS = int(question2)
WAVE_OUTPUT_FILENAME = "file.wav"
FORMAT = pyaudio.paInt16  # 16-bit signed PCM
#TODO: INPUT
def recording():
    """
    Record RECORD_SECONDS of audio from the default input device and save
    it as a 16-bit WAV file at WAVE_OUTPUT_FILENAME.
    """
    print("Recording...\n\n <<Sound Card Details>>")
    # Fresh PyAudio instance, independent of the module-level `pa`
    sound_card = pyaudio.PyAudio()
    input_info = sound_card.get_default_input_device_info()
    # Show every property of the default input device to the user
    for i in input_info:
        print(i + ": " + str(input_info[i]))
    print()
    # Open a duplex stream at the device's own default sample rate
    line_in = sound_card.open(format=FORMAT,
                              frames_per_buffer=BUFFER_SIZE,
                              channels=CHANNELS,
                              rate=int(input_info["defaultSampleRate"]),
                              input=True,
                              output=True)
    print("Recording...")
    frames = []
    # Read enough BUFFER_SIZE chunks to cover RECORD_SECONDS of audio
    for i in range(0, int(int(input_info["defaultSampleRate"]) / BUFFER_SIZE * RECORD_SECONDS)):
        data = line_in.read(BUFFER_SIZE)
        frames.append(data)
    print("finished recording!")
    # stop Recording
    line_in.stop_stream()
    line_in.close()
    sound_card.terminate()
    # Write the captured raw frames out as a WAV file
    waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    waveFile.setnchannels(CHANNELS)
    waveFile.setsampwidth(sound_card.get_sample_size(FORMAT))
    waveFile.setframerate(int(input_info["defaultSampleRate"]))
    waveFile.writeframes(b''.join(frames))
    waveFile.close()
def loadFile(fn):
    """
    Load an audio file with librosa.

    :param fn: Path to the audio file to load
    :return (x, sr): (the audio samples as a 1-D float array, the sample rate)
    """
    # Bug fix: the path variable was quoted ("fn"), so librosa always tried
    # to load a file literally named "fn" instead of the requested file.
    x, sr = librosa.load(fn)
    print("LOADING... \n FILE LOADED: " , fn)
    print("Sample Rate = " + str(sr))
    return x, sr
### NOISE CANCELLATION
def noise(filename):
    """
    Play back `filename` while mixing in a bitwise-inverted copy of each
    chunk (interactive noise cancellation), then plot the results.

    Keyboard controls (polled via curses, non-blocking):
      'o'  toggle cancellation on/off
      '+'  raise the mix ratio by 0.01
      '-'  lower the mix ratio by 0.01
      'x'  abort playback

    :param filename: Path to the wave file to process
    """
    # Read in the given file
    (waveform, stream) = readin(filename)
    # Give some feedback
    stdscr.addstr('Now noise-cancelling the file')
    # Collecting the volume levels in decibels in a list
    decibel_levels = []
    # Collecting the waves into lists
    total_original = []
    total_inverted = []
    total_difference = []
    # Counting the iterations of the while-loop
    iteration = 0
    # Determines the ratio of the mix (1.0 = equal parts original/inverted)
    ratio = 1.0
    # Determines if the noise-cancellation is active
    active = True
    # Read a first chunk and continue to do so for as long as there is a stream to read in
    original = waveform.readframes(CHUNK)
    while original != b'':
        try:
            # Capture if a key was pressed (returns -1 when none is pending)
            pressed_key = stdscr.getch()
            # If the 'o' key was pressed toggle the 'active' variable
            if pressed_key == 111:
                active = not active
                # While the noise-cancellation is not activated the ratio
                # should be 100% towards the original audio (ratio 2.0
                # weights the first source fully, see get_ratios())
                if not active:
                    ratio = 2.0
                else:
                    ratio = 1.0
            # Increase the ratio of the mix ('+' key)
            elif pressed_key == 43:
                ratio += 0.01
            # Decrease the ratio of the mix ('-' key)
            elif pressed_key == 45:
                ratio -= 0.01
            # If the 'x' key was pressed abort the loop
            elif pressed_key == 120:
                break
            # Invert the original audio
            inverted = invert(original)
            # Play back a mixed audio stream of both, original source and the inverted one
            if active:
                mix = mix_samples(original, inverted, ratio)
                stream.write(mix)
            # In case the noise-cancellation is turned off temporarily, only play the original audio source
            else:
                stream.write(original)
            # On every nth iteration append the difference between the level of the source audio and the inverted one
            if iteration % NTH_ITERATION == 0:
                # Clear the terminal before outputting the new value
                stdscr.clear()
                # Calculate the difference of the source and the inverted audio
                difference = calculate_difference(original, inverted)
                # Print the current difference
                stdscr.addstr('Difference (in dB): {}\n'.format(difference))
                # Append the difference to the list used for the plot
                decibel_levels.append(difference)
                # Calculate the waves for the graph
                int_original, int_inverted, int_difference = calculate_wave(original, inverted, ratio)
                total_original.append(int_original)
                total_inverted.append(int_inverted)
                total_difference.append(int_difference)
            # Read in the next chunk of data
            original = waveform.readframes(CHUNK)
            # Add up one to the iterations
            iteration += 1
        except (KeyboardInterrupt, SystemExit):
            break
    # Stop the stream after there is no more data to read
    stream.stop_stream()
    stream.close()
    # Outputting feedback regarding the end of the file
    print('Finished noise-cancelling the file')
    # Plot the results
    plot_results(decibel_levels, NTH_ITERATION)
    plot_wave_results(total_original, total_inverted, total_difference, NTH_ITERATION)
    # Revert the changes from 'curses'
    curses.endwin()
    # Terminate PyAudio (the module-level `pa` object)
    pa.terminate()
def readin(file):
    """
    Open the given wave file and build a matching PyAudio playback stream.

    :param file: The path to the file to read in
    :return (waveform, stream): (the wave reader object, a PyAudio output
        stream configured for the file's sample width, channels and rate)
    """
    # Open the waveform; exit with a readable message on the two common errors
    try:
        waveform = wave.open(file, 'r')
    except wave.Error:
        print('The program can only process wave audio files (.wav)')
        sys.exit()
    except FileNotFoundError:
        print('The chosen file does not exist')
        sys.exit()
    # Create a playback stream whose parameters mirror the file's own
    sample_format = pa.get_format_from_width(waveform.getsampwidth())
    stream = pa.open(format=sample_format,
                     channels=waveform.getnchannels(),
                     rate=waveform.getframerate(),
                     output=True)
    return waveform, stream
def invert(data):
    """
    Bitwise-invert a chunk of 16-bit audio samples.

    Note: two's-complement NOT of a sample s yields -s - 1, i.e. an
    (off-by-one) phase inversion, which is what the cancelling mix uses.

    :param data: A chunk of byte data containing 16-bit PCM samples
    :return inverted: A bytes object of the same size with every sample inverted
    """
    # Bug fix: the input is paInt16, so view it as int16 — the old int32
    # view crashed on chunks not a multiple of 4 bytes and scrambled pairs
    # of samples. numpy.fromstring was also removed from modern NumPy;
    # frombuffer is the replacement.
    intwave = numpy.frombuffer(data, numpy.int16)
    # Bitwise NOT of every sample (returns a new, writable array)
    intwave = numpy.invert(intwave)
    # Serialize back to raw bytes so it can go straight to the stream
    return intwave.tobytes()
def mix_samples(sample_1, sample_2, ratio):
    """
    Mixes two 16-bit PCM samples into each other.

    :param sample_1: A bytestring containing the first audio source
    :param sample_2: A bytestring containing the second audio source
    :param ratio: A float which determines the mix-ratio of the two samples
        (the higher, the louder the first sample)
    :return mix: A bytes object containing the two samples mixed together
    """
    # Calculate the complementary weights from the single ratio value
    (ratio_1, ratio_2) = get_ratios(ratio)
    # numpy.fromstring was removed from modern NumPy; frombuffer replaces it
    intwave_sample_1 = numpy.frombuffer(sample_1, numpy.int16)
    intwave_sample_2 = numpy.frombuffer(sample_2, numpy.int16)
    # Weighted sum of the two sources, truncated back into int16
    intwave_mix = (intwave_sample_1 * ratio_1 + intwave_sample_2 * ratio_2).astype(numpy.int16)
    # Bug fix: return raw bytes (the old frombuffer(..., numpy.byte) view
    # produced an int8 ndarray, not a playable bytestring)
    return intwave_mix.tobytes()
def get_ratios(ratio):
    """
    Split a single mix value into two complementary weights.

    :param ratio: A float between 0 and 2 describing the balance
    :return ratio_1, ratio_2: The weight of the first and second source;
        the two always sum to 1.
    """
    balance = float(ratio)
    first = balance / 2
    second = 1 - first
    return first, second
def calculate_decibel(data):
    """
    Calculates the volume level in decibel of the given data.

    :param data: A bytestring of 16-bit samples to measure
    :return db: The calculated volume level in decibel
    """
    n_samples = len(data) // 2
    samples = struct.unpack("%dh" % n_samples, data)
    # RMS of the samples normalized into [-1, 1)
    sum_of_squares = sum((s * (1.0 / 32768)) ** 2 for s in samples)
    # The small epsilon keeps log10 defined for pure silence
    rms = math.sqrt(sum_of_squares / n_samples) + 0.0001
    return 20 * math.log10(rms)
def calculate_difference(data_1, data_2):
    """
    Level difference in decibel between two chunks of audio data.

    :param data_1: The first bytestring of samples
    :param data_2: The second bytestring of samples
    :return difference: The dB gap (positive when data_1 is louder)
    """
    return calculate_decibel(data_1) - calculate_decibel(data_2)
def calculate_wave(original, inverted, ratio):
    """
    Converts the first sample of each bytestring into a plottable,
    ratio-scaled value and computes their sum (the audible output level).

    :param original: A bytestring of sound
    :param inverted: A bytestring of sound
    :param ratio: A float which determines the mix-ratio of the two samples
    :return int_original, int_inverted, int_difference: The two scaled first
        samples and their sum
    """
    # Calculate the actual ratios based on the float the function received
    (ratio_1, ratio_2) = get_ratios(ratio)
    # numpy.fromstring was removed from modern NumPy; frombuffer replaces it.
    # Only the first sample of each chunk is kept for the plot.
    int_original = numpy.frombuffer(original, numpy.int16)[0] * ratio_1
    int_inverted = numpy.frombuffer(inverted, numpy.int16)[0] * ratio_2
    # Sum of the weighted samples — what actually reaches the speaker
    int_difference = (int_original + int_inverted)
    return int_original, int_inverted, int_difference
def plot_results(data, nth_iteration):
    """
    Plots the list it receives, skipping the first ten entries so the
    initial silence does not dominate the graph.

    :param data: A list of data to be plotted
    :param nth_iteration: Used for the label of the x axis
    """
    # Drop the leading silence before plotting
    trimmed = data[10:]
    plt.plot(trimmed)
    # Axis labels
    plt.xlabel('Time (every {}th {} byte)'.format(nth_iteration, CHUNK))
    plt.ylabel('Volume level difference (in dB)')
    # Title shows the absolute median difference level (note: computed over
    # the full, untrimmed data)
    median_level = numpy.round(numpy.fabs(numpy.median(data)), decimals=5)
    plt.suptitle('Difference - Median (in dB): {}'.format(median_level), fontsize=14)
    # Blocks until the window is closed
    plt.show()
def plot_wave_results(total_original, total_inverted, total_difference, nth_iteration):
    """
    Plots the three waves of the original sound, the inverted one and their difference.

    :param total_original: A list of the original wave data
    :param total_inverted: A list of the inverted wave data
    :param total_difference: A list of the difference of 'total_original' and 'total_inverted'
    :param nth_iteration: Used for the label of the x axis
    """
    # Plot the three waves
    plt.plot(total_original, 'b')
    plt.plot(total_inverted, 'r')
    plt.plot(total_difference, 'g')
    # Label the axes
    plt.xlabel('Time (per {}th {} byte chunk)'.format(nth_iteration, CHUNK))
    # Bug fix: the y label has a single placeholder but was passed
    # nth_iteration first (CHUNK was silently discarded); the chunk size
    # is the value the label describes.
    plt.ylabel('Amplitude (integer representation of each {} byte chunk)'.format(CHUNK))
    # Calculate and output the absolute median difference level
    plt.suptitle('Waves: original (blue), inverted (red), output (green)', fontsize=14)
    # Display the plotted graph
    plt.show()
### ONSET DETECTION AND SEGMENTATION
def onset(x, sr):
    """
    Detect note onsets in the signal and audition them as clicks mixed
    over the original audio.

    :param x: The audio signal as a 1-D array
    :param sr: The sampling rate of the signal in Hz
    :return (onset_frames, onset_times, onset_samples): onset positions as
        frame indices, seconds and sample indices respectively
    """
    # Short-time Fourier transform (for EQ, must do inverse Fourier transform after)
    X = librosa.stft(x)
    # Find the frames when onsets occur
    # NOTE(review): recent librosa versions require keyword arguments here
    # (onset_detect(y=x, sr=sr)) — confirm against the installed version.
    onset_frames = librosa.onset.onset_detect(x, sr=sr)
    print("Onset Frames = " + str(onset_frames) + "\n ")
    # Find the times, in seconds, when onsets occur in the audio signal
    onset_times = librosa.frames_to_time(onset_frames, sr=sr)
    print("Onset Times = " + str(onset_times) + "\n ")
    # Convert the onset frames into sample indices to play "BEEB" sound on it
    onset_samples = librosa.frames_to_samples(onset_frames)
    print("Onset Samples = " + str(onset_samples) + "\n ")
    # Use the "length" parameter so the click track is the same length as the original signal
    clicks = librosa.clicks(times=onset_times, length=len(x))
    # Play the click track "added to" the original signal
    sd.play(x + clicks, sr)
    # Display the waveform of the original signal
    # NOTE(review): librosa.display.waveplot was removed in librosa 0.10
    # (renamed waveshow) — confirm against the installed version.
    librosa.display.waveplot(x, sr)
    plt.title("Original Signal")
    plt.show() # Close window to resume
    return onset_frames, onset_times, onset_samples
# Concatenate the segments and pad them with silence
def concatenate_segments(segments, sr=22050, pad_time=0.100):
padded_segments = [numpy.concatenate([segment, numpy.zeros(int(pad_time * sr))]) for segment in segments]
return numpy.concatenate(padded_segments)
def segment(x, sr, onset_samples):
    """
    Cut a fixed-length (100 ms) slice out of the signal at every onset and
    play/plot the silence-padded concatenation for auditioning.

    :param x: The audio signal as a 1-D array
    :param sr: The sampling rate of the signal in Hz
    :param onset_samples: Sample indices where onsets were detected
    :return segments: An array of the raw 100 ms slices
    """
    # 100 ms worth of samples per segment
    frame_sz = int(0.100 * sr)
    # NOTE(review): an onset within 100 ms of the end of x yields a shorter
    # slice, making this list ragged — modern NumPy refuses to build an
    # ndarray from ragged lists; confirm inputs avoid this case.
    segments = numpy.array([x[i:i + frame_sz] for i in onset_samples])
    concatenated_signal = concatenate_segments(segments, sr)
    # Play the segmented signal
    sd.play(concatenated_signal, sr)
    # Display the waveform of the segmented signal
    librosa.display.waveplot(concatenated_signal, sr)
    plt.title("Segmented Signal")
    plt.show() # Close window to resume
    return segments
### PITCH DETECTION
def parabolic(f, x):
    """
    Quadratic interpolation for estimating the true position of an
    inter-sample maximum when nearby samples are known.

    f is a vector and x is an index for that vector.

    :return (vx, vy): Coordinates of the vertex of the parabola passing
        through point x and its two neighbours.
    """
    left, mid, right = f[x - 1], f[x], f[x + 1]
    # Closed-form vertex of the parabola through the three points
    vertex_x = x + 0.5 * (left - right) / (left - 2 * mid + right)
    vertex_y = mid - 0.25 * (left - right) * (vertex_x - x)
    return (vertex_x, vertex_y)
def parabolic_polyfit(f, x, n):
    """
    Locate the peak of a parabola fitted (least squares via polyfit)
    through the n samples of f surrounding index x.

    :param f: A vector of samples
    :param x: Index of the approximate maximum
    :param n: Number of samples of the curve used to fit the parabola
    :return (xv, yv): Coordinates of the fitted parabola's vertex
    """
    half = n // 2
    sample_idx = arange(x - half, x + half + 1)
    a, b, c = polyfit(sample_idx, f[x - half:x + half + 1], 2)
    # Vertex of a*x^2 + b*x + c
    vertex_x = -0.5 * b / a
    vertex_y = a * vertex_x ** 2 + b * vertex_x + c
    return (vertex_x, vertex_y)
def freq_from_crossings(sig, fs):
    """
    Estimate the fundamental frequency of `sig` by timing rising-edge
    zero crossings.

    :param sig: A 1-D array-like signal
    :param fs: The sampling rate of the signal in Hz
    :return: The estimated frequency in Hz
    """
    sig = numpy.asarray(sig)
    # Indices right before a rising-edge zero crossing.
    # Bug fix: matplotlib.mlab.find was removed in matplotlib 3.1;
    # numpy.nonzero(...)[0] is the drop-in replacement.
    indices = numpy.nonzero((sig[1:] >= 0) & (sig[:-1] < 0))[0]
    # Linear interpolation locates the inter-sample crossing positions,
    # far more accurate than the raw integer indices
    crossings = [i - sig[i] / (sig[i + 1] - sig[i]) for i in indices]
    # Mean spacing between crossings is the period in samples
    return fs / numpy.mean(numpy.diff(crossings))
def freq_from_fft(sig, fs):
    """
    Estimate frequency from the interpolated peak of the FFT magnitude.

    :param sig: A 1-D array-like signal
    :param fs: The sampling rate of the signal in Hz
    :return: The estimated frequency in Hz
    """
    # Window the signal to reduce spectral leakage before transforming
    windowed = sig * blackmanharris(len(sig))
    spectrum = abs(rfft(windowed))
    # Coarse peak bin, then parabolic interpolation on the log-magnitude
    # spectrum for sub-bin accuracy
    peak_bin = argmax(spectrum)
    true_bin = parabolic(log(spectrum), peak_bin)[0]
    # Convert the (fractional) bin index to a frequency
    return fs * true_bin / len(windowed)
def freq_from_autocorr(sig, fs):
    """
    Estimate frequency using autocorrelation.

    :param sig: A 1-D array-like signal
    :param fs: The sampling rate of the signal in Hz
    :return: The estimated frequency in Hz
    """
    # Autocorrelation via FFT convolution with the time-reversed signal;
    # keep only the non-negative lags
    corr = fftconvolve(sig, sig[::-1], mode='full')
    corr = corr[len(corr) // 2:]
    # Find the first low point (end of the zero-lag peak).
    # Bug fix: matplotlib.mlab.find was removed in matplotlib 3.1;
    # numpy.nonzero(...)[0] is the drop-in replacement.
    d = numpy.diff(corr)
    start = numpy.nonzero(d > 0)[0][0]
    # Find the next peak after the low point (other than 0 lag). This bit is
    # not reliable for long signals, due to the desired peak occurring between
    # samples, and other peaks appearing higher.
    # Should use a weighting function to de-emphasize the peaks at longer lags.
    peak = numpy.argmax(corr[start:]) + start
    px, py = parabolic(corr, peak)
    # The interpolated peak lag is the period in samples
    return fs / px
def pitch(segment, sr):
    """
    Print the estimated frequency and nearest note name for one audio
    segment using three estimators (FFT peak, zero crossings,
    autocorrelation).

    :param segment: A 1-D array of audio samples (shadows the module-level
        segment() function inside this body only)
    :param sr: The sampling rate of the segment in Hz
    """
    print("\n<<Guitar Sonification: Pitch Detection>>\nLoading...\n")
    # Bug fix: the old try/except NameError fallback called an undefined
    # `flacread` on an undefined `filename`, so it could only ever raise
    # another NameError; both parameters are always supplied, use them.
    signal, fs = segment, sr
    print("Calculating frequency from FFT:")
    # Compute each estimate once (the old code ran every estimator twice)
    a1 = freq_from_fft(signal, fs)
    print("%f Hz" % a1)
    b1 = freq2str(a1)
    print("MIDI NOTE: ", b1 + "\n")
    print("Calculating frequency from zero crossings:")
    a2 = freq_from_crossings(signal, fs)
    print("%f Hz" % a2)
    b2 = freq2str(a2)
    print("MIDI NOTE: ", b2 + "\n")
    print("Calculating frequency from autocorrelation:")
    a3 = freq_from_autocorr(signal, fs)
    print("%f Hz" % a3)
    b3 = freq2str(a3)
    print("MIDI NOTE: ", b3)
    print("\n ________________________________ \n")
### MAIN
def master():
    """
    Run the full pipeline: record from the sound card, load the WAV,
    noise-cancel it, segment it at onsets, and detect pitch per segment.
    """
    recording()
    ### LOADING SAMPLE
    ##A, B = loadFile("audio\ibanez.wav")
    fn = "file.wav"
    # Load the file just recorded by recording()
    x, sr = librosa.load(fn)
    print("LOADING... \n FILE LOADED: " , fn)
    print("Sample Rate = " + str(sr))
    ### NOISE CANCELATION
    noise(fn)
    ### SEGMENTATION
    # onset() returns (frames, times, samples); only the sample indices
    # are needed for slicing
    O1, O2, O3 = onset(x, sr)
    segmented = segment(x, sr, O3)
    ### PITCH DETECTION
    # Run all three pitch estimators on each 100 ms segment
    for i in segmented:
        pitch(i, sr)
def main():
    """Entry point: run the pipeline only when the user agreed to record."""
    # Bug fix: `question1 == 'y' or 'yes'` was always truthy because the
    # non-empty string 'yes' short-circuits the `or`; test membership instead.
    if question1 in ('y', 'yes'):
        master()
    if question1 in ('n', 'no'):
        exit()
main()