/
demo.py
83 lines (67 loc) · 2.25 KB
/
demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import os
import time
import librosa
import numpy
import pyaudio
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import utils
# --- Audio capture settings (used when the PyAudio stream is opened) ---
CHANNELS = 1                    # number of input channels
RATE = 44100                    # sample rate in Hz
FRAMES_PER_BUFFER = 1024 * 17   # frames handed to the stream callback per call

# --- Spectrum display settings ---
N_FFT = 4096            # FFT length used for the on-screen spectrum
SCREEN_WIDTH = 178      # mel bands computed == characters printed per line
ENERGY_THRESHOLD = 0.4  # a band is drawn as '*' when its energy exceeds this

# Ask TensorFlow's native logging to hide INFO and WARNING messages.
# NOTE(review): tensorflow is already imported above this line; this variable
# is usually set before the import -- confirm it still takes effect here.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Frequency range covered by the mel filter bank.
F_LO = librosa.note_to_hz('C2')
F_HI = librosa.note_to_hz('C9')

# Mel filter bank applied to the FFT magnitudes.  Keyword arguments are used
# because recent librosa releases reject positional sr/n_fft/n_mels; the
# keyword form is accepted by older releases as well.
M = librosa.filters.mel(sr=RATE, n_fft=N_FFT, n_mels=SCREEN_WIDTH,
                        fmin=F_LO, fmax=F_HI)

# PortAudio handle and the trained classifier used by test().
p = pyaudio.PyAudio()
loaded_model = tf.keras.models.load_model('ver_1.1.h5')
def mfcc(y):
    """Return the MFCC features of ``y`` with an extra leading axis.

    Args:
        y: audio samples, passed straight to ``librosa.feature.mfcc``.

    Returns:
        The 40-coefficient MFCC matrix (computed for a 44100 Hz sample
        rate), transposed and with a new length-1 axis inserted at
        position 0.
    """
    coefficients = librosa.feature.mfcc(y=y, sr=44100, n_mfcc=40)
    # Transpose first, then prepend the new axis.
    return coefficients.T[numpy.newaxis, ...]
def test(sample):
    """Classify an audio buffer and return a label for the console.

    Args:
        sample: audio samples; features are extracted with ``mfcc``.

    Returns:
        ``'Snap & Clap'`` when the highest-scoring model output is index 1,
        otherwise an empty string.
    """
    features = mfcc(sample)
    # Append a trailing length-1 axis before handing the batch to the model.
    batch = features.reshape(-1, features.shape[1], features.shape[2], 1)
    scores = loaded_model.predict(batch)
    return 'Snap & Clap' if numpy.argmax(scores) == 1 else ''
def generate_string_from_audio(audio_data):
    """Render one buffer of audio as a single-line text spectrum.

    Args:
        audio_data: audio samples; an ``N_FFT``-point real FFT is taken.

    Returns:
        A string of ``SCREEN_WIDTH`` characters: ``'*'`` where the mel band
        energy exceeds ``ENERGY_THRESHOLD``, ``'|'`` as a ruler mark on every
        30th column that is not a ``'*'``, and a space elsewhere.
    """
    magnitudes = abs(numpy.fft.rfft(audio_data, n=N_FFT))
    band_energy = M.dot(magnitudes)
    return ''.join(
        '*' if band_energy[col] > ENERGY_THRESHOLD
        else '|' if col % 30 == 29
        else ' '
        for col in range(SCREEN_WIDTH)
    )
def callback(in_data, frame_count, time_info, status):
    """PyAudio stream callback: draw and classify one captured buffer.

    Prints the text spectrum of the buffer followed by the classifier label.

    Args:
        in_data: raw bytes of the captured audio buffer.
        frame_count: number of frames in the buffer (unused).
        time_info: timing information supplied by PyAudio (unused).
        status: status flags supplied by PyAudio (unused).

    Returns:
        ``(in_data, pyaudio.paContinue)`` so the stream keeps running.
    """
    # The stream is opened with format=pyaudio.paFloat32, so the bytes must
    # be decoded as float32; decoding them as float64 fuses pairs of samples
    # into meaningless values and halves the sample count.
    audio_data = numpy.frombuffer(in_data, dtype=numpy.float32)
    print(generate_string_from_audio(audio_data), test(audio_data))
    # No stream.read() here: the previous read loop never ran with the
    # current RATE / FRAMES_PER_BUFFER, its result was unused, and blocking
    # reads must not be issued on a stream running in callback mode.
    return in_data, pyaudio.paContinue
# Open an input-only float32 stream; PyAudio calls `callback` with each
# captured buffer.
stream = p.open(format=pyaudio.paFloat32,
                channels=CHANNELS,
                rate=RATE,
                input=True,    # Do record input.
                output=False,  # Do not play back output.
                frames_per_buffer=FRAMES_PER_BUFFER,
                stream_callback=callback)

try:
    stream.start_stream()
    # The callback does the work; this loop only keeps the script alive
    # while the stream is running.
    while stream.is_active():
        time.sleep(0.100)
finally:
    # Release the audio device even if the wait is interrupted (e.g. Ctrl-C).
    stream.stop_stream()
    stream.close()
    p.terminate()