/
main.py
123 lines (107 loc) · 3.38 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#coding: utf-8
import os
import sys
import itertools
import numpy as np
import bisect
from yaafelib import Engine, AudioFileProcessor, FeaturePlan
# Bundled sample data lives in the `resources` directory next to this file.
RES_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'resources')
WAV_PATH = os.path.join(RES_DIR, 'DTMF_dialing.wav')
ANS_PATH = os.path.join(RES_DIR, 'answer.txt')

# A frame whose mean power-spectrum magnitude is at or below this value is
# treated as silence.
SILENT_MAG_THRESHOLD = 10

# Tone frequencies (Hz): LOWER_FREQS selects the keypad row, UPPER_FREQS the
# keypad column.
LOWER_FREQS = [697, 770, 852, 941]
UPPER_FREQS = [1209, 1336, 1477, 1633]

# Keypad symbols in row-major order (one row per entry of LOWER_FREQS).
KEYS = list(
    '123A'
    '456B'
    '789C'
    '*0#D'
)

# Per-frame markers for "no signal" and "signal, but no recognizable key".
SILENT_KEY = 'S'
UNKNOWN_KEY = 'U'
def purge_seq(seq):
    """Collapse a per-frame key sequence into a list of dialed strings.

    Consecutive identical entries of ``seq`` are grouped into runs.  A run of
    ``n`` frames of a real key contributes ``(n + 1) // 2`` copies of that key
    to the current output string.  Every run of ``SILENT_KEY`` or
    ``UNKNOWN_KEY`` that follows at least one key starts a new output string;
    such runs before the first key are ignored.  A single trailing empty
    string is removed.

    Args:
        seq: iterable of one-character keys, one per analysis frame.

    Returns:
        List of strings; empty when ``seq`` contains no real key.
    """
    rst = []
    for k, vs in itertools.groupby(seq):
        if k == SILENT_KEY or k == UNKNOWN_KEY:
            # NOTE: a silent run directly followed by an unknown run (or vice
            # versa) opens two strings, so an empty entry can end up in the
            # middle of the result.  Kept as in the original implementation.
            if rst:
                rst.append("")
            continue
        if not rst:
            rst.append("")
        run_len = sum(1 for _ in vs)
        # Heuristic flagged "XXX" by the original author; presumably it
        # compensates for the half-overlapping analysis frames -- TODO confirm.
        # Floor division keeps the repeat count an int on Python 3 as well.
        rst[-1] += k * ((run_len + 1) // 2)
    # `rst` is empty for empty or all-silent input; guard before peeking.
    if rst and rst[-1] == "":
        rst = rst[:-1]
    return rst
def _interpolation_weights(freq_bound, target_freq):
    """Return ``(idx, w_lo, w_hi)`` for linear interpolation at ``target_freq``.

    ``freq_bound[idx - 1]`` and ``freq_bound[idx]`` bracket ``target_freq``;
    ``w_lo`` and ``w_hi`` are the weights of those two bins and sum to 1.
    """
    idx = bisect.bisect(freq_bound, target_freq)
    assert 0 < idx < len(freq_bound), \
        "%s is outside the analysed band" % target_freq
    freq_lo = freq_bound[idx - 1]
    freq_hi = freq_bound[idx]
    w_lo = 1. * (freq_hi - target_freq) / (freq_hi - freq_lo)
    return idx, w_lo, 1. - w_lo


def _strongest_tone(spectrum, weights, min_mag):
    """Return the position in ``weights`` of the strongest tone, or -1.

    Only tones whose interpolated magnitude exceeds ``min_mag`` qualify; on a
    tie the earliest tone wins.
    """
    best_idx, best_mag = -1, -1
    for tone_idx, (idx, w_lo, w_hi) in enumerate(weights):
        mag = w_lo * spectrum[idx - 1] + w_hi * spectrum[idx]
        if mag > min_mag and mag > best_mag:
            best_idx, best_mag = tone_idx, mag
    return best_idx


def detect(wav_path, ans_path=None):
    """Detect the keys dialed in an audio file and print them.

    The file is resampled to 8 kHz and turned into a sequence of power spectra
    (1024-sample blocks, 512-sample step).  Each frame is classified as
    silent, unknown, or one key from ``KEYS``; the per-frame sequence is then
    reduced with ``purge_seq`` and printed as ``[index] string`` lines.

    Args:
        wav_path: path of the audio file to analyse.
        ans_path: optional path of a text file holding the expected strings,
            one per line.  When given, each line is compared with the detected
            string at the same index before it is printed.

    Raises:
        AssertionError: if a line of ``ans_path`` differs from the detected
            string, or if the file has more lines than strings were detected.
    """
    sample_rate = 8000
    block_size = 1024
    # Floor division: both values are used as integers (``%d`` and ``range``).
    step_size = block_size // 2
    n_band = block_size // 2
    # Frequency of each spectrum bin, from 0 up to sample_rate / 2.
    # NOTE(review): assumes PowerSpectrum yields n_band + 1 evenly spaced
    # bins per frame -- confirm against the yaafe documentation.
    freq_bound = [i * sample_rate / 2. / n_band for i in range(n_band + 1)]

    # Bin positions and weights do not depend on the frame: compute them once.
    lower_weights = [_interpolation_weights(freq_bound, f) for f in LOWER_FREQS]
    upper_weights = [_interpolation_weights(freq_bound, f) for f in UPPER_FREQS]

    plan = FeaturePlan(sample_rate=sample_rate, resample=True)
    plan.addFeature(
        'power_spectrum: PowerSpectrum blockSize=%d stepSize=%d' % (
            block_size, step_size
        )
    )
    dataflow = plan.getDataFlow()
    afp = AudioFileProcessor()
    engine = Engine()
    engine.load(dataflow)
    afp.processFile(engine, wav_path)
    # Iterated below as one spectrum per analysis frame.
    spectrogram = engine.readOutput('power_spectrum')

    seq = []
    for spectrum in spectrogram:
        mean_mag = np.mean(spectrum)
        if mean_mag <= SILENT_MAG_THRESHOLD:
            seq.append(SILENT_KEY)
            continue
        # A tone only counts if it clearly stands out from the frame average.
        min_mag = mean_mag * 2
        lower_idx = _strongest_tone(spectrum, lower_weights, min_mag)
        upper_idx = _strongest_tone(spectrum, upper_weights, min_mag)
        if lower_idx == -1 or upper_idx == -1:
            seq.append(UNKNOWN_KEY)
        else:
            # KEYS is row-major: the row stride is the number of columns.
            seq.append(KEYS[lower_idx * len(UPPER_FREQS) + upper_idx])

    ans = purge_seq(seq)
    if ans_path is None:
        for i, line in enumerate(ans):
            print('[%d] %s' % (i, line))
        return

    with open(ans_path) as fh:
        for i, line in enumerate(fh):
            line = line.strip()
            # Raised explicitly so the check survives ``python -O``.
            if i >= len(ans):
                raise AssertionError(
                    "expected %r at index %d but only %d strings were "
                    "detected" % (line, i, len(ans))
                )
            if line != ans[i]:
                raise AssertionError("%s != %s" % (line, ans[i]))
            print('[%d] %s' % (i, line))
def main(argv):
    """Command-line entry point.

    With no arguments after the program name, the bundled sample recording is
    analysed and checked against the bundled answer file.  Otherwise the
    remaining arguments are forwarded positionally to ``detect``.
    """
    if len(argv) != 1:
        detect(*argv[1:])
        return
    detect(WAV_PATH, ANS_PATH)


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main(sys.argv)