-
Notifications
You must be signed in to change notification settings - Fork 0
/
audio_detective.py
99 lines (78 loc) · 2.92 KB
/
audio_detective.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# Adapted from the script described at
# http://www.macdevcenter.com/pub/a/python/2001/01/31/numerically.html
import sys
# loading audio
from scikits import audiolab
# processing data
import numpy as np
from numpy.fft import rfft, fft
from math import sin, pi, log10
# plotting
import matplotlib.pyplot as plt
def get_spectrogram(filename, fft_length):
    """Estimate the high-frequency cutoff of an audio file.

    Despite its name, this function does not return a spectrogram.  It
    reads the file in consecutive frames of ``fft_length`` samples,
    applies a Hamming window, takes the real FFT of each frame, sums the
    log-magnitude spectra over all frames, and then looks for the last
    sharp local minimum in the slope of that summed spectrum from
    roughly 10 kHz upwards.

    Args:
        filename: Path of the audio file, opened with ``audiolab.Sndfile``.
        fft_length: Number of samples per FFT frame (the script entry
            point passes a power of two).

    Returns:
        A string ``"<rounded> - <raw>"`` where ``<raw>`` is the detected
        frequency in Hz truncated to an integer and ``<rounded>`` is the
        same frequency rounded to the nearest 500 Hz.  The string
        ``"xxxxx"`` is returned when no qualifying minimum is found, or
        when the file is too short to yield a single analysis frame.

    Raises:
        Exception: If the file has neither one nor two channels
            (raised as ``ValueError``, a subclass of ``Exception``).
    """
    fp = audiolab.Sndfile(filename, 'r')
    try:
        sample_rate = fp.samplerate
        channels = fp.channels
        # Floor division keeps the frame count an integer on Python 3 too.
        # NOTE(review): the "- 2" margin is inherited from the script this
        # was adapted from; its purpose is not evident here.
        num_fft = (fp.nframes // fft_length) - 2
        if num_fft < 1:
            # Not enough samples for even one frame: nothing to analyse.
            return "xxxxx"

        # One row per frame, one column per sample.
        fft_buckets = np.zeros((num_fft, fft_length), float)
        for i in range(num_fft):
            frames = fp.read_frames(fft_length)
            # NOTE(review): the 128.0 offset presumably dates from 8-bit
            # unsigned input in the original script -- confirm it is still
            # wanted for the sample format read_frames returns.
            if channels == 2:
                # TODO: figure out how to combine channels appropriately;
                # for now only the first channel is analysed.
                fft_buckets[i, :] = frames[:, 0] - 128.0
            elif channels == 1:
                fft_buckets[i, :] = frames - 128.0
            else:
                raise ValueError("Unsupported # of channels: %d" % channels)
    finally:
        # Always release the file handle, including on early return/raise.
        fp.close()

    # Window each frame, transform it, and convert magnitude to a log scale.
    windowed = fft_buckets * np.hamming(fft_length)
    freq_pwr = 10 * np.log10(1e-20 + abs(rfft(windowed, fft_length)))

    # rfft yields fft_length // 2 + 1 frequency bins per frame.
    n_out_pts = (fft_length // 2) + 1
    axis_hz = 0.5 * float(sample_rate) / n_out_pts * np.arange(n_out_pts)

    # NOTE(review): freqinc is max(axis_hz) / n_out_pts, which is slightly
    # smaller than the spacing used to build axis_hz.  Kept unchanged so the
    # thresholds below keep their meaning.
    highfreq = max(0, axis_hz.max())
    freqinc = highfreq / n_out_pts
    if not freqinc > 0:
        # Degenerate sample rate or FFT size: no usable frequency axis.
        return "xxxxx"
    freqcut = int(10000 / freqinc)

    # Sum every bin over all frames, then take the slope across bins as
    # the ratio of two identically convolved sequences (values / bin index).
    audio = freq_pwr.sum(axis=0)
    window = [-1, 0, 1]
    slope = (np.convolve(audio, window, mode='same') /
             np.convolve(np.arange(n_out_pts), window, mode='same'))
    slopenorm = slope / n_out_pts

    # Find local minima below -10 among the bins from freqcut upwards.  Bin
    # positions are tracked directly rather than looked up again by value,
    # so duplicate slope values cannot select the wrong bin.
    tail = slopenorm[freqcut:]
    middle = tail[1:-1]
    is_minimum = (tail[:-2] > middle) & (middle < tail[2:]) & (middle < -10)
    minima_bins = np.nonzero(is_minimum)[0] + freqcut + 1
    if len(minima_bins) < 1:
        return "xxxxx"

    last = minima_bins[-1] * freqinc
    # A minimum above 20.5 kHz is skipped in favour of the previous one,
    # but only if a previous one exists.
    if last > 20500 and len(minima_bins) > 1:
        last = minima_bins[-2] * freqinc

    # Round half up to the nearest 500 Hz; `last` is never negative, so
    # truncation after adding 0.5 gives the same result on Python 2 and 3.
    lastround = int(last / 500.0 + 0.5) * 500

    # Reference table left by the original author, mapping the rounded
    # cutoff (Hz) to a best guess at the source encoding:
    #   20000: '320', 19500: '256', 19000: 'v0',
    #   18500: '192', 18000: 'v2', 16500: '128'
    return str(int(lastround)) + " - " + str(int(last))
if __name__ == "__main__":
    # Command line: <audio file> <exponent>, where the FFT length used is
    # 2 ** exponent.
    if len(sys.argv) < 3:
        sys.exit("usage: %s <audio-file> <fft-length-exponent>" % sys.argv[0])
    filename = sys.argv[1]
    fft_length = 2 ** int(sys.argv[2])
    # Print the estimate; previously the return value was discarded and the
    # script produced no output at all.
    print(get_spectrogram(filename, fft_length))