# SimpleAudio.py
import pyaudio
import numpy as np
import wave
import sys
import math
import random
import pylab as pl
# Seed the random number generator (used by Audio.create_noise)
random.seed()

# Default audio format used when Audio() is constructed without arguments
CHUNK = 256                # frames read from / written to a stream per call
FORMAT = pyaudio.paInt16   # 16-bit integer samples
CHANNELS = 1
RATE = 48000               # sample rate passed to the streams and WAV header

# Largest positive sample value, needed for rescaling.
# 16-bit samples span -2**15 .. 2**15 - 1, so 2**15 itself does not fit and
# would wrap around to -32768 when stored in an int16 array.
MAX_AMP = 2**15 - 1
# Audio extends pyaudio.PyAudio with a buffer of samples (self.data, a NumPy
# array) and helpers to record, play, load/save WAV files, apply simple
# effects to the buffer and plot it.
class Audio(pyaudio.PyAudio):
def __init__(self, channels=1, rate=RATE, chunk=CHUNK, format=FORMAT):
    """Initialise PyAudio and remember the audio format to work with.

    channels -- number of channels passed to the streams and WAV header
    rate     -- sample rate
    chunk    -- number of frames handled per stream read/write
    format   -- PyAudio sample format constant
    """
    # Initialise the parent class
    pyaudio.PyAudio.__init__(self)

    # No streams are open yet
    self.istream = None
    self.ostream = None

    # Counter used to walk through the data one chunk at a time
    self.chunk_index = 0

    # Remember the requested format
    self.format = format
    self.chunk = chunk
    self.rate = rate
    self.chan = channels

    # Start with an empty buffer of the matching NumPy type
    self.nptype = self.getNpType(format)
    self.data = np.array([], dtype=self.nptype)
def __del__(self):
    """Call terminate() when the object is being destroyed."""
    self.terminate()
def getChunk(self):
    """Read one chunk from the current input stream and append it to self.data.

    Reads self.chunk frames from self.istream, interprets the raw bytes as
    samples of type self.nptype and appends them to the end of self.data.
    """
    raw = self.istream.read(self.chunk)
    # frombuffer replaces the deprecated binary mode of np.fromstring
    samples = np.frombuffer(raw, dtype=self.nptype)
    self.data = np.append(self.data, samples)
def putChunk(self):
    """Write the next chunk of self.data to the current output stream.

    The chunk starts at self.chunk_index * self.chunk; the counter is
    advanced after a successful write. The last chunk may be shorter than
    self.chunk.

    Raises IndexError once all data has been written; play() relies on this
    to know when to stop.
    """
    slice_from = self.chunk_index * self.chunk
    # Slicing a numpy array out of bounds does not raise, so test explicitly
    if slice_from >= self.data.shape[0]:
        raise IndexError("no more audio data to play")
    # The end of a slice is exclusive, so no "- 1" here: subtracting one
    # would silently drop the last sample of every chunk.
    block = self.data[slice_from:slice_from + self.chunk]
    self.ostream.write(block.tobytes())
    self.chunk_index += 1
def openInputStream(self):
    """Open an input stream and store it in self.istream.

    Calls the inherited open() with the format, channel count, rate and
    chunk size held by this instance.
    """
    stream_options = dict(format=self.format,
                          channels=self.chan,
                          rate=self.rate,
                          input=True,
                          frames_per_buffer=self.chunk)
    self.istream = self.open(**stream_options)
def closeInputStream(self):
    """Close the input stream, if one is open, and reset self.istream.

    Calling this with no open stream (for example twice in a row) is a
    no-op instead of an AttributeError on None.
    """
    stream = self.istream
    if stream is None:
        return
    # Drop the reference first so a failing close() cannot leave a stale
    # handle behind.
    self.istream = None
    stream.close()
def openOutputStream(self):
    """Open an output stream, store it in self.ostream and rewind playback.

    Calls the inherited open() with the format, channel count and rate held
    by this instance, then resets self.chunk_index to 0.
    """
    stream_options = dict(format=self.format,
                          channels=self.chan,
                          rate=self.rate,
                          output=True)
    self.ostream = self.open(**stream_options)
    self.chunk_index = 0
def closeOutputStream(self):
    """Close the output stream, if one is open, and reset self.ostream.

    Calling this with no open stream (for example twice in a row) is a
    no-op instead of an AttributeError on None.
    """
    stream = self.ostream
    if stream is None:
        return
    # Drop the reference first so a failing close() cannot leave a stale
    # handle behind.
    self.ostream = None
    stream.close()
def record(self, time=5.0):
    """Record from the input stream into self.data, replacing its contents.

    time -- length of the recording; time * self.rate samples are read in
            whole chunks, so the result is rounded down to a multiple of
            self.chunk.

    The input stream is closed again even if reading fails part-way.
    """
    # Clear current data
    self.data = np.array([], dtype=self.nptype)
    chunk_count = int(time * self.rate / self.chunk)

    self.openInputStream()
    try:
        print("Recording...")
        for _ in range(chunk_count):
            self.getChunk()
        print("Done Recording")
    finally:
        # Never leave the device open, whatever happened above
        self.closeInputStream()
def play(self):
    """Play self.data through a freshly opened output stream.

    Chunks are written with putChunk() until it raises IndexError, which
    signals that the data has run out. The output stream is closed again
    even if writing fails part-way.
    """
    self.openOutputStream()
    try:
        # Start from the first chunk
        self.chunk_index = 0
        print("Playing...")
        while True:
            try:
                self.putChunk()
            except IndexError:
                # Ran out of data to output
                break
        print("Stopped playing")
    finally:
        # Never leave the device open, whatever happened above
        self.closeOutputStream()
def save(self,path):
    """Write self.data to a WAV file at path.

    The header carries the channel count, sample width and rate held by
    this instance. The file is closed even if writing fails.
    """
    wf = wave.open(path, 'wb')
    try:
        # Set the header information
        wf.setnchannels(self.chan)
        wf.setsampwidth(self.get_sample_size(self.format))
        wf.setframerate(self.rate)
        # tobytes replaces the deprecated ndarray.tostring
        wf.writeframes(self.data.tobytes())
    finally:
        wf.close()
def load(self,path):
    """Replace the format and data of this object with those of a WAV file.

    Sets self.format, self.nptype, self.chan and self.rate from the file
    header and self.data from its frames.

    Raises ValueError when the sample width of the file has no NumPy type
    known to getNpType(); nothing on the object is changed in that case.
    """
    wf = wave.open(path, "rb")
    try:
        sample_format = self.get_format_from_width(wf.getsampwidth())
        nptype = self.getNpType(sample_format)
        if nptype is None:
            raise ValueError("Unsupported sample width: %d bytes"
                             % wf.getsampwidth())
        channels = wf.getnchannels()
        rate = wf.getframerate()
        # One read for the whole file: comparing chunks against "" never
        # terminates on Python 3 (bytes != str), and appending chunk by
        # chunk copies the growing array every time.
        raw = wf.readframes(wf.getnframes())
    finally:
        wf.close()

    self.format = sample_format
    self.nptype = nptype
    self.chan = channels
    self.rate = rate
    # frombuffer returns a read-only view of the bytes; copy so the data
    # can be modified in place later.
    self.data = np.frombuffer(raw, dtype=nptype).copy()
def getNpType(self,type):
    """Convert a PyAudio sample format to the matching NumPy type.

    Only pyaudio.paInt16 is handled; any other format yields None. This
    really needs expanding to deal with other data types, e.g. 8-bit and
    24-bit audio.
    """
    return np.int16 if type == pyaudio.paInt16 else None
def getPaType(self,type):
    """Convert a NumPy sample type to the matching PyAudio format.

    Only np.int16 is handled; any other type yields None.
    """
    return pyaudio.paInt16 if type == np.int16 else None
def add_echo(self,repeat,delay):
    """Add an echo to the current audio data.

    repeat -- how many delayed repeats to add
    delay  -- how long to delay each repeat (in samples)

    The data grows by repeat * delay samples. The original is mixed in at
    1/2 amplitude and the i-th repeat at 1/2**(i+1), so the sum cannot
    clip.

    Raises ValueError if repeat or delay is negative.
    """
    if repeat < 0 or delay < 0:
        raise ValueError("Expected non-negative repeat and delay")

    length = self.data.shape[0]
    # New array with room for the echo tail
    mixed = np.zeros(length + repeat * delay, dtype=self.nptype)
    # Widen once so the division below cannot depend on the sample type
    source = self.data.astype(np.int64)

    # Iteration 0 lays down the attenuated original, the rest are echoes
    for i in range(repeat + 1):
        start = i * delay
        scale = 2 ** (i + 1)
        # Explicit floor division: "/" would produce floats on Python 3,
        # which cannot be added in place to an integer array.
        mixed[start:start + length] += (source // scale).astype(self.nptype)

    self.data = mixed
def rescale(self,val):
    """Scale self.data so that its largest peak becomes val * MAX_AMP.

    val -- target peak as a fraction of full scale, between 0 and 1

    Empty or completely silent data is left untouched, since there is no
    peak to scale from. For integer sample types the result is clipped to
    the representable range before conversion.

    Raises ValueError if val is outside 0..1.
    """
    if not 0 <= val <= 1:
        raise ValueError("Expected scaling factor between 0 and 1")
    if self.data.shape[0] == 0:
        return

    # Work in floats: abs() of the most negative integer sample would
    # overflow in the integer type.
    samples = self.data.astype(np.float64)
    # max() looks at every sample, including the last one
    peak = np.abs(samples).max()
    if peak == 0:
        return

    scaled = samples * (val * MAX_AMP / peak)
    target = np.dtype(self.nptype)
    if target.kind in "iu":
        limits = np.iinfo(target)
        scaled = np.clip(scaled, limits.min, limits.max)
    self.data = scaled.astype(target)
def create_tone(self,frequency,length,amplitude):
    """Replace self.data with a sine tone.

    frequency -- tone frequency, in cycles per self.rate samples
    length    -- number of samples to generate
    amplitude -- peak level as a fraction of MAX_AMP, between 0 and 1

    For integer sample types the waveform is clipped to the representable
    range before conversion, so full amplitude cannot wrap around.

    Raises ValueError if amplitude is outside 0..1.
    """
    if not 0 <= amplitude <= 1:
        raise ValueError("Expected amplitude between 0 and 1")

    # Evaluate the whole waveform at once instead of sample by sample
    index = np.arange(length, dtype=np.float64)
    tone = amplitude * MAX_AMP * np.sin(frequency * index * 2 * math.pi / self.rate)

    target = np.dtype(self.nptype)
    if target.kind in "iu":
        limits = np.iinfo(target)
        tone = np.clip(tone, limits.min, limits.max)
    self.data = tone.astype(target)
def create_noise(self,length,amplitude):
    """Replace self.data with random noise.

    length    -- number of samples to generate
    amplitude -- level as a fraction of MAX_AMP, between 0 and 1

    Each sample is amplitude * MAX_AMP * random.random(), so values lie
    between 0 and amplitude * MAX_AMP (never negative).

    Raises ValueError if amplitude is outside 0..1.
    """
    if not (0 <= amplitude <= 1):
        raise ValueError("Expected amplitude between 0 and 1")

    gain = amplitude * MAX_AMP
    noise = np.zeros(length, self.nptype)
    for position in range(length):
        noise[position] = gain * random.random()
    self.data = noise
def add(self,other):
    """Mix the data of another audio object into this one.

    Both signals are added at half amplitude so the sum cannot clip. The
    result is as long as the longer of the two; the shorter one is treated
    as silent past its end.

    Cons of this approach: it changes the original object, and if used
    more than once to add more than two objects together the relative
    amplitudes are not maintained due to the scaling.
    """
    own = self.data
    theirs = other.data

    # Accumulate in floats; halved samples cannot be added in place to an
    # integer array.
    mix = np.zeros(max(own.shape[0], theirs.shape[0]), dtype=np.float64)
    # Add each signal only over its own length, so differing lengths work
    mix[:own.shape[0]] += own / 2.0
    mix[:theirs.shape[0]] += theirs / 2.0

    self.data = mix.astype(self.nptype)
def reverse(self):
    """Reverse self.data, storing the result in a new array of self.nptype."""
    count = self.data.shape[0]
    flipped = np.zeros(count, dtype=self.nptype)
    # Item i of the new array is item (count - 1 - i) of the old one
    flipped[:] = self.data[::-1]
    self.data = flipped
def reverse_npindex(self):
    """Reverse self.data by slicing it with a step of -1."""
    backwards = slice(None, None, -1)
    self.data = self.data[backwards]
def __len__(self):
    """Return the size of the first axis of self.data."""
    sample_count = self.data.shape[0]
    return sample_count
def get_samplerange(self):
    """Return the number of distinct sample values for the current type.

    Gives 2**16 (as a float) for np.int16 and None for any other type.
    """
    if not (self.nptype == np.int16):
        return None
    return math.pow(2, 16)
def time_to_samples(self,time):
    """Convert a time to a number of samples by multiplying it with self.rate."""
    rate = self.rate
    return time * rate
def samples_to_time(self, samples):
    """Convert a number of samples to time (seconds) by dividing by self.rate."""
    seconds = float(samples) / self.rate
    return seconds
def compute_fft(self, start, end):
    """Return the windowed magnitude spectrum of self.data[start:end].

    Only the first ceil((end - start + 1) / 2) bins of the FFT are kept.
    Their magnitudes are multiplied by a Hanning window of the same length
    before being returned.
    """
    dur = end - start
    spectrum = np.fft.fft(self.data[start:end])
    # Slice bounds must be integers; np.ceil returns a float
    real_range = int(np.ceil((dur + 1) / 2.0))
    magnitude = np.abs(spectrum[:real_range])
    # NOTE(review): the window is applied to the spectrum, not to the
    # samples before the FFT - kept as in the original, confirm intent.
    return magnitude * np.hanning(len(magnitude))
def change_speed(self, factor):
    """Resample self.data by picking every factor-th sample.

    Sample positions 0, factor, 2*factor, ... are rounded to the nearest
    index; positions that round past the end of the data are dropped.
    """
    total = len(self.data)
    positions = np.round(np.arange(0, total, factor))
    in_range = positions < total
    picked = positions[in_range].astype(int)
    self.data = self.data[picked]
def time_stretch_fft(self, factor, windowsize=1024, overlap=512, apply_hanning=True):
    """Stretch self.data in time by overlap-adding rephased FFT frames.

    factor        -- speed factor; the result holds about
                     len(self.data) / factor samples (plus one window)
    windowsize    -- number of samples per analysis frame
    overlap       -- offset in samples between the two frames compared in
                     each step; frames are read every overlap * factor
                     samples
    apply_hanning -- taper each frame with a Hanning window if true

    The result is normalised so that its largest absolute value is 2**12
    and stored with type self.nptype. Data too short for a single step
    yields silence.

    Raises ValueError if factor is not positive.
    """
    if factor <= 0:
        raise ValueError("Expected a positive stretch factor")
    factor = float(factor)
    total = len(self.data)

    if apply_hanning:
        amp_window = np.hanning(windowsize)
    else:
        amp_window = np.ones(windowsize, dtype=np.float64)

    phase = np.zeros(windowsize)
    result = np.zeros(int(total / factor + windowsize))
    full_turn = 2 * np.pi

    # Step through the input in (possibly fractional) hops and truncate
    # each position to a sample index.
    for position in np.arange(0, total - (windowsize + overlap), overlap * factor):
        i = int(position)
        a1 = self.data[i:i + windowsize]
        a2 = self.data[i + overlap:i + windowsize + overlap]

        s1 = np.fft.fft(amp_window * a1)
        s2 = np.fft.fft(amp_window * a2)

        # Accumulate the phase advance between the two frames. Subtracting
        # the angles equals angle(s2 / s1) modulo a full turn but does not
        # divide by zero on silent bins. The modulus must be 2*pi as a
        # whole: "% 2*np.pi" parses as "(x % 2) * np.pi".
        phase = (phase + np.angle(s2) - np.angle(s1)) % full_turn
        a2_rephased = np.fft.ifft(np.abs(s2) * np.exp(1j * phase))

        i2 = int(i / factor)
        result[i2:i2 + windowsize] += amp_window * np.real(a2_rephased)

    # Normalise on the largest magnitude (a negative peak may exceed the
    # positive one) and skip the division for an all-zero result.
    peak = np.abs(result).max()
    if peak > 0:
        result = (2 ** (16 - 4)) * result / peak
    self.data = result.astype(self.nptype)
def plot_waveform(self, start=0, end=-1, x_unit="samples"):
    """Plot self.data[start:end] against sample number or time.

    start, end -- slice bounds into self.data; note that the default end
                  of -1 leaves out the last sample
    x_unit     -- "samples" for sample numbers or "time" for seconds; any
                  other value leaves the plot without a curve

    The y axis spans half the sample range either side of zero.
    """
    array = self.data[start:end]
    num_samples = len(array)

    if x_unit == "samples":
        pl.plot(range(num_samples), array)
        pl.xlabel('Time (Samples)')
    elif x_unit == "time":
        # Exactly one timestamp per sample. Building the axis with a
        # fractional arange step can yield one point too many or too few,
        # and divides by zero for an empty slice.
        seconds = np.arange(num_samples) / float(self.rate)
        pl.plot(seconds, array)
        pl.xlabel('Time (s)')

    pl.ylabel('Amplitude')
    samplerange = self.get_samplerange()
    pl.ylim([-samplerange/2, samplerange/2])
    pl.show()
def plot_spectrum(self, array, start=0, end=-1, plot_log=False):
    """Plot array[start:end] as a spectrum.

    array      -- spectrum values to plot
    start, end -- slice bounds into array; note that the default end of -1
                  leaves out the last value
    plot_log   -- plot 10 * log10(value), labelled 'Power (dB)', if true

    NOTE(review): the x values are the array indices divided by 1000; the
    conversion from index to frequency is disabled, so the 'Frequency
    (kHz)' label may not match - confirm.
    """
    selection = array[start:end]
    bin_axis = np.arange(0, len(selection), 1.0)  #* (self.rate / len_arr)
    x_values = bin_axis / 1000

    if plot_log:
        pl.plot(x_values, 10*np.log10(selection), color='k')
        pl.ylabel('Power (dB)')
    else:
        pl.plot(x_values, selection, color='k')

    pl.xlabel('Frequency (kHz)')
    pl.show()
def sum(audio_objects):
    """Mix any number of audio objects into a new Audio object.

    audio_objects -- a non-empty sequence of audio objects

    Unlike Audio.add this is a plain function in the module namespace (it
    shadows the built-in sum there). Every input is scaled by
    1 / len(audio_objects), so the scaling is uniform and the mix cannot
    clip. The result is as long as the longest input; shorter inputs are
    treated as silent past their end.

    Raises ValueError if audio_objects is empty.
    """
    if not audio_objects:
        raise ValueError("Expected at least one audio object")

    # max() with key=len picks the longest object; its length is ours
    length = len(max(audio_objects, key=len))
    scale = 1.0 / len(audio_objects)

    # Accumulate in floats; scaled samples cannot be added in place to an
    # integer array.
    mix = np.zeros(length, dtype=np.float64)
    for obj in audio_objects:
        # Add each input only over its own length, so differing lengths work
        mix[:len(obj.data)] += obj.data * scale

    new_object = Audio()
    # Should really check that the dtype of each of the objects is the
    # same and use that dtype!
    new_object.data = mix.astype(np.int16)
    return new_object
def testAdd():
    """Mix three tones (261.63, 329.63 and 392.00, named c, e and g) and play them."""
    voices = [Audio() for _ in range(3)]
    pitches = (261.63, 329.63, 392.00)
    for voice, pitch in zip(voices, pitches):
        voice.create_tone(pitch, 240000, 0.8)
    chord = sum(tuple(voices))
    chord.play()
if __name__ == "__main__":
    # Nothing runs by default; the lines below are usage examples that can
    # be uncommented to try the module out.
    pass
    #testAdd()
    #a = Audio()
    #a.record(5)
    #a.save("qqq.wav")
    #b = Audio()
    #b.load("nina48.wav")
    #b.rescale(1.0)
    #b.change_speed(0.5)
    #b.time_stretch_fft(2.0)
    #b.plot_waveform()
    #b.play()