-
Notifications
You must be signed in to change notification settings - Fork 0
/
convoflute.py
120 lines (94 loc) · 4.6 KB
/
convoflute.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# CONVOFLUTE by Colin McSwiggen
# A little convolution-based instrument/sound-design tool.
# Released in 2013 under a Creative Commons BY-NC-SA license.
import numpy as np
import numm
import sys
# Playback sample rate in Hz (44.1kHz by default). Changing this constant
# will presumably frequency shift the output.
SAMPLE_RATE = 44100

# Current position in the playback grid: X coordinate, then Y coordinate.
outX = 0
outY = 0

# Dimensions of the video output: width, then height.
vidWidth = 1440
vidHeight = 900

# Input audio filenames. Should be given as the second and third
# command-line arguments.
fileName1 = "file1.wav"
fileName2 = "file2.wav"

# The number of samples per chunk. Should be given as the first
# command-line argument.
chunkSize = 4096

# The current convolution spectrum, for the visualization.
currentSpectrum = np.zeros(chunkSize)
def chunkulate(sound, chunkLength):
    # Split a sound into consecutive, non-overlapping chunks.
    #
    # sound is a numpy array that is sliced along its first axis, so the
    # first axis is the sample axis (a 2-channel sound is whatever-by-2).
    # chunkLength is the desired chunk length in samples.
    #
    # Returns a numpy array holding floor(whatever/chunkLength) chunks, each
    # one being chunkLength consecutive rows of sound. Remainder samples at
    # the end of the sound are discarded.
    #
    # Floor division keeps the chunk count an integer on both Python 2 and
    # Python 3; a plain "/" yields a float on Python 3, which range() rejects.
    numChunks = sound.shape[0] // chunkLength
    return np.array([sound[i * chunkLength:(i + 1) * chunkLength] for i in range(numChunks)])
def scale(arr, maxVal):
    # Returns arr, a numpy array, scaled so that its max absolute value is
    # maxVal.
    #
    # An all-zero input (e.g. a silent chunk) has no meaningful scale factor;
    # it is returned as zeros instead of dividing by zero and producing
    # NaN/inf values.
    peak = np.amax(np.absolute(arr))
    if peak == 0:
        return np.zeros_like(arr, dtype=float)
    # float() forces true division, so integer inputs are not floor-divided
    # on Python 2.
    return arr * (float(maxVal) / peak)
def audio_out(chunk):
    # Audio callback: adds the convolution of the currently selected chunk of
    # each input file into chunk (the output buffer, modified in place).
    #
    # Reads the globals outX/outY (position in the playback grid) and
    # windowed1/windowed2 (the windowed chunks of the two input files).
    # Stores the frequency-domain product in currentSpectrum for the
    # visualization, and the rendered int16 samples in currentOutChunk so the
    # most recent output can be saved later.
    global currentSpectrum, currentOutChunk
    # Locate the appropriate chunks of the input files. The indices are
    # clamped so that a coordinate of exactly 1.0 selects the last chunk
    # instead of running off the end, and a negative one does not wrap.
    index1 = max(0, min(int(outX * windowed1.shape[0]), windowed1.shape[0] - 1))
    index2 = max(0, min(int(outY * windowed2.shape[0]), windowed2.shape[0] - 1))
    chunk1 = windowed1[index1]
    chunk2 = windowed2[index2]
    # Calculate the frequency-domain product of the chunks.
    spectrum = np.fft.rfft(chunk1.T) * np.fft.rfft(chunk2.T)
    # Transform back to get the (circular) convolution of the chunks. Passing
    # n explicitly keeps the result the same length as the input chunks even
    # when the chunk length is odd.
    conv = np.fft.irfft(spectrum, n=chunk1.shape[0]).T
    # Output the scaled convolution.
    rendered = scale(conv, 32000).astype('int16')
    chunk += rendered
    currentSpectrum = spectrum
    currentOutChunk = rendered
def video_out(frame):
    # Video callback: paints frame (indexed as [row, column, channel]) with a
    # brightness strip derived from the global currentSpectrum, then draws
    # crosshairs at the current grid location given by the globals outX/outY.
    frame[:, :, :] = 0  # Start with a black frame.
    # Visualize the current audio spectrum.
    # atleast_2d lets the initial 1-D placeholder spectrum go through the same
    # code path as a per-channel spectrum; log(0) is expected for empty bins
    # and is clipped to 0 below, so its warning is silenced.
    with np.errstate(divide='ignore'):
        logMagnitude = np.log(np.absolute(np.atleast_2d(currentSpectrum)))
    # Derive brightness values from the spectrum.
    brights = np.clip(4 * np.mean(logMagnitude, axis=0), 0, 255).astype('int16')
    print(brights)  # Debug output kept from the original.
    # Stretch the brightness values across the frame width: each column gets
    # the value of the spectrum bin it maps onto.
    width = frame.shape[1]
    columns = (np.arange(width) * float(brights.shape[0]) / width).astype(int)
    frame[:, :, :] = brights[columns][np.newaxis, :, np.newaxis]
    # Draw red crosshairs over the current grid location. The indices are
    # clamped so a coordinate of exactly 1.0 lands on the last row/column.
    row = max(0, min(int(outY * frame.shape[0]), frame.shape[0] - 1))
    col = max(0, min(int(outX * frame.shape[1]), frame.shape[1] - 1))
    frame[row, :, 0] = 255
    frame[:, col, 0] = 255
def mouse_in(event_type, px, py, button):
    # Mouse callback. On a mouse click, update the global grid coordinates
    # outX/outY to the click position (px, py) and report them. Other event
    # types are ignored. button is unused.
    global outX, outY
    if event_type == 'mouse-button-press':
        outX = px
        outY = py
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("CLICK!")
        print("outX: " + str(outX) + " outY: " + str(outY))
def keyboard_in(event_type, key):
    # Keyboard callback. Quit on Q. Save current output chunk on space.
    # Navigate playback grid with WASD in steps of 0.01, staying within the
    # grid. event_type is not inspected.
    global runner, outX, outY
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print("KEY DOWN: " + key)
    if key == ' ':
        # Look the chunk up defensively: if no rendered output chunk has been
        # stored in the module globals, report it instead of raising NameError.
        rendered = globals().get('currentOutChunk')
        if rendered is None:
            print("No output chunk available to render yet.")
        else:
            outFile = fileName1 + "--" + fileName2 + "--" + str(chunkSize) + "_" + str(outX) + "_" + str(outY) + ".wav"
            numm.np2sound(rendered, outFile)
            print("Rendered " + outFile)
    elif key == 'q':
        runner.quit()
    elif key == 'a' and outX > 0.01:
        outX -= 0.01
    elif key == 'd' and outX < 0.99:
        outX += 0.01
    elif key == 'w' and outY > 0.01:
        outY -= 0.01
    elif key == 's' and outY < 0.99:
        outY += 0.01
    print("outX: " + str(outX) + " outY: " + str(outY))
if __name__ == "__main__":
    # The three command-line arguments are the audio chunk size (in samples)
    # and the filenames of the two files to analyze.
    if len(sys.argv) < 4:
        sys.exit("Usage: " + sys.argv[0] + " CHUNK_SIZE FILE1 FILE2")
    chunkSize = int(sys.argv[1])
    fileName1 = sys.argv[2]
    fileName2 = sys.argv[3]

    # Extract the files to sound!
    # (Single-argument print(...) behaves the same on Python 2 and 3.)
    print("Extracting " + fileName1 + " and " + fileName2 + "...")
    audio1 = numm.sound2np(fileName1)
    audio2 = numm.sound2np(fileName2)

    # Chunkulate the nparrays.
    print("Chunkulating...")
    chunks1 = chunkulate(audio1, chunkSize)
    chunks2 = chunkulate(audio2, chunkSize)
    print("Chunked " + fileName1 + " into " + str(chunks1.shape) + " chunks.")
    print("Chunked " + fileName2 + " into " + str(chunks2.shape) + " chunks.")
    # Playback indexes into these chunk arrays, so each file must supply at
    # least one full chunk.
    if chunks1.shape[0] == 0 or chunks2.shape[0] == 0:
        sys.exit("Each input file must contain at least " + str(chunkSize) + " samples.")

    # Window the chunks.
    print("Windowing the chunks...")
    # A column-vector Hamming window broadcasts across every chunk and every
    # channel, giving the same result as replicating the window per chunk and
    # per channel without building the large temporary arrays.
    window = np.hamming(chunkSize)[:, np.newaxis]
    windowed1 = chunks1 * window
    windowed2 = chunks2 * window

    # Run the thing!
    # runner is the numm object that plays the sound.
    runner = numm.Run(audio_out=audio_out, video_out=video_out, mouse_in=mouse_in, keyboard_in=keyboard_in, width=vidWidth, height=vidHeight, fullscreen=True, audio_chunksize=chunkSize, audio_samplerate=SAMPLE_RATE)
    runner.run()