-
Notifications
You must be signed in to change notification settings - Fork 0
/
imageToWav-pycuda.py
103 lines (87 loc) · 3.22 KB
/
imageToWav-pycuda.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import numpy as np
import Image
import struct
import math
import sys
import pycuda.autoinit
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray
import pycuda.cumath as cm
from pycuda.compiler import SourceModule
def oscillator(x, freq=1, amp=1, base=0, phase=0):
    """Evaluate a sine wave at every element of *x*.

    The sine is computed with ``cm.sin`` (``pycuda.cumath``), so *x* is
    expected to be a GPU array; the result is whatever that call returns
    after scaling and offsetting.

    Args:
        x: Sample positions at which to evaluate the wave.
        freq: Multiplier applied to ``2 * pi * x`` inside the sine.
        amp: Factor the sine is multiplied by.
        base: Constant added to the scaled sine.
        phase: Constant added to the sine argument.

    Returns:
        ``base + amp * sin(2 * pi * freq * x + phase)``.
    """
    angular_freq = 2 * np.pi * freq
    wave = cm.sin(angular_freq * x + phase)
    return base + amp * wave
def writewav(filename, numChannels, sampleRate, bitsPerSample, time, data):
    """Write *data* to *filename* as an uncompressed PCM WAV file.

    A 44-byte RIFF/WAVE header is built from the arguments (all header
    integers little-endian), followed by the samples scaled by 1000 and
    stored as signed 16-bit integers.

    Header layout reference:
    https://ccrma.stanford.edu/courses/422/projects/WaveFormat/

    Args:
        filename: Path of the file to create.
        numChannels: Channel count recorded in the header.
        sampleRate: Samples per second recorded in the header.
        bitsPerSample: Bits per sample recorded in the header. The payload
            is always written as int16, so only 16 yields a header that
            matches the payload.
        time: Duration in seconds. Together with the three values above it
            determines the data size announced in the header; the actual
            length of *data* is not consulted.
        data: Array of samples (anything ``np.asarray`` accepts).
    """
    # Floor division keeps these integral on both Python 2 and Python 3;
    # struct.pack rejects floats for the integer format codes.
    data_size = int(time * sampleRate * numChannels * bitsPerSample // 8)
    byte_rate = sampleRate * numChannels * bitsPerSample // 8
    block_align = numChannels * bitsPerSample // 8

    header = struct.pack(
        '<4sI4s4sIHHIIHH4sI',
        b'RIFF', data_size + 36, b'WAVE',
        b'fmt ', 16,            # Subchunk1ID, Subchunk1Size
        1,                      # AudioFormat: 1 = PCM
        numChannels, sampleRate, byte_rate, block_align, bitsPerSample,
        b'data', data_size,
    )

    # Scale, then saturate to the int16 range: without the clip, values
    # beyond +/-32767 wrap around on the cast and show up as noise.
    samples = np.clip(1000 * np.asarray(data), -32768, 32767).astype(np.int16)

    # The context manager closes the file even if a write fails.
    with open(filename, 'wb') as wave:
        wave.write(header)
        samples.tofile(wave)
# Script body: render an image as sound, one image column per time slice.
# Usage (from the sys.argv reads below): <script> INPUT_IMAGE OUTPUT_WAV

SAMPLE_RATE = 44100  # samples per second, used for synthesis and the header

# Force three-channel pixels so the p[r], p[g], p[b] lookups below also work
# for greyscale and palette images.
im = Image.open(sys.argv[1]).convert('RGB')
size = im.size
d = list(im.getdata())
xres = size[0]
yres = size[1]

# Row y is mapped to frequency yscale * (yres - y): the top row gets
# 22000 and the bottom row gets 22000 / yres.
yscale = 22000 / float(yres)

# Total duration in whole seconds, proportional to the aspect ratio.
time = int(round(22.0 * xres / yres))

# Seconds of audio produced per image column.
xlen = time / float(size[0])

# rgb aliases
r = 0
g = 1
b = 2

# Collect one host-side chunk per column and join them once at the end;
# appending to a growing array inside the loop recopies it every time.
chunks = []
for x in range(xres):
    # float32 costs precision; the original author's note says float64 was
    # not supported by their GPU.
    t_gpu = gpuarray.arange(x*xlen, x*xlen + xlen, 1./SAMPLE_RATE,
                            dtype=np.float32)
    tone_gpu = gpuarray.zeros(t_gpu.size, dtype=np.float32)
    print("{0}%".format(round(100.0 * x / xres, 2)))
    for y in range(yres):
        p = d[x + xres*y]
        # NOTE(review): `and` binds tighter than `or`, so this reads
        # "red > 10, or (green > 10 and blue > 10)". The parentheses only
        # make the existing behaviour explicit -- confirm whether
        # "any channel > 10" was intended.
        if p[r] > 10 or (p[g] > 10 and p[b] > 10):
            # keep playing with these values: maps summed brightness
            # 0..765 to an exponent of -4.25..0
            amplitude = 10**(1-5.25+4.25*(p[r]+p[g]+p[b])/(255*3))
            tone_gpu += oscillator(t_gpu,
                                   amp=amplitude,
                                   freq=yscale * (yres - y))
    # Shift by +1 and scale by 1/ln(128). This looks like what remains of
    # an earlier log-compression experiment -- TODO confirm intent.
    tone_gpu = tone_gpu + 1
    tone_gpu = tone_gpu / math.log(128)
    chunks.append(tone_gpu.get())

# The leading float64 empty array keeps the result float64, as before.
out = np.concatenate([np.zeros(0)] + chunks)

# pad with silence at end if necessary
expected_samples = SAMPLE_RATE * time
if out.size < expected_samples:
    out = np.append(out, np.zeros(expected_samples - out.size))

writewav(sys.argv[2], 1, SAMPLE_RATE, 16, time, out)