-
Notifications
You must be signed in to change notification settings - Fork 0
/
mp_matmul_shared.py
86 lines (59 loc) · 2.17 KB
/
mp_matmul_shared.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from multiprocessing import Pool,Process
from multiprocessing import sharedctypes
import ctypes
import numpy
from numpy import ctypeslib
import numpy
from time import time
def mat_rowrange_mul(args):
    """Compute one band of rows of ``C = A . B`` directly into shared memory.

    Everything is packed into a single tuple so the function can be handed
    to a ``Pool`` (whose map functions pass exactly one argument) as well as
    used as a ``Process`` target.

    Args:
        args: A 6-tuple
            ``(a_row_domain, a_shape, b_shape, shared_a, shared_b, shared_c)``:

            * ``a_row_domain`` -- ``(start, stop)`` half-open range of rows
              of A (and therefore of C) this call is responsible for.
            * ``a_shape`` / ``b_shape`` -- 2-D shapes of A and B.
            * ``shared_a`` / ``shared_b`` -- flat ctypes arrays holding A
              and B in row-major order.
            * ``shared_c`` -- flat ctypes array receiving the product; it is
              viewed with shape ``(a_shape[0], b_shape[1])``.

            The three buffers are expected to share one element type, since
            ``numpy.dot(..., out=...)`` requires the output dtype to match
            the dtype of the product.

    Returns:
        None. The result is a side effect: rows ``start:stop`` of C are
        overwritten in ``shared_c``.
    """
    a_row_domain, a_shape, b_shape, shared_a, shared_b, shared_c = args
    row_start, row_stop = a_row_domain[0], a_row_domain[1]

    # View the shared ctypes buffers as numpy arrays and give them their
    # 2-D dimensions.
    nd_a = ctypeslib.as_array(shared_a).reshape(a_shape)
    nd_b = ctypeslib.as_array(shared_b).reshape(b_shape)
    nd_c = ctypeslib.as_array(shared_c).reshape((a_shape[0], b_shape[1]))

    # A contiguous range of full rows of a C-ordered array is itself
    # C-contiguous, so it is a valid ``out=`` target: the product is written
    # straight into shared memory with no temporary band and no extra copy.
    numpy.dot(
        nd_a[row_start:row_stop, :],
        nd_b,
        out=nd_c[row_start:row_stop, :],
    )
    return None
if __name__ == '__main__':
    # Benchmark: time one numpy.dot call in this process against the same
    # product split by rows across `num_cpus` worker processes that read
    # and write lock-free shared-memory buffers.
    x = 4096
    y = 256        # inner dimension; 2048 gives a heavier run
    num_cpus = 2   # 4 is another useful setting
    if num_cpus < 1:
        raise ValueError("num_cpus must be at least 1")

    a_shape = (x, y)
    b_shape = (y, x)
    c_shape = (a_shape[0], b_shape[1])

    # source matrices
    a = numpy.random.uniform(size=a_shape)
    b = numpy.random.uniform(size=b_shape)

    # Shared memory: A and B are copied in; C is allocated by size only
    # (zero-filled), so no throwaway numpy array has to be built and then
    # copied element by element.
    shared_a = sharedctypes.Array(ctypes.c_double, a.flat, lock=False)
    shared_b = sharedctypes.Array(ctypes.c_double, b.flat, lock=False)
    shared_c = sharedctypes.Array(
        ctypes.c_double, c_shape[0] * c_shape[1], lock=False)

    # access the answer as a numpy array, set dimensions
    nd_c = ctypeslib.as_array(shared_c).reshape(c_shape)

    # 1 process reference
    print("starting.")
    t1 = time()
    ans1 = numpy.dot(a, b)
    print("1 CPU:", time()-t1)

    # Row domains for each process. Integer arithmetic keeps the bounds
    # usable as slice indices (true division would produce floats), and
    # spreading the remainder means x need not be a multiple of num_cpus.
    bounds = [i * x // num_cpus for i in range(num_cpus + 1)]
    domains = list(zip(bounds, bounds[1:]))
    static_args = (a_shape, b_shape, shared_a, shared_b, shared_c)

    # allocate processes
    workers = [
        Process(target=mat_rowrange_mul, args=((domain,) + static_args,))
        for domain in domains
    ]

    t1 = time()
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
    elapsed = time()-t1

    # A worker that died leaves its band of C unwritten; report that
    # explicitly instead of letting it show up only as "Same?: False".
    bad_exit_codes = [w.exitcode for w in workers if w.exitcode != 0]
    if bad_exit_codes:
        raise RuntimeError(
            "worker process(es) failed, exit codes: %r" % (bad_exit_codes,))

    ans2 = nd_c
    print("%d CPUs:" % num_cpus, elapsed)
    # numpy.alltrue was removed in NumPy 2.0; numpy.all is the equivalent.
    print("Same?:", numpy.all(numpy.abs(ans1 - ans2) < 1e-11))