-
Notifications
You must be signed in to change notification settings - Fork 0
/
matvec.py
162 lines (135 loc) · 4.11 KB
/
matvec.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
from functools import wraps
import operator as op
import math
import numpy as np
import sys
import multiprocessing
from multiprocessing import Process
sys.path.append('test_harness')
from timedict import timedict
# Without using numpy
def star_add(opperands):
    """Add the items of a tuple so the addition can be used with ``map``.

    :opperands: tuple whose items are unpacked as the arguments
        of ``operator.add``
    :returns: the result of ``operator.add`` on the unpacked items
    """
    total = op.add(*opperands)
    return total
def reduc_tree(pool, oper, seq, NP):
    """Reduce a sequence pairwise, level by level, using ``pool.map``.

    At each level neighbouring items are paired up as ``(even, odd)``
    tuples and ``oper`` is mapped over the pairs by the pool.  When a level
    has an odd number of items the unpaired last item is carried over to
    the next level unchanged, so no element is ever dropped.

    :pool: object exposing ``map(func, iterable)`` (e.g. a multiprocessing
        pool)
    :oper: takes a pair ``(a, b)`` and returns a single item
    :seq: the sequence to reduce; it must support ``len`` and slicing and
        is not modified
    :NP: the number of processing elements (currently unused)
    :returns: a sequence holding the single reduced value; ``seq`` itself
        is returned when it has fewer than two items
    """
    level = seq
    while len(level) > 1:
        pairs = zip(level[0::2], level[1::2])
        reduced = list(pool.map(oper, pairs))
        if len(level) % 2 == 1:
            # zip() stops at the shorter slice, so the final item of an
            # odd-length level has no partner; keep it for the next level.
            reduced.append(level[-1])
        level = reduced
    return level
def MP_main(pool, num_procs):
    """Sum the module-level ``SEQ`` with the pool-based reduction tree.

    :pool: worker pool handed to ``reduc_tree``
    :num_procs: the number of processing elements, forwarded to
        ``reduc_tree``
    :returns: whatever ``reduc_tree`` returns for ``SEQ`` (a sequence
        holding the reduced value)
    """
    # Forward the caller's processor count instead of reading the global
    # ``NP``, which only exists when this file is run as a script.
    return reduc_tree(pool, star_add, SEQ, num_procs)
# With NUMPY much faster
def np_norm(arr):
    """Return the element-wise square root of ``arr * arr``.

    Note that this is applied per element; it does not collapse ``arr``
    into a single vector norm.

    :arr: operand supporting ``*`` with itself (e.g. a numpy array)
    :returns: ``np.sqrt`` of the element-wise square
    """
    squared = arr * arr
    return np.sqrt(squared)
def partition(seq, NP):
    """Split a sequence into ``NP`` contiguous slices of near-equal length.

    Slice boundaries are computed with integer arithmetic.  Deriving them
    from a floating-point chunk size can round the final boundary just
    below ``len(seq)`` (for example ``49 * (1 / 49) < 1``), which would
    silently drop trailing elements.

    :seq: any array like supporting ``len`` and slicing
    :NP: the number of slices to produce
    :returns: a list of ``NP`` slices of ``seq`` that together cover every
        element exactly once, in order (some are empty when
        ``NP > len(seq)``)
    """
    n = len(seq)
    # bounds[i] is the start of slice i; bounds[NP] == n closes the last one.
    bounds = [(i * n) // NP for i in range(NP + 1)]
    return [seq[start:stop] for start, stop in zip(bounds, bounds[1:])]
def packed_reduction(pool, seq, NP):
    """Sum pre-partitioned data: one ``np.sum`` per chunk, then a final sum.

    Each item of ``seq`` is summed through ``pool.map``; the resulting
    partial sums are then added together with ``np.sum`` in the caller.

    :pool: object exposing ``map(func, iterable)``
    :seq: iterable of chunks, each acceptable to ``np.sum``
    :NP: the number of processing elements (currently unused)
    :returns: the sum of all partial sums
    """
    # TODO add operator field
    partial_sums = pool.map(np.sum, seq)
    return np.sum(partial_sums)
def dot(tupl):
    """Apply ``np.dot`` to the items of a tuple so it can be used with ``map``.

    :tupl: tuple whose items are unpacked as the arguments of ``np.dot``
    :returns: the result of ``np.dot``
    """
    product = np.dot(*tupl)
    return product
def par_matvec(pool, args, np):
    """Map ``dot`` over the argument tuples with the pool, timing the map.

    The call is bracketed by ``tic('map')`` / ``toc('map')`` on the
    module-level ``timer``.

    :pool: object exposing ``map(func, iterable)``
    :args: iterable of tuples, each passed to ``dot``
    :np: unused here; note that inside this function the name shadows the
        module-level ``np`` (numpy) import
    :returns: the list of results produced by ``pool.map``
    """
    timer.tic('map')
    block_products = pool.map(dot, args)
    timer.toc('map')
    return block_products
def numpy_main(pool, num_procs):
    """Compute the sum of ``MAT . SEQ`` block-wise through the pool.

    ``MAT`` is partitioned into ``num_procs`` slices, each slice is paired
    with ``SEQ`` and multiplied via ``par_matvec``, and the partial results
    are summed by ``packed_reduction``.  The partition and zip steps are
    timed on the module-level ``timer``, and the shapes of the partial
    results are printed.

    :pool: worker pool used for the products and the reduction
    :num_procs: the number of slices / processing elements
    :returns: the value returned by ``packed_reduction``
    """
    timer.tic('partition')
    block_rows = partition(MAT, num_procs)
    timer.toc('partition')

    # Every block is multiplied by the same vector.
    vec_copies = [SEQ] * num_procs

    timer.tic('zip')
    args = zip(block_rows, vec_copies)
    timer.toc('zip')

    partial_products = par_matvec(pool, args, num_procs)
    print([a.shape for a in partial_products])
    return packed_reduction(pool, partial_products, num_procs)
def serial_main():
    """Compute the sum of ``MAT . SEQ`` in-process, without the pool.

    :returns: ``np.sum`` of the product of the module-level ``MAT`` and
        ``SEQ``
    """
    product = MAT.dot(SEQ)
    return np.sum(product)
if __name__ == '__main__':
    # Benchmark driver: runs the serial version, the pure-multiprocessing
    # reduction and the numpy+multiprocessing version, recording each under
    # its own key in ``timer`` and printing ratios of ``timer.ends`` entries.
    timer = timedict()
    scale = 14
    SEQ = np.random.random(2**scale)
    MAT = np.random.rand(2**scale, 2**scale)
    print(SEQ)

    MAX_PROCS = 4
    # Pool size and the timing labels below share one source of truth.
    NP = MAX_PROCS
    par_name = 'MP'+str(MAX_PROCS)
    npname = 'NumPy'+str(MAX_PROCS)

    pool = multiprocessing.Pool(processes=NP)
    try:
        timer.tic(0)
        count = serial_main()
        timer.toc(0)
        print('serial')

        print('we cannot do matvec without numpy')
        timer.tic(par_name)
        mp_count = MP_main(pool, NP)
        timer.toc(par_name)
        print('mp')

        timer.tic(npname)
        np_count = numpy_main(pool, NP)
        timer.toc(npname)
        print('np')
    finally:
        # Always release the worker processes, even if a run fails.
        pool.close()
        pool.join()

    # NOTE(review): the 'num_primes' label looks like a leftover from another
    # script; the values printed are the three computed sums.
    print('num_primes:%s,%s,%s' % (count, mp_count, np_count))
    print(repr(timer.ends))
    print('Multiprocs speedup: %f' %
          (timer.ends[0]/timer.ends[par_name]))
    print('NumPy speedup: %f' %
          (timer.ends[0]/timer.ends[npname]))