/
GPU_Processor.py
173 lines (139 loc) · 5.8 KB
/
GPU_Processor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
''' =======================================================================
GPU Processor
Exposes functionality which the GPU-rpc-server will use.
Using pyOpenCL to handle processing
======================================================================= '''
from util import timing
import pyopencl as cl
import numpy
import random
''' =======================================================================
Functionality
======================================================================= '''
# ----------------------------------------------------------------------------
#
# Data loader
#
# ----------------------------------------------------------------------------
def load_data(num_items=100000):
    '''Generate synthetic tax records as a dict of NumPy float32 arrays.

    This should be called only once, when a CL object is created. The
    values here are randomly generated stand-ins; a real implementation
    could load them from a file, database, etc.

    Parameters:
        num_items -- number of records (array length) to generate.
                     Defaults to 100000, the size originally hard-coded.

    Returns:
        dict mapping column name to a 1-D float32 array of length
        ``num_items``. Columns: 'income', 'capGains', 'fillingStatus',
        'dividendsInterest', 'children'.
    '''
    # NOTE: the 'fillingStatus' key spelling is kept as-is because callers
    # may look the column up by this exact name.

    def amount_column(mean, spread):
        # Gaussian draw; abs() folds negative draws back to positive values.
        return numpy.array(
            [abs(random.gauss(mean, spread)) for _ in range(num_items)],
            dtype=numpy.float32)

    def count_column(low, high):
        # Uniform integer in [low, high] (inclusive), stored as float32 like
        # every other column.
        return numpy.array(
            [random.randint(low, high) for _ in range(num_items)],
            dtype=numpy.float32)

    # Columns are generated in this fixed order so a seeded `random` module
    # yields the same data set on every run.
    income = amount_column(90000, 45000)
    cap_gains = amount_column(20000, 4000)
    filing_status = count_column(0, 4)
    dividends_interest = amount_column(50000, 45000)
    children = count_column(0, 4)

    return {
        'income': income,
        'capGains': cap_gains,
        'fillingStatus': filing_status,
        'dividendsInterest': dividends_interest,
        'children': children,
    }
class CL(object):
    '''Holds the OpenCL context, command queue and device buffers for the
    data set, and runs a per-request kernel over it.

    The input data is uploaded to the device once at construction time;
    each call to ``execute`` rebuilds the kernel from the request
    parameters and runs it against those resident buffers.
    '''

    def __init__(self):
        self.data = load_data()
        self.ctx = cl.create_some_context()
        self.queue = cl.CommandQueue(self.ctx)
        self.setup_buffers()

    @staticmethod
    def _last_timing(label):
        '''Return the most recent duration recorded under ``label``.

        The log messages in this class print the value as milliseconds;
        the unit itself is defined by the ``timing`` utility module.
        '''
        return timing.timings.timings[label]['timings'][-1]

    @staticmethod
    def _kernel_source(params):
        '''Return the OpenCL C source of the ``worker`` kernel for ``params``.

        Parameters:
            params -- mapping with an 'income' entry: the multiplier
                      applied to ``income * capGains`` for every record.

        Raises:
            KeyError   -- 'income' is missing from ``params``.
            TypeError / ValueError -- the value cannot be converted to a
                      float, or is not finite.

        The value is coerced to a float before being written into the
        source. Request parameters are spliced into code that gets
        compiled, so anything that is not a plain finite number must be
        rejected rather than pasted in verbatim.
        '''
        multiplier = float(params['income'])
        if not numpy.isfinite(multiplier):
            raise ValueError(
                "'income' multiplier must be a finite number, got %r"
                % (params['income'],))

        header = """
__kernel void worker(
    __global float* data_income,
    __global float* data_capGains,
    __global float* data_dividendsInterest,
    __global float* data_children,
    __global float* result
)
{
    unsigned int i = get_global_id(0);
    float d1 = data_income[i];
    float d2 = data_capGains[i];
"""
        # The data arrays are static; only this line depends on the request.
        # repr() of a float is a valid C literal; the 'f' suffix keeps it
        # single precision.
        calculation = "    result[i] = d1 * d2 * {income}f;\n".format(
            income=repr(multiplier))
        return header + calculation + "}"

    def setup_buffers(self):
        '''Create the device buffers for the data arrays. This needs to
        happen only once, as the data itself does not change.
        '''
        # The host-side arrays already exist in self.data (built by
        # load_data), so this first timed section only brackets the message.
        timing.timings.start('buffer')
        print('Setting up data arrays')
        timing.timings.stop('buffer')
        elapsed = self._last_timing('buffer')
        # Divide by 1000.0 so the seconds figure is not truncated by
        # Python 2 integer division should the timer report integers.
        print('Done setting up two numpy arrays in %s ms | (%s seconds)' % (
            elapsed,
            elapsed / 1000.0
        ))

        timing.timings.start('buffer')
        mf = cl.mem_flags
        # Inputs are read-only on the device and copied from host memory
        # at creation time.
        input_flags = mf.READ_ONLY | mf.COPY_HOST_PTR
        self.income_buf = cl.Buffer(
            self.ctx, input_flags, hostbuf=self.data['income'])
        self.capGains_buf = cl.Buffer(
            self.ctx, input_flags, hostbuf=self.data['capGains'])
        self.dividendsInterest_buf = cl.Buffer(
            self.ctx, input_flags, hostbuf=self.data['dividendsInterest'])
        self.children_buf = cl.Buffer(
            self.ctx, input_flags, hostbuf=self.data['children'])
        # Destination buffer must be the same size as the input buffer.
        self.dest_buf = cl.Buffer(
            self.ctx, mf.WRITE_ONLY, self.data['income'].nbytes)
        timing.timings.stop('buffer')
        print('Done setting up buffers in %s ms' % (
            self._last_timing('buffer'),
        ))

    def load_program(self, params):
        '''Build the .cl program to use and store it on ``self.program``.

        This gets regenerated at each request, as the values used change
        based on the request. See ``_kernel_source`` for the accepted
        parameters and the errors raised for invalid ones.
        '''
        source = self._kernel_source(params)
        self.program = cl.Program(self.ctx, source).build()

    def execute(self, params):
        '''Build the kernel for ``params``, run it over every record and
        return the result array.

        This is executed on each request - this is where we care about
        the performance.

        Parameters:
            params -- request parameters, passed to ``load_program``.

        Returns:
            NumPy array with the same shape and dtype as the 'income'
            column, holding one computed value per record.
        '''
        timing.timings.start('load')
        self.load_program(params)
        timing.timings.stop('load')
        print('<<< Loaded program in %s ms' % (self._last_timing('load'),))

        timing.timings.start('execute')
        # One work-item per record; None lets the implementation choose the
        # local work-group size.
        self.program.worker(
            self.queue,
            self.data['income'].shape,
            None,
            self.income_buf,
            self.capGains_buf,
            self.dividendsInterest_buf,
            self.children_buf,
            self.dest_buf,
        )

        # Host array in the shape of the original data to receive the result.
        result = numpy.empty_like(self.data['income'])
        # enqueue_copy supersedes the deprecated enqueue_read_buffer; fall
        # back only for PyOpenCL installs that predate it. wait() ensures
        # the data has landed in `result` before it is returned.
        if hasattr(cl, 'enqueue_copy'):
            cl.enqueue_copy(self.queue, result, self.dest_buf).wait()
        else:
            cl.enqueue_read_buffer(self.queue, self.dest_buf, result).wait()

        timing.timings.stop('execute')
        print('<<< Executed in %s ms' % (self._last_timing('execute'),))
        return result
# Execute it
# ---------------------------------------
if __name__ == "__main__":
    # Smoke test: run this module directly to check that execute works
    # with a sample request parameter.
    example = CL()
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(example.execute({'income': 42}))