-
Notifications
You must be signed in to change notification settings - Fork 0
/
CLSolve.py
124 lines (95 loc) · 5.86 KB
/
CLSolve.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import pyopencl as cl
import numpy as np
import os
os.environ['PYOPENCL_COMPILER_OUTPUT'] = '1'
class CLSolver:
    """GPU Monte-Carlo puzzle solver using PyOpenCL.

    Workflow: call init() once to build the OpenCL context/program, then
    solve(puzzle) to run `simulations` parallel random rollouts on the GPU
    and extract the best solution found.
    """

    def loadKernel(self, filename):
        """Read the OpenCL source file at `filename` and build self.program.

        Requires self.context to already exist (created in init()).
        """
        # 'with' guarantees the handle is closed even if build() raises
        # (the original opened the file and never closed it).
        with open(filename, 'r') as f:
            contents = f.read()
        # create and compile the program for the current context
        self.program = cl.Program(self.context, contents).build()

    def init(self, filename="Solvers/montecarlo.cl"):
        """Set up the OpenCL platform, devices, context, queue and program.

        filename: path to the OpenCL kernel source. The original ignored this
        parameter and always loaded "Solvers/montecarlo.cl"; it is now
        honoured, with the old hard-coded path kept as the default so
        existing no-argument callers behave identically.
        """
        # setup context and command queue on the first platform's GPUs
        self.platform = cl.get_platforms()
        self.gpu_devices = self.platform[0].get_devices(device_type=cl.device_type.GPU)
        # print device details for diagnostics
        for i, device in enumerate(self.gpu_devices):
            print("Device:", i)
            print(device)
            # '//' keeps the original Python 2 truncating-division output
            print("Global Memory:", device.get_info(cl.device_info.GLOBAL_MEM_SIZE) // 1000, "KB")
            print("Local Memory:", device.get_info(cl.device_info.LOCAL_MEM_SIZE) // 1000, "KB")
        # create OpenCL context and queue
        self.context = cl.Context(devices=self.gpu_devices)
        self.queue = cl.CommandQueue(self.context)
        # load kernel from file
        self.loadKernel(filename)

    def initBuffers(self, puzzle):
        """Allocate and populate the device buffers used by the kernel.

        puzzle: dict with at least a 'puzzle' key holding a 2-D grid of ints
        (presumably height x width — TODO confirm against the caller).
        Requires self.simulations / self.workGroups / self.height /
        self.width to be set (done in solve()).
        """
        # per-simulation solution lengths, initialised to int16 max so any
        # real result compares smaller; never read back, so no mapping needed
        self.lengths = np.full(self.simulations, np.iinfo(np.int16).max, dtype=np.int16)
        self.lengthsBuffer = cl.Buffer(self.context, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.lengths)
        # best length per workgroup; USE_HOST_PTR + map so the host can read
        # the aggregated results after the kernel runs
        self.groupLengths = np.full(self.workGroups, np.iinfo(np.int16).max, dtype=np.int16)
        self.groupLengthsBuffer = cl.Buffer(self.context, cl.mem_flags.READ_WRITE | cl.mem_flags.USE_HOST_PTR, hostbuf=self.groupLengths)
        # map group lengths buffer
        cl.enqueue_map_buffer(self.queue, self.groupLengthsBuffer, cl.map_flags.READ, 0, self.groupLengths.shape, self.groupLengths.dtype)
        # get the input puzzle ready for the kernel; convert to 8-bit int (char)
        p = np.array(puzzle['puzzle']).astype(np.int8)
        # subtract 1 so that -1 denotes a gap and 0 denotes a square to be filled
        p = p - np.ones_like(p, dtype=p.dtype)
        # replicate the puzzle, one copy per simulation (too large for
        # local/shared memory, so each invocation works on global memory)
        self.puzzles = np.zeros((self.simulations, self.height, self.width), dtype=p.dtype)
        self.puzzles[:, 0:self.height, 0:self.width] = p
        # input-only from the host's point of view, so COPY_HOST_PTR without mapping
        self.puzzlesFlattened = self.puzzles.ravel()
        self.puzzlesBuffer = cl.Buffer(self.context, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf=self.puzzlesFlattened)
        # output buffer: best solution grid per workgroup, read back after the run
        self.solutions = self.puzzles[0:self.workGroups]
        self.solutionsFlattened = self.solutions.ravel()
        self.solutionsBuffer = cl.Buffer(self.context, cl.mem_flags.READ_WRITE | cl.mem_flags.USE_HOST_PTR, hostbuf=self.solutionsFlattened)
        # map solutions buffer
        cl.enqueue_map_buffer(self.queue, self.solutionsBuffer, cl.map_flags.READ, 0, self.solutionsFlattened.shape, self.solutions.dtype)

    def solve(self, puzzle, simulations=16384, iterations=35, workGroupSize=128):
        """Run the Monte-Carlo kernel and return the best solution found.

        puzzle: dict with 'puzzle' (grid), 'width' and 'height' keys.
        simulations: total number of parallel rollouts (global work size);
            should be a multiple of workGroupSize.
        iterations: iteration count passed through to the kernel.
        workGroupSize: OpenCL local work size.

        Returns a list of {'X', 'Y', 'Size'} dicts describing the squares
        placed by the best simulation, in row-major order of their
        top-left corners.
        """
        self.simulations = simulations
        self.iterations = iterations
        self.workGroupSize = workGroupSize
        self.workGroups = int(self.simulations / self.workGroupSize)
        self.width = np.int8(puzzle['width'])
        self.height = np.int8(puzzle['height'])
        # initialise buffers
        self.initBuffers(puzzle)
        # create kernel and bind arguments (order must match the .cl signature)
        self.kernel = cl.Kernel(self.program, "montecarlo")
        self.kernel.set_args(self.lengthsBuffer, self.groupLengthsBuffer, self.puzzlesBuffer, self.solutionsBuffer, self.height, self.width, np.int32(self.iterations))
        # launch: one work-item per simulation
        cl.enqueue_nd_range_kernel(self.queue, self.kernel, (self.simulations,), (self.workGroupSize,))
        # unmap group lengths buffer from device and read results back
        cl.enqueue_map_buffer(self.queue, self.groupLengthsBuffer, cl.map_flags.WRITE, 0, self.groupLengths.shape, self.groupLengths.dtype)
        self.groupLengths = self.groupLengthsBuffer.get_host_array(self.groupLengths.shape, dtype=self.groupLengths.dtype)
        # unmap solutions buffer from device and read results back
        cl.enqueue_map_buffer(self.queue, self.solutionsBuffer, cl.map_flags.WRITE, 0, self.solutionsFlattened.shape, self.solutions.dtype)
        self.solutions = self.solutionsBuffer.get_host_array(self.solutions.shape, dtype=self.solutions.dtype)
        # release buffers
        self.lengthsBuffer.release()
        self.groupLengthsBuffer.release()
        self.puzzlesBuffer.release()
        self.solutionsBuffer.release()
        # pick the workgroup with the shortest (best) aggregated length
        i = self.groupLengths.argmin()
        bestSolution = np.array(self.solutions[i])
        # convert the solved grid into the list format used by the challenge:
        # scan row-major; each non-(-1) cell is the top-left of a square of
        # that size, which is then cleared so its interior isn't re-reported
        solution = []
        for row in range(0, puzzle['height']):
            for col in range(0, puzzle['width']):
                if bestSolution[row][col] != -1:
                    s = bestSolution[row][col]
                    # add to solution list
                    solution.append({'X': int(col), 'Y': int(row), 'Size': int(s)})
                    # clear cells covered by this square
                    for j in range(0, s):
                        for k in range(0, s):
                            bestSolution[row + j][col + k] = -1
        return solution