-
Notifications
You must be signed in to change notification settings - Fork 0
/
gpuGameOfLife.py
167 lines (147 loc) · 5.53 KB
/
gpuGameOfLife.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# Conway's game of life using PyCUDA
# This program is designed to be run on a GPU
# Due to CUDA limitations, the maximum allowable board has
# 65536 x 32768 items
# imports
import numpy as np
import pycuda.autoinit
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray
from pycuda.tools import DeviceData
from pycuda.compiler import SourceModule
# constants
MATRIX_SIZE = 32768 # size of square board
BLOCK_SIZE = 32 # number of blocks
N_ITERS = 1 # number of iterations
class Game:
def __init__(self, matrix, iters, block):
self.size = matrix
self.n_iters = iters
self.n_blocks = matrix // block
self.n_threads = block
self.initialize_board()
self.initialize_kernel()
self.run()
# Each cell is randomly set to either 1 (live) or 0 (dead)
def initialize_board(self):
self.board = np.random.randint(2, size = (self.size, self.size)).astype(np.float32)
#self.board = np.zeros((self.size, self.size)).astype(np.float32)
# PyCUDA implementation of Conway's game of life
# Kernel is written in C
def initialize_kernel(self):
self.kernel_code = """
// Update each cell of the grid
// Any live cell with less than two live neighbors dies
// Any live cell with two or three live neighbors lives
// Any live cell with four or more live neighbors dies
// Any dead cell with three neighbors becomes a live cell
__global__ void life_step(float *board, float *board2)
{{
// Matrix size
unsigned int m_size = {};
unsigned int num_cells = {};
// Column index of the element
unsigned int x = threadIdx.x + blockIdx.x * blockDim.x;
// Row index of the element
unsigned int y = threadIdx.y + blockIdx.y * blockDim.y;
// Thread ID in the board array
unsigned int thread_id = y * m_size + x;
// Game of life classically takes place on an infinite grid
// I've used a toroidal geometry for the problem
// The matrix wraps from top to bottom and from left to right
unsigned int above = (thread_id - m_size) % num_cells;
unsigned int below = (thread_id + m_size) % num_cells;
unsigned int left;
if (thread_id % m_size == 0) {{
left = thread_id + m_size - 1;
}} else {{
left = thread_id - 1;
}}
unsigned int right;
if (thread_id % m_size == m_size - 1) {{
right = thread_id - m_size + 1;
}} else {{
right = thread_id + 1;
}}
unsigned int above_left;
if (thread_id % m_size == 0) {{
above_left = (thread_id - 1) % num_cells;
}} else {{
above_left = (thread_id - m_size - 1) % num_cells;
}}
unsigned int above_right;
if (thread_id % m_size == m_size - 1) {{
above_right = (thread_id - blockDim.x * m_size + 1) % num_cells;
}} else {{
above_right = (thread_id - m_size + 1) % num_cells;
}}
unsigned int below_left;
if (thread_id % m_size == 0) {{
below_left = (thread_id + blockDim.x * m_size - 1) % num_cells;
}} else {{
below_left = (thread_id + m_size - 1) % num_cells;
}}
unsigned int below_right;
if (thread_id % m_size == m_size - 1) {{
below_right = (thread_id + 1) % num_cells;
}} else {{
below_right = (thread_id + m_size + 1) % num_cells;
}}
unsigned int num_neighbors = board[above] + board[below] + board[left] + board[right] +
board[above_left] + board[above_right] + board[below_left] + board[below_right];
unsigned int live_and2 = board[thread_id] && (num_neighbors == 2); // Live cell with 2 neighbors
unsigned int live_and3 = board[thread_id] && (num_neighbors == 3); // Live cell with 3 neighbors
unsigned int dead_and3 = !(board[thread_id]) && (num_neighbors == 3); // Dead cell with 3 neighbors
board2[thread_id] = live_and2 || live_and3 || dead_and3;
}}
"""
# Transfer CPU memory to GPU memory
self.board_gpu = gpuarray.to_gpu(self.board)
self.next_board = gpuarray.empty((self.size, self.size), np.float32)
self.kernel = self.kernel_code.format(self.size, self.size * self.size)
# Compile kernel code
self.mod = SourceModule(self.kernel)
# Get kernel function from compiled module
self.game = self.mod.get_function('life_step')
# Prints runtime kernel info
def mem_info(self):
shared = self.game.shared_size_bytes
regs = self.game.num_regs
local = self.game.local_size_bytes
const = self.game.const_size_bytes
mbpt = self.game.max_threads_per_block
print("=MEM=\nLocal:{},\nShared:{},\nRegisters:{},\nConst:{},\nMax Threads/B:{}".format(local, shared, regs, const, mbpt))
# Prints static device info
def usage_info(self):
(free,total) = drv.mem_get_info()
print("Global memory occupancy:{}% free".format(free*100/total))
for devicenum in range(drv.Device.count()):
device = drv.Device(devicenum)
attrs = device.get_attributes()
#Beyond this point is just pretty printing
print("\n===Attributes for device {}".format(device))
for (key,value) in attrs.items():
print("{}:{}".format(str(key),str(value)))
# Main driver function
# Life_step function is called N_ITERS times
def run(self):
self.mem_info()
self.usage_info()
i = 0
print('\nStarting board: \n', self.board_gpu)
while i < N_ITERS:
self.game(
# input
self.board_gpu,
# output
self.next_board,
# grid of n_blocks x n_blocks
grid = (self.n_blocks, self.n_blocks, 1),
# block of n_threads x n_threads
block = (self.n_threads, self.n_threads, 1),
)
self.board_gpu, self.next_board = self.next_board, self.board_gpu
i += 1
print('Final board: \n', self.board_gpu)
if __name__ == '__main__':
Game(MATRIX_SIZE, N_ITERS, BLOCK_SIZE)