def test_global_offsets_consts_array(self):
    """Check offset counts for a kernel indexing arrays with constant indices.

    With n=100000000 and s=2, each of the first 10000 iterations is expected
    to produce five load offsets and two store offsets.
    """
    # Adjacent literals concatenate to the exact kernel text used by the test.
    kernel_source = (
        ' double Y[s][n]; double F[s][n]; double A[s][s]; double y[n];'
        ' for (int j = 0; j < n; ++j) {'
        ' Y[0][j] += A[0][0] * F[0][j];'
        ' Y[0][j] += A[0][1] * F[1][j];'
        ' Y[1][j] += A[1][0] * F[0][j];'
        ' Y[1][j] += A[1][1] * F[1][j];'
        ' Y[0][j] = Y[0][j] + y[j];'
        ' Y[1][j] = Y[1][j] + y[j];'
        ' }'
    )
    kernel = KernelCode(kernel_source, machine=None)
    kernel.set_constant('n', 100000000)
    kernel.set_constant('s', 2)

    warmup_iterations = range(0, 10000)
    per_iteration_offsets = kernel.compile_global_offsets(
        iteration=warmup_iterations, spacing=0)
    for load_offsets, store_offsets in per_iteration_offsets:
        self.assertEqual(len(load_offsets), 5, msg="Number of load offsets")
        self.assertEqual(len(store_offsets), 2, msg="Number of store offsets")
def test_array_sizes_3d(self):
    """Verify the byte sizes reported for both arrays of the 3D kernel.

    With N=10 and M=20, ``array_sizes(in_bytes=True, subs_consts=True)`` is
    expected to report 20 * 10 * 10 * 8 bytes for each of ``a`` and ``b``.
    """
    kernel = KernelCode(self.threed_code, machine=None)
    for constant_name, constant_value in (('N', 10), ('M', 20)):
        kernel.set_constant(constant_name, constant_value)

    reported = kernel.array_sizes(in_bytes=True, subs_consts=True)

    # 8 byte per double
    bytes_per_array = 20 * 10 * 10 * 8
    expected = {'a': bytes_per_array, 'b': bytes_per_array}
    self.assertEqual(reported, expected)
def test_array_sizes_3d(self):
    """Check ``array_sizes`` in bytes for the 3D kernel with N=10 and M=20.

    Both arrays ``a`` and ``b`` are expected to occupy 20 * 10 * 10 * 8 bytes
    once the constants are substituted.
    """
    # Pass ``machine`` explicitly, as every other KernelCode construction in
    # these tests does, so the call does not depend on a default for it.
    k = KernelCode(self.threed_code, machine=None)
    k.set_constant('N', 10)
    k.set_constant('M', 20)
    sizes = k.array_sizes(in_bytes=True, subs_consts=True)
    # 8 byte per double
    checked_sizes = {'a': 20 * 10 * 10 * 8, 'b': 20 * 10 * 10 * 8}
    self.assertEqual(sizes, checked_sizes)
def test_global_offsets_2d(self):
    """Check the read and write byte offsets of the 2D kernel at iteration 0.

    Uses N=10 and M=20 and compares the offsets of the first compiled
    iteration against hand-computed values, ignoring their order.
    """
    kernel = KernelCode(self.twod_code, machine=None)
    kernel.set_constant('N', 10)
    kernel.set_constant('M', 20)
    array_bytes = kernel.array_sizes(in_bytes=True, subs_consts=True)

    compiled = kernel.compile_global_offsets(iteration=0, spacing=0)
    first_iteration = list(compiled)[0]
    loads, stores = first_iteration

    # read access to a[j][i-1], a[j][i+1], a[j-1][i], a[j+1][i]
    expected_loads = [
        (1 * 10 + 0) * 8,
        (1 * 10 + 2) * 8,
        (0 * 10 + 1) * 8,
        (2 * 10 + 1) * 8,
    ]
    self.assertCountEqual(expected_loads, loads)

    # write access to b[i][j]; the expected offset is shifted by the size of a
    expected_stores = [array_bytes['a'] + (1 * 10 + 1) * 8]
    self.assertCountEqual(expected_stores, stores)
def test_global_offsets_2d(self):
    """Compare iteration-0 load/store offsets of the 2D kernel to known values.

    Constants are N=10 and M=20. Offsets are compared as unordered
    collections via ``assertCountEqual``.
    """

    def byte_offset(row, col):
        # Same arithmetic as the literal expectations: (row * 10 + col) * 8.
        return (row * 10 + col) * 8

    kernel = KernelCode(self.twod_code, machine=None)
    for constant_name, constant_value in (('N', 10), ('M', 20)):
        kernel.set_constant(constant_name, constant_value)
    sizes_in_bytes = kernel.array_sizes(in_bytes=True, subs_consts=True)
    offsets_per_iteration = list(
        kernel.compile_global_offsets(iteration=0, spacing=0))
    read_offsets, write_offsets = offsets_per_iteration[0]

    # read access to a[j][i-1], a[j][i+1], a[j-1][i], a[j+1][i]
    self.assertCountEqual(
        [byte_offset(1, 0), byte_offset(1, 2), byte_offset(0, 1), byte_offset(2, 1)],
        read_offsets)

    # write access to b[i][j]
    self.assertCountEqual(
        [sizes_in_bytes['a'] + byte_offset(1, 1)],
        write_offsets)
def test_global_offsets_variable_small_array(self):
    """Check offset counts for a kernel looping over a small outer dimension.

    With n=100000000 and s=2, each of the first 10000 iterations is expected
    to produce two load offsets and one store offset.
    """
    # Adjacent literals concatenate to the exact kernel text used by the test.
    kernel_source = (
        ' double Y[s][n]; double y[n];'
        ' for (int l = 0; l < s; l++)'
        ' for (int j = 0; j < n; j++)'
        ' Y[l][j] = Y[l][j] + y[j];'
    )
    kernel = KernelCode(kernel_source, machine=None)
    kernel.set_constant('n', 100000000)
    kernel.set_constant('s', 2)

    warmup_iterations = range(0, 10000)
    per_iteration_offsets = kernel.compile_global_offsets(
        iteration=warmup_iterations, spacing=0)
    for load_offsets, store_offsets in per_iteration_offsets:
        self.assertEqual(len(load_offsets), 2, msg="Number of load offsets")
        self.assertEqual(len(store_offsets), 1, msg="Number of store offsets")
def test_non_variable_accesse(self):
    """Check layer-condition predictions for a kernel with constant indices.

    Builds the kernel against ``self.machine`` with s=4 and n=1000000, then
    compares the predictor's evict, miss and hit lists with fixed values.
    """
    # Adjacent literals concatenate to the exact kernel text used by the test
    # (including its trailing space).
    kernel_source = (
        ' double Y[s][n]; double F[s][n]; double A[s][s]; double y[n]; double h;'
        ' for (int l = 0; l < s; ++l)'
        ' for (int j = 0; j < n; ++j)'
        ' Y[l][j] = A[l][0] * F[0][j] * h + y[j]; '
    )
    kernel = KernelCode(kernel_source, machine=self.machine)
    kernel.set_constant('s', 4)
    kernel.set_constant('n', 1000000)

    predictor = LayerConditionPredictor(kernel, self.machine)

    evicts = predictor.get_evicts()
    self.assertEqual(evicts, [1, 1, 1, 0])
    misses = predictor.get_misses()
    self.assertEqual(misses, [3, 3, 3, 0])
    hits = predictor.get_hits()
    self.assertEqual(hits, [0, 0, 0, 3])
def test_global_offsets_consts_array(self):
    """Verify per-iteration load/store counts with constant array indices.

    The kernel is compiled with n=100000000 and s=2; every one of the first
    10000 iterations must report 5 load offsets and 2 store offsets.
    """
    # Declarations, loop header, loop body and closing brace are written as
    # adjacent literals that join into the exact kernel text.
    declarations = (
        ' double Y[s][n];'
        ' double F[s][n];'
        ' double A[s][s];'
        ' double y[n];'
    )
    loop = (
        ' for (int j = 0; j < n; ++j) {'
        ' Y[0][j] += A[0][0] * F[0][j];'
        ' Y[0][j] += A[0][1] * F[1][j];'
        ' Y[1][j] += A[1][0] * F[0][j];'
        ' Y[1][j] += A[1][1] * F[1][j];'
        ' Y[0][j] = Y[0][j] + y[j];'
        ' Y[1][j] = Y[1][j] + y[j];'
        ' }'
    )
    code = KernelCode(declarations + loop, machine=None)
    code.set_constant('n', 100000000)
    code.set_constant('s', 2)

    offsets_warmup = code.compile_global_offsets(
        iteration=range(0, 10000),
        spacing=0,
    )
    for reads, writes in offsets_warmup:
        self.assertEqual(len(reads), 5, msg="Number of load offsets")
        self.assertEqual(len(writes), 2, msg="Number of store offsets")
def test_iterations_sizes_2d_linear(self):
    """Check ``iteration_length`` of the linear 2D kernel.

    With N=10 and M=20 the kernel is expected to report 200 iterations.
    """
    kernel = KernelCode(self.twod_linear, machine=None)
    kernel.set_constant('N', 10)
    kernel.set_constant('M', 20)

    total_iterations = kernel.iteration_length()
    self.assertEqual(total_iterations, 200)