def calculation_helper(self, p, q, r, tsize):
    """Check the three multiply kernels on random input and log their timings.

    Builds a ``p x q`` and a ``q x r`` random NumPy matrix, multiplies their
    ``_matrix.Matrix`` counterparts with the naive, tiled and MKL kernels,
    compares every product with ``np.matmul`` and overwrites
    ``performance.txt`` with the measured times and speed-ups.

    Args:
        p: Number of rows of the left operand.
        q: Number of columns of the left operand (rows of the right one).
        r: Number of columns of the right operand.
        tsize: Tile size forwarded to ``_matrix.multiply_tile``.

    Raises:
        AssertionError: If a product has the wrong shape or
            ``self.is_close_to_equal`` rejects it.
        ZeroDivisionError: If the tiled or MKL run is measured as 0.
    """
    m1 = np.random.rand(p, q)
    m2 = np.random.rand(q, r)
    m3 = np.matmul(m1, m2)

    _m1 = _matrix.Matrix(m1)
    _m2 = _matrix.Matrix(m2)

    # NOTE(review): time_report(-1) appears to yield a start stamp and
    # time_report(start) the elapsed time -- confirm against the helper.
    start = self.time_report(-1)
    naive = _matrix.multiply_naive(_m1, _m2)
    tnaive = self.time_report(start)

    start = self.time_report(-1)
    tile = _matrix.multiply_tile(_m1, _m2, tsize)
    ttile = self.time_report(start)

    start = self.time_report(-1)
    mkl = _matrix.multiply_mkl(_m1, _m2)
    tmkl = self.time_report(start)

    assert m3.shape[0] == naive.nrow and m3.shape[1] == naive.ncol
    assert m3.shape[0] == tile.nrow and m3.shape[1] == tile.ncol
    assert m3.shape[0] == mkl.nrow and m3.shape[1] == mkl.ncol

    assert self.is_close_to_equal(m3, naive, p, r)
    assert self.is_close_to_equal(m3, tile, p, r)
    assert self.is_close_to_equal(m3, mkl, p, r)

    # The context manager closes the report even when a write (or one of the
    # speed-up divisions) raises, which the manual open()/close() did not.
    with open("performance.txt", "w") as f:
        f.write('multiply_naive costs: {} seconds\n'.format(tnaive))
        f.write('multiply_tile costs: {} seconds\n'.format(ttile))
        f.write('multiply_mkl costs: {} seconds\n'.format(tmkl))
        f.write('tile speed-up over naive: {}\n'.format(tnaive / ttile))
        f.write('MKL speed-up over naive: {}\n'.format(tnaive / tmkl))
def test_performance(self):
    """Time the three kernels once and require tiling to beat naive by 20%.

    The timings are written to ``performance.txt`` (overwritten on every
    run) before the ratio ``tile_time / naive_time`` is asserted to be
    below 0.8.
    """
    size = 1000
    # Use the named size instead of repeating the literal.
    mat1, mat2, *_ = self.make_matrices(size)
    tile = random.randint(10, 100)

    st = time.time()
    ret_naive = _matrix.multiply_naive(mat1, mat2)
    naive_time = time.time() - st

    st = time.time()
    ret_tile = _matrix.multiply_tile(mat1, mat2, tile)
    tile_time = time.time() - st

    st = time.time()
    ret_mkl = _matrix.multiply_mkl(mat1, mat2)
    mkl_time = time.time() - st

    # ``with`` guarantees the report is closed even if formatting or the
    # ratio computation raises.
    with open('performance.txt', 'w') as f:
        f.writelines([
            'tile = ', repr(tile),
            '\nnaive ', repr(naive_time),
            ' s.\ntile ', repr(tile_time),
            ' s\nmkl', repr(mkl_time),
            ' s.\ntile/naive = ', repr(tile_time / naive_time)
        ])

    self.assertLess(tile_time / naive_time, 0.8)
def test_zero(self):
    """Products with ``mat3`` must be all zero, with exact memory accounting.

    Checks the counters exposed by ``_matrix.bytes()``,
    ``_matrix.allocated()`` and ``_matrix.deallocated()`` before and after
    the two multiplications.
    """
    self.assertEqual(0, _matrix.bytes())

    size = 200
    # Each element is counted as 8 bytes -- presumably a C double; confirm.
    bytes_per_matrix = 8 * size * size

    # mat2 is unused below but stays bound so the byte counts add up.
    mat1, mat2, mat3, *_ = self.make_matrices(size)
    self.assertEqual(3 * bytes_per_matrix, _matrix.bytes())
    base_alloc = _matrix.allocated()
    base_dealloc = _matrix.deallocated()

    ret_naive = _matrix.multiply_naive(mat1, mat3)
    ret_mkl = _matrix.multiply_mkl(mat1, mat3)

    for product in (ret_naive, ret_mkl):
        self.assertEqual(size, product.nrow)
        self.assertEqual(size, product.ncol)

    row_count = ret_naive.nrow
    for row in range(row_count):
        for col in range(ret_naive.ncol):
            self.assertEqual(0, ret_naive[row, col])
            self.assertEqual(0, ret_mkl[row, col])

    # Two more matrices are alive now and nothing has been released.
    self.assertEqual(5 * bytes_per_matrix, _matrix.bytes())
    self.assertEqual(base_alloc + 2 * bytes_per_matrix, _matrix.allocated())
    self.assertEqual(base_dealloc, _matrix.deallocated())
def test_speed():
    """Benchmark naive, tiled and MKL multiplication on random matrices.

    Two ``largesize x largesize`` matrices are filled with random values.
    The naive kernel is timed once, the tiled kernel once per tile size
    (2, 4, 8, ... while the previous size squared is <= ``largesize``) and
    the MKL kernel once.  Every timing is appended to ``performance.txt``.
    The test passes when the best tiled time is below 80% of the naive one.
    """
    mat1 = m.Matrix(largesize, largesize)
    mat2 = m.Matrix(largesize, largesize)

    for x in range(largesize):
        for y in range(largesize):
            mat1[x, y] = random.random()
            mat2[x, y] = random.random()

    # The context manager closes the log even when a kernel raises; the
    # manual fp.close() was skipped on any exception.
    with open("performance.txt", "a") as fp:
        t_naive = time.time()
        naive = m.multiply_naive(mat1, mat2)
        t_naive = time.time() - t_naive
        fp.write('multiply_naive use ' + str(t_naive) + 's\n')

        tilesize = 1
        # Start above any acceptable value so the first tiled run wins.
        t_tilemin = t_naive * 2
        while tilesize * tilesize <= largesize:
            tilesize *= 2
            t_tile = time.time()
            tile = m.multiply_tile(mat1, mat2, tilesize)
            t_tile = time.time() - t_tile
            fp.write('multiply_tile use ' + str(t_tile) +
                     's with tile size ' + str(tilesize) + '\n')
            if t_tilemin > t_tile:
                t_tilemin = t_tile

        t_mkl = time.time()
        mkl = m.multiply_mkl(mat1, mat2)
        t_mkl = time.time() - t_mkl
        fp.write('multiply_mkl use ' + str(t_mkl) + 's\n')

    assert t_naive * 0.8 > t_tilemin
def calculation_helper(self, p, q, r, tsize):
    """Check the three multiply kernels on random input and log their timings.

    Builds a ``p x q`` and a ``q x r`` random NumPy matrix, multiplies their
    ``_matrix.Matrix`` counterparts with the naive, tiled and MKL kernels,
    compares every product with ``np.matmul`` and overwrites
    ``performance.txt`` with the measured times and speed-ups.

    Args:
        p: Number of rows of the left operand.
        q: Number of columns of the left operand (rows of the right one).
        r: Number of columns of the right operand.
        tsize: Tile size forwarded to ``_matrix.multiply_tile``.

    Raises:
        AssertionError: If a product has the wrong shape or
            ``self.is_close_to_equal`` rejects it.
        ZeroDivisionError: If the tiled or MKL run is measured as 0.
    """
    m1 = np.random.rand(p, q)
    m2 = np.random.rand(q, r)
    m3 = np.matmul(m1, m2)

    _m1 = _matrix.Matrix(m1)
    _m2 = _matrix.Matrix(m2)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring short durations.
    tstart = time.perf_counter()
    ret_naive = _matrix.multiply_naive(_m1, _m2)
    tnaive = time.perf_counter() - tstart

    tstart = time.perf_counter()
    ret_tile = _matrix.multiply_tile(_m1, _m2, tsize)
    ttile = time.perf_counter() - tstart

    tstart = time.perf_counter()
    ret_mkl = _matrix.multiply_mkl(_m1, _m2)
    tmkl = time.perf_counter() - tstart

    assert m3.shape[0] == ret_naive.nrow and m3.shape[1] == ret_naive.ncol
    assert m3.shape[0] == ret_tile.nrow and m3.shape[1] == ret_tile.ncol
    assert m3.shape[0] == ret_mkl.nrow and m3.shape[1] == ret_mkl.ncol

    assert self.is_close_to_equal(m3, ret_naive, p, r)
    assert self.is_close_to_equal(m3, ret_tile, p, r)
    assert self.is_close_to_equal(m3, ret_mkl, p, r)

    with open('performance.txt', 'w') as f:
        f.write('multiply_naive: {}\n'.format(tnaive))
        f.write('multiply_tile: {}\n'.format(ttile))
        f.write('multiply_mkl: {}\n'.format(tmkl))
        f.write('tile speed-up over naive: {}\n'.format(tnaive / ttile))
        f.write('MKL speed-up over naive: {}\n'.format(tnaive / tmkl))
def test_equal(self):
    """The naive, MKL and tiled products of a 3x4 by 4x5 pair must be equal."""
    lhs = self.create_matrix(3, 4)
    rhs = self.create_matrix(4, 5)

    by_naive = _matrix.multiply_naive(lhs, rhs)
    by_mkl = _matrix.multiply_mkl(lhs, rhs)
    by_tile = _matrix.multiply_tile(lhs, rhs, 64)

    # Equality is chained: naive == mkl and mkl == tile.
    self.assertEqual(by_naive, by_mkl)
    self.assertEqual(by_mkl, by_tile)
def test_result(self):
    """All three kernels must agree element-wise on a 1151x1151 product.

    The naive product is the reference; the tiled and MKL products must
    match it within an absolute tolerance of 1e-05.
    """
    mat1 = Matrix(np.random.random((1151, 1151)))
    mat2 = Matrix(np.random.random((1151, 1151)))

    naive_ret = multiply_naive(mat1, mat2)
    tile_ret = multiply_tile(mat1, mat2, 64)
    mkl_ret = multiply_mkl(mat1, mat2)

    for i in range(naive_ret.nrow):
        for j in range(naive_ret.ncol):
            expected = naive_ret[i, j]
            assert expected == pytest.approx(tile_ret[i, j], abs=1e-05)
            # The MKL product used to be computed but never verified.
            assert expected == pytest.approx(mkl_ret[i, j], abs=1e-05)
def test_performance():
    """Record best-of-five runtimes of the three kernels on 1000x1000 input.

    Results and speed-ups relative to the naive kernel are written to
    ``performance.txt`` (overwritten on every run).  Nothing is asserted.
    """
    np_mat1 = np.random.random((1000, 1000))
    np_mat2 = np.random.random((1000, 1000))
    mat1 = Matrix(np_mat1)
    mat2 = Matrix(np_mat2)

    def sample(kernel, *extra):
        """Return the wall-clock durations of five consecutive calls."""
        durations = []
        for _ in range(5):
            began = time.time()
            kernel(mat1, mat2, *extra)
            finished = time.time()
            durations.append(finished - began)
        return durations

    naive_timing = sample(multiply_naive)
    tile_timing = sample(multiply_tile, 8)
    mkl_timing = sample(multiply_mkl)

    with open('performance.txt', 'w') as f:
        # The minimum is reported, not the mean.
        naivesec = np.min(naive_timing)
        tilesec = np.min(tile_timing)
        mklsec = np.min(mkl_timing)
        report = (
            'multiply_naive runtime = {0:2.4f} seconds'.format(naivesec),
            'multiply_tile  runtime = {0:2.4f} seconds'.format(tilesec),
            'multiply_mkl runtime = {0:2.4f} seconds'.format(mklsec),
            'Tile speed-up over naive: {0:2.2f} x'.format(naivesec / tilesec),
            'MKL speed-up over naive: {0:2.2f} x'.format(naivesec / mklsec),
        )
        for line in report:
            print(line, file=f)
def test_mkl_mul(self):
    """Compare the MKL product of a 2x3 and a 3x4 matrix with a manual one."""
    lhs = self.create_matrix(2, 3)
    rhs = self.create_matrix(3, 4)

    product = _matrix.multiply_mkl(lhs, rhs)

    self.assertEqual(product.nrow, 2)
    self.assertEqual(product.ncol, 4)

    for row in range(2):
        for col in range(4):
            # Accumulate left to right so the reference keeps the same
            # floating-point summation order.
            expected = 0.0
            for inner in range(3):
                expected += lhs[row, inner] * rhs[inner, col]
            self.assertEqual(expected, product[row, col])
def test_zero(self):
    """Naive and MKL products with ``mat3`` must be 100x100 and all zero."""
    size = 100
    # NOTE(review): mat3 is presumably the zero matrix -- confirm against
    # make_matrices.
    mat1, mat2, mat3, *_ = self.make_matrices(size)

    ret_naive = _matrix.multiply_naive(mat1, mat3)
    ret_mkl = _matrix.multiply_mkl(mat1, mat3)

    for product in (ret_naive, ret_mkl):
        self.assertEqual(size, product.nrow)
        self.assertEqual(size, product.ncol)

    row_count = ret_naive.nrow
    for row in range(row_count):
        for col in range(ret_naive.ncol):
            self.assertEqual(0, ret_naive[row, col])
            self.assertEqual(0, ret_mkl[row, col])
def test_multiply_mkl():
    """The MKL product of two seeded 1000x1000 matrices must match NumPy."""
    np.random.seed(5555)
    lhs_values = np.random.random((1000, 1000))
    rhs_values = np.random.random((1000, 1000))

    lhs = _matrix.Matrix(lhs_values)
    rhs = _matrix.Matrix(rhs_values)
    product = _matrix.multiply_mkl(lhs, rhs)

    assert product.nrow == lhs.nrow
    assert product.ncol == rhs.ncol

    actual = np.array(product)
    expected = np.matmul(lhs_values, rhs_values)
    assert actual == pytest.approx(expected)
def test_tile():
    """The tiled (tile size 8) and MKL products must be element-wise equal.

    Both operands receive the contents of the same ``np_matrix(size, size)``
    array via ``copy_matrix``.
    """
    lhs = m.Matrix(size, size)
    rhs = m.Matrix(size, size)
    source = np_matrix(size, size)

    for target in (lhs, rhs):
        copy_matrix(target, source, size, size)

    tiled = m.multiply_tile(lhs, rhs, 8)
    reference = m.multiply_mkl(lhs, rhs)

    for row in range(size):
        for col in range(size):
            npt.assert_equal(tiled[row, col], reference[row, col])
def test_multiply_mkl():
    """Check the MKL kernel against NumPy on two randomly sized inputs.

    Each round draws three dimensions from [200, 400) and multiplies an
    ``rows x inner`` matrix by an ``inner x cols`` matrix.
    """
    for _ in range(2):
        # Draw order (rows, cols, inner) is kept so the RNG stream matches.
        rows = np.random.randint(200, 400)
        cols = np.random.randint(200, 400)
        inner = np.random.randint(200, 400)

        lhs_values = np.random.random((rows, inner))
        rhs_values = np.random.random((inner, cols))
        lhs = Matrix(lhs_values)
        rhs = Matrix(rhs_values)

        product = multiply_mkl(lhs, rhs)

        assert product.nrow == rows
        assert product.ncol == cols

        actual = np.array(product)
        expected = np.matmul(lhs_values, rhs_values)
        assert actual == pytest.approx(expected)
def test_performance(self):
    """Average five timed runs of each kernel and write a report.

    Two random 1151x1151 matrices are multiplied five times with each of
    the naive, tiled (tile size 64) and MKL kernels.  The average times and
    the relative gains over the naive kernel are written to
    ``performance.txt`` (overwritten on every run).  Nothing is asserted.
    """
    mat1 = Matrix(np.random.random((1151, 1151)))
    mat2 = Matrix(np.random.random((1151, 1151)))

    timer_naive = []
    timer_tile = []
    timer_mkl = []

    for i in range(5):
        timer = time.time()
        multiply_naive(mat1, mat2)
        timer_naive.append(time.time() - timer)

        timer = time.time()
        multiply_tile(mat1, mat2, 64)
        timer_tile.append(time.time() - timer)

        timer = time.time()
        multiply_mkl(mat1, mat2)
        timer_mkl.append(time.time() - timer)

    naive_avg = np.average(timer_naive)
    tile_avg = np.average(timer_tile)
    mkl_avg = np.average(timer_mkl)

    # The ``with`` block closes the file; the explicit f.close() that
    # followed was redundant and has been dropped.
    with open("performance.txt", "w") as f:
        f.write("[Average execution time]\n")
        f.write("multiply_naive: {} secs\n".format("%.4f" % naive_avg))
        f.write("multiply_tile : {} secs\n".format("%.4f" % tile_avg))
        f.write("multiply_mkl  : {} secs\n".format("%.4f" % mkl_avg))
        # "x% faster" is the gain over the baseline, i.e. ratio - 1.  The
        # raw ratio reported equal speed as "100.0% faster".
        f.write("The tiling version is {:.1%} faster than naive version\n".
                format(naive_avg / tile_avg - 1))
        f.write(
            "The mkl version is {:.1%} faster than naive version\n".format(
                naive_avg / mkl_avg - 1))
def test_multiply():
    """Naive and MKL products of two constant matrices must compare equal.

    The 10x20 operand is filled with 8.0 and the 20x10 operand with 6.0;
    the 10x10 products are compared with ``compareMat``.
    """
    lhs = Matrix(10, 20)
    rhs = Matrix(20, 10)

    # (matrix, rows, cols, fill value) -- filled in this order.
    fill_plan = ((lhs, 10, 20, 8.0), (rhs, 20, 10, 6.0))
    for matrix, rows, cols, value in fill_plan:
        for row in range(rows):
            for col in range(cols):
                matrix[row, col] = value

    by_naive = multiply_naive(lhs, rhs)
    by_mkl = multiply_mkl(lhs, rhs)

    assert compareMat(by_naive, by_mkl, 10, 10)
def test_match_tile_mkl(self):
    """The tiled (tile size 4) and MKL products of 100x100 inputs must match.

    Every product element must also differ from the corresponding element
    of the left operand.
    """
    size = 100
    mat1, mat2, *_ = self.make_matrices(size)

    ret_mkl = _matrix.multiply_mkl(mat1, mat2)
    ret_tile = _matrix.multiply_tile(mat1, mat2, 4)

    for product in (ret_tile, ret_mkl):
        self.assertEqual(size, product.nrow)
        self.assertEqual(size, product.ncol)

    row_count = ret_tile.nrow
    for row in range(row_count):
        for col in range(ret_tile.ncol):
            reference = ret_mkl[row, col]
            self.assertNotEqual(mat1[row, col], reference)
            self.assertEqual(ret_tile[row, col], ret_mkl[row, col])
def test_caculation(self):
    """Time and cross-check the three kernels on random 1000x1000 input.

    The naive, tiled (tile size 100) and MKL kernels are each run once and
    timed.  The naive and tiled products must match the MKL product within
    an absolute tolerance of 1e-05, and every MKL element must differ from
    the corresponding element of the left operand.  The runtimes are then
    written to ``performance.txt`` (overwritten on every run).
    """
    size = 1000
    np_mat1 = np.random.random(size * size)
    np_mat2 = np.random.random(size * size)
    mat1 = _matrix.Matrix(size, size, np_mat1.tolist())
    mat2 = _matrix.Matrix(size, size, np_mat2.tolist())

    start = time.time()
    ret_naive = _matrix.multiply_naive(mat1, mat2)
    end = time.time()
    naive_time = end - start
    print('multiply_naive runtime = {0:2.4f} seconds'.format(end - start))

    start = time.time()
    ret_tile = _matrix.multiply_tile(mat1, mat2, 100)
    end = time.time()
    tile_time = end - start
    print(end - start)

    start = time.time()
    ret_mkl = _matrix.multiply_mkl(mat1, mat2)
    end = time.time()
    mkl_time = end - start
    print(end - start)

    self.assertEqual(size, ret_naive.nrow)
    self.assertEqual(size, ret_naive.ncol)
    self.assertEqual(size, ret_mkl.nrow)
    self.assertEqual(size, ret_mkl.ncol)

    for i in range(ret_naive.nrow):
        for j in range(ret_naive.ncol):
            self.assertNotEqual(mat1[i, j], ret_mkl[i, j])
            self.assertEqual(ret_naive[i, j],
                             pytest.approx(ret_mkl[i, j], abs=1e-05))
            self.assertEqual(ret_tile[i, j],
                             pytest.approx(ret_mkl[i, j], abs=1e-05))

    # The original ended with ``fp.close`` (no parentheses), which never
    # called close(); the context manager closes the file reliably.
    with open("performance.txt", "w") as fp:
        fp.write(
            'multiply_naive runtime = {0:2.4f} seconds\n'.format(naive_time))
        fp.write(
            'multiply_tile runtime = {0:2.4f} seconds\n'.format(tile_time))
        fp.write(
            'multiply_mkl runtime = {0:2.4f} seconds\n'.format(mkl_time))
def test_multiply_mkl(self):
    """Check the MKL product of random 1000x1000 matrices against NumPy.

    The check is repeated twice with fresh random data.
    """
    side = 1000
    shape = (side, side)

    for _ in range(2):
        flat_lhs = np.random.random(side * side)
        flat_rhs = np.random.random(side * side)

        # The wrappers are built from flat Python lists of the same data.
        lhs = Matrix(side, side, flat_lhs.tolist())
        rhs = Matrix(side, side, flat_rhs.tolist())

        product = multiply_mkl(lhs, rhs)

        assert product.nrow == side
        assert product.ncol == side

        actual = np.array(product.buffer_vector()).reshape(shape)
        expected = np.matmul(flat_lhs.reshape(shape), flat_rhs.reshape(shape))
        assert actual == pytest.approx(expected)
def test_multiply(self):
    """Tiled and MKL products must equal each other and ``np.dot``.

    NOTE(review): ``random.randrange(3, 4)`` can only return 3, so every
    dimension is 3.  An earlier draw from ``randrange(50, 200)`` was
    immediately overwritten by these values and has been removed as dead
    code -- confirm which range was intended.
    """
    d1, d2, d3 = random.randrange(3, 4), \
        random.randrange(3, 4), \
        random.randrange(3, 4)

    # NOTE(review): make_matrices presumably returns a (_matrix.Matrix,
    # ndarray) pair with the same contents -- confirm against the helper.
    m, a = self.make_matrices(d1, d2)
    n, b = self.make_matrices(d2, d3)

    tiled = _matrix.multiply_tile(m, n, 64)
    mkl = _matrix.multiply_mkl(m, n)
    c = np.dot(a, b)

    for i in range(d1):
        for j in range(d3):
            self.assertEqual(tiled[i, j], mkl[i, j])
            self.assertEqual(mkl[i, j], c[i, j])
def different_tile_size():
    """Tiled products must equal the MKL product for many tile sizes.

    The left operand is the 512x512 identity and the right operand holds
    1 .. 512**2 in row-major order.  Tile sizes 2, 4, ..., 256 are tried
    first, followed by 14, 22 and 56.
    """
    matrix_size = 512

    identity = _matrix.Matrix(np.eye(matrix_size))
    counting_values = np.arange(1, matrix_size**2 + 1).reshape(
        matrix_size, matrix_size)
    counting = _matrix.Matrix(counting_values)

    reference = _matrix.multiply_mkl(identity, counting)

    # Powers of two first, then the sizes that are not powers of two.
    tile_sizes = [2**exponent for exponent in range(1, 9)] + [14, 22, 56]
    for tile_size in tile_sizes:
        tiled = _matrix.multiply_tile(identity, counting, tile_size)
        assert tiled == reference
def check_tile(row, col1, col2, tile_size):
    """Verify the tiled kernel against MKL and benchmark it against naive.

    Args:
        row: Number of rows of the left operand.
        col1: Number of columns of the left operand (rows of the right one).
        col2: Number of columns of the right operand.
        tile_size: Tile size passed to ``multiply_tile``.

    Returns:
        The ratio ``time_tile / time_naive``, each being the fastest of
        five single-call timings.

    Raises:
        AssertionError: If the tiled product has the wrong shape or differs
            from the MKL product by more than 1e-05 (absolute).

    Side effects:
        Overwrites ``performance.txt`` with the measured ratio.
    """
    np_mat1 = np.random.random(row * col1)
    np_mat2 = np.random.random(col1 * col2)
    mat1 = Matrix(row, col1, np_mat1.tolist())
    mat2 = Matrix(col1, col2, np_mat2.tolist())

    tile_answer = multiply_tile(mat1, mat2, tile_size)
    assert tile_answer.nrow == row
    assert tile_answer.ncol == col2

    mkl_answer = multiply_mkl(mat1, mat2)

    tile_ver_in_np = np.array(tile_answer.buffer_vector()).reshape(
        (row, col2))
    mkl_ver_in_np = np.array(mkl_answer.buffer_vector()).reshape(
        (row, col2))
    assert tile_ver_in_np == pytest.approx(mkl_ver_in_np, abs=1e-05)

    # Benchmark: the timed statements are strings, so every name they use
    # must be present in the namespace handed to timeit.
    ns = dict(multiply_naive=multiply_naive, multiply_tile=multiply_tile,
              mat1=mat1, mat2=mat2, tile_size=tile_size)
    t_naive = timeit.Timer('multiply_naive(mat1, mat2)', globals=ns)
    t_tile = timeit.Timer('multiply_tile(mat1, mat2, tile_size)', globals=ns)

    time_naive = min(t_naive.repeat(5, 1))
    time_tile = min(t_tile.repeat(5, 1))
    ratio = time_tile / time_naive

    with open("performance.txt", "w") as file:
        file.write(
            "tile time (tile_size: {}) / naive time: {}/{}={}".format(
                tile_size, time_tile, time_naive, ratio))

    return ratio
def test_correct():
    """The naive, tiled (tile size 32) and MKL products must compare equal.

    Both ``smallsize x smallsize`` operands hold ``x * y`` at position
    ``(x, y)``.  Every element of the tiled product is printed before the
    comparisons.
    """
    lhs = m.Matrix(smallsize, smallsize)
    rhs = m.Matrix(smallsize, smallsize)

    for row in range(smallsize):
        for col in range(smallsize):
            value = row * col
            lhs[row, col] = value
            rhs[row, col] = value

    naive = m.multiply_naive(lhs, rhs)
    tile = m.multiply_tile(lhs, rhs, 32)
    mkl = m.multiply_mkl(lhs, rhs)

    for row in range(smallsize):
        for col in range(smallsize):
            print(tile[row, col])

    assert tile == naive
    assert naive == mkl
    assert tile == mkl
def test_multiply_naive(self):
    """The naive product must match the MKL product on 1000x1000 input.

    The check is repeated twice with fresh random data and uses an absolute
    tolerance of 1e-05.
    """
    side = 1000
    shape = (side, side)

    for _ in range(2):
        flat_lhs = np.random.random(side * side)
        flat_rhs = np.random.random(side * side)

        # The wrappers are built from flat Python lists of the same data.
        lhs = Matrix(side, side, flat_lhs.tolist())
        rhs = Matrix(side, side, flat_rhs.tolist())

        naive_product = multiply_naive(lhs, rhs)
        assert naive_product.nrow == side
        assert naive_product.ncol == side

        mkl_product = multiply_mkl(lhs, rhs)

        naive_values = np.array(naive_product.buffer_vector()).reshape(shape)
        mkl_values = np.array(mkl_product.buffer_vector()).reshape(shape)
        assert naive_values == pytest.approx(mkl_values, abs=1e-05)
def check_tile(self, mat1, mat2, tsize):
    """Verify one kernel against MKL and time both.

    Args:
        mat1: Left operand.
        mat2: Right operand.
        tsize: Tile size for ``_matrix.multiply_tile``; 0 selects
            ``_matrix.multiply_naive`` instead.

    Returns:
        A ``(ratio, time_tile)`` tuple where ``time_tile`` is the fastest
        of ten single-call timings of the selected kernel and ``ratio`` is
        that time divided by the fastest of ten MKL timings.
    """
    if tsize != 0:
        ret_tile = _matrix.multiply_tile(mat1, mat2, tsize)
        tile_str = "_matrix.multiply_tile(mat1, mat2, tsize)"
    else:
        ret_tile = _matrix.multiply_naive(mat1, mat2)
        tile_str = "_matrix.multiply_naive(mat1, mat2)"

    ret_mkl = _matrix.multiply_mkl(mat1, mat2)

    row_count = ret_tile.nrow
    for row in range(row_count):
        for col in range(ret_tile.ncol):
            self.assertNotEqual(mat1[row, col], ret_mkl[row, col])
            self.assertEqual(ret_tile[row, col], ret_mkl[row, col])

    # The timed statements are strings, so every name they use must be in
    # the namespace handed to timeit.
    namespace = dict(_matrix=_matrix, mat1=mat1, mat2=mat2, tsize=tsize)
    tile_timer = timeit.Timer(tile_str, globals=namespace)
    mkl_timer = timeit.Timer('_matrix.multiply_mkl(mat1, mat2)',
                             globals=namespace)

    time_tile = min(tile_timer.repeat(10, 1))
    time_mkl = min(mkl_timer.repeat(10, 1))

    return time_tile / time_mkl, time_tile
def MKL_MM(A, B):
    """Return ``_matrix.multiply_mkl(A, B)``."""
    product = _matrix.multiply_mkl(A, B)
    return product
def test_performance():
    """Benchmark the kernels and require tiling to take at most 80% of naive.

    Each kernel (naive, MKL, and tiled with tile sizes 16, 17 and 19) is
    timed twice and the mean is reported in ``performance.txt``
    (overwritten on every run) before the three tiled/naive ratios are
    asserted.
    """
    # randint(500, 501) can only return 500; the draws are kept so the RNG
    # stream is consumed exactly as before.
    m = np.random.randint(500, 501)
    n = np.random.randint(500, 501)
    k = np.random.randint(500, 501)

    np_mat1 = np.random.random((m, k))
    np_mat2 = np.random.random((k, n))
    mat1 = Matrix(np_mat1)
    mat2 = Matrix(np_mat2)

    def sample(kernel, *extra):
        """Return the wall-clock durations of two consecutive calls."""
        durations = []
        for _ in range(2):
            began = time.time()
            result = kernel(mat1, mat2, *extra)
            finished = time.time()
            durations.append(finished - began)
        return durations

    naive_samples = sample(multiply_naive)
    mkl_samples = sample(multiply_mkl)
    tile16_samples = sample(multiply_tile, 16)
    tile17_samples = sample(multiply_tile, 17)
    tile19_samples = sample(multiply_tile, 19)

    naive_timing = np.mean(naive_samples)
    mkl_timing = np.mean(mkl_samples)
    tile16_timing = np.mean(tile16_samples)
    tile17_timing = np.mean(tile17_samples)
    tile19_timing = np.mean(tile19_samples)

    report = (
        'Performance Measurement',
        'Input Matrix size:',
        'Matrix 1: {} x {}'.format(m, k),
        'Matrix 2: {} x {}'.format(k, n),
        'Average Time for multiply_naive: {}'.format(naive_timing),
        'Average Time for multiply_mkl: {}'.format(mkl_timing),
        'Average Time for multiply_tile16: {}'.format(tile16_timing),
        'Average Time for multiply_tile17: {}'.format(tile17_timing),
        'Average Time for multiply_tile19: {}'.format(tile19_timing),
    )
    with open('performance.txt', 'w') as f:
        for line in report:
            print(line, file=f)

    assert tile16_timing / naive_timing <= 0.8
    assert tile17_timing / naive_timing <= 0.8
    assert tile19_timing / naive_timing <= 0.8