def test_kmeans(self):
    """Compare a k-means loop compiled with hpat.jit to the plain version.

    Runs one iteration with a single center on an ``11 x 2`` array of ones,
    checks the centroids agree, then checks the array/parfor counters.
    """
    # NOTE(review): the counters asserted below presumably reflect how this
    # body is compiled, so its statements are kept exactly as written.
    def test_impl(numCenter, numIter, N, D):
        A = np.ones((N, D))
        centroids = np.zeros((numCenter, D))

        for l in range(numIter):
            dist = np.array([[
                sqrt(np.sum((A[i, :] - centroids[j, :])**2))
                for j in range(numCenter)
            ] for i in range(N)])
            labels = np.array([dist[i, :].argmin() for i in range(N)])
            centroids = np.array([[
                np.sum(A[labels == i, j]) / np.sum(labels == i)
                for j in range(D)
            ] for i in range(numCenter)])

        return centroids

    compiled = hpat.jit(test_impl)
    num_points = 11
    call_args = (1, 1, num_points, 2)

    actual = compiled(*call_args)
    expected = test_impl(*call_args)
    np.testing.assert_allclose(actual, expected)

    self.assertEqual(count_array_OneDs(), 4)
    self.assertEqual(count_array_OneD_Vars(), 1)
    self.assertEqual(count_parfor_OneDs(), 5)
    self.assertEqual(count_parfor_OneD_Vars(), 1)
def test_intraday(self):
    """Compare a per-symbol DataFrame pipeline inside ``hpat.prange``.

    Each iteration builds a three-column frame, derives rolling statistics
    and buy criteria, and accumulates the mean of the selected returns.
    The jitted total must equal the plain-Python total for 11 symbols.
    """
    # NOTE(review): body kept verbatim; the counters below presumably depend
    # on its compiled form.
    def test_impl(nsyms):
        max_num_days = 100
        all_res = 0.0
        for i in hpat.prange(nsyms):
            s_open = 20 * np.ones(max_num_days)
            s_low = 28 * np.ones(max_num_days)
            s_close = 19 * np.ones(max_num_days)
            df = pd.DataFrame({
                'Open': s_open,
                'Low': s_low,
                'Close': s_close
            })
            df['Stdev'] = df['Close'].rolling(window=90).std()
            df['Moving Average'] = df['Close'].rolling(window=20).mean()
            df['Criteria1'] = (df['Open'] - df['Low'].shift(1)) < -df['Stdev']
            df['Criteria2'] = df['Open'] > df['Moving Average']
            df['BUY'] = df['Criteria1'] & df['Criteria2']
            df['Pct Change'] = (df['Close'] - df['Open']) / df['Open']
            df['Rets'] = df['Pct Change'][df['BUY'] == True]
            all_res += df['Rets'].mean()
        return all_res

    compiled = hpat.jit(test_impl)
    num_symbols = 11

    actual = compiled(num_symbols)
    expected = test_impl(num_symbols)
    self.assertEqual(actual, expected)

    self.assertEqual(count_array_OneDs(), 0)
    self.assertEqual(count_parfor_OneDs(), 1)
def test_dist_input(self):
    """Check ``len`` of an argument declared as distributed.

    The jitted length divided by ``self.num_ranks`` must match the length
    the plain function reports for the same array.
    """
    def test_impl(A):
        return len(A)

    # 'A' names test_impl's parameter, so that name must not change.
    compiled = hpat.jit(distributed=['A'])(test_impl)

    size = 128
    data = np.ones(size)

    per_rank_len = compiled(data) / self.num_ranks
    np.testing.assert_allclose(per_rank_len, test_impl(data))
    self.assertEqual(count_array_OneDs(), 1)
def test_series_head_index_parallel1(self):
    '''Verifies head method for distributed Series with string index'''
    # NOTE(review): a second test with this exact name (integer index) also
    # appears in this source; if both live in the same class the later
    # definition replaces this one and this test never runs - confirm.
    def test_impl(S):
        return S.head(3)

    values = [6, 9, 2, 3, 6, 4, 5]
    labels = ['a', 'ab', 'abc', 'c', 'f', 'hh', '']
    series = pd.Series(values, labels)

    # 'S' names test_impl's parameter, so that name must not change.
    compiled = hpat.jit(distributed={'S'})(test_impl)

    # The jitted function receives only the [start:end) slice of the data.
    start, end = get_start_end(len(series))
    actual = compiled(series[start:end])
    expected = test_impl(series)
    pd.testing.assert_series_equal(actual, expected)

    self.assertTrue(count_array_OneDs() > 0)
def test_series_head_index_parallel1(self):
    '''Verifies head method for distributed Series with integer index'''
    def test_impl(S):
        return S.head(3)

    # 'S' names test_impl's parameter, so that name must not change.
    compiled = hpat.jit(distributed={'S'})(test_impl)

    values = [6, 9, 2, 3, 6, 4, 5]
    int_index = [8, 1, 6, 0, 9, 1, 3]
    series = pd.Series(values, int_index)

    # The jitted function receives only the [start:end) slice of the data.
    start, end = get_start_end(len(series))
    actual = compiled(series[start:end])
    expected = test_impl(series)
    pd.testing.assert_series_equal(actual, expected)

    self.assertTrue(count_array_OneDs() > 0)
def test_rebalance(self):
    """Check ``rebalance_array`` on a filtered array.

    Builds ``arange(N)``, keeps the elements greater than 10, rebalances the
    result through ``hpat.distributed_api.rebalance_array`` and sums it.
    The jitted sum must match the plain-Python sum, and the array/parfor
    counters must match the expected values.
    """
    def test_impl(N):
        # Use the argument N rather than the enclosing ``n`` so the compiled
        # function really depends on the value it is called with.
        A = np.arange(N)
        B = A[A > 10]
        C = hpat.distributed_api.rebalance_array(B)
        return C.sum()

    hpat_func = hpat.jit(test_impl)
    n = 128
    np.testing.assert_allclose(hpat_func(n), test_impl(n))
    self.assertEqual(count_array_OneDs(), 3)
    self.assertEqual(count_parfor_OneDs(), 2)
def test_array_reduce(self):
    """Check an in-place array accumulation inside ``numba.prange``.

    Adds one length-3 array of ones to another ``N`` times and compares the
    jitted result with the plain-Python result.
    """
    # NOTE(review): another test with this exact name (looping over several
    # operators and dtypes) also appears in this source; if both live in the
    # same class the later definition replaces this one - confirm.
    def test_impl(N):
        A = np.ones(3)
        B = np.ones(3)
        for i in numba.prange(N):
            A += B
        return A

    compiled = hpat.jit(test_impl)
    iterations = 128

    actual = compiled(iterations)
    expected = test_impl(iterations)
    np.testing.assert_allclose(actual, expected)

    self.assertEqual(count_array_OneDs(), 0)
    self.assertEqual(count_parfor_OneDs(), 1)
def test_join1_seq(self):
    """Check ``pd.merge`` with different key column names on each side.

    The sum of the returned ``B`` column must match between the jitted and
    plain versions, first for ``n = 11`` (where the counters are also
    checked) and then for ``n = 11111``.
    """
    def test_impl(n):
        df1 = pd.DataFrame({'key1': np.arange(n)+3, 'A': np.arange(n)+1.0})
        df2 = pd.DataFrame({'key2': 2*np.arange(n)+1, 'B': n+np.arange(n)+1.0})
        df3 = pd.merge(df1, df2, left_on='key1', right_on='key2')
        return df3.B

    compiled = hpat.jit(test_impl)

    small = 11
    self.assertEqual(compiled(small).sum(), test_impl(small).sum())
    # Both counters are expected to be zero for this function.
    self.assertEqual(count_array_OneDs(), 0)
    self.assertEqual(count_parfor_OneDs(), 0)

    large = 11111
    self.assertEqual(compiled(large).sum(), test_impl(large).sum())
def test_series_head_parallel1(self):
    '''Verifies head method for distributed Series with string data and no index'''
    def test_impl(S):
        return S.head(7)

    # 'S' names test_impl's parameter, so that name must not change.
    compiled = hpat.jit(distributed={'S'})(test_impl)

    base_values = ['a', 'ab', 'abc', 'c', 'f', 'hh', '']

    # Need to test different lengths, as head's size is fixed and the
    # implementation depends on the relation of the size of the data per
    # processor to the output data size.
    for repeats in range(1, 5):
        series = pd.Series(base_values * repeats)
        start, end = get_start_end(len(series))

        actual = compiled(series[start:end])
        expected = test_impl(series)
        pd.testing.assert_series_equal(actual, expected)

        self.assertTrue(count_array_OneDs() > 0)
def test_cumsum(self):
    """Check ``cumsum`` on a DataFrame column.

    Compares the sum of the cumulative sum of column ``A`` between the
    jitted and plain versions, checks the REP/OneD counters, and checks
    that ``dist_IR_contains`` reports ``'dist_cumsum'``.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
        Ac = df.A.cumsum()
        return Ac.sum()

    compiled = hpat.jit(test_impl)
    size = 11

    actual = compiled(size)
    expected = test_impl(size)
    self.assertEqual(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_array_OneDs(), 2)
    self.assertEqual(count_parfor_REPs(), 0)
    self.assertEqual(count_parfor_OneDs(), 2)
    self.assertTrue(dist_IR_contains('dist_cumsum'))
def test_dist_return(self):
    """Check returning an array declared as a distributed return value.

    The sum of the jitted result is passed through a jitted ``dist_reduce``
    (Sum) and compared with the sum of the plain result.
    """
    def test_impl(N):
        A = np.arange(N)
        return A

    # The 'A:return' key names test_impl's variable A; keep that name.
    compiled = hpat.jit(locals={'A:return': 'distributed'})(test_impl)
    size = 128

    reduce_sum = hpat.jit(
        lambda a: hpat.distributed_api.dist_reduce(
            a, np.int32(hpat.distributed_api.Reduce_Type.Sum.value)))
    reduce_sum(1)  # run to compile

    local_sum = compiled(size).sum()
    np.testing.assert_allclose(reduce_sum(local_sum), test_impl(size).sum())

    self.assertEqual(count_array_OneDs(), 1)
    self.assertEqual(count_parfor_OneDs(), 1)
def test_box_dist_return(self):
    """Check returning a DataFrame declared as distributed.

    The counters are checked right after the jitted call; then the sums of
    columns ``A`` and ``B`` are reduced with a jitted ``dist_reduce`` (Sum)
    and compared with the plain result's column sums.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
        return df

    # 'df' names test_impl's local variable, so that name must not change.
    compiled = hpat.jit(distributed={'df'})(test_impl)
    size = 11

    jit_df = compiled(size)
    ref_df = test_impl(size)

    # Counters are read before the reduction helper below gets compiled.
    self.assertEqual(count_array_OneDs(), 3)
    self.assertEqual(count_parfor_OneDs(), 2)

    reduce_sum = hpat.jit(
        lambda a: hpat.distributed_api.dist_reduce(
            a, np.int32(hpat.distributed_api.Reduce_Type.Sum.value)))
    reduce_sum(1)  # run to compile

    np.testing.assert_allclose(reduce_sum(jit_df.A.sum()), ref_df.A.sum())
    np.testing.assert_allclose(reduce_sum(jit_df.B.sum()), ref_df.B.sum())
def test_rebalance_loop(self):
    """Check summing a filtered array repeatedly inside a loop.

    Builds ``arange(N)``, keeps the elements greater than 10 and adds their
    sum to an accumulator three times. The jitted total must match the
    plain-Python total, the array/parfor counters must match the expected
    values, and the generated LLVM text must contain ``'allgather'``.
    """
    def test_impl(N):
        # Use the argument N rather than the enclosing ``n`` so the compiled
        # function really depends on the value it is called with.
        A = np.arange(N)
        B = A[A > 10]
        s = 0
        for i in range(3):
            s += B.sum()
        return s

    hpat_func = hpat.jit(test_impl)
    n = 128
    np.testing.assert_allclose(hpat_func(n), test_impl(n))
    self.assertEqual(count_array_OneDs(), 4)
    self.assertEqual(count_parfor_OneDs(), 2)
    # Inspect the LLVM text of the first compiled signature.
    self.assertIn('allgather', list(hpat_func.inspect_llvm().values())[0])
def test_logistic_regression(self):
    """Compare three logistic-regression weight updates, jitted vs plain.

    Uses an ``11 x 4`` input of constant values and checks the resulting
    weight vector, then the array/parfor counters.
    """
    def test_impl(n, d):
        iterations = 3
        X = np.ones((n, d)) + .5
        Y = np.ones(n)
        D = X.shape[1]
        w = np.ones(D) - 0.5
        for i in range(iterations):
            w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X)
        return w

    compiled = hpat.jit(test_impl)
    rows, cols = 11, 4

    actual = compiled(rows, cols)
    expected = test_impl(rows, cols)
    np.testing.assert_allclose(actual, expected)

    self.assertEqual(count_array_OneDs(), 3)
    self.assertEqual(count_parfor_OneDs(), 3)
def test_dist_return_tuple(self):
    """Check returning a tuple of two arrays, both declared distributed.

    The element-wise sum of the two jitted arrays is summed, reduced with a
    jitted ``dist_reduce`` (Sum), and compared with the same quantity from
    the plain function.
    """
    def test_impl(N):
        A = np.arange(N)
        B = np.arange(N) + 1.5
        return A, B

    # The keys name test_impl's variables A and B; keep those names.
    compiled = hpat.jit(locals={'A:return': 'distributed',
                                'B:return': 'distributed'})(test_impl)
    size = 128

    reduce_sum = hpat.jit(
        lambda a: hpat.distributed_api.dist_reduce(
            a, np.int32(hpat.distributed_api.Reduce_Type.Sum.value)))
    reduce_sum(1.0)  # run to compile

    # Each side calls its function twice, once per tuple element.
    local_total = (compiled(size)[0] + compiled(size)[1]).sum()
    expected_total = (test_impl(size)[0] + test_impl(size)[1]).sum()
    np.testing.assert_allclose(reduce_sum(local_total), expected_total)

    self.assertEqual(count_array_OneDs(), 2)
    self.assertEqual(count_parfor_OneDs(), 2)
def test_linear_regression(self):
    """Compare three linear-regression weight updates, jitted vs plain.

    Uses an ``11 x 4`` input with two output columns and checks the
    resulting weight matrix, then the array/parfor counters.
    """
    def test_impl(N, D):
        p = 2
        iterations = 3
        X = np.ones((N, D)) + .5
        Y = np.ones((N, p))
        alphaN = 0.01 / N
        w = np.zeros((D, p))
        for i in range(iterations):
            w -= alphaN * np.dot(X.T, np.dot(X, w) - Y)
        return w

    compiled = hpat.jit(test_impl)
    rows, cols = 11, 4

    actual = compiled(rows, cols)
    expected = test_impl(rows, cols)
    np.testing.assert_allclose(actual, expected)

    self.assertEqual(count_array_OneDs(), 5)
    self.assertEqual(count_parfor_OneDs(), 3)
def test_join_left_parallel1(self):
    """Merge on two key columns when only the left inputs are distributed.

    ``A1``, ``B1`` and ``C1`` (the left table's columns) are declared as
    distributed inputs and are passed as ``[start:end)`` slices; the right
    table's columns are passed whole. The jitted ``C + D`` sum of the merged
    frame must equal the plain-Python result on the full data.
    """
    def test_impl(A1, B1, C1, A2, B2, D2):
        df1 = pd.DataFrame({'A': A1, 'B': B1, 'C': C1})
        df2 = pd.DataFrame({'A': A2, 'B': B2, 'D': D2})
        df3 = df1.merge(df2, on=('A', 'B'))
        return df3.C.sum() + df3.D.sum()

    # The keys name test_impl's parameters; keep those names.
    compiled = hpat.jit(
        locals={
            'A1:input': 'distributed',
            'B1:input': 'distributed',
            'C1:input': 'distributed',
        })(test_impl)

    left = pd.DataFrame({
        'A': [3, 1, 1, 3, 4],
        'B': [1, 2, 3, 2, 3],
        'C': [7, 8, 9, 4, 5]
    })
    right = pd.DataFrame({
        'A': [2, 1, 4, 4, 3],
        'B': [1, 3, 2, 3, 2],
        'D': [1, 2, 3, 4, 8]
    })

    left_cols = (left.A.values, left.B.values, left.C.values)
    right_cols = (right.A.values, right.B.values, right.D.values)

    # Only the left columns are sliced for the jitted call.
    start, end = get_start_end(len(left))
    left_chunks = tuple(col[start:end] for col in left_cols)

    h_res = compiled(*left_chunks, *right_cols)
    p_res = test_impl(*left_cols, *right_cols)

    self.assertEqual(h_res, p_res)
    self.assertEqual(count_array_OneDs(), 3)
def test_kde(self):
    """Compare a kernel-density style reduction inside ``hpat.prange``.

    For each of ``n`` samples the loop evaluates a log-sum-exp over three
    fixed points and accumulates the result; the jitted and plain totals
    must agree to the default precision of ``assert_approx_equal``.
    """
    def test_impl(n):
        X = np.ones(n)
        b = 0.5
        points = np.array([-1.0, 2.0, 5.0])
        N = points.shape[0]
        exps = 0
        for i in hpat.prange(n):
            p = X[i]
            d = (-(p - points)**2) / (2 * b**2)
            m = np.min(d)
            exps += m - np.log(b * N) + np.log(np.sum(np.exp(d - m)))
        return exps

    compiled = hpat.jit(test_impl)
    num_samples = 11

    actual = compiled(num_samples)
    expected = test_impl(num_samples)
    np.testing.assert_approx_equal(actual, expected)

    self.assertEqual(count_array_OneDs(), 1)
    self.assertEqual(count_parfor_OneDs(), 2)
def test_logistic_regression_acc(self):
    """Compare logistic-regression accuracy, jitted vs plain.

    Runs three weight updates on an ``11 x 4`` constant input, computes the
    fraction of predictions equal to the labels, and checks that both
    versions report approximately the same accuracy.
    """
    def test_impl(N, D):
        iterations = 3
        g = 2 * np.ones(D) - 1
        X = 2 * np.ones((N, D)) - 1
        Y = ((np.dot(X, g) > 0.0) == (np.ones(N) > .90)) + .0
        w = 2 * np.ones(D) - 1
        for i in range(iterations):
            w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X)
            R = np.dot(X, w) > 0.0
            accuracy = np.sum(R == Y) / N
        return accuracy

    compiled = hpat.jit(test_impl)
    rows, cols = 11, 4

    actual = compiled(rows, cols)
    expected = test_impl(rows, cols)
    np.testing.assert_approx_equal(actual, expected)

    self.assertEqual(count_array_OneDs(), 3)
    self.assertEqual(count_parfor_OneDs(), 4)
def test_array_reduce(self):
    """Check in-place array reductions in ``numba.prange`` for several types.

    For each (operator, dtype) pair a small function ``f(n)`` is generated
    from source text, compiled with ``hpat.jit`` and compared against the
    plain-Python version; the array/parfor counters are checked each time.
    """
    binops = ['+=', '*=', '+=', '*=', '|=', '|=']
    dtypes = ['np.float32', 'np.float32', 'np.float64', 'np.float64',
              'np.int32', 'np.int64']

    for (op, typ) in zip(binops, dtypes):
        # Build the source with explicit newlines and indentation so the
        # text handed to exec() is valid Python no matter how this file
        # itself is laid out.
        func_text = (
            "def f(n):\n"
            "    A = np.arange(0, 10, 1, {})\n"
            "    B = np.arange(0 + 3, 10 + 3, 1, {})\n"
            "    for i in numba.prange(n):\n"
            "        A {} B\n"
            "    return A\n"
        ).format(typ, typ, op)

        # The generated text only contains the fixed templates above; it is
        # executed with just np and numba visible.
        loc_vars = {}
        exec(func_text, {'np': np, 'numba': numba}, loc_vars)
        test_impl = loc_vars['f']

        hpat_func = hpat.jit(test_impl)
        n = 128
        np.testing.assert_allclose(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_OneDs(), 0)
        self.assertEqual(count_parfor_OneDs(), 1)