def test_df_cov1(self):
    """Check jitted DataFrame ``rolling(...).cov`` and ``.corr`` against pandas.

    Both implementations are run on every ordered pair drawn from two
    five-row frames (one holding a NaN in column ``A``; the second column
    is named ``B`` in one frame and ``C`` in the other), for every window
    size in ``wins`` and both ``center`` settings.
    """
    df1 = pd.DataFrame({'A': [0, 1, 2, np.nan, 4], 'B': np.ones(5)})
    df2 = pd.DataFrame({'A': [0, 1, 2, -2, 4], 'C': np.ones(5)})
    wins = (3, )
    if LONG_TEST:
        wins = (2, 3, 5)
    centers = (False, True)

    def test_impl(df, df2, w, c):
        return df.rolling(w, center=c).cov(df2)

    def test_impl2(df, df2, w, c):
        return df.rolling(w, center=c).corr(df2)

    for impl in (test_impl, test_impl2):
        hpat_func = sdc.jit(impl)
        for args in itertools.product([df1, df2], [df1, df2], wins, centers):
            # Each combination is compared exactly once; repeating the
            # identical assertion adds run time but no coverage.
            pd.testing.assert_frame_equal(hpat_func(*args), impl(*args))
def test_rolling1(self):
    """Check jitted ``Series.rolling(w).sum()`` for window sizes 3 and 7.

    For each window size the jitted result must equal the pure-Python
    result, and ``count_array_REPs()`` / ``count_parfor_REPs()`` must both
    be 0 afterwards.
    """
    # size 3 without unroll
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(n), 'B': np.random.ranf(n)})
        Ac = df.A.rolling(3).sum()
        return Ac.sum()

    hpat_func = sdc.jit(test_impl)
    n = 121
    self.assertEqual(hpat_func(n), test_impl(n))
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)

    # size 7 with unroll
    def test_impl_2(n):
        df = pd.DataFrame({
            'A': np.arange(n) + 1.0,
            'B': np.random.ranf(n)
        })
        Ac = df.A.rolling(7).sum()
        return Ac.sum()

    # Jit and compare the window-7 implementation itself; compiling
    # test_impl again here would leave this case untested.
    hpat_func = sdc.jit(test_impl_2)
    n = 121
    self.assertEqual(hpat_func(n), test_impl_2(n))
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_series_fixed1(self):
    """Compare jitted and pandas results for fixed-window Series rolling ops.

    Every method name in ``test_funcs`` is turned into a small function via
    ``exec`` and checked on two series (one containing a NaN) for each
    window size and both ``center`` settings. ``apply`` with a summing
    lambda is checked separately afterwards.
    """
    S1 = pd.Series([0, 1, 2, np.nan, 4])
    S2 = pd.Series([0, 1, 2, -2, 4])
    series_inputs = (S1, S2)
    wins = (2, 3, 5) if LONG_TEST else (3, )
    centers = (False, True)

    # Source template for the no-argument rolling methods.
    template = "def test_impl(S, w, c):\n return S.rolling(w, center=c).{}()\n"
    for func_name in test_funcs:
        namespace = {}
        exec(template.format(func_name), {}, namespace)
        test_impl = namespace['test_impl']
        jitted = sdc.jit(test_impl)
        for w, c in itertools.product(wins, centers):
            for series in series_inputs:
                pd.testing.assert_series_equal(jitted(series, w, c),
                                               test_impl(series, w, c))

    # rolling apply is not a no-argument method, so it gets its own impl
    def apply_test_impl(S, w, c):
        return S.rolling(w, center=c).apply(lambda a: a.sum())

    jitted_apply = sdc.jit(apply_test_impl)
    for w, c in itertools.product(wins, centers):
        for series in series_inputs:
            pd.testing.assert_series_equal(jitted_apply(series, w, c),
                                           apply_test_impl(series, w, c))
def test_series_cov1(self):
    """Check jitted Series ``rolling(...).cov`` and ``.corr`` against pandas.

    Both implementations are run on every ordered pair drawn from two
    five-element series (one containing a NaN), for every window size in
    ``wins`` and both ``center`` settings.
    """
    S1 = pd.Series([0, 1, 2, np.nan, 4])
    S2 = pd.Series([0, 1, 2, -2, 4])
    wins = (3, )
    if LONG_TEST:
        wins = (2, 3, 5)
    centers = (False, True)

    def test_impl(S, S2, w, c):
        return S.rolling(w, center=c).cov(S2)

    def test_impl2(S, S2, w, c):
        return S.rolling(w, center=c).corr(S2)

    for impl in (test_impl, test_impl2):
        hpat_func = sdc.jit(impl)
        for args in itertools.product([S1, S2], [S1, S2], wins, centers):
            # Each combination is compared exactly once; repeating the
            # identical assertion adds run time but no coverage.
            pd.testing.assert_series_equal(hpat_func(*args), impl(*args))
def test_logistic_regression(self):
    """Train and predict with d4p logistic regression, jitted vs. plain Python.

    Covers:
      * result and model boxing/unboxing (the trained ``.model`` is passed
        from the training call into the prediction call)
      * passing of optional and required arguments
    The ``.prediction`` arrays of both paths must agree within the default
    tolerance of ``assert_allclose``.
    """
    def train_impl(n, d):
        X = np.ones((n, d), dtype=np.double) + .5
        Y = np.ones((n, 1), dtype=np.double)
        algo = d4p.logistic_regression_training(2,
                                                penaltyL1=0.1,
                                                penaltyL2=0.1,
                                                interceptFlag=True)
        return algo.compute(X, Y)

    def prdct_impl(n, d, model):
        w = np.ones((n, d), dtype=np.double) - 22.5
        algo = d4p.logistic_regression_prediction(
            2,
            resultsToCompute=
            "computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities"
        )
        return algo.compute(w, model)

    jitted_train = sdc.jit(train_impl)
    jitted_predict = sdc.jit(prdct_impl)
    n, d = 11, 4

    # Pure-Python path first, then the jitted path.
    python_model = train_impl(n, d).model
    expected = prdct_impl(n, d, python_model).prediction
    jitted_model = jitted_train(n, d).model
    actual = jitted_predict(n, d, jitted_model).prediction

    np.testing.assert_allclose(expected, actual)
def test_nunique_str(self):
    """Compare jitted and plain-Python ``nunique`` on a string column.

    The comparison runs twice, re-jitting the function the second time,
    to catch overload-related issues on recompilation.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': ['aa', 'bb', 'aa', 'cc', 'cc']})
        return df.A.nunique()

    n = 1001  # forwarded to test_impl, whose body does not read it
    for _ in range(2):  # second pass: compile again
        jitted = sdc.jit(test_impl)
        np.testing.assert_almost_equal(jitted(n), test_impl(n))
def test_nunique(self):
    """Compare jitted and plain-Python ``nunique`` on an integer column.

    The column is ``arange(n)`` with element 2 overwritten by 0, so it
    contains one duplicate value. The comparison runs twice, re-jitting
    the function the second time, to catch overload-related issues.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(n)})
        df.A[2] = 0
        return df.A.nunique()

    n = 1001
    for _ in range(2):  # second pass: compile again
        jitted = sdc.jit(test_impl)
        np.testing.assert_almost_equal(jitted(n), test_impl(n))
def test_assert(self):
    """Run a jitted function that calls a jitted function containing ``assert``.

    The asserted condition holds for the argument used, so the test passes
    as long as compiling and running raise nothing.
    """
    # the assert lives in g, which is invoked from inside the jitted f
    def g(a):
        assert a == 0

    hpat_g = sdc.jit(g)

    def f():
        hpat_g(0)

    jitted_f = sdc.jit(f)
    jitted_f()
def test_nunique_str_parallel(self):
    """Compare jitted and plain-Python ``nunique`` on column ``two`` of a parquet file.

    Reads ``example.parquet`` from the working directory. After each run
    ``count_array_REPs()`` must be 0. The whole check runs twice, re-jitting
    the second time, to catch overload-related issues.
    """
    # TODO: test without file
    def test_impl():
        df = pq.read_table('example.parquet').to_pandas()
        return df.two.nunique()

    for _ in range(2):  # second pass: compile again
        jitted = sdc.jit(test_impl)
        self.assertEqual(jitted(), test_impl())
        self.assertEqual(count_array_REPs(), 0)
def test_equality(self):
    """Check that jitted string ``==`` and ``!=`` against a literal match Python."""
    arg = 'test_str'

    def eq_impl(_str):
        return (_str == 'test_str')

    def ne_impl(_str):
        return (_str != 'test_str')

    for impl in (eq_impl, ne_impl):
        jitted = sdc.jit(impl)
        self.assertEqual(jitted(arg), impl(arg))
def test_dist_return(self):
    """Return an array flagged ``'distributed'`` from a jitted function.

    The sum of the jitted result is passed through a jitted ``dist_reduce``
    with the ``Sum`` reduce type and compared with the sum of the
    plain-Python result. Afterwards ``count_array_OneDs()`` and
    ``count_parfor_OneDs()`` must both be 1.
    """
    def test_impl(N):
        A = np.arange(N)
        return A

    jitted = sdc.jit(locals={'A:return': 'distributed'})(test_impl)
    n = 128

    reduce_sum = sdc.jit(
        lambda a: sdc.distributed_api.dist_reduce(
            a, np.int32(sdc.distributed_api.Reduce_Type.Sum.value)))
    reduce_sum(1)  # warm-up call so the reducer is compiled first

    local_total = jitted(n).sum()
    reduced_total = reduce_sum(local_total)
    expected_total = test_impl(n).sum()
    np.testing.assert_allclose(reduced_total, expected_total)

    self.assertEqual(count_array_OneDs(), 1)
    self.assertEqual(count_parfor_OneDs(), 1)
def test_box_dist_return(self):
    """Return a DataFrame flagged as distributed from a jitted function.

    Right after the jitted call ``count_array_OneDs()`` must be 3 and
    ``count_parfor_OneDs()`` must be 2. The per-column sums of the jitted
    frame are then passed through a jitted ``dist_reduce`` (``Sum``) and
    compared with the sums of the plain-Python frame.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
        return df

    jitted = sdc.jit(distributed={'df'})(test_impl)
    n = 11
    jit_frame = jitted(n)
    py_frame = test_impl(n)

    # Counts are checked before anything else gets compiled.
    self.assertEqual(count_array_OneDs(), 3)
    self.assertEqual(count_parfor_OneDs(), 2)

    reduce_sum = sdc.jit(lambda a: sdc.distributed_api.dist_reduce(
        a, np.int32(sdc.distributed_api.Reduce_Type.Sum.value)))
    reduce_sum(1)  # warm-up call so the reducer is compiled

    np.testing.assert_allclose(reduce_sum(jit_frame.A.sum()), py_frame.A.sum())
    np.testing.assert_allclose(reduce_sum(jit_frame.B.sum()), py_frame.B.sum())
def test_df_drop1(self):
    """Check that jitted ``df.drop(columns=['A'])`` matches pandas."""
    def test_impl(df):
        return df.drop(columns=['A'])

    frame = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7]})
    jitted = sdc.jit(test_impl)

    actual = jitted(frame)
    expected = test_impl(frame)
    pd.testing.assert_frame_equal(actual, expected)
def test_np_io1(self):
    """Check that jitted ``np.fromfile`` of float64 data matches NumPy.

    Reads ``np_file1.dat`` from the working directory; the file is
    presumably created by the test setup (not visible here).
    """
    def test_impl():
        A = np.fromfile("np_file1.dat", np.float64)
        return A

    jitted = sdc.jit(test_impl)
    actual = jitted()
    expected = test_impl()
    np.testing.assert_almost_equal(actual, expected)
def test_agg_multikey_parallel(self):
    """Group by two keys and sum, with all three inputs flagged distributed.

    The jitted function receives the ``[start:end]`` slice of each column
    (bounds from ``get_start_end``), while the plain-Python reference
    receives the full columns; the two scalar results must be equal.
    """
    def test_impl(in_A, in_B, in_C):
        df = pd.DataFrame({'A': in_A, 'B': in_B, 'C': in_C})
        A = df.groupby(['A', 'C'])['B'].sum()
        return A.sum()

    distributed_inputs = {
        'in_A:input': 'distributed',
        'in_B:input': 'distributed',
        'in_C:input': 'distributed'
    }
    jitted = sdc.jit(locals=distributed_inputs)(test_impl)

    frame = pd.DataFrame({
        'A': [2, 1, 1, 1, 2, 2, 1],
        'B': [-8, 2, 3, 1, 5, 6, 7],
        'C': [3, 5, 6, 5, 4, 4, 3]
    })
    start, end = get_start_end(len(frame))

    column_names = ('A', 'B', 'C')
    sliced_columns = [frame[name].values[start:end] for name in column_names]
    full_columns = [frame[name].values for name in column_names]

    h_res = jitted(*sliced_columns)
    p_res = test_impl(*full_columns)
    self.assertEqual(h_res, p_res)
def test_df_reset_index1(self):
    """Check that jitted ``df.reset_index(drop=True)`` matches pandas."""
    def test_impl(df):
        return df.reset_index(drop=True)

    frame = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]})
    jitted = sdc.jit(test_impl)

    actual = jitted(frame)
    expected = test_impl(frame)
    pd.testing.assert_frame_equal(actual, expected)
def test_df_fillna_str1(self):
    """Check that jitted ``df.fillna("dd")`` on a string column with a None matches pandas."""
    def test_impl(df):
        return df.fillna("dd")

    frame = pd.DataFrame({'A': ['aa', 'b', None, 'ccc']})
    jitted = sdc.jit(test_impl)

    actual = jitted(frame)
    expected = test_impl(frame)
    pd.testing.assert_frame_equal(actual, expected)
def test_df_fillna1(self):
    """Check that jitted ``df.fillna(5.0)`` on a float column with a NaN matches pandas."""
    def test_impl(df):
        return df.fillna(5.0)

    frame = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]})
    jitted = sdc.jit(test_impl)

    actual = jitted(frame)
    expected = test_impl(frame)
    pd.testing.assert_frame_equal(actual, expected)
def test_create_without_column_names(self):
    """Build a DataFrame from a plain list (no column names) inside a jitted function."""
    def test_impl():
        df = pd.DataFrame([100, 200, 300, 400, 200, 100])
        return df

    jitted = sdc.jit(test_impl)
    actual = jitted()
    expected = test_impl()
    pd.testing.assert_frame_equal(actual, expected)
def test_box2(self):
    """Return a DataFrame with an integer and a string column from a jitted function."""
    def test_impl():
        df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'bb', 'ccc']})
        return df

    jitted = sdc.jit(test_impl)
    actual = jitted()
    expected = test_impl()
    pd.testing.assert_frame_equal(actual, expected)
def test_unbox_without_column_names(self):
    """Pass a DataFrame built without column names through a jitted identity function."""
    def test_impl(df):
        return df

    frame = pd.DataFrame([100, 200, 300, 400, 200, 100])
    jitted = sdc.jit(test_impl)

    actual = jitted(frame)
    expected = test_impl(frame)
    pd.testing.assert_frame_equal(actual, expected)
def test_variable_apply1(self):
    """Compare jitted and pandas time-window ``rolling(...).apply`` on the ``time`` column.

    Runs sequentially on two manually built frames (one with a NaN in
    ``B``) for every offset window in ``wins``; the applied function is a
    summing lambda.
    """
    def make_frame(values, stamps):
        return pd.DataFrame({
            'B': values,
            'time': [pd.Timestamp(stamp) for stamp in stamps]
        })

    df1 = make_frame(
        [0, 1, 2, np.nan, 4],
        ['20130101 09:00:00', '20130101 09:00:02', '20130101 09:00:03',
         '20130101 09:00:05', '20130101 09:00:06'])
    df2 = make_frame(
        [0, 1, 2, -2, 4],
        ['20130101 09:00:01', '20130101 09:00:02', '20130101 09:00:03',
         '20130101 09:00:04', '20130101 09:00:09'])

    wins = ('1s', '2s', '3s', '4s') if LONG_TEST else ('2s', )

    # The window string is baked into the generated source for each run.
    template = "def test_impl(df):\n return df.rolling('{}', on='time').apply(lambda a: a.sum())\n"
    for w in wins:
        namespace = {}
        exec(template.format(w), {}, namespace)
        test_impl = namespace['test_impl']
        jitted = sdc.jit(test_impl)
        for frame in (df1, df2):
            pd.testing.assert_frame_equal(jitted(frame), test_impl(frame))
def test_getitem_bool_series(self):
    """Index column ``A`` with boolean column ``B`` and compare the ``.values`` arrays."""
    def test_impl(df):
        return df['A'][df['B']].values

    jitted = sdc.jit(test_impl)
    frame = pd.DataFrame({'A': [1, 2, 3], 'B': [True, False, True]})

    # The plain-Python reference is evaluated first, then the jitted call.
    expected = test_impl(frame)
    actual = jitted(frame)
    np.testing.assert_array_equal(expected, actual)
def test_join1_seq_key_change1(self):
    """Merge on ``['A']`` and then on ``['B']`` inside one jitted function.

    Guards against const-list typing replacing the key of the second
    merge with the key of the first; only the second merge result
    (index 1 of the returned tuple) is compared with pandas.
    """
    def test_impl(df1, df2, df3, df4):
        o1 = df1.merge(df2, on=['A'])
        o2 = df3.merge(df4, on=['B'])
        return o1, o2

    jitted = sdc.jit(test_impl)
    n = 11
    df1 = pd.DataFrame({'A': np.arange(n) + 3, 'AA': np.arange(n) + 1.0})
    df2 = pd.DataFrame({'A': 2 * np.arange(n) + 1, 'AAA': n + np.arange(n) + 1.0})
    df3 = pd.DataFrame({'B': 2 * np.arange(n) + 1, 'BB': n + np.arange(n) + 1.0})
    df4 = pd.DataFrame({'B': 2 * np.arange(n) + 1, 'BBB': n + np.arange(n) + 1.0})
    frames = (df1, df2, df3, df4)

    actual_second = jitted(*frames)[1]
    expected_second = test_impl(*frames)[1]
    pd.testing.assert_frame_equal(actual_second, expected_second)
def do_jit(f):
    """Yield the ``sdc.jit``-compiled version of ``f`` and drop it afterwards.

    Written in the generator form used for context managers: the compiled
    function is yielded once and the local reference is deleted in a
    ``finally`` clause when the generator is resumed or closed.
    NOTE(review): no ``contextmanager`` decorator is visible on this
    definition; confirm how it is wrapped before using it in ``with``.
    """
    jitted = sdc.jit(f)
    try:
        yield jitted
    finally:
        del jitted
def test_reduce_filter1(self):
    """Check reductions applied after a boolean filter on a distributed input.

    For every (dtype, reduction) pair a function ``f`` is generated with
    ``exec`` that keeps the elements greater than 5 and applies the
    reduction. The jitted version is called on the ``[start:end]`` slice
    of the data (bounds from ``get_start_end``) and compared with the
    plain-Python result on the full array, to 3 decimals.
    """
    import sys
    dtypes = ['float32', 'float64', 'int32', 'int64']
    funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax']
    for (dtype, func) in itertools.product(dtypes, funcs):
        # loc allreduce doesn't support int64 on windows
        if (sys.platform.startswith('win') and dtype == 'int64'
                and func in ['argmin', 'argmax']):
            continue
        # Source of the function under test; the reduction name is filled in.
        func_text = """def f(A):
            A = A[A>5]
            return A.{}()
        """.format(func)
        loc_vars = {}
        exec(func_text, {'np': np}, loc_vars)
        test_impl = loc_vars['f']
        hpat_func = sdc.jit(locals={'A:input': 'distributed'})(test_impl)
        n = 21
        start, end = get_start_end(n)
        # Fixed seed so the same data is generated for every combination.
        np.random.seed(0)
        A = np.random.randint(0, 10, n).astype(dtype)
        np.testing.assert_almost_equal(
            hpat_func(A[start:end]),
            test_impl(A),
            decimal=3,
            err_msg="{} on {}".format(func, dtype))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)
def test_intraday(self):
    """Run a small DataFrame pipeline inside an ``sdc.prange`` loop.

    Each iteration builds a frame from constant columns, derives rolling
    std/mean columns and boolean criteria, and adds the mean of the
    filtered ``Rets`` column to an accumulator. The jitted and
    plain-Python totals must be equal; afterwards ``count_array_OneDs()``
    must be 0 and ``count_parfor_OneDs()`` must be 1.
    """
    def test_impl(nsyms):
        max_num_days = 100
        all_res = 0.0
        for i in sdc.prange(nsyms):
            # Constant-valued input columns of length max_num_days.
            s_open = 20 * np.ones(max_num_days)
            s_low = 28 * np.ones(max_num_days)
            s_close = 19 * np.ones(max_num_days)
            df = pd.DataFrame({
                'Open': s_open,
                'Low': s_low,
                'Close': s_close
            })
            df['Stdev'] = df['Close'].rolling(window=90).std()
            df['Moving Average'] = df['Close'].rolling(window=20).mean()
            df['Criteria1'] = (df['Open'] - df['Low'].shift(1)) < -df['Stdev']
            df['Criteria2'] = df['Open'] > df['Moving Average']
            df['BUY'] = df['Criteria1'] & df['Criteria2']
            df['Pct Change'] = (df['Close'] - df['Open']) / df['Open']
            # Keep the percentage change only on rows where BUY is True.
            df['Rets'] = df['Pct Change'][df['BUY']]
            all_res += df['Rets'].mean()
        return all_res

    hpat_func = sdc.jit(test_impl)
    n = 11
    self.assertEqual(hpat_func(n), test_impl(n))
    self.assertEqual(count_array_OneDs(), 0)
    self.assertEqual(count_parfor_OneDs(), 1)
def test_dataframe_columns_attribute(self):
    """Check that ``df.columns`` returned from a jitted function matches pandas."""
    def test_impl():
        df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
        return df.columns

    jitted = sdc.jit(test_impl)
    actual = jitted()
    expected = test_impl()
    np.testing.assert_array_equal(actual, expected)
def test_kmeans(self):
    """Run a k-means style iteration written with nested list comprehensions.

    The jitted and plain-Python centroids must agree within the default
    tolerance of ``assert_allclose``; the OneD / OneD_Var array and parfor
    counters are then checked against fixed expected values.
    """
    def test_impl(numCenter, numIter, N, D):
        A = np.ones((N, D))
        centroids = np.zeros((numCenter, D))
        for l in range(numIter):
            # Euclidean distance from every row of A to every centroid.
            dist = np.array([[
                sqrt(np.sum((A[i, :] - centroids[j, :])**2))
                for j in range(numCenter)
            ] for i in range(N)])
            # Index of the nearest centroid for each row.
            labels = np.array([dist[i, :].argmin() for i in range(N)])
            # New centroid i: per-column mean of the rows labelled i.
            centroids = np.array([[
                np.sum(A[labels == i, j]) / np.sum(labels == i)
                for j in range(D)
            ] for i in range(numCenter)])
        return centroids

    hpat_func = sdc.jit(test_impl)
    n = 11
    np.testing.assert_allclose(hpat_func(1, 1, n, 2), test_impl(1, 1, n, 2))
    self.assertEqual(count_array_OneDs(), 4)
    self.assertEqual(count_array_OneD_Vars(), 1)
    self.assertEqual(count_parfor_OneDs(), 5)
    self.assertEqual(count_parfor_OneD_Vars(), 1)
def test_dataframe_columns_iterator(self):
    """Check that iterating ``df.columns`` in a jitted comprehension matches pandas."""
    def test_impl():
        df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
        return [column for column in df.columns]

    jitted = sdc.jit(test_impl)
    actual = jitted()
    expected = test_impl()
    np.testing.assert_array_equal(actual, expected)