def test_rolling1(self):
    """Check rolling-window sums for window sizes 3 and 7.

    Each implementation is run through ``hpat.jit`` and as plain Python
    with ``n = 121``; the two results must be equal. After each comparison
    ``count_array_REPs()`` and ``count_parfor_REPs()`` must both return 0.
    """
    # size 3 without unroll
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(n), 'B': np.random.ranf(n)})
        Ac = df.A.rolling(3).sum()
        return Ac.sum()

    hpat_func = hpat.jit(test_impl)
    n = 121
    self.assertEqual(hpat_func(n), test_impl(n))
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)

    # size 7 with unroll
    def test_impl_2(n):
        df = pd.DataFrame({
            'A': np.arange(n) + 1.0,
            'B': np.random.ranf(n)
        })
        Ac = df.A.rolling(7).sum()
        return Ac.sum()

    # The window-7 case must compile and compare test_impl_2 itself;
    # using test_impl here would only repeat the window-3 check above.
    hpat_func = hpat.jit(test_impl_2)
    n = 121
    self.assertEqual(hpat_func(n), test_impl_2(n))
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_nunique_str_parallel(self):
    """Compare ``nunique`` of column ``two`` from 'example.parquet'.

    The implementation is compiled and checked twice; both times the
    jitted result must equal the plain-Python result and
    ``count_array_REPs()`` must return 0.
    """
    # TODO: test without file
    def test_impl():
        df = pq.read_table('example.parquet').to_pandas()
        return df.two.nunique()

    # The second pass compiles again to test for overload related issues.
    for _ in range(2):
        jitted = hpat.jit(test_impl)
        self.assertEqual(jitted(), test_impl())
        self.assertEqual(count_array_REPs(), 0)
def test_reduce_filter1(self):
    """Check reductions applied to a filtered, distributed input array.

    For every (dtype, reduction) pair a function ``f(A)`` is generated that
    filters ``A`` to values greater than 5 and applies the reduction. The
    jitted function receives the slice ``A[start:end]`` (bounds from
    ``get_start_end(n)``) and is compared, to 3 decimals, with the plain
    function applied to the whole array.
    """
    import sys

    dtypes = ['float32', 'float64', 'int32', 'int64']
    funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax']
    on_windows = sys.platform.startswith('win')

    for dtype, func in itertools.product(dtypes, funcs):
        # loc allreduce doesn't support int64 on windows
        if on_windows and dtype == 'int64' and func in ('argmin', 'argmax'):
            continue

        # Generate the implementation source for this reduction name.
        func_text = """def f(A):
            A = A[A>5]
            return A.{}()
        """.format(func)
        namespace = {}
        exec(func_text, {'np': np}, namespace)
        test_impl = namespace['f']

        jitted = hpat.jit(locals={'A:input': 'distributed'})(test_impl)
        n = 21
        start, end = get_start_end(n)
        # Fixed seed so every iteration sees the same input data.
        np.random.seed(0)
        A = np.random.randint(0, 10, n).astype(dtype)
        np.testing.assert_almost_equal(
            jitted(A[start:end]),
            test_impl(A),
            decimal=3,
            err_msg="{} on {}".format(func, dtype))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)
def test_shape(self):
    """Compare ``np.ones(N).shape[0]`` between jitted and plain execution."""
    def test_impl(N):
        return np.ones(N).shape[0]

    jitted = hpat.jit(test_impl)
    size = 128
    np.testing.assert_allclose(jitted(size), test_impl(size))
    # Both counters are expected to report zero after the jitted run.
    for counter in (count_array_REPs, count_parfor_REPs):
        self.assertEqual(counter(), 0)
def test_pq_float_no_nan(self):
    """Compare the sum of column ``four`` read from 'example.parquet'."""
    def test_impl():
        df = pq.read_table('example.parquet').to_pandas()
        return df.four.sum()

    jitted = hpat.jit(test_impl)
    actual = jitted()
    expected = test_impl()
    np.testing.assert_almost_equal(actual, expected)
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_unique_str_parallel(self):
    """Compare how many unique values of column ``two`` equal 'foo'.

    Data comes from 'example.parquet'; the jitted and plain results must
    be equal and ``count_array_REPs()`` must return 0.
    """
    # TODO: test without file
    def test_impl():
        df = pq.read_table('example.parquet').to_pandas()
        return (df.two.unique() == 'foo').sum()

    jitted = hpat.jit(test_impl)
    actual = jitted()
    expected = test_impl()
    self.assertEqual(actual, expected)
    self.assertEqual(count_array_REPs(), 0)
def test_agg_parallel_str(self):
    """Compare a groupby ``agg`` with a max-minus-min lambda on 'groupby3.pq'.

    Column ``B`` is grouped by ``A``; the aggregated values are summed and
    the jitted result must equal the plain-Python result.
    """
    def test_impl():
        df = pq.read_table("groupby3.pq").to_pandas()
        A = df.groupby('A')['B'].agg(lambda x: x.max() - x.min())
        return A.sum()

    jitted = hpat.jit(test_impl)
    self.assertEqual(jitted(), test_impl())
    for counter in (count_array_REPs, count_parfor_REPs):
        self.assertEqual(counter(), 0)
def test_df_values_parallel1(self):
    """Compare ``df.values.sum()`` for a two-column frame of length 11."""
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
        return df.values.sum()

    jitted = hpat.jit(test_impl)
    size = 11
    actual = jitted(size)
    expected = test_impl(size)
    np.testing.assert_array_equal(actual, expected)
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_np_io2(self):
    """Compare the sum of float64 data loaded from 'np_file1.dat'."""
    # parallel version
    def test_impl():
        A = np.fromfile("np_file1.dat", np.float64)
        return A.sum()

    jitted = hpat.jit(test_impl)
    np.testing.assert_almost_equal(jitted(), test_impl())
    for counter in (count_array_REPs, count_parfor_REPs):
        self.assertEqual(counter(), 0)
def test_pq_str_with_nan_par_multigroup(self):
    """Compare the count of 'foo' entries in column ``five``.

    Data is read from 'example2.parquet'; the jitted and plain counts are
    compared with ``assert_almost_equal``.
    """
    def test_impl():
        df = pq.read_table('example2.parquet').to_pandas()
        A = df.five.values == 'foo'
        return A.sum()

    jitted = hpat.jit(test_impl)
    actual = jitted()
    expected = test_impl()
    np.testing.assert_almost_equal(actual, expected)
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_shape1(self):
    """Compare ``df.shape`` of a two-column frame built with n = 11."""
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n, np.int64), 'B': np.random.ranf(n)})
        return df.shape

    jitted = hpat.jit(test_impl)
    size = 11
    self.assertEqual(jitted(size), test_impl(size))
    for counter in (count_array_REPs, count_parfor_REPs):
        self.assertEqual(counter(), 0)
def test_pd_read_parquet(self):
    """Compare the sum of column ``points`` loaded via ``pd.read_parquet``.

    The file read is 'kde.parquet'.
    """
    def test_impl():
        df = pd.read_parquet('kde.parquet')
        X = df['points']
        return X.sum()

    jitted = hpat.jit(test_impl)
    actual = jitted()
    expected = test_impl()
    np.testing.assert_almost_equal(actual, expected)
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_quantile_parallel_int(self):
    """Compare the 0.25 quantile of an int32 range column of length 1001."""
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(0, n, 1, np.int32)})
        return df.A.quantile(.25)

    jitted = hpat.jit(test_impl)
    size = 1001
    np.testing.assert_almost_equal(jitted(size), test_impl(size))
    for counter in (count_array_REPs, count_parfor_REPs):
        self.assertEqual(counter(), 0)
def test_series_dist_input1(self):
    """Check ``S.max()`` when the jitted function gets a slice of the input.

    The function is compiled with ``distributed={'S'}``. It is called on
    ``S[start:end]`` (bounds from ``get_start_end(n)``) and its result must
    equal the plain-Python maximum of the full series.
    """
    def test_impl(S):
        return S.max()

    jitted = hpat.jit(distributed={'S'})(test_impl)
    n = 111
    S = pd.Series(np.arange(n))
    start, end = get_start_end(n)
    local_chunk = S[start:end]
    self.assertEqual(jitted(local_chunk), test_impl(S))
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_whole_slice(self):
    """Compare an in-place update of a full column slice, then the sum.

    The implementation rewrites column 3 of an (N, 4) array of ones from a
    max/min expression over that same column and returns the array sum.
    """
    def test_impl(N):
        X = np.ones((N, 4))
        X[:, 3] = (X[:, 3]) / (np.max(X[:, 3]) - np.min(X[:, 3]))
        return X.sum()

    jitted = hpat.jit(test_impl)
    size = 128
    actual = jitted(size)
    expected = test_impl(size)
    np.testing.assert_allclose(actual, expected)
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_pq_read(self):
    """Compare the sum of column ``points`` read via ``pq.read_table``.

    The table from 'kde.parquet' is bound to a variable first and then
    converted with ``to_pandas()`` in a separate statement.
    """
    def test_impl():
        t = pq.read_table('kde.parquet')
        df = t.to_pandas()
        X = df['points']
        return X.sum()

    jitted = hpat.jit(test_impl)
    np.testing.assert_almost_equal(jitted(), test_impl())
    for counter in (count_array_REPs, count_parfor_REPs):
        self.assertEqual(counter(), 0)
def test_h5_read_parallel(self):
    """Compare the combined sum of two datasets read from 'lr.hdf5'.

    The datasets ``points`` and ``responses`` are read in full and their
    sums are added.
    """
    def test_impl():
        f = h5py.File("lr.hdf5", "r")
        X = f['points'][:]
        Y = f['responses'][:]
        return X.sum() + Y.sum()

    jitted = hpat.jit(test_impl)
    actual = jitted()
    expected = test_impl()
    np.testing.assert_almost_equal(actual, expected)
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_strided_getitem(self):
    """Compare the sum of every 7th element of ``np.ones(N)`` for N = 128."""
    def test_impl(N):
        A = np.ones(N)
        B = A[::7]
        return B.sum()

    jitted = hpat.jit(test_impl)
    size = 128
    np.testing.assert_allclose(jitted(size), test_impl(size))
    for counter in (count_array_REPs, count_parfor_REPs):
        self.assertEqual(counter(), 0)
def test_agg_parallel_std(self):
    """Compare groupby ``std`` of column ``B`` grouped by ``A`` (n = 11).

    The per-group results are summed before comparison.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n, np.int64), 'B': np.arange(n)})
        A = df.groupby('A')['B'].std()
        return A.sum()

    jitted = hpat.jit(test_impl)
    size = 11
    actual = jitted(size)
    expected = test_impl(size)
    self.assertEqual(actual, expected)
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_filter3(self):
    """Compare filtering through ``df.iloc`` with a boolean array.

    Rows where ``A > .5`` are selected via ``.values`` of the mask, and the
    sum of the remaining ``B`` column is compared.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(n) + n, 'B': np.arange(n)**2})
        df1 = df.iloc[(df.A > .5).values]
        return np.sum(df1.B)

    jitted = hpat.jit(test_impl)
    size = 11
    self.assertEqual(jitted(size), test_impl(size))
    for counter in (count_array_REPs, count_parfor_REPs):
        self.assertEqual(counter(), 0)
def test_describe(self):
    """Run a jitted ``Series.describe()`` on a float64 range of length 1001.

    Only the jitted function is executed; its return value is not compared
    with anything. The two counters must return 0 afterwards.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(0, n, 1, np.float64)})
        return df.A.describe()

    jitted = hpat.jit(test_impl)
    size = 1001
    jitted(size)
    # XXX: test actual output
    for counter in (count_array_REPs, count_parfor_REPs):
        self.assertEqual(counter(), 0)
def test_setitem2(self):
    """Compare a slice assignment ``A[0:4] = 30`` followed by a sum."""
    # The implementation accepts N but does not use it: the array length
    # is fixed at 10.
    def test_impl(N):
        A = np.arange(10) + 1.0
        A[0:4] = 30
        return A.sum()

    jitted = hpat.jit(test_impl)
    size = 128
    actual = jitted(size)
    expected = test_impl(size)
    np.testing.assert_allclose(actual, expected)
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_concat_series_str(self):
    """Compare ``pd.concat`` of column ``two`` from two parquet reads.

    'example.parquet' is read twice; the concatenated series is compared
    to 'foo' and the number of matches is checked.
    """
    def test_impl():
        df1 = pq.read_table('example.parquet').to_pandas()
        df2 = pq.read_table('example.parquet').to_pandas()
        A3 = pd.concat([df1.two, df2.two])
        return (A3 == 'foo').sum()

    jitted = hpat.jit(test_impl)
    self.assertEqual(jitted(), test_impl())
    for counter in (count_array_REPs, count_parfor_REPs):
        self.assertEqual(counter(), 0)
def test_filter2(self):
    """Compare filtering through ``df.loc`` with a boolean mask.

    Rows where ``A > .5`` are selected and the sum of column ``B`` is
    compared.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n), 'B': np.ones(n)})
        df1 = df.loc[df.A > .5]
        return np.sum(df1.B)

    jitted = hpat.jit(test_impl)
    size = 11
    actual = jitted(size)
    expected = test_impl(size)
    self.assertEqual(actual, expected)
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_rolling2(self):
    """Compare a centered rolling mean (window 5) stored as a new column.

    The result is assigned to the column 'moving average' and that
    column's sum is compared for n = 121.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
        df['moving average'] = df.A.rolling(window=5, center=True).mean()
        return df['moving average'].sum()

    jitted = hpat.jit(test_impl)
    size = 121
    self.assertEqual(jitted(size), test_impl(size))
    for counter in (count_array_REPs, count_parfor_REPs):
        self.assertEqual(counter(), 0)
def test_1D_Var_len(self):
    """Compare ``len`` of a column after filtering rows with ``A > 5``."""
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n) + 1.0})
        df1 = df[df.A > 5]
        return len(df1.B)

    jitted = hpat.jit(test_impl)
    size = 11
    actual = jitted(size)
    expected = test_impl(size)
    self.assertEqual(actual, expected)
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_shift2(self):
    """Compare the sum of ``pct_change()`` on a column of ones (n = 11)."""
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
        Ac = df.A.pct_change()
        return Ac.sum()

    jitted = hpat.jit(test_impl)
    size = 11
    self.assertEqual(jitted(size), test_impl(size))
    for counter in (count_array_REPs, count_parfor_REPs):
        self.assertEqual(counter(), 0)
def test_agg_parallel_as_index(self):
    """Compare groupby ``max`` with ``as_index=False``.

    The frame is grouped by ``A`` and the sum of the resulting ``A``
    column is compared for n = 11.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n, np.int64), 'B': np.arange(n)})
        df2 = df.groupby('A', as_index=False).max()
        return df2.A.sum()

    jitted = hpat.jit(test_impl)
    size = 11
    actual = jitted(size)
    expected = test_impl(size)
    self.assertEqual(actual, expected)
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_rolling3(self):
    """Compare a centered rolling ``apply`` with a 3-element weighted lambda.

    The lambda computes ``a[0] + 2*a[1] + a[2]`` over each window of size 3;
    the sum of the result is compared for n = 121.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
        Ac = df.A.rolling(3, center=True).apply(lambda a: a[0]+2*a[1]+a[2])
        return Ac.sum()

    jitted = hpat.jit(test_impl)
    size = 121
    self.assertEqual(jitted(size), test_impl(size))
    for counter in (count_array_REPs, count_parfor_REPs):
        self.assertEqual(counter(), 0)
def test_column_getitem1(self):
    """Compare the sum of ``df['A'].values`` for a frame of length 11.

    Besides the two zero-valued counters, ``count_parfor_OneDs()`` must
    return exactly 1.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
        Ac = df['A'].values
        return Ac.sum()

    jitted = hpat.jit(test_impl)
    size = 11
    actual = jitted(size)
    expected = test_impl(size)
    self.assertEqual(actual, expected)
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
    self.assertEqual(count_parfor_OneDs(), 1)