class TestDataFrame(unittest.TestCase):
    """Compare hpat.jit-compiled DataFrame operations against plain pandas.

    Almost every test defines a local ``test_impl``, compiles it with
    ``hpat.jit`` and asserts that the compiled and the interpreted call give
    the same result.  Some tests additionally assert on ``count_array_REPs()``,
    ``count_parfor_REPs()``, ``count_array_OneDs()`` or ``count_parfor_OneDs()``.
    Those helpers are defined outside this class; presumably they report how
    the most recent hpat compilation distributed its arrays/parfors (confirm
    against their definition).
    """

    @staticmethod
    def _check_dtype():
        """Return the ``check_dtype`` flag used by the frame comparisons.

        The flag is False only when running on Windows with ``IS_32BITS``
        false, and True everywhere else.  (``test_df_head1`` is skipped on
        Windows because of int64 vs int32 dtype differences, which is
        presumably the same reason the dtype check is relaxed here.)
        """
        return not (platform.system() == 'Windows' and not IS_32BITS)

    def test_create1(self):
        # build a two-column frame inside the jitted function, return column A
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
            return df.A

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_series_equal(hpat_func(n), test_impl(n))

    def test_create_cond1(self):
        # the frame is created in both branches of a conditional; each branch
        # is exercised once (c = 0 and c = 2)
        def test_impl(A, B, c):
            if c:
                df = pd.DataFrame({'A': A})
            else:
                df = pd.DataFrame({'A': B})
            return df.A

        hpat_func = hpat.jit(test_impl)
        n = 11
        A = np.ones(n)
        B = np.arange(n) + 1.0
        c = 0
        pd.testing.assert_series_equal(hpat_func(A, B, c), test_impl(A, B, c))
        c = 2
        pd.testing.assert_series_equal(hpat_func(A, B, c), test_impl(A, B, c))

    @unittest.skip('Implement feature to create DataFrame without column names')
    def test_create_without_column_names(self):
        # frame built from a plain list, i.e. without explicit column names
        def test_impl():
            df = pd.DataFrame([100, 200, 300, 400, 200, 100])
            return df

        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(hpat_func(), test_impl())

    def test_unbox1(self):
        # pass an existing pandas frame into the jitted function, read column A
        def test_impl(df):
            return df.A

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.random.ranf(n)})
        pd.testing.assert_series_equal(hpat_func(df), test_impl(df))

    @unittest.skip("needs properly refcounted dataframes")
    def test_unbox2(self):
        # conditionally overwrite column A of a frame argument; every call
        # receives its own copy so the runs do not influence each other
        def test_impl(df, cond):
            n = len(df)
            if cond:
                df['A'] = np.arange(n) + 2.0
            return df.A

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
        pd.testing.assert_series_equal(hpat_func(df.copy(), True), test_impl(df.copy(), True))
        pd.testing.assert_series_equal(hpat_func(df.copy(), False), test_impl(df.copy(), False))

    @unittest.skip('Implement feature to create DataFrame without column names')
    def test_unbox_without_column_names(self):
        # round-trip a frame that was built without explicit column names
        def test_impl(df):
            return df

        df = pd.DataFrame([100, 200, 300, 400, 200, 100])
        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

    def test_box1(self):
        # return a frame created inside the jitted function
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
            return df

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_frame_equal(hpat_func(n), test_impl(n), check_dtype=self._check_dtype())

    def test_box2(self):
        # return a frame holding an integer column and a string column
        def test_impl():
            df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'bb', 'ccc']})
            return df

        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(hpat_func(), test_impl())

    @unittest.skip("pending df filter support")
    def test_box3(self):
        # filter on a string column and return the filtered frame
        def test_impl(df):
            df = df[df.A != 'dd']
            return df

        hpat_func = hpat.jit(test_impl)
        df = pd.DataFrame({'A': ['aa', 'bb', 'cc']})
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

    def test_box_categorical(self):
        # frame with a categorical column B; only column A is modified.
        # hpat gets a deep copy so its update of A does not reach the frame
        # handed to the pandas run.
        def test_impl(df):
            df['A'] = df['A'] + 1
            return df

        hpat_func = hpat.jit(test_impl)
        df = pd.DataFrame({
            'A': [1, 2, 3],
            'B': pd.Series(['N', 'Y', 'Y'], dtype=pd.api.types.CategoricalDtype(['N', 'Y']))
        })
        pd.testing.assert_frame_equal(hpat_func(df.copy(deep=True)), test_impl(df))

    @unittest.skipIf(
        check_numba_version('0.46.0'),
        "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
    def test_box_dist_return(self):
        # return a frame declared as distributed; per-column sums of the hpat
        # result are combined with dist_reduce(Sum) before being compared
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
            return df

        hpat_func = hpat.jit(distributed={'df'})(test_impl)
        n = 11
        hres, res = hpat_func(n), test_impl(n)
        self.assertEqual(count_array_OneDs(), 3)
        self.assertEqual(count_parfor_OneDs(), 2)
        dist_sum = hpat.jit(lambda a: hpat.distributed_api.dist_reduce(
            a, np.int32(hpat.distributed_api.Reduce_Type.Sum.value)))
        dist_sum(1)  # run to compile
        np.testing.assert_allclose(dist_sum(hres.A.sum()), res.A.sum())
        np.testing.assert_allclose(dist_sum(hres.B.sum()), res.B.sum())

    def test_len1(self):
        # len() of a frame created inside the jitted function
        def test_impl(n):
            df = pd.DataFrame({
                'A': np.ones(n, np.int64),
                'B': np.random.ranf(n)
            })
            return len(df)

        hpat_func = hpat.jit(test_impl)
        n = 11
        self.assertEqual(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_shape1(self):
        # .shape of a frame created inside the jitted function
        def test_impl(n):
            df = pd.DataFrame({
                'A': np.ones(n, np.int64),
                'B': np.random.ranf(n)
            })
            return df.shape

        hpat_func = hpat.jit(test_impl)
        n = 11
        self.assertEqual(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_column_getitem1(self):
        # df['A'].values followed by a sum reduction
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
            Ac = df['A'].values
            return Ac.sum()

        hpat_func = hpat.jit(test_impl)
        n = 11
        self.assertEqual(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)
        self.assertEqual(count_parfor_OneDs(), 1)

    def test_column_list_getitem1(self):
        # select a sub-frame with a list of column names
        def test_impl(df):
            return df[['A', 'C']]

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({
            'A': np.arange(n),
            'B': np.ones(n),
            'C': np.random.ranf(n)
        })
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

    def test_filter1(self):
        # boolean-mask filtering through df[mask]
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + n, 'B': np.arange(n)**2})
            df1 = df[df.A > .5]
            return df1.B.sum()

        hpat_func = hpat.jit(test_impl)
        n = 11
        self.assertEqual(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_filter2(self):
        # boolean-mask filtering through df.loc[mask]
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + n, 'B': np.arange(n)**2})
            df1 = df.loc[df.A > .5]
            return np.sum(df1.B)

        hpat_func = hpat.jit(test_impl)
        n = 11
        self.assertEqual(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_filter3(self):
        # boolean-mask filtering through df.iloc with the mask's array
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + n, 'B': np.arange(n)**2})
            df1 = df.iloc[(df.A > .5).values]
            return np.sum(df1.B)

        hpat_func = hpat.jit(test_impl)
        n = 11
        self.assertEqual(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_iloc1(self):
        # iloc with a row slice
        def test_impl(df, n):
            return df.iloc[1:n].B.values

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        np.testing.assert_array_equal(hpat_func(df, n), test_impl(df, n))

    def test_iloc2(self):
        # iloc with an integer array of row positions
        def test_impl(df, n):
            return df.iloc[np.array([1, 4, 9])].B.values

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        np.testing.assert_array_equal(hpat_func(df, n), test_impl(df, n))

    def test_iloc3(self):
        # iloc selecting a whole column by position
        def test_impl(df):
            return df.iloc[:, 1].values

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        np.testing.assert_array_equal(hpat_func(df), test_impl(df))

    @unittest.skip("TODO: support A[[1,2,3]] in Numba")
    def test_iloc4(self):
        # iloc with a Python list of row positions
        def test_impl(df, n):
            return df.iloc[[1, 4, 9]].B.values

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        np.testing.assert_array_equal(hpat_func(df, n), test_impl(df, n))

    def test_iloc5(self):
        # test iloc with global value
        def test_impl(df):
            return df.iloc[:, COL_IND].values

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        np.testing.assert_array_equal(hpat_func(df), test_impl(df))

    def test_loc1(self):
        # loc selecting a whole column by name
        def test_impl(df):
            return df.loc[:, 'B'].values

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        np.testing.assert_array_equal(hpat_func(df), test_impl(df))

    def test_iat1(self):
        # scalar read via iat from a frame created in the jitted function
        def test_impl(n):
            df = pd.DataFrame({'B': np.ones(n), 'A': np.arange(n) + n})
            return df.iat[3, 1]

        hpat_func = hpat.jit(test_impl)
        n = 11
        self.assertEqual(hpat_func(n), test_impl(n))

    def test_iat2(self):
        # scalar read via iat from a frame argument
        def test_impl(df):
            return df.iat[3, 1]

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'B': np.ones(n), 'A': np.arange(n) + n})
        self.assertEqual(hpat_func(df), test_impl(df))

    def test_iat3(self):
        # scalar read via iat with a row index computed at run time
        def test_impl(df, n):
            return df.iat[n - 1, 1]

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'B': np.ones(n), 'A': np.arange(n) + n})
        self.assertEqual(hpat_func(df, n), test_impl(df, n))

    def test_iat_set1(self):
        # scalar write via iat; hpat and pandas each mutate their own frame
        def test_impl(df, n):
            df.iat[n - 1, 1] = n**2
            return df.A  # return the column to check column aliasing

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'B': np.ones(n), 'A': np.arange(n) + n})
        df2 = df.copy()
        pd.testing.assert_series_equal(hpat_func(df, n), test_impl(df2, n))

    def test_iat_set2(self):
        # scalar write via iat, returning the whole frame
        def test_impl(df, n):
            df.iat[n - 1, 1] = n**2
            return df  # check df aliasing/boxing

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'B': np.ones(n), 'A': np.arange(n) + n})
        df2 = df.copy()
        pd.testing.assert_frame_equal(hpat_func(df, n), test_impl(df2, n))

    def test_set_column1(self):
        # set existing column
        def test_impl(n):
            df = pd.DataFrame({
                'A': np.ones(n, np.int64),
                'B': np.arange(n) + 3.0
            })
            df['A'] = np.arange(n)
            return df

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_frame_equal(hpat_func(n), test_impl(n), check_dtype=self._check_dtype())

    def test_set_column_reflect4(self):
        # set existing column
        # (nothing is returned: the caller's frames are compared afterwards)
        def test_impl(df, n):
            df['A'] = np.arange(n)

        hpat_func = hpat.jit(test_impl)
        n = 11
        df1 = pd.DataFrame({
            'A': np.ones(n, np.int64),
            'B': np.arange(n) + 3.0
        })
        df2 = df1.copy()
        hpat_func(df1, n)
        test_impl(df2, n)
        pd.testing.assert_frame_equal(df1, df2, check_dtype=self._check_dtype())

    def test_set_column_new_type1(self):
        # set existing column with a new type
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n) + 3.0})
            df['A'] = np.arange(n)
            return df

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_frame_equal(hpat_func(n), test_impl(n), check_dtype=self._check_dtype())

    def test_set_column2(self):
        # create new column
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n) + 1.0})
            df['C'] = np.arange(n)
            return df

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_frame_equal(hpat_func(n), test_impl(n), check_dtype=self._check_dtype())

    def test_set_column_reflect3(self):
        # create new column
        # (nothing is returned: the caller's frames are compared afterwards)
        def test_impl(df, n):
            df['C'] = np.arange(n)

        hpat_func = hpat.jit(test_impl)
        n = 11
        df1 = pd.DataFrame({
            'A': np.ones(n, np.int64),
            'B': np.arange(n) + 3.0
        })
        df2 = df1.copy()
        hpat_func(df1, n)
        test_impl(df2, n)
        pd.testing.assert_frame_equal(df1, df2, check_dtype=self._check_dtype())

    def test_set_column_bool1(self):
        # new column C is column A filtered by the boolean column B
        def test_impl(df):
            df['C'] = df['A'][df['B']]

        hpat_func = hpat.jit(test_impl)
        df = pd.DataFrame({'A': [1, 2, 3], 'B': [True, False, True]})
        df2 = df.copy()
        test_impl(df2)
        hpat_func(df)
        pd.testing.assert_series_equal(df.C, df2.C)

    def test_set_column_reflect1(self):
        # a column added inside the jitted function must show up on the
        # caller's frame
        def test_impl(df, arr):
            df['C'] = arr
            return df.C.sum()

        hpat_func = hpat.jit(test_impl)
        n = 11
        arr = np.random.ranf(n)
        df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
        hpat_func(df, arr)
        self.assertIn('C', df)
        np.testing.assert_almost_equal(df.C.values, arr)

    def test_set_column_reflect2(self):
        # compare the return value of a function that adds a column to its
        # frame argument
        def test_impl(df, arr):
            df['C'] = arr
            return df.C.sum()

        hpat_func = hpat.jit(test_impl)
        n = 11
        arr = np.random.ranf(n)
        df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
        df2 = df.copy()
        np.testing.assert_almost_equal(hpat_func(df, arr), test_impl(df2, arr))

    def test_df_values1(self):
        # .values of a frame created inside the jitted function
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
            return df.values

        hpat_func = hpat.jit(test_impl)
        n = 11
        np.testing.assert_array_equal(hpat_func(n), test_impl(n))

    def test_df_values2(self):
        # .values of a frame argument
        def test_impl(df):
            return df.values

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
        np.testing.assert_array_equal(hpat_func(df), test_impl(df))

    def test_df_values_parallel1(self):
        # .values followed by a sum reduction
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
            return df.values.sum()

        hpat_func = hpat.jit(test_impl)
        n = 11
        np.testing.assert_array_equal(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_df_apply(self):
        # NOTE(review): test_impl stores the apply() result in B but returns
        # df.B.sum(), so the apply output itself is never compared -- confirm
        # whether B.sum() was intended.
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)})
            B = df.apply(lambda r: r.A + r.B, axis=1)
            return df.B.sum()

        n = 121
        hpat_func = hpat.jit(test_impl)
        np.testing.assert_almost_equal(hpat_func(n), test_impl(n))

    def test_df_apply_branch(self):
        # NOTE(review): as in test_df_apply, the apply() result B is unused
        # and df.B.sum() is returned -- confirm whether that is intended.
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)})
            B = df.apply(lambda r: r.A < 10 and r.B > 20, axis=1)
            return df.B.sum()

        n = 121
        hpat_func = hpat.jit(test_impl)
        np.testing.assert_almost_equal(hpat_func(n), test_impl(n))

    def test_df_describe(self):
        # only checks that describe() compiles and runs; the output is not
        # compared (see XXX below)
        def test_impl(n):
            df = pd.DataFrame({
                'A': np.arange(0, n, 1, np.float32),
                'B': np.arange(n)
            })
            #df.A[0:1] = np.nan
            return df.describe()

        hpat_func = hpat.jit(test_impl)
        n = 1001
        hpat_func(n)
        # XXX: test actual output
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_sort_values(self):
        # in-place sort by column A; hpat sorts a copy so pandas still sees
        # the unsorted frame
        def test_impl(df):
            df.sort_values('A', inplace=True)
            return df.B.values

        n = 1211
        np.random.seed(2)
        df = pd.DataFrame({
            'A': np.random.ranf(n),
            'B': np.arange(n),
            'C': np.random.ranf(n)
        })
        hpat_func = hpat.jit(test_impl)
        np.testing.assert_almost_equal(hpat_func(df.copy()), test_impl(df))

    def test_sort_values_copy(self):
        # sort_values returning a new frame instead of sorting in place
        def test_impl(df):
            df2 = df.sort_values('A')
            return df2.B.values

        n = 1211
        np.random.seed(2)
        df = pd.DataFrame({
            'A': np.random.ranf(n),
            'B': np.arange(n),
            'C': np.random.ranf(n)
        })
        hpat_func = hpat.jit(test_impl)
        np.testing.assert_almost_equal(hpat_func(df.copy()), test_impl(df))

    def test_sort_values_single_col(self):
        # in-place sort of a frame that has only the key column
        def test_impl(df):
            df.sort_values('A', inplace=True)
            return df.A.values

        n = 1211
        np.random.seed(2)
        df = pd.DataFrame({'A': np.random.ranf(n)})
        hpat_func = hpat.jit(test_impl)
        np.testing.assert_almost_equal(hpat_func(df.copy()), test_impl(df))

    def test_sort_values_single_col_str(self):
        # in-place sort of a single string column of random strings
        def test_impl(df):
            df.sort_values('A', inplace=True)
            return df.A.values

        n = 1211
        random.seed(2)
        str_vals = []
        for _ in range(n):
            k = random.randint(1, 30)
            val = ''.join(random.choices(string.ascii_uppercase + string.digits, k=k))
            str_vals.append(val)
        df = pd.DataFrame({'A': str_vals})
        hpat_func = hpat.jit(test_impl)
        self.assertTrue((hpat_func(df.copy()) == test_impl(df)).all())

    def test_sort_values_str(self):
        # sort by a string key column and compare the string payload column
        def test_impl(df):
            df.sort_values('A', inplace=True)
            return df.B.values

        n = 1211
        random.seed(2)
        str_vals = []
        str_vals2 = []
        for _ in range(n):
            k = random.randint(1, 30)
            val = ''.join(random.choices(string.ascii_uppercase + string.digits, k=k))
            str_vals.append(val)
            val = ''.join(random.choices(string.ascii_uppercase + string.digits, k=k))
            str_vals2.append(val)
        df = pd.DataFrame({'A': str_vals, 'B': str_vals2})
        # use mergesort for stability, in str generation equal keys are more probable
        sorted_df = df.sort_values('A', inplace=False, kind='mergesort')
        hpat_func = hpat.jit(test_impl)
        self.assertTrue((hpat_func(df) == sorted_df.B.values).all())

    def test_sort_parallel_single_col(self):
        # create `kde.parquet` file
        ParquetGenerator.gen_kde_pq()

        # TODO: better parallel sort test
        def test_impl():
            df = pd.read_parquet('kde.parquet')
            df.sort_values('points', inplace=True)
            res = df.points.values
            return res

        hpat_func = hpat.jit(locals={'res:return': 'distributed'})(test_impl)

        # MIN_SAMPLES is a module-level setting of hpat.hiframes.sort; it is
        # lowered for this test only and restored even if the assert fails
        save_min_samples = hpat.hiframes.sort.MIN_SAMPLES
        try:
            hpat.hiframes.sort.MIN_SAMPLES = 10
            res = hpat_func()
            self.assertTrue((np.diff(res) >= 0).all())
        finally:
            # restore global val
            hpat.hiframes.sort.MIN_SAMPLES = save_min_samples

    def test_df_isna1(self):
        '''Verify DataFrame.isna implementation for various types of data'''
        def test_impl(df):
            return df.isna()

        hpat_func = hpat.jit(test_impl)

        # TODO: add column with datetime values when test_series_datetime_isna1 is fixed
        df = pd.DataFrame({
            'A': [1.0, 2.0, np.nan, 1.0],
            'B': [np.inf, 5, np.nan, 6],
            'C': ['aa', 'b', None, 'ccc'],
            'D': [None, 'dd', '', None]
        })
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

    def test_df_astype_str1(self):
        '''Verifies DataFrame.astype implementation converting various types to string'''
        def test_impl(df):
            return df.astype(str)

        hpat_func = hpat.jit(test_impl)

        # TODO: add column with float values when test_series_astype_float_to_str1 is fixed
        df = pd.DataFrame({
            'A': [-1, 2, 11, 5, 0, -7],
            'B': ['aa', 'bb', 'cc', 'dd', '', 'fff']
        })
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

    def test_df_astype_float1(self):
        '''Verifies DataFrame.astype implementation converting various types to float'''
        def test_impl(df):
            return df.astype(np.float64)

        hpat_func = hpat.jit(test_impl)

        # TODO: uncomment column with string values when test_series_astype_str_to_float64 is fixed
        df = pd.DataFrame({
            'A': [-1, 2, 11, 5, 0, -7],
            # 'B': ['3.24', '1E+05', '-1', '-1.3E-01', 'nan', 'inf'],
            'C': [3.24, 1E+05, -1, -1.3E-01, np.nan, np.inf]
        })
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

    def test_df_astype_int1(self):
        '''Verifies DataFrame.astype implementation converting various types to int'''
        def test_impl(df):
            return df.astype(np.int32)

        hpat_func = hpat.jit(test_impl)

        n = 6
        # TODO: uncomment column with string values when test_series_astype_str_to_int32 is fixed
        df = pd.DataFrame({
            'A': np.ones(n, dtype=np.int64),
            'B': np.arange(n, dtype=np.int32),
            # 'C': ['-1', '2', '3', '0', '-7', '99'],
            'D': np.arange(float(n), dtype=np.float32)
        })
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

    def test_sort_parallel(self):
        # create `kde.parquet` file
        ParquetGenerator.gen_kde_pq()

        # TODO: better parallel sort test
        def test_impl():
            df = pd.read_parquet('kde.parquet')
            df['A'] = df.points.astype(np.float64)
            df.sort_values('points', inplace=True)
            res = df.A.values
            return res

        hpat_func = hpat.jit(locals={'res:return': 'distributed'})(test_impl)

        # MIN_SAMPLES is a module-level setting of hpat.hiframes.sort; it is
        # lowered for this test only and restored even if the assert fails
        save_min_samples = hpat.hiframes.sort.MIN_SAMPLES
        try:
            hpat.hiframes.sort.MIN_SAMPLES = 10
            res = hpat_func()
            self.assertTrue((np.diff(res) >= 0).all())
        finally:
            # restore global val
            hpat.hiframes.sort.MIN_SAMPLES = save_min_samples

    def test_itertuples(self):
        # accumulate element 1 of every tuple yielded by itertuples()
        def test_impl(df):
            res = 0.0
            for r in df.itertuples():
                res += r[1]
            return res

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.ones(n, np.int64)})
        self.assertEqual(hpat_func(df), test_impl(df))

    def test_itertuples_str(self):
        # same as test_itertuples but concatenating a string column
        def test_impl(df):
            res = ""
            for r in df.itertuples():
                res += r[1]
            return res

        hpat_func = hpat.jit(test_impl)
        n = 3
        df = pd.DataFrame({'A': ['aa', 'bb', 'cc'], 'B': np.ones(n, np.int64)})
        self.assertEqual(hpat_func(df), test_impl(df))

    def test_itertuples_order(self):
        # columns are declared as B, A so element 1 of each tuple must come
        # from column B
        def test_impl(n):
            res = 0.0
            df = pd.DataFrame({'B': np.arange(n), 'A': np.ones(n, np.int64)})
            for r in df.itertuples():
                res += r[1]
            return res

        hpat_func = hpat.jit(test_impl)
        n = 11
        self.assertEqual(hpat_func(n), test_impl(n))

    def test_itertuples_analysis(self):
        """tests array analysis handling of generated tuples, shapes going
        through blocks and getting used in an array dimension
        """
        def test_impl(n):
            res = 0
            df = pd.DataFrame({'B': np.arange(n), 'A': np.ones(n, np.int64)})
            for r in df.itertuples():
                if r[1] == 2:
                    A = np.ones(r[1])
                    res += len(A)
            return res

        hpat_func = hpat.jit(test_impl)
        n = 11
        self.assertEqual(hpat_func(n), test_impl(n))

    @unittest.skipIf(platform.system() == 'Windows',
                     "Attribute 'dtype' are different int64 and int32")
    def test_df_head1(self):
        # head(3) of a frame created inside the jitted function
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
            return df.head(3)

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_frame_equal(hpat_func(n), test_impl(n))

    def test_pct_change1(self):
        # pct_change with a period of 3 on a float and an int column
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.pct_change(3)

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_frame_equal(hpat_func(n), test_impl(n))

    def test_mean1(self):
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.mean()

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_series_equal(hpat_func(n), test_impl(n))

    def test_median1(self):
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': 2**np.arange(n), 'B': np.arange(n) + 1.0})
            return df.median()

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_series_equal(hpat_func(n), test_impl(n))

    def test_std1(self):
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.std()

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_series_equal(hpat_func(n), test_impl(n))

    def test_var1(self):
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.var()

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_series_equal(hpat_func(n), test_impl(n))

    def test_max1(self):
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.max()

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_series_equal(hpat_func(n), test_impl(n))

    def test_min1(self):
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.min()

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_series_equal(hpat_func(n), test_impl(n))

    @unittest.skipIf(not hpat.config.config_pipeline_hpat_default,
                     "DataFrame.sum() not implemented in new style")
    def test_sum1(self):
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.sum()

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_series_equal(hpat_func(n), test_impl(n))

    def test_prod1(self):
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.prod()

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_series_equal(hpat_func(n), test_impl(n))

    def test_count(self):
        # count() on two integer columns
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)})
            return df.count()

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_series_equal(hpat_func(n), test_impl(n))

    def test_count1(self):
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.count()

        hpat_func = hpat.jit(test_impl)
        n = 11
        pd.testing.assert_series_equal(hpat_func(n), test_impl(n))

    def test_df_fillna1(self):
        # fillna with a float value on a float column containing NaN
        def test_impl(df):
            return df.fillna(5.0)

        df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]})
        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

    def test_df_fillna_str1(self):
        # fillna with a string value on a string column containing None
        def test_impl(df):
            return df.fillna("dd")

        df = pd.DataFrame({'A': ['aa', 'b', None, 'ccc']})
        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

    def test_df_fillna_inplace1(self):
        # in-place fillna; hpat and pandas each mutate their own frame
        def test_impl(A):
            A.fillna(11.0, inplace=True)
            return A

        df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]})
        df2 = df.copy()
        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df2))

    def test_df_reset_index1(self):
        # reset_index(drop=True) returning a new frame
        def test_impl(df):
            return df.reset_index(drop=True)

        df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]})
        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

    def test_df_reset_index_inplace1(self):
        # reset_index(drop=True, inplace=True) on a locally created frame
        def test_impl():
            df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]})
            df.reset_index(drop=True, inplace=True)
            return df

        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(hpat_func(), test_impl())

    def test_df_dropna1(self):
        # dropna on a two-column frame; the pandas result has its index reset
        # before the comparison
        def test_impl(df):
            return df.dropna()

        df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7]})
        hpat_func = hpat.jit(test_impl)
        out = test_impl(df).reset_index(drop=True)
        h_out = hpat_func(df)
        pd.testing.assert_frame_equal(out, h_out)

    def test_df_dropna2(self):
        # dropna on a single-column frame; the pandas result has its index
        # reset before the comparison
        def test_impl(df):
            return df.dropna()

        df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]})
        hpat_func = hpat.jit(test_impl)
        out = test_impl(df).reset_index(drop=True)
        h_out = hpat_func(df)
        pd.testing.assert_frame_equal(out, h_out)

    def test_df_dropna_inplace1(self):
        # TODO: fix error when no df is returned
        def test_impl(df):
            df.dropna(inplace=True)
            return df

        df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7]})
        df2 = df.copy()
        hpat_func = hpat.jit(test_impl)
        out = test_impl(df).reset_index(drop=True)
        h_out = hpat_func(df2)
        pd.testing.assert_frame_equal(out, h_out)

    def test_df_dropna_str1(self):
        # dropna where the missing value sits in a string column
        def test_impl(df):
            return df.dropna()

        df = pd.DataFrame({
            'A': [1.0, 2.0, 4.0, 1.0],
            'B': ['aa', 'b', None, 'ccc']
        })
        hpat_func = hpat.jit(test_impl)
        out = test_impl(df).reset_index(drop=True)
        h_out = hpat_func(df)
        pd.testing.assert_frame_equal(out, h_out)

    def test_df_drop1(self):
        # drop a column via the columns= keyword, returning a new frame
        def test_impl(df):
            return df.drop(columns=['A'])

        df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7]})
        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

    def test_df_drop_inplace2(self):
        # test dropping after setting the column
        def test_impl(df):
            df2 = df[['A', 'B']]
            df2['D'] = np.ones(3)
            df2.drop(columns=['D'], inplace=True)
            return df2

        df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

    def test_df_drop_inplace1(self):
        # in-place drop of a column; hpat and pandas each mutate their own frame
        def test_impl(df):
            df.drop('A', axis=1, inplace=True)
            return df

        df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7]})
        df2 = df.copy()
        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df2))

    def test_isin_df1(self):
        # isin against another frame that shares only column A
        def test_impl(df, df2):
            return df.isin(df2)

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        df2 = pd.DataFrame({'A': np.arange(n), 'C': np.arange(n)**2})
        # make the second half of df2.A differ from df.A; .loc is used rather
        # than chained `df2.A[...] = n`, which is not guaranteed to write
        # through to df2
        df2.loc[n // 2:, 'A'] = n
        pd.testing.assert_frame_equal(hpat_func(df, df2), test_impl(df, df2))

    @unittest.skip("needs dict typing in Numba")
    def test_isin_dict1(self):
        # isin against a dict mapping column names to value lists
        def test_impl(df):
            vals = {'A': [2, 3, 4], 'C': [4, 5, 6]}
            return df.isin(vals)

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

    def test_isin_list1(self):
        # isin against a list of values
        def test_impl(df):
            vals = [2, 3, 4]
            return df.isin(vals)

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))

    def test_append1(self):
        # append a frame whose columns only partly overlap (A/B vs A/C)
        def test_impl(df, df2):
            return df.append(df2, ignore_index=True)

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        df2 = pd.DataFrame({'A': np.arange(n), 'C': np.arange(n)**2})
        # .loc instead of chained `df2.A[...] = n` so the write lands in df2
        df2.loc[n // 2:, 'A'] = n
        pd.testing.assert_frame_equal(hpat_func(df, df2), test_impl(df, df2))

    def test_append2(self):
        # append a list of two frames with identical columns
        def test_impl(df, df2, df3):
            return df.append([df2, df3], ignore_index=True)

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        df2 = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        # .loc instead of chained `df2.A[...] = n` so the write lands in df2
        df2.loc[n // 2:, 'A'] = n
        df3 = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        pd.testing.assert_frame_equal(hpat_func(df, df2, df3), test_impl(df, df2, df3))

    def test_concat_columns1(self):
        # concatenate two Series column-wise; the pandas result's integer
        # column labels are renamed to strings before the comparison
        def test_impl(S1, S2):
            return pd.concat([S1, S2], axis=1)

        hpat_func = hpat.jit(test_impl)
        S1 = pd.Series([4, 5])
        S2 = pd.Series([6., 7.])
        # TODO: support int as column name
        pd.testing.assert_frame_equal(
            hpat_func(S1, S2),
            test_impl(S1, S2).rename(columns={
                0: '0',
                1: '1'
            }))

    def test_var_rename(self):
        # tests df variable replacement in hiframes_untyped where inlining
        # can cause extra assignments and definition handling errors
        # TODO: inline freevar
        def test_impl():
            df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
            # TODO: df['C'] = [5,6,7]
            df['C'] = np.ones(3)
            return inner_get_column(df)

        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_series_equal(hpat_func(), test_impl(), check_names=False)

    @unittest.skip("Implement getting columns attribute")
    def test_dataframe_columns_attribute(self):
        # return df.columns directly
        def test_impl():
            df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
            return df.columns

        hpat_func = hpat.jit(test_impl)
        np.testing.assert_array_equal(hpat_func(), test_impl())

    @unittest.skip("Implement getting columns attribute")
    def test_dataframe_columns_iterator(self):
        # iterate over df.columns in a list comprehension
        def test_impl():
            df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
            return [column for column in df.columns]

        hpat_func = hpat.jit(test_impl)
        np.testing.assert_array_equal(hpat_func(), test_impl())

    @unittest.skip("Implement set_index for DataFrame")
    def test_dataframe_set_index(self):
        # set_index on an existing column
        def test_impl():
            df = pd.DataFrame({
                'month': [1, 4, 7, 10],
                'year': [2012, 2014, 2013, 2014],
                'sale': [55, 40, 84, 31]
            })
            return df.set_index('month')

        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(hpat_func(), test_impl())

    @unittest.skip("Implement sort_index for DataFrame")
    def test_dataframe_sort_index(self):
        # sort_index on a frame with an explicit, unsorted integer index
        def test_impl():
            df = pd.DataFrame({'A': [1, 2, 3, 4, 5]}, index=[100, 29, 234, 1, 150])
            return df.sort_index()

        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(hpat_func(), test_impl())

    @unittest.skip("Implement iterrows for DataFrame")
    def test_dataframe_iterrows(self):
        # collect the row objects yielded by iterrows()
        def test_impl(df):
            print(df.iterrows())
            return [row for _, row in df.iterrows()]

        df = pd.DataFrame({
            'A': [1, 2, 3],
            'B': [0.2, 0.5, 0.001],
            'C': ['a', 'bb', 'ccc']
        })
        hpat_func = hpat.jit(test_impl)
        np.testing.assert_array_equal(hpat_func(df), test_impl(df))

    @unittest.skip("Support parameter axis=1")
    def test_dataframe_axis_param(self):
        # row-wise reduction via sum(axis=1)
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)})
            return df.sum(axis=1)

        n = 100
        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_series_equal(hpat_func(n), test_impl(n))
class TestML(unittest.TestCase):
    """Run small machine-learning kernels through hpat.jit and plain NumPy.

    Each test compiles a local ``test_impl`` with ``hpat.jit``, compares its
    result with the interpreted call, and then asserts exact values of
    ``count_array_OneDs()`` / ``count_parfor_OneDs()`` (and the ``OneD_Var``
    variants in ``test_kmeans``).  Those helpers are defined outside this
    class; presumably they report how the most recent hpat compilation
    distributed its arrays/parfors (confirm against their definition).
    Because of these count assertions the kernel bodies should not be
    restructured casually.
    """

    def test_logistic_regression(self):
        # three fixed iterations of a logistic-regression style weight update
        # on constant inputs
        def test_impl(n, d):
            iterations = 3
            X = np.ones((n, d)) + .5
            Y = np.ones(n)
            D = X.shape[1]
            w = np.ones(D) - 0.5
            for i in range(iterations):
                w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X)
            return w

        hpat_func = hpat.jit(test_impl)
        n = 11
        d = 4
        np.testing.assert_allclose(hpat_func(n, d), test_impl(n, d))
        self.assertEqual(count_array_OneDs(), 3)
        self.assertEqual(count_parfor_OneDs(), 3)

    def test_logistic_regression_acc(self):
        # same weight update as test_logistic_regression, but the returned
        # value is the fraction of predictions R that equal the labels Y
        # NOTE(review): the nesting of the R/accuracy lines was reconstructed
        # from a whitespace-collapsed source -- confirm they belong inside
        # the loop.
        def test_impl(N, D):
            iterations = 3
            g = 2 * np.ones(D) - 1
            X = 2 * np.ones((N, D)) - 1
            Y = ((np.dot(X, g) > 0.0) == (np.ones(N) > .90)) + .0

            w = 2 * np.ones(D) - 1
            for i in range(iterations):
                w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X)
                R = np.dot(X, w) > 0.0
                accuracy = np.sum(R == Y) / N
            return accuracy

        hpat_func = hpat.jit(test_impl)
        n = 11
        d = 4
        np.testing.assert_approx_equal(hpat_func(n, d), test_impl(n, d))
        self.assertEqual(count_array_OneDs(), 3)
        self.assertEqual(count_parfor_OneDs(), 4)

    def test_linear_regression(self):
        # three fixed iterations of a linear-regression style update with a
        # (D, p) weight matrix, p = 2
        def test_impl(N, D):
            p = 2
            iterations = 3
            X = np.ones((N, D)) + .5
            Y = np.ones((N, p))
            alphaN = 0.01 / N
            w = np.zeros((D, p))
            for i in range(iterations):
                w -= alphaN * np.dot(X.T, np.dot(X, w) - Y)
            return w

        hpat_func = hpat.jit(test_impl)
        n = 11
        d = 4
        np.testing.assert_allclose(hpat_func(n, d), test_impl(n, d))
        self.assertEqual(count_array_OneDs(), 5)
        self.assertEqual(count_parfor_OneDs(), 3)

    def test_kde(self):
        # explicit hpat.prange loop accumulating a scalar; `m` (the minimum
        # of d) is subtracted before exp() and added back outside the log
        def test_impl(n):
            X = np.ones(n)
            b = 0.5
            points = np.array([-1.0, 2.0, 5.0])
            N = points.shape[0]
            exps = 0
            for i in hpat.prange(n):
                p = X[i]
                d = (-(p - points)**2) / (2 * b**2)
                m = np.min(d)
                exps += m - np.log(b * N) + np.log(np.sum(np.exp(d - m)))
            return exps

        hpat_func = hpat.jit(test_impl)
        n = 11
        np.testing.assert_approx_equal(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_OneDs(), 1)
        self.assertEqual(count_parfor_OneDs(), 2)

    @unittest.skipIf(check_numba_version('0.46.0'),
                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
    def test_kmeans(self):
        # k-means style iteration written with nested list comprehensions;
        # it is invoked below with one centre and one iteration
        # NOTE(review): `sqrt` is not defined in this class -- presumably
        # imported at module level; confirm.
        def test_impl(numCenter, numIter, N, D):
            A = np.ones((N, D))
            centroids = np.zeros((numCenter, D))

            for l in range(numIter):
                # distance of every row of A to every centroid
                dist = np.array([[sqrt(np.sum((A[i, :] - centroids[j, :])**2))
                                  for j in range(numCenter)] for i in range(N)])
                # index of the closest centroid per row
                labels = np.array([dist[i, :].argmin() for i in range(N)])

                # new centroid = per-column mean of the rows assigned to it
                centroids = np.array([[np.sum(A[labels == i, j]) / np.sum(labels == i)
                                       for j in range(D)] for i in range(numCenter)])

            return centroids

        hpat_func = hpat.jit(test_impl)
        n = 11
        np.testing.assert_allclose(hpat_func(1, 1, n, 2), test_impl(1, 1, n, 2))
        self.assertEqual(count_array_OneDs(), 4)
        self.assertEqual(count_array_OneD_Vars(), 1)
        self.assertEqual(count_parfor_OneDs(), 5)
        self.assertEqual(count_parfor_OneD_Vars(), 1)
class TestBasic(BaseTest):
    """Basic array-operation tests for ``hpat.jit``.

    Every test compiles a small ``test_impl`` with ``hpat.jit`` and compares
    its result with the same function run as plain Python/NumPy.  Most tests
    additionally check the ``count_*_REPs`` / ``count_*_OneDs`` helper
    counters after the jitted call; those helpers live outside this class
    (presumably they report how many arrays/parfors ended up replicated vs.
    1D-distributed -- confirm against their definitions).

    ``self.num_ranks``, ``self._rank_bounds`` and ``self._follow_cpython``
    are expected to be provided by ``BaseTest``.
    """

    def test_getitem(self):
        # Boolean-mask indexing followed by a reduction.
        def test_impl(N):
            A = np.ones(N)
            B = np.ones(N) > .5
            C = A[B]
            return C.sum()

        hpat_func = hpat.jit(test_impl)
        n = 128
        np.testing.assert_allclose(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_setitem1(self):
        # Scalar element assignment.  Note: test_impl ignores N and always
        # builds a 10-element array.
        def test_impl(N):
            A = np.arange(10) + 1.0
            A[0] = 30
            return A.sum()

        hpat_func = hpat.jit(test_impl)
        n = 128
        np.testing.assert_allclose(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_setitem2(self):
        # Slice assignment of a scalar.  Note: test_impl ignores N and always
        # builds a 10-element array.
        def test_impl(N):
            A = np.arange(10) + 1.0
            A[0:4] = 30
            return A.sum()

        hpat_func = hpat.jit(test_impl)
        n = 128
        np.testing.assert_allclose(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_astype(self):
        # dtype conversion followed by a reduction.
        def test_impl(N):
            return np.ones(N).astype(np.int32).sum()

        hpat_func = hpat.jit(test_impl)
        n = 128
        np.testing.assert_allclose(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_shape(self):
        # First element of .shape on a 1-D array.
        def test_impl(N):
            return np.ones(N).shape[0]

        hpat_func = hpat.jit(test_impl)
        n = 128
        np.testing.assert_allclose(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

        # Multi-dimensional variant returning the whole shape tuple; kept
        # commented out as in the original file.
        # def test_impl(N):
        #     return np.ones((N, 3, 4)).shape
        #
        # hpat_func = hpat.jit(test_impl)
        # n = 128
        # np.testing.assert_allclose(hpat_func(n), test_impl(n))
        # self.assertEqual(count_array_REPs(), 0)
        # self.assertEqual(count_parfor_REPs(), 0)

    def test_inplace_binop(self):
        # In-place "+=" between two arrays.
        def test_impl(N):
            A = np.ones(N)
            B = np.ones(N)
            B += A
            return B.sum()

        hpat_func = hpat.jit(test_impl)
        n = 128
        np.testing.assert_allclose(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_getitem_multidim(self):
        # Boolean mask on the first axis combined with a constant column.
        def test_impl(N):
            A = np.ones((N, 3))
            B = np.ones(N) > .5
            C = A[B, 2]
            return C.sum()

        hpat_func = hpat.jit(test_impl)
        n = 128
        np.testing.assert_allclose(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_whole_slice(self):
        # Read and write of a full column slice "X[:, 3]".  The inputs are
        # all ones, so max - min is 0 and the division yields inf in both
        # the jitted and the reference run.
        def test_impl(N):
            X = np.ones((N, 4))
            X[:, 3] = (X[:, 3]) / (np.max(X[:, 3]) - np.min(X[:, 3]))
            return X.sum()

        hpat_func = hpat.jit(test_impl)
        n = 128
        np.testing.assert_allclose(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_strided_getitem(self):
        # Slice with a non-unit step.
        def test_impl(N):
            A = np.ones(N)
            B = A[::7]
            return B.sum()

        hpat_func = hpat.jit(test_impl)
        n = 128
        np.testing.assert_allclose(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_assert(self):
        # make sure assert in an inlined function works
        def g(a):
            assert a == 0

        hpat_g = hpat.jit(g)

        def f():
            hpat_g(0)

        hpat_f = hpat.jit(f)
        hpat_f()

    def test_inline_locals(self):
        # make sure locals in inlined function works
        @hpat.jit(locals={'B': hpat.float64[:]})
        def g(S):
            B = pd.to_numeric(S, errors='coerce')
            return B

        def f():
            return g(pd.Series(['1.2']))

        pd.testing.assert_series_equal(hpat.jit(f)(), f())

    def test_reduce(self):
        # Every (dtype, reduction) pair on an array created inside the
        # jitted function.  The kernel source is generated with exec so the
        # dtype and method name can be substituted textually.
        import sys
        dtypes = ['float32', 'float64', 'int32', 'int64']
        funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax']
        for (dtype, func) in itertools.product(dtypes, funcs):
            # loc allreduce doesn't support int64 on windows
            if (sys.platform.startswith('win')
                    and dtype == 'int64'
                    and func in ['argmin', 'argmax']):
                continue
            func_text = """def f(n):
                A = np.arange(0, n, 1, np.{})
                return A.{}()
            """.format(dtype, func)
            loc_vars = {}
            exec(func_text, {'np': np}, loc_vars)
            test_impl = loc_vars['f']

            hpat_func = hpat.jit(test_impl)
            n = 21  # XXX arange() on float32 has overflow issues on large n
            np.testing.assert_almost_equal(hpat_func(n), test_impl(n))
            self.assertEqual(count_array_REPs(), 0)
            self.assertEqual(count_parfor_REPs(), 0)

    def test_reduce2(self):
        # Same reductions as test_reduce, but on an input array declared
        # 'distributed': the jitted function receives only the
        # [start:end) chunk returned by get_start_end(n), the reference
        # receives the whole array.
        import sys
        dtypes = ['float32', 'float64', 'int32', 'int64']
        funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax']
        for (dtype, func) in itertools.product(dtypes, funcs):
            # loc allreduce doesn't support int64 on windows
            if (sys.platform.startswith('win')
                    and dtype == 'int64'
                    and func in ['argmin', 'argmax']):
                continue
            func_text = """def f(A):
                return A.{}()
            """.format(func)
            loc_vars = {}
            exec(func_text, {'np': np}, loc_vars)
            test_impl = loc_vars['f']

            hpat_func = hpat.jit(locals={'A:input': 'distributed'})(test_impl)
            n = 21
            start, end = get_start_end(n)
            # Fixed seed so the generated input is reproducible.
            np.random.seed(0)
            A = np.random.randint(0, 10, n).astype(dtype)
            np.testing.assert_almost_equal(
                hpat_func(A[start:end]), test_impl(A), decimal=3)
            self.assertEqual(count_array_REPs(), 0)
            self.assertEqual(count_parfor_REPs(), 0)

    def test_reduce_filter1(self):
        # Like test_reduce2, but the distributed input is filtered with a
        # boolean mask before the reduction.
        import sys
        dtypes = ['float32', 'float64', 'int32', 'int64']
        funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax']
        for (dtype, func) in itertools.product(dtypes, funcs):
            # loc allreduce doesn't support int64 on windows
            if (sys.platform.startswith('win')
                    and dtype == 'int64'
                    and func in ['argmin', 'argmax']):
                continue
            func_text = """def f(A):
                A = A[A>5]
                return A.{}()
            """.format(func)
            loc_vars = {}
            exec(func_text, {'np': np}, loc_vars)
            test_impl = loc_vars['f']

            hpat_func = hpat.jit(locals={'A:input': 'distributed'})(test_impl)
            n = 21
            start, end = get_start_end(n)
            # Fixed seed so the generated input is reproducible.
            np.random.seed(0)
            A = np.random.randint(0, 10, n).astype(dtype)
            np.testing.assert_almost_equal(
                hpat_func(A[start:end]), test_impl(A), decimal=3,
                err_msg="{} on {}".format(func, dtype))
            self.assertEqual(count_array_REPs(), 0)
            self.assertEqual(count_parfor_REPs(), 0)

    def test_array_reduce(self):
        # Whole-array in-place updates inside a numba.prange loop, one
        # (operator, dtype) pair per iteration.
        binops = ['+=', '*=', '+=', '*=', '|=', '|=']
        dtypes = ['np.float32', 'np.float32', 'np.float64', 'np.float64', 'np.int32', 'np.int64']
        for (op, typ) in zip(binops, dtypes):
            func_text = """def f(n):
                A = np.arange(0, 10, 1, {})
                B = np.arange(0 + 3, 10 + 3, 1, {})
                for i in numba.prange(n):
                    A {} B
                return A
            """.format(typ, typ, op)
            loc_vars = {}
            exec(func_text, {'np': np, 'numba': numba}, loc_vars)
            test_impl = loc_vars['f']

            hpat_func = hpat.jit(test_impl)
            n = 128
            np.testing.assert_allclose(hpat_func(n), test_impl(n))
            self.assertEqual(count_array_OneDs(), 0)
            self.assertEqual(count_parfor_OneDs(), 1)

    @unittest.skipIf(check_numba_version('0.46.0'),
                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
    def test_dist_return(self):
        # Return value declared 'distributed': the sum of the returned array
        # is combined with dist_reduce(Sum) before comparing with the
        # reference total.
        def test_impl(N):
            A = np.arange(N)
            return A

        hpat_func = hpat.jit(locals={'A:return': 'distributed'})(test_impl)
        n = 128
        dist_sum = hpat.jit(
            lambda a: hpat.distributed_api.dist_reduce(
                a, np.int32(hpat.distributed_api.Reduce_Type.Sum.value)))
        dist_sum(1)  # run to compile
        np.testing.assert_allclose(
            dist_sum(hpat_func(n).sum()), test_impl(n).sum())
        self.assertEqual(count_array_OneDs(), 1)
        self.assertEqual(count_parfor_OneDs(), 1)

    @unittest.skipIf(check_numba_version('0.46.0'),
                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
    def test_dist_return_tuple(self):
        # Same as test_dist_return, but with two returned arrays, both
        # declared 'distributed'.
        def test_impl(N):
            A = np.arange(N)
            B = np.arange(N) + 1.5
            return A, B

        hpat_func = hpat.jit(locals={'A:return': 'distributed',
                                     'B:return': 'distributed'})(test_impl)
        n = 128
        dist_sum = hpat.jit(
            lambda a: hpat.distributed_api.dist_reduce(
                a, np.int32(hpat.distributed_api.Reduce_Type.Sum.value)))
        dist_sum(1.0)  # run to compile
        np.testing.assert_allclose(
            dist_sum((hpat_func(n)[0] + hpat_func(n)[1]).sum()),
            (test_impl(n)[0] + test_impl(n)[1]).sum())
        self.assertEqual(count_array_OneDs(), 2)
        self.assertEqual(count_parfor_OneDs(), 2)

    def test_dist_input(self):
        # Argument declared distributed via the ``distributed=[...]`` option.
        # The jitted len() is divided by self.num_ranks before being compared
        # with the length of the locally passed array.
        def test_impl(A):
            return len(A)

        hpat_func = hpat.jit(distributed=['A'])(test_impl)
        n = 128
        arr = np.ones(n)
        np.testing.assert_allclose(hpat_func(arr) / self.num_ranks, test_impl(arr))
        self.assertEqual(count_array_OneDs(), 1)

    @unittest.skipIf(check_numba_version('0.46.0'),
                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
    def test_rebalance(self):
        # Explicit rebalance_array() call on a filtered array, with the
        # auto_rebalance flag enabled for the duration of the test.
        def test_impl(N):
            # Use the parameter N; the previous code read the enclosing
            # method's local ``n`` through the closure instead, leaving N
            # unused and making the kernel depend on a variable assigned
            # only after this function is defined.
            A = np.arange(N)
            B = A[A > 10]
            C = hpat.distributed_api.rebalance_array(B)
            return C.sum()

        try:
            hpat.distributed_analysis.auto_rebalance = True
            hpat_func = hpat.jit(test_impl)
            n = 128
            np.testing.assert_allclose(hpat_func(n), test_impl(n))
            self.assertEqual(count_array_OneDs(), 3)
            self.assertEqual(count_parfor_OneDs(), 2)
        finally:
            # Always switch the module-level flag back off so later tests
            # are not affected.
            hpat.distributed_analysis.auto_rebalance = False

    @unittest.skipIf(check_numba_version('0.46.0'),
                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
    def test_rebalance_loop(self):
        # Filtered array reused inside a loop with auto_rebalance enabled;
        # also checks that the generated LLVM IR mentions 'allgather'.
        def test_impl(N):
            # Use the parameter N rather than the enclosing method's ``n``
            # (see test_rebalance).
            A = np.arange(N)
            B = A[A > 10]
            s = 0
            for i in range(3):
                s += B.sum()
            return s

        try:
            hpat.distributed_analysis.auto_rebalance = True
            hpat_func = hpat.jit(test_impl)
            n = 128
            np.testing.assert_allclose(hpat_func(n), test_impl(n))
            self.assertEqual(count_array_OneDs(), 4)
            self.assertEqual(count_parfor_OneDs(), 2)
            self.assertIn('allgather', list(hpat_func.inspect_llvm().values())[0])
        finally:
            # Always switch the module-level flag back off so later tests
            # are not affected.
            hpat.distributed_analysis.auto_rebalance = False

    def test_transpose(self):
        # Both call forms of ndarray.transpose (tuple and varargs).  Note:
        # test_impl ignores n and uses a fixed (30, 40, 50) array.
        def test_impl(n):
            A = np.ones((30, 40, 50))
            B = A.transpose((0, 2, 1))
            C = A.transpose(0, 2, 1)
            return B.sum() + C.sum()

        hpat_func = hpat.jit(test_impl)
        n = 128
        np.testing.assert_allclose(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    @unittest.skip("Numba's perfmute generation needs to use np seed properly")
    def test_permuted_array_indexing(self):

        # Since Numba uses Python's PRNG for producing random numbers in NumPy,
        # we cannot compare against NumPy.  Therefore, we implement permutation
        # in Python.
        def python_permutation(n, r):
            arr = np.arange(n)
            r.shuffle(arr)
            return arr

        def test_one_dim(arr_len):
            A = np.arange(arr_len)
            B = np.copy(A)
            P = np.random.permutation(arr_len)
            A, B = A[P], B[P]
            return A, B

        # Implementation that uses Python's PRNG for producing a permutation.
        # We test against this function.
        def python_one_dim(arr_len, r):
            A = np.arange(arr_len)
            B = np.copy(A)
            P = python_permutation(arr_len, r)
            A, B = A[P], B[P]
            return A, B

        # Ideally, in above *_impl functions we should just call
        # np.random.seed() and they should produce the same sequence of random
        # numbers.  However, since Numba's PRNG uses NumPy's initialization
        # method for initializing PRNG, we cannot just set seed.  Instead, we
        # resort to this hack that generates a Python Random object with a fixed
        # seed and copies the state to Numba's internal NumPy PRNG state.  For
        # details please see https://github.com/numba/numba/issues/2782.
        r = self._follow_cpython(get_np_state_ptr())

        hpat_func1 = hpat.jit(locals={'A:return': 'distributed',
                                      'B:return': 'distributed'})(test_one_dim)

        # Test one-dimensional array indexing.
        for arr_len in [11, 111, 128, 120]:
            hpat_A, hpat_B = hpat_func1(arr_len)
            python_A, python_B = python_one_dim(arr_len, r)
            # Compare only the slice of the reference result given by
            # self._rank_bounds(arr_len).
            rank_bounds = self._rank_bounds(arr_len)
            np.testing.assert_allclose(hpat_A, python_A[slice(*rank_bounds)])
            np.testing.assert_allclose(hpat_B, python_B[slice(*rank_bounds)])

        # Test two-dimensional array indexing.  Like in one-dimensional case
        # above, in addition to NumPy version that is compiled by Numba, we
        # implement a Python version.
        def test_two_dim(arr_len):
            first_dim = arr_len // 2
            A = np.arange(arr_len).reshape(first_dim, 2)
            B = np.copy(A)
            P = np.random.permutation(first_dim)
            A, B = A[P], B[P]
            return A, B

        def python_two_dim(arr_len, r):
            first_dim = arr_len // 2
            A = np.arange(arr_len).reshape(first_dim, 2)
            B = np.copy(A)
            P = python_permutation(first_dim, r)
            A, B = A[P], B[P]
            return A, B

        hpat_func2 = hpat.jit(locals={'A:return': 'distributed',
                                      'B:return': 'distributed'})(test_two_dim)

        for arr_len in [18, 66, 128]:
            hpat_A, hpat_B = hpat_func2(arr_len)
            python_A, python_B = python_two_dim(arr_len, r)
            # The permutation acts on the first axis, whose length is
            # arr_len // 2.
            rank_bounds = self._rank_bounds(arr_len // 2)
            np.testing.assert_allclose(hpat_A, python_A[slice(*rank_bounds)])
            np.testing.assert_allclose(hpat_B, python_B[slice(*rank_bounds)])

        # Test that the indexed array is not modified if it is not being
        # assigned to.
        def test_rhs(arr_len):
            A = np.arange(arr_len)
            B = np.copy(A)
            P = np.random.permutation(arr_len)
            C = A[P]
            return A, B, C

        hpat_func3 = hpat.jit(locals={'A:return': 'distributed',
                                      'B:return': 'distributed',
                                      'C:return': 'distributed'})(test_rhs)

        for arr_len in [15, 23, 26]:
            A, B, _ = hpat_func3(arr_len)
            np.testing.assert_allclose(A, B)