def test_pd_DataFrame_from_series_par(self):
    """Build a DataFrame from two Series in jitted code and sum column A.

    The jitted result must equal the plain-Python result, and the count_*
    helpers must report no REP arrays, no REP parfors and one OneD parfor.
    """
    def test_impl(n):
        S1 = pd.Series(np.ones(n))
        S2 = pd.Series(np.random.ranf(n))
        df = pd.DataFrame({'A': S1, 'B': S2})
        return df.A.sum()

    size = 11
    jitted = hpat.jit(test_impl)

    actual = jitted(size)
    expected = test_impl(size)
    self.assertEqual(actual, expected)

    # distribution counters are read after the jitted call
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
    self.assertEqual(count_parfor_OneDs(), 1)
def test_h5_group_keys(self):
    """Sum all datasets under group 'G' of test_group_read.hdf5.

    The jitted version receives an explicit ``h5_types`` mapping and must
    return the same total as the plain-Python version.
    """
    def test_impl():
        f = h5py.File("test_group_read.hdf5", "r")
        g1 = f['G']
        s = 0
        for dname in g1.keys():
            X = g1[dname][:]
            s += X.sum()
        f.close()
        return s

    # presumably keyed by the local variable name ``X`` used in test_impl
    dataset_types = {'X': hpat.int64[:]}
    jitted = hpat.jit(test_impl, h5_types=dataset_types)

    actual = jitted()
    expected = test_impl()
    self.assertEqual(actual, expected)
def test_series_count1(self):
    """Compare jitted Series.count() with pandas on inputs containing NaN."""
    def test_impl(S):
        return S.count()

    hpat_func = hpat.jit(test_impl)

    # floats with one NaN, all-NaN floats, and strings with one NaN
    inputs = (
        [np.nan, 2., 3.],
        [np.nan, np.nan],
        ['aa', 'bb', np.nan],
    )
    for values in inputs:
        S = pd.Series(values)
        self.assertEqual(hpat_func(S), test_impl(S))
def test_column_map_arg(self):
    """Add column B via Series.map on a DataFrame passed as an argument.

    The jitted and plain-Python functions each mutate their own input
    frame; the resulting B columns must match.
    """
    def test_impl(df):
        df['B'] = df.A.map(lambda a: 2 * a)
        return

    n = 121
    jit_frame = pd.DataFrame({'A': np.arange(n)})
    py_frame = pd.DataFrame({'A': np.arange(n)})

    hpat_func = hpat.jit(test_impl)
    hpat_func(jit_frame)
    # the new column must be visible on the caller's frame
    self.assertTrue(hasattr(jit_frame, 'B'))

    test_impl(py_frame)
    np.testing.assert_equal(jit_frame.B.values, py_frame.B.values)
def test_decode_unicode2(self):
    """Index a Series of non-ASCII strings that each start with ASCII."""
    def test_impl(S):
        return S[0], S[1], S[2]

    # every entry begins with ASCII characters followed by non-ASCII text
    texts = [
        'abc¡Y tú quién te crees?',
        'dd2🐍⚡',
        '22 大处着眼,小处着手。',
    ]
    S = pd.Series(texts)

    hpat_func = hpat.jit(test_impl)
    actual = hpat_func(S)
    expected = test_impl(S)
    self.assertEqual(actual, expected)
def test_series_sort_values_index1(self):
    """Sort a Series built from separate value and index arrays."""
    def test_impl(A, B):
        S = pd.Series(A, B)
        return S.sort_values()

    hpat_func = hpat.jit(test_impl)

    n = 11
    np.random.seed(0)
    # TODO: support passing Series with Index
    # S = pd.Series(np.random.ranf(n), np.random.randint(0, 100, n))
    A = np.random.ranf(n)
    B = np.random.ranf(n)

    actual = hpat_func(A, B)
    expected = test_impl(A, B)
    pd.testing.assert_series_equal(actual, expected)
def test_merge_asof_seq1(self):
    """Compare jitted pd.merge_asof on a 'time' key with pandas."""
    def test_impl(df1, df2):
        return pd.merge_asof(df1, df2, on='time')

    hpat_func = hpat.jit(test_impl)

    left_times = pd.DatetimeIndex(['2017-01-03', '2017-01-06', '2017-02-21'])
    left = pd.DataFrame({'time': left_times, 'B': [4, 5, 6]})

    right_times = pd.DatetimeIndex([
        '2017-01-01', '2017-01-02', '2017-01-04',
        '2017-02-23', '2017-02-25',
    ])
    right = pd.DataFrame({'time': right_times, 'A': [2, 3, 7, 8, 9]})

    actual = hpat_func(left, right)
    expected = test_impl(left, right)
    pd.testing.assert_frame_equal(actual, expected)
def test_pass_return(self):
    """Pass a string, then a list of strings, through a jitted identity."""
    def test_impl(_str):
        return _str

    hpat_func = hpat.jit(test_impl)

    samples = (
        'test_str',                    # pass single string and return
        ['test_str1', 'test_str2'],    # pass string list and return
    )
    for arg in samples:
        self.assertEqual(hpat_func(arg), test_impl(arg))
def test_set_column1(self):
    """Overwrite an existing DataFrame column inside jitted code.

    Column A is replaced and summed; the jitted sum must equal the
    plain-Python sum, and the count_* helpers must report no REP arrays,
    no REP parfors and one OneD parfor.
    """
    # set existing column
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n, np.int64), 'B': np.random.ranf(n)})
        df['A'] = np.arange(n)
        return df.A.sum()

    size = 11
    jitted = hpat.jit(test_impl)

    actual = jitted(size)
    expected = test_impl(size)
    self.assertEqual(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
    self.assertEqual(count_parfor_OneDs(), 1)
def test_set_column2(self):
    """Add a new DataFrame column inside jitted code.

    Column C is created and summed; the jitted sum must equal the
    plain-Python sum, and the count_* helpers must report no REP arrays,
    no REP parfors and one OneD parfor.
    """
    # create new column
    def test_impl(n):
        df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
        df['C'] = np.arange(n)
        return df.C.sum()

    size = 11
    jitted = hpat.jit(test_impl)

    actual = jitted(size)
    expected = test_impl(size)
    self.assertEqual(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
    self.assertEqual(count_parfor_OneDs(), 1)
def test_muti_hiframes_node_filter_agg(self):
    """Filter a DataFrame, then return a column and a groupby count.

    Column C of the filtered frame is compared element-wise; the groupby
    counts are compared as sets, so their order is not checked.
    """
    def test_impl(df, cond):
        df2 = df[cond]
        c = df2.groupby('A')['B'].count()
        return df2.C, c

    hpat_func = hpat.jit(test_impl)

    df = pd.DataFrame({
        'A': [2, 1, 1, 1, 2, 2, 1],
        'B': [-8, 2, 3, 1, 5, 6, 7],
        'C': [2, 3, -1, 1, 2, 3, -1],
    })
    cond = df.A > 1

    py_col, py_counts = test_impl(df, cond)
    jit_col, jit_counts = hpat_func(df, cond)

    self.assertEqual(set(py_counts), set(jit_counts))
    np.testing.assert_array_equal(py_col, jit_col)
def test_df_astype_str1(self):
    """Check DataFrame.astype(str) on an int column and a string column."""
    def test_impl(df):
        return df.astype(str)

    # TODO: add column with float values when test_series_astype_float_to_str1 is fixed
    columns = {
        'A': [-1, 2, 11, 5, 0, -7],
        'B': ['aa', 'bb', 'cc', 'dd', '', 'fff'],
    }
    frame = pd.DataFrame(columns)

    hpat_func = hpat.jit(test_impl)
    actual = hpat_func(frame)
    expected = test_impl(frame)
    pd.testing.assert_frame_equal(actual, expected)
def test_shape1(self):
    """Compare DataFrame.shape from jitted code with plain Python.

    Also checks that the count_* helpers report no REP arrays or parfors.
    """
    def test_impl(n):
        df = pd.DataFrame({
            'A': np.ones(n, np.int64),
            'B': np.random.ranf(n),
        })
        return df.shape

    size = 11
    jitted = hpat.jit(test_impl)

    actual = jitted(size)
    expected = test_impl(size)
    self.assertEqual(actual, expected)

    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_replace_noregex_std(self):
    """Exercise hpat.str_ext.str_replace_noregex against str.replace.

    Inside the jitted function the inputs are converted with
    unicode_to_std_str and the result is converted back with
    std_str_to_unicode.
    """
    def test_impl(_str, pat, val):
        s = unicode_to_std_str(_str)
        e = unicode_to_std_str(pat)
        val = unicode_to_std_str(val)
        out = hpat.str_ext.str_replace_noregex(s, e, val)
        return std_str_to_unicode(out)

    text = 'What does the fox say'
    pattern = 'does the fox'
    replacement = 'does the cat'

    hpat_func = hpat.jit(test_impl)
    actual = hpat_func(text, pattern, replacement)
    # the reference value comes from the built-in str.replace
    expected = text.replace(pattern, replacement)
    self.assertEqual(actual, expected)
def test_array_reduce(self):
    """Accumulate one array into another inside a numba.prange loop.

    The jitted result must be close to the plain-Python result, and the
    count_* helpers must report zero OneD arrays and one OneD parfor.
    """
    def test_impl(N):
        A = np.ones(3)
        B = np.ones(3)
        for i in numba.prange(N):
            A += B
        return A

    iterations = 128
    jitted = hpat.jit(test_impl)

    actual = jitted(iterations)
    expected = test_impl(iterations)
    np.testing.assert_allclose(actual, expected)

    self.assertEqual(count_array_OneDs(), 0)
    self.assertEqual(count_parfor_OneDs(), 1)
def test_df_describe(self):
    """Run DataFrame.describe() in jitted code and check the counters.

    The describe() output is not compared; only the count_* helpers are
    asserted, which must report no REP arrays or parfors.
    """
    def test_impl(n):
        df = pd.DataFrame({'A': np.arange(0, n, 1, np.float32),
                           'B': np.arange(n)})
        #df.A[0:1] = np.nan
        return df.describe()

    size = 1001
    jitted = hpat.jit(test_impl)
    jitted(size)

    # XXX: test actual output
    self.assertEqual(count_array_REPs(), 0)
    self.assertEqual(count_parfor_REPs(), 0)
def test_str2str(self):
    """Check str -> str methods in jitted code against built-in str.

    For each listed method name a tiny ``test_impl`` is generated with
    ``exec`` so the method call appears literally in the compiled source.
    Each method runs in its own ``subTest``, so a failure names the
    offending method and, under a subTest-aware runner, the remaining
    methods still run.
    """
    str2str_methods = ['capitalize', 'casefold', 'lower', 'lstrip',
                       'rstrip', 'strip', 'swapcase', 'title', 'upper']
    # whitespace on both ends and mixed case, so every method has work to do
    arg = ' \tbbCD\t '

    for method in str2str_methods:
        with self.subTest(method=method):
            func_text = "def test_impl(_str):\n"
            func_text += " return _str.{}()\n".format(method)
            loc_vars = {}
            # func_text is built only from the hard-coded method names above
            exec(func_text, {}, loc_vars)
            test_impl = loc_vars['test_impl']
            hpat_func = hpat.jit(test_impl)

            self.assertEqual(hpat_func(arg), test_impl(arg))
def test_string_NA_box(self):
    """Read column 'five' from example.parquet and compare its NA mask."""
    # create `example.parquet` file
    ParquetGenerator.gen_pq_test()

    def test_impl():
        df = pq.read_table('example.parquet').to_pandas()
        return df.five

    hpat_func = hpat.jit(test_impl)

    # XXX just checking isna() since Pandas uses None in this case
    # instead of nan for some reason
    jit_mask = hpat_func().isna()
    py_mask = test_impl().isna()
    np.testing.assert_array_equal(jit_mask, py_mask)
def test_set_column1_issue(self):
    """Overwrite an existing column and return the whole DataFrame."""
    # set existing column
    def test_impl(n):
        df = pd.DataFrame({
            'A': np.ones(n, np.int64),
            'B': np.arange(n) + 3.0,
        })
        df['A'] = np.arange(n)
        return df

    size = 11
    jitted = hpat.jit(test_impl)

    actual = jitted(size)
    expected = test_impl(size)
    pd.testing.assert_frame_equal(actual, expected)
def test_join1_seq_str_na(self):
    """Left-merge on string keys where one left key has no match.

    The returned string column B is compared as a set, so row order is
    not checked.
    """
    # test setting NA in string data column
    def test_impl():
        df1 = pd.DataFrame({'key1': ['foo', 'bar', 'baz']})
        df2 = pd.DataFrame({
            'key2': ['baz', 'bar', 'baz'],
            'B': ['b', 'zzz', 'ss'],
        })
        df3 = df1.merge(df2, left_on='key1', right_on='key2', how='left')
        return df3.B

    hpat_func = hpat.jit(test_impl)

    jit_values = set(hpat_func())
    py_values = set(test_impl())
    self.assertEqual(jit_values, py_values)
def test_pivot(self):
    """Compare the 'small' and 'large' columns of a jitted pivot_table.

    Values are compared as sets, so ordering is not checked.
    """
    def test_impl(df):
        pt = df.pivot_table(index='A', columns='C', values='D', aggfunc='sum')
        return (pt.small.values, pt.large.values)

    # presumably 'pt' refers to the local variable of that name in test_impl
    pivot_columns = {'pt': ['small', 'large']}
    hpat_func = hpat.jit(pivots=pivot_columns)(test_impl)

    # position 0 holds the 'small' values, position 1 the 'large' values
    for position in (0, 1):
        self.assertEqual(
            set(hpat_func(_pivot_df1)[position]),
            set(test_impl(_pivot_df1)[position]))
def test_setitem_series3(self):
    """Set one element of a Series wrapping an input array.

    After running the jitted and plain-Python versions on separate
    arrays, the two arrays are compared.
    """
    def test_impl(A, i):
        S = pd.Series(A)
        S[i] = 100

    hpat_func = hpat.jit(test_impl)

    n = 11
    base = np.arange(n)
    jit_input = base.copy()
    py_input = base   # the plain-Python run gets the original array, not a copy

    hpat_func(jit_input, 0)
    test_impl(py_input, 0)
    np.testing.assert_array_equal(jit_input, py_input)
def test_csv_str1(self):
    """Read csv_data_date1.csv with explicit column names and dtypes.

    The jitted pd.read_csv call must produce the same frame as pandas.
    """
    def test_impl():
        # np.int / np.float were removed in NumPy 1.24 (they raise
        # AttributeError there), so explicit fixed-width dtypes are used.
        return pd.read_csv("csv_data_date1.csv",
                           names=['A', 'B', 'C', 'D'],
                           dtype={
                               'A': np.int64,
                               'B': np.float64,
                               'C': str,
                               'D': np.int64,
                           })

    hpat_func = hpat.jit(test_impl)
    pd.testing.assert_frame_equal(hpat_func(), test_impl())
def test_fixed_apply2(self):
    """Check rolling(...).apply(sum) over generated frames, sequentially.

    Every combination of frame size, window size and ``center`` flag runs
    in its own ``subTest``, so a failure reports which (n, w, c) triggered
    it; under a subTest-aware runner the remaining combinations still run.
    """
    # test sequentially with generated dfs
    def test_impl(df, w, c):
        return df.rolling(w, center=c).apply(lambda a: a.sum())

    hpat_func = hpat.jit(test_impl)

    sizes = (1, 2, 10, 11, 121, 1000)
    wins = (2, 3, 5)
    centers = (False, True)
    for n, w, c in itertools.product(sizes, wins, centers):
        with self.subTest(n=n, w=w, c=c):
            df = pd.DataFrame({'B': np.arange(n)})
            pd.testing.assert_frame_equal(
                hpat_func(df, w, c), test_impl(df, w, c))
def test_df_astype_int1(self):
    """Check DataFrame.astype(np.int32) on int64, int32 and float32 columns."""
    def test_impl(df):
        return df.astype(np.int32)

    n = 6
    # TODO: uncomment column with string values when test_series_astype_str_to_int32 is fixed
    frame = pd.DataFrame({
        'A': np.ones(n, dtype=np.int64),
        'B': np.arange(n, dtype=np.int32),
        # 'C': ['-1', '2', '3', '0', '-7', '99'],
        'D': np.arange(float(n), dtype=np.float32),
    })

    hpat_func = hpat.jit(test_impl)
    actual = hpat_func(frame)
    expected = test_impl(frame)
    pd.testing.assert_frame_equal(actual, expected)
def test_unbox2(self):
    """Conditionally overwrite column A of an argument DataFrame.

    Both branches of the condition are exercised; every call receives a
    fresh copy of the input frame.
    """
    def test_impl(df, cond):
        n = len(df)
        if cond:
            df['A'] = np.arange(n) + 2.0
        return df.A

    hpat_func = hpat.jit(test_impl)

    n = 11
    df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})

    for flag in (True, False):
        pd.testing.assert_series_equal(
            hpat_func(df.copy(), flag), test_impl(df.copy(), flag))
def test_sort_values_copy(self):
    """Sort a DataFrame by column A and return column B of the result."""
    def test_impl(df):
        df2 = df.sort_values('A')
        return df2.B.values

    n = 1211
    np.random.seed(2)
    df = pd.DataFrame({
        'A': np.random.ranf(n),
        'B': np.arange(n),
        'C': np.random.ranf(n),
    })

    hpat_func = hpat.jit(test_impl)
    # the jitted call receives a copy; the plain-Python call gets the original
    actual = hpat_func(df.copy())
    expected = test_impl(df)
    np.testing.assert_almost_equal(actual, expected)
def test_box_categorical(self):
    """Return a DataFrame that carries a categorical column.

    The function only modifies the integer column A; the categorical
    column B is passed through and returned as part of the frame.
    """
    def test_impl(df):
        df['A'] = df['A'] + 1
        return df

    hpat_func = hpat.jit(test_impl)

    yes_no = pd.api.types.CategoricalDtype(['N', 'Y'])
    df = pd.DataFrame({
        'A': [1, 2, 3],
        'B': pd.Series(['N', 'Y', 'Y'], dtype=yes_no),
    })

    # the jitted call works on a deep copy; the plain-Python call gets the original
    actual = hpat_func(df.copy(deep=True))
    expected = test_impl(df)
    pd.testing.assert_frame_equal(actual, expected)
def test_var_rename(self):
    """Pass a locally built DataFrame to inner_get_column from jitted code.

    The returned Series are compared without checking their names.
    """
    # tests df variable replacement in hiframes_untyped where inlining
    # can cause extra assignments and definition handling errors
    # TODO: inline freevar
    def test_impl():
        df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
        # TODO: df['C'] = [5,6,7]
        df['C'] = np.ones(3)
        return inner_get_column(df)

    hpat_func = hpat.jit(test_impl)

    actual = hpat_func()
    expected = test_impl()
    pd.testing.assert_series_equal(actual, expected, check_names=False)
def test_concat_columns1(self):
    """Concatenate two Series column-wise with pd.concat(axis=1).

    The pandas reference has its integer column labels renamed to
    strings before the comparison.
    """
    def test_impl(S1, S2):
        return pd.concat([S1, S2], axis=1)

    hpat_func = hpat.jit(test_impl)

    S1 = pd.Series([4, 5])
    S2 = pd.Series([6., 7.])

    actual = hpat_func(S1, S2)
    # TODO: support int as column name
    label_map = {0: '0', 1: '1'}
    expected = test_impl(S1, S2).rename(columns=label_map)
    pd.testing.assert_frame_equal(actual, expected)