def test_series_median_parallel1(self):
    """Jitted median of the `points` column must equal the plain-Python result."""
    # fixture step: generates the `kde.parquet` file that test_impl reads
    ParquetGenerator.gen_kde_pq()

    # NOTE: this function is what hpat.jit compiles, so its body is kept
    # exactly as written.
    def test_impl():
        df = pq.read_table('kde.parquet').to_pandas()
        S = df.points
        return S.median()

    compiled = hpat.jit(test_impl)

    # evaluate the compiled version first, then the uncompiled reference
    actual = compiled()
    expected = test_impl()
    self.assertEqual(actual, expected)
def test_series_sort_values_parallel1(self):
    """Jitted `sort_values()` on the `points` column must match the plain-Python result."""
    # fixture step: generates the `kde.parquet` file that test_impl reads
    ParquetGenerator.gen_kde_pq()

    # NOTE: this function is what hpat.jit compiles, so its body is kept
    # exactly as written.
    def test_impl():
        df = pq.read_table('kde.parquet').to_pandas()
        S = df.points
        return S.sort_values()

    compiled = hpat.jit(test_impl)

    # evaluate the compiled version first, then the uncompiled reference
    actual = compiled()
    expected = test_impl()
    np.testing.assert_array_equal(actual, expected)
def test_string_NA_box(self):
    """Compare the NA mask of column `five` between jitted and plain-Python runs."""
    # fixture step: generates the `example.parquet` file that test_impl reads
    ParquetGenerator.gen_pq_test()

    # NOTE: this function is what hpat.jit compiles, so its body is kept
    # exactly as written.
    def test_impl():
        df = pq.read_table('example.parquet').to_pandas()
        return df.five

    compiled = hpat.jit(test_impl)

    # XXX only the isna() masks are compared: Pandas uses None in this case
    # instead of nan for some reason, so the raw values are not checked
    actual_mask = compiled().isna()
    expected_mask = test_impl().isna()
    np.testing.assert_array_equal(actual_mask, expected_mask)
def test_sort_parallel_single_col(self):
    """Sort the frame by `points` under jit and check the returned values are non-decreasing.

    `hpat.hiframes.sort.MIN_SAMPLES` is set to 10 for the duration of the
    jitted call and restored afterwards, whether or not the check passes.
    """
    # fixture step: generates the `kde.parquet` file that test_impl reads
    ParquetGenerator.gen_kde_pq()

    # TODO: better parallel sort test
    # NOTE: the local name `res` must stay as is: the 'res:return' key passed
    # to hpat.jit below refers to it.
    def test_impl():
        df = pd.read_parquet('kde.parquet')
        df.sort_values('points', inplace=True)
        res = df.points.values
        return res

    jit_decorator = hpat.jit(locals={'res:return': 'distributed'})
    compiled = jit_decorator(test_impl)

    original_min_samples = hpat.hiframes.sort.MIN_SAMPLES
    try:
        hpat.hiframes.sort.MIN_SAMPLES = 10
        sorted_points = compiled()
        # every consecutive difference must be >= 0 for a sorted result
        steps = np.diff(sorted_points)
        self.assertTrue((steps >= 0).all())
    finally:
        # put the module-level setting back to what it was before the test
        hpat.hiframes.sort.MIN_SAMPLES = original_min_samples