コード例 #1
0
class TestDataFrame(unittest.TestCase):
    def test_create1(self):
        """Column 'A' of a DataFrame built inside the jitted function matches pandas."""
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
            return df.A

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_series_equal(actual, expected)

    def test_create_cond1(self):
        """A DataFrame created in either branch of an ``if`` yields the expected column."""
        def test_impl(A, B, c):
            if c:
                df = pd.DataFrame({'A': A})
            else:
                df = pd.DataFrame({'A': B})
            return df.A

        compiled = hpat.jit(test_impl)
        size = 11
        ones = np.ones(size)
        shifted = np.arange(size) + 1.0
        # 0 takes the else-branch, 2 (truthy) takes the if-branch
        for flag in (0, 2):
            pd.testing.assert_series_equal(compiled(ones, shifted, flag),
                                           test_impl(ones, shifted, flag))

    @unittest.skip('Implement feature to create DataFrame without column names')
    def test_create_without_column_names(self):
        """A DataFrame built from a plain list (no column names) is returned from jit."""
        def test_impl():
            df = pd.DataFrame([100, 200, 300, 400, 200, 100])
            return df

        compiled = hpat.jit(test_impl)
        actual = compiled()
        expected = test_impl()
        pd.testing.assert_frame_equal(actual, expected)

    def test_unbox1(self):
        """A DataFrame passed as an argument exposes its column 'A' inside jit."""
        def test_impl(df):
            return df.A

        size = 11
        frame = pd.DataFrame({'A': np.arange(size), 'B': np.random.ranf(size)})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame)
        expected = test_impl(frame)
        pd.testing.assert_series_equal(actual, expected)

    @unittest.skip("needs properly refcounted dataframes")
    def test_unbox2(self):
        """Conditionally overwriting column 'A' of an argument DataFrame matches pandas."""
        def test_impl(df, cond):
            n = len(df)
            if cond:
                df['A'] = np.arange(n) + 2.0
            return df.A

        compiled = hpat.jit(test_impl)
        size = 11
        base = pd.DataFrame({'A': np.ones(size), 'B': np.random.ranf(size)})
        # each call gets its own copy because test_impl may mutate its argument
        for cond in (True, False):
            actual = compiled(base.copy(), cond)
            expected = test_impl(base.copy(), cond)
            pd.testing.assert_series_equal(actual, expected)

    @unittest.skip('Implement feature to create DataFrame without column names')
    def test_unbox_without_column_names(self):
        """A DataFrame without column names passes through a jitted identity function."""
        def test_impl(df):
            return df

        frame = pd.DataFrame([100, 200, 300, 400, 200, 100])
        compiled = hpat.jit(test_impl)
        actual = compiled(frame)
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)

    def test_box1(self):
        """A two-column numeric DataFrame created inside jit is returned intact."""
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
            return df

        compiled = hpat.jit(test_impl)
        size = 11
        # dtype comparison is switched off on Windows unless IS_32BITS is set
        compare_dtypes = not (platform.system() == 'Windows' and not IS_32BITS)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_frame_equal(actual, expected,
                                      check_dtype=compare_dtypes)

    def test_box2(self):
        """A DataFrame with an integer list column and a string list column is returned."""
        def test_impl():
            df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'bb', 'ccc']})
            return df

        compiled = hpat.jit(test_impl)
        actual = compiled()
        expected = test_impl()
        pd.testing.assert_frame_equal(actual, expected)

    @unittest.skip("pending df filter support")
    def test_box3(self):
        """A string-column DataFrame filtered by a boolean mask is returned from jit."""
        def test_impl(df):
            df = df[df.A != 'dd']
            return df

        frame = pd.DataFrame({'A': ['aa', 'bb', 'cc']})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame)
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)

    def test_box_categorical(self):
        """A DataFrame holding a categorical column survives a jitted update of 'A'."""
        def test_impl(df):
            df['A'] = df['A'] + 1
            return df

        compiled = hpat.jit(test_impl)
        yes_no = pd.api.types.CategoricalDtype(['N', 'Y'])
        frame = pd.DataFrame({
            'A': [1, 2, 3],
            'B': pd.Series(['N', 'Y', 'Y'], dtype=yes_no),
        })
        # the jitted call works on a deep copy; the reference call then mutates `frame`
        actual = compiled(frame.copy(deep=True))
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)

    @unittest.skipIf(
        check_numba_version('0.46.0'),
        "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
    def test_box_dist_return(self):
        """A DataFrame returned with ``distributed={'df'}`` sums to the pandas totals.

        Column sums of the jitted result are combined through a jitted
        ``dist_reduce`` with ``Reduce_Type.Sum`` and compared to the sums of
        the plain pandas result.
        """
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
            return df

        compiled = hpat.jit(distributed={'df'})(test_impl)
        size = 11
        dist_frame = compiled(size)
        ref_frame = test_impl(size)
        self.assertEqual(count_array_OneDs(), 3)
        self.assertEqual(count_parfor_OneDs(), 2)
        dist_sum = hpat.jit(lambda a: hpat.distributed_api.dist_reduce(
            a, np.int32(hpat.distributed_api.Reduce_Type.Sum.value)))
        dist_sum(1)  # run to compile
        total_a = dist_sum(dist_frame.A.sum())
        np.testing.assert_allclose(total_a, ref_frame.A.sum())
        total_b = dist_sum(dist_frame.B.sum())
        np.testing.assert_allclose(total_b, ref_frame.B.sum())

    def test_len1(self):
        """``len(df)`` inside jit equals pandas; both REP counters are zero afterwards."""
        def test_impl(n):
            df = pd.DataFrame({
                'A': np.ones(n, np.int64),
                'B': np.random.ranf(n)
            })
            return len(df)

        size = 11
        compiled = hpat.jit(test_impl)
        self.assertEqual(compiled(size), test_impl(size))
        for rep_counter in (count_array_REPs, count_parfor_REPs):
            self.assertEqual(rep_counter(), 0)

    def test_shape1(self):
        """``df.shape`` inside jit equals pandas; both REP counters are zero afterwards."""
        def test_impl(n):
            df = pd.DataFrame({
                'A': np.ones(n, np.int64),
                'B': np.random.ranf(n)
            })
            return df.shape

        size = 11
        compiled = hpat.jit(test_impl)
        self.assertEqual(compiled(size), test_impl(size))
        for rep_counter in (count_array_REPs, count_parfor_REPs):
            self.assertEqual(rep_counter(), 0)

    def test_column_getitem1(self):
        """``df['A'].values.sum()`` inside jit equals pandas.

        Afterwards both REP counters must be zero and exactly one OneD parfor
        must be reported.
        """
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
            Ac = df['A'].values
            return Ac.sum()

        size = 11
        compiled = hpat.jit(test_impl)
        self.assertEqual(compiled(size), test_impl(size))
        for rep_counter in (count_array_REPs, count_parfor_REPs):
            self.assertEqual(rep_counter(), 0)
        self.assertEqual(count_parfor_OneDs(), 1)

    def test_column_list_getitem1(self):
        """Selecting columns with a list (``df[['A', 'C']]``) matches pandas."""
        def test_impl(df):
            return df[['A', 'C']]

        size = 11
        columns = {
            'A': np.arange(size),
            'B': np.ones(size),
            'C': np.random.ranf(size),
        }
        frame = pd.DataFrame(columns)
        compiled = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(compiled(frame), test_impl(frame))

    def test_filter1(self):
        """Boolean-mask filtering via ``df[mask]`` followed by a column sum matches pandas."""
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + n, 'B': np.arange(n)**2})
            df1 = df[df.A > .5]
            return df1.B.sum()

        size = 11
        compiled = hpat.jit(test_impl)
        self.assertEqual(compiled(size), test_impl(size))
        for rep_counter in (count_array_REPs, count_parfor_REPs):
            self.assertEqual(rep_counter(), 0)

    def test_filter2(self):
        """Boolean-mask filtering via ``df.loc[mask]`` followed by ``np.sum`` matches pandas."""
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + n, 'B': np.arange(n)**2})
            df1 = df.loc[df.A > .5]
            return np.sum(df1.B)

        size = 11
        compiled = hpat.jit(test_impl)
        self.assertEqual(compiled(size), test_impl(size))
        for rep_counter in (count_array_REPs, count_parfor_REPs):
            self.assertEqual(rep_counter(), 0)

    def test_filter3(self):
        """Boolean-array filtering via ``df.iloc[mask.values]`` followed by ``np.sum``."""
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + n, 'B': np.arange(n)**2})
            df1 = df.iloc[(df.A > .5).values]
            return np.sum(df1.B)

        size = 11
        compiled = hpat.jit(test_impl)
        self.assertEqual(compiled(size), test_impl(size))
        for rep_counter in (count_array_REPs, count_parfor_REPs):
            self.assertEqual(rep_counter(), 0)

    def test_iloc1(self):
        """Row slicing with ``df.iloc[1:n]`` matches pandas."""
        def test_impl(df, n):
            return df.iloc[1:n].B.values

        size = 11
        frame = pd.DataFrame({'A': np.arange(size), 'B': np.arange(size)**2})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame, size)
        expected = test_impl(frame, size)
        np.testing.assert_array_equal(actual, expected)

    def test_iloc2(self):
        """Row selection with ``df.iloc`` and an integer NumPy array matches pandas."""
        def test_impl(df, n):
            return df.iloc[np.array([1, 4, 9])].B.values

        size = 11
        frame = pd.DataFrame({'A': np.arange(size), 'B': np.arange(size)**2})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame, size)
        expected = test_impl(frame, size)
        np.testing.assert_array_equal(actual, expected)

    def test_iloc3(self):
        """Column selection with ``df.iloc[:, 1]`` matches pandas."""
        def test_impl(df):
            return df.iloc[:, 1].values

        size = 11
        frame = pd.DataFrame({'A': np.arange(size), 'B': np.arange(size)**2})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame)
        expected = test_impl(frame)
        np.testing.assert_array_equal(actual, expected)

    @unittest.skip("TODO: support A[[1,2,3]] in Numba")
    def test_iloc4(self):
        """Row selection with ``df.iloc`` and a Python list of positions matches pandas."""
        def test_impl(df, n):
            return df.iloc[[1, 4, 9]].B.values

        size = 11
        frame = pd.DataFrame({'A': np.arange(size), 'B': np.arange(size)**2})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame, size)
        expected = test_impl(frame, size)
        np.testing.assert_array_equal(actual, expected)

    def test_iloc5(self):
        """Column selection with ``df.iloc`` using the module-level ``COL_IND`` value."""
        def test_impl(df):
            return df.iloc[:, COL_IND].values

        size = 11
        frame = pd.DataFrame({'A': np.arange(size), 'B': np.arange(size)**2})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame)
        expected = test_impl(frame)
        np.testing.assert_array_equal(actual, expected)

    def test_loc1(self):
        """Column selection by label with ``df.loc[:, 'B']`` matches pandas."""
        def test_impl(df):
            return df.loc[:, 'B'].values

        size = 11
        frame = pd.DataFrame({'A': np.arange(size), 'B': np.arange(size)**2})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame)
        expected = test_impl(frame)
        np.testing.assert_array_equal(actual, expected)

    def test_iat1(self):
        """``df.iat[3, 1]`` on a DataFrame created inside jit matches pandas."""
        def test_impl(n):
            df = pd.DataFrame({'B': np.ones(n), 'A': np.arange(n) + n})
            return df.iat[3, 1]

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        self.assertEqual(actual, expected)

    def test_iat2(self):
        """``df.iat[3, 1]`` on a DataFrame passed as an argument matches pandas."""
        def test_impl(df):
            return df.iat[3, 1]

        size = 11
        frame = pd.DataFrame({'B': np.ones(size), 'A': np.arange(size) + size})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame)
        expected = test_impl(frame)
        self.assertEqual(actual, expected)

    def test_iat3(self):
        """``df.iat`` with a row index computed from an argument matches pandas."""
        def test_impl(df, n):
            return df.iat[n - 1, 1]

        size = 11
        frame = pd.DataFrame({'B': np.ones(size), 'A': np.arange(size) + size})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame, size)
        expected = test_impl(frame, size)
        self.assertEqual(actual, expected)

    def test_iat_set1(self):
        """Assigning through ``df.iat`` is visible in the column returned afterwards."""
        def test_impl(df, n):
            df.iat[n - 1, 1] = n**2
            return df.A  # return the column to check column aliasing

        compiled = hpat.jit(test_impl)
        size = 11
        jit_frame = pd.DataFrame({'B': np.ones(size), 'A': np.arange(size) + size})
        # separate copy so the reference run starts from unmodified data
        ref_frame = jit_frame.copy()
        actual = compiled(jit_frame, size)
        expected = test_impl(ref_frame, size)
        pd.testing.assert_series_equal(actual, expected)

    def test_iat_set2(self):
        """Assigning through ``df.iat`` is visible in the DataFrame returned afterwards."""
        def test_impl(df, n):
            df.iat[n - 1, 1] = n**2
            return df  # check df aliasing/boxing

        compiled = hpat.jit(test_impl)
        size = 11
        jit_frame = pd.DataFrame({'B': np.ones(size), 'A': np.arange(size) + size})
        # separate copy so the reference run starts from unmodified data
        ref_frame = jit_frame.copy()
        actual = compiled(jit_frame, size)
        expected = test_impl(ref_frame, size)
        pd.testing.assert_frame_equal(actual, expected)

    def test_set_column1(self):
        """Overwriting an existing column of a DataFrame created inside jit."""
        def test_impl(n):
            df = pd.DataFrame({
                'A': np.ones(n, np.int64),
                'B': np.arange(n) + 3.0
            })
            df['A'] = np.arange(n)
            return df

        compiled = hpat.jit(test_impl)
        size = 11
        # dtype comparison is switched off on Windows unless IS_32BITS is set
        compare_dtypes = not (platform.system() == 'Windows' and not IS_32BITS)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_frame_equal(actual, expected,
                                      check_dtype=compare_dtypes)

    def test_set_column_reflect4(self):
        """Overwriting an existing column inside jit is reflected in the caller's DataFrame."""
        def test_impl(df, n):
            df['A'] = np.arange(n)

        compiled = hpat.jit(test_impl)
        size = 11
        jit_frame = pd.DataFrame({
            'A': np.ones(size, np.int64),
            'B': np.arange(size) + 3.0,
        })
        ref_frame = jit_frame.copy()
        # both functions return nothing; the frames themselves are compared
        compiled(jit_frame, size)
        test_impl(ref_frame, size)
        # dtype comparison is switched off on Windows unless IS_32BITS is set
        compare_dtypes = not (platform.system() == 'Windows' and not IS_32BITS)
        pd.testing.assert_frame_equal(jit_frame, ref_frame,
                                      check_dtype=compare_dtypes)

    def test_set_column_new_type1(self):
        """Overwriting an existing float column with ``np.arange(n)`` (a new type)."""
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n) + 3.0})
            df['A'] = np.arange(n)
            return df

        compiled = hpat.jit(test_impl)
        size = 11
        # dtype comparison is switched off on Windows unless IS_32BITS is set
        compare_dtypes = not (platform.system() == 'Windows' and not IS_32BITS)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_frame_equal(actual, expected,
                                      check_dtype=compare_dtypes)

    def test_set_column2(self):
        """Adding a new column 'C' to a DataFrame created inside jit."""
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n) + 1.0})
            df['C'] = np.arange(n)
            return df

        compiled = hpat.jit(test_impl)
        size = 11
        # dtype comparison is switched off on Windows unless IS_32BITS is set
        compare_dtypes = not (platform.system() == 'Windows' and not IS_32BITS)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_frame_equal(actual, expected,
                                      check_dtype=compare_dtypes)

    def test_set_column_reflect3(self):
        """Adding a new column inside jit is reflected in the caller's DataFrame."""
        def test_impl(df, n):
            df['C'] = np.arange(n)

        compiled = hpat.jit(test_impl)
        size = 11
        jit_frame = pd.DataFrame({
            'A': np.ones(size, np.int64),
            'B': np.arange(size) + 3.0,
        })
        ref_frame = jit_frame.copy()
        # both functions return nothing; the frames themselves are compared
        compiled(jit_frame, size)
        test_impl(ref_frame, size)
        # dtype comparison is switched off on Windows unless IS_32BITS is set
        compare_dtypes = not (platform.system() == 'Windows' and not IS_32BITS)
        pd.testing.assert_frame_equal(jit_frame, ref_frame,
                                      check_dtype=compare_dtypes)

    def test_set_column_bool1(self):
        """Setting a new column from a boolean-masked column is reflected in the caller."""
        def test_impl(df):
            df['C'] = df['A'][df['B']]

        compiled = hpat.jit(test_impl)
        jit_frame = pd.DataFrame({'A': [1, 2, 3], 'B': [True, False, True]})
        ref_frame = jit_frame.copy()
        # reference run first, then the jitted run, each on its own frame
        test_impl(ref_frame)
        compiled(jit_frame)
        pd.testing.assert_series_equal(jit_frame.C, ref_frame.C)

    def test_set_column_reflect1(self):
        """A column added inside jit shows up in the caller's DataFrame with the given values."""
        def test_impl(df, arr):
            df['C'] = arr
            return df.C.sum()

        compiled = hpat.jit(test_impl)
        size = 11
        new_values = np.random.ranf(size)
        frame = pd.DataFrame({'A': np.ones(size), 'B': np.random.ranf(size)})
        # return value is ignored here; only the side effect on `frame` is checked
        compiled(frame, new_values)
        self.assertIn('C', frame)
        np.testing.assert_almost_equal(frame.C.values, new_values)

    def test_set_column_reflect2(self):
        """Sum of a column added inside jit equals the pandas result."""
        def test_impl(df, arr):
            df['C'] = arr
            return df.C.sum()

        compiled = hpat.jit(test_impl)
        size = 11
        new_values = np.random.ranf(size)
        jit_frame = pd.DataFrame({'A': np.ones(size), 'B': np.random.ranf(size)})
        ref_frame = jit_frame.copy()
        actual = compiled(jit_frame, new_values)
        expected = test_impl(ref_frame, new_values)
        np.testing.assert_almost_equal(actual, expected)

    def test_df_values1(self):
        """``df.values`` of a DataFrame created inside jit matches pandas."""
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
            return df.values

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        np.testing.assert_array_equal(actual, expected)

    def test_df_values2(self):
        """``df.values`` of a DataFrame passed as an argument matches pandas."""
        def test_impl(df):
            return df.values

        size = 11
        frame = pd.DataFrame({'A': np.ones(size), 'B': np.arange(size)})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame)
        expected = test_impl(frame)
        np.testing.assert_array_equal(actual, expected)

    def test_df_values_parallel1(self):
        """``df.values.sum()`` matches pandas; both REP counters are zero afterwards."""
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
            return df.values.sum()

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        np.testing.assert_array_equal(actual, expected)
        for rep_counter in (count_array_REPs, count_parfor_REPs):
            self.assertEqual(rep_counter(), 0)

    def test_df_apply(self):
        """Row-wise ``df.apply`` (``axis=1``) with an arithmetic lambda matches pandas.

        The sum of the values produced by ``apply`` is compared between the
        jitted function and plain pandas.
        """
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)})
            B = df.apply(lambda r: r.A + r.B, axis=1)
            # Return the sum of the apply() result itself. The previous
            # ``df.B.sum()`` read the untouched input column, so the output
            # of apply() was never verified.
            return B.sum()

        n = 121
        hpat_func = hpat.jit(test_impl)
        np.testing.assert_almost_equal(hpat_func(n), test_impl(n))

    def test_df_apply_branch(self):
        """Row-wise ``df.apply`` (``axis=1``) with a short-circuit boolean lambda.

        The sum of the values produced by ``apply`` is compared between the
        jitted function and plain pandas.
        """
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)})
            B = df.apply(lambda r: r.A < 10 and r.B > 20, axis=1)
            # Return the sum of the apply() result itself. The previous
            # ``df.B.sum()`` read the untouched input column, so the output
            # of apply() was never verified.
            return B.sum()

        n = 121
        hpat_func = hpat.jit(test_impl)
        np.testing.assert_almost_equal(hpat_func(n), test_impl(n))

    def test_df_describe(self):
        """``df.describe()`` runs under jit and leaves both REP counters at zero.

        The returned value is not compared against pandas.
        """
        def test_impl(n):
            df = pd.DataFrame({
                'A': np.arange(0, n, 1, np.float32),
                'B': np.arange(n)
            })
            #df.A[0:1] = np.nan
            return df.describe()

        size = 1001
        compiled = hpat.jit(test_impl)
        compiled(size)
        # XXX: test actual output
        for rep_counter in (count_array_REPs, count_parfor_REPs):
            self.assertEqual(rep_counter(), 0)

    def test_sort_values(self):
        """In-place ``sort_values('A')`` reorders the other columns like pandas does."""
        def test_impl(df):
            df.sort_values('A', inplace=True)
            return df.B.values

        size = 1211
        np.random.seed(2)
        columns = {
            'A': np.random.ranf(size),
            'B': np.arange(size),
            'C': np.random.ranf(size),
        }
        frame = pd.DataFrame(columns)
        compiled = hpat.jit(test_impl)
        # the jitted run sorts a copy; the reference run then sorts `frame` itself
        actual = compiled(frame.copy())
        expected = test_impl(frame)
        np.testing.assert_almost_equal(actual, expected)

    def test_sort_values_copy(self):
        """Non-in-place ``sort_values('A')`` returns a frame ordered like pandas."""
        def test_impl(df):
            df2 = df.sort_values('A')
            return df2.B.values

        size = 1211
        np.random.seed(2)
        columns = {
            'A': np.random.ranf(size),
            'B': np.arange(size),
            'C': np.random.ranf(size),
        }
        frame = pd.DataFrame(columns)
        compiled = hpat.jit(test_impl)
        actual = compiled(frame.copy())
        expected = test_impl(frame)
        np.testing.assert_almost_equal(actual, expected)

    def test_sort_values_single_col(self):
        """In-place ``sort_values`` on a DataFrame with a single float column."""
        def test_impl(df):
            df.sort_values('A', inplace=True)
            return df.A.values

        size = 1211
        np.random.seed(2)
        frame = pd.DataFrame({'A': np.random.ranf(size)})
        compiled = hpat.jit(test_impl)
        # the jitted run sorts a copy; the reference run then sorts `frame` itself
        actual = compiled(frame.copy())
        expected = test_impl(frame)
        np.testing.assert_almost_equal(actual, expected)

    def test_sort_values_single_col_str(self):
        """In-place ``sort_values`` on a DataFrame with a single string column."""
        def test_impl(df):
            df.sort_values('A', inplace=True)
            return df.A.values

        size = 1211
        random.seed(2)
        alphabet = string.ascii_uppercase + string.digits
        # each value is a random string of 1..30 characters; the length is
        # drawn before the characters, same as a draw-then-build loop
        str_vals = [
            ''.join(random.choices(alphabet, k=random.randint(1, 30)))
            for _ in range(size)
        ]
        frame = pd.DataFrame({'A': str_vals})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame.copy())
        expected = test_impl(frame)
        self.assertTrue((actual == expected).all())

    def test_sort_values_str(self):
        """In-place ``sort_values`` on a string key reorders a second string column.

        The expected order comes from pandas ``sort_values`` with
        ``kind='mergesort'``, computed before the jitted call sorts the frame.
        """
        def test_impl(df):
            df.sort_values('A', inplace=True)
            return df.B.values

        size = 1211
        random.seed(2)
        alphabet = string.ascii_uppercase + string.digits
        keys = []
        payload = []

        for _ in range(size):
            # one length per row, shared by the key and the payload string
            k = random.randint(1, 30)
            keys.append(''.join(random.choices(alphabet, k=k)))
            payload.append(''.join(random.choices(alphabet, k=k)))

        frame = pd.DataFrame({'A': keys, 'B': payload})
        # use mergesort for stability, in str generation equal keys are more probable
        expected_frame = frame.sort_values('A', inplace=False, kind='mergesort')
        compiled = hpat.jit(test_impl)
        actual = compiled(frame)
        self.assertTrue((actual == expected_frame.B.values).all())

    def test_sort_parallel_single_col(self):
        """Sorting the 'points' column read from ``kde.parquet`` yields ascending values.

        ``hpat.hiframes.sort.MIN_SAMPLES`` is set to 10 for the duration of the
        call and restored afterwards.
        """
        # create `kde.parquet` file
        ParquetGenerator.gen_kde_pq()

        # TODO: better parallel sort test
        def test_impl():
            df = pd.read_parquet('kde.parquet')
            df.sort_values('points', inplace=True)
            res = df.points.values
            return res

        compiled = hpat.jit(locals={'res:return': 'distributed'})(test_impl)

        sort_module = hpat.hiframes.sort
        original_min_samples = sort_module.MIN_SAMPLES
        try:
            sort_module.MIN_SAMPLES = 10
            result = compiled()
            is_ascending = (np.diff(result) >= 0).all()
            self.assertTrue(is_ascending)
        finally:
            # restore global val
            sort_module.MIN_SAMPLES = original_min_samples

    def test_df_isna1(self):
        """Check ``DataFrame.isna`` on float and string columns containing NaN/None."""
        def test_impl(df):
            return df.isna()

        compiled = hpat.jit(test_impl)

        # TODO: add column with datetime values when test_series_datetime_isna1 is fixed
        columns = {
            'A': [1.0, 2.0, np.nan, 1.0],
            'B': [np.inf, 5, np.nan, 6],
            'C': ['aa', 'b', None, 'ccc'],
            'D': [None, 'dd', '', None],
        }
        frame = pd.DataFrame(columns)
        actual = compiled(frame)
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)

    def test_df_astype_str1(self):
        """Check ``DataFrame.astype(str)`` on an integer column and a string column."""
        def test_impl(df):
            return df.astype(str)

        compiled = hpat.jit(test_impl)

        # TODO: add column with float values when test_series_astype_float_to_str1 is fixed
        columns = {
            'A': [-1, 2, 11, 5, 0, -7],
            'B': ['aa', 'bb', 'cc', 'dd', '', 'fff'],
        }
        frame = pd.DataFrame(columns)
        actual = compiled(frame)
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)

    def test_df_astype_float1(self):
        """Check ``DataFrame.astype(np.float64)`` on an integer column and a float column."""
        def test_impl(df):
            return df.astype(np.float64)

        compiled = hpat.jit(test_impl)

        # TODO: uncomment column with string values when test_series_astype_str_to_float64 is fixed
        columns = {
            'A': [-1, 2, 11, 5, 0, -7],
            #                   'B': ['3.24', '1E+05', '-1', '-1.3E-01', 'nan', 'inf'],
            'C': [3.24, 1E+05, -1, -1.3E-01, np.nan, np.inf],
        }
        frame = pd.DataFrame(columns)
        actual = compiled(frame)
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)

    def test_df_astype_int1(self):
        """Check ``DataFrame.astype(np.int32)`` on int64, int32 and float32 columns."""
        def test_impl(df):
            return df.astype(np.int32)

        compiled = hpat.jit(test_impl)

        size = 6
        # TODO: uncomment column with string values when test_series_astype_str_to_int32 is fixed
        columns = {
            'A': np.ones(size, dtype=np.int64),
            'B': np.arange(size, dtype=np.int32),
            #                   'C': ['-1', '2', '3', '0', '-7', '99'],
            'D': np.arange(float(size), dtype=np.float32),
        }
        frame = pd.DataFrame(columns)
        actual = compiled(frame)
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)

    def test_sort_parallel(self):
        """Sorting by 'points' reorders a derived float column 'A' into ascending order.

        ``hpat.hiframes.sort.MIN_SAMPLES`` is set to 10 for the duration of the
        call and restored afterwards.
        """
        # create `kde.parquet` file
        ParquetGenerator.gen_kde_pq()

        # TODO: better parallel sort test
        def test_impl():
            df = pd.read_parquet('kde.parquet')
            df['A'] = df.points.astype(np.float64)
            df.sort_values('points', inplace=True)
            res = df.A.values
            return res

        compiled = hpat.jit(locals={'res:return': 'distributed'})(test_impl)

        sort_module = hpat.hiframes.sort
        original_min_samples = sort_module.MIN_SAMPLES
        try:
            sort_module.MIN_SAMPLES = 10
            result = compiled()
            is_ascending = (np.diff(result) >= 0).all()
            self.assertTrue(is_ascending)
        finally:
            # restore global val
            sort_module.MIN_SAMPLES = original_min_samples

    def test_itertuples(self):
        """Accumulating ``r[1]`` over ``df.itertuples()`` matches pandas."""
        def test_impl(df):
            res = 0.0
            for r in df.itertuples():
                res += r[1]
            return res

        size = 11
        frame = pd.DataFrame({'A': np.arange(size), 'B': np.ones(size, np.int64)})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame)
        expected = test_impl(frame)
        self.assertEqual(actual, expected)

    def test_itertuples_str(self):
        """Concatenating string field ``r[1]`` over ``df.itertuples()`` matches pandas."""
        def test_impl(df):
            res = ""
            for r in df.itertuples():
                res += r[1]
            return res

        size = 3
        frame = pd.DataFrame({'A': ['aa', 'bb', 'cc'], 'B': np.ones(size, np.int64)})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame)
        expected = test_impl(frame)
        self.assertEqual(actual, expected)

    def test_itertuples_order(self):
        """``itertuples`` field positions when column 'B' is declared before column 'A'."""
        def test_impl(n):
            res = 0.0
            df = pd.DataFrame({'B': np.arange(n), 'A': np.ones(n, np.int64)})
            for r in df.itertuples():
                res += r[1]
            return res

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        self.assertEqual(actual, expected)

    def test_itertuples_analysis(self):
        """Exercise array analysis on tuples produced by ``itertuples``.

        A tuple field flows through a branch and is then used as an array
        dimension (``np.ones(r[1])``).
        """
        def test_impl(n):
            res = 0
            df = pd.DataFrame({'B': np.arange(n), 'A': np.ones(n, np.int64)})
            for r in df.itertuples():
                if r[1] == 2:
                    A = np.ones(r[1])
                    res += len(A)
            return res

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        self.assertEqual(actual, expected)

    @unittest.skipIf(platform.system() == 'Windows',
                     "Attribute 'dtype' are different int64 and int32")
    def test_df_head1(self):
        """``df.head(3)`` on a DataFrame created inside jit matches pandas."""
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
            return df.head(3)

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_frame_equal(actual, expected)

    def test_pct_change1(self):
        """``df.pct_change(3)`` on a float column and an integer column matches pandas."""
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.pct_change(3)

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_frame_equal(actual, expected)

    def test_mean1(self):
        """``df.mean()`` on a float column and an integer column matches pandas."""
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.mean()

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_series_equal(actual, expected)

    def test_median1(self):
        """``df.median()`` on a power-of-two column and a float column matches pandas."""
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': 2**np.arange(n), 'B': np.arange(n) + 1.0})
            return df.median()

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_series_equal(actual, expected)

    def test_std1(self):
        """``df.std()`` on a float column and an integer column matches pandas."""
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.std()

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_series_equal(actual, expected)

    def test_var1(self):
        """``df.var()`` on a float column and an integer column matches pandas."""
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.var()

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_series_equal(actual, expected)

    def test_max1(self):
        """``df.max()`` on a float column and an integer column matches pandas."""
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.max()

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_series_equal(actual, expected)

    def test_min1(self):
        """``df.min()`` on a float column and an integer column matches pandas."""
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.min()

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_series_equal(actual, expected)

    @unittest.skipIf(not hpat.config.config_pipeline_hpat_default,
                     "DataFrame.sum() not implemented in new style")
    def test_sum1(self):
        """``df.sum()`` on a float column and an integer column matches pandas."""
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.sum()

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_series_equal(actual, expected)

    def test_prod1(self):
        """``df.prod()`` on a float column and an integer column matches pandas."""
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.prod()

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_series_equal(actual, expected)

    def test_count(self):
        """``df.count()`` on two integer columns matches pandas."""
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)})
            return df.count()

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_series_equal(actual, expected)

    def test_count1(self):
        """``df.count()`` on a float column and an integer column matches pandas."""
        # TODO: non-numeric columns should be ignored automatically
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.arange(n) + 1})
            return df.count()

        size = 11
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        pd.testing.assert_series_equal(actual, expected)

    def test_df_fillna1(self):
        """``df.fillna(5.0)`` on a float column containing NaN matches pandas."""
        def test_impl(df):
            return df.fillna(5.0)

        frame = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame)
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)

    def test_df_fillna_str1(self):
        """``df.fillna("dd")`` on a string column containing None matches pandas."""
        def test_impl(df):
            return df.fillna("dd")

        frame = pd.DataFrame({'A': ['aa', 'b', None, 'ccc']})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame)
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)

    def test_df_fillna_inplace1(self):
        """In-place ``fillna(11.0)`` followed by returning the frame matches pandas."""
        def test_impl(A):
            A.fillna(11.0, inplace=True)
            return A

        jit_frame = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]})
        # separate copy because each run fills its argument in place
        ref_frame = jit_frame.copy()
        compiled = hpat.jit(test_impl)
        actual = compiled(jit_frame)
        expected = test_impl(ref_frame)
        pd.testing.assert_frame_equal(actual, expected)

    def test_df_reset_index1(self):
        """``df.reset_index(drop=True)`` on an argument DataFrame matches pandas."""
        def test_impl(df):
            return df.reset_index(drop=True)

        frame = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]})
        compiled = hpat.jit(test_impl)
        actual = compiled(frame)
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)

    def test_df_reset_index_inplace1(self):
        """In-place ``reset_index(drop=True)`` on a frame created inside jit matches pandas."""
        def test_impl():
            df = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]})
            df.reset_index(drop=True, inplace=True)
            return df

        compiled = hpat.jit(test_impl)
        actual = compiled()
        expected = test_impl()
        pd.testing.assert_frame_equal(actual, expected)

    def test_df_dropna1(self):
        """``df.dropna()`` on a two-column frame with one NaN row.

        The pandas result has its index reset before comparison; the jitted
        result is compared as returned.
        """
        def test_impl(df):
            return df.dropna()

        frame = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7]})
        compiled = hpat.jit(test_impl)
        expected = test_impl(frame).reset_index(drop=True)
        actual = compiled(frame)
        pd.testing.assert_frame_equal(expected, actual)

    def test_df_dropna2(self):
        """``df.dropna()`` on a single-column frame with one NaN row.

        The pandas result has its index reset before comparison; the jitted
        result is compared as returned.
        """
        def test_impl(df):
            return df.dropna()

        frame = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]})
        compiled = hpat.jit(test_impl)
        expected = test_impl(frame).reset_index(drop=True)
        actual = compiled(frame)
        pd.testing.assert_frame_equal(expected, actual)

    def test_df_dropna_inplace1(self):
        """In-place ``dropna`` followed by returning the frame.

        The pandas result has its index reset before comparison; the jitted
        result is compared as returned.
        """
        # TODO: fix error when no df is returned
        def test_impl(df):
            df.dropna(inplace=True)
            return df

        ref_frame = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7]})
        # copy taken before the reference run mutates `ref_frame`
        jit_frame = ref_frame.copy()
        compiled = hpat.jit(test_impl)
        expected = test_impl(ref_frame).reset_index(drop=True)
        actual = compiled(jit_frame)
        pd.testing.assert_frame_equal(expected, actual)

    def test_df_dropna_str1(self):
        # dropna() where the missing value (None) sits in a string column.
        def drop_missing(df):
            return df.dropna()

        columns = {
            'A': [1.0, 2.0, 4.0, 1.0],
            'B': ['aa', 'b', None, 'ccc'],
        }
        frame = pd.DataFrame(columns)
        compiled = hpat.jit(drop_missing)
        expected = drop_missing(frame).reset_index(drop=True)
        actual = compiled(frame)
        pd.testing.assert_frame_equal(expected, actual)

    def test_df_drop1(self):
        # Dropping a column by name via the columns= keyword.
        def drop_column_a(df):
            return df.drop(columns=['A'])

        frame = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7]})
        compiled = hpat.jit(drop_column_a)
        actual = compiled(frame)
        expected = drop_column_a(frame)
        pd.testing.assert_frame_equal(actual, expected)

    def test_df_drop_inplace2(self):
        # Test dropping a column in place right after that column was set.
        def set_then_drop(df):
            df2 = df[['A', 'B']]
            df2['D'] = np.ones(3)
            df2.drop(columns=['D'], inplace=True)
            return df2

        frame = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
        compiled = hpat.jit(set_then_drop)
        actual = compiled(frame)
        expected = set_then_drop(frame)
        pd.testing.assert_frame_equal(actual, expected)

    def test_df_drop_inplace1(self):
        # In-place column drop using the positional label plus axis=1 form.
        # Each run mutates its own copy of the input frame.
        def drop_in_place(df):
            df.drop('A', axis=1, inplace=True)
            return df

        jit_input = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7]})
        py_input = jit_input.copy()
        compiled = hpat.jit(drop_in_place)
        actual = compiled(jit_input)
        expected = drop_in_place(py_input)
        pd.testing.assert_frame_equal(actual, expected)

    def test_isin_df1(self):
        # DataFrame.isin() with another DataFrame as the lookup values.  The two
        # frames share column 'A' but differ in their second column name.
        def test_impl(df, df2):
            return df.isin(df2)

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        df2 = pd.DataFrame({'A': np.arange(n), 'C': np.arange(n)**2})
        # Overwrite the second half of column 'A' so that only part of it
        # matches df.  A single .loc assignment is used instead of the chained
        # form `df2.A[...] = n`, which writes through a temporary Series and is
        # not guaranteed to modify df2.
        df2.loc[n // 2:, 'A'] = n
        pd.testing.assert_frame_equal(hpat_func(df, df2), test_impl(df, df2))

    @unittest.skip("needs dict typing in Numba")
    def test_isin_dict1(self):
        # DataFrame.isin() with a per-column dict of candidate values that is
        # built inside the tested function.
        def isin_dict(df):
            vals = {'A': [2, 3, 4], 'C': [4, 5, 6]}
            return df.isin(vals)

        size = 11
        frame = pd.DataFrame({'A': np.arange(size), 'B': np.arange(size)**2})
        compiled = hpat.jit(isin_dict)
        actual = compiled(frame)
        expected = isin_dict(frame)
        pd.testing.assert_frame_equal(actual, expected)

    def test_isin_list1(self):
        # DataFrame.isin() with a plain list of candidate values that is built
        # inside the tested function.
        def isin_list(df):
            vals = [2, 3, 4]
            return df.isin(vals)

        size = 11
        frame = pd.DataFrame({'A': np.arange(size), 'B': np.arange(size)**2})
        compiled = hpat.jit(isin_list)
        actual = compiled(frame)
        expected = isin_list(frame)
        pd.testing.assert_frame_equal(actual, expected)

    def test_append1(self):
        # DataFrame.append() of a single frame with ignore_index=True.  The two
        # frames share column 'A' but differ in their second column name.
        def test_impl(df, df2):
            return df.append(df2, ignore_index=True)

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        df2 = pd.DataFrame({'A': np.arange(n), 'C': np.arange(n)**2})
        # Make the second half of df2['A'] differ from df['A'].  A single .loc
        # assignment is used instead of the chained form `df2.A[...] = n`,
        # which writes through a temporary Series and is not guaranteed to
        # modify df2.
        df2.loc[n // 2:, 'A'] = n
        pd.testing.assert_frame_equal(hpat_func(df, df2), test_impl(df, df2))

    def test_append2(self):
        # DataFrame.append() of a list of two frames with ignore_index=True.
        # All three frames have the same columns.
        def test_impl(df, df2, df3):
            return df.append([df2, df3], ignore_index=True)

        hpat_func = hpat.jit(test_impl)
        n = 11
        df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        df2 = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        # Make the second half of df2['A'] differ from the other frames.  A
        # single .loc assignment is used instead of the chained form
        # `df2.A[...] = n`, which writes through a temporary Series and is not
        # guaranteed to modify df2.
        df2.loc[n // 2:, 'A'] = n
        df3 = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)**2})
        pd.testing.assert_frame_equal(hpat_func(df, df2, df3),
                                      test_impl(df, df2, df3))

    def test_concat_columns1(self):
        # pd.concat(..., axis=1) of an integer and a float Series.
        def concat_as_columns(S1, S2):
            return pd.concat([S1, S2], axis=1)

        compiled = hpat.jit(concat_as_columns)
        ints = pd.Series([4, 5])
        floats = pd.Series([6., 7.])
        actual = compiled(ints, floats)
        # TODO: support int as column name
        # Until then the pandas result has its integer column labels renamed
        # to strings before the comparison.
        string_labels = {0: '0', 1: '1'}
        expected = concat_as_columns(ints, floats).rename(columns=string_labels)
        pd.testing.assert_frame_equal(actual, expected)

    def test_var_rename(self):
        # Exercises df variable replacement in hiframes_untyped, where inlining
        # can introduce extra assignments and definition handling errors.
        # TODO: inline freevar
        def test_impl():
            df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
            # TODO: df['C'] = [5,6,7]
            df['C'] = np.ones(3)
            return inner_get_column(df)

        compiled = hpat.jit(test_impl)
        actual = compiled()
        expected = test_impl()
        # Series names are deliberately excluded from the comparison.
        pd.testing.assert_series_equal(actual, expected, check_names=False)

    @unittest.skip("Implement getting columns attribute")
    def test_dataframe_columns_attribute(self):
        # Returning df.columns from the tested function.
        def get_columns():
            df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
            return df.columns

        compiled = hpat.jit(get_columns)
        actual = compiled()
        expected = get_columns()
        np.testing.assert_array_equal(actual, expected)

    @unittest.skip("Implement getting columns attribute")
    def test_dataframe_columns_iterator(self):
        # Iterating over df.columns inside the tested function.
        def iterate_columns():
            df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
            return [column for column in df.columns]

        compiled = hpat.jit(iterate_columns)
        actual = compiled()
        expected = iterate_columns()
        np.testing.assert_array_equal(actual, expected)

    @unittest.skip("Implement set_index for DataFrame")
    def test_dataframe_set_index(self):
        # set_index() on a column of a frame built inside the tested function.
        def index_by_month():
            df = pd.DataFrame({
                'month': [1, 4, 7, 10],
                'year': [2012, 2014, 2013, 2014],
                'sale': [55, 40, 84, 31]
            })
            return df.set_index('month')

        compiled = hpat.jit(index_by_month)
        actual = compiled()
        expected = index_by_month()
        pd.testing.assert_frame_equal(actual, expected)

    @unittest.skip("Implement sort_index for DataFrame")
    def test_dataframe_sort_index(self):
        # sort_index() on a frame created with an explicit, unsorted index.
        def sort_by_index():
            df = pd.DataFrame({'A': [1, 2, 3, 4, 5]},
                              index=[100, 29, 234, 1, 150])
            return df.sort_index()

        compiled = hpat.jit(sort_by_index)
        actual = compiled()
        expected = sort_by_index()
        pd.testing.assert_frame_equal(actual, expected)

    @unittest.skip("Implement iterrows for DataFrame")
    def test_dataframe_iterrows(self):
        # Collects the row objects yielded by df.iterrows() on a frame with an
        # integer, a float and a string column.
        def test_impl(df):
            # The tested function only iterates; a leftover debugging print of
            # the iterator object was removed so the test exercises iterrows()
            # alone and produces no stdout noise.
            return [row for _, row in df.iterrows()]

        df = pd.DataFrame({
            'A': [1, 2, 3],
            'B': [0.2, 0.5, 0.001],
            'C': ['a', 'bb', 'ccc']
        })
        hpat_func = hpat.jit(test_impl)
        np.testing.assert_array_equal(hpat_func(df), test_impl(df))

    @unittest.skip("Support parameter axis=1")
    def test_dataframe_axis_param(self):
        # Row-wise sum (axis=1) over a frame with two identical columns.
        def row_sums(n):
            df = pd.DataFrame({'A': np.arange(n), 'B': np.arange(n)})
            return df.sum(axis=1)

        size = 100
        compiled = hpat.jit(row_sums)
        actual = compiled(size)
        expected = row_sums(size)
        pd.testing.assert_series_equal(actual, expected)
コード例 #2
0
class TestML(unittest.TestCase):
    # Each test compiles a small numerical kernel with hpat.jit, compares the
    # compiled result with the uncompiled function, and then checks the
    # count_array_* / count_parfor_* helpers against fixed expected values.
    # NOTE(review): those helpers are defined outside this chunk; presumably
    # they report how many arrays/parfors got each distribution in the last
    # compilation -- confirm against their definition.  The expected counts
    # depend on the exact form of the kernels, so the kernels must not be
    # reshaped casually.

    def test_logistic_regression(self):
        # Three iterations of the weight update
        #   w -= dot((1 / (1 + exp(-Y * dot(X, w))) - 1) * Y, X)
        # on constant inputs created inside the kernel.
        def test_impl(n, d):
            iterations = 3
            X = np.ones((n, d)) + .5
            Y = np.ones(n)
            D = X.shape[1]
            w = np.ones(D) - 0.5
            for i in range(iterations):
                w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X)
            return w

        hpat_func = hpat.jit(test_impl)
        n = 11
        d = 4
        np.testing.assert_allclose(hpat_func(n, d), test_impl(n, d))
        self.assertEqual(count_array_OneDs(), 3)
        self.assertEqual(count_parfor_OneDs(), 3)

    def test_logistic_regression_acc(self):
        # Same weight update as test_logistic_regression, but the kernel
        # returns the accuracy computed in the last loop iteration (a scalar)
        # instead of the weight vector.
        def test_impl(N, D):
            iterations = 3
            g = 2 * np.ones(D) - 1
            X = 2 * np.ones((N, D)) - 1
            # "+ .0" turns the boolean comparison result into a float array
            Y = ((np.dot(X, g) > 0.0) == (np.ones(N) > .90)) + .0

            w = 2 * np.ones(D) - 1
            for i in range(iterations):
                w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X)
                R = np.dot(X, w) > 0.0
                accuracy = np.sum(R == Y) / N
            return accuracy

        hpat_func = hpat.jit(test_impl)
        n = 11
        d = 4
        np.testing.assert_approx_equal(hpat_func(n, d), test_impl(n, d))
        self.assertEqual(count_array_OneDs(), 3)
        self.assertEqual(count_parfor_OneDs(), 4)

    def test_linear_regression(self):
        # Three iterations of w -= alphaN * X.T @ (X @ w - Y) with p = 2
        # output columns.
        def test_impl(N, D):
            p = 2
            iterations = 3
            X = np.ones((N, D)) + .5
            Y = np.ones((N, p))
            alphaN = 0.01 / N
            w = np.zeros((D, p))
            for i in range(iterations):
                w -= alphaN * np.dot(X.T, np.dot(X, w) - Y)
            return w

        hpat_func = hpat.jit(test_impl)
        n = 11
        d = 4
        np.testing.assert_allclose(hpat_func(n, d), test_impl(n, d))
        self.assertEqual(count_array_OneDs(), 5)
        self.assertEqual(count_parfor_OneDs(), 3)

    def test_kde(self):
        # Accumulates a scalar over an hpat.prange loop; each iteration
        # subtracts the minimum m from d before exponentiating and adds it
        # back outside the log.
        def test_impl(n):
            X = np.ones(n)
            b = 0.5
            points = np.array([-1.0, 2.0, 5.0])
            N = points.shape[0]
            exps = 0
            for i in hpat.prange(n):
                p = X[i]
                d = (-(p - points)**2) / (2 * b**2)
                m = np.min(d)
                exps += m - np.log(b * N) + np.log(np.sum(np.exp(d - m)))
            return exps

        hpat_func = hpat.jit(test_impl)
        n = 11
        np.testing.assert_approx_equal(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_OneDs(), 1)
        self.assertEqual(count_parfor_OneDs(), 2)

    @unittest.skipIf(check_numba_version('0.46.0'),
                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
    def test_kmeans(self):
        # k-means style iteration written with nested list comprehensions:
        # pairwise distances, argmin labels, then per-label column means.
        def test_impl(numCenter, numIter, N, D):
            A = np.ones((N, D))
            centroids = np.zeros((numCenter, D))

            for l in range(numIter):
                dist = np.array([[sqrt(np.sum((A[i, :] - centroids[j, :])**2))
                                  for j in range(numCenter)] for i in range(N)])
                labels = np.array([dist[i, :].argmin() for i in range(N)])

                centroids = np.array([[np.sum(A[labels == i, j]) / np.sum(labels == i)
                                       for j in range(D)] for i in range(numCenter)])

            return centroids

        hpat_func = hpat.jit(test_impl)
        n = 11
        # one centre, one iteration, n points in two dimensions
        np.testing.assert_allclose(hpat_func(1, 1, n, 2), test_impl(1, 1, n, 2))
        self.assertEqual(count_array_OneDs(), 4)
        self.assertEqual(count_array_OneD_Vars(), 1)
        self.assertEqual(count_parfor_OneDs(), 5)
        self.assertEqual(count_parfor_OneD_Vars(), 1)
コード例 #3
0
ファイル: test_basic.py プロジェクト: rowhit/sdc
class TestBasic(BaseTest):

    def test_getitem(self):
        # Boolean-mask indexing followed by a sum; afterwards both REP counters
        # must be zero.
        def test_impl(N):
            A = np.ones(N)
            B = np.ones(N) > .5
            C = A[B]
            return C.sum()

        size = 128
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        np.testing.assert_allclose(actual, expected)
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_setitem1(self):
        # Scalar assignment to a single element of a fixed-size array (the
        # kernel ignores its argument); both REP counters must be zero.
        def test_impl(N):
            A = np.arange(10) + 1.0
            A[0] = 30
            return A.sum()

        size = 128
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        np.testing.assert_allclose(actual, expected)
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_setitem2(self):
        # Scalar assignment to a slice of a fixed-size array (the kernel
        # ignores its argument); both REP counters must be zero.
        def test_impl(N):
            A = np.arange(10) + 1.0
            A[0:4] = 30
            return A.sum()

        size = 128
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        np.testing.assert_allclose(actual, expected)
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_astype(self):
        # astype(np.int32) on a float array followed by a sum; both REP
        # counters must be zero.
        def test_impl(N):
            return np.ones(N).astype(np.int32).sum()

        size = 128
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        np.testing.assert_allclose(actual, expected)
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_shape(self):
        # First element of .shape for a 1-D array; both REP counters must be
        # zero.
        def test_impl(N):
            return np.ones(N).shape[0]

        size = 128
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        np.testing.assert_allclose(actual, expected)
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

        # Disabled variant: the same checks for a kernel that returns the full
        # shape tuple of a 3-D array.
        # def test_impl(N):
        #     return np.ones((N, 3, 4)).shape
        #
        # hpat_func = hpat.jit(test_impl)
        # n = 128
        # np.testing.assert_allclose(hpat_func(n), test_impl(n))
        # self.assertEqual(count_array_REPs(), 0)
        # self.assertEqual(count_parfor_REPs(), 0)

    def test_inplace_binop(self):
        # In-place array addition (B += A) followed by a sum; both REP counters
        # must be zero.
        def test_impl(N):
            A = np.ones(N)
            B = np.ones(N)
            B += A
            return B.sum()

        size = 128
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        np.testing.assert_allclose(actual, expected)
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_getitem_multidim(self):
        # Boolean mask on the first axis combined with a fixed column index on
        # a 2-D array; both REP counters must be zero.
        def test_impl(N):
            A = np.ones((N, 3))
            B = np.ones(N) > .5
            C = A[B, 2]
            return C.sum()

        size = 128
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        np.testing.assert_allclose(actual, expected)
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_whole_slice(self):
        # Reads and overwrites one whole column (X[:, 3]) of a 2-D array;
        # both REP counters must be zero.
        def test_impl(N):
            X = np.ones((N, 4))
            X[:, 3] = (X[:, 3]) / (np.max(X[:, 3]) - np.min(X[:, 3]))
            return X.sum()

        size = 128
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        np.testing.assert_allclose(actual, expected)
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_strided_getitem(self):
        # Strided slice (every 7th element) followed by a sum; both REP
        # counters must be zero.
        def test_impl(N):
            A = np.ones(N)
            B = A[::7]
            return B.sum()

        size = 128
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        np.testing.assert_allclose(actual, expected)
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    def test_assert(self):
        # Make sure an assert in an inlined function works: a jitted function
        # containing an assert is called from a second jitted function.
        def check_zero(a):
            assert a == 0

        jit_check_zero = hpat.jit(check_zero)

        def caller():
            jit_check_zero(0)

        jit_caller = hpat.jit(caller)
        jit_caller()

    def test_inline_locals(self):
        # Make sure a locals= type annotation on an inlined function works.
        # The variable name 'B' inside g must match the key given in locals.
        @hpat.jit(locals={'B': hpat.float64[:]})
        def g(S):
            B = pd.to_numeric(S, errors='coerce')
            return B

        def outer():
            return g(pd.Series(['1.2']))

        jit_outer = hpat.jit(outer)
        actual = jit_outer()
        expected = outer()
        pd.testing.assert_series_equal(actual, expected)

    def test_reduce(self):
        # Runs every reduction in `funcs` on an np.arange array of every dtype
        # in `dtypes`.  The kernel is generated from a source template with
        # exec, so each dtype/reduction pair is compiled separately.
        import sys
        dtypes = ['float32', 'float64', 'int32', 'int64']
        funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax']
        for (dtype, func) in itertools.product(dtypes, funcs):
            # loc allreduce doesn't support int64 on windows
            if (sys.platform.startswith('win')
                    and dtype == 'int64'
                    and func in ['argmin', 'argmax']):
                continue
            func_text = """def f(n):
                A = np.arange(0, n, 1, np.{})
                return A.{}()
            """.format(dtype, func)
            loc_vars = {}
            exec(func_text, {'np': np}, loc_vars)
            test_impl = loc_vars['f']

            hpat_func = hpat.jit(test_impl)
            n = 21  # XXX arange() on float32 has overflow issues on large n
            # Name the failing combination; without this a failure inside the
            # loop does not say which dtype/reduction pair broke.
            case = "{} on {}".format(func, dtype)
            np.testing.assert_almost_equal(hpat_func(n), test_impl(n),
                                           err_msg=case)
            self.assertEqual(count_array_REPs(), 0, case)
            self.assertEqual(count_parfor_REPs(), 0, case)

    def test_reduce2(self):
        # Runs every reduction in `funcs` on an input array flagged as
        # distributed.  The compiled function receives the slice
        # A[start:end] from get_start_end(n); the reference runs on all of A.
        import sys
        dtypes = ['float32', 'float64', 'int32', 'int64']
        funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax']
        for (dtype, func) in itertools.product(dtypes, funcs):
            # loc allreduce doesn't support int64 on windows
            if (sys.platform.startswith('win')
                    and dtype == 'int64'
                    and func in ['argmin', 'argmax']):
                continue
            func_text = """def f(A):
                return A.{}()
            """.format(func)
            loc_vars = {}
            exec(func_text, {'np': np}, loc_vars)
            test_impl = loc_vars['f']

            hpat_func = hpat.jit(locals={'A:input': 'distributed'})(test_impl)
            n = 21
            start, end = get_start_end(n)
            # fixed seed so the random input is reproducible
            np.random.seed(0)
            A = np.random.randint(0, 10, n).astype(dtype)
            # Name the failing combination; without this a failure inside the
            # loop does not say which dtype/reduction pair broke.
            case = "{} on {}".format(func, dtype)
            np.testing.assert_almost_equal(
                hpat_func(A[start:end]), test_impl(A), decimal=3,
                err_msg=case)
            self.assertEqual(count_array_REPs(), 0, case)
            self.assertEqual(count_parfor_REPs(), 0, case)

    def test_reduce_filter1(self):
        # Like test_reduce2, but the generated kernel first filters its input
        # with A[A>5] before applying the reduction.
        import sys
        on_windows = sys.platform.startswith('win')
        arg_reductions = ['argmin', 'argmax']
        dtypes = ['float32', 'float64', 'int32', 'int64']
        funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax']
        for dtype, func in itertools.product(dtypes, funcs):
            # loc allreduce doesn't support int64 on windows
            unsupported = (on_windows
                           and dtype == 'int64'
                           and func in arg_reductions)
            if unsupported:
                continue
            func_text = """def f(A):
                A = A[A>5]
                return A.{}()
            """.format(func)
            namespace = {}
            exec(func_text, {'np': np}, namespace)
            test_impl = namespace['f']

            compiled = hpat.jit(locals={'A:input': 'distributed'})(test_impl)
            size = 21
            start, end = get_start_end(size)
            # fixed seed so the random input is reproducible
            np.random.seed(0)
            data = np.random.randint(0, 10, size).astype(dtype)
            actual = compiled(data[start:end])
            expected = test_impl(data)
            np.testing.assert_almost_equal(
                actual, expected, decimal=3,
                err_msg="{} on {}".format(func, dtype))
            self.assertEqual(count_array_REPs(), 0)
            self.assertEqual(count_parfor_REPs(), 0)

    def test_array_reduce(self):
        # Whole-array in-place updates inside a numba.prange loop, one
        # generated kernel per (operator, dtype) pair listed below.
        cases = [
            ('+=', 'np.float32'),
            ('*=', 'np.float32'),
            ('+=', 'np.float64'),
            ('*=', 'np.float64'),
            ('|=', 'np.int32'),
            ('|=', 'np.int64'),
        ]
        for op, typ in cases:
            func_text = """def f(n):
                  A = np.arange(0, 10, 1, {})
                  B = np.arange(0 +  3, 10 + 3, 1, {})
                  for i in numba.prange(n):
                      A {} B
                  return A
            """.format(typ, typ, op)
            namespace = {}
            exec(func_text, {'np': np, 'numba': numba}, namespace)
            test_impl = namespace['f']

            size = 128
            compiled = hpat.jit(test_impl)
            actual = compiled(size)
            expected = test_impl(size)
            np.testing.assert_allclose(actual, expected)
            self.assertEqual(count_array_OneDs(), 0)
            self.assertEqual(count_parfor_OneDs(), 1)

    @unittest.skipIf(check_numba_version('0.46.0'),
                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
    def test_dist_return(self):
        # Returns an array flagged 'A:return': 'distributed' from a jitted
        # function.  The sum of the returned array is passed through a
        # dist_reduce(Sum) and compared with the sum of the array produced by
        # the uncompiled function.
        def test_impl(N):
            A = np.arange(N)
            return A

        # the key 'A:return' refers to the variable name A inside test_impl
        hpat_func = hpat.jit(locals={'A:return': 'distributed'})(test_impl)
        n = 128
        # jitted helper wrapping hpat.distributed_api.dist_reduce with
        # Reduce_Type.Sum
        dist_sum = hpat.jit(
            lambda a: hpat.distributed_api.dist_reduce(
                a, np.int32(hpat.distributed_api.Reduce_Type.Sum.value)))
        dist_sum(1)  # run to compile
        np.testing.assert_allclose(
            dist_sum(hpat_func(n).sum()), test_impl(n).sum())
        self.assertEqual(count_array_OneDs(), 1)
        self.assertEqual(count_parfor_OneDs(), 1)

    @unittest.skipIf(check_numba_version('0.46.0'),
                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
    def test_dist_return_tuple(self):
        # Returns a tuple of two arrays, both flagged as distributed returns.
        # The sum of their element-wise sum is passed through a
        # dist_reduce(Sum) and compared with the same quantity computed by the
        # uncompiled function.
        def test_impl(N):
            A = np.arange(N)
            B = np.arange(N) + 1.5
            return A, B

        # the keys refer to the variable names A and B inside test_impl
        hpat_func = hpat.jit(locals={'A:return': 'distributed',
                                     'B:return': 'distributed'})(test_impl)
        n = 128
        # jitted helper wrapping hpat.distributed_api.dist_reduce with
        # Reduce_Type.Sum
        dist_sum = hpat.jit(
            lambda a: hpat.distributed_api.dist_reduce(
                a, np.int32(hpat.distributed_api.Reduce_Type.Sum.value)))
        dist_sum(1.0)  # run to compile
        # Call each function once and unpack the tuple, rather than calling it
        # twice just to index elements 0 and 1.
        dist_A, dist_B = hpat_func(n)
        ref_A, ref_B = test_impl(n)
        np.testing.assert_allclose(
            dist_sum((dist_A + dist_B).sum()), (ref_A + ref_B).sum())
        self.assertEqual(count_array_OneDs(), 2)
        self.assertEqual(count_parfor_OneDs(), 2)

    def test_dist_input(self):
        # len() of an argument declared as distributed.  The compiled result is
        # divided by self.num_ranks before being compared with the plain
        # len() of the array that was passed in.
        def test_impl(A):
            return len(A)

        size = 128
        data = np.ones(size)
        # the name 'A' must match the parameter name of test_impl
        compiled = hpat.jit(distributed=['A'])(test_impl)
        per_rank = compiled(data) / self.num_ranks
        expected = test_impl(data)
        np.testing.assert_allclose(per_rank, expected)
        self.assertEqual(count_array_OneDs(), 1)

    @unittest.skipIf(check_numba_version('0.46.0'),
                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
    def test_rebalance(self):
        # Filters an array, explicitly rebalances the filtered result with
        # hpat.distributed_api.rebalance_array and sums it, with the
        # auto_rebalance flag switched on for the duration of the test.
        def test_impl(N):
            # Use the parameter N; the kernel previously ignored its argument
            # and read the enclosing test's `n` as a free variable instead.
            A = np.arange(N)
            B = A[A > 10]
            C = hpat.distributed_api.rebalance_array(B)
            return C.sum()

        try:
            hpat.distributed_analysis.auto_rebalance = True
            hpat_func = hpat.jit(test_impl)
            n = 128
            np.testing.assert_allclose(hpat_func(n), test_impl(n))
            self.assertEqual(count_array_OneDs(), 3)
            self.assertEqual(count_parfor_OneDs(), 2)
        finally:
            # always restore the module-level flag for the following tests
            hpat.distributed_analysis.auto_rebalance = False

    @unittest.skipIf(check_numba_version('0.46.0'),
                     "Broken in numba 0.46.0. https://github.com/numba/numba/issues/4690")
    def test_rebalance_loop(self):
        # Filters an array and sums the filtered result repeatedly in a loop,
        # with the auto_rebalance flag switched on.  Besides the result and the
        # counters, the generated LLVM must contain the string 'allgather'.
        def test_impl(N):
            # Use the parameter N; the kernel previously ignored its argument
            # and read the enclosing test's `n` as a free variable instead.
            A = np.arange(N)
            B = A[A > 10]
            s = 0
            for i in range(3):
                s += B.sum()
            return s

        try:
            hpat.distributed_analysis.auto_rebalance = True
            hpat_func = hpat.jit(test_impl)
            n = 128
            np.testing.assert_allclose(hpat_func(n), test_impl(n))
            self.assertEqual(count_array_OneDs(), 4)
            self.assertEqual(count_parfor_OneDs(), 2)
            # inspect the LLVM of the first (only) compiled signature
            self.assertIn('allgather', list(hpat_func.inspect_llvm().values())[0])
        finally:
            # always restore the module-level flag for the following tests
            hpat.distributed_analysis.auto_rebalance = False

    def test_transpose(self):
        # transpose() called with the axes as one tuple and as separate
        # arguments on a fixed-size 3-D array (the kernel ignores its
        # argument); both REP counters must be zero.
        def test_impl(n):
            A = np.ones((30, 40, 50))
            B = A.transpose((0, 2, 1))
            C = A.transpose(0, 2, 1)
            return B.sum() + C.sum()

        size = 128
        compiled = hpat.jit(test_impl)
        actual = compiled(size)
        expected = test_impl(size)
        np.testing.assert_allclose(actual, expected)
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

    @unittest.skip("Numba's perfmute generation needs to use np seed properly")
    def test_permuted_array_indexing(self):

        # Since Numba uses Python's PRNG for producing random numbers in NumPy,
        # we cannot compare against NumPy.  Therefore, we implement permutation
        # in Python.
        def python_permutation(n, r):
            arr = np.arange(n)
            r.shuffle(arr)
            return arr

        def test_one_dim(arr_len):
            A = np.arange(arr_len)
            B = np.copy(A)
            P = np.random.permutation(arr_len)
            A, B = A[P], B[P]
            return A, B

        # Implementation that uses Python's PRNG for producing a permutation.
        # We test against this function.
        def python_one_dim(arr_len, r):
            A = np.arange(arr_len)
            B = np.copy(A)
            P = python_permutation(arr_len, r)
            A, B = A[P], B[P]
            return A, B

        # Ideally, in above *_impl functions we should just call
        # np.random.seed() and they should produce the same sequence of random
        # numbers.  However, since Numba's PRNG uses NumPy's initialization
        # method for initializing PRNG, we cannot just set seed.  Instead, we
        # resort to this hack that generates a Python Random object with a fixed
        # seed and copies the state to Numba's internal NumPy PRNG state.  For
        # details please see https://github.com/numba/numba/issues/2782.
        r = self._follow_cpython(get_np_state_ptr())

        hpat_func1 = hpat.jit(locals={'A:return': 'distributed',
                                      'B:return': 'distributed'})(test_one_dim)

        # Test one-dimensional array indexing.
        for arr_len in [11, 111, 128, 120]:
            hpat_A, hpat_B = hpat_func1(arr_len)
            python_A, python_B = python_one_dim(arr_len, r)
            rank_bounds = self._rank_bounds(arr_len)
            np.testing.assert_allclose(hpat_A, python_A[slice(*rank_bounds)])
            np.testing.assert_allclose(hpat_B, python_B[slice(*rank_bounds)])

        # Test two-dimensional array indexing.  Like in one-dimensional case
        # above, in addition to NumPy version that is compiled by Numba, we
        # implement a Python version.
        def test_two_dim(arr_len):
            first_dim = arr_len // 2
            A = np.arange(arr_len).reshape(first_dim, 2)
            B = np.copy(A)
            P = np.random.permutation(first_dim)
            A, B = A[P], B[P]
            return A, B

        def python_two_dim(arr_len, r):
            first_dim = arr_len // 2
            A = np.arange(arr_len).reshape(first_dim, 2)
            B = np.copy(A)
            P = python_permutation(first_dim, r)
            A, B = A[P], B[P]
            return A, B

        hpat_func2 = hpat.jit(locals={'A:return': 'distributed',
                                      'B:return': 'distributed'})(test_two_dim)

        for arr_len in [18, 66, 128]:
            hpat_A, hpat_B = hpat_func2(arr_len)
            python_A, python_B = python_two_dim(arr_len, r)
            rank_bounds = self._rank_bounds(arr_len // 2)
            np.testing.assert_allclose(hpat_A, python_A[slice(*rank_bounds)])
            np.testing.assert_allclose(hpat_B, python_B[slice(*rank_bounds)])

        # Test that the indexed array is not modified if it is not being
        # assigned to.
        def test_rhs(arr_len):
            A = np.arange(arr_len)
            B = np.copy(A)
            P = np.random.permutation(arr_len)
            C = A[P]
            return A, B, C

        hpat_func3 = hpat.jit(locals={'A:return': 'distributed',
                                      'B:return': 'distributed',
                                      'C:return': 'distributed'})(test_rhs)

        for arr_len in [15, 23, 26]:
            A, B, _ = hpat_func3(arr_len)
            np.testing.assert_allclose(A, B)