Exemple #1
0
    def test_pd_DataFrame_from_series_par(self):
        """Build a DataFrame from two Series and sum one column under HPAT.

        The compiled result must match plain pandas, and the distribution
        counters must report no REP arrays/parfors and exactly one OneD parfor.
        """
        def test_impl(n):
            S1 = pd.Series(np.ones(n))
            S2 = pd.Series(np.random.ranf(n))
            df = pd.DataFrame({'A': S1, 'B': S2})
            return df.A.sum()

        size = 11
        jitted = hpat.jit(test_impl)
        actual = jitted(size)
        expected = test_impl(size)
        self.assertEqual(actual, expected)

        # Distribution diagnostics of the compiled function.
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)
        self.assertEqual(count_parfor_OneDs(), 1)
Exemple #2
0
    def test_h5_group_keys(self):
        """Sum every dataset found under group 'G' by iterating the group's keys.

        The HPAT-compiled function must return the same total as the
        plain-Python version.
        """
        def test_impl():
            # NOTE(review): "test_group_read.hdf5" is presumably generated by
            # the suite's data setup before this test runs -- confirm.
            f = h5py.File("test_group_read.hdf5", "r")
            g1 = f['G']
            s = 0
            for dname in g1.keys():
                # Read the full dataset, then accumulate its sum.
                X = g1[dname][:]
                s += X.sum()
            f.close()
            return s

        # NOTE(review): h5_types appears to be keyed by the local variable
        # name 'X' used inside test_impl; keep the two in sync when renaming.
        hpat_func = hpat.jit(test_impl, h5_types={'X': hpat.int64[:]})
        self.assertEqual(hpat_func(), test_impl())
Exemple #3
0
    def test_series_count1(self):
        """Compare Series.count() under HPAT and pandas on data containing NaN."""
        def test_impl(S):
            return S.count()

        jitted = hpat.jit(test_impl)

        samples = (
            [np.nan, 2., 3.],       # floats with a single NaN
            [np.nan, np.nan],       # nothing but NaN
            ['aa', 'bb', np.nan],   # strings with a missing entry
        )
        for values in samples:
            S = pd.Series(values)
            self.assertEqual(jitted(S), test_impl(S))
Exemple #4
0
    def test_column_map_arg(self):
        """Add column 'B' to a DataFrame argument via Series.map with a lambda.

        The frame passed to the compiled function must gain a 'B' attribute
        whose values equal those produced by plain pandas.
        """
        def test_impl(df):
            df['B'] = df.A.map(lambda a: 2 * a)
            return

        size = 121
        # Two independent but identical inputs: one per implementation.
        hpat_df = pd.DataFrame({'A': np.arange(size)})
        pandas_df = pd.DataFrame({'A': np.arange(size)})

        jitted = hpat.jit(test_impl)
        jitted(hpat_df)
        self.assertTrue(hasattr(hpat_df, 'B'))

        test_impl(pandas_df)
        np.testing.assert_equal(hpat_df.B.values, pandas_df.B.values)
Exemple #5
0
    def test_decode_unicode2(self):
        """Return elements of a string Series whose values begin with ASCII
        characters and continue with non-ASCII text."""
        def test_impl(S):
            return S[0], S[1], S[2]

        jitted = hpat.jit(test_impl)

        texts = [
            'abc¡Y tú quién te crees?',
            'dd2🐍⚡',
            '22 大处着眼,小处着手。',
        ]
        S = pd.Series(texts)
        actual = jitted(S)
        expected = test_impl(S)
        self.assertEqual(actual, expected)
Exemple #6
0
    def test_series_sort_values_index1(self):
        """Sort a Series built from separate value and index arrays."""
        def test_impl(A, B):
            S = pd.Series(A, B)
            return S.sort_values()

        jitted = hpat.jit(test_impl)

        # Fixed seed so both random arrays are reproducible.
        np.random.seed(0)
        size = 11
        # TODO: support passing Series with Index
        # S = pd.Series(np.random.ranf(n), np.random.randint(0, 100, n))
        values = np.random.ranf(size)
        index = np.random.ranf(size)

        actual = jitted(values, index)
        expected = test_impl(values, index)
        pd.testing.assert_series_equal(actual, expected)
Exemple #7
0
    def test_merge_asof_seq1(self):
        """Run pd.merge_asof on the 'time' column of two small frames."""
        def test_impl(df1, df2):
            return pd.merge_asof(df1, df2, on='time')

        jitted = hpat.jit(test_impl)

        left_times = pd.DatetimeIndex(
            ['2017-01-03', '2017-01-06', '2017-02-21'])
        right_times = pd.DatetimeIndex(
            ['2017-01-01', '2017-01-02', '2017-01-04', '2017-02-23',
             '2017-02-25'])
        df1 = pd.DataFrame({'time': left_times, 'B': [4, 5, 6]})
        df2 = pd.DataFrame({'time': right_times, 'A': [2, 3, 7, 8, 9]})

        actual = jitted(df1, df2)
        expected = test_impl(df1, df2)
        pd.testing.assert_frame_equal(actual, expected)
Exemple #8
0
    def test_pass_return(self):
        """Pass a string, then a list of strings, through an identity function."""
        def test_impl(_str):
            return _str

        jitted = hpat.jit(test_impl)

        inputs = (
            'test_str',                   # single string
            ['test_str1', 'test_str2'],   # list of strings
        )
        for arg in inputs:
            self.assertEqual(jitted(arg), test_impl(arg))
Exemple #9
0
    def test_set_column1(self):
        """Overwrite an existing DataFrame column and sum it.

        Also checks the distribution counters: no REP arrays/parfors and
        exactly one OneD parfor.
        """
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n, np.int64), 'B': np.random.ranf(n)})
            df['A'] = np.arange(n)
            return df.A.sum()

        size = 11
        jitted = hpat.jit(test_impl)
        actual = jitted(size)
        expected = test_impl(size)
        self.assertEqual(actual, expected)

        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)
        self.assertEqual(count_parfor_OneDs(), 1)
Exemple #10
0
    def test_set_column2(self):
        """Add a brand-new column to a DataFrame and sum it.

        Also checks the distribution counters: no REP arrays/parfors and
        exactly one OneD parfor.
        """
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
            df['C'] = np.arange(n)
            return df.C.sum()

        size = 11
        jitted = hpat.jit(test_impl)
        actual = jitted(size)
        expected = test_impl(size)
        self.assertEqual(actual, expected)

        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)
        self.assertEqual(count_parfor_OneDs(), 1)
Exemple #11
0
    def test_muti_hiframes_node_filter_agg(self):
        """Filter a DataFrame, then both group-count it and return a column.

        The group counts are compared as sets; the returned column is
        compared element-wise.
        """
        def test_impl(df, cond):
            df2 = df[cond]
            c = df2.groupby('A')['B'].count()
            return df2.C, c

        jitted = hpat.jit(test_impl)
        df = pd.DataFrame({
            'A': [2, 1, 1, 1, 2, 2, 1],
            'B': [-8, 2, 3, 1, 5, 6, 7],
            'C': [2, 3, -1, 1, 2, 3, -1],
        })
        cond = df.A > 1

        expected = test_impl(df, cond)
        actual = jitted(df, cond)
        # Index 1 holds the group counts, index 0 the filtered column.
        self.assertEqual(set(expected[1]), set(actual[1]))
        np.testing.assert_array_equal(expected[0], actual[0])
Exemple #12
0
    def test_df_astype_str1(self):
        """Verify DataFrame.astype(str) on integer and string columns."""
        def test_impl(df):
            return df.astype(str)

        jitted = hpat.jit(test_impl)

        # TODO: add column with float values when test_series_astype_float_to_str1 is fixed
        int_col = [-1, 2, 11, 5, 0, -7]
        str_col = ['aa', 'bb', 'cc', 'dd', '', 'fff']
        df = pd.DataFrame({'A': int_col, 'B': str_col})

        actual = jitted(df)
        expected = test_impl(df)
        pd.testing.assert_frame_equal(actual, expected)
Exemple #13
0
    def test_shape1(self):
        """Return DataFrame.shape from a compiled function.

        Also checks that the counters report no REP arrays or parfors.
        """
        def test_impl(n):
            df = pd.DataFrame({
                'A': np.ones(n, np.int64),
                'B': np.random.ranf(n)
            })
            return df.shape

        size = 11
        jitted = hpat.jit(test_impl)
        actual = jitted(size)
        expected = test_impl(size)
        self.assertEqual(actual, expected)

        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)
Exemple #14
0
    def test_replace_noregex_std(self):
        """Replace a substring through hpat.str_ext.str_replace_noregex.

        Inputs are converted with unicode_to_std_str before the call and the
        result is converted back with std_str_to_unicode; the outcome must
        equal Python's own str.replace.
        """
        def test_impl(_str, pat, val):
            s = unicode_to_std_str(_str)
            e = unicode_to_std_str(pat)
            val = unicode_to_std_str(val)
            out = hpat.str_ext.str_replace_noregex(s, e, val)
            return std_str_to_unicode(out)

        text = 'What does the fox say'
        pattern = 'does the fox'
        replacement = 'does the cat'

        jitted = hpat.jit(test_impl)
        actual = jitted(text, pattern, replacement)
        expected = text.replace(pattern, replacement)
        self.assertEqual(actual, expected)
Exemple #15
0
    def test_array_reduce(self):
        """Accumulate a whole array inside a numba.prange loop.

        The compiled result must be close to the plain-Python result, and the
        distribution counters must report no OneD arrays and exactly one OneD
        parfor.
        """
        def test_impl(N):
            A = np.ones(3)
            B = np.ones(3)
            # In-place array accumulation performed on every iteration.
            for i in numba.prange(N):
                A += B
            return A

        hpat_func = hpat.jit(test_impl)
        n = 128
        np.testing.assert_allclose(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_OneDs(), 0)
        self.assertEqual(count_parfor_OneDs(), 1)
Exemple #16
0
    def test_df_describe(self):
        """Call DataFrame.describe() inside a compiled function.

        Only the distribution counters are checked; the describe() output
        itself is not compared yet.
        """
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(0, n, 1, np.float32),
                               'B': np.arange(n)})
            #df.A[0:1] = np.nan
            return df.describe()

        size = 1001
        jitted = hpat.jit(test_impl)
        # Result is intentionally discarded.
        # XXX: test actual output
        jitted(size)

        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)
Exemple #17
0
    def test_str2str(self):
        """Check zero-argument str -> str methods under HPAT.

        A tiny function calling each method is generated from source text,
        compiled, and compared against the uncompiled version.
        """
        template = "def test_impl(_str):\n  return _str.{}()\n"
        sample = ' \tbbCD\t '
        method_names = ('capitalize', 'casefold', 'lower', 'lstrip',
                        'rstrip', 'strip', 'swapcase', 'title', 'upper')

        for method in method_names:
            namespace = {}
            # Source is built only from the fixed method list above.
            exec(template.format(method), {}, namespace)
            test_impl = namespace['test_impl']
            hpat_func = hpat.jit(test_impl)
            self.assertEqual(hpat_func(sample), test_impl(sample))
Exemple #18
0
    def test_string_NA_box(self):
        """Return column 'five' of a Parquet table from a compiled function
        and compare its missing-value mask with pandas."""
        # create `example.parquet` file
        ParquetGenerator.gen_pq_test()

        def test_impl():
            df = pq.read_table('example.parquet').to_pandas()
            return df.five

        jitted = hpat.jit(test_impl)

        # XXX just checking isna() since Pandas uses None in this case
        # instead of nan for some reason
        actual_mask = jitted().isna()
        expected_mask = test_impl().isna()
        np.testing.assert_array_equal(actual_mask, expected_mask)
Exemple #19
0
    def test_set_column1_issue(self):
        """Overwrite an existing column and return the whole DataFrame."""
        def test_impl(n):
            df = pd.DataFrame({
                'A': np.ones(n, np.int64),
                'B': np.arange(n) + 3.0
            })
            df['A'] = np.arange(n)
            return df

        size = 11
        jitted = hpat.jit(test_impl)
        actual = jitted(size)
        expected = test_impl(size)
        pd.testing.assert_frame_equal(actual, expected)
Exemple #20
0
    def test_join1_seq_str_na(self):
        """Left-merge on string keys where one left key has no match, so the
        string data column 'B' receives an NA value."""
        def test_impl():
            df1 = pd.DataFrame({'key1': ['foo', 'bar', 'baz']})
            df2 = pd.DataFrame({
                'key2': ['baz', 'bar', 'baz'],
                'B': ['b', 'zzz', 'ss']
            })
            df3 = df1.merge(df2, left_on='key1', right_on='key2', how='left')
            return df3.B

        jitted = hpat.jit(test_impl)
        # Compared as sets, so row order is not part of the check.
        actual = set(jitted())
        expected = set(test_impl())
        self.assertEqual(actual, expected)
Exemple #21
0
    def test_pivot(self):
        """Check pivot_table(aggfunc='sum') output columns against pandas.

        The 'small' and 'large' pivot columns are each compared as sets of
        values between the compiled and the plain-pandas implementation.
        """
        def test_impl(df):
            pt = df.pivot_table(index='A',
                                columns='C',
                                values='D',
                                aggfunc='sum')
            return (pt.small.values, pt.large.values)

        # NOTE(review): the `pivots` option appears to name the pivot output
        # variable ('pt') and its expected columns -- keep in sync with
        # test_impl.
        hpat_func = hpat.jit(pivots={'pt': ['small', 'large']})(test_impl)

        # Evaluate each implementation once and reuse both returned columns
        # instead of recomputing the whole pivot for every assertion.
        h_res = hpat_func(_pivot_df1)
        p_res = test_impl(_pivot_df1)
        self.assertEqual(set(h_res[0]), set(p_res[0]))
        self.assertEqual(set(h_res[1]), set(p_res[1]))
Exemple #22
0
    def test_setitem_series3(self):
        """Assign to a Series element where the Series wraps an array argument,
        then compare the two argument arrays."""
        def test_impl(A, i):
            S = pd.Series(A)
            S[i] = 100

        jitted = hpat.jit(test_impl)

        base = np.arange(11)
        hpat_arr = base.copy()   # private copy for the compiled call
        pandas_arr = base        # the plain call works on the base array itself
        jitted(hpat_arr, 0)
        test_impl(pandas_arr, 0)
        np.testing.assert_array_equal(hpat_arr, pandas_arr)
Exemple #23
0
    def test_csv_str1(self):
        """Read a CSV with explicit column names and dtypes, one of them str.

        The frame produced by the compiled function must equal the one
        produced by plain pandas.
        """
        def test_impl():
            # The np.int / np.float aliases were deprecated in NumPy 1.20 and
            # removed in 1.24, so explicit 64-bit dtypes are spelled out.
            return pd.read_csv("csv_data_date1.csv",
                               names=['A', 'B', 'C', 'D'],
                               dtype={
                                   'A': np.int64,
                                   'B': np.float64,
                                   'C': str,
                                   'D': np.int64
                               })

        hpat_func = hpat.jit(test_impl)
        pd.testing.assert_frame_equal(hpat_func(), test_impl())
Exemple #24
0
    def test_fixed_apply2(self):
        """Run rolling(w, center=c).apply(sum) sequentially on generated frames
        over every combination of size, window and centering."""
        # test sequentially with generated dfs
        def test_impl(df, w, c):
            return df.rolling(w, center=c).apply(lambda a: a.sum())

        jitted = hpat.jit(test_impl)

        for n in (1, 2, 10, 11, 121, 1000):
            for w in (2, 3, 5):
                for c in (False, True):
                    df = pd.DataFrame({'B': np.arange(n)})
                    actual = jitted(df, w, c)
                    expected = test_impl(df, w, c)
                    pd.testing.assert_frame_equal(actual, expected)
Exemple #25
0
    def test_df_astype_int1(self):
        """Verify DataFrame.astype(np.int32) on int64, int32 and float32 columns."""
        def test_impl(df):
            return df.astype(np.int32)

        jitted = hpat.jit(test_impl)

        size = 6
        columns = {
            'A': np.ones(size, dtype=np.int64),
            'B': np.arange(size, dtype=np.int32),
            # TODO: uncomment column with string values when test_series_astype_str_to_int32 is fixed
            # 'C': ['-1', '2', '3', '0', '-7', '99'],
            'D': np.arange(float(size), dtype=np.float32),
        }
        df = pd.DataFrame(columns)

        actual = jitted(df)
        expected = test_impl(df)
        pd.testing.assert_frame_equal(actual, expected)
Exemple #26
0
    def test_unbox2(self):
        """Conditionally overwrite column 'A' of a DataFrame argument and
        return it, exercising both branches of the condition."""
        def test_impl(df, cond):
            n = len(df)
            if cond:
                df['A'] = np.arange(n) + 2.0
            return df.A

        jitted = hpat.jit(test_impl)
        size = 11
        df = pd.DataFrame({'A': np.ones(size), 'B': np.random.ranf(size)})

        # Every call gets its own copy so no run sees another run's writes.
        for cond in (True, False):
            actual = jitted(df.copy(), cond)
            expected = test_impl(df.copy(), cond)
            pd.testing.assert_series_equal(actual, expected)
Exemple #27
0
    def test_sort_values_copy(self):
        """Sort a DataFrame by 'A' into a new frame and return column 'B'."""
        def test_impl(df):
            df2 = df.sort_values('A')
            return df2.B.values

        size = 1211
        # Fixed seed keeps the random columns reproducible.
        np.random.seed(2)
        df = pd.DataFrame({
            'A': np.random.ranf(size),
            'B': np.arange(size),
            'C': np.random.ranf(size)
        })

        jitted = hpat.jit(test_impl)
        # The compiled function receives a copy; plain pandas gets the original.
        actual = jitted(df.copy())
        expected = test_impl(df)
        np.testing.assert_almost_equal(actual, expected)
Exemple #28
0
    def test_box_categorical(self):
        """Return a DataFrame that carries a categorical column after
        modifying its integer column."""
        def test_impl(df):
            df['A'] = df['A'] + 1
            return df

        jitted = hpat.jit(test_impl)

        yes_no = pd.api.types.CategoricalDtype(['N', 'Y'])
        df = pd.DataFrame({
            'A': [1, 2, 3],
            'B': pd.Series(['N', 'Y', 'Y'], dtype=yes_no),
        })

        # The compiled function works on a deep copy of the input.
        actual = jitted(df.copy(deep=True))
        expected = test_impl(df)
        pd.testing.assert_frame_equal(actual, expected)
Exemple #29
0
    def test_var_rename(self):
        """Add a column to a locally built DataFrame and pass the frame to
        inner_get_column; the returned Series must match pandas (names are
        not compared)."""
        # tests df variable replacement in hiframes_untyped where inlining
        # can cause extra assignments and definition handling errors
        # TODO: inline freevar
        def test_impl():
            df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
            # TODO: df['C'] = [5,6,7]
            df['C'] = np.ones(3)
            # NOTE(review): inner_get_column is defined outside this block;
            # presumably a helper that returns one column of df -- confirm.
            return inner_get_column(df)

        hpat_func = hpat.jit(test_impl)
        # check_names=False: the Series name is excluded from the comparison.
        pd.testing.assert_series_equal(hpat_func(),
                                       test_impl(),
                                       check_names=False)
Exemple #30
0
    def test_concat_columns1(self):
        """Concatenate two Series column-wise with pd.concat(axis=1)."""
        def test_impl(S1, S2):
            return pd.concat([S1, S2], axis=1)

        jitted = hpat.jit(test_impl)
        S1 = pd.Series([4, 5])
        S2 = pd.Series([6., 7.])

        actual = jitted(S1, S2)
        # TODO: support int as column name
        # Until then the pandas result's integer labels are renamed to
        # strings before comparing.
        string_labels = {0: '0', 1: '1'}
        expected = test_impl(S1, S2).rename(columns=string_labels)
        pd.testing.assert_frame_equal(actual, expected)