Beispiel #1
0
    def test_df_cov1(self):
        """Check jitted DataFrame rolling ``cov`` and ``corr`` against pandas.

        Every ordered pair drawn from two small frames (the first one holds a
        NaN in column ``A``) is checked for each window size and for both
        ``center`` settings.
        """
        df1 = pd.DataFrame({'A': [0, 1, 2, np.nan, 4], 'B': np.ones(5)})
        df2 = pd.DataFrame({'A': [0, 1, 2, -2, 4], 'C': np.ones(5)})
        # only one window size is used unless LONG_TEST is set
        wins = (2, 3, 5) if LONG_TEST else (3, )
        centers = (False, True)

        def test_impl(df, df2, w, c):
            return df.rolling(w, center=c).cov(df2)

        hpat_func = sdc.jit(test_impl)
        # one assertion per combination; a second identical assertion would
        # only repeat the same computation on the same arguments
        for args in itertools.product([df1, df2], [df1, df2], wins, centers):
            pd.testing.assert_frame_equal(hpat_func(*args), test_impl(*args))

        def test_impl2(df, df2, w, c):
            return df.rolling(w, center=c).corr(df2)

        hpat_func = sdc.jit(test_impl2)
        for args in itertools.product([df1, df2], [df1, df2], wins, centers):
            pd.testing.assert_frame_equal(hpat_func(*args), test_impl2(*args))
Beispiel #2
0
    def test_rolling1(self):
        """Check jitted fixed-window rolling sums for window sizes 3 and 7.

        Each case compares the jitted result with plain pandas and then
        expects the REP array and parfor counters to be zero.
        """
        n = 121

        # size 3 without unroll
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n), 'B': np.random.ranf(n)})
            Ac = df.A.rolling(3).sum()
            return Ac.sum()

        hpat_func = sdc.jit(test_impl)
        self.assertEqual(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

        # size 7 with unroll
        def test_impl_2(n):
            df = pd.DataFrame({
                'A': np.arange(n) + 1.0,
                'B': np.random.ranf(n)
            })
            Ac = df.A.rolling(7).sum()
            return Ac.sum()

        # jit and compare test_impl_2 (not test_impl) so that the size-7
        # window is the one actually exercised by this second case
        hpat_func = sdc.jit(test_impl_2)
        self.assertEqual(hpat_func(n), test_impl_2(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)
Beispiel #3
0
    def test_series_fixed1(self):
        """Compare jitted fixed-window Series rolling methods with pandas.

        Covers every method named in ``test_funcs`` and then ``apply`` with a
        summing lambda, for each window size and ``center`` flag, on one
        series containing a NaN and one without.
        """
        inputs = (
            pd.Series([0, 1, 2, np.nan, 4]),
            pd.Series([0, 1, 2, -2, 4]),
        )
        wins = (2, 3, 5) if LONG_TEST else (3, )
        centers = (False, True)
        template = "def test_impl(S, w, c):\n  return S.rolling(w, center=c).{}()\n"

        # one generated implementation per rolling method name
        for func_name in test_funcs:
            namespace = {}
            exec(template.format(func_name), {}, namespace)
            test_impl = namespace['test_impl']
            hpat_func = sdc.jit(test_impl)
            for w, c in itertools.product(wins, centers):
                for S in inputs:
                    pd.testing.assert_series_equal(hpat_func(S, w, c),
                                                   test_impl(S, w, c))

        # apply takes a callable, so it is written out by hand
        def apply_test_impl(S, w, c):
            return S.rolling(w, center=c).apply(lambda a: a.sum())

        hpat_func = sdc.jit(apply_test_impl)
        for w, c in itertools.product(wins, centers):
            for S in inputs:
                pd.testing.assert_series_equal(hpat_func(S, w, c),
                                               apply_test_impl(S, w, c))
Beispiel #4
0
    def test_series_cov1(self):
        """Check jitted Series rolling ``cov`` and ``corr`` against pandas.

        Every ordered pair drawn from two small series (the first one holds a
        NaN) is checked for each window size and for both ``center``
        settings.
        """
        S1 = pd.Series([0, 1, 2, np.nan, 4])
        S2 = pd.Series([0, 1, 2, -2, 4])
        # only one window size is used unless LONG_TEST is set
        wins = (2, 3, 5) if LONG_TEST else (3, )
        centers = (False, True)

        def test_impl(S, S2, w, c):
            return S.rolling(w, center=c).cov(S2)

        hpat_func = sdc.jit(test_impl)
        # one assertion per combination; a second identical assertion would
        # only repeat the same computation on the same arguments
        for args in itertools.product([S1, S2], [S1, S2], wins, centers):
            pd.testing.assert_series_equal(hpat_func(*args), test_impl(*args))

        def test_impl2(S, S2, w, c):
            return S.rolling(w, center=c).corr(S2)

        hpat_func = sdc.jit(test_impl2)
        for args in itertools.product([S1, S2], [S1, S2], wins, centers):
            pd.testing.assert_series_equal(hpat_func(*args), test_impl2(*args))
Beispiel #5
0
        def test_logistic_regression(self):
            '''
            Train and predict with logistic regression, jitted and plain.

            Exercises
               * boxing/unboxing of the result and model objects
               * passing of optional and required arguments
            '''
            n, d = 11, 4

            def train_impl(n, d):
                X = np.ones((n, d), dtype=np.double) + .5
                Y = np.ones((n, 1), dtype=np.double)
                algo = d4p.logistic_regression_training(
                    2, penaltyL1=0.1, penaltyL2=0.1, interceptFlag=True)
                return algo.compute(X, Y)

            def prdct_impl(n, d, model):
                w = np.ones((n, d), dtype=np.double) - 22.5
                algo = d4p.logistic_regression_prediction(
                    2,
                    resultsToCompute=
                    "computeClassesLabels|computeClassesProbabilities|computeClassesLogProbabilities"
                )
                return algo.compute(w, model)

            train_hpat = sdc.jit(train_impl)
            prdct_hpat = sdc.jit(prdct_impl)

            # plain-Python pipeline first, then the jitted one
            model_py = train_impl(n, d).model
            pred_impl = prdct_impl(n, d, model_py).prediction
            model_jit = train_hpat(n, d).model
            pred_hpat = prdct_hpat(n, d, model_jit).prediction

            np.testing.assert_allclose(pred_impl, pred_hpat)
Beispiel #6
0
    def test_nunique_str(self):
        """Check jitted ``nunique`` on a string column, compiling it twice."""
        def test_impl(n):
            df = pd.DataFrame({'A': ['aa', 'bb', 'aa', 'cc', 'cc']})
            return df.A.nunique()

        n = 1001
        # the second pass compiles again to catch overload related issues
        for _ in range(2):
            hpat_func = sdc.jit(test_impl)
            np.testing.assert_almost_equal(hpat_func(n), test_impl(n))
Beispiel #7
0
    def test_nunique(self):
        """Check jitted ``nunique`` on an integer column, compiling it twice.

        The column is ``arange(n)`` with element 2 overwritten by 0, so it
        holds one repeated value.
        """
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n)})
            df.A[2] = 0
            return df.A.nunique()

        n = 1001
        # the second pass compiles again to catch overload related issues
        for _ in range(2):
            hpat_func = sdc.jit(test_impl)
            np.testing.assert_almost_equal(hpat_func(n), test_impl(n))
Beispiel #8
0
    def test_assert(self):
        """Make sure an ``assert`` in an inlined function works.

        A jitted function containing the assert is called from a second
        jitted function with a value that satisfies it.
        """
        def g(a):
            assert a == 0

        hpat_g = sdc.jit(g)

        def f():
            hpat_g(0)

        # compile the caller and run it once
        sdc.jit(f)()
Beispiel #9
0
    def test_nunique_str_parallel(self):
        """Check jitted ``nunique`` on column ``two`` read from a parquet file.

        The function is compiled and checked twice; after each run the REP
        array counter is expected to be zero.
        """
        # TODO: test without file
        def test_impl():
            df = pq.read_table('example.parquet').to_pandas()
            return df.two.nunique()

        # the second pass compiles again to catch overload related issues
        for _ in range(2):
            hpat_func = sdc.jit(test_impl)
            self.assertEqual(hpat_func(), test_impl())
            self.assertEqual(count_array_REPs(), 0)
Beispiel #10
0
    def test_equality(self):
        """Check jitted string ``==`` and ``!=`` against a literal."""
        arg = 'test_str'

        def eq_impl(_str):
            return (_str == 'test_str')

        def ne_impl(_str):
            return (_str != 'test_str')

        # equality first, then inequality, each against its plain version
        for impl in (eq_impl, ne_impl):
            hpat_func = sdc.jit(impl)
            self.assertEqual(hpat_func(arg), impl(arg))
Beispiel #11
0
    def test_dist_return(self):
        """Return an array annotated as distributed from a jitted function.

        The jitted result is summed and passed through ``dist_sum`` before
        being compared with the sum of the plain NumPy result; the OneD
        array and parfor counters are then each expected to be 1.
        """
        def test_impl(N):
            A = np.arange(N)
            return A

        # presumably 'A:return' names the local ``A`` returned by test_impl;
        # confirm against the sdc.jit ``locals`` option
        hpat_func = sdc.jit(locals={'A:return': 'distributed'})(test_impl)
        n = 128
        # helper calling dist_reduce with Reduce_Type.Sum
        # (presumably a sum across ranks — confirm in sdc.distributed_api)
        dist_sum = sdc.jit(
            lambda a: sdc.distributed_api.dist_reduce(
                a, np.int32(sdc.distributed_api.Reduce_Type.Sum.value)))
        dist_sum(1)  # run to compile
        np.testing.assert_allclose(
            dist_sum(hpat_func(n).sum()), test_impl(n).sum())
        # NOTE(review): the counters are read after hpat_func has run; they
        # appear to reflect the most recent compilation — confirm
        self.assertEqual(count_array_OneDs(), 1)
        self.assertEqual(count_parfor_OneDs(), 1)
Beispiel #12
0
    def test_box_dist_return(self):
        """Return a DataFrame marked as distributed from a jitted function.

        The OneD array and parfor counters are checked right after the
        jitted call; the per-column sums of the jitted frame are then passed
        through ``dist_sum`` and compared with the plain pandas sums.
        """
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.arange(n)})
            return df

        hpat_func = sdc.jit(distributed={'df'})(test_impl)
        n = 11
        hres, res = hpat_func(n), test_impl(n)
        # checked before dist_sum is compiled below
        self.assertEqual(count_array_OneDs(), 3)
        self.assertEqual(count_parfor_OneDs(), 2)
        # helper calling dist_reduce with Reduce_Type.Sum
        # (presumably a sum across ranks — confirm in sdc.distributed_api)
        dist_sum = sdc.jit(lambda a: sdc.distributed_api.dist_reduce(
            a, np.int32(sdc.distributed_api.Reduce_Type.Sum.value)))
        dist_sum(1)  # run to compile
        np.testing.assert_allclose(dist_sum(hres.A.sum()), res.A.sum())
        np.testing.assert_allclose(dist_sum(hres.B.sum()), res.B.sum())
Beispiel #13
0
    def test_df_drop1(self):
        """Check jitted ``DataFrame.drop(columns=['A'])`` against pandas."""
        frame = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0], 'B': [4, 5, 6, 7]})

        def test_impl(df):
            return df.drop(columns=['A'])

        hpat_func = sdc.jit(test_impl)
        # jitted call first, then the plain pandas reference
        actual = hpat_func(frame)
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)
Beispiel #14
0
    def test_np_io1(self):
        """Check jitted ``np.fromfile`` reading float64 from ``np_file1.dat``."""
        def test_impl():
            A = np.fromfile("np_file1.dat", np.float64)
            return A

        hpat_func = sdc.jit(test_impl)
        # jitted read first, then the plain NumPy read
        actual = hpat_func()
        expected = test_impl()
        np.testing.assert_almost_equal(actual, expected)
Beispiel #15
0
    def test_agg_multikey_parallel(self):
        """Check a jitted two-key groupby sum fed with sliced input arrays.

        The jitted function receives the ``[start:end]`` slice of each column
        (bounds from ``get_start_end``) with its inputs annotated as
        distributed, while the plain version receives the full columns; the
        two scalar results must be equal.
        """
        def test_impl(in_A, in_B, in_C):
            df = pd.DataFrame({'A': in_A, 'B': in_B, 'C': in_C})
            A = df.groupby(['A', 'C'])['B'].sum()
            return A.sum()

        input_dists = {
            'in_A:input': 'distributed',
            'in_B:input': 'distributed',
            'in_C:input': 'distributed'
        }
        hpat_func = sdc.jit(locals=input_dists)(test_impl)

        df = pd.DataFrame({
            'A': [2, 1, 1, 1, 2, 2, 1],
            'B': [-8, 2, 3, 1, 5, 6, 7],
            'C': [3, 5, 6, 5, 4, 4, 3]
        })
        start, end = get_start_end(len(df))
        # slices for the jitted call, full columns for the reference call
        sliced_cols = [df[name].values[start:end] for name in ('A', 'B', 'C')]
        full_cols = [df[name].values for name in ('A', 'B', 'C')]

        h_res = hpat_func(*sliced_cols)
        p_res = test_impl(*full_cols)
        self.assertEqual(h_res, p_res)
Beispiel #16
0
    def test_df_reset_index1(self):
        """Check jitted ``DataFrame.reset_index(drop=True)`` against pandas."""
        frame = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]})

        def test_impl(df):
            return df.reset_index(drop=True)

        hpat_func = sdc.jit(test_impl)
        # jitted call first, then the plain pandas reference
        actual = hpat_func(frame)
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)
Beispiel #17
0
    def test_df_fillna_str1(self):
        """Check jitted ``fillna("dd")`` on a string column holding a None."""
        frame = pd.DataFrame({'A': ['aa', 'b', None, 'ccc']})

        def test_impl(df):
            return df.fillna("dd")

        hpat_func = sdc.jit(test_impl)
        # jitted call first, then the plain pandas reference
        actual = hpat_func(frame)
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)
Beispiel #18
0
    def test_df_fillna1(self):
        """Check jitted ``fillna(5.0)`` on a float column holding a NaN."""
        frame = pd.DataFrame({'A': [1.0, 2.0, np.nan, 1.0]})

        def test_impl(df):
            return df.fillna(5.0)

        hpat_func = sdc.jit(test_impl)
        # jitted call first, then the plain pandas reference
        actual = hpat_func(frame)
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)
Beispiel #19
0
    def test_create_without_column_names(self):
        """Check a DataFrame built inside jitted code from a bare list."""
        def test_impl():
            df = pd.DataFrame([100, 200, 300, 400, 200, 100])
            return df

        hpat_func = sdc.jit(test_impl)
        # jitted construction first, then the plain pandas reference
        actual = hpat_func()
        expected = test_impl()
        pd.testing.assert_frame_equal(actual, expected)
Beispiel #20
0
    def test_box2(self):
        """Check returning a frame with an int and a string column from jit."""
        def test_impl():
            df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'bb', 'ccc']})
            return df

        hpat_func = sdc.jit(test_impl)
        # jitted construction first, then the plain pandas reference
        actual = hpat_func()
        expected = test_impl()
        pd.testing.assert_frame_equal(actual, expected)
Beispiel #21
0
    def test_unbox_without_column_names(self):
        """Check passing a frame built from a bare list through jitted code."""
        frame = pd.DataFrame([100, 200, 300, 400, 200, 100])

        def test_impl(df):
            return df

        hpat_func = sdc.jit(test_impl)
        # jitted round trip first, then the plain identity call
        actual = hpat_func(frame)
        expected = test_impl(frame)
        pd.testing.assert_frame_equal(actual, expected)
Beispiel #22
0
 def test_variable_apply1(self):
     """Check jitted time-window rolling ``apply`` on hand-built frames.

     For each window string a ``test_impl`` is generated that rolls on the
     ``time`` column and applies a summing lambda; it is compared with
     pandas on two frames, the first of which has a NaN in column ``B``.
     """
     # test sequentially with manually created dfs
     times1 = [
         pd.Timestamp('20130101 09:00:00'),
         pd.Timestamp('20130101 09:00:02'),
         pd.Timestamp('20130101 09:00:03'),
         pd.Timestamp('20130101 09:00:05'),
         pd.Timestamp('20130101 09:00:06')
     ]
     times2 = [
         pd.Timestamp('20130101 09:00:01'),
         pd.Timestamp('20130101 09:00:02'),
         pd.Timestamp('20130101 09:00:03'),
         pd.Timestamp('20130101 09:00:04'),
         pd.Timestamp('20130101 09:00:09')
     ]
     df1 = pd.DataFrame({'B': [0, 1, 2, np.nan, 4], 'time': times1})
     df2 = pd.DataFrame({'B': [0, 1, 2, -2, 4], 'time': times2})
     wins = ('1s', '2s', '3s', '4s') if LONG_TEST else ('2s', )
     template = "def test_impl(df):\n  return df.rolling('{}', on='time').apply(lambda a: a.sum())\n"

     # the window is baked into the generated source, one function per window
     for w in wins:
         namespace = {}
         exec(template.format(w), {}, namespace)
         test_impl = namespace['test_impl']
         hpat_func = sdc.jit(test_impl)
         for frame in (df1, df2):
             pd.testing.assert_frame_equal(hpat_func(frame), test_impl(frame))
Beispiel #23
0
    def test_getitem_bool_series(self):
        """Check jitted selection of one column by another boolean column."""
        frame = pd.DataFrame({'A': [1, 2, 3], 'B': [True, False, True]})

        def test_impl(df):
            return df['A'][df['B']].values

        hpat_func = sdc.jit(test_impl)
        # plain pandas reference first, then the jitted call
        expected = test_impl(frame)
        actual = hpat_func(frame)
        np.testing.assert_array_equal(expected, actual)
Beispiel #24
0
    def test_join1_seq_key_change1(self):
        """Check two merges with different keys inside one jitted function.

        Only the second merge result (on ``'B'``) is compared with pandas.
        """
        # make sure const list typing doesn't replace const key values
        def test_impl(df1, df2, df3, df4):
            o1 = df1.merge(df2, on=['A'])
            o2 = df3.merge(df4, on=['B'])
            return o1, o2

        n = 11

        def odd_keys():
            # fresh key array 1, 3, 5, ...
            return 2 * np.arange(n) + 1

        def offset_vals():
            # fresh float payload array n + 1.0, n + 2.0, ...
            return n + np.arange(n) + 1.0

        df1 = pd.DataFrame({'A': np.arange(n) + 3, 'AA': np.arange(n) + 1.0})
        df2 = pd.DataFrame({'A': odd_keys(), 'AAA': offset_vals()})
        df3 = pd.DataFrame({'B': odd_keys(), 'BB': offset_vals()})
        df4 = pd.DataFrame({'B': odd_keys(), 'BBB': offset_vals()})

        hpat_func = sdc.jit(test_impl)
        # jitted call first, then the plain pandas reference
        actual = hpat_func(df1, df2, df3, df4)[1]
        expected = test_impl(df1, df2, df3, df4)[1]
        pd.testing.assert_frame_equal(actual, expected)
Beispiel #25
0
def do_jit(f):
    """Yield the ``sdc.jit``-compiled version of ``f``.

    Written as a generator in the shape of a context-manager body: the
    jitted function is yielded once and the local reference to it is
    dropped afterwards. NOTE(review): no ``contextmanager`` decorator is
    visible on this definition here — confirm one is applied.
    """
    jitted = sdc.jit(f)
    try:
        yield jitted
    finally:
        # drop this frame's reference to the compiled function
        del jitted
Beispiel #26
0
    def test_reduce_filter1(self):
        """Check jitted reductions applied after a boolean filter.

        For each dtype/reduction pair a function ``f`` is generated that
        keeps the elements greater than 5 and reduces them. The jitted
        version gets the ``[start:end]`` slice of the data with its input
        annotated as distributed; the plain version gets the full array.
        """
        import sys
        on_windows = sys.platform.startswith('win')
        dtypes = ['float32', 'float64', 'int32', 'int64']
        funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax']
        for (dtype, func) in itertools.product(dtypes, funcs):
            # loc allreduce doesn't support int64 on windows
            if on_windows and dtype == 'int64' and func in ['argmin', 'argmax']:
                continue
            func_text = """def f(A):
                A = A[A>5]
                return A.{}()
            """.format(func)
            namespace = {}
            exec(func_text, {'np': np}, namespace)
            test_impl = namespace['f']

            hpat_func = sdc.jit(locals={'A:input': 'distributed'})(test_impl)
            n = 21
            start, end = get_start_end(n)
            # reseed so every combination sees the same base values
            np.random.seed(0)
            A = np.random.randint(0, 10, n).astype(dtype)
            actual = hpat_func(A[start:end])
            expected = test_impl(A)
            np.testing.assert_almost_equal(
                actual, expected, decimal=3,
                err_msg="{} on {}".format(func, dtype))
            self.assertEqual(count_array_REPs(), 0)
            self.assertEqual(count_parfor_REPs(), 0)
Beispiel #27
0
    def test_intraday(self):
        """Run a per-symbol rolling-statistics workload under ``sdc.prange``.

        Each iteration builds a constant-valued price frame, derives rolling
        std/mean columns and two boolean criteria, and adds the mean of the
        selected returns to an accumulator. The jitted result must equal the
        plain one, and the OneD counters are expected to report 0 arrays and
        1 parfor.
        """
        def test_impl(nsyms):
            max_num_days = 100
            all_res = 0.0
            for i in sdc.prange(nsyms):
                # every symbol gets identical constant columns
                s_open = 20 * np.ones(max_num_days)
                s_low = 28 * np.ones(max_num_days)
                s_close = 19 * np.ones(max_num_days)
                df = pd.DataFrame({
                    'Open': s_open,
                    'Low': s_low,
                    'Close': s_close
                })
                df['Stdev'] = df['Close'].rolling(window=90).std()
                df['Moving Average'] = df['Close'].rolling(window=20).mean()
                df['Criteria1'] = (df['Open'] -
                                   df['Low'].shift(1)) < -df['Stdev']
                df['Criteria2'] = df['Open'] > df['Moving Average']
                df['BUY'] = df['Criteria1'] & df['Criteria2']
                df['Pct Change'] = (df['Close'] - df['Open']) / df['Open']
                # keep the percentage change only on rows flagged BUY
                df['Rets'] = df['Pct Change'][df['BUY']]
                all_res += df['Rets'].mean()
            return all_res

        hpat_func = sdc.jit(test_impl)
        n = 11
        self.assertEqual(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_OneDs(), 0)
        self.assertEqual(count_parfor_OneDs(), 1)
Beispiel #28
0
    def test_dataframe_columns_attribute(self):
        """Check that jitted code can return ``DataFrame.columns``."""
        def test_impl():
            df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
            return df.columns

        hpat_func = sdc.jit(test_impl)
        # jitted call first, then the plain pandas reference
        actual = hpat_func()
        expected = test_impl()
        np.testing.assert_array_equal(actual, expected)
Beispiel #29
0
    def test_kmeans(self):
        """Run a small k-means loop written with nested comprehensions.

        The jitted centroids are compared with the plain result for one
        centre, one iteration and an 11x2 all-ones input; the OneD and
        OneD_Var array/parfor counters are then checked against fixed
        expected values.
        """
        def test_impl(numCenter, numIter, N, D):
            A = np.ones((N, D))
            centroids = np.zeros((numCenter, D))

            for l in range(numIter):
                # distance from every row of A to every centroid
                dist = np.array([[
                    sqrt(np.sum((A[i, :] - centroids[j, :])**2))
                    for j in range(numCenter)
                ] for i in range(N)])
                # index of the nearest centroid for each row
                labels = np.array([dist[i, :].argmin() for i in range(N)])

                # new centroid = per-column mean of the rows assigned to it
                centroids = np.array([[
                    np.sum(A[labels == i, j]) / np.sum(labels == i)
                    for j in range(D)
                ] for i in range(numCenter)])

            return centroids

        hpat_func = sdc.jit(test_impl)
        n = 11
        np.testing.assert_allclose(hpat_func(1, 1, n, 2),
                                   test_impl(1, 1, n, 2))
        self.assertEqual(count_array_OneDs(), 4)
        self.assertEqual(count_array_OneD_Vars(), 1)
        self.assertEqual(count_parfor_OneDs(), 5)
        self.assertEqual(count_parfor_OneD_Vars(), 1)
Beispiel #30
0
    def test_dataframe_columns_iterator(self):
        """Check that jitted code can iterate over ``DataFrame.columns``."""
        def test_impl():
            df = pd.DataFrame({'A': [1, 2, 3], 'B': [2, 3, 4]})
            return [column for column in df.columns]

        hpat_func = sdc.jit(test_impl)
        # jitted call first, then the plain pandas reference
        actual = hpat_func()
        expected = test_impl()
        np.testing.assert_array_equal(actual, expected)