Exemple #1
0
    def test_rolling1(self):
        """Compare jitted and plain ``df.A.rolling(w).sum().sum()`` for w=3 and w=7.

        Each window size has its own implementation function. For both, the
        jitted result must equal the plain pandas result, and the array and
        parfor REP counters must read zero after the run.
        """
        # size 3 without unroll
        def test_impl(n):
            df = pd.DataFrame({'A': np.arange(n), 'B': np.random.ranf(n)})
            Ac = df.A.rolling(3).sum()
            return Ac.sum()

        hpat_func = hpat.jit(test_impl)
        n = 121
        self.assertEqual(hpat_func(n), test_impl(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

        # size 7 with unroll
        def test_impl_2(n):
            df = pd.DataFrame({'A': np.arange(n)+1.0, 'B': np.random.ranf(n)})
            Ac = df.A.rolling(7).sum()
            return Ac.sum()

        # Compile and compare test_impl_2 itself (not test_impl) so the
        # window-7 implementation is the one actually exercised here.
        hpat_func = hpat.jit(test_impl_2)
        n = 121
        self.assertEqual(hpat_func(n), test_impl_2(n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)
Exemple #2
0
    def test_series_fusion2(self):
        """Check a Series expression whose variable ``S`` has two definitions.

        ``S`` is assigned once unconditionally and once inside a branch, so
        the jitted code must not treat it as single-definition. The jitted
        result is compared with plain pandas, and the parfor REP counter is
        expected to be 3.
        """
        # make sure getting data var avoids incorrect single def assumption
        def test_impl(A, B):
            S = B + 2
            if A[0] == 0:
                S = A + 1
            return S + B

        length = 11
        series_a = pd.Series(np.arange(length))
        series_b = pd.Series(np.arange(length)**2)

        compiled = hpat.jit(test_impl)
        actual = compiled(series_a, series_b)
        expected = test_impl(series_a, series_b)

        pd.testing.assert_series_equal(actual, expected)
        self.assertEqual(count_parfor_REPs(), 3)
Exemple #3
0
    def test_concat_series(self):
        """Compare jitted and plain ``pd.concat`` of two Series, summed.

        Runs once with n=11 (followed by the REP counter checks) and once
        with n=11111 (result comparison only).
        """
        def test_impl(n):
            df1 = pd.DataFrame({'key1': np.arange(n), 'A': np.arange(n)+1.0})
            df2 = pd.DataFrame({'key2': n-np.arange(n), 'A': n+np.arange(n)+1.0})
            A3 = pd.concat([df1.A, df2.A])
            return A3.sum()

        compiled = hpat.jit(test_impl)

        # Small input: compare results, then inspect the REP counters.
        small_n = 11
        self.assertEqual(compiled(small_n), test_impl(small_n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

        # Larger input: result comparison only.
        large_n = 11111
        self.assertEqual(compiled(large_n), test_impl(large_n))
Exemple #4
0
    def test_cumsum(self):
        """Compare jitted and plain ``df.A.cumsum().sum()``.

        Also asserts the REP counters are 0, the OneD array and parfor
        counters are 2, and that the distributed IR contains ``dist_cumsum``.
        """
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
            Ac = df.A.cumsum()
            return Ac.sum()

        compiled = hpat.jit(test_impl)
        size = 11
        self.assertEqual(compiled(size), test_impl(size))

        # Array counters.
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_array_OneDs(), 2)
        # Parfor counters.
        self.assertEqual(count_parfor_REPs(), 0)
        self.assertEqual(count_parfor_OneDs(), 2)
        # The generated IR is expected to mention the cumsum call.
        self.assertTrue(dist_IR_contains('dist_cumsum'))
Exemple #5
0
    def test_join1(self):
        """Compare jitted and plain ``pd.merge`` on differently named keys.

        The merge uses ``left_on='key1'`` and ``right_on='key2'`` and the sum
        of column ``B`` is compared. n=11 is followed by the REP counter
        checks; n=11111 compares the result only.
        """
        def test_impl(n):
            df1 = pd.DataFrame({'key1': np.arange(n)+3, 'A': np.arange(n)+1.0})
            df2 = pd.DataFrame({'key2': 2*np.arange(n)+1, 'B': n+np.arange(n)+1.0})
            df3 = pd.merge(df1, df2, left_on='key1', right_on='key2')
            return df3.B.sum()

        compiled = hpat.jit(test_impl)

        small_n = 11
        self.assertEqual(compiled(small_n), test_impl(small_n))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

        large_n = 11111
        self.assertEqual(compiled(large_n), test_impl(large_n))
Exemple #6
0
    def test_shift1(self):
        """Compare jitted and plain ``df.A.shift(1).sum()`` for n=11.

        Afterwards the array and parfor REP counters must both be zero.
        """
        def test_impl(n):
            df = pd.DataFrame({
                'A': np.arange(n) + 1.0,
                'B': np.random.ranf(n)
            })
            Ac = df.A.shift(1)
            return Ac.sum()

        compiled = hpat.jit(test_impl)
        size = 11
        actual = compiled(size)
        expected = test_impl(size)
        self.assertEqual(actual, expected)

        for rep_counter in (count_array_REPs, count_parfor_REPs):
            self.assertEqual(rep_counter(), 0)
Exemple #7
0
    def test_df_describe(self):
        """Run jitted ``df.describe()`` and check only the REP counters.

        The returned value is not compared against pandas (see the XXX note
        below); the test only verifies that the call completes and that both
        REP counters are zero.
        """
        def test_impl(n):
            df = pd.DataFrame({
                'A': np.arange(0, n, 1, np.float32),
                'B': np.arange(n)
            })
            #df.A[0:1] = np.nan
            return df.describe()

        compiled = hpat.jit(test_impl)
        size = 1001
        compiled(size)  # result intentionally discarded
        # XXX: test actual output
        for rep_counter in (count_array_REPs, count_parfor_REPs):
            self.assertEqual(rep_counter(), 0)
    def test_str_split_parallel(self):
        """Compare jitted and plain ``df.A.str.split(',')`` on a sliced frame.

        The input frame holds the ``[start:end]`` slice of the string list
        returned by ``get_start_end`` and is passed to both implementations.
        ``df`` and ``B`` are flagged as distributed for the jitted function.
        """
        def test_impl(df):
            B = df.A.str.split(',')
            return B

        A = ['AB,CC', 'C,ABB,D', 'CAD', 'CA,D', 'AA,,D']
        n = 5
        start, end = get_start_end(n)
        local_df = pd.DataFrame({'A': A[start:end]})

        jit_decorator = hpat.jit(distributed={'df', 'B'})
        compiled = jit_decorator(test_impl)

        actual = compiled(local_df)
        expected = test_impl(local_df)
        pd.testing.assert_series_equal(actual, expected, check_names=False)

        self.assertEqual(count_array_REPs(), 3)
        self.assertEqual(count_parfor_REPs(), 0)
    def test_df_input_dist1(self):
        """Check ``df.B.sum()`` when the input frame is flagged as distributed.

        The jitted function receives only the ``[start:end]`` slice of the
        data given by ``get_start_end``, while the plain pandas reference
        receives the full frame. Both results must agree, and the array and
        parfor REP counters must be zero.
        """
        def test_impl(df):
            return df.B.sum()

        A = [3, 4, 5, 6, 1]
        B = [5, 6, 2, 1, 3]
        # Derive the length from the data so it cannot drift from the lists.
        n = len(A)
        start, end = get_start_end(n)
        df = pd.DataFrame({'A': A, 'B': B})
        df_h = pd.DataFrame({'A': A[start:end], 'B': B[start:end]})
        hpat_func = hpat.jit(distributed={'df'})(test_impl)
        np.testing.assert_almost_equal(hpat_func(df_h), test_impl(df))
        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)
Exemple #10
0
    def test_str_replace_regex_parallel(self):
        """Compare jitted and plain regex ``str.replace`` on a sliced frame.

        The pattern ``'AB*'`` is replaced with ``'EE'`` using ``regex=True``.
        The input frame holds the ``[start:end]`` slice of the string list
        returned by ``get_start_end``; ``df`` and ``B`` are flagged as
        distributed for the jitted function.
        """
        def test_impl(df):
            B = df.A.str.replace('AB*', 'EE', regex=True)
            return B

        A = ['ABCC', 'CABBD', 'CCD', 'CCDAABB', 'ED']
        n = 5
        start, end = get_start_end(n)
        local_df = pd.DataFrame({'A': A[start:end]})

        jit_decorator = hpat.jit(distributed={'df', 'B'})
        compiled = jit_decorator(test_impl)

        actual = compiled(local_df)
        expected = test_impl(local_df)
        pd.testing.assert_series_equal(actual, expected, check_names=False)

        self.assertEqual(count_array_REPs(), 3)
        self.assertEqual(count_parfor_REPs(), 0)
Exemple #11
0
    def test_fixed_parallel_apply1(self):
        """Compare jitted and plain fixed-window ``rolling(...).apply``.

        Every combination of data size, window width and ``center`` flag is
        run through both implementations. The REP counters are checked once,
        after the whole sweep.
        """
        def test_impl(n, w, center):
            df = pd.DataFrame({'B': np.arange(n)})
            R = df.rolling(w, center=center).apply(lambda a: a.sum())
            return R.B.sum()

        compiled = hpat.jit(test_impl)

        sizes = (1, 2, 10, 11, 121, 1000)
        wins = (2, 4, 5, 10, 11)
        centers = (False, True)

        for combo in itertools.product(sizes, wins, centers):
            actual = compiled(*combo)
            expected = test_impl(*combo)
            failure_msg = "rolling fixed window with {}".format(combo)
            self.assertEqual(actual, expected, failure_msg)

        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)
Exemple #12
0
    def test_set_column1(self):
        """Compare jitted and plain results after overwriting column ``A``.

        Column ``A`` already exists when it is reassigned. Afterwards the REP
        counters must be zero and the OneD parfor counter must be one.
        """
        # set existing column
        def test_impl(n):
            df = pd.DataFrame({
                'A': np.ones(n, np.int64),
                'B': np.random.ranf(n)
            })
            df['A'] = np.arange(n)
            return df.A.sum()

        compiled = hpat.jit(test_impl)
        size = 11
        actual = compiled(size)
        expected = test_impl(size)
        self.assertEqual(actual, expected)

        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)
        self.assertEqual(count_parfor_OneDs(), 1)
Exemple #13
0
    def test_write_csv_parallel1(self):
        """Write a one-column frame to CSV with jitted and plain code.

        Each implementation writes to its own file. The REP counters must be
        zero, and on rank 0 the two files are read back with pandas and
        compared. The files are not removed afterwards.
        """
        def test_impl(n, fname):
            df = pd.DataFrame({'A': np.arange(n)})
            df.to_csv(fname)

        compiled = hpat.jit(test_impl)
        size = 111
        jit_path = 'test_write_csv1_hpat_par.csv'
        ref_path = 'test_write_csv1_pd_par.csv'

        compiled(size, jit_path)
        test_impl(size, ref_path)

        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)

        # TODO: delete files
        if get_rank() == 0:
            written = pd.read_csv(jit_path)
            reference = pd.read_csv(ref_path)
            pd.testing.assert_frame_equal(written, reference)
Exemple #14
0
    def test_column_distribution(self):
        """Compare jitted and plain results for a mix of column operations.

        The implementation calls ``fillna`` (in place and copying), ``sum``,
        ``mean``, ``var``, ``std`` and ``cumsum`` on column ``A`` and returns
        one combined scalar. Afterwards the REP counters must be zero and the
        distributed IR must contain ``dist_cumsum``.
        """
        # make sure all column calls are distributed
        def test_impl(n):
            df = pd.DataFrame({'A': np.ones(n), 'B': np.random.ranf(n)})
            df.A.fillna(5.0, inplace=True)
            DF = df.A.fillna(5.0)
            s = DF.sum()
            m = df.A.mean()
            v = df.A.var()
            t = df.A.std()
            Ac = df.A.cumsum()
            return Ac.sum() + s + m + v + t

        compiled = hpat.jit(test_impl)
        size = 11
        actual = compiled(size)
        expected = test_impl(size)
        self.assertEqual(actual, expected)

        for rep_counter in (count_array_REPs, count_parfor_REPs):
            self.assertEqual(rep_counter(), 0)
        self.assertTrue(dist_IR_contains('dist_cumsum'))
Exemple #15
0
 def test_variable_apply_parallel1(self):
     """Compare jitted and plain time-window ``rolling(...).apply``.

     For each window string the implementation is generated as source
     text and built with ``exec``, so the window appears as a literal in
     the function. Every size is then run through the jitted and plain
     versions. The REP counters are checked once, after the whole sweep.
     """
     wins = ('1s', '2s', '3s', '4s')
     # XXX: Pandas returns time = [np.nan] for size==1 for some reason
     sizes = (2, 10, 11, 121, 1000)
     exec_globals = {'pd': pd, 'np': np}
     for window in wins:
         # Only the rolling line is formatted: the DataFrame line contains
         # literal braces that str.format would misread.
         source_parts = [
             "def test_impl(n):\n",
             "  df = pd.DataFrame({'B': np.arange(n), 'time': ",
             "    pd.DatetimeIndex(np.arange(n) * 1000000000)})\n",
             "  res = df.rolling('{}', on='time').apply(lambda a: a.sum())\n".format(
                 window),
             "  return res.B.sum()\n",
         ]
         func_text = "".join(source_parts)
         namespace = {}
         exec(func_text, exec_globals, namespace)
         test_impl = namespace['test_impl']
         compiled = hpat.jit(test_impl)
         for size in sizes:
             np.testing.assert_almost_equal(compiled(size), test_impl(size))
     self.assertEqual(count_array_REPs(), 0)
     self.assertEqual(count_parfor_REPs(), 0)
Exemple #16
0
    def test_reduce(self):
        """Compare jitted and plain NumPy reductions over ``np.ones`` arrays.

        Every dtype/reduction pair gets its own implementation, generated as
        source text and built with ``exec``. The int64 argmin/argmax pairs
        are skipped. The REP counters are checked after each pair.
        """
        dtypes = ['float32', 'float64', 'int32', 'int64']
        funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax']
        arg_reductions = ('argmin', 'argmax')
        size = 128
        for dtype, func in itertools.product(dtypes, funcs):
            # loc allreduce doesn't support int64
            if dtype == 'int64' and func in arg_reductions:
                continue
            source = """def f(n):
                A = np.ones(n, dtype=np.{})
                return A.{}()
            """.format(dtype, func)
            namespace = {}
            exec(source, {'np': np}, namespace)
            test_impl = namespace['f']

            compiled = hpat.jit(test_impl)
            np.testing.assert_almost_equal(compiled(size), test_impl(size))
            self.assertEqual(count_array_REPs(), 0)
            self.assertEqual(count_parfor_REPs(), 0)
Exemple #17
0
    def test_reduce(self):
        """Compare jitted and plain NumPy reductions over ``np.arange`` arrays.

        Every dtype/reduction pair gets its own implementation, generated as
        source text and built with ``exec``. The int64 argmin/argmax pairs
        are skipped only when ``sys.platform`` starts with ``'win'``. The REP
        counters are checked after each pair.
        """
        import sys
        dtypes = ['float32', 'float64', 'int32', 'int64']
        funcs = ['sum', 'prod', 'min', 'max', 'argmin', 'argmax']
        arg_reductions = ('argmin', 'argmax')
        on_windows = sys.platform.startswith('win')
        size = 21  # XXX arange() on float32 has overflow issues on large n
        for dtype, func in itertools.product(dtypes, funcs):
            # loc allreduce doesn't support int64 on windows
            if on_windows and dtype == 'int64' and func in arg_reductions:
                continue
            source = """def f(n):
                A = np.arange(0, n, 1, np.{})
                return A.{}()
            """.format(dtype, func)
            namespace = {}
            exec(source, {'np': np}, namespace)
            test_impl = namespace['f']

            compiled = hpat.jit(test_impl)
            np.testing.assert_almost_equal(compiled(size), test_impl(size))
            self.assertEqual(count_array_REPs(), 0)
            self.assertEqual(count_parfor_REPs(), 0)
Exemple #18
0
    def test_join_datetime_parallel1(self):
        """Compare jitted and plain ``pd.merge`` on a datetime ``time`` column.

        The jitted function, with ``df1`` and ``df2`` flagged as distributed,
        receives the ``iloc[start:end]`` slice of each frame given by
        ``get_start_end``; the plain reference receives the full frames. The
        returned tuples must match, and both REP counters must be zero.
        """
        def test_impl(df1, df2):
            df3 = pd.merge(df1, df2, on='time')
            return (df3.A.sum(), df3.time.max(), df3.B.sum())

        compiled = hpat.jit(distributed=['df1', 'df2'])(test_impl)

        left_times = pd.DatetimeIndex(['2017-01-03', '2017-01-06', '2017-02-21'])
        right_times = pd.DatetimeIndex(['2017-01-01', '2017-01-06', '2017-01-03'])
        left = pd.DataFrame({'time': left_times, 'B': [4, 5, 6]})
        right = pd.DataFrame({'time': right_times, 'A': [7, 8, 9]})

        left_start, left_end = get_start_end(len(left))
        right_start, right_end = get_start_end(len(right))

        actual = compiled(left.iloc[left_start:left_end],
                          right.iloc[right_start:right_end])
        expected = test_impl(left, right)
        self.assertEqual(actual, expected)

        self.assertEqual(count_array_REPs(), 0)
        self.assertEqual(count_parfor_REPs(), 0)