Exemple #1
0
    def test_dataframe_binary_operator_function_div_multiIndex_param_level_param_fill_value(
            self):
        """Build flat and two-level-indexed frames in pandas and orca.

        Only the multi-index pair is compared; the flat frames are
        constructed but take part in no assertion.
        """
        def flat_values():
            # Fresh literals on every call so the libraries never share objects.
            return {'angles': [0, 3, 4], 'degrees': [360, 180, 360]}

        def nested_values():
            return {
                'angles': [0, 3, 4, 4, 5, 6],
                'degrees': [360, 180, 360, 360, 540, 720],
            }

        def nested_index():
            outer = ['A', 'A', 'A', 'B', 'B', 'B']
            inner = ['circle', 'triangle', 'rectangle',
                     'square', 'pentagon', 'hexagon']
            return [outer, inner]

        pdf = pd.DataFrame(flat_values(),
                           index=['circle', 'triangle', 'rectangle'])
        odf = orca.DataFrame(flat_values(),
                             index=['circle', 'triangle', 'rectangle'])

        pdf_multi = pd.DataFrame(nested_values(), index=nested_index())
        odf_multi = orca.DataFrame(nested_values(), index=nested_index())

        assert_frame_equal(pdf_multi, odf_multi.to_pandas())
Exemple #2
0
    def test_dataframe(self):
        """Compare basic orca DataFrame operations with pandas through ``repr``."""
        orca_frame, pandas_frame = self.odf, self.pdf

        # arithmetic on a single column
        shifted = (orca_frame['a'] + 1).to_pandas()
        self.assertEqual(repr(shifted), repr(pandas_frame['a'] + 1))

        # column labels
        self.assertEqual(repr(orca_frame.columns), repr(pd.Index(['a', 'b'])))

        # boolean-mask row filtering
        filtered = orca_frame[orca_frame['b'] > 2].to_pandas()
        self.assertEqual(repr(filtered),
                         repr(pandas_frame[pandas_frame['b'] > 2]))

        # column-list selection and attribute-style column access
        self.assertEqual(repr(orca_frame[['a', 'b']]),
                         repr(pandas_frame[['a', 'b']]))
        self.assertEqual(repr(orca_frame.a), repr(pandas_frame.a))

        # the repr itself must be non-empty
        assert repr(orca_frame)

        source = pd.DataFrame({
            'a': [1, 2, 3, 4, 5, 6, 7, 8, 9],
            'b': [4, 5, 6, 3, 2, 1, 0, 0, 0],
        })
        wrapped = orca.DataFrame(source)
        self.assertEqual(repr(source[['a', 'b']]), repr(wrapped[['a', 'b']]))

        # TODO: not implemented
        # self.assertEqual(repr(ddf.a.notnull().alias("x").name), repr("x"))

        # a frame built from an orca.Series should print like its pandas twin
        pandas_series = pd.Series([1, 2, 3], name='x')
        orca_series = orca.Series([1, 2, 3], name='x')
        self.assertEqual(repr(pd.DataFrame(pandas_series)),
                         repr(orca.DataFrame(orca_series)))
Exemple #3
0
    def test_join_from_dataframe_param_on(self):
        """Join on column ``A`` and compare the orca result with pandas.

        Missing ``key_other`` values in the pandas reference are replaced by
        empty strings before the comparison.
        """
        odf = orca.DataFrame({
            'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
            'A': [1, 2, 3, 4, 5, 6]
        })
        odf_other = orca.DataFrame({
            'key': ['K0', 'K1', 'K2'],
            'B': [11, 22, 33]
        })
        odf_join = odf.join(odf_other,
                            on='A',
                            lsuffix='_caller',
                            rsuffix='_other')

        pdf = pd.DataFrame({
            'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
            'A': [1, 2, 3, 4, 5, 6]
        })
        pdf_other = pd.DataFrame({
            'key': ['K0', 'K1', 'K2'],
            'B': [11, 22, 33]
        })
        pdf_join = pdf.join(pdf_other,
                            on='A',
                            lsuffix='_caller',
                            rsuffix='_other')

        # Assign the filled column back rather than calling
        # fillna(inplace=True) on the Series returned by .loc: that is a
        # chained in-place update on a temporary and does not reliably
        # (and under Copy-on-Write never) modify pdf_join itself.
        pdf_join['key_other'] = pdf_join['key_other'].fillna("")
        assert_frame_equal(odf_join.to_pandas(), pdf_join, check_dtype=False)
Exemple #4
0
    def test_join_from_dataframe_index(self):
        """Default index-on-index join in orca should match pandas."""
        def left_values():
            return {'A': [1, 2, 3], 'B': [11, 22, 33]}

        def right_values():
            return {'C': [111, 222, 333], 'D': [1111, 2222, 3333]}

        # orca side
        o_left = orca.DataFrame(left_values(), index=['K0', 'K1', 'K2'])
        o_right = orca.DataFrame(right_values(), index=['K0', 'K2', 'K3'])
        joined_orca = o_left.join(o_right)

        # pandas reference
        p_left = pd.DataFrame(left_values(), index=['K0', 'K1', 'K2'])
        p_right = pd.DataFrame(right_values(), index=['K0', 'K2', 'K3'])
        joined_pandas = p_left.join(p_right)

        assert_frame_equal(joined_orca.to_pandas(),
                           joined_pandas,
                           check_dtype=False)
Exemple #5
0
    def test_dataframe_attributes_axes(self):
        """``axes`` of an orca frame should match pandas for several index kinds."""
        def values():
            return {'col1': [1, 2], 'col2': [3, 4]}

        def check_axes(pandas_frame, orca_frame):
            # Row axis needs converting back to pandas; the column axis is
            # compared directly.
            assert_index_equal(pandas_frame.axes[0],
                               orca_frame.axes[0].to_pandas())
            assert_index_equal(pandas_frame.axes[1], orca_frame.axes[1])

        # default integer index
        check_axes(pd.DataFrame(values()), orca.DataFrame(values()))

        # daily date index, passed positionally
        check_axes(
            pd.DataFrame(values(),
                         pd.date_range("20190101", periods=2, freq="d")),
            orca.DataFrame(values(),
                           orca.date_range("20190101", periods=2, freq="d")))

        # string labels
        check_axes(pd.DataFrame(values(), index=['a', 'b']),
                   orca.DataFrame(values(), index=['a', 'b']))
Exemple #6
0
    def test_join_from_dataframe_index_param_sort(self):
        """Set up an index join with ``sort=True``; only the pandas side runs.

        The orca call is still pending, so nothing is asserted yet.
        """
        def left_values():
            return {'A': [1, 2, 3], 'B': [11, 22, 33]}

        def right_values():
            return {'C': [111, 222, 333], 'D': [1111, 2222, 3333]}

        o_left = orca.DataFrame(left_values(), index=['K0', 'K1', 'K2'])
        o_right = orca.DataFrame(right_values(), index=['K0', 'K2', 'K3'])
        # TODO: not implemented
        # odf_join = orca_left.join(orca_right, sort=True)

        p_left = pd.DataFrame(left_values(), index=['K0', 'K1', 'K2'])
        p_right = pd.DataFrame(right_values(), index=['K0', 'K2', 'K3'])
        joined_pandas = p_left.join(p_right, sort=True)
Exemple #7
0
    def test_dataframe_append(self):
        """Appending two typed frames in orca should match the pandas result.

        Two frames of 10 and 20 rows are built with one column per tested
        dtype; the orca ``append`` result is compared with the pandas
        concatenation of the same frames.
        """
        def build_frame(n):
            # n must be a multiple of 10: each of the 10 base values is
            # repeated n // 10 times so every column ends up with n rows.
            # Integer division is required because np.repeat needs an
            # integer repeat count.
            reps = n // 10
            return pd.DataFrame({
                'id':
                np.arange(1, n + 1, 1, dtype='int32'),
                'date':
                np.repeat(pd.date_range('2019.08.01', periods=10, freq='D'),
                          reps),
                'tsymbol':
                np.repeat(
                    ['a', 'b', 'c', 'd', 'e', 'QWW', 'FEA', 'FFW', 'DER',
                     'POD'],
                    reps),
                'tbool':
                np.repeat(np.repeat(np.arange(2, dtype='bool'), 5), reps),
                'tchar':
                np.repeat(np.arange(1, 11, 1, dtype='int8'), reps),
                'tshort':
                np.repeat(np.arange(1, 11, 1, dtype='int16'), reps),
                'tint':
                np.repeat(np.arange(1, 11, 1, dtype='int32'), reps),
                'tlong':
                np.repeat(np.arange(1, 11, 1, dtype='int64'), reps),
                'tfloat':
                np.repeat(np.arange(1, 11, 1, dtype='float32'), reps),
                'tdouble':
                np.repeat(np.arange(1, 11, 1, dtype='float64'), reps)
            })

        pdf1 = build_frame(10)
        pdf2 = build_frame(20)

        odf1 = orca.DataFrame(pdf1)
        odf2 = orca.DataFrame(pdf2)
        # pd.concat yields the same frame DataFrame.append used to return and
        # still exists in pandas >= 2.0, where DataFrame.append was removed.
        assert_frame_equal(pd.concat([pdf1, pdf2]),
                           odf1.append(odf2).to_pandas())
Exemple #8
0
    def test_dataframe_attributes_columns(self):
        """``columns`` of an orca frame should equal pandas for several indexes."""
        def frame_values():
            # A fresh dict per call, mixing float, int, datetime and string
            # columns.
            return {
                'float': [1.0, 2.0, 3.5, 6.5],
                'int': [1, 2, 7, 4],
                'datetime': pd.date_range('2019-01-02', periods=4),
                'string': ['foo', 'ss', 'sw', 'qa'],
            }

        # string labels containing a duplicate
        pandas_frame = pd.DataFrame(frame_values(),
                                    index=['a', 'b', 'c', 'c'])
        orca_frame = orca.DataFrame(frame_values(),
                                    index=['a', 'b', 'c', 'c'])
        assert_index_equal(pandas_frame.columns, orca_frame.columns)

        # daily date index
        pandas_frame = pd.DataFrame(
            frame_values(),
            index=pd.date_range("20190101", periods=4, freq="d"))
        # pd.to_datetime(["20190101","20190304"])
        orca_frame = orca.DataFrame(
            frame_values(),
            index=orca.date_range("20190101", periods=4, freq="d"))
        assert_index_equal(pandas_frame.columns, orca_frame.columns)

        # explicit integer labels
        pandas_frame = pd.DataFrame(frame_values(), index=[1, 2, 3, 4])
        orca_frame = orca.DataFrame(frame_values(), index=[1, 2, 3, 4])
        assert_index_equal(pandas_frame.columns, orca_frame.columns)
Exemple #9
0
    def test_join_from_dataframe_how(self):
        """Index joins with ``how`` = right / inner / outer should match pandas.

        The default (``how='left'``) is not exercised here. For the outer
        join, missing ``key_other`` values in the pandas reference are
        replaced by empty strings before comparing.
        """
        odf = orca.DataFrame({
            'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
            'A': [1, 2, 3, 4, 5, 6]
        })
        odf_other = orca.DataFrame({
            'key': ['K0', 'K1', 'K2'],
            'B': [11, 22, 33]
        })

        pdf = pd.DataFrame({
            'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
            'A': [1, 2, 3, 4, 5, 6]
        })
        pdf_other = pd.DataFrame({
            'key': ['K0', 'K1', 'K2'],
            'B': [11, 22, 33]
        })

        for how in ("right", "inner", "outer"):
            odf_join = odf.join(odf_other,
                                how=how,
                                lsuffix='_caller',
                                rsuffix='_other')
            pdf_join = pdf.join(pdf_other,
                                how=how,
                                lsuffix='_caller',
                                rsuffix='_other')
            if how == "outer":
                # Assign the filled column back: fillna(inplace=True) on the
                # Series returned by .loc is a chained in-place update on a
                # temporary and does not reliably modify pdf_join.
                pdf_join['key_other'] = pdf_join['key_other'].fillna("")
            assert_frame_equal(odf_join.to_pandas(),
                               pdf_join,
                               check_dtype=False)
Exemple #10
0
    def test_dataframe_Combining_joining_merging_append_in_memory(self):
        """orca ``append`` (plain, ignore_index, sort, inplace) should match pandas.

        The pandas reference is built with ``pd.concat``, which returns the
        same frame ``DataFrame.append`` used to and is still available in
        pandas >= 2.0, where ``DataFrame.append`` was removed.
        """
        pdf = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'))
        pdf2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'))

        odf = orca.DataFrame([[1, 2], [3, 4]], columns=list('AB'))
        odf2 = orca.DataFrame([[5, 6], [7, 8]], columns=list('AB'))

        assert_frame_equal(pd.concat([pdf, pdf2]),
                           odf.append(odf2).to_pandas())
        assert_frame_equal(
            pd.concat([pdf, pdf2], ignore_index=True),
            odf.append(odf2, ignore_index=True).to_pandas())
        assert_frame_equal(pd.concat([pdf, pdf2], sort=True),
                           odf.append(odf2, sort=True).to_pandas())

        # orca-specific in-place variant: odf itself is expected to grow
        odf.append(odf2, inplace=True)
        assert_frame_equal(pd.concat([pdf, pdf2]), odf.to_pandas())
Exemple #11
0
    def test_dataframe_multiindex_names_level(self):
        """Column level names should survive wrapping a pandas frame in orca."""
        level_tuples = [('X', 'A', 'Z'), ('X', 'B', 'Z'),
                        ('Y', 'C', 'Z'), ('Y', 'D', 'Z')]
        named_columns = pd.MultiIndex.from_tuples(
            level_tuples, names=['lvl_1', 'lvl_2', 'lv_3'])
        rows = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12],
                [13, 14, 15, 16], [17, 18, 19, 20]]
        pandas_frame = pd.DataFrame(rows, columns=named_columns)
        orca_frame = orca.DataFrame(pandas_frame)

        # NOTE: the same comparison is made twice.
        for _ in range(2):
            self.assertEqual(repr(orca_frame.columns.names),
                             repr(pandas_frame.columns.names))

        # a second, independent wrap of the same pandas frame
        second_wrap = orca.DataFrame(pandas_frame)
        self.assertEqual(repr(second_wrap.columns.names),
                         repr(pandas_frame.columns.names))
Exemple #12
0
    def test_dataframe_concat(self):
        """``orca.concat`` should agree with ``pd.concat`` across its options."""
        def check(pandas_result, orca_result):
            assert_frame_equal(pandas_result, orca_result.to_pandas())

        # --- frames with identical columns --------------------------------
        pdf1 = pd.DataFrame([['a', 1], ['b', 2]], columns=['letter', 'number'])
        pdf2 = pd.DataFrame([['c', 3], ['d', 4]], columns=['letter', 'number'])
        odf1 = orca.DataFrame([['a', 1], ['b', 2]],
                              columns=['letter', 'number'])
        odf2 = orca.DataFrame([['c', 3], ['d', 4]],
                              columns=['letter', 'number'])

        check(pd.concat([pdf1, pdf2]), orca.concat([odf1, odf2]))
        # assert_frame_equal(pd.concat([pdf1, pdf1]), orca.concat([odf1, odf1]).to_pandas())
        check(pd.concat([pdf1, pdf2], join="inner"),
              orca.concat([odf1, odf2], join="inner"))
        check(pd.concat([pdf1, pdf2], ignore_index=True),
              orca.concat([odf1, odf2], ignore_index=True))

        # --- frames whose columns only partly overlap ---------------------
        pdf1 = pd.DataFrame([[3, 1], [6, 2]], columns=['letter', 'number'])
        odf1 = orca.DataFrame([[3, 1], [6, 2]], columns=['letter', 'number'])
        pdf3 = pd.DataFrame([[100, 3, 16], [90, 4, 7]],
                            columns=['letter', 'number', 'animal'])
        odf3 = orca.DataFrame([[100, 3, 16], [90, 4, 7]],
                              columns=['letter', 'number', 'animal'])

        check(pd.concat([pdf1, pdf3], join="inner"),
              orca.concat([odf1, odf3], join="inner"))
        check(pd.concat([pdf1, pdf3], join="outer", sort=False),
              orca.concat([odf1, odf3], join="outer", sort=False))
        check(pd.concat([pdf1, pdf3], ignore_index=True, sort=False),
              orca.concat([odf1, odf3], ignore_index=True, sort=False))

        # --- multi-indexed frame concatenated with a flat one -------------
        tuples = [('cobra', 'mark i'), ('cobra', 'mark ii'),
                  ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
                  ('viper', 'mark ii'), ('viper', 'mark iii')]
        pandas_index = pd.MultiIndex.from_tuples(tuples)
        values = [[12, 2], [0, 4], [10, 20], [1, 4], [7, 1], [16, 36]]
        pdf = pd.DataFrame(values,
                           columns=['max_speed', 'shield'],
                           index=pandas_index)

        orca_index = orca.MultiIndex.from_tuples(tuples)
        odf = orca.DataFrame(values,
                             columns=['max_speed', 'shield'],
                             index=orca_index)

        check(pd.concat([pdf, pdf1], ignore_index=True, sort=False),
              orca.concat([odf, odf1], ignore_index=True, sort=False))
Exemple #13
0
 def test_to_csv(self):
     """Write a small orca frame to ``tocsv.csv`` under ``WORK_DIR``."""
     turtles = orca.DataFrame({
         'name': ['Raphael', 'Donatello'],
         'mask': ['red', 'purple'],
         'weapon': ['sai', 'bo staff'],
     })
     target = f"{WORK_DIR}tocsv.csv"
     turtles.to_csv(path_or_buf=target)
Exemple #14
0
 def test_indexing_dataframe_case_1(self):
     """Selecting one column by label should give the same Series as pandas."""
     filled = np.full((8, 4), 10)
     pandas_frame = pd.DataFrame(filled,
                                 index=pd.date_range('1/1/2000', periods=8),
                                 columns=['A', 'B', 'C', 'D'])
     pandas_column = pandas_frame['A']
     orca_frame = orca.DataFrame(filled,
                                 index=orca.date_range('1/1/2000', periods=8),
                                 columns=['A', 'B', 'C', 'D'])
     orca_column = orca_frame['A']
     assert_series_equal(pandas_column, orca_column.to_pandas())
Exemple #15
0
    def test_indexing_dataframe_head_tail(self):
        """``head`` / ``tail`` on orca frames should match pandas."""
        def animal_values():
            return {
                'animal': ['alligator', 'bee', 'falcon', 'lion', 'monkey',
                           'parrot', 'shark', 'whale', 'zebra'],
                'id': [1, 2, 3, 4, 5, 6, 7, 8, 9],
            }

        def check(pandas_result, orca_result):
            assert_frame_equal(pandas_result, orca_result.to_pandas())

        pdf = pd.DataFrame(animal_values())
        odf = orca.DataFrame(animal_values())

        # head: default count, explicit counts, then on a filtered frame
        check(pdf.head(), odf.head())
        # TODO: orca.DataFrame.head(0)
        # assert_frame_equal(pdf.head(0), odf.head(0).to_pandas())
        for count in (5, 3, -3):
            check(pdf.head(count), odf.head(count))
        check(pdf[pdf['id'] > 5].head(-3), odf[odf['id'] > 5].head(-3))
        # TODO: orca.ArithExpression.head(-3)
        # assert_frame_equal((pdf['id']+1).head(-3), (odf['id']+1).head(-3).to_pandas())

        # tail: same sequence of checks
        check(pdf.tail(), odf.tail())
        # TODO: orca.DataFrame.tail(0)
        # assert_frame_equal(pdf.tail(0), odf.tail(0).to_pandas())
        for count in (5, 3, -3):
            check(pdf.tail(count), odf.tail(count))
        check(pdf[pdf['id'] > 5].tail(-3), odf[odf['id'] > 5].tail(-3))
Exemple #16
0
 def test_indexing_dataframe_iloc_get(self):
     """Positional ``iloc`` reads on an orca frame should match pandas."""
     def records():
         return [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
                 {'a': 100, 'b': 200, 'c': 300, 'd': 400},
                 {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000}]

     pdf = pd.DataFrame(records())
     odf = orca.DataFrame(records())

     def same_frame(key):
         # pandas is indexed first, then orca with the same key.
         assert_frame_equal(pdf.iloc[key], odf.iloc[key].to_pandas())

     # single integer -> Series
     assert_series_equal(pdf.iloc[0], odf.iloc[0].to_pandas())
     # list of row positions
     same_frame([0, 1])
     # row slice, i.e. iloc[:3]
     same_frame(slice(None, 3))
     # integer on both axes
     # TODO: odf.iloc[0, 1] should return an integer scalar of type
     # numpy.int64 rather than a Series
     # self.assertEqual(pdf.iloc[0, 1], odf.iloc[0, 1])
     # list on both axes
     same_frame(([0, 2], [1, 3]))
     # slice on both axes, i.e. iloc[1:3, 0:3]
     same_frame((slice(1, 3), slice(0, 3)))
     # boolean mask on rows only
     same_frame([False, True, False])
     # boolean mask on columns, all rows
     same_frame((slice(None), [True, False, True, False]))
     # boolean mask on rows, all columns
     same_frame(([False, True, False], slice(None)))
     # boolean mask on both axes
     same_frame(([True, True, False], [True, False, True, False]))
Exemple #17
0
def calc_stock_pnl(ports, daily_rtn, holding_days, end_date, last_days):
    """Build a per-position frame with return, exposure and P&L columns.

    Args:
        ports: frame with at least the columns ``tranche``, ``PERMNO`` and
            ``wt`` (those are the ones read here).
        daily_rtn: frame with ``date``, ``PERMNO`` and ``RET`` columns.
            NOTE: it is re-indexed IN PLACE on ``['date', 'PERMNO']``, so
            the caller's object is modified.
        holding_days: number of ages to generate (ages run from 1 to
            ``holding_days`` inclusive).
        end_date: upper bound applied to ``last_day`` via ``clip``.
        last_days: frame joined on ``PERMNO``; expected to provide a
            ``last_day`` column -- TODO confirm against caller.

    Returns:
        The ``pos`` frame with columns including ``date``, ``PERMNO``,
        ``tranche``, ``age``, ``expr``, ``ret`` and ``pnl``.
    """
    # Distinct tranche values in ascending order.
    dates = ports[['tranche']].drop_duplicates().sort_values(by='tranche')

    # For each age, pair every tranche with the tranche `age` rows later in
    # the sorted list, and accumulate the results into one frame.
    dates_after_ages = orca.DataFrame()
    for age in range(1, holding_days + 1):
        dates_after_age_i = dates.copy()
        dates_after_age_i['age'] = age
        dates_after_age_i['date_after_age'] = dates_after_age_i[
            'tranche'].shift(-age)
        dates_after_ages.append(dates_after_age_i, inplace=True)

    pos = ports.merge(dates_after_ages, on='tranche')
    pos = pos.join(last_days, on='PERMNO')
    # Keep rows that have a shifted date no later than min(last_day, end_date),
    # and only the columns needed below.
    pos = pos.loc[(pos.date_after_age.notnull() &
                   (pos.date_after_age <= pos.last_day.clip(upper=end_date))),
                  ['date_after_age', 'PERMNO', 'tranche', 'age', 'wt']]
    # presumably materializes the lazily-built orca expression -- TODO confirm
    pos = pos.compute()
    pos.rename(columns={'date_after_age': 'date', 'wt': 'expr'}, inplace=True)
    pos['ret'] = 0.0
    pos['pnl'] = 0.0

    # use set_index to make it easy to equal join two Frames
    daily_rtn.set_index(['date', 'PERMNO'], inplace=True)
    pos.set_index(['date', 'PERMNO'], inplace=True)
    pos['ret'] = daily_rtn['RET']
    pos.reset_index(inplace=True)
    # NOTE(review): `lazy=True` and the argument-less `transform()` are
    # orca-specific; this appears to evaluate the cumulative-product
    # expression within each (PERMNO, tranche) group -- confirm against the
    # orca documentation.
    pos['expr'] = (pos.expr * (1 + pos.ret).cumprod()).groupby(
        ['PERMNO', 'tranche'], lazy=True).transform()
    pos['pnl'] = pos.expr * pos.ret / (1 + pos.ret)

    return pos
Exemple #18
0
    def test_dataframe_Combining_joining_merging_append_on_disk(self):
        """Prepare empty pandas/orca frames with the CSV fixture's data columns."""
        # NOTE(review): read_table is called without arguments here.
        odf_disk = orca.read_table()

        # print(self.odf_csv.dtypes)
        # print(self.pdf_csv.dtypes)
        pdf = pd.DataFrame(
            columns=self.odf_csv._data_columns)
        odf = orca.DataFrame(
            columns=self.odf_csv._data_columns)
Exemple #19
0
    def test_reset_index_with_multiindex_columns(self):
        """``reset_index`` on multi-index columns should print like pandas."""
        row_index = pd.MultiIndex.from_tuples(
            [('bird', 'falcon'), ('bird', 'parrot'),
             ('mammal', 'lion'), ('mammal', 'monkey')],
            names=['class', 'name'])
        col_index = pd.MultiIndex.from_tuples([('speed', 'max'),
                                               ('species', 'type')])
        rows = [(389.0, 'fly'), (24.0, 'fly'), (80.5, 'run'),
                (np.nan, 'jump')]
        pdf = pd.DataFrame(rows, index=row_index, columns=col_index)
        odf = orca.DataFrame(pdf)

        self.assertEqual(repr(odf), repr(pdf))

        # Each entry is one reset_index call signature, checked in order.
        option_sets = (
            {},
            {'level': 'class'},
            {'level': 'class', 'col_level': 1},
            {'level': 'class', 'col_level': 1, 'col_fill': 'species'},
            {'level': 'class', 'col_level': 1, 'col_fill': 'genus'},
        )
        for options in option_sets:
            self.assertEqual(repr(odf.reset_index(**options)),
                             repr(pdf.reset_index(**options)))
Exemple #20
0
    def test_dataframe_column_level_name(self):
        """A named column index should be preserved by ``orca.DataFrame``."""
        named_columns = pd.Index(['A', 'B', 'C'], name='X')
        pandas_frame = pd.DataFrame([[1, 2, 3], [4, 5, 6]],
                                    columns=named_columns)
        orca_frame = orca.DataFrame(pandas_frame)

        self.assertEqual(repr(orca_frame), repr(pandas_frame))
        # NOTE: the same comparison is made twice.
        for _ in range(2):
            self.assertEqual(repr(orca_frame.columns.names),
                             repr(pandas_frame.columns.names))
Exemple #21
0
 def test_dataframe_Function_application_GroupBy_window_transform(self):
     """``transform`` with a list of NumPy functions should match pandas."""
     def rows():
         # the last row is all-NaN
         return [[1, 2, 3], [4, 5, 6], [7, 8, 9],
                 [np.nan, np.nan, np.nan]]

     pandas_frame = pd.DataFrame(rows(), columns=['A', 'B', 'C'])
     orca_frame = orca.DataFrame(rows(), columns=['A', 'B', 'C'])

     expected = pandas_frame.transform([np.sqrt, np.exp])
     actual = orca_frame.transform([np.sqrt, np.exp]).to_pandas()
     assert_frame_equal(expected, actual)
Exemple #22
0
 def test_dataframe_multiindex_columns(self):
     """Wrap a pandas frame keyed by three-element tuples in orca."""
     tuple_keyed = {
         ('x', 'a', '1'): [1, 2, 3],
         ('x', 'b', '2'): [4, 5, 6],
         ('y.z', 'c.d', '3'): [7, 8, 9],
         ('x', 'b', '4'): [10, 11, 12],
     }
     pandas_frame = pd.DataFrame(tuple_keyed, index=[0, 1, 3])
     orca_frame = orca.DataFrame(pandas_frame)
Exemple #23
0
 def test_join_from_csv_param_sort(self):
     """Set up a sorted join of a shuffled CSV frame; only pandas runs.

     The orca call is still pending, so nothing is asserted yet.
     """
     # TODO:NOT IMPLEMENTED
     pd_ll = self.pdf_csv_right.sample(frac=1)  # shuffled copy of the fixture
     orca_ll = orca.DataFrame(pd_ll)
     # odf_join = orca_ll.join(self.odf_csv_right, lsuffix='_caller', rsuffix='_other', sort=True)
     pdf_join = pd_ll.join(self.pdf_csv_right,
                           lsuffix='_caller',
                           rsuffix='_other',
                           sort=True)
     # Assign the filled column back: fillna(inplace=True) on the Series
     # returned by .loc is a chained in-place update on a temporary and
     # does not reliably modify pdf_join.
     pdf_join['TICKER_other'] = pdf_join['TICKER_other'].fillna("")
Exemple #24
0
    def test_join_from_dataframe_index_param_how(self):
        """Index joins with ``how`` = right / inner / outer should match pandas.

        The default (``how='left'``) is not exercised here.
        """
        def left_values():
            return {'A': [1, 2, 3], 'B': [11, 22, 33]}

        def right_values():
            return {'C': [111, 222, 333], 'D': [1111, 2222, 3333]}

        o_left = orca.DataFrame(left_values(), index=['K0', 'K1', 'K2'])
        o_right = orca.DataFrame(right_values(), index=['K0', 'K2', 'K3'])

        p_left = pd.DataFrame(left_values(), index=['K0', 'K1', 'K2'])
        p_right = pd.DataFrame(right_values(), index=['K0', 'K2', 'K3'])

        for mode in ("right", "inner", "outer"):
            joined_orca = o_left.join(o_right, how=mode)
            joined_pandas = p_left.join(p_right, how=mode)
            assert_frame_equal(joined_orca.to_pandas(),
                               joined_pandas,
                               check_dtype=False)
Exemple #25
0
    def test_indexing_dataframe_loc_get(self):
        """Label-based ``loc`` reads on orca frames should match pandas.

        Covers string labels, integer labels and single-character labels;
        several cases remain commented out as pending TODOs.
        """
        # --- string row labels --------------------------------------------
        pdf = pd.DataFrame([[1, 2], [4, 5], [7, 8]], index=['cobra', 'viper', 'sidewinder'],
                           columns=['max_speed', 'shield'])
        odf = orca.DataFrame([[1, 2], [4, 5], [7, 8]], index=['cobra', 'viper', 'sidewinder'],
                             columns=['max_speed', 'shield'])
        assert_series_equal(odf.loc['cobra'].to_pandas(), pdf.loc['cobra'])
        self.assertEqual(odf.loc['cobra', 'shield'], pdf.loc['cobra', 'shield'])
        assert_frame_equal(odf.loc[['cobra', 'viper']].to_pandas(), pdf.loc[['cobra', 'viper']])
        assert_frame_equal(odf.loc[[False, False, True]].to_pandas(), pdf.loc[[False, False, True]])
        assert_frame_equal(odf.loc[odf['shield'] > 5].to_pandas(), pdf.loc[pdf['shield'] > 5])
        assert_frame_equal(odf.loc[odf['shield'] > 6, ['max_speed']].to_pandas(),
                           pdf.loc[pdf['shield'] > 6, ['max_speed']])
        # assert_frame_equal(odf.loc['cobra':, 'max_speed':'shield'].to_pandas(), pdf.loc[pdf['shield'] > 6, ['max_speed']])
        assert_frame_equal(odf.loc['cobra':'viper'].to_pandas(), pdf.loc['cobra':'viper'])
        # TODO: odf.loc[:, 'max_speed'] yields a single-column DataFrame; it
        # should return a Series
        # assert_series_equal(odf.loc['cobra':'viper', 'max_speed'].to_pandas(), pdf.loc['cobra':'viper', 'max_speed'])
        # assert_series_equal(odf.loc[:, 'max_speed'].to_pandas(), pdf.loc[:, 'max_speed'])

        # --- integer row labels -------------------------------------------
        pdf = pd.DataFrame([[1, 2], [4, 5], [7, 8]], index=[7, 8, 9], columns=['max_speed', 'shield'])
        odf = orca.DataFrame([[1, 2], [4, 5], [7, 8]], index=[7, 8, 9], columns=['max_speed', 'shield'])
        assert_frame_equal(odf.loc[7:9].to_pandas(), pdf.loc[7:9])

        # --- single-character labels on both axes -------------------------
        v = np.full((6, 4), 10)
        pdf = pd.DataFrame(v, index=list('abcdef'), columns=list('ABCD'))
        odf = orca.DataFrame(v, index=list('abcdef'), columns=list('ABCD'))
        assert_frame_equal(odf.loc[['a', 'b', 'd'], :].to_pandas(), pdf.loc[['a', 'b', 'd'], :])
        # assert_frame_equal(odf.loc['d':, 'A':'C'].to_pandas(), pdf.loc['d':, 'A':'C'])
        # assert_frame_equal((odf.loc['a'] > 0).to_pandas(), pdf.loc['a'] > 0)

        # TODO: loc: when the index contains NaN values, pandas itself seems
        # to behave abnormally
        # pdd = pd.DataFrame(
        #     {'id': [1, 2, 2, 3, 3], 'sym': ['s', 'a', 's', 'a', 's'], 'values': [np.nan, 2, 2, np.nan, 2]})
        # pdd.set_index('values', inplace=True)
        # odd = orca.DataFrame(pdd)
        # assert_frame_equal(pdd.loc[np.nan:], odd.loc[np.nan:].to_pandas())

        # TODO: loc: case where the index is a datetime-type index
        # (frames are built here but nothing is asserted yet)
        pdd = pd.DataFrame(
            {'id': [1, 2, 2, 3, 3], 'sym': ['s', 'a', 's', 'a', 's'], 'values': [np.nan, 2, 2, np.nan, 2]},
            index=pd.date_range('20190101', '20190105', 5))
        odd = orca.DataFrame(pdd)
Exemple #26
0
    def test_dataframe_binary_operator_function_mul_dataframe(self):
        """Frame-by-frame multiplication (``*`` and ``mul``) should match pandas."""
        def shape_labels():
            return ['circle', 'triangle', 'rectangle']

        def check(pandas_result, orca_result):
            assert_frame_equal(pandas_result, orca_result.to_pandas())

        # left operand
        pdf = pd.DataFrame({'angles': [0, 3, 4], 'degrees': [360, 180, 360]},
                           index=shape_labels())
        odf = orca.DataFrame({'angles': [0, 3, 4], 'degrees': [360, 180, 360]},
                             index=shape_labels())
        check(pdf, odf)

        # right operand holding only one of the two columns
        p_other = pd.DataFrame({'angles': [0, 3, 4]}, index=shape_labels())
        o_other = orca.DataFrame({'angles': [0, 3, 4]}, index=shape_labels())
        check(p_other, o_other)

        # right operand holding both columns
        p_index = pd.DataFrame({'angles': [3, 5, 8], 'degrees': [2, 5, 7]},
                               index=shape_labels())
        o_index = orca.DataFrame({'angles': [3, 5, 8], 'degrees': [2, 5, 7]},
                                 index=shape_labels())
        check(p_index, o_index)

        # operator form; the orca product is converted before comparing
        expected = pdf * p_other
        actual = (odf * o_other).to_pandas()
        assert_frame_equal(expected, actual)

        # method form against each right operand
        expected = pdf.mul(p_other)
        actual = odf.mul(o_other).to_pandas()
        assert_frame_equal(expected, actual)

        expected = pdf.mul(p_index)
        actual = odf.mul(o_index).to_pandas()
        assert_frame_equal(expected, actual)
Exemple #27
0
    def test_indexing_dataframe_case_3(self):
        """Assigning a list to a column by label should match pandas."""
        pandas_frame = pd.DataFrame(
            np.full((8, 4), 10),
            index=pd.date_range('1/1/2000', periods=8),
            columns=['A', 'B', 'C', 'D'])
        orca_frame = orca.DataFrame(
            np.full((8, 4), 10),
            index=orca.date_range('1/1/2000', periods=8),
            columns=['A', 'B', 'C', 'D'])

        # item assignment with a plain list of row numbers
        pandas_frame['A'] = list(range(len(pandas_frame.index)))
        orca_frame['A'] = list(range(len(orca_frame.index)))

        pandas_column = pandas_frame['A']
        orca_column = orca_frame['A']
        assert_series_equal(pandas_column,
                            orca_column.to_pandas(),
                            check_dtype=False)
Exemple #28
0
 def test_dataframe_binary_operator_function_sub_series(self):
     """Build the same labelled frame in pandas and orca and compare them."""
     def values():
         return {'angles': [0, 3, 4], 'degrees': [360, 180, 360]}

     pandas_frame = pd.DataFrame(values(),
                                 index=['circle', 'triangle', 'rectangle'])
     orca_frame = orca.DataFrame(values(),
                                 index=['circle', 'triangle', 'rectangle'])
     assert_frame_equal(pandas_frame, orca_frame.to_pandas())
Exemple #29
0
    def test_join_from_dataframe_sort(self):
        """Set up a sorted join of a shuffled frame; only the pandas side runs.

        The orca call is still pending, so nothing is asserted yet.
        """
        pdf_other = pd.DataFrame({
            'key': ['K0', 'K1', 'K2'],
            'B': [11, 22, 33]
        })
        odf_other = orca.DataFrame({
            'key': ['K0', 'K1', 'K2'],
            'B': [11, 22, 33]
        })
        pdf = pd.DataFrame({
            'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
            'A': [1, 2, 3, 4, 5, 6]
        })

        pdf_ll = pdf.sample(frac=1)  # shuffled copy of pdf
        odf_ll = orca.DataFrame(pdf_ll)
        # TODO:NOT IMPLEMENTED
        # odf_join = odf_ll.join(odf_other, lsuffix='_caller', rsuffix='_other', sort=True)
        pdf_join = pdf_ll.join(pdf_other,
                               lsuffix='_caller',
                               rsuffix='_other',
                               sort=True)

        # Assign the filled column back: fillna(inplace=True) on the Series
        # returned by .loc is a chained in-place update on a temporary and
        # does not reliably modify pdf_join.
        pdf_join['key_other'] = pdf_join['key_other'].fillna("")
Exemple #30
0
    def test_multiindex_column_access(self):
        """A frame with four-level column labels should print the same in orca."""
        level_tuples = [
            ('a', 'w', 'q', 'b'),
            ('c', 'w', 'd', 'c'),
            ('e', 's', 'f', 's'),
            ('m', 'g', 'e', 'r'),
            ('s', 's', 'd', 'h'),
            ('i', 's', 's', 's'),
        ]
        columns = pd.MultiIndex.from_tuples(level_tuples)

        rows = [
            (1, 'a', 'x', 10, 100, 1000),
            (2, 'b', 'y', 20, 200, 2000),
            (3, 'c', 'z', 30, 300, 3000),
        ]
        pdf = pd.DataFrame(rows, columns=columns)
        odf = orca.DataFrame(pdf)

        self.assertEqual(repr(odf), repr(pdf))