def test_dataframe_binary_operator_function_div_multiIndex_param_level_param_fill_value(
        self):
    # Construct the same flat and two-level-indexed frames in pandas and in
    # orca, then verify that the multi-indexed orca frame converts back to
    # the pandas one.
    # NOTE(review): despite the test name, no div/level/fill_value call is
    # exercised here.
    def flat_data():
        return {'angles': [0, 3, 4], 'degrees': [360, 180, 360]}

    def flat_index():
        return ['circle', 'triangle', 'rectangle']

    def multi_data():
        return {
            'angles': [0, 3, 4, 4, 5, 6],
            'degrees': [360, 180, 360, 360, 540, 720],
        }

    def multi_index():
        return [
            ['A', 'A', 'A', 'B', 'B', 'B'],
            ['circle', 'triangle', 'rectangle',
             'square', 'pentagon', 'hexagon'],
        ]

    # The flat frames are only constructed; nothing is asserted on them.
    pdf = pd.DataFrame(flat_data(), index=flat_index())
    odf = orca.DataFrame(flat_data(), index=flat_index())

    pdf_multi = pd.DataFrame(multi_data(), index=multi_index())
    odf_multi = orca.DataFrame(multi_data(), index=multi_index())
    assert_frame_equal(pdf_multi, odf_multi.to_pandas())
def test_dataframe(self):
    # Basic parity checks between the fixture frames self.odf / self.pdf:
    # column arithmetic, column labels, boolean filtering, column selection
    # and attribute access, all compared through repr().
    pdf = self.pdf
    odf = self.odf

    incremented = (odf['a'] + 1).to_pandas()
    self.assertEqual(repr(incremented), repr(pdf['a'] + 1))

    self.assertEqual(repr(odf.columns), repr(pd.Index(['a', 'b'])))

    filtered = odf[odf['b'] > 2].to_pandas()
    self.assertEqual(repr(filtered), repr(pdf[pdf['b'] > 2]))

    self.assertEqual(repr(odf[['a', 'b']]), repr(pdf[['a', 'b']]))
    self.assertEqual(repr(odf.a), repr(pdf.a))

    # repr() of the orca frame must be non-empty.
    assert repr(odf)

    # Same column-selection check on a frame built inside the test.
    df = pd.DataFrame({
        'a': [1, 2, 3, 4, 5, 6, 7, 8, 9],
        'b': [4, 5, 6, 3, 2, 1, 0, 0, 0],
    })
    ddf = orca.DataFrame(df)
    self.assertEqual(repr(df[['a', 'b']]), repr(ddf[['a', 'b']]))
    # TODO:NOT IMPLEMENTED
    # self.assertEqual(repr(ddf.a.notnull().alias("x").name), repr("x"))

    # check orca.DataFrame(orca.Series)
    pandas_series = pd.Series([1, 2, 3], name='x')
    orca_series = orca.Series([1, 2, 3], name='x')
    self.assertEqual(repr(pd.DataFrame(pandas_series)),
                     repr(orca.DataFrame(orca_series)))
def test_join_from_dataframe_param_on(self):
    # Join the caller's column 'A' against the other frame's default index,
    # once in orca and once in pandas, and check that the results agree
    # (dtypes are not compared).
    odf = orca.DataFrame({
        'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
        'A': [1, 2, 3, 4, 5, 6]
    })
    odf_other = orca.DataFrame({
        'key': ['K0', 'K1', 'K2'],
        'B': [11, 22, 33]
    })
    odf_join = odf.join(odf_other, on='A',
                        lsuffix='_caller', rsuffix='_other')

    pdf = pd.DataFrame({
        'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
        'A': [1, 2, 3, 4, 5, 6]
    })
    pdf_other = pd.DataFrame({
        'key': ['K0', 'K1', 'K2'],
        'B': [11, 22, 33]
    })
    pdf_join = pdf.join(pdf_other, on='A',
                        lsuffix='_caller', rsuffix='_other')

    # Replace missing strings in the right-hand key column with "" before
    # comparing.  The result is assigned back instead of calling
    # fillna(inplace=True) on a .loc selection: that selection can be a
    # temporary copy, in which case the fill never reaches pdf_join.
    pdf_join['key_other'] = pdf_join['key_other'].fillna("")
    assert_frame_equal(odf_join.to_pandas(), pdf_join, check_dtype=False)
def test_join_from_dataframe_index(self):
    # Default index-on-index join of two frames whose string indexes only
    # partly overlap; orca must agree with pandas (dtypes not compared).
    def left_args():
        return {'A': [1, 2, 3], 'B': [11, 22, 33]}, ['K0', 'K1', 'K2']

    def right_args():
        return ({'C': [111, 222, 333], 'D': [1111, 2222, 3333]},
                ['K0', 'K2', 'K3'])

    data, labels = left_args()
    orca_left = orca.DataFrame(data, index=labels)
    data, labels = right_args()
    orca_right = orca.DataFrame(data, index=labels)
    odf_join = orca_left.join(orca_right)

    data, labels = left_args()
    pd_left = pd.DataFrame(data, index=labels)
    data, labels = right_args()
    pd_right = pd.DataFrame(data, index=labels)
    pdf_join = pd_left.join(pd_right)

    assert_frame_equal(odf_join.to_pandas(), pdf_join, check_dtype=False)
def test_dataframe_attributes_axes(self):
    # Compare both axes of equivalent pandas/orca frames for three kinds of
    # row index: the default one, a daily date range, and string labels.
    def check_axes(pandas_frame, orca_frame):
        # The row axis is converted to pandas first; the column axis is
        # compared as returned.
        assert_index_equal(pandas_frame.axes[0],
                           orca_frame.axes[0].to_pandas())
        assert_index_equal(pandas_frame.axes[1], orca_frame.axes[1])

    def data():
        return {'col1': [1, 2], 'col2': [3, 4]}

    # default index
    pdf = pd.DataFrame(data())
    odf = orca.DataFrame(data())
    check_axes(pdf, odf)

    # daily date range, passed positionally as the second argument
    pdf = pd.DataFrame(data(),
                       pd.date_range("20190101", periods=2, freq="d"))
    odf = orca.DataFrame(data(),
                         orca.date_range("20190101", periods=2, freq="d"))
    check_axes(pdf, odf)

    # string labels
    pdf = pd.DataFrame(data(), index=['a', 'b'])
    odf = orca.DataFrame(data(), index=['a', 'b'])
    check_axes(pdf, odf)
def test_join_from_dataframe_index_param_sort(self):
    # Index-on-index join with sort=True.  Only the pandas side runs; the
    # orca call is still disabled and nothing is asserted.
    def left_args():
        return {'A': [1, 2, 3], 'B': [11, 22, 33]}, ['K0', 'K1', 'K2']

    def right_args():
        return ({'C': [111, 222, 333], 'D': [1111, 2222, 3333]},
                ['K0', 'K2', 'K3'])

    data, labels = left_args()
    orca_left = orca.DataFrame(data, index=labels)
    data, labels = right_args()
    orca_right = orca.DataFrame(data, index=labels)
    # TODO:NOT IMPLEMENTED
    # odf_join = orca_left.join(orca_right, sort=True)

    data, labels = left_args()
    pd_left = pd.DataFrame(data, index=labels)
    data, labels = right_args()
    pd_right = pd.DataFrame(data, index=labels)
    pdf_join = pd_left.join(pd_right, sort=True)
def test_dataframe_append(self):
    # Append a 20-row frame to a 10-row frame with columns of many dtypes
    # and check that orca produces the same result as pandas.
    def build_frame(n):
        # n must be a multiple of 10: every ten-value base column is
        # repeated n // 10 times so that it lines up with the n-row 'id'
        # column.  Integer division is required because np.repeat does not
        # accept a float repeat count.
        reps = n // 10
        return pd.DataFrame({
            'id': np.arange(1, n + 1, 1, dtype='int32'),
            'date': np.repeat(
                pd.date_range('2019.08.01', periods=10, freq='D'), reps),
            'tsymbol': np.repeat(
                ['a', 'b', 'c', 'd', 'e', 'QWW', 'FEA', 'FFW', 'DER', 'POD'],
                reps),
            'tbool': np.repeat(
                np.repeat(np.arange(2, dtype='bool'), 5), reps),
            'tchar': np.repeat(np.arange(1, 11, 1, dtype='int8'), reps),
            'tshort': np.repeat(np.arange(1, 11, 1, dtype='int16'), reps),
            'tint': np.repeat(np.arange(1, 11, 1, dtype='int32'), reps),
            'tlong': np.repeat(np.arange(1, 11, 1, dtype='int64'), reps),
            'tfloat': np.repeat(np.arange(1, 11, 1, dtype='float32'), reps),
            'tdouble': np.repeat(np.arange(1, 11, 1, dtype='float64'), reps)
        })

    pdf1 = build_frame(10)
    pdf2 = build_frame(20)
    odf1 = orca.DataFrame(pdf1)
    odf2 = orca.DataFrame(pdf2)

    # pd.concat([pdf1, pdf2]) is the version-independent spelling of
    # pdf1.append(pdf2); DataFrame.append was removed in pandas 2.0.
    assert_frame_equal(pd.concat([pdf1, pdf2]),
                       odf1.append(odf2).to_pandas())
def test_dataframe_attributes_columns(self):
    # The column labels of a mixed-dtype frame must be the same in orca and
    # pandas, whatever the row index is: string labels with a duplicate, a
    # daily date range, or integers.
    def data():
        return {
            'float': [1.0, 2.0, 3.5, 6.5],
            'int': [1, 2, 7, 4],
            'datetime': pd.date_range('2019-01-02', periods=4),
            'string': ['foo', 'ss', 'sw', 'qa']
        }

    # string labels, 'c' appearing twice
    pdf = pd.DataFrame(data(), index=['a', 'b', 'c', 'c'])
    odf = orca.DataFrame(data(), index=['a', 'b', 'c', 'c'])
    assert_index_equal(pdf.columns, odf.columns)

    # daily date range
    pdf = pd.DataFrame(
        data(), index=pd.date_range("20190101", periods=4, freq="d"))
    # pd.to_datetime(["20190101","20190304"])
    odf = orca.DataFrame(
        data(), index=orca.date_range("20190101", periods=4, freq="d"))
    assert_index_equal(pdf.columns, odf.columns)

    # integer labels
    pdf = pd.DataFrame(data(), index=[1, 2, 3, 4])
    odf = orca.DataFrame(data(), index=[1, 2, 3, 4])
    assert_index_equal(pdf.columns, odf.columns)
def test_join_from_dataframe_how(self):
    # Join two frames that share a 'key' column on their default indexes
    # with how = right / inner / outer, and check that orca agrees with
    # pandas each time (dtypes are not compared).
    odf = orca.DataFrame({
        'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
        'A': [1, 2, 3, 4, 5, 6]
    })
    odf_other = orca.DataFrame({
        'key': ['K0', 'K1', 'K2'],
        'B': [11, 22, 33]
    })
    pdf = pd.DataFrame({
        'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
        'A': [1, 2, 3, 4, 5, 6]
    })
    pdf_other = pd.DataFrame({
        'key': ['K0', 'K1', 'K2'],
        'B': [11, 22, 33]
    })

    # by default, how = left
    # how = right
    odf_join = odf.join(odf_other, how="right",
                        lsuffix='_caller', rsuffix='_other')
    pdf_join = pdf.join(pdf_other, how="right",
                        lsuffix='_caller', rsuffix='_other')
    assert_frame_equal(odf_join.to_pandas(), pdf_join, check_dtype=False)

    # how = inner
    odf_join = odf.join(odf_other, how="inner",
                        lsuffix='_caller', rsuffix='_other')
    pdf_join = pdf.join(pdf_other, how="inner",
                        lsuffix='_caller', rsuffix='_other')
    assert_frame_equal(odf_join.to_pandas(), pdf_join, check_dtype=False)

    # how = outer
    odf_join = odf.join(odf_other, how="outer",
                        lsuffix='_caller', rsuffix='_other')
    pdf_join = pdf.join(pdf_other, how="outer",
                        lsuffix='_caller', rsuffix='_other')
    # Replace missing strings in the right-hand key column with "" before
    # comparing.  The result is assigned back instead of calling
    # fillna(inplace=True) on a .loc selection: that selection can be a
    # temporary copy, in which case the fill never reaches pdf_join.
    pdf_join['key_other'] = pdf_join['key_other'].fillna("")
    assert_frame_equal(odf_join.to_pandas(), pdf_join, check_dtype=False)
def test_dataframe_Combining_joining_merging_append_in_memory(self):
    # orca.DataFrame.append on two small in-memory frames: plain, with
    # ignore_index, with sort, and finally the in-place variant.
    #
    # The expected values are built with pd.concat([pdf, pdf2], ...), which
    # is what pdf.append(pdf2, ...) computed; DataFrame.append itself was
    # removed in pandas 2.0, so calling it would break the test there.
    pdf = pd.DataFrame([[1, 2], [3, 4]], columns=list('AB'))
    pdf2 = pd.DataFrame([[5, 6], [7, 8]], columns=list('AB'))
    odf = orca.DataFrame([[1, 2], [3, 4]], columns=list('AB'))
    odf2 = orca.DataFrame([[5, 6], [7, 8]], columns=list('AB'))

    assert_frame_equal(pd.concat([pdf, pdf2]),
                       odf.append(odf2).to_pandas())
    assert_frame_equal(pd.concat([pdf, pdf2], ignore_index=True),
                       odf.append(odf2, ignore_index=True).to_pandas())
    assert_frame_equal(pd.concat([pdf, pdf2], sort=True),
                       odf.append(odf2, sort=True).to_pandas())

    # In-place append: odf itself must now hold the combined rows.
    odf.append(odf2, inplace=True)
    assert_frame_equal(pd.concat([pdf, pdf2]), odf.to_pandas())
def test_dataframe_multiindex_names_level(self):
    # The level names of three-level MultiIndex columns must be the same on
    # an orca frame built from a pandas frame.
    column_tuples = [('X', 'A', 'Z'), ('X', 'B', 'Z'),
                     ('Y', 'C', 'Z'), ('Y', 'D', 'Z')]
    columns = pd.MultiIndex.from_tuples(
        column_tuples, names=['lvl_1', 'lvl_2', 'lv_3'])
    rows = [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
        [13, 14, 15, 16],
        [17, 18, 19, 20],
    ]
    pdf = pd.DataFrame(rows, columns=columns)

    odf = orca.DataFrame(pdf)
    # NOTE(review): the same assertion is made twice in a row.
    self.assertEqual(repr(odf.columns.names), repr(pdf.columns.names))
    self.assertEqual(repr(odf.columns.names), repr(pdf.columns.names))

    # A second, independent conversion of the same pandas frame.
    odf1 = orca.DataFrame(pdf)
    self.assertEqual(repr(odf1.columns.names), repr(pdf.columns.names))
def test_dataframe_concat(self):
    # orca.concat must match pd.concat for frames with identical columns,
    # frames with partly different columns, and a MultiIndex-ed frame
    # combined with a default-indexed one.
    def check(pandas_frames, orca_frames, **options):
        # Same keyword options go to both libraries.
        assert_frame_equal(
            pd.concat(pandas_frames, **options),
            orca.concat(orca_frames, **options).to_pandas())

    # --- identical columns ---
    pdf1 = pd.DataFrame([['a', 1], ['b', 2]], columns=['letter', 'number'])
    pdf2 = pd.DataFrame([['c', 3], ['d', 4]], columns=['letter', 'number'])
    odf1 = orca.DataFrame([['a', 1], ['b', 2]], columns=['letter', 'number'])
    odf2 = orca.DataFrame([['c', 3], ['d', 4]], columns=['letter', 'number'])
    check([pdf1, pdf2], [odf1, odf2])
    # assert_frame_equal(pd.concat([pdf1, pdf1]), orca.concat([odf1, odf1]).to_pandas())
    check([pdf1, pdf2], [odf1, odf2], join="inner")
    check([pdf1, pdf2], [odf1, odf2], ignore_index=True)

    # --- second frame carries an extra 'animal' column ---
    pdf1 = pd.DataFrame([[3, 1], [6, 2]], columns=['letter', 'number'])
    odf1 = orca.DataFrame([[3, 1], [6, 2]], columns=['letter', 'number'])
    pdf3 = pd.DataFrame([[100, 3, 16], [90, 4, 7]],
                        columns=['letter', 'number', 'animal'])
    odf3 = orca.DataFrame([[100, 3, 16], [90, 4, 7]],
                          columns=['letter', 'number', 'animal'])
    check([pdf1, pdf3], [odf1, odf3], join="inner")
    check([pdf1, pdf3], [odf1, odf3], join="outer", sort=False)
    check([pdf1, pdf3], [odf1, odf3], ignore_index=True, sort=False)

    # --- MultiIndex-ed frame combined with a default-indexed frame ---
    tuples = [('cobra', 'mark i'), ('cobra', 'mark ii'),
              ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
              ('viper', 'mark ii'), ('viper', 'mark iii')]
    pandas_index = pd.MultiIndex.from_tuples(tuples)
    values = [[12, 2], [0, 4], [10, 20], [1, 4], [7, 1], [16, 36]]
    pdf = pd.DataFrame(values, columns=['max_speed', 'shield'],
                       index=pandas_index)
    orca_index = orca.MultiIndex.from_tuples(tuples)
    odf = orca.DataFrame(values, columns=['max_speed', 'shield'],
                         index=orca_index)
    check([pdf, pdf1], [odf, odf1], ignore_index=True, sort=False)
def test_to_csv(self):
    # Write a small all-string orca frame to "tocsv.csv" under WORK_DIR.
    # Nothing is asserted; the test passes if to_csv does not raise.
    records = {
        'name': ['Raphael', 'Donatello'],
        'mask': ['red', 'purple'],
        'weapon': ['sai', 'bo staff']
    }
    frame = orca.DataFrame(records)
    target = f"{WORK_DIR}tocsv.csv"
    frame.to_csv(path_or_buf=target)
def test_indexing_dataframe_case_1(self):
    # Selecting one column by name from a date-indexed frame must give the
    # same Series in orca and pandas.
    np_mat = np.full((8, 4), 10)

    pdf = pd.DataFrame(np_mat,
                       index=pd.date_range('1/1/2000', periods=8),
                       columns=list('ABCD'))
    pandas_column = pdf['A']

    odf = orca.DataFrame(np_mat,
                         index=orca.date_range('1/1/2000', periods=8),
                         columns=list('ABCD'))
    orca_column = odf['A']

    assert_series_equal(pandas_column, orca_column.to_pandas())
def test_indexing_dataframe_head_tail(self):
    # head()/tail() with the default, positive and negative row counts, on
    # the full frame and on a boolean-filtered frame.
    def records():
        return {
            'animal': ['alligator', 'bee', 'falcon', 'lion', 'monkey',
                       'parrot', 'shark', 'whale', 'zebra'],
            'id': [1, 2, 3, 4, 5, 6, 7, 8, 9],
        }

    pdf = pd.DataFrame(records())
    odf = orca.DataFrame(records())

    # head
    assert_frame_equal(pdf.head(), odf.head().to_pandas())
    # TODO: orca.DataFrame.head(0)
    # assert_frame_equal(pdf.head(0), odf.head(0).to_pandas())
    for count in (5, 3, -3):
        assert_frame_equal(pdf.head(count), odf.head(count).to_pandas())
    assert_frame_equal(pdf[pdf['id'] > 5].head(-3),
                       odf[odf['id'] > 5].head(-3).to_pandas())
    # TODO: orca.ArithExpression.head(-3)
    # assert_frame_equal((pdf['id']+1).head(-3), (odf['id']+1).head(-3).to_pandas())

    # tail
    assert_frame_equal(pdf.tail(), odf.tail().to_pandas())
    # TODO: orca.DataFrame.tail(0)
    # assert_frame_equal(pdf.tail(0), odf.tail(0).to_pandas())
    for count in (5, 3, -3):
        assert_frame_equal(pdf.tail(count), odf.tail(count).to_pandas())
    assert_frame_equal(pdf[pdf['id'] > 5].tail(-3),
                       odf[odf['id'] > 5].tail(-3).to_pandas())
def test_indexing_dataframe_iloc_get(self):
    # Positional selection through .iloc with an integer, lists, slices and
    # boolean masks, on one axis and on both axes.
    def records():
        return [{'a': 1, 'b': 2, 'c': 3, 'd': 4},
                {'a': 100, 'b': 200, 'c': 300, 'd': 400},
                {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000}]

    pdf = pd.DataFrame(records())
    odf = orca.DataFrame(records())

    # integer -> a single row, returned as a Series
    assert_series_equal(pdf.iloc[0], odf.iloc[0].to_pandas())

    # TODO: odf.iloc[0, 1] should return an integer scalar of type
    # numpy.int64 rather than a Series
    # self.assertEqual(pdf.iloc[0, 1], odf.iloc[0, 1])

    # Every selector below yields a DataFrame.  A tuple is what
    # frame.iloc[rows, cols] passes to the indexer, and slice(a, b) is what
    # the a:b syntax produces.
    frame_selectors = [
        [0, 1],                                             # list
        slice(None, 3),                                     # slice  [:3]
        ([0, 2], [1, 3]),                                   # both axes list
        (slice(1, 3), slice(0, 3)),                         # both axes slice
        [False, True, False],                               # row mask
        (slice(None), [True, False, True, False]),          # column mask
        ([False, True, False], slice(None)),                # row mask, all cols
        ([True, True, False], [True, False, True, False]),  # both axes boolean array
    ]
    for selector in frame_selectors:
        assert_frame_equal(pdf.iloc[selector],
                           odf.iloc[selector].to_pandas())
def calc_stock_pnl(ports, daily_rtn, holding_days, end_date, last_days):
    """Expand portfolio rows over holding ages and attach return and PnL columns.

    Steps, as visible in the code:

    1. Take the distinct, sorted ``tranche`` values of ``ports``.
    2. For each ``age`` in ``1..holding_days`` record, next to every tranche
       value, the tranche value ``age`` rows further down that sorted list
       (``shift(-age)``) as ``date_after_age``.
    3. Merge that table into ``ports`` on ``tranche`` and join ``last_days``
       on ``PERMNO``.
    4. Keep rows whose ``date_after_age`` is not null and not later than
       ``last_day`` capped at ``end_date``.
    5. Look up ``RET`` from ``daily_rtn`` by ``(date, PERMNO)`` and derive the
       ``expr`` and ``pnl`` columns from it.

    Args:
        ports: frame with at least a ``tranche`` column; ``PERMNO`` and ``wt``
            are selected after the merge and presumably come from here too
            -- TODO confirm.
        daily_rtn: frame with ``date``, ``PERMNO`` and ``RET`` columns.
            NOTE: it is modified in place -- its index is set to
            ``['date', 'PERMNO']`` and is not restored.
        holding_days: number of ages to generate (ages run from 1 to
            ``holding_days`` inclusive).
        end_date: upper bound applied to ``last_day`` via ``clip``.
        last_days: frame joined on ``PERMNO``; presumably provides the
            ``last_day`` column -- TODO confirm.

    Returns:
        The ``pos`` frame with columns including ``date``, ``PERMNO``,
        ``tranche``, ``age``, ``expr``, ``ret`` and ``pnl``.
    """
    dates = ports[['tranche']].drop_duplicates().sort_values(by='tranche')
    # Accumulate one copy of `dates` per age into a single frame.
    dates_after_ages = orca.DataFrame()
    for age in range(1, holding_days + 1):
        dates_after_age_i = dates.copy()
        dates_after_age_i['age'] = age
        # shift(-age): the tranche value `age` rows later in sorted order;
        # the last `age` rows get a missing value and are filtered out below.
        dates_after_age_i['date_after_age'] = dates_after_age_i[
            'tranche'].shift(-age)
        dates_after_ages.append(dates_after_age_i, inplace=True)
    pos = ports.merge(dates_after_ages, on='tranche')
    pos = pos.join(last_days, on='PERMNO')
    # Row filter plus column projection in a single .loc call.
    pos = pos.loc[(pos.date_after_age.notnull() &
                   (pos.date_after_age <= pos.last_day.clip(upper=end_date))),
                  ['date_after_age', 'PERMNO', 'tranche', 'age', 'wt']]
    # NOTE(review): presumably materializes a lazily evaluated orca
    # expression into a concrete frame -- confirm against the orca docs.
    pos = pos.compute()
    pos.rename(columns={'date_after_age': 'date', 'wt': 'expr'}, inplace=True)
    # Placeholder columns; both are overwritten below.
    pos['ret'] = 0.0
    pos['pnl'] = 0.0
    # use set_index to make it easy to equal join two Frames
    daily_rtn.set_index(['date', 'PERMNO'], inplace=True)
    pos.set_index(['date', 'PERMNO'], inplace=True)
    pos['ret'] = daily_rtn['RET']
    pos.reset_index(inplace=True)
    # NOTE(review): groupby(..., lazy=True).transform() is orca-specific;
    # it looks like the cumulative product is meant to be evaluated within
    # each (PERMNO, tranche) group -- confirm.
    pos['expr'] = (pos.expr * (1 + pos.ret).cumprod()).groupby(
        ['PERMNO', 'tranche'], lazy=True).transform()
    pos['pnl'] = pos.expr * pos.ret / (1 + pos.ret)
    return pos
def test_dataframe_Combining_joining_merging_append_on_disk(self): pdf = odf_disk = orca.read_table() # print(self.odf_csv.dtypes) # print(self.pdf_csv.dtypes) pdf = pd.DataFrame(columns=self.odf_csv._data_columns) odf = orca.DataFrame(columns=self.odf_csv._data_columns)
def test_reset_index_with_multiindex_columns(self):
    # reset_index() on a frame with a two-level row index and two-level
    # columns, for several level / col_level / col_fill combinations.
    # Results are compared through repr().
    row_index = pd.MultiIndex.from_tuples(
        [('bird', 'falcon'), ('bird', 'parrot'),
         ('mammal', 'lion'), ('mammal', 'monkey')],
        names=['class', 'name'])
    column_index = pd.MultiIndex.from_tuples(
        [('speed', 'max'), ('species', 'type')])
    rows = [(389.0, 'fly'), (24.0, 'fly'), (80.5, 'run'), (np.nan, 'jump')]
    pdf = pd.DataFrame(rows, index=row_index, columns=column_index)
    odf = orca.DataFrame(pdf)

    self.assertEqual(repr(odf), repr(pdf))

    variants = [
        {},
        {'level': 'class'},
        {'level': 'class', 'col_level': 1},
        {'level': 'class', 'col_level': 1, 'col_fill': 'species'},
        {'level': 'class', 'col_level': 1, 'col_fill': 'genus'},
    ]
    for options in variants:
        self.assertEqual(repr(odf.reset_index(**options)),
                         repr(pdf.reset_index(**options)))
def test_dataframe_column_level_name(self):
    # A name set on the column Index ('X') must be kept by the orca frame:
    # checked through the frame repr and through columns.names.
    named_columns = pd.Index(['A', 'B', 'C'], name='X')
    pdf = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=named_columns)
    odf = orca.DataFrame(pdf)

    self.assertEqual(repr(odf), repr(pdf))
    # NOTE(review): the same assertion is made twice in a row.
    self.assertEqual(repr(odf.columns.names), repr(pdf.columns.names))
    self.assertEqual(repr(odf.columns.names), repr(pdf.columns.names))
def test_dataframe_Function_application_GroupBy_window_transform(self):
    # DataFrame.transform with a list of NumPy functions, on a frame whose
    # last row is entirely NaN.
    def rows():
        return [[1, 2, 3],
                [4, 5, 6],
                [7, 8, 9],
                [np.nan, np.nan, np.nan]]

    pdf = pd.DataFrame(rows(), columns=['A', 'B', 'C'])
    odf = orca.DataFrame(rows(), columns=['A', 'B', 'C'])

    expected = pdf.transform([np.sqrt, np.exp])
    actual = odf.transform([np.sqrt, np.exp]).to_pandas()
    assert_frame_equal(expected, actual)
def test_dataframe_multiindex_columns(self):
    # Build a pandas frame whose column keys are three-element tuples and
    # convert it to orca.  Nothing is asserted; the test passes if the
    # conversion does not raise.
    column_data = {
        ('x', 'a', '1'): [1, 2, 3],
        ('x', 'b', '2'): [4, 5, 6],
        ('y.z', 'c.d', '3'): [7, 8, 9],
        ('x', 'b', '4'): [10, 11, 12],
    }
    row_labels = [0, 1, 3]
    pdf = pd.DataFrame(column_data, index=row_labels)
    odf = orca.DataFrame(pdf)
def test_join_from_csv_param_sort(self):
    # Join a shuffled copy of the right-hand CSV fixture with the fixture
    # itself using sort=True.  Only the pandas side runs; the orca call is
    # still disabled and nothing is asserted.
    # TODO:NOT IMPLEMENTED
    pd_ll = self.pdf_csv_right.sample(frac=1)
    orca_ll = orca.DataFrame(pd_ll)
    # odf_join = orca_ll.join(self.odf_csv_right, lsuffix='_caller', rsuffix='_other', sort=True)
    pdf_join = pd_ll.join(self.pdf_csv_right,
                          lsuffix='_caller', rsuffix='_other', sort=True)
    # Replace missing strings with "".  The result is assigned back instead
    # of calling fillna(inplace=True) on a .loc selection: that selection
    # can be a temporary copy, in which case the fill never reaches
    # pdf_join.
    pdf_join['TICKER_other'] = pdf_join['TICKER_other'].fillna("")
def test_join_from_dataframe_index_param_how(self):
    # Index-on-index join with how = right / inner / outer on two frames
    # whose string indexes only partly overlap (dtypes are not compared).
    def left_args():
        return {'A': [1, 2, 3], 'B': [11, 22, 33]}, ['K0', 'K1', 'K2']

    def right_args():
        return ({'C': [111, 222, 333], 'D': [1111, 2222, 3333]},
                ['K0', 'K2', 'K3'])

    data, labels = left_args()
    orca_left = orca.DataFrame(data, index=labels)
    data, labels = right_args()
    orca_right = orca.DataFrame(data, index=labels)

    data, labels = left_args()
    pd_left = pd.DataFrame(data, index=labels)
    data, labels = right_args()
    pd_right = pd.DataFrame(data, index=labels)

    # by default, how = left
    for how in ("right", "inner", "outer"):
        odf_join = orca_left.join(orca_right, how=how)
        pdf_join = pd_left.join(pd_right, how=how)
        # Disabled for the inner and outer cases:
        # pdf_join.loc[:, 'key_other'].fillna("", inplace=True)
        assert_frame_equal(odf_join.to_pandas(), pdf_join,
                           check_dtype=False)
def test_indexing_dataframe_loc_get(self):
    # Label-based selection through .loc: single label, label pair, label
    # list, boolean list, boolean Series, label slices, and an integer
    # label slice.  orca results are converted to pandas before comparing.
    pdf = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
                       index=['cobra', 'viper', 'sidewinder'],
                       columns=['max_speed', 'shield'])
    odf = orca.DataFrame([[1, 2], [4, 5], [7, 8]],
                         index=['cobra', 'viper', 'sidewinder'],
                         columns=['max_speed', 'shield'])

    assert_series_equal(odf.loc['cobra'].to_pandas(), pdf.loc['cobra'])
    self.assertEqual(odf.loc['cobra', 'shield'], pdf.loc['cobra', 'shield'])
    assert_frame_equal(odf.loc[['cobra', 'viper']].to_pandas(),
                       pdf.loc[['cobra', 'viper']])
    assert_frame_equal(odf.loc[[False, False, True]].to_pandas(),
                       pdf.loc[[False, False, True]])
    assert_frame_equal(odf.loc[odf['shield'] > 5].to_pandas(),
                       pdf.loc[pdf['shield'] > 5])
    assert_frame_equal(odf.loc[odf['shield'] > 6, ['max_speed']].to_pandas(),
                       pdf.loc[pdf['shield'] > 6, ['max_speed']])
    # assert_frame_equal(odf.loc['cobra':, 'max_speed':'shield'].to_pandas(), pdf.loc[pdf['shield'] > 6, ['max_speed']])
    assert_frame_equal(odf.loc['cobra':'viper'].to_pandas(),
                       pdf.loc['cobra':'viper'])
    # TODO: odf.loc[:, 'max_speed'] -- a result with only one column should
    # be returned as a Series rather than a DataFrame
    # assert_series_equal(odf.loc['cobra':'viper', 'max_speed'].to_pandas(), pdf.loc['cobra':'viper', 'max_speed'])
    # assert_series_equal(odf.loc[:, 'max_speed'].to_pandas(), pdf.loc[:, 'max_speed'])

    # Integer labels: a .loc slice works on labels, not positions.
    pdf = pd.DataFrame([[1, 2], [4, 5], [7, 8]],
                       index=[7, 8, 9],
                       columns=['max_speed', 'shield'])
    odf = orca.DataFrame([[1, 2], [4, 5], [7, 8]],
                         index=[7, 8, 9],
                         columns=['max_speed', 'shield'])
    assert_frame_equal(odf.loc[7:9].to_pandas(), pdf.loc[7:9])

    v = np.full((6, 4), 10)
    pdf = pd.DataFrame(v, index=list('abcdef'), columns=list('ABCD'))
    odf = orca.DataFrame(v, index=list('abcdef'), columns=list('ABCD'))
    # (A stray, unused `pd.DataFrame()` call that used to sit here was
    # removed; it created an empty frame and discarded it.)
    assert_frame_equal(odf.loc[['a', 'b', 'd'], :].to_pandas(),
                       pdf.loc[['a', 'b', 'd'], :])
    # assert_frame_equal(odf.loc['d':, 'A':'C'].to_pandas(), pdf.loc['d':, 'A':'C'])
    # assert_frame_equal((odf.loc['a'] > 0).to_pandas(), pdf.loc['a'] > 0)

    # TODO: loc: when the index contains NaN values, pandas seems to behave
    # abnormally
    # pdd = pd.DataFrame(
    #     {'id': [1, 2, 2, 3, 3], 'sym': ['s', 'a', 's', 'a', 's'], 'values': [np.nan, 2, 2, np.nan, 2]})
    # pdd.set_index('values', inplace=True)
    # odd = orca.DataFrame(pdd)
    # assert_frame_equal(pdd.loc[np.nan:], odd.loc[np.nan:].to_pandas())

    # TODO: loc: when the index is a datetime-typed index
    # Only the fixtures are built for this case so far; nothing is asserted.
    pdd = pd.DataFrame(
        {'id': [1, 2, 2, 3, 3],
         'sym': ['s', 'a', 's', 'a', 's'],
         'values': [np.nan, 2, 2, np.nan, 2]},
        index=pd.date_range('20190101', '20190105', 5))
    odd = orca.DataFrame(pdd)
def test_dataframe_binary_operator_function_mul_dataframe(self):
    # Frame-by-frame multiplication via the * operator and via .mul(), with
    # a one-column operand and with a two-column operand.
    def shapes():
        return ['circle', 'triangle', 'rectangle']

    def build_pair(make_data):
        # Build the pandas and the orca version of one fixture and check
        # that they start out equal.
        pandas_frame = pd.DataFrame(make_data(), index=shapes())
        orca_frame = orca.DataFrame(make_data(), index=shapes())
        assert_frame_equal(pandas_frame, orca_frame.to_pandas())
        return pandas_frame, orca_frame

    pdf, odf = build_pair(
        lambda: {'angles': [0, 3, 4], 'degrees': [360, 180, 360]})
    p_other, o_other = build_pair(
        lambda: {'angles': [0, 3, 4]})
    p_index, o_index = build_pair(
        lambda: {'angles': [3, 5, 8], 'degrees': [2, 5, 7]})

    # operator form
    expected = pdf * p_other
    actual = (odf * o_other).to_pandas()
    assert_frame_equal(expected, actual)

    # method form, one-column operand
    expected = pdf.mul(p_other)
    actual = odf.mul(o_other).to_pandas()
    assert_frame_equal(expected, actual)

    # method form, two-column operand
    expected = pdf.mul(p_index)
    actual = odf.mul(o_index).to_pandas()
    assert_frame_equal(expected, actual)
def test_indexing_dataframe_case_3(self):
    # Assigning a list to frame['A'] and reading the column back must give
    # the same Series in orca and pandas (dtype is not compared).
    pdf = pd.DataFrame(np.full((8, 4), 10),
                       index=pd.date_range('1/1/2000', periods=8),
                       columns=list('ABCD'))
    odf = orca.DataFrame(np.full((8, 4), 10),
                         index=orca.date_range('1/1/2000', periods=8),
                         columns=list('ABCD'))

    # use this form to create a new column
    pdf['A'] = list(range(len(pdf.index)))
    odf['A'] = list(range(len(odf.index)))

    pandas_column = pdf['A']
    orca_column = odf['A']
    assert_series_equal(pandas_column, orca_column.to_pandas(),
                        check_dtype=False)
def test_dataframe_binary_operator_function_sub_series(self):
    # Build the same labelled frame in pandas and orca and check that the
    # orca one converts back to the pandas one.
    # NOTE(review): despite the test name, no subtraction is exercised.
    def data():
        return {'angles': [0, 3, 4], 'degrees': [360, 180, 360]}

    def labels():
        return ['circle', 'triangle', 'rectangle']

    pdf = pd.DataFrame(data(), index=labels())
    odf = orca.DataFrame(data(), index=labels())
    assert_frame_equal(pdf, odf.to_pandas())
def test_join_from_dataframe_sort(self):
    # Join a shuffled frame with a smaller one using sort=True.  Only the
    # pandas side runs; the orca call is still disabled and nothing is
    # asserted.
    pdf_other = pd.DataFrame({
        'key': ['K0', 'K1', 'K2'],
        'B': [11, 22, 33]
    })
    odf_other = orca.DataFrame({
        'key': ['K0', 'K1', 'K2'],
        'B': [11, 22, 33]
    })
    pdf = pd.DataFrame({
        'key': ['K0', 'K1', 'K2', 'K3', 'K4', 'K5'],
        'A': [1, 2, 3, 4, 5, 6]
    })
    # sample(frac=1) returns every row in random order.
    pdf_ll = pdf.sample(frac=1)
    odf_ll = orca.DataFrame(pdf_ll)
    # TODO:NOT IMPLEMENTED
    # odf_join = odf_ll.join(odf_other, lsuffix='_caller', rsuffix='_other', sort=True)
    pdf_join = pdf_ll.join(pdf_other,
                           lsuffix='_caller', rsuffix='_other', sort=True)
    # Replace missing strings in the right-hand key column with "".  The
    # result is assigned back instead of calling fillna(inplace=True) on a
    # .loc selection: that selection can be a temporary copy, in which case
    # the fill never reaches pdf_join.
    pdf_join['key_other'] = pdf_join['key_other'].fillna("")
def test_multiindex_column_access(self):
    # A frame with four-level MultiIndex columns must have the same repr in
    # orca as in pandas.
    column_tuples = [
        ('a', 'w', 'q', 'b'),
        ('c', 'w', 'd', 'c'),
        ('e', 's', 'f', 's'),
        ('m', 'g', 'e', 'r'),
        ('s', 's', 'd', 'h'),
        ('i', 's', 's', 's'),
    ]
    columns = pd.MultiIndex.from_tuples(column_tuples)
    rows = [
        (1, 'a', 'x', 10, 100, 1000),
        (2, 'b', 'y', 20, 200, 2000),
        (3, 'c', 'z', 30, 300, 3000),
    ]
    pdf = pd.DataFrame(rows, columns=columns)
    odf = orca.DataFrame(pdf)
    self.assertEqual(repr(odf), repr(pdf))