def setUp(self):
    """Build the frames, series and dicts shared by the tests of this case.

    Every frame is created through ``self.klass``, which the concrete test
    class is expected to provide.
    """
    self.seriesd = common.getSeriesData()
    self.tsd = common.getTimeSeriesData()

    self.frame = self.klass(self.seriesd)
    # ``items()`` works on both Python 2 and 3; ``iteritems()`` exists only
    # on Python 2 dicts.
    self.intframe = self.klass(
        {k: v.astype(int) for k, v in self.seriesd.items()})
    self.tsframe = self.klass(self.tsd)

    # Same data as ``self.frame`` plus one constant string column.
    self.mixed_frame = self.frame.copy()
    self.mixed_frame['foo'] = 'bar'

    # Each call to makeTimeSeries() yields a separate series; ts2-ts4 are
    # slices of their own series, not of ts1.
    self.ts1 = common.makeTimeSeries()
    self.ts2 = common.makeTimeSeries()[5:]
    self.ts3 = common.makeTimeSeries()[-5:]
    self.ts4 = common.makeTimeSeries()[1:-1]

    self.ts_dict = {
        'col1': self.ts1,
        'col2': self.ts2,
        'col3': self.ts3,
        'col4': self.ts4,
    }
    self.empty = self.klass({})

    # Column labels deliberately mix strings and datetimes.
    self.unsortable = self.klass(
        {
            'foo': [1] * 1000,
            datetime.today(): [1] * 1000,
            'bar': ['bar'] * 1000,
            datetime.today() + timedelta(1): ['bar'] * 1000,
        },
        index=np.arange(1000))
def float_frame():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].

    Example output (abridged):

                       A         B         C         D
    P7GACiRnxd -0.465578 -0.361863  0.886172 -0.053465
    qZKh6afn8n -0.466693 -0.373773  0.266873  1.673901
    tkp0r6Qble  0.148691 -0.059051  0.174817  1.598433
    ...              ...       ...       ...       ...
    eCOBvKqf3e  0.235281  1.622222  0.781255  0.392871
    xSucinXxuV -1.263557  0.252799 -0.552247  0.400426

    [30 rows x 4 columns]
    """
    series_by_column = tm.getSeriesData()
    return DataFrame(series_by_column)
def mixed_float_frame():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'], cast to float32, float32, float16 and
    float64 respectively.

    Example output (abridged):

                       A         B         C         D
    GI7bbDaEZe -0.237908 -0.246225 -0.468506  0.752993
    KGp9mFepzA -1.140809 -0.644046 -1.225586  0.801588
    VeVYLAb1l2 -1.154013 -1.677615  0.690430 -0.003731
    ...              ...       ...       ...       ...
    9S4Ekn7zga  1.445518 -2.095149  0.031982  0.373204
    xN1dNn6OV6  1.425017 -0.983995 -0.363281 -0.224502

    [30 rows x 4 columns]
    """
    frame = DataFrame(tm.getSeriesData())
    # (column, target dtype) pairs, applied in the listed order.
    casts = (
        ('A', 'float32'),
        ('B', 'float32'),
        ('C', 'float16'),
        ('D', 'float64'),
    )
    for column, dtype in casts:
        frame[column] = frame[column].astype(dtype)
    return frame
def test_convert_r_matrix():
    """Check ``convert_to_r_matrix`` on a float frame and on a mixed frame.

    The float frame (plus an all-NaN column) must keep its row names, column
    names and values; a frame containing a string/NaN column must be rejected
    with ``TypeError``.
    """
    is_na = robj.baseenv.get("is.na")

    seriesd = _test.getSeriesData()
    frame = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])

    # Null data
    frame["E"] = [np.nan for item in frame["A"]]

    r_dataframe = convert_to_r_matrix(frame)

    assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
    assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)
    assert all(is_na(item) for item in r_dataframe.rx(True, "E"))

    for column in frame[["A", "B", "C", "D"]]:
        coldata = r_dataframe.rx(True, column)
        original_data = frame[column]
        assert np.array_equal(convert_robj(coldata), original_data)

    # Pandas bug 1282
    frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

    # FIXME: Ugly, this whole module needs to be ported to nose/unittest
    # The conversion of the mixed-type frame is expected to fail; without the
    # ``else`` branch the test would also pass when nothing is raised.
    try:
        convert_to_r_matrix(frame)
    except TypeError:
        pass
    else:
        raise AssertionError(
            "convert_to_r_matrix did not raise TypeError for a mixed-type "
            "frame")
def float_frame2():
    """
    Fixture for DataFrame of floats with index of unique strings

    Same source data as the plain float fixture, but with the columns
    requested in reverse order: ['D', 'C', 'B', 'A'].
    """
    reversed_columns = ['D', 'C', 'B', 'A']
    series_by_column = tm.getSeriesData()
    return DataFrame(series_by_column, columns=reversed_columns)
def float_string_frame():
    """
    Fixture for DataFrame of floats and strings with index of unique strings

    Columns are ['A', 'B', 'C', 'D', 'foo']; 'foo' holds the constant
    string "bar" in every row.

    Example output (abridged):

                       A         B         C         D  foo
    w3orJvq07g -1.594062 -1.084273 -1.252457  0.356460  bar
    PeukuVdmz2  0.109855 -0.955086 -0.809485  0.409747  bar
    ahp2KvwiM8 -1.533729 -0.142519 -0.154666  1.302623  bar
    ...              ...       ...       ...       ...  ...
    jZLDHclHAO  0.096298  1.267510  0.549206 -0.005235  bar
    lR0nxDp1C2 -2.119350 -0.794384  0.544118  0.145849  bar

    [30 rows x 5 columns]
    """
    frame = DataFrame(tm.getSeriesData())
    # A scalar assignment broadcasts the string to every row.
    frame["foo"] = "bar"
    return frame
def float_frame_with_na():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing

                       A         B         C         D
    ABwBzA0ljw -1.128865 -0.897161  0.046603  0.274997
    DJiRzmbyQF  0.728869  0.233502  0.722431 -0.890872
    neMgPD5UBF  0.486072 -1.027393 -0.031553  1.449522
    0yWA4n8VeX -1.937191 -1.142531  0.805215 -0.462018
    3slYUbbqU1  0.153260  1.164691  1.489795 -0.545826
    soujjZ0A08       NaN       NaN       NaN       NaN
    7W6NLGsjB9       NaN       NaN       NaN       NaN
    ...              ...       ...       ...       ...
    uhfeaNkCR1 -0.231210 -0.340472  0.244717 -0.901590
    n6p7GYuBIV -0.419052  1.922721 -0.125361 -0.727717
    ZhzAeY6p1y  1.234374 -1.425359 -0.827038 -0.633189
    uWdPsORyUh  0.046738 -0.980445 -1.102965  0.605503
    3DJA6aN590 -0.091018 -1.684734 -1.100900  0.215947
    2GBPAzdbMk -2.883405 -1.021071  1.209877  1.633083
    sHadBoyVHw -2.223032 -0.326384  0.258931  0.245517

    [30 rows x 4 columns]
    """
    df = DataFrame(tm.getSeriesData())
    # set some NAs
    # The index holds strings, so the integer slices are positions, not
    # labels: use the positional indexer rather than label-based ``.loc``.
    df.iloc[5:10] = np.nan
    df.iloc[15:20, -2:] = np.nan
    return df
def setUp(self):
    """Create the series and frames shared by the tests of this class.

    Statements are kept in their original order because the
    ``np.random.randn`` calls consume numpy's global random state.
    """
    self.ts = tm.makeTimeSeries()
    self.seriesd = tm.getSeriesData()
    self.tsd = tm.getTimeSeriesData()
    self.frame = DataFrame(self.seriesd)
    self.tsframe = DataFrame(self.tsd)

    # ``df`` and ``mframe`` are factories defined elsewhere in the module.
    self.df = df()

    mixed_float_columns = {
        'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
        'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
        'C': np.random.randn(8),
        # float32 next to the float64 'C' column
        'D': np.array(np.random.randn(8), dtype='float32'),
    }
    self.df_mixed_floats = DataFrame(mixed_float_columns)

    self.mframe = mframe()

    three_group_columns = {
        'A': ['foo'] * 4 + ['bar'] * 4 + ['foo'] * 3,
        'B': ['one', 'one', 'one', 'two',
              'one', 'one', 'one', 'two',
              'two', 'two', 'one'],
        'C': ['dull', 'dull', 'shiny', 'dull',
              'dull', 'shiny', 'shiny', 'dull',
              'shiny', 'shiny', 'shiny'],
        'D': np.random.randn(11),
        'E': np.random.randn(11),
        'F': np.random.randn(11),
    }
    self.three_group = DataFrame(three_group_columns)
def int_frame():
    """
    Fixture for DataFrame of ints with index of unique strings

    Columns are ['A', 'B', 'C', 'D']

    Example output (abridged):

                A  B  C  D
    vpBeWjM651  1  0  1  0
    5JyxmrP1En -1  0  0  0
    qEDaoD49U2 -1  1  0  0
    ...        .. .. .. ..
    rY82K0vMwm  0  0  0  0
    1OPIUjnkjk  2  0  0  0

    [30 rows x 4 columns]
    """
    int_columns = {}
    for name, series in tm.getSeriesData().items():
        int_columns[name] = series.astype(int)
    frame = DataFrame(int_columns)

    # force these all to int64 to avoid platform testing issues
    return DataFrame(dict(frame.items()), dtype=np.int64)
def test_convert_r_matrix(self):
    """Check ``com.convert_to_r_matrix`` on a float frame and a mixed frame.

    The float frame (plus an all-NaN column) must keep its row names, column
    names and values; a frame containing a string/NaN column must be rejected
    with ``TypeError``.
    """
    is_na = robj.baseenv.get("is.na")

    seriesd = tm.getSeriesData()
    frame = pd.DataFrame(seriesd, columns=["D", "C", "B", "A"])

    # Null data
    frame["E"] = [np.nan for item in frame["A"]]

    r_dataframe = com.convert_to_r_matrix(frame)

    assert np.array_equal(com.convert_robj(r_dataframe.rownames), frame.index)
    assert np.array_equal(com.convert_robj(r_dataframe.colnames), frame.columns)
    assert all(is_na(item) for item in r_dataframe.rx(True, "E"))

    for column in frame[["A", "B", "C", "D"]]:
        coldata = r_dataframe.rx(True, column)
        original_data = frame[column]
        assert np.array_equal(com.convert_robj(coldata), original_data)

    # Pandas bug 1282
    frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

    # The conversion of the mixed-type frame is expected to fail; without the
    # ``else`` branch the test would also pass when nothing is raised.
    try:
        com.convert_to_r_matrix(frame)
    except TypeError:
        pass
    else:
        raise AssertionError(
            "convert_to_r_matrix did not raise TypeError for a mixed-type "
            "frame")
def test_convert_r_matrix():
    """Check ``convert_to_r_matrix`` on a float frame and on a mixed frame.

    The float frame (plus an all-NaN column) must keep its row names, column
    names and values; a frame containing a string/NaN column must be rejected
    with ``TypeError``.
    """
    is_na = robj.baseenv.get("is.na")

    seriesd = _test.getSeriesData()
    frame = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])

    # Null data
    frame["E"] = [np.nan for item in frame["A"]]

    r_dataframe = convert_to_r_matrix(frame)

    assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
    assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)
    assert all(is_na(item) for item in r_dataframe.rx(True, "E"))

    for column in frame[["A", "B", "C", "D"]]:
        coldata = r_dataframe.rx(True, column)
        original_data = frame[column]
        assert np.array_equal(convert_robj(coldata), original_data)

    # Pandas bug 1282
    frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

    # FIXME: Ugly, this whole module needs to be ported to nose/unittest
    # The conversion of the mixed-type frame is expected to fail; without the
    # ``else`` branch the test would also pass when nothing is raised.
    try:
        convert_to_r_matrix(frame)
    except TypeError:
        pass
    else:
        raise AssertionError(
            "convert_to_r_matrix did not raise TypeError for a mixed-type "
            "frame")
def test_convert_r_dataframe():
    """Convert a frame with NaN and mixed columns to an R data frame.

    Verifies the row/column names, that column "E" is entirely NA on the R
    side, that the float columns survive unchanged, and that columns "D" and
    "E" match element by element (NaN on the pandas side must be NA in R).
    """
    is_na = robj.baseenv.get("is.na")

    frame = pd.DataFrame(_test.getSeriesData(), columns=['D', 'C', 'B', 'A'])

    # Null data
    frame["E"] = [np.nan] * len(frame["A"])
    # Some mixed type data
    frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

    r_dataframe = convert_to_r_dataframe(frame)

    assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
    assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)
    assert all(is_na(item) for item in r_dataframe.rx2("E"))

    for name in ("A", "B", "C", "D"):
        converted_column = convert_robj(r_dataframe.rx2(name))
        assert np.array_equal(converted_column, frame[name])

    for name in ("D", "E"):
        pairs = zip(frame[name], r_dataframe.rx2(name))
        for original, converted in pairs:
            if not pd.isnull(original):
                assert original == converted
            else:
                assert is_na(converted)
def float_frame():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D'].
    """
    series_by_column = tm.getSeriesData()
    return DataFrame(series_by_column)
def bool_frame_with_na():
    """
    Fixture for DataFrame of booleans with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing

                    A      B      C      D
    zBZxY2IDGd  False  False  False  False
    IhBWBMWllt  False   True   True   True
    ctjdvZSR6R   True  False   True   True
    AVTujptmxb  False   True  False   True
    G9lrImrSWq  False  False  False   True
    sFFwdIUfz2    NaN    NaN    NaN    NaN
    s15ptEJnRb    NaN    NaN    NaN    NaN
    ...           ...    ...    ...    ...
    UW41KkDyZ4   True   True  False  False
    l9l6XkOdqV   True  False  False  False
    X2MeZfzDYA  False   True  False  False
    xWkIKU7vfX  False   True  False   True
    QOhL6VmpGU  False  False  False   True
    22PwkRJdat  False   True  False  False
    kfboQ3VeIK   True  False   True  False

    [30 rows x 4 columns]
    """
    df = DataFrame(tm.getSeriesData()) > 0
    # object dtype so the boolean columns can also hold NaN
    df = df.astype(object)
    # set some NAs
    # The index holds strings, so the integer slices are positions, not
    # labels: use the positional indexer rather than label-based ``.loc``.
    df.iloc[5:10] = np.nan
    df.iloc[15:20, -2:] = np.nan
    return df
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'] with dtypes int32, uint64, uint8 and
    int64 respectively.

    Example output (abridged):

                A  B    C    D
    mUrCZ67juP  0  1    2    2
    rw99ACYaKS  0  1    0    0
    7QsEcpaaVU  0  1    1    1
    xkrimI2pcE  0  1    0    0
    dz01SuzoS8  0  1  255  255
    ...        .. ..  ...  ...
    NMb9pipQWQ  0  1    0    0
    PqgbJEzjib  0  1    3    3

    [30 rows x 4 columns]
    """
    int_columns = {
        name: series.astype(int)
        for name, series in tm.getSeriesData().items()
    }
    frame = DataFrame(int_columns)

    frame['A'] = frame['A'].astype('int32')
    # 'B' is replaced wholesale by a column of ones.
    frame['B'] = np.ones(len(frame['B']), dtype='uint64')
    frame['C'] = frame['C'].astype('uint8')
    # 'D' is built from the already-uint8 'C', not from the original 'D',
    # which is why both columns are identical in the sample above.
    frame['D'] = frame['C'].astype('int64')
    return frame
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'] with dtypes int32, uint64, uint8 and
    int64 respectively.

    Example output (abridged):

                A  B    C    D
    mUrCZ67juP  0  1    2    2
    rw99ACYaKS  0  1    0    0
    7QsEcpaaVU  0  1    1    1
    xkrimI2pcE  0  1    0    0
    dz01SuzoS8  0  1  255  255
    ...        .. ..  ...  ...
    NMb9pipQWQ  0  1    0    0
    PqgbJEzjib  0  1    3    3

    [30 rows x 4 columns]
    """
    int_columns = {
        name: series.astype(int)
        for name, series in tm.getSeriesData().items()
    }
    frame = DataFrame(int_columns)

    frame["A"] = frame["A"].astype("int32")
    # "B" is replaced wholesale by a column of ones.
    frame["B"] = np.ones(len(frame["B"]), dtype="uint64")
    frame["C"] = frame["C"].astype("uint8")
    # "D" is built from the already-uint8 "C", not from the original "D",
    # which is why both columns are identical in the sample above.
    frame["D"] = frame["C"].astype("int64")
    return frame
def test_convert_r_dataframe():
    """Convert a frame with NaN and mixed columns to an R data frame.

    Verifies the row/column names, that column "E" is entirely NA on the R
    side, that the float columns survive unchanged, and that columns "D" and
    "E" match element by element (NaN on the pandas side must be NA in R).
    """
    is_na = robj.baseenv.get("is.na")

    frame = pd.DataFrame(_test.getSeriesData(), columns=['D', 'C', 'B', 'A'])

    # Null data
    frame["E"] = [np.nan] * len(frame["A"])
    # Some mixed type data
    frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]

    r_dataframe = convert_to_r_dataframe(frame)

    assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
    assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)
    assert all(is_na(item) for item in r_dataframe.rx2("E"))

    for name in ("A", "B", "C", "D"):
        converted_column = convert_robj(r_dataframe.rx2(name))
        assert np.array_equal(converted_column, frame[name])

    for name in ("D", "E"):
        pairs = zip(frame[name], r_dataframe.rx2(name))
        for original, converted in pairs:
            if not pd.isnull(original):
                assert original == converted
            else:
                assert is_na(converted)
def float_string_frame():
    """
    Fixture for DataFrame of floats and strings with index of unique strings

    Columns are ['A', 'B', 'C', 'D', 'foo']; 'foo' holds the constant
    string 'bar' in every row.

    Example output (abridged):

                       A         B         C         D  foo
    w3orJvq07g -1.594062 -1.084273 -1.252457  0.356460  bar
    PeukuVdmz2  0.109855 -0.955086 -0.809485  0.409747  bar
    ahp2KvwiM8 -1.533729 -0.142519 -0.154666  1.302623  bar
    ...              ...       ...       ...       ...  ...
    jZLDHclHAO  0.096298  1.267510  0.549206 -0.005235  bar
    lR0nxDp1C2 -2.119350 -0.794384  0.544118  0.145849  bar

    [30 rows x 5 columns]
    """
    frame = DataFrame(tm.getSeriesData())
    # A scalar assignment broadcasts the string to every row.
    frame['foo'] = 'bar'
    return frame
def setUp(self):
    """Build the frames, series and dicts shared by the tests of this case.

    Every frame is created through ``self.klass``, which the concrete test
    class is expected to provide.
    """
    self.seriesd = common.getSeriesData()
    self.tsd = common.getTimeSeriesData()

    self.frame = self.klass(self.seriesd)
    # ``items()`` works on both Python 2 and 3; ``iteritems()`` exists only
    # on Python 2 dicts.
    self.intframe = self.klass(
        {k: v.astype(int) for k, v in self.seriesd.items()})
    self.tsframe = self.klass(self.tsd)

    # Same data as ``self.frame`` plus one constant string column.
    self.mixed_frame = self.frame.copy()
    self.mixed_frame['foo'] = 'bar'

    # Each call to makeTimeSeries() yields a separate series; ts2-ts4 are
    # slices of their own series, not of ts1.
    self.ts1 = common.makeTimeSeries()
    self.ts2 = common.makeTimeSeries()[5:]
    self.ts3 = common.makeTimeSeries()[-5:]
    self.ts4 = common.makeTimeSeries()[1:-1]

    self.ts_dict = {
        'col1': self.ts1,
        'col2': self.ts2,
        'col3': self.ts3,
        'col4': self.ts4,
    }
    self.empty = self.klass({})

    # Column labels deliberately mix strings and datetimes.
    self.unsortable = self.klass(
        {
            'foo': [1] * 1000,
            datetime.today(): [1] * 1000,
            'bar': ['bar'] * 1000,
            datetime.today() + timedelta(1): ['bar'] * 1000,
        },
        index=np.arange(1000))
def mixed_float_frame():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'], cast to float32, float32, float16 and
    float64 respectively.

    Example output (abridged):

                       A         B         C         D
    GI7bbDaEZe -0.237908 -0.246225 -0.468506  0.752993
    KGp9mFepzA -1.140809 -0.644046 -1.225586  0.801588
    VeVYLAb1l2 -1.154013 -1.677615  0.690430 -0.003731
    ...              ...       ...       ...       ...
    9S4Ekn7zga  1.445518 -2.095149  0.031982  0.373204
    xN1dNn6OV6  1.425017 -0.983995 -0.363281 -0.224502

    [30 rows x 4 columns]
    """
    frame = DataFrame(tm.getSeriesData())
    # (column, target dtype) pairs, applied in the listed order.
    casts = (
        ("A", "float32"),
        ("B", "float32"),
        ("C", "float16"),
        ("D", "float64"),
    )
    for column, dtype in casts:
        frame[column] = frame[column].astype(dtype)
    return frame
def float_string_frame():
    """
    Fixture for DataFrame of floats and strings with index of unique strings

    Columns are ['A', 'B', 'C', 'D', 'foo']; 'foo' holds the constant
    string 'bar' in every row.
    """
    frame = DataFrame(tm.getSeriesData())
    # A scalar assignment broadcasts the string to every row.
    frame['foo'] = 'bar'
    return frame
def int_frame():
    """
    Fixture for DataFrame of ints with index of unique strings

    Columns are ['A', 'B', 'C', 'D']
    """
    int_columns = {}
    for name, series in tm.getSeriesData().items():
        int_columns[name] = series.astype(int)
    frame = DataFrame(int_columns)

    # force these all to int64 to avoid platform testing issues
    return DataFrame(dict(frame.items()), dtype=np.int64)
def int_frame():
    """
    Fixture for DataFrame of ints with index of unique strings

    Columns are ['A', 'B', 'C', 'D']
    """
    int_columns = {}
    for name, series in compat.iteritems(tm.getSeriesData()):
        int_columns[name] = series.astype(int)
    frame = DataFrame(int_columns)

    # force these all to int64 to avoid platform testing issues
    return DataFrame(dict(compat.iteritems(frame)), dtype=np.int64)
def setup_method(self, method):
    """Create the series and frames shared by the tests of this class.

    ``method`` is accepted for the test runner's benefit and not used here.
    Statements are kept in their original order because the
    ``np.random.randn`` calls consume numpy's global random state.
    """
    self.ts = tm.makeTimeSeries()
    self.seriesd = tm.getSeriesData()
    self.tsd = tm.getTimeSeriesData()
    self.frame = DataFrame(self.seriesd)
    self.tsframe = DataFrame(self.tsd)

    # Two string key columns shared by ``df`` and ``df_mixed_floats``.
    keys_a = ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo']
    keys_b = ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three']

    self.df = DataFrame({
        'A': keys_a,
        'B': keys_b,
        'C': np.random.randn(8),
        'D': np.random.randn(8),
    })

    # Same layout as ``df`` but with 'D' stored as float32.
    self.df_mixed_floats = DataFrame({
        'A': keys_a,
        'B': keys_b,
        'C': np.random.randn(8),
        'D': np.array(np.random.randn(8), dtype='float32'),
    })

    # NOTE(review): newer pandas spells the ``labels`` keyword ``codes`` --
    # confirm which pandas version this file targets.
    two_level_index = MultiIndex(
        levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']],
        labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=['first', 'second'])
    self.mframe = DataFrame(np.random.randn(10, 3),
                            index=two_level_index,
                            columns=['A', 'B', 'C'])

    self.three_group = DataFrame({
        'A': ['foo'] * 4 + ['bar'] * 4 + ['foo'] * 3,
        'B': ['one', 'one', 'one', 'two',
              'one', 'one', 'one', 'two',
              'two', 'two', 'one'],
        'C': ['dull', 'dull', 'shiny', 'dull',
              'dull', 'shiny', 'shiny', 'dull',
              'shiny', 'shiny', 'shiny'],
        'D': np.random.randn(11),
        'E': np.random.randn(11),
        'F': np.random.randn(11),
    })
def float_frame_with_na():
    """
    Fixture for DataFrame of floats with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing
    """
    df = DataFrame(tm.getSeriesData())
    # set some NAs
    # The index holds strings, so the integer slices are positions, not
    # labels: use the positional indexer rather than label-based ``.loc``.
    df.iloc[5:10] = np.nan
    df.iloc[15:20, -2:] = np.nan
    return df
def mixed_float_frame2():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; 'D' becomes float16, 'C' float32 and
    'B' float64, while 'A' is left as constructed.
    """
    frame = DataFrame(tm.getSeriesData())
    # (column, target dtype) pairs, applied in the listed order.
    casts = (
        ('D', 'float16'),
        ('C', 'float32'),
        ('B', 'float64'),
    )
    for column, dtype in casts:
        frame[column] = frame[column].astype(dtype)
    return frame
def mixed_float_frame2():
    """
    Fixture for DataFrame of different float types with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; 'B' becomes float16, 'C' float32 and
    'D' ends up float64, while 'A' is left as constructed.
    """
    frame = DataFrame(tm.getSeriesData())
    # Casts run in exactly this order. 'D' appears twice: it is narrowed to
    # float32 first and widened back to float64 last, so its final dtype is
    # float64 but its values have passed through float32.
    # NOTE(review): the double cast of 'D' looks unintentional -- confirm.
    casts = (
        ('D', 'float32'),
        ('C', 'float32'),
        ('B', 'float16'),
        ('D', 'float64'),
    )
    for column, dtype in casts:
        frame[column] = frame[column].astype(dtype)
    return frame
def bool_frame_with_na():
    """
    Fixture for DataFrame of booleans with index of unique strings

    Columns are ['A', 'B', 'C', 'D']; some entries are missing
    """
    df = DataFrame(tm.getSeriesData()) > 0
    # object dtype so the boolean columns can also hold NaN
    df = df.astype(object)
    # set some NAs
    # The index holds strings, so the integer slices are positions, not
    # labels: use the positional indexer rather than label-based ``.loc``.
    df.iloc[5:10] = np.nan
    df.iloc[15:20, -2:] = np.nan
    return df
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'] with dtypes int32, uint64, uint8 and
    int64 respectively.
    """
    int_columns = {
        name: series.astype(int)
        for name, series in tm.getSeriesData().items()
    }
    frame = DataFrame(int_columns)

    frame['A'] = frame['A'].astype('int32')
    # 'B' is replaced wholesale by a column of ones.
    frame['B'] = np.ones(len(frame['B']), dtype='uint64')
    frame['C'] = frame['C'].astype('uint8')
    # 'D' is built from the already-uint8 'C', not from the original 'D',
    # so it carries the same values as 'C'.
    frame['D'] = frame['C'].astype('int64')
    return frame
def mixed_int_frame():
    """
    Fixture for DataFrame of different int types with index of unique strings

    Columns are ['A', 'B', 'C', 'D'] with dtypes int32, uint64, uint8 and
    int64 respectively.
    """
    int_columns = {
        name: series.astype(int)
        for name, series in compat.iteritems(tm.getSeriesData())
    }
    frame = DataFrame(int_columns)

    frame['A'] = frame['A'].astype('int32')
    # 'B' is replaced wholesale by a column of ones.
    frame['B'] = np.ones(len(frame['B']), dtype='uint64')
    frame['C'] = frame['C'].astype('uint8')
    # 'D' is built from the already-uint8 'C', not from the original 'D',
    # so it carries the same values as 'C'.
    frame['D'] = frame['C'].astype('int64')
    return frame
def test_ix_assign_column_mixed(self):
    """Column assignment through ``.loc`` on frames with mixed dtypes."""
    # GH #1142
    df = DataFrame(tm.getSeriesData())
    df['foo'] = 'bar'

    before = df.loc[:, 'B'].copy()
    df.loc[:, 'B'] = df.loc[:, 'B'] + 1
    tm.assert_series_equal(df.B, before + 1)

    # GH 3668, mixed frame with series value
    df = DataFrame({
        'x': np.arange(10),
        'y': np.arange(10, 20),
        'z': 'bar'
    })
    expected = df.copy()

    # Even rows 0, 2, ..., 8 receive 1000, 1200, ..., 1800.
    for step in range(5):
        row = step * 2
        value = 1000 + step * 200
        expected.loc[row, 'y'] = value
        assert expected.loc[row, 'y'] == value

    even_rows = df.x % 2 == 0
    df.loc[even_rows, 'y'] = df.loc[even_rows, 'y'] * 100
    tm.assert_frame_equal(df, expected)

    # GH 4508, making sure consistency of assignments
    df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]})
    df.loc[[0, 2], 'b'] = [100, -100]
    expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]})
    tm.assert_frame_equal(df, expected)

    df = DataFrame({'a': list(range(4))})
    df['b'] = np.nan
    df.loc[[1, 3], 'b'] = [100, -100]
    expected = DataFrame({
        'a': [0, 1, 2, 3],
        'b': [np.nan, 100, np.nan, -100]
    })
    tm.assert_frame_equal(df, expected)

    # ok, but chained assignments are dangerous
    # if we turn off chained assignment it will work
    with option_context('chained_assignment', None):
        df = DataFrame({'a': list(range(4))})
        df['b'] = np.nan
        df['b'].loc[[1, 3]] = [100, -100]
        tm.assert_frame_equal(df, expected)
def test_convert_r_matrix():
    """Round-trip a float frame through ``convert_to_r_matrix``.

    Row names, column names and every column's values must match the source
    frame after converting back with ``convert_robj``.
    """
    frame = pn.DataFrame(_test.getSeriesData(), columns=['D', 'C', 'B', 'A'])

    r_matrix = convert_to_r_matrix(frame)

    assert np.array_equal(convert_robj(r_matrix.rownames), frame.index)
    assert np.array_equal(convert_robj(r_matrix.colnames), frame.columns)

    for name in r_matrix.colnames:
        # rx(True, name) selects all rows of the named column on the R side
        converted_column = convert_robj(r_matrix.rx(True, name))
        assert np.array_equal(converted_column, frame[name])
def test_rename_inplace():
    """``rename(columns=...)`` must agree between the two frame libraries.

    Both the copying call and the ``inplace=True`` call are compared.
    Presumably ``pd`` is the library under test and ``pandas`` the
    reference -- verify against the module imports.
    """
    test_data = pandas.DataFrame(tm.getSeriesData())
    modin_df = pd.DataFrame(test_data)

    # Non-inplace: compare the returned frames.
    renamed_modin = modin_df.rename(columns={"C": "foo"})
    renamed_reference = test_data.rename(columns={"C": "foo"})
    df_equals(renamed_modin, renamed_reference)

    # Inplace: work on copies so the originals stay untouched.
    frame = test_data.copy()
    modin_frame = modin_df.copy()
    frame.rename(columns={"C": "foo"}, inplace=True)
    modin_frame.rename(columns={"C": "foo"}, inplace=True)

    df_equals(modin_frame, frame)
def test_frame_operators(self):
    """Addition of frames with frames, with a Series, and of empty frames."""

    def check_cell(actual, wanted):
        # A NaN result is only acceptable when the expected value is NaN too.
        if np.isnan(actual):
            assert np.isnan(wanted)
        else:
            assert actual == wanted

    seriesd = tm.getSeriesData()
    frame = pd.DataFrame(seriesd)
    frame2 = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])

    garbage = np.random.random(4)
    col_series = pd.Series(garbage, index=np.array(frame.columns))

    doubled = frame + frame
    shifted = frame + col_series

    # frame + frame doubles every cell
    for col, series in doubled.items():
        for idx, val in series.items():
            check_cell(val, frame[col][idx] * 2)

    # frame + Series adds the Series entry matching the column label
    for col, series in shifted.items():
        for idx, val in series.items():
            check_cell(val, frame[col][idx] + col_series[col])

    added = frame2 + frame2
    expected = frame2 * 2
    tm.assert_frame_equal(added, expected)

    df = pd.DataFrame({'a': ['a', None, 'b']})
    tm.assert_frame_equal(df + df,
                          pd.DataFrame({'a': ['aa', np.nan, 'bb']}))

    # Test for issue #10181
    for dtype in ('float', 'int64'):
        empty_frames = [
            pd.DataFrame(dtype=dtype),
            pd.DataFrame(columns=['A'], dtype=dtype),
            pd.DataFrame(index=[0], dtype=dtype),
        ]
        for df in empty_frames:
            assert (df + df).equals(df)
            tm.assert_frame_equal(df + df, df)
def test_ix_assign_column_mixed(self):
    """Column assignment through ``.loc`` on frames with mixed dtypes."""
    # GH #1142
    df = DataFrame(tm.getSeriesData())
    df['foo'] = 'bar'

    before = df.loc[:, 'B'].copy()
    df.loc[:, 'B'] = df.loc[:, 'B'] + 1
    tm.assert_series_equal(df.B, before + 1)

    # GH 3668, mixed frame with series value
    df = DataFrame({'x': np.arange(10),
                    'y': np.arange(10, 20),
                    'z': 'bar'})
    expected = df.copy()

    # Even rows 0, 2, ..., 8 receive 1000, 1200, ..., 1800.
    for step in range(5):
        row = step * 2
        value = 1000 + step * 200
        expected.loc[row, 'y'] = value
        assert expected.loc[row, 'y'] == value

    even_rows = df.x % 2 == 0
    df.loc[even_rows, 'y'] = df.loc[even_rows, 'y'] * 100
    tm.assert_frame_equal(df, expected)

    # GH 4508, making sure consistency of assignments
    df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]})
    df.loc[[0, 2], 'b'] = [100, -100]
    expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]})
    tm.assert_frame_equal(df, expected)

    df = DataFrame({'a': list(range(4))})
    df['b'] = np.nan
    df.loc[[1, 3], 'b'] = [100, -100]
    expected = DataFrame({'a': [0, 1, 2, 3],
                          'b': [np.nan, 100, np.nan, -100]})
    tm.assert_frame_equal(df, expected)

    # ok, but chained assignments are dangerous
    # if we turn off chained assignment it will work
    with option_context('chained_assignment', None):
        df = DataFrame({'a': list(range(4))})
        df['b'] = np.nan
        df['b'].loc[[1, 3]] = [100, -100]
        tm.assert_frame_equal(df, expected)
def test_astype():
    """``astype`` must give matching results in both frame libraries.

    Covers scalar dtypes, a per-column dtype mapping, a mapping literal with
    repeated keys, a frame filled through chained indexing, and the
    ``KeyError`` raised for an unknown column name.
    """
    td = pandas.DataFrame(tm.getSeriesData())
    modin_df = pd.DataFrame(td.values, index=td.index, columns=td.columns)
    expected_df = pandas.DataFrame(td.values, index=td.index, columns=td.columns)

    # Whole-frame casts followed by a per-column mapping.
    targets = (
        np.int32,
        np.float64,
        str,
        "category",
        {"A": np.int32, "B": np.int64, "C": str},
    )
    for target in targets:
        df_equals(modin_df.astype(target), expected_df.astype(target))

    # Ignore lint because this is testing bad input
    bad_dtype_dict = {"B": np.int32, "B": np.int64, "B": str}  # noqa F601
    df_equals(modin_df.astype(bad_dtype_dict),
              expected_df.astype(bad_dtype_dict))

    # Single-cell frames filled via chained indexing, then cast to int.
    modin_df = pd.DataFrame(index=["row1"], columns=["col1"])
    modin_df["col1"]["row1"] = 11
    modin_df_casted = modin_df.astype(int)

    expected_df = pandas.DataFrame(index=["row1"], columns=["col1"])
    expected_df["col1"]["row1"] = 11
    expected_df_casted = expected_df.astype(int)

    df_equals(modin_df_casted, expected_df_casted)

    with pytest.raises(KeyError):
        modin_df.astype({"not_exists": np.uint8})
def test_rename_axis_inplace():
    """``rename_axis(..., inplace=True)`` must agree in both libraries.

    Checked for the default axis and for ``axis=1``: the two calls must
    return the very same object and leave equal frames behind.
    """
    test_frame = pandas.DataFrame(tm.getSeriesData())
    modin_df = pd.DataFrame(test_frame)

    cases = (
        ("foo", {}),
        ("bar", {"axis": 1}),
    )
    for axis_name, extra_kwargs in cases:
        result = test_frame.copy()
        modin_result = modin_df.copy()

        no_return = result.rename_axis(
            axis_name, inplace=True, **extra_kwargs)
        modin_no_return = modin_result.rename_axis(
            axis_name, inplace=True, **extra_kwargs)

        assert no_return is modin_no_return
        df_equals(modin_result, result)
def setUp(self):
    """Create the series and frames shared by the tests of this class.

    Statements are kept in their original order because the
    ``np.random.randn`` calls consume numpy's global random state.
    """
    self.ts = tm.makeTimeSeries()
    self.seriesd = tm.getSeriesData()
    self.tsd = tm.getTimeSeriesData()
    self.frame = DataFrame(self.seriesd)
    self.tsframe = DataFrame(self.tsd)

    # Two string key columns shared by ``df`` and ``df_mixed_floats``.
    keys_a = ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"]
    keys_b = ["one", "one", "two", "three", "two", "two", "one", "three"]

    self.df = DataFrame({
        "A": keys_a,
        "B": keys_b,
        "C": np.random.randn(8),
        "D": np.random.randn(8),
    })

    # Same layout as ``df`` but with "D" stored as float32.
    self.df_mixed_floats = DataFrame({
        "A": keys_a,
        "B": keys_b,
        "C": np.random.randn(8),
        "D": np.array(np.random.randn(8), dtype="float32"),
    })

    # NOTE(review): newer pandas spells the ``labels`` keyword ``codes`` --
    # confirm which pandas version this file targets.
    two_level_index = MultiIndex(
        levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
        labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=["first", "second"],
    )
    self.mframe = DataFrame(
        np.random.randn(10, 3),
        index=two_level_index,
        columns=["A", "B", "C"],
    )

    self.three_group = DataFrame({
        "A": ["foo"] * 4 + ["bar"] * 4 + ["foo"] * 3,
        "B": ["one", "one", "one", "two",
              "one", "one", "one", "two",
              "two", "two", "one"],
        "C": ["dull", "dull", "shiny", "dull",
              "dull", "shiny", "shiny", "dull",
              "shiny", "shiny", "shiny"],
        "D": np.random.randn(11),
        "E": np.random.randn(11),
        "F": np.random.randn(11),
    })
def test_ix_assign_column_mixed(self):
    """Column assignment through ``.ix`` on frames with mixed dtypes."""
    # GH #1142
    df = DataFrame(tm.getSeriesData())
    df['foo'] = 'bar'

    before = df.ix[:, 'B'].copy()
    df.ix[:, 'B'] = df.ix[:, 'B'] + 1
    assert_series_equal(df.B, before + 1)

    # GH 3668, mixed frame with series value
    df = DataFrame({'x': range(10), 'y': range(10, 20), 'z': 'bar'})
    expected = df.copy()

    # Even rows 0, 2, ..., 8 receive 1000, 1200, ..., 1800.
    for row, value in ((0, 1000), (2, 1200), (4, 1400),
                       (6, 1600), (8, 1800)):
        expected.ix[row, 'y'] = value

    df.ix[df.x % 2 == 0, 'y'] = df.ix[df.x % 2 == 0, 'y'] * 100
    assert_frame_equal(df, expected)
def setup_method(self, method):
    """Create the series and frames shared by the tests of this class.

    ``method`` is accepted for the test runner's benefit and not used here.
    Statements are kept in their original order because the
    ``np.random.randn`` calls consume numpy's global random state.
    """
    self.ts = tm.makeTimeSeries()
    self.seriesd = tm.getSeriesData()
    self.tsd = tm.getTimeSeriesData()
    self.frame = DataFrame(self.seriesd)
    self.tsframe = DataFrame(self.tsd)

    # ``df`` and ``mframe`` are factories defined elsewhere in the module.
    self.df = df()

    mixed_float_columns = {
        'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
        'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
        'C': np.random.randn(8),
        # float32 next to the float64 'C' column
        'D': np.array(np.random.randn(8), dtype='float32'),
    }
    self.df_mixed_floats = DataFrame(mixed_float_columns)

    self.mframe = mframe()

    three_group_columns = {
        'A': ['foo'] * 4 + ['bar'] * 4 + ['foo'] * 3,
        'B': ['one', 'one', 'one', 'two',
              'one', 'one', 'one', 'two',
              'two', 'two', 'one'],
        'C': ['dull', 'dull', 'shiny', 'dull',
              'dull', 'shiny', 'shiny', 'dull',
              'shiny', 'shiny', 'shiny'],
        'D': np.random.randn(11),
        'E': np.random.randn(11),
        'F': np.random.randn(11),
    }
    self.three_group = DataFrame(three_group_columns)
def test_ix_assign_column_mixed(self):
    """Column assignment through ``.ix`` on frames with mixed dtypes.

    Covers GH #1142 (column update on a mixed frame), GH 3668 (boolean-mask
    assignment of a series value) and GH 4508 (list-indexed assignment,
    including the chained form).
    """
    # GH #1142
    df = DataFrame(tm.getSeriesData())
    df['foo'] = 'bar'

    orig = df.ix[:, 'B'].copy()
    df.ix[:, 'B'] = df.ix[:, 'B'] + 1
    assert_series_equal(df.B, orig + 1)

    # GH 3668, mixed frame with series value
    df = DataFrame({'x': lrange(10), 'y': lrange(10, 20), 'z': 'bar'})
    expected = df.copy()

    for i in range(5):
        indexer = i * 2
        v = 1000 + i * 200
        expected.ix[indexer, 'y'] = v
        # assertTrue instead of the deprecated unittest alias ``assert_``
        self.assertTrue(expected.ix[indexer, 'y'] == v)

    df.ix[df.x % 2 == 0, 'y'] = df.ix[df.x % 2 == 0, 'y'] * 100
    assert_frame_equal(df, expected)

    # GH 4508, making sure consistency of assignments
    df = DataFrame({'a': [1, 2, 3], 'b': [0, 1, 2]})
    df.ix[[0, 2], 'b'] = [100, -100]
    expected = DataFrame({'a': [1, 2, 3], 'b': [100, 1, -100]})
    assert_frame_equal(df, expected)

    df = pd.DataFrame({'a': lrange(4)})
    df['b'] = np.nan
    df.ix[[1, 3], 'b'] = [100, -100]
    expected = DataFrame({'a': [0, 1, 2, 3],
                          'b': [np.nan, 100, np.nan, -100]})
    assert_frame_equal(df, expected)

    # ok, but chained assignments are dangerous
    df = pd.DataFrame({'a': lrange(4)})
    df['b'] = np.nan
    df['b'].ix[[1, 3]] = [100, -100]
    assert_frame_equal(df, expected)
def setUp(self):
    """Create the series and frames shared by the tests of this class.

    Statements are kept in their original order because the
    ``np.random.randn`` calls consume numpy's global random state.
    """
    self.ts = tm.makeTimeSeries()
    self.seriesd = tm.getSeriesData()
    self.tsd = tm.getTimeSeriesData()
    self.frame = DataFrame(self.seriesd)
    self.tsframe = DataFrame(self.tsd)

    # Two string key columns shared by ``df`` and ``df_mixed_floats``.
    keys_a = ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo']
    keys_b = ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three']

    self.df = DataFrame({
        'A': keys_a,
        'B': keys_b,
        'C': np.random.randn(8),
        'D': np.random.randn(8),
    })

    # Same layout as ``df`` but with 'D' stored as float32.
    self.df_mixed_floats = DataFrame({
        'A': keys_a,
        'B': keys_b,
        'C': np.random.randn(8),
        'D': np.array(np.random.randn(8), dtype='float32'),
    })

    # NOTE(review): newer pandas spells the ``labels`` keyword ``codes`` --
    # confirm which pandas version this file targets.
    two_level_index = MultiIndex(
        levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']],
        labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
        names=['first', 'second'])
    self.mframe = DataFrame(np.random.randn(10, 3),
                            index=two_level_index,
                            columns=['A', 'B', 'C'])

    self.three_group = DataFrame({
        'A': ['foo'] * 4 + ['bar'] * 4 + ['foo'] * 3,
        'B': ['one', 'one', 'one', 'two',
              'one', 'one', 'one', 'two',
              'two', 'two', 'one'],
        'C': ['dull', 'dull', 'shiny', 'dull',
              'dull', 'shiny', 'shiny', 'dull',
              'shiny', 'shiny', 'shiny'],
        'D': np.random.randn(11),
        'E': np.random.randn(11),
        'F': np.random.randn(11),
    })
def _skip_if_no_xlsxwriter():
    """Raise ``nose.SkipTest`` when ``xlsxwriter`` cannot be imported."""
    try:
        import xlsxwriter  # NOQA
    except ImportError:
        raise nose.SkipTest('xlsxwriter not installed, skipping')


def _skip_if_no_excelsuite():
    """Run the xlrd, xlwt and openpyxl skip checks one after another.

    The three helpers are defined elsewhere in the module; presumably each
    raises ``nose.SkipTest`` like ``_skip_if_no_xlsxwriter`` -- verify.
    """
    _skip_if_no_xlrd()
    _skip_if_no_xlwt()
    _skip_if_no_openpyxl()


# Module-level sample data, built once at import time.
_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()
# First ten rows only.
_frame = DataFrame(_seriesd)[:10]
# Same data with the columns requested in reverse order.
_frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A'])[:10]
_tsframe = tm.makeTimeDataFrame()[:5]
# Copy of ``_frame`` with one extra constant string column.
_mixed_frame = _frame.copy()
_mixed_frame['foo'] = 'bar'


class SharedItems(object):
    # NOTE(review): the class may continue beyond this excerpt; only the
    # part of ``setUp`` visible here is documented.

    def setUp(self):
        """Resolve the paths of the CSV/Excel files under the test data dir."""
        self.dirpath = tm.get_data_path()
        self.csv1 = os.path.join(self.dirpath, 'test1.csv')
        self.csv2 = os.path.join(self.dirpath, 'test2.csv')
        self.xls1 = os.path.join(self.dirpath, 'test.xls')
        self.xlsx1 = os.path.join(self.dirpath, 'test.xlsx')
import os
import sys
import unittest
from textwrap import dedent

from numpy import nan
from numpy.random import randn
import numpy as np

from pandas import DataFrame, Series, Index
import pandas.core.format as fmt
import pandas.util.testing as tm
import pandas
import pandas as pd

# Template frame built once at import time; tests work on copies of it.
_frame = DataFrame(tm.getSeriesData())


def curpath():
    """Return the absolute path of the directory containing this file."""
    pth, _ = os.path.split(os.path.abspath(__file__))
    return pth


class TestDataFrameFormatting(unittest.TestCase):

    def setUp(self):
        """Give each test its own copy of the module-level frame."""
        self.frame = _frame.copy()

    def test_repr_embedded_ndarray(self):
        # Structured array with a single object field named 'err'.
        arr = np.empty(10, dtype=[('err', object)])
        # Element i receives a random vector of length i.
        # NOTE(review): no assertion is visible in this excerpt; the test
        # body may continue beyond it.
        for i in range(len(arr)):
            arr['err'][i] = np.random.randn(i)
import warnings

from numpy import nan
from numpy.random import randn
import numpy as np

from pandas import DataFrame, Series, Index
from pandas.util.py3compat import lzip
import pandas.core.format as fmt
import pandas.util.testing as tm
import pandas
import pandas as pd
from pandas.core.config import (set_option, get_option, option_context, reset_option)

# Template frame built once at import time; tests work on copies of it.
_frame = DataFrame(tm.getSeriesData())


def curpath():
    """Return the absolute path of the directory containing this file."""
    pth, _ = os.path.split(os.path.abspath(__file__))
    return pth


class TestDataFrameFormatting(unittest.TestCase):
    _multiprocess_can_split_ = True

    def setUp(self):
        """Silence format-module FutureWarnings and copy the template frame."""
        # NOTE(review): this keeps a reference to the live filter list, not
        # a snapshot; if a teardown assigns it back to ``warnings.filters``
        # the filter added below would not be removed -- confirm whether a
        # copy (``list(warnings.filters)``) is intended.
        self.warn_filters = warnings.filters
        # Ignore FutureWarning raised from modules whose name matches
        # ".*format".
        warnings.filterwarnings('ignore', category=FutureWarning, module=".*format")
        self.frame = _frame.copy()
def test_invert(self):
    """Unary minus and ``~`` must give the same result on a boolean frame."""
    frame = pd.DataFrame(tm.getSeriesData())
    is_negative = frame < 0
    assert_frame_equal(-is_negative, ~is_negative)
def setUp(self):
    """Create ``self.frame`` and a copy of it with an extra string column."""
    self.frame = DataFrame(tm.getSeriesData())

    # Independent copy, so adding 'foo' leaves ``self.frame`` untouched.
    with_strings = self.frame.copy()
    with_strings['foo'] = 'bar'
    self.mixed_frame = with_strings
def frame():
    """Return a DataFrame built from ``tm.getSeriesData()``."""
    series_by_column = tm.getSeriesData()
    return DataFrame(series_by_column)
def seriesd():
    """Return the object produced by ``tm.getSeriesData()`` unchanged."""
    data = tm.getSeriesData()
    return data