def _maybe_convert_index_type(index):
    """Try to give ``index`` a numeric dtype, preferring integers.

    The conversion is attempted in two stages:

    1. ``index.astype(int)``; if that succeeds its result is returned.
    2. If the cast raises ``TypeError`` or ``ValueError`` and ``index`` is
       not a ``MultiIndex``, the values are parsed with
       ``to_numeric(errors='coerce')``, so entries that are not numbers
       become NaN.

    Parameters
    ----------
    index : Index
        The index to convert. A ``MultiIndex`` whose integer cast fails is
        returned unchanged.

    Returns
    -------
    Index
        The converted index, carrying the same ``name`` as the input.
    """
    try:
        return index.astype(int)
    except (TypeError, ValueError):
        pass

    if isinstance(index, MultiIndex):
        return index

    # Imported here so the block does not depend on a module-level import
    # that may not exist; ``to_numeric`` replaces the deprecated
    # ``Series.convert_objects(convert_numeric=True)``.
    from pandas import to_numeric

    values = Series(index, name=index.name)
    numeric = to_numeric(values, errors='coerce')
    # If nothing could be parsed as a number, keep the original values
    # instead of replacing the whole index with NaN.
    if not numeric.isnull().all():
        values = numeric
    return Index(values, name=index.name)
def test_convert_objects(self):
    """Exercise ``Series.convert_objects`` for numeric and date coercion.

    Every call is made inside ``tm.assert_produces_warning(FutureWarning)``,
    so the test also checks that each call emits the deprecation warning.
    The cases covered are numeric coercion of strings, mixed object data
    (GH 4119), datetime conversion with and without ``'coerce'``, non-object
    input being left alone, and single letters not being parsed as dates.
    """

    def convert(series, **kwargs):
        # Run convert_objects while asserting that it warns.
        with tm.assert_produces_warning(FutureWarning):
            return series.convert_objects(**kwargs)

    s = Series([1., 2, 3], index=['a', 'b', 'c'])
    result = convert(s, convert_dates=False, convert_numeric=True)
    assert_series_equal(result, s)

    # force numeric conversion
    r = s.copy().astype('O')
    r['a'] = '1'
    result = convert(r, convert_dates=False, convert_numeric=True)
    assert_series_equal(result, s)

    r = s.copy().astype('O')
    r['a'] = '1.'
    result = convert(r, convert_dates=False, convert_numeric=True)
    assert_series_equal(result, s)

    # a string that is not a number is expected to become NaN
    r = s.copy().astype('O')
    r['a'] = 'garbled'
    expected = s.copy()
    expected['a'] = np.nan
    result = convert(r, convert_dates=False, convert_numeric=True)
    assert_series_equal(result, expected)

    # GH 4119, not converting a mixed type (e.g. floats and object)
    s = Series([1, 'na', 3, 4])
    result = convert(s, convert_numeric=True)
    expected = Series([1, np.nan, 3, 4])
    assert_series_equal(result, expected)

    s = Series([1, '', 3, 4])
    result = convert(s, convert_numeric=True)
    expected = Series([1, np.nan, 3, 4])
    assert_series_equal(result, expected)

    # dates
    s = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
                datetime(2001, 1, 3, 0, 0)])
    s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0),
                 datetime(2001, 1, 3, 0, 0), 'foo', 1.0, 1,
                 Timestamp('20010104'), '20010105'], dtype='O')

    result = convert(s, convert_dates=True, convert_numeric=False)
    expected = Series([Timestamp('20010101'), Timestamp('20010102'),
                       Timestamp('20010103')], dtype='M8[ns]')
    assert_series_equal(result, expected)

    result = convert(s, convert_dates='coerce', convert_numeric=False)
    # This result used to be overwritten by the next call without ever
    # being compared, so the convert_numeric=False case went unchecked.
    assert_series_equal(result, expected)

    result = convert(s, convert_dates='coerce', convert_numeric=True)
    assert_series_equal(result, expected)

    expected = Series([Timestamp('20010101'), Timestamp('20010102'),
                       Timestamp('20010103'), lib.NaT, lib.NaT, lib.NaT,
                       Timestamp('20010104'), Timestamp('20010105')],
                      dtype='M8[ns]')
    result = convert(s2, convert_dates='coerce', convert_numeric=False)
    assert_series_equal(result, expected)

    result = convert(s2, convert_dates='coerce', convert_numeric=True)
    assert_series_equal(result, expected)

    # preserve all-nans (if convert_dates='coerce')
    s = Series(['foo', 'bar', 1, 1.0], dtype='O')
    result = convert(s, convert_dates='coerce', convert_numeric=False)
    expected = Series([lib.NaT] * 2 + [Timestamp(1)] * 2)
    assert_series_equal(result, expected)

    # preserve if non-object
    s = Series([1], dtype='float32')
    result = convert(s, convert_dates='coerce', convert_numeric=False)
    assert_series_equal(result, s)

    # Disabled check, kept as it appeared in the original test:
    # r = s.copy()
    # r[0] = np.nan
    # result = r.convert_objects(convert_dates=True,convert_numeric=False)
    # assert result.dtype == 'M8[ns]'

    # dateutil parses some single letters into today's value as a date
    for letter in 'abcdefghijklmnopqrstuvwxyz':
        for text in (letter, letter.upper()):
            s = Series([text])
            result = convert(s, convert_dates='coerce')
            assert_series_equal(result, s)
def test_convert_objects_preserve_all_bool(self):
    """An object Series holding only booleans is expected back as bool dtype."""
    expected = Series([False, True, False, False], dtype=bool)
    object_flags = Series([False, True, False, False], dtype=object)

    # The call itself must emit a FutureWarning.
    with tm.assert_produces_warning(FutureWarning):
        converted = object_flags.convert_objects(convert_numeric=True)

    tm.assert_series_equal(converted, expected)
def test_convert_objects_preserve_bool(self):
    """An object Series of ints plus ``True`` is expected as int64, True -> 1."""
    expected = Series([1, 1, 3, 5], dtype='i8')
    mixed = Series([1, True, 3, 5], dtype=object)

    # The call itself must emit a FutureWarning.
    with tm.assert_produces_warning(FutureWarning):
        converted = mixed.convert_objects(convert_numeric=True)

    tm.assert_series_equal(converted, expected)
import numpy as np
import matplotlib.pyplot as plt

# Split each raw line on whitespace into separate columns and label them
# with col_n[0]. (`temps`, `col_n` and `pd` come from earlier in the file.)
temps = pd.DataFrame(list(temps.col.str.split()), columns=col_n[0])

# Drop the first row, which duplicates the column names.
temps = temps.drop(temps.index[0])

# This would strip white space, but it is probably unnecessary:
# temps.apply(lambda x: x.str.strip())

# Change 'M' entries to missing values.
temps.replace('M', np.nan, inplace=True)

# Create a column with TX vs TN (last two characters of MO), then trim MO
# so it holds only the month.
temps['Lvl'] = temps['MO'].str[-2:]
temps['MO'] = temps['MO'].str[:-2]
temps['YRMO'] = temps['YR'] + temps['MO']

# Make year and month indexes.
# Q: Added Lvl as well - does this shape make sense?
# Q: Added YRMO so we can group and plot, but there must be a way to do
#    this with the hierarchical indexing.
temps = temps.set_index(['YR', 'MO', 'YRMO', 'Lvl'])
temps = temps.stack().unstack(['Lvl'])

# Quick look at the reshaped frame; this only shows output in an
# interactive session.
temps.head(100)

# Name the index levels, including the new day level produced by stack().
temps.index.names = ['YR', 'MO', 'YRMO', 'DAY']

# Convert TX and TN to numbers. `convert_objects` is deprecated, so use
# `to_numeric` column by column; values that cannot be parsed become NaN.
temps = temps.apply(pd.to_numeric, errors='coerce')

# Grouping.
# Q: really don't think that should need YRMO
yrmo_grouped = temps.groupby(level=(['YRMO'])).mean()
yr_grouped = temps.groupby(level=(['YR'])).mean()

# Let's try graphing!
yrmo_grouped.plot()  # this is pandas plot, which is a wrapper on plt.plot()
yr_grouped.plot()

# You can do rolling aggregates too! This is a 1000-row rolling *sum*
# (swap .sum() for .mean() to get a rolling average). `pd.rolling_sum` is
# gone from pandas; `.rolling(...).sum()` is the replacement.
temps.rolling(1000).sum().plot()
# test