def test_series(self, orient, numpy):
    """Round-trip a sorted Series through ``ujson`` and compare with the input.

    ``orient`` and ``numpy`` are forwarded to ``ujson.encode`` and
    ``ujson.decode`` respectively, and only when they are not ``None``.
    """
    s = Series(
        [10, 20, 30, 40, 50, 60],
        name="series",
        index=[6, 7, 8, 9, 10, 15],
    ).sort_values()

    encode_kwargs = {"orient": orient} if orient is not None else {}
    decode_kwargs = {"numpy": numpy} if numpy is not None else {}

    encoded = ujson.encode(s, **encode_kwargs)
    decoded = ujson.decode(encoded, **decode_kwargs)

    # "split" output is passed through _clean_dict and unpacked as Series
    # keyword arguments; every other orient is handed to Series directly.
    if orient == "split":
        output = Series(**_clean_dict(decoded))
    else:
        output = Series(decoded)

    # Adjust the expected Series to what each orient is compared against.
    if orient in (None, "index"):
        s.name = None
        output = output.sort_values()
        s.index = ["6", "7", "8", "9", "10", "15"]
    elif orient in ("records", "values"):
        s.name = None
        s.index = [0, 1, 2, 3, 4, 5]

    tm.assert_series_equal(output, s, check_dtype=False)
def test_set_name_attribute(self): s = Series([1, 2, 3]) s2 = Series([1, 2, 3], name="bar") for name in [7, 7.0, "name", datetime(2001, 1, 1), (1,), u"\u05D0"]: s.name = name self.assertEqual(s.name, name) s2.name = name self.assertEqual(s2.name, name)
def test_set_name_attribute(self): s = Series([1, 2, 3]) s2 = Series([1, 2, 3], name='bar') for name in [7, 7., 'name', datetime(2001, 1, 1), (1,), "\u05D0"]: s.name = name assert s.name == name s2.name = name assert s2.name == name
def test_metadata_propagation_indiv(self): # check that the metadata matches up on the resulting ops o = Series(range(3),range(3)) o.name = 'foo' o2 = Series(range(3),range(3)) o2.name = 'bar' result = o.T self.check_metadata(o,result)
def test_name_printing(self): # test small series s = Series([0, 1, 2]) s.name = "test" self.assert_("Name: test" in repr(s)) s.name = None self.assert_(not "Name:" in repr(s)) # test big series (diff code path) s = Series(range(0,1000)) s.name = "test" self.assert_("Name: test" in repr(s)) s.name = None self.assert_(not "Name:" in repr(s))
def test_metadata_propagation_indiv(self): # check that the metadata matches up on the resulting ops o = Series(range(3), range(3)) o.name = 'foo' o2 = Series(range(3), range(3)) o2.name = 'bar' result = o.T self.check_metadata(o, result) # resample ts = Series(np.random.rand(1000), index=date_range('20130101', periods=1000, freq='s'), name='foo') result = ts.resample('1T').mean() self.check_metadata(ts, result) result = ts.resample('1T').min() self.check_metadata(ts, result) result = ts.resample('1T').apply(lambda x: x.sum()) self.check_metadata(ts, result) _metadata = Series._metadata _finalize = Series.__finalize__ Series._metadata = ['name', 'filename'] o.filename = 'foo' o2.filename = 'bar' def finalize(self, other, method=None, **kwargs): for name in self._metadata: if method == 'concat' and name == 'filename': value = '+'.join([getattr( o, name) for o in other.objs if getattr(o, name, None) ]) object.__setattr__(self, name, value) else: object.__setattr__(self, name, getattr(other, name, None)) return self Series.__finalize__ = finalize result = pd.concat([o, o2]) assert result.filename == 'foo+bar' assert result.name is None # reset Series._metadata = _metadata Series.__finalize__ = _finalize
def test_constructor(self, datetime_series, empty_series):
    """Exercise basic ``Series`` construction paths and their failure modes."""
    source_index = datetime_series.index
    assert source_index.is_all_dates

    # Pass in Series
    derived = Series(datetime_series)
    assert derived.index.is_all_dates
    assert tm.equalContents(derived.index, datetime_series.index)
    # Ensure new index is not created
    assert datetime_series.index is derived.index

    # Mixed type Series
    mixed = Series(['hello', np.NaN], index=[0, 1])
    assert mixed.dtype == np.object_
    assert mixed[1] is np.NaN

    for not_dates in (empty_series, Series({})):
        assert not not_dates.index.is_all_dates

    # exception raised is of type Exception
    with pytest.raises(Exception, match="Data must be 1-dimensional"):
        Series(np.random.randn(3, 3), index=np.arange(3))

    # The name survives wrapping the Series in another Series.
    mixed.name = 'Series'
    assert Series(mixed).name == 'Series'

    # raise on MultiIndex GH4187
    m = MultiIndex.from_arrays([[1, 2], [3, 4]])
    msg = "initializing a Series from a MultiIndex is not supported"
    with pytest.raises(NotImplementedError, match=msg):
        Series(m)
def test_zero_emsd(self):
    """``tp.emsd`` of ``self.dead_still`` must equal an all-zero series."""
    N = 10
    actual = tp.emsd(self.dead_still, 1, 1)

    # Expected: N zeros with the first entry dropped, as float64.
    zeros = Series(np.zeros(N))
    expected = zeros.iloc[1:].astype('float64')
    expected.name = 'msd'
    expected.index.name = 'lagt'

    assert_series_equal(actual, expected)
def _with_str_name(x):
    """Return ``x`` with a ``str`` name, renaming a copy rather than ``x``."""
    if isinstance(x.name, str):
        return x
    return x.rename(str(x.name))


def ensure1d(x, name, series=False):
    """Coerce ``x`` to one dimension.

    Parameters
    ----------
    x : Series, DataFrame, ndarray or array-like
        Input data.  A DataFrame must have exactly one column.
    name : str
        Used in error messages, and as the name of the Series built from
        array-like input when ``series`` is true.
    series : bool, default False
        Return a ``Series`` when true, otherwise a NumPy array.

    Returns
    -------
    Series or ndarray
        For Series/DataFrame input the returned Series always has a ``str``
        name.  The caller's object is never renamed in place: a Series whose
        name is not already a string is returned as a renamed copy.

    Raises
    ------
    ValueError
        If ``x`` cannot be squeezed to exactly one dimension.
    """
    if isinstance(x, DataFrame):
        if x.shape[1] != 1:
            raise ValueError(name + ' must be squeezable to 1 dimension')
        x = Series(x[x.columns[0]], x.index)

    if isinstance(x, Series):
        x = _with_str_name(x)
        return x if series else np.asarray(x)

    if not isinstance(x, np.ndarray):
        x = np.asarray(x)
    if x.ndim == 0:
        # Promote a scalar to a length-1 array.
        x = x[None]
    elif x.ndim != 1:
        x = np.squeeze(x)
        if x.ndim != 1:
            raise ValueError(name + ' must be squeezable to 1 dimension')

    if series:
        return Series(x, name=name)
    return np.asarray(x)
def test_constructor(self):
    """Exercise basic ``Series`` construction paths and their failure modes."""
    ts = self.ts
    self.assertTrue(ts.index.is_all_dates)

    # Pass in Series
    derived = Series(ts)
    self.assertTrue(derived.index.is_all_dates)
    self.assertTrue(tm.equalContents(derived.index, ts.index))
    # Ensure new index is not created
    self.assertIs(ts.index, derived.index)

    # Mixed type Series
    mixed = Series(['hello', np.NaN], index=[0, 1])
    self.assertEqual(mixed.dtype, np.object_)
    self.assertIs(mixed[1], np.NaN)

    for not_dates in (self.empty, Series({})):
        self.assertFalse(not_dates.index.is_all_dates)

    # 2-D data must be rejected.
    with self.assertRaises(Exception):
        Series(np.random.randn(3, 3), index=np.arange(3))

    # The name survives wrapping the Series in another Series.
    mixed.name = 'Series'
    self.assertEqual(Series(mixed).name, 'Series')

    # raise on MultiIndex GH4187
    m = MultiIndex.from_arrays([[1, 2], [3, 4]])
    with self.assertRaises(NotImplementedError):
        Series(m)
def main():
    """Print a short tour of pandas ``Series`` and ``DataFrame`` basics.

    Takes no arguments and returns nothing; every step is printed.  Uses
    Python 2 ``print`` statements.
    """
    # A Series can be viewed as a fixed-length, ordered dictionary.
    s1 = Series([1,2,3.0,'abc'])
    print s1
    print
    s2 = Series(data=[1,3,5,7],index = ['a','b','x','y'])
    print s2
    print s2.index
    print s2.values
    s2.name = 'a_series'
    s2.index.name = 'the_index'
    print s2
    ser = Series([4.5,7.2,-5.3,3.6],index=['d','b','a','c'])
    # reindex
    a = ['a','b','c','d','e']
    ser_1 = ser.reindex(a)
    print ser_1
    ser_2 = ser.reindex(a, fill_value=0)
    print ser_2
    print
    # A DataFrame is a tabular data structure holding an ordered set of
    # columns (similar to an index); each column may have a different value
    # type (unlike an ndarray, which has a single dtype).
    # A DataFrame can basically be viewed as a collection of Series sharing
    # the same index.
    data = {'state':['Ohino','Ohino','Ohino','Nevada','Nevada'], 'year':[2000,2001,2002,2001,2002], 'pop':[1.5,1.7,3.6,2.4,2.9]}
    df = DataFrame(data)
    print df
    # 'debt' is not a key of data, so it is requested as an extra column.
    df = DataFrame(data, index=['one','two','three','four','five'], columns=['year','state','pop','debt'])
    print df
    print df.index
    print df.columns
    print type(df['debt'])
    state = ['Texas','Utha','California']
    df1 = df.reindex(columns=state, method='ffill')
    print df1
    print
def test_concat_series_axis1(self):
    """Concatenating Series along ``axis=1`` yields the matching DataFrame."""
    ts = tm.makeTimeSeries()
    pieces = [ts[:-2], ts[2:], ts[2:-2]]

    # Without keys.
    assert_frame_equal(concat(pieces, axis=1), DataFrame(pieces).T)

    # With explicit keys as column labels.
    keys = ['A', 'B', 'C']
    assert_frame_equal(concat(pieces, keys=keys, axis=1),
                       DataFrame(pieces, index=keys).T)

    # preserve series names, #2489
    s = Series(randn(5), name='A')
    s2 = Series(randn(5), name='B')

    named_result = concat([s, s2], axis=1)
    assert_frame_equal(named_result, DataFrame({'A': s, 'B': s2}))

    s2.name = None
    partly_named = concat([s, s2], axis=1)
    expected_columns = Index(['A', 0], dtype='object')
    self.assertTrue(np.array_equal(partly_named.columns, expected_columns))

    # must reindex, #2603
    s = Series(randn(3), index=['c', 'a', 'b'], name='A')
    s2 = Series(randn(4), index=['d', 'a', 'b', 'c'], name='B')
    reindexed = concat([s, s2], axis=1)
    assert_frame_equal(reindexed, DataFrame({'A': s, 'B': s2}))
def test_repr_unicode(self): s = Series([u'\u03c3'] * 10) repr(s) a = Series([u"\u05d0"] * 1000) a.name = 'title1' repr(a)
def test_constructor(self):
    """Exercise basic ``Series`` construction paths and their failure modes."""
    ts = self.ts
    assert ts.index.is_all_dates

    # Pass in Series
    derived = Series(ts)
    assert derived.index.is_all_dates
    assert tm.equalContents(derived.index, ts.index)
    # Ensure new index is not created
    assert ts.index is derived.index

    # Mixed type Series
    mixed = Series(['hello', np.NaN], index=[0, 1])
    assert mixed.dtype == np.object_
    assert mixed[1] is np.NaN

    for not_dates in (self.empty, Series({})):
        assert not not_dates.index.is_all_dates

    # 2-D data must be rejected.
    with pytest.raises(Exception):
        Series(np.random.randn(3, 3), index=np.arange(3))

    # The name survives wrapping the Series in another Series.
    mixed.name = 'Series'
    assert Series(mixed).name == 'Series'

    # raise on MultiIndex GH4187
    m = MultiIndex.from_arrays([[1, 2], [3, 4]])
    with pytest.raises(NotImplementedError):
        Series(m)
def predict_autosequence(config, context, predict_index, fit_model=True,
                         update_column=None):
    """Predict one row of ``predict_index`` at a time.

    Features are rebuilt with ``get_x`` before every single-row prediction.
    When ``update_column`` is given, each prediction is written into that
    column of ``ctx.data`` before the next row's features are rebuilt.

    Parameters
    ----------
    config, context
        Project configuration/context objects; ``config.model`` does the
        predicting.
    predict_index
        Index labels to predict, processed in iteration order.
    fit_model : bool, default True
        Call ``fit(config, context)`` first.
    update_column : optional
        Column of ``ctx.data`` that receives each prediction.

    Returns
    -------
    tuple
        ``(preds, x, y)``.  ``x`` is the feature frame from the last loop
        iteration (or from ``fit`` when ``predict_index`` is empty); ``y``
        comes from ``fit`` and is ``None`` when ``fit_model`` is false.
    """
    if len(context.train_index & predict_index):
        logging.warning("Train and predict indices overlap...")

    x, y = None, None

    if fit_model:
        x, y = fit(config, context)
        # Only log the fit artefacts when a fit actually happened; with
        # fit_model=False x is still None and x.columns would raise.
        logging.debug(x.columns)
        logging.debug(config.model.coef_)

    ctx = context.copy()
    ps = []
    for i in predict_index:
        ctx.data = context.data
        x = get_x(config, ctx)
        predict_x = x.reindex([i])

        # make actual predictions
        p = config.model.predict(predict_x.values)
        if update_column is not None:
            # NOTE(review): chained assignment; whether this reaches the
            # underlying frame depends on the pandas version - confirm.
            ctx.data[update_column][i] = p[0]
        ps.append(p[0])

    try:
        preds = Series(ps, index=predict_index)
    except Exception:
        # Fall back to a frame when the predictions do not fit a Series.
        # Narrowed from a bare ``except`` so KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        preds = DataFrame(ps, index=predict_index)

    # prediction post-processing
    if config.prediction is not None:
        context.data[config.predictions_name] = preds
        preds = build_target(config.prediction, context)
        preds = preds.reindex(predict_index)
    preds.name = ''
    return preds, x, y
def make_ref(df, method='mean'):
    '''Build the reference spectrum used to compute a dynamic spectrum (y - ref).

    The reference is usually the time-wise mean of the dataset, zero, or an
    external pre-saved spectrum.  This function generates the first two;
    external spectra are left to the caller.

    Assumes spectral information runs along the index of the dataframe; no
    attempt is made to accommodate other layouts.

    df: DataFrame with spectral data along the index (rows) and the
        temporal/physical variable along the columns.

    method: How to generate the reference spectrum (case-insensitive).
        "mean"  - Mean across the columns for every index entry
                  (``df.mean(axis=1)``).
        "empty" - Series of 0.0 with the same index as ``df``.

    returns: Series named 'refspec' with one value per entry of df.index.

    raises: ``badvalue_error(method, 'mean, empty')`` for any other method.'''
    method = method.lower()

    if method == 'mean':
        refspec = df.mean(axis=1)

    elif method == 'empty':
        # A scalar is broadcast over the index, so no list has to be built.
        refspec = Series(0.0, index=df.index)

    else:
        raise badvalue_error(method, 'mean, empty')

    refspec.name = 'refspec'  # Not sure if this will be useful
    return refspec
def test_label(self):
    """Legend labels come from ``label=`` first, then from the Series name."""
    s = Series([1, 2])

    def plot_on_fresh_axes(**plot_kwargs):
        # Every scenario draws on its own newly created axes.
        _, fresh_ax = self.plt.subplots()
        return s.plot(ax=fresh_ax, **plot_kwargs)

    ax = plot_on_fresh_axes(label='LABEL', legend=True)
    self._check_legend_labels(ax, labels=['LABEL'])
    self.plt.close()

    # Unnamed Series without an explicit label.
    ax = plot_on_fresh_axes(legend=True)
    self._check_legend_labels(ax, labels=['None'])
    self.plt.close()

    # get name from index
    s.name = 'NAME'
    ax = plot_on_fresh_axes(legend=True)
    self._check_legend_labels(ax, labels=['NAME'])
    self.plt.close()

    # override the default
    ax = plot_on_fresh_axes(legend=True, label='LABEL')
    self._check_legend_labels(ax, labels=['LABEL'])
    self.plt.close()

    # Add label info, but don't draw
    ax = plot_on_fresh_axes(legend=False, label='LABEL')
    assert ax.get_legend() is None  # Hasn't been drawn
    ax.legend()  # draw it
    self._check_legend_labels(ax, labels=['LABEL'])
def test_name_printing(self):
    """``repr`` shows ``Name:`` only while the Series has a name."""
    # test small series, then big series (diff code path)
    for values in ([0, 1, 2], lrange(0, 1000)):
        s = Series(values)
        s.name = "test"
        self.assertIn("Name: test", repr(s))
        s.name = None
        self.assertNotIn("Name:", repr(s))

    # Series built from an index only, with the name passed at construction.
    dated = Series(index=date_range('20010101', '20020101'), name='test')
    self.assertIn("Name: test", repr(dated))
def test_repr_unicode(self): s = Series([u"\u03c3"] * 10) repr(s) a = Series([u"\u05d0"] * 1000) a.name = "title1" repr(a)
def test_repr_name_iterable_indexable(self):
    """``repr`` must cope with a NumPy-integer name and a tuple name."""
    s = Series([1, 2, 3], name=np.int64(3))

    # it works!
    repr(s)

    # Name becomes a two-element tuple of the same text.
    aleph = u("\u05d0")
    s.name = (aleph, aleph)
    repr(s)
def test_zero_emsd(self):
    """``tp.emsd`` of ``self.dead_still`` must equal an all-zero series.

    ``np.float64`` replaces the ``np.float`` alias, which was deprecated in
    NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
    """
    N = 10
    actual = tp.emsd(self.dead_still, 1, 1)
    # Expected: zeros on a float index 0..N-1, with the first entry dropped.
    expected = Series(np.zeros(N, dtype=np.float64),
                      index=np.arange(N, dtype=np.float64)).iloc[1:]
    expected.index.name = 'lagt'
    expected.name = 'msd'
    assert_series_equal(actual, expected)
def add_id_column_to_dframe(self, dframe):
    """Return ``dframe`` with encoded column names plus an id column.

    Columns are renamed with the map from
    ``self.schema.rename_map_for_dframe``; the joined column is named
    ``DATASET_OBSERVATION_ID`` and repeats ``self.dataset_observation_id``
    once per row.
    """
    renamed = dframe.rename(
        columns=self.schema.rename_map_for_dframe(dframe))
    id_values = [self.dataset_observation_id] * len(renamed)
    id_column = Series(id_values, name=DATASET_OBSERVATION_ID)
    return renamed.join(id_column)
def asSeries(df, name='', limit=0):
    '''Get the time series indexed by day of release.

    Returns the 'Gross' column of ``df`` indexed by its 'Day #' column and
    named ``name``, cut to the first ``limit`` entries when ``limit`` is
    positive.  If either column is missing, a message is printed and an
    empty Series is returned.
    '''
    has_required_columns = 'Gross' in df and 'Day #' in df
    if not has_required_columns:
        print('{} has an empty dataframe'.format(name))
        return Series()

    gross = Series(df['Gross'])
    gross.index = df['Day #']
    trimmed = gross[:limit] if limit > 0 else gross
    trimmed.name = name
    return trimmed
def test_describe_empty(self): result = pd.Series().describe() self.assertEqual(result['count'], 0) self.assertTrue(result.drop('count').isnull().all()) nanSeries = Series([np.nan]) nanSeries.name = 'NaN' result = nanSeries.describe() self.assertEqual(result['count'], 0) self.assertTrue(result.drop('count').isnull().all())
def test_only_rows_for_parent_id(self):
    """``rows_for_parent_id`` keeps only the parent's rows, without the id column.

    Half of ``self.dframe``'s rows are tagged with ``parent_id`` through a
    joined ``PARENT_DATASET_ID`` column.
    """
    parent_id = 1
    # Floor division: on Python 3 ``/`` yields a float, and multiplying a
    # list by a float raises TypeError.  On Python 2 the value is unchanged.
    len_parent_rows = len(self.dframe) // 2

    column = Series([parent_id] * len_parent_rows)
    column.name = PARENT_DATASET_ID

    self.dframe = self.dframe.join(column)
    dframe_only = rows_for_parent_id(self.dframe, parent_id)

    self.assertNotIn(PARENT_DATASET_ID, dframe_only.columns)
    self.assertEqual(len(dframe_only), len_parent_rows)
def test_name_printing(self):
    """``repr`` shows ``Name:`` only while the Series has a name."""
    # Test small Series, then big Series (diff code path).
    for values in ([0, 1, 2], lrange(0, 1000)):
        s = Series(values)
        s.name = "test"
        assert "Name: test" in repr(s)
        s.name = None
        assert "Name:" not in repr(s)

    # Series built from an index only, with the name passed at construction.
    dated = Series(index=date_range('20010101', '20020101'), name='test')
    assert "Name: test" in repr(dated)
def predict(config, context, predict_index, fit_model=True, model_name=None): if len(context.train_index & predict_index): print "WARNING: train and predict indices overlap..." x, y = None, None if model_name: config.model = context.store.load(model_name) if not model_name and fit_model: x, y = fit(config, context) # TODO: possible to have x loaded without new prediction rows if x is None: # rebuild just the necessary x: ctx = context.copy() ctx.data = context.data.ix[predict_index] x = get_x(config, ctx) try: # we may or may not have y's in predict context # we get them if we can for metrics and reporting y = get_y(config, ctx) except KeyError: pass if debug: print x.columns predict_x = x.reindex(predict_index) print "Making predictions... ", # make actual predictions ps = config.model.predict(predict_x.values) try: preds = Series(ps, index=predict_x.index) except: preds = DataFrame(ps, index=predict_x.index) print "[OK]" # prediction post-processing if config.prediction is not None: old = context.data context.data = context.data.reindex(predict_x.index) context.data[config.predictions_name] = preds preds = build_target(config.prediction, context) preds = preds.reindex(predict_x.index) context.data = old preds.name = '' actuals = y.reindex(predict_index) # TODO: handle multi-variate predictions predict_x['predictions'] = preds predict_x['actuals'] = actuals config.update_reporters_with_predictions(context, predict_x, actuals, preds) return predict_x
def test_only_rows_for_parent_id(self):
    """``only_rows_for_parent_id`` keeps only the parent's rows, without the id column.

    Half of ``self.bframe``'s rows are tagged with ``parent_id`` through a
    joined ``PARENT_DATASET_ID`` column.
    """
    parent_id = 1
    # Floor division: on Python 3 ``/`` yields a float, and multiplying a
    # list by a float raises TypeError.  On Python 2 the value is unchanged.
    len_parent_rows = len(self.bframe) // 2

    column = Series([parent_id] * len_parent_rows)
    column.name = PARENT_DATASET_ID

    self.bframe = BambooFrame(self.bframe.join(column))
    bframe_only = self.bframe.only_rows_for_parent_id(parent_id)

    self.assertNotIn(PARENT_DATASET_ID, bframe_only.columns)
    self.assertEqual(len(bframe_only), len_parent_rows)
def test_zero_emsd(self):
    """``tp.emsd`` of ``self.dead_still`` must equal an all-zero series.

    ``np.float64`` replaces the ``np.float`` alias, which was deprecated in
    NumPy 1.20 and removed in 1.24; the resulting dtype is the same.
    """
    N = 10
    actual = tp.emsd(self.dead_still, 1, 1)
    # Expected: zeros on a float index 0..N-1, with the first entry dropped.
    expected = Series(np.zeros(N, dtype=np.float64),
                      index=np.arange(N, dtype=np.float64)).iloc[1:]
    expected.index.name = 'lagt'
    expected.name = 'msd'
    # HACK: Float64Index imprecision ruins index equality.
    # Test them separately. If that works, make them exactly the same.
    assert_almost_equal(actual.index.values, expected.index.values)
    actual.index = expected.index
    assert_series_equal(actual, expected)
def test_linear_emsd(self):
    """``tp.emsd`` of ``self.many_walks`` rounds to ``2*A*lagtime`` early on."""
    A = 1
    EARLY = 7  # only early lag times have good stats
    actual = tp.emsd(self.many_walks, 1, 1, max_lagtime=EARLY)

    lagtimes = np.arange(EARLY, dtype='float64')
    full_line = Series(2*A*lagtimes, index=lagtimes)
    expected = full_line.iloc[1:]  # first entry is dropped
    expected.index.name = 'lag time [s]'
    expected.name = 'msd'

    # HACK: Float64Index imprecision ruins index equality.
    # Test them separately. If that works, make them exactly the same.
    assert_almost_equal(actual.index.values, expected.index.values)
    actual.index = expected.index
    assert_series_equal(np.round(actual), expected)
# -*- coding: UTF-8 -*- import tushare as ts import pandas as pd import numpy as np from pandas import DataFrame,Series data = DataFrame(np.arange(16).reshape((4, 4)), index=['Ohio', 'Colorado', 'Utah', 'New York'], columns=['one', 'two', 'three', 'four']) d = [1,2,3,4] se1 = DataFrame(columns=['one', 'two', 'three', 'four'],index = ['fu']) se1.loc['fu'] = [1,2,3,4] se2 = Series({'a':11,'b':11,'c':11}) se2.name = 'fu' data = data.append(se1) df = pd.DataFrame(np.arange(0,60,2).reshape(10,3),columns=list('abc'),index = ['one','two','three','four','five','six','seven','eight','nine','ten']) df2 = pd.DataFrame(np.arange(60,120,2).reshape(10,3),columns=list('abc'),index = ['one','two','three','four','five','six','seven','eight','nine','ten']) df = df.append(df2) df for x in xrange(len(df)): if df.iloc[x,2]%4 == 0: print 'hi','_____',df.index[x],df.ix[df.index[x],'b'],'________',df.index[x],df.ix[df.index[x],1] df.iloc[x,2] = 'hi' df.ix[df.index[x],'d'] = 'hello' df.loc[df.index[x],'e'] = 'congrat' df.iloc[x,0] = 'hehe' # for x in xrange(len(data)): # if data.loc[x,'label'] != 'none' and data.loc[x,'label'] != 'nodate':
# Build a float32 Series named "example_data" from dict_data, which is
# defined outside this excerpt.
example_obj = Series(dict_data, dtype=np.float32, name="example_data")
example_obj
# Label-based read and write.
example_obj["a"]
example_obj["a"] = 3.2
example_obj
# Boolean-mask filtering and element-wise arithmetic.
example_obj[example_obj > 2]
example_obj * 2
np.exp(example_obj) #np.abs , np.log  (apply the exponential function, e**0)
# Membership is tested against the index labels.
"b" in example_obj
example_obj.to_dict() #from DataFrame to dictionary
example_obj.values
example_obj.index
# Name the values and the index.
example_obj.name = "number"
example_obj.index.name = "alphabet"
example_obj
dict_data_1 = {"a":1, "b":2, "c":3, "d":4, "e":5}
# The index lists "f", "g" and "h", which are not keys of dict_data_1.
indexes = ["a","b","c","d","e","f","g","h"]
series_obj_1 = Series(dict_data_1, index=indexes)
series_obj_1
""" 2. pandas_dataframe """
# Example from - https://chrisalbon.com/python/pandas_map_values_to_values.html
raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'], 'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'], 'age': [42, 52, 36, 24, 73], 'city': ['San Francisco', 'Baltimore', 'Miami', 'Douglas', 'Boston']}
print(Obj3)
print(type(Obj3))
# Create a list
esportes = ['Futebol', 'Tenis', 'Natação', 'Basktetball']
# Create a Series using a list as the index.
# NOTE(review): `dict` here is whatever the surrounding file bound to that
# name (it is not defined in this excerpt) - confirm it is a data dictionary
# and not the builtin type.
Obj4 = Series(dict, index=esportes)
print(Obj4)
print(pd.isnull(Obj4))
print(pd.notnull(Obj4))
print(Obj4.isnull())
# "Concatenating" Series (the code adds the two Series with +)
print(Obj3 + Obj4)
# Name the values and the index.
Obj4.name = 'população'
Obj4.index.name = 'esporte'
print(Obj4)
print("\nDataframes")
print("----------")
from pandas import DataFrame
data = {'Estado': ['Santa Catarina', 'Paraná', 'Goiás', 'Bahia', 'Minas Gerais'], 'Ano': [2002, 2003, 2004, 2005, 2006], 'População': [1.5, 1.7, 3.6, 2.4, 2.9]}
frame = DataFrame(data)
print(frame)
print(type(frame))
# Same data with an explicit column order.
print(DataFrame(data, columns=['Ano', 'Estado', 'População']))
frame2 = DataFrame(data, columns = ['Ano', 'Estado', 'População', 'Débito'],
obj.values obj.index obj[[2, 1]] # sequence index obj[['a', 'c']] # defined index obj[obj > 0] # from dictionary sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000} type(sdata) type(sdata['Ohio']) # this is scalar value obj3 = Series( sdata) # convert dictionary to series, key is index, value is value type(obj3) obj3.name = 'population', # change the name for value, like column name obj3.index.name = 'state', # change the name for index, like row name states = ['California', 'Ohio', 'Oregon', 'Texas'] obj4 = Series( sdata, index=states ) # return the partial series based on index, totally matched with index """ #################### operation ######################### """ obj4 + obj3 # add the values based on index """________________________________________________________________ DataFrame """ data = { 'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'], 'year': [2000, 2001, 2002, 2001, 2002],
def converter(series: pd.Series) -> pd.Series:
    """Rename ``series`` to ``name`` (taken from the enclosing scope) and return it.

    The rename happens in place, so the returned object is the one passed in.
    """
    setattr(series, "name", name)
    return series
#print revenue print revenue['ola'] print revenue[revenue >= 35] #use boolean conditions print 'lyft' in revenue revenue_dict = revenue.to_dict() print revenue_dict #nan values index_2 = ['ola', 'uber', 'grab', 'gojek', 'lyft'] revenue2 = Series(revenue, index_2) print revenue2 print 'is null' print pd.isnull(revenue2) print 'not null' print pd.notnull(revenue2) #addition of series print revenue + revenue2 #assigning names revenue2.name = "Company revenues" revenue2.index.name = "Company Name" print revenue2
import matplotlib as mpl
import matplotlib.pyplot as plt

# Series, DataFrame and np are expected to be imported outside this excerpt.

# The same values, first with the default index and then with labels.
obj = Series([1, -2, 3, -4])
obj2 = Series([1, -2, 3, -4], index=['a', 'b', 'c', 'd'])
obj2.values
obj2[['a', 'b']]
np.abs(obj2)
# Series built from a dict of student name -> score.
data = {'张三': 92, '李四': 78, '王五': 68, '小明': 82}
obj3 = Series(data)
names = ['张三', '李四', '王五', '小明']
obj4 = Series(data, index=names)
# Name the values and the index.
obj4.name = 'math'
obj4.index.name = 'student'
# `data` is rebound to a dict of equal-length columns for the DataFrame.
data = { 'name': ['张三', '李四', '王五', '小明'], 'sex': ['female', 'female', 'male', 'male'], 'year': [2001, 2001, 2003, 2202], 'city': ['北京', '上海', '广州', '北京'] }
df = DataFrame(data)
df = DataFrame(data, columns=['name', 'sex', 'year', 'city'], index=['a', 'b', 'c', 'd'])
# obj has the default integer index; reindexing it to letter labels rebinds
# obj2.
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
obj = Series([1, -2, 3, -4], index=[0, 2, 3, 5])
import pandas as pd
import numpy as np
import os

# Demo script: Series and DataFrame construction basics.
# Series and DataFrame are reached through the ``pd`` namespace because only
# ``pandas as pd`` is imported; the bare names would raise NameError.
os.system('clear')

# Series
obj1 = pd.Series([4, 7, -5, 3])
obj2 = pd.Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
np.exp(obj2)

sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = pd.Series(sdata)

# 'California' is listed here but is not a key of sdata.
states = ['California', 'Ohio', 'Oregon', "Texas"]
obj4 = pd.Series(sdata, index=states)
obj4.name = 'population'
obj4.index.name = 'state'

# Replace obj1's index labels in place.
obj1.index = ['Bob', 'Steve', 'Jeff', 'Ryan']

# DataFrame
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
frame = pd.DataFrame(data)
# 'debt' is not a key of data, so it is requested as an extra column.
frame2 = pd.DataFrame(data,
                      columns=['year', 'state', 'pop', 'debt'],
                      index=['one', 'two', 'three', 'four', 'five'])
# se puede utilizar isnull y notnull para encontrar huecos de informacion pd.isnull(obj2) #obj2.isnull() # al igual para el opuesto pd.notnull(obj2) #obj2.notnull() # desplegar la serie de WW2 nuevamente WW2_Series # checar las serie con el valor de Argentina obj2 # Now we can add and pandas automatically aligns data by index WW2_Series + obj2 # podemos nombrar una serie obj2.name = "World War 2 Casualties" # desplegar obj2 # tambien se puede nombrar los indices obj2.index.name = 'Countries' # desplegar obj2
def test_apply_dictlike_reducer(string_series, ops, how): # GH 39140 expected = Series({name: op(string_series) for name, op in ops.items()}) expected.name = string_series.name result = getattr(string_series, how)(ops) tm.assert_series_equal(result, expected)
def test_tidy_repr(self): a = Series(["\u05d0"] * 1000) a.name = "title1" repr(a) # should not raise exception
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Demo script: combining frames column-wise, joining a named Series as a new
# column, and stacking a frame on top of itself.

df1 = pd.DataFrame(np.arange(36).reshape(6, 6))
print(df1)
df2 = pd.DataFrame(np.arange(15).reshape(5, 3))
print(df2)

# Side-by-side concatenation; df1 and df2 both have columns 0, 1 and 2, so
# those labels repeat in df3.
df3 = pd.concat([df1, df2], axis=1)
print(df3)
print(df3.drop([0, 2], axis=1))

# The Series name becomes the label of the joined column.
series = Series(np.arange(6))
series.name = "added_variable"
print(series)
variable_added = df1.join(series)
print(variable_added)

# DataFrame.append was removed in pandas 2.0; pd.concat with
# ignore_index=True stacks the rows and renumbers the index the same way.
added_datatable = pd.concat([variable_added, variable_added],
                            ignore_index=True)
print(added_datatable)

print(df1.sort_values(by=[5], ascending=[True]))
def add_benchmark(net_values: pd.DataFrame, benchmark: pd.Series) -> pd.DataFrame:
    """Join ``benchmark`` onto ``net_values``, rebased to its first value.

    The joined column is called 'benchmark' and every entry is divided by
    the column's first entry (by position), so the column starts at 1.

    The caller's ``benchmark`` Series is not renamed in place; a renamed
    copy is joined instead.

    Returns a new DataFrame; ``net_values`` itself is not modified.
    """
    net_values = net_values.join(benchmark.rename('benchmark'))
    # .iloc[0] is the first row by position.  Plain [0] is a label lookup
    # and only falls back to position on some pandas versions.
    first_value = net_values['benchmark'].iloc[0]
    net_values['benchmark'] = net_values['benchmark'] / first_value
    return net_values
obj = Series(sdata) #print(obj) states = ['California', 'Ohio', 'Oregon', 'Texas'] obj = Series(sdata, index=states) #print(obj) #print(pd.isnull(obj)) #print(pd.notnull(obj)) #print(obj.isnull()) obj.name = 'population' obj.index.name = 'state' #print(obj) #obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan'] #print(obj) data = { 'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'], 'year': [2000, 2001, 2002, 2001, 2002], 'pop': [1.5, 1.7, 3.6, 2.4, 2.6] } frame = DataFrame(data)
# NOTE(review): `list` and `dict` below rebind the builtin names for the rest
# of the module.  They are left unchanged here because other parts of the
# file may refer to these variables.
list = [1, 2, 3, 4, 5]
print(list)
# `in` on a list tests the values.
print(1 in list)
print(10 in list)
dict = {'a': 1, 'b': 2, 'c':3}
print(dict)
# `in` on a dict tests the keys.
print('a' in dict)
print('b' in dict)
print('d' in dict)
obj2 = Series([4, 7, -5, 3], index=['a', 'b', 'c', 'd'])
print(obj2)
# `in` on a Series tests the index labels.
print('a' in obj2)
print('b' in obj2)
sdata = {'Seoul':9, 'Incheon': 10, 'Busan':10, 'Daejeon':10}
obj3 = Series(sdata)
print(obj3)
# Same data with an explicit list of index labels.
cities = ['Busan', 'Daejeon', 'Incheon', 'Seoul']
obj4 = Series(sdata, index=cities)
print(obj4)
# Name the values and the index.
obj4.name = 'weather'
obj4.index.name = 'city'
print(obj4)
# Assigning to .index replaces the labels in place; the values stay put.
obj4.index = ['Daegu', 'Gwangju', 'Sejeong', 'Jeju']
print(obj4)
def _tiered_daily_charge(usage, baseline, tier, mincharge):
    """Return the charge for one day's ``usage`` under the four-tier tariff.

    Tier 1 covers usage up to ``baseline``, tier 2 up to 130% of it, tier 3
    up to 200% of it and tier 4 everything above.  A day within the baseline
    is billed at no less than ``mincharge``.  Returns ``None`` when ``usage``
    fails every comparison (NaN), so the caller can skip that day.
    """
    tier1_max = baseline * 1.3  # tier 1 max (130% of baseline)
    tier2_max = baseline * 2    # tier 2 max (200% of baseline)

    # CASE 1 under baseline
    if usage <= baseline:
        return max(usage * tier[0], mincharge)
    # CASE 2 less than tier 1 more than the base
    if usage <= tier1_max:
        return baseline * tier[0] + (usage - baseline) * tier[1]
    # CASE 3 less than tier 2 more than tier 1
    if usage <= tier2_max:
        return (baseline * tier[0] + (tier1_max - baseline) * tier[1]
                + (usage - tier1_max) * tier[2])
    # CASE 4 more than tier 2
    if usage > tier2_max:
        return (baseline * tier[0] + (tier1_max - baseline) * tier[1]
                + (tier2_max - tier1_max) * tier[2]
                + (usage - tier2_max) * tier[3])
    return None


def base(pge):
    """Return the total tiered electricity charge for territory ``pge``.

    Reads ``<pge>.csv`` from the working directory.  The file must provide a
    'Date/Time' column whose second space-separated token is ``MM/DD`` and
    an 'Electricity:Facility [kW](Hourly)' column.  Rows are grouped into
    consecutive blocks of 24; a trailing block shorter than 24 rows is not
    billed.  Each block is priced with the summer baseline when the month of
    its last row is May through October, otherwise with the winter baseline.

    ``pge`` must be one of the territory letters in the baseline table
    (KeyError otherwise).
    """
    import pandas as pd

    data = pd.read_csv(pge + '.csv')

    tcol = 'Date/Time'  # time column
    ecol = 'Electricity:Facility [kW](Hourly)'  # total electricity col

    timestamps = list(data[tcol])
    energylist = list(data[ecol])
    # month of every hourly row
    months = [int(t.split(' ')[1].split('/')[0]) for t in timestamps]

    # baseline quantities per territory as of 2016: (summer, winter) kWh/day
    q = {
        'P': (13.8, 12.3),
        'Q': (7, 12.3),
        'R': (15.6, 11.0),
        'S': (13.8, 11.2),
        'T': (7.0, 8.5),
        'V': (8.7, 10.6),
        'W': (16.8, 10.1),
        'X': (10.1, 10.9),
        'Y': (10.6, 12.6),
        'Z': (6.2, 9.0),
    }
    mincharge = 0.32854  # total daily utility minimum charge
    tier = [0.18151, 0.21546, 0.27389, 0.34876]  # energy rates per tier

    summer_baseline, winter_baseline = q[pge]

    charge = []
    usage = 0
    for hour, (kwh, month) in enumerate(zip(energylist, months), start=1):
        usage = usage + kwh
        if hour % 24:
            continue

        # Summer is May 1 to Oct 31; everything else is winter.
        is_summer = 5 <= month <= 10
        baseline = summer_baseline if is_summer else winter_baseline
        # The winter branch used to price tier 2 with the summer baseline
        # (w1 - sb); both seasons now go through the same ladder.
        day_charge = _tiered_daily_charge(usage, baseline, tier, mincharge)
        if day_charge is not None:
            charge.append(day_charge)
        usage = 0

    return sum(charge)
terr = raw_input('Select Territory (T,R,W,X,S)>>> ') file = terr+'.csv' data = pd.read_csv(file) #print data.head() tcol = 'Date/Time' #time column ecol = 'Electricity:Facility [kW](Hourly)' #total electricity col time = list(data[tcol]) timemod = [i.replace('24:00:00','23:59:00') for i in time] timeplot = dates.datestr2num(timemod) energylist = list(data[ecol]) energy = Series(energylist) energy.name = 'energy' monlist = [m.split(' ')[1].split('/')[0] for m in time] daylist = [d.split(' ')[1].split('/')[1] for d in time] hrlist = [hr.split(' ')[3].split(':')[0] for hr in time] ######## DAY OF THE WEEK # Start Jan 1 on a Monday d = '01' dow = 0 dowlist = [] for i in daylist: if d == i: dowlist.append(dow) elif d != i and dow < 6:
obj2[obj2>5] #NumPy数组运算(如根据布尔型数组进行过捷、标量乘法、应用数学函数等)都会保留索引和值之间的链接 obj2*2 np.exp(obj2) 'b' in obj2 #还可以将Series看成是一个定长的有序字典,因为它是索引值到数据值的一个映射。它可以用在许多原本需要字典参数的函数中: sdata = {'Ohio':35000, 'Texas':7100,'Oregon':16000,'tah':5000} obj3=Series(sdata) #通过字典创建 Series pd.isnull(obj3) # pd.notnull(obj3) #pandas的isnull罪IJnotnull 函数可用于检测缺失数据: #对于许多应用而言, Series最重要的一个功能是:它在算术运算中会自动对齐不同索引的数据。 #Series对象本身及其索引都有一个name属性,该属性跟pandas其他的关键功能关系非常密切: obj3.name='population' obj3.index.name='state' obj3 # In[637]: #DataFrame 是一个表格型的数据结构,它含有一组有序的列,每列可以是不同的值类型(数值、字符E辑、布尔1在等) D ataF ram e既有行索引也有安IJ 索引,它可以被看做由Series组成的字典(共用同一个索引)。 #构建方法 最常用的一种是直接传入一个等长列表或NumPy数组组成的字典: data= {'state':['Ohio','Ohio','Ohio','Nevada','Nevada'], 'year':[2000,2001,2002,2001,2002], 'pop':[1.5,1.7,3.6,2.4,2.9]} DataFrame(data) #结果会自动加上索引
# obj2 is a Series defined outside this excerpt.
print(obj2[2])
print(obj2[2:4])
print(obj2[[2, 1]])
print(obj2 > 0)
# Membership is tested against the index labels.
print('a' in obj2)
print('k' in obj2)
print('dict --------------------')
names = {'mouse': 5000, 'keyboard': 35000, 'monitor': 550000}
print(names, type(names))
obj3 = Series(names)
print(obj3)
obj3.index = ['마우스', '키보드', '모니터'] # rename the index labels
print(obj3)
obj3.name = '상품가격' # a Series can also be given a name
print(obj3)
print('-----DataFrame ---------')
from pandas import DataFrame
# Wrap the Series in a DataFrame.
df = DataFrame(obj3)
print(df, type(df))
print()
data = { 'irum': ['홍길동', '신선해', '공기밥', '한송이', '신기해'], 'juso': ('역삼동', '신길동', '역삼동', '역삼동', '서초동'), 'nai': (23, 25, 33, 20, 26) }
print(data, type(data))
obj4 #pandas的isnull和notnull可以检测缺失数据 pd.isnull(obj4) #返回布尔型 pd.notnull(obj4) #series的isnull obj4.isnull() #series会自动对齐不同索引的数据 obj3 obj4 obj3 + obj4 #series对象本身和索引都有一个“name”属性,类似于标签 obj4.name = 'population' #设置series对象本身的“name”属性 obj4.index.name = 'state' obj4 #通过赋值的方式就地修改series的索引 obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan'] obj #dataframe的常见创建方式 data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'], #等长列的字典 'year': [2000, 2001, 2002, 2001, 2002], 'pop': [1.5, 1.7, 3.6, 2.4, 2.9]} frame = DataFrame(data) #未指定索引则自动生成编号(0开始),列索引对应字典的键 frame DataFrame(data, columns=['year', 'state', 'pop']) #指定列索引
# Series with mixed value types and the default integer index.
aSer = pd.Series([1, 2.0, 'a'])
print(aSer)
# Series with an explicit index.
bSer = pd.Series(['apple', 'peach', 'lemon'], index=[1, 2, 3])
print(bSer)
print(bSer.index)
print(bSer.values)
# NOTE(review): bare `Series` is used here while the rest of the excerpt
# uses `pd.Series` - confirm that `Series` is imported elsewhere in the file.
aSer = Series([3, 5, 7], index=['a', 'b', 'c'])
print(aSer['b'])
print(aSer * 2)
import numpy as np
print(np.exp(aSer))
# Series data alignment
data = {'AXP': '86.40', 'CSCO': '122.64', 'BA': '99.44'}
# 'AAPL' is listed here but is not a key of data.
sindax = ['AXP', 'CSCO', 'BA', 'AAPL']
aSer = pd.Series(data, index=sindax)
print(aSer)
print(pd.isnull(aSer))
bSer = {'AXP': '86.40', 'CSCO': '122.64', 'CVX': '23.78'}
cSer = pd.Series(bSer)
# NOTE(review): the values are strings, so + joins text for labels present
# in both Series rather than adding numbers - confirm this is intended.
print(aSer + cSer)
# Name the values and the index.
aSer.name = 'cnames'
aSer.index.name = 'volume'
print(aSer)
def qqe(close, length=None, smooth=None, factor=None, mamode=None, drift=None, offset=None, **kwargs):
    """Indicator: Quantitative Qualitative Estimation (QQE)

    Smooths the RSI of ``close`` with a moving average, builds an upper and a
    lower band around it from a double-smoothed "true range" of that RSI MA,
    and follows one of the two bands as the QQE line depending on the
    current trend.

    Args:
        close: Input series; it is passed through ``verify_series``.
        length: RSI period. Falls back to 14 when falsy or not positive.
        smooth: Period of the RSI moving average. Falls back to 5 when falsy
            or not positive.
        factor: Multiplier applied to the smoothed true range to get the band
            width. Falls back to 4.236 when falsy.
        mamode: Mode name handed to ``ma`` for the RSI smoothing. Anything
            that is not a string falls back to "ema".
        drift: Difference period for the RSI MA true range, normalised by
            ``get_drift``.
        offset: Number of periods to shift the result by, normalised by
            ``get_offset``.
        **kwargs: ``fillna`` (value handed to ``Series.fillna``) and
            ``fill_method`` (method handed to ``Series.fillna``).

    Returns:
        A DataFrame with the QQE line, the RSI MA, and the long and short QQE
        lines as columns, or ``None`` when ``verify_series`` returns ``None``.
    """
    # Validate arguments
    length = int(length) if length and length > 0 else 14
    smooth = int(smooth) if smooth and smooth > 0 else 5
    factor = float(factor) if factor else 4.236
    wilders_length = 2 * length - 1
    mamode = mamode if isinstance(mamode, str) else "ema"
    close = verify_series(close, max(length, smooth, wilders_length))
    drift = get_drift(drift)
    offset = get_offset(offset)

    if close is None: return

    # Calculate Result
    rsi_ = rsi(close, length)
    # Only non-default MA modes leave a marker letter in the column names.
    _mode = mamode.lower()[0] if mamode != "ema" else ""
    rsi_ma = ma(mamode, rsi_, length=smooth)

    # RSI MA True Range
    rsi_ma_tr = rsi_ma.diff(drift).abs()

    # Double Smooth the RSI MA True Range using Wilder's Length with a default
    # width of 4.236.
    smoothed_rsi_tr_ma = ma("ema", rsi_ma_tr, length=wilders_length)
    dar = factor * ma("ema", smoothed_rsi_tr_ma, length=wilders_length)

    # Create the Upper and Lower Bands around RSI MA.
    upperband = rsi_ma + dar
    lowerband = rsi_ma - dar

    m = close.size
    # The band lines start as float so band values can be written into them
    # without changing the dtype of the Series.
    long = Series(0.0, index=close.index)
    short = Series(0.0, index=close.index)
    trend = Series(1, index=close.index)
    qqe = Series(rsi_ma.iloc[0], index=close.index)
    qqe_long = Series(npNaN, index=close.index)
    qqe_short = Series(npNaN, index=close.index)

    for i in range(1, m):
        c_rsi, p_rsi = rsi_ma.iloc[i], rsi_ma.iloc[i - 1]
        # At i == 1, iloc[i - 2] is iloc[-1], i.e. the last element.
        c_long, p_long = long.iloc[i - 1], long.iloc[i - 2]
        c_short, p_short = short.iloc[i - 1], short.iloc[i - 2]

        # Long Line
        if p_rsi > c_long and c_rsi > c_long:
            long.iloc[i] = npMaximum(c_long, lowerband.iloc[i])
        else:
            long.iloc[i] = lowerband.iloc[i]

        # Short Line
        if p_rsi < c_short and c_rsi < c_short:
            short.iloc[i] = npMinimum(c_short, upperband.iloc[i])
        else:
            short.iloc[i] = upperband.iloc[i]

        # Trend & QQE Calculation
        # Long: Current RSI_MA value Crosses the Prior Short Line Value
        # Short: Current RSI_MA Crosses the Prior Long Line Value
        if (c_rsi > c_short and p_rsi < p_short) or (c_rsi <= c_short and p_rsi >= p_short):
            trend.iloc[i] = 1
            qqe.iloc[i] = qqe_long.iloc[i] = long.iloc[i]
        elif (c_rsi > c_long and p_rsi < p_long) or (c_rsi <= c_long and p_rsi >= p_long):
            trend.iloc[i] = -1
            qqe.iloc[i] = qqe_short.iloc[i] = short.iloc[i]
        else:
            # No cross: carry the previous trend forward.
            trend.iloc[i] = trend.iloc[i - 1]
            if trend.iloc[i] == 1:
                qqe.iloc[i] = qqe_long.iloc[i] = long.iloc[i]
            else:
                qqe.iloc[i] = qqe_short.iloc[i] = short.iloc[i]

    # Offset
    # Shift every returned column so they stay aligned with each other.
    if offset != 0:
        rsi_ma = rsi_ma.shift(offset)
        qqe = qqe.shift(offset)
        qqe_long = qqe_long.shift(offset)
        qqe_short = qqe_short.shift(offset)

    # Handle fills
    if "fillna" in kwargs:
        rsi_ma.fillna(kwargs["fillna"], inplace=True)
        qqe.fillna(kwargs["fillna"], inplace=True)
        qqe_long.fillna(kwargs["fillna"], inplace=True)
        qqe_short.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        rsi_ma.fillna(method=kwargs["fill_method"], inplace=True)
        qqe.fillna(method=kwargs["fill_method"], inplace=True)
        qqe_long.fillna(method=kwargs["fill_method"], inplace=True)
        qqe_short.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    _props = f"{_mode}_{length}_{smooth}_{factor}"
    qqe.name = f"QQE{_props}"
    rsi_ma.name = f"QQE{_props}_RSI{_mode.upper()}MA"
    qqe_long.name = f"QQEl{_props}"
    qqe_short.name = f"QQEs{_props}"
    qqe.category = rsi_ma.category = "momentum"
    qqe_long.category = qqe_short.category = qqe.category

    # Prepare DataFrame to return
    data = {
        qqe.name: qqe,
        rsi_ma.name: rsi_ma,
        qqe_long.name: qqe_long,
        qqe_short.name: qqe_short,
    }
    df = DataFrame(data)
    df.name = f"QQE{_props}"
    df.category = qqe.category

    return df
def ebsw(close, length=None, bars=None, offset=None, **kwargs):
    """Indicator: Even Better SineWave (EBSW)

    Runs ``close`` through a high-pass filter and a smoothing filter, then
    normalises the 3-bar average of the filtered value by the square root of
    its 3-bar average power.

    Args:
        close: Input series; it is passed through ``verify_series``.
        length: High-pass filter period. Falls back to 40 when falsy or not
            greater than 38.
        bars: Smoothing period. Falls back to 10 when falsy or not positive.
        offset: Number of periods to shift the result by, normalised by
            ``get_offset``.
        **kwargs: ``fillna`` (value handed to ``Series.fillna``) and
            ``fill_method`` (method handed to ``Series.fillna``).

    Returns:
        A Series named ``EBSW_{length}_{bars}`` on the index of ``close``, or
        ``None`` when ``verify_series`` returns ``None``. The first
        ``length - 1`` values are NaN and the next one is 0.
    """
    # Validate arguments
    length = int(length) if length and length > 38 else 40
    bars = int(bars) if bars and bars > 0 else 10
    close = verify_series(close, length)
    offset = get_offset(offset)

    if close is None: return

    # The filter coefficients depend only on length and bars, so compute them
    # once instead of on every bar.
    # The 360 and 180 in the original expressions are degree measures, but
    # NumPy's trig functions take radians: 360 deg == 2*pi, 180 deg == pi.
    hp_angle = 2 * npPi / length
    alpha1 = (1 - npSin(hp_angle)) / npCos(hp_angle)

    # Super Smoother Filter coefficients (equation 3-3)
    a1 = npExp(-npSqrt(2) * npPi / bars)
    b1 = 2 * a1 * npCos(npSqrt(2) * npPi / bars)
    c2 = b1
    c3 = -1 * a1 * a1
    c1 = 1 - c2 - c3

    lastClose = lastHP = 0
    FilterHist = [0, 0]  # the two most recent Filt values, oldest first

    # Calculate Result
    m = close.size
    result = [npNaN] * (length - 1) + [0]
    for i in range(length, m):
        # Positional access, independent of the kind of index close has.
        price = close.iloc[i]

        # HighPass filter cyclic components whose periods are shorter than Duration input
        HP = 0.5 * (1 + alpha1) * (price - lastClose) + alpha1 * lastHP

        # Smooth with the Super Smoother Filter
        Filt = c1 * (HP + lastHP) / 2 + c2 * FilterHist[1] + c3 * FilterHist[0]

        # 3 Bar average of Wave amplitude and power
        Wave = (Filt + FilterHist[1] + FilterHist[0]) / 3
        Pwr = (Filt * Filt + FilterHist[1] * FilterHist[1] + FilterHist[0] * FilterHist[0]) / 3

        # Normalize the Average Wave to Square Root of the Average Power
        Wave = Wave / npSqrt(Pwr)

        # update storage, result
        FilterHist = [FilterHist[1], Filt]  # drop the oldest, append the newest
        lastHP = HP
        lastClose = price
        result.append(Wave)

    ebsw = Series(result, index=close.index)

    # Offset
    if offset != 0:
        ebsw = ebsw.shift(offset)

    # Handle fills
    if "fillna" in kwargs:
        ebsw.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        ebsw.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    ebsw.name = f"EBSW_{length}_{bars}"
    ebsw.category = "cycles"

    return ebsw
# numpy array operations are still valid for Series print np.exp(obj2) # Series canalso be considered as a fixed length, ordered list print 'b' in obj2 # pandas has _isnull_ and _notnull_ to detect the missing data print pd.isnull(obj4) # or obj4.isnull() # Series can automatically align differently-indexed data in arithmetic # operations print obj3 + obj4 # Both the Series object itself and its index have a _name_attribute obj4.name = 'Population' obj4.index.name = 'State' ## A _DataFrame_ represents a tabular, spreadsheet-like data structure ## containing an ordered collection of columns, each can be a different ## value type. ## A DataFrame has both a row and column _index_. # Initialization from a dict of equal-length lists or numpy arrays data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'], 'year': [2000, 2001, 2002, 2001, 2002], 'pop': [1.5, 1.7, 3.6, 2.4, 2.9]} frame = DataFrame(data) # Initialization from a nested dict of dicts format pop = {'Nevada':{2001:2.4, 2002:2.9},
def test_concat_mixed_objs(self):
    """Check concat of mixed Series/DataFrame inputs on both axes (G2385)."""
    idx = date_range("01-Jan-2013", periods=10, freq="H")
    arr = np.arange(10, dtype="int64")
    s1 = Series(arr, index=idx)
    s2 = Series(arr, index=idx)
    df = DataFrame(arr.reshape(-1, 1), index=idx)

    def side_by_side(width, columns):
        # Expected frame: ``width`` copies of ``arr`` as columns on ``idx``.
        values = np.repeat(arr, width).reshape(-1, width)
        return DataFrame(values, index=idx, columns=columns)

    # axis 1
    tm.assert_frame_equal(concat([df, df], axis=1), side_by_side(2, [0, 0]))
    tm.assert_frame_equal(concat([s1, s2], axis=1), side_by_side(2, [0, 1]))
    tm.assert_frame_equal(
        concat([s1, s2, s1], axis=1), side_by_side(3, [0, 1, 2])
    )
    tm.assert_frame_equal(
        concat([s1, df, s2, s2, s1], axis=1),
        side_by_side(5, [0, 0, 1, 2, 3]),
    )

    # with names
    s1.name = "foo"
    tm.assert_frame_equal(
        concat([s1, df, s2], axis=1), side_by_side(3, ["foo", 0, 0])
    )

    s2.name = "bar"
    tm.assert_frame_equal(
        concat([s1, df, s2], axis=1), side_by_side(3, ["foo", 0, "bar"])
    )

    # ignore index
    tm.assert_frame_equal(
        concat([s1, df, s2], axis=1, ignore_index=True),
        side_by_side(3, [0, 1, 2]),
    )

    # axis 0
    stacked = DataFrame(
        np.tile(arr, 3).reshape(-1, 1), index=idx.tolist() * 3, columns=[0]
    )
    tm.assert_frame_equal(concat([s1, df, s2]), stacked)

    renumbered = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0])
    tm.assert_frame_equal(concat([s1, df, s2], ignore_index=True), renumbered)
obj = Series([4, 7, -5, 3]) obj.values obj.index obj2 = Series([4, 7, -5, 3], index=['a', 'b', 'c', 'd']) obj2 obj2['b'] obj2[1] sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000} obj4 = Series(sdata) States = ['California', 'Ohio', 'Oregon', 'Texas'] obj3 = Series(sdata, index=States) obj3 pd.isnull(obj3) pd.notnull(obj3) obj3 + obj4 obj3.name = 'Population' obj3.index.name = 'State' obj3 obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan'] obj data = { 'State': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'], 'year': [2000, 2001, 2002, 2001, 2002], 'pop': [1.5, 1.7, 3.6, 2.4, 2.9] } frame = DataFrame(data) frame DataFrame(data, columns=['year', 'State', 'pop']) frame2 = DataFrame(data, columns=['year', 'State', 'pop', 'debt'],
# Casualty counts, labelled by country.
cas = Series(
    [870000, 430000, 300000, 210000, 400000],
    index=['USSR', 'Germany', 'China', 'Japan', 'USA'],
)
print(cas)
print(cas['USA'])

# Select the countries whose count is greater than 400000.
print(cas[cas > 400000])

# Check whether a label is present in the Series index.
print('USSR' in cas)

# Convert the Series to a dictionary.
cas_dict = cas.to_dict()
print(cas_dict)

# Build Series back from the dictionary, once as-is and once with an explicit
# list of index labels.
cas2 = Series(cas_dict)
countries = ['China', 'Germany', 'Japan', 'USA', 'USSR', 'Argentina']
obj2 = Series(cas_dict, index=countries)
print(obj2, 'obj2')

print(pd.isnull(obj2))
print(pd.notnull(obj2))

print(cas2 + obj2)

# A Series and its index can both be given a name.
obj2.name = "Casualties"
obj2.index.name = 'Countries'
mySeries2[mySeries2 > 0]  # equivalent to which() in R
myDoubled = mySeries * 2

# perform functions on your arrays
np.exp(myDoubled)

# Series indices can be thought of like dicts, but are immutable
'b' in mySeries2
'e' in mySeries2

# We can even turn dicts into series
eVotes = {'NC': 15, 'TX': 38, 'CA': 55}  # sorted by key
ev = Series(eVotes)
states = ['CA', 'NC', 'TX', 'OH']
ev2 = Series(ev, states)

# Series have a name, as do their indices
ev2.name = "electoral votes"
ev2.index.name = "state"
# print with a single parenthesised argument behaves the same as the
# Python 2 print statement and is also valid on Python 3.
print(ev2)

# DataFrames are fun too
data = pd.read_csv(
    "https://raw.githubusercontent.com/mcdickenson/python-washu-2014/master/day9/lab9.csv"
)
print(data.head())
data['State']

# change column names for convenience
data.columns = ['st', 'yr', 'ev', 'pop']

# we can create new columns
data['popm'] = data['pop'] / 1000000.0
def compute_up(t, s, **kwargs):
    """Return the unique values of ``s`` as a new Series named like ``s``.

    ``t`` and ``**kwargs`` are accepted but not used here. The result is
    built from ``s.unique()``, so it gets a fresh default index.
    """
    return Series(s.unique(), name=s.name)
def test_tidy_repr(self):
    """repr() of a long, named Series of non-ASCII strings must not raise."""
    values = [u("\u05d0")] * 1000
    named = Series(values, name='title1')
    repr(named)  # should not raise exception