Example #1
0
    def test_series(self, orient, numpy):
        """Round-trip a named Series through ujson for one orient/numpy combination."""
        s = Series([10, 20, 30, 40, 50, 60], index=[6, 7, 8, 9, 10, 15],
                   name="series").sort_values()

        # Forward only the options that were actually supplied.
        encode_kwargs = dict(orient=orient) if orient is not None else {}
        decode_kwargs = dict(numpy=numpy) if numpy is not None else {}

        encoded = ujson.encode(s, **encode_kwargs)
        decoded = ujson.decode(encoded, **decode_kwargs)

        if orient == "split":
            # The split payload is expanded into Series keyword arguments.
            output = Series(**_clean_dict(decoded))
        else:
            output = Series(decoded)

        # Adjust the expectation to what each orient is expected to give back.
        if orient in (None, "index"):
            s.name = None
            output = output.sort_values()
            s.index = ["6", "7", "8", "9", "10", "15"]
        elif orient in ("records", "values"):
            s.name = None
            s.index = [0, 1, 2, 3, 4, 5]

        tm.assert_series_equal(output, s, check_dtype=False)
Example #2
0
 def test_set_name_attribute(self):
     """Assigning ``name`` must store the given value on named and unnamed Series."""
     unnamed = Series([1, 2, 3])
     named = Series([1, 2, 3], name="bar")
     candidates = [7, 7.0, "name", datetime(2001, 1, 1), (1,), u"\u05D0"]
     for candidate in candidates:
         # Same order as before: the unnamed Series first, then the named one.
         for target in (unnamed, named):
             target.name = candidate
             self.assertEqual(target.name, candidate)
Example #3
0
 def test_set_name_attribute(self):
     """Assigning ``name`` must store the given value on named and unnamed Series."""
     unnamed = Series([1, 2, 3])
     named = Series([1, 2, 3], name='bar')
     candidates = [7, 7., 'name', datetime(2001, 1, 1), (1,), "\u05D0"]
     for candidate in candidates:
         # Same order as before: the unnamed Series first, then the named one.
         for target in (unnamed, named):
             target.name = candidate
             assert target.name == candidate
Example #4
0
    def test_metadata_propagation_indiv(self):
        """Check that the transpose of a Series carries matching metadata."""
        source = Series(range(3), range(3))
        source.name = 'foo'
        other = Series(range(3), range(3))
        other.name = 'bar'

        transposed = source.T
        self.check_metadata(source, transposed)
Example #5
0
 def test_name_printing(self):
     """``repr`` shows a ``Name:`` line only while the Series has a name.

     Uses ``assertIn``/``assertNotIn`` rather than ``self.assert_``: that
     alias is deprecated and was removed from ``unittest`` in Python 3.12,
     and the dedicated assertions give clearer failure messages.
     """
     # test small series
     s = Series([0, 1, 2])
     s.name = "test"
     self.assertIn("Name: test", repr(s))
     s.name = None
     self.assertNotIn("Name:", repr(s))
     # test big series (diff code path)
     s = Series(range(0, 1000))
     s.name = "test"
     self.assertIn("Name: test", repr(s))
     s.name = None
     self.assertNotIn("Name:", repr(s))
Example #6
0
    def test_metadata_propagation_indiv(self):
        """Check that Series metadata matches up on the results of individual ops.

        Covers transpose, several resample aggregations, and ``pd.concat``
        with a temporarily replaced ``Series._metadata`` and
        ``Series.__finalize__``. The class-level patch is undone in a
        ``finally`` block so a failing assertion cannot leak the patched
        attributes into other tests.
        """
        o = Series(range(3), range(3))
        o.name = 'foo'
        o2 = Series(range(3), range(3))
        o2.name = 'bar'

        result = o.T
        self.check_metadata(o, result)

        # resample
        ts = Series(np.random.rand(1000),
                    index=date_range('20130101', periods=1000, freq='s'),
                    name='foo')
        result = ts.resample('1T').mean()
        self.check_metadata(ts, result)

        result = ts.resample('1T').min()
        self.check_metadata(ts, result)

        result = ts.resample('1T').apply(lambda x: x.sum())
        self.check_metadata(ts, result)

        def finalize(self, other, method=None, **kwargs):
            # For concat, join the non-empty ``filename`` values of the
            # inputs with '+'; every other attribute is copied from ``other``.
            for name in self._metadata:
                if method == 'concat' and name == 'filename':
                    value = '+'.join([getattr(
                        o, name) for o in other.objs if getattr(o, name, None)
                    ])
                    object.__setattr__(self, name, value)
                else:
                    object.__setattr__(self, name, getattr(other, name, None))

            return self

        # Remember the class-level state before patching it.
        _metadata = Series._metadata
        _finalize = Series.__finalize__
        try:
            Series._metadata = ['name', 'filename']
            o.filename = 'foo'
            o2.filename = 'bar'
            Series.__finalize__ = finalize

            result = pd.concat([o, o2])
            assert result.filename == 'foo+bar'
            assert result.name is None
        finally:
            # reset, even if one of the assertions above failed
            Series._metadata = _metadata
            Series.__finalize__ = _finalize
Example #7
0
    def test_constructor(self, datetime_series, empty_series):
        """Exercise basic ``Series`` construction paths.

        Checks construction from another Series (index reused, name kept),
        mixed-type data, rejection of 2-D data, and rejection of a
        MultiIndex as data (GH4187). Uses ``np.nan`` because the ``np.NaN``
        alias was removed in NumPy 2.0.
        """
        assert datetime_series.index.is_all_dates

        # Pass in Series
        derived = Series(datetime_series)
        assert derived.index.is_all_dates

        assert tm.equalContents(derived.index, datetime_series.index)
        # Ensure new index is not created
        assert id(datetime_series.index) == id(derived.index)

        # Mixed type Series
        mixed = Series(['hello', np.nan], index=[0, 1])
        assert mixed.dtype == np.object_
        assert mixed[1] is np.nan

        assert not empty_series.index.is_all_dates
        assert not Series({}).index.is_all_dates

        # exception raised is of type Exception
        with pytest.raises(Exception, match="Data must be 1-dimensional"):
            Series(np.random.randn(3, 3), index=np.arange(3))

        mixed.name = 'Series'
        rs = Series(mixed).name
        xp = 'Series'
        assert rs == xp

        # raise on MultiIndex GH4187
        m = MultiIndex.from_arrays([[1, 2], [3, 4]])
        msg = "initializing a Series from a MultiIndex is not supported"
        with pytest.raises(NotImplementedError, match=msg):
            Series(m)
Example #8
0
 def test_zero_emsd(self):
     """``tp.emsd`` on the ``dead_still`` fixture must be zero at every lag."""
     N = 10
     zeros = Series(np.zeros(N))
     # Drop the first entry and pin the dtype before labelling.
     expected = zeros.iloc[1:].astype('float64')
     expected.name = 'msd'
     expected.index.name = 'lagt'
     actual = tp.emsd(self.dead_still, 1, 1)
     assert_series_equal(actual, expected)
Example #9
0
def ensure1d(x, name, series=False):
    """Coerce ``x`` to one dimension, as a NumPy array or a pandas Series.

    Parameters
    ----------
    x : Series, DataFrame or array-like
        A single-column DataFrame is reduced to its only column. Other
        inputs go through ``np.asarray`` and are squeezed if needed.
    name : str
        Used in error messages and as the name of a Series built from
        array-like input.
    series : bool, default False
        Return a Series when True, otherwise an ndarray.

    Returns
    -------
    Series or ndarray

    Raises
    ------
    ValueError
        If ``x`` is a DataFrame with more than one column, or an array
        that is not 1-d after squeezing.

    Notes
    -----
    A Series input whose name is not a ``str`` has its name replaced by
    ``str(name)`` in place, and with ``series=True`` that same object is
    returned.
    """
    if isinstance(x, DataFrame):
        if x.shape[1] != 1:
            raise ValueError(name + ' must be squeezable to 1 dimension')
        # Reduce to the only column, then share the Series handling below.
        x = Series(x[x.columns[0]], x.index)

    if isinstance(x, Series):
        if not isinstance(x.name, str):
            x.name = str(x.name)
        return x if series else np.asarray(x)

    values = x if isinstance(x, np.ndarray) else np.asarray(x)
    if values.ndim == 0:
        # Promote a scalar to a length-1 vector.
        values = values[None]
    elif values.ndim != 1:
        values = np.squeeze(values)
        if values.ndim != 1:
            raise ValueError(name + ' must be squeezable to 1 dimension')

    return Series(values, name=name) if series else np.asarray(values)
Example #10
0
    def test_constructor(self):
        """Exercise basic ``Series`` construction paths.

        Checks construction from another Series (index reused, name kept),
        mixed-type data, rejection of 2-D data, and rejection of a
        MultiIndex as data (GH4187). Uses ``np.nan`` because the ``np.NaN``
        alias was removed in NumPy 2.0.
        """
        self.assertTrue(self.ts.index.is_all_dates)

        # Pass in Series
        derived = Series(self.ts)
        self.assertTrue(derived.index.is_all_dates)

        self.assertTrue(tm.equalContents(derived.index, self.ts.index))
        # Ensure new index is not created
        self.assertEqual(id(self.ts.index), id(derived.index))

        # Mixed type Series
        mixed = Series(['hello', np.nan], index=[0, 1])
        self.assertEqual(mixed.dtype, np.object_)
        self.assertIs(mixed[1], np.nan)

        self.assertFalse(self.empty.index.is_all_dates)
        self.assertFalse(Series({}).index.is_all_dates)
        self.assertRaises(Exception, Series, np.random.randn(3, 3),
                          index=np.arange(3))

        mixed.name = 'Series'
        rs = Series(mixed).name
        xp = 'Series'
        self.assertEqual(rs, xp)

        # raise on MultiIndex GH4187
        m = MultiIndex.from_arrays([[1, 2], [3, 4]])
        self.assertRaises(NotImplementedError, Series, m)
def main():
	"""Print a short walkthrough of Series and DataFrame basics (Python 2 print syntax)."""
	# A Series can be viewed as a fixed-length, ordered dict.
	s1 = Series([1,2,3.0,'abc'])
	print s1
	print
	s2 = Series(data=[1,3,5,7],index = ['a','b','x','y'])
	print s2
	print s2.index
	print s2.values
	# Both the Series itself and its index can be given a name.
	s2.name = 'a_series'
	s2.index.name = 'the_index'
	print s2
	ser = Series([4.5,7.2,-5.3,3.6],index=['d','b','a','c'])
	# reindex: 'e' is not in the original index, so it has no value unless fill_value is given
	a = ['a','b','c','d','e']
	ser_1 = ser.reindex(a)
	print ser_1
	ser_2 = ser.reindex(a, fill_value=0)
	print ser_2
	print
	# A DataFrame is a tabular data structure holding an ordered set of columns (similar to an index); each column may have a different value type (unlike an ndarray, which has a single dtype).
	# A DataFrame can essentially be viewed as a collection of Series sharing the same index.
	data = {'state':['Ohino','Ohino','Ohino','Nevada','Nevada'], 'year':[2000,2001,2002,2001,2002], 'pop':[1.5,1.7,3.6,2.4,2.9]}
	df = DataFrame(data)
	print df
	# 'debt' is requested as a column but is not a key of `data`.
	df = DataFrame(data, index=['one','two','three','four','five'], columns=['year','state','pop','debt'])
	print df
	print df.index
	print df.columns
	print type(df['debt'])
	# Reindex the columns to a new set of labels, none of which exist in df.
	state = ['Texas','Utha','California']
	df1 = df.reindex(columns=state, method='ffill')
	print df1
	print
Example #12
0
    def test_concat_series_axis1(self):
        """Concatenating Series along axis=1 must match the equivalent DataFrame."""
        ts = tm.makeTimeSeries()
        pieces = [ts[:-2], ts[2:], ts[2:-2]]

        # Without keys: compare against the transposed frame of the pieces.
        assert_frame_equal(concat(pieces, axis=1), DataFrame(pieces).T)

        # With keys: the keys are expected as the column labels.
        keyed = concat(pieces, keys=['A', 'B', 'C'], axis=1)
        assert_frame_equal(keyed, DataFrame(pieces, index=['A', 'B', 'C']).T)

        # preserve series names, #2489
        left = Series(randn(5), name='A')
        right = Series(randn(5), name='B')
        assert_frame_equal(concat([left, right], axis=1),
                           DataFrame({'A': left, 'B': right}))

        # An unnamed Series is expected under the column label 0.
        right.name = None
        partly_named = concat([left, right], axis=1)
        wanted_columns = Index(['A', 0], dtype='object')
        self.assertTrue(np.array_equal(partly_named.columns, wanted_columns))

        # must reindex, #2603
        left = Series(randn(3), index=['c', 'a', 'b'], name='A')
        right = Series(randn(4), index=['d', 'a', 'b', 'c'], name='B')
        assert_frame_equal(concat([left, right], axis=1),
                           DataFrame({'A': left, 'B': right}))
Example #13
0
    def test_repr_unicode(self):
        """Smoke test: ``repr`` is called on Series holding non-ASCII text."""
        short = Series([u'\u03c3'] * 10)
        repr(short)

        # Longer, named Series.
        long_named = Series([u"\u05d0"] * 1000)
        long_named.name = 'title1'
        repr(long_named)
Example #14
0
    def test_constructor(self):
        """Exercise basic ``Series`` construction paths.

        Checks construction from another Series (index reused, name kept),
        mixed-type data, rejection of 2-D data, and rejection of a
        MultiIndex as data (GH4187). Uses ``np.nan`` because the ``np.NaN``
        alias was removed in NumPy 2.0.
        """
        assert self.ts.index.is_all_dates

        # Pass in Series
        derived = Series(self.ts)
        assert derived.index.is_all_dates

        assert tm.equalContents(derived.index, self.ts.index)
        # Ensure new index is not created
        assert id(self.ts.index) == id(derived.index)

        # Mixed type Series
        mixed = Series(['hello', np.nan], index=[0, 1])
        assert mixed.dtype == np.object_
        assert mixed[1] is np.nan

        assert not self.empty.index.is_all_dates
        assert not Series({}).index.is_all_dates
        pytest.raises(Exception, Series, np.random.randn(3, 3),
                      index=np.arange(3))

        mixed.name = 'Series'
        rs = Series(mixed).name
        xp = 'Series'
        assert rs == xp

        # raise on MultiIndex GH4187
        m = MultiIndex.from_arrays([[1, 2], [3, 4]])
        pytest.raises(NotImplementedError, Series, m)
Example #15
0
File: models.py Project: Afey/ramp
def predict_autosequence(config, context, predict_index, fit_model=True, update_column=None):
    """Predict one index label at a time, optionally feeding results back in.

    For every label in ``predict_index`` the feature matrix is rebuilt with
    ``get_x``, a single-row prediction is made with ``config.model`` and,
    when ``update_column`` is given, the prediction is written into that
    column of the context data before the next label is processed.

    Parameters
    ----------
    config : object providing ``model``, ``prediction`` and ``predictions_name``
    context : object providing ``train_index``, ``data`` and ``copy()``
    predict_index : index of the rows to predict
    fit_model : bool, default True
        Fit the model via ``fit(config, context)`` before predicting.
    update_column : optional column label that receives each prediction.

    Returns
    -------
    tuple
        ``(preds, x, y)``. ``x`` is the feature matrix from the last loop
        iteration (or from ``fit`` when ``predict_index`` is empty), and
        ``y`` is the target returned by ``fit`` (``None`` without fitting).
    """
    if len(context.train_index & predict_index):
        logging.warning("Train and predict indices overlap...")

    x, y = None, None

    if fit_model:
        x, y = fit(config, context)

    # x stays None when fit_model is False; only log its columns if it exists.
    if x is not None:
        logging.debug(x.columns)
    logging.debug(config.model.coef_)

    ctx = context.copy()
    ps = []
    for i in predict_index:
        ctx.data = context.data
        x = get_x(config, ctx)
        predict_x = x.reindex([i])

        # make actual predictions
        p = config.model.predict(predict_x.values)
        if update_column is not None:
            ctx.data[update_column][i] = p[0]
        ps.append(p[0])
    try:
        preds = Series(ps, index=predict_index)
    except Exception:
        # Fall back to a DataFrame when the predictions do not fit a Series.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        preds = DataFrame(ps, index=predict_index)
    # prediction post-processing
    if config.prediction is not None:
        context.data[config.predictions_name] = preds
        preds = build_target(config.prediction, context)
        preds = preds.reindex(predict_index)
    preds.name = ''
    return preds, x, y
Example #16
0
def make_ref(df, method='mean'):
    '''Build a reference spectrum for a dataframe of spectra.

    The reference is what gets subtracted to form the dynamic spectrum
    (y - ref). It is usually the mean of the dataset, all zeros, or an
    external, pre-saved spectrum; this function generates the first two.

    Assumes the spectral variable runs along the index (rows) of the
    dataframe and the temporal/physical variable along the columns. No
    attempt is made to accommodate other layouts.

    df: DataFrame with spectral data along the index and one column per
        temporal/physical sample.

    method: how to generate the reference spectrum (case-insensitive).
       "mean"  - mean across the columns for every index entry
       "empty" - 0.0 for every index entry

    returns: Series named 'refspec', indexed like df.index

    raises: badvalue_error for any other method.'''

    method = method.lower()

    if method == 'mean':
        refspec = df.mean(axis=1)

    elif method == 'empty':
        # Scalar broadcast against the index; no intermediate list is needed.
        refspec = Series(0.0, index=df.index)

    else:
        raise badvalue_error(method, 'mean, empty')

    refspec.name = 'refspec'  # Not sure if this will be useful
    return refspec
Example #17
0
 def test_label(self):
     """Legend text comes from ``label=`` when given, otherwise from the Series name."""
     series = Series([1, 2])

     # Explicit label on an unnamed Series.
     _, axes = self.plt.subplots()
     axes = series.plot(label='LABEL', legend=True, ax=axes)
     self._check_legend_labels(axes, labels=['LABEL'])
     self.plt.close()

     # No label and no name: the legend shows 'None'.
     _, axes = self.plt.subplots()
     axes = series.plot(legend=True, ax=axes)
     self._check_legend_labels(axes, labels=['None'])
     self.plt.close()

     # Label taken from the Series name.
     series.name = 'NAME'
     _, axes = self.plt.subplots()
     axes = series.plot(legend=True, ax=axes)
     self._check_legend_labels(axes, labels=['NAME'])
     self.plt.close()

     # An explicit label overrides the name.
     _, axes = self.plt.subplots()
     axes = series.plot(legend=True, label='LABEL', ax=axes)
     self._check_legend_labels(axes, labels=['LABEL'])
     self.plt.close()

     # Label supplied but legend not drawn until requested.
     _, axes = self.plt.subplots()
     axes = series.plot(legend=False, label='LABEL', ax=axes)
     assert axes.get_legend() is None  # Hasn't been drawn
     axes.legend()  # draw it
     self._check_legend_labels(axes, labels=['LABEL'])
Example #18
0
    def test_name_printing(self):
        """``repr`` shows a ``Name:`` line only while the Series has a name."""
        # A small and a big series (the big one takes a different code path).
        for values in ([0, 1, 2], lrange(0, 1000)):
            s = Series(values)
            s.name = "test"
            self.assertIn("Name: test", repr(s))
            s.name = None
            self.assertNotIn("Name:", repr(s))

        # Series built from a date index only, named in the constructor.
        dated = Series(index=date_range('20010101', '20020101'), name='test')
        self.assertIn("Name: test", repr(dated))
Example #19
0
    def test_repr_unicode(self):
        """Smoke test: ``repr`` is called on Series holding non-ASCII text."""
        short = Series([u"\u03c3"] * 10)
        repr(short)

        # Longer, named Series.
        long_named = Series([u"\u05d0"] * 1000)
        long_named.name = "title1"
        repr(long_named)
Example #20
0
    def test_repr_name_iterable_indexable(self):
        """Smoke test: ``repr`` with a NumPy-scalar name, then with a tuple name."""
        named = Series([1, 2, 3], name=np.int64(3))
        repr(named)  # it works!

        # Now a tuple name made of two identical unicode strings.
        pair_name = (u("\u05d0"), ) * 2
        named.name = pair_name
        repr(named)
Example #21
0
 def test_zero_emsd(self):
     """``tp.emsd`` on the ``dead_still`` fixture must be zero at every lag.

     ``np.float64`` replaces ``np.float``: that alias for the builtin
     ``float`` was deprecated in NumPy 1.20 and removed in 1.24, and both
     produce float64 arrays here.
     """
     N = 10
     actual = tp.emsd(self.dead_still, 1, 1)
     expected = Series(np.zeros(N, dtype=np.float64),
                       index=np.arange(N, dtype=np.float64)).iloc[1:]
     expected.index.name = 'lagt'
     expected.name = 'msd'
     assert_series_equal(actual, expected)
Example #22
0
    def add_id_column_to_dframe(self, dframe):
        """Return ``dframe`` with schema-renamed columns plus a constant id column.

        The added column is named ``DATASET_OBSERVATION_ID`` and holds
        ``self.dataset_observation_id`` once for every row.
        """
        rename_map = self.schema.rename_map_for_dframe(dframe)
        renamed = dframe.rename(columns=rename_map)

        ids = Series([self.dataset_observation_id] * len(renamed))
        ids.name = DATASET_OBSERVATION_ID

        return renamed.join(ids)
Example #23
0
def asSeries(df, name='', limit=0):
    '''Return the 'Gross' column as a Series indexed by 'Day #'.

    When either column is missing, a message is printed and an empty
    Series is returned. A positive ``limit`` keeps only the first
    ``limit`` entries. The result is named ``name``.
    '''
    has_required = 'Gross' in df and 'Day #' in df
    if not has_required:
        print('{} has an empty dataframe'.format(name))
        return Series()

    by_day = Series(df['Gross'])
    by_day.index = df['Day #']
    result = by_day[:limit] if limit > 0 else by_day
    result.name = name
    return result
Example #24
0
    def test_describe_empty(self):
        """``describe`` on an empty or all-NaN Series: count 0, everything else null."""
        empty_summary = pd.Series().describe()

        self.assertEqual(empty_summary['count'], 0)
        self.assertTrue(empty_summary.drop('count').isnull().all())

        # The same must hold for a named Series holding a single NaN.
        only_nan = Series([np.nan])
        only_nan.name = 'NaN'
        nan_summary = only_nan.describe()
        self.assertEqual(nan_summary['count'], 0)
        self.assertTrue(nan_summary.drop('count').isnull().all())
Example #25
0
    def test_only_rows_for_parent_id(self):
        """``rows_for_parent_id`` keeps only the matching rows and drops the id column.

        A parent-id column covering half of the rows is joined onto the
        frame; the filtered result must have that many rows and must no
        longer contain the ``PARENT_DATASET_ID`` column.
        """
        parent_id = 1
        # Floor division keeps the count an int. True division yields a
        # float on Python 3, which cannot be used to repeat a list.
        len_parent_rows = len(self.dframe) // 2

        column = Series([parent_id] * len_parent_rows)
        column.name = PARENT_DATASET_ID

        self.dframe = self.dframe.join(column)
        dframe_only = rows_for_parent_id(self.dframe, parent_id)

        self.assertFalse(PARENT_DATASET_ID in dframe_only.columns)
        self.assertEqual(len(dframe_only), len_parent_rows)
Example #26
0
    def test_name_printing(self):
        """``repr`` shows a ``Name:`` line only while the Series has a name."""
        # A small Series and a big one (the big one takes a different code path).
        for values in ([0, 1, 2], lrange(0, 1000)):
            s = Series(values)

            s.name = "test"
            assert "Name: test" in repr(s)

            s.name = None
            assert "Name:" not in repr(s)

        # Series built from a date index only, named in the constructor.
        dated = Series(index=date_range('20010101', '20020101'), name='test')
        assert "Name: test" in repr(dated)
Example #27
0
def predict(config, context, predict_index, fit_model=True, model_name=None):
    """Predict the rows in ``predict_index`` and report the results.

    The model is either loaded from ``context.store`` (when ``model_name``
    is given) or fitted via ``fit`` (when ``fit_model`` is true). Predictions
    are optionally post-processed through ``config.prediction``, attached to
    the feature frame as 'predictions' next to 'actuals', and passed to
    ``config.update_reporters_with_predictions``.

    Returns the feature frame for ``predict_index`` with the two added
    columns.

    Written for Python 2 (print statements).
    """
    if len(context.train_index & predict_index):
        print "WARNING: train and predict indices overlap..."

    x, y = None, None

    if model_name:
        # A stored model replaces config.model; no fitting happens in this case.
        config.model = context.store.load(model_name)

    if not model_name and fit_model:
        x, y = fit(config, context)

    # TODO: possible to have x loaded without new prediction rows
    if x is None:
        # rebuild just the necessary x:
        ctx = context.copy()
        ctx.data = context.data.ix[predict_index]
        x = get_x(config, ctx)
        try:
            # we may or may not have y's in predict context
            # we get them if we can for metrics and reporting
            y = get_y(config, ctx)
        except KeyError:
            # NOTE(review): y stays None here, and `y.reindex(...)` further
            # down would then raise AttributeError -- confirm intended handling.
            pass

    # NOTE(review): `debug` is not defined in this function -- presumably a
    # module-level flag; verify.
    if debug:
        print x.columns

    predict_x = x.reindex(predict_index)

    print "Making predictions... ",
    # make actual predictions
    ps = config.model.predict(predict_x.values)
    try:
        preds = Series(ps, index=predict_x.index)
    except:
        # Fall back to a DataFrame when the predictions cannot form a Series.
        # NOTE(review): the bare except also catches KeyboardInterrupt/SystemExit.
        preds = DataFrame(ps, index=predict_x.index)
    print "[OK]"
    # prediction post-processing
    if config.prediction is not None:
        # context.data is swapped for a reindexed copy while the target is
        # built, then restored to the original object.
        old = context.data
        context.data = context.data.reindex(predict_x.index)
        context.data[config.predictions_name] = preds
        preds = build_target(config.prediction, context)
        preds = preds.reindex(predict_x.index)
        context.data = old
    preds.name = ''
    actuals = y.reindex(predict_index)
    # TODO: handle multi-variate predictions
    predict_x['predictions'] = preds
    predict_x['actuals'] = actuals
    config.update_reporters_with_predictions(context, predict_x, actuals, preds)
    return predict_x
Example #28
0
    def test_only_rows_for_parent_id(self):
        """``only_rows_for_parent_id`` keeps the matching rows and drops the id column.

        A parent-id column covering half of the rows is joined onto the
        frame; the filtered result must have that many rows and must no
        longer contain the ``PARENT_DATASET_ID`` column.
        """
        parent_id = 1
        # Floor division keeps the count an int. True division yields a
        # float on Python 3, which cannot be used to repeat a list.
        len_parent_rows = len(self.bframe) // 2

        column = Series([parent_id] * len_parent_rows)
        column.name = PARENT_DATASET_ID

        self.bframe = BambooFrame(self.bframe.join(column))
        bframe_only = self.bframe.only_rows_for_parent_id(parent_id)

        self.assertFalse(PARENT_DATASET_ID in bframe_only.columns)
        self.assertEqual(len(bframe_only), len_parent_rows)
Example #29
0
 def test_zero_emsd(self):
     """``tp.emsd`` on the ``dead_still`` fixture must be zero at every lag.

     ``np.float64`` replaces ``np.float``: that alias for the builtin
     ``float`` was deprecated in NumPy 1.20 and removed in 1.24, and both
     produce float64 arrays here.
     """
     N = 10
     actual = tp.emsd(self.dead_still, 1, 1)
     expected = Series(np.zeros(N, dtype=np.float64),
                       index=np.arange(N, dtype=np.float64)).iloc[1:]
     expected.index.name = 'lagt'
     expected.name = 'msd'
     # HACK: Float64Index imprecision ruins index equality.
     # Test them separately. If that works, make them exactly the same.
     assert_almost_equal(actual.index.values, expected.index.values)
     actual.index = expected.index
     assert_series_equal(actual, expected)
Example #30
0
 def test_linear_emsd(self):
     """``tp.emsd`` on ``many_walks`` must round to ``2*A*lag`` at early lag times."""
     A = 1
     EARLY = 7  # only early lag times have good stats
     lags = np.arange(EARLY, dtype='float64')
     expected = Series(2*A*lags, index=lags).iloc[1:]
     expected.name = 'msd'
     expected.index.name = 'lag time [s]'

     actual = tp.emsd(self.many_walks, 1, 1, max_lagtime=EARLY)
     # HACK: Float64Index imprecision ruins index equality.
     # Compare the index values approximately first, then make the indexes
     # identical so the Series comparison only concerns the values.
     assert_almost_equal(actual.index.values, expected.index.values)
     actual.index = expected.index
     assert_series_equal(np.round(actual), expected)
# -*- coding: UTF-8 -*-
import tushare as ts
import pandas as pd
import numpy as np
from pandas import DataFrame,Series


# 4x4 demo frame with state names as the row labels.
data = DataFrame(np.arange(16).reshape((4, 4)),
                 index=['Ohio', 'Colorado', 'Utah', 'New York'],
                 columns=['one', 'two', 'three', 'four'])
d = [1,2,3,4]
# One-row frame labelled 'fu', filled through .loc and appended to `data` below.
se1 = DataFrame(columns=['one', 'two', 'three', 'four'],index = ['fu'])
se1.loc['fu'] = [1,2,3,4]
# A named Series; it is not used again in the code shown here.
se2 = Series({'a':11,'b':11,'c':11})
se2.name = 'fu'
data = data.append(se1)
# Two 10x3 frames of even numbers sharing the same row labels, stacked with append.
df = pd.DataFrame(np.arange(0,60,2).reshape(10,3),columns=list('abc'),index = ['one','two','three','four','five','six','seven','eight','nine','ten'])  
df2 = pd.DataFrame(np.arange(60,120,2).reshape(10,3),columns=list('abc'),index = ['one','two','three','four','five','six','seven','eight','nine','ten'])  
df = df.append(df2)
df
# Walk the rows by position (Python 2: xrange / print statement) and, where the
# third column is divisible by 4, overwrite cells through iloc, ix and loc.
# NOTE(review): the row labels repeat after the append, so the label-based
# .ix/.loc writes presumably touch both rows sharing a label -- confirm intended.
for x in xrange(len(df)):
	if df.iloc[x,2]%4 == 0:		
		print 'hi','_____',df.index[x],df.ix[df.index[x],'b'],'________',df.index[x],df.ix[df.index[x],1]
		df.iloc[x,2] = 'hi'
		df.ix[df.index[x],'d'] = 'hello'
		df.loc[df.index[x],'e'] = 'congrat'
		df.iloc[x,0] = 'hehe'

# for x in xrange(len(data)):
# 	if data.loc[x,'label'] != 'none' and data.loc[x,'label'] != 'nodate':
Example #32
0
# Build a float32 Series named "example_data" from `dict_data` (defined outside this excerpt).
example_obj = Series(dict_data, dtype=np.float32, name="example_data")
example_obj
example_obj["a"]  # look up a value by index label

example_obj["a"] = 3.2  # assign a value by index label
example_obj

example_obj[example_obj > 2]  # boolean-mask selection
example_obj * 2  # element-wise arithmetic

np.exp(example_obj)  # apply the exponential function to the Series (np.abs, np.log are used the same way)
"b" in example_obj  # membership test against the index labels
example_obj.to_dict()  # convert the Series to a dictionary
example_obj.values
example_obj.index
# Name the Series and its index.
example_obj.name = "number"
example_obj.index.name = "alphabet"
example_obj

# Index labels "f", "g" and "h" have no entry in the dict.
dict_data_1 = {"a":1, "b":2, "c":3, "d":4, "e":5}
indexes = ["a","b","c","d","e","f","g","h"]
series_obj_1 = Series(dict_data_1, index=indexes)
series_obj_1
"""
 2. pandas_dataframe
"""
# Example from - https://chrisalbon.com/python/pandas_map_values_to_values.html
raw_data = {'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
        'last_name': ['Miller', 'Jacobson', 'Ali', 'Milner', 'Cooze'],
        'age': [42, 52, 36, 24, 73],
        'city': ['San Francisco', 'Baltimore', 'Miami', 'Douglas', 'Boston']}
Example #33
0
print(Obj3)
print(type(Obj3))

# Create a list
esportes = ['Futebol', 'Tenis', 'Natação', 'Basktetball']
# Create a Series using the list as its index
# NOTE(review): `dict` here is a variable defined outside this excerpt; the
# name shadows the builtin -- confirm.
Obj4 = Series(dict, index=esportes)
print(Obj4)
# Null checks, via the pandas functions and via the Series method
print(pd.isnull(Obj4))
print(pd.notnull(Obj4))
print(Obj4.isnull())

# Combine the two Series with `+` (the original comment called this concatenating)
print(Obj3 + Obj4)

# Name the Series and its index
Obj4.name = 'população'
Obj4.index.name = 'esporte'
print(Obj4)

print("\nDataframes")
print("----------")
from pandas import DataFrame
data = {'Estado': ['Santa Catarina', 'Paraná', 'Goiás', 'Bahia', 'Minas Gerais'],
        'Ano': [2002, 2003, 2004, 2005, 2006],
        'População': [1.5, 1.7, 3.6, 2.4, 2.9]}
frame = DataFrame(data)
print(frame)
print(type(frame))
# Same data with an explicit column order
print(DataFrame(data, columns=['Ano', 'Estado', 'População']))

frame2 = DataFrame(data, columns = ['Ano', 'Estado', 'População', 'Débito'],
Example #34
0
obj.values
obj.index

obj[[2, 1]]  # select with a list of integers
obj[['a', 'c']]  # select with a list of index labels
obj[obj > 0]  # select with a boolean mask

# from dictionary
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
type(sdata)
type(sdata['Ohio'])  # this is scalar value

obj3 = Series(
    sdata)  # convert dictionary to series, key is index, value is value
type(obj3)
# No trailing comma after the string: `x = 'population',` assigns the
# 1-tuple ('population',) rather than the string itself.
obj3.name = 'population'  # change the name for value, like column name
obj3.index.name = 'state'  # change the name for index, like row name

states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = Series(
    sdata, index=states
)  # build the series for exactly these labels; 'California' is not in sdata
""" #################### operation ######################### """
obj4 + obj3  # add the values based on index
"""________________________________________________________________
                        DataFrame
"""

data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
Example #35
0
 def converter(series: pd.Series) -> pd.Series:
     """Set ``series.name`` to ``name`` and return the same Series object.

     The input is modified in place. ``name`` is not defined in this
     function -- presumably it comes from an enclosing scope; verify.
     """
     series.name = name
     return series
#print revenue

# select by label, then by boolean condition (Python 2 print statements)
print revenue['ola']
print revenue[revenue >= 35]

#use boolean conditions: membership test against the index labels
print 'lyft' in revenue

# convert the Series to a plain dict
revenue_dict = revenue.to_dict()
print revenue_dict

#nan values: build a new Series from `revenue` using the labels in index_2
index_2 = ['ola', 'uber', 'grab', 'gojek', 'lyft']
revenue2 = Series(revenue, index_2)
print revenue2

print 'is null'
print pd.isnull(revenue2)

print 'not null'
print pd.notnull(revenue2)

#addition of series
print revenue + revenue2

#assigning names to the Series and to its index

revenue2.name = "Company revenues"
revenue2.index.name = "Company Name"
print revenue2
Example #37
0
import matplotlib as mpl
import matplotlib.pyplot as plt

# Series with the default integer index.
obj = Series([1, -2, 3, -4])

# Series with explicit string labels.
obj2 = Series([1, -2, 3, -4], index=['a', 'b', 'c', 'd'])
obj2.values
obj2[['a', 'b']]  # select several labels at once
np.abs(obj2)  # element-wise NumPy function

# Series from a dict: keys become the index labels.
data = {'张三': 92, '李四': 78, '王五': 68, '小明': 82}
obj3 = Series(data)

# Same dict, with the label order given explicitly.
names = ['张三', '李四', '王五', '小明']
obj4 = Series(data, index=names)
# Name the Series and its index.
obj4.name = 'math'
obj4.index.name = 'student'

# DataFrame from a dict of equal-length lists.
data = {
    'name': ['张三', '李四', '王五', '小明'],
    'sex': ['female', 'female', 'male', 'male'],
    'year': [2001, 2001, 2003, 2202],
    'city': ['北京', '上海', '广州', '北京']
}
df = DataFrame(data)
# Same data with explicit column order and row labels.
df = DataFrame(data,
               columns=['name', 'sex', 'year', 'city'],
               index=['a', 'b', 'c', 'd'])
# NOTE(review): `obj` has integer labels at this point, so reindexing it with
# letters matches none of them -- possibly `obj2` was intended; confirm.
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])

# Rebind obj to a Series with non-contiguous integer labels.
obj = Series([1, -2, 3, -4], index=[0, 2, 3, 5])
Example #38
0
import pandas as pd
import numpy as np
import os

os.system('clear')

# Series
# Only `import pandas as pd` is visible above, so Series and DataFrame are
# referenced through the `pd` namespace instead of as bare names.
obj1 = pd.Series([4, 7, -5, 3])
obj2 = pd.Series([4, 7, -5, 3], index=['d', 'b', 'a', 'c'])
np.exp(obj2)

# Series from a dict; `states` selects and orders the labels, and
# 'California' has no entry in sdata.
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj3 = pd.Series(sdata)
states = ['California', 'Ohio', 'Oregon', "Texas"]
obj4 = pd.Series(sdata, index=states)
obj4.name = 'population'
obj4.index.name = 'state'

# Replace the index labels in place.
obj1.index = ['Bob', 'Steve', 'Jeff', 'Ryan']

# DataFrame
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9]
}

frame = pd.DataFrame(data)
# 'debt' is not a key of `data`, so that column holds only missing values.
frame2 = pd.DataFrame(data,
                      columns=['year', 'state', 'pop', 'debt'],
                      index=['one', 'two', 'three', 'four', 'five'])
Example #39
0
# isnull and notnull can be used to find gaps in the data
pd.isnull(obj2)

# method form: obj2.isnull()

# likewise for the opposite check
pd.notnull(obj2)

# method form: obj2.notnull()

# display the WW2 series again
WW2_Series

# check the series that holds the Argentina value
obj2

# Now we can add and pandas automatically aligns data by index
WW2_Series + obj2

# a series can be given a name
obj2.name = "World War 2 Casualties"

# display it
obj2

# the index can be named as well
obj2.index.name = 'Countries'

# display it
obj2
Example #40
0
def test_apply_dictlike_reducer(string_series, ops, how):
    """GH 39140: calling ``how`` with dict-like reducers gives one result per key.

    The expected Series holds ``op(string_series)`` for every entry of
    ``ops`` and keeps the name of the input.
    """
    reduced = {key: func(string_series) for key, func in ops.items()}
    expected = Series(reduced)
    expected.name = string_series.name

    method = getattr(string_series, how)
    tm.assert_series_equal(method(ops), expected)
Example #41
0
 def test_tidy_repr(self):
     """Smoke test: ``repr`` of a long, named, non-ASCII Series."""
     long_series = Series(["\u05d0"] * 1000)
     long_series.name = "title1"
     # should not raise exception
     repr(long_series)
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Two frames with different shapes, joined side by side.
df1 = pd.DataFrame(np.arange(36).reshape(6, 6))
print(df1)
df2 = pd.DataFrame(np.arange(15).reshape(5, 3))
print(df2)
df3 = pd.concat([df1, df2], axis=1)
print(df3)
# Column labels 0 and 2 occur in both inputs; drop removes every match.
print(df3.drop([0, 2], axis=1))

# A named Series becomes a new column when joined onto a frame.
series = Series(np.arange(6))
series.name = "added_variable"
print(series)

variable_added = df1.join(series)
print(variable_added)

# DataFrame.append was removed in pandas 2.0; pd.concat with
# ignore_index=True stacks the rows and renumbers them the same way.
added_datatable = pd.concat([variable_added, variable_added],
                            ignore_index=True)
print(added_datatable)

print(df1.sort_values(by=[5], ascending=[True]))
Example #43
0
def add_benchmark(net_values: pd.DataFrame, benchmark: pd.Series):
    """Join ``benchmark`` onto ``net_values`` as a rebased 'benchmark' column.

    The joined column is divided by its first value. ``.iloc[0]`` is used
    so the first value is taken by position: plain ``[0]`` is a label
    lookup on integer indexes and a deprecated positional fallback on
    others.

    Parameters
    ----------
    net_values : DataFrame that receives the extra column (not modified).
    benchmark : Series to attach; it is renamed to 'benchmark' in place.

    Returns
    -------
    DataFrame
        A new frame with the columns of ``net_values`` plus 'benchmark'.
    """
    benchmark.name = 'benchmark'
    net_values = net_values.join(benchmark)
    first_value = net_values['benchmark'].iloc[0]
    net_values['benchmark'] = net_values['benchmark'] / first_value
    return net_values
# Build a Series from `sdata` (defined outside this excerpt).
obj = Series(sdata)

#print(obj)

# Rebuild it with an explicit list of index labels.
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj = Series(sdata, index=states)

#print(obj)

#print(pd.isnull(obj))

#print(pd.notnull(obj))

#print(obj.isnull())

# Name the Series and its index.
obj.name = 'population'
obj.index.name = 'state'

#print(obj)

#obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']

#print(obj)

# DataFrame from a dict of equal-length lists; the keys become the columns.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.6]
}

frame = DataFrame(data)
Example #45
0
# Membership tests on a list, a dict and a Series.
# The containers are not called `list` / `dict`: rebinding those names would
# shadow the builtins for the rest of the module.
numbers = [1, 2, 3, 4, 5]
print(numbers)
print(1 in numbers)
print(10 in numbers)

mapping = {'a': 1, 'b': 2, 'c': 3}
print(mapping)
print('a' in mapping)
print('b' in mapping)
print('d' in mapping)

# `in` on a Series tests the index labels.
obj2 = Series([4, 7, -5, 3], index=['a', 'b', 'c', 'd'])
print(obj2)
print('a' in obj2)
print('b' in obj2)

# Series from a dict: keys become the index labels.
sdata = {'Seoul': 9, 'Incheon': 10, 'Busan': 10, 'Daejeon': 10}
obj3 = Series(sdata)
print(obj3)

# Same data, with the label order given explicitly.
cities = ['Busan', 'Daejeon', 'Incheon', 'Seoul']
obj4 = Series(sdata, index=cities)
print(obj4)

# Name the Series and its index.
obj4.name = 'weather'
obj4.index.name = 'city'
print(obj4)

# Replace the index labels in place.
obj4.index = ['Daegu', 'Gwangju', 'Sejeong', 'Jeju']
print(obj4)
Example #46
0
def _tiered_daily_charge(usage, baseline, tier, mincharge):
    """Return the charge for one day of `usage` under a four-tier tariff.

    Tier boundaries are the baseline, 130% of the baseline and 200% of the
    baseline. The minimum charge only applies while usage stays at or below
    the baseline. Returns None when `usage` compares false against every
    boundary (e.g. NaN), so the caller can skip that day.
    """
    tier1_max = baseline * 1.3  # tier 1 max (130% of baseline)
    tier2_max = baseline * 2  # tier 2 max (200% of baseline)

    # CASE 1: at or under the baseline (subject to the daily minimum charge)
    if usage <= baseline:
        cost = usage * tier[0]
        return cost if cost > mincharge else mincharge

    # CASE 2: above the baseline, within tier 1
    if usage <= tier1_max:
        return baseline * tier[0] + (usage - baseline) * tier[1]

    # CASE 3: above tier 1, within tier 2
    if usage <= tier2_max:
        return (baseline * tier[0] + (tier1_max - baseline) * tier[1] +
                (usage - tier1_max) * tier[2])

    # CASE 4: above tier 2
    if usage > tier2_max:
        return (baseline * tier[0] + (tier1_max - baseline) * tier[1] +
                (tier2_max - tier1_max) * tier[2] +
                (usage - tier2_max) * tier[3])

    return None


def base(pge):
    """Return the total tiered electricity charge for territory `pge`.

    Reads ``<pge>.csv`` from the current directory, sums the hourly values of
    the 'Electricity:Facility [kW](Hourly)' column into consecutive 24-hour
    blocks and prices each block with the tiered tariff. The season of a
    block is taken from the month of its last hourly row: months 5-10 are
    summer, all others winter. A trailing block shorter than 24 hours is
    ignored.

    Args:
        pge: territory code; must be one of the keys of the baseline table
            below ('P', 'Q', 'R', 'S', 'T', 'V', 'W', 'X', 'Y', 'Z').

    Returns:
        Sum of the daily charges (0 when the file holds less than one day).

    Raises:
        KeyError: if `pge` is not a known territory code.
    """
    import pandas as pd

    data = pd.read_csv(pge + '.csv')

    tcol = 'Date/Time'  # time column
    ecol = 'Electricity:Facility [kW](Hourly)'  # total electricity column

    timestamps = list(data[tcol])
    energylist = list(data[ecol])

    # The month is the part before '/' in the second space-separated field
    # of each timestamp.
    months = [int(t.split(' ')[1].split('/')[0]) for t in timestamps]

    # (summer, winter) baseline quantities per territory as of 2016,
    # in kWh per day
    q = {
        'P': (13.8, 12.3),
        'Q': (7, 12.3),
        'R': (15.6, 11.0),
        'S': (13.8, 11.2),
        'T': (7.0, 8.5),
        'V': (8.7, 10.6),
        'W': (16.8, 10.1),
        'X': (10.1, 10.9),
        'Y': (10.6, 12.6),
        'Z': (6.2, 9.0)
    }

    mincharge = 0.32854  # total daily utility minimum charge

    # energy rates for the four tiers
    tier = [0.18151, 0.21546, 0.27389, 0.34876]

    summer_baseline, winter_baseline = q[pge]

    hours_per_day = 24
    charge = []
    for end in range(hours_per_day, len(energylist) + 1, hours_per_day):
        usage = sum(energylist[end - hours_per_day:end])
        # Summer is May through October; everything else is winter.
        is_summer = 5 <= months[end - 1] <= 10
        baseline = summer_baseline if is_summer else winter_baseline
        daily = _tiered_daily_charge(usage, baseline, tier, mincharge)
        if daily is not None:
            charge.append(daily)

    return sum(charge)
Example #47
0
# Ask for a territory code and load the matching "<code>.csv" file.
# NOTE(review): raw_input exists only in Python 2.
terr = raw_input('Select Territory (T,R,W,X,S)>>> ')
file = terr+'.csv'

data = pd.read_csv(file)
#print data.head()

tcol = 'Date/Time' #time column
ecol = 'Electricity:Facility [kW](Hourly)' #total electricity col

time = list(data[tcol])
# Rewrite the '24:00:00' stamp as '23:59:00' before converting to date numbers.
# NOTE(review): `dates` is presumably matplotlib.dates -- confirm at the import site.
timemod = [i.replace('24:00:00','23:59:00') for i in time]
timeplot = dates.datestr2num(timemod)
energylist = list(data[ecol])
energy = Series(energylist)
energy.name = 'energy'

# Split each timestamp on single spaces: field 1 holds "month/day" and
# field 3 holds "hour:minute:second".
monlist = [m.split(' ')[1].split('/')[0] for m in time]
daylist = [d.split(' ')[1].split('/')[1] for d in time]
hrlist = [hr.split(' ')[3].split(':')[0] for hr in time]

######## DAY OF THE WEEK
# Start Jan 1 on a Monday
d = '01'
dow = 0
dowlist = [] 

for i in daylist:    
    if d == i:
        dowlist.append(dow)
    elif d != i and dow < 6:
Example #48
0
obj2[obj2>5]   # NumPy array operations (filtering with a boolean array, scalar multiplication, applying math functions, ...) preserve the link between index and values
obj2*2
np.exp(obj2)
'b' in obj2    # A Series can also be viewed as a fixed-length, ordered dict, since it maps index values to data values; it can be used in many functions that expect a dict argument

sdata = {'Ohio':35000, 'Texas':7100,'Oregon':16000,'tah':5000}
obj3=Series(sdata)    # create a Series from a dict

pd.isnull(obj3)  # pd.notnull(obj3)   # pandas' isnull and notnull functions can be used to detect missing data
# For many applications, the most important Series feature is that it automatically aligns differently-indexed data in arithmetic operations.


# Both the Series object itself and its index have a name attribute, which ties in closely with other key pandas functionality:

obj3.name='population'
obj3.index.name='state'
obj3




# In[637]:

# DataFrame is a tabular data structure holding an ordered collection of columns, each of which can have a different value type (numeric, string, boolean, ...). A DataFrame has both a row index and a column index; it can be thought of as a dict of Series that all share the same index.
# Construction: the most common way is to pass a dict of equal-length lists or NumPy arrays:

data= {'state':['Ohio','Ohio','Ohio','Nevada','Nevada'],
       'year':[2000,2001,2002,2001,2002],
       'pop':[1.5,1.7,3.6,2.4,2.9]}
DataFrame(data)   # the result gets an index automatically
Example #49
0
# Index obj2 (defined outside this excerpt) with a scalar, a slice and a list.
print(obj2[2])
print(obj2[2:4])
print(obj2[[2, 1]])
# Compare obj2 against 0, then run two membership tests on it.
print(obj2 > 0)
print('a' in obj2)
print('k' in obj2)

print('dict --------------------')
# Build a Series from a dict.
names = {'mouse': 5000, 'keyboard': 35000, 'monitor': 550000}
print(names, type(names))
obj3 = Series(names)
print(obj3)
obj3.index = ['마우스', '키보드', '모니터']  # rename the index labels
print(obj3)

obj3.name = '상품가격'  # a Series can also be given a name
print(obj3)

print('-----DataFrame ---------')
from pandas import DataFrame
# Wrap the named Series in a DataFrame.
df = DataFrame(obj3)
print(df, type(df))

print()
# Dict of equal-length sequences (one list, two tuples).
data = {
    'irum': ['홍길동', '신선해', '공기밥', '한송이', '신기해'],
    'juso': ('역삼동', '신길동', '역삼동', '역삼동', '서초동'),
    'nai': (23, 25, 33, 20, 26)
}
print(data, type(data))
obj4

# pandas' isnull and notnull can detect missing data
pd.isnull(obj4)   # returns booleans
pd.notnull(obj4)

# the Series' own isnull method
obj4.isnull()

# Series automatically align differently-indexed data
obj3
obj4
obj3 + obj4

# Both the Series object itself and its index have a "name" attribute, similar to a label
obj4.name = 'population'      # set the "name" attribute of the Series object itself
obj4.index.name = 'state'
obj4

# Modify a Series' index in place by assignment
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
obj

# A common way to create a DataFrame
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],   # dict of equal-length columns
        'year': [2000, 2001, 2002, 2001, 2002],
        'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}
frame = DataFrame(data)             # with no index given, one is numbered automatically (from 0); the column index comes from the dict keys
frame

DataFrame(data, columns=['year', 'state', 'pop'])    # specify the column index
Example #51
0
# A Series built from mixed-type values with the default index.
aSer = pd.Series([1, 2.0, 'a'])
print(aSer)

# A Series with an explicit integer index; show its index and values.
bSer = pd.Series(['apple', 'peach', 'lemon'], index=[1, 2, 3])
print(bSer)
print(bSer.index)
print(bSer.values)

# Label-based lookup and scalar multiplication.
aSer = Series([3, 5, 7], index=['a', 'b', 'c'])
print(aSer['b'])
print(aSer * 2)

import numpy as np

print(np.exp(aSer))

# Series data alignment
# 'AAPL' is in the requested index but not in `data`.
data = {'AXP': '86.40', 'CSCO': '122.64', 'BA': '99.44'}
sindax = ['AXP', 'CSCO', 'BA', 'AAPL']
aSer = pd.Series(data, index=sindax)
print(aSer)
print(pd.isnull(aSer))

# NOTE(review): the values are strings, so `+` below concatenates matching
# entries rather than adding numbers -- confirm this is intended.
bSer = {'AXP': '86.40', 'CSCO': '122.64', 'CVX': '23.78'}
cSer = pd.Series(bSer)
print(aSer + cSer)

# Name the Series and its index.
aSer.name = 'cnames'
aSer.index.name = 'volume'
print(aSer)
def qqe(close, length=None, smooth=None, factor=None, mamode=None, drift=None, offset=None, **kwargs):
    """Indicator: Quantitative Qualitative Estimation (QQE)

    Smooths the RSI of `close` with a moving average, builds upper/lower
    bands around it from a double-smoothed "true range" of that RSI MA, and
    tracks trailing long/short lines plus the current trend.

    Args:
        close: input series; passed through verify_series.
        length: RSI length. Default: 14
        smooth: length of the moving average applied to the RSI. Default: 5
        factor: multiplier applied to the smoothed range. Default: 4.236
        mamode: moving-average name handed to ma(). Default: "ema"
        drift: difference period, normalised by get_drift.
        offset: number of periods to shift the results, normalised by
            get_offset.
        **kwargs: "fillna" (fill value) and "fill_method" (fill method name)
            are forwarded to Series.fillna.

    Returns:
        DataFrame with the QQE line, the RSI moving average, and the
        long-only / short-only QQE lines (NaN where the other trend is
        active); None when verify_series returns None.
    """
    # Validate arguments
    length = int(length) if length and length > 0 else 14
    smooth = int(smooth) if smooth and smooth > 0 else 5
    factor = float(factor) if factor else 4.236
    wilders_length = 2 * length - 1
    mamode = mamode if isinstance(mamode, str) else "ema"
    close = verify_series(close, max(length, smooth, wilders_length))
    drift = get_drift(drift)
    offset = get_offset(offset)

    if close is None: return

    # Calculate Result
    rsi_ = rsi(close, length)
    # First letter of a non-default MA mode; used only in the column names.
    _mode = mamode.lower()[0] if mamode != "ema" else ""
    rsi_ma = ma(mamode, rsi_, length=smooth)

    # RSI MA True Range
    rsi_ma_tr = rsi_ma.diff(drift).abs()

    # Double Smooth the RSI MA True Range using Wilder's Length with a default
    # width of 4.236.
    smoothed_rsi_tr_ma = ma("ema", rsi_ma_tr, length=wilders_length)
    dar = factor * ma("ema", smoothed_rsi_tr_ma, length=wilders_length)

    # Create the Upper and Lower Bands around RSI MA.
    upperband = rsi_ma + dar
    lowerband = rsi_ma - dar

    m = close.size
    long = Series(0, index=close.index)
    short = Series(0, index=close.index)
    trend = Series(1, index=close.index)
    qqe = Series(rsi_ma.iloc[0], index=close.index)
    qqe_long = Series(npNaN, index=close.index)
    qqe_short = Series(npNaN, index=close.index)

    for i in range(1, m):
        c_rsi, p_rsi = rsi_ma.iloc[i], rsi_ma.iloc[i - 1]
        # At i == 1, iloc[i - 2] is iloc[-1], i.e. the last element, which
        # still holds its initial value of 0 at that point.
        c_long, p_long = long.iloc[i - 1], long.iloc[i - 2]
        c_short, p_short = short.iloc[i - 1], short.iloc[i - 2]

        # Long Line
        if p_rsi > c_long and c_rsi > c_long:
            long.iloc[i] = npMaximum(c_long, lowerband.iloc[i])
        else:
            long.iloc[i] = lowerband.iloc[i]

        # Short Line
        if p_rsi < c_short and c_rsi < c_short:
            short.iloc[i] = npMinimum(c_short, upperband.iloc[i])
        else:
            short.iloc[i] = upperband.iloc[i]

        # Trend & QQE Calculation
        # Long: Current RSI_MA value Crosses the Prior Short Line Value
        # Short: Current RSI_MA Crosses the Prior Long Line Value
        if (c_rsi > c_short and p_rsi < p_short) or (c_rsi <= c_short and p_rsi >= p_short):
            trend.iloc[i] = 1
            qqe.iloc[i] = qqe_long.iloc[i] = long.iloc[i]
        elif (c_rsi > c_long and p_rsi < p_long) or (c_rsi <= c_long and p_rsi >= p_long):
            trend.iloc[i] = -1
            qqe.iloc[i] = qqe_short.iloc[i] = short.iloc[i]
        else:
            trend.iloc[i] = trend.iloc[i - 1]
            if trend.iloc[i] == 1:
                qqe.iloc[i] = qqe_long.iloc[i] = long.iloc[i]
            else:
                qqe.iloc[i] = qqe_short.iloc[i]  = short.iloc[i]

    # Offset
    if offset != 0:
        rsi_ma = rsi_ma.shift(offset)
        qqe = qqe.shift(offset)
        long = long.shift(offset)
        short = short.shift(offset)
        # The returned long/short columns must move with the QQE line;
        # otherwise the result frame is misaligned whenever offset != 0.
        qqe_long = qqe_long.shift(offset)
        qqe_short = qqe_short.shift(offset)

    # Handle fills
    if "fillna" in kwargs:
        rsi_ma.fillna(kwargs["fillna"], inplace=True)
        qqe.fillna(kwargs["fillna"], inplace=True)
        qqe_long.fillna(kwargs["fillna"], inplace=True)
        qqe_short.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        rsi_ma.fillna(method=kwargs["fill_method"], inplace=True)
        qqe.fillna(method=kwargs["fill_method"], inplace=True)
        qqe_long.fillna(method=kwargs["fill_method"], inplace=True)
        qqe_short.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    _props = f"{_mode}_{length}_{smooth}_{factor}"
    qqe.name = f"QQE{_props}"
    rsi_ma.name = f"QQE{_props}_RSI{_mode.upper()}MA"
    qqe_long.name = f"QQEl{_props}"
    qqe_short.name = f"QQEs{_props}"
    qqe.category = rsi_ma.category = "momentum"
    qqe_long.category = qqe_short.category = qqe.category

    # Prepare DataFrame to return
    data = {
        qqe.name: qqe, rsi_ma.name: rsi_ma,
        # long.name: long, short.name: short
        qqe_long.name: qqe_long, qqe_short.name: qqe_short
    }
    df = DataFrame(data)
    df.name = f"QQE{_props}"
    df.category = qqe.category

    return df
def ebsw(close, length=None, bars=None, offset=None, **kwargs):
    """Indicator: Even Better SineWave (EBSW)

    High-pass filters `close`, smooths the result with a super smoother
    filter, and normalises a 3-bar average of the filtered wave by the
    square root of its 3-bar average power.

    Args:
        close: input series; passed through verify_series.
        length: high-pass duration; values of 38 or less fall back to 40.
        bars: super smoother period. Default: 10
        offset: number of periods to shift the result, normalised by
            get_offset.
        **kwargs: "fillna" (fill value) and "fill_method" (fill method name)
            are forwarded to Series.fillna.

    Returns:
        Series named "EBSW_<length>_<bars>" on the index of `close`; None
        when verify_series returns None.
    """
    # Validate arguments
    length = int(length) if length and length > 38 else 40
    bars = int(bars) if bars and bars > 0 else 10
    close = verify_series(close, length)
    offset = get_offset(offset)

    if close is None: return

    # Filter coefficients depend only on `length` and `bars`, so they are
    # computed once instead of on every bar.
    # NOTE(review): 360 / length and 180 / bars look like degrees; if
    # npSin/npCos are numpy's sin/cos they expect radians -- confirm intent.
    # HighPass coefficient for cyclic components whose periods are shorter
    # than the Duration input
    alpha1 = (1 - npSin(360 / length)) / npCos(360 / length)
    # Super Smoother Filter coefficients from equation 3-3
    a1 = npExp(-npSqrt(2) * npPi / bars)
    b1 = 2 * a1 * npCos(npSqrt(2) * 180 / bars)
    c2 = b1
    c3 = -1 * a1 * a1
    c1 = 1 - c2 - c3

    lastClose = lastHP = 0
    FilterHist = [0, 0]  # the two most recent Filt values, oldest first

    # Calculate Result
    m = close.size
    # The first `length` slots are NaN padding followed by a single 0.
    result = [npNaN for _ in range(0, length - 1)] + [0]
    for i in range(length, m):
        # `i` is a position, so read the value positionally; plain close[i]
        # is a label lookup and misbehaves on non-default indexes.
        price = close.iloc[i]

        HP = 0.5 * (1 + alpha1) * (price - lastClose) + alpha1 * lastHP
        Filt = c1 * (HP + lastHP) / 2 + c2 * FilterHist[1] + c3 * FilterHist[0]

        # 3 Bar average of Wave amplitude and power
        Wave = (Filt + FilterHist[1] + FilterHist[0]) / 3
        Pwr = (Filt * Filt + FilterHist[1] * FilterHist[1] +
               FilterHist[0] * FilterHist[0]) / 3

        # Normalize the Average Wave to Square Root of the Average Power
        Wave = Wave / npSqrt(Pwr)

        # update storage, result
        FilterHist.append(Filt)  # append new Filt value
        FilterHist.pop(0)  # drop the oldest value, keeping two entries
        lastHP = HP
        lastClose = price
        result.append(Wave)

    ebsw = Series(result, index=close.index)

    # Offset
    if offset != 0:
        ebsw = ebsw.shift(offset)

    # Handle fills
    if "fillna" in kwargs:
        ebsw.fillna(kwargs["fillna"], inplace=True)
    if "fill_method" in kwargs:
        ebsw.fillna(method=kwargs["fill_method"], inplace=True)

    # Name and Categorize it
    ebsw.name = f"EBSW_{length}_{bars}"
    ebsw.category = "cycles"

    return ebsw
Example #54
0
# print is written in call form so the snippet parses on Python 3 as well as
# Python 2 (a single parenthesised argument prints the same on both).

# NumPy array operations are still valid for a Series
print(np.exp(obj2))

# A Series can also be treated as a fixed-length, ordered dict
print('b' in obj2)

# pandas has _isnull_ and _notnull_ to detect missing data
print(pd.isnull(obj4))  # or obj4.isnull()

# Series automatically align differently-indexed data in arithmetic
# operations
print(obj3 + obj4)

# Both the Series object itself and its index have a _name_ attribute
obj4.name = 'Population'
obj4.index.name = 'State'

## A _DataFrame_ represents a tabular, spreadsheet-like data structure
## containing an ordered collection of columns, each of which can be a
## different value type.
## A DataFrame has both a row and column _index_.

# Initialization from a dict of equal-length lists or numpy arrays
data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
        'year':  [2000, 2001, 2002, 2001, 2002],
        'pop':   [1.5, 1.7, 3.6, 2.4, 2.9]}
frame = DataFrame(data)

# Initialization from a nested dict of dicts format
pop = {'Nevada':{2001:2.4, 2002:2.9},
Example #55
0
    def test_concat_mixed_objs(self):
        """Check concat() on mixes of Series and DataFrame objects.

        Covers column-wise (axis=1) and row-wise (default axis) concatenation,
        with unnamed and named Series, and with ignore_index=True.
        """

        # concat mixed series/frames
        # G2385

        # axis 1
        # NOTE(review): newer pandas releases reportedly deprecate the "H"
        # alias in favour of "h" -- confirm against the pandas version in use.
        index = date_range("01-Jan-2013", periods=10, freq="H")
        arr = np.arange(10, dtype="int64")
        s1 = Series(arr, index=index)
        s2 = Series(arr, index=index)
        df = DataFrame(arr.reshape(-1, 1), index=index)

        # two frames: the duplicate column label 0 is kept
        expected = DataFrame(np.repeat(arr, 2).reshape(-1, 2),
                             index=index,
                             columns=[0, 0])
        result = concat([df, df], axis=1)
        tm.assert_frame_equal(result, expected)

        # unnamed Series only: columns are numbered 0..n-1
        expected = DataFrame(np.repeat(arr, 2).reshape(-1, 2),
                             index=index,
                             columns=[0, 1])
        result = concat([s1, s2], axis=1)
        tm.assert_frame_equal(result, expected)

        expected = DataFrame(np.repeat(arr, 3).reshape(-1, 3),
                             index=index,
                             columns=[0, 1, 2])
        result = concat([s1, s2, s1], axis=1)
        tm.assert_frame_equal(result, expected)

        # unnamed Series mixed with a frame: the frame keeps its own label 0
        # while the Series are numbered independently from 0
        expected = DataFrame(np.repeat(arr, 5).reshape(-1, 5),
                             index=index,
                             columns=[0, 0, 1, 2, 3])
        result = concat([s1, df, s2, s2, s1], axis=1)
        tm.assert_frame_equal(result, expected)

        # with names
        s1.name = "foo"
        expected = DataFrame(np.repeat(arr, 3).reshape(-1, 3),
                             index=index,
                             columns=["foo", 0, 0])
        result = concat([s1, df, s2], axis=1)
        tm.assert_frame_equal(result, expected)

        s2.name = "bar"
        expected = DataFrame(np.repeat(arr, 3).reshape(-1, 3),
                             index=index,
                             columns=["foo", 0, "bar"])
        result = concat([s1, df, s2], axis=1)
        tm.assert_frame_equal(result, expected)

        # ignore index
        expected = DataFrame(np.repeat(arr, 3).reshape(-1, 3),
                             index=index,
                             columns=[0, 1, 2])
        result = concat([s1, df, s2], axis=1, ignore_index=True)
        tm.assert_frame_equal(result, expected)

        # axis 0
        # row-wise: everything is expected to stack into the single column 0
        expected = DataFrame(np.tile(arr, 3).reshape(-1, 1),
                             index=index.tolist() * 3,
                             columns=[0])
        result = concat([s1, df, s2])
        tm.assert_frame_equal(result, expected)

        # row-wise with ignore_index: the expected frame uses the default index
        expected = DataFrame(np.tile(arr, 3).reshape(-1, 1), columns=[0])
        result = concat([s1, df, s2], ignore_index=True)
        tm.assert_frame_equal(result, expected)
Example #56
0
# A Series with the default index; inspect its values and index.
obj = Series([4, 7, -5, 3])
obj.values
obj.index
# The same values with explicit string labels; look up by label and by integer.
obj2 = Series([4, 7, -5, 3], index=['a', 'b', 'c', 'd'])
obj2
obj2['b']
obj2[1]
# Build a Series from a dict, then again with an explicit index:
# 'California' is not a key of sdata and 'Utah' is not in States.
sdata = {'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}
obj4 = Series(sdata)
States = ['California', 'Ohio', 'Oregon', 'Texas']
obj3 = Series(sdata, index=States)
obj3
# Detect missing entries, then add the two differently-indexed Series.
pd.isnull(obj3)
pd.notnull(obj3)
obj3 + obj4
# Name the Series and its index.
obj3.name = 'Population'
obj3.index.name = 'State'
obj3
# Replace the index of the first Series by assignment.
obj.index = ['Bob', 'Steve', 'Jeff', 'Ryan']
obj

# DataFrame from a dict of equal-length lists.
data = {
    'State': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9]
}
frame = DataFrame(data)
frame
# Pass `columns` to choose the column order explicitly.
DataFrame(data, columns=['year', 'State', 'pop'])
frame2 = DataFrame(data,
                   columns=['year', 'State', 'pop', 'debt'],
Example #57
0
# casualty figures, indexed by country
cas = Series([870000, 430000, 300000, 210000, 400000],
             index=['USSR', 'Germany', 'China', 'Japan', 'USA'])
print(cas)
print(cas['USA'])

# select the countries whose figure is greater than 400,000
print(cas[cas > 400000])

# check whether a label is in the Series' index
print('USSR' in cas)

# convert the Series to a dictionary
cas_dict = cas.to_dict()
print(cas_dict)

# and back from the dictionary to a Series
cas2 = Series(cas_dict)

# build a Series with an explicit index; 'Argentina' is not a key of cas_dict
countries = ['China', 'Germany', 'Japan', 'USA', 'USSR', 'Argentina']
obj2 = Series(cas_dict, index=countries)
print(obj2, 'obj2')

# detect missing entries
print(pd.isnull(obj2))
print(pd.notnull(obj2))

# add two Series whose indexes differ
print(cas2 + obj2)

# a Series and its index can each be given a name
obj2.name = "Casualties"
obj2.index.name = 'Countries'
Example #58
0
# print is written in call form so the snippet parses on Python 3 as well as
# Python 2 (a single parenthesised argument prints the same on both).

mySeries2[mySeries2 > 0]  # equivalent to which() in R
myDoubled = mySeries * 2  # perform functions on your arrays
np.exp(myDoubled)

# Series indices can be thought of like dicts, but are immutable
'b' in mySeries2
'e' in mySeries2

# We can even turn dicts into series
eVotes = {'NC': 15, 'TX': 38, 'CA': 55}  # sorted by key
ev = Series(eVotes)
# Re-index by an explicit list of states; 'OH' is not a key of eVotes.
states = ['CA', 'NC', 'TX', 'OH']
ev2 = Series(ev, states)

# Series have a name, as do their indices
ev2.name = "electoral votes"
ev2.index.name = "state"
print(ev2)

# DataFrames are fun too

# NOTE: this downloads the CSV over the network on every run.
data = pd.read_csv(
    "https://raw.githubusercontent.com/mcdickenson/python-washu-2014/master/day9/lab9.csv"
)
print(data.head())
data['State']
# change column names for convenience
data.columns = ['st', 'yr', 'ev', 'pop']

# we can create new columns
data['popm'] = data['pop'] / 1000000.0
Example #59
0
def compute_up(t, s, **kwargs):
    """Return the distinct values of ``s`` as a new Series named like ``s``.

    ``t`` and any keyword arguments are accepted but not used.
    """
    return Series(s.unique(), name=s.name)
Example #60
0
 def test_tidy_repr(self):
     # repr() of a long, named Series of non-ASCII strings must not raise.
     alef = u("\u05d0")
     values = [alef] * 1000
     a = Series(values)
     a.name = 'title1'
     repr(a)