Example #1
        def _check_case(xloc, xlen, yloc, ylen, eloc, elen):
            xindex = BlockIndex(TEST_LENGTH, xloc, xlen)
            yindex = BlockIndex(TEST_LENGTH, yloc, ylen)

            xdindex = xindex.to_int_index()
            ydindex = yindex.to_int_index()

            x = np.arange(xindex.npoints) * 10. + 1
            y = np.arange(yindex.npoints) * 100. + 1

            xfill = 0
            yfill = 2

            result_block_vals, rb_index = sparse_op(
                x, xindex, xfill, y, yindex, yfill)
            result_int_vals, ri_index = sparse_op(x, xdindex, xfill,
                                                  y, ydindex, yfill)

            self.assert_(rb_index.to_int_index().equals(ri_index))
            assert_equal(result_block_vals, result_int_vals)

            # check versus Series...
            xseries = Series(x, xdindex.indices)
            xseries = xseries.reindex(np.arange(TEST_LENGTH)).fillna(xfill)

            yseries = Series(y, ydindex.indices)
            yseries = yseries.reindex(np.arange(TEST_LENGTH)).fillna(yfill)

            series_result = python_op(xseries, yseries)
            series_result = series_result.reindex(ri_index.indices)

            assert_equal(result_block_vals, series_result.values)
            assert_equal(result_int_vals, series_result.values)
Example #2
class Reindex(object):

    def setup(self):
        rng = date_range(start='1/1/1970', periods=10000, freq='1min')
        self.df = DataFrame(np.random.rand(10000, 10), index=rng,
                            columns=range(10))
        self.df['foo'] = 'bar'
        self.rng_subset = Index(rng[::2])
        self.df2 = DataFrame(index=range(10000),
                             data=np.random.rand(10000, 30), columns=range(30))
        N = 5000
        K = 200
        level1 = tm.makeStringIndex(N).values.repeat(K)
        level2 = np.tile(tm.makeStringIndex(K).values, N)
        index = MultiIndex.from_arrays([level1, level2])
        self.s = Series(np.random.randn(N * K), index=index)
        self.s_subset = self.s[::2]

    def time_reindex_dates(self):
        self.df.reindex(self.rng_subset)

    def time_reindex_columns(self):
        self.df2.reindex(columns=self.df.columns[1:5])

    def time_reindex_multiindex(self):
        self.s.reindex(self.s_subset.index)
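These Reindex timings follow the asv (airspeed velocity) benchmark convention: the runner calls setup() before timing each time_* method. A minimal sketch for exercising them by hand, assuming the class above and its dependencies (numpy, pandas, and pandas.util.testing as tm) are importable:

# Exercise each benchmark method once as a quick sanity check (no timing).
bench = Reindex()
bench.setup()
bench.time_reindex_dates()
bench.time_reindex_columns()
bench.time_reindex_multiindex()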
Example #3
def main():
    # A Series can be viewed as a fixed-length, ordered dict.
    s1 = Series([1, 2, 3.0, 'abc'])
    print(s1)
    print()
    s2 = Series(data=[1, 3, 5, 7], index=['a', 'b', 'x', 'y'])
    print(s2)
    print(s2.index)
    print(s2.values)
    s2.name = 'a_series'
    s2.index.name = 'the_index'
    print(s2)
    ser = Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
    # reindex
    a = ['a', 'b', 'c', 'd', 'e']
    ser_1 = ser.reindex(a)
    print(ser_1)
    ser_2 = ser.reindex(a, fill_value=0)
    print(ser_2)
    print()
    # A DataFrame is a tabular data structure: an ordered collection of columns
    # (analogous to an index), where each column may hold a different value type
    # (unlike an ndarray, which has a single dtype).
    # Essentially, a DataFrame can be seen as a collection of Series that share
    # the same index.
    data = {'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
            'year': [2000, 2001, 2002, 2001, 2002],
            'pop': [1.5, 1.7, 3.6, 2.4, 2.9]}
    df = DataFrame(data)
    print(df)
    df = DataFrame(data, index=['one', 'two', 'three', 'four', 'five'],
                   columns=['year', 'state', 'pop', 'debt'])
    print(df)
    print(df.index)
    print(df.columns)
    print(type(df['debt']))
    state = ['Texas', 'Utah', 'California']
    # Fill methods apply along the index axis, so reindex the columns without one.
    df1 = df.reindex(columns=state)
    print(df1)
    print()
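As a short companion sketch (toy frame, not part of the function above): reindexing the row axis of a DataFrame, where fill methods such as ffill do apply, in contrast to the column reindex shown in main().

import numpy as np
from pandas import DataFrame

frame = DataFrame(np.arange(9).reshape(3, 3),
                  index=['a', 'c', 'd'],
                  columns=['Texas', 'Utah', 'California'])
print(frame.reindex(['a', 'b', 'c', 'd']))                  # the new 'b' row is NaN
print(frame.reindex(['a', 'b', 'c', 'd'], method='ffill'))  # 'b' forward-filled from 'a'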
Example #4
def test_reindex_datetimeindexes_tz_naive_and_aware():
    # GH 8306
    idx = date_range('20131101', tz='America/Chicago', periods=7)
    newidx = date_range('20131103', periods=10, freq='H')
    s = Series(range(7), index=idx)
    with pytest.raises(TypeError):
        s.reindex(newidx, method='ffill')
Example #5
def test_reindex_nan():
    ts = Series([2, 3, 5, 7], index=[1, 4, nan, 8])

    i, j = [nan, 1, nan, 8, 4, nan], [2, 0, 2, 3, 1, 2]
    assert_series_equal(ts.reindex(i), ts.iloc[j])

    ts.index = ts.index.astype('object')

    # reindex coerces index.dtype to float, loc/iloc doesn't
    assert_series_equal(ts.reindex(i), ts.iloc[j], check_index_type=False)
Example #6
class ReindexMethod:

    params = [['pad', 'backfill'], [date_range, period_range]]
    param_names = ['method', 'constructor']

    def setup(self, method, constructor):
        N = 100000
        self.idx = constructor('1/1/2000', periods=N, freq='1min')
        self.ts = Series(np.random.randn(N), index=self.idx)[::2]

    def time_reindex_method(self, method, constructor):
        self.ts.reindex(self.idx, method=method)
Example #7
class ReindexMethod(object):

    params = ['pad', 'backfill']
    param_names = ['method']

    def setup(self, method):
        N = 100000
        self.idx = date_range('1/1/2000', periods=N, freq='1min')
        self.ts = Series(np.random.randn(N), index=self.idx)[::2]

    def time_reindex_method(self, method):
        self.ts.reindex(self.idx, method=method)
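For readers unfamiliar with the method argument being benchmarked above, here is a small illustrative sketch on toy data (not taken from the benchmark) of what pad and backfill do when reindexing a thinned-out series back onto its full index:

import numpy as np
from pandas import Series, date_range

idx = date_range('1/1/2000', periods=6, freq='1min')
ts = Series(np.arange(6.0), index=idx)[::2]       # keep every other point
print(ts.reindex(idx, method='pad'))              # forward-fill the gaps
print(ts.reindex(idx, method='backfill'))         # backward-fill; the trailing gap stays NaN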
Example #8
    def test_reindex_int(self):
        ts = self.ts[::2]
        int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)

        # this should work fine
        reindexed_int = int_ts.reindex(self.ts.index)

        # if NaNs introduced
        self.assert_(reindexed_int.dtype == np.float_)

        # NO NaNs introduced
        reindexed_int = int_ts.reindex(int_ts.index[::2])
        self.assert_(reindexed_int.dtype == np.int_)
Example #9
def test_reindex_int(test_data):
    ts = test_data.ts[::2]
    int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)

    # this should work fine
    reindexed_int = int_ts.reindex(test_data.ts.index)

    # if NaNs introduced
    assert reindexed_int.dtype == np.float_

    # NO NaNs introduced
    reindexed_int = int_ts.reindex(int_ts.index[::2])
    assert reindexed_int.dtype == np.int_
Example #10
        def _check(values, index1, index2, fill_value):
            first_series = SparseSeries(values, sparse_index=index1, fill_value=fill_value)
            reindexed = first_series.sparse_reindex(index2)
            self.assert_(reindexed.sp_index is index2)

            int_indices1 = index1.to_int_index().indices
            int_indices2 = index2.to_int_index().indices

            expected = Series(values, index=int_indices1)
            expected = expected.reindex(int_indices2).fillna(fill_value)
            assert_almost_equal(expected.values, reindexed.sp_values)

            # make sure level argument asserts
            expected = expected.reindex(int_indices2).fillna(fill_value)
Example #11
def pd_dataframe4():
    obj = Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
    print(obj)
    obj2 = obj.reindex(['a', 'b', 'c', 'd'])
    print(obj2)
    obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
    print(obj2)
    obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0)
    print(obj2)
    obj3 = Series(['blue', 'purple', 'yellow'], index=[0, 2, 4])
    obj4 = obj3.reindex(range(6), method='ffill')
    print(obj4)
    obj4 = obj3.reindex(range(6), method='bfill')
    print(obj4)
Example #12
def test_reindex_bool(test_data):
    # A series other than float, int, string, or object
    ts = test_data.ts[::2]
    bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)

    # this should work fine
    reindexed_bool = bool_ts.reindex(test_data.ts.index)

    # if NaNs introduced
    assert reindexed_bool.dtype == np.object_

    # NO NaNs introduced
    reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
    assert reindexed_bool.dtype == np.bool_
Example #13
    def test_alignment(self):
        x = Series(data=[1, 2, 3], index=MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3)]))

        y = Series(data=[4, 5, 6], index=MultiIndex.from_tuples([("Z", 1), ("Z", 2), ("B", 3)]))

        res = x - y
        exp_index = x.index.union(y.index)
        exp = x.reindex(exp_index) - y.reindex(exp_index)
        assert_series_equal(res, exp)

        # hit non-monotonic code path
        res = x[::-1] - y[::-1]
        exp_index = x.index.union(y.index)
        exp = x.reindex(exp_index) - y.reindex(exp_index)
        assert_series_equal(res, exp)
Example #14
    def test_reindex_bool(self):

        # A series other than float, int, string, or object
        ts = self.ts[::2]
        bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)

        # this should work fine
        reindexed_bool = bool_ts.reindex(self.ts.index)

        # if NaNs introduced
        self.assert_(reindexed_bool.dtype == np.object_)

        # NO NaNs introduced
        reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
        self.assert_(reindexed_bool.dtype == np.bool_)
Example #15
def test_loc_getitem_setitem_integer_slice_keyerrors():
    s = Series(np.random.randn(10), index=lrange(0, 20, 2))

    # this is OK
    cp = s.copy()
    cp.iloc[4:10] = 0
    assert (cp.iloc[4:10] == 0).all()

    # so is this
    cp = s.copy()
    cp.iloc[3:11] = 0
    assert (cp.iloc[3:11] == 0).values.all()

    result = s.iloc[2:6]
    result2 = s.loc[3:11]
    expected = s.reindex([4, 6, 8, 10])

    assert_series_equal(result, expected)
    assert_series_equal(result2, expected)

    # non-monotonic, raise KeyError
    s2 = s.iloc[lrange(5) + lrange(5, 10)[::-1]]
    with pytest.raises(KeyError, match=r"^3L?$"):
        s2.loc[3:11]
    with pytest.raises(KeyError, match=r"^3L?$"):
        s2.loc[3:11] = 0
Example #16
def predict_autosequence(config, context, predict_index, fit_model=True, update_column=None):
    if len(context.train_index & predict_index):
        logging.warning("Train and predict indices overlap...")
    
    x, y = None, None
    
    if fit_model:
        x, y = fit(config, context)
    
    logging.debug(x.columns)
    logging.debug(config.model.coef_)
    
    ctx = context.copy()
    ps = []
    for i in predict_index:
        ctx.data = context.data
        x = get_x(config, ctx)
        predict_x = x.reindex([i])
    
        # make actual predictions
        p = config.model.predict(predict_x.values)
        if update_column is not None:
            ctx.data[update_column][i] = p[0]
        ps.append(p[0])
    try:
        preds = Series(ps, index=predict_index)
    except:
        preds = DataFrame(ps, index=predict_index)
    # prediction post-processing
    if config.prediction is not None:
        context.data[config.predictions_name] = preds
        preds = build_target(config.prediction, context)
        preds = preds.reindex(predict_index)
    preds.name = ''
    return preds, x, y
Example #17
    def test_upsample_with_limit(self):
        rng = date_range("1/1/2000", periods=3, freq="5t")
        ts = Series(np.random.randn(len(rng)), rng)

        result = ts.resample("t", fill_method="ffill", limit=2)
        expected = ts.reindex(result.index, method="ffill", limit=2)
        assert_series_equal(result, expected)
Example #18
    def test_upsample_with_limit(self):
        rng = date_range('1/1/2000', periods=3, freq='5t')
        ts = Series(np.random.randn(len(rng)), rng)

        result = ts.resample('t', fill_method='ffill', limit=2)
        expected = ts.reindex(result.index, method='ffill', limit=2)
        assert_series_equal(result, expected)
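The fill_method and limit keywords used in the two snippets above come from older pandas; a sketch of the same check against the newer resample API (assuming a current pandas where ffill with a limit is available on the resampler):

import numpy as np
from pandas import Series, date_range
from pandas.testing import assert_series_equal

rng = date_range('1/1/2000', periods=3, freq='5min')
ts = Series(np.random.randn(len(rng)), rng)

result = ts.resample('min').ffill(limit=2)
expected = ts.reindex(result.index, method='ffill', limit=2)
assert_series_equal(result, expected)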
Example #19
        def _check(values, index1, index2, fill_value):
            first_series = SparseSeries(values, sparse_index=index1,
                                        fill_value=fill_value)
            reindexed = first_series.sparse_reindex(index2)
            self.assertIs(reindexed.sp_index, index2)

            int_indices1 = index1.to_int_index().indices
            int_indices2 = index2.to_int_index().indices

            expected = Series(values, index=int_indices1)
            expected = expected.reindex(int_indices2).fillna(fill_value)
            tm.assert_almost_equal(expected.values, reindexed.sp_values)

            # make sure level argument asserts
            # TODO: expected is not used anywhere...remove?
            expected = expected.reindex(int_indices2).fillna(fill_value)  # noqa
Example #20
def select_known_events(gauge_dict, base, precipitation, precip=True, date='2006-01-01', buffer_days=25):
    s = data[base]['Q_cfs']
    name = list(gauge_dict.keys())[0]
    date_obj = to_datetime(date)
    rng = s[date_obj - Timedelta(days=buffer_days): date_obj + Timedelta(days=buffer_days)]
    if precip:
        fig, ax1 = plt.subplots()
        ax1.plot(rng, 'k', label='Discharge [cfs]')
        ax1.legend()
        plt.title('Discharge at {} Event Peak: {}'.format(name, date))
        ppt_s = Series(array(precipitation)[:, 1], index=array(precipitation)[:, 0])
        ppt_s = ppt_s.reindex(index=rng.index, method=None)
        ppt_s = ppt_s.apply(to_numeric)
        ppt_s[ppt_s < 0] = 0.0
        ppt_s = ppt_s[date_obj - Timedelta(days=buffer_days): date_obj + Timedelta(days=buffer_days)]
        ax1.set_xlabel('Date')
        ax1.set_ylabel('[cfs]')
        for tl in ax1.get_yticklabels():
            tl.set_color('k')
        ax2 = ax1.twinx()
        ax2.bar(ppt_s.index, ppt_s, width=0.1, label='Precipitation [mm/hr]')
        plt.gca().invert_yaxis()
        ax2.set_ylabel('[mm]')
        for tl in ax2.get_yticklabels():
            tl.set_color('b')
        ax2.legend()
    if not precip:
        plt.plot(rng)
        plt.plot(rng, 'k')
        plt.legend()
        plt.title('Discharge at {} Event Peak: {}'.format(name, date))
        plt.xlabel('Date')
        plt.ylabel('[cfs]')
Example #21
def _add_margins(table, data, values, rows=None, cols=None, aggfunc=np.mean):
    if len(cols) > 0:
        col_margin = data[rows + values].groupby(rows).agg(aggfunc)

        # need to "interleave" the margins

        table_pieces = []
        margin_keys = []

        if len(cols) > 0:
            grouper = table.groupby(level=0, axis=1)
        else:
            grouper = ((k, table[[k]]) for k in table.columns)

        for key, piece in grouper:
            all_key = (key, 'All') + ('',) * (len(cols) - 1)
            piece[all_key] = col_margin[key]
            table_pieces.append(piece)
            margin_keys.append(all_key)

        result = table_pieces[0]
        for piece in table_pieces[1:]:
            result = result.join(piece)
    else:
        result = table
        margin_keys = table.columns

    grand_margin = {}
    for k, v in data[values].iteritems():
        try:
            grand_margin[k] = aggfunc(v)
        except TypeError:
            pass

    if len(cols) > 0:
        row_margin = data[cols + values].groupby(cols).agg(aggfunc)
        row_margin = row_margin.stack()

        # slight hack
        new_order = [len(cols)] + list(range(len(cols)))
        row_margin.index = row_margin.index.reorder_levels(new_order)
    else:
        row_margin = Series(np.nan, index=result.columns)

    key = ('All',) + ('',) * (len(rows) - 1)

    row_margin = row_margin.reindex(result.columns)
    # populate grand margin
    for k in margin_keys:
        if len(cols) > 0:
            row_margin[k] = grand_margin[k[0]]
        else:
            row_margin[k] = grand_margin[k]

    margin_dummy = DataFrame(row_margin, columns=[key]).T
    result = result.append(margin_dummy)

    return result
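_add_margins appears to be adapted from pandas' internal pivot-table code; the public behaviour it supports is pivot_table(..., margins=True), which appends 'All' totals computed with the same aggfunc. A minimal sketch on made-up data:

import numpy as np
from pandas import DataFrame, pivot_table

df = DataFrame({'state': ['OH', 'OH', 'NV', 'NV'],
                'year': [2000, 2001, 2000, 2001],
                'pop': [1.5, 1.7, 2.4, 2.9]})
# margins=True adds an 'All' row and column aggregated with aggfunc.
print(pivot_table(df, values='pop', index='state', columns='year',
                  aggfunc=np.mean, margins=True))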
Example #22
def test_reindex_with_datetimes():
    rng = date_range('1/1/2000', periods=20)
    ts = Series(np.random.randn(20), index=rng)

    result = ts.reindex(list(ts.index[5:10]))
    expected = ts[5:10]
    tm.assert_series_equal(result, expected)

    result = ts[list(ts.index[5:10])]
    tm.assert_series_equal(result, expected)
Example #23
def test_reindex_series_add_nat():
    rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s')
    series = Series(rng)

    result = series.reindex(range(15))
    assert np.issubdtype(result.dtype, np.dtype('M8[ns]'))

    mask = result.isna()
    assert mask[-5:].all()
    assert not mask[:-5].any()
Example #24
    def test_reindex_series_add_nat(self):
        rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s")
        series = Series(rng)

        result = series.reindex(range(15))
        self.assert_(np.issubdtype(result.dtype, np.datetime64))

        mask = result.isnull()
        self.assert_(mask[-5:].all())
        self.assert_(not mask[:-5].any())
Example #25
def test_getitem_setitem_slice_integers():
    s = Series(np.random.randn(8), index=[2, 4, 6, 8, 10, 12, 14, 16])

    result = s[:4]
    expected = s.reindex([2, 4, 6, 8])
    assert_series_equal(result, expected)

    s[:4] = 0
    assert (s[:4] == 0).all()
    assert not (s[4:] == 0).any()
Example #26
    def test_interp_regression(self):
        tm._skip_if_no_scipy()
        _skip_if_no_pchip()

        ser = Series(np.sort(np.random.uniform(size=100)))

        # interpolate at new_index
        new_index = ser.index.union(
            Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75]))
        interp_s = ser.reindex(new_index).interpolate(method='pchip')
        # does not blow up, GH5977
        interp_s[49:51]
Example #27
    def test_interpolate_from_derivatives(self):
        ser = Series([10, 11, 12, 13])

        expected = Series([11.00, 11.25, 11.50, 11.75,
                           12.00, 12.25, 12.50, 12.75, 13.00],
                          index=Index([1.0, 1.25, 1.5, 1.75,
                                       2.0, 2.25, 2.5, 2.75, 3.0]))
        # interpolate at new_index
        new_index = ser.index.union(Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75]))
        interp_s = ser.reindex(new_index).interpolate(
            method='from_derivatives')
        assert_series_equal(interp_s[1:3], expected)
Example #28
def predict(config, context, predict_index, fit_model=True, model_name=None):
    if len(context.train_index & predict_index):
        print "WARNING: train and predict indices overlap..."

    x, y = None, None

    if model_name:
        config.model = context.store.load(model_name)

    if not model_name and fit_model:
        x, y = fit(config, context)

    # TODO: possible to have x loaded without new prediction rows
    if x is None:
        # rebuild just the necessary x:
        ctx = context.copy()
        ctx.data = context.data.loc[predict_index]
        x = get_x(config, ctx)
        try:
            # we may or may not have y's in predict context
            # we get them if we can for metrics and reporting
            y = get_y(config, ctx)
        except KeyError:
            pass

    if debug:
        print(x.columns)

    predict_x = x.reindex(predict_index)

    print "Making predictions... ",
    # make actual predictions
    ps = config.model.predict(predict_x.values)
    try:
        preds = Series(ps, index=predict_x.index)
    except:
        preds = DataFrame(ps, index=predict_x.index)
    print "[OK]"
    # prediction post-processing
    if config.prediction is not None:
        old = context.data
        context.data = context.data.reindex(predict_x.index)
        context.data[config.predictions_name] = preds
        preds = build_target(config.prediction, context)
        preds = preds.reindex(predict_x.index)
        context.data = old
    preds.name = ''
    actuals = y.reindex(predict_index)
    # TODO: handle multi-variate predictions
    predict_x['predictions'] = preds
    predict_x['actuals'] = actuals
    config.update_reporters_with_predictions(context, predict_x, actuals, preds)
    return predict_x
Example #29
def test_reindex_fill_value():
    # -----------------------------------------------------------
    # floats
    floats = Series([1., 2., 3.])
    result = floats.reindex([1, 2, 3])
    expected = Series([2., 3., np.nan], index=[1, 2, 3])
    assert_series_equal(result, expected)

    result = floats.reindex([1, 2, 3], fill_value=0)
    expected = Series([2., 3., 0], index=[1, 2, 3])
    assert_series_equal(result, expected)

    # -----------------------------------------------------------
    # ints
    ints = Series([1, 2, 3])

    result = ints.reindex([1, 2, 3])
    expected = Series([2., 3., np.nan], index=[1, 2, 3])
    assert_series_equal(result, expected)

    # don't upcast
    result = ints.reindex([1, 2, 3], fill_value=0)
    expected = Series([2, 3, 0], index=[1, 2, 3])
    assert issubclass(result.dtype.type, np.integer)
    assert_series_equal(result, expected)

    # -----------------------------------------------------------
    # objects
    objects = Series([1, 2, 3], dtype=object)

    result = objects.reindex([1, 2, 3])
    expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object)
    assert_series_equal(result, expected)

    result = objects.reindex([1, 2, 3], fill_value='foo')
    expected = Series([2, 3, 'foo'], index=[1, 2, 3], dtype=object)
    assert_series_equal(result, expected)

    # ------------------------------------------------------------
    # bools
    bools = Series([True, False, True])

    result = bools.reindex([1, 2, 3])
    expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object)
    assert_series_equal(result, expected)

    result = bools.reindex([1, 2, 3], fill_value=False)
    expected = Series([False, True, False], index=[1, 2, 3])
    assert_series_equal(result, expected)
Example #30
def test_reindex_pad():
    s = Series(np.arange(10), dtype='int64')
    s2 = s[::2]

    reindexed = s2.reindex(s.index, method='pad')
    reindexed2 = s2.reindex(s.index, method='ffill')
    assert_series_equal(reindexed, reindexed2)

    expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10))
    assert_series_equal(reindexed, expected)

    # GH4604
    s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
    new_index = ['a', 'g', 'c', 'f']
    expected = Series([1, 1, 3, 3], index=new_index)

    # this changes dtype because the ffill happens after
    result = s.reindex(new_index).ffill()
    assert_series_equal(result, expected.astype('float64'))

    result = s.reindex(new_index).ffill(downcast='infer')
    assert_series_equal(result, expected)

    expected = Series([1, 5, 3, 5], index=new_index)
    result = s.reindex(new_index, method='ffill')
    assert_series_equal(result, expected)

    # inference of new dtype
    s = Series([True, False, False, True], index=list('abcd'))
    new_index = 'agc'
    result = s.reindex(list(new_index)).ffill()
    expected = Series([True, True, False], index=list(new_index))
    assert_series_equal(result, expected)

    # GH4618 shifted series downcasting
    s = Series(False, index=range(0, 5))
    result = s.shift(1).fillna(method='bfill')
    expected = Series(False, index=range(0, 5))
    assert_series_equal(result, expected)
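To restate the dtype point exercised in test_reindex_pad above, a minimal sketch contrasting filling during reindex with filling after it (same data as the GH4604 case):

from pandas import Series

s = Series([1, 2, 3, 4, 5], index=['a', 'b', 'c', 'd', 'e'])
new_index = ['a', 'g', 'c', 'f']
# Filling as part of reindex looks up each new label against the original
# (sorted) index, so 'g' and 'f' both take the value at 'e'.
print(s.reindex(new_index, method='ffill'))   # 1, 5, 3, 5 (stays int64)
# Reindexing first introduces NaN (upcasting to float64), then ffill fills
# positionally along the new, unsorted index.
print(s.reindex(new_index).ffill())           # 1.0, 1.0, 3.0, 3.0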