Example #1
0
    def test_to_panel_na_handling(self):
        """Check that ``to_panel`` leaves NA where an index pair is absent.

        The two-level index below pairs first-level key ``1`` only with
        second-level keys 2-5, so the pairs ``(1, 0)`` and ``(1, 1)`` have no
        row in the frame. The test asserts that the corresponding entries of
        the converted panel are all null.
        """
        df = DataFrame(np.random.randint(0, 10, size=20).reshape((10, 2)),
                       index=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1],
                              [0, 1, 2, 3, 4, 5, 2, 3, 4, 5]])

        panel = df.to_panel()
        # ``assertTrue`` is the supported spelling; the ``assert_`` alias is
        # deprecated in unittest and no longer exists in Python 3.12+.
        self.assertTrue(isnull(panel[0].ix[1, [0, 1]]).all())
Example #2
0
 def test_from_frame_level1_unsorted(self):
     """Compare ``minor_xs(2)`` of a converted panel with the source frame.

     The frame's second index level is deliberately not sorted; the panel's
     cross-section for key ``2`` must equal the frame's level-1
     cross-section for the same key once that is sorted by index.
     """
     index = MultiIndex.from_tuples([
         ('MSFT', 3), ('MSFT', 2), ('AAPL', 2), ('AAPL', 1), ('MSFT', 1),
     ])
     frame = DataFrame(np.random.rand(5, 4), index=index)
     panel = frame.to_panel()

     result = panel.minor_xs(2)
     expected = frame.xs(2, level=1).sort_index()
     assert_frame_equal(result, expected)
Example #3
0
    def test_to_frame(self):
        """Exercise ``to_frame`` on ``self.panel`` and the trip back via ``to_panel``.

        Covers four things in order: the default (filtered) output, the
        unfiltered round trip, the index level names, a round trip from
        shuffled rows, and preservation of user-supplied axis names.
        """
        # default call must equal the same output with NA rows dropped
        result = self.panel.to_frame()
        assert_frame_equal(result, self.panel.to_frame().dropna(how="any"))

        # without filtering, converting back must reproduce the panel
        full = self.panel.to_frame(filter_observations=False)
        assert_panel_equal(full.to_panel(), self.panel)

        # the long frame's index levels carry these names
        self.assertEqual(full.index.names, ["major", "minor"])

        # rows in random order must still convert back to the same panel
        long_frame = self.panel.to_frame(filter_observations=False)
        order = np.random.permutation(len(long_frame))
        shuffled = long_frame.take(order)
        rebuilt = shuffled.to_panel()
        assert_panel_equal(rebuilt, self.panel)

        # names set on the index and columns must survive the round trip
        outer = ["a", "a", "b", "b", "c", "c"]
        inner = [0, 1, 0, 1, 0, 1]
        named = DataFrame(np.random.randn(6, 2), index=[outer, inner], columns=["one", "two"])
        named.index.names = ["foo", "bar"]
        named.columns.name = "baz"

        round_tripped = named.to_panel().to_frame()
        self.assertEqual(round_tripped.index.names, named.index.names)
        self.assertEqual(round_tripped.columns.names, named.columns.names)
Example #4
0
    def _read_panel_table(self, group, where=None):
        """Build a panel from the rows of ``group.table`` selected by ``where``.

        The selected rows are assembled into a long-format frame indexed by
        (index, column) pairs and converted with ``to_panel``. When that
        index contains duplicates, one row per distinct pair is kept first
        (per the printed message, the most recently appended one).

        Parameters
        ----------
        group : object exposing a ``table`` attribute
        where : optional selection criteria, passed through to ``Selection``

        Returns
        -------
        The converted panel, reindexed on the minor axis when the selection
        carries a column filter.
        """
        from pandas.core.common import _asarray_tuplesafe

        table = group.table

        # run the query against the table
        selection = Selection(table, where)
        selection.select()
        item_names = table._v_attrs.fields

        minor_values = _maybe_convert(selection.values['column'],
                                      table._v_attrs.columns_kind)
        major_values = _maybe_convert(selection.values['index'],
                                      table._v_attrs.index_kind)

        # long-format frame keyed by (index, column)
        pair_index = MultiIndex.from_arrays([major_values, minor_values])
        long_frame = DataFrame(selection.values['values'], index=pair_index,
                               columns=item_names)

        if pair_index.has_duplicates:
            if not self._quiet:  # pragma: no cover
                print ('Duplicate entries in table, taking most recently '
                       'appended')

            # need a better algorithm
            pair_tuples = pair_index.get_tuple_index()
            position_of = lib.map_indices_object(pair_tuples)

            distinct = _asarray_tuplesafe(lib.fast_unique(pair_tuples))
            keep = lib.merge_indexer_object(distinct, position_of)

            kept_index = pair_index.take(keep)
            kept_values = long_frame.values.take(keep, axis=0)
            long_frame = DataFrame(kept_values, index=kept_index,
                                   columns=long_frame.columns)
        else:
            # unique pairs: sort on the first level before converting
            long_frame = long_frame.sortlevel(level=0)

        wp = long_frame.to_panel()

        wanted = selection.column_filter
        if wanted:
            # keep only requested minor labels that are actually present
            wp = wp.reindex(minor=sorted(set(wp.minor_axis) & wanted))
        return wp
Example #5
0
 def test_from_frame_level1_unsorted(self):
     """Compare ``minor_xs(2)`` of a converted panel with the source frame.

     The frame's second index level is deliberately not sorted before the
     frame is converted with ``to_panel``.
     """
     index = MultiIndex.from_tuples(
         [("MSFT", 3), ("MSFT", 2), ("AAPL", 2), ("AAPL", 1), ("MSFT", 1)]
     )
     frame = DataFrame(np.random.rand(5, 4), index=index)
     panel = frame.to_panel()

     result = panel.minor_xs(2)
     # NOTE(review): presumably ``.ix[:, 2]`` is meant to pick the rows whose
     # second index level equals 2 (not column 2) - confirm against the
     # pandas version this test targets.
     expected = frame.ix[:, 2].sort_index()
     assert_frame_equal(result, expected)
Example #6
0
    def _read_panel_table(self, group, where=None):
        """Build a panel from the rows of ``group.table`` selected by ``where``.

        Two paths are taken depending on whether every (index, column) pair
        in the selection is distinct:

        * distinct pairs: the values are sorted by a combined integer key
          and assembled directly into a single 3-d block wrapped in a
          ``Panel``;
        * repeated pairs: a long-format frame is built, reduced to one row
          per distinct pair, and converted with ``to_panel``.

        Parameters
        ----------
        group : object exposing a ``table`` attribute
        where : optional selection criteria, passed through to ``Selection``

        Returns
        -------
        The resulting panel, reindexed on the minor axis when the selection
        carries a column filter.
        """
        table = getattr(group, 'table')

        # create the selection
        sel = Selection(table, where, table._v_attrs.index_kind)
        sel.select()
        # read once here; the original also read it before the selection,
        # with nothing using the value in between
        fields = table._v_attrs.fields

        columns = _maybe_convert(sel.values['column'],
                                 table._v_attrs.columns_kind)
        index = _maybe_convert(sel.values['index'],
                               table._v_attrs.index_kind)
        values = sel.values['values']

        major = Factor(index)
        minor = Factor(columns)

        J, K = len(major.levels), len(minor.levels)
        # one integer per row combining both labels
        # (assumes minor labels lie in [0, K) so distinct pairs map to
        # distinct keys - TODO confirm against Factor)
        key = major.labels * K + minor.labels

        if len(unique(key)) == len(key):
            # no repeated keys: build the panel block directly
            sorter, _ = lib.groupsort_indexer(key, J * K)

            # the data need to be sorted
            sorted_values = values.take(sorter, axis=0)
            major_labels = major.labels.take(sorter)
            minor_labels = minor.labels.take(sorter)

            block = block2d_to_block3d(sorted_values, fields, (J, K),
                                       major_labels, minor_labels)

            mgr = BlockManager([block], [block.items,
                                         major.levels, minor.levels])
            wp = Panel(mgr)
        else:
            if not self._quiet:  # pragma: no cover
                print ('Duplicate entries in table, taking most recently '
                       'appended')

            # reconstruct
            long_index = MultiIndex.from_arrays([index, columns])
            lp = DataFrame(values, index=long_index, columns=fields)

            # need a better algorithm
            tuple_index = long_index.get_tuple_index()

            unique_tuples = lib.fast_unique(tuple_index)
            unique_tuples = _asarray_tuplesafe(unique_tuples)

            # one row position per distinct (index, column) tuple
            indexer = match(unique_tuples, tuple_index)

            new_index = long_index.take(indexer)
            new_values = lp.values.take(indexer, axis=0)

            lp = DataFrame(new_values, index=new_index, columns=lp.columns)
            wp = lp.to_panel()

        if sel.column_filter:
            # keep only requested minor labels that are actually present
            new_minor = sorted(set(wp.minor_axis) & sel.column_filter)
            wp = wp.reindex(minor=new_minor)
        return wp
Example #7
0
from pandas import Series, DataFrame
import pandas as pd
import numpy as np
import pandas_datareader.data as web

# Float Series with the default integer index.
ser = Series(np.arange(3.))
print(ser)

# Same values under string labels; the integer key -1 is not one of the
# labels (presumably resolved by position - verify for the pandas version).
ser2 = Series(np.arange(3.), index=['a', 'b', 'c'])
print(ser2[-1])

# Label-based slice on the integer index. ``.loc`` replaces the deprecated
# ``.ix`` indexer and, like ``.ix`` on an integer index, slices by label.
print(ser.loc[:1])

# Explicit positional access, independent of the integer labels.
ser3 = Series(range(3), index=[-5, 1, 3])
print(ser3.iloc[2])
print(ser3.iloc[-1])

frame = DataFrame(np.arange(6).reshape(3, 2), index=[2, 0, 1])
print(frame.iloc[0])

# One downloaded frame per ticker, keyed by ticker symbol (network access).
pdata = pd.Panel(
    {stk: web.get_data_yahoo(stk, '1/1/2010', '1/30/2010')
     for stk in ['AAPL', 'IBM', 'MSFT', 'GOOG']})
print(pdata)
# Second axis selected by date label: a single date, then an open slice.
print(pdata.loc[:, '1/5/2010', :])
frame = pdata.loc[:, '1/5/2010':, :].to_frame()
print(frame)
print(frame.to_panel())
print('finish')
Example #8
0
    def _read_panel_table(self, group, where=None):
        """Build a panel from the rows of ``group.table`` selected by ``where``.

        Two paths are taken depending on whether every (index, column) pair
        in the selection is distinct:

        * distinct pairs: the values are sorted by a combined integer key
          and assembled directly into a single 3-d block wrapped in a
          ``Panel``;
        * repeated pairs: a long-format frame is built, reduced to one row
          per distinct pair, and converted with ``to_panel``.

        Parameters
        ----------
        group : object exposing a ``table`` attribute
        where : optional selection criteria, passed through to ``Selection``

        Returns
        -------
        The resulting panel, reindexed on the minor axis when the selection
        carries a column filter.
        """
        from pandas.core.index import unique_int64, Factor
        from pandas.core.common import _asarray_tuplesafe
        from pandas.core.internals import BlockManager
        from pandas.core.reshape import block2d_to_block3d

        table = getattr(group, "table")

        # create the selection
        sel = Selection(table, where)
        sel.select()
        fields = table._v_attrs.fields

        # convert the stored column/index values using the kinds recorded in
        # the table attributes
        columns = _maybe_convert(sel.values["column"], table._v_attrs.columns_kind)
        index = _maybe_convert(sel.values["index"], table._v_attrs.index_kind)
        values = sel.values["values"]

        major = Factor(index)
        minor = Factor(columns)

        J, K = len(major.levels), len(minor.levels)
        # one integer per row combining both labels
        # (assumes minor labels lie in [0, K) so distinct pairs map to
        # distinct keys - TODO confirm against Factor)
        key = major.labels * K + minor.labels

        # as many unique keys as rows means no (index, column) pair repeats
        if len(unique_int64(key)) == len(key):
            sorter, _ = lib.groupsort_indexer(key, J * K)

            # the data need to be sorted
            sorted_values = values.take(sorter, axis=0)
            major_labels = major.labels.take(sorter)
            minor_labels = minor.labels.take(sorter)

            block = block2d_to_block3d(sorted_values, fields, (J, K), major_labels, minor_labels)

            # axes passed to the manager: block items, major levels, minor levels
            mgr = BlockManager([block], [block.items, major.levels, minor.levels])
            wp = Panel(mgr)
        else:
            if not self._quiet:  # pragma: no cover
                print ("Duplicate entries in table, taking most recently " "appended")

            # reconstruct
            long_index = MultiIndex.from_arrays([index, columns])
            lp = DataFrame(values, index=long_index, columns=fields)

            # need a better algorithm
            tuple_index = long_index.get_tuple_index()
            index_map = lib.map_indices_object(tuple_index)

            unique_tuples = lib.fast_unique(tuple_index)
            unique_tuples = _asarray_tuplesafe(unique_tuples)

            # one row position per distinct tuple, looked up through index_map
            # (presumably the last occurrence, per the message above - verify)
            indexer = lib.merge_indexer_object(unique_tuples, index_map)

            new_index = long_index.take(indexer)
            new_values = lp.values.take(indexer, axis=0)

            lp = DataFrame(new_values, index=new_index, columns=lp.columns)
            wp = lp.to_panel()

        if sel.column_filter:
            # keep only requested minor labels that are actually present
            new_minor = sorted(set(wp.minor_axis) & sel.column_filter)
            wp = wp.reindex(minor=new_minor)
        return wp