예제 #1
0
    def test_frame_select(self):
        """Select rows and columns from frames stored in table format."""
        frame = tm.makeTimeDataFrame()
        self.store.put('frame', frame, table=True)
        midpoint = frame.index[len(frame) // 2]

        # criteria are given as plain tuples here rather than Term objects
        from_midpoint = ('index', '>=', midpoint)
        cols_a_d = ('columns', ['A', 'D'])
        col_a = ('columns', 'A')

        # rows from the midpoint onwards, restricted to two columns
        selected = self.store.select('frame', [from_midpoint, cols_a_d])
        tm.assert_frame_equal(selected, frame.ix[midpoint:, ['A', 'D']])

        # one column, every row
        selected = self.store.select('frame', [col_a])
        tm.assert_frame_equal(selected, frame.ix[:, ['A']])

        # Other kinds of frames. The selections below are only checked for
        # not raising; nothing is compared against their results.

        # frame built without an explicit index
        int_frame = DataFrame(dict(A=np.random.rand(20),
                                   B=np.random.rand(20)))
        self.store.append('df_int', int_frame)
        int_where = [Term("index<10"), Term("columns", "=", ["A"])]
        self.store.select('df_int', int_where)

        # NOTE(review): ``index`` is passed inside dict(), so it is supplied
        # as a third data column rather than as DataFrame's ``index=``
        # argument -- confirm whether a float index was intended.
        float_frame = DataFrame(dict(A=np.random.rand(20),
                                     B=np.random.rand(20),
                                     index=np.arange(20, dtype='f8')))
        self.store.append('df_float', float_frame)
        float_where = [Term("index<10.0"), Term("columns", "=", ["A"])]
        self.store.select('df_float', float_where)
예제 #2
0
    def test_remove_where(self):
        """Exercise ``store.remove`` with several kinds of ``where`` input."""

        # removing with a criterion under key 'a', which this test never
        # writes
        missing_crit = Term('index', '>', 'foo')
        self.store.remove('a', where=[missing_crit])

        # table-format panel: drop two labels, the rest must remain
        panel = tm.makePanel()
        self.store.put('wp', panel, table=True)
        self.store.remove('wp', [('column', ['A', 'D'])])
        remaining = self.store.select('wp')
        tm.assert_panel_equal(remaining, panel.reindex(minor_axis=['B', 'C']))

        # an empty where list is accepted
        self.store.remove('wp')
        self.store.put('wp', panel, table=True)
        self.store.remove('wp', [])

        # a where list holding the bare string 'foo' is expected to raise
        self.store.remove('wp')
        self.store.put('wp', panel, table=True)
        self.assertRaises(Exception, self.store.remove, 'wp', ['foo'])

        # a where against an object stored with table=False is expected
        # to raise
        self.store.put('wp2', panel, table=False)
        self.assertRaises(
            Exception, self.store.remove, 'wp2', [('column', ['A', 'D'])])
예제 #3
0
def test_frame_select(setup_path):
    """Select from a table-format frame using string where expressions."""
    frame = tm.makeTimeDataFrame()

    with ensure_clean_store(setup_path) as store:
        store.put("frame", frame, format="table")
        # This local must stay named ``date``: the where string below refers
        # to it by name and the assertion looks it up in the Term's scope.
        date = frame.index[len(frame) // 2]

        from_date = Term("index>=date")
        assert from_date.env.scope["date"] == date

        two_columns = "columns=['A', 'D']"
        one_column = "columns=A"

        # rows from ``date`` onwards, two columns
        selected = store.select("frame", [from_date, two_columns])
        tm.assert_frame_equal(selected, frame.loc[date:, ["A", "D"]])

        # every row, single column
        selected = store.select("frame", [one_column])
        tm.assert_frame_equal(selected, frame.loc[:, ["A"]])

        # comparing the index of a time frame with an integer is expected
        # to fail with the message below
        time_frame = tm.makeTimeDataFrame()
        store.append("df_time", time_frame)
        with pytest.raises(
            ValueError, match="could not convert string to Timestamp"
        ):
            store.select("df_time", "index>0")
예제 #4
0
    def test_select(self):
        """Select whole objects and a large item subset from the store."""
        panel = tm.makePanel()

        # table format: write, then read back without a where
        self.store.remove('wp')
        self.store.put('wp', panel, table=True)
        self.store.select('wp')

        # non-table format can also be read when no where is given
        self.store.remove('wp')
        self.store.put('wp2', panel, table=False)
        self.store.select('wp2')

        # select on the items axis using a long list of labels
        item_labels = ['Item%03d' % i for i in xrange(100)]
        minor_labels = ['E%03d' % i for i in xrange(100)]
        big_panel = Panel(np.random.randn(100, 100, 100),
                          items=item_labels,
                          major_axis=date_range('1/1/2000', periods=100),
                          minor_axis=minor_labels)

        self.store.remove('wp')
        self.store.append('wp', big_panel)

        # the first 80 of the 100 item labels
        wanted = item_labels[:80]
        selected = self.store.select('wp', Term('items', wanted))
        tm.assert_panel_equal(big_panel.reindex(items=wanted), selected)
예제 #5
0
def test_invalid_terms(setup_path):
    """Check the errors raised for malformed or unresolvable where clauses."""

    def _docs_frame():
        # 10x4 random frame with columns A-D on a daily index
        return DataFrame(
            np.random.randn(10, 4),
            columns=list("ABCD"),
            index=date_range("20130101", periods=10),
        )

    with ensure_clean_store(setup_path) as store, catch_warnings(record=True):
        # ``df`` must keep this name: the where string "df.index[3]" used
        # further down refers to it literally.
        df = tm.makeTimeDataFrame()
        df["string"] = "foo"
        df.loc[df.index[0:4], "string"] = "bar"

        store.put("df", df, format="table")

        # Term cannot be built without a where argument
        missing_where = re.escape(
            "__init__() missing 1 required positional argument: 'where'")
        with pytest.raises(TypeError, match=missing_where):
            Term()

        # an expression that is not a condition
        not_a_condition = re.escape(
            "cannot process expression [df.index[3]], "
            "[2000-01-06 00:00:00] is not a valid condition")
        with pytest.raises(ValueError, match=not_a_condition):
            store.select("df", "df.index[3]")

        # an expression that does not parse
        with pytest.raises(SyntaxError, match="invalid syntax"):
            store.select("df", "index>")

    # from the docs: with data_columns=True both queries are accepted
    with ensure_clean_path(setup_path) as path:
        _docs_frame().to_hdf(path, "dfq", format="table", data_columns=True)

        read_hdf(path,
                 "dfq",
                 where="index>Timestamp('20130104') & columns=['A', 'B']")
        read_hdf(path, "dfq", where="A>0 or C>0")

    # written without data_columns, the same column query must be rejected
    with ensure_clean_path(setup_path) as path:
        _docs_frame().to_hdf(path, "dfq", format="table")

        invalid_reference = (
            r"The passed where expression: A>0 or C>0\n\s*"
            r"contains an invalid variable reference\n\s*"
            r"all of the variable references must be a reference to\n\s*"
            r"an axis \(e.g. 'index' or 'columns'\), or a data_column\n\s*"
            r"The currently defined references are: index,columns\n")
        with pytest.raises(ValueError, match=invalid_reference):
            read_hdf(path, "dfq", where="A>0 or C>0")
예제 #6
0
    def test_terms(self):
        """Check which term specifications a table panel accepts or rejects."""
        panel = tm.makePanel()
        self.store.put('wp', panel, table=True)

        # where arguments that select is expected to reject
        rejected = (
            ['minor', ['A', 'B']],
            ['index', ['20121114']],
            ['index', ['20121114', '20121114']],
        )
        for where in rejected:
            self.assertRaises(Exception, self.store.select, 'wp', where)

        # NOTE(review): Term.__init__ is called directly, with no Term
        # instance supplied -- confirm this is intended rather than Term(...).
        bad_init_args = (
            (),
            ('blah',),
            ('index',),
            ('index', '=='),
            ('index', '>', 5),
        )
        for init_args in bad_init_args:
            self.assertRaises(Exception, Term.__init__, *init_args)

        # a combined major/minor selection compared against truncate+reindex
        where = [Term('major_axis<20000108'),
                 Term('minor_axis', '=', ['A', 'B'])]
        selected = self.store.select('wp', where)
        wanted = panel.truncate(after='20000108').reindex(minor=['A', 'B'])
        tm.assert_panel_equal(selected, wanted)

        # specifications that must be accepted; only checked for not raising
        accepted = [
            dict(field='index', op='>', value='20121114'),
            ('index', '20121114'),
            ('index', '>', '20121114'),
            (('index', ['20121114', '20121114']), ),
            ('index', datetime(2012, 11, 14)),
            'index>20121114',
            'major>20121114',
            'major_axis>20121114',
            (('minor', ['A', 'B']), ),
            (('minor_axis', ['A', 'B']), ),
            ((('minor_axis', ['A', 'B']), ), ),
            (('column', ['A', 'B']), ),
        ]
        for where in accepted:
            self.store.select('wp', where)
예제 #7
0
    def test_select_filter_corner(self):
        """Select 75 of 100 string-labelled columns from a table frame."""
        frame = DataFrame(np.random.randn(50, 100))
        # replace both axes with zero-padded three-digit string labels
        frame.index = ['%.3d' % c for c in frame.index]
        frame.columns = ['%.3d' % c for c in frame.columns]
        self.store.put('frame', frame, table=True)

        selected = self.store.select(
            'frame', [Term('column', frame.columns[:75])])
        wanted = frame.ix[:, frame.columns[:75]]
        tm.assert_frame_equal(selected, wanted)
예제 #8
0
def Term(*args, **kwargs):
    """Deprecated shim that forwards to ``pandas.io.pytables.Term``.

    Emits a ``FutureWarning`` attributed to the caller (``stacklevel=2``)
    and then returns ``pandas.io.pytables.Term(*args, **kwargs)``.
    """
    import warnings

    deprecation_note = ("pd.Term is deprecated as it is not "
                        "applicable to user code. Instead use in-line "
                        "string expressions in the where clause when "
                        "searching in HDFStore")
    warnings.warn(deprecation_note, FutureWarning, stacklevel=2)

    # imported only after warning, and under an alias so the real class
    # does not share this wrapper's name
    from pandas.io.pytables import Term as _PyTablesTerm
    return _PyTablesTerm(*args, **kwargs)
예제 #9
0
def test_encoding(setup_path):
    """Round-trip and select from a table appended with ASCII encoding."""
    with ensure_clean_store(setup_path) as store:
        frame = DataFrame({"A": "foo", "B": "bar"}, index=range(5))
        # blank out one cell in each column
        frame.loc[2, "A"] = np.nan
        frame.loc[3, "B"] = np.nan

        _maybe_remove(store, "df")
        store.append("df", frame, encoding="ascii")
        tm.assert_frame_equal(store["df"], frame)

        # column selection with the encoding passed to Term explicitly
        only_a = frame.reindex(columns=["A"])
        selected = store.select("df", Term("columns=A", encoding="ascii"))
        tm.assert_frame_equal(selected, only_a)
예제 #10
0
    def test_append_frame_column_oriented(self):
        """Append a frame in column slices to a table with axes=['columns']."""
        frame = tm.makeTimeDataFrame()
        self.store.remove('df1')

        # first two columns are written with axes=['columns'], the remaining
        # columns are appended without specifying axes
        self.store.append('df1', frame.ix[:, :2], axes=['columns'])
        self.store.append('df1', frame.ix[:, 2:])
        tm.assert_frame_equal(self.store['df1'], frame)

        # selecting one column
        only_a = self.store.select('df1', 'columns=A')
        tm.assert_frame_equal(frame.reindex(columns=['A']), only_a)

        # combining the column selection with an index inequality is
        # expected to raise
        unsupported = ('columns=A', Term('index', '>', frame.index[4]))
        self.assertRaises(Exception, self.store.select, 'df1', unsupported)

        # combining it with an equality against several index labels works
        first_rows = frame.index[0:4]
        subset = self.store.select(
            'df1', ('columns=A', Term('index', '=', first_rows)))
        wanted = frame.reindex(columns=['A'], index=frame.index[0:4])
        tm.assert_frame_equal(wanted, subset)
예제 #11
0
    def test_remove_crit(self):
        """Remove rows and columns from table panels using Term criteria."""
        panel = tm.makePanel()
        self.store.put('wp', panel, table=True)
        cutoff = panel.major_axis[len(panel.major_axis) // 2]

        # drop everything after the cutoff, then two minor-axis labels
        after_cutoff = Term('index', '>', cutoff)
        cols_a_d = Term('column', ['A', 'D'])
        self.store.remove('wp', where=[after_cutoff])
        self.store.remove('wp', where=[cols_a_d])
        stored = self.store['wp']
        wanted = panel.truncate(after=cutoff).reindex(minor=['B', 'C'])
        tm.assert_panel_equal(stored, wanted)

        # removal of rows that are not all adjacent
        panel = tm.makePanel()
        self.store.put('wp2', panel, table=True)

        slice_dates = panel.major_axis[1:3]
        single_date = panel.major_axis[5]
        pair_dates = [panel.major_axis[7], panel.major_axis[9]]

        by_slice = Term('index', slice_dates)
        by_single = Term('index', single_date)
        by_pair = Term('index', pair_dates)

        for crit in (by_slice, by_single, by_pair):
            self.store.remove('wp2', where=[crit])
        stored = self.store['wp2']

        # expected major axis: the original one minus every removed date
        survivors = list(panel.major_axis)
        for gone in list(slice_dates) + [single_date] + pair_dates:
            survivors.remove(gone)
        tm.assert_panel_equal(stored, panel.reindex(major=survivors))
예제 #12
0
    def test_versioning(self):
        """Check the pandas_version node attribute and reading without it."""
        self.store['a'] = tm.makeTimeSeries()
        self.store['b'] = tm.makeDataFrame()
        frame = tm.makeTimeDataFrame()
        self.store.remove('df1')
        for chunk in (frame[:10], frame[10:]):
            self.store.append('df1', chunk)

        # every node written above must carry version '0.10'
        for key in ('a', 'b', 'df1'):
            node = getattr(self.store.root, key)
            self.assert_(node._v_attrs.pandas_version == '0.10')

        # write a table, wipe its version attribute, and make sure it can
        # still be selected with and without a criterion
        self.store.remove('df2')
        self.store.append('df2', frame)
        self.store.get_node('df2')._v_attrs.pandas_version = None
        self.store.select('df2')
        later_rows = Term('index', '>', frame.index[2])
        self.store.select('df2', [later_rows])
예제 #13
0
    def test_legacy_table_read(self):
        """Read tables from ``legacy_table.h5`` opened in read-only mode.

        The file is looked up in the directory returned by ``curpath()``.
        The store is closed, and the ``IncompatibilityWarning`` filter is
        set back to ``'always'``, even when a select or assertion fails, so
        a failure here does not leak an open file handle or leave the
        warning silenced.
        """
        import warnings

        # legacy table types
        pth = curpath()
        store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'r')
        try:
            store.select('df1')
            store.select('df2')
            store.select('wp1')

            # force the frame
            store.select('df2', typ='legacy_frame')

            # old version (this still throws an exception though)
            warnings.filterwarnings('ignore',
                                    category=IncompatibilityWarning)
            try:
                self.assertRaises(Exception, store.select, 'wp1',
                                  Term('minor_axis', '=', 'B'))
            finally:
                # re-enable the warning whether or not the assertion held
                warnings.filterwarnings('always',
                                        category=IncompatibilityWarning)
        finally:
            store.close()
예제 #14
0
    def test_terms(self):
        """Check accepted and rejected terms for Panel and Panel4D tables."""
        panel = tm.makePanel()
        panel4d = tm.makePanel4D()
        self.store.put('wp', panel, table=True)
        self.store.put('p4d', panel4d, table=True)

        # where arguments that select is expected to reject
        rejected = (
            ['minor', ['A', 'B']],
            ['index', ['20121114']],
            ['index', ['20121114', '20121114']],
        )
        for where in rejected:
            self.assertRaises(Exception, self.store.select, 'wp', where)

        # NOTE(review): Term.__init__ is called directly, with no Term
        # instance supplied -- confirm this is intended rather than Term(...).
        bad_init_args = (
            (),
            ('blah',),
            ('index',),
            ('index', '=='),
            ('index', '>', 5),
        )
        for init_args in bad_init_args:
            self.assertRaises(Exception, Term.__init__, *init_args)

        # panel: major/minor selection compared against truncate+reindex
        selected = self.store.select(
            'wp',
            [Term('major_axis<20000108'),
             Term('minor_axis', '=', ['A', 'B'])])
        wanted = panel.truncate(after='20000108').reindex(minor=['A', 'B'])
        tm.assert_panel_equal(selected, wanted)

        # panel4d: same selection with an additional items restriction
        selected = self.store.select(
            'p4d',
            [Term('major_axis<20000108'),
             Term('minor_axis', '=', ['A', 'B']),
             Term('items', '=', ['ItemA', 'ItemB'])])
        wanted = panel4d.truncate(after='20000108').reindex(
            minor=['A', 'B'], items=['ItemA', 'ItemB'])
        tm.assert_panel4d_equal(selected, wanted)

        # specifications that must be accepted by both stored objects; they
        # are only checked for not raising (repeated entries are kept as-is)
        accepted_by_both = [
            dict(field='major_axis', op='>', value='20121114'),
            ('major_axis', '20121114'),
            ('major_axis', '>', '20121114'),
            (('major_axis', ['20121114', '20121114']), ),
            ('major_axis', datetime(2012, 11, 14)),
            'major_axis>20121114',
            'major_axis>20121114',
            'major_axis>20121114',
            (('minor_axis', ['A', 'B']), ),
            (('minor_axis', ['A', 'B']), ),
            ((('minor_axis', ['A', 'B']), ), ),
            (('items', ['ItemA', 'ItemB']), ),
            'items=ItemA',
        ]
        for where in accepted_by_both:
            self.store.select('wp', where)
            self.store.select('p4d', where)

        # specifications on the labels axis, tried against the panel4d only
        accepted_by_p4d = [
            (('labels', '=', ['l1', 'l2']), ),
            Term('labels', '=', ['l1', 'l2']),
        ]
        for where in accepted_by_p4d:
            self.store.select('p4d', where)
예제 #15
0
    def test_remove_crit(self):
        """Remove rows by criteria and check both counts and what is left."""
        panel = tm.makePanel()
        major = panel.major_axis

        # several groups of rows removed in one call
        grouped_dates = major.take([0, 1, 2, 4, 5, 6, 8, 9, 10])
        by_group = Term('major_axis', grouped_dates)
        self.store.put('wp3', panel, table=True)
        removed = self.store.remove('wp3', where=[by_group])
        assert (removed == 36)
        stored = self.store.select('wp3')
        wanted = panel.reindex(major_axis=panel.major_axis - grouped_dates)
        tm.assert_panel_equal(stored, wanted)

        # everything after the middle date, then two minor-axis labels
        self.store.put('wp', panel, table=True)
        cutoff = major[len(major) // 2]

        after_cutoff = Term('major_axis', '>', cutoff)
        cols_a_d = Term('minor_axis', ['A', 'D'])

        removed = self.store.remove('wp', where=[after_cutoff])
        assert (removed == 56)

        removed = self.store.remove('wp', where=[cols_a_d])
        assert (removed == 32)

        stored = self.store['wp']
        wanted = panel.truncate(after=cutoff).reindex(minor=['B', 'C'])
        tm.assert_panel_equal(stored, wanted)

        # successive removals from one table, checked after each step
        self.store.put('wp2', panel, table=True)

        # step 1: a slice of two dates
        slice_dates = panel.major_axis[1:3]
        self.store.remove('wp2', where=[Term('major_axis', slice_dates)])
        stored = self.store.select('wp2')
        wanted = panel.reindex(major_axis=panel.major_axis - slice_dates)
        tm.assert_panel_equal(stored, wanted)

        # step 2: a single date
        single_date = panel.major_axis[5]
        self.store.remove('wp2', where=[Term('major_axis', single_date)])
        stored = self.store['wp2']
        wanted = panel.reindex(
            major_axis=panel.major_axis - slice_dates - Index([single_date]))
        tm.assert_panel_equal(stored, wanted)

        # step 3: a plain list of two dates
        pair_dates = [panel.major_axis[7], panel.major_axis[9]]
        self.store.remove('wp2', where=[Term('major_axis', pair_dates)])
        stored = self.store['wp2']
        wanted = panel.reindex(
            major_axis=(panel.major_axis - slice_dates -
                        Index([single_date]) - Index(pair_dates)))
        tm.assert_panel_equal(stored, wanted)

        # corner case: criterion beyond the last date; the stored panel
        # must come back unchanged
        self.store.put('wp4', panel, table=True)
        past_the_end = Term('major_axis', '>', panel.major_axis[-1])
        self.store.remove('wp4', where=[past_the_end])
        stored = self.store.select('wp4')
        tm.assert_panel_equal(stored, panel)
예제 #16
0
    def test_ndim_indexables(self):
        """Append a Panel4D under different axes orders and select from it."""

        panel4d = tm.makePanel4D()

        def verify_positions(key, expected_order):
            # each named field of the table description must sit at the
            # position it has in ``expected_order``
            description = getattr(self.store.root, key).table.description
            for position, axis_name in enumerate(expected_order):
                self.assert_(
                    getattr(description, axis_name)._v_pos == position)

        def rewrite_in_two_parts(first_axes, **second_kwargs):
            # start from scratch, append the first 10 major-axis rows with
            # explicit axes, then append the rest with ``second_kwargs``
            self.store.remove('p4d')
            self.store.append('p4d', panel4d.ix[:, :, :10, :],
                              axes=first_axes)
            self.store.append('p4d', panel4d.ix[:, :, 10:, :],
                              **second_kwargs)

        # second append gives no axes; positions must follow the first
        # append's axes
        axes_order = ['items', 'major_axis', 'minor_axis']
        rewrite_in_two_parts(axes_order)
        tm.assert_panel4d_equal(self.store.select('p4d'), panel4d)
        verify_positions('p4d', axes_order)

        # second append asks for different axes; positions must still
        # follow the first append's axes
        rewrite_in_two_parts(axes_order,
                             axes=['labels', 'items', 'major_axis'])
        tm.assert_panel4d_equal(self.store.select('p4d'), panel4d)
        verify_positions('p4d', axes_order)

        # only two axes passed: expected to raise
        self.store.remove('p4d')
        self.assertRaises(Exception,
                          self.store.append,
                          'p4d',
                          panel4d.ix[:, :, :10, :],
                          axes=['major_axis', 'minor_axis'])

        # a first alternative axes order
        axes_order = ['labels', 'major_axis', 'minor_axis']
        rewrite_in_two_parts(axes_order)
        tm.assert_panel4d_equal(self.store['p4d'], panel4d)
        verify_positions('p4d', axes_order)

        # a second alternative axes order
        axes_order = ['major_axis', 'labels', 'minor_axis']
        rewrite_in_two_parts(axes_order)
        tm.assert_panel4d_equal(self.store['p4d'], panel4d)
        verify_positions('p4d', axes_order)

        # selection on one axis only
        selected = self.store.select('p4d', ['labels=l1'])
        tm.assert_panel4d_equal(selected, panel4d.reindex(labels=['l1']))

        # selection on three axes at once
        where = [Term('labels=l1'), Term('items=ItemA'), Term('minor_axis=B')]
        selected = self.store.select('p4d', where)
        wanted = panel4d.reindex(labels=['l1'],
                                 items=['ItemA'],
                                 minor_axis=['B'])
        tm.assert_panel4d_equal(selected, wanted)

        # selection with the item label 'Item1'; the result is expected to
        # be empty along items
        where = [Term('labels=l1'), Term('items=Item1'), Term('minor_axis=B')]
        selected = self.store.select('p4d', where)
        wanted = panel4d.reindex(labels=['l1'], items=[], minor_axis=['B'])
        tm.assert_panel4d_equal(selected, wanted)