def test_frame_select(self):
    """Select rows and columns from table-format frames.

    Covers a datetime-indexed frame (row criterion plus column filter,
    and a column filter on its own), then smoke-tests selection on an
    integer-indexed and a float-indexed frame.
    """
    df = tm.makeTimeDataFrame()
    self.store.put('frame', df, table=True)
    date = df.index[len(df) // 2]

    crit1 = ('index', '>=', date)
    crit2 = ('columns', ['A', 'D'])
    crit3 = ('columns', 'A')

    # row criterion combined with a column filter
    result = self.store.select('frame', [crit1, crit2])
    expected = df.ix[date:, ['A', 'D']]
    tm.assert_frame_equal(result, expected)

    # column filter alone
    result = self.store.select('frame', [crit3])
    expected = df.ix[:, ['A']]
    tm.assert_frame_equal(result, expected)

    # other index types for a frame

    # integer index (DataFrame default when no index is given)
    df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
    self.store.append('df_int', df)
    self.store.select(
        'df_int', [Term("index<10"), Term("columns", "=", ["A"])])

    # float index: ``index`` has to be an argument of DataFrame itself.
    # Passing it inside ``dict(...)`` would only create a data column
    # named 'index' and leave the frame with the default integer index.
    df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)),
                   index=np.arange(20, dtype='f8'))
    self.store.append('df_float', df)
    self.store.select(
        'df_float', [Term("index<10.0"), Term("columns", "=", ["A"])])
def test_remove_where(self):
    """Exercise ``store.remove`` with ``where`` criteria.

    Walks through: a key this test never stored, a column criterion on a
    table-format panel, an empty ``where`` list, an unparsable ``where``
    entry, and a criterion applied to a non-table node.
    """
    # key that this test never wrote
    missing_key_crit = Term('index', '>', 'foo')
    self.store.remove('a', where=[missing_key_crit])

    # table-format panel: drop minor-axis labels A and D
    panel = tm.makePanel()
    self.store.put('wp', panel, table=True)
    self.store.remove('wp', [('column', ['A', 'D'])])
    remaining = self.store.select('wp')
    tm.assert_panel_equal(remaining,
                          panel.reindex(minor_axis=['B', 'C']))

    # an empty where list is accepted
    self.store.remove('wp')
    self.store.put('wp', panel, table=True)
    self.store.remove('wp', [])

    # a where entry that is not a valid criterion must raise
    self.store.remove('wp')
    self.store.put('wp', panel, table=True)
    self.assertRaises(Exception, self.store.remove, 'wp', ['foo'])

    # a where criterion against a non-table node must raise
    self.store.put('wp2', panel, table=False)
    self.assertRaises(Exception, self.store.remove,
                      'wp2', [('column', ['A', 'D'])])
def test_frame_select(setup_path):
    """Select from a table-format frame using ``Term`` and string criteria.

    Also checks that comparing a datetime index against an integer
    literal raises ``ValueError``.
    """
    df = tm.makeTimeDataFrame()

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df, format="table")

        # The local must stay named ``date``: the expression string below
        # refers to it by name, and the assert checks it was captured.
        date = df.index[len(df) // 2]
        from_date = Term("index>=date")
        assert from_date.env.scope["date"] == date

        two_columns = "columns=['A', 'D']"
        one_column = "columns=A"

        # row criterion + column filter
        selected = store.select("frame", [from_date, two_columns])
        tm.assert_frame_equal(selected, df.loc[date:, ["A", "D"]])

        # column filter only
        selected = store.select("frame", [one_column])
        tm.assert_frame_equal(selected, df.loc[:, ["A"]])

        # invalid terms
        df = tm.makeTimeDataFrame()
        store.append("df_time", df)
        msg = "could not convert string to Timestamp"
        with pytest.raises(ValueError, match=msg):
            store.select("df_time", "index>0")
def test_select(self):
    """Smoke-test ``select`` on table / non-table panels, then select a
    large subset of items from a 100x100x100 panel and compare with
    ``reindex``.
    """
    wp = tm.makePanel()

    # table format: put then select
    self.store.remove('wp')
    self.store.put('wp', wp, table=True)
    self.store.select('wp')

    # non-table format: select without a where clause
    self.store.remove('wp')
    self.store.put('wp2', wp, table=False)
    self.store.select('wp2')

    # selection on the non-indexable with a large number of columns
    item_labels = ['Item%03d' % i for i in xrange(100)]
    minor_labels = ['E%03d' % i for i in xrange(100)]
    wp = Panel(np.random.randn(100, 100, 100),
               items=item_labels,
               major_axis=date_range('1/1/2000', periods=100),
               minor_axis=minor_labels)

    self.store.remove('wp')
    self.store.append('wp', wp)

    # first 80 item labels
    items = ['Item%03d' % i for i in xrange(80)]
    selected = self.store.select('wp', Term('items', items))
    tm.assert_panel_equal(wp.reindex(items=items), selected)
def test_invalid_terms(setup_path):
    """Check the errors raised for malformed ``where`` expressions.

    Covers ``Term()`` without arguments, an expression that is not a
    condition, a syntactically broken expression, and a reference to a
    column that was not stored as a data column.
    """

    def make_dfq():
        # 10x4 random frame, columns A-D, daily index from 2013-01-01
        return DataFrame(
            np.random.randn(10, 4),
            columns=list("ABCD"),
            index=date_range("20130101", periods=10),
        )

    with ensure_clean_store(setup_path) as store:
        with catch_warnings(record=True):
            # Keep the name ``df``: the "df.index[3]" expression below
            # refers to this local by name.
            df = tm.makeTimeDataFrame()
            df["string"] = "foo"
            df.loc[df.index[0:4], "string"] = "bar"

            store.put("df", df, format="table")

            # Term requires its ``where`` argument
            missing_where = re.escape(
                "__init__() missing 1 required positional argument: 'where'")
            with pytest.raises(TypeError, match=missing_where):
                Term()

            # an expression that evaluates to a value, not a condition
            not_a_condition = re.escape(
                "cannot process expression [df.index[3]], "
                "[2000-01-06 00:00:00] is not a valid condition")
            with pytest.raises(ValueError, match=not_a_condition):
                store.select("df", "df.index[3]")

            # an incomplete comparison
            with pytest.raises(SyntaxError, match="invalid syntax"):
                store.select("df", "index>")

    # from the docs: every column stored as a data column, both queries ok
    with ensure_clean_path(setup_path) as path:
        dfq = make_dfq()
        dfq.to_hdf(path, "dfq", format="table", data_columns=True)

        read_hdf(path, "dfq",
                 where="index>Timestamp('20130104') & columns=['A', 'B']")
        read_hdf(path, "dfq", where="A>0 or C>0")

    # without data_columns, A and C are not valid references
    with ensure_clean_path(setup_path) as path:
        dfq = make_dfq()
        dfq.to_hdf(path, "dfq", format="table")

        invalid_reference = (
            r"The passed where expression: A>0 or C>0\n\s*"
            r"contains an invalid variable reference\n\s*"
            r"all of the variable references must be a reference to\n\s*"
            r"an axis \(e.g. 'index' or 'columns'\), or a data_column\n\s*"
            r"The currently defined references are: index,columns\n")
        with pytest.raises(ValueError, match=invalid_reference):
            read_hdf(path, "dfq", where="A>0 or C>0")
def test_terms(self):
    """Check which term specifications ``select`` rejects and accepts
    for a table-format panel.
    """
    wp = tm.makePanel()
    self.store.put('wp', wp, table=True)

    # specifications that select must reject
    rejected = [
        ['minor', ['A', 'B']],
        ['index', ['20121114']],
        ['index', ['20121114', '20121114']],
    ]
    for spec in rejected:
        self.assertRaises(Exception, self.store.select, 'wp', spec)

    # NOTE(review): these invoke Term.__init__ directly, without an
    # instance, so the exception may come from the call itself rather
    # than from term validation -- confirm this is the intent.
    init_arg_sets = [
        (),
        ('blah',),
        ('index',),
        ('index', '=='),
        ('index', '>', 5),
    ]
    for init_args in init_arg_sets:
        self.assertRaises(Exception, Term.__init__, *init_args)

    # a real selection: rows before 2000-01-08, minor labels A and B
    selected = self.store.select(
        'wp',
        [Term('major_axis<20000108'), Term('minor_axis', '=', ['A', 'B'])])
    truncated = wp.truncate(after='20000108')
    tm.assert_panel_equal(selected, truncated.reindex(minor=['A', 'B']))

    # specifications that select must accept (results are not checked)
    accepted = [
        dict(field='index', op='>', value='20121114'),
        ('index', '20121114'),
        ('index', '>', '20121114'),
        (('index', ['20121114', '20121114']), ),
        ('index', datetime(2012, 11, 14)),
        'index>20121114',
        'major>20121114',
        'major_axis>20121114',
        (('minor', ['A', 'B']), ),
        (('minor_axis', ['A', 'B']), ),
        ((('minor_axis', ['A', 'B']), ), ),
        (('column', ['A', 'B']), ),
    ]
    for spec in accepted:
        self.store.select('wp', spec)
def test_select_filter_corner(self):
    """Filter a 50x100 table-format frame down to its first 75 columns."""
    df = DataFrame(np.random.randn(50, 100))
    # zero-padded, three-character string labels on both axes
    df.index = ['%.3d' % label for label in df.index]
    df.columns = ['%.3d' % label for label in df.columns]
    self.store.put('frame', df, table=True)

    column_filter = Term('column', df.columns[:75])
    selected = self.store.select('frame', [column_filter])

    expected = df.ix[:, df.columns[:75]]
    tm.assert_frame_equal(selected, expected)
def Term(*args, **kwargs):
    """Deprecated shim: warn, then delegate to ``pandas.io.pytables.Term``.

    Emits a ``FutureWarning`` attributed to the caller (``stacklevel=2``)
    and returns ``pandas.io.pytables.Term(*args, **kwargs)``.
    """
    import warnings

    deprecation_message = (
        "pd.Term is deprecated as it is not "
        "applicable to user code. Instead use in-line "
        "string expressions in the where clause when "
        "searching in HDFStore"
    )
    warnings.warn(deprecation_message, FutureWarning, stacklevel=2)

    # Imported only after the warning has been issued, under an alias so
    # the real class is not confused with this wrapper.
    from pandas.io.pytables import Term as _PyTablesTerm

    return _PyTablesTerm(*args, **kwargs)
def test_encoding(setup_path):
    """Round-trip a string frame containing NaNs with ``encoding="ascii"``
    and select a single column with an ascii-encoded ``Term``.
    """
    with ensure_clean_store(setup_path) as store:
        frame = DataFrame({"A": "foo", "B": "bar"}, index=range(5))
        # one missing value in each column
        frame.loc[2, "A"] = np.nan
        frame.loc[3, "B"] = np.nan

        _maybe_remove(store, "df")
        store.append("df", frame, encoding="ascii")
        tm.assert_frame_equal(store["df"], frame)

        only_a = frame.reindex(columns=["A"])
        selected = store.select("df", Term("columns=A", encoding="ascii"))
        tm.assert_frame_equal(selected, only_a)
def test_append_frame_column_oriented(self):
    """Append a frame in two column-wise pieces (``axes=['columns']``)
    and select from the result.
    """
    df = tm.makeTimeDataFrame()

    # write the first two columns, then the remaining ones
    self.store.remove('df1')
    self.store.append('df1', df.ix[:, :2], axes=['columns'])
    self.store.append('df1', df.ix[:, 2:])
    tm.assert_frame_equal(self.store['df1'], df)

    # single-column selection
    selected = self.store.select('df1', 'columns=A')
    tm.assert_frame_equal(df.reindex(columns=['A']), selected)

    # an inequality on the index is expected to raise here
    unsupported = ('columns=A', Term('index', '>', df.index[4]))
    self.assertRaises(Exception, self.store.select, 'df1', unsupported)

    # equality against a set of index values is accepted
    first_rows = ('columns=A', Term('index', '=', df.index[0:4]))
    selected = self.store.select('df1', first_rows)
    tm.assert_frame_equal(
        df.reindex(columns=['A'], index=df.index[0:4]), selected)
def test_remove_crit(self):
    """Remove rows/columns from table-format panels by criteria and
    compare what is left with the equivalent in-memory reindex.
    """
    wp = tm.makePanel()
    self.store.put('wp', wp, table=True)
    date = wp.major_axis[len(wp.major_axis) // 2]

    after_date = Term('index', '>', date)
    columns_a_d = Term('column', ['A', 'D'])
    self.store.remove('wp', where=[after_date])
    self.store.remove('wp', where=[columns_a_d])

    remaining = self.store['wp']
    tm.assert_panel_equal(
        remaining, wp.truncate(after=date).reindex(minor=['B', 'C']))

    # test non-consecutive row removal
    wp = tm.makePanel()
    self.store.put('wp2', wp, table=True)

    date1 = wp.major_axis[1:3]
    date2 = wp.major_axis[5]
    date3 = [wp.major_axis[7], wp.major_axis[9]]

    crit1 = Term('index', date1)
    crit2 = Term('index', date2)
    crit3 = Term('index', date3)

    for crit in (crit1, crit2, crit3):
        self.store.remove('wp2', where=[crit])

    remaining = self.store['wp2']

    # build the expected major axis by dropping the same dates, in the
    # same order they were removed from the store
    kept = list(wp.major_axis)
    for removed in list(date1) + [date2] + list(date3):
        kept.remove(removed)

    tm.assert_panel_equal(remaining, wp.reindex(major=kept))
def test_versioning(self):
    """Check the ``pandas_version`` attribute written on stored nodes,
    and that a node whose version attribute was cleared can still be
    selected from.
    """
    self.store['a'] = tm.makeTimeSeries()
    self.store['b'] = tm.makeDataFrame()

    df = tm.makeTimeDataFrame()
    self.store.remove('df1')
    self.store.append('df1', df[:10])
    self.store.append('df1', df[10:])

    # every node written above carries version '0.10'
    for key in ('a', 'b', 'df1'):
        node = getattr(self.store.root, key)
        self.assert_(node._v_attrs.pandas_version == '0.10')

    # write a table and wipe its versioning
    self.store.remove('df2')
    self.store.append('df2', df)
    self.store.get_node('df2')._v_attrs.pandas_version = None

    # selecting with and without a criterion should not raise
    self.store.select('df2')
    self.store.select('df2', [Term('index', '>', df.index[2])])
def test_legacy_table_read(self):
    """Read nodes from the bundled ``legacy_table.h5`` file.

    Selects three stored nodes, forces ``typ='legacy_frame'`` for one of
    them, and checks that selecting with a ``Term`` on the old-format
    panel raises. The store is always closed and the temporary 'ignore'
    filter for ``IncompatibilityWarning`` is always reverted, even when
    a select or assertion fails.
    """
    import warnings

    # legacy table types
    pth = curpath()
    store = HDFStore(os.path.join(pth, 'legacy_table.h5'), 'r')
    try:
        store.select('df1')
        store.select('df2')
        store.select('wp1')

        # force the frame
        store.select('df2', typ='legacy_frame')

        # old version (this still throws an exception though)
        warnings.filterwarnings('ignore', category=IncompatibilityWarning)
        try:
            self.assertRaises(Exception, store.select,
                              'wp1', Term('minor_axis', '=', 'B'))
        finally:
            # switch the warning back on no matter how the check ended
            warnings.filterwarnings('always',
                                    category=IncompatibilityWarning)
    finally:
        # do not leave the file handle open if anything above failed
        store.close()
def test_terms(self):
    """Check which term specifications ``select`` rejects and accepts
    for table-format Panel and Panel4D nodes.
    """
    wp = tm.makePanel()
    p4d = tm.makePanel4D()
    self.store.put('wp', wp, table=True)
    self.store.put('p4d', p4d, table=True)

    # specifications that select must reject
    rejected = [
        ['minor', ['A', 'B']],
        ['index', ['20121114']],
        ['index', ['20121114', '20121114']],
    ]
    for spec in rejected:
        self.assertRaises(Exception, self.store.select, 'wp', spec)

    # NOTE(review): these invoke Term.__init__ directly, without an
    # instance, so the exception may come from the call itself rather
    # than from term validation -- confirm this is the intent.
    init_arg_sets = [
        (),
        ('blah',),
        ('index',),
        ('index', '=='),
        ('index', '>', 5),
    ]
    for init_args in init_arg_sets:
        self.assertRaises(Exception, Term.__init__, *init_args)

    # panel
    selected = self.store.select(
        'wp',
        [Term('major_axis<20000108'), Term('minor_axis', '=', ['A', 'B'])])
    tm.assert_panel_equal(
        selected,
        wp.truncate(after='20000108').reindex(minor=['A', 'B']))

    # p4d
    p4d_terms = [
        Term('major_axis<20000108'),
        Term('minor_axis', '=', ['A', 'B']),
        Term('items', '=', ['ItemA', 'ItemB']),
    ]
    selected = self.store.select('p4d', p4d_terms)
    tm.assert_panel4d_equal(
        selected,
        p4d.truncate(after='20000108').reindex(
            minor=['A', 'B'], items=['ItemA', 'ItemB']))

    # specifications accepted for both nodes (results are not checked);
    # repeated entries are intentional and each one is executed
    accepted = [
        dict(field='major_axis', op='>', value='20121114'),
        ('major_axis', '20121114'),
        ('major_axis', '>', '20121114'),
        (('major_axis', ['20121114', '20121114']), ),
        ('major_axis', datetime(2012, 11, 14)),
        'major_axis>20121114',
        'major_axis>20121114',
        'major_axis>20121114',
        (('minor_axis', ['A', 'B']), ),
        (('minor_axis', ['A', 'B']), ),
        ((('minor_axis', ['A', 'B']), ), ),
        (('items', ['ItemA', 'ItemB']), ),
        'items=ItemA',
    ]
    for spec in accepted:
        self.store.select('wp', spec)
        self.store.select('p4d', spec)

    # valid for p4d only
    p4d_only = [
        (('labels', '=', ['l1', 'l2']), ),
        Term('labels', '=', ['l1', 'l2']),
    ]
    for spec in p4d_only:
        self.store.select('p4d', spec)
def test_remove_crit(self):
    """Remove rows from table-format panels by ``Term`` criteria.

    Checks the row counts returned by ``remove`` where the test asserts
    them, and compares what is left in the store with the equivalent
    in-memory reindex/truncate of the source panel.
    """
    wp = tm.makePanel()
    major = wp.major_axis

    # group row removal
    date4 = major.take([0, 1, 2, 4, 5, 6, 8, 9, 10])
    crit4 = Term('major_axis', date4)
    self.store.put('wp3', wp, table=True)
    n = self.store.remove('wp3', where=[crit4])
    assert n == 36
    stored = self.store.select('wp3')
    tm.assert_panel_equal(stored, wp.reindex(major_axis=major - date4))

    # upper half
    self.store.put('wp', wp, table=True)
    date = major[len(major) // 2]

    crit1 = Term('major_axis', '>', date)
    crit2 = Term('minor_axis', ['A', 'D'])
    n = self.store.remove('wp', where=[crit1])
    assert n == 56
    n = self.store.remove('wp', where=[crit2])
    assert n == 32

    stored = self.store['wp']
    tm.assert_panel_equal(
        stored, wp.truncate(after=date).reindex(minor=['B', 'C']))

    # individual row elements, removed in three successive steps
    self.store.put('wp2', wp, table=True)

    # step 1: a slice of two dates
    date1 = major[1:3]
    crit1 = Term('major_axis', date1)
    self.store.remove('wp2', where=[crit1])
    stored = self.store.select('wp2')
    kept = major - date1
    tm.assert_panel_equal(stored, wp.reindex(major_axis=kept))

    # step 2: a single date
    date2 = major[5]
    crit2 = Term('major_axis', date2)
    self.store.remove('wp2', where=[crit2])
    stored = self.store['wp2']
    kept = major - date1 - Index([date2])
    tm.assert_panel_equal(stored, wp.reindex(major_axis=kept))

    # step 3: a list of two dates
    date3 = [major[7], major[9]]
    crit3 = Term('major_axis', date3)
    self.store.remove('wp2', where=[crit3])
    stored = self.store['wp2']
    kept = major - date1 - Index([date2]) - Index(date3)
    tm.assert_panel_equal(stored, wp.reindex(major_axis=kept))

    # corners: a criterion past the last date leaves the panel intact
    self.store.put('wp4', wp, table=True)
    n = self.store.remove(
        'wp4', where=[Term('major_axis', '>', major[-1])])
    stored = self.store.select('wp4')
    tm.assert_panel_equal(stored, wp)
def test_ndim_indexables(self):
    """Append a Panel4D with different choices of indexable axes and
    check the resulting table layout and partial selections.
    """
    p4d = tm.makePanel4D()

    def check_indexers(key, indexers):
        # each indexer must sit at its list position in the table
        # description of node ``key``
        description = getattr(self.store.root, key).table.description
        for position, name in enumerate(indexers):
            self.assert_(getattr(description, name)._v_pos == position)

    def write_in_two_parts(first_axes, second_axes=None):
        # start from a clean node, then append the panel in two pieces
        # split at position 10 of its third axis
        self.store.remove('p4d')
        self.store.append('p4d', p4d.ix[:, :, :10, :], axes=first_axes)
        if second_axes is None:
            self.store.append('p4d', p4d.ix[:, :, 10:, :])
        else:
            self.store.append('p4d', p4d.ix[:, :, 10:, :],
                              axes=second_axes)

    # append then change (will take existing schema)
    indexers = ['items', 'major_axis', 'minor_axis']
    write_in_two_parts(indexers)
    tm.assert_panel4d_equal(self.store.select('p4d'), p4d)
    check_indexers('p4d', indexers)

    # same as above, but the second append asks for different axes
    write_in_two_parts(indexers,
                       second_axes=['labels', 'items', 'major_axis'])
    tm.assert_panel4d_equal(self.store.select('p4d'), p4d)
    check_indexers('p4d', indexers)

    # pass incorrect number of axes
    self.store.remove('p4d')
    self.assertRaises(Exception, self.store.append, 'p4d',
                      p4d.ix[:, :, :10, :],
                      axes=['major_axis', 'minor_axis'])

    # two layouts that differ from the one used above
    for indexers in (['labels', 'major_axis', 'minor_axis'],
                     ['major_axis', 'labels', 'minor_axis']):
        write_in_two_parts(indexers)
        tm.assert_panel4d_equal(self.store['p4d'], p4d)
        check_indexers('p4d', indexers)

    # partial selection
    selected = self.store.select('p4d', ['labels=l1'])
    tm.assert_panel4d_equal(selected, p4d.reindex(labels=['l1']))

    # partial selection on three axes at once
    selected = self.store.select(
        'p4d',
        [Term('labels=l1'), Term('items=ItemA'), Term('minor_axis=B')])
    tm.assert_panel4d_equal(
        selected,
        p4d.reindex(labels=['l1'], items=['ItemA'], minor_axis=['B']))

    # partial selection with an item label ('Item1') that is expected
    # to match nothing
    selected = self.store.select(
        'p4d',
        [Term('labels=l1'), Term('items=Item1'), Term('minor_axis=B')])
    tm.assert_panel4d_equal(
        selected,
        p4d.reindex(labels=['l1'], items=[], minor_axis=['B']))