def test_existing_arbitrary_collection_mdataframe(self):
    """Attach an existing collection to the store and read it via ``getl``.

    Inserts one nested document directly into the ``foo`` collection,
    registers it as ``myfoo`` through ``make_metadata`` with kind
    ``pandas.rawdict``, then checks that ``getl`` returns an ``MDataFrame``
    whose ``.value`` matches the document, for ``raw=True`` and
    ``raw=False``.
    """
    data = {
        'foo': 'bar',
        'bax': {
            'fox': 'fax',
        },
    }
    store = OmegaStore()
    store.register_backend(PandasRawDictBackend.KIND, PandasRawDictBackend)
    # assumes store.mongodb['foo'] is a PyMongo Collection -- TODO confirm
    foo_coll = store.mongodb['foo']
    # Collection.insert() is deprecated since PyMongo 3.0 and removed in
    # 4.0; insert_one() is the single-document replacement.
    foo_coll.insert_one(data)
    store.make_metadata('myfoo', collection='foo', kind='pandas.rawdict').save()
    self.assertIn('myfoo', store.list())
    # test we get back _id column if raw=True
    mdf = store.getl('myfoo', raw=True)
    self.assertIsInstance(mdf, MDataFrame)
    data_df = mdf.value
    data_raw = store.collection('myfoo').find_one()
    assert_frame_equal(json_normalize(data_raw), data_df)
    # test we get just the data column
    mdf = store.getl('myfoo', raw=False)
    self.assertIsInstance(mdf, MDataFrame)
    data_df = mdf.value
    # compare only the payload columns so any extra columns are ignored
    cols = ['foo', 'bax.fox']
    assert_frame_equal(json_normalize(data)[cols], data_df[cols])
def test_put_dataframe_with_index(self):
    """Check that ``put(..., index=['a', '-b'])`` creates the named index.

    Stores a two-column dataframe as ``mydata`` with an index spec of
    ``['a', '-b']`` and asserts that an index named ``asc_a__desc_b`` is
    listed on the backing collection.
    """
    # NOTE(review): this file defines test_put_dataframe_with_index a second
    # time further down; if both definitions live in the same class only the
    # later one is collected -- confirm which one is intended.
    # create some dataframe
    df = pd.DataFrame({'a': list(range(1, 10)), 'b': list(range(1, 10))})
    store = OmegaStore(prefix='')
    store.put(df, 'mydata', index=['a', '-b'])
    idxs = list(store.collection('mydata').list_indexes())
    # build a real list (not a one-shot map iterator) so a failing assertIn
    # prints the actual index names
    idx_names = [dict(v).get('name') for v in idxs]
    self.assertIn('asc_a__desc_b', idx_names)
def test_put_dataframe_with_index(self):
    """Verify the index reported after ``put`` with ``index=['a', '-b']``.

    Saves a small two-column frame as ``mydata`` and expects the value
    returned by ``humanize_index`` for the collection's
    ``index_information()`` to contain the combined index name.
    """
    # two identical integer columns, 1..9
    frame = pd.DataFrame({column: list(range(1, 10)) for column in ('a', 'b')})

    omega_store = OmegaStore(prefix='')
    omega_store.put(frame, 'mydata', index=['a', '-b'])

    index_info = omega_store.collection('mydata').index_information()
    readable_names = humanize_index(index_info)
    expected_name = 'asc__id_asc_a_desc_b_asc__idx#0_0_asc__om#rowid'
    self.assertIn(expected_name, readable_names)
def test_arbitrary_collection_new(self):
    """Store a raw MongoDB collection with ``put`` and read it back.

    Inserts one flat document into the ``foo`` collection, stores the
    collection object itself as ``myfoo``, and checks that ``get`` returns
    a dataframe equal to the normalized stored document, with the ``_id``
    column for ``raw=True`` and without it for ``raw=False``.
    """
    data = {'foo': 'bar', 'bax': 'fox'}
    store = OmegaStore()
    store.register_backend(PandasRawDictBackend.KIND, PandasRawDictBackend)
    # create the collection
    # assumes store.mongodb['foo'] is a PyMongo Collection -- TODO confirm
    foo_coll = store.mongodb['foo']
    # Collection.insert() is deprecated since PyMongo 3.0 and removed in
    # 4.0; insert_one() is the single-document replacement.
    foo_coll.insert_one(data)
    # store the collection as is
    store.put(foo_coll, 'myfoo').save()
    self.assertIn('myfoo', store.list())
    # test we get back _id column if raw=True
    data_df = store.get('myfoo', raw=True)
    data_raw = store.collection('myfoo').find_one()
    assert_frame_equal(json_normalize(data_raw), data_df)
    # test we get just the data column
    data_df = store.get('myfoo', raw=False)
    data_raw = store.collection('myfoo').find_one()
    # drop _id from the reference document so it matches the raw=False frame
    del data_raw['_id']
    assert_frame_equal(json_normalize(data_raw), data_df)
    cols = ['foo', 'bax']
    assert_frame_equal(data_df[cols], json_normalize(data_raw)[cols])
def test_put_dataframe_timeseries(self):
    """Round-trip a dataframe with a daily ``DatetimeIndex`` through the store.

    Stores the frame as ``mydata``, asserts that ``get`` returns an equal
    frame, and asserts that an index named ``asc__idx#0_0`` is listed on
    the backing collection.
    """
    # NOTE(review): this file defines test_put_dataframe_timeseries a second
    # time further down; if both definitions live in the same class only the
    # later one is collected -- confirm which one is intended.
    # create some dataframe
    # pd.datetime was deprecated in pandas 1.0 and removed in 2.0; ISO date
    # strings produce the same daily range without that alias.
    tsidx = pd.date_range('2016-01-01', '2016-04-01')
    df = pd.DataFrame({
        'a': list(range(0, len(tsidx))),
        'b': list(range(0, len(tsidx))),
    }, index=tsidx)
    store = OmegaStore(prefix='')
    store.put(df, 'mydata')
    dfx = store.get('mydata')
    assert_frame_equal(df, dfx)
    idxs = list(store.collection('mydata').list_indexes())
    idx_names = [dict(v).get('name') for v in idxs]
    self.assertIn('asc__idx#0_0', idx_names)
def test_put_dataframe_multiindex(self):
    """Round-trip a dataframe with a two-level ``MultiIndex`` through the store.

    Stores the frame as ``mydata``, asserts that ``get`` returns an equal
    frame, and asserts that an index named
    ``asc__idx#0_first__asc__idx#1_second`` is listed on the backing
    collection.
    """
    # NOTE(review): this file defines test_put_dataframe_multiindex a second
    # time further down; if both definitions live in the same class only the
    # later one is collected -- confirm which one is intended.
    # create some dataframe
    store = OmegaStore(prefix='')
    # `labels=` was renamed to `codes=` in pandas 0.24 and removed in 1.0;
    # the other multiindex test in this file already passes `codes=`.
    midx = pd.MultiIndex(levels=[[u'bar', u'baz', u'foo', u'qux'],
                                 [u'one', u'two']],
                         codes=[[0, 0, 1, 1, 2, 2, 3, 3],
                                [0, 1, 0, 1, 0, 1, 0, 1]],
                         names=[u'first', u'second'])
    df = pd.DataFrame({'x': range(0, len(midx))}, index=midx)
    store.put(df, 'mydata')
    dfx = store.get('mydata')
    assert_frame_equal(df, dfx)
    idxs = list(store.collection('mydata').list_indexes())
    idx_names = [dict(v).get('name') for v in idxs]
    self.assertIn('asc__idx#0_first__asc__idx#1_second', idx_names)
def test_put_dataframe_timeseries(self):
    """Round-trip a dataframe with a daily ``DatetimeIndex`` through the store.

    Stores the frame as ``mydata``, asserts that ``get`` returns an equal
    frame, and asserts that the value returned by ``humanize_index`` for
    the collection's ``index_information()`` contains the expected name.
    """
    # create some dataframe
    # pd.datetime was deprecated in pandas 1.0 and removed in 2.0; ISO date
    # strings produce the same daily range without that alias.
    tsidx = pd.date_range('2016-01-01', '2016-04-01')
    df = pd.DataFrame(
        {
            'a': list(range(0, len(tsidx))),
            'b': list(range(0, len(tsidx))),
        },
        index=tsidx)
    store = OmegaStore(prefix='')
    store.put(df, 'mydata')
    dfx = store.get('mydata')
    assert_frame_equal(df, dfx)
    idxs = store.collection('mydata').index_information()
    idx_names = humanize_index(idxs)
    self.assertIn('asc__id_asc__idx#0_0_asc__om#rowid', idx_names)
def test_put_dataframe_multiindex(self):
    """Verify a two-level ``MultiIndex`` frame survives ``put`` / ``get``.

    After the round trip the frame must compare equal, and the value
    returned by ``humanize_index`` for the collection's
    ``index_information()`` must contain the combined index name.
    """
    omega_store = OmegaStore(prefix='')

    # explicit levels/codes: 4 outer values x 2 inner values = 8 rows
    level_values = [[u'bar', u'baz', u'foo', u'qux'], [u'one', u'two']]
    level_codes = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]]
    multi_index = pd.MultiIndex(levels=level_values,
                                codes=level_codes,
                                names=[u'first', u'second'])
    frame = pd.DataFrame({'x': range(0, len(multi_index))}, index=multi_index)

    omega_store.put(frame, 'mydata')
    restored = omega_store.get('mydata')
    assert_frame_equal(frame, restored)

    index_info = omega_store.collection('mydata').index_information()
    readable_names = humanize_index(index_info)
    expected_name = 'asc__id_asc__idx#0_first_asc__idx#1_second_asc__om#rowid'
    self.assertIn(expected_name, readable_names)