예제 #1
0
 def test_existing_arbitrary_collection_mdataframe(self):
     """Expose a pre-existing raw collection through ``store.getl``.

     A nested document is written directly into the ``foo`` collection and
     then registered as ``myfoo`` with kind ``pandas.rawdict``. The test
     checks that ``getl`` returns an ``MDataFrame`` whose ``.value`` equals
     the ``json_normalize``-d document, for ``raw=True`` and ``raw=False``.
     """
     data = {
         'foo': 'bar',
         'bax': {
             'fox': 'fax',
         }
     }
     store = OmegaStore()
     store.register_backend(PandasRawDictBackend.KIND, PandasRawDictBackend)
     foo_coll = store.mongodb['foo']
     # insert_one replaces Collection.insert, which is deprecated and no
     # longer available in PyMongo 4
     foo_coll.insert_one(data)
     store.make_metadata('myfoo', collection='foo',
                         kind='pandas.rawdict').save()
     self.assertIn('myfoo', store.list())
     # test we get back _id column if raw=True
     mdf = store.getl('myfoo', raw=True)
     self.assertIsInstance(mdf, MDataFrame)
     data_df = mdf.value
     data_raw = store.collection('myfoo').find_one()
     assert_frame_equal(json_normalize(data_raw), data_df)
     # test we get just the data column
     mdf = store.getl('myfoo', raw=False)
     self.assertIsInstance(mdf, MDataFrame)
     data_df = mdf.value
     # restrict the comparison to the payload columns of the document
     cols = ['foo', 'bax.fox']
     assert_frame_equal(json_normalize(data)[cols], data_df[cols])
예제 #2
0
 def test_put_dataframe_with_index(self):
     """Storing a frame with ``index=['a', '-b']`` creates a named index.

     After ``store.put`` the collection's indexes must include one named
     ``asc_a__desc_b``.
     """
     # create some dataframe
     df = pd.DataFrame({'a': list(range(1, 10)), 'b': list(range(1, 10))})
     store = OmegaStore(prefix='')
     store.put(df, 'mydata', index=['a', '-b'])
     # build a real list: on Python 3 map() yields a one-shot iterator,
     # which is exhausted by the membership test below
     idx_names = [dict(spec).get('name')
                  for spec in store.collection('mydata').list_indexes()]
     self.assertIn('asc_a__desc_b', idx_names)
예제 #3
0
 def test_put_dataframe_with_index(self):
     """Check the index created by ``put(..., index=['a', '-b'])``.

     The collection's ``index_information()`` is passed through
     ``humanize_index`` and must contain the expected combined name.
     """
     # two identical integer columns, 1 through 9
     frame = pd.DataFrame({col: list(range(1, 10)) for col in ('a', 'b')})
     om_store = OmegaStore(prefix='')
     om_store.put(frame, 'mydata', index=['a', '-b'])
     index_info = om_store.collection('mydata').index_information()
     expected_name = 'asc__id_asc_a_desc_b_asc__idx#0_0_asc__om#rowid'
     self.assertIn(expected_name, humanize_index(index_info))
예제 #4
0
 def test_arbitrary_collection_new(self):
     """Store an existing collection object via ``store.put`` and read it.

     A flat document is inserted into the ``foo`` collection, the
     collection itself is put as ``myfoo``, and ``store.get`` is compared
     against the ``json_normalize``-d document for ``raw=True`` (with
     ``_id``) and ``raw=False`` (``_id`` removed before comparing).
     """
     data = {'foo': 'bar', 'bax': 'fox'}
     store = OmegaStore()
     store.register_backend(PandasRawDictBackend.KIND, PandasRawDictBackend)
     # create the collection
     foo_coll = store.mongodb['foo']
     # insert_one replaces Collection.insert, which is deprecated and no
     # longer available in PyMongo 4
     foo_coll.insert_one(data)
     # store the collection as is
     store.put(foo_coll, 'myfoo').save()
     self.assertIn('myfoo', store.list())
     # test we get back _id column if raw=True
     data_df = store.get('myfoo', raw=True)
     data_raw = store.collection('myfoo').find_one()
     assert_frame_equal(json_normalize(data_raw), data_df)
     # test we get just the data column
     data_df = store.get('myfoo', raw=False)
     data_raw = store.collection('myfoo').find_one()
     del data_raw['_id']
     assert_frame_equal(json_normalize(data_raw), data_df)
     cols = ['foo', 'bax']
     assert_frame_equal(data_df[cols], json_normalize(data_raw)[cols])
예제 #5
0
 def test_put_dataframe_timeseries(self):
     """Round-trip a frame with a daily ``DatetimeIndex`` through the store.

     The frame read back must equal the one stored, and the collection
     must have an index named ``asc__idx#0_0``.
     """
     # create some dataframe
     # ISO date strings replace the pd.datetime alias, which is deprecated
     # and removed in pandas 2.0; the resulting date range is the same
     tsidx = pd.date_range('2016-01-01', '2016-04-01')
     df = pd.DataFrame({
         'a': list(range(0, len(tsidx))),
         'b': list(range(0, len(tsidx)))
     }, index=tsidx)
     store = OmegaStore(prefix='')
     store.put(df, 'mydata')
     dfx = store.get('mydata')
     assert_frame_equal(df, dfx)
     idxs = list(store.collection('mydata').list_indexes())
     idx_names = [dict(v).get('name') for v in idxs]
     self.assertIn('asc__idx#0_0', idx_names)
예제 #6
0
 def test_put_dataframe_multiindex(self):
     """Round-trip a frame with a two-level ``MultiIndex`` through the store.

     The frame read back must equal the one stored, and the collection
     must have an index named ``asc__idx#0_first__asc__idx#1_second``.
     """
     # create some dataframe
     store = OmegaStore(prefix='')
     # from_product builds the same 4 x 2 index (first level repeated,
     # second level cycling) without the ``labels=`` keyword, which
     # pandas 1.0 removed in favour of ``codes=``
     midx = pd.MultiIndex.from_product(
         [[u'bar', u'baz', u'foo', u'qux'], [u'one', u'two']],
         names=[u'first', u'second'])
     df = pd.DataFrame({'x': range(0, len(midx))}, index=midx)
     store.put(df, 'mydata')
     dfx = store.get('mydata')
     assert_frame_equal(df, dfx)
     idxs = list(store.collection('mydata').list_indexes())
     idx_names = [dict(v).get('name') for v in idxs]
     self.assertIn('asc__idx#0_first__asc__idx#1_second', idx_names)
예제 #7
0
 def test_put_dataframe_timeseries(self):
     """Round-trip a frame with a daily ``DatetimeIndex`` through the store.

     The frame read back must equal the one stored, and the humanized
     ``index_information()`` must contain
     ``asc__id_asc__idx#0_0_asc__om#rowid``.
     """
     # create some dataframe
     # ISO date strings replace the pd.datetime alias, which is deprecated
     # and removed in pandas 2.0; the resulting date range is the same
     tsidx = pd.date_range('2016-01-01', '2016-04-01')
     df = pd.DataFrame(
         {
             'a': list(range(0, len(tsidx))),
             'b': list(range(0, len(tsidx)))
         },
         index=tsidx)
     store = OmegaStore(prefix='')
     store.put(df, 'mydata')
     dfx = store.get('mydata')
     assert_frame_equal(df, dfx)
     idxs = store.collection('mydata').index_information()
     idx_names = humanize_index(idxs)
     self.assertIn('asc__id_asc__idx#0_0_asc__om#rowid', idx_names)
예제 #8
0
 def test_put_dataframe_multiindex(self):
     """Round-trip a frame with a two-level ``MultiIndex`` through the store.

     The frame read back must equal the one stored, and the humanized
     ``index_information()`` must contain the expected combined name.
     """
     om_store = OmegaStore(prefix='')
     # index levels and the per-row positions into those levels
     level_values = [[u'bar', u'baz', u'foo', u'qux'], [u'one', u'two']]
     level_codes = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]]
     multi_index = pd.MultiIndex(levels=level_values,
                                 codes=level_codes,
                                 names=[u'first', u'second'])
     frame = pd.DataFrame({'x': range(0, len(multi_index))},
                          index=multi_index)
     om_store.put(frame, 'mydata')
     restored = om_store.get('mydata')
     assert_frame_equal(frame, restored)
     index_info = om_store.collection('mydata').index_information()
     expected_name = (
         'asc__id_asc__idx#0_first_asc__idx#1_second_asc__om#rowid')
     self.assertIn(expected_name, humanize_index(index_info))