Exemple #1
0
 def test_migrate_unhashed_name(self):
     """Check that a dataset stored under an unhashed name survives migration.

     The dataset is first written with OMEGA_STORE_HASHEDNAMES disabled and
     read back with the flag enabled, both before and after running
     migrate_unhashed_datasets(). The same name is then stored again with
     hashing enabled; the migrated metadata is expected to reference the same
     collection as that fresh put, and a different collection than the
     original unhashed put.
     """
     store = OmegaStore(bucket='foo', prefix='foo/')
     # put the flag back when the test ends, pass or fail, so the values
     # assigned below cannot outlive this test through the defaults object
     self.addCleanup(setattr, store.defaults, 'OMEGA_STORE_HASHEDNAMES',
                     store.defaults.OMEGA_STORE_HASHEDNAMES)
     df = pd.DataFrame({'x': range(100)})
     long_name = 'a' * 10
     # save as unhashed (old version)
     store.defaults.OMEGA_STORE_HASHEDNAMES = False
     meta_unhashed = store.put(df, long_name)
     # simulate upgrade, no migration
     store.defaults.OMEGA_STORE_HASHEDNAMES = True
     # check we can still retrieve
     dfx = store.get(long_name)
     assert_frame_equal(df, dfx)
     # migrate (flag is set again so this step reads on its own)
     store.defaults.OMEGA_STORE_HASHEDNAMES = True
     migrate_unhashed_datasets(store)
     meta_migrated = store.metadata(long_name)
     # check we can still retrieve after migration
     dfx = store.get(long_name)
     assert_frame_equal(df, dfx)
     # store again with hashing enabled to obtain the reference metadata
     meta_hashed = store.put(df, long_name, append=False)
     # check migration worked as expected
     self.assertNotEqual(meta_unhashed.collection, meta_hashed.collection)
     self.assertEqual(meta_migrated.collection, meta_hashed.collection)
Exemple #2
0
 def test_store_with_attributes(self):
     """Check the attributes reported by metadata after successive puts.

     A dataframe is stored under 'foo' without attributes (metadata must
     report an empty dict), then stored twice more with explicit attributes;
     after each put the metadata must report exactly the attributes given.
     """
     frame = pd.DataFrame({col: list(range(1, 10)) for col in ('a', 'b')})
     store = OmegaStore()
     # store the object, no attributes
     store.put(frame, 'foo', append=False)
     self.assertEqual(store.metadata('foo').attributes, {})
     # update attributes, one set after the other
     attribute_sets = (
         {'foo': 'bar'},
         {'foo': 'bax', 'foobar': 'barbar'},
     )
     for expected in attribute_sets:
         # hand over a copy so the expectation stays an independent dict
         store.put(frame, 'foo', append=False, attributes=dict(expected))
         self.assertEqual(store.metadata('foo').attributes, expected)
Exemple #3
0
 def test_long_dataset_name_hdf(self):
     """Store HDF datasets under long names, with and without hashed names.

     With OMEGA_STORE_HASHEDNAMES enabled, a 990-character name is stored and
     the gridfile name must differ from the dataset name. With the flag
     disabled, a 200-character name is stored and the gridfile name must
     equal store._get_obj_store_key(name, '.hdf').
     """
     store = OmegaStore(bucket='foo', prefix='foo/')
     # the test finishes with the flag disabled; put the initial value back
     # afterwards so it cannot outlive this test through the defaults object
     self.addCleanup(setattr, store.defaults, 'OMEGA_STORE_HASHEDNAMES',
                     store.defaults.OMEGA_STORE_HASHEDNAMES)
     df = pd.DataFrame({'xyz' * 100: range(100)})
     # limited by index key limit in MongoDB
     # see https://docs.mongodb.com/manual/reference/limits/#Index-Key-Limit
     long_name = 'a' * 990
     # hashed names
     store.defaults.OMEGA_STORE_HASHEDNAMES = True
     store.put(df, long_name, as_hdf=True)
     meta = store.metadata(long_name)
     self.assertNotEqual(meta.gridfile.name, long_name)
     # unhashed names
     store.defaults.OMEGA_STORE_HASHEDNAMES = False
     long_name = 'a' * 200
     store.put(df, long_name, as_hdf=True)
     meta = store.metadata(long_name)
     self.assertEqual(meta.gridfile.name,
                      store._get_obj_store_key(long_name, '.hdf'))
Exemple #4
0
 def test_store_dataframe_as_hdf(self):
     """Round-trip a dataframe stored as HDF and check a deleted file raises.

     The first part stores a dataframe with as_hdf=True, checks the metadata
     kind and that the gridfile name is listed by store.fs, and reads the
     dataframe back. The second part stores another dataset, deletes its
     file directly through store.fs and expects get() on a fresh store to
     raise gridfs.errors.NoFile.
     """
     data = {'a': list(range(1, 10)), 'b': list(range(1, 10))}
     df = pd.DataFrame(data)
     store = OmegaStore()
     meta = store.put(df, 'foo', as_hdf=True)
     self.assertEqual(meta.kind, 'pandas.hdf')
     # make sure the hdf file is actually there
     meta = store.metadata('foo')
     self.assertIn(meta.gridfile.name, store.fs.list())
     df2 = store.get('foo')
     self.assertTrue(df.equals(df2), "dataframes differ")
     # test for non-existent file raises exception
     store.put(df2, 'foo_will_be_removed', as_hdf=True)
     meta = store.metadata('foo_will_be_removed')
     # remember the file name before the file is deleted, it is needed
     # for the final check below
     removed_name = meta.gridfile.name
     file_id = store.fs.get_last_version(removed_name)._id
     store.fs.delete(file_id)
     store2 = OmegaStore()
     with self.assertRaises(gridfs.errors.NoFile):
         store2.get('foo_will_be_removed')
     # test hdf file is not there
     self.assertNotIn('hdfdf.hdf', store2.fs.list())
     # 'hdfdf.hdf' is never created in this test, so also check the name
     # of the file that was actually stored and deleted
     self.assertNotIn(removed_name, store2.fs.list())
Exemple #5
0
 def test_migrate_unhashed_name_hdf(self):
     """Check an HDF dataset stored unhashed stays readable once hashing is on.

     The dataset is written as HDF with OMEGA_STORE_HASHEDNAMES disabled and
     read back with the flag enabled. It is then stored again with
     replace=True while hashing is enabled; the gridfile name of the new
     metadata is expected to differ from the unhashed one.
     """
     store = OmegaStore(bucket='foo', prefix='foo/')
     # put the flag back when the test ends, pass or fail, so the values
     # assigned below cannot outlive this test through the defaults object
     self.addCleanup(setattr, store.defaults, 'OMEGA_STORE_HASHEDNAMES',
                     store.defaults.OMEGA_STORE_HASHEDNAMES)
     df = pd.DataFrame({'x': range(100)})
     long_name = 'a' * 10
     # save as unhashed (old version)
     store.defaults.OMEGA_STORE_HASHEDNAMES = False
     store.put(df, long_name, as_hdf=True)
     meta_unhashed = store.metadata(long_name)
     # retrieve should still work
     store.defaults.OMEGA_STORE_HASHEDNAMES = True
     dfx = store.get(long_name)
     assert_frame_equal(df, dfx)
     # stored hashed
     store.put(df, long_name, replace=True, as_hdf=True)
     meta_hashed = store.metadata(long_name)
     dfx = store.get(long_name)
     assert_frame_equal(df, dfx)
     # check hashing actually worked
     self.assertNotEqual(meta_unhashed.gridfile.name,
                         meta_hashed.gridfile.name)
Exemple #6
0
    def test_store_metadata_notstrict(self):
        """ metadata entries carrying unknown fields must still load

        An extra field is written straight into the stored metadata
        document; reading the metadata back through the store is expected
        not to raise FieldDoesNotExist. This allows metadata extensions
        between omegaml versions.
        """
        om = OmegaStore(prefix='')
        # store a plain dict together with some attributes
        om.put({'a': list(range(1, 10)), 'b': list(range(1, 10))},
               'data',
               attributes={'foo': 'bar'})
        # add a field to the raw document, bypassing the store
        collection = om.mongodb['metadata']
        query = {'name': 'data'}
        document = collection.find_one(query)
        document['modified_extra'] = document['modified']
        collection.replace_one(query, document)
        # loading must tolerate the additional field
        loaded_ok = True
        try:
            om.metadata('data')
        except FieldDoesNotExist:
            loaded_ok = False
        self.assertTrue(loaded_ok)