def test_build_bundle(self):
    """Build the bundle twice against the editable schema, then restore.

    The second run must pick up schema.csv changes made by the first
    run (type errors in the 'coding' table). Fix: the restore/rebuild
    step was in a ``finally`` attached to ``try: pass`` (the real
    ``try:`` had been commented out), so it never ran when a build
    failed; both build runs are now inside the ``try`` so the cleanup
    always executes.
    """
    import shutil

    bundle = Bundle()
    # Start from the editable schema copy so the build may modify schema.csv.
    shutil.copyfile(bundle.filesystem.path('meta', 'schema-edit-me.csv'),
                    bundle.filesystem.path('meta', 'schema.csv'))

    bundle.database.enable_delete = True
    bundle.clean()

    try:
        # First run; may rewrite schema.csv due to coding-table type errors.
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build_db_inserter_codes()
        bundle.post_build()
        bundle.close()

        # The second run will use the changes to the schema made in the
        # first run, due to the types errors in the 'coding' table.
        bundle.clean()
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build_db_inserter_codes()
        bundle.post_build()
        bundle.close()
    finally:
        # Need to clean up to ensure that we're back to a good state.
        # This runs the normal build, which will be used by the other
        # tests.
        shutil.copyfile(
            bundle.filesystem.path('meta', 'schema-edit-me.csv'),
            bundle.filesystem.path('meta', 'schema.csv'))

        bundle.clean()
        bundle = Bundle()
        bundle.exit_on_fatal = False
        bundle.pre_prepare()
        bundle.prepare()
        bundle.post_prepare()
        bundle.pre_build()
        bundle.build()
        bundle.post_build()
def setUp(self):
    """Prepare a built test bundle, its run config, and the test manifest."""
    import bundles.testbundle.bundle
    from ambry.run import RunConfig
    import manifests
    import configs

    bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)
    config_dir = os.path.dirname(configs.__file__)
    self.bundle_dir = bundle_dir
    self.config_dir = config_dir

    config_sources = (
        os.path.join(config_dir, 'test.yaml'),
        os.path.join(bundle_dir, 'bundle.yaml'),
        RunConfig.USER_ACCOUNTS,
    )
    self.rc = get_runconfig(config_sources)

    self.copy_or_build_bundle()
    self.bundle = Bundle()

    # Load the test manifest text for use by the individual tests.
    self.m = os.path.join(os.path.dirname(manifests.__file__), 'test.ambry')
    with open(self.m) as fh:
        self.m_contents = fh.read()
def test_partition_2(self):
    """Create a partition for each combination of identity name parts."""
    bundle = Bundle()
    bundle.clean()
    bundle.pre_prepare()
    bundle.prepare()
    bundle.post_prepare()

    table = self.bundle.schema.tables[0]

    # Doubled list of (component, value) pairs so slices can run past
    # the first four entries.
    components = (('time', 'time2'), ('space', 'space3'),
                  ('table', table.name), ('grain', 'grain4'))
    components += components

    # Every contiguous run of 1..4 components, keyed by fqname to
    # collapse duplicates.
    pids = {}
    for start in range(4):
        for length in range(1, 5):
            parts = dict(components[start:start + length])
            pid = self.bundle.identity.as_partition(**parts)
            pids[pid.fqname] = pid

    for pid in pids.values():
        partition = bundle.partitions.new_db_partition(**pid.dict)
        partition.create()

        found = bundle.partitions._find_orm(
            PartitionNameQuery(vid=pid.vid)).all()
        self.assertIn(pid.sname, [q.name for q in found])
def setUp(self):
    """Ensure a built bundle exists and remember its directory."""
    super(Test, self).setUp()
    self.copy_or_build_bundle()

    b = Bundle()
    self.bundle = b
    self.bundle_dir = b.bundle_dir
def bundle_dirs(self):
    """Return (bundle, marker path, build dir, save dir) for the test bundle."""
    b = Bundle()
    fs = b.filesystem

    marker_path = fs.build_path('test-marker')
    base = fs.build_path()

    # Trailing slashes matter: rsync then copies directory *contents*.
    return b, marker_path, base + '/', base + "-save/"
def test_simple_build(self):
    """Run one clean build, starting from the editable schema copy."""
    import shutil

    bundle = Bundle()
    # Reset schema.csv from the editable master before building.
    editable = bundle.filesystem.path('meta', 'schema-edit-me.csv')
    active = bundle.filesystem.path('meta', 'schema.csv')
    shutil.copyfile(editable, active)
    bundle.clean()

    bundle = Bundle()
    bundle.exit_on_fatal = False

    # Run the full build pipeline in order.
    for phase in (bundle.pre_prepare, bundle.prepare, bundle.post_prepare,
                  bundle.pre_build, bundle.build, bundle.post_build):
        phase()
def test_wkb(self): from shapely.wkb import dumps, loads b = Bundle() p = b.partitions.find(table='geot2') for row in p.query( "SELECT quote(AsBinary(GEOMETRY)) as wkb, quote(GEOMETRY) FROM geot2" ): print row
def setUp(self):
    """Build/copy the test bundle and load the geo test run config."""
    import os
    from ambry.run import get_runconfig, RunConfig

    self.copy_or_build_bundle()

    b = Bundle()
    self.bundle = b
    self.bundle_dir = b.bundle_dir

    config_sources = (
        os.path.join(self.bundle_dir, 'geo-test-config.yaml'),
        os.path.join(self.bundle_dir, 'bundle.yaml'),
        RunConfig.USER_ACCOUNTS,
    )
    self.rc = get_runconfig(config_sources)
def test_db_bundle(self):
    """Check BuildBundle/DbBundle identities, then run a geo build.

    Fix: the block mixed the deprecated ``assertEquals`` alias with
    ``assertEqual``; unified on ``assertEqual`` throughout.
    """
    from ambry.bundle import BuildBundle, DbBundle

    b = BuildBundle(self.bundle_dir)
    b.clean()

    self.assertTrue(b.identity.id_ is not None)
    self.assertEqual('source-dataset-subset-variation', b.identity.sname)
    self.assertEqual('source-dataset-subset-variation-0.0.1',
                     b.identity.vname)

    b.database.create()

    db_path = b.database.path

    # A DbBundle loaded from the database file must report the same identity.
    dbb = DbBundle(db_path)

    self.assertEqual("source-dataset-subset-variation", dbb.identity.sname)
    self.assertEqual("source-dataset-subset-variation-0.0.1",
                     dbb.identity.vname)

    # Recreate the test bundle database from scratch.
    b = Bundle()
    b.database.enable_delete = True
    b.clean()
    b.database.create()

    # Run the full pipeline through the geo build.
    b = Bundle()
    b.exit_on_fatal = False
    b.pre_prepare()
    b.prepare()
    b.post_prepare()
    b.pre_build()
    b.build_geo()
    b.post_build()
    b.close()
def setUp(self):
    """Load the library test config, build the bundle, and clear the root."""
    super(Test, self).setUp()

    self.bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)

    config_sources = (
        os.path.join(self.bundle_dir, 'library-test-config.yaml'),
        os.path.join(self.bundle_dir, 'bundle.yaml'),
        RunConfig.USER_ACCOUNTS,
    )
    self.rc = get_runconfig(config_sources)

    self.copy_or_build_bundle()
    self.bundle = Bundle()

    # Start each test with an empty library filesystem root.
    Test.rm_rf(self.rc.group('filesystem').root)
def setUp(self):
    """Build the test bundle and load client and server run configs."""
    self.copy_or_build_bundle()

    here = os.path.dirname(os.path.abspath(__file__))
    test_bundle_dir = os.path.join(here, 'testbundle')
    self.bundle_dir = test_bundle_dir

    self.rc = RunConfig([
        os.path.join(test_bundle_dir, 'client-test-config.yaml'),
        os.path.join(test_bundle_dir, 'bundle.yaml'),
        RunConfig.USER_CONFIG,
    ])

    self.server_rc = RunConfig([
        os.path.join(test_bundle_dir, 'server-test-config.yaml'),
        RunConfig.USER_CONFIG,
    ])

    b = Bundle()
    self.bundle = b
    # Deliberately overrides the path-derived value above with the
    # bundle's own directory, as the original did.
    self.bundle_dir = b.bundle_dir
def test_partition(self):
    """Exercise partition creation, lookup, and cleanup.

    Covers: duplicate creation raising ConflictError, find/find_or_new
    lookups by kwargs and PartitionNameQuery, persisting data edits
    through the session, and bulk deletion of old partition records.
    """
    from ambry.dbexceptions import ConflictError
    from ambry.identity import PartitionNameQuery
    # NOTE(review): CsvPartition is imported but not used below — confirm
    # whether it is needed or can be dropped.
    from ambry.partition.csv import CsvPartition

    self.bundle.clean()
    self.bundle.prepare()

    p = self.bundle.partitions.new_db_partition(time=10, space=10,
                                                data={'pid': 'pid1'})

    # Creating the same partition twice must conflict.
    with self.assertRaises(ConflictError):
        self.bundle.partitions.new_db_partition(time=10, space=10,
                                                data={'pid': 'pid1'})

    self.assertEqual(1, len(self.bundle.partitions.all))

    # find_or_new must return the existing partition, not create another.
    p = self.bundle.partitions.find_or_new(time=10, space=10)
    p.database.create(
    )  # Find will go to the library if the database doesn't exist.
    self.assertEqual(1, len(self.bundle.partitions.all))
    self.assertEquals('pid1', p.data['pid'])

    # The same partition is reachable by query object and by kwargs.
    p = self.bundle.partitions.find(PartitionNameQuery(time=10, space=10))
    self.assertEquals('pid1', p.data['pid'])

    p = self.bundle.partitions.find(time=10, space=10)
    self.assertEquals('pid1', p.data['pid'])

    # A data edit made inside a session must be visible from a fresh Bundle.
    pnq3 = PartitionNameQuery(space=10)
    with self.bundle.session as s:
        p = self.bundle.partitions._find_orm(pnq3).first()
        p.data['foo'] = 'bar'
        s.add(p)

    bundle = Bundle()
    p = bundle.partitions.find(pnq3)
    self.assertEquals('bar', p.data['foo'])

    #p = self.bundle.partitions.find(PartitionNameQuery(name='source-dataset-subset-variation-30-hdf'))
    #self.assertTrue(p is not None)
    #self.assertEquals('source-dataset-subset-variation-30-hdf', p.identity.sname)

    #
    # Create all possible combinations of partition names
    #
    table = self.bundle.schema.tables[0]

    # Doubled tuple of (component, value) pairs so slices can wrap.
    p = (('time', 'time2'), ('space', 'space3'), ('table', table.name),
         ('grain', 'grain4'))
    p += p

    pids = {}

    # Every contiguous run of 1..4 components; fqname key dedupes.
    for i in range(4):
        for j in range(4):
            pid = self.bundle.identity.as_partition(**dict(p[i:i + j + 1]))
            pids[pid.fqname] = pid

    with self.bundle.session as s:
        s.commit()

        # These two deletey bits clear out all of the old
        # partitions, to avoid a conflict with the next section. We also have
        # to delete the files, since create() adds a partition record to the database,
        # and if one already exists, it will throw an Integrity Error.
        for p in self.bundle.partitions:
            if os.path.exists(p.database.path):
                os.remove(p.database.path)

        for p in self.bundle.dataset.partitions:
            # Using SQL instead of s.delete() because we want to avoid the cascade to stored_partitions, since
            # that table doesn't exist in the bundle, only in the library
            s.execute("DELETE FROM partitions WHERE p_vid = :vid",
                      {'vid': p.vid})
def setUp(self):
    """Make sure a built bundle is available and record its directory."""
    self.copy_or_build_bundle()

    b = Bundle()
    self.bundle = b
    self.bundle_dir = b.bundle_dir
def test_versions(self):
    """Build and install three revisions of the bundle, then resolve refs.

    For revisions 1..3 the bundle identity is rewritten (revision,
    major/minor version), rebuilt, and put into the library. The
    original bundle.yaml is saved first and restored in the finally
    block. The resulting dataset list is written to
    meta/version_datasets.json for use by other tests.
    """
    from ambry.run import get_runconfig
    from ambry.library.query import Resolver
    import shutil

    idnt = self.bundle.identity

    l = self.get_library()
    l.purge()

    # Save bundle.yaml so the version edits below can be undone.
    orig = os.path.join(self.bundle.bundle_dir, 'bundle.yaml')
    save = os.path.join(self.bundle.bundle_dir, 'bundle.yaml.save')
    shutil.copyfile(orig, save)

    datasets = {}

    try:
        for i in [1, 2, 3]:
            # Bump revision and version numbers for this iteration.
            idnt._on.revision = i
            idnt.name.version_major = i
            idnt.name.version_minor = i * 10

            bundle = Bundle()
            get_runconfig.clear()  #clear runconfig cache

            # Rewrite the on-disk metadata with the new identity.
            bundle.metadata.load_all()
            bundle.metadata.identity = idnt.ident_dict
            bundle.metadata.names = idnt.names_dict
            bundle.metadata.write_to_dir(write_all=True)

            # Fresh Bundle so the rewritten metadata is picked up.
            bundle = Bundle()

            bundle.clean()
            bundle.pre_prepare()
            bundle.prepare()
            bundle.post_prepare()
            bundle.pre_build()
            bundle.build_small()
            #bundle.build()
            bundle.post_build()

            bundle = Bundle()

            l.put_bundle(bundle)

    finally:
        pass
        # Restore the original bundle.yaml regardless of failures.
        os.rename(save, orig)

    #
    # Save the list of datasets for version analysis in other
    # tests
    #

    db = l.database

    for d in db.list(with_partitions=True).values():
        datasets[d.vid] = d.dict
        datasets[d.vid]['partitions'] = {}

        for p_vid, p in d.partitions.items():
            datasets[d.vid]['partitions'][p_vid] = p.dict

    with open(self.bundle.filesystem.path('meta', 'version_datasets.json'),
              'w') as f:
        import json
        f.write(json.dumps(datasets))

    r = Resolver(db.session)

    ref = idnt.id_

    # Resolve a versioned name ref ("-=2.20" pins the version).
    ref = "source-dataset-subset-variation-=2.20"

    ip, results = r.resolve_ref_all(ref)

    for row in results:
        print row
def copy_or_build_bundle(self): """Set up a clean bundle build, either by re-building the bundle, or by copying it from a saved bundle directory """ # For most cases, re-set the bundle by copying from a saved version. If # the bundle doesn't exist and the saved version doesn't exist, # build a new one. bundle, marker, build_dir, save_dir = self.bundle_dirs() idnt = bundle.identity if str(idnt.name.version) != "0.0.1": # Rebuild the bundle if the test_library.py:test_versions # script didn't reset the bundle at the end from ambry.util import rm_rf rm_rf(build_dir) rm_rf(save_dir) idnt = Identity.from_dict(dict(bundle.metadata.identity)) bundle.metadata.identity = idnt.ident_dict bundle.metadata.names = idnt.names_dict bundle.metadata.write_to_dir() if not os.path.exists(marker): global_logger.info( "Build dir marker ({}) is missing".format(marker)) # There is a good reason to create a seperate instance, # but don't remember what it is ... bundle.clean() bundle = Bundle() if not os.path.exists(save_dir): global_logger.info("Save dir is missing; re-build bundle. ") bundle.pre_prepare() bundle.prepare() bundle.post_prepare() if str(bundle.identity.name.version) != '0.0.1': raise Exception( "Can only save bundle if version is 0.0.1. This one is version: {} " .format(bundle.identity.name.version)) bundle.pre_build() bundle.build() bundle.post_build() bundle.close() with open(marker, 'w') as f: f.write(str(time.time())) # Copy the newly built bundle to the save directory os.system("rm -rf {1}; rsync -arv {0} {1} > /dev/null ".format( build_dir, save_dir)) # Always copy, just to be safe. #global_logger.info( "Copying bundle from {}".format(save_dir)) os.system("rm -rf {0}; rsync -arv {1} {0} > /dev/null ".format( build_dir, save_dir))
def test_bundle_build(self):
    """Exercise partition creation at three API levels, then run a build.

    Levels: _new_orm_partition (raw ORM rows), _new_partition, and
    new_db_partition/find_or_new with exact fqname checks; finishes with
    the full pipeline through build_db_inserter_codes and identity
    assertions.
    """
    from ambry.dbexceptions import ConflictError

    bundle = Bundle()

    # Need to clear the library, or the Bundle's pre_prepare
    # will cancel the build if this version is already installed
    bundle.library.purge()

    bundle.exit_on_fatal = False
    bundle.clean()
    bundle.database.create()

    bp = bundle.partitions

    # Level 1: raw ORM partition rows inside one session.
    with bundle.session:
        bp._new_orm_partition(PartialPartitionName(time = 't1', space='s1'))
        bp._new_orm_partition(PartialPartitionName(time = 't1', space='s2'))
        bp._new_orm_partition(PartialPartitionName(time = 't1', space=None))
        bp._new_orm_partition(PartialPartitionName(time = 't2', space='s1'))
        bp._new_orm_partition(PartialPartitionName(time = 't2', space='s2'))
        bp._new_orm_partition(PartialPartitionName(time = 't2', space=None))

    # Re-creating an existing partition must conflict.
    with self.assertRaises(ConflictError):
        with bundle.session:
            bp._new_orm_partition(
                PartialPartitionName(time = 't1', space='s1'))

    # ANY matches any value; NONE matches only a missing component.
    pnq = PartitionNameQuery(time=NameQuery.ANY, space='s1')

    names = [p.vname for p in bp._find_orm(pnq).all()]

    self.assertEqual({u'source-dataset-subset-variation-t2-s1-0.0.1',
                      u'source-dataset-subset-variation-t1-s1-0.0.1'},
                     set(names))

    names = [p.vname for p in
             bp._find_orm(PartitionNameQuery(space=NameQuery.ANY)).all()]

    self.assertEqual(6, len(names))

    names = [p.vname for p in
             bp._find_orm(PartitionNameQuery(time='t1',
                                             space=NameQuery.ANY)).all()]

    self.assertEqual({'source-dataset-subset-variation-t1-s2-0.0.1',
                      'source-dataset-subset-variation-t1-0.0.1',
                      'source-dataset-subset-variation-t1-s1-0.0.1'},
                     set(names))

    names = [p.vname for p in
             bp._find_orm(PartitionNameQuery(time='t1',
                                             space=NameQuery.NONE)).all()]

    self.assertEqual({'source-dataset-subset-variation-t1-0.0.1'},
                     set(names))

    # Start over, use a higher level function to create the partitions
    bundle.close()  # Or you'll get an OperationalError
    bundle = Bundle()
    bundle.exit_on_fatal = False
    bundle.clean()
    bundle.database.create()

    bp = bundle.partitions

    # Level 2: _new_partition, checking the running count.
    bp._new_partition(PartialPartitionName(time = 't1', space='s1'))
    self.assertEquals(1, len(bp.all))

    bp._new_partition(PartialPartitionName(time = 't1', space='s2'))
    self.assertEquals(2, len(bp.all))

    bp._new_partition(PartialPartitionName(time = 't1', space=None))
    bp._new_partition(PartialPartitionName(time = 't2', space='s1'))
    bp._new_partition(PartialPartitionName(time = 't2', space='s2'))
    bp._new_partition(PartialPartitionName(time = 't2', space=None))
    self.assertEquals(6, len(bp.all))

    names = [p.vname for p in
             bp._find_orm(PartitionNameQuery(time='t1',
                                             space=NameQuery.ANY)).all()]

    self.assertEqual({'source-dataset-subset-variation-t1-s2-0.0.1',
                      'source-dataset-subset-variation-t1-0.0.1',
                      'source-dataset-subset-variation-t1-s1-0.0.1'},
                     set(names))

    # Start over, use a higher level function to create the partitions
    bundle.close()
    bundle = Bundle()
    bundle.exit_on_fatal = False
    bundle.clean()
    bundle.database.create()
    bp = bundle.partitions

    # Level 3: public creation APIs with exact fqname expectations.
    p = bp.new_db_partition(time = 't1', space='s1')
    self.assertEquals(
        'source-dataset-subset-variation-t1-s1-0.0.1~piEGPXmDC8001001',
        p.identity.fqname)

    p = bp.find_or_new(time = 't1', space='s2')
    self.assertEquals(
        'source-dataset-subset-variation-t1-s2-0.0.1~piEGPXmDC8002001',
        p.identity.fqname)

    # Duplicate
    p = bp.find_or_new(time = 't1', space='s2')
    self.assertEquals(
        'source-dataset-subset-variation-t1-s2-0.0.1~piEGPXmDC8002001',
        p.identity.fqname)

    p = bp.find_or_new_geo(time = 't2', space='s1')
    # Which it is depends on whether GDAL is installed.
    self.assertIn(p.identity.fqname, [
        'source-dataset-subset-variation-t2-s1-geo-0.0.1~piEGPXmDC8003001',
        'source-dataset-subset-variation-t2-s1-0.0.1~piEGPXmDC8003001'
    ])

    # Ok! Build!

    bundle.close()
    bundle = Bundle()
    bundle.exit_on_fatal = False

    bundle.clean()
    bundle.pre_prepare()
    bundle.prepare()
    bundle.post_prepare()
    bundle.pre_build()
    bundle.build_db_inserter_codes()
    bundle.post_build()

    self.assertEquals('diEGPXmDC8001', bundle.identity.vid)
    self.assertEquals('source-dataset-subset-variation',
                      bundle.identity.sname)
    self.assertEquals('source-dataset-subset-variation-0.0.1',
                      bundle.identity.vname)
    self.assertEquals(
        'source-dataset-subset-variation-0.0.1~diEGPXmDC8001',
        bundle.identity.fqname)
def test_config_update(self):
    """Smoke-test that configuration updating runs without raising."""
    Bundle().update_configuration()
def setUp(self):
    """Create the test bundle and record its directory."""
    super(Test, self).setUp()

    b = Bundle()
    self.bundle = b
    self.bundle_dir = b.bundle_dir