def delete_bundle(self):
    from ambry.util import rm_rf

    bundle, marker, build_dir, save_dir = self.bundle_dirs()

    rm_rf(build_dir)
    rm_rf(save_dir)
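# The snippets below all lean on ambry.util.rm_rf. Here is a minimal sketch of
# what it is assumed to do -- remove a file or directory tree without
# complaining when the path is already gone, like the shell's `rm -rf`. This
# is an illustration, not the library's actual implementation.
import os
import shutil

def rm_rf(path):
    """Recursively remove path, ignoring missing files, like `rm -rf`."""
    if os.path.isdir(path) and not os.path.islink(path):
        shutil.rmtree(path, ignore_errors=True)
    elif os.path.exists(path) or os.path.islink(path):
        os.remove(path)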
def test_source_get(self):
    from ambry.util import rm_rf
    import shutil
    from ambry.orm import Dataset

    l = self.get_library()
    s = l.source

    print 'Source Dir: ', s.base_dir, l.database.dsn

    rm_rf(s.base_dir)
    shutil.copytree(self.source_save_dir, s.base_dir)

    s.sync_source()

    snames = {
        'example.com-altdb-orig',
        'example.com-random',
        'example.com-segmented-orig'
    }

    self.assertEquals(snames, {ident.sname for ident in s._dir_list().values()})
    self.assertEquals(snames, {ident.sname for ident in s.list().values()})

    self.build_bundle(s, l, "example.com-random-0.0.1")

    s.sync_repos()

    codes = {ident.vname: ident.locations.codes for ident in l.list().values()}

    for key, ident in sorted(l.list().items(), key=lambda x: x[1].vname):
        print str(ident.locations), ident.vname

    self.assertIn(Dataset.LOCATION.SOURCE, codes['example.com-random-0.0.1'])
    self.assertIn(Dataset.LOCATION.LIBRARY, codes['example.com-random-0.0.1'])
    self.assertNotIn(Dataset.LOCATION.UPSTREAM, codes['example.com-random-0.0.1'])
    self.assertIn(Dataset.LOCATION.SOURCE, codes['example.com-segmented-orig-0.1.1'])

    l.push()  # Also stores upstream ref in Files and Datasets

    codes = {ident.vname: ident.locations.codes for ident in l.list().values()}

    for key, ident in sorted(l.list().items(), key=lambda x: x[1].vname):
        print str(ident.locations), ident.vid, ident.fqname

    self.assertIn(Dataset.LOCATION.UPSTREAM, codes['example.com-random-0.0.1'])
def copy_or_build_bundle(self):
    """Set up a clean bundle build, either by re-building the bundle or by
    copying it from a saved bundle directory."""

    # For most cases, reset the bundle by copying from a saved version. If
    # the bundle doesn't exist and the saved version doesn't exist,
    # build a new one.
    bundle, marker, build_dir, save_dir = self.bundle_dirs()

    idnt = bundle.identity

    if str(idnt.name.version) != "0.0.1":
        # Rebuild the bundle if the test_library.py:test_versions
        # script didn't reset the bundle at the end
        from ambry.util import rm_rf
        rm_rf(build_dir)
        rm_rf(save_dir)

        idnt = Identity.from_dict(dict(bundle.metadata.identity))

        bundle.metadata.identity = idnt.ident_dict
        bundle.metadata.names = idnt.names_dict

        bundle.metadata.write_to_dir()

    if not os.path.exists(marker):
        global_logger.info("Build dir marker ({}) is missing".format(marker))

        # There is a good reason to create a separate instance,
        # but don't remember what it is ...
        bundle.clean()
        bundle = Bundle()

        if not os.path.exists(save_dir):
            global_logger.info("Save dir is missing; re-build bundle. ")

            bundle.pre_prepare()
            bundle.prepare()
            bundle.post_prepare()

            if str(bundle.identity.name.version) != '0.0.1':
                raise Exception("Can only save bundle if version is 0.0.1. "
                                "This one is version: {} ".format(bundle.identity.name.version))

            bundle.pre_build()
            bundle.build()
            bundle.post_build()

            bundle.close()

            with open(marker, 'w') as f:
                f.write(str(time.time()))

            # Copy the newly built bundle to the save directory
            os.system("rm -rf {1}; rsync -arv {0} {1} > /dev/null ".format(build_dir, save_dir))

    # Always copy, just to be safe.
    # global_logger.info("Copying bundle from {}".format(save_dir))
    os.system("rm -rf {0}; rsync -arv {1} {0} > /dev/null ".format(build_dir, save_dir))
def deinit(self):
    import os

    fn = os.path.join(self.dir_, '.gitignore')
    if os.path.exists(fn):
        os.remove(fn)

    dn = os.path.join(self.dir_, '.git')
    if os.path.exists(dn):
        from ambry.util import rm_rf
        rm_rf(dn)
def setUp(self):
    rm_rf('/tmp/server')

    self.copy_or_build_bundle()

    self.bundle_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'testbundle')

    self.rc = RunConfig([os.path.join(self.bundle_dir, 'client-test-config.yaml'),
                         os.path.join(self.bundle_dir, 'bundle.yaml'),
                         RunConfig.USER_ACCOUNTS])

    self.server_rc = RunConfig([os.path.join(self.bundle_dir, 'server-test-config.yaml'),
                                RunConfig.USER_ACCOUNTS])

    self.bundle = Bundle()
    self.bundle_dir = self.bundle.bundle_dir
def test_install(self):
    from ambry.util import rm_rf

    root = self.rc.group('filesystem').root
    rm_rf(root)

    l = self.get_library()

    print l.info

    l.put_bundle(self.bundle)

    def cb(what, metadata, start):
        return  # the bare return silences the progress print below
        print "PUSH ", what, metadata['name'], start

    for remote in l.remotes[0:3]:
        # This really should use update(), but it throws inscrutable exceptions.
        for f in l.files.query.state('pushed').all:
            f.state = 'new'
            l.files.merge(f)

        print 'Pushing to ', remote
        l.push(cb=cb, upstream=remote)

    l.purge()  # Remove the entries from the library

    l.sync_remotes(clean=True, remotes=l.remotes[0:3])

    r = l.resolve(self.bundle.identity.vid)
    self.assertEquals('diEGPXmDC8001', str(r.vid))

    r = l.resolve(self.bundle.partitions.all[0].identity.vid)
    self.assertEquals('diEGPXmDC8001', str(r.vid))
    self.assertEquals('piEGPXmDC8001001', str(r.partition.vid))

    ident, cache = l.locate(self.bundle.identity.vid)
    self.assertEquals('/tmp/server/remote', cache.repo_id)
    self.assertEquals(self.bundle.identity.vid, ident.vid)
def setUp(self):
    import bundles.testbundle.bundle

    rm_rf('/tmp/server')

    self.copy_or_build_bundle()

    self.bundle_dir = os.path.dirname(bundles.testbundle.bundle.__file__)

    self.rc = get_runconfig((os.path.join(self.bundle_dir, 'client-test-config.yaml'),
                             os.path.join(self.bundle_dir, 'bundle.yaml'),
                             RunConfig.USER_ACCOUNTS))

    self.server_rc = get_runconfig((os.path.join(self.bundle_dir, 'server-test-config.yaml'),
                                    RunConfig.USER_ACCOUNTS))

    self.bundle = Bundle()
    self.bundle_dir = self.bundle.bundle_dir
def _test_put_bundle(self, name, remote_config=None):
    from ambry.bundle import DbBundle
    from ambry.library.query import QueryCommand

    rm_rf('/tmp/server')

    self.start_server(remote_config)

    # The REST client is disabled; the leading underscore keeps unittest from
    # collecting this test until a client is restored.
    r = None  # Rest(self.server_url, remote_config)

    bf = self.bundle.database.path

    # With an FLO
    response = r.put(open(bf), self.bundle.identity)
    self.assertEquals(self.bundle.identity.id_, response.object.get('id'))

    # With a path
    response = r.put(bf, self.bundle.identity)
    self.assertEquals(self.bundle.identity.id_, response.object.get('id'))

    for p in self.bundle.partitions.all:
        response = r.put(open(p.database.path), p.identity)
        self.assertEquals(p.identity.id_, response.object.get('id'))

    # Now get the bundles
    bundle_file = r.get(self.bundle.identity, '/tmp/foo.db')

    bundle = DbBundle(bundle_file)

    self.assertIsNot(bundle, None)
    self.assertEquals('a1DxuZ', bundle.identity.id_)

    # Should show up in datasets list.
    o = r.list()
    self.assertTrue('a1DxuZ' in o.keys())

    o = r.find(QueryCommand().table(name='tone').partition(any=True))

    self.assertTrue('b1DxuZ001' in [i.id_ for i in o])
    self.assertTrue('a1DxuZ' in [i.as_dataset.id_ for i in o])
def new_datasource(path, fmt='shapefile'):
    import os
    from ambry.util import rm_rf

    drv, options = driver_by_name(fmt)

    if not os.path.exists(os.path.dirname(path)):
        os.makedirs(os.path.dirname(path))

    if os.path.exists(path):
        if os.path.isdir(path):
            rm_rf(path)
        else:
            os.remove(path)

    ds = drv.CreateDataSource(path, options=options)

    if ds is None:
        raise Exception("Failed to create datasource: {}".format(path))

    return ds
def new_datasource(path, fmt='shapefile'):
    import os
    from ambry.util import rm_rf

    drv, options, layer_options = driver_by_name(fmt)

    if not os.path.exists(os.path.dirname(path)):
        os.makedirs(os.path.dirname(path))

    if os.path.exists(path):
        if os.path.isdir(path):
            rm_rf(path)
        else:
            os.remove(path)

    ds = drv.CreateDataSource(path, options=options)

    # Check for failure before touching the datasource; setting an attribute
    # on None would raise an AttributeError and mask the real error.
    if ds is None:
        raise Exception("Failed to create datasource: {}".format(path))

    ds._layer_options = layer_options

    return ds
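# A hedged usage sketch for new_datasource(): create a fresh shapefile
# datasource, add a point layer with one feature, and close it. The layer and
# field calls are plain OGR API; the path, layer name, and coordinates are
# made up for illustration.
from osgeo import ogr, osr

srs = osr.SpatialReference()
srs.ImportFromEPSG(4326)

ds = new_datasource('/tmp/example/points.shp', fmt='shapefile')

lyr = ds.CreateLayer('points', srs, ogr.wkbPoint)
lyr.CreateField(ogr.FieldDefn('id', ogr.OFTInteger))

f = ogr.Feature(lyr.GetLayerDefn())
pt = ogr.Geometry(ogr.wkbPoint)
pt.AddPoint(-117.16, 32.71)
f.SetGeometry(pt)
f.SetField('id', 1)
lyr.CreateFeature(f)

ds = None  # releasing the datasource flushes the shapefile to disk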
def bound_clusters_in_raster(a, aa, shape_file_dir, contour_interval,
                             contour_value, use_bb=True, use_distance=False):
    """Create a shapefile that contains contours and bounding boxes for
    clusters of contours.

    :param a: A numpy array that contains the data in which to find clusters
    :type a: numpy array
    :param aa: The analysis object that sets the coordinate system for the
        area that contains the array
    :type aa: ambry.geo.AnalysisArea
    :param shape_file_dir: The path to a directory where generated files
        will be stored.
    :type shape_file_dir: string
    :param contour_interval: The difference between successive contour
        intervals.
    :type contour_interval: float
    :param contour_value: The contour value that defines cluster boundaries.
    :type contour_value: float
    :param use_bb: If True, compute nearness and intersection using the
        contours' bounding boxes, not the geometry
    :type use_bb: bool
    :param use_distance: If not False, consider contours that are closer
        than this value to be overlapping.
    :type use_distance: number

    :rtype: Returns a list of dictionaries, one for each of the combined
        bounding boxes

    This method will store, in the `shape_file_dir` directory:

    * a GeoTIFF representation of the array `a`
    * An ESRI shapefile layer named `contours`, holding all of the contours.
    * A layer named `contour_bounds` with the bounding boxes for all of the
      contours with value `contour_value`
    * A layer named `combined_bounds` with bounding boxes of intersecting
      and nearby boxes from `contour_bounds`

    The routine will iteratively combine contours that overlap. If
    `use_distance` is set to a number, contours that are closer than this
    value will be joined. If `use_bb` is set, the intersection and distance
    computations use the bounding boxes of the contours, not the contours
    themselves.
    """
    import ambry.geo as dg
    from osgeo.gdalconst import GDT_Float32
    import ambry.util as util
    from osgeo import gdal
    import ogr
    import os
    import numpy as np

    if os.path.exists(shape_file_dir):
        util.rm_rf(shape_file_dir)
    os.makedirs(shape_file_dir)

    rasterf = os.path.join(shape_file_dir, 'contour.tiff')

    ogr_ds = ogr.GetDriverByName('ESRI Shapefile').CreateDataSource(shape_file_dir)

    # Set up the contour layer.
    ogr_lyr = ogr_ds.CreateLayer('contours', aa.srs)
    ogr_lyr.CreateField(ogr.FieldDefn('id', ogr.OFTInteger))
    ogr_lyr.CreateField(ogr.FieldDefn('value', ogr.OFTReal))

    # Create the contours from the GeoTIFF file.
    ds = aa.get_geotiff(rasterf, data_type=GDT_Float32)
    ds.GetRasterBand(1).SetNoDataValue(0)
    ds.GetRasterBand(1).WriteArray(np.flipud(a))

    gdal.ContourGenerate(
        ds.GetRasterBand(1),
        contour_interval,  # contourInterval
        0,                 # contourBase
        [],                # fixedLevelCount
        0,                 # useNoData
        0,                 # noDataValue
        ogr_lyr,           # destination layer
        0,                 # idField
        1                  # elevation field
    )

    # Get buffered bounding boxes around each of the hotspots,
    # and put them into a new layer.
    bound_lyr = ogr_ds.CreateLayer('contour_bounds', aa.srs)
    for i in range(ogr_lyr.GetFeatureCount()):
        f1 = ogr_lyr.GetFeature(i)
        if f1.GetFieldAsDouble('value') != contour_value:
            continue
        g1 = f1.GetGeometryRef()
        bb = dg.create_bb(g1.GetEnvelope(), g1.GetSpatialReference())
        f = ogr.Feature(bound_lyr.GetLayerDefn())
        f.SetGeometry(bb)
        bound_lyr.CreateFeature(f)

    # Doing a full loop instead of a list comprehension b/c the way that
    # comprehensions compose arrays results in segfaults, probably because a
    # copied geometry object is being released before being used.
    geos = []
    for i in range(bound_lyr.GetFeatureCount()):
        f = bound_lyr.GetFeature(i)
        g = f.geometry()
        geos.append(g.Clone())

    # Combine hot spots that have intersecting bounding boxes, to get larger
    # areas that cover all of the adjacent intersecting smaller areas.
    geos = dg.combine_envelopes(geos, use_bb=use_bb, use_distance=use_distance)

    # Write out the combined bounds areas.
    lyr = ogr_ds.CreateLayer('combined_bounds', aa.srs)
    lyr.CreateField(ogr.FieldDefn('id', ogr.OFTInteger))
    lyr.CreateField(ogr.FieldDefn('area', ogr.OFTReal))
    lyr.CreateField(ogr.FieldDefn('name', ogr.OFTString))
    lyr.CreateField(ogr.FieldDefn('code', ogr.OFTString))

    envelopes = []
    id = 1
    for env in geos:
        f = ogr.Feature(lyr.GetLayerDefn())
        bb = dg.create_bb(env.GetEnvelope(), env.GetSpatialReference())
        f.SetGeometry(bb)
        f.SetField(0, id)
        f.SetField(1, bb.Area())
        f.SetField(2, None)
        f.SetField(3, None)
        lyr.CreateFeature(f)
        # Record the envelope with the same id as the feature; incrementing
        # before the append, as the original did, put mismatched ids in the
        # returned dictionaries.
        envelopes.append({'id': id, 'env': bb.GetEnvelope(), 'area': bb.Area()})
        id += 1

    return envelopes
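# A hedged usage sketch for bound_clusters_in_raster(). It assumes `aa` is an
# already-constructed ambry.geo.AnalysisArea and that `aa.shape` gives the
# raster dimensions; only the bound_clusters_in_raster() call itself is taken
# from this module, the rest is illustrative.
import numpy as np

a = np.zeros(aa.shape)  # density surface to cluster
# ... fill `a`, e.g. with a kernel-density estimate of point events ...

envelopes = bound_clusters_in_raster(
    a, aa, '/tmp/clusters',
    contour_interval=10,  # contour every 10 units of the surface
    contour_value=20,     # cluster boundaries drawn at value 20
    use_bb=True,          # compare bounding boxes, not full geometries
    use_distance=50)      # merge boxes closer than 50 map units

for e in envelopes:
    print e['id'], e['area'], e['env']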
def x_test_put_redirect(self):
    from ambry.bundle import DbBundle
    from ambry.library.query import QueryCommand
    from ambry.util import md5_for_file, rm_rf, bundle_file_type

    #
    # Simple put and retrieve
    #
    cache = self.bundle.filesystem._get_cache(self.server_rc.filesystem, 'direct-remote')
    cache2 = self.bundle.filesystem._get_cache(self.server_rc.filesystem, 'direct-remote-2')

    rm_rf(os.path.dirname(cache.cache_dir))
    rm_rf(os.path.dirname(cache2.cache_dir))

    cache.put(self.bundle.database.path, 'direct')

    path = cache2.get('direct')
    self.assertEquals('sqlite', bundle_file_type(path))

    cache.remove('direct', propagate=True)

    #
    # Connect through server.
    #
    rm_rf('/tmp/server')
    self.start_server(name='default-remote')

    # The REST client is disabled, which is why this test is prefixed
    # with 'x_' and not collected.
    api = None  # Rest(self.server_url, self.rc.accounts)

    # Upload directly, then download via the cache.
    cache.remove(self.bundle.identity.cache_key, propagate=True)

    r = api.upload_file(self.bundle.identity, self.bundle.database.path, force=True)

    path = cache.get(self.bundle.identity.cache_key)

    b = DbBundle(path)

    self.assertEquals("source-dataset-subset-variation-ca0d", b.identity.name)

    #
    # Full service
    #
    p = self.bundle.partitions.all[0]

    cache.remove(self.bundle.identity.cache_key, propagate=True)
    cache.remove(p.identity.cache_key, propagate=True)

    r = api.put(self.bundle.database.path, self.bundle.identity)
    print "Put {}".format(r.object)

    r = api.put(p.database.path, p.identity)
    print "Put {}".format(r.object)

    r = api.put(p.database.path, p.identity)

    r = api.get(p.identity, '/tmp/foo.db')
    print "Get {}".format(r)

    b = DbBundle(r)

    self.assertEquals("source-dataset-subset-variation-ca0d", b.identity.name)