def check_data_integrity(layer_files):
    """Verify that all layers share projection and georeferencing.

    Input
        layer_files: Sequence of layer objects. Despite the name, every
                     entry is used directly as a layer: it must provide
                     the attributes projection, is_raster and is_vector
                     and the method get_geotransform() (raster layers)
                     or get_geometry() (vector layers).

    Raises
        AssertionError if
        * the projection of a layer differs from the default projection
        * two raster layers have different geotransforms
        * two vector layers have different coordinates
    """

    # Set default values for projection and geotransform.
    # Choosing 'None' will use value of first layer.
    projection = Projection(DEFAULT_PROJECTION)
    geotransform = None
    coordinates = None

    for layer in layer_files:

        # Ensure that projection is consistent across all layers
        if projection is None:
            projection = layer.projection
        else:
            # The message expression of an assert statement is only
            # evaluated when the check fails, so the proj4 strings are
            # not computed for layers that pass.
            assert projection == layer.projection, (
                'Projections in input layer %s is not as expected:\n'
                'projection: %s\n'
                'default: %s'
                '' % (layer,
                      layer.projection.get_projection(proj4=True),
                      projection.get_projection(proj4=True)))

        # Ensure that geotransform is consistent across all *raster* layers
        if layer.is_raster:
            layer_geotransform = layer.get_geotransform()
            if geotransform is None:
                geotransform = layer_geotransform
            else:
                assert geotransform == layer_geotransform, (
                    'Geotransforms in input raster layers are different: '
                    '%s %s' % (geotransform, layer_geotransform))

        # In case of vector layers, we check that the coordinates
        # are the same
        if layer.is_vector:
            layer_coordinates = layer.get_geometry()
            if coordinates is None:
                coordinates = layer_coordinates
            else:
                # Compare the number of features first so that a mismatch
                # is reported through the assertion rather than as a
                # broadcasting error inside numpy.allclose
                same = (len(coordinates) == len(layer_coordinates) and
                        numpy.allclose(coordinates, layer_coordinates))
                assert same, (
                    'Coordinates in input vector layers are different: '
                    '%s %s' % (coordinates, layer_coordinates))
def read_from_file(self, filename):
    """Read raster metadata from file and open its first band

    Input
        filename: Name of a raster file to be opened with GDAL. A file
                  with extension .asc must be accompanied by a projection
                  file with the same basename and extension .prj

    The following attributes are set on this instance:
    fid, keywords, filename, name, projection, geotransform, columns,
    rows, number_of_bands and band.

    Raises
        Exception if the file cannot be opened, if it holds more than
        one band or if the first band cannot be read.
        RuntimeError if the projection file of an .asc file
        cannot be opened.
    """

    # Open data file for reading
    # File must be kept open, otherwise GDAL methods segfault.
    fid = self.fid = gdal.Open(filename, gdal.GA_ReadOnly)
    if fid is None:
        msg = "Could not open file %s" % filename
        raise Exception(msg)

    # Record raster metadata from file
    basename, ext = os.path.splitext(filename)

    # If file is ASCII, check that projection is around.
    # GDAL does not check this nicely, so it is worth an
    # error message
    if ext == ".asc":
        try:
            # Opened only to verify that it is readable, so the handle
            # is closed again immediately
            with open(basename + ".prj"):
                pass
        except IOError:
            msg = (
                "Projection file not found for %s. You must supply "
                "a projection file with extension .prj" % filename
            )
            raise RuntimeError(msg)

    # Look for any keywords
    self.keywords = read_keywords(basename + ".keywords")

    # Always use basename without leading directories as name
    rastername = os.path.split(basename)[-1]

    self.filename = filename
    self.name = rastername

    self.projection = Projection(self.fid.GetProjection())
    self.geotransform = self.fid.GetGeoTransform()
    self.columns = fid.RasterXSize
    self.rows = fid.RasterYSize
    self.number_of_bands = fid.RasterCount

    # Assume that file contains all data in one band
    if self.number_of_bands > 1:
        msg = (
            "WARNING: Number of bands in %s are %i. "
            "Only the first band will currently be "
            "used." % (filename, self.number_of_bands)
        )
        # FIXME(Ole): Let us use python warnings here
        raise Exception(msg)

    # Get first band.
    band = self.band = fid.GetRasterBand(1)
    if band is None:
        msg = "Could not read raster band from %s" % filename
        raise Exception(msg)
def read_from_file(self, filename):
    """Read raster metadata from file and open its first band

    Input
        filename: Name of a raster file to be opened with GDAL. A file
                  with extension .asc must be accompanied by a projection
                  file with the same basename and extension .prj

    The layer name is taken from the keyword 'title' if the associated
    .keywords file provides one, otherwise from the basename of the file.

    The following attributes are set on this instance:
    fid, keywords, name, filename, projection, geotransform, columns,
    rows, number_of_bands and band.

    Raises
        Exception if the file cannot be opened, if it holds more than
        one band or if the first band cannot be read.
        RuntimeError if the projection file of an .asc file
        cannot be opened.
    """

    # Open data file for reading
    # File must be kept open, otherwise GDAL methods segfault.
    fid = self.fid = gdal.Open(filename, gdal.GA_ReadOnly)
    if fid is None:
        msg = 'Could not open file %s' % filename
        raise Exception(msg)

    # Record raster metadata from file
    basename, ext = os.path.splitext(filename)

    # If file is ASCII, check that projection is around.
    # GDAL does not check this nicely, so it is worth an
    # error message
    if ext == '.asc':
        try:
            # Opened only to verify that it is readable, so the handle
            # is closed again immediately
            with open(basename + '.prj'):
                pass
        except IOError:
            msg = ('Projection file not found for %s. You must supply '
                   'a projection file with extension .prj' % filename)
            raise RuntimeError(msg)

    # Look for any keywords
    self.keywords = read_keywords(basename + '.keywords')

    # Determine name
    if 'title' in self.keywords:
        rastername = self.keywords['title']
    else:
        # Use basename without leading directories as name
        rastername = os.path.split(basename)[-1]

    self.name = rastername
    self.filename = filename

    self.projection = Projection(self.fid.GetProjection())
    self.geotransform = self.fid.GetGeoTransform()
    self.columns = fid.RasterXSize
    self.rows = fid.RasterYSize
    self.number_of_bands = fid.RasterCount

    # Assume that file contains all data in one band
    if self.number_of_bands > 1:
        msg = ('WARNING: Number of bands in %s are %i. '
               'Only the first band will currently be '
               'used.' % (filename, self.number_of_bands))
        # FIXME(Ole): Let us use python warnings here
        raise Exception(msg)

    # Get first band.
    band = self.band = fid.GetRasterBand(1)
    if band is None:
        msg = 'Could not read raster band from %s' % filename
        raise Exception(msg)
def __init__(self, data=None, projection=None, geotransform=None,
             name='Raster layer', caption=''):
    """Initialise object with either data or filename

    Input
        data: Can be either
            * a filename of a raster file format known to GDAL
            * an MxN array (or nested sequence) of raster data
            * None
        projection: Geospatial reference in WKT format.
                    Only used if data is provided as a numeric array,
        geotransform: GDAL geotransform (6-tuple).
            (top left x, w-e pixel resolution, rotation,
            top left y, rotation, n-s pixel resolution).
            See e.g. http://www.gdal.org/gdal_tutorial.html
            Only used if data is provided as a numeric array,
        name: Optional name for layer.
              Only used if data is provided as a numeric array,
        caption: Optional text field that describes the layer. This field
                 can for example be used to display text about the layer
                 in a web application.
    """

    self.caption = caption

    if data is None:
        # Instantiate empty object
        self.name = name
        self.data = None
        self.projection = None
        self.coordinates = None
        self.filename = None
        return

    if isinstance(data, basestring):
        self.read_from_file(data)
    else:
        # Assume that data is provided as an array
        # with extra keyword arguments supplying metadata

        # asarray only copies when needed. numpy.array(..., copy=False)
        # raises ValueError in NumPy 2 whenever a copy is required.
        self.data = numpy.asarray(data, dtype='d')

        self.filename = None
        self.name = name

        self.projection = Projection(projection)
        self.geotransform = geotransform

        # Take the dimensions from the converted array so that nested
        # sequences are accepted as well as numpy arrays
        self.rows = self.data.shape[0]
        self.columns = self.data.shape[1]

        self.number_of_bands = 1
def __init__(self, data=None, projection=None, geometry=None,
             name='Vector layer', caption=''):
    """Initialise object with either geometry or filename

    Input
        data: Can be either
            * a filename of a vector file format known to GDAL
            * List of dictionaries of fields associated with
              point coordinates
            * None
        projection: Geospatial reference in WKT format.
                    Only used if geometry is provided as a numeric array,
        geometry: An Nx2 array of point coordinates
        name: Optional name for layer.
              Only used if geometry is provided as a numeric array
        caption: Optional text field that describes the layer. This field
                 can for example be used to display text about the layer
                 in a web application.

    Raises
        AssertionError if data is not a filename and either geometry
        or projection is missing.
    """

    self.caption = caption

    if data is None and projection is None and geometry is None:
        # Instantiate empty object
        self.name = name
        self.projection = None
        self.geometry = None
        self.filename = None
        self.data = None
        self.extent = None
        return

    if isinstance(data, basestring):
        self.read_from_file(data)
    else:
        # Assume that geometry is provided as an array
        # with extra keyword arguments supplying metadata

        msg = 'Geometry must be specified'
        assert geometry is not None, msg

        # asarray only copies when needed. numpy.array(..., copy=False)
        # raises ValueError in NumPy 2 whenever a copy is required.
        self.geometry = numpy.asarray(geometry, dtype='d')

        msg = 'Projection must be specified'
        assert projection is not None, msg
        self.projection = Projection(projection)

        self.data = data
        self.name = name
        self.filename = None
def __init__(self, data=None, projection=None, geotransform=None,
             name='Raster layer', keywords=None):
    """Initialise object with either data or filename

    Input
        data: Can be either
            * a filename of a raster file format known to GDAL
            * an MxN array (or nested sequence) of raster data
            * None (FIXME (Ole): Remove this option)
        projection: Geospatial reference in WKT format.
                    Only used if data is provided as a numeric array,
        geotransform: GDAL geotransform (6-tuple).
            (top left x, w-e pixel resolution, rotation,
            top left y, rotation, n-s pixel resolution).
            See e.g. http://www.gdal.org/gdal_tutorial.html
            Only used if data is provided as a numeric array,
        name: Optional name for layer.
              Only used if data is provided as a numeric array,
        keywords: Optional dictionary with keywords that describe the
                  layer. When the layer is stored, these keywords will
                  be written into an associated file with extension
                  .keywords.

                  Keywords can for example be used to display text
                  about the layer in a web application.

    Note that if data is a filename, all other arguments are ignored
    as they will be inferred from the file.

    Raises
        AssertionError if keywords is neither None nor a dictionary.
    """

    # Input checks
    if data is None:
        # Instantiate empty object
        self.name = name
        self.data = None
        self.projection = None
        self.coordinates = None
        self.filename = None
        self.keywords = {}
        return

    # Initialisation
    if isinstance(data, basestring):
        self.read_from_file(data)
    else:
        # Assume that data is provided as an array
        # with extra keyword arguments supplying metadata
        if keywords is None:
            self.keywords = {}
        else:
            # Wrap the argument in a tuple: formatting a bare tuple
            # would otherwise raise TypeError and hide this message
            msg = ('Specified keywords must be either None or a '
                   'dictionary. I got %s' % (keywords,))
            assert isinstance(keywords, dict), msg
            self.keywords = keywords

        # asarray only copies when needed. numpy.array(..., copy=False)
        # raises ValueError in NumPy 2 whenever a copy is required.
        self.data = numpy.asarray(data, dtype='d')

        self.filename = None
        self.name = name

        self.projection = Projection(projection)
        self.geotransform = geotransform

        # Take the dimensions from the converted array so that nested
        # sequences are accepted as well as numpy arrays
        self.rows = self.data.shape[0]
        self.columns = self.data.shape[1]

        self.number_of_bands = 1
def __init__(self, data=None, projection=None, geometry=None,
             name="Vector layer", keywords=None):
    """Initialise object with either geometry or filename

    Input
        data: Can be either
            * a filename of a vector file format known to GDAL
            * List of dictionaries of fields associated with
              point coordinates
            * None
        projection: Geospatial reference in WKT format.
                    Only used if geometry is provided as a numeric array,
        geometry: A list of either point coordinates or polygons
        name: Optional name for layer.
              Only used if geometry is provided as a numeric array
        keywords: Optional dictionary with keywords that describe the
                  layer. When the layer is stored, these keywords will
                  be written into an associated file with extension
                  .keywords.

                  Keywords can for example be used to display text
                  about the layer in a web application.

    Note that if data is a filename, all other arguments are ignored
    as they will be inferred from the file.

    The geometry type will be inferred from the dimensions of geometry.
    If each entry is one set of coordinates the type will be
    ogr.wkbPoint, if it is an array of coordinates the type will be
    ogr.wkbPolygon.

    Raises
        AssertionError if keywords is neither None nor a dictionary,
        if geometry or projection is missing, if geometry or data is
        not a sequence or if geometry and data differ in length.
    """

    if data is None and projection is None and geometry is None:
        # Instantiate empty object
        self.name = name
        self.projection = None
        self.geometry = None
        self.geometry_type = None
        self.filename = None
        self.data = None
        self.extent = None
        self.keywords = {}
        return

    if isinstance(data, basestring):
        self.read_from_file(data)
    else:
        # Assume that data is provided as sequences provided as
        # arguments to the Vector constructor
        # with extra keyword arguments supplying metadata

        self.name = name
        self.filename = None

        if keywords is None:
            self.keywords = {}
        else:
            # Wrap the argument in a tuple: formatting a bare tuple
            # would otherwise raise TypeError and hide this message
            msg = ("Specified keywords must be either None or a "
                   "dictionary. I got %s" % (keywords,))
            assert isinstance(keywords, dict), msg
            self.keywords = keywords

        msg = "Geometry must be specified"
        assert geometry is not None, msg

        msg = "Geometry must be a sequence"
        assert is_sequence(geometry), msg
        self.geometry = geometry

        self.geometry_type = get_geometry_type(geometry)

        msg = "Projection must be specified"
        assert projection is not None, msg
        self.projection = Projection(projection)

        self.data = data
        if data is not None:
            msg = "Data must be a sequence"
            assert is_sequence(data), msg

            msg = ("The number of entries in geometry and data "
                   "must be the same")
            assert len(geometry) == len(data), msg
def read_from_file(self, filename):
    """Read and unpack vector data.

    It is assumed that the file contains only one layer with the
    pertinent features. Point and polygon geometries are supported;
    for polygons only the ring with index 0 is read.

    * A feature is a geometry and a set of attributes.
    * A geometry refers to location and can be point, line, polygon or
      combinations thereof.
    * The attributes are obtained through GetField()

    The full OGR architecture is documented at
    * http://www.gdal.org/ogr/ogr_arch.html
    * http://www.gdal.org/ogr/ogr_apitut.html

    Examples are at
    * danieljlewis.org/files/2010/09/basicpythonmap.pdf
    * http://invisibleroads.com/tutorials/gdal-shapefile-points-save.html
    * http://www.packtpub.com/article/geospatial-data-python-geometry

    The following attributes are set on this instance:
    keywords, name, filename, geometry_type, extent, projection,
    geometry and data.

    Raises
        IOError if the file cannot be opened.
        Exception if the file has more than one layer, if a feature or
        its geometry cannot be read or if a geometry is neither
        point nor polygon.
    """

    basename, _ = os.path.splitext(filename)

    # Look for any keywords
    self.keywords = read_keywords(basename + ".keywords")

    # Determine name
    if "title" in self.keywords:
        vectorname = self.keywords["title"]
    else:
        # Use basename without leading directories as name
        vectorname = os.path.split(basename)[-1]

    self.name = vectorname
    self.filename = filename
    self.geometry_type = None  # In case there are no features

    fid = ogr.Open(filename)
    if fid is None:
        msg = "Could not open %s" % filename
        raise IOError(msg)

    # Assume that file contains all data in one layer
    if fid.GetLayerCount() > 1:
        msg = (
            "WARNING: Number of layers in %s are %i. "
            "Only the first layer will currently be "
            "used." % (filename, fid.GetLayerCount())
        )
        raise Exception(msg)

    layer = fid.GetLayerByIndex(0)

    # Get spatial extent
    self.extent = layer.GetExtent()

    # Get projection
    p = layer.GetSpatialRef()
    self.projection = Projection(p)

    # Get number of features
    N = layer.GetFeatureCount()

    # Extract coordinates and attributes for all features
    # NOTE(review): features are fetched by index 0..N-1, which
    # presumes that the feature ids of the layer are exactly
    # that range - confirm for formats other than shapefiles.
    geometry = []
    data = []
    for i in range(N):
        feature = layer.GetFeature(i)
        if feature is None:
            msg = "Could not get feature %i from %s" % (i, filename)
            raise Exception(msg)

        # Record coordinates ordered as Longitude, Latitude
        G = feature.GetGeometryRef()
        if G is None:
            msg = "Geometry was None in filename %s " % filename
            raise Exception(msg)

        self.geometry_type = G.GetGeometryType()
        if self.geometry_type == ogr.wkbPoint:
            geometry.append((G.GetX(), G.GetY()))
        elif self.geometry_type == ogr.wkbPolygon:
            ring = G.GetGeometryRef(0)
            M = ring.GetPointCount()
            coordinates = [(ring.GetX(j), ring.GetY(j))
                           for j in range(M)]

            # Record entire polygon ring as an Mx2 numpy array.
            # asarray is used because numpy.array(..., copy=False)
            # raises ValueError in NumPy 2 when given a list.
            geometry.append(numpy.asarray(coordinates, dtype="d"))
        else:
            msg = (
                "Only point and polygon geometries are supported. "
                "Geometry in filename %s "
                "was %s." % (filename, G.GetGeometryType())
            )
            raise Exception(msg)

        # Record attributes by name
        number_of_fields = feature.GetFieldCount()
        fields = {}
        for j in range(number_of_fields):
            name = feature.GetFieldDefnRef(j).GetName()

            # FIXME (Ole): Ascertain the type of each field?
            #              We need to cast each appropriately?
            #              This is issue #66
            fields[name] = feature.GetField(j)

        data.append(fields)

    # Store geometry (list with one entry per feature) and attributes
    self.geometry = geometry
    self.data = data
class Vector:
    """Class for abstraction of vector data
    """

    def __init__(self, data=None, projection=None, geometry=None,
                 name="Vector layer", keywords=None):
        """Initialise object with either geometry or filename

        Input
            data: Can be either
                * a filename of a vector file format known to GDAL
                * List of dictionaries of fields associated with
                  point coordinates
                * None
            projection: Geospatial reference in WKT format.
                        Only used if geometry is provided as a numeric
                        array,
            geometry: A list of either point coordinates or polygons
            name: Optional name for layer.
                  Only used if geometry is provided as a numeric array
            keywords: Optional dictionary with keywords that describe the
                      layer. When the layer is stored, these keywords will
                      be written into an associated file with extension
                      .keywords.

                      Keywords can for example be used to display text
                      about the layer in a web application.

        Note that if data is a filename, all other arguments are ignored
        as they will be inferred from the file.

        The geometry type will be inferred from the dimensions of geometry.
        If each entry is one set of coordinates the type will be
        ogr.wkbPoint, if it is an array of coordinates the type will be
        ogr.wkbPolygon.

        Raises
            AssertionError if keywords is neither None nor a dictionary,
            if geometry or projection is missing, if geometry or data is
            not a sequence or if geometry and data differ in length.
        """

        if data is None and projection is None and geometry is None:
            # Instantiate empty object
            self.name = name
            self.projection = None
            self.geometry = None
            self.geometry_type = None
            self.filename = None
            self.data = None
            self.extent = None
            self.keywords = {}
            return

        if isinstance(data, basestring):
            self.read_from_file(data)
        else:
            # Assume that data is provided as sequences provided as
            # arguments to the Vector constructor
            # with extra keyword arguments supplying metadata

            self.name = name
            self.filename = None

            if keywords is None:
                self.keywords = {}
            else:
                # Wrap the argument in a tuple: formatting a bare tuple
                # would otherwise raise TypeError and hide this message
                msg = ("Specified keywords must be either None or a "
                       "dictionary. I got %s" % (keywords,))
                assert isinstance(keywords, dict), msg
                self.keywords = keywords

            msg = "Geometry must be specified"
            assert geometry is not None, msg

            msg = "Geometry must be a sequence"
            assert is_sequence(geometry), msg
            self.geometry = geometry

            self.geometry_type = get_geometry_type(geometry)

            msg = "Projection must be specified"
            assert projection is not None, msg
            self.projection = Projection(projection)

            self.data = data
            if data is not None:
                msg = "Data must be a sequence"
                assert is_sequence(data), msg

                msg = ("The number of entries in geometry and data "
                       "must be the same")
                assert len(geometry) == len(data), msg

            # FIXME: Need to establish extent here

    def __str__(self):
        """Return one line summary with name, size and geometry type
        """

        g_type_str = geometrytype2string(self.geometry_type)
        return ("Vector data set: %s, %i features, geometry type "
                "%s (%s)" % (self.name, len(self),
                             str(self.geometry_type), g_type_str))

    def __len__(self):
        """Size of vector layer defined as number of features
        """

        return len(self.geometry)

    def __eq__(self, other, rtol=1.0e-5, atol=1.0e-8):
        """Override '==' to allow comparison with other vector objects

        Input
            other: Vector instance to compare to
            rtol, atol: Relative and absolute tolerance.
                        See numpy.allclose for details

        Two layers are equal if projection, geometry, attribute names,
        attribute values and keywords agree. Numerical values are
        compared up to the specified tolerances.

        Raises
            TypeError if other is not a Vector instance.
        """

        # Check type
        if not isinstance(other, Vector):
            msg = ("Vector instance cannot be compared to %s"
                   " as its type is %s " % (str(other), type(other)))
            raise TypeError(msg)

        # Check projection
        if self.projection != other.projection:
            return False

        # Check geometry
        if not self._geometries_close(other, rtol, atol):
            return False

        # Check attributes
        x = self.get_data()
        y = other.get_data()
        if not x or not y:
            # Missing or empty attribute tables only match each other
            if x or y:
                return False
        else:
            if len(x) != len(y):
                return False

            # Check keys
            for key in x[0]:
                for b in y:
                    if key not in b:
                        return False

            for key in y[0]:
                for a in x:
                    if key not in a:
                        return False

            # Check data
            for a, b in zip(x, y):
                for key in a:
                    if key not in b:
                        return False
                    if not self._values_close(a[key], b[key], rtol, atol):
                        return False

        # Check keywords
        if self.keywords != other.keywords:
            return False

        # Vector layers are identical up to the specified tolerance
        return True

    def _geometries_close(self, other, rtol, atol):
        """Compare geometry with that of other feature by feature

        Comparing one feature at a time allows for polygons with
        different numbers of vertices, which cannot be stacked into
        one rectangular array.
        """

        g0 = self.get_geometry()
        g1 = other.get_geometry()
        if g0 is None or g1 is None:
            return g0 is None and g1 is None

        if len(g0) != len(g1):
            return False

        for p, q in zip(g0, g1):
            p = numpy.asarray(p, dtype="d")
            q = numpy.asarray(q, dtype="d")
            if p.shape != q.shape:
                return False
            if not numpy.allclose(p, q, rtol=rtol, atol=atol):
                return False

        return True

    def _values_close(self, a, b, rtol, atol):
        """Compare two attribute values, numerically where possible
        """

        if not (a != b):
            return True

        # Not equal, try numerical comparison with tolerances
        try:
            return bool(numpy.allclose(a, b, rtol=rtol, atol=atol))
        except (TypeError, ValueError):
            # Values such as strings cannot be compared numerically
            # and have already been found to differ
            return False

    def __ne__(self, other):
        """Override '!=' to allow comparison with other vector objects
        """

        return not self == other

    def get_name(self):
        """Return name of this layer
        """

        return self.name

    def get_keywords(self, key=None):
        """Return keywords dictionary

        If key is specified only the value of that keyword is returned.

        Raises
            Exception if key is specified but does not exist.
        """

        if key is None:
            return self.keywords

        if key in self.keywords:
            return self.keywords[key]

        msg = ("Keyword %s does not exist in %s: Options are "
               "%s" % (key, self.get_name(), self.keywords.keys()))
        raise Exception(msg)

    def get_caption(self):
        """Return 'caption' keyword if present. Otherwise ''.
        """

        if "caption" in self.keywords:
            return self.keywords["caption"]
        else:
            return ""

    def read_from_file(self, filename):
        """Read and unpack vector data.

        It is assumed that the file contains only one layer with the
        pertinent features. Point and polygon geometries are supported;
        for polygons only the ring with index 0 is read.

        * A feature is a geometry and a set of attributes.
        * A geometry refers to location and can be point, line, polygon or
          combinations thereof.
        * The attributes are obtained through GetField()

        The full OGR architecture is documented at
        * http://www.gdal.org/ogr/ogr_arch.html
        * http://www.gdal.org/ogr/ogr_apitut.html

        Examples are at
        * danieljlewis.org/files/2010/09/basicpythonmap.pdf
        * http://invisibleroads.com/tutorials/gdal-shapefile-points-save.html
        * http://www.packtpub.com/article/geospatial-data-python-geometry

        Raises
            IOError if the file cannot be opened.
            Exception if the file has more than one layer, if a feature
            or its geometry cannot be read or if a geometry is neither
            point nor polygon.
        """

        basename, _ = os.path.splitext(filename)

        # Look for any keywords
        self.keywords = read_keywords(basename + ".keywords")

        # Determine name
        if "title" in self.keywords:
            vectorname = self.keywords["title"]
        else:
            # Use basename without leading directories as name
            vectorname = os.path.split(basename)[-1]

        self.name = vectorname
        self.filename = filename
        self.geometry_type = None  # In case there are no features

        fid = ogr.Open(filename)
        if fid is None:
            msg = "Could not open %s" % filename
            raise IOError(msg)

        # Assume that file contains all data in one layer
        if fid.GetLayerCount() > 1:
            msg = (
                "WARNING: Number of layers in %s are %i. "
                "Only the first layer will currently be "
                "used." % (filename, fid.GetLayerCount())
            )
            raise Exception(msg)

        layer = fid.GetLayerByIndex(0)

        # Get spatial extent
        self.extent = layer.GetExtent()

        # Get projection
        p = layer.GetSpatialRef()
        self.projection = Projection(p)

        # Get number of features
        N = layer.GetFeatureCount()

        # Extract coordinates and attributes for all features
        # NOTE(review): features are fetched by index 0..N-1, which
        # presumes that the feature ids of the layer are exactly
        # that range - confirm for formats other than shapefiles.
        geometry = []
        data = []
        for i in range(N):
            feature = layer.GetFeature(i)
            if feature is None:
                msg = "Could not get feature %i from %s" % (i, filename)
                raise Exception(msg)

            # Record coordinates ordered as Longitude, Latitude
            G = feature.GetGeometryRef()
            if G is None:
                msg = "Geometry was None in filename %s " % filename
                raise Exception(msg)

            self.geometry_type = G.GetGeometryType()
            if self.geometry_type == ogr.wkbPoint:
                geometry.append((G.GetX(), G.GetY()))
            elif self.geometry_type == ogr.wkbPolygon:
                ring = G.GetGeometryRef(0)
                M = ring.GetPointCount()
                coordinates = [(ring.GetX(j), ring.GetY(j))
                               for j in range(M)]

                # Record entire polygon ring as an Mx2 numpy array.
                # asarray is used because numpy.array(..., copy=False)
                # raises ValueError in NumPy 2 when given a list.
                geometry.append(numpy.asarray(coordinates, dtype="d"))
            else:
                msg = (
                    "Only point and polygon geometries are supported. "
                    "Geometry in filename %s "
                    "was %s." % (filename, G.GetGeometryType())
                )
                raise Exception(msg)

            # Record attributes by name
            number_of_fields = feature.GetFieldCount()
            fields = {}
            for j in range(number_of_fields):
                name = feature.GetFieldDefnRef(j).GetName()

                # FIXME (Ole): Ascertain the type of each field?
                #              We need to cast each appropriately?
                #              This is issue #66
                fields[name] = feature.GetField(j)

            data.append(fields)

        # Store geometry (list with one entry per feature) and attributes
        self.geometry = geometry
        self.data = data

    def write_to_file(self, filename):
        """Save vector data to file

        Input
            filename: filename with extension .shp or .gml

        Note, if attribute names are longer than 10 characters they will be
        truncated. This is due to limitations in the shp file driver and has
        to be done here since gdal v1.7 onwards has changed its handling of
        this issue: http://www.gdal.org/ogr/drv_shapefile.html

        Keywords are written to a file with the same basename and
        extension .keywords.

        Raises
            AssertionError if the extension is neither .shp nor .gml or
            if an attribute has a type without an OGR counterpart.
            Exception if the format is .gml (currently disabled) or if
            any step of creating the file fails.
        """

        # Check file format
        basename, extension = os.path.splitext(filename)

        msg = ("Invalid file type for file %s. Only extensions "
               "shp or gml allowed." % filename)
        assert extension == ".shp" or extension == ".gml", msg
        driver = DRIVER_MAP[extension]

        # FIXME (Ole): Tempory flagging of GML issue (ticket #18)
        if extension == ".gml":
            msg = (
                "OGR GML driver does not store geospatial reference. "
                "This format is disabled for the time being. See "
                "https://github.com/AIFDR/riab/issues/18"
            )
            raise Exception(msg)

        # Derive layername from filename (excluding preceding dirs)
        layername = os.path.split(basename)[-1]

        # Get vector data
        geometry = self.get_geometry()
        data = self.get_data()

        N = len(geometry)

        # Clear any previous file of this name (ogr does not overwrite)
        try:
            os.remove(filename)
        except OSError:
            # Typically there is no previous file. Any other problem
            # will surface when the data source is created below.
            pass

        # Create new file with one layer
        drv = ogr.GetDriverByName(driver)
        if drv is None:
            msg = "OGR driver %s not available" % driver
            raise Exception(msg)

        ds = drv.CreateDataSource(filename)
        if ds is None:
            msg = "Creation of output file %s failed" % filename
            raise Exception(msg)

        lyr = ds.CreateLayer(layername,
                             self.projection.spatial_reference,
                             self.geometry_type)
        if lyr is None:
            msg = "Could not create layer %s" % layername
            raise Exception(msg)

        # Define attributes if any
        store_attributes = False
        if data is not None:
            if len(data) == 0:
                msg = ('Input parameter "data" was specified '
                       "but appears to be empty")
                raise Exception(msg)

            try:
                # Fixed list so that the order is the same for every
                # iteration over the field names below
                fields = list(data[0].keys())
            except Exception:
                msg = (
                    'Input parameter "attributes" was specified '
                    "but it does not contain dictionaries with "
                    "field information as expected. The first "
                    "element is %s" % data[0]
                )
                raise Exception(msg)

            # Establish OGR types for each element
            ogrtypes = {}
            for name in fields:
                att = data[0][name]
                py_type = type(att)
                msg = ("Unknown type for storing vector "
                       "data: %s, %s" % (name, str(py_type)[1:-1]))
                assert py_type in TYPE_MAP, msg
                ogrtypes[name] = TYPE_MAP[py_type]

            # Create attribute fields in layer
            store_attributes = True
            for name in fields:
                fd = ogr.FieldDefn(name, ogrtypes[name])
                # FIXME (Ole): Trying to address issue #16
                #              But it doesn't work and
                #              somehow changes the values of MMI in test
                # width = max(128, len(name))
                # fd.SetWidth(width)

                # Silent handling of warnings like
                # Warning 6: Normalized/laundered field name:
                # 'CONTENTS_LOSS_AUD' to 'CONTENTS_L'
                gdal.PushErrorHandler("CPLQuietErrorHandler")
                try:
                    if lyr.CreateField(fd) != 0:
                        msg = "Could not create field %s" % name
                        raise Exception(msg)
                finally:
                    # Restore error handler, also if creation failed
                    gdal.PopErrorHandler()

        # Store geometry
        geom = ogr.Geometry(self.geometry_type)
        layer_def = lyr.GetLayerDefn()
        for i in range(N):
            # Create new feature instance
            feature = ogr.Feature(layer_def)

            # Store geometry and check
            if self.geometry_type == ogr.wkbPoint:
                x = float(geometry[i][0])
                y = float(geometry[i][1])
                geom.SetPoint_2D(0, x, y)
            elif self.geometry_type == ogr.wkbPolygon:
                wkt = array2wkt(geometry[i], geom_type="POLYGON")
                geom = ogr.CreateGeometryFromWkt(wkt)
            else:
                msg = "Geometry type %s not implemented" % self.geometry_type
                raise Exception(msg)

            feature.SetGeometry(geom)

            G = feature.GetGeometryRef()
            if G is None:
                msg = "Could not create GeometryRef for file %s" % filename
                raise Exception(msg)

            # Store attributes
            if store_attributes:
                for j, name in enumerate(fields):
                    # The driver may have renamed the field (see above)
                    actual_field_name = layer_def.GetFieldDefn(j).GetNameRef()

                    val = data[i][name]
                    if isinstance(val, numpy.ndarray):
                        # A singleton of type <type 'numpy.ndarray'> works
                        # for gdal version 1.6 but fails for version 1.8
                        # in SetField with error: NotImplementedError:
                        # Wrong number of arguments for overloaded function
                        val = float(val)

                    feature.SetField(actual_field_name, val)

            # Save this feature
            if lyr.CreateFeature(feature) != 0:
                msg = "Failed to create feature %i in file %s" % (i, filename)
                raise Exception(msg)

            feature.Destroy()

        # Write keywords if any
        write_keywords(self.keywords, basename + ".keywords")

    def get_attribute_names(self):
        """Get available attribute names

        These are the ones that can be used with get_data
        """

        return self.data[0].keys()

    def get_data(self, attribute=None, index=None):
        """Get vector attributes

        Data is returned as a list where each entry is a dictionary of
        attributes for one feature. Entries in get_geometry() and
        get_data() are related as 1-to-1

        If optional argument attribute is specified and a valid name,
        then the list of values for that attribute is returned.

        If optional argument index is specified only that value will
        be returned. Any value of index is ignored if attribute is None.

        Raises
            Exception if this instance has no attribute 'data'.
            AssertionError if attribute is not a valid name, or if index
            is not an integer within the bounds of the layer.
        """

        if not hasattr(self, "data"):
            msg = "Vector data instance does not have any attributes"
            raise Exception(msg)

        if attribute is None:
            return self.data

        # Messages are only formatted when a check fails because they
        # include the string representation of the entire layer
        if attribute not in self.data[0]:
            msg = (
                "Specified attribute %s does not exist in "
                "vector layer %s. Valid names are %s"
                "" % (attribute, self, self.data[0].keys())
            )
            raise AssertionError(msg)

        if index is None:
            # Return all values for specified attribute
            return [x[attribute] for x in self.data]

        # Return value for specified attribute and index
        if not isinstance(index, int):
            msg = ("Specified index must be either None or "
                   "an integer. I got %s" % (index,))
            raise AssertionError(msg)

        if not 0 <= index < len(self):
            msg = (
                "Specified index must lie within the bounds "
                "of vector layer %s which is [%i, %i]"
                "" % (self, 0, len(self) - 1)
            )
            raise AssertionError(msg)

        return self.data[index][attribute]

    def get_geometry(self):
        """Return geometry for vector layer.

        Depending on the feature type, geometry is

        geometry type     output type
        -----------------------------
        point             coordinates (Nx2 array of longitudes and latitudes)
        line              TODO
        polygon           list of arrays of coordinates
        """

        return self.geometry

    def get_projection(self, proj4=False):
        """Return projection of this layer as a string
        """

        return self.projection.get_projection(proj4)

    def get_bounding_box(self):
        """Get bounding box coordinates for vector layer.

        Format is [West, South, East, North]
        """

        e = self.extent
        return [e[0],  # West
                e[2],  # South
                e[1],  # East
                e[3]]  # North

    def get_extrema(self, attribute=None):
        """Get min and max values from specified attribute

        Return min, max

        Raises
            RuntimeError if attribute is None.
        """

        if attribute is None:
            msg = ("Valid attribute name must be specified in get_extrema "
                   "for vector layers. I got None.")
            raise RuntimeError(msg)

        x = self.get_data(attribute)
        return min(x), max(x)

    def get_topN(self, attribute, N=10):
        """Get top N features

        Input
            attribute: The name of attribute where values are sought
            N: How many

        Output
            layer: New vector layer with selected features, ordered by
                   increasing value of the attribute
        """

        # FIXME (Ole): Maybe generalise this to arbitrary expressions

        # Input checks
        msg = ("Specfied attribute must be a string. "
               "I got %s" % (type(attribute)))
        assert isinstance(attribute, basestring), msg

        msg = "Specified attribute was empty"
        assert attribute != "", msg

        msg = "N must be a positive number. I got %i" % N
        assert N > 0, msg

        # Create list of values for specified attribute
        values = self.get_data(attribute)

        # Sort on the attribute value only. Sorting the tuples themselves
        # would fall back to comparing attribute dictionaries whenever
        # two values are equal.
        A = sorted(zip(values, self.data, self.geometry),
                   key=lambda entry: entry[0])

        # Pick top N and unpack
        _, data, geometry = zip(*A[-N:])

        # Create new Vector instance and return
        return Vector(data=data,
                      projection=self.get_projection(),
                      geometry=geometry)

    def interpolate(self, X, name=None):
        """Interpolate values of this vector layer to other layer

        Input
            X: Layer object defining target
            name: Optional name of interpolated layer

        Output
            Y: Layer object with values of this vector layer interpolated to
               geometry of input layer X

        Not yet implemented: always raises Exception.
        """

        msg = "Interpolation from vector layers not yet implemented"
        raise Exception(msg)

    @property
    def is_raster(self):
        return False

    @property
    def is_vector(self):
        return True

    @property
    def is_point_data(self):
        return self.is_vector and self.geometry_type == ogr.wkbPoint

    @property
    def is_polygon_data(self):
        return self.is_vector and self.geometry_type == ogr.wkbPolygon
class Raster:
    """Internal representation of raster data

    A Raster is backed either by a file opened through GDAL (see
    read_from_file, which sets self.fid and self.band) or by a numeric
    array handed to the constructor (stored in self.data).
    """

    def __init__(self, data=None, projection=None, geotransform=None,
                 name='Raster layer', caption=''):
        """Initialise object with either data or filename

        Input
            data: Can be either
                * a filename of a raster file format known to GDAL
                * an MxN array of raster data
                * None
            projection: Geospatial reference in WKT format.
                        Only used if data is provide as a numeric array,
            geotransform: GDAL geotransform (6-tuple).
                          (top left x, w-e pixel resolution, rotation,
                           top left y, rotation, n-s pixel resolution).
                          See e.g. http://www.gdal.org/gdal_tutorial.html
                          Only used if data is provide as a numeric array,
            name: Optional name for layer.
                  Only used if data is provide as a numeric array,
            caption: Optional text field that describes the layer. This
                     field can for example be used to display text about
                     the layer in a web application.
        """

        self.caption = caption

        if data is None:
            # Instantiate empty object
            self.name = name
            self.data = None
            self.projection = None
            self.coordinates = None
            self.filename = None
            return

        if isinstance(data, basestring):
            self.read_from_file(data)
        else:
            # Assume that data is provided as an array
            # with extra keyword arguments supplying metadata

            # asarray only copies when a conversion is required; unlike
            # numpy.array(..., copy=False) it does not raise on newer
            # numpy versions when a copy turns out to be necessary.
            self.data = numpy.asarray(data, dtype='d')

            self.filename = None
            self.name = name

            self.projection = Projection(projection)
            self.geotransform = geotransform

            # Take dimensions from the converted array so that nested
            # sequences (which have no .shape) are accepted as well
            self.rows = self.data.shape[0]
            self.columns = self.data.shape[1]

            self.number_of_bands = 1

    def __str__(self):
        return self.name

    def __len__(self):
        """Size of data set defined as total number of grid points
        """
        return len(self.get_data().flat)

    def __eq__(self, other, rtol=1.0e-5, atol=1.0e-8):
        """Override '==' to allow comparison with other raster objecs

        Input
           other: Raster instance to compare to
           rtol, atol: Relative and absolute tolerance.
                       See numpy.allclose for details

        Raises TypeError if other is not a Raster instance.
        """

        # Check type
        if not isinstance(other, Raster):
            msg = ('Raster instance cannot be compared to %s'
                   ' as its type is %s ' % (str(other), type(other)))
            raise TypeError(msg)

        # Check projection
        if self.projection != other.projection:
            return False

        # Check geotransform
        if self.get_geotransform() != other.get_geotransform():
            return False

        # Check data
        if not numpy.allclose(self.get_data(),
                              other.get_data(),
                              rtol=rtol, atol=atol):
            return False

        # Raster layers are identical up to the specified tolerance
        return True

    def __ne__(self, other):
        """Override '!=' to allow comparison with other raster objects
        """
        return not self == other

    def get_name(self):
        """Return the name of this layer
        """
        return self.name

    def get_caption(self):
        """Return the caption of this layer
        """
        return self.caption

    def read_from_file(self, filename):
        """Open raster file with GDAL and record its metadata

        Input
            filename: Name of a raster file in a format known to GDAL

        Sets fid, band, filename, name, projection, geotransform,
        columns, rows and number_of_bands on this instance.

        Raises Exception if the file or its first band cannot be opened
        or if it holds more than one band, and RuntimeError if an .asc
        file has no accompanying .prj file.
        """

        # Open data file for reading
        # File must be kept open, otherwise GDAL methods segfault.
        fid = self.fid = gdal.Open(filename, gdal.GA_ReadOnly)
        if fid is None:
            msg = 'Could not open file %s' % filename
            raise Exception(msg)

        # Record raster metadata from file
        basename, ext = os.path.splitext(filename)

        # If file is ASCII, check that projection is around.
        # GDAL does not check this nicely, so it is worth an
        # error message
        if ext == '.asc':
            try:
                prj_file = open(basename + '.prj')
            except IOError:
                msg = ('Projection file not found for %s. You must supply '
                       'a projection file with extension .prj' % filename)
                raise RuntimeError(msg)
            else:
                # The file was only opened to prove that it is readable
                prj_file.close()

        # Always use basename without leading directories as name
        rastername = os.path.split(basename)[-1]

        self.filename = filename
        self.name = rastername

        self.projection = Projection(self.fid.GetProjection())
        self.geotransform = self.fid.GetGeoTransform()
        self.columns = fid.RasterXSize
        self.rows = fid.RasterYSize
        self.number_of_bands = fid.RasterCount

        # Assume that file contains all data in one band
        if self.number_of_bands > 1:
            msg = ('WARNING: Number of bands in %s are %i. '
                   'Only the first band will currently be '
                   'used.' % (filename, self.number_of_bands))
            # FIXME(Ole): Let us use python warnings here
            raise Exception(msg)

        # Get first band.
        band = self.band = fid.GetRasterBand(1)
        if band is None:
            msg = 'Could not read raster band from %s' % filename
            raise Exception(msg)

    def write_to_file(self, filename):
        """Save raster data to file

        Input
            filename: filename with extension .tif

        Raises AssertionError for any other extension and Exception if
        GDAL cannot create the file.
        """

        # Check file format
        _, extension = os.path.splitext(filename)

        msg = ('Invalid file type for file %s. Only extension '
               'tif allowed.' % filename)
        assert extension == '.tif', msg
        driver_name = DRIVER_MAP[extension]

        # Get raster data
        A = self.get_data()

        # Get Dimensions. Note numpy and Gdal swap order
        N, M = A.shape

        # Create empty file
        driver = gdal.GetDriverByName(driver_name)
        fid = driver.Create(filename, M, N, 1, gdal.GDT_Float64)
        if fid is None:
            msg = ('Gdal could not create filename %s using '
                   'format %s' % (filename, driver_name))
            raise Exception(msg)

        # Write metadata
        fid.SetProjection(str(self.projection))
        fid.SetGeoTransform(self.geotransform)

        # Write data
        fid.GetRasterBand(1).WriteArray(A)

    def interpolate(self, X, name=None):
        """Interpolate values of this raster layer to other layer

        Input
            X: Layer object defining target
            name: Optional name of interpolated layer.
                  If name is None, the name of self is used.

        Output
            Y: Layer object with values of this raster layer interpolated to
               geometry of input layer X

        Raises Exception if X is a raster with a different geotransform.
        """

        if X.is_raster:
            if self.get_geotransform() != X.get_geotransform():
                # Need interpolation between grids
                msg = 'Intergrid interpolation not yet implemented'
                raise Exception(msg)
            else:
                # Rasters are aligned, no need to interpolate
                return self
        else:
            # Interpolate this raster layer to geometry of X
            return interpolate_raster_vector(self, X, name)

    def get_data(self, nan=False):
        """Get raster data as numeric array

        If keyword nan is True, nodata values will be replaced with NaN
        If keyword nan has a numeric value, that will be used for NODATA
        """

        # FIXME (Ole): Once we have the ability to use numpy.nan throughout,
        #              make that the default and name everything better

        if hasattr(self, 'data'):
            A = self.data
            assert A.shape[0] == self.rows and A.shape[1] == self.columns
        else:
            # Read from raster file
            A = self.band.ReadAsArray()

            M, N = A.shape
            msg = ('Dimensions of raster array do not match those of '
                   'raster file %s' % self.filename)
            assert M == self.rows, msg
            assert N == self.columns, msg

        # Identity tests on purpose: nan=0 is a legitimate replacement
        # value and must not be mistaken for nan=False
        if nan is not False:
            if nan is True:
                NAN = numpy.nan
            else:
                NAN = nan

            # Replace NODATA_VALUE with NaN
            nodata = self.get_nodata_value()

            NaN = numpy.ones(A.shape, A.dtype) * NAN
            A = numpy.where(A == nodata, NaN, A)

        return A

    def get_projection(self, proj4=False):
        """Return projection of this layer as a string.
        """
        return self.projection.get_projection(proj4)

    def get_geotransform(self):
        """Return geotransform for this raster layer

        Output
        geotransform: 6 digit vector
                      (top left x, w-e pixel resolution, rotation,
                       top left y, rotation, n-s pixel resolution).

                       See e.g. http://www.gdal.org/gdal_tutorial.html
        """

        return self.geotransform

    def get_geometry(self):
        """Return longitudes and latitudes (the axes) for grid.

        Return two vectors (longitudes and latitudes) corresponding to
        grid. The values are offset by half a pixel size to correspond to
        pixel registration.

        I.e. If the grid origin (top left corner) is (100, 10), the
        resolution is 1 degrees in each direction and the grid has
        10 x 10 pixels, then the vectors will take the form

        longitudes = [100.5, 101.5, ..., 109.5]
        latitudes = [0.5, 1.5, ..., 9.5]
        """

        # Get parameters for axes
        g = self.get_geotransform()

        lon_ul = g[0]  # Longitude of upper left corner
        lat_ul = g[3]  # Latitude of upper left corner
        dx = g[1]      # Longitudinal resolution
        dy = - g[5]    # Latitudinal resolution (always(?) negative)
        nx = self.columns
        ny = self.rows

        assert dx > 0
        assert dy > 0

        # Coordinates of lower left corner
        lon_ll = lon_ul
        lat_ll = lat_ul - ny * dy

        # Coordinates of upper right corner
        lon_ur = lon_ul + nx * dx

        # Define pixel centers along each directions.
        # Divide by a float so that integer resolutions are not
        # truncated to zero under Python 2 division rules.
        dy2 = dy / 2.0
        dx2 = dx / 2.0

        # Define longitudes and latitudes for each axes
        x = numpy.linspace(lon_ll + dx2,
                           lon_ur - dx2, nx)
        y = numpy.linspace(lat_ll + dy2,
                           lat_ul - dy2, ny)

        # Return
        return x, y

    def __mul__(self, other):
        """Return elementwise product of the two data arrays
        """
        return self.get_data() * other.get_data()

    def __add__(self, other):
        """Return elementwise sum of the two data arrays
        """
        return self.get_data() + other.get_data()

    def get_extrema(self):
        """Get min and max from raster
        If raster has a nominated no_data value, this is ignored.

        Return min, max
        """

        A = self.get_data(nan=True)
        minimum = numpy.nanmin(A.flat[:])
        maximum = numpy.nanmax(A.flat[:])

        return minimum, maximum

    def get_nodata_value(self):
        """Get the internal representation of NODATA

        If the internal value is None, the standard -9999 is assumed.
        The same default is used for rasters that were created from a
        numeric array and therefore have no GDAL band to ask.
        """

        if hasattr(self, 'band'):
            nodata = self.band.GetNoDataValue()
        else:
            # Array based raster: nothing was registered anywhere
            nodata = None

        # Use common default in case nodata was not registered in raster file
        if nodata is None:
            nodata = -9999

        return nodata

    def get_bins(self, N=10, quantiles=False):
        """Get N values between the min and the max occurred in this dataset.

        Return sorted list of length N+1 where the first element is min and
        the last is max. Intermediate values depend on the keyword quantiles:
        If quantiles is True, they represent boundaries between quantiles.
        If quantiles is False, they represent equidistant interval boundaries.
        """

        minimum, maximum = self.get_extrema()

        levels = []
        if quantiles is False:
            # Linear intervals
            d = (maximum - minimum) / N

            for i in range(N):
                levels.append(minimum + i * d)
        else:
            # Quantiles
            # FIXME (Ole): Not 100% sure about this algorithm,
            # but it is close enough

            A = self.get_data(nan=True).flat[:]

            mask = numpy.logical_not(numpy.isnan(A))  # Omit NaN's
            A = A.compress(mask)

            A.sort()

            assert len(A) == A.shape[0]

            d = float(len(A) + 0.5) / N
            for i in range(N):
                levels.append(A[int(i * d)])

        levels.append(maximum)

        return levels

    def get_bounding_box(self):
        """Get bounding box coordinates for raster layer

        Format is [West, South, East, North]
        """

        geotransform = self.geotransform

        x_origin = geotransform[0]  # top left x
        y_origin = geotransform[3]  # top left y
        x_res = geotransform[1]     # w-e pixel resolution
        y_res = geotransform[5]     # n-s pixel resolution
        x_pix = self.columns
        y_pix = self.rows

        minx = x_origin
        maxx = x_origin + (x_pix * x_res)
        miny = y_origin + (y_pix * y_res)
        maxy = y_origin

        return [minx, miny, maxx, maxy]

    @property
    def is_raster(self):
        return True

    @property
    def is_vector(self):
        return False
def __init__(self, data=None, projection=None, geotransform=None,
             name="Raster layer", keywords=None):
    """Initialise object with either data or filename

    Input
        data: Can be either
            * a filename of a raster file format known to GDAL
            * an MxN array of raster data
            * None (FIXME (Ole): Remove this option)
        projection: Geospatial reference in WKT format.
                    Only used if data is provide as a numeric array,
        geotransform: GDAL geotransform (6-tuple).
                      (top left x, w-e pixel resolution, rotation,
                       top left y, rotation, n-s pixel resolution).
                      See e.g. http://www.gdal.org/gdal_tutorial.html
                      Only used if data is provide as a numeric array,
        name: Optional name for layer.
              Only used if data is provide as a numeric array,
        keywords: Optional dictionary with keywords that describe the
                  layer. When the layer is stored, these keywords will
                  be written into an associated file with extension
                  .keywords.

                  Keywords can for example be used to display text
                  about the layer in a web application.

    Note that if data is a filename, all other arguments are ignored
    as they will be inferred from the file.

    Raises AssertionError if keywords is neither None nor a dictionary.
    """

    # Input checks
    if data is None:
        # Instantiate empty object
        self.name = name
        self.data = None
        self.projection = None
        self.coordinates = None
        self.filename = None
        self.keywords = {}
        return

    # Initialisation
    if isinstance(data, basestring):
        self.read_from_file(data)
    else:
        # Assume that data is provided as an array
        # with extra keyword arguments supplying metadata

        if keywords is None:
            self.keywords = {}
        else:
            msg = ("Specified keywords must be either None or a "
                   "dictionary. I got %s")
            # Format lazily and through a 1-tuple: a tuple passed as
            # keywords would otherwise be unpacked by the % operator
            # and raise TypeError instead of the intended assertion.
            assert isinstance(keywords, dict), msg % (keywords,)
            self.keywords = keywords

        # asarray only copies when a conversion is required; unlike
        # numpy.array(..., copy=False) it does not raise on newer
        # numpy versions when a copy turns out to be necessary.
        self.data = numpy.asarray(data, dtype="d")

        self.filename = None
        self.name = name

        self.projection = Projection(projection)
        self.geotransform = geotransform

        # Take dimensions from the converted array so that nested
        # sequences (which have no .shape) are accepted as well
        self.rows = self.data.shape[0]
        self.columns = self.data.shape[1]

        self.number_of_bands = 1
def check_data_integrity(layer_files):
    """Verify that input layers have the same projection and georeferencing.

    Input
        layer_files: Sequence of layer objects. Despite the name, every
                     entry is used as a layer (its projection, is_raster,
                     is_vector, get_geotransform, get_geometry, rows and
                     columns are accessed), not as a filename. The
                     sequence is traversed more than once.

    Raises AssertionError with a descriptive message if
        * a layer's projection differs from the default projection
        * raster geotransforms differ (compared with rtol=1.0e-1)
        * vector geometries differ or a vector layer is empty
        * rasters do not all have the same number of rows and columns
    """

    # Set default values for projection and geotransform.
    # Enforce DEFAULT (WGS84).
    # Choosing 'None' will use value of first layer.
    reference_projection = Projection(DEFAULT_PROJECTION)
    geotransform = None
    coordinates = None

    for layer in layer_files:

        # Ensure that projection is consistent across all layers
        if reference_projection is None:
            reference_projection = layer.projection
        else:
            msg = ('Projections in input layer %s is not as expected:\n'
                   'projection: %s\n'
                   'default:    %s'
                   '' % (layer, layer.projection, reference_projection))
            assert reference_projection == layer.projection, msg

        # Ensure that geotransform and dimensions is consistent across
        # all *raster* layers
        if layer.is_raster:
            if geotransform is None:
                geotransform = layer.get_geotransform()
            else:
                msg = ('Geotransforms in input raster layers are different: '
                       '%s %s' % (geotransform, layer.get_geotransform()))
                # FIXME (Ole): Use high tolerance until we find out
                # why geoserver changes resolution.
                assert numpy.allclose(geotransform,
                                      layer.get_geotransform(),
                                      rtol=1.0e-1), msg

        # In either case of vector layers, we check that the coordinates
        # are the same
        if layer.is_vector:
            if coordinates is None:
                coordinates = layer.get_geometry()
            else:
                msg = ('Coordinates in input vector layers are different: '
                       '%s %s' % (coordinates, layer.get_geometry()))
                assert numpy.allclose(coordinates,
                                      layer.get_geometry()), msg

            msg = ('There are no data points to interpolate to. '
                   'Perhaps zoom out or pan to the study area '
                   'and try again')
            assert len(layer) > 0, msg

    # Check that arrays are aligned.
    #
    # We have observerd Geoserver resolution changes - see ticket:102
    # https://github.com/AIFDR/riab/issues/102
    #
    # However, both rasters are now downloaded with exactly the same
    # parameters since we have made bbox and resolution variable in ticket:103
    # https://github.com/AIFDR/riab/issues/103
    #
    # So if they are still not aligned, we raise an Exception

    rasters = [layer for layer in layer_files if layer.is_raster]
    if not rasters:
        # Nothing to align
        return

    # First find the minimum dimensions. The reference raster is tracked
    # separately for rows and columns so that each error message names
    # the raster that actually supplied the quoted minimum.
    row_reference = min(rasters, key=lambda raster: raster.rows)
    column_reference = min(rasters, key=lambda raster: raster.columns)
    M = row_reference.rows
    N = column_reference.columns

    # Then check for alignment
    for layer in rasters:
        msg = ('Rasters are not aligned!\n'
               'Raster %s has %i rows but raster %s has %i rows\n'
               'Refer to issue #102' % (layer.get_name(),
                                        layer.rows,
                                        row_reference.get_name(),
                                        M))
        assert layer.rows == M, msg

        msg = ('Rasters are not aligned!\n'
               'Raster %s has %i columns but raster %s has %i columns\n'
               'Refer to issue #102' % (layer.get_name(),
                                        layer.columns,
                                        column_reference.get_name(),
                                        N))
        assert layer.columns == N, msg
def __init__(self, data=None, projection=None, geometry=None,
             name='Vector layer', keywords=None):
    """Initialise object with either geometry or filename

    Input
        data: Can be either
            * a filename of a vector file format known to GDAL
            * List of dictionaries of fields associated with
              point coordinates
            * None
        projection: Geospatial reference in WKT format.
                    Only used if geometry is provide as a numeric array,
        geometry: A list of either point coordinates or polygons
        name: Optional name for layer.
              Only used if geometry is provide as a numeric array
        keywords: Optional dictionary with keywords that describe the
                  layer. When the layer is stored, these keywords will
                  be written into an associated file with extension
                  .keywords.

                  Keywords can for example be used to display text
                  about the layer in a web application.

    Note that if data is a filename, all other arguments are ignored
    as they will be inferred from the file.

    The geometry type will be inferred from the dimensions of geometry.
    If each entry is one set of coordinates the type will be
    ogr.wkbPoint, if it is an array of coordinates the type will be
    ogr.wkbPolygon.

    Raises AssertionError if keywords is not None or a dictionary, if
    geometry or projection is missing, if geometry or data is not a
    sequence, or if geometry and data differ in length.
    """

    if data is None and projection is None and geometry is None:
        # Instantiate empty object
        self.name = name
        self.projection = None
        self.geometry = None
        self.geometry_type = None
        self.filename = None
        self.data = None
        self.extent = None
        self.keywords = {}
        return

    if isinstance(data, basestring):
        self.read_from_file(data)
    else:
        # Assume that data is provided as sequences provided as
        # arguments to the Vector constructor
        # with extra keyword arguments supplying metadata

        self.name = name
        self.filename = None

        if keywords is None:
            self.keywords = {}
        else:
            msg = ('Specified keywords must be either None or a '
                   'dictionary. I got %s')
            # Format lazily and through a 1-tuple: a tuple passed as
            # keywords would otherwise be unpacked by the % operator
            # and raise TypeError instead of the intended assertion.
            assert isinstance(keywords, dict), msg % (keywords,)
            self.keywords = keywords

        msg = 'Geometry must be specified'
        assert geometry is not None, msg

        msg = 'Geometry must be a sequence'
        assert is_sequence(geometry), msg
        self.geometry = geometry

        self.geometry_type = get_geometry_type(geometry)

        msg = 'Projection must be specified'
        assert projection is not None, msg
        self.projection = Projection(projection)

        self.data = data
        if data is not None:
            msg = 'Data must be a sequence'
            assert is_sequence(data), msg

            msg = ('The number of entries in geometry and data '
                   'must be the same')
            assert len(geometry) == len(data), msg
def read_from_file(self, filename):
    """ Read and unpack vector data.

    It is assumed that the file contains only one layer with the
    pertinent features. Point and polygon geometries are supported;
    for polygons only the first ring of each feature is read.

    * A feature is a geometry and a set of attributes.
    * A geometry refers to location and can be point, line, polygon or
      combinations thereof.
    * The attributes or obtained through GetField()

    The full OGR architecture is documented at
    * http://www.gdal.org/ogr/ogr_arch.html
    * http://www.gdal.org/ogr/ogr_apitut.html

    Examples are at
    * danieljlewis.org/files/2010/09/basicpythonmap.pdf
    * http://invisibleroads.com/tutorials/gdal-shapefile-points-save.html
    * http://www.packtpub.com/article/geospatial-data-python-geometry

    Input
        filename: Name of a vector file in a format known to OGR

    Sets keywords, name, filename, geometry_type, extent, projection,
    geometry and data on this instance.

    Raises IOError if the file cannot be opened and Exception if it has
    more than one layer, a feature or geometry cannot be read, or a
    geometry is neither point nor polygon.
    """

    basename, _ = os.path.splitext(filename)

    # Look for any keywords
    self.keywords = read_keywords(basename + '.keywords')

    # Determine name
    if 'title' in self.keywords:
        vectorname = self.keywords['title']
    else:
        # Use basename without leading directories as name
        vectorname = os.path.split(basename)[-1]

    self.name = vectorname
    self.filename = filename
    self.geometry_type = None  # In case there are no features

    fid = ogr.Open(filename)
    if fid is None:
        msg = 'Could not open %s' % filename
        raise IOError(msg)

    # Assume that file contains all data in one layer
    if fid.GetLayerCount() > 1:
        msg = ('WARNING: Number of layers in %s are %i. '
               'Only the first layer will currently be '
               'used.' % (filename, fid.GetLayerCount()))
        raise Exception(msg)

    layer = fid.GetLayerByIndex(0)

    # Get spatial extent
    self.extent = layer.GetExtent()

    # Get projection
    p = layer.GetSpatialRef()
    self.projection = Projection(p)

    # Get number of features
    N = layer.GetFeatureCount()

    # Extract coordinates and attributes for all features
    geometry = []
    data = []
    # NOTE(review): this addresses features by ids 0..N-1; presumably
    # fine for shapefiles - confirm for other OGR drivers.
    for i in range(N):
        feature = layer.GetFeature(i)
        if feature is None:
            msg = 'Could not get feature %i from %s' % (i, filename)
            raise Exception(msg)

        # Record coordinates ordered as Longitude, Latitude
        G = feature.GetGeometryRef()
        if G is None:
            msg = ('Geometry was None in filename %s ' % filename)
            raise Exception(msg)
        else:
            self.geometry_type = G.GetGeometryType()
            if self.geometry_type == ogr.wkbPoint:
                geometry.append((G.GetX(), G.GetY()))
            elif self.geometry_type == ogr.wkbPolygon:
                ring = G.GetGeometryRef(0)
                M = ring.GetPointCount()
                coordinates = []
                for j in range(M):
                    coordinates.append((ring.GetX(j), ring.GetY(j)))

                # Record entire polygon ring as an Mx2 numpy array.
                # A list always has to be copied into an array, so
                # copy=False must not be requested here (it raises on
                # newer numpy versions).
                geometry.append(numpy.asarray(coordinates, dtype='d'))
            else:
                msg = ('Only point and polygon geometries are supported. '
                       'Geometry in filename %s '
                       'was %s.' % (filename,
                                    G.GetGeometryType()))
                raise Exception(msg)

        # Record attributes by name
        number_of_fields = feature.GetFieldCount()
        fields = {}
        for j in range(number_of_fields):
            name = feature.GetFieldDefnRef(j).GetName()

            # FIXME (Ole): Ascertain the type of each field?
            #              We need to cast each appropriately?
            #              This is issue #66
            fields[name] = feature.GetField(j)

        data.append(fields)

    # Store geometry (one entry per feature) and the matching attributes
    self.geometry = geometry
    self.data = data
class Vector:
    """Class for abstraction of vector data

    A Vector holds one geometry entry per feature (self.geometry) and,
    optionally, one dictionary of attributes per feature (self.data).
    """

    def __init__(self, data=None, projection=None, geometry=None,
                 name='Vector layer', keywords=None):
        """Initialise object with either geometry or filename

        Input
            data: Can be either
                * a filename of a vector file format known to GDAL
                * List of dictionaries of fields associated with
                  point coordinates
                * None
            projection: Geospatial reference in WKT format.
                        Only used if geometry is provide as a numeric array,
            geometry: A list of either point coordinates or polygons
            name: Optional name for layer.
                  Only used if geometry is provide as a numeric array
            keywords: Optional dictionary with keywords that describe the
                      layer. When the layer is stored, these keywords will
                      be written into an associated file with extension
                      .keywords.

                      Keywords can for example be used to display text
                      about the layer in a web application.

        Note that if data is a filename, all other arguments are ignored
        as they will be inferred from the file.

        The geometry type will be inferred from the dimensions of geometry.
        If each entry is one set of coordinates the type will be
        ogr.wkbPoint, if it is an array of coordinates the type will be
        ogr.wkbPolygon.

        Raises AssertionError if keywords is not None or a dictionary, if
        geometry or projection is missing, if geometry or data is not a
        sequence, or if geometry and data differ in length.
        """

        if data is None and projection is None and geometry is None:
            # Instantiate empty object
            self.name = name
            self.projection = None
            self.geometry = None
            self.geometry_type = None
            self.filename = None
            self.data = None
            self.extent = None
            self.keywords = {}
            return

        if isinstance(data, basestring):
            self.read_from_file(data)
        else:
            # Assume that data is provided as sequences provided as
            # arguments to the Vector constructor
            # with extra keyword arguments supplying metadata

            self.name = name
            self.filename = None

            if keywords is None:
                self.keywords = {}
            else:
                msg = ('Specified keywords must be either None or a '
                       'dictionary. I got %s')
                # Format lazily and through a 1-tuple: a tuple passed as
                # keywords would otherwise be unpacked by the % operator
                # and raise TypeError instead of the intended assertion.
                assert isinstance(keywords, dict), msg % (keywords,)
                self.keywords = keywords

            msg = 'Geometry must be specified'
            assert geometry is not None, msg

            msg = 'Geometry must be a sequence'
            assert is_sequence(geometry), msg
            self.geometry = geometry

            self.geometry_type = get_geometry_type(geometry)

            msg = 'Projection must be specified'
            assert projection is not None, msg
            self.projection = Projection(projection)

            self.data = data
            if data is not None:
                msg = 'Data must be a sequence'
                assert is_sequence(data), msg

                msg = ('The number of entries in geometry and data '
                       'must be the same')
                assert len(geometry) == len(data), msg

            # FIXME: Need to establish extent here
            # (get_bounding_box relies on self.extent, which is only set
            # for empty instances and for layers read from file)

    def __str__(self):
        g_type_str = geometrytype2string(self.geometry_type)
        return ('Vector data set: %s, %i features, geometry type '
                '%s (%s)' % (self.name,
                             len(self),
                             str(self.geometry_type),
                             g_type_str))

    def __len__(self):
        """Size of vector layer defined as number of features
        """
        return len(self.geometry)

    def __eq__(self, other, rtol=1.0e-5, atol=1.0e-8):
        """Override '==' to allow comparison with other vector objecs

        Input
           other: Vector instance to compare to
           rtol, atol: Relative and absolute tolerance.
                       See numpy.allclose for details

        Projection, geometry, attribute names, attribute values and
        keywords are compared. Attribute values that are not equal are
        given a second chance through numpy.allclose; values that cannot
        be compared numerically (e.g. strings) count as different.

        Raises TypeError if other is not a Vector instance.
        """

        # Check type
        if not isinstance(other, Vector):
            msg = ('Vector instance cannot be compared to %s'
                   ' as its type is %s ' % (str(other), type(other)))
            raise TypeError(msg)

        # Check projection
        if self.projection != other.projection:
            return False

        # Check number of features. Without this numpy.allclose below
        # would raise on geometries that cannot be broadcast together.
        if len(self) != len(other):
            return False

        # Check geometry
        if not numpy.allclose(self.get_geometry(),
                              other.get_geometry(),
                              rtol=rtol, atol=atol):
            return False

        x = self.get_data()
        y = other.get_data()

        if x is None or y is None:
            # Layers without attributes only match each other
            if x is not y:
                return False
        else:
            if len(x) != len(y):
                return False

            if len(x) > 0:
                # Check keys
                for key in x[0]:
                    for attributes in y:
                        if key not in attributes:
                            return False

                for key in y[0]:
                    for attributes in x:
                        if key not in attributes:
                            return False

                # Check data
                for a, b in zip(x, y):
                    for key in a:
                        if a[key] != b[key]:
                            # Not equal, try numerical comparison
                            # with tolerances
                            try:
                                close = numpy.allclose(a[key], b[key],
                                                       rtol=rtol, atol=atol)
                            except (TypeError, ValueError):
                                # Not numeric, so the inequality stands
                                close = False

                            if not close:
                                return False

        # Check keywords
        if self.keywords != other.keywords:
            return False

        # Vector layers are identical up to the specified tolerance
        return True

    def __ne__(self, other):
        """Override '!=' to allow comparison with other vector objects
        """
        return not self == other

    def get_name(self):
        """Return the name of this layer
        """
        return self.name

    def get_keywords(self, key=None):
        """Return keywords dictionary

        If key is given, only the value of that keyword is returned.
        Raises Exception if the requested keyword does not exist.
        """
        if key is None:
            return self.keywords
        else:
            if key in self.keywords:
                return self.keywords[key]
            else:
                msg = ('Keyword %s does not exist in %s: Options are '
                       '%s' % (key, self.get_name(), self.keywords.keys()))
                raise Exception(msg)

    def get_caption(self):
        """Return 'caption' keyword if present. Otherwise ''.
        """
        if 'caption' in self.keywords:
            return self.keywords['caption']
        else:
            return ''

    def read_from_file(self, filename):
        """ Read and unpack vector data.

        It is assumed that the file contains only one layer with the
        pertinent features. Point and polygon geometries are supported;
        for polygons only the first ring of each feature is read.

        * A feature is a geometry and a set of attributes.
        * A geometry refers to location and can be point, line, polygon or
          combinations thereof.
        * The attributes or obtained through GetField()

        The full OGR architecture is documented at
        * http://www.gdal.org/ogr/ogr_arch.html
        * http://www.gdal.org/ogr/ogr_apitut.html

        Examples are at
        * danieljlewis.org/files/2010/09/basicpythonmap.pdf
        * http://invisibleroads.com/tutorials/gdal-shapefile-points-save.html
        * http://www.packtpub.com/article/geospatial-data-python-geometry

        Input
            filename: Name of a vector file in a format known to OGR

        Raises IOError if the file cannot be opened and Exception if it
        has more than one layer, a feature or geometry cannot be read,
        or a geometry is neither point nor polygon.
        """

        basename, _ = os.path.splitext(filename)

        # Look for any keywords
        self.keywords = read_keywords(basename + '.keywords')

        # Determine name
        if 'title' in self.keywords:
            vectorname = self.keywords['title']
        else:
            # Use basename without leading directories as name
            vectorname = os.path.split(basename)[-1]

        self.name = vectorname
        self.filename = filename
        self.geometry_type = None  # In case there are no features

        fid = ogr.Open(filename)
        if fid is None:
            msg = 'Could not open %s' % filename
            raise IOError(msg)

        # Assume that file contains all data in one layer
        if fid.GetLayerCount() > 1:
            msg = ('WARNING: Number of layers in %s are %i. '
                   'Only the first layer will currently be '
                   'used.' % (filename, fid.GetLayerCount()))
            raise Exception(msg)

        layer = fid.GetLayerByIndex(0)

        # Get spatial extent
        self.extent = layer.GetExtent()

        # Get projection
        p = layer.GetSpatialRef()
        self.projection = Projection(p)

        # Get number of features
        N = layer.GetFeatureCount()

        # Extract coordinates and attributes for all features
        geometry = []
        data = []
        # NOTE(review): this addresses features by ids 0..N-1; presumably
        # fine for shapefiles - confirm for other OGR drivers.
        for i in range(N):
            feature = layer.GetFeature(i)
            if feature is None:
                msg = 'Could not get feature %i from %s' % (i, filename)
                raise Exception(msg)

            # Record coordinates ordered as Longitude, Latitude
            G = feature.GetGeometryRef()
            if G is None:
                msg = ('Geometry was None in filename %s ' % filename)
                raise Exception(msg)
            else:
                self.geometry_type = G.GetGeometryType()
                if self.geometry_type == ogr.wkbPoint:
                    geometry.append((G.GetX(), G.GetY()))
                elif self.geometry_type == ogr.wkbPolygon:
                    ring = G.GetGeometryRef(0)
                    M = ring.GetPointCount()
                    coordinates = []
                    for j in range(M):
                        coordinates.append((ring.GetX(j), ring.GetY(j)))

                    # Record entire polygon ring as an Mx2 numpy array.
                    # A list always has to be copied into an array, so
                    # copy=False must not be requested here (it raises
                    # on newer numpy versions).
                    geometry.append(numpy.asarray(coordinates, dtype='d'))
                else:
                    msg = ('Only point and polygon geometries are supported. '
                           'Geometry in filename %s '
                           'was %s.' % (filename,
                                        G.GetGeometryType()))
                    raise Exception(msg)

            # Record attributes by name
            number_of_fields = feature.GetFieldCount()
            fields = {}
            for j in range(number_of_fields):
                name = feature.GetFieldDefnRef(j).GetName()

                # FIXME (Ole): Ascertain the type of each field?
                #              We need to cast each appropriately?
                #              This is issue #66
                fields[name] = feature.GetField(j)

            data.append(fields)

        # Store geometry (one entry per feature) and matching attributes
        self.geometry = geometry
        self.data = data

    def write_to_file(self, filename):
        """Save vector data to file

        Input
            filename: filename with extension .shp or .gml

        Note, if attribute names are longer than 10 characters they will be
        truncated. This is due to limitations in the shp file driver and has
        to be done here since gdal v1.7 onwards has changed its handling of
        this issue: http://www.gdal.org/ogr/drv_shapefile.html

        Raises AssertionError for other extensions or unknown attribute
        types and Exception if the format is .gml (currently disabled)
        or if OGR fails to create the file, layer, fields or features.
        """

        # Check file format
        basename, extension = os.path.splitext(filename)

        msg = ('Invalid file type for file %s. Only extensions '
               'shp or gml allowed.' % filename)
        assert extension == '.shp' or extension == '.gml', msg
        driver = DRIVER_MAP[extension]

        # FIXME (Ole): Tempory flagging of GML issue (ticket #18)
        if extension == '.gml':
            msg = ('OGR GML driver does not store geospatial reference.'
                   'This format is disabled for the time being. See '
                   'https://github.com/AIFDR/riab/issues/18')
            raise Exception(msg)

        # Derive layername from filename (excluding preceding dirs)
        layername = os.path.split(basename)[-1]

        # Get vector data
        geometry = self.get_geometry()
        data = self.get_data()

        N = len(geometry)

        # Clear any previous file of this name (ogr does not overwrite)
        try:
            os.remove(filename)
        except OSError:
            # Best effort: typically there simply was no previous file
            pass

        # Create new file with one layer
        drv = ogr.GetDriverByName(driver)
        if drv is None:
            msg = 'OGR driver %s not available' % driver
            raise Exception(msg)

        ds = drv.CreateDataSource(filename)
        if ds is None:
            msg = 'Creation of output file %s failed' % filename
            raise Exception(msg)

        lyr = ds.CreateLayer(layername,
                             self.projection.spatial_reference,
                             self.geometry_type)
        if lyr is None:
            msg = 'Could not create layer %s' % layername
            raise Exception(msg)

        # Define attributes if any
        store_attributes = False
        if data is not None:
            if len(data) > 0:
                try:
                    fields = data[0].keys()
                except AttributeError:
                    msg = ('Input parameter "attributes" was specified '
                           'but it does not contain dictionaries with '
                           'field information as expected. The first'
                           'element is %s' % data[0])
                    raise Exception(msg)
                else:
                    # Establish OGR types for each element
                    ogrtypes = {}
                    for name in fields:
                        att = data[0][name]
                        py_type = type(att)
                        msg = ('Unknown type for storing vector '
                               'data: %s, %s' % (name, str(py_type)[1:-1]))
                        assert py_type in TYPE_MAP, msg
                        ogrtypes[name] = TYPE_MAP[py_type]

            else:
                msg = ('Input parameter "data" was specified '
                       'but appears to be empty')
                raise Exception(msg)

            # Create attribute fields in layer
            store_attributes = True
            for name in fields:
                fd = ogr.FieldDefn(name, ogrtypes[name])
                # FIXME (Ole): Trying to address issue #16
                #              But it doesn't work and
                #              somehow changes the values of MMI in test
                #width = max(128, len(name))
                #print name, width
                #fd.SetWidth(width)

                # Silent handling of warnings like
                # Warning 6: Normalized/laundered field name:
                #'CONTENTS_LOSS_AUD' to 'CONTENTS_L'
                gdal.PushErrorHandler('CPLQuietErrorHandler')
                try:
                    if lyr.CreateField(fd) != 0:
                        msg = 'Could not create field %s' % name
                        raise Exception(msg)
                finally:
                    # Restore error handler, also when creation failed
                    gdal.PopErrorHandler()

        # Store geometry
        geom = ogr.Geometry(self.geometry_type)
        layer_def = lyr.GetLayerDefn()
        for i in range(N):
            # Create new feature instance
            feature = ogr.Feature(layer_def)

            # Store geometry and check
            if self.geometry_type == ogr.wkbPoint:
                x = float(geometry[i][0])
                y = float(geometry[i][1])
                geom.SetPoint_2D(0, x, y)
            elif self.geometry_type == ogr.wkbPolygon:
                wkt = array2wkt(geometry[i], geom_type='POLYGON')
                geom = ogr.CreateGeometryFromWkt(wkt)
            else:
                msg = 'Geometry type %s not implemented' % self.geometry_type
                raise Exception(msg)

            feature.SetGeometry(geom)

            G = feature.GetGeometryRef()
            if G is None:
                msg = 'Could not create GeometryRef for file %s' % filename
                raise Exception(msg)

            # Store attributes
            if store_attributes:
                for j, name in enumerate(fields):
                    # The driver may have truncated the name, so look up
                    # the one that was actually stored in the layer
                    actual_field_name = layer_def.GetFieldDefn(j).GetNameRef()

                    val = data[i][name]
                    if isinstance(val, numpy.ndarray):
                        # A singleton of type <type 'numpy.ndarray'> works
                        # for gdal version 1.6 but fails for version 1.8
                        # in SetField with error: NotImplementedError:
                        # Wrong number of arguments for overloaded function
                        val = float(val)

                    feature.SetField(actual_field_name, val)

            # Save this feature
            if lyr.CreateFeature(feature) != 0:
                msg = 'Failed to create feature %i in file %s' % (i, filename)
                raise Exception(msg)

            feature.Destroy()

        # Write keywords if any
        write_keywords(self.keywords, basename + '.keywords')

    def get_attribute_names(self):
        """ Get available attribute names

        These are the ones that can be used with get_data
        """

        return self.data[0].keys()

    def get_data(self, attribute=None, index=None):
        """Get vector attributes

        Data is returned as a list where each entry is a dictionary of
        attributes for one feature. Entries in get_geometry() and
        get_data() are related as 1-to-1

        If optional argument attribute is specified and a valid name,
        then the list of values for that attribute is returned.

        If optional argument index is specified on the that value will be
        returned. Any value of index is ignored if attribute is None.

        Raises AssertionError if attribute is unknown or if index is not
        an integer within the bounds of this layer.
        """

        if not hasattr(self, 'data'):
            msg = 'Vector data instance does not have any attributes'
            raise Exception(msg)

        if attribute is None:
            return self.data

        # Messages are given directly to assert so that they are only
        # formatted (which involves str(self)) when a check fails
        assert attribute in self.data[0], (
            'Specified attribute %s does not exist in '
            'vector layer %s. Valid names are %s'
            '' % (attribute, self, self.data[0].keys()))

        if index is None:
            # Return all values for specified attribute
            return [x[attribute] for x in self.data]

        # Return value for specified attribute and index
        assert isinstance(index, int), (
            'Specified index must be either None or '
            'an integer. I got %s' % (index,))

        assert 0 <= index < len(self), (
            'Specified index must lie within the bounds '
            'of vector layer %s which is [%i, %i]'
            '' % (self, 0, len(self) - 1))

        return self.data[index][attribute]

    def get_geometry(self):
        """Return geometry for vector layer.

        Depending on the feature type, geometry is

        geometry type     output type
        -----------------------------
        point             coordinates (Nx2 array of longitudes and latitudes)
        line              TODO
        polygon           list of arrays of coordinates

        """
        return self.geometry

    def get_projection(self, proj4=False):
        """Return projection of this layer as a string
        """
        return self.projection.get_projection(proj4)

    def get_bounding_box(self):
        """Get bounding box coordinates for vector layer.

        Format is [West, South, East, North]
        """
        e = self.extent
        return [e[0],  # West
                e[2],  # South
                e[1],  # East
                e[3]]  # North

    def get_extrema(self, attribute=None):
        """Get min and max values from specified attribute

        Return min, max

        Raises RuntimeError if no attribute name is given.
        """
        if attribute is None:
            msg = ('Valid attribute name must be specified in get_extrema '
                   'for vector layers. I got None.')
            raise RuntimeError(msg)

        x = self.get_data(attribute)
        return min(x), max(x)

    def get_topN(self, attribute, N=10):
        """Get top N features

        Input
            attribute: The name of attribute where values are sought
            N: How many

        Output
            layer: New vector layer with selected features, ordered by
                   increasing value of the attribute
        """

        # FIXME (Ole): Maybe generalise this to arbitrary expressions

        # Input checks
        msg = ('Specfied attribute must be a string. '
               'I got %s' % (type(attribute)))
        assert isinstance(attribute, basestring), msg

        msg = 'Specified attribute was empty'
        assert attribute != '', msg

        msg = 'N must be a positive number. I got %i' % N
        assert N > 0, msg

        # Create list of values for specified attribute
        values = self.get_data(attribute)

        # Sort on the attribute value only. Sorting the raw tuples would
        # fall through to comparing attribute dictionaries and geometries
        # whenever two values tie, which is meaningless and can raise.
        ranked = sorted(zip(values, self.data, self.geometry),
                        key=lambda entry: entry[0])

        # Pick top N and unpack
        _, data, geometry = zip(*ranked[-N:])

        # Create new Vector instance and return
        return Vector(data=data,
                      projection=self.get_projection(),
                      geometry=geometry)

    def interpolate(self, X, name=None):
        """Interpolate values of this vector layer to other layer

        Input
            X: Layer object defining target
            name: Optional name of interpolated layer

        Output
            Y: Layer object with values of this vector layer interpolated to
               geometry of input layer X

        Not implemented yet: always raises Exception.
        """

        msg = 'Interpolation from vector layers not yet implemented'
        raise Exception(msg)

    @property
    def is_raster(self):
        return False

    @property
    def is_vector(self):
        return True

    @property
    def is_point_data(self):
        return self.is_vector and self.geometry_type == ogr.wkbPoint

    @property
    def is_polygon_data(self):
        return self.is_vector and self.geometry_type == ogr.wkbPolygon
def read_from_file(self, filename):
    """Read and unpack vector data from a file readable by OGR.

    It is assumed that the file contains only one layer with the
    pertinent features. Further it is assumed for the moment that
    all geometries are points.

    * A feature is a geometry and a set of attributes.
    * A geometry refers to location and can be point, line, polygon or
      combinations thereof.
    * The attributes are obtained through GetField()

    The full OGR architecture is documented at
    * http://www.gdal.org/ogr/ogr_arch.html
    * http://www.gdal.org/ogr/ogr_apitut.html

    Examples are at
    * danieljlewis.org/files/2010/09/basicpythonmap.pdf
    * http://invisibleroads.com/tutorials/gdal-shapefile-points-save.html
    * http://www.packtpub.com/article/geospatial-data-python-geometry

    Input
        filename: Path of the vector file to read

    Side effects
        Sets self.name (filename without extension), self.extent,
        self.projection, self.geometry (array of x, y pairs),
        self.data (one attribute dictionary per feature) and
        self.filename.

    Raises
        IOError: if the file cannot be opened by OGR
        Exception: if the file has more than one layer, a feature
                   cannot be read or a feature is not a point
    """

    self.name, _ = os.path.splitext(filename)

    fid = ogr.Open(filename)
    if fid is None:
        msg = 'Could not open %s' % filename
        raise IOError(msg)

    # Assume that file contains all data in one layer
    if fid.GetLayerCount() > 1:
        msg = ('WARNING: Number of layers in %s are %i. '
               'Only the first layer will currently be '
               'used.' % (filename, fid.GetLayerCount()))
        raise Exception(msg)

    layer = fid.GetLayerByIndex(0)

    # Get spatial extent
    self.extent = layer.GetExtent()

    # Get projection
    p = layer.GetSpatialRef()
    self.projection = Projection(p)

    # Get number of features
    N = layer.GetFeatureCount()

    # Extract coordinates and attributes for all features
    geometry = []
    data = []
    for i in range(N):
        feature = layer.GetFeature(i)
        if feature is None:
            msg = 'Could not get feature %i from %s' % (i, filename)
            raise Exception(msg)

        # Record coordinates
        G = feature.GetGeometryRef()

        # A feature may carry no geometry at all; look up the type only
        # when there is one so that the error below can still be built.
        if G is None:
            geometry_type = None
        else:
            geometry_type = G.GetGeometryType()

        if geometry_type != ogr.wkbPoint or G is None:
            msg = ('Only point geometries are supported. '
                   'Geometry in filename %s '
                   'was %s.' % (filename, geometry_type))
            raise Exception(msg)

        # Longitude, Latitude
        geometry.append((G.GetX(), G.GetY()))

        # Record attributes by name
        # FIXME (Ole): Ascertain the type of each field?
        #              We need to cast each appropriately?
        #              This is issue #66
        fields = {}
        for j in range(feature.GetFieldCount()):
            name = feature.GetFieldDefnRef(j).GetName()
            fields[name] = feature.GetField(j)

        data.append(fields)

    # FIXME: When we get to more general geometries, we
    #        should probably just stay with a list of features.
    # asarray converts the list without demanding a zero-copy view
    self.geometry = numpy.asarray(geometry, dtype='d')
    self.data = data
    self.filename = filename
class Vector:
    """Class for abstraction of vector data

    A vector layer holds point coordinates (self.geometry), one
    dictionary of attributes per feature (self.data) and a projection.
    It can be populated from a file readable by OGR or directly from
    in-memory geometry and attribute data.
    """

    def __init__(self, data=None, projection=None, geometry=None,
                 name='Vector layer', caption=''):
        """Initialise object with either geometry or filename

        Input
            data: Can be either
                * a filename of a vector file format known to GDAL
                * List of dictionaries of fields associated with
                  point coordinates
                * None
            projection: Geospatial reference in WKT format.
                        Only used if geometry is provided as a
                        numeric array.
            geometry: An Nx2 array of point coordinates
            name: Optional name for layer.
                  Only used if geometry is provided as a numeric array
            caption: Optional text field that describes the layer. This
                     field can for example be used to display text
                     about the layer in a web application.

        Raises
            AssertionError: if data is not a filename and either
                            geometry or projection is missing
        """

        self.caption = caption

        if data is None and projection is None and geometry is None:
            # Instantiate empty object
            self.name = name
            self.projection = None
            self.geometry = None
            self.filename = None
            self.data = None
            self.extent = None
            return

        if isinstance(data, basestring):
            self.read_from_file(data)
        else:
            # Assume that geometry is provided as an array
            # with extra keyword arguments supplying metadata

            msg = 'Geometry must be specified'
            assert geometry is not None, msg
            # asarray converts lists without demanding a zero-copy view
            self.geometry = numpy.asarray(geometry, dtype='d')

            msg = 'Projection must be specified'
            assert projection is not None, msg
            self.projection = Projection(projection)

            self.data = data
            self.name = name
            self.filename = None

            # FIXME: Need to establish extent here

    def __str__(self):
        """Return the name of the layer"""
        return self.name

    def __len__(self):
        """Size of vector layer defined as number of features
        """

        # FIXME - change to len(self.geometry)
        return self.geometry.shape[0]

    def __eq__(self, other, rtol=1.0e-5, atol=1.0e-8):
        """Override '==' to allow comparison with other vector objects

        Input
            other: Vector instance to compare to
            rtol, atol: Relative and absolute tolerance.
                        See numpy.allclose for details

        Output
            True if projection, geometry, attribute names and attribute
            values agree (numerical values up to the given tolerances),
            False otherwise.

        Raises
            TypeError: if other is not a Vector instance
        """

        # Check type
        if not isinstance(other, Vector):
            msg = ('Vector instance cannot be compared to %s'
                   ' as its type is %s ' % (str(other), type(other)))
            raise TypeError(msg)

        # Check projection
        if self.projection != other.projection:
            return False

        # Check geometry. Differing shapes can never be equal and
        # must not be left to broadcasting inside allclose.
        geom_self = self.get_geometry()
        geom_other = other.get_geometry()
        if numpy.shape(geom_self) != numpy.shape(geom_other):
            return False

        if not numpy.allclose(geom_self, geom_other,
                              rtol=rtol, atol=atol):
            return False

        x = self.get_data()
        y = other.get_data()

        # Layers without attribute data only match each other
        if x is None or y is None:
            return x is None and y is None

        if len(x) != len(y):
            return False

        # Check keys (nothing to check when there are no features)
        if len(x) > 0:
            for key in x[0]:
                for fields in y:
                    if key not in fields:
                        return False

            for key in y[0]:
                for fields in x:
                    if key not in fields:
                        return False

        # Check data
        for i, a in enumerate(x):
            for key in a:
                if a[key] != y[i][key]:
                    # Not equal, try numerical comparison with tolerances.
                    # Values that cannot be compared numerically
                    # (e.g. strings) are simply different.
                    try:
                        close = numpy.allclose(a[key], y[i][key],
                                               rtol=rtol, atol=atol)
                    except (TypeError, ValueError):
                        close = False

                    if not close:
                        return False

        # Vector layers are identical up to the specified tolerance
        return True

    def __ne__(self, other):
        """Override '!=' to allow comparison with other vector objects
        """
        return not self == other

    def get_name(self):
        """Return the name of the layer"""
        return self.name

    def get_caption(self):
        """Return the caption of the layer"""
        return self.caption

    def read_from_file(self, filename):
        """Read and unpack vector data from a file readable by OGR.

        It is assumed that the file contains only one layer with the
        pertinent features. Further it is assumed for the moment that
        all geometries are points.

        * A feature is a geometry and a set of attributes.
        * A geometry refers to location and can be point, line, polygon
          or combinations thereof.
        * The attributes are obtained through GetField()

        The full OGR architecture is documented at
        * http://www.gdal.org/ogr/ogr_arch.html
        * http://www.gdal.org/ogr/ogr_apitut.html

        Examples are at
        * danieljlewis.org/files/2010/09/basicpythonmap.pdf
        * http://invisibleroads.com/tutorials/gdal-shapefile-points-save.html
        * http://www.packtpub.com/article/geospatial-data-python-geometry

        Input
            filename: Path of the vector file to read

        Raises
            IOError: if the file cannot be opened by OGR
            Exception: if the file has more than one layer, a feature
                       cannot be read or a feature is not a point
        """

        self.name, _ = os.path.splitext(filename)

        fid = ogr.Open(filename)
        if fid is None:
            msg = 'Could not open %s' % filename
            raise IOError(msg)

        # Assume that file contains all data in one layer
        if fid.GetLayerCount() > 1:
            msg = ('WARNING: Number of layers in %s are %i. '
                   'Only the first layer will currently be '
                   'used.' % (filename, fid.GetLayerCount()))
            raise Exception(msg)

        layer = fid.GetLayerByIndex(0)

        # Get spatial extent
        self.extent = layer.GetExtent()

        # Get projection
        p = layer.GetSpatialRef()
        self.projection = Projection(p)

        # Get number of features
        N = layer.GetFeatureCount()

        # Extract coordinates and attributes for all features
        geometry = []
        data = []
        for i in range(N):
            feature = layer.GetFeature(i)
            if feature is None:
                msg = 'Could not get feature %i from %s' % (i, filename)
                raise Exception(msg)

            # Record coordinates
            G = feature.GetGeometryRef()

            # A feature may carry no geometry at all; look up the type
            # only when there is one so the error can still be built.
            if G is None:
                geometry_type = None
            else:
                geometry_type = G.GetGeometryType()

            if geometry_type != ogr.wkbPoint or G is None:
                msg = ('Only point geometries are supported. '
                       'Geometry in filename %s '
                       'was %s.' % (filename, geometry_type))
                raise Exception(msg)

            # Longitude, Latitude
            geometry.append((G.GetX(), G.GetY()))

            # Record attributes by name
            # FIXME (Ole): Ascertain the type of each field?
            #              We need to cast each appropriately?
            #              This is issue #66
            fields = {}
            for j in range(feature.GetFieldCount()):
                name = feature.GetFieldDefnRef(j).GetName()
                fields[name] = feature.GetField(j)

            data.append(fields)

        # FIXME: When we get to more general geometries, we
        #        should probably just stay with a list of features.
        self.geometry = numpy.asarray(geometry, dtype='d')
        self.data = data
        self.filename = filename

    def write_to_file(self, filename):
        """Save vector data to file

        Input
            filename: filename with extension .shp or .gml

        Raises
            AssertionError: if the extension is neither .shp nor .gml
            Exception: if the format is .gml (currently disabled), if
                       the attribute data is empty or malformed, or if
                       any OGR operation fails
        """

        # Derive layername from filename (excluding preceding dirs)
        x = os.path.split(filename)[-1]
        layername, extension = os.path.splitext(x)

        # Check file format
        msg = ('Invalid file type for file %s. Only extensions '
               'shp or gml allowed.' % filename)
        assert extension == '.shp' or extension == '.gml', msg
        driver = DRIVER_MAP[extension]

        # FIXME (Ole): Tempory flagging of GML issue
        if extension == '.gml':
            msg = ('OGR GML driver does not store geospatial reference.'
                   'This format is disabled for the time being')
            raise Exception(msg)

        # Get vector data
        geometry = self.get_geometry()
        data = self.get_data()

        N = len(geometry)

        # Clear any previous file of this name (ogr does not overwrite).
        # Best effort only: a missing file is the normal case.
        try:
            os.remove(filename)
        except OSError:
            pass

        # Create new file with one layer
        drv = ogr.GetDriverByName(driver)
        if drv is None:
            msg = 'OGR driver %s not available' % driver
            raise Exception(msg)

        ds = drv.CreateDataSource(filename)
        if ds is None:
            msg = 'Creation of output file %s failed' % filename
            raise Exception(msg)

        lyr = ds.CreateLayer(layername,
                             self.projection.spatial_reference,
                             ogr.wkbPoint)
        if lyr is None:
            msg = 'Could not create layer %s' % layername
            raise Exception(msg)

        # Define attributes if any
        store_attributes = False
        if data is not None:
            if len(data) > 0:
                try:
                    fields = data[0].keys()
                except Exception:
                    msg = ('Input parameter "attributes" was specified '
                           'but it does not contain dictionaries with '
                           'field information as expected. The first'
                           'element is %s' % data[0])
                    raise Exception(msg)
                else:
                    # Establish OGR types for each element, based on
                    # the Python types found in the first feature
                    ogrtypes = {}
                    for name in fields:
                        py_type = type(data[0][name])
                        ogrtypes[name] = TYPE_MAP[py_type]
            else:
                msg = ('Input parameter "data" was specified '
                       'but appears to be empty')
                raise Exception(msg)

            # Create attribute fields in layer
            store_attributes = True
            for name in fields:
                fd = ogr.FieldDefn(name, ogrtypes[name])

                # FIXME (Ole): Trying to address issue #16
                #              But it doesn't work and
                #              somehow changes the values of MMI in test
                #width = max(128, len(name))
                #print name, width
                #fd.SetWidth(width)

                if lyr.CreateField(fd) != 0:
                    msg = 'Could not create field %s' % name
                    raise Exception(msg)

        # Store point data
        for i in range(N):
            # FIXME (Ole): Need to assign entire vector if at all possible

            # Coordinates
            x = float(geometry[i, 0])
            y = float(geometry[i, 1])

            pt = ogr.Geometry(ogr.wkbPoint)
            pt.SetPoint_2D(0, x, y)

            feature = ogr.Feature(lyr.GetLayerDefn())
            feature.SetGeometry(pt)

            G = feature.GetGeometryRef()
            if G is None:
                msg = 'Could not create GeometryRef for file %s' % filename
                raise Exception(msg)

            # Attributes
            if store_attributes:
                for name in fields:
                    feature.SetField(name, data[i][name])

            # Save this feature
            if lyr.CreateFeature(feature) != 0:
                msg = 'Failed to create feature %i in file %s' % (i, filename)
                raise Exception(msg)

            feature.Destroy()

    def get_data(self, attribute=None, index=None):
        """Get vector attributes

        Data is returned as a list where each entry is a dictionary of
        attributes for one feature. Entries in get_geometry() and
        get_data() are related as 1-to-1

        If optional argument attribute is specified and a valid name,
        then the list of values for that attribute is returned.

        If optional argument index is specified only that value will
        be returned. Any value of index is ignored if attribute is None.

        Raises
            AssertionError: if attribute is not a valid name, or index
                            is not an integer within the layer bounds
            Exception: if the instance has no data attribute
        """

        if not hasattr(self, 'data'):
            msg = 'Vector data instance does not have any attributes'
            raise Exception(msg)

        if attribute is None:
            return self.data

        msg = ('Specified attribute %s does not exist in '
               'vector layer %s. Valid names are %s'
               '' % (attribute, self, self.data[0].keys()))
        assert attribute in self.data[0], msg

        if index is None:
            # Return all values for specified attribute
            return [x[attribute] for x in self.data]

        # Return value for specified attribute and index
        msg = ('Specified index must be either None or '
               'an integer. I got %s' % index)
        assert type(index) == type(0), msg

        msg = ('Specified index must lie within the bounds '
               'of vector layer %s which is [%i, %i]'
               '' % (self, 0, len(self) - 1))
        assert 0 <= index < len(self), msg

        return self.data[index][attribute]

    def get_geometry(self):
        """Return geometry for vector layer.

        Depending on the feature type, geometry is

        geometry type     output type
        -----------------------------
        point             coordinates (Nx2 array of longitudes and latitudes)
        line              TODO
        polygon           TODO

        """
        return self.geometry

    def get_projection(self, proj4=False):
        """Return projection of this layer as a string

        Input
            proj4: Passed on to the projection object's get_projection
        """
        return self.projection.get_projection(proj4)

    def get_bounding_box(self):
        """Get bounding box coordinates for vector layer.

        Format is [West, South, East, North]
        """
        e = self.extent
        return [e[0],  # West
                e[2],  # South
                e[1],  # East
                e[3]]  # North

    def get_extrema(self, attribute=None):
        """Get min and max values from specified attribute

        Return min, max

        Raises
            RuntimeError: if attribute is None
        """
        if attribute is None:
            msg = ('Valid attribute name must be specified in get_extrema '
                   'for vector layers. I got None.')
            raise RuntimeError(msg)

        x = self.get_data(attribute)
        return min(x), max(x)

    def get_topN(self, attribute, N=10):
        """Get top N features

        Input
            attribute: The name of attribute where values are sought
            N: How many

        Output
            layer: New vector layer with selected features, ordered by
                   increasing value of the attribute

        Raises
            AssertionError: if attribute is not a non-empty string or
                            N is not positive
        """

        # FIXME (Ole): Maybe generalise this to arbitrary expressions

        # Input checks
        msg = ('Specfied attribute must be a string. '
               'I got %s' % (type(attribute)))
        assert isinstance(attribute, basestring), msg

        msg = 'Specified attribute was empty'
        assert attribute != '', msg

        msg = 'N must be a positive number. I got %i' % N
        assert N > 0, msg

        # Create list of values for specified attribute
        values = self.get_data(attribute)

        # Sort on the attribute value only. Sorting the whole tuples
        # would compare attribute dictionaries and coordinate arrays
        # whenever two values tie.
        A = sorted(zip(values, self.data, self.geometry),
                   key=lambda item: item[0])

        # Pick top N and unpack
        _, data, geometry = zip(*A[-N:])

        # Create new Vector instance and return
        return Vector(data=data,
                      projection=self.get_projection(),
                      geometry=geometry)

    def interpolate(self, X, name=None):
        """Interpolate values of this vector layer to other layer

        Input
            X: Layer object defining target
            name: Optional name of interpolated layer

        Output
            Y: Layer object with values of this vector layer interpolated to
               geometry of input layer X

        Not yet implemented: always raises Exception.
        """

        msg = 'Interpolation from vector layers not yet implemented'
        raise Exception(msg)

    @property
    def is_raster(self):
        """False, as this is a vector layer"""
        return False

    @property
    def is_vector(self):
        """True, as this is a vector layer"""
        return True