Example #1
0
File: core.py Project: sabman/riab
def check_data_integrity(layer_files):
    """Verify that all layers share the same projection and georeferencing.

    Input
        layer_files: Iterable of layer objects. Each object is accessed
                     through its attributes ``projection``, ``is_raster``
                     and ``is_vector`` and through ``get_geotransform()``
                     (raster layers) or ``get_geometry()`` (vector layers).

    Output
        None. The function returns silently if all checks pass.

    Raises
        AssertionError: if a layer's projection differs from the reference
                        projection, if two raster layers have different
                        geotransforms, or if two vector layers have
                        coordinates that are not numerically close.
    """

    # Set default values for projection and geotransform.
    # Choosing 'None' will use value of first layer.
    projection = Projection(DEFAULT_PROJECTION)
    geotransform = None
    coordinates = None

    for layer in layer_files:

        # Ensure that projection is consistent across all layers
        if projection is None:
            projection = layer.projection
        elif not projection == layer.projection:
            # 'projection' labels the offending layer's projection and
            # 'default' labels the reference it was compared against.
            msg = ('Projection in input layer %s is not as expected:\n'
                   'projection: %s\n'
                   'default:    %s'
                   '' % (layer,
                         layer.projection.get_projection(proj4=True),
                         projection.get_projection(proj4=True)))
            # Raised explicitly so the check survives 'python -O'
            raise AssertionError(msg)

        # Ensure that geotransform is consistent across all *raster* layers
        if layer.is_raster:
            layer_geotransform = layer.get_geotransform()
            if geotransform is None:
                geotransform = layer_geotransform
            elif not geotransform == layer_geotransform:
                msg = ('Geotransforms in input raster layers are different: '
                       '%s %s' % (geotransform, layer_geotransform))
                raise AssertionError(msg)

        # In case of vector layers, we check that the coordinates
        # are the same
        if layer.is_vector:
            layer_coordinates = layer.get_geometry()
            if coordinates is None:
                coordinates = layer_coordinates
            elif not numpy.allclose(coordinates, layer_coordinates):
                msg = ('Coordinates in input vector layers are different: '
                       '%s %s' % (coordinates, layer_coordinates))
                raise AssertionError(msg)
Example #2
0
    def read_from_file(self, filename):
        """Read raster metadata and the first band handle from file

        Input
            filename: Name of a raster file that GDAL can open.

        The following attributes are set on the instance:
        fid, keywords, filename, name, projection, geotransform,
        columns, rows, number_of_bands and band.

        Raises
            RuntimeError: if the file has extension .asc and the associated
                          .prj file cannot be opened.
            Exception: if the file cannot be opened, if it contains more
                       than one band or if the first band cannot be read.
        """

        # Open data file for reading
        # File must be kept open, otherwise GDAL methods segfault.
        fid = self.fid = gdal.Open(filename, gdal.GA_ReadOnly)
        if fid is None:
            msg = "Could not open file %s" % filename
            raise Exception(msg)

        # Record raster metadata from file
        basename, ext = os.path.splitext(filename)

        # If file is ASCII, check that projection is around.
        # GDAL does not check this nicely, so it is worth an
        # error message
        if ext == ".asc":
            try:
                # Only the ability to open the file matters, so the
                # handle is released straight away instead of leaking.
                open(basename + ".prj").close()
            except IOError:
                msg = (
                    "Projection file not found for %s. You must supply "
                    "a projection file with extension .prj" % filename
                )
                raise RuntimeError(msg)

        # Look for any keywords
        self.keywords = read_keywords(basename + ".keywords")

        # Always use basename without leading directories as name
        rastername = os.path.split(basename)[-1]

        self.filename = filename
        self.name = rastername

        self.projection = Projection(self.fid.GetProjection())
        self.geotransform = self.fid.GetGeoTransform()
        self.columns = fid.RasterXSize
        self.rows = fid.RasterYSize
        self.number_of_bands = fid.RasterCount

        # Assume that file contains all data in one band.
        # NOTE: Despite the 'WARNING' wording, multi band files are
        # currently rejected with an exception.
        if self.number_of_bands > 1:
            msg = (
                "WARNING: Number of bands in %s are %i. "
                "Only the first band will currently be "
                "used." % (filename, self.number_of_bands)
            )
            # FIXME(Ole): Let us use python warnings here
            raise Exception(msg)

        # Get first band.
        band = self.band = fid.GetRasterBand(1)
        if band is None:
            msg = "Could not read raster band from %s" % filename
            raise Exception(msg)
Example #3
0
    def read_from_file(self, filename):
        """Read raster metadata and the first band handle from file

        Input
            filename: Name of a raster file that GDAL can open.

        The layer name is taken from the 'title' keyword if present in
        the associated .keywords file, otherwise from the file's basename.

        The following attributes are set on the instance:
        fid, keywords, name, filename, projection, geotransform,
        columns, rows, number_of_bands and band.

        Raises
            RuntimeError: if the file has extension .asc and the associated
                          .prj file cannot be opened.
            Exception: if the file cannot be opened, if it contains more
                       than one band or if the first band cannot be read.
        """

        # Open data file for reading
        # File must be kept open, otherwise GDAL methods segfault.
        fid = self.fid = gdal.Open(filename, gdal.GA_ReadOnly)
        if fid is None:
            msg = 'Could not open file %s' % filename
            raise Exception(msg)

        # Record raster metadata from file
        basename, ext = os.path.splitext(filename)

        # If file is ASCII, check that projection is around.
        # GDAL does not check this nicely, so it is worth an
        # error message
        if ext == '.asc':
            try:
                # Only the ability to open the file matters, so the
                # handle is released straight away instead of leaking.
                open(basename + '.prj').close()
            except IOError:
                msg = ('Projection file not found for %s. You must supply '
                       'a projection file with extension .prj' % filename)
                raise RuntimeError(msg)

        # Look for any keywords
        self.keywords = read_keywords(basename + '.keywords')

        # Determine name
        if 'title' in self.keywords:
            rastername = self.keywords['title']
        else:
            # Use basename without leading directories as name
            rastername = os.path.split(basename)[-1]

        self.name = rastername
        self.filename = filename

        self.projection = Projection(self.fid.GetProjection())
        self.geotransform = self.fid.GetGeoTransform()
        self.columns = fid.RasterXSize
        self.rows = fid.RasterYSize
        self.number_of_bands = fid.RasterCount

        # Assume that file contains all data in one band.
        # NOTE: Despite the 'WARNING' wording, multi band files are
        # currently rejected with an exception.
        if self.number_of_bands > 1:
            msg = ('WARNING: Number of bands in %s are %i. '
                   'Only the first band will currently be '
                   'used.' % (filename, self.number_of_bands))
            # FIXME(Ole): Let us use python warnings here
            raise Exception(msg)

        # Get first band.
        band = self.band = fid.GetRasterBand(1)
        if band is None:
            msg = 'Could not read raster band from %s' % filename
            raise Exception(msg)
Example #4
0
    def __init__(self, data=None, projection=None, geotransform=None,
                 name='Raster layer', caption=''):
        """Initialise object with either data or filename

        Input
            data: Can be either
                * a filename of a raster file format known to GDAL
                * an MxN array (or nested sequence) of raster data
                * None
            projection: Geospatial reference in WKT format.
                        Only used if data is provided as a numeric array.
            geotransform: GDAL geotransform (6-tuple).
                          (top left x, w-e pixel resolution, rotation,
                           top left y, rotation, n-s pixel resolution).
                          See e.g. http://www.gdal.org/gdal_tutorial.html
                          Only used if data is provided as a numeric array.
            name: Optional name for layer.
                  Only used if data is provided as a numeric array.
            caption: Optional text field that describes the layer. This field
                     can for example be used to display text about the layer
                     in a web application.
        """

        self.caption = caption
        if data is None:
            # Instantiate empty object
            self.name = name
            self.data = None
            self.projection = None
            self.coordinates = None
            self.filename = None
            return

        if isinstance(data, basestring):
            self.read_from_file(data)
        else:
            # Assume that data is provided as an array
            # with extra keyword arguments supplying metadata

            # asarray only copies when a conversion is required. Unlike
            # numpy.array(..., copy=False) it does not raise under
            # NumPy 2 when a copy cannot be avoided.
            self.data = numpy.asarray(data, dtype='d')

            self.filename = None
            self.name = name

            self.projection = Projection(projection)
            self.geotransform = geotransform

            # Use the converted array so that nested sequences, which
            # have no shape attribute, are supported as well.
            self.rows = self.data.shape[0]
            self.columns = self.data.shape[1]

            self.number_of_bands = 1
Example #5
0
    def __init__(self, data=None, projection=None, geometry=None,
                 name='Vector layer', caption=''):
        """Initialise object with either geometry or filename

        Input
            data: Can be either
                * a filename of a vector file format known to GDAL
                * List of dictionaries of fields associated with
                  point coordinates
                * None
            projection: Geospatial reference in WKT format.
                        Only used if geometry is provided as a numeric array.
            geometry: An Nx2 array of point coordinates
            name: Optional name for layer.
                  Only used if geometry is provided as a numeric array
            caption: Optional text field that describes the layer. This field
                     can for example be used to display text about the layer
                     in a web application.

        Raises
            AssertionError: if data is not a filename and either geometry
                            or projection is missing.
        """

        self.caption = caption
        if data is None and projection is None and geometry is None:
            # Instantiate empty object
            self.name = name
            self.projection = None
            self.geometry = None
            self.filename = None
            self.data = None
            self.extent = None
            return

        if isinstance(data, basestring):
            self.read_from_file(data)
        else:
            # Assume that geometry is provided as an array
            # with extra keyword arguments supplying metadata

            msg = 'Geometry must be specified'
            assert geometry is not None, msg

            # asarray only copies when a conversion is required. Unlike
            # numpy.array(..., copy=False) it does not raise under
            # NumPy 2 when a copy cannot be avoided (e.g. list input).
            self.geometry = numpy.asarray(geometry, dtype='d')

            msg = 'Projection must be specified'
            assert projection is not None, msg
            self.projection = Projection(projection)

            self.data = data
            self.name = name
            self.filename = None
Example #6
0
    def __init__(self, data=None, projection=None, geotransform=None,
                 name='Raster layer', keywords=None):
        """Initialise object with either data or filename

        Input
            data: Can be either
                * a filename of a raster file format known to GDAL
                * an MxN array (or nested sequence) of raster data
                * None (FIXME (Ole): Remove this option)
            projection: Geospatial reference in WKT format.
                        Only used if data is provided as a numeric array.
            geotransform: GDAL geotransform (6-tuple).
                          (top left x, w-e pixel resolution, rotation,
                           top left y, rotation, n-s pixel resolution).
                          See e.g. http://www.gdal.org/gdal_tutorial.html
                          Only used if data is provided as a numeric array.
            name: Optional name for layer.
                  Only used if data is provided as a numeric array.
            keywords: Optional dictionary with keywords that describe the
                      layer. When the layer is stored, these keywords will
                      be written into an associated file with extension
                      .keywords.

                      Keywords can for example be used to display text
                      about the layer in a web application.

        Note that if data is a filename, all other arguments are ignored
        as they will be inferred from the file.

        Raises
            AssertionError: if keywords is neither None nor a dictionary.
        """

        # Input checks
        if data is None:
            # Instantiate empty object
            self.name = name
            self.data = None
            self.projection = None
            self.coordinates = None
            self.filename = None
            self.keywords = {}
            return

        # Initialisation
        if isinstance(data, basestring):
            self.read_from_file(data)
        else:
            # Assume that data is provided as an array
            # with extra keyword arguments supplying metadata
            if keywords is None:
                self.keywords = {}
            else:
                # Wrap keywords in a 1-tuple so that a tuple argument is
                # reported by the assertion below rather than breaking
                # the string formatting with a TypeError.
                msg = ('Specified keywords must be either None or a '
                       'dictionary. I got %s' % (keywords,))
                assert isinstance(keywords, dict), msg
                self.keywords = keywords

            # asarray only copies when a conversion is required. Unlike
            # numpy.array(..., copy=False) it does not raise under
            # NumPy 2 when a copy cannot be avoided.
            self.data = numpy.asarray(data, dtype='d')

            self.filename = None
            self.name = name

            self.projection = Projection(projection)
            self.geotransform = geotransform

            # Use the converted array so that nested sequences, which
            # have no shape attribute, are supported as well.
            self.rows = self.data.shape[0]
            self.columns = self.data.shape[1]

            self.number_of_bands = 1
Example #7
0
File: vector.py Project: AIFDR/riab
    def __init__(self, data=None, projection=None, geometry=None, name="Vector layer", keywords=None):
        """Initialise object with either geometry or filename

        Input
            data: Can be either
                * a filename of a vector file format known to GDAL
                * List of dictionaries of fields associated with
                  point coordinates
                * None
            projection: Geospatial reference in WKT format.
                        Only used if geometry is provided as a numeric array.
            geometry: A list of either point coordinates or polygons
            name: Optional name for layer.
                  Only used if geometry is provided as a numeric array
            keywords: Optional dictionary with keywords that describe the
                      layer. When the layer is stored, these keywords will
                      be written into an associated file with extension
                      .keywords.

                      Keywords can for example be used to display text
                      about the layer in a web application.

        Note that if data is a filename, all other arguments are ignored
        as they will be inferred from the file.

        The geometry type will be inferred from the dimensions of geometry.
        If each entry is one set of coordinates the type will be ogr.wkbPoint,
        if it is an array of coordinates the type will be ogr.wkbPolygon.

        Raises
            AssertionError: if keywords is neither None nor a dictionary,
                            if geometry or projection is missing, if
                            geometry or data is not a sequence, or if
                            geometry and data differ in length.
        """

        if data is None and projection is None and geometry is None:
            # Instantiate empty object
            self.name = name
            self.projection = None
            self.geometry = None
            self.geometry_type = None
            self.filename = None
            self.data = None
            self.extent = None
            self.keywords = {}
            return

        if isinstance(data, basestring):
            self.read_from_file(data)
        else:
            # Assume that data is provided as sequences provided as
            # arguments to the Vector constructor
            # with extra keyword arguments supplying metadata

            self.name = name
            self.filename = None

            if keywords is None:
                self.keywords = {}
            else:
                # Wrap keywords in a 1-tuple so that a tuple argument is
                # reported by the assertion below rather than breaking
                # the string formatting with a TypeError.
                msg = "Specified keywords must be either None or a " "dictionary. I got %s" % (keywords,)
                assert isinstance(keywords, dict), msg
                self.keywords = keywords

            msg = "Geometry must be specified"
            assert geometry is not None, msg

            msg = "Geometry must be a sequence"
            assert is_sequence(geometry), msg
            self.geometry = geometry

            self.geometry_type = get_geometry_type(geometry)

            msg = "Projection must be specified"
            assert projection is not None, msg
            self.projection = Projection(projection)

            self.data = data
            if data is not None:
                msg = "Data must be a sequence"
                assert is_sequence(data), msg

                msg = "The number of entries in geometry and data " "must be the same"
                assert len(geometry) == len(data), msg
Example #8
0
File: vector.py Project: AIFDR/riab
    def read_from_file(self, filename):
        """ Read and unpack vector data.

        It is assumed that the file contains only one layer with the
        pertinent features. Only point and polygon geometries are
        supported; for polygons only the ring at index 0 is read.

        * A feature is a geometry and a set of attributes.
        * A geometry refers to location and can be point, line, polygon or
          combinations thereof.
        * The attributes are obtained through GetField()

        The full OGR architecture is documented at
        * http://www.gdal.org/ogr/ogr_arch.html
        * http://www.gdal.org/ogr/ogr_apitut.html

        Examples are at
        * danieljlewis.org/files/2010/09/basicpythonmap.pdf
        * http://invisibleroads.com/tutorials/gdal-shapefile-points-save.html
        * http://www.packtpub.com/article/geospatial-data-python-geometry

        Input
            filename: Name of a vector file that OGR can open.

        The following attributes are set on the instance:
        keywords, name, filename, geometry_type, extent, projection,
        geometry (list with one entry per feature) and data (list with
        one dictionary of attributes per feature).

        Raises
            IOError: if the file cannot be opened.
            Exception: if the file has more than one layer, if a feature
                       or its geometry cannot be read or if a geometry is
                       neither a point nor a polygon.
        """

        basename, _ = os.path.splitext(filename)

        # Look for any keywords
        self.keywords = read_keywords(basename + ".keywords")

        # Determine name
        if "title" in self.keywords:
            vectorname = self.keywords["title"]
        else:
            # Use basename without leading directories as name
            vectorname = os.path.split(basename)[-1]

        self.name = vectorname
        self.filename = filename
        self.geometry_type = None  # In case there are no features

        fid = ogr.Open(filename)
        if fid is None:
            msg = "Could not open %s" % filename
            raise IOError(msg)

        # Assume that file contains all data in one layer.
        # NOTE: Despite the 'WARNING' wording, multi layer files are
        # currently rejected with an exception.
        if fid.GetLayerCount() > 1:
            msg = (
                "WARNING: Number of layers in %s are %i. "
                "Only the first layer will currently be "
                "used." % (filename, fid.GetLayerCount())
            )
            raise Exception(msg)

        layer = fid.GetLayerByIndex(0)

        # Get spatial extent
        self.extent = layer.GetExtent()

        # Get projection
        p = layer.GetSpatialRef()
        self.projection = Projection(p)

        # Get number of features
        N = layer.GetFeatureCount()

        # Extract coordinates and attributes for all features
        geometry = []
        data = []
        for i in range(N):
            feature = layer.GetFeature(i)
            if feature is None:
                msg = "Could not get feature %i from %s" % (i, filename)
                raise Exception(msg)

            # Record coordinates ordered as Longitude, Latitude
            G = feature.GetGeometryRef()
            if G is None:
                msg = "Geometry was None in filename %s " % filename
                raise Exception(msg)

            # The type of the most recently read feature is the one kept
            self.geometry_type = G.GetGeometryType()
            if self.geometry_type == ogr.wkbPoint:
                geometry.append((G.GetX(), G.GetY()))
            elif self.geometry_type == ogr.wkbPolygon:
                ring = G.GetGeometryRef(0)
                M = ring.GetPointCount()
                coordinates = [(ring.GetX(j), ring.GetY(j)) for j in range(M)]

                # Record entire polygon ring as an Mx2 numpy array.
                # No copy=False here: converting a list always needs a
                # copy, which makes NumPy 2 raise ValueError.
                geometry.append(numpy.array(coordinates, dtype="d"))
            else:
                msg = (
                    "Only point and polygon geometries are supported. "
                    "Geometry in filename %s "
                    "was %s." % (filename, self.geometry_type)
                )
                raise Exception(msg)

            # Record attributes by name
            number_of_fields = feature.GetFieldCount()
            fields = {}
            for j in range(number_of_fields):
                name = feature.GetFieldDefnRef(j).GetName()

                # FIXME (Ole): Ascertain the type of each field?
                #              We need to cast each appropriately?
                #              This is issue #66
                fields[name] = feature.GetField(j)

            data.append(fields)

        # Store geometry (list with one entry per feature) and attributes
        self.geometry = geometry
        self.data = data
Example #9
0
File: vector.py Project: AIFDR/riab
class Vector:
    """Class for abstraction of vector data
    """

    def __init__(self, data=None, projection=None, geometry=None, name="Vector layer", keywords=None):
        """Initialise object with either geometry or filename

        Input
            data: Can be either
                * a filename of a vector file format known to GDAL
                * List of dictionaries of fields associated with
                  point coordinates
                * None
            projection: Geospatial reference in WKT format.
                        Only used if geometry is provide as a numeric array,
            geometry: A list of either point coordinates or polygons
            name: Optional name for layer.
                  Only used if geometry is provide as a numeric array
            keywords: Optional dictionary with keywords that describe the
                      layer. When the layer is stored, these keywords will
                      be written into an associated file with extension
                      .keywords.

                      Keywords can for example be used to display text
                      about the layer in a web application.

        Note that if data is a filename, all other arguments are ignored
        as they will be inferred from the file.

        The geometry type will be inferred from the dimensions of geometry.
        If each entry is one set of coordinates the type will be ogr.wkbPoint,
        if it is an array of coordinates the type will be ogr.wkbPolygon.
        """

        if data is None and projection is None and geometry is None:
            # Instantiate empty object
            self.name = name
            self.projection = None
            self.geometry = None
            self.geometry_type = None
            self.filename = None
            self.data = None
            self.extent = None
            self.keywords = {}
            return

        if isinstance(data, basestring):
            self.read_from_file(data)
        else:
            # Assume that data is provided as sequences provided as
            # arguments to the Vector constructor
            # with extra keyword arguments supplying metadata

            self.name = name
            self.filename = None

            if keywords is None:
                self.keywords = {}
            else:
                msg = "Specified keywords must be either None or a " "dictionary. I got %s" % keywords
                assert isinstance(keywords, dict), msg
                self.keywords = keywords

            msg = "Geometry must be specified"
            assert geometry is not None, msg

            msg = "Geometry must be a sequence"
            assert is_sequence(geometry), msg
            self.geometry = geometry

            self.geometry_type = get_geometry_type(geometry)

            msg = "Projection must be specified"
            assert projection is not None, msg
            self.projection = Projection(projection)

            self.data = data
            if data is not None:
                msg = "Data must be a sequence"
                assert is_sequence(data), msg

                msg = "The number of entries in geometry and data " "must be the same"
                assert len(geometry) == len(data), msg

            # FIXME: Need to establish extent here

    def __str__(self):
        """Return one-line summary with name, feature count and geometry type
        """

        type_code = self.geometry_type
        type_label = geometrytype2string(type_code)

        summary = (self.name, len(self), str(type_code), type_label)
        return "Vector data set: %s, %i features, geometry type %s (%s)" % summary

    def __len__(self):
        """Size of vector layer defined as number of features
        """

        return len(self.geometry)

    def __eq__(self, other, rtol=1.0e-5, atol=1.0e-8):
        """Override '==' to allow comparison with other vector objecs

        Input
           other: Vector instance to compare to
           rtol, atol: Relative and absolute tolerance.
                       See numpy.allclose for details
        """

        # Check type
        if not isinstance(other, Vector):
            msg = "Vector instance cannot be compared to %s" " as its type is %s " % (str(other), type(other))
            raise TypeError(msg)

        # Check projection
        if self.projection != other.projection:
            return False

        # Check geometry
        if not numpy.allclose(self.get_geometry(), other.get_geometry(), rtol=rtol, atol=atol):
            return False

        # Check keys
        x = self.get_data()
        y = other.get_data()

        for key in x[0]:
            for i in range(len(y)):
                if key not in y[i]:
                    return False

        for key in y[0]:
            for i in range(len(x)):
                if key not in x[i]:
                    return False

        # Check data
        for i, a in enumerate(x):
            for key in a:
                if a[key] != y[i][key]:
                    # Not equal, try numerical comparison with tolerances

                    if not numpy.allclose(a[key], y[i][key], rtol=rtol, atol=atol):
                        return False

        # Check keywords
        if self.keywords != other.keywords:
            return False

        # Vector layers are identical up to the specified tolerance
        return True

    def __ne__(self, other):
        """Override '!=' to allow comparison with other projection objecs
        """
        return not self == other

    def get_name(self):
        return self.name

    def get_keywords(self, key=None):
        """Return keywords dictionary
        """
        if key is None:
            return self.keywords
        else:
            if key in self.keywords:
                return self.keywords[key]
            else:
                msg = "Keyword %s does not exist in %s: Options are " "%s" % (
                    key,
                    self.get_name(),
                    self.keywords.keys(),
                )
                raise Exception(msg)

    def get_caption(self):
        """Return 'caption' keyword if present. Otherwise ''.
        """
        if "caption" in self.keywords:
            return self.keywords["caption"]
        else:
            return ""

    def read_from_file(self, filename):
        """ Read and unpack vector data.

        It is assumed that the file contains only one layer with the
        pertinent features. Only point and polygon geometries are
        supported; for polygons only the ring at index 0 is read.

        * A feature is a geometry and a set of attributes.
        * A geometry refers to location and can be point, line, polygon or
          combinations thereof.
        * The attributes are obtained through GetField()

        The full OGR architecture is documented at
        * http://www.gdal.org/ogr/ogr_arch.html
        * http://www.gdal.org/ogr/ogr_apitut.html

        Examples are at
        * danieljlewis.org/files/2010/09/basicpythonmap.pdf
        * http://invisibleroads.com/tutorials/gdal-shapefile-points-save.html
        * http://www.packtpub.com/article/geospatial-data-python-geometry

        Input
            filename: Name of a vector file that OGR can open.

        The following attributes are set on the instance:
        keywords, name, filename, geometry_type, extent, projection,
        geometry (list with one entry per feature) and data (list with
        one dictionary of attributes per feature).

        Raises
            IOError: if the file cannot be opened.
            Exception: if the file has more than one layer, if a feature
                       or its geometry cannot be read or if a geometry is
                       neither a point nor a polygon.
        """

        basename, _ = os.path.splitext(filename)

        # Look for any keywords
        self.keywords = read_keywords(basename + ".keywords")

        # Determine name
        if "title" in self.keywords:
            vectorname = self.keywords["title"]
        else:
            # Use basename without leading directories as name
            vectorname = os.path.split(basename)[-1]

        self.name = vectorname
        self.filename = filename
        self.geometry_type = None  # In case there are no features

        fid = ogr.Open(filename)
        if fid is None:
            msg = "Could not open %s" % filename
            raise IOError(msg)

        # Assume that file contains all data in one layer.
        # NOTE: Despite the 'WARNING' wording, multi layer files are
        # currently rejected with an exception.
        if fid.GetLayerCount() > 1:
            msg = (
                "WARNING: Number of layers in %s are %i. "
                "Only the first layer will currently be "
                "used." % (filename, fid.GetLayerCount())
            )
            raise Exception(msg)

        layer = fid.GetLayerByIndex(0)

        # Get spatial extent
        self.extent = layer.GetExtent()

        # Get projection
        p = layer.GetSpatialRef()
        self.projection = Projection(p)

        # Get number of features
        N = layer.GetFeatureCount()

        # Extract coordinates and attributes for all features
        geometry = []
        data = []
        for i in range(N):
            feature = layer.GetFeature(i)
            if feature is None:
                msg = "Could not get feature %i from %s" % (i, filename)
                raise Exception(msg)

            # Record coordinates ordered as Longitude, Latitude
            G = feature.GetGeometryRef()
            if G is None:
                msg = "Geometry was None in filename %s " % filename
                raise Exception(msg)

            # The type of the most recently read feature is the one kept
            self.geometry_type = G.GetGeometryType()
            if self.geometry_type == ogr.wkbPoint:
                geometry.append((G.GetX(), G.GetY()))
            elif self.geometry_type == ogr.wkbPolygon:
                ring = G.GetGeometryRef(0)
                M = ring.GetPointCount()
                coordinates = [(ring.GetX(j), ring.GetY(j)) for j in range(M)]

                # Record entire polygon ring as an Mx2 numpy array.
                # No copy=False here: converting a list always needs a
                # copy, which makes NumPy 2 raise ValueError.
                geometry.append(numpy.array(coordinates, dtype="d"))
            else:
                msg = (
                    "Only point and polygon geometries are supported. "
                    "Geometry in filename %s "
                    "was %s." % (filename, self.geometry_type)
                )
                raise Exception(msg)

            # Record attributes by name
            number_of_fields = feature.GetFieldCount()
            fields = {}
            for j in range(number_of_fields):
                name = feature.GetFieldDefnRef(j).GetName()

                # FIXME (Ole): Ascertain the type of each field?
                #              We need to cast each appropriately?
                #              This is issue #66
                fields[name] = feature.GetField(j)

            data.append(fields)

        # Store geometry (list with one entry per feature) and attributes
        self.geometry = geometry
        self.data = data

    def write_to_file(self, filename):
        """Save vector data to file

        Input
            filename: filename with extension .shp or .gml

        Raises
            AssertionError: if the extension is neither .shp nor .gml or
                            if an attribute value has a type that is not
                            in TYPE_MAP
            Exception: if the .gml format is requested (currently
                       disabled), if attribute data is empty or is not a
                       sequence of dictionaries, or if OGR fails to create
                       the driver, data source, layer, a field or a feature

        Note, if attribute names are longer than 10 characters they will be
        truncated. This is due to limitations in the shp file driver and has
        to be done here since gdal v1.7 onwards has changed its handling of
        this issue: http://www.gdal.org/ogr/drv_shapefile.html
        """

        # Check file format
        basename, extension = os.path.splitext(filename)

        msg = "Invalid file type for file %s. Only extensions " "shp or gml allowed." % filename
        assert extension == ".shp" or extension == ".gml", msg
        driver = DRIVER_MAP[extension]

        # FIXME (Ole): Tempory flagging of GML issue (ticket #18)
        if extension == ".gml":
            msg = (
                "OGR GML driver does not store geospatial reference."
                "This format is disabled for the time being. See "
                "https://github.com/AIFDR/riab/issues/18"
            )
            raise Exception(msg)

        # Derive layername from filename (excluding preceding dirs)
        layername = os.path.split(basename)[-1]

        # Get vector data
        geometry = self.get_geometry()
        data = self.get_data()

        N = len(geometry)

        # Clear any previous file of this name (ogr does not overwrite).
        # Only file system errors (typically: file does not exist) are
        # ignored; anything else is a genuine problem and must surface.
        try:
            os.remove(filename)
        except OSError:
            pass

        # Create new file with one layer
        drv = ogr.GetDriverByName(driver)
        if drv is None:
            msg = "OGR driver %s not available" % driver
            raise Exception(msg)

        ds = drv.CreateDataSource(filename)
        if ds is None:
            msg = "Creation of output file %s failed" % filename
            raise Exception(msg)

        lyr = ds.CreateLayer(layername, self.projection.spatial_reference, self.geometry_type)
        if lyr is None:
            msg = "Could not create layer %s" % layername
            raise Exception(msg)

        # Define attributes if any
        store_attributes = False
        if data is not None:
            if len(data) == 0:
                msg = 'Input parameter "data" was specified ' "but appears to be empty"
                raise Exception(msg)

            # The first record defines the field names for the whole layer
            try:
                fields = data[0].keys()
            except Exception:
                msg = (
                    'Input parameter "attributes" was specified '
                    "but it does not contain dictionaries with "
                    "field information as expected. The first"
                    "element is %s" % data[0]
                )
                raise Exception(msg)

            # Establish OGR types for each element
            ogrtypes = {}
            for name in fields:
                att = data[0][name]
                py_type = type(att)
                msg = "Unknown type for storing vector " "data: %s, %s" % (name, str(py_type)[1:-1])
                assert py_type in TYPE_MAP, msg
                ogrtypes[name] = TYPE_MAP[py_type]

            # Create attribute fields in layer
            store_attributes = True
            for name in fields:
                fd = ogr.FieldDefn(name, ogrtypes[name])
                # FIXME (Ole): Trying to address issue #16
                #              But it doesn't work and
                #              somehow changes the values of MMI in test
                # width = max(128, len(name))
                # print name, width
                # fd.SetWidth(width)

                # Silent handling of warnings like
                # Warning 6: Normalized/laundered field name:
                #'CONTENTS_LOSS_AUD' to 'CONTENTS_L'
                gdal.PushErrorHandler("CPLQuietErrorHandler")
                try:
                    if lyr.CreateField(fd) != 0:
                        msg = "Could not create field %s" % name
                        raise Exception(msg)
                finally:
                    # Restore error handler, also when field creation
                    # failed, so the quiet handler never stays installed
                    gdal.PopErrorHandler()

        # Store geometry
        geom = ogr.Geometry(self.geometry_type)
        layer_def = lyr.GetLayerDefn()
        for i in range(N):
            # Create new feature instance
            feature = ogr.Feature(layer_def)

            # Store geometry and check
            if self.geometry_type == ogr.wkbPoint:
                x = float(geometry[i][0])
                y = float(geometry[i][1])
                geom.SetPoint_2D(0, x, y)
            elif self.geometry_type == ogr.wkbPolygon:
                wkt = array2wkt(geometry[i], geom_type="POLYGON")
                geom = ogr.CreateGeometryFromWkt(wkt)
            else:
                msg = "Geometry type %s not implemented" % self.geometry_type
                raise Exception(msg)

            feature.SetGeometry(geom)

            G = feature.GetGeometryRef()
            if G is None:
                msg = "Could not create GeometryRef for file %s" % filename
                raise Exception(msg)

            # Store attributes
            if store_attributes:
                for j, name in enumerate(fields):
                    # Use the name registered in the layer as the driver
                    # may have truncated the original one
                    actual_field_name = layer_def.GetFieldDefn(j).GetNameRef()

                    val = data[i][name]
                    if isinstance(val, numpy.ndarray):
                        # A singleton of type <type 'numpy.ndarray'> works
                        # for gdal version 1.6 but fails for version 1.8
                        # in SetField with error: NotImplementedError:
                        # Wrong number of arguments for overloaded function
                        val = float(val)

                    feature.SetField(actual_field_name, val)

            # Save this feature
            if lyr.CreateFeature(feature) != 0:
                msg = "Failed to create feature %i in file %s" % (i, filename)
                raise Exception(msg)

            feature.Destroy()

        # Write keywords if any
        write_keywords(self.keywords, basename + ".keywords")

    def get_attribute_names(self):
        """Return the attribute names available in this layer

        The names are taken from the first feature record and are the
        ones accepted by get_data.
        """

        first_record = self.data[0]
        return first_record.keys()

    def get_data(self, attribute=None, index=None):
        """Get vector attributes

        Data is returned as a list where each entry is a dictionary of
        attributes for one feature. Entries in get_geometry() and
        get_data() are related as 1-to-1

        If optional argument attribute is specified and a valid name,
        then the list of values for that attribute is returned.

        If optional argument index is specified then only that value will
        be returned. Any value of index is ignored if attribute is None.

        Raises
            Exception: if this instance has no attribute data
            AssertionError: if attribute is not a known name, or if index
                            is not an integer within the layer bounds
        """

        if not hasattr(self, "data"):
            msg = "Vector data instance does not have any attributes"
            raise Exception(msg)

        if attribute is None:
            return self.data

        msg = (
            "Specified attribute %s does not exist in "
            "vector layer %s. Valid names are %s"
            "" % (attribute, self, self.data[0].keys())
        )
        assert attribute in self.data[0], msg

        if index is None:
            # Return all values for specified attribute
            return [x[attribute] for x in self.data]

        # Return value for specified attribute and index.
        # The index is wrapped in a tuple so that the message can also be
        # formatted when a tuple was passed in by mistake.
        msg = "Specified index must be either None or " "an integer. I got %s" % (index,)
        assert isinstance(index, int), msg

        msg = (
            "Specified index must lie within the bounds "
            "of vector layer %s which is [%i, %i]"
            "" % (self, 0, len(self) - 1)
        )
        assert 0 <= index < len(self), msg

        return self.data[index][attribute]

    def get_geometry(self):
        """Return the stored geometry of this vector layer.

        The representation depends on the feature type:

        geometry type     output type
        -----------------------------
        point             coordinates (Nx2 array of longitudes and latitudes)
        line              TODO
        polygon           list of arrays of coordinates

        """
        stored_geometry = self.geometry
        return stored_geometry

    def get_projection(self, proj4=False):
        """Return the projection of this layer as a string

        The proj4 flag is handed on unchanged to the projection object.
        """
        layer_projection = self.projection
        return layer_projection.get_projection(proj4)

    def get_bounding_box(self):
        """Return bounding box coordinates for this vector layer.

        The stored extent is reordered into [West, South, East, North].
        """
        extent = self.extent
        west = extent[0]
        south = extent[2]
        east = extent[1]
        north = extent[3]
        return [west, south, east, north]

    def get_extrema(self, attribute=None):
        """Return smallest and largest value of the specified attribute

        Input
            attribute: Name of the attribute to inspect. Must not be None.

        Output
            min, max
        """
        if attribute is not None:
            values = self.get_data(attribute)
            return min(values), max(values)

        # An attribute name is mandatory for vector layers
        raise RuntimeError(
            "Valid attribute name must be specified in get_extrema "
            "for vector layers. I got None."
        )

    def get_topN(self, attribute, N=10):
        """Get top N features

        Input
            attribute: The name of attribute where values are sought
            N: How many

        Output
            layer: New vector layer with selected features, ordered by
                   increasing value of the attribute

        Raises
            AssertionError: if attribute is not a non-empty string or if
                            N is not positive
        """

        # FIXME (Ole): Maybe generalise this to arbitrary expressions

        # Input checks
        msg = "Specfied attribute must be a string. " "I got %s" % (type(attribute))
        assert isinstance(attribute, basestring), msg

        msg = "Specified attribute was empty"
        assert attribute != "", msg

        msg = "N must be a positive number. I got %i" % N
        assert N > 0, msg

        # Create list of values for specified attribute
        values = self.get_data(attribute)

        # Sort and select using Schwartzian transform.
        # Only the attribute value is used as sort key: comparing whole
        # tuples would fall through to the attribute dictionaries and the
        # geometry arrays whenever two values are equal, and those are
        # not meaningfully comparable. sorted() also works where zip
        # returns an iterator rather than a list.
        ranked = sorted(zip(values, self.data, self.geometry),
                        key=lambda item: item[0])

        # Pick top N and unpack
        _, data, geometry = zip(*ranked[-N:])

        # Create new Vector instance and return
        return Vector(data=data, projection=self.get_projection(), geometry=geometry)

    def interpolate(self, X, name=None):
        """Interpolate values of this vector layer to another layer

        Input
            X: Layer object defining target
            name: Optional name of interpolated layer

        Output
            Y: Layer object with values of this vector layer interpolated to
               geometry of input layer X

        Currently always raises an Exception as this is not implemented.
        """

        raise Exception("Interpolation from vector layers not yet implemented")

    @property
    def is_raster(self):
        """Always False for this layer type"""
        return False

    @property
    def is_vector(self):
        """Always True for this layer type"""
        return True

    @property
    def is_point_data(self):
        """True if this is a vector layer whose geometry type is point"""
        vector_flag = self.is_vector
        return vector_flag and self.geometry_type == ogr.wkbPoint

    @property
    def is_polygon_data(self):
        """True if this is a vector layer whose geometry type is polygon"""
        vector_flag = self.is_vector
        return vector_flag and self.geometry_type == ogr.wkbPolygon
Example #10
0
class Raster:
    """Internal representation of raster data

    A raster is backed either by a GDAL dataset opened from file, in which
    case pixel values are read from its first band on demand, or by a
    numeric array supplied by the caller.
    """

    def __init__(self, data=None, projection=None, geotransform=None,
                 name='Raster layer', caption=''):
        """Initialise object with either data or filename

        Input
            data: Can be either
                * a filename of a raster file format known to GDAL
                * an MxN array of raster data
                * None
            projection: Geospatial reference in WKT format.
                        Only used if data is provide as a numeric array,
            geotransform: GDAL geotransform (6-tuple).
                          (top left x, w-e pixel resolution, rotation,
                           top left y, rotation, n-s pixel resolution).
                          See e.g. http://www.gdal.org/gdal_tutorial.html
                          Only used if data is provide as a numeric array,
            name: Optional name for layer.
                  Only used if data is provide as a numeric array,
            caption: Optional text field that describes the layer. This field
                     can for example be used to display text about the layer
                     in a web application.
        """

        self.caption = caption
        if data is None:
            # Instantiate empty object
            self.name = name
            self.data = None
            self.projection = None
            self.coordinates = None
            self.filename = None
            return

        if isinstance(data, basestring):
            self.read_from_file(data)
        else:
            # Assume that data is provided as an array
            # with extra keyword arguments supplying metadata

            # asarray converts only when needed and also accepts nested
            # sequences
            self.data = numpy.asarray(data, dtype='d')

            self.filename = None
            self.name = name

            self.projection = Projection(projection)
            self.geotransform = geotransform

            # Take dimensions from the converted array; the raw input
            # need not have a shape attribute
            self.rows = self.data.shape[0]
            self.columns = self.data.shape[1]

            self.number_of_bands = 1

    def __str__(self):
        return self.name

    def __len__(self):
        """Size of data set defined as total number of grid points
        """
        return len(self.get_data().flat)

    def __eq__(self, other, rtol=1.0e-5, atol=1.0e-8):
        """Override '==' to allow comparison with other raster objects

        Input
           other: Raster instance to compare to
           rtol, atol: Relative and absolute tolerance.
                       See numpy.allclose for details

        Raises
           TypeError: if other is not a Raster instance
        """

        # Check type
        if not isinstance(other, Raster):
            msg = ('Raster instance cannot be compared to %s'
                   ' as its type is %s ' % (str(other), type(other)))
            raise TypeError(msg)

        # Check projection
        if self.projection != other.projection:
            return False

        # Check geotransform
        if self.get_geotransform() != other.get_geotransform():
            return False

        # Check data
        if not numpy.allclose(self.get_data(),
                              other.get_data(),
                              rtol=rtol, atol=atol):
            return False

        # Raster layers are identical up to the specified tolerance
        return True

    def __ne__(self, other):
        """Override '!=' to allow comparison with other raster objects
        """
        return not self == other

    def get_name(self):
        """Return the name of this layer
        """
        return self.name

    def get_caption(self):
        """Return the caption of this layer
        """
        return self.caption

    def read_from_file(self, filename):
        """Open raster file with GDAL and record its metadata

        Input
            filename: name of a raster file in a format known to GDAL

        The pixel values are not read here. The dataset and its first
        band are kept on the instance and read by get_data().

        Raises
            RuntimeError: if an ASCII grid (.asc) has no readable
                          projection file with extension .prj
            Exception: if the file cannot be opened, if it has more than
                       one band or if the first band cannot be read
        """

        # Open data file for reading
        # File must be kept open, otherwise GDAL methods segfault.
        fid = self.fid = gdal.Open(filename, gdal.GA_ReadOnly)
        if fid is None:
            msg = 'Could not open file %s' % filename
            raise Exception(msg)

        # Record raster metadata from file
        basename, ext = os.path.splitext(filename)

        # If file is ASCII, check that projection is around.
        # GDAL does not check this nicely, so it is worth an
        # error message
        if ext == '.asc':
            try:
                # Only readability matters, so release the handle at once
                open(basename + '.prj').close()
            except IOError:
                msg = ('Projection file not found for %s. You must supply '
                       'a projection file with extension .prj' % filename)
                raise RuntimeError(msg)

        # Always use basename without leading directories as name
        rastername = os.path.split(basename)[-1]

        self.filename = filename
        self.name = rastername

        self.projection = Projection(self.fid.GetProjection())
        self.geotransform = self.fid.GetGeoTransform()
        self.columns = fid.RasterXSize
        self.rows = fid.RasterYSize
        self.number_of_bands = fid.RasterCount

        # Assume that file contains all data in one band
        if self.number_of_bands > 1:
            msg = ('WARNING: Number of bands in %s are %i. '
                   'Only the first band will currently be '
                   'used.' % (filename, self.number_of_bands))
            # FIXME(Ole): Let us use python warnings here
            raise Exception(msg)

        # Get first band.
        band = self.band = fid.GetRasterBand(1)
        if band is None:
            msg = 'Could not read raster band from %s' % filename
            raise Exception(msg)

    def write_to_file(self, filename):
        """Save raster data to file

        Input
            filename: filename with extension .tif

        Raises
            AssertionError: if the extension is not .tif
            Exception: if the GDAL driver is unavailable or the file
                       cannot be created
        """

        # Check file format
        _, extension = os.path.splitext(filename)

        msg = ('Invalid file type for file %s. Only extension '
               'tif allowed.' % filename)
        assert extension == '.tif', msg
        driver_name = DRIVER_MAP[extension]

        # Get raster data
        A = self.get_data()

        # Get Dimensions. Note numpy and Gdal swap order
        N, M = A.shape

        # Create empty file
        driver = gdal.GetDriverByName(driver_name)
        if driver is None:
            msg = 'GDAL driver %s not available' % driver_name
            raise Exception(msg)

        fid = driver.Create(filename, M, N, 1, gdal.GDT_Float64)
        if fid is None:
            msg = ('Gdal could not create filename %s using '
                   'format %s' % (filename, driver_name))
            raise Exception(msg)

        # Write metada
        fid.SetProjection(str(self.projection))
        fid.SetGeoTransform(self.geotransform)

        # Write data
        fid.GetRasterBand(1).WriteArray(A)

    def interpolate(self, X, name=None):
        """Interpolate values of this raster layer to other layer

        Input
            X: Layer object defining target
            name: Optional name of interpolated layer.
                  If name is None, the name of self is used.

        Output
            Y: Layer object with values of this raster layer interpolated to
               geometry of input layer X

        Raises
            Exception: if X is a raster with a different geotransform
        """

        if X.is_raster:
            if self.get_geotransform() != X.get_geotransform():
                # Need interpolation between grids
                msg = 'Intergrid interpolation not yet implemented'
                raise Exception(msg)
            else:
                # Rasters are aligned, no need to interpolate
                return self
        else:
            # Interpolate this raster layer to geometry of X
            return interpolate_raster_vector(self, X, name)

    def get_data(self, nan=False):
        """Get raster data as numeric array
        If keyword nan is True, nodata values will be replaced with NaN
        If keyword nan has a numeric value, that will be used for NODATA
        """

        # FIXME (Ole): Once we have the ability to use numpy.nan throughout,
        #              make that the default and name everything better

        if hasattr(self, 'data'):
            A = self.data
            assert A.shape[0] == self.rows and A.shape[1] == self.columns
        else:
            # Read from raster file
            A = self.band.ReadAsArray()

            M, N = A.shape
            msg = ('Dimensions of raster array do not match those of '
                   'raster file %s' % self.filename)
            assert M == self.rows, msg
            assert N == self.columns, msg

        if nan is False:
            pass
        else:
            if nan is True:
                NAN = numpy.nan
            else:
                NAN = nan

            # Replace NODATA_VALUE with NaN
            nodata = self.get_nodata_value()

            NaN = numpy.ones(A.shape, A.dtype) * NAN
            A = numpy.where(A == nodata, NaN, A)

        return A

    def get_projection(self, proj4=False):
        """Return projection of this layer as a string.
        """
        return self.projection.get_projection(proj4)

    def get_geotransform(self):
        """Return geotransform for this raster layer

        Output
        geotransform: 6 digit vector
                      (top left x, w-e pixel resolution, rotation,
                       top left y, rotation, n-s pixel resolution).

                       See e.g. http://www.gdal.org/gdal_tutorial.html
        """

        return self.geotransform

    def get_geometry(self):
        """Return longitudes and latitudes (the axes) for grid.

        Return two vectors (longitudes and latitudes) corresponding to
        grid. The values are offset by half a pixel size to correspond to
        pixel registration.

        I.e. If the grid origin (top left corner) is (100, 10), the
        resolution is 1 degrees in each direction and the grid has 10
        columns and 10 rows, then the vectors will take the form

        longitudes = [100.5, 101.5, ..., 109.5]
        latitudes = [0.5, 1.5, ..., 9.5]
        """

        # Get parameters for axes
        g = self.get_geotransform()

        lon_ul = g[0]  # Longitude of upper left corner
        lat_ul = g[3]  # Latitude of upper left corner
        dx = g[1]      # Longitudinal resolution
        dy = - g[5]    # Latitudinal resolution (always(?) negative)
        nx = self.columns
        ny = self.rows

        assert dx > 0
        assert dy > 0

        # Coordinates of lower left corner
        lon_ll = lon_ul
        lat_ll = lat_ul - ny * dy

        # Coordinates of upper right corner
        lon_ur = lon_ul + nx * dx

        # Define pixel centers along each directions.
        # Divide by a float so that integer resolutions are not truncated.
        dy2 = dy / 2.0
        dx2 = dx / 2.0

        # Define longitudes and latitudes for each axes
        x = numpy.linspace(lon_ll + dx2,
                           lon_ur - dx2, nx)
        y = numpy.linspace(lat_ll + dy2,
                           lat_ul - dy2, ny)

        # Return
        return x, y

    def __mul__(self, other):
        """Return elementwise product of the data arrays of self and other
        """
        return self.get_data() * other.get_data()

    def __add__(self, other):
        """Return elementwise sum of the data arrays of self and other
        """
        return self.get_data() + other.get_data()

    def get_extrema(self):
        """Get min and max from raster
        If raster has a nominated no_data value, this is ignored.

        Return min, max
        """

        A = self.get_data(nan=True)
        minimum = numpy.nanmin(A.flat[:])
        maximum = numpy.nanmax(A.flat[:])

        return minimum, maximum

    def get_nodata_value(self):
        """Get the internal representation of NODATA

        If the internal value is None, the standard -9999 is assumed.
        The same default is used when there is no underlying raster band,
        i.e. when the raster was not read from file.
        """

        band = getattr(self, 'band', None)
        if band is None:
            nodata = None
        else:
            nodata = band.GetNoDataValue()

        # Use common default in case nodata was not registered in raster file
        if nodata is None:
            nodata = -9999

        return nodata

    def get_bins(self, N=10, quantiles=False):
        """Get N values between the min and the max occurred in this dataset.

        Return sorted list of length N+1 where the first element is min and
        the last is max. Intermediate values depend on the keyword quantiles:
        If quantiles is True, they represent boundaries between quantiles.
        If quantiles is False, they represent equidistant interval boundaries.
        """

        lower, upper = self.get_extrema()

        levels = []
        if quantiles is False:
            # Linear intervals
            d = (upper - lower) / N

            for i in range(N):
                levels.append(lower + i * d)
        else:
            # Quantiles
            # FIXME (Ole): Not 100% sure about this algorithm,
            # but it is close enough

            A = self.get_data(nan=True).flat[:]

            mask = numpy.logical_not(numpy.isnan(A))  # Omit NaN's
            A = A.compress(mask)

            A.sort()

            assert len(A) == A.shape[0]

            d = float(len(A) + 0.5) / N
            for i in range(N):
                levels.append(A[int(i * d)])

        levels.append(upper)

        return levels

    def get_bounding_box(self):
        """Get bounding box coordinates for raster layer

        Format is [West, South, East, North]
        """

        geotransform = self.geotransform

        x_origin = geotransform[0]  # top left x
        y_origin = geotransform[3]  # top left y
        x_res = geotransform[1]     # w-e pixel resolution
        y_res = geotransform[5]     # n-s pixel resolution
        x_pix = self.columns
        y_pix = self.rows

        minx = x_origin
        maxx = x_origin + (x_pix * x_res)
        miny = y_origin + (y_pix * y_res)
        maxy = y_origin

        return [minx, miny, maxx, maxy]

    @property
    def is_raster(self):
        return True

    @property
    def is_vector(self):
        return False
Example #11
0
    def __init__(self, data=None, projection=None, geotransform=None, name="Raster layer", keywords=None):
        """Initialise object with either data or filename

        Input
            data: Can be either
                * a filename of a raster file format known to GDAL
                * an MxN array of raster data
                * None (FIXME (Ole): Remove this option)
            projection: Geospatial reference in WKT format.
                        Only used if data is provide as a numeric array,
            geotransform: GDAL geotransform (6-tuple).
                          (top left x, w-e pixel resolution, rotation,
                           top left y, rotation, n-s pixel resolution).
                          See e.g. http://www.gdal.org/gdal_tutorial.html
                          Only used if data is provide as a numeric array,
            name: Optional name for layer.
                  Only used if data is provide as a numeric array,
            keywords: Optional dictionary with keywords that describe the
                      layer. When the layer is stored, these keywords will
                      be written into an associated file with extension
                      .keywords.

                      Keywords can for example be used to display text
                      about the layer in a web application.

        Raises
            AssertionError: if keywords is neither None nor a dictionary

        Note that if data is a filename, all other arguments are ignored
        as they will be inferred from the file.
        """

        # Input checks
        if data is None:
            # Instantiate empty object
            self.name = name
            self.data = None
            self.projection = None
            self.coordinates = None
            self.filename = None
            self.keywords = {}
            return

        # Initialisation
        if isinstance(data, basestring):
            self.read_from_file(data)
        else:
            # Assume that data is provided as an array
            # with extra keyword arguments supplying metadata
            if keywords is None:
                self.keywords = {}
            else:
                # Wrap in a tuple so that the message can also be
                # formatted when keywords itself is a tuple
                msg = "Specified keywords must be either None or a " "dictionary. I got %s" % (keywords,)
                assert isinstance(keywords, dict), msg
                self.keywords = keywords

            # asarray converts only when needed and also accepts nested
            # sequences
            self.data = numpy.asarray(data, dtype="d")

            self.filename = None
            self.name = name

            self.projection = Projection(projection)
            self.geotransform = geotransform

            # Take dimensions from the converted array; the raw input
            # need not have a shape attribute
            self.rows = self.data.shape[0]
            self.columns = self.data.shape[1]

            self.number_of_bands = 1
Example #12
0
def check_data_integrity(layer_files):
    """Verify that input layers have the same projection and georeferencing.

    Input
        layer_files: Sequence of layer objects. It is traversed more than
                     once so it must not be a one-shot iterator.
                     Every layer must provide the attributes projection,
                     is_raster and is_vector. Raster layers must also
                     provide get_geotransform(), get_name(), rows and
                     columns. Vector layers must also provide
                     get_geometry() and support len().

    Raises
        AssertionError if
            * a layer does not have the default projection
            * raster layers have geotransforms that differ by more than
              the (deliberately high) relative tolerance
            * vector layers have different coordinates
            * a vector layer has no features
            * raster layers have different numbers of rows or columns
    """

    # Set default values for projection and geotransform.
    # Enforce DEFAULT (WGS84).
    # Choosing 'None' will use value of first layer.
    reference_projection = Projection(DEFAULT_PROJECTION)
    geotransform = None
    coordinates = None

    for layer in layer_files:

        # Ensure that projection is consistent across all layers
        if reference_projection is None:
            reference_projection = layer.projection
        else:
            msg = ('Projections in input layer %s is not as expected:\n'
                   'projection: %s\n'
                   'default:    %s'
                   '' % (layer, layer.projection, reference_projection))
            assert reference_projection == layer.projection, msg

        # Ensure that geotransform and dimensions is consistent across
        # all *raster* layers
        if layer.is_raster:
            if geotransform is None:
                geotransform = layer.get_geotransform()
            else:
                msg = ('Geotransforms in input raster layers are different: '
                       '%s %s' % (geotransform, layer.get_geotransform()))
                # FIXME (Ole): Use high tolerance until we find out
                # why geoserver changes resolution.
                assert numpy.allclose(geotransform,
                                      layer.get_geotransform(),
                                      rtol=1.0e-1), msg

        # In case of vector layers, we check that the coordinates
        # are the same
        if layer.is_vector:
            if coordinates is None:
                coordinates = layer.get_geometry()
            else:
                msg = ('Coordinates in input vector layers are different: '
                       '%s %s' % (coordinates, layer.get_geometry()))
                assert numpy.allclose(coordinates, layer.get_geometry()), msg

            msg = ('There are no data points to interpolate to. '
                   'Perhaps zoom out or pan to the study area '
                   'and try again')
            assert len(layer) > 0, msg

    # Check that arrays are aligned.
    #
    # We have observed Geoserver resolution changes - see ticket:102
    # https://github.com/AIFDR/riab/issues/102
    #
    # However, both rasters are now downloaded with exactly the same
    # parameters since we have made bbox and resolution variable in ticket:103
    # https://github.com/AIFDR/riab/issues/103
    #
    # So if they are still not aligned, we raise an Exception

    rasters = [layer for layer in layer_files if layer.is_raster]
    if not rasters:
        # Nothing to align
        return

    # First find the minimum dimensions. The raster with fewest rows and the
    # raster with fewest columns are tracked separately so that each error
    # message names the layer that actually defines the reference value.
    # min() returns the first layer attaining the minimum.
    row_reference = min(rasters, key=lambda layer: layer.rows)
    column_reference = min(rasters, key=lambda layer: layer.columns)
    M = row_reference.rows
    N = column_reference.columns

    # Then check for alignment
    for layer in rasters:
        msg = ('Rasters are not aligned!\n'
               'Raster %s has %i rows but raster %s has %i rows\n'
               'Refer to issue #102' %
               (layer.get_name(), layer.rows, row_reference.get_name(), M))
        assert layer.rows == M, msg

        msg = ('Rasters are not aligned!\n'
               'Raster %s has %i columns but raster %s has %i columns\n'
               'Refer to issue #102' %
               (layer.get_name(), layer.columns,
                column_reference.get_name(), N))
        assert layer.columns == N, msg
Example #13
0
    def __init__(self,
                 data=None,
                 projection=None,
                 geometry=None,
                 name='Vector layer',
                 keywords=None):
        """Create a vector layer from a file or from sequences

        Input
            data: One of
                * name of a vector file (string) which is then read
                  with read_from_file
                * sequence with one entry of attributes per feature
                * None
            projection: Geospatial reference passed on to Projection.
                        Required when geometry is given.
            geometry: Sequence of geometries with one entry per feature.
                      Required unless data is a filename.
            name: Optional name for layer.
                  Not used if data is a filename.
            keywords: Optional dictionary with keywords that describe the
                      layer. Not used if data is a filename.

        If data, projection and geometry are all None an empty layer is
        created. In that case keywords is ignored and an empty dictionary
        is stored instead.

        The geometry type is derived from geometry by get_geometry_type.
        """

        if all(arg is None for arg in (data, projection, geometry)):
            # Nothing was supplied: build an empty object
            self.name = name
            self.keywords = {}
            for attribute in ('projection', 'geometry', 'geometry_type',
                              'filename', 'data', 'extent'):
                setattr(self, attribute, None)
            return

        if isinstance(data, basestring):
            # A filename was supplied: everything comes from the file
            self.read_from_file(data)
            return

        # Otherwise the layer is defined by the sequences and metadata
        # given as arguments
        self.name = name
        self.filename = None

        if keywords is not None:
            msg = ('Specified keywords must be either None or a '
                   'dictionary. I got %s' % keywords)
            assert isinstance(keywords, dict), msg
        self.keywords = {} if keywords is None else keywords

        assert geometry is not None, 'Geometry must be specified'
        assert is_sequence(geometry), 'Geometry must be a sequence'
        self.geometry = geometry
        self.geometry_type = get_geometry_type(geometry)

        assert projection is not None, 'Projection must be specified'
        self.projection = Projection(projection)

        self.data = data
        if data is None:
            return

        assert is_sequence(data), 'Data must be a sequence'
        assert len(geometry) == len(data), ('The number of entries in '
                                            'geometry and data '
                                            'must be the same')
Example #14
0
    def read_from_file(self, filename):
        """Read and unpack vector data.

        It is assumed that the file contains only one layer with the
        pertinent features. Only point and polygon geometries are
        supported. For polygons only the ring returned by
        GetGeometryRef(0) is read.

        Input
            filename: Name of vector file to open with ogr.Open.
                      Keywords are read from the file with the same
                      basename and extension .keywords

        On return the attributes keywords, name, filename, extent,
        projection, geometry_type, geometry and data have been set.
        geometry_type is None if there are no features, otherwise it is
        the type of the last feature that was read.

        Raises
            IOError: If the file could not be opened
            Exception: If the file has more than one layer, if a feature
                       or its geometry could not be retrieved or if a
                       geometry is neither point nor polygon

        * A feature is a geometry and a set of attributes.
        * A geometry refers to location and can be point, line, polygon or
          combinations thereof.
        * The attributes are obtained through GetField()

        The full OGR architecture is documented at
        * http://www.gdal.org/ogr/ogr_arch.html
        * http://www.gdal.org/ogr/ogr_apitut.html

        Examples are at
        * danieljlewis.org/files/2010/09/basicpythonmap.pdf
        * http://invisibleroads.com/tutorials/gdal-shapefile-points-save.html
        * http://www.packtpub.com/article/geospatial-data-python-geometry
        """

        basename, _ = os.path.splitext(filename)

        # Look for any keywords
        self.keywords = read_keywords(basename + '.keywords')

        # Determine name
        if 'title' in self.keywords:
            vectorname = self.keywords['title']
        else:
            # Use basename without leading directories as name
            vectorname = os.path.split(basename)[-1]

        self.name = vectorname
        self.filename = filename
        self.geometry_type = None  # In case there are no features

        fid = ogr.Open(filename)
        if fid is None:
            msg = 'Could not open %s' % filename
            raise IOError(msg)

        # Assume that file contains all data in one layer.
        # NOTE: Although the message is worded as a warning, files with
        # more than one layer are rejected.
        if fid.GetLayerCount() > 1:
            msg = ('WARNING: Number of layers in %s are %i. '
                   'Only the first layer will currently be '
                   'used.' % (filename, fid.GetLayerCount()))
            raise Exception(msg)

        layer = fid.GetLayerByIndex(0)

        # Get spatial extent
        self.extent = layer.GetExtent()

        # Get projection
        p = layer.GetSpatialRef()
        self.projection = Projection(p)

        # Get number of features
        N = layer.GetFeatureCount()

        # Extract coordinates and attributes for all features
        # NOTE(review): Features are fetched by id 0..N-1. Presumably ids
        # are contiguous and zero based for the supported formats - confirm.
        geometry = []
        data = []
        for i in range(N):
            feature = layer.GetFeature(i)
            if feature is None:
                msg = 'Could not get feature %i from %s' % (i, filename)
                raise Exception(msg)

            # Record coordinates ordered as Longitude, Latitude
            G = feature.GetGeometryRef()
            if G is None:
                msg = ('Geometry was None in filename %s ' % filename)
                raise Exception(msg)
            else:
                self.geometry_type = G.GetGeometryType()
                if self.geometry_type == ogr.wkbPoint:
                    geometry.append((G.GetX(), G.GetY()))
                elif self.geometry_type == ogr.wkbPolygon:
                    ring = G.GetGeometryRef(0)
                    M = ring.GetPointCount()
                    coordinates = []
                    for j in range(M):
                        coordinates.append((ring.GetX(j), ring.GetY(j)))

                    # Record entire polygon ring as an Mx2 numpy array.
                    # numpy.asarray is used because converting a list
                    # always needs a copy, which numpy.array(...,
                    # copy=False) refuses to make from NumPy 2.0 onwards.
                    geometry.append(numpy.asarray(coordinates, dtype='d'))
                else:
                    msg = ('Only point and polygon geometries are supported. '
                           'Geometry in filename %s '
                           'was %s.' % (filename, G.GetGeometryType()))
                    raise Exception(msg)

            # Record attributes by name
            number_of_fields = feature.GetFieldCount()
            fields = {}
            for j in range(number_of_fields):
                name = feature.GetFieldDefnRef(j).GetName()

                # FIXME (Ole): Ascertain the type of each field?
                #              We need to cast each appropriately?
                #              This is issue #66
                fields[name] = feature.GetField(j)

            data.append(fields)

        # Store geometry as a list with one entry per feature and
        # attributes as a list with one dictionary per feature
        self.geometry = geometry
        self.data = data
Example #15
0
class Vector:
    """Class for abstraction of vector data
    """
    def __init__(self,
                 data=None,
                 projection=None,
                 geometry=None,
                 name='Vector layer',
                 keywords=None):
        """Initialise object with either geometry or filename

        Input
            data: Can be either
                * a filename of a vector file format known to GDAL
                * List of dictionaries of fields associated with
                  point coordinates
                * None
            projection: Geospatial reference in WKT format.
                        Only used if geometry is provide as a numeric array,
            geometry: A list of either point coordinates or polygons
            name: Optional name for layer.
                  Only used if geometry is provide as a numeric array
            keywords: Optional dictionary with keywords that describe the
                      layer. When the layer is stored, these keywords will
                      be written into an associated file with extension
                      .keywords.

                      Keywords can for example be used to display text
                      about the layer in a web application.

        Note that if data is a filename, all other arguments are ignored
        as they will be inferred from the file.

        The geometry type will be inferred from the dimensions of geometry.
        If each entry is one set of coordinates the type will be ogr.wkbPoint,
        if it is an array of coordinates the type will be ogr.wkbPolygon.
        """

        if data is None and projection is None and geometry is None:
            # Instantiate empty object
            self.name = name
            self.projection = None
            self.geometry = None
            self.geometry_type = None
            self.filename = None
            self.data = None
            self.extent = None
            self.keywords = {}
            return

        if isinstance(data, basestring):
            self.read_from_file(data)
        else:
            # Assume that data is provided as sequences provided as
            # arguments to the Vector constructor
            # with extra keyword arguments supplying metadata

            self.name = name
            self.filename = None

            if keywords is None:
                self.keywords = {}
            else:
                msg = ('Specified keywords must be either None or a '
                       'dictionary. I got %s' % keywords)
                assert isinstance(keywords, dict), msg
                self.keywords = keywords

            msg = 'Geometry must be specified'
            assert geometry is not None, msg

            msg = 'Geometry must be a sequence'
            assert is_sequence(geometry), msg
            self.geometry = geometry

            self.geometry_type = get_geometry_type(geometry)

            msg = 'Projection must be specified'
            assert projection is not None, msg
            self.projection = Projection(projection)

            self.data = data
            if data is not None:
                msg = 'Data must be a sequence'
                assert is_sequence(data), msg

                msg = ('The number of entries in geometry and data '
                       'must be the same')
                assert len(geometry) == len(data), msg

            # FIXME: Need to establish extent here

    def __str__(self):
        """Return one line summary with name, size and geometry type
        """

        type_code = self.geometry_type
        type_description = geometrytype2string(type_code)

        template = ('Vector data set: %s, %i features, geometry type '
                    '%s (%s)')
        return template % (self.name, len(self),
                           str(type_code), type_description)

    def __len__(self):
        """Size of vector layer defined as number of features
        """

        return len(self.geometry)

    def __eq__(self, other, rtol=1.0e-5, atol=1.0e-8):
        """Override '==' to allow comparison with other vector objecs

        Input
           other: Vector instance to compare to
           rtol, atol: Relative and absolute tolerance.
                       See numpy.allclose for details
        """

        # Check type
        if not isinstance(other, Vector):
            msg = ('Vector instance cannot be compared to %s'
                   ' as its type is %s ' % (str(other), type(other)))
            raise TypeError(msg)

        # Check projection
        if self.projection != other.projection:
            return False

        # Check geometry
        if not numpy.allclose(
                self.get_geometry(), other.get_geometry(), rtol=rtol,
                atol=atol):
            return False

        # Check keys
        x = self.get_data()
        y = other.get_data()

        for key in x[0]:
            for i in range(len(y)):
                if key not in y[i]:
                    return False

        for key in y[0]:
            for i in range(len(x)):
                if key not in x[i]:
                    return False

        # Check data
        for i, a in enumerate(x):
            for key in a:
                if a[key] != y[i][key]:
                    # Not equal, try numerical comparison with tolerances

                    if not numpy.allclose(
                            a[key], y[i][key], rtol=rtol, atol=atol):
                        return False

        # Check keywords
        if self.keywords != other.keywords:
            return False

        # Vector layers are identical up to the specified tolerance
        return True

    def __ne__(self, other):
        """Override '!=' to allow comparison with other projection objecs
        """
        return not self == other

    def get_name(self):
        return self.name

    def get_keywords(self, key=None):
        """Return keywords dictionary
        """
        if key is None:
            return self.keywords
        else:
            if key in self.keywords:
                return self.keywords[key]
            else:
                msg = ('Keyword %s does not exist in %s: Options are '
                       '%s' % (key, self.get_name(), self.keywords.keys()))
                raise Exception(msg)

    def get_caption(self):
        """Return 'caption' keyword if present. Otherwise ''.
        """
        if 'caption' in self.keywords:
            return self.keywords['caption']
        else:
            return ''

    def read_from_file(self, filename):
        """Read and unpack vector data.

        It is assumed that the file contains only one layer with the
        pertinent features. Only point and polygon geometries are
        supported. For polygons only the ring returned by
        GetGeometryRef(0) is read.

        Input
            filename: Name of vector file to open with ogr.Open.
                      Keywords are read from the file with the same
                      basename and extension .keywords

        On return the attributes keywords, name, filename, extent,
        projection, geometry_type, geometry and data have been set.
        geometry_type is None if there are no features, otherwise it is
        the type of the last feature that was read.

        Raises
            IOError: If the file could not be opened
            Exception: If the file has more than one layer, if a feature
                       or its geometry could not be retrieved or if a
                       geometry is neither point nor polygon

        * A feature is a geometry and a set of attributes.
        * A geometry refers to location and can be point, line, polygon or
          combinations thereof.
        * The attributes are obtained through GetField()

        The full OGR architecture is documented at
        * http://www.gdal.org/ogr/ogr_arch.html
        * http://www.gdal.org/ogr/ogr_apitut.html

        Examples are at
        * danieljlewis.org/files/2010/09/basicpythonmap.pdf
        * http://invisibleroads.com/tutorials/gdal-shapefile-points-save.html
        * http://www.packtpub.com/article/geospatial-data-python-geometry
        """

        basename, _ = os.path.splitext(filename)

        # Look for any keywords
        self.keywords = read_keywords(basename + '.keywords')

        # Determine name
        if 'title' in self.keywords:
            vectorname = self.keywords['title']
        else:
            # Use basename without leading directories as name
            vectorname = os.path.split(basename)[-1]

        self.name = vectorname
        self.filename = filename
        self.geometry_type = None  # In case there are no features

        fid = ogr.Open(filename)
        if fid is None:
            msg = 'Could not open %s' % filename
            raise IOError(msg)

        # Assume that file contains all data in one layer.
        # NOTE: Although the message is worded as a warning, files with
        # more than one layer are rejected.
        if fid.GetLayerCount() > 1:
            msg = ('WARNING: Number of layers in %s are %i. '
                   'Only the first layer will currently be '
                   'used.' % (filename, fid.GetLayerCount()))
            raise Exception(msg)

        layer = fid.GetLayerByIndex(0)

        # Get spatial extent
        self.extent = layer.GetExtent()

        # Get projection
        p = layer.GetSpatialRef()
        self.projection = Projection(p)

        # Get number of features
        N = layer.GetFeatureCount()

        # Extract coordinates and attributes for all features
        # NOTE(review): Features are fetched by id 0..N-1. Presumably ids
        # are contiguous and zero based for the supported formats - confirm.
        geometry = []
        data = []
        for i in range(N):
            feature = layer.GetFeature(i)
            if feature is None:
                msg = 'Could not get feature %i from %s' % (i, filename)
                raise Exception(msg)

            # Record coordinates ordered as Longitude, Latitude
            G = feature.GetGeometryRef()
            if G is None:
                msg = ('Geometry was None in filename %s ' % filename)
                raise Exception(msg)
            else:
                self.geometry_type = G.GetGeometryType()
                if self.geometry_type == ogr.wkbPoint:
                    geometry.append((G.GetX(), G.GetY()))
                elif self.geometry_type == ogr.wkbPolygon:
                    ring = G.GetGeometryRef(0)
                    M = ring.GetPointCount()
                    coordinates = []
                    for j in range(M):
                        coordinates.append((ring.GetX(j), ring.GetY(j)))

                    # Record entire polygon ring as an Mx2 numpy array.
                    # numpy.asarray is used because converting a list
                    # always needs a copy, which numpy.array(...,
                    # copy=False) refuses to make from NumPy 2.0 onwards.
                    geometry.append(numpy.asarray(coordinates, dtype='d'))
                else:
                    msg = ('Only point and polygon geometries are supported. '
                           'Geometry in filename %s '
                           'was %s.' % (filename, G.GetGeometryType()))
                    raise Exception(msg)

            # Record attributes by name
            number_of_fields = feature.GetFieldCount()
            fields = {}
            for j in range(number_of_fields):
                name = feature.GetFieldDefnRef(j).GetName()

                # FIXME (Ole): Ascertain the type of each field?
                #              We need to cast each appropriately?
                #              This is issue #66
                fields[name] = feature.GetField(j)

            data.append(fields)

        # Store geometry as a list with one entry per feature and
        # attributes as a list with one dictionary per feature
        self.geometry = geometry
        self.data = data

    def write_to_file(self, filename):
        """Save vector data to file

        Input
            filename: filename with extension .shp or .gml

        Note, if attribute names are longer than 10 characters they will be
        truncated. This is due to limitations in the shp file driver and has
        to be done here since gdal v1.7 onwards has changed its handling of
        this issue: http://www.gdal.org/ogr/drv_shapefile.html

        Keywords are written to the file with the same basename and
        extension .keywords

        Raises
            AssertionError: If the extension is neither .shp nor .gml or
                            if the type of an attribute value in the first
                            feature is not in TYPE_MAP
            Exception: If the extension is .gml (currently disabled), if
                       the driver, data source, layer, a field or a feature
                       could not be created, if data is empty or does not
                       contain dictionaries, or if the geometry type is
                       neither point nor polygon
        """

        # Check file format
        basename, extension = os.path.splitext(filename)

        msg = ('Invalid file type for file %s. Only extensions '
               'shp or gml allowed.' % filename)
        assert extension == '.shp' or extension == '.gml', msg
        driver = DRIVER_MAP[extension]

        # FIXME (Ole): Tempory flagging of GML issue (ticket #18)
        if extension == '.gml':
            msg = ('OGR GML driver does not store geospatial reference.'
                   'This format is disabled for the time being. See '
                   'https://github.com/AIFDR/riab/issues/18')
            raise Exception(msg)

        # Derive layername from filename (excluding preceding dirs)
        layername = os.path.split(basename)[-1]

        # Get vector data
        geometry = self.get_geometry()
        data = self.get_data()

        N = len(geometry)

        # Clear any previous file of this name (ogr does not overwrite).
        # Failure to remove (typically because there is no such file) is
        # deliberately ignored.
        try:
            os.remove(filename)
        except OSError:
            pass

        # Create new file with one layer
        drv = ogr.GetDriverByName(driver)
        if drv is None:
            msg = 'OGR driver %s not available' % driver
            raise Exception(msg)

        ds = drv.CreateDataSource(filename)
        if ds is None:
            msg = 'Creation of output file %s failed' % filename
            raise Exception(msg)

        lyr = ds.CreateLayer(layername, self.projection.spatial_reference,
                             self.geometry_type)
        if lyr is None:
            msg = 'Could not create layer %s' % layername
            raise Exception(msg)

        # Define attributes if any
        fields = []
        store_attributes = data is not None
        if store_attributes:
            if len(data) == 0:
                msg = ('Input parameter "data" was specified '
                       'but appears to be empty')
                raise Exception(msg)

            # Attribute names are taken from the first feature
            try:
                fields = list(data[0].keys())
            except (AttributeError, TypeError, KeyError, IndexError):
                msg = ('Input parameter "attributes" was specified '
                       'but it does not contain dictionaries with '
                       'field information as expected. The first'
                       'element is %s' % (data[0],))
                raise Exception(msg)

            # Establish OGR types for each element
            ogrtypes = {}
            for name in fields:
                att = data[0][name]
                py_type = type(att)
                msg = ('Unknown type for storing vector '
                       'data: %s, %s' % (name, str(py_type)[1:-1]))
                assert py_type in TYPE_MAP, msg
                ogrtypes[name] = TYPE_MAP[py_type]

            # Create attribute fields in layer
            for name in fields:
                fd = ogr.FieldDefn(name, ogrtypes[name])
                # FIXME (Ole): Trying to address issue #16
                #              But it doesn't work and
                #              somehow changes the values of MMI in test
                #width = max(128, len(name))
                #fd.SetWidth(width)

                # Silent handling of warnings like
                # Warning 6: Normalized/laundered field name:
                #'CONTENTS_LOSS_AUD' to 'CONTENTS_L'
                gdal.PushErrorHandler('CPLQuietErrorHandler')
                try:
                    if lyr.CreateField(fd) != 0:
                        msg = 'Could not create field %s' % name
                        raise Exception(msg)
                finally:
                    # Restore error handler, also when field creation failed
                    gdal.PopErrorHandler()

        # Store geometry
        geom = ogr.Geometry(self.geometry_type)
        layer_def = lyr.GetLayerDefn()
        for i in range(N):
            # Create new feature instance
            feature = ogr.Feature(layer_def)

            # Store geometry and check
            if self.geometry_type == ogr.wkbPoint:
                x = float(geometry[i][0])
                y = float(geometry[i][1])
                geom.SetPoint_2D(0, x, y)
            elif self.geometry_type == ogr.wkbPolygon:
                wkt = array2wkt(geometry[i], geom_type='POLYGON')
                geom = ogr.CreateGeometryFromWkt(wkt)
            else:
                msg = 'Geometry type %s not implemented' % self.geometry_type
                raise Exception(msg)

            feature.SetGeometry(geom)

            G = feature.GetGeometryRef()
            if G is None:
                msg = 'Could not create GeometryRef for file %s' % filename
                raise Exception(msg)

            # Store attributes
            if store_attributes:
                for j, name in enumerate(fields):
                    # The name stored in the layer may differ from the
                    # requested one (see note on truncation above), so
                    # the field is looked up by position
                    actual_field_name = layer_def.GetFieldDefn(j).GetNameRef()

                    val = data[i][name]
                    if isinstance(val, numpy.ndarray):
                        # A singleton of type <type 'numpy.ndarray'> works
                        # for gdal version 1.6 but fails for version 1.8
                        # in SetField with error: NotImplementedError:
                        # Wrong number of arguments for overloaded function
                        val = float(val)

                    feature.SetField(actual_field_name, val)

            # Save this feature
            if lyr.CreateFeature(feature) != 0:
                msg = 'Failed to create feature %i in file %s' % (i, filename)
                raise Exception(msg)

            feature.Destroy()

        # Write keywords if any
        write_keywords(self.keywords, basename + '.keywords')

    def get_attribute_names(self):
        """ Get available attribute names

        These are the ones that can be used with get_data
        """

        return self.data[0].keys()

    def get_data(self, attribute=None, index=None):
        """Get vector attributes

        Data is returned as a list where each entry is a dictionary of
        attributes for one feature. Entries in get_geometry() and
        get_data() are related as 1-to-1

        If optional argument attribute is specified and a valid name,
        then the list of values for that attribute is returned.

        If optional argument index is specified on the that value will
        be returned. Any value of index is ignored if attribute is None.
        """

        if hasattr(self, 'data'):
            if attribute is None:
                return self.data
            else:
                msg = ('Specified attribute %s does not exist in '
                       'vector layer %s. Valid names are %s'
                       '' % (attribute, self, self.data[0].keys()))
                assert attribute in self.data[0], msg

                if index is None:
                    # Return all values for specified attribute
                    return [x[attribute] for x in self.data]
                else:
                    # Return value for specified attribute and index
                    msg = ('Specified index must be either None or '
                           'an integer. I got %s' % index)
                    assert type(index) == type(0)

                    msg = ('Specified index must lie within the bounds '
                           'of vector layer %s which is [%i, %i]'
                           '' % (self, 0, len(self) - 1))
                    assert 0 <= index < len(self)

                    return self.data[index][attribute]
        else:
            msg = 'Vector data instance does not have any attributes'
            raise Exception(msg)

    def get_geometry(self):
        """Return geometry for vector layer.

        Depending on the feature type, geometry is

        geometry type     output type
        -----------------------------
        point             coordinates (Nx2 array of longitudes and latitudes)
        line              TODO
        polygon           list of arrays of coordinates

        """
        return self.geometry

    def get_projection(self, proj4=False):
        """Return projection of this layer as a string
        """
        return self.projection.get_projection(proj4)

    def get_bounding_box(self):
        """Get bounding box coordinates for vector layer.

        Format is [West, South, East, North]
        """
        e = self.extent
        return [
            e[0],  # West
            e[2],  # South
            e[1],  # East
            e[3]
        ]  # North

    def get_extrema(self, attribute=None):
        """Get min and max values from specified attribute

        Return min, max
        """
        if attribute is None:
            msg = ('Valid attribute name must be specified in get_extrema '
                   'for vector layers. I got None.')
            raise RuntimeError(msg)

        x = self.get_data(attribute)
        return min(x), max(x)

    def get_topN(self, attribute, N=10):
        """Get top N features

        Input
            attribute: The name of attribute where values are sought
            N: How many

        Output
            layer: New vector layer with selected features ordered by
                   increasing value of attribute. If there are fewer than
                   N features they are all included. The new layer has the
                   projection of this layer but neither its name nor its
                   keywords.
        """

        # FIXME (Ole): Maybe generalise this to arbitrary expressions

        # Input checks
        msg = ('Specfied attribute must be a string. '
               'I got %s' % (type(attribute)))
        assert isinstance(attribute, basestring), msg

        msg = 'Specified attribute was empty'
        assert attribute != '', msg

        msg = 'N must be a positive number. I got %i' % N
        assert N > 0, msg

        # Create list of values for specified attribute
        values = self.get_data(attribute)

        # Sort and select using Schwartzian transform.
        # Only the attribute value is used as sort key. Comparing entire
        # tuples would compare attribute dictionaries and geometries
        # whenever two values are equal, which is either meaningless or
        # an error. Features with equal values keep their relative order.
        A = sorted(zip(values, self.data, self.geometry),
                   key=lambda entry: entry[0])

        # Pick top N and unpack
        _, data, geometry = zip(*A[-N:])

        # Create new Vector instance and return
        return Vector(data=list(data),
                      projection=self.get_projection(),
                      geometry=list(geometry))

    def interpolate(self, X, name=None):
        """Interpolate values of this vector layer to other layer

        Input
            X: Layer object defining target
            name: Optional name of interpolated layer

        Output
            Y: Layer object with values of this vector layer interpolated to
               geometry of input layer X

        Not implemented yet: always raises NotImplementedError. That is
        a subclass of Exception, so callers that catch Exception are
        unaffected.
        """

        msg = 'Interpolation from vector layers not yet implemented'
        raise NotImplementedError(msg)

    @property
    def is_raster(self):
        """Vector layers are never raster layers: always False."""
        return False

    @property
    def is_vector(self):
        """This layer type is a vector layer: always True."""
        return True

    @property
    def is_point_data(self):
        """Tell whether this is a vector layer of point geometries.

        True when the layer is a vector layer and its geometry_type
        equals ogr.wkbPoint.
        """
        vector_flag = self.is_vector
        if not vector_flag:
            return vector_flag
        return self.geometry_type == ogr.wkbPoint

    @property
    def is_polygon_data(self):
        """Tell whether this is a vector layer of polygon geometries.

        True when the layer is a vector layer and its geometry_type
        equals ogr.wkbPolygon.
        """
        vector_flag = self.is_vector
        if not vector_flag:
            return vector_flag
        return self.geometry_type == ogr.wkbPolygon
Example #16
0
    def read_from_file(self, filename):
        """ Read and unpack vector data.

        It is assumed that the file contains only one layer with the
        pertinent features. Further it is assumed for the moment that
        all geometries are points.

        * A feature is a geometry and a set of attributes.
        * A geometry refers to location and can be point, line, polygon or
          combinations thereof.
        * The attributes are obtained through GetField()

        The full OGR architecture is documented at
        * http://www.gdal.org/ogr/ogr_arch.html
        * http://www.gdal.org/ogr/ogr_apitut.html

        Examples are at
        * danieljlewis.org/files/2010/09/basicpythonmap.pdf
        * http://invisibleroads.com/tutorials/gdal-shapefile-points-save.html
        * http://www.packtpub.com/article/geospatial-data-python-geometry

        Input
            filename: Name of a vector file that OGR can open

        On success the attributes name, extent, projection, geometry,
        data and filename of this object are set.

        Raises
            IOError: if the file cannot be opened
            Exception: if the file has more than one layer, a feature
                       cannot be retrieved, or a feature has no
                       geometry or a geometry that is not a point
        """

        self.name, _ = os.path.splitext(filename)

        fid = ogr.Open(filename)
        if fid is None:
            msg = 'Could not open %s' % filename
            raise IOError(msg)

        # Assume that file contains all data in one layer.
        # Files with several layers are rejected rather than silently
        # truncated to the first layer.
        if fid.GetLayerCount() > 1:
            msg = ('Only one vector layer is currently allowed, '
                   'but %s contains %i layers.'
                   % (filename, fid.GetLayerCount()))
            raise Exception(msg)

        layer = fid.GetLayerByIndex(0)

        # Get spatial extent
        self.extent = layer.GetExtent()

        # Get projection
        p = layer.GetSpatialRef()
        self.projection = Projection(p)

        # Get number of features
        N = layer.GetFeatureCount()

        # Extract coordinates and attributes for all features
        geometry = []
        data = []
        for i in range(N):
            feature = layer.GetFeature(i)
            if feature is None:
                msg = 'Could not get feature %i from %s' % (i, filename)
                raise Exception(msg)

            # Record coordinates
            G = feature.GetGeometryRef()
            if G is None:
                # Handled separately: the geometry type cannot be
                # queried on a missing geometry.
                msg = ('Only point geometries are supported. '
                       'Feature %i in filename %s '
                       'has no geometry.' % (i, filename))
                raise Exception(msg)

            if G.GetGeometryType() != ogr.wkbPoint:
                msg = ('Only point geometries are supported. '
                       'Geometry in filename %s '
                       'was %s.' % (filename,
                                    G.GetGeometryType()))
                raise Exception(msg)

            # Longitude, Latitude
            geometry.append((G.GetX(), G.GetY()))

            # Record attributes by name
            number_of_fields = feature.GetFieldCount()
            fields = {}
            for j in range(number_of_fields):
                name = feature.GetFieldDefnRef(j).GetName()

                # FIXME (Ole): Ascertain the type of each field?
                #              We need to cast each appropriately?
                #              This is issue #66
                fields[name] = feature.GetField(j)

            data.append(fields)

        # FIXME: When we get to more general geometries, we
        #        should probably just stay with a list of features.
        # asarray converts the list without the copy=False keyword,
        # which newer numpy versions reject when a copy is unavoidable.
        self.geometry = numpy.asarray(geometry, dtype='d')
        self.data = data
        self.filename = filename
Example #17
0
class Vector:
    """Class for abstraction of vector data
    """

    def __init__(self, data=None, projection=None, geometry=None,
                 name='Vector layer', caption=''):
        """Initialise object with either geometry or filename

        Input
            data: Can be either
                * a filename of a vector file format known to GDAL
                * List of dictionaries of fields associated with
                  point coordinates
                * None
            projection: Geospatial reference in WKT format.
                        Only used if geometry is provide as a numeric array,
            geometry: An Nx2 array of point coordinates
            name: Optional name for layer.
                  Only used if geometry is provide as a numeric array
            caption: Optional text field that describes the layer. This field
                     can for example be used to display text about the layer
                     in a web application.
        """

        self.caption = caption
        if data is None and projection is None and geometry is None:
            # Instantiate empty object
            self.name = name
            self.projection = None
            self.geometry = None
            self.filename = None
            self.data = None
            self.extent = None
            return

        if isinstance(data, basestring):
            self.read_from_file(data)
        else:
            # Assume that geometry is provided as an array
            # with extra keyword arguments supplying metadata

            msg = 'Geometry must be specified'
            assert geometry is not None, msg
            self.geometry = numpy.array(geometry, dtype='d', copy=False)

            msg = 'Projection must be specified'
            assert projection is not None, msg
            self.projection = Projection(projection)

            self.data = data
            self.name = name
            self.filename = None

            # FIXME: Need to establish extent here

    def __str__(self):
        return self.name

    def __len__(self):
        """Size of vector layer defined as number of features
        """

        # FIXME - change to len(self.geometry)
        return self.geometry.shape[0]

    def __eq__(self, other, rtol=1.0e-5, atol=1.0e-8):
        """Override '==' to allow comparison with other vector objecs

        Input
           other: Vector instance to compare to
           rtol, atol: Relative and absolute tolerance.
                       See numpy.allclose for details
        """

        # Check type
        if not isinstance(other, Vector):
            msg = ('Vector instance cannot be compared to %s'
                   ' as its type is %s ' % (str(other), type(other)))
            raise TypeError(msg)

        # Check projection
        if self.projection != other.projection:
            return False

        # Check geometry
        if not numpy.allclose(self.get_geometry(),
                              other.get_geometry(),
                              rtol=rtol, atol=atol):
            return False

        # Check keys
        x = self.get_data()
        y = other.get_data()

        for key in x[0]:
            for i in range(len(y)):
                if key not in y[i]:
                    return False

        for key in y[0]:
            for i in range(len(x)):
                if key not in x[i]:
                    return False

        # Check data
        for i, a in enumerate(x):
            for key in a:
                if a[key] != y[i][key]:
                    # Not equal, try numerical comparison with tolerances

                    if not numpy.allclose(a[key], y[i][key],
                                          rtol=rtol, atol=atol):
                        return False

        # Vector layers are identical up to the specified tolerance
        return True

    def __ne__(self, other):
        """Override '!=' to allow comparison with other projection objecs
        """
        return not self == other

    def get_name(self):
        return self.name

    def get_caption(self):
        return self.caption

    def read_from_file(self, filename):
        """ Read and unpack vector data.

        It is assumed that the file contains only one layer with the
        pertinent features. Further it is assumed for the moment that
        all geometries are points.

        * A feature is a geometry and a set of attributes.
        * A geometry refers to location and can be point, line, polygon or
          combinations thereof.
        * The attributes are obtained through GetField()

        The full OGR architecture is documented at
        * http://www.gdal.org/ogr/ogr_arch.html
        * http://www.gdal.org/ogr/ogr_apitut.html

        Examples are at
        * danieljlewis.org/files/2010/09/basicpythonmap.pdf
        * http://invisibleroads.com/tutorials/gdal-shapefile-points-save.html
        * http://www.packtpub.com/article/geospatial-data-python-geometry

        Input
            filename: Name of a vector file that OGR can open

        On success the attributes name, extent, projection, geometry,
        data and filename of this object are set.

        Raises
            IOError: if the file cannot be opened
            Exception: if the file has more than one layer, a feature
                       cannot be retrieved, or a feature has no
                       geometry or a geometry that is not a point
        """

        self.name, _ = os.path.splitext(filename)

        fid = ogr.Open(filename)
        if fid is None:
            msg = 'Could not open %s' % filename
            raise IOError(msg)

        # Assume that file contains all data in one layer.
        # Files with several layers are rejected rather than silently
        # truncated to the first layer.
        if fid.GetLayerCount() > 1:
            msg = ('Only one vector layer is currently allowed, '
                   'but %s contains %i layers.'
                   % (filename, fid.GetLayerCount()))
            raise Exception(msg)

        layer = fid.GetLayerByIndex(0)

        # Get spatial extent
        self.extent = layer.GetExtent()

        # Get projection
        p = layer.GetSpatialRef()
        self.projection = Projection(p)

        # Get number of features
        N = layer.GetFeatureCount()

        # Extract coordinates and attributes for all features
        geometry = []
        data = []
        for i in range(N):
            feature = layer.GetFeature(i)
            if feature is None:
                msg = 'Could not get feature %i from %s' % (i, filename)
                raise Exception(msg)

            # Record coordinates
            G = feature.GetGeometryRef()
            if G is None:
                # Handled separately: the geometry type cannot be
                # queried on a missing geometry.
                msg = ('Only point geometries are supported. '
                       'Feature %i in filename %s '
                       'has no geometry.' % (i, filename))
                raise Exception(msg)

            if G.GetGeometryType() != ogr.wkbPoint:
                msg = ('Only point geometries are supported. '
                       'Geometry in filename %s '
                       'was %s.' % (filename,
                                    G.GetGeometryType()))
                raise Exception(msg)

            # Longitude, Latitude
            geometry.append((G.GetX(), G.GetY()))

            # Record attributes by name
            number_of_fields = feature.GetFieldCount()
            fields = {}
            for j in range(number_of_fields):
                name = feature.GetFieldDefnRef(j).GetName()

                # FIXME (Ole): Ascertain the type of each field?
                #              We need to cast each appropriately?
                #              This is issue #66
                fields[name] = feature.GetField(j)

            data.append(fields)

        # FIXME: When we get to more general geometries, we
        #        should probably just stay with a list of features.
        # asarray converts the list without the copy=False keyword,
        # which newer numpy versions reject when a copy is unavoidable.
        self.geometry = numpy.asarray(geometry, dtype='d')
        self.data = data
        self.filename = filename

    def write_to_file(self, filename):
        """Save vector data to file

        Input
            filename: filename with extension .shp or .gml

        The layer is written as point features; the layer name is the
        file name without directories and extension. Any existing file
        with the same name is removed first.

        Raises
            AssertionError: if the extension is neither .shp nor .gml
            Exception: if the format is .gml (currently disabled), the
                       OGR driver, data source, layer, a field or a
                       feature cannot be created, or the attribute
                       data is empty or not a list of dictionaries
        """

        # Derive layername from filename (excluding preceding dirs)
        basename = os.path.split(filename)[-1]
        layername, extension = os.path.splitext(basename)

        # Check file format
        msg = ('Invalid file type for file %s. Only extensions '
               'shp or gml allowed.' % filename)
        assert extension == '.shp' or extension == '.gml', msg
        driver = DRIVER_MAP[extension]

        # FIXME (Ole): Temporary flagging of GML issue
        if extension == '.gml':
            msg = ('OGR GML driver does not store geospatial reference. '
                   'This format is disabled for the time being')
            raise Exception(msg)

        # Get vector data
        geometry = self.get_geometry()
        data = self.get_data()
        N = len(geometry)

        # Clear any previous file of this name (ogr does not overwrite).
        # Failing to remove it (typically because it does not exist) is
        # deliberately ignored; other errors are not swallowed.
        try:
            os.remove(filename)
        except OSError:
            pass

        # Create new file with one layer
        drv = ogr.GetDriverByName(driver)
        if drv is None:
            msg = 'OGR driver %s not available' % driver
            raise Exception(msg)

        ds = drv.CreateDataSource(filename)
        if ds is None:
            msg = 'Creation of output file %s failed' % filename
            raise Exception(msg)

        lyr = ds.CreateLayer(layername,
                             self.projection.spatial_reference,
                             ogr.wkbPoint)
        if lyr is None:
            msg = 'Could not create layer %s' % layername
            raise Exception(msg)

        # Define attributes if any
        store_attributes = False
        if data is not None:
            if len(data) > 0:
                try:
                    fields = data[0].keys()
                except (AttributeError, TypeError):
                    # First entry is not dictionary-like
                    msg = ('Input parameter "attributes" was specified '
                           'but it does not contain dictionaries with '
                           'field information as expected. The first '
                           'element is %s' % str(data[0]))
                    raise Exception(msg)
                else:
                    # Establish OGR types for each element, based on
                    # the Python types found in the first feature
                    ogrtypes = {}
                    for name in fields:
                        py_type = type(data[0][name])
                        ogrtypes[name] = TYPE_MAP[py_type]

            else:
                msg = ('Input parameter "data" was specified '
                       'but appears to be empty')
                raise Exception(msg)

            # Create attribute fields in layer
            store_attributes = True
            for name in fields:

                fd = ogr.FieldDefn(name, ogrtypes[name])

                # FIXME (Ole): Trying to address issue #16
                #              But it doesn't work and
                #              somehow changes the values of MMI in test
                #width = max(128, len(name))
                #print name, width
                #fd.SetWidth(width)

                if lyr.CreateField(fd) != 0:
                    msg = 'Could not create field %s' % name
                    raise Exception(msg)

        # Store point data
        for i in range(N):
            # FIXME (Ole): Need to assign entire vector if at all possible

            # Coordinates
            x = float(geometry[i, 0])
            y = float(geometry[i, 1])

            pt = ogr.Geometry(ogr.wkbPoint)
            pt.SetPoint_2D(0, x, y)

            feature = ogr.Feature(lyr.GetLayerDefn())
            feature.SetGeometry(pt)

            G = feature.GetGeometryRef()
            if G is None:
                msg = 'Could not create GeometryRef for file %s' % filename
                raise Exception(msg)

            # Attributes
            if store_attributes:
                for name in fields:
                    feature.SetField(name, data[i][name])

            # Save this feature
            if lyr.CreateFeature(feature) != 0:
                msg = 'Failed to create feature %i in file %s' % (i, filename)
                raise Exception(msg)

            feature.Destroy()

    def get_data(self, attribute=None, index=None):
        """Get vector attributes

        Data is returned as a list where each entry is a dictionary of
        attributes for one feature. Entries in get_geometry() and
        get_data() are related as 1-to-1

        If optional argument attribute is specified and a valid name,
        then the list of values for that attribute is returned.

        If optional argument index is specified on the that value will
        be returned. Any value of index is ignored if attribute is None.
        """
        if hasattr(self, 'data'):
            if attribute is None:
                return self.data
            else:
                msg = ('Specified attribute %s does not exist in '
                       'vector layer %s. Valid names are %s'
                       '' % (attribute, self, self.data[0].keys()))
                assert attribute in self.data[0], msg

                if index is None:
                    # Return all values for specified attribute
                    return [x[attribute] for x in self.data]
                else:
                    # Return value for specified attribute and index
                    msg = ('Specified index must be either None or '
                           'an integer. I got %s' % index)
                    assert type(index) == type(0)

                    msg = ('Specified index must lie within the bounds '
                           'of vector layer %s which is [%i, %i]'
                           '' % (self, 0, len(self) - 1))
                    assert 0 <= index < len(self)

                    return self.data[index][attribute]
        else:
            msg = 'Vector data instance does not have any attributes'
            raise Exception(msg)

    def get_geometry(self):
        """Return geometry for vector layer.

        Depending on the feature type, geometry is

        geometry type     output type
        -----------------------------
        point             coordinates (Nx2 array of longitudes and latitudes)
        line              TODO
        polygon           TODO

        """
        return self.geometry

    def get_projection(self, proj4=False):
        """Return projection of this layer as a string
        """
        return self.projection.get_projection(proj4)

    def get_bounding_box(self):
        """Get bounding box coordinates for vector layer.

        Format is [West, South, East, North]
        """
        e = self.extent
        return [e[0],  # West
                e[2],  # South
                e[1],  # East
                e[3]]  # North

    def get_extrema(self, attribute=None):
        """Get min and max values from specified attribute

        Return min, max
        """
        if attribute is None:
            msg = ('Valid attribute name must be specified in get_extrema '
                   'for vector layers. I got None.')
            raise RuntimeError(msg)

        x = self.get_data(attribute)
        return min(x), max(x)

    def get_topN(self, attribute, N=10):
        """Get top N features

        Input
            attribute: The name of attribute where values are sought
            N: How many

        Output
            layer: New vector layer with the N features that have the
                   largest values of the attribute, in ascending order
                   of that value. Fewer are returned if the layer holds
                   fewer than N features.

        Raises AssertionError if attribute is not a non-empty string or
        if N is not positive.
        """

        # FIXME (Ole): Maybe generalise this to arbitrary expressions

        # Input checks
        msg = ('Specfied attribute must be a string. '
               'I got %s' % (type(attribute)))
        assert isinstance(attribute, basestring), msg

        msg = 'Specified attribute was empty'
        assert attribute != '', msg

        msg = 'N must be a positive number. I got %i' % N
        assert N > 0, msg

        # Create list of values for specified attribute
        values = self.get_data(attribute)

        # Sort and select using Schwartzian transform.
        # Only the attribute value is used as sort key: comparing whole
        # tuples would fall through to the attribute dictionaries and the
        # coordinate arrays whenever two values tie, which is ill-defined
        # and can raise. The sort is stable, so ties keep their order.
        decorated = sorted(zip(values, self.data, self.geometry),
                           key=lambda item: item[0])

        # Pick top N and unpack
        _, data, geometry = zip(*decorated[-N:])

        # Create new Vector instance and return
        return Vector(data=list(data),
                      projection=self.get_projection(),
                      geometry=geometry)

    def interpolate(self, X, name=None):
        """Interpolate values of this vector layer to other layer

        Input
            X: Layer object defining target
            name: Optional name of interpolated layer

        Output
            Y: Layer object with values of this vector layer interpolated to
               geometry of input layer X
        """

        msg = 'Interpolation from vector layers not yet implemented'
        raise Exception(msg)

    @property
    def is_raster(self):
        return False

    @property
    def is_vector(self):
        return True