Beispiel #1
0
 def test_nocrs_wgs84_geojson(self):
     """
     Test that the EPSG code of 'iraq_hospitals.geojson' is reported
     as 4326.
     """
     vector_io = VectorFileIO(
         uri=os.path.join(testfile_path, 'iraq_hospitals.geojson'))
     raw_json = json.loads(vector_io.read(format=geo.formats.JSON))
     # NOTE(review): raw_json is parsed JSON (presumably a dict), and
     # hasattr() inspects attributes, not keys, so this assertion can
     # never fail.  `self.assertNotIn('crs', raw_json)` is probably what
     # was intended - confirm that read() does not add a 'crs' member
     # before tightening it.
     self.assertFalse(hasattr(raw_json, 'crs'))
     epsg = vector_io.get_epsg()
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed in Python 3.12.
     self.assertEqual(epsg, 4326)
Beispiel #2
0
    def __init__(self, **kwargs):
        """
        Initialise a LeastCostProcess instance.

        All keyword arguments are forwarded to the parent constructor.
        When no output is set afterwards, a VectorFileIO named 'result'
        pointing at get_outpath() is used, and the process is then
        validated.

        :param kwargs: Keyword arguments for the parent constructor
        """
        super(LeastCostProcess, self).__init__(**kwargs)

        if not self.output:
            result_uri = self.get_outpath()
            self.output = VectorFileIO(name='result', uri=result_uri)
        self.validate()
Beispiel #3
0
    def test_validationInputsMax(self):
        """
        Test the GaiaProcess.validate() function - a process given more
        inputs than it accepts must raise a GaiaException
        """
        too_many_inputs = [VectorFileIO(uri='/fake/path'),
                           VectorFileIO(uri='/fake/path')]

        with self.assertRaises(geo.GaiaException) as raised:
            geo.LengthProcess(inputs=too_many_inputs)
        error_text = str(raised.exception)
        self.assertIn('Incorrect # of inputs; expected 1', error_text)
Beispiel #4
0
 def test_mercator_geojson(self):
     """
     Test reading an EPSG:3857 GeoJSON file, both in its native
     projection and reprojected to EPSG:4326.
     """
     vector_io = VectorFileIO(
         uri=os.path.join(testfile_path, 'iraq_hospitals_3857.json'))
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed in Python 3.12.
     self.assertEqual(vector_io.get_epsg(), 3857)

     # Native read: coordinates and crs stay in EPSG:3857.
     jsonwm = json.loads(vector_io.read(format=geo.formats.JSON))
     self.assertEqual(jsonwm['crs']['properties']['name'], 'EPSG:3857')
     self.assertEqual(jsonwm['features'][0]['geometry']['coordinates'],
                      [4940150.544527022, 3941210.867854486])

     # Requesting epsg=4326 must reproject the same feature.
     json84 = json.loads(vector_io.read(format=geo.formats.JSON, epsg=4326))
     self.assertEqual(json84['crs']['properties']['name'], 'EPSG:4326')
     self.assertEqual(json84['features'][0]['geometry']['coordinates'],
                      [44.378127400000004, 33.34517919999999])
Beispiel #5
0
 def test_within(self):
     """
     Test WithinProcess for vector inputs
     """
     vector1_io = VectorFileIO(
         uri=os.path.join(testfile_path, 'iraq_hospitals.geojson'))
     vector2_io = VectorFileIO(
         uri=os.path.join(testfile_path, 'baghdad_districts.geojson'))
     process = geo.WithinProcess(inputs=[vector1_io, vector2_io])
     try:
         process.compute()
         # assertEqual replaces the deprecated assertEquals alias, which
         # was removed in Python 3.12.
         self.assertEqual(len(process.output.data), 19)
     finally:
         # Always clean up whatever the process produced.
         if process:
             process.purge()
Beispiel #6
0
 def test_within_reproject(self):
     """
     Test WithinProcess for vector inputs, where output should be in
     same projection as first input (in this case, 3857).
     """
     vector1_io = VectorFileIO(
         uri=os.path.join(testfile_path, 'iraq_hospitals_3857.json'))
     vector2_io = VectorFileIO(
         uri=os.path.join(testfile_path, 'baghdad_districts.geojson'))
     process = geo.WithinProcess(inputs=[vector1_io, vector2_io])
     try:
         process.compute()
         # assertEqual replaces the deprecated assertEquals alias, which
         # was removed in Python 3.12.
         self.assertEqual(process.output.data.crs, {'init': u'epsg:3857'})
         self.assertEqual(len(process.output.data), 19)
     finally:
         # Always clean up whatever the process produced.
         if process:
             process.purge()
Beispiel #7
0
    def test_validationInputsMin(self):
        """
        Test the GaiaProcess.validate() function - a process given fewer
        inputs than it requires must raise a GaiaException
        """
        lone_input = VectorFileIO(uri='/fake/path1')

        with self.assertRaises(geo.GaiaException) as raised:
            geo.IntersectsProcess(inputs=[lone_input])
        error_text = str(raised.exception)
        self.assertIn('Not enough inputs for process', error_text)
Beispiel #8
0
 def test_validationInputsPass(self):
     """
     Test the GaiaProcess.validate() function - a raster input followed
     by a vector input must be accepted by ZonalStatsProcess
     """
     valid_inputs = [RasterFileIO(uri='/fake/path'),
                     VectorFileIO(uri='/fake/path')]
     try:
         geo.ZonalStatsProcess(inputs=valid_inputs)
     except geo.GaiaException:
         self.fail("ZonalProcess should have passed validation but did not")
Beispiel #9
0
    def test_validationInputsOrder(self):
        """
        Test the GaiaProcess.validate() function - inputs supplied in the
        wrong order (vector before raster) must raise a GaiaException
        """
        raster_input = RasterFileIO(uri='/fake/path1')
        vector_input = VectorFileIO(uri='/fake/path2')
        swapped_inputs = [vector_input, raster_input]

        with self.assertRaises(geo.GaiaException) as raised:
            geo.ZonalStatsProcess(inputs=swapped_inputs)
        error_text = str(raised.exception)
        self.assertIn('Input #1 is of incorrect type.', error_text)
Beispiel #10
0
 def test_distance(self):
     """
     Test DistanceProcess for vector inputs
     """
     vector1_io = VectorFileIO(
         uri=os.path.join(testfile_path, 'baghdad_districts.geojson'))
     vector2_io = VectorFileIO(
         uri=os.path.join(testfile_path, 'iraq_hospitals.geojson'))
     process = geo.DistanceProcess(inputs=[vector1_io, vector2_io])
     try:
         process.compute()
         with open(
                 os.path.join(testfile_path,
                              'distance_process_results.json')) as exp:
             expected_json = json.load(exp)
         actual_json = json.loads(
             process.output.read(format=geo.formats.JSON))
         # Only the number of features is compared, not their content.
         # assertEqual replaces the deprecated assertEquals alias, which
         # was removed in Python 3.12.
         self.assertEqual(len(expected_json['features']),
                          len(actual_json['features']))
     finally:
         # Always clean up whatever the process produced.
         if process:
             process.purge()
Beispiel #11
0
class ZonalStatsProcess(GaiaProcess):
    """
    Computes statistics of a raster dataset for each polygon (zone) of a
    vector dataset.
    """

    #: List of required inputs in order; description, type, max # of each
    required_inputs = [
        {'description': 'Raster image', 'type': types.RASTER, 'max': 1},
        {'description': 'Zones', 'type': types.VECTOR, 'max': 1},
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        """
        Create the process; when no output is set, use a VectorFileIO
        named 'result' located at get_outpath().

        :param kwargs: Keyword arguments for the parent constructor
        """
        super(ZonalStatsProcess, self).__init__(**kwargs)
        if not self.output:
            result_uri = self.get_outpath()
            self.output = VectorFileIO(name='result', uri=result_uri)

    def compute(self):
        """
        Run the process: read the zones as JSON in the raster's
        projection, calculate the zonal statistics and write them to the
        output.
        """
        self.output.create_output_dir(self.output.uri)

        # The zones are requested in the EPSG code of the raster input.
        raster_epsg = self.inputs[0].get_epsg()
        zones_json = self.inputs[1].read(format=formats.JSON,
                                         epsg=raster_epsg)
        raster_data = self.inputs[0].read()

        zone_features = gdal_zonalstats(zones_json, raster_data)
        self.output.data = GeoDataFrame.from_features(zone_features)
        self.output.write()
Beispiel #12
0
 def test_union(self):
     """
     Test UnionProcess for vector inputs
     """
     # Both inputs read the same file but apply different 'NNAME' filters.
     vector1_io = VectorFileIO(uri=os.path.join(
         testfile_path, 'baghdad_districts.geojson'),
                               filters=[('NNAME', 'contains', '^A')])
     vector2_io = VectorFileIO(uri=os.path.join(
         testfile_path, 'baghdad_districts.geojson'),
                               filters=[('NNAME', 'contains', '^B')])
     process = geo.UnionProcess(inputs=[vector1_io, vector2_io])
     try:
         process.compute()
         with open(os.path.join(testfile_path,
                                'union_process_results.json')) as exp:
             expected_json = json.load(exp)
         actual_json = json.loads(
             process.output.read(format=geo.formats.JSON))
         # Only the number of features is compared, not their content.
         # assertEqual replaces the deprecated assertEquals alias, which
         # was removed in Python 3.12.
         self.assertEqual(len(expected_json['features']),
                          len(actual_json['features']))
     finally:
         # Always clean up whatever the process produced.
         if process:
             process.purge()
Beispiel #13
0
 def test_length(self):
     """
     Test LengthProcess for vector inputs
     """
     vector_roads = VectorFileIO(uri=os.path.join(testfile_path,
                                                  'iraq_roads.geojson'),
                                 filters=[('type', '=', 'motorway'),
                                          ('bridge', '=', 1)])
     process = geo.LengthProcess(inputs=[vector_roads])
     try:
         process.compute()
         with open(
                 os.path.join(testfile_path,
                              'length_process_results.json')) as exp:
             expected_json = json.load(exp)
         actual_json = json.loads(
             process.output.read(format=geo.formats.JSON))
         # Only the number of features is compared, not their content.
         # assertEqual replaces the deprecated assertEquals alias, which
         # was removed in Python 3.12.
         self.assertEqual(len(expected_json['features']),
                          len(actual_json['features']))
     finally:
         # Always clean up whatever the process produced.
         if process:
             process.purge()
Beispiel #14
0
    def test_subset_raster(self):
        """
        Test SubsetProcess for vector & raster inputs
        """
        extracted = os.path.join(testfile_path, '2states.geojson')
        # Defined before the try block so the cleanup below can run even
        # when extraction or process construction fails.
        process = None
        try:
            # The context manager closes the archive once the fixture has
            # been extracted (the handle used to be left open).
            with ZipFile(os.path.join(testfile_path, '2states.zip'),
                         'r') as archive:
                archive.extract('2states.geojson', testfile_path)

            vector_io = VectorFileIO(uri=extracted)
            raster_io = RasterFileIO(
                uri=os.path.join(testfile_path, 'globalairtemp.tif'))
            process = geo.SubsetProcess(inputs=[raster_io, vector_io])
            process.compute()
            # assertEqual replaces the deprecated assertEquals alias,
            # which was removed in Python 3.12.
            self.assertEqual(type(process.output.data).__name__, 'Dataset')
            self.assertTrue(os.path.exists(process.output.uri))
            self.assertIsNotNone(process.id)
            self.assertIn(process.id, process.output.uri)
        finally:
            # Remove the extracted fixture and anything the process wrote.
            if os.path.exists(extracted):
                os.remove(extracted)
            if process:
                process.purge()
Beispiel #15
0
class LengthProcess(GaiaProcess):
    """
    Calculate the length of each feature in a dataset.
    If the dataset projection is not in metric units, it will
    be temporarily reprojected to EPSG:3857 to calculate the length.
    """

    #: List of required inputs; description, type, max # of each
    required_inputs = [
        {'description': 'Line/Polygon dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        """
        Create the process; when no output is set, use a VectorFileIO
        named 'result' located at get_outpath().

        :param kwargs: Keyword arguments for the parent constructor
        """
        super(LengthProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate lengths using pandas

        :return: Result as a GeoDataFrame with an added 'length' column
        """
        featureio = self.inputs[0]
        original_projection = featureio.get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        # Unit names starting with 'met' (metre/meter) are treated as
        # metric; anything else is measured in EPSG:3857 instead.
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            # Already metric: nothing to reproject back afterwards.
            original_projection = None
        feature_df = GeoDataFrame.copy(featureio.read(epsg=epsg))
        feature_df['length'] = feature_df.geometry.length
        if original_projection:
            # Restore the geometries and crs of the input projection.
            feature_df[feature_df.geometry.name] = feature_df.geometry.to_crs(
                epsg=original_projection)
            feature_df.crs = fiona.crs.from_epsg(original_projection)
        return feature_df

    def calc_postgis(self):
        """
        Calculate lengths using PostGIS

        :return: Result as a GeoDataFrame
        """
        featureio = self.inputs[0]
        geom0, epsg = featureio.geom_column, featureio.epsg
        srs = osr.SpatialReference()
        # Coerce to int as the other processes do, so an EPSG code held
        # as a string is accepted here as well.
        srs.ImportFromEPSG(int(epsg))
        geom_query = geom0
        geometry_type = featureio.geometry_type
        # Polygons are measured by their perimeter, other types by length.
        length_func = 'ST_Perimeter' if 'POLYGON' in geometry_type.upper() \
            else 'ST_Length'
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            geom_query = 'ST_Transform({}, {})'.format(
                geom_query, 3857)
        geom_query = ', {}({}) as length'.format(length_func, geom_query)
        query, params = featureio.get_query()
        # Append the length expression to the select list, i.e. in front
        # of the FROM keyword.
        query = query.replace('FROM', '{} FROM'.format(geom_query))
        logger.debug(query)
        return df_from_postgis(featureio.engine, query, params, geom0, epsg)

    def compute(self):
        """
        Run the length process
        """
        # A PostgisIO input is processed in the database, everything else
        # in pandas.
        if self.inputs[0].__class__.__name__ == 'PostgisIO':
            data = self.calc_postgis()
        else:
            data = self.calc_pandas()
        self.output.data = data
        self.output.write()
Beispiel #16
0
 def __init__(self, inputs=None, buffer_size=None, **kwargs):
     """
     Create a BufferProcess.

     :param inputs: Inputs handed on to the parent constructor
     :param buffer_size: Buffer size, stored on the instance
     :param kwargs: Further keyword arguments for the parent constructor
     """
     # buffer_size is assigned before the parent constructor is invoked.
     self.buffer_size = buffer_size
     super(BufferProcess, self).__init__(inputs, **kwargs)
     if not self.output:
         result_uri = self.get_outpath()
         self.output = VectorFileIO(name='result', uri=result_uri)
Beispiel #17
0
 def __init__(self, distance=None, **kwargs):
     """
     Create a NearProcess.

     :param distance: Distance value, stored on the instance
     :param kwargs: Keyword arguments for the parent constructor
     """
     super(NearProcess, self).__init__(**kwargs)
     self.distance = distance
     if not self.output:
         result_uri = self.get_outpath()
         self.output = VectorFileIO(name='result', uri=result_uri)
Beispiel #18
0
class NearProcess(GaiaProcess):
    """
    Takes two inputs, the second assumed to contain a single feature,
    the first a vector dataset. Requires a distance argument, and the unit of
    measure should be meters.  If inputs are not in a
    metric projection they will be reprojected to EPSG:3857.
    Returns the features in the first input within a specified distance
    of the point in the second input.
    """

    #: List of required inputs; description, type, max # of each
    required_inputs = [
        {'description': 'Features',
         'type': types.VECTOR,
         'max': 1
         },
        {'description': 'Point',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Required arguments, data types as dict
    required_args = [{
        'name': 'distance',
        'title': 'Distance',
        'description': 'Distance to search for features, in meters',
        'type': float
    }]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, distance=None, **kwargs):
        """
        Create the process; when no output is set, use a VectorFileIO
        named 'result' located at get_outpath().

        :param distance: Search distance, in meters
        :param kwargs: Keyword arguments for the parent constructor
        """
        super(NearProcess, self).__init__(**kwargs)
        self.distance = distance
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculates the features within the specified distance using pandas

        :return: results as a GeoDataFrame, sorted by ascending distance
        """
        features = self.inputs[0]
        original_projection = features.get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        # Unit names starting with 'met' (metre/meter) are treated as
        # metric; anything else is measured in EPSG:3857 instead.
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            # Already metric: nothing to reproject back afterwards.
            original_projection = None
        features_df = features.read(epsg=epsg)
        point_df = self.inputs[1].read(epsg=epsg)[:1]
        # Positional access: the index label of the first row is not
        # necessarily 0, in which case a [0] label lookup would fail.
        point = point_df.geometry.iloc[0]

        nearby_df = GeoDataFrame.copy(features_df)
        nearby_df['distance'] = np.array(
            [feature.distance(point) for feature in features_df.geometry],
            dtype=float)
        nearby_df = nearby_df[nearby_df['distance'] <= self.distance]\
            .sort_values('distance')
        if original_projection:
            # Reproject the whole frame so that its crs is updated together
            # with the geometries, instead of leaving the frame's crs on
            # the projection used for measuring.
            nearby_df = nearby_df.to_crs(epsg=original_projection)
        return nearby_df

    def calc_postgis(self):
        """
        Calculates the features within the specified distance using PostGIS
        via DWithin plus K-Nearest Neighbor (KNN) query

        :return: results as a GeoDataFrame
        """
        featureio = self.inputs[0]
        pointio = self.inputs[1]
        feature_geom, epsg = featureio.geom_column, featureio.epsg
        point_json = json.loads(pointio.read(
            format=formats.JSON))['features'][0]
        point_epsg = pointio.get_epsg()

        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(epsg))
        # Projection used for measuring only. The selected geometry
        # column is not transformed, so the result must keep the source
        # EPSG code rather than this one.
        metric_epsg = epsg
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            metric_epsg = 3857

        io_query, params = featureio.get_query()

        # NOTE(review): the point geometry and the distance are formatted
        # straight into the SQL text rather than passed as parameters.
        point_geom = 'ST_Transform(ST_SetSRID(ST_GeomFromGeoJSON(\'' \
                     '{geojson}\'),{point_epsg}), {epsg})'.\
            format(geojson=json.dumps(point_json['geometry']),
                   point_epsg=point_epsg, epsg=metric_epsg)

        dist1 = """, (SELECT ST_Distance(
                ST_Transform({table0}.{geom0},{epsg}),
                ST_Transform(point, {epsg}))
                FROM {point_geom} as point
                ORDER BY {table0}.{geom0} <#> point LIMIT 1) as distance FROM
                """.format(table0=featureio.table,
                           geom0=feature_geom,
                           point_geom=point_geom,
                           epsg=metric_epsg)

        dist2 = """
                WHERE ST_DWithin({point_geom},
                ST_Transform({table0}.{geom0},{epsg}), {distance})
                """.format(table0=featureio.table,
                           geom0=feature_geom,
                           point_geom=point_geom,
                           epsg=metric_epsg,
                           distance=self.distance)

        dist3 = ' ORDER BY distance ASC'
        # str.replace inserts the generated SQL literally; a regex
        # substitution would interpret backslashes in the replacement.
        query = io_query.replace('FROM', dist1).rstrip(';')
        if 'WHERE' in query:
            # Put the distance test in front of the existing conditions.
            query = query.replace('WHERE', dist2 + ' AND ')
        else:
            query += dist2
        query += dist3
        logger.debug(query)
        return df_from_postgis(featureio.engine,
                               query, params, feature_geom, epsg)

    def compute(self):
        """
        Run the process
        """
        # A PostgisIO first input is processed in the database, everything
        # else in pandas.
        if self.inputs[0].__class__.__name__ == 'PostgisIO':
            data = self.calc_postgis()
        else:
            data = self.calc_pandas()
        self.output.data = data
        self.output.write()
Beispiel #19
0
class DistanceProcess(GaiaProcess):
    """
    Calculates the minimum distance from each feature of the first dataset
    to the nearest feature of the second dataset.
    """

    #: List of required inputs; description, type, max # of each
    required_inputs = [
        {'description': 'From dataset',
         'type': types.VECTOR,
         'max': 1
         },
        {'description': 'To dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        """
        Create the process; when no output is set, use a VectorFileIO
        named 'result' located at get_outpath().

        :param kwargs: Keyword arguments for the parent constructor
        """
        super(DistanceProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate the minimum distance between features using pandas
        GeoDataFrames.

        :return: Minimum distance results as a GeoDataFrame, sorted by
            ascending distance
        """
        first = self.inputs[0]
        original_projection = first.get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        # Unit names starting with 'met' (metre/meter) are treated as
        # metric; anything else is measured in EPSG:3857 instead.
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            # Already metric: nothing to reproject back afterwards.
            original_projection = None
        first_df = first.read(epsg=epsg)
        first_gs = first_df.geometry
        first_length = len(first_gs)
        second_df = self.inputs[1].read(epsg=epsg)
        second_gs = second_df.geometry
        min_dist = np.empty(first_length)
        # For every feature of the first input keep the smallest distance
        # to any feature of the second input.
        for i, first_features in enumerate(first_gs):
            min_dist[i] = np.min([first_features.distance(second_features)
                                  for second_features in second_gs])

        distance_df = GeoDataFrame.copy(first_df)
        distance_df['distance'] = min_dist
        distance_df.sort_values('distance', inplace=True)
        if original_projection:
            # Reproject the whole frame so that its crs is updated together
            # with the geometries, instead of leaving the frame's crs on
            # the projection used for measuring.
            distance_df = distance_df.to_crs(epsg=original_projection)
        return distance_df

    def calc_postgis(self):
        """
        Calculate the minimum distance between features using PostGIS
        K-Nearest Neighbor (KNN) query

        :return: Minimum distance results as a GeoDataFrame
        """
        diff_queries = []
        diff_params = []
        first = self.inputs[0]
        geom0, epsg = first.geom_column, first.epsg
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(epsg))
        # Projection used for measuring only. The selected geometry
        # column is not transformed, so the result must keep the source
        # EPSG code rather than this one.
        metric_epsg = epsg
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            metric_epsg = 3857

        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            diff_queries.append(io_query.rstrip(';'))
            # Prepended: the second query is spliced in at the first
            # query's FROM keyword, so its parameters precede those that
            # follow FROM in the first query.
            diff_params.insert(0, params)

        # Flatten the per-query parameter lists into one list.
        diff_params = [item for x in diff_params for item in x]
        dist1 = """, (SELECT ST_Distance(
                ST_Transform({table0}.{geom0},{epsg}),
                ST_Transform(query2.{geom1},{epsg}))
                as distance
                """.format(table0=self.inputs[0].table,
                           geom0=geom0,
                           geom1=geom1,
                           epsg=metric_epsg)

        dist2 = """
                ORDER BY {table0}.{geom0} <#> query2.{geom1} LIMIT 1) FROM
                """.format(table0=self.inputs[0].table,
                           geom0=geom0,
                           geom1=geom1)

        dist3 = ' ORDER BY distance ASC'
        nearest_subquery = (dist1 + ' FROM (' + diff_queries[1] +
                            ') as query2 ' + dist2)
        # str.replace inserts the generated SQL literally; a regex
        # substitution would interpret backslashes in the replacement.
        query = diff_queries[0].replace('FROM', nearest_subquery) + dist3
        return df_from_postgis(first.engine, query, diff_params, geom0, epsg)

    def compute(self):
        """
        Run the distance process
        """
        input_classes = list(self.get_input_classes())
        # PostGIS is used only when 'PostgisIO' is the sole input class.
        use_postgis = (len(input_classes) == 1 and
                       input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
Beispiel #20
0
 def __init__(self, combined=False, **kwargs):
     """
     Create a CentroidProcess.

     :param combined: Flag stored on the instance (default False)
     :param kwargs: Keyword arguments for the parent constructor
     """
     super(CentroidProcess, self).__init__(**kwargs)
     self.combined = combined
     if not self.output:
         result_uri = self.get_outpath()
         self.output = VectorFileIO(name='result', uri=result_uri)
Beispiel #21
0
class CentroidProcess(GaiaProcess):
    """
    Calculates the centroid point of a vector dataset.
    """

    #: List of required inputs; name, type , max # of each; None = no max
    required_inputs = [
        {'description': 'Line/Polygon dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Optional arguments, data types as dict
    optional_args = [{
        'name': 'combined',
        'title': 'Combined',
        'description': 'Get centroid of features combined (default False)',
        'type': bool,

    }]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, combined=False, **kwargs):
        """
        Create the process; when no output is set, use a VectorFileIO
        named 'result' located at get_outpath().

        :param combined: When true, calculate a single centroid for all
            features combined instead of one per feature
        :param kwargs: Keyword arguments for the parent constructor
        """
        super(CentroidProcess, self).__init__(**kwargs)
        self.combined = combined
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate the centroid using pandas GeoDataFrames

        :return: centroid as a GeoDataFrame
        """
        df_in = self.inputs[0].read()
        geom_name = df_in.geometry.name
        # The crs is passed explicitly so the result keeps the projection
        # of the input regardless of whether the constructor copies it.
        df = GeoDataFrame(df_in.copy(), geometry=geom_name, crs=df_in.crs)
        if self.combined:
            # One centroid for the union of all geometries.
            gs = GeoSeries(df.geometry.unary_union.centroid,
                           name=geom_name)
            return GeoDataFrame(gs, geometry=geom_name, crs=df_in.crs)
        else:
            # One centroid per feature, replacing the original geometry.
            df[df.geometry.name] = df.geometry.centroid
            return df

    def calc_postgis(self):
        """
        Calculate the centroid using PostGIS

        :return: centroid as a GeoDataFrame
        """
        pg_io = self.inputs[0]
        io_query, params = pg_io.get_query()
        geom0, epsg = pg_io.geom_column, pg_io.epsg
        if self.combined:
            query = 'SELECT ST_Centroid(ST_Union({geom})) as {geom}' \
                    ' from ({query}) as foo'.format(geom=geom0,
                                                    query=io_query.rstrip(';'))
        else:
            # Replace only the first quoted occurrence of the geometry
            # column.  A plain string replacement treats the column name
            # literally and avoids passing re.sub's count positionally,
            # which is deprecated in Python 3.13.
            query = io_query.replace(
                '"{}"'.format(geom0),
                'ST_Centroid("{geom}") as {geom}'.format(geom=geom0),
                1)
        return df_from_postgis(pg_io.engine, query, params, geom0, epsg)

    def compute(self):
        """
        Run the centroid process
        """
        # A PostgisIO input is processed in the database, everything else
        # in pandas.
        use_postgis = self.inputs[0].__class__.__name__ == 'PostgisIO'
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
Beispiel #22
0
class BufferProcess(GaiaProcess):
    """
    Generates a buffer polygon around the geometries of the input data.
    The size of the buffer is determined by the 'buffer_size' args key
    and the unit of measure should be meters.  If inputs are not in a
    metric projection they will be reprojected to EPSG:3857.
    """

    #: List of required inputs; description, type, max # of each
    required_inputs = [
        {'description': 'Feature dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Required arguments, data types as dict
    required_args = [
        {
            'name': 'buffer_size',
            'title': 'Buffer Size',
            'description': 'Size of the buffer in meters',
            'type': float
        }
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, inputs=None, buffer_size=None, **kwargs):
        """
        Create the process; when no output is set, use a VectorFileIO
        named 'result' located at get_outpath().

        :param inputs: Inputs handed on to the parent constructor
        :param buffer_size: Size of the buffer in meters
        :param kwargs: Further keyword arguments for the parent constructor
        """
        # buffer_size is assigned before the parent constructor is invoked.
        self.buffer_size = buffer_size
        super(BufferProcess, self).__init__(inputs, **kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate the buffer using pandas GeoDataFrames

        :return: Buffer as a pandas GeoDataFrame
        """
        featureio = self.inputs[0]
        original_projection = featureio.get_epsg()
        epsg = original_projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))
        # Unit names starting with 'met' (metre/meter) are treated as
        # metric; anything else is buffered in EPSG:3857 instead.
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            epsg = 3857
        else:
            # Already metric: nothing to reproject back afterwards.
            original_projection = None
        feature_df = featureio.read(epsg=epsg)
        # All individual buffers are dissolved into a single geometry.
        buffered = GeoSeries(feature_df.buffer(self.buffer_size).unary_union)
        buffer_df = GeoDataFrame(geometry=buffered)
        buffer_df.crs = feature_df.crs
        if original_projection:
            # Use the reprojected frame itself (geometries and crs) rather
            # than assigning a whole GeoDataFrame to the geometry column.
            buffer_df = buffer_df.to_crs(epsg=original_projection)
        return buffer_df

    def calc_postgis(self):
        """
        Calculate the buffer using PostGIS

        :return: Buffer as a pandas GeoDataFrame
        """
        pg_io = self.inputs[0]
        original_projection = pg_io.epsg
        io_query, params = pg_io.get_query()
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(int(original_projection))

        # Default to the untransformed geometry column so that metric
        # inputs no longer hit an unassigned geom_query below.
        geom_query = pg_io.geom_column
        if not srs.GetAttrValue('UNIT').lower().startswith('met'):
            geom_query = 'ST_Transform({}, {})'.format(
                pg_io.geom_column, 3857)
        else:
            # Already metric: nothing to transform back afterwards.
            original_projection = None
        # %s is the placeholder for buffer_size (first query parameter).
        buffer_query = 'ST_Union(ST_Buffer({}, %s))'.format(geom_query)
        if original_projection:
            buffer_query = 'ST_Transform({}, {})'.format(buffer_query,
                                                         original_projection)

        query = 'SELECT {buffer} as {geocol} ' \
                'FROM ({query}) as foo'.format(buffer=buffer_query,
                                               geocol=pg_io.geom_column,
                                               query=io_query.rstrip(';'))
        # The buffer placeholder precedes the placeholders of the wrapped
        # input query, so its value goes first.
        params.insert(0, self.buffer_size)
        logger.debug(query)
        return df_from_postgis(pg_io.engine, query, params,
                               pg_io.geom_column, pg_io.epsg)

    def compute(self):
        """
        Run the buffer process.
        """
        # A PostgisIO input is processed in the database, everything else
        # in pandas.
        if self.inputs[0].__class__.__name__ == 'PostgisIO':
            data = self.calc_postgis()
        else:
            data = self.calc_pandas()
        self.output.data = data
        self.output.write()
Beispiel #23
0
class UnionProcess(GaiaProcess):
    """
    Combines two vector datasets into one.
    The datasets should have the same columns.
    """

    #: List of required inputs; description, type, max # of each
    required_inputs = [
        {'description': 'First dataset',
         'type': types.VECTOR,
         'max': 1
         },
        {'description': 'Second dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        """
        Create the process; when no output is set, use a VectorFileIO
        named 'result' located at get_outpath().

        :param kwargs: Keyword arguments for the parent constructor
        """
        super(UnionProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate the union using pandas GeoDataFrames

        :return: union result as a GeoDataFrame
        :raises GaiaException: if the inputs do not have the same columns
        """
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        # The second input is read in the projection of the first.
        second_df = second.read(epsg=first.get_epsg())
        # Compare the column names element by element: joining them into
        # one string treats e.g. ['ab', 'c'] and ['a', 'bc'] as equal and
        # fails for non-string labels.
        if list(first_df.columns) != list(second_df.columns):
            raise GaiaException('Inputs must have the same columns')
        uniondf = GeoDataFrame(pd.concat([first_df, second_df]))
        return uniondf

    def calc_postgis(self):
        """
        Calculate the union using PostGIS

        :return: union result as a GeoDataFrame
        :raises GaiaException: if the inputs do not have the same columns
        """
        union_queries = []
        union_params = []
        first = self.inputs[0]
        second = self.inputs[1]
        geom0, epsg = first.geom_column, first.epsg
        geom1, epsg1 = second.geom_column, second.epsg
        # Element-wise comparison, see calc_pandas.
        if list(first.columns) != list(second.columns):
            raise GaiaException('Inputs must have the same columns')
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            union_queries.append(io_query.rstrip(';'))
            union_params.extend(params)

        if epsg1 != epsg:
            # Transform the second input's geometry to the projection of
            # the first one.
            geom1_query = 'ST_Transform({},{})'.format(geom1, epsg)
            union_queries[1] = union_queries[1].replace(
                '"{}"'.format(geom1), geom1_query)
        query = '({query0}) UNION ({query1})'\
            .format(query0=union_queries[0], query1=union_queries[1])
        return df_from_postgis(first.engine,
                               query, union_params, geom0, epsg)

    def compute(self):
        """
        Run the union process.
        """
        input_classes = list(self.get_input_classes())
        # PostGIS is used only when 'PostgisIO' is the sole input class.
        use_postgis = (len(input_classes) == 1 and
                       input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
Beispiel #24
0
class WithinProcess(GaiaProcess):
    """
    Similar to SubsetProcess but for vectors: selects the features of the
    first vector dataset that are within the combined (unioned) geometries
    of a second vector dataset.
    """

    #: Tuple of required inputs; name, type , max # of each; None = no max
    required_inputs = [
        {'description': 'Feature dataset',
         'type': types.VECTOR,
         'max': 1
         },
        {'description': 'Within dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        """
        Create the process; fall back to a file output named 'result'
        when no output was supplied.

        :param kwargs: passed through to GaiaProcess
        """
        super(WithinProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate the within process using pandas GeoDataFrames

        The second input is read in the first input's EPSG so both
        datasets are compared in the same coordinate system.

        :return: within result as a GeoDataFrame
        """
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        first_within = first_df[first_df.geometry.within(
            second_df.geometry.unary_union)]
        return first_within

    def calc_postgis(self):
        """
        Calculate the within process using PostGIS

        The first input's query is extended with an ST_Within condition
        against the ST_Union of the second input's geometries; the
        condition is joined with AND when the query text already contains
        WHERE, otherwise with WHERE.

        :return: within result as a GeoDataFrame
        """
        first = self.inputs[0]
        within_queries = []
        within_params = []
        geom0 = first.geom_column
        epsg = first.epsg
        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            within_queries.append(io_query.rstrip(';'))
            within_params.extend(params)
        if 'WHERE' in within_queries[0].upper():
            joinstr = ' AND '
        else:
            joinstr = ' WHERE '
        query = '{query0} {join} ST_Within(ST_Transform({geom0},{epsg}), ' \
                '(SELECT ST_Union(ST_TRANSFORM({geom1},{epsg})) ' \
                'from ({query1}) as q2))'\
            .format(query0=within_queries[0], join=joinstr, geom0=geom0,
                    geom1=geom1, epsg=epsg, query1=within_queries[1])
        # Pass the parameters accumulated from BOTH input queries; the
        # loop variable alone only holds those of the last input.
        return df_from_postgis(first.engine, query, within_params,
                               geom0, epsg)

    def compute(self):
        """
        Run the Within process

        Uses PostGIS when get_input_classes() yields only 'PostgisIO',
        otherwise pandas, then stores and writes the result.

        :raises GaiaException: if the number of inputs is not 2
        """
        if len(self.inputs) != 2:
            raise GaiaException('WithinProcess requires 2 inputs')
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1 and
                       input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
Beispiel #25
0
class EqualsProcess(GaiaProcess):
    """
    Calculates the features within the first vector dataset that are the same as
    the features of the second vector dataset.
    """

    #: Tuple of required inputs; name, type , max # of each; None = no max
    required_inputs = [
        {'description': 'First dataset',
         'type': types.VECTOR,
         'max': 1
         },
        {'description': 'Second dataset',
         'type': types.VECTOR,
         'max': 1
         }
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        """
        Create the process; fall back to a file output named 'result'
        when no output was supplied.

        :param kwargs: passed through to GaiaProcess
        """
        super(EqualsProcess, self).__init__(**kwargs)
        if not self.output:
            self.output = VectorFileIO(name='result',
                                       uri=self.get_outpath())

    def calc_pandas(self):
        """
        Calculate which features are equal using pandas

        A feature of the first input is kept when its geometry ``equals``
        the geometry of at least one feature of the second input (which is
        read in the first input's EPSG).

        :return: result as a GeoDataFrame
        """
        first, second = self.inputs[0], self.inputs[1]
        first_df = first.read()
        second_df = second.read(epsg=first.get_epsg())
        second_geoms = list(second_df.geometry)
        # Boolean mask instead of a temporary 'equals' column: this avoids
        # the positional ``drop(label, 1)`` call that newer pandas rejects
        # and cannot clobber an existing column named 'equals'.
        # any() also stops at the first matching geometry.
        matches = np.array(
            [any(geom.equals(other) for other in second_geoms)
             for geom in first_df.geometry],
            dtype=bool)
        return first_df[matches].copy()

    def calc_postgis(self):
        """
        Calculate which features are equal using PostGIS

        The first input's query is extended with a
        ``geom0 IN (SELECT geom1 FROM (query1))`` condition, joined with
        AND when the query text already contains WHERE, otherwise with
        WHERE.

        :return: result as a GeoDataFrame
        """
        equals_queries = []
        equals_params = []
        first = self.inputs[0]
        geom0, epsg = first.geom_column, first.epsg
        geom1 = self.inputs[1].geom_column
        for pg_io in self.inputs:
            io_query, params = pg_io.get_query()
            equals_queries.append(io_query.rstrip(';'))
            equals_params.extend(params)
        if 'WHERE' in equals_queries[0].upper():
            joinstr = ' AND '
        else:
            joinstr = ' WHERE '
        query = '{query0} {join} {geom0} IN (SELECT {geom1} ' \
                'FROM ({query1}) as second)'.format(query0=equals_queries[0],
                                                    query1=equals_queries[1],
                                                    join=joinstr,
                                                    geom0=geom0,
                                                    geom1=geom1)
        logger.debug(query)
        return df_from_postgis(first.engine, query, equals_params, geom0, epsg)

    def compute(self):
        """
        Run the process

        Uses PostGIS when get_input_classes() yields only 'PostgisIO',
        otherwise pandas, then stores and writes the result.
        """
        input_classes = list(self.get_input_classes())
        use_postgis = (len(input_classes) == 1 and
                       input_classes[0] == 'PostgisIO')
        data = self.calc_postgis() if use_postgis else self.calc_pandas()
        self.output.data = data
        self.output.write()
        logger.debug(self.output)
Beispiel #26
0
class LeastCostProcess(GaiaProcess):
    """
    Process to calculate the least cost path between
    two points over a raster grid.
    """

    #: Tuple of required inputs; name, type , max # of each; None = no max
    required_inputs = [{
        'description': 'Raster dataset',
        'type': types.RASTER,
        'max': 1
    }, {
        'description': 'Start/end point dataset(s)',
        'type': types.VECTOR,
        'max': 2
    }]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        """
        Create an instance of LeastCostProcess class.

        Falls back to a file output named 'result' when no output was
        supplied, then validates the process.

        :param kwargs: passed through to GaiaProcess
        """
        super(LeastCostProcess, self).__init__(**kwargs)

        if not self.output:
            self.output = VectorFileIO(name='result', uri=self.get_outpath())
        self.validate()

    def array2shp(self, array, outSHPfn, rasterfn, pixelValue):
        """
        Convert a grid array representation of the path into a vector file
        written with the OGR "GeoJSON" driver.

        Every pair of path cells closer together than the (rounded-up)
        pixel diagonal is joined by a line segment; all segments are stored
        as one multi-line-string feature.

        :param array: least cost path as numeric grid array
        :param outSHPfn: output file path
        :param rasterfn: raster IO object used to calculate path
        :param pixelValue: cell value of path in grid array
        """
        raster = get_dataset(rasterfn.uri)
        geotransform = raster.GetGeoTransform()
        pixelWidth = geotransform[1]
        # Diagonal of a pixelWidth x pixelWidth cell, rounded up.
        # NOTE(review): assumes square pixels; for very small pixel sizes
        # ceil() rounds the threshold up to 1 map unit - confirm intended.
        maxDistance = ceil(sqrt(2 * pixelWidth * pixelWidth))

        # Map coordinates of every cell that belongs to the path.
        roadList = np.where(array == pixelValue)
        points = []
        for count, indexY in enumerate(roadList[0]):
            indexX = roadList[1][count]
            points.append(self.pixel_offset2coord(rasterfn, indexX, indexY))

        multiline = ogr.Geometry(ogr.wkbMultiLineString)
        for i in itertools.combinations(points, 2):
            point1 = ogr.Geometry(ogr.wkbPoint)
            point1.AddPoint(i[0][0], i[0][1])
            point2 = ogr.Geometry(ogr.wkbPoint)
            point2.AddPoint(i[1][0], i[1][1])

            distance = point1.Distance(point2)

            # only connect cells that are direct neighbours
            if distance < maxDistance:
                line = ogr.Geometry(ogr.wkbLineString)
                line.AddPoint(i[0][0], i[0][1])
                line.AddPoint(i[1][0], i[1][1])
                multiline.AddGeometry(line)

        shpDriver = ogr.GetDriverByName("GeoJSON")
        if os.path.exists(outSHPfn):
            shpDriver.DeleteDataSource(outSHPfn)
        else:
            self.output.create_output_dir(outSHPfn)

        outDataSource = shpDriver.CreateDataSource(outSHPfn)
        outLayer = outDataSource.CreateLayer(outSHPfn,
                                             geom_type=ogr.wkbMultiLineString)

        featureDefn = outLayer.GetLayerDefn()
        outFeature = ogr.Feature(featureDefn)
        outFeature.SetGeometry(multiline)
        outLayer.CreateFeature(outFeature)

        # Release the OGR objects explicitly (feature, layer, then data
        # source) so the output is flushed and closed deterministically.
        outFeature = None
        outLayer = None
        outDataSource = None

    def raster_to_array(self, rasterfn):
        """
        Convert the first band of a raster grid into an array

        :param rasterfn: input raster IO object
        :return: array
        """
        raster = get_dataset(rasterfn.uri)
        band = raster.GetRasterBand(1)
        array = band.ReadAsArray()
        return array

    def coord2pixeloffset(self, rasterfn, x, y):
        """
        Convert map coordinates to pixel offsets using the raster's
        geotransform origin and pixel size (rotation terms are not used).
        The offsets are truncated to integers with int().

        :param rasterfn: raster IO object
        :param x: x coordinate (longitude)
        :param y: y coordinate (latitude)
        :return: (xOffset, yOffset) tuple
        """
        raster = get_dataset(rasterfn.uri)
        geotransform = raster.GetGeoTransform()
        originX = geotransform[0]
        originY = geotransform[3]
        pixelWidth = geotransform[1]
        pixelHeight = geotransform[5]
        xOffset = int((x - originX) / pixelWidth)
        yOffset = int((y - originY) / pixelHeight)
        return xOffset, yOffset

    def pixel_offset2coord(self, rasterfn, xOffset, yOffset):
        """
        Convert pixel offsets to map coordinates using the raster's
        geotransform origin and pixel size (rotation terms are not used).

        :param rasterfn: raster IO object
        :param xOffset: column (x) offset
        :param yOffset: row (y) offset
        :return: (coordX, coordY) tuple
        """
        raster = get_dataset(rasterfn.uri)
        geotransform = raster.GetGeoTransform()
        originX = geotransform[0]
        originY = geotransform[3]
        pixelWidth = geotransform[1]
        pixelHeight = geotransform[5]
        coordX = originX + pixelWidth * xOffset
        coordY = originY + pixelHeight * yOffset
        return coordX, coordY

    def create_path(self, raster, costSurfaceArray, start, end):
        """
        Calculate the least cost path

        :param raster: raster IO object
        :param costSurfaceArray: raster as numeric array
        :param start: start point (object with ``x`` and ``y``)
        :param end: end point (object with ``x`` and ``y``)
        :return: least cost path as grid array (1 on the path, 0 elsewhere)
        """

        # coordinates to array index
        startCoordX = start.x
        startCoordY = start.y
        startIndexX, startIndexY = self.coord2pixeloffset(
            raster, startCoordX, startCoordY)

        stopCoordX = end.x
        stopCoordY = end.y
        stopIndexX, stopIndexY = self.coord2pixeloffset(
            raster, stopCoordX, stopCoordY)

        # create path; the array is indexed (row, column) = (y, x)
        indices, weight = route_through_array(costSurfaceArray,
                                              (startIndexY, startIndexX),
                                              (stopIndexY, stopIndexX),
                                              geometric=True,
                                              fully_connected=True)
        indices = np.array(indices).T
        path = np.zeros_like(costSurfaceArray)
        path[indices[0], indices[1]] = 1
        return path

    def calculate_path(self, raster, start, end):
        """
        Convert the input raster into an array, calculate the least
        cost path as an array and write it to the output URI.

        :param raster: raster IO object
        :param start: start point
        :param end: end point
        """
        costSurfaceArray = self.raster_to_array(raster)
        pathArray = self.create_path(raster, costSurfaceArray, start, end)
        self.array2shp(pathArray, self.output.uri, raster, 1)

    def compute(self):
        """
        Perform the process calculations

        With three inputs, the start and end points are the centroids of
        the first feature of inputs 1 and 2 respectively; otherwise they
        are the centroids of the first two features of input 1.
        """
        if len(self.inputs) == 3:
            start_point = self.inputs[1].read().iloc[0].geometry.centroid
            end_point = self.inputs[2].read().iloc[0].geometry.centroid
        else:
            # Read the point dataset once and take both features from it.
            points_df = self.inputs[1].read()
            start_point = points_df.iloc[0].geometry.centroid
            end_point = points_df.iloc[1].geometry.centroid
        self.calculate_path(self.inputs[0], start_point, end_point)
Beispiel #27
0
class CrossesProcess(GaiaProcess):
    """
    Select the features of the first vector dataset that cross the
    combined (unioned) features of the second vector dataset.
    """

    #: Required inputs: two vector datasets, at most one of each
    required_inputs = [
        {'description': 'Feature dataset', 'type': types.VECTOR, 'max': 1},
        {'description': 'Crosses dataset', 'type': types.VECTOR, 'max': 1},
    ]

    #: Default output format
    default_output = formats.JSON

    def __init__(self, **kwargs):
        """
        Set up the process, defaulting the output to a file named 'result'.

        :param kwargs: forwarded to GaiaProcess
        """
        super(CrossesProcess, self).__init__(**kwargs)
        if self.output:
            return
        self.output = VectorFileIO(name='result', uri=self.get_outpath())

    def calc_pandas(self):
        """
        Pandas implementation: keep the rows of the first input whose
        geometry crosses the unary union of the second input's geometries
        (the second input is read in the first input's EPSG).

        :return: result as a GeoDataFrame
        """
        features_io, crossing_io = self.inputs[0], self.inputs[1]
        features_df = features_io.read()
        crossing_df = crossing_io.read(epsg=features_io.get_epsg())
        combined = crossing_df.geometry.unary_union
        crosses_mask = features_df.geometry.crosses(combined)
        return features_df[crosses_mask]

    def calc_postgis(self):
        """
        PostGIS implementation: append an ST_Crosses sub-select to the
        first input's query, joined with AND if the query text already
        contains WHERE and with WHERE otherwise.

        :return: result as a GeoDataFrame
        """
        primary = self.inputs[0]
        geom0, epsg = primary.geom_column, primary.epsg
        geom1 = self.inputs[1].geom_column

        statements = []
        bound_params = []
        for source in self.inputs:
            sql, sql_params = source.get_query()
            statements.append(sql.rstrip(';'))
            bound_params.extend(sql_params)

        if 'WHERE' in statements[0].upper():
            joinstr = ' AND '
        else:
            joinstr = ' WHERE '

        template = ('{query0} {join} (SELECT ST_Crosses(ST_Transform('
                    '{table}.{geom0},{epsg}), ST_Union(ST_Transform('
                    'q2.{geom1},{epsg}))) from ({query1}) as q2)')
        query = template.format(
            query0=statements[0],
            join=joinstr,
            geom0=geom0,
            geom1=geom1,
            epsg=epsg,
            query1=statements[1],
            table=primary.table)
        return df_from_postgis(
            primary.engine, query, bound_params, geom0, epsg)

    def compute(self):
        """
        Run the crosses process and write the result to the output.

        PostGIS is chosen when get_input_classes() yields exactly one
        class name, 'PostgisIO'; otherwise pandas is used.
        """
        class_names = list(self.get_input_classes())
        if len(class_names) == 1 and class_names[0] == 'PostgisIO':
            result = self.calc_postgis()
        else:
            result = self.calc_pandas()
        self.output.data = result
        self.output.write()
        logger.debug(self.output)
Beispiel #28
0
 def __init__(self, **kwargs):
     """
     Initialise the process; when no output was supplied, default to a
     file output named 'result' at the path given by get_outpath().

     :param kwargs: forwarded to the parent class initialiser
     """
     super(CrossesProcess, self).__init__(**kwargs)
     if self.output:
         return
     self.output = VectorFileIO(name='result', uri=self.get_outpath())
def _bounding_box_feature(corners):
    """Return a GeoJSON-style polygon feature for the given corner ring.

    The ring is closed by repeating the first corner at the end, as
    GeoJSON requires for polygon linear rings.
    """
    ring = [[corner[0], corner[1]] for corner in corners]
    ring.append(list(ring[0]))
    return {
        "geometry": {
            "type": "Polygon",
            "coordinates": [ring]
        },
        "properties": {
            "id": "Bounding box"
        }
    }


def main(args):
    """
    Write a destination file with all features from the source file that
    intersect a polygon.

    The polygon is given with ``-p/--polygon`` either as four numbers
    (xmin ymin xmax ymax) or as one file name. A file that GDAL can open
    as a raster contributes its extent, reprojected to the geographic
    coordinate system of its own projection; any other file is used as a
    vector file.

    :param args: list of command line arguments (without the program name)
    :raises RuntimeError: if the polygon has neither 4 nor 1 values
    """
    parser = argparse.ArgumentParser(
        description=
        "Write a destination file with all features from source file "
        "overlapping specified polygon")
    parser.add_argument(
        "source", help="Source vector file name that contain all features")
    # The polygon is mandatory: without it there is nothing to intersect
    # with, so let argparse report the missing option instead of failing
    # later on len(None).
    parser.add_argument(
        "-p",
        "--polygon",
        nargs="+",
        required=True,
        help="xmin ymin xmax ymax or vector file specifying the polygon")
    parser.add_argument("destination",
                        help="Destination vector file with only features "
                        "overlapping the specified polygon")
    args = parser.parse_args(args)

    if len(args.polygon) == 4:
        xmin, ymin, xmax, ymax = [float(val) for val in args.polygon]
        polygon = FeatureIO(features=[
            _bounding_box_feature([[xmin, ymin], [xmax, ymin],
                                   [xmax, ymax], [xmin, ymax]]),
        ])
    elif len(args.polygon) == 1:
        polygonImage = gdal.Open(args.polygon[0], gdal.GA_ReadOnly)
        if polygonImage:
            # Raster file: use its extent as the polygon.
            gt = polygonImage.GetGeoTransform()
            cols = polygonImage.RasterXSize
            rows = polygonImage.RasterYSize
            ext = GetExtent(gt, cols, rows)

            src_srs = osr.SpatialReference()
            src_srs.ImportFromWkt(polygonImage.GetProjection())
            tgt_srs = src_srs.CloneGeogCS()
            p = ReprojectCoords(ext, src_srs, tgt_srs)
            polygon = FeatureIO(features=[
                _bounding_box_feature([p[0], p[1], p[2], p[3]]),
            ])
        else:
            # Not a raster GDAL can open: treat it as a vector file.
            polygon = VectorFileIO(uri=args.polygon[0])
    else:
        raise RuntimeError("Error: wrong number of parameters for polygon: {} "
                           "(can be 4 or 1)".format(len(args.polygon)))

    source = VectorFileIO(uri=args.source)
    destination = VectorFileIO(uri=args.destination)

    intersectProcess = IntersectsProcess(inputs=[source, polygon],
                                         output=destination)
    intersectProcess.compute()