def _parse_girder_url(url):
    """Return None or a tuple of (resource_type, resource_id)."""
    if url is None:
        raise GaiaException('Internal error - url is None')

    girder_scheme = 'girder://'
    if not url.startswith(girder_scheme):
        return None

    # Extract resource type (item or folder) and id
    path_string = url[len(girder_scheme):]
    path_list = path_string.split('/')
    if len(path_list) != 2:
        raise GaiaException('Invalid girder url; path must be length 2')

    resource_type, resource_id = path_list
    if resource_type not in ['item', 'folder']:
        msg = 'Invalid girder url; path must start with either ' \
              '"item/" or "folder/"'
        raise GaiaException(msg)

    return resource_type, resource_id
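# Hedged usage sketch for _parse_girder_url; the resource id below is made
# up, and GirderReader is assumed to expose this as a static method:
#
#   GirderReader._parse_girder_url('girder://item/5899dd6d')
#   # -> ('item', '5899dd6d')
#   GirderReader._parse_girder_url('file:///tmp/data.shp')
#   # -> None (not a girder:// URL)
#   GirderReader._parse_girder_url('girder://item/a/b')
#   # -> raises GaiaException (path must be exactly type/id)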
def can_read(source, *args, **kwargs):
    # For now, support either a url (string) or a (GirderInterface, path) tuple
    if isinstance(source, str):
        # _parse_girder_url returns None for non-girder:// strings
        result = GirderReader._parse_girder_url(source)
        if result is None:
            return False
        # Todo: confirm that the resource exists on girder?
        return True
    else:
        if not isinstance(source, tuple) or len(source) != 2:
            return False
        gint, path = source
        if not isinstance(gint, GirderInterface):
            return False
        if not isinstance(path, str):
            raise GaiaException('Second tuple element is not a string')
        if not gint.is_initialized():
            msg = 'Cannot read girder object; must first call gaia.connect()'
            raise GaiaException(msg)
        return True
def read(self, **kwargs):
    """Return a GirderDataObject.

    Doesn't actually load or move data; it remains on Girder.

    Todo: kwargs should probably be a union of raster and vector types,
    that get passed to GirderDataset

    :return: GirderDataObject
    """
    if self.url:
        parsed_result = self.__class__._parse_girder_url(self.url)
        if parsed_result is None:
            raise GaiaException('Internal error - not a girder url')
        resource_type, resource_id = parsed_result
        return GirderDataObject(self, resource_type, resource_id)
    elif self.girder_source:
        gint, path = self.girder_source
        resource = gint.lookup_resource(path)
        if resource is None:
            template = 'File not found on Girder at specified path ({})'
            msg = template.format(path)
            raise GaiaException(msg)
        resource_type = resource['_modelType']
        resource_id = resource['_id']
        return GirderDataObject(self, resource_type, resource_id)

    raise GaiaException(
        'Internal error - should never reach end of GirderReader.read()')
def validate_base(inputs, args, required_inputs=[],
                  required_args=[], optional_args=[]):
    """
    Ensure that all required inputs and arguments are present.
    """
    input_types = []
    errors = []

    for procInput in inputs:
        inputDataType = procInput.datatype
        if inputDataType == types.PROCESS:
            for t in [i for i in dir(types) if not i.startswith("__")]:
                if any((True for x in procInput.default_output
                        if x in getattr(formats, t, []))):
                    inputDataType = getattr(types, t)
                    break
        input_types.append(inputDataType)

    for i, req_input in enumerate(required_inputs):
        if i >= len(input_types):
            errors.append("Not enough inputs for process")
        elif req_input['type'] != input_types[i]:
            errors.append("Input #{} is of incorrect type.".format(i + 1))

    if len(input_types) > len(required_inputs):
        if (required_inputs[-1]['max'] is not None and
                len(input_types) > len(required_inputs) +
                required_inputs[-1]['max'] - 1):
            errors.append("Incorrect # of inputs; expected {}".format(
                len(required_inputs)))
        else:
            for i in range(len(required_inputs) - 1, len(input_types)):
                if input_types[i] != required_inputs[-1]['type']:
                    errors.append(
                        "Input #{} is of incorrect type.".format(i + 1))
    if errors:
        raise GaiaException('\n'.join(errors))

    for item in required_args:
        arg, arg_type = item['name'], item['type']
        if arg not in args or args[arg] is None:
            raise GaiaException('Missing required argument {}'.format(arg))
        test_arg_type(args, arg, arg_type)
        if 'options' in item and args[arg] not in item['options']:
            raise GaiaException('Invalid value for {}'.format(item['name']))

    for item in optional_args:
        arg, arg_type = item['name'], item['type']
        # Check the supplied args dict, not the optional_args spec list
        if arg in args and args[arg] is not None:
            test_arg_type(args, arg, arg_type)
            argval = args[arg]
            if 'options' in item and argval not in item['options']:
                raise GaiaException('Invalid value for {}'.format(
                    item['name']))
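# A hedged sketch of a validate_base call. The spec dicts mirror the keys
# the function reads ('name', 'type', 'options', 'max'), but the concrete
# values, the type constant, and the `inputs` objects are hypothetical:
#
#   validate_base(
#       inputs,                                  # objects exposing .datatype
#       {'buffer_size': '10', 'units': 'meters'},
#       required_inputs=[{'type': types.VECTOR, 'max': 1}],
#       required_args=[{'name': 'buffer_size', 'type': float}],
#       optional_args=[{'name': 'units', 'type': str,
#                       'options': ['meters', 'feet']}])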
def validate(self):
    """
    Ensure that all required inputs and arguments are present.
    """
    input_types = []
    errors = []

    for proc_input in self.inputs:
        input_type = proc_input.type
        if input_type == types.PROCESS:
            for t in [i for i in dir(types) if not i.startswith("__")]:
                if any((True for x in proc_input.default_output
                        if x in getattr(formats, t, []))):
                    input_type = getattr(types, t)
                    break
        input_types.append(input_type)

    for i, req_input in enumerate(self.required_inputs):
        if i >= len(input_types):
            errors.append("Not enough inputs for process")
        elif req_input['type'] != input_types[i]:
            errors.append("Input #{} is of incorrect type.".format(i + 1))

    if len(input_types) > len(self.required_inputs):
        if (self.required_inputs[-1]['max'] is not None and
                len(input_types) > len(self.required_inputs) +
                self.required_inputs[-1]['max'] - 1):
            errors.append("Incorrect # of inputs; expected {}".format(
                len(self.required_inputs)))
        else:
            for i in range(len(self.required_inputs) - 1, len(input_types)):
                if input_types[i] != self.required_inputs[-1]['type']:
                    errors.append(
                        "Input #{} is of incorrect type.".format(i + 1))
    if errors:
        raise GaiaException('\n'.join(errors))

    for item in self.required_args:
        arg, arg_type = item['name'], item['type']
        if not hasattr(self, arg) or getattr(self, arg) is None:
            raise GaiaException('Missing required argument {}'.format(arg))
        self.test_arg_type(arg, arg_type)
        if 'options' in item and getattr(self, arg) not in item['options']:
            raise GaiaException('Invalid value for {}'.format(item['name']))

    for item in self.optional_args:
        arg, arg_type = item['name'], item['type']
        if hasattr(self, arg) and getattr(self, arg) is not None:
            self.test_arg_type(arg, arg_type)
            argval = getattr(self, arg)
            if 'options' in item and argval not in item['options']:
                raise GaiaException(
                    'Invalid value for {}'.format(item['name']))
def __call__(cls, *args, **kwargs):
    registry = GaiaReaderFactoryMetaclass._registry
    subclass = None
    instance = None
    if id(cls) != id(GaiaReader):
        # Allow for direct subclass instantiation
        instance = cls.__new__(cls, args, kwargs)
    else:
        if 'reader_class' in kwargs:
            classname = kwargs['reader_class']
            if classname in registry:
                subclass = registry[classname]
        else:
            for classname, classinstance in registry.items():
                if hasattr(classinstance, 'can_read'):
                    canReadMethod = getattr(classinstance, 'can_read')
                    if canReadMethod(*args, **kwargs):
                        # Use the first subclass that can read the input
                        subclass = classinstance
                        break
        if subclass:
            instance = subclass.__new__(subclass, args, kwargs)
        else:
            argsstr = 'args: %s, kwargs: %s' % (args, kwargs)
            msg = 'Unable to find GaiaReader subclass for: %s' % argsstr
            raise GaiaException(msg)
    if instance is not None:
        instance.__init__(*args, **kwargs)
    return instance
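# Usage sketch: with this metaclass installed on GaiaReader, constructing
# the base class dispatches to the first registered subclass whose
# can_read() accepts the arguments. The URL and class name are made up:
#
#   reader = GaiaReader('girder://item/5899dd6d')          # -> a GirderReader
#   reader = GaiaReader(uri, reader_class='SomeReader')    # explicit override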
def transform_data(self, outformat=None, epsg=None):
    """
    Transform the IO data into the requested format and projection
    if necessary.

    :param outformat: Output format
    :param epsg: EPSG code of the output projection
    :return: Transformed data
    """
    out_data = geopandas.GeoDataFrame.copy(self.data)
    if epsg and str(self.get_epsg()) != epsg:
        out_data[out_data.geometry.name] = \
            self.data.geometry.to_crs(epsg=epsg)
        out_data.crs = fiona.crs.from_epsg(epsg)
    if outformat == formats.JSON and self.default_output in (
            formats.PANDAS, formats.JSON):
        out_json = out_data.to_json()
        if out_data.crs:
            gj = json.loads(out_json)
            gj["crs"] = {
                "type": "name",
                "properties": {
                    "name": out_data.crs["init"].upper()
                }
            }
            return json.dumps(gj)
        else:
            return out_json
    elif outformat in [formats.PANDAS, None]:
        return out_data
    else:
        raise GaiaException("Format {} not supported".format(outformat))
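# Self-contained sketch of the reprojection step above, using geopandas'
# to_crs directly (assumes a recent geopandas; the coordinates are made up,
# and _reproject_demo is a hypothetical helper, not part of this module):
def _reproject_demo():
    import geopandas
    from shapely.geometry import Point

    gdf = geopandas.GeoDataFrame({'geometry': [Point(-71.06, 42.36)]},
                                 crs='EPSG:4326')
    # Returns a copy whose geometries are in web mercator (EPSG:3857)
    return gdf.to_crs(epsg=3857)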
def calc_postgis(self):
    """
    Calculate the union using PostGIS

    :return: union result as a GeoDataFrame
    """
    union_queries = []
    union_params = []
    first = self.inputs[0]
    second = self.inputs[1]
    geom0, epsg = first.geom_column, first.epsg
    geom1, epsg1 = second.geom_column, second.epsg

    if ''.join(first.columns) != ''.join(second.columns):
        raise GaiaException('Inputs must have the same columns')

    for pg_io in self.inputs:
        io_query, params = pg_io.get_query()
        union_queries.append(io_query.rstrip(';'))
        union_params.extend(params)

    # Reproject the second input's geometry if the projections differ
    if epsg1 != epsg:
        geom1_query = 'ST_Transform({},{})'.format(geom1, epsg)
        union_queries[1] = union_queries[1].replace(
            '"{}"'.format(geom1), geom1_query)

    query = '({query0}) UNION ({query1})'.format(
        query0=union_queries[0], query1=union_queries[1])
    return df_from_postgis(first.engine, query, union_params, geom0, epsg)
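# For reference, the SQL produced above has roughly this shape (parameters
# elided; the table and column names are hypothetical, and the subqueries
# actually come from each input's get_query()):
#
#   (SELECT "geom", "name" FROM table_a)
#   UNION
#   (SELECT ST_Transform("geom",4326), "name" FROM table_b)
#
# i.e. the second subquery's geometry column is rewritten in place when the
# two inputs have different projections.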
def validator(inputs=[], args={}):
    # First check that the input is compatible with pandas computation
    if type(inputs[0].get_data()) is not GeoDataFrame:
        raise GaiaException('pandas process requires a GeoDataFrame')
    # Otherwise call up the chain to let the parent do common validation
    return v(inputs, args)
def get_epsg(self):
    """
    Get the EPSG code of the data

    :return: EPSG code (integer)
    """
    if self.data is None:
        self.read()
    if self.data.__class__.__name__ == 'GeoDataFrame':
        if self.data.crs is None:
            # Make an educated guess about the projection
            # based on the coordinate bounds
            minx = min(self.data.geometry.bounds['minx'])
            maxx = max(self.data.geometry.bounds['maxx'])
            if minx >= -180.0 and maxx <= 180.0:
                self.epsg = 4326
            elif minx >= -20026376.39 and maxx <= 20026376.39:
                self.epsg = 3857
            else:
                raise GaiaException('Could not determine data projection.')
            self.data.crs = fiona_crs.from_epsg(self.epsg)
            return self.epsg
        else:
            crs = self.data.crs.get('init', None)
            if crs:
                if ':' in crs:
                    crs = crs.split(':')[1]
                if crs.isdigit():
                    self.epsg = int(crs)
                    return self.epsg
            # Assume EPSG:4326
            self.epsg = 4326
            self.data.crs = fiona_crs.from_epsg(self.epsg)
            return self.epsg
    elif self.data.__class__.__name__ == 'Dataset':
        projection = self.data.GetProjection()
        data_crs = osr.SpatialReference(wkt=projection)
        try:
            self.epsg = int(data_crs.GetAttrValue('AUTHORITY', 1))
            return self.epsg
        except (KeyError, TypeError, ValueError):
            # GetAttrValue returns None when the key is missing, so int()
            # raises TypeError rather than KeyError
            raise GaiaException("EPSG code could not be determined")
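# Self-contained illustration of the bounds heuristic above: x values in
# [-180, 180] suggest lat/lon degrees (EPSG:4326), while web-mercator metres
# fall within roughly +/-20026376.39 (EPSG:3857). guess_epsg is a
# hypothetical helper, not part of this module.
def guess_epsg(minx, maxx):
    if minx >= -180.0 and maxx <= 180.0:
        return 4326
    if minx >= -20026376.39 and maxx <= 20026376.39:
        return 3857
    return None

assert guess_epsg(-71.1, -70.9) == 4326             # degrees -> lat/lon
assert guess_epsg(-7915207.0, -7893147.0) == 3857   # metres -> web mercator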
def compute(processName, inputs, args):
    """
    Look up a process that can do the job and ask it to compute.
    """
    processes = find_processes(processName)
    if not processes:
        list_processes(processName)
        raise GaiaException('Unable to find suitable %s process' %
                            processName)

    # How will we choose between equally "valid" processes? For now,
    # just return the first one that validates.
    for p in processes:
        try:
            return p(inputs, args)
        except GaiaException:
            pass

    raise GaiaException('No registered processes were able to validate inputs')
def compute(self):
    """
    Run the Within process
    """
    if len(self.inputs) != 2:
        raise GaiaException('WithinProcess requires 2 inputs')
    input_classes = list(self.get_input_classes())
    use_postgis = (len(input_classes) == 1 and
                   input_classes[0] == 'PostgisIO')
    data = self.calc_postgis() if use_postgis else self.calc_pandas()
    self.output.data = data
    self.output.write()
def test_arg_type(args, arg, arg_type):
    """
    Try to cast a process argument to its required type.
    Raise an exception if not successful.

    :param args: Process arguments (dict)
    :param arg: The argument name
    :param arg_type: The required argument type (int, str, etc)
    """
    try:
        arg_type(args[arg])
    except Exception:
        raise GaiaException('Required argument {} must be of type {}'.format(
            arg, arg_type))
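# Usage sketch (values made up): a string that casts cleanly passes, one
# that does not raises GaiaException.
#
#   test_arg_type({'buffer_size': '12.5'}, 'buffer_size', float)  # ok
#   test_arg_type({'buffer_size': '12.5'}, 'buffer_size', int)
#   # -> GaiaException, since int('12.5') raises ValueError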
def calc_pandas(self):
    """
    Calculate the union using pandas GeoDataFrames

    :return: union result as a GeoDataFrame
    """
    first, second = self.inputs[0], self.inputs[1]
    first_df = first.read()
    second_df = second.read(epsg=first.get_epsg())
    if ''.join(first_df.columns) != ''.join(second_df.columns):
        raise GaiaException('Inputs must have the same columns')
    uniondf = GeoDataFrame(pd.concat([first_df, second_df]))
    return uniondf
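# Minimal, self-contained demo of the pandas-based union above: two
# GeoDataFrames with identical columns concatenated into one. Requires
# geopandas and shapely; the rows are made up, and _union_demo is a
# hypothetical helper, not part of this module.
def _union_demo():
    import pandas as pd
    from geopandas import GeoDataFrame
    from shapely.geometry import Point

    df_a = GeoDataFrame({'name': ['a'], 'geometry': [Point(0, 0)]})
    df_b = GeoDataFrame({'name': ['b'], 'geometry': [Point(1, 1)]})
    union = GeoDataFrame(pd.concat([df_a, df_b], ignore_index=True))
    assert len(union) == 2
    return union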
def __init__(self, uri='', **kwargs):
    """
    :param uri: Filepath of the IO object
    :param kwargs: Other keyword arguments
    """
    if uri and not self.allowed_folder(uri):
        raise GaiaException(
            "Access to this directory is not permitted: {}".format(
                os.path.dirname(uri)))
    self.uri = uri
    super(FileIO, self).__init__(uri=uri, **kwargs)
    if self.uri:
        self.ext = os.path.splitext(self.uri)[1].lower()
def get_epsg(self):
    if not self._epsgComputed:
        if not self._data:
            self.get_data()
        projection = self._data.GetProjection()
        data_crs = osr.SpatialReference(wkt=projection)
        try:
            self.epsg = int(data_crs.GetAttrValue('AUTHORITY', 1))
            self._epsgComputed = True
        except (KeyError, TypeError, ValueError):
            # GetAttrValue returns None when the key is missing, so int()
            # raises TypeError rather than KeyError
            raise GaiaException("EPSG code could not be determined")
    return self.epsg
def gen_zonalstats(zones_json, raster):
    """
    Generator function that yields the statistics of a raster dataset
    within each polygon (zone) of a vector dataset.

    :param zones_json: Polygons in GeoJSON format
    :param raster: Raster dataset
    :return: Polygons with additional properties for calculated raster stats.
    """
    # Open data
    raster = get_dataset(raster)
    if type(zones_json) is str:
        shp = ogr.Open(zones_json)
        zones_json = json.loads(zones_json)
    else:
        shp = ogr.Open(json.dumps(zones_json))
    lyr = shp.GetLayer()

    # Get raster georeference info
    transform = raster.GetGeoTransform()
    xOrigin = transform[0]
    yOrigin = transform[3]
    pixelWidth = transform[1]
    pixelHeight = transform[5]

    # Reproject vector geometry to same projection as raster
    sourceSR = lyr.GetSpatialRef()
    targetSR = osr.SpatialReference()
    targetSR.ImportFromWkt(raster.GetProjectionRef())
    coordTrans = osr.CoordinateTransformation(sourceSR, targetSR)

    # TODO: Use a multiprocessing pool to process features more quickly
    for feature in zones_json['features']:
        geom = ogr.CreateGeometryFromJson(json.dumps(feature['geometry']))
        if sourceSR.ExportToWkt() != targetSR.ExportToWkt():
            geom.Transform(coordTrans)

        # Get extent of feature
        if geom.GetGeometryName() == 'MULTIPOLYGON':
            pointsX = []
            pointsY = []
            for polygon in geom:
                ring = polygon.GetGeometryRef(0)
                numpoints = ring.GetPointCount()
                for p in range(numpoints):
                    lon, lat, z = ring.GetPoint(p)
                    if abs(lon) != float('inf'):
                        pointsX.append(lon)
                    if abs(lat) != float('inf'):
                        pointsY.append(lat)
        elif geom.GetGeometryName() == 'POLYGON':
            ring = geom.GetGeometryRef(0)
            numpoints = ring.GetPointCount()
            pointsX = []
            pointsY = []
            for p in range(numpoints):
                lon, lat, z = ring.GetPoint(p)
                if abs(lon) != float('inf'):
                    pointsX.append(lon)
                if abs(lat) != float('inf'):
                    pointsY.append(lat)
        else:
            raise GaiaException(
                "ERROR: Geometry needs to be either Polygon or Multipolygon")

        xmin = min(pointsX)
        xmax = max(pointsX)
        ymin = min(pointsY)
        ymax = max(pointsY)

        # Specify offset and rows and columns to read.
        # Note: assumes square pixels (pixelWidth is used for both axes).
        xoff = int((xmin - xOrigin) / pixelWidth)
        yoff = int((yOrigin - ymax) / pixelWidth)
        xcount = int((xmax - xmin) / pixelWidth) + 1
        ycount = int((ymax - ymin) / pixelWidth) + 1

        # Create memory target raster
        target_ds = gdal.GetDriverByName('MEM').Create(
            '', xcount, ycount, 1, gdal.GDT_Byte)
        target_ds.SetGeoTransform((
            xmin, pixelWidth, 0,
            ymax, 0, pixelHeight,
        ))

        # Give the target raster the same projection as the value raster
        raster_srs = osr.SpatialReference()
        raster_srs.ImportFromWkt(raster.GetProjectionRef())
        target_ds.SetProjection(raster_srs.ExportToWkt())

        # Rasterize zone polygon to raster
        gdal.RasterizeLayer(target_ds, [1], lyr, burn_values=[1])

        # Read raster as arrays
        banddataraster = raster.GetRasterBand(1)
        try:
            dataraster = banddataraster.ReadAsArray(
                xoff, yoff, xcount, ycount).astype(float)
        except AttributeError:
            # Nothing within bounds, move on to next polygon
            properties = feature['properties']
            for p in ['count', 'sum', 'mean', 'median',
                      'min', 'max', 'stddev']:
                properties[p] = None
            yield feature
        else:
            # Get no data value of array
            noDataValue = banddataraster.GetNoDataValue()
            if noDataValue:
                # Replace nodata values in the array with NaN
                dataraster[dataraster == noDataValue] = numpy.nan

            bandmask = target_ds.GetRasterBand(1)
            datamask = bandmask.ReadAsArray(
                0, 0, xcount, ycount).astype(float)

            # Mask zone of raster
            zoneraster = numpy.ma.masked_array(
                dataraster, numpy.logical_not(datamask))

            properties = feature['properties']
            properties['count'] = zoneraster.count()
            properties['sum'] = numpy.nansum(zoneraster)
            if type(properties['sum']) == MaskedConstant:
                # No non-null values for raster data in polygon, skip
                for p in ['sum', 'mean', 'median', 'min', 'max', 'stddev']:
                    properties[p] = None
            else:
                properties['mean'] = numpy.nanmean(zoneraster)
                properties['min'] = numpy.nanmin(zoneraster)
                properties['max'] = numpy.nanmax(zoneraster)
                properties['stddev'] = numpy.nanstd(zoneraster)
                median = numpy.ma.median(zoneraster)
                if hasattr(median, 'data') and not numpy.isnan(median.data):
                    properties['median'] = median.data.item()
            yield feature
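# Hedged usage sketch for gen_zonalstats: stream per-zone statistics into a
# new FeatureCollection. The file paths are hypothetical.
#
#   with open('zones.geojson') as f:
#       zones = json.load(f)
#   features = list(gen_zonalstats(zones, 'elevation.tif'))
#   result = {'type': 'FeatureCollection', 'features': features}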