def get_field_mappings(schema):
    """Translate the Fiona field types of a schema into Redshift column types.

    :param schema: Fiona schema
    :return: Ordered dictionary giving the Redshift data type of each field
    """
    # Python types with a fixed Redshift counterpart.
    # Redshift integer types: INTEGER, BIGINT; float types: REAL, DOUBLE PRECISION.
    direct_types = {
        int: 'BIGINT',
        float: 'DOUBLE PRECISION',
        bool: 'BOOLEAN',
    }
    # Fiona's date/time classes are recognised by name in the type's string form.
    named_types = (
        ('FionaDateType', 'DATE'),
        ('FionaTimeType', 'TIME'),
        ('FionaDateTimeType', 'TIMESTAMP'),
    )

    columns = OrderedDict()
    for field_name, field_spec in schema['properties'].items():
        python_type = fiona.prop_type(field_spec)
        if python_type in direct_types:
            columns[field_name] = direct_types[python_type]
        elif python_type == str:
            # Strings carry a width in the Fiona spec, reused for VARCHAR.
            width = fiona.prop_width(field_spec)
            columns[field_name] = 'VARCHAR({0})'.format(width)
        else:
            type_repr = str(python_type)
            # Any type that is not recognised falls back to VARCHAR(MAX).
            columns[field_name] = next(
                (sql for marker, sql in named_types if marker in type_repr),
                'VARCHAR(MAX)',
            )
    return columns
def validate_attribute_field_info(attribute_field_info):
    """Validate the structure of ``attribute_field_info``.

    The argument must be a list of dictionaries, each having exactly the
    keys ``name``, ``dtype`` and ``value``, where ``dtype`` is accepted by
    both ``fiona.prop_type`` and ``fiona.prop_width``.

    :param attribute_field_info: list of field description dictionaries
    :return: None; the function only raises on invalid input
    :raises TypeError: if the argument is not a list, an element is not a
        dictionary, or a ``dtype`` is rejected by Fiona
    :raises KeyError: if an element's keys differ from the expected set
    """
    # make sure it's a list
    if not isinstance(attribute_field_info, list):
        raise TypeError("attribute_field_info must be a list")

    # make sure each element is a dictionary with the correct attributes
    expected_keys = ['name', 'dtype', 'value']
    expected_key_set = set(expected_keys)
    for d in attribute_field_info:
        if not isinstance(d, dict):
            raise TypeError(
                "Elements of attribute_field_info must be a dictionary.")

        # Compare as sets: sorting the keys would raise a stray TypeError
        # for dictionaries whose keys are not mutually orderable
        # (e.g. a mix of int and str keys) instead of the intended KeyError.
        if set(d) != expected_key_set:
            raise KeyError(
                "Keys in attributed_field_info do not match the expected set: {}"
                .format(expected_keys))

        try:
            fiona.prop_type(d['dtype'])
            fiona.prop_width(d['dtype'])
        except Exception as e:
            # Keep the original failure attached as the cause.
            raise TypeError("Specified dtype {dtype} is invalid: {e}".format(
                dtype=d['dtype'], e=str(e))) from e
def get_bigquery_schema(filepath, layer_name=None, gdb_name=None):
    """
    Generate a Bigquery table schema from a geospatial file

        python -m geobeam.util get_bigquery_schema ...args

    Args:
        filepath (str): full path to the input file
        layer_name (str, optional): name of the layer, if file contains
            multiple layers
        gdb_name (str, optional): when given together with `layer_name`,
            `filepath` is read as a zip archive and this value is passed to
            `ZipMemoryFile.open` as the dataset to open inside it. Ignored
            when `layer_name` is None.

    Returns:
        list: the schema as a list of field dicts, convertable to json by
            json.dumps(schema, indent=2)
    """

    import fiona
    from fiona.io import ZipMemoryFile
    from fiona import prop_type

    # Every source is opened in a `with` block so the collection (and the
    # underlying file / in-memory archive) is closed once the profile is read.
    if layer_name is None:
        with fiona.open(filepath) as collection:
            profile = collection.profile
    elif gdb_name is None:
        with fiona.open(filepath, layer=layer_name) as collection:
            profile = collection.profile
    else:
        with open(filepath, 'rb') as f:
            data = f.read()
        with ZipMemoryFile(data) as mem:
            with mem.open(gdb_name, layer=layer_name) as collection:
                profile = collection.profile

    bq_schema = []
    for field_name, field_type in profile['schema']['properties'].items():
        # Fiona type string -> Python type -> canonical Fiona type name -> BQ type
        fiona_type = prop_type(field_type)
        bq_type = BQ_FIELD_TYPES[fiona.schema.FIELD_TYPES_MAP_REV[fiona_type]]
        bq_schema.append({'name': field_name, 'type': bq_type})

    # The geometry always goes into a trailing GEOGRAPHY column named 'geom'.
    bq_schema.append({
        'name': 'geom',
        'type': 'GEOGRAPHY',
        'description': '{} reprojected from {}. source: {}'.format(
            profile['schema']['geometry'],
            profile['crs']['init'],
            profile['driver'])
    })

    return bq_schema
def writeGisFile(self, path, driver, node_dtype="str", node_fields=None, edge_fields=None):
    """Write the graph's nodes and edges to ``path`` as two layers.

    Nodes go to a layer named "nodes" with Point geometry and edges to a
    layer named "edges" with LineString geometry, both using
    ``self.graph["crs"]``. Every node record gets a "node" property and
    every edge record gets "anode" and "bnode" properties holding the node
    identifiers converted with ``node_dtype``.

    Args:
        path: output path handed to ``fiona.open``.
        driver: Fiona driver name handed to ``fiona.open``.
        node_dtype: Fiona type string used for the node identifier fields.
        node_fields: mapping of node attribute name to Fiona type string.
        edge_fields: mapping of edge attribute name to Fiona type string.

    Returns:
        None

    Raises:
        Exception: if any dtype cannot be resolved by ``fiona.prop_type``.
    """
    # Copy the mappings: the identifier fields are added below and must not
    # leak into dictionaries owned by the caller.
    node_fields = {} if node_fields is None else dict(node_fields)
    edge_fields = {} if edge_fields is None else dict(edge_fields)

    # Resolve every dtype once, up front; this both validates the input and
    # avoids repeating the lookup for every feature written.
    try:
        node_casts = {k: fiona.prop_type(dtype) for k, dtype in node_fields.items()}
        edge_casts = {k: fiona.prop_type(dtype) for k, dtype in edge_fields.items()}
        id_cast = fiona.prop_type(node_dtype)
    except Exception as e:
        raise Exception("error converting dtype to python type", e) from e

    node_fields["node"] = node_dtype
    edge_fields["anode"] = node_dtype
    edge_fields["bnode"] = node_dtype

    # NOTE(review): both layers are opened in "w" mode on the same path;
    # presumably the driver must support multiple layers - confirm.
    with fiona.open(
        path,
        "w",
        layer="nodes",
        driver=driver,
        crs=self.graph["crs"],
        schema={"geometry": "Point", "properties": node_fields},
    ) as c:
        for node in self.nodes_iter():
            geom = self.geometry(node)
            attrs = self.node[node]
            props = {}
            for k, cast in node_casts.items():
                if k == "node":
                    # Reserved for the identifier, filled in below.
                    continue
                props[k] = None if attrs.get(k) is None else cast(attrs[k])
            props["node"] = id_cast(node)
            c.write({"geometry": geom, "properties": props})

    with fiona.open(
        path,
        "w",
        layer="edges",
        driver=driver,
        crs=self.graph["crs"],
        schema={"geometry": "LineString", "properties": edge_fields},
    ) as c:
        for u, v in self.edges_iter():
            geom = self.geometry(u, v)
            attrs = self.edge[u][v]
            props = {}
            for k, cast in edge_casts.items():
                if k in ("anode", "bnode"):
                    # Reserved for the endpoint identifiers, filled in below.
                    continue
                props[k] = None if attrs.get(k) is None else cast(attrs[k])
            props.update({"anode": id_cast(u), "bnode": id_cast(v)})
            c.write({"geometry": geom, "properties": props})

    return None
def get_bigquery_schema_dataflow(filepath, layer_name=None, gdb_name=None):
    """
    Generate a Bigquery table schema from a geospatial file hosted on a Google Cloud Storage bucket

        from apache_beam.io.gcp.bigquery_tools import parse_table_schema_from_json
        table_schema = parse_table_schema_from_json(get_bigquery_schema_dataflow(known_args.gcs_url, known_args.layer_name))

    Args:
        filepath (str): full path to the input file hosted on Google Cloud Storage
        layer_name (str, optional): name of the layer, if file contains
            multiple layers
        gdb_name (str, optional): when given, the downloaded file is opened
            as a zip archive containing '<archive base name>.gdb' and this
            value is passed as the layer to read.
            NOTE(review): `layer_name` is ignored in that case - confirm
            this is intended.

    Returns:
        JSON: the schema in JSON that can be passed to the schema argument in WriteToBigQuery.
            Must use the parse_table_schema_from_json() from apache_beam.io.gcp.bigquery_tools

    Raises:
        FileNotFoundError: if the object does not exist in the bucket.
    """

    from google.cloud import storage
    import fiona
    import json
    from fiona import BytesCollection
    from fiona import prop_type
    from fiona.io import ZipMemoryFile

    # The body used to read an undefined name `gcs_url`; the URL is the
    # `filepath` argument (see the usage example above).
    gcs_url = filepath

    # 'gs://bucket/path/to/file.zip' -> bucket name, object name, base name
    url_parts = gcs_url.split('/')
    bucket_name = url_parts[2]
    file_name = '/'.join(url_parts[3:])
    zip_name = url_parts[-1].split('.')[0]

    storage_client = storage.Client()
    blob = storage_client.bucket(bucket_name).get_blob(file_name)
    if blob is None:
        # get_blob returns None for a missing object; fail with a clear
        # message instead of an AttributeError on None.
        raise FileNotFoundError(
            'No object {!r} found in bucket {!r}'.format(file_name, bucket_name))
    data = blob.download_as_string()

    # Collections are opened in `with` blocks so they are closed once the
    # profile has been read.
    if gdb_name is not None:
        with ZipMemoryFile(data) as archive:
            with archive.open(f'{zip_name}.gdb', layer=gdb_name) as collection:
                profile = collection.profile
    elif layer_name is not None:
        with BytesCollection(data, layer=layer_name) as collection:
            profile = collection.profile
    else:
        with fiona.open(gcs_url) as collection:
            profile = collection.profile

    BQ_FIELD_TYPES = {
        'int': 'INT64',
        'str': 'STRING',
        'float': 'FLOAT64',
        'bool': 'BOOL',
        'date': 'DATE',
        'time': 'TIME',
        'datetime': 'DATETIME',
        'bytes': 'BYTES'
    }

    bq_schema = []
    for field_name, field_type in profile['schema']['properties'].items():
        # Fiona type string -> Python type -> canonical Fiona type name -> BQ type
        fiona_type = prop_type(field_type)
        bq_type = BQ_FIELD_TYPES[fiona.schema.FIELD_TYPES_MAP_REV[fiona_type]]
        bq_schema.append({'name': field_name, 'type': bq_type})

    # The geometry always goes into a trailing GEOGRAPHY column named 'geom'.
    bq_schema.append({
        'name': 'geom',
        'type': 'GEOGRAPHY',
        'description': '{} reprojected from {}. source: {}'.format(
            profile['schema']['geometry'],
            profile['crs']['init'],
            profile['driver'])
    })

    return json.dumps({"fields": bq_schema}, sort_keys=True)
def writeGisFile(self, path, driver, node_dtype='str', node_fields=None, edge_fields=None):
    '''Write the graph's nodes and edges to ``path`` as two layers.

    Nodes are written to the layer 'nodes' (Point geometry) and edges to
    the layer 'edges' (LineString geometry), both with the CRS taken from
    ``self.graph['crs']``. Node records carry a 'node' property; edge
    records carry 'anode' and 'bnode' properties. All three hold node
    identifiers converted with ``node_dtype``.

    :param path: output path passed to ``fiona.open``
    :param driver: Fiona driver name passed to ``fiona.open``
    :param node_dtype: Fiona type string for the node identifier fields
    :param node_fields: mapping of node attribute name to Fiona type string
    :param edge_fields: mapping of edge attribute name to Fiona type string
    :return: None
    :raises Exception: if a dtype cannot be resolved by ``fiona.prop_type``
    '''
    # Use private copies so that adding the identifier fields below does
    # not modify dictionaries that belong to the caller.
    node_fields = dict(node_fields) if node_fields is not None else {}
    edge_fields = dict(edge_fields) if edge_fields is not None else {}

    # Resolving the dtypes here validates them and lets the write loops
    # reuse the converters instead of looking them up per feature.
    try:
        node_converters = {}
        for name, dtype in node_fields.items():
            node_converters[name] = fiona.prop_type(dtype)
        edge_converters = {}
        for name, dtype in edge_fields.items():
            edge_converters[name] = fiona.prop_type(dtype)
        to_node_id = fiona.prop_type(node_dtype)
    except Exception as e:
        raise Exception('error converting dtype to python type', e) from e

    node_fields['node'] = node_dtype
    edge_fields['anode'] = node_dtype
    edge_fields['bnode'] = node_dtype

    # The identifier keys are filled in separately from the attribute loop.
    node_converters.pop('node', None)
    edge_converters.pop('anode', None)
    edge_converters.pop('bnode', None)

    node_schema = {'geometry': 'Point', 'properties': node_fields}
    with fiona.open(path, 'w', layer='nodes', driver=driver,
                    crs=self.graph['crs'], schema=node_schema) as c:
        for node in self.nodes_iter():
            geom = self.geometry(node)
            data = self.node[node]
            props = {}
            for name, convert in node_converters.items():
                # Missing or None attributes are written as null values.
                if data.get(name) is None:
                    props[name] = None
                else:
                    props[name] = convert(data[name])
            props['node'] = to_node_id(node)
            c.write({'geometry': geom, 'properties': props})

    edge_schema = {'geometry': 'LineString', 'properties': edge_fields}
    with fiona.open(path, 'w', layer='edges', driver=driver,
                    crs=self.graph['crs'], schema=edge_schema) as c:
        for u, v in self.edges_iter():
            geom = self.geometry(u, v)
            data = self.edge[u][v]
            props = {}
            for name, convert in edge_converters.items():
                if data.get(name) is None:
                    props[name] = None
                else:
                    props[name] = convert(data[name])
            props.update({'anode': to_node_id(u), 'bnode': to_node_id(v)})
            c.write({'geometry': geom, 'properties': props})

    return None
# Walk-through of Fiona collections, records and schema type helpers.
# Open the shapefile and pretty-print the collection's schema.
c = fiona.open('/gdata/GSHHS_c.shp')
pprint(c.schema)
###############################################################################
# Take one record and compare its keys with the schema's keys, first at the
# top level and then for the 'properties' mapping.
rec = next(c)
set(rec.keys()) - set(c.schema.keys())
set(rec['properties'].keys()) == set(c.schema['properties'].keys())
###############################################################################
# Look at the Python type of the 'source' value next to the schema entry
# declared for that field.
type(rec['properties']['source'])
c.schema['properties']['source']
###############################################################################
# prop_width: queried for a str type with an explicit size and for a bare str.
from fiona import prop_width
prop_width('str:25')
prop_width('str')
###############################################################################
# prop_type: queried for the int, float and sized str type strings.
from fiona import prop_type
prop_type('int')
prop_type('float')
prop_type('str:25')
###############################################################################
# Reopen the file and read a record through the collection's own next() method.
c = fiona.open('/gdata/GSHHS_c.shp')
rec = c.next()
pprint(rec)
###############################################################################
# Close the collection, then access the record that was read before closing.
c.close()
rec['id']
###############################################################################
# Open the file once more and read a record again to inspect its id.
c = fiona.open('/gdata/GSHHS_c.shp')
rec = next(c)
rec['id']
###############################################################################
# Pretty-print only the attribute values of that record.
pprint(rec['properties'])
def test_types():
    """Check the Python types that ``prop_type`` reports for Fiona type names."""
    # A str type maps to the text type with or without an explicit width.
    for spec in ('str:254', 'str'):
        assert prop_type(spec) == text_type

    # Numeric types only need to accept a sample value of the matching kind.
    int_type = prop_type('int')
    assert isinstance(0, int_type)
    float_type = prop_type('float')
    assert isinstance(0.0, float_type)

    date_type = prop_type('date')
    assert date_type == FionaDateType
def writeGisFile(self, path, driver, node_dtype='str', node_fields=None, edge_fields=None):
    '''Export the graph to ``path`` as a 'nodes' layer and an 'edges' layer.

    The 'nodes' layer uses Point geometry and the 'edges' layer uses
    LineString geometry; both use ``self.graph['crs']``. In addition to the
    requested attribute fields, node records get a 'node' property and edge
    records get 'anode' and 'bnode' properties, each a node identifier
    converted with ``node_dtype``.

    Parameters:
        path: output path given to ``fiona.open``.
        driver: Fiona driver name given to ``fiona.open``.
        node_dtype: Fiona type string for the node identifier fields.
        node_fields: mapping of node attribute name to Fiona type string.
        edge_fields: mapping of edge attribute name to Fiona type string.

    Returns:
        None

    Raises:
        Exception: when a dtype cannot be resolved by ``fiona.prop_type``.
    '''

    def _convert(attrs, converters):
        # Build the property dict for one feature; absent/None values stay None.
        return {
            key: None if attrs.get(key) is None else cast(attrs[key])
            for key, cast in converters.items()
        }

    # The caller's mappings are copied because identifier fields are added
    # to the schema below; mutating the arguments would leak those fields.
    node_fields = {} if node_fields is None else dict(node_fields)
    edge_fields = {} if edge_fields is None else dict(edge_fields)

    # Validate all dtypes before anything is written, keeping the resolved
    # Python types so they are not looked up again for every feature.
    try:
        node_casts = {k: fiona.prop_type(dtype)
                      for k, dtype in node_fields.items() if k != 'node'}
        # Still validate a caller-supplied 'node' dtype, as before.
        if 'node' in node_fields:
            fiona.prop_type(node_fields['node'])
        edge_casts = {k: fiona.prop_type(dtype)
                      for k, dtype in edge_fields.items()
                      if k not in ('anode', 'bnode')}
        for reserved in ('anode', 'bnode'):
            if reserved in edge_fields:
                fiona.prop_type(edge_fields[reserved])
        id_cast = fiona.prop_type(node_dtype)
    except Exception as e:
        raise Exception('error converting dtype to python type', e) from e

    node_fields['node'] = node_dtype
    edge_fields['anode'] = node_dtype
    edge_fields['bnode'] = node_dtype

    with fiona.open(path,
                    'w',
                    layer='nodes',
                    driver=driver,
                    crs=self.graph['crs'],
                    schema={
                        'geometry': 'Point',
                        'properties': node_fields
                    }) as c:
        for node in self.nodes_iter():
            geom = self.geometry(node)
            props = _convert(self.node[node], node_casts)
            props['node'] = id_cast(node)
            c.write({'geometry': geom, 'properties': props})

    with fiona.open(path,
                    'w',
                    layer='edges',
                    driver=driver,
                    crs=self.graph['crs'],
                    schema={
                        'geometry': 'LineString',
                        'properties': edge_fields
                    }) as c:
        for u, v in self.edges_iter():
            geom = self.geometry(u, v)
            props = _convert(self.edge[u][v], edge_casts)
            props.update({'anode': id_cast(u), 'bnode': id_cast(v)})
            c.write({'geometry': geom, 'properties': props})

    return None
def test_types():
    """Check that ``prop_type`` returns the exact Python type for each name."""
    # Pairs of (Fiona type string, expected Python type).
    expectations = (
        ('str:254', text_type),
        ('str', text_type),
        ('int', int),
        ('float', float),
        ('date', FionaDateType),
    )
    for spec, expected in expectations:
        assert prop_type(spec) == expected
def test_types():
    """Verify the Python types ``prop_type`` yields for common Fiona types."""
    sized_str = prop_type("str:254")
    plain_str = prop_type("str")
    # The width suffix does not change the resulting text type.
    assert sized_str == text_type
    assert plain_str == text_type

    # For numbers it is enough that a sample literal is an instance.
    samples = ((0, "int"), (0.0, "float"))
    for value, spec in samples:
        assert isinstance(value, prop_type(spec))

    assert prop_type("date") == FionaDateType