Exemple #1
0
def get_field_mappings(schema):
    """Translate the property types of a Fiona schema into Redshift column types.

    :param schema: Fiona schema; its ``'properties'`` mapping is read
    :return: OrderedDict mapping each property name to a Redshift type string
    """
    columns = OrderedDict()
    for name, fiona_type in schema['properties'].items():
        python_type = fiona.prop_type(fiona_type)

        if python_type == int:
            # Of Redshift's INTEGER / BIGINT, the wider one is used.
            redshift_type = 'BIGINT'
        elif python_type == float:
            # Of Redshift's REAL / DOUBLE PRECISION, the wider one is used.
            redshift_type = 'DOUBLE PRECISION'
        elif python_type == str:
            # The VARCHAR size is the width reported by Fiona for the field.
            redshift_type = 'VARCHAR({0})'.format(fiona.prop_width(fiona_type))
        elif python_type == bool:
            redshift_type = 'BOOLEAN'
        elif "FionaDateType" in str(python_type):
            # Fiona's date/time marker types are recognised by class name.
            redshift_type = 'DATE'
        elif "FionaTimeType" in str(python_type):
            redshift_type = 'TIME'
        elif "FionaDateTimeType" in str(python_type):
            redshift_type = 'TIMESTAMP'
        else:
            # Anything unrecognised falls back to an unbounded string column.
            redshift_type = 'VARCHAR(MAX)'

        columns[name] = redshift_type

    return columns
Exemple #2
0
def validate_attribute_field_info(attribute_field_info):
    """Validate a list of attribute field descriptors.

    Each element must be a dictionary with exactly the keys ``name``,
    ``dtype`` and ``value``, and its ``dtype`` must be accepted by both
    ``fiona.prop_type`` and ``fiona.prop_width``.

    :param attribute_field_info: list of dictionaries to validate
    :return: None when every element is valid
    :raises TypeError: if the argument is not a list, if an element is not a
        dictionary, or if a ``dtype`` is rejected by Fiona (the underlying
        error is attached as ``__cause__``)
    :raises KeyError: if an element's keys differ from the expected set
    """
    if not isinstance(attribute_field_info, list):
        raise TypeError("attribute_field_info must be a list")

    expected_keys = ['name', 'dtype', 'value']
    for d in attribute_field_info:
        if not isinstance(d, dict):
            raise TypeError(
                "Elements of attribute_field_info must be a dictionary.")
        # Compare as sets: dict keys are unique, and unlike sorting this
        # cannot fail when the keys are of mixed, unorderable types.
        if set(d) != set(expected_keys):
            raise KeyError(
                "Keys in attribute_field_info do not match the expected set: {}"
                .format(expected_keys))
        try:
            # Called only to check that Fiona accepts the dtype; the results
            # are discarded.
            fiona.prop_type(d['dtype'])
            fiona.prop_width(d['dtype'])
        except Exception as e:
            # Keep the original failure reachable for debugging.
            raise TypeError("Specified dtype {dtype} is invalid: {e}".format(
                dtype=d['dtype'], e=str(e))) from e
Exemple #3
0
def get_bigquery_schema(filepath, layer_name=None, gdb_name=None):
    """
    Generate a Bigquery table schema from a geospatial file

        python -m geobeam.util get_bigquery_schema ...args

    Args:
        filepath (str): full path to the input file
        layer_name (str, optional): name of the layer, if file contains
            multiple layers
        gdb_name (str, optional): only used when layer_name is also given.
            When set, filepath is read as bytes into a fiona ZipMemoryFile
            and gdb_name is the path opened inside that archive.
    Returns:
        list: the schema as a list of field dicts, convertable to json by
            json.dumps(schema, indent=2)
    """

    import fiona
    from fiona.io import ZipMemoryFile
    from fiona import prop_type

    bq_schema = []

    # Every handle is opened in a context manager so it is closed as soon as
    # the profile has been read.
    if layer_name is None:
        with fiona.open(filepath) as collection:
            profile = collection.profile
    elif gdb_name is None:
        with fiona.open(filepath, layer=layer_name) as collection:
            profile = collection.profile
    else:
        with open(filepath, 'rb') as f:
            archive_bytes = f.read()
        with ZipMemoryFile(archive_bytes) as mem:
            with mem.open(gdb_name, layer=layer_name) as collection:
                profile = collection.profile

    for field_name, field_type in profile['schema']['properties'].items():
        # Fiona type string -> Python type -> bare Fiona type name -> BQ type.
        fiona_type = prop_type(field_type)
        bq_type = BQ_FIELD_TYPES[fiona.schema.FIELD_TYPES_MAP_REV[fiona_type]]
        bq_schema.append({'name': field_name, 'type': bq_type})

    # The geometry is always exposed as one trailing GEOGRAPHY column.
    bq_schema.append({
        'name': 'geom',
        'type': 'GEOGRAPHY',
        'description': '{} reprojected from {}. source: {}'.format(
            profile['schema']['geometry'], profile['crs']['init'],
            profile['driver']),
    })

    return bq_schema
    def writeGisFile(self, path, driver, node_dtype="str", node_fields=None, edge_fields=None):
        """Write the graph's nodes and edges to ``path`` as two Fiona layers.

        Nodes go to a ``nodes`` layer with Point geometry and edges to an
        ``edges`` layer with LineString geometry. Both layers use the CRS
        stored in ``self.graph["crs"]``.

        NOTE(review): both layers are opened in "w" mode on the same path;
        presumably this needs a driver that supports multiple layers - confirm.

        Args:
            path: destination passed to ``fiona.open``.
            driver: driver name passed to ``fiona.open``.
            node_dtype: Fiona field type of the node identifier fields
                (``node`` on the nodes layer, ``anode``/``bnode`` on edges).
            node_fields: optional mapping of node attribute name to Fiona
                field type. The caller's mapping is not modified.
            edge_fields: optional mapping of edge attribute name to Fiona
                field type. The caller's mapping is not modified.

        Returns:
            None

        Raises:
            Exception: if ``fiona.prop_type`` rejects any requested dtype.
        """

        # Work on copies so the identifier fields added below do not leak
        # into the dictionaries owned by the caller.
        node_fields = {} if node_fields is None else dict(node_fields)
        edge_fields = {} if edge_fields is None else dict(edge_fields)

        # Resolve every dtype once, up front: this validates them and avoids
        # repeating the lookup for each record written.
        try:
            node_casts = {k: fiona.prop_type(t) for k, t in node_fields.items()}
            edge_casts = {k: fiona.prop_type(t) for k, t in edge_fields.items()}
            node_id_cast = fiona.prop_type(node_dtype)
        except Exception as e:
            raise Exception("error converting dtype to python type", e) from e

        node_fields["node"] = node_dtype
        edge_fields["anode"] = node_dtype
        edge_fields["bnode"] = node_dtype

        with fiona.open(
            path,
            "w",
            layer="nodes",
            driver=driver,
            crs=self.graph["crs"],
            schema={"geometry": "Point", "properties": node_fields},
        ) as c:

            for node in self.nodes_iter():
                geom = self.geometry(node)
                attrs = self.node[node]

                props = {}
                for k, cast in node_casts.items():
                    if k == "node":
                        # The identifier is written from the node itself.
                        continue
                    value = attrs.get(k)
                    props[k] = None if value is None else cast(value)

                props["node"] = node_id_cast(node)

                c.write({"geometry": geom, "properties": props})

        with fiona.open(
            path,
            "w",
            layer="edges",
            driver=driver,
            crs=self.graph["crs"],
            schema={"geometry": "LineString", "properties": edge_fields},
        ) as c:

            for u, v in self.edges_iter():
                geom = self.geometry(u, v)
                attrs = self.edge[u][v]

                props = {}
                for k, cast in edge_casts.items():
                    if k in ("anode", "bnode"):
                        # Endpoint identifiers are written from u and v.
                        continue
                    value = attrs.get(k)
                    props[k] = None if value is None else cast(value)

                props.update({"anode": node_id_cast(u), "bnode": node_id_cast(v)})

                c.write({"geometry": geom, "properties": props})

        return None
Exemple #5
0
def get_bigquery_schema_dataflow(filepath, layer_name=None, gdb_name=None):
    """
    Generate a Bigquery table schema from a geospatial file hosted on a Google Cloud Storage bucket

        from apache_beam.io.gcp.bigquery_tools import parse_table_schema_from_json

        table_schema = parse_table_schema_from_json(get_bigquery_schema_dataflow(known_args.gcs_url, known_args.layer_name))

    Args:
        filepath (str): full path to the input file hosted on Google Cloud Storage
        layer_name (str, optional): name of the layer, if file contains
            multiple layers
        gdb_name (str, optional): when set, the downloaded bytes are opened
            as a zip archive, the dataset '<zip base name>.gdb' inside it is
            opened, and gdb_name is passed as the layer to read.
            NOTE(review): layer_name is ignored on this path - confirm that
            is intended.
    Returns:
        JSON: the schema in JSON that can be passed to the schema argument in WriteToBigQuery. Must use the parse_table_schema_from_json() from apache_beam.io.gcp.bigquery_tools
    Raises:
        FileNotFoundError: if the bucket holds no object at the given path
    """

    from google.cloud import storage
    import fiona
    import json
    from fiona import BytesCollection
    from fiona import prop_type
    from fiona.io import ZipMemoryFile

    # Assumes a URL shaped like '<scheme>://<bucket>/<object path>', so that
    # splitting on '/' leaves the bucket at index 2 - TODO confirm with callers.
    url_parts = filepath.split('/')
    bucket_name = url_parts[2]
    file_name = '/'.join(url_parts[3:])
    zip_name = url_parts[-1].split('.')[0]

    storage_client = storage.Client()
    blob = storage_client.bucket(bucket_name).get_blob(file_name)
    if blob is None:
        # get_blob returns None for a missing object; fail with a clear error
        # instead of an AttributeError on the next line.
        raise FileNotFoundError(
            'No object {!r} found in bucket {!r}'.format(file_name, bucket_name))
    data = blob.download_as_string()

    # Each collection is closed as soon as its profile has been read.
    if gdb_name is not None:
        with ZipMemoryFile(data) as zip_file:
            with zip_file.open(f'{zip_name}.gdb', layer=gdb_name) as collection:
                profile = collection.profile
    elif layer_name is not None:
        with BytesCollection(data, layer=layer_name) as collection:
            profile = collection.profile
    else:
        with fiona.open(filepath) as collection:
            profile = collection.profile

    BQ_FIELD_TYPES = {
        'int': 'INT64',
        'str': 'STRING',
        'float': 'FLOAT64',
        'bool': 'BOOL',
        'date': 'DATE',
        'time': 'TIME',
        'datetime': 'DATETIME',
        'bytes': 'BYTES'
    }

    bq_schema = []

    for field_name, field_type in profile['schema']['properties'].items():
        # Fiona type string -> Python type -> bare Fiona type name -> BQ type.
        fiona_type = prop_type(field_type)
        bq_type = BQ_FIELD_TYPES[fiona.schema.FIELD_TYPES_MAP_REV[fiona_type]]
        bq_schema.append({'name': field_name, 'type': bq_type})

    # The geometry is always exposed as one trailing GEOGRAPHY column.
    bq_schema.append({
        'name': 'geom',
        'type': 'GEOGRAPHY',
        'description': '{} reprojected from {}. source: {}'.format(
            profile['schema']['geometry'], profile['crs']['init'],
            profile['driver']),
    })

    return json.dumps({"fields": bq_schema}, sort_keys=True)
    def writeGisFile(self, path, driver, node_dtype='str', node_fields=None, edge_fields=None):
        '''Write the graph's nodes and edges to ``path`` as two Fiona layers.

        Nodes go to a ``nodes`` layer with Point geometry and edges to an
        ``edges`` layer with LineString geometry. Both layers use the CRS
        stored in ``self.graph['crs']``.

        NOTE(review): both layers are opened in 'w' mode on the same path;
        presumably this needs a driver that supports multiple layers - confirm.

        Args:
            path: destination passed to ``fiona.open``.
            driver: driver name passed to ``fiona.open``.
            node_dtype: Fiona field type of the node identifier fields
                (``node`` on the nodes layer, ``anode``/``bnode`` on edges).
            node_fields: optional mapping of node attribute name to Fiona
                field type. The caller's mapping is not modified.
            edge_fields: optional mapping of edge attribute name to Fiona
                field type. The caller's mapping is not modified.

        Returns:
            None

        Raises:
            Exception: if ``fiona.prop_type`` rejects any requested dtype.
        '''

        # Work on copies so the identifier fields added below do not leak
        # into the dictionaries owned by the caller.
        node_fields = {} if node_fields is None else dict(node_fields)
        edge_fields = {} if edge_fields is None else dict(edge_fields)

        # Resolve every dtype once, up front: this validates them and avoids
        # repeating the lookup for each record written.
        try:
            node_casts = {k: fiona.prop_type(t) for k, t in node_fields.items()}
            edge_casts = {k: fiona.prop_type(t) for k, t in edge_fields.items()}
            node_id_cast = fiona.prop_type(node_dtype)
        except Exception as e:
            raise Exception('error converting dtype to python type', e) from e

        node_fields['node'] = node_dtype
        edge_fields['anode'] = node_dtype
        edge_fields['bnode'] = node_dtype

        with fiona.open(
            path,
            'w',
            layer='nodes',
            driver=driver,
            crs=self.graph['crs'],
            schema={'geometry': 'Point',
                    'properties': node_fields}) as c:

            for node in self.nodes_iter():
                geom = self.geometry(node)
                attrs = self.node[node]

                props = {}
                for k, cast in node_casts.items():
                    if k == 'node':
                        # The identifier is written from the node itself.
                        continue
                    value = attrs.get(k)
                    props[k] = None if value is None else cast(value)

                props['node'] = node_id_cast(node)

                c.write({'geometry': geom, 'properties': props})

        with fiona.open(
            path,
            'w',
            layer='edges',
            driver=driver,
            crs=self.graph['crs'],
            schema={'geometry': 'LineString',
                    'properties': edge_fields}) as c:

            for u, v in self.edges_iter():
                geom = self.geometry(u, v)
                attrs = self.edge[u][v]

                props = {}
                for k, cast in edge_casts.items():
                    if k in ('anode', 'bnode'):
                        # Endpoint identifiers are written from u and v.
                        continue
                    value = attrs.get(k)
                    props[k] = None if value is None else cast(value)

                props.update({'anode': node_id_cast(u),
                              'bnode': node_id_cast(v)})

                c.write({'geometry': geom, 'properties': props})

        return None
Exemple #7
0
# Walk-through of a Fiona collection's schema and records. Many lines below
# are bare expressions whose values are discarded when run as a script; they
# presumably come from an interactive session where the result is echoed.

# Open the shapefile as a collection and print its schema.
c = fiona.open('/gdata/GSHHS_c.shp')
pprint(c.schema)
###############################################################################
# Fetch one record and compare its keys with those of the schema.
rec = next(c)
# Record keys that are not schema keys.
set(rec.keys()) - set(c.schema.keys())
# Whether the record's property names equal the schema's property names.
set(rec['properties'].keys()) == set(c.schema['properties'].keys())
###############################################################################
# Python type of the 'source' value next to its declared schema type.
type(rec['properties']['source'])
c.schema['properties']['source']
###############################################################################
# prop_width called on a type string with and without an explicit width.
from fiona import prop_width
prop_width('str:25')
prop_width('str')
###############################################################################
# prop_type called on several schema type strings.
from fiona import prop_type
prop_type('int')
prop_type('float')
prop_type('str:25')
###############################################################################
# Rebinds c to a newly opened collection; the previous one is not explicitly
# closed. The record is fetched with the collection's own next() method here
# instead of the next() builtin used elsewhere in this script.
c = fiona.open('/gdata/GSHHS_c.shp')
rec = c.next()
pprint(rec)
###############################################################################
# The record fetched above is still read after the collection is closed.
c.close()
rec['id']
###############################################################################
# Reopen the file and read the 'id' of the first record returned.
c = fiona.open('/gdata/GSHHS_c.shp')
rec = next(c)
rec['id']
###############################################################################
pprint(rec['properties'])
Exemple #8
0
def test_types():
    """Check the Python types that prop_type returns for Fiona type names."""
    # With or without a width suffix, 'str' resolves to the text type.
    for type_name in ('str:254', 'str'):
        assert prop_type(type_name) == text_type

    # Plain Python numbers are instances of the resolved numeric types.
    for sample, type_name in ((0, 'int'), (0.0, 'float')):
        assert isinstance(sample, prop_type(type_name))

    assert prop_type('date') == FionaDateType
    def writeGisFile(self,
                     path,
                     driver,
                     node_dtype='str',
                     node_fields=None,
                     edge_fields=None):
        '''Write the graph's nodes and edges to ``path`` as two Fiona layers.

        Nodes go to a ``nodes`` layer with Point geometry and edges to an
        ``edges`` layer with LineString geometry. Both layers use the CRS
        stored in ``self.graph['crs']``.

        NOTE(review): both layers are opened in 'w' mode on the same path;
        presumably this needs a driver that supports multiple layers - confirm.

        Args:
            path: destination passed to ``fiona.open``.
            driver: driver name passed to ``fiona.open``.
            node_dtype: Fiona field type of the node identifier fields
                (``node`` on the nodes layer, ``anode``/``bnode`` on edges).
            node_fields: optional mapping of node attribute name to Fiona
                field type. The caller's mapping is not modified.
            edge_fields: optional mapping of edge attribute name to Fiona
                field type. The caller's mapping is not modified.

        Returns:
            None

        Raises:
            Exception: if ``fiona.prop_type`` rejects any requested dtype.
        '''

        # Work on copies so the identifier fields added below do not leak
        # into the dictionaries owned by the caller.
        node_fields = {} if node_fields is None else dict(node_fields)
        edge_fields = {} if edge_fields is None else dict(edge_fields)

        # Resolve every dtype once, up front: this validates them and avoids
        # repeating the lookup for each record written.
        try:
            node_casts = {
                k: fiona.prop_type(t) for k, t in node_fields.items()
            }
            edge_casts = {
                k: fiona.prop_type(t) for k, t in edge_fields.items()
            }
            node_id_cast = fiona.prop_type(node_dtype)
        except Exception as e:
            raise Exception('error converting dtype to python type', e) from e

        node_fields['node'] = node_dtype
        edge_fields['anode'] = node_dtype
        edge_fields['bnode'] = node_dtype

        with fiona.open(path,
                        'w',
                        layer='nodes',
                        driver=driver,
                        crs=self.graph['crs'],
                        schema={
                            'geometry': 'Point',
                            'properties': node_fields
                        }) as c:

            for node in self.nodes_iter():
                geom = self.geometry(node)
                attrs = self.node[node]

                props = {}
                for k, cast in node_casts.items():
                    if k == 'node':
                        # The identifier is written from the node itself.
                        continue
                    value = attrs.get(k)
                    props[k] = None if value is None else cast(value)

                props['node'] = node_id_cast(node)

                c.write({'geometry': geom, 'properties': props})

        with fiona.open(path,
                        'w',
                        layer='edges',
                        driver=driver,
                        crs=self.graph['crs'],
                        schema={
                            'geometry': 'LineString',
                            'properties': edge_fields
                        }) as c:

            for u, v in self.edges_iter():
                geom = self.geometry(u, v)
                attrs = self.edge[u][v]

                props = {}
                for k, cast in edge_casts.items():
                    if k in ('anode', 'bnode'):
                        # Endpoint identifiers are written from u and v.
                        continue
                    value = attrs.get(k)
                    props[k] = None if value is None else cast(value)

                props.update({
                    'anode': node_id_cast(u),
                    'bnode': node_id_cast(v)
                })

                c.write({'geometry': geom, 'properties': props})

        return None
Exemple #10
0
def test_types():
    """Verify the type prop_type resolves for str, int, float and date names."""
    string_type = prop_type('str')
    # A width suffix resolves to the same text type as the bare name.
    assert string_type == text_type
    assert prop_type('str:254') == text_type

    int_type = prop_type('int')
    float_type = prop_type('float')
    assert isinstance(0, int_type)
    assert isinstance(0.0, float_type)

    assert prop_type('date') == FionaDateType
Exemple #11
0
def test_types():
    """Check that prop_type maps each Fiona type name to the exact Python type."""
    expected_types = (
        ('str:254', text_type),
        ('str', text_type),
        ('int', int),
        ('float', float),
        ('date', FionaDateType),
    )
    for type_name, expected in expected_types:
        assert prop_type(type_name) == expected
Exemple #12
0
def test_types():
    """prop_type returns the exact Python type for each Fiona type name."""
    # Text fields, with and without a declared width.
    for type_name in ('str:254', 'str'):
        assert prop_type(type_name) == text_type

    # Numeric fields resolve to the builtin number types themselves.
    resolved_int = prop_type('int')
    resolved_float = prop_type('float')
    assert resolved_int == int
    assert resolved_float == float

    assert prop_type('date') == FionaDateType
Exemple #13
0
def test_types():
    """Exercise prop_type on the str, int, float and date type names."""
    text_names = ("str:254", "str")
    numeric_samples = {"int": 0, "float": 0.0}

    # Both spellings of the string type resolve to the text type.
    for type_name in text_names:
        assert prop_type(type_name) == text_type

    # The sample number must be an instance of the resolved numeric type.
    for type_name, sample in numeric_samples.items():
        assert isinstance(sample, prop_type(type_name))

    assert prop_type("date") == FionaDateType