def from_featureclass(filename, **kwargs):
    """
    Returns a GeoDataFrame (Spatially Enabled Pandas DataFrame) from a
    feature class.

    NOTE(review): a second ``from_featureclass`` is defined later in this
    module; at import time the later definition shadows this one, so this
    block is effectively dead code — confirm and consider removing it.

    =========================== =================================================
    **Argument**                **Description**
    --------------------------- -------------------------------------------------
    filename                    Required string. Full path to the feature class.
    =========================== =================================================

    *Optional keyword arguments, honored only when ArcPy is available*:

    =========================== =================================================
    **Key**                     **Value**
    --------------------------- -------------------------------------------------
    sql_clause                  sql clause to parse data down. To learn more see
                                `ArcPy Search Cursor <https://pro.arcgis.com/en/pro-app/arcpy/data-access/searchcursor-class.htm>`_
    --------------------------- -------------------------------------------------
    where_clause                where statement. To learn more see
                                `ArcPy SQL reference <https://pro.arcgis.com/en/pro-app/help/mapping/navigation/sql-reference-for-elements-used-in-query-expressions.htm>`_
    --------------------------- -------------------------------------------------
    fields                      list of strings specifying the field names.
    =========================== =================================================

    :returns: pandas.core.frame.DataFrame
    """
    from arcgis.geometry import _types
    import json
    if HASARCPY:
        sql_clause = kwargs.pop('sql_clause', (None, None))
        where_clause = kwargs.pop('where_clause', None)
        fields = kwargs.pop('fields', None)
        sr = kwargs.pop('sr', None)
        try:
            desc = arcpy.da.Describe(filename)
            area_field = desc.pop('areaFieldName', None)
            length_field = desc.pop('lengthFieldName', None)
        except Exception:  # for older versions of arcpy without arcpy.da.Describe
            desc = arcpy.Describe(filename)
            desc = {
                'fields': desc.fields,
                'shapeType': desc.shapeType
            }
            area_field = getattr(desc, 'areaFieldName', None)
            length_field = getattr(desc, 'lengthFieldName', None)
        if fields is None:
            # Exclude the geometry field and any auto-maintained
            # area/length fields from the attribute set.
            fields = [fld.name for fld in desc['fields']
                      if fld.type not in ['Geometry'] and
                      fld.name not in [area_field, length_field]]
        cursor_fields = fields + ['SHAPE@JSON']
        df_fields = fields + ['SHAPE']
        dfs = []
        with da.SearchCursor(filename,
                             field_names=cursor_fields,
                             where_clause=where_clause,
                             sql_clause=sql_clause,
                             spatial_reference=sr) as rows:
            srows = []
            for row in rows:
                srows.append(row)
                if len(srows) == 25000:  # batch rows to bound peak memory use
                    dfs.append(pd.DataFrame(srows, columns=df_fields))
                    srows = []
            if len(srows):
                dfs.append(pd.DataFrame(srows, columns=df_fields))
                srows = []
        del srows
        # FIX: the original tested `len(dfs) > 0` before `len(dfs) == 1`,
        # making the single-frame branch unreachable; check the cheap
        # single-frame case first.
        if len(dfs) == 1:
            df = dfs[0]
        elif len(dfs) > 1:
            df = pd.concat(dfs)
            df = df.reset_index(drop=True)
        else:
            df = pd.DataFrame([], columns=df_fields)
        q = df.SHAPE.notnull()
        gt = desc['shapeType'].lower()
        geoms = {
            "point": _types.Point,
            "polygon": _types.Polygon,
            "polyline": _types.Polyline,
            "multipoint": _types.MultiPoint,
            "envelope": _types.Envelope,
            "geometry": _types.Geometry
        }
        # FIX: pd.io.json.loads was removed from pandas; the stdlib json
        # module (imported above) parses the SHAPE@JSON strings equally well.
        df.SHAPE = (df.SHAPE[q]
                    .apply(json.loads)
                    .apply(geoms[gt]))
        df.spatial.set_geometry("SHAPE")
        return df
    elif HASARCPY == False and \
         HASPYSHP == True and \
         filename.lower().find('.shp') > -1:
        # Pure-Python shapefile path: read records with pyshp and convert
        # GeoJSON geometries to Esri JSON.
        records = []
        reader = shapefile.Reader(filename)
        fields = [field[0] for field in reader.fields
                  if field[0] != 'DeletionFlag']
        for r in reader.shapeRecords():
            atr = dict(zip(fields, r.record))
            g = r.shape.__geo_interface__
            g = _geojson_to_esrijson(g)
            geom = _types.Geometry(g)
            atr['SHAPE'] = geom
            records.append(atr)
            del atr
            del r, g
            del geom
        sdf = pd.DataFrame(records)
        sdf.spatial.set_geometry('SHAPE')
        sdf.reset_index(inplace=True)
        return sdf
    elif HASARCPY == False and \
         HASFIONA == True and \
         (filename.lower().find('.shp') > -1 or
          os.path.dirname(filename).lower().find('.gdb') > -1):
        is_gdb = os.path.dirname(filename).lower().find('.gdb') > -1
        if is_gdb:
            # fiona.Env replaced fiona.drivers; prefer it when present to
            # avoid the deprecation warning.
            fiona_env = fiona.drivers
            if hasattr(fiona, 'Env'):
                fiona_env = fiona.Env
            with fiona_env():
                from arcgis.geometry import _types
                fp = os.path.dirname(filename)
                fn = os.path.basename(filename)
                geoms = []
                atts = []
                with fiona.open(fp, layer=fn) as source:
                    meta = source.meta
                    cols = list(source.schema['properties'].keys())
                    # Get the CRS; fall back to WGS84 when it cannot be parsed.
                    try:
                        wkid = source.crs['init'].split(':')[1]
                    except Exception:
                        wkid = 4326
                    sr = _types.SpatialReference({'wkid': int(wkid)})
                    for idx, row in source.items():
                        g = _types.Geometry(row['geometry'])
                        geoms.append(g)
                        atts.append(list(row['properties'].values()))
                        del idx, row
                    df = pd.DataFrame(data=atts, columns=cols)
                    df.spatial.set_geometry(geoms)
                    df.spatial.sr = sr
                    return df
        else:
            with fiona.drivers():
                from arcgis.geometry import _types
                geoms = []
                atts = []
                with fiona.open(filename) as source:
                    meta = source.meta
                    cols = list(source.schema['properties'].keys())
                    for idx, row in source.items():
                        geoms.append(_types.Geometry(row['geometry']))
                        atts.append(list(row['properties'].values()))
                        del idx, row
                    df = pd.DataFrame(data=atts, columns=cols)
                    df.spatial.set_geometry(geoms)
                    return df
    # No supported backend matched: preserve the original silent
    # None return (the later definition raises instead).
    return
def from_featureclass(filename, **kwargs):
    """
    Returns a GeoDataFrame (Spatially Enabled Pandas DataFrame) from a
    feature class.

    =========================== =================================================
    **Argument**                **Description**
    --------------------------- -------------------------------------------------
    filename                    Required string or pathlib.Path. Full path to
                                the feature class.
    =========================== =================================================

    *Optional keyword arguments, honored only when ArcPy is available*:

    =========================== =================================================
    **Key**                     **Value**
    --------------------------- -------------------------------------------------
    sql_clause                  sql clause to parse data down. To learn more see
                                `ArcPy Search Cursor <https://pro.arcgis.com/en/pro-app/arcpy/data-access/searchcursor-class.htm>`_
    --------------------------- -------------------------------------------------
    where_clause                where statement. To learn more see
                                `ArcPy SQL reference <https://pro.arcgis.com/en/pro-app/help/mapping/navigation/sql-reference-for-elements-used-in-query-expressions.htm>`_
    --------------------------- -------------------------------------------------
    fields                      list of strings specifying the field names.
    --------------------------- -------------------------------------------------
    spatial_filter              A `Geometry` object that will filter the
                                results.  This requires `arcpy` to work.
    =========================== =================================================

    :returns: pandas.core.frame.DataFrame

    :raises Exception: when no supported I/O backend (ArcPy, PyShp, Fiona)
        can open the given path.
    """
    from arcgis.geometry import _types
    import json
    filename = _ensure_path_string(filename)
    if HASARCPY:
        sql_clause = kwargs.pop('sql_clause', (None, None))
        where_clause = kwargs.pop('where_clause', None)
        fields = kwargs.pop('fields', None)
        sr = kwargs.pop('sr', None)
        spatial_filter = kwargs.pop('spatial_filter', None)
        geom = None
        try:
            desc = arcpy.da.Describe(filename)
            area_field = desc.pop('areaFieldName', None)
            length_field = desc.pop('lengthFieldName', None)
        except Exception:  # for older versions of arcpy without arcpy.da.Describe
            desc = arcpy.Describe(filename)
            desc = {'fields': desc.fields, 'shapeType': desc.shapeType}
            area_field = getattr(desc, 'areaFieldName', None)
            length_field = getattr(desc, 'lengthFieldName', None)
        if spatial_filter:
            # Map REST-style spatial relationships onto the overlap types
            # accepted by SelectLayerByLocation.
            _sf_lu = {
                "esriSpatialRelIntersects": "INTERSECT",
                "esriSpatialRelContains": "CONTAINS",
                "esriSpatialRelCrosses": "CROSSED_BY_THE_OUTLINE_OF",
                "esriSpatialRelEnvelopeIntersects": "INTERSECT",
                "esriSpatialRelIndexIntersects": "INTERSECT",
                "esriSpatialRelOverlaps": "INTERSECT",
                "esriSpatialRelTouches": "BOUNDARY_TOUCHES",
                "esriSpatialRelWithin": "WITHIN"
            }
            relto = _sf_lu[spatial_filter['spatialRel']]
            geom = spatial_filter['geometry']
            if hasattr(geom, 'polygon'):
                geom = geom.polygon
            geom = geom.as_arcpy
            # Build a uniquely-named in-memory layer and apply the spatial
            # selection; the cursor below then reads only selected features.
            flname = "a" + uuid.uuid4().hex[:6]
            filename = arcpy.management.MakeFeatureLayer(
                filename,
                out_layer=flname,
                where_clause=where_clause)[0]
            arcpy.management.SelectLayerByLocation(filename,
                                                   overlap_type=relto,
                                                   select_features=geom)
        if fields is None:
            # Exclude the geometry field and any auto-maintained
            # area/length fields from the attribute set.
            fields = [fld.name for fld in desc['fields']
                      if fld.type not in ['Geometry'] and
                      fld.name not in [area_field, length_field]]
        cursor_fields = fields + ['SHAPE@JSON']
        df_fields = fields + ['SHAPE']
        dfs = []
        with da.SearchCursor(filename,
                             field_names=cursor_fields,
                             where_clause=where_clause,
                             sql_clause=sql_clause,
                             spatial_reference=sr) as rows:
            srows = []
            for row in rows:
                srows.append(row)
                if len(srows) == 25000:  # batch rows to bound peak memory use
                    dfs.append(pd.DataFrame(srows, columns=df_fields))
                    srows = []
            if len(srows):
                dfs.append(pd.DataFrame(srows, columns=df_fields))
                srows = []
        del srows
        # FIX: the original tested `len(dfs) > 0` before `len(dfs) == 1`,
        # making the single-frame branch unreachable; check the cheap
        # single-frame case first.
        if len(dfs) == 1:
            df = dfs[0]
        elif len(dfs) > 1:
            df = pd.concat(dfs)
            df = df.reset_index(drop=True)
        else:
            df = pd.DataFrame([], columns=df_fields)
        q = df.SHAPE.notnull()
        gt = desc['shapeType'].lower()
        geoms = {
            "point": _types.Point,
            "polygon": _types.Polygon,
            "polyline": _types.Polyline,
            "multipoint": _types.MultiPoint,
            "envelope": _types.Envelope,
            "geometry": _types.Geometry
        }
        # FIX: pd.io.json.loads was removed from pandas; the stdlib json
        # module (imported above) parses the SHAPE@JSON strings equally well.
        df.SHAPE = (df.SHAPE[q].apply(json.loads).apply(geoms[gt]))
        df.spatial.set_geometry("SHAPE")
        df.spatial._meta.source = filename
        return df
    elif HASARCPY == False and \
         HASPYSHP == True and \
         filename.lower().find('.shp') > -1:
        # Pure-Python shapefile path: read records with pyshp and convert
        # GeoJSON geometries to Esri JSON.
        records = []
        reader = shapefile.Reader(filename)
        fields = [
            field[0] for field in reader.fields
            if field[0] != 'DeletionFlag'
        ]
        for r in reader.shapeRecords():
            atr = dict(zip(fields, r.record))
            g = r.shape.__geo_interface__
            g = _geojson_to_esrijson(g)
            geom = _types.Geometry(g)
            atr['SHAPE'] = geom
            records.append(atr)
            del atr
            del r, g
            del geom
        sdf = pd.DataFrame(records)
        sdf.spatial.set_geometry('SHAPE')
        # Shapefiles carry no OID field; synthesize a sequential one.
        sdf['OBJECTID'] = range(sdf.shape[0])
        sdf.reset_index(inplace=True)
        sdf.spatial._meta.source = filename
        return sdf
    elif HASARCPY == False and \
         HASFIONA == True and \
         (filename.lower().find('.shp') > -1 or
          os.path.dirname(filename).lower().find('.gdb') > -1):
        is_gdb = os.path.dirname(filename).lower().find('.gdb') > -1
        if is_gdb:
            # fiona.Env replaced fiona.drivers; prefer it when present to
            # avoid the deprecation warning.
            fiona_env = fiona.drivers
            if hasattr(fiona, 'Env'):
                fiona_env = fiona.Env
            with fiona_env():
                from arcgis.geometry import _types
                fp = os.path.dirname(filename)
                fn = os.path.basename(filename)
                geoms = []
                atts = []
                with fiona.open(fp, layer=fn) as source:
                    meta = source.meta
                    cols = list(source.schema['properties'].keys())
                    # Get the CRS; fall back to WGS84 when it cannot be parsed.
                    try:
                        wkid = source.crs['init'].split(':')[1]
                    except Exception:
                        wkid = 4326
                    sr = _types.SpatialReference({'wkid': int(wkid)})
                    for idx, row in source.items():
                        g = _types.Geometry(row['geometry'])
                        geoms.append(g)
                        atts.append(list(row['properties'].values()))
                        del idx, row
                    df = pd.DataFrame(data=atts, columns=cols)
                    df.spatial.set_geometry(geoms)
                    df.spatial.sr = sr
                    df.spatial._meta.source = filename
                    return df
        else:
            with fiona.drivers():
                from arcgis.geometry import _types
                geoms = []
                atts = []
                with fiona.open(filename) as source:
                    meta = source.meta
                    cols = list(source.schema['properties'].keys())
                    for idx, row in source.items():
                        geoms.append(_types.Geometry(row['geometry']))
                        atts.append(list(row['properties'].values()))
                        del idx, row
                    df = pd.DataFrame(data=atts, columns=cols)
                    df.spatial.set_geometry(geoms)
                    df.spatial._meta.source = filename
                    return df
    else:
        if os.path.dirname(filename).lower().find('.gdb') > -1:
            message = """Cannot Open Geodatabase without Arcpy or Fiona \nPlease switch to Arcpy for full support or install fiona by this command `conda install fiona`""".strip()
            print(message)
            raise Exception(
                'Failed to import Feature Class from Geodatabase specified')
        else:
            raise Exception(
                'Unsupported Data Format or Invalid Feature Class specified')
def to_featureclass(df,
                    out_name,
                    out_location=None,
                    overwrite=True,
                    out_sr=None,
                    skip_invalid=True):
    """
    Converts a SpatialDataFrame to a feature class.

    Parameters:
     :out_location: path to the workspace
     :out_name: name of the output feature class table
     :overwrite: True, the data will be erased then replaced, else the
      table will be appended to an existing table.
     :out_sr: if set, the data will try to reproject itself
     :skip_invalid: if True, the cursor object will not raise an error on
      insertion of invalid data, if False, the first occurence of invalid
      data will raise an exception.
    Returns:
     path to the feature class
    """
    fc = None
    if HASARCPY:
        import arcgis
        cols = []
        dt_idx = []        # column names with datetime dtype (pre-sanitization)
        invalid_rows = []  # df indexes that failed to insert
        idx = 0
        max_length = None
        # --- resolve / create the output workspace -------------------------
        if out_location:
            if os.path.isdir(out_location) == False and \
               out_location.lower().endswith('.gdb'):
                out_location = arcpy.CreateFileGDB_management(
                    out_folder_path=os.path.dirname(out_location),
                    out_name=os.path.basename(out_location))[0]
            elif os.path.isdir(out_location) == False and \
                 out_name.lower().endswith('.shp'):
                os.makedirs(out_location)
            elif os.path.isfile(out_location) == False and \
                 out_location.lower().endswith('.sde'):
                raise ValueError("The sde connection file does not exist")
        else:
            if out_name.lower().endswith('.shp'):
                out_location = tempfile.gettempdir()
            elif HASARCPY:
                out_location = arcpy.env.scratchGDB
            else:
                out_location = tempfile.gettempdir()
                out_name = out_name + ".shp"
        fc = os.path.join(out_location, out_name)
        df = df.copy()  # create a copy so we don't modify the source data.
        # --- sanitize columns and fill nulls -------------------------------
        if out_name.lower().endswith('.shp'):
            max_length = 10  # dBase field names are limited to 10 characters
        for col in df.columns:
            if col.lower() != 'shape':
                if df[col].dtype.type in NUMERIC_TYPES:
                    df[col] = df[col].fillna(0)
                elif df[col].dtype.type in DATETIME_TYPES:
                    dt_idx.append(col)
                else:
                    df.loc[df[col].isnull(), col] = ""
            idx += 1
            col = sanitize_field_name(s=col, length=max_length)
            cols.append(col)
            del col
        df.columns = cols
        # NOTE(review): dt_idx holds pre-sanitization names; if
        # sanitize_field_name renames a datetime column (e.g. shapefile
        # truncation) it silently drops out of dt_idx below — confirm intended.
        # --- create the feature class if needed ----------------------------
        if arcpy.Exists(fc) and \
           overwrite:
            arcpy.Delete_management(fc)
        if arcpy.Exists(fc) == False:
            # NOTE(review): the `elif sr is None` / later arms below are
            # unreachable given `sr = df.sr` — left untouched because the
            # intended precedence is unclear.
            sr = df.sr
            if df.sr is None:
                sr = df['SHAPE'].loc[df['SHAPE'].first_valid_index()].spatial_reference
                if isinstance(sr, dict) and \
                   'wkid' in sr:
                    sr = arcpy.SpatialReference(sr['wkid'])
                elif isinstance(sr, arcpy.SpatialReference):
                    sr = sr
                else:
                    sr = None
            elif df.sr:
                sr = _types.SpatialReference(df.sr).as_arcpy
            elif sr is None:
                sr = df['SHAPE'].loc[df['SHAPE'].first_valid_index()].spatial_reference
                if isinstance(sr, dict) and \
                   'wkid' in sr:
                    sr = arcpy.SpatialReference(sr['wkid'])
                elif isinstance(sr, arcpy.SpatialReference):
                    sr = sr
                else:
                    sr = None
            elif isinstance(sr, dict):
                sr = _types.SpatialReference(sr).as_arcpy
            elif isinstance(sr, _types.SpatialReference):
                sr = df.sr.as_arcpy
            fc = arcpy.CreateFeatureclass_management(
                out_path=out_location,
                out_name=out_name,
                geometry_type=df.geometry_type.upper(),
                spatial_reference=sr)[0]
        # --- drop auto-maintained fields from the insert list --------------
        desc = arcpy.Describe(fc)
        oidField = desc.oidFieldName
        col_insert = copy.copy(df.columns).tolist()
        if hasattr(desc, 'areaFieldName'):
            af = desc.areaFieldName.lower()
        else:
            af = None
        if hasattr(desc, 'lengthFieldName'):
            lf = desc.lengthFieldName.lower()
        else:
            lf = None
        col_insert = [f for f in col_insert
                      if f.lower() not in ['oid', 'objectid', 'fid',
                                           desc.oidFieldName.lower(), af, lf]]
        df_cols = col_insert.copy()
        lower_col_names = [f.lower() for f in col_insert
                           if f.lower() not in ['oid', 'objectid', 'fid']]
        idx_shp = None
        if oidField.lower() in lower_col_names:
            val = col_insert.pop(lower_col_names.index(oidField.lower()))
            del df[val]
            col_insert = copy.copy(df.columns).tolist()
            lower_col_names = [f.lower() for f in col_insert]
        if hasattr(desc, "areaFieldName") and \
           desc.areaFieldName.lower() in lower_col_names:
            val = col_insert.pop(lower_col_names.index(desc.areaFieldName.lower()))
            del df[val]
            col_insert = copy.copy(df.columns).tolist()
            lower_col_names = [f.lower() for f in col_insert]
        elif 'shape_area' in lower_col_names:
            val = col_insert.pop(lower_col_names.index('shape_area'))
            del df[val]
            col_insert = copy.copy(df.columns).tolist()
            lower_col_names = [f.lower() for f in col_insert]
        if hasattr(desc, "lengthFieldName") and \
           desc.lengthFieldName.lower() in lower_col_names:
            val = col_insert.pop(lower_col_names.index(desc.lengthFieldName.lower()))
            del df[val]
            col_insert = copy.copy(df.columns).tolist()
            lower_col_names = [f.lower() for f in col_insert]
        elif 'shape_length' in lower_col_names:
            val = col_insert.pop(lower_col_names.index('shape_length'))
            del df[val]
            col_insert = copy.copy(df.columns).tolist()
            lower_col_names = [f.lower() for f in col_insert]
        if "SHAPE" in df.columns:
            idx_shp = col_insert.index("SHAPE")
            col_insert[idx_shp] = "SHAPE@"
        # --- add any missing attribute fields ------------------------------
        existing_fields = [field.name.lower() for field in arcpy.ListFields(fc)]
        for col in col_insert:
            if col.lower() != 'shape@' and \
               col.lower() != 'shape' and \
               col.lower() not in existing_fields:
                try:
                    t = _infer_type(df, col)
                    if t == "TEXT" and out_name.lower().endswith('.shp') == False:
                        # Size text fields to the data, but at least 255 chars.
                        l = int(df[col].str.len().max()) or 0
                        if l < 255:
                            l = 255
                        arcpy.AddField_management(in_table=fc,
                                                  field_name=col,
                                                  field_length=l,
                                                  field_type=_infer_type(df, col))
                    else:
                        arcpy.AddField_management(in_table=fc,
                                                  field_name=col,
                                                  field_type=t)
                except Exception:
                    print('col %s' % col)
        # --- insert the rows ------------------------------------------------
        dt_idx = [col_insert.index(col) for col in dt_idx if col in col_insert]
        icur = da.InsertCursor(fc, col_insert)
        for index, row in df[df_cols].iterrows():
            if len(dt_idx) > 0:
                row = row.tolist()
                for i in dt_idx:
                    # InsertCursor expects python datetimes, not pandas Timestamps.
                    row[i] = row[i].to_pydatetime()
                del i
                try:
                    # FIX: original used `if idx_shp:` which is falsy for
                    # index 0, skipping the as_arcpy conversion when SHAPE
                    # is the first insert column (the else-branch below
                    # already used the isinstance check).
                    if isinstance(idx_shp, int):
                        row[idx_shp] = row[idx_shp].as_arcpy
                    icur.insertRow(row)
                except Exception:
                    invalid_rows.append(index)
                    if skip_invalid == False:
                        raise Exception("Invalid row detected at index: %s" % index)
            else:
                try:
                    row = row.tolist()
                    if isinstance(idx_shp, int):
                        row[idx_shp] = row[idx_shp].as_arcpy
                    icur.insertRow(row)
                except Exception:
                    invalid_rows.append(index)
                    if skip_invalid == False:
                        raise Exception("Invalid row detected at index: %s" % index)
            del row
        del icur
        if len(invalid_rows) > 0:
            t = ",".join([str(r) for r in invalid_rows])
            _log.warning('The following rows could not be written to the table: %s' % t)
    elif HASARCPY == False and \
         HASPYSHP:
        return _pyshp_to_shapefile(df=df,
                                   out_path=out_location,
                                   out_name=out_name)
    else:
        raise Exception("Cannot Export the data without ArcPy or PyShp modules. " +
                        "Please install them and try again.")
    return fc