def get_bigquery_schema(filepath, layer_name=None, gdb_name=None):
    """Generate a BigQuery table schema from a geospatial file.

    python -m geobeam.util get_bigquery_schema ...args

    Args:
        filepath (str): full path to the input file
        layer_name (str, optional): name of the layer, if file contains
            multiple layers
        gdb_name (str, optional): name of the geodatabase inside a zip
            archive; when given, `filepath` is read fully into memory and
            the layer is opened from the in-memory archive

    Returns:
        list: list of BigQuery field dicts (name/type), convertable to
            json by json.dumps(schema, indent=2). Note: the original
            docstring said "dict", but a list has always been returned.
    """
    import fiona
    from fiona.io import ZipMemoryFile
    from fiona import prop_type

    # Use context managers so dataset/file handles are released promptly
    # (the original leaked every handle it opened).
    if layer_name is None:
        with fiona.open(filepath) as collection:
            profile = collection.profile
    elif gdb_name is None:
        with fiona.open(filepath, layer=layer_name) as collection:
            profile = collection.profile
    else:
        # Zipped geodatabase: load the archive bytes and open the layer
        # from the in-memory zip.
        with open(filepath, 'rb') as f:
            with ZipMemoryFile(f.read()) as mem:
                with mem.open(gdb_name, layer=layer_name) as collection:
                    profile = collection.profile

    bq_schema = []
    for field_name, field_type in profile['schema']['properties'].items():
        # Map the fiona property type back to its name, then to the
        # BigQuery type via the module-level BQ_FIELD_TYPES table.
        fiona_type = prop_type(field_type)
        bq_type = BQ_FIELD_TYPES[fiona.schema.FIELD_TYPES_MAP_REV[fiona_type]]
        bq_schema.append({'name': field_name, 'type': bq_type})

    # Geometry is always emitted as a GEOGRAPHY column named "geom".
    bq_schema.append({
        'name': 'geom',
        'type': 'GEOGRAPHY',
        'description': '{} reprojected from {}. source: {}'.format(
            profile['schema']['geometry'], profile['crs']['init'],
            profile['driver'])
    })

    return bq_schema
def test_open_closed_zip():
    """A closed ZipMemoryFile must refuse to open a dataset (OSError)."""
    closed_memfile = ZipMemoryFile()
    closed_memfile.close()
    # Sanity check: the handle really is closed before we poke it.
    assert closed_memfile.closed
    with pytest.raises(OSError):
        closed_memfile.open()
def read_records(self, file_name, range_tracker):
    """Yield (properties, geometry) tuples from a zipped geodatabase layer.

    Implements the Beam BoundedSource read protocol: byte positions are
    claimed through `range_tracker` and mapped onto feature indices using
    an average bytes-per-feature estimate, so the source can be split
    across workers.

    Args:
        file_name: path of the zip archive to read via `self.open_file`.
        range_tracker: Beam range tracker supplying `start_position` and
            `try_claim`.
    """
    from fiona import transform
    from fiona.io import ZipMemoryFile
    import json

    total_bytes = self.estimate_size()
    next_pos = range_tracker.start_position()

    def split_points_unclaimed(stop_pos):
        # Beam split-points callback: once stop_pos is at or behind the
        # position we're about to claim, no further splits remain.
        # NOTE(review): not referenced in this block — presumably registered
        # on the tracker elsewhere; verify against the caller.
        return 0 if stop_pos <= next_pos else iobase.RangeTracker.SPLIT_POINTS_UNKNOWN

    with self.open_file(file_name) as f, ZipMemoryFile(f.read()) as mem:
        collection = mem.open(self.gdb_name, layer=self.layer_name)
        src_crs = _GeoSourceUtils.validate_crs(collection.crs, self.in_epsg)

        num_features = len(collection)
        # Approximate bytes per feature; used to translate claimed byte
        # offsets into feature indices below.
        feature_bytes = math.floor(total_bytes / num_features)
        i = 0

        # XXX workaround due to https://github.com/Toblerity/Fiona/issues/996
        features = list(collection)

        logging.info(
            json.dumps({
                'msg': 'read_records',
                'file_name': file_name,
                'profile': collection.profile,
                'num_features': num_features,
                'total_bytes': total_bytes
            }))

        while range_tracker.try_claim(next_pos):
            # Map the claimed byte position to a feature index.
            i = math.ceil(next_pos / feature_bytes)
            if i >= num_features:
                break

            cur_feature = features[i]
            geom = cur_feature['geometry']
            props = cur_feature['properties']

            if not self.skip_reproject:
                # Reproject the geometry into WGS84 unless disabled.
                geom = transform.transform_geom(src_crs, 'epsg:4326', geom)

            yield (props, geom)

            next_pos = next_pos + feature_bytes
def read_file(data, format):
    """Parse raw file bytes into a DataFrame/GeoDataFrame based on format.

    Args:
        data (bytes): raw file contents.
        format (str): one of 'csv', 'json', 'geojson', 'gpkg', 'zip'.

    Returns:
        pandas.DataFrame or geopandas.GeoDataFrame.

    Raises:
        ValueError: if `format` is not one of the supported formats.
    """
    if format == 'csv':
        return pd.read_csv(BytesIO(data))
    if format == 'json':
        return pd.DataFrame(json.loads(data))
    if format in ['geojson', 'gpkg']:
        with BytesCollection(data) as f:
            return gpd.GeoDataFrame.from_features(f, crs=f.crs)
    if format == 'zip':
        with ZipMemoryFile(data) as f:
            for layer in fiona.listlayers(f.name, vfs='zip://'):
                # Only reading the first layer of the Shapefile
                with f.open('{0}.shp'.format(layer)) as collection:
                    return gpd.GeoDataFrame.from_features(collection,
                                                          crs=collection.crs)
    else:
        # Bug fix: the original `raise 'Incompatible format'` raises a bare
        # string, which is a TypeError in Python 3 ("exceptions must derive
        # from BaseException"). Raise a proper exception with the same text.
        raise ValueError('Incompatible format')
def download(self, overwrite=False):
    """Fetch the USFS Pacific Crest Trail shapefile and cache it as GeoJSON.

    Downloads the zip archive into `self.raw_dir` (skipped when a cached
    copy exists, unless `overwrite` is set), reads the trail layer from
    the in-memory zip, reprojects to EPSG:4326, and writes
    `<data_dir>/pct/line/usfs/trail.geojson`.
    """
    url = 'https://www.fs.usda.gov/Internet/FSE_DOCUMENTS/stelprdb5332131.zip'
    archive_path = self.raw_dir / Path(url).name

    # Reuse the cached archive when present, unless asked to overwrite.
    if overwrite or not archive_path.exists():
        urlretrieve(url, archive_path)

    with open(archive_path, 'rb') as raw:
        with ZipMemoryFile(raw.read()) as archive:
            with archive.open('PacificCrestTrail.shp') as collection:
                source_crs = collection.crs
                features = list(collection)

    trail = gpd.GeoDataFrame.from_features(features, crs=source_crs)
    trail = trail.to_crs(epsg=4326)

    out_dir = self.data_dir / 'pct' / 'line' / 'usfs'
    out_dir.mkdir(parents=True, exist_ok=True)
    trail.to_file(out_dir / 'trail.geojson', driver='GeoJSON')
def read_warnings(zipfile, start_date=None, end_date=None):
    """Read wwaVTEC warning records from a shapefile zip or pickle cache.

    Args:
        zipfile: path to either a '.pic' pickle of pre-parsed records or a
            zip archive containing a '.shp' member.
        start_date: optional lower bound; records whose EXPIRED is earlier
            are dropped.
        end_date: optional upper bound; records whose ISSUED is later are
            dropped.

    Returns:
        list of wwaVTEC records matching the date filters.

    Raises:
        ValueError: if the zip archive contains no '.shp' member.
    """
    if zipfile.endswith('.pic'):
        # Cached pickle path. NOTE: pickle.load is only safe on trusted
        # local caches — do not point this at untrusted input.
        with open(zipfile, 'rb') as f:
            records = pickle.load(f)
        return [wwaVTEC(r) for r in records]

    # Read the archive bytes once, closing the handle promptly.
    with open(zipfile, 'rb') as fid:
        member_names = ZipFile(fid).namelist()
        fid.seek(0)
        data = fid.read()

    # Find the shapefile member (last match wins, as in the original loop).
    shapefile = None
    for item in member_names:
        if item.endswith('.shp'):
            shapefile = item
    if shapefile is None:
        # Bug fix: the original left `shapefile` unbound here, producing a
        # confusing NameError when the archive had no .shp member.
        raise ValueError('no .shp member found in archive')

    records = []
    # `zipped` renamed from `zip`, which shadowed the builtin.
    with ZipMemoryFile(data) as zipped:
        with zipped.open(shapefile) as collection:
            for record in collection:
                try:
                    wwa = wwaVTEC(record)
                except Exception:
                    # Best-effort parse: skip malformed records, but no
                    # longer swallow KeyboardInterrupt/SystemExit the way
                    # the original bare `except:` did.
                    continue
                if start_date is None and end_date is None:
                    records.append(wwa)
                elif start_date and end_date is None:
                    if wwa.EXPIRED >= start_date:
                        records.append(wwa)
                elif start_date is None and end_date:
                    if wwa.ISSUED <= end_date:
                        records.append(wwa)
                else:
                    if (wwa.EXPIRED >= start_date and wwa.ISSUED <= end_date
                            ) or (wwa.ISSUED <= end_date
                                  and wwa.EXPIRED >= start_date):
                        records.append(wwa)
    return records
def load_geodataframe(filepath: Path) -> GeoDataFrame:
    """Load a cached zipped shapefile at *filepath* as a GeoDataFrame.

    The archive bytes are read with the plain Python opener and handed to
    Fiona's ZipMemoryFile, deliberately bypassing on-disk zip handling in
    Fiona/GDAL, which produced cryptic and unpredictable errors.
    """
    # Locate the .shp member inside the archive first.
    with ZipFile(filepath, 'r') as archive:
        shp_name: str = next(filter(is_shp_file, archive.filelist)).filename

    # Read raw bytes ourselves so GDAL never sees the filesystem path.
    with open(filepath, 'rb') as raw:
        payload = raw.read()
    with ZipMemoryFile(payload) as memory_zip:
        with memory_zip.open(shp_name) as collection:
            # Load GeoDataFrame using NAD83 projection (EPSG 4269)
            frame = GeoDataFrame.from_features(collection, crs='EPSG:4269')

    # Year column is parsed from characters 3:7 of the member filename.
    frame['year'] = int(shp_name[3:7])
    return frame
def import_data(file_path, dataset):
    """Import a zipped shapefile or a GeoTiff file into the database.

    Creates a RawData/RawShapefile record for zip input and delegates to
    __import_shapefile, or delegates to __import_tiff for anything else.

    Args:
        file_path (String): File Path on system
        dataset (Dataset): Dataset in order to use existing dataset

    Returns:
        True for success, False for failure
        may be worth returning other info to debug?
    """
    if not zipfile.is_zipfile(file_path):
        # Non-zip input is treated as a GeoTiff.
        dataset = __import_tiff(file_path, dataset)
        return dataset is not None

    # Read the archive once and close the handle promptly (the original
    # leaked the file object from open(...).read()).
    with open(file_path, 'rb') as f:
        binary = f.read()

    try:
        with ZipMemoryFile(binary) as zip_mem:
            with zipfile.ZipFile(io.BytesIO(binary)) as zf:
                shapefile_locs = [name for name in zf.namelist()
                                  if name.endswith('.shp')]

                # Persist the raw upload under a fresh UUID path.
                new_path = os.path.join(settings.PRIVATE_STORAGE_ROOT,
                                        "raw_files/", str(uuid.uuid4()))
                raw_shp_data = RawData.objects.create()
                raw_shp_data.name = f"{dataset.name}"
                raw_shp_data.ext = "zip"
                raw_shp_data.path.save(new_path, io.BytesIO(binary))
                raw_shp_data.save()

                raw_shp = RawShapefile.objects.create(rawshp=raw_shp_data,
                                                      dataset=dataset)
                raw_shp.save()

                return __import_shapefile(shapefile_locs, zip_mem, dataset,
                                          zip=zf)
    except zipfile.BadZipfile:
        return False
def read_vector_file_to_df(
    uploaded_file: st.uploaded_file_manager.UploadedFile,
) -> Union[GeoDataFrame, None]:
    """Read an uploaded vector file into a GeoDataFrame.

    Dispatches on the file suffix: KML, WKT, zipped Shapefile, or
    GeoJSON/JSON (the fallback).

    Args:
        uploaded_file: A single bytesIO like object

    Returns:
        Geopandas dataframe
    """
    suffix = Path(uploaded_file.name).suffix

    if suffix == ".kml":
        # st.info("Reading KML file ...")
        gpd.io.file.fiona.drvsupport.supported_drivers["KML"] = "rw"
        return gpd.read_file(uploaded_file, driver="KML")

    if suffix == ".wkt":
        # st.info("Reading WKT file ...")
        wkt_text = uploaded_file.read().decode("utf-8")
        frame = pd.DataFrame({"geometry": [wkt_text]})
        frame["geometry"] = frame["geometry"].apply(shapely.wkt.loads)
        return gpd.GeoDataFrame(frame, geometry="geometry", crs=4326)

    if suffix == ".zip":
        # st.info("Reading zipped Shapefile ...")
        with ZipMemoryFile(uploaded_file) as memfile:
            with memfile.open() as src:
                frame = gpd.GeoDataFrame.from_features(src, crs=src.crs)
        if frame.crs is None:
            st.error("The provided shapefile has no crs!")
            st.stop()
        return frame

    # st.info("Reading GeoJSON/JSON file ...")
    return gpd.read_file(uploaded_file)  # Geojson etc.
def test_zip_memoryfile_infer_layer_name(bytes_coutwildrnp_zip):
    """Opening without a layer name falls back to the default layer."""
    with ZipMemoryFile(bytes_coutwildrnp_zip) as memfile:
        with memfile.open() as collection:
            feature_count = len(collection)
    assert feature_count == 67
def test_zip_memoryfile(bytes_coutwildrnp_zip):
    """A named shapefile layer inside an in-memory zip can be read."""
    memfile = ZipMemoryFile(bytes_coutwildrnp_zip)
    with memfile:
        with memfile.open('coutwildrnp.shp') as collection:
            assert len(collection) == 67