def test_cache_working(self):
    """Exercise the pickle cache used by ``read_shapefile``.

    Checks that an uncached read leaves no cache file, that a cached read
    creates one, that the cache path of a cache path is itself, and that
    re-creating the shapefile leads to a new cache file while the old one
    is gone. Finally checks the bounds columns and the ValueError raised
    for unrecognised file extensions.
    """
    shp_path = create_dummy_shp('f1.shp')
    first_cache = utils.cached_shapefile_path(shp_path)

    # Nothing cached up front, and a plain read must not create the cache.
    self.assertFalse(os.path.exists(first_cache))
    read_shapefile(shp_path)
    self.assertFalse(os.path.exists(first_cache))

    # A cached read writes the cache file.
    read_shapefile(shp_path, cached=True)
    self.assertTrue(os.path.exists(first_cache))

    # nested calls
    nested_cache = utils.cached_shapefile_path(first_cache)
    self.assertTrue(first_cache == nested_cache)

    # wait a bit
    # (presumably so the re-created file is seen as different -- confirm)
    time.sleep(0.1)
    shp_path = create_dummy_shp(shp_path)
    second_cache = utils.cached_shapefile_path(shp_path)
    self.assertFalse(os.path.exists(first_cache))
    read_shapefile(shp_path, cached=True)
    self.assertFalse(os.path.exists(first_cache))
    self.assertTrue(os.path.exists(second_cache))

    # The frame read through the cache carries the four bounds columns.
    frame = read_shapefile(shp_path, cached=True)
    expected_bounds = (
        ('min_x', [1., 2.]),
        ('max_x', [2., 3.]),
        ('min_y', [1., 1.3]),
        ('max_y', [2., 2.3]),
    )
    for column, values in expected_bounds:
        np.testing.assert_allclose(getattr(frame, column), values)

    # Unrecognised extensions are rejected by both entry points.
    with self.assertRaises(ValueError):
        read_shapefile('f1.sph')
    with self.assertRaises(ValueError):
        utils.cached_shapefile_path('f1.splash')
def test_cache_working(self):
    """Check creation, reuse and renewal of the shapefile pickle cache.

    Also verifies the bounds columns of the returned frame and that
    unrecognised file extensions raise ValueError.
    """
    exists = os.path.exists

    source = create_dummy_shp("f1.shp")
    old_cache = utils.cached_shapefile_path(source)
    self.assertFalse(exists(old_cache))

    # Reading without caching leaves the cache untouched.
    read_shapefile(source)
    self.assertFalse(exists(old_cache))

    # Reading with caching creates the cache file.
    read_shapefile(source, cached=True)
    self.assertTrue(exists(old_cache))

    # nested calls
    self.assertTrue(old_cache == utils.cached_shapefile_path(old_cache))

    # wait a bit, then write the dummy shapefile again
    time.sleep(0.1)
    source = create_dummy_shp(source)
    new_cache = utils.cached_shapefile_path(source)
    self.assertFalse(exists(old_cache))

    read_shapefile(source, cached=True)
    self.assertFalse(exists(old_cache))
    self.assertTrue(exists(new_cache))

    # Bounds columns of the frame obtained via the cache.
    shapes = read_shapefile(source, cached=True)
    expected = {
        "min_x": [1.0, 2.0],
        "max_x": [2.0, 3.0],
        "min_y": [1.0, 1.3],
        "max_y": [2.0, 2.3],
    }
    for name in ("min_x", "max_x", "min_y", "max_y"):
        np.testing.assert_allclose(getattr(shapes, name), expected[name])

    # Unknown extensions must raise.
    with self.assertRaises(ValueError):
        read_shapefile("f1.sph")
    with self.assertRaises(ValueError):
        utils.cached_shapefile_path("f1.splash")
def read_shapefile_to_grid(fpath, grid):
    """Same as read_shapefile but directly transformed to a grid.

    The whole thing is cached so that the second call will be much faster.

    Parameters
    ----------
    fpath: path to the file
    grid: the arrival grid
    """
    pickle_path = cached_shapefile_path(fpath)

    # The transform below is handed the path of the pickled shapefile, so
    # make sure that pickle exists first (same logic as the cached branch
    # of read_shapefile).
    cache_exists = os.path.exists(pickle_path)
    if not cache_exists:
        shapes = read_shapefile(fpath, cached=False)
        with open(pickle_path, 'wb') as cache_file:
            pickle.dump(shapes, cache_file)

    grid_kwargs = grid.to_dict()
    return _memory_shapefile_to_grid(pickle_path, grid=grid, **grid_kwargs)
def read_shapefile_to_grid(fpath, grid):
    """Same as read_shapefile but directly transformed to a grid.

    The whole thing is cached so that the second call will be much faster.

    Parameters
    ----------
    fpath: path to the file
    grid: the arrival grid
    """
    pickle_path = cached_shapefile_path(fpath)

    # The transform below is handed the path of the pickled shapefile, so
    # make sure that pickle exists first (same logic as the cached branch
    # of read_shapefile).
    cache_exists = os.path.exists(pickle_path)
    if not cache_exists:
        shapes = read_shapefile(fpath, cached=False)
        with open(pickle_path, 'wb') as cache_file:
            pickle.dump(shapes, cache_file)

    # The grid's string form is passed alongside the grid object itself.
    grid_label = str(grid)
    return _memory_transform(pickle_path, grid=grid, grid_str=grid_label)
def read_shapefile(fpath, cached=False):
    """Reads a shapefile using geopandas.

    For convenience, it adds four columns to the dataframe:
    [min_x, max_x, min_y, max_y]

    Because reading a shapefile can take a long time, Salem provides a
    caching utility (cached=True). This will save a pickle of the shapefile
    in the cache directory.

    Parameters
    ----------
    fpath: path to the file; its extension must be '.shp' or '.p'
        (compared case-insensitively)
    cached: if True, unpickle the cached copy when it exists, otherwise
        read the file and pickle the result for the next call

    Returns
    -------
    the dataframe read by geopandas (or unpickled from the cache), with
    the four bounds columns added

    Raises
    ------
    ValueError: if the file extension is not recognised
    """
    # Validate the extension first, so that an unsupported file is reported
    # as a ValueError even when geopandas is not importable.
    _, ext = os.path.splitext(fpath)
    if ext.lower() not in ('.shp', '.p'):
        raise ValueError('File extension not recognised: {}'.format(ext))

    if cached:
        cpath = cached_shapefile_path(fpath)
        # unpickle if cached, read and pickle if not
        if os.path.exists(cpath):
            # NOTE(review): pickle.load executes arbitrary code; this
            # assumes the cache directory only holds files written by this
            # function -- confirm it is not shared with untrusted users.
            with open(cpath, 'rb') as f:
                return pickle.load(f)
        out = read_shapefile(fpath, cached=False)
        with open(cpath, 'wb') as f:
            pickle.dump(out, f)
        return out

    # NOTE(review): a '.p' path passes the extension check above and is
    # handed to geopandas as-is when cached=False -- confirm this is intended.
    # Imported lazily: only this branch needs geopandas.
    import geopandas as gpd

    out = gpd.read_file(fpath)
    # Each geometry's bounds are ordered (min_x, min_y, max_x, max_y);
    # compute them once and fan them out into the four columns.
    bounds = [g.bounds for g in out.geometry]
    out['min_x'] = [b[0] for b in bounds]
    out['max_x'] = [b[2] for b in bounds]
    out['min_y'] = [b[1] for b in bounds]
    out['max_y'] = [b[3] for b in bounds]
    return out