def _localclient(memory: int) -> Client:
    """Spin up a ``LocalCluster`` and return a ``Client`` attached to it.

    The worker count is the smaller of the CPU count and the number of
    ``memory``-sized slices that fit into ``memory_limit()``, but never
    fewer than one. Each worker runs a single thread and lets the cluster
    choose its own memory limit (``"auto"``).

    Parameters
    ----------
    memory : int
        Memory budgeted per worker, in the same unit ``memory_limit()``
        reports (presumably bytes -- confirm against the caller).

    Returns
    -------
    Client
        Client constructed with the new cluster as its address.
    """
    cores = cpu_count()
    total_memory = memory_limit()

    # How many workers of the requested size fit into the available memory.
    fit_in_memory = total_memory // memory
    worker_cap = min(fit_in_memory, cores)

    local_cluster = LocalCluster(
        n_workers=int(max(1, worker_cap)),
        threads_per_worker=1,
        memory_limit="auto",
    )
    return Client(address=local_cluster)
def test_cpu_count_cgroups(dirname, monkeypatch):
    """Check that ``cpu_count`` honours a cgroup CPU quota when one is present.

    ``os.cpu_count`` and ``psutil.Process`` are patched so that neither can
    yield a realistic limit. When ``dirname`` is truthy, fake cgroup quota and
    period files are served through a patched ``open`` and the quota
    (2005 / 10 = 200.5) is expected to be rounded up to 201. Otherwise the
    patched ``os.cpu_count`` value is expected unchanged.
    """
    # Absurdly high, unlikely to match real value
    monkeypatch.setattr(os, "cpu_count", lambda: 250)

    class _NoAffinityProcess:
        def cpu_affinity(self):
            # No affinity set
            return []

    monkeypatch.setattr(psutil, "Process", _NoAffinityProcess)

    if dirname:
        quota_file = "/sys/fs/cgroup/%s/cpu.cfs_quota_us" % dirname
        period_file = "/sys/fs/cgroup/%s/cpu.cfs_period_us" % dirname
        fake_files = {
            quota_file: io.StringIO("2005"),
            period_file: io.StringIO("10"),
        }
        real_open = builtins.open

        def fake_open(path, *args, **kwargs):
            # Serve the in-memory cgroup files; defer everything else to
            # the real ``open``.
            stream = fake_files.get(path)
            if stream is not None:
                return stream
            return real_open(path, *args, **kwargs)

        monkeypatch.setattr(builtins, "open", fake_open)
        monkeypatch.setattr(sys, "platform", "linux")

    # Rounds up when the cgroup quota applies
    expected = 201 if dirname else 250
    assert cpu_count() == expected
def _enclosed_tessellation(
    self,
    buildings,
    enclosures,
    unique_id,
    enclosure_id="eID",
    threshold=0.05,
    use_dask=True,
    n_chunks=None,
    **kwargs,
):
    """Enclosed tessellation

    Generate enclosed tessellation based on barriers defining enclosures and
    buildings footprints.

    Enclosures intersecting more than one building are tessellated via
    ``self._tess``. Enclosures intersecting exactly one building are kept
    whole and tagged with that building's id. Enclosures intersecting no
    building are kept whole with no building id.

    Parameters
    ----------
    buildings : GeoDataFrame
        GeoDataFrame containing building footprints. Expects (Multi)Polygon
        geometry.
    enclosures : GeoDataFrame
        Enclosures geometry. Can be generated using :func:`momepy.enclosures`.
    unique_id : str
        name of the column with unique id of buildings gdf
    enclosure_id : str (default 'eID')
        name of the column of ``enclosures`` holding the enclosure identifier.
        The lookup of single-building enclosures uses its values as
        positions into ``enclosures`` (after the index reset), so they are
        assumed to match the row order.
    threshold : float (default 0.05)
        The minimum threshold for a building to be considered within an
        enclosure. Threshold is a ratio of building area which needs to be
        within an enclosure to include it in the tessellation of that
        enclosure. Resolves sliver geometry issues.
    use_dask : bool (default True)
        Use parallelised algorithm based on ``dask.dataframe``. Requires dask.
        Falls back to the serial algorithm (with a warning) if dask cannot
        be imported.
    n_chunks : None
        Number of chunks to be used in parallelization. Ideal is one chunk per
        thread. Used only with dask. Default is
        ``dask.system.cpu_count() - 1``, with a minimum of 1.
    **kwargs
        Keyword arguments passed to Tessellation algorithm (as ``shrink``
        or ``segment``).

    Returns
    -------
    tessellation : GeoDataFrame
        gdf contains three columns:
            geometry,
            unique_id matching with parental building,
            enclosure_id matching with enclosure integer index

    Examples
    --------
    >>> enclosures = mm.enclosures(streets, admin_boundary, [railway, rivers])
    >>> enclosed_tess = mm.enclosed_tessellation(buildings, enclosures)
    """
    enclosures = enclosures.reset_index(drop=True)

    # determine which polygons should be split
    # NOTE(review): ``sindex.query_bulk`` is deprecated in recent geopandas in
    # favour of ``sindex.query``; kept for compatibility with older releases.
    inp, res = buildings.sindex.query_bulk(
        enclosures.geometry, predicate="intersects"
    )
    unique, counts = np.unique(inp, return_counts=True)
    splits = unique[counts > 1]
    single = unique[counts == 1]

    if use_dask:
        try:
            import dask.dataframe as dd
            from dask.system import cpu_count
        except ImportError:
            use_dask = False

            import warnings

            warnings.warn(
                "dask.dataframe could not be imported. Setting `use_dask=False`."
            )

    if len(splits) == 0:
        # Nothing to tessellate; avoids a zero chunksize and an empty concat.
        new = []
    elif use_dask:
        if n_chunks is None:
            n_chunks = max(1, cpu_count() - 1)
        # initialize dask.series; chunksize must stay positive even when
        # there are fewer enclosures to split than chunks
        ds = dd.from_array(splits, chunksize=max(1, len(splits) // n_chunks))
        # generate enclosed tessellation using dask, forwarding the same
        # keyword arguments the serial path passes to ``_tess``
        new = (
            ds.apply(
                self._tess,
                meta=(None, "object"),
                args=(enclosures, buildings, inp, res, threshold, unique_id),
                **kwargs,
            )
            .compute()
            .to_list()
        )
    else:
        new = [
            self._tess(
                i,
                enclosures,
                buildings,
                inp,
                res,
                threshold=threshold,
                unique_id=unique_id,
                **kwargs,
            )
            for i in splits
        ]

    # finalise the result
    clean_blocks = enclosures.drop(splits)
    # Tag single-building enclosures with that building's id, written to the
    # caller-supplied ``unique_id`` column so it lines up with ``_tess`` output.
    clean_blocks.loc[single, unique_id] = clean_blocks.loc[single][
        enclosure_id
    ].apply(lambda ix: buildings.iloc[res[inp == ix][0]][unique_id])

    # ``DataFrame.append`` was removed in pandas 2.0; concatenate in one go.
    return pd.concat([*new, clean_blocks]).reset_index(drop=True)
def test_cpu_count():
    """Check that ``cpu_count`` returns an int between 1 and ``os.cpu_count()``."""
    detected = cpu_count()
    assert isinstance(detected, int)

    # Upper bound: never more than the host reports.
    host_total = os.cpu_count()
    assert detected <= host_total

    # Lower bound: always at least one usable CPU.
    assert detected >= 1