def buffers_for_shadow_pricing(shadow_pricing_info):
    """
    Allocate shared_data buffers for multiprocess shadow pricing

    Allocates one buffer per model_selector. Buffer datatype and shape specified by
    shadow_pricing_info

    buffers are multiprocessing.Array (RawArray protected by a multiprocessing.Lock wrapper)
    We don't actually use the wrapped version as it slows access down and doesn't provide
    protection for numpy-wrapped arrays, but it does provide a convenient way to bundle
    RawArray and an associated lock. (ShadowPriceCalculator uses the lock to coordinate
    access to the numpy-wrapped RawArray.)

    Parameters
    ----------
    shadow_pricing_info : dict

    Returns
    -------
    data_buffers : dict {<model_selector> : <shared_data_buffer>}
        dict of multiprocessing.Array keyed by model_selector
    """

    dtype = shadow_pricing_info['dtype']
    block_shapes = shadow_pricing_info['block_shapes']

    data_buffers = {}
    for block_key, block_shape in block_shapes.items():

        # buffer_size must be int, not np.int64
        buffer_size = util.iprod(block_shape)

        csz = buffer_size * np.dtype(dtype).itemsize
        logger.info(
            "allocating shared shadow pricing buffer %s shape %s buffer_size %s bytes %s (%s)" %
            (block_key, block_shape, buffer_size, csz, util.GB(csz)))

        if np.issubdtype(dtype, np.int64):
            typecode = ctypes.c_int64
        else:
            raise RuntimeError(
                "buffer_for_shadow_pricing unrecognized dtype %s" % dtype)

        shared_data_buffer = multiprocessing.Array(typecode, buffer_size)

        logger.info("buffer_for_shadow_pricing added block %s" % block_key)

        data_buffers[block_key] = shared_data_buffer

    return data_buffers
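# A minimal sketch (not ActivitySim code) of the pattern described in the docstring above:
# multiprocessing.Array bundles a RawArray with a Lock, numpy wraps the underlying ctypes
# array for fast access, and the lock is only taken when updates must be coordinated.
# The (25, 3) shape is a hypothetical placeholder.
import ctypes
import multiprocessing

import numpy as np

shape = (25, 3)
shared_data_buffer = multiprocessing.Array(ctypes.c_int64, int(np.prod(shape)))

with shared_data_buffer.get_lock():                        # coordinate access across processes
    data = np.frombuffer(shared_data_buffer.get_obj(),     # zero-copy numpy view of the RawArray
                         dtype=np.int64).reshape(shape)
    data += 1                                              # visible to every process sharing the buffer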
def _skim_data_from_buffer(self, skim_info, skim_buffer):
    """
    Return a numpy ndarray using skim_buffer as backing store

    Parameters
    ----------
    skim_info : skim info object
        provides dtype_name and skim_data_shape
    skim_buffer : multiprocessing.RawArray or numpy.ndarray
        flat buffer sized to hold skim_data_shape

    Returns
    -------
    skim_data : numpy.ndarray
        zero-copy view of skim_buffer reshaped to skim_info.skim_data_shape
    """

    dtype = np.dtype(skim_info.dtype_name)
    assert len(skim_buffer) == util.iprod(skim_info.skim_data_shape)
    skim_data = np.frombuffer(skim_buffer, dtype=dtype).reshape(skim_info.skim_data_shape)
    return skim_data
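# A minimal sketch (not ActivitySim code): np.frombuffer returns a view rather than a copy,
# so the ndarray produced by _skim_data_from_buffer shares memory with the buffer allocated
# by allocate_skim_buffer below.  The (2, 5, 5) shape and 'f' typecode are hypothetical.
import multiprocessing

import numpy as np

skim_data_shape = (2, 5, 5)                                # (num_skims, num_zones, num_zones)
raw = multiprocessing.RawArray('f', int(np.prod(skim_data_shape)))

skim_data = np.frombuffer(raw, dtype=np.float32).reshape(skim_data_shape)
skim_data[0, 1, 2] = 9.9                                   # write lands in the shared RawArray

assert raw[0 * 25 + 1 * 5 + 2] == np.float32(9.9)          # same element, read back from the buffer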
def allocate_skim_buffer(self, skim_info, shared=False):
    """
    Allocate a ram skim buffer to use as frombuffer for SkimData

    If shared is True, return a shareable multiprocessing.RawArray,
    otherwise a numpy.ndarray

    Parameters
    ----------
    skim_info : skim info object
        provides skim_tag, dtype_name, and skim_data_shape
    shared : boolean

    Returns
    -------
    multiprocessing.RawArray or numpy.ndarray
    """

    assert shared == self.network_los.multiprocess(), \
        f"NumpyArraySkimFactory.allocate_skim_buffer shared {shared} " \
        f"multiprocess {self.network_los.multiprocess()}"

    dtype_name = skim_info.dtype_name
    dtype = np.dtype(dtype_name)

    # multiprocessing.RawArray argument buffer_size must be int, not np.int64
    buffer_size = util.iprod(skim_info.skim_data_shape)

    csz = buffer_size * dtype.itemsize
    logger.info(
        f"allocate_skim_buffer shared {shared} {skim_info.skim_tag} shape {skim_info.skim_data_shape} "
        f"total size: {csz} ({tracing.si_units(csz)})")

    if shared:
        if dtype_name == 'float64':
            typecode = 'd'
        elif dtype_name == 'float32':
            typecode = 'f'
        else:
            raise RuntimeError("allocate_skim_buffer unrecognized dtype %s" % dtype_name)

        buffer = multiprocessing.RawArray(typecode, buffer_size)
    else:
        buffer = np.zeros(buffer_size, dtype=dtype)

    return buffer
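# A minimal sketch (not ActivitySim code): the ctypes typecode chosen above must match the
# numpy dtype's itemsize, otherwise the frombuffer view built by _skim_data_from_buffer
# would come out the wrong length.  The pairs below mirror the mapping in allocate_skim_buffer.
import multiprocessing

import numpy as np

for dtype_name, typecode in [('float64', 'd'), ('float32', 'f')]:
    raw = multiprocessing.RawArray(typecode, 10)
    view = np.frombuffer(raw, dtype=np.dtype(dtype_name))
    assert view.size == 10                                 # one array element per buffer element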
def allocate_data_buffer(self, shared=False):
    """
    Allocate fully_populated_shape data buffer for cached data

    if shared, return a multiprocessing.Array that can be shared across subprocesses
    if not shared, return a numpy ndarray

    Parameters
    ----------
    shared : boolean

    Returns
    -------
    multiprocessing.Array or numpy.ndarray
        sized to hold fully_populated utility array
    """

    assert not self.is_open
    assert shared == self.network_los.multiprocess()

    dtype_name = DTYPE_NAME
    dtype = np.dtype(DTYPE_NAME)

    # multiprocessing.Array argument buffer_size must be int, not np.int64
    shape = self.uid_calculator.fully_populated_shape
    buffer_size = util.iprod(self.uid_calculator.fully_populated_shape)

    csz = buffer_size * dtype.itemsize
    logger.info(
        f"TVPBCache.allocate_data_buffer allocating data buffer "
        f"shape {shape} buffer_size {buffer_size} total size: {csz} ({tracing.si_units(csz)})")

    if shared:
        if dtype_name == 'float64':
            typecode = 'd'
        elif dtype_name == 'float32':
            typecode = 'f'
        else:
            raise RuntimeError("allocate_data_buffer unrecognized dtype %s" % dtype_name)

        if RAWARRAY:
            with memo("TVPBCache.allocate_data_buffer allocate RawArray"):
                buffer = multiprocessing.RawArray(typecode, buffer_size)
            logger.info("TVPBCache.allocate_data_buffer allocated shared multiprocessing.RawArray as buffer")
        else:
            with memo("TVPBCache.allocate_data_buffer allocate Array"):
                buffer = multiprocessing.Array(typecode, buffer_size)
            logger.info("TVPBCache.allocate_data_buffer allocated shared multiprocessing.Array as buffer")
    else:
        buffer = np.empty(buffer_size, dtype=dtype)
        np.copyto(buffer, np.nan)  # fill with np.nan
        logger.info("TVPBCache.allocate_data_buffer allocated non-shared numpy array as buffer")

    return buffer
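# A minimal sketch (not ActivitySim code): the non-shared branch above fills the buffer with
# NaN so "not yet computed" cells are distinguishable, whereas a freshly allocated
# RawArray/Array starts out zero-filled.  The buffer_size below is hypothetical.
import multiprocessing

import numpy as np

buffer_size = 12

shared = np.frombuffer(multiprocessing.RawArray('d', buffer_size), dtype=np.float64)
assert (shared == 0.0).all()                               # shared buffers start out as zeros

local = np.empty(buffer_size, dtype=np.float64)
np.copyto(local, np.nan)                                   # same fill as the non-shared branch
assert np.isnan(local).all()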
def check_fit(self, iteration):
    """
    Check convergence criteria fit of modeled_size to target desired_size
    (For multiprocessing, this is global modeled_size summed across processes,
    so each process will independently calculate the same result.)

    Parameters
    ----------
    iteration : int
        iteration number (informational, for num_fail and max_diff history columns)

    Returns
    -------
    converged : boolean
    """

    # fixme

    if not self.use_shadow_pricing:
        return False

    assert self.modeled_size is not None
    assert self.desired_size is not None

    # - convergence criteria for check_fit
    # ignore convergence criteria for zones smaller than size_threshold
    size_threshold = self.shadow_settings['SIZE_THRESHOLD']
    # zone passes if modeled is within percent_tolerance of desired_size
    percent_tolerance = self.shadow_settings['PERCENT_TOLERANCE']
    # max percentage of zones allowed to fail
    fail_threshold = self.shadow_settings['FAIL_THRESHOLD']

    modeled_size = self.modeled_size
    desired_size = self.desired_size

    abs_diff = (desired_size - modeled_size).abs()

    rel_diff = abs_diff / modeled_size

    # ignore zones where desired_size < threshold
    rel_diff.where(desired_size >= size_threshold, 0, inplace=True)

    # ignore zones where rel_diff < percent_tolerance
    rel_diff.where(rel_diff > (percent_tolerance / 100.0), 0, inplace=True)

    self.num_fail['iter%s' % iteration] = (rel_diff > 0).sum()
    self.max_abs_diff['iter%s' % iteration] = abs_diff.max()
    self.max_rel_diff['iter%s' % iteration] = rel_diff.max()

    total_fails = (rel_diff > 0).values.sum()

    # FIXME - should not count zones where desired_size < threshold? (could calc in init)
    max_fail = (fail_threshold / 100.0) * util.iprod(desired_size.shape)

    converged = (total_fails <= max_fail)

    # for c in desired_size:
    #     print("check_fit %s segment %s" % (self.model_selector, c))
    #     print("  modeled %s" % (modeled_size[c].sum()))
    #     print("  desired %s" % (desired_size[c].sum()))
    #     print("  max abs diff %s" % (abs_diff[c].max()))
    #     print("  max rel diff %s" % (rel_diff[c].max()))

    logger.info("check_fit %s iteration: %s converged: %s max_fail: %s total_fails: %s" %
                (self.model_selector, iteration, converged, max_fail, total_fails))

    # - convergence stats
    if converged or iteration == self.max_iterations:
        logger.info("\nshadow_pricing max_abs_diff\n%s" % self.max_abs_diff)
        logger.info("\nshadow_pricing max_rel_diff\n%s" % self.max_rel_diff)
        logger.info("\nshadow_pricing num_fail\n%s" % self.num_fail)

    return converged
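# A minimal worked example (not ActivitySim code) of the convergence test above, using a
# single segment as a pandas Series instead of the multi-segment DataFrame used by check_fit,
# with made-up settings and sizes: zones below SIZE_THRESHOLD are ignored, a zone fails when
# its relative difference exceeds PERCENT_TOLERANCE, and the run converges when failing zones
# stay under FAIL_THRESHOLD percent of all zones.
import pandas as pd

size_threshold, percent_tolerance, fail_threshold = 10, 5, 10     # hypothetical settings

desired = pd.Series([100.0, 100.0, 5.0, 200.0])
modeled = pd.Series([104.0, 120.0, 50.0, 201.0])

abs_diff = (desired - modeled).abs()
rel_diff = abs_diff / modeled
rel_diff.where(desired >= size_threshold, 0, inplace=True)        # ignore zones below threshold
rel_diff.where(rel_diff > percent_tolerance / 100.0, 0, inplace=True)

total_fails = (rel_diff > 0).sum()                                # only zone 1 (100 vs 120) fails
max_fail = (fail_threshold / 100.0) * desired.size                # 10% of 4 zones = 0.4
converged = total_fails <= max_fail                               # 1 > 0.4 -> not converged
print(total_fails, max_fail, converged)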