Example #1
def buffers_for_shadow_pricing(shadow_pricing_info):
    """
    Allocate shared_data buffers for multiprocess shadow pricing

    Allocates one buffer per model_selector.
    Buffer datatype and shape specified by shadow_pricing_info

    buffers are multiprocessing.Array (RawArray protected by a multiprocessing.Lock wrapper)
    We don't actually use the wrapped version as it slows access down and doesn't provide
    protection for numpy-wrapped arrays, but it does provide a convenient way to bundle
    RawArray and an associated lock. (ShadowPriceCalculator uses the lock to coordinate access to
    the numpy-wrapped RawArray.)

    Parameters
    ----------
    shadow_pricing_info : dict

    Returns
    -------
    data_buffers : dict {<model_selector> : <shared_data_buffer>}
        dict of multiprocessing.Array keyed by model_selector
    """

    dtype = shadow_pricing_info['dtype']
    block_shapes = shadow_pricing_info['block_shapes']

    data_buffers = {}
    for block_key, block_shape in block_shapes.items():

        # buffer_size must be int, not np.int64
        buffer_size = util.iprod(block_shape)

        csz = buffer_size * np.dtype(dtype).itemsize
        logger.info(
            "allocating shared shadow pricing buffer %s shape %s buffer_size %s bytes %s (%s)"
            % (block_key, block_shape, buffer_size, csz, util.GB(csz)))

        if np.issubdtype(dtype, np.int64):
            typecode = ctypes.c_int64
        else:
            raise RuntimeError(
                "buffers_for_shadow_pricing unrecognized dtype %s" % dtype)

        shared_data_buffer = multiprocessing.Array(typecode, buffer_size)

        logger.info("buffer_for_shadow_pricing added block %s" % block_key)

        data_buffers[block_key] = shared_data_buffer

    return data_buffers
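
# Hedged usage sketch (not part of the source): how a consumer such as
# ShadowPriceCalculator can use the lock bundled into multiprocessing.Array to
# coordinate access to the numpy-wrapped RawArray.  The int64 dtype matches the
# allocation above; the (3, 2) block shape and the function name are assumptions.
def _example_shadow_price_buffer_access():
    import ctypes
    import multiprocessing

    import numpy as np

    shared = multiprocessing.Array(ctypes.c_int64, 6)            # Lock-wrapped RawArray
    with shared.get_lock():                                       # serialize concurrent writers
        view = np.frombuffer(shared.get_obj(), dtype=np.int64)    # zero-copy numpy view
        view.reshape((3, 2))[:, 0] += 1                           # writes land in shared memory
    return shared
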
    def _skim_data_from_buffer(self, skim_info, skim_buffer):
        """
        return a numpy ndarray using skim_buffer as backing store

        Parameters
        ----------
        skim_info
            skim info object exposing dtype_name and skim_data_shape
        skim_buffer
            buffer (e.g. multiprocessing.RawArray) whose length equals the
            product of skim_data_shape

        Returns
        -------
        skim_data : numpy.ndarray
            zero-copy ndarray view backed by skim_buffer

        """

        dtype = np.dtype(skim_info.dtype_name)
        assert len(skim_buffer) == util.iprod(skim_info.skim_data_shape)
        skim_data = np.frombuffer(skim_buffer, dtype=dtype).reshape(
            skim_info.skim_data_shape)
        return skim_data
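
    # Hedged usage sketch (not part of the source): the ndarray returned by
    # _skim_data_from_buffer is a zero-copy view, so writes through it are visible
    # to every process sharing skim_buffer.  The 'f' typecode, (2, 3, 4) shape and
    # the method name are assumptions for illustration.
    def _example_skim_buffer_roundtrip(self):
        import multiprocessing

        import numpy as np

        buf = multiprocessing.RawArray('f', 2 * 3 * 4)
        skims = np.frombuffer(buf, dtype=np.float32).reshape((2, 3, 4))
        skims[0, 0, 0] = 1.5      # a write through the numpy view ...
        assert buf[0] == 1.5      # ... shows up in the RawArray backing store
        return skims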
    def allocate_skim_buffer(self, skim_info, shared=False):
        """
        Allocate a RAM skim buffer to use as the frombuffer backing store for SkimData
        If shared is True, return a shareable multiprocessing.RawArray, otherwise a numpy.ndarray

        Parameters
        ----------
        skim_info: dict
        shared: boolean

        Returns
        -------
        multiprocessing.RawArray or numpy.ndarray
        """

        assert shared == self.network_los.multiprocess(), \
            f"NumpyArraySkimFactory.allocate_skim_buffer shared {shared} " \
            f"multiprocess {self.network_los.multiprocess()}"

        dtype_name = skim_info.dtype_name
        dtype = np.dtype(dtype_name)

        # multiprocessing.RawArray argument buffer_size must be int, not np.int64
        buffer_size = util.iprod(skim_info.skim_data_shape)

        csz = buffer_size * dtype.itemsize
        logger.info(
            f"allocate_skim_buffer shared {shared} {skim_info.skim_tag} shape {skim_info.skim_data_shape} "
            f"total size: {csz} ({tracing.si_units(csz)})")

        if shared:
            if dtype_name == 'float64':
                typecode = 'd'
            elif dtype_name == 'float32':
                typecode = 'f'
            else:
                raise RuntimeError(
                    "allocate_skim_buffer unrecognized dtype %s" % dtype_name)

            buffer = multiprocessing.RawArray(typecode, buffer_size)
        else:
            buffer = np.zeros(buffer_size, dtype=dtype)

        return buffer
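
    # Hedged sketch (not part of the source): whichever branch above runs, the
    # buffer holds buffer_size elements of the requested dtype, so the shared
    # RawArray and the non-shared ndarray can both be reshaped to skim_data_shape.
    # The float32 dtype, (2, 5, 5) shape and the method name are assumptions.
    def _example_skim_buffer_equivalence(self):
        import multiprocessing

        import numpy as np

        shape = (2, 5, 5)
        buffer_size = int(np.prod(shape))
        shared_buf = multiprocessing.RawArray('f', buffer_size)    # typecode 'f' <-> float32
        local_buf = np.zeros(buffer_size, dtype=np.float32)
        assert len(shared_buf) == local_buf.size == buffer_size
        shared_view = np.frombuffer(shared_buf, dtype=np.float32).reshape(shape)
        local_view = local_buf.reshape(shape)
        return shared_view, local_view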
    def allocate_data_buffer(self, shared=False):
        """
        allocate fully_populated_shape data buffer for cached data

        if shared, return a multiprocessing RawArray (or Array, depending on the RAWARRAY flag)
        that can be shared across subprocesses
        if not shared, return a numpy ndarray

        Parameters
        ----------
        shared: boolean

        Returns
        -------
            multiprocessing RawArray/Array or numpy ndarray sized to hold the fully_populated utility array
        """

        assert not self.is_open
        assert shared == self.network_los.multiprocess()

        dtype_name = DTYPE_NAME
        dtype = np.dtype(DTYPE_NAME)

        # multiprocessing.Array argument buffer_size must be int, not np.int64
        shape = self.uid_calculator.fully_populated_shape
        buffer_size = util.iprod(self.uid_calculator.fully_populated_shape)

        csz = buffer_size * dtype.itemsize
        logger.info(
            f"TVPBCache.allocate_data_buffer allocating data buffer "
            f"shape {shape} buffer_size {buffer_size} total size: {csz} ({tracing.si_units(csz)})"
        )

        if shared:
            if dtype_name == 'float64':
                typecode = 'd'
            elif dtype_name == 'float32':
                typecode = 'f'
            else:
                raise RuntimeError(
                    "allocate_data_buffer unrecognized dtype %s" % dtype_name)

            if RAWARRAY:
                with memo("TVPBCache.allocate_data_buffer allocate RawArray"):
                    buffer = multiprocessing.RawArray(typecode, buffer_size)
                logger.info(
                    f"TVPBCache.allocate_data_buffer allocated shared multiprocessing.RawArray as buffer"
                )
            else:
                with memo("TVPBCache.allocate_data_buffer allocate Array"):
                    buffer = multiprocessing.Array(typecode, buffer_size)
                logger.info(
                    f"TVPBCache.allocate_data_buffer allocated shared multiprocessing.Array as buffer"
                )

        else:
            buffer = np.empty(buffer_size, dtype=dtype)
            np.copyto(buffer, np.nan)  # fill with np.nan

            logger.info(
                f"TVPBCache.allocate_data_buffer allocated non-shared numpy array as buffer"
            )

        return buffer
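
    # Hedged sketch (not part of the source): how a consumer might wrap the buffer
    # returned above as a float64 array shaped to fully_populated_shape.  The (4, 3)
    # shape and the method name are assumptions; a shared RawArray starts zero-filled,
    # while the non-shared numpy path above is pre-filled with np.nan.
    def _example_wrap_data_buffer(self):
        import multiprocessing

        import numpy as np

        shape = (4, 3)
        buffer = multiprocessing.RawArray('d', int(np.prod(shape)))   # typecode 'd' <-> float64
        data = np.frombuffer(buffer, dtype=np.float64).reshape(shape)
        data.fill(np.nan)          # mark every cell as "not yet computed"
        return data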
    def check_fit(self, iteration):
        """
        Check convergence criteria: fit of modeled_size to the target desired_size
        (For multiprocessing, this is global modeled_size summed across processes,
        so each process will independently calculate the same result.)

        Parameters
        ----------
        iteration: int
            iteration number (informational, for num_fail and max_diff history columns)

        Returns
        -------
        converged: boolean

        """

        # fixme

        if not self.use_shadow_pricing:
            return False

        assert self.modeled_size is not None
        assert self.desired_size is not None

        # - convergence criteria for check_fit
        # ignore convergence criteria for zones smaller than size_threshold
        size_threshold = self.shadow_settings['SIZE_THRESHOLD']
        # zone passes if modeled is within percent_tolerance of desired_size
        percent_tolerance = self.shadow_settings['PERCENT_TOLERANCE']
        # max percentage of zones allowed to fail
        fail_threshold = self.shadow_settings['FAIL_THRESHOLD']

        modeled_size = self.modeled_size
        desired_size = self.desired_size

        abs_diff = (desired_size - modeled_size).abs()

        rel_diff = abs_diff / modeled_size

        # ignore zones where desired_size < threshold
        rel_diff.where(desired_size >= size_threshold, 0, inplace=True)

        # ignore zones where rel_diff < percent_tolerance
        rel_diff.where(rel_diff > (percent_tolerance / 100.0), 0, inplace=True)

        self.num_fail['iter%s' % iteration] = (rel_diff > 0).sum()
        self.max_abs_diff['iter%s' % iteration] = abs_diff.max()
        self.max_rel_diff['iter%s' % iteration] = rel_diff.max()

        total_fails = (rel_diff > 0).values.sum()

        # FIXME - should not count zones where desired_size < threshold? (could calc in init)
        max_fail = (fail_threshold / 100.0) * util.iprod(desired_size.shape)

        converged = (total_fails <= max_fail)

        # for c in desired_size:
        #     print("check_fit %s segment %s" % (self.model_selector, c))
        #     print("  modeled %s" % (modeled_size[c].sum()))
        #     print("  desired %s" % (desired_size[c].sum()))
        #     print("  max abs diff %s" % (abs_diff[c].max()))
        #     print("  max rel diff %s" % (rel_diff[c].max()))

        logger.info("check_fit %s iteration: %s converged: %s max_fail: %s total_fails: %s" %
                    (self.model_selector, iteration, converged, max_fail, total_fails))

        # - convergence stats
        if converged or iteration == self.max_iterations:
            logger.info("\nshadow_pricing max_abs_diff\n%s" % self.max_abs_diff)
            logger.info("\nshadow_pricing max_rel_diff\n%s" % self.max_rel_diff)
            logger.info("\nshadow_pricing num_fail\n%s" % self.num_fail)

        return converged
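
    # Hedged worked example (not part of the source): the convergence test above on
    # toy pandas data.  The thresholds (size_threshold=10, percent_tolerance=5,
    # fail_threshold=10), the sizes, and the method name are assumptions.
    def _example_check_fit_criteria(self):
        import numpy as np
        import pandas as pd

        desired = pd.DataFrame({'seg': [100.0, 8.0, 50.0]})
        modeled = pd.DataFrame({'seg': [104.0, 2.0, 50.0]})

        abs_diff = (desired - modeled).abs()
        rel_diff = abs_diff / modeled
        rel_diff.where(desired >= 10, 0, inplace=True)        # ignore zones below size_threshold
        rel_diff.where(rel_diff > 0.05, 0, inplace=True)      # within 5% counts as passing

        total_fails = (rel_diff > 0).values.sum()             # zone 0 is off by ~3.8%, so 0 fails
        max_fail = (10 / 100.0) * int(np.prod(desired.shape))  # at most 10% of zones may fail
        return total_fails <= max_fail                          # True: converged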