def skim_dict(data_dir, settings):
    """
    Load skim data from the omx file named in settings and wrap it in a SkimDict.

    Reuses the 'data_buffers' injectable if one exists (e.g. shared buffers
    allocated by a multiprocessing parent); otherwise allocates local
    (non-shared) buffers and loads the skims from disk.

    Parameters
    ----------
    data_dir : str
        unused by the visible code; kept for injectable-signature compatibility
    settings : dict
        must provide 'skims_file' and 'skim_time_periods'['labels']

    Returns
    -------
    skim_dict : skim.SkimDict
        with offset_mapper configured for 1-based zone ids (offset -1)
    """
    omx_file_path = config.data_file_path(settings["skims_file"])
    tags_to_load = settings['skim_time_periods']['labels']

    logger.info("loading skim_dict from %s" % (omx_file_path, ))

    # select the skims to load
    skim_info = get_skim_info(omx_file_path, tags_to_load)

    logger.debug("omx_shape %s skim_dtype %s" % (skim_info['omx_shape'], skim_info['dtype']))

    skim_buffers = inject.get_injectable('data_buffers', None)
    if skim_buffers:
        logger.info('Using existing skim_buffers for skims')
    else:
        skim_buffers = buffers_for_skims(skim_info, shared=False)
        load_skims(omx_file_path, skim_info, skim_buffers)

    skim_data = skim_data_from_buffers(skim_buffers, skim_info)

    # skim_data is ordered to match skim_info['blocks']; zip pairs each block
    # name with its data array (replaces indexing via range(len(...)))
    for block_name, block_data in zip(skim_info['blocks'], skim_data):
        logger.info(
            "block_name %s bytes %s (%s)" %
            (block_name, block_data.nbytes, util.GB(block_data.nbytes)))

    # create skim dict
    skim_dict = skim.SkimDict(skim_data, skim_info)
    skim_dict.offset_mapper.set_offset_int(-1)

    return skim_dict
def buffers_for_skims(skim_info, shared=False):
    """
    Allocate one flat buffer per skim block described by skim_info.

    Parameters
    ----------
    skim_info : dict
        must provide 'dtype', 'omx_shape', and 'blocks' (block_name -> skim count)
    shared : bool
        if True, allocate multiprocessing.RawArray buffers suitable for
        sharing across processes; otherwise allocate plain numpy zero arrays

    Returns
    -------
    dict
        block_name -> buffer (RawArray when shared, else np.ndarray)
    """
    dtype = skim_info['dtype']
    shape = skim_info['omx_shape']

    buffers = {}
    for name, n_skims in skim_info['blocks'].items():
        # RawArray wants a plain python int, not np.int64
        n_elements = int(multiply_large_numbers(shape) * n_skims)
        item_bytes = np.dtype(dtype).itemsize
        total_bytes = n_elements * item_bytes

        logger.info(
            "allocating shared buffer %s for %s skims (skim size: %s * %s bytes = %s) total size: %s (%s)" %
            (name, n_skims, shape, item_bytes, n_elements, total_bytes, util.GB(total_bytes)))

        if not shared:
            buffers[name] = np.zeros(n_elements, dtype=dtype)
            continue

        if np.issubdtype(dtype, np.float64):
            code = 'd'
        elif np.issubdtype(dtype, np.float32):
            code = 'f'
        else:
            raise RuntimeError("buffers_for_skims unrecognized dtype %s" % dtype)

        buffers[name] = multiprocessing.RawArray(code, n_elements)

    return buffers
def buffers_for_skims(skim_info, shared=False):
    """
    Allocate one flat buffer per skim block described by skim_info.

    NOTE(review): this duplicates (and, being defined later, shadows) the
    other buffers_for_skims definition in this file — confirm which one is
    intended to survive.

    Parameters
    ----------
    skim_info : dict
        must provide 'dtype', 'omx_shape', and 'blocks' (block_name -> block size)
    shared : bool
        if True, allocate multiprocessing.RawArray buffers for cross-process
        sharing; otherwise plain numpy zero arrays

    Returns
    -------
    dict
        block_name -> buffer (RawArray when shared, else np.ndarray)
    """
    skim_dtype = skim_info['dtype']
    omx_shape = skim_info['omx_shape']
    blocks = skim_info['blocks']

    skim_buffers = {}
    for block_name, block_size in blocks.items():
        # buffer_size must be a plain python int, not np.int64.
        # Compute the product with exact (arbitrary-precision) python ints:
        # the previous float64 np.prod silently loses precision for element
        # counts above 2**53 and can overflow intermediate results.
        buffer_size = 1
        for dim in omx_shape:
            buffer_size *= int(dim)
        buffer_size = int(buffer_size * block_size)

        csz = buffer_size * np.dtype(skim_dtype).itemsize
        logger.info("allocating shared buffer %s for %s (%s) matrices (%s)" %
                    (block_name, buffer_size, omx_shape, util.GB(csz)))

        if shared:
            if np.issubdtype(skim_dtype, np.float64):
                typecode = 'd'
            elif np.issubdtype(skim_dtype, np.float32):
                typecode = 'f'
            else:
                raise RuntimeError("buffers_for_skims unrecognized dtype %s" % skim_dtype)

            buffer = multiprocessing.RawArray(typecode, buffer_size)
        else:
            buffer = np.zeros(buffer_size, dtype=skim_dtype)

        skim_buffers[block_name] = buffer

    return skim_buffers
def buffers_for_shadow_pricing(shadow_pricing_info):
    """
    Allocate shared_data buffers for multiprocess shadow pricing

    Allocates one buffer per model_selector. Buffer datatype and shape
    specified by shadow_pricing_info

    buffers are multiprocessing.Array (RawArray protected by a multiprocessing.Lock wrapper)

    We don't actually use the wrapped version as it slows access down and doesn't provide
    protection for numpy-wrapped arrays, but it does provide a convenient way to bundle
    RawArray and an associated lock. (ShadowPriceCalculator uses the lock to coordinate
    access to the numpy-wrapped RawArray.)

    Parameters
    ----------
    shadow_pricing_info : dict

    Returns
    -------
    data_buffers : dict {<model_selector> : <shared_data_buffer>}
        dict of multiprocessing.Array keyed by model_selector
    """
    dtype = shadow_pricing_info['dtype']
    block_shapes = shadow_pricing_info['block_shapes']

    data_buffers = {}
    for block_key, block_shape in block_shapes.items():

        # buffer_size must be int, not np.int64
        buffer_size = util.iprod(block_shape)

        csz = buffer_size * np.dtype(dtype).itemsize
        # bugfix: block_shape and buffer_size were swapped relative to the
        # message labels ("buffer <key> <shape> buffer_size <size> bytes <csz>")
        logger.info(
            "allocating shared shadow pricing buffer %s %s buffer_size %s bytes %s (%s)" %
            (block_key, block_shape, buffer_size, csz, util.GB(csz)))

        if np.issubdtype(dtype, np.int64):
            typecode = ctypes.c_int64
        else:
            raise RuntimeError(
                "buffer_for_shadow_pricing unrecognized dtype %s" % dtype)

        shared_data_buffer = multiprocessing.Array(typecode, buffer_size)

        logger.info("buffer_for_shadow_pricing added block %s" % block_key)

        data_buffers[block_key] = shared_data_buffer

    return data_buffers