def full_disaggregation(self):
    """
    Run the disaggregation phase: select the realizations to
    disaggregate for, build the bin edges, then submit one
    `compute_disagg` task per slice of ruptures.

    :returns: a dictionary sid -> trti -> 8D array
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = self.src_filter()
    if hasattr(self, 'csm'):
        # atomic groups cannot be split per-rupture, hence unsupported
        for sg in self.csm.src_groups:
            if sg.atomic:
                raise NotImplementedError(
                    'Atomic groups are not supported yet')
        if not self.csm.get_sources():
            raise RuntimeError('All sources were filtered away!')
    csm_info = self.datastore['csm_info']
    self.poes_disagg = oq.poes_disagg or (None,)
    self.imts = list(oq.imtls)
    self.ws = [rlz.weight for rlz in self.rlzs_assoc.realizations]
    self.pgetter = getters.PmapGetter(
        self.datastore, self.ws, self.sitecol.sids)
    # build array rlzs (N, Z): for each site, the Z realizations to use
    if oq.rlz_index is None:
        Z = oq.num_rlzs_disagg
        rlzs = numpy.zeros((self.N, Z), int)
        if self.R > 1:
            # pick, per site, the Z realizations closest to the mean curve
            for sid in self.sitecol.sids:
                curves = numpy.array(
                    [pc.array for pc in self.pgetter.get_pcurves(sid)])
                mean = getters.build_stat_curve(
                    curves, oq.imtls, stats.mean_curve, self.ws)
                rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
            self.datastore['best_rlzs'] = rlzs
    else:
        # realizations fixed explicitly in the job.ini
        Z = len(oq.rlz_index)
        rlzs = numpy.zeros((self.N, Z), int)
        for z in range(Z):
            rlzs[:, z] = oq.rlz_index[z]
    assert Z <= self.R, (Z, self.R)
    self.Z = Z
    self.rlzs = rlzs
    if oq.iml_disagg:
        # no hazard curves are needed
        self.poe_id = {None: 0}
        curves = [[None for z in range(Z)] for s in range(self.N)]
        self.ok_sites = set(self.sitecol.sids)
    else:
        self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
        curves = [self.get_curve(sid, rlzs[sid])
                  for sid in self.sitecol.sids]
        self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
    self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                      self.poes_disagg, curves)
    if oq.disagg_by_src:
        self.build_disagg_by_src(rlzs)
    # build trt_edges
    trts = tuple(csm_info.trts)
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts
    # build mag_edges
    min_mag = csm_info.min_mag
    max_mag = csm_info.max_mag
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    # build eps_edges (removed a duplicate identical assignment)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    self.save_bin_edges()
    self.imldict = {}  # sid, rlz, poe, imt -> iml
    for s in self.sitecol.sids:
        for z, rlz in enumerate(rlzs[s]):
            logging.info('Site #%d, disaggregating for rlz=#%d', s, rlz)
            for p, poe in enumerate(self.poes_disagg):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z]
    # submit disagg tasks
    gid = self.datastore['rup/grp_id'][()]
    indices_by_grp = get_indices(gid)  # grp_id -> [(start, stop),...]
    blocksize = len(gid) // (oq.concurrent_tasks or 1) + 1
    # NB: removing the blocksize causes slow disaggregation tasks
    allargs = []
    dstore = (self.datastore.parent if self.datastore.parent
              else self.datastore)
    for grp_id, trt in csm_info.trt_by_grp.items():
        trti = trt_num[trt]
        rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(grp_id)
        cmaker = ContextMaker(
            trt, rlzs_by_gsim,
            {'truncation_level': oq.truncation_level,
             'maximum_distance': src_filter.integration_distance,
             'filter_distance': oq.filter_distance,
             'imtls': oq.imtls})
        for start, stop in indices_by_grp[grp_id]:
            for slc in gen_slices(start, stop, blocksize):
                allargs.append((dstore, slc, self.sitecol, oq, cmaker,
                                self.iml4, trti, self.bin_edges))
    results = parallel.Starmap(
        compute_disagg, allargs, h5=self.datastore.hdf5).reduce(
            self.agg_result, AccumDict(accum={}))
    return results  # sid -> trti-> 8D array
def full_disaggregation(self, curves):
    """
    Run the disaggregation phase.

    :param curves: a list of hazard curves, one per site

    The curves can be all None if iml_disagg is set in the job.ini
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = SourceFilter(self.sitecol, oq.maximum_distance)
    csm = self.csm
    if not csm.get_sources():
        raise RuntimeError('All sources were filtered away!')
    R = len(self.rlzs_assoc.realizations)
    M = len(oq.imtls)
    P = len(oq.poes_disagg) or 1
    if R * M * P > 10:
        # logging.warn is a deprecated alias; use logging.warning
        logging.warning(
            'You have %d realizations, %d IMTs and %d poes_disagg: the '
            'disaggregation will be heavy and memory consuming', R, M, P)
    iml4 = disagg.make_iml4(
        R, oq.iml_disagg, oq.imtls, oq.poes_disagg or (None,), curves)
    if oq.disagg_by_src:
        if R == 1:
            self.build_disagg_by_src(iml4)
        else:
            logging.warning('disagg_by_src works only with 1 realization, '
                            'you have %d', R)
    # build trt_edges
    trts = tuple(sorted(set(sg.trt for smodel in csm.source_models
                            for sg in smodel.src_groups)))
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts
    # build mag_edges
    min_mag = min(sg.min_mag for smodel in csm.source_models
                  for sg in smodel.src_groups)
    max_mag = max(sg.max_mag for smodel in csm.source_models
                  for sg in smodel.src_groups)
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    # build eps_edges (a duplicate identical assignment was removed)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    self.save_bin_edges()
    # build all_args
    all_args = []
    maxweight = csm.get_maxweight(weight, oq.concurrent_tasks)
    mon = self.monitor('disaggregation')
    R = iml4.shape[1]
    self.imldict = {}  # sid, rlzi, poe, imt -> iml
    for s in self.sitecol.sids:
        for r in range(R):
            for p, poe in enumerate(oq.poes_disagg or [None]):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, r, poe, imt] = iml4[s, r, m, p]
    for smodel in csm.source_models:
        sm_id = smodel.ordinal
        for trt, groups in groupby(
                smodel.src_groups, operator.attrgetter('trt')).items():
            trti = trt_num[trt]
            sources = sum([grp.sources for grp in groups], [])
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id)
            cmaker = ContextMaker(
                rlzs_by_gsim, src_filter.integration_distance,
                {'filter_distance': oq.filter_distance})
            for block in block_splitter(sources, maxweight, weight):
                all_args.append(
                    (src_filter, block, cmaker, iml4, trti,
                     self.bin_edges, oq, mon))
    self.num_ruptures = [0] * len(self.trts)
    self.cache_info = numpy.zeros(3)  # operations, cache_hits, num_zeros
    results = parallel.Starmap(
        compute_disagg, all_args, self.monitor()
    ).reduce(self.agg_result, AccumDict(accum={}))
    # set eff_ruptures
    trti = csm.info.trt2i()
    for smodel in csm.info.source_models:
        for sg in smodel.src_groups:
            sg.eff_ruptures = self.num_ruptures[trti[sg.trt]]
    self.datastore['csm_info'] = csm.info
    ops, hits, num_zeros = self.cache_info
    logging.info('Cache speedup %s', ops / (ops - hits))
    logging.info('Discarded zero matrices: %d', num_zeros)
    return results
def full_disaggregation(self):
    """
    Run the disaggregation phase: build bin edges and submit one
    `compute_disagg` task per block of sources.
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = SourceFilter(self.sitecol, oq.maximum_distance)
    csm = self.csm
    for sg in csm.src_groups:
        if sg.atomic:
            # BUG FIX: was `raise NotImplemented(...)`, which is not an
            # exception class and would raise a TypeError instead
            raise NotImplementedError('Atomic groups are not supported yet')
    if not csm.get_sources():
        raise RuntimeError('All sources were filtered away!')
    poes_disagg = oq.poes_disagg or (None,)
    N = len(self.sitecol)
    R = len(self.rlzs_assoc.realizations)
    if oq.rlz_index is None:
        try:
            rlzs = self.datastore['best_rlz'][()]
        except KeyError:
            # no precomputed best realizations: default to rlz #0
            rlzs = numpy.zeros(N, int)
    else:
        rlzs = [oq.rlz_index] * N
    if oq.iml_disagg:
        curves = [None] * len(self.sitecol)  # no hazard curves are needed
    else:
        curves = [self.get_curve(sid, rlzs) for sid in self.sitecol.sids]
        self.check_poes_disagg(curves, rlzs)
    iml2s = _iml2s(rlzs, oq.iml_disagg, oq.imtls, poes_disagg, curves)
    if oq.disagg_by_src:
        if R == 1:
            self.build_disagg_by_src(iml2s)
        else:
            logging.warning('disagg_by_src works only with 1 realization, '
                            'you have %d', R)
    # build trt_edges
    trts = tuple(sorted(set(sg.trt for smodel in csm.source_models
                            for sg in smodel.src_groups)))
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts
    # build mag_edges from the min/max magnitudes of all sources
    mmm = numpy.array([src.get_min_max_mag() for src in csm.get_sources()])
    min_mag = mmm[:, 0].min()
    max_mag = mmm[:, 1].max()
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    # build eps_edges (a duplicate identical assignment was removed)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    self.save_bin_edges()
    # build all_args
    all_args = []
    maxweight = csm.get_maxweight(weight, oq.concurrent_tasks)
    self.imldict = {}  # sid, rlzi, poe, imt -> iml
    for s in self.sitecol.sids:
        iml2 = iml2s[s]
        r = rlzs[s]
        for p, poe in enumerate(oq.poes_disagg or [None]):
            for m, imt in enumerate(oq.imtls):
                self.imldict[s, r, poe, imt] = iml2[m, p]
    for smodel in csm.source_models:
        sm_id = smodel.ordinal
        for trt, groups in groupby(
                smodel.src_groups, operator.attrgetter('trt')).items():
            trti = trt_num[trt]
            sources = sum([grp.sources for grp in groups], [])
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id)
            cmaker = ContextMaker(
                trt, rlzs_by_gsim, src_filter.integration_distance,
                {'filter_distance': oq.filter_distance})
            for block in block_splitter(sources, maxweight, weight):
                all_args.append(
                    (src_filter.sitecol, block, cmaker, iml2s, trti,
                     self.bin_edges, oq))
    self.num_ruptures = [0] * len(self.trts)
    mon = self.monitor()
    results = parallel.Starmap(compute_disagg, all_args, mon).reduce(
        self.agg_result, AccumDict(accum={}))
    # set eff_ruptures
    trti = csm.info.trt2i()
    for smodel in csm.info.source_models:
        for sg in smodel.src_groups:
            sg.eff_ruptures = self.num_ruptures[trti[sg.trt]]
    self.datastore['csm_info'] = csm.info
    return results
def full_disaggregation(self):
    """
    Run the disaggregation phase: build bin edges and submit one
    `compute_disagg` task per slice of ruptures.

    :returns: a dictionary sid -> trti -> 7D array
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = self.src_filter()
    if hasattr(self, 'csm'):
        for sg in self.csm.src_groups:
            if sg.atomic:
                raise NotImplementedError(
                    'Atomic groups are not supported yet')
        if not self.csm.get_sources():
            raise RuntimeError('All sources were filtered away!')
    csm_info = self.datastore['csm_info']
    self.poes_disagg = oq.poes_disagg or (None,)
    self.imts = list(oq.imtls)
    # one realization index per site
    if oq.rlz_index is None:
        try:
            rlzs = self.datastore['best_rlz'][()]
        except KeyError:
            # no precomputed best realizations: default to rlz #0
            rlzs = numpy.zeros(self.N, int)
    else:
        rlzs = [oq.rlz_index] * self.N
    if oq.iml_disagg:
        self.poe_id = {None: 0}
        curves = [None] * len(self.sitecol)  # no hazard curves are needed
        self.ok_sites = set(self.sitecol.sids)
    else:
        self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
        curves = [self.get_curve(sid, rlzs) for sid in self.sitecol.sids]
        self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
    self.iml2s = _iml2s(rlzs, oq.iml_disagg, oq.imtls,
                        self.poes_disagg, curves)
    if oq.disagg_by_src:
        self.build_disagg_by_src()
    # build trt_edges
    trts = tuple(csm_info.trts)
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts
    # build mag_edges
    min_mag = csm_info.min_mag
    max_mag = csm_info.max_mag
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    # build eps_edges (a duplicate identical assignment was removed)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    self.save_bin_edges()
    self.imldict = {}  # sid, rlzi, poe, imt -> iml
    for s in self.sitecol.sids:
        iml2 = self.iml2s[s]
        r = rlzs[s]
        logging.info('Site #%d, disaggregating for rlz=#%d', s, r)
        for p, poe in enumerate(self.poes_disagg):
            for m, imt in enumerate(oq.imtls):
                self.imldict[s, r, poe, imt] = iml2[m, p]
    # submit disagg tasks
    gid = self.datastore['rup/grp_id'][()]
    indices_by_grp = get_indices(gid)  # grp_id -> [(start, stop),...]
    blocksize = len(gid) // (oq.concurrent_tasks or 1) + 1
    allargs = []
    for grp_id, trt in csm_info.trt_by_grp.items():
        trti = trt_num[trt]
        rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(grp_id)
        cmaker = ContextMaker(
            trt, rlzs_by_gsim,
            {'truncation_level': oq.truncation_level,
             'maximum_distance': src_filter.integration_distance,
             'filter_distance': oq.filter_distance,
             'imtls': oq.imtls})
        for start, stop in indices_by_grp[grp_id]:
            for slc in gen_slices(start, stop, blocksize):
                allargs.append((self.datastore, slc, cmaker,
                                self.iml2s, trti, self.bin_edges))
    results = parallel.Starmap(
        compute_disagg, allargs, h5=self.datastore.hdf5).reduce(
            self.agg_result, AccumDict(accum={}))
    return results  # sid -> trti-> 7D array
def full_disaggregation(self):
    """
    Run the disaggregation phase: build bin edges and submit one
    `compute_disagg` task per block of stored ruptures.
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = SourceFilter(self.sitecol, oq.maximum_distance)
    if hasattr(self, 'csm'):
        for sg in self.csm.src_groups:
            if sg.atomic:
                # BUG FIX: was `raise NotImplemented(...)`, which is not an
                # exception class and would raise a TypeError instead
                raise NotImplementedError(
                    'Atomic groups are not supported yet')
        if not self.csm.get_sources():
            raise RuntimeError('All sources were filtered away!')
    csm_info = self.datastore['csm_info']
    poes_disagg = oq.poes_disagg or (None,)
    R = len(self.rlzs_assoc.realizations)
    rlzs = extract.disagg_key(self.datastore).rlzs
    if oq.iml_disagg:
        self.poe_id = {None: 0}
        curves = [None] * len(self.sitecol)  # no hazard curves are needed
    else:
        self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
        curves = [self.get_curve(sid, rlzs) for sid in self.sitecol.sids]
        self.check_poes_disagg(curves, rlzs)
    iml2s = _iml2s(rlzs, oq.iml_disagg, oq.imtls, poes_disagg, curves)
    if oq.disagg_by_src:
        if R == 1:
            self.build_disagg_by_src(iml2s)
        else:
            logging.warning(
                'disagg_by_src works only with 1 realization, '
                'you have %d', R)
    # build trt_edges
    trts = tuple(csm_info.trts)
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts
    # build mag_edges
    min_mag = csm_info.min_mag
    max_mag = csm_info.max_mag
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    # build eps_edges (a duplicate identical assignment was removed)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    self.save_bin_edges()
    # build all_args
    all_args = []
    self.imldict = {}  # sid, rlzi, poe, imt -> iml
    for s in self.sitecol.sids:
        iml2 = iml2s[s]
        r = rlzs[s]
        logging.info('Site #%d, disaggregating for rlz=#%d', s, r)
        for p, poe in enumerate(oq.poes_disagg or [None]):
            for m, imt in enumerate(oq.imtls):
                self.imldict[s, r, poe, imt] = iml2[m, p]
    for grp, dset in self.datastore['rup'].items():
        grp_id = int(grp[4:])  # strip the 'grp-' prefix from the key
        trt = csm_info.trt_by_grp[grp_id]
        trti = trt_num[trt]
        rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(grp_id)
        cmaker = ContextMaker(
            trt, rlzs_by_gsim, src_filter.integration_distance,
            {'filter_distance': oq.filter_distance})
        for block in block_splitter(dset[()], 1000):
            all_args.append(
                (src_filter.sitecol, numpy.array(block), cmaker,
                 iml2s, trti, self.bin_edges, oq))
    self.num_ruptures = [0] * len(self.trts)
    mon = self.monitor()
    results = parallel.Starmap(compute_disagg, all_args, mon).reduce(
        self.agg_result, AccumDict(accum={}))
    return results
def full_disaggregation(self, curves):
    """
    Run the disaggregation phase.

    :param curves: a list of hazard curves, one per site

    The curves can be all None if iml_disagg is set in the job.ini
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = SourceFilter(self.sitecol, oq.maximum_distance)
    csm = self.csm
    for sg in csm.src_groups:
        if sg.atomic:
            # BUG FIX: was `raise NotImplemented(...)`, which is not an
            # exception class and would raise a TypeError instead
            raise NotImplementedError('Atomic groups are not supported yet')
    if not csm.get_sources():
        raise RuntimeError('All sources were filtered away!')
    R = len(self.rlzs_assoc.realizations)
    M = len(oq.imtls)
    P = len(oq.poes_disagg) or 1
    if R * M * P > 10:
        logging.warning(
            'You have %d realizations, %d IMTs and %d poes_disagg: the '
            'disaggregation will be heavy and memory consuming', R, M, P)
    iml4 = disagg.make_iml4(
        R, oq.iml_disagg, oq.imtls, oq.poes_disagg or (None,), curves)
    if oq.disagg_by_src:
        if R == 1:
            self.build_disagg_by_src(iml4)
        else:
            logging.warning('disagg_by_src works only with 1 realization, '
                            'you have %d', R)
    # build trt_edges
    trts = tuple(sorted(set(sg.trt for smodel in csm.source_models
                            for sg in smodel.src_groups)))
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts
    # build mag_edges from the min/max magnitudes of all sources
    mmm = numpy.array([src.get_min_max_mag() for src in csm.get_sources()])
    min_mag = mmm[:, 0].min()
    max_mag = mmm[:, 1].max()
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    # build eps_edges (a duplicate identical assignment was removed)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    self.save_bin_edges()
    # build all_args
    all_args = []
    maxweight = csm.get_maxweight(weight, oq.concurrent_tasks)
    R = iml4.shape[1]
    self.imldict = {}  # sid, rlzi, poe, imt -> iml
    for s in self.sitecol.sids:
        for r in range(R):
            for p, poe in enumerate(oq.poes_disagg or [None]):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, r, poe, imt] = iml4[s, r, m, p]
    for smodel in csm.source_models:
        sm_id = smodel.ordinal
        for trt, groups in groupby(
                smodel.src_groups, operator.attrgetter('trt')).items():
            trti = trt_num[trt]
            sources = sum([grp.sources for grp in groups], [])
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id)
            cmaker = ContextMaker(
                trt, rlzs_by_gsim, src_filter.integration_distance,
                {'filter_distance': oq.filter_distance})
            for block in block_splitter(sources, maxweight, weight):
                all_args.append(
                    (src_filter.sitecol, block, cmaker, iml4, trti,
                     self.bin_edges, oq))
    self.num_ruptures = [0] * len(self.trts)
    self.cache_info = numpy.zeros(3)  # operations, cache_hits, num_zeros
    results = parallel.Starmap(
        compute_disagg, all_args, self.monitor()
    ).reduce(self.agg_result, AccumDict(accum={}))
    # set eff_ruptures
    trti = csm.info.trt2i()
    for smodel in csm.info.source_models:
        for sg in smodel.src_groups:
            sg.eff_ruptures = self.num_ruptures[trti[sg.trt]]
    self.datastore['csm_info'] = csm.info
    ops, hits, num_zeros = self.cache_info
    logging.info('Cache speedup %s', ops / (ops - hits))
    logging.info('Discarded zero matrices: %d', num_zeros)
    return results
def full_disaggregation(self):
    """
    Run the disaggregation phase: select the realizations to
    disaggregate for, build the bin edges, estimate the data
    transfer, then submit one `compute_disagg` task per group
    of rupture indices.

    :returns: a dictionary sid -> trti -> 8D array
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = self.src_filter()
    if hasattr(self, 'csm'):
        for sg in self.csm.src_groups:
            if sg.atomic:
                raise NotImplementedError(
                    'Atomic groups are not supported yet')
    self.full_lt = self.datastore['full_lt']
    self.poes_disagg = oq.poes_disagg or (None,)
    self.imts = list(oq.imtls)
    self.ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
    self.pgetter = getters.PmapGetter(
        self.datastore, self.ws, self.sitecol.sids)
    # build array rlzs (N, Z): for each site, the Z realizations to use
    if oq.rlz_index is None:
        Z = oq.num_rlzs_disagg
        rlzs = numpy.zeros((self.N, Z), int)
        if self.R > 1:
            # pick, per site, the Z realizations closest to the mean curve
            for sid in self.sitecol.sids:
                curves = numpy.array(
                    [pc.array for pc in self.pgetter.get_pcurves(sid)])
                mean = getters.build_stat_curve(
                    curves, oq.imtls, stats.mean_curve, self.ws)
                rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
            self.datastore['best_rlzs'] = rlzs
    else:
        # realizations fixed explicitly in the job.ini
        Z = len(oq.rlz_index)
        rlzs = numpy.zeros((self.N, Z), int)
        for z in range(Z):
            rlzs[:, z] = oq.rlz_index[z]
    assert Z <= self.R, (Z, self.R)
    self.Z = Z
    self.rlzs = rlzs
    if oq.iml_disagg:
        # no hazard curves are needed
        self.poe_id = {None: 0}
        curves = [[None for z in range(Z)] for s in range(self.N)]
        self.ok_sites = set(self.sitecol.sids)
    else:
        self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
        curves = [self.get_curve(sid, rlzs[sid])
                  for sid in self.sitecol.sids]
        self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
    self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                      self.poes_disagg, curves)
    if oq.disagg_by_src:
        self.build_disagg_by_src(rlzs)
    # build trt_edges
    trts = tuple(self.full_lt.trts)
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts
    # build mag_edges from the stored source magnitudes
    mags = [float(mag) for mag in self.datastore['source_mags']]
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min(mags) / oq.mag_bin_width)),
        int(numpy.ceil(max(mags) / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    # build eps_edges (a duplicate identical assignment was removed)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max(mags))
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    # sanity check on the expected size of the data transfer
    shapedic = self.save_bin_edges()
    del shapedic['trt']
    shapedic['N'] = self.N
    shapedic['M'] = len(oq.imtls)
    shapedic['P'] = len(oq.poes_disagg)
    shapedic['Z'] = Z
    shapedic['concurrent_tasks'] = oq.concurrent_tasks
    nbytes, msg = get_array_nbytes(shapedic)
    if nbytes > oq.max_data_transfer:
        raise ValueError('Estimated data transfer too big\n%s' % msg)
    logging.info('Estimated data transfer: %s', msg)
    self.imldict = {}  # sid, rlz, poe, imt -> iml
    for s in self.sitecol.sids:
        for z, rlz in enumerate(rlzs[s]):
            for p, poe in enumerate(self.poes_disagg):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z]
    # submit #groups disaggregation tasks
    dstore = (self.datastore.parent if self.datastore.parent
              else self.datastore)
    indices = get_indices(dstore, oq.concurrent_tasks or 1)
    self.datastore.swmr_on()
    smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
    for grp_id, trt in self.full_lt.trt_by_grp.items():
        logging.info('Group #%d, sending rup_data for %s', grp_id, trt)
        trti = trt_num[trt]
        cmaker = ContextMaker(
            trt, self.full_lt.get_rlzs_by_gsim(grp_id),
            {'truncation_level': oq.truncation_level,
             'maximum_distance': src_filter.integration_distance,
             'filter_distance': oq.filter_distance,
             'imtls': oq.imtls})
        for idxs in indices[grp_id]:
            smap.submit((dstore, idxs, cmaker, self.iml4, trti,
                         self.bin_edges))
    results = smap.reduce(self.agg_result, AccumDict(accum={}))
    return results  # sid -> trti-> 8D array
def full_disaggregation(self, curves):
    """
    Run the disaggregation phase.

    :param curves: a list of hazard curves, one per site

    The curves can be all None if iml_disagg is set in the job.ini
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = SourceFilter(self.sitecol, oq.maximum_distance,
                              use_rtree=False)
    csm = self.csm.filter(src_filter)  # fine filtering
    self.datastore['csm_info'] = csm.info
    # build trt_edges
    trts = tuple(sorted(set(sg.trt for smodel in csm.source_models
                            for sg in smodel.src_groups)))
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts
    # build mag_edges
    min_mag = min(sg.min_mag for smodel in csm.source_models
                  for sg in smodel.src_groups)
    max_mag = max(sg.max_mag for smodel in csm.source_models
                  for sg in smodel.src_groups)
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    # build eps_edges (a duplicate identical assignment was removed)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    self.save_bin_edges()
    # build all_args
    all_args = []
    maxweight = csm.get_maxweight(oq.concurrent_tasks)
    mon = self.monitor('disaggregation')
    R = len(self.rlzs_assoc.realizations)
    # NOTE(review): the (oq.imtls, oq.iml_disagg) argument order here
    # differs from other revisions of this method — confirm it matches
    # the disagg.make_iml4 signature
    iml4 = disagg.make_iml4(R, oq.imtls, oq.iml_disagg,
                            oq.poes_disagg or (None, ), curves)
    self.imldict = {}  # sid, rlzi, poe, imt -> iml
    for s in self.sitecol.sids:
        for r in range(R):
            for p, poe in enumerate(oq.poes_disagg or [None]):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, r, poe, imt] = iml4[s, r, m, p]
    for smodel in csm.source_models:
        sm_id = smodel.ordinal
        for trt, groups in groupby(
                smodel.src_groups, operator.attrgetter('trt')).items():
            trti = trt_num[trt]
            sources = sum([grp.sources for grp in groups], [])
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id)
            cmaker = ContextMaker(rlzs_by_gsim,
                                  src_filter.integration_distance)
            for block in csm.split_in_blocks(maxweight, sources):
                all_args.append((src_filter, block, cmaker, iml4, trti,
                                 self.bin_edges, oq, mon))
    self.num_ruptures = [0] * len(self.trts)
    self.cache_info = numpy.zeros(3)  # operations, cache_hits, num_zeros
    results = parallel.Starmap(compute_disagg, all_args).reduce(
        self.agg_result, AccumDict(accum={}))
    ops, hits, num_zeros = self.cache_info
    logging.info('Cache speedup %s', ops / (ops - hits))
    logging.info('Discarded zero matrices: %d', num_zeros)
    return results
def full_disaggregation(self):
    """
    Run the disaggregation phase after hazard curve finalization.
    """
    oq = self.oqparam
    tl = self.oqparam.truncation_level
    sitecol = self.sitecol
    self.bin_edges = {}  # populated per sid below
    curves = [self.get_curves(sid) for sid in sitecol.sids]
    # determine the number of effective source groups
    sg_data = self.datastore['csm_info/sg_data']
    num_grps = sum(1 for effrup in sg_data['effrup'] if effrup > 0)
    nblocks = math.ceil(oq.concurrent_tasks / num_grps)
    src_filter = SourceFilter(sitecol, oq.maximum_distance)
    R = len(self.rlzs_assoc.realizations)
    max_poe = numpy.zeros(R, oq.imt_dt())
    # build trt_edges
    trts = tuple(sorted(set(sg.trt for smodel in self.csm.source_models
                            for sg in smodel.src_groups)))
    # build mag_edges
    min_mag = min(sg.min_mag for smodel in self.csm.source_models
                  for sg in smodel.src_groups)
    max_mag = max(sg.max_mag for smodel in self.csm.source_models
                  for sg in smodel.src_groups)
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min_mag / oq.mag_bin_width)),
        int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))
    # build dist_edges
    maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
    logging.info('dist = %s...%s', min(dist_edges), max(dist_edges))
    # build eps_edges (a duplicate identical assignment was removed)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)
    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max_mag)
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges, lat_edges = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
        logging.info('site %d, lon = %s...%s',
                     sid, min(lon_edges), max(lon_edges))
        logging.info('site %d, lat = %s...%s',
                     sid, min(lat_edges), max(lat_edges))
        self.bin_edges[sid] = bs = (
            mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)
        shape = [len(edges) - 1 for edges in bs] + [len(trts)]
        logging.info('%s for sid %d', shape, sid)
    # check poes
    for smodel in self.csm.source_models:
        sm_id = smodel.ordinal
        for i, _site in enumerate(sitecol):
            sid = sitecol.sids[i]
            curve = curves[i]
            # populate max_poe array
            for rlzi, poes in curve.items():
                for imt in oq.imtls:
                    max_poe[rlzi][imt] = max(
                        max_poe[rlzi][imt], poes[imt].max())
            if not curve:
                continue  # skip zero-valued hazard curves
            # check for too big poes_disagg
            for poe in oq.poes_disagg:
                for rlz in self.rlzs_assoc.rlzs_by_smodel[sm_id]:
                    rlzi = rlz.ordinal
                    for imt in oq.imtls:
                        min_poe = max_poe[rlzi][imt]
                        if poe > min_poe:
                            raise ValueError(self.POE_TOO_BIG % (
                                poe, sm_id, smodel.name, min_poe,
                                rlzi, imt))
    # build all_args
    all_args = []
    for smodel in self.csm.source_models:
        for sg in smodel.src_groups:
            split_sources = []
            for src in sg:
                for split, _sites in src_filter(
                        sourceconverter.split_source(src), sitecol):
                    split_sources.append(split)
            if not split_sources:
                continue
            mon = self.monitor('disaggregation')
            rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(
                sg.trt, smodel.ordinal)
            cmaker = ContextMaker(rlzs_by_gsim,
                                  src_filter.integration_distance)
            imls = [
                disagg.make_imldict(rlzs_by_gsim, oq.imtls, oq.iml_disagg,
                                    oq.poes_disagg, curve)
                for curve in curves]
            for srcs in split_in_blocks(split_sources, nblocks):
                all_args.append((src_filter, srcs, cmaker, imls, trts,
                                 self.bin_edges, oq, mon))
    self.cache_info = numpy.zeros(2)  # operations, cache_hits
    results = parallel.Starmap(compute_disagg, all_args).reduce(
        self.agg_result)
    ops, hits = self.cache_info
    # NOTE(review): ops / (ops - hits) divides by zero if every
    # operation was a cache hit — confirm this cannot happen
    logging.info('Cache speedup %s', ops / (ops - hits))
    self.save_disagg_results(results)