def get_source_ids(self):
    """
    Store the number of IMTs (`self.M`) and the number of levels per IMT
    (`self.L1`), then check that the `disagg_by_src` matrix has an
    acceptable size.

    :returns: the encoded source IDs listed in `self.csm.source_info`
    :raises RuntimeError:
        if the estimated size of the matrix exceeds `TWO32`; the message
        depends on whether the source model contains point sources
    """
    params = self.oqparam
    self.M = len(params.imtls)
    self.L1 = len(params.imtls.array) // self.M
    encoded_ids = encode(list(self.csm.source_info))
    shape = {'N': self.N, 'R': self.R, 'M': self.M, 'L1': self.L1,
             'Ns': self.Ns}
    nbytes, descr = get_array_nbytes(shape)
    has_point_sources = (
        'pointSource' in self.full_lt.source_model_lt.source_types)
    if nbytes > TWO32:
        if has_point_sources:
            # point sources are what makes the matrix explode: suggest
            # the conversion command instead of a generic size error
            raise RuntimeError(
                ('The source model contains point sources: you cannot set '
                 'disagg_by_src=true unless you convert them to multipoint '
                 'sources with the command oq upgrade_nrml --multipoint %s'
                 ) % params.base_path)
        raise RuntimeError(
            'The matrix disagg_by_src is too large: %s' % descr)
    return encoded_ids
def full_disaggregation(self):
    """
    Run the disaggregation phase.

    The method builds the bin edges, chooses the realizations to
    disaggregate (stored in `self.rlzs`, an array of shape (N, Z)),
    computes the intensity levels (`self.iml3`), checks the estimated
    data transfer against the `max_data_transfer` parameter and then
    submits the `compute_disagg` tasks, one for each block of rupture
    indices and each IMT.

    :returns:
        the result of reducing the task outputs with `self.agg_result`
    :raises NotImplementedError: if a source group is atomic
    :raises ValueError: if the estimated data transfer is too big
    """
    params = self.oqparam
    source_mags = self.datastore['source_mags']
    edges, shape_by_name = disagg.get_edges_shapedic(
        params, self.sitecol, source_mags)
    # the last element contains the TRTs, the others are the bin edges
    *self.bin_edges, self.trts = edges
    srcfilter = self.src_filter()
    if hasattr(self, 'csm') and any(
            grp.atomic for grp in self.csm.src_groups):
        raise NotImplementedError('Atomic groups are not supported yet')

    self.full_lt = self.datastore['full_lt']
    self.poes_disagg = params.poes_disagg or (None,)
    self.imts = list(params.imtls)
    self.ws = [r.weight for r in self.full_lt.get_realizations()]
    self.pgetter = getters.PmapGetter(
        self.datastore, self.ws, self.sitecol.sids)

    # build the array of realization indices, of shape (N, Z)
    explicit = params.rlz_index is not None
    Z = len(params.rlz_index) if explicit else params.num_rlzs_disagg
    rlzs = numpy.zeros((self.N, Z), int)
    if explicit:
        # the same user-specified realizations are used for every site
        for col, rlz_idx in enumerate(params.rlz_index):
            rlzs[:, col] = rlz_idx
    else:
        if self.R > 1:
            # for each site take the Z realizations closest to the mean
            for sid in self.sitecol.sids:
                pcurves = numpy.array(
                    [pc.array for pc in self.pgetter.get_pcurves(sid)])
                mean_curve = getters.build_stat_curve(
                    pcurves, params.imtls, stats.mean_curve, self.ws)
                rlzs[sid] = util.closest_to_ref(
                    pcurves, mean_curve.array)[:Z]
        self.datastore['best_rlzs'] = rlzs
    assert Z <= self.R, (Z, self.R)
    self.Z = Z
    self.rlzs = rlzs

    if params.iml_disagg:
        # no hazard curves are needed
        self.poe_id = {None: 0}
        curves = [[None] * Z for _ in range(self.N)]
        self.ok_sites = set(self.sitecol.sids)
    else:
        self.poe_id = {
            poe: idx for idx, poe in enumerate(params.poes_disagg)}
        curves = [self.get_curve(sid, rlzs[sid])
                  for sid in self.sitecol.sids]
        self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
    self.iml3 = _iml3(rlzs, params.iml_disagg, params.imtls,
                      self.poes_disagg, curves)
    if params.disagg_by_src:
        self.build_disagg_by_src(rlzs)

    self.save_bin_edges()
    # the TRT dimension is excluded from the data transfer estimate
    transfer_shape = shape_by_name.copy()
    del transfer_shape['trt']
    nbytes, descr = get_array_nbytes(transfer_shape)
    if nbytes > params.max_data_transfer:
        raise ValueError(
            'Estimated data transfer too big\n%s > max_data_transfer=%s' %
            (descr, humansize(params.max_data_transfer)))
    logging.info('Estimated data transfer:\n%s', descr)
    out_sizes = get_outputs_size(
        shape_by_name, params.disagg_outputs or disagg.pmf_map)
    logging.info('Total output size: %s',
                 humansize(sum(out_sizes.values())))

    imldic = self.imldic = {}  # sid, rlz, poe, imt -> iml
    for sid in self.sitecol.sids:
        for z, rlz in enumerate(rlzs[sid]):
            for p, poe in enumerate(self.poes_disagg):
                for imt in params.imtls:
                    imldic[sid, rlz, poe, imt] = self.iml3[imt][sid, p, z]

    # submit #groups disaggregation tasks
    dstore = self.datastore.parent or self.datastore
    num_imts = len(params.imtls)
    tasks_per_imt = numpy.ceil(params.concurrent_tasks / num_imts) or 1
    rups_per_task = len(dstore['rup/mag']) / tasks_per_imt
    logging.info('Considering ~%d ruptures per task', rups_per_task)
    indices = get_indices(dstore, tasks_per_imt)
    self.datastore.swmr_on()
    smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
    trt_index = {trt: idx for idx, trt in enumerate(self.trts)}
    for grp_id, trt in self.full_lt.trt_by_grp.items():
        logging.info('Group #%d, sending rup_data for %s', grp_id, trt)
        trti = trt_index[trt]
        cmaker_params = {
            'truncation_level': params.truncation_level,
            'maximum_distance': srcfilter.integration_distance,
            'imtls': params.imtls}
        cmaker = ContextMaker(
            trt, self.full_lt.get_rlzs_by_gsim(grp_id), cmaker_params)
        for idxs in indices[grp_id]:
            for imt in params.imtls:
                smap.submit((dstore, idxs, cmaker, self.iml3[imt], trti,
                             self.bin_edges, params))
    # sid -> trti-> 8D array
    return smap.reduce(self.agg_result, AccumDict(accum={}))
def compute(self):
    """
    Submit disaggregation tasks and return the results.

    The rupture contexts are read magnitude by magnitude from the
    datastore (or from its parent, if any), grouped by `gidx` and split
    in blocks with `block_splitter`; each block becomes a
    `compute_disagg` task. The composition of the tasks is saved in the
    `disagg_task` dataset.

    :returns:
        the result of reducing the task outputs with `self.agg_result`
    :raises ValueError: if the estimated data transfer is too big
    """
    logging.info('Reading ruptures')
    params = self.oqparam
    dstore = self.datastore.parent or self.datastore

    # collect the distinct magnitudes across all TRTs, in sorted order
    unique_mags = set()
    for dset in self.datastore['source_mags'].values():
        unique_mags.update(dset[:])
    mags = sorted(unique_mags)

    # the total weight is the total number of affected sites
    totweight = 0
    for name, grp in dstore.items():
        if name.startswith('mag_') and len(grp['rctx']):
            totweight += grp['rctx']['nsites'].sum()

    grp_ids = dstore['grp_ids'][:]
    rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(grp_ids)
    G = max(map(len, rlzs_by_gsim))
    weight_cap = 2 * 1024**3 / (16 * G * self.M)  # at max 2 GB
    even_share = numpy.ceil(totweight / (params.concurrent_tasks or 1))
    maxweight = min(even_share, weight_cap)
    num_eff_rlzs = len(self.full_lt.sm_rlzs)

    allargs = []
    task_inputs = []
    U = 0  # maximum weight of a block
    totrups = 0
    for mag in mags:
        rctx = dstore['mag_%s/rctx' % mag][:]
        totrups += len(rctx)
        for gidx, gids in enumerate(grp_ids):
            idxs, = numpy.where(rctx['gidx'] == gidx)
            if not len(idxs):
                continue
            trti = gids[0] // num_eff_rlzs
            cmaker_params = {
                'truncation_level': params.truncation_level,
                'maximum_distance': params.maximum_distance,
                'collapse_level': params.collapse_level,
                'imtls': params.imtls}
            cmaker = ContextMaker(
                self.trts[trti], rlzs_by_gsim[gidx], cmaker_params)
            for block in block_splitter(rctx[idxs], maxweight, nsites):
                U = max(U, block.weight)
                allargs.append((dstore, numpy.array(block), cmaker,
                                self.hmap4, trti, self.bin_edges, params))
                task_inputs.append((trti, mag, len(block)))
    logging.info('Found {:_d} ruptures'.format(totrups))
    nbytes, descr = get_array_nbytes(dict(M=self.M, G=G, U=U, F=2))
    logging.info('Maximum mean_std per task:\n%s', descr)

    shp = self.shapedic
    sizes = {
        'N': shp['N'],
        'M': shp['M'],
        'P': shp['P'],
        'Z': shp['Z'],
        'size': shp['dist'] * shp['eps'] + shp['lon'] * shp['lat'],
    }
    sizes['tasks'] = numpy.ceil(len(allargs))
    nbytes, descr = get_array_nbytes(sizes)
    if nbytes > params.max_data_transfer:
        raise ValueError(
            'Estimated data transfer too big\n%s > max_data_transfer=%s' %
            (descr, humansize(params.max_data_transfer)))
    logging.info('Estimated data transfer:\n%s', descr)

    # on the master the task dimension is replaced by the total number
    # of magnitudes, summed over the TRTs
    del sizes['tasks']
    sizes['mags_trt'] = sum(
        len(trt_mags)
        for trt_mags in self.datastore['source_mags'].values())
    nbytes, descr = get_array_nbytes(sizes)
    logging.info('Estimated memory on the master:\n%s', descr)

    task_dt = numpy.dtype([('trti', U8), ('mag', '|S4'), ('nrups', U32)])
    self.datastore['disagg_task'] = numpy.array(task_inputs, task_dt)
    self.datastore.swmr_on()
    smap = parallel.Starmap(compute_disagg, allargs,
                            h5=self.datastore.hdf5)
    # imti, sid -> trti, magi -> 6D array
    return smap.reduce(self.agg_result, AccumDict(accum={}))
def full_disaggregation(self):
    """
    Run the disaggregation phase.

    The method builds the bin edges, chooses the realizations to
    disaggregate (saved as `best_rlzs`), computes the intensity levels
    (`self.iml4`, saved as `iml4` together with a zero-filled `poe4`),
    splits the rupture indices by (gidx, magi) in blocks of bounded
    weight and submits a `compute_disagg` task for each block. The
    composition of the tasks is saved in the `disagg_task` dataset.

    :returns:
        the result of reducing the task outputs with `self.agg_result`
    :raises NotImplementedError: if there are atomic source groups
    :raises ValueError: if the estimated data transfer is too big
    """
    params = self.oqparam
    source_mags = self.datastore['source_mags']
    edges, self.shapedic = disagg.get_edges_shapedic(
        params, self.sitecol, source_mags)
    # the last element contains the TRTs, the others are the bin edges
    *self.bin_edges, self.trts = edges
    if hasattr(self, 'csm'):
        if any(grp.atomic for grp in self.csm.src_groups):
            raise NotImplementedError(
                'Atomic groups are not supported yet')
    elif self.datastore['source_info'].attrs['atomic']:
        # without a composite source model rely on the stored flag
        raise NotImplementedError('Atomic groups are not supported yet')

    self.full_lt = self.datastore['full_lt']
    self.poes_disagg = params.poes_disagg or (None,)
    self.imts = list(params.imtls)
    self.M = len(self.imts)
    weights = [r.weight for r in self.full_lt.get_realizations()]
    self.pgetter = getters.PmapGetter(
        self.datastore, weights, self.sitecol.sids)

    # build the array of realization indices, of shape (N, Z)
    explicit = params.rlz_index is not None
    Z = len(params.rlz_index) if explicit else params.num_rlzs_disagg or 1
    rlzs = numpy.zeros((self.N, Z), int)
    if explicit:
        # the same user-specified realizations are used for every site
        for col, rlz_idx in enumerate(params.rlz_index):
            rlzs[:, col] = rlz_idx
    elif self.R > 1:
        # for each site take the Z realizations closest to the mean
        for sid in self.sitecol.sids:
            pcurves = numpy.array(
                [pc.array for pc in self.pgetter.get_pcurves(sid)])
            mean_curve = getters.build_stat_curve(
                pcurves, params.imtls, stats.mean_curve, weights)
            rlzs[sid] = util.closest_to_ref(pcurves, mean_curve.array)[:Z]
    self.datastore['best_rlzs'] = rlzs
    assert Z <= self.R, (Z, self.R)
    self.Z = Z
    self.rlzs = rlzs

    if params.iml_disagg:
        # no hazard curves are needed
        self.poe_id = {None: 0}
        curves = [[None] * Z for _ in range(self.N)]
        self.ok_sites = set(self.sitecol.sids)
    else:
        self.poe_id = {
            poe: idx for idx, poe in enumerate(params.poes_disagg)}
        curves = [self.get_curve(sid, rlzs[sid])
                  for sid in self.sitecol.sids]
        self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
    self.iml4 = _iml4(rlzs, params.iml_disagg, params.imtls,
                      self.poes_disagg, curves)
    self.datastore['iml4'] = self.iml4
    self.datastore['poe4'] = numpy.zeros_like(self.iml4.array)

    self.save_bin_edges()
    out_sizes = get_outputs_size(self.shapedic, params.disagg_outputs)
    logging.info('Total output size: %s',
                 humansize(sum(out_sizes.values())))

    imldic = self.imldic = {}  # sid, rlz, poe, imt -> iml
    for sid in self.sitecol.sids:
        site_imls = self.iml4[sid]
        for z, rlz in enumerate(rlzs[sid]):
            for p, poe in enumerate(self.poes_disagg):
                for m, imt in enumerate(params.imtls):
                    imldic[sid, rlz, poe, imt] = site_imls[m, p, z]

    # submit disaggregation tasks
    dstore = self.datastore.parent or self.datastore
    indices = get_indices_by_gidx_mag(dstore, self.bin_edges[0])
    totweight = 0
    for key in indices:
        totweight += sum(rup.weight for rup in indices[key])
    maxweight = int(numpy.ceil(
        totweight / (params.concurrent_tasks or 1)))
    grp_ids = dstore['grp_ids'][:]
    rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(grp_ids)
    num_eff_rlzs = len(self.full_lt.sm_rlzs)

    allargs = []
    task_inputs = []
    G = 0  # maximum number of GSIMs in a context maker
    U = 0  # maximum number of ruptures in a task
    for gidx, magi in indices:
        trti = grp_ids[gidx][0] // num_eff_rlzs
        cmaker_params = {
            'truncation_level': params.truncation_level,
            'maximum_distance': params.maximum_distance,
            'collapse_level': params.collapse_level,
            'imtls': params.imtls}
        cmaker = ContextMaker(
            self.trts[trti], rlzs_by_gsim[gidx], cmaker_params)
        G = max(G, len(cmaker.gsims))
        for block in block_splitter(
                indices[gidx, magi], maxweight, weight):
            idxs = numpy.array([rup.index for rup in block])
            nrups = len(idxs)
            U = max(U, nrups)
            allargs.append((dstore, idxs, cmaker, self.iml4, trti, magi,
                            self.bin_edges[1:], params))
            task_inputs.append((trti, magi, nrups))

    nbytes, descr = get_array_nbytes(
        dict(N=self.N, M=self.M, G=G, U=U))
    logging.info('Maximum mean_std per task:\n%s', descr)

    # the TRT and magnitude dimensions are replaced by the task count
    transfer_shape = self.shapedic.copy()
    del transfer_shape['trt']
    del transfer_shape['mag']
    transfer_shape['tasks'] = numpy.ceil(len(allargs))
    nbytes, descr = get_array_nbytes(transfer_shape)
    if nbytes > params.max_data_transfer:
        raise ValueError(
            'Estimated data transfer too big\n%s > max_data_transfer=%s' %
            (descr, humansize(params.max_data_transfer)))
    logging.info('Estimated data transfer:\n%s', descr)

    task_dt = numpy.dtype([('trti', U8), ('magi', U8), ('nrups', U32)])
    self.datastore['disagg_task'] = numpy.array(task_inputs, task_dt)
    self.datastore.swmr_on()
    smap = parallel.Starmap(
        compute_disagg, allargs, h5=self.datastore.hdf5)
    # imti, sid -> trti, magi -> 6D array
    return smap.reduce(self.agg_result, AccumDict(accum={}))
def acc0(self):
    """
    Initial accumulator, a dict grp_id -> ProbabilityMap(L, G)

    As side effects the method creates the `rup/` datasets (only when
    `self.few_sites` is true), initializes several counters, logs the
    estimated maximum size of the probability maps and, if
    `disagg_by_src` is set, creates the `disagg_by_src` dataset.

    :returns: an empty AccumDict with an `eff_ruptures` attribute
    :raises RuntimeError: if the `disagg_by_src` matrix is too large
    """
    acc = AccumDict()
    num_levels = len(self.oqparam.imtls.array)

    # collect the rupture parameters required by all the GSIMs; the
    # names with a trailing underscore are distance-like parameters
    rup_params = {'grp_id', 'occurrence_rate', 'weight', 'probs_occur',
                  'clon_', 'clat_', 'rrup_'}
    gsims_by_trt = self.full_lt.get_gsims_by_trt()
    num_sm_rlzs = len(self.full_lt.sm_rlzs)
    trts = list(self.full_lt.gsim_lt.values)
    for sm_rlz in self.full_lt.sm_rlzs:
        for grp_id in self.full_lt.grp_ids(sm_rlz.ordinal):
            trt = trts[grp_id // num_sm_rlzs]
            cmaker = ContextMaker(trt, gsims_by_trt[trt])
            rup_params.update(cmaker.REQUIRES_RUPTURE_PARAMETERS)
            rup_params.update(
                dist + '_' for dist in cmaker.REQUIRES_DISTANCES)
    acc.eff_ruptures = AccumDict(accum=0)  # trt -> eff_ruptures

    if not self.few_sites:
        self.rparams = {}
    else:
        self.rparams = sorted(rup_params)
        for param in self.rparams:
            # variable length arrays
            dset_name = 'rup/' + param
            if param == 'grp_id':
                self.datastore.create_dset(dset_name, U16)
            elif param == 'probs_occur':  # vlen
                self.datastore.create_dset(dset_name, hdf5.vfloat64)
            elif param.endswith('_'):  # array of shape (U, N)
                self.datastore.create_dset(
                    dset_name, F32, shape=(None, self.N),
                    compression='gzip')
            else:
                self.datastore.create_dset(dset_name, F32)

    self.by_task = {}  # task_no => src_ids
    self.totrups = 0  # total number of ruptures before collapsing
    self.maxradius = 0
    gidx = {}
    for idx, ids in enumerate(self.datastore['grp_ids']):
        gidx[tuple(ids)] = idx
    self.gidx = gidx

    # estimate max memory per core
    max_num_gsims = max(map(len, gsims_by_trt.values()))
    max_num_grp_ids = max(map(len, gidx))
    pmapbytes = self.N * num_levels * max_num_gsims * max_num_grp_ids * 8
    report = (self.N, num_levels, max_num_gsims, max_num_grp_ids,
              humansize(pmapbytes))
    if pmapbytes > TWO32:
        logging.warning(TOOBIG % report)
    logging.info(MAXMEMORY % report)

    self.Ns = len(self.csm.source_info)
    if not self.oqparam.disagg_by_src:
        return acc

    self.M = len(self.oqparam.imtls)
    self.L1 = num_levels // self.M
    encoded_ids = encode(list(self.csm.source_info))
    nbytes, descr = get_array_nbytes(
        dict(N=self.N, R=self.R, M=self.M, L1=self.L1, Ns=self.Ns))
    if nbytes > TWO32:
        raise RuntimeError(
            'The matrix disagg_by_src is too large: %s' % descr)
    self.datastore.create_dset(
        'disagg_by_src', F32,
        (self.N, self.R, self.M, self.L1, self.Ns))
    self.datastore.set_shape_attrs(
        'disagg_by_src', site_id=self.N, rlz_id=self.R,
        imt=list(self.oqparam.imtls), lvl=self.L1, src_id=encoded_ids)
    return acc
def full_disaggregation(self):
    """
    Run the disaggregation phase.

    The method chooses the realizations to disaggregate (`self.rlzs`,
    an array of shape (N, Z)), computes the intensity levels
    (`self.iml4`), builds the magnitude, distance, longitude, latitude
    and epsilon bin edges, checks the estimated data transfer against
    `oq.max_data_transfer` and submits one `compute_disagg` task for
    each block of rupture indices of each source group.

    :returns:
        the result of reducing the task outputs with `self.agg_result`
    :raises NotImplementedError: if a source group is atomic
    :raises ValueError: if the estimated data transfer is too big
    """
    oq = self.oqparam
    tl = oq.truncation_level
    src_filter = self.src_filter()
    if hasattr(self, 'csm'):
        for sg in self.csm.src_groups:
            if sg.atomic:
                raise NotImplementedError(
                    'Atomic groups are not supported yet')

    self.full_lt = self.datastore['full_lt']
    # with iml_disagg there are no PoEs: use a single placeholder
    self.poes_disagg = oq.poes_disagg or (None,)
    self.imts = list(oq.imtls)
    self.ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
    self.pgetter = getters.PmapGetter(
        self.datastore, self.ws, self.sitecol.sids)

    # build array rlzs (N, Z)
    if oq.rlz_index is None:
        Z = oq.num_rlzs_disagg
        rlzs = numpy.zeros((self.N, Z), int)
        if self.R > 1:
            # for each site take the Z realizations closest to the mean
            for sid in self.sitecol.sids:
                curves = numpy.array(
                    [pc.array for pc in self.pgetter.get_pcurves(sid)])
                mean = getters.build_stat_curve(
                    curves, oq.imtls, stats.mean_curve, self.ws)
                rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
        self.datastore['best_rlzs'] = rlzs
    else:
        # the same user-specified realizations are used for every site
        Z = len(oq.rlz_index)
        rlzs = numpy.zeros((self.N, Z), int)
        for z in range(Z):
            rlzs[:, z] = oq.rlz_index[z]
    assert Z <= self.R, (Z, self.R)
    self.Z = Z
    self.rlzs = rlzs

    if oq.iml_disagg:
        # no hazard curves are needed
        self.poe_id = {None: 0}
        curves = [[None for z in range(Z)] for s in range(self.N)]
        self.ok_sites = set(self.sitecol.sids)
    else:
        self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
        curves = [self.get_curve(sid, rlzs[sid])
                  for sid in self.sitecol.sids]
        self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
    self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                      self.poes_disagg, curves)
    if oq.disagg_by_src:
        self.build_disagg_by_src(rlzs)

    # build trt_edges
    trts = tuple(self.full_lt.trts)
    trt_num = {trt: i for i, trt in enumerate(trts)}
    self.trts = trts

    # build mag_edges
    mags = [float(mag) for mag in self.datastore['source_mags']]
    mag_edges = oq.mag_bin_width * numpy.arange(
        int(numpy.floor(min(mags) / oq.mag_bin_width)),
        int(numpy.ceil(max(mags) / oq.mag_bin_width) + 1))

    # build dist_edges
    maxdist = max(oq.maximum_distance(trt) for trt in trts)
    dist_edges = oq.distance_bin_width * numpy.arange(
        0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

    # build eps_edges (computed only once, here)
    eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

    # build lon_edges, lat_edges per sid
    bbs = src_filter.get_bounding_boxes(mag=max(mags))
    lon_edges, lat_edges = {}, {}  # by sid
    for sid, bb in zip(self.sitecol.sids, bbs):
        lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
            bb, oq.coordinate_bin_width)
    self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
    shapedic = self.save_bin_edges()
    del shapedic['trt']
    shapedic['N'] = self.N
    shapedic['M'] = len(oq.imtls)
    # use self.poes_disagg, which has length 1 when iml_disagg is used:
    # len(oq.poes_disagg) would be 0 there, zeroing the whole estimate
    # and silently disabling the max_data_transfer check
    shapedic['P'] = len(self.poes_disagg)
    shapedic['Z'] = Z
    # same fallback used below when splitting the rupture indices, so
    # that concurrent_tasks=0 does not zero the estimate either
    shapedic['concurrent_tasks'] = oq.concurrent_tasks or 1
    nbytes, msg = get_array_nbytes(shapedic)
    if nbytes > oq.max_data_transfer:
        raise ValueError('Estimated data transfer too big\n%s' % msg)
    logging.info('Estimated data transfer: %s', msg)

    self.imldict = {}  # sid, rlz, poe, imt -> iml
    for s in self.sitecol.sids:
        for z, rlz in enumerate(rlzs[s]):
            for p, poe in enumerate(self.poes_disagg):
                for m, imt in enumerate(oq.imtls):
                    self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z]

    # submit #groups disaggregation tasks
    dstore = (self.datastore.parent if self.datastore.parent
              else self.datastore)
    indices = get_indices(dstore, oq.concurrent_tasks or 1)
    self.datastore.swmr_on()
    smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
    for grp_id, trt in self.full_lt.trt_by_grp.items():
        logging.info('Group #%d, sending rup_data for %s', grp_id, trt)
        trti = trt_num[trt]
        cmaker = ContextMaker(
            trt, self.full_lt.get_rlzs_by_gsim(grp_id),
            {'truncation_level': oq.truncation_level,
             'maximum_distance': src_filter.integration_distance,
             'filter_distance': oq.filter_distance,
             'imtls': oq.imtls})
        for idxs in indices[grp_id]:
            smap.submit((dstore, idxs, cmaker, self.iml4, trti,
                         self.bin_edges))
    results = smap.reduce(self.agg_result, AccumDict(accum={}))
    return results  # sid -> trti-> 8D array