def pre_checks(self):
    """
    Checks on the number of sites, atomic groups and size of the
    disaggregation matrix.

    The checks are performed in this order:

    1. ``self.N`` must be below 32768 and must not exceed
       ``oqparam.max_sites_disagg``;
    2. no source group may be atomic; the groups are inspected on
       ``self.csm`` when that attribute exists, otherwise the ``atomic``
       attribute of the ``source_info`` dataset is used;
    3. the disaggregation matrix implied by the bin edges must not have
       more than 1E6 elements.

    If all the checks pass, the total size of the requested outputs is
    logged.

    :raises ValueError:
        if there are too many sites or the matrix is too large
    :raises NotImplementedError:
        if atomic source groups are present
    """
    # NOTE(review): the condition rejects N == 32768 while the message
    # says "at max 32,768"; confirm which of the two is the real limit
    if self.N >= 32768:
        raise ValueError('You can disaggregate at max 32,768 sites')
    few = self.oqparam.max_sites_disagg
    if self.N > few:
        raise ValueError(
            'The number of sites to disaggregate is %d, but you have '
            'max_sites_disagg=%d' % (self.N, few))
    if hasattr(self, 'csm'):
        if any(sg.atomic for sg in self.csm.src_groups):
            raise NotImplementedError(
                'Atomic groups are not supported yet')
    elif self.datastore['source_info'].attrs['atomic']:
        raise NotImplementedError('Atomic groups are not supported yet')
    all_edges, shapedic = disagg.get_edges_shapedic(
        self.oqparam, self.sitecol, self.datastore['source_mags'])
    *b, trts = all_edges
    T = len(trts)
    # the third and fourth sets of edges are indexed with [0]: only their
    # first entry is used to size the matrix (presumably they hold one
    # array per site -- TODO confirm)
    shape = [len(edges) - 1
             for edges in (b[0], b[1], b[2][0], b[3][0], b[4])] + [T]
    matrix_size = numpy.prod(shape)  # 6D
    if matrix_size > 1E6:
        raise ValueError('The disaggregation matrix is too large '
                         '(%d elements): fix the binning!' % matrix_size)
    tot = get_outputs_size(shapedic, self.oqparam.disagg_outputs)
    logging.info('Total output size: %s', humansize(sum(tot.values())))
def full_disaggregation(self):
    """
    Run the disaggregation phase.

    Saves the bin edges, builds the probability-map getter, selects ``Z``
    realizations per site (stored as ``best_rlzs``), computes ``hmap4``
    and finally delegates to ``self.compute()``.

    :returns: whatever ``self.compute()`` returns
    :raises SystemExit: if ``hmap4`` sums to zero
    """
    oqp = self.oqparam
    bin_edges, self.shapedic = disagg.get_edges_shapedic(
        oqp, self.sitecol, self.datastore['source_mags'])
    self.save_bin_edges(bin_edges)
    self.full_lt = self.datastore['full_lt']
    self.poes_disagg = oqp.poes_disagg or (None, )
    self.imts = list(oqp.imtls)
    self.M = len(self.imts)
    weights = [rlz.weight for rlz in self.full_lt.get_realizations()]
    # read from the parent datastore when there is one
    dstore = self.datastore.parent or self.datastore
    n_rows = len(dstore['_poes/sid'])
    self.pgetter = getters.PmapGetter(
        dstore, weights, [(0, n_rows + 1)], oqp.imtls, oqp.poes)

    # build array rlzs (N, Z)
    if oqp.rlz_index is not None:
        # the user fixed the realizations: same columns for every site
        Z = len(oqp.rlz_index)
        rlzs = numpy.zeros((self.N, Z), int)
        for col, rlz_id in enumerate(oqp.rlz_index):
            rlzs[:, col] = rlz_id
    else:
        Z = oqp.num_rlzs_disagg or 1
        rlzs = numpy.zeros((self.N, Z), int)
        if self.R > 1:
            for sid in self.sitecol.sids:
                pcurve = self.pgetter.get_pcurve(sid)
                mean = getters.build_stat_curve(
                    pcurve, oqp.imtls, stats.mean_curve, weights)
                # get the closest realization to the mean
                closest = util.closest_to_ref(pcurve.array.T, mean.array)
                rlzs[sid] = closest[:Z]
    self.datastore['best_rlzs'] = rlzs
    assert Z <= self.R, (Z, self.R)
    self.Z = Z
    self.rlzs = rlzs
    self.curves = []

    if oqp.iml_disagg:
        # no hazard curves are needed
        self.poe_id = {None: 0}
        curves = [[None] * Z for _ in range(self.N)]
    else:
        self.poe_id = {
            poe: idx for idx, poe in enumerate(oqp.poes_disagg)}
        curves = []
        for sid in self.sitecol.sids:
            curves.append(self.get_curve(sid, rlzs[sid]))
    self.hmap4 = _hmap4(
        rlzs, oqp.iml_disagg, oqp.imtls, self.poes_disagg, curves)
    if self.hmap4.array.sum() == 0:
        raise SystemExit('Cannot do any disaggregation: zero hazard')
    self.datastore['hmap4'] = self.hmap4
    self.datastore['poe4'] = numpy.zeros_like(self.hmap4.array)
    return self.compute()
def extract_disagg_layer(dstore, what):
    """
    Extract a disaggregation layer containing all sites and outputs

    Example:
    http://127.0.0.1:8800/v1/calc/30/extract/disagg_layer?

    The output kinds are taken from the ``kind`` entry of the parsed query
    when present, otherwise from ``oqparam.disagg_outputs``. For each site,
    IMT and PoE the stored disaggregation array is combined with the
    normalized weights of the best realizations of that site.

    :param dstore: a datastore
    :param what: the query string, parsed with ``parse``
    :returns: an ArrayWrapper with one record per site and the
        mag, dist, eps and trt edges as attributes
    """
    params = parse(what)
    oq = dstore['oqparam']
    oq.maximum_distance = filters.MagDepDistance(oq.maximum_distance)
    kinds = params['kind'] if 'kind' in params else oq.disagg_outputs
    sitecol = dstore['sitecol']
    poes_disagg = oq.poes_disagg or (None, )
    edges, shapedic = disagg.get_edges_shapedic(
        oq, sitecol, dstore['source_mags'])
    layer_dt = _disagg_output_dt(shapedic, kinds, oq.imtls, poes_disagg)
    out = numpy.zeros(len(sitecol), layer_dt)
    all_rlzs = numpy.array(dstore['full_lt'].get_realizations())
    hmap4 = dstore['hmap4'][:]
    best_rlzs = dstore['best_rlzs'][:]
    # read each requested disaggregation array fully in memory, once
    arr = {}
    for kind in kinds:
        arr[kind] = dstore['disagg/' + kind][:]

    sites = zip(sitecol.sids, sitecol.lons, sitecol.lats, out)
    for sid, lon, lat, rec in sites:
        site_rlzs = all_rlzs[best_rlzs[sid]]
        rec['site_id'] = sid
        rec['lon'] = lon
        rec['lat'] = lat
        rec['lon_bins'] = edges[2][sid]
        rec['lat_bins'] = edges[3][sid]
        for m, imt in enumerate(oq.imtls):
            weights = numpy.array([rlz.weight[imt] for rlz in site_rlzs])
            weights /= weights.sum()  # normalize to 1
            for p, poe in enumerate(poes_disagg):
                for kind in kinds:
                    name = '%s-%s-%s' % (kind, imt, poe)
                    rec[name] = arr[kind][sid, m, p] @ weights
                rec['iml-%s-%s' % (imt, poe)] = hmap4[sid, m, p]

    attrs = {
        'mag': edges[0],
        'dist': edges[1],
        'eps': edges[-2],
        'trt': numpy.array(encode(edges[-1])),
    }
    return ArrayWrapper(out, attrs)
def extract_disagg_layer(dstore, what):
    """
    Extract a disaggregation layer containing all sites and outputs

    Example:
    http://127.0.0.1:8800/v1/calc/30/extract/disagg_layer?

    The output kinds are taken from the ``kind`` entry of the parsed query
    when present, otherwise from ``oqparam.disagg_outputs`` (falling back
    to ``disagg.pmf_map``). Each value is read from a dataset named
    ``disagg/rlz-<rlz>-<imt>-sid-<sid>-poe-<p>/<kind>``.

    :param dstore: a datastore
    :param what: the query string, parsed with ``parse``
    :returns: an ArrayWrapper with one record per site and the
        mag, dist, eps and trt edges as attributes
    """
    params = parse(what)
    oq = dstore['oqparam']
    if 'kind' not in params:
        kinds = list(oq.disagg_outputs or disagg.pmf_map)
    else:
        kinds = params['kind']
    sitecol = dstore['sitecol']
    poes_disagg = oq.poes_disagg or (None, )
    edges, shapedic = disagg.get_edges_shapedic(
        oq, sitecol, dstore['source_mags'])
    layer_dt = _disagg_output_dt(shapedic, kinds, oq.imtls, poes_disagg)
    out = numpy.zeros(len(sitecol), layer_dt)
    try:
        best_rlzs = dstore['best_rlzs']
    except KeyError:
        # no best realizations stored: use realization 0 everywhere
        best_rlzs = numpy.zeros((len(sitecol), shapedic['Z']), U16)

    label_fmt = 'disagg/rlz-%d-%s-sid-%d-poe-%s/%s'
    sites = zip(sitecol.sids, sitecol.lons, sitecol.lats, out)
    for sid, lon, lat, rec in sites:
        rlzs = best_rlzs[sid]
        rec['site_id'] = sid
        rec['lon'] = lon
        rec['lat'] = lat
        rec['rlz_id'] = rlzs
        rec['lon_bins'] = edges[2][sid]
        rec['lat_bins'] = edges[3][sid]
        for kind in kinds:
            for imt in oq.imtls:
                for p, poe in enumerate(poes_disagg):
                    field = '%s-%s-%s' % (kind, imt, poe)
                    # NOTE(review): every realization writes the same
                    # field, so the last one wins -- confirm it is wanted
                    for rlz in rlzs:
                        label = label_fmt % (rlz, imt, sid, p, kind)
                        rec[field] = dstore[label][()]

    attrs = {
        'mag': edges[0],
        'dist': edges[1],
        'eps': edges[-2],
        'trt': encode(edges[-1]),
    }
    return ArrayWrapper(out, attrs)
def full_disaggregation(self):
    """
    Run the disaggregation phase.

    Builds the bin edges, selects ``Z`` realizations per site, computes
    the intensity levels ``self.iml3``, checks the estimated data transfer
    against ``oqparam.max_data_transfer`` and submits one
    ``compute_disagg`` task per source group, block of rupture indices
    and IMT.

    :returns: the result of reducing the tasks with ``self.agg_result``
    :raises NotImplementedError: if a source group is atomic
    :raises ValueError: if the estimated data transfer is too big
    """
    oq = self.oqparam
    all_edges, shapedic = disagg.get_edges_shapedic(
        oq, self.sitecol, self.datastore['source_mags'])
    *self.bin_edges, self.trts = all_edges
    src_filter = self.src_filter()
    if hasattr(self, 'csm') and any(
            sg.atomic for sg in self.csm.src_groups):
        raise NotImplementedError('Atomic groups are not supported yet')

    self.full_lt = self.datastore['full_lt']
    self.poes_disagg = oq.poes_disagg or (None,)
    self.imts = list(oq.imtls)
    self.ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
    self.pgetter = getters.PmapGetter(
        self.datastore, self.ws, self.sitecol.sids)

    # build array rlzs (N, Z)
    if oq.rlz_index is not None:
        # the user fixed the realizations: same columns for every site
        Z = len(oq.rlz_index)
        rlzs = numpy.zeros((self.N, Z), int)
        for col, rlz_id in enumerate(oq.rlz_index):
            rlzs[:, col] = rlz_id
    else:
        Z = oq.num_rlzs_disagg
        rlzs = numpy.zeros((self.N, Z), int)
        if self.R > 1:
            for sid in self.sitecol.sids:
                pcurves = self.pgetter.get_pcurves(sid)
                curves = numpy.array([pc.array for pc in pcurves])
                mean = getters.build_stat_curve(
                    curves, oq.imtls, stats.mean_curve, self.ws)
                closest = util.closest_to_ref(curves, mean.array)
                rlzs[sid] = closest[:Z]
        self.datastore['best_rlzs'] = rlzs
    assert Z <= self.R, (Z, self.R)
    self.Z = Z
    self.rlzs = rlzs

    if oq.iml_disagg:
        # no hazard curves are needed
        self.poe_id = {None: 0}
        curves = [[None] * Z for _ in range(self.N)]
        self.ok_sites = set(self.sitecol.sids)
    else:
        self.poe_id = {
            poe: idx for idx, poe in enumerate(oq.poes_disagg)}
        curves = []
        for sid in self.sitecol.sids:
            curves.append(self.get_curve(sid, rlzs[sid]))
        self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
    self.iml3 = _iml3(
        rlzs, oq.iml_disagg, oq.imtls, self.poes_disagg, curves)
    if oq.disagg_by_src:
        self.build_disagg_by_src(rlzs)

    self.save_bin_edges()
    # the 'trt' entry is left out of the data transfer estimate
    sizes = shapedic.copy()
    sizes.pop('trt')
    nbytes, msg = get_array_nbytes(sizes)
    if nbytes > oq.max_data_transfer:
        raise ValueError(
            'Estimated data transfer too big\n%s > max_data_transfer=%s' %
            (msg, humansize(oq.max_data_transfer)))
    logging.info('Estimated data transfer:\n%s', msg)
    tot = get_outputs_size(shapedic, oq.disagg_outputs or disagg.pmf_map)
    logging.info('Total output size: %s', humansize(sum(tot.values())))

    self.imldic = {}  # sid, rlz, poe, imt -> iml
    for sid in self.sitecol.sids:
        for z, rlz in enumerate(rlzs[sid]):
            for p, poe in enumerate(self.poes_disagg):
                for imt in oq.imtls:
                    iml = self.iml3[imt][sid, p, z]
                    self.imldic[sid, rlz, poe, imt] = iml

    # submit #groups disaggregation tasks
    dstore = self.datastore.parent or self.datastore
    M = len(oq.imtls)
    tasks_per_imt = numpy.ceil(oq.concurrent_tasks / M) or 1
    rups_per_task = len(dstore['rup/mag']) / tasks_per_imt
    logging.info('Considering ~%d ruptures per task', rups_per_task)
    indices = get_indices(dstore, tasks_per_imt)
    self.datastore.swmr_on()
    smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
    trt_num = {trt: num for num, trt in enumerate(self.trts)}
    for grp_id, trt in self.full_lt.trt_by_grp.items():
        logging.info('Group #%d, sending rup_data for %s', grp_id, trt)
        trti = trt_num[trt]
        param = {
            'truncation_level': oq.truncation_level,
            'maximum_distance': src_filter.integration_distance,
            'imtls': oq.imtls,
        }
        cmaker = ContextMaker(
            trt, self.full_lt.get_rlzs_by_gsim(grp_id), param)
        for idxs in indices[grp_id]:
            for imt in oq.imtls:
                args = (dstore, idxs, cmaker, self.iml3[imt], trti,
                        self.bin_edges, oq)
                smap.submit(args)
    # sid -> trti-> 8D array
    return smap.reduce(self.agg_result, AccumDict(accum={}))
def full_disaggregation(self):
    """
    Run the disaggregation phase.

    Builds the bin edges, rejects atomic source groups, selects ``Z``
    realizations per site (stored as ``best_rlzs``), computes ``hmap4``,
    saves the bin edges, logs the total output size and finally delegates
    to ``self.compute()``.

    :returns: whatever ``self.compute()`` returns
    :raises NotImplementedError: if atomic source groups are present
    :raises SystemExit: if ``hmap4`` sums to zero
    """
    oqp = self.oqparam
    all_edges, self.shapedic = disagg.get_edges_shapedic(
        oqp, self.sitecol, self.datastore['source_mags'])
    *self.bin_edges, self.trts = all_edges
    if hasattr(self, 'csm'):
        if any(sg.atomic for sg in self.csm.src_groups):
            raise NotImplementedError(
                'Atomic groups are not supported yet')
    elif self.datastore['source_info'].attrs['atomic']:
        raise NotImplementedError('Atomic groups are not supported yet')

    self.full_lt = self.datastore['full_lt']
    self.poes_disagg = oqp.poes_disagg or (None, )
    self.imts = list(oqp.imtls)
    self.M = len(self.imts)
    weights = [rlz.weight for rlz in self.full_lt.get_realizations()]
    self.pgetter = getters.PmapGetter(
        self.datastore, weights, self.sitecol.sids, oqp.imtls, oqp.poes)

    # build array rlzs (N, Z)
    if oqp.rlz_index is not None:
        # the user fixed the realizations: same columns for every site
        Z = len(oqp.rlz_index)
        rlzs = numpy.zeros((self.N, Z), int)
        for col, rlz_id in enumerate(oqp.rlz_index):
            rlzs[:, col] = rlz_id
    else:
        Z = oqp.num_rlzs_disagg or 1
        rlzs = numpy.zeros((self.N, Z), int)
        if self.R > 1:
            for sid in self.sitecol.sids:
                pcurves = self.pgetter.get_pcurves(sid)
                curves = numpy.array([pc.array for pc in pcurves])
                mean = getters.build_stat_curve(
                    curves, oqp.imtls, stats.mean_curve, weights)
                # get the closest realization to the mean
                closest = util.closest_to_ref(curves, mean.array)
                rlzs[sid] = closest[:Z]
    self.datastore['best_rlzs'] = rlzs
    assert Z <= self.R, (Z, self.R)
    self.Z = Z
    self.rlzs = rlzs
    self.curves = []

    if oqp.iml_disagg:
        # no hazard curves are needed
        self.poe_id = {None: 0}
        curves = [[None] * Z for _ in range(self.N)]
    else:
        self.poe_id = {
            poe: idx for idx, poe in enumerate(oqp.poes_disagg)}
        curves = []
        for sid in self.sitecol.sids:
            curves.append(self.get_curve(sid, rlzs[sid]))
    self.hmap4 = _hmap4(
        rlzs, oqp.iml_disagg, oqp.imtls, self.poes_disagg, curves)
    if self.hmap4.array.sum() == 0:
        raise SystemExit('Cannot do any disaggregation: zero hazard')
    self.datastore['hmap4'] = self.hmap4
    self.datastore['poe4'] = numpy.zeros_like(self.hmap4.array)

    self.save_bin_edges()
    out_sizes = get_outputs_size(self.shapedic, oqp.disagg_outputs)
    logging.info('Total output size: %s',
                 humansize(sum(out_sizes.values())))
    return self.compute()
def full_disaggregation(self):
    """
    Run the disaggregation phase.

    Builds the bin edges, rejects atomic source groups, selects ``Z``
    realizations per site (stored as ``best_rlzs``), computes ``iml4``,
    prepares one set of task arguments per block of ruptures of each
    (group index, magnitude bin) pair, checks the estimated data transfer
    against ``oqparam.max_data_transfer`` and runs ``compute_disagg``.

    :returns: the result of reducing the tasks with ``self.agg_result``
    :raises NotImplementedError: if atomic source groups are present
    :raises ValueError: if the estimated data transfer is too big
    """
    oq = self.oqparam
    all_edges, self.shapedic = disagg.get_edges_shapedic(
        oq, self.sitecol, self.datastore['source_mags'])
    *self.bin_edges, self.trts = all_edges
    if hasattr(self, 'csm'):
        if any(sg.atomic for sg in self.csm.src_groups):
            raise NotImplementedError(
                'Atomic groups are not supported yet')
    elif self.datastore['source_info'].attrs['atomic']:
        raise NotImplementedError(
            'Atomic groups are not supported yet')

    self.full_lt = self.datastore['full_lt']
    self.poes_disagg = oq.poes_disagg or (None,)
    self.imts = list(oq.imtls)
    self.M = len(self.imts)
    weights = [rlz.weight for rlz in self.full_lt.get_realizations()]
    self.pgetter = getters.PmapGetter(
        self.datastore, weights, self.sitecol.sids)

    # build array rlzs (N, Z)
    if oq.rlz_index is not None:
        # the user fixed the realizations: same columns for every site
        Z = len(oq.rlz_index)
        rlzs = numpy.zeros((self.N, Z), int)
        for col, rlz_id in enumerate(oq.rlz_index):
            rlzs[:, col] = rlz_id
    else:
        Z = oq.num_rlzs_disagg or 1
        rlzs = numpy.zeros((self.N, Z), int)
        if self.R > 1:
            for sid in self.sitecol.sids:
                pcurves = self.pgetter.get_pcurves(sid)
                curves = numpy.array([pc.array for pc in pcurves])
                mean = getters.build_stat_curve(
                    curves, oq.imtls, stats.mean_curve, weights)
                closest = util.closest_to_ref(curves, mean.array)
                rlzs[sid] = closest[:Z]
    self.datastore['best_rlzs'] = rlzs
    assert Z <= self.R, (Z, self.R)
    self.Z = Z
    self.rlzs = rlzs

    if oq.iml_disagg:
        # no hazard curves are needed
        self.poe_id = {None: 0}
        curves = [[None] * Z for _ in range(self.N)]
        self.ok_sites = set(self.sitecol.sids)
    else:
        self.poe_id = {
            poe: idx for idx, poe in enumerate(oq.poes_disagg)}
        curves = []
        for sid in self.sitecol.sids:
            curves.append(self.get_curve(sid, rlzs[sid]))
        self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
    self.iml4 = _iml4(
        rlzs, oq.iml_disagg, oq.imtls, self.poes_disagg, curves)
    self.datastore['iml4'] = self.iml4
    self.datastore['poe4'] = numpy.zeros_like(self.iml4.array)

    self.save_bin_edges()
    out_sizes = get_outputs_size(self.shapedic, oq.disagg_outputs)
    logging.info('Total output size: %s',
                 humansize(sum(out_sizes.values())))

    self.imldic = {}  # sid, rlz, poe, imt -> iml
    for sid in self.sitecol.sids:
        site_imls = self.iml4[sid]
        for z, rlz in enumerate(rlzs[sid]):
            for p, poe in enumerate(self.poes_disagg):
                for m, imt in enumerate(oq.imtls):
                    self.imldic[sid, rlz, poe, imt] = site_imls[m, p, z]

    # submit disaggregation tasks
    dstore = self.datastore.parent or self.datastore
    mag_edges = self.bin_edges[0]
    indices = get_indices_by_gidx_mag(dstore, mag_edges)
    allargs = []
    # per-key subtotals are added in iteration order
    totweight = 0
    for key in indices:
        totweight += sum(ri.weight for ri in indices[key])
    maxweight = int(numpy.ceil(totweight / (oq.concurrent_tasks or 1)))
    grp_ids = dstore['grp_ids'][:]
    rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(grp_ids)
    num_eff_rlzs = len(self.full_lt.sm_rlzs)
    task_inputs = []
    G = U = 0  # running maxima of the number of gsims and of ruptures
    for gidx, magi in indices:
        trti = grp_ids[gidx][0] // num_eff_rlzs
        param = {
            'truncation_level': oq.truncation_level,
            'maximum_distance': oq.maximum_distance,
            'collapse_level': oq.collapse_level,
            'imtls': oq.imtls,
        }
        cmaker = ContextMaker(self.trts[trti], rlzs_by_gsim[gidx], param)
        G = max(G, len(cmaker.gsims))
        blocks = block_splitter(indices[gidx, magi], maxweight, weight)
        for block in blocks:
            idxs = numpy.array([ri.index for ri in block])
            nrups = len(idxs)
            U = max(U, nrups)
            allargs.append((dstore, idxs, cmaker, self.iml4, trti, magi,
                            self.bin_edges[1:], oq))
            task_inputs.append((trti, magi, nrups))

    nbytes, msg = get_array_nbytes({'N': self.N, 'M': self.M,
                                    'G': G, 'U': U})
    logging.info('Maximum mean_std per task:\n%s', msg)

    # the 'trt' and 'mag' entries are left out of the transfer estimate,
    # while the number of tasks is added
    sizes = self.shapedic.copy()
    sizes.pop('trt')
    sizes.pop('mag')
    sizes['tasks'] = numpy.ceil(len(allargs))
    nbytes, msg = get_array_nbytes(sizes)
    if nbytes > oq.max_data_transfer:
        raise ValueError(
            'Estimated data transfer too big\n%s > max_data_transfer=%s' %
            (msg, humansize(oq.max_data_transfer)))
    logging.info('Estimated data transfer:\n%s', msg)

    task_dt = numpy.dtype([('trti', U8), ('magi', U8), ('nrups', U32)])
    self.datastore['disagg_task'] = numpy.array(task_inputs, task_dt)
    self.datastore.swmr_on()
    smap = parallel.Starmap(
        compute_disagg, allargs, h5=self.datastore.hdf5)
    # imti, sid -> trti, magi -> 6D array
    return smap.reduce(self.agg_result, AccumDict(accum={}))