def _read_scenario_ruptures(self): oq = self.oqparam gsim_lt = readinput.get_gsim_lt(self.oqparam) G = gsim_lt.get_num_paths() if oq.calculation_mode.startswith('scenario'): ngmfs = oq.number_of_ground_motion_fields if oq.inputs['rupture_model'].endswith('.xml'): # check the number of branchsets bsets = len(gsim_lt._ltnode) if bsets > 1: raise InvalidFile( '%s for a scenario calculation must contain a single ' 'branchset, found %d!' % (oq.inputs['job_ini'], bsets)) [(trt, rlzs_by_gsim)] = gsim_lt.get_rlzs_by_gsim_trt().items() self.cmaker = ContextMaker(trt, rlzs_by_gsim, oq) rup = readinput.get_rupture(oq) if self.N > oq.max_sites_disagg: # many sites, split rupture ebrs = [ EBRupture(copyobj(rup, rup_id=rup.rup_id + i), 'NA', 0, G, e0=i * G, scenario=True) for i in range(ngmfs) ] else: # keep a single rupture with a big occupation number ebrs = [ EBRupture(rup, 'NA', 0, G * ngmfs, rup.rup_id, scenario=True) ] srcfilter = SourceFilter(self.sitecol, oq.maximum_distance(trt)) aw = get_rup_array(ebrs, srcfilter) if len(aw) == 0: raise RuntimeError( 'The rupture is too far from the sites! Please check the ' 'maximum_distance and the position of the rupture') elif oq.inputs['rupture_model'].endswith('.csv'): aw = get_ruptures(oq.inputs['rupture_model']) if len(gsim_lt.values) == 1: # fix for scenario_damage/case_12 aw['trt_smr'] = 0 # a single TRT if oq.calculation_mode.startswith('scenario'): # rescale n_occ by ngmfs and nrlzs aw['n_occ'] *= ngmfs * gsim_lt.get_num_paths() else: raise InvalidFile("Something wrong in %s" % oq.inputs['job_ini']) rup_array = aw.array hdf5.extend(self.datastore['rupgeoms'], aw.geom) if len(rup_array) == 0: raise RuntimeError( 'There are no sites within the maximum_distance' ' of %s km from the rupture' % oq.maximum_distance(rup.tectonic_region_type)(rup.mag)) fake = logictree.FullLogicTree.fake(gsim_lt) self.realizations = fake.get_realizations() self.datastore['full_lt'] = fake self.store_rlz_info({}) # store weights self.save_params() imp = calc.RuptureImporter(self.datastore) imp.import_rups_events(rup_array, get_rupture_getters)
def _collect_bins_data(trt_num, source_ruptures, site, curves, trt_model_id, rlzs_assoc, gsims, imtls, poes, truncation_level, n_epsilons, mon): # returns a BinData instance sitecol = SiteCollection([site]) mags = [] dists = [] lons = [] lats = [] trts = [] pnes = [] sitemesh = sitecol.mesh make_ctxt = mon('making contexts', measuremem=False) disagg_poe = mon('disaggregate_poe', measuremem=False) cmaker = ContextMaker(gsims) for source, ruptures in source_ruptures: try: tect_reg = trt_num[source.tectonic_region_type] for rupture in ruptures: with make_ctxt: sctx, rctx, dctx = cmaker.make_contexts(sitecol, rupture) # extract rupture parameters of interest mags.append(rupture.mag) dists.append(dctx.rjb[0]) # single site => single distance [closest_point] = rupture.surface.get_closest_points(sitemesh) lons.append(closest_point.longitude) lats.append(closest_point.latitude) trts.append(tect_reg) pne_dict = {} # a dictionary rlz.id, poe, imt_str -> prob_no_exceed for gsim in gsims: gs = str(gsim) for imt_str, imls in imtls.iteritems(): imt = from_string(imt_str) imls = numpy.array(imls[::-1]) for rlz in rlzs_assoc[trt_model_id, gs]: rlzi = rlz.ordinal curve_poes = curves[rlzi, imt_str][::-1] for poe in poes: iml = numpy.interp(poe, curve_poes, imls) # compute probability of exceeding iml given # the current rupture and epsilon_bin, that is # ``P(IMT >= iml | rup, epsilon_bin)`` # for each of the epsilon bins with disagg_poe: [poes_given_rup_eps] = \ gsim.disaggregate_poe( sctx, rctx, dctx, imt, iml, truncation_level, n_epsilons) pne = rupture.get_probability_no_exceedance( poes_given_rup_eps) pne_dict[rlzi, poe, imt_str] = (iml, pne) pnes.append(pne_dict) except Exception as err: etype, err, tb = sys.exc_info() msg = 'An error occurred with source id=%s. Error: %s' msg %= (source.source_id, err) raise etype, msg, tb return BinData(numpy.array(mags, float), numpy.array(dists, float), numpy.array(lons, float), numpy.array(lats, float), numpy.array(trts, int), pnes)
def build_events_from_sources(self): """ Prefilter the composite source model and store the source_info """ oq = self.oqparam params = dict(imtls=oq.imtls, ses_per_logic_tree_path=oq.ses_per_logic_tree_path, ses_seed=oq.ses_seed) gsims_by_trt = self.csm.full_lt.get_gsims_by_trt() sources = self.csm.get_sources() # weighting the heavy sources nrups = parallel.Starmap(count_ruptures, [(src, ) for src in sources if src.code in b'AMC'], progress=logging.debug).reduce() for src in sources: try: src.num_ruptures = nrups[src.source_id] except KeyError: src.num_ruptures = src.count_ruptures() src.weight = src.num_ruptures maxweight = sum(sg.weight for sg in self.csm.src_groups) / ( self.oqparam.concurrent_tasks or 1) eff_ruptures = AccumDict(accum=0) # grp_id => potential ruptures source_data = AccumDict(accum=[]) allargs = [] if self.oqparam.is_ucerf(): # manage the filtering in a special way for sg in self.csm.src_groups: for src in sg: src.src_filter = self.srcfilter srcfilter = nofilter # otherwise it would be ultra-slow else: srcfilter = self.srcfilter logging.info('Building ruptures') for sg in self.csm.src_groups: if not sg.sources: continue logging.info('Sending %s', sg) cmaker = ContextMaker(sg.trt, gsims_by_trt[sg.trt], oq) for src_group in sg.split(maxweight): allargs.append((src_group, cmaker, srcfilter.sitecol)) smap = parallel.Starmap(sample_ruptures, allargs, h5=self.datastore.hdf5) mon = self.monitor('saving ruptures') self.nruptures = 0 # estimated classical ruptures within maxdist for dic in smap: # NB: dic should be a dictionary, but when the calculation dies # for an OOM it can become None, thus giving a very confusing error if dic is None: raise MemoryError('You ran out of memory!') rup_array = dic['rup_array'] if len(rup_array) == 0: continue if dic['source_data']: source_data += dic['source_data'] if dic['eff_ruptures']: eff_ruptures += dic['eff_ruptures'] with mon: n = len(rup_array) rup_array['id'] = numpy.arange(self.nruptures, self.nruptures + n) self.nruptures += n hdf5.extend(self.datastore['ruptures'], rup_array) hdf5.extend(self.datastore['rupgeoms'], rup_array.geom) if len(self.datastore['ruptures']) == 0: raise RuntimeError('No ruptures were generated, perhaps the ' 'investigation time is too short') # don't change the order of the 3 things below! self.store_source_info(source_data) self.store_rlz_info(eff_ruptures) imp = calc.RuptureImporter(self.datastore) with self.monitor('saving ruptures and events'): imp.import_rups_events( self.datastore.getitem('ruptures')[()], get_rupture_getters)
def calc_hazard_curves(groups, srcfilter, imtls, gsim_by_trt, truncation_level=None, apply=sequential_apply, reqv=None, **kwargs): """ Compute hazard curves on a list of sites, given a set of seismic source groups and a dictionary of ground shaking intensity models (one per tectonic region type). Probability of ground motion exceedance is computed in different ways depending if the sources are independent or mutually exclusive. :param groups: A sequence of groups of seismic sources objects (instances of of :class:`~openquake.hazardlib.source.base.BaseSeismicSource`). :param srcfilter: A source filter over the site collection or the site collection itself :param imtls: Dictionary mapping intensity measure type strings to lists of intensity measure levels. :param gsim_by_trt: Dictionary mapping tectonic region types (members of :class:`openquake.hazardlib.const.TRT`) to :class:`~openquake.hazardlib.gsim.base.GMPE` or :class:`~openquake.hazardlib.gsim.base.IPE` objects. :param truncation_level: Float, number of standard deviations for truncation of the intensity distribution. :param apply: apply function to use (default sequential_apply) :param reqv: If not None, an instance of RjbEquivalent :returns: An array of size N, where N is the number of sites, which elements are records with fields given by the intensity measure types; the size of each field is given by the number of levels in ``imtls``. """ # This is ensuring backward compatibility i.e. processing a list of # sources if not isinstance(groups[0], SourceGroup): # sent a list of sources odic = groupby(groups, operator.attrgetter('tectonic_region_type')) groups = [ SourceGroup(trt, odic[trt], 'src_group', 'indep', 'indep') for trt in odic ] idx = 0 span = None for i, grp in enumerate(groups): for src in grp: tom = getattr(src, 'temporal_occurrence_model', None) if tom: span = tom.time_span src.weight = src.count_ruptures() src.grp_id = i src.id = idx idx += 1 imtls = DictArray(imtls) shift_hypo = kwargs['shift_hypo'] if 'shift_hypo' in kwargs else False param = dict(imtls=imtls, truncation_level=truncation_level, reqv=reqv, cluster=grp.cluster, shift_hypo=shift_hypo, investigation_time=kwargs.get('investigation_time', span)) pmap = ProbabilityMap(imtls.size, 1) # Processing groups with homogeneous tectonic region mon = Monitor() sitecol = getattr(srcfilter, 'sitecol', srcfilter) for group in groups: trt = group.trt if sitecol is not srcfilter: param['maximum_distance'] = srcfilter.integration_distance(trt) cmaker = ContextMaker(trt, [gsim_by_trt[trt]], param, mon) if group.atomic: # do not split it = [classical(group, sitecol, cmaker)] else: # split the group and apply `classical` in parallel it = apply(classical, (group.sources, sitecol, cmaker), weight=operator.attrgetter('weight')) for dic in it: pmap |= dic['pmap'] return pmap.convert(imtls, len(sitecol.complete))
def get_ctx(self, rupture, site_collection): return ContextMaker('*', [self.gsim_class], dict(imtls={})).get_ctxs( [rupture], site_collection, 'src_id')[0]
def make_contexts(self, site_collection, rupture): return ContextMaker('faketrt', [self.gsim_class]).make_contexts( site_collection, rupture)
def compute(self): """ Submit disaggregation tasks and return the results """ oq = self.oqparam dstore = (self.datastore.parent if self.datastore.parent else self.datastore) magi = numpy.searchsorted(self.bin_edges[0], dstore['rup/mag'][:]) - 1 magi[magi == -1] = 0 # when the magnitude is on the edge totrups = len(magi) logging.info('Reading {:_d} ruptures'.format(totrups)) rdt = [('grp_id', U16), ('magi', U8), ('nsites', U16), ('idx', U32)] rdata = numpy.zeros(totrups, rdt) rdata['magi'] = magi rdata['idx'] = numpy.arange(totrups) rdata['grp_id'] = dstore['rup/grp_id'][:] rdata['nsites'] = dstore['rup/nsites'][:] totweight = rdata['nsites'].sum() et_ids = dstore['et_ids'][:] rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(et_ids) G = max(len(rbg) for rbg in rlzs_by_gsim) maxw = 2 * 1024**3 / (16 * G * self.M) # at max 2 GB maxweight = min( numpy.ceil(totweight / (oq.concurrent_tasks or 1)), maxw) num_eff_rlzs = len(self.full_lt.sm_rlzs) task_inputs = [] U = 0 self.datastore.swmr_on() smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5) # ABSURDLY IMPORTANT!! we rely on the fact that the classical part # of the calculation stores the ruptures in chunks of constant # grp_id, therefore it is possible to build (start, stop) slices; # we are NOT grouping by operator.itemgetter('grp_id', 'magi'): # that would break the ordering of the indices causing an incredibly # worse performance, but visible only in extra-large calculations! for block in block_splitter(rdata, maxweight, operator.itemgetter('nsites'), operator.itemgetter('grp_id')): grp_id = block[0]['grp_id'] trti = et_ids[grp_id][0] // num_eff_rlzs trt = self.trts[trti] cmaker = ContextMaker( trt, rlzs_by_gsim[grp_id], {'truncation_level': oq.truncation_level, 'maximum_distance': oq.maximum_distance, 'collapse_level': oq.collapse_level, 'num_epsilon_bins': oq.num_epsilon_bins, 'investigation_time': oq.investigation_time, 'imtls': oq.imtls}) U = max(U, block.weight) slc = slice(block[0]['idx'], block[-1]['idx'] + 1) smap.submit((dstore, slc, cmaker, self.hmap4, trti, magi[slc], self.bin_edges)) task_inputs.append((trti, slc.stop-slc.start)) nbytes, msg = get_nbytes_msg(dict(M=self.M, G=G, U=U, F=2)) logging.info('Maximum mean_std per task:\n%s', msg) s = self.shapedic sd = dict(N=s['N'], M=s['M'], P=s['P'], Z=s['Z'], D=s['dist'], E=s['eps'], Lo=s['lon'], La=s['lat']) sd['tasks'] = numpy.ceil(len(task_inputs)) nbytes, msg = get_nbytes_msg(sd) if nbytes > oq.max_data_transfer: raise ValueError( 'Estimated data transfer too big\n%s > max_data_transfer=%s' % (msg, humansize(oq.max_data_transfer))) logging.info('Estimated data transfer:\n%s', msg) sd.pop('tasks') dt = numpy.dtype([('trti', U8), ('nrups', U32)]) self.datastore['disagg_task'] = numpy.array(task_inputs, dt) results = smap.reduce(self.agg_result, AccumDict(accum={})) return results # imti, sid -> trti, magi -> 6D array
def compute_ruptures(sources, sitecol, siteidx, rlzs_assoc, monitor): """ :param sources: List of commonlib.source.Source tuples :param sitecol: a :class:`openquake.hazardlib.site.SiteCollection` instance :param siteidx: always equal to 0 :param rlzs_assoc: a :class:`openquake.commonlib.source.RlzsAssoc` instance :param monitor: monitor instance :returns: a dictionary trt_model_id -> [Rupture instances] """ assert siteidx == 0, ( 'siteidx can be nonzero only for the classical_tiling calculations: ' 'tiling with the EventBasedRuptureCalculator is an error') # NB: by construction each block is a non-empty list with # sources of the same trt_model_id trt_model_id = sources[0].trt_model_id oq = monitor.oqparam trt = sources[0].tectonic_region_type try: max_dist = oq.maximum_distance[trt] except KeyError: max_dist = oq.maximum_distance['default'] cmaker = ContextMaker(rlzs_assoc.gsims_by_trt_id[trt_model_id]) params = cmaker.REQUIRES_RUPTURE_PARAMETERS rup_data_dt = numpy.dtype( [('rupserial', U32), ('multiplicity', U16), ('numsites', U32)] + [ (param, F32) for param in params]) eb_ruptures = [] rup_data = [] calc_times = [] rup_mon = monitor('filtering ruptures', measuremem=False) # Compute and save stochastic event sets for src in sources: t0 = time.time() s_sites = src.filter_sites_by_distance_to_source(max_dist, sitecol) if s_sites is None: continue rupture_filter = RuptureFilter( s_sites, max_dist, oq.imtls, cmaker.gsims, oq.truncation_level, oq.minimum_intensity) num_occ_by_rup = sample_ruptures( src, oq.ses_per_logic_tree_path, rlzs_assoc.csm_info) # NB: the number of occurrences is very low, << 1, so it is # more efficient to filter only the ruptures that occur, i.e. # to call sample_ruptures *before* the filtering for ebr in build_eb_ruptures( src, num_occ_by_rup, rupture_filter, oq.random_seed, rup_mon): nsites = len(ebr.indices) rc = cmaker.make_rupture_context(ebr.rupture) ruptparams = tuple(getattr(rc, param) for param in params) rup_data.append((ebr.serial, len(ebr.etags), nsites) + ruptparams) eb_ruptures.append(ebr) dt = time.time() - t0 calc_times.append((src.id, dt)) res = AccumDict({trt_model_id: eb_ruptures}) res.calc_times = calc_times res.rup_data = numpy.array(rup_data, rup_data_dt) res.trt = trt return res
def full_disaggregation(self, curves): """ Run the disaggregation phase. :param curves: a list of hazard curves, one per site The curves can be all None if iml_disagg is set in the job.ini """ oq = self.oqparam tl = oq.truncation_level src_filter = SourceFilter(self.sitecol, oq.maximum_distance, use_rtree=False) csm = self.csm.filter(src_filter) # fine filtering self.datastore['csm_info'] = csm.info eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1) self.bin_edges = {} # build trt_edges trts = tuple( sorted( set(sg.trt for smodel in csm.source_models for sg in smodel.src_groups))) trt_num = {trt: i for i, trt in enumerate(trts)} self.trts = trts # build mag_edges min_mag = min(sg.min_mag for smodel in csm.source_models for sg in smodel.src_groups) max_mag = max(sg.max_mag for smodel in csm.source_models for sg in smodel.src_groups) mag_edges = oq.mag_bin_width * numpy.arange( int(numpy.floor(min_mag / oq.mag_bin_width)), int(numpy.ceil(max_mag / oq.mag_bin_width) + 1)) # build dist_edges maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts) dist_edges = oq.distance_bin_width * numpy.arange( 0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1)) # build eps_edges eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1) # build lon_edges, lat_edges per sid bbs = src_filter.get_bounding_boxes(mag=max_mag) lon_edges, lat_edges = {}, {} # by sid for sid, bb in zip(self.sitecol.sids, bbs): lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins( bb, oq.coordinate_bin_width) self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges self.save_bin_edges() # build all_args all_args = [] maxweight = csm.get_maxweight(oq.concurrent_tasks) mon = self.monitor('disaggregation') R = len(self.rlzs_assoc.realizations) iml4 = disagg.make_iml4(R, oq.imtls, oq.iml_disagg, oq.poes_disagg or (None, ), curves) self.imldict = {} # sid, rlzi, poe, imt -> iml for s in self.sitecol.sids: for r in range(R): for p, poe in enumerate(oq.poes_disagg or [None]): for m, imt in enumerate(oq.imtls): self.imldict[s, r, poe, imt] = iml4[s, r, m, p] for smodel in csm.source_models: sm_id = smodel.ordinal for trt, groups in groupby(smodel.src_groups, operator.attrgetter('trt')).items(): trti = trt_num[trt] sources = sum([grp.sources for grp in groups], []) rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id) cmaker = ContextMaker(rlzs_by_gsim, src_filter.integration_distance) for block in csm.split_in_blocks(maxweight, sources): all_args.append((src_filter, block, cmaker, iml4, trti, self.bin_edges, oq, mon)) self.num_ruptures = [0] * len(self.trts) self.cache_info = numpy.zeros(3) # operations, cache_hits, num_zeros results = parallel.Starmap(compute_disagg, all_args).reduce(self.agg_result, AccumDict(accum={})) ops, hits, num_zeros = self.cache_info logging.info('Cache speedup %s', ops / (ops - hits)) logging.info('Discarded zero matrices: %d', num_zeros) return results
def compute(self): """ Submit disaggregation tasks and return the results """ logging.info('Reading ruptures') oq = self.oqparam dstore = (self.datastore.parent if self.datastore.parent else self.datastore) mags = set() for trt, dset in self.datastore['source_mags'].items(): mags.update(dset[:]) mags = sorted(mags) allargs = [] totweight = sum(d['rctx']['nsites'].sum() for n, d in dstore.items() if n.startswith('mag_') and len(d['rctx'])) et_ids = dstore['et_ids'][:] rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(et_ids) G = max(len(rbg) for rbg in rlzs_by_gsim) maxw = 2 * 1024**3 / (16 * G * self.M) # at max 2 GB maxweight = min(numpy.ceil(totweight / (oq.concurrent_tasks or 1)), maxw) num_eff_rlzs = len(self.full_lt.sm_rlzs) task_inputs = [] U = 0 totrups = 0 for mag in mags: rctx = dstore['mag_%s/rctx' % mag][:] totrups += len(rctx) for grp_id, gids in enumerate(et_ids): idxs, = numpy.where(rctx['grp_id'] == grp_id) if len(idxs) == 0: continue trti = gids[0] // num_eff_rlzs trt = self.trts[trti] cmaker = ContextMaker( trt, rlzs_by_gsim[grp_id], { 'truncation_level': oq.truncation_level, 'maximum_distance': oq.maximum_distance, 'collapse_level': oq.collapse_level, 'imtls': oq.imtls }) for blk in block_splitter(rctx[idxs], maxweight, nsites): nr = len(blk) U = max(U, blk.weight) allargs.append((dstore, numpy.array(blk), cmaker, self.hmap4, trti, self.bin_edges, oq)) task_inputs.append((trti, mag, nr)) logging.info('Found {:_d} ruptures'.format(totrups)) nbytes, msg = get_array_nbytes(dict(M=self.M, G=G, U=U, F=2)) logging.info('Maximum mean_std per task:\n%s', msg) s = self.shapedic sd = dict(N=s['N'], M=s['M'], P=s['P'], Z=s['Z'], D=s['dist'], E=s['eps'], Lo=s['lon'], La=s['lat']) sd['tasks'] = numpy.ceil(len(allargs)) nbytes, msg = get_array_nbytes(sd) if nbytes > oq.max_data_transfer: raise ValueError( 'Estimated data transfer too big\n%s > max_data_transfer=%s' % (msg, humansize(oq.max_data_transfer))) logging.info('Estimated data transfer:\n%s', msg) sd.pop('tasks') sd['mags_trt'] = sum( len(mags) for mags in self.datastore['source_mags'].values()) nbytes, msg = get_array_nbytes(sd) logging.info('Estimated memory on the master:\n%s', msg) dt = numpy.dtype([('trti', U8), ('mag', '|S4'), ('nrups', U32)]) self.datastore['disagg_task'] = numpy.array(task_inputs, dt) self.datastore.swmr_on() smap = parallel.Starmap(compute_disagg, allargs, h5=self.datastore.hdf5) results = smap.reduce(self.agg_result, AccumDict(accum={})) return results # imti, sid -> trti, magi -> 6D array
def disaggregation(sources, site, imt, iml, gsim_by_trt, truncation_level, n_epsilons, mag_bin_width, dist_bin_width, coord_bin_width, source_filter=filters.source_site_noop_filter): """ Compute "Disaggregation" matrix representing conditional probability of an intensity mesaure type ``imt`` exceeding, at least once, an intensity measure level ``iml`` at a geographical location ``site``, given rupture scenarios classified in terms of: - rupture magnitude - Joyner-Boore distance from rupture surface to site - longitude and latitude of the surface projection of a rupture's point closest to ``site`` - epsilon: number of standard deviations by which an intensity measure level deviates from the median value predicted by a GSIM, given the rupture parameters - rupture tectonic region type In other words, the disaggregation matrix allows to compute the probability of each scenario with the specified properties (e.g., magnitude, or the magnitude and distance) to cause one or more exceedences of a given hazard level. For more detailed information about the disaggregation, see for instance "Disaggregation of Seismic Hazard", Paolo Bazzurro, C. Allin Cornell, Bulletin of the Seismological Society of America, Vol. 89, pp. 501-520, April 1999. :param sources: Seismic source model, as for :mod:`PSHA <openquake.hazardlib.calc.hazard_curve>` calculator it should be an iterator of seismic sources. :param site: :class:`~openquake.hazardlib.site.Site` of interest to calculate disaggregation matrix for. :param imt: Instance of :mod:`intensity measure type <openquake.hazardlib.imt>` class. :param iml: Intensity measure level. A float value in units of ``imt``. :param gsim_by_trt: Tectonic region type to GSIM objects mapping. :param truncation_level: Float, number of standard deviations for truncation of the intensity distribution. :param n_epsilons: Integer number of epsilon histogram bins in the result matrix. :param mag_bin_width: Magnitude discretization step, width of one magnitude histogram bin. :param dist_bin_width: Distance histogram discretization step, in km. :param coord_bin_width: Longitude and latitude histograms discretization step, in decimal degrees. :param source_filter: Optional source-site filter function. See :mod:`openquake.hazardlib.calc.filters`. :returns: A tuple of two items. First is itself a tuple of bin edges information for (in specified order) magnitude, distance, longitude, latitude, epsilon and tectonic region types. Second item is 6d-array representing the full disaggregation matrix. Dimensions are in the same order as bin edges in the first item of the result tuple. The matrix can be used directly by pmf-extractor functions. """ trts = sorted(set(src.tectonic_region_type for src in sources)) trt_num = dict((trt, i) for i, trt in enumerate(trts)) rlzs_by_gsim = {gsim_by_trt[trt]: [0] for trt in trts} cmaker = ContextMaker(rlzs_by_gsim, source_filter.integration_distance) imldict = make_imldict(rlzs_by_gsim, {str(imt): [iml]}, {str(imt): iml}) bdata = collect_bins_data(trt_num, sources, site, cmaker, imldict, truncation_level, n_epsilons) bd = pack(bdata, 'mags dists lons lats trti'.split()) if len(bd.mags) == 0: warnings.warn( 'No ruptures have contributed to the hazard at site %s' % site, RuntimeWarning) return None, None mag_bins = mag_bin_width * numpy.arange( int(numpy.floor(bd.mags.min() / mag_bin_width)), int(numpy.ceil(bd.mags.max() / mag_bin_width) + 1)) dist_bins = dist_bin_width * numpy.arange( int(numpy.floor(bd.dists.min() / dist_bin_width)), int(numpy.ceil(bd.dists.max() / dist_bin_width) + 1)) bb = (bd.lons.min(), bd.lons.min(), bd.lats.max(), bd.lats.max()) lon_bins, lat_bins = lon_lat_bins(bb, coord_bin_width) eps_bins = numpy.linspace(-truncation_level, truncation_level, n_epsilons + 1) bin_edges = (mag_bins, dist_bins, lon_bins, lat_bins, eps_bins, trts) [matrix] = arrange_data_in_bins(bd, bin_edges, 'matrix').values() return bin_edges, matrix
def full_disaggregation(self): """ Run the disaggregation phase after hazard curve finalization. """ oq = self.oqparam tl = self.oqparam.truncation_level sitecol = self.sitecol eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1) self.bin_edges = {} curves = [self.get_curves(sid) for sid in sitecol.sids] # determine the number of effective source groups sg_data = self.datastore['csm_info/sg_data'] num_grps = sum(1 for effrup in sg_data['effrup'] if effrup > 0) nblocks = math.ceil(oq.concurrent_tasks / num_grps) src_filter = SourceFilter(sitecol, oq.maximum_distance) R = len(self.rlzs_assoc.realizations) max_poe = numpy.zeros(R, oq.imt_dt()) # build trt_edges trts = tuple( sorted( set(sg.trt for smodel in self.csm.source_models for sg in smodel.src_groups))) # build mag_edges min_mag = min(sg.min_mag for smodel in self.csm.source_models for sg in smodel.src_groups) max_mag = max(sg.max_mag for smodel in self.csm.source_models for sg in smodel.src_groups) mag_edges = oq.mag_bin_width * numpy.arange( int(numpy.floor(min_mag / oq.mag_bin_width)), int(numpy.ceil(max_mag / oq.mag_bin_width) + 1)) # build dist_edges maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts) dist_edges = oq.distance_bin_width * numpy.arange( 0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1)) logging.info('dist = %s...%s', min(dist_edges), max(dist_edges)) # build eps_edges eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1) # build lon_edges, lat_edges per sid bbs = src_filter.get_bounding_boxes(mag=max_mag) for sid, bb in zip(self.sitecol.sids, bbs): lon_edges, lat_edges = disagg.lon_lat_bins(bb, oq.coordinate_bin_width) logging.info('site %d, lon = %s...%s', sid, min(lon_edges), max(lon_edges)) logging.info('site %d, lat = %s...%s', sid, min(lat_edges), max(lat_edges)) self.bin_edges[sid] = bs = (mag_edges, dist_edges, lon_edges, lat_edges, eps_edges) shape = [len(edges) - 1 for edges in bs] + [len(trts)] logging.info('%s for sid %d', shape, sid) # check poes for smodel in self.csm.source_models: sm_id = smodel.ordinal for i, site in enumerate(sitecol): sid = sitecol.sids[i] curve = curves[i] # populate max_poe array for rlzi, poes in curve.items(): for imt in oq.imtls: max_poe[rlzi][imt] = max(max_poe[rlzi][imt], poes[imt].max()) if not curve: continue # skip zero-valued hazard curves # check for too big poes_disagg for poe in oq.poes_disagg: for rlz in self.rlzs_assoc.rlzs_by_smodel[sm_id]: rlzi = rlz.ordinal for imt in oq.imtls: min_poe = max_poe[rlzi][imt] if poe > min_poe: raise ValueError( self.POE_TOO_BIG % (poe, sm_id, smodel.name, min_poe, rlzi, imt)) # build all_args all_args = [] for smodel in self.csm.source_models: for sg in smodel.src_groups: split_sources = [] for src in sg: for split, _sites in src_filter( sourceconverter.split_source(src), sitecol): split_sources.append(split) if not split_sources: continue mon = self.monitor('disaggregation') rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim( sg.trt, smodel.ordinal) cmaker = ContextMaker(rlzs_by_gsim, src_filter.integration_distance) imls = [ disagg.make_imldict(rlzs_by_gsim, oq.imtls, oq.iml_disagg, oq.poes_disagg, curve) for curve in curves ] for srcs in split_in_blocks(split_sources, nblocks): all_args.append((src_filter, srcs, cmaker, imls, trts, self.bin_edges, oq, mon)) self.cache_info = numpy.zeros(2) # operations, cache_hits results = parallel.Starmap(compute_disagg, all_args).reduce(self.agg_result) ops, hits = self.cache_info logging.info('Cache speedup %s', ops / (ops - hits)) self.save_disagg_results(results)
def classical(group, src_filter, gsims, param, monitor=Monitor()): """ Compute the hazard curves for a set of sources belonging to the same tectonic region type for all the GSIMs associated to that TRT. The arguments are the same as in :func:`calc_hazard_curves`, except for ``gsims``, which is a list of GSIM instances. :returns: a dictionary {grp_id: pmap} with attributes .grp_ids, .calc_times, .eff_ruptures """ if getattr(group, 'src_interdep', None) == 'mutex': mutex_weight = { src.source_id: weight for src, weight in zip(group.sources, group.srcs_weights) } else: mutex_weight = None grp_ids = set() for src in group: grp_ids.update(src.src_group_ids) maxdist = src_filter.integration_distance imtls = param['imtls'] trunclevel = param.get('truncation_level') cmaker = ContextMaker(gsims, maxdist, param['filter_distance'], monitor) pmap = AccumDict({ grp_id: ProbabilityMap(len(imtls.array), len(gsims)) for grp_id in grp_ids }) # AccumDict of arrays with 4 elements weight, nsites, calc_time, split pmap.calc_times = AccumDict(accum=numpy.zeros(4)) pmap.eff_ruptures = AccumDict() # grp_id -> num_ruptures for src, s_sites in src_filter(group): # filter now t0 = time.time() indep = group.rup_interdep == 'indep' if mutex_weight else True try: poemap = cmaker.poe_map(src, s_sites, imtls, trunclevel, indep) except Exception as err: etype, err, tb = sys.exc_info() msg = '%s (source id=%s)' % (str(err), src.source_id) raise etype(msg).with_traceback(tb) if mutex_weight: # mutex sources weight = mutex_weight[src.source_id] for sid in poemap: pcurve = pmap[group.id].setdefault(sid, 0) pcurve += poemap[sid] * weight elif poemap: for grp_id in src.src_group_ids: pmap[grp_id] |= poemap src_id = src.source_id.split(':', 1)[0] pmap.calc_times[src_id] += numpy.array( [src.weight, len(s_sites), time.time() - t0, 1]) # storing the number of contributing ruptures too pmap.eff_ruptures += { grp_id: getattr(poemap, 'eff_ruptures', 0) for grp_id in src.src_group_ids } if mutex_weight and group.grp_probability is not None: pmap[group.id] *= group.grp_probability return pmap
def make_contexts(self, site_collection, rupture): param = dict(imtls={}) return ContextMaker('faketrt', [self.gsim_class], param).make_contexts(site_collection, rupture)
def event_based(proxies, full_lt, oqparam, dstore, monitor): """ Compute GMFs and optionally hazard curves """ alldata = AccumDict(accum=[]) sig_eps = [] times = [] # rup_id, nsites, dt hcurves = {} # key -> poes trt_smr = proxies[0]['trt_smr'] fmon = monitor('filtering ruptures', measuremem=False) cmon = monitor('computing gmfs', measuremem=False) with dstore: trt = full_lt.trts[trt_smr // len(full_lt.sm_rlzs)] srcfilter = SourceFilter(dstore['sitecol'], oqparam.maximum_distance(trt)) rupgeoms = dstore['rupgeoms'] rlzs_by_gsim = full_lt._rlzs_by_gsim(trt_smr) cmaker = ContextMaker(trt, rlzs_by_gsim, oqparam) cmaker.min_mag = getdefault(oqparam.minimum_magnitude, trt) for proxy in proxies: t0 = time.time() with fmon: if proxy['mag'] < cmaker.min_mag: continue sids = srcfilter.close_sids(proxy, trt) if len(sids) == 0: # filtered away continue proxy.geom = rupgeoms[proxy['geom_id']] ebr = proxy.to_ebr(cmaker.trt) # after the geometry is set try: computer = GmfComputer(ebr, srcfilter.sitecol.filtered(sids), cmaker, oqparam.correl_model, oqparam.cross_correl, oqparam._amplifier, oqparam._sec_perils) except FarAwayRupture: continue with cmon: data = computer.compute_all(sig_eps) dt = time.time() - t0 times.append((computer.ebrupture.id, len(computer.ctx.sids), dt)) for key in data: alldata[key].extend(data[key]) for key, val in sorted(alldata.items()): if key in 'eid sid rlz': alldata[key] = U32(alldata[key]) else: alldata[key] = F32(alldata[key]) gmfdata = strip_zeros(pandas.DataFrame(alldata)) if len(gmfdata) and oqparam.hazard_curves_from_gmfs: hc_mon = monitor('building hazard curves', measuremem=False) for (sid, rlz), df in gmfdata.groupby(['sid', 'rlz']): with hc_mon: poes = calc.gmvs_to_poes(df, oqparam.imtls, oqparam.ses_per_logic_tree_path) for m, imt in enumerate(oqparam.imtls): hcurves[rsi2str(rlz, sid, imt)] = poes[m] times = numpy.array([tup + (monitor.task_no, ) for tup in times], time_dt) times.sort(order='rup_id') if not oqparam.ground_motion_fields: gmfdata = () return dict(gmfdata=gmfdata, hcurves=hcurves, times=times, sig_eps=numpy.array(sig_eps, sig_eps_dt(oqparam.imtls)))
def full_disaggregation(self): """ Run the disaggregation phase. """ oq = self.oqparam tl = oq.truncation_level src_filter = self.src_filter() if hasattr(self, 'csm'): for sg in self.csm.src_groups: if sg.atomic: raise NotImplementedError( 'Atomic groups are not supported yet') self.full_lt = self.datastore['full_lt'] self.poes_disagg = oq.poes_disagg or (None,) self.imts = list(oq.imtls) self.ws = [rlz.weight for rlz in self.full_lt.get_realizations()] self.pgetter = getters.PmapGetter( self.datastore, self.ws, self.sitecol.sids) # build array rlzs (N, Z) if oq.rlz_index is None: Z = oq.num_rlzs_disagg rlzs = numpy.zeros((self.N, Z), int) if self.R > 1: for sid in self.sitecol.sids: curves = numpy.array( [pc.array for pc in self.pgetter.get_pcurves(sid)]) mean = getters.build_stat_curve( curves, oq.imtls, stats.mean_curve, self.ws) rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z] self.datastore['best_rlzs'] = rlzs else: Z = len(oq.rlz_index) rlzs = numpy.zeros((self.N, Z), int) for z in range(Z): rlzs[:, z] = oq.rlz_index[z] assert Z <= self.R, (Z, self.R) self.Z = Z self.rlzs = rlzs if oq.iml_disagg: # no hazard curves are needed self.poe_id = {None: 0} curves = [[None for z in range(Z)] for s in range(self.N)] self.ok_sites = set(self.sitecol.sids) else: self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)} curves = [self.get_curve(sid, rlzs[sid]) for sid in self.sitecol.sids] self.ok_sites = set(self.check_poes_disagg(curves, rlzs)) self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls, self.poes_disagg, curves) if oq.disagg_by_src: self.build_disagg_by_src(rlzs) eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1) # build trt_edges trts = tuple(self.full_lt.trts) trt_num = {trt: i for i, trt in enumerate(trts)} self.trts = trts # build mag_edges mags = [float(mag) for mag in self.datastore['source_mags']] mag_edges = oq.mag_bin_width * numpy.arange( int(numpy.floor(min(mags) / oq.mag_bin_width)), int(numpy.ceil(max(mags) / oq.mag_bin_width) + 1)) # build dist_edges maxdist = max(oq.maximum_distance(trt) for trt in trts) dist_edges = oq.distance_bin_width * numpy.arange( 0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1)) # build eps_edges eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1) # build lon_edges, lat_edges per sid bbs = src_filter.get_bounding_boxes(mag=max(mags)) lon_edges, lat_edges = {}, {} # by sid for sid, bb in zip(self.sitecol.sids, bbs): lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins( bb, oq.coordinate_bin_width) self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges shapedic = self.save_bin_edges() del shapedic['trt'] shapedic['N'] = self.N shapedic['M'] = len(oq.imtls) shapedic['P'] = len(oq.poes_disagg) shapedic['Z'] = Z shapedic['concurrent_tasks'] = oq.concurrent_tasks nbytes, msg = get_array_nbytes(shapedic) if nbytes > oq.max_data_transfer: raise ValueError('Estimated data transfer too big\n%s' % msg) logging.info('Estimated data transfer: %s', msg) self.imldict = {} # sid, rlz, poe, imt -> iml for s in self.sitecol.sids: for z, rlz in enumerate(rlzs[s]): for p, poe in enumerate(self.poes_disagg): for m, imt in enumerate(oq.imtls): self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z] # submit #groups disaggregation tasks dstore = (self.datastore.parent if self.datastore.parent else self.datastore) indices = get_indices(dstore, oq.concurrent_tasks or 1) self.datastore.swmr_on() smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5) for grp_id, trt in self.full_lt.trt_by_grp.items(): logging.info('Group #%d, sending rup_data for %s', grp_id, trt) trti = trt_num[trt] cmaker = ContextMaker( trt, self.full_lt.get_rlzs_by_gsim(grp_id), {'truncation_level': oq.truncation_level, 'maximum_distance': src_filter.integration_distance, 'filter_distance': oq.filter_distance, 'imtls': oq.imtls}) for idxs in indices[grp_id]: smap.submit((dstore, idxs, cmaker, self.iml4, trti, self.bin_edges)) results = smap.reduce(self.agg_result, AccumDict(accum={})) return results # sid -> trti-> 8D array
def full_disaggregation(self, curves): """ Run the disaggregation phase. :param curves: a list of hazard curves, one per site The curves can be all None if iml_disagg is set in the job.ini """ oq = self.oqparam tl = oq.truncation_level src_filter = SourceFilter(self.sitecol, oq.maximum_distance) csm = self.csm for sg in csm.src_groups: if sg.atomic: raise NotImplemented('Atomic groups are not supported yet') if not csm.get_sources(): raise RuntimeError('All sources were filtered away!') R = len(self.rlzs_assoc.realizations) M = len(oq.imtls) P = len(oq.poes_disagg) or 1 if R * M * P > 10: logging.warning( 'You have %d realizations, %d IMTs and %d poes_disagg: the ' 'disaggregation will be heavy and memory consuming', R, M, P) iml4 = disagg.make_iml4( R, oq.iml_disagg, oq.imtls, oq.poes_disagg or (None,), curves) if oq.disagg_by_src: if R == 1: self.build_disagg_by_src(iml4) else: logging.warning('disagg_by_src works only with 1 realization, ' 'you have %d', R) eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1) self.bin_edges = {} # build trt_edges trts = tuple(sorted(set(sg.trt for smodel in csm.source_models for sg in smodel.src_groups))) trt_num = {trt: i for i, trt in enumerate(trts)} self.trts = trts # build mag_edges mmm = numpy.array([src.get_min_max_mag() for src in csm.get_sources()]) min_mag = mmm[:, 0].min() max_mag = mmm[:, 1].max() mag_edges = oq.mag_bin_width * numpy.arange( int(numpy.floor(min_mag / oq.mag_bin_width)), int(numpy.ceil(max_mag / oq.mag_bin_width) + 1)) # build dist_edges maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts) dist_edges = oq.distance_bin_width * numpy.arange( 0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1)) # build eps_edges eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1) # build lon_edges, lat_edges per sid bbs = src_filter.get_bounding_boxes(mag=max_mag) lon_edges, lat_edges = {}, {} # by sid for sid, bb in zip(self.sitecol.sids, bbs): lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins( bb, oq.coordinate_bin_width) self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges self.save_bin_edges() # build all_args all_args = [] maxweight = csm.get_maxweight(weight, oq.concurrent_tasks) R = iml4.shape[1] self.imldict = {} # sid, rlzi, poe, imt -> iml for s in self.sitecol.sids: for r in range(R): for p, poe in enumerate(oq.poes_disagg or [None]): for m, imt in enumerate(oq.imtls): self.imldict[s, r, poe, imt] = iml4[s, r, m, p] for smodel in csm.source_models: sm_id = smodel.ordinal for trt, groups in groupby( smodel.src_groups, operator.attrgetter('trt')).items(): trti = trt_num[trt] sources = sum([grp.sources for grp in groups], []) rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(trt, sm_id) cmaker = ContextMaker( trt, rlzs_by_gsim, src_filter.integration_distance, {'filter_distance': oq.filter_distance}) for block in block_splitter(sources, maxweight, weight): all_args.append( (src_filter.sitecol, block, cmaker, iml4, trti, self.bin_edges, oq)) self.num_ruptures = [0] * len(self.trts) self.cache_info = numpy.zeros(3) # operations, cache_hits, num_zeros results = parallel.Starmap( compute_disagg, all_args, self.monitor() ).reduce(self.agg_result, AccumDict(accum={})) # set eff_ruptures trti = csm.info.trt2i() for smodel in csm.info.source_models: for sg in smodel.src_groups: sg.eff_ruptures = self.num_ruptures[trti[sg.trt]] self.datastore['csm_info'] = csm.info ops, hits, num_zeros = self.cache_info logging.info('Cache speedup %s', ops / (ops - hits)) logging.info('Discarded zero matrices: %d', num_zeros) return results
def pmap_from_grp( sources, source_site_filter, imtls, gsims, truncation_level=None, bbs=(), monitor=Monitor()): """ Compute the hazard curves for a set of sources belonging to the same tectonic region type for all the GSIMs associated to that TRT. The arguments are the same as in :func:`calc_hazard_curves`, except for ``gsims``, which is a list of GSIM instances. :returns: a ProbabilityMap instance """ if isinstance(sources, SourceGroup): group = sources sources = group.sources trt = sources[0].tectonic_region_type mutex_weight = {src.source_id: weight for src, weight in zip(group.sources, group.srcs_weights)} else: # list of sources trt = sources[0].tectonic_region_type group = SourceGroup(trt, sources, 'src_group', 'indep', 'indep') grp_id = sources[0].src_group_id maxdist = source_site_filter.integration_distance if hasattr(gsims, 'keys'): # dictionary trt -> gsim gsims = [gsims[trt]] srcs = [] for src in sources: if hasattr(src, '__iter__'): # MultiPointSource srcs.extend(src) else: srcs.append(src) del sources with GroundShakingIntensityModel.forbid_instantiation(): imtls = DictArray(imtls) cmaker = ContextMaker(gsims, maxdist) ctx_mon = monitor('making contexts', measuremem=False) pne_mons = [monitor('%s.get_poes' % gsim, measuremem=False) for gsim in gsims] disagg_mon = monitor('get closest points', measuremem=False) src_indep = group.src_interdep == 'indep' pmap = ProbabilityMap(len(imtls.array), len(gsims)) pmap.calc_times = [] # pairs (src_id, delta_t) pmap.grp_id = grp_id for src, s_sites in source_site_filter(srcs): t0 = time.time() poemap = poe_map( src, s_sites, imtls, cmaker, truncation_level, ctx_mon, pne_mons, bbs, group.rup_interdep == 'indep', disagg_mon) if src_indep: # usual composition of probabilities pmap |= poemap else: # mutually exclusive probabilities weight = mutex_weight[src.source_id] for sid in poemap: pcurve = pmap.setdefault(sid, 0) pcurve += poemap[sid] * weight pmap.calc_times.append( (src.source_id, len(s_sites), time.time() - t0)) # storing the number of contributing ruptures too pmap.eff_ruptures = {pmap.grp_id: pne_mons[0].counts} if group.grp_probability is not None: return pmap * group.grp_probability return pmap
def get_conditional_gmfs(database, rupture, sites, gsims, imts, number_simulations, truncation_level, correlation_model=DEFAULT_CORRELATION): """ Get a set of random fields conditioned on a set of observations :param database: Ground motion records for the event as instance of :class: smtk.sm_database.GroundMotionDatabase :param rupture: Event rupture as instance of :class: openquake.hazardlib.source.rupture.Rupture :param sites: Target sites as instance of :class: openquake.hazardlib.site.SiteCollection :param list gsims: List of GMPEs required :param list imts: List of intensity measures required :param int number_simulations: Number of simulated fields required :param float truncation_level: Ground motion truncation level """ # Get known sites mesh known_sites = database.get_site_collection() # Get Observed Residuals residuals = Residuals(gsims, imts) residuals.get_residuals(database) imt_dict = OrderedDict([(imtx, np.zeros([len(sites.lons), number_simulations])) for imtx in imts]) gmfs = OrderedDict([(gmpe, imt_dict) for gmpe in gsims]) gmpe_list = [GSIM_LIST[gmpe]() for gmpe in gsims] cmaker = ContextMaker(gmpe_list) sctx, dctx = cmaker.make_contexts(sites, rupture) for gsim in gmpe_list: gmpe = gsim.__class__.__name__ for imtx in imts: if truncation_level == 0: gmfs[gmpe][imtx], _ = gsim.get_mean_and_stddevs( sctx, rupture, dctx, from_string(imtx), stddev_types=[]) continue if "Intra event" in gsim.DEFINED_FOR_STANDARD_DEVIATION_TYPES: epsilon = conditional_simulation( known_sites, residuals.residuals[gmpe][imtx]["Intra event"], sites, imtx, number_simulations, correlation_model) tau = np.unique(residuals.residuals[gmpe][imtx]["Inter event"]) mean, [stddev_inter, stddev_intra] = gsim.get_mean_and_stddevs( sctx, rupture, dctx, from_string(imtx), ["Inter event", "Intra event"]) for iloc in range(0, number_simulations): gmfs[gmpe][imtx][:, iloc] = np.exp(mean + (tau * stddev_inter) + (epsilon[:, iloc].A1 * stddev_intra)) else: epsilon = conditional_simulation( known_sites, residuals.residuals[gmpe][imtx]["Total"], sites, imtx, number_simulations, correlation_model) tau = None mean, [stddev_total ] = gsim.get_mean_and_stddevs(sctx, rupture, dctx, from_string(imtx), ["Total"]) for iloc in range(0, number_simulations): gmfs[gmpe][imtx][:, iloc] = np.exp(mean + epsilon[:, iloc].A1 * stddev_total.flatten()) return gmfs
def classical(group, src_filter, gsims, param, monitor=Monitor()): """ Compute the hazard curves for a set of sources belonging to the same tectonic region type for all the GSIMs associated to that TRT. The arguments are the same as in :func:`calc_hazard_curves`, except for ``gsims``, which is a list of GSIM instances. :returns: a dictionary {grp_id: pmap} with attributes .grp_ids, .calc_times, .eff_ruptures """ if getattr(group, 'src_interdep', None) == 'mutex': mutex_weight = { src.source_id: weight for src, weight in zip(group.sources, group.srcs_weights) } srcs = group.sources else: mutex_weight = None srcs = sum([split_source(src) for src in group], []) grp_ids = set() for src in group: grp_ids.update(src.src_group_ids) maxdist = src_filter.integration_distance with GroundShakingIntensityModel.forbid_instantiation(): imtls = param['imtls'] trunclevel = param.get('truncation_level') cmaker = ContextMaker(gsims, maxdist) ctx_mon = monitor('make_contexts', measuremem=False) poe_mon = monitor('get_poes', measuremem=False) pmap = AccumDict({ grp_id: ProbabilityMap(len(imtls.array), len(gsims)) for grp_id in grp_ids }) # AccumDict of arrays with 4 elements weight, nsites, calc_time, split pmap.calc_times = AccumDict(accum=numpy.zeros(4)) pmap.eff_ruptures = AccumDict() # grp_id -> num_ruptures for src, s_sites in src_filter(srcs): # filter now t0 = time.time() indep = group.rup_interdep == 'indep' if mutex_weight else True poemap = cmaker.poe_map(src, s_sites, imtls, trunclevel, ctx_mon, poe_mon, indep) if mutex_weight: # mutex sources weight = mutex_weight[src.source_id] for sid in poemap: pcurve = pmap[group.id].setdefault(sid, 0) pcurve += poemap[sid] * weight elif poemap: for grp_id in src.src_group_ids: pmap[grp_id] |= poemap src_id = src.source_id.split(':', 1)[0] pmap.calc_times[src_id] += numpy.array( [src.weight, len(s_sites), time.time() - t0, 1]) # storing the number of contributing ruptures too pmap.eff_ruptures += { grp_id: getattr(poemap, 'eff_ruptures', 0) for grp_id in src.src_group_ids } if mutex_weight and group.grp_probability is not None: pmap[group.id] *= group.grp_probability return pmap
def disaggregation(sources, site, imt, iml, gsim_by_trt, truncation_level, n_epsilons, mag_bin_width, dist_bin_width, coord_bin_width, source_filter=filters.nofilter, **kwargs): """ Compute "Disaggregation" matrix representing conditional probability of an intensity mesaure type ``imt`` exceeding, at least once, an intensity measure level ``iml`` at a geographical location ``site``, given rupture scenarios classified in terms of: - rupture magnitude - Joyner-Boore distance from rupture surface to site - longitude and latitude of the surface projection of a rupture's point closest to ``site`` - epsilon: number of standard deviations by which an intensity measure level deviates from the median value predicted by a GSIM, given the rupture parameters - rupture tectonic region type In other words, the disaggregation matrix allows to compute the probability of each scenario with the specified properties (e.g., magnitude, or the magnitude and distance) to cause one or more exceedences of a given hazard level. For more detailed information about the disaggregation, see for instance "Disaggregation of Seismic Hazard", Paolo Bazzurro, C. Allin Cornell, Bulletin of the Seismological Society of America, Vol. 89, pp. 501-520, April 1999. :param sources: Seismic source model, as for :mod:`PSHA <openquake.hazardlib.calc.hazard_curve>` calculator it should be an iterator of seismic sources. :param site: :class:`~openquake.hazardlib.site.Site` of interest to calculate disaggregation matrix for. :param imt: Instance of :mod:`intensity measure type <openquake.hazardlib.imt>` class. :param iml: Intensity measure level. A float value in units of ``imt``. :param gsim_by_trt: Tectonic region type to GSIM objects mapping. :param truncation_level: Float, number of standard deviations for truncation of the intensity distribution. :param n_epsilons: Integer number of epsilon histogram bins in the result matrix. :param mag_bin_width: Magnitude discretization step, width of one magnitude histogram bin. :param dist_bin_width: Distance histogram discretization step, in km. :param coord_bin_width: Longitude and latitude histograms discretization step, in decimal degrees. :param source_filter: Optional source-site filter function. See :mod:`openquake.hazardlib.calc.filters`. :returns: A tuple of two items. First is itself a tuple of bin edges information for (in specified order) magnitude, distance, longitude, latitude, epsilon and tectonic region types. Second item is 6d-array representing the full disaggregation matrix. Dimensions are in the same order as bin edges in the first item of the result tuple. The matrix can be used directly by pmf-extractor functions. """ trts = sorted(set(src.tectonic_region_type for src in sources)) trt_num = dict((trt, i) for i, trt in enumerate(trts)) rlzs_by_gsim = {gsim_by_trt[trt]: [0] for trt in trts} by_trt = groupby(sources, operator.attrgetter('tectonic_region_type')) bdata = {} # by TRT sitecol = SiteCollection([site]) imls = hdf5.ArrayWrapper(numpy.array([iml]), dict(imt=imt, poes_disagg=[None], rlzi=0)) eps3 = _eps3(truncation_level, n_epsilons) for trt, srcs in by_trt.items(): gsim = gsim_by_trt[trt] cmaker = ContextMaker( trt, rlzs_by_gsim, { 'truncation_level': truncation_level, 'maximum_distance': source_filter.integration_distance, 'imtls': { str(imt): [iml] } }) contexts.RuptureContext.temporal_occurrence_model = ( srcs[0].temporal_occurrence_model) ctxs = cmaker.from_srcs(srcs, sitecol) mean_std = get_mean_stdv(sitecol, ctxs, imt, gsim) bdata[trt] = disaggregate(mean_std, ctxs, imt, imls, eps3) if sum(len(bd.mags) for bd in bdata.values()) == 0: warnings.warn( 'No ruptures have contributed to the hazard at site %s' % site, RuntimeWarning) return None, None min_mag = min(bd.mags.min() for bd in bdata.values()) max_mag = max(bd.mags.max() for bd in bdata.values()) mag_bins = mag_bin_width * numpy.arange( int(numpy.floor(min_mag / mag_bin_width)), int(numpy.ceil(max_mag / mag_bin_width) + 1)) min_dist = min(bd.dists.min() for bd in bdata.values()) max_dist = max(bd.dists.max() for bd in bdata.values()) dist_bins = dist_bin_width * numpy.arange( int(numpy.floor(min_dist / dist_bin_width)), int(numpy.ceil(max_dist / dist_bin_width) + 1)) lon_bins, lat_bins = lon_lat_bins(site.location.x, site.location.y, max_dist, coord_bin_width) eps_bins = numpy.linspace(-truncation_level, truncation_level, n_epsilons + 1) bin_edges = (mag_bins, dist_bins, lon_bins, lat_bins, eps_bins) matrix = numpy.zeros( (len(mag_bins) - 1, len(dist_bins) - 1, len(lon_bins) - 1, len(lat_bins) - 1, len(eps_bins) - 1, len(trts))) for trt in bdata: mat6 = build_disagg_matrix(bdata[trt], bin_edges) matrix[..., trt_num[trt]] = mat6[..., 0] # shape (..., P) return bin_edges + (trts, ), matrix
def full_disaggregation(self): """ Run the disaggregation phase. """ oq = self.oqparam tl = oq.truncation_level src_filter = self.src_filter() if hasattr(self, 'csm'): for sg in self.csm.src_groups: if sg.atomic: raise NotImplementedError( 'Atomic groups are not supported yet') if not self.csm.get_sources(): raise RuntimeError('All sources were filtered away!') csm_info = self.datastore['csm_info'] self.poes_disagg = oq.poes_disagg or (None, ) self.imts = list(oq.imtls) self.ws = [rlz.weight for rlz in self.rlzs_assoc.realizations] self.pgetter = getters.PmapGetter(self.datastore, self.ws, self.sitecol.sids) # build array rlzs (N, Z) if oq.rlz_index is None: Z = oq.num_rlzs_disagg rlzs = numpy.zeros((self.N, Z), int) if self.R > 1: for sid in self.sitecol.sids: curves = numpy.array( [pc.array for pc in self.pgetter.get_pcurves(sid)]) mean = getters.build_stat_curve(curves, oq.imtls, stats.mean_curve, self.ws) rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z] self.datastore['best_rlzs'] = rlzs else: Z = len(oq.rlz_index) rlzs = numpy.zeros((self.N, Z), int) for z in range(Z): rlzs[:, z] = oq.rlz_index[z] assert Z <= self.R, (Z, self.R) self.Z = Z self.rlzs = rlzs if oq.iml_disagg: # no hazard curves are needed self.poe_id = {None: 0} curves = [[None for z in range(Z)] for s in range(self.N)] self.ok_sites = set(self.sitecol.sids) else: self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)} curves = [ self.get_curve(sid, rlzs[sid]) for sid in self.sitecol.sids ] self.ok_sites = set(self.check_poes_disagg(curves, rlzs)) self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls, self.poes_disagg, curves) if oq.disagg_by_src: self.build_disagg_by_src(rlzs) eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1) # build trt_edges trts = tuple(csm_info.trts) trt_num = {trt: i for i, trt in enumerate(trts)} self.trts = trts # build mag_edges min_mag = csm_info.min_mag max_mag = csm_info.max_mag mag_edges = oq.mag_bin_width * numpy.arange( int(numpy.floor(min_mag / oq.mag_bin_width)), int(numpy.ceil(max_mag / oq.mag_bin_width) + 1)) # build dist_edges maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts) dist_edges = oq.distance_bin_width * numpy.arange( 0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1)) # build eps_edges eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1) # build lon_edges, lat_edges per sid bbs = src_filter.get_bounding_boxes(mag=max_mag) lon_edges, lat_edges = {}, {} # by sid for sid, bb in zip(self.sitecol.sids, bbs): lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins( bb, oq.coordinate_bin_width) self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges self.save_bin_edges() self.imldict = {} # sid, rlz, poe, imt -> iml for s in self.sitecol.sids: for z, rlz in enumerate(rlzs[s]): logging.info('Site #%d, disaggregating for rlz=#%d', s, rlz) for p, poe in enumerate(self.poes_disagg): for m, imt in enumerate(oq.imtls): self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z] # submit disagg tasks gid = self.datastore['rup/grp_id'][()] indices_by_grp = get_indices(gid) # grp_id -> [(start, stop),...] blocksize = len(gid) // (oq.concurrent_tasks or 1) + 1 # NB: removing the blocksize causes slow disaggregation tasks allargs = [] dstore = (self.datastore.parent if self.datastore.parent else self.datastore) for grp_id, trt in csm_info.trt_by_grp.items(): trti = trt_num[trt] rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(grp_id) cmaker = ContextMaker( trt, rlzs_by_gsim, { 'truncation_level': oq.truncation_level, 'maximum_distance': src_filter.integration_distance, 'filter_distance': oq.filter_distance, 'imtls': oq.imtls }) for start, stop in indices_by_grp[grp_id]: for slc in gen_slices(start, stop, blocksize): allargs.append((dstore, slc, self.sitecol, oq, cmaker, self.iml4, trti, self.bin_edges)) results = parallel.Starmap(compute_disagg, allargs, h5=self.datastore.hdf5).reduce( self.agg_result, AccumDict(accum={})) return results # sid -> trti-> 8D array