def get_mesh_hcurves(oqparam):
    """
    Read CSV data in the format `lon lat, v1-vN, w1-wN, ...`.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        the mesh of points and the data as a dictionary
        imt -> array of curves for each site
    """
    imtls = oqparam.imtls
    lon_lats = set()
    data = AccumDict()  # imt -> list of arrays
    ncols = len(imtls) + 1  # lon_lat + curve_per_imt
    csvfile = oqparam.inputs["hazard_curves"]
    for line, row in enumerate(csv.reader(csvfile), 1):
        try:
            if len(row) != ncols:
                raise ValueError("Expected %d columns, found %d" %
                                 (ncols, len(row)))
            x, y = row[0].split()
            lon_lat = valid.longitude(x), valid.latitude(y)
            if lon_lat in lon_lats:
                raise DuplicatedPoint(lon_lat)
            lon_lats.add(lon_lat)
            for i, imt_ in enumerate(imtls, 1):
                values = valid.decreasing_probabilities(row[i])
                if len(values) != len(imtls[imt_]):
                    raise ValueError("Found %d values, expected %d" %
                                     (len(values), len(imtls[imt_])))
                data += {imt_: [numpy.array(values)]}
        except (ValueError, DuplicatedPoint) as err:
            raise err.__class__("%s: file %s, line %d" % (err, csvfile, line))
    lons, lats = zip(*sorted(lon_lats))
    mesh = geo.Mesh(numpy.array(lons), numpy.array(lats))
    return mesh, {imt: numpy.array(lst) for imt, lst in data.items()}
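
# Illustrative sketch, not engine code: the `data += {imt: [array]}` idiom
# above works because AccumDict merges dictionaries key-wise on addition,
# so the per-IMT lists of curves grow as new rows are read. Assuming the
# openquake.baselib.general import path used by the engine:
import numpy

from openquake.baselib.general import AccumDict

data = AccumDict()  # imt -> list of arrays
data += {'PGA': [numpy.array([0.1, 0.05])]}
data += {'PGA': [numpy.array([0.2, 0.08])]}  # list + list concatenates
assert len(data['PGA']) == 2
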
def read(cls, dstore):
    """
    :param dstore: a DataStore instance
    :returns: a :class:`CompositeRiskModel` instance
    """
    oqparam = dstore['oqparam']
    crm = dstore.getitem('risk_model')
    riskdict = AccumDict(accum={})
    riskdict.limit_states = crm.attrs['limit_states']
    for quoted_id, rm in crm.items():
        riskid = unquote_plus(quoted_id)
        for lt_kind in rm:
            lt, kind = lt_kind.rsplit('-', 1)
            rf = dstore['risk_model/%s/%s' % (quoted_id, lt_kind)]
            if kind == 'consequence':
                riskdict[riskid][lt, kind] = rf
            elif kind == 'fragility':  # rf is a FragilityFunctionList
                try:
                    rf = rf.build(
                        riskdict.limit_states,
                        oqparam.continuous_fragility_discretization,
                        oqparam.steps_per_interval)
                except ValueError as err:
                    raise ValueError('%s: %s' % (riskid, err))
                riskdict[riskid][lt, kind] = rf
            else:  # rf is a vulnerability function
                rf.init()
                if lt.endswith('_retrofitted'):
                    # strip _retrofitted, since len('_retrofitted') = 12
                    riskdict[riskid][
                        lt[:-12], 'vulnerability_retrofitted'] = rf
                else:
                    riskdict[riskid][lt, 'vulnerability'] = rf
    return CompositeRiskModel(oqparam, riskdict)
def get_trt_sources(self, optimize_same_id=None):
    """
    :returns: a list of pairs [(trt, group of sources)]
    """
    atomic = []
    acc = AccumDict(accum=[])
    for sm in self.source_models:
        for grp in sm.src_groups:
            if grp and grp.atomic:
                atomic.append((grp.trt, grp))
            elif grp:
                acc[grp.trt].extend(grp)
    if optimize_same_id is None:
        optimize_same_id = self.optimize_same_id
    if optimize_same_id is False:
        return atomic + list(acc.items())
    # extract a single source from multiple sources with the same ID
    n = 0
    tot = 0
    dic = {}
    for trt in acc:
        dic[trt] = []
        for grp in groupby(acc[trt], lambda x: x.source_id).values():
            src = grp[0]
            n += 1
            tot += len(grp)
            # src.src_group_id can be a list if get_sources_by_trt was
            # called before
            if len(grp) > 1 and not isinstance(src.src_group_id, list):
                src.src_group_id = [s.src_group_id for s in grp]
            dic[trt].append(src)
    if n < tot:
        logging.info('Reduced %d sources to %d sources with unique IDs',
                     tot, n)
    return atomic + list(dic.items())
def compute_ruptures(branch_info, ucerf, sitecol, oqparam, monitor):
    """
    Returns the ruptures as a TRT set

    :param branch_info: tuple of (ltbr, branch_id, branch_weight)
    :param ucerf: instance of the UCERFSESControl object
    :param sitecol: :class:`openquake.hazardlib.site.SiteCollection` instance
    :param oqparam:
        instance of :class:`openquake.commonlib.oqvalidation.OqParam`
    :param monitor:
        instance of :class:`openquake.baselib.performance.Monitor`
    :returns: dictionary of rupture instances associated to a TRT ID
    """
    integration_distance = oqparam.maximum_distance[DEFAULT_TRT]
    res = AccumDict()
    res.calc_times = AccumDict()
    serial = 1
    filter_mon = monitor('update_background_site_filter', measuremem=False)
    event_mon = monitor('sampling ruptures', measuremem=False)
    for src_group_id, (ltbrid, branch_id, _) in enumerate(branch_info):
        t0 = time.time()
        with filter_mon:
            ucerf.update_background_site_filter(
                sitecol, integration_distance)
        # set the seed before calling generate_event_set
        numpy.random.seed(oqparam.random_seed + src_group_id)
        ses_ruptures = []
        for ses_idx in range(1, oqparam.ses_per_logic_tree_path + 1):
            with event_mon:
                rups, n_occs = ucerf.generate_event_set(
                    branch_id, sitecol, integration_distance)
            for i, rup in enumerate(rups):
                rup.seed = oqparam.random_seed  # to think
                rrup = rup.surface.get_min_distance(sitecol.mesh)
                r_sites = sitecol.filter(rrup <= integration_distance)
                if r_sites is None:
                    continue
                indices = r_sites.indices
                events = []
                for j in range(n_occs[i]):
                    # NB: the first 0 is a placeholder for the eid that will
                    # be set later, in EventBasedRuptureCalculator
                    # .post_execute; the second 0 is the sampling ID
                    events.append((0, ses_idx, j, 0))
                if len(events):
                    ses_ruptures.append(
                        event_based.EBRupture(
                            rup, indices,
                            numpy.array(events, event_based.event_dt),
                            ucerf.source_id, src_group_id, serial))
                    serial += 1
        dt = time.time() - t0
        res.calc_times[src_group_id] = (ltbrid, dt)
        res[src_group_id] = ses_ruptures
    res.trt = DEFAULT_TRT
    return res
def zerodict(self):
    """
    Initial accumulator, a dictionary (grp_id, gsim) -> curves
    """
    zd = AccumDict()
    zd.calc_times = []
    zd.eff_ruptures = AccumDict()
    return zd
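
# Illustrative sketch, not engine code: since AccumDict is a dict subclass,
# the zerodict accumulators above can carry side channels (calc_times,
# eff_ruptures) as plain attributes next to the dictionary payload, and the
# nested AccumDict sums values for repeated keys. Assuming the
# openquake.baselib.general import path used by the engine:
from openquake.baselib.general import AccumDict

zd = AccumDict()
zd.calc_times = []  # side channel: a list of calculation times
zd.eff_ruptures = AccumDict()  # side channel: grp_id -> eff_ruptures
zd.eff_ruptures += {0: 10}
zd.eff_ruptures += {0: 5}  # values for an existing key are summed
assert zd.eff_ruptures[0] == 15
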
def build_starmap(self, ssm, sitecol, assetcol, riskmodel, imts,
                  trunc_level, correl_model, min_iml, monitor):
    """
    :param ssm: CompositeSourceModel containing a single source model
    :param sitecol: a SiteCollection instance
    :param assetcol: an AssetCollection instance
    :param riskmodel: a RiskModel instance
    :param imts: a list of Intensity Measure Types
    :param trunc_level: truncation level
    :param correl_model: correlation model
    :param min_iml: vector of minimum intensities, one per IMT
    :param monitor: a Monitor instance
    :returns: a pair (starmap, dictionary)
    """
    ruptures_by_grp = AccumDict()
    num_ruptures = 0
    num_events = 0
    allargs = []
    grp_trt = {}
    # collect the sources
    maxweight = ssm.get_maxweight(self.oqparam.concurrent_tasks)
    logging.info('Using a maxweight of %d', maxweight)
    for src_group in ssm.src_groups:
        grp_trt[src_group.id] = trt = src_group.trt
        gsims = ssm.gsim_lt.values[trt]
        for block in block_splitter(src_group, maxweight, getweight):
            allargs.append((block, self.sitecol, gsims, monitor))
    # collect the ruptures
    for dic in parallel.starmap(self.compute_ruptures, allargs):
        ruptures_by_grp += dic
        [rupts] = dic.values()
        num_ruptures += len(rupts)
        num_events += dic.num_events
    ruptures_by_grp.num_events = num_events
    save_ruptures(self, ruptures_by_grp)
    # determine the realizations
    rlzs_assoc = ssm.info.get_rlzs_assoc(
        count_ruptures=lambda grp: len(ruptures_by_grp.get(grp.id, [])))
    allargs = []
    # prepare the risk inputs
    ruptures_per_block = self.oqparam.ruptures_per_block
    for src_group in ssm.src_groups:
        for rupts in block_splitter(
                ruptures_by_grp[src_group.id], ruptures_per_block):
            trt = grp_trt[rupts[0].grp_id]
            ri = riskinput.RiskInputFromRuptures(
                trt, imts, sitecol, rupts, trunc_level,
                correl_model, min_iml)
            allargs.append((ri, riskmodel, rlzs_assoc, assetcol, monitor))
    taskname = '%s#%d' % (losses_by_taxonomy.__name__, ssm.sm_id + 1)
    smap = starmap(losses_by_taxonomy, allargs, name=taskname)
    attrs = dict(
        num_ruptures={sg_id: len(rupts)
                      for sg_id, rupts in ruptures_by_grp.items()},
        num_events=num_events,
        num_rlzs=len(rlzs_assoc.realizations),
        sm_id=ssm.sm_id)
    return smap, attrs
def ucerf_classical_hazard_by_branch(branchnames, ucerf_source,
                                     src_group_id, src_filter, gsims,
                                     monitor):
    """
    :param branchnames: a list of branch names
    :param ucerf_source: a source-like object for the UCERF model
    :param src_group_id: an ordinal number for the source
    :param src_filter: a filter returning the sites affected by the source
    :param gsims: a list of GSIMs
    :param monitor: a monitor instance
    :returns: an AccumDict rlz -> curves
    """
    truncation_level = monitor.oqparam.truncation_level
    imtls = monitor.oqparam.imtls
    trt = ucerf_source.tectonic_region_type
    max_dist = monitor.oqparam.maximum_distance[trt]
    dic = AccumDict()
    dic.bbs = []
    dic.calc_times = []
    for branchname in branchnames:
        # Two step process here - the first generates the hazard curves
        # from the rupture sets
        monitor.eff_ruptures = 0
        # Apply the initial rupture to site filtering
        rupset_idx = ucerf_source.get_rupture_indices(branchname)
        rupset_idx, s_sites = \
            ucerf_source.filter_sites_by_distance_from_rupture_set(
                rupset_idx, src_filter.sitecol, max_dist)
        if len(s_sites):
            dic[src_group_id] = hazard_curves_per_rupture_subset(
                rupset_idx, ucerf_source, src_filter, imtls, gsims,
                truncation_level, bbs=dic.bbs, monitor=monitor)
        else:
            dic[src_group_id] = ProbabilityMap(len(imtls.array), len(gsims))
        dic.calc_times += monitor.calc_times  # added by pmap_from_grp
        dic.eff_ruptures = {src_group_id: monitor.eff_ruptures}  # idem
        logging.info('Branch %s', branchname)
        # Get the background point sources
        background_sids = ucerf_source.get_background_sids(
            src_filter.sitecol, max_dist)
        bckgnd_sources = ucerf_source.get_background_sources(
            background_sids)
        if bckgnd_sources:
            pmap = pmap_from_grp(
                bckgnd_sources, src_filter, imtls, gsims, truncation_level,
                bbs=dic.bbs, monitor=monitor)
            dic[src_group_id] |= pmap
            dic.eff_ruptures[src_group_id] += monitor.eff_ruptures
            dic.calc_times += monitor.calc_times
    return dic
def zerodict(self):
    """
    Initial accumulator, a dictionary trt_model_id -> list of ruptures
    """
    smodels = self.rlzs_assoc.csm_info.source_models
    zd = AccumDict((tm.id, []) for smodel in smodels
                   for tm in smodel.trt_models)
    zd.calc_times = []
    return zd
def count_eff_ruptures(sources, sitecol, siteidx, rlzs_assoc, monitor):
    """
    Count the number of ruptures contained in the given sources and return
    a dictionary trt_model_id -> num_ruptures. All sources belong to the
    same tectonic region type.
    """
    acc = AccumDict()
    acc.eff_ruptures = {sources[0].trt_model_id:
                        sum(src.num_ruptures for src in sources)}
    return acc
def zerodict(self):
    """
    Initial accumulator, a dictionary (grp_id, gsim) -> curves
    """
    zd = AccumDict()
    zd.calc_times = []
    zd.eff_ruptures = AccumDict()
    self.eid = collections.Counter()  # sm_id -> event_id
    self.sm_by_grp = self.csm.info.get_sm_by_grp()
    return zd
def count_eff_ruptures(sources, sitecol, gsims, monitor):
    """
    Count the number of ruptures contained in the given sources and return
    a dictionary src_group_id -> num_ruptures. All sources belong to the
    same tectonic region type.
    """
    grp_id = sources[0].src_group_id
    acc = AccumDict({grp_id: {}})
    acc.eff_ruptures = {grp_id: sum(src.num_ruptures for src in sources)}
    return acc
def combine(self, results, agg=agg_prob):
    """
    :param results: a dictionary (trt_model_id, gsim_no) -> floats
    :param agg: an aggregation function
    :returns: a dictionary rlz -> aggregated floats

    Example: a case with tectonic region type T1 with GSIMS A, B, C
    and tectonic region type T2 with GSIMS D, E.

    >> assoc = RlzsAssoc(CompositionInfo([], []))
    >> assoc.rlzs_assoc = {
    ... ('T1', 'A'): ['r0', 'r1'],
    ... ('T1', 'B'): ['r2', 'r3'],
    ... ('T1', 'C'): ['r4', 'r5'],
    ... ('T2', 'D'): ['r0', 'r2', 'r4'],
    ... ('T2', 'E'): ['r1', 'r3', 'r5']}
    ...
    >> results = {
    ... ('T1', 'A'): 0.01,
    ... ('T1', 'B'): 0.02,
    ... ('T1', 'C'): 0.03,
    ... ('T2', 'D'): 0.04,
    ... ('T2', 'E'): 0.05}
    ...
    >> combinations = assoc.combine(results, operator.add)
    >> for key, value in sorted(combinations.items()):
    ...     print(key, value)
    r0 0.05
    r1 0.06
    r2 0.06
    r3 0.07
    r4 0.07
    r5 0.08

    You can check that all the possible sums are performed:

    r0: 0.01 + 0.04 (T1A + T2D)
    r1: 0.01 + 0.05 (T1A + T2E)
    r2: 0.02 + 0.04 (T1B + T2D)
    r3: 0.02 + 0.05 (T1B + T2E)
    r4: 0.03 + 0.04 (T1C + T2D)
    r5: 0.03 + 0.05 (T1C + T2E)

    In reality, the `combine_curves` method is used with hazard_curves and
    the aggregation function is the `agg_curves` function, a composition of
    probability, which however is close to the sum for small probabilities.
    """
    ad = AccumDict()
    for key, value in results.items():
        gsim = self.csm_info.gsimdict[key]
        for rlz in self.rlzs_assoc[key[0], gsim]:
            ad[rlz] = agg(ad.get(rlz, 0), value)
    return ad
def acc0(self):
    """
    Initial accumulator, a dict grp_id -> ProbabilityMap(L, G)
    """
    csm_info = self.csm.info
    zd = AccumDict()
    num_levels = len(self.oqparam.imtls.array)
    for grp in self.csm.src_groups:
        num_gsims = len(csm_info.gsim_lt.get_gsims(grp.trt))
        zd[grp.id] = ProbabilityMap(num_levels, num_gsims)
    zd.eff_ruptures = AccumDict()  # grp_id -> eff_ruptures
    zd.nsites = AccumDict()  # src.id -> nsites
    return zd
def zerodict(self):
    """
    Initial accumulator, a dictionary (trt_id, gsim) -> curves
    """
    zc = zero_curves(len(self.sitecol.complete), self.oqparam.imtls)
    zd = AccumDict((key, zc) for key in self.rlzs_assoc)
    zd.calc_times = []
    zd.eff_ruptures = AccumDict()  # trt_id -> eff_ruptures
    zd.bb_dict = {
        (smodel.ordinal, site.id): BoundingBox(smodel.ordinal, site.id)
        for site in self.sitecol
        for smodel in self.csm.source_models
    } if self.oqparam.poes_disagg else {}
    return zd
def get_fragility_functions(fname, continuous_fragility_discretization):
    """
    :param fname: path of the fragility file
    :param continuous_fragility_discretization:
        number of steps used to discretize continuous fragility functions
    :returns: damage_states list and dictionary taxonomy -> functions
    """
    [fmodel] = read_nodes(
        fname, lambda el: el.tag.endswith('fragilityModel'),
        nodefactory['fragilityModel'])
    # ~fmodel.description is ignored
    limit_states = ~fmodel.limitStates
    tag = 'ffc' if fmodel['format'] == 'continuous' else 'ffd'
    fragility_functions = AccumDict()  # taxonomy -> functions
    for ffs in fmodel.getnodes('ffs'):
        nodamage = ffs.attrib.get('noDamageLimit')
        taxonomy = ~ffs.taxonomy
        imt_str, imls, min_iml, max_iml, imlUnit = ~ffs.IML
        if continuous_fragility_discretization and not imls:
            imls = numpy.linspace(min_iml, max_iml,
                                  continuous_fragility_discretization + 1)
        fragility_functions[taxonomy] = FragilityFunctionList(
            [], imt=imt_str, imls=imls)
        lstates = []
        for ff in ffs.getnodes(tag):
            ls = ff['ls']  # limit state
            lstates.append(ls)
            if tag == 'ffc':
                with context(fname, ff):
                    mean_stddev = ~ff.params
                fragility_functions[taxonomy].append(
                    scientific.FragilityFunctionContinuous(
                        ls, *mean_stddev))
            else:  # discrete
                with context(fname, ff):
                    poes = ~ff.poEs
                if nodamage is None:
                    fragility_functions[taxonomy].append(
                        scientific.FragilityFunctionDiscrete(
                            ls, imls, poes, imls[0]))
                else:
                    fragility_functions[taxonomy].append(
                        scientific.FragilityFunctionDiscrete(
                            ls, [nodamage] + imls, [0.0] + poes,
                            nodamage))
        if lstates != limit_states:
            raise InvalidFile("Expected limit states %s, got %s in %s" %
                              (limit_states, lstates, fname))
    fragility_functions.damage_states = ['no_damage'] + limit_states
    return fragility_functions
def compute_ruptures(sources, src_filter, gsims, monitor):
    """
    :param sources: a list of commonlib.source.Source instances
    :param src_filter: a source site filter
    :param gsims: a list of GSIMs for the current tectonic region model
    :param monitor: monitor instance
    :returns: a dictionary src_group_id -> [Rupture instances]
    """
    # NB: by construction each block is a non-empty list with
    # sources of the same src_group_id
    grp_id = sources[0].src_group_id
    trt = sources[0].tectonic_region_type
    eb_ruptures = []
    calc_times = []
    rup_mon = monitor('filtering ruptures', measuremem=False)
    num_samples = monitor.samples
    num_events = 0
    # Compute and save stochastic event sets
    for src, s_sites in src_filter(sources):
        t0 = time.time()
        if s_sites is None:
            continue
        max_dist = src_filter.integration_distance[trt]
        rupture_filter = functools.partial(
            filter_sites_by_distance_to_rupture,
            integration_distance=max_dist, sites=s_sites)
        num_occ_by_rup = sample_ruptures(
            src, monitor.ses_per_logic_tree_path, num_samples,
            monitor.seed)
        # NB: the number of occurrences is very low, << 1, so it is
        # more efficient to filter only the ruptures that occur, i.e.
        # to call sample_ruptures *before* the filtering
        for ebr in build_eb_ruptures(
                src, num_occ_by_rup, rupture_filter, monitor.seed,
                rup_mon):
            eb_ruptures.append(ebr)
            num_events += ebr.multiplicity
        dt = time.time() - t0
        calc_times.append((src.id, dt))
    res = AccumDict({grp_id: eb_ruptures})
    res.num_events = num_events
    res.calc_times = calc_times
    res.rup_data = {
        grp_id: calc.RuptureData(trt, gsims).to_array(eb_ruptures)}
    return res
def compute_ruptures(sources, sitecol, gsims, monitor):
    """
    :param sources: a sequence of UCERF sources
    :param sitecol: a SiteCollection instance
    :param gsims: a list of GSIMs
    :param monitor: a Monitor instance
    :returns: an AccumDict grp_id -> EBRuptures
    """
    [src] = sources  # there is a single source per UCERF branch
    integration_distance = monitor.maximum_distance[DEFAULT_TRT]
    res = AccumDict()
    res.calc_times = AccumDict()
    serial = 1
    event_mon = monitor('sampling ruptures', measuremem=False)
    res.num_events = 0
    res.trt = DEFAULT_TRT
    t0 = time.time()
    # set the seed before calling generate_event_set
    numpy.random.seed(monitor.seed + src.src_group_id)
    ebruptures = []
    eid = 0
    src.build_idx_set()
    background_sids = src.get_background_sids(
        sitecol, integration_distance)
    for ses_idx in range(1, monitor.ses_per_logic_tree_path + 1):
        with event_mon:
            rups, n_occs = src.generate_event_set(background_sids)
        for rup, n_occ in zip(rups, n_occs):
            rup.seed = monitor.seed  # to think
            rrup = rup.surface.get_min_distance(sitecol.mesh)
            r_sites = sitecol.filter(rrup <= integration_distance)
            if r_sites is None:
                continue
            indices = r_sites.indices
            events = []
            for occ in range(n_occ):
                events.append((eid, ses_idx, occ, 0))  # 0 is the sampling
                eid += 1
            if events:
                ebruptures.append(
                    event_based.EBRupture(
                        rup, indices,
                        numpy.array(events, event_based.event_dt),
                        src.source_id, src.src_group_id, serial))
                serial += 1
                res.num_events += len(events)
    res[src.src_group_id] = ebruptures
    res.calc_times[src.src_group_id] = (
        src.source_id, len(sitecol), time.time() - t0)
    return res
def ucerf_classical_hazard_by_rupture_set(
        rupset_idx, branchname, ucerf_source, src_group_id, sitecol,
        gsims, monitor):
    """
    :param rupset_idx: indices of the rupture sets
    :param branchname: name of the branch
    :param ucerf_source: an object taking the place of a source for UCERF
    :param src_group_id: source group index
    :param sitecol: a SiteCollection instance
    :param gsims: a list of GSIMs
    :param monitor: a monitor instance
    :returns: an AccumDict rlz -> curves
    """
    truncation_level = monitor.oqparam.truncation_level
    imtls = monitor.oqparam.imtls
    max_dist = monitor.oqparam.maximum_distance[DEFAULT_TRT]
    dic = AccumDict()
    dic.bbs = []
    dic.calc_times = []
    monitor.eff_ruptures = 0
    monitor.calc_times = []
    # Apply the initial rupture to site filtering
    rupset_idx, s_sites = \
        ucerf_source.filter_sites_by_distance_from_rupture_set(
            rupset_idx, sitecol, max_dist)
    if len(s_sites):
        dic[src_group_id] = hazard_curves_per_rupture_subset(
            rupset_idx, ucerf_source, s_sites, imtls, gsims,
            truncation_level, maximum_distance=max_dist, bbs=dic.bbs,
            monitor=monitor)
    else:
        dic[src_group_id] = ProbabilityMap(len(imtls.array), len(gsims))
    dic.calc_times += monitor.calc_times  # added by pmap_from_grp
    dic.eff_ruptures = {src_group_id: monitor.eff_ruptures}  # idem
    return dic
def zerodict(self):
    """
    Initial accumulator, a dict grp_id -> ProbabilityMap(L, G)
    """
    zd = AccumDict()
    num_levels = len(self.oqparam.imtls.array)
    for grp in self.csm.src_groups:
        num_gsims = len(self.rlzs_assoc.gsims_by_grp_id[grp.id])
        zd[grp.id] = ProbabilityMap(num_levels, num_gsims)
    zd.calc_times = []
    zd.eff_ruptures = AccumDict()  # grp_id -> eff_ruptures
    zd.bb_dict = BBdict()
    if self.oqparam.poes_disagg:
        for sid in self.sitecol.sids:
            for smodel in self.csm.source_models:
                zd.bb_dict[smodel.ordinal, sid] = BoundingBox(
                    smodel.ordinal, sid)
    return zd
def __init__(self, oqtask, name=None):
    self.oqtask = oqtask
    self.task_func = getattr(oqtask, 'task_func', oqtask)
    self.name = name or oqtask.__name__
    self.results = []
    self.sent = AccumDict()
    self.received = []
    self.no_distribute = no_distribute()
    self.argnames = inspect.getargspec(self.task_func).args
def get_mesh_csvdata(csvfile, imts, num_values, validvalues):
    """
    Read CSV data in the format `IMT lon lat value1 ... valueN`.

    :param csvfile: a file or file-like object with the CSV data
    :param imts: a list of intensity measure types
    :param num_values: dictionary with the number of expected values per IMT
    :param validvalues: validation function for the values
    :returns:
        the mesh of points and the data as a dictionary
        imt -> array of curves for each site
    """
    number_of_values = dict(zip(imts, num_values))
    lon_lats = {imt: set() for imt in imts}
    data = AccumDict()  # imt -> list of arrays
    check_imt = valid.Choice(*imts)
    for line, row in enumerate(csv.reader(csvfile, delimiter=' '), 1):
        try:
            imt = check_imt(row[0])
            lon_lat = valid.longitude(row[1]), valid.latitude(row[2])
            if lon_lat in lon_lats[imt]:
                raise DuplicatedPoint(lon_lat)
            lon_lats[imt].add(lon_lat)
            values = validvalues(' '.join(row[3:]))
            if len(values) != number_of_values[imt]:
                raise ValueError('Found %d values, expected %d' %
                                 (len(values), number_of_values[imt]))
        except (ValueError, DuplicatedPoint) as err:
            raise err.__class__('%s: file %s, line %d' %
                                (err, csvfile, line))
        data += {imt: [numpy.array(values)]}
    points = lon_lats.pop(imts[0])
    for other_imt, other_points in lon_lats.iteritems():
        if points != other_points:
            raise ValueError('Inconsistent locations between %s and %s' %
                             (imts[0], other_imt))
    lons, lats = zip(*sorted(points))
    mesh = geo.Mesh(numpy.array(lons), numpy.array(lats))
    return mesh, {imt: numpy.array(lst) for imt, lst in data.iteritems()}
def __init__(self, oqtask, name=None):
    self.task_func = oqtask
    self.name = name or oqtask.__name__
    self.results = []
    self.sent = AccumDict()
    self.distribute = oq_distribute()
    self.argnames = inspect.getargspec(self.task_func).args
    if self.distribute == 'ipython' and isinstance(
            self.executor, ProcessPoolExecutor):
        client = ipp.Client()
        self.__class__.executor = client.executor()
def calc_gmfs(oqparam, sitecol):
    """
    Build all the ground motion fields for the whole site collection
    """
    correl_model = get_correl_model(oqparam)
    rnd = random.Random()
    rnd.seed(getattr(oqparam, 'random_seed', 42))
    imts = get_imts(oqparam)
    gsim = get_gsim(oqparam)
    trunc_level = getattr(oqparam, 'truncation_level', None)
    n_gmfs = getattr(oqparam, 'number_of_ground_motion_fields', 1)
    rupture = get_rupture(oqparam)
    computer = gmf.GmfComputer(rupture, sitecol, imts, gsim, trunc_level,
                               correl_model)
    seeds = [rnd.randint(0, MAX_INT) for _ in xrange(n_gmfs)]
    res = AccumDict()  # imt -> gmf
    for seed in seeds:
        for imt, gmfield in computer.compute(seed):
            res += {imt: [gmfield]}  # res[imt] is a matrix R x N
    return {imt: numpy.array(matrix).T for imt, matrix in res.iteritems()}
def get_gmfs_by_imt(fname, sitecol, imts):
    """
    Return a list of dictionaries with a ground motion field per IMT,
    one dictionary per rupture.

    :param fname: path to the CSV file
    :param sitecol: the underlying site collection
    :param imts: the IMTs corresponding to the columns in the CSV file
    """
    dicts = []
    with open(fname) as f:
        for row in csv.reader(f):
            indices = map(int, row[1].split())
            sc = FilteredSiteCollection(indices, sitecol)
            dic = AccumDict()
            for imt, col in zip(imts, row[2:]):
                gmf = numpy.array(map(float, col.split()))
                dic[imt] = sc.expand(gmf, 0)
            dic.tag = row[0]
            dicts.append(dic)
    return sorted(dicts, key=lambda dic: dic.tag)
def build_ruptures(sources, src_filter, param, monitor):
    """
    :param sources: a list with a single UCERF source
    :param src_filter: a SourceFilter instance
    :param param: extra parameters
    :param monitor: a Monitor instance
    :returns: an AccumDict grp_id -> EBRuptures
    """
    [src] = sources
    res = AccumDict()
    res.calc_times = []
    sampl_mon = monitor('sampling ruptures', measuremem=True)
    res.trt = DEFAULT_TRT
    background_sids = src.get_background_sids(src_filter)
    samples = getattr(src, 'samples', 1)
    n_occ = AccumDict(accum=0)
    t0 = time.time()
    with sampl_mon:
        for sam_idx in range(samples):
            for ses_idx, ses_seed in param['ses_seeds']:
                seed = sam_idx * TWO16 + ses_seed
                rups, occs = generate_event_set(
                    src, background_sids, src_filter, ses_idx, seed)
                for rup, occ in zip(rups, occs):
                    n_occ[rup] += occ
    tot_occ = sum(n_occ.values())
    dic = {'eff_ruptures': {src.src_group_id: src.num_ruptures}}
    eb_ruptures = [EBRupture(rup, src.id, src.src_group_id, n, samples)
                   for rup, n in n_occ.items()]
    dic['rup_array'] = stochastic.get_rup_array(eb_ruptures, src_filter)
    dt = time.time() - t0
    dic['calc_times'] = {src.id: numpy.array([tot_occ, dt], F32)}
    return dic
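
# Illustrative sketch, not engine code: `AccumDict(accum=0)` returns 0 for
# missing keys, so `n_occ[rup] += occ` above counts occurrences per rupture
# with no explicit initialization. Assuming the openquake.baselib.general
# import path used by the engine:
from openquake.baselib.general import AccumDict

n_occ = AccumDict(accum=0)  # rupture -> number of occurrences
for rup, occ in [('rup-a', 2), ('rup-b', 1), ('rup-a', 3)]:
    n_occ[rup] += occ
assert n_occ == {'rup-a': 5, 'rup-b': 1}
assert sum(n_occ.values()) == 6  # tot_occ in build_ruptures
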
def classical(sources, sitecol, gsims, monitor):
    """
    :param sources:
        a non-empty sequence of sources of homogeneous tectonic region type
    :param sitecol: a SiteCollection instance
    :param gsims: a list of GSIMs for the current tectonic region type
    :param monitor: a monitor instance
    :returns: an AccumDict rlz -> curves
    """
    truncation_level = monitor.truncation_level
    imtls = monitor.imtls
    src_group_id = sources[0].src_group_id
    # sanity check: the src_group must be the same for all sources
    for src in sources[1:]:
        assert src.src_group_id == src_group_id
    trt = sources[0].tectonic_region_type
    max_dist = monitor.maximum_distance[trt]
    dic = AccumDict()
    if monitor.poes_disagg:
        sm_id = monitor.sm_id
        dic.bbs = [BoundingBox(sm_id, sid) for sid in sitecol.sids]
    else:
        dic.bbs = []
    # NB: the source_site_filter below is ESSENTIAL for performance inside
    # pmap_from_grp, since it reduces the full site collection
    # to a filtered one *before* doing the rupture filtering
    dic[src_group_id] = pmap_from_grp(
        sources, sitecol, imtls, gsims, truncation_level,
        maximum_distance=max_dist, bbs=dic.bbs, monitor=monitor)
    dic.calc_times = monitor.calc_times  # added by pmap_from_grp
    dic.eff_ruptures = {src_group_id: monitor.eff_ruptures}  # idem
    return dic
def out_by_lr(self, imt, assets, hazard, epsgetter):
    """
    :param imt: restrict the risk functions to this IMT
    :param assets: an array of assets of homogeneous taxonomy
    :param hazard: a dictionary rlz -> hazard
    :param epsgetter: a callable returning epsilons for the given eids
    :returns: a dictionary (l, r) -> output
    """
    out_by_lr = AccumDict()
    out_by_lr.assets = assets
    loss_types = self.get_loss_types(imt)
    for rlz in sorted(hazard):
        haz = hazard[rlz]
        if len(haz) == 0:
            continue
        r = rlz.ordinal
        for loss_type in loss_types:
            out = self(loss_type, assets, haz, epsgetter)
            if out:  # can be None in scenario_risk with no valid values
                l = self.compositemodel.lti[loss_type]
                out.hid = r
                out.weight = rlz.weight
                out_by_lr[l, r] = out
    return out_by_lr
def disaggregate(self, sitecol, ruptures, iml4, truncnorm, epsilons,
                 monitor=Monitor()):
    """
    Disaggregate (separate) PoE of `imldict` in different contributions
    each coming from `n_epsilons` distribution bins.

    :param sitecol: a SiteCollection
    :param ruptures: an iterator over ruptures with the same TRT
    :param iml4: a 4d array of IMLs of shape (N, R, M, P)
    :param truncnorm: an instance of scipy.stats.truncnorm
    :param epsilons: the epsilon bins
    :param monitor: a Monitor instance
    :returns:
        an AccumDict with keys (poe, imt, rlzi) and mags, dists, lons, lats
    """
    acc = AccumDict(accum=[])
    ctx_mon = monitor('disagg_contexts', measuremem=False)
    pne_mon = monitor('disaggregate_pne', measuremem=False)
    clo_mon = monitor('get_closest', measuremem=False)
    for rupture in ruptures:
        with ctx_mon:
            orig_dctx = DistancesContext(
                (param, get_distances(rupture, sitecol, param))
                for param in self.REQUIRES_DISTANCES)
            self.add_rup_params(rupture)
        with clo_mon:  # this is faster than computing orig_dctx
            closest_points = rupture.surface.get_closest_points(sitecol)
        cache = {}
        for r, gsim in self.gsim_by_rlzi.items():
            dctx = orig_dctx.roundup(gsim.minimum_distance)
            for m, imt in enumerate(iml4.imts):
                for p, poe in enumerate(iml4.poes_disagg):
                    iml = tuple(iml4.array[:, r, m, p])
                    try:
                        pne = cache[gsim, imt, iml]
                    except KeyError:
                        with pne_mon:
                            pne = gsim.disaggregate_pne(
                                rupture, sitecol, dctx, imt, iml,
                                truncnorm, epsilons)
                            cache[gsim, imt, iml] = pne
                    acc[poe, str(imt), r].append(pne)
        acc['mags'].append(rupture.mag)
        acc['dists'].append(getattr(dctx, self.filter_distance))
        acc['lons'].append(closest_points.lons)
        acc['lats'].append(closest_points.lats)
    return acc
def sample_ruptures(sources, src_filter=source_site_noop_filter,
                    gsims=(), param=(), monitor=Monitor()):
    """
    :param sources: a sequence of sources of the same group
    :param src_filter: a source site filter
    :param gsims:
        a list of GSIMs for the current tectonic region model
        (can be empty)
    :param param:
        a dictionary of additional parameters (by default
        ses_per_logic_tree_path=1 and filter_distance=1000)
    :param monitor: monitor instance
    :returns:
        a dictionary with eb_ruptures, num_events, num_ruptures, calc_times
    """
    if not param:
        param = dict(ses_per_logic_tree_path=1, filter_distance=1000)
    eb_ruptures = []
    # AccumDict of arrays with 3 elements weight, nsites, calc_time
    calc_times = AccumDict(accum=numpy.zeros(3, numpy.float32))
    rup_mon = monitor('making contexts', measuremem=False)
    # Compute and save stochastic event sets
    cmaker = ContextMaker(gsims, src_filter.integration_distance,
                          param, monitor)
    for src, s_sites in src_filter(sources):
        mutex_weight = getattr(src, 'mutex_weight', 1)
        samples = getattr(src, 'samples', 1)
        t0 = time.time()
        num_occ_by_rup = _sample_ruptures(
            src, mutex_weight, param['ses_per_logic_tree_path'], samples)
        # NB: the number of occurrences is very low, << 1, so it is
        # more efficient to filter only the ruptures that occur, i.e.
        # to call sample_ruptures *before* the filtering
        ebrs = list(_build_eb_ruptures(
            src, num_occ_by_rup, cmaker, s_sites, rup_mon))
        eb_ruptures.extend(ebrs)
        eids = set_eids(ebrs)
        dt = time.time() - t0
        calc_times[src.id] += numpy.array([len(eids), src.nsites, dt])
    dic = dict(eb_ruptures=eb_ruptures, calc_times=calc_times)
    return dic
def create_dsets(self):
    """
    Store some empty datasets in the datastore
    """
    params = {'grp_id', 'occurrence_rate', 'clon_', 'clat_', 'rrup_',
              'nsites', 'probs_occur_', 'sids_', 'src_id'}
    gsims_by_trt = self.full_lt.get_gsims_by_trt()
    for trt, gsims in gsims_by_trt.items():
        cm = ContextMaker(trt, gsims, dict(imtls=self.oqparam.imtls))
        params.update(cm.REQUIRES_RUPTURE_PARAMETERS)
        for dparam in cm.REQUIRES_DISTANCES:
            params.add(dparam + '_')
    mags = set()
    for trt, dset in self.datastore['source_mags'].items():
        mags.update(dset[:])
    mags = sorted(mags)
    if self.few_sites:
        descr = []  # (param, dt)
        for param in params:
            if param == 'sids_':
                dt = hdf5.vuint16
            elif param == 'probs_occur_':
                dt = hdf5.vfloat64
            elif param.endswith('_'):
                dt = hdf5.vfloat32
            elif param == 'src_id':
                dt = U32
            elif param in {'nsites', 'grp_id'}:
                dt = U16
            else:
                dt = F32
            descr.append((param, dt))
        self.datastore.create_dframe('rup', descr, 'gzip')
    self.by_task = {}  # task_no => src_ids
    self.maxradius = 0
    self.Ns = len(self.csm.source_info)
    self.rel_ruptures = AccumDict(accum=0)  # trt -> rel_ruptures
    # NB: the relevant ruptures are less than the effective ruptures,
    # which are a preclassical concept
    if self.oqparam.disagg_by_src:
        sources = self.get_source_ids()
        self.datastore.create_dset(
            'disagg_by_src', F32,
            (self.N, self.R, self.M, self.L1, self.Ns))
        self.datastore.set_shape_descr(
            'disagg_by_src', site_id=self.N, rlz_id=self.R,
            imt=list(self.oqparam.imtls), lvl=self.L1, src_id=sources)
def apply_reduce(cls, task, task_args, agg=operator.add, acc=None,
                 concurrent_tasks=executor._max_workers,
                 weight=lambda item: 1,
                 key=lambda item: 'Unspecified', name=None):
    """
    Apply a task to a tuple of the form (sequence, \*other_args)
    by first splitting the sequence in chunks, according to the weight
    of the elements and possibly to a key (see
    :func:`openquake.baselib.general.split_in_blocks`).
    Then reduce the results with an aggregation function.
    The chunks which are generated internally can be seen directly
    (useful for debugging purposes) by looking at the attribute
    `._chunks`, right after the `apply_reduce` function has been called.

    :param task: a task to run in parallel
    :param task_args: the arguments to be passed to the task function
    :param agg: the aggregation function
    :param acc: initial value of the accumulator (default empty AccumDict)
    :param concurrent_tasks: hint about how many tasks to generate
    :param weight: function to extract the weight of an item in arg0
    :param key: function to extract the kind of an item in arg0
    :param name: name of the task, used in the logs
    """
    arg0 = task_args[0]  # this is assumed to be a sequence
    num_items = len(arg0)
    args = task_args[1:]
    task_func = getattr(task, 'task_func', task)
    if acc is None:
        acc = AccumDict()
    if num_items == 0:  # nothing to do
        return acc
    elif num_items == 1:  # apply the function in the master process
        return agg(acc, task_func(arg0, *args))
    chunks = list(split_in_blocks(arg0, concurrent_tasks or 1, weight, key))
    cls.apply_reduce.__func__._chunks = chunks
    if not concurrent_tasks or no_distribute():
        for chunk in chunks:
            acc = agg(acc, task_func(chunk, *args))
        return acc
    logging.info('Starting %d tasks', len(chunks))
    self = cls.starmap(task, [(chunk, ) + args for chunk in chunks], name)
    return self.reduce(agg, acc)
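
# Illustrative sketch, not engine code: apply_reduce boils down to
# "split by weight, map the task over the chunks, fold the results with
# agg". A minimal sequential version of the same idea; toy_split is a
# stand-in for openquake.baselib.general.split_in_blocks.
import operator


def toy_split(items, hint):
    # split items into roughly `hint` chunks of equal size
    size = max(len(items) // hint, 1)
    return [items[i:i + size] for i in range(0, len(items), size)]


def toy_apply_reduce(task, seq, agg=operator.add, acc=0, hint=4):
    for chunk in toy_split(seq, hint):
        acc = agg(acc, task(chunk))  # fold each partial result into acc
    return acc


assert toy_apply_reduce(sum, list(range(10))) == 45
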
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    monitor = self.monitor(self.core_task.__name__)
    monitor.oqparam = oq = self.oqparam
    self.src_filter = SourceFilter(self.sitecol, oq.maximum_distance)
    self.nsites = []
    acc = AccumDict({
        grp_id: ProbabilityMap(len(oq.imtls.array), len(gsims))
        for grp_id, gsims in self.gsims_by_grp.items()})
    acc.calc_times = {}
    acc.eff_ruptures = AccumDict()  # grp_id -> eff_ruptures
    acc.bb_dict = {}  # just for API compatibility
    param = dict(imtls=oq.imtls, truncation_level=oq.truncation_level,
                 filter_distance=oq.filter_distance)
    for sm in self.csm.source_models:  # one branch at the time
        grp_id = sm.ordinal
        gsims = self.gsims_by_grp[grp_id]
        [[ucerf_source]] = sm.src_groups
        ucerf_source.nsites = len(self.sitecol)
        self.csm.infos[ucerf_source.source_id] = source.SourceInfo(
            ucerf_source)
        ct = self.oqparam.concurrent_tasks or 1
        # parallelize by rupture subsets
        rup_sets = numpy.arange(ucerf_source.num_ruptures)
        taskname = 'ucerf_classical_%d' % grp_id
        acc = parallel.Starmap.apply(
            ucerf_classical,
            (rup_sets, ucerf_source, self.src_filter, gsims, monitor),
            concurrent_tasks=ct, name=taskname
        ).reduce(self.agg_dicts, acc)
        # parallelize on the background sources, small tasks
        bckgnd_sources = ucerf_source.get_background_sources(
            self.src_filter)
        args = (bckgnd_sources, self.src_filter, gsims, param, monitor)
        bg_res = parallel.Starmap.apply(
            classical, args, name='background_sources_%d' % grp_id,
            concurrent_tasks=ct)
        # compose probabilities from background sources
        for pmap in bg_res:
            acc[grp_id] |= pmap[grp_id]
    with self.monitor('store source_info', autoflush=True):
        self.store_source_info(self.csm.infos, acc)
    return acc  # {grp_id: pmap}
def compute_ruptures(sources, src_filter, gsims, param, monitor):
    """
    :param sources: a list with a single UCERF source
    :param src_filter: a SourceFilter instance
    :param gsims: a list of GSIMs
    :param param: extra parameters
    :param monitor: a Monitor instance
    :returns: an AccumDict grp_id -> EBRuptures
    """
    [src] = sources
    res = AccumDict()
    res.calc_times = []
    serial = 1
    sampl_mon = monitor('sampling ruptures', measuremem=True)
    filt_mon = monitor('filtering ruptures', measuremem=False)
    res.trt = DEFAULT_TRT
    ebruptures = []
    background_sids = src.get_background_sids(src_filter)
    sitecol = src_filter.sitecol
    cmaker = ContextMaker(gsims, src_filter.integration_distance)
    for sample in range(param['samples']):
        for ses_idx, ses_seed in param['ses_seeds']:
            seed = sample * TWO16 + ses_seed
            with sampl_mon:
                rups, n_occs = src.generate_event_set(
                    background_sids, src_filter, seed)
            with filt_mon:
                for rup, n_occ in zip(rups, n_occs):
                    rup.serial = serial
                    rup.seed = seed
                    try:
                        rup.sctx, rup.dctx = cmaker.make_contexts(
                            sitecol, rup)
                        indices = rup.sctx.sids
                    except FarAwayRupture:
                        continue
                    events = []
                    for _ in range(n_occ):
                        events.append(
                            (0, src.src_group_id, ses_idx, sample))
                    if events:
                        evs = numpy.array(events, stochastic.event_dt)
                        ebruptures.append(EBRupture(rup, indices, evs))
                        serial += 1
    res.num_events = len(stochastic.set_eids(ebruptures))
    res[src.src_group_id] = ebruptures
    if not param['save_ruptures']:
        res.events_by_grp = {
            grp_id: event_based.get_events(res[grp_id])
            for grp_id in res}
    res.eff_ruptures = {src.src_group_id: src.num_ruptures}
    return res
def view_assets_by_site(token, dstore):
    """
    Display statistical information about the distribution of the assets
    """
    assets_by_site = dstore['assetcol'].assets_by_site()
    data = ['taxonomy mean stddev min max num_sites num_assets'.split()]
    num_assets = AccumDict()
    for assets in assets_by_site:
        num_assets += {k: [len(v)] for k, v in groupby(
            assets, operator.attrgetter('taxonomy')).items()}
    for taxo in sorted(num_assets):
        val = numpy.array(num_assets[taxo])
        data.append(stats(taxo, val, val.sum()))
    if len(num_assets) > 1:  # more than one taxonomy, add a summary
        n_assets = numpy.array([len(assets) for assets in assets_by_site])
        data.append(stats('*ALL*', n_assets, n_assets.sum()))
    return rst_table(data)
def get_ruptures_by_grp(dstore):
    """
    Extracts the dictionary `ruptures_by_grp` from the given calculator
    """
    n = 0
    for grp in dstore['ruptures']:
        n += len(dstore['ruptures/' + grp])
    logging.info('Reading %d ruptures from the datastore', n)
    # disable check on PlanarSurface to support UCERF ruptures
    with mock.patch(
            'openquake.hazardlib.geo.surface.PlanarSurface.'
            'IMPERFECT_RECTANGLE_TOLERANCE', numpy.inf):
        ruptures_by_grp = AccumDict(accum=[])
        for grp in dstore['ruptures']:
            grp_id = int(grp[4:])  # strip 'grp-'
            ruptures_by_grp[grp_id] = list(
                calc.get_ruptures(dstore, grp_id))
    return ruptures_by_grp
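
# Illustrative sketch, not engine code: `AccumDict(accum=[])` creates a
# fresh list for every missing key, which is what lets functions like
# get_ruptures_by_grp and the exporters below append or extend per group
# without initializing the key first. Assuming the
# openquake.baselib.general import path used by the engine:
from openquake.baselib.general import AccumDict

by_grp = AccumDict(accum=[])  # grp_id -> list of ruptures
by_grp[0].append('rup-1')  # a missing key yields a new empty list
by_grp[0].append('rup-2')
by_grp[1].extend(['rup-3'])
assert by_grp == {0: ['rup-1', 'rup-2'], 1: ['rup-3']}
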
def __init__(self, dstore, kind, getter, imtls, eids=None):
    assert kind in ('poe', 'gmf'), kind
    self.kind = kind
    self.sids = getter.sids
    self._getter = getter
    self.imtls = imtls
    self.eids = eids
    self.num_rlzs = dstore['csm_info'].get_num_rlzs()
    oq = dstore['oqparam']
    self.E = getattr(oq, 'number_of_ground_motion_fields', None)
    self.I = len(oq.imtls)
    if kind == 'gmf':
        # now some attributes set for API compatibility with the GmfGetter
        # number of ground motion fields
        # dictionary rlzi -> array(imts, events, nbytes)
        self.gmdata = AccumDict(
            accum=numpy.zeros(len(self.imtls) + 2, F32))
def export_ruptures_xml(ekey, dstore):
    """
    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    """
    fmt = ekey[-1]
    oq = dstore['oqparam']
    num_ses = oq.ses_per_logic_tree_path
    ruptures_by_grp = AccumDict(accum=[])
    for rgetter in gen_rgetters(dstore):
        ebrs = [ebr.export(rgetter.rlzs_by_gsim, num_ses)
                for ebr in rgetter.get_ruptures()]
        ruptures_by_grp[rgetter.grp_id].extend(ebrs)
    dest = dstore.export_path('ses.' + fmt)
    writer = hazard_writers.SESXMLWriter(dest)
    writer.serialize(ruptures_by_grp, oq.investigation_time)
    return [dest]
def init(self):
    if hasattr(self, 'data'):  # already initialized
        return
    self.dstore.open()  # if not already open
    self.data = collections.OrderedDict()
    for sid in self.sids:
        self.data[sid] = data = self[sid]
        if not data:  # no GMVs, return 0, counted in no_damage
            self.data[sid] = {rlzi: 0 for rlzi in range(self.num_rlzs)}
    # dictionary eid -> index
    if self.eids is not None:
        self.eid2idx = dict(zip(self.eids, range(len(self.eids))))
    # now some attributes set for API compatibility with the GmfGetter
    # number of ground motion fields
    # dictionary rlzi -> array(imts, events, nbytes)
    self.imtls = self.dstore['oqparam'].imtls
    self.gmdata = AccumDict(accum=numpy.zeros(len(self.imtls) + 2, F32))
def make(self):
    self.rupdata = []
    self.source_data = AccumDict(accum=[])
    if self.src_mutex:
        pmap = self._make_src_mutex()
    else:
        pmap = self._make_src_indep()
    dic = {
        'pmap': pmap,
        'rup_data': self.dictarray(self.rupdata),
        'source_data': self.source_data,
        'task_no': self.task_no,
        'grp_id': self.group[0].grp_id
    }
    if self.disagg_by_src:
        dic['source_id'] = self.group[0].source_id
    return dic
def classical(sources, sitecol, siteidx, rlzs_assoc, monitor):
    """
    :param sources:
        a non-empty sequence of sources of homogeneous tectonic region type
    :param sitecol: a SiteCollection instance
    :param siteidx: index of the first site (0 if there is a single tile)
    :param rlzs_assoc: a RlzsAssoc instance
    :param monitor: a monitor instance
    :returns: an AccumDict rlz -> curves
    """
    truncation_level = monitor.oqparam.truncation_level
    imtls = monitor.oqparam.imtls
    trt_model_id = sources[0].trt_model_id
    # sanity check: the trt_model must be the same for all sources
    for src in sources[1:]:
        assert src.trt_model_id == trt_model_id
    gsims = rlzs_assoc.gsims_by_trt_id[trt_model_id]
    trt = sources[0].tectonic_region_type
    max_dist = monitor.oqparam.maximum_distance[trt]
    dic = AccumDict()
    dic.siteslice = slice(siteidx, siteidx + len(sitecol))
    if monitor.oqparam.poes_disagg:
        sm_id = rlzs_assoc.sm_ids[trt_model_id]
        dic.bbs = [BoundingBox(sm_id, sid) for sid in sitecol.sids]
    else:
        dic.bbs = []
    # NB: the source_site_filter below is ESSENTIAL for performance inside
    # hazard_curves_per_trt, since it reduces the full site collection
    # to a filtered one *before* doing the rupture filtering
    dic[trt_model_id] = hazard_curves_per_trt(
        sources, sitecol, imtls, gsims, truncation_level,
        source_site_filter=source_site_distance_filter(max_dist),
        maximum_distance=max_dist, bbs=dic.bbs, monitor=monitor)
    dic.calc_times = monitor.calc_times  # added by hazard_curves_per_trt
    dic.eff_ruptures = {trt_model_id: monitor.eff_ruptures}  # idem
    return dic
def __init__(self, info, groups, ses_seed=0, event_based=False):
    self.gsim_lt = info.gsim_lt
    self.source_model_lt = info.source_model_lt
    self.sm_rlzs = info.sm_rlzs
    self.info = info
    # extract a single source from multiple sources with the same ID
    # and regroup the sources in non-atomic groups by TRT
    atomic = []
    acc = AccumDict(accum=[])
    get_grp_id = info.source_model_lt.get_grp_id(info.gsim_lt.values)
    for sm in self.sm_rlzs:
        for grp in groups[sm.ordinal]:
            if grp and grp.atomic:
                atomic.append(grp)
            elif grp:
                acc[grp.trt].extend(grp)
            grp_id = get_grp_id(grp.trt, sm.ordinal)
            for src in grp:
                src.grp_id = grp_id
                if sm.samples > 1:
                    src.samples = sm.samples
    dic = {}
    key = operator.attrgetter('source_id', 'checksum')
    idx = 0
    for trt in acc:
        lst = []
        for srcs in groupby(acc[trt], key).values():
            for src in srcs:
                src.id = idx
            idx += 1
            if len(srcs) > 1:  # happens in classical/case_20
                src.grp_id = [s.grp_id for s in srcs]
            lst.append(src)
        dic[trt] = sourceconverter.SourceGroup(trt, lst)
    for ag in atomic:
        for src in ag:
            src.id = idx
            idx += 1
    self.src_groups = list(dic.values()) + atomic
    if event_based:  # init serials
        serial = ses_seed
        for sg in self.src_groups:
            for src in sg:
                src.serial = serial
                serial += src.num_ruptures * len(src.grp_ids)
def __init__(self, oqtask, task_args, name=None):
    self.task_func = oqtask
    self.task_args = task_args
    self.name = name or oqtask.__name__
    self.results = []
    self.sent = AccumDict()
    self.distribute = oq_distribute(oqtask)
    # a task can be a function, a class or an instance with a __call__
    if inspect.isfunction(oqtask):
        self.argnames = inspect.getargspec(oqtask).args
    elif inspect.isclass(oqtask):
        self.argnames = inspect.getargspec(oqtask.__init__).args[1:]
    else:  # instance with a __call__ method
        self.argnames = inspect.getargspec(oqtask.__call__).args[1:]
    if self.distribute == 'ipython' and isinstance(
            self.executor, ProcessPoolExecutor):
        client = ipp.Client()
        self.__class__.executor = client.executor()
def process(self, csm, dummy=None):
    """
    :param csm: a CompositeSourceModel instance
    :returns: the times spent in sequential and parallel processing
    """
    sources = csm.get_sources()
    self.infos = []
    seqtime, partime = 0, 0
    sources_by_trt = AccumDict()
    logging.info('Sequential processing of %d sources...', len(sources))
    t1 = time.time()
    for src in sources:
        sources_by_trt = self.agg_source_info(
            sources_by_trt, self.filter(src))
    seqtime = time.time() - t1
    self.update(csm, sources_by_trt)
    return seqtime, partime
def export_dmg_xml(key, dstore, damage_states, dmg_data, suffix):
    """
    Export damage outputs in XML format.

    :param key:
        dmg_dist_per_asset|dmg_dist_per_taxonomy|dmg_dist_total|collapse_map
    :param dstore: the datastore
    :param damage_states: the list of damage states
    :param dmg_data:
        a list [(loss_type, unit, asset_ref, mean, stddev), ...]
    :param suffix: a suffix specifying the GSIM realization
    """
    dest = dstore.export_path('%s%s.%s' % (key[0], suffix, key[1]))
    risk_writers.DamageWriter(damage_states).to_nrml(key[0], dmg_data, dest)
    return AccumDict({key: [dest]})
def disaggregate(self, sitecol, ruptures, iml4, truncnorm, epsilons,
                 monitor=Monitor()):
    """
    Disaggregate (separate) PoE of `imldict` in different contributions
    each coming from `n_epsilons` distribution bins.

    :param sitecol: a SiteCollection
    :param ruptures: an iterator over ruptures with the same TRT
    :param iml4: a 4d array of IMLs of shape (N, R, M, P)
    :param truncnorm: an instance of scipy.stats.truncnorm
    :param epsilons: the epsilon bins
    :param monitor: a Monitor instance
    :returns: an AccumDict
    """
    sitemesh = sitecol.mesh
    acc = AccumDict(accum=[])
    ctx_mon = monitor('disagg_contexts', measuremem=False)
    pne_mon = monitor('disaggregate_pne', measuremem=False)
    for rupture in ruptures:
        with ctx_mon:
            # do not filter to avoid changing the number of sites
            sctx, rctx, orig_dctx = self.make_contexts(
                sitecol, rupture, filter_sites=False)
        cache = {}
        for r, gsim in self.gsim_by_rlzi.items():
            dctx = orig_dctx.roundup(gsim.minimum_distance)
            for m, imt in enumerate(iml4.imts):
                for p, poe in enumerate(iml4.poes_disagg):
                    iml = tuple(iml4.array[:, r, m, p])
                    try:
                        pne = cache[gsim, imt, iml]
                    except KeyError:
                        with pne_mon:
                            pne = gsim.disaggregate_pne(
                                rupture, sctx, rctx, dctx, imt, iml,
                                truncnorm, epsilons)
                            cache[gsim, imt, iml] = pne
                    acc[poe, str(imt), r].append(pne)
        closest_points = rupture.surface.get_closest_points(sitemesh)
        acc['mags'].append(rupture.mag)
        acc['dists'].append(dctx.rjb)
        acc['lons'].append(closest_points.lons)
        acc['lats'].append(closest_points.lats)
    return acc
class RupData(object):
    """
    A class to collect rupture information into an AccumDict
    """
    def __init__(self, cmaker, num_probs_occur, data=None):
        self.cmaker = cmaker
        self.num_probs_occur = num_probs_occur
        self.data = AccumDict(accum=[]) if data is None else data

    def add(self, ctxs, sites, grp_ids):
        """
        Populate the inner AccumDict

        :param ctxs: a list of pairs (rctx, dctx) associated to U ruptures
        :param sites: a filtered site collection with N'<=N sites
        :param grp_ids: a tuple of indices associated to the ruptures
        """
        N = len(sites.complete)
        params = (sorted(self.cmaker.REQUIRES_DISTANCES | {'rrup'}) +
                  ['clon', 'clat'])
        for r, ctx in enumerate(ctxs):
            if numpy.isnan(ctx.occurrence_rate):
                # for nonparametric ruptures
                probs_occur = ctx.probs_occur
            else:
                probs_occur = numpy.zeros(0)
            self.data['occurrence_rate'].append(ctx.occurrence_rate)
            self.data['probs_occur'].append(probs_occur)
            self.data['weight'].append(ctx.weight or numpy.nan)
            self.data['grp_id'].append(','.join(map(str, grp_ids)) + ',')
            for rup_param in self.cmaker.REQUIRES_RUPTURE_PARAMETERS:
                self.data[rup_param].append(getattr(ctx, rup_param))
            for dst_param in params:  # including clon, clat
                dst = numpy.ones(N) * 9999
                dst[sites.sids] = getattr(ctx, dst_param)
                self.data[dst_param + '_'].append(dst)

    def dictarray(self):
        """
        :returns: key -> array
        """
        dic = {}
        for k, v in self.data.items():
            dic[k] = numpy.array(v)
        return dic
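
# Illustrative sketch, not engine code: RupData uses AccumDict(accum=[]) as
# a columnar store, one list per parameter and one append per rupture, and
# dictarray then turns each column into a numpy array. Assuming the
# openquake.baselib.general import path used by the engine:
import numpy

from openquake.baselib.general import AccumDict

data = AccumDict(accum=[])  # column name -> list of values
for rate, mag in [(0.01, 5.5), (0.02, 6.0)]:
    data['occurrence_rate'].append(rate)
    data['mag'].append(mag)
dictarray = {k: numpy.array(v) for k, v in data.items()}
assert dictarray['mag'].shape == (2,)
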
def get_assets_by_taxo(assets, tempname=None):
    """
    :param assets: an array of assets
    :param tempname: hdf5 file where the epsilons are (or None)
    :returns: assets_by_taxo with attributes eps and idxs
    """
    assets_by_taxo = AccumDict(group_array(assets, 'taxonomy'))
    assets_by_taxo.assets = assets
    assets_by_taxo.idxs = numpy.argsort(numpy.concatenate(
        [a['ordinal'] for a in assets_by_taxo.values()]))
    assets_by_taxo.eps = {}
    if tempname is None:  # no epsilons
        return assets_by_taxo
    # otherwise read the epsilons and group them by taxonomy
    with hdf5.File(tempname, 'r') as h5:
        dset = h5['epsilon_matrix']
        for taxo, assets in assets_by_taxo.items():
            lst = [dset[aid] for aid in assets['ordinal']]
            assets_by_taxo.eps[taxo] = numpy.array(lst)
    return assets_by_taxo
def _make_src_indep(self):
    # srcs with the same source_id and grp_ids
    for srcs, sites in self.srcfilter.get_sources_sites(self.group):
        t0 = time.time()
        src_id = srcs[0].source_id
        grp_ids = numpy.array(srcs[0].grp_ids)
        self.numrups = 0
        self.numsites = 0
        if self.fewsites:
            # we can afford using a lot of memory to store the ruptures
            rups = self._get_rups(srcs, sites)
            # print_finite_size(rups)
            with self.ctx_mon:
                ctxs = list(self._gen_ctxs(rups, sites, grp_ids))
            self._update_pmap(ctxs)
        else:
            # many sites: keep fewer ruptures in memory
            for src in srcs:
                for rup in self._get_rups([src], sites):
                    with self.ctx_mon:
                        ctxs = self.cmaker.make_ctxs(
                            [rup], rup.sites, grp_ids, filt=True)
                    self.numrups += len(ctxs)
                    self.numsites += sum(len(ctx[1]) for ctx in ctxs)
                    self._update_pmap(ctxs)
        self.calc_times[src_id] += numpy.array(
            [self.numrups, self.numsites, time.time() - t0])
    return AccumDict((grp_id, ~p if self.rup_indep else p)
                     for grp_id, p in self.pmap.items())
def disaggregate(self, sitecol, ruptures, imldict, truncnorm, n_epsilons,
                 disagg_pne=Monitor()):
    """
    Disaggregate (separate) PoE of `imldict` in different contributions
    each coming from `n_epsilons` distribution bins.

    :param sitecol: a SiteCollection with a single site
    :param ruptures: an iterator over ruptures
    :param imldict: a dictionary poe, gsim, imt, rlzi -> iml
    :param truncnorm: an instance of scipy.stats.truncnorm
    :param n_epsilons: the number of bins
    :param disagg_pne: a monitor of the disaggregation time
    :returns: an AccumDict
    """
    assert len(sitecol) == 1, sitecol
    sitemesh = sitecol.mesh
    epsilons = numpy.linspace(truncnorm.a, truncnorm.b, n_epsilons + 1)
    acc = AccumDict(accum=[])
    for rupture in ruptures:
        try:
            sctx, rctx, dctx = self.make_contexts(sitecol, rupture)
        except FarAwayRupture:
            continue
        cache = {}  # gsim, imt, iml -> pne
        # if imldict comes from iml_disagg, it has duplicated values;
        # we are using a cache to avoid duplicating computation
        for (poe, gsim, imt, rlzi), iml in imldict.items():
            try:
                pne = cache[gsim, imt, iml]
            except KeyError:
                with disagg_pne:
                    pne = self._disaggregate_pne(
                        gsim, rupture, sctx, rctx, dctx, imt, iml,
                        truncnorm, epsilons)
                    cache[gsim, imt, iml] = pne
            acc[poe, str(imt), iml, rlzi].append(pne)
        [rjb_dist] = dctx.rjb  # 1 site => 1 distance
        [closest_point] = rupture.surface.get_closest_points(sitemesh)
        acc['mags'].append(rupture.mag)
        acc['dists'].append(rjb_dist)
        acc['lons'].append(closest_point.longitude)
        acc['lats'].append(closest_point.latitude)
    return acc
def get_args(self, grp_ids, hazard):
    """
    :returns: a list of Starmap arguments
    """
    oq = self.oqparam
    allargs = []
    src_groups = self.csm.src_groups
    tot_weight = 0
    for grp_id in grp_ids:
        rlzs_by_gsim = hazard.rlzs_by_gsim_list[grp_id]
        sg = src_groups[grp_id]
        for src in sg:
            src.ngsims = len(rlzs_by_gsim)
            tot_weight += src.weight
            if src.code == b'C' and src.num_ruptures > 20_000:
                msg = ('{} is suspiciously large, containing {:_d} '
                       'ruptures with complex_fault_mesh_spacing={} km')
                spc = oq.complex_fault_mesh_spacing
                logging.info(msg.format(src, src.num_ruptures, spc))
    assert tot_weight
    max_weight = max(tot_weight / self.ct, oq.min_weight)
    self.params['max_weight'] = max_weight
    logging.info('tot_weight={:_d}, max_weight={:_d}'.format(
        int(tot_weight), int(max_weight)))
    self.counts = AccumDict(accum=0)
    for grp_id in grp_ids:
        rlzs_by_gsim = hazard.rlzs_by_gsim_list[grp_id]
        sg = src_groups[grp_id]
        if sg.atomic:
            # do not split atomic groups
            self.counts[grp_id] += 1
            allargs.append((sg, rlzs_by_gsim, self.params))
        else:  # regroup the sources in blocks
            blks = (groupby(sg, get_source_id).values()
                    if oq.disagg_by_src
                    else block_splitter(sg, max_weight, get_weight,
                                        sort=True))
            blocks = list(blks)
            self.counts[grp_id] += len(blocks)
            for block in blocks:
                logging.debug(
                    'Sending %d source(s) with weight %d',
                    len(block), sum(src.weight for src in block))
                allargs.append((block, rlzs_by_gsim, self.params))
    return allargs
def build_ruptures(sources, src_filter, param, monitor):
    """
    :param sources: a list with a single UCERF source
    :param src_filter: a SourceFilter instance
    :param param: extra parameters
    :param monitor: a Monitor instance
    :returns: an AccumDict grp_id -> EBRuptures
    """
    [src] = sources
    res = AccumDict()
    res.calc_times = []
    sampl_mon = monitor('sampling ruptures', measuremem=True)
    filt_mon = monitor('filtering ruptures', measuremem=False)
    res.trt = DEFAULT_TRT
    background_sids = src.get_background_sids(src_filter)
    sitecol = src_filter.sitecol
    cmaker = ContextMaker(param['gsims'], src_filter.integration_distance)
    num_ses = param['ses_per_logic_tree_path']
    samples = getattr(src, 'samples', 1)
    n_occ = AccumDict(accum=0)
    t0 = time.time()
    with sampl_mon:
        for sam_idx in range(samples):
            for ses_idx, ses_seed in param['ses_seeds']:
                seed = sam_idx * TWO16 + ses_seed
                rups, occs = generate_event_set(
                    src, background_sids, src_filter, ses_idx, seed)
                for rup, occ in zip(rups, occs):
                    n_occ[rup] += occ
    tot_occ = sum(n_occ.values())
    dic = {'eff_ruptures': {src.src_group_id: src.num_ruptures}}
    with filt_mon:
        eb_ruptures = stochastic.build_eb_ruptures(
            src, num_ses, cmaker, sitecol, n_occ.items())
    dic['rup_array'] = (stochastic.get_rup_array(eb_ruptures)
                        if eb_ruptures else ())
    dt = time.time() - t0
    dic['calc_times'] = {
        src.id: numpy.array([tot_occ, len(sitecol), dt], F32)}
    return dic
def export_ruptures_xml(ekey, dstore):
    """
    :param ekey: export key, i.e. a pair (datastore key, fmt)
    :param dstore: datastore object
    """
    fmt = ekey[-1]
    oq = dstore['oqparam']
    events = group_array(dstore['events'][()], 'rup_id')
    ruptures_by_grp = AccumDict(accum=[])
    for rgetter in gen_rupture_getters(dstore):
        ebrs = []
        for proxy in rgetter.get_proxies():
            events_by_ses = group_array(events[proxy['id']], 'ses_id')
            ebr = proxy.to_ebr(rgetter.trt)
            ebrs.append(ebr.export(events_by_ses))
        ruptures_by_grp[rgetter.et_id].extend(ebrs)
    dest = dstore.export_path('ses.' + fmt)
    writer = hazard_writers.SESXMLWriter(dest)
    writer.serialize(ruptures_by_grp, oq.investigation_time)
    return [dest]
def __init__(self, task_func, task_args, name=None, distribute=None):
    self.__class__.init()  # if not already
    self.task_func = task_func
    self.name = name or task_func.__name__
    self.task_args = task_args
    if self.name.startswith('_'):  # secret task
        self.progress = lambda *args: None
    else:
        self.progress = logging.info
    self.distribute = distribute or oq_distribute(task_func)
    self.sent = AccumDict()
    # a task can be a function, a class or an instance with a __call__
    if inspect.isfunction(task_func):
        self.argnames = inspect.getargspec(task_func).args
    elif inspect.isclass(task_func):
        self.argnames = inspect.getargspec(task_func.__init__).args[1:]
    else:  # instance with a __call__ method
        self.argnames = inspect.getargspec(task_func.__call__).args[1:]
    self.receiver = 'tcp://%s:%s' % (
        config.dbserver.host, config.zworkers.receiver_ports)
def export_dmg_xml(key, dstore, damage_states, dmg_data, lt, rlz):
    """
    Export damage outputs in XML format.

    :param key:
        dmg_dist_per_asset|dmg_dist_per_taxonomy|dmg_dist_total|collapse_map
    :param dstore: the datastore
    :param damage_states: the list of damage states
    :param dmg_data:
        a list [(loss_type, unit, asset_ref, mean, stddev), ...]
    :param lt: loss type string
    :param rlz: a realization object
    """
    dest = dstore.build_fname('%s-%s' % (key[0], lt), rlz, key[1])
    risk_writers.DamageWriter(damage_states).to_nrml(key[0], dmg_data, dest)
    return AccumDict({key: [dest]})
def execute(self):
    """
    Run in parallel `core_task(sources, sitecol, monitor)`, by
    parallelizing on the sources according to their weight and
    tectonic region type.
    """
    oq = self.oqparam
    if oq.hazard_calculation_id and not oq.compare_with_classical:
        parent = datastore.read(self.oqparam.hazard_calculation_id)
        self.csm_info = parent['csm_info']
        parent.close()
        self.calc_stats(parent)  # post-processing
        return {}
    with self.monitor('managing sources', autoflush=True):
        smap = parallel.Starmap(
            self.core_task.__func__, monitor=self.monitor())
        source_ids = []
        data = []
        for i, sources in enumerate(self._send_sources(smap)):
            source_ids.append(get_src_ids(sources))
            for src in sources:  # collect source data
                data.append((i, src.nsites, src.num_ruptures, src.weight))
        if source_ids:
            self.datastore['task_sources'] = encode(source_ids)
        self.datastore.extend(
            'source_data', numpy.array(data, source_data_dt))
    self.calc_times = AccumDict(accum=numpy.zeros(2, F32))
    try:
        acc = smap.reduce(self.agg_dicts, self.acc0())
        self.store_rlz_info(acc.eff_ruptures)
    finally:
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(self.calc_times)
    if acc.nsites:
        src_ids = sorted(acc.nsites)
        nsites = [acc.nsites[i] for i in src_ids]
        self.datastore['source_info'][src_ids, 'num_sites'] = nsites
    if not self.calc_times:
        raise RuntimeError('All sources were filtered away!')
    self.calc_times.clear()  # save a bit of memory
    return acc
class RupData(object):
    """
    A class to collect rupture information into an array
    """
    def __init__(self, cmaker):
        self.cmaker = cmaker
        self.data = AccumDict(accum=[])

    def from_srcs(self, srcs, sites):  # used in disagg.disaggregation
        """
        :returns: param -> array
        """
        for src in srcs:
            for rup in src.iter_ruptures():
                self.cmaker.add_rup_params(rup)
                self.add(rup, src.id, sites)
        return {k: numpy.array(v) for k, v in self.data.items()}

    def add(self, rup, src_id, sctx, dctx=None):
        rate = rup.occurrence_rate
        if numpy.isnan(rate):  # for nonparametric ruptures
            probs_occur = rup.probs_occur
        else:
            probs_occur = numpy.zeros(0, numpy.float64)
        self.data['srcidx'].append(src_id or 0)
        self.data['occurrence_rate'].append(rate)
        self.data['weight'].append(rup.weight or numpy.nan)
        self.data['probs_occur'].append(probs_occur)
        for rup_param in self.cmaker.REQUIRES_RUPTURE_PARAMETERS:
            self.data[rup_param].append(getattr(rup, rup_param))
        self.data['sid_'].append(numpy.int16(sctx.sids))
        for dst_param in self.cmaker.REQUIRES_DISTANCES:
            if dctx is None:  # compute the distances
                dists = get_distances(rup, sctx, dst_param)
            else:  # reuse already computed distances
                dists = getattr(dctx, dst_param)
            self.data[dst_param + '_'].append(F32(dists))
        closest = rup.surface.get_closest_points(sctx)
        self.data['lon_'].append(F32(closest.lons))
        self.data['lat_'].append(F32(closest.lats))
def reduce(self, agg=operator.add, acc=None, posthook=None):
    """
    Loop on a set of results and update the accumulator
    by using the aggregation function.

    :param agg: the aggregation function, (acc, val) -> new acc
    :param acc: the initial value of the accumulator
    :returns: the final value of the accumulator
    """
    if acc is None:
        acc = AccumDict()
    num_tasks = len(self.results)
    if num_tasks == 0:
        logging.warn('No tasks were submitted')
        return acc
    log_percent = log_percent_gen(self.name, num_tasks, self.progress)
    next(log_percent)

    def agg_and_percent(acc, triple):
        (val, exc, mon) = triple
        if exc:
            raise RuntimeError(val)
        res = agg(acc, val)
        next(log_percent)
        mon.flush()
        return res

    if self.no_distribute:
        agg_result = reduce(agg_and_percent, self.results, acc)
    else:
        self.progress('Sent %s of data in %d task(s)',
                      humansize(sum(self.sent.values())), num_tasks)
        agg_result = self.aggregate_result_set(agg_and_percent, acc)
        self.progress('Received %s of data, maximum per task %s',
                      humansize(sum(self.received)),
                      humansize(max(self.received)))
    if posthook:
        posthook(self)
    self.results = []
    return agg_result
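
# Illustrative sketch, not engine code: reduce folds task results into the
# accumulator via agg(acc, val). When the results are AccumDicts, as in the
# calculators above, operator.add merges them key-wise, so the partial
# dictionaries returned by many tasks collapse into a single one. Assuming
# the openquake.baselib.general import path used by the engine:
import operator
from functools import reduce

from openquake.baselib.general import AccumDict

results = [AccumDict({'grp-0': 1, 'grp-1': 2}), AccumDict({'grp-0': 3})]
acc = reduce(operator.add, results, AccumDict())
assert acc == {'grp-0': 4, 'grp-1': 2}
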
def classical(sources, sitecol, siteidx, rlzs_assoc, monitor):
    """
    :param sources:
        a non-empty sequence of sources of homogeneous tectonic region type
    :param sitecol: a SiteCollection instance
    :param siteidx: index of the first site (0 if there is a single tile)
    :param rlzs_assoc: a RlzsAssoc instance
    :param monitor: a monitor instance
    :returns: an AccumDict rlz -> curves
    """
    truncation_level = monitor.oqparam.truncation_level
    imtls = monitor.oqparam.imtls
    trt_model_id = sources[0].trt_model_id
    # sanity check: the trt_model must be the same for all sources
    for src in sources[1:]:
        assert src.trt_model_id == trt_model_id
    gsims = rlzs_assoc.gsims_by_trt_id[trt_model_id]
    trt = sources[0].tectonic_region_type
    try:
        max_dist = monitor.oqparam.maximum_distance[trt]
    except KeyError:
        max_dist = monitor.oqparam.maximum_distance['default']
    dic = AccumDict()
    dic.siteslice = slice(siteidx, siteidx + len(sitecol))
    if monitor.oqparam.poes_disagg:
        sm_id = rlzs_assoc.get_sm_id(trt_model_id)
        dic.bbs = [BoundingBox(sm_id, sid) for sid in sitecol.sids]
    else:
        dic.bbs = []
    # NB: the source_site_filter below is ESSENTIAL for performance inside
    # hazard_curves_per_trt, since it reduces the full site collection
    # to a filtered one *before* doing the rupture filtering
    curves_by_gsim = hazard_curves_per_trt(
        sources, sitecol, imtls, gsims, truncation_level,
        source_site_filter=source_site_distance_filter(max_dist),
        maximum_distance=max_dist, bbs=dic.bbs, monitor=monitor)
    dic.calc_times = monitor.calc_times  # added by hazard_curves_per_trt
    dic.eff_ruptures = {trt_model_id: monitor.eff_ruptures}  # idem
    for gsim, curves in zip(gsims, curves_by_gsim):
        dic[trt_model_id, str(gsim)] = curves
    return dic