def post_execute(self, result):
    """
    :param result:
        a dictionary (src_group_id, gsim) -> haz_curves or an empty
        dictionary if hazard_curves_from_gmfs is false
    """
    oq = self.oqparam
    if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
        return
    elif oq.hazard_curves_from_gmfs:
        rlzs = self.rlzs_assoc.realizations
        ClassicalCalculator.post_execute(
            self, ((rlzs[i], result[i]) for i in result))
    if oq.compare_with_classical:  # compute classical curves
        export_dir = os.path.join(oq.export_dir, 'cl')
        if not os.path.exists(export_dir):
            os.makedirs(export_dir)
        oq.export_dir = export_dir
        # one could also set oq.number_of_logic_tree_samples = 0
        self.cl = ClassicalCalculator(oq, self.monitor)
        # TODO: perhaps it is possible to avoid reprocessing the source
        # model; however, usually this is quite fast and does not dominate
        # the computation
        self.cl.run()
        for imt in self.mean_curves.dtype.fields:
            rdiff, index = max_rel_diff_index(
                self.cl.mean_curves[imt], self.mean_curves[imt])
            logging.warn('Relative difference with the classical '
                         'mean curves for IMT=%s: %d%% at site index %d',
                         imt, rdiff * 100, index)
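# A hedged sketch of what max_rel_diff_index is assumed to compute
# (an illustrative stand-in, not the openquake.baselib implementation):
# the largest relative difference between a reference curve and a
# candidate curve, plus the index where it occurs; points where the
# reference is below min_value are skipped to avoid noisy ratios.
import numpy

def max_rel_diff_index_sketch(curve_ref, curve, min_value=0.01):
    ref = numpy.asarray(curve_ref, float)
    cur = numpy.asarray(curve, float)
    # relative differences, computed only where the reference matters
    diffs = numpy.abs(ref - cur) / numpy.maximum(ref, min_value)
    diffs[ref <= min_value] = 0.
    idx = int(diffs.argmax())
    return diffs[idx], idx

# max_rel_diff_index_sketch([0.2, 0.5], [0.21, 0.6]) -> (about 0.2, 1)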
def post_execute(self, result):
    """
    :param result:
        a dictionary (trt_model_id, gsim) -> haz_curves or an empty
        dictionary if hazard_curves_from_gmfs is false
    """
    oq = self.oqparam
    if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
        return
    elif oq.hazard_curves_from_gmfs:
        rlzs = self.rlzs_assoc.realizations
        dic = {}
        for rlzi in result:
            dic[rlzs[rlzi]] = array_of_curves(
                result[rlzi], len(self.sitecol), oq.imtls)
        self.save_curves(dic)
    if oq.compare_with_classical:  # compute classical curves
        export_dir = os.path.join(oq.export_dir, 'cl')
        if not os.path.exists(export_dir):
            os.makedirs(export_dir)
        oq.export_dir = export_dir
        # one could also set oq.number_of_logic_tree_samples = 0
        self.cl = ClassicalCalculator(oq, self.monitor)
        # TODO: perhaps it is possible to avoid reprocessing the source
        # model; however, usually this is quite fast and does not dominate
        # the computation
        self.cl.run(hazard_calculation_id=self.datastore.calc_id)
        for imt in self.mean_curves.dtype.fields:
            rdiff, index = max_rel_diff_index(self.cl.mean_curves[imt],
                                              self.mean_curves[imt])
            logging.warn(
                'Relative difference with the classical '
                'mean curves for IMT=%s: %d%% at site index %d',
                imt, rdiff * 100, index)
def post_execute(self, result):
    """
    :param result:
        a dictionary (src_group_id, gsim) -> haz_curves or an empty
        dictionary if hazard_curves_from_gmfs is false
    """
    oq = self.oqparam
    if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
        return
    elif oq.hazard_curves_from_gmfs:
        rlzs = self.rlzs_assoc.realizations
        # save individual curves
        for i in sorted(result):
            key = 'hcurves/rlz-%03d' % i
            if result[i]:
                self.datastore[key] = result[i]
            else:
                self.datastore[key] = ProbabilityMap(oq.imtls.array.size)
                logging.info('Zero curves for %s', key)
        # compute and save statistics; this is done in process:
        # we don't need to parallelize, since event based calculations
        # involve a "small" number of sites (<= 65,536)
        weights = [rlz.weight for rlz in rlzs]
        hstats = self.oqparam.hazard_stats()
        if len(hstats) and len(rlzs) > 1:
            for kind, stat in hstats:
                pmap = compute_pmap_stats(result.values(), [stat], weights)
                self.datastore['hcurves/' + kind] = pmap
    if self.datastore.parent:
        self.datastore.parent.open()
    if 'gmf_data' in self.datastore:
        self.save_gmf_bytes()
    if oq.compare_with_classical:  # compute classical curves
        export_dir = os.path.join(oq.export_dir, 'cl')
        if not os.path.exists(export_dir):
            os.makedirs(export_dir)
        oq.export_dir = export_dir
        # one could also set oq.number_of_logic_tree_samples = 0
        self.cl = ClassicalCalculator(oq, self.monitor('classical'))
        # TODO: perhaps it is possible to avoid reprocessing the source
        # model; however, usually this is quite fast and does not dominate
        # the computation
        self.cl.run(close=False)
        cl_mean_curves = get_mean_curves(self.cl.datastore)
        eb_mean_curves = get_mean_curves(self.datastore)
        for imt in eb_mean_curves.dtype.names:
            rdiff, index = util.max_rel_diff_index(cl_mean_curves[imt],
                                                   eb_mean_curves[imt])
            logging.warn(
                'Relative difference with the classical '
                'mean curves for IMT=%s: %d%% at site index %d',
                imt, rdiff * 100, index)
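# A minimal sketch of the 'mean' statistic that compute_pmap_stats is
# assumed to apply here: a weighted average of the per-realization
# curves, site by site (the function name and array layout below are
# illustrative, not the engine API).
import numpy

def mean_curves_sketch(curves_by_rlz, weights):
    # curves_by_rlz has shape (R, N, L): realizations, sites, levels
    w = numpy.asarray(weights, float)
    w /= w.sum()  # normalize the realization weights
    return numpy.einsum('r,rnl->nl', w, numpy.asarray(curves_by_rlz, float))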
def post_execute(self, result):
    """
    :param result:
        a dictionary (src_group_id, gsim) -> haz_curves or an empty
        dictionary if hazard_curves_from_gmfs is false
    """
    oq = self.oqparam
    if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
        return
    elif oq.hazard_curves_from_gmfs:
        rlzs = self.rlzs_assoc.realizations
        # save individual curves
        if self.oqparam.individual_curves:
            for i in sorted(result):
                key = 'hcurves/rlz-%03d' % i
                if result[i]:
                    self.datastore[key] = result[i]
                else:
                    logging.info('Zero curves for %s', key)
        # compute and save statistics; this is done in process:
        # we don't need to parallelize, since event based calculations
        # involve a "small" number of sites (<= 65,536)
        weights = (None if self.oqparam.number_of_logic_tree_samples
                   else [rlz.weight for rlz in rlzs])
        pstats = PmapStats(self.oqparam.quantile_hazard_curves, weights)
        for kind, stat in pstats.compute(
                self.sitecol.sids, list(result.values())):
            if kind == 'mean' and not self.oqparam.mean_hazard_curves:
                continue
            self.datastore['hcurves/' + kind] = stat
    if ('gmf_data' in self.datastore and
            'nbytes' not in self.datastore['gmf_data'].attrs):
        self.datastore.set_nbytes('gmf_data')
        for sm_id in self.datastore['gmf_data']:
            for rlzno in self.datastore['gmf_data/' + sm_id]:
                self.datastore.set_nbytes(
                    'gmf_data/%s/%s' % (sm_id, rlzno))
    if oq.compare_with_classical:  # compute classical curves
        export_dir = os.path.join(oq.export_dir, 'cl')
        if not os.path.exists(export_dir):
            os.makedirs(export_dir)
        oq.export_dir = export_dir
        # one could also set oq.number_of_logic_tree_samples = 0
        self.cl = ClassicalCalculator(oq, self.monitor)
        # TODO: perhaps it is possible to avoid reprocessing the source
        # model; however, usually this is quite fast and does not dominate
        # the computation
        self.cl.run(close=False)
        cl_mean_curves = get_mean_curves(self.cl.datastore)
        eb_mean_curves = get_mean_curves(self.datastore)
        for imt in eb_mean_curves.dtype.names:
            rdiff, index = util.max_rel_diff_index(
                cl_mean_curves[imt], eb_mean_curves[imt])
            logging.warn('Relative difference with the classical '
                         'mean curves for IMT=%s: %d%% at site index %d',
                         imt, rdiff * 100, index)
def post_execute(self, result):
    oq = self.oqparam
    if not oq.ground_motion_fields:
        return
    N = len(self.sitecol.complete)
    L = len(oq.imtls.array)
    if result and oq.hazard_curves_from_gmfs:
        rlzs = self.rlzs_assoc.realizations
        # compute and save statistics; this is done in process and can
        # be very slow if there are thousands of realizations
        weights = [rlz.weight for rlz in rlzs]
        # NB: in the future we may want to save the individual hazard
        # curves if oq.individual_curves is set; for the moment we
        # save the statistical curves only
        hstats = oq.hazard_stats()
        pmaps = list(result.values())
        if len(hstats):
            logging.info('Computing statistical hazard curves')
            if len(weights) != len(pmaps):
                # this should never happen, unless I break the
                # logic tree reduction mechanism during refactoring
                raise AssertionError('Expected %d pmaps, got %d' %
                                     (len(weights), len(pmaps)))
            for statname, stat in hstats:
                pmap = compute_pmap_stats(pmaps, [stat], weights, oq.imtls)
                arr = numpy.zeros((N, L), F32)
                for sid in pmap:
                    arr[sid] = pmap[sid].array[:, 0]
                self.datastore['hcurves/' + statname] = arr
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    self.datastore.create_dset(
                        'hmaps/' + statname, F32, (N, M, P))
                    self.datastore.set_attrs(
                        'hmaps/' + statname, nbytes=N * P * M * 4)
                    hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                    ds = self.datastore['hmaps/' + statname]
                    for sid in hmap:
                        ds[sid] = hmap[sid].array
    if self.datastore.parent:
        self.datastore.parent.open('r')
    if oq.compare_with_classical:  # compute classical curves
        export_dir = os.path.join(oq.export_dir, 'cl')
        if not os.path.exists(export_dir):
            os.makedirs(export_dir)
        oq.export_dir = export_dir
        job_id = logs.init('job')
        self.cl = ClassicalCalculator(oq, job_id)
        # TODO: perhaps it is possible to avoid reprocessing the source
        # model; however, usually this is quite fast and does not dominate
        # the computation
        self.cl.run(close=False)
        cl_mean_curves = get_mean_curves(self.cl.datastore)
        eb_mean_curves = get_mean_curves(self.datastore)
        rdiff, index = util.max_rel_diff_index(
            cl_mean_curves, eb_mean_curves)
        logging.warning('Relative difference with the classical '
                        'mean curves: %d%% at site index %d',
                        rdiff * 100, index)
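# Hedged sketch of the hazard-map idea behind calc.make_hmap (assumed
# behavior, not the engine code): invert each hazard curve to find the
# intensity level corresponding to each target probability of
# exceedance, interpolating in log-log space.
import numpy

def hmap_sketch(imls, poes, target_poes):
    # poes is decreasing in iml; numpy.interp wants increasing x,
    # so both arrays are reversed before interpolating
    xs = numpy.log(numpy.asarray(poes, float)[::-1])
    ys = numpy.log(numpy.asarray(imls, float)[::-1])
    return numpy.exp(numpy.interp(numpy.log(target_poes), xs, ys))

# hmap_sketch([0.1, 0.2, 0.4], [0.9, 0.5, 0.1], [0.3]) -> about 0.25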
class EventBasedCalculator(ClassicalCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    pre_calculator = 'event_based_rupture'
    core_task = compute_gmfs_and_curves
    is_stochastic = True

    def pre_execute(self):
        """
        Read the precomputed ruptures (or compute them on the fly) and
        prepare some empty files in the export directory to store the gmfs
        (if any). If there were pre-existing files, they will be erased.
        """
        super(EventBasedCalculator, self).pre_execute()
        self.sesruptures = []
        for serial in self.datastore['sescollection']:
            self.sesruptures.append(self.datastore['sescollection/' + serial])
        self.sesruptures.sort(key=operator.attrgetter('serial'))

    def combine_curves_and_save_gmfs(self, acc, res):
        """
        Combine the hazard curves (if any) and save the gmfs (if any)
        sequentially; notice that the gmfs may come from different tasks
        in any order.

        :param acc: an accumulator for the hazard curves
        :param res: a dictionary trt_id, gsim -> gmf_array or curves_by_imt
        :returns: a new accumulator
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        save_gmfs = self.oqparam.ground_motion_fields
        for trt_id in res:
            if isinstance(trt_id, int) and save_gmfs:
                with sav_mon:
                    gmfa_sids_etags = res[trt_id]
                    for serial in sorted(gmfa_sids_etags):
                        gst = gmfa_sids_etags[serial]
                        self.datastore['gmf_data/%s' % serial] = gst.gmfa
                        self.datastore['sid_data/%s' % serial] = gst.sids
                        self.datastore.set_attrs('gmf_data/%s' % serial,
                                                 trt_id=trt_id,
                                                 etags=gst.etags)
                    self.datastore.hdf5.flush()
            elif isinstance(trt_id, tuple):  # aggregate hcurves
                with agg_mon:
                    self.agg_dicts(acc, {trt_id: res[trt_id]})
        sav_mon.flush()
        agg_mon.flush()
        return acc

    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the ruptures according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        monitor = self.monitor(self.core_task.__name__)
        monitor.oqparam = oq
        zc = zero_curves(len(self.sitecol.complete), self.oqparam.imtls)
        zerodict = AccumDict((key, zc) for key in self.rlzs_assoc)
        curves_by_trt_gsim = parallel.apply_reduce(
            self.core_task.__func__,
            (self.sesruptures, self.sitecol, self.rlzs_assoc, monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            acc=zerodict, agg=self.combine_curves_and_save_gmfs,
            key=operator.attrgetter('trt_id'),
            weight=operator.attrgetter('multiplicity'))
        if oq.ground_motion_fields:
            self.datastore.set_nbytes('gmf_data')
            self.datastore.set_nbytes('sid_data')
        return curves_by_trt_gsim

    def post_execute(self, result):
        """
        :param result:
            a dictionary (trt_model_id, gsim) -> haz_curves or an empty
            dictionary if hazard_curves_from_gmfs is false
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        if oq.hazard_curves_from_gmfs:
            ClassicalCalculator.post_execute.__func__(self, result)
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir  # use a different datastore
            self.cl = ClassicalCalculator(oq, self.monitor)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, usually this is quite fast and does not
            # dominate the computation
            self.cl.run(hazard_calculation_id=self.datastore.calc_id)
            for imt in self.mean_curves.dtype.fields:
                rdiff, index = max_rel_diff_index(
                    self.cl.mean_curves[imt], self.mean_curves[imt])
                logging.warn('Relative difference with the classical '
                             'mean curves for IMT=%s: %d%% at site index %d',
                             imt, rdiff * 100, index)
class EventBasedCalculator(ClassicalCalculator):
    """
    Event based PSHA calculator generating the ruptures only
    """
    pre_calculator = 'event_based_rupture'
    core_func = compute_gmfs_and_curves
    is_stochastic = True

    def pre_execute(self):
        """
        Read the precomputed ruptures (or compute them on the fly) and
        prepare some empty files in the export directory to store the gmfs
        (if any). If there were pre-existing files, they will be erased.
        """
        super(EventBasedCalculator, self).pre_execute()
        self.sesruptures = []
        gsims_by_col = self.rlzs_assoc.get_gsims_by_col()
        self.datasets = {}
        for col_id, sescol in enumerate(self.datastore['sescollection']):
            gmf_dt = gsim_imt_dt(gsims_by_col[col_id], self.oqparam.imtls)
            for tag, sesrup in sorted(sescol.items()):
                sesrup = sescol[tag]
                self.sesruptures.append(sesrup)
            if self.oqparam.ground_motion_fields and sescol:
                self.datasets[col_id] = self.datastore.create_dset(
                    'gmfs/col%02d' % col_id, gmf_dt)

    def combine_curves_and_save_gmfs(self, acc, res):
        """
        Combine the hazard curves (if any) and save the gmfs (if any)
        sequentially; notice that the gmfs may come from different tasks
        in any order.

        :param acc: an accumulator for the hazard curves
        :param res: a dictionary trt_id, gsim -> gmf_array or curves_by_imt
        :returns: a new accumulator
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        save_gmfs = self.oqparam.ground_motion_fields
        for trt_id, gsim_or_col in res:
            if isinstance(gsim_or_col, int) and save_gmfs:
                with sav_mon:
                    gmfa = res[trt_id, gsim_or_col]
                    dataset = self.datasets[gsim_or_col]
                    dataset.attrs['trt_model_id'] = trt_id
                    dataset.extend(gmfa)
                    self.nbytes += gmfa.nbytes
                    self.datastore.hdf5.flush()
            elif isinstance(gsim_or_col, str):  # aggregate hcurves
                with agg_mon:
                    curves_by_imt = res[trt_id, gsim_or_col]
                    acc = agg_dicts(acc, AccumDict(
                        {(trt_id, gsim_or_col): curves_by_imt}))
        sav_mon.flush()
        agg_mon.flush()
        return acc

    def execute(self):
        """
        Run in parallel `core_func(sources, sitecol, monitor)`, by
        parallelizing on the ruptures according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        monitor = self.monitor(self.core_func.__name__)
        monitor.oqparam = oq
        zc = zero_curves(len(self.sitecol.complete), self.oqparam.imtls)
        zerodict = AccumDict((key, zc) for key in self.rlzs_assoc)
        self.nbytes = 0
        curves_by_trt_gsim = parallel.apply_reduce(
            self.core_func.__func__,
            (self.sesruptures, self.sitecol, self.rlzs_assoc, monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            acc=zerodict, agg=self.combine_curves_and_save_gmfs,
            key=operator.attrgetter('col_id'))
        if oq.ground_motion_fields:
            # sanity check on the saved gmfs size
            expected_nbytes = self.datastore[
                'counts_per_rlz'].attrs['gmfs_nbytes']
            self.datastore['gmfs'].attrs['nbytes'] = self.nbytes
            assert self.nbytes == expected_nbytes, (
                self.nbytes, expected_nbytes)
        return curves_by_trt_gsim

    def post_execute(self, result):
        """
        :param result:
            a dictionary (trt_model_id, gsim) -> haz_curves or an empty
            dictionary if hazard_curves_from_gmfs is false
        """
        oq = self.oqparam
        if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields:
            return
        if oq.hazard_curves_from_gmfs:
            ClassicalCalculator.post_execute.__func__(self, result)
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir  # use a different datastore
            self.cl = ClassicalCalculator(oq, self.monitor)
            self.cl.datastore.parent = self.datastore
            result = self.cl.run(pre_execute=False, clean_up=False)
            for imt in self.mean_curves.dtype.fields:
                rdiff, index = max_rel_diff_index(
                    self.cl.mean_curves[imt], self.mean_curves[imt])
                logging.warn(
                    'Relative difference with the classical '
                    'mean curves for IMT=%s: %d%% at site index %d',
                    imt, rdiff * 100, index)
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    core_task = compute_hazard
    is_stochastic = True

    def gen_args(self, monitor):
        """
        :yields: the arguments for compute_gmfs_and_curves
        """
        oq = self.oqparam
        param = dict(oqparam=oq, min_iml=self.get_min_iml(oq),
                     truncation_level=oq.truncation_level,
                     imtls=oq.imtls, filter_distance=oq.filter_distance,
                     seed=oq.ses_seed, maximum_distance=oq.maximum_distance,
                     ses_per_logic_tree_path=oq.ses_per_logic_tree_path)
        concurrent_tasks = oq.concurrent_tasks
        if oq.hazard_calculation_id:
            U = len(self.datastore.parent['ruptures'])
            logging.info('Found %d ruptures', U)
            parent = self.can_read_parent() or self.datastore
            samples_by_grp = self.csm_info.get_samples_by_grp()
            for slc in split_in_slices(U, concurrent_tasks or 1):
                for grp_id in self.rlzs_by_gsim_grp:
                    rlzs_by_gsim = self.rlzs_by_gsim_grp[grp_id]
                    ruptures = RuptureGetter(parent, slc, grp_id)
                    param['samples'] = samples_by_grp[grp_id]
                    yield ruptures, self.sitecol, rlzs_by_gsim, param, monitor
            return
        maxweight = self.csm.get_maxweight(weight, concurrent_tasks or 1)
        logging.info('Using maxweight=%d', maxweight)
        num_tasks = 0
        num_sources = 0
        for sm in self.csm.source_models:
            param['samples'] = sm.samples
            for sg in sm.src_groups:
                rlzs_by_gsim = self.rlzs_by_gsim_grp[sg.id]
                self.csm.add_infos(sg.sources)
                if sg.src_interdep == 'mutex':  # do not split
                    yield sg, self.src_filter, rlzs_by_gsim, param, monitor
                    num_tasks += 1
                    num_sources += len(sg.sources)
                    continue
                for block in block_splitter(sg.sources, maxweight, weight):
                    yield block, self.src_filter, rlzs_by_gsim, param, monitor
                    num_tasks += 1
                    num_sources += len(block)
        logging.info('Sent %d sources in %d tasks', num_sources, num_tasks)

    def zerodict(self):
        """
        Initial accumulator, a dictionary (grp_id, gsim) -> curves
        """
        if self.oqparam.hazard_calculation_id is None:
            # filter_csm must be called first
            self.src_filter, self.csm = self.filter_csm()
            self.csm_info = self.csm.info
        else:
            self.datastore.parent = datastore.read(
                self.oqparam.hazard_calculation_id)
            self.csm_info = self.datastore.parent['csm_info']
        self.rlzs_by_gsim_grp = self.csm_info.get_rlzs_by_gsim_grp()
        self.L = len(self.oqparam.imtls.array)
        self.R = self.csm_info.get_num_rlzs()
        zd = AccumDict({r: ProbabilityMap(self.L) for r in range(self.R)})
        zd.eff_ruptures = AccumDict()
        self.grp_trt = self.csm_info.grp_by("trt")
        return zd

    def agg_dicts(self, acc, result):
        """
        :param acc: accumulator dictionary
        :param result: an AccumDict with events, ruptures, gmfs and hcurves
        """
        oq = self.oqparam
        if oq.save_ruptures and not oq.ground_motion_fields:
            self.gmf_size += max_gmf_size(
                result['ruptures'],
                self.csm_info.rlzs_assoc.get_rlzs_by_gsim,
                self.csm_info.get_samples_by_grp(),
                len(self.oqparam.imtls))
        if hasattr(result, 'calc_times'):
            for srcid, nsites, eids, dt in result.calc_times:
                info = self.csm.infos[srcid]
                info.num_sites += nsites
                info.calc_time += dt
                info.num_split += 1
                info.events += len(eids)
        if hasattr(result, 'eff_ruptures'):
            acc.eff_ruptures += result.eff_ruptures
        if hasattr(result, 'events'):
            self.datastore.extend('events', result.events)
        self.save_ruptures(result['ruptures'])
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        if 'gmdata' in result:
            self.gmdata += result['gmdata']
            data = result['gmfdata']
            with sav_mon:
                self.datastore.extend('gmf_data/data', data)
                # it is important to save the number of bytes while the
                # computation is going, to see the progress
                update_nbytes(self.datastore, 'gmf_data/data', data)
                for sid, start, stop in result['indices']:
                    self.indices[sid, 0].append(start + self.offset)
                    self.indices[sid, 1].append(stop + self.offset)
                self.offset += len(data)
                if self.offset >= TWO32:
                    raise RuntimeError(
                        'The gmf_data table has more than %d rows' % TWO32)
        imtls = self.oqparam.imtls
        with agg_mon:
            for key, poes in result.get('hcurves', {}).items():
                r, sid, imt = str2rsi(key)
                array = acc[r].setdefault(sid, 0).array[imtls(imt), 0]
                array[:] = 1. - (1. - array) * (1. - poes)
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        return acc

    def save_ruptures(self, ruptures_by_grp_id):
        """
        Extend the 'events' dataset with the events from the given
        ruptures; also, save the ruptures if the flag `save_ruptures` is on.

        :param ruptures_by_grp_id: a dictionary grp_id -> list of EBRuptures
        """
        with self.monitor('saving ruptures', autoflush=True):
            for grp_id, ebrs in ruptures_by_grp_id.items():
                if len(ebrs):
                    events = get_events(ebrs)
                    dset = self.datastore.extend('events', events)
                    if self.oqparam.save_ruptures:
                        self.rupser.save(ebrs, eidx=len(dset) - len(events))

    def check_overflow(self):
        """
        Raise a ValueError if the number of sites is larger than 65,536 or
        the number of IMTs is larger than 256 or the number of ruptures is
        larger than 4,294,967,296. The limits are due to the numpy dtype
        used to store the GMFs (gmv_dt). They could be relaxed in the
        future.
        """
        max_ = dict(sites=2**16, events=2**32, imts=2**8)
        try:
            events = len(self.datastore['events'])
        except KeyError:
            events = 0
        num_ = dict(sites=len(self.sitecol), events=events,
                    imts=len(self.oqparam.imtls))
        for var in max_:
            if num_[var] > max_[var]:
                raise ValueError('The event based calculator is restricted '
                                 'to %d %s, got %d' %
                                 (max_[var], var, num_[var]))

    def execute(self):
        if self.oqparam.hazard_calculation_id:
            def saving_sources_by_task(allargs, dstore):
                return allargs
        else:
            from openquake.calculators.classical import saving_sources_by_task
        self.gmdata = {}
        self.offset = 0
        self.gmf_size = 0
        self.indices = collections.defaultdict(list)  # sid, idx -> indices
        acc = self.zerodict()
        with self.monitor('managing sources', autoflush=True):
            allargs = self.gen_args(self.monitor('classical'))
            iterargs = saving_sources_by_task(allargs, self.datastore)
            if isinstance(allargs, list):
                # there is a trick here: if the arguments are known
                # (a list, not an iterator), keep them as a list;
                # then the Starmap will understand the case of a single
                # argument tuple and it will run the task in core
                iterargs = list(iterargs)
            if self.oqparam.ground_motion_fields is False:
                logging.info('Generating ruptures only')
            ires = parallel.Starmap(
                self.core_task.__func__, iterargs).submit_all()
        acc = ires.reduce(self.agg_dicts, acc)
        if self.oqparam.hazard_calculation_id is None:
            with self.monitor('store source_info', autoflush=True):
                self.store_source_info(self.csm.infos, acc)
        self.check_overflow()  # check the number of events
        base.save_gmdata(self, self.R)
        if self.indices:
            N = len(self.sitecol.complete)
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices', measuremem=True,
                              autoflush=True):
                dset = self.datastore.create_dset(
                    'gmf_data/indices', hdf5.vuint32,
                    shape=(N, 2), fillvalue=None)
                for sid in self.sitecol.complete.sids:
                    dset[sid, 0] = self.indices[sid, 0]
                    dset[sid, 1] = self.indices[sid, 1]
        elif (self.oqparam.ground_motion_fields and
              'ucerf' not in self.oqparam.calculation_mode):
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc

    def save_gmf_bytes(self):
        """Save the attribute nbytes in the gmf_data datasets"""
        ds = self.datastore
        for sm_id in ds['gmf_data']:
            ds.set_nbytes('gmf_data/' + sm_id)
        ds.set_nbytes('gmf_data')

    def init(self):
        """
        Set the random seed passed to the SourceManager and the
        minimum_intensity dictionary.
        """
        self.rupser = calc.RuptureSerializer(self.datastore)

    def post_execute(self, result):
        """
        Save the SES collection
        """
        oq = self.oqparam
        N = len(self.sitecol.complete)
        L = len(oq.imtls.array)
        if oq.hazard_calculation_id is None:
            self.rupser.close()
            num_events = sum(set_counts(self.datastore, 'events').values())
            if num_events == 0:
                raise RuntimeError(
                    'No seismic events! Perhaps the investigation time is '
                    'too small or the maximum_distance is too small')
            if oq.save_ruptures:
                logging.info('Setting %d event years on %d ruptures',
                             num_events, self.rupser.nruptures)
                with self.monitor('setting event years', measuremem=True,
                                  autoflush=True):
                    numpy.random.seed(self.oqparam.ses_seed)
                    set_random_years(self.datastore, 'events',
                                     int(self.oqparam.investigation_time))
            if self.gmf_size:
                self.datastore.set_attrs('events',
                                         max_gmf_size=self.gmf_size)
                msg = ('less than '
                       if self.get_min_iml(self.oqparam).sum() else '')
                logging.info('Generating %s%s of GMFs',
                             msg, humansize(self.gmf_size))
        if oq.hazard_curves_from_gmfs:
            rlzs = self.csm_info.rlzs_assoc.realizations
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            hstats = self.oqparam.hazard_stats()
            if len(hstats):
                logging.info('Computing statistical hazard curves')
                for kind, stat in hstats:
                    pmap = compute_pmap_stats(result.values(), [stat],
                                              weights)
                    arr = numpy.zeros((N, L), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array[:, 0]
                    self.datastore['hcurves/' + kind] = arr
            self.save_hmaps()
        if self.datastore.parent:
            self.datastore.parent.open('r')
        if 'gmf_data' in self.datastore:
            self.save_gmf_bytes()
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # one could also set oq.number_of_logic_tree_samples = 0
            self.cl = ClassicalCalculator(oq)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, usually this is quite fast and does not
            # dominate the computation
            self.cl.run(close=False)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            rdiff, index = util.max_rel_diff_index(
                cl_mean_curves, eb_mean_curves)
            logging.warn(
                'Relative difference with the classical '
                'mean curves: %d%% at site index %d', rdiff * 100, index)
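# The update `array[:] = 1. - (1. - array) * (1. - poes)` in agg_dicts
# combines probabilities of exceedance from independent tasks: the
# probability of at least one exceedance is the complement of no
# exceedance in either contribution. A tiny numeric check of that
# identity:
import numpy

acc = numpy.array([0.10, 0.20])   # PoEs accumulated so far
new = numpy.array([0.30, 0.00])   # PoEs from a new task
combined = 1. - (1. - acc) * (1. - new)
assert numpy.allclose(combined, [0.37, 0.20])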
def post_execute(self, result):
    oq = self.oqparam
    if not oq.ground_motion_fields and not oq.hazard_curves_from_gmfs:
        return
    N = len(self.sitecol.complete)
    M = len(oq.imtls)
    L = len(oq.imtls.array)
    L1 = L // M
    if result and oq.hazard_curves_from_gmfs:
        rlzs = self.datastore['full_lt'].get_realizations()
        # compute and save statistics; this is done in process and can
        # be very slow if there are thousands of realizations
        weights = [rlz.weight for rlz in rlzs]
        # NB: in the future we may want to save the individual hazard
        # curves if oq.individual_curves is set; for the moment we
        # save the statistical curves only
        hstats = oq.hazard_stats()
        S = len(hstats)
        pmaps = list(result.values())
        R = len(weights)
        if len(pmaps) != R:
            # this should never happen, unless I break the
            # logic tree reduction mechanism during refactoring
            raise AssertionError('Expected %d pmaps, got %d' %
                                 (len(weights), len(pmaps)))
        if oq.individual_curves:
            logging.info('Saving individual hazard curves')
            self.datastore.create_dset('hcurves-rlzs', F32, (N, R, M, L1))
            self.datastore.set_shape_attrs(
                'hcurves-rlzs', site_id=N, rlz_id=R,
                imt=list(oq.imtls), lvl=numpy.arange(L1))
            if oq.poes:
                P = len(oq.poes)
                M = len(oq.imtls)
                ds = self.datastore.create_dset(
                    'hmaps-rlzs', F32, (N, R, M, P))
                self.datastore.set_shape_attrs(
                    'hmaps-rlzs', site_id=N, rlz_id=R,
                    imt=list(oq.imtls), poe=oq.poes)
            for r, pmap in enumerate(pmaps):
                arr = numpy.zeros((N, M, L1), F32)
                for sid in pmap:
                    arr[sid] = pmap[sid].array.reshape(M, L1)
                self.datastore['hcurves-rlzs'][:, r] = arr
                if oq.poes:
                    hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                    for sid in hmap:
                        ds[sid, r] = hmap[sid].array
        if S:
            logging.info('Computing statistical hazard curves')
            self.datastore.create_dset('hcurves-stats', F32, (N, S, M, L1))
            self.datastore.set_shape_attrs(
                'hcurves-stats', site_id=N, stat=list(hstats),
                imt=list(oq.imtls), lvl=numpy.arange(L1))
            if oq.poes:
                P = len(oq.poes)
                M = len(oq.imtls)
                ds = self.datastore.create_dset(
                    'hmaps-stats', F32, (N, S, M, P))
                self.datastore.set_shape_attrs(
                    'hmaps-stats', site_id=N, stat=list(hstats),
                    imt=list(oq.imtls), poes=oq.poes)
            for s, stat in enumerate(hstats):
                pmap = compute_pmap_stats(
                    pmaps, [hstats[stat]], weights, oq.imtls)
                arr = numpy.zeros((N, M, L1), F32)
                for sid in pmap:
                    arr[sid] = pmap[sid].array.reshape(M, L1)
                self.datastore['hcurves-stats'][:, s] = arr
                if oq.poes:
                    hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                    for sid in hmap:
                        ds[sid, s] = hmap[sid].array
    if self.datastore.parent:
        self.datastore.parent.open('r')
    if oq.compare_with_classical:  # compute classical curves
        export_dir = os.path.join(oq.export_dir, 'cl')
        if not os.path.exists(export_dir):
            os.makedirs(export_dir)
        oq.export_dir = export_dir
        job_id = logs.init('job')
        oq.calculation_mode = 'classical'
        self.cl = ClassicalCalculator(oq, job_id)
        # TODO: perhaps it is possible to avoid reprocessing the source
        # model; however, usually this is quite fast and does not dominate
        # the computation
        self.cl.run()
        engine.expose_outputs(self.cl.datastore)
        for imt in oq.imtls:
            # NB: the classical curves must be read from the classical
            # datastore, otherwise the comparison is against itself
            cl_mean_curves = get_mean_curves(self.cl.datastore, imt)
            eb_mean_curves = get_mean_curves(self.datastore, imt)
            self.rdiff, index = util.max_rel_diff_index(
                cl_mean_curves, eb_mean_curves)
            logging.warning(
                'Relative difference with the classical '
                'mean curves: %d%% at site index %d, imt=%s',
                self.rdiff * 100, index, imt)
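# In this version the per-site curve array has L = M * L1 values (M
# IMTs times L1 levels per IMT); `pmap[sid].array.reshape(M, L1)`
# splits the flat vector into one row per IMT. A tiny illustration of
# that layout:
import numpy

M, L1 = 2, 3                   # e.g. PGA and SA(0.5), 3 levels each
flat = numpy.arange(M * L1)    # flat (L,) vector, IMT-major order
per_imt = flat.reshape(M, L1)  # row 0 -> first IMT, row 1 -> second
assert per_imt[1, 0] == L1     # the second IMT starts at offset L1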
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    core_task = compute_gmfs
    is_stochastic = True

    @cached_property
    def csm_info(self):
        """
        :returns: a cached CompositionInfo object
        """
        try:
            return self.csm.info
        except AttributeError:
            return self.datastore.parent['csm_info']

    def init(self):
        if hasattr(self, 'csm'):
            self.check_floating_spinning()
        self.rupser = calc.RuptureSerializer(self.datastore)
        self.rlzs_by_gsim_grp = self.csm_info.get_rlzs_by_gsim_grp()
        self.samples_by_grp = self.csm_info.get_samples_by_grp()

    def from_ruptures(self, param, monitor):
        """
        :yields: the arguments for compute_gmfs_and_curves
        """
        oq = self.oqparam
        concurrent_tasks = oq.concurrent_tasks
        U = len(self.datastore.parent['ruptures'])
        logging.info('Found %d ruptures', U)
        parent = self.can_read_parent() or self.datastore
        for slc in split_in_slices(U, concurrent_tasks or 1):
            for grp_id in self.rlzs_by_gsim_grp:
                rlzs_by_gsim = self.rlzs_by_gsim_grp[grp_id]
                ruptures = RuptureGetter(parent, slc, grp_id)
                par = param.copy()
                par['samples'] = self.samples_by_grp[grp_id]
                yield ruptures, self.sitecol, rlzs_by_gsim, par, monitor

    def zerodict(self):
        """
        Initial accumulator, a dictionary (grp_id, gsim) -> curves
        """
        self.R = self.csm_info.get_num_rlzs()
        self.L = len(self.oqparam.imtls.array)
        zd = AccumDict({r: ProbabilityMap(self.L) for r in range(self.R)})
        zd.eff_ruptures = AccumDict()
        self.grp_trt = self.csm_info.grp_by("trt")
        return zd

    def _store_ruptures(self, ires):
        gmf_size = 0
        calc_times = AccumDict(accum=numpy.zeros(3, F32))
        for srcs in ires:
            for src in srcs:
                # save the events always; save the ruptures
                # if oq.save_ruptures is true
                self.save_ruptures(src.eb_ruptures)
                gmf_size += max_gmf_size(
                    {src.src_group_id: src.eb_ruptures},
                    self.rlzs_by_gsim_grp,
                    self.samples_by_grp,
                    len(self.oqparam.imtls))
                calc_times += src.calc_times
                del src.calc_times
                yield from src.eb_ruptures
                del src.eb_ruptures
        self.rupser.close()
        if gmf_size:
            self.datastore.set_attrs('events', max_gmf_size=gmf_size)
            msg = 'less than ' if self.get_min_iml(self.oqparam).sum() else ''
            logging.info('Estimating %s%s of GMFs', msg, humansize(gmf_size))
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(calc_times)
        eff_ruptures = {
            grp.id: sum(src.num_ruptures for src in grp)
            for grp in self.csm.src_groups}
        self.store_csm_info(eff_ruptures)

    def from_sources(self, par, monitor):
        """
        Prefilter the composite source model and store the source_info
        """
        self.R = self.csm.info.get_num_rlzs()
        num_rlzs = {
            grp_id: sum(len(rlzs)
                        for rlzs in self.rlzs_by_gsim_grp[grp_id].values())
            for grp_id in self.rlzs_by_gsim_grp}
        param = {'ruptures_per_block': RUPTURES_PER_BLOCK}
        param['filter_distance'] = self.oqparam.filter_distance
        param['ses_per_logic_tree_path'] = (
            self.oqparam.ses_per_logic_tree_path)
        param['gsims_by_trt'] = self.csm.gsim_lt.values
        param['pointsource_distance'] = self.oqparam.pointsource_distance
        logging.info('Building ruptures')
        ires = parallel.Starmap.apply(
            build_ruptures,
            (self.csm.get_sources(), self.src_filter, param, monitor),
            concurrent_tasks=self.oqparam.concurrent_tasks,
            weight=operator.attrgetter('num_ruptures'),
            key=operator.attrgetter('src_group_id'))

        def weight(ebr):
            return numpy.sqrt(num_rlzs[ebr.grp_id] * ebr.multiplicity *
                              len(ebr.sids))
        for ruptures in block_splitter(
                self._store_ruptures(ires), BLOCKSIZE,
                weight, operator.attrgetter('grp_id')):
            ebr = ruptures[0]
            rlzs_by_gsim = self.rlzs_by_gsim_grp[ebr.grp_id]
            par = par.copy()
            par['samples'] = self.samples_by_grp[ebr.grp_id]
            yield ruptures, self.src_filter, rlzs_by_gsim, par, monitor
        self.setting_events()
        if self.oqparam.ground_motion_fields:
            logging.info('Building GMFs')

    def agg_dicts(self, acc, result):
        """
        :param acc: accumulator dictionary
        :param result: an AccumDict with events, ruptures, gmfs and hcurves
        """
        # in UCERF
        if hasattr(result, 'ruptures_by_grp'):
            for ruptures in result.ruptures_by_grp.values():
                self.save_ruptures(ruptures)
        elif hasattr(result, 'events_by_grp'):
            for grp_id in result.events_by_grp:
                events = result.events_by_grp[grp_id]
                self.datastore.extend('events', events)
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        if 'gmdata' in result:
            self.gmdata += result['gmdata']
            data = result.pop('gmfdata')
            with sav_mon:
                self.datastore.extend('gmf_data/data', data)
                # it is important to save the number of bytes while the
                # computation is going, to see the progress
                update_nbytes(self.datastore, 'gmf_data/data', data)
                for sid, start, stop in result['indices']:
                    self.indices[sid, 0].append(start + self.offset)
                    self.indices[sid, 1].append(stop + self.offset)
                self.offset += len(data)
                if self.offset >= TWO32:
                    raise RuntimeError('The gmf_data table has more than '
                                       '%d rows' % TWO32)
        imtls = self.oqparam.imtls
        with agg_mon:
            for key, poes in result.get('hcurves', {}).items():
                r, sid, imt = str2rsi(key)
                array = acc[r].setdefault(sid, 0).array[imtls(imt), 0]
                array[:] = 1. - (1. - array) * (1. - poes)
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        return acc

    def save_ruptures(self, ruptures):
        """
        Extend the 'events' dataset with the events from the given
        ruptures; also, save the ruptures if the flag `save_ruptures` is on.

        :param ruptures: a list of EBRuptures
        """
        if len(ruptures):
            events = get_events(ruptures)
            dset = self.datastore.extend('events', events)
            if self.oqparam.save_ruptures:
                self.rupser.save(ruptures, eidx=len(dset) - len(events))

    def check_overflow(self):
        """
        Raise a ValueError if the number of sites is larger than 65,536 or
        the number of IMTs is larger than 256 or the number of ruptures is
        larger than 4,294,967,296. The limits are due to the numpy dtype
        used to store the GMFs (gmv_dt). They could be relaxed in the
        future.
        """
        max_ = dict(sites=2**16, events=2**32, imts=2**8)
        try:
            events = len(self.datastore['events'])
        except KeyError:
            events = 0
        num_ = dict(sites=len(self.sitecol), events=events,
                    imts=len(self.oqparam.imtls))
        for var in max_:
            if num_[var] > max_[var]:
                raise ValueError(
                    'The event based calculator is restricted to '
                    '%d %s, got %d' % (max_[var], var, num_[var]))

    def execute(self):
        oq = self.oqparam
        self.gmdata = {}
        self.offset = 0
        self.indices = collections.defaultdict(list)  # sid, idx -> indices
        param = dict(
            oqparam=oq, min_iml=self.get_min_iml(oq),
            save_ruptures=oq.save_ruptures,
            gmf=oq.ground_motion_fields,
            truncation_level=oq.truncation_level,
            imtls=oq.imtls, filter_distance=oq.filter_distance,
            ses_per_logic_tree_path=oq.ses_per_logic_tree_path)
        if oq.hazard_calculation_id:  # from ruptures
            assert oq.ground_motion_fields, 'must be True!'
            self.datastore.parent = datastore.read(oq.hazard_calculation_id)
            iterargs = self.from_ruptures(param, self.monitor())
        else:  # from sources
            iterargs = self.from_sources(param, self.monitor())
            if oq.ground_motion_fields is False:
                for args in iterargs:  # store the ruptures/events
                    pass
                return {}
        acc = parallel.Starmap(
            self.core_task.__func__, iterargs, self.monitor()
        ).reduce(self.agg_dicts, self.zerodict())
        self.check_overflow()  # check the number of events
        base.save_gmdata(self, self.R)
        if self.indices:
            N = len(self.sitecol.complete)
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices', measuremem=True,
                              autoflush=True):
                dset = self.datastore.create_dset(
                    'gmf_data/indices', hdf5.vuint32,
                    shape=(N, 2), fillvalue=None)
                for sid in self.sitecol.complete.sids:
                    dset[sid, 0] = self.indices[sid, 0]
                    dset[sid, 1] = self.indices[sid, 1]
        elif (oq.ground_motion_fields and
              'ucerf' not in oq.calculation_mode):
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc

    def save_gmf_bytes(self):
        """Save the attribute nbytes in the gmf_data datasets"""
        ds = self.datastore
        for sm_id in ds['gmf_data']:
            ds.set_nbytes('gmf_data/' + sm_id)
        ds.set_nbytes('gmf_data')

    def setting_events(self):
        """
        Call set_random_years on the events dataset
        """
        if self.oqparam.hazard_calculation_id is None:
            num_events = sum(set_counts(self.datastore, 'events').values())
            if num_events == 0:
                raise RuntimeError(
                    'No seismic events! Perhaps the investigation time is '
                    'too small or the maximum_distance is too small')
            if self.oqparam.save_ruptures:
                logging.info('Setting %d event years on %d ruptures',
                             num_events, self.rupser.nruptures)
                with self.monitor('setting event years', measuremem=True,
                                  autoflush=True):
                    set_random_years(
                        self.datastore, 'events', self.oqparam.ses_seed,
                        int(self.oqparam.investigation_time))

    def post_execute(self, result):
        """
        Save the SES collection
        """
        oq = self.oqparam
        if 'ucerf' in oq.calculation_mode:
            self.rupser.close()
            self.csm.info.update_eff_ruptures(self.csm.get_num_ruptures())
            self.setting_events()
        N = len(self.sitecol.complete)
        L = len(oq.imtls.array)
        if result and oq.hazard_curves_from_gmfs:
            rlzs = self.csm_info.get_rlzs_assoc().realizations
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            # NB: in the future we may want to save the individual hazard
            # curves if oq.individual_curves is set; for the moment we
            # save the statistical curves only
            hstats = oq.hazard_stats()
            if len(hstats):
                logging.info('Computing statistical hazard curves')
                for statname, stat in hstats:
                    pmap = compute_pmap_stats(result.values(), [stat],
                                              weights)
                    arr = numpy.zeros((N, L), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array[:, 0]
                    self.datastore['hcurves/' + statname] = arr
                    if oq.poes:
                        P = len(oq.poes)
                        I = len(oq.imtls)
                        self.datastore.create_dset(
                            'hmaps/' + statname, F32, (N, P * I))
                        self.datastore.set_attrs(
                            'hmaps/' + statname, nbytes=N * P * I * 4)
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        ds = self.datastore['hmaps/' + statname]
                        for sid in hmap:
                            ds[sid] = hmap[sid].array[:, 0]
        if self.datastore.parent:
            self.datastore.parent.open('r')
        if 'gmf_data' in self.datastore:
            self.save_gmf_bytes()
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # one could also set oq.number_of_logic_tree_samples = 0
            self.cl = ClassicalCalculator(oq)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, usually this is quite fast and does not
            # dominate the computation
            self.cl.run(close=False)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            rdiff, index = util.max_rel_diff_index(
                cl_mean_curves, eb_mean_curves)
            logging.warn('Relative difference with the classical '
                         'mean curves: %d%% at site index %d',
                         rdiff * 100, index)
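# from_ruptures slices the U ruptures into roughly equal chunks, one
# per task. A hedged sketch of what a split_in_slices helper could look
# like (illustrative, not the openquake.baselib implementation):
def split_in_slices_sketch(number, num_slices):
    # yield num_slices slices covering range(number) as evenly as possible
    size = int(number / num_slices) + 1
    for start in range(0, number, size):
        yield slice(start, min(start + size, number))

# list(split_in_slices_sketch(10, 3)) ->
# [slice(0, 4), slice(4, 8), slice(8, 10)]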
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    pre_calculator = 'event_based_rupture'
    core_task = compute_gmfs_and_curves
    is_stochastic = True

    def combine_pmaps_and_save_gmfs(self, acc, results):
        """
        Combine the hazard curves (if any) and save the gmfs (if any)
        sequentially; notice that the gmfs may come from different tasks
        in any order.

        :param acc: an accumulator for the hazard curves
        :param results: dictionaries rlzi, imt -> [gmf_array, curves_by_imt]
        :returns: a new accumulator
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        hdf5path = self.datastore.hdf5path
        for res in results:
            self.gmdata += res['gmdata']
            data = res['gmfdata']
            with sav_mon:
                hdf5.extend3(hdf5path, 'gmf_data/data', data)
                # it is important to save the number of bytes while the
                # computation is going, to see the progress
                update_nbytes(self.datastore, 'gmf_data/data', data)
                for sid, start, stop in res['indices']:
                    self.indices[sid].append(
                        (start + self.offset, stop + self.offset))
                self.offset += len(data)
            slicedic = self.oqparam.imtls.slicedic
            with agg_mon:
                for key, poes in res['hcurves'].items():
                    r, sid, imt = str2rsi(key)
                    array = acc[r].setdefault(sid, 0).array[slicedic[imt], 0]
                    array[:] = 1. - (1. - array) * (1. - poes)
            sav_mon.flush()
            agg_mon.flush()
            self.datastore.flush()
            if 'ruptures' in res:
                vars(EventBasedRuptureCalculator)['save_ruptures'](
                    self, res['ruptures'])
        return acc

    def gen_args(self):
        """
        :yields: the arguments for compute_gmfs_and_curves
        """
        oq = self.oqparam
        sitecol = self.sitecol.complete
        monitor = self.monitor(self.core_task.__name__)
        imts = list(oq.imtls)
        min_iml = self.get_min_iml(oq)
        correl_model = oq.get_correl_model()
        try:
            csm_info = self.csm.info
        except AttributeError:  # no csm
            csm_info = self.datastore['csm_info']
        samples_by_grp = csm_info.get_samples_by_grp()
        rlzs_by_gsim = {
            grp_id: self.rlzs_assoc.get_rlzs_by_gsim(grp_id)
            for grp_id in samples_by_grp}
        if self.precalc:
            num_ruptures = sum(len(rs) for rs in self.precalc.result.values())
            block_size = math.ceil(num_ruptures / (oq.concurrent_tasks or 1))
            for grp_id, ruptures in self.precalc.result.items():
                if not ruptures:
                    continue
                for block in block_splitter(ruptures, block_size):
                    getter = GmfGetter(
                        rlzs_by_gsim[grp_id], block, sitecol, imts, min_iml,
                        oq.maximum_distance, oq.truncation_level,
                        correl_model, oq.filter_distance,
                        samples_by_grp[grp_id])
                    yield [getter], oq, monitor
            return
        U = len(self.datastore['ruptures'])
        logging.info('Found %d ruptures', U)
        parent = self.can_read_parent() or self.datastore
        for slc in split_in_slices(U, oq.concurrent_tasks or 1):
            getters = []
            for grp_id in rlzs_by_gsim:
                ruptures = RuptureGetter(parent, slc, grp_id)
                if parent is self.datastore:  # not accessible parent
                    ruptures = list(ruptures)
                    if not ruptures:
                        continue
                getters.append(GmfGetter(
                    rlzs_by_gsim[grp_id], ruptures, sitecol, imts, min_iml,
                    oq.maximum_distance, oq.truncation_level, correl_model,
                    oq.filter_distance, samples_by_grp[grp_id]))
            yield getters, oq, monitor

    def execute(self):
        """
        Run in parallel `core_task(sources, sitecol, monitor)`, by
        parallelizing on the ruptures according to their weight and
        tectonic region type.
        """
        oq = self.oqparam
        calc.check_overflow(self)
        self.csm_info = self.datastore['csm_info']
        self.sm_id = {tuple(sm.path): sm.ordinal
                      for sm in self.csm_info.source_models}
        L = len(oq.imtls.array)
        R = self.datastore['csm_info'].get_num_rlzs()
        self.gmdata = {}
        self.offset = 0
        self.indices = collections.defaultdict(list)  # sid -> indices
        ires = parallel.Starmap(
            self.core_task.__func__, self.gen_args()).submit_all()
        if self.precalc and self.precalc.result:
            # remove the ruptures in memory to save memory
            self.precalc.result.clear()
        acc = ires.reduce(self.combine_pmaps_and_save_gmfs,
                          {r: ProbabilityMap(L) for r in range(R)})
        base.save_gmdata(self, R)
        if self.indices:
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices', measuremem=True,
                              autoflush=True):
                self.datastore.save_vlen(
                    'gmf_data/indices',
                    [numpy.array(self.indices[sid], indices_dt)
                     for sid in self.sitecol.complete.sids])
        else:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc

    def save_gmf_bytes(self):
        """Save the attribute nbytes in the gmf_data datasets"""
        ds = self.datastore
        for sm_id in ds['gmf_data']:
            ds.set_nbytes('gmf_data/' + sm_id)
        ds.set_nbytes('gmf_data')

    def post_execute(self, result):
        """
        :param result:
            a dictionary (src_group_id, gsim) -> haz_curves or an empty
            dictionary if hazard_curves_from_gmfs is false
        """
        oq = self.oqparam
        if oq.hazard_curves_from_gmfs:
            rlzs = self.rlzs_assoc.realizations
            # save individual curves
            for i in sorted(result):
                key = 'hcurves/rlz-%03d' % i
                if result[i]:
                    self.datastore[key] = result[i]
                else:
                    self.datastore[key] = ProbabilityMap(oq.imtls.array.size)
                    logging.info('Zero curves for %s', key)
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            hstats = self.oqparam.hazard_stats()
            if len(hstats) and len(rlzs) > 1:
                logging.info('Computing statistical hazard curves')
                for kind, stat in hstats:
                    pmap = compute_pmap_stats(result.values(), [stat],
                                              weights)
                    self.datastore['hcurves/' + kind] = pmap
        if self.datastore.parent:
            self.datastore.parent.open()
        if 'gmf_data' in self.datastore:
            self.save_gmf_bytes()
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # one could also set oq.number_of_logic_tree_samples = 0
            self.cl = ClassicalCalculator(oq)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, usually this is quite fast and does not
            # dominate the computation
            self.cl.run(close=False)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            for imt in eb_mean_curves.dtype.names:
                rdiff, index = util.max_rel_diff_index(
                    cl_mean_curves[imt], eb_mean_curves[imt])
                logging.warn(
                    'Relative difference with the classical '
                    'mean curves for IMT=%s: %d%% at site index %d',
                    imt, rdiff * 100, index)
class EventBasedCalculator(base.HazardCalculator): """ Event based PSHA calculator generating the ground motion fields and the hazard curves from the ruptures, depending on the configuration parameters. """ core_task = compute_gmfs is_stochastic = True accept_precalc = ['event_based', 'ebrisk', 'event_based_risk'] def init(self): if hasattr(self, 'csm'): self.check_floating_spinning() if hasattr(self.oqparam, 'maximum_distance'): self.srcfilter = self.src_filter() else: self.srcfilter = nofilter if not self.datastore.parent: self.datastore.create_dset('ruptures', rupture_dt) self.datastore.create_dset('rupgeoms', hdf5.vfloat32) def acc0(self): """ Initial accumulator, a dictionary rlz -> ProbabilityMap """ self.L = self.oqparam.imtls.size return {r: ProbabilityMap(self.L) for r in range(self.R)} def build_events_from_sources(self): """ Prefilter the composite source model and store the source_info """ gsims_by_trt = self.csm.full_lt.get_gsims_by_trt() sources = self.csm.get_sources() # weighting the heavy sources nrups = parallel.Starmap( count_ruptures, [(src,) for src in sources if src.code in b'AMC'], progress=logging.debug ).reduce() for src in sources: src.nsites = 1 # avoid 0 weight try: src.num_ruptures = nrups[src.source_id] except KeyError: src.num_ruptures = src.count_ruptures() maxweight = sum(sg.weight for sg in self.csm.src_groups) / ( self.oqparam.concurrent_tasks or 1) eff_ruptures = AccumDict(accum=0) # trt => potential ruptures calc_times = AccumDict(accum=numpy.zeros(3, F32)) # nr, ns, dt allargs = [] if self.oqparam.is_ucerf(): # manage the filtering in a special way for sg in self.csm.src_groups: for src in sg: src.src_filter = self.srcfilter srcfilter = nofilter # otherwise it would be ultra-slow else: srcfilter = self.srcfilter logging.info('Building ruptures') for sg in self.csm.src_groups: if not sg.sources: continue logging.info('Sending %s', sg) par = self.param.copy() par['gsims'] = gsims_by_trt[sg.trt] for src_group in sg.split(maxweight): allargs.append((src_group, srcfilter, par)) smap = parallel.Starmap( sample_ruptures, allargs, h5=self.datastore.hdf5) mon = self.monitor('saving ruptures') self.nruptures = 0 for dic in smap: # NB: dic should be a dictionary, but when the calculation dies # for an OOM it can become None, thus giving a very confusing error if dic is None: raise MemoryError('You ran out of memory!') rup_array = dic['rup_array'] if len(rup_array) == 0: continue if dic['calc_times']: calc_times += dic['calc_times'] if dic['eff_ruptures']: eff_ruptures += dic['eff_ruptures'] with mon: n = len(rup_array) rup_array['id'] = numpy.arange( self.nruptures, self.nruptures + n) self.nruptures += n hdf5.extend(self.datastore['ruptures'], rup_array) hdf5.extend(self.datastore['rupgeoms'], rup_array.geom) if len(self.datastore['ruptures']) == 0: raise RuntimeError('No ruptures were generated, perhaps the ' 'investigation time is too short') # must be called before storing the events self.store_rlz_info(eff_ruptures) # store full_lt self.store_source_info(calc_times) imp = calc.RuptureImporter(self.datastore) with self.monitor('saving ruptures and events'): imp.import_rups_events( self.datastore.getitem('ruptures')[()], get_rupture_getters) def agg_dicts(self, acc, result): """ :param acc: accumulator dictionary :param result: an AccumDict with events, ruptures, gmfs and hcurves """ sav_mon = self.monitor('saving gmfs') agg_mon = self.monitor('aggregating hcurves') primary = self.oqparam.get_primary_imtls() sec_imts = self.oqparam.get_sec_imts() with sav_mon: 
df = result.pop('gmfdata') if len(df): dset = self.datastore['gmf_data/sid'] times = result.pop('times') [task_no] = numpy.unique(times['task_no']) rupids = list(times['rup_id']) self.datastore['gmf_data/time_by_rup'][rupids] = times hdf5.extend(dset, df.sid.to_numpy()) hdf5.extend(self.datastore['gmf_data/eid'], df.eid.to_numpy()) for m in range(len(primary)): hdf5.extend(self.datastore[f'gmf_data/gmv_{m}'], df[f'gmv_{m}']) for sec_imt in sec_imts: hdf5.extend(self.datastore[f'gmf_data/{sec_imt}'], df[sec_imt]) sig_eps = result.pop('sig_eps') hdf5.extend(self.datastore['gmf_data/sigma_epsilon'], sig_eps) self.offset += len(df) if self.offset >= TWO32: raise RuntimeError( 'The gmf_data table has more than %d rows' % TWO32) imtls = self.oqparam.imtls with agg_mon: for key, poes in result.get('hcurves', {}).items(): r, sid, imt = str2rsi(key) array = acc[r].setdefault(sid, 0).array[imtls(imt), 0] array[:] = 1. - (1. - array) * (1. - poes) self.datastore.flush() return acc def set_param(self, **kw): oq = self.oqparam if oq.ground_motion_fields and oq.min_iml.sum() == 0: logging.warning('The GMFs are not filtered: ' 'you may want to set a minimum_intensity') else: logging.info('minimum_intensity=%s', oq.minimum_intensity) self.param.update( oqparam=oq, gmf=oq.ground_motion_fields, truncation_level=oq.truncation_level, imtls=oq.imtls, ses_per_logic_tree_path=oq.ses_per_logic_tree_path, **kw) def _read_scenario_ruptures(self): oq = self.oqparam gsim_lt = readinput.get_gsim_lt(self.oqparam) G = gsim_lt.get_num_paths() if oq.calculation_mode.startswith('scenario'): ngmfs = oq.number_of_ground_motion_fields if oq.inputs['rupture_model'].endswith('.xml'): self.gsims = [gsim_rlz.value[0] for gsim_rlz in gsim_lt] self.cmaker = ContextMaker( '*', self.gsims, {'maximum_distance': oq.maximum_distance, 'imtls': oq.imtls}) rup = readinput.get_rupture(oq) if self.N > oq.max_sites_disagg: # many sites, split rupture ebrs = [EBRupture(copyobj(rup, rup_id=rup.rup_id + i), 0, 0, G, e0=i * G) for i in range(ngmfs)] else: # keep a single rupture with a big occupation number ebrs = [EBRupture(rup, 0, 0, G * ngmfs, rup.rup_id)] aw = get_rup_array(ebrs, self.srcfilter) if len(aw) == 0: raise RuntimeError( 'The rupture is too far from the sites! Please check the ' 'maximum_distance and the position of the rupture') elif oq.inputs['rupture_model'].endswith('.csv'): aw = readinput.get_ruptures(oq.inputs['rupture_model']) num_gsims = numpy.array( [len(gsim_lt.values[trt]) for trt in gsim_lt.values], U32) if oq.calculation_mode.startswith('scenario'): # rescale n_occ aw['n_occ'] *= ngmfs * num_gsims[aw['trt_smr']] rup_array = aw.array hdf5.extend(self.datastore['rupgeoms'], aw.geom) if len(rup_array) == 0: raise RuntimeError( 'There are no sites within the maximum_distance' ' of %s km from the rupture' % oq.maximum_distance( rup.tectonic_region_type, rup.mag)) # check the number of branchsets branchsets = len(gsim_lt._ltnode) if len(rup_array) == 1 and branchsets > 1: raise InvalidFile( '%s for a scenario calculation must contain a single ' 'branchset, found %d!' 
% (oq.inputs['job_ini'], branchsets)) fake = logictree.FullLogicTree.fake(gsim_lt) self.realizations = fake.get_realizations() self.datastore['full_lt'] = fake self.store_rlz_info({}) # store weights self.save_params() imp = calc.RuptureImporter(self.datastore) imp.import_rups_events(rup_array, get_rupture_getters) def execute(self): oq = self.oqparam dstore = self.datastore self.set_param() self.offset = 0 if oq.hazard_calculation_id: # from ruptures dstore.parent = datastore.read(oq.hazard_calculation_id) elif hasattr(self, 'csm'): # from sources self.build_events_from_sources() if (oq.ground_motion_fields is False and oq.hazard_curves_from_gmfs is False): return {} elif 'rupture_model' not in oq.inputs: logging.warning( 'There is no rupture_model, the calculator will just ' 'import data without performing any calculation') fake = logictree.FullLogicTree.fake() dstore['full_lt'] = fake # needed to expose the outputs dstore['weights'] = [1.] return {} else: # scenario self._read_scenario_ruptures() if (oq.ground_motion_fields is False and oq.hazard_curves_from_gmfs is False): return {} if oq.ground_motion_fields: imts = oq.get_primary_imtls() nrups = len(dstore['ruptures']) base.create_gmf_data(dstore, imts, oq.get_sec_imts()) dstore.create_dset('gmf_data/sigma_epsilon', sig_eps_dt(oq.imtls)) dstore.create_dset('gmf_data/time_by_rup', time_dt, (nrups,), fillvalue=None) # compute_gmfs in parallel nr = len(dstore['ruptures']) logging.info('Reading {:_d} ruptures'.format(nr)) rgetters = get_rupture_getters(dstore, oq.concurrent_tasks * 1.25, srcfilter=self.srcfilter) allargs = [(rgetter, self.param) for rgetter in rgetters] dstore.swmr_on() smap = parallel.Starmap( self.core_task.__func__, allargs, h5=dstore.hdf5) smap.monitor.save('srcfilter', self.srcfilter) acc = smap.reduce(self.agg_dicts, self.acc0()) if 'gmf_data' not in dstore: return acc if oq.ground_motion_fields: with self.monitor('saving avg_gmf', measuremem=True): self.save_avg_gmf() return acc def save_avg_gmf(self): """ Compute and save avg_gmf, unless there are too many GMFs """ size = self.datastore.getsize('gmf_data') logging.info(f'Stored {humansize(size)} of GMFs') if size > 100 * 1024**2: logging.warning( 'There are more than 100 MB of GMFs, not computing avg_gmf') return numpy.unique(self.datastore['gmf_data/eid'][:]) rlzs = self.datastore['events']['rlz_id'] self.weights = self.datastore['weights'][:][rlzs] gmf_df = self.datastore.read_df('gmf_data', 'sid') for sec_imt in self.oqparam.get_sec_imts(): # ignore secondary perils del gmf_df[sec_imt] rel_events = gmf_df.eid.unique() e = len(rel_events) if e == 0: raise RuntimeError( 'No GMFs were generated, perhaps they were ' 'all below the minimum_intensity threshold') elif e < len(self.datastore['events']): self.datastore['relevant_events'] = rel_events logging.info('Stored {:_d} relevant event IDs'.format(e)) # really compute and store the avg_gmf M = len(self.oqparam.min_iml) avg_gmf = numpy.zeros((2, self.N, M), F32) for sid, avgstd in compute_avg_gmf( gmf_df, self.weights, self.oqparam.min_iml).items(): avg_gmf[:, sid] = avgstd self.datastore['avg_gmf'] = avg_gmf return rel_events def post_execute(self, result): oq = self.oqparam if (not result or not oq.ground_motion_fields and not oq.hazard_curves_from_gmfs): return N = len(self.sitecol.complete) M = len(oq.imtls) # 0 in scenario L = oq.imtls.size L1 = L // (M or 1) # check seed dependency unless the number of GMFs is huge if ('gmf_data' in self.datastore and self.datastore.getsize('gmf_data') < 1E9): 
            logging.info('Checking seed dependency')
            err = views.view('gmf_error', self.datastore)
            if err > .05:
                logging.warning('Your results are expected to have a large '
                                'dependency on ses_seed')
        if oq.hazard_curves_from_gmfs:
            rlzs = self.datastore['full_lt'].get_realizations()
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            # NB: in the future we may want to save the individual hazard
            # curves if oq.individual_curves is set; for the moment we
            # save the statistical curves only
            hstats = oq.hazard_stats()
            S = len(hstats)
            pmaps = list(result.values())
            R = len(weights)
            if len(pmaps) != R:
                # this should never happen, unless I break the
                # logic tree reduction mechanism during refactoring
                raise AssertionError('Expected %d pmaps, got %d' %
                                     (len(weights), len(pmaps)))
            if oq.individual_curves:
                logging.info('Saving individual hazard curves')
                self.datastore.create_dset('hcurves-rlzs', F32, (N, R, M, L1))
                self.datastore.set_shape_descr(
                    'hcurves-rlzs', site_id=N, rlz_id=R,
                    imt=list(oq.imtls), lvl=numpy.arange(L1))
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset(
                        'hmaps-rlzs', F32, (N, R, M, P))
                    self.datastore.set_shape_descr(
                        'hmaps-rlzs', site_id=N, rlz_id=R,
                        imt=list(oq.imtls), poe=oq.poes)
                for r, pmap in enumerate(pmaps):
                    arr = numpy.zeros((N, M, L1), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array.reshape(M, L1)
                    self.datastore['hcurves-rlzs'][:, r] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, r] = hmap[sid].array
            if S:
                logging.info('Computing statistical hazard curves')
                self.datastore.create_dset('hcurves-stats', F32,
                                           (N, S, M, L1))
                self.datastore.set_shape_descr(
                    'hcurves-stats', site_id=N, stat=list(hstats),
                    imt=list(oq.imtls), lvl=numpy.arange(L1))
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset(
                        'hmaps-stats', F32, (N, S, M, P))
                    self.datastore.set_shape_descr(
                        'hmaps-stats', site_id=N, stat=list(hstats),
                        imt=list(oq.imtls), poes=oq.poes)
                for s, stat in enumerate(hstats):
                    pmap = compute_pmap_stats(
                        pmaps, [hstats[stat]], weights, oq.imtls)
                    arr = numpy.zeros((N, M, L1), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array.reshape(M, L1)
                    self.datastore['hcurves-stats'][:, s] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, s] = hmap[sid].array
        if self.datastore.parent:
            self.datastore.parent.open('r')
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            oq.calculation_mode = 'classical'
            with logs.init('job', vars(oq)) as log:
                self.cl = ClassicalCalculator(oq, log.calc_id)
                # TODO: perhaps it is possible to avoid reprocessing the
                # source model; however, usually this is quite fast and
                # does not dominate the computation
                self.cl.run()
                engine.expose_outputs(self.cl.datastore)
                for imt in oq.imtls:
                    # NB: the classical curves must come from the datastore
                    # of the child classical calculation, the event based
                    # curves from the current datastore
                    cl_mean_curves = get_mean_curves(self.cl.datastore, imt)
                    eb_mean_curves = get_mean_curves(self.datastore, imt)
                    self.rdiff, index = util.max_rel_diff_index(
                        cl_mean_curves, eb_mean_curves)
                    logging.warning(
                        'Relative difference with the classical '
                        'mean curves: %d%% at site index %d, imt=%s',
                        self.rdiff * 100, index, imt)
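# A minimal sketch of what a helper like util.max_rel_diff_index computes,
# assuming two arrays of mean curves with shape (n_sites, n_levels); the
# scaling convention (maximum reference value per site) is illustrative,
# not necessarily the engine's exact definition.
import numpy

def max_rel_diff_index(ref, other):
    scale = ref.max(axis=1)  # per-site scale, avoids dividing tiny poes
    scale[scale == 0] = 1.   # empty sites would otherwise divide by zero
    rdiff = numpy.abs(ref - other).max(axis=1) / scale
    idx = rdiff.argmax()
    return rdiff[idx], idx

ref = numpy.array([[.1, .01], [.2, .02]])
other = numpy.array([[.11, .01], [.2, .02]])
print(max_rel_diff_index(ref, other))  # ~(0.1, 0), i.e. 10% at site 0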
class EventBasedCalculator(ClassicalCalculator): """ Event based PSHA calculator generating the ground motion fields and the hazard curves from the ruptures, depending on the configuration parameters. """ pre_calculator = 'event_based_rupture' core_task = compute_gmfs_and_curves is_stochastic = True def pre_execute(self): """ Read the precomputed ruptures (or compute them on the fly) and prepare some empty files in the export directory to store the gmfs (if any). If there were pre-existing files, they will be erased. """ super(EventBasedCalculator, self).pre_execute() rlzs_by_tr_id = self.rlzs_assoc.get_rlzs_by_trt_id() num_rlzs = {t: len(rlzs) for t, rlzs in rlzs_by_tr_id.items()} self.sesruptures = [] for serial in self.datastore['sescollection']: sr = self.datastore['sescollection/' + serial] sr.set_weight(num_rlzs, {}) self.sesruptures.append(sr) self.sesruptures.sort(key=operator.attrgetter('serial')) if self.oqparam.ground_motion_fields: for rlz in self.rlzs_assoc.realizations: self.datastore.create_dset('gmf_data/%04d' % rlz.ordinal, gmv_dt) def combine_curves_and_save_gmfs(self, acc, res): """ Combine the hazard curves (if any) and save the gmfs (if any) sequentially; notice that the gmfs may come from different tasks in any order. :param acc: an accumulator for the hazard curves :param res: a dictionary rlzi, imt -> [gmf_array, curves_by_imt] :returns: a new accumulator """ sav_mon = self.monitor('saving gmfs') agg_mon = self.monitor('aggregating hcurves') for rlzi in res: gmfa, curves = res[rlzi] if gmfa is not None: with sav_mon: hdf5.extend(self.datastore['gmf_data/%04d' % rlzi], gmfa) if curves is not None: # aggregate hcurves with agg_mon: self.agg_dicts(acc, {rlzi: curves}) sav_mon.flush() agg_mon.flush() self.datastore.flush() return acc def execute(self): """ Run in parallel `core_task(sources, sitecol, monitor)`, by parallelizing on the ruptures according to their weight and tectonic region type. 
""" oq = self.oqparam if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields: return monitor = self.monitor(self.core_task.__name__) monitor.oqparam = oq min_iml = fix_minimum_intensity(oq.minimum_intensity, oq.imtls) acc = parallel.apply_reduce( self.core_task.__func__, (self.sesruptures, self.sitecol, oq.imtls, self.rlzs_assoc, min_iml, monitor), concurrent_tasks=self.oqparam.concurrent_tasks, agg=self.combine_curves_and_save_gmfs, acc=ProbabilityMap(), key=operator.attrgetter('trt_id'), weight=operator.attrgetter('weight')) if oq.ground_motion_fields: self.datastore.set_nbytes('gmf_data') return acc def post_execute(self, result): """ :param result: a dictionary (trt_model_id, gsim) -> haz_curves or an empty dictionary if hazard_curves_from_gmfs is false """ oq = self.oqparam if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields: return elif oq.hazard_curves_from_gmfs: rlzs = self.rlzs_assoc.realizations dic = {} for rlzi in result: dic[rlzs[rlzi]] = array_of_curves(result[rlzi], len(self.sitecol), oq.imtls) self.save_curves(dic) if oq.compare_with_classical: # compute classical curves export_dir = os.path.join(oq.export_dir, 'cl') if not os.path.exists(export_dir): os.makedirs(export_dir) oq.export_dir = export_dir # one could also set oq.number_of_logic_tree_samples = 0 self.cl = ClassicalCalculator(oq, self.monitor) # TODO: perhaps it is possible to avoid reprocessing the source # model, however usually this is quite fast and do not dominate # the computation self.cl.run(hazard_calculation_id=self.datastore.calc_id) for imt in self.mean_curves.dtype.fields: rdiff, index = max_rel_diff_index(self.cl.mean_curves[imt], self.mean_curves[imt]) logging.warn( 'Relative difference with the classical ' 'mean curves for IMT=%s: %d%% at site index %d', imt, rdiff * 100, index)
class EventBasedCalculator(ClassicalCalculator): """ Event based PSHA calculator generating the ruptures only """ pre_calculator = 'event_based_rupture' core_func = compute_gmfs_and_curves is_stochastic = True def pre_execute(self): """ Read the precomputed ruptures (or compute them on the fly) and prepare some empty files in the export directory to store the gmfs (if any). If there were pre-existing files, they will be erased. """ super(EventBasedCalculator, self).pre_execute() self.sesruptures = [] gsims_by_col = self.rlzs_assoc.get_gsims_by_col() self.datasets = {} for col_id, sescol in enumerate(self.datastore['sescollection']): gmf_dt = gsim_imt_dt(gsims_by_col[col_id], self.oqparam.imtls) for tag, sesrup in sorted(sescol.items()): sesrup = sescol[tag] self.sesruptures.append(sesrup) if self.oqparam.ground_motion_fields and sescol: self.datasets[col_id] = self.datastore.create_dset( 'gmfs/col%02d' % col_id, gmf_dt) def combine_curves_and_save_gmfs(self, acc, res): """ Combine the hazard curves (if any) and save the gmfs (if any) sequentially; notice that the gmfs may come from different tasks in any order. :param acc: an accumulator for the hazard curves :param res: a dictionary trt_id, gsim -> gmf_array or curves_by_imt :returns: a new accumulator """ sav_mon = self.monitor('saving gmfs') agg_mon = self.monitor('aggregating hcurves') save_gmfs = self.oqparam.ground_motion_fields for trt_id, gsim_or_col in res: if isinstance(gsim_or_col, int) and save_gmfs: with sav_mon: gmfa = res[trt_id, gsim_or_col] dataset = self.datasets[gsim_or_col] dataset.attrs['trt_model_id'] = trt_id dataset.extend(gmfa) self.nbytes += gmfa.nbytes self.datastore.hdf5.flush() elif isinstance(gsim_or_col, str): # aggregate hcurves with agg_mon: curves_by_imt = res[trt_id, gsim_or_col] acc = agg_dicts( acc, AccumDict({(trt_id, gsim_or_col): curves_by_imt})) sav_mon.flush() agg_mon.flush() return acc def execute(self): """ Run in parallel `core_func(sources, sitecol, monitor)`, by parallelizing on the ruptures according to their weight and tectonic region type. 
""" oq = self.oqparam if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields: return monitor = self.monitor(self.core_func.__name__) monitor.oqparam = oq zc = zero_curves(len(self.sitecol.complete), self.oqparam.imtls) zerodict = AccumDict((key, zc) for key in self.rlzs_assoc) self.nbytes = 0 curves_by_trt_gsim = parallel.apply_reduce( self.core_func.__func__, (self.sesruptures, self.sitecol, self.rlzs_assoc, monitor), concurrent_tasks=self.oqparam.concurrent_tasks, acc=zerodict, agg=self.combine_curves_and_save_gmfs, key=operator.attrgetter('col_id')) if oq.ground_motion_fields: # sanity check on the saved gmfs size expected_nbytes = self.datastore[ 'counts_per_rlz'].attrs['gmfs_nbytes'] self.datastore['gmfs'].attrs['nbytes'] = self.nbytes assert self.nbytes == expected_nbytes, ( self.nbytes, expected_nbytes) return curves_by_trt_gsim def post_execute(self, result): """ :param result: a dictionary (trt_model_id, gsim) -> haz_curves or an empty dictionary if hazard_curves_from_gmfs is false """ oq = self.oqparam if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields: return if oq.hazard_curves_from_gmfs: ClassicalCalculator.post_execute.__func__(self, result) if oq.compare_with_classical: # compute classical curves export_dir = os.path.join(oq.export_dir, 'cl') if not os.path.exists(export_dir): os.makedirs(export_dir) oq.export_dir = export_dir # use a different datastore self.cl = ClassicalCalculator(oq, self.monitor) self.cl.datastore.parent = self.datastore result = self.cl.run(pre_execute=False, clean_up=False) for imt in self.mean_curves.dtype.fields: rdiff, index = max_rel_diff_index( self.cl.mean_curves[imt], self.mean_curves[imt]) logging.warn('Relative difference with the classical ' 'mean curves for IMT=%s: %d%% at site index %d', imt, rdiff * 100, index)
class EventBasedCalculator(ClassicalCalculator): """ Event based PSHA calculator generating the ground motion fields and the hazard curves from the ruptures, depending on the configuration parameters. """ pre_calculator = 'event_based_rupture' core_task = compute_gmfs_and_curves is_stochastic = True def combine_pmaps_and_save_gmfs(self, acc, res): """ Combine the hazard curves (if any) and save the gmfs (if any) sequentially; notice that the gmfs may come from different tasks in any order. :param acc: an accumulator for the hazard curves :param res: a dictionary rlzi, imt -> [gmf_array, curves_by_imt] :returns: a new accumulator """ sav_mon = self.monitor('saving gmfs') agg_mon = self.monitor('aggregating hcurves') if res['gmfcoll'] is not None: with sav_mon: for rlz, array in res['gmfcoll'].items(): if len(array): key = 'gmf_data/%04d' % rlz.ordinal self.datastore.extend(key, array) slicedic = self.oqparam.imtls.slicedic with agg_mon: for key, poes in res['hcurves'].items(): rlzi, sid, imt = str2rsi(key) array = acc[rlzi].setdefault(sid, 0).array[slicedic[imt], 0] array[:] = 1. - (1. - array) * (1. - poes) sav_mon.flush() agg_mon.flush() self.datastore.flush() if 'ruptures' in res: vars(EventBasedRuptureCalculator)['save_ruptures']( self, res['ruptures']) return acc def gen_args(self, ebruptures): """ :param ebruptures: a list of EBRupture objects to be split :yields: the arguments for compute_gmfs_and_curves """ oq = self.oqparam monitor = self.monitor(self.core_task.__name__) monitor.oqparam = oq imts = list(oq.imtls) min_iml = calc.fix_minimum_intensity(oq.minimum_intensity, imts) grp_trt = {sg.id: sg.trt for sm in self.csm.info.source_models for sg in sm.src_groups} rlzs_by_grp = self.rlzs_assoc.get_rlzs_by_grp_id() correl_model = oq.get_correl_model() for block in split_in_blocks( ebruptures, oq.concurrent_tasks or 1, key=operator.attrgetter('grp_id')): grp_id = block[0].grp_id trt = grp_trt[grp_id] gsims = [dic[trt] for dic in self.rlzs_assoc.gsim_by_trt] samples = self.rlzs_assoc.samples[grp_id] getter = GmfGetter(gsims, block, self.sitecol, imts, min_iml, oq.truncation_level, correl_model, samples) yield getter, rlzs_by_grp[grp_id], monitor def execute(self): """ Run in parallel `core_task(sources, sitecol, monitor)`, by parallelizing on the ruptures according to their weight and tectonic region type. 
""" oq = self.oqparam if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields: return self.sesruptures = [] if self.precalc: # the ruptures are already in memory for grp_id, sesruptures in self.precalc.result.items(): for sr in sesruptures: self.sesruptures.append(sr) else: # read the ruptures from the datastore for serial in self.datastore['sescollection']: sr = self.datastore['sescollection/' + serial] self.sesruptures.append(sr) self.sesruptures.sort(key=operator.attrgetter('serial')) if self.oqparam.ground_motion_fields: calc.check_overflow(self) L = len(oq.imtls.array) res = parallel.starmap( self.core_task.__func__, self.gen_args(self.sesruptures) ).submit_all() acc = functools.reduce(self.combine_pmaps_and_save_gmfs, res, { rlz.ordinal: ProbabilityMap(L, 1) for rlz in self.rlzs_assoc.realizations}) self.save_data_transfer(res) return acc def post_execute(self, result): """ :param result: a dictionary (src_group_id, gsim) -> haz_curves or an empty dictionary if hazard_curves_from_gmfs is false """ oq = self.oqparam if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields: return elif oq.hazard_curves_from_gmfs: rlzs = self.rlzs_assoc.realizations # save individual curves if self.oqparam.individual_curves: for i in sorted(result): key = 'hcurves/rlz-%03d' % i if result[i]: self.datastore[key] = result[i] else: logging.info('Zero curves for %s', key) # compute and save statistics; this is done in process # we don't need to parallelize, since event based calculations # involves a "small" number of sites (<= 65,536) weights = (None if self.oqparam.number_of_logic_tree_samples else [rlz.weight for rlz in rlzs]) pstats = PmapStats(self.oqparam.quantile_hazard_curves, weights) for kind, stat in pstats.compute( self.sitecol.sids, list(result.values())): if kind == 'mean' and not self.oqparam.mean_hazard_curves: continue self.datastore['hcurves/' + kind] = stat if ('gmf_data' in self.datastore and 'nbytes' not in self.datastore['gmf_data'].attrs): self.datastore.set_nbytes('gmf_data') if oq.compare_with_classical: # compute classical curves export_dir = os.path.join(oq.export_dir, 'cl') if not os.path.exists(export_dir): os.makedirs(export_dir) oq.export_dir = export_dir # one could also set oq.number_of_logic_tree_samples = 0 self.cl = ClassicalCalculator(oq, self.monitor) # TODO: perhaps it is possible to avoid reprocessing the source # model, however usually this is quite fast and do not dominate # the computation self.cl.run(close=False) cl_mean_curves = get_mean_curves(self.cl.datastore) eb_mean_curves = get_mean_curves(self.datastore) for imt in eb_mean_curves.dtype.names: rdiff, index = max_rel_diff_index( cl_mean_curves[imt], eb_mean_curves[imt]) logging.warn('Relative difference with the classical ' 'mean curves for IMT=%s: %d%% at site index %d', imt, rdiff * 100, index)
class EventBasedCalculator(base.HazardCalculator): """ Event based PSHA calculator generating the ground motion fields and the hazard curves from the ruptures, depending on the configuration parameters. """ core_task = compute_gmfs is_stochastic = True accept_precalc = ['event_based', 'event_based_risk', 'ucerf_hazard'] build_ruptures = sample_ruptures @cached_property def csm_info(self): """ :returns: a cached CompositionInfo object """ try: return self.csm.info except AttributeError: return self.datastore.parent['csm_info'] def init(self): if hasattr(self, 'csm'): self.check_floating_spinning() self.rupser = calc.RuptureSerializer(self.datastore) def init_logic_tree(self, csm_info): self.trt_by_grp = csm_info.grp_by("trt") self.rlzs_assoc = csm_info.get_rlzs_assoc() self.rlzs_by_gsim_grp = csm_info.get_rlzs_by_gsim_grp() self.samples_by_grp = csm_info.get_samples_by_grp() self.num_rlzs_by_grp = { grp_id: sum(len(rlzs) for rlzs in self.rlzs_by_gsim_grp[grp_id].values()) for grp_id in self.rlzs_by_gsim_grp } def acc0(self): """ Initial accumulator, a dictionary (grp_id, gsim) -> curves """ self.L = len(self.oqparam.imtls.array) zd = {r: ProbabilityMap(self.L) for r in range(self.R)} return zd def build_events_from_sources(self): """ Prefilter the composite source model and store the source_info """ oq = self.oqparam gsims_by_trt = self.csm.gsim_lt.values logging.info('Building ruptures') smap = parallel.Starmap(self.build_ruptures.__func__, hdf5path=self.datastore.filename) eff_ruptures = AccumDict(accum=0) # grp_id => potential ruptures calc_times = AccumDict(accum=numpy.zeros(2, F32)) ses_idx = 0 for sm_id, sm in enumerate(self.csm.source_models): logging.info('Sending %s', sm) for sg in sm.src_groups: if not sg.sources: continue par = self.param.copy() par['gsims'] = gsims_by_trt[sg.trt] if sg.atomic: # do not split the group smap.submit(sg, self.src_filter, par) else: # traditional groups for block in self.block_splitter(sg.sources, key=by_grp): if 'ucerf' in oq.calculation_mode: for i in range(oq.ses_per_logic_tree_path): par['ses_seeds'] = [(ses_idx, oq.ses_seed + i + 1)] smap.submit(block, self.src_filter, par) ses_idx += 1 else: smap.submit(block, self.src_filter, par) mon = self.monitor('saving ruptures') for dic in smap: if dic['calc_times']: calc_times += dic['calc_times'] if dic['eff_ruptures']: eff_ruptures += dic['eff_ruptures'] if dic['rup_array']: with mon: self.rupser.save(dic['rup_array']) self.rupser.close() if not self.rupser.nruptures: raise RuntimeError('No ruptures were generated, perhaps the ' 'investigation time is too short') # logic tree reduction, must be called before storing the events self.store_rlz_info(eff_ruptures) self.init_logic_tree(self.csm.info) with self.monitor('store source_info', autoflush=True): self.store_source_info(calc_times) logging.info('Reordering the ruptures and storing the events') attrs = self.datastore.getitem('ruptures').attrs sorted_ruptures = self.datastore.getitem('ruptures')[()] # order the ruptures by serial sorted_ruptures.sort(order='serial') ngroups = len(self.csm.info.trt_by_grp) grp_indices = numpy.zeros((ngroups, 2), U32) grp_ids = sorted_ruptures['grp_id'] for grp_id, [startstop] in get_indices(grp_ids).items(): grp_indices[grp_id] = startstop self.datastore['ruptures'] = sorted_ruptures self.datastore.set_attrs('ruptures', grp_indices=grp_indices, **attrs) with self.monitor('saving events'): self.save_events(sorted_ruptures) def gen_rupture_getters(self): """ :returns: a list of RuptureGetters """ dstore = 
(self.datastore.parent if self.datastore.parent else self.datastore) hdf5cache = dstore.hdf5cache() mode = 'r+' if os.path.exists(hdf5cache) else 'w' with hdf5.File(hdf5cache, mode) as cache: if 'ruptures' not in cache: dstore.hdf5.copy('ruptures', cache) if 'rupgeoms' not in cache: dstore.hdf5.copy('rupgeoms', cache) yield from gen_rupture_getters( dstore, concurrent_tasks=self.oqparam.concurrent_tasks or 1, hdf5cache=hdf5cache) if self.datastore.parent: self.datastore.parent.close() @cached_property def eid2idx(self): """ :returns: a dict eid -> index in the events table """ return dict(zip(self.datastore['events']['id'], range(self.E))) def agg_dicts(self, acc, result): """ :param acc: accumulator dictionary :param result: an AccumDict with events, ruptures, gmfs and hcurves """ sav_mon = self.monitor('saving gmfs') agg_mon = self.monitor('aggregating hcurves') with sav_mon: data = result.pop('gmfdata') if len(data): idxs = base.get_idxs(data, self.eid2idx) # this has to be fast data['eid'] = idxs # replace eid with idx self.datastore.extend('gmf_data/data', data) sig_eps = result.pop('sig_eps') sig_eps['eid'] = base.get_idxs(sig_eps, self.eid2idx) self.datastore.extend('gmf_data/sigma_epsilon', sig_eps) # it is important to save the number of bytes while the # computation is going, to see the progress update_nbytes(self.datastore, 'gmf_data/data', data) for sid, start, stop in result['indices']: self.indices[sid, 0].append(start + self.offset) self.indices[sid, 1].append(stop + self.offset) self.offset += len(data) if self.offset >= TWO32: raise RuntimeError( 'The gmf_data table has more than %d rows' % TWO32) imtls = self.oqparam.imtls with agg_mon: for key, poes in result.get('hcurves', {}).items(): r, sid, imt = str2rsi(key) array = acc[r].setdefault(sid, 0).array[imtls(imt), 0] array[:] = 1. - (1. - array) * (1. - poes) self.datastore.flush() return acc def save_events(self, rup_array): """ :param rup_array: an array of ruptures with fields grp_id :returns: a list of RuptureGetters """ # this is very fast compared to saving the ruptures eids = rupture.get_eids(rup_array, self.samples_by_grp, self.num_rlzs_by_grp) self.check_overflow() # check the number of events events = numpy.zeros(len(eids), rupture.events_dt) # when computing the events all ruptures must be considered, # including the ones far away that will be discarded later on rgetters = self.gen_rupture_getters() # build the associations eid -> rlz sequentially or in parallel # this is very fast: I saw 30 million events associated in 1 minute! logging.info('Building associations event_id -> rlz_id for %d events', len(events)) if len(events) < 1E5: it = map(RuptureGetter.get_eid_rlz, rgetters) else: it = parallel.Starmap(RuptureGetter.get_eid_rlz, ((rgetter, ) for rgetter in rgetters), progress=logging.debug, hdf5path=self.datastore.filename) i = 0 for eid_rlz in it: for er in eid_rlz: events[i] = er i += 1 if i >= TWO32: raise ValueError('There are more than %d events!' % i) events.sort(order='id') # fast too n_unique_events = len(numpy.unique(events['id'])) assert n_unique_events == len(events), (n_unique_events, len(events)) self.datastore['events'] = events def check_overflow(self): """ Raise a ValueError if the number of sites is larger than 65,536 or the number of IMTs is larger than 256 or the number of ruptures is larger than 4,294,967,296. The limits are due to the numpy dtype used to store the GMFs (gmv_dt). They could be relaxed in the future. 
""" oq = self.oqparam max_ = dict(sites=TWO32, events=TWO32, imts=2**8) num_ = dict(events=self.E, imts=len(self.oqparam.imtls)) if self.sitecol: num_['sites'] = n = len(self.sitecol) if (oq.calculation_mode == 'event_based' and oq.ground_motion_fields and n > oq.max_sites_per_gmf): raise ValueError( 'You cannot compute the GMFs for %d > %d sites' % (n, oq.max_sites_per_gmf)) for var in num_: if num_[var] > max_[var]: raise ValueError( 'The %s calculator is restricted to %d %s, got %d' % (oq.calculation_mode, max_[var], var, num_[var])) def set_param(self, **kw): oq = self.oqparam # set the minimum_intensity if hasattr(self, 'crmodel') and not oq.minimum_intensity: # infer it from the risk models if not directly set in job.ini oq.minimum_intensity = self.crmodel.min_iml min_iml = oq.min_iml if min_iml.sum() == 0: logging.warning('The GMFs are not filtered: ' 'you may want to set a minimum_intensity') else: logging.info('minimum_intensity=%s', oq.minimum_intensity) self.param.update(oqparam=oq, gmf=oq.ground_motion_fields, truncation_level=oq.truncation_level, ruptures_per_block=oq.ruptures_per_block, imtls=oq.imtls, filter_distance=oq.filter_distance, ses_per_logic_tree_path=oq.ses_per_logic_tree_path, **kw) def execute(self): oq = self.oqparam self.set_param() self.offset = 0 self.indices = collections.defaultdict(list) # sid, idx -> indices if oq.hazard_calculation_id and 'ruptures' in self.datastore: # from ruptures self.datastore.parent = util.read(oq.hazard_calculation_id) self.init_logic_tree(self.csm_info) else: # from sources self.build_events_from_sources() if (oq.ground_motion_fields is False and oq.hazard_curves_from_gmfs is False): return {} if not oq.imtls: raise InvalidFile('There are no intensity measure types in %s' % oq.inputs['job_ini']) iterargs = ((rgetter, self.src_filter, self.param) for rgetter in self.gen_rupture_getters()) # call compute_gmfs in parallel acc = parallel.Starmap(self.core_task.__func__, iterargs, hdf5path=self.datastore.filename).reduce( self.agg_dicts, self.acc0()) if self.indices: N = len(self.sitecol.complete) logging.info('Saving gmf_data/indices') with self.monitor('saving gmf_data/indices', measuremem=True, autoflush=True): self.datastore['gmf_data/imts'] = ' '.join(oq.imtls) dset = self.datastore.create_dset('gmf_data/indices', hdf5.vuint32, shape=(N, 2), fillvalue=None) num_evs = self.datastore.create_dset('gmf_data/events_by_sid', U32, (N, )) for sid in self.sitecol.complete.sids: start = numpy.array(self.indices[sid, 0]) stop = numpy.array(self.indices[sid, 1]) dset[sid, 0] = start dset[sid, 1] = stop num_evs[sid] = (stop - start).sum() num_evs = num_evs[()] avg_events_by_sid = num_evs.sum() / N logging.info('Found ~%d GMVs per site', avg_events_by_sid) self.datastore.set_attrs('gmf_data', avg_events_by_sid=avg_events_by_sid, max_events_by_sid=num_evs.max()) elif oq.ground_motion_fields: raise RuntimeError('No GMFs were generated, perhaps they were ' 'all below the minimum_intensity threshold') return acc def post_execute(self, result): oq = self.oqparam if not oq.ground_motion_fields and not oq.hazard_curves_from_gmfs: return N = len(self.sitecol.complete) L = len(oq.imtls.array) if result and oq.hazard_curves_from_gmfs: rlzs = self.rlzs_assoc.realizations # compute and save statistics; this is done in process and can # be very slow if there are thousands of realizations weights = [rlz.weight for rlz in rlzs] # NB: in the future we may want to save to individual hazard # curves if oq.individual_curves is set; for the moment we # save the 
statistical curves only hstats = oq.hazard_stats() S = len(hstats) pmaps = list(result.values()) R = len(weights) if len(pmaps) != R: # this should never happen, unless I break the # logic tree reduction mechanism during refactoring raise AssertionError('Expected %d pmaps, got %d' % (len(weights), len(pmaps))) if oq.individual_curves: logging.info('Saving individual hazard curves') self.datastore.create_dset('hcurves-rlzs', F32, (N, R, L)) self.datastore.set_attrs('hcurves-rlzs', nbytes=N * R * L * 4) if oq.poes: P = len(oq.poes) M = len(oq.imtls) ds = self.datastore.create_dset('hmaps-rlzs', F32, (N, R, M, P)) self.datastore.set_attrs('hmaps-rlzs', nbytes=N * R * P * M * 4) for r, pmap in enumerate(pmaps): arr = numpy.zeros((N, L), F32) for sid in pmap: arr[sid] = pmap[sid].array[:, 0] self.datastore['hcurves-rlzs'][:, r] = arr if oq.poes: hmap = calc.make_hmap(pmap, oq.imtls, oq.poes) for sid in hmap: ds[sid, r] = hmap[sid].array if S: logging.info('Computing statistical hazard curves') self.datastore.create_dset('hcurves-stats', F32, (N, S, L)) self.datastore.set_attrs('hcurves-stats', nbytes=N * S * L * 4) if oq.poes: P = len(oq.poes) M = len(oq.imtls) ds = self.datastore.create_dset('hmaps-stats', F32, (N, S, M, P)) self.datastore.set_attrs('hmaps-stats', nbytes=N * S * P * M * 4) for s, stat in enumerate(hstats): pmap = compute_pmap_stats(pmaps, [hstats[stat]], weights, oq.imtls) arr = numpy.zeros((N, L), F32) for sid in pmap: arr[sid] = pmap[sid].array[:, 0] self.datastore['hcurves-stats'][:, s] = arr if oq.poes: hmap = calc.make_hmap(pmap, oq.imtls, oq.poes) for sid in hmap: ds[sid, s] = hmap[sid].array if self.datastore.parent: self.datastore.parent.open('r') if oq.compare_with_classical: # compute classical curves export_dir = os.path.join(oq.export_dir, 'cl') if not os.path.exists(export_dir): os.makedirs(export_dir) oq.export_dir = export_dir job_id = logs.init('job') oq.calculation_mode = 'classical' self.cl = ClassicalCalculator(oq, job_id) # TODO: perhaps it is possible to avoid reprocessing the source # model, however usually this is quite fast and do not dominate # the computation self.cl.run(close=False) engine.expose_outputs(self.cl.datastore) cl_mean_curves = get_mean_curves(self.cl.datastore) eb_mean_curves = get_mean_curves(self.datastore) self.rdiff, index = util.max_rel_diff_index( cl_mean_curves, eb_mean_curves) logging.warning( 'Relative difference with the classical ' 'mean curves: %d%% at site index %d', self.rdiff * 100, index)
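# calc.make_hmap, used above, inverts each hazard curve at the requested
# poes: it finds the intensity level where the curve crosses each target
# probability, essentially a 1D interpolation in log-log space. A sketch
# of the idea (the engine's own implementation differs in its details):
import numpy

def hmap_from_curve(poes_of_iml, imls, target_poes):
    # poes decrease with iml; numpy.interp wants increasing xs, so reverse
    return numpy.exp(numpy.interp(
        numpy.log(target_poes), numpy.log(poes_of_iml[::-1]),
        numpy.log(imls[::-1])))

imls = numpy.array([.1, .2, .4, .8])
curve = numpy.array([.9, .5, .1, .01])
print(hmap_from_curve(curve, imls, numpy.array([.1, .02])))  # ~[0.4 0.65]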
class EventBasedCalculator(ClassicalCalculator): """ Event based PSHA calculator generating the ground motion fields and the hazard curves from the ruptures, depending on the configuration parameters. """ pre_calculator = 'event_based_rupture' core_task = compute_gmfs_and_curves is_stochastic = True def pre_execute(self): """ Read the precomputed ruptures (or compute them on the fly) and prepare some empty files in the export directory to store the gmfs (if any). If there were pre-existing files, they will be erased. """ super(EventBasedCalculator, self).pre_execute() rlzs_by_tr_id = self.rlzs_assoc.get_rlzs_by_grp_id() num_rlzs = {t: len(rlzs) for t, rlzs in rlzs_by_tr_id.items()} self.sesruptures = [] for serial in self.datastore['sescollection']: sr = self.datastore['sescollection/' + serial] sr.set_weight(num_rlzs, {}) self.sesruptures.append(sr) self.sesruptures.sort(key=operator.attrgetter('serial')) if self.oqparam.ground_motion_fields: calc.check_overflow(self) for rlz in self.rlzs_assoc.realizations: self.datastore.create_dset( 'gmf_data/%04d' % rlz.ordinal, calc.gmv_dt) def combine_curves_and_save_gmfs(self, acc, res): """ Combine the hazard curves (if any) and save the gmfs (if any) sequentially; notice that the gmfs may come from different tasks in any order. :param acc: an accumulator for the hazard curves :param res: a dictionary rlzi, imt -> [gmf_array, curves_by_imt] :returns: a new accumulator """ sav_mon = self.monitor('saving gmfs') agg_mon = self.monitor('aggregating hcurves') for rlzi in res: gmfa, curves = res[rlzi] if gmfa is not None: with sav_mon: hdf5.extend(self.datastore['gmf_data/%04d' % rlzi], gmfa) if curves is not None: # aggregate hcurves with agg_mon: self.agg_dicts(acc, {rlzi: curves}) sav_mon.flush() agg_mon.flush() self.datastore.flush() return acc def execute(self): """ Run in parallel `core_task(sources, sitecol, monitor)`, by parallelizing on the ruptures according to their weight and tectonic region type. 
""" oq = self.oqparam if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields: return monitor = self.monitor(self.core_task.__name__) monitor.oqparam = oq min_iml = calc.fix_minimum_intensity( oq.minimum_intensity, oq.imtls) acc = parallel.apply_reduce( self.core_task.__func__, (self.sesruptures, self.sitecol, oq.imtls, self.rlzs_assoc, min_iml, monitor), concurrent_tasks=self.oqparam.concurrent_tasks, agg=self.combine_curves_and_save_gmfs, acc=ProbabilityMap(), key=operator.attrgetter('grp_id'), weight=operator.attrgetter('weight')) if oq.ground_motion_fields: self.datastore.set_nbytes('gmf_data') return acc def post_execute(self, result): """ :param result: a dictionary (src_group_id, gsim) -> haz_curves or an empty dictionary if hazard_curves_from_gmfs is false """ oq = self.oqparam if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields: return elif oq.hazard_curves_from_gmfs: rlzs = self.rlzs_assoc.realizations dic = {} for rlzi in result: dic[rlzs[rlzi]] = array_of_curves( result[rlzi], len(self.sitecol), oq.imtls) self.save_curves(dic) if oq.compare_with_classical: # compute classical curves export_dir = os.path.join(oq.export_dir, 'cl') if not os.path.exists(export_dir): os.makedirs(export_dir) oq.export_dir = export_dir # one could also set oq.number_of_logic_tree_samples = 0 self.cl = ClassicalCalculator(oq, self.monitor) # TODO: perhaps it is possible to avoid reprocessing the source # model, however usually this is quite fast and do not dominate # the computation self.cl.run() for imt in self.mean_curves.dtype.fields: rdiff, index = max_rel_diff_index( self.cl.mean_curves[imt], self.mean_curves[imt]) logging.warn('Relative difference with the classical ' 'mean curves for IMT=%s: %d%% at site index %d', imt, rdiff * 100, index)
class EventBasedCalculator(base.HazardCalculator): """ Event based PSHA calculator generating the ground motion fields and the hazard curves from the ruptures, depending on the configuration parameters. """ core_task = compute_gmfs is_stochastic = True accept_precalc = ['event_based', 'ebrisk', 'event_based_risk'] def init(self): if hasattr(self, 'csm'): self.check_floating_spinning() if hasattr(self.oqparam, 'maximum_distance'): self.srcfilter = self.src_filter(self.datastore.tempname) else: self.srcfilter = nofilter if not self.datastore.parent: self.datastore.create_dset('ruptures', rupture_dt) self.datastore.create_dset('rupgeoms', hdf5.vfloat32) def acc0(self): """ Initial accumulator, a dictionary (grp_id, gsim) -> curves """ self.L = len(self.oqparam.imtls.array) zd = {r: ProbabilityMap(self.L) for r in range(self.R)} return zd def build_events_from_sources(self): """ Prefilter the composite source model and store the source_info """ gsims_by_trt = self.csm.full_lt.get_gsims_by_trt() logging.info('Building ruptures') maxweight = sum(sg.weight for sg in self.csm.src_groups) / ( self.oqparam.concurrent_tasks or 1) eff_ruptures = AccumDict(accum=0) # trt => potential ruptures calc_times = AccumDict(accum=numpy.zeros(3, F32)) # nr, ns, dt allargs = [] if self.oqparam.is_ucerf(): # manage the filtering in a special way for sg in self.csm.src_groups: for src in sg: src.src_filter = self.srcfilter srcfilter = nofilter # otherwise it would be ultra-slow else: srcfilter = self.srcfilter for sg in self.csm.src_groups: if not sg.sources: continue logging.info('Sending %s', sg) par = self.param.copy() par['gsims'] = gsims_by_trt[sg.trt] for src_group in sg.split(maxweight): allargs.append((src_group, srcfilter, par)) smap = parallel.Starmap( sample_ruptures, allargs, h5=self.datastore.hdf5) mon = self.monitor('saving ruptures') self.nruptures = 0 for dic in smap: rup_array = dic['rup_array'] if len(rup_array) == 0: continue if dic['calc_times']: calc_times += dic['calc_times'] if dic['eff_ruptures']: eff_ruptures += dic['eff_ruptures'] with mon: n = len(rup_array) rup_array['id'] = numpy.arange( self.nruptures, self.nruptures + n) self.nruptures += n hdf5.extend(self.datastore['ruptures'], rup_array) hdf5.extend(self.datastore['rupgeoms'], rup_array.geom) if len(self.datastore['ruptures']) == 0: if os.environ.get('OQ_SAMPLE_SOURCES'): raise SystemExit(0) # success even with no ruptures raise RuntimeError('No ruptures were generated, perhaps the ' 'investigation time is too short') # must be called before storing the events self.store_rlz_info(eff_ruptures) # store full_lt with self.monitor('store source_info'): self.store_source_info(calc_times) imp = calc.RuptureImporter(self.datastore) with self.monitor('saving ruptures and events'): imp.import_rups(self.datastore.getitem('ruptures')[()]) def agg_dicts(self, acc, result): """ :param acc: accumulator dictionary :param result: an AccumDict with events, ruptures, gmfs and hcurves """ sav_mon = self.monitor('saving gmfs') agg_mon = self.monitor('aggregating hcurves') with sav_mon: data = result.pop('gmfdata') if len(data): times = result.pop('times') rupids = list(times['rup_id']) self.datastore['gmf_data/time_by_rup'][rupids] = times hdf5.extend(self.datastore['gmf_data/data'], data) sig_eps = result.pop('sig_eps') hdf5.extend(self.datastore['gmf_data/sigma_epsilon'], sig_eps) for sid, start, stop in result['indices']: self.indices[sid, 0].append(start + self.offset) self.indices[sid, 1].append(stop + self.offset) self.offset += len(data) if 
self.offset >= TWO32: raise RuntimeError( 'The gmf_data table has more than %d rows' % TWO32) imtls = self.oqparam.imtls with agg_mon: for key, poes in result.get('hcurves', {}).items(): r, sid, imt = str2rsi(key) array = acc[r].setdefault(sid, 0).array[imtls(imt), 0] array[:] = 1. - (1. - array) * (1. - poes) self.datastore.flush() return acc def set_param(self, **kw): oq = self.oqparam # set the minimum_intensity if hasattr(self, 'crmodel') and not oq.minimum_intensity: # infer it from the risk models if not directly set in job.ini oq.minimum_intensity = self.crmodel.min_iml min_iml = oq.min_iml if oq.ground_motion_fields and min_iml.sum() == 0: logging.warning('The GMFs are not filtered: ' 'you may want to set a minimum_intensity') else: logging.info('minimum_intensity=%s', oq.minimum_intensity) self.param.update( oqparam=oq, gmf=oq.ground_motion_fields, truncation_level=oq.truncation_level, imtls=oq.imtls, filter_distance=oq.filter_distance, ses_per_logic_tree_path=oq.ses_per_logic_tree_path, **kw) def _read_scenario_ruptures(self): oq = self.oqparam if oq.inputs['rupture_model'].endswith(('.xml', '.toml', '.txt')): self.gsims = readinput.get_gsims(oq) self.cmaker = ContextMaker( '*', self.gsims, {'maximum_distance': oq.maximum_distance, 'filter_distance': oq.filter_distance}) n_occ = numpy.array([oq.number_of_ground_motion_fields]) rup = readinput.get_rupture(oq) ebr = EBRupture(rup, 0, 0, n_occ) ebr.e0 = 0 rup_array = get_rup_array([ebr], self.srcfilter).array mesh = surface_to_array(rup.surface).transpose(1, 2, 0).flatten() hdf5.extend(self.datastore['rupgeoms'], numpy.array([mesh], object)) elif oq.inputs['rupture_model'].endswith('.csv'): aw = readinput.get_ruptures(oq.inputs['rupture_model']) rup_array = aw.array hdf5.extend(self.datastore['rupgeoms'], aw.geom) if len(rup_array) == 0: raise RuntimeError( 'There are no sites within the maximum_distance' ' of %s km from the rupture' % oq.maximum_distance( rup.tectonic_region_type, rup.mag)) gsim_lt = readinput.get_gsim_lt(self.oqparam) # check the number of branchsets branchsets = len(gsim_lt._ltnode) if len(rup_array) == 1 and branchsets > 1: raise InvalidFile( '%s for a scenario calculation must contain a single ' 'branchset, found %d!' 
% (oq.inputs['job_ini'], branchsets)) fake = logictree.FullLogicTree.fake(gsim_lt) self.realizations = fake.get_realizations() self.datastore['full_lt'] = fake self.store_rlz_info({}) # store weights self.save_params() calc.RuptureImporter(self.datastore).import_rups(rup_array) def execute(self): oq = self.oqparam self.set_param() self.offset = 0 self.indices = AccumDict(accum=[]) # sid, idx -> indices if oq.hazard_calculation_id: # from ruptures self.datastore.parent = util.read(oq.hazard_calculation_id) elif hasattr(self, 'csm'): # from sources self.build_events_from_sources() if (oq.ground_motion_fields is False and oq.hazard_curves_from_gmfs is False): return {} elif 'rupture_model' not in oq.inputs: logging.warning( 'There is no rupture_model, the calculator will just ' 'import data without performing any calculation') return {} else: # scenario self._read_scenario_ruptures() if (oq.ground_motion_fields is False and oq.hazard_curves_from_gmfs is False): return {} if not oq.imtls: raise InvalidFile('There are no intensity measure types in %s' % oq.inputs['job_ini']) N = len(self.sitecol.complete) if oq.ground_motion_fields: nrups = len(self.datastore['ruptures']) self.datastore.create_dset('gmf_data/data', oq.gmf_data_dt()) self.datastore.create_dset('gmf_data/sigma_epsilon', sig_eps_dt(oq.imtls)) self.datastore.create_dset( 'gmf_data/indices', hdf5.vuint32, shape=(N, 2), fillvalue=None) self.datastore.create_dset('gmf_data/events_by_sid', U32, (N,)) self.datastore.create_dset('gmf_data/time_by_rup', time_dt, (nrups,), fillvalue=None) if oq.hazard_curves_from_gmfs: self.param['rlz_by_event'] = self.datastore['events']['rlz_id'] # compute_gmfs in parallel nr = len(self.datastore['ruptures']) self.datastore.swmr_on() logging.info('Reading %d ruptures', nr) iterargs = ((rgetter, self.srcfilter, self.param) for rgetter in gen_rupture_getters( self.datastore, self.srcfilter, oq.concurrent_tasks)) acc = parallel.Starmap( self.core_task.__func__, iterargs, h5=self.datastore.hdf5, num_cores=oq.num_cores ).reduce(self.agg_dicts, self.acc0()) if self.indices: dset = self.datastore['gmf_data/indices'] num_evs = self.datastore['gmf_data/events_by_sid'] logging.info('Saving gmf_data/indices') with self.monitor('saving gmf_data/indices', measuremem=True): self.datastore['gmf_data/imts'] = ' '.join(oq.imtls) for sid in self.sitecol.complete.sids: start = numpy.array(self.indices[sid, 0]) stop = numpy.array(self.indices[sid, 1]) dset[sid, 0] = start dset[sid, 1] = stop num_evs[sid] = (stop - start).sum() avg_events_by_sid = num_evs[()].sum() / N logging.info('Found ~%d GMVs per site', avg_events_by_sid) elif oq.ground_motion_fields: raise RuntimeError('No GMFs were generated, perhaps they were ' 'all below the minimum_intensity threshold') return acc def post_execute(self, result): oq = self.oqparam if not oq.ground_motion_fields and not oq.hazard_curves_from_gmfs: return N = len(self.sitecol.complete) M = len(oq.imtls) # 0 in scenario L = len(oq.imtls.array) L1 = L // (M or 1) if result and oq.hazard_curves_from_gmfs: rlzs = self.datastore['full_lt'].get_realizations() # compute and save statistics; this is done in process and can # be very slow if there are thousands of realizations weights = [rlz.weight for rlz in rlzs] # NB: in the future we may want to save to individual hazard # curves if oq.individual_curves is set; for the moment we # save the statistical curves only hstats = oq.hazard_stats() S = len(hstats) pmaps = list(result.values()) R = len(weights) if len(pmaps) != R: # this should 
                # never happen, unless I break the
                # logic tree reduction mechanism during refactoring
                raise AssertionError('Expected %d pmaps, got %d' %
                                     (len(weights), len(pmaps)))
            if oq.individual_curves:
                logging.info('Saving individual hazard curves')
                self.datastore.create_dset('hcurves-rlzs', F32, (N, R, M, L1))
                self.datastore.set_shape_attrs(
                    'hcurves-rlzs', site_id=N, rlz_id=R,
                    imt=list(oq.imtls), lvl=numpy.arange(L1))
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset(
                        'hmaps-rlzs', F32, (N, R, M, P))
                    self.datastore.set_shape_attrs(
                        'hmaps-rlzs', site_id=N, rlz_id=R,
                        imt=list(oq.imtls), poe=oq.poes)
                for r, pmap in enumerate(pmaps):
                    arr = numpy.zeros((N, M, L1), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array.reshape(M, L1)
                    self.datastore['hcurves-rlzs'][:, r] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, r] = hmap[sid].array
            if S:
                logging.info('Computing statistical hazard curves')
                self.datastore.create_dset('hcurves-stats', F32,
                                           (N, S, M, L1))
                self.datastore.set_shape_attrs(
                    'hcurves-stats', site_id=N, stat=list(hstats),
                    imt=list(oq.imtls), lvl=numpy.arange(L1))
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset(
                        'hmaps-stats', F32, (N, S, M, P))
                    self.datastore.set_shape_attrs(
                        'hmaps-stats', site_id=N, stat=list(hstats),
                        imt=list(oq.imtls), poes=oq.poes)
                for s, stat in enumerate(hstats):
                    pmap = compute_pmap_stats(
                        pmaps, [hstats[stat]], weights, oq.imtls)
                    arr = numpy.zeros((N, M, L1), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array.reshape(M, L1)
                    self.datastore['hcurves-stats'][:, s] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, s] = hmap[sid].array
        if self.datastore.parent:
            self.datastore.parent.open('r')
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            job_id = logs.init('job')
            oq.calculation_mode = 'classical'
            self.cl = ClassicalCalculator(oq, job_id)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, usually this is quite fast and does not
            # dominate the computation
            self.cl.run()
            engine.expose_outputs(self.cl.datastore)
            for imt in oq.imtls:
                # NB: the classical curves must come from the datastore of
                # the child classical calculation, the event based curves
                # from the current datastore
                cl_mean_curves = get_mean_curves(self.cl.datastore, imt)
                eb_mean_curves = get_mean_curves(self.datastore, imt)
                self.rdiff, index = util.max_rel_diff_index(
                    cl_mean_curves, eb_mean_curves)
                logging.warning('Relative difference with the classical '
                                'mean curves: %d%% at site index %d, imt=%s',
                                self.rdiff * 100, index, imt)
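# The (N, M, L1) reshaping above relies on oq.imtls.array being the
# concatenation of L1 levels per IMT, in IMT order, so that a flat curve
# of length L = M * L1 can be viewed per IMT. A sketch, assuming that
# layout (the fake data stands in for a real hazard curve):
import numpy

M, L1 = 2, 3                               # 2 IMTs, 3 levels each
flat = numpy.arange(M * L1, dtype=float)   # a fake flat hazard curve
per_imt = flat.reshape(M, L1)
print(per_imt[0])  # levels for the first IMT  -> [0. 1. 2.]
print(per_imt[1])  # levels for the second IMT -> [3. 4. 5.]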
class EventBasedCalculator(ClassicalCalculator): """ Event based PSHA calculator generating the ground motion fields and the hazard curves from the ruptures, depending on the configuration parameters. """ pre_calculator = 'event_based_rupture' core_task = compute_gmfs_and_curves is_stochastic = True def combine_pmaps_and_save_gmfs(self, acc, res): """ Combine the hazard curves (if any) and save the gmfs (if any) sequentially; notice that the gmfs may come from different tasks in any order. :param acc: an accumulator for the hazard curves :param res: a dictionary rlzi, imt -> [gmf_array, curves_by_imt] :returns: a new accumulator """ sav_mon = self.monitor('saving gmfs') agg_mon = self.monitor('aggregating hcurves') self.gmdata += res['gmdata'] data = res['gmfdata'] if data is not None: with sav_mon: hdf5.extend3(self.datastore.hdf5path, 'gmf_data/data', data) for sid, start, stop in res['indices']: self.indices[sid].append( (start + self.offset, stop + self.offset)) self.offset += len(data) slicedic = self.oqparam.imtls.slicedic with agg_mon: for key, poes in res['hcurves'].items(): rlzi, sid, imt = str2rsi(key) array = acc[rlzi].setdefault(sid, 0).array[slicedic[imt], 0] array[:] = 1. - (1. - array) * (1. - poes) sav_mon.flush() agg_mon.flush() self.datastore.flush() if 'ruptures' in res: vars(EventBasedRuptureCalculator)['save_ruptures'](self, res['ruptures']) return acc def gen_args(self, ruptures_by_grp): """ :param ruptures_by_grp: a dictionary of EBRupture objects :yields: the arguments for compute_gmfs_and_curves """ oq = self.oqparam monitor = self.monitor(self.core_task.__name__) imts = list(oq.imtls) min_iml = calc.fix_minimum_intensity(oq.minimum_intensity, imts) correl_model = oq.get_correl_model() for grp_id in ruptures_by_grp: ruptures = ruptures_by_grp[grp_id] if not ruptures: continue rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(grp_id) for block in block_splitter(ruptures, oq.ruptures_per_block): samples = self.rlzs_assoc.samples[grp_id] getter = GmfGetter(grp_id, rlzs_by_gsim, block, self.sitecol, imts, min_iml, oq.truncation_level, correl_model, samples) yield getter, oq, monitor def execute(self): """ Run in parallel `core_task(sources, sitecol, monitor)`, by parallelizing on the ruptures according to their weight and tectonic region type. 
""" oq = self.oqparam if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields: return if self.oqparam.ground_motion_fields: calc.check_overflow(self) with self.monitor('reading ruptures', autoflush=True): ruptures_by_grp = (self.precalc.result if self.precalc else get_ruptures_by_grp(self.datastore.parent)) self.csm_info = self.datastore['csm_info'] self.sm_id = { tuple(sm.path): sm.ordinal for sm in self.csm_info.source_models } L = len(oq.imtls.array) rlzs = self.rlzs_assoc.realizations allargs = list(self.gen_args(ruptures_by_grp)) res = parallel.Starmap(self.core_task.__func__, allargs).submit_all() self.gmdata = {} self.offset = 0 self.indices = collections.defaultdict(list) # sid -> indices acc = res.reduce(self.combine_pmaps_and_save_gmfs, {rlz.ordinal: ProbabilityMap(L) for rlz in rlzs}) save_gmdata(self, len(rlzs)) if self.indices: logging.info('Saving gmf_data/indices') with self.monitor('saving gmf_data/indices', measuremem=True, autoflush=True): self.datastore.save_vlen('gmf_data/indices', [ numpy.array(self.indices[sid], indices_dt) for sid in self.sitecol.complete.sids ]) return acc def save_gmf_bytes(self): """Save the attribute nbytes in the gmf_data datasets""" ds = self.datastore for sm_id in ds['gmf_data']: ds.set_nbytes('gmf_data/' + sm_id) ds.set_nbytes('gmf_data') def post_execute(self, result): """ :param result: a dictionary (src_group_id, gsim) -> haz_curves or an empty dictionary if hazard_curves_from_gmfs is false """ oq = self.oqparam if not oq.hazard_curves_from_gmfs and not oq.ground_motion_fields: return elif oq.hazard_curves_from_gmfs: rlzs = self.rlzs_assoc.realizations # save individual curves for i in sorted(result): key = 'hcurves/rlz-%03d' % i if result[i]: self.datastore[key] = result[i] else: self.datastore[key] = ProbabilityMap(oq.imtls.array.size) logging.info('Zero curves for %s', key) # compute and save statistics; this is done in process # we don't need to parallelize, since event based calculations # involves a "small" number of sites (<= 65,536) weights = [rlz.weight for rlz in rlzs] hstats = self.oqparam.hazard_stats() if len(hstats) and len(rlzs) > 1: for kind, stat in hstats: pmap = compute_pmap_stats(result.values(), [stat], weights) self.datastore['hcurves/' + kind] = pmap if 'gmf_data' in self.datastore: self.save_gmf_bytes() if oq.compare_with_classical: # compute classical curves export_dir = os.path.join(oq.export_dir, 'cl') if not os.path.exists(export_dir): os.makedirs(export_dir) oq.export_dir = export_dir # one could also set oq.number_of_logic_tree_samples = 0 self.cl = ClassicalCalculator(oq, self.monitor('classical')) # TODO: perhaps it is possible to avoid reprocessing the source # model, however usually this is quite fast and do not dominate # the computation self.cl.run(close=False) cl_mean_curves = get_mean_curves(self.cl.datastore) eb_mean_curves = get_mean_curves(self.datastore) for imt in eb_mean_curves.dtype.names: rdiff, index = util.max_rel_diff_index(cl_mean_curves[imt], eb_mean_curves[imt]) logging.warn( 'Relative difference with the classical ' 'mean curves for IMT=%s: %d%% at site index %d', imt, rdiff * 100, index)
    def post_execute(self, result):
        """
        Save the statistical hazard curves and maps; if
        compare_with_classical is set, also run the classical comparison
        """
        oq = self.oqparam
        if 'ucerf' in oq.calculation_mode:
            self.rupser.close()
            self.csm.info.update_eff_ruptures(self.csm.get_num_ruptures())
            self.setting_events()
        N = len(self.sitecol.complete)
        L = len(oq.imtls.array)
        if result and oq.hazard_curves_from_gmfs:
            rlzs = self.csm_info.get_rlzs_assoc().realizations
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            # NB: in the future we may want to save the individual hazard
            # curves if oq.individual_curves is set; for the moment we
            # save the statistical curves only
            hstats = oq.hazard_stats()
            if len(hstats):
                logging.info('Computing statistical hazard curves')
                for statname, stat in hstats:
                    pmap = compute_pmap_stats(result.values(), [stat],
                                              weights)
                    arr = numpy.zeros((N, L), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array[:, 0]
                    self.datastore['hcurves/' + statname] = arr
                    if oq.poes:
                        P = len(oq.poes)
                        I = len(oq.imtls)
                        self.datastore.create_dset(
                            'hmaps/' + statname, F32, (N, P * I))
                        self.datastore.set_attrs(
                            'hmaps/' + statname, nbytes=N * P * I * 4)
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        ds = self.datastore['hmaps/' + statname]
                        for sid in hmap:
                            ds[sid] = hmap[sid].array[:, 0]
        if self.datastore.parent:
            self.datastore.parent.open('r')
        if 'gmf_data' in self.datastore:
            self.save_gmf_bytes()
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # one could also set oq.number_of_logic_tree_samples = 0
            self.cl = ClassicalCalculator(oq)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, usually this is quite fast and does not
            # dominate the computation
            self.cl.run(close=False)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            rdiff, index = util.max_rel_diff_index(
                cl_mean_curves, eb_mean_curves)
            logging.warning('Relative difference with the classical '
                            'mean curves: %d%% at site index %d',
                            rdiff * 100, index)
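# The hdf5.extend calls seen throughout the agg_dicts methods append rows
# to resizable HDF5 datasets; the underlying h5py idiom is sketched below
# (file and dataset names are illustrative, not the engine's layout):
import h5py
import numpy

with h5py.File('demo_extend.hdf5', 'w') as f:
    dset = f.create_dataset('gmvs', (0,), maxshape=(None,), dtype='f4')
    for chunk in (numpy.ones(3, 'f4'), numpy.zeros(2, 'f4')):
        n = len(dset)
        dset.resize((n + len(chunk),))  # grow, then write the new rows
        dset[n:] = chunk
    print(dset[:])  # [1. 1. 1. 0. 0.]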
def post_execute(self, result): oq = self.oqparam if not oq.ground_motion_fields: return N = len(self.sitecol.complete) L = len(oq.imtls.array) if result and oq.hazard_curves_from_gmfs: rlzs = self.rlzs_assoc.realizations # compute and save statistics; this is done in process and can # be very slow if there are thousands of realizations weights = [rlz.weight for rlz in rlzs] # NB: in the future we may want to save to individual hazard # curves if oq.individual_curves is set; for the moment we # save the statistical curves only hstats = oq.hazard_stats() S = len(hstats) pmaps = list(result.values()) R = len(weights) if len(pmaps) != R: # this should never happen, unless I break the # logic tree reduction mechanism during refactoring raise AssertionError('Expected %d pmaps, got %d' % (len(weights), len(pmaps))) if oq.individual_curves: logging.info('Saving individual hazard curves') self.datastore.create_dset('hcurves-rlzs', F32, (N, R, L)) self.datastore.set_attrs('hcurves-rlzs', nbytes=N * R * L * 4) if oq.poes: P = len(oq.poes) M = len(oq.imtls) ds = self.datastore.create_dset( 'hmaps-rlzs', F32, (N, R, M, P)) self.datastore.set_attrs( 'hmaps-rlzs', nbytes=N * R * P * M * 4) for r, pmap in enumerate(pmaps): arr = numpy.zeros((N, L), F32) for sid in pmap: arr[sid] = pmap[sid].array[:, 0] self.datastore['hcurves-rlzs'][:, r] = arr if oq.poes: hmap = calc.make_hmap(pmap, oq.imtls, oq.poes) for sid in hmap: ds[sid, r] = hmap[sid].array if S: logging.info('Computing statistical hazard curves') self.datastore.create_dset('hcurves-stats', F32, (N, S, L)) self.datastore.set_attrs('hcurves-stats', nbytes=N * S * L * 4) if oq.poes: P = len(oq.poes) M = len(oq.imtls) ds = self.datastore.create_dset( 'hmaps-stats', F32, (N, S, M, P)) self.datastore.set_attrs( 'hmaps-stats', nbytes=N * S * P * M * 4) for s, stat in enumerate(hstats): pmap = compute_pmap_stats( pmaps, [hstats[stat]], weights, oq.imtls) arr = numpy.zeros((N, L), F32) for sid in pmap: arr[sid] = pmap[sid].array[:, 0] self.datastore['hcurves-stats'][:, s] = arr if oq.poes: hmap = calc.make_hmap(pmap, oq.imtls, oq.poes) for sid in hmap: ds[sid, s] = hmap[sid].array if self.datastore.parent: self.datastore.parent.open('r') if oq.compare_with_classical: # compute classical curves export_dir = os.path.join(oq.export_dir, 'cl') if not os.path.exists(export_dir): os.makedirs(export_dir) oq.export_dir = export_dir job_id = logs.init('job') self.cl = ClassicalCalculator(oq, job_id) # TODO: perhaps it is possible to avoid reprocessing the source # model, however usually this is quite fast and do not dominate # the computation self.cl.run(close=False) engine.expose_outputs(self.cl.datastore) cl_mean_curves = get_mean_curves(self.cl.datastore) eb_mean_curves = get_mean_curves(self.datastore) self.rdiff, index = util.max_rel_diff_index( cl_mean_curves, eb_mean_curves) logging.warning('Relative difference with the classical ' 'mean curves: %d%% at site index %d', self.rdiff * 100, index)
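# The minimum_intensity warnings in set_param refer to discarding ground
# motion values below a per-IMT threshold before they are stored; the
# effect can be sketched on a plain array (one IMT, illustrative
# threshold). Without a threshold, every tiny GMV ends up in gmf_data.
import numpy

gmvs = numpy.array([1e-4, .05, 2e-3, .2])
min_iml = .01
kept = gmvs[gmvs >= min_iml]
print(kept)  # [0.05 0.2 ] -- values below min_iml never reach gmf_data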
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    core_task = compute_gmfs
    is_stochastic = True
    accept_precalc = ['event_based', 'ebrisk', 'event_based_risk']

    def init(self):
        if hasattr(self, 'csm'):
            self.check_floating_spinning()
        if not self.datastore.parent:
            self.rupser = calc.RuptureSerializer(self.datastore)
        self.srcfilter = self.src_filter(self.datastore.tempname)

    def init_logic_tree(self, full_lt):
        self.trt_by_grp = full_lt.trt_by_grp
        self.rlzs_by_gsim_grp = full_lt.get_rlzs_by_gsim_grp()
        self.samples_by_grp = full_lt.get_samples_by_grp()
        self.num_rlzs_by_grp = {
            grp_id:
            sum(len(rlzs) for rlzs in self.rlzs_by_gsim_grp[grp_id].values())
            for grp_id in self.rlzs_by_gsim_grp}

    def acc0(self):
        """
        Initial accumulator, a dictionary (grp_id, gsim) -> curves
        """
        self.L = len(self.oqparam.imtls.array)
        zd = {r: ProbabilityMap(self.L) for r in range(self.R)}
        return zd

    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
        logging.info('Building ruptures')
        maxweight = sum(sg.weight for sg in self.csm.src_groups) / (
            self.oqparam.concurrent_tasks or 1)
        eff_ruptures = AccumDict(accum=0)  # trt => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
        allargs = []
        if self.oqparam.is_ucerf():
            # manage the filtering in a special way
            for sg in self.csm.src_groups:
                for src in sg:
                    src.src_filter = self.srcfilter
            srcfilter = nofilter  # otherwise it would be ultra-slow
        else:
            srcfilter = self.srcfilter
        for sg in self.csm.src_groups:
            if not sg.sources:
                continue
            logging.info('Sending %s', sg)
            par = self.param.copy()
            par['gsims'] = gsims_by_trt[sg.trt]
            for src_group in sg.split(maxweight):
                allargs.append((src_group, srcfilter, par))
        smap = parallel.Starmap(
            sample_ruptures, allargs, h5=self.datastore.hdf5)
        mon = self.monitor('saving ruptures')
        for dic in smap:
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            if dic['rup_array']:
                with mon:
                    self.rupser.save(dic['rup_array'])
        self.rupser.close()
        if not self.rupser.nruptures:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # logic tree reduction, must be called before storing the events
        self.store_rlz_info(eff_ruptures)
        self.init_logic_tree(self.csm.full_lt)
        with self.monitor('store source_info'):
            self.store_source_info(calc_times)
        logging.info('Reordering the ruptures and storing the events')
        sorted_ruptures = self.datastore.getitem('ruptures')[()]
        # order the ruptures by rup_id
        sorted_ruptures.sort(order='serial')
        nr = len(sorted_ruptures)
        assert len(numpy.unique(sorted_ruptures['serial'])) == nr  # sanity
        self.datastore['ruptures'] = sorted_ruptures
        self.datastore['ruptures']['id'] = numpy.arange(nr)
        with self.monitor('saving events'):
            self.save_events(sorted_ruptures)

    def agg_dicts(self, acc, result):
        """
        :param acc: accumulator dictionary
        :param result: an AccumDict with events, ruptures, gmfs and hcurves
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        with sav_mon:
            data = result.pop('gmfdata')
            if len(data):
                times = result.pop('times')
                rupids = list(times['rup_id'])
                self.datastore['gmf_data/time_by_rup'][rupids] = times
                hdf5.extend(self.datastore['gmf_data/data'], data)
                sig_eps = result.pop('sig_eps')
                hdf5.extend(self.datastore['gmf_data/sigma_epsilon'],
                            sig_eps)
                for sid, start, stop in result['indices']:
                    self.indices[sid, 0].append(start + self.offset)
                    self.indices[sid, 1].append(stop + self.offset)
                self.offset += len(data)
        if self.offset >= TWO32:
            raise RuntimeError(
                'The gmf_data table has more than %d rows' % TWO32)
        imtls = self.oqparam.imtls
        with agg_mon:
            for key, poes in result.get('hcurves', {}).items():
                r, sid, imt = str2rsi(key)
                array = acc[r].setdefault(sid, 0).array[imtls(imt), 0]
                array[:] = 1. - (1. - array) * (1. - poes)
        self.datastore.flush()
        return acc

    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with fields grp_id
        :returns: a list of RuptureGetters
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(
            rup_array, self.samples_by_grp, self.num_rlzs_by_grp)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        # when computing the events all ruptures must be considered,
        # including the ones far away that will be discarded later on
        rgetters = gen_rgetters(self.datastore)
        # build the associations eid -> rlz sequentially or in parallel;
        # this is very fast: I saw 30 million events associated in 1 minute!
        logging.info('Building assocs event_id -> rlz_id for {:_d} events'
                     ' and {:_d} ruptures'.format(
                         len(events), len(rup_array)))
        if len(events) < 1E5:
            it = map(RuptureGetter.get_eid_rlz, rgetters)
        else:
            it = parallel.Starmap(
                RuptureGetter.get_eid_rlz,
                ((rgetter,) for rgetter in rgetters),
                progress=logging.debug, h5=self.datastore.hdf5)
        i = 0
        for eid_rlz in it:
            for er in eid_rlz:
                events[i] = er
                i += 1
                if i >= TWO32:
                    raise ValueError('There are more than %d events!' % i)
        events.sort(order='rup_id')  # fast too
        # sanity check
        n_unique_events = len(numpy.unique(events[['id', 'rup_id']]))
        assert n_unique_events == len(events), (n_unique_events, len(events))
        events['id'] = numpy.arange(len(events))
        # set event year and event ses starting from 1
        itime = int(self.oqparam.investigation_time)
        nses = self.oqparam.ses_per_logic_tree_path
        extra = numpy.zeros(len(events), [('year', U16), ('ses_id', U16)])
        numpy.random.seed(self.oqparam.ses_seed)
        extra['year'] = numpy.random.choice(itime, len(events)) + 1
        extra['ses_id'] = numpy.random.choice(nses, len(events)) + 1
        self.datastore['events'] = util.compose_arrays(events, extra)
        eindices = get_indices(events['rup_id'])
        arr = numpy.array(list(eindices.values()))[:, 0, :]
        self.datastore['ruptures']['e0'] = arr[:, 0]
        self.datastore['ruptures']['e1'] = arr[:, 1]

    def check_overflow(self):
        """
        Raise a ValueError if the number of sites is larger than 65,536 or
        the number of IMTs is larger than 256 or the number of ruptures is
        larger than 4,294,967,296. The limits are due to the numpy dtype
        used to store the GMFs (gmv_dt). There is also a limit of
        max_potential_gmfs on the number of sites times the number of
        events, to avoid producing too many GMFs. In that case split the
        calculation or be smarter.
        """
        oq = self.oqparam
        max_ = dict(sites=TWO32, events=TWO32, imts=2**8)
        num_ = dict(events=self.E, imts=len(self.oqparam.imtls))
        n = len(getattr(self, 'sitecol', ()) or ())
        num_['sites'] = n
        if oq.calculation_mode == 'event_based' and oq.ground_motion_fields:
            if n > oq.max_sites_per_gmf:
                raise ValueError(
                    'You cannot compute the GMFs for %d > %d sites' %
                    (n, oq.max_sites_per_gmf))
            elif n * self.E > oq.max_potential_gmfs:
                raise ValueError(
                    'A GMF calculation with %d sites and %d events is '
                    'impossibly large' % (n, self.E))
        for var in num_:
            if num_[var] > max_[var]:
                raise ValueError(
                    'The %s calculator is restricted to %d %s, got %d' %
                    (oq.calculation_mode, max_[var], var, num_[var]))

    def set_param(self, **kw):
        oq = self.oqparam
        # set the minimum_intensity
        if hasattr(self, 'crmodel') and not oq.minimum_intensity:
            # infer it from the risk models if not directly set in job.ini
            oq.minimum_intensity = self.crmodel.min_iml
        min_iml = oq.min_iml
        if oq.ground_motion_fields and min_iml.sum() == 0:
            logging.warning('The GMFs are not filtered: '
                            'you may want to set a minimum_intensity')
        else:
            logging.info('minimum_intensity=%s', oq.minimum_intensity)
        self.param.update(
            oqparam=oq,
            gmf=oq.ground_motion_fields,
            truncation_level=oq.truncation_level,
            imtls=oq.imtls,
            filter_distance=oq.filter_distance,
            ses_per_logic_tree_path=oq.ses_per_logic_tree_path, **kw)

    def execute(self):
        oq = self.oqparam
        self.set_param()
        self.offset = 0
        self.indices = AccumDict(accum=[])  # sid, idx -> indices
        if oq.hazard_calculation_id:  # from ruptures
            self.datastore.parent = util.read(oq.hazard_calculation_id)
            self.init_logic_tree(self.datastore.parent['full_lt'])
        else:  # from sources
            self.build_events_from_sources()
            if (oq.ground_motion_fields is False and
                    oq.hazard_curves_from_gmfs is False):
                return {}
        if not oq.imtls:
            raise InvalidFile('There are no intensity measure types in %s' %
                              oq.inputs['job_ini'])
        N = len(self.sitecol.complete)
        if oq.ground_motion_fields:
            nrups = len(self.datastore['ruptures'])
            self.datastore.create_dset('gmf_data/data', oq.gmf_data_dt())
            self.datastore.create_dset('gmf_data/sigma_epsilon',
                                       sig_eps_dt(oq.imtls))
            self.datastore.create_dset('gmf_data/indices', hdf5.vuint32,
                                       shape=(N, 2), fillvalue=None)
            self.datastore.create_dset('gmf_data/events_by_sid', U32, (N,))
            self.datastore.create_dset('gmf_data/time_by_rup',
                                       time_dt, (nrups,), fillvalue=None)
        if oq.hazard_curves_from_gmfs:
            self.param['rlz_by_event'] = self.datastore['events']['rlz_id']

        # compute_gmfs in parallel
        self.datastore.swmr_on()
        logging.info('Reading %d ruptures', len(self.datastore['ruptures']))
        iterargs = ((rgetter, self.srcfilter, self.param)
                    for rgetter in gen_rupture_getters(
                        self.datastore, self.srcfilter,
                        oq.concurrent_tasks))
        acc = parallel.Starmap(
            self.core_task.__func__, iterargs, h5=self.datastore.hdf5,
            num_cores=oq.num_cores).reduce(self.agg_dicts, self.acc0())

        if self.indices:
            dset = self.datastore['gmf_data/indices']
            num_evs = self.datastore['gmf_data/events_by_sid']
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices', measuremem=True):
                self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
                for sid in self.sitecol.complete.sids:
                    start = numpy.array(self.indices[sid, 0])
                    stop = numpy.array(self.indices[sid, 1])
                    dset[sid, 0] = start
                    dset[sid, 1] = stop
                    num_evs[sid] = (stop - start).sum()
            avg_events_by_sid = num_evs[()].sum() / N
            logging.info('Found ~%d GMVs per site', avg_events_by_sid)
        elif oq.ground_motion_fields:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc

    def post_execute(self, result):
        oq = self.oqparam
        if not oq.ground_motion_fields and not oq.hazard_curves_from_gmfs:
            return
        N = len(self.sitecol.complete)
        M = len(oq.imtls)
        L = len(oq.imtls.array)
        L1 = L // M
        if result and oq.hazard_curves_from_gmfs:
            rlzs = self.datastore['full_lt'].get_realizations()
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            # NB: in the future we may want to save the individual hazard
            # curves if oq.individual_curves is set; for the moment we
            # save the statistical curves only
            hstats = oq.hazard_stats()
            S = len(hstats)
            pmaps = list(result.values())
            R = len(weights)
            if len(pmaps) != R:
                # this should never happen, unless I break the
                # logic tree reduction mechanism during refactoring
                raise AssertionError('Expected %d pmaps, got %d' %
                                     (len(weights), len(pmaps)))
            if oq.individual_curves:
                logging.info('Saving individual hazard curves')
                self.datastore.create_dset('hcurves-rlzs', F32,
                                           (N, R, M, L1))
                self.datastore.set_shape_attrs(
                    'hcurves-rlzs', site_id=N, rlz_id=R,
                    imt=list(oq.imtls), lvl=numpy.arange(L1))
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset(
                        'hmaps-rlzs', F32, (N, R, M, P))
                    self.datastore.set_shape_attrs(
                        'hmaps-rlzs', site_id=N, rlz_id=R,
                        imt=list(oq.imtls), poe=oq.poes)
                for r, pmap in enumerate(pmaps):
                    arr = numpy.zeros((N, M, L1), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array.reshape(M, L1)
                    self.datastore['hcurves-rlzs'][:, r] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, r] = hmap[sid].array
            if S:
                logging.info('Computing statistical hazard curves')
                self.datastore.create_dset('hcurves-stats', F32,
                                           (N, S, M, L1))
                self.datastore.set_shape_attrs(
                    'hcurves-stats', site_id=N, stat=list(hstats),
                    imt=list(oq.imtls), lvl=numpy.arange(L1))
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset(
                        'hmaps-stats', F32, (N, S, M, P))
                    self.datastore.set_shape_attrs(
                        'hmaps-stats', site_id=N, stat=list(hstats),
                        imt=list(oq.imtls), poe=oq.poes)
                for s, stat in enumerate(hstats):
                    pmap = compute_pmap_stats(
                        pmaps, [hstats[stat]], weights, oq.imtls)
                    arr = numpy.zeros((N, M, L1), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array.reshape(M, L1)
                    self.datastore['hcurves-stats'][:, s] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, s] = hmap[sid].array
        if self.datastore.parent:
            self.datastore.parent.open('r')
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            job_id = logs.init('job')
            oq.calculation_mode = 'classical'
            self.cl = ClassicalCalculator(oq, job_id)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, usually this is quite fast and does not
            # dominate the computation
            self.cl.run()
            engine.expose_outputs(self.cl.datastore)
            for imt in oq.imtls:
                cl_mean_curves = get_mean_curves(self.cl.datastore, imt)
                eb_mean_curves = get_mean_curves(self.datastore, imt)
                self.rdiff, index = util.max_rel_diff_index(
                    cl_mean_curves, eb_mean_curves)
                logging.warning(
                    'Relative difference with the classical '
                    'mean curves: %d%% at site index %d, imt=%s',
                    self.rdiff * 100, index, imt)
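
# agg_dicts above merges partial hazard curves with the update
#     array[:] = 1. - (1. - array) * (1. - poes)
# i.e. it treats each incoming batch of ruptures as an independent
# contribution: if P1 and P2 are probabilities of exceedance from two
# disjoint sets of events, the combined probability is 1 - (1-P1)(1-P2).
# A tiny numeric illustration with plain numpy (no engine code involved):
import numpy

acc = numpy.zeros(4)  # accumulated PoEs for 4 intensity levels
for poes in [numpy.array([.2, .1, .05, 0.]),
             numpy.array([.1, .1, 0., 0.])]:
    acc[:] = 1. - (1. - acc) * (1. - poes)
# acc[0] == 1 - .8 * .9 == 0.28, not .2 + .1 == .3: probabilities of
# exceedance combine multiplicatively, they do not simply add up
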
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    core_task = compute_gmfs
    is_stochastic = True
    accept_precalc = ['event_based', 'event_based_risk', 'ucerf_hazard']
    build_ruptures = sample_ruptures

    @cached_property
    def csm_info(self):
        """
        :returns: a cached CompositionInfo object
        """
        try:
            return self.csm.info
        except AttributeError:
            return self.datastore.parent['csm_info']

    def init(self):
        if hasattr(self, 'csm'):
            self.check_floating_spinning()
        self.rupser = calc.RuptureSerializer(self.datastore)

    def init_logic_tree(self, csm_info):
        self.trt_by_grp = csm_info.grp_by("trt")
        self.rlzs_assoc = csm_info.get_rlzs_assoc()
        self.rlzs_by_gsim_grp = csm_info.get_rlzs_by_gsim_grp()
        self.samples_by_grp = csm_info.get_samples_by_grp()
        self.num_rlzs_by_grp = {
            grp_id:
            sum(len(rlzs) for rlzs in self.rlzs_by_gsim_grp[grp_id].values())
            for grp_id in self.rlzs_by_gsim_grp}

    def acc0(self):
        """
        Initial accumulator, a dictionary (grp_id, gsim) -> curves
        """
        self.L = len(self.oqparam.imtls.array)
        zd = {r: ProbabilityMap(self.L) for r in range(self.R)}
        return zd

    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        oq = self.oqparam
        gsims_by_trt = self.csm.gsim_lt.values

        def weight_src(src):
            return src.num_ruptures

        logging.info('Building ruptures')
        smap = parallel.Starmap(
            self.build_ruptures.__func__, monitor=self.monitor())
        eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(2, F32))
        ses_idx = 0
        for sm_id, sm in enumerate(self.csm.source_models):
            logging.info('Sending %s', sm)
            for sg in sm.src_groups:
                if not sg.sources:
                    continue
                par = self.param.copy()
                par['gsims'] = gsims_by_trt[sg.trt]
                if sg.atomic:  # do not split the group
                    smap.submit(sg, self.src_filter, par)
                else:  # traditional groups
                    for block in self.block_splitter(
                            sg.sources, weight_src, by_grp):
                        if 'ucerf' in oq.calculation_mode:
                            for i in range(oq.ses_per_logic_tree_path):
                                par['ses_seeds'] = [
                                    (ses_idx, oq.ses_seed + i + 1)]
                                smap.submit(block, self.src_filter, par)
                                ses_idx += 1
                        else:
                            smap.submit(block, self.src_filter, par)
        mon = self.monitor('saving ruptures')
        for dic in smap:
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            if dic['rup_array']:
                with mon:
                    self.rupser.save(dic['rup_array'])
        self.rupser.close()
        if not self.rupser.nruptures:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # logic tree reduction, must be called before storing the events
        self.store_rlz_info(eff_ruptures)
        store_rlzs_by_grp(self.datastore)
        self.init_logic_tree(self.csm.info)
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(calc_times)
        logging.info('Reordering the ruptures and storing the events')
        attrs = self.datastore.getitem('ruptures').attrs
        sorted_ruptures = self.datastore.getitem('ruptures')[()]
        # order the ruptures by serial
        sorted_ruptures.sort(order='serial')
        ngroups = len(self.csm.info.trt_by_grp)
        grp_indices = numpy.zeros((ngroups, 2), U32)
        grp_ids = sorted_ruptures['grp_id']
        for grp_id, [startstop] in get_indices(grp_ids).items():
            grp_indices[grp_id] = startstop
        self.datastore['ruptures'] = sorted_ruptures
        self.datastore.set_attrs('ruptures', grp_indices=grp_indices,
                                 **attrs)
        self.save_events(sorted_ruptures)

    def gen_rupture_getters(self):
        """
        :returns: a list of RuptureGetters
        """
        dstore = (self.datastore.parent if self.datastore.parent
                  else self.datastore)
        hdf5cache = dstore.hdf5cache()
        mode = 'r+' if os.path.exists(hdf5cache) else 'w'
        with hdf5.File(hdf5cache, mode) as cache:
            if 'ruptures' not in cache:
                dstore.hdf5.copy('ruptures', cache)
            if 'rupgeoms' not in cache:
                dstore.hdf5.copy('rupgeoms', cache)
        yield from gen_rupture_getters(
            dstore, concurrent_tasks=self.oqparam.concurrent_tasks or 1,
            hdf5cache=hdf5cache)
        if self.datastore.parent:
            self.datastore.parent.close()

    @cached_property
    def eid2idx(self):
        """
        :returns: a dict eid -> index in the events table
        """
        return dict(zip(self.datastore['events']['id'], range(self.E)))

    def agg_dicts(self, acc, result):
        """
        :param acc: accumulator dictionary
        :param result: an AccumDict with events, ruptures, gmfs and hcurves
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        with sav_mon:
            data = result.pop('gmfdata')
            if len(data) == 0:
                return acc
            idxs = base.get_idxs(data, self.eid2idx)  # this has to be fast
            data['eid'] = idxs  # replace eid with idx
            self.datastore.extend('gmf_data/data', data)
            sig_eps = result.pop('sig_eps')
            sig_eps['eid'] = base.get_idxs(sig_eps, self.eid2idx)
            self.datastore.extend('gmf_data/sigma_epsilon', sig_eps)
            # it is important to save the number of bytes while the
            # computation is going on, to see the progress
            update_nbytes(self.datastore, 'gmf_data/data', data)
            for sid, start, stop in result['indices']:
                self.indices[sid, 0].append(start + self.offset)
                self.indices[sid, 1].append(stop + self.offset)
            self.offset += len(data)
        if self.offset >= TWO32:
            raise RuntimeError(
                'The gmf_data table has more than %d rows' % TWO32)
        imtls = self.oqparam.imtls
        with agg_mon:
            for key, poes in result.get('hcurves', {}).items():
                r, sid, imt = str2rsi(key)
                array = acc[r].setdefault(sid, 0).array[imtls(imt), 0]
                array[:] = 1. - (1. - array) * (1. - poes)
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        return acc

    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with fields grp_id
        :returns: a list of RuptureGetters
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(
            rup_array, self.samples_by_grp, self.num_rlzs_by_grp)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        # when computing the events all ruptures must be considered,
        # including the ones far away that will be discarded later on
        rgetters = self.gen_rupture_getters()
        # build the associations eid -> rlz in parallel
        smap = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                ((rgetter,) for rgetter in rgetters),
                                self.monitor('get_eid_rlz'),
                                progress=logging.debug)
        i = 0
        for eid_rlz in smap:  # 30 million events associated in 1 minute!
            for er in eid_rlz:
                events[i] = er
                i += 1
                if i >= TWO32:
                    raise ValueError('There are more than %d events!' % i)
        events.sort(order='id')  # fast too
        n_unique_events = len(numpy.unique(events['id']))
        assert n_unique_events == len(events), (n_unique_events, len(events))
        self.datastore['events'] = events

    def check_overflow(self):
        """
        Raise a ValueError if the number of sites is larger than 65,536 or
        the number of IMTs is larger than 256 or the number of ruptures is
        larger than 4,294,967,296. The limits are due to the numpy dtype
        used to store the GMFs (gmv_dt). They could be relaxed in the
        future.
        """
        max_ = dict(sites=TWO32, events=TWO32, imts=2**8)
        num_ = dict(events=self.E, imts=len(self.oqparam.imtls))
        if self.sitecol:
            num_['sites'] = len(self.sitecol)
        for var in num_:
            if num_[var] > max_[var]:
                raise ValueError(
                    'The event based calculator is restricted to '
                    '%d %s, got %d' % (max_[var], var, num_[var]))

    def set_param(self, **kw):
        oq = self.oqparam
        # set the minimum_intensity
        if hasattr(self, 'riskmodel') and not oq.minimum_intensity:
            # infer it from the risk models if not directly set in job.ini
            oq.minimum_intensity = self.riskmodel.min_iml
        min_iml = oq.min_iml
        if min_iml.sum() == 0:
            logging.warning('The GMFs are not filtered: '
                            'you may want to set a minimum_intensity')
        else:
            logging.info('minimum_intensity=%s', oq.minimum_intensity)
        self.param.update(
            oqparam=oq,
            gmf=oq.ground_motion_fields,
            truncation_level=oq.truncation_level,
            ruptures_per_block=oq.ruptures_per_block,
            imtls=oq.imtls,
            filter_distance=oq.filter_distance,
            ses_per_logic_tree_path=oq.ses_per_logic_tree_path, **kw)

    def execute(self):
        oq = self.oqparam
        self.set_param()
        self.offset = 0
        self.indices = collections.defaultdict(list)  # sid, idx -> indices
        if oq.hazard_calculation_id and 'ruptures' in self.datastore:
            # from ruptures
            self.datastore.parent = datastore.read(oq.hazard_calculation_id)
            self.init_logic_tree(self.csm_info)
        else:  # from sources
            self.build_events_from_sources()
            if oq.ground_motion_fields is False:
                return {}
        if not oq.imtls:
            raise InvalidFile('There are no intensity measure types in %s' %
                              oq.inputs['job_ini'])
        iterargs = ((rgetter, self.src_filter, self.param)
                    for rgetter in self.gen_rupture_getters())
        # call compute_gmfs in parallel
        acc = parallel.Starmap(
            self.core_task.__func__, iterargs,
            self.monitor()).reduce(self.agg_dicts, self.acc0())

        if self.indices:
            N = len(self.sitecol.complete)
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices', measuremem=True,
                              autoflush=True):
                self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
                dset = self.datastore.create_dset(
                    'gmf_data/indices', hdf5.vuint32,
                    shape=(N, 2), fillvalue=None)
                num_evs = self.datastore.create_dset(
                    'gmf_data/events_by_sid', U32, (N,))
                for sid in self.sitecol.complete.sids:
                    start = numpy.array(self.indices[sid, 0])
                    stop = numpy.array(self.indices[sid, 1])
                    dset[sid, 0] = start
                    dset[sid, 1] = stop
                    num_evs[sid] = (stop - start).sum()
                num_evs = num_evs[()]
                avg_events_by_sid = num_evs.sum() / N
                logging.info('Found ~%d GMVs per site', avg_events_by_sid)
                self.datastore.set_attrs(
                    'gmf_data', avg_events_by_sid=avg_events_by_sid,
                    max_events_by_sid=num_evs.max())
        elif oq.ground_motion_fields:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc

    def post_execute(self, result):
        oq = self.oqparam
        if not oq.ground_motion_fields:
            return
        N = len(self.sitecol.complete)
        L = len(oq.imtls.array)
        if result and oq.hazard_curves_from_gmfs:
            rlzs = self.rlzs_assoc.realizations
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            # NB: in the future we may want to save the individual hazard
            # curves if oq.individual_curves is set; for the moment we
            # save the statistical curves only
            hstats = oq.hazard_stats()
            S = len(hstats)
            pmaps = list(result.values())
            R = len(weights)
            if len(pmaps) != R:
                # this should never happen, unless I break the
                # logic tree reduction mechanism during refactoring
                raise AssertionError('Expected %d pmaps, got %d' %
                                     (len(weights), len(pmaps)))
            if oq.individual_curves:
                logging.info('Saving individual hazard curves')
                self.datastore.create_dset('hcurves-rlzs', F32, (N, R, L))
                self.datastore.set_attrs('hcurves-rlzs',
                                         nbytes=N * R * L * 4)
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset(
                        'hmaps-rlzs', F32, (N, R, M, P))
                    self.datastore.set_attrs(
                        'hmaps-rlzs', nbytes=N * R * P * M * 4)
                for r, pmap in enumerate(pmaps):
                    arr = numpy.zeros((N, L), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array[:, 0]
                    self.datastore['hcurves-rlzs'][:, r] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, r] = hmap[sid].array
            if S:
                logging.info('Computing statistical hazard curves')
                self.datastore.create_dset('hcurves-stats', F32, (N, S, L))
                self.datastore.set_attrs('hcurves-stats',
                                         nbytes=N * S * L * 4)
                if oq.poes:
                    P = len(oq.poes)
                    M = len(oq.imtls)
                    ds = self.datastore.create_dset(
                        'hmaps-stats', F32, (N, S, M, P))
                    self.datastore.set_attrs(
                        'hmaps-stats', nbytes=N * S * P * M * 4)
                for s, stat in enumerate(hstats):
                    pmap = compute_pmap_stats(
                        pmaps, [hstats[stat]], weights, oq.imtls)
                    arr = numpy.zeros((N, L), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array[:, 0]
                    self.datastore['hcurves-stats'][:, s] = arr
                    if oq.poes:
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        for sid in hmap:
                            ds[sid, s] = hmap[sid].array
        if self.datastore.parent:
            self.datastore.parent.open('r')
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            job_id = logs.init('job')
            self.cl = ClassicalCalculator(oq, job_id)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, usually this is quite fast and does not
            # dominate the computation
            self.cl.run(close=False)
            engine.expose_outputs(self.cl.datastore)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            self.rdiff, index = util.max_rel_diff_index(
                cl_mean_curves, eb_mean_curves)
            logging.warning('Relative difference with the classical '
                            'mean curves: %d%% at site index %d',
                            self.rdiff * 100, index)
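
# The eid2idx mapping used above replaces 64-bit event IDs in gmf_data with
# their row number in the 'events' dataset, so later lookups are plain
# array indexing. A sketch of the idea (base.get_idxs in the engine is the
# vectorized equivalent working on structured arrays; the names below are
# illustrative only):
import numpy

event_ids = numpy.array([12, 7, 42, 3], numpy.uint64)  # as stored on disk
eid2idx = {eid: idx for idx, eid in enumerate(event_ids)}
eids_in_gmfs = numpy.array([42, 42, 7, 3], numpy.uint64)
idxs = numpy.array([eid2idx[eid] for eid in eids_in_gmfs], numpy.uint32)
# idxs == [2, 2, 1, 3]; each GMF row now points directly into 'events'
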
def post_execute(self, result):
    """
    Save the SES collection
    """
    oq = self.oqparam
    N = len(self.sitecol.complete)
    L = len(oq.imtls.array)
    if oq.hazard_calculation_id is None:
        self.rupser.close()
        num_events = sum(set_counts(self.datastore, 'events').values())
        if num_events == 0:
            raise RuntimeError(
                'No seismic events! Perhaps the investigation time is '
                'too short or the maximum_distance is too small')
        if oq.save_ruptures:
            logging.info('Setting %d event years on %d ruptures',
                         num_events, self.rupser.nruptures)
            with self.monitor('setting event years', measuremem=True,
                              autoflush=True):
                numpy.random.seed(self.oqparam.ses_seed)
                set_random_years(self.datastore, 'events',
                                 int(self.oqparam.investigation_time))
    if self.gmf_size:
        self.datastore.set_attrs('events', max_gmf_size=self.gmf_size)
        msg = 'less than ' if self.get_min_iml(self.oqparam).sum() else ''
        logging.info('Generating %s%s of GMFs', msg,
                     humansize(self.gmf_size))
    if oq.hazard_curves_from_gmfs:
        rlzs = self.csm_info.rlzs_assoc.realizations
        # compute and save statistics; this is done in process and can
        # be very slow if there are thousands of realizations
        weights = [rlz.weight for rlz in rlzs]
        hstats = self.oqparam.hazard_stats()
        if len(hstats):
            logging.info('Computing statistical hazard curves')
            for kind, stat in hstats:
                pmap = compute_pmap_stats(result.values(), [stat], weights)
                arr = numpy.zeros((N, L), F32)
                for sid in pmap:
                    arr[sid] = pmap[sid].array[:, 0]
                self.datastore['hcurves/' + kind] = arr
        self.save_hmaps()
    if self.datastore.parent:
        self.datastore.parent.open('r')
    if 'gmf_data' in self.datastore:
        self.save_gmf_bytes()
    if oq.compare_with_classical:  # compute classical curves
        export_dir = os.path.join(oq.export_dir, 'cl')
        if not os.path.exists(export_dir):
            os.makedirs(export_dir)
        oq.export_dir = export_dir
        # one could also set oq.number_of_logic_tree_samples = 0
        self.cl = ClassicalCalculator(oq)
        # TODO: perhaps it is possible to avoid reprocessing the source
        # model; however, usually this is quite fast and does not
        # dominate the computation
        self.cl.run(close=False)
        cl_mean_curves = get_mean_curves(self.cl.datastore)
        eb_mean_curves = get_mean_curves(self.datastore)
        rdiff, index = util.max_rel_diff_index(
            cl_mean_curves, eb_mean_curves)
        logging.warn('Relative difference with the classical '
                     'mean curves: %d%% at site index %d',
                     rdiff * 100, index)
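
# compute_pmap_stats above reduces R realizations to a few statistics,
# weighting each realization by its logic tree weight. For the mean this
# boils down to a weighted average of the curves; a sketch under that
# assumption (the engine version works on ProbabilityMaps, not raw arrays):
import numpy

curves = numpy.array([[[.2, .1], [.3, .2]],    # realization 0: 2 sites
                      [[.4, .3], [.1, .0]]])   # realization 1
weights = numpy.array([.75, .25])
mean = numpy.einsum('r,rsl->sl', weights, curves)
# mean[0] == [.25, .15]: the curves of site 0 averaged with weights .75/.25
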
class EventBasedCalculator(base.HazardCalculator):
    """
    Event based PSHA calculator generating the ground motion fields and
    the hazard curves from the ruptures, depending on the configuration
    parameters.
    """
    core_task = compute_gmfs
    is_stochastic = True
    build_ruptures = sample_ruptures

    @cached_property
    def csm_info(self):
        """
        :returns: a cached CompositionInfo object
        """
        try:
            return self.csm.info
        except AttributeError:
            return self.datastore.parent['csm_info']

    def init(self):
        if hasattr(self, 'csm'):
            self.check_floating_spinning()
        self.rupser = calc.RuptureSerializer(self.datastore)

    def init_logic_tree(self, csm_info):
        self.grp_trt = csm_info.grp_by("trt")
        self.rlzs_assoc = csm_info.get_rlzs_assoc()
        self.rlzs_by_gsim_grp = csm_info.get_rlzs_by_gsim_grp()
        self.samples_by_grp = csm_info.get_samples_by_grp()
        self.num_rlzs_by_grp = {
            grp_id:
            sum(len(rlzs) for rlzs in self.rlzs_by_gsim_grp[grp_id].values())
            for grp_id in self.rlzs_by_gsim_grp}
        self.R = len(self.rlzs_assoc.realizations)

    def zerodict(self):
        """
        Initial accumulator, a dictionary (grp_id, gsim) -> curves
        """
        self.L = len(self.oqparam.imtls.array)
        zd = {r: ProbabilityMap(self.L) for r in range(self.R)}
        self.E = len(self.datastore['events'])
        return zd

    def from_sources(self, par):
        """
        Prefilter the composite source model and store the source_info
        """
        oq = self.oqparam
        gsims_by_trt = self.csm.gsim_lt.values

        def weight_src(src):
            return src.num_ruptures

        logging.info('Building ruptures')
        smap = parallel.Starmap(
            self.build_ruptures.__func__, monitor=self.monitor())
        eff_ruptures = AccumDict(accum=0)  # grp_id => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(3, F32))
        ses_idx = 0
        for sm_id, sm in enumerate(self.csm.source_models):
            logging.info('Sending %s', sm)
            for sg in sm.src_groups:
                if not sg.sources:
                    continue
                par['gsims'] = gsims_by_trt[sg.trt]
                for block in self.block_splitter(sg.sources, weight_src):
                    if 'ucerf' in oq.calculation_mode:
                        for i in range(oq.ses_per_logic_tree_path):
                            par['ses_seeds'] = [
                                (ses_idx, oq.ses_seed + i + 1)]
                            smap.submit(block, self.src_filter, par)
                            ses_idx += 1
                    else:
                        smap.submit(block, self.src_filter, par)
        mon = self.monitor('saving ruptures')
        for dic in smap:
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            if dic['rup_array']:
                with mon:
                    self.rupser.save(dic['rup_array'])
        self.rupser.close()

        # logic tree reduction, must be called before storing the events
        self.store_csm_info(eff_ruptures)
        store_rlzs_by_grp(self.datastore)
        self.init_logic_tree(self.csm.info)
        with self.monitor('store source_info', autoflush=True):
            self.store_source_info(calc_times)
        logging.info('Reordering the ruptures and storing the events')
        attrs = self.datastore.getitem('ruptures').attrs
        sorted_ruptures = self.datastore.getitem('ruptures').value
        # order the ruptures by serial
        sorted_ruptures.sort(order='serial')
        self.datastore['ruptures'] = sorted_ruptures
        self.datastore.set_attrs('ruptures', **attrs)
        rgetters = self.save_events(sorted_ruptures)
        return ((rgetter, self.sitecol, par) for rgetter in rgetters)

    def get_rupture_getters(self):
        """
        :returns: a list of RuptureGetters
        """
        dstore = (self.datastore.parent if self.datastore.parent
                  else self.datastore)
        hdf5cache = dstore.hdf5cache()
        with hdf5.File(hdf5cache, 'r+') as cache:
            if 'rupgeoms' not in cache:
                dstore.hdf5.copy('rupgeoms', cache)
        rgetters = get_rupture_getters(
            dstore, split=self.oqparam.concurrent_tasks,
            hdf5cache=hdf5cache)
        num_events = self.E if hasattr(self, 'E') else len(dstore['events'])
        num_ruptures = len(dstore['ruptures'])
        logging.info('Found {:,d} ruptures and {:,d} events'.format(
            num_ruptures, num_events))
        if self.datastore.parent:
            self.datastore.parent.close()
        return rgetters

    def agg_dicts(self, acc, result):
        """
        :param acc: accumulator dictionary
        :param result: an AccumDict with events, ruptures, gmfs and hcurves
        """
        try:
            eid2idx = self.eid2idx
        except AttributeError:  # first call
            eid2idx = self.eid2idx = dict(
                zip(self.datastore['events']['eid'], range(self.E)))
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        with sav_mon:
            data = result.pop('gmfdata')
            if len(data) == 0:
                return acc
            idxs = get_idxs(data, eid2idx)  # this has to be fast
            data['eid'] = idxs  # replace eid with idx
            self.datastore.extend('gmf_data/data', data)
            # it is important to save the number of bytes while the
            # computation is going on, to see the progress
            update_nbytes(self.datastore, 'gmf_data/data', data)
            for sid, start, stop in result['indices']:
                self.indices[sid, 0].append(start + self.offset)
                self.indices[sid, 1].append(stop + self.offset)
            self.offset += len(data)
        if self.offset >= TWO32:
            raise RuntimeError(
                'The gmf_data table has more than %d rows' % TWO32)
        imtls = self.oqparam.imtls
        with agg_mon:
            for key, poes in result.get('hcurves', {}).items():
                r, sid, imt = str2rsi(key)
                array = acc[r].setdefault(sid, 0).array[imtls(imt), 0]
                array[:] = 1. - (1. - array) * (1. - poes)
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        return acc

    def save_events(self, rup_array):
        """
        :param rup_array: an array of ruptures with fields grp_id
        :returns: a list of RuptureGetters
        """
        # this is very fast compared to saving the ruptures
        eids = rupture.get_eids(
            rup_array, self.samples_by_grp, self.num_rlzs_by_grp)
        self.E = len(eids)
        self.check_overflow()  # check the number of events
        events = numpy.zeros(len(eids), rupture.events_dt)
        events['eid'] = eids
        self.eid2idx = eid2idx = dict(zip(events['eid'], range(self.E)))
        rgetters = self.get_rupture_getters()
        # build the associations eid -> rlz in parallel
        smap = parallel.Starmap(RuptureGetter.get_eid_rlz,
                                ((rgetter,) for rgetter in rgetters),
                                self.monitor('get_eid_rlz'),
                                progress=logging.debug)
        for eid_rlz in smap:  # fast: 30 million events associated in 1 min
            for eid, rlz in eid_rlz:
                events[eid2idx[eid]]['rlz'] = rlz
        self.datastore['events'] = events  # fast too
        return rgetters

    def check_overflow(self):
        """
        Raise a ValueError if the number of sites is larger than 65,536 or
        the number of IMTs is larger than 256 or the number of ruptures is
        larger than 4,294,967,296. The limits are due to the numpy dtype
        used to store the GMFs (gmv_dt). They could be relaxed in the
        future.
        """
        max_ = dict(events=TWO32, imts=2**8)
        E = getattr(self, 'E', 0)  # 0 for non event based
        num_ = dict(events=E, imts=len(self.oqparam.imtls))
        if self.sitecol:
            max_['sites'] = min(self.oqparam.max_num_sites, TWO32)
            num_['sites'] = len(self.sitecol)
        for var in max_:
            if num_[var] > max_[var]:
                raise ValueError('The event based calculator is restricted '
                                 'to %d %s, got %d' %
                                 (max_[var], var, num_[var]))

    def execute(self):
        oq = self.oqparam
        self.offset = 0
        self.indices = collections.defaultdict(list)  # sid, idx -> indices
        self.min_iml = self.get_min_iml(oq)
        param = self.param.copy()
        param.update(
            oqparam=oq, min_iml=self.min_iml,
            gmf=oq.ground_motion_fields,
            truncation_level=oq.truncation_level,
            ruptures_per_block=oq.ruptures_per_block,
            imtls=oq.imtls,
            filter_distance=oq.filter_distance,
            ses_per_logic_tree_path=oq.ses_per_logic_tree_path)
        if oq.hazard_calculation_id:  # from ruptures
            assert oq.ground_motion_fields, 'must be True!'
            self.datastore.parent = datastore.read(oq.hazard_calculation_id)
            self.init_logic_tree(self.csm_info)
            iterargs = ((rgetter, self.sitecol, param)
                        for rgetter in self.get_rupture_getters())
        else:  # from sources
            iterargs = self.from_sources(param)
            if oq.ground_motion_fields is False:
                return {}
        # call compute_gmfs in parallel
        acc = parallel.Starmap(
            self.core_task.__func__, iterargs,
            self.monitor()).reduce(self.agg_dicts, self.zerodict())

        if self.indices:
            N = len(self.sitecol.complete)
            logging.info('Saving gmf_data/indices')
            with self.monitor('saving gmf_data/indices', measuremem=True,
                              autoflush=True):
                self.datastore['gmf_data/imts'] = ' '.join(oq.imtls)
                dset = self.datastore.create_dset(
                    'gmf_data/indices', hdf5.vuint32,
                    shape=(N, 2), fillvalue=None)
                num_evs = self.datastore.create_dset(
                    'gmf_data/events_by_sid', U32, (N,))
                for sid in self.sitecol.complete.sids:
                    start = numpy.array(self.indices[sid, 0])
                    stop = numpy.array(self.indices[sid, 1])
                    dset[sid, 0] = start
                    dset[sid, 1] = stop
                    num_evs[sid] = (stop - start).sum()
                avg_events_by_sid = num_evs.value.sum() / N
                logging.info('Found ~%d GMVs per site', avg_events_by_sid)
                self.datastore.set_attrs(
                    'gmf_data', avg_events_by_sid=avg_events_by_sid,
                    max_events_by_sid=num_evs.value.max())
        elif oq.ground_motion_fields:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')
        return acc

    def save_gmf_bytes(self):
        """Save the attribute nbytes in the gmf_data datasets"""
        ds = self.datastore
        for sm_id in ds['gmf_data']:
            ds.set_nbytes('gmf_data/' + sm_id)
        ds.set_nbytes('gmf_data')

    def post_execute(self, result):
        oq = self.oqparam
        if not oq.ground_motion_fields:
            return
        N = len(self.sitecol.complete)
        L = len(oq.imtls.array)
        if result and oq.hazard_curves_from_gmfs:
            rlzs = self.rlzs_assoc.realizations
            # compute and save statistics; this is done in process and can
            # be very slow if there are thousands of realizations
            weights = [rlz.weight for rlz in rlzs]
            # NB: in the future we may want to save the individual hazard
            # curves if oq.individual_curves is set; for the moment we
            # save the statistical curves only
            hstats = oq.hazard_stats()
            pmaps = list(result.values())
            if len(hstats):
                logging.info('Computing statistical hazard curves')
                if len(weights) != len(pmaps):
                    # this should never happen, unless I break the
                    # logic tree reduction mechanism during refactoring
                    raise AssertionError('Expected %d pmaps, got %d' %
                                         (len(weights), len(pmaps)))
                for statname, stat in hstats:
                    pmap = compute_pmap_stats(pmaps, [stat], weights)
                    arr = numpy.zeros((N, L), F32)
                    for sid in pmap:
                        arr[sid] = pmap[sid].array[:, 0]
                    self.datastore['hcurves/' + statname] = arr
                    if oq.poes:
                        P = len(oq.poes)
                        I = len(oq.imtls)
                        self.datastore.create_dset(
                            'hmaps/' + statname, F32, (N, P * I))
                        self.datastore.set_attrs(
                            'hmaps/' + statname, nbytes=N * P * I * 4)
                        hmap = calc.make_hmap(pmap, oq.imtls, oq.poes)
                        ds = self.datastore['hmaps/' + statname]
                        for sid in hmap:
                            ds[sid] = hmap[sid].array[:, 0]
        if self.datastore.parent:
            self.datastore.parent.open('r')
        if 'gmf_data' in self.datastore:
            self.save_gmf_bytes()
        if oq.compare_with_classical:  # compute classical curves
            export_dir = os.path.join(oq.export_dir, 'cl')
            if not os.path.exists(export_dir):
                os.makedirs(export_dir)
            oq.export_dir = export_dir
            # one could also set oq.number_of_logic_tree_samples = 0
            self.cl = ClassicalCalculator(oq)
            # TODO: perhaps it is possible to avoid reprocessing the source
            # model; however, usually this is quite fast and does not
            # dominate the computation
            self.cl.run(close=False)
            cl_mean_curves = get_mean_curves(self.cl.datastore)
            eb_mean_curves = get_mean_curves(self.datastore)
            rdiff, index = util.max_rel_diff_index(
                cl_mean_curves, eb_mean_curves)
            logging.warn('Relative difference with the classical '
                         'mean curves: %d%% at site index %d',
                         rdiff * 100, index)
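
# calc.make_hmap above turns hazard curves into hazard maps: for each site
# and IMT it inverts the curve, finding the intensity level at which the
# probability of exceedance equals each requested poe. A rough sketch of
# the interpolation involved, assuming log-log interpolation as is
# customary for hazard curves (the engine implementation differs in
# details and edge cases):
import numpy

imls = numpy.array([.1, .2, .4, .8])         # intensity measure levels
poes_curve = numpy.array([.9, .5, .1, .01])  # PoEs, decreasing with iml

def iml_for(poe):
    # interpolate in log space on the reversed (increasing) arrays
    return numpy.exp(numpy.interp(numpy.log(poe),
                                  numpy.log(poes_curve[::-1]),
                                  numpy.log(imls[::-1])))

hmap = [iml_for(poe) for poe in (.1, .02)]   # ~[0.40, 0.65]
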