def compute_hazard_curves(
        job_id, sitecol, sources, trt_model_id, gsims, task_no):
    """
    This task computes R2 * I hazard curves (each one is a
    numpy array of S * L floats) from the given source_ruptures
    pairs.

    :param job_id:
        ID of the currently running job
    :param sitecol:
        a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param sources:
        a block of source objects
    :param trt_model_id:
        the ID of a :class:`openquake.engine.db.TrtModel` instance
    :param gsims:
        a list of distinct GSIM instances
    :param int task_no:
        the ordinal number of the current task
    :returns:
        a triple (curves_by_gsim, trt_model_id, bbs) where
        curves_by_gsim is a list of pairs (gsim class name, list of
        arrays of probabilities of exceedance, one per IMT, in the
        sorted IMT order) and bbs is a list of BoundingBox objects
        (empty unless disaggregation is enabled)
    """
    hc = models.HazardCalculation.objects.get(oqjob=job_id)
    total_sites = len(sitecol)
    sitemesh = sitecol.mesh
    # IMTs are processed in sorted order so that curve indices are stable
    sorted_imts = sorted(hc.intensity_measure_types_and_levels)
    sorted_imls = [hc.intensity_measure_types_and_levels[imt]
                   for imt in sorted_imts]
    sorted_imts = map(from_string, sorted_imts)
    # curves[g][i] accumulates probabilities of NO exceedance
    # (hence the initial ones) for gsim #g and IMT #i
    curves = [[numpy.ones([total_sites, len(ls)])
               for ls in sorted_imls] for gsim in gsims]
    if hc.poes_disagg:  # doing disaggregation
        lt_model_id = models.TrtModel.objects.get(pk=trt_model_id).lt_model.id
        bbs = [BoundingBox(lt_model_id, site_id) for site_id in sitecol.sids]
    else:
        bbs = []
    mon = LightMonitor(
        'getting ruptures', job_id, compute_hazard_curves)
    make_ctxt_mon = LightMonitor(
        'making contexts', job_id, compute_hazard_curves)
    calc_poes_mon = LightMonitor(
        'computing poes', job_id, compute_hazard_curves)
    num_sites = 0
    # NB: rows are namedtuples with fields (source, rupture, rupture_sites)
    for source, rows in itertools.groupby(
            hc.gen_ruptures(sources, mon, sitecol),
            key=operator.attrgetter('source')):
        t0 = time.time()
        num_ruptures = 0
        for _source, rupture, r_sites in rows:
            # track the maximum number of affected sites over the ruptures
            num_sites = max(num_sites, len(r_sites))
            num_ruptures += 1
            if hc.poes_disagg:  # doing disaggregation
                jb_dists = rupture.surface.get_joyner_boore_distance(sitemesh)
                closest_points = rupture.surface.get_closest_points(sitemesh)
                for bb, dist, point in itertools.izip(
                        bbs, jb_dists, closest_points):
                    if dist < hc.maximum_distance:
                        # ruptures too far away are ignored
                        bb.update([dist], [point.longitude], [point.latitude])

            # compute probabilities for all realizations
            for gsim, curv in itertools.izip(gsims, curves):
                for i, pnes in enumerate(_calc_pnes(
                        gsim, r_sites, rupture, sorted_imts, sorted_imls,
                        hc.truncation_level, make_ctxt_mon, calc_poes_mon)):
                    curv[i] *= pnes

        # record per-source bookkeeping for performance analysis
        inserter.add(
            models.SourceInfo(trt_model_id=trt_model_id,
                              source_id=source.source_id,
                              source_class=source.__class__.__name__,
                              num_sites=num_sites,
                              num_ruptures=num_ruptures,
                              occ_ruptures=num_ruptures,
                              calc_time=time.time() - t0))

    make_ctxt_mon.flush()
    calc_poes_mon.flush()
    inserter.flush()

    # the 0 here is a shortcut for filtered sources giving no contribution;
    # this is essential for performance, we want to avoid returning
    # big arrays of zeros (MS)
    curves_by_gsim = [
        (gsim.__class__.__name__,
         [0 if general.all_equal(c, 1) else 1. - c for c in curv])
        for gsim, curv in zip(gsims, curves)]
    return curves_by_gsim, trt_model_id, bbs
def compute_hazard_curves(
        job_id, sitecol, sources, lt_model, gsim_by_rlz, task_no):
    """
    This task computes R2 * I hazard curves (each one is a
    numpy array of S * L floats) from the given source_ruptures
    pairs.

    :param job_id:
        ID of the currently running job
    :param sitecol:
        a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param sources:
        a block of source objects
    :param lt_model:
        a :class:`openquake.engine.db.LtSourceModel` instance
    :param gsim_by_rlz:
        a dictionary of gsims, one for each realization
    :param int task_no:
        the ordinal number of the current task
    :returns:
        a pair (curve_dict, bbs) where curve_dict maps each realization
        to a list of arrays of probabilities of exceedance (one per IMT,
        in sorted IMT order) and bbs is a list of BoundingBox objects
        (empty unless disaggregation is enabled)
    """
    hc = models.HazardCalculation.objects.get(oqjob=job_id)
    total_sites = len(sitecol)
    sitemesh = sitecol.mesh
    imts = general.im_dict_to_hazardlib(
        hc.intensity_measure_types_and_levels)
    # curves[rlz][imt] accumulates probabilities of NO exceedance
    # (hence the initial ones), one row per site, one column per IML
    curves = dict((rlz, dict((imt, numpy.ones([total_sites, len(imts[imt])]))
                             for imt in imts))
                  for rlz in gsim_by_rlz)
    if hc.poes_disagg:  # doing disaggregation
        bbs = [BoundingBox(lt_model.id, site_id) for site_id in sitecol.sids]
    else:
        bbs = []
    mon = LightMonitor(
        'getting ruptures', job_id, compute_hazard_curves)
    make_ctxt_mon = LightMonitor(
        'making contexts', job_id, compute_hazard_curves)
    calc_poes_mon = LightMonitor(
        'computing poes', job_id, compute_hazard_curves)

    # NB: rows are namedtuples with fields (source, rupture, rupture_sites)
    for source, rows in itertools.groupby(
            hc.gen_ruptures(sources, mon, sitecol),
            key=operator.attrgetter('source')):
        t0 = time.time()
        num_ruptures = 0
        for _source, rupture, r_sites in rows:
            num_ruptures += 1
            if hc.poes_disagg:  # doing disaggregation
                jb_dists = rupture.surface.get_joyner_boore_distance(sitemesh)
                closest_points = rupture.surface.get_closest_points(sitemesh)
                for bb, dist, point in zip(bbs, jb_dists, closest_points):
                    if dist < hc.maximum_distance:
                        # ruptures too far away are ignored
                        bb.update([dist], [point.longitude], [point.latitude])

            # compute probabilities for all realizations
            for rlz, curv in curves.iteritems():
                gsim = gsim_by_rlz[rlz]
                with make_ctxt_mon:
                    sctx, rctx, dctx = gsim.make_contexts(r_sites, rupture)
                with calc_poes_mon:
                    for imt in imts:
                        poes = gsim.get_poes(sctx, rctx, dctx, imt, imts[imt],
                                             hc.truncation_level)
                        pno = rupture.get_probability_no_exceedance(poes)
                        # expand the filtered-sites array back to the full
                        # site collection; 1 (no exceedance) for missing sites
                        curv[imt] *= r_sites.expand(pno, placeholder=1)

        logs.LOG.info('job=%d, src=%s:%s, num_ruptures=%d, calc_time=%fs',
                      job_id, source.source_id, source.__class__.__name__,
                      num_ruptures, time.time() - t0)

    make_ctxt_mon.flush()
    calc_poes_mon.flush()

    # the 0 here is a shortcut for filtered sources giving no contribution;
    # this is essential for performance, we want to avoid returning
    # big arrays of zeros (MS)
    curve_dict = dict((rlz, [0 if (curv[imt] == 1.0).all() else 1. - curv[imt]
                             for imt in sorted(imts)])
                      for rlz, curv in curves.iteritems())
    return curve_dict, bbs
def compute_ruptures(
        job_id, sitecol, src_seeds, trt_model_id, task_no):
    """
    Celery task for the stochastic event set calculator.

    Samples logic trees and calls the stochastic event set calculator.

    Once stochastic event sets are calculated, results will be saved to the
    database. See :class:`openquake.engine.db.models.SESCollection`.

    Optionally (specified in the job configuration using the
    `ground_motion_fields` parameter), GMFs can be computed from each rupture
    in each stochastic event set. GMFs are also saved to the database.

    :param int job_id:
        ID of the currently running job.
    :param sitecol:
        a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param src_seeds:
        List of pairs (source, seed)
    :param trt_model_id:
        the ID of a :class:`openquake.engine.db.TrtModel` instance
    :param task_no:
        an ordinal so that GMV can be collected in a reproducible order
    :returns:
        a pair (tot_ruptures, trt_model_id) where tot_ruptures is the total
        number of sampled rupture occurrences saved by this task
    """
    # NB: all realizations in gsims correspond to the same source model
    trt_model = models.TrtModel.objects.get(pk=trt_model_id)
    ses_coll = models.SESCollection.objects.get(lt_model=trt_model.lt_model)
    hc = models.HazardCalculation.objects.get(oqjob=job_id)
    all_ses = range(1, hc.ses_per_logic_tree_path + 1)
    tot_ruptures = 0

    filter_sites_mon = LightMonitor(
        'filtering sites', job_id, compute_ruptures)
    generate_ruptures_mon = LightMonitor(
        'generating ruptures', job_id, compute_ruptures)
    filter_ruptures_mon = LightMonitor(
        'filtering ruptures', job_id, compute_ruptures)
    save_ruptures_mon = LightMonitor(
        'saving ruptures', job_id, compute_ruptures)

    # Compute and save stochastic event sets
    rnd = random.Random()
    for src, seed in src_seeds:
        t0 = time.time()
        # reseed per source so tasks are reproducible regardless of ordering
        rnd.seed(seed)

        with filter_sites_mon:  # filtering sources
            s_sites = src.filter_sites_by_distance_to_source(
                hc.maximum_distance, sitecol
            ) if hc.maximum_distance else sitecol
            if s_sites is None:
                continue

        # the dictionary `ses_num_occ` contains [(ses, num_occurrences)]
        # for each occurring rupture for each ses in the ses collection
        ses_num_occ = collections.defaultdict(list)
        # BUGFIX: rup_no must be defined even when iter_ruptures() yields
        # nothing, otherwise the SourceInfo below would raise a NameError
        # (first source) or reuse the stale count from the previous source
        rup_no = 0
        with generate_ruptures_mon:  # generating ruptures for the source
            for rup_no, rup in enumerate(src.iter_ruptures(), 1):
                rup.rup_no = rup_no
                for ses_idx in all_ses:
                    # seed numpy's generator deterministically per sampling
                    numpy.random.seed(rnd.randint(0, models.MAX_SINT_32))
                    num_occurrences = rup.sample_number_of_occurrences()
                    if num_occurrences:
                        ses_num_occ[rup].append((ses_idx, num_occurrences))

        # NB: the number of occurrences is very low, << 1, so it is
        # more efficient to filter only the ruptures that occur, i.e.
        # to call sample_number_of_occurrences() *before* the filtering
        for rup in sorted(ses_num_occ, key=operator.attrgetter('rup_no')):
            with filter_ruptures_mon:  # filtering ruptures
                r_sites = filters.filter_sites_by_distance_to_rupture(
                    rup, hc.maximum_distance, s_sites
                ) if hc.maximum_distance else s_sites
                if r_sites is None:  # ignore ruptures which are far away
                    del ses_num_occ[rup]  # save memory
                    continue

            # saving ses_ruptures
            with save_ruptures_mon:
                # using a django transaction make the saving faster
                with transaction.commit_on_success(using='job_init'):
                    indices = r_sites.indices if len(r_sites) < len(sitecol) \
                        else None  # None means that nothing was filtered
                    prob_rup = models.ProbabilisticRupture.create(
                        rup, ses_coll, trt_model, indices)
                    for ses_idx, num_occurrences in ses_num_occ[rup]:
                        for occ_no in range(1, num_occurrences + 1):
                            rup_seed = rnd.randint(0, models.MAX_SINT_32)
                            models.SESRupture.create(
                                prob_rup, ses_idx, src.source_id,
                                rup.rup_no, occ_no, rup_seed)

        if ses_num_occ:
            num_ruptures = len(ses_num_occ)
            occ_ruptures = sum(num for rup in ses_num_occ
                               for ses, num in ses_num_occ[rup])
            tot_ruptures += occ_ruptures
        else:
            num_ruptures = rup_no
            occ_ruptures = 0

        # save SourceInfo
        source_inserter.add(
            models.SourceInfo(trt_model_id=trt_model_id,
                              source_id=src.source_id,
                              source_class=src.__class__.__name__,
                              num_sites=len(s_sites),
                              num_ruptures=rup_no,
                              occ_ruptures=occ_ruptures,
                              uniq_ruptures=num_ruptures,
                              calc_time=time.time() - t0))

    filter_sites_mon.flush()
    generate_ruptures_mon.flush()
    filter_ruptures_mon.flush()
    save_ruptures_mon.flush()
    source_inserter.flush()

    return tot_ruptures, trt_model_id
def compute_ses_and_gmfs(
        job_id, sitecol, src_seeds, lt_model, gsim_by_rlz, task_no):
    """
    Celery task for the stochastic event set calculator.

    Samples logic trees and calls the stochastic event set calculator.

    Once stochastic event sets are calculated, results will be saved to the
    database. See :class:`openquake.engine.db.models.SESCollection`.

    Optionally (specified in the job configuration using the
    `ground_motion_fields` parameter), GMFs can be computed from each rupture
    in each stochastic event set. GMFs are also saved to the database.

    :param int job_id:
        ID of the currently running job.
    :param sitecol:
        a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param src_seeds:
        List of pairs (source, seed)
    :param lt_model:
        the logic tree source model used to look up the SES collection
    :param gsim_by_rlz:
        dictionary of GSIM instances, one for each realization
    :param task_no:
        an ordinal so that GMV can be collected in a reproducible order
    """
    # NB: all realizations in gsim_by_rlz correspond to the same source model
    ses_coll = models.SESCollection.objects.get(lt_model=lt_model)
    hc = models.HazardCalculation.objects.get(oqjob=job_id)
    all_ses = list(ses_coll)
    imts = map(from_string, hc.intensity_measure_types)
    params = dict(
        correl_model=general.get_correl_model(hc),
        truncation_level=hc.truncation_level,
        maximum_distance=hc.maximum_distance)
    # accumulates the ground motion values computed by this task
    gmfcollector = GmfCollector(params, imts, gsim_by_rlz)

    filter_sites_mon = LightMonitor(
        'filtering sites', job_id, compute_ses_and_gmfs)
    generate_ruptures_mon = LightMonitor(
        'generating ruptures', job_id, compute_ses_and_gmfs)
    filter_ruptures_mon = LightMonitor(
        'filtering ruptures', job_id, compute_ses_and_gmfs)
    save_ruptures_mon = LightMonitor(
        'saving ses', job_id, compute_ses_and_gmfs)
    compute_gmfs_mon = LightMonitor(
        'computing gmfs', job_id, compute_ses_and_gmfs)

    # Compute and save stochastic event sets
    rnd = random.Random()
    num_distinct_ruptures = 0
    total_ruptures = 0

    for src, seed in src_seeds:
        t0 = time.time()
        # reseed per source so results are reproducible regardless of ordering
        rnd.seed(seed)

        with filter_sites_mon:  # filtering sources
            s_sites = src.filter_sites_by_distance_to_source(
                hc.maximum_distance, sitecol
            ) if hc.maximum_distance else sitecol
            if s_sites is None:
                continue

        # the dictionary `ses_num_occ` contains [(ses, num_occurrences)]
        # for each occurring rupture for each ses in the ses collection
        ses_num_occ = collections.defaultdict(list)
        with generate_ruptures_mon:  # generating ruptures for the source
            for rup_no, rup in enumerate(src.iter_ruptures(), 1):
                rup.rup_no = rup_no
                for ses in all_ses:
                    # seed numpy's generator deterministically per sampling
                    numpy.random.seed(rnd.randint(0, models.MAX_SINT_32))
                    num_occurrences = rup.sample_number_of_occurrences()
                    if num_occurrences:
                        ses_num_occ[rup].append((ses, num_occurrences))
                        total_ruptures += num_occurrences

        # NB: the number of occurrences is very low, << 1, so it is
        # more efficient to filter only the ruptures that occur, i.e.
        # to call sample_number_of_occurrences() *before* the filtering
        # NB(review): .keys() returns a list under Python 2, which makes the
        # `del` inside the loop safe; under Python 3 this would need list()
        for rup in ses_num_occ.keys():
            with filter_ruptures_mon:  # filtering ruptures
                r_sites = rup.source_typology.\
                    filter_sites_by_distance_to_rupture(
                        rup, hc.maximum_distance, s_sites
                    ) if hc.maximum_distance else s_sites
                if r_sites is None:  # ignore ruptures which are far away
                    del ses_num_occ[rup]  # save memory
                    continue

            ses_ruptures = []
            with save_ruptures_mon:  # saving ses_ruptures
                # using a django transaction make the saving faster
                with transaction.commit_on_success(using='job_init'):
                    prob_rup = models.ProbabilisticRupture.create(
                        rup, ses_coll)
                    for ses, num_occurrences in ses_num_occ[rup]:
                        for occ_no in range(1, num_occurrences + 1):
                            rup_seed = rnd.randint(0, models.MAX_SINT_32)
                            ses_rup = models.SESRupture.create(
                                prob_rup, ses, src.source_id,
                                rup.rup_no, occ_no, rup_seed)
                            ses_ruptures.append(ses_rup)

            with compute_gmfs_mon:  # computing GMFs
                if hc.ground_motion_fields:
                    for ses_rup in ses_ruptures:
                        gmfcollector.calc_gmf(
                            r_sites, rup, ses_rup.id, ses_rup.seed)

        # log calc_time per distinct rupture
        if ses_num_occ:
            num_ruptures = len(ses_num_occ)
            tot_ruptures = sum(num for rup in ses_num_occ
                               for ses, num in ses_num_occ[rup])
            logs.LOG.info(
                'job=%d, src=%s:%s, num_ruptures=%d, tot_ruptures=%d, '
                'num_sites=%d, calc_time=%fs', job_id, src.source_id,
                src.__class__.__name__, num_ruptures, tot_ruptures,
                len(s_sites), time.time() - t0)
            num_distinct_ruptures += num_ruptures

    if num_distinct_ruptures:
        logs.LOG.info('job=%d, task %d generated %d/%d ruptures',
                      job_id, task_no, num_distinct_ruptures, total_ruptures)
    filter_sites_mon.flush()
    generate_ruptures_mon.flush()
    filter_ruptures_mon.flush()
    save_ruptures_mon.flush()
    compute_gmfs_mon.flush()

    if hc.ground_motion_fields:
        with EnginePerformanceMonitor(
                'saving gmfs', job_id, compute_ses_and_gmfs):
            gmfcollector.save_gmfs(task_no)