def compute_gmfs(job_id, sites, rupture_id, gmfcoll_id, realizations):
    """
    Compute the ground motion fields of a parsed rupture and store them
    in the db.

    The NRML of the :class:`models.ParsedRupture` record is converted with
    ``source.nrml_to_hazardlib``; the fields are then computed for all the
    intensity measure types of the hazard calculation and handed over to
    ``save_gmf``. Computing and saving each run under their own
    ``EnginePerformanceMonitor``.

    :param job_id:
        ID of the currently running job.
    :param sites:
        The subset of the full SiteCollection scanned by this task
    :param rupture_id:
        ID of the parsed rupture record from which the ground motion
        fields are generated.
    :param gmfcoll_id:
        the id of a :class:`openquake.engine.db.models.Gmf` record
    :param realizations:
        Number of realizations to create.
    """
    calc = models.HazardCalculation.objects.get(oqjob=job_id)

    parsed_rupture = models.ParsedRupture.objects.get(id=rupture_id)
    rupture = source.nrml_to_hazardlib(
        parsed_rupture.nrml, calc.rupture_mesh_spacing, None, None)

    imts = [haz_general.imt_to_hazardlib(imt_str)
            for imt_str in calc.intensity_measure_types]

    # look up the GSIM class by name, then instantiate it
    gsim_class = AVAILABLE_GSIMS[calc.gsim]
    gsim = gsim_class()
    correl_model = haz_general.get_correl_model(calc)

    with EnginePerformanceMonitor('computing gmfs', job_id, gmfs):
        fields = ground_motion_fields(
            rupture, sites, imts, gsim, calc.truncation_level,
            realizations=realizations, correlation_model=correl_model)

    with EnginePerformanceMonitor('saving gmfs', job_id, gmfs):
        save_gmf(gmfcoll_id, fields, sites)
def compute_gmfs(job_id, sites, rupture, gmf_id, realizations):
    """
    Compute the ground motion fields of a hazardlib rupture and return them.

    Nothing is saved here: the value returned by ``ground_motion_fields``
    is passed straight back to the caller. The computation runs under an
    ``EnginePerformanceMonitor``.

    :param job_id:
        ID of the currently running job.
    :param sites:
        The subset of the full SiteCollection scanned by this task
    :param rupture:
        The hazardlib rupture from which the ground motion fields are
        generated.
    :param gmf_id:
        the id of a :class:`openquake.engine.db.models.Gmf` record
        (not read inside this function)
    :param realizations:
        Number of realizations to create.
    :returns:
        whatever ``ground_motion_fields`` returns for the given inputs
    """
    calc = models.HazardCalculation.objects.get(oqjob=job_id)
    imts = [from_string(imt_str) for imt_str in calc.intensity_measure_types]

    # look up the GSIM class by name, then instantiate it
    gsim_class = AVAILABLE_GSIMS[calc.gsim]
    gsim = gsim_class()
    correl_model = haz_general.get_correl_model(calc)

    with EnginePerformanceMonitor('computing gmfs', job_id, gmfs):
        return ground_motion_fields(
            rupture, sites, imts, gsim, calc.truncation_level,
            realizations=realizations, correlation_model=correl_model)
def compute_gmfs(job_id, rupture_ids, output_id, task_no, realizations):
    """
    Compute ground motion fields on the whole site collection of the
    hazard calculation and store them in the db.

    :param job_id:
        ID of the currently running job.
    :param rupture_ids:
        List of ids of parsed rupture models. Only the first id in the
        list is read by this function.
    :param output_id:
        Identifies the output record the results are attached to.
    :param task_no:
        The task_no in which the calculation results will be placed.
        This ID basically corresponds to the sequence number of the task,
        in the context of the entire calculation.
    :param realizations:
        Number of realizations which are going to be created.
    """
    calc = models.HazardCalculation.objects.get(oqjob=job_id)

    parsed_rupture = models.ParsedRupture.objects.get(id=rupture_ids[0])
    rupture = source.nrml_to_hazardlib(
        parsed_rupture.nrml, calc.rupture_mesh_spacing, None, None)

    imts = [haz_general.imt_to_hazardlib(imt_str)
            for imt_str in calc.intensity_measure_types]
    gsim_class = AVAILABLE_GSIMS[calc.gsim]
    correl_model = haz_general.get_correl_model(calc)

    # the GSIM class is instantiated right where it is passed in
    fields = ground_motion_fields(
        rupture, calc.site_collection, imts, gsim_class(),
        calc.truncation_level, realizations=realizations,
        correlation_model=correl_model)

    save_gmf(output_id, fields, calc.site_collection.mesh, task_no)
def compute_gmfs(job_id, sites, rupture_id, output_id, realizations):
    """
    Compute the ground motion fields of a parsed rupture on the given
    sites and store them in the db.

    :param job_id:
        ID of the currently running job.
    :param sites:
        The subset of the full SiteCollection scanned by this task
    :param rupture_id:
        ID of the parsed rupture record from which the ground motion
        fields are generated.
    :param output_id:
        Identifies the output record the results are attached to.
    :param realizations:
        Number of realizations to create.
    """
    calc = models.HazardCalculation.objects.get(oqjob=job_id)

    parsed_rupture = models.ParsedRupture.objects.get(id=rupture_id)
    rupture = source.nrml_to_hazardlib(
        parsed_rupture.nrml, calc.rupture_mesh_spacing, None, None)

    imts = [haz_general.imt_to_hazardlib(imt_str)
            for imt_str in calc.intensity_measure_types]

    # look up the GSIM class by name, then instantiate it
    gsim_class = AVAILABLE_GSIMS[calc.gsim]
    gsim = gsim_class()
    correl_model = haz_general.get_correl_model(calc)

    fields = ground_motion_fields(
        rupture, sites, imts, gsim, calc.truncation_level,
        realizations=realizations, correlation_model=correl_model)

    # only the mesh of the sites is passed to the saving routine
    save_gmf(output_id, fields, sites.mesh)
def compute_gmvs(self, hazard_output, site_assets, monitor):
    """
    Compute ground motion values on the fly, starting from the ruptures
    stored for the given stochastic event set collection.

    :param hazard_output:
        object giving access to the hazard calculation (through
        ``output.oq_job.hazard_calculation``) and to the logic tree
        realization; it is also used to filter the SES ruptures
    :param site_assets:
        passed unchanged to ``GroundMotionValuesCalcGetter``
    :param monitor:
        a monitor; a copy named 'computing gmvs' wraps the computation
    :returns:
        a pair ``(all_assets, (gmvs, r_ids))``
    :raises RuntimeError:
        if any rupture of the collection is stored as "not computed"
    """
    # hazard calculation parameters read from the db
    hc = hazard_output.output.oq_job.hazard_calculation
    truncation_level = hc.truncation_level
    gsims = self.logic_tree_processor.parse_gmpe_logictree_path(
        hazard_output.lt_realization.gsim_lt_path)
    correl_model = (general.get_correl_model(hc)
                    if hc.ground_motion_correlation_model is not None
                    else None)

    # refuse to work on ruptures saved by a too old engine version
    ses_ruptures = models.SESRupture.objects.filter(
        ses__ses_collection=hazard_output).order_by('tag')
    if ses_ruptures.filter(rupture="not computed").exists():
        msg = ("The stochastic event set has been computed with "
               " a version of openquake engine too old. "
               "Please, re-run your hazard")
        logs.LOG.error(msg)
        raise RuntimeError(msg)

    count = ses_ruptures.count()

    def ruptures():
        # yield the unpickled ruptures page by page, ordered by tag
        cursor = models.getcursor('job_init')
        # original note: a rupture "consumes" 8Kb, so the page size
        # controls the amount of memory used to store them
        page_size = 10000
        page_offsets = range(0, count, page_size)
        query = (
            " SELECT rup.rupture FROM hzrdr.ses_rupture AS rup"
            " JOIN hzrdr.ses AS ses ON ses.id = rup.ses_id"
            " WHERE ses.ses_collection_id = %s"
            " ORDER BY rup.tag LIMIT %s OFFSET %s")
        for page_offset in page_offsets:
            cursor.execute(query, (hazard_output.id, page_size, page_offset))
            for (pickled_rupture,) in cursor.fetchall():
                yield pickle.loads(str(pickled_rupture))

    r_objs = list(ruptures())
    r_seeds = numpy.random.randint(0, models.MAX_SINT_32, count)
    r_ids = ses_ruptures.values_list('id', flat=True)

    calc_getter = GroundMotionValuesCalcGetter(
        self.imt, hc.site_collection, site_assets,
        truncation_level, gsims, correl_model)

    with monitor.copy('computing gmvs'):
        all_assets, gmvs = calc_getter.compute(
            r_objs, r_seeds, r_ids, hc.maximum_distance)

    return all_assets, (gmvs, r_ids)
def compute_gmf_arg_gen(self):
    """
    Generate the arguments for the task compute_gmf.

    For each SES having at least one rupture, and for each intensity
    measure type, yields tuples of the form
    (job_id, params, imt, gsims, ses, site_coll, rupture_ids,
    rupture_seeds). Without a correlation model the work is split in
    blocks of sites; otherwise it is split in blocks of ruptures.
    """
    seed_gen = random.Random()
    seed_gen.seed(self.hc.random_seed)
    full_site_coll = self.hc.site_collection
    params = {
        "correl_model": haz_general.get_correl_model(self.hc),
        "truncation_level": self.hc.truncation_level,
        "maximum_distance": self.hc.maximum_distance,
    }

    for lt_rlz in self._get_realizations():
        ltp = logictree.LogicTreeProcessor.from_hc(self.hc)
        gsims = ltp.parse_gmpe_logictree_path(lt_rlz.gsim_lt_path)
        ordered_ses = models.SES.objects.filter(
            ses_collection__lt_realization=lt_rlz,
            ordinal__isnull=False).order_by("ordinal")

        for ses in ordered_ses:
            rupture_ids = models.SESRupture.objects.filter(
                ses=ses).values_list("id", flat=True)
            if not rupture_ids:
                # nothing to compute for an SES without ruptures
                continue

            # one seed per rupture, drawn in rupture order
            num_ruptures = len(rupture_ids)
            rupture_seeds = [seed_gen.randint(0, models.MAX_SINT_32)
                             for _ in range(num_ruptures)]

            # splitting on IMTs to generate more tasks and save memory
            for imt in self.hc.intensity_measure_types:
                if self.hc.ground_motion_correlation_model is not None:
                    # we split on ruptures to avoid running out of memory
                    rupt_blocks = block_splitter(rupture_ids, BLOCK_SIZE)
                    seed_blocks = block_splitter(rupture_seeds, BLOCK_SIZE)
                    for rupts, seeds in zip(rupt_blocks, seed_blocks):
                        yield (self.job.id, params, imt, gsims, ses,
                               full_site_coll, rupts, seeds)
                    continue

                # we split on sites to avoid running out of memory
                # on the workers for computations like the full Japan
                for sites in block_splitter(full_site_coll, BLOCK_SIZE):
                    yield (self.job.id, params, imt, gsims, ses,
                           models.SiteCollection(sites),
                           rupture_ids, rupture_seeds)
def compute_gmf_arg_gen(self):
    """
    Argument generator for the task compute_gmf.

    Walks over the logic tree realizations, their stochastic event sets
    (ordered by ordinal) and the intensity measure types, yielding tuples
    (job_id, params, imt, gsims, ses, site_coll, rupture_ids,
    rupture_seeds). SES without ruptures are skipped.
    """
    rng = random.Random()
    rng.seed(self.hc.random_seed)
    whole_site_coll = self.hc.site_collection

    params = dict(
        correl_model=haz_general.get_correl_model(self.hc),
        truncation_level=self.hc.truncation_level,
        maximum_distance=self.hc.maximum_distance)

    for lt_rlz in self._get_realizations():
        ltp = logictree.LogicTreeProcessor.from_hc(self.hc)
        gsims = ltp.parse_gmpe_logictree_path(lt_rlz.gsim_lt_path)

        ses_query = models.SES.objects.filter(
            ses_collection__lt_realization=lt_rlz,
            ordinal__isnull=False)
        for ses in ses_query.order_by('ordinal'):
            # ids of the ruptures in the given SES
            rupture_ids = models.SESRupture.objects.filter(
                ses=ses).values_list('id', flat=True)
            if not rupture_ids:
                continue

            # compute the associated seeds, one per rupture
            rupture_seeds = []
            for _ in range(len(rupture_ids)):
                rupture_seeds.append(rng.randint(0, models.MAX_SINT_32))

            # splitting on IMTs to generate more tasks and save memory
            for imt in self.hc.intensity_measure_types:
                uncorrelated = (
                    self.hc.ground_motion_correlation_model is None)
                if uncorrelated:
                    # we split on sites to avoid running out of memory
                    # on the workers for computations like the full Japan
                    for sites in block_splitter(whole_site_coll, BLOCK_SIZE):
                        site_block = models.SiteCollection(sites)
                        yield (self.job.id, params, imt, gsims, ses,
                               site_block, rupture_ids, rupture_seeds)
                else:
                    # we split on ruptures to avoid running out of memory
                    paired_blocks = zip(
                        block_splitter(rupture_ids, BLOCK_SIZE),
                        block_splitter(rupture_seeds, BLOCK_SIZE))
                    for rupts, seeds in paired_blocks:
                        yield (self.job.id, params, imt, gsims, ses,
                               whole_site_coll, rupts, seeds)
def compute_gmf_cache(hc, gsims, ruptures, rupture_ids):
    """
    Compute a ground motion field value for each rupture, for all the
    points affected by that rupture, for all IMTs.

    :param hc:
        hazard calculation object; this function reads its
        ``intensity_measure_types``, ``ground_motion_correlation_model``,
        ``site_collection``, ``truncation_level`` and ``maximum_distance``
    :param gsims:
        mapping indexed by ``rupture.tectonic_region_type`` giving the
        GSIM to use for each rupture
    :param ruptures:
        iterable of ruptures, paired positionally with ``rupture_ids``
    :param rupture_ids:
        iterable of rupture ids, one per rupture
    :returns:
        a dictionary imt -> ``{'gmvs': array, 'rupture_ids': list}``
        where the ``gmvs`` array has ``len(hc.site_collection)`` rows and
        the columns computed for each rupture are appended in order
    """
    imts = [haz_general.imt_to_hazardlib(x)
            for x in hc.intensity_measure_types]

    correl_model = None
    if hc.ground_motion_correlation_model is not None:
        correl_model = haz_general.get_correl_model(hc)

    n_points = len(hc.site_collection)

    # initialize gmf_cache, a dict imt -> {gmvs, rupture_ids}
    gmf_cache = dict(
        (imt, dict(gmvs=numpy.empty((n_points, 0)), rupture_ids=[]))
        for imt in imts)

    # Column blocks waiting to be joined, one list per IMT. Calling
    # numpy.append for every rupture recopies the whole accumulated array
    # each time; collecting the blocks and concatenating once gives the
    # same array without the quadratic copying. Each list starts with the
    # empty (n_points, 0) array so shape and dtype come out as before,
    # even for an IMT that never receives any value.
    gmv_blocks = dict((imt, [gmf_cache[imt]['gmvs']]) for imt in imts)

    for rupture, rupture_id in zip(ruptures, rupture_ids):
        # Compute and save ground motion fields
        gmf_calc_kwargs = {
            'rupture': rupture,
            'sites': hc.site_collection,
            'imts': imts,
            'gsim': gsims[rupture.tectonic_region_type],
            'truncation_level': hc.truncation_level,
            'realizations': DEFAULT_GMF_REALIZATIONS,
            'correlation_model': correl_model,
            'rupture_site_filter': filters.rupture_site_distance_filter(
                hc.maximum_distance),
        }
        gmf_dict = gmf.ground_motion_fields(**gmf_calc_kwargs)

        # update the gmf cache:
        for imt_key, v in gmf_dict.iteritems():
            gmv_blocks[imt_key].append(v)
            gmf_cache[imt_key]['rupture_ids'].append(rupture_id)

    # join the collected blocks column-wise, once per IMT
    for imt_key, blocks in gmv_blocks.iteritems():
        gmf_cache[imt_key]['gmvs'] = numpy.concatenate(blocks, axis=1)

    return gmf_cache
def ses_and_gmfs(job_id, src_ids, lt_rlz_id, task_seed, result_grp_ordinal):
    """
    Task of the stochastic event set calculator.

    Builds the sources for the given logic tree realization, computes the
    stochastic event sets and saves each generated rupture to the
    database. See :class:`openquake.engine.db.models.SESCollection`.

    Optionally (when the `ground_motion_fields` parameter of the
    calculation is set), a GMF is computed from each rupture of each
    stochastic event set and the GMFs are saved to the database too.

    Once all of this work is complete, completion is signalled through
    ``base.signal_task_complete`` with the number of processed sources.

    :param int job_id:
        ID of the currently running job.
    :param src_ids:
        List of ids of parsed source models from which we will generate
        stochastic event sets/ruptures.
    :param lt_rlz_id:
        Id of logic tree realization model to calculate for.
    :param int task_seed:
        Value for seeding numpy/scipy in the computation of stochastic
        event sets and ground motion fields.
    :param int result_grp_ordinal:
        The result group in which the calculation results will be placed.
        This ID basically corresponds to the sequence number of the task,
        in the context of the entire calculation.
    """
    logs.LOG.debug(('> starting `stochastic_event_sets` task: job_id=%s, '
                    'lt_realization_id=%s') % (job_id, lt_rlz_id))
    numpy.random.seed(task_seed)

    hc = models.HazardCalculation.objects.get(oqjob=job_id)

    if hc.complete_logic_tree_ses:
        cmplt_lt_ses = models.SES.objects.get(
            ses_collection__output__oq_job=job_id,
            complete_logic_tree_ses=True)
    else:
        cmplt_lt_ses = None

    if hc.ground_motion_fields:
        # The GMF calculation needs the points of interest, the IMTs and
        # (possibly) a correlation model.
        points_to_compute = hc.points_to_compute()

        imts = [haz_general.imt_to_hazardlib(imt_str)
                for imt_str in hc.intensity_measure_types]

        if hc.ground_motion_correlation_model is not None:
            correl_model = haz_general.get_correl_model(hc)
        else:
            correl_model = None

    lt_rlz = models.LtRealization.objects.get(id=lt_rlz_id)
    ltp = logictree.LogicTreeProcessor(hc.id)

    apply_uncertainties = ltp.parse_source_model_logictree_path(
        lt_rlz.sm_lt_path)
    gsims = ltp.parse_gmpe_logictree_path(lt_rlz.gsim_lt_path)

    source_gen = haz_general.gen_sources(
        src_ids, apply_uncertainties, hc.rupture_mesh_spacing,
        hc.width_of_mfd_bin, hc.area_source_discretization)
    sources = list(source_gen)

    # One pass per stochastic event set; for each rupture generated,
    # a GMF is optionally calculated.
    for ses_rlz_n in xrange(1, hc.ses_per_logic_tree_path + 1):
        logs.LOG.debug('> computing stochastic event set %s of %s'
                       % (ses_rlz_n, hc.ses_per_logic_tree_path))

        # Container for all the ruptures of this stochastic event set
        # (identified by `ordinal` and the logic tree realization).
        # NOTE: Many tasks can contribute ruptures to this SES.
        ses = models.SES.objects.get(
            ses_collection__lt_realization=lt_rlz, ordinal=ses_rlz_n)

        src_site_pairs = ((src, hc.site_collection) for src in sources)
        ssd_filter = filters.source_site_distance_filter(hc.maximum_distance)
        # keep the filtered sources, drop the site collection
        filtered_sources = (src for src, _ in ssd_filter(src_site_pairs))

        logs.LOG.debug('> computing stochastic event sets')
        if hc.ground_motion_fields:
            gmf_cache = _create_gmf_cache(len(points_to_compute), imts)

            logs.LOG.debug('> computing also ground motion fields')
            # "Container" for all the ground motion field results
            # computed for this stochastic event set.
            gmf_set = models.GmfSet.objects.get(
                gmf_collection__lt_realization=lt_rlz, ses_ordinal=ses_rlz_n)

        ses_poissonian = stochastic.stochastic_event_set_poissonian(
            filtered_sources, hc.investigation_time)

        logs.LOG.debug('> looping over ruptures')
        # stays 0 when no rupture is generated
        rupture_ordinal = 0
        for rupture_ordinal, rupture in enumerate(ses_poissonian, 1):
            # save the SES rupture to the db
            logs.LOG.debug('> saving SES rupture to DB')
            _save_ses_rupture(
                ses, rupture, cmplt_lt_ses, result_grp_ordinal,
                rupture_ordinal)
            logs.LOG.debug('> done saving SES rupture to DB')

            logs.LOG.debug('compute ground motion fields? %s'
                           % hc.ground_motion_fields)
            if not hc.ground_motion_fields:
                continue

            # ground motion fields were requested: compute them
            gmf_calc_kwargs = dict(
                rupture=rupture,
                sites=hc.site_collection,
                imts=imts,
                gsim=gsims[rupture.tectonic_region_type],
                truncation_level=hc.truncation_level,
                realizations=DEFAULT_GMF_REALIZATIONS,
                correlation_model=correl_model,
                rupture_site_filter=filters.rupture_site_distance_filter(
                    hc.maximum_distance),
            )

            logs.LOG.debug('> computing ground motion fields')
            gmf_dict = gmf_calc.ground_motion_fields(**gmf_calc_kwargs)
            logs.LOG.debug('< done computing ground motion fields')

            # append the new values to the gmf cache, column-wise
            for imt_key, values in gmf_dict.iteritems():
                gmf_cache[imt_key] = numpy.append(
                    gmf_cache[imt_key], values, axis=1)

        logs.LOG.debug('< Done looping over ruptures')
        logs.LOG.debug('%s ruptures computed for SES realization %s of %s'
                       % (rupture_ordinal, ses_rlz_n,
                          hc.ses_per_logic_tree_path))
        logs.LOG.debug('< done computing stochastic event set %s of %s'
                       % (ses_rlz_n, hc.ses_per_logic_tree_path))

        if hc.ground_motion_fields:
            # save the GMFs to the DB
            logs.LOG.debug('> saving GMF results to DB')
            _save_gmfs(
                gmf_set, gmf_cache, points_to_compute, result_grp_ordinal)
            logs.LOG.debug('< done saving GMF results to DB')

    logs.LOG.debug('< task complete, signalling completion')
    base.signal_task_complete(job_id=job_id, num_items=len(src_ids))
def compute_gmvs(self, hazard_output, site_assets, monitor):
    """
    Compute ground motion values on the fly.

    The ruptures of the stochastic event set collection are read from the
    db ordered by tag, a random seed is drawn for each of them and the
    computation is delegated to a ``GroundMotionValuesCalcGetter``.

    :param hazard_output:
        object giving access to the hazard calculation (through
        ``output.oq_job.hazard_calculation``) and to the logic tree
        realization; it is also used to filter the SES ruptures
    :param site_assets:
        passed unchanged to ``GroundMotionValuesCalcGetter``
    :param monitor:
        a monitor; a copy named 'computing gmvs' wraps the computation
    :returns:
        a pair ``(all_assets, (gmvs, r_ids))``
    :raises RuntimeError:
        if any rupture of the collection is stored as "not computed"
    """
    # get needed hazard calculation params from the db
    hc = hazard_output.output.oq_job.hazard_calculation
    truncation_level = hc.truncation_level
    gsim_lt_path = hazard_output.lt_realization.gsim_lt_path
    gsims = self.logic_tree_processor.parse_gmpe_logictree_path(gsim_lt_path)

    model = None
    if hc.ground_motion_correlation_model is not None:
        model = general.get_correl_model(hc)

    # check that the ruptures have been computed by a sufficiently
    # new version of openquake
    rupture_qs = models.SESRupture.objects.filter(
        ses__ses_collection=hazard_output).order_by('tag')
    too_old = rupture_qs.filter(rupture="not computed").exists()
    if too_old:
        msg = ("The stochastic event set has been computed with "
               " a version of openquake engine too old. "
               "Please, re-run your hazard")
        logs.LOG.error(msg)
        raise RuntimeError(msg)

    count = rupture_qs.count()

    def ruptures():
        # generator over the stored ruptures, read in chunks ordered by tag
        cursor = models.getcursor('job_init')
        # original note: a rupture "consumes" 8Kb; the chunk size
        # controls the amount of memory used to store them
        chunk_size = 10000
        query = (
            " SELECT rup.rupture FROM hzrdr.ses_rupture AS rup"
            " JOIN hzrdr.ses AS ses ON ses.id = rup.ses_id"
            " WHERE ses.ses_collection_id = %s"
            " ORDER BY rup.tag LIMIT %s OFFSET %s")
        for start in range(0, count, chunk_size):
            cursor.execute(query, (hazard_output.id, chunk_size, start))
            for (blob,) in cursor.fetchall():
                yield pickle.loads(str(blob))

    r_objs = list(ruptures())
    r_seeds = numpy.random.randint(0, models.MAX_SINT_32, count)
    r_ids = rupture_qs.values_list('id', flat=True)

    calc_getter = GroundMotionValuesCalcGetter(
        self.imt, hc.site_collection, site_assets,
        truncation_level, gsims, model)

    with monitor.copy('computing gmvs'):
        all_assets, gmvs = calc_getter.compute(
            r_objs, r_seeds, r_ids, hc.maximum_distance)

    return all_assets, (gmvs, r_ids)
def compute_ses_and_gmfs(
        job_id, sitecol, src_seeds, lt_model, gsim_by_rlz, task_no):
    """
    Celery task for the stochastic event set calculator.

    Samples logic trees and calls the stochastic event set calculator.

    Once stochastic event sets are calculated, results will be saved to the
    database. See :class:`openquake.engine.db.models.SESCollection`.

    Optionally (specified in the job configuration using the
    `ground_motion_fields` parameter), GMFs can be computed from each rupture
    in each stochastic event set. GMFs are also saved to the database.

    For every (source, seed) pair the function filters the sites, samples
    the number of occurrences of each rupture in each SES, filters the
    occurring ruptures by distance, saves them and (optionally) computes
    the GMFs. The order in which random numbers are drawn from ``rnd``
    determines the results, so the statements below must not be reordered.

    :param int job_id:
        ID of the currently running job.
    :param sitecol:
        a :class:`openquake.hazardlib.site.SiteCollection` instance
    :param src_seeds:
        List of pairs (source, seed)
    :param lt_model:
        used to look up the :class:`models.SESCollection` to populate
    :param gsim_by_rlz:
        dictionary of GSIM
    :param task_no:
        an ordinal so that GMV can be collected in a reproducible order
    """
    # NB: all realizations in gsim_by_rlz correspond to the same source model
    ses_coll = models.SESCollection.objects.get(lt_model=lt_model)

    hc = models.HazardCalculation.objects.get(oqjob=job_id)
    all_ses = list(ses_coll)
    imts = map(from_string, hc.intensity_measure_types)
    params = dict(
        correl_model=general.get_correl_model(hc),
        truncation_level=hc.truncation_level,
        maximum_distance=hc.maximum_distance)

    gmfcollector = GmfCollector(params, imts, gsim_by_rlz)

    # one monitor per phase; they are all flushed at the end of the task
    filter_sites_mon = LightMonitor(
        'filtering sites', job_id, compute_ses_and_gmfs)
    generate_ruptures_mon = LightMonitor(
        'generating ruptures', job_id, compute_ses_and_gmfs)
    filter_ruptures_mon = LightMonitor(
        'filtering ruptures', job_id, compute_ses_and_gmfs)
    save_ruptures_mon = LightMonitor(
        'saving ses', job_id, compute_ses_and_gmfs)
    compute_gmfs_mon = LightMonitor(
        'computing gmfs', job_id, compute_ses_and_gmfs)

    # Compute and save stochastic event sets
    rnd = random.Random()
    num_distinct_ruptures = 0
    total_ruptures = 0

    for src, seed in src_seeds:
        t0 = time.time()
        # reseed per source, so the numbers drawn below depend only on
        # the seed paired with this source
        rnd.seed(seed)

        with filter_sites_mon:  # filtering sources
            # no filtering at all when maximum_distance is not set
            s_sites = src.filter_sites_by_distance_to_source(
                hc.maximum_distance, sitecol
            ) if hc.maximum_distance else sitecol
            if s_sites is None:
                # the filter returned no sites: skip this source
                continue

        # the dictionary `ses_num_occ` contains [(ses, num_occurrences)]
        # for each occurring rupture for each ses in the ses collection
        ses_num_occ = collections.defaultdict(list)
        with generate_ruptures_mon:  # generating ruptures for the given source
            for rup_no, rup in enumerate(src.iter_ruptures(), 1):
                rup.rup_no = rup_no
                for ses in all_ses:
                    # numpy is reseeded before each sampling with a
                    # number drawn from the per-source generator
                    numpy.random.seed(rnd.randint(0, models.MAX_SINT_32))
                    num_occurrences = rup.sample_number_of_occurrences()
                    if num_occurrences:
                        ses_num_occ[rup].append((ses, num_occurrences))
                        total_ruptures += num_occurrences

        # NB: the number of occurrences is very low, << 1, so it is
        # more efficient to filter only the ruptures that occur, i.e.
        # to call sample_number_of_occurrences() *before* the filtering
        # NOTE(review): entries are deleted from `ses_num_occ` inside this
        # loop; this relies on .keys() returning a list (Python 2)
        for rup in ses_num_occ.keys():
            with filter_ruptures_mon:  # filtering ruptures
                r_sites = rup.source_typology.\
                    filter_sites_by_distance_to_rupture(
                        rup, hc.maximum_distance, s_sites
                    ) if hc.maximum_distance else s_sites
                if r_sites is None:
                    # ignore ruptures which are far away
                    del ses_num_occ[rup]  # save memory
                    continue

            ses_ruptures = []
            with save_ruptures_mon:  # saving ses_ruptures
                # using a django transaction make the saving faster
                with transaction.commit_on_success(using='job_init'):
                    prob_rup = models.ProbabilisticRupture.create(
                        rup, ses_coll)
                    for ses, num_occurrences in ses_num_occ[rup]:
                        # one SESRupture, with its own seed, per occurrence
                        for occ_no in range(1, num_occurrences + 1):
                            rup_seed = rnd.randint(0, models.MAX_SINT_32)
                            ses_rup = models.SESRupture.create(
                                prob_rup, ses, src.source_id,
                                rup.rup_no, occ_no, rup_seed)
                            ses_ruptures.append(ses_rup)

            with compute_gmfs_mon:  # computing GMFs
                if hc.ground_motion_fields:
                    for ses_rup in ses_ruptures:
                        gmfcollector.calc_gmf(
                            r_sites, rup, ses_rup.id, ses_rup.seed)

        # log calc_time per distinct rupture
        if ses_num_occ:
            num_ruptures = len(ses_num_occ)
            tot_ruptures = sum(num for rup in ses_num_occ
                               for ses, num in ses_num_occ[rup])
            logs.LOG.info(
                'job=%d, src=%s:%s, num_ruptures=%d, tot_ruptures=%d, '
                'num_sites=%d, calc_time=%fs', job_id,
                src.source_id, src.__class__.__name__,
                num_ruptures, tot_ruptures, len(s_sites), time.time() - t0)
            num_distinct_ruptures += num_ruptures

    if num_distinct_ruptures:
        logs.LOG.info('job=%d, task %d generated %d/%d ruptures',
                      job_id, task_no, num_distinct_ruptures, total_ruptures)
    filter_sites_mon.flush()
    generate_ruptures_mon.flush()
    filter_ruptures_mon.flush()
    save_ruptures_mon.flush()
    compute_gmfs_mon.flush()

    if hc.ground_motion_fields:
        # the collected GMFs are saved once, at the end of the task
        with EnginePerformanceMonitor(
                'saving gmfs', job_id, compute_ses_and_gmfs):
            gmfcollector.save_gmfs(task_no)