def compute_gmf_arg_gen(self):
    """
    Argument generator for the task compute_gmf. For each SES yields a
    tuple of the form (job_id, params, imt, gsims, ses, site_coll,
    rupture_ids, rupture_seeds).
    """
    # seeded RNG so that rupture seeds are reproducible across runs
    rnd = random.Random()
    rnd.seed(self.hc.random_seed)
    site_coll = self.hc.site_collection
    params = dict(
        correl_model=haz_general.get_correl_model(self.hc),
        truncation_level=self.hc.truncation_level,
        maximum_distance=self.hc.maximum_distance)
    for lt_rlz in self._get_realizations():
        ltp = logictree.LogicTreeProcessor.from_hc(self.hc)
        gsims = ltp.parse_gmpe_logictree_path(lt_rlz.gsim_lt_path)
        all_ses = models.SES.objects.filter(
            ses_collection__lt_realization=lt_rlz,
            ordinal__isnull=False).order_by('ordinal')
        for ses in all_ses:
            # ids of the ruptures belonging to the given SES
            rupture_ids = models.SESRupture.objects.filter(
                ses=ses).values_list('id', flat=True)
            if not rupture_ids:
                # nothing to compute for an empty SES
                continue
            # one reproducible seed per rupture
            rupture_seeds = [rnd.randint(0, models.MAX_SINT_32)
                             for _ in rupture_ids]
            # splitting on IMTs to generate more tasks and save memory
            for imt in self.hc.intensity_measure_types:
                if self.hc.ground_motion_correlation_model is None:
                    # no correlation model: split on sites to avoid
                    # running out of memory on the workers for
                    # computations like the full Japan
                    for sites in block_splitter(site_coll, BLOCK_SIZE):
                        yield (self.job.id, params, imt, gsims, ses,
                               models.SiteCollection(sites),
                               rupture_ids, rupture_seeds)
                else:
                    # with a correlation model: split on ruptures
                    # (and their seeds, kept in lockstep) instead
                    rupt_iter = block_splitter(rupture_ids, BLOCK_SIZE)
                    seed_iter = block_splitter(rupture_seeds, BLOCK_SIZE)
                    for rupts, seeds in zip(rupt_iter, seed_iter):
                        yield (self.job.id, params, imt, gsims, ses,
                               site_coll, rupts, seeds)
def task_arg_gen(self, _block_size=None):
    """
    Loop through realizations and sources to generate a sequence of
    task arg tuples. Each tuple of args applies to a single task.

    Yielded results are tuples of the form
    (job_id, block, lt_rlz, ltp), where `block` is a list of
    (source, ses, seed) triples (seeds will be used to seed numpy
    for temporal occurence sampling) and `ltp` is the logic tree
    processor of the calculation.
    """
    hc = self.hc
    rnd = random.Random()
    rnd.seed(hc.random_seed)
    realizations = self._get_realizations()
    ltp = logictree.LogicTreeProcessor.from_hc(self.hc)
    for lt_rlz in realizations:
        sources = models.SourceProgress.objects.filter(
            is_complete=False, lt_realization=lt_rlz).order_by(
            'id').values_list('parsed_source_id', flat=True)
        all_ses = list(models.SES.objects.filter(
            ses_collection__lt_realization=lt_rlz,
            ordinal__isnull=False).order_by('ordinal'))
        # source, ses, seed triples
        sss = [(src, ses, rnd.randint(0, models.MAX_SINT_32))
               for src, ses in itertools.product(sources, all_ses)]
        # spread the triples evenly over the configured number of
        # concurrent tasks
        preferred_block_size = int(
            math.ceil(float(len(sources) * len(all_ses)) /
                      self.concurrent_tasks()))
        logs.LOG.info('Using block size %d', preferred_block_size)
        for block in block_splitter(sss, preferred_block_size):
            yield self.job.id, block, lt_rlz, ltp
def task_arg_gen(self, _block_size=None):
    """
    Loop through realizations and sources to generate a sequence of
    task arg tuples. Each tuple of args applies to a single task.

    Yielded results are tuples of the form
    job_id, src_ids, ses, seeds, ltp
    (seeds will be used to seed numpy for temporal occurence sampling;
    ltp is the logic tree processor of the calculation).
    """
    hc = self.hc
    # seeded RNG so the per-source seeds are reproducible across runs
    rnd = random.Random()
    rnd.seed(hc.random_seed)
    realizations = self._get_realizations()
    ltp = logictree.LogicTreeProcessor.from_hc(self.hc)
    for lt_rlz in realizations:
        # ids of the sources not yet computed for this realization
        sources = models.SourceProgress.objects\
            .filter(is_complete=False, lt_realization=lt_rlz)\
            .order_by('id')\
            .values_list('parsed_source_id', flat=True)
        all_ses = list(models.SES.objects.filter(
            ses_collection__lt_realization=lt_rlz,
            ordinal__isnull=False).order_by('ordinal'))
        for src_ids in block_splitter(sources, self.preferred_block_size):
            for ses in all_ses:
                # compute seeds for the sources
                src_seeds = [rnd.randint(0, models.MAX_SINT_32)
                             for _ in src_ids]
                yield self.job.id, src_ids, ses, src_seeds, ltp
def task_arg_gen(self):
    """
    Loop through realizations and sources to generate a sequence of
    task arg tuples. Each tuple of args applies to a single task.

    Yielded results are tuples of the form
    job_id, src_ids, ses, task_seed
    (task_seed will be used to seed numpy for temporal occurence
    sampling).
    """
    rnd = random.Random()
    rnd.seed(self.hc.random_seed)
    for lt_rlz in self._get_realizations():
        # ids of the sources still to be computed, in a stable order
        incomplete = models.SourceProgress.objects.filter(
            is_complete=False, lt_realization=lt_rlz).order_by('id')
        sources = incomplete.values_list('parsed_source_id', flat=True)
        all_ses = list(models.SES.objects.filter(
            ses_collection__lt_realization=lt_rlz,
            ordinal__isnull=False).order_by('ordinal'))
        for src_ids in block_splitter(sources, self.preferred_block_size):
            for ses in all_ses:
                yield (self.job.id, src_ids, ses,
                       rnd.randint(0, models.MAX_SINT_32))
def parallelize(self, task_func, task_arg_gen):
    """
    Given a callable and a task arg generator, apply the callable to
    the arguments in parallel. To save memory the tasks are spawned in
    blocks with maximum size defined by the method .concurrent_tasks().
    The order is not preserved.

    :param task_func: a `celery` task callable
    :param task_arg_gen: an iterable over positional arguments

    NB: if the environment variable OQ_NO_DISTRIBUTE is set the
    tasks are run sequentially in the current process.
    """
    taskname = task_func.__name__
    logs.LOG.debug('building arglist')
    # materialize the generator so the total is known for progress logs
    arglist = list(task_arg_gen)
    total = len(arglist)
    logs.LOG.progress('spawning %d tasks of kind %s', total, taskname)
    ntasks = 0
    for argblock in general.block_splitter(
            arglist, self.concurrent_tasks()):
        # results are discarded (no-op callback)
        tasks.parallelize(task_func, argblock, lambda _: None)
        ntasks += len(argblock)
        percent = math.ceil(float(ntasks) / total * 100)
        logs.LOG.progress('> %s %3d%% complete', taskname, percent)
def task_arg_gen(self, block_size):
    """
    Generate task args for the first phase of the disaggregation
    calculations. This phase is concerned with computing hazard
    curves, which must be completed in full before disaggregation
    calculation can begin.

    See also :meth:`disagg_task_arg_gen`.

    :param int block_size:
        The number of items per task. In this case, this the number
        of sources for hazard curve calc task, or number of sites for
        disagg calc tasks.
    """
    realizations = models.LtRealization.objects.filter(
        hazard_calculation=self.hc, is_complete=False)

    # distribute only the hazard-curve tasks here; the disaggregation
    # phase has its own generator
    for lt_rlz in realizations:
        incomplete = models.SourceProgress.objects.filter(
            is_complete=False, lt_realization=lt_rlz).order_by('id')
        src_ids = incomplete.values_list('parsed_source_id', flat=True)

        for src_block in general_utils.block_splitter(src_ids, block_size):
            # job_id, source id block, lt rlz, calc_type
            yield (self.job.id, src_block, lt_rlz.id, 'hazard_curve')
def task_arg_gen(self, block_size, check_num_task=True):
    """
    Loop through realizations and sources to generate a sequence of
    task arg tuples. Each tuple of args applies to a single task.

    For this default implementation, yielded results are triples of
    (job_id, realization_id, source_id_list). Override this in
    subclasses as necessary.

    :param int block_size:
        The (max) number of work items for each each task. In this
        case, sources.
    """
    point_source_block_size = self.point_source_block_size()
    ltp = logictree.LogicTreeProcessor.from_hc(self.hc)
    n = 0  # number of yielded arguments
    for lt_rlz in self._get_realizations():
        # point sources are distributed in different sized chunks than
        # the other source typologies, so handle them first
        for block in block_splitter(self._get_point_source_ids(lt_rlz),
                                    point_source_block_size):
            yield (self.job.id, block, lt_rlz.id, ltp)
            n += 1
        # now the area, fault and other non-point sources
        for block in block_splitter(self._get_source_ids(lt_rlz),
                                    block_size):
            yield (self.job.id, block, lt_rlz.id, ltp)
            n += 1
    # this sanity check should go into a unit test, and will likely
    # go there in the future
    if check_num_task:
        num_tasks = models.JobStats.objects.get(
            oq_job=self.job.id).num_tasks
        assert num_tasks == n, 'Expected %d tasks, got %d' % (num_tasks, n)
def test_block_splitter(self):
    """A list of 10 items split with block size 3 yields 3+3+3+1."""
    expected = [
        [0, 1, 2],
        [3, 4, 5],
        [6, 7, 8],
        [9],
    ]
    # list(...) instead of the identity comprehension [x for x in ...]
    actual = list(block_splitter(self.DATA, 3))
    self.assertEqual(expected, actual)
def compute_gmf_arg_gen(self):
    """
    Argument generator for the task compute_gmf. For each SES yields a
    tuple of the form (job_id, params, imt, gsims, ses, site_coll,
    rupture_ids, rupture_seeds).
    """
    # seeded RNG so the rupture seeds are reproducible across runs
    rnd = random.Random()
    rnd.seed(self.hc.random_seed)
    site_coll = self.hc.site_collection
    params = dict(
        correl_model=haz_general.get_correl_model(self.hc),
        truncation_level=self.hc.truncation_level,
        maximum_distance=self.hc.maximum_distance)
    for lt_rlz in self._get_realizations():
        # NOTE(review): ltp looks loop-invariant (built only from
        # self.hc) — presumably it could be hoisted; confirm
        ltp = logictree.LogicTreeProcessor.from_hc(self.hc)
        gsims = ltp.parse_gmpe_logictree_path(lt_rlz.gsim_lt_path)
        all_ses = models.SES.objects.filter(
            ses_collection__lt_realization=lt_rlz,
            ordinal__isnull=False).order_by('ordinal')
        for ses in all_ses:
            # count the ruptures in the given SES
            rupture_ids = models.SESRupture.objects.filter(
                ses=ses).values_list('id', flat=True)
            if not rupture_ids:
                continue
            # compute the associated seeds
            rupture_seeds = [rnd.randint(0, models.MAX_SINT_32)
                             for _ in range(len(rupture_ids))]
            # splitting on IMTs to generate more tasks and save memory
            for imt in self.hc.intensity_measure_types:
                if self.hc.ground_motion_correlation_model is None:
                    # we split on sites to avoid running out of memory
                    # on the workers for computations like the full Japan
                    for sites in block_splitter(site_coll, BLOCK_SIZE):
                        yield (self.job.id, params, imt, gsims, ses,
                               models.SiteCollection(sites),
                               rupture_ids, rupture_seeds)
                else:
                    # we split on ruptures to avoid running out of memory;
                    # ids and seeds are chunked in lockstep
                    rupt_iter = block_splitter(rupture_ids, BLOCK_SIZE)
                    seed_iter = block_splitter(rupture_seeds, BLOCK_SIZE)
                    for rupts, seeds in zip(rupt_iter, seed_iter):
                        yield (self.job.id, params, imt, gsims, ses,
                               site_coll, rupts, seeds)
def test_block_splitter_with_iter(self):
    # The input is a plain iterator, i.e. a data set of unknown length
    data = iter(range(10))
    expected = [
        [0, 1, 2],
        [3, 4, 5],
        [6, 7, 8],
        [9],
    ]
    blocks = [blk for blk in block_splitter(data, 3)]
    self.assertEqual(expected, blocks)
def test_block_splitter_with_generator(self):
    # Test the block with a data set of unknown length
    # (such as a generator)
    expected = [
        [0, 1, 2],
        [3, 4, 5],
        [6, 7, 8],
        [9],
    ]
    blocks = [blk for blk in block_splitter(xrange(10), 3)]
    self.assertEqual(expected, blocks)
def task_arg_gen(self, block_size):
    """
    Loop through realizations and sources to generate a sequence of
    task arg tuples. Each tuple of args applies to a single task.

    For this default implementation, yielded results are triples of
    (job_id, realization_id, source_id_list). Override this in
    subclasses as necessary.

    :param int block_size:
        The (max) number of work items for each each task. In this
        case, sources.
    """
    point_block_size = self.point_source_block_size()
    ltp = logictree.LogicTreeProcessor.from_hc(self.hc)
    n = 0  # number of yielded arguments
    for lt_rlz in self._get_realizations():
        sm = self.rlz_to_sm[lt_rlz]
        # point sources are distributed in different sized chunks than
        # the other source typologies, so handle them first
        for block in block_splitter(self.sources_per_model[sm, 'point'],
                                    point_block_size):
            yield (self.job.id, block, lt_rlz.id, ltp)
            n += 1
        # now the area, fault and other non-point sources
        for block in block_splitter(self.sources_per_model[sm, 'other'],
                                    block_size):
            yield (self.job.id, block, lt_rlz.id, ltp)
            n += 1
def block_split(self, items, max_block_size=MAX_BLOCK_SIZE):
    """
    Split the given items in blocks, depending on the parameter
    concurrent tasks. Notice that in order to save memory there is a
    maximum block size of MAX_BLOCK_SIZE items.

    :param list items: the items to split in blocks
    """
    # NB: the previous version applied '% MAX_BLOCK_SIZE' to the
    # docstring literal, turning it into a plain expression statement:
    # the function ended up with no docstring at all and the
    # interpolation result was discarded.
    assert len(items) > 0, 'No items in %s' % items
    num_rlzs = len(self._get_realizations())
    # NOTE(review): `ceil` here appears to be a two-argument
    # ceil-division helper (not math.ceil) — confirm against its import
    bs = min(ceil(len(items), ceil(self.concurrent_tasks(), num_rlzs)),
             max_block_size)
    logs.LOG.warn('Using block size=%d', bs)
    return block_splitter(items, bs)
def do_hazard_map_post_process(job):
    """
    Create and distribute tasks for processing hazard curves into
    hazard maps.

    :param job:
        A :class:`openquake.engine.db.models.OqJob` which has some
        hazard curves associated with it.
    """
    logs.LOG.debug('> Post-processing - Hazard Maps')
    block_size = int(config.get('hazard', 'concurrent_tasks'))

    poes = job.hazard_calculation.poes_hazard_maps

    # Stats for debug logging:
    hazard_curve_ids = models.HazardCurve.objects.filter(
        output__oq_job=job).values_list('id', flat=True)
    logs.LOG.debug('num haz curves: %s' % len(hazard_curve_ids))

    # Limit the number of concurrent tasks to the configured
    # concurrency level:
    block_gen = block_splitter(hazard_curve_ids, block_size)
    total_blocks = int(math.ceil(len(hazard_curve_ids) /
                                 float(block_size)))

    for i, block in enumerate(block_gen):
        logs.LOG.debug('> Hazard post-processing block, %s of %s'
                       % (i + 1, total_blocks))

        if openquake.engine.no_distribute():
            # just execute the post-processing using the plain function
            # form of the task
            for hazard_curve_id in block:
                hazard_curves_to_hazard_map_task(job.id, hazard_curve_id,
                                                 poes)
        else:
            tasks = []
            for hazard_curve_id in block:
                tasks.append(hazard_curves_to_hazard_map_task.subtask(
                    (job.id, hazard_curve_id, poes)))
            results = TaskSet(tasks=tasks).apply_async()

            # re-raise any exception raised inside the tasks
            utils_tasks._check_exception(results)

        logs.LOG.debug('< Done Hazard Map post-processing block, %s of %s'
                       % (i + 1, total_blocks))
    logs.LOG.debug('< Done post-processing - Hazard Maps')
def task_arg_gen(self, block_size):
    """
    Loop through realizations and sources to generate a sequence of
    task arg tuples. Each tuple of args applies to a single task.

    Yielded results are 6-uples of the form
    (job_id, sites, rupture_id, gmf_id, task_seed, realizations)
    (task_seed will be used to seed numpy for temporal occurence
    sampling).

    :param int block_size:
        The number of work items for each task. Fixed to 1.
    """
    rnd = random.Random()
    rnd.seed(self.hc.random_seed)
    for site_block in block_splitter(self.hc.site_collection, BLOCK_SIZE):
        yield (self.job.id,
               models.SiteCollection(site_block),
               self.rupture,
               self.gmf.id,
               rnd.randint(0, models.MAX_SINT_32),
               self.hc.number_of_ground_motion_fields)
def disagg_task_arg_gen(self, block_size):
    """
    Generate task args for the second phase of disaggregation
    calculations. This phase is concerned with computing the
    disaggregation histograms.

    :param int block_size:
        The number of items per task. In this case, this the number of
        sources for hazard curve calc task, or number of sites for
        disagg calc tasks.
    """
    realizations = models.LtRealization.objects.filter(
        hazard_calculation=self.hc, is_complete=False)

    # distribute tasks for disaggregation histogram computation
    for lt_rlz in realizations:
        site_blocks = general_utils.block_splitter(
            self.hc.site_collection, block_size)
        for sites in site_blocks:
            # job_id, Site block, lt rlz, calc_type
            yield (self.job.id, sites, lt_rlz.id, 'disagg')
def disagg_task_arg_gen(self, block_size):
    """
    Generate task args for the second phase of disaggregation
    calculations. This phase is concerned with computing the
    disaggregation histograms.

    :param int block_size:
        The number of items per task. In this case, this the number of
        sources for hazard curve calc task, or number of sites for
        disagg calc tasks.
    """
    ltp = logictree.LogicTreeProcessor.from_hc(self.hc)
    realizations = models.LtRealization.objects.filter(
        hazard_calculation=self.hc)

    # distribute tasks for disaggregation histogram computation
    for lt_rlz in realizations:
        site_blocks = general_utils.block_splitter(
            self.hc.site_collection, block_size)
        for sites in site_blocks:
            yield self.job.id, sites, lt_rlz.id, ltp
def disagg_task_arg_gen(self, block_size):
    """
    Generate task args for the second phase of disaggregation
    calculations. This phase is concerned with computing the
    disaggregation histograms.

    :param int block_size:
        The number of items per task. In this case, this the number of
        sources for hazard curve calc task, or number of sites for
        disagg calc tasks.
    """
    ltp = logictree.LogicTreeProcessor.from_hc(self.hc)
    realizations = models.LtRealization.objects.filter(
        hazard_calculation=self.hc, is_complete=False)

    # distribute tasks for disaggregation histogram computation
    for lt_rlz in realizations:
        site_blocks = general_utils.block_splitter(
            self.hc.site_collection, block_size)
        for sites in site_blocks:
            # job_id, Site block, lt rlz, calc_type
            yield (self.job.id, sites, lt_rlz.id, ltp, 'disagg')
def task_arg_gen(self):
    """
    Loop through realizations and sources to generate a sequence of
    task arg tuples. Each tuple of args applies to a single task.

    Yielded results are 6-uples of the form
    (job_id, sites, rupture_id, gmf_id, task_seed, realizations,
    task_no) (task_seed will be used to seed numpy for temporal
    occurence sampling).
    """
    rnd = random.Random()
    rnd.seed(self.hc.random_seed)
    # TODO: fix the block size dependency
    # (https://bugs.launchpad.net/oq-engine/+bug/1225287)
    # then self.block_split can be used, consistently with the
    # other calculators
    site_blocks = block_splitter(self.hc.site_collection, 1000)
    for task_no, sites in enumerate(site_blocks):
        yield (self.job.id,
               models.SiteCollection(sites),
               self.rupture,
               self.gmf.id,
               rnd.randint(0, models.MAX_SINT_32),
               self.hc.number_of_ground_motion_fields,
               task_no)
def task_arg_gen(self, block_size, _check_num_task=True):
    """
    Loop through realizations and sources to generate a sequence of
    task arg tuples. Each tuple of args applies to a single task.

    Yielded results are 6-uples of the form
    (job_id, sites, rupture_id, gmfcoll_id, task_seed, realizations)
    (task_seed will be used to seed numpy for temporal occurence
    sampling).

    :param int block_size:
        The number of work items for each task. Fixed to 1.
    """
    rnd = random.Random()
    rnd.seed(self.hc.random_seed)
    rupture_id = self.job.parsedrupture.id
    for site_block in block_splitter(self.hc.site_collection, BLOCK_SIZE):
        yield (self.job.id,
               models.SiteCollection(site_block),
               rupture_id,
               self.gmfcoll.id,
               rnd.randint(0, models.MAX_SINT_32),
               self.hc.number_of_ground_motion_fields)
def task_arg_gen(self, block_size):
    """
    Loop through realizations and sources to generate a sequence of
    task arg tuples. Each tuple of args applies to a single task.

    Yielded results are 6-uples of the form
    (job_id, sites, rupture_id, output_id, task_seed, realizations)
    (task_seed will be used to seed numpy for temporal occurence
    sampling).

    :param int block_size:
        The number of work items for each task. Fixed to 1.
    """
    rnd = random.Random()
    rnd.seed(self.hc.random_seed)
    inp = models.inputs4hcalc(self.hc.id, 'rupture_model')[0]
    ruptures = models.ParsedRupture.objects.filter(input__id=inp.id)
    # there is only one rupture model: take its id directly instead of
    # materializing the whole queryset into a list just for element 0
    rupture_id = ruptures[0].id
    for sites in block_splitter(self.hc.site_collection, BLOCK_SIZE):
        task_seed = rnd.randint(0, MAX_SINT_32)
        yield (self.job.id, SiteCollection(sites), rupture_id,
               self.output.id, task_seed,
               self.hc.number_of_ground_motion_fields)
def do_post_process(job):
    """
    Run the GMF to hazard curve post-processing tasks for the given
    ``job``.

    :param job:
        A :class:`openquake.engine.db.models.OqJob` instance.
    """
    logs.LOG.debug('> Post-processing - GMFs to Hazard Curves')
    block_size = int(config.get('hazard', 'concurrent_tasks'))
    block_gen = block_splitter(gmf_post_process_arg_gen(job), block_size)

    hc = job.hazard_calculation

    # Stats for debug logging:
    n_imts = len(hc.intensity_measure_types_and_levels)
    n_sites = len(hc.points_to_compute())
    n_rlzs = models.LtRealization.objects.filter(
        hazard_calculation=hc).count()
    # NOTE(review): total_blocks is an estimate from imts*sites*rlzs;
    # presumably it matches the length of gmf_post_process_arg_gen —
    # confirm, it is only used for the progress messages
    total_blocks = int(math.ceil(
        (n_imts * n_sites * n_rlzs) / float(block_size)))

    for i, block in enumerate(block_gen):
        logs.LOG.debug('> GMF post-processing block, %s of %s'
                       % (i + 1, total_blocks))

        # Run the tasks in blocks, to avoid overqueueing:
        tasks = []
        for the_args in block:
            tasks.append(gmf_to_hazard_curve_task.subtask(the_args))
        results = TaskSet(tasks=tasks).apply_async()

        # Check for Exceptions in the results and raise
        utils_tasks._check_exception(results)

        logs.LOG.debug('< Done GMF post-processing block, %s of %s'
                       % (i + 1, total_blocks))

    logs.LOG.debug('< Done post-processing - GMFs to Hazard Curves')
def test_block_splitter_block_size_lt_zero(self):
    # a negative block size must make the generator raise on first use
    self.assertRaises(ValueError, block_splitter(self.DATA, -1).next)
def test_block_splitter_block_size_gt_data_len(self):
    """A block size larger than the data yields a single block."""
    expected = [self.DATA]
    # list(...) instead of the identity comprehension [x for x in ...]
    actual = list(block_splitter(self.DATA, 11))
    self.assertEqual(expected, actual)
def test_block_splitter_zero_block_size(self):
    # a zero block size must make the generator raise on first use
    self.assertRaises(ValueError, block_splitter(self.DATA, 0).next)
def do_aggregate_post_proc(self):
    """
    Grab hazard data for all realizations and sites from the database
    and compute mean and/or quantile aggregates (depending on which
    options are enabled in the calculation).

    Post-processing results will be stored directly into the database.
    """
    num_rlzs = models.LtRealization.objects.filter(
        lt_model__hazard_calculation=self.hc).count()

    # NB: integer division (Python 2); assumes num_rlzs > 0
    num_site_blocks_per_incr = int(CURVE_CACHE_SIZE) / int(num_rlzs)
    if num_site_blocks_per_incr == 0:
        # This means we have `num_rlzs` >= `CURVE_CACHE_SIZE`.
        # The minimum number of sites should be 1.
        num_site_blocks_per_incr = 1
    slice_incr = num_site_blocks_per_incr * num_rlzs  # unit: num records

    if self.hc.mean_hazard_curves:
        # create a new `HazardCurve` 'container' record for mean
        # curves (virtual container for multiple imts)
        models.HazardCurve.objects.create(
            output=models.Output.objects.create_output(
                self.job, "mean-curves-multi-imt",
                "hazard_curve_multi"),
            statistics="mean", imt=None,
            investigation_time=self.hc.investigation_time)

    if self.hc.quantile_hazard_curves:
        for quantile in self.hc.quantile_hazard_curves:
            # create a new `HazardCurve` 'container' record for quantile
            # curves (virtual container for multiple imts)
            models.HazardCurve.objects.create(
                output=models.Output.objects.create_output(
                    self.job, 'quantile(%s)-curves' % quantile,
                    "hazard_curve_multi"),
                statistics="quantile", imt=None, quantile=quantile,
                investigation_time=self.hc.investigation_time)

    for imt, imls in self.hc.intensity_measure_types_and_levels.items():
        im_type, sa_period, sa_damping = from_string(imt)

        # prepare `output` and `hazard_curve` containers in the DB:
        container_ids = dict()
        if self.hc.mean_hazard_curves:
            mean_output = models.Output.objects.create_output(
                job=self.job,
                display_name='Mean Hazard Curves %s' % imt,
                output_type='hazard_curve'
            )
            mean_hc = models.HazardCurve.objects.create(
                output=mean_output,
                investigation_time=self.hc.investigation_time,
                imt=im_type,
                imls=imls,
                sa_period=sa_period,
                sa_damping=sa_damping,
                statistics='mean'
            )
            container_ids['mean'] = mean_hc.id

        if self.hc.quantile_hazard_curves:
            for quantile in self.hc.quantile_hazard_curves:
                q_output = models.Output.objects.create_output(
                    job=self.job,
                    display_name=(
                        '%s quantile Hazard Curves %s' % (quantile, imt)
                    ),
                    output_type='hazard_curve'
                )
                q_hc = models.HazardCurve.objects.create(
                    output=q_output,
                    investigation_time=self.hc.investigation_time,
                    imt=im_type,
                    imls=imls,
                    sa_period=sa_period,
                    sa_damping=sa_damping,
                    statistics='quantile',
                    quantile=quantile
                )
                container_ids['q%s' % quantile] = q_hc.id

        all_curves_for_imt = models.order_by_location(
            models.HazardCurveData.objects.all_curves_for_imt(
                self.job.id, im_type, sa_period, sa_damping))

        with transaction.commit_on_success(using='job_init'):
            inserter = writer.CacheInserter(
                models.HazardCurveData, CURVE_CACHE_SIZE)

            for chunk in models.queryset_iter(all_curves_for_imt,
                                              slice_incr):
                # slice each chunk by `num_rlzs` into `site_chunk`
                # and compute the aggregate
                # NOTE(review): assumes the curves are ordered by
                # location so that each group of num_rlzs consecutive
                # records refers to a single site — confirm against
                # models.order_by_location
                for site_chunk in block_splitter(chunk, num_rlzs):
                    site = site_chunk[0].location
                    curves_poes = [x.poes for x in site_chunk]
                    curves_weights = [x.weight for x in site_chunk]

                    # do means and quantiles
                    # quantiles first:
                    if self.hc.quantile_hazard_curves:
                        for quantile in self.hc.quantile_hazard_curves:
                            if self.hc.number_of_logic_tree_samples == 0:
                                # explicitly weighted quantiles
                                q_curve = weighted_quantile_curve(
                                    curves_poes, curves_weights, quantile
                                )
                            else:
                                # implicitly weighted quantiles
                                q_curve = quantile_curve(
                                    curves_poes, quantile
                                )
                            inserter.add(
                                models.HazardCurveData(
                                    hazard_curve_id=(
                                        container_ids['q%s' % quantile]),
                                    poes=q_curve.tolist(),
                                    location=site.wkt)
                            )

                    # then means
                    if self.hc.mean_hazard_curves:
                        m_curve = mean_curve(
                            curves_poes, weights=curves_weights
                        )
                        inserter.add(
                            models.HazardCurveData(
                                hazard_curve_id=container_ids['mean'],
                                poes=m_curve.tolist(),
                                location=site.wkt)
                        )
            inserter.flush()
def do_aggregate_post_proc(self):
    """
    Grab hazard data for all realizations and sites from the database
    and compute mean and/or quantile aggregates (depending on which
    options are enabled in the calculation).

    Post-processing results will be stored directly into the database.
    """
    num_rlzs = models.LtRealization.objects.filter(
        hazard_calculation=self.hc).count()

    # NB: integer division (Python 2); assumes num_rlzs > 0
    num_site_blocks_per_incr = int(CURVE_CACHE_SIZE) / int(num_rlzs)
    if num_site_blocks_per_incr == 0:
        # This means we have `num_rlzs` >= `CURVE_CACHE_SIZE`.
        # The minimum number of sites should be 1.
        num_site_blocks_per_incr = 1
    slice_incr = num_site_blocks_per_incr * num_rlzs  # unit: num records

    if self.hc.mean_hazard_curves:
        # create a new `HazardCurve` 'container' record for mean
        # curves (virtual container for multiple imts)
        models.HazardCurve.objects.create(
            output=models.Output.objects.create_output(
                self.job, "mean-curves-multi-imt",
                "hazard_curve_multi"),
            statistics="mean", imt=None,
            investigation_time=self.hc.investigation_time)

    if self.hc.quantile_hazard_curves:
        for quantile in self.hc.quantile_hazard_curves:
            # create a new `HazardCurve` 'container' record for quantile
            # curves (virtual container for multiple imts)
            models.HazardCurve.objects.create(
                output=models.Output.objects.create_output(
                    self.job, 'quantile(%s)-curves' % quantile,
                    "hazard_curve_multi"),
                statistics="quantile", imt=None, quantile=quantile,
                investigation_time=self.hc.investigation_time)

    for imt, imls in self.hc.intensity_measure_types_and_levels.items():
        im_type, sa_period, sa_damping = models.parse_imt(imt)

        # prepare `output` and `hazard_curve` containers in the DB:
        container_ids = dict()
        if self.hc.mean_hazard_curves:
            mean_output = models.Output.objects.create_output(
                job=self.job,
                display_name='mean-curves-%s' % imt,
                output_type='hazard_curve')
            mean_hc = models.HazardCurve.objects.create(
                output=mean_output,
                investigation_time=self.hc.investigation_time,
                imt=im_type,
                imls=imls,
                sa_period=sa_period,
                sa_damping=sa_damping,
                statistics='mean')
            container_ids['mean'] = mean_hc.id

        if self.hc.quantile_hazard_curves:
            for quantile in self.hc.quantile_hazard_curves:
                q_output = models.Output.objects.create_output(
                    job=self.job,
                    display_name=('quantile(%s)-curves-%s'
                                  % (quantile, imt)),
                    output_type='hazard_curve')
                q_hc = models.HazardCurve.objects.create(
                    output=q_output,
                    investigation_time=self.hc.investigation_time,
                    imt=im_type,
                    imls=imls,
                    sa_period=sa_period,
                    sa_damping=sa_damping,
                    statistics='quantile',
                    quantile=quantile)
                container_ids['q%s' % quantile] = q_hc.id

        all_curves_for_imt = models.order_by_location(
            models.HazardCurveData.objects.all_curves_for_imt(
                self.job.id, im_type, sa_period, sa_damping))

        with transaction.commit_on_success(using='reslt_writer'):
            inserter = writer.CacheInserter(models.HazardCurveData,
                                            CURVE_CACHE_SIZE)

            for chunk in models.queryset_iter(all_curves_for_imt,
                                              slice_incr):
                # slice each chunk by `num_rlzs` into `site_chunk`
                # and compute the aggregate
                # NOTE(review): assumes the curves are ordered by
                # location so that each group of num_rlzs consecutive
                # records refers to a single site — confirm against
                # models.order_by_location
                for site_chunk in block_splitter(chunk, num_rlzs):
                    site = site_chunk[0].location
                    curves_poes = [x.poes for x in site_chunk]
                    curves_weights = [x.weight for x in site_chunk]

                    # do means and quantiles
                    # quantiles first:
                    if self.hc.quantile_hazard_curves:
                        for quantile in self.hc.quantile_hazard_curves:
                            if self.hc.number_of_logic_tree_samples == 0:
                                # explicitly weighted quantiles
                                q_curve = weighted_quantile_curve(
                                    curves_poes, curves_weights,
                                    quantile)
                            else:
                                # implicitly weighted quantiles
                                q_curve = quantile_curve(
                                    curves_poes, quantile)
                            inserter.add(
                                models.HazardCurveData(
                                    hazard_curve_id=(
                                        container_ids['q%s' % quantile]),
                                    poes=q_curve.tolist(),
                                    location=site.wkt))

                    # then means
                    if self.hc.mean_hazard_curves:
                        m_curve = mean_curve(curves_poes,
                                             weights=curves_weights)
                        inserter.add(
                            models.HazardCurveData(
                                hazard_curve_id=container_ids['mean'],
                                poes=m_curve.tolist(),
                                location=site.wkt))
            inserter.flush()