def do_quantiles(self, sites, realizations, quantiles,
                 curve_serializer=None,
                 curve_task=compute_quantile_curves,
                 map_func=None, map_serializer=None):
    """Trigger the calculation/serialization of quantile curves/maps.

    The calculated quantile curves/maps will only be serialized if the
    corresponding `serializer` parameter was set.

    :param sites: The sites for which to calculate quantile curves/maps.
    :type sites: list of :py:class:`openquake.shapes.Site`
    :param realizations: The number of realizations that were calculated
    :type realizations: :py:class:`int`
    :param quantiles: The quantiles to calculate
    :type quantiles: list of float
    :param curve_serializer: A serializer for the calculated curves,
        receives the KVS keys of the calculated curves in its single
        parameter.
    :type curve_serializer: function([string])
    :param map_serializer: A serializer for the calculated maps, receives
        the KVS keys of the calculated maps in its single parameter.
    :type map_serializer: function([string])
    :param curve_task: The `celery` task to use for the curve calculation,
        it takes the following parameters:
            * job ID
            * the sites for which to calculate the hazard curves
    :type curve_task: function(string, [:py:class:`openquake.shapes.Site`])
    :param map_func: A function that computes quantile hazard maps.
    :type map_func: function(:py:class:`openquake.engine.JobContext`)
    :returns: `None`
    """
    if not quantiles:
        return

    # compute and serialize quantile hazard curves
    LOG.info("Computing quantile hazard curves")

    tf_args = dict(job_id=self.job_ctxt.job_id, realizations=realizations,
                   quantiles=quantiles)
    ath_args = dict(sites=sites, quantiles=quantiles)
    utils_tasks.distribute(
        curve_task, ("sites", [[s] for s in sites]), tf_args=tf_args,
        ath=curve_serializer, ath_args=ath_args)

    if self.poes_hazard_maps:
        assert map_func, "No calculation function for quantile maps set."
        assert map_serializer, "No serializer for the quantile maps set."

        # quantile maps
        LOG.info("Computing quantile hazard maps")
        map_func(self.job_ctxt.job_id, sites, quantiles,
                 self.job_ctxt.imls, self.poes_hazard_maps)

        LOG.info("Serializing quantile maps for %s values" % len(quantiles))
        for quantile in quantiles:
            LOG.debug(">> quantile maps!")
            map_serializer(self.job_ctxt, sites, self.poes_hazard_maps,
                           quantile)
def test_distribute_with_failing_subtask(self):
    """At least one subtask failed, a `TaskFailed` exception is raised."""
    try:
        tasks.distribute(failing_task, ("data", range(5)))
    except Exception, exc:
        # The exception is raised by the first task.
        self.assertEqual(0, exc.args[0])
def post_process(self):
    """
    If requested, perform additional processing of GMFs to produce hazard
    curves.
    """
    logs.LOG.debug('> starting post processing')

    if self.hc.hazard_curves_from_gmfs:
        post_processing.do_post_process(self.job)

        # If `mean_hazard_curves` is True and/or `quantile_hazard_curves`
        # has some value (not an empty list), do this additional
        # post-processing.
        if self.hc.mean_hazard_curves or self.hc.quantile_hazard_curves:
            tasks = cls_post_processing.setup_tasks(
                self.job, self.job.hazard_calculation,
                curve_finder=models.HazardCurveData.objects,
                writers=dict(mean_curves=MeanCurveWriter,
                             quantile_curves=QuantileCurveWriter))

            utils_tasks.distribute(
                cls_post_processing.do_post_process,
                ("post_processing_task", tasks),
                tf_args=dict(job_id=self.job.id))

    logs.LOG.debug('< done with post processing')
def execute(self):
    """Loop over realizations (logic tree samples), split the geometry of
    interest into blocks of sites, and distribute Celery tasks to carry
    out the UHS computation.
    """
    job_ctxt = self.job_ctxt
    all_sites = job_ctxt.sites_to_compute()
    site_block_size = config.hazard_block_size()
    job_profile = job_ctxt.oq_job_profile

    src_model_rnd = random.Random(job_profile.source_model_lt_random_seed)
    gmpe_rnd = random.Random(job_profile.gmpe_lt_random_seed)

    for rlz in xrange(job_ctxt.oq_job_profile.realizations):

        # Sample the gmpe and source models:
        general.store_source_model(
            job_ctxt.job_id, src_model_rnd.getrandbits(32),
            job_ctxt.params, self.lt_processor)
        general.store_gmpe_map(
            job_ctxt.job_id, gmpe_rnd.getrandbits(32), self.lt_processor)

        for site_block in block_splitter(all_sites, site_block_size):

            tf_args = dict(job_id=job_ctxt.job_id, realization=rlz)

            num_tasks_completed = completed_task_count(job_ctxt.job_id)

            ath_args = dict(job_id=job_ctxt.job_id,
                            num_tasks=len(site_block),
                            start_count=num_tasks_completed)

            utils_tasks.distribute(
                compute_uhs_task, ('site', site_block), tf_args=tf_args,
                ath=uhs_task_handler, ath_args=ath_args)
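The loop above relies on a `block_splitter` helper to chunk the site list before handing each chunk to `utils_tasks.distribute`. Below is a minimal, self-contained sketch of such a chunking generator; the name `split_into_blocks` is hypothetical and this is not the actual openquake helper, just an illustration of the assumed behaviour (blocks of at most `block_size` items, with a shorter final block).

def split_into_blocks(items, block_size):
    """Yield successive blocks of at most `block_size` items (sketch only)."""
    if block_size < 1:
        raise ValueError("block_size must be at least 1")
    block = []
    for item in items:
        block.append(item)
        if len(block) == block_size:
            yield block
            block = []
    # Emit the (possibly shorter) trailing block, if any.
    if block:
        yield block

# Example: list(split_into_blocks(range(7), 3)) -> [[0, 1, 2], [3, 4, 5], [6]]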
def do_means(self, sites, realizations,
             curve_serializer=None,
             curve_task=tasks.compute_mean_curves,
             map_func=None,
             map_serializer=None):
    """Trigger the calculation of mean curves/maps, serialize as requested.

    The calculated mean curves/maps will only be serialized if the
    corresponding `serializer` parameter was set.

    :param sites: The sites for which to calculate mean curves/maps.
    :type sites: list of :py:class:`openquake.shapes.Site`
    :param realizations: The number of realizations that were calculated
    :type realizations: :py:class:`int`
    :param curve_serializer: A serializer for the calculated curves,
        receives the KVS keys of the calculated curves in its single
        parameter.
    :type curve_serializer: function([string])
    :param map_serializer: A serializer for the calculated maps, receives
        the KVS keys of the calculated maps in its single parameter.
    :type map_serializer: function([string])
    :param curve_task: The `celery` task to use for the curve calculation,
        it takes the following parameters:
            * job ID
            * the sites for which to calculate the hazard curves
    :type curve_task: function(string, [:py:class:`openquake.shapes.Site`])
    :param map_func: A function that computes mean hazard maps.
    :type map_func: function(:py:class:`openquake.job.Job`)
    :returns: `None`
    """
    if not self["COMPUTE_MEAN_HAZARD_CURVE"]:
        return

    # Compute and serialize the mean curves.
    LOG.info("Computing mean hazard curves")

    utils_tasks.distribute(
        self.number_of_tasks(), curve_task, ("sites", sites),
        dict(job_id=self.job_id, realizations=realizations),
        flatten_results=True, ath=curve_serializer)

    if self.poes_hazard_maps:
        assert map_func, "No calculation function for mean hazard maps set"
        assert map_serializer, "No serializer for the mean hazard maps set"

        LOG.info("Computing/serializing mean hazard maps")
        map_func(self.job_id, sites, self.imls, self.poes_hazard_maps)
        map_serializer(sites, self.poes_hazard_maps)
def do_means(self, sites, realizations,
             curve_serializer=None,
             curve_task=compute_mean_curves,
             map_func=None, map_serializer=None):
    """Trigger the calculation of mean curves/maps, serialize as requested.

    The calculated mean curves/maps will only be serialized if the
    corresponding `serializer` parameter was set.

    :param sites: The sites for which to calculate mean curves/maps.
    :type sites: list of :py:class:`openquake.shapes.Site`
    :param realizations: The number of realizations that were calculated
    :type realizations: :py:class:`int`
    :param curve_serializer: A serializer for the calculated curves,
        receives the KVS keys of the calculated curves in its single
        parameter.
    :type curve_serializer: function([string])
    :param map_serializer: A serializer for the calculated maps, receives
        the KVS keys of the calculated maps in its single parameter.
    :type map_serializer: function([string])
    :param curve_task: The `celery` task to use for the curve calculation,
        it takes the following parameters:
            * job ID
            * the sites for which to calculate the hazard curves
    :type curve_task: function(string, [:py:class:`openquake.shapes.Site`])
    :param map_func: A function that computes mean hazard maps.
    :type map_func: function(:py:class:`openquake.engine.JobContext`)
    :returns: `None`
    """
    if not self.job_ctxt["COMPUTE_MEAN_HAZARD_CURVE"]:
        return

    # Compute and serialize the mean curves.
    LOG.info("Computing mean hazard curves")

    tf_args = dict(job_id=self.job_ctxt.job_id, realizations=realizations)
    ath_args = dict(sites=sites, rtype="mean")
    utils_tasks.distribute(
        curve_task, ("sites", [[s] for s in sites]), tf_args=tf_args,
        ath=curve_serializer, ath_args=ath_args)

    if self.poes_hazard_maps:
        assert map_func, "No calculation function for mean hazard maps set"
        assert map_serializer, "No serializer for the mean hazard maps set"

        LOG.info("Computing/serializing mean hazard maps")
        map_func(self.job_ctxt.job_id, sites, self.job_ctxt.imls,
                 self.poes_hazard_maps)
        LOG.debug(">> mean maps!")
        map_serializer(self.job_ctxt, sites, self.poes_hazard_maps)
def test_distribute_with_keyword_argument_not_expected_by_task(self):
    """
    An unexpected keyword argument is passed to the subtask triggering a
    `TypeError` exception.
    """
    try:
        tasks.distribute(2, single_arg_called_a, ("data", range(5)))
    except tasks.WrongTaskParameters, exc:
        self.assertEqual(
            "single_arg_called_a() got an unexpected keyword argument "
            "'data'", exc.args[0])
def test_distribute_with_keyword_argument_not_expected_by_task(self):
    """
    An unexpected keyword argument is passed to the subtask triggering a
    `TypeError` exception.
    """
    try:
        tasks.distribute(single_arg_called_a, ("data", range(5)))
    except Exception, exc:
        self.assertEqual(
            "single_arg_called_a() got an unexpected keyword argument "
            "'data'", exc.args[0])
def test_distribute_with_type_error_and_no_exception_msg(self):
    """
    Exceptions without error messages should not result in another
    exception when being reraised.
    """
    from celery.result import TaskSetResult
    try:
        with patch('celery.task.sets.TaskSet.apply_async') as m2:
            m2.return_value = mock.Mock(spec=TaskSetResult)
            m2.return_value.join_native.side_effect = TypeError
            tasks.distribute(single_arg_called_a, ("a", range(5)))
    except Exception, exc:
        self.assertEqual((), exc.args)
def do_curves(self, sites, realizations, serializer=None,
              the_task=compute_hazard_curve):
    """Trigger the calculation of hazard curves, serialize as requested.

    The calculated curves will only be serialized if the `serializer`
    parameter is not `None`.

    :param sites: The sites for which to calculate hazard curves.
    :type sites: list of :py:class:`openquake.shapes.Site`
    :param realizations: The number of realizations to calculate
    :type realizations: :py:class:`int`
    :param serializer: A serializer for the calculated hazard curves,
        receives the KVS keys of the calculated hazard curves in its
        single parameter.
    :type serializer: a callable with a single parameter: list of strings
    :param the_task: The `celery` task to use for the hazard curve
        calculation, it takes the following parameters:
            * job ID
            * the sites for which to calculate the hazard curves
            * the logic tree realization number
    :type the_task: a callable taking three parameters
    :returns: KVS keys of the calculated hazard curves.
    :rtype: list of string
    """
    source_model_generator = random.Random()
    source_model_generator.seed(
        self.job_ctxt["SOURCE_MODEL_LT_RANDOM_SEED"])

    gmpe_generator = random.Random()
    gmpe_generator.seed(self.job_ctxt["GMPE_LT_RANDOM_SEED"])

    stats.pk_set(self.job_ctxt.job_id, "hcls_crealization", 0)
    for realization in xrange(0, realizations):
        stats.pk_inc(self.job_ctxt.job_id, "hcls_crealization")
        LOG.info("Calculating hazard curves for realization %s"
                 % realization)
        self.store_source_model(source_model_generator.getrandbits(32))
        self.store_gmpe_map(source_model_generator.getrandbits(32))

        tf_args = dict(job_id=self.job_ctxt.job_id,
                       realization=realization)
        ath_args = dict(sites=sites, rtype="curve", datum=realization)
        utils_tasks.distribute(
            the_task, ("sites", [[s] for s in sites]), tf_args=tf_args,
            ath=serializer, ath_args=ath_args)
def test_distribute_returns_results_in_right_order_when_flattened(self):
    """Results are returned in the right order when flattened."""
    expected = range(7)
    result = tasks.distribute(reflect_data_to_be_processed,
                              ("data", range(7)), flatten_results=True)
    self.assertEqual(expected, result)
def do_quantiles(self, sites, curve_serializer=None, map_serializer=None,
                 curve_task=tasks.compute_quantile_curves,
                 map_func=classical_psha.compute_quantile_hazard_maps):
    """Trigger the calculation/serialization of quantile curves/maps.

    The calculated quantile curves/maps will only be serialized if the
    corresponding `serializer` parameter was set.

    :param sites: The sites for which to calculate quantile curves/maps.
    :type sites: list of :py:class:`openquake.shapes.Site`
    :param curve_serializer: A serializer for the calculated curves,
        receives the KVS keys of the calculated curves in its single
        parameter.
    :type curve_serializer: function([string])
    :param map_serializer: A serializer for the calculated maps, receives
        the KVS keys of the calculated maps in its single parameter.
    :type map_serializer: function([string])
    :param curve_task: The `celery` task to use for the curve calculation,
        it takes the following parameters:
            * job ID
            * the sites for which to calculate the hazard curves
    :type curve_task: function(string, [:py:class:`openquake.shapes.Site`])
    :param map_func: A function that computes quantile hazard maps.
    :type map_func: function(:py:class:`openquake.job.Job`)
    :returns: `None`
    """
    # compute and serialize quantile hazard curves
    LOG.info("Computing quantile hazard curves")

    results = utils_tasks.distribute(
        self.number_of_tasks(), curve_task, ("sites", sites),
        dict(job_id=self.id), flatten_results=True)

    # collect hazard curve keys per quantile value
    quantiles = _collect_curve_keys_per_quantile(results)

    LOG.info("Serializing quantile curves for %s values" % len(quantiles))
    for curves in quantiles.values():
        curve_serializer(curves)

    # compute quantile hazard maps
    if (not self.param_set(classical_psha.POES_PARAM_NAME)
            or len(quantiles) < 1):
        return

    assert map_func, "No calculation function for quantile maps set."
    assert map_serializer, "No serializer for the quantile maps set."

    LOG.info("Computing quantile hazard maps")
    results = map_func(self)

    quantiles = _collect_map_keys_per_quantile(results)

    LOG.info("Serializing quantile maps for %s values" % len(quantiles))
    for maps in quantiles.values():
        map_serializer(maps)
def execute(self):
    """
    Entry point for triggering the computation.
    """
    LOGGER.debug("Executing scenario risk computation.")
    LOGGER.debug("This will calculate mean and standard deviation loss "
                 "values for the region defined in the job config.")

    vuln_model = vulnerability.load_vuln_model_from_kvs(
        self.job_ctxt.job_id)

    region_losses = []
    region_loss_map_data = {}

    region_data = distribute(
        general.compute_risk, ("block_id", self.job_ctxt.blocks_keys),
        tf_args=dict(job_id=self.job_ctxt.job_id,
                     vuln_model=vuln_model,
                     insured_losses=self._insured_losses))

    for block_data in region_data:
        region_losses.append(block_data[0])
        collect_region_data(block_data[1], region_loss_map_data)

    self._sum_region_losses = reduce(lambda x, y: x + y, region_losses)

    self._loss_map_data = [
        (site, data)
        for site, data in region_loss_map_data.iteritems()]
def test_distribute_with_other_args(self):
    """
    The subtask is invoked with the data to be processed as well as with
    other parameters.
    """
    # The keyword arguments below will be passed to the celery subtasks in
    # addition to the data that is to be processed.
    tf_args = {"1+1": 2, "2/1": 1}

    # We expect the subtasks to see the following positional and keyword
    # arguments respectively.
    expected = [
        ((), {"data_to_process": [13], "1+1": 2, "2/1": 1}),
        ((), {"data_to_process": [14], "1+1": 2, "2/1": 1})]

    # Two subtasks will be spawned and just return the arguments they
    # received.
    result = tasks.distribute(reflect_args,
                              ("data_to_process", [[13], [14]]),
                              tf_args=tf_args)
    self.assertEqual(expected, result)
def execute(self):
    """
    Entry point for triggering the computation.
    """
    LOGGER.debug("Executing scenario risk computation.")
    LOGGER.debug("This will calculate mean and standard deviation loss "
                 "values for the region defined in the job config.")

    vuln_model = vulnerability.load_vuln_model_from_kvs(
        self.job_ctxt.job_id)

    region_loss_map_data = {}

    region_losses = distribute(
        general.compute_risk, ("block_id", self.job_ctxt.blocks_keys),
        tf_args=dict(job_id=self.job_ctxt.job_id,
                     vuln_model=vuln_model,
                     insured_losses=self._insured_losses))

    for block_data in region_losses:
        self._region_losses.append(block_data[0])
        collect_region_data(block_data[1], region_loss_map_data)

    self._loss_map_data = [
        (site, data)
        for site, data in region_loss_map_data.iteritems()]
def test_distribute_with_other_args(self):
    """
    The subtask is invoked with the data to be processed as well as with
    other parameters.
    """
    # The keyword arguments below will be passed to the celery subtasks in
    # addition to the data that is to be processed.
    other_args = {"1+1": 2, "2/1": 1}

    # We expect the subtasks to see the following positional and keyword
    # arguments respectively.
    expected = [
        ((), {"data_to_process": [88], "1+1": 2, "2/1": 1}),
        ((), {"data_to_process": [99], "1+1": 2, "2/1": 1})]

    actual = []
    # Two subtasks will be spawned and just return the arguments they
    # received.
    result = tasks.distribute(2, reflect_args,
                              ("data_to_process", [88, 99]),
                              other_args=other_args)

    # Remove celery-injected keyword arguments.
    for args, kwargs in result:
        actual.append((args, actual_kwargs(kwargs)))
    self.assertEqual(expected, actual)
def test_distribute_with_ignore_result_set(self):
    """
    The specified number of subtasks is actually spawned even for tasks
    with ignore_result=True, and these run and complete.

    Since the results of the tasks are ignored, the only way to know that
    they ran and completed is to verify that the data they were supposed
    to write to the key value store is actually there.
    """

    def value(key):
        """Construct a test value for the given key."""
        return key[-3:] * 2

    keys = ["irtc:%s" % str(uuid.uuid4())[:8] for _ in xrange(5)]
    values = [value(uid) for uid in keys]
    data = zip(keys, values)

    result = tasks.distribute(ignore_result, ("data", [[d] for d in data]))
    # An empty list is returned for tasks with ignore_result=True
    # and no asynchronous task handler function.
    self.assertEqual(False, bool(result))

    # Give the tasks a bit of time to complete.
    time.sleep(0.1)

    for key, value in data:
        self.assertEqual(value, TestStore.get(key))
def test_distribute_returns_flattened_results_in_right_order(self):
    """Flattened results are returned in the right order."""
    expected = range(7)
    result = tasks.distribute(3, reflect_data_to_be_processed,
                              ("data", range(7)), flatten_results=True)
    self.assertEqual(expected, result)
def test_distribute_with_no_other_args(self):
    """The subtask is only invoked with the data to be processed."""
    # We expect the subtasks to see no positional arguments. The
    # data to be processed is passed in the keyword arguments.
    expected = [
        (), {"data_to_process": 11}, (), {"data_to_process": 12}]
    result = tasks.distribute(reflect_args, ("data_to_process", [11, 12]),
                              flatten_results=True)
    self.assertEqual(expected, result)
def test_distribute_with_too_little_data(self):
    """
    When the data to be processed is a list of N items and the specified
    cardinality is M with N < M, only N subtasks are invoked.
    """
    expected = ["hello"] * 3
    result = tasks.distribute(5, just_say_hello, ("data", range(3)))
    self.assertEqual(expected, result)
def do_means(self, sites, curve_serializer=None, map_serializer=None,
             curve_task=tasks.compute_mean_curves,
             map_func=classical_psha.compute_mean_hazard_maps):
    """Trigger the calculation of mean curves/maps, serialize as requested.

    The calculated mean curves/maps will only be serialized if the
    corresponding `serializer` parameter was set.

    :param sites: The sites for which to calculate mean curves/maps.
    :type sites: list of :py:class:`openquake.shapes.Site`
    :param curve_serializer: A serializer for the calculated curves,
        receives the KVS keys of the calculated curves in its single
        parameter.
    :type curve_serializer: function([string])
    :param map_serializer: A serializer for the calculated maps, receives
        the KVS keys of the calculated maps in its single parameter.
    :type map_serializer: function([string])
    :param curve_task: The `celery` task to use for the curve calculation,
        it takes the following parameters:
            * job ID
            * the sites for which to calculate the hazard curves
    :type curve_task: function(string, [:py:class:`openquake.shapes.Site`])
    :param map_func: A function that computes mean hazard maps.
    :type map_func: function(:py:class:`openquake.job.Job`)
    :returns: `None`
    """
    if not self.param_set("COMPUTE_MEAN_HAZARD_CURVE"):
        return

    # Compute and serialize the mean curves.
    LOG.info("Computing mean hazard curves")

    results = utils_tasks.distribute(
        self.number_of_tasks(), curve_task, ("sites", sites),
        dict(job_id=self.id), flatten_results=True)

    if curve_serializer:
        LOG.info("Serializing mean hazard curves")
        curve_serializer(results)

    if not self.param_set(classical_psha.POES_PARAM_NAME):
        return

    assert map_func, "No calculation function for mean hazard maps set."
    assert map_serializer, "No serializer for the mean hazard maps set."

    # Compute and serialize the mean maps.
    LOG.info("Computing/serializing mean hazard maps")
    results = map_func(self)
    LOG.debug("results = '%s'" % results)
    map_serializer(results)
def do_curves(self, sites, realizations, serializer=None,
              the_task=tasks.compute_hazard_curve):
    """Trigger the calculation of hazard curves, serialize as requested.

    The calculated curves will only be serialized if the `serializer`
    parameter is not `None`.

    :param sites: The sites for which to calculate hazard curves.
    :type sites: list of :py:class:`openquake.shapes.Site`
    :param realizations: The number of realizations to calculate
    :type realizations: :py:class:`int`
    :param serializer: A serializer for the calculated hazard curves,
        receives the KVS keys of the calculated hazard curves in its
        single parameter.
    :type serializer: a callable with a single parameter: list of strings
    :param the_task: The `celery` task to use for the hazard curve
        calculation, it takes the following parameters:
            * job ID
            * the sites for which to calculate the hazard curves
            * the logic tree realization number
    :type the_task: a callable taking three parameters
    :returns: KVS keys of the calculated hazard curves.
    :rtype: list of string
    """
    source_model_generator = random.Random()
    source_model_generator.seed(
        self.params.get("SOURCE_MODEL_LT_RANDOM_SEED", None))

    gmpe_generator = random.Random()
    gmpe_generator.seed(self.params.get("GMPE_LT_RANDOM_SEED", None))

    for realization in xrange(0, realizations):
        LOG.info("Calculating hazard curves for realization %s"
                 % realization)
        self.store_source_model(source_model_generator.getrandbits(32))
        self.store_gmpe_map(source_model_generator.getrandbits(32))

        utils_tasks.distribute(
            self.number_of_tasks(), the_task, ("site_list", sites),
            dict(job_id=self.job_id, realization=realization),
            flatten_results=True)

        if serializer:
            serializer(sites, realization)
def do_means(self, sites, curve_serializer=None, map_serializer=None,
             curve_task=tasks.compute_mean_curves,
             map_func=classical_psha.compute_mean_hazard_maps):
    """Trigger the calculation of mean curves/maps, serialize as requested.

    The calculated mean curves/maps will only be serialized if the
    corresponding `serializer` parameter was set.

    :param sites: The sites for which to calculate mean curves/maps.
    :type sites: list of :py:class:`openquake.shapes.Site`
    :param curve_serializer: A serializer for the calculated curves,
        receives the KVS keys of the calculated curves in its single
        parameter.
    :type curve_serializer: function([string])
    :param map_serializer: A serializer for the calculated maps, receives
        the KVS keys of the calculated maps in its single parameter.
    :type map_serializer: function([string])
    :param curve_task: The `celery` task to use for the curve calculation,
        it takes the following parameters:
            * job ID
            * the sites for which to calculate the hazard curves
    :type curve_task: function(string, [:py:class:`openquake.shapes.Site`])
    :param map_func: A function that computes mean hazard maps.
    :type map_func: function(:py:class:`openquake.job.Job`)
    :returns: `None`
    """
    if not self.param_set("COMPUTE_MEAN_HAZARD_CURVE"):
        return

    # Compute and serialize the mean curves.
    LOG.info("Computing mean hazard curves")

    results = utils_tasks.distribute(
        self.number_of_tasks(), curve_task, ("sites", sites),
        dict(job_id=self.id), flatten_results=True)

    if curve_serializer:
        LOG.info("Serializing mean hazard curves")
        curve_serializer(results)

    if not self.param_set(classical_psha.POES_PARAM_NAME):
        return

    assert map_func, "No calculation function for mean hazard maps set."
    assert map_serializer, "No serializer for the mean hazard maps set."

    # Compute and serialize the mean maps.
    LOG.info("Computing/serializing mean hazard maps")
    results = map_func(self)
    LOG.info("results = '%s'" % results)
    map_serializer(results)
def test_distribute_with_even_data_and_cardinality_above_one(self):
    """The data set divides evenly among the subtasks in the task set."""
    expected = (
        (), {"data_to_process": range(2)},
        (), {"data_to_process": range(2, 4)})
    [(args1, kwargs1), (args2, kwargs2)] = tasks.distribute(
        2, reflect_args, ("data_to_process", range(4)))
    self.assertEqual(
        expected,
        (args1, actual_kwargs(kwargs1), args2, actual_kwargs(kwargs2)))
def do_curves(self, sites, serializer=None,
              the_task=tasks.compute_hazard_curve):
    """Trigger the calculation of hazard curves, serialize as requested.

    The calculated curves will only be serialized if the `serializer`
    parameter is not `None`.

    :param sites: The sites for which to calculate hazard curves.
    :type sites: list of :py:class:`openquake.shapes.Site`
    :param serializer: A serializer for the calculated hazard curves,
        receives the KVS keys of the calculated hazard curves in its
        single parameter.
    :type serializer: a callable with a single parameter: list of strings
    :param the_task: The `celery` task to use for the hazard curve
        calculation, it takes the following parameters:
            * job ID
            * the sites for which to calculate the hazard curves
            * the logic tree realization number
    :type the_task: a callable taking three parameters
    :returns: KVS keys of the calculated hazard curves.
    :rtype: list of string
    """
    results = []

    source_model_generator = random.Random()
    source_model_generator.seed(
        self.params.get("SOURCE_MODEL_LT_RANDOM_SEED", None))

    gmpe_generator = random.Random()
    gmpe_generator.seed(self.params.get("GMPE_LT_RANDOM_SEED", None))

    realizations = int(self.params["NUMBER_OF_LOGIC_TREE_SAMPLES"])

    LOG.info("Going to run classical PSHA hazard for %s realizations "
             "and %s sites" % (realizations, len(sites)))

    for realization in xrange(0, realizations):
        LOG.info("Calculating hazard curves for realization %s"
                 % realization)
        self.store_source_model(source_model_generator.getrandbits(32))
        self.store_gmpe_map(source_model_generator.getrandbits(32))

        results_per_realization = utils_tasks.distribute(
            self.number_of_tasks(), the_task, ("site_list", sites),
            dict(job_id=self.id, realization=realization),
            flatten_results=True)

        if serializer:
            serializer(results_per_realization)
        results.extend(results_per_realization)

    return results
def execute(self):
    """Execute the job."""
    region_losses = distribute(
        general.compute_risk, ("block_id", self.job_ctxt.blocks_keys),
        tf_args=dict(job_id=self.job_ctxt.job_id))

    if not self.is_benefit_cost_ratio_mode():
        self.agg_curve = event_based.aggregate_loss_curve(
            region_losses, self._tses(), self._time_span(),
            self.job_ctxt.oq_job_profile.loss_histogram_bins)
def test_distribute_with_noneven_data_and_cardinality_above_one(self):
    """
    The data set does *not* divide evenly among the subtasks in the task
    set. The last subtask gets all the remaining data.
    """
    expected = (
        (), {"data_to_process": range(2)},
        (), {"data_to_process": range(2, 5)})
    [(args1, kwargs1), (args2, kwargs2)] = tasks.distribute(
        2, reflect_args, ("data_to_process", range(5)))
    self.assertEqual(
        expected,
        (args1, actual_kwargs(kwargs1), args2, actual_kwargs(kwargs2)))
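The cardinality-based tests above pin down how the old `distribute` API is expected to chunk its data: an even split across the requested number of subtasks, with the last subtask absorbing any remainder, and one single-item chunk per data item when there is too little data. The helper below is a hypothetical, self-contained sketch of that splitting rule, written only to make the asserted behaviour concrete; it is not the openquake implementation and the name `partition_for_cardinality` is invented.

def partition_for_cardinality(data, cardinality):
    """Split `data` into chunks consistent with the tests above (sketch)."""
    data = list(data)
    chunk_size = len(data) // cardinality
    if chunk_size == 0:
        # Too little data: one single-item chunk per data item, or a single
        # empty chunk when there is no data at all.
        return [[item] for item in data] or [data]
    chunks = [data[i * chunk_size:(i + 1) * chunk_size]
              for i in range(cardinality - 1)]
    # The last chunk absorbs any remainder.
    chunks.append(data[(cardinality - 1) * chunk_size:])
    return chunks

# Examples mirroring the assertions above:
#   partition_for_cardinality(range(4), 2) -> [[0, 1], [2, 3]]
#   partition_for_cardinality(range(5), 2) -> [[0, 1], [2, 3, 4]]
#   partition_for_cardinality(range(3), 5) -> [[0], [1], [2]]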
def post_process(self):
    """Perform mean/quantile curve and hazard map post-processing, if
    requested."""
    logs.LOG.debug('> starting post processing')

    # If `mean_hazard_curves` is True and/or `quantile_hazard_curves`
    # has some value (not an empty list), do post processing.
    # Otherwise, just skip it altogether.
    if self.hc.mean_hazard_curves or self.hc.quantile_hazard_curves:
        tasks = post_processing.setup_tasks(
            self.job, self.job.hazard_calculation,
            curve_finder=models.HazardCurveData.objects,
            writers=dict(mean_curves=MeanCurveWriter,
                         quantile_curves=QuantileCurveWriter))

        utils_tasks.distribute(
            post_processing.do_post_process,
            ("post_processing_task", tasks),
            tf_args=dict(job_id=self.job.id))

    if len(self.hc.poes_hazard_maps) > 0:
        post_processing.do_hazard_map_post_process(self.job)

    logs.LOG.debug('< done with post processing')
def execute(self):
    """Loop over realizations (logic tree samples), split the geometry of
    interest into blocks of sites, and distribute Celery tasks to carry
    out the UHS computation.
    """
    job_ctxt = self.job_ctxt
    all_sites = job_ctxt.sites_to_compute()
    site_block_size = config.hazard_block_size()
    job_profile = job_ctxt.oq_job_profile

    self.initialize_pr_data(
        sites=all_sites,
        realizations=job_ctxt.oq_job_profile.realizations)

    src_model_rnd = random.Random(job_profile.source_model_lt_random_seed)
    gmpe_rnd = random.Random(job_profile.gmpe_lt_random_seed)

    for rlz in xrange(job_ctxt.oq_job_profile.realizations):

        # Sample the gmpe and source models:
        general.store_source_model(
            job_ctxt.job_id, src_model_rnd.getrandbits(32),
            job_ctxt.params, self.lt_processor)
        general.store_gmpe_map(
            job_ctxt.job_id, gmpe_rnd.getrandbits(32), self.lt_processor)

        for site_block in block_splitter(all_sites, site_block_size):

            tf_args = dict(job_id=job_ctxt.job_id, realization=rlz)

            num_tasks_completed = completed_task_count(job_ctxt.job_id)

            ath_args = dict(job_id=job_ctxt.job_id,
                            num_tasks=len(site_block),
                            start_count=num_tasks_completed)

            utils_tasks.distribute(
                compute_uhs_task, ('site', site_block), tf_args=tf_args,
                ath=uhs_task_handler, ath_args=ath_args)
def test_distribute_with_no_other_args(self):
    """The subtask is only invoked with the data to be processed."""
    # We expect the subtasks to see no positional arguments. The
    # data to be processed is passed in the keyword arguments.
    expected = [
        ((), {"data_to_process": [100]}),
        ((), {"data_to_process": [101]})]

    actual = []
    result = tasks.distribute(
        2, reflect_args, ("data_to_process", [100, 101]))

    # Remove celery-injected keyword arguments.
    for args, kwargs in result:
        actual.append((args, actual_kwargs(kwargs)))
    self.assertEqual(expected, actual)
def execute(self):
    """Execute the job."""
    aggregate_curve = general.AggregateLossCurve()

    region_losses = distribute(
        general.compute_risk, ("block_id", self.job_ctxt.blocks_keys),
        tf_args=dict(job_id=self.job_ctxt.job_id))

    for losses in region_losses:
        aggregate_curve.append(losses)

    self.agg_curve = aggregate_curve.compute(
        self._tses(), self._time_span(),
        self.job_ctxt.oq_job_profile.loss_histogram_bins)
def execute(self):
    """
    Dispatch the computation into multiple tasks.
    """
    LOGGER.debug("Executing scenario damage risk computation.")

    region_fractions = distribute(
        general.compute_risk, ("block_id", self.job_ctxt.blocks_keys),
        tf_args=dict(job_id=self.job_ctxt.job_id,
                     fmodel=_fm(self.job_ctxt.oq_job)))

    self._collect_fractions(region_fractions)

    LOGGER.debug("Scenario damage risk computation completed.")
def execute(self):
    """
    Dispatch the computation into multiple tasks.
    """
    LOGGER.debug("Executing scenario damage risk computation.")

    region_fractions = distribute(
        general.compute_risk, ("block_id", self.job_ctxt.blocks_keys),
        tf_args=dict(job_id=self.job_ctxt.job_id))

    self.dd_taxonomy_means, self.dd_taxonomy_stddevs = \
        scenario_damage.damage_distribution_by_taxonomy(region_fractions)

    self.total_distribution_means, self.total_distribution_stddevs = \
        scenario_damage.total_damage_distribution(region_fractions)

    LOGGER.debug("Scenario damage risk computation completed.")
def test_distribute_with_ignore_result_set_and_ath(self):
    """
    The specified number of subtasks is actually spawned (even for tasks
    with ignore_result=True) and the asynchronous task handler function is
    run.
    """

    def value(key):
        """Construct a test value for the given key."""
        return key[-3:] * 2

    def ath(data):
        """
        An asynchronous task handler function that converts all task
        results to upper case and returns the list of keys found.
        """
        items_expected = len(data)
        items_found = []
        while len(items_found) < items_expected:
            for key, _ in data:
                if key in items_found:
                    continue
                value = TestStore.get(key)
                if value is not None:
                    TestStore.set(key, value.upper())
                    items_found.append(key)
            time.sleep(0.05)
        return items_found

    keys = ["irtc:%s" % str(uuid.uuid4())[:8] for _ in xrange(5)]
    values = [value(uid) for uid in keys]
    data = zip(keys, values)

    args = ("data", [[d] for d in data])
    result = tasks.distribute(ignore_result, args, ath=ath,
                              ath_args=dict(data=data))
    self.assertEqual(sorted(keys), sorted(result))

    for key, value in data:
        self.assertEqual(value.upper(), TestStore.get(key))
def test_distribute_returns_results_in_the_right_order(self):
    """Results are returned in the right order."""
    expected = [[0, 1], [2, 3], [4, 5, 6]]
    result = tasks.distribute(3, reflect_data_to_be_processed,
                              ("data", range(7)))
    self.assertEqual(expected, result)
def test_distribute_with_non_empty_data_and_cardinality_one(self):
    """A single subtask will receive all the data to be processed."""
    expected = ((), {"data_to_process": range(5)})
    [(args, kwargs)] = tasks.distribute(
        1, reflect_args, ("data_to_process", range(5)))
    self.assertEqual(expected, (args, actual_kwargs(kwargs)))
def test_distribute_with_empty_data_and_cardinality_one(self):
    """A *single* subtask will be spawned even with an empty data set."""
    expected = ((), {"data_to_process": []})
    [(args, kwargs)] = tasks.distribute(
        1, reflect_args, ("data_to_process", []))
    self.assertEqual(expected, (args, actual_kwargs(kwargs)))
def test_distribute_returns_results_wo_flattening(self):
    """Unflattened results are returned in the right order."""
    expected = [[i] for i in range(7)]
    result = tasks.distribute(reflect_data_to_be_processed,
                              ("data", [[i] for i in range(7)]))
    self.assertEqual(expected, result)
def test_distribute_with_task_returning_single_item(self):
    """distribute() copes with tasks that return a single item."""
    expected = [1] * 5
    result = tasks.distribute(just_say_1, ("data", range(5)))
    self.assertEqual(expected, result)
def test_distribute_uses_the_specified_number_of_subtasks(self):
    """One subtask per data item is actually spawned."""
    expected = ["hello"] * 5
    result = tasks.distribute(just_say_hello, ("data", range(5)))
    self.assertEqual(expected, result)
def test_distribute_with_failing_subtask(self):
    """At least one subtask failed, a `TaskFailed` exception is raised."""
    try:
        tasks.distribute(1, failing_task, ("data", range(5)))
    except tasks.TaskFailed, exc:
        self.assertEqual(range(5), exc.args[0])
def test_distribute_uses_the_specified_number_of_subtasks(self):
    """The specified number of subtasks is actually spawned."""
    expected = ["hello"] * 5
    result = tasks.distribute(5, just_say_hello, ("data", range(5)))
    self.assertEqual(expected, result)
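Taken together, the newer-API tests above describe a calling contract for `tasks.distribute`: one subtask per data item, `tf_args` merged into each subtask's keyword arguments, optional flattening of list results, and an optional asynchronous task handler (`ath`, called with `ath_args`) whose return value becomes the caller's result. The function below is a celery-free, local sketch of that contract inferred from the tests; the name `distribute_sketch` is hypothetical, it is not the openquake implementation, and celery-specific behaviour such as `ignore_result` returning an empty list is deliberately out of scope.

def distribute_sketch(a_task, name_data, tf_args=None, ath=None,
                      ath_args=None, flatten_results=False):
    """Call `a_task` once per data item and collect the results (sketch)."""
    name, data = name_data
    results = []
    for item in data:
        # Each subtask receives its data item plus the shared tf_args.
        kwargs = {name: item}
        if tf_args:
            kwargs.update(tf_args)
        results.append(a_task(**kwargs))

    if flatten_results:
        flat = []
        for chunk in results:
            # Tasks may return either a sequence of items or a single item.
            if isinstance(chunk, (list, tuple)):
                flat.extend(chunk)
            else:
                flat.append(chunk)
        results = flat

    if ath is not None:
        # The asynchronous task handler's return value is what the caller
        # sees, mirroring test_distribute_with_ignore_result_set_and_ath.
        return ath(**(ath_args or {}))
    return results

# Example, mirroring test_distribute_with_other_args:
#   distribute_sketch(lambda **kw: ((), kw),
#                     ("data_to_process", [[13], [14]]),
#                     tf_args={"1+1": 2, "2/1": 1})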