Code example #1
File: core.py Project: Chunghan/oq-engine
 def compute_gmf_arg_gen(self):
     """
     Argument generator for the task compute_gmf. For each SES yields a
     tuple of the form (job_id, params, imt, gsims, ses, site_coll,
     rupture_ids, rupture_seeds).
     """
     rnd = random.Random()
     rnd.seed(self.hc.random_seed)
     site_coll = self.hc.site_collection
     params = dict(
         correl_model=haz_general.get_correl_model(self.hc),
         truncation_level=self.hc.truncation_level,
         maximum_distance=self.hc.maximum_distance,
     )
     for lt_rlz in self._get_realizations():
         ltp = logictree.LogicTreeProcessor.from_hc(self.hc)
         gsims = ltp.parse_gmpe_logictree_path(lt_rlz.gsim_lt_path)
         all_ses = models.SES.objects.filter(ses_collection__lt_realization=lt_rlz, ordinal__isnull=False).order_by(
             "ordinal"
         )
         for ses in all_ses:
             # count the ruptures in the given SES
             rupture_ids = models.SESRupture.objects.filter(ses=ses).values_list("id", flat=True)
             if not rupture_ids:
                 continue
             # compute the associated seeds
             rupture_seeds = [rnd.randint(0, models.MAX_SINT_32) for _ in range(len(rupture_ids))]
             # splitting on IMTs to generate more tasks and save memory
             for imt in self.hc.intensity_measure_types:
                 if self.hc.ground_motion_correlation_model is None:
                     # we split on sites to avoid running out of memory
                     # on the workers for computations like the full Japan
                     for sites in block_splitter(site_coll, BLOCK_SIZE):
                         yield (
                             self.job.id,
                             params,
                             imt,
                             gsims,
                             ses,
                             models.SiteCollection(sites),
                             rupture_ids,
                             rupture_seeds,
                         )
                 else:
                     # we split on ruptures to avoid running out of memory
                     rupt_iter = block_splitter(rupture_ids, BLOCK_SIZE)
                     seed_iter = block_splitter(rupture_seeds, BLOCK_SIZE)
                     for rupts, seeds in zip(rupt_iter, seed_iter):
                         yield (self.job.id, params, imt, gsims, ses, site_coll, rupts, seeds)
Code example #2
File: core.py Project: Chunghan/oq-engine
    def task_arg_gen(self, _block_size=None):
        """
        Loop through realizations and sources to generate a sequence of
        task arg tuples. Each tuple of args applies to a single task.
        Yielded results are tuples of the form (job_id, block, lt_rlz, ltp),
        where each block is a list of (source, ses, seed) triples; the seeds
        are used to seed numpy for temporal occurrence sampling.
        """
        hc = self.hc
        rnd = random.Random()
        rnd.seed(hc.random_seed)
        realizations = self._get_realizations()

        ltp = logictree.LogicTreeProcessor.from_hc(self.hc)
        for lt_rlz in realizations:
            sources = (
                models.SourceProgress.objects.filter(is_complete=False, lt_realization=lt_rlz)
                .order_by("id")
                .values_list("parsed_source_id", flat=True)
            )

            all_ses = list(
                models.SES.objects.filter(ses_collection__lt_realization=lt_rlz, ordinal__isnull=False).order_by(
                    "ordinal"
                )
            )

            # source, ses, seed triples
            sss = [(src, ses, rnd.randint(0, models.MAX_SINT_32)) for src, ses in itertools.product(sources, all_ses)]
            preferred_block_size = int(math.ceil(float(len(sources) * len(all_ses)) / self.concurrent_tasks()))
            logs.LOG.info("Using block size %d", preferred_block_size)
            for block in block_splitter(sss, preferred_block_size):
                yield self.job.id, block, lt_rlz, ltp
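As a quick check of the block-size arithmetic above (the sizes below are hypothetical): 10 sources and 5 SESs give 50 (source, ses, seed) triples, and with 25 concurrent tasks the preferred block size comes out to 2, i.e. 25 blocks and therefore 25 tasks for that realization.

import math

n_sources, n_ses, concurrent_tasks = 10, 5, 25    # hypothetical sizes
preferred_block_size = int(math.ceil(float(n_sources * n_ses)
                                     / concurrent_tasks))
assert preferred_block_size == 2                  # 50 triples -> 25 blocks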
Code example #3
    def task_arg_gen(self, _block_size=None):
        """
        Loop through realizations and sources to generate a sequence of
        task arg tuples. Each tuple of args applies to a single task.
        Yielded results are tuples of the form (job_id, src_ids, ses,
        src_seeds, ltp); the seeds are used to seed numpy for temporal
        occurrence sampling.
        """
        hc = self.hc
        rnd = random.Random()
        rnd.seed(hc.random_seed)
        realizations = self._get_realizations()

        ltp = logictree.LogicTreeProcessor.from_hc(self.hc)
        for lt_rlz in realizations:
            sources = models.SourceProgress.objects\
                .filter(is_complete=False, lt_realization=lt_rlz)\
                .order_by('id')\
                .values_list('parsed_source_id', flat=True)

            all_ses = list(models.SES.objects.filter(
                           ses_collection__lt_realization=lt_rlz,
                           ordinal__isnull=False).order_by('ordinal'))

            for src_ids in block_splitter(sources, self.preferred_block_size):
                for ses in all_ses:
                    # compute seeds for the sources
                    src_seeds = [rnd.randint(0, models.MAX_SINT_32)
                                 for _ in src_ids]
                    yield self.job.id, src_ids, ses, src_seeds, ltp
Code example #4
File: core.py Project: larsbutler/oq-engine
    def task_arg_gen(self):
        """
        Loop through realizations and sources to generate a sequence of
        task arg tuples. Each tuple of args applies to a single task.
        Yielded results are tuples of the form job_id, src_ids, ses, task_seed
        (task_seed will be used to seed numpy for temporal occurrence sampling).
        """
        hc = self.hc
        rnd = random.Random()
        rnd.seed(hc.random_seed)
        realizations = self._get_realizations()

        for lt_rlz in realizations:
            sources = models.SourceProgress.objects\
                .filter(is_complete=False, lt_realization=lt_rlz)\
                .order_by('id')\
                .values_list('parsed_source_id', flat=True)

            all_ses = list(models.SES.objects.filter(
                           ses_collection__lt_realization=lt_rlz,
                           ordinal__isnull=False).order_by('ordinal'))

            for src_ids in block_splitter(sources, self.preferred_block_size):
                for ses in all_ses:
                    task_seed = rnd.randint(0, models.MAX_SINT_32)
                    task_args = (self.job.id, src_ids, ses, task_seed)
                    yield task_args
Code example #5
File: base.py Project: larsbutler/oq-engine
    def parallelize(self, task_func, task_arg_gen):
        """
        Given a callable and a task arg generator, apply the callable to
        the arguments in parallel. To save memory the tasks are spawned in
        blocks with maximum size defined by the method .concurrent_tasks().
        The return values of the tasks are discarded (a do-nothing function
        is passed as the side effect). The order is not preserved.

        :param task_func: a `celery` task callable
        :param task_arg_gen: an iterable over positional argument tuples

        NB: if the environment variable OQ_NO_DISTRIBUTE is set the
        tasks are run sequentially in the current process.
        """
        taskname = task_func.__name__
        logs.LOG.debug('building arglist')
        arglist = list(task_arg_gen)
        total = len(arglist)
        logs.LOG.progress('spawning %d tasks of kind %s', total, taskname)
        ntasks = 0
        for argblock in general.block_splitter(
                arglist, self.concurrent_tasks()):
            tasks.parallelize(task_func, argblock, lambda _: None)
            ntasks += len(argblock)
            percent = math.ceil(float(ntasks) / total * 100)
            logs.LOG.progress('> %s %3d%% complete', taskname, percent)
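The loop above dispatches the argument list in blocks of concurrent_tasks() and logs a running percentage; a tiny self-contained sketch of that progress arithmetic (the block sizes below are hypothetical, not the engine's code) behaves like this:

import math

total, ntasks = 10, 0              # hypothetical: 10 tasks in blocks of 4
for blocklen in (4, 4, 2):
    ntasks += blocklen
    percent = math.ceil(float(ntasks) / total * 100)
    print('> %3d%% complete' % percent)   # prints 40%, 80%, 100%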
Code example #6
File: core.py Project: 4x/oq-engine
    def task_arg_gen(self, block_size):
        """
        Generate task args for the first phase of the disaggregation
        calculations. This phase is concerned with computing hazard curves,
        which must be completed in full before disaggregation calculation
        can begin.

        See also :meth:`disagg_task_arg_gen`.

        :param int block_size:
            The number of items per task. In this case, this is the number
            of sources per hazard curve calc task, or the number of sites
            per disagg calc task.
        """
        realizations = models.LtRealization.objects.filter(
            hazard_calculation=self.hc, is_complete=False)

        # first, distribute tasks for hazard curve computation
        for lt_rlz in realizations:
            source_progress = models.SourceProgress.objects.filter(
                is_complete=False, lt_realization=lt_rlz).order_by('id')
            source_ids = source_progress.values_list(
                'parsed_source_id', flat=True)

            for block in general_utils.block_splitter(source_ids, block_size):
                # job_id, source id block, lt rlz, calc_type
                yield (self.job.id, block, lt_rlz.id, 'hazard_curve')
Code example #7
    def task_arg_gen(self, block_size, check_num_task=True):
        """
        Loop through realizations and sources to generate a sequence of
        task arg tuples. Each tuple of args applies to a single task.

        For this default implementation, yielded results are tuples of the
        form (job_id, source_id_block, lt_rlz_id, ltp).

        Override this in subclasses as necessary.

        :param int block_size:
            The (max) number of work items for each task. In this case,
            sources.
        """
        point_source_block_size = self.point_source_block_size()

        realizations = self._get_realizations()

        n = 0  # number of yielded arguments
        ltp = logictree.LogicTreeProcessor.from_hc(self.hc)

        for lt_rlz in realizations:
            # separate point sources from all the other types, since
            # we distribute point sources in different sized chunks
            # point sources first
            point_source_ids = self._get_point_source_ids(lt_rlz)

            for block in block_splitter(point_source_ids,
                                        point_source_block_size):
                task_args = (self.job.id, block, lt_rlz.id, ltp)
                yield task_args
                n += 1
            # now for area and fault sources
            other_source_ids = self._get_source_ids(lt_rlz)

            for block in block_splitter(other_source_ids, block_size):
                task_args = (self.job.id, block, lt_rlz.id, ltp)
                yield task_args
                n += 1

        # this sanity check should go into a unit test, and will likely
        # go there in the future
        if check_num_task:
            num_tasks = models.JobStats.objects.get(
                oq_job=self.job.id).num_tasks
            assert num_tasks == n, 'Expected %d tasks, got %d' % (num_tasks, n)
Code example #8
File: general.py Project: ryanberrio/oq-engine
    def task_arg_gen(self, block_size, check_num_task=True):
        """
        Loop through realizations and sources to generate a sequence of
        task arg tuples. Each tuple of args applies to a single task.

        For this default implementation, yielded results are tuples of the
        form (job_id, source_id_block, lt_rlz_id, ltp).

        Override this in subclasses as necessary.

        :param int block_size:
            The (max) number of work items for each task. In this case,
            sources.
        """
        point_source_block_size = self.point_source_block_size()

        realizations = self._get_realizations()

        n = 0  # number of yielded arguments
        ltp = logictree.LogicTreeProcessor.from_hc(self.hc)

        for lt_rlz in realizations:
            # separate point sources from all the other types, since
            # we distribute point sources in different sized chunks
            # point sources first
            point_source_ids = self._get_point_source_ids(lt_rlz)

            for block in block_splitter(point_source_ids,
                                        point_source_block_size):
                task_args = (self.job.id, block, lt_rlz.id, ltp)
                yield task_args
                n += 1
            # now for area and fault sources
            other_source_ids = self._get_source_ids(lt_rlz)

            for block in block_splitter(other_source_ids, block_size):
                task_args = (self.job.id, block, lt_rlz.id, ltp)
                yield task_args
                n += 1

        # this sanity check should go into a unit test, and will likely
        # go there in the future
        if check_num_task:
            num_tasks = models.JobStats.objects.get(
                oq_job=self.job.id).num_tasks
            assert num_tasks == n, 'Expected %d tasks, got %d' % (num_tasks, n)
Code example #9
 def test_block_splitter(self):
     expected = [
         [0, 1, 2],
         [3, 4, 5],
         [6, 7, 8],
         [9],
     ]
     actual = [x for x in block_splitter(self.DATA, 3)]
     self.assertEqual(expected, actual)
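The tests above and below pin down the contract of block_splitter: any iterable is accepted, lists of at most block_size items are yielded in order, and a block size below 1 raises ValueError. A minimal sketch with that behaviour (an illustration only, not the engine's own implementation, which the examples above import as general.block_splitter / general_utils.block_splitter):

def block_splitter(data, block_size):
    # Yield successive lists of at most `block_size` items from any iterable.
    if block_size < 1:
        raise ValueError('block_size must be at least 1')
    block = []
    for item in data:
        block.append(item)
        if len(block) == block_size:
            yield block
            block = []
    if block:
        yield block

assert list(block_splitter(range(10), 3)) == [[0, 1, 2], [3, 4, 5],
                                              [6, 7, 8], [9]]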
Code example #10
 def compute_gmf_arg_gen(self):
     """
     Argument generator for the task compute_gmf. For each SES yields a
     tuple of the form (job_id, params, imt, gsims, ses, site_coll,
     rupture_ids, rupture_seeds).
     """
     rnd = random.Random()
     rnd.seed(self.hc.random_seed)
     site_coll = self.hc.site_collection
     params = dict(
         correl_model=haz_general.get_correl_model(self.hc),
         truncation_level=self.hc.truncation_level,
         maximum_distance=self.hc.maximum_distance)
     for lt_rlz in self._get_realizations():
         ltp = logictree.LogicTreeProcessor.from_hc(self.hc)
         gsims = ltp.parse_gmpe_logictree_path(lt_rlz.gsim_lt_path)
         all_ses = models.SES.objects.filter(
             ses_collection__lt_realization=lt_rlz,
             ordinal__isnull=False).order_by('ordinal')
         for ses in all_ses:
             # count the ruptures in the given SES
             rupture_ids = models.SESRupture.objects.filter(
                 ses=ses).values_list('id', flat=True)
             if not rupture_ids:
                 continue
             # compute the associated seeds
             rupture_seeds = [rnd.randint(0, models.MAX_SINT_32)
                              for _ in range(len(rupture_ids))]
             # splitting on IMTs to generate more tasks and save memory
             for imt in self.hc.intensity_measure_types:
                 if self.hc.ground_motion_correlation_model is None:
                     # we split on sites to avoid running out of memory
                     # on the workers for computations like the full Japan
                     for sites in block_splitter(site_coll, BLOCK_SIZE):
                         yield (self.job.id, params, imt, gsims, ses,
                                models.SiteCollection(sites),
                                rupture_ids, rupture_seeds)
                 else:
                     # we split on ruptures to avoid running out of memory
                     rupt_iter = block_splitter(rupture_ids, BLOCK_SIZE)
                     seed_iter = block_splitter(rupture_seeds, BLOCK_SIZE)
                     for rupts, seeds in zip(rupt_iter, seed_iter):
                         yield (self.job.id, params, imt, gsims, ses,
                                site_coll, rupts, seeds)
Code example #11
 def test_block_splitter_with_iter(self):
     # Test the block with a data set of unknown length
     data = iter(range(10))
     expected = [
         [0, 1, 2],
         [3, 4, 5],
         [6, 7, 8],
         [9],
     ]
     actual = [x for x in block_splitter(data, 3)]
     self.assertEqual(expected, actual)
Code example #12
 def test_block_splitter_with_generator(self):
     # Test the block with a data set of unknown length
     # (such as a generator)
     data = xrange(10)
     expected = [
         [0, 1, 2],
         [3, 4, 5],
         [6, 7, 8],
         [9],
     ]
     actual = [x for x in block_splitter(data, 3)]
     self.assertEqual(expected, actual)
Code example #13
File: general.py Project: chenliu0831/oq-engine
    def task_arg_gen(self, block_size):
        """
        Loop through realizations and sources to generate a sequence of
        task arg tuples. Each tuple of args applies to a single task.

        For this default implementation, yielded results are tuples of the
        form (job_id, source_id_block, lt_rlz_id, ltp).

        Override this in subclasses as necessary.

        :param int block_size:
            The (max) number of work items for each task. In this case,
            sources.
        """
        point_source_block_size = self.point_source_block_size()

        realizations = self._get_realizations()

        n = 0  # number of yielded arguments
        ltp = logictree.LogicTreeProcessor.from_hc(self.hc)

        for lt_rlz in realizations:
            sm = self.rlz_to_sm[lt_rlz]

            # separate point sources from all the other types, since
            # we distribute point sources in different sized chunks
            # point sources first
            point_sources = self.sources_per_model[sm, 'point']
            for block in block_splitter(point_sources,
                                        point_source_block_size):
                task_args = (self.job.id, block, lt_rlz.id, ltp)
                yield task_args
                n += 1

            # now for area and fault sources
            other_sources = self.sources_per_model[sm, 'other']
            for block in block_splitter(other_sources, block_size):
                task_args = (self.job.id, block, lt_rlz.id, ltp)
                yield task_args
                n += 1
Code example #14
File: general.py Project: MohsenKohrangi/oq-engine
    def block_split(self, items, max_block_size=MAX_BLOCK_SIZE):
        """
        Split the given items in blocks, depending on the parameter
        concurrent tasks. Notice that in order to save memory there
        is a maximum block size of %d items.

        :param list items: the items to split in blocks
        """ % MAX_BLOCK_SIZE
        assert len(items) > 0, 'No items in %s' % items
        num_rlzs = len(self._get_realizations())
        bs = min(ceil(len(items), ceil(self.concurrent_tasks(), num_rlzs)),
                 max_block_size)
        logs.LOG.warn('Using block size=%d', bs)
        return block_splitter(items, bs)
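The nested ceil(...) calls above presumably use an integer ceil-division helper from the engine's utilities rather than math.ceil, which takes a single argument (an assumption, not verified here). Under that assumption the block size works out as in this sketch:

import math

def ceil(a, b):
    # assumed behaviour of the engine's helper: integer ceiling of a / b
    return int(math.ceil(float(a) / b))

# hypothetical numbers: 1000 items, 32 concurrent tasks, 2 realizations
MAX_BLOCK_SIZE = 1000
bs = min(ceil(1000, ceil(32, 2)), MAX_BLOCK_SIZE)
assert bs == 63   # 1000 items over 16 tasks per realization, rounded up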
Code example #15
File: post_processing.py Project: 4x/oq-engine
def do_hazard_map_post_process(job):
    """
    Create and distribute tasks for processing hazard curves into hazard maps.

    :param job:
        A :class:`openquake.engine.db.models.OqJob` which has some hazard
        curves associated with it.
    """
    logs.LOG.debug('> Post-processing - Hazard Maps')
    block_size = int(config.get('hazard', 'concurrent_tasks'))

    poes = job.hazard_calculation.poes_hazard_maps

    # Stats for debug logging:
    hazard_curve_ids = models.HazardCurve.objects.filter(
        output__oq_job=job).values_list('id', flat=True)
    logs.LOG.debug('num haz curves: %s' % len(hazard_curve_ids))

    # Limit the number of concurrent tasks to the configured concurrency level:
    block_gen = block_splitter(hazard_curve_ids, block_size)
    total_blocks = int(math.ceil(len(hazard_curve_ids) / float(block_size)))

    for i, block in enumerate(block_gen):
        logs.LOG.debug('> Hazard post-processing block, %s of %s'
                       % (i + 1, total_blocks))

        if openquake.engine.no_distribute():
            # just execute the post-processing using the plain function form of
            # the task
            for hazard_curve_id in block:
                hazard_curves_to_hazard_map_task(job.id, hazard_curve_id, poes)
        else:
            tasks = []
            for hazard_curve_id in block:
                tasks.append(hazard_curves_to_hazard_map_task.subtask(
                    (job.id, hazard_curve_id, poes)))
            results = TaskSet(tasks=tasks).apply_async()

            utils_tasks._check_exception(results)

        logs.LOG.debug('< Done Hazard Map post-processing block, %s of %s'
                       % (i + 1, total_blocks))
    logs.LOG.debug('< Done post-processing - Hazard Maps')
Code example #16
File: core.py Project: Chunghan/oq-engine
    def task_arg_gen(self, block_size):
        """
        Loop through realizations and sources to generate a sequence of
        task arg tuples. Each tuple of args applies to a single task.

        Yielded results are 6-tuples of the form (job_id,
        sites, rupture_id, gmf_id, task_seed, realizations);
        task_seed will be used to seed numpy for temporal occurrence sampling.

        :param int block_size:
            The number of work items for each task. Fixed to 1.
        """
        rnd = random.Random()
        rnd.seed(self.hc.random_seed)
        for sites in block_splitter(self.hc.site_collection, BLOCK_SIZE):
            task_seed = rnd.randint(0, models.MAX_SINT_32)
            yield (self.job.id, models.SiteCollection(sites),
                   self.rupture, self.gmf.id, task_seed,
                   self.hc.number_of_ground_motion_fields)
Code example #17
File: core.py Project: 4x/oq-engine
    def disagg_task_arg_gen(self, block_size):
        """
        Generate task args for the second phase of disaggregation calculations.
        This phase is concerned with computing the disaggregation histograms.

        :param int block_size:
            The number of items per task. In this case, this is the number
            of sources per hazard curve calc task, or the number of sites
            per disagg calc task.
        """
        realizations = models.LtRealization.objects.filter(
            hazard_calculation=self.hc, is_complete=False)

        # then distribute tasks for disaggregation histogram computation
        for lt_rlz in realizations:
            for block in general_utils.block_splitter(self.hc.site_collection,
                                                      block_size):
                # job_id, Site block, lt rlz, calc_type
                yield (self.job.id, block, lt_rlz.id, 'disagg')
Code example #18
File: core.py Project: Chunghan/oq-engine
    def disagg_task_arg_gen(self, block_size):
        """
        Generate task args for the second phase of disaggregation calculations.
        This phase is concerned with computing the disaggregation histograms.

        :param int block_size:
            The number of items per task. In this case, this is the number
            of sources per hazard curve calc task, or the number of sites
            per disagg calc task.
        """
        realizations = models.LtRealization.objects.filter(
            hazard_calculation=self.hc)

        ltp = logictree.LogicTreeProcessor.from_hc(self.hc)

        # then distribute tasks for disaggregation histogram computation
        for lt_rlz in realizations:
            for sites in general_utils.block_splitter(
                    self.hc.site_collection, block_size):
                yield self.job.id, sites, lt_rlz.id, ltp
Code example #19
    def disagg_task_arg_gen(self, block_size):
        """
        Generate task args for the second phase of disaggregation calculations.
        This phase is concerned with computing the disaggregation histograms.

        :param int block_size:
            The number of items per task. In this case, this is the number
            of sources per hazard curve calc task, or the number of sites
            per disagg calc task.
        """
        realizations = models.LtRealization.objects.filter(
            hazard_calculation=self.hc, is_complete=False)
        ltp = logictree.LogicTreeProcessor.from_hc(self.hc)

        # then distribute tasks for disaggregation histogram computation
        for lt_rlz in realizations:
            for block in general_utils.block_splitter(self.hc.site_collection,
                                                      block_size):
                # job_id, Site block, lt rlz, calc_type
                yield (self.job.id, block, lt_rlz.id, ltp, 'disagg')
Code example #20
File: core.py Project: luisera/oq-engine
    def task_arg_gen(self):
        """
        Loop through realizations and sources to generate a sequence of
        task arg tuples. Each tuple of args applies to a single task.

        Yielded results are 7-tuples of the form (job_id,
        sites, rupture_id, gmf_id, task_seed, realizations, task_no);
        task_seed will be used to seed numpy for temporal occurrence sampling.
        """
        rnd = random.Random()
        rnd.seed(self.hc.random_seed)
        # TODO: fix the block size dependency
        # (https://bugs.launchpad.net/oq-engine/+bug/1225287)
        # then self.block_split can be used, consistently with the
        # other calculators
        blocks = block_splitter(self.hc.site_collection, 1000)
        for task_no, sites in enumerate(blocks):
            task_seed = rnd.randint(0, models.MAX_SINT_32)
            yield (self.job.id, models.SiteCollection(sites),
                   self.rupture, self.gmf.id, task_seed,
                   self.hc.number_of_ground_motion_fields, task_no)
Code example #21
    def task_arg_gen(self, block_size, _check_num_task=True):
        """
        Loop through realizations and sources to generate a sequence of
        task arg tuples. Each tuple of args applies to a single task.

        Yielded results are 6-tuples of the form (job_id,
        sites, rupture_id, gmfcoll_id, task_seed, realizations);
        task_seed will be used to seed numpy for temporal occurrence sampling.

        :param int block_size:
            The number of work items for each task. Fixed to 1.
        """
        rnd = random.Random()
        rnd.seed(self.hc.random_seed)

        rupture_id = self.job.parsedrupture.id

        for sites in block_splitter(self.hc.site_collection, BLOCK_SIZE):
            task_seed = rnd.randint(0, models.MAX_SINT_32)
            yield (self.job.id, models.SiteCollection(sites),
                   rupture_id, self.gmfcoll.id, task_seed,
                   self.hc.number_of_ground_motion_fields)
Code example #22
File: core.py Project: xpb/oq-engine
    def task_arg_gen(self, block_size):
        """
        Loop through realizations and sources to generate a sequence of
        task arg tuples. Each tuple of args applies to a single task.

        Yielded results are 6-tuples of the form (job_id,
        sites, rupture_id, output_id, task_seed, realizations);
        task_seed will be used to seed numpy for temporal occurrence sampling.

        :param int block_size:
            The number of work items for each task. Fixed to 1.
        """
        rnd = random.Random()
        rnd.seed(self.hc.random_seed)

        inp = models.inputs4hcalc(self.hc.id, 'rupture_model')[0]
        ruptures = models.ParsedRupture.objects.filter(input__id=inp.id)
        rupture_id = [rupture.id for rupture in ruptures][0]  # only one
        for sites in block_splitter(self.hc.site_collection, BLOCK_SIZE):
            task_seed = rnd.randint(0, MAX_SINT_32)
            yield (self.job.id, SiteCollection(sites),
                   rupture_id, self.output.id, task_seed,
                   self.hc.number_of_ground_motion_fields)
Code example #23
File: post_processing.py Project: 4x/oq-engine
def do_post_process(job):
    """
    Run the GMF to hazard curve post-processing tasks for the given ``job``.

    :param job:
        A :class:`openquake.engine.db.models.OqJob` instance.
    """
    logs.LOG.debug('> Post-processing - GMFs to Hazard Curves')
    block_size = int(config.get('hazard', 'concurrent_tasks'))
    block_gen = block_splitter(gmf_post_process_arg_gen(job), block_size)

    hc = job.hazard_calculation

    # Stats for debug logging:
    n_imts = len(hc.intensity_measure_types_and_levels)
    n_sites = len(hc.points_to_compute())
    n_rlzs = models.LtRealization.objects.filter(hazard_calculation=hc).count()
    total_blocks = int(math.ceil(
        (n_imts * n_sites * n_rlzs) / float(block_size)))

    for i, block in enumerate(block_gen):
        logs.LOG.debug('> GMF post-processing block, %s of %s'
                       % (i + 1, total_blocks))

        # Run the tasks in blocks, to avoid overqueueing:
        tasks = []
        for the_args in block:
            tasks.append(gmf_to_hazard_curve_task.subtask(the_args))
        results = TaskSet(tasks=tasks).apply_async()

        # Check for Exceptions in the results and raise
        utils_tasks._check_exception(results)

        logs.LOG.debug('< Done GMF post-processing block, %s of %s'
                       % (i + 1, total_blocks))
    logs.LOG.debug('< Done post-processing - GMFs to Hazard Curves')
Code example #24
 def test_block_splitter_block_size_lt_zero(self):
     gen = block_splitter(self.DATA, -1)
     self.assertRaises(ValueError, gen.next)
Code example #25
 def test_block_splitter_block_size_gt_data_len(self):
     expected = [self.DATA]
     actual = [x for x in block_splitter(self.DATA, 11)]
     self.assertEqual(expected, actual)
Code example #26
 def test_block_splitter_zero_block_size(self):
     gen = block_splitter(self.DATA, 0)
     self.assertRaises(ValueError, gen.next)
Code example #27
File: general.py Project: MohsenKohrangi/oq-engine
    def do_aggregate_post_proc(self):
        """
        Grab hazard data for all realizations and sites from the database and
        compute mean and/or quantile aggregates (depending on which options are
        enabled in the calculation).

        Post-processing results will be stored directly into the database.
        """
        num_rlzs = models.LtRealization.objects.filter(
            lt_model__hazard_calculation=self.hc).count()

        num_site_blocks_per_incr = int(CURVE_CACHE_SIZE) / int(num_rlzs)
        if num_site_blocks_per_incr == 0:
            # This means we have `num_rlzs` >= `CURVE_CACHE_SIZE`.
            # The minimum number of sites should be 1.
            num_site_blocks_per_incr = 1
        slice_incr = num_site_blocks_per_incr * num_rlzs  # unit: num records

        if self.hc.mean_hazard_curves:
            # create a new `HazardCurve` 'container' record for mean
            # curves (virtual container for multiple imts)
            models.HazardCurve.objects.create(
                output=models.Output.objects.create_output(
                    self.job, "mean-curves-multi-imt",
                    "hazard_curve_multi"),
                statistics="mean",
                imt=None,
                investigation_time=self.hc.investigation_time)

        if self.hc.quantile_hazard_curves:
            for quantile in self.hc.quantile_hazard_curves:
                # create a new `HazardCurve` 'container' record for quantile
                # curves (virtual container for multiple imts)
                models.HazardCurve.objects.create(
                    output=models.Output.objects.create_output(
                        self.job, 'quantile(%s)-curves' % quantile,
                        "hazard_curve_multi"),
                    statistics="quantile",
                    imt=None,
                    quantile=quantile,
                    investigation_time=self.hc.investigation_time)

        for imt, imls in self.hc.intensity_measure_types_and_levels.items():
            im_type, sa_period, sa_damping = from_string(imt)

            # prepare `output` and `hazard_curve` containers in the DB:
            container_ids = dict()
            if self.hc.mean_hazard_curves:
                mean_output = models.Output.objects.create_output(
                    job=self.job,
                    display_name='Mean Hazard Curves %s' % imt,
                    output_type='hazard_curve'
                )
                mean_hc = models.HazardCurve.objects.create(
                    output=mean_output,
                    investigation_time=self.hc.investigation_time,
                    imt=im_type,
                    imls=imls,
                    sa_period=sa_period,
                    sa_damping=sa_damping,
                    statistics='mean'
                )
                container_ids['mean'] = mean_hc.id

            if self.hc.quantile_hazard_curves:
                for quantile in self.hc.quantile_hazard_curves:
                    q_output = models.Output.objects.create_output(
                        job=self.job,
                        display_name=(
                            '%s quantile Hazard Curves %s' % (quantile, imt)
                        ),
                        output_type='hazard_curve'
                    )
                    q_hc = models.HazardCurve.objects.create(
                        output=q_output,
                        investigation_time=self.hc.investigation_time,
                        imt=im_type,
                        imls=imls,
                        sa_period=sa_period,
                        sa_damping=sa_damping,
                        statistics='quantile',
                        quantile=quantile
                    )
                    container_ids['q%s' % quantile] = q_hc.id

            all_curves_for_imt = models.order_by_location(
                models.HazardCurveData.objects.all_curves_for_imt(
                    self.job.id, im_type, sa_period, sa_damping))

            with transaction.commit_on_success(using='job_init'):
                inserter = writer.CacheInserter(
                    models.HazardCurveData, CURVE_CACHE_SIZE)

                for chunk in models.queryset_iter(all_curves_for_imt,
                                                  slice_incr):
                    # slice each chunk by `num_rlzs` into `site_chunk`
                    # and compute the aggregate
                    for site_chunk in block_splitter(chunk, num_rlzs):
                        site = site_chunk[0].location
                        curves_poes = [x.poes for x in site_chunk]
                        curves_weights = [x.weight for x in site_chunk]

                        # do means and quantiles
                        # quantiles first:
                        if self.hc.quantile_hazard_curves:
                            for quantile in self.hc.quantile_hazard_curves:
                                if self.hc.number_of_logic_tree_samples == 0:
                                    # explicitly weighted quantiles
                                    q_curve = weighted_quantile_curve(
                                        curves_poes, curves_weights, quantile
                                    )
                                else:
                                    # implicitly weighted quantiles
                                    q_curve = quantile_curve(
                                        curves_poes, quantile
                                    )
                                inserter.add(
                                    models.HazardCurveData(
                                        hazard_curve_id=(
                                            container_ids['q%s' % quantile]),
                                        poes=q_curve.tolist(),
                                        location=site.wkt)
                                )

                        # then means
                        if self.hc.mean_hazard_curves:
                            m_curve = mean_curve(
                                curves_poes, weights=curves_weights
                            )
                            inserter.add(
                                models.HazardCurveData(
                                    hazard_curve_id=container_ids['mean'],
                                    poes=m_curve.tolist(),
                                    location=site.wkt)
                            )
                inserter.flush()
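The inner block_splitter(chunk, num_rlzs) call above relies on the curves being ordered by location, so that each block of num_rlzs consecutive records carries one curve per realization for a single site; a toy illustration with hypothetical values:

num_rlzs = 2
curves = ['site1_rlz1', 'site1_rlz2', 'site2_rlz1', 'site2_rlz2']
site_chunks = [curves[i:i + num_rlzs]
               for i in range(0, len(curves), num_rlzs)]
assert site_chunks == [['site1_rlz1', 'site1_rlz2'],
                       ['site2_rlz1', 'site2_rlz2']]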
Code example #28
    def do_aggregate_post_proc(self):
        """
        Grab hazard data for all realizations and sites from the database and
        compute mean and/or quantile aggregates (depending on which options are
        enabled in the calculation).

        Post-processing results will be stored directly into the database.
        """
        num_rlzs = models.LtRealization.objects.filter(
            hazard_calculation=self.hc).count()

        num_site_blocks_per_incr = int(CURVE_CACHE_SIZE) / int(num_rlzs)
        if num_site_blocks_per_incr == 0:
            # This means we have `num_rlzs` >= `CURVE_CACHE_SIZE`.
            # The minimum number of sites should be 1.
            num_site_blocks_per_incr = 1
        slice_incr = num_site_blocks_per_incr * num_rlzs  # unit: num records

        if self.hc.mean_hazard_curves:
            # create a new `HazardCurve` 'container' record for mean
            # curves (virtual container for multiple imts)
            models.HazardCurve.objects.create(
                output=models.Output.objects.create_output(
                    self.job, "mean-curves-multi-imt", "hazard_curve_multi"),
                statistics="mean",
                imt=None,
                investigation_time=self.hc.investigation_time)

        if self.hc.quantile_hazard_curves:
            for quantile in self.hc.quantile_hazard_curves:
                # create a new `HazardCurve` 'container' record for quantile
                # curves (virtual container for multiple imts)
                models.HazardCurve.objects.create(
                    output=models.Output.objects.create_output(
                        self.job, 'quantile(%s)-curves' % quantile,
                        "hazard_curve_multi"),
                    statistics="quantile",
                    imt=None,
                    quantile=quantile,
                    investigation_time=self.hc.investigation_time)

        for imt, imls in self.hc.intensity_measure_types_and_levels.items():
            im_type, sa_period, sa_damping = models.parse_imt(imt)

            # prepare `output` and `hazard_curve` containers in the DB:
            container_ids = dict()
            if self.hc.mean_hazard_curves:
                mean_output = models.Output.objects.create_output(
                    job=self.job,
                    display_name='mean-curves-%s' % imt,
                    output_type='hazard_curve')
                mean_hc = models.HazardCurve.objects.create(
                    output=mean_output,
                    investigation_time=self.hc.investigation_time,
                    imt=im_type,
                    imls=imls,
                    sa_period=sa_period,
                    sa_damping=sa_damping,
                    statistics='mean')
                container_ids['mean'] = mean_hc.id

            if self.hc.quantile_hazard_curves:
                for quantile in self.hc.quantile_hazard_curves:
                    q_output = models.Output.objects.create_output(
                        job=self.job,
                        display_name=('quantile(%s)-curves-%s' %
                                      (quantile, imt)),
                        output_type='hazard_curve')
                    q_hc = models.HazardCurve.objects.create(
                        output=q_output,
                        investigation_time=self.hc.investigation_time,
                        imt=im_type,
                        imls=imls,
                        sa_period=sa_period,
                        sa_damping=sa_damping,
                        statistics='quantile',
                        quantile=quantile)
                    container_ids['q%s' % quantile] = q_hc.id

            all_curves_for_imt = models.order_by_location(
                models.HazardCurveData.objects.all_curves_for_imt(
                    self.job.id, im_type, sa_period, sa_damping))

            with transaction.commit_on_success(using='reslt_writer'):
                inserter = writer.CacheInserter(models.HazardCurveData,
                                                CURVE_CACHE_SIZE)

                for chunk in models.queryset_iter(all_curves_for_imt,
                                                  slice_incr):
                    # slice each chunk by `num_rlzs` into `site_chunk`
                    # and compute the aggregate
                    for site_chunk in block_splitter(chunk, num_rlzs):
                        site = site_chunk[0].location
                        curves_poes = [x.poes for x in site_chunk]
                        curves_weights = [x.weight for x in site_chunk]

                        # do means and quantiles
                        # quantiles first:
                        if self.hc.quantile_hazard_curves:
                            for quantile in self.hc.quantile_hazard_curves:
                                if self.hc.number_of_logic_tree_samples == 0:
                                    # explicitly weighted quantiles
                                    q_curve = weighted_quantile_curve(
                                        curves_poes, curves_weights, quantile)
                                else:
                                    # implicitly weighted quantiles
                                    q_curve = quantile_curve(
                                        curves_poes, quantile)
                                inserter.add(
                                    models.HazardCurveData(
                                        hazard_curve_id=(
                                            container_ids['q%s' % quantile]),
                                        poes=q_curve.tolist(),
                                        location=site.wkt))

                        # then means
                        if self.hc.mean_hazard_curves:
                            m_curve = mean_curve(curves_poes,
                                                 weights=curves_weights)
                            inserter.add(
                                models.HazardCurveData(
                                    hazard_curve_id=container_ids['mean'],
                                    poes=m_curve.tolist(),
                                    location=site.wkt))
                inserter.flush()