Example #1
    def test_split_in_blocks(self):
        weights = dict([('a', 11), ('b', 10), ('c', 100), ('d', 15), ('e', 20),
                        ('f', 5), ('g', 30), ('h', 17), ('i', 25)])
        blocks = list(split_in_blocks('abcdefghi', 1, weights.get))
        self.assertEqual(len(blocks), 1)
        blocks = list(split_in_blocks('abcdefghi', 2, weights.get))
        self.assertEqual(len(blocks), 3)
        self.assertEqual(
            repr(blocks),
            "[<WeightedSequence ['a', 'b'], weight=21>, "
            "<WeightedSequence ['c', 'd'], weight=115>, "
            "<WeightedSequence ['e', 'f', 'g', 'h', 'i'], weight=97>]")
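The assertions pin down the splitting rule: with hint=1 everything fits into a single block, while with hint=2 each block is capped at roughly half of the total weight (233), yielding blocks of weight 21, 115 and 97. The sketch below is not the actual OpenQuake implementation, only a minimal reconstruction that reproduces these assertions; every detail beyond the test itself is an assumption:

import math

class WeightedSequence(list):
    """A list of items that tracks the cumulative weight of its
    contents (a minimal stand-in for the real class)."""
    def __init__(self, pairs=()):
        super().__init__()
        self.weight = 0
        for pair in pairs:
            self.append(pair)

    def append(self, pair):
        item, weight = pair
        super().append(item)
        self.weight += weight

    def __repr__(self):
        return '<%s %s, weight=%s>' % (
            self.__class__.__name__, list.__repr__(self), self.weight)

def block_splitter(items, max_weight, weight=lambda item: 1,
                   kind=lambda item: 'Unspecified'):
    """Yield WeightedSequences; a new block starts whenever adding an
    item would exceed max_weight or the kind of the items changes."""
    block = WeightedSequence()
    prev_kind = None
    for item in items:
        w, k = weight(item), kind(item)
        if block and (block.weight + w > max_weight or k != prev_kind):
            yield block
            block = WeightedSequence()
        block.append((item, w))
        prev_kind = k
    if block:
        yield block

def split_in_blocks(sequence, hint, weight=lambda item: 1,
                    kind=lambda item: 'Unspecified'):
    """Split a sequence into around `hint` blocks by capping each
    block's weight at total_weight / hint."""
    items = list(sequence)
    total = sum(weight(item) for item in items)
    return block_splitter(items, math.ceil(total / hint), weight, kind)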
Example #2
def apply_reduce(task, task_args,
                 agg=lambda a, x: x, acc=None,
                 concurrent_tasks=CONCURRENT_TASKS,
                 weight=lambda item: 1,
                 key=lambda item: 'Unspecified'):
    """
    Apply a task to a tuple of the form (job_id, data, *args)
    by splitting the data into chunks and reducing the results with an
    aggregation function.

    :param task: an oqtask
    :param task_args: the arguments to be passed to the task function
    :param agg: the aggregation function
    :param acc: initial value of the accumulator
    :param concurrent_tasks: hint about how many tasks to generate
    :param weight: function to extract the weight of an item in data
    :param key: function to extract the kind of an item in data
    """
    job_id = task_args[0]
    data = task_args[1]
    args = task_args[2:]
    if not data:
        return acc
    elif len(data) == 1 or not concurrent_tasks:
        return agg(acc, task.task_func(job_id, data, *args))
    blocks = split_in_blocks(data, concurrent_tasks, weight, key)
    alldata = [(job_id, block) + args for block in blocks]
    return map_reduce(task, alldata, agg, acc)
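To make the calling convention concrete, here is a hypothetical sequential stand-in: char_count and sequential_apply_reduce are invented names for illustration, the snippet reuses the split_in_blocks sketch from Example #1, and the real apply_reduce dispatches the blocks through map_reduce instead of a loop:

def char_count(job_id, data, factor):
    # toy task: weigh each item by the length of its string form
    return sum(len(str(item)) * factor for item in data)

def sequential_apply_reduce(task_func, task_args, agg, acc,
                            concurrent_tasks=4, weight=lambda item: 1):
    job_id, data, args = task_args[0], task_args[1], task_args[2:]
    if not data:
        return acc
    for block in split_in_blocks(data, concurrent_tasks, weight):
        acc = agg(acc, task_func(job_id, block, *args))
    return acc

# 9 characters, factor 2 -> 18, regardless of how the data is split
total = sequential_apply_reduce(char_count, (42, 'abcdefghi', 2),
                                agg=lambda a, x: a + x, acc=0)
assert total == 18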
Example #3
def split_site_collection(sitecol, num_chunks):
    """
    Split the full site collection into several FilteredSiteCollections

    :param sitecol: full site collection
    :param num_chunks: hint for the number of blocks to generate
    """
    for indices in split_in_blocks(sitecol.indices, num_chunks):
        yield FilteredSiteCollection(indices, sitecol)
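No weight or kind function is passed here, so every index counts as 1 and the chunks come out nearly equal in size; for instance, with the sketch from Example #1:

chunks = list(split_in_blocks(range(10), 3))
# total weight 10, hint 3 -> max block weight ceil(10 / 3) == 4
assert [len(c) for c in chunks] == [4, 4, 2]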
Example #4
    def task_arg_gen(self):
        """
        Yield a tuple of the form (job_id, ruptures, sites, gmf_id) for
        each block of SESRuptures; the ruptures are used for temporal
        occurrence sampling.
        """
        ses_ruptures = models.SESRupture.objects.filter(
            rupture__ses_collection=self.ses_coll.id)
        for ruptures in split_in_blocks(ses_ruptures, self.concurrent_tasks):
            yield self.job.id, ruptures, self.sites, self.gmf.id
Example #5
    def split(self, hint):
        """
        Split the sources into a number of blocks close to the given `hint`.

        :param int hint: hint for the number of blocks
        """
        if self.sources:
            for block in split_in_blocks(
                    self.sources, hint,
                    self.weight.__getitem__,
                    self.trt_model.__getitem__):
                trt_model = self.trt_model[block[0]]
                yield trt_model, block
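Note that split_in_blocks never mixes kinds within a block: since the kind function here is self.trt_model.__getitem__, every source in a block shares the same tectonic region type, which is why reading trt_model off the first element (block[0]) is valid for the whole block. The test in Example #6 below demonstrates this kind-breaking behavior.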
Example #6
    def test_split_with_kind(self):
        Source = namedtuple('Source', 'typology, weight')
        s1 = Source('point', 1)
        s2 = Source('point', 1)
        s3 = Source('area', 2)
        s4 = Source('area', 4)
        s5 = Source('area', 4)
        blocks = list(
            block_splitter([s1, s2, s3, s4, s5],
                           max_weight=6,
                           weight=attrgetter('weight'),
                           kind=attrgetter('typology')))
        self.assertEqual(list(map(len, blocks)), [2, 2, 1])
        self.assertEqual([b.weight for b in blocks], [2, 6, 4])

        blocks = list(
            split_in_blocks([s1, s2, s3, s4, s5],
                            hint=6,
                            weight=attrgetter('weight'),
                            kind=attrgetter('typology')))
        self.assertEqual(list(map(len, blocks)), [2, 1, 1, 1])
        self.assertEqual([b.weight for b in blocks], [2, 2, 4, 4])
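The two halves of the test differ because block_splitter takes an absolute cap on block weight (max_weight=6): s1 and s2 give weight 2 (a new block starts at s3 because the typology changes from point to area), s3 and s4 reach exactly 6, and s5 ends up alone with weight 4. split_in_blocks instead derives the cap from the hint: the total weight is 1+1+2+4+4 = 12, so hint=6 caps each block at 12/6 = 2, and s4 and s5 (weight 4 each) each overflow the cap and land in blocks of their own.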
Example #7
    def generate_gmfs(self):
        """
        Generate the GMFs and optionally the hazard curves too
        """
        sitecol = self.hc.site_collection
        otm = tasks.OqTaskManager(compute_and_save_gmfs, logs.LOG.progress)
        task_no = 0
        rupture_data = []
        for rupture in models.ProbabilisticRupture.objects.filter(
                trt_model__lt_model__hazard_calculation=self.hc
                ).order_by('trt_model'):
            rdata = RuptureData(
                self.hc.site_collection, rupture,
                [(r.id, r.seed) for r in rupture.sesrupture_set.all()])
            rupture_data.append(rdata)

        for rblock in split_in_blocks(
                rupture_data, self.concurrent_tasks,
                RuptureData.get_weight, RuptureData.get_trt):
            otm.submit(self.job.id, sitecol.sids, rblock, task_no)
            task_no += 1
        otm.aggregate_results(self.agg_curves, self.curves)
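A small stylistic note: the manual task_no counter in the submission loop can be folded into enumerate, producing the same submissions:

        for task_no, rblock in enumerate(split_in_blocks(
                rupture_data, self.concurrent_tasks,
                RuptureData.get_weight, RuptureData.get_trt)):
            otm.submit(self.job.id, sitecol.sids, rblock, task_no)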
Example #8
def parallel_apply(task, task_args,
                   concurrent_tasks=multiprocessing.cpu_count(),
                   weight=lambda item: 1, kind=lambda item: 'Unspecified'):
    """
    Apply a list-processing task to a tuple of task_args of the form
    (job_id, data, *args) and return the list of processed data.

    :param task: an oqtask
    :param task_args: the arguments to be passed to the task function
    :param concurrent_tasks: hint about how many tasks to generate
    :param weight: function to extract the weight of an item in data
    :param kind: function to extract the kind of an item in data
    """
    job_id = task_args[0]
    data = task_args[1]
    args = task_args[2:]
    if not data:
        return []
    elif len(data) == 1:
        return task.task_func(job_id, data, *args)
    blocks = split_in_blocks(data, concurrent_tasks, weight, kind)
    alldata = [(job_id, block) + args for block in blocks]
    return map_reduce(task, alldata, list.__add__, [])
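The final reduction simply concatenates the per-block result lists: list.__add__ with an empty accumulator is plain list concatenation, as this stdlib-only check shows:

import functools
parts = [['r1'], ['r2', 'r3'], []]
assert functools.reduce(list.__add__, parts, []) == ['r1', 'r2', 'r3']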