Example #1
 def test_split_in_blocks(self):
     weights = dict([('a', 11), ('b', 10), ('c', 100), ('d', 15), ('e', 20),
                     ('f', 5), ('g', 30), ('h', 17), ('i', 25)])
     blocks = list(split_in_blocks('abcdefghi', 1, weights.get))
     self.assertEqual(len(blocks), 1)
     blocks = list(split_in_blocks('abcdefghi', 2, weights.get))
     self.assertEqual(len(blocks), 3)
     self.assertEqual(repr(blocks), "[<WeightedSequence ['a', 'b'], weight=21>, <WeightedSequence ['c', 'd'], weight=115>, <WeightedSequence ['e', 'f', 'g', 'h', 'i'], weight=97>]")
Example #2
 def test_split_in_blocks(self):
     weights = dict(
         [("a", 11), ("b", 10), ("c", 100), ("d", 15), ("e", 20), ("f", 5), ("g", 30), ("h", 17), ("i", 25)]
     )
     blocks = list(split_in_blocks("abcdefghi", 1, weights.get))
     self.assertEqual(len(blocks), 1)
     blocks = list(split_in_blocks("abcdefghi", 2, weights.get))
     self.assertEqual(len(blocks), 3)
     self.assertEqual(
         repr(blocks),
         "[<WeightedSequence ['a', 'b'], weight=21>, <WeightedSequence ['c', 'd'], weight=115>, <WeightedSequence ['e', 'f', 'g', 'h', 'i'], weight=97>]",
     )
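The two tests above pin down the behaviour of split_in_blocks: with hint=1 everything lands in a single block, while with hint=2 the items are packed greedily so that no block exceeds roughly total_weight / hint, and the heavy 'c' forces a third block. Below is a minimal self-contained sketch of that greedy packing, assuming the cap is ceil(total_weight / hint); it reproduces the grouping asserted above, but it is only an illustration, not the engine implementation (the real function also accepts a key argument and returns WeightedSequence objects).

import math

def split_by_weight(items, hint, weight=lambda item: 1):
    # Greedy sketch: close the current block as soon as adding the next item
    # would push it past ceil(total_weight / hint).
    items = list(items)
    max_weight = math.ceil(sum(weight(i) for i in items) / hint)
    block, block_weight = [], 0
    for item in items:
        w = weight(item)
        if block and block_weight + w > max_weight:
            yield block, block_weight
            block, block_weight = [], 0
        block.append(item)
        block_weight += w
    if block:
        yield block, block_weight

weights = dict(a=11, b=10, c=100, d=15, e=20, f=5, g=30, h=17, i=25)
blocks = list(split_by_weight('abcdefghi', 2, weights.get))
assert [blk for blk, _ in blocks] == [
    ['a', 'b'], ['c', 'd'], ['e', 'f', 'g', 'h', 'i']]
assert [bw for _, bw in blocks] == [21, 115, 97]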
Example #3
def get_data_transfer(dstore):
    """
    Determine the amount of data transferred from the controller node
    to the workers and back in a classical calculation.

    :param dstore: a :class:`openquake.commonlib.datastore.DataStore` instance
    :returns: (block_info, to_send_forward, to_send_back)
    """
    oqparam = OqParam.from_(dstore.attrs)
    sitecol = dstore['sitecol']
    rlzs_assoc = dstore['rlzs_assoc']
    info = dstore['job_info']
    sources = dstore['composite_source_model'].get_sources()
    num_gsims_by_trt = groupby(rlzs_assoc, operator.itemgetter(0),
                               lambda group: sum(1 for row in group))
    gsims_assoc = rlzs_assoc.gsims_by_trt_id
    to_send_forward = 0
    to_send_back = 0
    block_info = []
    for block in split_in_blocks(sources, oqparam.concurrent_tasks or 1,
                                 operator.attrgetter('weight'),
                                 operator.attrgetter('trt_model_id')):
        num_gsims = num_gsims_by_trt.get(block[0].trt_model_id, 0)
        back = info['n_sites'] * info['n_levels'] * info['n_imts'] * num_gsims
        to_send_back += back * 8  # 8 bytes per float
        args = (block, sitecol, gsims_assoc, PerformanceMonitor(''))
        to_send_forward += sum(len(p) for p in parallel.pickle_sequence(args))
        block_info.append((len(block), block.weight))
    return numpy.array(block_info, block_dt), to_send_forward, to_send_back
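The to_send_back estimate above is plain arithmetic: each task returns one 8-byte float per (site, level, IMT, GSIM) combination of its block. A back-of-the-envelope check with made-up sizes:

# Hypothetical sizes, only to make the formula concrete.
n_sites, n_levels, n_imts, num_gsims = 1000, 20, 2, 4
floats_per_task = n_sites * n_levels * n_imts * num_gsims  # 160,000 values
print(floats_per_task * 8)  # 1,280,000 bytes (~1.2 MiB) returned by one task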
Example #4
    def build_inputs_from_ruptures(
        self, grp_trt, imts, sitecol, all_ruptures, trunc_level, correl_model, min_iml, eps, hint
    ):
        """
        :param grp_trt: a dictionary grp_id -> tectonic region type
        :param imts: list of intensity measure type strings
        :param sitecol: a SiteCollection instance
        :param all_ruptures: the complete list of EBRupture instances
        :param trunc_level: the truncation level (or None)
        :param correl_model: the correlation model (or None)
        :param min_iml: an array of minimum IMLs per IMT
        :param eps: a matrix of epsilons of shape (N, E) or None
        :param hint: hint for how many blocks to generate

        Yield :class:`RiskInputFromRuptures` instances.
        """
        by_grp_id = operator.attrgetter("grp_id")
        start = 0
        for ses_ruptures in split_in_blocks(
            all_ruptures, hint or 1, key=by_grp_id, weight=operator.attrgetter("weight")
        ):
            grp_id = ses_ruptures[0].grp_id
            num_events = sum(sr.multiplicity for sr in ses_ruptures)
            idxs = numpy.arange(start, start + num_events)
            start += num_events
            yield RiskInputFromRuptures(
                grp_trt[grp_id],
                imts,
                sitecol,
                ses_ruptures,
                trunc_level,
                correl_model,
                min_iml,
                eps[:, idxs] if eps is not None else None,
            )
Example #5
 def gen_args(self, ebruptures):
     """
     :param ebruptures: a list of EBRupture objects to be split
     :yields: the arguments for compute_gmfs_and_curves
     """
     oq = self.oqparam
     monitor = self.monitor(self.core_task.__name__)
     monitor.oqparam = oq
     imts = list(oq.imtls)
     min_iml = calc.fix_minimum_intensity(oq.minimum_intensity, imts)
     grp_trt = {sg.id: sg.trt for sm in self.csm.info.source_models
                for sg in sm.src_groups}
     rlzs_by_grp = self.rlzs_assoc.get_rlzs_by_grp_id()
     correl_model = oq.get_correl_model()
     for block in split_in_blocks(
             ebruptures, oq.concurrent_tasks or 1,
             key=operator.attrgetter('grp_id')):
         grp_id = block[0].grp_id
         trt = grp_trt[grp_id]
         gsims = [dic[trt] for dic in self.rlzs_assoc.gsim_by_trt]
         samples = self.rlzs_assoc.samples[grp_id]
         getter = GmfGetter(gsims, block, self.sitecol,
                            imts, min_iml, oq.truncation_level,
                            correl_model, samples)
         yield getter, rlzs_by_grp[grp_id], monitor
Example #6
    def execute(self):
        """
        Split the computation into tiles which are run in parallel.
        """
        monitor = self.monitor(self.core_func.__name__)
        monitor.oqparam = oq = self.oqparam
        self.tiles = split_in_blocks(
            self.sitecol, self.oqparam.concurrent_tasks or 1)
        oq.concurrent_tasks = 0
        calculator = ClassicalCalculator(
            self.oqparam, monitor, persistent=False)
        calculator.csm = self.csm
        rlzs_assoc = self.csm.get_rlzs_assoc()
        self.rlzs_assoc = calculator.rlzs_assoc = rlzs_assoc

        # parallelization
        all_args = []
        position = 0
        for (i, tile) in enumerate(self.tiles):
            all_args.append((calculator, SiteCollection(tile),
                             position, i, monitor))
            position += len(tile)
        acc = {trt_gsim: zero_curves(len(self.sitecol), oq.imtls)
               for trt_gsim in calculator.rlzs_assoc}
        acc['calc_times'] = []
        return parallel.starmap(classical_tiling, all_args).reduce(
            agg_curves_by_trt_gsim, acc)
Example #7
    def apply(cls, task, task_args,
              concurrent_tasks=executor.num_tasks_hint,
              maxweight=None,
              weight=lambda item: 1,
              key=lambda item: 'Unspecified',
              name=None):
        """
        Apply a task to a tuple of the form (sequence, \*other_args)
        by first splitting the sequence in chunks, according to the weight
        of the elements and possibly to a key (see
        :func:`openquake.baselib.general.split_in_blocks`).
        Then reduce the results with an aggregation function.
        The chunks which are generated internally can be seen directly (
        useful for debugging purposes) by looking at the attribute `._chunks`,
        right after the `apply` function has been called.

        :param task: a task to run in parallel
        :param task_args: the arguments to be passed to the task function
        :param agg: the aggregation function
        :param acc: initial value of the accumulator (default empty AccumDict)
        :param concurrent_tasks: hint about how many tasks to generate
        :param maxweight: if not None, used to split the tasks
        :param weight: function to extract the weight of an item in arg0
        :param key: function to extract the kind of an item in arg0
        """
        arg0 = task_args[0]  # this is assumed to be a sequence
        args = task_args[1:]
        if maxweight:
            chunks = block_splitter(arg0, maxweight, weight, key)
        else:
            chunks = split_in_blocks(arg0, concurrent_tasks or 1, weight, key)
        return cls.starmap(task, [(chunk,) + args for chunk in chunks], name)
Example #8
def apply_reduce(task, task_args,
                 agg=operator.add,
                 acc=None,
                 concurrent_tasks=CONCURRENT_TASKS,
                 weight=lambda item: 1,
                 key=lambda item: 'Unspecified',
                 name=None):
    """
    Apply a task to a tuple of the form (job_id, data, *args)
    by splitting the data in chunks and reducing the results with an
    aggregation function.

    :param task: an oqtask
    :param task_args: the arguments to be passed to the task function
    :param agg: the aggregation function
    :param acc: initial value of the accumulator
    :param concurrent_tasks: hint about how many tasks to generate
    :param weight: function to extract the weight of an item in data
    :param key: function to extract the kind of an item in data
    """
    if acc is None:
        acc = AccumDict()
    job_id = task_args[0]
    data = task_args[1]
    args = task_args[2:]
    if not data:
        return acc
    elif len(data) == 1 or not concurrent_tasks:
        return agg(acc, task.task_func(job_id, data, *args))
    blocks = split_in_blocks(data, concurrent_tasks, weight, key)
    task_args = [(job_id, block) + args for block in blocks]
    return starmap(task, task_args, logs.LOG.progress, name).reduce(agg, acc)
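Both apply and apply_reduce follow the same split/apply/aggregate flow: chunk the first argument, run the task on each chunk, then fold the partial results with agg starting from acc. A self-contained sketch of that flow, using a trivial even splitter instead of the engine machinery:

import operator
from functools import reduce

def split_evenly(seq, hint):
    # Round-robin split into at most `hint` non-empty chunks (illustration only).
    return [chunk for chunk in (seq[i::hint] for i in range(hint)) if chunk]

data = list(range(1, 11))
chunks = split_evenly(data, 3)               # the "blocks"
partials = [sum(chunk) for chunk in chunks]  # the task applied to each block
total = reduce(operator.add, partials, 0)    # agg=operator.add, acc=0
assert total == sum(data) == 55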
Example #9
    def build_inputs_from_ruptures(
            self, sitecol, all_ruptures, trunc_level, correl_model,
            min_iml, eps, hint):
        """
        :param sitecol: a SiteCollection instance
        :param all_ruptures: the complete list of EBRupture instances
        :param trunc_level: the truncation level (or None)
        :param correl_model: the correlation model (or None)
        :param min_iml: an array of minimum IMLs per IMT
        :param eps: a matrix of epsilons of shape (N, E) or None
        :param hint: hint for how many blocks to generate

        Yield :class:`RiskInputFromRuptures` instances.
        """
        imt_taxonomies = self.get_imt_taxonomies()
        by_trt_id = operator.attrgetter('trt_id')
        for ses_ruptures in split_in_blocks(
                all_ruptures, hint or 1, key=by_trt_id,
                weight=operator.attrgetter('weight')):
            eids = []
            for sr in ses_ruptures:
                eids.extend(sr.events['eid'])
            yield RiskInputFromRuptures(
                imt_taxonomies, sitecol, ses_ruptures,
                trunc_level, correl_model, min_iml,
                eps[:, eids] if eps is not None else None, eids)
Example #10
    def apply(cls, task, task_args,
              concurrent_tasks=executor.num_tasks_hint,
              maxweight=None,
              weight=lambda item: 1,
              key=lambda item: 'Unspecified',
              name=None):
        """
        Apply a task to a tuple of the form (sequence, \*other_args)
        by first splitting the sequence in chunks, according to the weight
        of the elements and possibly to a key (see
        :func:`openquake.baselib.general.split_in_blocks`).

        :param task: a task to run in parallel
        :param task_args: the arguments to be passed to the task function
        :param agg: the aggregation function
        :param acc: initial value of the accumulator (default empty AccumDict)
        :param concurrent_tasks: hint about how many tasks to generate
        :param maxweight: if not None, used to split the tasks
        :param weight: function to extract the weight of an item in arg0
        :param key: function to extract the kind of an item in arg0
        """
        arg0 = task_args[0]  # this is assumed to be a sequence
        args = task_args[1:]
        if maxweight:
            chunks = block_splitter(arg0, maxweight, weight, key)
        else:
            chunks = split_in_blocks(arg0, concurrent_tasks or 1, weight, key)
        return cls(task, [(chunk,) + args for chunk in chunks], name)
Example #11
def data_transfer(calc):
    """
    Determine the amount of data transferred from the controller node
    to the workers and back in a classical calculation.

    :returns: a triple (num_tasks, to_send_forward, to_send_back)
    """
    oqparam = calc.oqparam
    info = calc.job_info
    calc.monitor.oqparam = oqparam
    sources = calc.composite_source_model.get_sources()
    num_gsims_by_trt = groupby(calc.rlzs_assoc, operator.itemgetter(0),
                               lambda group: sum(1 for row in group))
    gsims_assoc = calc.rlzs_assoc.get_gsims_by_trt_id()
    to_send_forward = 0
    to_send_back = 0
    n_tasks = 0
    for block in split_in_blocks(sources, oqparam.concurrent_tasks,
                                 operator.attrgetter('weight'),
                                 operator.attrgetter('trt_model_id')):
        num_gsims = num_gsims_by_trt[block[0].trt_model_id]
        back = info['n_sites'] * info['n_levels'] * info['n_imts'] * num_gsims
        to_send_back += back * 8  # 8 bytes per float
        args = (block, calc.sitecol, gsims_assoc, calc.monitor)
        logging.info('Pickling task args #%d', n_tasks)
        to_send_forward += sum(len(p) for p in parallel.pickle_sequence(args))
        n_tasks += 1
    return n_tasks, to_send_forward, to_send_back
Example #12
 def gen_args(self, ruptures_by_grp):
     """
     :param ruptures_by_grp: a dictionary of EBRupture objects
     :yields: the arguments for compute_gmfs_and_curves
     """
     oq = self.oqparam
     monitor = self.monitor(self.core_task.__name__)
     monitor.oqparam = oq
     imts = list(oq.imtls)
     min_iml = calc.fix_minimum_intensity(oq.minimum_intensity, imts)
     self.grp_trt = self.csm.info.grp_trt()
     rlzs_by_grp = self.rlzs_assoc.get_rlzs_by_grp_id()
     correl_model = oq.get_correl_model()
     for grp_id in ruptures_by_grp:
         ruptures = ruptures_by_grp[grp_id]
         if not ruptures:
             continue
         for block in split_in_blocks(ruptures, oq.concurrent_tasks or 1):
             trt = self.grp_trt[grp_id]
             gsims = [dic[trt] for dic in self.rlzs_assoc.gsim_by_trt]
             samples = self.rlzs_assoc.samples[grp_id]
             getter = GmfGetter(gsims, block, self.sitecol,
                                imts, min_iml, oq.truncation_level,
                                correl_model, samples)
             yield getter, rlzs_by_grp[grp_id], monitor
Example #13
    def split_in_tiles(self, hint):
        """
        Split a SiteCollection into a set of tiles (SiteCollection instances).

        :param hint: hint for how many tiles to generate
        """
        tiles = []
        for seq in split_in_blocks(range(len(self)), hint or 1):
            sc = SiteCollection.__new__(SiteCollection)
            sc.array = self.array[numpy.array(seq, int)]
            tiles.append(sc)
        return tiles
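The tile construction above relies on NumPy fancy indexing: the block of site indices produced by split_in_blocks selects the corresponding rows of the site array in one shot. A minimal standalone demo of that indexing step (the array here is a stand-in, not a real SiteCollection):

import numpy

site_array = numpy.arange(20).reshape(10, 2)  # stand-in for 10 sites
seq = range(3, 6)                             # one block of site indices
tile = site_array[numpy.array(seq, int)]      # rows 3, 4 and 5
print(tile.shape)                             # (3, 2)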
Example #14
    def build_riskinputs(self, hazards_by_key, eps=numpy.zeros(0)):
        """
        :param hazards_by_key:
            a dictionary key -> IMT -> array of length num_sites
        :param eps:
            a matrix of epsilons (possibly empty)
        :returns:
            a list of RiskInputs objects, sorted by IMT.
        """
        self.check_poes(hazards_by_key)
        imtls = self.oqparam.imtls
        if not set(self.oqparam.risk_imtls) & set(imtls):
            rsk = ', '.join(self.oqparam.risk_imtls)
            haz = ', '.join(imtls)
            raise ValueError('The IMTs in the risk models (%s) are disjoint '
                             "from the IMTs in the hazard (%s)" % (rsk, haz))
        num_tasks = math.ceil((self.oqparam.concurrent_tasks or 1) /
                              len(imtls))
        with self.monitor('building riskinputs', autoflush=True):
            riskinputs = []
            idx_weight_pairs = [
                (i, len(assets))
                for i, assets in enumerate(self.assets_by_site)]
            blocks = general.split_in_blocks(
                idx_weight_pairs, num_tasks, weight=operator.itemgetter(1))
            for block in blocks:
                indices = numpy.array([idx for idx, _weight in block])
                reduced_assets = self.assets_by_site[indices]
                # dictionary of epsilons for the reduced assets
                reduced_eps = collections.defaultdict(F32)
                if len(eps):
                    for assets in reduced_assets:
                        for asset in assets:
                            reduced_eps[asset.ordinal] = eps[asset.ordinal]

                # collect the hazards by key into hazards by imt
                hdata = collections.defaultdict(lambda: [{} for _ in indices])
                for key, hazards_by_imt in hazards_by_key.items():
                    for imt in imtls:
                        hazards_by_site = hazards_by_imt[imt]
                        for i, haz in enumerate(hazards_by_site[indices]):
                            hdata[imt][i][key] = haz
                # build the riskinputs
                for imt in hdata:
                    ri = self.riskmodel.build_input(
                        imt, hdata[imt], reduced_assets, reduced_eps)
                    if ri.weight > 0:
                        riskinputs.append(ri)
            assert riskinputs
            logging.info('Built %d risk inputs', len(riskinputs))
            return sorted(riskinputs, key=self.riskinput_key)
Example #15
def export_asset_loss_table(ekey, dstore):
    """
    Export in parallel the asset loss table from the datastore.

    NB1: for large calculations this may run out of memory
    NB2: due to a heisenbug in the parallel reading of .hdf5 files this works
    reliably only if the datastore has been created by a different process

    The recommendation is: *do not use this exporter*: rather, study its source
    code and write what you need. Every postprocessing is different.
    """
    key, fmt = ekey
    oq = dstore['oqparam']
    assetcol = dstore['assetcol']
    arefs = dstore['asset_refs'].value
    avals = assetcol.values()
    loss_types = dstore.get_attr('all_loss_ratios', 'loss_types').split()
    dtlist = [(lt, F32) for lt in loss_types]
    if oq.insured_losses:
        for lt in loss_types:
            dtlist.append((lt + '_ins', F32))
    lrs_dt = numpy.dtype([('rlzi', U16), ('losses', dtlist)])
    fname = dstore.export_path('%s.%s' % ekey)
    monitor = performance.Monitor(key, fname)
    lrgetter = riskinput.LossRatiosGetter(dstore)
    aids = range(len(assetcol))
    allargs = [(lrgetter, list(block), monitor)
               for block in split_in_blocks(aids, oq.concurrent_tasks)]
    dstore.close()  # avoid OSError: Can't read data (Wrong b-tree signature)
    L = len(loss_types)
    with hdf5.File(fname, 'w') as f:
        nbytes = 0
        total = numpy.zeros(len(dtlist), F32)
        for pairs in parallel.Starmap(get_loss_ratios, allargs):
            for aid, data in pairs:
                asset = assetcol[aid]
                avalue = avals[aid]
                for l, lt in enumerate(loss_types):
                    aval = avalue[lt]
                    for i in range(oq.insured_losses + 1):
                        data['ratios'][:, l + L * i] *= aval
                aref = arefs[asset.idx]
                f[b'asset_loss_table/' + aref] = data.view(lrs_dt)
                total += data['ratios'].sum(axis=0)
                nbytes += data.nbytes
        f['asset_loss_table'].attrs['loss_types'] = ' '.join(loss_types)
        f['asset_loss_table'].attrs['total'] = total
        f['asset_loss_table'].attrs['nbytes'] = nbytes
    return [fname]
Example #16
    def apply_reduce(cls,
                     task,
                     task_args,
                     agg=operator.add,
                     acc=None,
                     concurrent_tasks=executor._max_workers,
                     weight=lambda item: 1,
                     key=lambda item: 'Unspecified',
                     name=None,
                     posthook=None):
        """
        Apply a task to a tuple of the form (sequence, \*other_args)
        by first splitting the sequence in chunks, according to the weight
        of the elements and possibly to a key (see
        :func:`openquake.baselib.general.split_in_blocks`).
        Then reduce the results with an aggregation function.
        The chunks which are generated internally can be seen directly (
        useful for debugging purposes) by looking at the attribute `._chunks`,
        right after the `apply_reduce` function has been called.

        :param task: a task to run in parallel
        :param task_args: the arguments to be passed to the task function
        :param agg: the aggregation function
        :param acc: initial value of the accumulator (default empty AccumDict)
        :param concurrent_tasks: hint about how many tasks to generate
        :param weight: function to extract the weight of an item in arg0
        :param key: function to extract the kind of an item in arg0
        """
        arg0 = task_args[0]  # this is assumed to be a sequence
        args = task_args[1:]
        task_func = getattr(task, 'task_func', task)
        if acc is None:
            acc = AccumDict()
        if len(arg0) == 0:  # nothing to do
            return acc
        chunks = list(split_in_blocks(arg0, concurrent_tasks or 1, weight,
                                      key))
        cls.apply_reduce.__func__._chunks = chunks
        if not concurrent_tasks or no_distribute() or len(chunks) == 1:
            # apply the function in the master process
            for i, chunk in enumerate(chunks):
                if args and hasattr(args[-1], 'flush'):  # is monitor
                    args[-1].task_no = i
                acc = agg(acc, task_func(chunk, *args))
            return acc
        logging.info('Starting %d tasks', len(chunks))
        self = cls.starmap(task, [(chunk, ) + args for chunk in chunks], name)
        return self.reduce(agg, acc, posthook)
Example #17
 def build_riskinputs(self, kind, hazards_by_rlz, eps=numpy.zeros(0)):
     """
     :param kind:
         kind of hazard getter, can be 'poe' or 'gmf'
     :param hazards_by_rlz:
         a dictionary rlz -> IMT -> array of length num_sites
     :param eps:
         a matrix of epsilons (possibly empty)
     :returns:
         a list of RiskInputs objects, sorted by IMT.
     """
     self.check_poes(hazards_by_rlz)
     imtls = self.oqparam.imtls
     if not set(self.oqparam.risk_imtls) & set(imtls):
         rsk = ', '.join(self.oqparam.risk_imtls)
         haz = ', '.join(imtls)
         raise ValueError('The IMTs in the risk models (%s) are disjoint '
                          "from the IMTs in the hazard (%s)" % (rsk, haz))
     num_tasks = self.oqparam.concurrent_tasks or 1
     rlzs = range(len(hazards_by_rlz))
     assets_by_site = self.assetcol.assets_by_site()
     with self.monitor('building riskinputs', autoflush=True):
         riskinputs = []
         idx_weight_pairs = [(i, len(assets))
                             for i, assets in enumerate(assets_by_site)]
         blocks = general.split_in_blocks(idx_weight_pairs,
                                          num_tasks,
                                          weight=operator.itemgetter(1))
         for block in blocks:
             indices = numpy.array([idx for idx, _weight in block])
             reduced_assets = assets_by_site[indices]
             # dictionary of epsilons for the reduced assets
             reduced_eps = collections.defaultdict(F32)
             if len(eps):
                 for assets in reduced_assets:
                     for asset in assets:
                         reduced_eps[asset.ordinal] = eps[asset.ordinal]
             # build the riskinputs
             ri = riskinput.RiskInput(
                 riskinput.HazardGetter(kind, 0, {None: rlzs},
                                        hazards_by_rlz, indices,
                                        list(imtls)), reduced_assets,
                 reduced_eps)
             if ri.weight > 0:
                 riskinputs.append(ri)
         assert riskinputs
         logging.info('Built %d risk inputs', len(riskinputs))
         return riskinputs
Example #18
 def _gen_riskinputs(self, kind, eps, num_events):
     num_tasks = self.oqparam.concurrent_tasks or 1
     assets_by_site = self.assetcol.assets_by_site()
     if kind == 'poe':
         indices = None
     else:
         indices = self.datastore['gmf_data/indices'].value
     dstore = self.can_read_parent() or self.datastore
     sid_weight = []
     for sid, assets in enumerate(assets_by_site):
         if len(assets) == 0:
             continue
         elif indices is None:
             weight = len(assets)
         else:
             idx = indices[sid]
             if indices.dtype.names:  # engine < 3.2
                 num_gmfs = sum(stop - start for start, stop in idx)
             else:  # engine >= 3.2
                 num_gmfs = (idx[1] - idx[0]).sum()
             weight = len(assets) * (num_gmfs or 1)
         sid_weight.append((sid, weight))
     for block in general.split_in_blocks(
             sid_weight, num_tasks, weight=operator.itemgetter(1)):
         sids = numpy.array([sid for sid, _weight in block])
         reduced_assets = assets_by_site[sids]
         # dictionary of epsilons for the reduced assets
         reduced_eps = {}
         for assets in reduced_assets:
             for ass in assets:
                 if eps is not None and len(eps):
                     reduced_eps[ass.ordinal] = eps[ass.ordinal]
         # build the riskinputs
         if kind == 'poe':  # hcurves, shape (R, N)
             getter = PmapGetter(dstore, self.rlzs_assoc, sids)
             getter.num_rlzs = self.R
         else:  # gmf
             getter = GmfDataGetter(dstore, sids, self.R,
                                    self.oqparam.imtls)
         if dstore is self.datastore:
             # read the hazard data in the controller node
             getter.init()
         else:
             # the datastore must be closed to avoid the HDF5 fork bug
             assert dstore.hdf5 == (), '%s is not closed!' % dstore
         ri = riskinput.RiskInput(getter, reduced_assets, reduced_eps)
         ri.weight = block.weight
         yield ri
Example #19
    def split_in_tiles(self, hint):
        """
        Split a SiteCollection into a set of tiles (SiteCollection instances).

        :param hint: hint for how many tiles to generate
        """
        tiles = []
        offset = 0
        for seq in split_in_blocks(range(len(self)), hint or 1):
            sc = SiteCollection.__new__(SiteCollection)
            sc.indices = None
            sc.array = self.array[numpy.array(seq, int)]
            sc.offset = offset
            tiles.append(sc)
            offset += len(seq)
        return tiles
Example #20
    def build_riskinputs(self, hazards_by_key, eps=numpy.zeros(0)):
        """
        :param hazards_by_key:
            a dictionary key -> IMT -> array of length num_sites
        :param eps:
            a matrix of epsilons (possibly empty)
        :returns:
            a list of RiskInputs objects, sorted by IMT.
        """
        # add asset.idx as side effect
        riskinput.build_asset_collection(self.assets_by_site,
                                         self.oqparam.time_event)
        imtls = self.oqparam.imtls
        with self.monitor('building riskinputs', autoflush=True):
            riskinputs = []
            idx_weight_pairs = [(i, len(assets))
                                for i, assets in enumerate(self.assets_by_site)
                                ]
            blocks = general.split_in_blocks(idx_weight_pairs,
                                             self.oqparam.concurrent_tasks
                                             or 1,
                                             weight=operator.itemgetter(1))
            for block in blocks:
                indices = numpy.array([idx for idx, _weight in block])
                reduced_assets = self.assets_by_site[indices]
                reduced_eps = {}  # for the assets belonging to the indices
                if len(eps):
                    for assets in reduced_assets:
                        for asset in assets:
                            reduced_eps[asset.idx] = eps[asset.idx]

                # collect the hazards by key into hazards by imt
                hdata = collections.defaultdict(lambda: [{} for _ in indices])
                for key, hazards_by_imt in hazards_by_key.items():
                    for imt in imtls:
                        hazards_by_site = hazards_by_imt[imt]
                        for i, haz in enumerate(hazards_by_site[indices]):
                            hdata[imt][i][key] = haz
                # build the riskinputs
                for imt in hdata:
                    ri = self.riskmodel.build_input(imt, hdata[imt],
                                                    reduced_assets,
                                                    reduced_eps)
                    if ri.weight > 0:
                        riskinputs.append(ri)
            logging.info('Built %d risk inputs', len(riskinputs))
            return sorted(riskinputs, key=self.riskinput_key)
Example #21
 def execute(self):
     oq = self.oqparam
     R = len(self.loss_builder.weights)
     # build loss maps
     if 'all_loss_ratios' in self.datastore and oq.conditional_loss_poes:
         assetcol = self.datastore['assetcol']
         stats = oq.risk_stats()
         builder = self.loss_builder
         A = len(assetcol)
         S = len(stats)
         P = len(builder.return_periods)
         # create loss_maps datasets
         self.datastore.create_dset(
             'loss_maps-rlzs', self.loss_maps_dt, (A, R), fillvalue=None)
         if R > 1:
             self.datastore.create_dset(
                 'loss_maps-stats', self.loss_maps_dt, (A, S),
                 fillvalue=None)
             self.datastore.set_attrs(
                 'loss_maps-stats',
                 stats=[encode(name) for (name, func) in stats])
             self.datastore.create_dset(
                 'curves-stats', oq.loss_dt(), (A, S, P), fillvalue=None)
             self.datastore.set_attrs(
                 'curves-stats', return_periods=builder.return_periods,
                 stats=[encode(name) for (name, func) in stats])
         mon = self.monitor('loss maps')
         lazy = ('all_loss_ratios' in self.datastore.parent
                 and self.can_read_parent())
         logging.info('Instantiating LossRatiosGetters')
         with self.monitor('building lrgetters', measuremem=True,
                           autoflush=True):
             allargs = []
             for aids in split_in_blocks(range(A), oq.concurrent_tasks):
                 dstore = self.datastore.parent if lazy else self.datastore
                 getter = getters.LossRatiosGetter(dstore, aids, lazy)
                 # a lazy getter will read the loss_ratios from the workers
                 # an eager getter reads the loss_ratios upfront
                 allargs.append((assetcol.values(aids), builder, getter,
                                 stats, oq.conditional_loss_poes, mon))
         if lazy:
             # avoid OSError: Can't read data (Wrong b-tree signature)
             self.datastore.parent.close()
         parallel.Starmap(build_curves_maps, allargs).reduce(
             self.save_curves_maps)
         if lazy:  # the parent was closed, reopen it
             self.datastore.parent.open()
Example #22
    def build_riskinputs(self, hazards_by_key, eps=numpy.zeros(0)):
        """
        :param hazards_by_key:
            a dictionary key -> IMT -> array of length num_sites
        :param eps:
            a matrix of epsilons (possibly empty)
        :returns:
            a list of RiskInputs objects, sorted by IMT.
        """
        # add asset.idx as side effect
        riskinput.build_asset_collection(
            self.assets_by_site, self.oqparam.time_event)
        imtls = self.oqparam.imtls
        with self.monitor('building riskinputs', autoflush=True):
            riskinputs = []
            idx_weight_pairs = [
                (i, len(assets))
                for i, assets in enumerate(self.assets_by_site)]
            blocks = general.split_in_blocks(
                idx_weight_pairs,
                self.oqparam.concurrent_tasks or 1,
                weight=operator.itemgetter(1))
            for block in blocks:
                indices = numpy.array([idx for idx, _weight in block])
                reduced_assets = self.assets_by_site[indices]
                reduced_eps = {}  # for the assets belonging to the indices
                if len(eps):
                    for assets in reduced_assets:
                        for asset in assets:
                            reduced_eps[asset.idx] = eps[asset.idx]

                # collect the hazards by key into hazards by imt
                hdata = collections.defaultdict(lambda: [{} for _ in indices])
                for key, hazards_by_imt in hazards_by_key.items():
                    for imt in imtls:
                        hazards_by_site = hazards_by_imt[imt]
                        for i, haz in enumerate(hazards_by_site[indices]):
                            hdata[imt][i][key] = haz
                # build the riskinputs
                for imt in hdata:
                    ri = self.riskmodel.build_input(
                        imt, hdata[imt], reduced_assets, reduced_eps)
                    if ri.weight > 0:
                        riskinputs.append(ri)
            logging.info('Built %d risk inputs', len(riskinputs))
            return sorted(riskinputs, key=self.riskinput_key)
Example #23
 def pre_execute(self):
     """
     Read the full source model and sites and build the needed tiles
     """
     self.oqparam = self.job.get_oqparam()
     source_model_lt = readinput.get_source_model_lt(self.oqparam)
     source_models = list(readinput.get_source_models(
         self.oqparam, source_model_lt))
     self.parse_risk_model()
     self.initialize_site_collection()
     info = readinput.get_job_info(
         self.oqparam, source_models, self.site_collection)
     self.imtls = self.oqparam.imtls
     weight = info['n_sites'] * info['n_levels'] * info['max_realizations']
     nblocks = math.ceil(weight / self.oqparam.maximum_tile_weight)
     self.tiles = list(split_in_blocks(self.site_collection, nblocks))
     self.num_tiles = len(self.tiles)
Example #24
 def pre_execute(self):
     """
     Read the full source model and sites and build the needed tiles
     """
     self.oqparam = self.job.get_oqparam()
     source_model_lt = readinput.get_source_model_lt(self.oqparam)
     source_models = list(
         readinput.get_source_models(self.oqparam, source_model_lt))
     self.parse_risk_model()
     self.initialize_site_collection()
     info = readinput.get_job_info(self.oqparam, source_models,
                                   self.site_collection)
     self.imtls = self.oqparam.imtls
     weight = info['n_sites'] * info['n_levels'] * info['max_realizations']
     nblocks = math.ceil(weight / self.oqparam.maximum_tile_weight)
     self.tiles = list(split_in_blocks(self.site_collection, nblocks))
     self.num_tiles = len(self.tiles)
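The tile count computed in the two pre_execute variants above is simple arithmetic: the job weight n_sites * n_levels * max_realizations is divided by maximum_tile_weight. With illustrative numbers (not taken from any real job):

import math

n_sites, n_levels, max_realizations = 10000, 20, 3
maximum_tile_weight = 100000
weight = n_sites * n_levels * max_realizations     # 600,000
nblocks = math.ceil(weight / maximum_tile_weight)  # 6 tiles
print(weight, nblocks)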
Example #25
    def test_split_with_kind(self):
        Source = namedtuple("Source", "typology, weight")
        s1 = Source("point", 1)
        s2 = Source("point", 1)
        s3 = Source("area", 2)
        s4 = Source("area", 4)
        s5 = Source("area", 4)
        blocks = list(
            block_splitter([s1, s2, s3, s4, s5], max_weight=6, weight=attrgetter("weight"), kind=attrgetter("typology"))
        )
        self.assertEqual(list(map(len, blocks)), [2, 2, 1])
        self.assertEqual([b.weight for b in blocks], [2, 6, 4])

        blocks = list(
            split_in_blocks([s1, s2, s3, s4, s5], hint=6, weight=attrgetter("weight"), key=attrgetter("typology"))
        )
        self.assertEqual(list(map(len, blocks)), [2, 1, 1, 1])
        self.assertEqual([b.weight for b in blocks], [2, 2, 4, 4])
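The assertions above also pin down how the key argument interacts with the weights: items with different keys never share a block, and within each key group the greedy cap (here assumed to be ceil(12 / 6) = 2, as in the sketch after Example #2) applies, so every heavy 'area' source ends up alone. A self-contained sketch of that behaviour, reproducing the split_in_blocks assertions of this version of the test (later versions of the same test, further below, report the same blocks in a different order):

import math
from collections import namedtuple
from operator import attrgetter

def split_by_key_and_weight(items, hint, weight, key):
    # Illustration only: group by key (first-appearance order, Python 3.7+ dict),
    # then split each group so no block exceeds ceil(total_weight / hint).
    items = list(items)
    max_weight = math.ceil(sum(weight(i) for i in items) / hint)
    groups = {}
    for item in items:
        groups.setdefault(key(item), []).append(item)
    for group in groups.values():
        block, bw = [], 0
        for item in group:
            w = weight(item)
            if block and bw + w > max_weight:
                yield block, bw
                block, bw = [], 0
            block.append(item)
            bw += w
        if block:
            yield block, bw

Source = namedtuple('Source', 'typology weight')
sources = [Source('point', 1), Source('point', 1),
           Source('area', 2), Source('area', 4), Source('area', 4)]
blocks = list(split_by_key_and_weight(sources, 6, attrgetter('weight'),
                                      attrgetter('typology')))
assert [len(blk) for blk, _ in blocks] == [2, 1, 1, 1]
assert [bw for _, bw in blocks] == [2, 2, 4, 4]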
Example #26
def gen_rgetters(dstore, slc=slice(None)):
    """
    :yields: unfiltered RuptureGetters
    """
    full_lt = dstore['full_lt']
    trt_by_grp = full_lt.trt_by_grp
    samples = full_lt.get_samples_by_grp()
    rlzs_by_gsim = full_lt.get_rlzs_by_gsim_grp()
    rup_array = dstore['ruptures'][slc]
    nr = len(dstore['ruptures'])
    for grp_id, arr in general.group_array(rup_array, 'grp_id').items():
        if not rlzs_by_gsim.get(grp_id, []):  # the model has no sources
            continue
        for block in general.split_in_blocks(arr, len(arr) / nr):
            rgetter = RuptureGetter(
                [RuptureProxy(rec) for rec in block], dstore.filename, grp_id,
                trt_by_grp[grp_id], samples[grp_id], rlzs_by_gsim[grp_id])
            yield rgetter
Example #27
def apply_reduce(task_func, task_args, agg=operator.add, acc=None,
                 concurrent_tasks=executor._max_workers,
                 weight=lambda item: 1,
                 key=lambda item: 'Unspecified',
                 name=None):
    """
    Apply a function to a tuple of the form (sequence, \*other_args)
    by first splitting the sequence in chunks, according to the weight
    of the elements and possibly to a key (see
    :func:`openquake.baselib.general.split_in_blocks`).
    Then reduce the results with an aggregation function. Here is an example:

    >>> apply_reduce(sum, ([1, 2, 3, 4, 5],), lambda acc, x: acc + x,
    ...             acc=0, concurrent_tasks=2)
    15

    The chunks which are generated internally can be seen directly (
    useful for debugging purposes) by looking at the attribute `._chunks`,
    right after the `apply_reduce` function has been called:

    >>> apply_reduce._chunks
    [<WeightedSequence [1, 2, 3], weight=3>, <WeightedSequence [4, 5], weight=2>]

    :param task_func: a function to run in parallel
    :param task_args: the arguments to be passed to the task function
    :param agg: the aggregation function
    :param acc: initial value of the accumulator (default empty AccumDict)
    :param concurrent_tasks: hint about how many tasks to generate
    :param weight: function to extract the weight of an item in arg0
    :param key: function to extract the kind of an item in arg0
    """
    arg0 = task_args[0]
    args = task_args[1:]
    if acc is None:
        acc = AccumDict()
    if not arg0:
        return acc
    elif len(arg0) == 1 or not concurrent_tasks:
        return agg(acc, task_func(arg0, *args))
    chunks = list(split_in_blocks(arg0, concurrent_tasks, weight, key))
    tm = starmap(task_func, [(chunk,) + args for chunk in chunks],
                 logging.info, name)
    apply_reduce._chunks = chunks
    return tm.reduce(agg, acc)
Example #28
    def execute(self):
        oq = self.oqparam
        # build loss maps
        if 'all_loss_ratios' in self.datastore and oq.conditional_loss_poes:
            assetcol = self.assetcol
            stats = oq.risk_stats()
            builder = self.riskmodel.curve_builder
            A = len(assetcol)
            weights = self.datastore['realizations']['weight']
            R = len(weights)
            # create loss_maps datasets
            self.datastore.create_dset('loss_maps-rlzs',
                                       self.loss_maps_dt, (A, R),
                                       fillvalue=None)
            if R > 1:
                self.datastore.create_dset('loss_maps-stats',
                                           self.loss_maps_dt, (A, len(stats)),
                                           fillvalue=None)
            mon = self.monitor('loss maps')
            lazy = (oq.hazard_calculation_id
                    and 'all_loss_ratios' in self.datastore.parent)
            logging.info('Instantiating LossRatiosGetters')
            with self.monitor('building lrgetters',
                              measuremem=True,
                              autoflush=True):
                allargs = []
                for aids in split_in_blocks(range(A), oq.concurrent_tasks):
                    dstore = self.datastore.parent if lazy else self.datastore
                    getter = riskinput.LossRatiosGetter(dstore, aids, lazy)
                    # a lazy getter will read the loss_ratios from the workers
                    # an eager getter reads the loss_ratios upfront
                    allargs.append((assetcol.values(aids), builder, getter,
                                    weights, stats, mon))
            if lazy:
                # avoid OSError: Can't read data (Wrong b-tree signature)
                self.datastore.parent.close()
            parallel.Starmap(build_loss_maps,
                             allargs).reduce(self.save_loss_maps)
            if lazy:  # the parent was closed, reopen it
                self.datastore.parent.open()

        # build an aggregate loss curve per realization
        if 'agg_loss_table' in self.datastore:
            self.build_agg_curve()
Example #29
    def apply_reduce(cls, task, task_args, agg=operator.add, acc=None,
                     concurrent_tasks=executor._max_workers,
                     weight=lambda item: 1,
                     key=lambda item: 'Unspecified',
                     name=None, posthook=None):
        """
        Apply a task to a tuple of the form (sequence, \*other_args)
        by first splitting the sequence in chunks, according to the weight
        of the elements and possibly to a key (see
        :func:`openquake.baselib.general.split_in_blocks`).
        Then reduce the results with an aggregation function.
        The chunks which are generated internally can be seen directly (
        useful for debugging purposes) by looking at the attribute `._chunks`,
        right after the `apply_reduce` function has been called.

        :param task: a task to run in parallel
        :param task_args: the arguments to be passed to the task function
        :param agg: the aggregation function
        :param acc: initial value of the accumulator (default empty AccumDict)
        :param concurrent_tasks: hint about how many tasks to generate
        :param weight: function to extract the weight of an item in arg0
        :param key: function to extract the kind of an item in arg0
        """
        arg0 = task_args[0]  # this is assumed to be a sequence
        args = task_args[1:]
        task_func = getattr(task, 'task_func', task)
        if acc is None:
            acc = AccumDict()
        if len(arg0) == 0:  # nothing to do
            return acc
        chunks = list(split_in_blocks(
            arg0, concurrent_tasks or 1, weight, key))
        cls.apply_reduce.__func__._chunks = chunks
        if not concurrent_tasks or no_distribute() or len(chunks) == 1:
            # apply the function in the master process
            for i, chunk in enumerate(chunks):
                if args and hasattr(args[-1], 'flush'):  # is monitor
                    args[-1].task_no = i
                acc = agg(acc, task_func(chunk, *args))
            return acc
        logging.info('Starting %d tasks', len(chunks))
        self = cls.starmap(task, [(chunk,) + args for chunk in chunks], name)
        return self.reduce(agg, acc, posthook)
Example #30
    def test_split_with_kind(self):
        Source = namedtuple('Source', 'typology, weight')
        s1 = Source('point', 1)
        s2 = Source('point', 1)
        s3 = Source('area', 2)
        s4 = Source('area', 4)
        s5 = Source('area', 4)
        blocks = list(
            block_splitter([s1, s2, s3, s4, s5], max_weight=6,
                           weight=attrgetter('weight'),
                           key=attrgetter('typology')))
        self.assertEqual(list(map(len, blocks)), [2, 2, 1])
        self.assertEqual([b.weight for b in blocks], [2, 6, 4])

        blocks = list(
            split_in_blocks([s1, s2, s3, s4, s5], hint=6,
                            weight=attrgetter('weight'),
                            key=attrgetter('typology')))
        self.assertEqual(list(map(len, blocks)), [1, 1, 1, 2])
        self.assertEqual([b.weight for b in blocks], [2, 4, 4, 2])
Example #31
def gen_rgetters(dstore, slc=slice(None)):
    """
    :yields: unfiltered RuptureGetters
    """
    csm_info = dstore['csm_info']
    trt_by_grp = csm_info.grp_by("trt")
    samples = csm_info.get_samples_by_grp()
    rlzs_by_gsim = csm_info.get_rlzs_by_gsim_grp()
    rup_array = dstore['ruptures'][slc]
    ct = dstore['oqparam'].concurrent_tasks or 1
    nr = len(dstore['ruptures'])
    for grp_id, arr in general.group_array(rup_array, 'grp_id').items():
        if not rlzs_by_gsim[grp_id]:  # the model has no sources
            continue
        for block in general.split_in_blocks(arr, len(arr) / nr * ct):
            rgetter = RuptureGetter([RuptureProxy(rec)
                                     for rec in block], dstore.filename,
                                    grp_id, trt_by_grp[grp_id],
                                    samples[grp_id], rlzs_by_gsim[grp_id])
            yield rgetter
Example #32
    def test_split_with_kind(self):
        Source = namedtuple('Source', 'typology, weight')
        s1 = Source('point', 1)
        s2 = Source('point', 1)
        s3 = Source('area', 2)
        s4 = Source('area', 4)
        s5 = Source('area', 4)
        blocks = list(
            block_splitter([s1, s2, s3, s4, s5], max_weight=6,
                           weight=attrgetter('weight'),
                           kind=attrgetter('typology')))
        self.assertEqual(list(map(len, blocks)), [2, 2, 1])
        self.assertEqual([b.weight for b in blocks], [2, 6, 4])

        blocks = list(
            split_in_blocks([s1, s2, s3, s4, s5], hint=6,
                            weight=attrgetter('weight'),
                            key=attrgetter('typology')))
        self.assertEqual(list(map(len, blocks)), [1, 1, 1, 2])
        self.assertEqual([b.weight for b in blocks], [2, 4, 4, 2])
Example #33
    def split_in_tiles(self, hint):
        """
        Split a SiteCollection into a set of tiles (SiteCollection instances).

        :param hint: hint for how many tiles to generate
        """
        tiles = []
        for seq in split_in_blocks(range(len(self)), hint or 1):
            indices = numpy.array(seq, int)
            sc = SiteCollection.__new__(SiteCollection)
            sc.complete = sc
            sc.total_sites = len(indices)
            sc.sids = self.sids[indices]
            sc.lons = self.lons[indices]
            sc.lats = self.lats[indices]
            sc._vs30 = _extract(self._vs30, indices)
            sc._vs30measured = _extract(self._vs30measured, indices)
            sc._z1pt0 = _extract(self._z1pt0, indices)
            sc._z2pt5 = _extract(self._z2pt5, indices)
            sc._backarc = _extract(self._backarc, indices)
            tiles.append(sc)
        return tiles
Example #34
    def split_in_tiles(self, hint):
        """
        Split a SiteCollection into a set of tiles (SiteCollection instances).

        :param hint: hint for how many tiles to generate
        """
        tiles = []
        for seq in split_in_blocks(range(len(self)), hint or 1):
            indices = numpy.array(seq, int)
            sc = SiteCollection.__new__(SiteCollection)
            sc.complete = sc
            sc.total_sites = len(indices)
            sc.sids = self.sids[indices]
            sc.lons = self.lons[indices]
            sc.lats = self.lats[indices]
            sc._vs30 = _extract(self._vs30, indices)
            sc._vs30measured = _extract(self._vs30measured, indices)
            sc._z1pt0 = _extract(self._z1pt0, indices)
            sc._z2pt5 = _extract(self._z2pt5, indices)
            sc._backarc = _extract(self._backarc, indices)
            tiles.append(sc)
        return tiles
Example #35
 def build_riskinputs(self, hazards_by_imt):
     """
     :param hazards_by_imt:
         a dictionary IMT -> array of length equal to the  number of sites
     :returns:
         a list of RiskInputs objects, sorted by IMT.
     """
     riskinputs = []
     idx_weight_pairs = [(i, len(assets))
                         for i, assets in enumerate(self.assets_by_site)]
     blocks = general.split_in_blocks(
         idx_weight_pairs,
         self.oqparam.concurrent_tasks or 1,
         weight=operator.itemgetter(1))
     for block in blocks:
         idx = numpy.array([idx for idx, _weight in block])
         for imt, hazards_by_site in hazards_by_imt.iteritems():
             ri = self.riskmodel.build_input(
                 imt, hazards_by_site[idx], self.assets_by_site[idx])
             if ri.weight > 0:
                 riskinputs.append(ri)
     logging.info('Built %d risk inputs', len(riskinputs))
     return sorted(riskinputs, key=get_imt)
Example #36
    def apply(cls,
              task,
              args,
              concurrent_tasks=cpu_count * 3,
              maxweight=None,
              weight=lambda item: 1,
              key=lambda item: 'Unspecified',
              distribute=None,
              progress=logging.info):
        """
        Apply a task to a tuple of the form (sequence, \*other_args)
        by first splitting the sequence in chunks, according to the weight
        of the elements and possibly to a key (see
        :func:`openquake.baselib.general.split_in_blocks`).

        :param task: a task to run in parallel
        :param args: the arguments to be passed to the task function
        :param concurrent_tasks: hint about how many tasks to generate
        :param maxweight: if not None, used to split the tasks
        :param weight: function to extract the weight of an item in arg0
        :param key: function to extract the kind of an item in arg0
        :param distribute: if not given, inferred from OQ_DISTRIBUTE
        :param progress: logging function to use (default logging.info)
        :returns: an :class:`IterResult` object
        """
        arg0 = args[0]  # this is assumed to be a sequence
        args = args[1:]
        mon = args[-1]
        if maxweight:  # block_splitter is lazy
            task_args = (
                (blk, ) + args
                for blk in block_splitter(arg0, maxweight, weight, key))
        else:  # split_in_blocks is eager
            task_args = [(blk, ) + args for blk in split_in_blocks(
                arg0, concurrent_tasks or 1, weight, key)]
        return cls(task, task_args, mon, distribute, progress).submit_all()
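The comments in the two branches above draw a lazy/eager distinction: with maxweight the chunks come from a generator and are produced on demand, otherwise they are all materialized before the first task is submitted. A generic illustration of the difference, in plain Python and unrelated to the engine splitters:

def lazy_chunks(seq, size):
    # Generator: each chunk is built only when requested.
    for start in range(0, len(seq), size):
        yield seq[start:start + size]

def eager_chunks(seq, size):
    # List: every chunk exists before the first one is consumed.
    return list(lazy_chunks(seq, size))

gen = lazy_chunks(list(range(10)), 4)
print(next(gen))                         # [0, 1, 2, 3]; the rest not built yet
print(eager_chunks(list(range(10)), 4))  # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]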
Example #37
def get_rupture_getters(dstore, slc=slice(None), split=0, hdf5cache=None):
    """
    :returns: a list of RuptureGetters
    """
    csm_info = dstore['csm_info']
    grp_trt = csm_info.grp_by("trt")
    samples = csm_info.get_samples_by_grp()
    rlzs_by_gsim = csm_info.get_rlzs_by_gsim_grp()
    rup_array = dstore['ruptures'][slc]
    code2cls = get_code2cls(dstore.get_attrs('ruptures'))
    rgetters = []
    by_grp = operator.itemgetter(2)  # serial, srcidx, grp_id
    for block in general.split_in_blocks(rup_array, split, key=by_grp):
        rups = numpy.array(block)
        grp_id = rups[0]['grp_id']
        if not rlzs_by_gsim[grp_id]:
            # this may happen if a source model has no sources, like
            # in event_based_risk/case_3
            continue
        rgetter = RuptureGetter(hdf5cache or dstore.hdf5path, code2cls, rups,
                                grp_trt[grp_id], samples[grp_id],
                                rlzs_by_gsim[grp_id])
        rgetters.append(rgetter)
    return rgetters
Example #38
    def full_disaggregation(self):
        """
        Run the disaggregation phase after hazard curve finalization.
        """
        oq = self.oqparam
        tl = self.oqparam.truncation_level
        bb_dict = self.datastore["bb_dict"]
        sitecol = self.sitecol
        mag_bin_width = self.oqparam.mag_bin_width
        eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
        logging.info("%d epsilon bins from %s to %s", len(eps_edges) - 1, min(eps_edges), max(eps_edges))

        self.bin_edges = {}
        curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
        all_args = []
        num_trts = sum(len(sm.src_groups) for sm in self.csm.source_models)
        nblocks = math.ceil(oq.concurrent_tasks / num_trts)
        for smodel in self.csm.source_models:
            sm_id = smodel.ordinal
            trt_names = tuple(mod.trt for mod in smodel.src_groups)
            max_mag = max(mod.max_mag for mod in smodel.src_groups)
            min_mag = min(mod.min_mag for mod in smodel.src_groups)
            mag_edges = mag_bin_width * numpy.arange(
                int(numpy.floor(min_mag / mag_bin_width)), int(numpy.ceil(max_mag / mag_bin_width) + 1)
            )
            logging.info("%d mag bins from %s to %s", len(mag_edges) - 1, min_mag, max_mag)
            for src_group in smodel.src_groups:
                if src_group.id not in self.rlzs_assoc.gsims_by_grp_id:
                    continue  # the group has been filtered away
                for sid, site in zip(sitecol.sids, sitecol):
                    curves = curves_dict[sid]
                    if not curves:
                        continue  # skip zero-valued hazard curves
                    bb = bb_dict[sm_id, sid]
                    if not bb:
                        logging.info("location %s was too far, skipping disaggregation", site.location)
                        continue

                    dist_edges, lon_edges, lat_edges = bb.bins_edges(oq.distance_bin_width, oq.coordinate_bin_width)
                    logging.info("%d dist bins from %s to %s", len(dist_edges) - 1, min(dist_edges), max(dist_edges))
                    logging.info("%d lon bins from %s to %s", len(lon_edges) - 1, bb.west, bb.east)
                    logging.info("%d lat bins from %s to %s", len(lon_edges) - 1, bb.south, bb.north)

                    self.bin_edges[sm_id, sid] = (mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)

                bin_edges = {}
                for sid, site in zip(sitecol.sids, sitecol):
                    if (sm_id, sid) in self.bin_edges:
                        bin_edges[sid] = self.bin_edges[sm_id, sid]

                ss_filter = SourceSitesFilter(oq.maximum_distance)
                split_sources = []
                for src in src_group:
                    for split, _sites in ss_filter(sourceconverter.split_source(src), sitecol):
                        split_sources.append(split)
                for srcs in split_in_blocks(split_sources, nblocks):
                    all_args.append(
                        (
                            sitecol,
                            srcs,
                            src_group.id,
                            self.rlzs_assoc,
                            trt_names,
                            curves_dict,
                            bin_edges,
                            oq,
                            self.monitor,
                        )
                    )

        results = parallel.starmap(compute_disagg, all_args).reduce(self.agg_result)
        self.save_disagg_results(results)
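The nblocks arithmetic above simply spreads the concurrency budget over the source groups, so that the total number of disaggregation tasks stays close to concurrent_tasks. With illustrative numbers:

import math

concurrent_tasks, num_trts = 16, 4                 # made-up values
nblocks = math.ceil(concurrent_tasks / num_trts)   # 4 source blocks per group
print(nblocks, nblocks * num_trts)                 # about 16 tasks overall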
Example #39
    def full_disaggregation(self):
        """
        Run the disaggregation phase after hazard curve finalization.
        """
        oq = self.oqparam
        tl = self.oqparam.truncation_level
        sitecol = self.sitecol
        eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)

        self.bin_edges = {}
        curves = [self.get_curves(sid) for sid in sitecol.sids]
        # determine the number of effective source groups
        sg_data = self.datastore['csm_info/sg_data']
        num_grps = sum(1 for effrup in sg_data['effrup'] if effrup > 0)
        nblocks = math.ceil(oq.concurrent_tasks / num_grps)
        src_filter = SourceFilter(sitecol, oq.maximum_distance)
        R = len(self.rlzs_assoc.realizations)
        max_poe = numpy.zeros(R, oq.imt_dt())

        # build trt_edges
        trts = tuple(
            sorted(
                set(sg.trt for smodel in self.csm.source_models
                    for sg in smodel.src_groups)))

        # build mag_edges
        min_mag = min(sg.min_mag for smodel in self.csm.source_models
                      for sg in smodel.src_groups)
        max_mag = max(sg.max_mag for smodel in self.csm.source_models
                      for sg in smodel.src_groups)
        mag_edges = oq.mag_bin_width * numpy.arange(
            int(numpy.floor(min_mag / oq.mag_bin_width)),
            int(numpy.ceil(max_mag / oq.mag_bin_width) + 1))

        # build dist_edges
        maxdist = max(oq.maximum_distance(trt, max_mag) for trt in trts)
        dist_edges = oq.distance_bin_width * numpy.arange(
            0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))
        logging.info('dist = %s...%s', min(dist_edges), max(dist_edges))

        # build eps_edges
        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build lon_edges, lat_edges per sid
        bbs = src_filter.get_bounding_boxes(mag=max_mag)
        for sid, bb in zip(self.sitecol.sids, bbs):
            lon_edges, lat_edges = disagg.lon_lat_bins(bb,
                                                       oq.coordinate_bin_width)
            logging.info('site %d, lon = %s...%s', sid, min(lon_edges),
                         max(lon_edges))
            logging.info('site %d, lat = %s...%s', sid, min(lat_edges),
                         max(lat_edges))
            self.bin_edges[sid] = bs = (mag_edges, dist_edges, lon_edges,
                                        lat_edges, eps_edges)
            shape = [len(edges) - 1 for edges in bs] + [len(trts)]
            logging.info('%s for sid %d', shape, sid)

        # check poes
        for smodel in self.csm.source_models:
            sm_id = smodel.ordinal
            for i, site in enumerate(sitecol):
                sid = sitecol.sids[i]
                curve = curves[i]
                if not curve:
                    continue  # skip zero-valued hazard curves
                # populate max_poe array
                for rlzi, poes in curve.items():
                    for imt in oq.imtls:
                        max_poe[rlzi][imt] = max(max_poe[rlzi][imt],
                                                 poes[imt].max())

            # check for too big poes_disagg
            for poe in oq.poes_disagg:
                for rlz in self.rlzs_assoc.rlzs_by_smodel[sm_id]:
                    rlzi = rlz.ordinal
                    for imt in oq.imtls:
                        max_poe_rlz = max_poe[rlzi][imt]
                        if poe > max_poe_rlz:
                            raise ValueError(
                                self.POE_TOO_BIG %
                                (poe, sm_id, smodel.name, max_poe_rlz,
                                 rlzi, imt))

        # build all_args
        all_args = []
        for smodel in self.csm.source_models:
            for sg in smodel.src_groups:
                split_sources = []
                for src in sg:
                    for split, _sites in src_filter(
                            sourceconverter.split_source(src), sitecol):
                        split_sources.append(split)
                if not split_sources:
                    continue
                mon = self.monitor('disaggregation')
                rlzs_by_gsim = self.rlzs_assoc.get_rlzs_by_gsim(
                    sg.trt, smodel.ordinal)
                cmaker = ContextMaker(rlzs_by_gsim,
                                      src_filter.integration_distance)
                imls = [
                    disagg.make_imldict(rlzs_by_gsim, oq.imtls, oq.iml_disagg,
                                        oq.poes_disagg, curve)
                    for curve in curves
                ]
                for srcs in split_in_blocks(split_sources, nblocks):
                    all_args.append((src_filter, srcs, cmaker, imls, trts,
                                     self.bin_edges, oq, mon))

        self.cache_info = numpy.zeros(2)  # operations, cache_hits
        results = parallel.Starmap(compute_disagg,
                                   all_args).reduce(self.agg_result)
        ops, hits = self.cache_info
        logging.info('Cache speedup %s', ops / (ops - hits))
        self.save_disagg_results(results)
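
In the example above the magnitude edges are built by snapping min_mag down and max_mag up to multiples of mag_bin_width, so every bin is aligned to the bin width and the whole magnitude range is covered. A quick standalone check of that arithmetic (the sample values are made up):

import numpy

mag_bin_width = 0.5
min_mag, max_mag = 4.7, 7.2  # hypothetical values

# same arithmetic as the examples: edges aligned to multiples of the bin width
mag_edges = mag_bin_width * numpy.arange(
    int(numpy.floor(min_mag / mag_bin_width)),
    int(numpy.ceil(max_mag / mag_bin_width) + 1))

print(mag_edges)                       # [4.5 5.  5.5 6.  6.5 7.  7.5]
print(len(mag_edges) - 1, 'mag bins')  # 6 mag bins
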
Ejemplo n.º 40
0
    def full_disaggregation(self):
        """
        Run the disaggregation phase after hazard curve finalization.
        """
        oq = self.oqparam
        tl = self.oqparam.truncation_level
        bb_dict = self.datastore['bb_dict']
        sitecol = self.sitecol
        mag_bin_width = self.oqparam.mag_bin_width
        eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
        logging.info('%d epsilon bins from %s to %s',
                     len(eps_edges) - 1, min(eps_edges), max(eps_edges))

        self.bin_edges = {}
        curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
        all_args = []
        num_trts = sum(len(sm.src_groups) for sm in self.csm.source_models)
        nblocks = math.ceil(oq.concurrent_tasks / num_trts)
        for smodel in self.csm.source_models:
            sm_id = smodel.ordinal
            trt_names = tuple(mod.trt for mod in smodel.src_groups)
            max_mag = max(mod.max_mag for mod in smodel.src_groups)
            min_mag = min(mod.min_mag for mod in smodel.src_groups)
            mag_edges = mag_bin_width * numpy.arange(
                int(numpy.floor(min_mag / mag_bin_width)),
                int(numpy.ceil(max_mag / mag_bin_width) + 1))
            logging.info('%d mag bins from %s to %s',
                         len(mag_edges) - 1, min_mag, max_mag)
            for src_group in smodel.src_groups:
                if src_group.id not in self.rlzs_assoc.gsims_by_grp_id:
                    continue  # the group has been filtered away
                for sid, site in zip(sitecol.sids, sitecol):
                    curves = curves_dict[sid]
                    if not curves:
                        continue  # skip zero-valued hazard curves
                    bb = bb_dict[sm_id, sid]
                    if not bb:
                        logging.info(
                            'location %s was too far, skipping disaggregation',
                            site.location)
                        continue

                    dist_edges, lon_edges, lat_edges = bb.bins_edges(
                        oq.distance_bin_width, oq.coordinate_bin_width)
                    logging.info('%d dist bins from %s to %s',
                                 len(dist_edges) - 1, min(dist_edges),
                                 max(dist_edges))
                    logging.info('%d lon bins from %s to %s',
                                 len(lon_edges) - 1, bb.west, bb.east)
                    logging.info('%d lat bins from %s to %s',
                                 len(lat_edges) - 1, bb.south, bb.north)

                    self.bin_edges[sm_id,
                                   sid] = (mag_edges, dist_edges, lon_edges,
                                           lat_edges, eps_edges)

                bin_edges = {}
                for sid, site in zip(sitecol.sids, sitecol):
                    if (sm_id, sid) in self.bin_edges:
                        bin_edges[sid] = self.bin_edges[sm_id, sid]

                src_filter = SourceFilter(sitecol, oq.maximum_distance)
                split_sources = []
                for src in src_group:
                    for split, _sites in src_filter(
                            sourceconverter.split_source(src), sitecol):
                        split_sources.append(split)
                mon = self.monitor('disaggregation')
                for srcs in split_in_blocks(split_sources, nblocks):
                    all_args.append(
                        (src_filter, srcs, src_group.id, self.rlzs_assoc,
                         trt_names, curves_dict, bin_edges, oq, mon))

        results = parallel.Starmap(compute_disagg,
                                   all_args).reduce(self.agg_result)
        self.save_disagg_results(results)
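
The example above takes the per-site distance, longitude and latitude edges from a bounding box via bb.bins_edges, while the previous one builds them with disagg.lon_lat_bins. The following is a rough standalone illustration of coordinate binning over a bounding box; it only approximates the idea, and the (west, south, east, north) layout and the edge alignment are assumptions, not the library behaviour:

import numpy

def lon_lat_edges(bb, coord_bin_width):
    # bb assumed to be (west, south, east, north) in degrees -- an assumption,
    # not necessarily the layout used by the OpenQuake bounding boxes
    west, south, east, north = bb
    n_lon = int(numpy.ceil((east - west) / coord_bin_width))
    n_lat = int(numpy.ceil((north - south) / coord_bin_width))
    lon_edges = west + coord_bin_width * numpy.arange(n_lon + 1)
    lat_edges = south + coord_bin_width * numpy.arange(n_lat + 1)
    return lon_edges, lat_edges

lon_edges, lat_edges = lon_lat_edges((9.8, 44.9, 10.6, 45.4), 0.2)
print(len(lon_edges) - 1, 'lon bins,', len(lat_edges) - 1, 'lat bins')
# -> 4 lon bins, 3 lat bins
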
Ejemplo n.º 41
0
    def apply(cls, func, args, concurrent_tasks=executor._max_workers * 5,
              weight=lambda item: 1, key=lambda item: 'Unspecified'):
        chunks = split_in_blocks(args[0], concurrent_tasks or 1, weight, key)
        if concurrent_tasks == 0:
            cls = Sequential
        return cls(func, (((chunk,) + args[1:]) for chunk in chunks))
Ejemplo n.º 42
0
    def apply(cls, func, args, concurrent_tasks=executor._max_workers * 5,
              weight=lambda item: 1, key=lambda item: 'Unspecified'):
        chunks = split_in_blocks(args[0], concurrent_tasks, weight, key)
        return cls(func, (((chunk,) + args[1:]) for chunk in chunks))
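
Both apply variants above follow the same dispatch pattern: split the first positional argument into weighted chunks and submit one task per chunk, forwarding the remaining arguments unchanged (the earlier variant additionally falls back to a sequential runner when concurrent_tasks is 0). A hedged sketch of that pattern with a toy sequential runner follows; Sequential, process and the reduce signature here are stand-ins for illustration, not the OpenQuake classes:

# Toy stand-in for the chunk-and-forward dispatch used by apply():
# one call per chunk, extra arguments repeated verbatim for every task.
class Sequential:
    def __init__(self, func, task_args):
        self.results = [func(*args) for args in task_args]

    def reduce(self, agg, acc=0):
        for res in self.results:
            acc = agg(acc, res)
        return acc

def process(chunk, factor):
    # toy task: weighted sum of one chunk
    return sum(chunk) * factor

chunks = [[1, 2], [3, 4], [5]]   # what split_in_blocks would produce
extra_args = (10,)               # plays the role of args[1:] in apply()
runner = Sequential(process, (((chunk,) + extra_args) for chunk in chunks))
print(runner.reduce(lambda acc, res: acc + res))  # 150
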
Ejemplo n.º 43
0
    def full_disaggregation(self, curves_by_trt_gsim):
        """
        Run the disaggregation phase after hazard curve finalization.
        """
        oq = self.oqparam
        tl = self.oqparam.truncation_level
        sitecol = self.sitecol
        mag_bin_width = self.oqparam.mag_bin_width
        eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
        logging.info('%d epsilon bins from %s to %s', len(eps_edges) - 1,
                     min(eps_edges), max(eps_edges))

        self.bin_edges = {}
        curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
        all_args = []
        num_trts = sum(len(sm.trt_models) for sm in self.csm.source_models)
        nblocks = math.ceil(oq.concurrent_tasks / num_trts)
        for smodel in self.csm.source_models:
            sm_id = smodel.ordinal
            trt_names = tuple(mod.trt for mod in smodel.trt_models)
            max_mag = max(mod.max_mag for mod in smodel.trt_models)
            min_mag = min(mod.min_mag for mod in smodel.trt_models)
            mag_edges = mag_bin_width * numpy.arange(
                int(numpy.floor(min_mag / mag_bin_width)),
                int(numpy.ceil(max_mag / mag_bin_width) + 1))
            logging.info('%d mag bins from %s to %s', len(mag_edges) - 1,
                         min_mag, max_mag)
            for trt_model in smodel.trt_models:
                for site in sitecol:
                    curves = curves_dict[site.id]
                    if not curves:
                        continue  # skip zero-valued hazard curves
                    bb = curves_by_trt_gsim.bb_dict[sm_id, site.id]
                    if not bb:
                        logging.info(
                            'location %s was too far, skipping disaggregation',
                            site.location)
                        continue

                    dist_edges, lon_edges, lat_edges = bb.bins_edges(
                        oq.distance_bin_width, oq.coordinate_bin_width)
                    logging.info(
                        '%d dist bins from %s to %s', len(dist_edges) - 1,
                        min(dist_edges), max(dist_edges))
                    logging.info(
                        '%d lon bins from %s to %s', len(lon_edges) - 1,
                        bb.west, bb.east)
                    logging.info(
                        '%d lat bins from %s to %s', len(lat_edges) - 1,
                        bb.south, bb.north)

                    self.bin_edges[sm_id, site.id] = (
                        mag_edges, dist_edges, lon_edges, lat_edges, eps_edges)

                bin_edges = {}
                for site in sitecol:
                    if (sm_id, site.id) in self.bin_edges:
                        bin_edges[site.id] = self.bin_edges[sm_id, site.id]

                for srcs in split_in_blocks(trt_model, nblocks):
                    all_args.append(
                        (sitecol, srcs, trt_model.id, self.rlzs_assoc,
                         trt_names, curves_dict, bin_edges, oq, self.monitor))

        results = parallel.starmap(compute_disagg, all_args).reduce(
            self.agg_result)
        self.save_disagg_results(results)
Ejemplo n.º 44
0
    def full_disaggregation(self, curves_by_trt_gsim):
        """
        Run the disaggregation phase after hazard curve finalization.
        """
        oq = self.oqparam
        tl = self.oqparam.truncation_level
        sitecol = self.sitecol
        mag_bin_width = self.oqparam.mag_bin_width
        eps_edges = numpy.linspace(-tl, tl, self.oqparam.num_epsilon_bins + 1)
        logging.info('%d epsilon bins from %s to %s',
                     len(eps_edges) - 1, min(eps_edges), max(eps_edges))

        self.bin_edges = {}
        curves_dict = {sid: self.get_curves(sid) for sid in sitecol.sids}
        all_args = []
        num_trts = sum(len(sm.trt_models) for sm in self.csm.source_models)
        nblocks = math.ceil(oq.concurrent_tasks / num_trts)
        for smodel in self.csm.source_models:
            sm_id = smodel.ordinal
            trt_names = tuple(mod.trt for mod in smodel.trt_models)
            max_mag = max(mod.max_mag for mod in smodel.trt_models)
            min_mag = min(mod.min_mag for mod in smodel.trt_models)
            mag_edges = mag_bin_width * numpy.arange(
                int(numpy.floor(min_mag / mag_bin_width)),
                int(numpy.ceil(max_mag / mag_bin_width) + 1))
            logging.info('%d mag bins from %s to %s',
                         len(mag_edges) - 1, min_mag, max_mag)
            for trt_model in smodel.trt_models:
                for site in sitecol:
                    curves = curves_dict[site.id]
                    if not curves:
                        continue  # skip zero-valued hazard curves
                    bb = curves_by_trt_gsim.bb_dict[sm_id, site.id]
                    if not bb:
                        logging.info(
                            'location %s was too far, skipping disaggregation',
                            site.location)
                        continue

                    dist_edges, lon_edges, lat_edges = bb.bins_edges(
                        oq.distance_bin_width, oq.coordinate_bin_width)
                    logging.info('%d dist bins from %s to %s',
                                 len(dist_edges) - 1, min(dist_edges),
                                 max(dist_edges))
                    logging.info('%d lon bins from %s to %s',
                                 len(lon_edges) - 1, bb.west, bb.east)
                    logging.info('%d lat bins from %s to %s',
                                 len(lat_edges) - 1, bb.south, bb.north)

                    self.bin_edges[sm_id,
                                   site.id] = (mag_edges, dist_edges,
                                               lon_edges, lat_edges, eps_edges)

                bin_edges = {}
                for site in sitecol:
                    if (sm_id, site.id) in self.bin_edges:
                        bin_edges[site.id] = self.bin_edges[sm_id, site.id]

                for srcs in split_in_blocks(trt_model, nblocks):
                    all_args.append(
                        (sitecol, srcs, trt_model.id, self.rlzs_assoc,
                         trt_names, curves_dict, bin_edges, oq, self.monitor))

        results = parallel.starmap(compute_disagg,
                                   all_args).reduce(self.agg_result)
        self.save_disagg_results(results)
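
A recurring detail across these examples is the task budget: concurrent_tasks is divided by the number of source groups (or tectonic region types) and rounded up, so the total number of submitted blocks stays close to concurrent_tasks, and the epsilon edges always come from numpy.linspace over the truncation interval. A quick check of both pieces of arithmetic with made-up values:

import math
import numpy

concurrent_tasks, num_trts = 64, 5          # hypothetical values
nblocks = math.ceil(concurrent_tasks / num_trts)
print(nblocks, 'blocks per group, at most', nblocks * num_trts, 'tasks')
# -> 13 blocks per group, at most 65 tasks

truncation_level, num_epsilon_bins = 3, 6   # hypothetical values
eps_edges = numpy.linspace(-truncation_level, truncation_level,
                           num_epsilon_bins + 1)
print(eps_edges)                            # [-3. -2. -1.  0.  1.  2.  3.]
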