Esempio n. 1
0
 def pre_checks(self):
     """
     Validate the calculation before running the disaggregation.

     Checks the number of sites, the absence of atomic source groups and
     the size of the disaggregation matrix; then logs the total size of
     the disaggregation outputs.

     :raises ValueError:
         if there are too many sites or if the disaggregation matrix
         has more than 1E6 elements
     :raises NotImplementedError:
         if an atomic source group is found
     """
     if self.N >= 32768:
         # NOTE(review): the condition also rejects exactly 32,768 sites
         # while the message presents that number as the maximum; confirm
         # which of the two is intended
         raise ValueError('You can disaggregate at max 32,768 sites')
     few = self.oqparam.max_sites_disagg
     if self.N > few:
         raise ValueError(
             'The number of sites to disaggregate is %d, but you have '
             'max_sites_disagg=%d' % (self.N, few))
     if hasattr(self, 'csm'):
         # the source model is available: inspect the groups directly
         if any(sg.atomic for sg in self.csm.src_groups):
             raise NotImplementedError(
                 'Atomic groups are not supported yet')
     elif self.datastore['source_info'].attrs['atomic']:
         # no source model: rely on the flag stored in the datastore
         raise NotImplementedError('Atomic groups are not supported yet')
     all_edges, shapedic = disagg.get_edges_shapedic(
         self.oqparam, self.sitecol, self.datastore['source_mags'])
     *b, trts = all_edges
     T = len(trts)
     # number of bins = number of edges - 1; for b[2] and b[3] only the
     # first element is used to size the matrix
     shape = [len(edges) - 1
              for edges in (b[0], b[1], b[2][0], b[3][0], b[4])] + [T]
     matrix_size = numpy.prod(shape)  # 6D
     if matrix_size > 1E6:
         raise ValueError('The disaggregation matrix is too large '
                          '(%d elements): fix the binning!' % matrix_size)
     tot = get_outputs_size(shapedic, self.oqparam.disagg_outputs)
     logging.info('Total output size: %s', humansize(sum(tot.values())))
Esempio n. 2
0
    def full_disaggregation(self):
        """
        Run the disaggregation phase.

        Saves the bin edges, selects the realizations to disaggregate for
        each site, stores the `best_rlzs`, `hmap4` and `poe4` datasets and
        finally returns the result of `self.compute()`.
        """
        oq = self.oqparam
        source_mags = self.datastore['source_mags']
        bin_edges, self.shapedic = disagg.get_edges_shapedic(
            oq, self.sitecol, source_mags)
        self.save_bin_edges(bin_edges)
        self.full_lt = self.datastore['full_lt']
        self.poes_disagg = oq.poes_disagg or (None, )
        self.imts = list(oq.imtls)
        self.M = len(self.imts)
        weights = [rlz.weight for rlz in self.full_lt.get_realizations()]
        # read from the parent datastore when there is one
        dstore = self.datastore.parent or self.datastore
        num_rows = len(dstore['_poes/sid'])
        self.pgetter = getters.PmapGetter(
            dstore, weights, [(0, num_rows + 1)], oq.imtls, oq.poes)

        # build the array of realization indices, of shape (N, Z)
        if oq.rlz_index is not None:
            # the realizations are given by the user: same for all sites
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z, rlz_idx in enumerate(oq.rlz_index):
                rlzs[:, z] = rlz_idx
        else:
            Z = oq.num_rlzs_disagg or 1
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    pcurve = self.pgetter.get_pcurve(sid)
                    mean = getters.build_stat_curve(
                        pcurve, oq.imtls, stats.mean_curve, weights)
                    # keep the Z realizations closest to the mean
                    closest = util.closest_to_ref(
                        pcurve.array.T, mean.array)
                    rlzs[sid] = closest[:Z]
        self.datastore['best_rlzs'] = rlzs
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs
        self.curves = []

        if not oq.iml_disagg:
            self.poe_id = {
                poe: idx for idx, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs[sid])
                      for sid in self.sitecol.sids]
        else:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None] * Z for _ in range(self.N)]
        hmap4 = _hmap4(
            rlzs, oq.iml_disagg, oq.imtls, self.poes_disagg, curves)
        self.hmap4 = hmap4
        if hmap4.array.sum() == 0:
            raise SystemExit('Cannot do any disaggregation: zero hazard')
        self.datastore['hmap4'] = hmap4
        self.datastore['poe4'] = numpy.zeros_like(hmap4.array)
        return self.compute()
Esempio n. 3
0
def extract_disagg_layer(dstore, what):
    """
    Extract a disaggregation layer containing all sites and outputs.

    Example:
    http://127.0.0.1:8800/v1/calc/30/extract/disagg_layer?

    :param dstore: the datastore to read from
    :param what: query string parsed with `parse`; the optional `kind`
        entry selects the outputs, otherwise `oq.disagg_outputs` is used
    :returns: an ArrayWrapper with one record per site and the
        mag, dist, eps and trt edges as attributes
    """
    qdict = parse(what)
    oq = dstore['oqparam']
    oq.maximum_distance = filters.MagDepDistance(oq.maximum_distance)
    kinds = qdict['kind'] if 'kind' in qdict else oq.disagg_outputs
    sitecol = dstore['sitecol']
    poes_disagg = oq.poes_disagg or (None, )
    edges, shapedic = disagg.get_edges_shapedic(
        oq, sitecol, dstore['source_mags'])
    dt = _disagg_output_dt(shapedic, kinds, oq.imtls, poes_disagg)
    out = numpy.zeros(len(sitecol), dt)
    all_rlzs = numpy.array(dstore['full_lt'].get_realizations())
    hmap4 = dstore['hmap4'][:]
    best_rlzs = dstore['best_rlzs'][:]
    matrices = {kind: dstore['disagg/' + kind][:] for kind in kinds}
    rows = zip(sitecol.sids, sitecol.lons, sitecol.lats, out)
    for sid, lon, lat, rec in rows:
        site_rlzs = all_rlzs[best_rlzs[sid]]
        rec['site_id'] = sid
        rec['lon'] = lon
        rec['lat'] = lat
        rec['lon_bins'] = edges[2][sid]
        rec['lat_bins'] = edges[3][sid]
        for m, imt in enumerate(oq.imtls):
            weights = numpy.array([rlz.weight[imt] for rlz in site_rlzs])
            weights /= weights.sum()  # normalize to 1
            for p, poe in enumerate(poes_disagg):
                for kind in kinds:
                    # weighted combination over the selected realizations
                    combined = matrices[kind][sid, m, p] @ weights
                    rec['%s-%s-%s' % (kind, imt, poe)] = combined
                rec['iml-%s-%s' % (imt, poe)] = hmap4[sid, m, p]
    attrs = dict(
        mag=edges[0],
        dist=edges[1],
        eps=edges[-2],
        trt=numpy.array(encode(edges[-1])))
    return ArrayWrapper(out, attrs)
Esempio n. 4
0
def extract_disagg_layer(dstore, what):
    """
    Extract a disaggregation layer containing all sites and outputs.

    Example:
    http://127.0.0.1:8800/v1/calc/30/extract/disagg_layer?

    :param dstore: the datastore to read from
    :param what: query string parsed with `parse`; the optional `kind`
        entry selects the outputs, otherwise `oq.disagg_outputs` (or
        `disagg.pmf_map` if that is empty) is used
    :returns: an ArrayWrapper with one record per site and the
        mag, dist, eps and trt edges as attributes
    """
    qdict = parse(what)
    oq = dstore['oqparam']
    kinds = (qdict['kind'] if 'kind' in qdict
             else list(oq.disagg_outputs or disagg.pmf_map))
    sitecol = dstore['sitecol']
    poes_disagg = oq.poes_disagg or (None, )
    edges, shapedic = disagg.get_edges_shapedic(
        oq, sitecol, dstore['source_mags'])
    dt = _disagg_output_dt(shapedic, kinds, oq.imtls, poes_disagg)
    out = numpy.zeros(len(sitecol), dt)
    try:
        best_rlzs = dstore['best_rlzs']
    except KeyError:
        # dataset not stored: fall back to realization 0 everywhere
        best_rlzs = numpy.zeros((len(sitecol), shapedic['Z']), U16)
    rows = zip(sitecol.sids, sitecol.lons, sitecol.lats, out)
    for sid, lon, lat, rec in rows:
        rlzs = best_rlzs[sid]
        rec['site_id'] = sid
        rec['lon'] = lon
        rec['lat'] = lat
        rec['rlz_id'] = rlzs
        rec['lon_bins'] = edges[2][sid]
        rec['lat_bins'] = edges[3][sid]
        for kind in kinds:
            for imt in oq.imtls:
                for p, poe in enumerate(poes_disagg):
                    key = '%s-%s-%s' % (kind, imt, poe)
                    # NOTE(review): the key does not depend on the
                    # realization, so the last realization wins
                    for rlz in rlzs:
                        label = 'disagg/rlz-%d-%s-sid-%d-poe-%s/%s' % (
                            rlz, imt, sid, p, kind)
                        rec[key] = dstore[label][()]
    attrs = dict(
        mag=edges[0], dist=edges[1], eps=edges[-2], trt=encode(edges[-1]))
    return ArrayWrapper(out, attrs)
Esempio n. 5
0
    def full_disaggregation(self):
        """
        Run the disaggregation phase.

        In order: computes the bin edges, rejects atomic source groups,
        selects the realizations to disaggregate for each site, builds the
        intensity measure levels (`self.iml3`), checks the estimated data
        transfer against `oq.max_data_transfer` and finally submits the
        `compute_disagg` tasks.

        :returns: the reduction of the task results via `self.agg_result`
        :raises NotImplementedError: if an atomic source group is found
        :raises ValueError: if the estimated data transfer is too big
        """
        oq = self.oqparam
        mags_by_trt = self.datastore['source_mags']
        all_edges, shapedic = disagg.get_edges_shapedic(
            oq, self.sitecol, mags_by_trt)
        # the last item of all_edges is stored apart from the bin edges
        *self.bin_edges, self.trts = all_edges
        src_filter = self.src_filter()
        if hasattr(self, 'csm'):
            for sg in self.csm.src_groups:
                if sg.atomic:
                    raise NotImplementedError(
                        'Atomic groups are not supported yet')

        self.full_lt = self.datastore['full_lt']
        # use a single None entry when no poes_disagg are given
        self.poes_disagg = oq.poes_disagg or (None,)
        self.imts = list(oq.imtls)

        self.ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
        self.pgetter = getters.PmapGetter(
            self.datastore, self.ws, self.sitecol.sids)

        # build array rlzs (N, Z)
        if oq.rlz_index is None:
            Z = oq.num_rlzs_disagg
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    curves = numpy.array(
                        [pc.array for pc in self.pgetter.get_pcurves(sid)])
                    mean = getters.build_stat_curve(
                        curves, oq.imtls, stats.mean_curve, self.ws)
                    # keep the first Z indices returned by closest_to_ref
                    rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
                # NB: best_rlzs is stored only in this branch (R > 1)
                self.datastore['best_rlzs'] = rlzs
        else:
            # the same user-given realization indices for all the sites
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z in range(Z):
                rlzs[:, z] = oq.rlz_index[z]
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs

        if oq.iml_disagg:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None for z in range(Z)] for s in range(self.N)]
            self.ok_sites = set(self.sitecol.sids)
        else:
            self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs[sid])
                      for sid in self.sitecol.sids]
            # only the sites returned by check_poes_disagg are kept
            self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
        self.iml3 = _iml3(rlzs, oq.iml_disagg, oq.imtls,
                          self.poes_disagg, curves)
        if oq.disagg_by_src:
            self.build_disagg_by_src(rlzs)

        self.save_bin_edges()
        # the 'trt' entry is removed from a copy of shapedic before
        # estimating the data transfer
        sd = shapedic.copy()
        sd.pop('trt')
        nbytes, msg = get_array_nbytes(sd)
        if nbytes > oq.max_data_transfer:
            raise ValueError(
                'Estimated data transfer too big\n%s > max_data_transfer=%s' %
                (msg, humansize(oq.max_data_transfer)))
        logging.info('Estimated data transfer:\n%s', msg)
        tot = get_outputs_size(shapedic, oq.disagg_outputs or disagg.pmf_map)
        logging.info('Total output size: %s', humansize(sum(tot.values())))
        self.imldic = {}  # sid, rlz, poe, imt -> iml
        for s in self.sitecol.sids:
            for z, rlz in enumerate(rlzs[s]):
                for p, poe in enumerate(self.poes_disagg):
                    for imt in oq.imtls:
                        self.imldic[s, rlz, poe, imt] = self.iml3[imt][s, p, z]

        # submit #groups disaggregation tasks
        dstore = (self.datastore.parent if self.datastore.parent
                  else self.datastore)
        M = len(oq.imtls)
        # fall back to 1 when the ceiling is zero
        tasks_per_imt = numpy.ceil(oq.concurrent_tasks / M) or 1
        rups_per_task = len(dstore['rup/mag']) / tasks_per_imt
        logging.info('Considering ~%d ruptures per task', rups_per_task)
        indices = get_indices(dstore, tasks_per_imt)
        # swmr_on is called before starting the tasks
        self.datastore.swmr_on()
        smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
        trt_num = {trt: i for i, trt in enumerate(self.trts)}
        for grp_id, trt in self.full_lt.trt_by_grp.items():
            logging.info('Group #%d, sending rup_data for %s', grp_id, trt)
            trti = trt_num[trt]
            cmaker = ContextMaker(
                trt, self.full_lt.get_rlzs_by_gsim(grp_id),
                {'truncation_level': oq.truncation_level,
                 'maximum_distance': src_filter.integration_distance,
                 'imtls': oq.imtls})
            # one task for each block of indices and for each IMT
            for idxs in indices[grp_id]:
                for imt in oq.imtls:
                    smap.submit((dstore, idxs, cmaker, self.iml3[imt], trti,
                                 self.bin_edges, oq))
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # sid -> trti-> 8D array
Esempio n. 6
0
    def full_disaggregation(self):
        """
        Run the disaggregation phase.

        Computes the bin edges, rejects atomic source groups, selects the
        realizations to disaggregate for each site, stores the `best_rlzs`,
        `hmap4` and `poe4` datasets, saves the bin edges, logs the total
        output size and returns the result of `self.compute()`.
        """
        oq = self.oqparam
        source_mags = self.datastore['source_mags']
        all_edges, self.shapedic = disagg.get_edges_shapedic(
            oq, self.sitecol, source_mags)
        *self.bin_edges, self.trts = all_edges
        if hasattr(self, 'csm'):
            # the source model is available: inspect the groups directly
            if any(sg.atomic for sg in self.csm.src_groups):
                raise NotImplementedError(
                    'Atomic groups are not supported yet')
        elif self.datastore['source_info'].attrs['atomic']:
            raise NotImplementedError('Atomic groups are not supported yet')

        self.full_lt = self.datastore['full_lt']
        self.poes_disagg = oq.poes_disagg or (None, )
        self.imts = list(oq.imtls)
        self.M = len(self.imts)
        weights = [rlz.weight for rlz in self.full_lt.get_realizations()]
        self.pgetter = getters.PmapGetter(
            self.datastore, weights, self.sitecol.sids, oq.imtls, oq.poes)

        # build the array of realization indices, of shape (N, Z)
        if oq.rlz_index is not None:
            # the realizations are given by the user: same for all sites
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z, rlz_idx in enumerate(oq.rlz_index):
                rlzs[:, z] = rlz_idx
        else:
            Z = oq.num_rlzs_disagg or 1
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    pcurves = self.pgetter.get_pcurves(sid)
                    curves = numpy.array([pc.array for pc in pcurves])
                    mean = getters.build_stat_curve(
                        curves, oq.imtls, stats.mean_curve, weights)
                    # keep the Z realizations closest to the mean
                    closest = util.closest_to_ref(curves, mean.array)
                    rlzs[sid] = closest[:Z]
        self.datastore['best_rlzs'] = rlzs
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs
        self.curves = []

        if not oq.iml_disagg:
            self.poe_id = {
                poe: idx for idx, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs[sid])
                      for sid in self.sitecol.sids]
        else:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None] * Z for _ in range(self.N)]
        hmap4 = _hmap4(
            rlzs, oq.iml_disagg, oq.imtls, self.poes_disagg, curves)
        self.hmap4 = hmap4
        if hmap4.array.sum() == 0:
            raise SystemExit('Cannot do any disaggregation: zero hazard')
        self.datastore['hmap4'] = hmap4
        self.datastore['poe4'] = numpy.zeros_like(hmap4.array)

        self.save_bin_edges()
        sizes = get_outputs_size(self.shapedic, oq.disagg_outputs)
        total = sum(sizes.values())
        logging.info('Total output size: %s', humansize(total))
        return self.compute()
    def full_disaggregation(self):
        """
        Run the disaggregation phase.

        In order: computes the bin edges, rejects atomic source groups,
        selects the realizations to disaggregate for each site, stores the
        `best_rlzs`, `iml4` and `poe4` datasets, builds the task arguments,
        checks the estimated data transfer against `oq.max_data_transfer`,
        stores the `disagg_task` dataset and runs the `compute_disagg`
        tasks.

        :returns: the reduction of the task results via `self.agg_result`
        :raises NotImplementedError: if an atomic source group is found
        :raises ValueError: if the estimated data transfer is too big
        """
        oq = self.oqparam
        mags_by_trt = self.datastore['source_mags']
        all_edges, self.shapedic = disagg.get_edges_shapedic(
            oq, self.sitecol, mags_by_trt)
        # the last item of all_edges is stored apart from the bin edges
        *self.bin_edges, self.trts = all_edges
        if hasattr(self, 'csm'):
            for sg in self.csm.src_groups:
                if sg.atomic:
                    raise NotImplementedError(
                        'Atomic groups are not supported yet')
        elif self.datastore['source_info'].attrs['atomic']:
            # no csm attribute: rely on the flag stored in the datastore
            raise NotImplementedError(
                'Atomic groups are not supported yet')

        self.full_lt = self.datastore['full_lt']
        # use a single None entry when no poes_disagg are given
        self.poes_disagg = oq.poes_disagg or (None,)
        self.imts = list(oq.imtls)
        self.M = len(self.imts)
        ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
        self.pgetter = getters.PmapGetter(
            self.datastore, ws, self.sitecol.sids)

        # build array rlzs (N, Z)
        if oq.rlz_index is None:
            Z = oq.num_rlzs_disagg or 1
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    curves = numpy.array(
                        [pc.array for pc in self.pgetter.get_pcurves(sid)])
                    mean = getters.build_stat_curve(
                        curves, oq.imtls, stats.mean_curve, ws)
                    # keep the first Z indices returned by closest_to_ref
                    rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
            self.datastore['best_rlzs'] = rlzs
        else:
            # the same user-given realization indices for all the sites
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z in range(Z):
                rlzs[:, z] = oq.rlz_index[z]
            self.datastore['best_rlzs'] = rlzs
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs

        if oq.iml_disagg:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None for z in range(Z)] for s in range(self.N)]
            self.ok_sites = set(self.sitecol.sids)
        else:
            self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs[sid])
                      for sid in self.sitecol.sids]
            # only the sites returned by check_poes_disagg are kept
            self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
        self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                          self.poes_disagg, curves)
        self.datastore['iml4'] = self.iml4
        # poe4 is initialized with zeros, with the same shape as iml4.array
        self.datastore['poe4'] = numpy.zeros_like(self.iml4.array)

        self.save_bin_edges()
        tot = get_outputs_size(self.shapedic, oq.disagg_outputs)
        logging.info('Total output size: %s', humansize(sum(tot.values())))
        self.imldic = {}  # sid, rlz, poe, imt -> iml
        for s in self.sitecol.sids:
            iml3 = self.iml4[s]
            for z, rlz in enumerate(rlzs[s]):
                for p, poe in enumerate(self.poes_disagg):
                    for m, imt in enumerate(oq.imtls):
                        self.imldic[s, rlz, poe, imt] = iml3[m, p, z]

        # submit disaggregation tasks
        dstore = (self.datastore.parent if self.datastore.parent
                  else self.datastore)
        mag_edges = self.bin_edges[0]
        indices = get_indices_by_gidx_mag(dstore, mag_edges)
        allargs = []
        # total weight of all the items in `indices`
        totweight = sum(sum(ri.weight for ri in indices[gm])
                        for gm in indices)
        # maximum weight of a block, used to split the items in tasks
        maxweight = int(numpy.ceil(totweight / (oq.concurrent_tasks or 1)))
        grp_ids = dstore['grp_ids'][:]
        rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(grp_ids)
        num_eff_rlzs = len(self.full_lt.sm_rlzs)
        task_inputs = []
        # G: maximum number of gsims in a cmaker
        # U: maximum number of indices passed to a single task
        G, U = 0, 0
        for gidx, magi in indices:
            # NOTE(review): presumably the group IDs are ordered so that
            # the integer division gives the TRT index -- confirm
            trti = grp_ids[gidx][0] // num_eff_rlzs
            trt = self.trts[trti]
            cmaker = ContextMaker(
                trt, rlzs_by_gsim[gidx],
                {'truncation_level': oq.truncation_level,
                 'maximum_distance': oq.maximum_distance,
                 'collapse_level': oq.collapse_level,
                 'imtls': oq.imtls})
            G = max(G, len(cmaker.gsims))
            # `weight` is a name defined outside this method
            for rupidxs in block_splitter(
                    indices[gidx, magi], maxweight, weight):
                idxs = numpy.array([ri.index for ri in rupidxs])
                U = max(U, len(idxs))
                # the magnitude edges are not passed, only the index magi
                allargs.append((dstore, idxs, cmaker, self.iml4,
                                trti, magi, self.bin_edges[1:], oq))
                task_inputs.append((trti, magi, len(idxs)))

        # here only the message is used, nbytes is overridden below
        nbytes, msg = get_array_nbytes(dict(N=self.N, M=self.M, G=G, U=U))
        logging.info('Maximum mean_std per task:\n%s', msg)
        # estimate the data transfer on a copy of shapedic without the
        # 'trt' and 'mag' entries and with the number of tasks added
        sd = self.shapedic.copy()
        sd.pop('trt')
        sd.pop('mag')
        sd['tasks'] = numpy.ceil(len(allargs))
        nbytes, msg = get_array_nbytes(sd)
        if nbytes > oq.max_data_transfer:
            raise ValueError(
                'Estimated data transfer too big\n%s > max_data_transfer=%s' %
                (msg, humansize(oq.max_data_transfer)))
        logging.info('Estimated data transfer:\n%s', msg)
        # store one record (trti, magi, nrups) for each task
        dt = numpy.dtype([('trti', U8), ('magi', U8), ('nrups', U32)])
        self.datastore['disagg_task'] = numpy.array(task_inputs, dt)
        # swmr_on is called before starting the tasks
        self.datastore.swmr_on()
        smap = parallel.Starmap(
            compute_disagg, allargs, h5=self.datastore.hdf5)
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # imti, sid -> trti, magi -> 6D array