Code example #1
File: classical.py Project: drotheram/oq-engine
    def get_source_ids(self):
        """
        :returns: the unique source IDs contained in the composite model
        """
        oq = self.oqparam
        self.M = len(oq.imtls)
        self.L1 = len(oq.imtls.array) // self.M
        sources = encode([src_id for src_id in self.csm.source_info])
        size, msg = get_array_nbytes(
            dict(N=self.N, R=self.R, M=self.M, L1=self.L1, Ns=self.Ns))
        ps = 'pointSource' in self.full_lt.source_model_lt.source_types
        if size > TWO32 and not ps:
            raise RuntimeError('The matrix disagg_by_src is too large: %s' %
                               msg)
        elif size > TWO32:
            msg = ('The source model contains point sources: you cannot set '
                   'disagg_by_src=true unless you convert them to multipoint '
                   'sources with the command oq upgrade_nrml --multipoint %s'
                   ) % oq.base_path
            raise RuntimeError(msg)
        return sources
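
The size guard above leans on get_array_nbytes to estimate the footprint of the disagg_by_src matrix. As a rough, hypothetical stand-in (not the engine's actual helper), such an estimator can be sketched like this, assuming one float64 (8 bytes) per cell:

import math

def estimate_nbytes(shapedic, itemsize=8):
    # multiply all dimension lengths, then scale by the item size
    nbytes = itemsize * math.prod(shapedic.values())
    descr = ' * '.join('%s=%d' % item for item in shapedic.items())
    return nbytes, '%s * %d bytes = %d' % (descr, itemsize, nbytes)

nbytes, msg = estimate_nbytes(dict(N=1000, R=10, M=2, L1=20, Ns=500))
print(msg)  # N=1000 * R=10 * M=2 * L1=20 * Ns=500 * 8 bytes = 1600000000

A value above TWO32 (2**32 bytes, i.e. 4 GB) then triggers the RuntimeError shown in the example.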
Code example #2
File: disaggregation.py Project: j-gaspar/oq-engine
    def full_disaggregation(self):
        """
        Run the disaggregation phase.
        """
        oq = self.oqparam
        mags_by_trt = self.datastore['source_mags']
        all_edges, shapedic = disagg.get_edges_shapedic(
            oq, self.sitecol, mags_by_trt)
        *self.bin_edges, self.trts = all_edges
        src_filter = self.src_filter()
        if hasattr(self, 'csm'):
            for sg in self.csm.src_groups:
                if sg.atomic:
                    raise NotImplementedError(
                        'Atomic groups are not supported yet')

        self.full_lt = self.datastore['full_lt']
        self.poes_disagg = oq.poes_disagg or (None,)
        self.imts = list(oq.imtls)

        self.ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
        self.pgetter = getters.PmapGetter(
            self.datastore, self.ws, self.sitecol.sids)

        # build array rlzs (N, Z)
        if oq.rlz_index is None:
            Z = oq.num_rlzs_disagg
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    curves = numpy.array(
                        [pc.array for pc in self.pgetter.get_pcurves(sid)])
                    mean = getters.build_stat_curve(
                        curves, oq.imtls, stats.mean_curve, self.ws)
                    rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
                self.datastore['best_rlzs'] = rlzs
        else:
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z in range(Z):
                rlzs[:, z] = oq.rlz_index[z]
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs

        if oq.iml_disagg:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None for z in range(Z)] for s in range(self.N)]
            self.ok_sites = set(self.sitecol.sids)
        else:
            self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs[sid])
                      for sid in self.sitecol.sids]
            self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
        self.iml3 = _iml3(rlzs, oq.iml_disagg, oq.imtls,
                          self.poes_disagg, curves)
        if oq.disagg_by_src:
            self.build_disagg_by_src(rlzs)

        self.save_bin_edges()
        sd = shapedic.copy()
        sd.pop('trt')
        nbytes, msg = get_array_nbytes(sd)
        if nbytes > oq.max_data_transfer:
            raise ValueError(
                'Estimated data transfer too big\n%s > max_data_transfer=%s' %
                (msg, humansize(oq.max_data_transfer)))
        logging.info('Estimated data transfer:\n%s', msg)
        tot = get_outputs_size(shapedic, oq.disagg_outputs or disagg.pmf_map)
        logging.info('Total output size: %s', humansize(sum(tot.values())))
        self.imldic = {}  # sid, rlz, poe, imt -> iml
        for s in self.sitecol.sids:
            for z, rlz in enumerate(rlzs[s]):
                for p, poe in enumerate(self.poes_disagg):
                    for imt in oq.imtls:
                        self.imldic[s, rlz, poe, imt] = self.iml3[imt][s, p, z]

        # submit #groups disaggregation tasks
        dstore = (self.datastore.parent if self.datastore.parent
                  else self.datastore)
        M = len(oq.imtls)
        tasks_per_imt = numpy.ceil(oq.concurrent_tasks / M) or 1
        rups_per_task = len(dstore['rup/mag']) / tasks_per_imt
        logging.info('Considering ~%d ruptures per task', rups_per_task)
        indices = get_indices(dstore, tasks_per_imt)
        self.datastore.swmr_on()
        smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
        trt_num = {trt: i for i, trt in enumerate(self.trts)}
        for grp_id, trt in self.full_lt.trt_by_grp.items():
            logging.info('Group #%d, sending rup_data for %s', grp_id, trt)
            trti = trt_num[trt]
            cmaker = ContextMaker(
                trt, self.full_lt.get_rlzs_by_gsim(grp_id),
                {'truncation_level': oq.truncation_level,
                 'maximum_distance': src_filter.integration_distance,
                 'imtls': oq.imtls})
            for idxs in indices[grp_id]:
                for imt in oq.imtls:
                    smap.submit((dstore, idxs, cmaker, self.iml3[imt], trti,
                                 self.bin_edges, oq))
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # sid -> trti -> 8D array
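
Each compute_disagg task returns a dictionary of partial results, and smap.reduce folds them into an accumulator via self.agg_result. A minimal, serial stand-in for that map/reduce pattern, with collections.defaultdict playing the role of AccumDict(accum={}) and hypothetical task/aggregator functions:

from collections import defaultdict

def compute_partial(task_no):
    # stand-in for a disaggregation task: returns {sid: {key: value}}
    return {0: {task_no: task_no ** 2}}

def agg_result(acc, result):
    # merge one task's output into the running accumulator
    for sid, dic in result.items():
        acc[sid].update(dic)
    return acc

acc = defaultdict(dict)
for res in map(compute_partial, range(3)):  # serial stand-in for Starmap
    acc = agg_result(acc, res)
print(dict(acc))  # {0: {0: 0, 1: 1, 2: 4}}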
Code example #3
    def compute(self):
        """
        Submit disaggregation tasks and return the results
        """
        logging.info('Reading ruptures')
        oq = self.oqparam
        dstore = (self.datastore.parent
                  if self.datastore.parent else self.datastore)
        mags = set()
        for trt, dset in self.datastore['source_mags'].items():
            mags.update(dset[:])
        mags = sorted(mags)
        allargs = []
        totweight = sum(d['rctx']['nsites'].sum() for n, d in dstore.items()
                        if n.startswith('mag_') and len(d['rctx']))
        grp_ids = dstore['grp_ids'][:]
        rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(grp_ids)
        G = max(len(rbg) for rbg in rlzs_by_gsim)
        maxw = 2 * 1024**3 / (16 * G * self.M)  # at max 2 GB
        maxweight = min(numpy.ceil(totweight / (oq.concurrent_tasks or 1)),
                        maxw)
        num_eff_rlzs = len(self.full_lt.sm_rlzs)
        task_inputs = []
        U = 0
        totrups = 0
        for mag in mags:
            rctx = dstore['mag_%s/rctx' % mag][:]
            totrups += len(rctx)
            for gidx, gids in enumerate(grp_ids):
                idxs, = numpy.where(rctx['gidx'] == gidx)
                if len(idxs) == 0:
                    continue
                trti = gids[0] // num_eff_rlzs
                trt = self.trts[trti]
                cmaker = ContextMaker(
                    trt, rlzs_by_gsim[gidx], {
                        'truncation_level': oq.truncation_level,
                        'maximum_distance': oq.maximum_distance,
                        'collapse_level': oq.collapse_level,
                        'imtls': oq.imtls
                    })
                for blk in block_splitter(rctx[idxs], maxweight, nsites):
                    nr = len(blk)
                    U = max(U, blk.weight)
                    allargs.append((dstore, numpy.array(blk), cmaker,
                                    self.hmap4, trti, self.bin_edges, oq))
                    task_inputs.append((trti, mag, nr))
        logging.info('Found {:_d} ruptures'.format(totrups))
        nbytes, msg = get_array_nbytes(dict(M=self.M, G=G, U=U, F=2))
        logging.info('Maximum mean_std per task:\n%s', msg)

        s = self.shapedic
        size = s['dist'] * s['eps'] + s['lon'] * s['lat']
        sd = dict(N=s['N'], M=s['M'], P=s['P'], Z=s['Z'], size=size)
        sd['tasks'] = numpy.ceil(len(allargs))
        nbytes, msg = get_array_nbytes(sd)
        if nbytes > oq.max_data_transfer:
            raise ValueError(
                'Estimated data transfer too big\n%s > max_data_transfer=%s' %
                (msg, humansize(oq.max_data_transfer)))
        logging.info('Estimated data transfer:\n%s', msg)

        sd.pop('tasks')
        sd['mags_trt'] = sum(
            len(mags) for mags in self.datastore['source_mags'].values())
        nbytes, msg = get_array_nbytes(sd)
        logging.info('Estimated memory on the master:\n%s', msg)

        dt = numpy.dtype([('trti', U8), ('mag', '|S4'), ('nrups', U32)])
        self.datastore['disagg_task'] = numpy.array(task_inputs, dt)
        self.datastore.swmr_on()
        smap = parallel.Starmap(compute_disagg,
                                allargs,
                                h5=self.datastore.hdf5)
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # imti, sid -> trti, magi -> 6D array
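
The task sizing above hinges on block_splitter, which chunks the rupture contexts so that each block's cumulative weight (the number of affected sites, extracted by the nsites key function) stays below maxweight. A simplified reimplementation returning plain lists; the engine's version yields sequence objects carrying a .weight attribute, which is what blk.weight reads above:

def block_splitter(items, max_weight, weight=lambda item: 1):
    # yield consecutive blocks whose total weight stays under max_weight
    block, tot = [], 0
    for item in items:
        w = weight(item)
        if block and tot + w > max_weight:
            yield block
            block, tot = [], 0
        block.append(item)
        tot += w
    if block:
        yield block

blocks = list(block_splitter(range(10), 9, weight=lambda x: x))
print(blocks)  # [[0, 1, 2, 3], [4, 5], [6], [7], [8], [9]]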
Code example #4
    def full_disaggregation(self):
        """
        Run the disaggregation phase.
        """
        oq = self.oqparam
        mags_by_trt = self.datastore['source_mags']
        all_edges, self.shapedic = disagg.get_edges_shapedic(
            oq, self.sitecol, mags_by_trt)
        *self.bin_edges, self.trts = all_edges
        if hasattr(self, 'csm'):
            for sg in self.csm.src_groups:
                if sg.atomic:
                    raise NotImplementedError(
                        'Atomic groups are not supported yet')
        elif self.datastore['source_info'].attrs['atomic']:
            raise NotImplementedError(
                'Atomic groups are not supported yet')

        self.full_lt = self.datastore['full_lt']
        self.poes_disagg = oq.poes_disagg or (None,)
        self.imts = list(oq.imtls)
        self.M = len(self.imts)
        ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
        self.pgetter = getters.PmapGetter(
            self.datastore, ws, self.sitecol.sids)

        # build array rlzs (N, Z)
        if oq.rlz_index is None:
            Z = oq.num_rlzs_disagg or 1
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    curves = numpy.array(
                        [pc.array for pc in self.pgetter.get_pcurves(sid)])
                    mean = getters.build_stat_curve(
                        curves, oq.imtls, stats.mean_curve, ws)
                    rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
            self.datastore['best_rlzs'] = rlzs
        else:
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z in range(Z):
                rlzs[:, z] = oq.rlz_index[z]
            self.datastore['best_rlzs'] = rlzs
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs

        if oq.iml_disagg:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None for z in range(Z)] for s in range(self.N)]
            self.ok_sites = set(self.sitecol.sids)
        else:
            self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs[sid])
                      for sid in self.sitecol.sids]
            self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
        self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                          self.poes_disagg, curves)
        self.datastore['iml4'] = self.iml4
        self.datastore['poe4'] = numpy.zeros_like(self.iml4.array)

        self.save_bin_edges()
        tot = get_outputs_size(self.shapedic, oq.disagg_outputs)
        logging.info('Total output size: %s', humansize(sum(tot.values())))
        self.imldic = {}  # sid, rlz, poe, imt -> iml
        for s in self.sitecol.sids:
            iml3 = self.iml4[s]
            for z, rlz in enumerate(rlzs[s]):
                for p, poe in enumerate(self.poes_disagg):
                    for m, imt in enumerate(oq.imtls):
                        self.imldic[s, rlz, poe, imt] = iml3[m, p, z]

        # submit disaggregation tasks
        dstore = (self.datastore.parent if self.datastore.parent
                  else self.datastore)
        mag_edges = self.bin_edges[0]
        indices = get_indices_by_gidx_mag(dstore, mag_edges)
        allargs = []
        totweight = sum(sum(ri.weight for ri in indices[gm])
                        for gm in indices)
        maxweight = int(numpy.ceil(totweight / (oq.concurrent_tasks or 1)))
        grp_ids = dstore['grp_ids'][:]
        rlzs_by_gsim = self.full_lt.get_rlzs_by_gsim_list(grp_ids)
        num_eff_rlzs = len(self.full_lt.sm_rlzs)
        task_inputs = []
        G, U = 0, 0
        for gidx, magi in indices:
            trti = grp_ids[gidx][0] // num_eff_rlzs
            trt = self.trts[trti]
            cmaker = ContextMaker(
                trt, rlzs_by_gsim[gidx],
                {'truncation_level': oq.truncation_level,
                 'maximum_distance': oq.maximum_distance,
                 'collapse_level': oq.collapse_level,
                 'imtls': oq.imtls})
            G = max(G, len(cmaker.gsims))
            for rupidxs in block_splitter(
                    indices[gidx, magi], maxweight, weight):
                idxs = numpy.array([ri.index for ri in rupidxs])
                U = max(U, len(idxs))
                allargs.append((dstore, idxs, cmaker, self.iml4,
                                trti, magi, self.bin_edges[1:], oq))
                task_inputs.append((trti, magi, len(idxs)))

        nbytes, msg = get_array_nbytes(dict(N=self.N, M=self.M, G=G, U=U))
        logging.info('Maximum mean_std per task:\n%s', msg)
        sd = self.shapedic.copy()
        sd.pop('trt')
        sd.pop('mag')
        sd['tasks'] = numpy.ceil(len(allargs))
        nbytes, msg = get_array_nbytes(sd)
        if nbytes > oq.max_data_transfer:
            raise ValueError(
                'Estimated data transfer too big\n%s > max_data_transfer=%s' %
                (msg, humansize(oq.max_data_transfer)))
        logging.info('Estimated data transfer:\n%s', msg)
        dt = numpy.dtype([('trti', U8), ('magi', U8), ('nrups', U32)])
        self.datastore['disagg_task'] = numpy.array(task_inputs, dt)
        self.datastore.swmr_on()
        smap = parallel.Starmap(
            compute_disagg, allargs, h5=self.datastore.hdf5)
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # imti, sid -> trti, magi -> 6D array
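
The disagg_task dataset written above is a NumPy structured array recording, per task, the tectonic region type index, the magnitude bin and the number of ruptures. A self-contained version of that bookkeeping, assuming U8 and U32 are the engine's aliases for numpy.uint8 and numpy.uint32:

import numpy

U8, U32 = numpy.uint8, numpy.uint32  # assumed aliases, as in the engine
dt = numpy.dtype([('trti', U8), ('magi', U8), ('nrups', U32)])
task_inputs = [(0, 3, 120), (0, 4, 75), (1, 3, 48)]
arr = numpy.array(task_inputs, dt)
print(arr['nrups'].sum())     # 243 ruptures across all tasks
print(arr[arr['trti'] == 0])  # the tasks for the first tectonic region type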
Code example #5
File: classical.py Project: MHamdache/oq-engine
    def acc0(self):
        """
        Initial accumulator, a dict grp_id -> ProbabilityMap(L, G)
        """
        zd = AccumDict()
        num_levels = len(self.oqparam.imtls.array)
        rparams = {
            'grp_id', 'occurrence_rate', 'weight', 'probs_occur', 'clon_',
            'clat_', 'rrup_'
        }
        gsims_by_trt = self.full_lt.get_gsims_by_trt()
        n = len(self.full_lt.sm_rlzs)
        trts = list(self.full_lt.gsim_lt.values)
        for sm in self.full_lt.sm_rlzs:
            for grp_id in self.full_lt.grp_ids(sm.ordinal):
                trt = trts[grp_id // n]
                gsims = gsims_by_trt[trt]
                cm = ContextMaker(trt, gsims)
                rparams.update(cm.REQUIRES_RUPTURE_PARAMETERS)
                for dparam in cm.REQUIRES_DISTANCES:
                    rparams.add(dparam + '_')
        zd.eff_ruptures = AccumDict(accum=0)  # trt -> eff_ruptures
        if self.few_sites:
            self.rparams = sorted(rparams)
            for k in self.rparams:
                # variable length arrays
                if k == 'grp_id':
                    self.datastore.create_dset('rup/' + k, U16)
                elif k == 'probs_occur':  # vlen
                    self.datastore.create_dset('rup/' + k, hdf5.vfloat64)
                elif k.endswith('_'):  # array of shape (U, N)
                    self.datastore.create_dset('rup/' + k,
                                               F32,
                                               shape=(None, self.N),
                                               compression='gzip')
                else:
                    self.datastore.create_dset('rup/' + k, F32)
        else:
            self.rparams = {}
        self.by_task = {}  # task_no => src_ids
        self.totrups = 0  # total number of ruptures before collapsing
        self.maxradius = 0
        self.gidx = {
            tuple(grp_ids): i
            for i, grp_ids in enumerate(self.datastore['grp_ids'])
        }

        # estimate max memory per core
        max_num_gsims = max(len(gsims) for gsims in gsims_by_trt.values())
        max_num_grp_ids = max(len(grp_ids) for grp_ids in self.gidx)
        pmapbytes = self.N * num_levels * max_num_gsims * max_num_grp_ids * 8
        if pmapbytes > TWO32:
            logging.warning(TOOBIG % (self.N, num_levels, max_num_gsims,
                                      max_num_grp_ids, humansize(pmapbytes)))
        logging.info(MAXMEMORY % (self.N, num_levels, max_num_gsims,
                                  max_num_grp_ids, humansize(pmapbytes)))

        self.Ns = len(self.csm.source_info)
        if self.oqparam.disagg_by_src:
            self.M = len(self.oqparam.imtls)
            self.L1 = num_levels // self.M
            sources = encode([src_id for src_id in self.csm.source_info])
            size, msg = get_array_nbytes(
                dict(N=self.N, R=self.R, M=self.M, L1=self.L1, Ns=self.Ns))
            if size > TWO32:
                raise RuntimeError(
                    'The matrix disagg_by_src is too large: %s' % msg)
            self.datastore.create_dset(
                'disagg_by_src', F32,
                (self.N, self.R, self.M, self.L1, self.Ns))
            self.datastore.set_shape_attrs('disagg_by_src',
                                           site_id=self.N,
                                           rlz_id=self.R,
                                           imt=list(self.oqparam.imtls),
                                           lvl=self.L1,
                                           src_id=sources)
        return zd
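
The memory warning in acc0 estimates the worst-case ProbabilityMap footprint as one float64 per (site, level, gsim, group) cell and compares it against TWO32 = 2**32 bytes (4 GB). The same arithmetic in isolation, with illustrative numbers:

TWO32 = 2 ** 32  # 4 GB threshold, as in the engine code

def pmap_nbytes(N, num_levels, max_num_gsims, max_num_grp_ids):
    # one 8-byte float per (site, level, gsim, group) combination
    return N * num_levels * max_num_gsims * max_num_grp_ids * 8

nbytes = pmap_nbytes(N=10_000, num_levels=200, max_num_gsims=4,
                     max_num_grp_ids=8)
print(round(nbytes / 1024 ** 3, 2), 'GB', nbytes > TWO32)  # 0.48 GB False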
Code example #6
    def full_disaggregation(self):
        """
        Run the disaggregation phase.
        """
        oq = self.oqparam
        tl = oq.truncation_level
        src_filter = self.src_filter()
        if hasattr(self, 'csm'):
            for sg in self.csm.src_groups:
                if sg.atomic:
                    raise NotImplementedError(
                        'Atomic groups are not supported yet')

        self.full_lt = self.datastore['full_lt']
        self.poes_disagg = oq.poes_disagg or (None,)
        self.imts = list(oq.imtls)

        self.ws = [rlz.weight for rlz in self.full_lt.get_realizations()]
        self.pgetter = getters.PmapGetter(
            self.datastore, self.ws, self.sitecol.sids)

        # build array rlzs (N, Z)
        if oq.rlz_index is None:
            Z = oq.num_rlzs_disagg
            rlzs = numpy.zeros((self.N, Z), int)
            if self.R > 1:
                for sid in self.sitecol.sids:
                    curves = numpy.array(
                        [pc.array for pc in self.pgetter.get_pcurves(sid)])
                    mean = getters.build_stat_curve(
                        curves, oq.imtls, stats.mean_curve, self.ws)
                    rlzs[sid] = util.closest_to_ref(curves, mean.array)[:Z]
                self.datastore['best_rlzs'] = rlzs
        else:
            Z = len(oq.rlz_index)
            rlzs = numpy.zeros((self.N, Z), int)
            for z in range(Z):
                rlzs[:, z] = oq.rlz_index[z]
        assert Z <= self.R, (Z, self.R)
        self.Z = Z
        self.rlzs = rlzs

        if oq.iml_disagg:
            # no hazard curves are needed
            self.poe_id = {None: 0}
            curves = [[None for z in range(Z)] for s in range(self.N)]
            self.ok_sites = set(self.sitecol.sids)
        else:
            self.poe_id = {poe: i for i, poe in enumerate(oq.poes_disagg)}
            curves = [self.get_curve(sid, rlzs[sid])
                      for sid in self.sitecol.sids]
            self.ok_sites = set(self.check_poes_disagg(curves, rlzs))
        self.iml4 = _iml4(rlzs, oq.iml_disagg, oq.imtls,
                          self.poes_disagg, curves)
        if oq.disagg_by_src:
            self.build_disagg_by_src(rlzs)

        # build trt_edges
        trts = tuple(self.full_lt.trts)
        trt_num = {trt: i for i, trt in enumerate(trts)}
        self.trts = trts

        # build mag_edges
        mags = [float(mag) for mag in self.datastore['source_mags']]
        mag_edges = oq.mag_bin_width * numpy.arange(
            int(numpy.floor(min(mags) / oq.mag_bin_width)),
            int(numpy.ceil(max(mags) / oq.mag_bin_width) + 1))

        # build dist_edges
        maxdist = max(oq.maximum_distance(trt) for trt in trts)
        dist_edges = oq.distance_bin_width * numpy.arange(
            0, int(numpy.ceil(maxdist / oq.distance_bin_width) + 1))

        # build eps_edges
        eps_edges = numpy.linspace(-tl, tl, oq.num_epsilon_bins + 1)

        # build lon_edges, lat_edges per sid
        bbs = src_filter.get_bounding_boxes(mag=max(mags))
        lon_edges, lat_edges = {}, {}  # by sid
        for sid, bb in zip(self.sitecol.sids, bbs):
            lon_edges[sid], lat_edges[sid] = disagg.lon_lat_bins(
                bb, oq.coordinate_bin_width)
        self.bin_edges = mag_edges, dist_edges, lon_edges, lat_edges, eps_edges
        shapedic = self.save_bin_edges()
        del shapedic['trt']
        shapedic['N'] = self.N
        shapedic['M'] = len(oq.imtls)
        shapedic['P'] = len(oq.poes_disagg)
        shapedic['Z'] = Z
        shapedic['concurrent_tasks'] = oq.concurrent_tasks
        nbytes, msg = get_array_nbytes(shapedic)
        if nbytes > oq.max_data_transfer:
            raise ValueError('Estimated data transfer too big\n%s' % msg)
        logging.info('Estimated data transfer: %s', msg)
        self.imldict = {}  # sid, rlz, poe, imt -> iml
        for s in self.sitecol.sids:
            for z, rlz in enumerate(rlzs[s]):
                for p, poe in enumerate(self.poes_disagg):
                    for m, imt in enumerate(oq.imtls):
                        self.imldict[s, rlz, poe, imt] = self.iml4[s, m, p, z]

        # submit #groups disaggregation tasks
        dstore = (self.datastore.parent if self.datastore.parent
                  else self.datastore)
        indices = get_indices(dstore, oq.concurrent_tasks or 1)
        self.datastore.swmr_on()
        smap = parallel.Starmap(compute_disagg, h5=self.datastore.hdf5)
        for grp_id, trt in self.full_lt.trt_by_grp.items():
            logging.info('Group #%d, sending rup_data for %s', grp_id, trt)
            trti = trt_num[trt]
            cmaker = ContextMaker(
                trt, self.full_lt.get_rlzs_by_gsim(grp_id),
                {'truncation_level': oq.truncation_level,
                 'maximum_distance': src_filter.integration_distance,
                 'filter_distance': oq.filter_distance, 'imtls': oq.imtls})
            for idxs in indices[grp_id]:
                smap.submit((dstore, idxs, cmaker, self.iml4, trti,
                             self.bin_edges))
        results = smap.reduce(self.agg_result, AccumDict(accum={}))
        return results  # sid -> trti -> 8D array
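
All the bin edges in the last example follow the same recipe: snap the data range outward to multiples of the bin width and enumerate the edges with numpy.arange. Extracted as a standalone helper (the function name is ours, not the engine's), shown here for the magnitude bins:

import numpy

def make_edges(values, bin_width):
    # snap min/max outward to multiples of bin_width, then enumerate edges
    lo = int(numpy.floor(min(values) / bin_width))
    hi = int(numpy.ceil(max(values) / bin_width) + 1)
    return bin_width * numpy.arange(lo, hi)

print(make_edges([5.2, 6.7, 7.1], 0.5))  # [5.  5.5 6.  6.5 7.  7.5]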