Example #1
 def test_supertask(self):
     # this test has 4 supertasks generating 4 + 5 + 3 + 5 = 17 subtasks
     # and 5 real outputs (one from the yield {})
     allargs = [('aaaaeeeeiii', ), ('uuuuaaaaeeeeiii', ),
                ('aaaaaaaaeeeeiii', ), ('aaaaeeeeiiiiiooooooo', )]
     numchars = sum(len(arg) for arg, in allargs)  # 61
     tmpdir = tempfile.mkdtemp()
     tmp = os.path.join(tmpdir, 'calc_1.hdf5')
     performance.init_performance(tmp, swmr=True)
     smap = parallel.Starmap(supertask, allargs, h5=hdf5.File(tmp, 'a'))
     res = smap.reduce()
     smap.h5.close()
     self.assertEqual(res, {'n': numchars})
     # check that the correct information is stored in the hdf5 file
     with hdf5.File(tmp, 'r') as h5:
         num = general.countby(h5['performance_data'][()], 'operation')
         self.assertEqual(num[b'waiting'], 4)
         self.assertEqual(num[b'total supertask'], 5)  # outputs
         self.assertEqual(num[b'total get_length'], 17)  # subtasks
         self.assertGreater(len(h5['task_info']), 0)
     shutil.rmtree(tmpdir)
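
The `swmr=True` flag passed to `init_performance` above enables HDF5 single-writer/multiple-reader mode, so the performance data being written can be read while the calculation file is still open for writing. A minimal sketch of the underlying handshake in plain h5py (file and dataset names are illustrative, not taken from the test):

import h5py
import numpy

fname = '/tmp/swmr_demo.hdf5'
with h5py.File(fname, 'w', libver='latest') as writer:
    dset = writer.create_dataset('ticks', shape=(0,), maxshape=(None,), dtype='f8')
    writer.swmr_mode = True            # writer side: readers may now attach
    with h5py.File(fname, 'r', swmr=True) as reader:   # reader side
        dset.resize((3,))
        dset[:] = numpy.arange(3)
        dset.flush()                   # make the new rows visible to readers
        rdset = reader['ticks']
        rdset.refresh()                # pick up the flushed rows
        assert len(rdset) == 3
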
Example #2
 def sitecol(self):
     """
     Read the site collection from .filename and cache it
     """
     if 'sitecol' in vars(self):
         return self.__dict__['sitecol']
     if self.filename is None or not os.path.exists(self.filename):
         # case of nofilter/None sitecol
         return
     with hdf5.File(self.filename, 'r') as h5:
         self.__dict__['sitecol'] = sc = h5.get('sitecol')
     return sc
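
The `vars(self)` check and the write into `self.__dict__` suggest this method is used as a cached property: the site collection is read from the HDF5 file at most once per instance. A rough sketch of the same idiom with the standard library, using a hypothetical `Reader` class and the same `hdf5` module import as in the examples:

from functools import cached_property

class Reader:
    def __init__(self, filename):
        self.filename = filename

    @cached_property
    def sitecol(self):
        # computed on first access, then stored in the instance __dict__
        with hdf5.File(self.filename, 'r') as h5:
            return h5.get('sitecol')
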
Example #3
def convert_xml_hdf5(input_file, output_file):
    with hdf5.File(output_file, 'w') as out:
        inp = nrml.read(input_file)
        if inp['xmlns'].endswith('nrml/0.4'):  # old version
            d = os.path.dirname(input_file) or '.'
            raise ValueError('Please upgrade with `oq upgrade_nrml %s`' % d)
        elif inp['xmlns'].endswith('nrml/0.5'):  # current version
            sm = inp.sourceModel
        else:  # not a NRML
            raise ValueError('Unknown NRML: %s' % inp['xmlns'])
        out.save(node.node_to_dict(sm))
    return output_file
Example #4
def hdf5new(datadir=None):
    """
    Return a new `hdf5.File` instance with name determined by the last
    calculation in the datadir (plus one). Set the .path attribute to the
    generated filename.
    """
    datadir = datadir or get_datadir()
    calc_id = get_last_calc_id(datadir) + 1
    fname = os.path.join(datadir, 'calc_%d.hdf5' % calc_id)
    new = hdf5.File(fname, 'w')
    new.path = fname
    return new
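
A minimal usage sketch, assuming the scratch directory already exists (the path is illustrative):

h5 = hdf5new('/tmp/oqdata')    # on a fresh directory this would create e.g. calc_1.hdf5
print(h5.path)                 # the full path of the newly created file
h5.close()
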
Example #5
 def open(self, mode):
     """
     Open the underlying .hdf5 file and the parent, if any
     """
     if self.hdf5 == ():  # not already open
         kw = dict(mode=mode, libver='latest')
         if mode == 'r':
             kw['swmr'] = True
         try:
             self.hdf5 = hdf5.File(self.hdf5path, **kw)
         except OSError as exc:
             raise OSError('%s in %s' % (exc, self.hdf5path))
Example #6
 def test_supertask(self):
     # this test has 4 supertasks generating 4 + 5 + 3 + 5 = 17 subtasks
     # and 18 outputs (1 output does not produce a subtask)
     allargs = [('aaaaeeeeiii',),
                ('uuuuaaaaeeeeiii',),
                ('aaaaaaaaeeeeiii',),
                ('aaaaeeeeiiiiiooooooo',)]
     numchars = sum(len(arg) for arg, in allargs)  # 61
     tmpdir = tempfile.mkdtemp()
     tmp = os.path.join(tmpdir, 'calc_1.hdf5')
     hdf5.File(tmp, 'w').close()  # the file must exist
     smap = parallel.Starmap(supertask, allargs, hdf5path=tmp)
     res = smap.reduce()
     self.assertEqual(res, {'n': numchars})
     # check that the correct information is stored in the hdf5 file
     with hdf5.File(tmp, 'r') as h5:
         num = general.countby(h5['performance_data'][()], 'operation')
         self.assertEqual(num[b'waiting'], 4)
         self.assertEqual(num[b'total supertask'], 5)  # outputs
         self.assertEqual(num[b'total get_length'], 17)  # subtasks
         self.assertGreater(len(h5['task_info/supertask']), 0)
     shutil.rmtree(tmpdir)
Example #7
 def sitecol(self):
     """
     Read the site collection from .filename and cache it
     """
     if 'sitecol' in vars(self):
         return self.__dict__['sitecol']
     if self.filename is None:
         return
     elif not os.path.exists(self.filename):
         raise FileNotFoundError('%s: shared_dir issue?' % self.filename)
     with hdf5.File(self.filename, 'r') as h5:
         self.__dict__['sitecol'] = sc = h5.get('sitecol')
     return sc
Example #8
 def test_supertask(self):
     # this test has 4 supertasks generating 4 + 5 + 3 + 5 = 17 subtasks
     allargs = [('aaaaeeeeiii', ), ('uuuuaaaaeeeeiii', ),
                ('aaaaaaaaeeeeiii', ), ('aaaaeeeeiiiiiooooooo', )]
     numchars = sum(len(arg) for arg, in allargs)  # 61
     tmpdir = tempfile.mkdtemp()
     tmp = os.path.join(tmpdir, 'calc_1.hdf5')
     performance.init_performance(tmp)
     smap = parallel.Starmap(supertask, allargs, h5=hdf5.File(tmp, 'a'))
     res = smap.reduce()
     smap.h5.close()
     self.assertEqual(res, {'n': numchars})
     # check that the correct information is stored in the hdf5 file
     with hdf5.File(tmp, 'r') as h5:
         num = general.countby(h5['performance_data'][()], 'operation')
         self.assertEqual(num[b'waiting'], 4)
         self.assertEqual(num[b'total supertask'], 4)  # tasks
         self.assertEqual(num[b'total get_length'], 17)  # subtasks
         info = h5['task_info'][()]
         dic = dict(general.fast_agg3(info, 'taskname', ['received']))
         self.assertGreater(dic[b'get_length'], 0)
         self.assertGreater(dic[b'supertask'], 0)
     shutil.rmtree(tmpdir)
Example #9
 def __init__(self, sitecol, integration_distance, hdf5path=None):
     if sitecol is not None and len(sitecol) < len(sitecol.complete):
         raise ValueError('%s is not complete!' % sitecol)
     self.hdf5path = hdf5path
     if hdf5path and (config.distribution.oq_distribute
                      in ('no', 'processpool')
                      or config.directory.shared_dir):  # store the sitecol
         with hdf5.File(hdf5path, 'w') as h5:
             h5['sitecol'] = sitecol
     else:  # keep the sitecol in memory
         self.__dict__['sitecol'] = sitecol
     self.integration_distance = (IntegrationDistance(integration_distance)
                                  if isinstance(integration_distance, dict)
                                  else integration_distance)
Example #10
 def __init__(self, sitecol, integration_distance, filename=None):
     if sitecol is not None and len(sitecol) < len(sitecol.complete):
         raise ValueError('%s is not complete!' % sitecol)
     elif sitecol is None:
         integration_distance = {}
     self.filename = filename
     self.integration_distance = (IntegrationDistance(integration_distance)
                                  if isinstance(integration_distance, dict)
                                  else integration_distance)
     if filename and not os.path.exists(filename):  # store the sitecol
         with hdf5.File(filename, 'w') as h5:
             h5['sitecol'] = sitecol if sitecol else ()
     else:  # keep the sitecol in memory
         self.__dict__['sitecol'] = sitecol
Example #11
def export_asset_loss_table(ekey, dstore):
    """
    Export in parallel the asset loss table from the datastore.

    NB1: for large calculations this may run out of memory
    NB2: due to a heisenbug in the parallel reading of .hdf5 files this works
    reliably only if the datastore has been created by a different process

    The recommendation is: *do not use this exporter*; rather, study its source
    code and write what you need, since every postprocessing is different.
    """
    key, fmt = ekey
    oq = dstore['oqparam']
    assetcol = dstore['assetcol']
    arefs = dstore['asset_refs'].value
    avals = assetcol.values()
    loss_types = dstore.get_attr('all_loss_ratios', 'loss_types').split()
    dtlist = [(lt, F32) for lt in loss_types]
    if oq.insured_losses:
        for lt in loss_types:
            dtlist.append((lt + '_ins', F32))
    lrs_dt = numpy.dtype([('rlzi', U16), ('losses', dtlist)])
    fname = dstore.export_path('%s.%s' % ekey)
    monitor = performance.Monitor(key, fname)
    lrgetter = riskinput.LossRatiosGetter(dstore)
    aids = range(len(assetcol))
    allargs = [(lrgetter, list(block), monitor)
               for block in split_in_blocks(aids, oq.concurrent_tasks)]
    dstore.close()  # avoid OSError: Can't read data (Wrong b-tree signature)
    L = len(loss_types)
    with hdf5.File(fname, 'w') as f:
        nbytes = 0
        total = numpy.zeros(len(dtlist), F32)
        for pairs in parallel.Starmap(get_loss_ratios, allargs):
            for aid, data in pairs:
                asset = assetcol[aid]
                avalue = avals[aid]
                for l, lt in enumerate(loss_types):
                    aval = avalue[lt]
                    for i in range(oq.insured_losses + 1):
                        data['ratios'][:, l + L * i] *= aval
                aref = arefs[asset.idx]
                f[b'asset_loss_table/' + aref] = data.view(lrs_dt)
                total += data['ratios'].sum(axis=0)
                nbytes += data.nbytes
        f['asset_loss_table'].attrs['loss_types'] = ' '.join(loss_types)
        f['asset_loss_table'].attrs['total'] = total
        f['asset_loss_table'].attrs['nbytes'] = nbytes
    return [fname]
Example #12
def convert_xml_hdf5(input_file, output_file):
    with hdf5.File(output_file, 'w') as out:
        inp = nrml.read(input_file)
        if inp['xmlns'].endswith('nrml/0.4'):  # old version
            d = os.path.dirname(input_file) or '.'
            raise ValueError('Please upgrade with `oq upgrade_nrml %s`' % d)
        elif inp['xmlns'].endswith('nrml/0.5'):  # current version
            sm = inp.sourceModel
        else:  # not a NRML
            raise ValueError('Unknown NRML: %s' % inp['xmlns'])
        for group in sm:
            for src in group:  # make the trt implicit
                del src.attrib['tectonicRegion']
        out.save(node.node_to_dict(sm))
    return output_file
Example #13
def to_python(fname, converter):
    """
    Convert a source model .hdf5 file into a :class:`SourceModel` object
    """
    with hdf5.File(fname, 'r') as f:
        source_model = f['/']
    for sg in source_model:
        for src in sg:
            if hasattr(src, 'mfd'):
                # multipoint source
                src.tom = converter.tom
                kwargs = getattr(src.mfd, 'kwargs', {})
                if 'bin_width' not in kwargs:
                    kwargs['bin_width'] = [converter.width_of_mfd_bin]
    return source_model
Example #14
def convert_nonParametricSeismicSource(fname, node):
    """
    Convert the given node into a non-parametric source object.

    :param fname:
        full pathname to the XML file associated to the node
    :param node:
        a Node object coming from an XML file
    :returns:
        a :class:`openquake.hazardlib.source.NonParametricSeismicSource`
        instance
    """
    trt = node.attrib.get('tectonicRegion')
    rups_weights = None
    if 'rup_weights' in node.attrib:
        rups_weights = F64(node['rup_weights'].split())
    nps = source.NonParametricSeismicSource(node['id'], node['name'], trt, [],
                                            [])
    nps.splittable = 'rup_weights' not in node.attrib
    path = os.path.splitext(fname)[0] + '.hdf5'
    hdf5_fname = path if os.path.exists(path) else None
    if hdf5_fname:
        # read the rupture data from the HDF5 file
        assert node.text is None, node.text
        with hdf5.File(hdf5_fname, 'r') as h:
            dic = {k: d[:] for k, d in h[node['id']].items()}
        nps.fromdict(dic, rups_weights)
        num_probs = len(dic['probs_occur'])
    else:
        # read the rupture data from the XML nodes
        num_probs = None
        for i, rupnode in enumerate(node):
            po = rupnode['probs_occur']
            probs = pmf.PMF(valid.pmf(po))
            if num_probs is None:  # first time
                num_probs = len(probs.data)
            elif len(probs.data) != num_probs:
                # probs_occur must have uniform length for all ruptures
                raise ValueError(
                    'probs_occur=%s has %d elements, expected %s' %
                    (po, len(probs.data), num_probs))
            rup = RuptureConverter(5.).convert_node(rupnode)
            rup.tectonic_region_type = trt
            rup.weight = None if rups_weights is None else rups_weights[i]
            nps.data.append((rup, probs))
    nps.num_probs_occur = num_probs
    return nps
Example #15
    def post_execute(self, pmap_by_grp_id):
        """
        Collect the hazard curves by realization and export them.

        :param pmap_by_grp_id:
            a dictionary grp_id -> hazard curves
        """
        oq = self.oqparam
        try:
            csm_info = self.csm.info
        except AttributeError:
            csm_info = self.datastore['csm_info']
        trt_by_grp = csm_info.grp_by("trt")
        grp_name = {
            grp.id: grp.name
            for sm in csm_info.source_models for grp in sm.src_groups
        }
        data = []
        with self.monitor('saving probability maps', autoflush=True):
            for grp_id, pmap in pmap_by_grp_id.items():
                if pmap:  # pmap can be missing if the group is filtered away
                    base.fix_ones(pmap)  # avoid saving PoEs == 1
                    trt = trt_by_grp[grp_id]
                    key = 'poes/grp-%02d' % grp_id
                    self.datastore[key] = pmap
                    self.datastore.set_attrs(key, trt=trt)
                    extreme = max(
                        get_extreme_poe(pmap[sid].array, oq.imtls)
                        for sid in pmap)
                    data.append((grp_id, grp_name[grp_id], extreme))
                    if 'rup' in set(self.datastore):
                        self.datastore.set_nbytes('rup/grp-%02d' % grp_id)
                        tot_ruptures = sum(
                            len(r) for r in self.datastore['rup'].values())
                        self.datastore.set_attrs('rup',
                                                 tot_ruptures=tot_ruptures)
        if oq.hazard_calculation_id is None and 'poes' in self.datastore:
            self.datastore.set_nbytes('poes')
            self.datastore['disagg_by_grp'] = numpy.array(
                sorted(data), grp_extreme_dt)

            # save a copy of the poes in hdf5cache
            with hdf5.File(self.hdf5cache) as cache:
                cache['oqparam'] = oq
                self.datastore.hdf5.copy('poes', cache)
            self.calc_stats(self.hdf5cache)
Example #16
    def read_inputs(self):
        """
        Read risk data and sources if any
        """
        oq = self.oqparam
        self._read_risk_data()
        self.check_overflow()  # check if self.sitecol is too large

        if ('amplification' in oq.inputs
                and oq.amplification_method == 'kernel'):
            logging.info('Reading %s', oq.inputs['amplification'])
            df = readinput.get_amplification(oq)
            check_amplification(df, self.sitecol)
            self.af = AmplFunction.from_dframe(df)

        if getattr(self, 'sitecol', None):
            # can be None for the ruptures-only calculator
            with hdf5.File(self.datastore.tempname, 'w') as tmp:
                tmp['sitecol'] = self.sitecol
        elif (oq.calculation_mode == 'disaggregation'
              and oq.max_sites_disagg < len(self.sitecol)):
            raise ValueError('Please set max_sites_disagg=%d in %s' %
                             (len(self.sitecol), oq.inputs['job_ini']))
        elif oq.disagg_by_src and len(self.sitecol) > oq.max_sites_disagg:
            raise ValueError(
                'There are too many sites to use disagg_by_src=true')
        if ('source_model_logic_tree' in oq.inputs
                and oq.hazard_calculation_id is None):
            with self.monitor('composite source model', measuremem=True):
                self.csm = csm = readinput.get_composite_source_model(
                    oq, self.datastore.hdf5)
                srcs = [src for sg in csm.src_groups for src in sg]
                if not srcs:
                    raise RuntimeError('All sources were discarded!?')
                logging.info('Checking the sources bounding box')
                sids = self.src_filter().within_bbox(srcs)
                if len(sids) == 0:
                    raise RuntimeError('All sources were discarded!?')
                self.full_lt = csm.full_lt
        self.init()  # do this at the end of pre-execute

        if (not oq.hazard_calculation_id
                and oq.calculation_mode != 'preclassical'
                and not oq.save_disk_space):
            self.gzip_inputs()
Example #17
    def init(self):
        """
        Read the poes and set the .data attribute with the hazard curves
        """
        if hasattr(self, '_pmap'):  # already initialized
            return self._pmap
        dstore = hdf5.File(self.filename, 'r')
        self.rlzs_by_g = dstore['rlzs_by_g'][()]

        # populate _pmap
        dset = dstore['_poes']  # NLG_
        L, G = dset.shape[1:]
        self._pmap = probability_map.ProbabilityMap.build(L, G, self.sids)
        for sid, array in zip(self.sids, dset[list(self.sids)]):
            self._pmap[sid].array = array
        self.nbytes = self._pmap.nbytes
        dstore.close()
        return self._pmap
Example #18
 def gen_rupture_getters(self):
     """
      :yields: RuptureGetter instances
     """
     dstore = (self.datastore.parent if self.datastore.parent
               else self.datastore)
     hdf5cache = dstore.hdf5cache()
     mode = 'r+' if os.path.exists(hdf5cache) else 'w'
     with hdf5.File(hdf5cache, mode) as cache:
         if 'ruptures' not in cache:
             dstore.hdf5.copy('ruptures', cache)
         if 'rupgeoms' not in cache:
             dstore.hdf5.copy('rupgeoms', cache)
     yield from gen_rupture_getters(
         dstore, concurrent_tasks=self.oqparam.concurrent_tasks or 1,
         hdf5cache=hdf5cache)
     if self.datastore.parent:
         self.datastore.parent.close()
Example #19
    def post_execute(self, pmap_by_grp_id):
        """
        Collect the hazard curves by realization and export them.

        :param pmap_by_grp_id:
            a dictionary grp_id -> hazard curves
        """
        oq = self.oqparam
        grp_trt = self.csm_info.grp_by("trt")
        grp_source = self.csm_info.grp_by("name")
        if oq.disagg_by_src:
            src_name = {
                src.src_group_id: src.name
                for src in self.csm.get_sources()
            }
        data = []
        with self.monitor('saving probability maps', autoflush=True):
            for grp_id, pmap in pmap_by_grp_id.items():
                if pmap:  # pmap can be missing if the group is filtered away
                    fix_ones(pmap)  # avoid saving PoEs == 1
                    key = 'poes/grp-%02d' % grp_id
                    self.datastore[key] = pmap
                    self.datastore.set_attrs(key, trt=grp_trt[grp_id])
                    if oq.disagg_by_src:
                        data.append(
                            (grp_id, grp_source[grp_id], src_name[grp_id]))
        if oq.hazard_calculation_id is None and 'poes' in self.datastore:
            self.datastore.set_nbytes('poes')
            if oq.disagg_by_src and self.csm_info.get_num_rlzs() == 1:
                # this is useful for disaggregation, which is implemented
                # only for the case of a single realization
                self.datastore['disagg_by_src/source_id'] = numpy.array(
                    sorted(data), grp_source_dt)

            # save a copy of the poes in hdf5cache
            if hasattr(self, 'hdf5cache'):
                with hdf5.File(self.hdf5cache) as cache:
                    cache['oqparam'] = oq
                    self.datastore.hdf5.copy('poes', cache)
                self.calc_stats(self.hdf5cache)
            else:
                self.calc_stats(self.datastore)
        self.datastore.open('r+')
        self.save_hmaps()
Example #20
    def init(self):
        """
        Read the poes and set the .data attribute with the hazard curves
        """
        if hasattr(self, '_pmap'):  # already initialized
            return self._pmap
        dstore = hdf5.File(self.filename, 'r')
        self.rlzs_by_g = dstore['rlzs_by_g'][()]

        # populate _pmap
        dset = dstore['_poes']  # GNL
        G, N, L = dset.shape
        self._pmap = probability_map.ProbabilityMap.build(L, G, self.sids)
        data = dset[:, self.sids, :]  # shape (G, N, L)
        for i, sid in enumerate(self.sids):
            self._pmap[sid].array = data[:, i, :].T  # shape (L, G)
        self.nbytes = self._pmap.nbytes
        dstore.close()
        return self._pmap
Example #21
 def execute(self):
     oq = self.oqparam
     self.set_param(
         num_taxonomies=self.assetcol.num_taxonomies_by_site(),
         maxweight=oq.ebrisk_maxweight / (oq.concurrent_tasks or 1),
         epspath=cache_epsilons(self.datastore, oq, self.assetcol,
                                self.riskmodel, self.E))
     parent = self.datastore.parent
     if parent:
         hdf5path = parent.filename
         grp_indices = parent['ruptures'].attrs['grp_indices']
         nruptures = len(parent['ruptures'])
     else:
         hdf5path = self.datastore.hdf5cache()
         grp_indices = self.datastore['ruptures'].attrs['grp_indices']
         nruptures = len(self.datastore['ruptures'])
         with hdf5.File(hdf5path, 'r+') as cache:
             self.datastore.hdf5.copy('weights', cache)
             self.datastore.hdf5.copy('ruptures', cache)
             self.datastore.hdf5.copy('rupgeoms', cache)
     self.init_logic_tree(self.csm_info)
     smap = parallel.Starmap(self.core_task.__func__,
                             monitor=self.monitor())
     trt_by_grp = self.csm_info.grp_by("trt")
     samples = self.csm_info.get_samples_by_grp()
     rlzs_by_gsim_grp = self.csm_info.get_rlzs_by_gsim_grp()
     ruptures_per_block = numpy.ceil(nruptures / (oq.concurrent_tasks or 1))
     first_event = 0
     for grp_id, rlzs_by_gsim in rlzs_by_gsim_grp.items():
         start, stop = grp_indices[grp_id]
         for indices in general.block_splitter(range(start, stop),
                                               ruptures_per_block):
             rgetter = getters.RuptureGetter(hdf5path, list(indices),
                                             grp_id, trt_by_grp[grp_id],
                                             samples[grp_id], rlzs_by_gsim,
                                             first_event)
             first_event += rgetter.num_events
             smap.submit(rgetter, self.src_filter, self.param)
     self.events_per_sid = []
     self.gmf_nbytes = 0
     res = smap.reduce(self.agg_dicts, numpy.zeros(self.N))
     logging.info('Produced %s of GMFs', general.humansize(self.gmf_nbytes))
     return res
Example #22
def get_assets_by_taxo(assets, tempname=None):
    """
    :param assets: an array of assets
    :param tempname: hdf5 file where the epsilons are (or None)
    :returns: assets_by_taxo with attributes eps and idxs
    """
    assets_by_taxo = AccumDict(group_array(assets, 'taxonomy'))
    assets_by_taxo.idxs = numpy.argsort(
        numpy.concatenate([a['ordinal'] for a in assets_by_taxo.values()]))
    assets_by_taxo.eps = {}
    if tempname is None:  # no epsilons
        return assets_by_taxo
    # otherwise read the epsilons and group them by taxonomy
    with hdf5.File(tempname, 'r') as h5:
        dset = h5['epsilon_matrix']
        for taxo, assets in assets_by_taxo.items():
            lst = [dset[aid] for aid in assets['ordinal']]
            assets_by_taxo.eps[taxo] = numpy.array(lst)
    return assets_by_taxo
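
For context, a sketch of how the temporary file read above could be prepared; the only requirement visible from the code is an `epsilon_matrix` dataset indexable by asset ordinal (the sizes, file name and random values below are illustrative):

num_assets, num_events = 10, 100                  # illustrative sizes
eps = numpy.random.normal(size=(num_assets, num_events))
with hdf5.File('/tmp/eps.hdf5', 'w') as h5:       # hypothetical temporary file
    h5['epsilon_matrix'] = eps
# then, e.g.: assets_by_taxo = get_assets_by_taxo(assets, '/tmp/eps.hdf5')
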
Example #23
 def init(self):
     """
     Build the probability curves from the underlying dataframes
     """
     if self._pmap:
         return self._pmap
     G = len(self.rlzs_by_g)
     with hdf5.File(self.filename) as dstore:
         for start, stop in self.slices:
             poes_df = dstore.read_df('_poes', slc=slice(start, stop))
             for sid, df in poes_df.groupby('sid'):
                 try:
                     array = self._pmap[sid].array
                 except KeyError:
                     array = numpy.zeros((self.L, G))
                     self._pmap[sid] = probability_map.ProbabilityCurve(
                         array)
                 array[df.lid, df.gid] = df.poe
     return self._pmap
Example #24
def get_source_ids(oqparam):
    """
    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        the complete set of source IDs found in all the source models
    """
    source_ids = set()
    for fname in oqparam.inputs['source']:
        if fname.endswith('.hdf5'):
            with hdf5.File(fname, 'r') as f:
                for sg in f['/']:
                    for src in sg:
                        source_ids.add(src.source_id)
        else:
            for sg in read_source_groups(fname):
                for src_node in sg:
                    source_ids.add(src_node['id'])
    return source_ids
Example #25
def init_performance(hdf5file, swmr=False):
    """
    :param hdf5file: file name or hdf5.File instance
    """
    fname = isinstance(hdf5file, str)
    h5 = hdf5.File(hdf5file, 'a') if fname else hdf5file
    if 'performance_data' not in h5:
        hdf5.create(h5, 'performance_data', perf_dt)
    if 'task_info' not in h5:
        hdf5.create(h5, 'task_info', task_info_dt)
    if 'task_sent' not in h5:
        h5['task_sent'] = '{}'
    if swmr:
        try:
            h5.swmr_mode = True
        except ValueError as exc:
            raise ValueError('%s: %s' % (hdf5file, exc))
    if fname:
        h5.close()
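
A minimal usage sketch mirroring the tests above (the temporary path is illustrative); passing a file name makes the function open, populate and close the file by itself:

tmp = '/tmp/calc_demo.hdf5'                  # illustrative path
init_performance(tmp)                        # creates the file and the empty datasets
with hdf5.File(tmp, 'r') as h5:
    assert 'performance_data' in h5 and 'task_info' in h5
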
Example #26
def store_sm(smodel, filename, monitor):
    """
    :param smodel: a :class:`openquake.hazardlib.nrml.SourceModel` instance
    :param filename: path to an hdf5 file (cache_XXX.hdf5)
    :param monitor: a Monitor instance with an .hdf5 attribute
    """
    h5 = monitor.hdf5
    with monitor('store source model'):
        sources = h5['source_info']
        source_geom = h5['source_geom']
        gid = len(source_geom)
        for sg in smodel:
            if filename:
                with hdf5.File(filename, 'r+') as hdf5cache:
                    hdf5cache['grp-%02d' % sg.id] = sg
            srcs = []
            geoms = []
            for src in sg:
                srcgeom = src.geom()
                n = len(srcgeom)
                geom = numpy.zeros(n, point3d)
                geom['lon'], geom['lat'], geom['depth'] = srcgeom.T
                if len(geom) > 1:  # more than a point source
                    msg = 'source %s' % src.source_id
                    try:
                        geo.utils.check_extent(geom['lon'], geom['lat'], msg)
                    except ValueError as err:
                        logging.error(str(err))
                dic = {
                    k: v
                    for k, v in vars(src).items()
                    if k != 'id' and k != 'src_group_id'
                }
                src.checksum = zlib.adler32(pickle.dumps(dic))
                srcs.append((sg.id, src.source_id, src.code, gid, gid + n,
                             src.num_ruptures, 0, 0, 0, src.checksum))
                geoms.append(geom)
                gid += n
            if geoms:
                hdf5.extend(source_geom, numpy.concatenate(geoms))
            if sources:
                hdf5.extend(sources, numpy.array(srcs, source_info_dt))
Example #27
 def get_rupture_getters(self):
     """
     :returns: a list of RuptureGetters
     """
     dstore = (self.datastore.parent
               if self.datastore.parent else self.datastore)
     hdf5cache = dstore.hdf5cache()
     with hdf5.File(hdf5cache, 'r+') as cache:
         if 'rupgeoms' not in cache:
             dstore.hdf5.copy('rupgeoms', cache)
     rgetters = get_rupture_getters(dstore,
                                    split=self.oqparam.concurrent_tasks,
                                    hdf5cache=hdf5cache)
     num_events = self.E if hasattr(self, 'E') else len(dstore['events'])
     num_ruptures = len(dstore['ruptures'])
     logging.info('Found {:,d} ruptures and {:,d} events'.format(
         num_ruptures, num_events))
     if self.datastore.parent:
         self.datastore.parent.close()
     return rgetters
Example #28
 def flush(self, hdf5path):
     """
     Save the measurements on the performance file
     """
     if not self.children:
         data = self.get_data()
     else:
         lst = [self.get_data()]
         for child in self.children:
             lst.append(child.get_data())
             child.reset()
         data = numpy.concatenate(lst)
     if len(data) == 0:  # no information
         return
     elif not os.path.exists(hdf5path):
         with hdf5.File(hdf5path, 'w') as h5:
             hdf5.create(h5, 'performance_data', perf_dt)
             hdf5.create(h5, 'task_info', task_info_dt)
     hdf5.extend3(hdf5path, 'performance_data', data)
     self.reset()
Example #29
    def test_from_sites(self):
        s1 = Site(location=Point(10, 20, 30),
                  vs30=1.2,
                  vs30measured=True,
                  z1pt0=3.4,
                  z2pt5=5.6,
                  backarc=True)
        s2 = Site(location=Point(-1.2, -3.4, -5.6),
                  vs30=55.4,
                  vs30measured=False,
                  z1pt0=66.7,
                  z2pt5=88.9,
                  backarc=False)
        cll = SiteCollection([s1, s2])
        self.assertTrue((cll.vs30 == [1.2, 55.4]).all())
        self.assertTrue((cll.vs30measured == [True, False]).all())
        self.assertTrue((cll.z1pt0 == [3.4, 66.7]).all())
        self.assertTrue((cll.z2pt5 == [5.6, 88.9]).all())
        self.assertTrue((cll.mesh.lons == [10, -1.2]).all())
        self.assertTrue((cll.mesh.lats == [20, -3.4]).all())
        self.assertTrue((cll.backarc == [True, False]).all())
        self.assertIs(cll.mesh.depths, None)
        for arr in (cll.vs30, cll.z1pt0, cll.z2pt5):
            self.assertIsInstance(arr, numpy.ndarray)
            self.assertEqual(arr.flags.writeable, False)
            self.assertEqual(arr.dtype, float)
        for arr in (cll.vs30measured, cll.backarc):
            self.assertIsInstance(arr, numpy.ndarray)
            self.assertEqual(arr.flags.writeable, False)
            self.assertEqual(arr.dtype, bool)
        self.assertEqual(len(cll), 2)

        # test serialization to hdf5
        fd, fpath = tempfile.mkstemp(suffix='.hdf5')
        os.close(fd)
        with hdf5.File(fpath, 'w') as f:
            f['folder'] = dict(sitecol=cll, b=[2, 3])
            newcll = f['folder/sitecol']
            self.assertEqual(newcll, cll)
            self.assertEqual(list(f['folder/b']), [2, 3])
        os.remove(fpath)
Example #30
 def init(self):
     """
     Initialize the computers. Should be called on the workers
     """
     if hasattr(self, 'computers'):  # init already called
         return
     with hdf5.File(self.rupgetter.filename, 'r') as parent:
         self.weights = parent['weights'][()]
     self.computers = []
     for ebr in self.rupgetter.get_ruptures(self.srcfilter):
         sitecol = self.sitecol.filtered(ebr.sids)
         try:
             computer = calc.gmf.GmfComputer(
                 ebr, sitecol, self.oqparam.imtls, self.cmaker,
                 self.oqparam.truncation_level, self.correl_model)
         except FarAwayRupture:
             # due to numeric errors, ruptures within the maximum_distance
             # when written, can be outside when read; I found a case with
             # a distance of 99.9996936 km over a maximum distance of 100 km
             continue
         self.computers.append(computer)