Example #1
def save_source_info(csm, h5):
    data = {}  # src_id -> row
    wkts = []
    lens = []
    for sg in csm.src_groups:
        for src in sg:
            lens.append(len(src.trt_smrs))
            row = [
                src.source_id, src.grp_id, src.code, 0, 0, 0,
                csm.full_lt.trti[src.tectonic_region_type], 0
            ]
            wkts.append(src._wkt)
            data[src.id] = row
    logging.info('There are %d groups and %d sources with len(trt_smrs)=%.2f',
                 len(csm.src_groups), sum(len(sg) for sg in csm.src_groups),
                 numpy.mean(lens))
    csm.source_info = data  # src_id -> row
    if h5:
        attrs = dict(atomic=any(grp.atomic for grp in csm.src_groups))
        # avoid hdf5 damned bug by creating source_info in advance
        hdf5.create(h5, 'source_info', source_info_dt, attrs=attrs)
        h5['source_wkt'] = numpy.array(wkts, hdf5.vstr)
        h5['trt_smrs'] = csm.get_trt_smrs()
        h5['toms'] = numpy.array([get_tom_name(sg) for sg in csm.src_groups],
                                 hdf5.vstr)
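The pattern here recurs throughout these examples: hdf5.create makes an empty, extendable dataset up front (working around the HDF5 bug mentioned in the comment) and hdf5.extend later appends records to it. A minimal stand-in sketch using plain h5py; the helper bodies and the dtype are illustrative, not the engine's actual implementation:

import numpy
import h5py

def create(h5, name, dtype, attrs=None):
    # pre-create an empty 1-D dataset that can grow along its first axis
    dset = h5.create_dataset(name, (0,), dtype, maxshape=(None,), chunks=True)
    for k, v in (attrs or {}).items():
        dset.attrs[k] = v
    return dset

def extend(dset, array):
    # append the records of `array` at the end of the dataset
    n = len(dset)
    dset.resize((n + len(array),))
    dset[n:] = array

demo_dt = numpy.dtype([('source_id', 'S32'), ('grp_id', numpy.uint16)])
with h5py.File('/tmp/demo.hdf5', 'w') as h5:
    info = create(h5, 'source_info', demo_dt, attrs=dict(atomic=False))
    extend(info, numpy.array([(b'A', 0), (b'B', 1)], demo_dt))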
Example #2
    def __init__(self, task_func, task_args=(), monitor=None, distribute=None,
                 progress=logging.info):
        self.__class__.init(distribute=distribute or OQ_DISTRIBUTE)
        self.task_func = task_func
        self.monitor = monitor or Monitor(task_func.__name__)
        self.calc_id = getattr(self.monitor, 'calc_id', None)
        self.name = self.monitor.operation or task_func.__name__
        self.task_args = task_args
        self.distribute = distribute or oq_distribute(task_func)
        self.progress = progress
        try:
            self.num_tasks = len(self.task_args)
        except TypeError:  # generators have no len
            self.num_tasks = None
        # a task can be a function, a class or an instance with a __call__
        if inspect.isfunction(task_func):
            self.argnames = inspect.getfullargspec(task_func).args
        elif inspect.isclass(task_func):
            self.argnames = inspect.getfullargspec(task_func.__init__).args[1:]
        else:  # instance with a __call__ method
            self.argnames = inspect.getfullargspec(task_func.__call__).args[1:]
        self.receiver = 'tcp://%s:%s' % (
            config.dbserver.listen, config.dbserver.receiver_ports)
        self.sent = numpy.zeros(len(self.argnames) - 1)
        self.monitor.backurl = None  # overridden later
        self.tasks = []  # populated by .submit
        h5 = self.monitor.hdf5
        task_info = 'task_info/' + self.name
        if h5 and task_info not in h5:  # first time
            # task_info and performance_data should be generated in advance
            hdf5.create(h5, task_info, task_info_dt)
        if h5 and 'performance_data' not in h5:
            hdf5.create(h5, 'performance_data', perf_dt)
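A quick illustration of the argnames introspection above, covering the three kinds of callables a task can be (the task definitions are made up):

import inspect

def task(job, monitor):  # plain function
    pass

class TaskClass:  # class: arguments come from __init__, minus self
    def __init__(self, job, monitor):
        pass

class TaskCallable:  # instance with a __call__ method, minus self
    def __call__(self, job, monitor):
        pass

print(inspect.getfullargspec(task).args)                         # ['job', 'monitor']
print(inspect.getfullargspec(TaskClass.__init__).args[1:])       # ['job', 'monitor']
print(inspect.getfullargspec(TaskCallable().__call__).args[1:])  # ['job', 'monitor']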
Example #3
    def _genargs(self, backurl=None, pickle=True):
        """
        Add .task_no and .weight to the monitor and yield back
        the arguments by pickling them.
        """
        task_info = 'task_info/' + self.name
        for task_no, args in enumerate(self.task_args, 1):
            mon = args[-1]
            assert isinstance(mon, Monitor), mon
            if mon.hdf5 and task_no == 1:
                self.hdf5 = mon.hdf5
                if task_info not in self.hdf5:  # first time
                    # task_info and performance_data should be generated in advance
                    hdf5.create(mon.hdf5, task_info, task_data_dt)
                if 'performance_data' not in self.hdf5:
                    hdf5.create(mon.hdf5, 'performance_data', perf_dt)

            # add incremental task number and task weight
            mon.task_no = task_no
            mon.weight = getattr(args[0], 'weight', 1.)
            mon.backurl = backurl
            self.calc_id = getattr(mon, 'calc_id', None)
            if pickle:
                args = pickle_sequence(args)
                self.sent += numpy.array([len(p) for p in args])
            yield args
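A hypothetical stand-in for pickle_sequence, showing how the .sent byte accounting above works (the real helper returns wrapper objects, but the byte counting is the same idea):

import pickle
import numpy

def pickle_sequence(args):
    # pickle each argument separately so its size can be measured
    return [pickle.dumps(a, pickle.HIGHEST_PROTOCOL) for a in args]

args = ('group-1', list(range(100)), {'weight': 1.0})
pickled = pickle_sequence(args)
sent = numpy.array([len(p) for p in pickled])
print(sent, sent.sum())  # bytes sent per argument and in total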
Example #4
def get_composite_source_model(oqparam, full_lt=None, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param full_lt:
        a :class:`openquake.commonlib.logictree.FullLogicTree` or None
    :param h5:
        an open hdf5.File in which to store the source info
    """
    if full_lt is None:
        full_lt = get_full_lt(oqparam)
    csm = get_csm(oqparam, full_lt, h5)
    grp_ids = csm.get_grp_ids()
    gidx = {tuple(arr): i for i, arr in enumerate(grp_ids)}
    if oqparam.is_event_based():
        csm.init_serials(oqparam.ses_seed)
    data = {}  # src_id -> row
    mags = AccumDict(accum=set())  # trt -> mags
    wkts = []
    ns = 0
    for sg in csm.src_groups:
        if hasattr(sg, 'mags'):  # UCERF
            mags[sg.trt].update('%.2f' % mag for mag in sg.mags)
        for src in sg:
            ns += 1
            if src.source_id in data:
                num_sources = data[src.source_id][3] + 1
            else:
                num_sources = 1
            row = [
                src.source_id, gidx[tuple(src.grp_ids)], src.code, num_sources,
                0, 0, 0, src.checksum, src.serial
            ]
            wkts.append(src._wkt)  # this is a bit slow but okay
            data[src.source_id] = row
            if hasattr(src, 'mags'):  # UCERF
                continue  # already accounted for in sg.mags
            elif hasattr(src, 'data'):  # nonparametric
                srcmags = ['%.2f' % item[0].mag for item in src.data]
            else:
                srcmags = [
                    '%.2f' % item[0]
                    for item in src.get_annual_occurrence_rates()
                ]
            mags[sg.trt].update(srcmags)

    logging.info('There are %d sources with %d unique IDs', ns, len(data))
    if h5:
        hdf5.create(h5, 'source_info', source_info_dt)  # avoid hdf5 damned bug
        h5['source_wkt'] = numpy.array(wkts, hdf5.vstr)
        for trt in mags:
            h5['source_mags/' + trt] = numpy.array(sorted(mags[trt]))
        h5['grp_ids'] = grp_ids
    csm.gsim_lt.check_imts(oqparam.imtls)
    csm.source_info = data
    if os.environ.get('OQ_CHECK_INPUT'):
        source.check_complex_faults(csm.get_sources())
    return csm
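The per-TRT magnitude accumulation above can be mimicked with a plain defaultdict(set) standing in for AccumDict(accum=set()); the (mag, rate) pairs below are made up:

from collections import defaultdict

mags = defaultdict(set)  # trt -> set of magnitude strings
occurrence_rates = [(5.05, 0.01), (5.5501, 0.002), (6.0, 0.0005)]
mags['Active Shallow Crust'].update(
    '%.2f' % mag for mag, rate in occurrence_rates)
print(sorted(mags['Active Shallow Crust']))  # ['5.05', '5.55', '6.00']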
Example #5
def get_composite_source_model(oqparam, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param h5:
        an open hdf5.File in which to store the source info
    """
    full_lt = get_full_lt(oqparam)
    if oqparam.csm_cache and not oqparam.is_ucerf():
        csm = _get_csm_cached(oqparam, full_lt, h5)
    else:
        csm = get_csm(oqparam, full_lt, h5)
    grp_ids = csm.get_grp_ids()
    gidx = {tuple(arr): i for i, arr in enumerate(grp_ids)}
    if oqparam.is_event_based():
        csm.init_serials(oqparam.ses_seed)
    data = {}  # src_id -> row
    mags = AccumDict(accum=set())  # trt -> mags
    wkts = []
    ns = -1
    for sg in csm.src_groups:
        if hasattr(sg, 'mags'):  # UCERF
            mags[sg.trt].update('%.2f' % mag for mag in sg.mags)
        for src in sg:
            if src.source_id in data:
                multiplicity = data[src.source_id][MULTIPLICITY] + 1
            else:
                multiplicity = 1
                ns += 1
            src.gidx = gidx[tuple(src.grp_ids)]
            row = [src.source_id, src.gidx, src.code,
                   multiplicity, 0, 0, 0, src.checksum, src.serial or ns,
                   full_lt.trti[src.tectonic_region_type]]
            wkts.append(src._wkt)  # this is a bit slow but okay
            data[src.source_id] = row
            if hasattr(src, 'mags'):  # UCERF
                continue  # already accounted for in sg.mags
            elif hasattr(src, 'data'):  # nonparametric
                srcmags = ['%.2f' % item[0].mag for item in src.data]
            else:
                srcmags = ['%.2f' % item[0] for item in
                           src.get_annual_occurrence_rates()]
            mags[sg.trt].update(srcmags)
    logging.info('There are %d sources', ns + 1)
    if h5:
        attrs = dict(atomic=any(grp.atomic for grp in csm.src_groups))
        # avoid hdf5 damned bug by creating source_info in advance
        hdf5.create(h5, 'source_info', source_info_dt, attrs=attrs)
        h5['source_wkt'] = numpy.array(wkts, hdf5.vstr)
        for trt in mags:
            h5['source_mags/' + trt] = numpy.array(sorted(mags[trt]))
        h5['grp_ids'] = grp_ids
    csm.gsim_lt.check_imts(oqparam.imtls)
    csm.source_info = data  # src_id -> row
    if os.environ.get('OQ_CHECK_INPUT'):
        source.check_complex_faults(csm.get_sources())
    return csm
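A small sketch of the gidx mapping built in Examples #4 and #5: each group is identified by an array of indices, and equal tuples collapse onto the same group index (the arrays are illustrative):

import numpy

grp_ids = [numpy.array([0, 1]), numpy.array([2])]
gidx = {tuple(arr): i for i, arr in enumerate(grp_ids)}
assert gidx[(0, 1)] == 0 and gidx[(2,)] == 1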
Example #6
    def flush(self):
        """
        Save the measurements to the performance file (or to stdout)
        """
        for child in self.children:
            child.flush()
        data = self.get_data()
        if len(data) == 0:  # no information
            return []

        # reset monitor
        self.duration = 0
        self.mem = 0
        self.counts = 0

        if self.hdf5path:
            h5 = h5py.File(self.hdf5path, 'a')
            try:
                pdata = h5['performance_data']
            except KeyError:
                pdata = hdf5.create(h5, 'performance_data', perf_dt)
            hdf5.extend(pdata, data)
            h5.close()
        # else print(data[0]) on stdout

        return data
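Reading the accumulated measurements back is plain h5py; the file name below is illustrative:

import h5py

with h5py.File('/tmp/calc_1.hdf5', 'r') as h5:
    pdata = h5['performance_data'][()]  # a structured numpy array
    print(pdata.dtype.names, len(pdata))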
Example #7
def get_composite_source_model(oqparam, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param h5:
        an open hdf5.File in which to store the source info
    """
    ucerf = oqparam.calculation_mode.startswith('ucerf')
    source_model_lt = get_source_model_lt(oqparam, validate=not ucerf)
    trts = source_model_lt.tectonic_region_types
    trts_lower = {trt.lower() for trt in trts}
    reqv = oqparam.inputs.get('reqv', {})
    for trt in reqv:
        if trt.lower() not in trts_lower:
            raise ValueError('Unknown TRT=%s in %s [reqv]' %
                             (trt, oqparam.inputs['job_ini']))
    gsim_lt = get_gsim_lt(oqparam, trts or ['*'])
    p = source_model_lt.num_paths * gsim_lt.get_num_paths()
    if oqparam.number_of_logic_tree_samples:
        logging.info('Considering {:_d} logic tree paths out of {:_d}'.format(
            oqparam.number_of_logic_tree_samples, p))
    else:  # full enumeration
        if (oqparam.is_event_based() and
            (oqparam.ground_motion_fields or oqparam.hazard_curves_from_gmfs)
                and p > oqparam.max_potential_paths):
            raise ValueError(
                'There are too many potential logic tree paths (%d): '
                'use sampling instead of full enumeration or reduce the '
                'source model with oq reduce_sm' % p)
        logging.info('Potential number of logic tree paths = {:_d}'.format(p))

    if source_model_lt.on_each_source:
        logging.info('There is a logic tree on each source')
    ltmodels = get_ltmodels(oqparam, gsim_lt, source_model_lt, h5)
    csm = source.CompositeSourceModel(gsim_lt, source_model_lt, ltmodels)
    key = operator.attrgetter('source_id', 'checksum')
    srcidx = 0
    if h5:
        info = hdf5.create(h5, 'source_info', source_info_dt)
    data = []
    for k, srcs in groupby(csm.get_sources(), key).items():
        for src in srcs:
            src.id = srcidx
        data.append((0, src.src_group_ids[0], src.source_id, src.code,
                     src.num_ruptures, 0, 0, 0, src.checksum, src._wkt))
        srcidx += 1
    if h5:
        hdf5.extend(info, numpy.array(data, source_info_dt))
    if oqparam.is_event_based():
        # initialize the rupture rup_id numbers before splitting/filtering; in
        # this way the serials are independent from the site collection
        csm.init_serials(oqparam.ses_seed)

    if oqparam.disagg_by_src:
        csm = csm.grp_by_src()  # one group per source

    csm.info.gsim_lt.check_imts(oqparam.imtls)
    return csm
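A sketch of the indexing loop above, with itertools.groupby standing in for the engine's groupby helper (which returns a dict of lists): sources sharing (source_id, checksum) receive the same sequential index.

import itertools
import operator

class FakeSource:  # minimal stand-in for an engine source object
    def __init__(self, source_id, checksum):
        self.source_id, self.checksum = source_id, checksum

srcs = [FakeSource('A', 1), FakeSource('A', 1), FakeSource('B', 2)]
key = operator.attrgetter('source_id', 'checksum')
srcidx = 0
for _, group in itertools.groupby(sorted(srcs, key=key), key):
    for src in group:  # duplicated sources share the same index
        src.id = srcidx
    srcidx += 1
print([(s.source_id, s.id) for s in srcs])  # [('A', 0), ('A', 0), ('B', 1)]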
Example #8
    def create_dframe(self, key, nametypes, compression=None, **kw):
        """
        Create an HDF5 datagroup readable as a pandas DataFrame

        :param key: name of the dataset
        :param nametypes: list of pairs (name, dtype) or (name, array)
        :param compression: the kind of HDF5 compression to use
        :param kw: attributes to add
        """
        names = []
        for name, value in nametypes:
            is_array = isinstance(value, numpy.ndarray)
            if is_array:
                dt = value.dtype
            else:
                dt = value
            dset = hdf5.create(self.hdf5, f'{key}/{name}', dt, (None, ),
                               compression)
            if is_array:
                hdf5.extend(dset, value)
            names.append(name)
        attrs = self.hdf5[key].attrs
        attrs['__pdcolumns__'] = ' '.join(names)
        for k, v in kw.items():
            attrs[k] = v
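A sketch of reading back a datagroup written by create_dframe, assuming the layout shown above (one 1-D dataset per column plus a __pdcolumns__ attribute); the key name is illustrative:

import pandas
import h5py

with h5py.File('/tmp/calc_1.hdf5', 'r') as h5:
    grp = h5['agg_losses']  # a datagroup created by create_dframe
    columns = grp.attrs['__pdcolumns__'].split()
    df = pandas.DataFrame({name: grp[name][()] for name in columns})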
Example #9
def get_composite_source_model(oqparam, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param h5:
        an open hdf5.File in which to store the source info
    """
    logging.info('Reading the CompositeSourceModel')
    full_lt = get_full_lt(oqparam)
    if oqparam.cachedir and not oqparam.is_ucerf():
        csm = _get_cachedir(oqparam, full_lt, h5)
    else:
        csm = get_csm(oqparam, full_lt, h5)
    et_ids = csm.get_et_ids()
    logging.info('%d effective smlt realization(s)', len(full_lt.sm_rlzs))
    data = {}  # src_id -> row
    mags_by_trt = csm.get_mags_by_trt()
    wkts = []
    lens = []
    for sg in csm.src_groups:
        for src in sg:
            lens.append(len(src.et_ids))
            row = [
                src.source_id, src.grp_id, src.code, 0, 0, 0,
                full_lt.trti[src.tectonic_region_type], 0
            ]
            wkts.append(src._wkt)
            data[src.id] = row
    logging.info('There are %d groups and %d sources with len(et_ids)=%.2f',
                 len(csm.src_groups), sum(len(sg) for sg in csm.src_groups),
                 numpy.mean(lens))
    if h5:
        attrs = dict(atomic=any(grp.atomic for grp in csm.src_groups))
        # avoid hdf5 damned bug by creating source_info in advance
        hdf5.create(h5, 'source_info', source_info_dt, attrs=attrs)
        h5['source_wkt'] = numpy.array(wkts, hdf5.vstr)
        h5['et_ids'] = et_ids
        for trt in mags_by_trt:
            h5['source_mags/' + trt] = numpy.array(mags_by_trt[trt])
        oqparam.maximum_distance.interp(mags_by_trt)
    csm.gsim_lt.check_imts(oqparam.imtls)
    csm.source_info = data  # src_id -> row
    if os.environ.get('OQ_CHECK_INPUT'):
        source.check_complex_faults(csm.get_sources())
    return csm
Example #10
def get_ltmodels(oq, gsim_lt, source_model_lt, h5=None):
    """
    Build source models from the logic tree and store
    them inside the `source_info` dataset.
    """
    if oq.pointsource_distance['default'] == {}:
        spinning_off = False
    else:
        spinning_off = sum(oq.pointsource_distance.values()) == 0
    if spinning_off:
        logging.info('Removing nodal plane and hypocenter distributions')
    # NB: the source models file are often NOT in the shared directory
    # (for instance in oq-engine/demos) so the processpool must be used
    dist = ('no' if os.environ.get('OQ_DISTRIBUTE') == 'no'
            else 'processpool')
    smlt_dir = os.path.dirname(source_model_lt.filename)
    converter = sourceconverter.SourceConverter(
        oq.investigation_time, oq.rupture_mesh_spacing,
        oq.complex_fault_mesh_spacing, oq.width_of_mfd_bin,
        oq.area_source_discretization, oq.minimum_magnitude,
        not spinning_off, oq.source_id)
    if h5:
        sources = hdf5.create(h5, 'source_info', source_info_dt)
    lt_models = list(source_model_lt.gen_source_models(gsim_lt))
    if oq.calculation_mode.startswith('ucerf'):
        idx = 0
        [grp] = nrml.to_python(oq.inputs["source_model"], converter)
        for grp_id, ltm in enumerate(lt_models):
            sg = copy.copy(grp)
            sg.id = grp_id
            ltm.src_groups = [sg]
            src = sg[0].new(ltm.ordinal, ltm.names)  # one source
            src.src_group_id = grp_id
            src.id = idx
            idx += 1
            if oq.number_of_logic_tree_samples:
                src.samples = ltm.samples
            sg.sources = [src]
            data = [((grp_id, grp_id, src.source_id, src.code,
                      0, 0, -1, src.num_ruptures, 0, ''))]
            hdf5.extend(sources, numpy.array(data, source_info_dt))
        return lt_models

    logging.info('Reading the source model(s) in parallel')
    allargs = []
    fileno = 0
    for ltm in lt_models:
        apply_unc = functools.partial(
            source_model_lt.apply_uncertainties, ltm.path)
        for name in ltm.names.split():
            fname = os.path.abspath(os.path.join(smlt_dir, name))
            allargs.append((ltm, apply_unc, fname, fileno))
            fileno += 1
    smap = parallel.Starmap(
        SourceReader(converter, smlt_dir, h5),
        allargs, distribute=dist, h5=h5 if h5 else None)
    # NB: h5 is None in logictree_test.py
    return _store_results(smap, lt_models, source_model_lt, gsim_lt, oq, h5)
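The apply_unc above is a partial application: the logic-tree branch path is frozen in first and each worker later supplies the parsed source model. A toy illustration with a made-up signature:

import functools

def apply_uncertainties(branch_path, source_model):  # illustrative signature
    return branch_path, source_model

apply_unc = functools.partial(apply_uncertainties, ['b1', 'b2'])
print(apply_unc('parsed-source-model'))  # (['b1', 'b2'], 'parsed-source-model')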
Example #11
def init_performance(hdf5file, swmr=False):
    """
    :param hdf5file: file name or hdf5.File instance
    :param swmr: if True, enable the Single-Writer/Multiple-Reader mode
    """
    fname = isinstance(hdf5file, str)
    h5 = hdf5.File(hdf5file, 'a') if fname else hdf5file
    if 'performance_data' not in h5:
        hdf5.create(h5, 'performance_data', perf_dt)
    if 'task_info' not in h5:
        hdf5.create(h5, 'task_info', task_info_dt)
    if 'task_sent' not in h5:
        h5['task_sent'] = '{}'
    if swmr:
        try:
            h5.swmr_mode = True
        except ValueError as exc:
            raise ValueError('%s: %s' % (hdf5file, exc))
    if fname:
        h5.close()
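A minimal usage sketch, assuming the surrounding module's imports: init_performance accepts either a file name (opened and closed internally) or an already open hdf5.File (left open for the caller):

init_performance('/tmp/calc_1.hdf5')      # by name: open, initialize, close
with hdf5.File('/tmp/calc_1.hdf5', 'a') as h5:
    init_performance(h5)                  # idempotent: existing datasets are kept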
Example #12
def dump(temppath, perspath):
    """
    Dump the performance info into a persistent file,
    then remove the temporary file.

    :param temppath: the temporary file
    :param perspath: the persistent file
    """
    with hdf5.File(temppath, 'r') as h, hdf5.File(perspath, 'r+') as h5:
        if 'performance_data' not in h5:
            hdf5.create(h5, 'performance_data', perf_dt)
        if 'task_info' not in h5:
            hdf5.create(h5, 'task_info', task_info_dt)
        hdf5.extend(h5['performance_data'], h['performance_data'][()])
        hdf5.extend(h5['task_info'], h['task_info'][()])
        for k, v in h['task_info'].attrs.items():
            h5['task_info'].attrs[k] = v

    os.remove(temppath)
Example #13
    def flush(self, hdf5path):
        """
        Save the measurements to the performance file
        """
        if not self.children:
            data = self.get_data()
        else:
            lst = [self.get_data()]
            for child in self.children:
                lst.append(child.get_data())
                child.reset()
            data = numpy.concatenate(lst)
        if len(data) == 0:  # no information
            return
        elif not os.path.exists(hdf5path):
            with hdf5.File(hdf5path, 'w') as h5:
                hdf5.create(h5, 'performance_data', perf_dt)
                hdf5.create(h5, 'task_info', task_info_dt)
        hdf5.extend3(hdf5path, 'performance_data', data)
        self.reset()
Example #14
def get_composite_source_model(oqparam, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param h5:
        an open hdf5.File in which to store the source info
    """
    # first read the logic tree
    full_lt = get_full_lt(oqparam)

    # then read the composite source model from the cache if possible
    if oqparam.cachedir and not os.path.exists(oqparam.cachedir):
        os.makedirs(oqparam.cachedir)
    if oqparam.cachedir and not oqparam.is_ucerf():
        # for UCERF pickling the csm is slower
        checksum = get_checksum32(oqparam, h5)
        fname = os.path.join(oqparam.cachedir, 'csm_%s.pik' % checksum)
        if os.path.exists(fname):
            logging.info('Reading %s', fname)
            with open(fname, 'rb') as f:
                csm = pickle.load(f)
                csm.full_lt = full_lt
            if h5:
                # avoid errors with --reuse_hazard
                h5['et_ids'] = csm.get_et_ids()
                hdf5.create(h5, 'source_info', source_info_dt)
            _check_csm(csm, oqparam, h5)
            return csm

    # read and process the composite source model from the input files
    csm = get_csm(oqparam, full_lt, h5)
    save_source_info(csm, h5)
    if oqparam.cachedir and not oqparam.is_ucerf():
        logging.info('Saving %s', fname)
        with open(fname, 'wb') as f:
            pickle.dump(csm, f)

    _check_csm(csm, oqparam, h5)
    return csm
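The caching logic above boils down to a checksum-keyed pickle cache. A generic sketch with illustrative names:

import os
import pickle

def load_or_build(cachedir, checksum, build):
    # reuse the pickled object if a file for this checksum already exists
    fname = os.path.join(cachedir, 'csm_%s.pik' % checksum)
    if os.path.exists(fname):
        with open(fname, 'rb') as f:
            return pickle.load(f)
    obj = build()  # expensive: parse and process the input files
    with open(fname, 'wb') as f:
        pickle.dump(obj, f)
    return obj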
Example #15
    def create_dset(self, key, dtype, shape=(None,), compression=None,
                    fillvalue=0, attrs=None):
        """
        Create a one-dimensional HDF5 dataset.

        :param key: name of the dataset
        :param dtype: dtype of the dataset (usually composite)
        :param shape: shape of the dataset, possibly extendable
        :param compression: the kind of HDF5 compression to use
        :param attrs: dictionary of attributes of the dataset
        :returns: a HDF5 dataset
        """
        return hdf5.create(
            self.hdf5, key, dtype, shape, compression, fillvalue, attrs)
Example #16
    def extend(self, key, array):
        """
        Extend the dataset associated with the given key; create it if needed

        :param key: name of the dataset
        :param array: array to store
        """
        try:
            dset = self.hdf5[key]
        except KeyError:
            dset = hdf5.create(self.hdf5, key, array.dtype,
                               shape=(None,) + array.shape[1:])
        hdf5.extend(dset, array)
        return dset
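A usage sketch for this extend method, where dstore stands for any object exposing it and the dataset name is hypothetical: the first call creates an extendable dataset from the array's dtype, later calls append to it.

import numpy

arr = numpy.zeros(3, [('eid', numpy.uint32), ('loss', numpy.float64)])
dstore.extend('demo_losses', arr)  # dstore is hypothetical; creates a (None,)-shaped dataset
dstore.extend('demo_losses', arr)  # now 6 records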
Example #17
    def create_dset(self, key, dtype, shape=(None,), compression=None,
                    fillvalue=0, attrs=None):
        """
        Create a one-dimensional HDF5 dataset.

        :param key: name of the dataset
        :param dtype: dtype of the dataset (usually composite)
        :param shape: shape of the dataset, possibly extendable
        :param compression: the kind of HDF5 compression to use
        :param attrs: dictionary of attributes of the dataset
        :returns: a HDF5 dataset
        """
        return hdf5.create(
            self.hdf5, key, dtype, shape, compression, fillvalue, attrs)
Example #18
    def extend(self, key, array, **attrs):
        """
        Extend the dataset associated with the given key; create it if needed

        :param key: name of the dataset
        :param array: array to store
        :param attrs: a dictionary of attributes
        """
        try:
            dset = self.hdf5[key]
        except KeyError:
            dset = hdf5.create(self.hdf5, key, array.dtype,
                               shape=(None,) + array.shape[1:])
        hdf5.extend(dset, array)
        for k, v in attrs.items():
            dset.attrs[k] = v
        return dset
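Compared to Example #16, this variant also attaches keyword attributes on the fly. Same usage sketch, with dstore, the dataset name and the attribute purely illustrative:

import numpy

arr = numpy.zeros(3, [('eid', numpy.uint32), ('loss', numpy.float64)])
dset = dstore.extend('demo_losses', arr, investigation_time=50.0)  # hypothetical names
print(dset.attrs['investigation_time'])  # 50.0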
Example #19
    def extend(self, key, array, **attrs):
        """
        Extend the dataset associated with the given key; create it if needed

        :param key: name of the dataset
        :param array: array to store
        :param attrs: a dictionary of attributes
        """
        try:
            dset = self.hdf5[key]
        except KeyError:
            dset = hdf5.create(self.hdf5, key, array.dtype,
                               shape=(None,) + array.shape[1:])
        hdf5.extend(dset, array)
        for k, v in attrs.items():
            dset.attrs[k] = v
        return dset
Example #20
def get_composite_source_model(oqparam, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param h5:
        an open hdf5.File in which to store the source info
    """
    full_lt = get_full_lt(oqparam)
    if oqparam.cachedir and not oqparam.is_ucerf():
        csm = _get_cachedir(oqparam, full_lt, h5)
    else:
        csm = get_csm(oqparam, full_lt, h5)
    et_ids = csm.get_et_ids()
    logging.info('%d effective smlt realization(s)', len(full_lt.sm_rlzs))
    grp_id = {tuple(arr): i for i, arr in enumerate(et_ids)}
    data = {}  # src_id -> row
    mags = AccumDict(accum=set())  # trt -> mags
    wkts = []
    lens = []
    for sg in csm.src_groups:
        if hasattr(sg, 'mags'):  # UCERF
            mags[sg.trt].update('%.2f' % mag for mag in sg.mags)
        for src in sg:
            lens.append(len(src.et_ids))
            src.grp_id = grp_id[tuple(src.et_ids)]
            row = [
                src.source_id, src.grp_id, src.code, 0, 0, 0, src.id,
                full_lt.trti[src.tectonic_region_type]
            ]
            wkts.append(src._wkt)  # this is a bit slow but okay
            data[src.source_id] = row
            if hasattr(src, 'mags'):  # UCERF
                continue  # already accounted for in sg.mags
            elif hasattr(src, 'data'):  # nonparametric
                srcmags = ['%.2f' % item[0].mag for item in src.data]
            else:
                srcmags = [
                    '%.2f' % item[0]
                    for item in src.get_annual_occurrence_rates()
                ]
            mags[sg.trt].update(srcmags)
    logging.info('There are %d groups and %d sources with len(et_ids)=%.1f',
                 len(csm.src_groups), sum(len(sg) for sg in csm.src_groups),
                 numpy.mean(lens))
    if h5:
        attrs = dict(atomic=any(grp.atomic for grp in csm.src_groups))
        # avoid hdf5 damned bug by creating source_info in advance
        hdf5.create(h5, 'source_info', source_info_dt, attrs=attrs)
        h5['source_wkt'] = numpy.array(wkts, hdf5.vstr)
        h5['et_ids'] = et_ids
        mags_by_trt = {}
        for trt in mags:
            mags_by_trt[trt] = arr = numpy.array(sorted(mags[trt]))
            h5['source_mags/' + trt] = arr
        oqparam.maximum_distance.interp(mags_by_trt)
    csm.gsim_lt.check_imts(oqparam.imtls)
    csm.source_info = data  # src_id -> row
    if os.environ.get('OQ_CHECK_INPUT'):
        source.check_complex_faults(csm.get_sources())
    return csm
Example #21
def get_source_models(oqparam,
                      gsim_lt,
                      source_model_lt,
                      monitor,
                      in_memory=True):
    """
    Build all the source models generated by the logic tree.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param gsim_lt:
        a :class:`openquake.commonlib.logictree.GsimLogicTree` instance
    :param source_model_lt:
        a :class:`openquake.commonlib.logictree.SourceModelLogicTree` instance
    :param monitor:
        a `openquake.baselib.performance.Monitor` instance
    :param in_memory:
        if True, keep the sources in memory; otherwise, just collect the TRTs
    :returns:
        an iterator over :class:`openquake.commonlib.logictree.LtSourceModel`
        tuples
    """
    make_sm = SourceModelFactory()
    converter = sourceconverter.SourceConverter(
        oqparam.investigation_time, oqparam.rupture_mesh_spacing,
        oqparam.complex_fault_mesh_spacing, oqparam.width_of_mfd_bin,
        oqparam.area_source_discretization, oqparam.source_id)
    if oqparam.calculation_mode.startswith('ucerf'):
        [grp] = nrml.to_python(oqparam.inputs["source_model"], converter)
    elif in_memory:
        logging.info('Reading the source model(s)')
        dic = logictree.parallel_read_source_models(gsim_lt, source_model_lt,
                                                    converter, monitor)

    # consider only the effective realizations
    smlt_dir = os.path.dirname(source_model_lt.filename)
    idx = 0
    grp_id = 0
    if monitor.hdf5:
        sources = hdf5.create(monitor.hdf5, 'source_info', source_info_dt)
        hdf5.create(monitor.hdf5, 'source_geom', point3d)
    for sm in source_model_lt.gen_source_models(gsim_lt):
        src_groups = []
        for name in sm.names.split():
            fname = os.path.abspath(os.path.join(smlt_dir, name))
            if oqparam.calculation_mode.startswith('ucerf'):
                sg = copy.copy(grp)
                sg.id = grp_id
                src = sg[0].new(sm.ordinal, sm.names)  # one source
                src.id = idx
                sg.sources = [src]
                src_groups.append(sg)
                idx += 1
                grp_id += 1
                data = [((sg.id, src.source_id, src.code, 0, 0,
                          src.num_ruptures, 0, 0, 0, 0, 0))]
                hdf5.extend(sources, numpy.array(data, source_info_dt))
            elif in_memory:
                apply_unc = source_model_lt.make_apply_uncertainties(sm.path)
                newsm = make_sm(fname, dic[fname], apply_unc,
                                oqparam.investigation_time)
                for sg in newsm:
                    for src in sg:
                        src.src_group_id = grp_id
                        src.id = idx
                        idx += 1
                    sg.id = grp_id
                    grp_id += 1
                if monitor.hdf5:
                    store_sm(newsm, monitor.hdf5)
                src_groups.extend(newsm.src_groups)
            else:  # just collect the TRT models
                src_groups.extend(logictree.read_source_groups(fname))

        if grp_id >= TWO16:
            # the limit is really needed only for event based calculations
            raise ValueError('There is a limit of %d src groups!' % TWO16)

        num_sources = sum(len(sg.sources) for sg in src_groups)
        sm.src_groups = src_groups
        trts = [mod.trt for mod in src_groups]
        source_model_lt.tectonic_region_types.update(trts)
        logging.info(
            'Processed source model %d with %d potential gsim path(s) and %d '
            'sources', sm.ordinal + 1, sm.num_gsim_paths, num_sources)

        gsim_file = oqparam.inputs.get('gsim_logic_tree')
        if gsim_file:  # check TRTs
            for src_group in src_groups:
                if src_group.trt not in gsim_lt.values:
                    raise ValueError(
                        "Found in %r a tectonic region type %r inconsistent "
                        "with the ones in %r" % (sm, src_group.trt, gsim_file))
        yield sm

    # log if some source file is being used more than once
    dupl = 0
    for fname, hits in make_sm.fname_hits.items():
        if hits > 1:
            logging.info('%s has been considered %d times', fname, hits)
            if not make_sm.changed_sources:
                dupl += hits
    if (dupl and not oqparam.optimize_same_id_sources
            and 'event_based' not in oqparam.calculation_mode):
        logging.warning(
            'You are doing redundant calculations: please make sure '
            'that different sources have different IDs and set '
            'optimize_same_id_sources=true in your .ini file')
    if make_sm.changed_sources:
        logging.info('Modified %d sources in the composite source model',
                     make_sm.changed_sources)
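The fname_hits bookkeeping at the end can be mimicked with a collections.Counter: each read of a source model file bumps its counter, and hits > 1 reveal files shared by several logic-tree branches:

from collections import Counter

fname_hits = Counter()
for fname in ['sm1.xml', 'sm2.xml', 'sm1.xml']:  # made-up file names
    fname_hits[fname] += 1
dupl = sum(hits for hits in fname_hits.values() if hits > 1)
print(fname_hits['sm1.xml'], dupl)  # 2 2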
Example #22
    def get_models(self):
        """
        :yields: :class:`openquake.commonlib.logictree.LtSourceModel` tuples
        """
        oq = self.oqparam
        spinning_off = oq.pointsource_distance == {'default': 0.0}
        if spinning_off:
            logging.info('Removing nodal plane and hypocenter distributions')
        dist = ('no'
                if os.environ.get('OQ_DISTRIBUTE') == 'no' else 'processpool')
        smlt_dir = os.path.dirname(self.source_model_lt.filename)
        converter = sourceconverter.SourceConverter(
            oq.investigation_time, oq.rupture_mesh_spacing,
            oq.complex_fault_mesh_spacing, oq.width_of_mfd_bin,
            oq.area_source_discretization, oq.minimum_magnitude,
            not spinning_off, oq.source_id)
        if oq.calculation_mode.startswith('ucerf'):
            [grp] = nrml.to_python(oq.inputs["source_model"], converter)
            dic = {'ucerf': grp}
        elif self.in_memory:
            logging.info('Reading the source model(s) in parallel')
            smap = parallel.Starmap(
                nrml.read_source_models,
                distribute=dist,
                hdf5path=self.hdf5.filename if self.hdf5 else None)
            for sm in self.source_model_lt.gen_source_models(self.gsim_lt):
                for name in sm.names.split():
                    fname = os.path.abspath(os.path.join(smlt_dir, name))
                    smap.submit([fname], converter)
            dic = {sm.fname: sm for sm in smap}
        else:
            dic = {}
        # consider only the effective realizations
        idx = 0
        if self.hdf5:
            sources = hdf5.create(self.hdf5, 'source_info', source_info_dt)
            hdf5.create(self.hdf5, 'source_geom', point3d)
            hdf5.create(self.hdf5, 'source_mfds', hdf5.vstr)
        grp_id = 0
        for sm in self.source_model_lt.gen_source_models(self.gsim_lt):
            if 'ucerf' in dic:
                sg = copy.copy(dic['ucerf'])
                sm.src_groups = [sg]
                sg.id = grp_id
                src = sg[0].new(sm.ordinal, sm.names)  # one source
                src.src_group_id = grp_id
                src.id = idx
                if oq.number_of_logic_tree_samples:
                    src.samples = sm.samples
                sg.sources = [src]
                idx += 1
                grp_id += 1
                data = [((sg.id, src.source_id, src.code, 0, 0, -1,
                          src.num_ruptures, 0, 0, 0, idx))]
                hdf5.extend(sources, numpy.array(data, source_info_dt))
            else:
                self.apply_uncertainties(sm, idx, dic)
            yield sm
            if self.hdf5:
                hdf5.extend(self.hdf5['source_mfds'],
                            numpy.array(list(self.mfds), hdf5.vstr))

        # log if some source file is being used more than once
        dupl = 0
        for fname, hits in self.fname_hits.items():
            if hits > 1:
                logging.info('%s has been considered %d times', fname, hits)
                if not self.changes:
                    dupl += hits
        if self.changes:
            logging.info('Applied %d changes to the composite source model',
                         self.changes)
Example #23
def get_source_models(oqparam, gsim_lt, source_model_lt, monitor,
                      in_memory=True, srcfilter=None):
    """
    Build all the source models generated by the logic tree.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param gsim_lt:
        a :class:`openquake.commonlib.logictree.GsimLogicTree` instance
    :param source_model_lt:
        a :class:`openquake.commonlib.logictree.SourceModelLogicTree` instance
    :param monitor:
        a `openquake.baselib.performance.Monitor` instance
    :param in_memory:
        if True, keep the sources in memory; otherwise, just collect the TRTs
    :param srcfilter:
        a SourceFilter instance with a .filename pointing to the cache file
    :returns:
        an iterator over :class:`openquake.commonlib.logictree.LtSourceModel`
        tuples
    """
    make_sm = SourceModelFactory()
    spinning_off = oqparam.pointsource_distance == {'default': 0.0}
    if spinning_off:
        logging.info('Removing nodal plane and hypocenter distributions')
    dist = 'no' if os.environ.get('OQ_DISTRIBUTE') == 'no' else 'processpool'
    smlt_dir = os.path.dirname(source_model_lt.filename)
    converter = sourceconverter.SourceConverter(
        oqparam.investigation_time,
        oqparam.rupture_mesh_spacing,
        oqparam.complex_fault_mesh_spacing,
        oqparam.width_of_mfd_bin,
        oqparam.area_source_discretization,
        oqparam.minimum_magnitude,
        not spinning_off,
        oqparam.source_id)
    if oqparam.calculation_mode.startswith('ucerf'):
        [grp] = nrml.to_python(oqparam.inputs["source_model"], converter)
    elif in_memory:
        logging.info('Reading the source model(s) in parallel')
        smap = parallel.Starmap(
            nrml.read_source_models, monitor=monitor, distribute=dist)
        for sm in source_model_lt.gen_source_models(gsim_lt):
            for name in sm.names.split():
                fname = os.path.abspath(os.path.join(smlt_dir, name))
                smap.submit([fname], converter)
        dic = {sm.fname: sm for sm in smap}

    # consider only the effective realizations
    nr = 0
    idx = 0
    grp_id = 0
    if monitor.hdf5:
        sources = hdf5.create(monitor.hdf5, 'source_info', source_info_dt)
        hdf5.create(monitor.hdf5, 'source_geom', point3d)
        filename = None
    source_ids = set()
    for sm in source_model_lt.gen_source_models(gsim_lt):
        apply_unc = functools.partial(
            source_model_lt.apply_uncertainties, sm.path)
        src_groups = []
        for name in sm.names.split():
            fname = os.path.abspath(os.path.join(smlt_dir, name))
            if oqparam.calculation_mode.startswith('ucerf'):
                sg = copy.copy(grp)
                sg.id = grp_id
                src = sg[0].new(sm.ordinal, sm.names)  # one source
                source_ids.add(src.source_id)
                src.src_group_id = grp_id
                src.id = idx
                if oqparam.number_of_logic_tree_samples:
                    src.samples = sm.samples
                sg.sources = [src]
                src_groups.append(sg)
                idx += 1
                grp_id += 1
                data = [((sg.id, src.source_id, src.code, 0, 0,
                         src.num_ruptures, 0, 0, 0))]
                hdf5.extend(sources, numpy.array(data, source_info_dt))
            elif in_memory:
                newsm = make_sm(fname, dic[fname], apply_unc,
                                oqparam.investigation_time)
                for sg in newsm:
                    nr += sum(src.num_ruptures for src in sg)
                    # sample a source for each group
                    if os.environ.get('OQ_SAMPLE_SOURCES'):
                        sg.sources = random_filtered_sources(
                            sg.sources, srcfilter, sg.id + oqparam.random_seed)
                    for src in sg:
                        source_ids.add(src.source_id)
                        src.src_group_id = grp_id
                        src.id = idx
                        idx += 1
                    sg.id = grp_id
                    grp_id += 1
                    src_groups.append(sg)
                if monitor.hdf5:
                    store_sm(newsm, filename, monitor)
            else:  # just collect the TRT models
                groups = logictree.read_source_groups(fname)
                for group in groups:
                    source_ids.update(src['id'] for src in group)
                src_groups.extend(groups)

        if grp_id >= TWO16:
            # the limit is really needed only for event based calculations
            raise ValueError('There is a limit of %d src groups!' % TWO16)

        for brid, srcids in source_model_lt.info.applytosources.items():
            for srcid in srcids:
                if srcid not in source_ids:
                    raise ValueError(
                        'The source %s is not in the source model, please fix '
                        'applyToSources in %s or the source model' %
                        (srcid, source_model_lt.filename))
        num_sources = sum(len(sg.sources) for sg in src_groups)
        sm.src_groups = src_groups
        trts = [mod.trt for mod in src_groups]
        source_model_lt.tectonic_region_types.update(trts)
        logging.info(
            'Processed source model %d with %d gsim path(s) and %d '
            'sources', sm.ordinal + 1, sm.num_gsim_paths, num_sources)

        gsim_file = oqparam.inputs.get('gsim_logic_tree')
        if gsim_file:  # check TRTs
            for src_group in src_groups:
                if src_group.trt not in gsim_lt.values:
                    raise ValueError(
                        "Found in %r a tectonic region type %r inconsistent "
                        "with the ones in %r" % (sm, src_group.trt, gsim_file))
        yield sm

    logging.info('The composite source model has {:,d} ruptures'.format(nr))

    # log if some source file is being used more than once
    dupl = 0
    for fname, hits in make_sm.fname_hits.items():
        if hits > 1:
            logging.info('%s has been considered %d times', fname, hits)
            if not make_sm.changes:
                dupl += hits
    if (dupl and not oqparam.optimize_same_id_sources and
            not oqparam.is_event_based()):
        logging.warning(
            'You are doing redundant calculations: please make sure '
            'that different sources have different IDs and set '
            'optimize_same_id_sources=true in your .ini file')
    if make_sm.changes:
        logging.info('Applied %d changes to the composite source model',
                     make_sm.changes)
Example #24
def get_source_models(oqparam,
                      gsim_lt,
                      source_model_lt,
                      monitor,
                      in_memory=True,
                      srcfilter=None):
    """
    Build all the source models generated by the logic tree.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param gsim_lt:
        a :class:`openquake.commonlib.logictree.GsimLogicTree` instance
    :param source_model_lt:
        a :class:`openquake.commonlib.logictree.SourceModelLogicTree` instance
    :param monitor:
        a `openquake.baselib.performance.Monitor` instance
    :param in_memory:
        if True, keep the sources in memory; otherwise, just collect the TRTs
    :param srcfilter:
        a SourceFilter instance with a .filename pointing to the cache file
    :returns:
        an iterator over :class:`openquake.commonlib.logictree.LtSourceModel`
        tuples
    """
    make_sm = SourceModelFactory()
    spinning_off = oqparam.pointsource_distance == {'default': 0.0}
    if spinning_off:
        logging.info('Removing nodal plane and hypocenter distributions')
    dist = 'no' if os.environ.get('OQ_DISTRIBUTE') == 'no' else 'processpool'
    smlt_dir = os.path.dirname(source_model_lt.filename)
    converter = sourceconverter.SourceConverter(
        oqparam.investigation_time, oqparam.rupture_mesh_spacing,
        oqparam.complex_fault_mesh_spacing, oqparam.width_of_mfd_bin,
        oqparam.area_source_discretization, oqparam.minimum_magnitude,
        not spinning_off, oqparam.source_id)
    if oqparam.calculation_mode.startswith('ucerf'):
        [grp] = nrml.to_python(oqparam.inputs["source_model"], converter)
    elif in_memory:
        logging.info('Reading the source model(s) in parallel')
        smap = parallel.Starmap(nrml.read_source_models,
                                monitor=monitor,
                                distribute=dist)
        for sm in source_model_lt.gen_source_models(gsim_lt):
            for name in sm.names.split():
                fname = os.path.abspath(os.path.join(smlt_dir, name))
                smap.submit([fname], converter)
        dic = {sm.fname: sm for sm in smap}

    # consider only the effective realizations
    nr = 0
    idx = 0
    grp_id = 0
    if monitor.hdf5:
        sources = hdf5.create(monitor.hdf5, 'source_info', source_info_dt)
        hdf5.create(monitor.hdf5, 'source_geom', point3d)
        filename = None
    source_ids = set()
    for sm in source_model_lt.gen_source_models(gsim_lt):
        apply_unc = functools.partial(source_model_lt.apply_uncertainties,
                                      sm.path)
        src_groups = []
        for name in sm.names.split():
            fname = os.path.abspath(os.path.join(smlt_dir, name))
            if oqparam.calculation_mode.startswith('ucerf'):
                sg = copy.copy(grp)
                sg.id = grp_id
                src = sg[0].new(sm.ordinal, sm.names)  # one source
                source_ids.add(src.source_id)
                src.src_group_id = grp_id
                src.id = idx
                if oqparam.number_of_logic_tree_samples:
                    src.samples = sm.samples
                sg.sources = [src]
                src_groups.append(sg)
                idx += 1
                grp_id += 1
                data = [((sg.id, src.source_id, src.code, 0, 0,
                          src.num_ruptures, 0, 0, 0))]
                hdf5.extend(sources, numpy.array(data, source_info_dt))
            elif in_memory:
                newsm = make_sm(fname, dic[fname], apply_unc,
                                oqparam.investigation_time)
                for sg in newsm:
                    nr += sum(src.num_ruptures for src in sg)
                    # sample a source for each group
                    if os.environ.get('OQ_SAMPLE_SOURCES'):
                        sg.sources = random_filtered_sources(
                            sg.sources, srcfilter, sg.id + oqparam.random_seed)
                    for src in sg:
                        source_ids.add(src.source_id)
                        src.src_group_id = grp_id
                        src.id = idx
                        idx += 1
                    sg.id = grp_id
                    grp_id += 1
                    src_groups.append(sg)
                if monitor.hdf5:
                    store_sm(newsm, filename, monitor)
            else:  # just collect the TRT models
                groups = logictree.read_source_groups(fname)
                for group in groups:
                    source_ids.update(src['id'] for src in group)
                src_groups.extend(groups)

        if grp_id >= TWO16:
            # the limit is really needed only for event based calculations
            raise ValueError('There is a limit of %d src groups!' % TWO16)

        for brid, srcids in source_model_lt.info.applytosources.items():
            for srcid in srcids:
                if srcid not in source_ids:
                    raise ValueError(
                        'The source %s is not in the source model, please fix '
                        'applyToSources in %s or the source model' %
                        (srcid, source_model_lt.filename))
        num_sources = sum(len(sg.sources) for sg in src_groups)
        sm.src_groups = src_groups
        trts = [mod.trt for mod in src_groups]
        source_model_lt.tectonic_region_types.update(trts)
        logging.info(
            'Processed source model %d with %d gsim path(s) and %d '
            'sources', sm.ordinal + 1, sm.num_gsim_paths, num_sources)

        gsim_file = oqparam.inputs.get('gsim_logic_tree')
        if gsim_file:  # check TRTs
            for src_group in src_groups:
                if src_group.trt not in gsim_lt.values:
                    raise ValueError(
                        "Found in %r a tectonic region type %r inconsistent "
                        "with the ones in %r" % (sm, src_group.trt, gsim_file))
        yield sm

    logging.info('The composite source model has {:,d} ruptures'.format(nr))

    # log if some source file is being used more than once
    dupl = 0
    for fname, hits in make_sm.fname_hits.items():
        if hits > 1:
            logging.info('%s has been considered %d times', fname, hits)
            if not make_sm.changes:
                dupl += hits
    if (dupl and not oqparam.optimize_same_id_sources
            and not oqparam.is_event_based()):
        logging.warning(
            'You are doing redundant calculations: please make sure '
            'that different sources have different IDs and set '
            'optimize_same_id_sources=true in your .ini file')
    if make_sm.changes:
        logging.info('Applied %d changes to the composite source model',
                     make_sm.changes)
Example #25
def get_composite_source_model(oqparam, h5=None):
    """
    Parse the XML and build a complete composite source model in memory.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param h5:
        an open hdf5.File in which to store the source info
    """
    source_model_lt = get_source_model_lt(oqparam)
    trts = source_model_lt.tectonic_region_types
    trts_lower = {trt.lower() for trt in trts}
    reqv = oqparam.inputs.get('reqv', {})
    for trt in reqv:
        if trt in oqparam.discard_trts:
            continue
        elif trt.lower() not in trts_lower:
            raise ValueError('Unknown TRT=%s in %s [reqv]' %
                             (trt, oqparam.inputs['job_ini']))
    gsim_lt = get_gsim_lt(oqparam, trts or ['*'])
    p = source_model_lt.num_paths * gsim_lt.get_num_paths()
    if oqparam.number_of_logic_tree_samples:
        logging.info('Considering {:_d} logic tree paths out of {:_d}'.format(
            oqparam.number_of_logic_tree_samples, p))
    else:  # full enumeration
        if (oqparam.is_event_based() and
            (oqparam.ground_motion_fields or oqparam.hazard_curves_from_gmfs)
                and p > oqparam.max_potential_paths):
            raise ValueError(
                'There are too many potential logic tree paths (%d): '
                'use sampling instead of full enumeration or reduce the '
                'source model with oq reduce_sm' % p)
        logging.info('Potential number of logic tree paths = {:_d}'.format(p))

    if source_model_lt.on_each_source:
        logging.info('There is a logic tree on each source')
    csm = get_csm(oqparam, source_model_lt, gsim_lt, h5)
    if oqparam.is_event_based():
        csm.init_serials(oqparam.ses_seed)
    if h5:
        info = hdf5.create(h5, 'source_info', source_info_dt)
    data = []
    mags = set()
    n = len(csm.full_lt.sm_rlzs)
    for sg in csm.src_groups:
        for src in sg:
            eri = src.grp_ids[0] % n
            data.append((eri, src.grp_ids[0], src.source_id, src.code,
                         src.num_ruptures, 0, 0, 0, src.checksum, src._wkt))
            if hasattr(src, 'mags'):  # UCERF
                srcmags = ['%.2f' % mag for mag in src.mags]
            elif hasattr(src, 'data'):  # nonparametric
                srcmags = ['%.2f' % item[0].mag for item in src.data]
            else:
                srcmags = ['%.2f' % item[0] for item in
                           src.get_annual_occurrence_rates()]
            mags.update(srcmags)
    if h5:
        hdf5.extend(info, numpy.array(data, source_info_dt))
        h5['source_mags'] = numpy.array(sorted(mags))
    csm.gsim_lt.check_imts(oqparam.imtls)
    return csm
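A toy check of the path accounting used in Examples #7 and #25: the number of potential logic tree paths is the product of the two trees (the counts are made up):

num_smlt_paths, num_gsim_paths = 4, 6  # from the two logic trees
p = num_smlt_paths * num_gsim_paths
print('Potential number of logic tree paths = {:_d}'.format(p))  # 24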