Example #1
def store_sm(smodel, filename, monitor):
    """
    :param smodel: a :class:`openquake.hazardlib.nrml.SourceModel` instance
    :param filename: path to an hdf5 file (cache_XXX.hdf5)
    :param monitor: a Monitor instance with an .hdf5 attribute
    """
    h5 = monitor.hdf5
    with monitor('store source model'):
        sources = h5['source_info']
        source_geom = h5['source_geom']
        gid = len(source_geom)
        for sg in smodel:
            if filename:
                with hdf5.File(filename, 'r+') as hdf5cache:
                    hdf5cache['grp-%02d' % sg.id] = sg
            srcs = []
            geoms = []
            for src in sg:
                srcgeom = src.geom()
                n = len(srcgeom)
                geom = numpy.zeros(n, point3d)
                geom['lon'], geom['lat'], geom['depth'] = srcgeom.T
                srcs.append((sg.id, src.source_id, src.code, gid, gid + n,
                             src.num_ruptures, 0, 0, 0))
                geoms.append(geom)
                gid += n
            if geoms:
                hdf5.extend(source_geom, numpy.concatenate(geoms))
            if sources:
                hdf5.extend(sources, numpy.array(srcs, source_info_dt))
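The line geom['lon'], geom['lat'], geom['depth'] = srcgeom.T fills a structured array column by column. A minimal standalone sketch of that trick, assuming a point3d-like dtype defined locally (not the engine's own definition):

import numpy

# assumed stand-in for the engine's point3d dtype
point3d = numpy.dtype([('lon', float), ('lat', float), ('depth', float)])

srcgeom = numpy.array([[10.0, 45.0, 5.0],    # one (lon, lat, depth) row
                       [10.5, 45.2, 5.0]])   # per geometry vertex
geom = numpy.zeros(len(srcgeom), point3d)
geom['lon'], geom['lat'], geom['depth'] = srcgeom.T  # unpack the three columns

print(geom)  # two (lon, lat, depth) records, ready for hdf5.extend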
Example #2
    def flush(self):
        """
        Save the measurements to the performance file (or to stdout)
        """
        for child in self.children:
            child.flush()
        data = self.get_data()
        if len(data) == 0:  # no information
            return []

        # reset monitor
        self.duration = 0
        self.mem = 0
        self.counts = 0

        if self.hdf5path:
            # open in append mode, creating the performance file if needed
            h5 = h5py.File(self.hdf5path, 'a')
            try:
                pdata = h5['performance_data']
            except KeyError:
                pdata = hdf5.create(h5, 'performance_data', perf_dt)
            hdf5.extend(pdata, data)
            h5.close()
        # else print(data[0]) on stdout

        return data
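Example #2 relies on hdf5.create to make a dataset that can grow and on hdf5.extend to append rows to it. A rough standalone equivalent of that pattern with plain h5py is sketched below; the perf_dt layout here is an assumption for illustration, not the engine's actual record type.

import numpy
import h5py

# assumed layout of a performance record (illustrative only)
perf_dt = numpy.dtype([('operation', 'S64'), ('time_sec', float),
                       ('memory_mb', float), ('counts', int)])

data = numpy.array([(b'compute hazard', 1.5, 120.0, 3)], perf_dt)

with h5py.File('/tmp/perf.hdf5', 'a') as h5:
    try:
        pdata = h5['performance_data']
    except KeyError:
        # resizable along the first axis, like hdf5.create(h5, name, perf_dt)
        pdata = h5.create_dataset('performance_data', shape=(0,),
                                  maxshape=(None,), dtype=perf_dt)
    # append at the end, like hdf5.extend(pdata, data)
    n = len(pdata)
    pdata.resize((n + len(data),))
    pdata[n:] = data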
Example #3
    def extend(self, key, array):
        """
        Extend the dataset associated with the given key; create it if needed

        :param key: name of the dataset
        :param array: array to store
        """
        try:
            dset = self.hdf5[key]
        except KeyError:
            dset = hdf5.create(self.hdf5, key, array.dtype,
                               shape=(None,) + array.shape[1:])
        hdf5.extend(dset, array)
        return dset
Example #4
    def extend(self, key, array, **attrs):
        """
        Extend the dataset associated with the given key; create it if needed

        :param key: name of the dataset
        :param array: array to store
        :param attrs: a dictionary of attributes
        """
        try:
            dset = self.hdf5[key]
        except KeyError:
            dset = hdf5.create(self.hdf5, key, array.dtype,
                               shape=(None,) + array.shape[1:])
        hdf5.extend(dset, array)
        for k, v in attrs.items():
            dset.attrs[k] = v
        return dset
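A hypothetical call to the method above, assuming dstore is an instance of the class that defines it (the dataset name and the attribute are made up for illustration):

import numpy

chunk = numpy.zeros(10)                    # any array with a fixed dtype
dset = dstore.extend('agg_losses', chunk, investigation_time=50)
dset = dstore.extend('agg_losses', chunk)  # later calls append to the same dataset
assert dset.attrs['investigation_time'] == 50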
Example #5
    def agg(self, acc, result):
        """
        Aggregate losses and store them in the datastore.

        :param acc: accumulator dictionary
        :param result: dictionary coming from event_based_risk
        """
        self.gmfbytes += result.pop('gmfbytes')
        with self.monitor('saving event loss tables', autoflush=True):
            if self.oqparam.asset_loss_table:
                for lr, array in sorted(result.pop('ASSLOSS').items()):
                    hdf5.extend(self.ass_loss_table[lr], array)
                    self.ass_bytes += array.nbytes
            for lr, array in sorted(result.pop('AGGLOSS').items()):
                hdf5.extend(self.agg_loss_table[lr], array)
                self.agg_bytes += array.nbytes
            self.datastore.hdf5.flush()
        return acc + result
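The final acc + result assumes an accumulator dictionary that supports addition by merging keys. A minimal sketch of that behaviour in plain Python (the engine uses its own accumulator class, not this one):

class Acc(dict):
    def __add__(self, other):
        new = Acc(self)
        for k, v in other.items():
            new[k] = new.get(k, 0) + v
        return new

acc = Acc(losses=1.0)
result = {'losses': 2.5, 'events': 3}
print(acc + result)  # {'losses': 3.5, 'events': 3}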
Example #6
    def flush(self):
        """
        Save the measurements to the performance file (or to stdout)
        """
        if not self._flush:
            raise RuntimeError(
                'Monitor(%r).flush() must not be called in a worker' %
                self.operation)
        for child in self.children:
            child.hdf5 = self.hdf5
            child.flush()
        data = self.get_data()
        if len(data) == 0:  # no information
            return []
        elif self.hdf5:
            hdf5.extend(self.hdf5['performance_data'], data)

        # reset monitor
        self.duration = 0
        self.mem = 0
        self.counts = 0
        return data
Example #7
    def combine_curves_and_save_gmfs(self, acc, res):
        """
        Combine the hazard curves (if any) and save the gmfs (if any)
        sequentially; notice that the gmfs may come from
        different tasks in any order.

        :param acc: an accumulator for the hazard curves
        :param res: a dictionary rlzi, imt -> [gmf_array, curves_by_imt]
        :returns: a new accumulator
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        for rlzi in res:
            gmfa, curves = res[rlzi]
            if gmfa is not None:
                with sav_mon:
                    hdf5.extend(self.datastore['gmf_data/%04d' % rlzi], gmfa)
            if curves is not None:  # aggregate hcurves
                with agg_mon:
                    self.agg_dicts(acc, {rlzi: curves})
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        return acc
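sav_mon and agg_mon are entered once per realization and flushed at the end, so each one accumulates the total time spent in its block. A minimal accumulating timer in that spirit, as a plain-Python sketch (the engine's Monitor also tracks memory and counts):

import time

class Timer:
    def __init__(self, name):
        self.name, self.duration = name, 0.0
    def __enter__(self):
        self._t0 = time.time()
        return self
    def __exit__(self, *exc):
        self.duration += time.time() - self._t0
    def flush(self):
        print('%s: %.3fs' % (self.name, self.duration))
        self.duration = 0.0  # reset, like the monitors above

save = Timer('saving gmfs')
for _ in range(3):
    with save:           # re-entered once per result
        time.sleep(0.01)
save.flush()             # report the accumulated time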
Example #8
def get_source_models(oqparam, gsim_lt, source_model_lt, monitor,
                      in_memory=True, srcfilter=None):
    """
    Build all the source models generated by the logic tree.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param gsim_lt:
        a :class:`openquake.commonlib.logictree.GsimLogicTree` instance
    :param source_model_lt:
        a :class:`openquake.commonlib.logictree.SourceModelLogicTree` instance
    :param monitor:
        an `openquake.baselib.performance.Monitor` instance
    :param in_memory:
        if True, keep the sources in memory; otherwise just collect the TRTs
    :param srcfilter:
        a SourceFilter instance with a .filename attribute pointing to the
        cache file
    :returns:
        an iterator over :class:`openquake.commonlib.logictree.LtSourceModel`
        tuples
    """
    make_sm = SourceModelFactory()
    spinning_off = oqparam.pointsource_distance == {'default': 0.0}
    if spinning_off:
        logging.info('Removing nodal plane and hypocenter distributions')
    dist = 'no' if os.environ.get('OQ_DISTRIBUTE') == 'no' else 'processpool'
    smlt_dir = os.path.dirname(source_model_lt.filename)
    converter = sourceconverter.SourceConverter(
        oqparam.investigation_time,
        oqparam.rupture_mesh_spacing,
        oqparam.complex_fault_mesh_spacing,
        oqparam.width_of_mfd_bin,
        oqparam.area_source_discretization,
        oqparam.minimum_magnitude,
        not spinning_off,
        oqparam.source_id)
    if oqparam.calculation_mode.startswith('ucerf'):
        [grp] = nrml.to_python(oqparam.inputs["source_model"], converter)
    elif in_memory:
        logging.info('Reading the source model(s) in parallel')
        smap = parallel.Starmap(
            nrml.read_source_models, monitor=monitor, distribute=dist)
        for sm in source_model_lt.gen_source_models(gsim_lt):
            for name in sm.names.split():
                fname = os.path.abspath(os.path.join(smlt_dir, name))
                smap.submit([fname], converter)
        dic = {sm.fname: sm for sm in smap}

    # consider only the effective realizations
    nr = 0
    idx = 0
    grp_id = 0
    if monitor.hdf5:
        sources = hdf5.create(monitor.hdf5, 'source_info', source_info_dt)
        hdf5.create(monitor.hdf5, 'source_geom', point3d)
        filename = None
    source_ids = set()
    for sm in source_model_lt.gen_source_models(gsim_lt):
        apply_unc = functools.partial(
            source_model_lt.apply_uncertainties, sm.path)
        src_groups = []
        for name in sm.names.split():
            fname = os.path.abspath(os.path.join(smlt_dir, name))
            if oqparam.calculation_mode.startswith('ucerf'):
                sg = copy.copy(grp)
                sg.id = grp_id
                src = sg[0].new(sm.ordinal, sm.names)  # one source
                source_ids.add(src.source_id)
                src.src_group_id = grp_id
                src.id = idx
                if oqparam.number_of_logic_tree_samples:
                    src.samples = sm.samples
                sg.sources = [src]
                src_groups.append(sg)
                idx += 1
                grp_id += 1
                data = [((sg.id, src.source_id, src.code, 0, 0,
                         src.num_ruptures, 0, 0, 0))]
                hdf5.extend(sources, numpy.array(data, source_info_dt))
            elif in_memory:
                newsm = make_sm(fname, dic[fname], apply_unc,
                                oqparam.investigation_time)
                for sg in newsm:
                    nr += sum(src.num_ruptures for src in sg)
                    # sample a source for each group
                    if os.environ.get('OQ_SAMPLE_SOURCES'):
                        sg.sources = random_filtered_sources(
                            sg.sources, srcfilter, sg.id + oqparam.random_seed)
                    for src in sg:
                        source_ids.add(src.source_id)
                        src.src_group_id = grp_id
                        src.id = idx
                        idx += 1
                    sg.id = grp_id
                    grp_id += 1
                    src_groups.append(sg)
                if monitor.hdf5:
                    store_sm(newsm, filename, monitor)
            else:  # just collect the TRT models
                groups = logictree.read_source_groups(fname)
                for group in groups:
                    source_ids.update(src['id'] for src in group)
                src_groups.extend(groups)

        if grp_id >= TWO16:
            # the limit is really needed only for event based calculations
            raise ValueError('There is a limit of %d src groups!' % TWO16)

        for brid, srcids in source_model_lt.info.applytosources.items():
            for srcid in srcids:
                if srcid not in source_ids:
                    raise ValueError(
                        'The source %s is not in the source model, please fix '
                        'applyToSources in %s or the source model' %
                        (srcid, source_model_lt.filename))
        num_sources = sum(len(sg.sources) for sg in src_groups)
        sm.src_groups = src_groups
        trts = [mod.trt for mod in src_groups]
        source_model_lt.tectonic_region_types.update(trts)
        logging.info(
            'Processed source model %d with %d gsim path(s) and %d '
            'sources', sm.ordinal + 1, sm.num_gsim_paths, num_sources)

        gsim_file = oqparam.inputs.get('gsim_logic_tree')
        if gsim_file:  # check TRTs
            for src_group in src_groups:
                if src_group.trt not in gsim_lt.values:
                    raise ValueError(
                        "Found in %r a tectonic region type %r inconsistent "
                        "with the ones in %r" % (sm, src_group.trt, gsim_file))
        yield sm

    logging.info('The composite source model has {:,d} ruptures'.format(nr))

    # log if some source file is being used more than once
    dupl = 0
    for fname, hits in make_sm.fname_hits.items():
        if hits > 1:
            logging.info('%s has been considered %d times', fname, hits)
            if not make_sm.changes:
                dupl += hits
    if (dupl and not oqparam.optimize_same_id_sources and
            not oqparam.is_event_based()):
        logging.warning(
            'You are doing redundant calculations: please make sure '
            'that different sources have different IDs and set '
            'optimize_same_id_sources=true in your .ini file')
    if make_sm.changes:
        logging.info('Applied %d changes to the composite source model',
                     make_sm.changes)
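Example #8 builds apply_unc with functools.partial so that the logic-tree path is bound once and each later call only supplies the remaining argument. A plain-Python illustration (the function below is a stand-in, not the engine's apply_uncertainties):

import functools

def apply_uncertainties(path, source_group):   # stand-in for the real method
    return (path, source_group)

apply_unc = functools.partial(apply_uncertainties, ('b1', 'b2'))
print(apply_unc('area sources'))               # (('b1', 'b2'), 'area sources')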
Example #9
    def build_events_from_sources(self):
        """
        Prefilter the composite source model and store the source_info
        """
        gsims_by_trt = self.csm.full_lt.get_gsims_by_trt()
        sources = self.csm.get_sources()
        # weighting the heavy sources
        nrups = parallel.Starmap(count_ruptures,
                                 [(src, )
                                  for src in sources if src.code in b'AMC'],
                                 h5=self.datastore.hdf5).reduce()
        for src in sources:
            src.nsites = 1  # avoid 0 weight
            try:
                src.num_ruptures = nrups[src.source_id]
            except KeyError:
                src.num_ruptures = src.count_ruptures()
        maxweight = sum(sg.weight for sg in self.csm.src_groups) / (
            self.oqparam.concurrent_tasks or 1)
        eff_ruptures = AccumDict(accum=0)  # trt => potential ruptures
        calc_times = AccumDict(accum=numpy.zeros(3, F32))  # nr, ns, dt
        allargs = []
        if self.oqparam.is_ucerf():
            # manage the filtering in a special way
            for sg in self.csm.src_groups:
                for src in sg:
                    src.src_filter = self.srcfilter
            srcfilter = nofilter  # otherwise it would be ultra-slow
        else:
            srcfilter = self.srcfilter
        logging.info('Building ruptures')
        for sg in self.csm.src_groups:
            if not sg.sources:
                continue
            logging.info('Sending %s', sg)
            par = self.param.copy()
            par['gsims'] = gsims_by_trt[sg.trt]
            for src_group in sg.split(maxweight):
                allargs.append((src_group, srcfilter, par))
        smap = parallel.Starmap(sample_ruptures,
                                allargs,
                                h5=self.datastore.hdf5)
        mon = self.monitor('saving ruptures')
        self.nruptures = 0
        for dic in smap:
            # NB: dic should be a dictionary, but when the calculation dies
            # for an OOM it can become None, thus giving a very confusing error
            if dic is None:
                raise MemoryError('You ran out of memory!')
            rup_array = dic['rup_array']
            if len(rup_array) == 0:
                continue
            if dic['calc_times']:
                calc_times += dic['calc_times']
            if dic['eff_ruptures']:
                eff_ruptures += dic['eff_ruptures']
            with mon:
                n = len(rup_array)
                rup_array['id'] = numpy.arange(self.nruptures,
                                               self.nruptures + n)
                self.nruptures += n
                hdf5.extend(self.datastore['ruptures'], rup_array)
                hdf5.extend(self.datastore['rupgeoms'], rup_array.geom)
        if len(self.datastore['ruptures']) == 0:
            raise RuntimeError('No ruptures were generated, perhaps the '
                               'investigation time is too short')

        # must be called before storing the events
        self.store_rlz_info(eff_ruptures)  # store full_lt
        self.store_source_info(calc_times)
        imp = calc.RuptureImporter(self.datastore)
        with self.monitor('saving ruptures and events'):
            imp.import_rups(self.datastore.getitem('ruptures')[()])
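The saving loop above gives consecutive ids to the ruptures across the chunks returned by the workers; the same bookkeeping in a tiny numpy sketch:

import numpy

nruptures = 0
for chunk_len in (3, 2):          # sizes of two incoming rup_array chunks
    rup_ids = numpy.arange(nruptures, nruptures + chunk_len)
    nruptures += chunk_len
    print(rup_ids)                # [0 1 2] then [3 4]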
Example #10
    def get_models(self):
        """
        :yields: :class:`openquake.commonlib.logictree.LtSourceModel` tuples
        """
        oq = self.oqparam
        spinning_off = self.oqparam.pointsource_distance == {'default': 0.0}
        if spinning_off:
            logging.info('Removing nodal plane and hypocenter distributions')
        dist = ('no'
                if os.environ.get('OQ_DISTRIBUTE') == 'no' else 'processpool')
        smlt_dir = os.path.dirname(self.source_model_lt.filename)
        converter = sourceconverter.SourceConverter(
            oq.investigation_time, oq.rupture_mesh_spacing,
            oq.complex_fault_mesh_spacing, oq.width_of_mfd_bin,
            oq.area_source_discretization, oq.minimum_magnitude,
            not spinning_off, oq.source_id)
        if oq.calculation_mode.startswith('ucerf'):
            [grp] = nrml.to_python(oq.inputs["source_model"], converter)
            dic = {'ucerf': grp}
        elif self.in_memory:
            logging.info('Reading the source model(s) in parallel')
            smap = parallel.Starmap(
                nrml.read_source_models,
                distribute=dist,
                hdf5path=self.hdf5.filename if self.hdf5 else None)
            # NB: hdf5path is None in logictree_test.py
            for sm in self.source_model_lt.gen_source_models(self.gsim_lt):
                for name in sm.names.split():
                    fname = os.path.abspath(os.path.join(smlt_dir, name))
                    smap.submit([fname], converter)
            dic = {sm.fname: sm for sm in smap}
        else:
            dic = {}
        # consider only the effective realizations
        idx = 0
        if self.hdf5:
            sources = hdf5.create(self.hdf5, 'source_info', source_info_dt)
            hdf5.create(self.hdf5, 'source_geom', point3d)
            hdf5.create(self.hdf5, 'source_mfds', hdf5.vstr)
        grp_id = 0
        for sm in self.source_model_lt.gen_source_models(self.gsim_lt):
            if 'ucerf' in dic:
                sg = copy.copy(dic['ucerf'])
                sm.src_groups = [sg]
                sg.id = grp_id
                src = sg[0].new(sm.ordinal, sm.names)  # one source
                src.src_group_id = grp_id
                src.id = idx
                if oq.number_of_logic_tree_samples:
                    src.samples = sm.samples
                sg.sources = [src]
                idx += 1
                grp_id += 1
                data = [((sg.id, src.source_id, src.code, 0, 0, -1,
                          src.num_ruptures, 0, 0, 0, idx))]
                hdf5.extend(sources, numpy.array(data, source_info_dt))
            else:
                self.apply_uncertainties(sm, idx, dic)
            yield sm
            if self.hdf5:
                hdf5.extend(self.hdf5['source_mfds'],
                            numpy.array(list(self.mfds), hdf5.vstr))

        if self.hdf5:
            self.hdf5['source_mags'] = sorted(self.mags)
        # log if some source file is being used more than once
        dupl = 0
        for fname, hits in self.fname_hits.items():
            if hits > 1:
                logging.info('%s has been considered %d times', fname, hits)
                if not self.changes:
                    dupl += hits
        if self.changes:
            logging.info('Applied %d changes to the composite source model',
                         self.changes)
Example #11
def _store_results(smap, lt_models, source_model_lt, gsim_lt, oq, h5):
    mags = set()
    changes = 0
    fname_hits = collections.Counter()
    groups = [[] for _ in lt_models]  # (fileno, src_groups)
    for dic in smap:
        ltm = lt_models[dic['ordinal']]
        groups[ltm.ordinal].append((dic['fileno'], dic['src_groups']))
        fname_hits += dic['fname_hits']
        changes += dic['changes']
        mags.update(dic['mags'])
        gsim_file = oq.inputs.get('gsim_logic_tree')
        if gsim_file:  # check TRTs
            for src_group in dic['src_groups']:
                if src_group.trt not in gsim_lt.values:
                    raise ValueError("Found in %r a tectonic region type %r "
                                     "inconsistent with the ones in %r" %
                                     (ltm, src_group.trt, gsim_file))
    # global checks
    idx = 0
    grp_id = 0
    for ltm in lt_models:
        for fileno, grps in sorted(groups[ltm.ordinal]):
            for grp in grps:
                grp.id = grp_id
                for src in grp:
                    src.src_group_id = grp_id
                    src.id = idx
                    idx += 1
                ltm.src_groups.append(grp)
                grp_id += 1
                if grp_id >= TWO16:
                    # the limit is only for event based calculations
                    raise ValueError('There is a limit of %d src groups!' %
                                     TWO16)
        # check applyToSources
        source_ids = set(src.source_id for grp in ltm.src_groups
                         for src in grp)
        for brid, srcids in source_model_lt.info.applytosources.items():
            if brid in ltm.path:
                for srcid in srcids:
                    if srcid not in source_ids:
                        raise ValueError(
                            "The source %s is not in the source model,"
                            " please fix applyToSources in %s or the "
                            "source model" % (srcid, source_model_lt.filename))

        if h5:
            sources = h5['source_info']
            for sg in ltm.src_groups:
                sg.info['grp_id'] = sg.id
                hdf5.extend(sources, sg.info)

    if h5:
        h5['source_mags'] = numpy.array(sorted(mags))

    # log if some source file is being used more than once
    dupl = 0
    for fname, hits in fname_hits.items():
        if hits > 1:
            logging.info('%s has been considered %d times', fname, hits)
            if not changes:
                dupl += hits
    if changes:
        logging.info('Applied %d changes to the composite source model',
                     changes)
    return lt_models
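fname_hits in Example #11 is accumulated with Counter addition from the per-task results; this is standard-library behaviour, shown standalone:

import collections

hits = collections.Counter()
hits += collections.Counter({'model_a.xml': 1})
hits += collections.Counter({'model_a.xml': 1, 'model_b.xml': 1})
print(hits)  # Counter({'model_a.xml': 2, 'model_b.xml': 1})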