Example #1
 def post_execute(self, times):
     """
     Compute and store average losses from the losses_by_event dataset,
     and then loss curves and maps.
     """
     self.datastore.set_attrs('task_info/start_ebrisk', times=times)
     oq = self.oqparam
     elt_length = len(self.datastore['losses_by_event'])
     builder = get_loss_builder(self.datastore)
     self.build_datasets(builder)
     mon = performance.Monitor(hdf5=hdf5.File(self.datastore.hdf5cache()))
     smap = parallel.Starmap(compute_loss_curves_maps, monitor=mon)
     self.datastore.close()
     acc = []
     ct = oq.concurrent_tasks or 1
     for elt_slice in general.split_in_slices(elt_length, ct):
         smap.submit(self.datastore.filename, elt_slice,
                     oq.conditional_loss_poes, oq.individual_curves)
     acc = smap.reduce(acc=[])
     # copy performance information from the cache to the datastore
     pd = mon.hdf5['performance_data'].value
     hdf5.extend3(self.datastore.filename, 'performance_data', pd)
     self.datastore.open('r+')  # reopen
     self.datastore['task_info/compute_loss_curves_and_maps'] = (
         mon.hdf5['task_info/compute_loss_curves_maps'].value)
     with self.monitor('saving loss_curves and maps', autoflush=True):
         for name, idx, arr in acc:
             for ij, val in numpy.ndenumerate(arr):
                 self.datastore[name][ij + idx] = val
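
Note: general.split_in_slices belongs to the OpenQuake baselib; the stand-in below is only a sketch of how the losses_by_event rows might be partitioned into at most concurrent_tasks contiguous slices, not the engine's actual implementation.

# Illustrative stand-in (hypothetical reimplementation) of general.split_in_slices:
# partition range(length) into at most num_slices contiguous slices.
def split_in_slices(length, num_slices):
    num_slices = max(1, min(num_slices, length))
    step = max(1, (length + num_slices - 1) // num_slices)  # ceiling division
    for start in range(0, length, step):
        yield slice(start, min(start + step, length))

# 10 rows split across 3 tasks -> slice(0, 4), slice(4, 8), slice(8, 10)
print(list(split_in_slices(10, 3)))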
Example #2
    def combine_pmaps_and_save_gmfs(self, acc, res):
        """
        Combine the hazard curves (if any) and save the gmfs (if any)
        sequentially; notice that the gmfs may come from
        different tasks in any order.

        :param acc: an accumulator for the hazard curves
        :param res: a dictionary rlzi, imt -> [gmf_array, curves_by_imt]
        :returns: a new accumulator
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        self.gmdata += res['gmdata']
        if res['gmfcoll'] is not None:
            with sav_mon:
                for (grp_id, gsim), array in res['gmfcoll'].items():
                    if len(array):
                        key = 'gmf_data/grp-%02d/%s' % (grp_id, gsim)
                        hdf5.extend3(self.datastore.hdf5path, key, array)
        slicedic = self.oqparam.imtls.slicedic
        with agg_mon:
            for key, poes in res['hcurves'].items():
                rlzi, sid, imt = str2rsi(key)
                array = acc[rlzi].setdefault(sid, 0).array[slicedic[imt], 0]
                array[:] = 1. - (1. - array) * (1. - poes)
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        if 'ruptures' in res:
            vars(EventBasedRuptureCalculator)['save_ruptures'](self,
                                                               res['ruptures'])
        return acc
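
The update 1. - (1. - array) * (1. - poes) aggregates probabilities of exceedance under an independence assumption; because the operation is commutative and associative, the gmfs and curves can indeed arrive from the tasks in any order, as the docstring promises. A minimal numeric check:

import numpy

acc = numpy.array([0.0, 0.1, 0.5])   # PoEs accumulated so far
poes = numpy.array([0.2, 0.2, 0.2])  # PoEs arriving from one more task
print(1. - (1. - acc) * (1. - poes))  # [0.2  0.28 0.6 ]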
Example #3
 def save_task_data(self, mon):
     if hasattr(mon, 'weight'):
         duration = mon.children[0].duration  # the task is the first child
         tup = (mon.task_no, mon.weight, duration)
         data = numpy.array([tup], self.task_data_dt)
         hdf5.extend3(mon.hdf5path, 'task_info/' + self.name, data)
     mon.flush()
Example #4
 def save_task_data(self, mon):
     if mon.hdf5path and hasattr(mon, 'weight'):
         duration = mon.children[0].duration  # the task is the first child
         tup = (mon.task_no, mon.weight, duration)
         data = numpy.array([tup], self.task_data_dt)
         hdf5.extend3(mon.hdf5path, 'task_info/' + self.name, data)
     mon.flush()
Example #5
    def combine_pmaps_and_save_gmfs(self, acc, res):
        """
        Combine the hazard curves (if any) and save the gmfs (if any)
        sequentially; notice that the gmfs may come from
        different tasks in any order.

        :param acc: an accumulator for the hazard curves
        :param res: a dictionary rlzi, imt -> [gmf_array, curves_by_imt]
        :returns: a new accumulator
        """
        sav_mon = self.monitor('saving gmfs')
        agg_mon = self.monitor('aggregating hcurves')
        self.gmdata += res['gmdata']
        data = res['gmfdata']
        if data is not None:
            with sav_mon:
                hdf5.extend3(self.datastore.hdf5path, 'gmf_data/data', data)
                for sid, start, stop in res['indices']:
                    self.indices[sid].append(
                        (start + self.offset, stop + self.offset))
                self.offset += len(data)
        slicedic = self.oqparam.imtls.slicedic
        with agg_mon:
            for key, poes in res['hcurves'].items():
                rlzi, sid, imt = str2rsi(key)
                array = acc[rlzi].setdefault(sid, 0).array[slicedic[imt], 0]
                array[:] = 1. - (1. - array) * (1. - poes)
        sav_mon.flush()
        agg_mon.flush()
        self.datastore.flush()
        if 'ruptures' in res:
            vars(EventBasedRuptureCalculator)['save_ruptures'](self,
                                                               res['ruptures'])
        return acc
Example #6
 def save_task_info(self, mon):
     if mon.hdf5path:
         duration = mon.children[0].duration  # the task is the first child
         tup = (mon.task_no, mon.weight, duration, self.received[-1])
         data = numpy.array([tup], task_data_dt)
         hdf5.extend3(mon.hdf5path, 'task_info/' + self.name, data,
                      argnames=self.argnames, sent=self.sent)
     mon.flush()
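
The (task_no, weight, duration, received) tuple is packed into a one-row structured array before being appended to task_info/<task name>; the dtype below is only a plausible stand-in for task_data_dt, whose real field names and types are defined in the engine.

import numpy

# hypothetical stand-in for the engine's task_data_dt
task_data_dt = numpy.dtype([
    ('task_no', numpy.uint32), ('weight', numpy.float32),
    ('duration', numpy.float32), ('received', numpy.int64)])

tup = (1, 42.0, 0.73, 2048)              # one finished task, 2048 bytes received
data = numpy.array([tup], task_data_dt)  # single-row structured array
# hdf5.extend3(mon.hdf5path, 'task_info/' + name, data) would append this row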
Example #7
 def save_info(self, dic):
     """
     Save (name, value) information in the associated hdf5path
     """
     if self.hdf5path:
         data = numpy.array(
             _pairs(dic.items()),
             [('par_name', hdf5.vstr), ('par_value', hdf5.vstr)])
         hdf5.extend3(self.hdf5path, 'job_info', data)
Example #8
 def save_info(self, dic):
     """
     Save (name, value) information in the associated hdf5path
     """
     if self.hdf5path:
         if 'hostname' not in dic:
             dic['hostname'] = socket.gethostname()
         data = numpy.array(
             _pairs(dic.items()),
             [('par_name', hdf5.vstr), ('par_value', hdf5.vstr)])
         hdf5.extend3(self.hdf5path, 'job_info', data)
Example #9
 def agg_dicts(self, acc, result):
     """
     :param acc: accumulator dictionary
     :param result: an AccumDict with events, ruptures, gmfs and hcurves
     """
     oq = self.oqparam
     if oq.save_ruptures and not oq.ground_motion_fields:
         self.gmf_size += max_gmf_size(
             result['ruptures'], self.csm_info.rlzs_assoc.get_rlzs_by_gsim,
             self.csm_info.get_samples_by_grp(), len(self.oqparam.imtls))
     if hasattr(result, 'calc_times'):
         for srcid, nsites, eids, dt in result.calc_times:
             info = self.csm.infos[srcid]
             info.num_sites += nsites
             info.calc_time += dt
             info.num_split += 1
             info.events += len(eids)
     if hasattr(result, 'eff_ruptures'):
         acc.eff_ruptures += result.eff_ruptures
     if hasattr(result, 'events'):
         self.datastore.extend('events', result.events)
     self.save_ruptures(result['ruptures'])
     sav_mon = self.monitor('saving gmfs')
     agg_mon = self.monitor('aggregating hcurves')
     hdf5path = self.datastore.hdf5path
     if 'gmdata' in result:
         self.gmdata += result['gmdata']
         data = result['gmfdata']
         with sav_mon:
             hdf5.extend3(hdf5path, 'gmf_data/data', data)
             # it is important to save the number of bytes while the
             # computation is going, to see the progress
             update_nbytes(self.datastore, 'gmf_data/data', data)
             for sid, start, stop in result['indices']:
                 self.indices[sid].append(
                     (start + self.offset, stop + self.offset))
             self.offset += len(data)
             if self.offset >= TWO32:
                 raise RuntimeError(
                     'The gmf_data table has more than %d rows' % TWO32)
     slicedic = self.oqparam.imtls.slicedic
     with agg_mon:
         for key, poes in result.get('hcurves', {}).items():
             r, sid, imt = str2rsi(key)
             array = acc[r].setdefault(sid, 0).array[slicedic[imt], 0]
             array[:] = 1. - (1. - array) * (1. - poes)
     sav_mon.flush()
     agg_mon.flush()
     self.datastore.flush()
     return acc
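
Each task reports (sid, start, stop) row ranges relative to its own gmfdata chunk, so the ranges must be shifted by the number of rows already written to the global gmf_data/data table; a toy version of that bookkeeping, with made-up chunk sizes:

offset = 0
indices = {}  # sid -> list of (start, stop) ranges into the global table
for chunk_len, chunk_indices in [(5, [(0, 0, 3), (7, 3, 5)]),
                                 (4, [(0, 0, 4)])]:
    for sid, start, stop in chunk_indices:
        indices.setdefault(sid, []).append((start + offset, stop + offset))
    offset += chunk_len
print(indices)  # {0: [(0, 3), (5, 9)], 7: [(3, 5)]}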
Example #10
    def save_task_info(self, hdf5path, res, name, sent, mem_gb=0):
        """
        Called by parallel.IterResult.

        :param hdf5path: where to save the info
        :param res: a :class:`Result` object
        :param name: name of the task function
        :param sent: number of bytes sent
        :param mem_gb: memory consumption at the saving time (optional)
        """
        t = (name, self.task_no, self.weight, self.duration, len(res.pik),
             mem_gb)
        data = numpy.array([t], task_info_dt)
        hdf5.extend3(hdf5path, 'task_info', data,
                     **{'sent_' + name: str(sent)})
Example #11
def save_task_info(self, res, mem_gb=0):
    """
    :param self: an object with attributes .hdf5, .argnames, .sent
    :param res: a :class:`Result` object
    :param mem_gb: memory consumption at the saving time (optional)
    """
    mon = res.mon
    name = mon.operation[6:]  # strip 'total '
    if self.hdf5:
        mon.hdf5 = self.hdf5  # needed for the flush below
        t = (mon.task_no, mon.weight, mon.duration, len(res.pik), mem_gb)
        data = numpy.array([t], task_info_dt)
        hdf5.extend3(self.hdf5.filename, 'task_info/' + name, data,
                     argnames=self.argnames, sent=self.sent)
    mon.flush()
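
The slicing mon.operation[6:] above simply strips the 'total ' prefix (six characters) that the monitor puts in front of the task name, e.g.:

assert 'total compute_loss_curves_maps'[6:] == 'compute_loss_curves_maps'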
Example #12
    def flush(self):
        """
        Save the measurements on the performance file (or on stdout)
        """
        for child in self.children:
            child.flush()
        data = self.get_data()
        if len(data) == 0:  # no information
            return []
        elif self.hdf5path:
            hdf5.extend3(self.hdf5path, 'performance_data', data)

        # reset monitor
        self.duration = 0
        self.mem = 0
        self.counts = 0
        return data
Example #13
 def flush(self, hdf5path):
     """
     Save the measurements on the performance file
     """
     if not self.children:
         data = self.get_data()
     else:
         lst = [self.get_data()]
         for child in self.children:
             lst.append(child.get_data())
             child.reset()
         data = numpy.concatenate(lst)
     if len(data) == 0:  # no information
         return
     elif not os.path.exists(hdf5path):
         with hdf5.File(hdf5path, 'w') as h5:
             hdf5.create(h5, 'performance_data', perf_dt)
             hdf5.create(h5, 'task_info', task_info_dt)
     hdf5.extend3(hdf5path, 'performance_data', data)
     self.reset()
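
For reference, the append behaviour that extend3 provides can be approximated with plain h5py as below. This is only a sketch (the real openquake.baselib.hdf5.extend3 also handles extra cases such as variable-length dtypes and keyword arguments); append_rows and its return value are assumptions, not the engine's API, but the snippet shows the resizable-dataset idiom behind all of these examples.

import os
import tempfile

import h5py
import numpy


def append_rows(path, key, data):
    # Append data along axis 0, creating a resizable dataset if needed;
    # return the total number of rows (a rough stand-in for extend3).
    with h5py.File(path, 'a') as h5:
        if key not in h5:
            h5.create_dataset(key, data=data, chunks=True,
                              maxshape=(None,) + data.shape[1:])
        else:
            dset = h5[key]
            n = len(dset)
            dset.resize(n + len(data), axis=0)
            dset[n:] = data
        return len(h5[key])


path = os.path.join(tempfile.mkdtemp(), 'perf.hdf5')
print(append_rows(path, 'performance_data', numpy.zeros(3)))  # 3
print(append_rows(path, 'performance_data', numpy.zeros(2)))  # 5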
Example #14
    def flush(self):
        """
        Save the measurements on the performance file (or on stdout)
        """
        if not self._flush:
            raise RuntimeError(
                'Monitor(%r).flush() must not be called in a worker' %
                self.operation)
        for child in self.children:
            child.flush()
        data = self.get_data()
        if len(data) == 0:  # no information
            return []
        elif self.hdf5path:
            hdf5.extend3(self.hdf5path, 'performance_data', data)

        # reset monitor
        self.duration = 0
        self.mem = 0
        self.counts = 0
        return data
Example #15
 def test_extend3_vlen_same_len(self):
     data = numpy.array([[4, 1], [1, 2], [3, 1]], hdf5.vfloat32)
     nrows = hdf5.extend3(self.tmp, 'dset', data)
     self.assertEqual(nrows, 3)
     with hdf5.File(self.tmp, 'r') as f:
         print(f['dset'].value)
Example #16
    def post_execute(self, dummy):
        """
        Compute and store average losses from the losses_by_event dataset,
        and then loss curves and maps.
        """
        oq = self.oqparam
        if oq.avg_losses:
            self.datastore['avg_losses-stats'].attrs['stats'] = [b'mean']
        logging.info('Building loss tables')
        build_loss_tables(self.datastore)
        shp = self.get_shape(self.L)  # (L, T...)
        text = ' x '.join('%d(%s)' % (n, t)
                          for t, n in zip(oq.aggregate_by, shp[1:]))
        logging.info('Producing %d(loss_types) x %s loss curves', self.L, text)
        builder = get_loss_builder(self.datastore)
        self.build_datasets(builder)
        self.datastore.close()
        if 'losses_by_event' in self.datastore.parent:
            dstore = self.datastore.parent
        else:
            dstore = self.datastore
        args = [(dstore.filename, builder, oq.ses_ratio, rlzi)
                for rlzi in range(self.R)]
        h5 = hdf5.File(self.datastore.cachepath())
        try:
            acc = list(
                parallel.Starmap(postprocess, args, hdf5path=h5.filename))
        finally:
            # copy performance information from the cache to the datastore
            pd = h5['performance_data'][()]
            hdf5.extend3(self.datastore.filename, 'performance_data', pd)
        self.datastore.open('r+')  # reopen
        for r, (curves, maps), agg_losses in acc:
            if len(curves):  # some realization can give zero contribution
                self.datastore['agg_curves-rlzs'][:, r] = curves
            if len(maps):  # conditional_loss_poes can be empty
                self.datastore['agg_maps-rlzs'][:, r] = maps
            self.datastore['agg_losses-rlzs'][:, r] = agg_losses
        if self.R > 1:
            logging.info('Computing aggregate statistics')
            set_rlzs_stats(self.datastore, 'agg_curves')
            set_rlzs_stats(self.datastore, 'agg_losses')
            if oq.conditional_loss_poes:
                set_rlzs_stats(self.datastore, 'agg_maps')

        # sanity check with the asset_loss_table
        if oq.asset_loss_table and len(oq.aggregate_by) == 1:
            alt = self.datastore['asset_loss_table'][()]
            if alt.sum() == 0:  # nothing was saved
                return
            logging.info('Checking the loss curves')
            tags = getattr(self.assetcol.tagcol, oq.aggregate_by[0])[1:]
            T = len(tags)
            P = len(builder.return_periods)
            # sanity check on the loss curves for simple tag aggregation
            arr = self.assetcol.aggregate_by(oq.aggregate_by, alt)
            # shape (T, E, L)
            rlzs = self.datastore['events']['rlz_id']
            curves = numpy.zeros((P, self.R, self.L, T))
            for t in range(T):
                for r in range(self.R):
                    for l in range(self.L):
                        curves[:, r, l,
                               t] = losses_by_period(arr[t, rlzs == r, l],
                                                     builder.return_periods,
                                                     builder.num_events[r],
                                                     builder.eff_time)
            numpy.testing.assert_allclose(
                curves, self.datastore['agg_curves-rlzs'][()])
Example #17
 def test_extend3(self):
     nrows = hdf5.extend3(self.tmp, 'dset', numpy.zeros(3))
     self.assertEqual(nrows, 3)
Example #18
 def test_extend3_vlen_same_len(self):
     data = numpy.array([[4, 1], [1, 2], [3, 1]], hdf5.vfloat32)
     nrows = hdf5.extend3(self.tmp, 'dset', data)
     self.assertEqual(nrows, 3)
     with hdf5.File(self.tmp, 'r') as f:
         print(f['dset'][()])
Example #19
    def post_execute(self, times):
        """
        Compute and store average losses from the losses_by_event dataset,
        and then loss curves and maps.
        """
        if len(times):
            self.datastore.set_attrs(
                'task_info/start_ebrisk', times=times,
                events_per_sid=numpy.mean(self.events_per_sid))
        oq = self.oqparam
        shp = self.get_shape(self.L)  # (L, T...)
        text = ' x '.join(
            '%d(%s)' % (n, t) for t, n in zip(oq.aggregate_by, shp[1:]))
        logging.info('Producing %d(loss_types) x %s loss curves', self.L, text)
        builder = get_loss_builder(self.datastore)
        self.build_datasets(builder)
        self.datastore.close()
        if 'losses_by_event' in self.datastore.parent:
            dstore = self.datastore.parent
        else:
            dstore = self.datastore
        allargs = [(dstore.filename, builder, rlzi) for rlzi in range(self.R)]
        mon = performance.Monitor(hdf5=hdf5.File(self.datastore.hdf5cache()))
        acc = list(parallel.Starmap(compute_loss_curves_maps, allargs, mon))
        # copy performance information from the cache to the datastore
        pd = mon.hdf5['performance_data'][()]
        hdf5.extend3(self.datastore.filename, 'performance_data', pd)
        self.datastore.open('r+')  # reopen
        self.datastore['task_info/compute_loss_curves_and_maps'] = (
            mon.hdf5['task_info/compute_loss_curves_maps'][()])
        self.datastore.open('r+')
        with self.monitor('saving loss_curves and maps', autoflush=True):
            for r, (curves, maps) in acc:
                if len(curves):  # some realization can give zero contribution
                    self.datastore['agg_curves-rlzs'][:, r] = curves
                if len(maps):  # conditional_loss_poes can be empty
                    self.datastore['agg_maps-rlzs'][:, r] = maps
        if self.R > 1:
            logging.info('Computing aggregate loss curves statistics')
            set_rlzs_stats(self.datastore, 'agg_curves')
            self.datastore.set_attrs(
                'agg_curves-stats', return_periods=builder.return_periods,
                loss_types=' '.join(self.riskmodel.loss_types))
            if oq.conditional_loss_poes:
                logging.info('Computing aggregate loss maps statistics')
                set_rlzs_stats(self.datastore, 'agg_maps')

        # sanity check with the asset_loss_table
        if oq.asset_loss_table and len(oq.aggregate_by) == 1:
            alt = self.datastore['asset_loss_table'][()]
            if alt.sum() == 0:  # nothing was saved
                return
            logging.info('Checking the loss curves')
            tags = getattr(self.assetcol.tagcol, oq.aggregate_by[0])[1:]
            T = len(tags)
            P = len(builder.return_periods)
            # sanity check on the loss curves for simple tag aggregation
            arr = self.assetcol.aggregate_by(oq.aggregate_by, alt)
            # shape (T, E, L)
            rlzs = self.datastore['events']['rlz']
            curves = numpy.zeros((P, self.R, self.L, T))
            for t in range(T):
                for r in range(self.R):
                    for l in range(self.L):
                        curves[:, r, l, t] = losses_by_period(
                            arr[t, rlzs == r, l],
                            builder.return_periods,
                            builder.num_events[r],
                            builder.eff_time)
            numpy.testing.assert_allclose(
                curves, self.datastore['agg_curves-rlzs'][()])
Example #20
    def post_execute(self, times):
        """
        Compute and store average losses from the losses_by_event dataset,
        and then loss curves and maps.
        """
        if len(times):
            self.datastore.set_attrs('task_info/start_ebrisk',
                                     times=times,
                                     events_per_sid=numpy.mean(
                                         self.events_per_sid))
        oq = self.oqparam
        shp = self.get_shape(self.L)  # (L, T...)
        text = ' x '.join('%d(%s)' % (n, t)
                          for t, n in zip(oq.aggregate_by, shp[1:]))
        logging.info('Producing %d(loss_types) x %s loss curves', self.L, text)
        builder = get_loss_builder(self.datastore)
        self.build_datasets(builder)
        self.datastore.close()
        if 'losses_by_event' in self.datastore.parent:
            dstore = self.datastore.parent
        else:
            dstore = self.datastore
        allargs = [(dstore.filename, builder, rlzi) for rlzi in range(self.R)]
        mon = performance.Monitor(hdf5=hdf5.File(self.datastore.hdf5cache()))
        acc = list(parallel.Starmap(compute_loss_curves_maps, allargs, mon))
        # copy performance information from the cache to the datastore
        pd = mon.hdf5['performance_data'][()]
        hdf5.extend3(self.datastore.filename, 'performance_data', pd)
        self.datastore.open('r+')  # reopen
        self.datastore['task_info/compute_loss_curves_and_maps'] = (
            mon.hdf5['task_info/compute_loss_curves_maps'][()])
        self.datastore.open('r+')
        with self.monitor('saving loss_curves and maps', autoflush=True):
            for r, (curves, maps) in acc:
                if len(curves):  # some realization can give zero contribution
                    self.datastore['agg_curves-rlzs'][:, r] = curves
                if len(maps):  # conditional_loss_poes can be empty
                    self.datastore['agg_maps-rlzs'][:, r] = maps
        if self.R > 1:
            logging.info('Computing aggregate loss curves statistics')
            set_rlzs_stats(self.datastore, 'agg_curves')
            self.datastore.set_attrs('agg_curves-stats',
                                     return_periods=builder.return_periods,
                                     loss_types=' '.join(
                                         self.riskmodel.loss_types))
            if oq.conditional_loss_poes:
                logging.info('Computing aggregate loss maps statistics')
                set_rlzs_stats(self.datastore, 'agg_maps')

        # sanity check with the asset_loss_table
        if oq.asset_loss_table and len(oq.aggregate_by) == 1:
            alt = self.datastore['asset_loss_table'][()]
            if alt.sum() == 0:  # nothing was saved
                return
            logging.info('Checking the loss curves')
            tags = getattr(self.assetcol.tagcol, oq.aggregate_by[0])[1:]
            T = len(tags)
            P = len(builder.return_periods)
            # sanity check on the loss curves for simple tag aggregation
            arr = self.assetcol.aggregate_by(oq.aggregate_by, alt)
            # shape (T, E, L)
            rlzs = self.datastore['events']['rlz']
            curves = numpy.zeros((P, self.R, self.L, T))
            for t in range(T):
                for r in range(self.R):
                    for l in range(self.L):
                        curves[:, r, l,
                               t] = losses_by_period(arr[t, rlzs == r, l],
                                                     builder.return_periods,
                                                     builder.num_events[r],
                                                     builder.eff_time)
            numpy.testing.assert_allclose(
                curves, self.datastore['agg_curves-rlzs'][()])