Example #1
def _filter_agg(assetcol, losses, selected, stats=''):
    # losses is an array of shape (A, ..., R) with A=#assets, R=#realizations
    aids_by_tag = assetcol.get_aids_by_tag()
    idxs = set(range(len(assetcol)))
    tagnames = []
    for tag in selected:
        tagname, tagvalue = tag.split('=', 1)
        if tagvalue == '*':
            tagnames.append(tagname)
        else:
            idxs &= aids_by_tag[tag]
    if len(tagnames) > 1:
        raise ValueError('Too many * as tag values in %s' % tagnames)
    elif not tagnames:  # return an array of shape (..., R)
        return ArrayWrapper(
            _agg(losses, idxs), dict(selected=encode(selected), stats=stats))
    else:  # return an array of shape (T, ..., R)
        [tagname] = tagnames
        _tags = list(assetcol.tagcol.gen_tags(tagname))
        all_idxs = [idxs & aids_by_tag[t] for t in _tags]
        # NB: using a generator expression for all_idxs caused issues (?)
        data, tags = [], []
        for idx, tag in zip(all_idxs, _tags):
            agglosses = _agg(losses, idx)
            if len(agglosses):
                data.append(agglosses)
                tags.append(tag)
        return ArrayWrapper(
            numpy.array(data),
            dict(selected=encode(selected), tags=encode(tags), stats=stats))
Example #2
 def build_datasets(self, builder):
     oq = self.oqparam
     stats = oq.hazard_stats().items()
     S = len(stats)
     P = len(builder.return_periods)
     C = len(oq.conditional_loss_poes)
     loss_types = ' '.join(self.riskmodel.loss_types)
     shp = self.get_shape(P, self.R, self.L)  # shape P, R, L, T...
     self.datastore.create_dset('agg_curves-rlzs', F32, shp)
     self.datastore.set_attrs(
         'agg_curves-rlzs', return_periods=builder.return_periods,
         loss_types=loss_types)
     if oq.conditional_loss_poes:
         shp = self.get_shape(C, self.R, self.L)  # shape C, R, L, T...
         self.datastore.create_dset('agg_maps-rlzs', F32, shp)
     if self.R > 1:
         shp = self.get_shape(P, S, self.L)  # shape P, S, L, T...
         self.datastore.create_dset('agg_curves-stats', F32, shp)
         self.datastore.set_attrs(
             'agg_curves-stats', return_periods=builder.return_periods,
             stats=[encode(name) for (name, func) in stats],
             loss_types=loss_types)
         if oq.conditional_loss_poes:
             shp = self.get_shape(C, S, self.L)  # shape C, S, L, T...
             self.datastore.create_dset('agg_maps-stats', F32, shp)
             self.datastore.set_attrs(
                 'agg_maps-stats',
                 stats=[encode(name) for (name, func) in stats],
                 loss_types=loss_types)
Example #3
 def build_datasets(self, builder):
     oq = self.oqparam
     R = len(builder.weights)
     stats = self.param['stats']
     A = self.A
     S = len(stats)
     P = len(builder.return_periods)
     C = len(oq.conditional_loss_poes)
     L = self.L
     self.loss_maps_dt = (F32, (C, L))
     if oq.individual_curves or R == 1:
         self.datastore.create_dset('curves-rlzs', F32, (A, R, P, L))
         self.datastore.set_attrs(
             'curves-rlzs', return_periods=builder.return_periods)
     if oq.conditional_loss_poes:
         self.datastore.create_dset(
             'loss_maps-rlzs', self.loss_maps_dt, (A, R), fillvalue=None)
     if R > 1:
         self.datastore.create_dset('curves-stats', F32, (A, S, P, L))
         self.datastore.set_attrs(
             'curves-stats', return_periods=builder.return_periods,
             stats=[encode(name) for (name, func) in stats])
         if oq.conditional_loss_poes:
             self.datastore.create_dset(
                 'loss_maps-stats', self.loss_maps_dt, (A, S),
                 fillvalue=None)
             self.datastore.set_attrs(
                 'loss_maps-stats',
                 stats=[encode(name) for (name, func) in stats])
Example #4
    def post_execute(self, results):
        """
        Save all the results of the disaggregation. NB: the number of results
        to save is #sites * #rlzs * #disagg_poes * #IMTs.

        :param results:
            a dictionary (sid, rlzi, poe, imt) -> trti -> disagg matrix
        """
        T = len(self.trts)
        # build a dictionary (sid, rlzi, poe, imt) -> 6D matrix
        results = {k: _to_matrix(v, T) for k, v in results.items()}

        # get the number of outputs
        shp = self.get_NRPM()
        logging.info('Extracting and saving the PMFs for %d outputs '
                     '(N=%s, R=%d, P=%d, M=%d)', numpy.prod(shp), *shp)
        self.save_disagg_result('disagg', results)

        hstats = self.oqparam.hazard_stats()
        if len(self.rlzs_assoc.realizations) > 1 and hstats:
            with self.monitor('computing and saving stats', measuremem=True):
                res = self.build_stats(results, hstats.items())
                self.save_disagg_result('disagg-stats', res)

        self.datastore.set_attrs(
            'disagg', trts=encode(self.trts), num_ruptures=self.num_ruptures)
Example #5
 def store_source_info(self, pmap_by_grp_id):
     # store the information about received data
     received = self.taskman.received
     if received:
         tname = self.taskman.name
         self.datastore.save('job_info', {
             tname + '_max_received_per_task': max(received),
             tname + '_tot_received': sum(received),
             tname + '_num_tasks': len(received)})
     # then save the calculation times per each source
     calc_times = getattr(pmap_by_grp_id, 'calc_times', [])
     if calc_times:
         sources = self.csm.get_sources()
         info_dict = {(rec['src_group_id'], rec['source_id']): rec
                      for rec in self.source_info}
         for src_idx, dt in calc_times:
             src = sources[src_idx]
             info = info_dict[src.src_group_id, encode(src.source_id)]
             info['calc_time'] += dt
         rows = sorted(
             info_dict.values(), key=operator.itemgetter(7), reverse=True)
         array = numpy.zeros(len(rows), source.source_info_dt)
         for i, row in enumerate(rows):
             for name in array.dtype.names:
                 array[i][name] = row[name]
         self.source_info = array
     self.datastore.hdf5.flush()
Example #6
def maybe_encode(value):
    """
    If value is a sequence of strings, encode it; otherwise return it
    unchanged
    """
    if isinstance(value, (list, tuple)):
        if value and isinstance(value[0], str):
            return encode(value)
    return value
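A minimal usage sketch of maybe_encode (hedged: the encode below is a local stand-in; the real helper lives in openquake.baselib.python3compat and turns str into UTF-8 bytes, recursing over lists and tuples):

def encode(values):
    # stand-in: encode each string of a sequence to UTF-8 bytes
    return [v.encode('utf-8') for v in values]

print(maybe_encode(['mean', 'max']))  # -> [b'mean', b'max'] (encoded)
print(maybe_encode([1, 2, 3]))        # -> [1, 2, 3] (returned unchanged)
print(maybe_encode('mean'))           # -> 'mean' (not a list or tuple)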
Example #7
def get_gmfs_from_txt(oqparam, fname):
    """
    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param fname:
        the full path of the CSV file
    :returns:
        a composite array of shape (N, R) read from a CSV file with format
        `etag indices [gmv1 ... gmvN] * num_imts`
    """
    with open(fname) as csvfile:
        firstline = next(csvfile)
        try:
            coords = valid.coordinates(firstline)
        except:
            raise InvalidFile(
                'The first line of %s is expected to contain comma separated '
                'ordered coordinates, got %s instead' % (fname, firstline))
        sitecol = sitecol_from_coords(oqparam, coords)
        if not oqparam.imtls:
            oqparam.set_risk_imtls(get_risk_models(oqparam))
        imts = list(oqparam.imtls)
        imt_dt = numpy.dtype([(imt, F32) for imt in imts])
        num_gmfs = oqparam.number_of_ground_motion_fields
        gmf_by_imt = numpy.zeros((num_gmfs, len(sitecol)), imt_dt)
        etags = []

        for lineno, line in enumerate(csvfile, 2):
            row = line.split(',')
            try:
                indices = list(map(valid.positiveint, row[1].split()))
            except:
                raise InvalidFile(
                    'The second column in %s is expected to contain integer '
                    'indices, got %s' % (fname, row[1]))
            r_sites = (
                sitecol if not indices else
                site.FilteredSiteCollection(indices, sitecol))
            for i in range(len(imts)):
                try:
                    array = numpy.array(valid.positivefloats(row[i + 2]))
                    # NB: i + 2 because the first 2 fields are etag and indices
                except:
                    raise InvalidFile(
                        'The column #%d in %s is expected to contain positive '
                        'floats, got %s instead' % (i + 3, fname, row[i + 2]))
                gmf_by_imt[imts[i]][lineno - 2] = r_sites.expand(array, 0)
            etags.append(row[0])
    if lineno < num_gmfs + 1:
        raise InvalidFile('%s contains %d rows, expected %d' % (
            fname, lineno, num_gmfs + 1))
    if etags != sorted(etags):
        raise InvalidFile('The etags in %s are not ordered: %s'
                          % (fname, etags))
    return sitecol, numpy.array([encode(e) for e in etags]), gmf_by_imt.T
Example #8
 def __toh5__(self):
     # NB: the loss types do not contain spaces, so we can store them
     # together as a single space-separated string
     op = decode(self.occupancy_periods)
     attrs = {'time_event': self.time_event or 'None',
              'occupancy_periods': op,
              'tot_sites': self.tot_sites,
              'fields': ' '.join(self.fields),
              'tagnames': encode(self.tagnames),
              'nbytes': self.array.nbytes}
     return dict(array=self.array, tagcol=self.tagcol), attrs
Example #9
    def post_execute(self, num_events):
        """
        Save risk data and possibly execute the EbrPostCalculator
        """
        # gmv[:-2] are the total gmv per each IMT
        gmv = sum(gm[:-2].sum() for gm in self.gmdata.values())
        if not gmv:
            raise RuntimeError('No GMFs were generated, perhaps they were '
                               'all below the minimum_intensity threshold')

        if 'agg_loss_table' not in self.datastore:
            logging.warning(
                'No losses were generated: most likely there is an error in '
                'your input files or the GMFs were below the minimum '
                'intensity')
        else:
            self.datastore.set_nbytes('agg_loss_table')
            E = sum(num_events.values())
            agglt = self.datastore['agg_loss_table']
            agglt.attrs['nonzero_fraction'] = len(agglt) / E

        # build aggregate loss curves
        self.before_export()  # set 'realizations'
        oq = self.oqparam
        b = get_loss_builder(self.datastore)
        alt = self.datastore['agg_loss_table']
        stats = oq.risk_stats()
        array, array_stats = b.build(alt, stats)
        self.datastore['agg_curves-rlzs'] = array
        units = self.assetcol.units(loss_types=array.dtype.names)
        self.datastore.set_attrs('agg_curves-rlzs',
                                 return_periods=b.return_periods,
                                 units=units)
        if array_stats is not None:
            self.datastore['agg_curves-stats'] = array_stats
            self.datastore.set_attrs(
                'agg_curves-stats',
                return_periods=b.return_periods,
                stats=[encode(name) for (name, func) in stats],
                units=units)

        if 'all_loss_ratios' in self.datastore:
            self.datastore.save_vlen('all_loss_ratios/indices', [
                numpy.array(self.indices[aid], riskinput.indices_dt)
                for aid in range(self.A)
            ])
            self.datastore.set_attrs('all_loss_ratios',
                                     loss_types=' '.join(
                                         self.riskmodel.loss_types))
            dset = self.datastore['all_loss_ratios/data']
            nbytes = dset.size * dset.dtype.itemsize
            self.datastore.set_attrs('all_loss_ratios/data',
                                     nbytes=nbytes,
                                     bytes_per_asset=nbytes / self.A)
            EbrPostCalculator(self).run(close=False)
Example #10
def extract_task_info(dstore, what):
    """
    Extracts the task distribution. Use it as /extract/task_info?kind=classical
    """
    dic = group_array(dstore['task_info'][()], 'taskname')
    if 'kind' in what:
        name = parse(what)['kind'][0]
        yield name, dic[encode(name)]
        return
    for name in dic:
        yield decode(name), dic[name]
Example #11
def get_gmfs_from_txt(oqparam, fname):
    """
    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :param fname:
        the full path of the CSV file
    :returns:
        a composite array of shape (N, R) read from a CSV file with format
        `etag indices [gmv1 ... gmvN] * num_imts`
    """
    with open(fname) as csvfile:
        firstline = next(csvfile)
        try:
            coords = valid.coordinates(firstline)
        except:
            raise InvalidFile(
                'The first line of %s is expected to contain comma separated '
                'ordered coordinates, got %s instead' % (fname, firstline))
        lons, lats, depths = zip(*coords)
        sitecol = site.SiteCollection.from_points(lons, lats, depths, oqparam)
        if not oqparam.imtls:
            oqparam.set_risk_imtls(get_risk_models(oqparam))
        imts = list(oqparam.imtls)
        imt_dt = numpy.dtype([(imt, F32) for imt in imts])
        num_gmfs = oqparam.number_of_ground_motion_fields
        gmf_by_imt = numpy.zeros((num_gmfs, len(sitecol)), imt_dt)
        eids = []

        for lineno, line in enumerate(csvfile, 2):
            row = line.split(',')
            try:
                indices = list(map(valid.positiveint, row[1].split()))
            except:
                raise InvalidFile(
                    'The second column in %s is expected to contain integer '
                    'indices, got %s' % (fname, row[1]))
            r_sites = (sitecol if not indices else site.FilteredSiteCollection(
                indices, sitecol))
            for i in range(len(imts)):
                try:
                    array = numpy.array(valid.positivefloats(row[i + 2]))
                    # NB: i + 2 because the first 2 fields are etag and indices
                except:
                    raise InvalidFile(
                        'The column #%d in %s is expected to contain positive '
                        'floats, got %s instead' % (i + 3, fname, row[i + 2]))
                gmf_by_imt[imts[i]][lineno - 2][r_sites.sids] = array
            eids.append(row[0])
    if lineno < num_gmfs + 1:
        raise InvalidFile('%s contains %d rows, expected %d' %
                          (fname, lineno, num_gmfs + 1))
    if eids != sorted(eids):
        raise InvalidFile('The eids in %s are not ordered: %s' % (fname, eids))
    return sitecol, numpy.array([encode(e) for e in eids]), gmf_by_imt.T
Example #12
def _display(node, indent, expandattrs, expandvals, output):
    """Core function to display a Node object"""
    attrs = _displayattrs(node.attrib, expandattrs)
    if node.text is None or not expandvals:
        val = ''
    elif isinstance(node.text, str):
        val = ' %s' % repr(node.text.strip())
    else:
        val = ' %s' % repr(node.text)  # node.text can be a tuple
    output.write(encode(indent + striptag(node.tag) + attrs + val + '\n'))
    for sub_node in node:
        _display(sub_node, indent + '  ', expandattrs, expandvals, output)
Example #13
    def postproc(self):
        """
        Build aggregate loss curves and run EbrPostCalculator
        """
        dstore = self.datastore
        self.before_export()  # set 'realizations'
        oq = self.oqparam
        eff_time = oq.investigation_time * oq.ses_per_logic_tree_path
        if eff_time < 2:
            logging.warning('eff_time=%s is too small to compute agg_curves',
                            eff_time)
            return
        stats = oq.risk_stats()
        # store avg_losses-stats
        if oq.avg_losses:
            set_rlzs_stats(self.datastore, 'avg_losses')
        b = get_loss_builder(dstore)
        if 'ruptures' in dstore:
            logging.info('Building rup_loss_table')
            with self.monitor('building rup_loss_table', measuremem=True):
                dstore['rup_loss_table'] = rlt = build_rup_loss_table(dstore)
                ridx = [rlt[lt].argmax() for lt in oq.loss_dt().names]
                dstore.set_attrs('rup_loss_table', ridx=ridx)
        logging.info('Building aggregate loss curves')
        with self.monitor('building agg_curves', measuremem=True):
            array, array_stats = b.build(dstore['agg_loss_table'].value, stats)
        self.datastore['agg_curves-rlzs'] = array
        units = self.assetcol.units(loss_types=array.dtype.names)
        self.datastore.set_attrs('agg_curves-rlzs',
                                 return_periods=b.return_periods,
                                 units=units)
        if array_stats is not None:
            self.datastore['agg_curves-stats'] = array_stats
            self.datastore.set_attrs(
                'agg_curves-stats',
                return_periods=b.return_periods,
                stats=[encode(name) for (name, func) in stats],
                units=units)

        if 'all_loss_ratios' in self.datastore:
            self.datastore.save_vlen('all_loss_ratios/indices', [
                numpy.array(self.indices[aid], riskinput.indices_dt)
                for aid in range(self.A)
            ])
            self.datastore.set_attrs('all_loss_ratios',
                                     loss_types=' '.join(
                                         self.riskmodel.loss_types))
            dset = self.datastore['all_loss_ratios/data']
            nbytes = dset.size * dset.dtype.itemsize
            self.datastore.set_attrs('all_loss_ratios/data',
                                     nbytes=nbytes,
                                     bytes_per_asset=nbytes / self.A)
            EbrPostCalculator(self).run(close=False)
Example #14
def sanitize(value):
    """
    Sanitize the value so that it can be stored as an HDF5 attribute
    """
    if isinstance(value, bytes):
        return numpy.void(value)
    elif isinstance(value, (list, tuple)):
        if value and isinstance(value[0], str):
            return encode(value)
    elif isinstance(value, int) and value > sys.maxsize:
        return float(value)
    return value
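A short sketch of the branches of sanitize (hedged: it assumes numpy and sys are imported, and uses a local stand-in for encode that maps a sequence of str to UTF-8 byte strings, as openquake.baselib.python3compat.encode does):

import sys
import numpy

def encode(values):
    # stand-in: encode each string of a sequence to UTF-8 bytes
    return [v.encode('utf-8') for v in values]

print(sanitize(b'raw bytes'))     # bytes -> numpy.void, storable as HDF5 attribute
print(sanitize(['mean', 'max']))  # list of str -> [b'mean', b'max']
print(sanitize(2 ** 70))          # int larger than sys.maxsize -> float
print(sanitize('a string'))       # anything else is returned unchanged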
Example #15
 def etags(self):
     """
     An array of tags for the underlying seismic events
     """
     tags = []
     for (eid, ses, occ, sampleid) in self.events:
         tag = 'grp=%02d~ses=%04d~src=%s~rup=%d-%02d' % (
             self.grp_id, ses, self.source_id, self.serial, occ)
         if sampleid > 0:
             tag += '~sample=%d' % sampleid
         tags.append(encode(tag))
     return numpy.array(tags)
Example #16
 def units(self, loss_types):
     """
     :param loss_types: a list of loss types
     :returns: an array of units as byte strings, suitable for HDF5
     """
     units = self.cc.units
     lst = []
     for lt in loss_types:
         if lt.endswith('_ins'):
             lt = lt[:-4]
         lst.append(encode(units[lt]))
     return numpy.array(lst)
Example #17
def _display(node, indent, expandattrs, expandvals, output):
    """Core function to display a Node object"""
    attrs = _displayattrs(node.attrib, expandattrs)
    if node.text is None or not expandvals:
        val = ''
    elif isinstance(node.text, str):
        val = ' %s' % repr(node.text.strip())
    else:
        val = ' %s' % repr(node.text)  # node.text can be a tuple
    output.write(encode(indent + striptag(node.tag) + attrs + val + '\n'))
    for sub_node in node:
        _display(sub_node, indent + '  ', expandattrs, expandvals, output)
Example #18
 def build_datasets(self, builder):
     oq = self.oqparam
     R = len(builder.weights)
     stats = self.param['stats']
     A = self.A
     S = len(stats)
     P = len(builder.return_periods)
     C = len(oq.conditional_loss_poes)
     L = self.L
     self.loss_maps_dt = (F32, (C, L))
     if oq.individual_curves or R == 1:
         self.datastore.create_dset('curves-rlzs', F32, (A, R, P, L))
         shape_descr = ['assets', 'rlzs', 'return_periods', 'loss_types']
         self.datastore.set_attrs('curves-rlzs',
                                  shape_descr=shape_descr,
                                  assets=self.assetcol['id'],
                                  return_periods=builder.return_periods,
                                  rlzs=numpy.arange(R),
                                  loss_types=oq.loss_names)
     if oq.conditional_loss_poes:
         self.datastore.create_dset('loss_maps-rlzs',
                                    self.loss_maps_dt, (A, R),
                                    fillvalue=None)
     if R > 1:
         self.datastore.create_dset('curves-stats', F32, (A, S, P, L))
         shape_descr = ['assets', 'stats', 'return_periods', 'loss_types']
         self.datastore.set_attrs(
             'curves-stats',
             shape_descr=shape_descr,
             assets=self.assetcol['id'],
             stats=[encode(name) for (name, func) in stats],
             return_periods=builder.return_periods,
             loss_types=oq.loss_names)
         if oq.conditional_loss_poes:
             self.datastore.create_dset('loss_maps-stats',
                                        self.loss_maps_dt, (A, S),
                                        fillvalue=None)
             self.datastore.set_attrs(
                 'loss_maps-stats',
                 stats=[encode(name) for (name, func) in stats])
Example #19
def saving_sources_by_task(iterargs, dstore):
    """
    Yield the iterargs again by populating 'source_data'
    """
    source_ids = []
    data = []
    for i, args in enumerate(iterargs, 1):
        source_ids.append(get_src_ids(args[0]))
        for src in args[0]:  # collect source data
            data.append((i, src.nsites, src.num_ruptures, src.weight))
        yield args
    dstore['task_sources'] = encode(source_ids)
    dstore.extend('source_data', numpy.array(data, source_data_dt))
Example #20
def saving_sources_by_task(iterargs, dstore):
    """
    Yield the iterargs again by populating 'task_info/source_ids'
    """
    source_ids = []
    data = []
    for i, args in enumerate(iterargs, 1):
        source_ids.append(' '.join(src.source_id for src in args[0]))
        for src in args[0]:  # collect source data
            data.append((i, src.nsites, src.weight))
        yield args
    dstore['task_sources'] = numpy.array([encode(s) for s in source_ids])
    dstore.extend('task_info/source_data', numpy.array(data, source_data_dt))
Example #21
 def __toh5__(self):
     # NB: the loss types do not contain spaces, so we can store them
     # together as a single space-separated string
     op = decode(self.occupancy_periods)
     attrs = {'time_event': self.time_event or 'None',
              'occupancy_periods': op,
              'loss_types': ' '.join(self.loss_types),
              'tot_sites': self.tot_sites,
              'fields': ' '.join(self.fields),
              'tagnames': encode(self.tagnames),
              'nbytes': self.array.nbytes}
     return dict(
         array=self.array, tagcol=self.tagcol), attrs
Example #22
    def post_execute(self, result):
        """
        Compute stats for the aggregated distributions and save
        the results on the datastore.
        """
        loss_dt = self.oqparam.loss_dt()
        LI = len(loss_dt.names)
        dtlist = [('eid', U64), ('rlzi', U16), ('loss', (F32, LI))]
        I = self.oqparam.insured_losses + 1
        R = self.R
        with self.monitor('saving outputs', autoflush=True):
            A = len(self.assetcol)

            # agg losses
            res = result['agg']
            E, LI = res.shape
            L = LI // I
            mean, std = scientific.mean_std(res)  # shape LI
            agglosses = numpy.zeros(LI, stat_dt)
            agglosses['mean'] = F32(mean)
            agglosses['stddev'] = F32(std)

            # losses by asset
            losses_by_asset = numpy.zeros((A, R, LI), stat_dt)
            for (l, r, aid, stat) in result['avg']:
                for i in range(I):
                    losses_by_asset[aid, r, l + L * i] = stat[i]
            self.datastore['losses_by_asset'] = losses_by_asset
            self.datastore['agglosses'] = agglosses

            # losses by event
            num_gmfs = self.oqparam.number_of_ground_motion_fields
            lbe = numpy.fromiter(
                ((ei, ei // num_gmfs, res[ei]) for ei in range(E)), dtlist)
            self.datastore['losses_by_event'] = lbe
            loss_types = ' '.join(self.oqparam.loss_dt().names)
            self.datastore.set_attrs('losses_by_event', loss_types=loss_types)

            # all losses
            if self.oqparam.asset_loss_table:
                array = numpy.zeros((A, E), loss_dt)
                for (l, r), losses_by_aid in result['all_losses'].items():
                    slc = self.event_slice(r)
                    for aid in losses_by_aid:
                        lba = losses_by_aid[aid]  # (E, I)
                        for i in range(I):
                            lt = loss_dt.names[l + L * i]
                            array[lt][aid, slc] = lba[:, i]
                self.datastore['asset_loss_table'] = array
                tags = [encode(tag) for tag in self.assetcol.tagcol]
                self.datastore.set_attrs('asset_loss_table', tags=tags)
Example #23
    def post_execute(self, result):
        """
        Compute stats for the aggregated distributions and save
        the results on the datastore.
        """
        loss_dt = self.oqparam.loss_dt()
        L = len(loss_dt.names)
        dtlist = [('event_id', U32), ('rlzi', U16), ('loss', (F32, (L, )))]
        R = self.R
        with self.monitor('saving outputs'):
            A = len(self.assetcol)

            # agg losses
            res = result['agg']
            E, L = res.shape
            agglosses = numpy.zeros((R, L), stat_dt)
            for r in range(R):
                mean, std = scientific.mean_std(res[self.event_slice(r)])
                agglosses[r]['mean'] = F32(mean)
                agglosses[r]['stddev'] = F32(std)

            # losses by asset
            losses_by_asset = numpy.zeros((A, R, L), stat_dt)
            for (l, r, aid, stat) in result['avg']:
                losses_by_asset[aid, r, l] = stat
            self.datastore['losses_by_asset'] = losses_by_asset
            self.datastore['agglosses'] = agglosses

            # losses by event
            lbe = numpy.zeros(E, dtlist)
            lbe['event_id'] = range(E)
            lbe['rlzi'] = (lbe['event_id'] //
                           self.oqparam.number_of_ground_motion_fields)
            lbe['loss'] = res
            self.datastore['losses_by_event'] = lbe
            loss_types = self.oqparam.loss_dt().names
            self.datastore.set_attrs('losses_by_event', loss_types=loss_types)

            # all losses
            if self.oqparam.asset_loss_table:
                array = numpy.zeros((A, E), loss_dt)
                for (l, r), losses_by_aid in result['all_losses'].items():
                    slc = self.event_slice(r)
                    for aid in losses_by_aid:
                        lba = losses_by_aid[aid]  # E
                        lt = loss_dt.names[l]
                        array[lt][aid, slc] = lba
                self.datastore['asset_loss_table'] = array
                tags = [encode(tag) for tag in self.assetcol.tagcol]
                self.datastore.set_attrs('asset_loss_table', tags=tags)
Example #24
def view_job_info(token, dstore):
    """
    Determine the amount of data transferred from the controller node
    to the workers and back in a classical calculation.
    """
    data = []
    task_info = dstore['task_info'][()]
    task_sent = ast.literal_eval(decode(dstore['task_sent'][()]))
    for task, dic in task_sent.items():
        sent = sorted(dic.items(), key=operator.itemgetter(1), reverse=True)
        sent = ['%s=%s' % (k, humansize(v)) for k, v in sent[:3]]
        recv = get_array(task_info, taskname=encode(task))['received'].sum()
        data.append((task, ' '.join(sent), humansize(recv)))
    return numpy.array(data, dt('task sent received'))
Example #25
def set_shape_attrs(hdf5file, dsetname, kw):
    """
    Set shape attributes on a dataset (and possibly other attributes)
    """
    dset = hdf5file[dsetname]
    S = len(dset.shape)
    if len(kw) < S:
        raise ValueError('The dataset %s has %d dimensions but you passed '
                         '%d axes' % (dsetname, S, len(kw)))
    dset.attrs['shape_descr'] = encode(list(kw))[:S]
    for k, v in kw.items():
        dset.attrs[k] = v
    for d, k in enumerate(dset.attrs['shape_descr']):
        dset.dims[d].label = k  # set dimension label
Example #26
 def build_datasets(self, builder, aggregate_by, prefix):
     """
     Create the datasets agg_curves-XXX, tot_curves-XXX,
     agg_losses-XXX, tot_losses-XXX.
     """
     P = len(builder.return_periods)
     aggby = {'aggregate_by': aggregate_by}
     for tagname in aggregate_by:
         aggby[tagname] = encode(getattr(self.tagcol, tagname)[1:])
     shp = self.get_shape(self.L, self.R, aggregate_by=aggregate_by)
     # shape L, R, T...
     self.datastore.create_dset(prefix + 'losses-rlzs', F32, shp)
     shp = self.get_shape(P, self.R, self.L, aggregate_by=aggregate_by)
     # shape P, R, L, T...
     self.datastore.create_dset(prefix + 'curves-rlzs', F32, shp)
Example #27
 def get_units(self, loss_types):
     """
     :param loss_types: a list of loss types
     :returns: an array of units as byte strings, suitable for HDF5
     """
     lst = []
     for lt in loss_types:
         if lt.endswith('_ins'):
             lt = lt[:-4]
         if lt == 'occupants':
             unit = 'people'
         else:
             unit = self.units[lt]
         lst.append(encode(unit))
     return numpy.array(lst)
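A hedged sketch of calling get_units outside its class: _FakeCostCalculator is a hypothetical stand-in that only carries the units mapping (in OpenQuake the method belongs to the cost-calculator object), and the sketch assumes get_units has been pasted at module level together with an encode helper that turns a str into UTF-8 bytes:

import numpy

def encode(val):
    # stand-in for openquake.baselib.python3compat.encode on a single string
    return val.encode('utf-8')

class _FakeCostCalculator:
    units = {'structural': 'EUR', 'nonstructural': 'EUR'}

print(get_units(_FakeCostCalculator(),
                ['structural', 'structural_ins', 'occupants']))
# -> [b'EUR' b'EUR' b'people']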
Example #28
 def get_units(self, loss_types):
     """
     :param loss_types: a list of loss types
     :returns: an array of units as byte strings, suitable for HDF5
     """
     lst = []
     for lt in loss_types:
         if lt.endswith('_ins'):
             lt = lt[:-4]
         if lt == 'occupants':
             unit = 'people'
         else:
             unit = self.units[lt]
         lst.append(encode(unit))
     return numpy.array(lst)
Example #29
 def __toh5__(self):
     # NB: the loss types do not contain spaces, so we can store them
     # together as a single space-separated string
     op = decode(self.occupancy_periods)
     attrs = {'time_event': self.time_event or 'None',
              'occupancy_periods': op,
              'loss_types': ' '.join(self.loss_types),
              'deduc': ' '.join(self.deduc),
              'i_lim': ' '.join(self.i_lim),
              'retro': ' '.join(self.retro),
              'tot_sites': self.tot_sites,
              'tagnames': encode(self.tagnames),
              'nbytes': self.array.nbytes}
     return dict(
         array=self.array, cost_calculator=self.cost_calculator,
         tagcol=self.tagcol, asset_refs=self.asset_refs), attrs
Example #30
 def __toh5__(self):
     # NB: the loss types do not contain spaces, so we can store them
     # together as a single space-separated string
     op = decode(self.occupancy_periods)
     attrs = {'time_event': self.time_event or 'None',
              'occupancy_periods': op,
              'loss_types': ' '.join(self.loss_types),
              'deduc': ' '.join(self.deduc),
              'i_lim': ' '.join(self.i_lim),
              'retro': ' '.join(self.retro),
              'tot_sites': self.tot_sites,
              'tagnames': encode(self.tagnames),
              'nbytes': self.array.nbytes}
     return dict(
         array=self.array, cost_calculator=self.cost_calculator,
         tagcol=self.tagcol, asset_refs=self.asset_refs), attrs
Example #31
def get_assets(dstore):
    """
    :param dstore: a datastore with an 'assetcol' key
    :returns: an array of records (id, tag1, ..., tagN, lon, lat)
    """
    assetcol = dstore['assetcol']
    tagnames = sorted(tn for tn in assetcol.tagnames if tn != 'id')
    tag = {t: getattr(assetcol.tagcol, t) for t in tagnames}
    dtlist = [('id', '<S100')]
    for tagname in tagnames:
        dtlist.append((tagname, '<S100'))
    dtlist.extend([('lon', F32), ('lat', F32)])
    asset_data = []
    for a in assetcol.array:
        tup = tuple(python3compat.encode(tag[t][a[t]]) for t in tagnames)
        asset_data.append((a['id'],) + tup + (a['lon'], a['lat']))
    return numpy.array(asset_data, dtlist)
Example #32
    def post_execute(self, result):
        """
        Compute stats for the aggregated distributions and save
        the results on the datastore.
        """
        loss_dt = self.oqparam.loss_dt()
        LI = len(loss_dt.names)
        dtlist = [('eid', U64), ('loss', (F32, LI))]
        R = self.R
        with self.monitor('saving outputs', autoflush=True):
            A = len(self.assetcol)

            # agg losses
            res = result['agg']
            E, L = res.shape
            agglosses = numpy.zeros((R, L), stat_dt)
            for r in range(R):
                mean, std = scientific.mean_std(res[self.event_slice(r)])
                agglosses[r]['mean'] = F32(mean)
                agglosses[r]['stddev'] = F32(std)

            # losses by asset
            losses_by_asset = numpy.zeros((A, R, L), stat_dt)
            for (l, r, aid, stat) in result['avg']:
                losses_by_asset[aid, r, l] = stat
            self.datastore['losses_by_asset'] = losses_by_asset
            self.datastore['agglosses'] = agglosses

            # losses by event
            lbe = numpy.fromiter(((ei, res[ei]) for ei in range(E)), dtlist)
            self.datastore['losses_by_event'] = lbe
            loss_types = ' '.join(self.oqparam.loss_dt().names)
            self.datastore.set_attrs('losses_by_event', loss_types=loss_types)

            # all losses
            if self.oqparam.asset_loss_table:
                array = numpy.zeros((A, E), loss_dt)
                for (l, r), losses_by_aid in result['all_losses'].items():
                    slc = self.event_slice(r)
                    for aid in losses_by_aid:
                        lba = losses_by_aid[aid]  # E
                        lt = loss_dt.names[l]
                        array[lt][aid, slc] = lba
                self.datastore['asset_loss_table'] = array
                tags = [encode(tag) for tag in self.assetcol.tagcol]
                self.datastore.set_attrs('asset_loss_table', tags=tags)
Example #33
 def postproc(self):
     """
     Build aggregate loss curves in process
     """
     dstore = self.datastore
     self.before_export()  # set 'realizations'
     oq = self.oqparam
     stats = self.param['stats']
     # store avg_losses-stats
     if oq.avg_losses:
         set_rlzs_stats(self.datastore, 'avg_losses')
     try:
         b = self.param['builder']
     except KeyError:  # don't build auxiliary tables
         return
     if dstore.parent:
         dstore.parent.open('r')  # to read the ruptures
     if 'ruptures' in self.datastore and len(self.datastore['ruptures']):
         logging.info('Building loss tables')
         with self.monitor('building loss tables', measuremem=True):
             rlt, lbr = build_loss_tables(dstore)
             dstore['rup_loss_table'] = rlt
             dstore['losses_by_rlzi'] = lbr
             ridx = [rlt[:, lti].argmax() for lti in range(self.L)]
             dstore.set_attrs('rup_loss_table', ridx=ridx)
     logging.info('Building aggregate loss curves')
     with self.monitor('building agg_curves', measuremem=True):
         lbr = group_array(dstore['losses_by_event'][()], 'rlzi')
         dic = {r: arr['loss'] for r, arr in lbr.items()}
         array, arr_stats = b.build(dic, stats)
     loss_types = ' '.join(oq.loss_dt().names)
     units = self.datastore['cost_calculator'].get_units(loss_types.split())
     if oq.individual_curves or self.R == 1:
         self.datastore['agg_curves-rlzs'] = array  # shape (P, R, L)
         self.datastore.set_attrs('agg_curves-rlzs',
                                  return_periods=b.return_periods,
                                  loss_types=loss_types,
                                  units=units)
     if arr_stats is not None:
         self.datastore['agg_curves-stats'] = arr_stats  # shape (P, S, L)
         self.datastore.set_attrs(
             'agg_curves-stats',
             return_periods=b.return_periods,
             stats=[encode(name) for (name, func) in stats],
             loss_types=loss_types,
             units=units)
Example #34
    def post_execute(self, results):
        """
        Save all the results of the disaggregation. NB: the number of results
        to save is #sites * #rlzs * #disagg_poes * #IMTs.

        :param results:
            a dictionary sid -> trti -> disagg matrix
        """
        T = len(self.trts)
        # build a dictionary m, s -> 8D matrix of shape (T, ..., E, P)
        results = {ms: _trt_matrix(dic, T) for ms, dic in results.items()}

        # get the number of outputs
        shp = (self.N, len(self.poes_disagg), len(self.imts), self.Z)
        logging.info('Extracting and saving the PMFs for %d outputs '
                     '(N=%s, P=%d, M=%d, Z=%d)', numpy.prod(shp), *shp)
        self.save_disagg_results(results, trts=encode(self.trts))
Example #35
def extract_disagg_layer(dstore, what):
    """
    Extract a disaggregation layer containing all sites and outputs
    Example:
    http://127.0.0.1:8800/v1/calc/30/extract/disagg_layer?
    """
    qdict = parse(what)
    oq = dstore['oqparam']
    oq.maximum_distance = filters.MagDepDistance(oq.maximum_distance)
    if 'kind' in qdict:
        kinds = qdict['kind']
    else:
        kinds = oq.disagg_outputs
    sitecol = dstore['sitecol']
    poes_disagg = oq.poes_disagg or (None, )
    edges, shapedic = disagg.get_edges_shapedic(oq, sitecol,
                                                dstore['source_mags'])
    dt = _disagg_output_dt(shapedic, kinds, oq.imtls, poes_disagg)
    out = numpy.zeros(len(sitecol), dt)
    realizations = numpy.array(dstore['full_lt'].get_realizations())
    hmap4 = dstore['hmap4'][:]
    best_rlzs = dstore['best_rlzs'][:]
    arr = {kind: dstore['disagg/' + kind][:] for kind in kinds}
    for sid, lon, lat, rec in zip(sitecol.sids, sitecol.lons, sitecol.lats,
                                  out):
        rlzs = realizations[best_rlzs[sid]]
        rec['site_id'] = sid
        rec['lon'] = lon
        rec['lat'] = lat
        rec['lon_bins'] = edges[2][sid]
        rec['lat_bins'] = edges[3][sid]
        for m, imt in enumerate(oq.imtls):
            ws = numpy.array([rlz.weight[imt] for rlz in rlzs])
            ws /= ws.sum()  # normalize to 1
            for p, poe in enumerate(poes_disagg):
                for kind in kinds:
                    key = '%s-%s-%s' % (kind, imt, poe)
                    rec[key] = arr[kind][sid, m, p] @ ws
                rec['iml-%s-%s' % (imt, poe)] = hmap4[sid, m, p]
    return ArrayWrapper(
        out,
        dict(mag=edges[0],
             dist=edges[1],
             eps=edges[-2],
             trt=numpy.array(encode(edges[-1]))))
Example #36
    def post_execute(self, result):
        """
        Compute stats for the aggregated distributions and save
        the results on the datastore.
        """
        loss_dt = self.oqparam.loss_dt()
        LI = len(loss_dt.names)
        dtlist = [('eid', U64), ('loss', (F32, LI))]
        R = self.R
        with self.monitor('saving outputs', autoflush=True):
            A = len(self.assetcol)

            # agg losses
            res = result['agg']
            E, L = res.shape
            mean, std = scientific.mean_std(res)  # shape L
            agglosses = numpy.zeros(L, stat_dt)
            agglosses['mean'] = F32(mean)
            agglosses['stddev'] = F32(std)

            # losses by asset
            losses_by_asset = numpy.zeros((A, R, L), stat_dt)
            for (l, r, aid, stat) in result['avg']:
                losses_by_asset[aid, r, l] = stat
            self.datastore['losses_by_asset'] = losses_by_asset
            self.datastore['agglosses'] = agglosses

            # losses by event
            lbe = numpy.fromiter(((ei, res[ei]) for ei in range(E)), dtlist)
            self.datastore['losses_by_event'] = lbe
            loss_types = ' '.join(self.oqparam.loss_dt().names)
            self.datastore.set_attrs('losses_by_event', loss_types=loss_types)

            # all losses
            if self.oqparam.asset_loss_table:
                array = numpy.zeros((A, E), loss_dt)
                for (l, r), losses_by_aid in result['all_losses'].items():
                    slc = self.event_slice(r)
                    for aid in losses_by_aid:
                        lba = losses_by_aid[aid]  # E
                        lt = loss_dt.names[l]
                        array[lt][aid, slc] = lba
                self.datastore['asset_loss_table'] = array
                tags = [encode(tag) for tag in self.assetcol.tagcol]
                self.datastore.set_attrs('asset_loss_table', tags=tags)
Example #37
 def execute(self):
     """
     Run in parallel `core_task(sources, sitecol, monitor)`, by
     parallelizing on the sources according to their weight and
     tectonic region type.
     """
     oq = self.oqparam
     if oq.hazard_calculation_id and not oq.compare_with_classical:
         parent = util.read(self.oqparam.hazard_calculation_id)
         self.csm_info = parent['csm_info']
         parent.close()
         self.calc_stats(parent)  # post-processing
         return {}
     with self.monitor('managing sources', autoflush=True):
         smap = parallel.Starmap(
             self.core_task.__func__, monitor=self.monitor())
         source_ids = []
         data = []
         for i, sources in enumerate(self._send_sources(smap)):
             source_ids.append(get_src_ids(sources))
             for src in sources:  # collect source data
                 data.append((i, src.nsites, src.num_ruptures, src.weight))
         if source_ids:
             self.datastore['task_sources'] = encode(source_ids)
         self.datastore.extend(
             'source_data', numpy.array(data, source_data_dt))
     self.calc_times = AccumDict(accum=numpy.zeros(2, F32))
     try:
         acc = smap.reduce(self.agg_dicts, self.acc0())
         self.store_rlz_info(acc.eff_ruptures)
     finally:
         with self.monitor('store source_info', autoflush=True):
             self.store_source_info(self.calc_times)
     if acc.nsites:
         if len(acc.nsites) > 50000:
              logging.warning(
                 'There are %d contributing sources', len(acc.nsites))
         else:
             src_ids = sorted(acc.nsites)
             nsites = [acc.nsites[i] for i in src_ids]
             self.datastore['source_info'][src_ids, 'num_sites'] = nsites
     if not self.calc_times:
         raise RuntimeError('All sources were filtered away!')
     self.calc_times.clear()  # save a bit of memory
     return acc
Example #38
    def post_execute(self, results):
        """
        Save all the results of the disaggregation. NB: the number of results
        to save is #sites * #rlzs * #disagg_poes * #IMTs.

        :param results:
            a dictionary sid -> trti -> disagg matrix
        """
        self.datastore.open('r+')
        T = len(self.trts)
        # build a dictionary sid -> 8D matrix of shape (T, ..., M, P)
        results = {sid: _8d_matrix(dic, T) for sid, dic in results.items()}

        # get the number of outputs
        shp = (len(self.sitecol), len(self.poes_disagg), len(self.imts))
        logging.info('Extracting and saving the PMFs for %d outputs '
                     '(N=%s, P=%d, M=%d)', numpy.prod(shp), *shp)
        self.save_disagg_result(results, trts=encode(self.trts))
Example #39
 def postproc(self):
     """
     Build aggregate loss curves in process
     """
     dstore = self.datastore
     self.before_export()  # set 'realizations'
     oq = self.oqparam
     stats = self.param['stats']
     # store avg_losses-stats
     if oq.avg_losses:
         set_rlzs_stats(self.datastore, 'avg_losses')
     try:
         b = self.param['builder']
     except KeyError:  # don't build auxiliary tables
         return
     if dstore.parent:
         dstore.parent.open('r')  # to read the ruptures
     if 'ruptures' in self.datastore and len(self.datastore['ruptures']):
         logging.info('Building loss tables')
         with self.monitor('building loss tables', measuremem=True):
             rlt, lbr = build_loss_tables(dstore)
             dstore['rup_loss_table'] = rlt
             dstore['losses_by_rlzi'] = lbr
             ridx = [rlt[:, lti].argmax() for lti in range(self.L)]
             dstore.set_attrs('rup_loss_table', ridx=ridx)
     logging.info('Building aggregate loss curves')
     with self.monitor('building agg_curves', measuremem=True):
         lbr = group_array(dstore['losses_by_event'][()], 'rlzi')
         dic = {r: arr['loss'] for r, arr in lbr.items()}
         array, arr_stats = b.build(dic, stats)
     loss_types = ' '.join(oq.loss_dt().names)
     units = self.datastore['cost_calculator'].get_units(loss_types.split())
     if oq.individual_curves or self.R == 1:
         self.datastore['agg_curves-rlzs'] = array
         self.datastore.set_attrs(
             'agg_curves-rlzs',
             return_periods=b.return_periods,
             loss_types=loss_types, units=units)
     if arr_stats is not None:
         self.datastore['agg_curves-stats'] = arr_stats
         self.datastore.set_attrs(
             'agg_curves-stats', return_periods=b.return_periods,
             stats=[encode(name) for (name, func) in stats],
             loss_types=loss_types, units=units)
Example #40
def reduce(fname, reduction_factor):
    """
    Produce a submodel from `fname` by sampling the nodes randomly.
    Supports source models, site models and exposure models. As a special
    case, it is also able to reduce .csv files by sampling the lines.
    This is a debugging utility to reduce large computations to small ones.
    """
    if fname.endswith('.csv'):
        with open(fname) as f:
            all_lines = f.readlines()
        lines = random_filter(all_lines, reduction_factor)
        shutil.copy(fname, fname + '.bak')
        print('Copied the original file in %s.bak' % fname)
        with open(fname, 'wb') as f:
            for line in lines:
                f.write(encode(line))
        print('Extracted %d lines out of %d' % (len(lines), len(all_lines)))
        return
    node = nrml.read(fname)
    model = node[0]
    if model.tag.endswith('exposureModel'):
        total = len(model.assets)
        model.assets.nodes = random_filter(model.assets, reduction_factor)
        num_nodes = len(model.assets)
    elif model.tag.endswith('siteModel'):
        total = len(model)
        model.nodes = random_filter(model, reduction_factor)
        num_nodes = len(model)
    elif model.tag.endswith('sourceModel'):
        if node['xmlns'] != 'http://openquake.org/xmlns/nrml/0.5':
            raise InvalidFile('%s: not NRML0.5' % fname)
        total = sum(len(sg) for sg in model)
        num_nodes = 0
        for sg in model:
            sg.nodes = random_filter(sg, reduction_factor)
            num_nodes += len(sg)
    else:
        raise RuntimeError('Unknown model tag: %s' % model.tag)
    shutil.copy(fname, fname + '.bak')
    print('Copied the original file in %s.bak' % fname)
    with open(fname, 'wb') as f:
        nrml.write([model], f, xmlns=node['xmlns'])
    print('Extracted %d nodes out of %d' % (num_nodes, total))
Example #41
def set_rlzs_stats(dstore, prefix, arrayNR=None):
    """
    :param dstore: a DataStore object
    :param prefix: dataset prefix
    :param arrayNR: an array of shape (N, R, ...)
    """
    if arrayNR is None:
        # assume the -rlzs array is already stored
        arrayNR = dstore[prefix + '-rlzs'].value
    else:
        # store passed the -rlzs array
        dstore[prefix + '-rlzs'] = arrayNR
    R = arrayNR.shape[1]
    if R > 1:
        stats = dstore['oqparam'].hazard_stats()
        statnames, statfuncs = zip(*stats.items())
        weights = dstore['csm_info'].rlzs['weight']
        dstore[prefix + '-stats'] = compute_stats2(arrayNR, statfuncs, weights)
        dstore.set_attrs(prefix + '-stats', stats=encode(statnames))
Example #42
 def postproc(self):
     """
     Build aggregate loss curves in process
     """
     dstore = self.datastore
     oq = self.oqparam
     stats = self.param['stats']
     # store avg_losses-stats
     if oq.avg_losses:
         set_rlzs_stats(self.datastore, 'avg_losses')
     try:
         b = self.param['builder']
     except KeyError:  # don't build auxiliary tables
         return
     if dstore.parent:
         dstore.parent.open('r')  # to read the ruptures
     logging.info('Building loss tables')
     build_loss_tables(dstore)
     logging.info('Building aggregate loss curves')
     with self.monitor('building agg_curves', measuremem=True):
         lbr = group_array(dstore['losses_by_event'][()], 'rlzi')
         dic = {r: arr['loss'] for r, arr in lbr.items()}
         array, arr_stats = b.build(dic, stats)
     loss_types = oq.loss_dt().names
     units = self.datastore['cost_calculator'].get_units(loss_types)
     if oq.individual_curves or self.R == 1:
         self.datastore['agg_curves-rlzs'] = array  # shape (P, R, L)
         self.datastore.set_attrs(
             'agg_curves-rlzs',
             shape_descr=['return_periods', 'rlzs', 'loss_types'],
             return_periods=b.return_periods,
             rlzs=numpy.arange(self.R),
             loss_types=loss_types,
             units=units)
     if arr_stats is not None:
         self.datastore['agg_curves-stats'] = arr_stats  # shape (P, S, L)
         self.datastore.set_attrs(
             'agg_curves-stats',
             shape_descr=['return_periods', 'stats', 'loss_types'],
             return_periods=b.return_periods,
             stats=[encode(name) for (name, func) in stats],
             loss_types=loss_types,
             units=units)
Example #43
    def post_execute(self, results):
        """
        Save all the results of the disaggregation. NB: the number of results
        to save is #sites * #rlzs * #disagg_poes * #IMTs.

        :param results:
            a dictionary (sid, rlzi, poe, imt) -> trti -> disagg matrix
        """
        T = len(self.trts)
        # build a dictionary (sid, rlzi, poe, imt) -> 6D matrix
        results = {k: _to_matrix(v, T) for k, v in results.items()}

        # get the number of outputs
        shp = (len(self.sitecol), len(self.oqparam.poes_disagg or (None,)),
               len(self.oqparam.imtls))  # N, P, M
        logging.info('Extracting and saving the PMFs for %d outputs '
                     '(N=%s, P=%d, M=%d)', numpy.prod(shp), *shp)
        self.save_disagg_result(results, trts=encode(self.trts),
                                num_ruptures=self.num_ruptures)
Example #44
def set_rlzs_stats(dstore, prefix, arrayNR=None):
    """
    :param dstore: a DataStore object
    :param prefix: dataset prefix
    :param arrayNR: an array of shape (N, R, ...)
    """
    if arrayNR is None:
        # assume the -rlzs array is already stored
        arrayNR = dstore[prefix + '-rlzs'].value
    else:
        # store passed the -rlzs array
        dstore[prefix + '-rlzs'] = arrayNR
    R = arrayNR.shape[1]
    if R > 1:
        stats = dstore['oqparam'].hazard_stats()
        statnames, statfuncs = zip(*stats.items())
        weights = dstore['weights'].value
        dstore[prefix + '-stats'] = compute_stats2(arrayNR, statfuncs, weights)
        dstore.set_attrs(prefix + '-stats', stats=encode(statnames))
Example #45
    def post_execute(self, results):
        """
        Save all the results of the disaggregation. NB: the number of results
        to save is #sites * #rlzs * #disagg_poes * #IMTs.

        :param results:
            a dictionary of probability arrays
        """
        # since an extremely small subset of the full disaggregation matrix
        # is saved this method can be run sequentially on the controller node
        logging.info('Extracting and saving the PMFs')
        for key, matrices in sorted(results.items()):
            sid, rlzi, poe, imt = key
            self.save_disagg_result(sid, matrices, rlzi,
                                    self.oqparam.investigation_time, imt, poe)

        self.datastore.set_attrs('disagg',
                                 trts=encode(self.trts),
                                 num_ruptures=self.num_ruptures)
Example #46
 def execute(self):
     """
     Run in parallel `core_task(sources, sitecol, monitor)`, by
     parallelizing on the sources according to their weight and
     tectonic region type.
     """
     oq = self.oqparam
     if oq.hazard_calculation_id and not oq.compare_with_classical:
         parent = datastore.read(self.oqparam.hazard_calculation_id)
         self.csm_info = parent['csm_info']
         parent.close()
         self.calc_stats(parent)  # post-processing
         return {}
     with self.monitor('managing sources', autoflush=True):
         smap = parallel.Starmap(
             self.core_task.__func__, monitor=self.monitor())
         source_ids = []
         data = []
         for i, sources in enumerate(self._send_sources(smap)):
             source_ids.append(get_src_ids(sources))
             for src in sources:  # collect source data
                 data.append((i, src.nsites, src.num_ruptures, src.weight))
         if source_ids:
             self.datastore['task_sources'] = encode(source_ids)
         self.datastore.extend(
             'source_data', numpy.array(data, source_data_dt))
     self.calc_times = AccumDict(accum=numpy.zeros(2, F32))
     try:
         acc = smap.reduce(self.agg_dicts, self.acc0())
         self.store_rlz_info(acc.eff_ruptures)
     finally:
         with self.monitor('store source_info', autoflush=True):
             self.store_source_info(self.calc_times)
     if acc.nsites:
         src_ids = sorted(acc.nsites)
         nsites = [acc.nsites[i] for i in src_ids]
         self.datastore['source_info'][src_ids, 'num_sites'] = nsites
     if not self.calc_times:
         raise RuntimeError('All sources were filtered away!')
     self.calc_times.clear()  # save a bit of memory
     return acc
Example #47
def reduce(fname, reduction_factor):
    """
    Produce a submodel from `fname` by sampling the nodes randomly.
    Supports source models, site models and exposure models. As a special
    case, it is also able to reduce .csv files by sampling the lines.
    This is a debugging utility to reduce large computations to small ones.
    """
    if fname.endswith('.csv'):
        with open(fname) as f:
            all_lines = f.readlines()
        lines = random_filter(all_lines, reduction_factor)
        shutil.copy(fname, fname + '.bak')
        print('Copied the original file in %s.bak' % fname)
        with open(fname, 'wb') as f:
            for line in lines:
                f.write(encode(line))
        print('Extracted %d lines out of %d' % (len(lines), len(all_lines)))
        return
    node = nrml.read(fname)
    model = node[0]
    if model.tag.endswith('exposureModel'):
        total = len(model.assets)
        model.assets.nodes = random_filter(model.assets, reduction_factor)
        num_nodes = len(model.assets)
    elif model.tag.endswith('siteModel'):
        total = len(model)
        model.nodes = random_filter(model, reduction_factor)
        num_nodes = len(model)
    elif model.tag.endswith('sourceModel'):
        total = len(model)
        model.nodes = random_filter(model, reduction_factor)
        num_nodes = len(model)
    else:
        raise RuntimeError('Unknown model tag: %s' % model.tag)
    shutil.copy(fname, fname + '.bak')
    print('Copied the original file in %s.bak' % fname)
    with open(fname, 'wb') as f:
        nrml.write([model], f, xmlns=node['xmlns'])
    print('Extracted %d nodes out of %d' % (num_nodes, total))
Example #48
    def post_execute(self, result):
        """
        Save the loss curves in the datastore.

        :param result: aggregated result of the task classical_risk
        """
        curve_res = {cp.loss_type: cp.curve_resolution
                     for cp in self.riskmodel.curve_params
                     if cp.user_provided}
        self.loss_curve_dt = scientific.build_loss_curve_dt(
            curve_res, insured_losses=False)
        ltypes = self.riskmodel.loss_types

        # loss curves stats are generated always
        stats = encode(list(self.oqparam.hazard_stats()))
        stat_curves = numpy.zeros((self.A, self.S), self.loss_curve_dt)
        avg_losses = numpy.zeros((self.A, self.S, self.L), F32)
        for l, a, losses, statpoes, statloss in result['stat_curves']:
            stat_curves_lt = stat_curves[ltypes[l]]
            for s in range(self.S):
                avg_losses[a, s, l] = statloss[s]
                base.set_array(stat_curves_lt['poes'][a, s], statpoes[s])
                base.set_array(stat_curves_lt['losses'][a, s], losses)
        self.datastore['avg_losses-stats'] = avg_losses
        self.datastore.set_attrs('avg_losses-stats', stats=stats)
        self.datastore['loss_curves-stats'] = stat_curves
        self.datastore.set_attrs('loss_curves-stats', stats=stats)

        if self.R > 1:  # individual realizations saved only if many
            loss_curves = numpy.zeros((self.A, self.R), self.loss_curve_dt)
            avg_losses = numpy.zeros((self.A, self.R, self.L), F32)
            for l, r, a, (losses, poes, avg) in result['loss_curves']:
                lc = loss_curves[a, r][ltypes[l]]
                avg_losses[a, r, l] = avg
                base.set_array(lc['losses'], losses)
                base.set_array(lc['poes'], poes)
            self.datastore['avg_losses-rlzs'] = avg_losses
            self.datastore['loss_curves-rlzs'] = loss_curves
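
The dtype returned by `scientific.build_loss_curve_dt` is not shown here; presumably it is a composite dtype with fixed-size `losses` and `poes` arrays per loss type, roughly like the minimal numpy sketch below (the loss type names and the resolution of 20 are arbitrary assumptions).

import numpy

resolution = 20  # hypothetical curve_resolution
pair_dt = numpy.dtype([('losses', numpy.float32, (resolution,)),
                       ('poes', numpy.float32, (resolution,))])
loss_curve_dt = numpy.dtype([('structural', pair_dt),
                             ('nonstructural', pair_dt)])
stat_curves = numpy.zeros((3, 2), loss_curve_dt)  # shape (A assets, S statistics)
stat_curves['structural']['poes'][0, 0] = numpy.linspace(1, 0, resolution)
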
Ejemplo n.º 49
0
def extract_aggregate(dstore, what):
    """
    /extract/aggregate/avg_losses?
    kind=mean&loss_type=structural&tag=taxonomy&tag=occupancy
    """
    name, qstring = what.split('?', 1)
    info = get_info(dstore)
    qdic = parse(qstring, info)
    suffix = '-rlzs' if qdic['rlzs'] else '-stats'
    tagnames = qdic.get('tag', [])
    assetcol = dstore['assetcol']
    ltypes = qdic.get('loss_type', [])
    if ltypes:
        array = dstore[name + suffix][:, qdic['k'][0], ltypes[0]]
    else:
        array = dstore[name + suffix][:, qdic['k'][0]]
    aw = ArrayWrapper(assetcol.aggregate_by(tagnames, array), {})
    for tagname in tagnames:
        setattr(aw, tagname, getattr(assetcol.tagcol, tagname))
    aw.tagnames = encode(tagnames)
    if not ltypes:
        aw.extra = ('loss_type',) + tuple(info['loss_types'])
    return aw
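
The internals of `parse(qstring, info)` are not shown; at a minimum it has to decompose the query string from the docstring above, which the standard library can illustrate. The mapping of `kind` and `loss_type` to the integer indices used above is engine-specific and not reproduced here.

from urllib.parse import parse_qs

what = 'avg_losses?kind=mean&loss_type=structural&tag=taxonomy&tag=occupancy'
name, qstring = what.split('?', 1)
print(name)             # 'avg_losses'
print(parse_qs(qstring))
# {'kind': ['mean'], 'loss_type': ['structural'], 'tag': ['taxonomy', 'occupancy']}
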
Ejemplo n.º 50
0
        sys.exit(2)


def flag_set(section, setting):
    """True if the given boolean setting is enabled in openquake.cfg

    :param string section: name of the configuration file section
    :param string setting: name of the configuration file setting

    :returns: True if the setting is enabled in openquake.cfg, False otherwise
    """
    setting = get(section, setting)
    if setting is None:
        return False
    return setting.lower() in ("true", "yes", "t", "1")


def refresh():
    """
    Re-parse config files and refresh the cached configuration.

    NOTE: Use with caution. Calling this during some phases of a calculation
    could cause undesirable side-effects.
    """
    cfg._load_from_file()


port = int(get("dbserver", "port"))
DBS_ADDRESS = (get("dbserver", "host"), port)
DBS_AUTHKEY = encode(get("dbserver", "authkey"))
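
A self-contained analogue of the `flag_set` logic, assuming `get` simply returns the raw string value of a setting in openquake.cfg (or None when it is missing). Here the standard library configparser stands in for the engine's config machinery and the section contents are invented.

import configparser

cfg = configparser.ConfigParser()
cfg.read_string('[dbserver]\nmulti_user = yes\n')  # hypothetical content

def get(section, setting):
    return cfg.get(section, setting, fallback=None)

def flag_set(section, setting):
    value = get(section, setting)
    return value is not None and value.lower() in ('true', 'yes', 't', '1')

print(flag_set('dbserver', 'multi_user'))  # True
print(flag_set('dbserver', 'missing'))     # False
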
Ejemplo n.º 51
0
 def __call__(self, *args):
     self.lst.append(b' '.join(encode(str(a)) for a in args))
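
The enclosing class is not part of the snippet; presumably it accumulates encoded rows in a list, roughly as in the sketch below. The class name and the local `encode` stand-in are assumptions made for illustration.

def encode(text):
    # stand-in for the engine's encode helper: unicode -> utf-8 bytes
    return text.encode('utf-8')

class RowCollector(object):
    def __init__(self):
        self.lst = []

    def __call__(self, *args):
        self.lst.append(b' '.join(encode(str(a)) for a in args))

rows = RowCollector()
rows('PGA', 0.1, 3)
print(rows.lst)  # [b'PGA 0.1 3']
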
Ejemplo n.º 52
0
def _save_csv(fname, lines, header):
    with open(fname, 'wb') as f:
        if header:
            f.write(encode(header))
        for line in lines:
            f.write(encode(line))
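
`_save_csv` writes the header and the lines verbatim as bytes, so callers presumably pass newline-terminated strings. A minimal usage sketch (the file name is hypothetical):

_save_csv('/tmp/out.csv', ['1,0.5\n', '2,0.7\n'], header='id,value\n')
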
Ejemplo n.º 53
0
def write_csv(dest, data, sep=',', fmt='%.6E', header=None, comment=None):
    """
    :param dest: None, file, filename or io.BytesIO instance
    :param data: array to save
    :param sep: separator to use (default comma)
    :param fmt: formatting string (default '%.6E')
    :param header:
       optional list with the names of the columns to display
    :param comment:
       optional comment dictionary
    """
    if comment is not None:
        comment = ', '.join('%s=%r' % item for item in comment.items())
    close = True
    if dest is None:  # write on a temporary file
        fd, dest = tempfile.mkstemp(suffix='.csv')
        os.close(fd)
    if hasattr(dest, 'write'):
        # file-like object in append mode
        # it must be closed by client code
        close = False
    elif not hasattr(dest, 'getvalue'):
        # not a BytesIO, assume dest is a filename
        dest = open(dest, 'wb')
    try:
        # see if data is a composite numpy array
        data.dtype.fields
    except AttributeError:
        # not a composite array
        autoheader = []
    else:
        autoheader = build_header(data.dtype)

    if comment:
        dest.write(encode('# %s\n' % comment))

    someheader = header or autoheader
    if header != 'no-header' and someheader:
        dest.write(encode(sep.join(someheader) + u'\n'))

    if autoheader:
        all_fields = [col.split(':', 1)[0].split('~')
                      for col in autoheader]
        for record in data:
            row = []
            for fields in all_fields:
                val = extract_from(record, fields)
                if fields[0] in ('lon', 'lat', 'depth'):
                    row.append('%.5f' % val)
                else:
                    row.append(scientificformat(val, fmt))
            dest.write(encode(sep.join(row) + u'\n'))
    else:
        for row in data:
            dest.write(encode(sep.join(scientificformat(col, fmt)
                                       for col in row) + u'\n'))
    if hasattr(dest, 'getvalue'):
        return dest.getvalue()[:-1]  # a newline is strangely added
    elif close:
        dest.close()
    return dest.name
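
A usage sketch based on the code above, with a small composite array; the exact header produced by `build_header` depends on the dtype metadata, so the indicated output is approximate.

import numpy

data = numpy.array([(13.4, 42.3, 0.123456), (13.5, 42.4, 0.654321)],
                   dtype=[('lon', float), ('lat', float), ('value', float)])
write_csv('/tmp/values.csv', data, comment=dict(investigation_time=50.0))
# writes '# investigation_time=50.0', then a header row, then one row per
# record, with lon/lat printed at 5 decimals and value in scientific notation
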
Ejemplo n.º 54
0
 def save(self, fname):
     """Save the report"""
     with open(fname, 'wb') as f:
         f.write(encode(self.text))