Code Example #1
    def test_case_7(self):
        # 2 models x 3 GMPEs, 1000 samples * 10 SES
        expected = [
            'hazard_curve-mean.csv',
        ]
        out = self.run_calc(case_7.__file__, 'job.ini', exports='csv')
        aw = extract(self.calc.datastore, 'realizations')
        dic = countby(aw.array, 'branch_path')
        self.assertEqual(
            {
                b'b11~BA': 332,  # w = .6 * .5 = .30
                b'b11~CB': 169,  # w = .6 * .3 = .18
                b'b11~CY': 108,  # w = .6 * .2 = .12
                b'b12~BA': 193,  # w = .4 * .5 = .20
                b'b12~CB': 115,  # w = .4 * .3 = .12
                b'b12~CY': 83,  # w = .4 * .2 = .08
            },
            dic)

        fnames = out['hcurves', 'csv']
        mean_eb = get_mean_curves(self.calc.datastore)
        for exp, got in zip(expected, fnames):
            self.assertEqualFiles('expected/%s' % exp, got)
        mean_cl = get_mean_curves(self.calc.cl.datastore)
        reldiff, _index = max_rel_diff_index(mean_cl, mean_eb, min_value=0.1)
        self.assertLess(reldiff, 0.07)
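
All of the examples on this page revolve around countby from openquake.baselib.general, which groups a structured numpy array by one or more fields and returns a dict mapping each key (or tuple of keys) to the number of matching records. The snippet below is an illustrative re-implementation for readers following the assertions above, not the engine's actual code; countby_sketch is a hypothetical name.

import collections
import numpy

def countby_sketch(array, *kfields):
    # count the records sharing the same value(s) of the key field(s)
    counter = collections.Counter()
    for rec in array:
        key = (rec[kfields[0]] if len(kfields) == 1
               else tuple(rec[f] for f in kfields))
        counter[key] += 1
    return dict(counter)

# usage: count events per realization, as in the views further down
events = numpy.array([(0, 1), (1, 1), (2, 0)],
                     dtype=[('id', 'u4'), ('rlz_id', 'u2')])
print(countby_sketch(events, 'rlz_id'))  # e.g. {1: 2, 0: 1}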
Code Example #2
def get_site_model(oqparam):
    """
    Convert the site model file(s), in NRML or CSV format, into an array of site parameters.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        an array with fields lon, lat, vs30, ...
    """
    req_site_params = get_gsim_lt(oqparam).req_site_params
    arrays = []
    for fname in oqparam.inputs['site_model']:
        if isinstance(fname, str) and fname.endswith('.csv'):
            sm = hdf5.read_csv(fname, {
                None: float,
                'vs30measured': numpy.uint8
            }).array
            if 'site_id' in sm.dtype.names:
                raise InvalidFile('%s: you passed a sites.csv file instead of '
                                  'a site_model.csv file!' % fname)
            z = numpy.zeros(len(sm), sorted(sm.dtype.descr))
            for name in z.dtype.names:  # reorder the fields
                z[name] = sm[name]
            arrays.append(z)
            continue
        nodes = nrml.read(fname).siteModel
        params = [valid.site_param(node.attrib) for node in nodes]
        missing = req_site_params - set(params[0])
        if 'vs30measured' in missing:  # use a default of False
            missing -= {'vs30measured'}
            for param in params:
                param['vs30measured'] = False
        if 'backarc' in missing:  # use a default of False
            missing -= {'backarc'}
            for param in params:
                param['backarc'] = False
        if missing:
            raise InvalidFile(
                '%s: missing parameter %s' %
                (oqparam.inputs['site_model'], ', '.join(missing)))
        # NB: the sorted in sorted(params[0]) is essential, otherwise there is
        # an heisenbug in scenario/test_case_4
        site_model_dt = numpy.dtype([(p, site.site_param_dt[p])
                                     for p in sorted(params[0])])
        sm = numpy.array([
            tuple(param[name] for name in site_model_dt.names)
            for param in params
        ], site_model_dt)
        dupl = "\n".join('%s %s' % loc
                         for loc, n in countby(sm, 'lon', 'lat').items()
                         if n > 1)
        if dupl:
            raise InvalidFile('There are duplicated sites in %s:\n%s' %
                              (fname, dupl))
        arrays.append(sm)
    return numpy.concatenate(arrays)
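
As the NB comment above notes, sorted(params[0]) is what makes the structured dtype deterministic. The snippet below is a small self-contained illustration of that construction; the parameter names and dtypes are made up for the example and do not come from site.site_param_dt.

import numpy

params = [dict(lon=10.0, lat=45.0, vs30=760.0),
          dict(lon=10.1, lat=45.0, vs30=600.0)]
param_dt = {'lat': float, 'lon': float, 'vs30': float}
# sorting the field names fixes the field order of the structured dtype
site_model_dt = numpy.dtype([(p, param_dt[p]) for p in sorted(params[0])])
sm = numpy.array([tuple(p[name] for name in site_model_dt.names)
                  for p in params], site_model_dt)
print(sm['vs30'])  # [760. 600.]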
Code Example #3
File: views.py Project: digitalsatori/oq-engine
def view_ruptures_events(token, dstore):
    num_ruptures = len(dstore['ruptures'])
    num_events = len(dstore['events'])
    events_by_rlz = countby(dstore['events'][()], 'rlz')
    mult = round(num_events / num_ruptures, 3)
    lst = [('Total number of ruptures', num_ruptures),
           ('Total number of events', num_events),
           ('Rupture multiplicity', mult),
           ('Events by rlz', events_by_rlz.values())]
    return rst_table(lst)
Code Example #4
File: views.py Project: yasser64b/oq-engine
def view_ruptures_events(token, dstore):
    num_ruptures = len(dstore['ruptures'])
    num_events = len(dstore['events'])
    events_by_rlz = countby(dstore['events'][()], 'rlz_id')
    mult = round(num_events / num_ruptures, 3)
    lst = [('Total number of ruptures', num_ruptures),
           ('Total number of events', num_events),
           ('Rupture multiplicity', mult),
           ('Events by rlz', events_by_rlz.values())]
    return text_table(lst)
Code Example #5
def view_act_ruptures_by_src(token, dstore):
    """
    Display the actual number of ruptures by source in event based calculations
    """
    data = dstore['ruptures'][('source_id', 'grp_id', 'rup_id')]
    counts = sorted(countby(data, 'source_id').items(),
                    key=operator.itemgetter(1), reverse=True)
    # map each source to its group, since countby only returns the counts
    grp_by_src = {rec['source_id']: rec['grp_id'] for rec in data}
    table = [['src_id', 'grp_id', 'act_ruptures']]
    for source_id, act_ruptures in counts:
        table.append([source_id, grp_by_src[source_id], act_ruptures])
    return rst_table(table)
Code Example #6
File: views.py Project: digitalsatori/oq-engine
def view_act_ruptures_by_src(token, dstore):
    """
    Display the actual number of ruptures by source in event based calculations
    """
    data = dstore['ruptures'][('srcidx', 'serial')]
    counts = sorted(countby(data, 'srcidx').items(),
                    key=operator.itemgetter(1), reverse=True)
    src_info = dstore['source_info'][('grp_id', 'source_id')]
    table = [['src_id', 'grp_id', 'act_ruptures']]
    for srcidx, act_ruptures in counts:
        src = src_info[srcidx]
        table.append([src['source_id'], src['grp_id'], act_ruptures])
    return rst_table(table)
Code Example #7
File: loss_curves.py Project: maswiet/oq-engine
def get_loss_builder(dstore, return_periods=None, loss_dt=None):
    """
    :param dstore: datastore for an event based risk calculation
    :returns: a LossesByPeriodBuilder instance
    """
    oq = dstore['oqparam']
    weights = dstore['csm_info'].rlzs['weight']
    eff_time = oq.investigation_time * oq.ses_per_logic_tree_path
    num_events = countby(dstore['events'].value, 'rlz')
    periods = return_periods or oq.return_periods or scientific.return_periods(
        eff_time, max(num_events.values()))
    return scientific.LossesByPeriodBuilder(
        numpy.array(periods), loss_dt or oq.loss_dt(), weights, num_events,
        eff_time, oq.risk_investigation_time)
Code Example #8
def view_act_ruptures_by_src(token, dstore):
    """
    Display the actual number of ruptures by source in event based calculations
    """
    data = dstore['ruptures'].value[['srcidx', 'serial']]
    counts = sorted(countby(data, 'srcidx').items(),
                    key=operator.itemgetter(1),
                    reverse=True)
    src_info = dstore['source_info'].value[['grp_id', 'source_id']]
    table = [['src_id', 'grp_id', 'act_ruptures']]
    for srcidx, act_ruptures in counts:
        src = src_info[srcidx]
        table.append([src['source_id'], src['grp_id'], act_ruptures])
    return rst_table(table)
Code Example #9
def get_loss_builder(dstore, return_periods=None, loss_dt=None):
    """
    :param dstore: datastore for an event based risk calculation
    :returns: a LossCurvesMapsBuilder instance
    """
    oq = dstore['oqparam']
    weights = dstore['weights'][()]
    eff_time = oq.investigation_time * oq.ses_per_logic_tree_path
    num_events = countby(dstore['events'][()], 'rlz')
    periods = return_periods or oq.return_periods or scientific.return_periods(
        eff_time, max(num_events.values()))
    return scientific.LossCurvesMapsBuilder(
        oq.conditional_loss_poes, numpy.array(periods),
        loss_dt or oq.loss_dt(), weights, num_events,
        eff_time, oq.risk_investigation_time)
Code Example #10
def get_loss_builder(dstore, return_periods=None, loss_dt=None):
    """
    :param dstore: datastore for an event based risk calculation
    :returns: a LossCurvesMapsBuilder instance
    """
    oq = dstore['oqparam']
    weights = dstore['weights'][()]
    eff_time = oq.investigation_time * oq.ses_per_logic_tree_path
    num_events = general.countby(dstore['events'][()], 'rlz_id')
    periods = return_periods or oq.return_periods or scientific.return_periods(
        eff_time, max(num_events.values()))
    return scientific.LossCurvesMapsBuilder(
        oq.conditional_loss_poes, numpy.array(periods),
        loss_dt or oq.loss_dt(), weights, num_events,
        eff_time, oq.risk_investigation_time)
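
In all three get_loss_builder variants the default return periods come from scientific.return_periods(eff_time, max(num_events.values())). The sketch below only approximates that idea, building a 1-2-5 ladder of periods between eff_time / num_events and eff_time; default_return_periods is a hypothetical helper and the engine's actual implementation may differ in detail.

import numpy

def default_return_periods(eff_time, num_events):
    # hypothetical helper: the shortest resolvable period is roughly
    # eff_time / num_events; build a 1-2-5 ladder from there up to eff_time
    min_time = eff_time / num_events
    periods, decade = [], 1
    while decade <= eff_time:
        for mult in (1, 2, 5):
            t = decade * mult
            if min_time <= t <= eff_time:
                periods.append(t)
        decade *= 10
    return numpy.array(periods, numpy.uint32)

print(default_return_periods(10_000, 1000))
# e.g. [10 20 50 100 200 500 1000 2000 5000 10000]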
Code Example #11
 def test_supertask(self):
     # this test has 4 supertasks generating 4 + 5 + 3 + 5 = 17 subtasks
     # and 18 outputs (1 output does not produce a subtask)
     allargs = [('aaaaeeeeiii', ), ('uuuuaaaaeeeeiii', ),
                ('aaaaaaaaeeeeiii', ), ('aaaaeeeeiiiiiooooooo', )]
     numchars = sum(len(arg) for arg, in allargs)  # 61
     tmp = pathlib.Path(tempfile.mkdtemp(), 'calc_1.hdf5')
     with hdf5.File(tmp) as h5:
         monitor = performance.Monitor(hdf5=h5)
         res = parallel.Starmap(supertask, allargs, monitor).reduce()
     self.assertEqual(res, {'n': numchars})
     # check that the correct information is stored in the hdf5 file
     with hdf5.File(tmp) as h5:
         num = general.countby(h5['performance_data'].value, 'operation')
         self.assertEqual(num[b'waiting'], 4)
         self.assertEqual(num[b'total supertask'], 18)  # outputs
         self.assertEqual(num[b'total get_length'], 17)  # subtasks
         self.assertGreater(len(h5['task_info/supertask']), 0)
     shutil.rmtree(tmp.parent)
Code Example #12
 def test_supertask(self):
     # this test has 4 supertasks generating 4 + 5 + 3 + 5 = 17 subtasks
     # and 5 real outputs (one from the yield {})
     allargs = [('aaaaeeeeiii', ), ('uuuuaaaaeeeeiii', ),
                ('aaaaaaaaeeeeiii', ), ('aaaaeeeeiiiiiooooooo', )]
     numchars = sum(len(arg) for arg, in allargs)  # 61
     tmpdir = tempfile.mkdtemp()
     tmp = os.path.join(tmpdir, 'calc_1.hdf5')
     hdf5.File(tmp, 'w').close()  # the file must exist
     smap = parallel.Starmap(supertask, allargs, hdf5path=tmp)
     res = smap.reduce()
     self.assertEqual(res, {'n': numchars})
     # check that the correct information is stored in the hdf5 file
     with hdf5.File(tmp, 'r') as h5:
         num = general.countby(h5['performance_data'][()], 'operation')
         self.assertEqual(num[b'waiting'], 4)
         self.assertEqual(num[b'total supertask'], 5)  # outputs
         self.assertEqual(num[b'total get_length'], 17)  # subtasks
         self.assertGreater(len(h5['task_info']), 0)
     shutil.rmtree(tmpdir)
Code Example #13
 def test_supertask(self):
     # this test has 4 supertasks generating 4 + 5 + 3 + 5 = 17 subtasks
     # and 18 outputs (1 output does not produce a subtask)
     allargs = [('aaaaeeeeiii',),
                ('uuuuaaaaeeeeiii',),
                ('aaaaaaaaeeeeiii',),
                ('aaaaeeeeiiiiiooooooo',)]
     numchars = sum(len(arg) for arg, in allargs)  # 61
     tmp = pathlib.Path(tempfile.mkdtemp(), 'calc_1.hdf5')
     with hdf5.File(tmp) as h5:
         monitor = performance.Monitor(hdf5=h5)
         res = parallel.Starmap(supertask, allargs, monitor).reduce()
     self.assertEqual(res, {'n': numchars})
     # check that the correct information is stored in the hdf5 file
     with hdf5.File(tmp) as h5:
         num = general.countby(h5['performance_data'].value, 'operation')
         self.assertEqual(num[b'waiting'], 4)
         self.assertEqual(num[b'total supertask'], 18)  # outputs
         self.assertEqual(num[b'total get_length'], 17)  # subtasks
         self.assertGreater(len(h5['task_info/supertask']), 0)
     shutil.rmtree(tmp.parent)
Code Example #14
File: parallel_test.py Project: ozkankale/oq-engine
 def test_supertask(self):
     # this test has 4 supertasks generating 4 + 5 + 3 + 5 = 17 subtasks
     allargs = [('aaaaeeeeiii', ), ('uuuuaaaaeeeeiii', ),
                ('aaaaaaaaeeeeiii', ), ('aaaaeeeeiiiiiooooooo', )]
     numchars = sum(len(arg) for arg, in allargs)  # 61
     tmpdir = tempfile.mkdtemp()
     tmp = os.path.join(tmpdir, 'calc_1.hdf5')
     performance.init_performance(tmp, swmr=True)
     smap = parallel.Starmap(supertask, allargs, h5=hdf5.File(tmp, 'a'))
     res = smap.reduce()
     smap.h5.close()
     self.assertEqual(res, {'n': numchars})
     # check that the correct information is stored in the hdf5 file
     with hdf5.File(tmp, 'r') as h5:
         num = general.countby(h5['performance_data'][()], 'operation')
         self.assertEqual(num[b'waiting'], 4)
         self.assertEqual(num[b'total supertask'], 4)  # tasks
         self.assertEqual(num[b'total get_length'], 17)  # subtasks
         info = h5['task_info'][()]
         dic = dict(general.fast_agg3(info, 'taskname', ['received']))
         self.assertGreater(dic[b'get_length'], 0)
         self.assertGreater(dic[b'supertask'], 0)
     shutil.rmtree(tmpdir)
Code Example #15
File: readinput.py Project: mascandola/oq-engine
def get_site_model(oqparam):
    """
    Convert the site model file(s), in NRML or CSV format, into an array of site parameters.

    :param oqparam:
        an :class:`openquake.commonlib.oqvalidation.OqParam` instance
    :returns:
        an array with fields lon, lat, vs30, ...
    """
    req_site_params = get_gsim_lt(oqparam).req_site_params
    if 'amplification' in oqparam.inputs:
        req_site_params.add('ampcode')
    arrays = []
    for fname in oqparam.inputs['site_model']:
        if isinstance(fname, str) and fname.endswith('.csv'):
            sm = hdf5.read_csv(fname, site.site_param_dt).array
            sm['lon'] = numpy.round(sm['lon'], 5)
            sm['lat'] = numpy.round(sm['lat'], 5)
            dupl = get_duplicates(sm, 'lon', 'lat')
            if dupl:
                raise InvalidFile(
                    'Found duplicate sites %s in %s' % (dupl, fname))
            if 'site_id' in sm.dtype.names:
                raise InvalidFile('%s: you passed a sites.csv file instead of '
                                  'a site_model.csv file!' % fname)
            params = sorted(set(sm.dtype.names) | req_site_params)
            z = numpy.zeros(
                len(sm), [(p, site.site_param_dt[p]) for p in params])
            for name in z.dtype.names:
                try:
                    z[name] = sm[name]
                except ValueError:  # missing, use the global parameter
                    # exercised in the test classical/case_28
                    value = getattr(oqparam, site.param[name])
                    if name == 'vs30measured':  # special case
                        value = value == 'measured'
                    z[name] = value
            arrays.append(z)
            continue
        nodes = nrml.read(fname).siteModel
        params = [valid.site_param(node.attrib) for node in nodes]
        missing = req_site_params - set(params[0])
        if 'vs30measured' in missing:  # use a default of False
            missing -= {'vs30measured'}
            for param in params:
                param['vs30measured'] = False
        if 'backarc' in missing:  # use a default of False
            missing -= {'backarc'}
            for param in params:
                param['backarc'] = False
        if 'ampcode' in missing:  # use a default of b''
            missing -= {'ampcode'}
            for param in params:
                param['ampcode'] = b''
        if missing:
            raise InvalidFile('%s: missing parameter %s' %
                              (oqparam.inputs['site_model'],
                               ', '.join(missing)))
        # NB: the sorted in sorted(params[0]) is essential, otherwise there is
        # an heisenbug in scenario/test_case_4
        site_model_dt = numpy.dtype([(p, site.site_param_dt[p])
                                     for p in sorted(params[0])])
        sm = numpy.array([tuple(param[name] for name in site_model_dt.names)
                          for param in params], site_model_dt)
        dupl = "\n".join(
            '%s %s' % loc for loc, n in countby(sm, 'lon', 'lat').items()
            if n > 1)
        if dupl:
            raise InvalidFile('There are duplicated sites in %s:\n%s' %
                              (fname, dupl))
        arrays.append(sm)
    return numpy.concatenate(arrays)
Code Example #16
    def _read_risk_data(self):
        # read the exposure (if any), the risk model (if any) and then the
        # site collection, possibly extracted from the exposure.
        oq = self.oqparam
        self.load_crmodel()  # must be called first

        if oq.hazard_calculation_id:
            with util.read(oq.hazard_calculation_id) as dstore:
                haz_sitecol = dstore['sitecol'].complete
        else:
            haz_sitecol = readinput.get_site_collection(oq)
            if hasattr(self, 'rup'):
                # for scenario we reduce the site collection to the sites
                # within the maximum distance from the rupture
                haz_sitecol, _dctx = self.cmaker.filter(
                    haz_sitecol, self.rup)
                haz_sitecol.make_complete()

            if 'site_model' in oq.inputs:
                self.datastore['site_model'] = readinput.get_site_model(oq)

        oq_hazard = (self.datastore.parent['oqparam']
                     if self.datastore.parent else None)
        if 'exposure' in oq.inputs:
            exposure = self.read_exposure(haz_sitecol)
            self.datastore['assetcol'] = self.assetcol
            self.datastore['cost_calculator'] = exposure.cost_calculator
            if hasattr(readinput.exposure, 'exposures'):
                self.datastore['assetcol/exposures'] = (
                    numpy.array(exposure.exposures, hdf5.vstr))
        elif 'assetcol' in self.datastore.parent:
            assetcol = self.datastore.parent['assetcol']
            if oq.region:
                region = wkt.loads(oq.region)
                self.sitecol = haz_sitecol.within(region)
            if oq.shakemap_id or 'shakemap' in oq.inputs:
                self.sitecol, self.assetcol = self.read_shakemap(
                    haz_sitecol, assetcol)
                self.datastore['assetcol'] = self.assetcol
                logging.info('Extracted %d/%d assets',
                             len(self.assetcol), len(assetcol))
                nsites = len(self.sitecol)
                if (oq.spatial_correlation != 'no' and
                        nsites > MAXSITES):  # hard-coded, heuristic
                    raise ValueError(CORRELATION_MATRIX_TOO_LARGE % nsites)
            elif hasattr(self, 'sitecol') and general.not_equal(
                    self.sitecol.sids, haz_sitecol.sids):
                self.assetcol = assetcol.reduce(self.sitecol)
                self.datastore['assetcol'] = self.assetcol
                logging.info('Extracted %d/%d assets',
                             len(self.assetcol), len(assetcol))
            else:
                self.assetcol = assetcol
        else:  # no exposure
            self.sitecol = haz_sitecol
            if self.sitecol:
                logging.info('Read N=%d hazard sites and L=%d hazard levels',
                             len(self.sitecol), len(oq.imtls.array))

        if oq_hazard:
            parent = self.datastore.parent
            if 'assetcol' in parent:
                check_time_event(oq, parent['assetcol'].occupancy_periods)
            elif oq.job_type == 'risk' and 'exposure' not in oq.inputs:
                raise ValueError('Missing exposure both in hazard and risk!')
            if oq_hazard.time_event and oq_hazard.time_event != oq.time_event:
                raise ValueError(
                    'The risk configuration file has time_event=%s but the '
                    'hazard was computed with time_event=%s' % (
                        oq.time_event, oq_hazard.time_event))

        if oq.job_type == 'risk':
            tmap_arr, tmap_lst = logictree.taxonomy_mapping(
                self.oqparam.inputs.get('taxonomy_mapping'),
                self.assetcol.tagcol.taxonomy)
            self.crmodel.tmap = tmap_lst
            if len(tmap_arr):
                self.datastore['taxonomy_mapping'] = tmap_arr
            taxonomies = set(taxo for items in self.crmodel.tmap
                             for taxo, weight in items if taxo != '?')
            # check that we are covering all the taxonomies in the exposure
            missing = taxonomies - set(self.crmodel.taxonomies)
            if self.crmodel and missing:
                raise RuntimeError('The exposure contains the taxonomies %s '
                                   'which are not in the risk model' % missing)
            if len(self.crmodel.taxonomies) > len(taxonomies):
                logging.info('Reducing risk model from %d to %d taxonomies',
                             len(self.crmodel.taxonomies), len(taxonomies))
                self.crmodel = self.crmodel.reduce(taxonomies)
                self.crmodel.tmap = tmap_lst
            self.crmodel.vectorize_cons_model(self.assetcol.tagcol)

        if hasattr(self, 'sitecol') and self.sitecol:
            if 'site_model' in oq.inputs:
                assoc_dist = (oq.region_grid_spacing * 1.414
                              if oq.region_grid_spacing else 5)  # Graeme's 5km
                sm = readinput.get_site_model(oq)
                self.sitecol.complete.assoc(sm, assoc_dist)
            self.datastore['sitecol'] = self.sitecol.complete
        # used in the risk calculators
        self.param = dict(individual_curves=oq.individual_curves,
                          avg_losses=oq.avg_losses)

        # compute exposure stats
        if hasattr(self, 'assetcol'):
            arr = self.assetcol.array
            num_assets = list(general.countby(arr, 'site_id').values())
            self.datastore['assets_by_site'] = get_stats(num_assets)
            num_taxos = self.assetcol.num_taxonomies_by_site()
            self.datastore['taxonomies_by_site'] = get_stats(num_taxos)
            save_exposed_values(
                self.datastore, self.assetcol, oq.loss_names, oq.aggregate_by)
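
The exposure statistics at the end of _read_risk_data are again driven by countby, this time on the asset array grouped by site_id. The fragment below is an illustrative, engine-free rendition of that counting step; the toy array layout and the summary dict are assumptions, while the real code stores get_stats arrays in the datastore.

import numpy

# toy asset array: five assets spread over two sites
assets = numpy.array([(0, 0), (1, 0), (2, 1), (3, 1), (4, 1)],
                     dtype=[('ordinal', 'u4'), ('site_id', 'u4')])
site_ids, counts = numpy.unique(assets['site_id'], return_counts=True)
num_assets = counts.tolist()  # [2, 3] assets per site
print({'mean': float(numpy.mean(num_assets)),
       'max': int(numpy.max(num_assets))})  # {'mean': 2.5, 'max': 3}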