def test_esm_flatfile(self):
        """Parse the ESM 2018 SA flatfile and check the parser log
        (totals and missing-value counts) and the resulting
        GroundMotionTable: filtering outside/inside a ``with`` block,
        default attrs, and record counts per magnitude filter.
        """
        input_file = os.path.join(os.path.dirname(self.input_file),
                                  'esm_sa_flatfile_2018.csv')
        log = EsmParser.parse(input_file,
                              output_path=self.output_file)
        self.assertEqual(log['total'], 98)
        self.assertEqual(log['written'], 98)
        missingvals = log['missing_values']
        # distance metrics are missing in all records but one:
        for col in ('rjb', 'rrup', 'rupture_length', 'ry0', 'rx',
                    'rupture_width'):
            self.assertEqual(missingvals[col], 97, col)
        # these columns are missing in every record:
        for col in ('strike_1', 'dip_1', 'rake_1', 'duration_5_75'):
            self.assertEqual(missingvals[col], 98, col)
        # IMT columns (scalar and components) are never missing:
        for col in ('pga', 'pgv', 'sa', 'duration_5_95'):
            self.assertNotIn(col, missingvals)
            self.assertNotIn(col + '_components', missingvals)
        self.assertEqual(missingvals['magnitude'], 13)
        self.assertEqual(missingvals['magnitude_type'], 13)

        gmdb = GroundMotionTable(self.output_file, 'esm_sa_flatfile_2018')

        with self.assertRaises(ValueError):
            # trying to filter inside a with statement is not allowed:
            with gmdb:
                gmdb.filter('magnitude <= 4')

        gmdb2 = gmdb.filter('magnitude <= 4')
        # underlying HDF5 file not open (ValueError):
        with self.assertRaises(ValueError):
            for rec in gmdb2.records:
                pass

        # check that we correctly wrote default attrs:
        with gmdb2:
            attrs = gmdb2.table.attrs
            self.assertIsInstance(attrs.parser_stats, dict)
            self.assertEqual(attrs.filename, 'template_basic_flatfile.hd5')
            self.assertEqual(len(gmdb2.attrnames()), 6)

        # now iterating the records works:
        with gmdb2:
            mag_le_4 = 0
            for rec in gmdb2.records:
                self.assertLessEqual(rec['magnitude'], 4)
                mag_le_4 += 1

        gmdb2 = gmdb.filter('magnitude > 4')
        with gmdb2:
            mag_gt_4 = 0
            for rec in gmdb2.records:
                self.assertGreater(rec['magnitude'], 4)
                mag_gt_4 += 1

        # the 13 records with missing magnitude match neither filter:
        self.assertEqual(mag_le_4 + mag_gt_4, 98 - 13)
# Example #2
# 0
def testing(params):
    '''Core method to compute testing data

    :param params: dict with the request parameters

    :return: json serializable dict to be passed into a Response object
    '''
    # request parameter keys:
    key_gmdb = 'gmdb'
    key_gsim = 'gsim'
    key_imt = 'imt'
    key_fitm = 'fit_measure'
    key_config = 'config'
    key_sel = 'selexpr'

    # params[key_gmdb] is the tuple (hdf file name, table name):
    base_table = GroundMotionTable(*params[key_gmdb], mode='r')

    fit_results = {}
    db_records = defaultdict(int)
    skipped = {}
    config = params.get(key_config, {})
    # columns: "Measure of fit" "imt" "gsim" "value(s)"
    for gsim in params[key_gsim]:
        try:
            residuals = Residuals([gsim], params[key_imt])
            selexpr = _get_selexpr(gsim, params.get(key_sel, ''))

            # we have some record to be used, compute residuals:
            filtered = base_table.filter(selexpr)
            numrecords = _gmdb_records(residuals, filtered)

            db_records[gsim] = numrecords
            if not numrecords:
                skipped[gsim] = 'No matching db record found'
                continue

            collected = []
            for key, name, func in params[key_fitm]:
                collected.extend(
                    _itervalues(gsim, key, name, func(residuals, config)))

            for moffit, imt, value in collected:
                # value is a numpy scalar, but not ALL numpy scalars are
                # json serializable: .item() converts it to the equivalent
                # Python object:
                fit_results.setdefault(moffit, {}).\
                    setdefault(imt, {})[gsim] = value.item()

        except Exception as exc:  # pylint: disable=broad-except
            skipped[gsim] = str(exc)

    return {
        'Measure of fit': fit_results,
        'Db records': db_records,
        'Gsim skipped': skipped
    }
# Example #3
# 0
def check_gsim_defined_for_current_db(testdata):
    '''no test function, it is used to inspect in debug mode in order to get
    gsims with records in the current gmdb used for tests.

    :param testdata: object exposing a ``path(filename)`` method locating
        test data files
    :return: a dict mapping each gsim name that could be processed to the
        number of matching db records (gsims raising any error are skipped)
    '''
    matching = {}
    for gsim in OQ.gsims():
        try:
            residuals = Residuals([gsim], ['PGA', 'PGV', 'SA(0.1)'])
            gmdbpath = testdata.path('esm_sa_flatfile_2018.csv.hd5')
            gm_table = GroundMotionTable(gmdbpath,
                                         'esm_sa_flatfile_2018',
                                         mode='r')
            selexpr = get_selexpr(gsim)
            num = gmdb_records(residuals, gm_table.filter(selexpr))
            matching[gsim] = num
        except Exception:  # pylint: disable=broad-except
            # note: a bare 'except:' here would also swallow
            # KeyboardInterrupt/SystemExit; best-effort skip is intended
            # only for gsim-specific failures
            pass
    return matching
# Example #4
# 0
def get_residuals(params):
    '''Core method to compute residuals plots data

    :param params: dict with the request parameters

    :return: json serializable dict to be passed into a Response object
    '''
    plot_func, plot_kwargs = params['plot_type']
    residuals = Residuals(params['gsim'], params['imt'])

    # Compute residuals.
    # params['gmdb'] is the tuple (hdf file name, table name):
    gm_table = GroundMotionTable(*params['gmdb'], mode='r')
    if params.get('selexpr'):
        gm_table = gm_table.filter(params['selexpr'])
    residuals.get_residuals(gm_table)

    # nested dict: imt -> residual type -> gsim -> plot data:
    ret = defaultdict(lambda: defaultdict(dict))

    # merge the caller-supplied kwargs with the fixed ones:
    call_kwargs = dict(plot_kwargs, residuals=residuals, as_json=True)
    for gsim in residuals.residuals:
        call_kwargs['gmpe'] = gsim
        for imt in residuals.residuals[gsim]:
            call_kwargs['imt'] = imt
            imt_label = _relabel_sa(imt)
            for res_type, res_plot in plot_func(**call_kwargs).items():
                # make sure every expected statistic key is present:
                for stat in RESIDUALS_STATS:
                    res_plot.setdefault(stat, None)
                if imt_label != imt:
                    res_plot['xlabel'] = _relabel_sa(res_plot['xlabel'])
                    res_plot['ylabel'] = _relabel_sa(res_plot['ylabel'])
                # make also x and y keys consistent with trellis response:
                res_plot['xvalues'] = res_plot.pop('x')
                res_plot['yvalues'] = res_plot.pop('y')
                ret[imt_label][res_type][gsim] = res_plot

    return ret
    def setUpClass(cls):
        """
        Setup constructs the database from the ESM test data
        """
        # source flatfile; the output folder is wiped if left over from a
        # previous run:
        ifile = os.path.join(BASE_DATA_PATH, "residual_tests_esm_data.csv")
        cls.out_location = os.path.join(BASE_DATA_PATH, "residual_tests")
        if os.path.exists(cls.out_location):
            shutil.rmtree(cls.out_location)
        # build the database on disk; the parser object itself is not needed
        # afterwards:
        parser = ESMFlatfileParser.autobuild("000", "ESM ALL",
                                             cls.out_location, ifile)
        del parser
        cls.database_file = os.path.join(cls.out_location,
                                         "metadatafile.pkl")
        cls.database = None
        # load the pickled metadata produced by the parser above:
        with open(cls.database_file, "rb") as f:
            cls.database = pickle.load(f)
        cls.gsims = ["AkkarEtAlRjb2014",  "ChiouYoungs2014"]
        cls.imts = ["PGA", "SA(1.0)"]

        # create the sm table:
        cls.out_location2 = cls.out_location + '_table'
        EsmParser.parse(ifile, cls.out_location2, delimiter=';')
        # table name is derived from the flatfile base name:
        cls.dbtable = \
            GroundMotionTable(cls.out_location2,
                              os.path.splitext(os.path.basename(ifile))[0])
    def test_esm_flatfile(self):
        '''parses the ESM 2018 SA flatfile, checking the parser log and the
        written table (filter results and default attrs)'''
        input_file = os.path.join(os.path.dirname(self.input_file),
                                  'esm_sa_flatfile_2018.csv')
        log = EsmParser.parse(input_file, output_path=self.output_file)
        self.assertEqual(log['total'], 98)
        self.assertEqual(log['written'], 98)
        missingvals = log['missing_values']
        # distance metrics are missing in all records but one:
        for colname in ('rjb', 'rrup', 'rupture_length', 'ry0', 'rx',
                        'rupture_width'):
            self.assertEqual(missingvals[colname], 97)
        # these columns are missing in every record:
        for colname in ('strike_1', 'dip_1', 'rake_1', 'duration_5_75'):
            self.assertEqual(missingvals[colname], 98)
        # IMT columns (scalar and components) are never missing:
        for colname in ('pga', 'pgv', 'sa', 'duration_5_95'):
            self.assertNotIn(colname, missingvals)
            self.assertNotIn(colname + '_components', missingvals)
        self.assertEqual(missingvals['duration_5_75'], 98)
        self.assertEqual(missingvals['magnitude'], 13)
        self.assertEqual(missingvals['magnitude_type'], 13)

        gmdb = GroundMotionTable(self.output_file, 'esm_sa_flatfile_2018')

        filtered = gmdb.filter('magnitude <= 4')

        # check that we correctly wrote default attrs:
        with filtered.table as tbl:
            self.assertTrue(isinstance(tbl.attrs.parser_stats, dict))
            self.assertEqual(tbl.attrs.flatfilename,
                             'template_basic_flatfile.hd5')
            # self.assertEqual(len(filtered.attrnames()), 6)

        count_le = 0
        for record in filtered.records:
            self.assertTrue(record['magnitude'] <= 4)
            count_le += 1

        filtered = gmdb.filter('magnitude > 4')
        count_gt = 0
        for record in filtered.records:
            self.assertTrue(record['magnitude'] > 4)
            count_gt += 1

        # the 13 records with missing magnitude match neither filter:
        self.assertTrue(count_le + count_gt == 98 - 13)
# Example #7
# 0
def records_iter(params):
    '''Computes the selection from the given already validated params and
    returns a filtered GroundMotionDatabase object'''
    # params['gmdb'] is the tuple (hdf file name, table name); open it
    # read-only and stream the records matching the (optional) selection
    # expression in params['selexpr']:
    with GroundMotionTable(*params['gmdb'], mode='r') as gmdb:
        yield from records_where(gmdb.table, params.get('selexpr'))
    def test_reading_concurrentcy(self):
        '''Tests that it is ok to open an HDF table twice
        (NOTE: this is currently NOT YET SUPPORTED)
        '''
        # deliberate early return: concurrent reads are not supported yet,
        # so everything below is intentionally unreachable (kept for when
        # support is added):
        return
        # the test file has a comma delimiter. Test that we raise with
        # the default semicolon:
        # now should be ok:
        log = UserDefinedParser.parse(self.input_file,
                                      output_path=self.output_file,
                                      delimiter=',')

        dbname = os.path.splitext(os.path.basename(self.output_file))[0]
        rec1 = []
        rec2 = []
        gmdb = GroundMotionTable(self.output_file, dbname)
        # iterate the records while a nested iteration on the same table is
        # in progress, and check both iterations yield the same ids:
        for r in gmdb.records:
            rec1.append(r['record_id'])
            if not rec2:
                for r2 in gmdb.records:
                    rec2.append(r2['record_id'])
        self.assertEqual(rec1, rec2)
# Example #9
# 0
    def test_template_basic_file_selection(self):
        '''parses a sample flatfile and tests some selection syntax on it'''
        # the test file has a comma delimiter. Test that we raise with
        # the default semicolon:
        with self.assertRaises(ValueError):
            log = UserDefinedParser.parse(self.input_file,
                                          output_path=self.output_file)
        # now should be ok:
        log = UserDefinedParser.parse(self.input_file,
                                      output_path=self.output_file,
                                      delimiter=',')

        dbname = os.path.splitext(os.path.basename(self.output_file))[0]
        with GroundMotionTable(self.output_file, dbname) as gmdb:
            table = gmdb.table
            total = table.nrows
            selection = 'pga <= %s' % 100.75
            ids = [r['record_id'] for r in records_where(table, selection)]
            ids_len = len(ids)
            # test that read where gets the same number of records:
            ids = [r['record_id'] for r in read_where(table, selection)]
            self.assertEqual(len(ids), ids_len)
            # test with limit given:
            ids = [r['record_id'] for r in records_where(table, selection,
                                                         ids_len-1)]
            self.assertEqual(len(ids), ids_len-1)
            # test by negating the selection condition and expect the remaining
            # records to be found:
            ids = [r['record_id'] for r in records_where(table,
                                                         "~(%s)" % selection)]
            self.assertEqual(len(ids), total - ids_len)
            # same should happend for read_where:
            ids = [r['record_id'] for r in read_where(table,
                                                      "~(%s)" % selection)]
            self.assertEqual(len(ids), total - ids_len)
            # test with limit 0 (expected: no record yielded):
            ids = [r['record_id'] for r in records_where(table,
                                                         "~(%s)" % selection,
                                                         0)]
            self.assertEqual(len(ids), 0)
            # restrict the search:
            # note that we must pass strings to event_time,
            # either 1935-01-01, 1935-01-01T00:00:00, or simply the year:
            # (both single and double quotes are accepted in the expression)
            selection2 = "(%s) & (%s)" % \
                (selection, '(event_time >= "1935") & '
                            '(event_time < \'1936-01-01\')')
            ids = [r['record_id'] for r in records_where(table, selection2)]
            ids_len2 = len(ids)
            # test that the search was restricted:
            self.assertTrue(ids_len2 < ids_len)
            # now negate the serarch on event_time and test that we get all
            # remaining records:
            selection2 = "(%s) & ~(%s)" % \
                (selection, '(event_time >= "1935") & '
                            '(event_time < "1936-01-01")')
            ids = [r['record_id'] for r in records_where(table, selection2)]
            self.assertEqual(len(ids) + ids_len2, ids_len)
            # test truthy condition (isaval on bool col returns True):
            # a column compared to itself matches every record:
            selection = 'vs30_measured == vs30_measured'
            ids = read_where(table, selection)
            self.assertEqual(len(ids), total)
            # test with limit exceeding the available records (should get
            # all records as if limit was not given):
            ids = read_where(table, selection, total+1)
            self.assertEqual(len(ids), total)
            # records_where should get the same results as read_where:
            ids = [r['record_id'] for r in records_where(table, selection)]
            self.assertEqual(len(ids), total)
            # test falsy condition (isaval on bool col returns True):
            ids = read_where(table, "~(%s)" % selection)
            self.assertEqual(len(ids), 0)
            ids = read_where(table, selection, total+1)
            self.assertEqual(len(ids), total)
            ids = [r['record_id'] for r in records_where(table,
                                                         "~(%s)" % selection)]
            self.assertEqual(len(ids), 0)
# Example #10
# 0
    def test_template_basic_file(self):
        '''parses sample flatfile and perfomrs some tests'''
        # test a file not found
        with self.assertRaises(IOError):
            with GroundMotionTable(self.output_file + 'what',
                      dbname='whatever', mode='r') as gmdb:
                pass

        log = UserDefinedParser.parse(self.input_file,
                                      output_path=self.output_file,
                                      delimiter=',')
        dbname = os.path.splitext(os.path.basename(self.output_file))[0]
        # the flatfile parsed has:
        # 1. an event latitude out of bound (row 0)
        # 2. an event longitude out of bound (row 1)
        # 3. a pga with extremely high value (row 2)
        # 4. a sa[0] with extremely high value (row 3)

        total = log['total']
        written = total - 2  # row 2 and 3 not written
        self.assertEqual(log['total'], 99)
        self.assertEqual(log['written'], written)
        self.assertEqual(sorted(log['error']), [2, 3])
        self.assertEqual(len(log['outofbound_values']), 2)  # rows 0 and 1
        self.assertEqual(log['outofbound_values']['event_latitude'], 1)  # 0
        self.assertEqual(log['outofbound_values']['event_longitude'], 1)  # 1
        # self.assertEqual(log['missing_values']['pga'], 0)
        # NOTE(review): the next two assertions are identical; the second
        # presumably meant another column (e.g. 'pga', see commented line
        # above) — confirm:
        self.assertEqual(log['missing_values']['pgv'], log['written'])
        self.assertEqual(log['missing_values']['pgv'], log['written'])

        # assert auto generated ids are not missing:
        self.assertFalse('record_id' in log['missing_values'])
        self.assertFalse('event_id' in log['missing_values'])
        self.assertFalse('station_id' in log['missing_values'])

        # PYTABLES. IMPORTANT
        # seems that this is NOT possible:
        # list(table.iterrows())  # returns N times the LAST row
        # seems also that we should NOT break inside a iterrows or where loop
        # (see here: https://github.com/PyTables/PyTables/issues/8)

        # open HDF5 and check for incremental ids:
        test_col = 'event_name'
        test_col_oldval, test_col_newval = None, b'dummy'
        test_cols_found = 0
        with GroundMotionTable(self.output_file, dbname, 'a') as gmdb:
            tbl = gmdb.table
            ids = list(r['event_id'] for r in tbl.iterrows())
            # assert record ids are the number of rows
            self.assertTrue(len(ids) == written)
            # assert we have some event shared across records:
            self.assertTrue(len(set(ids)) < written)
            # modify one row: pick the first event name found and overwrite
            # every row carrying it with the dummy value
            for row in tbl.iterrows():
                if test_col_oldval is None:
                    test_col_oldval = row[test_col]
                if row[test_col] == test_col_oldval:
                    row[test_col] = test_col_newval
                    test_cols_found += 1
                    row.update()
            tbl.flush()
            # all written columns have the same value of row[test_col]:
            self.assertTrue(test_cols_found == 1)

        # assert that we modified the event name
        with GroundMotionTable(self.output_file, dbname, 'r') as gmdb:
            tbl = gmdb.table
            count = 0
            for row in tbl.where('%s == %s' % (test_col, test_col_oldval)):
                # we should never be here (no row with the old value):
                count += 1
            self.assertTrue(count == 0)
            count = 0
            for row in tbl.where('%s == %s' % (test_col, test_col_newval)):
                count += 1
            self.assertTrue(count == test_cols_found)

        # now re-write, with append mode
        log = UserDefinedParser.parse(self.input_file,
                                      output_path=self.output_file,
                                      delimiter=',')

        # . NOT SUPPORTED, COMMENTED:
        # open HDF5 with append='a' (the default)
        # and check that wewrote stuff twice
        # with GroundMotionTable(self.output_file, dbname, 'r') as gmdb:
        #     tbl = gmdb.table
        #     self.assertTrue(tbl.nrows == written * 2)
        #     # assert the old rows are there
        #     oldrows = list(row[test_col] for row in
        #                    tbl.where('%s == %s' % (test_col, test_col_oldval)))
        #     self.assertTrue(len(oldrows) == test_cols_found)
        #     # assert the new rows are added:
        #     newrows = list(row[test_col] for row in
        #                    tbl.where('%s == %s' % (test_col, test_col_newval)))
        #     self.assertTrue(len(newrows) == test_cols_found)

        # now re-write, with no mode='w'
        log = UserDefinedParser.parse(self.input_file,
                                      output_path=self.output_file,
                                      delimiter=',')
        with GroundMotionTable(self.output_file, dbname, 'r') as gmdb:
            tbl = gmdb.table
            # re-parsing replaced (did not append to) the previous content:
            self.assertTrue(tbl.nrows == written)
            # assert the old rows are not there anymore
            oldrows = list(row[test_col] for row in
                           tbl.where('%s == %s' % (test_col, test_col_oldval)))
            self.assertTrue(len(oldrows) == test_cols_found)
            # assert the new rows are added:
            newrows = list(row[test_col] for row in
                           tbl.where('%s == %s' % (test_col, test_col_newval)))
            self.assertTrue(not newrows)

        # get db names:
        dbnames = get_dbnames(self.output_file)
        self.assertTrue(len(dbnames) == 1)
        name = os.path.splitext(os.path.basename(self.output_file))[0]
        self.assertTrue(dbnames[0] == name)

        # now a delete: removing the table empties the db name list
        names = get_dbnames(self.output_file)
        assert len(names) > 0
        GroundMotionTable(self.output_file, name, 'w').delete()

        names = get_dbnames(self.output_file)
        assert len(names) == 0