Example #1
    def execute(self):
        """
        Write info.json metadata file.

        Raises:
            NotADirectoryError: When the event data directory does not exist.
            FileNotFoundError: When the the shake_result HDF file does not
                exist.
        """
        _, data_path = get_config_paths()
        datadir = os.path.join(data_path, self._eventid, 'current')
        if not os.path.isdir(datadir):
            os.makedirs(datadir)

        # try to find the event by our event id
        try:
            detail = get_event_by_id(self._eventid)
        except Exception as e:
            fmt = 'Could not retrieve DYFI data for %s - error "%s"'
            self.logger.warning(fmt % (self._eventid, str(e)))
            return

        dataframe, msg = _get_dyfi_dataframe(detail)
        if dataframe is None:
            self.logger.info(msg)
            return

        reference = 'USGS Did You Feel It? System'
        xmlfile = os.path.join(datadir, 'dyfi_dat.xml')
        dataframe_to_xml(dataframe, xmlfile, reference)
        self.logger.info('Wrote %i DYFI records to %s' %
                         (len(dataframe), xmlfile))
Example #2
def test_dataframe_to_xml():
    homedir = os.path.dirname(
        os.path.abspath(__file__))  # where is this script?
    datadir = os.path.join(homedir, '..', 'data')
    amps_output = os.path.join(datadir, 'amps.csv')
    df = pd.read_csv(amps_output)
    outdir = tempfile.mkdtemp()
    try:
        xmlfile = os.path.join(outdir, 'foo_dat.xml')
        dataframe_to_xml(df, xmlfile)
        # The input amps.csv is expected to contain the record
        # HNN,psa10,0.0107; verify that this value survived the write to XML.
        root = minidom.parse(xmlfile)
        comps = root.getElementsByTagName('comp')
        for comp in comps:
            if comp.getAttribute('name') == 'HNN':
                psa10 = comp.getElementsByTagName('psa10')[0]
                value = float(psa10.getAttribute('value'))
                assert value == 0.0107
    except Exception:
        assert 1 == 2
    finally:
        shutil.rmtree(outdir)
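The assertions in this test imply the shape of dataframe_to_xml's output: each component becomes a comp element whose IMT values appear as child elements carrying a value attribute, e.g. a fragment along the lines of <comp name="HNN"><psa10 value="0.0107"/></comp>. Enclosing elements and any other attributes are not exercised here.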
Example #3
def test_write_xml():
    # where is this script?
    homedir = os.path.dirname(os.path.abspath(__file__))
    datadir = os.path.join(homedir, '..', 'data')
    complete_file = os.path.join(datadir, 'complete_pgm.xlsx')
    mmimin_file = os.path.join(datadir, 'minimum_mmi.xlsx')
    tempdir = None
    try:
        tempdir = tempfile.mkdtemp()
        df, reference = read_excel(complete_file)
        xmlfile = os.path.join(tempdir, 'foo.xml')
        dataframe_to_xml(df, xmlfile, reference=reference)

        xmlfile = os.path.join(tempdir, 'bar.xml')
        df_mmimin, reference = read_excel(mmimin_file)
        dataframe_to_xml(df_mmimin, xmlfile, reference=reference)

    except Exception:
        raise AssertionError('Could not write XML file.')
    finally:
        if tempdir is not None:
            shutil.rmtree(tempdir)
Example #4
    def execute(self):
        """
        Get amps from the database(s) and write the XML file(s) to the
        event's current directory.

        Raises:
            NotADirectoryError: When the event data directory does not exist.
            FileNotFoundError: When the event.xml data file does not exist.
        """
        install_path, data_path = get_config_paths()
        datadir = os.path.join(data_path, self._eventid, 'current')
        if not os.path.isdir(datadir):
            raise NotADirectoryError('%s is not a valid directory.' % datadir)
        datafile = os.path.join(datadir, 'event.xml')
        if not os.path.isfile(datafile):
            raise FileNotFoundError('%s does not exist.' % datafile)

        origin = Origin.fromFile(datafile)

        config = get_aqms_config()

        evtime = origin.time.strftime('%Y/%m/%d %H%M%S')
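        # The strftime pattern above produces the same layout that the
        # TO_DATE(:evtime, 'YYYY/MM/DD HH24MISS') bind in the channel query
        # below expects.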

        success = False
        for dbname in sorted(config['dbs'].keys()):
            db = config['dbs'][dbname]
            dsn_tns = cx_Oracle.makedsn(db['host'], db['port'], sid=db['sid'])
            try:
                con = cx_Oracle.connect(user=db['user'],
                                        password=db['password'],
                                        dsn=dsn_tns)
            except cx_Oracle.DatabaseError as err:
                self.logger.warn('Error connecting to database: %s' % dbname)
                self.logger.warn('Error: %s' % err)
                continue
            cursor = con.cursor()

            query = ("SELECT d.description, c.net, c.sta, c.seedchan, "
                     "c.location, c.lat, c.lon, c.elev, s.staname "
                     "FROM channel_data c, station_data s, "
                     "d_abbreviation d "
                     "WHERE TO_DATE(:evtime, 'YYYY/MM/DD HH24MISS') BETWEEN "
                     "c.ondate AND c.offdate "
                     "AND c.net = s.net AND c.sta = s.sta "
                     "AND s.net_id = d.id")
            try:
                cursor.execute(query, {'evtime': evtime})
            except cx_Oracle.DatabaseError as err:
                self.logger.warn('Error: %s' % err)
                cursor.close()
                con.close()
                continue
            stadict = {}
            nlines = 0
            for line in cursor:
                nlines += 1
                desc, net, sta, chan, loc, lat, lon, elev, staname = line
                loc = loc.replace(' ', '-')
                netsta = net + '.' + sta
                if ' - ' in staname:
                    staname, staloc = staname.split(' - ', maxsplit=1)
                else:
                    staloc = ''
                cdict = {
                    'netdesc': desc,
                    'net': net,
                    'sta': sta,
                    'lon': lon,
                    'lat': lat,
                    'elev': elev,
                    'staname': staname,
                    'staloc': staloc
                }
                if netsta not in stadict:
                    stadict[netsta] = {loc: {chan: cdict}}
                    continue
                if loc not in stadict[netsta]:
                    stadict[netsta][loc] = {chan: cdict}
                    continue
                stadict[netsta][loc][chan] = cdict
            if nlines > 0:
                success = True
                break
        if not success:
            raise RuntimeError('Could not retrieve stations from database(s)')

        #
        # Here we're assuming that the last connection and cursor
        # are still valid and the database has the station information
        #
        # The stamapping table may not exist on some databases, so
        # we're going to ignore errors.
        #
        stalocdescr = {}
        try:
            cursor.execute('select sta, net, locdescr from stamapping')
        except cx_Oracle.DatabaseError as err:
            self.logger.warn('Warning: could not retrieve stamapping: %s' % err)
        else:
            for line in cursor:
                sta, net, locdescr = line
                if net not in stalocdescr:
                    stalocdescr[net] = {sta: locdescr}
                    continue
                stalocdescr[net][sta] = locdescr

        #
        # Now read the adhoc file and add the "table 6" values to
        # any stations/channels that are listed; also add any
        # unlisted stations to stadict
        #
        netcode = {}
        if config['adhoc_file'] and os.path.isfile(config['adhoc_file']):
            widths = [6, 3, 4, 3, 4, 10, 11, 6, 60]
            columns = [
                'sta', 'net', 'chan', 'loc', 't6', 'lat', 'lon', 'elev', 'name'
            ]
            df = pd.read_fwf(config['adhoc_file'],
                             widths=widths,
                             names=columns)
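            # Hypothetical layout of one fixed-width adhoc record (column
            # widths 6, 3, 4, 3, 4, 10, 11, 6, 60): station code, network,
            # channel, location code, COSMOS table-6 site code, latitude,
            # longitude, elevation, and a free-form "NAME - DESCRIPTION"
            # station name field.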
            for row in df.itertuples():
                _, sta, net, chan, loc, t6, lat, lon, elev, name = row
                netsta = net + '.' + sta
                loc = loc.replace(' ', '-')
                try:
                    stadict[netsta][loc][chan]['t6'] = t6
                    continue
                except KeyError:
                    pass

                if ' - ' in name:
                    nn, desc = name.split(' - ', maxsplit=1)
                else:
                    nn = name
                    desc = ''
                cdict = {
                    'staloc': desc,
                    'net': net,
                    'sta': sta,
                    'lon': lon,
                    'lat': lat,
                    'elev': elev,
                    'staname': nn,
                    't6': t6
                }
                if net in netcode:
                    cdict['netdesc'] = netcode[net]
                else:
                    try:
                        cursor.execute(
                            'select d.description '
                            'from d_abbreviation d, station_data s '
                            'where s.net = :net '
                            'and s.net_id = d.id', {'net': net})
                    except cx_Oracle.DatabaseError as err:
                        self.logger.warn(
                            'Error retrieving net description: %s' % err)
                        netdesc = ['Unknown']
                    else:
                        netdesc = cursor.fetchone()
                        if not netdesc:
                            netdesc = ['Unknown']
                    netcode[net] = netdesc[0]
                    cdict['netdesc'] = netcode[net]
                if netsta not in stadict:
                    stadict[netsta] = {loc: {chan: cdict}}
                    continue
                if loc not in stadict[netsta]:
                    stadict[netsta][loc] = {chan: cdict}
                    continue
                stadict[netsta][loc][chan] = cdict
        elif config['adhoc_file']:
            self.logger.warn('Warning: adhoc_file %s does not exist' %
                             config['adhoc_file'])

        #
        # Get the station location strings if possible; if it is just
        # a (possibly truncated) copy of the station name, leave it blank
        #
        for netsta in stadict.keys():
            net, sta = netsta.split('.', maxsplit=1)
            try:
                staloc = stalocdescr[net][sta]
            except KeyError:
                staloc = ''
            for loc in stadict[netsta].keys():
                for chan in stadict[netsta][loc].keys():
                    if stadict[netsta][loc][chan]['staloc']:
                        continue
                    staname = stadict[netsta][loc][chan]['staname']
                    if staloc and staloc not in staname:
                        stadict[netsta][loc][chan]['staloc'] = staloc

        cursor.close()
        con.close()

        #
        # Now get the amps and match them up with the station info
        # and write the XML
        #
        qm2 = {}
        files_written = 0
        columns = ('station', 'channel', 'imt', 'value', 'lat', 'lon', 'netid',
                   'flag', 'name', 'loc', 'source')
        for dbname in sorted(config['dbs'].keys()):
            db = config['dbs'][dbname]
            dsn_tns = cx_Oracle.makedsn(db['host'], db['port'], sid=db['sid'])
            try:
                con = cx_Oracle.connect(user=db['user'],
                                        password=db['password'],
                                        dsn=dsn_tns)
            except cx_Oracle.DatabaseError as err:
                self.logger.warn('Error connecting to database: %s' % dbname)
                self.logger.warn('Error: %s' % err)
                continue
            cursor = con.cursor()

            query = ("WITH q1 AS ("
                     "SELECT a.net, a.sta, a.seedchan, a.location, "
                     "a.amplitude, a.amptype, a.cflag, a.quality, "
                     "a.units, a.lddate "
                     "FROM amp a, assocevampset asoc, ampset s "
                     "WHERE asoc.evid   = :evid "
                     "AND asoc.ampsetid = s.ampsetid AND asoc.isvalid = 1 "
                     "AND asoc.ampsettype = 'sm' "
                     "AND s.ampid  = a.ampid "
                     "AND a.amptype IN ('PGA', 'PGV', 'SP.3', 'SP1.0', "
                     "'SP3.0') "
                     "ORDER BY a.net, a.sta, a.seedchan, a.location, "
                     "a.amptype, a.lddate desc "
                     ") "
                     "SELECT UNIQUE net, sta, seedchan, location, "
                     "amplitude, amptype, cflag, quality, units "
                     "FROM q1 "
                     "ORDER BY net, sta, seedchan, location, amptype")

            try:
                cursor.execute(query, {'evid': self._eventid})
            except cx_Oracle.DatabaseError as err:
                self.logger.warn('Error: amp query failed: %s' % err)
                continue

            ampdata = {}
            amprows = []
            for row in cursor:
                (net, sta, chan, loc, amp, amptype, cflag, quality,
                 units) = row
                loc = loc.replace(' ', '-')
                netsta = net + '.' + sta
                try:
                    sd = stadict[netsta][loc][chan]
                except KeyError:
                    # Can't get station info for some reason
                    continue
                # Skip amps with unknown or disqualifying COSMOS site codes,
                # unless no adhoc file was provided, in which case trust
                # everything
                if config['adhoc_file']:
                    if 't6' not in sd:
                        continue
                    if int(sd['t6']) not in config['valid_codes']:
                        continue
                if quality < 0.5:
                    continue
                if netsta not in ampdata:
                    ampdata[netsta] = {loc: {chan: {'n_amps_on_scale': 0}}}
                elif loc not in ampdata[netsta]:
                    ampdata[netsta][loc] = {chan: {'n_amps_on_scale': 0}}
                elif chan not in ampdata[netsta][loc]:
                    ampdata[netsta][loc][chan] = {'n_amps_on_scale': 0}
                # Use only the most recently loaded amp; rows are returned
                # in descending order of lddate.
                if amptype.upper() in ampdata[netsta][loc][chan]:
                    continue
                ampdata[netsta][loc][chan][amptype.upper()] = True
                # CISN flag values are:
                #   BN  ->  below noise
                #   OS  ->  on scale
                #   CL  ->  clipped
                # Quality values are:
                #   1.0 ->  complete time window
                #   0.5 ->  partial time window, approved for use by analyst
                #   0.0 ->  incomplete time window
                if 'os' in cflag or 'OS' in cflag:
                    ampdata[netsta][loc][chan]['n_amps_on_scale'] += 1
                    cflag = 0
                else:
                    cflag = 1
                if amptype == 'PGA' or amptype == 'PGV':
                    imt = amptype.lower()
                elif amptype == 'SP.3':
                    imt = 'psa03'
                elif amptype == 'SP1.0':
                    imt = 'psa10'
                elif amptype == 'SP3.0':
                    imt = 'psa30'
                if units == 'cmss':
                    amp = amp / 9.81
                newrow = (netsta, chan, imt, amp, sd['lat'], sd['lon'], net,
                          cflag, sd['staname'], sd['staloc'], sd['netdesc'])
                amprows.append(newrow)
            cursor.close()
            con.close()
            #
            # Create a pandas dataframe then (possibly) write the data
            # to an XML file
            #
            if len(amprows) == 0:
                continue
            df = pd.DataFrame.from_records(amprows,
                                           columns=columns,
                                           coerce_float=True)
            xmlfile = os.path.join(datadir, dbname + '_dat.xml')
            nstas = len(set(df['station']))
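            # query_mode semantics as implemented below:
            #   3 -> write a data file for every database that returns amps
            #   1 -> write the first database that meets query_min_stas,
            #        then stop
            #   2 -> defer; keep each database's results and, after the
            #        loop, write only the one with the most stations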
            if config['query_mode'] == 3:
                dataframe_to_xml(df, xmlfile)
                files_written += 1
                continue
            elif config['query_mode'] == 1 and \
                    nstas >= config['query_min_stas']:
                dataframe_to_xml(df, xmlfile)
                files_written += 1
                break
            elif config['query_mode'] == 2:
                qm2[dbname] = {'nstas': nstas, 'df': df}
        # End of db loop
        if config['query_mode'] == 2:
            smax = 0
            for dbname, dbinfo in qm2.items():
                if dbinfo['nstas'] > smax:
                    smax = dbinfo['nstas']
                    dbmax = dbname
            if smax > 0:
                xmlfile = os.path.join(datadir, dbmax + '_dat.xml')
                dataframe_to_xml(qm2[dbmax]['df'], xmlfile)
                files_written += 1
        if files_written == 0:
            self.logger.warn("No data found for event %s" % self._eventid)

        return
Example #5
def test_read_tables():
    homedir = os.path.dirname(
        os.path.abspath(__file__))  # where is this script?
    datadir = os.path.join(homedir, '..', 'data')

    ##########################################
    # these files should all read successfully
    ##########################################
    tmpdir = tempfile.mkdtemp()
    try:
        complete_file = os.path.join(datadir, 'complete_pgm.xlsx')
        df_complete, _ = read_excel(complete_file)
        np.testing.assert_almost_equal(df_complete['H1']['PGA'].sum(), 569.17)
        xmlfile = os.path.join(tmpdir, 'complete_pgm.xml')
        dataframe_to_xml(df_complete, xmlfile)

        pgamin_file = os.path.join(datadir, 'minimum_pga.xlsx')
        df_pgamin, _ = read_excel(pgamin_file)
        np.testing.assert_almost_equal(df_pgamin['UNK']['PGA'].sum(), 569.17)
        xmlfile = os.path.join(tmpdir, 'minimum_pga.xml')
        dataframe_to_xml(df_pgamin, xmlfile)

        mmimin_file = os.path.join(datadir, 'minimum_mmi.xlsx')
        df_mmimin, _ = read_excel(mmimin_file)
        np.testing.assert_almost_equal(df_mmimin['INTENSITY'].sum(),
                                       45.199872273516036)
        xmlfile = os.path.join(tmpdir, 'minimum_mmi.xml')
        dataframe_to_xml(df_mmimin, xmlfile)

        missing_data_file = os.path.join(datadir, 'missing_rows.xlsx')
        df, _ = read_excel(missing_data_file)
        assert np.isnan(df['H1']['SA(0.3)']['CHPA'])
        xmlfile = os.path.join(tmpdir, 'missing_rows.xml')
        dataframe_to_xml(df, xmlfile)

        sm2xml_example = os.path.join(datadir, 'sm2xml_output.xlsx')
        df, _ = read_excel(sm2xml_example)
        np.testing.assert_almost_equal(df['HHZ']['PGA'].sum(),
                                       150.82342541678645)
        xmlfile = os.path.join(tmpdir, 'sm2xml_output.xml')
        dataframe_to_xml(df, xmlfile)

    except Exception:
        assert 1 == 2
    finally:
        shutil.rmtree(tmpdir)

    ##########################################
    # these files should all fail
    ##########################################
    try:
        missing_file = os.path.join(datadir, 'missing_columns.xlsx')
        read_excel(missing_file)
        assert 1 == 2
    except KeyError:
        assert 1 == 1

    try:
        wrong_file = os.path.join(datadir, 'wrong_channels.xlsx')
        read_excel(wrong_file)
        assert 1 == 2
    except KeyError:
        assert 1 == 1

    try:
        nodata_file = os.path.join(datadir, 'no_data.xlsx')
        read_excel(nodata_file)
        assert 1 == 2
    except KeyError:
        assert 1 == 1

    try:
        emptyrow_file = os.path.join(datadir, 'empty_row.xlsx')
        read_excel(emptyrow_file)
        assert 1 == 2
    except IndexError:
        assert 1 == 1

    try:
        noref_file = os.path.join(datadir, 'no_reference.xlsx')
        read_excel(noref_file)
        assert 1 == 2
    except KeyError:
        assert 1 == 1
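
All of the examples above share the same call pattern: build a pandas DataFrame (from a CSV, from an Excel sheet via read_excel, or from database rows) and pass it to dataframe_to_xml along with an output path and, optionally, a reference string. Below is a minimal sketch of that pattern; the import path, the amps.csv location, and the reference text are assumptions for illustration, not taken from the examples above.

import os
import tempfile

import pandas as pd

# The import path is an assumption; adjust it to wherever dataframe_to_xml
# lives in your installation.
from amptools.table import dataframe_to_xml

# Hypothetical input: a CSV laid out like the amps.csv used in Example #2.
df = pd.read_csv('amps.csv')

# Write the XML data file into a temporary directory.
outdir = tempfile.mkdtemp()
xmlfile = os.path.join(outdir, 'example_dat.xml')
dataframe_to_xml(df, xmlfile, reference='Example data source')
print('Wrote %i records to %s' % (len(df), xmlfile))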