Example #1
def read_usc(filename, **kwargs):
    """Read USC V1 strong motion file.

    Args:
        filename (str): Path to possible USC V1 data file.
        kwargs (ref): Ignored by this function.
    Returns:
        Stream: Obspy Stream containing three channels of acceleration data
        (cm/s**2).
    """
    logging.debug("Starting read_usc.")
    valid, alternate = is_usc(filename, return_alternate=True)
    if not valid:
        raise Exception('%s is not a valid USC file' % filename)
    # Check for Location
    location = kwargs.get('location', '')

    with open(filename, 'rt') as f:
        first_line = f.readline()

    if first_line.find('OF UNCORRECTED ACCELEROGRAM DATA OF') >= 0:
        stream = read_volume_one(
            filename, location=location, alternate=alternate)
    else:
        raise GMProcessException('USC: Not a supported volume.')

    return stream
Example #2
def read_data(filename, read_format=None, **kwargs):
    """
    Read strong motion data from a file.

    Args:
        filename (str): Path to file
        read_format (str): Format of file

    Returns:
        list: Sequence of obspy.core.stream.Streams read from file
    """
    # Check if file exists
    if not os.path.exists(filename):
        raise GMProcessException('File %r does not exist.' % filename)
    # Get and validate format
    if read_format is None:
        read_format = _get_format(filename)
    else:
        read_format = _validate_format(filename, read_format.lower())
    # Load reader and read file
    reader = 'gmprocess.io.' + read_format + '.core'
    reader_module = importlib.import_module(reader)
    read_name = 'read_' + read_format
    read_method = getattr(reader_module, read_name)
    streams = read_method(filename, **kwargs)
    return streams
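
A minimal usage sketch for read_data (the import path and file path below are assumptions, not taken from this listing); when read_format is omitted, the format is auto-detected via _get_format:

from gmprocess.io.read import read_data  # assumed import path

streams = read_data('/data/records/example_record.v2')  # hypothetical file
for stream in streams:
    for trace in stream:
        # each trace carries acceleration data plus station metadata
        print(trace.id, trace.stats.sampling_rate)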
Example #3
def get_metadata(
        eqlat=None,
        eqlon=None,
        eqtime=None,
        eqradius=10,
        abandoned=False,
        station_type='Ground',
        eqtimewindow=10,  # seconds
        station_radius=200):
    """Retrieve station metadata JSON from CESMD web service.

    Args:
        eqlat (float): Earthquake latitude.
        eqlon (float): Earthquake longitude.
        eqtime (datetime): Earthquake origin time.
        eqradius (float): Earthquake search radius (km).
        abandoned (bool): Whether or not to include abandoned stations in the search.
        station_type (str): One of the following station types: [%s]
        eqtimewindow (float): Earthquake time search window in sec.
        station_radius (float): Radius (km) to search for stations from epicenter.
    Returns:
        dict: Dictionary of event/station information.

    """ % (','.join(STATION_TYPES))
    params = {
        'rettype': 'metadata',
        'groupby': 'event',
        'format': 'json',
        'nodata': 404,
        'sttype': STATION_TYPES[station_type],
        'abandoned': abandoned
    }
    has_event_info = (
        eqlat is not None and eqlon is not None and eqtime is not None)

    if not has_event_info:
        raise GMProcessException(
            'get_metadata requires earthquake latitude, longitude, and '
            'origin time.')
    else:
        starttime = eqtime - timedelta(seconds=eqtimewindow // 2)
        endtime = eqtime + timedelta(seconds=eqtimewindow // 2)
        params['elat'] = eqlat
        params['elon'] = eqlon
        params['erad'] = eqradius
        params['startdate'] = starttime.strftime('%Y-%m-%dT%H:%M:%S')
        params['enddate'] = endtime.strftime('%Y-%m-%dT%H:%M:%S')
        params['maxepidist'] = station_radius

    session = Session()
    request = Request('GET', URL_TEMPLATE, params=params).prepare()
    response = session.get(request.url)
    if response.status_code != 200:
        fmt = 'Could not retrieve data from url "%s": Server response %i'
        raise Exception(fmt % (request.url, response.status_code))
    metadata = response.json()

    return metadata
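
A hedged calling sketch for get_metadata with made-up event parameters; the function queries the CESMD web service, so this needs network access and every value below is purely illustrative:

from datetime import datetime

metadata = get_metadata(
    eqlat=34.05,                             # hypothetical epicenter
    eqlon=-118.25,
    eqtime=datetime(2019, 7, 6, 3, 19, 53),  # hypothetical origin time
    eqradius=10,
    station_radius=200)
print(sorted(metadata))                      # top-level keys of the response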
Example #4
def pick_ar(stream, picker_config=None, config=None):
    """Wrapper around the AR P-phase picker.

    Args:
        stream (StationStream):
            Stream containing waveforms that need to be picked.
        picker_config (dict):
            Dictionary with parameters for AR P-phase picker. See picker.yml.
        config (dict):
            Configuration dictionary. Key value here is:
                windows:
                    window_checks:
                        min_noise_duration
    Returns:
        tuple:
            - Best estimate for p-wave arrival time (s since start of trace).
            - Mean signal to noise ratio based on the pick.
    """
    if picker_config is None:
        picker_config = get_config(section='pickers')
    if config is None:
        config = get_config()
    min_noise_dur = config['windows']['window_checks']['min_noise_duration']
    params = picker_config['ar']
    # Get the east, north, and vertical components from the stream
    st_e = stream.select(channel='??[E1]')
    st_n = stream.select(channel='??[N2]')
    st_z = stream.select(channel='??[Z3]')

    # Check if we found one of each component
    # If not, use the next picker in the order of preference
    if len(st_e) != 1 or len(st_n) != 1 or len(st_z) != 1:
        raise GMProcessException('Unable to perform AR picker.')

    minloc = ar_pick(st_z[0].data, st_n[0].data, st_e[0].data,
                     st_z[0].stats.sampling_rate,
                     **params)[0]
    if minloc < min_noise_dur:
        fmt = 'Noise window (%.1f s) less than minimum (%.1f)'
        tpl = (minloc, min_noise_dur)
        raise GMProcessException(fmt % tpl)
    mean_snr = calc_snr(stream, minloc)

    return (minloc, mean_snr)
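
A usage sketch for the AR picker, assuming the stream comes from one of the readers in this listing and holds exactly one east, north, and vertical channel (otherwise pick_ar raises GMProcessException); the path is hypothetical:

streams = read_data('/data/records/example_record.v2')  # hypothetical file
p_arrival, mean_snr = pick_ar(streams[0])                # defaults from get_config()
print('P arrival at %.2f s, mean SNR %.1f' % (p_arrival, mean_snr))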
Example #5
def _get_format(filename):
    """
    Get the format of the file.

    Args:
        filename (str): Path to file

    Returns:
        string: Format of file.
    """
    # Get the valid formats
    valid_formats = []
    io_directory = pkg_resources.resource_filename('gmprocess', 'io')
    # Create valid list
    for module in os.listdir(io_directory):
        if module.find('.') < 0 and module not in EXCLUDED:
            valid_formats += [module]
    # Test each format
    formats = []
    for valid_format in valid_formats:
        # Create the module and function name from the request
        reader = 'gmprocess.io.' + valid_format + '.core'
        reader_module = importlib.import_module(reader)
        is_name = 'is_' + valid_format
        is_method = getattr(reader_module, is_name)
        if is_method(filename):
            formats += [valid_format]
    # Return the format
    formats = np.asarray(formats)
    if len(formats) == 1:
        return formats[0]
    elif len(formats) == 2 and 'gmobspy' in formats:
        return formats[formats != 'gmobspy'][0]
    elif len(formats) == 0:
        raise GMProcessException('No format found for file %r.' % filename)
    else:
        raise GMProcessException(
            'Multiple formats passing: %r. Please retry file %r '
            'with a specified format.' % (formats.tolist(), filename))
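
Both read_data and _get_format lean on a naming convention: each format package gmprocess.io.<format>.core exposes an is_<format> checker and a read_<format> reader. A standalone sketch of that convention-based dispatch, using a format name that does appear in this listing:

import importlib

def load_reader(read_format):
    # build the module and function names from the format string
    module = importlib.import_module('gmprocess.io.%s.core' % read_format)
    return getattr(module, 'read_%s' % read_format)

read_func = load_reader('smc')  # resolves to gmprocess.io.smc.core.read_smc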
Example #6
def is_usc(filename, **kwargs):
    """Check to see if file is a USC strong motion file.

    Args:
        filename (str): Path to possible USC V1 data file.
    Returns:
        bool: True if USC, False otherwise.
    """
    logging.debug("Checking if format is usc.")
    # USC requires unique integer values
    # in column 73-74 on all text header lines
    # excluding the first file line
    return_alternate = kwargs.get('return_alternate', False)

    try:
        with open(filename, 'rt') as f:
            first_line = f.readline()
        if first_line.find('OF UNCORRECTED ACCELEROGRAM DATA OF') >= 0:
            volume = 'V1'
            start = 1
            stop = 12
            alternate_start = start + 2
            alternate_stop = stop - 2
        elif first_line.find('CORRECTED ACCELEROGRAM') >= 0:
            volume = 'V2'
            start = 2
            stop = 12
            alternate_start = start + 2
            alternate_stop = stop - 2
        elif first_line.find('RESPONSE') >= 0:
            raise GMProcessException(
                'USC: Derived response spectra and fourier '
                'amplitude spectra not supported: %s' % filename)
        else:
            return False
    except Exception:
        return False
    valid = _check_header(start, stop, filename)
    alternate = False
    if not valid:
        valid = _check_header(alternate_start, alternate_stop, filename)
        if valid:
            alternate = True
    if return_alternate:
        return valid, alternate
    else:
        return valid
Example #7
def is_smc(filename):
    """Check to see if file is a SMC (corrected, in acc.) strong motion file.

    Args:
        filename (str): Path to possible SMC corrected data file.
    Returns:
        bool: True if SMC, False otherwise.
    """
    logging.debug("Checking if format is smc.")
    try:
        with open(filename, 'rt') as f:
            lines = f.readlines()
            firstline = lines[0].strip()
            if firstline in VALID_HEADERS:
                return True
            if 'DISPLACEMENT' in firstline:
                # displacement and velocity SMC records are still valid SMC
                # format; read_smc rejects them later with a clear error
                return True
            elif 'VELOCITY' in firstline:
                return True
            elif '*' in firstline:
                end_ascii = lines[10]
                if '*' in end_ascii:
                    comment_row = int(lines[12].strip().split()[-1])
                    for r in range(27, 27 + comment_row):
                        row = lines[r]
                        if not row.startswith('|'):
                            return False
                    return True
                else:
                    return False

        return False
    except UnicodeDecodeError:
        return False
Example #8
    def validate(self):
        """Some validation checks across streams.

        """
        # If tag exists, it should be consistent across StationStreams
        all_labels = []
        for stream in self:
            if hasattr(stream, 'tag'):
                station, label = stream.tag.split('_')
                all_labels.append(label)
            else:
                all_labels.append("")
        if len(set(all_labels)) > 1:
            raise GMProcessException(
                'Only one label allowed within a StreamCollection.')
Example #9
def pick_baer(stream, picker_config=None, config=None):
    """Wrapper around the Baer P-phase picker.

    Args:
        stream (StationStream):
            Stream containing waveforms that need to be picked.
        picker_config (dict):
            Dictionary with parameters for Baer P-phase picker. See picker.yml.
        config (dict):
            Configuration dictionary. Key value here is:
                windows:
                    window_checks:
                        min_noise_duration
    Returns:
        tuple:
            - Best estimate for p-wave arrival time (s since start of trace).
            - Mean signal to noise ratio based on the pick.
    """
    if picker_config is None:
        picker_config = get_config(section='pickers')
    if config is None:
        config = get_config()
    min_noise_dur = config['windows']['window_checks']['min_noise_duration']
    params = picker_config['baer']
    locs = []
    for trace in stream:
        pick_sample = pk_baer(trace.data, trace.stats.sampling_rate,
                              **params)[0]
        loc = pick_sample * trace.stats.delta
        locs.append(loc)

    locs = np.array(locs)
    if np.any(locs >= 0):
        minloc = np.min(locs[locs >= 0])
    else:
        minloc = -1
    if minloc < min_noise_dur:
        fmt = 'Noise window (%.1f s) less than minimum (%.1f)'
        tpl = (minloc, min_noise_dur)
        raise GMProcessException(fmt % tpl)
    mean_snr = calc_snr(stream, minloc)

    return (minloc, mean_snr)
Example #10
def pick_yeck(stream):
    """IN DEVELOPMENT! SNR based P-phase picker.

    Args:
        stream (StationStream):
            Stream containing waveforms that need to be picked.
    Returns:
        tuple:
            - Best estimate for p-wave arrival time (s since start of trace).
            - Mean signal to noise ratio based on the pick.
    """
    min_window = 5.0  # put into config
    config = get_config()
    min_noise_dur = config['windows']['window_checks']['min_noise_duration']
    locs = []
    for trace in stream:
        data = trace.data
        sr = trace.stats.sampling_rate
        pidx_start = int(min_window * sr)
        snr = np.zeros(len(data))
        for pidx in range(pidx_start, len(data) - pidx_start):
            snr_i = sub_calc_snr(data, pidx)
            snr[pidx] = snr_i
        snr = np.array(snr)
        pidx = snr.argmax()
        loc = pidx / sr
        locs.append(loc)

    locs = np.array(locs)
    if np.any(locs >= 0):
        minloc = np.min(locs[locs >= 0])
    else:
        minloc = -1
    if minloc < min_noise_dur:
        fmt = 'Noise window (%.1f s) less than minimum (%.1f)'
        tpl = (minloc, min_noise_dur)
        raise GMProcessException(fmt % tpl)
    mean_snr = calc_snr(stream, minloc)

    return (minloc, mean_snr)
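
The per-trace loop above scores every candidate pick index with sub_calc_snr and keeps the argmax. A self-contained sketch of that brute-force idea, with a simple RMS ratio standing in for sub_calc_snr (which is not shown in this listing):

import numpy as np

def snr_at(data, idx):
    # stand-in SNR: RMS of the window after idx over RMS of the window before it
    noise = data[:idx]
    signal = data[idx:]
    return np.sqrt(np.mean(signal ** 2)) / np.sqrt(np.mean(noise ** 2))

data = np.random.randn(2000)
data[1200:] += 5.0                        # synthetic "arrival"
candidates = range(100, len(data) - 100)  # skip the edges, as pick_yeck does
pick_idx = max(candidates, key=lambda i: snr_at(data, i))
print('pick at sample %d' % pick_idx)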
Example #11
def _get_channel(angle, sampling_rate):
    if angle == 500 or angle == 600 or (angle >= 0 and angle <= 360):
        if angle == 500 or angle == 600:
            channel = get_channel_name(sampling_rate,
                                       is_acceleration=True,
                                       is_vertical=True,
                                       is_north=False)
        elif angle >= 315 or angle < 45 or (angle >= 135 and angle < 225):
            channel = get_channel_name(sampling_rate,
                                       is_acceleration=True,
                                       is_vertical=False,
                                       is_north=True)
        else:
            channel = get_channel_name(sampling_rate,
                                       is_acceleration=True,
                                       is_vertical=False,
                                       is_north=False)
    else:
        errstr = ('Not enough information to distinguish horizontal from '
                  'vertical channels.')
        raise GMProcessException('DMG: ' + errstr)
    return channel
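
A few illustrative calls; the sampling rate is arbitrary and the exact channel codes come from get_channel_name, so the names in the comments are only indicative:

print(_get_channel(500, 100.0))  # 500/600 are vertical codes -> vertical channel (e.g. ??Z)
print(_get_channel(20, 100.0))   # within 45 degrees of north/south -> north-type channel
print(_get_channel(95, 100.0))   # everything else -> east-type channel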
Example #12
def _read_volume_two(filename, line_offset, location='', units='acc'):
    """Read channel data from DMG text file.

    Args:
        filename (str): Input DMG V2 filename.
        line_offset (int): Line offset to beginning of channel text block.
        units (str): units to get
    Returns:
        tuple: (list of obspy Trace, int line offset)
    """
    try:
        with open(filename, 'rt') as f:
            for _ in range(line_offset):
                next(f)
            lines = [next(f) for x in range(V2_TEXT_HDR_ROWS)]
    # Accounts for blank lines at end of files
    except StopIteration:
        return (None, 1 + line_offset)

    # read in lines of integer data
    skip_rows = V2_TEXT_HDR_ROWS + line_offset
    int_data = _read_lines(skip_rows, V2_INT_HDR_ROWS, V2_INT_FMT, filename)
    int_data = int_data[0:100].astype(np.int32)

    # read in lines of float data
    skip_rows += V2_INT_HDR_ROWS
    flt_data = _read_lines(skip_rows, V2_REAL_HDR_ROWS, V2_REAL_FMT, filename)
    flt_data = flt_data[:100]
    skip_rows += V2_REAL_HDR_ROWS

    # according to the powers that defined the Network.Station.Channel.Location
    # "standard", Location is a two character field.  Most data providers,
    # including csmip/dmg here, don't always provide this.  We'll flag it as
    # "--".
    hdr = _get_header_info(int_data, flt_data, lines, 'V2', location=location)
    head, tail = os.path.split(filename)
    hdr['standard']['source_file'] = tail or os.path.basename(head)

    traces = []
    # read acceleration data
    if hdr['npts'] > 0:
        acc_rows, acc_fmt, unit = _get_data_format(filename, skip_rows,
                                                   hdr['npts'])
        acc_data = _read_lines(skip_rows + 1, acc_rows, acc_fmt, filename)
        acc_data = acc_data[:hdr['npts']]
        if unit in UNIT_CONVERSIONS:
            acc_data *= UNIT_CONVERSIONS[unit]
            logging.debug('Data converted from %s to cm/s/s' % (unit))
        else:
            raise GMProcessException('DMG: %s is not a supported unit.' % unit)
        acc_trace = StationTrace(acc_data.copy(), Stats(hdr.copy()))

        response = {'input_units': 'counts', 'output_units': 'cm/s^2'}
        acc_trace.setProvenance('remove_response', response)

        if units == 'acc':
            traces += [acc_trace]
        skip_rows += int(acc_rows) + 1

    # -------------------------------------------------------------------------
    # NOTE: The way we were initially reading velocity and displacement data was
    # not correct. I'm deleting it for now since we don't need it. If/when we
    # revisit this we need to be more careful about how this is handled.
    # -------------------------------------------------------------------------

    # read velocity data
    vel_hdr = hdr.copy()
    vel_hdr['standard']['units'] = 'vel'
    vel_hdr['npts'] = int_data[63]
    if vel_hdr['npts'] > 0:
        vel_rows, vel_fmt, unit = _get_data_format(filename, skip_rows,
                                                   vel_hdr['npts'])
        vel_data = _read_lines(skip_rows + 1, vel_rows, vel_fmt, filename)
        vel_data = vel_data[:vel_hdr['npts']]
        skip_rows += int(vel_rows) + 1

    # read displacement data
    disp_hdr = hdr.copy()
    disp_hdr['standard']['units'] = 'disp'
    disp_hdr['npts'] = int_data[65]
    if disp_hdr['npts'] > 0:
        disp_rows, disp_fmt, unit = _get_data_format(filename, skip_rows,
                                                     disp_hdr['npts'])
        disp_data = _read_lines(skip_rows + 1, disp_rows, disp_fmt, filename)
        disp_data = disp_data[:disp_hdr['npts']]
        skip_rows += int(disp_rows) + 1

    new_offset = skip_rows + 1  # there is an 'end of record' line after the data
    return (traces, new_offset)
Example #13
def _read_volume_one(filename, line_offset, location='', units='acc'):
    """Read channel data from DMG Volume 1 text file.

    Args:
        filename (str): Input DMG V1 filename.
        line_offset (int): Line offset to beginning of channel text block.
        units (str): units to get
    Returns:
        tuple: (list of obspy Trace, int line offset)
    """
    # Parse the header portion of the file
    try:
        with open(filename, 'rt') as f:
            for _ in range(line_offset):
                next(f)
            lines = [next(f) for x in range(V1_TEXT_HDR_ROWS)]
    # Accounts for blank lines at end of files
    except StopIteration:
        return (None, 1 + line_offset)

    unit = _get_units(lines[11])
    # read in lines of integer data
    skip_rows = V1_TEXT_HDR_ROWS + line_offset
    int_data = _read_lines(skip_rows, V1_INT_HDR_ROWS, V2_INT_FMT, filename)
    int_data = int_data[0:100].astype(np.int32)

    # read in lines of float data
    skip_rows += V1_INT_HDR_ROWS
    flt_data = _read_lines(skip_rows, V1_REAL_HDR_ROWS, V2_REAL_FMT, filename)
    skip_rows += V1_REAL_HDR_ROWS

    # according to the powers that defined the Network.Station.Channel.Location
    # "standard", Location is a two character field.  Most data providers,
    # including csmip/dmg here, don't always provide this.  We'll flag it as
    # "--".

    hdr = _get_header_info_v1(int_data,
                              flt_data,
                              lines,
                              'V1',
                              location=location)
    head, tail = os.path.split(filename)
    hdr['standard']['source_file'] = tail or os.path.basename(head)

    # sometimes (??) a line of text is inserted in between the float header and
    # the beginning of the data. Let's check for this...
    with open(filename, 'rt') as f:
        for _ in range(skip_rows):
            next(f)
        test_line = f.readline()

    has_text = re.search('[A-Z]+|[a-z]+', test_line) is not None
    if has_text:
        skip_rows += 1
        widths = [9] * 8
        max_rows = int(np.ceil(hdr['npts'] / 8))
        data = _read_lines(skip_rows, max_rows, widths, filename)
        acc_data = data[:hdr['npts']]
        evenly_spaced = True
        # Sometimes, npts is incorrectly specified, leading to nans
        # in the resulting data. Fix that here
        if np.any(np.isnan(acc_data)):
            while np.isnan(acc_data[-1]):
                acc_data = acc_data[:-1]
            hdr['npts'] = len(acc_data)
    else:
        # acceleration data is interleaved between time data
        max_rows = int(np.ceil(hdr['npts'] / 5))
        widths = [7] * 10
        data = _read_lines(skip_rows, max_rows, widths, filename)
        acc_data = data[1::2][:hdr['npts']]
        times = data[0::2][:hdr['npts']]
        evenly_spaced = is_evenly_spaced(times)

    if unit in UNIT_CONVERSIONS:
        acc_data *= UNIT_CONVERSIONS[unit]
        logging.debug('Data converted from %s to cm/s/s' % (unit))
    else:
        raise GMProcessException('DMG: %s is not a supported unit.' % unit)

    acc_trace = StationTrace(acc_data.copy(), Stats(hdr.copy()))

    # Check if the times were included in the file but were not evenly spaced
    if not evenly_spaced:
        acc_trace = resample_uneven_trace(acc_trace, times, acc_data)

    response = {'input_units': 'counts', 'output_units': 'cm/s^2'}
    acc_trace.setProvenance('remove_response', response)

    traces = [acc_trace]
    new_offset = skip_rows + max_rows + 1  # there is an end of record line
    return (traces, new_offset)
Example #14
def read_dmg(filename, **kwargs):
    """Read DMG strong motion file.

    Notes:
        CSMIP is synonymous with DMG in this reader.

    Args:
        filename (str): Path to possible DMG data file.
        kwargs (ref):
            units (str): String determining which timeseries is returned. Valid
                    options include 'acc', 'vel', 'disp'. Default is 'acc'.
            Other arguments will be ignored.

    Returns:
        Stream: Obspy Stream containing three channels of acceleration data
        (cm/s**2).
    """
    logging.debug("Starting read_dmg.")
    if not is_dmg(filename):
        raise Exception('%s is not a valid DMG strong motion data file.' %
                        filename)

    # Check for units and location
    units = kwargs.get('units', 'acc')
    location = kwargs.get('location', '')

    if units not in UNITS:
        raise Exception('DMG: Not a valid choice of units.')

    # Check for DMG format and determine volume type
    with open(filename, 'rt') as f:
        line = f.readline()
    reader = None
    if line.lower().find('uncorrected') >= 0:
        reader = 'V1'
    elif line.lower().find('corrected') >= 0:
        reader = 'V2'
    elif line.lower().find('response') >= 0:
        reader = 'V3'

    # Count the number of lines in the file
    with open(filename) as f:
        line_count = sum(1 for _ in f)

    # Read as many channels as are present in the file
    line_offset = 0
    trace_list = []
    while line_offset < line_count:
        if reader == 'V2':
            traces, line_offset = _read_volume_two(filename,
                                                   line_offset,
                                                   location=location,
                                                   units=units)
            if traces is not None:
                trace_list += traces
        elif reader == 'V1':
            traces, line_offset = _read_volume_one(filename,
                                                   line_offset,
                                                   location=location,
                                                   units=units)
            if traces is not None:
                trace_list += traces
        else:
            raise GMProcessException('DMG: Not a supported volume.')

    stream = StationStream([])
    for trace in trace_list:
        # For our purposes, we only want acceleration, so lets only return
        # that; we may need to change this later if others start using this
        # code and want to read in the other data.
        if trace.stats['standard']['units'] == units:
            stream.append(trace)
    return [stream]
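
A usage sketch for read_dmg (the path is hypothetical); only traces whose units match the requested units, 'acc' by default, end up in the returned stream:

streams = read_dmg('/data/records/example_dmg.v2', units='acc', location='--')
print(streams[0])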
Example #15
    def getInventory(self):
        """
        Extract an ObsPy inventory object from a Stream read in by gmprocess
        tools.
        """
        networks = [trace.stats.network for trace in self]
        if len(set(networks)) > 1:
            raise Exception(
                "Input stream has stations from multiple networks.")

        # We'll first create all the various objects. These strongly follow the
        # hierarchy of StationXML files.
        source = ''
        if 'standard' in self[0].stats and 'source' in self[0].stats.standard:
            source = self[0].stats.standard.source
        inv = Inventory(
            # We'll add networks later.
            networks=[],
            # The source should be the id of whoever created the file.
            source=source)

        net = Network(
            # This is the network code according to the SEED standard.
            code=networks[0],
            # A list of stations. We'll add one later.
            stations=[],
            description="source",
            # Start-and end dates are optional.
        )
        channels = []
        for trace in self:
            logging.debug('trace: %s' % trace)
            channel = _channel_from_stats(trace.stats)
            channels.append(channel)

        subdict = {}
        for k in UNUSED_STANDARD_PARAMS:
            if k in self[0].stats.standard:
                subdict[k] = self[0].stats.standard[k]

        format_specific = {}
        if 'format_specific' in self[0].stats:
            format_specific = dict(self[0].stats.format_specific)

        big_dict = {'standard': subdict,
                    'format_specific': format_specific}
        try:
            jsonstr = json.dumps(big_dict)
        except Exception as e:
            raise GMProcessException('Exception in json.dumps: %s' % e)
        sta = Station(
            # This is the station code according to the SEED standard.
            code=self[0].stats.station,
            latitude=self[0].stats.coordinates.latitude,
            elevation=self[0].stats.coordinates.elevation,
            longitude=self[0].stats.coordinates.longitude,
            channels=channels,
            site=Site(name=self[0].stats.standard.station_name),
            description=jsonstr,
            creation_date=UTCDateTime(1970, 1, 1),  # this is bogus
            total_number_of_channels=len(self))

        net.stations.append(sta)
        inv.networks.append(net)

        return inv
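
Because getInventory returns a standard ObsPy Inventory, it can be written out with ObsPy's usual writer; here stream stands for a StationStream produced by one of the readers above and the output path is illustrative:

inv = stream.getInventory()
inv.write('example_station.xml', format='STATIONXML')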
Example #16
def read_smc(filename, **kwargs):
    """Read SMC strong motion file.

    Args:
        filename (str): Path to possible SMC data file.
        kwargs (ref):
            any_structure (bool): Read data from any type of structure,
                raise Exception if False and structure type is not free-field.
            accept_flagged (bool): accept problem flagged data.
            location (str): Two character code for location.
            Other arguments will be ignored.
    Returns:
        Stream: Obspy Stream containing one channel of acceleration data
        (cm/s**2).
    """
    logging.debug("Starting read_smc.")
    any_structure = kwargs.get('any_structure', False)
    accept_flagged = kwargs.get('accept_flagged', False)
    location = kwargs.get('location', '')

    if not is_smc(filename):
        raise Exception('%s is not a valid SMC file' % filename)

    with open(filename, 'rt') as f:
        line = f.readline().strip()
        if 'DISPLACEMENT' in line:
            raise GMProcessException(
                'SMC: Displacement records are not supported: '
                '%s.' % filename)
        elif 'VELOCITY' in line:
            raise GMProcessException(
                'SMC: Velocity records are not supported: '
                '%s.' % filename)
        elif line == "*":
            raise GMProcessException(
                'SMC: No record volume specified in file: '
                '%s.' % filename)

    stats, num_comments = _get_header_info(filename,
                                           any_structure=any_structure,
                                           accept_flagged=accept_flagged,
                                           location=location)

    skip = ASCII_HEADER_LINES + INTEGER_HEADER_LINES + \
        num_comments + FLOAT_HEADER_LINES

    # read float data (8 columns per line)
    nrows = int(np.floor(stats['npts'] / DATA_COLUMNS))
    data = np.genfromtxt(filename,
                         max_rows=nrows,
                         skip_header=skip,
                         delimiter=FLOAT_DATA_WIDTHS)
    data = data.flatten()
    if stats['npts'] % DATA_COLUMNS:
        lastrow = np.genfromtxt(filename,
                                max_rows=1,
                                skip_header=skip + nrows,
                                delimiter=FLOAT_DATA_WIDTHS)
        data = np.append(data, lastrow)
    data = data[0:stats['npts']]
    trace = StationTrace(data, header=stats)

    response = {'input_units': 'counts', 'output_units': 'cm/s^2'}
    trace.setProvenance('remove_response', response)

    stream = StationStream(traces=[trace])
    return [stream]
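
A hedged usage sketch for read_smc with the keyword arguments spelled out (the path is hypothetical):

streams = read_smc('/data/records/example.smc',
                   any_structure=True,    # accept non-free-field structures
                   accept_flagged=True,   # keep problem-flagged records
                   location='--')
trace = streams[0][0]
print(trace.stats.station, trace.stats.channel)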
Example #17
def _read_channel(filename, line_offset, location=''):
    """Read channel data from COSMOS V1/V2 text file.

    Args:
        filename (str): Input COSMOS V1/V2 filename.
        line_offset (int): Line offset to beginning of channel text block.

    Returns:
        tuple: (obspy Trace, int line offset)
    """
    # read station, location, and process level from text header
    with open(filename, 'rt') as f:
        for _ in range(line_offset):
            next(f)
        lines = [next(f) for x in range(TEXT_HDR_ROWS)]

    # read in lines of integer data
    skiprows = line_offset + TEXT_HDR_ROWS
    int_lines, int_data = _read_lines(skiprows, filename)
    int_data = int_data.astype(np.int32)

    # read in lines of float data
    skiprows += int_lines + 1
    flt_lines, flt_data = _read_lines(skiprows, filename)

    # read in comment lines
    skiprows += flt_lines + 1
    cmt_lines, cmt_data = _read_lines(skiprows, filename)
    skiprows += cmt_lines + 1

    # according to the powers that defined the Network.Station.Channel.Location
    # "standard", Location is a two character field.  Most data providers,
    # including cosmos here, don't provide this.  We'll flag it as "--".
    hdr = _get_header_info(int_data, flt_data, lines,
                           cmt_data, location=location)
    head, tail = os.path.split(filename)
    hdr['standard']['source_file'] = tail or os.path.basename(head)

    # read in the data
    nrows, data = _read_lines(skiprows, filename)

    # Check for "off-by-one" problem that sometimes occurs with cosmos data
    # Notes:
    #     - We cannot do this check inside _get_header_info because we don't
    #       have the data there.
    #     - That method is written to set npts from the header as documented in
    #       the spec ("length" == npts*dt), but it appears that sometimes a
    #       different convention is used where the "length" of the record is
    #       actually (npts-1)*dt. In this case, we need to recompute duration
    #       and npts.
    if hdr['npts'] == (len(data) - 1):
        hdr['npts'] = len(data)
        hdr['duration'] = (hdr['npts'] - 1) * hdr['delta']

    # check units
    unit = hdr['format_specific']['physical_units']
    if unit in UNIT_CONVERSIONS:
        data *= UNIT_CONVERSIONS[unit]
        logging.debug('Data converted from %s to cm/s/s' % (unit))
    else:
        if unit != 'counts':
            raise GMProcessException(
                'COSMOS: %s is not a supported unit.' % unit)

    if hdr['standard']['units'] != 'acc':
        raise GMProcessException('COSMOS: Only acceleration data accepted.')

    trace = StationTrace(data.copy(), Stats(hdr.copy()))

    # record that this data has been converted to cm/s^2, if it has
    if hdr['standard']['process_level'] != PROCESS_LEVELS['V0']:
        response = {'input_units': 'counts', 'output_units': 'cm/s^2'}
        trace.setProvenance('remove_response', response)

    # set new offset
    new_offset = skiprows + nrows
    new_offset += 1  # there is an 'end of record' line after the data

    return (trace, new_offset)
Example #18
    def addStreams(self, event, streams, label=None):
        """Add a sequence of StationStream objects to an ASDF file.

        Args:
            event (Event):
                Obspy event object.
            streams (list):
                List of StationStream objects.
            label (str):
                Label to attach to stream sequence. Cannot contain an
                underscore.
        """
        if label is not None:
            if '_' in label:
                raise GMProcessException(
                    'Stream label cannot contain an underscore.')

        # To allow for multiple processed versions of the same Stream
        # let's keep a dictionary of stations and sequence number.
        eventid = _get_id(event)
        if not self.hasEvent(eventid):
            self.addEvent(event)
        station_dict = {}
        for stream in streams:
            station = stream[0].stats['station']
            logging.info('Adding waveforms for station %s' % station)
            # is this a raw file? Check the trace for provenance info.
            is_raw = not len(stream[0].getProvenanceKeys())

            if label is not None:
                tag = '%s_%s_%s' % (eventid, station.lower(), label)
            else:
                if station.lower() in station_dict:
                    station_sequence = station_dict[station.lower()] + 1
                else:
                    station_sequence = 1
                station_dict[station.lower()] = station_sequence
                tag = '%s_%s_%05i' % (
                    eventid, station.lower(), station_sequence)
            if is_raw:
                level = 'raw'
            else:
                level = 'processed'
            self.dataset.add_waveforms(stream, tag=tag, event_id=event)

            # add processing provenance info from traces
            if level == 'processed':
                provdocs = stream.getProvenanceDocuments()
                for provdoc, trace in zip(provdocs, stream):
                    tpl = (trace.stats.network.lower(),
                           trace.stats.station.lower(),
                           trace.stats.channel.lower())
                    channel = '%s_%s_%s' % tpl
                    channel_tag = '%s_%s' % (tag, channel)
                    self.dataset.add_provenance_document(
                        provdoc,
                        name=channel_tag
                    )

            # add processing parameters from streams
            jdict = {}
            for key in stream.getStreamParamKeys():
                value = stream.getStreamParam(key)
                jdict[key] = value

            if len(jdict):
                # NOTE: We would store this dictionary just as
                # the parameters dictionary, but HDF cannot handle
                # nested dictionaries.
                # Also, this seems like a lot of effort
                # just to store a string in HDF, but other
                # approaches failed. Suggestions are welcome.
                jdict = _stringify_dict(jdict)
                jsonbytes = json.dumps(jdict).encode('utf-8')
                jsonarray = np.frombuffer(jsonbytes, dtype=np.uint8)
                dtype = 'StreamProcessingParameters'
                self.dataset.add_auxiliary_data(
                    jsonarray,
                    data_type=dtype,
                    path=tag,
                    parameters={}
                )

            # add processing parameters from traces
            for trace in stream:
                path = '%s_%s' % (tag, trace.stats.channel)
                jdict = {}
                for key in trace.getParameterKeys():
                    value = trace.getParameter(key)
                    jdict[key] = value
                if len(jdict):
                    # NOTE: We would store this dictionary just as
                    # the parameters dictionary, but HDF cannot handle
                    # nested dictionaries.
                    # Also, this seems like a lot of effort
                    # just to store a string in HDF, but other
                    # approaches failed. Suggestions are welcome.
                    jdict = _stringify_dict(jdict)
                    jsonbytes = json.dumps(jdict).encode('utf-8')
                    jsonarray = np.frombuffer(jsonbytes, dtype=np.uint8)
                    dtype = 'TraceProcessingParameters'
                    self.dataset.add_auxiliary_data(
                        jsonarray,
                        data_type=dtype,
                        path=path,
                        parameters={}
                    )
            inventory = stream.getInventory()
            self.dataset.add_stationxml(inventory)
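
addStreams belongs to an ASDF workspace class whose definition is not part of this listing; a hedged sketch of a call, where workspace, event, and streams are assumed to already exist:

# label may not contain an underscore (see the check at the top of addStreams)
workspace.addStreams(event, streams, label='unprocessed')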
Example #19
def _get_header_info(int_data, flt_data, lines, cmt_data, location=''):
    """Return stats structure from various headers.

    Output is a dictionary like this:
     - network (str): Default is '--'. Determined using COSMOS_NETWORKS
     - station (str)
     - channel (str): Determined using COSMOS_ORIENTATIONS
     - location (str): Set to location index of sensor site at station.
            If not a multi-site array, default is '--'.
     - starttime (datetime)
     - duration (float)
     - sampling_rate (float)
     - delta (float)
     - npts (int)
     - coordinates:
       - latitude (float)
       - longitude (float)
       - elevation (float)
    - standard (Defaults are either np.nan or '')
      - horizontal_orientation (float): Rotation from north (degrees)
      - instrument_period (float): Period of sensor (s)
      - instrument_damping (float): Fraction of critical
      - process_time (datetime): Reported date of processing
      - process_level: Either 'V0', 'V1', 'V2', or 'V3'
      - station_name (str): Long form station description
      - sensor_serial_number (str): Reported sensor serial
      - instrument (str): See SENSOR_TYPES
      - comments (str): Processing comments
      - structure_type (str): See BUILDING_TYPES
      - corner_frequency (float): Sensor corner frequency (Hz)
      - units (str): See UNITS
      - source (str): Network source description
      - source_format (str): Always cosmos
    - format_specific
      - physical_units (str): See PHYSICAL_UNITS
      - v30 (float): Site geology V30 (km/s)
      - least_significant_bit: Recorder LSB in micro-volts (uv/count)
      - low_filter_type (str): Filter used for low frequency
            V2 filtering (see FILTERS)
      - low_filter_corner (float): Filter corner for low frequency
            V2 filtering (Hz)
      - low_filter_decay (float): Filter decay for low frequency
            V2 filtering (dB/octave)
      - high_filter_type (str): Filter used for high frequency
            V2 filtering (see FILTERS)
      - high_filter_corner (float): Filter corner for high frequency
            V2 filtering (Hz)
      - high_filter_decay (float): Filter decay for high frequency
            V2 filtering (dB/octave)
      - maximum (float): Maximum value
      - maximum_time (float): Time at which maximum occurs
      - station_code (int): Code for structure_type
      - record_flag (str): Either 'No problem', 'Fixed', 'Unfixed problem'.
            Should be described in more depth in comments.
      - scaling_factor (float): Scaling used for converting acceleration
            from g/10 to cm/s/s
      - sensor_sensitivity (float): Sensitivity in volts/g

    Args:
        int_data (ndarray): Array of integer data
        flt_data (ndarray): Array of float data
        lines (list): List of text headers (str)
        cmt_data (ndarray): Array of comments (str)

    Returns:
        dictionary: Dictionary of header/metadata information
    """
    hdr = {}
    coordinates = {}
    standard = {}
    format_specific = {}
    # Get unknown parameter number
    try:
        unknown = int(lines[12][64:71])
    except ValueError:
        unknown = -999
    # required metadata
    network_num = int(int_data[10])
    # Get network from cosmos table or fdsn code sheet
    if network_num in COSMOS_NETWORKS:
        network = COSMOS_NETWORKS[network_num][0]
        source = COSMOS_NETWORKS[network_num][1]
        if network == '':
            network = COSMOS_NETWORKS[network_num][2]
    else:
        network_code = lines[4][25:27].upper()
        if network_code in CODES:
            network = network_code
            idx = np.argwhere(CODES == network_code)[0][0]
            source = SOURCES1[idx].decode(
                'utf-8') + ', ' + SOURCES2[idx].decode('utf-8')
        else:
            network = 'ZZ'
            source = ''
    hdr['network'] = network
    logging.debug('network: %s' % network)
    hdr['station'] = lines[4][28:34].strip()
    logging.debug('station: %s' % hdr['station'])
    horizontal_angle = int(int_data[53])
    logging.debug('horizontal_angle: %s' % horizontal_angle)
    if horizontal_angle not in VALID_AZIMUTH_INTS:
        logging.warning("Horizontal_angle in COSMOS header is not valid.")
    horizontal_angle = float(horizontal_angle)

    # Store delta and duration. Use them to calculate npts and sampling_rate

    # NOTE: flt_data[33] is the delta of the V0 format, and if we are reading
    # a V1 or V2 format then it may have been resampled. We should consider
    # adding flt_data[33] delta to the provenance record at some point.

    delta = float(flt_data[61]) * MSEC_TO_SEC
    if delta != unknown:
        hdr['delta'] = delta
        hdr['sampling_rate'] = 1 / delta

    # Determine the angle based upon the cosmos table
    # Set horizontal angles other than N,S,E,W to H1 and H2
    # Missing angle results in the channel number
    if horizontal_angle != unknown:
        if horizontal_angle in COSMOS_ORIENTATIONS:
            channel = COSMOS_ORIENTATIONS[horizontal_angle][1].upper()
            if channel == 'UP' or channel == 'DOWN' or channel == 'VERT':
                channel = get_channel_name(hdr['sampling_rate'],
                                           is_acceleration=True,
                                           is_vertical=True,
                                           is_north=False)
        elif horizontal_angle >= 0 and horizontal_angle <= 360:
            if (horizontal_angle > 315 or horizontal_angle < 45
                    or (horizontal_angle > 135 and horizontal_angle < 225)):
                channel = get_channel_name(hdr['sampling_rate'],
                                           is_acceleration=True,
                                           is_vertical=False,
                                           is_north=True)
            else:
                channel = get_channel_name(hdr['sampling_rate'],
                                           is_acceleration=True,
                                           is_vertical=False,
                                           is_north=False)
        horizontal_orientation = horizontal_angle
    else:
        errstr = ('Not enough information to distinguish horizontal from '
                  'vertical channels.')
        raise GMProcessException('COSMOS: ' + errstr)
    hdr['channel'] = channel
    logging.debug('channel: %s' % hdr['channel'])
    if location == '':
        location = int(int_data[55])
        location = str(_check_assign(location, unknown, '--'))
        if len(location) < 2:
            location = location.zfill(2)
        hdr['location'] = location
    else:
        hdr['location'] = location
    year = int(int_data[39])
    month = int(int_data[41])
    day = int(int_data[42])
    hour = int(int_data[43])
    minute = int(int_data[44])
    second = float(flt_data[29])
    # If the seconds value is missing, build the start time only to the
    # minute; anything coarser is considered inadequate time information.
    if second == unknown:
        try:
            hdr['starttime'] = datetime(year, month, day, hour, minute)
        except Exception:
            raise GMProcessException(
                'COSMOS: Inadequate start time information.')
    else:
        microsecond = int((second - int(second)) * 1e6)
        try:
            hdr['starttime'] = datetime(year, month, day, hour, minute,
                                        int(second), microsecond)
        except Exception:
            raise GMProcessException(
                'COSMOS: Inadequate start time information.')

    if flt_data[62] != unknown:
        # COSMOS **defines** "length" as npts*dt (note this is a bit unusual)
        cosmos_length = flt_data[62]
        npts = int(cosmos_length / delta)
        hdr['duration'] = (npts - 1) * delta
        hdr['npts'] = npts
    else:
        raise ValueError('COSMOS file does not specify length.')

    # coordinate information
    coordinates['latitude'] = float(flt_data[0])
    coordinates['longitude'] = float(flt_data[1])
    coordinates['elevation'] = float(flt_data[2])
    for key in coordinates:
        if coordinates[key] == unknown:
            warnings.warn('Missing %r. Setting to np.nan.' % key, Warning)
            coordinates[key] = np.nan
    hdr['coordinates'] = coordinates

    # standard metadata
    standard['source'] = source
    standard['horizontal_orientation'] = horizontal_orientation
    station_name = lines[4][40:-1].strip()
    standard['station_name'] = station_name
    instrument_frequency = float(flt_data[39])
    standard['instrument_period'] = 1.0 / _check_assign(
        instrument_frequency, unknown, np.nan)
    instrument_damping = float(flt_data[40])
    standard['instrument_damping'] = _check_assign(instrument_damping, unknown,
                                                   np.nan)
    process_line = lines[10][10:40]
    if process_line.find('-') >= 0 or process_line.find('/') >= 0:
        if process_line.find('-') >= 0:
            delimeter = '-'
        elif process_line.find('/') >= 0:
            delimeter = '/'
        try:
            date = process_line.split(delimeter)
            month = int(date[0][-2:])
            day = int(date[1])
            year = int(date[2][:4])
            time = process_line.split(':')
            hour = int(time[0][-2:])
            minute = int(time[1])
            second = float(time[2][:2])
            microsecond = int((second - int(second)) * 1e6)
            etime = datetime(year, month, day, hour, minute, int(second),
                             microsecond)
            standard['process_time'] = etime.strftime(TIMEFMT)
        except Exception:
            standard['process_time'] = ''
    else:
        standard['process_time'] = ''
    process_level = int(int_data[0])
    if process_level == 0:
        standard['process_level'] = PROCESS_LEVELS['V0']
    elif process_level == 1:
        standard['process_level'] = PROCESS_LEVELS['V1']
    elif process_level == 2:
        standard['process_level'] = PROCESS_LEVELS['V2']
    elif process_level == 3:
        standard['process_level'] = PROCESS_LEVELS['V3']
    else:
        standard['process_level'] = PROCESS_LEVELS['V1']
    logging.debug("process_level: %s" % process_level)
    serial = int(int_data[52])
    if serial != unknown:
        standard['sensor_serial_number'] = str(
            _check_assign(serial, unknown, ''))
    else:
        standard['sensor_serial_number'] = ''
    instrument = int(int_data[51])
    if instrument != unknown and instrument in SENSOR_TYPES:
        standard['instrument'] = SENSOR_TYPES[instrument]
    else:
        standard['instrument'] = lines[6][57:-1].strip()
    structure_type = int(int_data[18])
    if structure_type != unknown and structure_type in BUILDING_TYPES:
        standard['structure_type'] = BUILDING_TYPES[structure_type]
    else:
        standard['structure_type'] = ''
    frequency = float(flt_data[25])
    standard['corner_frequency'] = _check_assign(frequency, unknown, np.nan)
    physical_parameter = int(int_data[2])
    units = int(int_data[1])
    if units != unknown and units in UNITS:
        standard['units'] = UNITS[units]
    else:
        if physical_parameter in [2, 4, 7, 10, 11, 12, 23]:
            standard['units'] = 'acc'
        elif physical_parameter in [5, 8, 24]:
            standard['units'] = 'vel'
        elif physical_parameter in [6, 9, 25]:
            standard['units'] = 'disp'
    standard['source_format'] = 'cosmos'
    standard['comments'] = ', '.join(cmt_data)

    # format specific metadata
    if physical_parameter in PHYSICAL_UNITS:
        physical_parameter = PHYSICAL_UNITS[physical_parameter][0]
    format_specific['physical_units'] = physical_parameter
    v30 = float(flt_data[3])
    format_specific['v30'] = _check_assign(v30, unknown, np.nan)
    least_significant_bit = float(flt_data[21])
    format_specific['least_significant_bit'] = _check_assign(
        least_significant_bit, unknown, np.nan)
    low_filter_type = int(int_data[60])
    if low_filter_type in FILTERS:
        format_specific['low_filter_type'] = FILTERS[low_filter_type]
    else:
        format_specific['low_filter_type'] = ''
    low_filter_corner = float(flt_data[53])
    format_specific['low_filter_corner'] = _check_assign(
        low_filter_corner, unknown, np.nan)
    low_filter_decay = float(flt_data[54])
    format_specific['low_filter_decay'] = _check_assign(
        low_filter_decay, unknown, np.nan)
    high_filter_type = int(int_data[61])
    if high_filter_type in FILTERS:
        format_specific['high_filter_type'] = FILTERS[high_filter_type]
    else:
        format_specific['high_filter_type'] = ''
    high_filter_corner = float(flt_data[56])
    format_specific['high_filter_corner'] = _check_assign(
        high_filter_corner, unknown, np.nan)
    high_filter_decay = float(flt_data[57])
    format_specific['high_filter_decay'] = _check_assign(
        high_filter_decay, unknown, np.nan)
    maximum = float(flt_data[63])
    format_specific['maximum'] = _check_assign(maximum, unknown, np.nan)
    maximum_time = float(flt_data[64])
    format_specific['maximum_time'] = _check_assign(maximum_time, unknown,
                                                    np.nan)
    format_specific['station_code'] = _check_assign(structure_type, unknown,
                                                    np.nan)
    record_flag = int(int_data[75])
    if record_flag == 0:
        format_specific['record_flag'] = 'No problem'
    elif record_flag == 1:
        format_specific['record_flag'] = 'Fixed'
    elif record_flag == 2:
        format_specific['record_flag'] = 'Unfixed problem'
    else:
        format_specific['record_flag'] = ''
    scaling_factor = float(flt_data[87])
    format_specific['scaling_factor'] = _check_assign(scaling_factor, unknown,
                                                      np.nan)
    scaling_factor = float(flt_data[41])
    format_specific['sensor_sensitivity'] = _check_assign(
        scaling_factor, unknown, np.nan)
    # Set dictionary
    hdr['standard'] = standard
    hdr['coordinates'] = coordinates
    hdr['format_specific'] = format_specific
    return hdr
Example #20
def _read_channel(filename, line_offset, location=''):
    """Read channel data from COSMOS V1/V2 text file.

    Args:
        filename (str): Input COSMOS V1/V2 filename.
        line_offset (int): Line offset to beginning of channel text block.

    Returns:
        tuple: (obspy Trace, int line offset)
    """
    # read station, location, and process level from text header
    with open(filename, 'rt') as f:
        for _ in range(line_offset):
            next(f)
        lines = [next(f) for x in range(TEXT_HDR_ROWS)]

    # read in lines of integer data
    skiprows = line_offset + TEXT_HDR_ROWS
    int_lines, int_data = _read_lines(skiprows, filename)
    int_data = int_data.astype(np.int32)

    # read in lines of float data
    skiprows += int_lines + 1
    flt_lines, flt_data = _read_lines(skiprows, filename)

    # read in comment lines
    skiprows += flt_lines + 1
    cmt_lines, cmt_data = _read_lines(skiprows, filename)
    skiprows += cmt_lines + 1

    # according to the powers that defined the Network.Station.Channel.Location
    # "standard", Location is a two character field.  Most data providers,
    # including cosmos here, don't provide this.  We'll flag it as "--".
    hdr = _get_header_info(int_data,
                           flt_data,
                           lines,
                           cmt_data,
                           location=location)
    head, tail = os.path.split(filename)
    hdr['standard']['source_file'] = tail or os.path.basename(head)

    # read in the data
    nrows, data = _read_lines(skiprows, filename)
    # check units
    unit = hdr['format_specific']['physical_units']
    if unit in UNIT_CONVERSIONS:
        data *= UNIT_CONVERSIONS[unit]
        logging.debug('Data converted from %s to cm/s/s' % (unit))
    else:
        raise GMProcessException('COSMOS: %s is not a supported unit.' % unit)

    if hdr['standard']['units'] != 'acc':
        raise GMProcessException('COSMOS: Only acceleration data accepted.')

    trace = StationTrace(data.copy(), Stats(hdr.copy()))

    # record that this data has been converted to cm/s^2, if it has
    if hdr['standard']['process_level'] != PROCESS_LEVELS['V0']:
        response = {'input_units': 'counts', 'output_units': 'cm/s^2'}
        trace.setProvenance('remove_response', response)

    # set new offset
    new_offset = skiprows + nrows
    new_offset += 1  # there is an 'end of record' line after the data

    return (trace, new_offset)
Example #21
def _read_channel(filename, line_offset, volume, location='', alternate=False):
    """Read channel data from USC V1 text file.

    Args:
        filename (str): Input USC V1 filename.
        line_offset (int): Line offset to beginning of channel text block.
        volume (dictionary): Dictionary of formatting information
    Returns:
        tuple: (obspy Trace, int line offset)
    """
    if alternate:
        int_rows = 5
        int_fmt = 20 * [4]
        data_cols = 8
    else:
        int_rows = volume['INT_HDR_ROWS']
        int_fmt = volume['INT_FMT']
        data_cols = 10
    # Parse the header portion of the file
    try:
        with open(filename, 'rt') as f:
            for _ in range(line_offset):
                next(f)
            lines = [next(f) for x in range(volume['TEXT_HDR_ROWS'])]
    # Accounts for blank lines at end of files
    except StopIteration:
        return (None, 1 + line_offset)
    # read in lines of integer data
    skiprows = line_offset + volume['TEXT_HDR_ROWS']
    int_data = np.genfromtxt(filename, skip_header=skiprows,
                             max_rows=int_rows, dtype=np.int32,
                             delimiter=int_fmt).flatten()

    # read in lines of float data
    skiprows += int_rows
    flt_data = np.genfromtxt(filename, skip_header=skiprows,
                             max_rows=volume['FLT_HDR_ROWS'], dtype=np.float64,
                             delimiter=volume['FLT_FMT']).flatten()
    hdr = _get_header_info(int_data, flt_data, lines, 'V1', location=location)
    skiprows += volume['FLT_HDR_ROWS']
    # read in the data
    nrows = int(np.floor(hdr['npts'] * 2 / data_cols))
    all_data = np.genfromtxt(filename, skip_header=skiprows,
                             max_rows=nrows, dtype=np.float64,
                             delimiter=volume['COL_FMT'])
    data = all_data.flatten()[1::2]
    times = all_data.flatten()[0::2]

    frac = hdr['format_specific']['fractional_unit']
    if frac > 0:
        data *= UNIT_CONVERSIONS['g'] * frac
        logging.debug('Data converted from g * %s to cm/s/s' % (frac))
    else:
        unit = _get_units(lines[11])
        if unit in UNIT_CONVERSIONS:
            data *= UNIT_CONVERSIONS[unit]
            logging.debug('Data converted from %s to cm/s/s' % (unit))
        else:
            raise GMProcessException('USC: %s is not a supported unit.' % unit)

    # Put file name into dictionary
    head, tail = os.path.split(filename)
    hdr['standard']['source_file'] = tail or os.path.basename(head)

    trace = StationTrace(data.copy(), Stats(hdr.copy()))
    if not is_evenly_spaced(times):
        trace = resample_uneven_trace(trace, times, data)

    response = {'input_units': 'counts', 'output_units': 'cm/s^2'}
    trace.setProvenance('remove_response', response)

    # set new offset
    new_offset = skiprows + nrows
    new_offset += 1  # there is an 'end of record' line after the data

    return (trace, new_offset)
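# The USC V1 data block interleaves (time, acceleration) pairs across each row,
# so after flattening, even indices are times and odd indices are amplitudes.
# A small runnable sketch of that deinterleaving (values are made up):
import numpy as np

_demo_rows = np.array([[0.00, 1.0, 0.02, -2.0],
                       [0.04, 3.0, 0.06, -4.0]])
_flat = _demo_rows.flatten()
_demo_times = _flat[0::2]   # [0.00, 0.02, 0.04, 0.06]
_demo_accel = _flat[1::2]   # [1.0, -2.0, 3.0, -4.0]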
Example #22
0
def group_channels(streams):
    """Consolidate streams for the same event.

    Checks whether channels from the same station recording are spread
    across different streams and, if so, groups them into one stream.
    Duplicate channels (traces) are detected and excluded along the way.

    Args:
        streams (list): List of Stream objects.

    Returns:
        list: List of Stream objects.
    """
    # Return the original stream if there is only one
    if len(streams) <= 1:
        return streams

    # Gather all of the traces
    trace_list = []
    for stream in streams:
        for trace in stream:
            if trace.stats.network == '' or str(trace.stats.network) == 'nan':
                trace.stats.network = 'ZZ'
            if trace.stats.location == '' or str(
                    trace.stats.location) == 'nan':
                trace.stats.location = '--'
            trace_list += [trace]

    # Create a list of duplicate traces and event matches
    duplicate_list = []
    match_list = []
    for idx1, trace1 in enumerate(trace_list):
        matches = []
        network = trace1.stats['network']
        station = trace1.stats['station']
        starttime = trace1.stats['starttime']
        endtime = trace1.stats['endtime']
        channel = trace1.stats['channel']
        location = trace1.stats['location']
        if 'units' in trace1.stats.standard:
            units = trace1.stats.standard['units']
        else:
            units = ''
        if 'process_level' in trace1.stats.standard:
            process_level = trace1.stats.standard['process_level']
        else:
            process_level = ''
        data = np.asarray(trace1.data)
        for idx2, trace2 in enumerate(trace_list):
            if idx1 != idx2 and idx1 not in duplicate_list:
                event_match = False
                duplicate = False
                if data.shape == trace2.data.shape:
                    try:
                        same_data = ((data == np.asarray(trace2.data)).all())
                    except AttributeError:
                        same_data = (data == np.asarray(trace2.data))
                else:
                    same_data = False
                if 'units' in trace2.stats.standard:
                    units2 = trace2.stats.standard['units']
                else:
                    units2 = ''
                if 'process_level' in trace2.stats.standard:
                    process_level2 = trace2.stats.standard['process_level']
                else:
                    process_level2 = ''
                if (network == trace2.stats['network']
                        and station == trace2.stats['station']
                        and starttime == trace2.stats['starttime']
                        and endtime == trace2.stats['endtime']
                        and channel == trace2.stats['channel']
                        and location == trace2.stats['location']
                        and units == units2 and process_level == process_level2
                        and same_data):
                    duplicate = True
                elif (network == trace2.stats['network']
                      and station == trace2.stats['station']
                      and starttime == trace2.stats['starttime']
                      and location == trace2.stats['location']
                      and units == units2 and process_level == process_level2):
                    event_match = True
                if duplicate:
                    duplicate_list += [idx2]
                if event_match:
                    matches += [idx2]
        match_list += [matches]

    # Create an updated list of streams
    streams = []
    for idx, matches in enumerate(match_list):
        stream = Stream()
        grouped = False
        for match_idx in matches:
            if match_idx not in duplicate_list:
                if idx not in duplicate_list:
                    stream.append(trace_list[match_idx])
                    duplicate_list += [match_idx]
                    grouped = True
        if grouped:
            stream.append(trace_list[idx])
            duplicate_list += [idx]
            streams += [stream]

    # Check for ungrouped traces
    for idx, trace in enumerate(trace_list):
        if idx not in duplicate_list:
            stream = Stream()
            streams += [stream.append(trace)]
            logging.warning('One channel stream:\n%s' % (stream))

    # Check for streams with more than three channels
    for stream in streams:
        if len(stream) > 3:
            raise GMProcessException('Stream with more than 3 channels:\n%s.' %
                                     (stream))

    return streams
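# A simplified, self-contained illustration of the matching criteria used
# above: traces belong to the same recording when network, station, start
# time, and location (plus units and process level) agree. The sketch below
# groups plain dicts by a reduced version of that key; it only demonstrates
# the idea and is not a replacement for group_channels().
from collections import defaultdict


def _demo_group_records(records):
    groups = defaultdict(list)
    for rec in records:
        key = (rec['network'], rec['station'], rec['starttime'], rec['location'])
        groups[key].append(rec)
    return list(groups.values())


# Two channels from the same station recording end up grouped together:
_demo_grouped = _demo_group_records([
    {'network': 'ZZ', 'station': 'ABC', 'starttime': 0, 'location': '--', 'channel': 'HN1'},
    {'network': 'ZZ', 'station': 'ABC', 'starttime': 0, 'location': '--', 'channel': 'HN2'},
])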
Example #23
0
def get_records(
    output,
    email,
    unpack=False,
    process_level='raw',
    group_by='event',
    minpga=None,
    maxpga=None,
    min_station_dist=None,
    max_station_dist=None,
    network=None,
    station_type='Ground',
    include_inactive=False,
    station_name=None,
    min_station_latitude=None,
    max_station_latitude=None,
    min_station_longitude=None,
    max_station_longitude=None,
    station_latitude=None,
    station_longitude=None,
    radius_km=None,
    station_code=None,
    event_name=None,
    minmag=None,
    maxmag=None,
    fault_type=None,
    startdate=None,
    enddate=None,
    min_event_latitude=None,
    max_event_latitude=None,
    min_event_longitude=None,
    max_event_longitude=None,
    event_latitude=None,
    event_longitude=None,
    event_radius=None,
    eventid=None,
):
    """Retrieve strong motion waveform records from CESMD website.

    Args:
        output (str): Filename or directory where downloaded zip data will be written.
        email (str): Email address of requesting user.
        unpack (bool): If True, all zipped files will be unpacked (output will become a directory name).
        process_level (str): One of 'raw', 'processed', 'plots', or 'all'.
        group_by (str): One of 'event', 'station'
        minpga (float): Minimum PGA value.
        maxpga (float): Maximum PGA value.
        min_station_dist (float): Minimum station distance from epicenter.
        max_station_dist (float): Maximum station distance from epicenter.
        network (str): Source network of strong motion data.
        station_type (str): Type of strong motion station (array, dam, etc.)
        include_inactive (bool): Include results from stations that are no longer active.
        station_name (str): Search only for station matching input name.
        min_station_latitude (float): Latitude station min when using a box search.
        max_station_latitude (float): Latitude station max when using a box search.
        min_station_longitude (float): Longitude station min when using a box search.
        max_station_longitude (float): Longitude station max when using a box search.
        station_latitude (float): Center latitude for station search. 
        station_longitude (float): Center longitude for station search.
        radius_km (float): Radius (km) for station search.
        station_code (str): Particular station code to search for.
        event_name (str): Earthquake name to search for.
        minmag (float): Magnitude minimum when using a magnitude search.
        maxmag (float): Magnitude maximum when using a magnitude search.
        fault_type (str): Fault type.
        startdate (str): Start date/time in YYYY-MM-DD HH:MM:SS format.
        enddate (str): End date/time in YYYY-MM-DD HH:MM:SS format.
        min_event_latitude (float): Latitude event min when using a box search.
        max_event_latitude (float): Latitude event max when using a box search.
        min_event_longitude (float): Longitude event min when using a box search.
        max_event_longitude (float): Longitude event max when using a box search.
        event_latitude (float): Center earthquake latitude for radius search.
        event_longitude (float): Center earthquake longitude for radius search.
        event_radius (float): Earthquake search radius (km).
        eventid (str): NEIC or other ANSS event ID.
    Returns:
        tuple: (Top level output directory, list of data files)

    """
    # getting the inputargs must be the first line of the method!
    inputargs = locals().copy()
    del inputargs['output']
    del inputargs['unpack']

    # note: this only supports requesting one of the process levels or all of
    # them; no other combinations appear to be allowed.
    if process_level not in PROCESS_LEVELS:
        fmt = 'Only process levels of %s are supported (%s was input)'
        tpl = (','.join(PROCESS_LEVELS), process_level)
        raise KeyError(fmt % tpl)

    if group_by not in GROUP_OPTIONS:
        fmt = 'Only group-by options of %s are supported (%s was input)'
        tpl = (','.join(GROUP_OPTIONS), group_by)
        raise KeyError(fmt % tpl)

    # determine which network user wanted
    if network is not None and network not in NETWORKS:
        fmt = 'Network with ID %s not found in list of supported networks.'
        tpl = network
        raise KeyError(fmt % tpl)

    if station_type is not None and station_type not in STATION_TYPES:
        fmt = 'Station type %s not found in list of supported types.'
        tpl = station_type
        raise KeyError(fmt % tpl)

    # convert 'Ground' to 'G' for example
    inputargs['station_type'] = STATION_TYPES[inputargs['station_type']]

    # check against list of fault types
    if fault_type is not None and fault_type not in FAULT_TYPES:
        fmt = 'Fault type %s not found in supported fault types %s.'
        tpl = (fault_type, ','.join(FAULT_TYPES))
        raise KeyError(fmt % tpl)

    # make sure only one method is being used to select stations geographically
    if min_station_latitude is not None and station_latitude is not None:
        raise Exception(
            'Select stations either by bounding box or by radius, not both.')

    # make sure only one method is being used to select events geographically
    if min_event_latitude is not None and event_latitude is not None:
        raise Exception(
            'Select events either by bounding box or by radius, not both.')

    # now convert process levels to string webservice expects
    levels = {'processed': 'P', 'raw': 'R', 'plots': 'T', 'all': 'P,R,T'}
    inputargs['process_level'] = levels[process_level]

    # now convert input args to the parameter names expected by the web service
    params = {}
    for key, value in inputargs.items():
        if key in KEY_TABLE:
            params[KEY_TABLE[key]] = value
        else:
            params[key] = value

    # convert all booleans to strings that are 'true' and 'false'
    for key, value in params.items():
        if isinstance(value, bool):
            if value:
                params[key] = 'true'
            else:
                params[key] = 'false'

    # add in a couple of parameters that seem to be required
    params['orderby'] = 'epidist-asc'
    params['nodata'] = '404'
    params['rettype'] = 'dataset'

    session = Session()
    request = Request('GET', URL_TEMPLATE, params=params).prepare()
    url = request.url
    response = session.get(request.url)

    if not response.status_code == 200:
        fmt = 'Your url "%s" returned a status code of %i with message: "%s"'
        raise GMProcessException(fmt %
                                 (url, response.status_code, response.reason))

    if unpack:
        if not os.path.exists(output):
            os.makedirs(output)
        fbytes = io.BytesIO(response.content)
        myzip = zipfile.ZipFile(fbytes, mode='r')
        members = myzip.namelist()
        for member in members:
            finfo = myzip.getinfo(member)
            if finfo.is_dir():
                continue
            if not member.lower().endswith('.zip'):
                fin = myzip.open(member)
                flatfile = member.replace('/', '_')
                outfile = os.path.join(output, flatfile)
                with open(outfile, 'wb') as fout:
                    fout.write(fin.read())
                fin.close()
            else:
                zfiledata = io.BytesIO(myzip.read(member))
                try:
                    tmpzip = zipfile.ZipFile(zfiledata, mode='r')
                    tmp_members = tmpzip.namelist()
                    for tmp_member in tmp_members:
                        tfinfo = tmpzip.getinfo(tmp_member)
                        if not tfinfo.is_dir():
                            fin = tmpzip.open(tmp_member)
                            flatfile = tmp_member.replace('/', '_')
                            parent, _ = os.path.splitext(member)
                            parent = parent.replace('/', '_')
                            # sometimes the member ends with .zip.zip (??)
                            parent = parent.replace('.zip', '')
                            datadir = os.path.join(output, parent)
                            if not os.path.exists(datadir):
                                os.makedirs(datadir)
                            outfile = os.path.join(datadir, flatfile)
                            with open(outfile, 'wb') as fout:
                                fout.write(fin.read())
                            fin.close()
                    tmpzip.close()
                    zfiledata.close()
                except Exception as e:
                    fmt = (
                        'Could not unpack sub-zip file "%s" due to error "%s". '
                        'Skipping.')
                    print(fmt % (member, str(e)))
                    continue

        myzip.close()

        datafiles = []
        for root, fdir, files in os.walk(output):
            for tfile in files:
                if not tfile.endswith('.json'):
                    datafile = os.path.join(root, tfile)
                    datafiles.append(datafile)

        return (os.path.abspath(output), datafiles)
    else:
        if not output.endswith('.zip'):
            output += '.zip'
        with open(output, 'wb') as f:
            f.write(response.content)
        return (output, [])
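# A minimal sketch of the argument translation performed above: keyword
# arguments are renamed through a lookup table (KEY_TABLE in the module; the
# table below is a made-up stand-in) and boolean values are stringified to
# 'true'/'false' before being sent to the web service.
def _demo_build_params(**kwargs):
    # hypothetical subset of a key-translation table
    demo_key_table = {'station_type': 'sttype', 'include_inactive': 'abandoned'}
    params = {}
    for key, value in kwargs.items():
        params[demo_key_table.get(key, key)] = value
    for key, value in params.items():
        if isinstance(value, bool):
            params[key] = 'true' if value else 'false'
    return params


# e.g., _demo_build_params(station_type='G', include_inactive=False, minmag=5.0)
# -> {'sttype': 'G', 'abandoned': 'false', 'minmag': 5.0}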
Example #24
0
    def addStreams(self, event, streams, label=None):
        """Add a sequence of StationStream objects to an ASDF file.

        Args:
            event (Event):
                Obspy event object.
            streams (list):
                List of StationStream objects.
            label (str):
                Label to attach to stream sequence. Cannot contain an
                underscore.
        """
        if label is not None:
            if '_' in label:
                raise GMProcessException(
                    'Stream label cannot contain an underscore.')

        # To allow for multiple processed versions of the same Stream
        # let's keep a dictionary of stations and sequence number.
        eventid = _get_id(event)
        if not self.hasEvent(eventid):
            self.addEvent(event)

        for stream in streams:
            station = stream[0].stats['station']
            logging.info('Adding waveforms for station %s' % station)
            # is this a raw file? Check the trace for provenance info.
            is_raw = not len(stream[0].getProvenanceKeys())

            if label is None:
                tfmt = '%Y%m%d%H%M%S'
                tnow = UTCDateTime.now().strftime(tfmt)
                label = 'processed%s' % tnow
            tag = '{}_{}'.format(eventid, label)
            if is_raw:
                level = 'raw'
            else:
                level = 'processed'
            self.dataset.add_waveforms(stream, tag=tag, event_id=event)

            # add processing provenance info from traces
            if level == 'processed':
                provdocs = stream.getProvenanceDocuments()
                for provdoc, trace in zip(provdocs, stream):
                    provname = format_nslct(trace.stats, tag)
                    self.dataset.add_provenance_document(
                        provdoc,
                        name=provname
                    )

            # add processing parameters from streams
            jdict = {}
            for key in stream.getStreamParamKeys():
                value = stream.getStreamParam(key)
                jdict[key] = value

            if len(jdict):
                # NOTE: We would store this dictionary just as
                # the parameters dictionary, but HDF cannot handle
                # nested dictionaries.
                # Also, this seems like a lot of effort
                # just to store a string in HDF, but other
                # approaches failed. Suggestions are welcome.
                jdict = _stringify_dict(jdict)
                jsonbytes = json.dumps(jdict).encode('utf-8')
                jsonarray = np.frombuffer(jsonbytes, dtype=np.uint8)
                dtype = 'StreamProcessingParameters'
                parampath = '/'.join([
                    format_netsta(stream[0].stats),
                    format_nslit(stream[0].stats, stream.get_inst(), tag)
                ])
                self.dataset.add_auxiliary_data(
                    jsonarray,
                    data_type=dtype,
                    path=parampath,
                    parameters={}
                )

            # add processing parameters from traces
            for trace in stream:
                procname = '/'.join([format_netsta(trace.stats),
                                     format_nslct(trace.stats, tag),
                ])
                jdict = {}
                for key in trace.getParameterKeys():
                    value = trace.getParameter(key)
                    jdict[key] = value
                if len(jdict):
                    # NOTE: We would store this dictionary just as
                    # the parameters dictionary, but HDF cannot handle
                    # nested dictionaries.
                    # Also, this seems like a lot of effort
                    # just to store a string in HDF, but other
                    # approaches failed. Suggestions are welcome.
                    jdict = _stringify_dict(jdict)
                    jsonbytes = json.dumps(jdict).encode('utf-8')
                    jsonarray = np.frombuffer(jsonbytes, dtype=np.uint8)
                    dtype = 'TraceProcessingParameters'
                    self.dataset.add_auxiliary_data(
                        jsonarray,
                        data_type=dtype,
                        path=procname,
                        parameters={}
                    )

                # Some processing data is computationally intensive to
                # compute, so we store it in the 'Cache' group.
                for specname in trace.getCachedNames():
                    spectrum = trace.getCached(specname)
                    # we expect many of these specnames to
                    # be joined with underscores.
                    name_parts = specname.split('_')
                    base_dtype = ''.join([part.capitalize()
                                          for part in name_parts])
                    for array_name, array in spectrum.items():
                        path = base_dtype + array_name.capitalize() + "/" + procname
                        try:
                            self.dataset.add_auxiliary_data(
                                array,
                                data_type='Cache',
                                path=path,
                                parameters={}
                            )
                        except Exception:
                            # Cached arrays are optional; if one cannot be
                            # added to the file, skip it and move on.
                            pass
            inventory = stream.getInventory()
            self.dataset.add_stationxml(inventory)
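# The stream/trace parameter dictionaries above are flattened to JSON strings
# and stored as uint8 arrays because the underlying HDF container cannot hold
# nested dictionaries. A minimal round trip of that encoding (the keys below
# are made up, not the actual parameter names):
import json
import numpy as np

_demo_params = {'corner_frequency': '0.05', 'review_status': 'passed'}
_demo_bytes = json.dumps(_demo_params).encode('utf-8')
_demo_array = np.frombuffer(_demo_bytes, dtype=np.uint8)

# reading it back out of the auxiliary-data array:
_demo_restored = json.loads(_demo_array.tobytes().decode('utf-8'))
assert _demo_restored == _demo_params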
Example #25
0
def _get_header_info(filename,
                     any_structure=False,
                     accept_flagged=False,
                     location=''):
    """Return stats structure from various headers.

    Output is a dictionary like this:
     - network
     - station
     - channel
     - location (str): Set to floor the sensor is located on. If not a
            multi-sensor array, default is '--'. Can be set manually by
            the user.
     - starttime
     - sampling_rate
     - npts
     - coordinates:
       - latitude
       - longitude
       - elevation
    - standard
      - horizontal_orientation
      - instrument_period
      - instrument_damping
      - process_level
      - station_name
      - sensor_serial_number
      - instrument
      - comments
      - structure_type
      - corner_frequency
      - units
      - source
      - source_format
    - format_specific
      - vertical_orientation
      - building_floor (0=basement, 1=floor above basement,
            -1=1st sub-basement, etc.)
      - bridge_number_spans
      - bridge_transducer_location ("free field",
                                    "at the base of a pier or abutment",
                                    "on an abutment",
                                    "on the deck at the top of a pier",
                                    "on the deck between piers or between an
                                     abutment and a pier")
      - dam_transducer_location ("upstream or downstream free field",
                                 "at the base of the dam",
                                 "on the crest of the dam",
                                 "on the abutment of the dam")
      - construction_type ("Reinforced concrete gravity",
                           "Reinforced concrete arch",
                           "earth fill",
                           "other")
      - filter_poles
      - data_source
    """
    stats = {}
    standard = {}
    format_specific = {}
    coordinates = {}
    # read the ascii header lines
    with open(filename) as f:
        ascheader = [next(f).strip() for x in range(ASCII_HEADER_LINES)]

    standard['process_level'] = PROCESS_LEVELS[VALID_HEADERS[ascheader[0]]]
    logging.debug("process_level: %s" % standard['process_level'])

    # station code is in the third line
    stats['station'] = ''
    if len(ascheader[2]) >= 4:
        stats['station'] = ascheader[2][0:4].strip()
        stats['station'] = stats['station'].strip('\x00')
    logging.debug('station: %s' % stats['station'])

    standard['process_time'] = ''
    standard['station_name'] = ascheader[5][10:40].strip()
    # sometimes the data source has nothing in it,
    # but most of the time it seems to have USGS in it;
    # sometimes it's something like JPL/USGS, CDOT/USGS, etc.
    # if it's got USGS in it, let's just say network=US, otherwise ZZ
    stats['network'] = 'ZZ'
    if ascheader[7].find('USGS') > -1:
        stats['network'] = 'US'

    try:
        standard['source'] = ascheader[7].split('=')[2].strip()
    except IndexError:
        standard['source'] = 'USGS'
    if standard['source'] == '':
        standard['source'] = 'USGS'
    standard['source_format'] = 'smc'

    # read integer header data

    intheader = np.genfromtxt(filename,
                              dtype=np.int32,
                              max_rows=INTEGER_HEADER_LINES,
                              skip_header=ASCII_HEADER_LINES,
                              delimiter=INT_HEADER_WIDTHS)
    # 8 columns per line
    # first line is start time information, and then inst. serial number
    missing_data = intheader[0, 0]
    year = intheader[0, 1]

    # sometimes the year field has a 0 in it. When this happens, we
    # can try to get a timestamp from line 4 of the ascii header.
    if year == 0:
        parts = ascheader[3].split()
        try:
            year = int(parts[0])
        except ValueError as ve:
            fmt = ('Could not find year in SMC file %s. Not present '
                   'in integer header and not parseable from line '
                   '4 of ASCII header. Error: "%s"')
            raise GMProcessException(fmt % (filename, str(ve)))

    jday = intheader[0, 2]
    hour = intheader[0, 3]
    minute = intheader[0, 4]
    if (year != missing_data and jday != missing_data and hour != missing_data
            and minute != missing_data):

        # Handle second if missing
        second = 0
        if not intheader[0, 5] == missing_data:
            second = intheader[0, 5]

        # Handle microsecond if missing and convert milliseconds to microseconds
        microsecond = 0
        if not intheader[0, 6] == missing_data:
            microsecond = intheader[0, 6] / 1e3
        datestr = '%i %00i %i %i %i %i' % (year, jday, hour, minute, second,
                                           microsecond)

        stats['starttime'] = datetime.strptime(datestr, '%Y %j %H %M %S %f')
    else:
        logging.warning('No start time provided. '
                        'This must be set manually for network/station: '
                        '%s/%s.' % (stats['network'], stats['station']))
        standard['comments'] = 'Missing start time.'

    standard['sensor_serial_number'] = ''
    if intheader[1, 3] != missing_data:
        standard['sensor_serial_number'] = str(intheader[1, 3])

    # we never get a two-character location code, so the floor location is used
    if location == '':
        location = intheader.flatten()[24]
        if location != missing_data:
            location = str(location)
            if len(location) < 2:
                location = location.zfill(2)
            stats['location'] = location
        else:
            stats['location'] = '--'
    else:
        stats['location'] = location

    # second line is information about number of channels, orientations
    # we care about orientations
    format_specific['vertical_orientation'] = np.nan
    if intheader[1, 4] != missing_data:
        format_specific['vertical_orientation'] = int(intheader[1, 4])

    standard['horizontal_orientation'] = np.nan
    standard['vertical_orientation'] = np.nan
    if intheader[1, 5] != missing_data:
        standard['horizontal_orientation'] = float(intheader[1, 5])

    if intheader[1, 6] == missing_data or intheader[1, 6] not in INSTRUMENTS:
        standard['instrument'] = ''
    else:
        standard['instrument'] = INSTRUMENTS[intheader[1, 6]]

    num_comments = intheader[1, 7]

    # third line contains number of data points
    stats['npts'] = intheader[2, 0]
    problem_flag = intheader[2, 1]
    if problem_flag == 1:
        if not accept_flagged:
            fmt = 'SMC: Record found in file %s has a problem flag!'
            raise GMProcessException(fmt % filename)
        else:
            logging.warning(
                'SMC: Data contains a problem flag for network/station: '
                '%s/%s. See comments.' % (stats['network'], stats['station']))
    stype = intheader[2, 2]
    if stype == missing_data:
        stype = np.nan
    elif stype not in STRUCTURES:
        # structure type is not defined and will be considered 'other'
        stype = 4
    fmt = 'SMC: Record found in file %s is not a free-field sensor!'
    standard['structure_type'] = STRUCTURES[stype]
    if standard['structure_type'] == 'building' and not any_structure:
        raise Exception(fmt % filename)

    format_specific['building_floor'] = np.nan
    if intheader[3, 0] != missing_data:
        format_specific['building_floor'] = intheader[3, 0]

    format_specific['bridge_number_spans'] = np.nan
    if intheader[3, 1] != missing_data:
        format_specific['bridge_number_spans'] = intheader[3, 1]

    format_specific['bridge_transducer_location'] = BRIDGE_LOCATIONS[0]
    if intheader[3, 2] != missing_data:
        bridge_number = intheader[3, 2]
        format_specific['bridge_transducer_location'] = \
            BRIDGE_LOCATIONS[bridge_number]

    format_specific['dam_transducer_location'] = DAM_LOCATIONS[0]
    if intheader[3, 3] != missing_data:
        dam_number = intheader[3, 3]
        format_specific['dam_transducer_location'] = DAM_LOCATIONS[dam_number]

    c1 = format_specific['bridge_transducer_location'].find('free field') == -1
    c2 = format_specific['dam_transducer_location'].find('free field') == -1
    if (c1 or c2) and not any_structure:
        raise Exception(fmt % filename)

    format_specific['construction_type'] = CONSTRUCTION_TYPES[4]
    if intheader[3, 4] != missing_data:
        format_specific['construction_type'] = \
            CONSTRUCTION_TYPES[intheader[3, 4]]

    # station is repeated here if all numeric
    if not len(stats['station']):
        stats['station'] = '%i' % intheader[3, 5]

    # read float header data
    skip = ASCII_HEADER_LINES + INTEGER_HEADER_LINES
    floatheader = np.genfromtxt(filename,
                                max_rows=FLOAT_HEADER_LINES,
                                skip_header=skip,
                                delimiter=FLOAT_HEADER_WIDTHS)

    # float headers are 10 lines of 5 floats each
    missing_data = floatheader[0, 0]
    stats['sampling_rate'] = floatheader[0, 1]
    if stats['sampling_rate'] >= MAX_ALLOWED_SAMPLE_RATE:
        fmt = 'Sampling rate of %.2g samples/second is nonsensical.'
        raise Exception(fmt % stats['sampling_rate'])
    coordinates['latitude'] = floatheader[2, 0]
    # the documentation for SMC says that sometimes longitudes are
    # positive in the western hemisphere. Since it is very unlikely
    # any of these files exist for the eastern hemisphere, check for
    # positive longitudes and fix them.
    lon = floatheader[2, 1]
    if lon > 0:
        lon = -1 * lon
    coordinates['longitude'] = lon
    coordinates['elevation'] = 0.0
    if floatheader[2, 2] != missing_data:
        coordinates['elevation'] = floatheader[2, 2]
    else:
        logging.warning('Setting elevation to 0.0')

    # figure out the channel code
    if format_specific['vertical_orientation'] in [0, 180]:
        stats['channel'] = get_channel_name(stats['sampling_rate'],
                                            is_acceleration=True,
                                            is_vertical=True,
                                            is_north=False)
    else:
        ho = standard['horizontal_orientation']
        quad1 = ho > 315 and ho <= 360
        quad2 = ho > 0 and ho <= 45
        quad3 = ho > 135 and ho <= 225
        if quad1 or quad2 or quad3:
            stats['channel'] = get_channel_name(stats['sampling_rate'],
                                                is_acceleration=True,
                                                is_vertical=False,
                                                is_north=True)
        else:
            stats['channel'] = get_channel_name(stats['sampling_rate'],
                                                is_acceleration=True,
                                                is_vertical=False,
                                                is_north=False)

    logging.debug('channel: %s' % stats['channel'])
    sensor_frequency = floatheader[4, 1]
    standard['instrument_period'] = 1 / sensor_frequency
    standard['instrument_damping'] = floatheader[4, 2]

    standard['corner_frequency'] = floatheader[3, 4]
    format_specific['filter_poles'] = floatheader[4, 0]
    standard['units'] = 'acc'
    standard['units_type'] = get_units_type(stats['channel'])

    # this field can be used for instrument correction
    # when data is in counts
    standard['instrument_sensitivity'] = np.nan

    # read in the comment lines
    with open(filename) as f:
        skip = ASCII_HEADER_LINES + INTEGER_HEADER_LINES + FLOAT_HEADER_LINES
        _ = [next(f) for x in range(skip)]
        standard['comments'] = [
            next(f).strip().lstrip('|') for x in range(num_comments)
        ]

    standard['comments'] = ' '.join(standard['comments'])
    stats['coordinates'] = coordinates
    stats['standard'] = standard
    stats['format_specific'] = format_specific

    head, tail = os.path.split(filename)
    stats['standard']['source_file'] = tail or os.path.basename(head)

    return (stats, num_comments)
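# The start-time construction above joins the year, day-of-year (julian day),
# and time fields into a string and parses it with the '%Y %j %H %M %S %f'
# format. A runnable sketch with made-up header values:
from datetime import datetime

# year, julian day, hour, minute, second, microsecond
_demo_fields = (1994, 17, 12, 30, 55, 0)
_demo_str = '%i %i %i %i %i %i' % _demo_fields
_demo_start = datetime.strptime(_demo_str, '%Y %j %H %M %S %f')
# -> datetime.datetime(1994, 1, 17, 12, 30, 55)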
Example #26
0
def _get_header_info(int_data, flt_data, lines, volume, location=''):
    """Return stats structure from various headers.

    Output is a dictionary like this:
     - network (str): 'LA'
     - station (str)
     - channel (str): Determined using USC_ORIENTATIONS
     - location (str): Default is '--'
     - starttime (datetime)
     - duration (float)
     - sampling_rate (float)
     - npts (int)
     - coordinates:
       - latitude (float)
       - longitude (float)
       - elevation (float)
    - standard (Defaults are either np.nan or '')
      - horizontal_orientation (float): Rotation from north (degrees)
      - instrument_period (float): Period of sensor (seconds)
      - instrument_damping (float): Fraction of critical
      - process_time (datetime): Reported date of processing
      - process_level: Either 'V0', 'V1', 'V2', or 'V3'
      - station_name (str): Long form station description
      - sensor_serial_number (str): Reported sensor serial
      - instrument (str): See SENSOR_TYPES
      - comments (str): Processing comments
      - structure_type (str): See BUILDING_TYPES
      - corner_frequency (float): Sensor corner frequency (Hz)
      - units (str): See UNITS
      - source (str): Network source description
      - source_format (str): Always usc
    - format_specific
      - fractional_unit (float): Units of digitized acceleration
            in file (fractions of g)

    Args:
        int_data (ndarray): Array of integer data
        flt_data (ndarray): Array of float data
        lines (list): List of text headers (str)
        volume (str): Volume format label (e.g., 'V1')
        location (str): Location metadata for the channel (default is '')

    Returns:
        dictionary: Dictionary of header/metadata information
    """
    hdr = {}
    coordinates = {}
    standard = {}
    format_specific = {}
    if volume == 'V1':
        hdr['duration'] = flt_data[2]
        hdr['npts'] = int_data[27]
        hdr['sampling_rate'] = (hdr['npts'] - 1) / hdr['duration']

        # Get required parameters
        hdr['network'] = 'LA'
        hdr['station'] = str(int_data[8])
        logging.debug('station: %s' % hdr['station'])
        horizontal_angle = int_data[26]
        logging.debug('horizontal: %s' % horizontal_angle)
        if (horizontal_angle in USC_ORIENTATIONS or
                (horizontal_angle >= 0 and horizontal_angle <= 360)):
            if horizontal_angle in USC_ORIENTATIONS:
                channel = USC_ORIENTATIONS[horizontal_angle][1].upper()
                if channel == 'UP' or channel == 'DOWN' or channel == 'VERT':
                    channel = get_channel_name(
                        hdr['sampling_rate'],
                        is_acceleration=True,
                        is_vertical=True,
                        is_north=False)
                horizontal_angle = 0.0
            elif (
                horizontal_angle > 315 or
                horizontal_angle < 45 or
                (horizontal_angle > 135 and horizontal_angle < 225)
            ):
                channel = get_channel_name(
                    hdr['sampling_rate'],
                    is_acceleration=True,
                    is_vertical=False,
                    is_north=True)
            else:
                channel = get_channel_name(
                    hdr['sampling_rate'],
                    is_acceleration=True,
                    is_vertical=False,
                    is_north=False)
            horizontal_orientation = horizontal_angle
            hdr['channel'] = channel
            logging.debug('channel: %s' % hdr['channel'])
        else:
            errstr = ('USC: Not enough information to distinguish horizontal from '
                      'vertical channels.')
            raise GMProcessException(errstr)

        if location == '':
            hdr['location'] = '--'
        else:
            hdr['location'] = location
        month = str(int_data[21])
        day = str(int_data[22])
        year = str(int_data[23])
        time = str(int_data[24])
        tstr = month + '/' + day + '/' + year + '_' + time
        starttime = datetime.strptime(tstr, '%m/%d/%Y_%H%M')
        hdr['starttime'] = starttime

        # Get coordinates
        lat_deg = int_data[9]
        lat_min = int_data[10]
        lat_sec = int_data[11]
        lon_deg = int_data[12]
        lon_min = int_data[13]
        lon_sec = int_data[14]
        # Check for southern hemisphere, default is northern
        if lines[4].find('STATION USC#') >= 0:
            idx = lines[4].find('STATION USC#') + 12
            if 'S' in lines[4][idx:]:
                lat_sign = -1
            else:
                lat_sign = 1
        else:
            lat_sign = 1
        # Check for western hemisphere, default is western
        if lines[4].find('STATION USC#') >= 0:
            idx = lines[4].find('STATION USC#') + 12
            if 'W' in lines[4][idx:]:
                lon_sign = -1
            else:
                lon_sign = 1
        else:
            lon_sign = -1
        latitude = lat_sign * _dms2dd(lat_deg, lat_min, lat_sec)
        longitude = lon_sign * _dms2dd(lon_deg, lon_min, lon_sec)
        # Since sometimes longitudes are positive in this format for data in
        # the western hemisphere, we "fix" it here. Hopefully no one in the
        # eastern hemisphere uses this format!
        if longitude > 0:
            longitude = -longitude
        coordinates['latitude'] = latitude
        coordinates['longitude'] = longitude
        logging.warning('Setting elevation to 0.0')
        coordinates['elevation'] = 0.0
        # Get standard parameters
        standard['units_type'] = get_units_type(hdr['channel'])
        standard['horizontal_orientation'] = float(horizontal_orientation)
        standard['instrument_period'] = flt_data[0]
        standard['instrument_damping'] = flt_data[1]
        standard['process_time'] = ''
        station_line = lines[5]
        station_length = int(lines[5][72:74])
        name = station_line[:station_length]
        standard['station_name'] = name
        standard['sensor_serial_number'] = ''
        standard['instrument'] = ''
        standard['comments'] = ''
        standard['units'] = 'acc'
        standard['structure_type'] = ''
        standard['process_level'] = PROCESS_LEVELS['V1']
        standard['corner_frequency'] = np.nan
        standard['source'] = ('Los Angeles Basin Seismic Network, University '
                              'of Southern California')
        standard['source_format'] = 'usc'

        # this field can be used for instrument correction
        # when data is in counts
        standard['instrument_sensitivity'] = np.nan

        # Get format specific
        format_specific['fractional_unit'] = flt_data[4]

    # Set dictionary
    hdr['standard'] = standard
    hdr['coordinates'] = coordinates
    hdr['format_specific'] = format_specific
    return hdr
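# The coordinate handling above converts degrees/minutes/seconds to decimal
# degrees via _dms2dd and then applies a hemisphere sign. A minimal,
# self-contained sketch of that conversion (the helper below is a stand-in
# for _dms2dd, whose implementation is not shown here):
def _demo_dms2dd(degrees, minutes, seconds):
    return degrees + minutes / 60.0 + seconds / 3600.0


# e.g., 34 deg 03' 00" N, 118 deg 14' 24" W:
_demo_lat = 1 * _demo_dms2dd(34, 3, 0)       # 34.05
_demo_lon = -1 * _demo_dms2dd(118, 14, 24)   # -118.24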