def test_writeAndReadDifferentRecordLengths(self):
    """
    Tests Mini-SEED writing and record lengths.
    """
    # Build a reproducible random integer trace.
    npts = 6000
    np.random.seed(815)  # make test reproducable
    data = np.random.randint(-1000, 1000, npts).astype('int32')
    st = Stream([Trace(data=data)])
    # Loop over all valid Mini-SEED record lengths.
    for rec_len in (256, 512, 1024, 2048, 4096, 8192):
        with NamedTemporaryFile() as tf:
            tempfile = tf.name
            # Write the stream with the requested record length.
            st.write(tempfile, format="MSEED", reclen=rec_len)
            # Fetch header information of the freshly written file.
            info = util.getRecordInformation(tempfile)
            # Read the file back in.
            temp_st = read(tempfile)
        # The round trip must not alter the samples.
        np.testing.assert_array_equal(data, temp_st[0].data)
        del temp_st
        # The requested record length must actually have been used ...
        self.assertEqual(info['record_length'], rec_len)
        # ... and the file size must be a whole number of records.
        self.assertEqual(info['filesize'] % rec_len, 0)
 def test_getRecordInformation(self):
     """
     Tests the reading of Mini-SEED record information.
     """
     # Build a reverse lookup: encoding string (e.g. 'STEIM1') -> id.
     # items() replaces the Python 2 only iteritems() for Python 3
     # compatibility and consistency with the rest of the module.
     encoding_strings = {}
     for key, value in ENCODINGS.items():
         encoding_strings[value[0]] = key
     # Test the encodings and byteorders.
     path = os.path.join(self.path, "data", "encoding")
     files = ['float32_Float32_bigEndian.mseed',
              'float32_Float32_littleEndian.mseed',
              'float64_Float64_bigEndian.mseed',
              'float64_Float64_littleEndian.mseed',
              'fullASCII_bigEndian.mseed', 'fullASCII_littleEndian.mseed',
              'int16_INT16_bigEndian.mseed',
              'int16_INT16_littleEndian.mseed',
              'int32_INT32_bigEndian.mseed',
              'int32_INT32_littleEndian.mseed',
              'int32_Steim1_bigEndian.mseed',
              'int32_Steim1_littleEndian.mseed',
              'int32_Steim2_bigEndian.mseed',
              'int32_Steim2_littleEndian.mseed']
     # 'filename' instead of 'file' to avoid shadowing the builtin.
     for filename in files:
         info = util.getRecordInformation(os.path.join(path, filename))
         # Expected encoding/byteorder are encoded in the file name:
         # <dtype>_<Encoding>_<byteorder>.mseed (ASCII files lack <dtype>).
         if 'ASCII' not in filename:
             encoding = filename.split('_')[1].upper()
             byteorder = filename.split('_')[2].split('.')[0]
         else:
             encoding = 'ASCII'
             byteorder = filename.split('_')[1].split('.')[0]
         if 'big' in byteorder:
             byteorder = '>'
         else:
             byteorder = '<'
         self.assertEqual(encoding_strings[encoding], info['encoding'])
         self.assertEqual(byteorder, info['byteorder'])
         # Also test the record length although it is equal for all files.
         self.assertEqual(256, info['record_length'])
     # No really good test files for the record length so just two files
     # with known record lengths are tested.
     info = util.getRecordInformation(os.path.join(self.path, 'data',
                                                   'timingquality.mseed'))
     self.assertEqual(info['record_length'], 512)
     info = util.getRecordInformation(os.path.join(self.path, 'data',
                                                   'steim2.mseed'))
     self.assertEqual(info['record_length'], 4096)
Exemple #3
0
 def test_getRecordInformation(self):
     """
     Tests the util._getMSFileInfo method with known values.
     """
     # Local import: io.BytesIO replaces the previous StringIO usage.
     # The file is opened in 'rb' mode, so its contents are bytes and
     # StringIO(bytes) fails on Python 3; BytesIO behaves identically
     # for the seek/tell/read operations used here.
     import io
     filename = os.path.join(self.path, 'data',
                             'BW.BGLD.__.EHE.D.2008.001.first_10_records')
     # Simply reading the file.
     info = util.getRecordInformation(filename)
     self.assertEqual(info['filesize'], 5120)
     self.assertEqual(info['record_length'], 512)
     self.assertEqual(info['number_of_records'], 10)
     self.assertEqual(info['excess_bytes'], 0)
     # Now with an open file. This should work regardless of the current
     # value of the file pointer and it should also not change the file
     # pointer.
     open_file = open(filename, 'rb')
     open_file.seek(1234)
     info = util.getRecordInformation(open_file)
     self.assertEqual(info['filesize'], 5120 - 1234)
     self.assertEqual(info['record_length'], 512)
     self.assertEqual(info['number_of_records'], 7)
     self.assertEqual(info['excess_bytes'], 302)
     # The file pointer must be untouched.
     self.assertEqual(open_file.tell(), 1234)
     open_file.close()
     # Now test with an in-memory bytes buffer.
     with open(filename, 'rb') as open_file:
         file_buffer = io.BytesIO(open_file.read())
     file_buffer.seek(111)
     info = util.getRecordInformation(file_buffer)
     self.assertEqual(info['filesize'], 5120 - 111)
     self.assertEqual(info['record_length'], 512)
     self.assertEqual(info['number_of_records'], 9)
     self.assertEqual(info['excess_bytes'], 401)
     self.assertEqual(file_buffer.tell(), 111)
     # One more file containing two records.
     filename = os.path.join(self.path, 'data', 'test.mseed')
     info = util.getRecordInformation(filename)
     self.assertEqual(info['filesize'], 8192)
     self.assertEqual(info['record_length'], 4096)
     self.assertEqual(info['number_of_records'], 2)
     self.assertEqual(info['excess_bytes'], 0)
Exemple #4
0
 def test_getRecordInformation(self):
     """
     Tests the util._getMSFileInfo method with known values.
     """
     # Local import: the in-memory buffer below holds bytes read from a
     # file opened in 'rb' mode, so io.BytesIO is required instead of
     # the Python 2 only StringIO(bytes) idiom used previously.
     import io
     filename = os.path.join(self.path, 'data',
                             'BW.BGLD.__.EHE.D.2008.001.first_10_records')
     # Simply reading the file.
     info = util.getRecordInformation(filename)
     self.assertEqual(info['filesize'], 5120)
     self.assertEqual(info['record_length'], 512)
     self.assertEqual(info['number_of_records'], 10)
     self.assertEqual(info['excess_bytes'], 0)
     # Now with an open file. This should work regardless of the current
     # value of the file pointer and it should also not change the file
     # pointer.
     open_file = open(filename, 'rb')
     open_file.seek(1234)
     info = util.getRecordInformation(open_file)
     self.assertEqual(info['filesize'], 5120 - 1234)
     self.assertEqual(info['record_length'], 512)
     self.assertEqual(info['number_of_records'], 7)
     self.assertEqual(info['excess_bytes'], 302)
     # The file pointer must be untouched.
     self.assertEqual(open_file.tell(), 1234)
     open_file.close()
     # Now test with an in-memory bytes buffer.
     with open(filename, 'rb') as open_file:
         buffered = io.BytesIO(open_file.read())
     buffered.seek(111)
     info = util.getRecordInformation(buffered)
     self.assertEqual(info['filesize'], 5120 - 111)
     self.assertEqual(info['record_length'], 512)
     self.assertEqual(info['number_of_records'], 9)
     self.assertEqual(info['excess_bytes'], 401)
     self.assertEqual(buffered.tell(), 111)
     # One more file containing two records.
     filename = os.path.join(self.path, 'data', 'test.mseed')
     info = util.getRecordInformation(filename)
     self.assertEqual(info['filesize'], 8192)
     self.assertEqual(info['record_length'], 4096)
     self.assertEqual(info['number_of_records'], 2)
     self.assertEqual(info['excess_bytes'], 0)
Exemple #5
0
def readMSEED(mseed_object,
              starttime=None,
              endtime=None,
              headonly=False,
              sourcename=None,
              reclen=None,
              details=False,
              header_byteorder=None,
              verbose=None,
              **kwargs):
    """
    Reads a Mini-SEED file and returns a Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :param mseed_object: Filename or open file like object that contains the
        binary Mini-SEED data. Any object that provides a read() method will be
        considered to be a file like object.
    :type starttime: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param starttime: Only read data samples after or at the start time.
    :type endtime: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param endtime: Only read data samples before or at the end time.
    :param headonly: Determines whether or not to unpack the data or just
        read the headers.
    :type sourcename: str
    :param sourcename: Source name has to have the structure
        'network.station.location.channel' and can contain globbing characters.
        Defaults to ``None``.
    :param reclen: If it is None, it will be automatically determined for every
        record. If it is known, just set it to the record length in bytes which
        will increase the reading speed slightly.
    :type details: bool, optional
    :param details: If ``True`` read additional information: timing quality
        and availability of calibration information.
        Note, that the traces are then also split on these additional
        information. Thus the number of traces in a stream will change.
        Details are stored in the mseed stats AttribDict of each trace.
        -1 specifies for both cases, that these information is not available.
        ``timing_quality`` specifies the timing quality from 0 to 100 [%].
        ``calibration_type`` specifies the type of available calibration
        information: 1 == Step Calibration, 2 == Sine Calibration, 3 ==
        Pseudo-random Calibration, 4 == Generic Calibration and -2 ==
        Calibration Abort.
    :type header_byteorder: int or str, optional
    :param header_byteorder: Must be either ``0`` or ``'<'`` for LSBF or
        little-endian, ``1`` or ``'>'`` for MBF or big-endian. ``'='`` is the
        native byte order. Used to enforce the header byte order. Useful in
        some rare cases where the automatic byte order detection fails.

    .. rubric:: Example

    >>> from obspy import read
    >>> st = read("/path/to/two_channels.mseed")
    >>> print(st)  # doctest: +ELLIPSIS
    2 Trace(s) in Stream:
    BW.UH3..EHE | 2010-06-20T00:00:00.279999Z - ... | 200.0 Hz, 386 samples
    BW.UH3..EHZ | 2010-06-20T00:00:00.279999Z - ... | 200.0 Hz, 386 samples

    >>> from obspy import UTCDateTime
    >>> st = read("/path/to/test.mseed",
    ...           starttime=UTCDateTime("2003-05-29T02:16:00"),
    ...           selection="NL.*.*.?HZ")
    >>> print(st)  # doctest: +ELLIPSIS
    1 Trace(s) in Stream:
    NL.HGN.00.BHZ | 2003-05-29T02:15:59.993400Z - ... | 40.0 Hz, 5629 samples
    """
    # Parse the headonly and reclen flags.
    if headonly is True:
        unpack_data = 0
    else:
        unpack_data = 1
    if reclen is None:
        reclen = -1
    elif reclen not in VALID_RECORD_LENGTHS:
        msg = 'Invalid record length. Autodetection will be used.'
        warnings.warn(msg)
        reclen = -1

    # Determine the byte order.
    if header_byteorder == "=":
        header_byteorder = NATIVE_BYTEORDER

    if header_byteorder is None:
        header_byteorder = -1
    elif header_byteorder in [0, "0", "<"]:
        header_byteorder = 0
    elif header_byteorder in [1, "1", ">"]:
        header_byteorder = 1

    # The quality flag is no more supported. Raise a warning.
    if 'quality' in kwargs:
        msg = 'The quality flag is no longer supported in this version of ' + \
            'obspy.mseed. obspy.mseed.util has some functions with similar' + \
            ' behavior.'
        warnings.warn(msg, category=DeprecationWarning)

    # Parse some information about the file.
    if header_byteorder == 0:
        bo = "<"
    elif header_byteorder > 0:
        bo = ">"
    else:
        bo = None

    info = util.getRecordInformation(mseed_object, endian=bo)

    # Map the encoding to a readable string value.
    if info["encoding"] in ENCODINGS:
        info['encoding'] = ENCODINGS[info['encoding']][0]
    elif info["encoding"] in UNSUPPORTED_ENCODINGS:
        msg = ("Encoding '%s' (%i) is not supported by ObsPy. Please send "
               "the file to the ObsPy developers so that we can add "
               "support for it.") % \
            (UNSUPPORTED_ENCODINGS[info['encoding']], info['encoding'])
        raise ValueError(msg)
    else:
        msg = "Encoding '%i' is not a valid MiniSEED encoding." % \
            info['encoding']
        raise ValueError(msg)

    # Only keep information relevant for the whole file.
    info = {
        'encoding': info['encoding'],
        'filesize': info['filesize'],
        'record_length': info['record_length'],
        'byteorder': info['byteorder'],
        'number_of_records': info['number_of_records']
    }

    # If its a filename just read it.  The local is named 'buf' instead of
    # 'buffer' to avoid shadowing the builtin.
    if isinstance(mseed_object, (str, native_str)):
        # Read to NumPy array which is used as a buffer.
        buf = np.fromfile(mseed_object, dtype=np.int8)
    elif hasattr(mseed_object, 'read'):
        # NOTE(review): np.fromstring is deprecated in newer NumPy;
        # np.frombuffer would yield a read-only array, so confirm the C
        # routine never writes into the buffer before switching.
        buf = np.fromstring(mseed_object.read(), dtype=np.int8)

    # Get the record length from the sequence-number field (bytes 19-20);
    # fall back to the maximum of 4096 if it is not numeric.
    try:
        record_length = pow(2, int(''.join([chr(_i) for _i in buf[19:21]])))
    except ValueError:
        record_length = 4096

    # Search for data records and pass only the data part to the underlying C
    # routine.
    offset = 0
    # 0 to 9 are defined in a row in the ASCII charset.
    min_ascii = ord('0')

    # Small function to check whether an array of ASCII values contains only
    # digits.  A def instead of an assigned lambda (PEP 8); bool() keeps the
    # plain-Python bool the 'is False' identity check below relies on.
    # NOTE(review): only the upper bound is checked - bytes below ord('0')
    # would also pass; confirm before tightening.
    def isdigit(x):
        return bool((x - min_ascii).max() <= 9)

    while True:
        # This should never happen
        if (isdigit(buf[offset:offset + 6]) is False) or \
                (buf[offset + 6] not in VALID_CONTROL_HEADERS):
            msg = 'Not a valid (Mini-)SEED file'
            raise Exception(msg)
        elif buf[offset + 6] in SEED_CONTROL_HEADERS:
            # Skip full SEED control headers preceding the data records.
            offset += record_length
            continue
        break
    buf = buf[offset:]
    buflen = len(buf)

    # If no selection is given pass None to the C function.
    if starttime is None and endtime is None and sourcename is None:
        selections = None
    else:
        select_time = SelectTime()
        selections = Selections()
        selections.timewindows.contents = select_time
        if starttime is not None:
            if not isinstance(starttime, UTCDateTime):
                msg = 'starttime needs to be a UTCDateTime object'
                raise ValueError(msg)
            selections.timewindows.contents.starttime = \
                util._convertDatetimeToMSTime(starttime)
        else:
            # HPTERROR results in no starttime.
            selections.timewindows.contents.starttime = HPTERROR
        if endtime is not None:
            if not isinstance(endtime, UTCDateTime):
                msg = 'endtime needs to be a UTCDateTime object'
                raise ValueError(msg)
            selections.timewindows.contents.endtime = \
                util._convertDatetimeToMSTime(endtime)
        else:
            # HPTERROR results in no endtime.
            selections.timewindows.contents.endtime = HPTERROR
        if sourcename is not None:
            if not isinstance(sourcename, (str, native_str)):
                msg = 'sourcename needs to be a string'
                raise ValueError(msg)
            # libmseed uses underscores as separators and allows filtering
            # after the dataquality which is disabled here to not confuse
            # users. (* == all data qualities)
            selections.srcname = (sourcename.replace('.', '_') + '_*').\
                encode('ascii', 'ignore')
        else:
            selections.srcname = b'*'
    all_data = []

    # Use a callback function to allocate the memory and keep track of the
    # data.
    def allocate_data(samplecount, sampletype):
        # Enhanced sanity checking for libmseed 2.10 can result in the
        # sampletype not being set. Just return an empty array in this case.
        if sampletype == b"\x00":
            data = np.empty(0)
        else:
            data = np.empty(samplecount, dtype=DATATYPES[sampletype])
        all_data.append(data)
        return data.ctypes.data

    # XXX: Do this properly!
    # Define Python callback function for use in C function. Return a long so
    # it hopefully works on 32 and 64 bit systems.
    allocData = C.CFUNCTYPE(C.c_long, C.c_int, C.c_char)(allocate_data)

    def log_error_or_warning(msg):
        if msg.startswith(b"ERROR: "):
            raise InternalMSEEDReadingError(msg[7:].strip())
        if msg.startswith(b"INFO: "):
            msg = msg[6:].strip()
            # Append the offset of the full SEED header if necessary. That way
            # the C code does not have to deal with it.
            # NOTE(review): msg is bytes here, so '"offset" in msg' raises a
            # TypeError on Python 3 - verify against the targeted Python
            # version before relying on this branch.
            if offset and "offset" in msg:
                msg = ("%s The file contains a %i byte dataless part at the "
                       "beginning. Make sure to add that to the reported "
                       "offset to get the actual location in the file." %
                       (msg, offset))
            warnings.warn(msg, InternalMSEEDReadingWarning)

    diag_print = C.CFUNCTYPE(C.c_void_p, C.c_char_p)(log_error_or_warning)

    def log_message(msg):
        print(msg[6:].strip())

    log_print = C.CFUNCTYPE(C.c_void_p, C.c_char_p)(log_message)

    # Coerce verbose to an int; None or a non-numeric value disables
    # verbosity.  Catch only the conversion errors instead of a bare except
    # so genuine bugs (e.g. KeyboardInterrupt) are not swallowed.
    try:
        verbose = int(verbose)
    except (TypeError, ValueError):
        verbose = 0

    lil = clibmseed.readMSEEDBuffer(buf, buflen, selections,
                                    C.c_int8(unpack_data), reclen,
                                    C.c_int8(verbose), C.c_int8(details),
                                    header_byteorder, allocData, diag_print,
                                    log_print)

    # XXX: Check if the freeing works.
    del selections

    traces = []
    try:
        currentID = lil.contents
    # Return an empty stream if no traces are found.
    except ValueError:
        clibmseed.lil_free(lil)
        del lil
        return Stream()

    while True:
        # Init header with the essential information.
        header = {
            'network': currentID.network.strip(),
            'station': currentID.station.strip(),
            'location': currentID.location.strip(),
            'channel': currentID.channel.strip(),
            'mseed': {
                'dataquality': currentID.dataquality
            }
        }
        # Loop over segments.
        try:
            currentSegment = currentID.firstSegment.contents
        except ValueError:
            break
        while True:
            header['sampling_rate'] = currentSegment.samprate
            header['starttime'] = \
                util._convertMSTimeToDatetime(currentSegment.starttime)
            # TODO: write support is missing
            if details:
                timing_quality = currentSegment.timing_quality
                if timing_quality == 0xFF:  # 0xFF is mask for not known timing
                    timing_quality = -1
                header['mseed']['timing_quality'] = timing_quality
                header['mseed']['calibration_type'] = \
                    currentSegment.calibration_type

            if headonly is False:
                # The data always will be in sequential order.
                data = all_data.pop(0)
                header['npts'] = len(data)
            else:
                data = np.array([])
                header['npts'] = currentSegment.samplecnt
            # Make sure to init the number of samples.
            # Py3k: convert to unicode
            header['mseed'] = dict(
                (k, v.decode()) if isinstance(v, bytes) else (k, v)
                for k, v in header['mseed'].items())
            header = dict((k, v.decode()) if isinstance(v, bytes) else (k, v)
                          for k, v in header.items())
            trace = Trace(header=header, data=data)
            # Append information.
            for key, value in info.items():
                setattr(trace.stats.mseed, key, value)
            traces.append(trace)
            # A Null pointer access results in a ValueError
            try:
                currentSegment = currentSegment.next.contents
            except ValueError:
                break
        try:
            currentID = currentID.next.contents
        except ValueError:
            break

    clibmseed.lil_free(lil)  # NOQA
    del lil  # NOQA
    return Stream(traces=traces)
Exemple #6
0
def readMSEED(mseed_object, starttime=None, endtime=None, headonly=False,
              sourcename=None, reclen=None, details=False,
              header_byteorder=None, verbose=None, **kwargs):
    """
    Reads a Mini-SEED file and returns a Stream object.

    .. warning::
        This function should NOT be called directly, it registers via the
        ObsPy :func:`~obspy.core.stream.read` function, call this instead.

    :param mseed_object: Filename or open file like object that contains the
        binary Mini-SEED data. Any object that provides a read() method will be
        considered to be a file like object.
    :type starttime: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param starttime: Only read data samples after or at the start time.
    :type endtime: :class:`~obspy.core.utcdatetime.UTCDateTime`
    :param endtime: Only read data samples before or at the end time.
    :param headonly: Determines whether or not to unpack the data or just
        read the headers.
    :type sourcename: str
    :param sourcename: Source name has to have the structure
        'network.station.location.channel' and can contain globbing characters.
        Defaults to ``None``.
    :param reclen: If it is None, it will be automatically determined for every
        record. If it is known, just set it to the record length in bytes which
        will increase the reading speed slightly.
    :type details: bool, optional
    :param details: If ``True`` read additional information: timing quality
        and availability of calibration information.
        Note, that the traces are then also split on these additional
        information. Thus the number of traces in a stream will change.
        Details are stored in the mseed stats AttribDict of each trace.
        ``False`` specifies for both cases, that this information is not
        available. ``blkt1001.timing_quality`` specifies the timing quality
        from 0 to 100 [%]. ``calibration_type`` specifies the type of available
        calibration information blockettes:

        - ``1``: Step Calibration (Blockette 300)
        - ``2``: Sine Calibration (Blockette 310)
        - ``3``: Pseudo-random Calibration (Blockette 320)
        - ``4``: Generic Calibration  (Blockette 390)
        - ``-2``: Calibration Abort (Blockette 395)

    :type header_byteorder: int or str, optional
    :param header_byteorder: Must be either ``0`` or ``'<'`` for LSBF or
        little-endian, ``1`` or ``'>'`` for MBF or big-endian. ``'='`` is the
        native byte order. Used to enforce the header byte order. Useful in
        some rare cases where the automatic byte order detection fails.

    .. rubric:: Example

    >>> from obspy import read
    >>> st = read("/path/to/two_channels.mseed")
    >>> print(st)  # doctest: +ELLIPSIS
    2 Trace(s) in Stream:
    BW.UH3..EHE | 2010-06-20T00:00:00.279999Z - ... | 200.0 Hz, 386 samples
    BW.UH3..EHZ | 2010-06-20T00:00:00.279999Z - ... | 200.0 Hz, 386 samples

    >>> from obspy import UTCDateTime
    >>> st = read("/path/to/test.mseed",
    ...           starttime=UTCDateTime("2003-05-29T02:16:00"),
    ...           selection="NL.*.*.?HZ")
    >>> print(st)  # doctest: +ELLIPSIS
    1 Trace(s) in Stream:
    NL.HGN.00.BHZ | 2003-05-29T02:15:59.993400Z - ... | 40.0 Hz, 5629 samples

    Read with ``details=True`` to read more details of the file if present.

    >>> st = read("/path/to/timingquality.mseed", details=True)
    >>> print(st[0].stats.mseed.blkt1001.timing_quality)
    55

    ``False`` means that the necessary information could not be found in the
    file.

    >>> print(st[0].stats.mseed.calibration_type)
    False

    Note that each change in timing quality from record to record may trigger a
    new Trace object to be created so the Stream object may contain many Trace
    objects if ``details=True`` is used.

    >>> print(len(st))
    101
    """
    # Parse the headonly and reclen flags.
    if headonly is True:
        unpack_data = 0
    else:
        unpack_data = 1
    if reclen is None:
        reclen = -1
    elif reclen not in VALID_RECORD_LENGTHS:
        msg = 'Invalid record length. Autodetection will be used.'
        warnings.warn(msg)
        reclen = -1

    # Determine the byte order.
    if header_byteorder == "=":
        header_byteorder = NATIVE_BYTEORDER

    if header_byteorder is None:
        header_byteorder = -1
    elif header_byteorder in [0, "0", "<"]:
        header_byteorder = 0
    elif header_byteorder in [1, "1", ">"]:
        header_byteorder = 1

    # The quality flag is no more supported. Raise a warning.
    if 'quality' in kwargs:
        msg = 'The quality flag is no longer supported in this version of ' + \
            'obspy.mseed. obspy.mseed.util has some functions with similar' + \
            ' behavior.'
        warnings.warn(msg, category=DeprecationWarning)

    # Parse some information about the file.
    if header_byteorder == 0:
        bo = "<"
    elif header_byteorder > 0:
        bo = ">"
    else:
        bo = None

    info = util.getRecordInformation(mseed_object, endian=bo)

    # Map the encoding to a readable string value.
    if info["encoding"] in ENCODINGS:
        info['encoding'] = ENCODINGS[info['encoding']][0]
    elif info["encoding"] in UNSUPPORTED_ENCODINGS:
        msg = ("Encoding '%s' (%i) is not supported by ObsPy. Please send "
               "the file to the ObsPy developers so that we can add "
               "support for it.") % \
            (UNSUPPORTED_ENCODINGS[info['encoding']], info['encoding'])
        raise ValueError(msg)
    else:
        msg = "Encoding '%i' is not a valid MiniSEED encoding." % \
            info['encoding']
        raise ValueError(msg)

    # Only keep information relevant for the whole file.
    info = {'encoding': info['encoding'],
            'filesize': info['filesize'],
            'record_length': info['record_length'],
            'byteorder': info['byteorder'],
            'number_of_records': info['number_of_records']}

    # If it's a file name just read it.
    if isinstance(mseed_object, (str, native_str)):
        # Read to NumPy array which is used as a buffer.
        bfrNp = np.fromfile(mseed_object, dtype=np.int8)
    elif hasattr(mseed_object, 'read'):
        # NOTE(review): np.fromstring is deprecated in newer NumPy;
        # np.frombuffer would yield a read-only array, so confirm the C
        # routine never writes into the buffer before switching.
        bfrNp = np.fromstring(mseed_object.read(), dtype=np.int8)

    # Get the record length from the sequence-number field (bytes 19-20);
    # fall back to the maximum of 4096 if it is not numeric.
    try:
        record_length = pow(2, int(''.join([chr(_i) for _i in bfrNp[19:21]])))
    except ValueError:
        record_length = 4096

    # Search for data records and pass only the data part to the underlying C
    # routine.
    offset = 0
    # 0 to 9 are defined in a row in the ASCII charset.
    min_ascii = ord('0')

    # Small function to check whether an array of ASCII values contains only
    # digits.  A def instead of an assigned lambda (PEP 8); bool() keeps the
    # plain-Python bool the 'is False' identity check below relies on.
    # NOTE(review): only the upper bound is checked - bytes below ord('0')
    # would also pass; confirm before tightening.
    def isdigit(x):
        return bool((x - min_ascii).max() <= 9)

    while True:
        # This should never happen
        if (isdigit(bfrNp[offset:offset + 6]) is False) or \
                (bfrNp[offset + 6] not in VALID_CONTROL_HEADERS):
            msg = 'Not a valid (Mini-)SEED file'
            raise Exception(msg)
        elif bfrNp[offset + 6] in SEED_CONTROL_HEADERS:
            # Skip full SEED control headers preceding the data records.
            offset += record_length
            continue
        break
    bfrNp = bfrNp[offset:]
    buflen = len(bfrNp)

    # If no selection is given pass None to the C function.
    if starttime is None and endtime is None and sourcename is None:
        selections = None
    else:
        select_time = SelectTime()
        selections = Selections()
        selections.timewindows.contents = select_time
        if starttime is not None:
            if not isinstance(starttime, UTCDateTime):
                msg = 'starttime needs to be a UTCDateTime object'
                raise ValueError(msg)
            selections.timewindows.contents.starttime = \
                util._convertDatetimeToMSTime(starttime)
        else:
            # HPTERROR results in no starttime.
            selections.timewindows.contents.starttime = HPTERROR
        if endtime is not None:
            if not isinstance(endtime, UTCDateTime):
                msg = 'endtime needs to be a UTCDateTime object'
                raise ValueError(msg)
            selections.timewindows.contents.endtime = \
                util._convertDatetimeToMSTime(endtime)
        else:
            # HPTERROR results in no endtime.
            selections.timewindows.contents.endtime = HPTERROR
        if sourcename is not None:
            if not isinstance(sourcename, (str, native_str)):
                msg = 'sourcename needs to be a string'
                raise ValueError(msg)
            # libmseed uses underscores as separators and allows filtering
            # after the dataquality which is disabled here to not confuse
            # users. (* == all data qualities)
            selections.srcname = (sourcename.replace('.', '_') + '_*').\
                encode('ascii', 'ignore')
        else:
            selections.srcname = b'*'
    all_data = []

    # Use a callback function to allocate the memory and keep track of the
    # data.
    def allocate_data(samplecount, sampletype):
        # Enhanced sanity checking for libmseed 2.10 can result in the
        # sampletype not being set. Just return an empty array in this case.
        if sampletype == b"\x00":
            data = np.empty(0)
        else:
            data = np.empty(samplecount, dtype=DATATYPES[sampletype])
        all_data.append(data)
        return data.ctypes.data
    # XXX: Do this properly!
    # Define Python callback function for use in C function. Return a long so
    # it hopefully works on 32 and 64 bit systems.
    allocData = C.CFUNCTYPE(C.c_long, C.c_int, C.c_char)(allocate_data)

    def log_error_or_warning(msg):
        if msg.startswith(b"ERROR: "):
            raise InternalMSEEDReadingError(msg[7:].strip())
        if msg.startswith(b"INFO: "):
            msg = msg[6:].strip()
            # Append the offset of the full SEED header if necessary. That way
            # the C code does not have to deal with it.
            # NOTE(review): msg is bytes here, so '"offset" in msg' raises a
            # TypeError on Python 3 - verify against the targeted Python
            # version before relying on this branch.
            if offset and "offset" in msg:
                msg = ("%s The file contains a %i byte dataless part at the "
                       "beginning. Make sure to add that to the reported "
                       "offset to get the actual location in the file." % (
                           msg, offset))
            warnings.warn(msg, InternalMSEEDReadingWarning)
    diag_print = C.CFUNCTYPE(C.c_void_p, C.c_char_p)(log_error_or_warning)

    def log_message(msg):
        print(msg[6:].strip())
    log_print = C.CFUNCTYPE(C.c_void_p, C.c_char_p)(log_message)

    # Coerce verbose to an int; None or a non-numeric value disables
    # verbosity.  Catch only the conversion errors instead of a bare except
    # so genuine bugs (e.g. KeyboardInterrupt) are not swallowed.
    try:
        verbose = int(verbose)
    except (TypeError, ValueError):
        verbose = 0

    lil = clibmseed.readMSEEDBuffer(
        bfrNp, buflen, selections, C.c_int8(unpack_data),
        reclen, C.c_int8(verbose), C.c_int8(details), header_byteorder,
        allocData, diag_print, log_print)

    # XXX: Check if the freeing works.
    del selections

    traces = []
    try:
        currentID = lil.contents
    # Return an empty stream if no traces are found.
    except ValueError:
        clibmseed.lil_free(lil)
        del lil
        return Stream()

    while True:
        # Init header with the essential information.
        header = {'network': currentID.network.strip(),
                  'station': currentID.station.strip(),
                  'location': currentID.location.strip(),
                  'channel': currentID.channel.strip(),
                  'mseed': {'dataquality': currentID.dataquality}}
        # Loop over segments.
        try:
            currentSegment = currentID.firstSegment.contents
        except ValueError:
            break
        while True:
            header['sampling_rate'] = currentSegment.samprate
            header['starttime'] = \
                util._convertMSTimeToDatetime(currentSegment.starttime)
            if details:
                timing_quality = currentSegment.timing_quality
                if timing_quality == 0xFF:  # 0xFF is mask for not known timing
                    timing_quality = False
                header['mseed']['blkt1001'] = {}
                header['mseed']['blkt1001']['timing_quality'] = timing_quality
                header['mseed']['calibration_type'] = \
                    currentSegment.calibration_type \
                    if currentSegment.calibration_type != -1 else False

            if headonly is False:
                # The data always will be in sequential order.
                data = all_data.pop(0)
                header['npts'] = len(data)
            else:
                data = np.array([])
                header['npts'] = currentSegment.samplecnt
            # Make sure to init the number of samples.
            # Py3k: convert to unicode
            header['mseed'] = dict((k, v.decode())
                                   if isinstance(v, bytes) else (k, v)
                                   for k, v in header['mseed'].items())
            header = dict((k, v.decode()) if isinstance(v, bytes) else (k, v)
                          for k, v in header.items())
            trace = Trace(header=header, data=data)
            # Append information.
            for key, value in info.items():
                setattr(trace.stats.mseed, key, value)
            traces.append(trace)
            # A Null pointer access results in a ValueError
            try:
                currentSegment = currentSegment.next.contents
            except ValueError:
                break
        try:
            currentID = currentID.next.contents
        except ValueError:
            break

    clibmseed.lil_free(lil)  # NOQA
    del lil  # NOQA
    return Stream(traces=traces)
Exemple #7
0
def download_and_split_mseed_bulk(client, client_name, starttime, endtime,
                                  stations, logger):
    """
    Download the channels of a list of stations in bulk, save the result
    to a temporary file, and split it at the record level to obtain the
    final per-channel MiniSEED files.

    :param client: Client instance used for the download; must provide a
        ``get_waveforms_bulk(bulk, filename=...)`` method.
    :param client_name: Name of the client, used only in the log message.
    :param starttime: Start time of the requested data.
    :param endtime: End time of the requested data.
    :param stations: Iterable of station objects. Each must expose
        ``network``, ``station`` and ``channels``; each channel must
        expose ``location``, ``channel`` and ``mseed_filename``.
    :param logger: Logger used to report the download result.
    :return: The filenames that received at least one record.
    """
    # Build the bulk request and map each "NET.STA.LOC.CHAN" id to its
    # target filename in a single pass.
    bulk = []
    filenames = {}
    for station in stations:
        for channel in station.channels:
            net, sta, loc, chan = station.network, station.station, \
                channel.location, channel.channel
            filenames["%s.%s.%s.%s" % (net, sta, loc, chan)] = \
                channel.mseed_filename
            bulk.append((net, sta, loc, chan, starttime, endtime))

    temp_filename = NamedTemporaryFile().name

    # Initialized before the try block so the final log line is always
    # well-defined.
    open_files = {}
    try:
        client.get_waveforms_bulk(bulk, filename=temp_filename)

        # If the download succeeds, split the temporary file into multiple
        # new ones, copying whole MiniSEED records verbatim.
        file_size = os.path.getsize(temp_filename)
        with open(temp_filename, "rb") as fh:
            try:
                while True:
                    # Anything shorter than 256 bytes at the end of the
                    # file cannot be a complete record anymore.
                    if fh.tell() >= (file_size - 256):
                        break
                    info = getRecordInformation(fh)

                    # Read the SEED ids straight from the fixed record
                    # header (12 bytes starting at offset 8) without
                    # decoding the payload, then rewind.
                    position = fh.tell()
                    fh.seek(position + 8, 0)
                    data = fh.read(12)
                    info["station"] = data[:5].strip().decode()
                    info["location"] = data[5:7].strip().decode()
                    info["channel"] = data[7:10].strip().decode()
                    info["network"] = data[10:12].strip().decode()
                    fh.seek(position, 0)

                    channel_id = "%s.%s.%s.%s" % (
                        info["network"], info["station"], info["location"],
                        info["channel"])
                    # Sometimes the services return something noone wants:
                    # skip records for channels that were never requested.
                    if channel_id not in filenames:
                        fh.read(info["record_length"])
                        continue
                    filename = filenames[channel_id]
                    if filename not in open_files:
                        open_files[filename] = open(filename, "wb")
                    open_files[filename].write(fh.read(info["record_length"]))
            finally:
                # Close the file OBJECTS. The original code iterated the
                # dict keys (filename strings), so str.close() raised and
                # the bare except silently left every handle open.
                for f in open_files.values():
                    try:
                        f.close()
                    except OSError:
                        pass
    finally:
        # Best-effort removal of the temporary download file.
        try:
            os.remove(temp_filename)
        except OSError:
            pass
    logger.info("Client '%s' - Successfully downloaded %i channels (of %i)" % (
        client_name, len(open_files), len(bulk)))
    return open_files.keys()