Example #1
 def test_create_preview_with_unrounded_sample_rate(self):
     """
     Test for creating preview.
     """
     tr = Trace(data=np.arange(4000))
     tr.stats.sampling_rate = 124.999992371
     tr.stats.starttime = UTCDateTime("1989-10-06T14:31:14.000000Z")
     create_preview(tr, delta=30)
Example #2
 def test_createPreviewWithUnroundedSampleRate(self):
     """
     Test for creating preview.
     """
     tr = Trace(data=np.arange(4000))
     tr.stats.sampling_rate = 124.999992371
     tr.stats.starttime = UTCDateTime("1989-10-06T14:31:14.000000Z")
     create_preview(tr, delta=30)
Example #3
 def test_createPreviewWithVerySmallSampleRate(self):
     """
     Test for creating previews with samples per slice less than 1.
     """
     tr = Trace(data=np.arange(4000))
     # 1 - should raise
     tr.stats.sampling_rate = 0.1
     self.assertRaises(ValueError, create_preview, tr)
     # 2 - should work
     tr.stats.sampling_rate = 1
     create_preview(tr)
Example #4
 def test_create_preview_with_very_small_sample_rate(self):
     """
     Test for creating previews with samples per slice less than 1.
     """
     tr = Trace(data=np.arange(4000))
     # 1 - should raise
     tr.stats.sampling_rate = 0.1
     with pytest.raises(ValueError):
         create_preview(tr)
     # 2 - should work
     tr.stats.sampling_rate = 1
     create_preview(tr)
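
Note: the examples above (and below) exercise ObsPy's create_preview helper, which compresses a Trace into one value per delta-second slice, each value being the slice's maximum minus minimum amplitude. A minimal sketch of a direct call follows; the import path obspy.core.preview is an assumption based on recent ObsPy releases, so check it against the installed version.

import numpy as np
from obspy import Trace, UTCDateTime
from obspy.core.preview import create_preview  # assumed import path

# One hour of data at 1 Hz, linearly increasing.
tr = Trace(data=np.arange(3600, dtype=np.float64))
tr.stats.sampling_rate = 1.0
tr.stats.starttime = UTCDateTime("2012-01-01T00:00:00Z")

# One preview sample per 60 s slice; each value is max - min of that slice.
preview = create_preview(tr, delta=60)
print(preview.stats.delta)   # 60
print(preview.stats.npts)    # 60 slices for one hour of data
print(preview.data[:3])      # roughly [59, 59, 59] for a linear ramp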
Example #5
 def test_create_preview_with_masked_arrays(self):
     """
     Test for creating preview using masked arrays.
     """
     # 1 - masked arrays without masked values
     trace = Trace(data=np.ma.ones(600))
     preview = create_preview(trace, delta=60)
     # no masked values, so nothing gets replaced with -1
     np.testing.assert_array_equal(preview.data, np.array(10 * [0]))
     # 2 - masked arrays with masked values
     trace = Trace(data=np.ma.ones(600))
     trace.data.mask = [False] * 600
     trace.data.mask[200:400] = True
     preview = create_preview(trace, delta=60)
     # masked values get replaced with -1
     np.testing.assert_array_equal(preview.data,
                                   np.array(4 * [0] + 2 * [-1] + 4 * [0]))
Example #6
 def test_createPreviewWithMaskedArrays(self):
     """
     Test for creating preview using masked arrays.
     """
     # 1 - masked arrays without masked values
     trace = Trace(data=np.ma.ones(600))
     preview = create_preview(trace, delta=60)
     # no masked values, so nothing gets replaced with -1
     np.testing.assert_array_equal(preview.data, np.array(10 * [0]))
     # 2 - masked arrays with masked values
     trace = Trace(data=np.ma.ones(600))
     trace.data.mask = [False] * 600
     trace.data.mask[200:400] = True
     preview = create_preview(trace, delta=60)
     # masked values get replaced with -1
     np.testing.assert_array_equal(preview.data,
                                   np.array(4 * [0] + 2 * [-1] + 4 * [0]))
Example #7
 def test_createPreview(self):
     """
     Test for creating preview.
     """
     # Wrong delta should raise.
     self.assertRaises(TypeError, create_preview,
                       Trace(data=np.arange(10)), 60.0)
     self.assertRaises(TypeError, create_preview,
                       Trace(data=np.arange(10)), 0)
     # 1
     trace = Trace(data=np.array([0] * 28 + [0, 1] * 30 + [-1, 1] * 29))
     trace.stats.starttime = UTCDateTime(32)
     preview = create_preview(trace, delta=60)
     self.assertEqual(preview.stats.starttime, UTCDateTime(60))
     self.assertEqual(preview.stats.endtime, UTCDateTime(120))
     self.assertEqual(preview.stats.delta, 60)
     np.testing.assert_array_equal(preview.data, np.array([1, 2]))
     # 2
     trace = Trace(data=np.arange(0, 30))
     preview = create_preview(trace, delta=60)
     self.assertEqual(preview.stats.starttime, UTCDateTime(0))
     self.assertEqual(preview.stats.endtime, UTCDateTime(0))
     self.assertEqual(preview.stats.delta, 60)
     np.testing.assert_array_equal(preview.data, np.array([29]))
     # 3
     trace = Trace(data=np.arange(0, 60))
     preview = create_preview(trace, delta=60)
     self.assertEqual(preview.stats.starttime, UTCDateTime(0))
     self.assertEqual(preview.stats.endtime, UTCDateTime(0))
     self.assertEqual(preview.stats.delta, 60)
     np.testing.assert_array_equal(preview.data, np.array([59]))
     # 4
     trace = Trace(data=np.arange(0, 90))
     preview = create_preview(trace, delta=60)
     self.assertEqual(preview.stats.starttime, UTCDateTime(0))
     self.assertEqual(preview.stats.endtime, UTCDateTime(60))
     self.assertEqual(preview.stats.delta, 60)
     np.testing.assert_array_equal(preview.data, np.array([59, 29]))
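
Two behaviors in the test above are easy to miss: delta must be an int (a float such as 60.0, or 0, raises TypeError), and when the trace does not start on a multiple of delta the preview's starttime is snapped forward to the next multiple, so leading samples before that boundary do not contribute. A small sketch of both, under the same import-path assumption as before:

import numpy as np
import pytest
from obspy import Trace, UTCDateTime
from obspy.core.preview import create_preview  # assumed import path

tr = Trace(data=np.arange(120))        # 1 Hz by default
tr.stats.starttime = UTCDateTime(45)   # 45 s past the epoch

with pytest.raises(TypeError):
    create_preview(tr, delta=60.0)     # non-int delta is rejected

preview = create_preview(tr, delta=60)
# starttime is aligned to the next 60 s boundary, as in case 1 above
assert preview.stats.starttime == UTCDateTime(60)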
Example #8
def worker(_i, input_queue, work_queue, output_queue, log_queue, mappings={}):
    try:
        # fetch and initialize all possible waveform feature plug-ins
        all_features = {}
        for (key, ep) in _get_entry_points('obspy.db.feature').items():
            try:
                # load plug-in
                cls = ep.load()
                # initialize class
                func = cls().process
            except Exception as e:
                msg = 'Could not initialize feature %s. (%s)'
                log_queue.append(msg % (key, str(e)))
                continue
            all_features[key] = {}
            all_features[key]['run'] = func
            try:
                all_features[key]['indexer_kwargs'] = cls['indexer_kwargs']
            except Exception:
                all_features[key]['indexer_kwargs'] = {}
        # loop through input queue
        while True:
            # fetch an unprocessed item
            try:
                filepath, (path, file, features) = input_queue.popitem()
            except Exception:
                continue
            # skip item if already in work queue
            if filepath in work_queue:
                continue
            work_queue.append(filepath)
            # get additional kwargs for read method from waveform plug-ins
            kwargs = {'verify_chksum': False}
            for feature in features:
                if feature not in all_features:
                    log_queue.append('%s: Unknown feature %s' % (filepath,
                                                                 feature))
                    continue
                kwargs.update(all_features[feature]['indexer_kwargs'])
            # read file and get file stats
            try:
                stats = os.stat(filepath)
                stream = read(filepath, **kwargs)
                # get gap and overlap information
                gap_list = stream.get_gaps()
                # merge channels and replace gaps/overlaps with 0 to prevent
                # generation of masked arrays
                stream.merge(fill_value=0)
            except Exception as e:
                msg = '[Reading stream] %s: %s'
                log_queue.append(msg % (filepath, e))
                try:
                    work_queue.remove(filepath)
                except Exception:
                    pass
                continue
            # build up dictionary of gaps and overlaps for easier lookup
            gap_dict = {}
            for gap in gap_list:
                id = '.'.join(gap[0:4])
                temp = {
                    'gap': gap[6] >= 0,
                    'starttime': gap[4].datetime,
                    'endtime': gap[5].datetime,
                    'samples': abs(gap[7])
                }
                gap_dict.setdefault(id, []).append(temp)
            # loop through traces
            dataset = []
            for trace in stream:
                result = {}
                # general file information
                result['mtime'] = int(stats.st_mtime)
                result['size'] = stats.st_size
                result['path'] = path
                result['file'] = file
                result['filepath'] = filepath
                # trace information
                result['format'] = trace.stats._format
                result['station'] = trace.stats.station
                result['location'] = trace.stats.location
                result['channel'] = trace.stats.channel
                result['network'] = trace.stats.network
                result['starttime'] = trace.stats.starttime.datetime
                result['endtime'] = trace.stats.endtime.datetime
                result['calib'] = trace.stats.calib
                result['npts'] = trace.stats.npts
                result['sampling_rate'] = trace.stats.sampling_rate
                # check for any id mappings
                if trace.id in mappings:
                    old_id = trace.id
                    for mapping in mappings[old_id]:
                        if trace.stats.starttime and \
                           trace.stats.starttime > mapping['endtime']:
                            continue
                        if trace.stats.endtime and \
                           trace.stats.endtime < mapping['starttime']:
                            continue
                        result['network'] = mapping['network']
                        result['station'] = mapping['station']
                        result['location'] = mapping['location']
                        result['channel'] = mapping['channel']
                        msg = "Mapping '%s' to '%s.%s.%s.%s'" % \
                            (old_id, mapping['network'], mapping['station'],
                             mapping['location'], mapping['channel'])
                        log_queue.append(msg)
                # gaps/overlaps for current trace
                result['gaps'] = gap_dict.get(trace.id, [])
                # apply feature functions
                result['features'] = []
                for key in features:
                    if key not in all_features:
                        continue
                    try:
                        # run plug-in and update results
                        temp = all_features[key]['run'](trace)
                        for feature_key, value in temp.items():
                            result['features'].append(
                                {'key': feature_key, 'value': value})
                    except Exception as e:
                        msg = '[Processing feature] %s: %s'
                        log_queue.append(msg % (filepath, e))
                        continue
                # generate preview of trace
                result['preview'] = None
                if '.LOG.L.' not in file or trace.stats.channel != 'LOG':
                    # create previews only for non-log files (see issue #400)
                    try:
                        trace = create_preview(trace, 30)
                        result['preview'] = trace.data.dumps()
                    except ValueError:
                        pass
                    except Exception as e:
                        msg = '[Creating preview] %s: %s'
                        log_queue.append(msg % (filepath, e))
                # update dataset
                dataset.append(result)
            del stream
            # return results to main loop
            try:
                output_queue.append(dataset)
            except Exception:
                pass
            try:
                work_queue.remove(filepath)
            except Exception:
                pass
    except KeyboardInterrupt:
        return
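
In this worker the preview that ends up in the output is trace.data.dumps(), i.e. the pickled bytes of the preview's NumPy array. How that blob is read back is outside the snippet; the pickle.loads round-trip sketched below is an assumption about the consumer side (ndarray.dumps() is simply a pickle of the array):

import pickle
import numpy as np
from obspy import Trace
from obspy.core.preview import create_preview  # assumed import path

tr = Trace(data=np.arange(600, dtype=np.float64))
preview = create_preview(tr, delta=30)

# What the worker stores: pickled ndarray bytes.
blob = preview.data.dumps()

# One way a reader could get the preview samples back.
restored = pickle.loads(blob)
assert np.array_equal(restored, preview.data)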
Example #9
def process_file(filename):
    """
    Process a single waveform file.

    This is a bit more complex as it needs to update existing database
    objects and cannot just always create new ones. Otherwise the
    identifiers quickly reach very high numbers.
    """
    # Resolve symlinks and make a canonical simple path.
    filename = os.path.realpath(os.path.normpath(os.path.abspath(filename)))

    # ------------------------------------------------------------------------
    # Step 1: Get the file if it exists.
    try:
        file = models.File.objects.get(path__name=os.path.dirname(filename),
                                       name=os.path.basename(filename))

        # This path is only reached if the file exists. Check size, mtime,
        # and ctime and if it all remains the same, return.
        stats = os.stat(filename)
        mtime = to_datetime(stats.st_mtime)
        ctime = to_datetime(stats.st_ctime)
        size = int(stats.st_size)

        # Nothing to do if nothing changed.
        if file.size == size and file.mtime == mtime and file.ctime == ctime:
            return

    # If it does not exist, create it in the next step.
    except models.File.DoesNotExist:
        file = None

    # ------------------------------------------------------------------------
    # Step 2: Read the file and perform a couple of sanity checks. Delete a
    #         possibly existing file.
    try:
        stream = read(filename, verify_chksum=False)
    except:
        # Delete if invalid file.
        if file is not None:
            file.delete()
        # Reraise the exception.
        raise

    if len(stream) == 0:
        # Delete if invalid file.
        if file is not None:
            file.delete()
        msg = "'%s' is a valid waveform file but contains no actual data"
        raise JaneWaveformTaskException(msg % filename)

    # Log channels for example are special as they have no sampling rate.
    if any(tr.stats.sampling_rate == 0 for tr in stream):
        # Make sure there is only one set of network, station,
        # location, and channel.
        ids = set(tr.id for tr in stream)
        if len(ids) != 1:
            # Delete if invalid file.
            if file is not None:
                file.delete()
            raise ValueError("File has a trace with sampling rate zero "
                             "and more than one different id.")

    # ------------------------------------------------------------------------
    # Step 3: Parse the file. Figure out which traces changed.
    #         Make sure it either gets created for a file or not.
    with transaction.atomic():
        # Create the file object if it does not exist.
        if file is None:
            path_obj = models.Path.objects.get_or_create(
                name=os.path.dirname(os.path.abspath(filename)))[0]
            models.File.objects. \
                filter(path=path_obj, name=os.path.basename(filename)). \
                delete()
            file = models.File.objects. \
                create(path=path_obj, name=os.path.basename(filename))

        # set format
        file.format = stream[0].stats._format

        # Collect information about all traces in a dictionary.
        traces_in_file = {}

        # Log channels for example are special as they have no sampling rate.
        if any(tr.stats.sampling_rate == 0 for tr in stream):
            starttime = min(tr.stats.starttime for tr in stream)
            endtime = max(tr.stats.endtime for tr in stream)
            if starttime == endtime:
                starttime += 0.001

            file.gaps = 0
            file.overlaps = 0
            file.save()

            try:
                quality = stream[0].stats.mseed.dataquality
            except AttributeError:
                quality = None

            traces_in_file[0] = {
                "starttime": starttime,
                "endtime": endtime,
                "network": stream[0].stats.network.upper(),
                "station": stream[0].stats.station.upper(),
                "location": stream[0].stats.location.upper(),
                "channel": stream[0].stats.channel.upper(),
                "sampling_rate": stream[0].stats.sampling_rate,
                "npts": sum(tr.stats.npts for tr in stream),
                "duration": endtime - starttime,
                "quality": quality,
                "preview_trace": None,
                "pos": 0
            }
        else:
            # get number of gaps and overlaps per file
            gap_list = stream.get_gaps()
            file.gaps = len([g for g in gap_list if g[6] >= 0])
            file.overlaps = len([g for g in gap_list if g[6] < 0])
            file.save()
            for pos, trace in enumerate(stream):
                try:
                    quality = trace.stats.mseed.dataquality
                except AttributeError:
                    quality = None

                # Preview is optional. For some traces, e.g. LOG channels it
                # does not work.
                try:
                    preview_trace = create_preview(trace, 60)
                except Exception:
                    preview_trace = None
                else:
                    preview_trace = list(map(float, preview_trace.data))

                traces_in_file[pos] = {
                    "starttime": trace.stats.starttime,
                    "endtime": trace.stats.endtime,
                    "network": trace.stats.network.upper(),
                    "station": trace.stats.station.upper(),
                    "location": trace.stats.location.upper(),
                    "channel": trace.stats.channel.upper(),
                    "sampling_rate": trace.stats.sampling_rate,
                    "npts": trace.stats.npts,
                    "duration": trace.stats.endtime - trace.stats.starttime,
                    "quality": quality,
                    "preview_trace": preview_trace,
                    "pos": pos
                }

        # Get all existing traces.
        for tr_db in models.ContinuousTrace.objects.filter(file=file):
            # Attempt to get the existing trace object.
            if tr_db.pos in traces_in_file:
                tr = traces_in_file[tr_db.pos]
                # Delete in the dictionary.
                del traces_in_file[tr_db.pos]

                tr_db.timerange = DateTimeTZRange(
                    lower=tr["starttime"].datetime,
                    upper=tr["endtime"].datetime)
                tr_db.network = tr["network"]
                tr_db.station = tr["station"]
                tr_db.location = tr["location"]
                tr_db.channel = tr["channel"]
                tr_db.sampling_rate = tr["sampling_rate"]
                tr_db.npts = tr["npts"]
                tr_db.duration = tr["duration"]
                tr_db.quality = tr["quality"]
                tr_db.preview_trace = tr["preview_trace"]
                tr_db.pos = tr["pos"]
                tr_db.save()

            # If it does not exist in the waveform file, delete it here as
            # it is (for whatever reason) no longer in the file.
            else:
                tr_db.delete()

        # Add remaining items.
        for tr in traces_in_file.values():
            tr_db = models.ContinuousTrace(file=file,
                                           timerange=DateTimeTZRange(
                                               lower=tr["starttime"].datetime,
                                               upper=tr["endtime"].datetime))
            tr_db.network = tr["network"]
            tr_db.station = tr["station"]
            tr_db.location = tr["location"]
            tr_db.channel = tr["channel"]
            tr_db.sampling_rate = tr["sampling_rate"]
            tr_db.npts = tr["npts"]
            tr_db.duration = tr["duration"]
            tr_db.quality = tr["quality"]
            tr_db.preview_trace = tr["preview_trace"]
            tr_db.pos = tr["pos"]
            tr_db.save()
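
Unlike Example #8, which pickles the raw array, process_file stores the preview as list(map(float, preview_trace.data)), a plain list of Python floats that serializes cleanly (e.g. to JSON). The reverse conversion sketched below is an assumption about the reader side, not code from this project:

import numpy as np
from obspy import Trace
from obspy.core.preview import create_preview  # assumed import path

tr = Trace(data=np.arange(600, dtype=np.float64))
preview_trace = create_preview(tr, 60)

# Stored form: JSON-friendly list of floats.
stored = list(map(float, preview_trace.data))

# Reader side: rebuild an array for plotting or further processing.
restored = np.array(stored, dtype=np.float64)
assert np.allclose(restored, preview_trace.data)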
Example #10
def worker(_i, input_queue, work_queue, output_queue, log_queue, mappings={}):
    try:
        # fetch and initialize all possible waveform feature plug-ins
        all_features = {}
        for (key, ep) in _get_entry_points('obspy.db.feature').items():
            try:
                # load plug-in
                cls = ep.load()
                # initialize class
                func = cls().process
            except Exception as e:
                msg = 'Could not initialize feature %s. (%s)'
                log_queue.append(msg % (key, str(e)))
                continue
            all_features[key] = {}
            all_features[key]['run'] = func
            try:
                all_features[key]['indexer_kwargs'] = cls['indexer_kwargs']
            except Exception:
                all_features[key]['indexer_kwargs'] = {}
        # loop through input queue
        while True:
            # fetch an unprocessed item
            try:
                filepath, (path, file, features) = input_queue.popitem()
            except Exception:
                continue
            # skip item if already in work queue
            if filepath in work_queue:
                continue
            work_queue.append(filepath)
            # get additional kwargs for read method from waveform plug-ins
            kwargs = {'verify_chksum': False}
            for feature in features:
                if feature not in all_features:
                    log_queue.append('%s: Unknown feature %s' %
                                     (filepath, feature))
                    continue
                kwargs.update(all_features[feature]['indexer_kwargs'])
            # read file and get file stats
            try:
                stats = os.stat(filepath)
                stream = read(filepath, **kwargs)
                # get gap and overlap information
                gap_list = stream.get_gaps()
                # merge channels and replace gaps/overlaps with 0 to prevent
                # generation of masked arrays
                stream.merge(fill_value=0)
            except Exception as e:
                msg = '[Reading stream] %s: %s'
                log_queue.append(msg % (filepath, e))
                try:
                    work_queue.remove(filepath)
                except Exception:
                    pass
                continue
            # build up dictionary of gaps and overlaps for easier lookup
            gap_dict = {}
            for gap in gap_list:
                id = '.'.join(gap[0:4])
                temp = {
                    'gap': gap[6] >= 0,
                    'starttime': gap[4].datetime,
                    'endtime': gap[5].datetime,
                    'samples': abs(gap[7])
                }
                gap_dict.setdefault(id, []).append(temp)
            # loop through traces
            dataset = []
            for trace in stream:
                result = {}
                # general file information
                result['mtime'] = int(stats.st_mtime)
                result['size'] = stats.st_size
                result['path'] = path
                result['file'] = file
                result['filepath'] = filepath
                # trace information
                result['format'] = trace.stats._format
                result['station'] = trace.stats.station
                result['location'] = trace.stats.location
                result['channel'] = trace.stats.channel
                result['network'] = trace.stats.network
                result['starttime'] = trace.stats.starttime.datetime
                result['endtime'] = trace.stats.endtime.datetime
                result['calib'] = trace.stats.calib
                result['npts'] = trace.stats.npts
                result['sampling_rate'] = trace.stats.sampling_rate
                # check for any id mappings
                if trace.id in mappings:
                    old_id = trace.id
                    for mapping in mappings[old_id]:
                        if trace.stats.starttime and \
                           trace.stats.starttime > mapping['endtime']:
                            continue
                        if trace.stats.endtime and \
                           trace.stats.endtime < mapping['starttime']:
                            continue
                        result['network'] = mapping['network']
                        result['station'] = mapping['station']
                        result['location'] = mapping['location']
                        result['channel'] = mapping['channel']
                        msg = "Mapping '%s' to '%s.%s.%s.%s'" % \
                            (old_id, mapping['network'], mapping['station'],
                             mapping['location'], mapping['channel'])
                        log_queue.append(msg)
                # gaps/overlaps for current trace
                result['gaps'] = gap_dict.get(trace.id, [])
                # apply feature functions
                result['features'] = []
                for key in features:
                    if key not in all_features:
                        continue
                    try:
                        # run plug-in and update results
                        temp = all_features[key]['run'](trace)
                        for feature_key, value in temp.items():
                            result['features'].append({
                                'key': feature_key,
                                'value': value
                            })
                    except Exception as e:
                        msg = '[Processing feature] %s: %s'
                        log_queue.append(msg % (filepath, e))
                        continue
                # generate preview of trace
                result['preview'] = None
                if '.LOG.L.' not in file or trace.stats.channel != 'LOG':
                    # create previews only for non-log files (see issue #400)
                    try:
                        trace = create_preview(trace, 30)
                        result['preview'] = trace.data.dumps()
                    except ValueError:
                        pass
                    except Exception as e:
                        msg = '[Creating preview] %s: %s'
                        log_queue.append(msg % (filepath, e))
                # update dataset
                dataset.append(result)
            del stream
            # return results to main loop
            try:
                output_queue.append(dataset)
            except Exception:
                pass
            try:
                work_queue.remove(filepath)
            except Exception:
                pass
    except KeyboardInterrupt:
        return
Example #11
 def setUpClass(cls):
     # Create an in-memory database only once for the test suite
     url = 'sqlite:///:memory:'
     cls.client = Client(url)
     # add paths
     session = cls.client.session()
     path1 = WaveformPath({'path': '/path/to/1'})
     path2 = WaveformPath({'path': '/path/to/2'})
     session.add_all([path1, path2])
     # add files
     file1 = WaveformFile({
         'file': 'file_001.mseed',
         'size': 2000,
         'mtime': UTCDateTime('20120101').timestamp,
         'format': 'MSEED'
     })
     file2 = WaveformFile({
         'file': 'file_002.mseed',
         'size': 2000,
         'mtime': UTCDateTime('20120102').timestamp,
         'format': 'MSEED'
     })
     file3 = WaveformFile({
         'file': 'file_001.gse2',
         'size': 2000,
         'mtime': UTCDateTime('20120102').timestamp,
         'format': 'GSE2'
     })
     path1.files.append(file1)
     path1.files.append(file2)
     path2.files.append(file3)
     session.add_all([file1, file2, file3])
     # add channels
     channel1 = WaveformChannel({
         'network': 'BW',
         'station': 'MANZ',
         'location': '',
         'channel': 'EHZ',
         'starttime': UTCDateTime('2012-01-01 00:00:00.000000').datetime,
         'endtime': UTCDateTime('2012-01-01 23:59:59.999999').datetime,
         'npts': 3000,
         'sampling_rate': 100.0
     })
     channel2 = WaveformChannel({
         'network': 'BW',
         'station': 'MANZ',
         'location': '',
         'channel': 'EHZ',
         'starttime': UTCDateTime('2012-01-02 01:00:00.000000').datetime,
         'endtime': UTCDateTime('2012-01-02 23:59:59.999999').datetime,
         'npts': 3000,
         'sampling_rate': 100.0
     })
     # create a channel with preview
     header = {
         'network': 'GE',
         'station': 'FUR',
         'location': '00',
         'channel': 'BHZ',
         'starttime': UTCDateTime('2012-01-01 00:00:00.000000'),
         'sampling_rate': 100.0
     }
     # linear trend
     data = np.linspace(0, 1, 3000000)
     # some peaks
     data[20000] = 15
     data[20001] = -15
     data[1000000] = 22
     data[1000001] = -22
     data[2000000] = 14
     data[2000001] = -14
     tr = Trace(data=data, header=header)
     cls.preview = create_preview(tr, 30).data
     header = dict(tr.stats)
     header['starttime'] = tr.stats.starttime.datetime
     header['endtime'] = tr.stats.endtime.datetime
     channel3 = WaveformChannel(header)
     channel3.preview = cls.preview.dumps()
     file1.channels.append(channel1)
     file2.channels.append(channel2)
     file3.channels.append(channel3)
     session.add_all([channel1, channel2, channel3])
     session.commit()
     session.close()
Example #12
 def setUpClass(cls):
     # Create an in-memory database only once for the test suite
     url = 'sqlite:///:memory:'
     cls.client = Client(url)
     # add paths
     session = cls.client.session()
     path1 = WaveformPath({'path': '/path/to/1'})
     path2 = WaveformPath({'path': '/path/to/2'})
     session.add_all([path1, path2])
     # add files
     file1 = WaveformFile(
         {'file': 'file_001.mseed', 'size': 2000,
             'mtime': UTCDateTime('20120101').timestamp, 'format': 'MSEED'})
     file2 = WaveformFile(
         {'file': 'file_002.mseed', 'size': 2000,
             'mtime': UTCDateTime('20120102').timestamp, 'format': 'MSEED'})
     file3 = WaveformFile(
         {'file': 'file_001.gse2', 'size': 2000,
             'mtime': UTCDateTime('20120102').timestamp, 'format': 'GSE2'})
     path1.files.append(file1)
     path1.files.append(file2)
     path2.files.append(file3)
     session.add_all([file1, file2, file3])
     # add channels
     channel1 = WaveformChannel(
         {'network': 'BW', 'station': 'MANZ',
             'location': '', 'channel': 'EHZ',
             'starttime':
             UTCDateTime('2012-01-01 00:00:00.000000').datetime,
             'endtime': UTCDateTime('2012-01-01 23:59:59.999999').datetime,
             'npts': 3000, 'sampling_rate': 100.0})
     channel2 = WaveformChannel(
         {'network': 'BW', 'station': 'MANZ',
             'location': '', 'channel': 'EHZ',
             'starttime':
             UTCDateTime('2012-01-02 01:00:00.000000').datetime,
             'endtime':
             UTCDateTime('2012-01-02 23:59:59.999999').datetime,
             'npts': 3000,
             'sampling_rate': 100.0})
     # create a channel with preview
     header = {'network': 'GE', 'station': 'FUR',
               'location': '00', 'channel': 'BHZ',
               'starttime': UTCDateTime('2012-01-01 00:00:00.000000'),
               'sampling_rate': 100.0}
     # linear trend
     data = np.linspace(0, 1, 3000000)
     # some peaks
     data[20000] = 15
     data[20001] = -15
     data[1000000] = 22
     data[1000001] = -22
     data[2000000] = 14
     data[2000001] = -14
     tr = Trace(data=data, header=header)
     cls.preview = create_preview(tr, 30).data
     header = dict(tr.stats)
     header['starttime'] = tr.stats.starttime.datetime
     header['endtime'] = tr.stats.endtime.datetime
     channel3 = WaveformChannel(header)
     channel3.preview = cls.preview.dumps()
     file1.channels.append(channel1)
     file2.channels.append(channel2)
     file3.channels.append(channel3)
     session.add_all([channel1, channel2, channel3])
     session.commit()
     session.close()
Example #13
 def setUpClass(cls):
     # Create an in-memory database only once for the test suite
     url = "sqlite:///:memory:"
     cls.client = Client(url)
     # add paths
     session = cls.client.session()
     path1 = WaveformPath({"path": "/path/to/1"})
     path2 = WaveformPath({"path": "/path/to/2"})
     session.add_all([path1, path2])
     # add files
     file1 = WaveformFile(
         {"file": "file_001.mseed", "size": 2000, "mtime": UTCDateTime("20120101").timestamp, "format": "MSEED"}
     )
     file2 = WaveformFile(
         {"file": "file_002.mseed", "size": 2000, "mtime": UTCDateTime("20120102").timestamp, "format": "MSEED"}
     )
     file3 = WaveformFile(
         {"file": "file_001.gse2", "size": 2000, "mtime": UTCDateTime("20120102").timestamp, "format": "GSE2"}
     )
     path1.files.append(file1)
     path1.files.append(file2)
     path2.files.append(file3)
     session.add_all([file1, file2, file3])
     # add channels
     channel1 = WaveformChannel(
         {
             "network": "BW",
             "station": "MANZ",
             "location": "",
             "channel": "EHZ",
             "starttime": UTCDateTime("2012-01-01 00:00:00.000000").datetime,
             "endtime": UTCDateTime("2012-01-01 23:59:59.999999").datetime,
             "npts": 3000,
             "sampling_rate": 100.0,
         }
     )
     channel2 = WaveformChannel(
         {
             "network": "BW",
             "station": "MANZ",
             "location": "",
             "channel": "EHZ",
             "starttime": UTCDateTime("2012-01-02 01:00:00.000000").datetime,
             "endtime": UTCDateTime("2012-01-02 23:59:59.999999").datetime,
             "npts": 3000,
             "sampling_rate": 100.0,
         }
     )
     # create a channel with preview
     header = {
         "network": "GE",
         "station": "FUR",
         "location": "00",
         "channel": "BHZ",
         "starttime": UTCDateTime("2012-01-01 00:00:00.000000"),
         "sampling_rate": 100.0,
     }
     # linear trend
     data = np.linspace(0, 1, 3000000)
     # some peaks
     data[20000] = 15
     data[20001] = -15
     data[1000000] = 22
     data[1000001] = -22
     data[2000000] = 14
     data[2000001] = -14
     tr = Trace(data=data, header=header)
     cls.preview = create_preview(tr, 30).data
     header = dict(tr.stats)
     header["starttime"] = tr.stats.starttime.datetime
     header["endtime"] = tr.stats.endtime.datetime
     channel3 = WaveformChannel(header)
     channel3.preview = cls.preview.dumps()
     file1.channels.append(channel1)
     file2.channels.append(channel2)
     file3.channels.append(channel3)
     session.add_all([channel1, channel2, channel3])
     session.commit()
     session.close()
Example #14
def process_file(filename):
    """
    Process a single waveform file.

    This is a bit more complex as it needs to update existing database
    objects and cannot just always create new ones. Otherwise the
    identifiers quickly reach very high numbers.
    """
    # Resolve symlinks and make a canonical simple path.
    filename = os.path.realpath(os.path.normpath(os.path.abspath(filename)))

    # ------------------------------------------------------------------------
    # Step 1: Get the file if it exists.
    try:
        file = models.File.objects.get(
            path__name=os.path.dirname(filename),
            name=os.path.basename(filename))

        # This path is only reached if the file exists. Check size, mtime,
        # and ctime and if it all remains the same, return.
        stats = os.stat(filename)
        mtime = to_datetime(stats.st_mtime)
        ctime = to_datetime(stats.st_ctime)
        size = int(stats.st_size)

        # Nothing to do if nothing changed.
        if file.size == size and file.mtime == mtime and file.ctime == ctime:
            return

    # If it does not exist, create it in the next step.
    except models.File.DoesNotExist:
        file = None

    # ------------------------------------------------------------------------
    # Step 2: Read the file and perform a couple of sanity checks. Delete a
    #         possibly existing file.
    try:
        stream = read(filename)
    except:
        # Delete if invalid file.
        if file is not None:
            file.delete()
        # Reraise the exception.
        raise

    if len(stream) == 0:
        # Delete if invalid file.
        if file is not None:
            file.delete()
        msg = "'%s' is a valid waveform file but contains no actual data"
        raise JaneWaveformTaskException(msg % filename)

    # Log channels for example are special as they have no sampling rate.
    if any(tr.stats.sampling_rate == 0 for tr in stream):
        # Make sure there is only one set of network, station,
        # location, and channel.
        ids = set(tr.id for tr in stream)
        if len(ids) != 1:
            # Delete if invalid file.
            if file is not None:
                file.delete()
            raise ValueError("File has a trace with sampling rate zero "
                             "and more than one different id.")

    # ------------------------------------------------------------------------
    # Step 3: Parse the file. Figure out which traces changed.
    #         Make sure it either gets created for a file or not.
    with transaction.atomic():
        # Create the file object if it does not exist.
        if file is None:
            path_obj = models.Path.objects.get_or_create(
                name=os.path.dirname(os.path.abspath(filename)))[0]
            models.File.objects. \
                filter(path=path_obj, name=os.path.basename(filename)). \
                delete()
            file = models.File.objects. \
                create(path=path_obj, name=os.path.basename(filename))

        # set format
        file.format = stream[0].stats._format

        # Collect information about all traces in a dictionary.
        traces_in_file = {}

        # Log channels for example are special as they have no sampling rate.
        if any(tr.stats.sampling_rate == 0 for tr in stream):
            starttime = min(tr.stats.starttime for tr in stream)
            endtime = max(tr.stats.endtime for tr in stream)
            if starttime == endtime:
                starttime += 0.001

            file.gaps = 0
            file.overlaps = 0
            file.save()

            try:
                quality = stream[0].stats.mseed.dataquality
            except AttributeError:
                quality = None

            traces_in_file[0] = {
                "starttime": starttime,
                "endtime": endtime,
                "network": stream[0].stats.network.upper(),
                "station": stream[0].stats.station.upper(),
                "location": stream[0].stats.location.upper(),
                "channel": stream[0].stats.channel.upper(),
                "sampling_rate": stream[0].stats.sampling_rate,
                "npts": sum(tr.stats.npts for tr in stream),
                "duration": endtime - starttime,
                "quality": quality,
                "preview_trace": None,
                "pos": 0}
        else:
            # get number of gaps and overlaps per file
            gap_list = stream.get_gaps()
            file.gaps = len([g for g in gap_list if g[6] >= 0])
            file.overlaps = len([g for g in gap_list if g[6] < 0])
            file.save()
            for pos, trace in enumerate(stream):
                try:
                    quality = trace.stats.mseed.dataquality
                except AttributeError:
                    quality = None

                # Preview is optional. For some traces, e.g. LOG channels it
                # does not work.
                try:
                    preview_trace = create_preview(trace, 60)
                except Exception:
                    preview_trace = None
                else:
                    preview_trace = list(map(float, preview_trace.data))

                traces_in_file[pos] = {
                    "starttime": trace.stats.starttime,
                    "endtime": trace.stats.endtime,
                    "network": trace.stats.network.upper(),
                    "station": trace.stats.station.upper(),
                    "location": trace.stats.location.upper(),
                    "channel": trace.stats.channel.upper(),
                    "sampling_rate": trace.stats.sampling_rate,
                    "npts": trace.stats.npts,
                    "duration": trace.stats.endtime - trace.stats.starttime,
                    "quality": quality,
                    "preview_trace": preview_trace,
                    "pos": pos}

        # Get all existing traces.
        for tr_db in models.ContinuousTrace.objects.filter(file=file):
            # Attempt to get the existing trace object.
            if tr_db.pos in traces_in_file:
                tr = traces_in_file[tr_db.pos]
                # Delete in the dictionary.
                del traces_in_file[tr_db.pos]

                tr_db.timerange = DateTimeTZRange(
                    lower=tr["starttime"].datetime,
                    upper=tr["endtime"].datetime)
                tr_db.network = tr["network"]
                tr_db.station = tr["station"]
                tr_db.location = tr["location"]
                tr_db.channel = tr["channel"]
                tr_db.sampling_rate = tr["sampling_rate"]
                tr_db.npts = tr["npts"]
                tr_db.duration = tr["duration"]
                tr_db.quality = tr["quality"]
                tr_db.preview_trace = tr["preview_trace"]
                tr_db.pos = tr["pos"]
                tr_db.save()

            # If it does not exist in the waveform file, delete it here as
            # it is (for whatever reason) no longer in the file.
            else:
                tr_db.delete()

        # Add remaining items.
        for tr in traces_in_file.values():
            tr_db = models.ContinuousTrace(
                file=file,
                timerange=DateTimeTZRange(
                    lower=tr["starttime"].datetime,
                    upper=tr["endtime"].datetime))
            tr_db.network = tr["network"]
            tr_db.station = tr["station"]
            tr_db.location = tr["location"]
            tr_db.channel = tr["channel"]
            tr_db.sampling_rate = tr["sampling_rate"]
            tr_db.npts = tr["npts"]
            tr_db.duration = tr["duration"]
            tr_db.quality = tr["quality"]
            tr_db.preview_trace = tr["preview_trace"]
            tr_db.pos = tr["pos"]
            tr_db.save()