def test_create_preview_with_unrounded_sample_rate(self):
    """
    Creating a preview must cope with a sampling rate that is not a
    round number.
    """
    trace = Trace(data=np.arange(4000))
    trace.stats.starttime = UTCDateTime("1989-10-06T14:31:14.000000Z")
    trace.stats.sampling_rate = 124.999992371
    # Must not raise despite the fractional sampling rate.
    create_preview(trace, delta=30)
def test_createPreviewWithUnroundedSampleRate(self):
    """
    Creating a preview from a trace whose sampling rate is not an
    integer value must work without errors.
    """
    trace = Trace(data=np.arange(4000))
    trace.stats.sampling_rate = 124.999992371
    trace.stats.starttime = UTCDateTime("1989-10-06T14:31:14.000000Z")
    # No assertion needed - this simply must not raise.
    create_preview(trace, delta=30)
def test_createPreviewWithVerySmallSampleRate(self):
    """
    Test for creating previews with samples per slice less than 1.
    """
    trace = Trace(data=np.arange(4000))
    # A sampling rate that yields less than one sample per preview
    # slice must be rejected.
    trace.stats.sampling_rate = 0.1
    self.assertRaises(ValueError, create_preview, trace)
    # One sample per second works fine.
    trace.stats.sampling_rate = 1
    create_preview(trace)
def test_create_preview_with_very_small_sample_rate(self):
    """
    Test for creating previews with samples per slice less than 1.
    """
    trace = Trace(data=np.arange(4000))
    # case 1: a sampling rate that yields less than one sample per
    # preview slice must be rejected
    trace.stats.sampling_rate = 0.1
    with pytest.raises(ValueError):
        create_preview(trace)
    # case 2: one sample per second works fine
    trace.stats.sampling_rate = 1
    create_preview(trace)
def test_create_preview_with_masked_arrays(self):
    """
    Test for creating preview using masked arrays.
    """
    # case 1: a masked array without any masked values
    trace = Trace(data=np.ma.ones(600))
    preview = create_preview(trace, delta=60)
    # nothing is masked, so no sample is replaced by -1
    np.testing.assert_array_equal(preview.data, np.array([0] * 10))
    # case 2: a masked array with a masked region
    trace = Trace(data=np.ma.ones(600))
    mask = np.zeros(600, dtype=bool)
    mask[200:400] = True
    trace.data.mask = mask
    preview = create_preview(trace, delta=60)
    # slices containing masked values are replaced with -1
    np.testing.assert_array_equal(
        preview.data, np.array([0] * 4 + [-1] * 2 + [0] * 4))
def test_createPreviewWithMaskedArrays(self):
    """
    Previews of masked arrays must mark masked slices with -1.
    """
    # 1 - masked array, no values actually masked
    tr = Trace(data=np.ma.ones(600))
    result = create_preview(tr, delta=60)
    # no slice contains masked values, so none becomes -1
    np.testing.assert_array_equal(result.data, np.array(10 * [0]))
    # 2 - masked array with the middle third masked
    tr = Trace(data=np.ma.ones(600))
    tr.data.mask = [False] * 600
    tr.data.mask[200:400] = True
    result = create_preview(tr, delta=60)
    # slices overlapping the masked region are set to -1
    np.testing.assert_array_equal(
        result.data, np.array(4 * [0] + 2 * [-1] + 4 * [0]))
def test_createPreview(self):
    """
    Test for creating preview.
    """
    # A non-integer or non-positive delta must raise.
    self.assertRaises(TypeError, create_preview,
                      Trace(data=np.arange(10)), 60.0)
    self.assertRaises(TypeError, create_preview,
                      Trace(data=np.arange(10)), 0)
    # 1 - trace starting in the middle of a preview slice
    trace = Trace(data=np.array([0] * 28 + [0, 1] * 30 + [-1, 1] * 29))
    trace.stats.starttime = UTCDateTime(32)
    preview = create_preview(trace, delta=60)
    self.assertEqual(preview.stats.starttime, UTCDateTime(60))
    self.assertEqual(preview.stats.endtime, UTCDateTime(120))
    self.assertEqual(preview.stats.delta, 60)
    np.testing.assert_array_equal(preview.data, np.array([1, 2]))
    # 2-4: traces aligned to the slice boundary, of increasing length
    cases = [
        # (npts, expected endtime, expected preview samples)
        (30, UTCDateTime(0), [29]),
        (60, UTCDateTime(0), [59]),
        (90, UTCDateTime(60), [59, 29]),
    ]
    for npts, expected_endtime, expected_data in cases:
        trace = Trace(data=np.arange(0, npts))
        preview = create_preview(trace, delta=60)
        self.assertEqual(preview.stats.starttime, UTCDateTime(0))
        self.assertEqual(preview.stats.endtime, expected_endtime)
        self.assertEqual(preview.stats.delta, 60)
        np.testing.assert_array_equal(preview.data,
                                      np.array(expected_data))
def worker(_i, input_queue, work_queue, output_queue, log_queue, mappings=None):
    """
    Index waveform files from ``input_queue`` until interrupted.

    :param _i: Worker number (unused; kept for the process spawner).
    :param input_queue: Shared dict-like queue of
        ``filepath -> (path, file, features)`` items still to be indexed.
    :param work_queue: Shared list of file paths currently being processed,
        used to avoid double work between workers.
    :param output_queue: Shared list receiving one dataset (a list of result
        dicts, one per trace) per processed file.
    :param log_queue: Shared list collecting log messages.
    :param mappings: Optional dict mapping a trace id to a list of
        time-bounded id mappings (``network``/``station``/``location``/
        ``channel`` plus ``starttime``/``endtime``). Defaults to no mappings.

    Returns only on ``KeyboardInterrupt``.
    """
    # Avoid the mutable default argument anti-pattern: a ``{}`` default
    # would be a single dict shared across all calls.
    if mappings is None:
        mappings = {}
    try:
        # fetch and initialize all possible waveform feature plug-ins
        all_features = {}
        for (key, ep) in _get_entry_points('obspy.db.feature').items():
            try:
                # load plug-in
                cls = ep.load()
                # initialize class
                func = cls().process
            except Exception as e:
                msg = 'Could not initialize feature %s. (%s)'
                log_queue.append(msg % (key, str(e)))
                continue
            all_features[key] = {}
            all_features[key]['run'] = func
            try:
                all_features[key]['indexer_kwargs'] = cls['indexer_kwargs']
            except Exception:
                all_features[key]['indexer_kwargs'] = {}
        # loop through input queue
        while True:
            # fetch a unprocessed item
            try:
                filepath, (path, file, features) = input_queue.popitem()
            except Exception:
                continue
            # skip item if already in work queue
            if filepath in work_queue:
                continue
            work_queue.append(filepath)
            # get additional kwargs for read method from waveform plug-ins
            kwargs = {'verify_chksum': False}
            for feature in features:
                if feature not in all_features:
                    log_queue.append('%s: Unknown feature %s' % (filepath,
                                                                 feature))
                    continue
                kwargs.update(all_features[feature]['indexer_kwargs'])
            # read file and get file stats
            try:
                stats = os.stat(filepath)
                stream = read(filepath, **kwargs)
                # get gap and overlap information
                gap_list = stream.get_gaps()
                # merge channels and replace gaps/overlaps with 0 to prevent
                # generation of masked arrays
                stream.merge(fill_value=0)
            except Exception as e:
                msg = '[Reading stream] %s: %s'
                log_queue.append(msg % (filepath, e))
                try:
                    work_queue.remove(filepath)
                except Exception:
                    pass
                continue
            # build up dictionary of gaps and overlaps for easier lookup
            gap_dict = {}
            for gap in gap_list:
                # named ``trace_id`` so the builtin ``id`` is not shadowed
                trace_id = '.'.join(gap[0:4])
                temp = {
                    'gap': gap[6] >= 0,
                    'starttime': gap[4].datetime,
                    'endtime': gap[5].datetime,
                    'samples': abs(gap[7])
                }
                gap_dict.setdefault(trace_id, []).append(temp)
            # loop through traces
            dataset = []
            for trace in stream:
                result = {}
                # general file information
                result['mtime'] = int(stats.st_mtime)
                result['size'] = stats.st_size
                result['path'] = path
                result['file'] = file
                result['filepath'] = filepath
                # trace information
                result['format'] = trace.stats._format
                result['station'] = trace.stats.station
                result['location'] = trace.stats.location
                result['channel'] = trace.stats.channel
                result['network'] = trace.stats.network
                result['starttime'] = trace.stats.starttime.datetime
                result['endtime'] = trace.stats.endtime.datetime
                result['calib'] = trace.stats.calib
                result['npts'] = trace.stats.npts
                result['sampling_rate'] = trace.stats.sampling_rate
                # check for any id mappings
                if trace.id in mappings:
                    old_id = trace.id
                    for mapping in mappings[old_id]:
                        # skip mappings whose validity window does not
                        # overlap the trace
                        if trace.stats.starttime and \
                                trace.stats.starttime > mapping['endtime']:
                            continue
                        if trace.stats.endtime and \
                                trace.stats.endtime < mapping['starttime']:
                            continue
                        result['network'] = mapping['network']
                        result['station'] = mapping['station']
                        result['location'] = mapping['location']
                        result['channel'] = mapping['channel']
                        msg = "Mapping '%s' to '%s.%s.%s.%s'" % \
                            (old_id, mapping['network'], mapping['station'],
                             mapping['location'], mapping['channel'])
                        log_queue.append(msg)
                # gaps/overlaps for current trace
                result['gaps'] = gap_dict.get(trace.id, [])
                # apply feature functions
                result['features'] = []
                for key in features:
                    if key not in all_features:
                        continue
                    try:
                        # run plug-in and update results; the inner loop
                        # uses its own name so the outer ``key`` is not
                        # clobbered
                        temp = all_features[key]['run'](trace)
                        for feature_key, value in temp.items():
                            result['features'].append(
                                {'key': feature_key, 'value': value})
                    except Exception as e:
                        msg = '[Processing feature] %s: %s'
                        log_queue.append(msg % (filepath, e))
                        continue
                # generate preview of trace
                result['preview'] = None
                if '.LOG.L.' not in file or trace.stats.channel != 'LOG':
                    # create previews only for non-log files (see issue #400)
                    try:
                        trace = create_preview(trace, 30)
                        result['preview'] = trace.data.dumps()
                    except ValueError:
                        pass
                    except Exception as e:
                        msg = '[Creating preview] %s: %s'
                        log_queue.append(msg % (filepath, e))
                # update dataset
                dataset.append(result)
            del stream
            # return results to main loop
            try:
                output_queue.append(dataset)
            except Exception:
                pass
            try:
                work_queue.remove(filepath)
            except Exception:
                pass
    except KeyboardInterrupt:
        return
def process_file(filename):
    """
    Process a single waveform file.

    This is a bit more complex as it needs to update existing database
    objects and cannot just always create new ones. Otherwise the
    identifiers quickly reach very high numbers.

    :param filename: Path of the waveform file to index.
    :raises JaneWaveformTaskException: If the file is valid but empty.
    :raises ValueError: If a trace has sampling rate zero and the file
        contains more than one distinct trace id.
    """
    # Resolve symlinks and make a canonical simple path.
    filename = os.path.realpath(os.path.normpath(os.path.abspath(filename)))
    # ------------------------------------------------------------------------
    # Step 1: Get the file if it exists.
    try:
        file = models.File.objects.get(path__name=os.path.dirname(filename),
                                       name=os.path.basename(filename))
        # This path is only reached if the file exists. Check size, mtime,
        # and ctime and if it all remains the same, return.
        stats = os.stat(filename)
        mtime = to_datetime(stats.st_mtime)
        ctime = to_datetime(stats.st_ctime)
        size = int(stats.st_size)
        # Nothing to do if nothing changed.
        if file.size == size and file.mtime == mtime and file.ctime == ctime:
            return
    # If it does not exist, create it in the next step.
    except models.File.DoesNotExist:
        file = None
    # ------------------------------------------------------------------------
    # Step 2: Read the file and perform a couple of sanity checks. Delete an
    # eventually existing file.
    try:
        stream = read(filename, verify_chksum=False)
    # Catch Exception instead of a bare ``except`` so KeyboardInterrupt /
    # SystemExit do not delete the database object on their way out.
    except Exception:
        # Delete if invalid file.
        if file is not None:
            file.delete()
        # Reraise the exception.
        raise
    if len(stream) == 0:
        msg = "'%s' is a valid waveform file but contains no actual data"
        raise JaneWaveformTaskException(msg % filename)
    # Delete if invalid file.
    if file is not None:
        file.delete()
    # Log channels for example are special as they have no sampling rate.
    if any(tr.stats.sampling_rate == 0 for tr in stream):
        # Make sure there is only one set of network, station,
        # location, and channel.
        ids = set(tr.id for tr in stream)
        if len(ids) != 1:
            # Delete if invalid file.
            if file is not None:
                file.delete()
            raise ValueError("File has a trace with sampling rate zero "
                             "and more then one different id.")
    # ------------------------------------------------------------------------
    # Step 3: Parse the file. Figure out which traces changed.
    # Make sure it either gets created for a file or not.
    with transaction.atomic():
        # Create the file object if it does not exist.
        if file is None:
            path_obj = models.Path.objects.get_or_create(
                name=os.path.dirname(os.path.abspath(filename)))[0]
            models.File.objects. \
                filter(path=path_obj, name=os.path.basename(filename)). \
                delete()
            file = models.File.objects. \
                create(path=path_obj, name=os.path.basename(filename))
        # set format
        file.format = stream[0].stats._format
        # Collect information about all traces in a dictionary.
        traces_in_file = {}
        # Log channels for example are special as they have no sampling rate.
        if any(tr.stats.sampling_rate == 0 for tr in stream):
            starttime = min(tr.stats.starttime for tr in stream)
            endtime = max(tr.stats.endtime for tr in stream)
            if starttime == endtime:
                # Widen a zero-length range by extending the END time.
                # (Extending the start time would invert the range and
                # DateTimeTZRange requires lower <= upper.)
                endtime += 0.001
            file.gaps = 0
            file.overlaps = 0
            file.save()
            try:
                quality = stream[0].stats.mseed.dataquality
            except AttributeError:
                quality = None
            traces_in_file[0] = {
                "starttime": starttime,
                "endtime": endtime,
                "network": stream[0].stats.network.upper(),
                "station": stream[0].stats.station.upper(),
                "location": stream[0].stats.location.upper(),
                "channel": stream[0].stats.channel.upper(),
                "sampling_rate": stream[0].stats.sampling_rate,
                "npts": sum(tr.stats.npts for tr in stream),
                "duration": endtime - starttime,
                "quality": quality,
                "preview_trace": None,
                "pos": 0
            }
        else:
            # get number of gaps and overlaps per file
            gap_list = stream.get_gaps()
            file.gaps = len([g for g in gap_list if g[6] >= 0])
            file.overlaps = len([g for g in gap_list if g[6] < 0])
            file.save()
            for pos, trace in enumerate(stream):
                try:
                    quality = trace.stats.mseed.dataquality
                except AttributeError:
                    quality = None
                # Preview is optional. For some traces, e.g. LOG channels it
                # does not work.
                try:
                    preview_trace = create_preview(trace, 60)
                # Narrowed from a bare ``except`` - only genuine errors
                # should disable the preview, not SystemExit & co.
                except Exception:
                    preview_trace = None
                else:
                    preview_trace = list(map(float, preview_trace.data))
                traces_in_file[pos] = {
                    "starttime": trace.stats.starttime,
                    "endtime": trace.stats.endtime,
                    "network": trace.stats.network.upper(),
                    "station": trace.stats.station.upper(),
                    "location": trace.stats.location.upper(),
                    "channel": trace.stats.channel.upper(),
                    "sampling_rate": trace.stats.sampling_rate,
                    "npts": trace.stats.npts,
                    "duration": trace.stats.endtime - trace.stats.starttime,
                    "quality": quality,
                    "preview_trace": preview_trace,
                    "pos": pos
                }
        # Get all existing traces.
        for tr_db in models.ContinuousTrace.objects.filter(file=file):
            # Attempt to get the existing trace object.
            if tr_db.pos in traces_in_file:
                tr = traces_in_file[tr_db.pos]
                # Delete in the dictionary.
                del traces_in_file[tr_db.pos]
                tr_db.timerange = DateTimeTZRange(
                    lower=tr["starttime"].datetime,
                    upper=tr["endtime"].datetime)
                tr_db.network = tr["network"]
                tr_db.station = tr["station"]
                tr_db.location = tr["location"]
                tr_db.channel = tr["channel"]
                tr_db.sampling_rate = tr["sampling_rate"]
                tr_db.npts = tr["npts"]
                tr_db.duration = tr["duration"]
                tr_db.quality = tr["quality"]
                tr_db.preview_trace = tr["preview_trace"]
                tr_db.pos = tr["pos"]
                tr_db.save()
            # If it does not exist in the waveform file, delete it here as
            # it is (for whatever reason) no longer in the file..
            else:
                tr_db.delete()
        # Add remaining items.
        for tr in traces_in_file.values():
            tr_db = models.ContinuousTrace(
                file=file,
                timerange=DateTimeTZRange(
                    lower=tr["starttime"].datetime,
                    upper=tr["endtime"].datetime))
            tr_db.network = tr["network"]
            tr_db.station = tr["station"]
            tr_db.location = tr["location"]
            tr_db.channel = tr["channel"]
            tr_db.sampling_rate = tr["sampling_rate"]
            tr_db.npts = tr["npts"]
            tr_db.duration = tr["duration"]
            tr_db.quality = tr["quality"]
            tr_db.preview_trace = tr["preview_trace"]
            tr_db.pos = tr["pos"]
            tr_db.save()
def worker(_i, input_queue, work_queue, output_queue, log_queue, mappings=None):
    """
    Index waveform files from ``input_queue`` until interrupted.

    :param _i: Worker number (unused; kept for the process spawner).
    :param input_queue: Shared dict-like queue of
        ``filepath -> (path, file, features)`` items still to be indexed.
    :param work_queue: Shared list of file paths currently being processed,
        used to avoid double work between workers.
    :param output_queue: Shared list receiving one dataset (a list of result
        dicts, one per trace) per processed file.
    :param log_queue: Shared list collecting log messages.
    :param mappings: Optional dict mapping a trace id to a list of
        time-bounded id mappings (``network``/``station``/``location``/
        ``channel`` plus ``starttime``/``endtime``). Defaults to no mappings.

    Returns only on ``KeyboardInterrupt``.
    """
    # Avoid the mutable default argument anti-pattern: a ``{}`` default
    # would be a single dict shared across all calls.
    if mappings is None:
        mappings = {}
    try:
        # fetch and initialize all possible waveform feature plug-ins
        all_features = {}
        for (key, ep) in _get_entry_points('obspy.db.feature').items():
            try:
                # load plug-in
                cls = ep.load()
                # initialize class
                func = cls().process
            except Exception as e:
                msg = 'Could not initialize feature %s. (%s)'
                log_queue.append(msg % (key, str(e)))
                continue
            all_features[key] = {}
            all_features[key]['run'] = func
            try:
                all_features[key]['indexer_kwargs'] = cls['indexer_kwargs']
            except Exception:
                all_features[key]['indexer_kwargs'] = {}
        # loop through input queue
        while True:
            # fetch a unprocessed item
            try:
                filepath, (path, file, features) = input_queue.popitem()
            except Exception:
                continue
            # skip item if already in work queue
            if filepath in work_queue:
                continue
            work_queue.append(filepath)
            # get additional kwargs for read method from waveform plug-ins
            kwargs = {'verify_chksum': False}
            for feature in features:
                if feature not in all_features:
                    log_queue.append('%s: Unknown feature %s' % (filepath,
                                                                 feature))
                    continue
                kwargs.update(all_features[feature]['indexer_kwargs'])
            # read file and get file stats
            try:
                stats = os.stat(filepath)
                stream = read(filepath, **kwargs)
                # get gap and overlap information
                gap_list = stream.get_gaps()
                # merge channels and replace gaps/overlaps with 0 to prevent
                # generation of masked arrays
                stream.merge(fill_value=0)
            except Exception as e:
                msg = '[Reading stream] %s: %s'
                log_queue.append(msg % (filepath, e))
                try:
                    work_queue.remove(filepath)
                except Exception:
                    pass
                continue
            # build up dictionary of gaps and overlaps for easier lookup
            gap_dict = {}
            for gap in gap_list:
                # named ``trace_id`` so the builtin ``id`` is not shadowed
                trace_id = '.'.join(gap[0:4])
                temp = {
                    'gap': gap[6] >= 0,
                    'starttime': gap[4].datetime,
                    'endtime': gap[5].datetime,
                    'samples': abs(gap[7])
                }
                gap_dict.setdefault(trace_id, []).append(temp)
            # loop through traces
            dataset = []
            for trace in stream:
                result = {}
                # general file information
                result['mtime'] = int(stats.st_mtime)
                result['size'] = stats.st_size
                result['path'] = path
                result['file'] = file
                result['filepath'] = filepath
                # trace information
                result['format'] = trace.stats._format
                result['station'] = trace.stats.station
                result['location'] = trace.stats.location
                result['channel'] = trace.stats.channel
                result['network'] = trace.stats.network
                result['starttime'] = trace.stats.starttime.datetime
                result['endtime'] = trace.stats.endtime.datetime
                result['calib'] = trace.stats.calib
                result['npts'] = trace.stats.npts
                result['sampling_rate'] = trace.stats.sampling_rate
                # check for any id mappings
                if trace.id in mappings:
                    old_id = trace.id
                    for mapping in mappings[old_id]:
                        # skip mappings whose validity window does not
                        # overlap the trace
                        if trace.stats.starttime and \
                                trace.stats.starttime > mapping['endtime']:
                            continue
                        if trace.stats.endtime and \
                                trace.stats.endtime < mapping['starttime']:
                            continue
                        result['network'] = mapping['network']
                        result['station'] = mapping['station']
                        result['location'] = mapping['location']
                        result['channel'] = mapping['channel']
                        msg = "Mapping '%s' to '%s.%s.%s.%s'" % \
                            (old_id, mapping['network'], mapping['station'],
                             mapping['location'], mapping['channel'])
                        log_queue.append(msg)
                # gaps/overlaps for current trace
                result['gaps'] = gap_dict.get(trace.id, [])
                # apply feature functions
                result['features'] = []
                for key in features:
                    if key not in all_features:
                        continue
                    try:
                        # run plug-in and update results; the inner loop
                        # uses its own name so the outer ``key`` is not
                        # clobbered
                        temp = all_features[key]['run'](trace)
                        for feature_key, value in temp.items():
                            result['features'].append(
                                {'key': feature_key, 'value': value})
                    except Exception as e:
                        msg = '[Processing feature] %s: %s'
                        log_queue.append(msg % (filepath, e))
                        continue
                # generate preview of trace
                result['preview'] = None
                if '.LOG.L.' not in file or trace.stats.channel != 'LOG':
                    # create previews only for non-log files (see issue #400)
                    try:
                        trace = create_preview(trace, 30)
                        result['preview'] = trace.data.dumps()
                    except ValueError:
                        pass
                    except Exception as e:
                        msg = '[Creating preview] %s: %s'
                        log_queue.append(msg % (filepath, e))
                # update dataset
                dataset.append(result)
            del stream
            # return results to main loop
            try:
                output_queue.append(dataset)
            except Exception:
                pass
            try:
                work_queue.remove(filepath)
            except Exception:
                pass
    except KeyboardInterrupt:
        return
def setUpClass(cls):
    """
    Build the shared in-memory test database once for the whole suite.
    """
    # Create a in memory database only once for test suite
    cls.client = Client('sqlite:///:memory:')
    session = cls.client.session()
    # add paths
    path1 = WaveformPath({'path': '/path/to/1'})
    path2 = WaveformPath({'path': '/path/to/2'})
    session.add_all([path1, path2])
    # add files
    file1 = WaveformFile({'file': 'file_001.mseed', 'size': 2000,
                          'mtime': UTCDateTime('20120101').timestamp,
                          'format': 'MSEED'})
    file2 = WaveformFile({'file': 'file_002.mseed', 'size': 2000,
                          'mtime': UTCDateTime('20120102').timestamp,
                          'format': 'MSEED'})
    file3 = WaveformFile({'file': 'file_001.gse2', 'size': 2000,
                          'mtime': UTCDateTime('20120102').timestamp,
                          'format': 'GSE2'})
    path1.files.append(file1)
    path1.files.append(file2)
    path2.files.append(file3)
    session.add_all([file1, file2, file3])
    # add channels
    channel1 = WaveformChannel({
        'network': 'BW', 'station': 'MANZ', 'location': '',
        'channel': 'EHZ',
        'starttime': UTCDateTime('2012-01-01 00:00:00.000000').datetime,
        'endtime': UTCDateTime('2012-01-01 23:59:59.999999').datetime,
        'npts': 3000, 'sampling_rate': 100.0})
    channel2 = WaveformChannel({
        'network': 'BW', 'station': 'MANZ', 'location': '',
        'channel': 'EHZ',
        'starttime': UTCDateTime('2012-01-02 01:00:00.000000').datetime,
        'endtime': UTCDateTime('2012-01-02 23:59:59.999999').datetime,
        'npts': 3000, 'sampling_rate': 100.0})
    # create a channel with preview
    header = {'network': 'GE', 'station': 'FUR', 'location': '00',
              'channel': 'BHZ',
              'starttime': UTCDateTime('2012-01-01 00:00:00.000000'),
              'sampling_rate': 100.0}
    # linear trend
    data = np.linspace(0, 1, 3000000)
    # some peaks: a positive spike immediately followed by a negative one
    for index, amplitude in ((20000, 15), (1000000, 22), (2000000, 14)):
        data[index] = amplitude
        data[index + 1] = -amplitude
    tr = Trace(data=data, header=header)
    cls.preview = create_preview(tr, 30).data
    header = dict(tr.stats)
    header['starttime'] = tr.stats.starttime.datetime
    header['endtime'] = tr.stats.endtime.datetime
    channel3 = WaveformChannel(header)
    channel3.preview = cls.preview.dumps()
    file1.channels.append(channel1)
    file2.channels.append(channel2)
    file3.channels.append(channel3)
    session.add_all([channel1, channel2, channel3])
    session.commit()
    session.close()
def setUpClass(cls):
    """
    Set up the shared in-memory database for the whole test suite.
    """
    # Create a in memory database only once for test suite
    url = 'sqlite:///:memory:'
    cls.client = Client(url)
    session = cls.client.session()
    # add paths
    paths = [WaveformPath({'path': '/path/to/1'}),
             WaveformPath({'path': '/path/to/2'})]
    session.add_all(paths)
    # add files; (parent path, name, mtime, format)
    file_specs = [
        (paths[0], 'file_001.mseed', UTCDateTime('20120101'), 'MSEED'),
        (paths[0], 'file_002.mseed', UTCDateTime('20120102'), 'MSEED'),
        (paths[1], 'file_001.gse2', UTCDateTime('20120102'), 'GSE2'),
    ]
    files = []
    for parent, name, mtime, fmt in file_specs:
        wf = WaveformFile({'file': name, 'size': 2000,
                           'mtime': mtime.timestamp, 'format': fmt})
        parent.files.append(wf)
        files.append(wf)
    session.add_all(files)
    # add channels
    channel1 = WaveformChannel(
        {'network': 'BW', 'station': 'MANZ', 'location': '',
         'channel': 'EHZ',
         'starttime': UTCDateTime('2012-01-01 00:00:00.000000').datetime,
         'endtime': UTCDateTime('2012-01-01 23:59:59.999999').datetime,
         'npts': 3000, 'sampling_rate': 100.0})
    channel2 = WaveformChannel(
        {'network': 'BW', 'station': 'MANZ', 'location': '',
         'channel': 'EHZ',
         'starttime': UTCDateTime('2012-01-02 01:00:00.000000').datetime,
         'endtime': UTCDateTime('2012-01-02 23:59:59.999999').datetime,
         'npts': 3000, 'sampling_rate': 100.0})
    # create a channel with preview
    header = {'network': 'GE', 'station': 'FUR', 'location': '00',
              'channel': 'BHZ',
              'starttime': UTCDateTime('2012-01-01 00:00:00.000000'),
              'sampling_rate': 100.0}
    # linear trend
    data = np.linspace(0, 1, 3000000)
    # some peaks: positive spike immediately followed by a negative one
    for index, amplitude in ((20000, 15), (1000000, 22), (2000000, 14)):
        data[index] = amplitude
        data[index + 1] = -amplitude
    tr = Trace(data=data, header=header)
    cls.preview = create_preview(tr, 30).data
    header = dict(tr.stats)
    header['starttime'] = tr.stats.starttime.datetime
    header['endtime'] = tr.stats.endtime.datetime
    channel3 = WaveformChannel(header)
    channel3.preview = cls.preview.dumps()
    files[0].channels.append(channel1)
    files[1].channels.append(channel2)
    files[2].channels.append(channel3)
    session.add_all([channel1, channel2, channel3])
    session.commit()
    session.close()
def setUpClass(cls):
    """Create the in-memory test database shared by the whole suite."""
    # Create a in memory database only once for test suite
    cls.client = Client("sqlite:///:memory:")
    session = cls.client.session()
    # add paths
    path1 = WaveformPath({"path": "/path/to/1"})
    path2 = WaveformPath({"path": "/path/to/2"})
    session.add_all([path1, path2])
    # add files
    file1 = WaveformFile({"file": "file_001.mseed", "size": 2000,
                          "mtime": UTCDateTime("20120101").timestamp,
                          "format": "MSEED"})
    file2 = WaveformFile({"file": "file_002.mseed", "size": 2000,
                          "mtime": UTCDateTime("20120102").timestamp,
                          "format": "MSEED"})
    file3 = WaveformFile({"file": "file_001.gse2", "size": 2000,
                          "mtime": UTCDateTime("20120102").timestamp,
                          "format": "GSE2"})
    path1.files.append(file1)
    path1.files.append(file2)
    path2.files.append(file3)
    session.add_all([file1, file2, file3])
    # add channels
    channel1 = WaveformChannel({
        "network": "BW",
        "station": "MANZ",
        "location": "",
        "channel": "EHZ",
        "starttime": UTCDateTime("2012-01-01 00:00:00.000000").datetime,
        "endtime": UTCDateTime("2012-01-01 23:59:59.999999").datetime,
        "npts": 3000,
        "sampling_rate": 100.0,
    })
    channel2 = WaveformChannel({
        "network": "BW",
        "station": "MANZ",
        "location": "",
        "channel": "EHZ",
        "starttime": UTCDateTime("2012-01-02 01:00:00.000000").datetime,
        "endtime": UTCDateTime("2012-01-02 23:59:59.999999").datetime,
        "npts": 3000,
        "sampling_rate": 100.0,
    })
    # create a channel with preview
    header = {
        "network": "GE",
        "station": "FUR",
        "location": "00",
        "channel": "BHZ",
        "starttime": UTCDateTime("2012-01-01 00:00:00.000000"),
        "sampling_rate": 100.0,
    }
    # linear trend
    data = np.linspace(0, 1, 3000000)
    # some peaks: positive spike immediately followed by a negative one
    for index, amplitude in ((20000, 15), (1000000, 22), (2000000, 14)):
        data[index] = amplitude
        data[index + 1] = -amplitude
    tr = Trace(data=data, header=header)
    cls.preview = create_preview(tr, 30).data
    header = dict(tr.stats)
    header["starttime"] = tr.stats.starttime.datetime
    header["endtime"] = tr.stats.endtime.datetime
    channel3 = WaveformChannel(header)
    channel3.preview = cls.preview.dumps()
    file1.channels.append(channel1)
    file2.channels.append(channel2)
    file3.channels.append(channel3)
    session.add_all([channel1, channel2, channel3])
    session.commit()
    session.close()
def process_file(filename):
    """
    Process a single waveform file.

    This is a bit more complex as it needs to update existing database
    objects and cannot just always create new ones. Otherwise the
    identifiers quickly reach very high numbers.

    :param filename: Path of the waveform file to index.
    :raises JaneWaveformTaskException: If the file is valid but empty.
    :raises ValueError: If a trace has sampling rate zero and the file
        contains more than one distinct trace id.
    """
    # Resolve symlinks and make a canonical simple path.
    filename = os.path.realpath(os.path.normpath(os.path.abspath(filename)))
    # ------------------------------------------------------------------------
    # Step 1: Get the file if it exists.
    try:
        file = models.File.objects.get(
            path__name=os.path.dirname(filename),
            name=os.path.basename(filename))
        # This path is only reached if the file exists. Check size, mtime,
        # and ctime and if it all remains the same, return.
        stats = os.stat(filename)
        mtime = to_datetime(stats.st_mtime)
        ctime = to_datetime(stats.st_ctime)
        size = int(stats.st_size)
        # Nothing to do if nothing changed.
        if file.size == size and file.mtime == mtime and file.ctime == ctime:
            return
    # If it does not exist, create it in the next step.
    except models.File.DoesNotExist:
        file = None
    # ------------------------------------------------------------------------
    # Step 2: Read the file and perform a couple of sanity checks. Delete an
    # eventually existing file.
    try:
        stream = read(filename)
    # Catch Exception instead of a bare ``except`` so KeyboardInterrupt /
    # SystemExit do not delete the database object on their way out.
    except Exception:
        # Delete if invalid file.
        if file is not None:
            file.delete()
        # Reraise the exception.
        raise
    if len(stream) == 0:
        msg = "'%s' is a valid waveform file but contains no actual data"
        raise JaneWaveformTaskException(msg % filename)
    # Delete if invalid file.
    if file is not None:
        file.delete()
    # Log channels for example are special as they have no sampling rate.
    if any(tr.stats.sampling_rate == 0 for tr in stream):
        # Make sure there is only one set of network, station,
        # location, and channel.
        ids = set(tr.id for tr in stream)
        if len(ids) != 1:
            # Delete if invalid file.
            if file is not None:
                file.delete()
            raise ValueError("File has a trace with sampling rate zero "
                             "and more then one different id.")
    # ------------------------------------------------------------------------
    # Step 3: Parse the file. Figure out which traces changed.
    # Make sure it either gets created for a file or not.
    with transaction.atomic():
        # Create the file object if it does not exist.
        if file is None:
            path_obj = models.Path.objects.get_or_create(
                name=os.path.dirname(os.path.abspath(filename)))[0]
            models.File.objects. \
                filter(path=path_obj, name=os.path.basename(filename)). \
                delete()
            file = models.File.objects. \
                create(path=path_obj, name=os.path.basename(filename))
        # set format
        file.format = stream[0].stats._format
        # Collect information about all traces in a dictionary.
        traces_in_file = {}
        # Log channels for example are special as they have no sampling rate.
        if any(tr.stats.sampling_rate == 0 for tr in stream):
            starttime = min(tr.stats.starttime for tr in stream)
            endtime = max(tr.stats.endtime for tr in stream)
            if starttime == endtime:
                # Widen a zero-length range by extending the END time.
                # (Extending the start time would invert the range and
                # DateTimeTZRange requires lower <= upper.)
                endtime += 0.001
            file.gaps = 0
            file.overlaps = 0
            file.save()
            try:
                quality = stream[0].stats.mseed.dataquality
            except AttributeError:
                quality = None
            traces_in_file[0] = {
                "starttime": starttime,
                "endtime": endtime,
                "network": stream[0].stats.network.upper(),
                "station": stream[0].stats.station.upper(),
                "location": stream[0].stats.location.upper(),
                "channel": stream[0].stats.channel.upper(),
                "sampling_rate": stream[0].stats.sampling_rate,
                "npts": sum(tr.stats.npts for tr in stream),
                "duration": endtime - starttime,
                "quality": quality,
                "preview_trace": None,
                "pos": 0}
        else:
            # get number of gaps and overlaps per file
            gap_list = stream.get_gaps()
            file.gaps = len([g for g in gap_list if g[6] >= 0])
            file.overlaps = len([g for g in gap_list if g[6] < 0])
            file.save()
            for pos, trace in enumerate(stream):
                try:
                    quality = trace.stats.mseed.dataquality
                except AttributeError:
                    quality = None
                # Preview is optional. For some traces, e.g. LOG channels it
                # does not work.
                try:
                    preview_trace = create_preview(trace, 60)
                # Narrowed from a bare ``except`` - only genuine errors
                # should disable the preview, not SystemExit & co.
                except Exception:
                    preview_trace = None
                else:
                    preview_trace = list(map(float, preview_trace.data))
                traces_in_file[pos] = {
                    "starttime": trace.stats.starttime,
                    "endtime": trace.stats.endtime,
                    "network": trace.stats.network.upper(),
                    "station": trace.stats.station.upper(),
                    "location": trace.stats.location.upper(),
                    "channel": trace.stats.channel.upper(),
                    "sampling_rate": trace.stats.sampling_rate,
                    "npts": trace.stats.npts,
                    "duration": trace.stats.endtime - trace.stats.starttime,
                    "quality": quality,
                    "preview_trace": preview_trace,
                    "pos": pos}
        # Get all existing traces.
        for tr_db in models.ContinuousTrace.objects.filter(file=file):
            # Attempt to get the existing trace object.
            if tr_db.pos in traces_in_file:
                tr = traces_in_file[tr_db.pos]
                # Delete in the dictionary.
                del traces_in_file[tr_db.pos]
                tr_db.timerange = DateTimeTZRange(
                    lower=tr["starttime"].datetime,
                    upper=tr["endtime"].datetime)
                tr_db.network = tr["network"]
                tr_db.station = tr["station"]
                tr_db.location = tr["location"]
                tr_db.channel = tr["channel"]
                tr_db.sampling_rate = tr["sampling_rate"]
                tr_db.npts = tr["npts"]
                tr_db.duration = tr["duration"]
                tr_db.quality = tr["quality"]
                tr_db.preview_trace = tr["preview_trace"]
                tr_db.pos = tr["pos"]
                tr_db.save()
            # If it does not exist in the waveform file, delete it here as
            # it is (for whatever reason) no longer in the file..
            else:
                tr_db.delete()
        # Add remaining items.
        for tr in traces_in_file.values():
            tr_db = models.ContinuousTrace(
                file=file,
                timerange=DateTimeTZRange(
                    lower=tr["starttime"].datetime,
                    upper=tr["endtime"].datetime))
            tr_db.network = tr["network"]
            tr_db.station = tr["station"]
            tr_db.location = tr["location"]
            tr_db.channel = tr["channel"]
            tr_db.sampling_rate = tr["sampling_rate"]
            tr_db.npts = tr["npts"]
            tr_db.duration = tr["duration"]
            tr_db.quality = tr["quality"]
            tr_db.preview_trace = tr["preview_trace"]
            tr_db.pos = tr["pos"]
            tr_db.save()