def test_issue193(self):
    """
    Test for issue #193: if non-contiguous array is written correctly.

    Round-trips a non-contiguous trace through every format that
    supports both reading and writing and checks the samples survive.
    """
    warnings.filterwarnings("ignore", "Detected non contiguous data")
    # only exercise plugins that offer both a reader and a writer
    writers = set(_getEntryPoints('obspy.plugin.waveform', 'writeFormat'))
    readers = set(_getEntryPoints('obspy.plugin.waveform', 'readFormat'))
    # mseed will raise exception for int64 data, thus use int32 only;
    # slicing with a step of 2 makes the array non-contiguous
    samples = np.arange(10, dtype='int32')[::2]
    trace = Trace(data=samples)
    for fmt in writers & readers:
        # XXX: skip SEGY and SU formats for now as they need some special
        # headers.
        if fmt in ['SEGY', 'SU', 'SEG2']:
            continue
        tmpname = NamedTemporaryFile().name
        trace.write(tmpname, fmt)
        if fmt == "Q":
            tmpname = tmpname + ".QHD"
        roundtrip = read(tmpname, fmt)[0]
        # clean up
        os.remove(tmpname)
        if fmt == 'Q':
            os.remove(tmpname[:-4] + '.QBN')
            os.remove(tmpname[:-4])
        np.testing.assert_array_equal(trace.data, roundtrip.data)
def test_isFormat(self):
    """
    Tests all isFormat methods against all data test files from the other
    modules for false positives.
    """
    formats_ep = _getEntryPoints('obspy.plugin.waveform', 'isFormat')
    formats = formats_ep.values()
    # Collect all false positives.
    false_positives = []
    # Big loop over every format.
    for format in formats:
        # search isFormat for given entry point
        isFormat = load_entry_point(
            format.dist.key, 'obspy.plugin.waveform.' + format.name,
            'isFormat')
        module_path = os.path.join(
            os.path.join(format.dist.location, *format.dist.key.split('.')),
            'tests', 'data')
        # Get all the test directories.
        paths = [
            os.path.join(
                os.path.join(f.dist.location, *f.dist.key.split('.')),
                'tests', 'data')
            for f in formats
        ]
        # Remove the paths from the current module.
        paths = [path for path in paths if path != module_path]
        # Remove double paths because some modules can have two file
        # formats.
        paths = set(paths)
        for path in paths:
            # Collect all files found.
            filelist = []
            # Walk every path.
            for directory, _, files in os.walk(path):
                # Remove double entries from the .svn directories.
                if '.svn' in directory:
                    continue
                filelist.extend(
                    [os.path.join(directory, _i) for _i in files])
            for file in filelist:
                if isFormat(file) != False:
                    false_positives.append((format.name, file))
    # FIX: the original wrapped assertEqual in a bare ``try/except`` and
    # re-raised a generic ``Exception`` just to produce a readable
    # message; pass the message to assertEqual directly instead.
    msg = 'False positives for isFormat:\n'
    msg += '\n'.join('\tFormat %s: %s' % (_i[0], _i[1])
                     for _i in false_positives)
    self.assertEqual(len(false_positives), 0, msg)
def test_deepcopy(self):
    """
    Test for issue #689: deepcopy did not work for segy. In order to
    avoid complicated code to find test data for each waveform pluging,
    which read OK and have no errors we simply test by first writing
    the waveform and then reading it in. Thus test is limited to
    formats which we can also write.
    """
    # formats that can be both read and written
    writable = set(_getEntryPoints('obspy.plugin.waveform', 'writeFormat'))
    readable = set(_getEntryPoints('obspy.plugin.waveform', 'readFormat'))
    stream_orig = read()
    for fmt in writable & readable:
        # TODO: these formats error in read and writing, not in
        # deepcopy
        if fmt in ('SAC', 'SACXY', 'SEG2', 'Q', 'WAV'):
            continue
        work = deepcopy(stream_orig)
        # set some data; GSE2/MSEED cannot hold float32 test samples
        sample_type = np.int32 if fmt in ('GSE2', 'MSEED') else np.float32
        for tr in work:
            tr.data = np.arange(tr.stats.npts).astype(sample_type)
        with NamedTemporaryFile() as tf:
            tmpfile = tf.name
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                work.write(format=fmt, filename=tmpfile)
                st = read(tmpfile, format=fmt)
            st.sort()
            copied = deepcopy(st)
            copied.sort()
            msg = "Error in wavform format=%s" % fmt
            self.assertEqual(str(st), str(copied), msg=msg)
def test_deepcopy(self):
    """
    Test for issue #689: deepcopy did not work for segy. In order to
    avoid complicated code to find test data for each waveform pluging,
    which read OK and have no errors we simply test by first writing
    the waveform and then reading it in. Thus test is limited to
    formats which we can also write.
    """
    # formats that can be both read and written
    writable = set(_getEntryPoints('obspy.plugin.waveform', 'writeFormat'))
    readable = set(_getEntryPoints('obspy.plugin.waveform', 'readFormat'))
    stream_orig = read()
    for fmt in writable & readable:
        # TODO: these formats error in read and writing, not in
        # deepcopy
        if fmt in ('SAC', 'SACXY', 'SEG2', 'Q', 'WAV'):
            continue
        work = deepcopy(stream_orig)
        # set some data; GSE2/MSEED cannot hold float32 test samples
        sample_type = 'i4' if fmt in ('GSE2', 'MSEED') else 'f4'
        for tr in work:
            tr.data = np.arange(tr.stats.npts).astype(sample_type)
        with NamedTemporaryFile() as tf:
            tmpfile = tf.name
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                work.write(format=fmt, filename=tmpfile)
                st = read(tmpfile, format=fmt)
            st.sort()
            copied = deepcopy(st)
            copied.sort()
            msg = "Error in wavform format=%s" % fmt
            self.assertEqual(str(st), str(copied), msg=msg)
def test_raiseOnEmptyFile(self):
    """
    Test case ensures that no isFormat checker claims an empty file as
    its own format.
    """
    tmpfile = NamedTemporaryFile().name
    try:
        # create empty file
        open(tmpfile, 'wb').close()
        formats_ep = _getEntryPoints('obspy.plugin.waveform', 'readFormat')
        # using format keyword
        for ep in formats_ep.values():
            isFormat = load_entry_point(
                ep.dist.key, 'obspy.plugin.waveform.' + ep.name, 'isFormat')
            # BUG FIX: the original called
            # ``self.assertFalse(False, isFormat(tmpfile))`` which always
            # passes -- ``isFormat``'s return value was only used as the
            # failure *message*. Assert on the actual return value.
            self.assertFalse(isFormat(tmpfile))
    finally:
        # remove the temp file even if an assertion fails
        os.remove(tmpfile)
def test_isFormat(self):
    """
    Tests all isFormat methods against all data test files from the other
    modules for false positives.
    """
    formats_ep = _getEntryPoints('obspy.plugin.waveform', 'isFormat')
    formats = formats_ep.values()
    # Collect all false positives.
    false_positives = []
    # Big loop over every format.
    for format in formats:
        # search isFormat for given entry point
        isFormat = load_entry_point(
            format.dist.key, 'obspy.plugin.waveform.' + format.name,
            'isFormat')
        module_path = os.path.join(
            os.path.join(format.dist.location, *format.dist.key.split('.')),
            'tests', 'data')
        # Get all the test directories.
        paths = [os.path.join(
            os.path.join(f.dist.location, *f.dist.key.split('.')),
            'tests', 'data') for f in formats]
        # Remove the paths from the current module.
        paths = [path for path in paths if path != module_path]
        # Remove double paths because some modules can have two file
        # formats.
        paths = set(paths)
        for path in paths:
            # Collect all files found.
            filelist = []
            # Walk every path.
            for directory, _, files in os.walk(path):
                # Remove double entries from the .svn directories.
                if '.svn' in directory:
                    continue
                filelist.extend(
                    [os.path.join(directory, _i) for _i in files])
            for file in filelist:
                if isFormat(file) != False:
                    false_positives.append((format.name, file))
    # FIX: replace the bare ``try/except`` + ``raise Exception`` pattern
    # with assertEqual's ``msg`` argument for the same readable report.
    msg = 'False positives for isFormat:\n'
    msg += '\n'.join('\tFormat %s: %s' % (_i[0], _i[1])
                     for _i in false_positives)
    self.assertEqual(len(false_positives), 0, msg)
def test_readThreadSafe(self):
    """
    Tests for race conditions. Reading n_threads (currently 30) times
    the same waveform file in parallel and compare the results which
    must be all the same.
    """
    data = np.arange(0, 500)
    start = UTCDateTime(2009, 1, 13, 12, 1, 2, 999000)
    formats = _getEntryPoints('obspy.plugin.waveform', 'writeFormat')
    for format in formats:
        # XXX: skip SEGY and SU formats for now as they need some special
        # headers.
        if format in ['SEGY', 'SU', 'SEG2']:
            continue
        dt = np.dtype("int")
        if format in ('MSEED', 'GSE2'):
            # these formats cannot store int64 samples
            dt = "int32"
        tr = Trace(data=data.astype(dt))
        tr.stats.network = "BW"
        tr.stats.station = "MANZ1"
        tr.stats.location = "00"
        tr.stats.channel = "EHE"
        tr.stats.calib = 0.999999
        tr.stats.delta = 0.005
        tr.stats.starttime = start
        # create waveform file with given format and byte order
        outfile = NamedTemporaryFile().name
        tr.write(outfile, format=format)
        if format == 'Q':
            outfile += '.QHD'
        n_threads = 30
        streams = []

        def testFunction(streams):
            st = read(outfile, format=format)
            streams.append(st)

        # Read the same file from n_threads threads in parallel.
        for _i in xrange(n_threads):
            thread = threading.Thread(target=testFunction, args=(streams,))
            thread.start()
        # Wait until all threads are finished; give up after 120 seconds
        # (such a long time is needed for debugging with valgrind).
        # FIX: the original had an unreachable ``break`` directly after
        # ``raise Warning(msg)`` and a no-op ``else: continue`` branch.
        wait_start = time.time()
        while threading.activeCount() > 1:
            if time.time() - wait_start >= 120:
                raise Warning('Not all threads finished!')
        # FIX: the original left the comparison commented out, so the
        # test never verified anything; every thread must have read
        # data identical to what was written.
        for st in streams:
            np.testing.assert_array_equal(st[0].data, tr.data)
        # clean up files
        os.remove(outfile)
        if format == 'Q':
            os.remove(outfile[:-4] + '.QBN')
            os.remove(outfile[:-4])
def test_readThreadSafe(self):
    """
    Tests for race conditions. Reading n_threads (currently 30) times
    the same waveform file in parallel and compare the results which
    must be all the same.
    """
    data = np.arange(0, 500)
    start = UTCDateTime(2009, 1, 13, 12, 1, 2, 999000)
    formats = _getEntryPoints('obspy.plugin.waveform', 'writeFormat')
    for format in formats:
        # XXX: skip SEGY and SU formats for now as they need some special
        # headers.
        if format in ['SEGY', 'SU', 'SEG2']:
            continue
        dt = np.int_
        if format in ('MSEED', 'GSE2'):
            # these formats cannot store int64 samples
            dt = np.int32
        tr = Trace(data=data.astype(dt))
        tr.stats.network = "BW"
        tr.stats.station = "MANZ1"
        tr.stats.location = "00"
        tr.stats.channel = "EHE"
        tr.stats.calib = 0.999999
        tr.stats.delta = 0.005
        tr.stats.starttime = start
        # create waveform file with given format and byte order
        with NamedTemporaryFile() as tf:
            outfile = tf.name
            tr.write(outfile, format=format)
            if format == 'Q':
                outfile += '.QHD'
            n_threads = 30
            streams = []
            timeout = 120
            if 'TRAVIS' in os.environ:
                timeout = 570  # 30 seconds under Travis' limit
            cond = threading.Condition()

            def testFunction(streams, cond):
                # worker: read the file, store the result, then wake up
                # the waiting main thread
                st = read(outfile, format=format)
                streams.append(st)
                with cond:
                    cond.notify()
            # Read the ten files at one and save the output in the just
            # created class.
            our_threads = []
            for _i in range(n_threads):
                thread = threading.Thread(target=testFunction,
                                          args=(streams, cond))
                thread.start()
                our_threads.append(thread)
            our_threads = set(our_threads)
            # Loop until all threads are finished.
            # NOTE(review): threads are never join()ed; completion is
            # detected by intersecting threading.enumerate() with the
            # threads started above.
            start = time.time()
            while True:
                with cond:
                    cond.wait(1)
                remaining_threads = set(threading.enumerate())
                if len(remaining_threads & our_threads) == 0:
                    break
                # Avoid infinite loop and leave after some time; such a
                # long time is needed for debugging with valgrind or Travis
                elif time.time() - start >= timeout:  # pragma: no cover
                    msg = 'Not all threads finished after %d seconds!' % (
                        timeout)
                    raise Warning(msg)
            # Compare all values which should be identical and clean up
            # files
            for st in streams:
                np.testing.assert_array_equal(st[0].data, tr.data)
            if format == 'Q':
                os.remove(outfile[:-4] + '.QBN')
                os.remove(outfile[:-4] + '.QHD')
def worker(_i, input_queue, work_queue, output_queue, log_queue, mappings={}):
    # Indexer worker loop: pops file paths from ``input_queue``, extracts
    # per-trace metadata, gap information, feature values and previews,
    # and appends result dictionaries to ``output_queue``.  Runs until
    # interrupted via KeyboardInterrupt.
    # NOTE(review): ``mappings={}`` is a mutable default argument; it is
    # only read here, but the shared-dict pitfall applies if a caller
    # ever mutates it.
    try:
        # fetch and initialize all possible waveform feature plug-ins
        all_features = {}
        for (key, ep) in _getEntryPoints("obspy.db.feature").items():
            try:
                # load plug-in
                cls = ep.load()
                # initialize class
                func = cls().process
            except Exception as e:
                msg = "Could not initialize feature %s. (%s)"
                log_queue.append(msg % (key, str(e)))
                continue
            all_features[key] = {}
            all_features[key]["run"] = func
            try:
                all_features[key]["indexer_kwargs"] = cls["indexer_kwargs"]
            except:
                # plug-in defines no extra kwargs for read()
                all_features[key]["indexer_kwargs"] = {}
        # loop through input queue
        while True:
            # fetch a unprocessed item
            try:
                filepath, (path, file, features) = input_queue.popitem()
            except:
                # nothing available (or concurrent pop) -- retry
                continue
            # skip item if already in work queue
            if filepath in work_queue:
                continue
            work_queue.append(filepath)
            # get additional kwargs for read method from waveform plug-ins
            kwargs = {"verify_chksum": False}
            for feature in features:
                if feature not in all_features:
                    log_queue.append("%s: Unknown feature %s" %
                                     (filepath, feature))
                    continue
                kwargs.update(all_features[feature]["indexer_kwargs"])
            # read file and get file stats
            try:
                stats = os.stat(filepath)
                stream = read(filepath, **kwargs)
                # get gap and overlap information
                gap_list = stream.getGaps()
                # merge channels and replace gaps/overlaps with 0 to prevent
                # generation of masked arrays
                stream.merge(fill_value=0)
            except Exception as e:
                msg = "[Reading stream] %s: %s"
                log_queue.append(msg % (filepath, e))
                try:
                    work_queue.remove(filepath)
                except:
                    pass
                continue
            # build up dictionary of gaps and overlaps for easier lookup
            gap_dict = {}
            for gap in gap_list:
                id = ".".join(gap[0:4])
                temp = {
                    "gap": gap[6] >= 0,
                    "starttime": gap[4].datetime,
                    "endtime": gap[5].datetime,
                    "samples": abs(gap[7]),
                }
                gap_dict.setdefault(id, []).append(temp)
            # loop through traces
            dataset = []
            for trace in stream:
                result = {}
                # general file information
                result["mtime"] = int(stats.st_mtime)
                result["size"] = stats.st_size
                result["path"] = path
                result["file"] = file
                result["filepath"] = filepath
                # trace information
                result["format"] = trace.stats._format
                result["station"] = trace.stats.station
                result["location"] = trace.stats.location
                result["channel"] = trace.stats.channel
                result["network"] = trace.stats.network
                result["starttime"] = trace.stats.starttime.datetime
                result["endtime"] = trace.stats.endtime.datetime
                result["calib"] = trace.stats.calib
                result["npts"] = trace.stats.npts
                result["sampling_rate"] = trace.stats.sampling_rate
                # check for any id mappings
                if trace.id in mappings:
                    old_id = trace.id
                    for mapping in mappings[old_id]:
                        # skip mappings whose validity window does not
                        # overlap the trace's time span
                        if trace.stats.starttime and \
                                trace.stats.starttime > mapping["endtime"]:
                            continue
                        if trace.stats.endtime and \
                                trace.stats.endtime < mapping["starttime"]:
                            continue
                        result["network"] = mapping["network"]
                        result["station"] = mapping["station"]
                        result["location"] = mapping["location"]
                        result["channel"] = mapping["channel"]
                        msg = "Mapping '%s' to '%s.%s.%s.%s'" % (
                            old_id,
                            mapping["network"],
                            mapping["station"],
                            mapping["location"],
                            mapping["channel"],
                        )
                        log_queue.append(msg)
                # gaps/overlaps for current trace
                result["gaps"] = gap_dict.get(trace.id, [])
                # apply feature functions
                result["features"] = []
                for key in features:
                    if key not in all_features:
                        continue
                    try:
                        # run plug-in and update results
                        # NOTE(review): the inner loop reuses the name
                        # ``key``, shadowing the outer loop variable; the
                        # outer for reassigns it next iteration so this
                        # is harmless but confusing.
                        temp = all_features[key]["run"](trace)
                        for key, value in temp.items():
                            result["features"].append({"key": key,
                                                       "value": value})
                    except Exception as e:
                        msg = "[Processing feature] %s: %s"
                        log_queue.append(msg % (filepath, e))
                        continue
                # generate preview of trace
                result["preview"] = None
                if ".LOG.L." not in file or trace.stats.channel != "LOG":
                    # create previews only for non-log files (see issue #400)
                    try:
                        trace = createPreview(trace, 30)
                        result["preview"] = trace.data.dumps()
                    except ValueError:
                        pass
                    except Exception as e:
                        msg = "[Creating preview] %s: %s"
                        log_queue.append(msg % (filepath, e))
                # update dataset
                dataset.append(result)
            del stream
            # return results to main loop
            try:
                output_queue.append(dataset)
            except:
                pass
            try:
                work_queue.remove(filepath)
            except:
                pass
    except KeyboardInterrupt:
        return
def test_readThreadSafe(self):
    """
    Tests for race conditions. Reading n_threads (currently 30) times
    the same waveform file in parallel and compare the results which
    must be all the same.
    """
    data = np.arange(0, 500)
    start = UTCDateTime(2009, 1, 13, 12, 1, 2, 999000)
    formats = _getEntryPoints('obspy.plugin.waveform', 'writeFormat')
    for format in formats:
        # XXX: skip SEGY and SU formats for now as they need some special
        # headers.
        if format in ['SEGY', 'SU', 'SEG2']:
            continue
        dt = np.dtype("int")
        if format in ('MSEED', 'GSE2'):
            # these formats cannot store int64 samples
            dt = "int32"
        tr = Trace(data=data.astype(dt))
        tr.stats.network = "BW"
        tr.stats.station = "MANZ1"
        tr.stats.location = "00"
        tr.stats.channel = "EHE"
        tr.stats.calib = 0.999999
        tr.stats.delta = 0.005
        tr.stats.starttime = start
        # create waveform file with given format and byte order
        outfile = NamedTemporaryFile().name
        tr.write(outfile, format=format)
        if format == 'Q':
            outfile += '.QHD'
        n_threads = 30
        streams = []

        def testFunction(streams):
            st = read(outfile, format=format)
            streams.append(st)

        # Read the same file from n_threads threads in parallel.
        for _i in xrange(n_threads):
            thread = threading.Thread(target=testFunction, args=(streams, ))
            thread.start()
        # Wait until all threads are finished; give up after 120 seconds
        # (such a long time is needed for debugging with valgrind).
        # FIX: the original had an unreachable ``break`` directly after
        # ``raise Warning(msg)`` and a no-op ``else: continue`` branch.
        wait_start = time.time()
        while threading.activeCount() > 1:
            if time.time() - wait_start >= 120:
                raise Warning('Not all threads finished!')
        # FIX: the original left the comparison commented out, so the
        # test never verified anything; every thread must have read
        # data identical to what was written.
        for st in streams:
            np.testing.assert_array_equal(st[0].data, tr.data)
        # clean up files
        os.remove(outfile)
        if format == 'Q':
            os.remove(outfile[:-4] + '.QBN')
            os.remove(outfile[:-4])
def worker(_i, input_queue, work_queue, output_queue, log_queue, mappings={}):
    # Indexer worker loop: pops file paths from ``input_queue``, extracts
    # per-trace metadata, gap information, feature values and previews,
    # and appends result dictionaries to ``output_queue``.  Runs until
    # interrupted via KeyboardInterrupt.
    # NOTE(review): ``mappings={}`` is a mutable default argument; it is
    # only read here, but the shared-dict pitfall applies if a caller
    # ever mutates it.
    try:
        # fetch and initialize all possible waveform feature plug-ins
        all_features = {}
        for (key, ep) in _getEntryPoints('obspy.db.feature').items():
            try:
                # load plug-in
                cls = ep.load()
                # initialize class
                func = cls().process
            except Exception as e:
                msg = 'Could not initialize feature %s. (%s)'
                log_queue.append(msg % (key, str(e)))
                continue
            all_features[key] = {}
            all_features[key]['run'] = func
            try:
                all_features[key]['indexer_kwargs'] = cls['indexer_kwargs']
            except:
                # plug-in defines no extra kwargs for read()
                all_features[key]['indexer_kwargs'] = {}
        # loop through input queue
        while True:
            # fetch a unprocessed item
            try:
                filepath, (path, file, features) = input_queue.popitem()
            except:
                # nothing available (or concurrent pop) -- retry
                continue
            # skip item if already in work queue
            if filepath in work_queue:
                continue
            work_queue.append(filepath)
            # get additional kwargs for read method from waveform plug-ins
            kwargs = {'verify_chksum': False}
            for feature in features:
                if feature not in all_features:
                    log_queue.append('%s: Unknown feature %s' %
                                     (filepath, feature))
                    continue
                kwargs.update(all_features[feature]['indexer_kwargs'])
            # read file and get file stats
            try:
                stats = os.stat(filepath)
                stream = read(filepath, **kwargs)
                # get gap and overlap information
                gap_list = stream.getGaps()
                # merge channels and replace gaps/overlaps with 0 to prevent
                # generation of masked arrays
                stream.merge(fill_value=0)
            except Exception as e:
                msg = '[Reading stream] %s: %s'
                log_queue.append(msg % (filepath, e))
                try:
                    work_queue.remove(filepath)
                except:
                    pass
                continue
            # build up dictionary of gaps and overlaps for easier lookup
            gap_dict = {}
            for gap in gap_list:
                id = '.'.join(gap[0:4])
                temp = {
                    'gap': gap[6] >= 0,
                    'starttime': gap[4].datetime,
                    'endtime': gap[5].datetime,
                    'samples': abs(gap[7])
                }
                gap_dict.setdefault(id, []).append(temp)
            # loop through traces
            dataset = []
            for trace in stream:
                result = {}
                # general file information
                result['mtime'] = int(stats.st_mtime)
                result['size'] = stats.st_size
                result['path'] = path
                result['file'] = file
                result['filepath'] = filepath
                # trace information
                result['format'] = trace.stats._format
                result['station'] = trace.stats.station
                result['location'] = trace.stats.location
                result['channel'] = trace.stats.channel
                result['network'] = trace.stats.network
                result['starttime'] = trace.stats.starttime.datetime
                result['endtime'] = trace.stats.endtime.datetime
                result['calib'] = trace.stats.calib
                result['npts'] = trace.stats.npts
                result['sampling_rate'] = trace.stats.sampling_rate
                # check for any id mappings
                if trace.id in mappings:
                    old_id = trace.id
                    for mapping in mappings[old_id]:
                        # skip mappings whose validity window does not
                        # overlap the trace's time span
                        if trace.stats.starttime and \
                           trace.stats.starttime > mapping['endtime']:
                            continue
                        if trace.stats.endtime and \
                           trace.stats.endtime < mapping['starttime']:
                            continue
                        result['network'] = mapping['network']
                        result['station'] = mapping['station']
                        result['location'] = mapping['location']
                        result['channel'] = mapping['channel']
                        msg = "Mapping '%s' to '%s.%s.%s.%s'" % \
                            (old_id, mapping['network'], mapping['station'],
                             mapping['location'], mapping['channel'])
                        log_queue.append(msg)
                # gaps/overlaps for current trace
                result['gaps'] = gap_dict.get(trace.id, [])
                # apply feature functions
                result['features'] = []
                for key in features:
                    if key not in all_features:
                        continue
                    try:
                        # run plug-in and update results
                        # NOTE(review): the inner loop reuses the name
                        # ``key``, shadowing the outer loop variable; the
                        # outer for reassigns it next iteration so this
                        # is harmless but confusing.
                        temp = all_features[key]['run'](trace)
                        for key, value in temp.items():
                            result['features'].append({
                                'key': key,
                                'value': value
                            })
                    except Exception as e:
                        msg = '[Processing feature] %s: %s'
                        log_queue.append(msg % (filepath, e))
                        continue
                # generate preview of trace
                result['preview'] = None
                if '.LOG.L.' not in file or trace.stats.channel != 'LOG':
                    # create previews only for non-log files (see issue #400)
                    try:
                        trace = createPreview(trace, 30)
                        result['preview'] = trace.data.dumps()
                    except ValueError:
                        pass
                    except Exception as e:
                        msg = '[Creating preview] %s: %s'
                        log_queue.append(msg % (filepath, e))
                # update dataset
                dataset.append(result)
            del stream
            # return results to main loop
            try:
                output_queue.append(dataset)
            except:
                pass
            try:
                work_queue.remove(filepath)
            except:
                pass
    except KeyboardInterrupt:
        return
def test_readAndWrite(self):
    """
    Tests read and write methods for all waveform plug-ins.

    Every writable format is round-tripped via file name, open file
    handler, StringIO and cStringIO, and the resulting stream's
    metadata is verified.
    """
    data = np.arange(0, 2000)
    start = UTCDateTime(2009, 1, 13, 12, 1, 2, 999000)
    formats = _getEntryPoints('obspy.plugin.waveform', 'writeFormat')
    for format in formats:
        # XXX: skip SEGY and SU formats for now as they need some special
        # headers.
        if format in ['SEGY', 'SU', 'SEG2']:
            continue
        for native_byteorder in ['<', '>']:
            for byteorder in ['<', '>', '=']:
                # new trace object in native byte order
                dt = np.dtype("int").newbyteorder(native_byteorder)
                if format in ('MSEED', 'GSE2'):
                    # MiniSEED and GSE2 cannot write int64, enforce type
                    dt = "int32"
                tr = Trace(data=data.astype(dt))
                tr.stats.network = "BW"
                tr.stats.station = "MANZ1"
                tr.stats.location = "00"
                tr.stats.channel = "EHE"
                tr.stats.calib = 0.199999
                tr.stats.delta = 0.005
                tr.stats.starttime = start
                # create waveform file with given format and byte order
                outfile = NamedTemporaryFile().name
                tr.write(outfile, format=format, byteorder=byteorder)
                if format == 'Q':
                    outfile += '.QHD'
                # read in again using auto detection
                st = read(outfile)
                self.assertEquals(len(st), 1)
                self.assertEquals(st[0].stats._format, format)
                # read in using format argument
                st = read(outfile, format=format)
                self.assertEquals(len(st), 1)
                self.assertEquals(st[0].stats._format, format)
                # read in using file handlers and buffers, skip Q files
                # as the format needs multiple files
                if format not in ['Q']:
                    # FIX: the original opened file handles without ever
                    # closing them (resource leak); use context managers
                    # file handler without format
                    with open(outfile, 'rb') as temp:
                        st = read(temp)
                    self.assertEquals(len(st), 1)
                    self.assertEquals(st[0].stats._format, format)
                    # file handler with format
                    with open(outfile, 'rb') as temp:
                        st = read(temp, format=format)
                    self.assertEquals(len(st), 1)
                    self.assertEquals(st[0].stats._format, format)
                    # read the raw bytes once instead of re-opening (and
                    # leaking) the file for every buffer test below
                    with open(outfile, 'rb') as temp:
                        raw = temp.read()
                    # StringIO without format
                    st = read(StringIO.StringIO(raw))
                    self.assertEquals(len(st), 1)
                    self.assertEquals(st[0].stats._format, format)
                    # StringIO with format
                    st = read(StringIO.StringIO(raw), format=format)
                    self.assertEquals(len(st), 1)
                    self.assertEquals(st[0].stats._format, format)
                    # cStringIO without format
                    st = read(cStringIO.StringIO(raw))
                    self.assertEquals(len(st), 1)
                    self.assertEquals(st[0].stats._format, format)
                    # cStringIO with format
                    st = read(cStringIO.StringIO(raw), format=format)
                    self.assertEquals(len(st), 1)
                    self.assertEquals(st[0].stats._format, format)
                # check byte order
                self.assertEquals(st[0].data.dtype.byteorder, '=')
                # check meta data
                # some formats do not contain a calibration factor
                if format not in ['MSEED', 'WAV', 'TSPAIR', 'SLIST']:
                    self.assertAlmostEquals(st[0].stats.calib, 0.199999, 5)
                else:
                    self.assertEquals(st[0].stats.calib, 1.0)
                if format not in ['WAV']:
                    self.assertEquals(st[0].stats.starttime, start)
                    self.assertEquals(st[0].stats.endtime, start + 9.995)
                    self.assertEquals(st[0].stats.delta, 0.005)
                    self.assertEquals(st[0].stats.sampling_rate, 200.0)
                # network/station/location/channel codes
                if format in ['Q', 'SH_ASC', 'GSE2']:
                    # no network or location code in Q, SH_ASC, GSE2
                    self.assertEquals(st[0].id, ".MANZ1..EHE")
                elif format not in ['WAV']:
                    self.assertEquals(st[0].id, "BW.MANZ1.00.EHE")
                # remove temporary files
                os.remove(outfile)
                # Q files consist of two files - deleting additional file
                if format == 'Q':
                    os.remove(outfile[:-4] + '.QBN')
                    os.remove(outfile[:-4])
def test_readAndWrite(self):
    """
    Tests read and write methods for all installed waveform plug-ins.
    """
    data = np.arange(0, 2000)
    start = UTCDateTime(2009, 1, 13, 12, 1, 2, 999000)
    formats = _getEntryPoints('obspy.plugin.waveform', 'writeFormat')
    for format in formats:
        # XXX: skip SEGY and SU formats for now as they need some special
        # headers.
        if format in ['SEGY', 'SU', 'SEG2']:
            continue
        for native_byteorder in ['<', '>']:
            for byteorder in ['<', '>', '=']:
                # new trace object in native byte order
                dt = np.dtype("int").newbyteorder(native_byteorder)
                if format in ('MSEED', 'GSE2'):
                    # MiniSEED and GSE2 cannot write int64, enforce type
                    dt = "int32"
                tr = Trace(data=data.astype(dt))
                tr.stats.network = "BW"
                tr.stats.station = "MANZ1"
                tr.stats.location = "00"
                tr.stats.channel = "EHE"
                tr.stats.calib = 0.199999
                tr.stats.delta = 0.005
                tr.stats.starttime = start
                # create waveform file with given format and byte order
                outfile = NamedTemporaryFile().name
                tr.write(outfile, format=format, byteorder=byteorder)
                if format == 'Q':
                    outfile += '.QHD'
                # read in again using auto detection
                st = read(outfile)
                self.assertEquals(len(st), 1)
                self.assertEquals(st[0].stats._format, format)
                # read in using format argument
                st = read(outfile, format=format)
                self.assertEquals(len(st), 1)
                self.assertEquals(st[0].stats._format, format)
                # read in using a StringIO instances, skip Q files as it
                # needs multiple files
                if format not in ['Q']:
                    # NOTE(review): none of the handles opened below are
                    # ever closed -- they leak until garbage collection
                    # file handler without format
                    temp = open(outfile, 'rb')
                    st = read(temp)
                    self.assertEquals(len(st), 1)
                    self.assertEquals(st[0].stats._format, format)
                    # file handler with format
                    temp = open(outfile, 'rb')
                    st = read(temp, format=format)
                    self.assertEquals(len(st), 1)
                    self.assertEquals(st[0].stats._format, format)
                    # StringIO without format
                    temp = StringIO.StringIO(open(outfile, 'rb').read())
                    st = read(temp)
                    self.assertEquals(len(st), 1)
                    self.assertEquals(st[0].stats._format, format)
                    # StringIO with format
                    temp = StringIO.StringIO(open(outfile, 'rb').read())
                    st = read(temp, format=format)
                    self.assertEquals(len(st), 1)
                    self.assertEquals(st[0].stats._format, format)
                    # cStringIO without format
                    temp = cStringIO.StringIO(open(outfile, 'rb').read())
                    st = read(temp)
                    self.assertEquals(len(st), 1)
                    self.assertEquals(st[0].stats._format, format)
                    # cStringIO with format
                    temp = cStringIO.StringIO(open(outfile, 'rb').read())
                    st = read(temp, format=format)
                    self.assertEquals(len(st), 1)
                    self.assertEquals(st[0].stats._format, format)
                # check byte order
                self.assertEquals(st[0].data.dtype.byteorder, '=')
                # check meta data
                # some formats do not contain a calibration factor
                if format not in ['MSEED', 'WAV', 'TSPAIR', 'SLIST']:
                    self.assertAlmostEquals(st[0].stats.calib, 0.199999, 5)
                else:
                    self.assertEquals(st[0].stats.calib, 1.0)
                if format not in ['WAV']:
                    self.assertEquals(st[0].stats.starttime, start)
                    self.assertEquals(st[0].stats.endtime, start + 9.995)
                    self.assertEquals(st[0].stats.delta, 0.005)
                    self.assertEquals(st[0].stats.sampling_rate, 200.0)
                # network/station/location/channel codes
                if format in ['Q', 'SH_ASC', 'GSE2']:
                    # no network or location code in Q, SH_ASC, GSE2
                    self.assertEquals(st[0].id, ".MANZ1..EHE")
                elif format not in ['WAV']:
                    self.assertEquals(st[0].id, "BW.MANZ1.00.EHE")
                # remove temporary files
                os.remove(outfile)
                # Q files consist of two files - deleting additional file
                if format == 'Q':
                    os.remove(outfile[:-4] + '.QBN')
                    os.remove(outfile[:-4])
def worker(_i, input_queue, work_queue, output_queue, log_queue, mappings={}):
    # Python 2 variant of the indexer worker (``except Exception, e`` /
    # ``iteritems``).  NOTE(review): this copy appears TRUNCATED in this
    # file -- it ends inside the per-trace loop and the outer ``try:`` is
    # never closed with its ``except KeyboardInterrupt`` handler.
    # NOTE(review): ``mappings={}`` is a mutable default argument.
    try:
        # fetch and initialize all possible waveform feature plug-ins
        all_features = {}
        for (key, ep) in _getEntryPoints('obspy.db.feature').iteritems():
            try:
                # load plug-in
                cls = ep.load()
                # initialize class
                func = cls().process
            except Exception, e:
                msg = 'Could not initialize feature %s. (%s)'
                log_queue.append(msg % (key, str(e)))
                continue
            all_features[key] = {}
            all_features[key]['run'] = func
            try:
                all_features[key]['indexer_kwargs'] = cls['indexer_kwargs']
            except:
                # plug-in defines no extra kwargs for read()
                all_features[key]['indexer_kwargs'] = {}
        # loop through input queue
        while True:
            # fetch a unprocessed item
            try:
                filepath, (path, file, features) = input_queue.popitem()
            except:
                # nothing available (or concurrent pop) -- retry
                continue
            # skip item if already in work queue
            if filepath in work_queue:
                continue
            work_queue.append(filepath)
            # get additional kwargs for read method from waveform plug-ins
            kwargs = {'verify_chksum': False}
            for feature in features:
                if feature not in all_features:
                    log_queue.append('%s: Unknown feature %s' %
                                     (filepath, feature))
                    continue
                kwargs.update(all_features[feature]['indexer_kwargs'])
            # read file and get file stats
            try:
                stats = os.stat(filepath)
                stream = read(filepath, **kwargs)
                # get gap and overlap information
                gap_list = stream.getGaps()
                # merge channels and replace gaps/overlaps with 0 to prevent
                # generation of masked arrays
                stream.merge(fill_value=0)
            except Exception, e:
                msg = '[Reading stream] %s: %s'
                log_queue.append(msg % (filepath, e))
                try:
                    work_queue.remove(filepath)
                except:
                    pass
                continue
            # build up dictionary of gaps and overlaps for easier lookup
            gap_dict = {}
            for gap in gap_list:
                id = '.'.join(gap[0:4])
                temp = {
                    'gap': gap[6] >= 0,
                    'starttime': gap[4].datetime,
                    'endtime': gap[5].datetime,
                    'samples': abs(gap[7])
                }
                gap_dict.setdefault(id, []).append(temp)
            # loop through traces
            dataset = []
            for trace in stream:
                result = {}
                # general file information
                result['mtime'] = int(stats.st_mtime)
                result['size'] = stats.st_size
                result['path'] = path
                result['file'] = file
                result['filepath'] = filepath
                # trace information
                result['format'] = format = trace.stats._format
                result['station'] = trace.stats.station
                result['location'] = trace.stats.location
                result['channel'] = trace.stats.channel
                result['network'] = trace.stats.network
                result['starttime'] = trace.stats.starttime.datetime
                result['endtime'] = trace.stats.endtime.datetime
                result['calib'] = trace.stats.calib
                result['npts'] = trace.stats.npts
                result['sampling_rate'] = trace.stats.sampling_rate
                # check for any id mappings
                if trace.id in mappings:
                    old_id = trace.id
                    for mapping in mappings[old_id]:
                        # skip mappings whose validity window does not
                        # overlap the trace's time span
                        if trace.stats.starttime and \
                           trace.stats.starttime > mapping['endtime']:
                            continue
                        if trace.stats.endtime and \
                           trace.stats.endtime < mapping['starttime']:
                            continue
                        result['network'] = mapping['network']
                        result['station'] = mapping['station']
                        result['location'] = mapping['location']
                        result['channel'] = mapping['channel']
                        msg = "Mapping '%s' to '%s.%s.%s.%s'" % \
                            (old_id, mapping['network'], mapping['station'],
                             mapping['location'], mapping['channel'])
                        log_queue.append(msg)
                # gaps/overlaps for current trace
                result['gaps'] = gap_dict.get(trace.id, [])
                # apply feature functions
                result['features'] = []
                for key in features:
                    if key not in all_features:
                        continue
                    try:
                        # run plug-in and update results
                        # NOTE(review): inner loop shadows ``key``
                        temp = all_features[key]['run'](trace)
                        for key, value in temp.iteritems():
                            result['features'].append({'key': key,
                                                       'value': value})
                    except Exception, e:
                        msg = '[Processing feature] %s: %s'
                        log_queue.append(msg % (filepath, e))
                        continue
                # generate preview of trace
                result['preview'] = None
                if '.LOG.L.' not in file or trace.stats.channel != 'LOG':
                    # create previews only for non-log files (see issue #400)
                    try:
                        trace = createPreview(trace, 30)
                        result['preview'] = trace.data.dumps()
                    except ValueError:
                        pass
                    except Exception, e:
                        msg = '[Creating preview] %s: %s'
                        log_queue.append(msg % (filepath, e))
                # NOTE(review): source breaks off here -- the remainder of
                # the loop body and the closing exception handler of the
                # outer ``try`` are missing in this copy.
def test_readThreadSafe(self):
    """
    Tests for race conditions. Reading n_threads (currently 30) times
    the same waveform file in parallel and compare the results which
    must be all the same.
    """
    data = np.arange(0, 500)
    start = UTCDateTime(2009, 1, 13, 12, 1, 2, 999000)
    formats = _getEntryPoints('obspy.plugin.waveform', 'writeFormat')
    for format in formats:
        # XXX: skip SEGY and SU formats for now as they need some special
        # headers.
        if format in ['SEGY', 'SU', 'SEG2']:
            continue
        dt = np.int_
        if format in ('MSEED', 'GSE2'):
            # these formats cannot store int64 samples
            dt = np.int32
        tr = Trace(data=data.astype(dt))
        tr.stats.network = "BW"
        tr.stats.station = "MANZ1"
        tr.stats.location = "00"
        tr.stats.channel = "EHE"
        tr.stats.calib = 0.999999
        tr.stats.delta = 0.005
        tr.stats.starttime = start
        # create waveform file with given format and byte order
        with NamedTemporaryFile() as tf:
            outfile = tf.name
            tr.write(outfile, format=format)
            if format == 'Q':
                outfile += '.QHD'
            n_threads = 30
            streams = []
            timeout = 120
            if 'TRAVIS' in os.environ:
                timeout = 570  # 30 seconds under Travis' limit
            cond = threading.Condition()

            def testFunction(streams, cond):
                # worker: read the file, store the result, then wake up
                # the waiting main thread
                st = read(outfile, format=format)
                streams.append(st)
                with cond:
                    cond.notify()
            # Read the ten files at one and save the output in the just
            # created class.
            for _i in range(n_threads):
                thread = threading.Thread(target=testFunction,
                                          args=(streams, cond))
                thread.start()
            # Loop until all threads are finished.
            # NOTE(review): ``active_count() == 1`` assumes no other
            # threads exist in the process; the started threads are
            # never join()ed.
            start = time.time()
            while True:
                with cond:
                    cond.wait(1)
                if threading.active_count() == 1:
                    break
                # Avoid infinite loop and leave after some time; such a
                # long time is needed for debugging with valgrind or Travis
                elif time.time() - start >= timeout:  # pragma: no cover
                    msg = 'Not all threads finished after %d seconds!' % (
                        timeout)
                    raise Warning(msg)
            # Compare all values which should be identical and clean up
            # files
            for st in streams:
                np.testing.assert_array_equal(st[0].data, tr.data)
            if format == 'Q':
                os.remove(outfile[:-4] + '.QBN')
                os.remove(outfile[:-4] + '.QHD')