def mspass_func_wrapper_multi(func, data1, data2, *args, preserve_history=False, instance=None, dryrun=False, **kwargs):
    """
    Execute func on a pair of MsPASS data objects, recording history and errors in both.

    This wrapper serves the same functionality as mspass_func_wrapper, with two
    differences: it accepts two mspasspy data objects as input, and
    inplace_return is not implemented.  Processing history and error log
    entries generated by the call are duplicated into BOTH inputs.

    :param func: target function
    :param data1: first input datum, only mspasspy data objects are accepted,
      i.e. TimeSeries, Seismogram, Ensemble.
    :param data2: second input datum, only mspasspy data objects are accepted,
      i.e. TimeSeries, Seismogram, Ensemble.
    :param args: extra arguments forwarded to func
    :param preserve_history: True to preserve this processing history in the data
      objects, False not to. preserve_history and instance are intimately related
      and control how object level history is handled. Object level history is
      disabled by default for efficiency. If preserve_history is set True and the
      string passed as instance is defined (not None which is the default) each
      Seismogram or TimeSeries object will attempt to save the history through a
      new_map operation. If the history chain is empty this will silently
      generate an error posted to error log on each object.
    :param instance: instance is a unique id to record the usage of func while
      preserving the history.
    :type instance: str
    :param dryrun: True for dry-run; the algorithm is not run, but the arguments
      used in this wrapper will be checked. Useful for pre-run checks of a large
      job to validate a workflow. Errors generate exceptions but the function
      returns before attempting any calculations.
    :param kwargs: extra kv arguments forwarded to func
    :return: the output of func (None when a handled error was trapped and logged)
    """
    # Validate both inputs in order so the error reports the first offender.
    for datum in (data1, data2):
        if not isinstance(
                datum,
                (Seismogram, TimeSeries, SeismogramEnsemble, TimeSeriesEnsemble)):
            raise TypeError(
                "mspass_func_wrapper_multi only accepts mspass object as data input"
            )
    algname = func.__name__
    if preserve_history and instance is None:
        raise ValueError(
            algname + ": preserve_history was true but instance not defined")
    if dryrun:
        return "OK"

    def _post_error(target, atomic_msg, ensemble_msg, severity):
        # Atomic objects carry their own elog; ensembles go through the helper.
        if isinstance(target, (Seismogram, TimeSeries)):
            target.elog.log_error(algname, atomic_msg, severity)
        else:
            logging_helper.ensemble_error(target, algname, ensemble_msg, severity)

    try:
        res = func(data1, data2, *args, **kwargs)
        if preserve_history:
            # Mirror the history record into both inputs.
            logging_helper.info(data1, algname, instance)
            logging_helper.info(data2, algname, instance)
        return res
    except RuntimeError as err:
        for datum in (data1, data2):
            _post_error(datum, str(err), err, ErrorSeverity.Invalid)
    except MsPASSError as ex:
        # Fatal errors propagate; anything else is logged into both inputs.
        if ex.severity == ErrorSeverity.Fatal:
            raise
        for datum in (data1, data2):
            _post_error(datum, ex.message, ex.message, ex.severity)
def test_main(self):
    """
    End-to-end test of the dbclean command line tool.

    Seeds wf_TimeSeries with documents containing type errors, then checks:
    (1) dbclean exits with code -1 when given no action flags, (2) -ft/-d/-r
    fix types, delete, and rename attributes, and (3) -ft alone only fixes
    types.
    """
    self.db["wf_TimeSeries"].delete_many({})
    ts1 = copy.deepcopy(self.test_ts)
    ts2 = copy.deepcopy(self.test_ts)
    ts3 = copy.deepcopy(self.test_ts)
    logging_helper.info(ts1, "1", "deepcopy")
    logging_helper.info(ts2, "1", "deepcopy")
    logging_helper.info(ts3, "1", "deepcopy")
    # fix types
    ts1["npts"] = "123"
    ts2["delta"] = "3"
    ts3["npts"] = "xyz"
    save_res_code = self.db.save_data(
        ts1, mode="promiscuous", storage_mode="gridfs", exclude_keys=["extra2"]
    )
    save_res_code = self.db.save_data(
        ts2, mode="promiscuous", storage_mode="gridfs", exclude_keys=["extra2"]
    )
    save_res_code = self.db.save_data(
        ts3, mode="promiscuous", storage_mode="gridfs", exclude_keys=["extra2"]
    )
    # exit: no action flags given, dbclean should abort with code -1
    with pytest.raises(SystemExit) as e:
        dbclean.main(["test_dbclean", "wf_TimeSeries"])
    assert e.type == SystemExit
    assert e.value.code == -1
    # delete starttime attribute
    # rename calib to rename_calib
    dbclean.main(
        [
            "test_dbclean",
            "wf_TimeSeries",
            "-ft",
            "-d",
            "starttime",
            "-r",
            "calib:rename_calib",
        ]
    )
    res1 = self.db["wf_TimeSeries"].find_one({"_id": ts1["_id"]})
    res2 = self.db["wf_TimeSeries"].find_one({"_id": ts2["_id"]})
    res3 = self.db["wf_TimeSeries"].find_one({"_id": ts3["_id"]})
    assert res1["npts"] == 123
    assert "starttime" not in res1
    assert "calib" not in res1
    assert "rename_calib" in res1
    assert res2["delta"] == 3.0
    assert "starttime" not in res2
    assert "calib" not in res2
    assert "rename_calib" in res2
    # can't be fixed
    assert res3["npts"] == "xyz"
    assert "starttime" not in res3
    assert "calib" not in res3
    assert "rename_calib" in res3
    self.db["wf_TimeSeries"].delete_many({})
    ts1 = copy.deepcopy(self.test_ts)
    ts2 = copy.deepcopy(self.test_ts)
    ts3 = copy.deepcopy(self.test_ts)
    logging_helper.info(ts1, "1", "deepcopy")
    logging_helper.info(ts2, "1", "deepcopy")
    logging_helper.info(ts3, "1", "deepcopy")
    # fix types
    ts1["npts"] = "123"
    ts2["delta"] = "3"
    ts3["npts"] = "xyz"
    save_res_code = self.db.save_data(
        ts1, mode="promiscuous", storage_mode="gridfs", exclude_keys=["extra2"]
    )
    save_res_code = self.db.save_data(
        ts2, mode="promiscuous", storage_mode="gridfs", exclude_keys=["extra2"]
    )
    save_res_code = self.db.save_data(
        ts3, mode="promiscuous", storage_mode="gridfs", exclude_keys=["extra2"]
    )
    # only fix types
    dbclean.main(["test_dbclean", "wf_TimeSeries", "-ft"])
    # BUG FIX: the original asserted on res1/res2/res3 fetched BEFORE the
    # collection was wiped and re-seeded, so the -ft-only run was never
    # actually verified.  Re-query the freshly cleaned documents here.
    res1 = self.db["wf_TimeSeries"].find_one({"_id": ts1["_id"]})
    res2 = self.db["wf_TimeSeries"].find_one({"_id": ts2["_id"]})
    res3 = self.db["wf_TimeSeries"].find_one({"_id": ts3["_id"]})
    assert res1["npts"] == 123
    assert res2["delta"] == 3.0
    # can't be fixed
    assert res3["npts"] == "xyz"
def mspass_func_wrapper(func, data, *args, preserve_history=False, instance=None, dryrun=False, inplace_return=False, **kwargs):
    """
    This function serves as a decorator wrapper, which is widely used in mspasspy library. It executes the target
    function on input data. Data are restricted to be mspasspy objects. It also preserves the processing history and
    error logs into the mspasspy objects. By wrapping your function using this decorator, you can save some workload.
    Runtime error won't be raised in order to be efficient in map-reduce operations. MspassError with a severity Fatal
    will be raised, others won't be raised.

    :param func: target function
    :param data: input data, only mspasspy data objects are accepted, i.e. TimeSeries, Seismogram, Ensemble.
    :param args: extra arguments
    :param preserve_history: True to preserve this processing history in the data object, False not to. preserve_history
     and instance are intimately related and control how object level history is handled. Object level history is
     disabled by default for efficiency. If preserve_history is set True and the string passed as instance is defined
     (not None which is the default) each Seismogram or TimeSeries object will attempt to save the history through a
     new_map operation. If the history chain is empty this will silently generate an error posted to error log on
     each object.
    :param instance: instance is a unique id to record the usage of func while preserving the history.
    :type instance: str
    :param dryrun: True for dry-run, the algorithm is not run, but the arguments used in this wrapper will be checked.
      This is useful for pre-run checks of a large job to validate a workflow. Errors generate exceptions but the
      function returns before attempting any calculations.
    :param inplace_return: when func is an in-place function that doesn't return anything, but you want to return the
      origin data (for example, in map-reduce), set inplace_return as true.
    :param kwargs: extra kv arguments
    :return: origin data or the output of func
    """
    if not isinstance(
            data,
            (Seismogram, TimeSeries, SeismogramEnsemble, TimeSeriesEnsemble)):
        raise TypeError(
            "mspass_func_wrapper only accepts mspass object as data input")
    algname = func.__name__
    if preserve_history and instance is None:
        raise ValueError(
            algname + ": preserve_history was true but instance not defined")
    if dryrun:
        return "OK"
    try:
        res = func(data, *args, **kwargs)
        if preserve_history:
            logging_helper.info(data, algname, instance)
        if res is None and inplace_return:
            return data
        return res
    except RuntimeError as err:
        if isinstance(data, (Seismogram, TimeSeries)):
            data.elog.log_error(algname, str(err), ErrorSeverity.Invalid)
        else:
            logging_helper.ensemble_error(data, algname, err,
                                          ErrorSeverity.Invalid)
        # BUG FIX: previously fell through returning None even when the caller
        # asked for inplace_return, dropping the datum (and its error log) from
        # a map-reduce pipeline.  Return the original data so it keeps flowing.
        if inplace_return:
            return data
    except MsPASSError as ex:
        # Fatal errors must propagate; anything else is logged on the datum.
        if ex.severity == ErrorSeverity.Fatal:
            raise
        if isinstance(data, (Seismogram, TimeSeries)):
            data.elog.log_error(algname, ex.message, ex.severity)
        else:
            logging_helper.ensemble_error(data, algname, ex.message,
                                          ex.severity)
        # Same fix as above: preserve the datum in in-place pipelines.
        if inplace_return:
            return data
def test_main(self):
    """
    End-to-end test of the dbclean command line tool.

    Seeds wf_TimeSeries with documents containing type errors, then checks:
    (1) dbclean exits with code -1 when given no action flags, (2) -ft/-d/-r
    fix types, delete, and rename attributes, and (3) -ft alone only fixes
    types.
    """
    self.db['wf_TimeSeries'].delete_many({})
    ts1 = copy.deepcopy(self.test_ts)
    ts2 = copy.deepcopy(self.test_ts)
    ts3 = copy.deepcopy(self.test_ts)
    logging_helper.info(ts1, 'deepcopy', '1')
    logging_helper.info(ts2, 'deepcopy', '1')
    logging_helper.info(ts3, 'deepcopy', '1')
    # fix types
    ts1['npts'] = '123'
    ts2['delta'] = '3'
    ts3['npts'] = 'xyz'
    save_res_code = self.db.save_data(ts1, mode='promiscuous',
                                      storage_mode='gridfs',
                                      exclude_keys=['extra2'])
    save_res_code = self.db.save_data(ts2, mode='promiscuous',
                                      storage_mode='gridfs',
                                      exclude_keys=['extra2'])
    save_res_code = self.db.save_data(ts3, mode='promiscuous',
                                      storage_mode='gridfs',
                                      exclude_keys=['extra2'])
    # exit: no action flags given, dbclean should abort with code -1
    with pytest.raises(SystemExit) as e:
        dbclean.main(['test_dbclean', 'wf_TimeSeries'])
    assert e.type == SystemExit
    assert e.value.code == -1
    # delete starttime attribute
    # rename calib to rename_calib
    dbclean.main([
        'test_dbclean', 'wf_TimeSeries', '-ft', '-d', 'starttime', '-r',
        'calib:rename_calib'
    ])
    res1 = self.db['wf_TimeSeries'].find_one({'_id': ts1['_id']})
    res2 = self.db['wf_TimeSeries'].find_one({'_id': ts2['_id']})
    res3 = self.db['wf_TimeSeries'].find_one({'_id': ts3['_id']})
    assert res1['npts'] == 123
    assert 'starttime' not in res1
    assert 'calib' not in res1
    assert 'rename_calib' in res1
    assert res2['delta'] == 3.0
    assert 'starttime' not in res2
    assert 'calib' not in res2
    assert 'rename_calib' in res2
    # can't be fixed
    assert res3['npts'] == 'xyz'
    assert 'starttime' not in res3
    assert 'calib' not in res3
    assert 'rename_calib' in res3
    self.db['wf_TimeSeries'].delete_many({})
    ts1 = copy.deepcopy(self.test_ts)
    ts2 = copy.deepcopy(self.test_ts)
    ts3 = copy.deepcopy(self.test_ts)
    logging_helper.info(ts1, 'deepcopy', '1')
    logging_helper.info(ts2, 'deepcopy', '1')
    logging_helper.info(ts3, 'deepcopy', '1')
    # fix types
    ts1['npts'] = '123'
    ts2['delta'] = '3'
    ts3['npts'] = 'xyz'
    save_res_code = self.db.save_data(ts1, mode='promiscuous',
                                      storage_mode='gridfs',
                                      exclude_keys=['extra2'])
    save_res_code = self.db.save_data(ts2, mode='promiscuous',
                                      storage_mode='gridfs',
                                      exclude_keys=['extra2'])
    save_res_code = self.db.save_data(ts3, mode='promiscuous',
                                      storage_mode='gridfs',
                                      exclude_keys=['extra2'])
    # only fix types
    dbclean.main(['test_dbclean', 'wf_TimeSeries', '-ft'])
    # BUG FIX: the original asserted on res1/res2/res3 fetched BEFORE the
    # collection was wiped and re-seeded, so the -ft-only run was never
    # actually verified.  Re-query the freshly cleaned documents here.
    res1 = self.db['wf_TimeSeries'].find_one({'_id': ts1['_id']})
    res2 = self.db['wf_TimeSeries'].find_one({'_id': ts2['_id']})
    res3 = self.db['wf_TimeSeries'].find_one({'_id': ts3['_id']})
    assert res1['npts'] == 123
    assert res2['delta'] == 3.0
    # can't be fixed
    assert res3['npts'] == 'xyz'
def test_main(self, capfd):
    """
    End-to-end test of the dbverify command line tool.

    Seeds wf_TimeSeries with documents containing type errors, a broken
    site_id cross reference, a missing source_id link key, and a missing
    required attribute, then checks the exact console output of the
    normalization (default and verbose), required, and schema_check tests.
    """
    self.db['wf_TimeSeries'].delete_many({})
    ts1 = copy.deepcopy(self.test_ts)
    ts2 = copy.deepcopy(self.test_ts)
    ts3 = copy.deepcopy(self.test_ts)
    logging_helper.info(ts1, 'deepcopy', '1')
    logging_helper.info(ts2, 'deepcopy', '1')
    logging_helper.info(ts3, 'deepcopy', '1')
    # fix types
    ts1['npts'] = '123'
    ts1['extra1'] = 'extra1'
    ts2['delta'] = '3'
    ts2['extra2'] = 'extra2'
    ts3['npts'] = 'xyz'
    ts3['extra2'] = 'extra2'
    # wrong normalized key
    ts1['site_id'] = ObjectId()
    ts2.erase('source_id')
    save_res_code = self.db.save_data(ts1, mode='promiscuous',
                                      storage_mode='gridfs')
    save_res_code = self.db.save_data(ts2, mode='promiscuous',
                                      storage_mode='gridfs')
    # erase required attributes
    save_res_code = self.db.save_data(ts3, mode='promiscuous',
                                      storage_mode='gridfs',
                                      exclude_keys=['starttime'])
    doc1 = self.db['wf_TimeSeries'].find_one({'_id': ts1['_id']})
    doc2 = self.db['wf_TimeSeries'].find_one({'_id': ts2['_id']})
    # NOTE: the original also fetched and dumped doc3 here, but neither value
    # was ever used in an assertion; that dead code has been removed.
    doc1_str = json_util.dumps(doc1, indent=2)
    doc2_str = json_util.dumps(doc2, indent=2)
    # default normalization test
    dbverify.main(['test_dbverify', '-t', 'normalization'])
    out, err = capfd.readouterr()
    assert out == "normalization test on normalized key= site_id found problems\nFound broken links in 1 documents checked\nNote error count limit= 1000\nIf the count is the same it means all data probably contain missing cross referencing ids\nRun in verbose mode to find out more information you will need to fix the problem\ncheck_links found no broken links with normalized key= channel_id\ncheck_links found no broken links with normalized key= source_id\n"
    # more than 1 collection to test
    dbverify.main([
        'test_dbverify', '-t', 'normalization', '-c', 'wf_TimeSeries', 'site'
    ])
    out, err = capfd.readouterr()
    assert out == "WARNING: normalization test can only be run on one collection at a time\nParsed a list with the following contents: ['wf_TimeSeries', 'site']\nRunning test on the first item in that list\nnormalization test on normalized key= site_id found problems\nFound broken links in 1 documents checked\nNote error count limit= 1000\nIf the count is the same it means all data probably contain missing cross referencing ids\nRun in verbose mode to find out more information you will need to fix the problem\ncheck_links found no broken links with normalized key= channel_id\ncheck_links found no broken links with normalized key= source_id\n"
    # verbose mode
    dbverify.main(['test_dbverify', '-t', 'normalization', '-v'])
    out, err = capfd.readouterr()
    assert out == "check_link found the following docs in wf_TimeSeries with broken links to site_id\n////////////////Doc number 1 with error///////////////\n" + doc1_str + "\n////////////////////////////////////////////////////////\ncheck_links found no undefined linking key to normalized key= site_id\ncheck_links found no broken links with normalized key= channel_id\ncheck_links found no undefined linking key to normalized key= channel_id\ncheck_links found no broken links with normalized key= source_id\ncheck_link found the following docs in wf_TimeSeries with undefined link keys to source_id\n////////////////Doc number 1 with error///////////////\n" + doc2_str + "\n////////////////////////////////////////////////////////\n"
    # default required test
    dbverify.main(['test_dbverify', '-t', 'required'])
    out, err = capfd.readouterr()
    mmkeys = {'npts': 2, 'delta': 1}
    mm_keys_str = json_util.dumps(mmkeys, indent=2)
    undef_keys = {'starttime': 1}
    undef_keys_str = json_util.dumps(undef_keys, indent=2)
    assert out == "////Results from run_check_required on collection= wf_TimeSeries\nCollection found 3 documents with type inconsistencies\nOffending keys and number found follow:\n" + mm_keys_str + "\nCollection found 1 documents with required keys that were not defined\nOffending keys and number found follow:\n" + undef_keys_str + "\n"
    # default schema_check test
    dbverify.main(['test_dbverify', '-t', 'schema_check'])
    out, err = capfd.readouterr()
    mmkeys = {'npts': 2, 'delta': 1}
    mm_keys_str = json_util.dumps(mmkeys, indent=2)
    undef_keys = {'extra1': 1, 'extra2': 2}
    undef_keys_str = json_util.dumps(undef_keys, indent=2)
    assert out == "check_attribute_types result for collection= wf_TimeSeries\nCollection found 3 documents with type inconsistencies\nOffending keys and number found follow:\n" + mm_keys_str + "\nCollection found 3 documents with keys not defined in the schema\nOffending keys and number found follow:\n" + undef_keys_str + "\n"
def mspass_func_wrapper(func, data, *args, object_history=False, alg_id=None, alg_name=None, dryrun=False, inplace_return=False, function_return_key=None, **kwargs):
    """
    Decorator wrapper to adapt a simple function to the mspass parallel processing framework.

    This function serves as a decorator wrapper, which is widely used in mspasspy library. It executes the target
    function on input data. Data are restricted to be mspasspy objects. It also preserves the processing history and
    error logs into the mspasspy objects. By wrapping your function using this decorator, you can save some workload.
    Runtime error won't be raised in order to be efficient in map-reduce operations. MspassError with a severity Fatal
    will be raised, others won't be raised.

    :param func: target function
    :param data: input data, only mspasspy data objects are accepted, i.e. TimeSeries, Seismogram, Ensemble.
    :param args: extra arguments
    :param object_history: True to preserve this processing history in the data object, False not to. object_history
     and alg_id are intimately related and control how object level history is handled. Object level history is
     disabled by default for efficiency. If object_history is set True and the string passed as alg_id is defined
     (not None which is the default) each Seismogram or TimeSeries object will attempt to save the history through a
     new_map operation. If the history chain is empty this will silently generate an error posted to error log on
     each object.
    :param alg_id: alg_id is a unique id to record the usage of func while preserving the history.
    :type alg_id: :class:`bson.objectid.ObjectId`
    :param alg_name: alg_name is the name the func we are gonna save while preserving the history.
    :type alg_name: :class:`str`
    :param dryrun: True for dry-run, the algorithm is not run, but the arguments used in this wrapper will be checked.
      This is useful for pre-run checks of a large job to validate a workflow. Errors generate exceptions but the
      function returns before attempting any calculations.
    :param inplace_return: when func is an in-place function that doesn't return anything, but you want to return the
      origin data (for example, in map-reduce), set inplace_return as true.
    :param function_return_key: Some functions one might want to wrap with this decorator return something that is
      appropriate to save as Metadata. If so, use this argument to define the key used to set that field in the data
      that is returned. This feature should normally be considered as a way to wrap an existing algorithm that you do
      not wish to alter, but which returns something useful. In principle that return can be almost anything, but we
      recommend this feature be limited to only simple types (i.e. int, float, etc.). The decorator makes no type
      checks so the caller is responsible for assuring what is posted will not cause downstream problems. The default
      for this parameter is None, which is taken to mean any return of the wrapped function will be ignored. Note
      that when function_return_key is anything but None, it is assumed the returned object is the (usually modified)
      data object.
    :param kwargs: extra kv arguments
    :return: origin data or the output of func
    """
    # Only MsPASS data objects are accepted; anything else is a usage error.
    if not isinstance(
            data,
            (Seismogram, TimeSeries, SeismogramEnsemble, TimeSeriesEnsemble)):
        raise TypeError(
            "mspass_func_wrapper only accepts mspass object as data input")
    # if not defined, fall back to the wrapped function's own name
    if not alg_name:
        alg_name = func.__name__
    # History recording requires an id; refuse a half-specified request.
    if object_history and alg_id is None:
        raise ValueError(alg_name +
                         ": object_history was true but alg_id not defined")
    if dryrun:
        # Argument checks passed; skip the computation entirely.
        return "OK"
    if is_input_dead(data):
        # Dead data pass through untouched so map pipelines keep their shape.
        return data
    try:
        res = func(data, *args, **kwargs)
        if object_history:
            logging_helper.info(data, alg_id, alg_name)
        if function_return_key is not None:
            # Caller wants the wrapped function's return stored as Metadata.
            if isinstance(function_return_key, str):
                data[function_return_key] = res
            else:
                # Bad key type: complain on the error log, drop the value.
                data.elog.log_error(
                    alg_name,
                    "Illegal type received for function_return_key argument=" +
                    str(type(function_return_key)) +
                    "\nReturn value not saved in Metadata",
                    ErrorSeverity.Complaint,
                )
            if not inplace_return:
                # function_return_key implies inplace_return; note the
                # inconsistency but proceed as if inplace_return were True.
                data.elog.log_error(
                    alg_name,
                    "Inconsistent arguments; inplace_return was set False and function_return_key was not None.\nAssuming inplace_return == True is correct",
                    ErrorSeverity.Complaint,
                )
            return data
        elif inplace_return:
            return data
        else:
            return res
    except RuntimeError as err:
        # Trap and log instead of raising to be efficient in map-reduce.
        if isinstance(data, (Seismogram, TimeSeries)):
            data.elog.log_error(alg_name, str(err), ErrorSeverity.Invalid)
        else:
            logging_helper.ensemble_error(data, alg_name, err,
                                          ErrorSeverity.Invalid)
        # some unexpected error happen, if inplace_return is true, we may want to return the original data
        if inplace_return:
            return data
    except MsPASSError as ex:
        # Fatal errors always propagate; lesser severities are logged.
        if ex.severity == ErrorSeverity.Fatal:
            raise
        if isinstance(data, (Seismogram, TimeSeries)):
            data.elog.log_error(alg_name, ex.message, ex.severity)
        else:
            logging_helper.ensemble_error(data, alg_name, ex.message,
                                          ex.severity)
        # some unexpected error happen, if inplace_return is true, we may want to return the original data
        if inplace_return:
            return data
def test_main(self, capfd):
    """
    End-to-end test of the dbverify command line tool.

    Seeds wf_TimeSeries with documents containing type errors, a broken
    site_id cross reference, a missing source_id link key, and a missing
    required attribute, then checks the exact console output of the
    normalization (default and verbose), required, and schema_check tests.
    """
    self.db["wf_TimeSeries"].delete_many({})
    ts1 = copy.deepcopy(self.test_ts)
    ts2 = copy.deepcopy(self.test_ts)
    ts3 = copy.deepcopy(self.test_ts)
    logging_helper.info(ts1, "1", "deepcopy")
    logging_helper.info(ts2, "1", "deepcopy")
    logging_helper.info(ts3, "1", "deepcopy")
    # fix types
    ts1["npts"] = "123"
    ts1["extra1"] = "extra1"
    ts2["delta"] = "3"
    ts2["extra2"] = "extra2"
    ts3["npts"] = "xyz"
    ts3["extra2"] = "extra2"
    # wrong normalized key
    ts1["site_id"] = ObjectId()
    ts2.erase("source_id")
    save_res_code = self.db.save_data(
        ts1, mode="promiscuous", storage_mode="gridfs"
    )
    save_res_code = self.db.save_data(
        ts2, mode="promiscuous", storage_mode="gridfs"
    )
    # erase required attributes
    save_res_code = self.db.save_data(
        ts3, mode="promiscuous", storage_mode="gridfs", exclude_keys=["starttime"]
    )
    doc1 = self.db["wf_TimeSeries"].find_one({"_id": ts1["_id"]})
    doc2 = self.db["wf_TimeSeries"].find_one({"_id": ts2["_id"]})
    # NOTE: the original also fetched and dumped doc3 here, but neither value
    # was ever used in an assertion; that dead code has been removed.
    doc1_str = json_util.dumps(doc1, indent=2)
    doc2_str = json_util.dumps(doc2, indent=2)
    # default normalization test
    dbverify.main(["test_dbverify", "-t", "normalization"])
    out, err = capfd.readouterr()
    assert (
        out
        == "normalization test on normalized key= site_id found problems\nFound broken links in 1 documents checked\nNote error count limit= 1000\nIf the count is the same it means all data probably contain missing cross referencing ids\nRun in verbose mode to find out more information you will need to fix the problem\ncheck_links found no broken links with normalized key= channel_id\ncheck_links found no broken links with normalized key= source_id\n"
    )
    # more than 1 collection to test
    dbverify.main(
        ["test_dbverify", "-t", "normalization", "-c", "wf_TimeSeries", "site"]
    )
    out, err = capfd.readouterr()
    assert (
        out
        == "WARNING: normalization test can only be run on one collection at a time\nParsed a list with the following contents: ['wf_TimeSeries', 'site']\nRunning test on the first item in that list\nnormalization test on normalized key= site_id found problems\nFound broken links in 1 documents checked\nNote error count limit= 1000\nIf the count is the same it means all data probably contain missing cross referencing ids\nRun in verbose mode to find out more information you will need to fix the problem\ncheck_links found no broken links with normalized key= channel_id\ncheck_links found no broken links with normalized key= source_id\n"
    )
    # verbose mode
    dbverify.main(["test_dbverify", "-t", "normalization", "-v"])
    out, err = capfd.readouterr()
    assert (
        out
        == "check_link found the following docs in wf_TimeSeries with broken links to site_id\n////////////////Doc number 1 with error///////////////\n"
        + doc1_str
        + "\n////////////////////////////////////////////////////////\ncheck_links found no undefined linking key to normalized key= site_id\ncheck_links found no broken links with normalized key= channel_id\ncheck_links found no undefined linking key to normalized key= channel_id\ncheck_links found no broken links with normalized key= source_id\ncheck_link found the following docs in wf_TimeSeries with undefined link keys to source_id\n////////////////Doc number 1 with error///////////////\n"
        + doc2_str
        + "\n////////////////////////////////////////////////////////\n"
    )
    # default required test
    dbverify.main(["test_dbverify", "-t", "required"])
    out, err = capfd.readouterr()
    mmkeys = {"npts": 2, "delta": 1}
    mm_keys_str = json_util.dumps(mmkeys, indent=2)
    undef_keys = {"starttime": 1}
    undef_keys_str = json_util.dumps(undef_keys, indent=2)
    assert (
        out
        == "////Results from run_check_required on collection= wf_TimeSeries\nCollection found 3 documents with type inconsistencies\nOffending keys and number found follow:\n"
        + mm_keys_str
        + "\nCollection found 1 documents with required keys that were not defined\nOffending keys and number found follow:\n"
        + undef_keys_str
        + "\n"
    )
    # default schema_check test
    dbverify.main(["test_dbverify", "-t", "schema_check"])
    out, err = capfd.readouterr()
    mmkeys = {"npts": 2, "delta": 1}
    mm_keys_str = json_util.dumps(mmkeys, indent=2)
    undef_keys = {"extra1": 1, "extra2": 2}
    undef_keys_str = json_util.dumps(undef_keys, indent=2)
    assert (
        out
        == "check_attribute_types result for collection= wf_TimeSeries\nCollection found 3 documents with type inconsistencies\nOffending keys and number found follow:\n"
        + mm_keys_str
        + "\nCollection found 3 documents with keys not defined in the schema\nOffending keys and number found follow:\n"
        + undef_keys_str
        + "\n"
    )