def __setitem__(self, key, value):
    """
    Insert a branch or leaf under key.

    OrderedDict values are treated as branches (wrapped in a
    ParameterGTree when not already one); any other value is stored as
    a leaf.  Raises MsPASSError when the key would collide with an
    existing node of the other kind.
    """
    if isinstance(value, collections.OrderedDict):
        # Reuse the value directly when it is already a GTree;
        # otherwise construct one from the OrderedDict.
        branch = value if isinstance(value, ParameterGTree) else ParameterGTree(value)
        if key in self.get_leaf_keys():
            raise MsPASSError(
                "[Warning] There already exists a leaf in this GTree with key '{leaf_key}', Please check again.".format(
                    leaf_key=key
                )
            )
        collections.OrderedDict.__setitem__(self, key, branch)
        return
    if key in self.get_branch_keys():
        raise MsPASSError(
            "[Warning] There already exists a branch in this GTree with key '{branch_key}', Please check again.".format(
                branch_key=key
            )
        )
    collections.OrderedDict.__setitem__(self, key, value)
def bury_the_dead(self, d, save_history=True):
    """
    Clear the contents of an ensemble and optionally save the history
    and error log of the dead.  Return the cleaned ensmble.
    """
    # Only ensemble objects are supported.
    if not isinstance(d, (TimeSeriesEnsemble, SeismogramEnsemble)):
        raise MsPASSError(
            "Undertaker.bury_the_dead",
            "Illegal input type - only works with ensemble objects",
            ErrorSeverity.Invalid,
        )
    # This is a pybind11 wrapper not defined in C++ but useful here
    ensmd = d._get_ensemble_md()
    nlive = sum(1 for member in d.member if member.live)
    if isinstance(d, TimeSeriesEnsemble):
        newens = TimeSeriesEnsemble(ensmd, nlive)
    elif isinstance(d, SeismogramEnsemble):
        newens = SeismogramEnsemble(ensmd, nlive)
    else:
        raise MsPASSError(
            "Undertaker.bury_the_dead",
            "Coding error - newens constructor section has invalid type\nThat cannot happen unless the original code was incorrectly changed",
            ErrorSeverity.Invalid,
        )
    for member in d.member:
        if member.live:
            newens.member.append(member)
        elif save_history:
            # NOTE(review): this saves the ensemble's id/elog once per dead
            # member -- presumably the member's own elog was intended.
            # Behavior preserved here; confirm upstream.
            self._save_elog(d.id, d.elog)
    return newens
def test_MsPASSError():
    """Check MsPASSError message/severity handling for every constructor form."""
    try:
        MetadataDefinitions('foo')
    except MsPASSError as err:
        assert err.message == 'bad file'
        assert err.severity == ErrorSeverity.Invalid
    # (constructor args, expected message, expected severity) per form:
    # explicit enum, severity name string, unrecognized severity, default
    cases = [
        (('test error1', ErrorSeverity.Informational), 'test error1', ErrorSeverity.Informational),
        (('test error2', "Suspect"), 'test error2', ErrorSeverity.Suspect),
        (('test error3', 123), 'test error3', ErrorSeverity.Fatal),
        (("test error4",), 'test error4', ErrorSeverity.Fatal),
    ]
    for ctor_args, want_message, want_severity in cases:
        try:
            raise MsPASSError(*ctor_args)
        except MsPASSError as err:
            assert err.message == want_message
            assert err.severity == want_severity
def test_MsPASSError():
    """Verify MsPASSError construction for each supported severity form."""

    def check(err, message, severity):
        # shared assertions for one caught error
        assert err.message == message
        assert err.severity == severity

    try:
        MetadataDefinitions("foo")
    except MsPASSError as err:
        check(err, "bad file", ErrorSeverity.Invalid)
    try:
        raise MsPASSError("test error1", ErrorSeverity.Informational)
    except MsPASSError as err:
        check(err, "test error1", ErrorSeverity.Informational)
    try:
        raise MsPASSError("test error2", "Suspect")
    except MsPASSError as err:
        check(err, "test error2", ErrorSeverity.Suspect)
    try:
        raise MsPASSError("test error3", 123)
    except MsPASSError as err:
        check(err, "test error3", ErrorSeverity.Fatal)
    try:
        raise MsPASSError("test error4")
    except MsPASSError as err:
        check(err, "test error4", ErrorSeverity.Fatal)
def __init__(self, schema_file=None):
    """
    Construct the schema from a yaml definition file.

    When schema_file is None the default mspass.yaml is located under
    MSPASS_HOME when that environment variable is set, otherwise under
    the package data directory.  A name that does not exist as given is
    resolved against those same two locations.
    """
    self._attr_dict = {}
    home = os.environ['MSPASS_HOME'] if 'MSPASS_HOME' in os.environ else None
    if schema_file is None:
        if home is not None:
            schema_file = os.path.abspath(home) + '/data/yaml/mspass.yaml'
        else:
            schema_file = os.path.abspath(os.path.dirname(__file__) + '/../data/yaml/mspass.yaml')
    elif not os.path.isfile(schema_file):
        # a bare name: resolve against the standard yaml directories
        if home is not None:
            schema_file = os.path.join(os.path.abspath(home), 'data/yaml', schema_file)
        else:
            schema_file = os.path.abspath(os.path.join(os.path.dirname(__file__), '../data/yaml', schema_file))
    try:
        with open(schema_file, 'r') as stream:
            schema_dic = yaml.safe_load(stream)
    except yaml.YAMLError as e:
        raise MsPASSError('Cannot parse schema definition file: ' + schema_file, 'Fatal') from e
    except EnvironmentError as e:
        raise MsPASSError('Cannot open schema definition file: ' + schema_file, 'Fatal') from e
    try:
        _check_format(schema_dic)
    except schema.SchemaError as e:
        raise MsPASSError('The schema definition is not valid', 'Fatal') from e
    self._raw = schema_dic
def force_net(db, sta=None, net=None):
    """
    Forces all entries in arrival collection matching input station code
    sta to input value of parameter net.   This is the most brute force
    solution to set a net code, but is often the right tool.  Kind of like
    every toolbox needs a hammer.

    :param db: Database handle (function hits only arrival collection)
    :param sta: station to set
    :param net: network code to set sta entries to
    :return: number or documents set.
    :raises mspasspy.ccore.utility.MsPASSError: if sta or net is not given
    """
    # identity comparison with None is the correct idiom; == can be
    # hijacked by a custom __eq__ on the argument
    if sta is None or net is None:
        raise MsPASSError(
            "force_net (usage error): missing required sta and net argument", "Fatal"
        )
    dbarr = db.arrival
    curs = dbarr.find({"sta": sta})
    n = 0
    for doc in curs:
        dbarr.update_one({"_id": doc["_id"]}, {"$set": {"net": net}})
        n += 1
    return n
def str_to_parameters_dict(parameter_str):
    """
    Parse the parameter string defined by user into an ordered dict.
    The input str should be in the format like "a, b, c=d, e=f, ..."
    Positional (unkeyworded) parameters are stored under generated keys
    "arg_0", "arg_1", ... in order of appearance.

    :param parameter_str: a parameter string defined by user
    :return: An OrderedDict of parameters and arguments.
    :raises mspasspy.ccore.utility.MsPASSError: if any component contains
        more than one "=" sign.
    """
    parameters_dict = collections.OrderedDict()
    pairs = parameter_str.replace(" ", "").split(",")
    unkeyword_index = 0
    for pair in pairs:
        k_v = pair.split("=")
        if len(k_v) == 1:
            # unkeyworded parameter - generate a positional key
            key = "arg_{arg_index:d}".format(arg_index=unkeyword_index)
            value = k_v[0]
            unkeyword_index += 1
        elif len(k_v) == 2:
            key, value = k_v
        else:
            # bug fix: pass the severity as the second argument instead of
            # folding the word "Fatal" into the message text
            raise MsPASSError(
                "Wrong parameter string format: " + parameter_str, "Fatal"
            )
        parameters_dict[key] = value
    return parameters_dict
def _all_members_match(ens, key):
    """
    This is a helper function for below.  I scans ens to assure all members
    of the ensemble have the same value for the requested key.   It uses
    the python operator == for testing.  That can fail for a variety of
    reasons the "match" may be overly restrictive for some types of data
    linked to key.

    :param ens: ensemble data to scan.  Function will throw a MsPASS error
      if the data this symbol is associated with is not a mspass ensemble
      object.
    :param key: key whose values are to be tested for all members of ens.
    :return: True of all members match, false if there are any differences.
      Note if a key is not defined in a live member the result will be
      false.  Dead data are ignored.
    """
    if isinstance(ens, (TimeSeriesEnsemble, SeismogramEnsemble)):
        nlive = 0
        val0 = None
        for d in ens.member:
            if not d.live:
                continue
            # bug fix: the first live member must also be tested for the
            # key; previously a missing key on the first live member raised
            # a KeyError instead of returning False as documented
            if key not in d:
                return False
            if nlive == 0:
                val0 = d[key]
            elif val0 != d[key]:
                return False
            nlive += 1
        return True
    raise MsPASSError(
        "_all_members_match: input is not a mspass ensemble object", "Invalid"
    )
def set_database_client(self, database_host, database_port=None):
    """
    Set a database client by database_host(and database_port)

    :param database_host: the host address of database client
    :type database_host: :class:`str`
    :param database_port: the port of database client
    :type database_port: :class:`str`
    :raises mspasspy.ccore.utility.MsPASSError: if a client cannot be
      created and verified for the resulting address
    """
    database_address = database_host
    # append the port only when the host does not already carry one
    if ":" not in database_host and database_port:
        database_address += ":" + database_port
    # sanity check:  keep the old client so it can be restored if the new
    # address is unreachable
    temp_db_client = self._db_client
    try:
        self._db_client = DBClient(database_address)
        self._db_client.server_info()
    except Exception as err:
        # restore the _db_client
        self._db_client = temp_db_client
        # fixed typo in the original message ("Runntime") and chain the
        # cause so the underlying driver error is not lost
        raise MsPASSError(
            "Runtime error: cannot create a database client with: "
            + database_address,
            "Fatal",
        ) from err
def Pf2AttributeNameTbl(pf, tag="attributes"):
    """
    This function will parse a pf file to extract a tbl with a specific
    key and return a data structure that defines the names and types of
    each column in the input file.

    The structure returned is a tuple with three components:
    1 (index 0) python array of attribute names in the original tbl order
      This is used to parse the text file so the order matters a lot.
    2 (index 1) parallel array of type names for each attribute.  These
      are actual python type objects that can be used as the second arg
      of isinstance.
    3 (index 2) python dictionary keyed by name field that defines what a
      null value is for this attribute.

    :param pf: AntelopePf object to be parsed
    :param tag: &Tbl tag for section of pf to be parsed.
    :raises mspasspy.ccore.utility.MsPASSError: for an unsupported type
      name in the tbl
    """
    tbl = pf.get_tbl(tag)
    names = []
    dtypes = []
    nullvalues = []
    for line in tbl:
        temp = line.split()
        names.append(temp[0])
        typenamein = temp[1].lower()  # allows upper or lower case in names
        if typenamein in ("string", "str"):
            typ = str
            nullval = temp[2]
        elif typenamein in ("integer", "int", "long"):
            typ = int
            nullval = int(temp[2])
        elif typenamein in ("float", "double", "real", "epochtime"):
            typ = float
            nullval = float(temp[2])
        elif typenamein in ("bool", "boolean"):
            typ = bool
            # bug fix: bool("0") and bool("false") are True because any
            # nonempty string is truthy; parse the text explicitly instead
            nullval = temp[2].lower() in ("true", "t", "1", "yes", "y")
        else:
            raise MsPASSError(
                "parse_attribute_name_tbl: unsupported data type file=" + typenamein,
                "Fatal",
            )
        dtypes.append(typ)
        nullvalues.append(nullval)
    # zip preserves the tbl order established above
    nulls = dict(zip(names, nullvalues))
    return tuple([names, dtypes, nulls])
def dummy_reduce_func_mspasserror(
    data1, data2, *args, object_history=False, alg_id=None, dryrun=False, **kwargs
):
    """Test stub reduce function that always raises a Fatal MsPASSError."""
    raise MsPASSError("test", ErrorSeverity.Fatal)
def load_source_data_by_id(db, mspass_object):
    """
    Prototype function to load source data to any MsPASS data object based
    on the normalization key.   That keys is frozen in this version as
    "source_id" but may be changed to force constraints by the mspasspy
    schema classes.

    Handling of Ensembles and atomic objects are different conceptually but
    in fact do exactly the same thing.   That is, in all cases the
    algorithm queries the input object for the key "source_id".   If that
    fails it returns an error.  Otherwise, it finds the associated document
    in the source collection.   It then posts a frozen set of metadata to
    mspass_object.  If that is an ensemble it is posted to the ensemble
    metadata area.  If it is an atomic object it gets posted to the atomic
    object's metadata area.

    :param db: database handle with a source collection
    :param mspass_object: data object (atomic or ensemble) to receive the
      source attributes
    :return: mspass_object with source_lat, source_lon, source_depth and
      source_time set, or None when the database lookup itself failed
    :raises mspasspy.ccore.utility.MsPASSError: if source_id is missing or
      has no match in the source collection
    """
    dbsource = db.source
    if "source_id" not in mspass_object:
        raise MsPASSError(
            "load_source_data_by_id",
            "required attribute source_id not in ensemble metadata",
            ErrorSeverity.Invalid,
        )
    source_id = mspass_object["source_id"]
    # The way we currently do this source_id either matches one document in
    # source or none.  Hence, we can just use a find_one query.  Bug fix:
    # the try block is narrowed to the database call only - the original
    # bare except caught and silenced the MsPASSError raises above/below.
    try:
        srcrec = dbsource.find_one({"_id": source_id})
    except Exception:
        # preserve the original best-effort behavior for unexpected
        # database failures
        print("something threw an unexpected exception")
        return None
    # note find_one returns a None if there is no match.  Point this out
    # because if we used find we would use test size of return and use
    # next to get the data.  Find_one return is easier but depends upon
    # the uniqueness assumption
    if srcrec is None:
        raise MsPASSError(
            "load_source_data",
            # str() because source_id is normally an ObjectId which cannot
            # be concatenated to a str directly
            "no match found in source collection for source_id=" + str(source_id),
            ErrorSeverity.Invalid,
        )
    mspass_object["source_lat"] = srcrec["lat"]
    mspass_object["source_lon"] = srcrec["lon"]
    mspass_object["source_depth"] = srcrec["depth"]
    mspass_object["source_time"] = srcrec["time"]
    return mspass_object
def __getitem__(self, key):
    """
    Fetch the value stored under key, raising MsPASSError (rather than
    KeyError) when the key is absent from this tree.
    """
    if key in self:
        return collections.OrderedDict.__getitem__(self, key)
    raise MsPASSError(
        "[Warning] The key provided ({branch_key}) is not in this GTree, Please check again.".format(
            branch_key=key
        )
    )
def bring_out_your_dead(self, d, bury=False):
    """
    Seperate an ensemble into live and dead members.

    :param d: must be either a TimeSeriesEnsemble or SeismogramEnsemble of
      data to be processed.
    :param bury: if true the bury_the_dead method will be called on the
      ensemble of dead data before returning
    :return: python list with two elements. 0 is ensemble with live data
      and 1 is ensemble with dead data.
    :rtype: python list with two components
    """
    if not isinstance(d, (TimeSeriesEnsemble, SeismogramEnsemble)):
        raise MsPASSError(
            "Undertaker.bring_out_your_dead",
            "Illegal input type - only works with ensemble objects",
            ErrorSeverity.Invalid,
        )
    # This is a pybind11 wrapper not defined in C++ but useful here
    ensmd = d._get_ensemble_md()
    nlive = sum(1 for member in d.member if member.live)
    ndead = len(d.member) - nlive
    if isinstance(d, TimeSeriesEnsemble):
        newens = TimeSeriesEnsemble(ensmd, nlive)
        bodies = TimeSeriesEnsemble(ensmd, ndead)
    elif isinstance(d, SeismogramEnsemble):
        newens = SeismogramEnsemble(ensmd, nlive)
        bodies = SeismogramEnsemble(ensmd, ndead)
    else:
        raise MsPASSError(
            "Undertaker.bring_out_your_dead",
            "Coding error - newens constructor section has invalid type\nThat cannot happen unless the original code was incorrectly changed",
            ErrorSeverity.Invalid,
        )
    for member in d.member:
        # route each member to the live or dead ensemble
        target = newens if member.live else bodies
        target.member.append(member)
    if bury:
        self._save_elog(d.id, d.elog)
    return [newens, bodies]
def apply_aliases(self, md, alias):
    """
    Apply a set of aliases to a data object.

    Changes the unique keys of md into their aliases.  The alias argument
    is either a path to a yaml file of key:alias pairs or a dict of such
    pairs.  A "key" that is itself an alias is first resolved to its
    unique name.  Each applied alias is also registered in the schema's
    internal alias container so the same schema object can convert the
    alias back later.

    :param md: Data object to be altered.   Normally a
      :class:`mspasspy.ccore.seismic.Seismogram` or
      :class:`mspasspy.ccore.seismic.TimeSeries` but can be a raw
      :class:`mspasspy.ccore.utility.Metadata`.
    :type md: :class:`mspasspy.ccore.utility.Metadata`
    :param alias: a yaml file or a dict that have pairs of key:alias
    :type alias: dict/str
    """
    alias_dic = alias
    if isinstance(alias, str) and os.path.isfile(alias):
        try:
            with open(alias, "r") as stream:
                alias_dic = yaml.safe_load(stream)
        except yaml.YAMLError as e:
            raise MsPASSError(
                "Cannot parse alias definition file: " + alias, "Fatal") from e
        except EnvironmentError as e:
            raise MsPASSError(
                "Cannot open alias definition file: " + alias, "Fatal") from e
    if not isinstance(alias_dic, dict):
        raise MsPASSError(
            "The alias argument of type {} is not recognized, it should be either a {} path or a {}"
            .format(type(alias), str, dict),
            "Fatal",
        )
    for k, a in alias_dic.items():
        # resolve k in case it is itself an alias, then register and apply
        unique_k = self.unique_name(k)
        self.add_alias(unique_k, a)
        md.change_key(unique_k, a)
def get_branch(self, key):
    """
    Extract the contents of a named branch.

    Returns a copy of the tree with the associated key from the branch
    name upward.  The tree returned will have the root of the tree set
    as current.
    """
    if key in self.get_branch_keys():
        return self[key]
    raise MsPASSError("[Error] Wrong Key, Please check your input key again.")
def get_leaf(self, key):
    """
    Returns a copy of the key-value pair defined by key.

    This function only search for the key in this layer, and won't
    return value stored in higher levels.  To search in the entire tree,
    use "get".
    """
    if key in self.get_leaf_keys():
        return self[key]
    raise MsPASSError("[Error] Wrong Key, Please check your input key again.")
def get_required(collection):
    """
    Return the list of attribute keys required for documents of the given
    collection.

    :param collection: collection name (site, channel, source,
      wf_TimeSeries or wf_Seismogram)
    :raises mspasspy.ccore.utility.MsPASSError: for an unknown collection
    """
    required = {
        'site': ['lat', 'lon', 'elev'],
        'channel': ['lat', 'lon', 'elev', 'hang', 'vang'],
        'source': ['lat', 'lon', 'depth', 'time'],
        'wf_TimeSeries': ['npts', 'delta', 'starttime'],
        'wf_Seismogram': ['npts', 'delta', 'starttime'],
    }
    if collection in required:
        return required[collection]
    raise MsPASSError('No data on required attributes for collection='
                      + collection, 'Fatal')
def bundle_seed_data(ensemble):
    """
    This function can be used to take an (unordered) input ensemble of
    TimeSeries objects generated from miniseed data and produce an output
    ensemble of Seismograms produced by bundles linked to the seed name
    codes net, sta, chan, and loc.  An implicit assumption of the algorithm
    used here is that the data are a variant of a shot gather and the input
    ensemble defines one net:sta:chan:loc:time_interval for each record
    that is to be bundled.

    It can only properly handle pure duplicates for a given
    net:sta:chan:loc combination (i.e. the same TimeSeries defined by
    net:sta:chan:loc AND a common start and end time).  Data with gaps
    broken into multiple net:sta:chan:loc TimeSeries with different start
    and end times will produce incomplete results:  Seismograms in the
    output associated with such inputs will either be killed with an
    associated error log entry or in the best case truncated to the
    overlap range of one of the segments with the gap(s) between.
    Irregular start times of any set of TimeSeries forming a single bundle
    are subject to the same truncation or discard rules described in the
    related function Bundle3C.

    :param ensemble: is the input ensemble of TimeSeries to be processed.
    :return: ensemble of Seismogram objects made by bundling input data
    :rtype: SeismogramEnsemble
    :exception: Can throw a MsPASSError for a number of conditions.
      Caller should be enclosed in a handler if run on a large data set.
    """
    if not isinstance(ensemble, TimeSeriesEnsemble):
        raise MsPASSError(
            "bundle_seed_data: illegal input - must be a TimeSeriesEnsemble",
            ErrorSeverity.Invalid)
    try:
        return _bundle_seed_data(ensemble)
    except Exception as err:
        raise MsPASSError(
            '_bundle_seed_data threw an exception - see more messages below',
            ErrorSeverity.Invalid) from err
def reference(self, key):
    """
    Return the collection name that a key is referenced from.

    :param key: the name of the key
    :type key: str
    :return: the name of the collection
    :rtype: str
    :raises mspasspy.ccore.utility.MsPASSError: if the key is not defined
    """
    if key not in self._main_dic:
        raise MsPASSError(key + ' is not defined', 'Invalid')
    entry = self._main_dic[key]
    # fall back to this schema's own collection when no explicit
    # reference is recorded for the key
    if 'reference' in entry:
        return entry['reference']
    return self._collection_str
def get_required(collection):
    """
    Look up the required metadata attributes for a MongoDB collection.

    :param collection: collection name (site, channel, source,
      wf_TimeSeries or wf_Seismogram)
    :return: list of required attribute keys
    :raises mspasspy.ccore.utility.MsPASSError: for an unknown collection
    """
    table = (
        (("site",), ["lat", "lon", "elev"]),
        (("channel",), ["lat", "lon", "elev", "hang", "vang"]),
        (("source",), ["lat", "lon", "depth", "time"]),
        (("wf_TimeSeries", "wf_Seismogram"), ["npts", "delta", "starttime"]),
    )
    for collection_names, keys in table:
        if collection in collection_names:
            return keys
    raise MsPASSError(
        "No data on required attributes for collection=" + collection, "Fatal")
def concept(self, key):
    """
    Return a description of the concept this attribute defines.

    :param key: The name that defines the attribute of interest
    :type key: str
    :return: A string with a terse description of the concept this attribute defines
    :rtype: str
    :raises mspasspy.ccore.utility.MsPASSError: if the key or its concept
      is not defined
    """
    # consistency fix: sibling accessors (reference, readonly) raise a
    # MsPASSError for an unknown key; previously an unknown key escaped
    # from here as a raw KeyError
    if key not in self._main_dic:
        raise MsPASSError(key + " is not defined", "Invalid")
    if "concept" not in self._main_dic[key]:
        raise MsPASSError("concept is not defined for " + key, "Complaint")
    return self._main_dic[key]["concept"]
def readonly(self, key):
    """
    Check if an attribute is marked readonly.

    :param key: key to be tested
    :type key: str
    :return: `True` if the key is readonly or its readonly attribute is
      not defined, else return `False`
    :rtype: bool
    :raises mspasspy.ccore.utility.MsPASSError: if the key is not defined
    """
    if key not in self._main_dic:
        raise MsPASSError(key + ' is not defined', 'Invalid')
    # an undefined readonly property defaults to True (the safe side)
    return self._main_dic[key].get('readonly', True)
def prune(self, key):
    """
    Remove a branch or leaf defined by key from self.

    Return a copy of the branch/leaf pruned in the process (like
    get_branch/get_leaf but self is altered)

    :raises mspasspy.ccore.utility.MsPASSError: if key is not present in
      this tree
    """
    if key not in self:
        raise MsPASSError("[Error] Wrong Key, Please check your input key again.")
    if key in self.get_leaf_keys():
        ret_val = self.get_leaf(key)
    else:
        # key is present but not a leaf, so it must name a branch
        ret_val = self.get_branch(key)
    # bug fix: OrderedDict.popitem takes no key argument - the original
    # call popitem(self, key) interpreted key as the "last" flag and
    # removed the wrong (end) entry.  Delete the requested key instead.
    collections.OrderedDict.__delitem__(self, key)
    return ret_val
def put(self, key, value, separator="."):
    """
    putter with same behavior for compound keys defined for get method.

    A put creates any new branches the compound key implies when they are
    not already present.  As with the setter function, users can also use
    indexing to store data in the GTree; note that when putting data via
    indexes intermediate branches are NOT created automatically and users
    must add them themselves.
    """
    keys = key.split(separator) if isinstance(key, str) else key
    if len(keys) == 0:
        raise MsPASSError("The key is empty, please check again.")
    node = self
    # walk down to the parent of the leaf, creating branches as needed
    for branch_level in keys[:-1]:
        if branch_level in node.get_leaf_keys():
            raise MsPASSError(
                "[Error] Invalid compound Key, there is a leaf with the same name in level "
                + branch_level
                + ". Please check your input key again."
            )
        if branch_level not in node.get_branch_keys():
            node[branch_level] = ParameterGTree()
        node = node.get_branch(branch_level)
    leaf_key = keys[-1]
    if leaf_key in node.get_branch_keys():
        raise MsPASSError(
            "[Error] Invalid compound Key, there is a branch with the same name in "
            + leaf_key
            + ". Please check your input key again."
        )
    node[leaf_key] = value
def load_site_data(db, ens):
    """
    Loads site data into ens.  Similar to load_source_data but uses a
    diffrent match:  net,sta, time matching startdate->enddate.  Mark
    members dead and post an elog message if the site coordinates are
    not found.
    """
    dbsite = db.site
    try:
        for d in ens.member:
            if d.dead():
                continue
            t0 = d['starttime']
            net = d['net']
            sta = d['sta']
            # window match: the station epoch must bracket the start time
            query = {
                'net': {'$eq': net},
                'sta': {'$eq': sta},
                'starttime': {'$lt': t0},
                'endtime': {'$gt': t0},
            }
            n = dbsite.count_documents(query)
            if n == 0:
                d.kill()
                d.elog.log_error(
                    'load_site_data',
                    'no match found in site collection for net=' + net +
                    ' sta=' + sta + ' for this event',
                    ErrorSeverity.Invalid)
                continue
            siterec = dbsite.find_one(query)
            for attr in ('lat', 'lon', 'elev'):
                d['site_' + attr] = siterec[attr]
            d['site_id'] = siterec['site_id']
            if n > 1:
                message = "Multiple ({n}) matches found for net={net} and sta={sta} with reference time {t0}".format(
                    n=n, net=net, sta=sta, t0=t0)
                d.elog.log_error('load_site_data', message,
                                 ErrorSeverity.Complaint)
        return ens
    except Exception as err:
        raise MsPASSError('Something threw an unexpected exception',
                          ErrorSeverity.Invalid) from err
def load_site_data(db, ens):
    """
    Loads site data into each live member of ens.

    Similar to load_source_data but uses a diffrent match:  net, sta and
    a time test matching startdate->enddate.  Members with no matching
    site document are killed and an elog message is posted.
    """
    dbsite = db.site

    def _load_one(d):
        # one member:  query site by net/sta with epoch bracketing starttime
        t0 = d["starttime"]
        net = d["net"]
        sta = d["sta"]
        query = {
            "net": {"$eq": net},
            "sta": {"$eq": sta},
            "starttime": {"$lt": t0},
            "endtime": {"$gt": t0},
        }
        n = dbsite.count_documents(query)
        if n == 0:
            d.kill()
            d.elog.log_error(
                "load_site_data",
                "no match found in site collection for net="
                + net
                + " sta="
                + sta
                + " for this event",
                ErrorSeverity.Invalid,
            )
            return
        siterec = dbsite.find_one(query)
        d["site_lat"] = siterec["lat"]
        d["site_lon"] = siterec["lon"]
        d["site_elev"] = siterec["elev"]
        d["site_id"] = siterec["site_id"]
        if n > 1:
            message = "Multiple ({n}) matches found for net={net} and sta={sta} with reference time {t0}".format(
                n=n, net=net, sta=sta, t0=t0
            )
            d.elog.log_error("load_site_data", message, ErrorSeverity.Complaint)

    try:
        for d in ens.member:
            if not d.dead():
                _load_one(d)
        return ens
    except Exception as err:
        raise MsPASSError(
            "Something threw an unexpected exception", ErrorSeverity.Invalid
        ) from err
def __init__(self, schema_file=None):
    """
    Load and validate a schema definition yaml file.

    The default file is data/yaml/mspass.yaml under MSPASS_HOME when
    that environment variable is set, otherwise under the package data
    directory.  A file name that does not resolve as given is looked up
    against those same two locations.
    """
    self._attr_dict = {}

    def _yaml_path(name):
        # resolve name against MSPASS_HOME or the package data directory
        if "MSPASS_HOME" in os.environ:
            return os.path.join(
                os.path.abspath(os.environ["MSPASS_HOME"]), "data/yaml", name
            )
        return os.path.abspath(
            os.path.join(os.path.dirname(__file__), "../data/yaml", name)
        )

    if schema_file is None:
        schema_file = _yaml_path("mspass.yaml")
    elif not os.path.isfile(schema_file):
        schema_file = _yaml_path(schema_file)
    try:
        with open(schema_file, "r") as stream:
            schema_dic = yaml.safe_load(stream)
    except yaml.YAMLError as e:
        raise MsPASSError(
            "Cannot parse schema definition file: " + schema_file, "Fatal"
        ) from e
    except EnvironmentError as e:
        raise MsPASSError(
            "Cannot open schema definition file: " + schema_file, "Fatal"
        ) from e
    try:
        _check_format(schema_dic)
    except schema.SchemaError as e:
        raise MsPASSError("The schema definition is not valid", "Fatal") from e
    self._raw = schema_dic
def test_ErrorLogger():
    """Exercise ErrorLogger with explicit args, a MsPASSError, and bad input."""
    errlog = ErrorLogger()
    assert errlog.log_error("1", "2", ErrorSeverity(3)) == 1
    first = errlog[0]
    assert first.algorithm == "1"
    assert first.message == "2"
    assert first.badness == ErrorSeverity.Complaint
    assert first.job_id == errlog.get_job_id()

    errlog.log_error(MsPASSError("foo", ErrorSeverity.Fatal))
    second = errlog[1]
    assert second.algorithm == "MsPASSError"
    assert second.message == "foo"
    assert second.badness == ErrorSeverity.Fatal
    assert second.job_id == errlog.get_job_id()

    # anything that is neither the str form nor a MsPASSError is rejected
    with pytest.raises(TypeError, match="'int' is given"):
        errlog.log_error(123)
def set_writeable(self, key):
    """
    Force an attribute to be writeable.

    Normally some parameters are marked readonly on construction to
    avoid corrupting the database with inconsistent data defined with a
    common key (e.g. sta).  This method overrides such definitions for
    any key so marked.  It should be used with caution as it could have
    unintended side effects.

    :param key: the key for the attribute with properties to be redefined
    :type key: str
    :raises mspasspy.ccore.utility.MsPASSError: if the key is not defined
    """
    if key in self._main_dic:
        self._main_dic[key]["readonly"] = False
    else:
        raise MsPASSError(key + " is not defined", "Invalid")