def test_Pf2AttributeNameTbl():
    pf = AntelopePf("python/tests/data/test_import.pf")
    attributes = Pf2AttributeNameTbl(pf, tag="wfprocess")
    names = attributes[0]
    types = attributes[1]
    nullvals = attributes[2]
    assert names == [
        "pwfid",
        "starttime",
        "endtime",
        "time_standard",
        "dir",
        "dfile",
        "foff",
        "dtype",
        "samprate",
        "unkwown",
        "algorithm",
        "lddate",
    ]
    assert len(types) == 12
    assert len(nullvals) == 12
    assert types == [int, float, float, str, str, str, int, str, float, int, str, float]
    assert nullvals["pwfid"] == -1
def build_helper():
    # A helper function to build a GTree instance for tests.
    pfPath = "./data/pf/RFdeconProcessor.pf"
    pf = AntelopePf(pfPath)
    pf_dict = AntelopePf2dict(pf)
    parameter_dict = collections.OrderedDict()
    parameter_dict["alg"] = "LeastSquares"
    parameter_dict["pf"] = pf_dict
    parameter_dict["object_history"] = "True"
    gTree = ParameterGTree(parameter_dict)
    return gTree
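# Hedged usage sketch (not part of the original tests; the function name is
# illustrative): the GTree returned by build_helper is typically serialized with
# json.dumps(gTree.asdict()), the same pattern used in
# test_mspass_map_with_filePath below when comparing against the "parameters"
# field stored in the history_global collection.
def example_serialize_helper_gtree():
    import json  # local import keeps the sketch self-contained

    gTree = build_helper()
    # asdict() flattens the parameter tree into a plain dict that json can encode.
    return json.dumps(gTree.asdict())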
def __init__(self, alg="LeastSquares", pf="RFdeconProcessor.pf"):
    self.algorithm = alg
    pfhandle = AntelopePf(pf)
    if self.algorithm == "LeastSquares":
        self.md = pfhandle.get_branch("LeastSquare")
        self.processor = decon.LeastSquareDecon(self.md)
        self.__uses_noise = False
    elif alg == "WaterLevel":
        self.md = pfhandle.get_branch("WaterLevel")
        self.processor = decon.WaterLevelDecon(self.md)
        self.__uses_noise = False
    elif alg == "MultiTaperXcor":
        self.md = pfhandle.get_branch("MultiTaperXcor")
        self.processor = decon.MultiTaperXcorDecon(self.md)
        self.__uses_noise = True
    elif alg == "MultiTaperSpecDiv":
        self.md = pfhandle.get_branch("MultiTaperSpecDiv")
        self.processor = decon.MultiTaperSpecDivDecon(self.md)
        self.__uses_noise = True
    elif alg == "GeneralizedIterative":
        raise RuntimeError("Generalized Iterative method not yet supported")
    else:
        raise RuntimeError("Illegal value for alg=" + alg)
    # Although the loaddata and loadwavelet methods don't always require it,
    # we cache the analysis time window for efficiency.
    tws = self.md.get_double("deconvolution_data_window_start")
    twe = self.md.get_double("deconvolution_data_window_end")
    self.__dwin = mspass.TimeWindow(tws, twe)
    if self.__uses_noise:
        tws = self.md.get_double("noise_window_start")
        twe = self.md.get_double("noise_window_end")
        self.__nwin = mspass.TimeWindow(tws, twe)
    else:
        # Always initialize even if not used.
        self.__nwin = mspass.TimeWindow()
def test_Textfile2Dataframe():
    pf = AntelopePf("python/tests/data/test_import.pf")
    attributes = Pf2AttributeNameTbl(pf, tag="wfprocess")
    names = attributes[0]
    textfile = "python/tests/data/testdb.wfprocess"
    for p in [True, False]:
        df = Textfile2Dataframe(textfile, attribute_names=names, parallel=p)
        assert df.shape[0] == 652
        assert df.shape[1] == 12
        assert df.iloc[1]["pwfid"] == 3103
        assert np.isclose(df.iloc[1]["starttime"], 1577912954.62975)
        assert np.isclose(df.iloc[1]["endtime"], 1577913089.7297499)
        assert df.iloc[1]["time_standard"] == "a"

        # Test setting null values
        df = Textfile2Dataframe(textfile, attribute_names=names, parallel=p)
        assert df.shape[0] == 652
        assert df.shape[1] == 12
        assert df.iloc[0]["pwfid"] == 3102
        assert np.isclose(df.iloc[0]["starttime"], 1577912967.53105)
        assert np.isclose(df.iloc[0]["endtime"], 1577913102.63105)
        assert df.iloc[0]["time_standard"] == "a"
        assert df.iloc[0]["foff"] == 0

        # Test turning off one_to_one
        df = Textfile2Dataframe(
            textfile, attribute_names=names, one_to_one=False, parallel=p
        )
        assert df.shape[0] == 651
        assert df.shape[1] == 12

        # Test adding a column
        df = Textfile2Dataframe(
            textfile,
            attribute_names=names,
            parallel=p,
            insert_column={"test_col": 1},
        )
        assert df.shape[0] == 652
        assert df.shape[1] == 13
        assert df.at[0, "test_col"] == 1
def check_and_parse_file(arg):
    if (
        isinstance(arg, os.PathLike)
        or isinstance(arg, str)
        or isinstance(arg, bytes)
    ) and os.path.isfile(arg):
        file_path = str(arg)
        if file_path.endswith(".pf"):
            pf = AntelopePf(file_path)
            # Convert the pf into an OrderedDict to coordinate with the GTree
            pf_value = AntelopePf2dict(pf)
            return pf_value
        elif file_path.endswith(".yaml"):
            with open(file_path, "r") as yaml_file:
                yaml_value = yaml.safe_load(yaml_file)
            return yaml_value
        else:
            # Currently only pf and yaml files are supported
            raise MsPASSError("Cannot handle file: " + file_path, "Fatal")
    return arg
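# Hedged usage sketch (not part of the original module; the yaml path and the
# function name are hypothetical): demonstrates the three dispatch paths of
# check_and_parse_file.
def example_check_and_parse_file_usage():
    # A .pf file is parsed with AntelopePf and converted to an OrderedDict.
    pf_branch = check_and_parse_file("./data/pf/RFdeconProcessor.pf")
    # A .yaml file is loaded with yaml.safe_load.
    yaml_doc = check_and_parse_file("./data/config_example.yaml")
    # Anything that is not an existing file is returned unchanged.
    passthrough = check_and_parse_file(42)
    return pf_branch, yaml_doc, passthrough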
def __init__(self, alg="LeastSquares", pf="RFdeconProcessor.pf"):
    self.algorithm = alg
    self.pf = pf
    pfhandle = AntelopePf(pf)
    if self.algorithm == "LeastSquares":
        self.md = pfhandle.get_branch("LeastSquare")
        self.__uses_noise = False
    elif alg == "WaterLevel":
        self.md = pfhandle.get_branch("WaterLevel")
        self.__uses_noise = False
    elif alg == "MultiTaperXcor":
        self.md = pfhandle.get_branch("MultiTaperXcor")
        self.__uses_noise = True
    elif alg == "MultiTaperSpecDiv":
        self.md = pfhandle.get_branch("MultiTaperSpecDiv")
        self.__uses_noise = True
    elif alg == "GeneralizedIterative":
        raise RuntimeError("Generalized Iterative method not yet supported")
    else:
        raise RuntimeError("Illegal value for alg=" + alg)
    # below is needed because AntelopePf cannot be serialized.
    self.md = Metadata(self.md)
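# Hedged usage sketch (not part of the original source; assumes this __init__
# belongs to a class named RFdeconProcessor, and the function name here is
# illustrative): choosing a different alg simply selects a different branch of
# RFdeconProcessor.pf, and the branch is copied into a plain Metadata container
# so the object stays serializable for parallel workflows.
def example_build_processor():
    processor = RFdeconProcessor(alg="MultiTaperXcor", pf="RFdeconProcessor.pf")
    # The selected branch is available as an ordinary Metadata object.
    return processor.md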
def test_mspass_map_with_filePath(self, spark_context):
    # Test mspass_map for spark (file input).
    # Data input for RFdecon, needed for parallelization.
    d = [get_live_seismogram(71, 2.0) for i in range(5)]
    for i in range(5):
        d[i].t0 = -5

    # Build the parameters string
    pfPath = "python/mspasspy/data/pf/RFdeconProcessor.pf"
    pf = AntelopePf(pfPath)
    pf_dict = AntelopePf2dict(pf)
    parameter_dict = collections.OrderedDict()
    parameter_dict["alg"] = "LeastSquares"
    parameter_dict["pf"] = pf_dict
    parameter_dict["object_history"] = "True"
    gTree = ParameterGTree(parameter_dict)
    json_params = json.dumps(gTree.asdict())

    data = spark_context.parallelize(d)
    spark_res = data.mspass_map(
        RFdecon,
        alg="LeastSquares",
        pf=pfPath,
        object_history=True,
        global_history=self.manager,
        alg_name=None,
        parameters=None,
    ).collect()
    manager_db = Database(self.client, "test_manager")
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 8
    )
    res = manager_db["history_global"].find_one({"alg_name": "RFdecon"})
    assert res["job_id"] == self.manager.job_id
    assert res["job_name"] == self.manager.job_name
    assert res["alg_name"] == "RFdecon"
    assert res["parameters"] == json_params
    spark_alg_id = res["alg_id"]

    # Test mspass_map for dask
    ddb = daskbag.from_sequence(d)
    dask_res = ddb.mspass_map(
        RFdecon,
        alg="LeastSquares",
        pf=pfPath,
        object_history=True,
        global_history=self.manager,
        alg_name=None,
        parameters=None,
    ).compute()
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 9
    )
    assert manager_db["history_global"].count_documents({"alg_id": spark_alg_id}) == 2
    docs = manager_db["history_global"].find({"alg_id": spark_alg_id})
    assert docs[0]["job_id"] == docs[1]["job_id"] == self.manager.job_id
    assert docs[0]["job_name"] == docs[1]["job_name"] == self.manager.job_name
    assert docs[0]["alg_name"] == docs[1]["alg_name"] == "RFdecon"
    assert docs[0]["parameters"] == docs[1]["parameters"] == json_params
    assert not docs[0]["time"] == docs[1]["time"]

    # Same alg + parameters combination -> same alg_id
    ddb = daskbag.from_sequence(d)
    dask_res = ddb.mspass_map(
        RFdecon,
        alg="LeastSquares",
        pf=pfPath,
        object_history=True,
        global_history=self.manager,
        alg_name=None,
        parameters=None,
    ).compute()
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 10
    )
    assert manager_db["history_global"].count_documents({"alg_id": spark_alg_id}) == 3

    # SPARK: test user-provided alg_name and parameters (existing combination)
    spark_alg_name = "RFdecon"
    spark_alg_parameters = "alg=LeastSquares, pf={pfPath}, object_history=True".format(
        pfPath=pfPath
    )
    data = spark_context.parallelize(d)
    spark_res = data.mspass_map(
        RFdecon,
        alg="LeastSquares",
        pf=pfPath,
        object_history=True,
        global_history=self.manager,
        alg_name=spark_alg_name,
        parameters=spark_alg_parameters,
    ).collect()
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 11
    )
    assert manager_db["history_global"].count_documents({"alg_id": spark_alg_id}) == 4

    # SPARK: test user-provided alg_name and parameters (new combination)
    spark_alg_name = "RFdecon_2"
    spark_alg_parameters = "alg=LeastSquares, pf={pfPath}, object_history=True".format(
        pfPath=pfPath
    )
    data = spark_context.parallelize(d)
    spark_res = data.mspass_map(
        RFdecon,
        alg="LeastSquares",
        pf=pfPath,
        object_history=True,
        global_history=self.manager,
        alg_name=spark_alg_name,
        parameters=spark_alg_parameters,
    ).collect()
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 12
    )
    assert (
        manager_db["history_global"].count_documents({"alg_name": "RFdecon_2"}) == 1
    )
    res = manager_db["history_global"].find_one({"alg_name": "RFdecon_2"})
    assert res["job_id"] == self.manager.job_id
    assert res["job_name"] == self.manager.job_name
    assert res["alg_name"] == "RFdecon_2"
    assert res["parameters"] == json_params
    new_spark_alg_id = res["alg_id"]
    assert (
        manager_db["history_global"].count_documents({"alg_id": new_spark_alg_id}) == 1
    )

    # DASK: test user-provided alg_name and parameters (existing combination)
    dask_alg_name = "RFdecon"
    dask_alg_parameters = "alg=LeastSquares, pf={pfPath}, object_history=True".format(
        pfPath=pfPath
    )
    ddb = daskbag.from_sequence(d)
    dask_res = ddb.mspass_map(
        RFdecon,
        alg="LeastSquares",
        pf=pfPath,
        object_history=True,
        global_history=self.manager,
        alg_name=dask_alg_name,
        parameters=dask_alg_parameters,
    ).compute()
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 13
    )
    assert manager_db["history_global"].count_documents({"alg_id": spark_alg_id}) == 5

    # DASK: test user-provided alg_name and parameters (new combination)
    dask_alg_name = "RFdecon_3"
    dask_alg_parameters = "alg=LeastSquares, pf={pfPath}, object_history=True".format(
        pfPath=pfPath
    )
    ddb = daskbag.from_sequence(d)
    dask_res = ddb.mspass_map(
        RFdecon,
        alg="LeastSquares",
        pf=pfPath,
        object_history=True,
        global_history=self.manager,
        alg_name=dask_alg_name,
        parameters=dask_alg_parameters,
    ).compute()
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 14
    )
    assert (
        manager_db["history_global"].count_documents({"alg_name": "RFdecon_3"}) == 1
    )
    res = manager_db["history_global"].find_one({"alg_name": "RFdecon_3"})
    assert res["job_id"] == self.manager.job_id
    assert res["job_name"] == self.manager.job_name
    assert res["alg_name"] == "RFdecon_3"
    assert res["parameters"] == json_params
    new_dask_alg_id = res["alg_id"]
    assert (
        manager_db["history_global"].count_documents({"alg_id": new_dask_alg_id}) == 1
    )
    else:
        wstr = "No"
    fh.write('"%s","%s","%s","%s"\n' % (k, tstr, wstr, mdef.concept(k)))
fh.close()

fh = open(aliasfile, "w+")
fh.write('"%s","%s"\n' % ("Unique Key", "Valid Aliases"))
for k in mdk:
    aliaslist = mdef.aliases(k)
    if len(aliaslist) > 0:
        if len(aliaslist) == 1:
            fh.write('"%s","%s"' % (k, aliaslist[0]))
        else:
            fh.write('"%s","' % k)
            for i in range(len(aliaslist) - 1):
                fh.write('%s : ' % aliaslist[i])
            val = aliaslist[len(aliaslist) - 1]
            fh.write('%s"' % val)
        fh.write('\n')
fh.close()

# Now build the group tables using the function above.  Need the pf first.
pf = AntelopePf('build_metadata_tbls.pf')
write_group(pf, 'site', mdef)
write_group(pf, 'source', mdef)
write_group(pf, 'obspy_trace', mdef)
write_group(pf, 'sitechan', mdef)
write_group(pf, '3Cdata', mdef)
write_group(pf, 'phase', mdef)
write_group(pf, 'MongoDB', mdef)
write_group(pf, 'files', mdef)
def branch_helper():
    # A helper that returns a (key, branch) pair for building a GTree in tests.
    pfPath = "./data/pf/RFdeconProcessor.pf"
    pf = AntelopePf(pfPath)
    pf_dict = collections.OrderedDict(AntelopePf2dict(pf))
    return ("pf", pf_dict)
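# Hedged usage sketch (not part of the original tests; the function name is
# illustrative): the (key, branch) pair returned by branch_helper can seed an
# OrderedDict the same way build_helper assembles its parameter_dict before
# constructing a ParameterGTree.
def example_gtree_from_branch():
    key, pf_dict = branch_helper()
    parameter_dict = collections.OrderedDict()
    parameter_dict[key] = pf_dict
    return ParameterGTree(parameter_dict)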