Example #1
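A unit test that loads an Antelope parameter (pf) file with AntelopePf and verifies the attribute names, types, and null values returned by Pf2AttributeNameTbl for the wfprocess tag.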
def test_Pf2AttributeNameTbl():
    pf = AntelopePf("python/tests/data/test_import.pf")
    attributes = Pf2AttributeNameTbl(pf, tag="wfprocess")
    names = attributes[0]
    types = attributes[1]
    nullvals = attributes[2]

    assert names == [
        "pwfid",
        "starttime",
        "endtime",
        "time_standard",
        "dir",
        "dfile",
        "foff",
        "dtype",
        "samprate",
        "unkwown",
        "algorithm",
        "lddate",
    ]
    assert len(types) == 12
    assert len(nullvals) == 12
    assert types == [
        int, float, float, str, str, str, int, str, float, int, str, float
    ]
    assert nullvals["pwfid"] == -1
Example #2
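A test helper that converts a pf file to an OrderedDict with AntelopePf2dict and wraps it, along with a few scalar parameters, in a ParameterGTree.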
def build_helper():
    # A helper function that builds a ParameterGTree instance for the tests.
    pfPath = "./data/pf/RFdeconProcessor.pf"
    pf = AntelopePf(pfPath)
    pf_dict = AntelopePf2dict(pf)
    parameter_dict = collections.OrderedDict()
    parameter_dict["alg"] = "LeastSquares"
    parameter_dict["pf"] = pf_dict
    parameter_dict["object_history"] = "True"
    gTree = ParameterGTree(parameter_dict)
    return gTree
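
A minimal usage sketch (hypothetical; it mirrors the JSON serialization done in Example #7 and assumes json has been imported):

gTree = build_helper()
# Serialize the parameter tree, e.g. to compare against stored global-history records.
json_params = json.dumps(gTree.asdict())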
Example #3
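An __init__ method, evidently the RFdeconProcessor constructor, that reads operator parameters from a pf file with AntelopePf, selects the deconvolution engine named by alg, and caches the analysis (and, when needed, noise) time windows.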
 def __init__(self, alg="LeastSquares", pf="RFdeconProcessor.pf"):
     self.algorithm = alg
     pfhandle = AntelopePf(pf)
     if self.algorithm == "LeastSquares":
         self.md = pfhandle.get_branch('LeastSquare')
         self.processor = decon.LeastSquareDecon(self.md)
         self.__uses_noise = False
     elif alg == "WaterLevel":
         self.md = pfhandle.get_branch('WaterLevel')
         self.processor = decon.WaterLevelDecon(self.md)
         self.__uses_noise = False
     elif alg == "MultiTaperXcor":
         self.md = pfhandle.get_branch('MultiTaperXcor')
         self.processor = decon.MultiTaperXcorDecon(self.md)
         self.__uses_noise = True
     elif alg == "MultiTaperSpecDiv":
         self.md = pfhandle.get_branch('MultiTaperSpecDiv')
         self.processor = decon.MultiTaperSpecDivDecon(self.md)
         self.__uses_noise = True
     elif alg == "GeneralizedIterative":
         raise RuntimeError(
             "Generalized Iterative method not yet supported")
     else:
         raise RuntimeError("Illegal value for alg=" + alg)
     # Although the loaddata and loadwavelet methods don't always require
     # it, we cache the analysis time window for efficiency.
     tws = self.md.get_double("deconvolution_data_window_start")
     twe = self.md.get_double("deconvolution_data_window_end")
     self.__dwin = mspass.TimeWindow(tws, twe)
     if self.__uses_noise:
         tws = self.md.get_double("noise_window_start")
         twe = self.md.get_double("noise_window_end")
         self.__nwin = mspass.TimeWindow(tws, twe)
     else:
         self.__nwin = mspass.TimeWindow()  # always initialize even if not used
Example #4
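A unit test that builds an attribute-name list from a pf file and then uses Textfile2Dataframe to parse a Datascope-style text table into a DataFrame, checking its shape and values, the one_to_one option, and the insert_column option, in both serial and parallel modes.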
def test_Textfile2Dataframe():
    pf = AntelopePf("python/tests/data/test_import.pf")
    attributes = Pf2AttributeNameTbl(pf, tag="wfprocess")
    names = attributes[0]

    textfile = "python/tests/data/testdb.wfprocess"

    for p in [True, False]:
        df = Textfile2Dataframe(textfile, attribute_names=names, parallel=p)
        assert df.shape[0] == 652
        assert df.shape[1] == 12
        assert df.iloc[1]["pwfid"] == 3103
        assert np.isclose(df.iloc[1]["starttime"], 1577912954.62975)
        assert np.isclose(df.iloc[1]["endtime"], 1577913089.7297499)
        assert df.iloc[1]["time_standard"] == "a"

        #   Test setting null values
        df = Textfile2Dataframe(textfile, attribute_names=names, parallel=p)
        assert df.shape[0] == 652
        assert df.shape[1] == 12
        assert df.iloc[0]["pwfid"] == 3102
        assert np.isclose(df.iloc[0]["starttime"], 1577912967.53105)
        assert np.isclose(df.iloc[0]["endtime"], 1577913102.63105)
        assert df.iloc[0]["time_standard"] == "a"
        assert df.iloc[0]["foff"] == 0

        #   Test turning off one_to_one
        df = Textfile2Dataframe(textfile,
                                attribute_names=names,
                                one_to_one=False,
                                parallel=p)
        assert df.shape[0] == 651
        assert df.shape[1] == 12

        #   Test add column
        df = Textfile2Dataframe(textfile,
                                attribute_names=names,
                                parallel=p,
                                insert_column={"test_col": 1})
        assert df.shape[0] == 652
        assert df.shape[1] == 13
        assert df.at[0, "test_col"] == 1
Example #5
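A helper that, when given a path-like argument naming an existing file, parses a .pf file with AntelopePf (converting it to an OrderedDict via AntelopePf2dict) or a .yaml file with yaml.safe_load, and raises MsPASSError for any other file type; non-file arguments are returned unchanged.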
 def check_and_parse_file(arg):
     if isinstance(arg, (os.PathLike, str, bytes)) and os.path.isfile(arg):
         file_path = str(arg)
         if file_path.endswith(".pf"):
             pf = AntelopePf(file_path)
             #   Convert PF into an OrderedDict to coordinate with the GTree
             pf_value = AntelopePf2dict(pf)
             return pf_value
         elif file_path.endswith(".yaml"):
             with open(file_path, "r") as yaml_file:
                 yaml_value = yaml.safe_load(yaml_file)
             return yaml_value
         #   Currently only pf and yaml files are supported
         else:
             raise MsPASSError("Cannot handle file: " + file_path, "Fatal")
     return arg
Example #6
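A later variant of the same constructor that stores only the selected parameter branch and converts it to a Metadata object, because an AntelopePf cannot be serialized.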
 def __init__(self, alg="LeastSquares", pf="RFdeconProcessor.pf"):
     self.algorithm = alg
     self.pf = pf
     pfhandle = AntelopePf(pf)
     if self.algorithm == "LeastSquares":
         self.md = pfhandle.get_branch("LeastSquare")
         self.__uses_noise = False
     elif alg == "WaterLevel":
         self.md = pfhandle.get_branch("WaterLevel")
         self.__uses_noise = False
     elif alg == "MultiTaperXcor":
         self.md = pfhandle.get_branch("MultiTaperXcor")
         self.__uses_noise = True
     elif alg == "MultiTaperSpecDiv":
         self.md = pfhandle.get_branch("MultiTaperSpecDiv")
         self.__uses_noise = True
     elif alg == "GeneralizedIterative":
         raise RuntimeError(
             "Generalized Iterative method not yet supported")
     else:
         raise RuntimeError("Illegal value for alg=" + alg)
     # The conversion below is needed because an AntelopePf cannot be serialized.
     self.md = Metadata(self.md)
Example #7
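A test of mspass_map with both Spark and Dask: it runs RFdecon over synthetic seismograms and checks that the global history manager records the expected job id, job name, algorithm name, parameters, and alg_id for each run.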
    def test_mspass_map_with_filePath(self, spark_context):
        # test mspass_map for spark (file input)
        # input data for RFdecon, needed for parallelization
        d = [get_live_seismogram(71, 2.0) for i in range(5)]
        for i in range(5):
            d[i].t0 = -5

        # build the parameters JSON string for later comparison
        pfPath = "python/mspasspy/data/pf/RFdeconProcessor.pf"
        pf = AntelopePf(pfPath)
        pf_dict = AntelopePf2dict(pf)
        parameter_dict = collections.OrderedDict()
        parameter_dict["alg"] = "LeastSquares"
        parameter_dict["pf"] = pf_dict
        parameter_dict["object_history"] = "True"
        gTree = ParameterGTree(parameter_dict)
        json_params = json.dumps(gTree.asdict())

        data = spark_context.parallelize(d)
        spark_res = data.mspass_map(
            RFdecon,
            alg="LeastSquares",
            pf=pfPath,
            object_history=True,
            global_history=self.manager,
            alg_name=None,
            parameters=None,
        ).collect()
        manager_db = Database(self.client, "test_manager")
        assert (manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}) == 8)
        res = manager_db["history_global"].find_one({"alg_name": "RFdecon"})
        assert res["job_id"] == self.manager.job_id
        assert res["job_name"] == self.manager.job_name
        assert res["alg_name"] == "RFdecon"
        assert res["parameters"] == json_params
        spark_alg_id = res["alg_id"]

        # test mspass_map for dask
        ddb = daskbag.from_sequence(d)
        dask_res = ddb.mspass_map(
            RFdecon,
            alg="LeastSquares",
            pf=pfPath,
            object_history=True,
            global_history=self.manager,
            alg_name=None,
            parameters=None,
        ).compute()

        assert (manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}) == 9)
        assert (manager_db["history_global"].count_documents(
            {"alg_id": spark_alg_id}) == 2)
        docs = manager_db["history_global"].find({"alg_id": spark_alg_id})
        assert docs[0]["job_id"] == docs[1]["job_id"] == self.manager.job_id
        assert docs[0]["job_name"] == docs[1][
            "job_name"] == self.manager.job_name
        assert docs[0]["alg_name"] == docs[1]["alg_name"] == "RFdecon"
        assert docs[0]["parameters"] == docs[1]["parameters"] == json_params
        assert docs[0]["time"] != docs[1]["time"]

        # same alg + parameters combination -> same alg_id
        ddb = daskbag.from_sequence(d)
        dask_res = ddb.mspass_map(
            RFdecon,
            alg="LeastSquares",
            pf=pfPath,
            object_history=True,
            global_history=self.manager,
            alg_name=None,
            parameters=None,
        ).compute()
        assert (manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}) == 10)
        assert (manager_db["history_global"].count_documents(
            {"alg_id": spark_alg_id}) == 3)

        # SPARK: test user-provided alg_name and parameters (already existing)
        spark_alg_name = "RFdecon"
        spark_alg_parameters = (
            "alg=LeastSquares, pf={pfPath}, object_history=True".format(
                pfPath=pfPath))
        data = spark_context.parallelize(d)
        spark_res = data.mspass_map(
            RFdecon,
            alg="LeastSquares",
            pf=pfPath,
            object_history=True,
            global_history=self.manager,
            alg_name=spark_alg_name,
            parameters=spark_alg_parameters,
        ).collect()
        assert (manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}) == 11)
        assert (manager_db["history_global"].count_documents(
            {"alg_id": spark_alg_id}) == 4)

        # SPARK: test user-provided alg_name and parameters (new)
        spark_alg_name = "RFdecon_2"
        spark_alg_parameters = (
            "alg=LeastSquares, pf={pfPath}, object_history=True".format(
                pfPath=pfPath))
        data = spark_context.parallelize(d)
        spark_res = data.mspass_map(
            RFdecon,
            alg="LeastSquares",
            pf=pfPath,
            object_history=True,
            global_history=self.manager,
            alg_name=spark_alg_name,
            parameters=spark_alg_parameters,
        ).collect()
        assert (manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}) == 12)
        assert (manager_db["history_global"].count_documents(
            {"alg_name": "RFdecon_2"}) == 1)
        res = manager_db["history_global"].find_one({"alg_name": "RFdecon_2"})
        assert res["job_id"] == self.manager.job_id
        assert res["job_name"] == self.manager.job_name
        assert res["alg_name"] == "RFdecon_2"
        assert res["parameters"] == json_params
        new_spark_alg_id = res["alg_id"]
        assert (manager_db["history_global"].count_documents(
            {"alg_id": new_spark_alg_id}) == 1)

        # DASK: test user-provided alg_name and parameters (already existing)
        dask_alg_name = "RFdecon"
        dask_alg_parameters = (
            "alg=LeastSquares, pf={pfPath}, object_history=True".format(
                pfPath=pfPath))
        ddb = daskbag.from_sequence(d)
        dask_res = ddb.mspass_map(
            RFdecon,
            alg="LeastSquares",
            pf=pfPath,
            object_history=True,
            global_history=self.manager,
            alg_name=dask_alg_name,
            parameters=dask_alg_parameters,
        ).compute()
        assert (manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}) == 13)
        assert (manager_db["history_global"].count_documents(
            {"alg_id": spark_alg_id}) == 5)

        # DASK: test user-provided alg_name and parameters (new)
        dask_alg_name = "RFdecon_3"
        dask_alg_parameters = (
            "alg=LeastSquares, pf={pfPath}, object_history=True".format(
                pfPath=pfPath))
        ddb = daskbag.from_sequence(d)
        dask_res = ddb.mspass_map(
            RFdecon,
            alg="LeastSquares",
            pf=pfPath,
            object_history=True,
            global_history=self.manager,
            alg_name=dask_alg_name,
            parameters=dask_alg_parameters,
        ).compute()
        assert (manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}) == 14)
        assert (manager_db["history_global"].count_documents(
            {"alg_name": "RFdecon_3"}) == 1)
        res = manager_db["history_global"].find_one({"alg_name": "RFdecon_3"})
        assert res["job_id"] == self.manager.job_id
        assert res["job_name"] == self.manager.job_name
        assert res["alg_name"] == "RFdecon_3"
        assert res["parameters"] == json_params
        new_dask_alg_id = res["alg_id"]
        assert (manager_db["history_global"].count_documents(
            {"alg_id": new_dask_alg_id}) == 1)
Example #8
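A fragment of a script that writes metadata keys, types, and aliases to CSV files and then calls a write_group helper (defined earlier in the script, not shown) for each group table listed in build_metadata_tbls.pf.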
    else:
        wstr = "No"
    fh.write('"%s","%s","%s","%s"\n' % (k, tstr, wstr, mdef.concept(k)))
fh.close()
fh = open(aliasfile, "w+")
fh.write('"%s","%s"\n' % ("Unique Key", "Valid Aliases"))
for k in mdk:
    aliaslist = mdef.aliases(k)
    if len(aliaslist) > 0:
        if len(aliaslist) == 1:
            fh.write('"%s","%s"' % (k, aliaslist[0]))
        else:
            fh.write('"%s","' % k)
            for i in range(len(aliaslist) - 1):
                fh.write('%s : ' % aliaslist[i])
            val = aliaslist[len(aliaslist) - 1]
            fh.write('%s"' % val)
        fh.write('\n')

# Now build the group tables using the function above.  Need the pf first.
pf = AntelopePf('build_metadata_tbls.pf')
write_group(pf, 'site', mdef)
write_group(pf, 'source', mdef)
write_group(pf, 'obspy_trace', mdef)
write_group(pf, 'sitechan', mdef)
write_group(pf, '3Cdata', mdef)
write_group(pf, 'phase', mdef)
write_group(pf, 'MongoDB', mdef)
write_group(pf, 'files', mdef)
Example #9
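A test helper that loads a pf file and returns it as a ("pf", dict) branch tuple for building a ParameterGTree.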
def branch_helper():
    # A helper that loads the RFdeconProcessor pf file and returns it as a ("pf", dict) branch tuple.
    pfPath = "./data/pf/RFdeconProcessor.pf"
    pf = AntelopePf(pfPath)
    pf_dict = collections.OrderedDict(AntelopePf2dict(pf))
    return ("pf", pf_dict)
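
A minimal usage sketch (hypothetical; it follows the pattern of Example #2 and assumes collections and ParameterGTree are imported):

key, pf_dict = branch_helper()
parameter_dict = collections.OrderedDict()
parameter_dict[key] = pf_dict
gTree = ParameterGTree(parameter_dict)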