Python Forgeの例、mdf_forge.forge.Forge Pythonの例

コード例 #1

0

ファイルを表示

def test_forge_reset_query():
    """Verify that reset_query() discards a previously built field match."""
    client = forge.Forge()
    # Add a term that would return results on its own...
    client.match_field("elements", "Al")
    # ...then drop the whole query.
    client.reset_query()
    # Searching with no query is expected to give back an empty list.
    results = client.search()
    assert results == []

コード例 #2

0

ファイルを表示

def test_forge_match_resource_types():
    """Exercise match_resource_types() with one type, two types, and no type."""
    single = forge.Forge()
    # One type, capped at ten results
    single.match_resource_types("record")
    record_hits = single.search(limit=10)
    assert check_field(record_hits, "mdf.resource_type", "record") == 0

    # Two types, neither of which is "record"
    multi = forge.Forge()
    multi.match_resource_types(["collection", "dataset"])
    multi_hits = multi.search()
    assert check_field(multi_hits, "mdf.resource_type", "record") == -1
    # TODO: Re-enable this assert after we get collections in MDF
    #    assert check_field(multi_hits, "mdf.resource_type", "dataset") == 2

    # Zero types: the call should compare equal to the Forge it was made on
    untouched = forge.Forge()
    assert untouched.match_resource_types("") == untouched

コード例 #3

0

ファイルを表示

ファイル: test_forge.py プロジェクト: nds-org/forge

def test_forge_exclude_field():
    """Check that a value passed to exclude_field() is absent from the results."""
    client = forge.Forge(index="mdf")
    # Basic usage: exclude one element, then restrict to a single source
    client.exclude_field("mdf.elements", "Al")
    client.match_field("mdf.source_name", "core_mof")
    hits = client.search()
    # check_field is expected to report -1 for the excluded value
    status = check_field(hits, "mdf.elements", "Al")
    assert status == -1

コード例 #4

0

ファイルを表示

ファイル: test_forge.py プロジェクト: maxhutch/forge

def test_forge_aggregate_source():
    """Check that aggregate_source() returns a large list of dict entries.

    The "amcs" source is expected to yield more than 10000 entries.
    """
    # Test limit
    f1 = forge.Forge()
    res1 = f1.aggregate_source("amcs")
    # isinstance is the idiomatic type check and also accepts subclasses
    assert isinstance(res1, list)
    assert len(res1) > 10000
    assert isinstance(res1[0], dict)

コード例 #5

0

ファイルを表示

ファイル: test_forge.py プロジェクト: nds-org/forge

def test_forge_match_tags():
    """Exercise match_tags() with one tag, a quoted phrase, several tags, and none."""
    client = forge.Forge(index="mdf")

    # Get one tag from a known source to search with
    seed = client.search("mdf.source_name:trinkle_elastic_fe_bcc",
                         advanced=True,
                         limit=1)
    single_tag = seed[0]["mdf"]["tags"][0]

    # One tag
    single_hits = client.match_tags(single_tag).search()
    assert check_field(single_hits, "mdf.tags", single_tag) == 2

    # Quoted phrase
    phrase_tag = "\"ab initio\""
    client.match_tags(phrase_tag)
    phrase_hits = client.search()
    # Elasticsearch splits ["ab-initio"] into ["ab", "initio"]
    assert check_field(phrase_hits, "mdf.tags", "ab-initio") == 2

    # Multiple tags, all of which must match
    required_tags = [
        "\"density functional theory calculations\"",
        "\"X-ray\"",
    ]
    combined_hits = client.match_tags(required_tags, match_all=True).search()
    # "source_name": "ge_nanoparticles",
    # "tags": [ "amorphization","density functional theory calculations","Ge nanoparticles",
    #           "high pressure","phase transformation","Raman","X-ray absorption","zip" ]
    assert check_field(combined_hits, "mdf.tags", "Raman") == 1
    assert check_field(combined_hits, "mdf.tags", "X-ray absorption") == 1
    assert check_field(combined_hits, "mdf.tags",
                       "density functional theory calculations") == 1

    # No tag: the call should compare equal to the Forge it was made on
    assert client.match_tags("") == client

コード例 #6

0

ファイルを表示

ファイル: test_forge.py プロジェクト: nds-org/forge

def test_forge_aggregate_source():
    """Check that aggregate_source("amcs") returns a list of over 10000 dicts."""
    # Test limit
    client = forge.Forge(index="mdf")
    entries = client.aggregate_source("amcs")
    assert isinstance(entries, list)
    assert len(entries) > 10000
    first_entry = entries[0]
    assert isinstance(first_entry, dict)

コード例 #7

0

ファイルを表示

ファイル: test_forge.py プロジェクト: maxhutch/forge

def test_forge_reset_query():
    """Verify that reset_query() discards a previously added term match."""
    client = forge.Forge()
    # Add a term that would return results on its own...
    client.match_term("data")
    # ...then drop the whole query.
    client.reset_query()
    # Searching with no query is expected to give back an empty list.
    results = client.search()
    assert results == []

コード例 #8

0

ファイルを表示

def test_forge_search_by_tags():
    """Compare search_by_tags() results for one tag and for match_all on/off."""
    # Single tag
    single = forge.Forge()
    single_tag = "DFT"
    single_hits = single.search_by_tags(single_tag)
    assert check_field(single_hits, "mdf.tags", "DFT") == 2

    # The same two tags, first requiring all of them...
    strict = forge.Forge()
    strict_tags = ["\"Density Functional Theory\"", "\"X-ray\""]
    strict_hits = strict.search_by_tags(strict_tags, match_all=True)
    # ...then accepting any of them.
    loose = forge.Forge()
    loose_tags = ["\"Density Functional Theory\"", "\"X-ray\""]
    loose_hits = loose.search_by_tags(loose_tags, match_all=False)

    # The match_all=True results are a strict subset of the match_all=False ones
    assert len(loose_hits) > len(strict_hits)
    assert all(hit in loose_hits for hit in strict_hits)

コード例 #9

0

ファイルを表示

def test_forge_match_elements():
    """Exercise match_elements() with one element, two elements, and none."""
    single = forge.Forge()
    # One element
    single.match_elements("Al")
    single_hits = single.search()
    assert single_hits != []
    al_status = check_field(single_hits, "mdf.elements", "Al")
    assert al_status in (0, 1)

    # Multi-element
    multi = forge.Forge()
    multi.match_elements(["Al", "Cu"])
    multi_hits = multi.search()
    assert check_field(multi_hits, "mdf.elements", "Al") == 1
    assert check_field(multi_hits, "mdf.elements", "Cu") == 1

    # No elements: the call should compare equal to the Forge it was made on
    untouched = forge.Forge()
    assert untouched.match_elements("") == untouched

コード例 #10

0

ファイルを表示

ファイル: test_forge.py プロジェクト: nds-org/forge

def test_forge_chaining():
    """Check that chained match_field() calls give the same results as separate calls."""
    client = forge.Forge(index="mdf")

    # Build the query one statement at a time
    client.match_field("source_name", "cip")
    client.match_field("elements", "Al")
    stepwise_hits = client.search()

    # Build the same query as one fluent chain
    chained_query = client.match_field("source_name", "cip")
    chained_query = chained_query.match_field("elements", "Al")
    chained_hits = chained_query.search()

    # Both result sets must contain each other
    assert all(hit in chained_hits for hit in stepwise_hits)
    assert all(hit in stepwise_hits for hit in chained_hits)

コード例 #11

0

ファイルを表示

def test_forge_exclude_range():
    """Exercise exclude_range() with open-ended and non-inclusive ranges.

    Excluding everything from "Am" upward and everything up to "Ak" should
    leave only "Al" matches. Adding a non-inclusive "Al".."Al" exclusion is
    expected to leave the match count unchanged.
    """
    # Single-value use
    f1 = forge.Forge()
    f1.exclude_range("mdf.elements", "Am", "*")
    f1.exclude_range("mdf.elements", "*", "Ak")
    res1, info1 = f1.search(info=True)
    # Evaluate check_field once instead of once per side of an `or`
    assert check_field(res1, "mdf.elements", "Al") in (0, 2)
    f2 = forge.Forge()
    # Only the info dict is needed from the plain "Al" query
    _, info2 = f2.search("mdf.elements:Al", advanced=True, info=True)
    assert info1["total_query_matches"] <= info2["total_query_matches"]
    # Non-matching use, test inclusive
    f3 = forge.Forge()
    f3.exclude_range("mdf.elements", "Am", "*")
    f3.exclude_range("mdf.elements", "*", "Ak")
    f3.exclude_range("mdf.elements", "Al", "Al", inclusive=False)
    _, info3 = f3.search(info=True)
    assert info1["total_query_matches"] == info3["total_query_matches"]

コード例 #12

0

ファイルを表示

def test_forge_match_sources():
    """Exercise match_sources() with one source, two sources, and none."""
    single = forge.Forge()
    # One source
    single.match_sources("nist_janaf")
    single_hits = single.search()
    assert single_hits != []
    assert check_field(single_hits, "mdf.source_name", "nist_janaf") == 0

    # Multi-source
    multi = forge.Forge()
    multi.match_sources(["nist_janaf", "hopv"])
    multi_hits = multi.search()
    # The single-source results are a strict subset of the multi-source ones
    assert len(multi_hits) > len(single_hits)
    assert all(hit in multi_hits for hit in single_hits)
    assert check_field(multi_hits, "mdf.source_name", "nist_janaf") == 2

    # No source: the call should compare equal to the Forge it was made on
    untouched = forge.Forge()
    assert untouched.match_sources("") == untouched

コード例 #13

0

ファイルを表示

ファイル: test_forge.py プロジェクト: nds-org/forge

def test_forge_search_by_titles():
    """Check search_by_titles() with a quoted full title and a single word."""
    client = forge.Forge(index="mdf")
    expected_title = "AMCS - Tungsten"

    # Quoted full title
    exact_titles = ["\"AMCS - Tungsten\""]
    exact_hits = client.search_by_titles(exact_titles)
    assert check_field(exact_hits, "mdf.title", expected_title) == 0

    # Single unquoted word
    word_titles = ["Tungsten"]
    word_hits = client.search_by_titles(word_titles)
    assert check_field(word_hits, "mdf.title", expected_title) == 2

コード例 #14

0

ファイルを表示

def test_forge_match_titles():
    """Exercise match_titles() with one title, two titles, and none."""
    # One title
    single = forge.Forge()
    single_title = '"OQMD - Na1Y2Zr1"'
    single_hits = single.match_titles(single_title).search()
    assert single_hits != []
    assert check_field(single_hits, "mdf.title", "OQMD - Na1Y2Zr1") == 0

    # Multiple titles
    multi = forge.Forge()
    multi_titles = ['"AMCS - Tungsten"', '"Cytochrome QSAR"']
    multi_hits = multi.match_titles(multi_titles).search()
    assert multi_hits != []
    assert check_field(multi_hits, "mdf.title",
                       "Cytochrome QSAR - C13F2N6O") == 2

    # No titles: the call should compare equal to the Forge it was made on
    untouched = forge.Forge()
    assert untouched.match_titles("") == untouched

コード例 #15

0

ファイルを表示

ファイル: test_forge.py プロジェクト: maxhutch/forge

def test_forge_http_return():
    """Check that http_return() yields the file contents as a list of strings."""
    first_doc = "This is a test document for Forge testing. Please do not remove.\n"
    second_doc = "This is a second test document for Forge testing. Please do not remove.\n"

    client = forge.Forge()

    # Simple case
    single_contents = client.http_return(example_result1)
    assert isinstance(single_contents, list)
    assert single_contents == [first_doc]

    # With multiple files
    multi_contents = client.http_return(example_result2)
    assert isinstance(multi_contents, list)
    assert multi_contents == [first_doc, second_doc]

コード例 #16

0

ファイルを表示

def test_forge_aggregate():
    """Check aggregate() against the current query, with and without reset_query.

    The test expects that:
    - aggregate() uses the current query properly,
    - it returns results,
    - it respects the reset_query argument.
    """
    client = forge.Forge()
    client.match_field("mdf.source_name", "nist_xps_db")

    # First pass keeps the query so that a second pass can reuse it
    kept_hits = client.aggregate(reset_query=False)
    assert len(kept_hits) > 10000

    # Second pass runs with the default reset_query behaviour
    default_hits = client.aggregate()

    # Both passes must contain each other's entries
    assert all(hit in default_hits for hit in kept_hits)
    assert all(hit in kept_hits for hit in default_hits)

コード例 #17

0

ファイルを表示

ファイル: test_forge.py プロジェクト: maxhutch/forge

def test_forge_http_stream():
    """Check that http_stream() returns a generator yielding each file's text."""
    first_doc = "This is a test document for Forge testing. Please do not remove.\n"
    second_doc = "This is a second test document for Forge testing. Please do not remove.\n"

    client = forge.Forge()

    # Simple case
    single_stream = client.http_stream(example_result1)
    assert isinstance(single_stream, types.GeneratorType)
    assert next(single_stream) == first_doc

    # With multiple files
    multi_stream = client.http_stream(example_result2)
    assert isinstance(multi_stream, types.GeneratorType)
    assert next(multi_stream) == first_doc
    assert next(multi_stream) == second_doc

コード例 #18

0

ファイルを表示

ファイル: test_forge.py プロジェクト: nds-org/forge

def test_forge_fetch_datasets_from_results():
    """Exercise fetch_datasets_from_results() with several input shapes.

    Inputs covered: a single record entry, a (results, info) tuple, a list of
    records from two sources, a list of dataset entries, no argument at all
    (falls back to the current query), and an entry of unknown resource type.
    """
    # Get some results
    f = forge.Forge(index="mdf")
    # Record from OQMD
    res01 = f.search("mdf.source_name:oqmd AND mdf.resource_type:record",
                     advanced=True,
                     limit=1)
    # Record from OQMD with info
    res02 = f.search("mdf.source_name:oqmd AND mdf.resource_type:record",
                     advanced=True,
                     limit=1,
                     info=True)
    # Records from JANAF
    res03 = f.search("mdf.source_name:nist_janaf AND mdf.resource_type:record",
                     advanced=True,
                     limit=2)
    # Dataset for NIST XPS DB
    res04 = f.search(
        "mdf.source_name:nist_xps_db AND mdf.resource_type:dataset",
        advanced=True)

    # Get the correct dataset entries
    oqmd = f.search("mdf.source_name:oqmd AND mdf.resource_type:dataset",
                    advanced=True)[0]
    nist_janaf = f.search(
        "mdf.source_name:nist_janaf AND mdf.resource_type:dataset",
        advanced=True)[0]

    # Fetch single dataset (input is one record entry, not a list)
    res1 = f.fetch_datasets_from_results(res01[0])
    assert res1[0] == oqmd

    # Fetch dataset with results + info
    res2 = f.fetch_datasets_from_results(res02)
    assert res2[0] == oqmd

    # Fetch multiple datasets (one OQMD record plus two JANAF records)
    rtemp = res01 + res03
    res3 = f.fetch_datasets_from_results(rtemp)
    # Three records in, but only two distinct datasets expected out
    assert len(res3) == 2
    assert oqmd in res3
    assert nist_janaf in res3

    # Fetch dataset from dataset
    res4 = f.fetch_datasets_from_results(res04)
    assert res4 == res04

    # Fetch entries from current query
    f.match_sources("nist_xps_db")
    assert f.fetch_datasets_from_results() == res04

    # Fetch nothing
    unknown_entry = {"mdf": {"resource_type": "unknown"}}
    assert f.fetch_datasets_from_results(unknown_entry) == []

コード例 #19

0

ファイルを表示

def test_forge_match_range():
    """Exercise match_range() with a single value, a non-inclusive range, and a real range."""
    # Single-value use
    single = forge.Forge()
    single.match_range("mdf.elements", "Al", "Al")
    single_hits, single_info = single.search(info=True)
    assert check_field(single_hits, "mdf.elements", "Al") == 1

    # A plain advanced query for "Al" should match the same number of entries
    plain = forge.Forge()
    _, plain_info = plain.search("mdf.elements:Al", advanced=True, info=True)
    single_total = single_info["total_query_matches"]
    assert single_total == plain_info["total_query_matches"]

    # Non-matching use, test inclusive
    empty_range = forge.Forge()
    empty_range.match_range("mdf.elements", "Al", "Al", inclusive=False)
    assert empty_range.search() == []

    # Actual range
    wide = forge.Forge()
    wide.match_range("mdf.elements", "Al", "Cu")
    wide_hits, wide_info = wide.search(info=True)
    assert single_total < wide_info["total_query_matches"]
    assert (check_field(wide_hits, "mdf.elements", "Al") >= 0
            or check_field(wide_hits, "mdf.elements", "Cu") >= 0)

コード例 #20

0

ファイルを表示

ファイル: test_forge.py プロジェクト: nds-org/forge

def test_forge_exclusive_match():
    """Check exclusive_match() with a single element and with a pair of elements."""
    client = forge.Forge(index="mdf")
    field = "mdf.elements"

    # Single element
    client.exclusive_match(field, "Al")
    al_only_hits = client.search()
    assert check_field(al_only_hits, field, "Al") == 0

    # Two elements: both expected present, other elements expected absent
    client.exclusive_match(field, ["Al", "Cu"])
    pair_hits = client.search()
    assert check_field(pair_hits, field, "Al") == 1
    assert check_field(pair_hits, field, "Cu") == 1
    assert check_field(pair_hits, field, "Cp") == -1
    assert check_field(pair_hits, field, "Fe") == -1

コード例 #21

0

ファイルを表示

ファイル: test_forge.py プロジェクト: nds-org/forge

def test_forge_aggregate():
    """Check aggregate() against the current query, with and without reset_query.

    The test expects that:
    - aggregate() uses the current query properly,
    - it returns results,
    - it respects the reset_query argument.
    """
    source = "nist_xps_db"
    client = forge.Forge(index="mdf")
    client.match_field("mdf.source_name", source)

    # First pass keeps the query so that a second pass can reuse it
    kept_hits = client.aggregate(reset_query=False, index="mdf")
    assert len(kept_hits) > 10000
    assert check_field(kept_hits, "mdf.source_name", source) == 0

    # Second pass runs with the default arguments
    default_hits = client.aggregate()
    assert len(default_hits) == len(kept_hits)
    assert check_field(default_hits, "mdf.source_name", source) == 0

コード例 #22

0

ファイルを表示

ファイル: test_forge.py プロジェクト: nds-org/forge

def test_forge_search_by_elements():
    """Check that search_by_elements() matches the equivalent chained query."""
    client = forge.Forge(index="mdf")
    elements = ["Cu", "Al"]
    sources = ["oqmd", "nist_xps_db"]

    # Reference: build the query by chaining the match helpers
    chained_query = client.match_sources(sources).match_elements(elements)
    chained_hits, _ = chained_query.search(limit=10000, info=True)

    # Shortcut under test
    shortcut_hits, _ = client.search_by_elements(elements,
                                                 sources,
                                                 limit=10000,
                                                 info=True)

    # Both result sets must contain each other
    assert all(hit in shortcut_hits for hit in chained_hits)
    assert all(hit in chained_hits for hit in shortcut_hits)
    assert check_field(chained_hits, "mdf.elements", "Al") == 1
    assert check_field(chained_hits, "mdf.source_name", "oqmd") == 2

コード例 #23

0

ファイルを表示

ファイル: test_forge.py プロジェクト: nds-org/forge

def test_forge_match_field():
    """Check match_field() and that a search leaves no query behind."""
    client = forge.Forge(index="mdf")

    # Basic usage
    client.match_field("mdf.source_name", "nist_janaf")
    source_hits = client.search()
    assert check_field(source_hits, "mdf.source_name", "nist_janaf") == 0

    # Check that query clears: a second search with nothing added is empty
    leftover_hits = client.search()
    assert leftover_hits == []

    # Also checking check_field
    client.match_field("mdf.elements", "Al")
    element_hits = client.search()
    assert check_field(element_hits, "mdf.elements", "Al") == 1

コード例 #24

0

ファイルを表示

def test_forge_match_ids():
    """Exercise match_ids() with one ID, two IDs, and none."""
    # Get a couple IDs
    lookup = forge.Forge()
    seed = lookup.search("mdf.source_name:nist_janaf", advanced=True, limit=2)
    first_id = seed[0]["mdf"]["mdf_id"]
    second_id = seed[1]["mdf"]["mdf_id"]

    # One ID
    single = forge.Forge()
    single.match_ids(first_id)
    single_hits = single.search()
    assert single_hits != []
    assert check_field(single_hits, "mdf.mdf_id", first_id) == 0

    # Multi-ID
    multi = forge.Forge()
    multi.match_ids([first_id, second_id])
    multi_hits = multi.search()
    # The single-ID results are a strict subset of the multi-ID ones
    assert len(multi_hits) > len(single_hits)
    assert all(hit in multi_hits for hit in single_hits)
    assert check_field(multi_hits, "mdf.mdf_id", second_id) == 2

    # No id: the call should compare equal to the Forge it was made on
    untouched = forge.Forge()
    assert untouched.match_ids("") == untouched

コード例 #25

0

ファイルを表示

ファイル: test_forge.py プロジェクト: maxhutch/forge

def test_forge_search(capsys):
    """Exercise search(): empty query, the info flag, and the limit argument.

    Args:
        capsys: pytest fixture used to read what search() printed to stdout.
    """
    # Error on no query
    f1 = forge.Forge()
    assert f1.search() == []
    # Only stdout is inspected; stderr is discarded
    out, _ = capsys.readouterr()
    assert "Error: No query specified" in out

    # Return info if requested
    f2 = forge.Forge()
    res2 = f2.search(q="Al", info=False)
    # isinstance is the idiomatic type check and also accepts subclasses
    assert isinstance(res2, list)
    assert isinstance(res2[0], dict)
    f3 = forge.Forge()
    res3 = f3.search(q="Al", info=True)
    # With info=True the result is a (results, info) tuple
    assert isinstance(res3, tuple)
    assert isinstance(res3[0], list)
    assert isinstance(res3[0][0], dict)
    assert isinstance(res3[1], dict)

    # Check limit
    f4 = forge.Forge()
    res4 = f4.search("oqmd", limit=3)
    assert len(res4) == 3

コード例 #26

0

ファイルを表示

ファイル: test_forge.py プロジェクト: nds-org/forge

def test_forge_match_elements():
    """Exercise match_elements() on one Forge with one element, two, and none."""
    client = forge.Forge(index="mdf")

    # One element
    client.match_elements("Al")
    single_hits = client.search()
    assert single_hits != []
    al_status = check_field(single_hits, "mdf.elements", "Al")
    assert al_status in (0, 1)

    # Multi-element
    client.match_elements(["Al", "Cu"])
    multi_hits = client.search()
    assert check_field(multi_hits, "mdf.elements", "Al") == 1
    assert check_field(multi_hits, "mdf.elements", "Cu") == 1

    # No elements: the call should compare equal to the Forge it was made on
    assert client.match_elements("") == client

コード例 #27

0

ファイルを表示

def test_forge_globus_download():
    """Check where globus_download() places files, cleaning up after each case."""
    client = forge.Forge()

    # Simple case: the file is expected in the current directory
    client.globus_download(example_result1)
    assert os.path.exists("./test_fetch.txt")
    os.remove("./test_fetch.txt")

    # With dest and preserve_dir: the file is expected under dest/test/
    dest_path = os.path.expanduser("~/mdf")
    nested_dir = os.path.join(dest_path, "test")
    nested_file = os.path.join(nested_dir, "test_fetch.txt")
    client.globus_download(example_result1, dest=dest_path, preserve_dir=True)
    assert os.path.exists(nested_file)
    os.remove(nested_file)
    os.rmdir(nested_dir)

    # With multiple files: both are expected directly under dest
    client.globus_download(example_result2, dest=dest_path)
    downloaded = [
        os.path.join(dest_path, filename)
        for filename in ("test_fetch.txt", "test_multifetch.txt")
    ]
    for path in downloaded:
        assert os.path.exists(path)
    for path in downloaded:
        os.remove(path)

コード例 #28

0

ファイルを表示

ファイル: test_forge.py プロジェクト: nds-org/forge

def test_forge_http_download(capsys):
    """Exercise http_download(): placement, name conflicts, limits, and errors.

    Args:
        capsys: pytest fixture used to read what http_download() printed.
    """
    f = forge.Forge(index="mdf")
    # Simple case
    f.http_download(example_result1)
    assert os.path.exists("./test_fetch.txt")

    # Test conflicting filenames
    # Repeated downloads are expected to get "(1)", "(2)" suffixes
    f.http_download(example_result1)
    assert os.path.exists("./test_fetch(1).txt")
    f.http_download(example_result1)
    assert os.path.exists("./test_fetch(2).txt")
    os.remove("./test_fetch.txt")
    os.remove("./test_fetch(1).txt")
    os.remove("./test_fetch(2).txt")

    # With dest and preserve_dir, and tuple of results
    dest_path = os.path.expanduser("~/mdf")
    f.http_download(([example_result1], {
        "info": None
    }),
                    dest=dest_path,
                    preserve_dir=True)
    assert os.path.exists(os.path.join(dest_path, "test", "test_fetch.txt"))
    os.remove(os.path.join(dest_path, "test", "test_fetch.txt"))
    os.rmdir(os.path.join(dest_path, "test"))

    # With multiple files
    f.http_download(example_result2, dest=dest_path)
    assert os.path.exists(os.path.join(dest_path, "test_fetch.txt"))
    assert os.path.exists(os.path.join(dest_path, "test_multifetch.txt"))
    os.remove(os.path.join(dest_path, "test_fetch.txt"))
    os.remove(os.path.join(dest_path, "test_multifetch.txt"))

    # Too many files
    # A 10001-item input is expected to be refused with success=False
    assert f.http_download(list(range(10001)))["success"] is False

    # "Missing" files
    f.http_download(example_result_missing)
    # Read the captured output only after the failing download has run
    out, err = capsys.readouterr()
    assert not os.path.exists("./missing.txt")
    assert ("Error 404 when attempting to access "
            "'https://data.materialsdatafacility.org/test/missing.txt'") in out

コード例 #29

0

ファイルを表示

ファイル: test_forge.py プロジェクト: nds-org/forge

def test_forge_exclude_range():
    """Exercise exclude_range() on one Forge instance.

    Covers open-ended ranges, a non-inclusive single-value range, and a call
    with neither start nor stop.
    """
    # Single-value use
    f = forge.Forge(index="mdf")
    f.exclude_range("mdf.elements", "Am", "*")
    f.exclude_range("mdf.elements", "*", "Ak")
    res1, info1 = f.search(info=True)
    # Evaluate check_field once instead of once per side of an `or`
    assert check_field(res1, "mdf.elements", "Al") in (0, 2)

    # Only the info dict is needed from the plain "Al" query
    _, info2 = f.search("mdf.elements:Al", advanced=True, info=True)
    assert info1["total_query_matches"] <= info2["total_query_matches"]

    # Non-matching use, test inclusive
    f.exclude_range("mdf.elements", "Am", "*")
    f.exclude_range("mdf.elements", "*", "Ak")
    f.exclude_range("mdf.elements", "Al", "Al", inclusive=False)
    _, info3 = f.search(info=True)
    assert info1["total_query_matches"] == info3["total_query_matches"]

    # Nothing to match: the call should compare equal to the same Forge
    assert f.exclude_range("field", start=None, stop=None) == f

コード例 #30

0

ファイルを表示

ファイル: test_forge.py プロジェクト: nds-org/forge

def test_forge_match_years(capsys):
    """Exercise match_years(): explicit years, invalid input, and ranges.

    Args:
        capsys: pytest fixture used to read the messages printed for invalid
            year values.
    """
    # One year of data/results
    f = forge.Forge(index="mdf")
    res1 = f.match_years("2015").search()
    assert res1 != []
    assert check_field(res1, "mdf.year", 2015) == 0

    # Multiple years (given as a mix of str and int)
    res2 = f.match_years(years=["2015", 2011]).search()
    assert check_field(res2, "mdf.year", 2011) == 2

    # Wrong input
    # Each invalid call is followed by its own readouterr() so that the
    # captured output holds only that call's message.
    f.match_years(["20x5"]).search()
    out, err = capsys.readouterr()
    assert "Invalid year: '20x5'" in out

    f.match_years(start="20x5").search()
    out, err = capsys.readouterr()
    assert "Invalid start year: '20x5'" in out

    f.match_years(stop="20x5").search()
    out, err = capsys.readouterr()
    assert "Invalid stop year: '20x5'" in out

    # No arguments: the call should compare equal to the same Forge
    assert f.match_years() == f

    # Test range
    res4 = f.match_years(start=2015, stop=2015, inclusive=True).search()
    assert check_field(res4, "mdf.year", 2015) == 0

    # Non-inclusive range: both endpoints (2014 and 2017) must be absent
    res5 = f.match_years(start=2014, stop=2017, inclusive=False).search()
    assert check_field(res5, "mdf.year", 2013) == -1
    assert check_field(res5, "mdf.year", 2014) == -1
    assert check_field(res5, "mdf.year", 2015) == 2
    assert check_field(res5, "mdf.year", 2016) == 2
    assert check_field(res5, "mdf.year", 2017) == -1

    # A non-inclusive range with equal endpoints matches nothing
    assert f.match_years(start=2015, stop=2015, inclusive=False).search() == []