Code Example #1
def test_forge_anonymous(capsys):
    f = Forge(anonymous=True)
    # Test search
    assert len(
        f.search("mdf.source_name:ab_initio_solute_database",
                 advanced=True,
                 limit=300)) == 300

    # Test aggregation
    assert len(f.aggregate("mdf.source_name:nist_xps_db")) > 10000

    # Error on auth-only functions
    # http_download
    assert f.http_download({})["success"] is False
    out, err = capsys.readouterr()
    assert "Error: Anonymous HTTP download not yet supported." in out
    # globus_download
    assert f.globus_download({})["success"] is False
    out, err = capsys.readouterr()
    assert "Error: Anonymous Globus Transfer not supported." in out
    # http_stream
    res = f.http_stream({})
    assert next(res)["success"] is False
    out, err = capsys.readouterr()
    assert "Error: Anonymous HTTP download not yet supported." in out
    with pytest.raises(StopIteration):
        next(res)
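
For reference, a minimal sketch of the anonymous mode exercised above: an anonymous Forge client can search and aggregate, but the download helpers require authentication (the query below is illustrative only).

from mdf_forge import Forge

f = Forge(anonymous=True)
# Searching works without a Globus login...
results = f.search("mdf.source_name:nist_xps_db", advanced=True, limit=5)
# ...but downloads report failure, exactly as the test asserts:
assert f.http_download({})["success"] is False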
Code Example #2
def test_forge_aggregate_sources():
    # Aggregate every record from a single source
    f = Forge(index="mdf")
    res1 = f.aggregate_sources("nist_xps_db")
    assert isinstance(res1, list)
    assert len(res1) > 10000
    assert isinstance(res1[0], dict)
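
Note that aggregate_sources() (like aggregate() in Code Example #1) retrieves full result sets larger than a single search returns, presumably by paging through results internally; the assertion above expects more than 10,000 entries. A minimal sketch:

f = Forge(index="mdf")
all_records = f.aggregate_sources("nist_xps_db")  # may return well over 10,000 entries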
Code Example #3
def test_forge_globus_download():
    f = Forge(index="mdf")
    # Simple case
    f.globus_download(example_result1)
    assert os.path.exists("./test_fetch.txt")
    os.remove("./test_fetch.txt")

    # With dest and preserve_dir
    dest_path = os.path.expanduser("~/mdf")
    f.globus_download(example_result1, dest=dest_path, preserve_dir=True)
    assert os.path.exists(os.path.join(dest_path, "test", "test_fetch.txt"))
    os.remove(os.path.join(dest_path, "test", "test_fetch.txt"))
    os.rmdir(os.path.join(dest_path, "test"))

    # With multiple files
    f.globus_download(example_result2, dest=dest_path)
    assert os.path.exists(os.path.join(dest_path, "test_fetch.txt"))
    assert os.path.exists(os.path.join(dest_path, "test_multifetch.txt"))
    os.remove(os.path.join(dest_path, "test_fetch.txt"))
    os.remove(os.path.join(dest_path, "test_multifetch.txt"))

    f.globus_download(example_result3, dest=dest_path)
    assert os.path.exists(os.path.join(dest_path, "test_fetch.txt"))
    assert os.path.exists(os.path.join(dest_path, "test_multifetch.txt"))
    os.remove(os.path.join(dest_path, "test_fetch.txt"))
    os.remove(os.path.join(dest_path, "test_multifetch.txt"))
Code Example #4
    def __init__(self, no_local_server, anonymous, test):
        self.no_local_server = no_local_server
        self.anonymous = anonymous
        self.test = test
        self.mdf = Forge(no_local_server=self.no_local_server,
                         anonymous=self.anonymous,
                         test=self.test)
Code Example #5
def test_forge_chaining():
    f = Forge(index="mdf")
    f.match_field("source_name", "cip")
    f.match_field("material.elements", "Al")
    res1 = f.search()
    res2 = f.match_field("source_name",
                         "cip").match_field("material.elements",
                                            "Al").search()
    assert all([r in res2 for r in res1]) and all([r in res1 for r in res2])
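
This works because Forge exposes a fluent interface: each match_field() call adds to the current query and returns the Forge instance itself, so filters can be applied one statement at a time or chained into a single expression, and search() then consumes the accumulated query. A two-line sketch of the chained form:

f = Forge(index="mdf")
res = f.match_field("source_name", "cip").match_field("material.elements", "Al").search()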
Code Example #6
def test_forge_http_stream(capsys):
    f = Forge(index="mdf")
    # Simple case
    res1 = f.http_stream(example_result1)
    assert isinstance(res1, types.GeneratorType)
    assert next(
        res1
    ) == "This is a test document for Forge testing. Please do not remove.\n"

    # With multiple files
    res2 = f.http_stream((example_result2, {"info": {}}))
    assert isinstance(res2, types.GeneratorType)
    assert next(
        res2
    ) == "This is a test document for Forge testing. Please do not remove.\n"
    assert next(
        res2
    ) == "This is a second test document for Forge testing. Please do not remove.\n"
    assert next(
        res2
    ) == "This is a test document for Forge testing. Please do not remove.\n"
    assert next(
        res2
    ) == "This is a second test document for Forge testing. Please do not remove.\n"

    res3 = f.http_stream((example_result3, {"info": {}}))
    assert isinstance(res3, types.GeneratorType)
    assert next(
        res3
    ) == "This is a test document for Forge testing. Please do not remove.\n"
    assert next(
        res3
    ) == "This is a second test document for Forge testing. Please do not remove.\n"
    assert next(
        res3
    ) == "This is a test document for Forge testing. Please do not remove.\n"
    assert next(
        res3
    ) == "This is a second test document for Forge testing. Please do not remove.\n"

    # Too many results
    res4 = f.http_stream(list(range(10001)))
    assert next(res4)["success"] is False
    out, err = capsys.readouterr()
    assert "Too many results supplied. Use globus_download()" in out
    with pytest.raises(StopIteration):
        next(res4)

    # "Missing" files
    assert next(f.http_stream(example_result_missing)) is None
    out, err = capsys.readouterr()
    assert not os.path.exists("./should_not_exist.txt")
    assert (
        "Error 404 when attempting to access "
        "'https://data.materialsdatafacility.org/test/should_not_exist.txt'"
    ) in out
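
A hedged sketch of consuming http_stream() in ordinary code, based on the generator behavior verified above (handle_text is a hypothetical consumer):

f = Forge(index="mdf")
results = f.search("mdf.source_name:oqmd", advanced=True, limit=10)
for contents in f.http_stream(results):
    handle_text(contents)  # hypothetical; each yielded item is one file's text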
Code Example #7
def test_forge_fetch_datasets_from_results():
    # Get some results
    f = Forge(index="mdf")
    # Record from OQMD
    res01 = f.search("mdf.source_name:oqmd AND mdf.resource_type:record",
                     advanced=True,
                     limit=1)
    # Record from OQMD with info
    res02 = f.search("mdf.source_name:oqmd AND mdf.resource_type:record",
                     advanced=True,
                     limit=1,
                     info=True)
    # Records from Khazana (VASP)
    res03 = f.search(
        "mdf.source_name:khazana_vasp AND mdf.resource_type:record",
        advanced=True,
        limit=2)
    # Dataset for NIST XPS DB
    res04 = f.search(
        "mdf.source_name:nist_xps_db AND mdf.resource_type:dataset",
        advanced=True)

    # Get the correct dataset entries
    oqmd = f.search("mdf.source_name:oqmd AND mdf.resource_type:dataset",
                    advanced=True)[0]
    khazana_vasp = f.search(
        "mdf.source_name:khazana_vasp AND mdf.resource_type:dataset",
        advanced=True)[0]

    # Fetch single dataset
    res1 = f.fetch_datasets_from_results(res01[0])
    assert mdf_toolbox.insensitive_comparison(res1[0], oqmd)

    # Fetch dataset with results + info
    res2 = f.fetch_datasets_from_results(res02)
    assert mdf_toolbox.insensitive_comparison(res2[0], oqmd)

    # Fetch multiple datasets
    rtemp = res01 + res03
    res3 = f.fetch_datasets_from_results(rtemp)
    assert len(res3) == 2
    assert oqmd in res3
    assert khazana_vasp in res3

    # Fetch dataset from dataset
    res4 = f.fetch_datasets_from_results(res04)
    assert mdf_toolbox.insensitive_comparison(res4, res04)

    # Fetch entries from current query
    f.match_source_names("nist_xps_db")
    assert f.fetch_datasets_from_results() == res04

    # Fetch nothing
    unknown_entry = {"mdf": {"resource_type": "unknown"}}
    assert f.fetch_datasets_from_results(unknown_entry) == []
Code Example #8
def test_get_dataset_version():
    # Get the version number of the OQMD
    f = Forge()
    hits = f.search('mdf.source_name:oqmd AND mdf.resource_type:dataset',
                    advanced=True,
                    limit=1)
    assert hits[0]['mdf']['version'] == f.get_dataset_version('oqmd')

    # Test invalid source_name
    with pytest.raises(ValueError):
        f.get_dataset_version('notreal')
Code Example #9
def test_forge_search_by_titles():
    f = Forge(index="mdf")
    titles1 = ['"High-throughput Ab-initio Dilute Solute Diffusion Database"']
    res1 = f.search_by_titles(titles1)
    assert check_field(
        res1, "dc.titles.[].title",
        "High-throughput Ab-initio Dilute Solute Diffusion Database") == 0

    titles2 = ["Database"]
    res2 = f.search_by_titles(titles2)
    assert check_field(res2, "dc.titles.[].title",
                       "NIST X-ray Photoelectron Spectroscopy Database") == 2
Code Example #10
def test_forge_search_by_elements():
    f = Forge(index="mdf")
    elements = ["Cu", "Al"]
    sources = ["oqmd", "nist_xps_db"]
    res1, info1 = f.match_source_names(sources).match_elements(
        elements).search(limit=10000, info=True)
    res2, info2 = f.search_by_elements(elements,
                                       sources,
                                       limit=10000,
                                       info=True)
    assert all([r in res2 for r in res1]) and all([r in res1 for r in res2])
    assert check_field(res1, "material.elements", "Al") == 1
    assert check_field(res1, "mdf.source_name", "oqmd") == 2
Code Example #11
def test_forge_match_resource_types():
    f = Forge(index="mdf")
    # Test one type
    f.match_resource_types("record")
    res1 = f.search(limit=10)
    assert check_field(res1, "mdf.resource_type", "record") == 0

    # Test two types
    f.match_resource_types(["collection", "dataset"])
    res2 = f.search()
    assert check_field(res2, "mdf.resource_type", "record") == -1

    # Test zero types
    assert f.match_resource_types("") == f
Code Example #12
    def __init__(self,
                 no_browser=False,
                 no_local_server=False,
                 search_index="mdf-test",
                 **data):
        super().__init__(**data)
        auths = mdf_toolbox.login(
            services=[
                "data_mdf",
                "search",
                "petrel",
                "transfer",
                "dlhub",
                "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all",
            ],
            app_name="Foundry",
            make_clients=True,
            no_browser=no_browser,
            no_local_server=no_local_server,
        )

        self.forge_client = Forge(
            index=search_index,
            services=None,
            search_client=auths["search"],
            transfer_client=auths["transfer"],
            data_mdf_authorizer=auths["data_mdf"],
            petrel_authorizer=auths["petrel"],
        )

        self.dlhub_client = DLHubClient(
            dlh_authorizer=auths["dlhub"],
            search_client=auths["search"],
            fx_authorizer=auths[
                "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all"],
            force_login=False,
        )

        self.xtract_tokens = {
            'auth_token': auths['petrel'].access_token,
            'transfer_token': auths['transfer'].authorizer.access_token,
            'funx_token': auths[
                'https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all'
            ].access_token,
        }
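
Assuming this __init__ belongs to the Foundry client class (the class definition is not shown here), construction would look roughly like the sketch below; the keyword names are taken from the signature above.

client = Foundry(no_browser=True, no_local_server=True, search_index="mdf-test")
client.forge_client.match_elements("Al")  # the wrapped Forge is reachable as an attribute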
Code Example #13
def test_describe_organization(capsys):
    f = Forge()
    # Basic usage (with raw=True)
    res = f.describe_organization("Argonne National Laboratory", raw=True)
    assert res["success"]
    assert isinstance(res["organization"], dict)
    assert res["organization"][
        "canonical_name"] == "Argonne National Laboratory"
    assert "ANL" in res["organization"]["aliases"]
    # List
    res = f.describe_organization("list", raw=True)
    assert isinstance(res["organization"], list)
    assert "Center for Hierarchical Materials Design" in res["organization"]
    # All
    res = f.describe_organization("all", raw=True)
    assert isinstance(res["organization"], list)
    assert isinstance(res["organization"][0], dict)
    # Print to stdout
    f.describe_organization("CHiMaD")
    out, err = capsys.readouterr()
    assert "canonical_name: Center for Hierarchical Materials Design" in out
    assert "CHiMaD" in out
    assert "public" in out
    # List
    f.describe_organization("list")
    out, err = capsys.readouterr()
    assert "Center for Hierarchical Materials Design" in out
    assert "CHiMaD" not in out
    assert "Argonne National Laboratory" in out
    assert "ANL" not in out
    # Summary flag
    f.describe_organization("chimad", summary=True)
    out, err = capsys.readouterr()
    assert "canonical_name: Center for Hierarchical Materials Design" not in out
    assert "Center for Hierarchical Materials Design" in out
    assert "CHiMaD" in out
    assert "public" not in out

    # Errors
    # Invalid org
    res = f.describe_organization("foobar", raw=True)
    assert res["success"] is False
    assert "Error 404" in res["error"]
    assert res["status_code"] == 404
    # stdout
    res = f.describe_organization("foobar")
    out, err = capsys.readouterr()
    assert "Error 404" in out
Code Example #14
def test_forge_match_organizations():
    f = Forge(index="mdf")
    # One repo
    f.match_organizations("NIST")
    res1 = f.search()
    assert res1 != []
    check_val1 = check_field(res1, "mdf.organizations", "NIST")
    assert check_val1 == 1

    # Multi-repo
    f.match_organizations(["NIST", "PRISMS"], match_all=False)
    res2 = f.search()
    assert check_field(res2, "mdf.organizations", "PRISMS") == 2
    assert check_field(res2, "mdf.organizations", "NIST") == 2

    # No repos
    assert f.match_organizations("") == f
Code Example #15
def test_forge_match_elements():
    f = Forge(index="mdf")
    # One element
    f.match_elements("Al")
    res1 = f.search()
    assert res1 != []
    check_val1 = check_field(res1, "material.elements", "Al")
    assert check_val1 == 0 or check_val1 == 1

    # Multi-element
    f.match_elements(["Al", "Cu"])
    res2 = f.search()
    assert check_field(res2, "material.elements", "Al") == 1
    assert check_field(res2, "material.elements", "Cu") == 1

    # No elements
    assert f.match_elements("") == f
Code Example #16
def test_forge_test_match_records():
    f = Forge(index="mdf")
    # One record
    f.match_records("cip", 1006)
    res = f.search()
    assert len(res) == 1
    assert check_field(res, "mdf.source_name", "cip") == 0
    assert check_field(res, "mdf.scroll_id", 1006) == 0

    # Multi-record, strip version info
    f.match_records("cip_v3.4", [1006, 1002])
    res = f.search()
    assert len(res) == 2
    assert check_field(res, "mdf.source_name", "cip") == 0
    assert check_field(res, "mdf.scroll_id", 1006) == 2

    # No args
    assert f.match_records("", "") == f
Code Example #17
def test_describe_field(capsys):
    f = Forge()
    # Basic usage (raw=True for ease of testing)
    res = f.describe_field("dataset", raw=True)
    assert res["success"]
    assert "dc" in res["schema"]["properties"].keys()
    assert res["schema"]["properties"]["mdf"]["properties"]["source_id"]
    # Specific field
    res = f.describe_field("dataset", field="dc", raw=True)
    assert "mdf" not in res["schema"]["properties"].keys()
    assert "titles" in res["schema"]["properties"].keys()
    # Special case
    res = f.describe_field("list", raw=True)
    assert isinstance(res["schema"], list)
    assert "mdf" in res["schema"]
    # Printing to stdout
    f.describe_field("record")
    out, err = capsys.readouterr()
    assert "- custom" in out
    # Specific field
    f.describe_field("record", field="mdf")
    out, err = capsys.readouterr()
    assert "- custom" not in out
    assert "- source_id" in out

    # Errors
    # Invalid resource_type
    res = f.describe_field("notexists", raw=True)
    assert res["success"] is False
    assert res["schema"] is None
    assert res["error"].startswith("Error 404")
    # stdout
    f.describe_field("notexists")
    out, err = capsys.readouterr()
    assert "Error 404" in out
    # Invalid field
    res = f.describe_field("dataset", field="foo.bar", raw=True)
    assert res["success"] is False
    assert res["schema"] is None
    assert res["error"].startswith("Error: Field 'foo' (from 'foo.bar')")
    # stdout
    f.describe_field("dataset", field="foo.bar")
    out, err = capsys.readouterr()
    assert "Error: Field 'foo' (from 'foo.bar')" in out
Code Example #18
def test_forge_match_source_names():
    f = Forge(index="mdf")
    # One source
    f.match_source_names("khazana_vasp")
    res1 = f.search()
    assert res1 != []
    assert check_field(res1, "mdf.source_name", "khazana_vasp") == 0

    # Multi-source, strip version info
    f.match_source_names(["khazana_vasp", "ta_melting_v3.4"])
    res2 = f.search()

    # res1 is a subset of res2
    assert len(res2) > len(res1)
    assert all([r1 in res2 for r1 in res1])
    assert check_field(res2, "mdf.source_name", "ta_melting") == 2

    # No source
    assert f.match_source_names("") == f
Code Example #19
def test_forge_match_titles():
    # One title
    f = Forge(index="mdf")
    titles1 = '"High-throughput Ab-initio Dilute Solute Diffusion Database"'
    res1 = f.match_titles(titles1).search()
    assert res1 != []
    assert check_field(
        res1, "dc.titles.[].title",
        "High-throughput Ab-initio Dilute Solute Diffusion Database") == 0

    # Multiple titles
    titles2 = [
        '"High-throughput Ab-initio Dilute Solute Diffusion Database"',
        '"Khazana (VASP)"'
    ]
    res2 = f.match_titles(titles2).search()
    assert res2 != []
    assert check_field(res2, "dc.titles.[].title", "Khazana (VASP)") == 2

    # No titles
    assert f.match_titles("") == f
Code Example #20
def test_forge_match_dois():
    f = Forge(index="mdf")
    # One doi
    f.match_dois("https://dx.doi.org/10.13011/M3B36G")
    res1 = f.search()
    assert res1 != []
    assert check_field(res1, "dc.identifier.identifier",
                       "https://dx.doi.org/10.13011/M3B36G") == 0

    # Multiple dois
    f.match_dois(["https://dx.doi.org/10.13011/M3B36G", "10.18126/M23P9G"])
    res2 = f.search()

    # res1 is a subset of res2
    assert len(res2) > len(res1)
    assert all([r1 in res2 for r1 in res1])
    assert check_field(res2, "dc.identifier.identifier",
                       "10.18126/M23P9G") == 2

    # No doi
    assert f.match_dois("") == f
Code Example #21
def test_forge_match_years(capsys):
    # One year
    f = Forge(index="mdf")
    res1 = f.match_years("2015").search()
    assert res1 != []
    assert check_field(res1, "dc.publicationYear", 2015) == 0

    # Multiple years
    res2 = f.match_years(years=["2015", 2016]).search()
    assert check_field(res2, "dc.publicationYear", 2016) == 2

    # Wrong input
    with pytest.raises(AttributeError) as excinfo:
        f.match_years(["20x5"]).search()
    assert "Invalid year: '20x5'" in str(excinfo.value)

    with pytest.raises(AttributeError) as excinfo:
        f.match_years(start="20x5").search()
    assert "Invalid start year: '20x5'" in str(excinfo.value)

    with pytest.raises(AttributeError) as excinfo:
        f.match_years(stop="20x5").search()
    assert "Invalid stop year: '20x5'" in str(excinfo.value)

    # No filters with no input
    f.match_years()
    assert f.current_query() == ""

    # Test range
    res4 = f.match_years(start=2015, stop=2015, inclusive=True).search()
    assert check_field(res4, "dc.publicationYear", 2015) == 0

    res5 = f.match_years(start=2014, stop=2017, inclusive=False).search()
    assert check_field(res5, "dc.publicationYear", 2013) == -1
    assert check_field(res5, "dc.publicationYear", 2014) == -1
    assert check_field(res5, "dc.publicationYear", 2015) == 2
    assert check_field(res5, "dc.publicationYear", 2016) == 2
    assert check_field(res5, "dc.publicationYear", 2017) == -1

    assert f.match_years(start=2015, stop=2015, inclusive=False).search() == []
Code Example #22
File: test_foundry.py  Project: ethantruelove/foundry
def test_forge_match_source_names():
    f = Forge(index="mdf", no_local_server=True, no_browser=True)
    # One source
    f.match_source_names("khazana_vasp")
    res1 = f.search()
    assert res1 != []
    assert check_field(res1, "mdf.source_name", "khazana_vasp") == 0

    # Multi-source, strip version info
    f.match_source_names(["khazana_vasp", "ta_melting_v3.4"])
    res2 = f.search()

    # res1 is a subset of res2
    assert len(res2) > len(res1)
    assert all([r1 in res2 for r1 in res1])
    assert check_field(res2, "mdf.source_name", "ta_melting") == 2

    # No source
    assert f.match_source_names("") == f
Code Example #23
def generate_stats(raw=False, return_all=False, many_cutoff=100):
    """Generates statistics on datasets in MDF Search.

    Arguments:
        raw (bool): When False, will print stats to stdout and display a progress bar.
                When True, will return a dict of stats and will not display a progress bar.
                Default False.
        return_all (bool): When False or when raw is False, generate summary statistics.
                When True and raw is True, return the dataset source_ids for each category.
                Extremely verbose.
                Default False.
        many_cutoff (int): The number of records required to be considered "many" records.
                This value is inclusive.
                Default 100.

    Returns:
        dict: The stats, when raw is True (otherwise the stats are printed and None is returned).
    """
    mdf = Forge()
    dataset_list = mdf.match_resource_types("dataset").search()

    all_datasets = []
    num_records = 0
    zero_records = []
    one_record = []
    multiple_records = []
    many_records = []

    for ds in tqdm(dataset_list, disable=raw):
        source_id = ds["mdf"]["source_id"]
        record_count = mdf.match_resource_types("record") \
                          .match_source_names(source_id) \
                          .search(limit=0, info=True)[1]["total_query_matches"]

        all_datasets.append((source_id, record_count))
        num_records += record_count
        if record_count == 0:
            zero_records.append(source_id)
        elif record_count == 1:
            one_record.append(source_id)
        elif record_count > 1:
            multiple_records.append(source_id)
            if record_count >= int(many_cutoff):
                many_records.append(source_id)

    if raw:
        returnable = {}
        returnable["all_datasets_count"] = len(all_datasets)
        returnable["all_records_count"] = num_records
        returnable["zero_records_count"] = len(zero_records)
        returnable["one_record_count"] = len(one_record)
        returnable["multiple_records_count"] = len(multiple_records)
        returnable["many_records_count"] = len(many_records)
        returnable["one_or_more_count"] = len(one_record) + len(
            multiple_records)

        if return_all:
            returnable["all_datasets"] = all_datasets
            returnable["zero_records"] = zero_records
            returnable["one_record"] = one_record
            returnable["multiple_records"] = multiple_records
            returnable["many_records"] = many_records
            returnable["one_or_more"] = one_record + multiple_records

        return returnable
    else:
        print("MDF Search Statistics")
        print("---------------------")
        print("Total datasets:", len(all_datasets))
        print("Total records:", num_records)
        print("Datasets with zero records:", len(zero_records))
        print("Datasets with any records: ",
              len(one_record) + len(multiple_records))
        print("{}% of datasets have records".format(
            int((len(one_record) + len(multiple_records)) / len(all_datasets) *
                100)))
        print()
        print("Datasets with exactly one record:   ", len(one_record))
        print("Datasets with more than one record: ", len(multiple_records))
        print("Datasets with more than", many_cutoff, "records:",
              len(many_records))
        print()
        return
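
A short usage sketch for generate_stats(), following its docstring: raw=True returns the summary dict instead of printing it.

stats = generate_stats(raw=True)
print(stats["all_datasets_count"], "datasets /", stats["all_records_count"], "records")

# Or print the human-readable report (with a progress bar):
generate_stats(many_cutoff=1000)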
Code Example #24
    client.add_service("mrr")

    # Make the source name "mdr_item_[item_id]" to make retrieval easy
    client.set_source_name("mdr_item_{}".format(item_id))
    return client


def _make_failure(req):
    return RuntimeError('Problem connecting with {}. HTTP Status Code: {}'.format(req.url,
                                                                                  req.status_code))


if __name__ == "__main__":
    # Make the client
    client = MDFConnectClient()
    forge = Forge()

    # Create an array to store the source_id
    source_ids = []

    # Loop through all items
    for item in tqdm(get_all_publications()):
        # Check if we have done it already
        if has_been_submitted(item, forge):
            continue

        # If not, ready the client to submit
        prepare_client_submission(item, client)

        # Skip if no data
        if len(client.data) == 0:
Code Example #25
def test_forge_search_by_dois():
    f = Forge(index="mdf")
    res1 = f.search_by_dois("https://dx.doi.org/10.13011/M3B36G")
    assert check_field(res1, "dc.identifier.identifier",
                       "https://dx.doi.org/10.13011/M3B36G") == 0
Code Example #26
# -*- coding: utf-8 -*-
"""
@Project : formationEPres
@Author  : Xu-Shan Zhao
@Filename: mdfOqmdRetrieval202005280940.py
@IDE     : PyCharm
@Time1   : 2020-05-28 09:40:27
@Time2   : 2020/5/28 9:40
@Month1  : May
@Month2  : May
"""

import pymongo
from mdf_forge import Forge

mdf = Forge()

dataset_name = 'oqmd'
# ro = mdf.match_source_names(dataset_name)
# ro = ro.search(limit=-1)
ro = mdf.aggregate_sources(dataset_name)

client = pymongo.MongoClient(host='localhost', port=27017)
collection = client['MDF_datasets']['oqmd']
# collection.insert_many(ro)
for i, record in enumerate(ro):
    try:
        collection.insert_one(record)
    except pymongo.errors.PyMongoError:
        # Log the index of any record that failed to insert
        print(i)
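
As an aside, the commented-out collection.insert_many(ro) above would be faster for bulk loading. A hedged sketch that keeps going past individual failures (assuming duplicate keys are the errors being skipped):

try:
    collection.insert_many(ro, ordered=False)  # unordered: one bad document doesn't stop the rest
except pymongo.errors.BulkWriteError as err:
    print(err.details)  # summary of which inserts failed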
Code Example #27
def test_forge_http_download(capsys):
    f = Forge(index="mdf")
    # Simple case
    f.http_download(example_result1)
    assert os.path.exists("./test_fetch.txt")

    # Test conflicting filenames
    f.http_download(example_result1)
    assert os.path.exists("./test_fetch(1).txt")
    f.http_download(example_result1)
    assert os.path.exists("./test_fetch(2).txt")
    os.remove("./test_fetch.txt")
    os.remove("./test_fetch(1).txt")
    os.remove("./test_fetch(2).txt")

    # With dest and preserve_dir, and tuple of results
    dest_path = os.path.expanduser("~/mdf")
    f.http_download(([example_result1], {"info": None}),
                    dest=dest_path,
                    preserve_dir=True)
    assert os.path.exists(os.path.join(dest_path, "test", "test_fetch.txt"))
    os.remove(os.path.join(dest_path, "test", "test_fetch.txt"))
    os.rmdir(os.path.join(dest_path, "test"))

    # With multiple files
    f.http_download(example_result2, dest=dest_path)
    assert os.path.exists(os.path.join(dest_path, "test_fetch.txt"))
    assert os.path.exists(os.path.join(dest_path, "test_multifetch.txt"))
    assert os.path.exists(os.path.join(dest_path, "petrel_fetch.txt"))
    assert os.path.exists(os.path.join(dest_path, "petrel_multifetch.txt"))
    os.remove(os.path.join(dest_path, "test_fetch.txt"))
    os.remove(os.path.join(dest_path, "test_multifetch.txt"))
    os.remove(os.path.join(dest_path, "petrel_fetch.txt"))
    os.remove(os.path.join(dest_path, "petrel_multifetch.txt"))

    f.http_download(example_result3, dest=dest_path)
    assert os.path.exists(os.path.join(dest_path, "test_fetch.txt"))
    assert os.path.exists(os.path.join(dest_path, "test_multifetch.txt"))
    assert os.path.exists(os.path.join(dest_path, "petrel_fetch.txt"))
    assert os.path.exists(os.path.join(dest_path, "petrel_multifetch.txt"))
    os.remove(os.path.join(dest_path, "test_fetch.txt"))
    os.remove(os.path.join(dest_path, "test_multifetch.txt"))
    os.remove(os.path.join(dest_path, "petrel_fetch.txt"))
    os.remove(os.path.join(dest_path, "petrel_multifetch.txt"))

    # Too many files
    assert f.http_download(list(range(10001)))["success"] is False
    out, err = capsys.readouterr()
    assert "Too many results supplied. Use globus_download()" in out

    # "Missing" files
    f.http_download(example_result_missing)
    out, err = capsys.readouterr()
    assert not os.path.exists("./should_not_exist.txt")
    assert (
        "Error 404 when attempting to access "
        "'https://data.materialsdatafacility.org/test/should_not_exist.txt'"
    ) in out

    # No datasets
    f.http_download(example_dataset)
    out, err = capsys.readouterr()
    assert not os.path.exists(os.path.join(dest_path, "petrel_fetch.txt"))
    assert (
        "Skipping datset entry for 'foobar_v1': Cannot download dataset over HTTPS. "
        "Use globus_download() for datasets.") in out  # sic: "datset" is asserted verbatim

    # Bad resource_type
    f.http_download(example_bad_resource)
    out, err = capsys.readouterr()
    assert "Error: Found unknown resource_type 'foobar'. Skipping entry." in out