def test_forge_fetch_datasets_from_results():
    """Check ``Forge.fetch_datasets_from_results`` against known dataset entries.

    Records and datasets are looked up with advanced queries, then passed to
    ``fetch_datasets_from_results`` one at a time, as a search result obtained
    with ``info=True``, as a combined list, as dataset entries, implicitly via
    the current query, and finally as an entry with an unknown resource type.
    """
    forge = Forge(index="mdf")

    # --- Inputs -----------------------------------------------------------
    # One OQMD record, fetched with and without info=True.
    oqmd_record_query = "mdf.source_name:oqmd AND mdf.resource_type:record"
    oqmd_records = forge.search(oqmd_record_query, advanced=True, limit=1)
    oqmd_records_with_info = forge.search(
        oqmd_record_query, advanced=True, limit=1, info=True)
    # Two records from the khazana_vasp source.
    khazana_records = forge.search(
        "mdf.source_name:khazana_vasp AND mdf.resource_type:record",
        advanced=True, limit=2)
    # Dataset entries for the NIST XPS database.
    xps_datasets = forge.search(
        "mdf.source_name:nist_xps_db AND mdf.resource_type:dataset",
        advanced=True)

    # --- Expected dataset entries ----------------------------------------
    oqmd_dataset = forge.search(
        "mdf.source_name:oqmd AND mdf.resource_type:dataset",
        advanced=True)[0]
    khazana_dataset = forge.search(
        "mdf.source_name:khazana_vasp AND mdf.resource_type:dataset",
        advanced=True)[0]

    # A single record entry resolves to the OQMD dataset.
    from_single_record = forge.fetch_datasets_from_results(oqmd_records[0])
    assert mdf_toolbox.insensitive_comparison(from_single_record[0], oqmd_dataset)

    # A search result obtained with info=True is accepted as-is.
    from_info_result = forge.fetch_datasets_from_results(oqmd_records_with_info)
    assert mdf_toolbox.insensitive_comparison(from_info_result[0], oqmd_dataset)

    # Records from two sources yield exactly the two matching datasets.
    from_mixed_records = forge.fetch_datasets_from_results(
        oqmd_records + khazana_records)
    assert len(from_mixed_records) == 2
    assert oqmd_dataset in from_mixed_records
    assert khazana_dataset in from_mixed_records

    # Dataset entries passed in come back as the same dataset entries.
    from_dataset_entries = forge.fetch_datasets_from_results(xps_datasets)
    assert mdf_toolbox.insensitive_comparison(from_dataset_entries, xps_datasets)

    # With no argument, the query currently built on the Forge object is used.
    forge.match_source_names("nist_xps_db")
    assert forge.fetch_datasets_from_results() == xps_datasets

    # An entry with an unrecognised resource type yields nothing.
    unknown_entry = {"mdf": {"resource_type": "unknown"}}
    assert forge.fetch_datasets_from_results(unknown_entry) == []
class FoundryDatasets():
    """
    Class to download datasets hosted on Materials Data Facility

    Args:
        no_local_server: (bool), whether or not the server is local. Set to True if running on e.g. Google Colab
        anonymous: (bool), whether to use your MDF user or be anonymous. Some functionality may be disabled if True
        test: (bool), whether to be in test mode. Some functionality may be disabled if True

    Methods:
        download_data: downloads specified data from MDF and saves to current directory
            Args:
                name: (str), name of the dataset to download
                doi: (str), digital object identifier of the dataset to download
                download: (bool), whether or not to download the full dataset

            Returns:
                None
    """

    def __init__(self, no_local_server, anonymous, test):
        self.no_local_server = no_local_server
        self.anonymous = anonymous
        self.test = test
        # The Forge client is built once here and reused by download_data.
        self.mdf = Forge(no_local_server=self.no_local_server,
                         anonymous=self.anonymous,
                         test=self.test)

    def download_data(self, name=None, doi=None, download=False):
        """
        Look up a single dataset on MDF by name or DOI and optionally download it.

        ``name`` takes precedence: ``doi`` is only used when ``name`` is None.
        Progress and errors are reported with ``print``; nothing is raised by
        this method itself.

        Args:
            name: (str), name of the dataset to look up
            doi: (str), digital object identifier of the dataset to look up
            download: (bool), whether or not to download the dataset once found

        Returns:
            None
        """
        if name is not None:
            self.mdf.match_source_names(name)
        elif doi is not None:
            self.mdf.match_dois(doi)
        else:
            print('ERROR: please specify either the dataset name or DOI for lookup MDF')
            # No query has been built, so there is nothing meaningful to search for.
            return

        result = self.mdf.search()
        if len(result) != 1:
            # Only an unambiguous, single match is reported or downloaded.
            print('ERROR: expected exactly one matching dataset on MDF, found', len(result))
            return

        print('Successfully found the desired dataset on MDF')
        print('MDF entry:')
        pprint(result)
        if download:
            print('Downloading dataset from MDF')
            self.mdf.globus_download(results=result)
        return
def test_forge_match_source_names():
    """Exercise ``Forge.match_source_names`` with one, several, and no sources."""
    forge = Forge(index="mdf")

    # A single source name.
    forge.match_source_names("khazana_vasp")
    single_source_hits = forge.search()
    assert single_source_hits != []
    assert check_field(single_source_hits, "mdf.source_name", "khazana_vasp") == 0

    # Several source names; the second carries a version suffix, while the
    # field check below looks for the name without it.
    forge.match_source_names(["khazana_vasp", "ta_melting_v3.4"])
    multi_source_hits = forge.search()
    # The single-source results must be a strict subset of the wider search.
    assert len(multi_source_hits) > len(single_source_hits)
    assert all(hit in multi_source_hits for hit in single_source_hits)
    assert check_field(multi_source_hits, "mdf.source_name", "ta_melting") == 2

    # An empty source name hands back the Forge object itself.
    assert forge.match_source_names("") == forge
def test_forge_search_by_elements():
    """Check ``search_by_elements`` against the equivalent chained query.

    The same elements and sources are searched twice: once by chaining
    ``match_source_names`` and ``match_elements`` before ``search``, and once
    through ``search_by_elements``. Both must return the same entries.
    """
    forge = Forge(index="mdf")
    elements = ["Cu", "Al"]
    sources = ["oqmd", "nist_xps_db"]

    # Reference result built from the chained match_* helpers.
    chained_query = forge.match_source_names(sources).match_elements(elements)
    chained_hits, chained_info = chained_query.search(limit=10000, info=True)

    # Result from the convenience method under test.
    direct_hits, direct_info = forge.search_by_elements(
        elements, sources, limit=10000, info=True)

    # Each result set must contain every entry of the other.
    assert all(hit in direct_hits for hit in chained_hits)
    assert all(hit in chained_hits for hit in direct_hits)

    assert check_field(chained_hits, "material.elements", "Al") == 1
    assert check_field(chained_hits, "mdf.source_name", "oqmd") == 2
def test_forge_match_source_names():
    """Exercise ``Forge.match_source_names`` with one, several, and no sources.

    The Forge client is created with ``no_local_server=True`` and
    ``no_browser=True``.
    """
    f = Forge(index="mdf", no_local_server=True, no_browser=True)

    # One source
    f.match_source_names("khazana_vasp")
    res1 = f.search()
    assert res1 != []
    assert check_field(res1, "mdf.source_name", "khazana_vasp") == 0

    # Multi-source; the second name carries a version suffix, while the field
    # check below looks for the name without it.
    f.match_source_names(["khazana_vasp", "ta_melting_v3.4"])
    res2 = f.search()
    # res1 is a strict subset of res2
    assert len(res2) > len(res1)
    assert all(r1 in res2 for r1 in res1)
    assert check_field(res2, "mdf.source_name", "ta_melting") == 2

    # No source: the call hands back the Forge object itself
    assert f.match_source_names("") == f