def build_db(session):
    """Load nanomfg recipe records from the "mdf-test" Forge index into the database.

    Searches Forge for every record that carries a
    ``projects.nanomfg.catalyst`` field, converts each hit into an
    ``MdfForge`` row, stages it on ``session`` and finally commits.

    All rows are staged first and written with a single ``commit()`` call,
    so if any record is malformed the exception propagates before anything
    has been committed by this function.

    Args:
        session: Object exposing ``add(obj)`` and ``commit()``
            (presumably a SQLAlchemy session -- confirm against callers).

    Raises:
        KeyError, IndexError: If a record lacks one of the mandatory fields
            (``mdf.mdf_id``, the first entry of ``dc.titles``,
            ``base_pressure``, ``carbon_source``, ``catalyst`` or
            ``max_temperature``).
    """
    mdf = Forge("mdf-test")
    mdf.match_field("projects.nanomfg.catalyst", "*")
    records = mdf.search()

    def safe_get_recipe_value(recipe, key):
        # Optional fields: a missing key and a falsy value both become None.
        return recipe.get(key) or None

    for record in records:
        row = MdfForge()
        row.mdf_id = record["mdf"]["mdf_id"]
        row.title = record["dc"]["titles"][0]["title"]

        recipe_data = record["projects"]["nanomfg"]
        # Mandatory recipe fields: direct indexing fails loudly when absent.
        row.base_pressure = recipe_data["base_pressure"]
        row.carbon_source = recipe_data["carbon_source"]
        row.catalyst = recipe_data["catalyst"]
        row.max_temperature = recipe_data["max_temperature"]
        # Optional recipe fields.
        row.grain_size = safe_get_recipe_value(recipe_data, "grain_size")
        row.orientation = safe_get_recipe_value(recipe_data, "orientation")
        row.sample_surface_area = safe_get_recipe_value(
            recipe_data, "sample_surface_area"
        )
        row.sample_thickness = safe_get_recipe_value(
            recipe_data, "sample_thickness"
        )

        session.add(row)

    # Commit once, after every row has been staged.
    session.commit()
def __init__(self, anonymous=False, **kwargs):
    """
    Create a Forge client and store it on ``self.forge``.

    Args:
        anonymous (bool): whether to use anonymous login (i.e. no globus
            authentication)
        **kwargs: extra keyword arguments forwarded unchanged to Forge,
            e.g. index (globus search index to search on) or local_ep
    """
    # ``anonymous`` is a named parameter, so it can never also appear in
    # ``kwargs``; merging the two therefore cannot overwrite anything.
    forge_options = dict(kwargs, anonymous=anonymous)
    self.forge = Forge(**forge_options)
class MDFDataRetrieval(BaseDataRetrieval):
    """
    Data retrieval class for the Materials Data Facility (MDF): runs a query
    through a Forge client and converts the matching documents into a
    pandas DataFrame.

    Full access to MDF requires authentication (see api_link); an anonymous
    mode is available by passing anonymous=True to the constructor.

    Examples:
        mdf_dr = MDFDataRetrieval(anonymous=True)
        results = mdf_dr.get_dataframe({"elements": ["Ag", "Be"],
                                        "source_names": ["oqmd"]})
        results = mdf_dr.get_dataframe(
            {"source_names": ["oqmd"],
             "match_ranges": {"oqmd.band_gap.value": [4.0, "*"]}})

    If you use this data retrieval class, please additionally cite:
    Blaiszik, B., Chard, K., Pruyne, J., Ananthakrishnan, R., Tuecke, S.,
    Foster, I., 2016. The Materials Data Facility: Data Services to Advance
    Materials Science Research. JOM 68, 2045–2052.
    https://doi.org/10.1007/s11837-016-2001-3
    """

    def __init__(self, anonymous=False, **kwargs):
        """
        Create the Forge client stored on ``self.forge``.

        Args:
            anonymous (bool): whether to use anonymous login (i.e. no
                globus authentication)
            **kwargs: extra keyword arguments forwarded unchanged to Forge,
                e.g. index (globus search index to search on) or local_ep
        """
        # ``anonymous`` is a named parameter, so it can never also be a key
        # of ``kwargs``; the merge cannot overwrite anything.
        forge_options = dict(kwargs, anonymous=anonymous)
        self.forge = Forge(**forge_options)

    def api_link(self):
        """Return the URL of the Forge project page."""
        return "https://github.com/materials-data-facility/forge"

    def get_dataframe(self, criteria, properties=None, unwind_arrays=True):
        """
        Build a Forge query from ``criteria``, run it and return the
        aggregated results as a DataFrame.

        Args:
            criteria (dict): query options. Keys with a missing or falsy
                value are ignored. Recognised keys:
                source_names ([str]): source names to include,
                    e.g. ["oqmd"]
                elements ([str]): elements to include, e.g. ["Ag", "Si"]
                titles ([str]): titles to include,
                    e.g. ["Coarsening of a semisolid Al-Cu alloy"]
                tags ([str]): tags to include, e.g. ["outcar"]
                resource_types ([str]): resources to include,
                    e.g. ["record"]
                match_fields ({}): field-value mappings to include,
                    e.g. {"oqmd.converged": True}
                exclude_fields ({}): field-value mappings to exclude,
                    e.g. {"oqmd.converged": False}
                match_ranges ({}): field-range mappings to include,
                    e.g. {"oqmd.band_gap.value": [1, 5]}; use "*" for no
                    lower or upper bound,
                    e.g. {"oqmd.band_gap.value": [1, "*"]}
                exclude_ranges ({}): field-range mappings to exclude,
                    e.g. {"oqmd.band_gap.value": [3, "*"]} to exclude all
                    results with band gap higher than 3
            properties: accepted but not used by this method.
            unwind_arrays (bool): whether or not to unwind arrays in
                flattening docs for dataframe

        Returns (pandas.DataFrame):
            DataFrame corresponding to all documents from aggregated query
        """
        # Criteria that map one-to-one onto a forge "match_<key>" method.
        for key in ("source_names", "elements", "titles", "tags",
                    "resource_types"):
            wanted = criteria.get(key)
            if not wanted:
                continue
            matcher = getattr(self.forge, "match_{}".format(key))
            matcher(wanted)

        # Criteria given as {field: value} or {field: [low, high]} mappings.
        for key in ("match_fields", "exclude_fields", "match_ranges",
                    "exclude_ranges"):
            mapping = criteria.get(key)
            if not mapping:
                continue
            # The forge method is the singular form: drop the trailing "s".
            method = getattr(self.forge, key[:-1])
            takes_range = "ranges" in key
            for field, value in mapping.items():
                if takes_range:
                    # Range bounds are unpacked into separate arguments.
                    method(field, *value)
                else:
                    method(field, value)

        results = self.forge.aggregate()
        return make_dataframe(results, unwind_arrays=unwind_arrays)

    def get_data(self, squery, unwind_arrays=True, **kwargs):
        """
        Run an explicit string query against MDF (instead of the structured
        criteria accepted by get_dataframe) and return a DataFrame.

        Args:
            squery (str): String for explicit query
            unwind_arrays (bool): whether or not to unwind arrays in
                flattening docs for dataframe
            **kwargs: extra keyword arguments passed to the forge
                ``aggregate`` call

        Returns:
            dataframe corresponding to query
        """
        results = self.forge.aggregate(q=squery, **kwargs)
        return make_dataframe(results, unwind_arrays=unwind_arrays)
# -*- coding: utf-8 -*-
"""
@Project : matminer
@Author  : Xu-Shan Zhao
@Filename: mdfsearch202004252216.py
@IDE     : PyCharm
@Time1   : 2020-04-25 22:16:41
@Time2   : 2020/4/25 10:16 PM
@Month1  : April
@Month2  : April

Exploratory script: search MDF by elements (Fe, H, Zr) through Forge and
print the composition of every hit, the number of hits, and the first hit.
"""
from mdf_forge.forge import Forge

mdf = Forge()
res = mdf.search_by_elements(elements=['Fe', 'H', 'Zr'])

# Composition of every matching record.
for record in res:
    print(record['material']['composition'])

print(len(res))

# Show the first full record as a sample; an empty result set has no first
# record, so skip it instead of failing with IndexError.
if res:
    print(res[0])
class MDFDataRetrieval:
    """
    Data retrieval class for the Materials Data Facility (MDF): runs a query
    through a Forge client and converts the matching documents into a
    pandas DataFrame.

    Full access to MDF requires authentication via
    https://materialsdatafacility.org/; an anonymous mode is available by
    passing anonymous=True to the constructor.

    Examples:
        mdf_dr = MDFDataRetrieval(anonymous=True)
        results = mdf_dr.get_dataframe(elements=["Ag", "Be"],
                                       sources=["oqmd"])
        results = mdf_dr.get_dataframe(
            sources=['oqmd'],
            match_ranges={"oqmd.band_gap.value": [4.0, "*"]})
    """

    def __init__(self, anonymous=False, **kwargs):
        """
        Create the Forge client stored on ``self.forge``.

        Args:
            anonymous (bool): whether to use anonymous login (i.e. no
                globus authentication)
            **kwargs: extra keyword arguments forwarded unchanged to Forge,
                e.g. index (globus search index to search on) or local_ep
        """
        # ``anonymous`` is a named parameter, so it can never also be a key
        # of ``kwargs``; the merge cannot overwrite anything.
        forge_options = dict(kwargs, anonymous=anonymous)
        self.forge = Forge(**forge_options)

    def get_dataframe(self, sources=None, elements=None, titles=None,
                      tags=None, resource_types=None, match_fields=None,
                      exclude_fields=None, match_ranges=None,
                      exclude_ranges=None, unwind_arrays=True):
        """
        Build a Forge query from the given criteria, run it and return the
        aggregated results as a DataFrame. Criteria left as None (or
        otherwise falsy) are ignored.

        Args:
            sources ([str]): source names to include, e.g. ["oqmd"]
            elements ([str]): elements to include, e.g. ["Ag", "Si"]
            titles ([str]): titles to include,
                e.g. ["Coarsening of a semisolid Al-Cu alloy"]
            tags ([str]): tags to include, e.g. ["outcar"]
            resource_types ([str]): resources to include, e.g. ["record"]
            match_fields ({}): field-value mappings to include,
                e.g. {"oqmd.converged": True}
            exclude_fields ({}): field-value mappings to exclude,
                e.g. {"oqmd.converged": False}
            match_ranges ({}): field-range mappings to include,
                e.g. {"oqmd.band_gap.value": [1, 5]}; use "*" for no lower
                or upper bound, e.g. {"oqmd.band_gap.value": [1, "*"]}
            exclude_ranges ({}): field-range mappings to exclude,
                e.g. {"oqmd.band_gap.value": [3, "*"]} to exclude all
                results with band gap higher than 3
            unwind_arrays (bool): whether or not to unwind arrays in
                flattening docs for dataframe

        Returns:
            DataFrame corresponding to all documents from aggregated query
        """
        # NOTE(review): the query is not explicitly reset (there is no
        # self.forge.reset_query() call) before criteria are added; confirm
        # that Forge clears its query state between calls.

        # Criteria that map one-to-one onto a forge "match_<name>" method.
        list_criteria = (
            ("sources", sources),
            ("elements", elements),
            ("titles", titles),
            ("tags", tags),
            ("resource_types", resource_types),
        )
        for name, wanted in list_criteria:
            if wanted:
                matcher = getattr(self.forge, "match_{}".format(name))
                matcher(wanted)

        # Criteria given as {field: value} or {field: [low, high]} mappings.
        mapping_criteria = (
            ("match_fields", match_fields),
            ("exclude_fields", exclude_fields),
            ("match_ranges", match_ranges),
            ("exclude_ranges", exclude_ranges),
        )
        for name, mapping in mapping_criteria:
            if not mapping:
                continue
            # The forge method is the singular form: drop the trailing "s".
            method = getattr(self.forge, name[:-1])
            takes_range = "ranges" in name
            for field, value in mapping.items():
                if takes_range:
                    # Range bounds are unpacked into separate arguments.
                    method(field, *value)
                else:
                    method(field, value)

        results = self.forge.aggregate()
        # Make into DataFrame
        return make_dataframe(results, unwind_arrays=unwind_arrays)

    def get_dataframe_by_query(self, query, unwind_arrays=True, **kwargs):
        """
        Run an explicit string query against MDF (instead of the structured
        criteria accepted by get_dataframe) and return a DataFrame.

        Args:
            query (str): String for explicit query
            unwind_arrays (bool): whether or not to unwind arrays in
                flattening docs for dataframe
            **kwargs: extra keyword arguments passed to the forge
                ``aggregate`` call

        Returns:
            dataframe corresponding to query
        """
        results = self.forge.aggregate(q=query, **kwargs)
        return make_dataframe(results, unwind_arrays=unwind_arrays)
from mdf_forge.forge import Forge

# You don't have to use the name "mdf" but we do for consistency.
mdf = Forge("mdf-test")


def catalysts(datasets):
    """Return the set of distinct catalyst values found in ``datasets``.

    Args:
        datasets: iterable of records, each indexable as
            ``record["projects"]["nanomfg"]["catalyst"]``.

    Returns:
        set: the unique catalyst values.
    """
    return {
        dataset["projects"]["nanomfg"]["catalyst"] for dataset in datasets
    }


# First query: every record that has a nanomfg catalyst field.
mdf.match_field("projects.nanomfg.catalyst", "*")
rslt = mdf.search()
print(len(rslt))
unique_catalysts = catalysts(rslt)
print("Unique catalysts: " + str(unique_catalysts))

# Second query: records whose max_temperature lies between 0 and 1000.
# NOTE(review): whether the catalyst filter above still applies here depends
# on whether Forge.search() resets the query -- confirm against Forge docs.
mdf.match_range("projects.nanomfg.max_temperature", 0, 1000)
res = mdf.search()
print(len(res))
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import time

from mdf_forge.forge import Forge
# You don't have to use the name "mdf" but we do for consistency.
from gresq.util.mdf_adaptor import MDFAdaptor

source_id = '_test_nanohub_gresq_id_19_v1.1'

mdf = MDFAdaptor()

# Poll the status of the submission every 10 seconds, printing each status
# report, until the service reports it as no longer active.
while True:
    s = mdf.get_status(source_id, raw=True)
    print(s)
    active = s['status']['active']
    if not active:
        break
    time.sleep(10)

# Look the submission up in the "mdf-test" index by its source name and print
# every record returned.
forge = Forge("mdf-test")
query = forge.match_source_names(source_id)
titles = query.search()
# Alternative lookup by title instead of source name:
# titles = forge.match_titles(['Graphene Synthesis Raman Analysis']).search()
for title in titles:
    print(title)
# -*- coding: utf-8 -*-
"""
@Project : matminer
@Author  : Xu-Shan Zhao
@Filename: mdfForge202004252009.py
@IDE     : PyCharm
@Time1   : 2020-04-25 20:09:10
@Time2   : 2020/4/25 8:09 PM
@Month1  : April
@Month2  : April

Exploratory script: build a Forge query on material.elements restricted to
sources named "oqmd*" and run it.
"""
from mdf_forge.forge import Forge
import json

mdf = Forge()

# Element to match. Other candidates that have been toggled here by hand:
# Al, Cu, Sn, Zr, Fe.
mdf.match_field("material.elements", "H")

# Elements to exclude. Other candidates that have been toggled here by hand:
# N, F, Cl.
for element in ("C", "O"):
    mdf.exclude_field("material.elements", element)

# Restrict the search to sources whose name starts with "oqmd".
mdf.match_field("mdf.source_name", "oqmd*")

res = mdf.search()