Beispiel #1
0
def build_db(session):
    """
    Load nanomfg recipe records from MDF into ``session``.

    A Forge client created for "mdf-test" is searched for every record whose
    ``projects.nanomfg.catalyst`` field matches "*". For each hit one
    ``MdfForge`` object is filled in and added to ``session``; a single
    commit is issued once all hits have been added.

    Args:
        session: object exposing ``add`` and ``commit`` (presumably a
            database session -- confirm against the caller).
    """
    forge_client = Forge("mdf-test")
    forge_client.match_field("projects.nanomfg.catalyst", "*")
    hits = forge_client.search()

    def optional_value(record, key):
        # Missing keys and falsy values are both reported as None.
        value = record[key] if key in record else None
        return value if value else None

    # (attribute name, is the key mandatory?) in the order they are assigned.
    # Mandatory keys are read directly, so a missing one raises KeyError.
    recipe_fields = (
        ("base_pressure", True),
        ("carbon_source", True),
        ("catalyst", True),
        ("grain_size", False),
        ("max_temperature", True),
        ("orientation", False),
        ("sample_surface_area", False),
        ("sample_thickness", False),
    )

    for hit in hits:
        row = MdfForge()
        row.mdf_id = hit["mdf"]["mdf_id"]
        row.title = hit["dc"]["titles"][0]["title"]
        nanomfg = hit["projects"]["nanomfg"]
        for attr, mandatory in recipe_fields:
            if mandatory:
                setattr(row, attr, nanomfg[attr])
            else:
                setattr(row, attr, optional_value(nanomfg, attr))
        session.add(row)
    session.commit()
Beispiel #2
0
    def __init__(self, anonymous=False, **kwargs):
        """
        Build the Forge client this object sends its queries through.

        Args:
            anonymous (bool): whether to use anonymous login (i. e. no
                globus authentication); forwarded to Forge as ``anonymous``
            **kwargs: remaining keyword arguments, forwarded to Forge
                unchanged, e. g. index (globus search index to search on)
                or local_ep
        """
        forge_client = Forge(anonymous=anonymous, **kwargs)
        self.forge = forge_client
Beispiel #3
0
class MDFDataRetrieval(BaseDataRetrieval):
    """
    Data retrieval front end for the Materials Data Facility (MDF).

    Queries are sent through a ``Forge`` client and the documents that come
    back are converted into a Pandas DataFrame. Invocation with full access
    to MDF requires authentication (see ``api_link``); an anonymous mode is
    available by passing ``anonymous=True``.

    Examples:
        mdf_dr = MDFDataRetrieval(anonymous=True)
        results = mdf_dr.get_dataframe({"elements": ["Ag", "Be"],
                                        "source_names": ["oqmd"]})

        results = mdf_dr.get_dataframe(
            {"source_names": ["oqmd"],
             "match_ranges": {"oqmd.band_gap.value": [4.0, "*"]}})

    If you use this data retrieval class, please additionally cite:
    Blaiszik, B., Chard, K., Pruyne, J., Ananthakrishnan, R., Tuecke, S.,
    Foster, I., 2016. The Materials Data Facility: Data Services to Advance
    Materials Science Research. JOM 68, 2045–2052.
    https://doi.org/10.1007/s11837-016-2001-3
    """

    def __init__(self, anonymous=False, **kwargs):
        """
        Build the Forge client used for all queries.

        Args:
            anonymous (bool): whether to use anonymous login (i. e. no
                globus authentication); forwarded to Forge as ``anonymous``
            **kwargs: remaining keyword arguments, forwarded to Forge
                unchanged, e. g. index (globus search index to search on)
                or local_ep
        """
        forge_client = Forge(anonymous=anonymous, **kwargs)
        self.forge = forge_client

    def api_link(self):
        """Return the URL of the Forge project page."""
        return "https://github.com/materials-data-facility/forge"

    def get_dataframe(self, criteria, properties=None, unwind_arrays=True):
        """
        Build a query from ``criteria``, aggregate it and return a DataFrame.

        Args:
            criteria (dict): query options; keys that are absent or hold a
                falsy value are skipped. Recognised keys:
                source_names ([str]): source names to include, e. g. ["oqmd"]
                elements ([str]): elements to include, e. g. ["Ag", "Si"]
                titles ([str]): titles to include, e. g. ["Coarsening of a
                    semisolid Al-Cu alloy"]
                tags ([str]): tags to include, e. g. ["outcar"]
                resource_types ([str]): resources to include, e. g. ["record"]
                match_fields ({}): field-value mappings to include, e. g.
                    {"oqmd.converged": True}
                exclude_fields ({}): field-value mappings to exclude, e. g.
                    {"oqmd.converged": False}
                match_ranges ({}): field-range mappings to include, e. g.
                    {"oqmd.band_gap.value": [1, 5]}, use "*" for no lower
                    or upper bound, e. g. {"oqmd.band_gap.value": [1, "*"]}
                exclude_ranges ({}): field-range mappings to exclude, e. g.
                    {"oqmd.band_gap.value": [3, "*"]} to exclude all
                    results with band gap higher than 3
                Any other key (including "raw") is not read here.
            properties: accepted for signature compatibility; not used by
                this method
            unwind_arrays (bool): whether or not to unwind arrays in
                flattening docs for dataframe

        Returns (pandas.DataFrame):
            DataFrame corresponding to all documents from aggregated query
        """
        # List-valued criteria: forge has a "match_<key>" method for each.
        for key in ("source_names", "elements", "titles", "tags",
                    "resource_types"):
            wanted = criteria.get(key)
            if wanted:
                getattr(self.forge, "match_" + key)(wanted)

        # Dict-valued criteria: (criteria key, forge method, value is a range)
        dict_filters = (
            ("match_fields", "match_field", False),
            ("exclude_fields", "exclude_field", False),
            ("match_ranges", "match_range", True),
            ("exclude_ranges", "exclude_range", True),
        )
        for key, method_name, is_range in dict_filters:
            mapping = criteria.get(key)
            if not mapping:
                continue
            apply_filter = getattr(self.forge, method_name)
            for field, value in mapping.items():
                if is_range:
                    # A range value is unpacked into separate arguments.
                    apply_filter(field, *value)
                else:
                    apply_filter(field, value)

        return make_dataframe(self.forge.aggregate(),
                              unwind_arrays=unwind_arrays)

    def get_data(self, squery, unwind_arrays=True, **kwargs):
        """
        Gets a dataframe from the MDF API from an explicit string
        query (rather than a criteria dict as in get_dataframe).

        Args:
            squery (str): String for explicit query
            unwind_arrays (bool): whether or not to unwind arrays in
                flattening docs for dataframe
            **kwargs: extra keyword arguments passed to the forge
                ``aggregate`` call

        Returns:
            dataframe corresponding to query
        """
        documents = self.forge.aggregate(q=squery, **kwargs)
        return make_dataframe(documents, unwind_arrays=unwind_arrays)
Beispiel #4
0
# -*- coding: utf-8 -*-
"""
@Project : matminer
@Author  : Xu-Shan Zhao
@Filename: mdfsearch202004252216.py
@IDE     : PyCharm
@Time1   : 2020-04-25 22:16:41
@Time2   : 2020/4/25 10:16 下午
@Month1  : 4月
@Month2  : 四月
"""

from mdf_forge.forge import Forge

# Search MDF by element list and print what comes back.
mdf = Forge()

res = mdf.search_by_elements(elements=['Fe', 'H', 'Zr'])

# One line per hit: the composition stored under material.composition.
for i in res:
    print(i['material']['composition'])

print(len(res))
# Show the first full record as a sample. Skipped when the search returned
# nothing, because indexing an empty result would raise IndexError.
if res:
    print(res[0])

Beispiel #5
0
class MDFDataRetrieval:
    """
    Data retrieval front end for the Materials Data Facility (MDF).

    Queries are sent through a ``Forge`` client and the documents that come
    back are converted into a Pandas dataframe. Invocation with full access
    to MDF requires authentication via https://materialsdatafacility.org/;
    an anonymous mode is available by passing ``anonymous=True``.

    Examples:
        mdf_dr = MDFDataRetrieval(anonymous=True)
        results = mdf_dr.get_dataframe(elements=["Ag", "Be"],
                                       sources=["oqmd"])

        results = mdf_dr.get_dataframe(
            sources=['oqmd'],
            match_ranges={"oqmd.band_gap.value": [4.0, "*"]})
    """

    def __init__(self, anonymous=False, **kwargs):
        """
        Build the Forge client used for all queries.

        Args:
            anonymous (bool): whether to use anonymous login (i. e. no
                globus authentication); forwarded to Forge as ``anonymous``
            **kwargs: remaining keyword arguments, forwarded to Forge
                unchanged, e. g. index (globus search index to search on)
                or local_ep
        """
        forge_client = Forge(anonymous=anonymous, **kwargs)
        self.forge = forge_client

    def get_dataframe(self,
                      sources=None,
                      elements=None,
                      titles=None,
                      tags=None,
                      resource_types=None,
                      match_fields=None,
                      exclude_fields=None,
                      match_ranges=None,
                      exclude_ranges=None,
                      unwind_arrays=True):
        """
        Build a query from the given filters, aggregate it and return a
        Pandas Dataframe. Filters left as None (or otherwise falsy) are
        skipped.

        Args:
            sources ([str]): source names to include, e. g. ["oqmd"]
            elements ([str]): elements to include, e. g. ["Ag", "Si"]
            titles ([str]): titles to include, e. g. ["Coarsening of a semisolid
                Al-Cu alloy"]
            tags ([str]): tags to include, e. g. ["outcar"]
            resource_types ([str]): resources to include, e. g. ["record"]
            match_fields ({}): field-value mappings to include, e. g.
                {"oqmd.converged": True}
            exclude_fields ({}): field-value mappings to exclude, e. g.
                {"oqmd.converged": False}
            match_ranges ({}): field-range mappings to include, e. g.
                {"oqmd.band_gap.value": [1, 5]}, use "*" for no lower
                or upper bound, e. g. {"oqmd.band_gap.value": [1, "*"]}
            exclude_ranges ({}): field-range mappings to exclude, e. g.
                {"oqmd.band_gap.value": [3, "*"]} to exclude all
                results with band gap higher than 3
            unwind_arrays (bool): whether or not to unwind arrays in
                flattening docs for dataframe

        Returns:
            DataFrame corresponding to all documents from aggregated query
        """
        # NOTE(review): the original author left a disabled
        # self.forge.reset_query() call at this point -- confirm whether
        # filters from an earlier call can carry over into this one.

        # List-valued filters: forge has a "match_<name>" method for each.
        list_filters = (
            ("sources", sources),
            ("elements", elements),
            ("titles", titles),
            ("tags", tags),
            ("resource_types", resource_types),
        )
        for name, wanted in list_filters:
            if wanted:
                getattr(self.forge, "match_" + name)(wanted)

        # Dict-valued filters: (forge method, mapping, value is a range)
        dict_filters = (
            ("match_field", match_fields, False),
            ("exclude_field", exclude_fields, False),
            ("match_range", match_ranges, True),
            ("exclude_range", exclude_ranges, True),
        )
        for method_name, mapping, is_range in dict_filters:
            if not mapping:
                continue
            apply_filter = getattr(self.forge, method_name)
            for field, value in mapping.items():
                if is_range:
                    # A range value is unpacked into separate arguments.
                    apply_filter(field, *value)
                else:
                    apply_filter(field, value)

        documents = self.forge.aggregate()
        return make_dataframe(documents, unwind_arrays=unwind_arrays)

    def get_dataframe_by_query(self, query, unwind_arrays=True, **kwargs):
        """
        Gets a dataframe from the MDF API from an explicit string
        query (rather than input args like get_dataframe).

        Args:
            query (str): String for explicit query
            unwind_arrays (bool): whether or not to unwind arrays in
                flattening docs for dataframe
            **kwargs: extra keyword arguments passed to the forge
                ``aggregate`` call

        Returns:
            dataframe corresponding to query
        """
        documents = self.forge.aggregate(q=query, **kwargs)
        return make_dataframe(documents, unwind_arrays=unwind_arrays)
Beispiel #6
0
from mdf_forge.forge import Forge
# Forge client used by the queries further down in this script.
# You don't have to use the name "mdf" but we do for consistency.
# NOTE(review): "mdf-test" is presumably the name of the search index to
# query -- confirm against the Forge constructor.
mdf = Forge("mdf-test")


def catalysts(datasets):
    """
    Collect the distinct catalyst values found in ``datasets``.

    Args:
        datasets: iterable of records, each holding its catalyst under
            record["projects"]["nanomfg"]["catalyst"]

    Returns:
        set of the catalyst values (empty when ``datasets`` is empty)
    """
    return {entry["projects"]["nanomfg"]["catalyst"] for entry in datasets}


# First search: match any value ("*") of projects.nanomfg.catalyst, then
# report the number of hits and the distinct catalysts among them.
mdf.match_field("projects.nanomfg.catalyst", "*")
rslt = mdf.search()
print(len(rslt))
print("Unique catalysts: {}".format(catalysts(rslt)))

# Second search: restrict projects.nanomfg.max_temperature to the range
# 0..1000 and report the number of hits.
mdf.match_range("projects.nanomfg.max_temperature", 0, 1000)
res = mdf.search()
print(len(res))
Beispiel #7
0
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import time

from mdf_forge.forge import Forge
# You don't have to use the name "mdf" but we do for consistency.
from gresq.util.mdf_adaptor import MDFAdaptor

source_id = '_test_nanohub_gresq_id_19_v1.1'
mdf = MDFAdaptor()

# Poll the status of source_id, printing each response, and wait 10 seconds
# between polls for as long as the response reports the source as active.
while True:
    s = mdf.get_status(source_id, raw=True)
    print(s)
    active = s['status']['active']
    if not active:
        break
    time.sleep(10)

forge = Forge("mdf-test")

# Look the records up by source name and print each one.
# Alternative lookup by title, left disabled by the original author:
# titles = forge.match_titles(['Graphene Synthesis Raman Analysis']).search()
titles = forge.match_source_names(source_id).search()
for title in titles:
    print(title)
# -*- coding: utf-8 -*-
"""
@Project : matminer
@Author  : Xu-Shan Zhao
@Filename: mdfForge202004252009.py
@IDE     : PyCharm
@Time1   : 2020-04-25 20:09:10
@Time2   : 2020/4/25 8:09 下午
@Month1  : 4月
@Month2  : 四月
"""

from mdf_forge.forge import Forge
import json

# Forge client created with default settings.
mdf = Forge()

# Build the query step by step. The commented-out lines are alternative
# element filters that can be switched on by uncommenting them.
# Active filters: material.elements must match "H" ...
# mdf.match_field("material.elements", "Al")
# mdf.match_field("material.elements", "Cu")
# mdf.match_field("material.elements", "Sn")
mdf.match_field("material.elements", "H")
# mdf.match_field("material.elements", "Zr")
# mdf.match_field("material.elements", "Fe")
# ... while "C" and "O" are excluded ...
mdf.exclude_field("material.elements", "C")
mdf.exclude_field("material.elements", "O")
# mdf.exclude_field("material.elements", "N")
# mdf.exclude_field("material.elements", "F")
# mdf.exclude_field("material.elements", "Cl")
# ... and mdf.source_name must match the pattern "oqmd*"
# (presumably a trailing wildcard -- confirm against the Forge query syntax).
mdf.match_field("mdf.source_name", "oqmd*")

# Run the query assembled above.
res = mdf.search()