class TestMEGNetData(TestCase):
    """Checks elemental-property lookups served by MEGNetElementData."""

    def setUp(self):
        # A new data source object is built before each test method runs.
        self.data_source = MEGNetElementData()

    def test_get_property(self):
        """Look up "embedding 1" for Cu, for Md and for the "Dummy" entry.

        The assertions expect Md and the "Dummy" entry to give the same value.
        """
        prop = "embedding 1"
        lookup = self.data_source.get_elemental_property

        cu_value = lookup(Element("Cu"), prop)
        self.assertAlmostEqual(0.18259364366531372, cu_value)

        # MEGNet embeddings have element data for elements 1-94, plus 0 for
        # "dummy" atoms.
        md_value = lookup(Element("Md"), prop)
        self.assertAlmostEqual(-0.044910576194524765, md_value)

        dummy_value = self.data_source.all_element_data["Dummy"][prop]
        self.assertAlmostEqual(-0.044910576194524765, dummy_value)
def from_preset(cls, preset_name):
    """
    Build an ElementProperty from a named preset.

    Each preset fixes a data source, a list of elemental properties and a
    list of statistics, which are handed to ``cls`` in that order.

    Args:
        preset_name: (str) can be one of "magpie", "deml", "matminer",
            "matscholar_el", or "megnet_el".

    Returns:
        ElementProperty based on the preset name.

    Raises:
        ValueError: if preset_name is not one of the names listed above.
    """
    # Statistics used by every preset except "magpie".
    common_stats = ["minimum", "maximum", "range", "mean", "std_dev"]

    if preset_name == "magpie":
        magpie_features = [
            "Number", "MendeleevNumber", "AtomicWeight", "MeltingT",
            "Column", "Row", "CovalentRadius", "Electronegativity",
            "NsValence", "NpValence", "NdValence", "NfValence", "NValence",
            "NsUnfilled", "NpUnfilled", "NdUnfilled", "NfUnfilled",
            "NUnfilled", "GSvolume_pa", "GSbandgap", "GSmagmom",
            "SpaceGroupNumber",
        ]
        magpie_stats = [
            "minimum", "maximum", "range", "mean", "avg_dev", "mode",
        ]
        return cls("magpie", magpie_features, magpie_stats)

    if preset_name == "deml":
        deml_features = [
            "atom_num", "atom_mass", "row_num", "col_num", "atom_radius",
            "molar_vol", "heat_fusion", "melting_point", "boiling_point",
            "heat_cap", "first_ioniz", "electronegativity", "electric_pol",
            "GGAU_Etot", "mus_fere", "FERE correction",
        ]
        return cls("deml", deml_features, common_stats)

    if preset_name == "matminer":
        # The "matminer" preset is backed by the "pymatgen" data source.
        pymatgen_features = [
            "X", "row", "group", "block", "atomic_mass", "atomic_radius",
            "mendeleev_no", "electrical_resistivity", "velocity_of_sound",
            "thermal_conductivity", "melting_point", "bulk_modulus",
            "coefficient_of_linear_thermal_expansion",
        ]
        return cls("pymatgen", pymatgen_features, common_stats)

    if preset_name == "matscholar_el":
        # Use every property name the data class exposes.
        return cls(
            "matscholar_el", MatscholarElementData().prop_names, common_stats
        )

    if preset_name == "megnet_el":
        return cls("megnet_el", MEGNetElementData().prop_names, common_stats)

    raise ValueError("Invalid preset_name specified!")
def __init__(self, data_source, features, stats):
    """
    Store the data source, property names and statistics on the instance.

    Args:
        data_source: one of the strings "pymatgen", "magpie", "deml",
            "matscholar_el" or "megnet_el", each of which is replaced by a
            new instance of the matching data class; any other value is
            stored unchanged.
        features: stored unchanged as ``self.features``.
        stats: stored unchanged as ``self.stats``.
    """
    # Turn a recognised source name into a data object; anything else is
    # kept exactly as the caller supplied it.
    if data_source == "pymatgen":
        resolved_source = PymatgenData()
    elif data_source == "magpie":
        resolved_source = MagpieData()
    elif data_source == "deml":
        resolved_source = DemlData()
    elif data_source == "matscholar_el":
        resolved_source = MatscholarElementData()
    elif data_source == "megnet_el":
        resolved_source = MEGNetElementData()
    else:
        resolved_source = data_source

    self.data_source = resolved_source
    self.features, self.stats = features, stats

    # Initialize stats computer
    self.pstats = PropertyStats()
def setUp(self):
    """Attach a new MEGNetElementData instance to ``self.data_source``."""
    megnet_data = MEGNetElementData()
    self.data_source = megnet_data
class ElementProperty(BaseFeaturizer):
    """
    Class to calculate elemental property attributes.

    To initialize quickly, use the from_preset() method.

    Features: Based on the statistics of the data_source chosen, computed
    by element stoichiometry. The format generally is:

        "{data source} {statistic} {property}"

    For example:

        "PymatgenData range X"  # Range of electronegativity from Pymatgen data

    For a list of all statistics, see the PropertyStats documentation; for a
    list of all attributes available for a given data_source, see the
    documentation for the data sources (e.g., PymatgenData, MagpieData,
    MatscholarElementData, etc.).

    Args:
        data_source (AbstractData or str): source from which to retrieve
            element property data (or use str for preset: "pymatgen",
            "magpie", "deml", "matscholar_el", or "megnet_el")
        features (list of strings): List of elemental properties to use
            (these must be supported by data_source)
        stats (list of strings): a list of weighted statistics to compute
            for each property (see PropertyStats for available stats)
    """

    def __init__(self, data_source, features, stats):
        # Recognised source names and the data class each one stands for.
        preset_sources = {
            "pymatgen": PymatgenData,
            "magpie": MagpieData,
            "deml": DemlData,
            "matscholar_el": MatscholarElementData,
            "megnet_el": MEGNetElementData,
        }

        # Only strings are looked up, so a data object passed in directly is
        # never hashed. Unrecognised values are stored unchanged.
        if isinstance(data_source, str) and data_source in preset_sources:
            self.data_source = preset_sources[data_source]()
        else:
            self.data_source = data_source

        self.features = features
        self.stats = stats

        # Initialize stats computer
        self.pstats = PropertyStats()

    @classmethod
    def from_preset(cls, preset_name):
        """
        Return ElementProperty from a preset string

        Args:
            preset_name: (str) can be one of "magpie", "deml", "matminer",
                "matscholar_el", or "megnet_el".

        Returns:
            ElementProperty based on the preset name.

        Raises:
            ValueError: if preset_name is not one of the names listed above.
        """
        if preset_name == "magpie":
            data_source = "magpie"
            features = [
                "Number", "MendeleevNumber", "AtomicWeight", "MeltingT",
                "Column", "Row", "CovalentRadius", "Electronegativity",
                "NsValence", "NpValence", "NdValence", "NfValence",
                "NValence", "NsUnfilled", "NpUnfilled", "NdUnfilled",
                "NfUnfilled", "NUnfilled", "GSvolume_pa", "GSbandgap",
                "GSmagmom", "SpaceGroupNumber"
            ]
            stats = ["minimum", "maximum", "range", "mean", "avg_dev", "mode"]

        elif preset_name == "deml":
            data_source = "deml"
            stats = ["minimum", "maximum", "range", "mean", "std_dev"]
            features = [
                "atom_num", "atom_mass", "row_num", "col_num", "atom_radius",
                "molar_vol", "heat_fusion", "melting_point", "boiling_point",
                "heat_cap", "first_ioniz", "electronegativity",
                "electric_pol", "GGAU_Etot", "mus_fere", "FERE correction"
            ]

        elif preset_name == "matminer":
            # The "matminer" preset is backed by the "pymatgen" data source.
            data_source = "pymatgen"
            stats = ["minimum", "maximum", "range", "mean", "std_dev"]
            features = [
                "X", "row", "group", "block", "atomic_mass", "atomic_radius",
                "mendeleev_no", "electrical_resistivity", "velocity_of_sound",
                "thermal_conductivity", "melting_point", "bulk_modulus",
                "coefficient_of_linear_thermal_expansion"
            ]

        elif preset_name == "matscholar_el":
            # Build the data object once and use it both for its property
            # names and as the featurizer's data source, instead of letting
            # __init__ construct a second instance from the string name.
            data_source = MatscholarElementData()
            stats = ["minimum", "maximum", "range", "mean", "std_dev"]
            features = data_source.prop_names

        elif preset_name == "megnet_el":
            # Same single-construction approach as "matscholar_el".
            data_source = MEGNetElementData()
            stats = ["minimum", "maximum", "range", "mean", "std_dev"]
            features = data_source.prop_names

        else:
            raise ValueError("Invalid preset_name specified!")

        return cls(data_source, features, stats)

    def featurize(self, comp):
        """
        Get elemental property attributes

        Args:
            comp: Pymatgen composition object

        Returns:
            all_attributes: Specified property statistics of features,
                ordered feature by feature and, within each feature,
                statistic by statistic (the same order as feature_labels()).
        """
        all_attributes = []

        # Get the element names and fractions
        # NOTE(review): a composition with no elements makes this unpacking
        # raise ValueError -- confirm whether callers can pass one.
        elements, fractions = zip(*comp.element_composition.items())

        for attr in self.features:
            elem_data = [
                self.data_source.get_elemental_property(e, attr)
                for e in elements
            ]

            for stat in self.stats:
                all_attributes.append(
                    self.pstats.calc_stat(elem_data, stat, fractions))

        return all_attributes

    def feature_labels(self):
        """
        Build one label per (feature, statistic) pair.

        Returns:
            list of str: labels of the form
                "{data source class name} {statistic} {property}".
        """
        # The class name does not change between features, so read it once.
        src = self.data_source.__class__.__name__
        return [
            f"{src} {stat} {attr}"
            for attr in self.features
            for stat in self.stats
        ]

    def citations(self):
        """
        Return the BibTeX entry for the data source in use.

        Returns:
            list of str: a single-entry list for MagpieData, DemlData,
                PymatgenData and MEGNetElementData; an empty list for any
                other data source.
        """
        source_name = self.data_source.__class__.__name__

        if source_name == "MagpieData":
            # NOTE(review): the published DOI for this paper appears to be
            # 10.1038/npjcompumats.2016.28 -- confirm before changing the
            # string below.
            citation = [
                "@article{ward_agrawal_choudary_wolverton_2016, title={A general-purpose "
                "machine learning framework for predicting properties of inorganic materials}, "
                "volume={2}, DOI={10.1038/npjcompumats.2017.28}, number={1}, journal={npj "
                "Computational Materials}, author={Ward, Logan and Agrawal, Ankit and Choudhary, "
                "Alok and Wolverton, Christopher}, year={2016}}"
            ]
        elif source_name == "DemlData":
            citation = [
                "@article{deml_ohayre_wolverton_stevanovic_2016, title={Predicting density "
                "functional theory total energies and enthalpies of formation of metal-nonmetal "
                "compounds by linear regression}, volume={47}, DOI={10.1002/chin.201644254}, "
                "number={44}, journal={ChemInform}, author={Deml, Ann M. and Ohayre, Ryan and "
                "Wolverton, Chris and Stevanovic, Vladan}, year={2016}}"
            ]
        elif source_name == "PymatgenData":
            citation = [
                "@article{Ong2013, author = {Ong, Shyue Ping and Richards, William Davidson and Jain, Anubhav and Hautier, "
                "Geoffroy and Kocher, Michael and Cholia, Shreyas and Gunter, Dan and Chevrier, Vincent L. and Persson, "
                "Kristin A. and Ceder, Gerbrand}, doi = {10.1016/j.commatsci.2012.10.028}, issn = {09270256}, "
                "journal = {Computational Materials Science}, month = {feb}, pages = {314--319}, "
                "publisher = {Elsevier B.V.}, title = {{Python Materials Genomics (pymatgen): A robust, open-source python "
                "library for materials analysis}}, url = {http://linkinghub.elsevier.com/retrieve/pii/S0927025612006295}, "
                "volume = {68}, year = {2013} } "
            ]
        elif source_name == "MEGNetElementData":
            # TODO: Cite MEGNet publication (not preprint) once released!
            citation = [
                "@ARTICLE{2018arXiv181205055C,"
                "author = {{Chen}, Chi and {Ye}, Weike and {Zuo}, Yunxing and {Zheng}, Chen and {Ong}, Shyue Ping},"
                "title = '{Graph Networks as a Universal Machine Learning Framework for Molecules and Crystals}',"
                "journal = {arXiv e-prints},"
                "keywords = {Condensed Matter - Materials Science, Physics - Computational Physics},"
                "year = '2018',"
                "month = 'Dec',"
                "eid = {arXiv:1812.05055},"
                "pages = {arXiv:1812.05055},"
                "archivePrefix = {arXiv},"
                "eprint = {1812.05055},"
                "primaryClass = {cond-mat.mtrl-sci},"
                # The backslash is written as "\\" so the emitted text is
                # unchanged while avoiding an invalid "\#" escape sequence.
                "adsurl = {https://ui.adsabs.harvard.edu/\\#abs/2018arXiv181205055C},"
                "adsnote = {Provided by the SAO/NASA Astrophysics Data System}}"
            ]
        else:
            citation = []
        return citation

    def implementors(self):
        """Return the names of the people credited with this featurizer."""
        return ["Jiming Chen", "Logan Ward", "Anubhav Jain", "Alex Dunn"]
def setUp(self):
    """Attach a new MEGNetElementData instance to ``self.data_source``."""
    megnet_data = MEGNetElementData()
    self.data_source = megnet_data