def featurize(self, comp):
    """
    Compute weighted statistics of the oxidation states in a composition.

    Args:
        comp: composition whose ``items()`` yields (species, fraction)
            pairs, each species exposing an ``oxi_state`` attribute

    Returns:
        (list) one value per entry of ``self.stats``, each computed by
            ``PropertyStats.calc_stat`` over the oxidation states using the
            fractions as weights

    Raises:
        ValueError: if ``has_oxidation_states(comp)`` is false
    """
    if not has_oxidation_states(comp):
        raise ValueError('Oxidation states have not been determined')

    # Pair each oxidation state with its fraction, then split the pairs into
    # two parallel tuples
    state_fraction_pairs = [
        (specie.oxi_state, amount) for specie, amount in comp.items()
    ]
    oxid_states, fractions = zip(*state_fraction_pairs)

    # One weighted statistic per requested name, in the order of self.stats
    results = []
    for stat_name in self.stats:
        results.append(
            PropertyStats.calc_stat(oxid_states, stat_name, fractions))
    return results
def _run_test(self, statistic, sample_1, sample_1_weighted, sample_2,
              sample_2_weighted):
    """
    Check one statistic against both sample datasets, with and without weights

    Args:
        statistic: name of statistic
        sample_1: float, expected unweighted value for sample 1
        sample_1_weighted: float, expected weighted value for sample 1
        sample_2: float, expected unweighted value for sample 2
        sample_2_weighted: float, expected weighted value for sample 2
    """
    # Each case: (expected value, positional arguments for calc_stat).
    # The statistic itself is evaluated inside the loop so the checks run in
    # the listed order, one computation per assertion.
    cases = (
        (sample_1, (self.sample_1, statistic)),
        (sample_1_weighted,
         (self.sample_1, statistic, self.sample_1_weights)),
        (sample_2, (self.sample_2, statistic)),
        (sample_2_weighted,
         (self.sample_2, statistic, self.sample_2_weights)),
    )
    for expected, calc_args in cases:
        self.assertAlmostEqual(expected, PropertyStats.calc_stat(*calc_args))
def _run_test(self, statistic, sample_1, sample_1_weighted, sample_2,
              sample_2_weighted):
    """
    Verify a statistic on the two sample datasets, unweighted and weighted

    :param statistic: name of statistic
    :param sample_1: float, expected unweighted value for sample 1
    :param sample_1_weighted: float, expected weighted value for sample 1
    :param sample_2: float, expected unweighted value for sample 2
    :param sample_2_weighted: float, expected weighted value for sample 2
    """
    check = self.assertAlmostEqual
    compute = PropertyStats.calc_stat

    # Sample 1: plain, then weighted
    check(sample_1, compute(self.sample_1, statistic))
    check(sample_1_weighted,
          compute(self.sample_1, statistic, self.sample_1_weights))

    # Sample 2: plain, then weighted
    check(sample_2, compute(self.sample_2, statistic))
    check(sample_2_weighted,
          compute(self.sample_2, statistic, self.sample_2_weights))
def featurize(self, strc):
    """
    Compute bond-length and cell-volume features from a Voronoi tessellation.

    Args:
        strc: structure passed to ``get_all_nearest_neighbors``; ``len(strc)``
            is used as the number of sites

    Returns:
        (list) in order: the average deviation, maximum and minimum of the
            normalized per-site mean bond lengths; one statistic of the
            per-site relative bond-length variation for each entry of
            ``self.stats``; and the average deviation of the cell volumes
            divided by their mean
    """
    # Tessellate the structure and collect the neighbor records of each site
    tessellation = VoronoiNN(extra_nn_info=True, weight=self.weight)
    nns = get_all_nearest_neighbors(tessellation, strc)

    # Per-site weighted mean bond length and relative variation
    mean_bond_lengths = np.zeros((len(strc), ))
    bond_length_var = np.zeros_like(mean_bond_lengths)
    for site_idx, neighbors in enumerate(nns):
        face_weights = [nbr['weight'] for nbr in neighbors]
        # Bond length is taken as twice the recorded face distance
        bond_lengths = [nbr['poly_info']['face_dist'] * 2
                        for nbr in neighbors]

        mean_bond_lengths[site_idx] = PropertyStats.mean(bond_lengths,
                                                         face_weights)

        # Average deviation of the bond lengths, relative to this site's mean
        # (the mean is still un-normalized at this point)
        site_dev = PropertyStats.avg_dev(bond_lengths, face_weights)
        bond_length_var[site_idx] = site_dev / mean_bond_lengths[site_idx]

    # Divide by the structure-wide average so the features do not depend on
    # the overall length scale
    mean_bond_lengths /= mean_bond_lengths.mean()

    # Statistics of the normalized mean bond lengths
    features = [
        PropertyStats.avg_dev(mean_bond_lengths),
        mean_bond_lengths.max(),
        mean_bond_lengths.min(),
    ]

    # Statistics of the per-site bond-length variation
    features.extend(
        PropertyStats.calc_stat(bond_length_var, stat) for stat in self.stats)

    # Relative variation of the cell volumes; each cell volume is the sum of
    # the 'volume' entries of that site's neighbor records
    cell_volumes = []
    for neighbors in nns:
        cell_volumes.append(
            sum(nbr['poly_info']['volume'] for nbr in neighbors))
    features.append(
        PropertyStats.avg_dev(cell_volumes) / np.mean(cell_volumes))

    return features
def featurize(self, comp):
    """
    Compute statistics of the cation/anion electronegativity difference.

    Args:
        comp: Pymatgen Composition object

    Returns:
        en_diff_stats (list of floats): Property stats of electronegativity
            difference, one per entry of ``self.stats``

    Raises:
        ValueError: if oxidation states have not been determined, if the
            composition is not ionic, or if it contains no anions or no
            cations
    """
    # Check if oxidation states have been determined
    if not has_oxidation_states(comp):
        raise ValueError('Oxidation states have not yet been determined')
    if not is_ionic(comp):
        raise ValueError('Composition is not ionic')

    # Collect the anions. The emptiness check must happen BEFORE unzipping:
    # ``zip(*[])`` unpacked into two names raises an unrelated ValueError,
    # which previously made the explicit "no anions" error unreachable.
    anion_pairs = [(s, x) for s, x in comp.items() if s.oxi_state < 0]
    if not anion_pairs:
        raise ValueError(
            'Features not applicable: Compound contains no anions')
    anions, anion_fractions = zip(*anion_pairs)

    # Determine the average anion EN
    anion_en = [s.element.X for s in anions]
    mean_anion_en = PropertyStats.mean(anion_en, anion_fractions)

    # Collect the cations. It is possible to construct a non-charge-balanced
    # Composition, so the presence of cations is checked separately from the
    # presence of anions.
    cation_pairs = [(s, x) for s, x in comp.items() if s.oxi_state > 0]
    if not cation_pairs:
        raise ValueError(
            'Features not applicable: Compound contains no cations')
    cations, cation_fractions = zip(*cation_pairs)

    # Determine the EN difference for each cation
    en_difference = [mean_anion_en - s.element.X for s in cations]

    # Compute the statistics, weighted by the cation fractions
    return [
        PropertyStats.calc_stat(en_difference, stat, cation_fractions)
        for stat in self.stats
    ]
def featurize(self, comp):
    """
    Compute statistics of charge-dependent properties over the cations.

    Args:
        comp: composition whose ``items()`` yields (species, fraction)
            pairs, each species exposing an ``oxi_state`` attribute

    Returns:
        (list) for each property in ``self.features`` (outer order) and each
            statistic in ``self.stats`` (inner order), the statistic of that
            property over the cations, weighted by the cation fractions

    Raises:
        ValueError: if oxidation states have not been determined or the
            composition is not ionic
    """
    if not has_oxidation_states(comp):
        raise ValueError('Oxidation states have not been determined')
    if not is_ionic(comp):
        raise ValueError('Composition is not ionic')

    # Stats computer used for every property/statistic combination
    pstats = PropertyStats()

    # Keep only species with a positive oxidation state, with their fractions
    cation_pairs = [
        (specie, amount) for specie, amount in comp.items()
        if specie.oxi_state > 0
    ]
    cations, fractions = zip(*cation_pairs)

    all_attributes = []
    for attr in self.features:
        # Property value of each cation, looked up from the data source
        elem_data = [
            self.data_source.get_charge_dependent_property_from_specie(
                cation, attr)
            for cation in cations
        ]
        all_attributes.extend(
            pstats.calc_stat(elem_data, stat, fractions)
            for stat in self.stats)

    return all_attributes
class ElementProperty(BaseFeaturizer):
    """
    Class to calculate elemental property attributes.

    To initialize quickly, use the from_preset() method.

    Features: Based on the statistics of the data_source chosen, computed
    by element stoichiometry. The format generally is:

    "{data source} {statistic} {property}"

    For example:

    "PymatgenData range X"  # Range of electronegativity from Pymatgen data

    For a list of all statistics, see the PropertyStats documentation; for a
    list of all attributes available for a given data_source, see the
    documentation for the data sources (e.g., PymatgenData, MagpieData,
    MatscholarElementData, etc.).

    Args:
        data_source (AbstractData or str): source from which to retrieve
            element property data (or use str for preset: "pymatgen",
            "magpie", "deml", "matscholar_el", or "megnet_el")
        features (list of strings): List of elemental properties to use
            (these must be supported by data_source)
        stats (list of strings): a list of weighted statistics to compute
            for each property (see PropertyStats for available stats)
    """

    def __init__(self, data_source, features, stats):
        # A recognized preset string is replaced by a fresh data object;
        # anything else is stored as given.
        if data_source == "pymatgen":
            self.data_source = PymatgenData()
        elif data_source == "magpie":
            self.data_source = MagpieData()
        elif data_source == "deml":
            self.data_source = DemlData()
        elif data_source == "matscholar_el":
            self.data_source = MatscholarElementData()
        elif data_source == "megnet_el":
            self.data_source = MEGNetElementData()
        else:
            self.data_source = data_source

        self.features = features
        self.stats = stats
        # Initialize stats computer
        self.pstats = PropertyStats()

    @classmethod
    def from_preset(cls, preset_name):
        """
        Return ElementProperty from a preset string

        Args:
            preset_name: (str) can be one of "magpie", "deml", "matminer",
                "matscholar_el", or "megnet_el".

        Returns:
            ElementProperty based on the preset name.

        Raises:
            ValueError: if preset_name is not one of the names listed above
        """
        if preset_name == "magpie":
            data_source = "magpie"
            features = [
                "Number", "MendeleevNumber", "AtomicWeight", "MeltingT",
                "Column", "Row", "CovalentRadius", "Electronegativity",
                "NsValence", "NpValence", "NdValence", "NfValence", "NValence",
                "NsUnfilled", "NpUnfilled", "NdUnfilled", "NfUnfilled",
                "NUnfilled", "GSvolume_pa", "GSbandgap", "GSmagmom",
                "SpaceGroupNumber"
            ]
            stats = ["minimum", "maximum", "range", "mean", "avg_dev", "mode"]

        elif preset_name == "deml":
            data_source = "deml"
            stats = ["minimum", "maximum", "range", "mean", "std_dev"]
            features = [
                "atom_num", "atom_mass", "row_num", "col_num", "atom_radius",
                "molar_vol", "heat_fusion", "melting_point", "boiling_point",
                "heat_cap", "first_ioniz", "electronegativity", "electric_pol",
                "GGAU_Etot", "mus_fere", "FERE correction"
            ]

        elif preset_name == "matminer":
            # The "matminer" preset is backed by the "pymatgen" data source
            data_source = "pymatgen"
            stats = ["minimum", "maximum", "range", "mean", "std_dev"]
            features = [
                "X", "row", "group", "block", "atomic_mass", "atomic_radius",
                "mendeleev_no", "electrical_resistivity", "velocity_of_sound",
                "thermal_conductivity", "melting_point", "bulk_modulus",
                "coefficient_of_linear_thermal_expansion"
            ]

        elif preset_name == "matscholar_el":
            data_source = "matscholar_el"
            stats = ["minimum", "maximum", "range", "mean", "std_dev"]
            features = MatscholarElementData().prop_names

        elif preset_name == "megnet_el":
            data_source = "megnet_el"
            stats = ["minimum", "maximum", "range", "mean", "std_dev"]
            features = MEGNetElementData().prop_names

        else:
            raise ValueError("Invalid preset_name specified!")

        return cls(data_source, features, stats)

    def featurize(self, comp):
        """
        Get elemental property attributes

        Args:
            comp: Pymatgen composition object

        Returns:
            all_attributes: Specified property statistics of features, ordered
                by feature (outer) and then by statistic (inner)
        """
        all_attributes = []

        # Get the element names and fractions
        elements, fractions = zip(*comp.element_composition.items())

        for attr in self.features:
            elem_data = [
                self.data_source.get_elemental_property(e, attr)
                for e in elements
            ]

            for stat in self.stats:
                all_attributes.append(
                    self.pstats.calc_stat(elem_data, stat, fractions))

        return all_attributes

    def feature_labels(self):
        """
        Build the labels matching the output order of featurize()

        Returns:
            (list of str) labels of the form
                "{data source class name} {statistic} {property}"
        """
        # The class name does not change between iterations; look it up once
        src = self.data_source.__class__.__name__
        labels = []
        for attr in self.features:
            for stat in self.stats:
                labels.append(f"{src} {stat} {attr}")
        return labels

    def citations(self):
        """
        Return the BibTeX citation for the data source in use

        Returns:
            (list of str) a single-entry list for MagpieData, DemlData,
                PymatgenData and MEGNetElementData; an empty list for any
                other data source
        """
        source_name = self.data_source.__class__.__name__
        if source_name == "MagpieData":
            # NOTE(review): the DOI reads "2017.28" while year={2016} --
            # confirm against the publisher record before changing it.
            citation = [
                "@article{ward_agrawal_choudary_wolverton_2016, title={A general-purpose "
                "machine learning framework for predicting properties of inorganic materials}, "
                "volume={2}, DOI={10.1038/npjcompumats.2017.28}, number={1}, journal={npj "
                "Computational Materials}, author={Ward, Logan and Agrawal, Ankit and Choudhary, "
                "Alok and Wolverton, Christopher}, year={2016}}"
            ]
        elif source_name == "DemlData":
            citation = [
                "@article{deml_ohayre_wolverton_stevanovic_2016, title={Predicting density "
                "functional theory total energies and enthalpies of formation of metal-nonmetal "
                "compounds by linear regression}, volume={47}, DOI={10.1002/chin.201644254}, "
                "number={44}, journal={ChemInform}, author={Deml, Ann M. and Ohayre, Ryan and "
                "Wolverton, Chris and Stevanovic, Vladan}, year={2016}}"
            ]
        elif source_name == "PymatgenData":
            citation = [
                "@article{Ong2013, author = {Ong, Shyue Ping and Richards, William Davidson and Jain, Anubhav and Hautier, "
                "Geoffroy and Kocher, Michael and Cholia, Shreyas and Gunter, Dan and Chevrier, Vincent L. and Persson, "
                "Kristin A. and Ceder, Gerbrand}, doi = {10.1016/j.commatsci.2012.10.028}, issn = {09270256}, "
                "journal = {Computational Materials Science}, month = {feb}, pages = {314--319}, "
                "publisher = {Elsevier B.V.}, title = {{Python Materials Genomics (pymatgen): A robust, open-source python "
                "library for materials analysis}}, url = {http://linkinghub.elsevier.com/retrieve/pii/S0927025612006295}, "
                "volume = {68}, year = {2013} } "
            ]
        elif source_name == "MEGNetElementData":
            # TODO: Cite MEGNet publication (not preprint) once released!
            # The backslash before '#' is written as an escaped backslash so
            # the emitted text is unchanged while avoiding the "invalid escape
            # sequence" warning of newer Python versions.
            citation = [
                "@ARTICLE{2018arXiv181205055C,"
                "author = {{Chen}, Chi and {Ye}, Weike and {Zuo}, Yunxing and {Zheng}, Chen and {Ong}, Shyue Ping},"
                "title = '{Graph Networks as a Universal Machine Learning Framework for Molecules and Crystals}',"
                "journal = {arXiv e-prints},"
                "keywords = {Condensed Matter - Materials Science, Physics - Computational Physics},"
                "year = '2018',"
                "month = 'Dec',"
                "eid = {arXiv:1812.05055},"
                "pages = {arXiv:1812.05055},"
                "archivePrefix = {arXiv},"
                "eprint = {1812.05055},"
                "primaryClass = {cond-mat.mtrl-sci},"
                "adsurl = {https://ui.adsabs.harvard.edu/\\#abs/2018arXiv181205055C},"
                "adsnote = {Provided by the SAO/NASA Astrophysics Data System}}"
            ]
        else:
            citation = []
        return citation

    def implementors(self):
        """Return the list of people who implemented this featurizer"""
        return ["Jiming Chen", "Logan Ward", "Anubhav Jain", "Alex Dunn"]
class Meredig(BaseFeaturizer):
    """
    Class to calculate features as defined in Meredig et al.

    Features:
        Atomic fraction of each of the first 103 elements, in order of atomic number.
        17 statistics of elemental properties;
            Mean atomic weight of constituent elements
            Mean periodic table row and column number
            Mean and range of atomic number
            Mean and range of atomic radius
            Mean and range of electronegativity
            Mean number of valence electrons in each orbital
            Fraction of total valence electrons in each orbital
    """

    def __init__(self):
        self.data_source = MagpieData()

        # The labels for statistics on element properties; each label is
        # "<statistic> <property>" and is parsed back apart in featurize()
        self._element_property_feature_labels = [
            "mean AtomicWeight",
            "mean Column",
            "mean Row",
            "range Number",
            "mean Number",
            "range AtomicRadius",
            "mean AtomicRadius",
            "range Electronegativity",
            "mean Electronegativity"
        ]
        # Initialize stats computer
        self.pstats = PropertyStats()

    @staticmethod
    def _valence_orbital_featurizer():
        """Build the ValenceOrbital featurizer shared by featurize() and
        feature_labels(), so values and labels always use the same settings"""
        return ValenceOrbital(orbitals=("s", "p", "d", "f"),
                              props=("avg", "frac"))

    def featurize(self, comp):
        """
        Get elemental property attributes

        Args:
            comp: Pymatgen composition object

        Returns:
            all_attributes: element fraction features, followed by the
                element property statistics, followed by the valence
                orbital features
        """
        # The leading features are element fractions, we can get these from
        # the ElementFraction featurizer
        element_fraction_features = ElementFraction().featurize(comp)

        # Next come the statistics on elemental properties, one per label
        elements, fractions = zip(*comp.element_composition.items())
        element_property_features = []
        for feat in self._element_property_feature_labels:
            # Text before the first space is the statistic, the rest is the
            # property name
            stat, _, attr = feat.partition(" ")
            elem_data = [
                self.data_source.get_elemental_property(e, attr)
                for e in elements
            ]
            element_property_features.append(
                self.pstats.calc_stat(elem_data, stat, fractions))

        # Final features are statistics on valence orbitals, available from
        # the ValenceOrbital featurizer
        valence_orbital_features = \
            self._valence_orbital_featurizer().featurize(comp)

        return element_fraction_features + element_property_features \
            + valence_orbital_features

    def feature_labels(self):
        """
        Build the labels matching the output order of featurize()

        Returns:
            (list of str) element fraction labels (suffixed with
                " fraction"), the element property labels, then the valence
                orbital labels
        """
        # Since we have more features than just element fractions, append
        # 'fraction' to element symbols for clarity
        element_fraction_features = [
            e + " fraction" for e in ElementFraction().feature_labels()
        ]
        # Use the same featurizer settings as featurize() so the labels
        # cannot drift from the computed values
        valence_orbital_features = \
            self._valence_orbital_featurizer().feature_labels()
        return element_fraction_features \
            + self._element_property_feature_labels \
            + valence_orbital_features

    def citations(self):
        """Return the BibTeX citation for the Meredig et al. feature set"""
        citation = [
            "@article{meredig_agrawal_kirklin_saal_doak_thompson_zhang_choudhary_wolverton_2014, title={Combinatorial "
            "screening for new materials in unconstrained composition space with machine learning}, "
            "volume={89}, DOI={10.1103/PhysRevB.89.094104}, number={1}, journal={Physical "
            "Review B}, author={B. Meredig, A. Agrawal, S. Kirklin, J. E. Saal, J. W. Doak, A. Thompson, "
            "K. Zhang, A. Choudhary, and C. Wolverton}, year={2014}}"]
        return citation

    def implementors(self):
        """Return the list of people who implemented this featurizer"""
        return ["Amalie Trewartha"]