def __init__(self):
    """Attach the two lookup tables this featurizer reads from.

    ``elem_data`` holds a ``MagpieData`` table of elemental properties and
    ``dhf_mix`` holds a ``MixingEnthalpy`` lookup table of the liquid mixing
    enthalpies.
    """
    # Table of elemental properties
    self.elem_data = MagpieData()

    # Lookup table of the liquid mixing enthalpies
    self.dhf_mix = MixingEnthalpy()
class TestMagpieData(TestCase):
    """Checks for property and oxidation-state lookups on ``MagpieData``."""

    def setUp(self):
        """Create the data source under test."""
        self.data_source = MagpieData()

    def test_get_property(self):
        """The ``AtomicWeight`` of Be is returned as 9.012182."""
        beryllium = Element("Be")
        weight = self.data_source.get_elemental_property(
            beryllium, "AtomicWeight")
        self.assertAlmostEqual(9.012182, weight)

    def test_get_oxidation(self):
        """The oxidation states of C are returned as [-4, 2, 4]."""
        carbon = Element("C")
        states = self.data_source.get_oxidation_states(carbon)
        self.assertEqual([-4, 2, 4], states)
def featurize(self, struct, idx):
    """
    Get interstice distribution fingerprints of site with given index in
    input structure.

    For every interstice type listed in ``self.interstice_types`` ('dist',
    'area', 'vol', evaluated in that order) one value per entry of
    ``self.stats`` is appended to the result.

    Args:
        struct (Structure): Pymatgen Structure object.
        idx (int): index of target site in structure.

    Returns:
        interstice_fps ([float]): Interstice distribution fingerprints.
    """
    interstice_fps = []

    # Get the nearest neighbors using Voronoi tessellation. The neighbor
    # records are materialized once because they are traversed several times.
    neighbors = list(VoronoiNN(cutoff=self.cutoff).get_voronoi_polyhedra(
        struct, idx).values())
    nn_coords = np.array([nn['site'].coords for nn in neighbors])

    # A single data source serves both radius lookups instead of building
    # a new MagpieData table for each of them.
    elem_data = MagpieData()

    # Get center atom's radius and its nearest neighbors' radii.
    # NOTE(review): the division by 100 presumably converts pm to angstrom
    # - confirm against the units of `self.radius_type`.
    center_r = elem_data.get_elemental_properties(
        [struct[idx].specie], self.radius_type)[0] / 100
    nn_els = [nn['site'].specie for nn in neighbors]
    nn_rs = np.array(elem_data.get_elemental_properties(
        nn_els, self.radius_type)) / 100

    # Get indices of atoms forming the simplices of convex hull
    convex_hull_simplices = ConvexHull(nn_coords).simplices

    # One statistics helper is shared by all interstice types
    pstats = PropertyStats()

    def _summarize(values):
        """Reduce a list of interstices to one value per requested stat."""
        return [pstats.calc_stat(values, stat) for stat in self.stats]

    if 'dist' in self.interstice_types:
        nn_dists = [nn['face_dist'] * 2 for nn in neighbors]
        interstice_dist_list = IntersticeDistribution.\
            analyze_dist_interstices(center_r, nn_rs, nn_dists)
        interstice_fps += _summarize(interstice_dist_list)

    if 'area' in self.interstice_types:
        interstice_area_list = IntersticeDistribution.\
            analyze_area_interstice(nn_coords, nn_rs,
                                    convex_hull_simplices)
        interstice_fps += _summarize(interstice_area_list)

    if 'vol' in self.interstice_types:
        interstice_vol_list = IntersticeDistribution.\
            analyze_vol_interstice(struct[idx].coords, nn_coords,
                                   center_r, nn_rs,
                                   convex_hull_simplices)
        interstice_fps += _summarize(interstice_vol_list)

    return interstice_fps
def from_preset(preset):
    """
    Build a LocalPropertyStatsNew featurizer from a named preset.

    Only the 'interpretable' preset is known; it uses a MagpieData data
    source and a fixed list of elemental properties.

    Args:
        preset (str) - Name of preset

    Raises:
        ValueError - If the preset name is not recognized
    """
    # Reject unknown names up front
    if preset != 'interpretable':
        raise ValueError('Unrecognized preset: ' + preset)

    interpretable_properties = [
        'MendeleevNumber',
        'Column',
        'Row',
        'Electronegativity',
        'NsValence',
        'NpValence',
        'NdValence',
        'NfValence',
        'NValence',
        'NsUnfilled',
        'NpUnfilled',
        'NdUnfilled',
        'NfUnfilled',
        'NUnfilled',
        'GSbandgap',
    ]
    return LocalPropertyStatsNew(
        data_source=MagpieData(),
        properties=interpretable_properties,
    )
def get_fps(structure, cutoff=10.0, processes=8):
    """Compute the descriptors of every site of a structure in parallel.

    The featurizers and data sources are built once, bundled with each
    ``(index, site)`` pair of ``structure`` into one argument list per site,
    and mapped over ``get_all_site_descrs`` in a process pool.

    Args:
        structure: Structure whose sites are enumerated; it is also passed to
            the Voronoi and Gaussian symmetry function featurizers.
        cutoff (float): Cutoff handed to ``VoronoiFingerprintModified``.
        processes (int): Number of worker processes in the pool.

    Returns:
        numpy array with one entry per site. If an ``AttributeError`` or
        ``IndexError`` is raised along the way, the failure is logged and an
        empty list is returned instead (best-effort behavior).
    """
    import logging

    try:
        coordination_number_ = CoordinationNumber.from_preset('VoronoiNN')
        voronoi_fps_ = VoronoiFingerprintModified(
            cutoff=cutoff).featurize_structure(structure)
        crystal_nn_fingerprint_ = CrystalNNFingerprint.from_preset('cn')
        op_site_fingerprint_ = OPSiteFingerprint()
        agni_fingerprints_ = AGNIFingerprints()
        gaussian_symm_func_fps_ = GaussianSymmFuncModified(
        ).featurize_structure(structure)
        pymatgen_data_ = PymatgenData()
        magpie_data_ = MagpieData()

        data_list = [[
            structure, i, site, coordination_number_, voronoi_fps_,
            crystal_nn_fingerprint_, op_site_fingerprint_,
            agni_fingerprints_, gaussian_symm_func_fps_, pymatgen_data_,
            magpie_data_
        ] for i, site in enumerate(structure)]

        # The context manager shuts the worker processes down even when
        # the mapping fails; `map` blocks until all results are available.
        with multiprocessing.Pool(processes=processes) as pool:
            return np.array(pool.map(get_all_site_descrs, data_list))
    except (AttributeError, IndexError):
        # Keep the empty-result fallback, but leave a trace of the failure
        logging.getLogger(__name__).warning(
            "Site descriptor calculation failed; returning no descriptors",
            exc_info=True)
        return []
def from_preset(preset: str, cutoff: float = 13):
    """
    Build a LocalPropertyStatsNew featurizer from a named preset.

    Only the "interpretable" preset is known; it uses a MagpieData data
    source and a fixed list of elemental properties.

    Args:
        preset (str) - Name of preset
        cutoff (float) - Cutoff for the nearest neighbor search

    Raises:
        ValueError - If the preset name is not recognized
    """
    # Reject unknown names up front
    if preset != "interpretable":
        raise ValueError("Unrecognized preset: " + preset)

    interpretable_properties = [
        "MendeleevNumber",
        "Column",
        "Row",
        "Electronegativity",
        "NsValence",
        "NpValence",
        "NdValence",
        "NfValence",
        "NValence",
        "NsUnfilled",
        "NpUnfilled",
        "NdUnfilled",
        "NfUnfilled",
        "NUnfilled",
        "GSbandgap",
    ]
    return LocalPropertyStatsNew(
        data_source=MagpieData(),
        properties=interpretable_properties,
        cutoff=cutoff,
    )
def __init__(self, data_source=None, orbitals=None, props=None):
    """Store the data source and the orbital/property selection.

    The defaults are ``None`` sentinels so that no ``MagpieData`` table is
    built at function-definition time and no list object is shared between
    instances.

    Args:
        data_source: Object to retrieve element data from. A new
            ``MagpieData`` is created when omitted.
        orbitals (list): Orbitals to consider. Defaults to a new
            ``["s", "p", "d", "f"]`` list.
        props (list): Properties to report. Defaults to a new
            ``["avg", "frac"]`` list.
    """
    self.data_source = MagpieData() if data_source is None else data_source
    # Caller-provided objects are stored as-is (no copy)
    self.orbitals = ["s", "p", "d", "f"] if orbitals is None else orbitals
    self.props = ["avg", "frac"] if props is None else props
def create_dict_for_feature_table(
        picklefile: Union[str, Path]) -> List[dict]:
    """Turn a pickled feature file into one record per metal site.

    Each site's feature vector is extended with an encoding of its metal
    (atomic number, row, group, valence electron counts from MagpieData, the
    ``diff_to_18e`` value and one random integer drawn from
    ``np.random.randint(1, 18)``). Sites whose extended feature vector
    contains NaN are left out.

    Arguments:
        picklefile (Union[str, Path]) -- path to pickle file

    Returns:
        List[dict] -- list of dictionaries with the keys "metal",
            "coordinate_x", "coordinate_y", "coordinate_z", "feature"
            and "name" (stem of the pickle file name)
    """
    sites = read_pickle(picklefile)
    magpie = MagpieData()

    def _lookup(element, prop):
        """Return the MagpieData value of `prop` for a single element."""
        return magpie.get_elemental_properties([element], prop)[0]

    records = []
    for entry in sites:
        metal = Element(entry["metal"])

        n_valence = _lookup(metal, "NValence")
        to_donate = diff_to_18e(n_valence)
        s_unfilled = _lookup(metal, "NsUnfilled")
        d_unfilled = _lookup(metal, "NdUnfilled")
        p_unfilled = _lookup(metal, "NpUnfilled")

        # Site features followed by the metal encoding
        feature_vector = list(entry["feature"])
        feature_vector.extend([
            metal.number,
            metal.row,
            metal.group,
            n_valence,
            to_donate,
            s_unfilled,
            p_unfilled,
            d_unfilled,
            np.random.randint(1, 18),
        ])

        x, y, z = (int(entry["coords"][axis]) for axis in range(3))
        record = {
            "metal": entry["metal"],
            "coordinate_x": x,
            "coordinate_y": y,
            "coordinate_z": z,
            "feature": feature_vector,
            "name": Path(picklefile).stem,
        }

        # Skip sites with incomplete features
        if np.isnan(np.array(feature_vector)).any():
            continue
        records.append(record)

    return records
def __init__(self):
    """Set up the data source, the property labels and the stats helper."""
    self.data_source = MagpieData()

    # Labels of the elemental-property statistics; each one reads
    # "<statistic> <property name>"
    self._element_property_feature_labels = [
        "mean AtomicWeight",
        "mean Column",
        "mean Row",
        "range Number",
        "mean Number",
        "range AtomicRadius",
        "mean AtomicRadius",
        "range Electronegativity",
        "mean Electronegativity",
    ]

    # Helper that computes the statistics
    self.pstats = PropertyStats()
def create_dict_for_feature_table(picklefile: str) -> list:
    """Turn a pickled feature file into one record per metal site.

    Each site's feature vector is extended with an encoding of its metal
    (atomic number, row, group, valence electron counts from MagpieData, the
    ``diff_to_18e`` value and one random integer drawn from
    ``np.random.randint(1, 18)``). Sites whose extended feature vector
    contains NaN are left out.

    Arguments:
        picklefile {str} -- path to pickle file

    Returns:
        list -- list of dictionaries with the keys 'metal', 'coordinate_x',
            'coordinate_y', 'coordinate_z', 'feature' and 'name' (stem of
            the pickle file name)
    """
    sites = read_pickle(picklefile)
    magpie = MagpieData()

    def _lookup(element, prop):
        """Return the MagpieData value of `prop` for a single element."""
        return magpie.get_elemental_properties([element], prop)[0]

    records = []
    for entry in sites:
        metal = Element(entry['metal'])

        n_valence = _lookup(metal, 'NValence')
        to_donate = diff_to_18e(n_valence)
        s_unfilled = _lookup(metal, 'NsUnfilled')
        d_unfilled = _lookup(metal, 'NdUnfilled')
        p_unfilled = _lookup(metal, 'NpUnfilled')

        # Site features followed by the metal encoding
        feature_vector = list(entry['feature'])
        feature_vector.extend([
            metal.number,
            metal.row,
            metal.group,
            n_valence,
            to_donate,
            s_unfilled,
            p_unfilled,
            d_unfilled,
            np.random.randint(1, 18),
        ])

        x, y, z = (int(entry['coords'][axis]) for axis in range(3))
        record = {
            'metal': entry['metal'],
            'coordinate_x': x,
            'coordinate_y': y,
            'coordinate_z': z,
            'feature': feature_vector,
            'name': Path(picklefile).stem,
        }

        # Skip sites with incomplete features
        if np.isnan(np.array(feature_vector)).any():
            continue
        records.append(record)

    return records
def __init__(self, data_source, features, stats):
    """Resolve the data source and store the feature/stat selection.

    Args:
        data_source: Either one of the names "pymatgen", "magpie" or "deml",
            which is replaced by a new PymatgenData, MagpieData or DemlData
            instance, or any other object, which is stored unchanged.
        features: Stored as ``self.features``.
        stats: Stored as ``self.stats``.
    """
    # Name -> class of the built-in data sources, checked in this order
    named_sources = (
        ("pymatgen", PymatgenData),
        ("magpie", MagpieData),
        ("deml", DemlData),
    )
    for label, source_cls in named_sources:
        if data_source == label:
            self.data_source = source_cls()
            break
    else:
        # Not a known name: use the object the caller handed in
        self.data_source = data_source

    self.stats = stats
    self.features = features
class ValenceOrbitalEnergy(BaseFeaturizer):
    """Electron-weighted mean energy of the valence orbitals of a composition.

    For every element the valence electron counts of the s, p, d and f
    subshells come from MagpieData and the orbital energies from the
    element's ``atomic_orbitals`` mapping.
    """

    def __init__(self):
        # Per-element cache filled by `get_element_props`
        self.element_props = {}
        self.MagpieData = MagpieData()

    def get_element_props(self, el):
        """Return (and cache) the occupied valence subshells of an element.

        Args:
            el: Element to look up; it must provide ``atomic_orbitals``.

        Returns:
            dict mapping each subshell letter with more than zero valence
            electrons to a dict with the keys 'n_elec', 'energy' and 'shell'.
        """
        if el in self.element_props:
            return self.element_props[el]

        subshells = 'spdf'
        n_elec = {
            sub: self.MagpieData.get_elemental_property(el, f'N{sub}Valence')
            for sub in subshells
        }
        orbital_names = list(el.atomic_orbitals.keys())

        props = {}
        for sub in subshells:
            if not n_elec[sub] > 0:
                continue
            # Valence orbital of the subshell: the largest leading character
            # among the orbital names whose second character is the subshell
            shell = max(name[0] for name in orbital_names if name[1] == sub)
            label = '{}{}'.format(shell, sub)
            props[sub] = {
                'n_elec': n_elec[sub],
                'energy': el.atomic_orbitals[label],
                'shell': label[0],
            }

        self.element_props[el] = props
        return props

    def featurize(self, comp):
        """Compute the mean valence orbital energy per valence electron.

        Args:
            comp: Composition; iterated via ``comp.elements`` and indexed by
                element to obtain the amounts.

        Returns:
            Single-element list with the total orbital energy divided by the
            total number of valence electrons.
        """
        tot_energy = 0
        tot_elec = 0
        for el in comp.elements:
            occupied = self.get_element_props(el).values()
            amount = comp[el]
            tot_energy += amount * sum(
                [shell['energy'] * shell['n_elec'] for shell in occupied])
            tot_elec += amount * sum(
                [shell['n_elec'] for shell in occupied])
        return [tot_energy / tot_elec]

    def feature_labels(self):
        """Return the name of the single feature."""
        return ['MeanValenceEnergy']

    def citations(self):
        """Return the BibTeX entry of the referenced publication."""
        entry_parts = (
            "@article{Ward2016,",
            "archivePrefix = {arXiv},",
            "arxivId = {1606.09551},",
            "author = {Ward, Logan and Agrawal, Ankit and Choudhary, Alok and Wolverton, Christopher},",
            "doi = {10.1038/npjcompumats.2016.28},",
            "eprint = {1606.09551},",
            "isbn = {2057-3960},",
            "issn = {20573960},",
            "journal = {npj Computational Materials},",
            "number = {June},",
            "pages = {1--7},",
            "title = {{A general-purpose machine learning framework for predicting properties of inorganic materials}},",
            "volume = {2},",
            "year = {2016}",
            "}",
        )
        return ["".join(entry_parts)]
def __init__(self, data_source=None, weight='area',
             properties=('Electronegativity',)):
    """ Initialize the featurizer

    Args:
        data_source (AbstractData) - Class from which to retrieve
            elemental properties. A new MagpieData is created when
            omitted, so that no data table is built at
            function-definition time.
        weight (str) - What aspect of each voronoi facet to use to
            weigh each neighbor (see VoronoiNN)
        properties ([str]) - List of properties to use
            (default=['Electronegativity'])
    """
    self.data_source = MagpieData() if data_source is None else data_source
    self.properties = properties
    self.weight = weight
def __init__(self, threshold=0.01, n_nearest=(1, 3, 5), max_types=6):
    """Store the options and build the lookup helpers.

    Args:
        threshold (float): Threshold to use for determining whether
            a cluster is efficiently packed.
        n_nearest ({int}): Number of nearest clusters to use when
            considering features
        max_types (int): Maximum number of atom types to consider when
            looking for efficient clusters. The process for finding
            efficient clusters is very expensive for large numbers of types
    """
    # User options
    self.threshold = threshold
    self.n_nearest = n_nearest
    self.max_types = max_types

    # Converts composition objects to a vector of fractions
    self._el_frac = ElementFraction()

    # Length of the vector produced by `_el_frac`
    hydrogen_vector = self._el_frac.featurize(Composition('H'))
    self._n_elems = len(hydrogen_vector)

    # Source of the atomic radii
    self._data_source = MagpieData()

    # Ideal radius ratio for each number of neighbors (3 to 24)
    self.ideal_ratio = {
        3: 0.154701, 4: 0.224745, 5: 0.361654, 6: 0.414214,
        7: 0.518145, 8: 0.616517, 9: 0.709914, 10: 0.798907,
        11: 0.884003, 12: 0.902113, 13: 0.976006, 14: 1.04733,
        15: 1.11632, 16: 1.18318, 17: 1.2481, 18: 1.31123,
        19: 1.37271, 20: 1.43267, 21: 1.49119, 22: 1.5484,
        23: 1.60436, 24: 1.65915,
    }
def __init__(self, data_source, features, stats):
    """Resolve the data source and store the feature/stat selection.

    Args:
        data_source: Either one of the names "pymatgen", "magpie", "deml",
            "matscholar_el" or "megnet_el", which is replaced by a new
            instance of the matching data class, or any other object, which
            is stored unchanged.
        features: Stored as ``self.features``.
        stats: Stored as ``self.stats``.
    """
    # Name -> class of the built-in data sources, checked in this order
    named_sources = (
        ("pymatgen", PymatgenData),
        ("magpie", MagpieData),
        ("deml", DemlData),
        ("matscholar_el", MatscholarElementData),
        ("megnet_el", MEGNetElementData),
    )
    for label, source_cls in named_sources:
        if data_source == label:
            self.data_source = source_cls()
            break
    else:
        # Not a known name: use the object the caller handed in
        self.data_source = data_source

    self.stats = stats
    self.features = features

    # Helper that computes the statistics
    self.pstats = PropertyStats()
def __init__(
        self,
        data_source=None,
        weight: str = "area",
        properties: List[str] = ("Electronegativity", ),
        cutoff: float = 35,
):
    """Initialize the featurizer

    Args:
        data_source (AbstractData) - Class from which to retrieve
            elemental properties. A new MagpieData is created when
            omitted, so that no data table is built at
            function-definition time.
        weight (str) - What aspect of each voronoi facet to use to
            weigh each neighbor (see VoronoiNN)
        properties (List[str]) - List of properties to use
            (default=['Electronegativity'])
        cutoff (float) - Stored as ``self.cutoff``; presumably the cutoff
            of the nearest neighbor search - confirm against the code
            that reads it
    """
    self.data_source = MagpieData() if data_source is None else data_source
    self.properties = properties
    self.weight = weight
    self.cutoff = cutoff
def setUp(self):
    """Attach a new MagpieData instance as ``self.data_source``."""
    self.data_source = MagpieData()
def __init__(self):
    """Create the elemental data source and an empty property store."""
    # Source of elemental data
    self.MagpieData = MagpieData()
    # Starts empty; presumably filled per element on demand - confirm
    # against the methods that use it
    self.element_props = {}
class ValenceOrbital(BaseFeaturizer):
    """
    Attributes of valence orbital shells

    Element data is always read from a MagpieData instance.

    Args:
        orbitals (list): orbitals to calculate
        props (list): specifies whether to return average number of
            electrons in each orbital ("avg"), fraction of electrons in
            each orbital ("frac"), or both
    """

    def __init__(self, orbitals=("s", "p", "d", "f"), props=("avg", "frac")):
        self.data_source = MagpieData()
        self.orbitals = orbitals
        self.props = props

    def featurize(self, comp):
        """Weighted fraction of valence electrons in each orbital

        Args:
            comp: Pymatgen composition object

        Returns:
            valence_attributes (list of floats): Average number and/or
                fraction of valence electrons in specified orbitals,
                concatenated in the order given by `props`

        Raises:
            KeyError: if `props` contains a name that was not computed,
                i.e. anything other than "avg" or "frac"
        """
        elements, fractions = zip(*comp.element_composition.items())

        # Get the mean number of electrons in each shell
        avg = [
            PropertyStats.mean(
                self.data_source.get_elemental_properties(
                    elements, f"N{orb}Valence"),
                weights=fractions)
            for orb in self.orbitals
        ]

        # Explicit table of the computed attributes. The averages are always
        # needed because the fractions are derived from them.
        computed = {"avg": avg}

        # If needed, get fraction of electrons in each shell
        if "frac" in self.props:
            avg_total_valence = PropertyStats.mean(
                self.data_source.get_elemental_properties(
                    elements, "NValence"),
                weights=fractions)
            computed["frac"] = [a / avg_total_valence for a in avg]

        # Get the desired attributes
        valence_attributes = []
        for prop in self.props:
            valence_attributes += computed[prop]

        return valence_attributes

    def feature_labels(self):
        """Return one label per (property, orbital) pair, property-major."""
        labels = []
        for prop in self.props:
            for orb in self.orbitals:
                labels.append("%s %s valence electrons" % (prop, orb))

        return labels

    def citations(self):
        """Return the BibTeX entries of the two referenced publications."""
        ward_citation = (
            "@article{ward_agrawal_choudary_wolverton_2016, title={A general-purpose "
            "machine learning framework for predicting properties of inorganic materials}, "
            "volume={2}, DOI={10.1038/npjcompumats.2016.28}, number={1}, journal={npj "
            "Computational Materials}, author={Ward, Logan and Agrawal, Ankit and Choudhary, "
            "Alok and Wolverton, Christopher}, year={2016}}")
        deml_citation = (
            "@article{deml_ohayre_wolverton_stevanovic_2016, title={Predicting density "
            "functional theory total energies and enthalpies of formation of metal-nonmetal "
            "compounds by linear regression}, volume={47}, DOI={10.1002/chin.201644254}, "
            "number={44}, journal={ChemInform}, author={Deml, Ann M. and Ohayre, Ryan and "
            "Wolverton, Chris and Stevanovic, Vladan}, year={2016}}")
        citations = [ward_citation, deml_citation]
        return citations

    def implementors(self):
        """Return the names of the implementors."""
        return ["Jiming Chen", "Logan Ward"]
def __init__(self, orbitals=("s", "p", "d", "f"), props=("avg", "frac")):
    """Store the orbital/property selection and create the data source.

    Args:
        orbitals: Orbital letters, stored as ``self.orbitals``.
        props: Property names, stored as ``self.props``.
    """
    self.orbitals = orbitals
    self.props = props
    # Element data always comes from MagpieData
    self.data_source = MagpieData()
def __init__(self, data_source=None):
    """Store the source of elemental data.

    Args:
        data_source: Object to retrieve elemental data from. A new
            MagpieData is created when omitted, so that no data table is
            built at function-definition time.
    """
    self.data_source = MagpieData() if data_source is None else data_source
class AtomicPackingEfficiency(BaseFeaturizer):
    """
    Packing efficiency based on a geometric theory of the amorphous packing
    of hard spheres.

    This featurizer computes two different kinds of the features. The first
    corresponds to the packing efficiency of a system if all atoms in the
    alloy are simultaneously as efficiently-packed as possible. The second
    relates to the distance between a composition and the composition of the
    clusters of atoms expected to be efficiently packed based on a theory
    from `Laws et al.<http://www.nature.com/doifinder/10.1038/ncomms9123>`_.

    The packing efficiency in these models is based on the Atomic Packing
    Efficiency (APE), which measures the difference between the ratio of
    the radii of the central atom to its neighbors and the ideal ratio
    of a cluster with the same number of atoms that has optimal packing
    efficiency. If the difference between the ratios is too large, the APE is
    positive. If the difference is too small, the APE is negative.

    Features (in output order):
        mean simul. packing efficiency - Mean packing efficiency of all atoms.
            The packing efficiency is measured with respect to ideal (0)
        mean abs simul. packing efficiency - Mean absolute value of the
            packing efficiencies. Closer to zero is more efficiently packed
        dist from {k} clusters |APE| < {thr} - The distance between an
            alloy composition and the k clusters that have a packing
            efficiency below thr from ideal

    References:
        [1] K.J. Laws, D.B. Miracle, M. Ferry, A predictive structural model
        for bulk metallic glasses, Nat. Commun. 6 (2015) 8123.
        doi:10.1038/ncomms9123.
    """

    def __init__(self, threshold=0.01, n_nearest=(1, 3, 5), max_types=6):
        """
        Initialize the featurizer

        Args:
            threshold (float): Threshold to use for determining whether
                a cluster is efficiently packed.
            n_nearest ({int}): Number of nearest clusters to use when
                considering features
            max_types (int): Maximum number of atom types to consider when
                looking for efficient clusters. The process for finding
                efficient clusters is very expensive for large numbers of
                types
        """

        # Store the options
        self.threshold = threshold
        self.n_nearest = n_nearest
        self.max_types = max_types

        # Tool to convert composition objects to fractions as a vector
        self._el_frac = ElementFraction()

        # Get the number of elements in the output of `_el_frac`
        self._n_elems = len(self._el_frac.featurize(Composition('H')))

        # Tool for looking up radii
        self._data_source = MagpieData()

        # Lookup table of ideal radius ratios, keyed by number of neighbors
        self.ideal_ratio = {
            3: 0.154701, 4: 0.224745, 5: 0.361654, 6: 0.414214,
            7: 0.518145, 8: 0.616517, 9: 0.709914, 10: 0.798907,
            11: 0.884003, 12: 0.902113, 13: 0.976006, 14: 1.04733,
            15: 1.11632, 16: 1.18318, 17: 1.2481, 18: 1.31123,
            19: 1.37271, 20: 1.43267, 21: 1.49119, 22: 1.5484,
            23: 1.60436, 24: 1.65915,
        }

    # `__hash__` and `__eq__` make instances usable as part of the
    # `lru_cache` key of `_create_cluster_lookup_tool`
    def __hash__(self):
        return hash(self.threshold)

    def __eq__(self, other):
        if isinstance(other, AtomicPackingEfficiency):
            return self.get_params() == other.get_params()
        # Let Python fall back to its default comparison for foreign types
        # instead of implicitly returning None
        return NotImplemented

    def featurize(self, comp):
        """Compute both packing-efficiency means followed by the cluster
        distances, one per entry of `n_nearest`.

        Args:
            comp (Composition): Composition to be featurized
        Returns:
            ([float]) Features, ordered as in `feature_labels`
        """
        return list(self.compute_simultaneous_packing_efficiency(comp)) + \
            self.compute_nearest_cluster_distance(comp)

    def feature_labels(self):
        """Return the labels of the features, in output order."""
        return ['mean simul. packing efficiency',
                'mean abs simul. packing efficiency'] + [
            f"dist from {k} clusters |APE| < {self.threshold:.3f}"
            for k in self.n_nearest]

    def citations(self):
        """Return the BibTeX entry of the referenced publication."""
        return ["@article{Laws2015,"
                "author = {Laws, K. J. and Miracle, D. B. and Ferry, M.},"
                "doi = {10.1038/ncomms9123},"
                "journal = {Nature Communications},"
                "pages = {8123},"
                "title = {{A predictive structural model for bulk metallic glasses}},"
                "url = {http://www.nature.com/doifinder/10.1038/ncomms9123},"
                "volume = {6},"
                "year = {2015}"
                "}"]

    def implementors(self):
        """Return the names of the implementors."""
        return ['Logan Ward']

    def compute_simultaneous_packing_efficiency(self, comp):
        """Compute the packing efficiency of the system when the neighbor
        shell of each atom has the same composition as the alloy. When this
        criterion is satisfied, it is possible for every atom in this system
        to be simultaneously as efficiently-packed as possible.

        Args:
            comp (Composition): Composition to be assessed
        Returns
            (float) Average APE of all atoms
            (float) Average deviation of the APE of each atom from ideal (0)
        """

        # Compute the average atomic radius of the system
        elements, fractions = zip(*comp.element_composition.items())
        radii = self._data_source.get_elemental_properties(elements,
                                                           'MiracleRadius')
        mean_radius = PropertyStats.mean(radii, fractions)

        # Compute the APE for each cluster
        best_ape = [
            self.find_ideal_cluster_size(r / mean_radius)[1] for r in radii
        ]

        # Return the averages
        return PropertyStats.mean(best_ape, fractions), \
            PropertyStats.mean(np.abs(best_ape), fractions)

    def compute_nearest_cluster_distance(self, comp):
        """Compute the distance between a composition and the nearest
        efficiently-packed clusters.

        Measures the mean :math:`L_2` distance between the alloy composition
        and the :math:`k`-nearest clusters whose APE magnitude, as computed
        by `_create_cluster_lookup_tool`, is below the user-specified
        threshold. :math:`k` is any of the numbers defined in the
        "n_nearest" parameter of this class.

        If there are less than `k` efficient clusters in the system, we use
        the maximum distance between any two compositions (1) for the
        unmatched neighbors.

        Args:
            comp (Composition): Composition of material to evaluate
        Return:
            [float] Average distances
        """

        # Get the most common elements
        elems, _ = zip(*sorted(comp.element_composition.items(),
                               key=lambda x: x[1], reverse=True))

        # Get the cluster lookup tool using the most common elements
        cluster_lookup = self.create_cluster_lookup_tool(
            elems[:self.max_types]
        )

        # Compute the composition vector
        comp_vec = self._el_frac.featurize(comp)

        # Compute the distances
        means = []
        for k in self.n_nearest:
            # Get the nearest clusters
            if cluster_lookup is None:
                # No efficient clusters at all: every neighbor is padded
                dists = (np.array([]),)
                to_lookup = 0
            else:
                # Cannot ask for more neighbors than there are clusters
                to_lookup = min(cluster_lookup._fit_X.shape[0], k)
                dists, _ = cluster_lookup.kneighbors([comp_vec], to_lookup)

            # Pad the list with 1's
            dists = dists[0].tolist() + [1]*(k - to_lookup)

            # Compute the average
            means.append(np.mean(dists))

        return means

    def create_cluster_lookup_tool(self, elements):
        """
        Get the compositions of efficiently-packed clusters in a certain
        system of elements

        Args:
            elements ([Element]): Elements in system
        Return:
            (NearNeighbors): Tool to find nearby clusters in this system. None
                if there are no efficiently-packed clusters for this
                combination of elements
        """
        # Normalize to a sorted tuple without duplicates, the form expected
        # by the cached implementation
        elements = list(set(elements))
        return self._create_cluster_lookup_tool(tuple(sorted(elements)))

    # NOTE(review): the cache is keyed on `self` as well as `elements`, so
    # it keeps a reference to every instance that called this method
    @lru_cache()
    def _create_cluster_lookup_tool(self, elements):
        """
        Cached version of `create_cluster_lookup_tool`. Assumes that the
        elements are passed as sorted tuple with no duplicates

        Args:
            elements ([Element]): Elements in system
        Return:
            (NearNeighbors): Tool to find nearby clusters in this system. If
                there are no clusters, this class returns None
        """

        # Get the radii
        radii = self._data_source.get_elemental_properties(elements,
                                                           "MiracleRadius")

        # Get the maximum and minimum cluster sizes
        max_size = self.find_ideal_cluster_size(max(radii) / min(radii))[0]
        min_size = self.find_ideal_cluster_size(min(radii) / max(radii))[0]

        # Prepare a list to hold all possible clusters
        eff_clusters = []

        # Loop through all possible neighbor shells
        for size in range(min_size, max_size + 1):
            # Get the ideal radius ratio for a cluster of this size
            ideal_ratio = self.get_ideal_radius_ratio(size)

            # Get the mean radii and compositions of all possible
            # combinations of elements in the neighbor shell
            s_radii = itertools.combinations_with_replacement(radii, size)
            s_elems = itertools.combinations_with_replacement(elements, size)

            # Put the results in arrays for fast indexing
            mean_radii = np.array(list(s_radii)).mean(axis=1)
            s_elems = np.array(list(s_elems))

            # For each type of central atom, determine which shells give
            # an APE within `self.threshold` of ideal
            for center_radius, center_elem in zip(radii, elements):
                # Compute the APE of each cluster
                ape = 1 - np.divide(ideal_ratio, np.divide(center_radius,
                                                           mean_radii))

                # Get those which are within the threshold of 0
                # and add their composition to the list of OK elements
                for hit in s_elems[np.abs(ape) < self.threshold]:
                    eff_clusters.append([center_elem] + hit.tolist())

        # Compute the composition vectors for all of the efficient clusters
        comps = np.zeros((len(eff_clusters), self._n_elems))
        for i, elems in enumerate(eff_clusters):
            for elem in elems:
                comps[i, elem.Z - 1] += 1
        comps = np.divide(comps, comps.sum(axis=1)[:, None])

        # Return tool to quickly determine distance from efficient clusters
        # NearNeighbors requires at least 1 entry, so we return None if
        # there are no nearby clusters
        return NearestNeighbors().fit(comps) if len(comps) > 0 else None

    def find_ideal_cluster_size(self, radius_ratio):
        """
        Get the optimal cluster size for a certain radius ratio

        Finds the number of nearest neighbors :math:`n` that minimizes
        :math:`|1 - rp(n)/r|`, where :math:`rp(n)` is the ideal radius
        ratio for a certain :math:`n` and :math:`r` is the actual ratio.

        Args:
            radius_ratio (float): :math:`r / r_{neighbor}`
        Returns:
            (int) number of neighboring atoms that will be the most
                efficiently packed.
            (float) Optimal APE
        """

        # Loop through cluster sizes from 3 to 24
        best_ape = np.inf
        best_n = None
        for n in range(3, 25):
            # Compute APE, check if it is the best
            ape = 1 - self.get_ideal_radius_ratio(n) / radius_ratio
            if abs(ape) < abs(best_ape):
                best_ape = ape
                best_n = n

            # If the APE is negative, this is either the best APE or
            # we have already passed it
            if ape < 0:
                return best_n, best_ape

        return best_n, best_ape

    def get_ideal_radius_ratio(self, n_neighbors):
        """Compute the ideal ratio between the central atom and neighboring
        atoms for a cluster with a certain number of nearest neighbors.

        Based on work by `Miracle, Lord, and Ranganathan
        <https://www.jstage.jst.go.jp/article/matertrans/47/7/47_7_1737/_article/-char/en>`_.

        Args:
            n_neighbors (int): Number of atoms in 1st NN shell
        Return:
            (float) ideal radius ratio :math:`r / r_{neighbor}`
        """

        # NN must be in [3, 24]; values outside are clamped to the nearest
        # bound of the lookup table
        n = max(3, min(n_neighbors, 24))

        return self.ideal_ratio[n]
class YangSolidSolution(BaseFeaturizer):
    """
    Mixing thermochemistry and size mismatch terms of Yang and Zhang (2012)

    This featurizer returns two different features developed by
    .. Yang and Zhang `https://linkinghub.elsevier.com/retrieve/pii/S0254058411009357`
    to predict whether metal alloys will form metallic glasses,
    crystalline solid solutions, or intermetallics.
    The first, Omega, is related to the balance between the mixing entropy and
    mixing enthalpy of the liquid phase. The second, delta, is related to the
    atomic size mismatch between the different elements of the material.

    Features
        Yang omega - Mixing thermochemistry feature, Omega
        Yang delta - Atomic size mismatch term

    References:
        .. Yang and Zhang (2012) `https://linkinghub.elsevier.com/retrieve/pii/S0254058411009357`.
    """

    def __init__(self):
        # Load in the mixing enthalpy data
        #  Creates a lookup table of the liquid mixing enthalpies
        self.dhf_mix = MixingEnthalpy()

        # Load in a table of elemental properties
        self.elem_data = MagpieData()

    def precheck(self, c: Composition) -> bool:
        """
        Precheck a single entry. YangSolidSolution does not work for
        compositions containing any binary element combinations for which
        the model has no parameters. We can nearly equivalently approximate
        this by checking against the unary element list.

        To precheck an entire dataframe (and automatically gather
        the fraction of structures that will pass the precheck), please use
        precheck_dataframe.

        Args:
            c (pymatgen.Composition): The composition to precheck.

        Returns:
            (bool): If True, c passed the precheck; otherwise, it failed.
        """
        valid_elements = self.dhf_mix.valid_element_list
        return all(
            e in valid_elements for e in c.element_composition.elements
        )

    def featurize(self, comp):
        """Return ``[omega, delta]`` for a composition.

        Args:
            comp (Composition) - Composition to featurize
        Returns:
            ([float]) Yang omega followed by Yang delta
        """
        return [self.compute_omega(comp), self.compute_delta(comp)]

    def compute_omega(self, comp):
        r"""Compute Yang's mixing thermodynamics descriptor

        :math:`\frac{T_m \Delta S_{mix}}{ | \Delta H_{mix} | }`

        Where :math:`T_m` is average melting temperature,
        :math:`\Delta S_{mix}` is the ideal mixing entropy,
        and :math:`\Delta H_{mix}` is the average mixing enthalpies
        of all pairs of elements in the alloy

        Args:
            comp (Composition) - Composition to featurize
        Returns:
            (float) Omega
        """

        # Special case: a single-entry composition has no mixing entropy,
        #  so Omega is reported as 0
        if len(comp) == 1:
            return 0

        # Get the element names and fractions
        elements, fractions = zip(
            *comp.element_composition.fractional_composition.items())

        # Get the mean melting temperature
        mean_Tm = PropertyStats.mean(
            self.elem_data.get_elemental_properties(elements, "MeltingT"),
            fractions)

        # Get the mixing entropy. The sign is dropped by the `abs` below.
        # NOTE(review): 8.314 / 1000 is presumably the gas constant
        #  converted from J to kJ to match the enthalpy table - confirm units
        entropy = np.dot(fractions, np.log(fractions)) * 8.314 / 1000

        # Get the mixing enthalpy, summing over each unordered pair once
        enthalpy = 0
        for i, (e1, f1) in enumerate(zip(elements, fractions)):
            # `zip` stops at the shorter argument, i.e. after `i` pairs
            for e2, f2 in zip(elements[:i], fractions):
                enthalpy += f1 * f2 * self.dhf_mix.get_mixing_enthalpy(e1, e2)
        enthalpy *= 4

        # Make sure the enthalpy is nonzero
        #  The limit as dH->0 of omega is +inf. A very small positive dH will
        #  approximate this limit without causing issues with infinite
        #  features
        enthalpy = max(1e-6, abs(enthalpy))

        return abs(mean_Tm * entropy / enthalpy)

    def compute_delta(self, comp):
        r"""Compute Yang's delta parameter

        :math:`\sqrt{\sum^n_{i=1} c_i \left( 1 - \frac{r_i}{\bar{r}} \right)^2 }`

        where :math:`c_i` and :math:`r_i` are the fraction and radius of
        element :math:`i`, and :math:`\bar{r}` is the fraction-weighted
        average of the radii. We use the radii compiled by
        .. Miracle et al. `https://www.tandfonline.com/doi/ref/10.1179/095066010X12646898728200?scroll=top`.

        Args:
            comp (Composition) - Composition to assess
        Returns:
            (float) delta

        """

        elements, fractions = zip(*comp.element_composition.items())

        # Get the radii of elements
        radii = self.elem_data.get_elemental_properties(
            elements, "MiracleRadius")
        mean_r = PropertyStats.mean(radii, fractions)

        # Compute the mean (1 - r/mean_r)^2
        r_dev = np.power(1.0 - np.divide(radii, mean_r), 2)
        return np.sqrt(PropertyStats.mean(r_dev, fractions))

    def feature_labels(self):
        """Return the labels of the two features, in output order."""
        return ['Yang omega', 'Yang delta']

    def citations(self):
        """Return the BibTeX entry of the referenced publication."""
        return [
            "@article{Yang2012,"
            "author = {Yang, X. and Zhang, Y.},"
            "doi = {10.1016/j.matchemphys.2011.11.021},"
            "journal = {Materials Chemistry and Physics},"
            "number = {2-3},"
            "pages = {233--238},"
            "title = {{Prediction of high-entropy stabilized solid-solution in multi-component alloys}},"
            "url = {http://dx.doi.org/10.1016/j.matchemphys.2011.11.021},"
            "volume = {132},year = {2012}}"
        ]

    def implementors(self):
        """Return the names of the implementors."""
        return ['Logan Ward']
class Meredig(BaseFeaturizer):
    """
    Class to calculate features as defined in Meredig et. al.

    Features:
        Atomic fraction of each of the first 103 elements, in order of
        atomic number.
        17 statistics of elemental properties;
            Mean atomic weight of constituent elements
            Mean periodic table row and column number
            Mean and range of atomic number
            Mean and range of atomic radius
            Mean and range of electronegativity
            Mean number of valence electrons in each orbital
            Fraction of total valence electrons in each orbital
    """

    def __init__(self):
        self.data_source = MagpieData()

        # Labels of the elemental-property statistics; each one reads
        # "<statistic> <property name>"
        self._element_property_feature_labels = [
            "mean AtomicWeight",
            "mean Column",
            "mean Row",
            "range Number",
            "mean Number",
            "range AtomicRadius",
            "mean AtomicRadius",
            "range Electronegativity",
            "mean Electronegativity",
        ]

        # Helper that computes the statistics
        self.pstats = PropertyStats()

    def featurize(self, comp):
        """
        Get elemental property attributes

        Args:
            comp: Pymatgen composition object

        Returns:
            all_attributes: Element fractions, followed by the elemental
                property statistics, followed by the valence orbital features
        """
        # Element fractions come from the ElementFraction featurizer
        fraction_part = ElementFraction().featurize(comp)

        # Statistics on elemental properties, one per stored label
        elements, fractions = zip(*comp.element_composition.items())
        property_part = []
        for label in self._element_property_feature_labels:
            # The first word is the statistic, the remainder the property
            stat, _, attr = label.partition(" ")
            values = [
                self.data_source.get_elemental_property(element, attr)
                for element in elements
            ]
            property_part.append(
                self.pstats.calc_stat(values, stat, fractions))

        # Valence orbital statistics come from the ValenceOrbital featurizer
        orbital_featurizer = ValenceOrbital(
            orbitals=("s", "p", "d", "f"), props=("avg", "frac"))
        orbital_part = orbital_featurizer.featurize(comp)

        return fraction_part + property_part + orbital_part

    def feature_labels(self):
        """Return the labels of all features, in output order."""
        # 'fraction' is appended to the element symbols because there are
        # more features than just the element fractions
        fraction_labels = []
        for symbol in ElementFraction().feature_labels():
            fraction_labels.append(symbol + " fraction")

        orbital_labels = ValenceOrbital().feature_labels()

        return (fraction_labels
                + self._element_property_feature_labels
                + orbital_labels)

    def citations(self):
        """Return the BibTeX entry of the referenced publication."""
        return [
            "@article{meredig_agrawal_kirklin_saal_doak_thompson_zhang_choudhary_wolverton_2014, title={Combinatorial "
            "screening for new materials in unconstrained composition space with machine learning}, "
            "volume={89}, DOI={10.1103/PhysRevB.89.094104}, number={1}, journal={Physical "
            "Review B}, author={B. Meredig, A. Agrawal, S. Kirklin, J. E. Saal, J. W. Doak, A. Thompson, "
            "K. Zhang, A. Choudhary, and C. Wolverton}, year={2014}}"]

    def implementors(self):
        """Return the names of the implementors."""
        return ["Amalie Trewartha"]