예제 #1
0
    def create_dict_for_feature_table(
            picklefile: Union[str, Path]) -> List[dict]:
        """Reads in a pickle with features and returns a list of dictionaries with one dictionary per metal site.

        Arguments:
            picklefile (Union[str, Path]) -- path to pickle file

        Returns:
            List[dict] -- list of dicionary
        """
        result = read_pickle(picklefile)
        mpd = MagpieData()
        result_list = []
        for site in result:
            e = Element(site["metal"])
            valence_electrons = mpd.get_elemental_properties([e],
                                                             "NValence")[0]
            valence_to_donate = diff_to_18e(valence_electrons)
            sunfilled = mpd.get_elemental_properties([e], "NsUnfilled")[0]
            dunfilled = mpd.get_elemental_properties([e], "NdUnfilled")[0]
            punfilled = mpd.get_elemental_properties([e], "NpUnfilled")[0]
            metal_encoding = [
                e.number,
                e.row,
                e.group,
                valence_electrons,
                valence_to_donate,
                sunfilled,
                punfilled,
                dunfilled,
                np.random.randint(1, 18),
            ]
            features = list(site["feature"])
            features.extend(metal_encoding)
            result_dict = {
                "metal": site["metal"],
                "coordinate_x": int(site["coords"][0]),
                "coordinate_y": int(site["coords"][1]),
                "coordinate_z": int(site["coords"][2]),
                "feature": features,
                "name": Path(picklefile).stem,
            }

            if not np.isnan(np.array(features)).any():
                result_list.append(result_dict)

        return result_list
예제 #2
0
    def create_dict_for_feature_table(picklefile: str) -> list:
        """Reads in a pickle with features and returns a list of dictionaries with one dictionary per metal site.

        Arguments:
            picklefile {str} -- path to pickle file

        Returns:
            list -- list of dicionary
        """
        result = read_pickle(picklefile)
        mpd = MagpieData()
        result_list = []
        for site in result:
            e = Element(site['metal'])
            valence_electrons = mpd.get_elemental_properties([e],
                                                             'NValence')[0]
            valence_to_donate = diff_to_18e(valence_electrons)
            sunfilled = mpd.get_elemental_properties([e], 'NsUnfilled')[0]
            dunfilled = mpd.get_elemental_properties([e], 'NdUnfilled')[0]
            punfilled = mpd.get_elemental_properties([e], 'NpUnfilled')[0]
            metal_encoding = [
                e.number,
                e.row,
                e.group,
                valence_electrons,
                valence_to_donate,
                sunfilled,
                punfilled,
                dunfilled,
                np.random.randint(1, 18),
            ]
            features = list(site['feature'])
            features.extend(metal_encoding)
            result_dict = {
                'metal': site['metal'],
                'coordinate_x': int(site['coords'][0]),
                'coordinate_y': int(site['coords'][1]),
                'coordinate_z': int(site['coords'][2]),
                'feature': features,
                'name': Path(picklefile).stem,
            }

            if not np.isnan(np.array(features)).any():
                result_list.append(result_dict)

        return result_list
예제 #3
0
class ValenceOrbital(BaseFeaturizer):
    """
        Attributes of valence orbital shells

        Args:
            data_source (data object): source from which to retrieve element data
            orbitals (list): orbitals to calculate
            props (list): specifies whether to return average number of electrons in each orbital,
                fraction of electrons in each orbital, or both
    """
    def __init__(self, orbitals=("s", "p", "d", "f"), props=("avg", "frac")):
        self.data_source = MagpieData()
        self.orbitals = orbitals
        self.props = props

    def featurize(self, comp):
        """Weighted fraction of valence electrons in each orbital

           Args:
                comp: Pymatgen composition object

           Returns:
                valence_attributes (list of floats): Average number and/or
                    fraction of valence electrons in specfied orbitals
        """

        elements, fractions = zip(*comp.element_composition.items())

        # Get the mean number of electrons in each shell
        avg = [
            PropertyStats.mean(self.data_source.get_elemental_properties(
                elements, f"N{orb}Valence"),
                               weights=fractions) for orb in self.orbitals
        ]

        # If needed, get fraction of electrons in each shell
        if "frac" in self.props:
            # NOTE comprhys: even if needed frac isn't used?
            avg_total_valence = PropertyStats.mean(
                self.data_source.get_elemental_properties(
                    elements, "NValence"),
                weights=fractions)
            frac = [a / avg_total_valence for a in avg]

        # Get the desired attributes
        valence_attributes = []
        for prop in self.props:
            valence_attributes += locals()[prop]

        return valence_attributes

    def feature_labels(self):
        labels = []
        for prop in self.props:
            for orb in self.orbitals:
                labels.append("%s %s valence electrons" % (prop, orb))

        return labels

    def citations(self):
        ward_citation = (
            "@article{ward_agrawal_choudary_wolverton_2016, title={A general-purpose "
            "machine learning framework for predicting properties of inorganic materials}, "
            "volume={2}, DOI={10.1038/npjcompumats.2017.28}, number={1}, journal={npj "
            "Computational Materials}, author={Ward, Logan and Agrawal, Ankit and Choudhary, "
            "Alok and Wolverton, Christopher}, year={2016}}")
        deml_citation = (
            "@article{deml_ohayre_wolverton_stevanovic_2016, title={Predicting density "
            "functional theory total energies and enthalpies of formation of metal-nonmetal "
            "compounds by linear regression}, volume={47}, DOI={10.1002/chin.201644254}, "
            "number={44}, journal={ChemInform}, author={Deml, Ann M. and Ohayre, Ryan and "
            "Wolverton, Chris and Stevanovic, Vladan}, year={2016}}")
        citations = [ward_citation, deml_citation]
        return citations

    def implementors(self):
        return ["Jiming Chen", "Logan Ward"]
예제 #4
0
파일: thermo.py 프로젝트: CompRhys/matminer
class YangSolidSolution(BaseFeaturizer):
    """
    Mixing thermochemistry and size mismatch terms of Yang and Zhang (2012)

    This featurizer returns two different features developed by
    .. Yang and Zhang `https://linkinghub.elsevier.com/retrieve/pii/S0254058411009357`
    to predict whether metal alloys will form metallic glasses,
    crystalline solid solutions, or intermetallics.
    The first, Omega, is related to the balance between the mixing entropy and
    mixing enthalpy of the liquid phase. The second, delta, is related to the
    atomic size mismatch between the different elements of the material.

    Features
        Yang omega - Mixing thermochemistry feature, Omega
        Yang delta - Atomic size mismatch term

    References:
        .. Yang and Zhang (2012) `https://linkinghub.elsevier.com/retrieve/pii/S0254058411009357`.
    """
    def __init__(self):
        # Load in the mixing enthalpy data
        #  Creates a lookup table of the liquid mixing enthalpies
        self.dhf_mix = MixingEnthalpy()

        # Load in a table of elemental properties
        self.elem_data = MagpieData()

    def precheck(self, c: Composition) -> bool:
        """
        Precheck a single entry. YangSolidSolution does not work for compositons
        containing any binary elment combinations for which the model has no
        parameters. We can nearly equivalently approximate this by checking
        against the unary element list.

        To precheck an entire dataframe (qnd automatically gather
        the fraction of structures that will pass the precheck), please use
        precheck_dataframe.

        Args:
            c (pymatgen.Composition): The composition to precheck.

        Returns:
            (bool): If True, s passed the precheck; otherwise, it failed.
        """
        return all([
            e in self.dhf_mix.valid_element_list
            for e in c.element_composition.elements
        ])

    def featurize(self, comp):
        return [self.compute_omega(comp), self.compute_delta(comp)]

    def compute_omega(self, comp):
        """Compute Yang's mixing thermodynamics descriptor

        :math:`\\frac{T_m \Delta S_{mix}}{ |  \Delta H_{mix} | }`

        Where :math:`T_m` is average melting temperature,
        :math:`\Delta S_{mix}` is the ideal mixing entropy,
        and :math:`\Delta H_{mix}` is the average mixing enthalpies
        of all pairs of elements in the alloy

        Args:
            comp (Composition) - Composition to featurizer
        Returns:
            (float) Omega
        """

        # Special case: Elemental compound (entropy == 0 -> Omega == 1)
        if len(comp) == 1:
            return 0

        # Get the element names and fractions
        elements, fractions = zip(
            *comp.element_composition.fractional_composition.items())

        # Get the mean melting temperature
        mean_Tm = PropertyStats.mean(
            self.elem_data.get_elemental_properties(elements, "MeltingT"),
            fractions)

        # Get the mixing entropy
        entropy = np.dot(fractions, np.log(fractions)) * 8.314 / 1000

        # Get the mixing enthalpy
        enthalpy = 0
        for i, (e1, f1) in enumerate(zip(elements, fractions)):
            for e2, f2 in zip(elements[:i], fractions):
                enthalpy += f1 * f2 * self.dhf_mix.get_mixing_enthalpy(e1, e2)
        enthalpy *= 4

        # Make sure the enthalpy is nonzero
        #  The limit as dH->0 of omega is +\inf. A very small positive dH will approximate
        #  this limit without causing issues with infinite features
        enthalpy = max(1e-6, abs(enthalpy))

        return abs(mean_Tm * entropy / enthalpy)

    def compute_delta(self, comp):
        """Compute Yang's delta parameter

        :math:`\sqrt{\sum^n_{i=1} c_i \left( 1 - \\frac{r_i}{\\bar{r}} \\right)^2 }`

        where :math:`c_i` and :math:`r_i` are the fraction and radius of
        element :math:`i`, and :math:`\\bar{r}` is the fraction-weighted
        average of the radii. We use the radii compiled by
        .. Miracle et al. `https://www.tandfonline.com/doi/ref/10.1179/095066010X12646898728200?scroll=top`.

        Args:
            comp (Composition) - Composition to assess
        Returns:
            (float) delta

        """

        elements, fractions = zip(*comp.element_composition.items())

        # Get the radii of elements
        radii = self.elem_data.get_elemental_properties(
            elements, "MiracleRadius")
        mean_r = PropertyStats.mean(radii, fractions)

        # Compute the mean (1 - r/\\bar{r})^2
        r_dev = np.power(1.0 - np.divide(radii, mean_r), 2)
        return np.sqrt(PropertyStats.mean(r_dev, fractions))

    def feature_labels(self):
        return ['Yang omega', 'Yang delta']

    def citations(self):
        return [
            "@article{Yang2012,"
            "author = {Yang, X. and Zhang, Y.},"
            "doi = {10.1016/j.matchemphys.2011.11.021},"
            "journal = {Materials Chemistry and Physics},"
            "number = {2-3},"
            "pages = {233--238},"
            "title = {{Prediction of high-entropy stabilized solid-solution in multi-component alloys}},"
            "url = {http://dx.doi.org/10.1016/j.matchemphys.2011.11.021},"
            "volume = {132},year = {2012}}"
        ]

    def implementors(self):
        return ['Logan Ward']
예제 #5
0
class AtomicPackingEfficiency(BaseFeaturizer):
    """
    Packing efficiency based on a geometric theory of the amorphous packing
    of hard spheres.

    This featurizer computes two different kinds of the features. The first
    relate to the distance between a composition and the composition of
    the clusters of atoms expected to be efficiently packed based on a
    theory from
    `Laws et al.<http://www.nature.com/doifinder/10.1038/ncomms9123>`_.
    The second corresponds to the packing efficiency of a system if all atoms
    in the alloy are simultaneously as efficiently-packed as possible.

    The packing efficiency in these models is based on the Atomic Packing
    Efficiency (APE), which measures the difference between the ratio of
    the radii of the central atom to its neighbors and the ideal ratio
    of a cluster with the same number of atoms that has optimal packing
    efficiency. If the difference between the ratios is too large, the APE is
    positive. If the difference is too small, the APE is negative.

    Features:
        dist from {k} clusters |APE| < {thr} - The distance between an
            alloy composition and the k clusters that have a packing efficiency
            below thr from ideal
        mean simul. packing efficiency - Mean packing efficiency of all atoms.
            The packing efficiency is measured with respect to ideal (0)
        mean abs simul. packing efficiency - Mean absolute value of the
            packing efficiencies. Closer to zero is more efficiently packed

    References:
        [1] K.J. Laws, D.B. Miracle, M. Ferry, A predictive structural model
        for bulk metallic glasses, Nat. Commun. 6 (2015) 8123. doi:10.1038/ncomms9123.
    """

    def __init__(self, threshold=0.01, n_nearest=(1, 3, 5), max_types=6):
        """
        Initialize the featurizer

        Args:
            threshold (float):Threshold to use for determining whether
                a cluster is efficiently packed.
            n_nearest ({int}): Number of nearest clusters to use when
                considering features
            max_types (int): Maximum number of atom types to consider when
                looking for efficient clusters. The process for finding
                efficient clusters very expensive for large numbers of types
        """

        # Store the options
        self.threshold = threshold
        self.n_nearest = n_nearest
        self.max_types = max_types

        # Tool to convert composition objects to fractions as a vector
        self._el_frac = ElementFraction()

        # Get the number of elements in the output of `_el_frac`
        self._n_elems = len(self._el_frac.featurize(Composition('H')))

        # Tool for looking up radii
        self._data_source = MagpieData()

        # Lookup table of ideal radius ratios
        self.ideal_ratio = dict(
            [(3, 0.154701), (4, 0.224745), (5, 0.361654), (6, 0.414214),
             (7, 0.518145), (8, 0.616517), (9, 0.709914), (10, 0.798907),
             (11, 0.884003), (12, 0.902113), (13, 0.976006), (14, 1.04733),
             (15, 1.11632), (16, 1.18318), (17, 1.2481), (18, 1.31123),
             (19, 1.37271), (20, 1.43267), (21, 1.49119), (22, 1.5484),
             (23, 1.60436), (24, 1.65915)])

    def __hash__(self):
        return hash(self.threshold)

    def __eq__(self, other):
        if isinstance(other, AtomicPackingEfficiency):
            return self.get_params() == other.get_params()

    def featurize(self, comp):
        return list(self.compute_simultaneous_packing_efficiency(comp)) + \
               self.compute_nearest_cluster_distance(comp)

    def feature_labels(self):
        return ['mean simul. packing efficiency',
                'mean abs simul. packing efficiency'] + [
                   f"dist from {k} clusters |APE| < {self.threshold:.3f}"
                   for k in self.n_nearest]

    def citations(self):
        return ["@article{Laws2015,"
                "author = {Laws, K. J. and Miracle, D. B. and Ferry, M.},"
                "doi = {10.1038/ncomms9123},"
                "journal = {Nature Communications},"
                "pages = {8123},"
                "title = {{A predictive structural model for bulk metallic glasses}},"
                "url = {http://www.nature.com/doifinder/10.1038/ncomms9123},"
                "volume = {6},"
                "year = {2015}"]

    def implementors(self):
        return ['Logan Ward']

    def compute_simultaneous_packing_efficiency(self, comp):
        """Compute the packing efficiency of the system when the neighbor
        shell of each atom has the same composition as the alloy. When this
        criterion is satisfied, it is possible for every atom in this system
        to be simultaneously as efficiently-packed as possible.

        Args:
            comp (Composition): Composition to be assessed
        Returns
            (float) Average APE of all atoms
            (float) Average deviation of the APE of each atom from ideal (0)
        """

        # Compute the average atomic radius of the system
        elements, fractions = zip(*comp.element_composition.items())
        radii = self._data_source.get_elemental_properties(elements,
                                                           'MiracleRadius')
        mean_radius = PropertyStats.mean(radii, fractions)

        # Compute the APE for each cluster
        best_ape = [
            self.find_ideal_cluster_size(r / mean_radius)[1] for r in radii
        ]

        # Return the averages
        return PropertyStats.mean(best_ape, fractions), PropertyStats.mean(np.abs(best_ape), fractions)

    def compute_nearest_cluster_distance(self, comp):
        """Compute the distance between a composition and that the nearest
        efficiently-packed clusters.

        Measures the mean :math:`L_2` distance between the alloy composition
        and the :math:`k`-nearest clusters with Atomic Packing Efficiencies
        within the user-specified tolerance of 1. :math:`k` is any of the
        numbers defined in the "n_nearest" parameter of this class.

        If there are less than `k` efficient clusters in the system, we use
        the maximum distance betweeen any two compositions (1) for the
        unmatched neighbors.

        Args:
            comp (Composition): Composition of material to evaluate
        Return:
            [float] Average distances
        """

        # Get the most common elements
        elems, _ = zip(*sorted(comp.element_composition.items(),
                                   key=lambda x: x[1], reverse=True))

        # Get the cluster lookup tool using the most common elements
        cluster_lookup = self.create_cluster_lookup_tool(
            elems[:self.max_types]
        )

        # Compute the composition vector
        comp_vec = self._el_frac.featurize(comp)

        # Compute the distances
        means = []
        for k in self.n_nearest:
            # Get the nearest clusters
            if cluster_lookup is None:
                dists = (np.array([]),)
                to_lookup = 0
            else:
                to_lookup = min(cluster_lookup._fit_X.shape[0], k)
                dists, _ = cluster_lookup.kneighbors([comp_vec], to_lookup)

            # Pad the list with 1's
            dists = dists[0].tolist() + [1]*(k - to_lookup)

            # Compute the average
            means.append(np.mean(dists))

        return means

    def create_cluster_lookup_tool(self, elements):
        """
        Get the compositions of efficiently-packed clusters in a certain system
        of elements

        Args:
            elements ([Element]): Elements in system
        Return:
            (NearNeighbors): Tool to find nearby clusters in this system.
                None if there are no efficiently-packed clusters for this
                combination of elements
        """
        elements = list(set(elements))
        return self._create_cluster_lookup_tool(tuple(sorted(elements)))

    @lru_cache()
    def _create_cluster_lookup_tool(self, elements):
        """
        Cached version of `create_cluster_lookup_tool`. Assumes that the
        elements are passed as sorted tuple with no duplicates

        Args:
            elements ([Element]): Elements in system
        Return:
            (NearNeighbors): Tool to find nearby clusters in this system. If
            there are no clusters, this class returns None
        """

        # Get the radii
        radii = self._data_source.get_elemental_properties(elements,
                                                           "MiracleRadius")

        # Get the maximum and minimum cluster sizes
        max_size = self.find_ideal_cluster_size(max(radii) / min(radii))[0]
        min_size = self.find_ideal_cluster_size(min(radii) / max(radii))[0]

        # Prepare a list to hold all possible clusters
        eff_clusters = []

        # Loop through all possible neighbor shells
        for size in range(min_size, max_size + 1):
            # Get the ideal radius ratio for a cluster of this size
            ideal_ratio = self.get_ideal_radius_ratio(size)

            # Get the mean radii and compositions of all possible
            #  combinations of elements in the neighbor shell
            s_radii = itertools.combinations_with_replacement(radii, size)
            s_elems = itertools.combinations_with_replacement(elements, size)

            #  Put the results in arrays for fast indexing
            mean_radii = np.array(list(s_radii)).mean(axis=1)
            s_elems = np.array(list(s_elems))

            # For each type of central atom, determine which have an APE
            #  within `self.threshold` of 1
            for center_radius, center_elem in zip(radii, elements):
                # Compute the APE of each cluster
                ape = 1 - np.divide(ideal_ratio, np.divide(center_radius,
                                                           mean_radii))

                # Get those which are within the threshold of 0
                #  and add their composition to the list of OK elements
                for hit in s_elems[np.abs(ape) < self.threshold]:
                    eff_clusters.append([center_elem] + hit.tolist())

        # Compute the composition vectors for all of the efficient clusters
        comps = np.zeros((len(eff_clusters), self._n_elems))
        for i, elems in enumerate(eff_clusters):
            for elem in elems:
                comps[i, elem.Z - 1] += 1
        comps = np.divide(comps, comps.sum(axis=1)[:, None])

        # Return tool to quickly determine distance from efficient clusters
        #  NearNeighbors requires at least 1 entry, so we return None if
        #   there are no nearby clusters
        return NearestNeighbors().fit(comps) if len(comps) > 0 else None

    def find_ideal_cluster_size(self, radius_ratio):
        """
        Get the optimal cluster size for a certain radius ratio

        Finds the number of nearest neighbors :math:`n` that minimizes
        :math:`|1 - rp(n)/r|`, where :math:`rp(n)` is the ideal radius
        ratio for a certain :math:`n` and :math:`r` is the actual ratio.

        Args:
            radius_ratio (float): :math:`r / r_{neighbor}`
        Returns:
            (int) number of neighboring atoms for that will be the most
            efficiently packed.
            (float) Optimal APE
        """

        # Loop through cluster sizes from 3 to 24
        best_ape = np.inf
        best_n = None
        for n in range(3, 25):
            # Compute APE, check if it is the best
            ape = 1 - self.get_ideal_radius_ratio(n) / radius_ratio
            if abs(ape) < abs(best_ape):
                best_ape = ape
                best_n = n

            # If the APE is negative, this is either the best APE or
            #  We have already passed it
            if ape < 0:
                return best_n, best_ape

        return best_n, best_ape

    def get_ideal_radius_ratio(self, n_neighbors):
        """Compute the idea ratio between the central atom and neighboring
        atoms for a neighbor with a certain number of nearest neighbors.

        Based on work by `Miracle, Lord, and Ranganathan
        <https://www.jstage.jst.go.jp/article/matertrans/47/7/47_7_1737/_article/-char/en>`_.

        Args:
            n_neighbors (int): Number of atoms in 1st NN shell
        Return:
            (float) ideal radius ratio :math:`r / r_{neighbor}`
        """

        # NN must be in [3, 24]
        n = max(3, min(n_neighbors, 24))

        return self.ideal_ratio[n]