Beispiel #1
0
    def __init__(self, materials, site_descriptors, query=None, **kwargs):
        """
        Set up a builder that computes site descriptors for materials.

        Args:
            materials (Store): Store of materials documents
            site_descriptors (Store): Store of site-descriptors data such as
                tetrahedral order parameter or percentage of 8-fold
                coordination
            query (dict): dictionary to limit materials to be analyzed; a
                falsy value is replaced by an empty dict
            **kwargs: forwarded unchanged to the parent initializer
        """
        self.materials = materials
        self.site_descriptors = site_descriptors
        self.query = query or {}

        # For every NearNeighbors subclass register two coordination-number
        # featurizers: an unweighted one ('cn_<tag>') followed by a weighted
        # one ('cn_wt_<tag>').  The tag is the abbreviation from
        # cls_to_abbrev when one exists, otherwise the class name itself.
        self.sds = {}
        for nn_cls in NearNeighbors.__subclasses__():
            cls_name = nn_cls.__name__
            finder_cls = getattr(pymatgen.analysis.local_env, cls_name)
            if cls_name in cls_to_abbrev:
                tag = cls_to_abbrev[cls_name]
            else:
                tag = cls_name
            for prefix, weighted in (('cn', False), ('cn_wt', True)):
                self.sds['{}_{}'.format(prefix, tag)] = CoordinationNumber(
                    finder_cls(), use_weights=weighted)
        self.sds['opsf'] = OPSiteFingerprint()
        # The CrystalSiteFingerprint ('csf') descriptor is currently disabled.

        super().__init__(
            sources=[materials], targets=[site_descriptors], **kwargs)
Beispiel #2
0
    def __init__(self, materials, descriptors, **kwargs):
        """
        Set up the site, statistics and composition descriptors to compute.

        Site-based descriptors are coordination numbers obtained with
        different near-neighbor finding approaches.  Statistics analysis is
        run on selected descriptor types (order parameter-based site
        fingerprints), which is useful as a definition of a structure
        fingerprint on the basis of local coordination information.
        Composition descriptors are the Magpie element property vector.

        Args:
            materials (Store): Store of materials documents.
            descriptors (Store): Store of composition, site, and
                                 structure descriptor data such
                                 as tetrahedral order parameter or
                                 fraction of being 8-fold coordinated.
            **kwargs: forwarded unchanged to the parent initializer.
        """
        self.materials = materials
        self.descriptors = descriptors

        # Two coordination-number featurizers per targeted near-neighbor
        # class: use_weights="none" first, then use_weights="sum".
        self.sds = {}
        for nn_name in nn_target_classes:
            finder_cls = getattr(local_env, nn_name)
            for key_fmt, weighting in (("cn_{}", "none"), ("cn_wt_{}", "sum")):
                self.sds[key_fmt.format(nn_name)] = CoordinationNumber(
                    finder_cls(), use_weights=weighting)

        # Snapshot the coordination-number keys *before* "csf" is added, so
        # that "csf" is listed under "statistics" only.
        self.all_output_pieces = {"site_descriptors": list(self.sds)}
        self.sds["csf"] = CrystalNNFingerprint.from_preset(
            "ops", distance_cutoffs=None, x_diff_weight=None)
        self.all_output_pieces["statistics"] = ["csf"]

        # Composition descriptors.
        self.cds = {"magpie": ElementProperty.from_preset("magpie")}
        self.all_output_pieces["composition_descriptors"] = ["magpie"]

        self.all_output_pieces["meta"] = ["atomate"]

        super().__init__(
            source=materials,
            target=descriptors,
            ufn=self.calc,
            projection=["structure"],
            **kwargs
        )
Beispiel #3
0
def get_fps(structure, cutoff=10.0, processes=8):
    """Compute descriptors for every site of ``structure`` in parallel.

    The structure-level fingerprints (VoronoiFingerprintModified and
    GaussianSymmFuncModified) are computed once up front; the remaining
    featurizer objects are only constructed here.  One work item per site is
    then handed to ``get_all_site_descrs`` through a process pool.

    Args:
        structure: structure whose sites are enumerated; it is also passed
            to the ``featurize_structure`` calls.
        cutoff (float): cutoff given to ``VoronoiFingerprintModified``.
        processes (int): number of worker processes in the pool.

    Returns:
        ``numpy.ndarray`` built from the per-site results, or an empty list
        when an ``AttributeError`` or ``IndexError`` occurred along the way.
    """
    import warnings

    all_descrs = []

    try:
        coordination_number_ = CoordinationNumber.from_preset('VoronoiNN')
        voronoi_fps_ = VoronoiFingerprintModified(
            cutoff=cutoff).featurize_structure(structure)
        crystal_nn_fingerprint_ = CrystalNNFingerprint.from_preset('cn')
        op_site_fingerprint_ = OPSiteFingerprint()
        agni_fingerprints_ = AGNIFingerprints()
        gaussian_symm_func_fps_ = GaussianSymmFuncModified(
        ).featurize_structure(structure)
        pymatgen_data_ = PymatgenData()
        magpie_data_ = MagpieData()

        # Every work item carries the full argument list for one site.
        data_list = [[
            structure, i, site, coordination_number_, voronoi_fps_,
            crystal_nn_fingerprint_, op_site_fingerprint_, agni_fingerprints_,
            gaussian_symm_func_fps_, pymatgen_data_, magpie_data_
        ] for i, site in enumerate(structure)]

        # The context manager shuts the workers down once map() has
        # returned (or raised); previously the pool was never closed and
        # its worker processes leaked on every call.
        with multiprocessing.Pool(processes=processes) as pool:
            all_descrs = np.array(pool.map(get_all_site_descrs, data_list))

    except (AttributeError, IndexError) as error:
        # Best effort: keep returning the empty result, but no longer hide
        # the failure completely.
        warnings.warn(
            "get_fps failed, returning no descriptors: {!r}".format(error),
            RuntimeWarning,
        )

    return all_descrs
Beispiel #4
0
    def __init__(self, materials, descriptors, mat_query=None, **kwargs):
        """
        Set up the site, statistics and composition descriptors to compute.

        Site-based descriptors are coordination numbers obtained with
        different near-neighbor finding approaches.  Statistics analysis is
        run on selected descriptor types (order parameter-based site
        fingerprints), which is useful as a definition of a structure
        fingerprint on the basis of local coordination information.
        Composition descriptors are the Magpie element property vector.

        Args:
            materials (Store): Store of materials documents.
            descriptors (Store): Store of composition, site, and
                                 structure descriptor data such
                                 as tetrahedral order parameter or
                                 fraction of being 8-fold coordinated.
            mat_query (dict): dictionary to limit materials to be analyzed;
                              a falsy value is replaced by an empty dict.
            **kwargs: forwarded unchanged to the parent initializer.
        """
        self.materials = materials
        self.descriptors = descriptors
        self.mat_query = mat_query or {}

        # Two coordination-number featurizers per targeted near-neighbor
        # class: use_weights='none' first, then use_weights='sum'.
        self.sds = {}
        for nn_name in nn_target_classes:
            finder_cls = getattr(pymatgen.analysis.local_env, nn_name)
            for key_fmt, weighting in (('cn_{}', 'none'), ('cn_wt_{}', 'sum')):
                self.sds[key_fmt.format(nn_name)] = CoordinationNumber(
                    finder_cls(), use_weights=weighting)

        # Snapshot the coordination-number keys *before* 'csf' is added, so
        # that 'csf' is listed under 'statistics' only.
        self.all_output_pieces = {'site_descriptors': list(self.sds)}
        self.sds['csf'] = CrystalNNFingerprint.from_preset(
            'ops', distance_cutoffs=None, x_diff_weight=None)
        self.all_output_pieces['statistics'] = ['csf']

        # Composition descriptors.
        self.cds = {"magpie": ElementProperty.from_preset('magpie')}
        self.all_output_pieces['composition_descriptors'] = ['magpie']

        self.all_output_pieces['meta'] = ['atomate']

        super().__init__(sources=[materials], targets=[descriptors], **kwargs)
Beispiel #5
0
    def __init__(self, materials, site_descriptors, mat_query=None, **kwargs):
        """
        Set up the site descriptors and statistics targets to compute.

        Site-based descriptors are coordination numbers obtained with
        different near-neighbor finding approaches.  Statistics analysis is
        run on selected descriptor types (order parameter-based site
        fingerprints), which is useful as a definition of a structure
        fingerprint on the basis of local coordination information.

        Args:
            materials (Store): Store of materials documents.
            site_descriptors (Store): Store of site-descriptors data such
                                      as tetrahedral order parameter or
                                      fraction of being 8-fold coordinated.
            mat_query (dict): dictionary to limit materials to be analyzed;
                              a falsy value is replaced by an empty dict.
            **kwargs: forwarded unchanged to the parent initializer.
        """
        self.materials = materials
        self.site_descriptors = site_descriptors
        self.mat_query = mat_query or {}

        # Two coordination-number featurizers per NearNeighbors subclass,
        # keyed by the class name: use_weights='none' first, then 'sum'.
        self.sds = {}
        for nn_cls in NearNeighbors.__subclasses__():
            cls_name = nn_cls.__name__
            finder_cls = getattr(pymatgen.analysis.local_env, cls_name)
            for key_fmt, weighting in (('cn_{}', 'none'), ('cn_wt_{}', 'sum')):
                self.sds[key_fmt.format(cls_name)] = CoordinationNumber(
                    finder_cls(), use_weights=weighting)

        # Snapshot the coordination-number keys before the fingerprints are
        # added; the fingerprints are listed under 'statistics' instead.
        self.all_output_pieces = {'site_descriptors': list(self.sds)}
        self.sds['opsf'] = OPSiteFingerprint()
        self.sds['csf'] = CrystalSiteFingerprint.from_preset('ops')
        self.all_output_pieces['statistics'] = ['opsf', 'csf']

        super().__init__(
            sources=[materials], targets=[site_descriptors], **kwargs)
Beispiel #6
0
class DeBreuck2020Featurizer(modnet.featurizers.MODFeaturizer):
    """ Featurizer presets used for the paper 'Machine learning
    materials properties for small datasets' by Pierre-Paul De Breuck,
    Geoffroy Hautier & Gian-Marco Rignanese, arXiv:2004.14766 (2020).

    Uses most of the featurizers implemented by matminer at the time of
    writing with their default hyperparameters and presets.

    The ``featurize_*`` overrides call the parent implementation and then
    turn selected categorical/object columns into numbers before passing
    the frame through ``modnet.featurizers.clean_df``.

    """
    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        # CohesiveEnergy, - This descriptor was not used in the paper preset
        # ElectronAffinity, - This descriptor was not used in the paper preset
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        # BagofBonds, - This descriptor was not used in the paper preset
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        # PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )

    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxide_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        # PartialRadialDistributionFunction(),
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
        # BagofBonds(),
    )
    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    def featurize_composition(self, df):
        """ Applies the preset composition featurizers to the input dataframe
        and converts the AtomicOrbitals HOMO/LUMO columns to numbers.

        Orbital characters are mapped s/p/d/f -> 1/2/3/4 and element symbols
        to their atomic number (-1 when the entry is not a string).
        Infinite and NaN entries are replaced by 0 before the frame is
        handed to ``modnet.featurizers.clean_df``.

        Args:
            df: dataframe accepted by the parent ``featurize_composition``.

        Returns:
            The result of ``modnet.featurizers.clean_df`` on the frame.

        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df['AtomicOrbitals|HOMO_character'] = df[
            'AtomicOrbitals|HOMO_character'].map(_orbitals)
        df['AtomicOrbitals|LUMO_character'] = df[
            'AtomicOrbitals|LUMO_character'].map(_orbitals)

        df['AtomicOrbitals|HOMO_element'] = df[
            'AtomicOrbitals|HOMO_element'].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z)
        df['AtomicOrbitals|LUMO_element'] = df[
            'AtomicOrbitals|LUMO_element'].apply(
                lambda x: -1 if not isinstance(x, str) else Element(x).Z)

        df = df.replace([np.inf, -np.inf, np.nan], 0)

        return modnet.featurizers.clean_df(df)

    def featurize_structure(self, df):
        """ Applies the preset structural featurizers to the input dataframe,
        expands the radial distribution function into scalar columns and
        encodes the symmetry columns as integers.

        Args:
            df: dataframe accepted by the parent ``featurize_structure``.

        Returns:
            The result of ``modnet.featurizers.clean_df`` on the frame.

        """
        df = super().featurize_structure(df)

        # The distance grid of the first row (at most 50 points) names the
        # new columns; every row contributes its own distribution values.
        dist = df[
            "RadialDistributionFunction|radial distribution function"].iloc[0][
                'distances'][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d)
            df[_rdf_key] = df[
                "RadialDistributionFunction|radial distribution function"].apply(
                    lambda x: x['distribution'][i])

        df = df.drop("RadialDistributionFunction|radial distribution function",
                     axis=1)

        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            # The correct spelling was missing, so orthorhombic structures
            # never matched and were mapped to NaN.  The misspelled key is
            # kept only so that existing lookups keep working.
            # NOTE(review): this changes the feature value for orthorhombic
            # entries - confirm against any pretrained models.
            "orthorhombic": 3,
            "orthorombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7
        }

        def _int_map(x):
            # NaN never compares equal to anything (itself included), so a
            # plain ``x == np.nan`` test is always False and NaN, being
            # truthy, used to be encoded as 1 instead of 0.
            if isinstance(x, (float, np.floating)) and np.isnan(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"].map(_int_map)

        return modnet.featurizers.clean_df(df)

    def featurize_site(self, df):
        """ Applies the preset site featurizers to the input dataframe with
        legacy feature-name aliases and drops columns that are zero in
        every row.

        Args:
            df: dataframe accepted by the parent ``featurize_site``.

        Returns:
            The result of ``modnet.featurizers.clean_df`` on the frame.

        """

        # rename some features for backwards compatibility with pretrained models
        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        # Keep only the columns holding at least one non-zero entry.
        df = df.loc[:, (df != 0).any(axis=0)]

        return modnet.featurizers.clean_df(df)
Beispiel #7
0
 def test_cns(self):
     """Check CoordinationNumber labels, values, citations and implementors.

     Covers the VoronoiNN, JmolNN, MinimumDistanceNN, MinimumOKeeffeNN and
     MinimumVIRENN neighbor finders on the ``self.sc`` and ``self.cscl``
     fixture structures.
     """

     def check(featurizer, label, site_values, n_citations):
         # Assertion order: label, then per-site values, then citations.
         self.assertEqual(featurizer.feature_labels()[0], label)
         for structure, site_idx, expected in site_values:
             self.assertAlmostEqual(
                 featurizer.featurize(structure, site_idx)[0], expected)
         self.assertEqual(len(featurizer.citations()), n_citations)

     # Unweighted Voronoi preset; the only case where the number of
     # feature labels is checked as well.
     voronoi = CoordinationNumber.from_preset('VoronoiNN')
     self.assertEqual(len(voronoi.feature_labels()), 1)
     check(voronoi, 'CN_VoronoiNN',
           [(self.sc, 0, 6), (self.cscl, 0, 14), (self.cscl, 1, 14)], 2)

     # Weighted Voronoi variants are evaluated on CsCl only.
     check(CoordinationNumber(VoronoiNN(), use_weights='sum'),
           'CN_VoronoiNN',
           [(self.cscl, 0, 9.2584516), (self.cscl, 1, 9.2584516)], 2)
     check(CoordinationNumber(VoronoiNN(), use_weights='effective'),
           'CN_VoronoiNN',
           [(self.cscl, 0, 11.648923254), (self.cscl, 1, 11.648923254)], 2)

     # The JmolNN preset yields 0 on these fixtures; with the updated
     # element radii the expected values are 6 / 8 / 8.
     check(CoordinationNumber.from_preset('JmolNN'), 'CN_JmolNN',
           [(self.sc, 0, 0), (self.cscl, 0, 0), (self.cscl, 1, 0)], 1)
     jmol_finder = JmolNN(
         el_radius_updates={"Al": 1.55, "Cl": 1.7, "Cs": 1.7})
     check(CoordinationNumber(jmol_finder), 'CN_JmolNN',
           [(self.sc, 0, 6), (self.cscl, 0, 8), (self.cscl, 1, 8)], 1)

     check(CoordinationNumber.from_preset('MinimumDistanceNN'),
           'CN_MinimumDistanceNN',
           [(self.sc, 0, 6), (self.cscl, 0, 8), (self.cscl, 1, 8)], 1)
     check(CoordinationNumber.from_preset('MinimumOKeeffeNN'),
           'CN_MinimumOKeeffeNN',
           [(self.sc, 0, 6), (self.cscl, 0, 8), (self.cscl, 1, 6)], 2)

     # MinimumVIRENN additionally has its implementors checked.
     virenn = CoordinationNumber.from_preset('MinimumVIRENN')
     check(virenn, 'CN_MinimumVIRENN',
           [(self.sc, 0, 6), (self.cscl, 0, 8), (self.cscl, 1, 14)], 2)
     self.assertEqual(len(virenn.implementors()), 2)
     self.assertEqual(virenn.implementors()[0], 'Nils E. R. Zimmermann')
Beispiel #8
0
class FUTURE_PROSPECTS_2021(featurizer.extendedMODFeaturizer):
    """Featurizer preset combining matminer composition, structure, site,
    DOS and band-structure featurizers.

    The ``featurize_*`` overrides call the parent implementation and then
    turn selected categorical/object columns into numbers before passing
    the frame through ``clean_df``.
    """

    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        BagofBonds,
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )

    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )
    from matminer.featurizers.dos import (
        DOSFeaturizer,
        SiteDOS,
        Hybridization,
        DosAsymmetry,
    )
    from matminer.featurizers.bandstructure import (
        BandFeaturizer,
        BranchPointEnergy
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxid_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        #PartialRadialDistributionFunction(), #Introduces a large amount of features
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
    )
    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    dos_featurizers = (
        DOSFeaturizer(),
        SiteDOS(),
        Hybridization()
    )

    band_featurizers = (
        BandFeaturizer(),
        BranchPointEnergy()
    )

    def __init__(self, n_jobs=None):
        """Store the requested number of jobs.

        Args:
            n_jobs: value stored on ``self._n_jobs``.
        """
        # NOTE(review): the parent initializer is not called here - confirm
        # that the base class needs no further set-up.
        self._n_jobs = n_jobs

    def featurize_composition(self, df):
        """Applies the preset composition featurizers to the input dataframe
        and converts the AtomicOrbitals HOMO/LUMO columns to numbers.

        Orbital characters are mapped s/p/d/f -> 1/2/3/4 and element symbols
        to their atomic number (-1 when the entry is not a string).

        Args:
            df: dataframe accepted by the parent ``featurize_composition``.

        Returns:
            The result of ``clean_df`` on the frame.
        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df["AtomicOrbitals|HOMO_character"] = df["AtomicOrbitals|HOMO_character"].map(
            _orbitals
        )
        df["AtomicOrbitals|LUMO_character"] = df["AtomicOrbitals|LUMO_character"].map(
            _orbitals
        )

        df["AtomicOrbitals|HOMO_element"] = df["AtomicOrbitals|HOMO_element"].apply(
            lambda x: -1 if not isinstance(x, str) else Element(x).Z
        )
        df["AtomicOrbitals|LUMO_element"] = df["AtomicOrbitals|LUMO_element"].apply(
            lambda x: -1 if not isinstance(x, str) else Element(x).Z
        )

        return clean_df(df)

    def featurize_structure(self, df):
        """Applies the preset structural featurizers to the input dataframe,
        expands the radial distribution function into scalar columns and
        encodes the symmetry columns as integers.

        Args:
            df: dataframe accepted by the parent ``featurize_structure``.

        Returns:
            The result of ``clean_df`` on the frame.
        """
        df = super().featurize_structure(df)

        # The distance grid of the first row (at most 50 points) names the
        # new columns; every row contributes its own distribution values.
        dist = df["RadialDistributionFunction|radial distribution function"].iloc[0][
            "distances"
        ][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d
            )
            df[_rdf_key] = df[
                "RadialDistributionFunction|radial distribution function"
            ].apply(lambda x: x["distribution"][i])

        df = df.drop("RadialDistributionFunction|radial distribution function", axis=1)

        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            # The correct spelling was missing, so orthorhombic structures
            # never matched and were mapped to NaN.  The misspelled key is
            # kept only so that existing lookups keep working.
            # NOTE(review): this changes the feature value for orthorhombic
            # entries - confirm against any previously trained models.
            "orthorhombic": 3,
            "orthorombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7,
        }

        def _int_map(x):
            # NaN never compares equal to anything (itself included), so a
            # plain ``x == np.nan`` test is always False and NaN, being
            # truthy, used to be encoded as 1 instead of 0.
            if isinstance(x, (float, np.floating)) and np.isnan(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"
        ].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"
        ].map(_int_map)

        return clean_df(df)

    def featurize_dos(self, df):
        """Applies the preset DOS featurizers to the input dataframe and
        converts the DOSFeaturizer columns to numbers.

        The VBM/CBM specie columns are one-hot encoded, the orbital
        characters are mapped s/p/d/f -> 1/2/3/4, and each
        ``number;number;number`` location column is split into three float
        columns.  The original location columns and the ``dos`` column are
        dropped.

        Args:
            df: dataframe accepted by the parent ``featurize_dos``.

        Returns:
            The result of ``clean_df`` on the frame.
        """

        df = super().featurize_dos(df)

        hot_encode_columns = [
            "DOSFeaturizer|vbm_specie_1",
            "DOSFeaturizer|cbm_specie_1",
        ]

        one_hot = pd.get_dummies(df[hot_encode_columns])
        df = df.drop(hot_encode_columns, axis=1).join(one_hot)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}

        df["DOSFeaturizer|vbm_character_1"] = df[
            "DOSFeaturizer|vbm_character_1"
        ].map(_orbitals)
        df["DOSFeaturizer|cbm_character_1"] = df[
            "DOSFeaturizer|cbm_character_1"
        ].map(_orbitals)

        # Splitting one feature into several floating features
        # e.g. number;number;number into three columns
        split_columns = [
            "DOSFeaturizer|cbm_location_1",
            "DOSFeaturizer|vbm_location_1",
        ]

        for column in split_columns:
            try:
                parts = df[column].str.split(";", n=2, expand=True)
                for i in range(3):
                    # The builtin ``float`` replaces the ``np.float`` alias,
                    # which no longer exists in NumPy >= 1.24; with the old
                    # bare ``except`` the resulting AttributeError was
                    # swallowed and these columns were never created.
                    df[column + "_" + str(i)] = np.array(parts[i]).astype(float)
            except (AttributeError, KeyError, TypeError, ValueError):
                # Best effort: a column that cannot be split or converted
                # is skipped (components already written are kept).
                continue
        df = df.drop(split_columns, axis=1)
        df = df.drop(["dos"], axis=1)
        return clean_df(df)

    def featurize_bandstructure(self, df):
        """Applies the preset band structure featurizers to the input
        dataframe and encodes ``BandFeaturizer|is_gap_direct`` as 0/1.

        The ``bandstructure`` column is dropped afterwards.

        Args:
            df: dataframe accepted by the parent ``featurize_bandstructure``.

        Returns:
            The result of ``clean_df`` on the frame.
        """

        df = super().featurize_bandstructure(df)

        def _int_map(x):
            # Compare the string form so that both booleans and the strings
            # "True"/"False" are recognised; anything else maps to None.
            if str(x) == "False":
                return 0
            elif str(x) == "True":
                return 1
            return None

        df["BandFeaturizer|is_gap_direct"] = df[
            "BandFeaturizer|is_gap_direct"
        ].map(_int_map)

        df = df.drop(["bandstructure"], axis=1)

        return clean_df(df)

    def featurize_site(self, df):
        """Applies the preset site featurizers to the input dataframe with
        feature-name aliases and drops columns that are zero in every row.

        Args:
            df: dataframe accepted by the parent ``featurize_site``.

        Returns:
            The result of ``clean_df`` on the frame.
        """

        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        # Keep only the columns holding at least one non-zero entry.
        df = df.loc[:, (df != 0).any(axis=0)]

        return clean_df(df)
Beispiel #9
0
 def test_cns(self):
     """Check CoordinationNumber labels, values, citations and implementors.

     Covers the VoronoiNN, JmolNN, MinimumDistanceNN, MinimumOKeeffeNN and
     MinimumVIRENN neighbor finders on the ``self.sc`` and ``self.cscl``
     fixture structures.
     """

     def check(featurizer, label, site_values, n_citations):
         # Assertion order: label, then per-site values, then citations.
         self.assertEqual(featurizer.feature_labels()[0], label)
         for structure, site_idx, expected in site_values:
             self.assertAlmostEqual(
                 featurizer.featurize(structure, site_idx)[0], expected)
         self.assertEqual(len(featurizer.citations()), n_citations)

     # Unweighted Voronoi preset; the only case where the number of
     # feature labels is checked as well.
     voronoi = CoordinationNumber.from_preset('VoronoiNN')
     self.assertEqual(len(voronoi.feature_labels()), 1)
     check(voronoi, 'CN_VoronoiNN',
           [(self.sc, 0, 6), (self.cscl, 0, 14), (self.cscl, 1, 14)], 2)

     # Weighted Voronoi variants are evaluated on CsCl only.
     check(CoordinationNumber(VoronoiNN(), use_weights='sum'),
           'CN_VoronoiNN',
           [(self.cscl, 0, 9.2584516), (self.cscl, 1, 9.2584516)], 2)
     check(CoordinationNumber(VoronoiNN(), use_weights='effective'),
           'CN_VoronoiNN',
           [(self.cscl, 0, 11.648923254), (self.cscl, 1, 11.648923254)], 2)

     # The JmolNN preset yields 0 on these fixtures; with the updated
     # element radii the expected values are 6 / 8 / 8.
     check(CoordinationNumber.from_preset('JmolNN'), 'CN_JmolNN',
           [(self.sc, 0, 0), (self.cscl, 0, 0), (self.cscl, 1, 0)], 1)
     jmol_finder = JmolNN(
         el_radius_updates={"Al": 1.55, "Cl": 1.7, "Cs": 1.7})
     check(CoordinationNumber(jmol_finder), 'CN_JmolNN',
           [(self.sc, 0, 6), (self.cscl, 0, 8), (self.cscl, 1, 8)], 1)

     check(CoordinationNumber.from_preset('MinimumDistanceNN'),
           'CN_MinimumDistanceNN',
           [(self.sc, 0, 6), (self.cscl, 0, 8), (self.cscl, 1, 8)], 1)
     check(CoordinationNumber.from_preset('MinimumOKeeffeNN'),
           'CN_MinimumOKeeffeNN',
           [(self.sc, 0, 6), (self.cscl, 0, 8), (self.cscl, 1, 6)], 2)

     # MinimumVIRENN additionally has its implementors checked.
     virenn = CoordinationNumber.from_preset('MinimumVIRENN')
     check(virenn, 'CN_MinimumVIRENN',
           [(self.sc, 0, 6), (self.cscl, 0, 8), (self.cscl, 1, 14)], 2)
     self.assertEqual(len(virenn.implementors()), 2)
     self.assertEqual(virenn.implementors()[0], 'Nils E. R. Zimmermann')
Beispiel #10
0
    def from_preset(preset, **kwargs):
        """
        Create a SiteStatsFingerprint class according to a preset

        Args:
            preset (str) - Name of preset
            kwargs - Options for SiteStatsFingerprint.  They are forwarded
                only for the ``CrystalNNFingerprint_*``,
                ``OPSiteFingerprint`` and coordination-number presets; the
                remaining presets fix ``stats`` themselves and ignore them.

        Returns:
            SiteStatsFingerprint configured for the requested preset.

        Raises:
            ValueError: if ``preset`` is none of the named presets and
                building a coordination-number fingerprint from it fails.
                The underlying error is attached as ``__cause__``.
        """
        # Settings shared by more than one preset (rebuilt on every call, so
        # no list is shared between the returned objects).
        ward_stats = ["minimum", "maximum", "range", "mean", "avg_dev"]
        dejong_properties = [
            "Number", "AtomicWeight", "Column", "Row",
            "CovalentRadius", "Electronegativity"
        ]
        dejong_bond_stats = (
            ['holder_mean::%d' % d for d in range(-4, 4 + 1)] +
            ['std_dev', 'geom_std_dev'])

        if preset == "CrystalNNFingerprint_cn":
            return SiteStatsFingerprint(
                CrystalNNFingerprint.from_preset("cn", cation_anion=False),
                **kwargs)

        elif preset == "CrystalNNFingerprint_cn_cation_anion":
            return SiteStatsFingerprint(
                CrystalNNFingerprint.from_preset("cn", cation_anion=True),
                **kwargs)

        elif preset == "CrystalNNFingerprint_ops":
            return SiteStatsFingerprint(
                CrystalNNFingerprint.from_preset("ops", cation_anion=False),
                **kwargs)

        elif preset == "CrystalNNFingerprint_ops_cation_anion":
            return SiteStatsFingerprint(
                CrystalNNFingerprint.from_preset("ops", cation_anion=True),
                **kwargs)

        elif preset == "OPSiteFingerprint":
            return SiteStatsFingerprint(OPSiteFingerprint(), **kwargs)

        elif preset == "LocalPropertyDifference_ward-prb-2017":
            return SiteStatsFingerprint(
                LocalPropertyDifference.from_preset("ward-prb-2017"),
                stats=ward_stats)

        elif preset == "CoordinationNumber_ward-prb-2017":
            return SiteStatsFingerprint(
                CoordinationNumber(nn=VoronoiNN(weight='area'),
                                   use_weights="effective"),
                stats=ward_stats)

        elif preset == "Composition-dejong2016_AD":
            return SiteStatsFingerprint(
                LocalPropertyDifference(properties=dejong_properties,
                                        signed=False),
                stats=['holder_mean::%d' % d
                       for d in range(0, 4 + 1)] + ['std_dev'],
            )

        elif preset == "Composition-dejong2016_SD":
            return SiteStatsFingerprint(
                LocalPropertyDifference(properties=dejong_properties,
                                        signed=True),
                stats=['holder_mean::%d' % d for d in [1, 2, 4]] + ['std_dev'],
            )

        elif preset == "BondLength-dejong2016":
            return SiteStatsFingerprint(
                AverageBondLength(VoronoiNN()), stats=dejong_bond_stats)

        elif preset == "BondAngle-dejong2016":
            return SiteStatsFingerprint(
                AverageBondAngle(VoronoiNN()), stats=dejong_bond_stats)

        # TODO: Why assume coordination number? Should this just raise an error? - lw
        # One of the various Coordination Number presets:
        # MinimumVIRENN, MinimumDistanceNN, JmolNN, VoronoiNN, etc.
        try:
            return SiteStatsFingerprint(
                CoordinationNumber.from_preset(preset), **kwargs)
        except Exception as err:
            # ``Exception`` instead of a bare ``except`` lets
            # KeyboardInterrupt/SystemExit through; chaining keeps the real
            # reason for the failure visible to the caller.
            raise ValueError("Unrecognized preset!") from err
def AddFeatures(df):  # Add features by Matminer
    """Run a fixed sequence of matminer featurizers over ``df``.

    The ``"formula"`` column is featurized first; the following steps read
    the ``"composition"``, ``"composition_oxid"`` and ``"structure"``
    columns. Each featurizer class is imported right before it is used, and
    every step after ``OxidationStates`` is called with
    ``ignore_errors=True``.

    Args:
        df: DataFrame providing at least the ``"formula"`` and
            ``"structure"`` columns.

    Returns:
        The DataFrame returned by the last ``featurize_dataframe`` call.
    """

    def _apply(featurizer, frame, column):
        # Featurize ``column`` of ``frame`` with ``ignore_errors=True``.
        return featurizer.featurize_dataframe(frame,
                                              column,
                                              ignore_errors=True)

    from matminer.featurizers.conversions import StrToComposition
    df = StrToComposition().featurize_dataframe(df, "formula")

    from matminer.featurizers.composition import ElementProperty

    magpie = ElementProperty.from_preset(preset_name="magpie")
    # The "composition" column is the input of this featurizer.
    df = magpie.featurize_dataframe(df, col_id="composition")

    from matminer.featurizers.conversions import CompositionToOxidComposition
    from matminer.featurizers.composition import OxidationStates

    df = CompositionToOxidComposition().featurize_dataframe(df, "composition")
    df = OxidationStates().featurize_dataframe(df, "composition_oxid")

    # Featurizers below all read the "composition_oxid" column.
    from matminer.featurizers.composition import ElectronAffinity
    df = _apply(ElectronAffinity(), df, "composition_oxid")

    from matminer.featurizers.composition import BandCenter
    df = _apply(BandCenter(), df, "composition_oxid")

    from matminer.featurizers.composition import CohesiveEnergy
    df = _apply(CohesiveEnergy(), df, "composition_oxid")

    from matminer.featurizers.composition import Miedema
    df = _apply(Miedema(), df, "composition_oxid")

    from matminer.featurizers.composition import TMetalFraction
    df = _apply(TMetalFraction(), df, "composition_oxid")

    from matminer.featurizers.composition import ValenceOrbital
    df = _apply(ValenceOrbital(), df, "composition_oxid")

    from matminer.featurizers.composition import YangSolidSolution
    df = _apply(YangSolidSolution(), df, "composition_oxid")

    from matminer.featurizers.structure import GlobalSymmetryFeatures

    # This is the border between compositional features and structural
    # features. Comment out the following featurizers to use only
    # compositional features.

    df = _apply(GlobalSymmetryFeatures(), df, "structure")

    from matminer.featurizers.structure import StructuralComplexity
    df = _apply(StructuralComplexity(), df, "structure")

    from matminer.featurizers.structure import ChemicalOrdering
    df = _apply(ChemicalOrdering(), df, "structure")

    from matminer.featurizers.structure import MaximumPackingEfficiency
    df = _apply(MaximumPackingEfficiency(), df, "structure")

    from matminer.featurizers.structure import MinimumRelativeDistances
    df = _apply(MinimumRelativeDistances(), df, "structure")

    from matminer.featurizers.structure import StructuralHeterogeneity
    df = _apply(StructuralHeterogeneity(), df, "structure")

    # Site-level featurizers, wrapped in SiteStatsFingerprint.
    from matminer.featurizers.structure import SiteStatsFingerprint

    from matminer.featurizers.site import AverageBondLength
    from pymatgen.analysis.local_env import CrystalNN
    bond_length_stats = SiteStatsFingerprint(
        AverageBondLength(CrystalNN(search_cutoff=20)))
    df = _apply(bond_length_stats, df, "structure")

    from matminer.featurizers.site import AverageBondAngle
    bond_angle_stats = SiteStatsFingerprint(
        AverageBondAngle(CrystalNN(search_cutoff=20)))
    df = _apply(bond_angle_stats, df, "structure")

    from matminer.featurizers.site import BondOrientationalParameter
    df = _apply(SiteStatsFingerprint(BondOrientationalParameter()), df,
                "structure")

    from matminer.featurizers.site import CoordinationNumber
    df = _apply(SiteStatsFingerprint(CoordinationNumber()), df, "structure")

    from matminer.featurizers.structure import DensityFeatures
    df = _apply(DensityFeatures(), df, "structure")
    return df
Beispiel #12
0
def structure_to_convmol(structure,
                         properties=ELEMENTAL_PROPERTIES,
                         max_atoms=200,
                         max_features=41,
                         tolerance_distance=0.25):
    """Convert a structure into a padded adjacency matrix and atom features.

    Two sites count as connected when their distance does not exceed the sum
    of their atomic radii plus ``tolerance_distance``; every site is
    connected to itself. The feature vector of a site concatenates, in this
    order: the min-max scaled value of each entry of ``properties``, entries
    8-15 of the site's Voronoi fingerprint (unscaled), the remaining Voronoi
    entries scaled with fixed bounds, and the scaled ``MinimumDistanceNN``
    coordination number.

    Args:
        structure: Object exposing ``distance_matrix``, ``_sites`` and
            ``sites`` (presumably a pymatgen structure -- confirm against
            callers).
        properties: Iterable of mappings from element symbol to a numeric
            value or ``None``.
        max_atoms: Passed to ``zfill`` as the first size of both outputs and
            the second size of the adjacency matrix.
        max_features: Passed to ``zfill`` as the second size of the feature
            matrix.
        tolerance_distance: Slack added to the summed radii when deciding
            whether two sites are connected.

    Returns:
        tuple: ``(adjacency, features, n_sites)`` where the first two items
        are the ``zfill`` results and ``n_sites`` is ``len(structure.sites)``.

    Raises:
        KeyError: If a site's element has no ``atomic_radius`` and is not in
            the fallback radius table.
        ValueError: If the assembled feature matrix contains NaN, which
            includes the case of a property value that is ``None``.
    """
    # Fallback radii used when ``Element(...).atomic_radius`` is falsy.
    atomic_radii = {
        'At': 1.50,
        'Bk': 1.70,
        'Cm': 1.74,
        'Fr': 2.60,
        'He': 0.28,
        'Kr': 1.16,
        'Lr': 1.71,
        'Md': 1.94,
        'Ne': 0.58,
        'No': 1.97,
        'Rn': 1.50,
        'Xe': 1.40,
    }

    sites = structure._sites

    # Look each radius up once per site rather than once per pair of sites.
    radii = np.array([
        Element(site.specie).atomic_radius or atomic_radii[str(site.specie)]
        for site in sites
    ], dtype=float)
    max_distance = radii[:, None] + radii[None, :] + tolerance_distance

    # Binary adjacency: 0 beyond the cutoff, 1 otherwise (diagonal forced to 1).
    distance_matrix = np.where(structure.distance_matrix > max_distance,
                               0.0, 1.0)
    np.fill_diagonal(distance_matrix, 1)

    # The scaling bounds depend only on the property tables, not on the
    # site, so compute them once up front.
    property_tables = list(properties)
    property_bounds = []
    for atom_property in property_tables:
        values = np.array(list(atom_property.values()), dtype=float)
        property_bounds.append((np.nanmin(values), np.nanmax(values)))

    # Fixed bounds used to scale the Voronoi statistics (entries 16 onward).
    voronoi_min = np.array([
        0.0, 10.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0,
        1.0
    ])
    voronoi_max = np.array([
        120.0, 135.0, 11.0, 3.0, 11.0, 12.0, 18.0, 7.0, 17.0, 17.0, 6.0,
        2.0, 6.0, 7.0
    ])
    # Fixed bounds used to scale the coordination number.
    coord_min = np.array([1])
    coord_max = np.array([36])

    # Featurizers are reused for every site instead of being rebuilt.
    voronoi_featurizer = VoronoiFingerprint()
    coordination_featurizer = CoordinationNumber.from_preset(
        "MinimumDistanceNN")

    atom_features = []
    for i, site in enumerate(sites):
        symbol = str(Element(site.specie))

        atom_feature_vector = []
        for atom_property, (min_value, max_value) in zip(property_tables,
                                                         property_bounds):
            value = atom_property[symbol]
            if value is not None:
                atom_feature_vector.append(
                    (value - min_value) / (max_value - min_value))
            else:
                # Becomes NaN in the float array and is rejected below.
                atom_feature_vector.append(None)

        voronoi_fps = voronoi_featurizer.featurize(structure, i)
        i_fold_symmetry_indices = voronoi_fps[8:16]
        voronoi_stats = (np.array(voronoi_fps[16:]) -
                         voronoi_min) / (voronoi_max - voronoi_min)
        atom_feature_vector.extend(i_fold_symmetry_indices +
                                   voronoi_stats.tolist())

        coord_fps = ((coordination_featurizer.featurize(structure, i) -
                      coord_min) / (coord_max - coord_min)).tolist()
        atom_feature_vector.extend(coord_fps)

        atom_features.append(atom_feature_vector)

    # ``np.float`` was removed from NumPy; the builtin ``float`` is the same
    # dtype (float64).
    atom_features = np.array(atom_features, dtype=float)

    if np.isnan(atom_features).any():
        raise ValueError('feature vector contains nan value')

    return (zfill(distance_matrix, max_atoms, max_atoms),
            zfill(atom_features, max_atoms, max_features),
            len(structure.sites))
Beispiel #13
0
                                            StructureComposition,
                                            MaximumPackingEfficiency)
from matminer.featurizers.composition import ElementProperty, Stoichiometry, ValenceOrbital, IonProperty
from matminer.featurizers.site import CoordinationNumber, LocalPropertyDifference
from matminer.utils.data import MagpieData

# Elemental property names passed to LocalPropertyDifference below.
element_properties = ('Electronegativity', 'Row', 'Column', 'Number',
                      'MendeleevNumber', 'AtomicWeight', 'CovalentRadius',
                      'MeltingT', 'NsValence', 'NpValence', 'NdValence',
                      'NfValence', 'NValence', 'NsUnfilled', 'NpUnfilled',
                      'NdUnfilled', 'NfUnfilled', 'NUnfilled', 'GSvolume_pa',
                      'SpaceGroupNumber', 'GSbandgap', 'GSmagmom')

# The following features are created with the matminer package: site
# statistics, structure-level featurizers, and composition featurizers
# wrapped in StructureComposition.
featurizer = MultipleFeaturizer([
    # from_preset is called on the class; no throwaway instance is needed.
    SiteStatsFingerprint(CoordinationNumber.from_preset('VoronoiNN'),
                         stats=('mean', 'std_dev', 'minimum', 'maximum')),
    StructuralHeterogeneity(),
    ChemicalOrdering(),
    MaximumPackingEfficiency(),
    SiteStatsFingerprint(
        LocalPropertyDifference(properties=element_properties),
        stats=('mean', 'std_dev', 'minimum', 'maximum', 'range')),
    StructureComposition(Stoichiometry()),
    StructureComposition(ElementProperty.from_preset("magpie")),
    StructureComposition(ValenceOrbital(props=['frac'])),
    StructureComposition(IonProperty(fast=True))
])

#Generate VT based features from the material's crystal lat_params.
feature_data = featurizer.featurize_dataframe(df,
Beispiel #14
0
def featurize_site(df: pd.DataFrame, site_stats=("mean", "std_dev")) -> pd.DataFrame:
    """ Add matminer site-statistics features to a copy of `df`.

    Every site featurizer listed below is wrapped in a
    `matminer.featurizers.structure.SiteStatsFingerprint` and applied to
    the structure column. New columns are prefixed with the featurizer's
    name followed by `"|"`; the input columns are prefixed with
    `"Input data|"`.

    Args:
        df (pandas.DataFrame): the input dataframe with `"structure"`
            column containing `pymatgen.Structure` objects.
        site_stats (Tuple[str]): the statistics handed to each
            `SiteStatsFingerprint` as `stats`.

    Returns:
        pandas.DataFrame: the result of `clean_df` on the featurized
            frame, after dropping columns whose entries all equal 0.

    """

    logging.info("Applying site featurizers...")

    # Work on a copy so the caller's frame keeps its column names.
    df = df.copy()
    df.columns = ["Input data|" + name for name in df.columns]

    site_fingerprints = (
        AGNIFingerprints(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        OPSiteFingerprint(),
        CrystalNNFingerprint.from_preset("ops"),
        VoronoiFingerprint(),
        GaussianSymmFunc(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        LocalPropertyDifference(),
        BondOrientationalParameter(),
        AverageBondLength(VoronoiNN()),
        AverageBondAngle(VoronoiNN())
    )

    # Class name -> column prefix kept for backwards compatibility with
    # pretrained models.
    legacy_prefixes = {
        "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
        "AGNIFingerprints": "AGNIFingerPrint",
        "BondOrientationalParameter": "BondOrientationParameter",
        "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
    }

    for site_featurizer in site_fingerprints:
        stats_featurizer = SiteStatsFingerprint(site_featurizer, stats=site_stats)
        df = stats_featurizer.featurize_dataframe(
            df,
            "Input data|structure",
            multiindex=False,
            ignore_errors=True
        )

        class_name = site_featurizer.__class__.__name__
        prefix = legacy_prefixes.get(class_name, class_name)
        # A prefix that already contains "|" is used verbatim.
        if "|" not in prefix:
            prefix = prefix + "|"

        # Only columns without a "|" (i.e. the ones just added) get prefixed.
        relabelled = []
        for column in df.columns:
            if "|" in column:
                relabelled.append(column)
            else:
                relabelled.append(f"{prefix}{column}")
        df.columns = relabelled

    # Keep only the columns holding at least one value different from 0.
    has_nonzero = (df != 0).any(axis=0)
    df = df.loc[:, has_nonzero]

    return clean_df(df)