def __init__(self, materials, site_descriptors, query=None, **kwargs):
    """
    Set up the calculation of site descriptors for materials.

    Args:
        materials (Store): Store of materials documents
        site_descriptors (Store): Store of site-descriptors data
            such as tetrahedral order parameter or percentage of
            8-fold coordination
        query (dict): dictionary to limit materials to be analyzed
        **kwargs: forwarded unchanged to the parent constructor
    """
    self.materials = materials
    self.site_descriptors = site_descriptors
    self.query = query or {}

    # Register, for every direct NearNeighbors subclass, one unweighted
    # and one weighted coordination-number featurizer. The key suffix is
    # the abbreviation from cls_to_abbrev when one exists, otherwise the
    # class name itself.
    self.sds = {}
    for nn_cls in NearNeighbors.__subclasses__():
        cls_name = nn_cls.__name__
        finder = getattr(pymatgen.analysis.local_env, cls_name)
        if cls_name in cls_to_abbrev.keys():
            tag = cls_to_abbrev[cls_name]
        else:
            tag = cls_name
        self.sds['cn_{}'.format(tag)] = CoordinationNumber(
            finder(), use_weights=False)
        self.sds['cn_wt_{}'.format(tag)] = CoordinationNumber(
            finder(), use_weights=True)

    self.sds['opsf'] = OPSiteFingerprint()
    # Currently disabled:
    # self.sds['csf'] = CrystalSiteFingerprint.from_preset('ops')

    super().__init__(sources=[materials],
                     targets=[site_descriptors],
                     **kwargs)
def __init__(self, materials, descriptors, **kwargs):
    """
    Set up the featurizers for site-based descriptors (coordination
    numbers obtained with different near-neighbor finding approaches),
    an order parameter-based site fingerprint registered for statistics
    analysis, and composition descriptors (Magpie element property
    vector).

    Args:
        materials (Store): Store of materials documents.
        descriptors (Store): Store of composition, site, and structure
            descriptor data such as tetrahedral order parameter or
            fraction of being 8-fold coordinated.
        **kwargs: forwarded unchanged to the parent constructor.
    """
    self.materials = materials
    self.descriptors = descriptors

    # Site descriptors: an unweighted ("none") and a weighted ("sum")
    # coordination number per targeted near-neighbor class.
    self.sds = {}
    for nn_name in nn_target_classes:
        finder = getattr(local_env, nn_name)
        self.sds["cn_{}".format(nn_name)] = CoordinationNumber(
            finder(), use_weights="none")
        self.sds["cn_wt_{}".format(nn_name)] = CoordinationNumber(
            finder(), use_weights="sum")

    # Snapshot the coordination-number keys before the fingerprint is
    # added, so "csf" is listed under "statistics" only.
    self.all_output_pieces = {"site_descriptors": list(self.sds.keys())}
    self.sds["csf"] = CrystalNNFingerprint.from_preset(
        "ops", distance_cutoffs=None, x_diff_weight=None)
    self.all_output_pieces["statistics"] = ["csf"]

    # Composition descriptors.
    self.cds = {"magpie": ElementProperty.from_preset("magpie")}
    self.all_output_pieces["composition_descriptors"] = ["magpie"]

    self.all_output_pieces["meta"] = ["atomate"]

    super().__init__(source=materials,
                     target=descriptors,
                     ufn=self.calc,
                     projection=["structure"],
                     **kwargs)
def get_fps(structure, cutoff=10.0, processes=8):
    """
    Compute per-site descriptors for every site of ``structure`` in a
    worker pool.

    Args:
        structure: iterable of sites; also passed to the structure-level
            featurizers and to every worker task.
        cutoff (float): cutoff handed to ``VoronoiFingerprintModified``.
        processes (int): number of worker processes in the pool.

    Returns:
        ``numpy.ndarray`` holding the results of ``get_all_site_descrs``
        for each site, or an empty list if an ``AttributeError`` or
        ``IndexError`` occurred while preparing or running the tasks.
    """
    import logging

    all_descrs = []
    try:
        coordination_number_ = CoordinationNumber.from_preset('VoronoiNN')
        voronoi_fps_ = VoronoiFingerprintModified(
            cutoff=cutoff).featurize_structure(structure)
        crystal_nn_fingerprint_ = CrystalNNFingerprint.from_preset('cn')
        op_site_fingerprint_ = OPSiteFingerprint()
        agni_fingerprints_ = AGNIFingerprints()
        gaussian_symm_func_fps_ = GaussianSymmFuncModified(
        ).featurize_structure(structure)
        pymatgen_data_ = PymatgenData()
        magpie_data_ = MagpieData()

        # One task per site; every task carries the shared featurizers
        # and the precomputed structure-level fingerprints.
        data_list = [[
            structure, i, site, coordination_number_, voronoi_fps_,
            crystal_nn_fingerprint_, op_site_fingerprint_,
            agni_fingerprints_, gaussian_symm_func_fps_, pymatgen_data_,
            magpie_data_
        ] for i, site in enumerate(structure)]

        # The context manager shuts the workers down once map() has
        # returned; previously the pool was never closed.
        with multiprocessing.Pool(processes=processes) as pool:
            all_descrs = np.array(pool.map(get_all_site_descrs, data_list))
    except (AttributeError, IndexError) as error:
        # Best effort: keep returning the empty default, but leave a
        # trace instead of discarding the failure silently.
        logging.getLogger(__name__).warning(
            "Site descriptor calculation failed: %r", error)
    return all_descrs
def __init__(self, materials, descriptors, mat_query=None, **kwargs):
    """
    Set up the featurizers for site-based descriptors (coordination
    numbers obtained with different near-neighbor finding approaches),
    an order parameter-based site fingerprint registered for statistics
    analysis, and composition descriptors (Magpie element property
    vector).

    Args:
        materials (Store): Store of materials documents.
        descriptors (Store): Store of composition, site, and structure
            descriptor data such as tetrahedral order parameter or
            fraction of being 8-fold coordinated.
        mat_query (dict): dictionary to limit materials to be analyzed.
        **kwargs: forwarded unchanged to the parent constructor.
    """
    self.materials = materials
    self.descriptors = descriptors
    self.mat_query = mat_query or {}

    # Site descriptors: an unweighted ('none') and a weighted ('sum')
    # coordination number per targeted near-neighbor class.
    self.sds = {}
    for nn_name in nn_target_classes:
        finder = getattr(pymatgen.analysis.local_env, nn_name)
        self.sds['cn_{}'.format(nn_name)] = CoordinationNumber(
            finder(), use_weights='none')
        self.sds['cn_wt_{}'.format(nn_name)] = CoordinationNumber(
            finder(), use_weights='sum')

    # Snapshot the coordination-number keys before the fingerprint is
    # added, so 'csf' is listed under 'statistics' only.
    self.all_output_pieces = {'site_descriptors': list(self.sds.keys())}
    self.sds['csf'] = CrystalNNFingerprint.from_preset(
        'ops', distance_cutoffs=None, x_diff_weight=None)
    self.all_output_pieces['statistics'] = ['csf']

    # Composition descriptors.
    self.cds = {"magpie": ElementProperty.from_preset('magpie')}
    self.all_output_pieces['composition_descriptors'] = ['magpie']

    self.all_output_pieces['meta'] = ['atomate']

    super().__init__(sources=[materials],
                     targets=[descriptors],
                     **kwargs)
def __init__(self, materials, site_descriptors, mat_query=None, **kwargs):
    """
    Set up the featurizers for site-based descriptors (coordination
    numbers obtained with different near-neighbor finding approaches)
    and the site fingerprints registered for statistics analysis.

    Args:
        materials (Store): Store of materials documents.
        site_descriptors (Store): Store of site-descriptors data such
            as tetrahedral order parameter or fraction of being 8-fold
            coordinated.
        mat_query (dict): dictionary to limit materials to be analyzed.
        **kwargs: forwarded unchanged to the parent constructor.
    """
    self.materials = materials
    self.site_descriptors = site_descriptors
    self.mat_query = mat_query or {}

    # Site descriptors: an unweighted ('none') and a weighted ('sum')
    # coordination number per direct NearNeighbors subclass, keyed by
    # the class name.
    self.sds = {}
    for nn_cls in NearNeighbors.__subclasses__():
        cls_name = nn_cls.__name__
        finder = getattr(pymatgen.analysis.local_env, cls_name)
        self.sds['cn_{}'.format(cls_name)] = CoordinationNumber(
            finder(), use_weights='none')
        self.sds['cn_wt_{}'.format(cls_name)] = CoordinationNumber(
            finder(), use_weights='sum')

    # Snapshot the coordination-number keys before the fingerprints are
    # added, so those are listed under 'statistics' only.
    self.all_output_pieces = {'site_descriptors': list(self.sds.keys())}
    self.sds['opsf'] = OPSiteFingerprint()
    self.sds['csf'] = CrystalSiteFingerprint.from_preset('ops')
    self.all_output_pieces['statistics'] = ['opsf', 'csf']

    super().__init__(sources=[materials],
                     targets=[site_descriptors],
                     **kwargs)
class DeBreuck2020Featurizer(modnet.featurizers.MODFeaturizer):
    """
    Featurizer presets used for the paper 'Machine learning materials
    properties for small datasets' by Pierre-Paul De Breuck, Geoffroy
    Hautier & Gian-Marco Rignanese, arXiv:2004.14766 (2020).

    Uses most of the featurizers implemented by matminer at the time of
    writing with their default hyperparameters and presets.
    """

    # The imports live in the class body so the featurizer tuples below
    # can be built at class-definition time.
    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        # CohesiveEnergy, - This descriptor was not used in the paper preset
        # ElectronAffinity, - This descriptor was not used in the paper preset
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        # BagofBonds, - This descriptor was not used in the paper preset
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        # PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )
    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    # NOTE(review): this attribute name must match whatever the base
    # class reads for oxidation-state-decorated compositions - confirm.
    oxide_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        # PartialRadialDistributionFunction(),
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
        # BagofBonds(),
    )

    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    def featurize_composition(self, df):
        """
        Applies the preset composition featurizers to the input
        dataframe, renames some fields and cleans the output dataframe.

        The HOMO/LUMO orbital characters are mapped to integers
        (s=1 ... f=4), the HOMO/LUMO elements to their atomic number
        (-1 when the entry is not a string), and infinities/NaN are
        replaced by 0 before cleaning.
        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}

        def _atomic_number(x):
            # Non-string entries (e.g. NaN from failed featurization)
            # get the sentinel -1.
            return -1 if not isinstance(x, str) else Element(x).Z

        for level in ("HOMO", "LUMO"):
            character_col = 'AtomicOrbitals|{}_character'.format(level)
            element_col = 'AtomicOrbitals|{}_element'.format(level)
            df[character_col] = df[character_col].map(_orbitals)
            df[element_col] = df[element_col].apply(_atomic_number)

        df = df.replace([np.inf, -np.inf, np.nan], 0)

        return modnet.featurizers.clean_df(df)

    def featurize_structure(self, df):
        """
        Applies the preset structural featurizers to the input
        dataframe, renames some fields and cleans the output dataframe.

        The radial distribution function column is expanded into one
        column per distance bin (at most the first 50 bins, labelled by
        the distances of the first row), the crystal system is mapped
        to an integer code and the centrosymmetry flag to 0/1.
        """
        df = super().featurize_structure(df)

        rdf_col = "RadialDistributionFunction|radial distribution function"
        dist = df[rdf_col].iloc[0]['distances'][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d)
            df[_rdf_key] = df[rdf_col].apply(
                lambda x, i=i: x['distribution'][i])

        df = df.drop(rdf_col, axis=1)

        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            # The misspelled key never matches the usual spelling of
            # this crystal system, which left those rows unmapped (NaN).
            # The correct spelling is added; the old key is kept so any
            # input that used it still maps.
            "orthorombic": 3,
            "orthorhombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7
        }

        def _int_map(x):
            # NaN never compares equal to anything (itself included), so
            # the former `x == np.nan` test was always False and NaN
            # fell through to the truthy branch, yielding 1.
            if isinstance(x, float) and np.isnan(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"].map(_int_map)

        return modnet.featurizers.clean_df(df)

    def featurize_site(self, df):
        """
        Applies the preset site featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        Columns that are zero in every row are dropped before cleaning.
        """
        # rename some features for backwards compatibility with pretrained models
        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        df = df.loc[:, (df != 0).any(axis=0)]

        return modnet.featurizers.clean_df(df)
def test_cns(self):
    """
    Check labels, coordination numbers and citation counts of
    CoordinationNumber for several near-neighbor finders on the simple
    cubic (``self.sc``) and CsCl (``self.cscl``) fixtures.
    """
    sc, cscl = self.sc, self.cscl

    def verify(featurizer, label, site_values, n_citations):
        # site_values: (structure, site index, expected value) triples.
        self.assertEqual(featurizer.feature_labels()[0], label)
        for struct, idx, expected in site_values:
            self.assertAlmostEqual(
                featurizer.featurize(struct, idx)[0], expected)
        self.assertEqual(len(featurizer.citations()), n_citations)

    # Voronoi: plain preset, then the two weighting schemes.
    voronoi_preset = CoordinationNumber.from_preset('VoronoiNN')
    self.assertEqual(len(voronoi_preset.feature_labels()), 1)
    verify(voronoi_preset, 'CN_VoronoiNN',
           [(sc, 0, 6), (cscl, 0, 14), (cscl, 1, 14)], 2)

    verify(CoordinationNumber(VoronoiNN(), use_weights='sum'),
           'CN_VoronoiNN',
           [(cscl, 0, 9.2584516), (cscl, 1, 9.2584516)], 2)

    verify(CoordinationNumber(VoronoiNN(), use_weights='effective'),
           'CN_VoronoiNN',
           [(cscl, 0, 11.648923254), (cscl, 1, 11.648923254)], 2)

    # Jmol: the preset finds no neighbors on these fixtures; enlarged
    # radii do.
    verify(CoordinationNumber.from_preset('JmolNN'), 'CN_JmolNN',
           [(sc, 0, 0), (cscl, 0, 0), (cscl, 1, 0)], 1)

    enlarged_jmol = JmolNN(
        el_radius_updates={"Al": 1.55, "Cl": 1.7, "Cs": 1.7})
    verify(CoordinationNumber(enlarged_jmol), 'CN_JmolNN',
           [(sc, 0, 6), (cscl, 0, 8), (cscl, 1, 8)], 1)

    # Minimum-distance family.
    verify(CoordinationNumber.from_preset('MinimumDistanceNN'),
           'CN_MinimumDistanceNN',
           [(sc, 0, 6), (cscl, 0, 8), (cscl, 1, 8)], 1)

    verify(CoordinationNumber.from_preset('MinimumOKeeffeNN'),
           'CN_MinimumOKeeffeNN',
           [(sc, 0, 6), (cscl, 0, 8), (cscl, 1, 6)], 2)

    vire = CoordinationNumber.from_preset('MinimumVIRENN')
    verify(vire, 'CN_MinimumVIRENN',
           [(sc, 0, 6), (cscl, 0, 8), (cscl, 1, 14)], 2)

    self.assertEqual(len(vire.implementors()), 2)
    self.assertEqual(vire.implementors()[0], 'Nils E. R. Zimmermann')
class FUTURE_PROSPECTS_2021(featurizer.extendedMODFeaturizer):
    """
    Featurizer preset combining matminer composition, structure, site,
    density-of-states and band-structure featurizers, with
    post-processing that turns categorical outputs into numeric columns.
    """

    # The imports live in the class body so the featurizer tuples below
    # can be built at class-definition time.
    from matminer.featurizers.composition import (
        AtomicOrbitals,
        AtomicPackingEfficiency,
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElectronegativityDiff,
        ElementFraction,
        ElementProperty,
        IonProperty,
        Miedema,
        OxidationStates,
        Stoichiometry,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        BagofBonds,
        BondFractions,
        ChemicalOrdering,
        CoulombMatrix,
        DensityFeatures,
        EwaldEnergy,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        PartialRadialDistributionFunction,
        RadialDistributionFunction,
        SineCoulombMatrix,
        StructuralHeterogeneity,
        XRDPowderPattern,
    )
    from matminer.featurizers.site import (
        AGNIFingerprints,
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        ChemEnvSiteFingerprint,
        CoordinationNumber,
        CrystalNNFingerprint,
        GaussianSymmFunc,
        GeneralizedRadialDistributionFunction,
        LocalPropertyDifference,
        OPSiteFingerprint,
        VoronoiFingerprint,
    )
    from matminer.featurizers.dos import (
        DOSFeaturizer,
        SiteDOS,
        Hybridization,
        DosAsymmetry,
    )
    from matminer.featurizers.bandstructure import (
        BandFeaturizer,
        BranchPointEnergy
    )

    composition_featurizers = (
        AtomicOrbitals(),
        AtomicPackingEfficiency(),
        BandCenter(),
        ElementFraction(),
        ElementProperty.from_preset("magpie"),
        IonProperty(),
        Miedema(),
        Stoichiometry(),
        TMetalFraction(),
        ValenceOrbital(),
        YangSolidSolution(),
    )

    oxid_composition_featurizers = (
        ElectronegativityDiff(),
        OxidationStates(),
    )

    structure_featurizers = (
        DensityFeatures(),
        GlobalSymmetryFeatures(),
        RadialDistributionFunction(),
        CoulombMatrix(),
        # PartialRadialDistributionFunction(),  # Introduces a large amount of features
        SineCoulombMatrix(),
        EwaldEnergy(),
        BondFractions(),
        StructuralHeterogeneity(),
        MaximumPackingEfficiency(),
        ChemicalOrdering(),
        XRDPowderPattern(),
    )

    site_featurizers = (
        AGNIFingerprints(),
        AverageBondAngle(VoronoiNN()),
        AverageBondLength(VoronoiNN()),
        BondOrientationalParameter(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        CrystalNNFingerprint.from_preset("ops"),
        GaussianSymmFunc(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        LocalPropertyDifference(),
        OPSiteFingerprint(),
        VoronoiFingerprint(),
    )

    dos_featurizers = (
        DOSFeaturizer(),
        SiteDOS(),
        Hybridization()
    )

    band_featurizers = (
        BandFeaturizer(),
        BranchPointEnergy()
    )

    def __init__(self, n_jobs=None):
        """Store the requested number of parallel jobs.

        NOTE(review): the base-class constructor is not called here -
        confirm that this is intentional.
        """
        self._n_jobs = n_jobs

    def featurize_composition(self, df):
        """Applies the preset composition featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The HOMO/LUMO orbital characters are mapped to integers
        (s=1 ... f=4) and the HOMO/LUMO elements to their atomic number
        (-1 when the entry is not a string).
        """
        df = super().featurize_composition(df)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}

        def _atomic_number(x):
            # Non-string entries (e.g. NaN from failed featurization)
            # get the sentinel -1.
            return -1 if not isinstance(x, str) else Element(x).Z

        for level in ("HOMO", "LUMO"):
            character_col = "AtomicOrbitals|{}_character".format(level)
            element_col = "AtomicOrbitals|{}_element".format(level)
            df[character_col] = df[character_col].map(_orbitals)
            df[element_col] = df[element_col].apply(_atomic_number)

        return clean_df(df)

    def featurize_structure(self, df):
        """Applies the preset structural featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The radial distribution function column is expanded into one
        column per distance bin (at most the first 50 bins, labelled by
        the distances of the first row), the crystal system is mapped
        to an integer code and the centrosymmetry flag to 0/1.
        """
        df = super().featurize_structure(df)

        rdf_col = "RadialDistributionFunction|radial distribution function"
        dist = df[rdf_col].iloc[0]["distances"][:50]
        for i, d in enumerate(dist):
            _rdf_key = "RadialDistributionFunction|radial distribution function|d_{:.2f}".format(
                d
            )
            df[_rdf_key] = df[rdf_col].apply(
                lambda x, i=i: x["distribution"][i])

        df = df.drop(rdf_col, axis=1)

        _crystal_system = {
            "cubic": 1,
            "tetragonal": 2,
            # The misspelled key never matches the usual spelling of
            # this crystal system, which left those rows unmapped (NaN).
            # The correct spelling is added; the old key is kept so any
            # input that used it still maps.
            "orthorombic": 3,
            "orthorhombic": 3,
            "hexagonal": 4,
            "trigonal": 5,
            "monoclinic": 6,
            "triclinic": 7,
        }

        def _int_map(x):
            # NaN never compares equal to anything (itself included), so
            # the former `x == np.nan` test was always False and NaN
            # fell through to the truthy branch, yielding 1.
            if isinstance(x, float) and np.isnan(x):
                return 0
            return 1 if x else 0

        df["GlobalSymmetryFeatures|crystal_system"] = df[
            "GlobalSymmetryFeatures|crystal_system"
        ].map(_crystal_system)
        df["GlobalSymmetryFeatures|is_centrosymmetric"] = df[
            "GlobalSymmetryFeatures|is_centrosymmetric"
        ].map(_int_map)

        return clean_df(df)

    def featurize_dos(self, df):
        """Applies the preset dos featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The VBM/CBM species columns are one-hot encoded, the VBM/CBM
        orbital characters mapped to integers, and each VBM/CBM
        location string is split into three float columns.
        """
        df = super().featurize_dos(df)

        hotencodeColumns = ["DOSFeaturizer|vbm_specie_1", "DOSFeaturizer|cbm_specie_1"]
        one_hot = pd.get_dummies(df[hotencodeColumns])
        df = df.drop(hotencodeColumns, axis=1).join(one_hot)

        _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}
        df["DOSFeaturizer|vbm_character_1"] = df[
            "DOSFeaturizer|vbm_character_1"
        ].map(_orbitals)
        df["DOSFeaturizer|cbm_character_1"] = df[
            "DOSFeaturizer|cbm_character_1"
        ].map(_orbitals)

        # Splitting one feature into several floating features
        # e.g. number;number;number into three columns
        splitColumns = ["DOSFeaturizer|cbm_location_1", "DOSFeaturizer|vbm_location_1"]
        for column in splitColumns:
            try:
                newColumns = df[column].str.split(";", n=2, expand=True)
                for i in range(3):
                    # `np.float` was removed in NumPy 1.24; combined
                    # with the former bare `except` the resulting
                    # AttributeError was swallowed and these columns
                    # were never created. The builtin is equivalent.
                    df[column + "_" + str(i)] = np.array(
                        newColumns[i]).astype(float)
            except (KeyError, AttributeError, TypeError, ValueError):
                # Best effort: skip a column that is missing, not
                # string-typed, or not made of three numeric parts.
                continue
        df = df.drop(splitColumns, axis=1)
        df = df.drop(["dos"], axis=1)
        return clean_df(df)

    def featurize_bandstructure(self, df):
        """Applies the preset band structure featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        The direct-gap flag is mapped to 0/1.
        """
        df = super().featurize_bandstructure(df)

        def _int_map(x):
            # Compare the string form so "True"/"False" strings map
            # the same way as real booleans.
            if str(x) == "False":
                return 0
            elif str(x) == "True":
                return 1
            # Anything else (e.g. NaN) stays unmapped.
            return None

        df["BandFeaturizer|is_gap_direct"] = df[
            "BandFeaturizer|is_gap_direct"
        ].map(_int_map)

        df = df.drop(["bandstructure"], axis=1)

        return clean_df(df)

    def featurize_site(self, df):
        """Applies the preset site featurizers to the input dataframe,
        renames some fields and cleans the output dataframe.

        Columns that are zero in every row are dropped before cleaning.
        """
        aliases = {
            "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
            "AGNIFingerprints": "AGNIFingerPrint",
            "BondOrientationalParameter": "BondOrientationParameter",
            "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
        }
        df = super().featurize_site(df, aliases=aliases)
        df = df.loc[:, (df != 0).any(axis=0)]

        return clean_df(df)
def from_preset(preset, **kwargs):
    """
    Create a SiteStatsFingerprint class according to a preset

    Args:
        preset (str) - Name of preset
        kwargs - Options for SiteStatsFingerprint

    Returns:
        A SiteStatsFingerprint wrapping the site featurizer selected by
        ``preset``.

    Raises:
        ValueError: if ``preset`` is neither one of the names handled
            below nor accepted by ``CoordinationNumber.from_preset``.
            The underlying error is attached as ``__cause__``.
    """
    if preset == "CrystalNNFingerprint_cn":
        return SiteStatsFingerprint(
            CrystalNNFingerprint.from_preset("cn", cation_anion=False),
            **kwargs)

    elif preset == "CrystalNNFingerprint_cn_cation_anion":
        return SiteStatsFingerprint(
            CrystalNNFingerprint.from_preset("cn", cation_anion=True),
            **kwargs)

    elif preset == "CrystalNNFingerprint_ops":
        return SiteStatsFingerprint(
            CrystalNNFingerprint.from_preset("ops", cation_anion=False),
            **kwargs)

    elif preset == "CrystalNNFingerprint_ops_cation_anion":
        return SiteStatsFingerprint(
            CrystalNNFingerprint.from_preset("ops", cation_anion=True),
            **kwargs)

    elif preset == "OPSiteFingerprint":
        return SiteStatsFingerprint(OPSiteFingerprint(), **kwargs)

    # NOTE(review): the presets from here down to the fallback fix
    # their own `stats` and do not forward **kwargs.
    elif preset == "LocalPropertyDifference_ward-prb-2017":
        return SiteStatsFingerprint(
            LocalPropertyDifference.from_preset("ward-prb-2017"),
            stats=["minimum", "maximum", "range", "mean", "avg_dev"])

    elif preset == "CoordinationNumber_ward-prb-2017":
        return SiteStatsFingerprint(
            CoordinationNumber(nn=VoronoiNN(weight='area'),
                               use_weights="effective"),
            stats=["minimum", "maximum", "range", "mean", "avg_dev"])

    elif preset == "Composition-dejong2016_AD":
        return SiteStatsFingerprint(
            LocalPropertyDifference(properties=[
                "Number", "AtomicWeight", "Column", "Row",
                "CovalentRadius", "Electronegativity"
            ], signed=False),
            stats=['holder_mean::%d' % d for d in range(0, 4 + 1)]
            + ['std_dev'],
        )

    elif preset == "Composition-dejong2016_SD":
        return SiteStatsFingerprint(
            LocalPropertyDifference(properties=[
                "Number", "AtomicWeight", "Column", "Row",
                "CovalentRadius", "Electronegativity"
            ], signed=True),
            stats=['holder_mean::%d' % d for d in [1, 2, 4]]
            + ['std_dev'],
        )

    elif preset == "BondLength-dejong2016":
        return SiteStatsFingerprint(
            AverageBondLength(VoronoiNN()),
            stats=['holder_mean::%d' % d for d in range(-4, 4 + 1)]
            + ['std_dev', 'geom_std_dev'])

    elif preset == "BondAngle-dejong2016":
        return SiteStatsFingerprint(
            AverageBondAngle(VoronoiNN()),
            stats=['holder_mean::%d' % d for d in range(-4, 4 + 1)]
            + ['std_dev', 'geom_std_dev'])

    # TODO: Why assume coordination number? Should this just raise an error? - lw
    # One of the various Coordination Number presets:
    # MinimumVIRENN, MinimumDistanceNN, JmolNN, VoronoiNN, etc.
    try:
        return SiteStatsFingerprint(
            CoordinationNumber.from_preset(preset), **kwargs)
    except Exception as err:
        # `Exception` rather than a bare except, so KeyboardInterrupt
        # and SystemExit propagate; chaining keeps the real reason
        # visible in the traceback.
        raise ValueError("Unrecognized preset!") from err
def AddFeatures(df):
    """
    Add matminer features to ``df`` and return the featurized dataframe.

    Composition features are derived from the "formula" column (through
    the generated "composition" and "composition_oxid" columns);
    structural features are derived from the "structure" column.
    """
    from matminer.featurizers.conversions import (
        CompositionToOxidComposition,
        StrToComposition,
    )
    from matminer.featurizers.composition import (
        BandCenter,
        CohesiveEnergy,
        ElectronAffinity,
        ElementProperty,
        Miedema,
        OxidationStates,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    from matminer.featurizers.structure import (
        ChemicalOrdering,
        DensityFeatures,
        GlobalSymmetryFeatures,
        MaximumPackingEfficiency,
        MinimumRelativeDistances,
        SiteStatsFingerprint,
        StructuralComplexity,
        StructuralHeterogeneity,
    )
    from matminer.featurizers.site import (
        AverageBondAngle,
        AverageBondLength,
        BondOrientationalParameter,
        CoordinationNumber,
    )
    from pymatgen.analysis.local_env import CrystalNN

    # Conversions and the featurizers that run without ignore_errors.
    df = StrToComposition().featurize_dataframe(df, "formula")
    magpie = ElementProperty.from_preset(preset_name="magpie")
    df = magpie.featurize_dataframe(df, col_id="composition")
    df = CompositionToOxidComposition().featurize_dataframe(df, "composition")
    df = OxidationStates().featurize_dataframe(df, "composition_oxid")

    # Remaining compositional featurizers, applied in order. Each is
    # built right before it is used.
    composition_steps = (
        ElectronAffinity,
        BandCenter,
        CohesiveEnergy,
        Miedema,
        TMetalFraction,
        ValenceOrbital,
        YangSolidSolution,
    )
    for make_featurizer in composition_steps:
        df = make_featurizer().featurize_dataframe(
            df, "composition_oxid", ignore_errors=True)

    # This is the border between compositional features and structural
    # features. Remove the steps below to use only compositional
    # features.
    structure_steps = (
        GlobalSymmetryFeatures,
        StructuralComplexity,
        ChemicalOrdering,
        MaximumPackingEfficiency,
        MinimumRelativeDistances,
        StructuralHeterogeneity,
        lambda: SiteStatsFingerprint(
            AverageBondLength(CrystalNN(search_cutoff=20))),
        lambda: SiteStatsFingerprint(
            AverageBondAngle(CrystalNN(search_cutoff=20))),
        lambda: SiteStatsFingerprint(BondOrientationalParameter()),
        lambda: SiteStatsFingerprint(CoordinationNumber()),
        DensityFeatures,
    )
    for make_featurizer in structure_steps:
        df = make_featurizer().featurize_dataframe(
            df, "structure", ignore_errors=True)

    return df
def structure_to_convmol(structure,
                         properties=ELEMENTAL_PROPERTIES,
                         max_atoms=200,
                         max_features=41,
                         tolerance_distance=0.25):
    """
    Convert a structure into a padded adjacency matrix and a padded
    per-atom feature matrix.

    Two sites are connected when their distance does not exceed the sum
    of their atomic radii plus ``tolerance_distance``; every site is
    connected to itself. Each atom's feature vector holds the min-max
    scaled elemental properties, entries 8-15 of the Voronoi fingerprint
    unscaled, the remaining Voronoi entries min-max scaled, and the
    scaled MinimumDistanceNN coordination number.

    Args:
        structure: structure exposing ``distance_matrix``, ``_sites``
            and ``sites``.
        properties: iterable of mappings from element symbol to a
            numeric value (or None when unknown).
        max_atoms (int): size the matrices are padded to along the atom
            axis by ``zfill``.
        max_features (int): size the feature matrix is padded to along
            the feature axis by ``zfill``.
        tolerance_distance (float): slack added to the summed radii.

    Returns:
        tuple: (padded adjacency matrix, padded atom features, number of
        sites).

    Raises:
        ValueError: if any atom feature is NaN (including properties
            that are None for an element).
        KeyError: if an element has no atomic radius and is missing
            from the fallback table.
    """
    # Fallback radii for elements whose `atomic_radius` is not set.
    atomic_radii = {
        'At': 1.50,
        'Bk': 1.70,
        'Cm': 1.74,
        'Fr': 2.60,
        'He': 0.28,
        'Kr': 1.16,
        'Lr': 1.71,
        'Md': 1.94,
        'Ne': 0.58,
        'No': 1.97,
        'Rn': 1.50,
        'Xe': 1.40,
    }

    sites = structure._sites

    # One radius per site, looked up once instead of once per site pair.
    radii = np.array([
        Element(site.specie).atomic_radius or atomic_radii[str(site.specie)]
        for site in sites
    ], dtype=float)

    # Pairwise bonding cutoff, then a 0/1 adjacency matrix.
    max_distances = radii[:, None] + radii[None, :] + tolerance_distance
    distance_matrix = np.where(
        structure.distance_matrix > max_distances, 0.0, 1.0)
    np.fill_diagonal(distance_matrix, 1)

    # Loop-invariant scaling bounds and featurizers.
    property_bounds = []
    for atom_property in properties:
        values = np.array(list(atom_property.values()), dtype=float)
        property_bounds.append((np.nanmin(values), np.nanmax(values)))

    voronoi_min = np.array([
        0.0, 10.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0,
        1.0
    ])
    voronoi_max = np.array([
        120.0, 135.0, 11.0, 3.0, 11.0, 12.0, 18.0, 7.0, 17.0, 17.0, 6.0,
        2.0, 6.0, 7.0
    ])
    coord_min = np.array([1])
    coord_max = np.array([36])

    voronoi_featurizer = VoronoiFingerprint()
    coordination_featurizer = CoordinationNumber.from_preset(
        "MinimumDistanceNN")

    atom_features = []
    for i, site in enumerate(sites):
        symbol = str(Element(site.specie))

        atom_feature_vector = []
        for atom_property, (min_value, max_value) in zip(properties,
                                                         property_bounds):
            value = atom_property[symbol]
            if value is not None:
                atom_feature_vector.append(
                    (value - min_value) / (max_value - min_value))
            else:
                # Becomes NaN in the float array below and triggers the
                # ValueError.
                atom_feature_vector.append(None)

        voronoi_fps = voronoi_featurizer.featurize(structure, i)
        # list(): guarantees concatenation (not element-wise addition)
        # even if the featurizer returns an array.
        i_fold_symmetry_indices = list(voronoi_fps[8:16])
        voronoi_stats = (np.array(voronoi_fps[16:]) -
                         voronoi_min) / (voronoi_max - voronoi_min)
        atom_feature_vector.extend(
            i_fold_symmetry_indices + voronoi_stats.tolist())

        coord_fps = (
            (np.asarray(coordination_featurizer.featurize(structure, i))
             - coord_min) / (coord_max - coord_min)).tolist()
        atom_feature_vector.extend(coord_fps)

        atom_features.append(atom_feature_vector)

    # `np.float` was removed in NumPy 1.24; the builtin `float` is the
    # same dtype.
    atom_features = np.array(atom_features, dtype=float)
    if np.isnan(atom_features).any():
        raise ValueError('feature vector contains nan value')

    return (zfill(distance_matrix, max_atoms, max_atoms),
            zfill(atom_features, max_atoms, max_features),
            len(structure.sites))
StructureComposition, MaximumPackingEfficiency) from matminer.featurizers.composition import ElementProperty, Stoichiometry, ValenceOrbital, IonProperty from matminer.featurizers.site import CoordinationNumber, LocalPropertyDifference from matminer.utils.data import MagpieData element_properties = ('Electronegativity', 'Row', 'Column', 'Number', 'MendeleevNumber', 'AtomicWeight', 'CovalentRadius', 'MeltingT', 'NsValence', 'NpValence', 'NdValence', 'NfValence', 'NValence', 'NsUnfilled', 'NpUnfilled', 'NdUnfilled', 'NfUnfilled', 'NUnfilled', 'GSvolume_pa', 'SpaceGroupNumber', 'GSbandgap', 'GSmagmom') #The following features will be created by using matminer package. featurizer = MultipleFeaturizer([ SiteStatsFingerprint(CoordinationNumber().from_preset('VoronoiNN'), stats=('mean', 'std_dev', 'minimum', 'maximum')), StructuralHeterogeneity(), ChemicalOrdering(), MaximumPackingEfficiency(), SiteStatsFingerprint( LocalPropertyDifference(properties=element_properties), stats=('mean', 'std_dev', 'minimum', 'maximum', 'range')), StructureComposition(Stoichiometry()), StructureComposition(ElementProperty.from_preset("magpie")), StructureComposition(ValenceOrbital(props=['frac'])), StructureComposition(IonProperty(fast=True)) ]) #Generate VT based features from the material's crystal lat_params. feature_data = featurizer.featurize_dataframe(df,
def featurize_site(df: pd.DataFrame, site_stats=("mean", "std_dev")) -> pd.DataFrame:
    """
    Decorate input `pandas.DataFrame` of structures with site features
    from matminer.

    Every site featurizer listed below is wrapped in a
    `matminer.featurizers.structure.SiteStatsFingerprint` and applied
    to the structure column. New columns are prefixed with the
    featurizer name, and columns that are zero in every row are dropped
    before cleaning.

    Args:
        df (pandas.DataFrame): the input dataframe with `"structure"`
            column containing `pymatgen.Structure` objects.
        site_stats (Tuple[str]): the matminer site stats to use in the
            `SiteStatsFingerprint` for all features.

    Returns:
        pandas.DataFrame: the decorated DataFrame.

    """
    logging.info("Applying site featurizers...")

    df = df.copy()
    df.columns = ["Input data|" + col for col in df.columns]

    # rename some features for backwards compatibility with pretrained models
    legacy_names = {
        "GeneralizedRadialDistributionFunction": "GeneralizedRDF",
        "AGNIFingerprints": "AGNIFingerPrint",
        "BondOrientationalParameter": "BondOrientationParameter",
        "GaussianSymmFunc": "ChemEnvSiteFingerprint|GaussianSymmFunc",
    }

    site_fingerprints = (
        AGNIFingerprints(),
        GeneralizedRadialDistributionFunction.from_preset("gaussian"),
        OPSiteFingerprint(),
        CrystalNNFingerprint.from_preset("ops"),
        VoronoiFingerprint(),
        GaussianSymmFunc(),
        ChemEnvSiteFingerprint.from_preset("simple"),
        CoordinationNumber(),
        LocalPropertyDifference(),
        BondOrientationalParameter(),
        AverageBondLength(VoronoiNN()),
        AverageBondAngle(VoronoiNN()),
    )

    for fingerprint in site_fingerprints:
        stats_featurizer = SiteStatsFingerprint(fingerprint, stats=site_stats)
        df = stats_featurizer.featurize_dataframe(
            df,
            "Input data|structure",
            multiindex=False,
            ignore_errors=True,
        )

        class_name = fingerprint.__class__.__name__
        prefix = legacy_names.get(class_name, class_name)
        if "|" not in prefix:
            prefix += "|"

        # Only the columns just added lack a "|" separator; everything
        # already prefixed is left untouched.
        renamed = []
        for col in df.columns:
            renamed.append(col if "|" in col else f"{prefix}{col}")
        df.columns = renamed

    nonzero_columns = (df != 0).any(axis=0)
    df = df.loc[:, nonzero_columns]

    return clean_df(df)