def get_structure_properties(structure: Structure, mode: str = 'all') -> dict:
    """Featurize one structure with matminer and return labelled values.

    Args:
        structure: Structure handed to the featurizers.
        mode: ``'all'`` selects the full featurizer set. Any other value
            selects the reduced set that does not need a Voronoi
            tessellation.

    Returns:
        dict: feature label -> feature value, plus a ``'volume'`` entry
        taken from ``structure.volume``.
    """
    if mode == 'all':
        members = [
            SiteStatsFingerprint.from_preset(
                'CoordinationNumber_ward-prb-2017'),
            StructuralHeterogeneity(),
            ChemicalOrdering(),
            DensityFeatures(),
            MaximumPackingEfficiency(),
            SiteStatsFingerprint.from_preset(
                'LocalPropertyDifference_ward-prb-2017'),
        ]
    else:
        # Calculate only those which do not need a Voronoi tessellation
        members = [DensityFeatures()]

    # Composition-derived featurizers close the list in both modes.
    members += [
        StructureComposition(Stoichiometry()),
        StructureComposition(ElementProperty.from_preset('magpie')),
        StructureComposition(ValenceOrbital(props=['frac'])),
    ]
    featurizer = MultipleFeaturizer(members)

    values = featurizer.featurize(structure)
    properties = dict(zip(featurizer.feature_labels(), values))
    properties['volume'] = structure.volume
    return properties
def composition_featurizer(df_input: pd.DataFrame, **kwargs) -> pd.DataFrame:
    """Build compositional features for the compounds in ``df_input``.

    Args:
        df_input: Frame whose ``"Compound"`` column is converted into a
            ``"composition"`` column.
        **kwargs: Forwarded to ``CompositionToOxidComposition``.

    Returns:
        The featurized frame, restricted to rows whose
        ``"minimum oxidation state"`` is not 0.
    """
    # "Compound" -> "composition"
    featurized = StrToComposition().featurize_dataframe(
        df_input, col_id="Compound")

    # Elemental-property features (magpie preset), written in place.
    magpie = ElementProperty.from_preset(preset_name="magpie")
    magpie.featurize_dataframe(featurized, col_id="composition", inplace=True)

    # Guessed oxidation states -> "composition_oxid". Errors coming from
    # non-integer stoichiometries are ignored.
    to_oxid = CompositionToOxidComposition(
        return_original_on_error=True, **kwargs)
    to_oxid.featurize_dataframe(
        featurized, "composition", ignore_errors=True, inplace=True)

    # Correct the guessed oxidation states.
    featurized = correct_comp_oxid(featurized)

    # Oxidation-state features, written in place.
    OxidationStates().featurize_dataframe(
        featurized, "composition_oxid", ignore_errors=True, inplace=True)

    # Drop compounds with a predicted oxidation state of 0.
    has_nonzero_minimum = featurized["minimum oxidation state"] != 0
    return featurized[has_nonzero_minimum]
 def test_elem_matminer(self):
     """Spot-check statistics from the "matminer" ElementProperty preset."""
     preset = ElementProperty.from_preset("matminer")
     df_elem = preset.featurize_dataframe(self.df, col_id="composition")

     self.assertAlmostEqual(
         df_elem["minimum melting_point"][0], 54.8, places=1)
     # The bulk-modulus maximum is expected to be NaN for the first row.
     self.assertTrue(math.isnan(df_elem["maximum bulk_modulus"][0]))

     # Remaining statistics, each compared to one decimal place.
     one_decimal_checks = (
         ("range X", 1.61),
         ("mean X", 2.796),
         ("maximum block", 3),
     )
     for column, expected in one_decimal_checks:
         self.assertAlmostEqual(df_elem[column][0], expected, places=1)
Exemple #4
0
 def test_elem_deml(self):
     """Check atomic-number statistics from the "deml" ElementProperty preset."""
     preset = ElementProperty.from_preset("deml")
     df_elem_deml = preset.featurize_dataframe(self.df, col_id="composition")

     # Compared with the default assertAlmostEqual precision.
     default_precision_checks = (
         ("minimum atom_num", 8),
         ("maximum atom_num", 26),
         ("range atom_num", 18),
         ("mean atom_num", 15.2),
     )
     for column, expected in default_precision_checks:
         self.assertAlmostEqual(df_elem_deml[column][0], expected)

     self.assertAlmostEqual(
         df_elem_deml["std_dev atom_num"][0], 12.7279, places=4)
Exemple #5
0
    def featurize_structures(self, featurizer=None, **kwargs):
        """
        Compute features for every structure in ``hypo_structures``.

        When no structures are available yet they are generated first
        (a warning is emitted). Rows with any missing feature value are
        dropped, and the structures that survive are stored as
        ``valid_structures``.

        Args:
            featurizer (Featurizer): A MatMiner Featurizer. When falsy, a
                MultipleFeaturizer with the PRB Ward Voronoi descriptors
                is used.
            **kwargs (dict): kwargs passed to featurize_many
                method of featurizer.

        Returns:
            (pandas.DataFrame): features, columns sorted by name.

        """
        if self.hypo_structures is None:
            warnings.warn("No structures available. Generating structures.")
            self.get_structures()

        print("Generating features")
        if not featurizer:
            featurizer = MultipleFeaturizer([
                SiteStatsFingerprint.from_preset(
                    "CoordinationNumber_ward-prb-2017"),
                StructuralHeterogeneity(),
                ChemicalOrdering(),
                MaximumPackingEfficiency(),
                SiteStatsFingerprint.from_preset(
                    "LocalPropertyDifference_ward-prb-2017"),
                StructureComposition(Stoichiometry()),
                StructureComposition(ElementProperty.from_preset("magpie")),
                StructureComposition(ValenceOrbital(props=['frac'])),
                StructureComposition(IonProperty(fast=True)),
            ])

        structures = self.hypo_structures['structure']
        rows = featurizer.featurize_many(
            structures, ignore_errors=True, **kwargs)

        n_species = [len(s.composition.elements) for s in structures]
        formula = [s.composition.formula for s in structures]

        # Full table, including rows whose featurization produced NaNs.
        self._features_df = pd.DataFrame.from_records(
            rows, columns=featurizer.feature_labels())
        self._features_df.index = self.hypo_structures.index
        self._features_df['N_species'] = n_species
        self._features_df['Composition'] = formula
        self._features_df['structure'] = structures

        # Keep complete rows only, then order the columns alphabetically.
        self.features = self._features_df.dropna(axis=0, how='any')
        ordered_columns = sorted(self.features.columns)
        self.features = self.features.reindex(ordered_columns, axis=1)

        self._valid_structure_labels = list(self.features.index)
        self.valid_structures = self.hypo_structures.loc[
            self._valid_structure_labels]

        print("{} out of {} structures were successfully featurized.".format(
            len(self.features), len(self._features_df)))
        return self.features
    def _featurize(self, composition: "pymatgen.Composition"):
        """
        Calculate chemical fingerprint from crystal composition.

        Parameters
        ----------
        composition: pymatgen.Composition object
          Composition object.

        Returns
        -------
        feats: np.ndarray
          Vector of properties and statistics derived from chemical
          stoichiometry. Some values may be NaN. An empty array is
          returned when the featurizer raises.

        Raises
        ------
        ValueError
          If matminer is not installed.

        """
        try:
            from matminer.featurizers.composition import ElementProperty
        except ModuleNotFoundError as err:
            # Same exception type as before; the cause is chained explicitly
            # so the traceback shows the missing module.
            raise ValueError(
                "This class requires matminer to be installed.") from err

        ep = ElementProperty.from_preset(self.data_source)

        try:
            feats = ep.featurize(composition)
        except Exception:
            # Best effort: a failed featurization yields an empty vector.
            # Only ``Exception`` is caught so that KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            feats = []

        return np.array(feats)
Exemple #7
0
 def __init__(self, pbar=False):
     """Create the regressor, the formula parser and the featurizer.

     Args:
         pbar: Stored unchanged on ``self.pbar``.
     """
     self.regressor = RandomForestRegressor(
         n_estimators=500,
         n_jobs=-1,
         verbose=3,
     )
     self.stc = StrToComposition()
     # Magpie element-property statistics followed by element fractions.
     self.featurizer = MultipleFeaturizer([
         ElementProperty.from_preset("magpie"),
         ElementFraction(),
     ])
     self.pbar = pbar
Exemple #8
0
def tran_feat_composition(
    df,
    var_formula="FORMULA",
    preset_name="magpie",
    append=True,
    ignore_errors=True,
    **kwargs,
):
    r"""Featurize chemical formulas with matminer

    Parses the formula strings in one column of ``df`` into compositions and
    computes the features of a matminer ``ElementProperty`` preset for them.

    Args:
        df (DataFrame): Data to featurize
        var_formula (string): Name of the column in df holding the chemical
            formula, given as a string
        preset_name (string): Matminer featurization preset
        append (bool): If True, return the original columns with the
            features concatenated; otherwise return the features only

    Kwargs:
        ignore_errors (bool): Passed to both featurization steps; set to
            True to return NaN's for invalid formulae instead of raising.
        **kwargs: Forwarded to the ElementProperty ``featurize_dataframe``
            call.

    Returns:
        DataFrame: Feature columns, preceded by the columns of ``df`` when
        ``append`` is True.

    Notes:
        - A pre-processor and wrapper for matminer.featurizers.composition

    References:
        Ward, L., Dunn, A., Faghaninia, A., Zimmermann, N. E. R., Bajaj, S., Wang, Q., Montoya, J. H., Chen, J., Bystrom, K., Dylla, M., Chard, K., Asta, M., Persson, K., Snyder, G. J., Foster, I., Jain, A., Matminer: An open source toolkit for materials data mining. Comput. Mater. Sci. 152, 60-69 (2018).

    Examples:
        >>> import grama as gr
        >>> from grama.tran import tf_feat_composition
        >>> (
        >>>     gr.df_make(FORMULA=["C6H12O6"])
        >>>     >> gr.tf_feat_composition()
        >>> )

    """
    element_props = ElementProperty.from_preset(preset_name=preset_name)

    ## Formula strings -> "composition" column (formula column only)
    formula_only = df[[var_formula]]
    parsed = StrToComposition().featurize_dataframe(
        formula_only,
        var_formula,
        ignore_errors=ignore_errors,
    )

    ## Compositions -> element-property features
    features = element_props.featurize_dataframe(
        parsed,
        col_id="composition",
        ignore_errors=ignore_errors,
        **kwargs,
    )
    ## Keep the generated feature columns only
    features.drop(columns=[var_formula, "composition"], inplace=True)

    if not append:
        return features
    return concat((df, features), axis=1)
  def _featurize(self, comp):
    """
    Calculate chemical fingerprint from crystal composition.

    Parameters
    ----------
    comp : str
      Reduced formula of crystal.

    Returns
    -------
    feats: np.ndarray
      Vector of properties and statistics derived from chemical
      stoichiometry. Some values may be NaN. An empty array is
      returned when the featurizer raises.

    """

    # pymatgen >= 2022.0 removed the root-level re-exports, so
    # ``from pymatgen import Composition`` fails there. Import from the
    # defining module first and fall back to the legacy location.
    try:
      from pymatgen.core.composition import Composition
    except ImportError:
      from pymatgen import Composition
    from matminer.featurizers.composition import ElementProperty

    # Get pymatgen Composition object
    c = Composition(comp)

    ep = ElementProperty.from_preset(self.data_source)

    try:
      feats = ep.featurize(c)
    except Exception:
      # Best effort: a failed featurization yields an empty vector.
      # Only ``Exception`` is caught so that KeyboardInterrupt and
      # SystemExit are no longer swallowed.
      feats = []

    return np.array(feats)
 def test_elem_matminer(self):
     """Verify a few statistics of the "matminer" ElementProperty preset."""
     featurizer = ElementProperty.from_preset("matminer")
     df_elem = featurizer.featurize_dataframe(self.df, col_id="composition")

     melting_min = df_elem["minimum melting_point"][0]
     self.assertAlmostEqual(melting_min, 54.8, places=1)

     # The bulk-modulus maximum is expected to be NaN for the first row.
     bulk_max = df_elem["maximum bulk_modulus"][0]
     self.assertTrue(math.isnan(bulk_max))

     # Electronegativity statistics, compared to one decimal place.
     for column, expected in (("range X", 1.61), ("mean X", 2.796)):
         self.assertAlmostEqual(df_elem[column][0], expected, places=1)
Exemple #11
0
def featurize_composition(df: pd.DataFrame) -> pd.DataFrame:
    """Add matminer composition features to a frame of structures.

    Works on a copy of the input. A ``composition`` column is derived from
    the ``structure`` column, composition featurizers and oxidation-state
    featurizers are applied, the resulting two-level column names are
    flattened with ``|``, the orbital/element descriptors are encoded as
    numbers, and inf/NaN values are replaced by 0.

    Args:
        df (pandas.DataFrame): the input dataframe with `"structure"`
            column containing `pymatgen.Structure` objects.

    Returns:
        pandas.DataFrame: the decorated DataFrame, passed through
        ``clean_df``.

    """
    logging.info("Applying composition featurizers...")
    df = df.copy()
    df['composition'] = df['structure'].apply(
        lambda structure: structure.composition)

    # ElectronAffinity() and CohesiveEnergy() are left out on purpose:
    # these descriptors were not used in the paper preset.
    composition_featurizer = MultipleFeaturizer([
        ElementProperty.from_preset("magpie"),
        AtomicOrbitals(),
        BandCenter(),
        Stoichiometry(),
        ValenceOrbital(),
        IonProperty(),
        ElementFraction(),
        TMetalFraction(),
        Miedema(),
        YangSolidSolution(),
        AtomicPackingEfficiency(),
    ])
    df = composition_featurizer.featurize_dataframe(
        df, "composition", multiindex=True, ignore_errors=True)
    df.columns = df.columns.map('|'.join).str.strip('|')

    oxidation_featurizer = MultipleFeaturizer([
        OxidationStates(),
        ElectronegativityDiff(),
    ])
    df = CompositionToOxidComposition().featurize_dataframe(
        df, "Input Data|composition")
    df = oxidation_featurizer.featurize_dataframe(
        df, "composition_oxid", multiindex=True, ignore_errors=True)
    df = df.rename(columns={'Input Data': ''})
    df.columns = df.columns.map('|'.join).str.strip('|')

    # Orbital letters become integers.
    orbital_codes = {"s": 1, "p": 2, "d": 3, "f": 4}
    for column in ('AtomicOrbitals|HOMO_character',
                   'AtomicOrbitals|LUMO_character'):
        df[column] = df[column].map(orbital_codes)

    def _atomic_number(symbol):
        # Non-string entries (e.g. missing values) are encoded as -1.
        return Element(symbol).Z if isinstance(symbol, str) else -1

    for column in ('AtomicOrbitals|HOMO_element',
                   'AtomicOrbitals|LUMO_element'):
        df[column] = df[column].apply(_atomic_number)

    df = df.replace([np.inf, -np.inf, np.nan], 0)

    return clean_df(df)
    def __init__(self,
                 cation_site=None,
                 site_ox_lim=None,
                 site_base_ox=None,
                 ordered_formulas=False,
                 A_site_occupancy=1,
                 anions=None):
        """
        Store the site configuration and create the matminer featurizers.

        Args:
            cation_site: Cation-to-site assignment. May be None only when
                ``ordered_formulas`` is set.
            site_ox_lim (dict): Per-site oxidation-state limits. Defaults
                to ``{'A': [0, 10], 'B': [0, 10], 'X': [-10, 0]}``.
            site_base_ox (dict): Per-site base oxidation state. Defaults
                to ``{'A': 2, 'B': 4, 'X': -2}``.
            ordered_formulas (bool): Whether formulas are ordered.
            A_site_occupancy: Stored unchanged on the instance.
            anions: Stored unchanged on the instance.

        Raises:
            ValueError: If ``cation_site`` is None and ``ordered_formulas``
                is False.
        """
        if cation_site is None and ordered_formulas is False:
            raise ValueError(
                'Either cation sites must be assigned, or formulas must be ordered. Otherwise site assignments can not be determined'
            )

        # The defaults used to be dict literals in the signature, i.e. one
        # dict object shared by every instance; a mutation through one
        # instance leaked into all others. Build fresh dicts per instance.
        if site_ox_lim is None:
            site_ox_lim = {'A': [0, 10], 'B': [0, 10], 'X': [-10, 0]}
        if site_base_ox is None:
            site_base_ox = {'A': 2, 'B': 4, 'X': -2}

        self.cation_site = cation_site
        self.site_ox_lim = site_ox_lim
        self.site_base_ox = site_base_ox
        self.ordered_formulas = ordered_formulas
        self.A_site_occupancy = A_site_occupancy
        self.anions = anions

        #matminer featurizers
        self.ValenceOrbital = ValenceOrbital()
        self.AtomicOrbitals = AtomicOrbitalsMod()
        self.CohesiveEnergy = CohesiveEnergy()
        #custom ElementProperty featurizer
        elemental_properties = [
            'BoilingT', 'MeltingT', 'BulkModulus', 'ShearModulus', 'Row',
            'Column', 'Number', 'MendeleevNumber', 'SpaceGroupNumber',
            'Density', 'MolarVolume', 'FusionEnthalpy', 'HeatVaporization',
            'NsUnfilled', 'NpUnfilled', 'NdUnfilled', 'NfUnfilled',
            'Polarizability', 'ThermalConductivity'
        ]
        self.ElementProperty = ElementProperty(
            data_source='magpie',
            features=elemental_properties,
            stats=["mean", "std_dev", "range"])

        self.check_matminer_featurizers()
        self.featurize_options = {}
Exemple #13
0
 def test_elem(self):
     """Check atomic-number statistics from the "magpie" ElementProperty preset."""
     preset = ElementProperty.from_preset("magpie")
     df_elem = preset.featurize_dataframe(self.df, col_id="composition")

     # (column, expected) pairs, compared with the default precision.
     expectations = (
         ("minimum Number", 8),
         ("maximum Number", 26),
         ("range Number", 18),
         ("mean Number", 15.2),
         ("avg_dev Number", 8.64),
         ("mode Number", 8),
     )
     for column, expected in expectations:
         self.assertAlmostEqual(df_elem[column][0], expected)
Exemple #14
0
 def test_elem_megnet_el(self):
     """Check MEGNet embedding maxima for the first two rows and the citations."""
     ep = ElementProperty.from_preset("megnet_el")
     df_elem = ep.featurize_dataframe(self.df, col_id="composition")

     # Both rows are expected to share the same maximum for each embedding.
     expected_maxima = (
         ("MEGNetElementData maximum embedding 1", 0.127333),
         ("MEGNetElementData maximum embedding 11", 0.160505),
     )
     for column, expected in expected_maxima:
         for row in (0, 1):
             self.assertAlmostEqual(
                 df_elem[column].iloc[row], expected, places=6)

     self.assertTrue(ep.citations())
def magpie_feature(formula):
    """Return the magpie ElementProperty feature vector of one formula.

    Args:
        formula: Formula; converted with ``str_to_composition``.

    Returns:
        numpy.ndarray: first row of the featurized frame with its first
        column (the formula itself) skipped.
    """
    frame = pd.DataFrame([formula], columns=["formula"])
    frame["composition"] = frame["formula"].transform(str_to_composition)

    magpie = ElementProperty.from_preset(preset_name="magpie")
    frame = magpie.featurize_dataframe(frame, col_id="composition")

    without_composition = frame.drop(columns=["composition"])
    return without_composition.iloc[0, 1:].to_numpy()
Exemple #16
0
def Magpie(formulas):
    """Compute magpie ElementProperty features for one or more formulas.

    Args:
        formulas: A single formula string or a collection of formulas. A
            lone string is wrapped into a one-element list.

    Returns:
        numpy.ndarray: float32 array with one row per formula, holding only
        the generated feature columns.
    """
    formula_list = [formulas] if isinstance(formulas, str) else formulas

    magpie = ElementProperty.from_preset(preset_name="magpie")
    frame = pd.DataFrame({"formula": formula_list})
    frame["composition"] = frame["formula"].transform(str_to_composition)
    frame = magpie.featurize_dataframe(frame, col_id="composition")

    # Keep the numeric feature columns only.
    features = frame.drop(columns=["composition", "formula"])
    return np.array(features).astype(np.float32)
Exemple #17
0
 def test_fere_corr(self):
     """Check statistics of the "FERE correction" property from the deml data."""
     featurizer = ElementProperty(
         features=["FERE correction"],
         stats=["minimum", "maximum", "range", "mean", "std_dev"],
         data_source="deml",
     )
     df_fere_corr = featurizer.featurize_dataframe(
         self.df, col_id="composition")

     # (statistic, expected) pairs, compared with the default precision.
     expectations = (
         ("minimum", -0.15213431610903),
         ("maximum", 0.23),
         ("range", 0.382134316),
         ("mean", 0.077146274),
         ("std_dev", 0.270209766),
     )
     for stat, expected in expectations:
         column = stat + " FERE correction"
         self.assertAlmostEqual(df_fere_corr[column][0], expected)
Exemple #18
0
 def test_elem_matscholar_el(self):
     """Check selected matscholar embedding statistics for the first two rows."""
     preset = ElementProperty.from_preset("matscholar_el")
     df_elem = preset.featurize_dataframe(self.df, col_id="composition")

     # (column, row position, expected) triples, default precision.
     expectations = (
         ("range matscholar_el_149", 0, 0.06827970966696739),
         ("range matscholar_el_149", 1, 0.06827970966696739),
         ("mean matscholar_el_18", 0, -0.020534400502219795),
         ("mean matscholar_el_18", 1, -0.02483355056028813),
     )
     for column, row, expected in expectations:
         self.assertAlmostEqual(df_elem[column].iloc[row], expected)
Exemple #19
0
	def __init__(self, radius_type='ionic_radius', normalize_formula=False):
		"""Store the options and create the matminer featurizers.

		Args:
			radius_type: Stored unchanged on ``self.radius_type``.
			normalize_formula: Stored unchanged on ``self.normalize_formula``.
		"""
		self.radius_type = radius_type
		self.normalize_formula = normalize_formula

		# Stock matminer featurizers.
		self.ValenceOrbital = ValenceOrbital()
		self.AtomicOrbitals = AtomicOrbitalsMod()
		self.CohesiveEnergy = CohesiveEnergy()
		self.BandCenter = BandCenter()
		self.ValenceOrbitalEnergy = ValenceOrbitalEnergy()

		# Custom ElementProperty featurizer: magpie data, mean and std_dev only.
		elemental_properties = [
			'BoilingT',
			'MeltingT',
			'BulkModulus',
			'ShearModulus',
			'Row',
			'Column',
			'Number',
			'MendeleevNumber',
			'SpaceGroupNumber',
			'Density',
			'MolarVolume',
			'FusionEnthalpy',
			'HeatVaporization',
			'Polarizability',
			'ThermalConductivity',
		]
		self.ElementProperty = ElementProperty(
			data_source='magpie',
			features=elemental_properties,
			stats=["mean", "std_dev"],
		)

		# Check matminer featurizers.
		self.check_matminer_featurizers()
Exemple #20
0
  def __init__(self, data_source: str = 'matminer'):
    """
    Build the ElementProperty featurizer for the chosen data source.

    Parameters
    ----------
    data_source: str of "matminer", "magpie" or "deml" (default "matminer")
      Source for element property data.

    Raises
    ------
    ImportError
      If matminer is not installed.
    """
    try:
      from matminer.featurizers.composition import ElementProperty
    except ModuleNotFoundError:
      raise ImportError("This class requires matminer to be installed.")
    else:
      self.data_source = data_source
      self.ep_featurizer = ElementProperty.from_preset(self.data_source)
Exemple #21
0
def similarity(_parents, target):
    """Score how close each parent is to ``target`` in feature space.

    Parents and target are featurized with the same matminer featurizer
    set, scaled with the pickled scaler, and the pairwise distance between
    each scaled parent and the scaled target is mapped to the level (0 to 1
    in steps of 0.01) whose pickled quantile value is closest to it.

    Args:
        _parents: Collection passed to ``featurize_many`` (presumably
            pymatgen structures; confirm against callers).
        target: Single object passed to ``featurize`` (presumably a
            pymatgen structure; confirm against callers).

    Returns:
        numpy.ndarray: one score per parent; parents whose featurization
        produced missing values get ``-1``.
    """
    featurizer = MultipleFeaturizer([
        SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"),
        StructuralHeterogeneity(),
        ChemicalOrdering(),
        MaximumPackingEfficiency(),
        SiteStatsFingerprint.from_preset(
            "LocalPropertyDifference_ward-prb-2017"),
        StructureComposition(Stoichiometry()),
        StructureComposition(ElementProperty.from_preset("magpie")),
        StructureComposition(ValenceOrbital(props=["frac"])),
        StructureComposition(IonProperty(fast=True)),
    ])

    # HACK celery doesn't work with multiprocessing (used by matminer)
    try:
        from celery import current_task
        if current_task:
            featurizer.set_n_jobs(1)
    except ImportError:
        pass

    x_target = pd.DataFrame.from_records([featurizer.featurize(target)],
                                         columns=featurizer.feature_labels())
    x_parent = pd.DataFrame.from_records(
        featurizer.featurize_many(_parents, ignore_errors=True, pbar=False),
        columns=featurizer.feature_labels(),
    )
    # Remember which parents failed to featurize before the NaNs are
    # replaced by a large sentinel value for scaling.
    nulls = x_parent[x_parent.isnull().any(axis=1)].index.values
    x_parent.fillna(100000, inplace=True)

    x_target = x_target.reindex(sorted(x_target.columns), axis=1)
    x_parent = x_parent.reindex(sorted(x_parent.columns), axis=1)

    # NOTE(security): pickle.load executes arbitrary code from the file;
    # this is only safe while settings.rxn_files holds trusted artifacts.
    with open(os.path.join(settings.rxn_files, "scaler2.pickle"), "rb") as f:
        scaler = pickle.load(f)
    with open(os.path.join(settings.rxn_files, "quantiles.pickle"), "rb") as f:
        quantiles = pickle.load(f)

    # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
    # frames the same way, with the target as the last row.
    X = scaler.transform(pd.concat([x_parent, x_target]))

    D = [pairwise_distances(np.array([row, X[-1]]))[0, 1] for row in X[:-1]]

    # Levels 0.00, 0.01, ..., 1.00 paired with the stored quantile values.
    levels = np.linspace(0, 1, 101)
    _res = np.array([levels[np.abs(quantiles - d).argmin()] for d in D])
    _res[nulls] = -1
    return _res
Exemple #22
0
    def __init__(self, materials, descriptors, **kwargs):
        """
        Calculates site-based descriptors (e.g., coordination numbers
        with different near-neighbor finding approaches) for materials and
        runs statistics analysis on selected descriptor types
        (order parameter-based site fingerprints).  The latter is
        useful as a definition of a structure fingerprint
        on the basis of local coordination information.
        Furthermore, composition descriptors are calculated
        (Magpie element property vector).

        Args:
            materials (Store): Store of materials documents.
            descriptors (Store): Store of composition, site, and
                                 structure descriptor data such
                                 as tetrahedral order parameter or
                                 fraction of being 8-fold coordinated.
            **kwargs: Forwarded to the parent constructor.
        """

        self.materials = materials
        self.descriptors = descriptors

        # Site descriptors: one unweighted and one weight-summed
        # coordination number per near-neighbor class.
        self.sds = {}
        for nn in nn_target_classes:
            finder_cls = getattr(local_env, nn)
            self.sds["cn_{}".format(nn)] = CoordinationNumber(
                finder_cls(), use_weights="none")
            self.sds["cn_wt_{}".format(nn)] = CoordinationNumber(
                finder_cls(), use_weights="sum")
        # Snapshot the coordination-number keys before "csf" is added;
        # "csf" is reported under "statistics" instead.
        coordination_keys = list(self.sds)
        self.sds["csf"] = CrystalNNFingerprint.from_preset(
            "ops", distance_cutoffs=None, x_diff_weight=None)

        # Composition descriptors.
        self.cds = {"magpie": ElementProperty.from_preset("magpie")}

        self.all_output_pieces = {
            "site_descriptors": coordination_keys,
            "statistics": ["csf"],
            "composition_descriptors": ["magpie"],
            "meta": ["atomate"],
        }

        super().__init__(source=materials,
                         target=descriptors,
                         ufn=self.calc,
                         projection=["structure"],
                         **kwargs)
 def test_elem_deml(self):
     """Check atomic-number and magnetic-moment statistics of the "deml" preset."""
     preset = ElementProperty.from_preset("deml")
     df_elem_deml = preset.featurize_dataframe(self.df, col_id="composition")

     # (column, expected) pairs, compared with the default precision.
     expectations = (
         ("minimum atom_num", 8),
         ("maximum atom_num", 26),
         ("range atom_num", 18),
         ("mean atom_num", 15.2),
         ("std_dev atom_num", 8.81816307),
         # Charge dependent property
         ("minimum magn_moment", 0),
         ("maximum magn_moment", 5.2),
         ("range magn_moment", 5.2),
         ("mean magn_moment", 2.08),
         ("std_dev magn_moment", 2.547469332),
     )
     for column, expected in expectations:
         self.assertAlmostEqual(df_elem_deml[column][0], expected)
Exemple #24
0
    def test_composition_features(self):
        """StructureComposition must mirror the composition featurizer it wraps."""
        magpie = ElementProperty.from_preset("magpie")
        wrapped = StructureComposition(featurizer=magpie)

        # Fitting on structures should simply not raise.
        wrapped.fit([self.nacl, self.diamond])

        # Featurizing the structure must equal featurizing its composition.
        from_structure = wrapped.featurize(self.nacl)
        from_composition = magpie.featurize(self.nacl.composition)
        self.assertArrayAlmostEqual(from_composition, from_structure)

        # Metadata is delegated to the wrapped featurizer.
        self.assertEqual(magpie.citations(), wrapped.citations())
        self.assertEqual(magpie.implementors(), wrapped.implementors())
Exemple #25
0
    def test_composition_features(self):
        """Compare StructureComposition output with its inner featurizer."""
        inner = ElementProperty.from_preset("magpie")
        outer = StructureComposition(featurizer=inner)

        # fit() only has to complete without an exception.
        training_structures = [self.nacl, self.diamond]
        outer.fit(training_structures)

        # Same feature vector via the structure and via its composition.
        structure_features = outer.featurize(self.nacl)
        composition_features = inner.featurize(self.nacl.composition)
        self.assertArrayAlmostEqual(composition_features, structure_features)

        # Citations and implementors come from the inner featurizer.
        self.assertEqual(inner.citations(), outer.citations())
        self.assertEqual(inner.implementors(), outer.implementors())
Exemple #26
0
    def __init__(self, materials, descriptors, mat_query=None, **kwargs):
        """
        Calculates site-based descriptors (e.g., coordination numbers
        with different near-neighbor finding approaches) for materials and
        runs statistics analysis on selected descriptor types
        (order parameter-based site fingerprints).  The latter is
        useful as a definition of a structure fingerprint
        on the basis of local coordination information.
        Furthermore, composition descriptors are calculated
        (Magpie element property vector).

        Args:
            materials (Store): Store of materials documents.
            descriptors (Store): Store of composition, site, and
                                 structure descriptor data such
                                 as tetrahedral order parameter or
                                 fraction of being 8-fold coordinated.
            mat_query (dict): dictionary to limit materials to be analyzed.
                A falsy value is stored as an empty dict.
            **kwargs: Forwarded to the parent constructor.
        """

        self.materials = materials
        self.descriptors = descriptors
        self.mat_query = mat_query or {}

        # Site descriptors: one unweighted and one weight-summed
        # coordination number per near-neighbor class.
        self.sds = {}
        for nn in nn_target_classes:
            finder_cls = getattr(pymatgen.analysis.local_env, nn)
            self.sds['cn_{}'.format(nn)] = CoordinationNumber(
                finder_cls(), use_weights='none')
            self.sds['cn_wt_{}'.format(nn)] = CoordinationNumber(
                finder_cls(), use_weights='sum')
        # Snapshot the coordination-number keys before 'csf' is added;
        # 'csf' is reported under 'statistics' instead.
        coordination_keys = list(self.sds)
        self.sds['csf'] = CrystalNNFingerprint.from_preset(
            'ops', distance_cutoffs=None, x_diff_weight=None)

        # Composition descriptors.
        self.cds = {"magpie": ElementProperty.from_preset('magpie')}

        self.all_output_pieces = {
            'site_descriptors': coordination_keys,
            'statistics': ['csf'],
            'composition_descriptors': ['magpie'],
            'meta': ['atomate'],
        }

        super().__init__(sources=[materials], targets=[descriptors], **kwargs)
Exemple #27
0
def generate_data(name):
    """Augment the CSV ``name`` with band gaps, a zero-gap flag and features.

    Reads ``name`` and ``gaps.csv``, fills a ``gaps`` column, derives the
    ``is_daoti`` flag (1 where the gap equals 0, else 0; "daoti" presumably
    means "conductor"), adds matminer composition and oxidation-state
    features, and writes the result to ``2d_vector_plus.csv``. A summary
    table is written to ``general_look_jie.csv``.

    Args:
        name: Path of the input CSV (e.g. 'test_plus_gaps.csv'); its first
            column is used as the index and it must provide a
            ``full_formula`` column.

    Returns:
        None. The results are written to disk.
    """
    df = pd.read_csv(name, index_col=[0])
    df['gaps'] = -10.0  # placeholder for rows without a matching gap entry
    df_gap = pd.read_csv("gaps.csv", index_col=[0])
    print(df_gap.index)

    # Sequential merge: walk through gaps.csv once and advance through df
    # whenever 'mp-<gap id>' equals the current df index label. This relies
    # on both files listing the materials in the same order.
    gaps_pos = df.columns.get_loc('gaps')
    n_rows = len(df.index)
    i = 0
    for j, gap_id in enumerate(df_gap.index):
        if i >= n_rows:
            # Every row of df is matched; without this guard the next
            # df.index[i] lookup raised IndexError.
            break
        if 'mp-' + str(gap_id) == df.index[i]:
            df.iloc[i, gaps_pos] = df_gap.iloc[j, 0]
            i += 1
    print("合并完毕")

    # Build the class label the same way. DataFrame.ix was removed in
    # pandas 1.0, so the row-wise .ix loop is replaced by a vectorized
    # comparison on the 'gaps' column.
    df['is_daoti'] = (df['gaps'] == 0).astype(int)
    print("分类feature建立完成")

    # First use describe() to get an overall picture of the data.
    print(df.describe())
    df.describe().to_csv('general_look_jie.csv')
    # Inspection of the data showed nothing abnormal.

    # Convert the formula into a composition and featurize it.
    df = StrToComposition().featurize_dataframe(
        df, 'full_formula', ignore_errors=True)
    print(df.head())
    ep_feat = ElementProperty.from_preset(preset_name='magpie')
    # The "composition" column is the featurization input.
    df = ep_feat.featurize_dataframe(
        df, col_id='composition', ignore_errors=True)
    print(df.head())

    # Add the oxidation-state related features.
    df = CompositionToOxidComposition().featurize_dataframe(
        df, col_id='composition')
    os_feat = OxidationStates()
    df = os_feat.featurize_dataframe(df, col_id='composition_oxid')
    new_name = '2d_vector_plus.csv'
    df.to_csv(new_name)
def generate_data():
    """Featurize the elastic-tensor dataset, saving a CSV after each stage.

    Loads the data with ``load_elastic_tensor``, drops the unwanted columns,
    then adds composition, element-property, oxidation-state and density
    features in that order. Progress is printed and every intermediate
    frame is written to its own CSV file. Returns None.
    """
    df = load_elastic_tensor()
    df.to_csv('原始elastic数据.csv')
    print(df.columns)

    df = df.drop(columns=[
        'volume', 'nsites', 'compliance_tensor', 'elastic_tensor',
        'elastic_tensor_original', 'K_Voigt', 'G_Voigt', 'K_Reuss', 'G_Reuss'
    ])
    print(df.head())
    df.to_csv('扔掉不需要的部分.csv')

    # First use describe() to get an overall picture of the data;
    # inspection showed nothing abnormal.
    summary = df.describe()
    print(summary)
    summary.to_csv('general_look.csv')

    # Formula strings -> "composition" column.
    to_composition = StrToComposition()
    df = to_composition.featurize_dataframe(df, 'formula')
    print(df.head())
    df.to_csv('引入composition.csv')

    # Next, a featurizer adds a series of descriptors; the "composition"
    # column is its input.
    ep_feat = ElementProperty.from_preset(preset_name='magpie')
    df = ep_feat.featurize_dataframe(df, col_id='composition')
    print(df.head())
    print(ep_feat.citations())
    df.to_csv('将composition特征化后.csv')

    # Now bring in another featurizer: oxidation-state related features.
    to_oxid = CompositionToOxidComposition()
    df = to_oxid.featurize_dataframe(df, 'composition')
    df = OxidationStates().featurize_dataframe(df, col_id='composition_oxid')
    print(df.head())
    df.to_csv('引入氧化态之后.csv')

    # Besides composition-based features there are many others, e.g.
    # structure-based ones such as density.
    df_feat = DensityFeatures()
    df = df_feat.featurize_dataframe(df, 'structure')
    print(df.head())
    df.to_csv('引入结构中的密度.csv')
    print(df_feat.feature_labels())
def test_featurizers():
    """Featurize 'test.csv' and write the result to 'after_test.csv'.

    Adds composition, magpie element-property and oxidation-state features
    in that order, printing the head of the frame after each stage.
    """
    df = pd.read_csv('test.csv', index_col=[0])

    # Formula strings -> "composition" column.
    to_composition = StrToComposition()
    df = to_composition.featurize_dataframe(df, 'formula')
    print(df.head())

    # Next, a featurizer adds a series of descriptors; the "composition"
    # column is its input.
    ep_feat = ElementProperty.from_preset(preset_name='magpie')
    df = ep_feat.featurize_dataframe(df, col_id='composition')
    print(df.head())
    print(ep_feat.citations())

    # Now bring in another featurizer: oxidation-state related features.
    to_oxid = CompositionToOxidComposition()
    df = to_oxid.featurize_dataframe(df, 'composition')
    df = OxidationStates().featurize_dataframe(df, col_id='composition_oxid')
    print(df.head())
    df.to_csv('after_test.csv')
Exemple #30
0
    def test_exclude_by_users(self):
        """
        Featurizers named in ``exclude`` must not add columns to the output.
        """
        df = copy.copy(self.test_df.iloc[:self.limit])
        target = "K_VRH"
        excluded = ["ElementProperty"]

        # Labels the excluded featurizer would have produced.
        ep_feats = ElementProperty.from_preset("matminer").feature_labels()

        af = AutoFeaturizer(exclude=excluded, preset="fast")
        af.fit(df, target)
        df = af.fit_transform(df, target)

        self.assertTrue(af.auto_featurizer)
        self.assertIn("ElementProperty", af.exclude)
        # None of the excluded featurizer's labels may appear as a column.
        self.assertFalse(any(label in df.columns for label in ep_feats))
Exemple #31
0
 def test_elem_megnet_el(self):
     """Verify MEGNet embedding maxima on rows 0 and 1, and that citations exist."""
     ep = ElementProperty.from_preset("megnet_el")
     df_elem = ep.featurize_dataframe(self.df, col_id="composition")

     # (embedding number, row position, expected maximum), six decimals.
     cases = (
         (1, 0, 0.127333),
         (1, 1, 0.127333),
         (11, 0, 0.160505),
         (11, 1, 0.160505),
     )
     for embedding, row, expected in cases:
         column = "MEGNetElementData maximum embedding {}".format(embedding)
         self.assertAlmostEqual(df_elem[column].iloc[row], expected, places=6)

     self.assertTrue(ep.citations())
Exemple #32
0
	def __init__(self, normalize_formula=False):
		"""Store the option and create the matminer featurizers.

		Args:
			normalize_formula: Stored unchanged on ``self.normalize_formula``.
		"""
		self.normalize_formula = normalize_formula

		# ValenceOrbital is deliberately not instantiated: valence counts etc.
		# are covered in ElementProperty.from_preset('magpie').
		self.AtomicOrbitals = AtomicOrbitalsMod()
		self.CohesiveEnergy = CohesiveEnergy()
		self.BandCenter = BandCenter()
		self.ValenceOrbitalEnergy = ValenceOrbitalEnergy()

		# ElementProperty featurizer: magpie preset properties plus the
		# additional properties below. The preset's stats are left unchanged
		# (range, min, max are irrelevant inside the ternary).
		self.ElementProperty = ElementProperty.from_preset('magpie')
		additional_properties = [
			'BoilingT',
			'BulkModulus',
			'ShearModulus',
			'Density',
			'MolarVolume',
			'FusionEnthalpy',
			'HeatVaporization',
			'Polarizability',
			'ThermalConductivity',
		]
		self.ElementProperty.features += additional_properties

		# Check matminer featurizers.
		self.check_matminer_featurizers()
Exemple #33
0
    def test_featurizers_by_users(self):
        """User-supplied featurizers must be the only ones that are applied."""
        df = copy.copy(self.test_df.iloc[:self.limit])
        target = "K_VRH"

        dn = DensityFeatures()
        gsf = GlobalSymmetryFeatures()
        af = AutoFeaturizer(featurizers={"structure": [dn, gsf]})
        df = af.fit_transform(df, target)

        # Ensure that the featurizers are not set automatically, metaselection
        # is not used, exclude is None and featurizers not passed by the users
        # are not used.
        self.assertFalse(af.auto_featurizer)
        self.assertTrue(af.exclude == [])
        for user_featurizer in (dn, gsf):
            self.assertIn(user_featurizer, af.featurizers["structure"])

        # ElementProperty was not requested, so none of its labels may show up.
        ep_feats = ElementProperty.from_preset("matminer").feature_labels()
        self.assertFalse(any(label in df.columns for label in ep_feats))