コード例 #1
0
ファイル: test_conversions.py プロジェクト: wcreus/matminer
    def test_composition_to_oxidcomposition(self):
        df = DataFrame(data={"composition": [Composition("Fe2O3")]})
        cto = CompositionToOxidComposition()
        df = cto.featurize_dataframe(df, 'composition')
        self.assertEqual(df["composition_oxid"].tolist()[0],
                         Composition({
                             "Fe3+": 2,
                             "O2-": 3
                         }))

        # test error handling
        df = DataFrame(data={"composition": [Composition("Fe2O3")]})
        cto = CompositionToOxidComposition(return_original_on_error=False,
                                           max_sites=2)
        self.assertRaises(ValueError, cto.featurize_dataframe, df,
                          'composition')

        # check non oxi state structure returned correctly
        cto = CompositionToOxidComposition(return_original_on_error=True,
                                           max_sites=2)
        df = cto.featurize_dataframe(df, 'composition')
        self.assertEqual(df["composition_oxid"].tolist()[0],
                         Composition({
                             "Fe": 2,
                             "O": 3
                         }))
コード例 #2
0
def composition_featurizer(df_input: pd.DataFrame, **kwargs) -> pd.DataFrame:
    """Return a Pandas DataFrame with all compositional features"""

    # generate the "composition" column
    df_comp = StrToComposition().featurize_dataframe(df_input,
                                                     col_id="Compound")
    # generate features based on elemental properites
    ep_featurizer = ElementProperty.from_preset(preset_name="magpie")
    ep_featurizer.featurize_dataframe(df_comp,
                                      col_id="composition",
                                      inplace=True)
    # generate the "composition_oxid" column based on guessed oxidation states
    CompositionToOxidComposition(
        return_original_on_error=True, **kwargs).featurize_dataframe(
            # ignore errors from non-integer stoichiometries
            df_comp,
            "composition",
            ignore_errors=True,
            inplace=True)
    # correct oxidation states
    df_comp = correct_comp_oxid(df_comp)
    # generate features based on oxidation states
    os_featurizer = OxidationStates()
    os_featurizer.featurize_dataframe(df_comp,
                                      "composition_oxid",
                                      ignore_errors=True,
                                      inplace=True)
    # remove compounds with predicted oxidation states of 0
    return df_comp[df_comp["minimum oxidation state"] != 0]
コード例 #3
0
ファイル: preprocessing.py プロジェクト: wwchung91/modnet
def featurize_composition(df: pd.DataFrame) -> pd.DataFrame:
    """ Decorate input `pandas.DataFrame` of structures with composition
    features from matminer.

    Currently applies the set of all matminer composition features.

    Args:
        df (pandas.DataFrame): the input dataframe with `"structure"`
            column containing `pymatgen.Structure` objects.

    Returns:
        pandas.DataFrame: the decorated DataFrame.

    """
    logging.info("Applying composition featurizers...")
    df = df.copy()
    df['composition'] = df['structure'].apply(lambda s: s.composition)
    featurizer = MultipleFeaturizer([ElementProperty.from_preset("magpie"),
                                     AtomicOrbitals(),
                                     BandCenter(),
                                     # ElectronAffinity(), - This descriptor was not used in the paper preset
                                     Stoichiometry(),
                                     ValenceOrbital(),
                                     IonProperty(),
                                     ElementFraction(),
                                     TMetalFraction(),
                                     # CohesiveEnergy(), - This descriptor was not used in the paper preset
                                     Miedema(),
                                     YangSolidSolution(),
                                     AtomicPackingEfficiency(),
                                     ])

    df = featurizer.featurize_dataframe(df, "composition", multiindex=True, ignore_errors=True)
    df.columns = df.columns.map('|'.join).str.strip('|')

    ox_featurizer = MultipleFeaturizer([OxidationStates(),
                                        ElectronegativityDiff()
                                        ])

    df = CompositionToOxidComposition().featurize_dataframe(df, "Input Data|composition")

    df = ox_featurizer.featurize_dataframe(df, "composition_oxid", multiindex=True, ignore_errors=True)
    df = df.rename(columns={'Input Data': ''})
    df.columns = df.columns.map('|'.join).str.strip('|')

    _orbitals = {"s": 1, "p": 2, "d": 3, "f": 4}

    df['AtomicOrbitals|HOMO_character'] = df['AtomicOrbitals|HOMO_character'].map(_orbitals)
    df['AtomicOrbitals|LUMO_character'] = df['AtomicOrbitals|LUMO_character'].map(_orbitals)

    df['AtomicOrbitals|HOMO_element'] = df['AtomicOrbitals|HOMO_element'].apply(
        lambda x: -1 if not isinstance(x, str) else Element(x).Z
    )
    df['AtomicOrbitals|LUMO_element'] = df['AtomicOrbitals|LUMO_element'].apply(
        lambda x: -1 if not isinstance(x, str) else Element(x).Z
    )

    df = df.replace([np.inf, -np.inf, np.nan], 0)

    return clean_df(df)
コード例 #4
0
ファイル: featurizers.py プロジェクト: ppdebreuck/modnet
    def featurize_composition(self, df: pd.DataFrame) -> pd.DataFrame:
        """Decorate input `pandas.DataFrame` of structures with composition
        features from matminer, specified by the MODFeaturizer preset.

        Currently applies the set of all matminer composition features.

        Arguments:
            df: the input dataframe with a `"structure"` column
                containing `pymatgen.Structure` objects.

        Returns:
            pandas.DataFrame: the decorated DataFrame, or an empty
                DataFrame if no composition/oxidation featurizers
                exist for this class.

        """

        df = df.copy()

        if self.composition_featurizers:

            LOG.info("Applying composition featurizers...")
            df["composition"] = df["structure"].apply(lambda s: s.composition)
            df = self._fit_apply_featurizers(
                df, self.composition_featurizers, "composition"
            )
            df = df.rename(columns={"Input Data": ""})
            df.columns = df.columns.map("|".join).str.strip("|")

        if self.oxid_composition_featurizers:
            LOG.info("Applying oxidation state featurizers...")
            if getattr(self, "fast_oxid", False):
                df = CompositionToOxidComposition(
                    all_oxi_states=False, max_sites=-1
                ).featurize_dataframe(df, "composition")
            else:
                df = CompositionToOxidComposition().featurize_dataframe(
                    df, "composition"
                )
            df = self._fit_apply_featurizers(
                df, self.oxid_composition_featurizers, "composition_oxid"
            )
            df = df.rename(columns={"Input Data": ""})
            df.columns = df.columns.map("|".join).str.strip("|")

        return df
コード例 #5
0
 def test_composition_to_oxidcomposition(self):
     df = DataFrame(data={"composition": [Composition("Fe2O3")]})
     cto = CompositionToOxidComposition()
     df = cto.featurize_dataframe(df, 'composition')
     self.assertEqual(df["composition_oxid"].tolist()[0],
                      Composition({
                          "Fe3+": 2,
                          "O2-": 3
                      }))
コード例 #6
0
ファイル: test_composition.py プロジェクト: FilipchukB/P1
    def test_miedema_all(self):
        df = pd.DataFrame({
            "composition": [
                Composition("TiZr"),
                Composition("Mg10Cu50Ca40"),
                Composition("Fe2O3")
            ]
        })
        miedema = Miedema(struct_types='all')
        self.assertTrue(miedema.precheck(df["composition"].iloc[0]))
        self.assertFalse(miedema.precheck(df["composition"].iloc[-1]))
        self.assertAlmostEqual(miedema.precheck_dataframe(df, "composition"),
                               2 / 3)

        # test precheck for oxidation-state decorated compositions
        df = CompositionToOxidComposition(return_original_on_error=True).\
            featurize_dataframe(df, 'composition')
        self.assertTrue(miedema.precheck(df["composition_oxid"].iloc[0]))
        self.assertFalse(miedema.precheck(df["composition_oxid"].iloc[-1]))
        self.assertAlmostEqual(
            miedema.precheck_dataframe(df, "composition_oxid"), 2 / 3)

        mfps = miedema.featurize_dataframe(df, col_id="composition")
        self.assertAlmostEqual(mfps['Miedema_deltaH_inter'][0],
                               -0.003445022152)
        self.assertAlmostEqual(mfps['Miedema_deltaH_amor'][0], 0.0707658836300)
        self.assertAlmostEqual(mfps['Miedema_deltaH_ss_min'][0], 0.03663599755)

        self.assertAlmostEqual(mfps['Miedema_deltaH_inter'][1],
                               -0.235125978427)
        self.assertAlmostEqual(mfps['Miedema_deltaH_amor'][1], -0.164541848271)
        self.assertAlmostEqual(mfps['Miedema_deltaH_ss_min'][1],
                               -0.05280843311)

        self.assertAlmostEqual(math.isnan(mfps['Miedema_deltaH_inter'][2]),
                               True)
        self.assertAlmostEqual(math.isnan(mfps['Miedema_deltaH_amor'][2]),
                               True)
        self.assertAlmostEqual(math.isnan(mfps['Miedema_deltaH_ss_min'][2]),
                               True)

        # make sure featurization works equally for compositions with or without
        # oxidation states
        mfps = miedema.featurize_dataframe(df, col_id="composition_oxid")
        self.assertAlmostEqual(mfps['Miedema_deltaH_inter'][0],
                               -0.003445022152)
        self.assertAlmostEqual(mfps['Miedema_deltaH_amor'][0], 0.0707658836300)
        self.assertAlmostEqual(mfps['Miedema_deltaH_ss_min'][0], 0.03663599755)
コード例 #7
0
def generate_data(name):
    #这个函数作用,输入是指定的文件名,输出增加了gaps,is_daoti,以及其他共计145特征的完整向量矩阵
    #name='test_plus_gaps.csv'
    df=pd.read_csv(name,index_col=[0])
    df['gaps']=-10.0   
    df_gap=pd.read_csv("gaps.csv",index_col = [0])
    print(df_gap.index)
    i=0    
    str_s=""
    for j in range(len(df_gap.index)):
        #先打印二者的id
       # print(df.index[i])
        str_s='mp-'+str(df_gap.index[j])
        if(str_s==df.index[i]):
            df.iloc[i,-1]=df_gap.iloc[j,0]
            i=i+1
            #print("确实一样") 
    print("合并完毕")

    #同样的方法我们来建立不同的分类
    df['is_daoti']=-2
    for i in range(len(df.index)):
        if(df.ix[i,-2]==0):
            df.ix[i,-1]=1
        else:
            df.ix[i,-1]=0
    print("分类feature建立完成")   
    
#首先使用describe获得对于数据的整体把握
    print(df.describe())
    df.describe().to_csv('general_look_jie.csv')
#通过观察数据发现并没有什么异常之处
    df=StrToComposition().featurize_dataframe(df,'full_formula',ignore_errors=True)
    print(df.head())   
    #print(df['composition'])
    ep_feat=ElementProperty.from_preset(preset_name='magpie')
    df=ep_feat.featurize_dataframe(df,col_id='composition',ignore_errors=True)#将composition这一列作为特征化的输入
    print(df.head())
    #print(ep_feat.citations())
    #df.to_csv("plus the composition.csv")
    #以上这部分是将formula转化为composition并转化feature

    df=CompositionToOxidComposition().featurize_dataframe(df,col_id='composition')#引入了氧化态的相关特征
    os_feat=OxidationStates()
    df=os_feat.featurize_dataframe(df,col_id='composition_oxid')
    new_name='2d_vector_plus.csv'
    df.to_csv(new_name)
コード例 #8
0
def generate_data():
    df = load_elastic_tensor()
    df.to_csv('原始elastic数据.csv')
    print(df.columns)

    unwanted_columns = [
        'volume', 'nsites', 'compliance_tensor', 'elastic_tensor',
        'elastic_tensor_original', 'K_Voigt', 'G_Voigt', 'K_Reuss', 'G_Reuss'
    ]
    df = df.drop(unwanted_columns, axis=1)
    print(df.head())
    df.to_csv('扔掉不需要的部分.csv')

    #首先使用describe获得对于数据的整体把握
    print(df.describe())
    df.describe().to_csv('general_look.csv')
    #通过观察数据发现并没有什么异常之处
    df = StrToComposition().featurize_dataframe(df, 'formula')
    print(df.head())
    df.to_csv('引入composition.csv')

    #下一步,我们需要其中一个特征化来增加一系列的特征算符
    ep_feat = ElementProperty.from_preset(preset_name='magpie')
    df = ep_feat.featurize_dataframe(
        df, col_id='composition')  #将composition这一列作为特征化的输入
    print(df.head())
    print(ep_feat.citations())
    df.to_csv('将composition特征化后.csv')

    #开始引入新的特征化算符吧
    df = CompositionToOxidComposition().featurize_dataframe(
        df, 'composition')  #引入了氧化态的相关特征
    os_feat = OxidationStates()
    df = os_feat.featurize_dataframe(df, col_id='composition_oxid')
    print(df.head())
    df.to_csv('引入氧化态之后.csv')

    #其实除了基于composition的特征之外还有很多其他的,比如基于结构的
    df_feat = DensityFeatures()
    df = df_feat.featurize_dataframe(df, 'structure')
    print(df.head())
    df.to_csv('引入结构中的密度.csv')
    print(df_feat.feature_labels())
コード例 #9
0
def test_featurizers():
    df = pd.read_csv('test.csv', index_col=[0])
    df = StrToComposition().featurize_dataframe(df, 'formula')
    print(df.head())
    #下一步,我们需要其中一个特征化来增加一系列的特征算符
    ep_feat = ElementProperty.from_preset(preset_name='magpie')
    df = ep_feat.featurize_dataframe(
        df, col_id='composition')  #将composition这一列作为特征化的输入
    print(df.head())
    print(ep_feat.citations())
    #df.to_csv('将composition特征化后.csv')

    #开始引入新的特征化算符吧
    df = CompositionToOxidComposition().featurize_dataframe(
        df, 'composition')  #引入了氧化态的相关特征
    os_feat = OxidationStates()
    df = os_feat.featurize_dataframe(df, col_id='composition_oxid')
    print(df.head())
    df.to_csv('after_test.csv')
コード例 #10
0
    def featurize_composition(self, df: pd.DataFrame) -> pd.DataFrame:
        """ Decorate input `pandas.DataFrame` of structures with composition
        features from matminer, specified by the MODFeaturizer preset.

        Currently applies the set of all matminer composition features.

        Arguments:
            df: the input dataframe with a `"structure"` column
                containing `pymatgen.Structure` objects.

        Returns:
            pandas.DataFrame: the decorated DataFrame, or an empty
                DataFrame if no composition/oxidation featurizers
                exist for this class.

        """

        if not (self.composition_featurizers
                or self.oxide_composition_featurizers):
            return pd.DataFrame([])

        df = df.copy()

        if self.composition_featurizers:
            logging.info("Applying composition featurizers...")
            df['composition'] = df['structure'].apply(lambda s: s.composition)
            df = self._fit_apply_featurizers(df, self.composition_featurizers,
                                             "composition")
            df = df.replace([np.inf, -np.inf, np.nan], 0)
            df = df.rename(columns={'Input Data': ''})
            df.columns = df.columns.map('|'.join).str.strip('|')

        if self.oxide_composition_featurizers:
            logging.info("Applying oxidation state featurizers...")
            df = CompositionToOxidComposition().featurize_dataframe(
                df, "composition")
            df = self._fit_apply_featurizers(
                df, self.oxide_composition_featurizers, "composition_oxid")
            df = df.rename(columns={'Input Data': ''})
            df.columns = df.columns.map('|'.join).str.strip('|')

        return df
コード例 #11
0
ファイル: test_composition.py プロジェクト: FilipchukB/P1
    def test_yang(self):
        comps = list(
            map(Composition, [
                "ZrHfTiCuNi", "CuNi", "CoCrFeNiCuAl0.3", "CoCrFeNiCuAl", "LaO3"
            ]))

        # Run the featurization
        feat = YangSolidSolution()

        df = pd.DataFrame({"composition": comps})
        self.assertFalse(feat.precheck(df["composition"].iloc[-1]))
        self.assertAlmostEqual(feat.precheck_dataframe(df, "composition"),
                               0.8,
                               places=2)

        # test precheck for oxidation-state decorated compositions
        df = CompositionToOxidComposition(return_original_on_error=True). \
            featurize_dataframe(df, 'composition')
        self.assertFalse(feat.precheck(df["composition_oxid"].iloc[-1]))
        self.assertAlmostEqual(feat.precheck_dataframe(df, "composition_oxid"),
                               0.8,
                               places=2)

        feat.set_n_jobs(1)
        features = feat.featurize_many(comps)

        # Check the results
        #  These are compared to results from the original paper,
        #   except for CoCrFeNiCuAl0.3, where the paper reports a value
        #   exactly 1/10th of what I compute using Excel and matminer
        # I use a high tolerance because matminer uses a different source
        #   of radii than the original paper (do not have Kittel's atomic
        #   radii available)
        self.assertEqual((5, 2), np.array(features).shape)
        self.assertArrayAlmostEqual([0.95, 0.1021], features[0], decimal=2)
        self.assertArrayAlmostEqual([2.22, 0.0], features[1], decimal=2)
        self.assertArrayAlmostEqual([158.5, 0.0315], features[2], decimal=1)
        self.assertArrayAlmostEqual([5.06, 0.0482], features[3], decimal=1)
コード例 #12
0
import numpy as np
np.savez_compressed("heusler_all.npz", data=data_3)

# In[ ]:

# Featurization
# This part is done with reference to the matiner examples
from matminer.featurizers.composition import ElementProperty

ep_feat = ElementProperty.from_preset(preset_name="magpie")
data_3 = ep_feat.featurize_dataframe(data_3, col_id="composition")

from matminer.featurizers.conversions import CompositionToOxidComposition
from matminer.featurizers.composition import OxidationStates

data_3 = CompositionToOxidComposition().featurize_dataframe(
    data_3, "composition")

os_feat = OxidationStates()
data_3 = os_feat.featurize_dataframe(data_3, "composition_oxid")

from matminer.featurizers.structure import DensityFeatures

df_feat = DensityFeatures()
data_3 = df_feat.featurize_dataframe(data_3, "structure")

unwanted_columns = [
    "elasticity", "material_id", "nsites", "compliance_tensor",
    "elastic_tensor", "elastic_tensor_original", "K_Voigt", "G_Voigt",
    "K_Reuss", "G_Reuss", "warnings"
]
data_4 = data_3.drop(unwanted_columns, axis=1)
コード例 #13
0
ファイル: predict_RF.py プロジェクト: alvarotb/scm2020
#Block 1 - Loading dataframe
'''
# arbitrary inputs - Li must be excluded to ensure consistency
data = [['mp-1025496', 'Nb1 Se2'], ['mp-977563', 'Nb1 Ir2'],
        ['mp-864631', 'Nb1 Rh2'], ['mp-3368', 'Nb3 O8']]

fdf = pd.DataFrame(data, columns=['Id', 'Reduced Formula'])

## Initial conversion to matminer objects
from matminer.featurizers.conversions import StrToComposition

fdf = StrToComposition().featurize_dataframe(fdf, 'Reduced Formula')

from matminer.featurizers.conversions import CompositionToOxidComposition

fdf = CompositionToOxidComposition().featurize_dataframe(fdf, 'composition')

print("The initial dataset has {}".format(fdf.shape))
print(fdf.head())
'''
Block 2 - Featurization
'''
#
# -- start F1
from matminer.featurizers.composition import ElementProperty

ep_feat = ElementProperty.from_preset(preset_name='magpie')
fdf = ep_feat.featurize_dataframe(fdf,
                                  col_id='composition',
                                  ignore_errors=True)
                    "elastic_tensor_original", "K_Voigt", "G_Voigt", "K_Reuss", "G_Reuss"]
df = df.drop(unwanted_columns, axis=1)

from matminer.featurizers.conversions import StrToComposition

df = StrToComposition().featurize_dataframe(df, 'formula')

from matminer.featurizers.composition import ElementProperty

ep_feat = ElementProperty.from_preset(preset_name="magpie")
df = ep_feat.featurize_dataframe(df, col_id='composition')

from matminer.featurizers.conversions import CompositionToOxidComposition
from matminer.featurizers.composition import OxidationStates

df = CompositionToOxidComposition().featurize_dataframe(df, "composition")

os_feat = OxidationStates()
df = os_feat.featurize_dataframe(df, "composition_oxid")

from matminer.featurizers.structure import DensityFeatures

df_feat = DensityFeatures()
df = df_feat.featurize_dataframe(df, col_id='structure')

y = df['K_VRH'].values
excluded = ["G_VRH", "K_VRH", "elastic_anisotropy", "formula", "material_id",
            "poisson_ratio", "structure", "composition", "composition_oxid"]
X = df.drop(excluded, axis=1)
print("There are {} possible descriptors:\n\n{}".format(X.shape[1], X.columns.values))
                    'cif', 'kpoint_density', 'poscar']
df = df.drop(unwanted_columns, axis=1)

from matminer.featurizers.conversions import StrToComposition

sc_feat = StrToComposition()
df = sc_feat.featurize_dataframe(df, col_id='formula')

from matminer.featurizers.composition import ElementProperty

ep_feat = ElementProperty.from_preset(preset_name='magpie')
df = ep_feat.featurize_dataframe(df, col_id='composition')

from matminer.featurizers.conversions import CompositionToOxidComposition

co_feat = CompositionToOxidComposition()
df = co_feat.featurize_dataframe(df, col_id='composition')

from matminer.featurizers.composition import OxidationStates

os_feat = OxidationStates()
df = os_feat.featurize_dataframe(df, col_id='composition_oxid')

from matminer.featurizers.structure import DensityFeatures

df_feat = DensityFeatures()
df = df_feat.featurize_dataframe(df, col_id='structure')

"""
formula, structure, elastic_anisotropy, G_Reuss, G_VRH, G_Voigt, K_Reuss, K_VRH, K_Voigt,
poisson_ratio, compliance_tensor, elastic_tensor, elastic_tensor_original, composition
コード例 #16
0
ファイル: core.py プロジェクト: kmu/automatminer
    def _tidy_column(self, df, featurizer_type):
        """
        Various conversions to homogenize columns for featurization input.
        For example, take a column of compositions and ensure they are decorated
        with oxidation states, are not strings, etc.

        Args:
            df (pandas.DataFrame)
            featurizer_type: The key defining the featurizer input. For example,
                composition featurizers should have featurizer_type of
                "composition".

        Returns:
            df (pandas.DataFrame): DataFrame with featurizer_type column
                ready for featurization.
        """
        # todo: Make the following conversions more robust (no [0] type checking)
        type_tester = df[featurizer_type].iloc[0]

        if featurizer_type == self.composition_col:
            # Convert formulas to composition objects
            if isinstance(type_tester, str):
                self.logger.info(
                    self._log_prefix +
                    "Compositions detected as strings. Attempting "
                    "conversion to Composition objects...")
                stc = StrToComposition(overwrite_data=True,
                                       target_col_id=featurizer_type)
                df = stc.featurize_dataframe(df,
                                             featurizer_type,
                                             multiindex=self.multiindex,
                                             ignore_errors=True,
                                             inplace=False)

            elif isinstance(type_tester, dict):
                self.logger.info(self._log_prefix +
                                 "Compositions detected as dicts. Attempting "
                                 "conversion to Composition objects...")
                df[featurizer_type] = [
                    Composition.from_dict(d) for d in df[featurizer_type]
                ]

            # Convert non-oxidstate containing comps to oxidstate comps
            if self.guess_oxistates:
                self.logger.info(
                    self._log_prefix +
                    "Guessing oxidation states of compositions, as"
                    " they were not present in input.")
                cto = CompositionToOxidComposition(
                    target_col_id=featurizer_type,
                    overwrite_data=True,
                    return_original_on_error=True,
                    max_sites=-50)
                try:
                    df = cto.featurize_dataframe(df,
                                                 featurizer_type,
                                                 multiindex=self.multiindex,
                                                 inplace=False)
                except Exception as e:
                    self.logger.info(self._log_prefix +
                                     "Could not decorate oxidation states due "
                                     "to {}. Excluding featurizers based on "
                                     "composition oxistates".format(e))
                    classes_require_oxi = [
                        c.__class__.__name__
                        for c in CompositionFeaturizers().need_oxi
                    ]
                    self.exclude.extend(classes_require_oxi)

        else:
            # Convert structure/bs/dos dicts to objects (robust already)
            if isinstance(type_tester, (dict, str)):
                self.logger.info(self._log_prefix.capitalize() +
                                 "{} detected as string or dict. Attempting "
                                 "conversion to {} objects..."
                                 "".format(featurizer_type, featurizer_type))
                if isinstance(type_tester, str):
                    raise ValueError("{} column is type {}. Cannot convert."
                                     "".format(featurizer_type,
                                               type(type_tester)))
                dto = DictToObject(overwrite_data=True,
                                   target_col_id=featurizer_type)
                df = dto.featurize_dataframe(df,
                                             featurizer_type,
                                             inplace=False)

                # Decorate with oxidstates
                if featurizer_type == self.structure_col and \
                        self.guess_oxistates:
                    self.logger.info(
                        self._log_prefix +
                        "Guessing oxidation states of structures if they were "
                        "not present in input.")
                    sto = StructureToOxidStructure(
                        target_col_id=featurizer_type,
                        overwrite_data=True,
                        return_original_on_error=True,
                        max_sites=-50)
                    try:
                        df = sto.featurize_dataframe(
                            df,
                            featurizer_type,
                            multiindex=self.multiindex,
                            inplace=False)
                    except Exception as e:
                        self.logger.info(
                            self._log_prefix +
                            "Could not decorate oxidation states on structures "
                            "due to {}.".format(e))
        return df
コード例 #17
0
    #
    ##structural heterogeneity
    #from matminer.featurizers.structure import StructuralHeterogeneity
    #structural_heterogeneity = StructuralHeterogeneity()
    #structural_heterogeneity.set_n_jobs(28)
    #labels.append(structural_heterogeneity.feature_labels())
    #df  = structural_heterogeneity.featurize_dataframe(df, 'structures',ignore_errors=False)

    #convert structure to composition
    from matminer.featurizers.conversions import StructureToComposition
    structures_to_compositions = StructureToComposition()
    df = structures_to_compositions.featurize_dataframe(df, 'structures')

    #convert composition to oxidcomposition
    from matminer.featurizers.conversions import CompositionToOxidComposition
    OxidCompositions = CompositionToOxidComposition()
    print(OxidCompositions.feature_labels())
    df = OxidCompositions.featurize_dataframe(df, 'composition')

    #CohesiveEnergy
    from matminer.featurizers.composition import CohesiveEnergy
    cohesive_energy = CohesiveEnergy()
    cohesive_energy.set_n_jobs(28)
    labels.append(cohesive_energy.feature_labels())
    df = cohesive_energy.featurize_dataframe(df,
                                             'composition',
                                             ignore_errors=True)

    #ValenceOrbital
    from matminer.featurizers.composition import ValenceOrbital
    valence_orbital = ValenceOrbital()
コード例 #18
0
def AddFeatures(df):  # Add features by Matminer
    from matminer.featurizers.conversions import StrToComposition
    df = StrToComposition().featurize_dataframe(df, "formula")

    from matminer.featurizers.composition import ElementProperty

    ep_feat = ElementProperty.from_preset(preset_name="magpie")
    df = ep_feat.featurize_dataframe(
        df, col_id="composition"
    )  # input the "composition" column to the featurizer

    from matminer.featurizers.conversions import CompositionToOxidComposition
    from matminer.featurizers.composition import OxidationStates

    df = CompositionToOxidComposition().featurize_dataframe(df, "composition")

    os_feat = OxidationStates()
    df = os_feat.featurize_dataframe(df, "composition_oxid")

    from matminer.featurizers.composition import ElectronAffinity

    ea_feat = ElectronAffinity()
    df = ea_feat.featurize_dataframe(df,
                                     "composition_oxid",
                                     ignore_errors=True)

    from matminer.featurizers.composition import BandCenter

    bc_feat = BandCenter()
    df = bc_feat.featurize_dataframe(df,
                                     "composition_oxid",
                                     ignore_errors=True)

    from matminer.featurizers.composition import CohesiveEnergy

    ce_feat = CohesiveEnergy()
    df = ce_feat.featurize_dataframe(df,
                                     "composition_oxid",
                                     ignore_errors=True)

    from matminer.featurizers.composition import Miedema

    m_feat = Miedema()
    df = m_feat.featurize_dataframe(df, "composition_oxid", ignore_errors=True)

    from matminer.featurizers.composition import TMetalFraction

    tmf_feat = TMetalFraction()
    df = tmf_feat.featurize_dataframe(df,
                                      "composition_oxid",
                                      ignore_errors=True)

    from matminer.featurizers.composition import ValenceOrbital

    vo_feat = ValenceOrbital()
    df = vo_feat.featurize_dataframe(df,
                                     "composition_oxid",
                                     ignore_errors=True)

    from matminer.featurizers.composition import YangSolidSolution

    yss_feat = YangSolidSolution()
    df = yss_feat.featurize_dataframe(df,
                                      "composition_oxid",
                                      ignore_errors=True)

    from matminer.featurizers.structure import GlobalSymmetryFeatures

    # This is the border between compositional features and structural features. Comment out the following featurizers to use only compostional features.

    gsf_feat = GlobalSymmetryFeatures()
    df = gsf_feat.featurize_dataframe(df, "structure", ignore_errors=True)

    from matminer.featurizers.structure import StructuralComplexity
    sc_feat = StructuralComplexity()
    df = sc_feat.featurize_dataframe(df, "structure", ignore_errors=True)

    from matminer.featurizers.structure import ChemicalOrdering
    co_feat = ChemicalOrdering()
    df = co_feat.featurize_dataframe(df, "structure", ignore_errors=True)

    from matminer.featurizers.structure import MaximumPackingEfficiency
    mpe_feat = MaximumPackingEfficiency()
    df = mpe_feat.featurize_dataframe(df, "structure", ignore_errors=True)

    from matminer.featurizers.structure import MinimumRelativeDistances
    mrd_feat = MinimumRelativeDistances()
    df = mrd_feat.featurize_dataframe(df, "structure", ignore_errors=True)

    from matminer.featurizers.structure import StructuralHeterogeneity
    sh_feat = StructuralHeterogeneity()
    df = sh_feat.featurize_dataframe(df, "structure", ignore_errors=True)

    from matminer.featurizers.structure import SiteStatsFingerprint

    from matminer.featurizers.site import AverageBondLength
    from pymatgen.analysis.local_env import CrystalNN
    bl_feat = SiteStatsFingerprint(
        AverageBondLength(CrystalNN(search_cutoff=20)))
    df = bl_feat.featurize_dataframe(df, "structure", ignore_errors=True)

    from matminer.featurizers.site import AverageBondAngle
    ba_feat = SiteStatsFingerprint(
        AverageBondAngle(CrystalNN(search_cutoff=20)))
    df = ba_feat.featurize_dataframe(df, "structure", ignore_errors=True)

    from matminer.featurizers.site import BondOrientationalParameter
    bop_feat = SiteStatsFingerprint(BondOrientationalParameter())
    df = bop_feat.featurize_dataframe(df, "structure", ignore_errors=True)

    from matminer.featurizers.site import CoordinationNumber
    cn_feat = SiteStatsFingerprint(CoordinationNumber())
    df = cn_feat.featurize_dataframe(df, "structure", ignore_errors=True)

    from matminer.featurizers.structure import DensityFeatures
    df_feat = DensityFeatures()
    df = df_feat.featurize_dataframe(df, "structure", ignore_errors=True)
    return (df)