def test_global_symmetry(self):
    """Check the GlobalSymmetryFeatures output for the diamond fixture."""
    featurizer = GlobalSymmetryFeatures()
    observed = featurizer.featurize(self.diamond)
    expected = [227, "cubic", 1, True]
    self.assertEqual(observed, expected)
def _extract_features(self, df_input):
    """
    Extract features using Matminer from the 'structure' column in df_input

    Note:
        df_input is modified in place: its 'theoretical' and 'structure'
        columns are dropped and the feature columns listed below are added.
        The returned frame is a separate object.

    Args:
        df_input (DataFrame): Pandas DataFrame which contains features
            from Materials Project Database of the input samples. It must
            provide the 'theoretical', 'structure',
            'formation_energy_per_atom', 'PU_label' and 'icsd_ids' columns.

    Returns:
        df_extracted (DataFrame): Pandas DataFrame which contains features
            of input samples extracted using Matminer, restricted to rows
            with a positive 'cohesive_energy', with 'PU_label' as the last
            column and without the 'icsd_ids' column.
    """
    # Dropping the 'theoretical' column
    df_input.drop(columns=["theoretical"], inplace=True)

    # Extracting the features
    dfeat = DensityFeatures()
    symmfeat = GlobalSymmetryFeatures()
    mfeat = Meredig()
    cefeat = CohesiveEnergy()

    structures = list(df_input["structure"])
    compositions = [structure.composition for structure in structures]

    # Featurize each structure once and pick the entries afterwards,
    # instead of re-running the featurizer for every derived column.
    density_rows = [dfeat.featurize(structure) for structure in structures]
    df_input["density"] = [row[0] for row in density_rows]
    df_input["vpa"] = [row[1] for row in density_rows]
    df_input["packing fraction"] = [row[2] for row in density_rows]

    df_input["spacegroup_num"] = [
        symmfeat.featurize(structure)[0] for structure in structures
    ]

    df_input["cohesive_energy"] = [
        cefeat.featurize(
            composition,
            formation_energy_per_atom=formation_energy,
        )[0]
        for composition, formation_energy in zip(
            compositions, df_input["formation_energy_per_atom"]
        )
    ]

    # The selected descriptors sit at fixed positions counted from the end
    # of the Meredig feature vector.
    meredig_rows = [mfeat.featurize(composition) for composition in compositions]
    meredig_columns = (
        ("mean AtomicWeight", -17),
        ("range AtomicRadius", -12),
        ("mean AtomicRadius", -11),
        ("range Electronegativity", -10),
        ("mean Electronegativity", -9),
    )
    for column, position in meredig_columns:
        df_input[column] = [row[position] for row in meredig_rows]

    # Drop 'structure' column
    df_input.drop(columns=["structure"], inplace=True)

    # Fill missing feature values with the column means, then keep only the
    # rows with a positive cohesive energy. numeric_only=True is required:
    # pandas >= 2.0 raises TypeError when averaging non-numeric columns
    # (such columns are still present in the frame at this point).
    column_means = df_input.mean(numeric_only=True)
    df_extracted = df_input.fillna(column_means).query("cohesive_energy > 0.0")

    # Move 'PU_label' to the last position and drop the icsd_ids column
    pu_label = df_extracted["PU_label"]
    df_extracted = df_extracted.drop(columns=["PU_label", "icsd_ids"])
    df_extracted["PU_label"] = pu_label

    return df_extracted