class TestPymatgenData(TestCase):
    """Unit tests for the property lookups exposed by ``PymatgenData``."""

    def setUp(self):
        """Create a fresh ``PymatgenData`` instance before each test."""
        self.data_source = PymatgenData()

    def test_get_property(self):
        """Check one elemental and one charge-dependent property lookup."""
        beryllium_mass = self.data_source.get_elemental_property(
            Element("Be"), "atomic_mass"
        )
        self.assertAlmostEqual(9.012182, beryllium_mass)

        actinium_radius = self.data_source.get_charge_dependent_property(
            Element("Ac"), 3, "ionic_radii"
        )
        self.assertAlmostEqual(1.26, actinium_radius)

    def test_get_oxidation(self):
        """Check the oxidation states of Nd with the common-states flag on and off."""
        # The freshly constructed data source is expected to report (3,) for Nd.
        states_before = self.data_source.get_oxidation_states(Element("Nd"))
        self.assertEqual((3,), states_before)

        # After switching the flag off, (2, 3) is expected instead.
        self.data_source.use_common_oxi_states = False
        states_after = self.data_source.get_oxidation_states(Element("Nd"))
        self.assertEqual((2, 3), states_after)
def get_fps(structure, cutoff=10.0, processes=8):
    """Compute the descriptors of every site in ``structure`` in parallel.

    The featurizer and data-source objects are created once, bundled with
    each ``(index, site)`` pair obtained by enumerating ``structure``, and
    handed to ``get_all_site_descrs`` through a ``multiprocessing`` pool.

    Args:
        structure: Object that can be enumerated site by site and is accepted
            by the ``featurize_structure`` calls below (presumably a pymatgen
            ``Structure`` -- confirm against callers).
        cutoff: Value forwarded as ``cutoff`` to ``VoronoiFingerprintModified``.
        processes: Number of worker processes in the pool.

    Returns:
        A ``numpy`` array built from the per-site results of
        ``get_all_site_descrs``. If an ``AttributeError`` or ``IndexError``
        is raised while building the featurizers or mapping over the sites,
        an empty list is returned instead (best-effort behaviour).
    """
    import logging

    all_descrs = []
    try:
        coordination_number_ = CoordinationNumber.from_preset('VoronoiNN')
        voronoi_fps_ = VoronoiFingerprintModified(
            cutoff=cutoff).featurize_structure(structure)
        crystal_nn_fingerprint_ = CrystalNNFingerprint.from_preset('cn')
        op_site_fingerprint_ = OPSiteFingerprint()
        agni_fingerprints_ = AGNIFingerprints()
        gaussian_symm_func_fps_ = GaussianSymmFuncModified(
        ).featurize_structure(structure)
        pymatgen_data_ = PymatgenData()
        magpie_data_ = MagpieData()

        # One argument bundle per site; every bundle carries the same
        # featurizer objects plus the site and its index.
        data_list = [[
            structure, i, site, coordination_number_, voronoi_fps_,
            crystal_nn_fingerprint_, op_site_fingerprint_, agni_fingerprints_,
            gaussian_symm_func_fps_, pymatgen_data_, magpie_data_
        ] for i, site in enumerate(structure)]

        pool = multiprocessing.Pool(processes=processes)
        try:
            all_descrs = np.array(pool.map(get_all_site_descrs, data_list))
        finally:
            # Always release the worker processes, even when a task raised;
            # otherwise every call leaks ``processes`` children.
            pool.close()
            pool.join()
    except (AttributeError, IndexError):
        # Best effort: keep returning the empty default, but leave a trace
        # so that failures are no longer completely silent.
        logging.getLogger(__name__).warning(
            "get_fps failed; returning no descriptors", exc_info=True)

    return all_descrs
def __init__(self, data_source=None, fast=False):
    """
    Args:
        data_source - (OxidationStateMixin) - An AbstractData class that
            supports the `get_oxidation_states` method. When omitted
            (None), a new PymatgenData instance is created for this object.
        fast - (boolean) whether to assume elements exist in a single
            oxidation state, which can dramatically accelerate the
            calculation of whether an ionic compound is possible, but will
            miss heterovalent compounds like Fe3O4.
    """
    # Build the default here rather than in the signature: a default written
    # as `PymatgenData()` is evaluated once at definition time, so every
    # instance would share (and could mutate) the same data-source object.
    if data_source is None:
        data_source = PymatgenData()

    self.data_source = data_source
    self.fast = fast
def __init__(self, data_source, features, stats):
    """Store the data source, the feature names and the statistics.

    Args:
        data_source: Either one of the preset names "pymatgen", "magpie" or
            "deml" (a matching data object is then created), or any other
            value, which is stored unchanged.
        features: Stored as ``self.features``.
        stats: Stored as ``self.stats``.
    """
    # Anything that is not a recognised preset name is used as given.
    resolved_source = data_source
    if data_source == "pymatgen":
        resolved_source = PymatgenData()
    elif data_source == "magpie":
        resolved_source = MagpieData()
    elif data_source == "deml":
        resolved_source = DemlData()

    self.data_source = resolved_source
    self.features = features
    self.stats = stats
def __init__(self, data_source, features, stats):
    """Store the data source, the feature names and the statistics.

    Args:
        data_source: Either one of the preset names "pymatgen", "magpie",
            "deml", "matscholar_el" or "megnet_el" (a matching data object
            is then created), or any other value, which is stored unchanged.
        features: Stored as ``self.features``.
        stats: Stored as ``self.stats``.
    """
    # Preset name -> class to instantiate, checked in this order.
    preset_sources = (
        ("pymatgen", PymatgenData),
        ("magpie", MagpieData),
        ("deml", DemlData),
        ("matscholar_el", MatscholarElementData),
        ("megnet_el", MEGNetElementData),
    )
    for preset_name, source_cls in preset_sources:
        if data_source == preset_name:
            self.data_source = source_cls()
            break
    else:
        # Not a preset name: use the supplied value directly.
        self.data_source = data_source

    self.features = features
    self.stats = stats
    # Initialize stats computer
    self.pstats = PropertyStats()
def setUp(self):
    """Create the ``PymatgenData`` instance stored on ``self.data_source``."""
    fresh_source = PymatgenData()
    self.data_source = fresh_source
# Imports needed by this section, gathered in one place.
from matminer.featurizers.composition import ElementProperty
from matminer.featurizers.composition import OxidationStates
from matminer.featurizers.conversions import CompositionToOxidComposition
from matminer.featurizers.conversions import StrToComposition
from sklearn.linear_model import LinearRegression

# Index by material id; keep rows with K_VRH > 0 and e_above_hull < 0.1.
df = df.set_index("material_id")
df = df[df['elasticity.K_VRH'] > 0]
df = df[df['e_above_hull'] < 0.1]

# Volume divided by the number of sites.
df['vpa'] = df['volume'] / df['nsites']

# Poisson ratio from the K_VRH and G_VRH columns: (3K - 2G) / (6K + 2G).
# Plain column arithmetic replaces a row-wise apply(axis=1): the values are
# the same, it is evaluated for the whole column at once, and it still yields
# a column when the filtered frame is empty.
df['poisson_ratio'] = (
    (3 * df["elasticity.K_VRH"] - 2 * df["elasticity.G_VRH"])
    / (6 * df["elasticity.K_VRH"] + 2 * df["elasticity.G_VRH"])
)

# Formula string -> "composition" column.
df = StrToComposition().featurize_dataframe(df, "pretty_formula")

# Element-property features from the "magpie" preset.
ep_feat = ElementProperty.from_preset(preset_name="magpie")
# input the "composition" column to the featurizer
df = ep_feat.featurize_dataframe(df, col_id="composition")

# Oxidation-state features, computed from the "composition_oxid" column.
df = CompositionToOxidComposition().featurize_dataframe(df, "composition")
os_feat = OxidationStates()
df = os_feat.featurize_dataframe(df, "composition_oxid")

# A second ElementProperty pass, driven by PymatgenData and restricted to
# the listed descriptors and statistics.
dataset = PymatgenData()
descriptors = ['row', 'group', 'atomic_mass', 'atomic_radius',
               'boiling_point', 'melting_point', 'X']
stats = ["mean", "std_dev"]
ep = ElementProperty(data_source=dataset, features=descriptors, stats=stats)
df = ep.featurize_dataframe(df, "composition")

# Remove NaN values
df = df.dropna()

# Target vector. NOTE: df['elasticity.K_VRH'].values was the earlier target.
y = df['Tensile Strength, Yield'].values

# Columns that must not be used as descriptors.
# NOTE: "elastic_anisotropy" was previously listed here as well.
excluded = ["elasticity.G_VRH", "elasticity.K_VRH", "pretty_formula",
            'volume', 'nsites', 'spacegroup.symbol', 'e_above_hull',
            'Tensile Strength, Yield', 'Elongation at Break ',
            'Tensile Strength,Ultimate', "poisson_ratio", "composition",
            "composition_oxid"]
X = df.drop(excluded, axis=1)
print("There are {} possible descriptors:\n\n{}".format(X.shape[1], X.columns.values))