def test_prechecking(self):
    """
    Fit a "best"-preset AutoFeaturizer on the composition and target
    columns and check which composition featurizers are kept.
    """
    # NOTE(review): a second test_prechecking is defined later in this file;
    # if both belong to the same class the later definition replaces this
    # one and this test never runs -- confirm and rename if so.
    target = "K_VRH"
    featurizer = AutoFeaturizer(preset="best")
    comp_df = self.test_df[["composition", target]]
    featurizer.fit(comp_df, target)

    kept_names = [
        feat.__class__.__name__
        for feat in featurizer.featurizers["composition"]
    ]

    # These two featurizers are expected to be dropped during fitting.
    for dropped in ("YangSolidSolution", "Miedema"):
        self.assertNotIn(dropped, kept_names)
    # ElementProperty is expected to survive.
    self.assertIn("ElementProperty", kept_names)
def test_use_metaselector(self):
    """
    Fit a default AutoFeaturizer on the first ``self.limit`` rows of the
    test dataframe and check the metaselector it produces.

    Verifies that composition and structure metafeatures are present with
    the expected values, that the expected featurizers are listed in the
    metaselector's excludes, and that no ElectronAffinity feature labels
    appear as columns after ``fit_transform``.
    """
    # Test to see if metaselector works for this dataset
    df = copy.copy(self.test_df.iloc[:self.limit])
    target = "K_VRH"
    af = AutoFeaturizer()
    af.fit(df, target)
    self.assertIsNotNone(af.metaselector)

    dataset_mfs = af.metaselector.dataset_mfs
    mf_keys = ("composition_metafeatures", "structure_metafeatures")
    # Membership is checked on the mapping itself; no need for .keys().
    for key in mf_keys:
        self.assertIn(key, dataset_mfs)
    for key in mf_keys:
        self.assertIsNotNone(dataset_mfs[key])

    # NOTE(review): the hard-coded expectations below presumably assume
    # self.limit == 5 and a fixed ordering of self.test_df -- confirm
    # against the fixture set-up.
    comp_mfs = dataset_mfs["composition_metafeatures"]
    self.assertEqual(comp_mfs["number_of_compositions"], 5)
    self.assertAlmostEqual(comp_mfs["percent_of_all_metal"], 0.2)
    self.assertAlmostEqual(comp_mfs["percent_of_metal_nonmetal"], 0.8)
    self.assertAlmostEqual(comp_mfs["percent_of_all_nonmetal"], 0.0)
    self.assertAlmostEqual(comp_mfs["percent_of_contain_trans_metal"], 0.8)
    self.assertEqual(comp_mfs["number_of_different_elements"], 7)
    self.assertAlmostEqual(comp_mfs["avg_number_of_elements"], 2.2)
    self.assertEqual(comp_mfs["max_number_of_elements"], 3)
    self.assertEqual(comp_mfs["min_number_of_elements"], 1)

    struct_mfs = dataset_mfs["structure_metafeatures"]
    self.assertEqual(struct_mfs["number_of_structures"], 5)
    self.assertAlmostEqual(struct_mfs["percent_of_ordered_structures"], 1.0)
    self.assertAlmostEqual(struct_mfs["avg_number_of_sites"], 7.0)
    self.assertEqual(struct_mfs["max_number_of_sites"], 12)
    self.assertEqual(
        struct_mfs["number_of_different_elements_in_structures"], 7)

    # Featurizers the metaselector is expected to exclude for this data.
    excludes = af.metaselector.excludes
    expected_excludes = (
        "IonProperty",
        "Miedema",
        "OxidationStates",
        "YangSolidSolution",
        "TMetalFraction",
        "ElectronegativityDiff",
        "CationProperty",
        "ElectronAffinity",
    )
    for name in expected_excludes:
        self.assertIn(name, excludes)

    # An excluded featurizer must not contribute any columns. The original
    # repeated this assertion twice verbatim; once is sufficient.
    df = af.fit_transform(df, target)
    ef = ElectronAffinity()
    ef_feats = ef.feature_labels()
    self.assertFalse(any(f in df.columns for f in ef_feats))
def test_transferability(self):
    """
    Check that an AutoFeaturizer fit on one slice of the test data can be
    used to transform a different slice.

    The featurizer is fit on the first ``self.limit`` rows and applied to
    the last ``self.limit`` rows; two values of the transformed frame are
    then compared against reference numbers.
    """
    target = "K_VRH"
    subset = self.test_df[["composition", target]]
    n_rows = self.limit
    fit_df = subset.iloc[:n_rows]
    apply_df = subset.iloc[-n_rows:]

    featurizer = AutoFeaturizer()
    featurizer.fit(fit_df, target)
    transformed = featurizer.transform(apply_df, target)

    first_target = transformed[target].iloc[0]
    second_min_x = transformed["minimum X"].iloc[1]
    self.assertAlmostEqual(first_target, 111.788114, places=5)
    self.assertAlmostEqual(second_min_x, 1.36, places=2)
def test_prechecking(self):
    """
    Fit an "express"-preset AutoFeaturizer with a raised minimum precheck
    fraction and check which composition featurizers are kept.
    """
    target = "K_VRH"
    featurizer = AutoFeaturizer(preset="express")
    comp_df = self.test_df[["composition", target]]

    # Raise the minimum precheck fraction so that the partially applicable
    # featurizers below are rejected in this test.
    featurizer.min_precheck_frac = 0.99
    featurizer.fit(comp_df, target)

    kept_names = [
        feat.__class__.__name__
        for feat in featurizer.featurizers["composition"]
    ]

    # Both of these should have a precheck fraction of roughly 0.922, which
    # is below the 0.99 threshold, so they should have been dropped.
    for dropped in ("YangSolidSolution", "Miedema"):
        self.assertNotIn(dropped, kept_names)

    # ElementProperty prechecks correctly for every entry, so it stays.
    self.assertIn("ElementProperty", kept_names)
def test_exclude_by_users(self):
    """
    Check that a featurizer named in the user-supplied ``exclude`` list
    contributes no columns to the featurized dataframe.
    """
    target = "K_VRH"
    df = copy.copy(self.test_df.iloc[:self.limit])
    excluded_names = ["ElementProperty"]

    # Feature labels the excluded featurizer would have produced.
    excluded_labels = ElementProperty.from_preset("matminer").feature_labels()

    # Test to make sure excluded does not show up
    af = AutoFeaturizer(exclude=excluded_names, preset="fast")
    af.fit(df, target)
    df = af.fit_transform(df, target)

    self.assertTrue(af.auto_featurizer)
    self.assertIn("ElementProperty", af.exclude)
    leaked = any(label in df.columns for label in excluded_labels)
    self.assertFalse(leaked)