Beispiel #1
0
 def test_prechecking(self):
     """
     Fit an AutoFeaturizer (preset "best") on the composition and K_VRH
     columns and check which composition featurizers it ends up holding.
     """
     target_col = "K_VRH"
     autofeaturizer = AutoFeaturizer(preset="best")
     frame = self.test_df[['composition', target_col]]
     autofeaturizer.fit(frame, target_col)

     # Class names of the featurizers registered for "composition".
     kept_names = [
         featurizer.__class__.__name__
         for featurizer in autofeaturizer.featurizers["composition"]
     ]

     # These two must be absent after fitting ...
     for absent_name in ("YangSolidSolution", "Miedema"):
         self.assertNotIn(absent_name, kept_names)
     # ... while ElementProperty must still be present.
     self.assertIn("ElementProperty", kept_names)
Beispiel #2
0
    def test_use_metaselector(self):
        """
        Test to see if the metaselector works for this dataset.

        Fits an AutoFeaturizer with default arguments on the first
        ``self.limit`` rows of ``self.test_df`` and checks the dataset
        metafeatures it computed, the featurizers it excluded, and that an
        excluded featurizer's labels are absent from the featurized frame.
        """
        # NOTE(review): the expected counts below (5 compositions, 5
        # structures) presumably rely on self.limit == 5 -- confirm in setUp.
        df = copy.copy(self.test_df.iloc[:self.limit])
        target = "K_VRH"

        af = AutoFeaturizer()
        af.fit(df, target)

        # Both metafeature groups must exist and be populated after fitting.
        self.assertIsNotNone(af.metaselector)
        dataset_mfs = af.metaselector.dataset_mfs
        self.assertIn("composition_metafeatures", dataset_mfs.keys())
        self.assertIn("structure_metafeatures", dataset_mfs.keys())
        self.assertIsNotNone(dataset_mfs["composition_metafeatures"])
        self.assertIsNotNone(dataset_mfs["structure_metafeatures"])

        # Composition metafeatures.
        comp_mfs = dataset_mfs["composition_metafeatures"]
        self.assertEqual(comp_mfs["number_of_compositions"], 5)
        self.assertAlmostEqual(comp_mfs["percent_of_all_metal"], 0.2)
        self.assertAlmostEqual(comp_mfs["percent_of_metal_nonmetal"], 0.8)
        self.assertAlmostEqual(comp_mfs["percent_of_all_nonmetal"], 0.0)
        self.assertAlmostEqual(comp_mfs["percent_of_contain_trans_metal"], 0.8)
        self.assertEqual(comp_mfs["number_of_different_elements"], 7)
        self.assertAlmostEqual(comp_mfs["avg_number_of_elements"], 2.2)
        self.assertEqual(comp_mfs["max_number_of_elements"], 3)
        self.assertEqual(comp_mfs["min_number_of_elements"], 1)

        # Structure metafeatures.
        struct_mfs = dataset_mfs["structure_metafeatures"]
        self.assertEqual(struct_mfs["number_of_structures"], 5)
        self.assertAlmostEqual(struct_mfs["percent_of_ordered_structures"],
                               1.0)
        self.assertAlmostEqual(struct_mfs["avg_number_of_sites"], 7.0)
        self.assertEqual(struct_mfs["max_number_of_sites"], 12)
        self.assertEqual(
            struct_mfs["number_of_different_elements_in_structures"], 7)

        # Featurizers the metaselector is expected to exclude.
        excludes = af.metaselector.excludes
        self.assertIn("IonProperty", excludes)
        self.assertIn("Miedema", excludes)
        self.assertIn("OxidationStates", excludes)
        self.assertIn("YangSolidSolution", excludes)
        self.assertIn("TMetalFraction", excludes)
        self.assertIn("ElectronegativityDiff", excludes)
        self.assertIn("CationProperty", excludes)
        self.assertIn("ElectronAffinity", excludes)

        # ElectronAffinity is in the excluded list, so none of its feature
        # labels may appear as columns of the featurized dataframe.
        df = af.fit_transform(df, target)
        ef = ElectronAffinity()
        ef_feats = ef.feature_labels()
        self.assertFalse(any(f in df.columns for f in ef_feats))
Beispiel #3
0
    def test_transferability(self):
        """
        Check that an AutoFeaturizer fitted on one slice of the test data
        can then be used to transform a different slice.
        """
        target = "K_VRH"
        columns = ["composition", target]

        # Fit on the first ``self.limit`` rows ...
        fit_frame = self.test_df[columns].iloc[:self.limit]
        # ... and transform the last ``self.limit`` rows.
        other_frame = self.test_df[columns].iloc[-self.limit:]

        autofeaturizer = AutoFeaturizer()
        autofeaturizer.fit(fit_frame, target)
        transformed = autofeaturizer.transform(other_frame, target)

        first_target_value = transformed[target].iloc[0]
        second_minimum_x = transformed["minimum X"].iloc[1]
        self.assertAlmostEqual(first_target_value, 111.788114, places=5)
        self.assertAlmostEqual(second_minimum_x, 1.36, places=2)
Beispiel #4
0
    def test_prechecking(self):
        """
        Fit an "express" AutoFeaturizer with a raised minimum precheck
        fraction and check which composition featurizers remain.
        """
        target = "K_VRH"
        autofeaturizer = AutoFeaturizer(preset="express")
        frame = self.test_df[["composition", target]]

        # Raise the minimum precheck fraction for the purposes of this test.
        autofeaturizer.min_precheck_frac = 0.99
        autofeaturizer.fit(frame, target)

        remaining = [
            featurizer.__class__.__name__
            for featurizer in autofeaturizer.featurizers["composition"]
        ]

        # Both of these should sit around a 0.922 precheck fraction, which
        # is below the 0.99 threshold, so they fail the precheck.
        for failing in ("YangSolidSolution", "Miedema"):
            self.assertNotIn(failing, remaining)

        # The ElementProperty precheck holds for every entry, so it passes.
        self.assertIn("ElementProperty", remaining)
Beispiel #5
0
    def test_exclude_by_users(self):
        """
        Check that a featurizer named in ``exclude`` contributes no feature
        columns to the featurized dataframe.
        """
        frame = copy.copy(self.test_df.iloc[:self.limit])
        target = "K_VRH"
        excluded_names = ["ElementProperty"]

        # Labels the excluded featurizer would produce with its "matminer"
        # preset.
        element_property = ElementProperty.from_preset("matminer")
        excluded_labels = element_property.feature_labels()

        # Make sure the excluded featurizer does not show up.
        autofeaturizer = AutoFeaturizer(exclude=excluded_names, preset="fast")
        autofeaturizer.fit(frame, target)
        frame = autofeaturizer.fit_transform(frame, target)

        self.assertTrue(autofeaturizer.auto_featurizer)
        self.assertIn("ElementProperty", autofeaturizer.exclude)
        leaked = any(label in frame.columns for label in excluded_labels)
        self.assertFalse(leaked)