def test_feature_mismatching(self):
    """Check that predicting on mismatched feature columns raises MatbenchError.

    The adaptor is fit on ``self.train_df``; prediction is then attempted on
    a copy of ``self.test_df`` whose 'mean X' column has been renamed.
    """
    target = "K_VRH"
    # Same test data, but one feature column carries a different name than
    # the one seen at fit time.
    mismatched_df = self.test_df.rename(
        columns={'mean X': "some other feature"}
    )

    learner = TPOTAdaptor(**self.common_tpot_kwargs)
    learner.fit(self.train_df, target)

    with self.assertRaises(MatbenchError):
        learner.predict(mismatched_df, target)
def test_regression(self):
    """Fit on the training frame and require r2 > 0.75 on the test frame.

    The adaptor's ``predict`` output is read back through two columns: the
    target column itself (true values) and ``target_key + " predicted"``
    (predicted values).
    """
    target_key = "K_VRH"
    tpot = TPOTAdaptor(**self.common_tpot_kwargs)
    tpot.fit(self.train_df, target_key)

    test_w_predictions = tpot.predict(self.test_df, target_key)
    y_true = test_w_predictions[target_key]
    y_pred = test_w_predictions[target_key + " predicted"]

    # assertGreater reports the actual score on failure, unlike
    # assertTrue(score > threshold), which only says "False is not true".
    self.assertGreater(r2_score(y_true, y_pred), 0.75)
def test_classification(self):
    """Fit on a boolean target derived from K_VRH and require F1 > 0.75.

    The "K_VRH" column of both frames is renamed to a descriptive key and
    replaced by the boolean ``K_VRH > max_kvrh``. Predictions are read from
    the ``classifier_key + " predicted"`` column of the ``predict`` output.
    """
    tpot = TPOTAdaptor(**self.common_tpot_kwargs)
    max_kvrh = 50
    classifier_key = "K_VRH > {}?".format(max_kvrh)

    # rename() returns new frames, so the fixture frames are not modified by
    # the column assignments below.
    train_df = self.train_df.rename(columns={"K_VRH": classifier_key})
    test_df = self.test_df.rename(columns={"K_VRH": classifier_key})
    train_df[classifier_key] = train_df[classifier_key] > max_kvrh
    test_df[classifier_key] = test_df[classifier_key] > max_kvrh

    tpot.fit(train_df, classifier_key)
    # Diagnostic output only: shows the adaptor's `mode` attribute after fit.
    print(tpot.mode)

    test_w_predictions = tpot.predict(test_df, classifier_key)
    y_true = test_w_predictions[classifier_key]
    y_pred = test_w_predictions[classifier_key + " predicted"]

    # assertGreater reports the actual score on failure, unlike
    # assertTrue(score > threshold), which only says "False is not true".
    self.assertGreater(f1_score(y_true, y_pred), 0.75)
# NOTE(review): the reason for this one-second pause is not evident from this
# section of the file -- confirm it is still needed.
sleep(1)

# COMPARE TO MATBENCH
df = load_tehrani_superhard_mat(data="basic_descriptors")
df = df.drop(["formula", "material_id", "shear_modulus", "initial_structure"], axis=1)

# Row-order split: the first 80% of rows train the pipeline, the rest test it.
n_train = floor(.8 * len(df))
traindf = df.iloc[:n_train]
testdf = df.iloc[n_train:]
target = "bulk_modulus"

# Get top-level transformers
autofeater = AutoFeaturizer()
cleaner = DataCleaner()
reducer = FeatureReducer()
learner = TPOTAdaptor("regression", max_time_mins=5)

# Preprocessing steps, applied in this order to both the train and test data.
preprocessing_steps = (autofeater, cleaner, reducer)

# Fit transformers on training data, then fit the learner on the result.
for step in preprocessing_steps:
    traindf = step.fit_transform(traindf, target)
learner.fit(traindf, target)

# Apply the same (already fitted) transformations to the testing data.
for step in preprocessing_steps:
    testdf = step.transform(testdf, target)

# Predict validation data and show the resulting frame.
testdf = learner.predict(testdf, target)
print(testdf)