Ejemplo n.º 1
0
        def test_load_glass_ternary_landolt(self):
            """Check ``load_glass_ternary_landolt`` under each supported option.

            Every successful call must return a ``pandas.DataFrame`` with the
            expected number of rows:

            * default arguments             -> 6118 rows
            * ``unique_composition=False``  -> 7191 rows
            * ``processing="meltspin"``     -> 5736 rows
            * ``processing="sputtering"``   -> 403 rows

            An unrecognized ``processing`` value must raise ``ValueError``.
            """
            # Default arguments.
            df = load_glass_ternary_landolt()
            self.assertIsInstance(df, pd.DataFrame)
            self.assertEqual(len(df), 6118)

            # Row count is larger when unique_composition is switched off.
            df = load_glass_ternary_landolt(unique_composition=False)
            self.assertIsInstance(df, pd.DataFrame)
            self.assertEqual(len(df), 7191)

            # Restrict to a single processing method.
            df = load_glass_ternary_landolt(processing="meltspin")
            self.assertIsInstance(df, pd.DataFrame)
            self.assertEqual(len(df), 5736)

            df = load_glass_ternary_landolt(processing="sputtering")
            self.assertIsInstance(df, pd.DataFrame)
            self.assertEqual(len(df), 403)

            # "spittering" is a deliberately misspelled processing value; the
            # return value is irrelevant because the call is expected to raise.
            with self.assertRaises(ValueError):
                load_glass_ternary_landolt(processing="spittering")
Ejemplo n.º 2
0
        def test_load_glass_ternary_landolt(self):
            """Verify row counts and error handling of ``load_glass_ternary_landolt``.

            Each entry of the case table pairs the keyword arguments passed to
            the loader with the number of rows the returned ``pandas.DataFrame``
            must have. A final check confirms that an unrecognized
            ``processing`` value raises ``ValueError``.
            """
            # (keyword arguments, expected number of rows), checked in order.
            cases = (
                ({}, 6118),
                ({"unique_composition": False}, 7191),
                ({"processing": "meltspin"}, 5736),
                ({"processing": "sputtering"}, 403),
            )
            for kwargs, expected_rows in cases:
                df = load_glass_ternary_landolt(**kwargs)
                self.assertIsInstance(df, pd.DataFrame)
                # Include the kwargs in the message so a failure identifies
                # which configuration produced the wrong row count.
                self.assertEqual(len(df), expected_rows,
                                 msg="kwargs={!r}".format(kwargs))

            # "spittering" is a deliberately misspelled processing value; the
            # call is expected to raise, so its result is not captured.
            with self.assertRaises(ValueError):
                load_glass_ternary_landolt(processing="spittering")
# Script settings. NOTE(review): none of these three values is used in the
# visible part of the script; the readings below are assumptions to confirm.
# Presumably the name of the target column.
target = "gfa"
# Presumably a time budget in seconds (7200 s = 2 h) for a later fitting step.
timelimit_secs = 7200
# Presumably a random seed.
rs = 29

# Directory for the featurized data, plus two autosklearn-named directories
# that are not used in the visible part of the script.
feature_output_path = "example_data/matbench_data/featurized_data/"
model_tmp_path = r'example_data/matbench_data/autosklearn_output/tmp/'
model_output_path = r'example_data/matbench_data/autosklearn_output/output/'

# CSV file holding the featurized data; `data_name` is defined outside the
# visible part of this script.
feature_output_file = \
    os.path.join(feature_output_path,
                 "{}_all_featurized_data.csv".format(data_name))

# Reuse the featurized CSV if it already exists; otherwise build the features
# from the raw dataset.
if os.path.exists(feature_output_file):
    # The first CSV column is read back as the DataFrame index.
    df = pd.read_csv(feature_output_file, index_col=0)
else:
    df_init = load_glass_ternary_landolt()

    # Profile the featurization and preprocessing steps below.
    # NOTE(review): assumes `Profile` is the stdlib cProfile/profile class;
    # its import is outside the visible section.
    prof = Profile()
    prof.enable()

    # Featurize with featurizers="all", then preprocess the result.
    featzer = Featurize()
    df_feats = featzer.featurize_formula(df_init, featurizers="all")
    # NOTE(review): max_colnull=0.1 is presumably a per-column threshold on
    # the fraction of null values; confirm against PreProcess.
    prep = PreProcess(max_colnull=0.1)
    df = prep.preprocess(df_feats)

    # Finish profiling and print the 5 entries with the largest internal time,
    # with directory prefixes stripped from file names.
    prof.create_stats()
    print("featurize time:\n")
    pstats.Stats(prof).strip_dirs().sort_stats("time").print_stats(5)

    # Report whether the output directory already exists. The visible source
    # ends here; any handling of the missing-directory case is not shown.
    if os.path.exists(feature_output_path):
        print("output path: {} exists!".format(feature_output_path))