예제 #1
0
def test_main_featurize_function():
    """Run featurize with a custom feature script and check its outputs.

    Copies ``testfeature1.py`` into the custom feature script folder, runs
    ``featurize.featurize`` on the ASAS training subset requesting the
    ``std_err`` and ``f`` features, then checks that the features CSV and the
    classes ``.npy`` file were written, that both requested features appear
    as CSV columns, and that every stored class label is one of the known
    class names. Generated files are removed before the content assertions.
    """
    test_setup()

    script_name = "testfeature1.py"
    shutil.copy(pjoin(DATA_PATH, script_name),
                cfg.CUSTOM_FEATURE_SCRIPT_FOLDER)

    header_path = pjoin(cfg.UPLOAD_FOLDER,
                        "asas_training_subset_classes_with_metadata.dat")
    archive_path = pjoin(cfg.UPLOAD_FOLDER, "asas_training_subset.tar.gz")
    script_path = pjoin(cfg.CUSTOM_FEATURE_SCRIPT_FOLDER, script_name)
    results_msg = featurize.featurize(
        headerfile_path=header_path,
        zipfile_path=archive_path,
        features_to_use=["std_err", "f"],
        featureset_id="test",
        is_test=True,
        custom_script_path=script_path,
    )

    features_csv = pjoin(cfg.FEATURES_FOLDER, "test_features.csv")
    classes_npy = pjoin(cfg.FEATURES_FOLDER, "test_classes.npy")
    assert os.path.exists(features_csv)
    assert os.path.exists(classes_npy)

    # Load what we need into memory, then delete the generated files so a
    # failing assertion below does not leave them behind.
    class_list = list(np.load(classes_npy))
    os.remove(classes_npy)
    df = pd.io.parsers.read_csv(features_csv)
    cols = df.columns
    values = df.values
    os.remove(features_csv)
    os.remove(pjoin(cfg.FEATURES_FOLDER, "test_features_with_classes.csv"))

    for feature_name in ("std_err", "f"):
        assert feature_name in cols

    known_classes = ['Mira', 'Herbig_AEBE', 'Beta_Lyrae',
                     'Classical_Cepheid', 'W_Ursae_Maj', 'Delta_Scuti']
    for class_name in class_list:
        assert class_name in known_classes
def test_feature_generation():
    """Check generated science features against stored reference values.

    Removes any CSV files in the features folder, runs
    ``featurize.featurize`` on the ASAS training subset with
    ``cfg.features_list_science``, and compares the resulting feature names
    and values with ``data/expected_features.csv`` located next to this test
    file. Also asserts that 81 features are produced and that the featurize
    call itself finishes in under 60 seconds.
    """
    # The glob pattern must be relative: os.path.join drops every earlier
    # component when a later one is absolute, so '/*.csv' would search the
    # filesystem root instead of the features folder.
    for stale_csv in glob.glob(os.path.join(cfg.FEATURES_FOLDER, '*.csv')):
        os.remove(stale_csv)

    tic = time.time()
    this_dir = os.path.dirname(__file__)

    featurize.featurize(
        os.path.join(cfg.UPLOAD_FOLDER, "asas_training_subset_classes.dat"),
        os.path.join(cfg.UPLOAD_FOLDER, "asas_training_subset.tar.gz"),
        featureset_id="testfeatset", is_test=True, USE_DISCO=False,
        features_to_use=cfg.features_list_science
    )

    delta = time.time() - tic

    def features_from_csv(filename):
        """Return (header names, numeric values) read from a CSV file."""
        with open(filename) as csv_file:
            # First line holds the comma-separated feature names; the rest
            # of the file is parsed as a numeric table.
            feature_names = csv_file.readline().strip().split(",")
            feature_values = np.loadtxt(csv_file, delimiter=',')

        return feature_names, feature_values

    generated_csv = os.path.join(cfg.FEATURES_FOLDER,
                                 "testfeatset_features.csv")
    features_extracted, values_computed = features_from_csv(generated_csv)

    features_expected, values_expected = features_from_csv(
        os.path.join(this_dir, "data/expected_features.csv"))

    # Clean up generated files before asserting so that a failed comparison
    # does not leave them behind.
    os.remove(generated_csv)
    os.remove(os.path.join(cfg.FEATURES_FOLDER,
                           "testfeatset_classes.npy"))

    npt.assert_equal(len(features_extracted), 81)
    npt.assert_equal(features_extracted, features_expected)
    npt.assert_array_almost_equal(values_computed, values_expected)

    # Ensure this test takes less than a minute to run
    assert delta < 60
예제 #3
0
def test_featurize():
    """Run featurize end to end and check which output files it writes.

    After the run, the features CSV and the classes ``.npy`` file must exist
    in the features folder, the features-with-classes CSV must exist under
    the package's ``Flask/static/data`` directory and not in the features
    folder, and ``std_err`` must be a column of the features CSV.
    """
    header_path = pjoin(cfg.UPLOAD_FOLDER,
                        "asas_training_subset_classes_with_metadata.dat")
    archive_path = pjoin(cfg.UPLOAD_FOLDER, "asas_training_subset.tar.gz")
    results_msg = featurize.featurize(
        headerfile_path=header_path,
        zipfile_path=archive_path,
        # TEMP: TCP still broken under py3
        features_to_use=["std_err"],
        featureset_id="TESTRUN",
        is_test=True,
        custom_script_path=pjoin(cfg.CUSTOM_FEATURE_SCRIPT_FOLDER,
                                 "testfeature1.py"),
        USE_DISCO=False,
    )

    with_classes_name = "TESTRUN_features_with_classes.csv"
    features_csv = pjoin(cfg.FEATURES_FOLDER, "TESTRUN_features.csv")

    assert os.path.exists(features_csv)
    assert not os.path.exists(pjoin(cfg.FEATURES_FOLDER, with_classes_name))
    assert os.path.exists(
        pjoin(cfg.MLTSP_PACKAGE_PATH, "Flask/static/data", with_classes_name))
    assert os.path.exists(pjoin(cfg.FEATURES_FOLDER, "TESTRUN_classes.npy"))

    df = pd.io.parsers.read_csv(features_csv)
    cols = df.columns
    values = df.values
    assert "std_err" in cols
예제 #4
0
def test_main_featurize_function_disco():
    """Run featurize with ``USE_DISCO=True`` and check its outputs.

    Requests the ``std_err`` and ``freq1_harmonics_freq_0`` features for the
    ASAS training subset, then checks that the features CSV and the classes
    ``.npy`` file were written, that both requested features appear as CSV
    columns, and that every stored class label is one of the known class
    names. Generated files are removed before the content assertions.
    """
    test_setup()

    shutil.copy(pjoin(DATA_PATH, "testfeature1.py"),
                cfg.CUSTOM_FEATURE_SCRIPT_FOLDER)

    header_path = pjoin(cfg.UPLOAD_FOLDER,
                        "asas_training_subset_classes_with_metadata.dat")
    archive_path = pjoin(cfg.UPLOAD_FOLDER, "asas_training_subset.tar.gz")
    results_msg = featurize.featurize(
        headerfile_path=header_path,
        zipfile_path=archive_path,
        features_to_use=["std_err", "freq1_harmonics_freq_0"],
        featureset_id="test",
        is_test=True,
        # TODO: Doesn't work when using Disco!!!
        custom_script_path=None,
        USE_DISCO=True,
    )

    features_csv = pjoin(cfg.FEATURES_FOLDER, "test_features.csv")
    classes_npy = pjoin(cfg.FEATURES_FOLDER, "test_classes.npy")
    assert os.path.exists(features_csv)
    assert os.path.exists(classes_npy)

    # Load what we need into memory, then delete the generated files so a
    # failing assertion below does not leave them behind.
    class_list = list(np.load(classes_npy))
    os.remove(classes_npy)
    df = pd.io.parsers.read_csv(features_csv)
    cols = df.columns
    values = df.values
    os.remove(features_csv)
    static_data_dir = pjoin(cfg.MLTSP_PACKAGE_PATH, "Flask/static/data")
    os.remove(pjoin(static_data_dir, "test_features_with_classes.csv"))

    for feature_name in ("std_err", "freq1_harmonics_freq_0"):
        assert feature_name in cols

    known_classes = ['Mira', 'Herbig_AEBE', 'Beta_Lyrae',
                     'Classical_Cepheid', 'W_Ursae_Maj', 'Delta_Scuti']
    for class_name in class_list:
        assert class_name in known_classes