def test_featurize_prediction_data_in_parallel():
    """Test parallel featurization of prediction TS data.

    The test is a silent no-op (it prints a message and returns) when
    ``util.check_disco_running()`` reports that Disco is not running.

    Otherwise it calls ``generate_model()``, copies the sample archive into
    ``cfg.UPLOAD_FOLDER`` and a custom feature script into
    ``cfg.CUSTOM_FEATURE_SCRIPT_FOLDER``, runs
    ``prl_proc.featurize_prediction_data_in_parallel`` on the archive, and
    checks that ``"std_err"`` is among the features computed for
    ``"dotastro_218934.dat"``.

    All files the test may have left on disk are removed in a ``finally``
    clause, so a failing setup step, featurization call or assertion does not
    leak artifacts into subsequent test runs.
    """
    if not util.check_disco_running():
        print("Disco not running - aborting test.")
        return

    archive_name = "215153_215176_218272_218934.tar.gz"
    custom_script_path = pjoin(cfg.CUSTOM_FEATURE_SCRIPT_FOLDER,
                               "TESTRUN_CF.py")
    # Files removed after the test, whether it passes or fails.
    # NOTE(review): the TESTRUN_* feature/model files are presumably written
    # by generate_model() -- confirm against its definition.
    artifacts = (
        pjoin(cfg.UPLOAD_FOLDER, archive_name),
        pjoin(cfg.FEATURES_FOLDER, "TESTRUN_features.csv"),
        pjoin(cfg.FEATURES_FOLDER, "TESTRUN_classes.npy"),
        pjoin(cfg.MODELS_FOLDER, "TESTRUN_RF.pkl"),
        custom_script_path,
    )

    try:
        generate_model()
        shutil.copy(pjoin(DATA_PATH, archive_name), cfg.UPLOAD_FOLDER)
        shutil.copy(pjoin(DATA_PATH, "testfeature1.py"), custom_script_path)

        features_and_tsdata_dict = \
            prl_proc.featurize_prediction_data_in_parallel(
                os.path.join(DATA_PATH, archive_name), "TEMP_TEST01")
        print(features_and_tsdata_dict)

        assert "std_err" in \
            features_and_tsdata_dict["dotastro_218934.dat"]["features_dict"]
    finally:
        # Runs even when anything above raises, so no artifacts are left.
        for fname in artifacts:
            if os.path.exists(fname):
                os.remove(fname)
def test_featurize_in_parallel():
    """Test main parallelized featurization function.

    Returns early (after printing a notice) when
    ``util.check_disco_running()`` is falsy. Otherwise runs
    ``prl_proc.featurize_in_parallel`` on the ASAS training subset files
    under ``DATA_PATH`` and verifies that the result is a dict whose every
    value contains both requested feature names.
    """
    disco_available = util.check_disco_running()
    if not disco_available:
        print("Disco not running - aborting test.")
        return

    classes_path = pjoin(DATA_PATH, "asas_training_subset_classes.dat")
    archive_path = pjoin(DATA_PATH, "asas_training_subset.tar.gz")

    result = prl_proc.featurize_in_parallel(
        classes_path,
        archive_path,
        features_to_use=["std_err", "freq1_harmonics_freq_0"],
        is_test=True,
        custom_script_path=None)
    print(result)

    assert isinstance(result, dict)
    # Only the per-file feature collections matter here; keys are ignored.
    for feature_map in result.values():
        assert ("std_err" in feature_map
                and "freq1_harmonics_freq_0" in feature_map)