def setUp(self) -> None:
    """Build the ``self.objects`` / ``self.detections`` fixtures.

    Reads four example detection CSVs, copies 'sigmapsf_corr' into
    'sigmapsf_corr_ext', stacks a fixed subset of columns, passes the
    result through ``get_magpsf_ml`` and keeps only 'fid' and 'magpsf_ml'.
    """
    # Seeded so the randomly drawn 'corrected' flags repeat on every run.
    np.random.seed(0)

    # Also the order in which the example files are read.
    oids = ['ZTF18abakgtm', 'ZTF18abvvcko', 'ZTF17aaaaaxg', 'ZTF18aaveorp']
    self.objects = pd.DataFrame(
        index=list(oids),
        columns=['corrected'],
        data=np.random.choice(a=[False, True], size=(4,)),
    )

    preprocess_ztf = DetectionsPreprocessorZTF()

    raw_by_oid = {}
    for oid in oids:
        csv_path = os.path.join(EXAMPLES_PATH, oid + '_det.csv')
        frame = pd.read_csv(csv_path, index_col="oid")
        frame['sigmapsf_corr_ext'] = frame['sigmapsf_corr']
        raw_by_oid[oid] = frame

    keys = [
        'mjd', 'fid', 'magpsf_corr', 'sigmapsf_corr_ext', 'isdiffpos',
        'magpsf', 'sigmapsf', 'ra', 'dec', 'sgscore1', 'rb',
    ]

    # The stacking order intentionally differs from the read order above.
    stack_order = [
        'ZTF17aaaaaxg', 'ZTF18abvvcko', 'ZTF18abakgtm', 'ZTF18aaveorp'
    ]
    stacked = pd.concat(
        [raw_by_oid[oid][keys] for oid in stack_order],
        axis=0,
    )
    with_magpsf_ml = preprocess_ztf.get_magpsf_ml(
        stacked, objects=self.objects)
    self.detections = with_magpsf_ml[['fid', 'magpsf_ml']]
def test_non_numeric_columns(self):
    """Comparing the raw column must raise; preprocessing must still run.

    NOTE(review): the source had lost its indentation; this assumes only
    the comparison belongs inside the ``assertRaises`` block -- confirm.
    """
    with self.assertRaises(TypeError):
        _ = self.detections.sigmapsf_corr > 0.0

    # The result is not inspected; the call only has to complete.
    DetectionsPreprocessorZTF().preprocess_detections(self.detections)
def setUp(self) -> None:
    """Create the preprocessor, one raw detections frame and its objects.

    The detections of 'ZTF18abakgtm' are read from the examples directory
    and 'sigmapsf_corr' is duplicated as 'sigmapsf_corr_ext'.
    """
    self.preprocessor_ztf = DetectionsPreprocessorZTF()

    csv_path = os.path.join(EXAMPLES_PATH, 'ZTF18abakgtm_det.csv')
    raw = pd.read_csv(csv_path, index_col="oid")
    raw['sigmapsf_corr_ext'] = raw['sigmapsf_corr']
    self.raw_data_ZTF18abakgtm = raw

    # One-row objects table flagging the object as corrected.
    self.fake_objects = pd.DataFrame(
        data=[[True]],
        index=['ZTF18abakgtm'],
        columns=['corrected'],
    )
def setUp(self) -> None:
    """Load raw detections/non-detections for three objects and preprocess.

    For every object the detections CSV gets a 'sigmapsf_corr_ext' column
    copied from 'sigmapsf_corr' before being handed to ``preprocess``.
    """
    self.preprocess_ztf = DetectionsPreprocessorZTF()
    self.fake_objects = pd.DataFrame(
        index=['ZTF17aaaaaxg', 'ZTF18abvvcko', 'ZTF18abakgtm'],
        data=[[True], [False], [True]],
        columns=['corrected'])

    def load_example(oid):
        # Returns (raw detections, raw non-detections, preprocessed dets).
        raw_det = pd.read_csv(
            os.path.join(EXAMPLES_PATH, oid + '_det.csv'),
            index_col="oid")
        raw_det['sigmapsf_corr_ext'] = raw_det['sigmapsf_corr']
        raw_nondet = pd.read_csv(
            os.path.join(EXAMPLES_PATH, oid + '_nondet.csv'),
            index_col="oid")
        det = self.preprocess_ztf.preprocess(
            raw_det, objects=self.fake_objects)
        return raw_det, raw_nondet, det

    (self.raw_det_ZTF18abakgtm,
     self.raw_nondet_ZTF18abakgtm,
     self.det_ZTF18abakgtm) = load_example('ZTF18abakgtm')

    (self.raw_det_ZTF18abvvcko,
     self.raw_nondet_ZTF18abvvcko,
     self.det_ZTF18abvvcko) = load_example('ZTF18abvvcko')

    (self.raw_det_ZTF17aaaaaxg,
     self.raw_nondet_ZTF17aaaaaxg,
     self.det_ZTF17aaaaaxg) = load_example('ZTF17aaaaaxg')
def setUp(self) -> None:
    """Load the pickled periodic light curves and their labels.

    'sigmapsf_corr' is copied to 'sigmapsf_corr_ext', a seeded random
    'corrected' flag is drawn per labelled object, and the detections are
    passed through ``get_magpsf_ml``.
    """
    np.random.seed(0)

    light_curves = pd.read_pickle(
        os.path.join(EXAMPLES_PATH, "periodic_light_curves.pkl"))
    self.labels = pd.read_pickle(
        os.path.join(EXAMPLES_PATH, "periodic_light_curve_labels.pkl"))
    light_curves["sigmapsf_corr_ext"] = light_curves["sigmapsf_corr"]

    # One boolean per labelled object, indexed like the labels.
    n_objects = len(self.labels)
    corrected_flags = np.random.choice(a=[False, True], size=(n_objects, ))
    self.fake_objects = pd.DataFrame(
        index=self.labels.index,
        columns=["corrected"],
        data=corrected_flags,
    )

    preprocessor = DetectionsPreprocessorZTF()
    self.detections = preprocessor.get_magpsf_ml(
        light_curves, objects=self.fake_objects)
class TestObjectsMethods(unittest.TestCase):
    """Checks ``DetectionsPreprocessorZTF`` on a single example object."""

    def setUp(self) -> None:
        """Create the preprocessor, the raw detections and the objects."""
        self.preprocessor_ztf = DetectionsPreprocessorZTF()

        csv_path = os.path.join(EXAMPLES_PATH, 'ZTF18abakgtm_det.csv')
        raw = pd.read_csv(csv_path, index_col="oid")
        raw['sigmapsf_corr_ext'] = raw['sigmapsf_corr']
        self.raw_data_ZTF18abakgtm = raw

        self.fake_objects = pd.DataFrame(
            data=[[True]],
            index=['ZTF18abakgtm'],
            columns=['corrected'],
        )

    def test_preprocess_one_object(self):
        """Column check passes, index name is 'oid', result is a DataFrame."""
        with_magpsf_ml = self.preprocessor_ztf.get_magpsf_ml(
            self.raw_data_ZTF18abakgtm,
            self.fake_objects,
        )
        columns_ok = self.preprocessor_ztf.has_necessary_columns(
            with_magpsf_ml)
        self.assertEqual(columns_ok, True)

        self.assertEqual(self.raw_data_ZTF18abakgtm.index.name, "oid")

        preprocessed_data = self.preprocessor_ztf.preprocess(
            self.raw_data_ZTF18abakgtm, objects=self.fake_objects)
        self.assertEqual(type(preprocessed_data), pd.DataFrame)
def setUp(self) -> None:
    """Load the pickled periodic light curves, labels and a logger.

    'sigmapsf_corr' is copied to 'sigmapsf_corr_ext', a random 'corrected'
    flag is drawn per labelled object, and the detections are passed
    through ``get_magpsf_ml``. The root logger is stored on the test case
    and its level set to INFO.
    """
    # Seed NumPy's global RNG so the random 'corrected' flags below are
    # the same on every run, as the sibling fixtures in this file do.
    np.random.seed(0)

    self.detections = pd.read_pickle(
        os.path.join(EXAMPLES_PATH, 'periodic_light_curves.pkl'))
    self.labels = pd.read_pickle(
        os.path.join(EXAMPLES_PATH, 'periodic_light_curve_labels.pkl'))
    self.detections['sigmapsf_corr_ext'] = self.detections['sigmapsf_corr']

    # One boolean per labelled object, indexed like the labels.
    self.fake_objects = pd.DataFrame(
        index=self.labels.index,
        columns=['corrected'],
        data=np.random.choice(
            a=[False, True], size=(len(self.labels), )),
    )
    self.detections = DetectionsPreprocessorZTF().get_magpsf_ml(
        self.detections, objects=self.fake_objects)

    # getLogger() with no name returns the root logger, so this level
    # change is process-wide rather than local to this test case.
    self.logger = logging.getLogger()
    self.logger.setLevel(logging.INFO)
def setUp(self) -> None:
    """Build the ``self.objects`` / ``self.detections`` fixtures.

    Reads four example detection CSVs, copies "sigmapsf_corr" into
    "sigmapsf_corr_ext", stacks a fixed subset of columns, passes the
    result through ``get_magpsf_ml`` and keeps only "sgscore1".
    """
    # Seeded so the randomly drawn "corrected" flags repeat on every run.
    np.random.seed(0)

    # Also the order in which the example files are read.
    oids = ["ZTF18abakgtm", "ZTF18abvvcko", "ZTF17aaaaaxg", "ZTF18aaveorp"]
    self.objects = pd.DataFrame(
        index=list(oids),
        columns=["corrected"],
        data=np.random.choice(a=[False, True], size=(4, )),
    )

    preprocess_ztf = DetectionsPreprocessorZTF()

    raw_by_oid = {}
    for oid in oids:
        csv_path = os.path.join(EXAMPLES_PATH, oid + "_det.csv")
        frame = pd.read_csv(csv_path, index_col="oid")
        frame["sigmapsf_corr_ext"] = frame["sigmapsf_corr"]
        raw_by_oid[oid] = frame

    keys = [
        "mjd", "fid", "magpsf_corr", "sigmapsf_corr_ext", "isdiffpos",
        "magpsf", "sigmapsf", "ra", "dec", "sgscore1", "rb",
    ]

    # The stacking order intentionally differs from the read order above.
    stack_order = [
        "ZTF17aaaaaxg", "ZTF18abvvcko", "ZTF18abakgtm", "ZTF18aaveorp"
    ]
    stacked = pd.concat(
        [raw_by_oid[oid][keys] for oid in stack_order],
        axis=0,
    )
    with_magpsf_ml = preprocess_ztf.get_magpsf_ml(
        stacked, objects=self.objects)
    self.detections = with_magpsf_ml[["sgscore1"]]
class TestObjectsMethods(unittest.TestCase):
    """Smoke tests for the individual feature extractors.

    Every test feeds preprocessed example detections (and, where the
    extractor takes them, raw non-detections) to one extractor and checks
    the type and/or shape of the returned features.
    """

    def setUp(self) -> None:
        """Load and preprocess the example data of three objects."""
        self.preprocess_ztf = DetectionsPreprocessorZTF()
        self.fake_objects = pd.DataFrame(
            index=['ZTF17aaaaaxg', 'ZTF18abvvcko', 'ZTF18abakgtm'],
            data=[[True], [False], [True]],
            columns=['corrected'])

        self.raw_det_ZTF18abakgtm = pd.read_csv(
            os.path.join(EXAMPLES_PATH, 'ZTF18abakgtm_det.csv'),
            index_col="oid")
        self.raw_det_ZTF18abakgtm['sigmapsf_corr_ext'] = (
            self.raw_det_ZTF18abakgtm['sigmapsf_corr'])
        self.raw_nondet_ZTF18abakgtm = pd.read_csv(
            os.path.join(EXAMPLES_PATH, 'ZTF18abakgtm_nondet.csv'),
            index_col="oid")
        self.det_ZTF18abakgtm = self.preprocess_ztf.preprocess(
            self.raw_det_ZTF18abakgtm, objects=self.fake_objects)

        self.raw_det_ZTF18abvvcko = pd.read_csv(
            os.path.join(EXAMPLES_PATH, 'ZTF18abvvcko_det.csv'),
            index_col="oid")
        self.raw_det_ZTF18abvvcko['sigmapsf_corr_ext'] = (
            self.raw_det_ZTF18abvvcko['sigmapsf_corr'])
        self.raw_nondet_ZTF18abvvcko = pd.read_csv(
            os.path.join(EXAMPLES_PATH, 'ZTF18abvvcko_nondet.csv'),
            index_col="oid")
        self.det_ZTF18abvvcko = self.preprocess_ztf.preprocess(
            self.raw_det_ZTF18abvvcko, objects=self.fake_objects)

        self.raw_det_ZTF17aaaaaxg = pd.read_csv(
            os.path.join(EXAMPLES_PATH, 'ZTF17aaaaaxg_det.csv'),
            index_col="oid")
        self.raw_det_ZTF17aaaaaxg['sigmapsf_corr_ext'] = (
            self.raw_det_ZTF17aaaaaxg['sigmapsf_corr'])
        self.raw_nondet_ZTF17aaaaaxg = pd.read_csv(
            os.path.join(EXAMPLES_PATH, 'ZTF17aaaaaxg_nondet.csv'),
            index_col="oid")
        self.det_ZTF17aaaaaxg = self.preprocess_ztf.preprocess(
            self.raw_det_ZTF17aaaaaxg, objects=self.fake_objects)

    def turbofats_features(self):
        """Check the TurboFats output columns against the expected list.

        NOTE(review): the name lacks the ``test_`` prefix, so unittest does
        not collect this method. The name is left unchanged here; confirm
        whether it was disabled on purpose before renaming it.
        """
        turbofats_extractor = TurboFatsFeatureExtractor()
        turbofats_fs = turbofats_extractor.compute_features(
            self.det_ZTF18abakgtm)
        expected_cols = [
            'Amplitude', 'AndersonDarling', 'Autocor_length',
            'Beyond1Std', 'Con', 'Eta_e', 'Gskew', 'MaxSlope', 'Mean',
            'Meanvariance', 'MedianAbsDev', 'MedianBRP', 'PairSlopeTrend',
            'PercentAmplitude', 'Q31', 'Rcs', 'Skew', 'SmallKurtosis',
            'Std', 'StetsonK',
            'Harmonics_mag_1', 'Harmonics_mag_2', 'Harmonics_mag_3',
            'Harmonics_mag_4', 'Harmonics_mag_5', 'Harmonics_mag_6',
            'Harmonics_mag_7',
            'Harmonics_phase_2', 'Harmonics_phase_3', 'Harmonics_phase_4',
            'Harmonics_phase_5', 'Harmonics_phase_6', 'Harmonics_phase_7',
            'Harmonics_mse', 'Pvar', 'ExcessVar', 'GP_DRW_sigma',
            'GP_DRW_tau', 'SF_ML_amplitude', 'SF_ML_gamma', 'IAR_phi',
            'LinearTrend'
        ]
        self.assertEqual(type(turbofats_fs), pd.DataFrame)
        # len(DataFrame) counts rows; the column count is what must match
        # the number of expected feature names.
        self.assertEqual(len(turbofats_fs.columns), len(expected_cols))
        # assertListEqual requires real lists, and .columns is an Index.
        self.assertListEqual(list(turbofats_fs.columns), expected_cols)

    def test_color_features(self):
        """Color features come back as a DataFrame with the declared keys."""
        color_extractor = ColorFeatureExtractor()
        color_fs = color_extractor.compute_features(self.det_ZTF17aaaaaxg)
        self.assertEqual(type(color_fs), pd.DataFrame)
        self.assertEqual(len(color_fs.columns),
                         len(color_extractor.get_features_keys()))
        self.assertListEqual(list(color_fs.columns),
                             color_extractor.get_features_keys())

    def test_galactic_coordinates_features(self):
        """Galactic coordinates yield exactly the two declared columns."""
        galactic_extractor = GalacticCoordinatesExtractor()
        galactic_fs = galactic_extractor.compute_features(
            self.det_ZTF18abakgtm)
        self.assertEqual(type(galactic_fs), pd.DataFrame)
        self.assertEqual(len(galactic_fs.columns), 2)
        self.assertListEqual(list(galactic_fs.columns),
                             galactic_extractor.get_features_keys())

    def test_real_bogus_features(self):
        """RealBogusExtractor returns a DataFrame."""
        rb_extractor = RealBogusExtractor()
        rb_fs = rb_extractor.compute_features(self.det_ZTF18abakgtm)
        self.assertEqual(type(rb_fs), pd.DataFrame)

    def test_sg_score_features(self):
        """SGScoreExtractor returns a DataFrame."""
        sg_score = SGScoreExtractor()
        sg_fs = sg_score.compute_features(self.det_ZTF18abakgtm)
        self.assertEqual(type(sg_fs), pd.DataFrame)

    def test_sn_detections_features(self):
        """SupernovaeDetectionFeatureExtractor returns a DataFrame."""
        sn_det_extractor = SupernovaeDetectionFeatureExtractor()
        sn_fs = sn_det_extractor.compute_features(self.det_ZTF18abakgtm)
        self.assertEqual(type(sn_fs), pd.DataFrame)

    def test_sn_non_detections_features(self):
        """The detection + non-detection extractor returns a DataFrame."""
        sn_non_det_extractor = (
            SupernovaeDetectionAndNonDetectionFeatureExtractor())
        sn_non_det = sn_non_det_extractor.compute_features(
            self.det_ZTF18abakgtm,
            non_detections=self.raw_nondet_ZTF18abakgtm)
        self.assertEqual(type(sn_non_det), pd.DataFrame)

    def test_wise_features_ok(self):
        """ZTF17aaaaaxg yields exactly one row without missing values."""
        wise_extractor = WiseStaticExtractor()
        wise_features = wise_extractor.compute_features(
            self.det_ZTF17aaaaaxg,
            non_detections=self.raw_nondet_ZTF17aaaaaxg)
        self.assertEqual(type(wise_features), pd.DataFrame)
        self.assertEqual(len(wise_features.dropna()), 1)

    def test_wise_features_not_found(self):
        """ZTF18abakgtm yields no row that survives ``dropna``."""
        wise_extractor = WiseStaticExtractor()
        wise_features = wise_extractor.compute_features(
            self.det_ZTF18abakgtm,
            non_detections=self.raw_nondet_ZTF18abakgtm)
        self.assertEqual(type(wise_features), pd.DataFrame)
        self.assertEqual(len(wise_features.dropna()), 0)
# Labels in `unused_classes` are dropped; keys of `rename_class_dictionary`
# are replaced by their mapped class name, all other labels are kept as-is.
unused_classes = ['TDE', 'ZZ']
rename_class_dictionary = {
    'EA': 'EB',
    'EB/EW': 'EB',
    'RSCVn': 'Periodic-Other',
    'SNIIb': 'SNII',
    'SNIIn': 'SNII'
}

is_unused = labels.classALeRCE.isin(unused_classes)
labels = labels[~is_unused].copy()
# map() leaves NaN for classes missing from the dictionary; fillna()
# restores the original label for those rows.
renamed_classes = labels['classALeRCE'].map(rename_class_dictionary)
labels['classALeRCE'] = renamed_classes.fillna(labels['classALeRCE'])

# Intersecting labels and detections
detection_oids = detections.index.unique()
valid_oids = detection_oids.intersection(labels.index.unique())
labeled_detections = detections.loc[valid_oids]
labels = labels.loc[valid_oids].copy()

# Non-detections are further limited to oids that have non-detections.
non_detection_oids = non_detections.index.unique()
valid_oids = valid_oids.intersection(non_detection_oids)
labeled_non_detections = non_detections.loc[valid_oids]

# ZTF preprocessing
preprocessor_ztf = DetectionsPreprocessorZTF()
labeled_detections = preprocessor_ztf.preprocess(labeled_detections)

# Save data
labeled_detections.to_pickle('dataset_detections.pkl')
labeled_non_detections.to_pickle('dataset_non_detections.pkl')
labels.to_pickle('dataset_labels.pkl')
# Preprocess pickled ZTF detections and keep only the non-detections whose
# oid is still present in the preprocessed detections.
#
# Command-line arguments (all are pickle paths):
#   1. input detections      2. input non-detections
#   3. output detections     4. output non-detections
import sys

import pandas as pd

from late_classifier.features import DetectionsPreprocessorZTF

# Fail early with a usage message instead of an IndexError part-way through.
if len(sys.argv) < 5:
    sys.exit(
        'usage: %s DETECTIONS_IN NON_DETECTIONS_IN '
        'DETECTIONS_OUT NON_DETECTIONS_OUT' % sys.argv[0])

# NOTE: unpickling can execute arbitrary code; only pass trusted files.
detections = pd.read_pickle(sys.argv[1])
non_detections = pd.read_pickle(sys.argv[2])

preprocessor_ztf = DetectionsPreprocessorZTF()
detections = preprocessor_ztf.preprocess(detections)

# Restrict non-detections to oids that also appear in the detections index
# after preprocessing.
oid_intersection = detections.index.unique().intersection(
    non_detections.index.unique())
non_detections = non_detections.loc[oid_intersection]

detections.to_pickle(sys.argv[3])
non_detections.to_pickle(sys.argv[4])