def obtain_meta_data_frame_for_available_lightcurves(self):
    """
    Builds the meta data frame by pairing the TOI dispositions which may be planets with the lightcurve files
    found under the lightcurve directory, and stores the result in `self.meta_data_frame`.

    Only dispositions which are not marked 'FP' and which have a transit epoch, period, and duration are
    considered. Rows without a counterpart on the other side of the pairing are discarded, as is any
    remaining row containing a missing value.
    """
    toi_dispositions = self.load_toi_dispositions_in_project_format()
    is_not_false_positive = toi_dispositions['disposition'] != 'FP'
    has_transit_epoch = toi_dispositions['transit_epoch'].notna()
    has_transit_period = toi_dispositions['transit_period'].notna()
    has_transit_duration = toi_dispositions['transit_duration'].notna()
    suspected_planet_dispositions = toi_dispositions[
        is_not_false_positive & has_transit_epoch & has_transit_period & has_transit_duration
    ]
    # The `**` pattern searches the lightcurve directory recursively.
    paths = list(self.lightcurve_directory.glob('**/*lc.fits'))
    file_names = [path.name for path in paths]
    tess_data_interface = TessDataInterface()
    # The TIC ID and the sector are both parsed from the file name of each lightcurve.
    tic_ids = list(map(tess_data_interface.get_tic_id_from_single_sector_obs_id, file_names))
    sectors = list(map(tess_data_interface.get_sector_from_single_sector_obs_id, file_names))
    lightcurve_meta_data = pd.DataFrame({
        'lightcurve_path': [str(path) for path in paths],
        'TIC ID': tic_ids,
        'Sector': sectors,
    })
    # The inner merge keeps only the dispositions and lightcurves which have a counterpart.
    merged_meta_data = suspected_planet_dispositions.merge(lightcurve_meta_data, how='inner',
                                                           on=['TIC ID', 'Sector'])
    self.meta_data_frame = merged_meta_data.dropna()
def obtain_meta_data_frame_for_available_lightcurves(self):
    """
    Gets the available meta disposition data from Liang Yu's work and combines it with the available
    lightcurve data, throwing out any data that doesn't have its counterpart.

    The combined meta data frame, containing the lightcurve paths and the meta data needed to generate
    labels, is stored in `self.meta_data_frame` (nothing is returned).
    """
    # Maps the column names of Liang Yu's CSV to the column names used by the project.
    # noinspection SpellCheckingInspection
    project_column_names = {
        'tic_id': 'TIC ID',
        'Disposition': 'disposition',
        'Epoc': 'transit_epoch',
        'Period': 'transit_period',
        'Duration': 'transit_duration',
        'Sectors': 'Sector',
    }
    dispositions = pd.read_csv(self.liang_yu_dispositions_path, usecols=list(project_column_names))
    dispositions = dispositions.rename(columns=project_column_names)
    # Rows marked 'PC' are only kept when their transit epoch, period, and duration are all present.
    is_not_planet_candidate = dispositions['disposition'] != 'PC'
    has_complete_transit_information = (dispositions['transit_epoch'].notna() &
                                        dispositions['transit_period'].notna() &
                                        dispositions['transit_duration'].notna())
    dispositions = dispositions[is_not_planet_candidate | has_complete_transit_information]
    # Only the top level of the lightcurve directory is searched.
    paths = list(self.lightcurve_directory.glob('*lc.fits'))
    file_names = [path.name for path in paths]
    tess_data_interface = TessDataInterface()
    # The TIC ID and the sector are both parsed from the file name of each lightcurve.
    tic_ids = list(map(tess_data_interface.get_tic_id_from_single_sector_obs_id, file_names))
    sectors = list(map(tess_data_interface.get_sector_from_single_sector_obs_id, file_names))
    lightcurve_meta_data = pd.DataFrame({
        'lightcurve_path': [str(path) for path in paths],
        'TIC ID': tic_ids,
        'Sector': sectors,
    })
    # The inner merge keeps only the dispositions and lightcurves which have a counterpart.
    merged_meta_data = dispositions.merge(lightcurve_meta_data, how='inner', on=['TIC ID', 'Sector'])
    self.meta_data_frame = merged_meta_data.dropna()
def __init__(self):
    """
    Sets the data directory locations, creates the TESS data interface, and sets the number of time steps
    used per example.
    """
    super().__init__()
    self.data_directory: Path = Path('data/microlensing')
    # Both the lightcurves and the synthetic signals are kept in subdirectories of the data directory.
    self.lightcurve_directory: Path = self.data_directory / 'lightcurves'
    self.synthetic_signal_directory: Path = self.data_directory / 'synthetic_signals'
    self.tess_data_interface = TessDataInterface()
    self.time_steps_per_example = 20000
def test_new_tess_data_interface_sets_astroquery_api_limits(self):
    """
    Checks that constructing a `TessDataInterface` sets the astroquery `Observations` timeout and page size.
    """
    from astroquery.mast import Observations
    # Begin with values which differ from the expected ones, so the assertions only pass if they are changed.
    Observations.TIMEOUT, Observations.PAGESIZE = 600, 50000
    TessDataInterface()
    assert Observations.TIMEOUT == 2000
    assert Observations.PAGESIZE == 3000
def general_preprocessing(self, example_path_tensor: tf.Tensor) -> (tf.Tensor, tf.Tensor):
    """
    Loads the example at the given path and prepares it, along with its label, for the network.

    :param example_path_tensor: The tensor containing the path to the example to load.
    :return: The example (normalized fluxes stacked with time differences) and its corresponding label.
    """
    example_path = example_path_tensor.numpy().decode('utf-8')
    fluxes, times = TessDataInterface().load_fluxes_and_times_from_fits_file(example_path)
    normalized_fluxes = self.normalize(fluxes)
    # Prepending the first time keeps the differences the same length as the times.
    time_differences = np.diff(times, prepend=times[0])
    example = np.stack([normalized_fluxes, time_differences], axis=-1)
    # Negative examples are labeled with zeros for every time step.
    label = (self.generate_label(example_path, times) if self.is_positive(example_path)
             else np.zeros_like(normalized_fluxes))
    example_tensor = tf.convert_to_tensor(example, dtype=tf.float32)
    label_tensor = tf.convert_to_tensor(label, dtype=tf.float32)
    return example_tensor, label_tensor
def download_liang_yu_database(self):
    """
    Downloads the database used in `Liang Yu's work <https://arxiv.org/pdf/1904.02726.pdf>`_.

    The data directory is cleared, the disposition CSV is saved to `self.liang_yu_dispositions_path`, and the
    lightcurve products of the single sector observations which match entries of the CSV are downloaded and
    moved into `self.lightcurve_directory`.

    :raises requests.HTTPError: If the request for the disposition CSV returns an error status.
    """
    print('Clearing data directory...')
    self.clear_data_directory()
    print("Downloading Liang Yu's disposition CSV...")
    liang_yu_csv_url = 'https://raw.githubusercontent.com/yuliang419/Astronet-Triage/master/astronet/tces.csv'
    response = requests.get(liang_yu_csv_url)
    # Fail here on an HTTP error rather than saving an error page as the CSV and failing later while parsing.
    response.raise_for_status()
    with open(self.liang_yu_dispositions_path, 'wb') as csv_file:
        csv_file.write(response.content)
    print('Downloading TESS observation list...')
    tess_data_interface = TessDataInterface()
    tess_observations = tess_data_interface.get_all_tess_time_series_observations()
    single_sector_observations = tess_data_interface.filter_for_single_sector_observations(tess_observations)
    single_sector_observations = tess_data_interface.add_tic_id_column_to_single_sector_observations(
        single_sector_observations)
    single_sector_observations = tess_data_interface.add_sector_column_to_single_sector_observations(
        single_sector_observations)
    print("Downloading lightcurves which appear in Liang Yu's disposition...")
    # noinspection SpellCheckingInspection
    columns_to_use = ['tic_id', 'Disposition', 'Epoc', 'Period', 'Duration', 'Sectors']
    liang_yu_dispositions = pd.read_csv(self.liang_yu_dispositions_path, usecols=columns_to_use)
    # The inner merge keeps only the observations whose TIC ID and sector appear in the dispositions.
    liang_yu_observations = pd.merge(single_sector_observations, liang_yu_dispositions, how='inner',
                                     left_on=['TIC ID', 'Sector'], right_on=['tic_id', 'Sectors'])
    number_of_observations_not_found = liang_yu_dispositions.shape[0] - liang_yu_observations.shape[0]
    print(f"{liang_yu_observations.shape[0]} observations found that match Liang Yu's entries.")
    print('Liang Yu used the FFIs, not the lightcurve products, so many will be missing.')
    print(f"No observations found for {number_of_observations_not_found} entries in Liang Yu's disposition.")
    liang_yu_data_products = tess_data_interface.get_product_list(liang_yu_observations)
    # Only the lightcurve files are wanted from the products of each observation.
    liang_yu_lightcurve_data_products = liang_yu_data_products[
        liang_yu_data_products['productFilename'].str.endswith('lc.fits')
    ]
    download_manifest = tess_data_interface.download_products(liang_yu_lightcurve_data_products,
                                                              data_directory=self.data_directory)
    print(f'Moving lightcurves to {self.lightcurve_directory}...')
    for file_path_string in download_manifest['Local Path']:
        file_path = Path(file_path_string)
        file_path.rename(self.lightcurve_directory.joinpath(file_path.name))
    print('Database ready.')
def downloading_lightcurve(tic_id, sector):
    """
    Downloads the lightcurve of the given target and sector, using 'lightcurves' as the save directory, and
    prints the resulting path.

    :param tic_id: The TIC ID of the target.
    :param sector: The sector of the observation.
    :return: The lightcurve path given by the TESS data interface.
    """
    lightcurve_path = TessDataInterface().download_lightcurve(tic_id=tic_id, sector=sector,
                                                              save_directory='lightcurves')
    print("You're using: ", lightcurve_path)
    return lightcurve_path
database.lightcurve_directory.joinpath( 'tess2018319095959-s0005-0000000117979897-0125-s_lc.fits')) ] # Uncomment below to run the inference for all validation files. # example_paths = pd.read_csv(saved_log_directory.joinpath('validation.csv'), header=None)[0].values print('Loading model...') model = ConvolutionalLstm() model.load_weights(str(saved_log_directory.joinpath('model.ckpt'))) print('Inferring and plotting...') for example_path in example_paths: example, label = database.evaluation_preprocessing( tf.convert_to_tensor(example_path)) prediction = model.predict(tf.expand_dims(example, axis=0))[0] tess_data_interface = TessDataInterface() fluxes, times = tess_data_interface.load_fluxes_and_times_from_fits_file( example_path) label, prediction = database.inference_postprocessing( label, prediction, times.shape[0]) tic_id = tess_data_interface.get_tic_id_from_single_sector_obs_id( Path(example_path).stem) sector = tess_data_interface.get_sector_from_single_sector_obs_id( Path(example_path).stem) plot_title = f'TIC {tic_id} sector {sector}' plot_lightcurve(times, fluxes, label, prediction, title=plot_title, save_path=f'{plot_title}.png')
class TessSyntheticInjectedDatabase(LightcurveDatabase):
    """
    A class to represent the database for injecting synthetic signals into real TESS data.
    """

    def __init__(self):
        """
        Sets the data directory locations, creates the TESS data interface, and sets the number of time steps
        used per example.
        """
        super().__init__()
        self.data_directory: Path = Path('data/microlensing')
        self.lightcurve_directory: Path = self.data_directory.joinpath('lightcurves')
        self.synthetic_signal_directory: Path = self.data_directory.joinpath('synthetic_signals')
        self.tess_data_interface = TessDataInterface()
        self.time_steps_per_example = 20000

    def generate_datasets(self):
        """
        Generates the training and validation datasets from the lightcurve files and synthetic signal files.

        Each lightcurve path is paired with a synthetic signal path, and each pair is preprocessed by
        `train_and_validation_preprocessing` with the mapping flattened (`flat_map=True`).

        :return: The batched and prefetched training dataset and validation dataset.
        """
        all_lightcurve_paths = list(self.lightcurve_directory.glob('*.fits'))
        all_synthetic_paths = list(map(str, self.synthetic_signal_directory.glob('*.feather')))
        synthetic_signal_paths_dataset = tf.data.Dataset.from_tensor_slices(all_synthetic_paths)
        lightcurve_paths_datasets = self.get_training_and_validation_datasets_for_file_paths(all_lightcurve_paths)
        training_lightcurve_paths_dataset, validation_lightcurve_paths_dataset = lightcurve_paths_datasets
        # The shuffle buffers are sized to the full datasets. The datasets are iterated to count them.
        shuffled_training_lightcurve_paths_dataset = training_lightcurve_paths_dataset.shuffle(
            buffer_size=len(list(training_lightcurve_paths_dataset)))
        # The synthetic dataset has one element per path, so the list length avoids iterating the dataset.
        # Repeating lets the synthetic signals be reused when zipped with the lightcurve paths.
        shuffled_synthetic_signal_paths_dataset = synthetic_signal_paths_dataset.shuffle(
            buffer_size=len(all_synthetic_paths)).repeat()
        zipped_training_paths_dataset = tf.data.Dataset.zip(
            (shuffled_training_lightcurve_paths_dataset, shuffled_synthetic_signal_paths_dataset))
        output_types = (tf.float32, tf.float32)
        output_shapes = [(self.time_steps_per_example, 1), (1,)]
        lightcurve_training_dataset = map_py_function_to_dataset(
            zipped_training_paths_dataset, self.train_and_validation_preprocessing,
            self.number_of_parallel_processes_per_map, output_types=output_types, output_shapes=output_shapes,
            flat_map=True)
        batched_training_dataset = lightcurve_training_dataset.batch(self.batch_size)
        prefetch_training_dataset = batched_training_dataset.prefetch(tf.data.experimental.AUTOTUNE)
        shuffled_validation_lightcurve_paths_dataset = validation_lightcurve_paths_dataset.shuffle(
            buffer_size=len(list(validation_lightcurve_paths_dataset)))
        zipped_validation_paths_dataset = tf.data.Dataset.zip(
            (shuffled_validation_lightcurve_paths_dataset, shuffled_synthetic_signal_paths_dataset))
        lightcurve_validation_dataset = map_py_function_to_dataset(
            zipped_validation_paths_dataset, self.train_and_validation_preprocessing,
            self.number_of_parallel_processes_per_map, output_types=output_types, output_shapes=output_shapes,
            flat_map=True)
        batched_validation_dataset = lightcurve_validation_dataset.batch(self.batch_size)
        prefetch_validation_dataset = batched_validation_dataset.prefetch(tf.data.experimental.AUTOTUNE)
        return prefetch_training_dataset, prefetch_validation_dataset

    def train_and_validation_preprocessing(
            self, lightcurve_path_tensor: tf.Tensor, synthetic_signal_path_tensor: tf.Tensor,
    ) -> ((np.ndarray, np.ndarray), (np.ndarray, np.ndarray)):
        """
        The training and validation preprocessing.

        :param lightcurve_path_tensor: The lightcurve's path to be preprocessed.
        :param synthetic_signal_path_tensor: The synthetic signal's path to be injected.
        :return: Two examples, one negative un-injected signal and one positive injected signal (paired as a
                 tuple), and the corresponding labels (paired as a tuple). Expected to have a post flat mapping
                 to make each element of the data be an individual example and label pair.
        """
        lightcurve_path = lightcurve_path_tensor.numpy().decode('utf-8')
        synthetic_signal_path = synthetic_signal_path_tensor.numpy().decode('utf-8')
        fluxes, times = self.load_fluxes_and_times_from_lightcurve_path(lightcurve_path)
        synthetic_magnifications, synthetic_times = self.load_magnifications_and_times_from_synthetic_signal_path(
            synthetic_signal_path)
        # The signal is injected into the raw fluxes, before any normalization or length adjustment.
        fluxes_with_injected_signal = self.inject_signal_into_lightcurve(fluxes, times, synthetic_magnifications,
                                                                         synthetic_times)
        fluxes = self.flux_preprocessing(fluxes)
        fluxes_with_injected_signal = self.flux_preprocessing(fluxes_with_injected_signal)
        # A trailing axis of size one is added to each example.
        lightcurve = np.expand_dims(fluxes, axis=-1)
        lightcurve_with_injected_signal = np.expand_dims(fluxes_with_injected_signal, axis=-1)
        examples = (lightcurve, lightcurve_with_injected_signal)
        labels = (np.array([0]), np.array([1]))
        return examples, labels

    def load_fluxes_and_times_from_lightcurve_path(self, lightcurve_path):
        """
        Loads the fluxes and times of a lightcurve FITS file using the TESS data interface.

        :param lightcurve_path: The path of the lightcurve file.
        :return: The fluxes and the times of the lightcurve.
        """
        fluxes, times = self.tess_data_interface.load_fluxes_and_times_from_fits_file(lightcurve_path)
        return fluxes, times

    def load_magnifications_and_times_from_synthetic_signal_path(self, synthetic_signal_path):
        """
        Loads the magnifications and times of a synthetic signal from a feather file.

        :param synthetic_signal_path: The path of the synthetic signal file.
        :return: The `Magnification` column and the `Time (hours)` column converted to days.
        """
        synthetic_signal = pd.read_feather(synthetic_signal_path)
        synthetic_magnifications = synthetic_signal['Magnification']
        synthetic_times = synthetic_signal['Time (hours)']
        synthetic_times = synthetic_times / 24  # Convert hours to days.
        return synthetic_magnifications, synthetic_times

    def flux_preprocessing(self, fluxes: np.ndarray, evaluation_mode=False) -> np.ndarray:
        """
        Preprocessing for the flux.

        :param fluxes: The flux array to preprocess.
        :param evaluation_mode: If the preprocessing should be consistent for evaluation.
        :return: The preprocessed flux array.
        """
        normalized_fluxes = self.normalize(fluxes)
        # Randomization of the length adjustment is disabled in evaluation mode.
        uniform_length_fluxes = self.make_uniform_length(normalized_fluxes, self.time_steps_per_example,
                                                         randomize=not evaluation_mode)
        return uniform_length_fluxes

    @staticmethod
    def inject_signal_into_lightcurve(lightcurve_fluxes: np.ndarray, lightcurve_times: np.ndarray,
                                      signal_magnifications: np.ndarray, signal_times: np.ndarray):
        """
        Injects a synthetic magnification signal into real lightcurve fluxes.

        :param lightcurve_fluxes: The fluxes of the lightcurve to be injected into.
        :param lightcurve_times: The times of the flux observations of the lightcurve.
        :param signal_magnifications: The synthetic magnifications to inject.
        :param signal_times: The times of the synthetic magnifications.
        :return: The fluxes with the injected signal.
        :raises ValueError: If a lightcurve time, relative to the first lightcurve time, falls outside the range
                            of the signal times (the interpolator is created with `bounds_error=True`).
        """
        median_flux = np.median(lightcurve_fluxes)
        # The flux the signal adds on top of the median flux of the lightcurve.
        signal_fluxes = (signal_magnifications * median_flux) - median_flux
        signal_flux_interpolator = interp1d(signal_times, signal_fluxes, bounds_error=True)
        # The lightcurve times are shifted to start at zero before sampling the signal.
        lightcurve_relative_times = lightcurve_times - np.min(lightcurve_times)
        interpolated_signal_fluxes = signal_flux_interpolator(lightcurve_relative_times)
        fluxes_with_injected_signal = lightcurve_fluxes + interpolated_signal_fluxes
        return fluxes_with_injected_signal
def tess_data_interface(self) -> TessDataInterface:
    """
    Provides a newly constructed TESS data interface.

    :return: A new `TessDataInterface` instance.
    """
    interface = TessDataInterface()
    return interface
def download_exofop_toi_database(self, number_of_negative_lightcurves_to_download=10000):
    """
    Downloads the `ExoFOP database <https://exofop.ipac.caltech.edu/tess/view_toi.php>`_.

    The data directory is cleared and the TOI disposition CSV is saved to `self.toi_dispositions_path`. Two
    sets of lightcurves are then downloaded and moved into `self.lightcurve_directory`: those matching TOI
    dispositions which are not marked 'FP', and a limited number of lightcurves of targets which are not in
    the TOI dispositions and have no data validation report product.

    :param number_of_negative_lightcurves_to_download: The maximum number of non-TOI lightcurves to download.
    :raises requests.HTTPError: If the request for the TOI disposition CSV returns an error status.
    """
    print('Clearing data directory...')
    self.clear_data_directory()
    print("Downloading ExoFOP TOI disposition CSV...")
    toi_csv_url = 'https://exofop.ipac.caltech.edu/tess/download_toi.php?sort=toi&output=csv'
    response = requests.get(toi_csv_url)
    # Fail here on an HTTP error rather than saving an error page as the CSV and failing later while parsing.
    response.raise_for_status()
    with open(self.toi_dispositions_path, 'wb') as csv_file:
        csv_file.write(response.content)
    print('Downloading TESS observation list...')
    tess_data_interface = TessDataInterface()
    tess_observations = tess_data_interface.get_all_tess_time_series_observations()
    single_sector_observations = tess_data_interface.filter_for_single_sector_observations(tess_observations)
    single_sector_observations = tess_data_interface.add_tic_id_column_to_single_sector_observations(
        single_sector_observations)
    single_sector_observations = tess_data_interface.add_sector_column_to_single_sector_observations(
        single_sector_observations)
    print("Downloading lightcurves which are confirmed or suspected planets in TOI dispositions...")
    # noinspection SpellCheckingInspection
    toi_dispositions = self.load_toi_dispositions_in_project_format()
    suspected_planet_dispositions = toi_dispositions[toi_dispositions['disposition'] != 'FP']
    # The inner merge keeps only the observations whose TIC ID and sector appear in the dispositions.
    suspected_planet_observations = pd.merge(single_sector_observations, suspected_planet_dispositions,
                                             how='inner', on=['TIC ID', 'Sector'])
    observations_not_found = suspected_planet_dispositions.shape[0] - suspected_planet_observations.shape[0]
    print(f"{suspected_planet_observations.shape[0]} observations found that match the TOI dispositions.")
    print(f"No observations found for {observations_not_found} entries in TOI dispositions.")
    suspected_planet_data_products = tess_data_interface.get_product_list(suspected_planet_observations)
    suspected_planet_lightcurve_data_products = suspected_planet_data_products[
        suspected_planet_data_products['productFilename'].str.endswith('lc.fits')]
    suspected_planet_download_manifest = tess_data_interface.download_products(
        suspected_planet_lightcurve_data_products, data_directory=self.data_directory)
    print(f'Moving lightcurves to {self.lightcurve_directory}...')
    for file_path_string in suspected_planet_download_manifest['Local Path']:
        file_path = Path(file_path_string)
        file_path.rename(self.lightcurve_directory.joinpath(file_path.name))
    print("Downloading lightcurves which are not in TOI dispositions and do not have TCEs (not planets)...")
    print(f'Download limited to {number_of_negative_lightcurves_to_download} lightcurves...')
    # noinspection SpellCheckingInspection
    toi_tic_ids = toi_dispositions['TIC ID'].values
    not_toi_observations = single_sector_observations[
        ~single_sector_observations['TIC ID'].isin(toi_tic_ids)  # Don't include even false positives.
    ]
    # Shuffle rows (with a fixed seed).
    not_toi_observations = not_toi_observations.sample(frac=1, random_state=0)
    # Shorten product list obtaining.
    not_toi_observations = not_toi_observations.head(number_of_negative_lightcurves_to_download * 2)
    not_toi_data_products = tess_data_interface.get_product_list(not_toi_observations)
    not_toi_data_products = tess_data_interface.add_tic_id_column_to_single_sector_observations(
        not_toi_data_products)
    not_toi_lightcurve_data_products = not_toi_data_products[
        not_toi_data_products['productFilename'].str.endswith('lc.fits')]
    not_toi_data_validation_data_products = not_toi_data_products[
        not_toi_data_products['productFilename'].str.endswith('dvr.xml')]
    tic_ids_with_dv = not_toi_data_validation_data_products['TIC ID'].values
    not_planet_lightcurve_data_products = not_toi_lightcurve_data_products[
        ~not_toi_lightcurve_data_products['TIC ID'].isin(tic_ids_with_dv)  # Remove any lightcurves with TCEs.
    ]
    # Shuffle rows.
    not_planet_lightcurve_data_products = not_planet_lightcurve_data_products.sample(frac=1, random_state=0)
    not_planet_download_manifest = tess_data_interface.download_products(
        not_planet_lightcurve_data_products.head(number_of_negative_lightcurves_to_download),
        data_directory=self.data_directory)
    print(f'Moving lightcurves to {self.lightcurve_directory}...')
    for file_path_string in not_planet_download_manifest['Local Path']:
        file_path = Path(file_path_string)
        file_path.rename(self.lightcurve_directory.joinpath(file_path.name))
    print('Database ready.')