Example #1
def obtain_meta_data_frame_for_available_lightcurves(self):
    """
    Prepares the meta data frame with the transit information based on known planet transits.
    """
    dispositions = self.load_toi_dispositions_in_project_format()
    suspected_planet_dispositions = dispositions[
        (dispositions['disposition'] != 'FP')
        & dispositions['transit_epoch'].notna()
        & dispositions['transit_period'].notna()
        & dispositions['transit_duration'].notna()]
    lightcurve_paths = list(self.lightcurve_directory.glob('**/*lc.fits'))
    tess_data_interface = TessDataInterface()
    tic_ids = [tess_data_interface.get_tic_id_from_single_sector_obs_id(path.name)
               for path in lightcurve_paths]
    sectors = [tess_data_interface.get_sector_from_single_sector_obs_id(path.name)
               for path in lightcurve_paths]
    lightcurve_meta_data = pd.DataFrame({'lightcurve_path': list(map(str, lightcurve_paths)),
                                         'TIC ID': tic_ids, 'Sector': sectors})
    meta_data_frame_with_candidate_nans = pd.merge(suspected_planet_dispositions, lightcurve_meta_data,
                                                   how='inner', on=['TIC ID', 'Sector'])
    self.meta_data_frame = meta_data_frame_with_candidate_nans.dropna()
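A minimal sketch of the inner-join-then-dropna pattern used above, with made-up frames (all values here are hypothetical):

import pandas as pd

dispositions = pd.DataFrame({'TIC ID': [1, 2], 'Sector': [5, 5],
                             'transit_epoch': [1325.5, None]})
lightcurves = pd.DataFrame({'TIC ID': [1, 2], 'Sector': [5, 5],
                            'lightcurve_path': ['a_lc.fits', 'b_lc.fits']})
# The inner merge keeps only (TIC ID, Sector) pairs with both a disposition and a
# lightcurve file; `dropna` then discards rows still missing transit parameters.
meta_data_frame = pd.merge(dispositions, lightcurves, how='inner',
                           on=['TIC ID', 'Sector']).dropna()
print(meta_data_frame)  # Only TIC 1 survives.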
Example #2
def obtain_meta_data_frame_for_available_lightcurves(self):
    """
    Gets the available meta disposition data from Liang Yu's work and combines it with the available
    lightcurve data, throwing out any data that doesn't have its counterpart.

    :return: The meta data frame containing the lightcurve paths and meta data needed to generate labels.
    """
    # noinspection SpellCheckingInspection
    columns_to_use = ['tic_id', 'Disposition', 'Epoc', 'Period', 'Duration', 'Sectors']
    liang_yu_dispositions = pd.read_csv(self.liang_yu_dispositions_path, usecols=columns_to_use)
    # noinspection SpellCheckingInspection
    liang_yu_dispositions.rename(columns={'tic_id': 'TIC ID', 'Disposition': 'disposition',
                                          'Epoc': 'transit_epoch', 'Period': 'transit_period',
                                          'Duration': 'transit_duration', 'Sectors': 'Sector'},
                                 inplace=True)
    liang_yu_dispositions = liang_yu_dispositions[
        (liang_yu_dispositions['disposition'] != 'PC') |
        (liang_yu_dispositions['transit_epoch'].notna() &
         liang_yu_dispositions['transit_period'].notna() &
         liang_yu_dispositions['transit_duration'].notna())]
    lightcurve_paths = list(self.lightcurve_directory.glob('*lc.fits'))
    tess_data_interface = TessDataInterface()
    tic_ids = [tess_data_interface.get_tic_id_from_single_sector_obs_id(path.name)
               for path in lightcurve_paths]
    sectors = [tess_data_interface.get_sector_from_single_sector_obs_id(path.name)
               for path in lightcurve_paths]
    lightcurve_meta_data = pd.DataFrame({'lightcurve_path': list(map(str, lightcurve_paths)),
                                         'TIC ID': tic_ids, 'Sector': sectors})
    meta_data_frame_with_candidate_nans = pd.merge(liang_yu_dispositions, lightcurve_meta_data,
                                                   how='inner', on=['TIC ID', 'Sector'])
    self.meta_data_frame = meta_data_frame_with_candidate_nans.dropna()
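The filter above keeps planet candidates ('PC') only when all three transit parameters are present, while other dispositions pass through regardless. A toy illustration (rows are made up):

import pandas as pd

df = pd.DataFrame({'disposition': ['PC', 'PC', 'EB'],
                   'transit_epoch': [1325.5, None, None],
                   'transit_period': [3.2, None, None],
                   'transit_duration': [2.1, None, None]})
filtered = df[(df['disposition'] != 'PC') |
              (df['transit_epoch'].notna() & df['transit_period'].notna() &
               df['transit_duration'].notna())]
print(filtered['disposition'].tolist())  # ['PC', 'EB']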
Example #3
def __init__(self):
    super().__init__()
    self.data_directory: Path = Path('data/microlensing')
    self.lightcurve_directory: Path = self.data_directory.joinpath('lightcurves')
    self.synthetic_signal_directory: Path = self.data_directory.joinpath('synthetic_signals')
    self.tess_data_interface = TessDataInterface()
    self.time_steps_per_example = 20000
Example #4
def test_new_tess_data_interface_sets_astroquery_api_limits(self):
    from astroquery.mast import Observations
    Observations.TIMEOUT = 600
    Observations.PAGESIZE = 50000
    TessDataInterface()
    assert Observations.TIMEOUT == 2000
    assert Observations.PAGESIZE == 3000
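The assertions above imply that merely constructing a TessDataInterface raises astroquery's MAST query limits. A minimal sketch of that behavior (the real constructor may do more):

from astroquery.mast import Observations

class TessDataInterface:
    """Sketch: raise the astroquery MAST limits on construction."""
    def __init__(self):
        Observations.TIMEOUT = 2000   # Seconds before a MAST request times out.
        Observations.PAGESIZE = 3000  # Rows returned per query page.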
Example #5
def general_preprocessing(self, example_path_tensor: tf.Tensor) -> (tf.Tensor, tf.Tensor):
    """
    Loads and preprocesses the data.

    :param example_path_tensor: The tensor containing the path to the example to load.
    :return: The example and its corresponding label.
    """
    example_path = example_path_tensor.numpy().decode('utf-8')
    tess_data_interface = TessDataInterface()
    fluxes, times = tess_data_interface.load_fluxes_and_times_from_fits_file(example_path)
    fluxes = self.normalize(fluxes)
    time_differences = np.diff(times, prepend=times[0])
    example = np.stack([fluxes, time_differences], axis=-1)
    if self.is_positive(example_path):
        label = self.generate_label(example_path, times)
    else:
        label = np.zeros_like(fluxes)
    return (tf.convert_to_tensor(example, dtype=tf.float32),
            tf.convert_to_tensor(label, dtype=tf.float32))
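Because this method calls .numpy() on its argument, it has to run eagerly; a typical way to map it over a dataset of paths is through tf.py_function. A minimal sketch, assuming database is an instance of the class this method belongs to and the path is hypothetical:

import tensorflow as tf

paths_dataset = tf.data.Dataset.from_tensor_slices(['example_lc.fits'])  # Hypothetical path.
examples_dataset = paths_dataset.map(
    lambda path: tf.py_function(database.general_preprocessing,
                                inp=[path], Tout=(tf.float32, tf.float32)))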
Example #6
def download_liang_yu_database(self):
    """
    Downloads the database used in `Liang Yu's work <https://arxiv.org/pdf/1904.02726.pdf>`_.
    """
    print('Clearing data directory...')
    self.clear_data_directory()
    print("Downloading Liang Yu's disposition CSV...")
    liang_yu_csv_url = 'https://raw.githubusercontent.com/yuliang419/Astronet-Triage/master/astronet/tces.csv'
    response = requests.get(liang_yu_csv_url)
    with open(self.liang_yu_dispositions_path, 'wb') as csv_file:
        csv_file.write(response.content)
    print('Downloading TESS observation list...')
    tess_data_interface = TessDataInterface()
    tess_observations = tess_data_interface.get_all_tess_time_series_observations()
    single_sector_observations = tess_data_interface.filter_for_single_sector_observations(tess_observations)
    single_sector_observations = tess_data_interface.add_tic_id_column_to_single_sector_observations(
        single_sector_observations)
    single_sector_observations = tess_data_interface.add_sector_column_to_single_sector_observations(
        single_sector_observations)
    print("Downloading lightcurves which appear in Liang Yu's disposition...")
    # noinspection SpellCheckingInspection
    columns_to_use = ['tic_id', 'Disposition', 'Epoc', 'Period', 'Duration', 'Sectors']
    liang_yu_dispositions = pd.read_csv(self.liang_yu_dispositions_path, usecols=columns_to_use)
    liang_yu_observations = pd.merge(single_sector_observations, liang_yu_dispositions, how='inner',
                                     left_on=['TIC ID', 'Sector'], right_on=['tic_id', 'Sectors'])
    number_of_observations_not_found = liang_yu_dispositions.shape[0] - liang_yu_observations.shape[0]
    print(f"{liang_yu_observations.shape[0]} observations found that match Liang Yu's entries.")
    print('Liang Yu used the FFIs, not the lightcurve products, so many will be missing.')
    print(f"No observations found for {number_of_observations_not_found} entries in Liang Yu's disposition.")
    liang_yu_data_products = tess_data_interface.get_product_list(liang_yu_observations)
    liang_yu_lightcurve_data_products = liang_yu_data_products[
        liang_yu_data_products['productFilename'].str.endswith('lc.fits')]
    download_manifest = tess_data_interface.download_products(liang_yu_lightcurve_data_products,
                                                              data_directory=self.data_directory)
    print(f'Moving lightcurves to {self.lightcurve_directory}...')
    for file_path_string in download_manifest['Local Path']:
        file_path = Path(file_path_string)
        file_path.rename(self.lightcurve_directory.joinpath(file_path.name))
    print('Database ready.')
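A usage sketch, assuming this method lives on a database class with the liang_yu_dispositions_path and directory attributes it references (the class name here is hypothetical):

database = LiangYuLightcurveDatabase()  # Hypothetical class name.
database.download_liang_yu_database()   # Fetches the disposition CSV and matching lightcurves.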
Example #7
def downloading_lightcurve(tic_id, sector):
    tess_data_interface = TessDataInterface()
    lightcurve_path = tess_data_interface.download_lightcurve(
        tic_id=tic_id, sector=sector, save_directory='lightcurves')
    print("You're using:", lightcurve_path)
    return lightcurve_path
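A usage sketch; the TIC ID and sector are taken from the lightcurve filename in Example #8 below:

path = downloading_lightcurve(tic_id=117979897, sector=5)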
Example #8
example_paths = [
    str(database.lightcurve_directory.joinpath(
        'tess2018319095959-s0005-0000000117979897-0125-s_lc.fits'))
]
# Uncomment below to run the inference for all validation files.
# example_paths = pd.read_csv(saved_log_directory.joinpath('validation.csv'), header=None)[0].values

print('Loading model...')
model = ConvolutionalLstm()
model.load_weights(str(saved_log_directory.joinpath('model.ckpt')))

print('Inferring and plotting...')
for example_path in example_paths:
    example, label = database.evaluation_preprocessing(
        tf.convert_to_tensor(example_path))
    prediction = model.predict(tf.expand_dims(example, axis=0))[0]
    tess_data_interface = TessDataInterface()
    fluxes, times = tess_data_interface.load_fluxes_and_times_from_fits_file(
        example_path)
    label, prediction = database.inference_postprocessing(
        label, prediction, times.shape[0])
    tic_id = tess_data_interface.get_tic_id_from_single_sector_obs_id(
        Path(example_path).stem)
    sector = tess_data_interface.get_sector_from_single_sector_obs_id(
        Path(example_path).stem)
    plot_title = f'TIC {tic_id} sector {sector}'
    plot_lightcurve(times,
                    fluxes,
                    label,
                    prediction,
                    title=plot_title,
                    save_path=f'{plot_title}.png')
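Note that the loop above constructs a new TessDataInterface on every iteration; since construction appears only to set astroquery's query limits (see Example #4), hoisting it out of the loop would behave identically.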
Example #9
class TessSyntheticInjectedDatabase(LightcurveDatabase):
    """
    A class to represent the database for injecting synthetic signals into real TESS data.
    """
    def __init__(self):
        super().__init__()
        self.data_directory: Path = Path('data/microlensing')
        self.lightcurve_directory: Path = self.data_directory.joinpath(
            'lightcurves')
        self.synthetic_signal_directory: Path = self.data_directory.joinpath(
            'synthetic_signals')
        self.tess_data_interface = TessDataInterface()
        self.time_steps_per_example = 20000

    def generate_datasets(self):
        """
        Generates the training and validation datasets.
        """
        all_lightcurve_paths = list(self.lightcurve_directory.glob('*.fits'))
        all_synthetic_paths = list(
            map(str, self.synthetic_signal_directory.glob('*.feather')))
        synthetic_signal_paths_dataset = tf.data.Dataset.from_tensor_slices(
            all_synthetic_paths)
        lightcurve_paths_datasets = self.get_training_and_validation_datasets_for_file_paths(
            all_lightcurve_paths)
        training_lightcurve_paths_dataset, validation_lightcurve_paths_dataset = lightcurve_paths_datasets
        shuffled_training_lightcurve_paths_dataset = training_lightcurve_paths_dataset.shuffle(
            buffer_size=len(list(training_lightcurve_paths_dataset)))
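        # The synthetic signal paths dataset is repeated so it never runs out when zipped
        # against the (finite) training and validation lightcurve path datasets below.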
        shuffled_synthetic_signal_paths_dataset = synthetic_signal_paths_dataset.shuffle(
            buffer_size=len(list(synthetic_signal_paths_dataset))).repeat()
        zipped_training_paths_dataset = tf.data.Dataset.zip(
            (shuffled_training_lightcurve_paths_dataset,
             shuffled_synthetic_signal_paths_dataset))
        output_types = (tf.float32, tf.float32)
        output_shapes = [(self.time_steps_per_example, 1), (1, )]
        lightcurve_training_dataset = map_py_function_to_dataset(
            zipped_training_paths_dataset,
            self.train_and_validation_preprocessing,
            self.number_of_parallel_processes_per_map,
            output_types=output_types,
            output_shapes=output_shapes,
            flat_map=True)
        batched_training_dataset = lightcurve_training_dataset.batch(
            self.batch_size)
        prefetch_training_dataset = batched_training_dataset.prefetch(
            tf.data.experimental.AUTOTUNE)
        shuffled_validation_lightcurve_paths_dataset = validation_lightcurve_paths_dataset.shuffle(
            buffer_size=len(list(validation_lightcurve_paths_dataset)))
        zipped_validation_paths_dataset = tf.data.Dataset.zip(
            (shuffled_validation_lightcurve_paths_dataset,
             shuffled_synthetic_signal_paths_dataset))
        lightcurve_validation_dataset = map_py_function_to_dataset(
            zipped_validation_paths_dataset,
            self.train_and_validation_preprocessing,
            self.number_of_parallel_processes_per_map,
            output_types=output_types,
            output_shapes=output_shapes,
            flat_map=True)
        batched_validation_dataset = lightcurve_validation_dataset.batch(
            self.batch_size)
        prefetch_validation_dataset = batched_validation_dataset.prefetch(
            tf.data.experimental.AUTOTUNE)
        return prefetch_training_dataset, prefetch_validation_dataset

    def train_and_validation_preprocessing(
        self,
        lightcurve_path_tensor: tf.Tensor,
        synthetic_signal_path_tensor: tf.Tensor,
    ) -> (np.ndarray, np.ndarray, np.ndarray, np.ndarray):
        """
        The training and validation preprocessing.

        :param lightcurve_path_tensor: The lightcurve's path to be preprocessed.
        :param synthetic_signal_path_tensor: The synthetic signal's path to be injected.
        :return: Two examples, one negative un-injected signal and one positive injected signal (paired as a tuple),
                 and the corresponding labels (paired as a tuple). Expected to have a post flat mapping to make each
                 element of the data be an individual example and label pair.
        """
        lightcurve_path = lightcurve_path_tensor.numpy().decode('utf-8')
        synthetic_signal_path = synthetic_signal_path_tensor.numpy().decode(
            'utf-8')
        fluxes, times = self.load_fluxes_and_times_from_lightcurve_path(
            lightcurve_path)
        synthetic_magnifications, synthetic_times = self.load_magnifications_and_times_from_synthetic_signal_path(
            synthetic_signal_path)
        fluxes_with_injected_signal = self.inject_signal_into_lightcurve(
            fluxes, times, synthetic_magnifications, synthetic_times)
        fluxes = self.flux_preprocessing(fluxes)
        fluxes_with_injected_signal = self.flux_preprocessing(
            fluxes_with_injected_signal)
        lightcurve = np.expand_dims(fluxes, axis=-1)
        lightcurve_with_injected_signal = np.expand_dims(
            fluxes_with_injected_signal, axis=-1)
        examples = (lightcurve, lightcurve_with_injected_signal)
        labels = (np.array([0]), np.array([1]))
        return examples, labels

    def load_fluxes_and_times_from_lightcurve_path(self, lightcurve_path):
        """
        Loads the fluxes and times from a TESS lightcurve FITS file.
        """
        fluxes, times = self.tess_data_interface.load_fluxes_and_times_from_fits_file(
            lightcurve_path)
        return fluxes, times

    def load_magnifications_and_times_from_synthetic_signal_path(self, synthetic_signal_path):
        """
        Loads the magnifications and times from a synthetic signal feather file.
        """
        synthetic_signal = pd.read_feather(synthetic_signal_path)
        synthetic_magnifications = synthetic_signal['Magnification']
        synthetic_times = synthetic_signal['Time (hours)'] / 24  # Convert from hours to days.
        return synthetic_magnifications, synthetic_times

    def flux_preprocessing(self,
                           fluxes: np.ndarray,
                           evaluation_mode=False) -> np.ndarray:
        """
        Preprocessing for the flux.

        :param fluxes: The flux array to preprocess.
        :param evaluation_mode: If the preprocessing should be consistent for evaluation.
        :return: The preprocessed flux array.
        """
        normalized_fluxes = self.normalize(fluxes)
        uniform_length_fluxes = self.make_uniform_length(
            normalized_fluxes,
            self.time_steps_per_example,
            randomize=not evaluation_mode)
        return uniform_length_fluxes

    @staticmethod
    def inject_signal_into_lightcurve(lightcurve_fluxes: np.ndarray,
                                      lightcurve_times: np.ndarray,
                                      signal_magnifications: np.ndarray,
                                      signal_times: np.ndarray):
        """
        Injects a synthetic magnification signal into real lightcurve fluxes.

        :param lightcurve_fluxes: The fluxes of the lightcurve to be injected into.
        :param lightcurve_times: The times of the flux observations of the lightcurve.
        :param signal_magnifications: The synthetic magnifications to inject.
        :param signal_times: The times of the synthetic magnifications.
        :return: The fluxes with the injected signal.
        """
        median_flux = np.median(lightcurve_fluxes)
        signal_fluxes = (signal_magnifications * median_flux) - median_flux
        signal_flux_interpolator = interp1d(signal_times,
                                            signal_fluxes,
                                            bounds_error=True)
        lightcurve_relative_times = lightcurve_times - np.min(lightcurve_times)
        interpolated_signal_fluxes = signal_flux_interpolator(
            lightcurve_relative_times)
        fluxes_with_injected_signal = lightcurve_fluxes + interpolated_signal_fluxes
        return fluxes_with_injected_signal
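A small worked sketch of inject_signal_into_lightcurve with synthetic arrays (all values here are made up; the signal times must span the lightcurve's relative times because the interpolator is built with bounds_error=True):

import numpy as np

lightcurve_times = np.linspace(1325.0, 1335.0, 1000)     # Hypothetical 10-day time range.
lightcurve_fluxes = np.random.normal(1000.0, 5.0, 1000)  # Hypothetical raw fluxes.
signal_times = np.linspace(0.0, 10.0, 500)               # Covers the full 0-10 day relative range.
signal_magnifications = 1 + 0.5 * np.exp(-((signal_times - 5.0) ** 2))  # Toy magnification bump.
injected_fluxes = TessSyntheticInjectedDatabase.inject_signal_into_lightcurve(
    lightcurve_fluxes, lightcurve_times, signal_magnifications, signal_times)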
Example #10
def tess_data_interface(self) -> TessDataInterface:
    return TessDataInterface()
Example #11
def download_exofop_toi_database(self, number_of_negative_lightcurves_to_download=10000):
    """
    Downloads the `ExoFOP database <https://exofop.ipac.caltech.edu/tess/view_toi.php>`_.
    """
    print('Clearing data directory...')
    self.clear_data_directory()
    print("Downloading ExoFOP TOI disposition CSV...")
    toi_csv_url = 'https://exofop.ipac.caltech.edu/tess/download_toi.php?sort=toi&output=csv'
    response = requests.get(toi_csv_url)
    with open(self.toi_dispositions_path, 'wb') as csv_file:
        csv_file.write(response.content)
    print('Downloading TESS observation list...')
    tess_data_interface = TessDataInterface()
    tess_observations = tess_data_interface.get_all_tess_time_series_observations()
    single_sector_observations = tess_data_interface.filter_for_single_sector_observations(tess_observations)
    single_sector_observations = tess_data_interface.add_tic_id_column_to_single_sector_observations(
        single_sector_observations)
    single_sector_observations = tess_data_interface.add_sector_column_to_single_sector_observations(
        single_sector_observations)
    print("Downloading lightcurves which are confirmed or suspected planets in TOI dispositions...")
    # noinspection SpellCheckingInspection
    toi_dispositions = self.load_toi_dispositions_in_project_format()
    suspected_planet_dispositions = toi_dispositions[toi_dispositions['disposition'] != 'FP']
    suspected_planet_observations = pd.merge(single_sector_observations, suspected_planet_dispositions,
                                             how='inner', on=['TIC ID', 'Sector'])
    observations_not_found = suspected_planet_dispositions.shape[0] - suspected_planet_observations.shape[0]
    print(f"{suspected_planet_observations.shape[0]} observations found that match the TOI dispositions.")
    print(f"No observations found for {observations_not_found} entries in TOI dispositions.")
    suspected_planet_data_products = tess_data_interface.get_product_list(suspected_planet_observations)
    suspected_planet_lightcurve_data_products = suspected_planet_data_products[
        suspected_planet_data_products['productFilename'].str.endswith('lc.fits')]
    suspected_planet_download_manifest = tess_data_interface.download_products(
        suspected_planet_lightcurve_data_products, data_directory=self.data_directory)
    print(f'Moving lightcurves to {self.lightcurve_directory}...')
    for file_path_string in suspected_planet_download_manifest['Local Path']:
        file_path = Path(file_path_string)
        file_path.rename(self.lightcurve_directory.joinpath(file_path.name))
    print("Downloading lightcurves which are not in TOI dispositions and do not have TCEs (not planets)...")
    print(f'Download limited to {number_of_negative_lightcurves_to_download} lightcurves...')
    # noinspection SpellCheckingInspection
    toi_tic_ids = toi_dispositions['TIC ID'].values
    not_toi_observations = single_sector_observations[
        ~single_sector_observations['TIC ID'].isin(toi_tic_ids)]  # Don't include even false positives.
    not_toi_observations = not_toi_observations.sample(frac=1, random_state=0)
    # Shorten product list obtaining.
    not_toi_observations = not_toi_observations.head(number_of_negative_lightcurves_to_download * 2)
    not_toi_data_products = tess_data_interface.get_product_list(not_toi_observations)
    not_toi_data_products = tess_data_interface.add_tic_id_column_to_single_sector_observations(
        not_toi_data_products)
    not_toi_lightcurve_data_products = not_toi_data_products[
        not_toi_data_products['productFilename'].str.endswith('lc.fits')]
    not_toi_data_validation_data_products = not_toi_data_products[
        not_toi_data_products['productFilename'].str.endswith('dvr.xml')]
    tic_ids_with_dv = not_toi_data_validation_data_products['TIC ID'].values
    not_planet_lightcurve_data_products = not_toi_lightcurve_data_products[
        ~not_toi_lightcurve_data_products['TIC ID'].isin(tic_ids_with_dv)]  # Remove any lightcurves with TCEs.
    # Shuffle rows.
    not_planet_lightcurve_data_products = not_planet_lightcurve_data_products.sample(frac=1, random_state=0)
    not_planet_download_manifest = tess_data_interface.download_products(
        not_planet_lightcurve_data_products.head(number_of_negative_lightcurves_to_download),
        data_directory=self.data_directory)
    print(f'Moving lightcurves to {self.lightcurve_directory}...')
    for file_path_string in not_planet_download_manifest['Local Path']:
        file_path = Path(file_path_string)
        file_path.rename(self.lightcurve_directory.joinpath(file_path.name))
    print('Database ready.')
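A usage sketch, assuming this method lives on a database class alongside load_toi_dispositions_in_project_format (the class name here is hypothetical):

database = ToiLightcurveDatabase()  # Hypothetical class name.
database.download_exofop_toi_database(number_of_negative_lightcurves_to_download=100)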