Example no. 1
 def __init__(self, tic_id, sectors=None):
     self.title = f'TIC {tic_id}'
     tess_toi_data_interface = TessToiDataInterface()
     self.tess_data_interface = TessDataInterface()
     dispositions_data_frame = tess_toi_data_interface.retrieve_exofop_toi_and_ctoi_planet_disposition_for_tic_id(
         tic_id)
     if dispositions_data_frame.shape[0] == 0:
         print('No known ExoFOP dispositions found.')
     else:
         # Use pandas context options to avoid truncating the printed dispositions.
         with pd.option_context('display.max_rows', None,
                                'display.max_columns', None,
                                'display.width', None):
             print(dispositions_data_frame)
     if sectors is None:
         sectors = self.tess_data_interface.get_sectors_target_appears_in(
             tic_id)
     if isinstance(sectors, int):
         sectors = [sectors]
     self.sectors = sectors
     light_curve = self.stitch_fluxes_flux_errors_and_times_for_target_from_mast(
         tic_id, sectors)
     relative_flux_errors, relative_fluxes, times = light_curve
     self.tic_id = tic_id
     self.times = times
     self.relative_fluxes = relative_fluxes
     self.relative_flux_errors = relative_flux_errors
     tic_row = self.tess_data_interface.get_tess_input_catalog_row(tic_id)
     self.star_radius = tic_row['rad']
     print(self.star_radius)
     self.period = None
     self.depth = None
     self.transit_epoch = None
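A minimal sketch of driving this constructor, assuming it belongs to the TransitFitter class shown in full in Example no. 18 (the import path and the TIC ID below are assumptions, not values from the source):

from ramjet.analysis.transit_fitter import TransitFitter  # Module path is an assumption.

fitter = TransitFitter(tic_id=261136679, sectors=[5, 6])  # Placeholder target; requires MAST access.
print(fitter.sectors)  # [5, 6]
print(fitter.period)   # None until a period is chosen interactively.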
Example no. 2
 def obtain_meta_data_frame_for_available_lightcurves(self):
     """
     Prepares the meta data frame with the transit information based on known planet transits.
     """
     dispositions = self.load_toi_dispositions_in_project_format()
     suspected_planet_dispositions = dispositions[
         (dispositions['disposition'] != 'FP')
         & dispositions['transit_epoch'].notna()
         & dispositions['transit_period'].notna()
         & dispositions['transit_duration'].notna()]
     lightcurve_paths = list(self.lightcurve_directory.glob('**/*lc.fits'))
     tess_data_interface = TessDataInterface()
     tic_ids = [
         tess_data_interface.get_tic_id_from_single_sector_obs_id(path.name)
         for path in lightcurve_paths
     ]
     sectors = [
         tess_data_interface.get_sector_from_single_sector_obs_id(path.name)
         for path in lightcurve_paths
     ]
     lightcurve_meta_data = pd.DataFrame({
         'lightcurve_path': list(map(str, lightcurve_paths)),
         'TIC ID': tic_ids,
         'Sector': sectors
     })
     meta_data_frame_with_candidate_nans = pd.merge(
         suspected_planet_dispositions,
         lightcurve_meta_data,
         how='inner',
         on=['TIC ID', 'Sector'])
     self.meta_data_frame = meta_data_frame_with_candidate_nans.dropna()
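The inner merge on ['TIC ID', 'Sector'] followed by dropna() is what pairs each downloaded light curve with a fully specified disposition. A self-contained toy sketch of that step (the data below is made up for illustration):

import pandas as pd

dispositions = pd.DataFrame({'TIC ID': [1, 2], 'Sector': [5, 5], 'transit_period': [3.1, None]})
light_curves = pd.DataFrame({'TIC ID': [1, 2, 3], 'Sector': [5, 5, 5],
                             'lightcurve_path': ['a.fits', 'b.fits', 'c.fits']})
merged = pd.merge(dispositions, light_curves, how='inner', on=['TIC ID', 'Sector'])
print(merged.dropna())  # Only TIC 1 survives: it matched a light curve and has no missing values.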
Example no. 3
def download():
    """
    Downloads and prepares the data needed for the GPU experiments.
    """
    tess_data_interface = TessDataInterface()
    tess_data_interface.download_two_minute_cadence_light_curves(
        Path('data/tess_two_minute_cadence_light_curves'))
    build_tables()
Example no. 4
 def __init__(self, data_directory='data/self_lensing_binaries'):
     super().__init__(data_directory=data_directory)
     self.lightcurve_directory: Path = self.data_directory.joinpath(
         'lightcurves')
     self.synthetic_signal_directory: Path = self.data_directory.joinpath(
         'synthetic_signals')
     self.tess_data_interface = TessDataInterface()
     self.time_steps_per_example = 20000
     self.shuffle_buffer_size = 10000
     self.allow_out_of_bounds_injection = False
Example no. 5
def download():
    """
    Downloads and prepares the data needed for the quick start tutorial.
    """
    tess_data_interface = TessDataInterface()
    tess_data_interface.download_two_minute_cadence_light_curves(
        Path('data/tess_two_minute_cadence_light_curves'), limit=10000)
    tess_toi_data_interface = TessToiDataInterface()
    tess_toi_data_interface.download_exofop_toi_light_curves_to_directory(
        Path('data/tess_two_minute_cadence_light_curves'))
    build_tables()
Example no. 6
class TessTwoMinuteCadenceLightCurveCollection(SqlMetadataLightCurveCollection):
    """
    A light curve collection of the TESS two minute cadence data.
    """
    tess_data_interface = TessDataInterface()
    tess_two_minute_cadence_light_curve_metadata_manger = TessTwoMinuteCadenceLightCurveMetadataManger()

    def __init__(self, dataset_splits: Union[List[int], None] = None, flux_type: TessFluxType = TessFluxType.PDCSAP):
        super().__init__()
        self.data_directory: Path = Path('data/tess_two_minute_cadence_light_curves')
        self.label = 0
        self.dataset_splits: Union[List[int], None] = dataset_splits
        self.flux_type: TessFluxType = flux_type

    def get_sql_query(self) -> Select:
        """
        Gets the SQL query for the database models for the light curve collection.

        :return: The SQL query.
        """
        query = TessTwoMinuteCadenceLightCurveMetadata().select(TessTwoMinuteCadenceLightCurveMetadata.path)
        if self.dataset_splits is not None:
            query = query.where(TessTwoMinuteCadenceLightCurveMetadata.dataset_split.in_(self.dataset_splits))
        return query

    def get_path_from_model(self, model: MetadatabaseModel) -> Path:
        """
        Gets the light curve path from the SQL database model.

        :return: The path to the light curve.
        """
        return Path(self.tess_two_minute_cadence_light_curve_metadata_manger.
                    light_curve_root_directory_path.joinpath(model.path))

    def load_times_and_fluxes_from_path(self, path: Path) -> (np.ndarray, np.ndarray):
        """
        Loads the times and fluxes from a given light curve path.

        :param path: The path to the light curve file.
        :return: The times and the fluxes of the light curve.
        """
        fluxes, times = self.tess_data_interface.load_fluxes_and_times_from_fits_file(path, self.flux_type)
        return times, fluxes

    def load_times_and_magnifications_from_path(self, path: Path) -> (np.ndarray, np.ndarray):
        """
        Loads the times and magnifications from a given path as an injectable signal.

        :param path: The path to the light curve/signal file.
        :return: The times and the magnifications of the light curve/signal.
        """
        fluxes, times = self.tess_data_interface.load_fluxes_and_times_from_fits_file(path, self.flux_type)
        magnifications, times = self.generate_synthetic_signal_from_real_data(fluxes, times)
        return times, magnifications

    def download(self):
        """
        Downloads the light curve collection.
        """
        self.tess_data_interface.download_two_minute_cadence_light_curves(self.data_directory)
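A short usage sketch for the collection above, assuming the metadata table has already been built (see the TessTwoMinuteCadenceLightCurveMetadataManger example below):

collection = TessTwoMinuteCadenceLightCurveCollection(dataset_splits=[0, 1])
query = collection.get_sql_query()  # Select restricted to dataset splits 0 and 1.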
Example no. 7
 def test_new_tess_data_interface_sets_astroquery_api_limits(self):
     from astroquery.mast import Observations
     Observations.TIMEOUT = 600
     Observations.PAGESIZE = 50000
     TessDataInterface()
     assert Observations.TIMEOUT == 2000
     assert Observations.PAGESIZE == 3000
Example no. 8
class ViewEntity:
    """A class grouping the objects related to one entity for the viewer."""
    tess_data_interface = TessDataInterface()
    tess_toi_data_interface = TessToiDataInterface()
    vetter = TransitVetter()

    def __init__(self):
        self.index: Union[int, None] = None
        self.confidence: Union[float, None] = None
        self.light_curve: Union[TessLightCurve, None] = None
        self.target: Union[TessTarget, None] = None
        self.has_exofop_dispositions: Union[bool, None] = None
        self.has_nearby_toi: Union[bool, None] = None

    @classmethod
    async def from_identifier_data_frame_row(cls, identifier_data_frame_row):
        """
        Creates the view entity based on a data frame row from an infer output.

        :param identifier_data_frame_row: The row of the data frame that should be used to prepare the view entity.
        :return: The view entity.
        """
        view_entity = cls()
        loop = asyncio.get_running_loop()
        light_curve_path_string = identifier_data_frame_row['light_curve_path']
        light_curve_path = Path(light_curve_path_string)
        load_light_curve_task = loop.run_in_executor(None, view_entity.load_light_curve_from_identifier,
                                                     light_curve_path)
        tic_id, sector = TessFfiLightCurve.get_tic_id_and_sector_from_file_path(light_curve_path)
        load_has_dispositions_task = loop.run_in_executor(
            None, view_entity.tess_toi_data_interface.has_any_exofop_dispositions_for_tic_id, tic_id
        )
        tic_id_only_target = TessTarget()  # Create a stand-in target to use for other parallel preloading.
        tic_id_only_target.tic_id = tic_id
        load_nearby_toi_task = loop.run_in_executor(
            None, view_entity.vetter.has_nearby_toi_targets, tic_id_only_target
        )
        load_target_task = loop.run_in_executor(None, TessTarget.from_tic_id, tic_id)
        light_curve, target, has_dispositions, has_nearby_toi = await asyncio.gather(
            load_light_curve_task, load_target_task, load_has_dispositions_task, load_nearby_toi_task)
        view_entity.has_nearby_toi = has_nearby_toi
        view_entity.has_exofop_dispositions = has_dispositions
        view_entity.index = identifier_data_frame_row['index']
        view_entity.confidence = identifier_data_frame_row['confidence']
        view_entity.light_curve = light_curve
        view_entity.target = target
        return view_entity

    @staticmethod
    def load_light_curve_from_identifier(identifier: Any) -> TessLightCurve:
        """
        Loads a light curve from a generic identifier.

        :param identifier: The identifier of the light curve.
        :return: The light curve.
        """
        light_curve = TessFfiLightCurve.from_path(identifier)
        light_curve.convert_to_relative_scale()
        return light_curve
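A hedged sketch of driving the coroutine above from synchronous code (the column names come from the code; the row values and file path are placeholders):

import asyncio
import pandas as pd

row = pd.Series({'light_curve_path': 'tesslcs_sector_12/tesslcs_tmag_9_10/tesslc_290374453.pkl',
                 'index': 0, 'confidence': 0.87})
view_entity = asyncio.run(ViewEntity.from_identifier_data_frame_row(row))  # Requires the data and network access.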
Example no. 9
class TessTwoMinuteCadenceLightCurveMetadataManger:
    """
    A class for managing the meta data of the two minute cadence TESS light curves.
    """
    tess_data_interface = TessDataInterface()

    def __init__(self):
        self.light_curve_root_directory_path = Path('data/tess_two_minute_cadence_light_curves')

    def insert_multiple_rows_from_paths_into_database(self, light_curve_paths: List[Path]):
        """
        Inserts sets of light curve paths into the table.

        :param light_curve_paths: The list of paths to insert.
        """
        row_dictionary_list = []
        table_name = convert_class_to_table_name(TessTwoMinuteCadenceLightCurveMetadata)
        for light_curve_path in light_curve_paths:
            tic_id, sector = self.tess_data_interface.get_tic_id_and_sector_from_file_path(light_curve_path)
            uuid_name = f'{table_name} TIC {tic_id} sector {sector}'
            uuid = metadatabase_uuid(uuid_name)
            dataset_split = dataset_split_from_uuid(uuid)
            row_dictionary_list.append({TessTwoMinuteCadenceLightCurveMetadata.path.name: str(light_curve_path),
                                        TessTwoMinuteCadenceLightCurveMetadata.tic_id.name: tic_id,
                                        TessTwoMinuteCadenceLightCurveMetadata.sector.name: sector,
                                        TessTwoMinuteCadenceLightCurveMetadata.dataset_split.name: dataset_split})
        with metadatabase.atomic():
            TessTwoMinuteCadenceLightCurveMetadata.insert_many(row_dictionary_list).execute()

    def populate_sql_database(self):
        """
        Populates the SQL database based on the light curve files.
        """
        print('Populating the TESS two minute cadence light curve meta data table...')
        path_glob = self.light_curve_root_directory_path.glob('**/*.fits')
        row_count = 0
        batch_paths = []
        with metadatabase.atomic():
            for index, path in enumerate(path_glob):
                batch_paths.append(path.relative_to(self.light_curve_root_directory_path))
                row_count += 1
                if index % 1000 == 0 and index != 0:
                    self.insert_multiple_rows_from_paths_into_database(batch_paths)
                    batch_paths = []
                    print(f'{index} rows inserted...', end='\r')
            if len(batch_paths) > 0:
                self.insert_multiple_rows_from_paths_into_database(batch_paths)
        print(f'TESS two minute cadence light curve meta data table populated. {row_count} rows added.')

    def build_table(self):
        """
        Builds the SQL table.
        """
        TessTwoMinuteCadenceLightCurveMetadata.drop_table()
        TessTwoMinuteCadenceLightCurveMetadata.create_table()
        SchemaManager(TessTwoMinuteCadenceLightCurveMetadata).drop_indexes()  # To allow for fast insert.
        self.populate_sql_database()
        print('Building indexes...')
        SchemaManager(TessTwoMinuteCadenceLightCurveMetadata).create_indexes()  # Since we dropped them before.
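Indexes are dropped before the bulk insert and rebuilt afterward so the inserts stay fast. An end-to-end build is then just (a sketch; assumes the FITS files already exist under the root directory):

manager = TessTwoMinuteCadenceLightCurveMetadataManger()
manager.build_table()  # Drops and recreates the table, repopulates it, then rebuilds indexes.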
Example no. 10
    def general_preprocessing(
            self, example_path_tensor: tf.Tensor) -> (tf.Tensor, tf.Tensor):
        """
        Loads and preprocesses the data.

        :param example_path_tensor: The tensor containing the path to the example to load.
        :return: The example and its corresponding label.
        """
        example_path = example_path_tensor.numpy().decode('utf-8')
        tess_data_interface = TessDataInterface()
        fluxes, times = tess_data_interface.load_fluxes_and_times_from_fits_file(
            example_path)
        fluxes = self.normalize(fluxes)
        time_differences = np.diff(times, prepend=times[0])
        example = np.stack([fluxes, time_differences], axis=-1)
        if self.is_positive(example_path):
            label = self.generate_label(example_path, times)
        else:
            label = np.zeros_like(fluxes)
        return (tf.convert_to_tensor(example, dtype=tf.float32),
                tf.convert_to_tensor(label, dtype=tf.float32))
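Because general_preprocessing calls .numpy() on its input tensor, it can only run eagerly. A typical tf.data wiring therefore wraps it in tf.py_function (a sketch; `database` is an assumed instance of the owning class and the path is a placeholder):

import tensorflow as tf

paths_dataset = tf.data.Dataset.from_tensor_slices(['data/example_light_curve.fits'])  # Placeholder path.
examples_dataset = paths_dataset.map(
    lambda path: tf.py_function(database.general_preprocessing, [path], [tf.float32, tf.float32]))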
Example no. 11
    def download_exofop_toi_light_curves_to_directory(self, directory: Path):
        """
        Downloads the `ExoFOP database <https://exofop.ipac.caltech.edu/tess/view_toi.php>`_ light curve files to the
        given directory.

        :param directory: The directory to download the light curves to. Defaults to the data interface directory.
        """
        print("Downloading ExoFOP TOI disposition CSV...")
        tess_data_interface = TessDataInterface()
        if isinstance(directory, str):
            directory = Path(directory)
        tic_ids = self.toi_dispositions[ToiColumns.tic_id.value].unique()
        print('Downloading TESS observation list...')
        single_sector_observations = tess_data_interface.get_all_two_minute_single_sector_observations(
            tic_ids)
        print(
            "Downloading light curves which are confirmed or suspected planets in TOI dispositions..."
        )
        suspected_planet_dispositions = self.toi_dispositions[
            self.toi_dispositions[ToiColumns.disposition.value] != 'FP']
        suspected_planet_observations = pd.merge(
            single_sector_observations,
            suspected_planet_dispositions,
            how='inner',
            on=[ToiColumns.tic_id.value, ToiColumns.sector.value])
        suspected_planet_data_products = tess_data_interface.get_product_list(
            suspected_planet_observations)
        suspected_planet_light_curve_data_products = suspected_planet_data_products[
            suspected_planet_data_products['productFilename'].str.endswith(
                'lc.fits')]
        suspected_planet_download_manifest = tess_data_interface.download_products(
            suspected_planet_light_curve_data_products,
            data_directory=self.data_directory)
        print(f'Verifying and moving light curves to {directory}...')
        directory.mkdir(parents=True, exist_ok=True)
        for row_index, row in suspected_planet_download_manifest.iterrows():
            if row['Status'] == 'COMPLETE':
                file_path = Path(row['Local Path'])
                file_path.rename(directory.joinpath(file_path.name))
Example no. 12
 def download_exofop_toi_lightcurves_to_synthetic_directory(self):
     """
     Downloads the `ExoFOP database <https://exofop.ipac.caltech.edu/tess/view_toi.php>`_ lightcurve files to the
     synthetic directory.
     """
     print("Downloading ExoFOP TOI disposition CSV...")
     self.create_data_directories()
     toi_csv_url = 'https://exofop.ipac.caltech.edu/tess/download_toi.php?sort=toi&output=csv'
     response = requests.get(toi_csv_url)
     with self.toi_dispositions_path.open('wb') as csv_file:
         csv_file.write(response.content)
     toi_dispositions = self.tess_toi_data_interface.dispositions
     tic_ids = toi_dispositions[ToiColumns.tic_id.value].unique()
     print('Downloading TESS observation list...')
     tess_data_interface = TessDataInterface()
     tess_observations = tess_data_interface.get_all_tess_time_series_observations(tic_id=tic_ids)
     single_sector_observations = tess_data_interface.filter_for_single_sector_observations(tess_observations)
     single_sector_observations = tess_data_interface.add_tic_id_column_to_single_sector_observations(
         single_sector_observations)
     single_sector_observations = tess_data_interface.add_sector_column_to_single_sector_observations(
         single_sector_observations)
     print("Downloading lightcurves which are confirmed or suspected planets in TOI dispositions...")
     suspected_planet_dispositions = toi_dispositions[toi_dispositions[ToiColumns.disposition.value] != 'FP']
     suspected_planet_observations = pd.merge(single_sector_observations, suspected_planet_dispositions, how='inner',
                                              on=[ToiColumns.tic_id.value, ToiColumns.sector.value])
     observations_not_found = suspected_planet_dispositions.shape[0] - suspected_planet_observations.shape[0]
     print(f"{suspected_planet_observations.shape[0]} observations found that match the TOI dispositions.")
     print(f"No observations found for {observations_not_found} entries in TOI dispositions.")
     suspected_planet_data_products = tess_data_interface.get_product_list(suspected_planet_observations)
     suspected_planet_lightcurve_data_products = suspected_planet_data_products[
         suspected_planet_data_products['productFilename'].str.endswith('lc.fits')
     ]
     suspected_planet_download_manifest = tess_data_interface.download_products(
         suspected_planet_lightcurve_data_products, data_directory=self.data_directory)
     print(f'Moving lightcurves to {self.synthetic_signal_directory}...')
     for file_path_string in suspected_planet_download_manifest['Local Path']:
         file_path = Path(file_path_string)
         file_path.rename(self.synthetic_signal_directory.joinpath(file_path.name))
Example no. 13
 def download_catalog_eclipsing_binaries(self):
     """
     Downloads the eclipsing binaries listed in Brian Powell's catalog to the synthetic signals directory.
     """
     catalog = pd.read_csv(self.catalog_csv_path)
     catalog = catalog[catalog['2min'] == 1]
     tess_data_interface = TessDataInterface()
     tess_observations = tess_data_interface.get_all_tess_time_series_observations(tic_id=catalog['ID'])
     single_sector_observations = tess_data_interface.filter_for_single_sector_observations(tess_observations)
     single_sector_observations = tess_data_interface.add_tic_id_column_to_single_sector_observations(
         single_sector_observations)
     single_sector_observations = tess_data_interface.add_sector_column_to_single_sector_observations(
         single_sector_observations)
     single_sector_data_products = tess_data_interface.get_product_list(single_sector_observations)
     data_products = single_sector_data_products[
         single_sector_data_products['productFilename'].str.endswith('lc.fits')
     ]
     download_manifest = tess_data_interface.download_products(
         data_products, data_directory=self.data_directory)
     print(f'Moving lightcurves to {self.synthetic_signal_directory}...')
     self.synthetic_signal_directory.mkdir(parents=True, exist_ok=True)
     for file_path_string in download_manifest['Local Path']:
         file_path = Path(file_path_string)
         file_path.rename(self.synthetic_signal_directory.joinpath(file_path.name))
Example no. 14
 def download_exofop_toi_database(
         self, number_of_negative_lightcurves_to_download=10000):
     """
     Downloads the `ExoFOP database <https://exofop.ipac.caltech.edu/tess/view_toi.php>`_.
     """
     # print('Clearing data directory...')
     # self.clear_data_directory()
     print("Downloading ExoFOP TOI disposition CSV...")
     toi_csv_url = 'https://exofop.ipac.caltech.edu/tess/download_toi.php?sort=toi&output=csv'
     response = requests.get(toi_csv_url)
     with self.toi_dispositions_path.open('wb') as csv_file:
         csv_file.write(response.content)
     print('Downloading TESS observation list...')
     tess_data_interface = TessDataInterface()
     tess_observations = tess_data_interface.get_all_tess_time_series_observations()
     single_sector_observations = tess_data_interface.filter_for_single_sector_observations(
         tess_observations)
     single_sector_observations = tess_data_interface.add_tic_id_column_to_single_sector_observations(
         single_sector_observations)
     single_sector_observations = tess_data_interface.add_sector_column_to_single_sector_observations(
         single_sector_observations)
     print(
         "Downloading lightcurves which are confirmed or suspected planets in TOI dispositions..."
     )
     tess_toi_data_interface = TessToiDataInterface()
     tess_toi_data_interface.download_exofop_toi_light_curves_to_directory(
         self.synthetic_signal_directory)
     toi_dispositions = tess_toi_data_interface.load_toi_dispositions_in_project_format()
     print(
         "Downloading lightcurves which are not in TOI dispositions and do not have TCEs (not planets)..."
     )
     print(
         f'Download limited to {number_of_negative_lightcurves_to_download} lightcurves...'
     )
     # noinspection SpellCheckingInspection
     toi_tic_ids = toi_dispositions['TIC ID'].values
     not_toi_observations = single_sector_observations[
         ~single_sector_observations['TIC ID'].isin(toi_tic_ids)  # Don't include even false positives.
     ]
     not_toi_observations = not_toi_observations.sample(frac=1,
                                                        random_state=0)
     # Shorten product list obtaining.
     not_toi_observations = not_toi_observations.head(
         number_of_negative_lightcurves_to_download * 2)
     not_toi_data_products = tess_data_interface.get_product_list(
         not_toi_observations)
     not_toi_data_products = tess_data_interface.add_tic_id_column_to_single_sector_observations(
         not_toi_data_products)
     not_toi_lightcurve_data_products = not_toi_data_products[
         not_toi_data_products['productFilename'].str.endswith('lc.fits')]
     not_toi_data_validation_data_products = not_toi_data_products[
         not_toi_data_products['productFilename'].str.endswith('dvr.xml')]
     tic_ids_with_dv = not_toi_data_validation_data_products['TIC ID'].values
     not_planet_lightcurve_data_products = not_toi_lightcurve_data_products[
         ~not_toi_lightcurve_data_products['TIC ID'].isin(tic_ids_with_dv)  # Remove any lightcurves with TCEs.
     ]
     # Shuffle rows.
     not_planet_lightcurve_data_products = not_planet_lightcurve_data_products.sample(
         frac=1, random_state=0)
     not_planet_download_manifest = tess_data_interface.download_products(
         not_planet_lightcurve_data_products.head(
             number_of_negative_lightcurves_to_download),
         data_directory=self.data_directory)
     print(
         f'Verifying and moving lightcurves to {self.lightcurve_directory}...'
     )
     self.lightcurve_directory.mkdir(parents=True, exist_ok=True)
     for file_path_string in not_planet_download_manifest['Local Path']:
         file_path = Path(file_path_string)
         lightcurve_path = self.lightcurve_directory.joinpath(
             file_path.name)
         try:
             file_path.rename(lightcurve_path)
             hdu_list = fits.open(str(lightcurve_path))
             lightcurve = hdu_list[1].data
             _ = lightcurve['TIME'][0]
         except (OSError, TypeError):
             print(
                 f'{file_path} seems to be corrupt. Re-downloading and replacing.'
             )
             sector = tess_data_interface.get_sector_from_single_sector_obs_id(
                 str(lightcurve_path.stem))
             tic_id = tess_data_interface.get_tic_id_from_single_sector_obs_id(
                 str(lightcurve_path.stem))
             tess_data_interface.download_lightcurve(
                 tic_id, sector, save_directory=lightcurve_path.parent)
     print('Database ready.')
Example no. 15
class TessFfiLightCurve(TessLightCurve):
    """
    A class to for a class to represent a TESS FFI light curve.
    """
    mast_tess_data_interface = TessDataInterface()

    def __init__(self):
        super().__init__()
        self.flux_column_names = [
            TessFfiColumnName.CORRECTED_FLUX.value,
            TessFfiColumnName.RAW_FLUX.value
        ]

    @classmethod
    def from_path(
        cls,
        path: Path,
        column_names_to_load: Union[List[TessFfiColumnName], None] = None
    ) -> TessFfiLightCurve:
        """
        Creates an FFI TESS light curve from a path to one of Brian Powell's pickle files.

        :param path: The path to the pickle file to load.
        :param column_names_to_load: The FFI light curve columns to load from the pickle file. By default, all will be
                                     loaded. Selecting specific ones may speed the process when loading many light
                                     curves.
        :return: The light curve.
        """
        light_curve = cls()
        light_curve.time_column_name = TessFfiColumnName.TIME__BTJD.value
        if column_names_to_load is None:
            column_names_to_load = list(TessFfiColumnName)
        with path.open('rb') as pickle_file:
            light_curve_data_dictionary = pickle.load(pickle_file)
            for column_name in column_names_to_load:
                pickle_index = TessFfiPickleIndex[column_name.name]
                light_curve.data_frame[
                    column_name.value] = light_curve_data_dictionary[
                        pickle_index.value]
        light_curve.tic_id, light_curve.sector = light_curve.get_tic_id_and_sector_from_file_path(
            path)
        return light_curve

    @staticmethod
    def get_tic_id_and_sector_from_file_path(
            path: Union[Path, str]) -> (int, Union[int, None]):
        """
        Gets the TIC ID and sector from commonly encountered file name patterns.

        :param path: The path of the file to extract the TIC ID and sector.
        :return: The TIC ID and sector. The sector might be omitted (as None).
        """
        if isinstance(path, Path):
            path = str(path)
        # Search for Brian Powell's FFI path convention with directory structure sector, magnitude, target.
        # E.g., "tesslcs_sector_12/tesslcs_tmag_1_2/tesslc_290374453"
        match = re.search(
            r'tesslcs_sector_(\d+)(?:_104)?/tesslcs_tmag_\d+_\d+/tesslc_(\d+)',
            path)
        if match:
            return int(match.group(2)), int(match.group(1))
        # Search for Brian Powell's FFI path convention with only the file name containing the target.
        # E.g., "tesslc_290374453"
        match = re.search(r'tesslc_(\d+)', path)
        if match:
            return int(match.group(1)), None
        # Raise an error if none of the patterns matched.
        raise ValueError(
            f'{path} does not match a known pattern to extract TIC ID and sector from.'
        )

    @staticmethod
    def get_floor_magnitude_from_file_path(file_path: Union[Path, str]) -> int:
        """
        Gets the floor magnitude from the FFI file path.

        :param file_path: The path of the file to extract the magnitude.
        :return: The magnitude floored.
        """
        if isinstance(file_path, Path):
            file_path = str(file_path)
        # Search for Brian Powell's FFI path convention with directory structure sector, magnitude, target.
        # E.g., "tesslcs_sector_12/tesslcs_tmag_1_2/tesslc_290374453"
        match = re.search(
            r'tesslcs_sector_\d+(?:_104)?/tesslcs_tmag_(\d+)_\d+/tesslc_\d+',
            file_path)
        if match:
            return int(match.group(1))
        raise ValueError(
            f'{file_path} does not match a known pattern to extract magnitude from.'
        )

    @staticmethod
    def get_magnitude_from_file(file_path: Union[Path, str]) -> float:
        """
        Loads the magnitude from the file.

        :param file_path: The path to the file.
        :return: The magnitude of the target.
        """
        with file_path.open('rb') as pickle_file:
            light_curve = pickle.load(pickle_file)
        magnitude = light_curve[TessFfiPickleIndex.TESS_MAGNITUDE.value]
        return magnitude

    @classmethod
    def load_fluxes_and_times_from_pickle_file(
        cls,
        file_path: Union[Path, str],
        flux_column_name: TessFfiColumnName = TessFfiColumnName.CORRECTED_FLUX
    ) -> (np.ndarray, np.ndarray):
        """
        Loads the fluxes and times from one of Brian Powell's FFI pickle files.

        :param file_path: The path to the pickle file to load.
        :param flux_column_name: The flux type to load.
        :return: The fluxes and the times.
        """
        if not isinstance(file_path, Path):
            file_path = Path(file_path)
        light_curve = cls.from_path(file_path,
                                    column_names_to_load=[
                                        TessFfiColumnName.TIME__BTJD,
                                        flux_column_name
                                    ])
        fluxes = light_curve.data_frame[flux_column_name.value]
        times = light_curve.data_frame[TessFfiColumnName.TIME__BTJD.value]
        assert times.shape == fluxes.shape
        return fluxes, times

    @classmethod
    def load_fluxes_flux_errors_and_times_from_pickle_file(
        cls,
        file_path: Union[Path, str],
        flux_column_name: TessFfiColumnName = TessFfiColumnName.CORRECTED_FLUX
    ) -> (np.ndarray, np.ndarray, np.ndarray):
        """
        Loads the fluxes, flux errors, and times from one of Brian Powell's FFI pickle files.

        :param file_path: The path to the pickle file to load.
        :param flux_column_name: The flux type to load.
        :return: The fluxes, flux errors, and times.
        """
        if not isinstance(file_path, Path):
            file_path = Path(file_path)
        light_curve = cls.from_path(file_path,
                                    column_names_to_load=[
                                        TessFfiColumnName.TIME__BTJD,
                                        TessFfiColumnName.FLUX_ERROR,
                                        flux_column_name
                                    ])
        fluxes = light_curve.data_frame[flux_column_name.value]
        flux_errors = light_curve.data_frame[
            TessFfiColumnName.FLUX_ERROR.value]
        times = light_curve.data_frame[TessFfiColumnName.TIME__BTJD.value]
        assert times.shape == fluxes.shape
        return fluxes, flux_errors, times
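Worked calls for the path parsing above, mirroring the file name patterns documented in the comments (assumes TessFfiLightCurve is importable):

tic_id, sector = TessFfiLightCurve.get_tic_id_and_sector_from_file_path(
    'tesslcs_sector_12/tesslcs_tmag_1_2/tesslc_290374453')
assert (tic_id, sector) == (290374453, 12)
tic_id, sector = TessFfiLightCurve.get_tic_id_and_sector_from_file_path('tesslc_290374453')
assert (tic_id, sector) == (290374453, None)
assert TessFfiLightCurve.get_floor_magnitude_from_file_path(
    'tesslcs_sector_12/tesslcs_tmag_9_10/tesslc_290374453') == 9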
Example no. 16
from bokeh.document import without_document_lock
from bokeh.models import ColumnDataSource, LinearColorMapper, Column, Button, Row, Div, Range1d, DataRange1d, CustomJS, \
    TableColumn, DataTable
from bokeh.events import Tap
from bokeh.plotting import Figure
from bokeh.server.server import Server

import pymc3 as pm
import theano.tensor as tt
import exoplanet as xo
from tornado import gen

from ramjet.data_interface.tess_toi_data_interface import TessToiDataInterface
from ramjet.data_interface.tess_data_interface import TessDataInterface, TessFluxType

tess_data_interface = TessDataInterface()
tess_toi_data_interface = TessToiDataInterface()


class Target:
    def __init__(self, light_curve_path):
        self.tic_id, self.sector = tess_data_interface.get_tic_id_and_sector_from_file_path(
            light_curve_path)
        self.pdcsap_fluxes: Union[np.ndarray, None] = None
        self.normalized_pdcsap_fluxes: Union[np.ndarray, None] = None
        self.pdcsap_flux_errors: Union[np.ndarray, None] = None
        self.normalized_pdcsap_flux_errors: Union[np.ndarray, None] = None
        self.sap_fluxes: Union[np.ndarray, None] = None
        self.normalized_sap_fluxes: Union[np.ndarray, None] = None
        self.load_light_curve()
        self.has_known_exofop_disposition = self.check_for_known_exofop_dispositions(
Example no. 17
class TessTwoMinuteCadenceLightCurve(TessLightCurve):
    """
    A class to represent a TESS two minute cadence light curve.
    """
    mast_tess_data_interface = TessDataInterface()

    def __init__(self):
        super().__init__()
        self.flux_column_names = [
            TessTwoMinuteCadenceColumnName.PDCSAP_FLUX.value,
            TessTwoMinuteCadenceColumnName.SAP_FLUX.value
        ]

    @classmethod
    def from_path(
        cls,
        path: Path,
        fits_indexes_to_load: Union[List[TessTwoMinuteCadenceMastFitsIndex],
                                    None] = None
    ) -> TessTwoMinuteCadenceLightCurve:
        """
        Creates a TESS two minute light curve from a path to the MAST FITS file.

        :param path: The path to the FITS file to load.
        :param fits_indexes_to_load: The indexes to load from the FITS file. By default, all will be loaded. Selecting
                                     specific ones may speed the process when loading many light curves.
        :return: The light curve.
        """
        light_curve = cls()
        light_curve.time_column_name = TessTwoMinuteCadenceColumnName.TIME__BTJD.value
        if fits_indexes_to_load is None:
            fits_indexes_to_load = list(TessTwoMinuteCadenceMastFitsIndex)
        with fits.open(path) as hdu_list:
            light_curve_table = hdu_list[1].data  # Light curve information is in the first extension table.
            for fits_index in fits_indexes_to_load:
                column_name = TessTwoMinuteCadenceColumnName[fits_index.name]
                light_curve.data_frame[column_name.value] = light_curve_table[
                    fits_index.value]
        light_curve.tic_id, light_curve.sector = cls.get_tic_id_and_sector_from_file_path(
            path)
        return light_curve

    @classmethod
    def from_mast(
        cls,
        tic_id: int,
        sector: int,
        fits_indexes_to_load: Union[List[TessTwoMinuteCadenceMastFitsIndex],
                                    None] = None
    ) -> TessTwoMinuteCadenceLightCurve:
        """
        Downloads a FITS file from MAST and creates a TESS two minute light curve from it.

        :param tic_id: The TIC ID of the target.
        :param sector: The sector of the observation.
        :param fits_indexes_to_load: The indexes to load from the FITS file. By default, all will be loaded. Selecting
                                     specific ones may speed the process when loading many light curves.
        :return: The light curve.
        """
        light_curve_path = cls.mast_tess_data_interface.download_two_minute_cadence_light_curve(
            tic_id=tic_id, sector=sector)
        light_curve = cls.from_path(path=light_curve_path,
                                    fits_indexes_to_load=fits_indexes_to_load)
        return light_curve

    @classmethod
    def from_identifier(cls,
                        identifier: Any) -> TessTwoMinuteCadenceLightCurve:
        """
        Loads the light curve in a generalized way, attempting to infer the light curve based on the passed identifier.

        :param identifier: The identifier of the light curve. Could come in various forms.
        :return: The light curve.
        """
        integer_types = (int, np.integer)
        if isinstance(identifier, Path):
            return cls.from_path(path=identifier)
        elif isinstance(
                identifier,
                tuple) and (isinstance(identifier[0], integer_types)
                            and isinstance(identifier[1], integer_types)):
            tic_id = identifier[0]
            sector = identifier[1]
            return cls.from_mast(tic_id=tic_id, sector=sector)
        elif isinstance(identifier, str):
            tic_id, sector = cls.get_tic_id_and_sector_from_identifier_string(
                identifier)
            return cls.from_mast(tic_id=tic_id, sector=sector)
        else:
            raise ValueError(
                f'{identifier} does not match a known type to infer the light curve identifier from.'
            )

    @staticmethod
    def get_tic_id_and_sector_from_file_path(
            file_path: Path) -> (int, Union[int, None]):
        """
        Gets the TIC ID and sector from commonly encountered file name patterns.

        :param file_path: The path of the file to extract the TIC ID and sector.
        :return: The TIC ID and sector. The sector might be omitted (as None).
        """
        file_name = file_path.stem
        tic_id, sector = TessTwoMinuteCadenceLightCurve.get_tic_id_and_sector_from_identifier_string(
            file_name)
        return tic_id, sector

    @staticmethod
    def get_tic_id_and_sector_from_identifier_string(
            identifier_string: str) -> (int, Union[int, None]):
        """
        Gets the TIC ID and sector from commonly encountered identifier string patterns.

        :param identifier_string: The string to extract the TIC ID and sector.
        :return: The TIC ID and sector. The sector might be omitted (as None).
        """
        # Search for the human readable version. E.g., "TIC 169480782 sector 5"
        match = re.search(r'TIC (\d+) sector (\d+)', identifier_string)
        if match:
            return int(match.group(1)), int(match.group(2))
        # Search for the human readable TIC only version. E.g., "TIC 169480782"
        match = re.search(r'TIC (\d+)', identifier_string)
        if match:
            return int(match.group(1)), None
        # Search for the TESS obs_id version. E.g., "tess2018319095959-s0005-0000000278956474-0125-s"
        match = re.search(r'tess\d+-s(\d+)-(\d+)-\d+-s', identifier_string)
        if match:
            return int(match.group(2)), int(match.group(1))
        # Raise an error if none of the patterns matched.
        raise ValueError(
            f'{identifier_string} does not match a known pattern to extract TIC ID and sector from.'
        )
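Worked calls mirroring the identifier patterns documented above (assumes TessTwoMinuteCadenceLightCurve is importable):

assert TessTwoMinuteCadenceLightCurve.get_tic_id_and_sector_from_identifier_string(
    'TIC 169480782 sector 5') == (169480782, 5)
assert TessTwoMinuteCadenceLightCurve.get_tic_id_and_sector_from_identifier_string(
    'TIC 169480782') == (169480782, None)
assert TessTwoMinuteCadenceLightCurve.get_tic_id_and_sector_from_identifier_string(
    'tess2018319095959-s0005-0000000278956474-0125-s') == (278956474, 5)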
Example no. 18
class TransitFitter:
    """
    A class to fit a transit.
    """
    def __init__(self, tic_id, sectors=None):
        self.title = f'TIC {tic_id}'
        tess_toi_data_interface = TessToiDataInterface()
        self.tess_data_interface = TessDataInterface()
        dispositions_data_frame = tess_toi_data_interface.retrieve_exofop_toi_and_ctoi_planet_disposition_for_tic_id(
            tic_id)
        if dispositions_data_frame.shape[0] == 0:
            print('No known ExoFOP dispositions found.')
        else:
            # Use pandas context options to avoid truncating the printed dispositions.
            with pd.option_context('display.max_rows', None,
                                   'display.max_columns', None,
                                   'display.width', None):
                print(dispositions_data_frame)
        if sectors is None:
            sectors = self.tess_data_interface.get_sectors_target_appears_in(
                tic_id)
        if isinstance(sectors, int):
            sectors = [sectors]
        self.sectors = sectors
        light_curve = self.stitch_fluxes_flux_errors_and_times_for_target_from_mast(
            tic_id, sectors)
        relative_flux_errors, relative_fluxes, times = light_curve
        self.tic_id = tic_id
        self.times = times
        self.relative_fluxes = relative_fluxes
        self.relative_flux_errors = relative_flux_errors
        tic_row = self.tess_data_interface.get_tess_input_catalog_row(tic_id)
        self.star_radius = tic_row['rad']
        print(self.star_radius)
        self.period = None
        self.depth = None
        self.transit_epoch = None

    def bokeh_application(self, bokeh_document):
        light_curve_figure = self.create_light_curve_figure()
        folded_figure = self.create_folded_figured_based_on_clicks_in_unfolded_figure(
            light_curve_figure)
        run_fitting_button = Button(label='Run fitting')
        initial_fit_figure, parameters_table = self.create_mcmc_fit_figures(
            run_fitting_button)
        column = Column(light_curve_figure, folded_figure, run_fitting_button,
                        initial_fit_figure, parameters_table)
        column.sizing_mode = 'stretch_width'
        bokeh_document.add_root(column)

    def create_light_curve_figure(self):
        figure = Figure(title=self.title,
                        x_axis_label='Time (BTJD)',
                        y_axis_label='Relative flux',
                        active_drag='box_zoom')
        data_source = ColumnDataSource({
            'Time (BTJD)': self.times,
            'Relative flux': self.relative_fluxes
        })
        self.plot_light_curve_source(figure, data_source)
        figure.sizing_mode = 'stretch_width'
        return figure

    def create_folded_figured_based_on_clicks_in_unfolded_figure(
            self, unfolded_figure):
        # Set up an empty data source to record the clicks used for folding.
        event_coordinates = []
        event_coordinates_data_source = ColumnDataSource({
            'Time (BTJD)': [],
            'Relative flux': []
        })
        unfolded_figure.circle('Time (BTJD)',
                               'Relative flux',
                               source=event_coordinates_data_source,
                               color='red',
                               alpha=0.8)  # Will be updated.
        # Prepare the folded plot.
        folded_data_source = ColumnDataSource({
            'Relative flux': self.relative_fluxes,
            'Folded time (days)': [],
            'Time (BTJD)': self.times
        })
        folded_figure = Figure(x_axis_label='Folded time (days)',
                               y_axis_label='Relative flux',
                               title=f'Folded {self.title}')
        self.plot_light_curve_source(folded_figure,
                                     folded_data_source,
                                     time_column_name='Folded time (days)')
        folded_figure.sizing_mode = 'stretch_width'
        self_ = self

        def click_unfolded_figure_callback(tap_event):  # Define what happens when a click occurs.
            event_coordinate = tap_event.x, tap_event.y
            event_coordinates.append(event_coordinate)
            event_coordinates_data_source.data = {
                'Time (BTJD)':
                [coordinate[0] for coordinate in event_coordinates],
                'Relative flux':
                [coordinate[1] for coordinate in event_coordinates]
            }
            if len(event_coordinates) > 1:  # With more than one period click, we can start folding.
                event_times = [
                    coordinate[0] for coordinate in event_coordinates
                ]
                epoch, period = self.calculate_epoch_and_period_from_approximate_event_times(
                    event_times)
                folded_times = self.fold_times(self_.times, epoch, period)
                folded_data_source.data['Folded time (days)'] = folded_times
                # folded_figure.x_range.start = -period/10
                # folded_figure.x_range.end = period/10
                self_.period = period
                self_.transit_epoch = epoch
                period_depths = [
                    coordinate[1] for coordinate in event_coordinates
                ]
                self_.depth = np.abs(np.mean(period_depths))

        unfolded_figure.on_event(Tap, click_unfolded_figure_callback)
        return folded_figure

    def create_mcmc_fit_figures(self, run_fitting_button):
        initial_fit_data_source = ColumnDataSource({
            'Folded time (days)': [],
            'Relative flux': [],
            'Fit': [],
            'Fit time': [],
            'Time (BTJD)': self.times
        })
        self_ = self
        initial_fit_figure = Figure(x_axis_label='Folded time (days)',
                                    y_axis_label='Relative flux',
                                    title=f'Initial fit {self.title}')
        parameters_table_data_source = ColumnDataSource(pd.DataFrame())
        parameters_table_columns = [
            TableColumn(field=column, title=column)
            for column in ['parameter', 'mean', 'sd', 'r_hat']
        ]
        parameters_table = DataTable(source=parameters_table_data_source,
                                     columns=parameters_table_columns,
                                     editable=True)

        def run_fitting():
            with pm.Model() as model:
                # Stellar parameters
                mean = pm.Normal("mean", mu=0.0, sigma=10.0 * 1e-3)
                u = xo.distributions.QuadLimbDark("u")
                star_params = [mean, u]

                # Gaussian process noise model
                sigma = pm.InverseGamma("sigma",
                                        alpha=3.0,
                                        beta=2 *
                                        np.median(self_.relative_flux_errors))
                log_Sw4 = pm.Normal("log_Sw4", mu=0.0, sigma=10.0)
                log_w0 = pm.Normal("log_w0",
                                   mu=np.log(2 * np.pi / 10.0),
                                   sigma=10.0)
                kernel = xo.gp.terms.SHOTerm(log_Sw4=log_Sw4,
                                             log_w0=log_w0,
                                             Q=1.0 / 3)
                noise_params = [sigma, log_Sw4, log_w0]

                # Planet parameters
                log_ror = pm.Normal("log_ror",
                                    mu=0.5 * np.log(self_.depth),
                                    sigma=10.0 * 1e-3)
                ror = pm.Deterministic("ror", tt.exp(log_ror))
                depth = pm.Deterministic("depth", tt.square(ror))

                # Orbital parameters
                log_period = pm.Normal("log_period",
                                       mu=np.log(self_.period),
                                       sigma=1.0)
                t0 = pm.Normal("t0", mu=self_.transit_epoch, sigma=1.0)
                log_dur = pm.Normal("log_dur", mu=np.log(0.1), sigma=10.0)
                b = xo.distributions.ImpactParameter("b", ror=ror)

                period = pm.Deterministic("period", tt.exp(log_period))
                dur = pm.Deterministic("dur", tt.exp(log_dur))

                # Set up the orbit
                orbit = xo.orbits.KeplerianOrbit(period=period,
                                                 duration=dur,
                                                 t0=t0,
                                                 b=b,
                                                 r_star=self.star_radius)

                # We're going to track the implied density for reasons that will become clear later
                pm.Deterministic("rho_circ", orbit.rho_star)

                # Set up the mean transit model
                star = xo.LimbDarkLightCurve(u)

                def lc_model(t):
                    return mean + tt.sum(star.get_light_curve(
                        orbit=orbit, r=ror * self.star_radius, t=t),
                                         axis=-1)

                # Finally the GP observation model
                gp = xo.gp.GP(kernel,
                              self_.times,
                              (self_.relative_flux_errors**2) + (sigma**2),
                              mean=lc_model)
                gp.marginal("obs", observed=self_.relative_fluxes)

                # Double check that everything looks good - we shouldn't see any NaNs!
                print(model.check_test_point())

                # Optimize the model
                map_soln = model.test_point
                map_soln = xo.optimize(map_soln, [sigma])
                map_soln = xo.optimize(map_soln, [log_ror, b, log_dur])
                map_soln = xo.optimize(map_soln, noise_params)
                map_soln = xo.optimize(map_soln, star_params)
                map_soln = xo.optimize(map_soln)

            with model:
                gp_pred, lc_pred = xo.eval_in_model(
                    [gp.predict(), lc_model(self_.times)], map_soln)

            x_fold = (self_.times - map_soln["t0"] + 0.5 * map_soln["period"]
                      ) % map_soln["period"] - 0.5 * map_soln["period"]
            inds = np.argsort(x_fold)
            initial_fit_data_source.data['Folded time (days)'] = x_fold
            initial_fit_data_source.data[
                'Relative flux'] = self_.relative_fluxes - gp_pred - map_soln[
                    "mean"]
            initial_fit_data_source.data[
                'Fit'] = lc_pred[inds] - map_soln["mean"]
            initial_fit_data_source.data['Fit time'] = x_fold[inds]
            # TODO: This is terrible; the fit should be lined up afterward to avoid a duplicate time column.

            with model:
                trace = pm.sample(
                    tune=2000,
                    draws=2000,
                    start=map_soln,
                    chains=4,
                    step=xo.get_dense_nuts_step(target_accept=0.9),
                )

            trace_summary = pm.summary(trace, round_to='none')  # Not a typo. PyMC3 wants 'none' as a string here.
            # Round the epoch differently, as BTJD needs more digits.
            epoch = round(trace_summary['mean']['t0'], 3)
            trace_summary['mean'] = self_.round_series_to_significant_figures(
                trace_summary['mean'], 5)
            trace_summary['mean']['t0'] = epoch
            parameters_table_data_source.data = trace_summary
            parameters_table_data_source.data[
                'parameter'] = trace_summary.index
            with pd.option_context('display.max_columns', None,
                                   'display.max_rows', None):
                print(trace_summary)
                print(f'Star radius: {self.star_radius}')
            # TODO: This should not happen automatically. Only after a button click.
            # scopes = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
            # credentials = Credentials.from_service_account_file(
            #     'ramjet/analysis/google_spreadsheet_credentials.json', scopes=scopes)
            # gc = gspread.authorize(credentials)
            # sh = gc.open('Ramjet transit candidates shared for vetting')
            # worksheet = sh.get_worksheet(0)
            # # Find first empty row.
            # empty_row_index = 1
            # for row_index in itertools.count(start=1):
            #     row_values = worksheet.row_values(row_index)
            #     if len(row_values) == 0:
            #         empty_row_index = row_index
            #         break
            # worksheet.update_cell(empty_row_index, 1, self_.tic_id)
            # worksheet.update_cell(empty_row_index, 2, str(self_.sectors).replace('[', '').replace(']', '')),
            # worksheet.update_cell(empty_row_index, 3, trace_summary['mean']['t0'])
            # worksheet.update_cell(empty_row_index, 4, trace_summary['mean']['period'])
            # worksheet.update_cell(empty_row_index, 5, trace_summary['mean']['depth'])
            # worksheet.update_cell(empty_row_index, 6, trace_summary['mean']['dur'])
            # worksheet.update_cell(empty_row_index, 7, self_.star_radius)
            # worksheet.update_cell(empty_row_index, 8, trace_summary['mean']['ror'] * self_.star_radius)

        run_fitting_button.on_click(run_fitting)
        self.plot_light_curve_source(initial_fit_figure,
                                     initial_fit_data_source,
                                     time_column_name='Folded time (days)')
        initial_fit_figure.line('Fit time',
                                'Fit',
                                source=initial_fit_data_source,
                                color='black',
                                line_width=3)
        initial_fit_figure.sizing_mode = 'stretch_width'

        return initial_fit_figure, parameters_table

    @staticmethod
    def plot_light_curve_source(figure: Figure,
                                data_source: ColumnDataSource,
                                time_column_name: str = 'Time (BTJD)',
                                flux_column_name: str = 'Relative flux',
                                color_value_column_name: str = 'Time (BTJD)'):
        """
        Plots the light curve data source on the passed figure.

        :param figure: The figure to plot to.
        :param data_source: The data source containing the light curve data.
        :param time_column_name: The name of the time column whose values will be used on the x axis.
        :param flux_column_name: The name of the flux column whose values will be used on the y axis.
        :param color_value_column_name: The name of the column whose values will be used to determine data point color.
        """
        mapper = LinearColorMapper(
            palette='Plasma256',
            low=np.min(data_source.data[color_value_column_name]),
            high=np.max(data_source.data[color_value_column_name]))
        colors = {'field': color_value_column_name, 'transform': mapper}
        figure.circle(time_column_name,
                      flux_column_name,
                      source=data_source,
                      fill_color=colors,
                      fill_alpha=0.1,
                      line_color=colors,
                      line_alpha=0.4)

    def stitch_fluxes_flux_errors_and_times_for_target_from_mast(
        self,
        tic_id: int,
        sectors: Union[int, List[int], None] = None
    ) -> (np.ndarray, np.ndarray, np.ndarray):
        """
        Downloads light curves from MAST for a given TIC ID and stitches them together.

        :param tic_id: The target TIC ID.
        :param sectors: The sectors to download and stitch together. Defaults to None which will download all available.
        :return: The fluxes, flux errors, and times of the stitched light curves.
        """
        if sectors is None:
            # Resolve the default the same way the constructor does, so None works as documented.
            sectors = self.tess_data_interface.get_sectors_target_appears_in(tic_id)
        relative_fluxes_arrays = []
        relative_flux_errors_arrays = []
        times_arrays = []
        for sector in sectors:
            light_curve_path = self.tess_data_interface.download_two_minute_cadence_light_curve(
                tic_id, sector)
            light_curve = self.tess_data_interface.load_fluxes_flux_errors_and_times_from_fits_file(
                light_curve_path)
            sector_fluxes, sector_flux_errors, sector_times = light_curve
            sector_flux_median = np.median(sector_fluxes)
            sector_normalized_fluxes = sector_fluxes / sector_flux_median - 1
            sector_normalized_flux_errors = sector_flux_errors / sector_flux_median
            relative_fluxes_arrays.append(sector_normalized_fluxes)
            relative_flux_errors_arrays.append(sector_normalized_flux_errors)
            times_arrays.append(sector_times)
        times = np.concatenate(times_arrays).astype(np.float64)
        relative_fluxes = np.concatenate(relative_fluxes_arrays).astype(
            np.float64)
        relative_flux_errors = np.concatenate(
            relative_flux_errors_arrays).astype(np.float64)
        return relative_flux_errors, relative_fluxes, times

    @staticmethod
    def calculate_epoch_and_period_from_approximate_event_times(
            event_times: List[float]) -> (float, float):
        """
        Calculates the epoch and period of a signal given selected event times. The epoch is set to the first event
        chronologically.

        :param event_times: The times of the events.
        :return: The epoch and period.
        """
        sorted_event_times = np.sort(event_times)
        epoch = sorted_event_times[0]
        event_time_differences = np.diff(sorted_event_times)
        # Assume the smallest difference is close to a single period.
        smallest_time_difference = np.min(event_time_differences)
        # Get all differences close to the smallest difference to estimate a single period difference.
        threshold_from_smallest = smallest_time_difference * 0.1
        single_period_differences = event_time_differences[
            np.abs(event_time_differences -
                   smallest_time_difference) < threshold_from_smallest]
        period_estimate_from_single_period_events = np.mean(
            single_period_differences)
        # Using the above estimate, estimate the number of cycles in larger time differences.
        cycles_per_time_difference = np.rint(
            event_time_differences / period_estimate_from_single_period_events)
        period_estimates = event_time_differences / cycles_per_time_difference
        # Weight the larger differences more heavily, based on the number of cycles estimated.
        period = np.average(period_estimates,
                            weights=cycles_per_time_difference)
        return epoch, period
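
    # Worked example (illustrative) for the routine above:
    #
    #     event_times = [1000.0, 1003.0, 1009.1, 1012.0]
    #     # Differences: [3.0, 6.1, 2.9]; the 6.1 gap spans two cycles (a missed event).
    #     # Single-period estimate: mean([3.0, 2.9]) = 2.95.
    #     # Cycle counts: rint([3.0, 6.1, 2.9] / 2.95) = [1, 2, 1].
    #     # Per-gap estimates: [3.0, 3.05, 2.9]; cycle-weighted mean = 3.0.
    #     # Returns epoch = 1000.0, period = 3.0.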

    @staticmethod
    def fold_times(times: np.ndarray, epoch: float,
                   period: float) -> np.ndarray:
        """
        Folds an array of times based on an epoch and period.

        :param times: The times to fold.
        :param epoch: The epoch of the fold.
        :param period: The period of the fold.
        :return: The folded times.
        """
        half_period = (period / 2)
        half_period_offset_epoch_times = times - (epoch - half_period)
        half_period_offset_folded_times = half_period_offset_epoch_times % period
        folded_times = half_period_offset_folded_times - half_period
        return folded_times
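
    # Worked example (illustrative) of the fold above, with epoch 1002.0 and
    # period 3.0; events fold to zero and all values lie in [-1.5, 1.5):
    #
    #     times = np.array([1000.5, 1002.0, 1007.4])
    #     # (times - (1002.0 - 1.5)) % 3.0 - 1.5 == [-1.5, 0.0, -0.6]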

    @staticmethod
    def round_series_to_significant_figures(
            series: pd.Series, significant_figures: int) -> pd.Series:
        """
        Rounds a series to a given number of significant figures.

        :param series: The series to round.
        :param significant_figures: The number of significant figures to round to.
        :return: The rounded series.
        """
        def round_value_to_significant_figures(value):
            """Rounds a value to the outer scope number of significant figures."""
            if value == 0:
                return 0  # `log10` is undefined at zero; zero needs no rounding.
            return round(
                value, significant_figures - 1 -
                int(math.floor(math.log10(abs(value)))))

        return series.apply(round_value_to_significant_figures)
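
# A standalone sketch (not part of the original listing) mirroring the rounding
# rule used by `round_series_to_significant_figures` above:
import math

import pandas as pd


def round_to_significant_figures(value: float, significant_figures: int) -> float:
    """Rounds a single value to the given number of significant figures."""
    if value == 0:
        return 0
    return round(value,
                 significant_figures - 1 - int(math.floor(math.log10(abs(value)))))


example_series = pd.Series([0.012345, 123.456, 7.89])
print(example_series.apply(lambda value: round_to_significant_figures(value, 3)))
# Prints 0.0123, 123.0, and 7.89.
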
class TessSyntheticInjectedDatabase(LightcurveDatabase):
    """
    A class to represent the database for injecting synthetic signals into real TESS data.
    """
    def __init__(self, data_directory='data/self_lensing_binaries'):
        super().__init__(data_directory=data_directory)
        self.lightcurve_directory: Path = self.data_directory.joinpath(
            'lightcurves')
        self.synthetic_signal_directory: Path = self.data_directory.joinpath(
            'synthetic_signals')
        self.tess_data_interface = TessDataInterface()
        self.time_steps_per_example = 20000
        self.shuffle_buffer_size = 10000
        self.allow_out_of_bounds_injection = False

    def generate_datasets(self) -> (tf.data.Dataset, tf.data.Dataset):
        """
        Generates the training and validation datasets for the database.

        :return: The training and validation dataset.
        """
        synthetic_signal_paths_dataset = self.paths_dataset_from_list_or_generator_factory(
            self.get_all_synthetic_signal_paths)
        lightcurve_paths_datasets = self.get_training_and_validation_datasets_for_file_paths(
            self.get_all_lightcurve_paths)
        training_lightcurve_paths_dataset, validation_lightcurve_paths_dataset = lightcurve_paths_datasets
        shuffled_training_lightcurve_paths_dataset = training_lightcurve_paths_dataset.repeat(
        ).shuffle(buffer_size=self.shuffle_buffer_size)
        shuffled_synthetic_signal_paths_dataset = synthetic_signal_paths_dataset.repeat(
        ).shuffle(buffer_size=self.shuffle_buffer_size)
        zipped_training_paths_dataset = tf.data.Dataset.zip(
            (shuffled_training_lightcurve_paths_dataset,
             shuffled_synthetic_signal_paths_dataset))
        output_types = (tf.float32, tf.float32)
        output_shapes = [(self.time_steps_per_example, 1), (1, )]
        lightcurve_training_dataset = map_py_function_to_dataset(
            zipped_training_paths_dataset,
            self.train_and_validation_preprocessing,
            self.number_of_parallel_processes_per_map,
            output_types=output_types,
            output_shapes=output_shapes,
            flat_map=True)
        batched_training_dataset = lightcurve_training_dataset.batch(
            self.batch_size)
        prefetch_training_dataset = batched_training_dataset.prefetch(
            tf.data.experimental.AUTOTUNE)
        shuffled_validation_lightcurve_paths_dataset = validation_lightcurve_paths_dataset.repeat(
        ).shuffle(buffer_size=self.shuffle_buffer_size)
        zipped_validation_paths_dataset = tf.data.Dataset.zip(
            (shuffled_validation_lightcurve_paths_dataset,
             shuffled_synthetic_signal_paths_dataset))
        lightcurve_validation_dataset = map_py_function_to_dataset(
            zipped_validation_paths_dataset,
            self.train_and_validation_preprocessing,
            self.number_of_parallel_processes_per_map,
            output_types=output_types,
            output_shapes=output_shapes,
            flat_map=True)
        batched_validation_dataset = lightcurve_validation_dataset.batch(
            self.batch_size)
        prefetch_validation_dataset = batched_validation_dataset.prefetch(
            tf.data.experimental.AUTOTUNE)
        return prefetch_training_dataset, prefetch_validation_dataset
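
    # Hypothetical usage (the surrounding training loop is not part of this
    # listing). Both returned datasets repeat indefinitely, so bound iteration:
    #
    #     database = TessSyntheticInjectedDatabase()
    #     training_dataset, validation_dataset = database.generate_datasets()
    #     for examples, labels in training_dataset.take(1):
    #         print(examples.shape)  # (batch_size, 20000, 1)
    #         print(labels.shape)    # (batch_size, 1)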

    def get_all_lightcurve_paths(self) -> Iterable[Path]:
        """
        Returns the list of all lightcurves to use. Expected to be overridden for subclass databases.

        :return: The list of lightcurves.
        """
        lightcurve_paths = self.lightcurve_directory.glob('**/*.fits')
        return lightcurve_paths

    def get_all_synthetic_signal_paths(self) -> Iterable[Path]:
        """
        Returns the list of all synthetic signals to use. Expected to be overridden for subclass databases.

        :return: The list of synthetic signals.
        """
        synthetic_signal_paths = self.synthetic_signal_directory.glob(
            '**/*.feather')
        return synthetic_signal_paths

    def train_and_validation_preprocessing(
        self,
        lightcurve_path_tensor: tf.Tensor,
        synthetic_signal_path_tensor: tf.Tensor,
    ) -> ((np.ndarray, np.ndarray), (np.ndarray, np.ndarray)):
        """
        The training and validation preprocessing.

        :param lightcurve_path_tensor: The lightcurve's path to be preprocessed.
        :param synthetic_signal_path_tensor: The synthetic signal's path to be injected.
        :return: Two examples, one negative un-injected signal and one positive injected signal (paired as a tuple),
                 and the corresponding labels (paired as a tuple). Expected to have a post flat mapping to make each
                 element of the data be an individual example and label pair.
        """
        lightcurve_path = lightcurve_path_tensor.numpy().decode('utf-8')
        synthetic_signal_path = synthetic_signal_path_tensor.numpy().decode(
            'utf-8')
        fluxes, times = self.load_fluxes_and_times_from_lightcurve_path(
            lightcurve_path)
        synthetic_magnifications, synthetic_times = self.load_magnifications_and_times_from_synthetic_signal_path(
            synthetic_signal_path)
        fluxes_with_injected_signal = self.inject_signal_into_lightcurve(
            fluxes, times, synthetic_magnifications, synthetic_times)
        # Use the same seed for both versions so random cropping during
        # preprocessing lines up between the injected and un-injected fluxes.
        time_seed = int(time.time())
        fluxes = self.flux_preprocessing(fluxes, seed=time_seed)
        fluxes_with_injected_signal = self.flux_preprocessing(
            fluxes_with_injected_signal, seed=time_seed)
        lightcurve = np.expand_dims(fluxes, axis=-1)
        lightcurve_with_injected_signal = np.expand_dims(
            fluxes_with_injected_signal, axis=-1)
        examples = (lightcurve, lightcurve_with_injected_signal)
        labels = (np.array([0]), np.array([1]))
        return examples, labels

    def load_fluxes_and_times_from_lightcurve_path(
            self, lightcurve_path: str) -> (np.ndarray, np.ndarray):
        """
        Loads the lightcurve from the path given. Should be overridden to fit a specific database's file format.

        :param lightcurve_path: The path to the lightcurve file.
        :return: The fluxes and times of the lightcurve.
        """
        fluxes, times = self.tess_data_interface.load_fluxes_and_times_from_fits_file(
            lightcurve_path)
        return fluxes, times

    def load_magnifications_and_times_from_synthetic_signal_path(
            self, synthetic_signal_path: str) -> (np.ndarray, np.ndarray):
        """
        Loads the synthetic signal from the path given. Should be overridden to fit a specific database's file format.

        :param synthetic_signal_path: The path to the synthetic signal data file.
        :return: The magnifications and relative times of the synthetic signal.
        """
        synthetic_signal = pd.read_feather(synthetic_signal_path)
        synthetic_magnifications, synthetic_times = synthetic_signal[
            'Magnification'], synthetic_signal['Time (hours)']
        synthetic_times = synthetic_times / 24  # Convert hours to days.
        # Randomly shift the signal up to 30 days earlier so injections land at
        # varying positions within the light curve.
        synthetic_times -= 30 * np.random.random()
        return synthetic_magnifications, synthetic_times

    def flux_preprocessing(self,
                           fluxes: np.ndarray,
                           evaluation_mode: bool = False,
                           seed: int = None) -> np.ndarray:
        """
        Preprocessing for the flux.

        :param fluxes: The flux array to preprocess.
        :param evaluation_mode: If the preprocessing should be consistent for evaluation.
        :param seed: Seed for the randomization.
        :return: The preprocessed flux array.
        """
        normalized_fluxes = self.normalize(fluxes)
        uniform_length_fluxes = self.make_uniform_length(
            normalized_fluxes,
            self.time_steps_per_example,
            randomize=not evaluation_mode,
            seed=seed)
        return uniform_length_fluxes

    def inject_signal_into_lightcurve(self, lightcurve_fluxes: np.ndarray,
                                      lightcurve_times: np.ndarray,
                                      signal_magnifications: np.ndarray,
                                      signal_times: np.ndarray):
        """
        Injects a synthetic magnification signal into real lightcurve fluxes.

        :param lightcurve_fluxes: The fluxes of the lightcurve to be injected into.
        :param lightcurve_times: The times of the flux observations of the lightcurve.
        :param signal_magnifications: The synthetic magnifications to inject.
        :param signal_times: The times of the synthetic magnifications.
        :return: The fluxes with the injected signal.
        """
        median_flux = np.median(lightcurve_fluxes)
        signal_fluxes = (signal_magnifications * median_flux) - median_flux
        if self.allow_out_of_bounds_injection:
            signal_flux_interpolator = interp1d(signal_times,
                                                signal_fluxes,
                                                bounds_error=False,
                                                fill_value=0)
        else:
            signal_flux_interpolator = interp1d(signal_times,
                                                signal_fluxes,
                                                bounds_error=True)
        lightcurve_relative_times = lightcurve_times - np.min(lightcurve_times)
        interpolated_signal_fluxes = signal_flux_interpolator(
            lightcurve_relative_times)
        fluxes_with_injected_signal = lightcurve_fluxes + interpolated_signal_fluxes
        return fluxes_with_injected_signal
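
    # Numeric sketch (illustrative) of the conversion above: a magnification of
    # exactly 1 adds zero flux, so baseline regions are left unchanged.
    #
    #     median_flux = 100.0
    #     signal_magnifications = np.array([1.0, 1.2, 1.0])
    #     signal_fluxes = signal_magnifications * median_flux - median_flux
    #     # signal_fluxes == [0.0, 20.0, 0.0]
    #
    # With `allow_out_of_bounds_injection`, times beyond the synthetic signal's
    # span interpolate to zero, i.e. no injected signal outside its window.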

    def infer_preprocessing(
            self, lightcurve_path_tensor: tf.Tensor) -> (str, np.ndarray):
        """
        Preprocesses a lightcurve for inference. The lightcurve path is returned alongside the data because, under
        multiprocessing, input order is not guaranteed, and keeping the path with its lightcurve eases analysis.

        :param lightcurve_path_tensor: A tensor containing the path of the lightcurve to preprocess.
        :return: The path of the lightcurve and the preprocessed lightcurve.
        """
        lightcurve_path = lightcurve_path_tensor.numpy().decode('utf-8')
        fluxes, times = self.load_fluxes_and_times_from_lightcurve_path(
            lightcurve_path)
        fluxes = self.flux_preprocessing(fluxes, evaluation_mode=True)
        lightcurve = np.expand_dims(fluxes, axis=-1)
        return lightcurve_path, lightcurve

    @staticmethod
    def generate_synthetic_signal_from_real_data(
            fluxes: np.ndarray, times: np.ndarray) -> (np.ndarray, np.ndarray):
        """
        Takes real lightcurve data and converts it to a form that can be used for synthetic lightcurve injection.

        :param fluxes: The real lightcurve fluxes.
        :param times: The real lightcurve times.
        :return: Fake synthetic magnifications and times.
        """
        flux_median = np.median(fluxes)
        normalized_fluxes = fluxes / flux_median
        relative_times = times - np.min(times)
        return normalized_fluxes, relative_times
Example n. 20
 def tess_data_interface(self) -> TessDataInterface:
     return TessDataInterface()
Example n. 21
 def download_exofop_toi_database(
         self, number_of_negative_lightcurves_to_download=10000):
     """
     Downloads the `ExoFOP TOI database <https://exofop.ipac.caltech.edu/tess/view_toi.php>`_ dispositions and the
     corresponding TESS light curves.
     """
     print('Clearing data directory...')
     self.clear_data_directory()
     print("Downloading ExoFOP TOI disposition CSV...")
     toi_csv_url = 'https://exofop.ipac.caltech.edu/tess/download_toi.php?sort=toi&output=csv'
     response = requests.get(toi_csv_url)
     with self.toi_dispositions_path.open('wb') as csv_file:
         csv_file.write(response.content)
     print('Downloading TESS observation list...')
     tess_data_interface = TessDataInterface()
     tess_observations = tess_data_interface.get_all_tess_time_series_observations(
     )
     single_sector_observations = tess_data_interface.filter_for_single_sector_observations(
         tess_observations)
     single_sector_observations = tess_data_interface.add_tic_id_column_to_single_sector_observations(
         single_sector_observations)
     single_sector_observations = tess_data_interface.add_sector_column_to_single_sector_observations(
         single_sector_observations)
     print(
         "Downloading lightcurves which are confirmed or suspected planets in TOI dispositions..."
     )
     toi_dispositions = self.load_toi_dispositions_in_project_format()
     suspected_planet_dispositions = toi_dispositions[
         toi_dispositions['disposition'] != 'FP']
     suspected_planet_observations = pd.merge(single_sector_observations,
                                              suspected_planet_dispositions,
                                              how='inner',
                                              on=['TIC ID', 'Sector'])
     observations_not_found = suspected_planet_dispositions.shape[
         0] - suspected_planet_observations.shape[0]
     print(
         f"{suspected_planet_observations.shape[0]} observations found that match the TOI dispositions."
     )
     print(
         f"No observations found for {observations_not_found} entries in TOI dispositions."
     )
     suspected_planet_data_products = tess_data_interface.get_product_list(
         suspected_planet_observations)
     suspected_planet_lightcurve_data_products = suspected_planet_data_products[
         suspected_planet_data_products['productFilename'].str.endswith(
             'lc.fits')]
     suspected_planet_download_manifest = tess_data_interface.download_products(
         suspected_planet_lightcurve_data_products,
         data_directory=self.data_directory)
     print(f'Moving lightcurves to {self.lightcurve_directory}...')
     for file_path_string in suspected_planet_download_manifest[
             'Local Path']:
         file_path = Path(file_path_string)
         file_path.rename(self.lightcurve_directory.joinpath(
             file_path.name))
     print(
         "Downloading lightcurves which are not in TOI dispositions and do not have TCEs (not planets)..."
     )
     print(
         f'Download limited to {number_of_negative_lightcurves_to_download} lightcurves...'
     )
     # noinspection SpellCheckingInspection
     toi_tic_ids = toi_dispositions['TIC ID'].values
     not_toi_observations = single_sector_observations[
         ~single_sector_observations['TIC ID'].
         isin(toi_tic_ids)  # Don't include even false positives.
     ]
     not_toi_observations = not_toi_observations.sample(frac=1,
                                                        random_state=0)
     # Limit the number of observations to shorten product list retrieval,
     # keeping twice the needed count as a buffer for the filtering below.
     not_toi_observations = not_toi_observations.head(
         number_of_negative_lightcurves_to_download * 2)
     not_toi_data_products = tess_data_interface.get_product_list(
         not_toi_observations)
     not_toi_data_products = tess_data_interface.add_tic_id_column_to_single_sector_observations(
         not_toi_data_products)
     not_toi_lightcurve_data_products = not_toi_data_products[
         not_toi_data_products['productFilename'].str.endswith('lc.fits')]
     not_toi_data_validation_data_products = not_toi_data_products[
         not_toi_data_products['productFilename'].str.endswith('dvr.xml')]
     tic_ids_with_dv = not_toi_data_validation_data_products[
         'TIC ID'].values
     not_planet_lightcurve_data_products = not_toi_lightcurve_data_products[
         ~not_toi_lightcurve_data_products['TIC ID'].
         isin(tic_ids_with_dv)  # Remove any lightcurves with TCEs.
     ]
     # Shuffle rows.
     not_planet_lightcurve_data_products = not_planet_lightcurve_data_products.sample(
         frac=1, random_state=0)
     not_planet_download_manifest = tess_data_interface.download_products(
         not_planet_lightcurve_data_products.head(
             number_of_negative_lightcurves_to_download),
         data_directory=self.data_directory)
     print(f'Moving lightcurves to {self.lightcurve_directory}...')
     for file_path_string in not_planet_download_manifest['Local Path']:
         file_path = Path(file_path_string)
         file_path.rename(self.lightcurve_directory.joinpath(
             file_path.name))
     print('Database ready.')
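
# Toy illustration (not from the original listing) of the negative-example
# filtering above: exclude all TOI targets, then exclude targets with data
# validation (TCE) products.
import pandas as pd

observations = pd.DataFrame({'TIC ID': [1, 2, 3, 4]})
toi_tic_ids = [2]         # Any TOI target, including false positives.
tic_ids_with_dv = [4]     # Targets with 'dvr.xml' data validation products.
negatives = observations[~observations['TIC ID'].isin(toi_tic_ids)]
negatives = negatives[~negatives['TIC ID'].isin(tic_ids_with_dv)]
print(negatives)  # Only TIC IDs 1 and 3 remain.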
Example n. 22
class TessTargetMetadataManger:
    """
    A class for managing the metadata of TESS targets.
    """
    tess_data_interface = TessDataInterface()

    def __init__(self):
        self.light_curve_root_directory_path = Path(
            'data/tess_two_minute_cadence_light_curves')

    def insert_multiple_rows_from_paths_into_database(
            self, light_curve_paths: List[Path]) -> int:
        """
        Inserts a set of targets into the table from light curve paths.

        :param light_curve_paths: The list of paths to insert.
        :return: The number of rows inserted.
        """
        row_dictionary_list = []
        table_name = convert_class_to_table_name(TessTargetMetadata)
        for light_curve_path in light_curve_paths:
            tic_id, _ = self.tess_data_interface.get_tic_id_and_sector_from_file_path(
                light_curve_path)
            uuid_name = f'{table_name} TIC {tic_id}'
            uuid = metadatabase_uuid(uuid_name)
            dataset_split = dataset_split_from_uuid(uuid)
            row_dictionary_list.append({
                TessTargetMetadata.tic_id.name:
                tic_id,
                TessTargetMetadata.dataset_split.name:
                dataset_split
            })
        with metadatabase.atomic():
            number_of_rows_inserted = TessTargetMetadata.insert_many(
                row_dictionary_list).on_conflict_ignore().execute()
        return number_of_rows_inserted
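
    # Conceptual sketch (illustrative; `metadatabase_uuid` and
    # `dataset_split_from_uuid` are defined outside this listing) of why a
    # UUID-derived split is useful: hashing the target name yields a split that
    # is deterministic per TIC ID, so a target never migrates between splits:
    #
    #     import hashlib
    #     def sketch_dataset_split(name: str, number_of_splits: int = 10) -> int:
    #         digest = hashlib.md5(name.encode()).digest()
    #         return int.from_bytes(digest[:4], 'big') % number_of_splits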

    def populate_sql_database(self):
        """
        Populates the SQL database based on the light curve files.
        """
        print('Populating the TESS target light curve metadata table...')
        path_glob = self.light_curve_root_directory_path.glob('**/*.fits')
        row_count = 0
        batch_paths = []
        # Dataset splits are derived deterministically from target UUIDs during
        # insertion, so only the paths need batching here.
        with metadatabase.atomic():
            for index, path in enumerate(path_glob):
                batch_paths.append(
                    path.relative_to(self.light_curve_root_directory_path))
                if index % 1000 == 0 and index != 0:
                    row_count += self.insert_multiple_rows_from_paths_into_database(
                        batch_paths)
                    batch_paths = []
                    print(f'{row_count} rows inserted...', end='\r')
            if len(batch_paths) > 0:
                row_count += self.insert_multiple_rows_from_paths_into_database(
                    batch_paths)
        print(f'TESS target metadata table populated. {row_count} rows added.')

    def build_table(self):
        """
        Builds the SQL table.
        """
        TessTargetMetadata.drop_table()
        TessTargetMetadata.create_table()
        self.populate_sql_database()