def insert_multiple_rows_from_paths_into_database(self, light_curve_paths: List[Path]) -> int:
    """
    Inserts one target row per passed light curve path into the TESS target metadata table.

    For each path, the TIC ID is obtained from the path using the TESS data interface, and the dataset split
    is derived from a UUID generated from the table name and the TIC ID. Rows which conflict with rows
    already in the table are ignored by the insert query.

    :param light_curve_paths: The list of light curve paths to insert targets for.
    :return: The number of rows inserted, as reported by the insert query. 0 when no paths are passed.
    """
    if len(light_curve_paths) == 0:
        # There is nothing to insert, so skip building and executing a bulk insert query with no rows.
        return 0
    table_name = convert_class_to_table_name(TessTargetMetadata)
    row_dictionary_list = []
    for light_curve_path in light_curve_paths:
        # Only the TIC ID identifies a target. The sector is not used here.
        tic_id, _ = self.tess_data_interface.get_tic_id_and_sector_from_file_path(light_curve_path)
        uuid = metadatabase_uuid(f'{table_name} TIC {tic_id}')
        row_dictionary_list.append({
            TessTargetMetadata.tic_id.name: tic_id,
            TessTargetMetadata.dataset_split.name: dataset_split_from_uuid(uuid),
        })
    with metadatabase.atomic():
        # NOTE(review): presumably a peewee query. Depending on the peewee version and database backend,
        # `execute()` on a bulk insert may return the last inserted row ID rather than a row count. Confirm.
        number_of_rows_inserted = TessTargetMetadata.insert_many(row_dictionary_list).on_conflict_ignore().execute()
    return number_of_rows_inserted
def populate_sql_database(self):
    """
    Populates the SQL database based on the light curve files.

    Every `.fits` file found recursively under the light curve root directory is collected (as a path
    relative to that root directory) and inserted into the table in fixed size batches, all inside a
    single enclosing database transaction.
    """
    print('Populating the TESS two minute cadence light curve meta data table...')
    batch_size = 1000
    path_glob = self.light_curve_root_directory_path.glob('**/*.fits')
    row_count = 0
    batch_paths = []
    with metadatabase.atomic():
        for path in path_glob:
            batch_paths.append(path.relative_to(self.light_curve_root_directory_path))
            row_count += 1
            # Flush on the batch length, so that every full batch holds exactly `batch_size` paths and the
            # progress message reports the number of rows actually handed to the insert so far.
            if len(batch_paths) >= batch_size:
                self.insert_multiple_rows_from_paths_into_database(batch_paths)
                batch_paths = []
                print(f'{row_count} rows inserted...', end='\r')
        # Insert whatever is left over from the final, partially filled batch.
        if len(batch_paths) > 0:
            self.insert_multiple_rows_from_paths_into_database(batch_paths)
    print(f'TESS two minute cadence light curve meta data table populated. {row_count} rows added.')
def insert_multiple_rows_from_paths_into_database(self, light_curve_paths: List[Path]):
    """
    Inserts one row per passed light curve path into the TESS two minute cadence light curve table.

    For each path, the TIC ID and sector are obtained from the path using the TESS data interface, and the
    dataset split is derived from a UUID generated from the table name, TIC ID, and sector. The path is
    stored as the string form of the path as it was passed in.

    :param light_curve_paths: The list of paths to insert. Nothing is done if the list is empty.
    """
    if len(light_curve_paths) == 0:
        # There is nothing to insert, so skip building and executing a bulk insert query with no rows.
        return
    table_name = convert_class_to_table_name(TessTwoMinuteCadenceLightCurveMetadata)
    row_dictionary_list = []
    for light_curve_path in light_curve_paths:
        tic_id, sector = self.tess_data_interface.get_tic_id_and_sector_from_file_path(light_curve_path)
        uuid = metadatabase_uuid(f'{table_name} TIC {tic_id} sector {sector}')
        row_dictionary_list.append({
            TessTwoMinuteCadenceLightCurveMetadata.path.name: str(light_curve_path),
            TessTwoMinuteCadenceLightCurveMetadata.tic_id.name: tic_id,
            TessTwoMinuteCadenceLightCurveMetadata.sector.name: sector,
            TessTwoMinuteCadenceLightCurveMetadata.dataset_split.name: dataset_split_from_uuid(uuid),
        })
    with metadatabase.atomic():
        TessTwoMinuteCadenceLightCurveMetadata.insert_many(row_dictionary_list).execute()
def add_tic_ids_as_confirmed(tic_ids: List[int]):
    """
    Records each passed TIC ID in the transit metadata table with the confirmed disposition.

    TIC IDs which already have a row in the table are left untouched. The number of newly added rows is
    printed once all the IDs have been processed.

    :param tic_ids: The TIC IDs to record as confirmed transits.
    """
    added_count = 0
    with metadatabase.atomic():
        for tic_id in tic_ids:
            existing_rows = TessTransitMetadata.select().where(TessTransitMetadata.tic_id == tic_id)
            if not existing_rows.exists():
                # Only targets without any existing row get a new confirmed entry.
                new_row = TessTransitMetadata()
                new_row.tic_id = tic_id
                new_row.disposition = Disposition.CONFIRMED.value
                new_row.save()
                added_count += 1
    print(f'{added_count} rows added.')
def build_table():
    """
    Builds the TESS transit metadata table.

    The TOI and CTOI disposition tables are reduced to their TIC ID and disposition columns, concatenated,
    and grouped into a set of dispositions per TIC ID. The existing table is dropped and recreated, and then
    one row is saved per TIC ID with a single database disposition chosen from that target's set. Targets
    whose set contains none of the known disposition labels are skipped with a warning.
    """
    print('Building TESS transit metadata table...')
    tess_toi_data_interface = TessToiDataInterface()
    toi_dispositions = tess_toi_data_interface.toi_dispositions
    ctoi_dispositions = tess_toi_data_interface.ctoi_dispositions
    disposition_columns = [ToiColumns.tic_id.value, ToiColumns.disposition.value]
    toi_filtered_dispositions = toi_dispositions.filter(disposition_columns)
    ctoi_filtered_dispositions = ctoi_dispositions.filter(disposition_columns)
    all_dispositions = pd.concat([toi_filtered_dispositions, ctoi_filtered_dispositions], ignore_index=True)
    # One entry per TIC ID, holding the set of every disposition label listed for that target.
    target_grouped_dispositions = all_dispositions.groupby(
        ToiColumns.tic_id.value)[ToiColumns.disposition.value].apply(set)
    row_count = 0
    metadatabase.drop_tables([TessTransitMetadata])
    metadatabase.create_tables([TessTransitMetadata])
    with metadatabase.atomic():
        for tic_id, disposition_set in target_grouped_dispositions.items():
            # As a target can have multiple dispositions, use the most forgiving available disposition.
            # The branches are ordered confirmed, then candidate, then false positive.
            if 'KP' in disposition_set or 'CP' in disposition_set:
                database_disposition = Disposition.CONFIRMED.value
            elif 'PC' in disposition_set or '' in disposition_set or 'APC' in disposition_set:
                # An empty disposition string is treated the same as a candidate label.
                database_disposition = Disposition.CANDIDATE.value
            elif 'FP' in disposition_set or 'FA' in disposition_set:
                database_disposition = Disposition.FALSE_POSITIVE.value
            else:
                warnings.warn(
                    f'Dispositions for TIC {tic_id} are {disposition_set}, which does not contain a known '
                    f'disposition.')
                continue
            row = TessTransitMetadata(tic_id=tic_id, disposition=database_disposition)
            row.save()
            row_count += 1
    print(f'Table built. {row_count} rows added.')
def insert_multiple_rows_from_paths_into_database(self, light_curve_paths: List[Path]):
    """
    Inserts one row per passed light curve path into the TESS FFI light curve table.

    For each path, the TIC ID, sector, and floor magnitude are obtained from the path using the
    `TessFfiLightCurve` helpers, and the dataset split is derived from a UUID generated from the table
    name, TIC ID, and sector. The stored path is the path relative to the light curve root directory.

    :param light_curve_paths: The list of paths to insert. Nothing is done if the list is empty.
    """
    if len(light_curve_paths) == 0:
        # There is nothing to insert, so skip building and executing a bulk insert query with no rows.
        return
    table_name = convert_class_to_table_name(TessFfiLightCurveMetadata)
    row_dictionary_list = []
    for light_curve_path in light_curve_paths:
        tic_id, sector = TessFfiLightCurve.get_tic_id_and_sector_from_file_path(light_curve_path)
        magnitude = TessFfiLightCurve.get_floor_magnitude_from_file_path(light_curve_path)
        relative_path = light_curve_path.relative_to(self.light_curve_root_directory_path)
        uuid = metadatabase_uuid(f'{table_name} TIC {tic_id} sector {sector}')
        row_dictionary_list.append({
            TessFfiLightCurveMetadata.path.name: str(relative_path),
            TessFfiLightCurveMetadata.tic_id.name: tic_id,
            TessFfiLightCurveMetadata.sector.name: sector,
            TessFfiLightCurveMetadata.magnitude.name: magnitude,
            TessFfiLightCurveMetadata.dataset_split.name: dataset_split_from_uuid(uuid),
        })
    with metadatabase.atomic():
        TessFfiLightCurveMetadata.insert_many(row_dictionary_list).execute()
def populate_sql_database(self):
    """
    Populates the SQL database based on the light curve files.

    Light curve pickle files are searched for under the light curve root directory, one glob per sector
    for sectors 1 through 26, and inserted into the table in fixed size batches, all inside a single
    enclosing database transaction.
    """
    print('Populating the TESS FFI light curve meta data table...', flush=True)
    batch_size = 1000
    # One glob per sector directory, chained into a single stream of paths.
    single_sector_path_globs = [
        self.light_curve_root_directory_path.glob(f'tesslcs_sector_{sector}_104/tesslcs_tmag_*_*/tesslc_*.pkl')
        for sector in range(1, 27)
    ]
    path_glob = itertools.chain.from_iterable(single_sector_path_globs)
    row_count = 0
    batch_paths = []
    with metadatabase.atomic():
        for path in path_glob:
            batch_paths.append(path)
            row_count += 1
            # Flush on the batch length, so that every full batch holds exactly `batch_size` paths and the
            # progress message reports the number of rows actually handed to the insert so far.
            if len(batch_paths) >= batch_size:
                self.insert_multiple_rows_from_paths_into_database(batch_paths)
                batch_paths = []
                print(f'{row_count} rows inserted...', end='\r', flush=True)
        # Insert whatever is left over from the final, partially filled batch.
        if len(batch_paths) > 0:
            self.insert_multiple_rows_from_paths_into_database(batch_paths)
    print(f'TESS FFI light curve meta data table populated. {row_count} rows added.', flush=True)