def _update_data_by_remove_repeated_ids(
        first_loop_data: DataBase, light_curve_data: DataBase,
        id_key_name: str, pool_labels_class: str = 'Ia'
) -> Tuple[DataBase, DataBase]:
    """
    Removes from the pool sample every object already in the training sample

    Objects are matched through the ``id_key_name`` column of the first loop
    pool metadata and of the initial training metadata. The filtered pool
    (metadata, features and freshly computed labels) is stored on
    ``first_loop_data`` and the very same objects are then assigned to the
    pool attributes of ``light_curve_data``.

    Parameters
    ----------
    first_loop_data
        first loop light curve data
    light_curve_data
        initial light curve training data
    id_key_name
        object identification key name
    pool_labels_class
        pool labels class name; pool objects whose metadata 'type' equals
        this value are labelled 1, all the others 0

    Returns
    -------
    Tuple[DataBase, DataBase]
        ``first_loop_data`` and ``light_curve_data``, both modified in place
    """
    pool_ids = first_loop_data.pool_metadata[id_key_name].values
    train_ids = light_curve_data.train_metadata[id_key_name].values
    # np.isin is the supported replacement for np.in1d, which is deprecated
    # since NumPy 2.0; both give the same mask for 1-D id arrays.
    is_new_object = ~np.isin(pool_ids, train_ids)

    first_loop_data.pool_metadata = first_loop_data.pool_metadata[
        is_new_object]
    first_loop_data.pool_features = first_loop_data.pool_features[
        is_new_object]
    # labels are recomputed from the already filtered metadata
    is_target_class = (
        first_loop_data.pool_metadata['type'].values == pool_labels_class)
    first_loop_data.pool_labels = is_target_class.astype(int)

    # both data sets share the same pool objects from here on (no copies)
    light_curve_data.pool_features = first_loop_data.pool_features
    light_curve_data.pool_metadata = first_loop_data.pool_metadata
    light_curve_data.pool_labels = first_loop_data.pool_labels
    return first_loop_data, light_curve_data
def _update_light_curve_data_for_next_epoch(
        light_curve_data: DataBase, next_day_data: DataBase,
        canonical_data: DataBase, is_queryable: bool, strategy: str,
        is_separate_files: bool) -> DataBase:
    """
    Loads the next day samples into the learning data for the next epoch

    The pool sample is always taken from ``next_day_data``; the test and
    validation samples are taken from it only when the samples do not come
    from separate files. The queryable ids are then rebuilt from the new
    pool metadata.

    Parameters
    ----------
    light_curve_data
        light curve learning data
    next_day_data
        next day light curve data
    canonical_data
        canonical strategy light curve data
    is_queryable
        If True, allow queries only on objects flagged as queryable
        (pool metadata column 'queryable'). Otherwise every pool object
        id is queryable.
    strategy
        Query strategy. Options are (all can be run with budget):
        "UncSampling", "UncSamplingEntropy", "UncSamplingLeastConfident",
        "UncSamplingMargin", "QBDMI", "QBDEntropy", "RandomSampling".
        Only the value 'canonical' is inspected by this function.
    is_separate_files
        If True, consider samples separately read from independent files;
        test and validation samples are then left untouched.

    Returns
    -------
    DataBase
        ``light_curve_data``, modified in place
    """
    if is_separate_files:
        sample_names = ('pool',)
    else:
        sample_names = ('pool', 'test', 'validation')
    # same assignment order as writing every attribute out by hand:
    # metadata, features, labels for each sample in turn
    for sample_name in sample_names:
        for part_name in ('metadata', 'features', 'labels'):
            attribute_name = f'{sample_name}_{part_name}'
            setattr(light_curve_data, attribute_name,
                    getattr(next_day_data, attribute_name))

    if strategy == 'canonical':
        # NOTE(review): this value is unconditionally replaced a few lines
        # below, so the canonical ids never survive this function - confirm
        # whether that is intended.
        light_curve_data.queryable_ids = canonical_data.queryable_ids

    pool_metadata = light_curve_data.pool_metadata
    if not is_queryable:
        next_queryable_ids = pool_metadata['id'].values
    else:
        queryable_mask = pool_metadata['queryable'].values
        next_queryable_ids = pool_metadata['id'].values[queryable_mask]
    light_curve_data.queryable_ids = next_queryable_ids
    return light_curve_data
def _update_next_day_pool_data(next_day_data: DataBase,
                               next_day_pool_metadata_indices) -> DataBase:
    """
    Removes the given objects from the next day pool sample

    Metadata, labels and features are all filtered by row position, so the
    three pool containers stay aligned with each other.

    Parameters
    ----------
    next_day_data
        next day light curve data
    next_day_pool_metadata_indices
        positions, within the next day pool sample, of the objects to
        remove

    Returns
    -------
    DataBase
        ``next_day_data``, modified in place
    """
    pool_metadata = next_day_data.pool_metadata
    # Keep the surviving rows by position instead of dropping by index
    # label: a label-based drop removes every row sharing that label, which
    # would leave the metadata shorter than the labels/features (removed by
    # position below) whenever the metadata index holds duplicate labels.
    kept_positions = np.delete(
        np.arange(len(pool_metadata)), next_day_pool_metadata_indices)
    next_day_data.pool_metadata = pool_metadata.iloc[kept_positions]
    next_day_data.pool_labels = np.delete(
        next_day_data.pool_labels, next_day_pool_metadata_indices, axis=0)
    next_day_data.pool_features = np.delete(
        next_day_data.pool_features, next_day_pool_metadata_indices, axis=0)
    return next_day_data