def test_generate_and_extract_per_band_features():
    """Per-band features get 'band_<name>.' prefixed keys and round-trip back."""
    lc = TwoBandTestLc(
        a=lightcurve.BandData(
            time=np.array([1, 2.0]),
            flux=np.array([4, 2.0]),
            flux_err=np.array([5, 2.0]),
        ),
        b=lightcurve.BandData(
            time=np.array([1, 2.0]),
            flux=np.array([2, 2.0]),
            flux_err=np.array([3, 2.0]),
        ),
    )

    def extract_first_point(band_data: lightcurve.BandData):
        # Trivial extractor: report only the first flux sample and its error.
        return {
            'first_flux': band_data.flux[0],
            'first_flux_err': band_data.flux_err[0],
        }

    settings = band_settings_params.BandSettings(['a', 'b'])

    features = settings.generate_per_band_features(extract_first_point, lc)
    expected = {
        'band_a.first_flux': 4.0,
        'band_a.first_flux_err': 5.0,
        'band_b.first_flux': 2.0,
        'band_b.first_flux_err': 3.0,
    }
    assert features == expected

    # Extracting a single band strips the prefix again.
    assert settings.get_band_features(features, 'a') == {
        'first_flux': 4.0,
        'first_flux_err': 5.0,
    }
def apply(self, bd) -> lightcurve.BandData:
    """Apply the configured per-series functions to each component of *bd*.

    Time, flux, and flux error each go through their own transform; the
    detected mask is passed through unchanged.
    """
    new_time = self._time_fn(bd.time)
    new_flux = self._flux_fn(bd.flux)
    new_flux_err = self._flux_err_fn(bd.flux_err)
    return lightcurve.BandData(new_time, new_flux, new_flux_err, bd.detected)
def format_dense_multi_band_from_lc_dict(lc_dict, band_order=('g', 'r', 'i', 'z')) -> SNDatasetLC:
    """Formats a multi-band LC dictionary to a dense dataset.

    Currently reformats a time series to dense data, as if every curve had
    sampled at the same time (each band contributes its point nearest in
    time to the first band's sample times). This method is currently slow.

    :param lc_dict: Dictionary from lc_dict_for_id.
    :param band_order: Order of expected bands.
    :return: Light curve.
    """
    if frozenset(lc_dict.keys()) != frozenset(band_order):
        raise ValueError("Unexpected keys {}".format(lc_dict.keys()))

    bands = [lc_dict[name] for name in band_order]
    reference_times = bands[0][:, 0]

    def nearest_sample(band, t):
        # [3]-shaped row (time, flux, flux_err) closest in time to t.
        return band[np.argmin(np.abs(band[:, 0] - t))]

    # [num_points, num_bands, 3]-shaped array.
    dense = np.array(
        [[nearest_sample(band, t) for band in bands] for t in reference_times],
        dtype=np.float64)

    band_data: typing.Dict[str, lightcurve.BandData] = {
        name: lightcurve.BandData(
            time=dense[:, i, 0],
            flux=dense[:, i, 1],
            flux_err=dense[:, i, 2],
        )
        for i, name in enumerate(band_order)
    }
    return SNDatasetLC(**band_data)
def apply(self, bd) -> lightcurve.BandData:
    """Affine augmentation of a band: translate, then dilate, time and flux.

    :param bd: Band to transform; the detected mask is passed through.
    :return: New BandData with transformed time/flux and propagated errors.
    """
    new_x = self._dilate_time * (bd.time + self._translate_time)
    new_y = self._dilate_flux * (bd.flux + self._translate_flux)
    # Linear error propagation: std(a * (y + b)) == |a| * std(y).
    # The previous code used sqrt(dilate_flux) * flux_err (flagged with a
    # TODO by the original author), which understates the error for
    # dilation factors > 1 and overstates it for factors < 1.
    new_yerr = np.abs(self._dilate_flux) * bd.flux_err
    return lightcurve.BandData(new_x, new_y, new_yerr, detected=bd.detected)
def _get_band_from_raw(cls, conn, dataset, obj_id, band_id):
    """Load one band of a light curve from the row-per-observation table.

    :param conn: Open sqlite3 connection.
    :param dataset: Table name to query (e.g. 'training_set').
    :param obj_id: Object ID to select.
    :param band_id: Integer passband index.
    :return: BandData with points ordered by mjd.
    """
    # NOTE(review): `dataset` is interpolated into the SQL text (table names
    # cannot be bound parameters); it must come from trusted internal code.
    q = '''select mjd, flux, flux_err, detected from {} where object_id = ? and passband = ? order by mjd'''.format(dataset)
    rows = conn.execute(q, [obj_id, band_id]).fetchall()
    # Transpose row tuples into one numpy array per column.
    times, fluxes, flux_errs, detected = [
        np.array(column) for column in zip(*rows)
    ]
    return lightcurve.BandData(times, fluxes, flux_errs, detected)
def _get_band_from_blobs(cls, conn, dataset, obj_id, band_id):
    """Load one band of a light curve from the blob-per-band table.

    Each column is stored as one packed binary blob per (object, passband).

    :param conn: Open sqlite3 connection.
    :param dataset: Base table name; queries '{dataset}_blob'.
    :param obj_id: Object ID to select.
    :param band_id: Integer passband index.
    :return: BandData decoded from the blobs.
    """
    res = conn.execute(
        '''
        select mjd, flux, flux_err, detected
        from {}_blob
        where object_id = ? and passband = ?
        '''.format(dataset), [obj_id, band_id])
    raw_times, raw_fluxes, raw_flux_errs, raw_detected = res.fetchone()
    times = np.frombuffer(raw_times, dtype=np.float64)
    fluxes = np.frombuffer(raw_fluxes, dtype=np.float64)
    flux_errs = np.frombuffer(raw_flux_errs, dtype=np.float64)
    # `detected` appears to be stored as 8-byte integers; taking every 8th
    # byte keeps the low byte of each value (0 or 1) — assumes little-endian
    # storage, TODO confirm. `np.bool8` was removed in NumPy 2.0; use the
    # canonical `np.bool_` alias instead.
    detected = np.frombuffer(raw_detected[::8], dtype=np.bool_)
    return lightcurve.BandData(times, fluxes, flux_errs, detected)
def bcolz_get_lcs_by_obj_ids(
        cls, bcolz_source: PlasticcBcolzSource, dataset: str,
        obj_ids: typing.List[int]) -> typing.List['PlasticcDatasetLC']:
    """Gets a list of light curves by object_id.

    :param bcolz_source: Data source instance, usually PlasticcBcolzSource.get_default().
    :param dataset: Name of the dataset, usually 'test_set' or 'training_set'.
    :param obj_ids: List of IDs. Should be unique, but seems to work OK otherwise.
    """
    # Pandas index mapping object_id -> (start_idx, end_idx) row ranges
    # in the bcolz table; metadata lives in a parallel '<dataset>_metadata'.
    index_table = bcolz_source.get_pandas_index(dataset)
    bcolz_table = bcolz_source.get_table(dataset)
    meta_table = bcolz_source.get_table(dataset + '_metadata')
    # Nothing requested: return early rather than hitting the index.
    if not obj_ids:
        return []
    lcs = {}
    try:
        index_rows = index_table.loc[obj_ids].itertuples()
    except KeyError as e:
        # Re-raise with context; .loc raises if any requested ID is absent.
        raise KeyError(
            "Couldn't find requested object IDs in index! Original error: {!r}"
            .format(e))
    # NOTE(review): relies on .loc[obj_ids] yielding rows in the same order
    # as obj_ids so this zip pairs each ID with its own index row.
    for object_id, index_row in zip(obj_ids, index_rows):
        # Contiguous slice of this object's observations.
        subsel = bcolz_table[index_row.start_idx:index_row.end_idx]
        bands = {}
        for band_idx, band_name in enumerate(cls.expected_bands):
            # Split the slice into one BandData per passband.
            passband_sel = subsel[subsel['passband'] == band_idx]
            bands[band_name] = lightcurve.BandData(
                passband_sel['mjd'],
                passband_sel['flux'],
                passband_sel['flux_err'],
                passband_sel['detected'],
            )
        lcs[object_id] = cls(**bands)
    # Second pass: attach each light curve's metadata row as a dict.
    meta_object_ids = meta_table['object_id'][:]
    # NOTE(review): assume_unique=True despite the docstring saying IDs only
    # "should" be unique — results are undefined if duplicates are passed.
    meta_row_mask = np.isin(meta_object_ids, obj_ids, assume_unique=True)
    meta_rows = meta_table[meta_row_mask]
    object_id_name_index = meta_table.names.index('object_id')
    for meta_row in meta_rows:
        obj_id = meta_row[object_id_name_index]
        lc = lcs[obj_id]
        lc.meta = dict(zip(meta_table.names, meta_row))
    return list(lcs.values())
def apply(self, band):
    """Randomly subsample a band, always keeping points near `preserve_time`.

    Points within `preserve_time_radius` of `preserve_time` are always
    retained; a random fraction (between `min_rate` and `max_rate`) of the
    remaining points is sampled without replacement.
    """
    all_indices = np.arange(0, len(band.time))
    near_preserve = np.abs(band.time - self.preserve_time) < self.preserve_time_radius
    kept = all_indices[near_preserve]
    candidates = all_indices[~near_preserve]
    num_candidates = len(candidates)

    # Aim for at least 2 sampled points, unless the band is very small.
    floor_size = min(num_candidates, 2)
    lo = max(floor_size, int(self.min_rate * num_candidates))
    hi = int(math.ceil(self.max_rate * num_candidates))
    # randint requires lo < hi; otherwise the sample size is forced to lo.
    sample_size = lo if lo >= hi else self.rng.randint(lo, hi)

    chosen = self.rng.choice(candidates, size=sample_size, replace=False)
    selection = np.sort(np.concatenate([kept, chosen]))
    return lightcurve.BandData(
        time=band.time[selection],
        flux=band.flux[selection],
        flux_err=band.flux_err[selection],
        detected=band.detected[selection])
def make_super_easy(cls, time=None):
    """Build a minimal two-point test light curve in band 'b'.

    :param time: Optional [2]-shaped time array; defaults to [2.0, 3.0].
    """
    if time is None:
        time = np.array([2.0, 3.0])
    band = lightcurve.BandData(
        time=time,
        flux=np.array([5.0, 6.0]),
        flux_err=np.array([1.0, 1.0]),
    )
    return TestLC(b=band)