def test_generate_and_extract_per_band_features():
    """Per-band features get a ``band_<name>.`` prefix and can be read back.

    Builds a two-band light curve, runs an extractor that reports the first
    flux and first flux error of each band, and checks both the combined
    feature dict and the un-prefixed view for band ``a``.
    """
    band_a = lightcurve.BandData(
        time=np.array([1, 2.0]),
        flux=np.array([4, 2.0]),
        flux_err=np.array([5, 2.0]),
    )
    band_b = lightcurve.BandData(
        time=np.array([1, 2.0]),
        flux=np.array([2, 2.0]),
        flux_err=np.array([3, 2.0]),
    )
    light_curve = TwoBandTestLc(a=band_a, b=band_b)

    def feature_extractor(band_data: lightcurve.BandData):
        # Only the first sample of each series is reported, which keeps the
        # expected values below easy to read off the fixtures.
        first_flux = band_data.flux[0]
        first_flux_err = band_data.flux_err[0]
        return {'first_flux': first_flux, 'first_flux_err': first_flux_err}

    settings = band_settings_params.BandSettings(['a', 'b'])

    all_features = settings.generate_per_band_features(
        feature_extractor, light_curve)
    expected_all = {
        'band_a.first_flux': 4.0,
        'band_a.first_flux_err': 5.0,
        'band_b.first_flux': 2.0,
        'band_b.first_flux_err': 3.0,
    }
    assert all_features == expected_all

    only_a = settings.get_band_features(all_features, 'a')
    expected_a = {
        'first_flux': 4.0,
        'first_flux_err': 5.0,
    }
    assert only_a == expected_a
Exemple #2
0
 def apply(self, bd) -> lightcurve.BandData:
     """Return a new band with each series passed through its own callable.

     :param bd: Band to transform; its ``time``, ``flux``, ``flux_err`` and
         ``detected`` attributes are read.
     :return: ``BandData`` built from the transformed time, flux and flux
         error; ``detected`` is passed through unchanged.
     """
     time = self._time_fn(bd.time)
     flux = self._flux_fn(bd.flux)
     flux_err = self._flux_err_fn(bd.flux_err)
     return lightcurve.BandData(time, flux, flux_err, bd.detected)
Exemple #3
0
def format_dense_multi_band_from_lc_dict(
        lc_dict, band_order=('g', 'r', 'i', 'z')) -> SNDatasetLC:
    """Resamples a multi-band LC dictionary onto the first band's time grid.

    For every time stamp of the first band in ``band_order``, each band
    contributes the row whose time (column 0) is closest to that stamp, as if
    every band had been sampled at the same moments. The nested Python loops
    make this slow.

    :param lc_dict: Dictionary from lc_dict_for_id; each value is indexed as
        a 2-D array whose columns 0, 1, 2 are used as time, flux, flux error.
    :param band_order: Order of expected bands; must match the keys of
        ``lc_dict`` exactly.
    :return: Light curve with one dense ``BandData`` per band.
    :raises ValueError: If the keys of ``lc_dict`` differ from ``band_order``.
    """
    if frozenset(lc_dict.keys()) != frozenset(band_order):
        raise ValueError("Unexpected keys {}".format(lc_dict.keys()))

    rows_by_band = [lc_dict[name] for name in band_order]
    # The first band's time stamps define the shared sampling grid.
    reference_times = rows_by_band[0][:, 0]

    def nearest_row(rows, when):
        # One row of `rows`: the one whose time is closest to `when`.
        return rows[np.argmin(np.abs(rows[:, 0] - when))]

    # Indexed below as [point, band, column].
    dense = np.array(
        [[nearest_row(rows, when) for rows in rows_by_band]
         for when in reference_times],
        dtype=np.float64)

    dense_bands: typing.Dict[str, lightcurve.BandData] = {
        name: lightcurve.BandData(
            time=dense[:, position, 0],
            flux=dense[:, position, 1],
            flux_err=dense[:, position, 2]
        )
        for position, name in enumerate(band_order)
    }
    return SNDatasetLC(**dense_bands)
Exemple #4
0
 def apply(self, bd) -> lightcurve.BandData:
     """Return a translated-then-dilated copy of a band.

     Time and flux are each shifted by their translation and then multiplied
     by their dilation factor; ``detected`` is carried over unchanged.

     :param bd: Band to transform.
     :return: New ``BandData`` with the transformed series.
     """
     shifted_time = bd.time + self._translate_time
     shifted_flux = bd.flux + self._translate_flux
     time = self._dilate_time * shifted_time
     flux = self._dilate_flux * shifted_flux
     # TODO: does error really behave this way?
     # NOTE(review): plain linear error propagation would scale the error by
     # abs(dilate_flux); the square root looks like a photon-noise style
     # scaling -- confirm which one is intended.
     flux_err = np.sqrt(self._dilate_flux) * bd.flux_err
     return lightcurve.BandData(time, flux, flux_err, detected=bd.detected)
Exemple #5
0
    def _get_band_from_raw(cls, conn, dataset, obj_id, band_id):
        """Load one passband of one object from the row-per-observation table.

        :param conn: Database connection exposing ``execute(sql, params)``
            with ``?`` placeholders (presumably sqlite3 -- confirm).
        :param dataset: Name of the table to query. It is formatted directly
            into the SQL text, so it must come from trusted code.
        :param obj_id: Value matched against the ``object_id`` column.
        :param band_id: Value matched against the ``passband`` column.
        :return: ``BandData`` built from the ``mjd``, ``flux``, ``flux_err``
            and ``detected`` columns, ordered by ``mjd``.
        :raises ValueError: If the query returns no rows.
        """
        # NOTE(review): identifiers cannot be bound as query parameters, which
        # is why the table name is interpolated; never pass user input here.
        q = '''select mjd, flux, flux_err, detected
                from {}
                where object_id = ? and passband = ?
                order by mjd'''.format(dataset)
        rows = conn.execute(q, [obj_id, band_id]).fetchall()
        if not rows:
            # Without this guard, transposing zero rows yields zero columns
            # and the unpacking below fails with an unhelpful
            # "not enough values to unpack" ValueError.
            raise ValueError(
                "No rows in {!r} for object_id={!r}, passband={!r}".format(
                    dataset, obj_id, band_id))
        # Transpose the row tuples into one array per column.
        times, fluxes, flux_errs, detected = (
            np.array(series) for series in zip(*rows)
        )
        return lightcurve.BandData(times, fluxes, flux_errs, detected)
Exemple #6
0
 def _get_band_from_blobs(cls, conn, dataset, obj_id, band_id):
     """Load one passband of one object from the ``<dataset>_blob`` table.

     Each selected column holds a whole series packed into a binary blob,
     which is decoded with ``np.frombuffer``.

     :param conn: Database connection exposing ``execute(sql, params)`` with
         ``?`` placeholders (presumably sqlite3 -- confirm).
     :param dataset: Table name prefix; ``_blob`` is appended. It is
         formatted directly into the SQL text, so it must come from trusted
         code.
     :param obj_id: Value matched against the ``object_id`` column.
     :param band_id: Value matched against the ``passband`` column.
     :return: ``BandData`` with time, flux and flux error decoded as float64
         and ``detected`` decoded as booleans.
     """
     res = conn.execute(
         '''
         select mjd, flux, flux_err, detected
         from {}_blob
         where object_id = ?
         and passband = ?
         '''.format(dataset), [obj_id, band_id])
     # NOTE(review): if no row matches and fetchone() returns None, this
     # unpacking raises TypeError -- confirm callers only ask for existing
     # (object, passband) pairs.
     raw_times, raw_fluxes, raw_flux_errs, raw_detected = res.fetchone()
     times = np.frombuffer(raw_times, dtype=np.float64)
     fluxes = np.frombuffer(raw_fluxes, dtype=np.float64)
     flux_errs = np.frombuffer(raw_flux_errs, dtype=np.float64)
     # Keep one byte out of every eight. Presumably each flag was stored as
     # an 8-byte little-endian integer, so the first byte carries the 0/1
     # value -- TODO confirm against the code that writes the blobs.
     # np.bool_ replaces the np.bool8 alias, which NumPy 2.0 removed.
     detected = np.frombuffer(raw_detected[::8], dtype=np.bool_)
     return lightcurve.BandData(times, fluxes, flux_errs, detected)
Exemple #7
0
    def bcolz_get_lcs_by_obj_ids(
            cls, bcolz_source: PlasticcBcolzSource, dataset: str,
            obj_ids: typing.List[int]) -> typing.List['PlasticcDatasetLC']:
        """Loads the light curves, with metadata attached, for some object IDs.

        :param bcolz_source: Data source instance, usually PlasticcBcolzSource.get_default().
        :param dataset: Name of the dataset, usually 'test_set' or 'training_set'.
            Metadata is read from the table named ``dataset + '_metadata'``.
        :param obj_ids: List of IDs. Should be unique, but seems to work OK otherwise.
        :return: One light curve per distinct requested ID; an empty list when
            ``obj_ids`` is empty.
        :raises KeyError: If the index lookup for ``obj_ids`` fails.
        """
        index_table = bcolz_source.get_pandas_index(dataset)
        bcolz_table = bcolz_source.get_table(dataset)
        meta_table = bcolz_source.get_table(dataset + '_metadata')
        if not obj_ids:
            return []

        try:
            index_rows = index_table.loc[obj_ids].itertuples()
        except KeyError as e:
            raise KeyError(
                "Couldn't find requested object IDs in index! Original error: {!r}"
                .format(e))

        def band_from(observations, band_idx):
            # Keep only the rows recorded in the passband numbered `band_idx`.
            in_band = observations[observations['passband'] == band_idx]
            return lightcurve.BandData(
                in_band['mjd'],
                in_band['flux'],
                in_band['flux_err'],
                in_band['detected'],
            )

        lcs = {}
        for object_id, index_row in zip(obj_ids, index_rows):
            # Each index row gives the slice of the main table that holds
            # this object's observations.
            observations = bcolz_table[index_row.start_idx:index_row.end_idx]
            lcs[object_id] = cls(**{
                band_name: band_from(observations, band_idx)
                for band_idx, band_name in enumerate(cls.expected_bands)
            })

        # Attach metadata: select the metadata rows of the requested objects
        # and store each one as a column-name -> value dict on its curve.
        requested = np.isin(
            meta_table['object_id'][:], obj_ids, assume_unique=True)
        selected_meta = meta_table[requested]
        id_position = meta_table.names.index('object_id')
        for meta_row in selected_meta:
            lcs[meta_row[id_position]].meta = dict(
                zip(meta_table.names, meta_row))
        return list(lcs.values())
Exemple #8
0
 def apply(self, band):
     """Randomly subsample a band while keeping points near a chosen time.

     Points whose time lies within ``self.preserve_time_radius`` of
     ``self.preserve_time`` are always kept. From the remaining points a
     random subset is drawn without replacement, with a size bounded by
     ``self.min_rate`` and ``self.max_rate`` of their count.

     :param band: Band to subsample; ``time``, ``flux``, ``flux_err`` and
         ``detected`` are all indexed with the chosen positions.
     :return: New ``BandData`` holding the kept points in index order.
     """
     positions = np.arange(len(band.time))
     near_preserved = (np.abs(band.time - self.preserve_time)
                       < self.preserve_time_radius)
     always_kept = positions[near_preserved]
     candidates = positions[~near_preserved]
     candidate_count = len(candidates)

     # Normally at least 2 points are drawn, fewer only when fewer exist.
     floor_size = min(candidate_count, 2)
     lower = max(floor_size, int(self.min_rate * candidate_count))
     upper = int(math.ceil(self.max_rate * candidate_count))
     # NOTE(review): if self.rng is a NumPy RandomState, randint excludes
     # `upper` itself -- confirm that is the intended range.
     sample_size = (lower if lower >= upper
                    else self.rng.randint(lower, upper))

     drawn = self.rng.choice(candidates, size=sample_size, replace=False)
     chosen = np.sort(np.concatenate([always_kept, drawn]))
     return lightcurve.BandData(
         time=band.time[chosen],
         flux=band.flux[chosen],
         flux_err=band.flux_err[chosen],
         detected=band.detected[chosen],
     )
Exemple #9
0
 def make_super_easy(cls, time=None):
     """Build a tiny single-band ``TestLC`` fixture.

     :param time: Time stamps for band ``b``; defaults to ``[2.0, 3.0]``.
         The flux and flux error arrays always hold two values.
     :return: ``TestLC`` whose only band ``b`` has flux ``[5.0, 6.0]`` and
         flux error ``[1.0, 1.0]``.
     """
     if time is None:
         time = np.array([2.0, 3.0])
     flux = np.array([5.0, 6.0])
     flux_err = np.array([1.0, 1.0])
     return TestLC(b=lightcurve.BandData(time=time, flux=flux, flux_err=flux_err))