예제 #1
0
def test_generate_and_extract_per_band_features():
    """Check per-band feature generation and the matching per-band lookup.

    A two-band light curve is passed through
    ``BandSettings.generate_per_band_features`` together with an extractor that
    reports the first flux and first flux error of a band. The combined result
    is expected to key each value as ``band_<name>.<feature>``, and
    ``get_band_features`` is expected to hand back one band's entries under
    their un-prefixed names.
    """
    band_a = lightcurve.BandData(
        time=np.array([1, 2.0]),
        flux=np.array([4, 2.0]),
        flux_err=np.array([5, 2.0]),
    )
    band_b = lightcurve.BandData(
        time=np.array([1, 2.0]),
        flux=np.array([2, 2.0]),
        flux_err=np.array([3, 2.0]),
    )
    two_band_lc = TwoBandTestLc(a=band_a, b=band_b)

    def first_point_summary(band_data: lightcurve.BandData):
        # Only the leading sample of the band is reported.
        summary = {}
        summary['first_flux'] = band_data.flux[0]
        summary['first_flux_err'] = band_data.flux_err[0]
        return summary

    settings = band_settings_params.BandSettings(['a', 'b'])

    all_band_features = settings.generate_per_band_features(
        first_point_summary, two_band_lc)
    expected_all = {
        'band_a.first_flux': 4.0,
        'band_a.first_flux_err': 5.0,
        'band_b.first_flux': 2.0,
        'band_b.first_flux_err': 3.0,
    }
    assert all_band_features == expected_all

    # Pulling a single band back out should drop the "band_a." prefix.
    only_a = settings.get_band_features(all_band_features, 'a')
    expected_a = {
        'first_flux': 4.0,
        'first_flux_err': 5.0,
    }
    assert only_a == expected_a
def test_extraction():
    """Check dflux/dt computation and masking on a two-point light curve.

    The light curve comes from ``simulate.TestLC.make_super_easy()``, which has
    time values [2, 3] and flux values [5, 6]. Raw features are extracted at
    t=1.9 and t=3.0, and the two results are concatenated into one batch of
    size two. Each 8-entry row below is 2 * window_size wide: the first
    batch element should keep a single unmasked dflux/dt of 1 at index 4 (the
    first "after" slot), and the second a single unmasked dflux/dt of 1 at
    index 3 (the "before" slot nearest the selected time).

    A second ``WindowFeatures`` with a much smaller ``band_time_diff`` (0.01
    instead of 0.2) masks the first element entirely -- presumably because the
    extraction time 1.9 lies 0.1 from the nearest sample; confirm against
    ``WindowFeatures.masked``.
    """
    window_size = 4
    batch_size = 2

    lc = simulate.TestLC.make_super_easy()
    band_settings = band_settings_params.BandSettings(bands=['b'])
    extractor = raw_value_features.RawValueExtractor(
        window_size=window_size, band_settings=band_settings)
    features_at_1_9 = extractor.extract(lc, 1.9)
    features_at_3_0 = extractor.extract(lc, 3.0)

    with tf.Graph().as_default() as graph:
        head = tf.data.Dataset.from_tensors(features_at_1_9)
        tail = tf.data.Dataset.from_tensors(features_at_3_0)
        batched = head.concatenate(tail).batch(batch_size, drop_remainder=True)
        batch = batched.make_one_shot_iterator().get_next()

        lenient = dense_extracted_features.WindowFeatures(
            band_features=band_settings.get_band_features(batch, band_name='b'),
            batch_size=batch_size,
            window_size=window_size,
            band_time_diff=0.2,
        )
        slopes = lenient.dflux_dt(clip_magnitude=7.0)
        slopes_masked = lenient.masked(slopes, 0, [])

        # Same window, but the band must be sampled within a far tighter
        # tolerance of the selected time.
        strict = dense_extracted_features.WindowFeatures(
            band_features=band_settings.get_band_features(batch, band_name='b'),
            batch_size=batch_size,
            window_size=window_size,
            band_time_diff=0.01,
        )
        # The strict variant masks the slopes computed by the lenient window.
        slopes_masked_strict = strict.masked(slopes, 0, [])

        fetches = {
            'before_flux': batch['band_b.before_flux'],
            'dflux_dt': slopes,
            'dflux_dt_masked': slopes_masked,
            'dflux_dt_masked_strict': slopes_masked_strict
        }
        with tf.Session(graph=graph) as sess:
            fetched = sess.run(fetches)
            values = {name: array.tolist() for name, array in fetched.items()}

    expected = {
        'before_flux': [[0.0, 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, 5.0]],
        'dflux_dt': [[2.5, 2.5, 2.5, 2.5, 1.0, 2.5, 2.5, 2.5],
                     [2.0, 2.0, 2.0, 1.0, 2.0, 2.0, 2.0, 2.0]],
        'dflux_dt_masked': [[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
                            [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]],
        'dflux_dt_masked_strict': [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                                   [0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]],
    }
    assert values == expected
def _make_test_dataset(params):
    """Build a single-batch dataset pairing features from two light curves.

    Light curves for object ids 745 and 10757 are loaded from the default
    Plasticc bcolz source ("training_set"). For each one, raw-value features
    are extracted at the time found at index 10 of ``all_times_unique()``.

    Args:
        params: Mapping read for "lc_bands" (passed to ``BandSettings``) and
            "window_size" (passed to ``RawValueExtractor``).

    Returns:
        The zip of the two single-element datasets, mapped through
        ``_prefix_lr`` and batched with batch size 1 (``drop_remainder=True``).
    """
    bcolz_source = plasticc_data.PlasticcBcolzSource.get_default()
    light_curves = plasticc_data.PlasticcDatasetLC.bcolz_get_lcs_by_obj_ids(
        bcolz_source=bcolz_source,
        dataset="training_set",
        obj_ids=[745, 10757],
    )
    settings = band_settings_params.BandSettings(bands=params["lc_bands"])
    extractor = raw_value_features.RawValueExtractor(
        window_size=params["window_size"],
        band_settings=settings,
    )

    # One feature dict per light curve, all extracted before any dataset is built.
    per_curve_features = [
        extractor.extract(lc, lc.all_times_unique()[10])
        for lc in (light_curves[0], light_curves[1])
    ]
    first_ds, second_ds = [
        tf.data.Dataset.from_tensors(features)
        for features in per_curve_features
    ]

    paired = tf.data.Dataset.zip((first_ds, second_ds))
    return paired.map(_prefix_lr).batch(1, drop_remainder=True)
def test_dense_feature_extraction():
    """Run dense feature extraction on one stored light curve via an Estimator.

    The light curve with object id 1598 is loaded from the default Plasticc
    bcolz source. A throwaway ``model_fn`` exposes the output of
    ``dense_extracted_features.feature_model_fn`` as one prediction per band
    (plus "time"), and the test inspects the 'y' entry of prediction 100.
    """
    window_size = 10
    batch_size = 5

    bcolz_source = plasticc_data.PlasticcBcolzSource.get_default()
    [lc] = plasticc_data.PlasticcDatasetLC.bcolz_get_lcs_by_obj_ids(
        bcolz_source=bcolz_source, dataset="training_set", obj_ids=[1598])

    def model_fn(features, labels, mode, params):
        del labels  # unused
        band_settings = band_settings_params.BandSettings.from_params(params)
        dense = dense_extracted_features.feature_model_fn(features, params)
        # Axis 4 is split into one tensor per configured band.
        per_band_tensors = tf.unstack(dense, axis=4)
        predictions = dict(zip(band_settings.bands, per_band_tensors))
        predictions["time"] = features["time"]
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            loss=tf.constant(0.0),
            train_op=tf.no_op(),
        )

    extractor = raw_value_features.RawValueExtractor(
        window_size=window_size,
        band_settings=band_settings_params.BandSettings(lc.expected_bands))
    data_gen = per_point_dataset.PerPointDatasetGenerator(
        extract_fcn=extractor.extract,
        batch_size=batch_size,
    )

    estimator_params = {
        'batch_size': batch_size,
        'window_size': window_size,
        'flux_scale_epsilon': 0.5,
        'lc_bands': lc.expected_bands,
    }
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       params=estimator_params)

    per_point_predictions = list(
        data_gen.predict_single_lc(estimator, lc, arrays_to_list=False))
    y_band = per_point_predictions[100]['y']

    # Axes: 2 * window_size, channels (dflux/dt, dflux, dtime), and a final
    # axis of 32 that the checks below treat as bins (presumably the bin
    # count -- the band axis was already unstacked in model_fn).
    assert y_band.shape == (2 * window_size, 3, 32)

    dtime = y_band[:, 2, :]
    # Along the window axis values must not decrease: WindowFeatures computes
    # (point in window time) - (selected time), which grows as the window
    # shifts.
    assert (dtime[1:, :] >= dtime[:-1, :]).all()
    # Along the bin axis values must not increase: each bin fuzzily encodes
    # "actual value is greater than this bin's center", and that weakens as
    # the bin centers increase.
    assert (dtime[:, 1:] <= dtime[:, :-1]).all()
예제 #5
0
def test_basic_extraction():
    """Check raw-value extraction at each sample of the two-point test curve.

    With a window size of 4 on band 'b', extraction at t=2 and at t=3 is
    expected to report the padding counts, a zero closest-time difference, and
    the neighbouring flux placed at the window edge nearest the selected time.
    """
    lc = simulate.TestLC.make_super_easy()
    settings = band_settings_params.BandSettings(bands=['b'])
    extractor = raw_value_features.RawValueExtractor(
        window_size=4, band_settings=settings)

    # At t=2 nothing precedes the point and one sample (flux 6) follows it.
    at_first = extractor.extract(lc, 2)
    assert at_first['band_b.before_padding'] == 4
    assert at_first['band_b.after_padding'] == 3
    assert at_first['band_b.closest_time_diff'] == 0
    assert at_first['band_b.after_flux'].tolist() == [6, 0, 0, 0]

    # At t=3 one sample (flux 5) precedes the point and nothing follows it.
    at_second = extractor.extract(lc, 3)
    assert at_second['band_b.before_padding'] == 3
    assert at_second['band_b.after_padding'] == 4
    assert at_second['band_b.closest_time_diff'] == 0
    assert at_second['band_b.before_flux'].tolist() == [0, 0, 0, 5]

    # The last "before" slot should sit exactly one time unit earlier.
    closest_time = at_second['band_b.closest_time_in_band']
    before_times = at_second['band_b.before_time']
    elapsed = closest_time - before_times
    assert elapsed.tolist()[-1] == 1.0
예제 #6
0
def test_raw_features_dataset():
    """Check the per-point dataset and single-light-curve predictions.

    A ``PerPointDatasetGenerator`` with batch size 5 is built over the
    two-point test light curve. The test verifies the reported batch and
    non-padding counts, the 'band_b.before_padding' values of the only batch,
    that every output shape is fully defined with a leading dimension of 5,
    and the predictions produced through an Estimator.

    Note: ``model_fn`` is not defined in this function; it must be provided by
    the enclosing module.
    """
    batch_size = 5
    window_size = 4

    lc = simulate.TestLC.make_super_easy()
    assert len(lc.bands['b'].time) == 2, "TestLC.make_super_easy() changed"

    extractor = raw_value_features.RawValueExtractor(
        window_size=window_size,
        band_settings=band_settings_params.BandSettings(['b']))
    data_gen = per_point_dataset.PerPointDatasetGenerator(
        extract_fcn=extractor.extract,
        batch_size=batch_size,
    )

    with tf.Graph().as_default() as graph:
        dataset, num_batches, num_non_padding = data_gen.make_dataset(lc)
        assert num_non_padding == 2, "LC has 2 time points"
        assert num_batches == 1

        next_batch = dataset.make_one_shot_iterator().get_next()
        with tf.Session(graph=graph) as sess:
            first_batch = sess.run(next_batch)
            assert first_batch['band_b.before_padding'].tolist() == [
                4, 3, 4, 4, 4
            ]

        for tensor_shape in dataset.output_shapes.values():
            # int() fails on an unknown dimension, so this also enforces that
            # every tensor has a concrete shape.
            dims = [int(dim) for dim in tensor_shape]
            assert dims[0] == 5, "First dimension should be batch dimension."

    estimator_params = {
        'batch_size': batch_size,
        'window_size': window_size,
        'flux_scale_epsilon': 0.5,
        'lc_bands': ['b'],
    }
    estimator = tf.estimator.Estimator(model_fn=model_fn,
                                       params=estimator_params)

    predictions = list(data_gen.predict_single_lc(estimator, lc))
    expected_predictions = [
        {'is_max_soft': [0.11920291930437088], 'time': 2.0},
        {'is_max_soft': [0.8807970285415649], 'time': 3.0},
    ]
    assert predictions == expected_predictions
예제 #7
0
 def __init__(self, bands, window_size):
     """Create band settings and a raw-value extractor, then initialize the parent.

     Args:
         bands: Passed to ``BandSettings(bands=...)``; the resulting settings
             object is stored on ``self.band_settings``.
         window_size: Passed to ``RawValueExtractor`` as its window size.
     """
     self.band_settings = band_settings_params.BandSettings(bands=bands)
     extractor = raw_value_features.RawValueExtractor(
         window_size=window_size,
         band_settings=self.band_settings,
     )
     # The parent initializer receives the extractor and a fixed label of True.
     super(RawValuesFullPositives, self).__init__(fex=extractor, label=True)