Example #1
0
def test_featurize_time_series_pandas_metafeatures():
    """Check that pandas Series/DataFrame metafeatures reach the output.

    First a single time series is featurized with a ``pd.Series`` of
    metafeatures, then five time series with a ``pd.DataFrame`` of
    metafeatures containing one value per series in each column.
    """
    # Single time series, metafeatures supplied as a Series.
    t, m, e = sample_values()
    single_meta = pd.Series({'meta1': 0.5})
    single_fset = featurize.featurize_time_series(
        t, m, e, ['amplitude', 'std_err'], single_meta, scheduler=dask.get)
    npt.assert_allclose(single_fset['meta1'], 0.5)

    # Several time series, metafeatures supplied as a DataFrame.
    n_series = 5
    samples = [sample_values() for _ in range(n_series)]
    times, values, errors = (list(column) for column in zip(*samples))
    batch_meta = pd.DataFrame({
        'meta1': [0.5] * n_series,
        'meta2': [0.8] * n_series,
    })
    batch_fset = featurize.featurize_time_series(
        times, values, errors, ['amplitude', 'std_err'], batch_meta,
        scheduler=dask.get)
    for column, expected in (('meta1', 0.5), ('meta2', 0.8)):
        npt.assert_allclose(batch_fset[column], expected)
Example #2
0
def test_transform_ts_files():
    """A "Train/Test Split" transform of 4 + 8 labeled series has 2 outputs."""
    labeled_counts = (('class1', 4), ('class2', 8))
    time_series = []
    for label, count in labeled_counts:
        time_series.extend(TimeSeries(*sample_values(), target=label)
                           for _ in range(count))
    output = transformation.transform_ts_files(time_series,
                                               "Train/Test Split")
    npt.assert_equal(len(output), 2)
Example #3
0
def test_train_test_split_ratios():
    """An explicit 50/50 train/test split puts half the series in each part.

    Builds 4 'class1' and 8 'class2' time series, splits them with
    ``test_size=0.5`` and ``train_size=0.5``, and checks that both returned
    collections contain half of the input.
    """
    n_class1 = 4
    n_class2 = 8
    time_series = [TimeSeries(*sample_values(), target='class1')
                   for _ in range(n_class1)]
    time_series += [TimeSeries(*sample_values(), target='class2')
                    for _ in range(n_class2)]
    outputs = transformation.train_test_split(
        time_series, test_size=0.5, train_size=0.5)
    # Floor division keeps the expected count an int, like len() returns.
    expected_half = len(time_series) // 2
    npt.assert_equal(len(outputs[1]), expected_half)
    npt.assert_equal(len(outputs[0]), expected_half)
Example #4
0
def test_featurize_time_series_default_errors():
    """Run featurization with ``None`` errors on several input layouts.

    Only the result of the last call is inspected: it must contain the
    ``('amplitude', 0)`` column.
    """
    t, m, _ = sample_values(channels=3)
    feature_names = ['amplitude', 'std_err']
    no_meta = {}

    def run(times, values):
        # Errors are always passed as None.
        return featurize.featurize_time_series(
            times, values, None, feature_names, no_meta, scheduler=dask.get)

    # Data exactly as returned by sample_values.
    run(t, m)

    # One outer entry; the inner entries drop the last 0, 5 and 10 points.
    uneven_t = [[t, t[0:-5], t[0:-10]]]
    uneven_m = [[m[0], m[1][0:-5], m[2][0:-10]]]
    run(uneven_t, uneven_m)

    # Only the first (untrimmed) inner entry.
    fset = run(uneven_t[0][0], uneven_m[0][0])
    assert ('amplitude', 0) in fset.columns
Example #5
0
def test_featurize_time_series_default_times():
    """Run featurization with ``None`` times on several input layouts.

    The test contains no assertions; it only requires that each of the three
    calls completes without raising.
    """
    _, m, e = sample_values(channels=3)
    feature_names = ['amplitude', 'std_err']
    no_meta = {}

    def run(values, errors):
        # Times are always passed as None.
        return featurize.featurize_time_series(
            None, values, errors, feature_names, no_meta, scheduler=get_sync)

    # Data exactly as returned by sample_values.
    run(m, e)

    # One outer entry; the inner entries drop the last 0, 5 and 10 points.
    uneven_m = [[m[0], m[1][0:-5], m[2][0:-10]]]
    uneven_e = [[e[0], e[1][0:-5], e[2][0:-10]]]
    run(uneven_m, uneven_e)

    # Only the first (untrimmed) inner entry.
    run(uneven_m[0][0], uneven_e[0][0])
Example #6
0
def test_ignore_exceptions():
    """A failing feature raises or yields NaN, per ``raise_exceptions``.

    The 'mean' entry of ``cesium.features.graphs.dask_feature_graph`` is
    temporarily replaced by a task whose function always raises
    ``ValueError``. The original entry is restored in ``finally``.
    """
    import cesium.features.graphs

    def always_fails(x):
        raise ValueError()

    feature_graph = cesium.features.graphs.dask_feature_graph
    saved_entry = feature_graph['mean']
    try:
        feature_graph['mean'] = (always_fails, 't')
        t, m, e = sample_values()
        feature_names = ['mean']

        # With raise_exceptions=True the ValueError must propagate.
        with pytest.raises(ValueError):
            featurize.featurize_time_series(
                t, m, e, feature_names,
                scheduler=dask.get, raise_exceptions=True)

        # With raise_exceptions=False every output value must be NaN.
        fset = featurize.featurize_time_series(
            t, m, e, feature_names,
            scheduler=dask.get, raise_exceptions=False)
        assert np.isnan(fset.values).all()
    finally:
        feature_graph['mean'] = saved_entry
Example #7
0
def test_featurize_time_series_single():
    """Featurizing one time series gives float64 'amplitude' values."""
    times, values, errors = sample_values()
    fset = featurize.featurize_time_series(
        times, values, errors,
        ['amplitude', 'std_err'],
        {'meta1': 0.5},
        scheduler=dask.get,
    )
    amplitude_dtype = fset['amplitude'].values.dtype
    assert amplitude_dtype == np.float64
Example #8
0
def test_featurize_time_series_single():
    """Check the dtype of 'amplitude' for a single featurized time series."""
    sample = sample_values()
    requested = ['amplitude', 'std_err']
    metadata = {'meta1': 0.5}
    # sample unpacks into the (t, m, e) positional arguments.
    fset = featurize.featurize_time_series(*sample, requested, metadata,
                                           scheduler=dask.get)
    assert fset['amplitude'].values.dtype == np.float64
Example #9
0
def test_train_test_split():
    """The "Train/Test Split" transform halves each class across both parts.

    The NumPy random seed is fixed to 0 before the transform is applied.
    """
    # Mock out unevenly-labeled test data: 4 class1, 8 class2
    class_counts = (('class1', 4), ('class2', 8))
    time_series = [TimeSeries(*sample_values(), target=label)
                   for label, count in class_counts
                   for _ in range(count)]
    np.random.seed(0)
    train, test = transformation.transform_ts_files(time_series,
                                                    "Train/Test Split")
    # Each class must contribute half of its members to train and to test.
    for label, count in class_counts:
        for subset in (train, test):
            n_found = sum(ts.target == label for ts in subset)
            npt.assert_equal(n_found, count / 2)
Example #10
0
def test_featurize_time_series_single_multichannel():
    """Featurize one 3-channel time series and check the output columns."""
    t, m, e = sample_values(channels=3)
    fset = featurize.featurize_time_series(
        t, m, e, ['amplitude', 'std_err'], {'meta1': 0.5},
        scheduler=dask.get)
    columns = fset.columns
    assert ('amplitude', 0) in columns
    assert 'meta1' in columns
Example #11
0
def test_featurize_time_series_single_multichannel():
    """Check feature and metafeature columns for one multichannel series."""
    sample = sample_values(channels=3)
    requested = ['amplitude', 'std_err']
    metadata = {'meta1': 0.5}
    # sample unpacks into the (t, m, e) positional arguments.
    fset = featurize.featurize_time_series(*sample, requested, metadata,
                                           scheduler=dask.get)
    for expected_column in (('amplitude', 0), 'meta1'):
        assert expected_column in fset.columns
Example #12
0
def test_featurize_time_series_pandas_metafeatures():
    """Metafeatures given as pandas objects must appear in the feature set.

    Exercises a ``pd.Series`` with one time series and a ``pd.DataFrame``
    with five time series.
    """
    # Case 1: one time series, Series metafeatures.
    t, m, e = sample_values()
    requested = ['amplitude', 'std_err']
    series_meta = pd.Series({'meta1': 0.5})
    fset = featurize.featurize_time_series(
        t, m, e, requested, series_meta, scheduler=dask.get)
    npt.assert_allclose(fset['meta1'], 0.5)

    # Case 2: five time series, DataFrame metafeatures.
    n_series = 5
    samples = [sample_values() for _ in range(n_series)]
    times, values, errors = (list(part) for part in zip(*samples))
    requested = ['amplitude', 'std_err']
    frame_meta = pd.DataFrame({'meta1': [0.5] * n_series,
                               'meta2': [0.8] * n_series})
    fset = featurize.featurize_time_series(
        times, values, errors, requested, frame_meta, scheduler=dask.get)
    npt.assert_allclose(fset['meta1'], 0.5)
    npt.assert_allclose(fset['meta2'], 0.8)
Example #13
0
def test_featurize_time_series_single():
    """Featurize one time series with a target and check vars and target."""
    t, m, e = sample_values()
    fset = featurize.featurize_time_series(
        t, m, e, ['amplitude', 'std_err'], 'class1', {'meta1': 0.5},
        scheduler=get_sync)
    expected_vars = ['amplitude', 'meta1', 'std_err']
    npt.assert_array_equal(sorted(fset.data_vars), expected_vars)
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #14
0
def test_featurize_time_series_multiple():
    """Featurize five time series and check the 'feature' column level."""
    n_series = 5
    samples = [sample_values() for _ in range(n_series)]
    times, values, errors = (list(column) for column in zip(*samples))
    # The same dict object is repeated once per series.
    per_series_meta = [{'meta1': 0.5}] * n_series
    fset = featurize.featurize_time_series(
        times, values, errors, ['amplitude', 'std_err'], per_series_meta,
        scheduler=dask.get)
    feature_level = fset.columns.get_level_values('feature')
    npt.assert_array_equal(sorted(feature_level),
                           ['amplitude', 'meta1', 'std_err'])
Example #15
0
def test_featurize_time_series_multiple():
    """Check the feature names produced for a batch of five time series."""
    n_series = 5
    times, values, errors = [], [], []
    for _ in range(n_series):
        t, m, e = sample_values()
        times.append(t)
        values.append(m)
        errors.append(e)
    requested = ['amplitude', 'std_err']
    # The same dict object is repeated once per series.
    metadata = [{'meta1': 0.5}] * n_series
    fset = featurize.featurize_time_series(times, values, errors, requested,
                                           metadata, scheduler=dask.get)
    found = sorted(fset.columns.get_level_values('feature'))
    npt.assert_array_equal(found, ['amplitude', 'meta1', 'std_err'])
Example #16
0
def test_featurize_time_series_no_targets():
    """Featurizing with ``targets=None`` leaves 'target' out of the result.

    The requested features and the metafeature must still be present among
    the result's ``data_vars``.
    """
    t, m, e = sample_values()
    features_to_use = ['amplitude', 'std_err']
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, features_to_use,
                                           targets=None,
                                           meta_features=meta_features,
                                           scheduler=get_sync)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err'])
    assert 'target' not in fset
Example #17
0
def test_featurize_time_series_uneven_multichannel():
    """Featurize multichannel data whose channels differ in length."""
    full_t, full_m, full_e = sample_values(channels=3)
    # One outer entry; the inner entries drop the last 0, 5 and 10 points.
    t = [[full_t, full_t[0:-5], full_t[0:-10]]]
    m = [[full_m[0], full_m[1][0:-5], full_m[2][0:-10]]]
    e = [[full_e[0], full_e[1][0:-5], full_e[2][0:-10]]]
    fset = featurize.featurize_time_series(
        t, m, e, ['amplitude', 'std_err'], {'meta1': 0.5},
        scheduler=dask.get)
    columns = fset.columns
    assert ('amplitude', 0) in columns
    assert 'meta1' in columns
Example #18
0
def test_featurize_time_series_uneven_multichannel():
    """Check output columns when channels have unequal numbers of points."""
    raw_t, raw_m, raw_e = sample_values(channels=3)
    # Each input becomes a one-element list holding three entries that drop
    # the last 0, 5 and 10 points respectively.
    t = [[raw_t, raw_t[0:-5], raw_t[0:-10]]]
    m = [[raw_m[0], raw_m[1][0:-5], raw_m[2][0:-10]]]
    e = [[raw_e[0], raw_e[1][0:-5], raw_e[2][0:-10]]]
    requested = ['amplitude', 'std_err']
    metadata = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, requested, metadata,
                                           scheduler=dask.get)
    for expected_column in (('amplitude', 0), 'meta1'):
        assert expected_column in fset.columns
Example #19
0
def test_featurize_time_series_multiple_multichannel():
    """Featurize five 3-channel time series and check the output columns."""
    n_series = 5
    samples = [sample_values(channels=3) for _ in range(n_series)]
    times, values, errors = (list(column) for column in zip(*samples))
    fset = featurize.featurize_time_series(
        times, values, errors, ['amplitude', 'std_err'], {'meta1': 0.5},
        scheduler=dask.get)
    columns = fset.columns
    assert ('amplitude', 0) in columns
    assert 'meta1' in columns
Example #20
0
def test_featurize_time_series_custom_functions():
    """Featurize with a custom function added to the built-in features.

    The custom feature 'test_f' always returns ``np.pi``; its channel-0 output
    must equal that constant.
    """
    t, m, e = sample_values(channels=3)
    extra_features = {'test_f': lambda t, m, e: np.pi}
    fset = featurize.featurize_time_series(
        t, m, e,
        ['amplitude', 'std_err', 'test_f'],
        {'meta1': 0.5},
        custom_functions=extra_features,
        scheduler=dask.get,
    )
    npt.assert_array_equal(fset['test_f', 0], np.pi)
    columns = fset.columns
    assert ('amplitude', 0) in columns
    assert 'meta1' in columns
Example #21
0
def test_featurize_time_series_custom_functions():
    """Check that a user-supplied feature function shows up in the output."""
    sample = sample_values(channels=3)
    requested = ['amplitude', 'std_err', 'test_f']
    metadata = {'meta1': 0.5}
    # 'test_f' ignores its inputs and returns the constant pi.
    user_functions = {'test_f': lambda t, m, e: np.pi}
    # sample unpacks into the (t, m, e) positional arguments.
    fset = featurize.featurize_time_series(*sample, requested, metadata,
                                           custom_functions=user_functions,
                                           scheduler=dask.get)
    npt.assert_array_equal(fset['test_f', 0], np.pi)
    for expected_column in (('amplitude', 0), 'meta1'):
        assert expected_column in fset.columns
Example #22
0
def test_featurize_time_series_multiple_multichannel():
    """Check output columns for a batch of multichannel time series."""
    n_series = 5
    n_channels = 3
    times, values, errors = [], [], []
    for _ in range(n_series):
        t, m, e = sample_values(channels=n_channels)
        times.append(t)
        values.append(m)
        errors.append(e)
    requested = ['amplitude', 'std_err']
    metadata = {'meta1': 0.5}
    fset = featurize.featurize_time_series(times, values, errors, requested,
                                           metadata, scheduler=dask.get)
    for expected_column in (('amplitude', 0), 'meta1'):
        assert expected_column in fset.columns
Example #23
0
def test_featurize_time_series_multiple():
    """Featurize five time series with targets; check vars and targets."""
    n_series = 5
    samples = [sample_values() for _ in range(n_series)]
    times, values, errors = (list(column) for column in zip(*samples))
    labels = ['class1'] * n_series
    targets = np.array(labels)
    # The same dict object is repeated once per series.
    per_series_meta = [{'meta1': 0.5}] * n_series
    fset = featurize.featurize_time_series(
        times, values, errors, ['amplitude', 'std_err'], targets,
        per_series_meta, scheduler=get_sync)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.target.values, labels)
Example #24
0
def test_featurize_time_series_single_multichannel():
    """Featurize one 3-channel series with a target; check the result.

    Verifies the data variable names, the ``channel`` values, the coordinate
    names on ``amplitude``, and the stored target.
    """
    n_channels = 3
    t, m, e = sample_values(channels=n_channels)
    fset = featurize.featurize_time_series(
        t, m, e, ['amplitude', 'std_err'], 'class1', {'meta1': 0.5},
        scheduler=get_sync)

    expected_vars = ['amplitude', 'meta1', 'std_err']
    expected_coords = ['channel', 'name', 'target']
    npt.assert_array_equal(sorted(fset.data_vars), expected_vars)
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    npt.assert_array_equal(sorted(fset.amplitude.coords), expected_coords)
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #25
0
def test_featurize_time_series_custom_dask_graph():
    """Featurize with custom features given as ``(callable, key)`` tuples.

    'test_f' is keyed on 'amplitude' and 'test_meta' on 'meta1'; both must
    appear as channel-0 columns next to 'amplitude'.
    """
    t, m, e = sample_values(channels=3)
    graph_entries = {
        'test_f': (lambda x: x.min() - x.max(), 'amplitude'),
        'test_meta': (lambda x: 2. * x, 'meta1'),
    }
    fset = featurize.featurize_time_series(
        t, m, e,
        ['amplitude', 'std_err', 'test_f', 'test_meta'],
        {'meta1': 0.5},
        custom_functions=graph_entries,
        scheduler=dask.get,
    )
    columns = fset.columns
    assert ('amplitude', 0) in columns
    assert ('test_f', 0) in columns
    assert ('test_meta', 0) in columns
Example #26
0
def test_featurize_time_series_custom_dask_graph():
    """Check that tuple-style custom features show up as output columns."""
    sample = sample_values(channels=3)
    requested = ['amplitude', 'std_err', 'test_f', 'test_meta']
    metadata = {'meta1': 0.5}
    # Each custom entry is a (callable, key) tuple.
    user_graph = {'test_f': (lambda x: x.min() - x.max(), 'amplitude'),
                  'test_meta': (lambda x: 2. * x, 'meta1')}
    # sample unpacks into the (t, m, e) positional arguments.
    fset = featurize.featurize_time_series(*sample, requested, metadata,
                                           custom_functions=user_graph,
                                           scheduler=dask.get)
    for feature in ('amplitude', 'test_f', 'test_meta'):
        assert (feature, 0) in fset.columns
Example #27
0
def test_featurize_time_series_uneven_multichannel():
    """Featurize uneven-length multichannel data with a target.

    Verifies the data variable names, the ``channel`` values, the coordinate
    names on ``amplitude``, and the stored target.
    """
    n_channels = 3
    full_t, full_m, full_e = sample_values(channels=n_channels)
    # One outer entry; the inner entries drop the last 0, 5 and 10 points.
    t = [[full_t, full_t[0:-5], full_t[0:-10]]]
    m = [[full_m[0], full_m[1][0:-5], full_m[2][0:-10]]]
    e = [[full_e[0], full_e[1][0:-5], full_e[2][0:-10]]]
    fset = featurize.featurize_time_series(
        t, m, e, ['amplitude', 'std_err'], 'class1', {'meta1': 0.5},
        scheduler=get_sync)

    expected_vars = ['amplitude', 'meta1', 'std_err']
    expected_coords = ['channel', 'name', 'target']
    npt.assert_array_equal(sorted(fset.data_vars), expected_vars)
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    npt.assert_array_equal(sorted(fset.amplitude.coords), expected_coords)
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #28
0
def test_featurize_time_series_custom_dask_graph():
    """Featurize with tuple-style custom features and a target.

    'test_meta' is defined in ``custom_functions`` but is not listed among
    the requested features, and it is not expected in the data variables.
    """
    n_channels = 3
    t, m, e = sample_values(channels=n_channels)
    graph_entries = {
        'test_f': (lambda x: x.min() - x.max(), 'amplitude'),
        'test_meta': (lambda x: 2. * x, 'meta1'),
    }
    fset = featurize.featurize_time_series(
        t, m, e,
        ['amplitude', 'std_err', 'test_f'],
        'class1',
        {'meta1': 0.5},
        custom_functions=graph_entries,
        scheduler=get_sync,
    )

    expected_vars = ['amplitude', 'meta1', 'std_err', 'test_f']
    expected_coords = ['channel', 'name', 'target']
    npt.assert_array_equal(sorted(fset.data_vars), expected_vars)
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    npt.assert_array_equal(sorted(fset.amplitude.coords), expected_coords)
    npt.assert_array_equal(fset.target.values, ['class1'])
Example #29
0
def test_ignore_exceptions():
    """Check both settings of ``raise_exceptions`` with a failing feature.

    The 'mean' task in ``cesium.features.graphs.dask_feature_graph`` is
    swapped for one that raises ``ValueError`` and put back in ``finally``.
    """
    import cesium.features.graphs

    def failing_feature(x):
        raise ValueError()

    graph = cesium.features.graphs.dask_feature_graph
    original_task = graph['mean']
    try:
        graph['mean'] = (failing_feature, 't')
        t, m, e = sample_values()
        requested = ['mean']

        # raise_exceptions=True: the error must reach the caller.
        with pytest.raises(ValueError):
            featurize.featurize_time_series(t, m, e, requested,
                                            scheduler=dask.get,
                                            raise_exceptions=True)

        # raise_exceptions=False: the output must consist only of NaN.
        fset = featurize.featurize_time_series(t, m, e, requested,
                                               scheduler=dask.get,
                                               raise_exceptions=False)
        all_nan = np.isnan(fset.values).all()
        assert all_nan
    finally:
        graph['mean'] = original_task
Example #30
0
def test_featurize_time_series_default_errors():
    """Featurize with ``None`` passed as errors for three input layouts.

    Only the final result is checked, for the ``('amplitude', 0)`` column.
    """
    raw_t, raw_m, _ = sample_values(channels=3)
    requested = ['amplitude', 'std_err']
    metadata = {}

    # One outer entry; the inner entries drop the last 0, 5 and 10 points.
    uneven_t = [[raw_t, raw_t[0:-5], raw_t[0:-10]]]
    uneven_m = [[raw_m[0], raw_m[1][0:-5], raw_m[2][0:-10]]]

    layouts = (
        (raw_t, raw_m),                      # as returned by sample_values
        (uneven_t, uneven_m),                # trimmed inner entries
        (uneven_t[0][0], uneven_m[0][0]),    # first inner entry only
    )
    for times, values in layouts:
        fset = featurize.featurize_time_series(times, values, None,
                                               requested, metadata,
                                               scheduler=dask.get)
    # fset now holds the result of the last layout.
    assert ('amplitude', 0) in fset.columns
Example #31
0
def test_featurize_time_series_multiple_multichannel():
    """Featurize five 3-channel series with mixed targets; check the result.

    Verifies the data variable names, the ``channel`` values, the coordinate
    names on ``amplitude``, and that the stored targets equal the inputs.
    """
    n_series = 5
    n_channels = 3
    samples = [sample_values(channels=n_channels) for _ in range(n_series)]
    times, values, errors = (list(column) for column in zip(*samples))
    targets = np.array(['class1', 'class1', 'class1', 'class2', 'class2'])
    fset = featurize.featurize_time_series(
        times, values, errors, ['amplitude', 'std_err'], targets,
        {'meta1': 0.5}, scheduler=get_sync)

    expected_vars = ['amplitude', 'meta1', 'std_err']
    expected_coords = ['channel', 'name', 'target']
    npt.assert_array_equal(sorted(fset.data_vars), expected_vars)
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    npt.assert_array_equal(sorted(fset.amplitude.coords), expected_coords)
    npt.assert_array_equal(fset.target.values, targets)
Example #32
0
def test_featurize_time_series_default_errors():
    """Featurize with ``None`` errors and check ``channel`` for each layout."""
    n_channels = 3
    raw_t, raw_m, _ = sample_values(channels=n_channels)
    requested = ['amplitude', 'std_err']
    label = 'class1'
    metadata = {}

    def channels_of(times, values):
        # Errors are always passed as None.
        fset = featurize.featurize_time_series(
            times, values, None, requested, label, metadata,
            scheduler=get_sync)
        return fset.channel

    # Data exactly as returned by sample_values.
    npt.assert_array_equal(channels_of(raw_t, raw_m), np.arange(n_channels))

    # One outer entry; the inner entries drop the last 0, 5 and 10 points.
    uneven_t = [[raw_t, raw_t[0:-5], raw_t[0:-10]]]
    uneven_m = [[raw_m[0], raw_m[1][0:-5], raw_m[2][0:-10]]]
    npt.assert_array_equal(channels_of(uneven_t, uneven_m),
                           np.arange(n_channels))

    # Only the first inner entry: a single channel, numbered 0.
    npt.assert_array_equal(channels_of(uneven_t[0][0], uneven_m[0][0]), [0])