# NOTE(review): a later function in this file reuses this name, so this
# definition is shadowed at import time and not collected by pytest --
# confirm which version is intended.
def test_featurize_time_series_pandas_metafeatures():
    """Check that pandas Series/DataFrame metafeatures reach the output.

    A single series is featurized with a `pd.Series` of metafeatures, then
    five series are featurized with a `pd.DataFrame` of metafeatures; in
    both cases the metafeature values are read back from the result.
    """
    feature_names = ['amplitude', 'std_err']

    # Single time series, metafeatures given as a Series.
    t, m, e = sample_values()
    series_meta = pd.Series({'meta1': 0.5})
    fset = featurize.featurize_time_series(t, m, e, feature_names,
                                           series_meta, scheduler=dask.get)
    npt.assert_allclose(fset['meta1'], 0.5)

    # Several time series, metafeatures given as a DataFrame (one row each).
    n_series = 5
    samples = [sample_values() for _ in range(n_series)]
    times, values, errors = map(list, zip(*samples))
    feature_names = ['amplitude', 'std_err']
    frame_meta = pd.DataFrame({
        'meta1': [0.5] * n_series,
        'meta2': [0.8] * n_series,
    })
    fset = featurize.featurize_time_series(times, values, errors,
                                           feature_names, frame_meta,
                                           scheduler=dask.get)
    npt.assert_allclose(fset['meta1'], 0.5)
    npt.assert_allclose(fset['meta2'], 0.8)
def test_transform_ts_files():
    """Check that the "Train/Test Split" transform returns two outputs.

    Builds 4 'class1' and 8 'class2' time series and passes them through
    `transformation.transform_ts_files`.
    """
    time_series = []
    for label, count in (('class1', 4), ('class2', 8)):
        time_series.extend(TimeSeries(*sample_values(), target=label)
                           for _ in range(count))

    output = transformation.transform_ts_files(time_series,
                                               "Train/Test Split")
    npt.assert_equal(len(output), 2)
def test_train_test_split_ratios():
    """Check that a 0.5/0.5 train/test request splits the data in half.

    Builds 4 'class1' and 8 'class2' time series, calls
    `transformation.train_test_split` with `test_size=0.5` and
    `train_size=0.5`, and checks that both returned subsets hold half of
    the input series.
    """
    n_class1 = 4
    n_class2 = 8
    time_series = [TimeSeries(*sample_values(), target='class1')
                   for _ in range(n_class1)]
    time_series += [TimeSeries(*sample_values(), target='class2')
                    for _ in range(n_class2)]

    outputs = transformation.train_test_split(
        time_series, test_size=0.5, train_size=0.5)

    # Lengths are integers, so compare against an integer half rather than
    # the float produced by true division.
    expected_size = len(time_series) // 2
    npt.assert_equal(len(outputs[1]), expected_size)
    npt.assert_equal(len(outputs[0]), expected_size)
# NOTE(review): a later function in this file reuses this name, so this
# definition is shadowed at import time and not collected by pytest --
# confirm which version is intended.
def test_featurize_time_series_default_errors():
    """Featurize time series while passing `None` for the errors.

    Three input layouts are exercised: the multichannel sample as returned
    by `sample_values`, an uneven-length multichannel variant, and a single
    channel. Only the last result is checked for an ('amplitude', 0) column.
    """
    n_channels = 3
    t, m, _ = sample_values(channels=n_channels)
    feature_names = ['amplitude', 'std_err']
    meta = {}

    # Multichannel data as returned by sample_values.
    fset = featurize.featurize_time_series(t, m, None, feature_names, meta,
                                           scheduler=dask.get)

    # Channels trimmed to different lengths, wrapped as one series.
    uneven_t = [[t, t[0:-5], t[0:-10]]]
    uneven_m = [[m[0], m[1][0:-5], m[2][0:-10]]]
    fset = featurize.featurize_time_series(uneven_t, uneven_m, None,
                                           feature_names, meta,
                                           scheduler=dask.get)

    # First channel only.
    single_t = uneven_t[0][0]
    single_m = uneven_m[0][0]
    fset = featurize.featurize_time_series(single_t, single_m, None,
                                           feature_names, meta,
                                           scheduler=dask.get)
    assert ('amplitude', 0) in fset.columns
def test_featurize_time_series_default_times():
    """Featurize time series while passing `None` for the times.

    Three input layouts are exercised: the multichannel sample as returned
    by `sample_values`, an uneven-length multichannel variant, and a single
    channel. The results are not inspected; the test only requires that
    the calls complete.
    """
    n_channels = 3
    _, m, e = sample_values(channels=n_channels)
    feature_names = ['amplitude', 'std_err']
    meta = {}

    # Multichannel data as returned by sample_values.
    fset = featurize.featurize_time_series(None, m, e, feature_names, meta,
                                           scheduler=get_sync)

    # Channels trimmed to different lengths, wrapped as one series.
    uneven_m = [[m[0], m[1][0:-5], m[2][0:-10]]]
    uneven_e = [[e[0], e[1][0:-5], e[2][0:-10]]]
    fset = featurize.featurize_time_series(None, uneven_m, uneven_e,
                                           feature_names, meta,
                                           scheduler=get_sync)

    # First channel only.
    single_m = uneven_m[0][0]
    single_e = uneven_e[0][0]
    fset = featurize.featurize_time_series(None, single_m, single_e,
                                           feature_names, meta,
                                           scheduler=get_sync)
def test_ignore_exceptions():
    """Check the `raise_exceptions` flag of `featurize_time_series`.

    The 'mean' entry of `cesium.features.graphs.dask_feature_graph` is
    temporarily replaced with a function that always raises `ValueError`.
    With `raise_exceptions=True` the error must propagate; with
    `raise_exceptions=False` the resulting feature values must all be NaN.
    The original graph entry is restored afterwards, even on failure.
    """
    import cesium.features.graphs

    def raise_exc(x):
        raise ValueError()

    graphs = cesium.features.graphs
    old_value = graphs.dask_feature_graph['mean']
    try:
        graphs.dask_feature_graph['mean'] = (raise_exc, 't')
        t, m, e = sample_values()
        features_to_use = ['mean']

        # The call is expected to raise, so its result is never bound.
        with pytest.raises(ValueError):
            featurize.featurize_time_series(t, m, e, features_to_use,
                                            scheduler=dask.get,
                                            raise_exceptions=True)

        fset = featurize.featurize_time_series(t, m, e, features_to_use,
                                               scheduler=dask.get,
                                               raise_exceptions=False)
        assert np.isnan(fset.values).all()
    finally:
        # Always undo the monkeypatch so other tests see the real feature.
        graphs.dask_feature_graph['mean'] = old_value
# NOTE(review): a later function in this file reuses this name, so this
# definition is shadowed at import time and not collected by pytest --
# confirm which version is intended.
def test_featurize_time_series_single():
    """Featurize one time series and check the amplitude dtype is float64."""
    times, values, errors = sample_values()
    fset = featurize.featurize_time_series(times, values, errors,
                                           ['amplitude', 'std_err'],
                                           {'meta1': 0.5},
                                           scheduler=dask.get)
    amplitude_dtype = fset['amplitude'].values.dtype
    assert amplitude_dtype == np.float64
def test_train_test_split():
    """Check per-class counts after a "Train/Test Split" transform.

    After seeding NumPy's RNG, the train and test outputs are each
    expected to contain half of every class.
    """
    # Mock out unevenly-labeled test data: 4 class1, 8 class2
    class_counts = (('class1', 4), ('class2', 8))
    time_series = []
    for label, count in class_counts:
        time_series.extend(TimeSeries(*sample_values(), target=label)
                           for _ in range(count))

    # Seed after the data is generated, as in the original ordering.
    np.random.seed(0)
    train, test = transformation.transform_ts_files(time_series,
                                                    "Train/Test Split")

    for label, count in class_counts:
        for subset in (train, test):
            n_in_subset = sum(ts.target == label for ts in subset)
            npt.assert_equal(n_in_subset, count / 2)
# NOTE(review): a later function in this file reuses this name, so this
# definition is shadowed at import time and not collected by pytest --
# confirm which version is intended.
def test_featurize_time_series_single_multichannel():
    """Featurize one 3-channel time series and check the output columns."""
    t, m, e = sample_values(channels=3)
    fset = featurize.featurize_time_series(t, m, e,
                                           ['amplitude', 'std_err'],
                                           {'meta1': 0.5},
                                           scheduler=dask.get)
    columns = fset.columns
    assert ('amplitude', 0) in columns
    assert 'meta1' in columns
def test_featurize_time_series_pandas_metafeatures():
    """Check that pandas Series/DataFrame metafeatures reach the output.

    First a single series is featurized with a `pd.Series` of
    metafeatures, then five series with a `pd.DataFrame`; each time the
    metafeature values are read back from the resulting feature set.
    """
    # Case 1: one time series, metafeatures as a Series.
    t, m, e = sample_values()
    wanted = ['amplitude', 'std_err']
    meta_series = pd.Series({'meta1': 0.5})
    fset = featurize.featurize_time_series(t, m, e, wanted, meta_series,
                                           scheduler=dask.get)
    npt.assert_allclose(fset['meta1'], 0.5)

    # Case 2: several time series, metafeatures as a DataFrame.
    n_series = 5
    samples = [sample_values() for _ in range(n_series)]
    times, values, errors = (list(column) for column in zip(*samples))
    wanted = ['amplitude', 'std_err']
    meta_frame = pd.DataFrame({'meta1': [0.5] * n_series,
                               'meta2': [0.8] * n_series})
    fset = featurize.featurize_time_series(times, values, errors, wanted,
                                           meta_frame, scheduler=dask.get)
    for column, expected in (('meta1', 0.5), ('meta2', 0.8)):
        npt.assert_allclose(fset[column], expected)
def test_featurize_time_series_single():
    """Featurize one time series with a target and check vars and target."""
    times, values, errors = sample_values()
    label = 'class1'
    fset = featurize.featurize_time_series(times, values, errors,
                                           ['amplitude', 'std_err'],
                                           label, {'meta1': 0.5},
                                           scheduler=get_sync)
    data_var_names = sorted(fset.data_vars)
    npt.assert_array_equal(data_var_names,
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.target.values, ['class1'])
# NOTE(review): a later function in this file reuses this name, so this
# definition is shadowed at import time and not collected by pytest --
# confirm which version is intended.
def test_featurize_time_series_multiple():
    """Featurize five time series and check the 'feature' column level."""
    n_series = 5
    samples = [sample_values() for _ in range(n_series)]
    times, values, errors = map(list, zip(*samples))
    per_series_meta = [{'meta1': 0.5}] * n_series
    fset = featurize.featurize_time_series(times, values, errors,
                                           ['amplitude', 'std_err'],
                                           per_series_meta,
                                           scheduler=dask.get)
    feature_level = sorted(fset.columns.get_level_values('feature'))
    npt.assert_array_equal(feature_level,
                           ['amplitude', 'meta1', 'std_err'])
def test_featurize_time_series_no_targets():
    """Featurize one time series with `targets=None`.

    Checks that the result holds the requested features plus the
    metafeature, and that no 'target' entry is present.
    """
    t, m, e = sample_values()
    features_to_use = ['amplitude', 'std_err']
    meta_features = {'meta1': 0.5}
    fset = featurize.featurize_time_series(t, m, e, features_to_use,
                                           targets=None,
                                           meta_features=meta_features,
                                           scheduler=get_sync)
    npt.assert_array_equal(sorted(fset.data_vars),
                           ['amplitude', 'meta1', 'std_err'])
    assert 'target' not in fset
# NOTE(review): a later function in this file reuses this name, so this
# definition is shadowed at import time and not collected by pytest --
# confirm which version is intended.
def test_featurize_time_series_uneven_multichannel():
    """Featurize multichannel data whose channels have unequal lengths."""
    base_t, base_m, base_e = sample_values(channels=3)

    # Trim the second and third channels by 5 and 10 samples respectively.
    uneven_t = [[base_t, base_t[0:-5], base_t[0:-10]]]
    uneven_m = [[base_m[0], base_m[1][0:-5], base_m[2][0:-10]]]
    uneven_e = [[base_e[0], base_e[1][0:-5], base_e[2][0:-10]]]

    fset = featurize.featurize_time_series(uneven_t, uneven_m, uneven_e,
                                           ['amplitude', 'std_err'],
                                           {'meta1': 0.5},
                                           scheduler=dask.get)
    columns = fset.columns
    assert ('amplitude', 0) in columns
    assert 'meta1' in columns
# NOTE(review): a later function in this file reuses this name, so this
# definition is shadowed at import time and not collected by pytest --
# confirm which version is intended.
def test_featurize_time_series_multiple_multichannel():
    """Featurize five 3-channel time series and check the output columns."""
    n_series = 5
    n_channels = 3
    samples = [sample_values(channels=n_channels) for _ in range(n_series)]
    times, values, errors = map(list, zip(*samples))
    fset = featurize.featurize_time_series(times, values, errors,
                                           ['amplitude', 'std_err'],
                                           {'meta1': 0.5},
                                           scheduler=dask.get)
    columns = fset.columns
    assert ('amplitude', 0) in columns
    assert 'meta1' in columns
def test_featurize_time_series_custom_functions():
    """Featurize with a user-supplied feature function.

    The custom feature 'test_f' always returns pi; the test checks that
    value for channel 0 along with the presence of a built-in feature
    column and the metafeature column.
    """
    t, m, e = sample_values(channels=3)
    extra_features = {'test_f': lambda t, m, e: np.pi}
    fset = featurize.featurize_time_series(
        t, m, e, ['amplitude', 'std_err', 'test_f'], {'meta1': 0.5},
        custom_functions=extra_features, scheduler=dask.get)

    npt.assert_array_equal(fset['test_f', 0], np.pi)
    columns = fset.columns
    assert ('amplitude', 0) in columns
    assert 'meta1' in columns
def test_featurize_time_series_multiple():
    """Featurize five time series with targets; check vars and targets."""
    n_series = 5
    samples = [sample_values() for _ in range(n_series)]
    times, values, errors = map(list, zip(*samples))
    labels = np.array(['class1'] * n_series)
    per_series_meta = [{'meta1': 0.5}] * n_series
    fset = featurize.featurize_time_series(times, values, errors,
                                           ['amplitude', 'std_err'],
                                           labels, per_series_meta,
                                           scheduler=get_sync)
    data_var_names = sorted(fset.data_vars)
    npt.assert_array_equal(data_var_names,
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.target.values, ['class1'] * n_series)
def test_featurize_time_series_single_multichannel():
    """Featurize one 3-channel series with a target.

    Checks the data variable names, the channel coordinate, the
    coordinates of the amplitude variable, and the stored target.
    """
    n_channels = 3
    t, m, e = sample_values(channels=n_channels)
    label = 'class1'
    fset = featurize.featurize_time_series(t, m, e,
                                           ['amplitude', 'std_err'],
                                           label, {'meta1': 0.5},
                                           scheduler=get_sync)

    data_var_names = sorted(fset.data_vars)
    npt.assert_array_equal(data_var_names,
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    amplitude_coords = sorted(fset.amplitude.coords)
    npt.assert_array_equal(amplitude_coords,
                           ['channel', 'name', 'target'])
    npt.assert_array_equal(fset.target.values, ['class1'])
# NOTE(review): a later function in this file reuses this name, so this
# definition is shadowed at import time and not collected by pytest --
# confirm which version is intended.
def test_featurize_time_series_custom_dask_graph():
    """Featurize with custom features given as (function, input) tuples.

    'test_f' is declared on top of 'amplitude' and 'test_meta' on top of
    the metafeature 'meta1'; the test checks that channel-0 columns exist
    for the built-in feature and for both custom ones.
    """
    t, m, e = sample_values(channels=3)
    graph_entries = {
        'test_f': (lambda x: x.min() - x.max(), 'amplitude'),
        'test_meta': (lambda x: 2. * x, 'meta1'),
    }
    fset = featurize.featurize_time_series(
        t, m, e, ['amplitude', 'std_err', 'test_f', 'test_meta'],
        {'meta1': 0.5}, custom_functions=graph_entries,
        scheduler=dask.get)

    columns = fset.columns
    assert ('amplitude', 0) in columns
    assert ('test_f', 0) in columns
    assert ('test_meta', 0) in columns
def test_featurize_time_series_uneven_multichannel():
    """Featurize unequal-length multichannel data with a target.

    Checks the data variable names, the channel coordinate, the
    coordinates of the amplitude variable, and the stored target.
    """
    n_channels = 3
    base_t, base_m, base_e = sample_values(channels=n_channels)

    # Trim the second and third channels by 5 and 10 samples respectively.
    uneven_t = [[base_t, base_t[0:-5], base_t[0:-10]]]
    uneven_m = [[base_m[0], base_m[1][0:-5], base_m[2][0:-10]]]
    uneven_e = [[base_e[0], base_e[1][0:-5], base_e[2][0:-10]]]

    label = 'class1'
    fset = featurize.featurize_time_series(uneven_t, uneven_m, uneven_e,
                                           ['amplitude', 'std_err'],
                                           label, {'meta1': 0.5},
                                           scheduler=get_sync)

    data_var_names = sorted(fset.data_vars)
    npt.assert_array_equal(data_var_names,
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    amplitude_coords = sorted(fset.amplitude.coords)
    npt.assert_array_equal(amplitude_coords,
                           ['channel', 'name', 'target'])
    npt.assert_array_equal(fset.target.values, ['class1'])
def test_featurize_time_series_custom_dask_graph():
    """Featurize with custom (function, input) features and a target.

    Two custom entries are supplied ('test_f' and 'test_meta') but only
    'test_f' is requested; the test checks the data variable names, the
    channel coordinate, the amplitude coordinates, and the stored target.
    """
    n_channels = 3
    t, m, e = sample_values(channels=n_channels)
    label = 'class1'
    graph_entries = {
        'test_f': (lambda x: x.min() - x.max(), 'amplitude'),
        'test_meta': (lambda x: 2. * x, 'meta1'),
    }
    fset = featurize.featurize_time_series(
        t, m, e, ['amplitude', 'std_err', 'test_f'], label,
        {'meta1': 0.5}, custom_functions=graph_entries,
        scheduler=get_sync)

    data_var_names = sorted(fset.data_vars)
    npt.assert_array_equal(data_var_names,
                           ['amplitude', 'meta1', 'std_err', 'test_f'])
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    amplitude_coords = sorted(fset.amplitude.coords)
    npt.assert_array_equal(amplitude_coords,
                           ['channel', 'name', 'target'])
    npt.assert_array_equal(fset.target.values, ['class1'])
def test_featurize_time_series_multiple_multichannel():
    """Featurize five 3-channel series with mixed targets.

    Checks the data variable names, the channel coordinate, the
    coordinates of the amplitude variable, and that the stored targets
    match the ones passed in.
    """
    n_series = 5
    n_channels = 3
    samples = [sample_values(channels=n_channels) for _ in range(n_series)]
    times, values, errors = map(list, zip(*samples))
    labels = np.array(['class1', 'class1', 'class1', 'class2', 'class2'])
    fset = featurize.featurize_time_series(times, values, errors,
                                           ['amplitude', 'std_err'],
                                           labels, {'meta1': 0.5},
                                           scheduler=get_sync)

    data_var_names = sorted(fset.data_vars)
    npt.assert_array_equal(data_var_names,
                           ['amplitude', 'meta1', 'std_err'])
    npt.assert_array_equal(fset.channel, np.arange(n_channels))
    amplitude_coords = sorted(fset.amplitude.coords)
    npt.assert_array_equal(amplitude_coords,
                           ['channel', 'name', 'target'])
    npt.assert_array_equal(fset.target.values, labels)
def test_featurize_time_series_default_errors():
    """Featurize time series with a target while passing `None` as errors.

    Three input layouts are exercised: the multichannel sample as returned
    by `sample_values`, an uneven-length multichannel variant, and a single
    channel. After each call the channel coordinate of the result is
    checked.
    """
    n_channels = 3
    t, m, _ = sample_values(channels=n_channels)
    feature_names = ['amplitude', 'std_err']
    label = 'class1'
    meta = {}

    # Multichannel data as returned by sample_values.
    fset = featurize.featurize_time_series(t, m, None, feature_names,
                                           label, meta, scheduler=get_sync)
    npt.assert_array_equal(fset.channel, np.arange(n_channels))

    # Channels trimmed to different lengths, wrapped as one series.
    uneven_t = [[t, t[0:-5], t[0:-10]]]
    uneven_m = [[m[0], m[1][0:-5], m[2][0:-10]]]
    fset = featurize.featurize_time_series(uneven_t, uneven_m, None,
                                           feature_names, label, meta,
                                           scheduler=get_sync)
    npt.assert_array_equal(fset.channel, np.arange(n_channels))

    # First channel only: a single channel index is expected.
    single_t = uneven_t[0][0]
    single_m = uneven_m[0][0]
    fset = featurize.featurize_time_series(single_t, single_m, None,
                                           feature_names, label, meta,
                                           scheduler=get_sync)
    npt.assert_array_equal(fset.channel, [0])