def test_accumulate_start_stop_2D(random): node = Pipeline(steps=dummy_classifier, buffer_size='5s') start = now() events = [ ['accumulation_starts', ''], ['accumulation_stops', ''] ] times = pd.date_range(start=start, periods=2, freq='10s') node.i_events.set(events, times, ['label', 'data']) stream = DummyData(start_date=start, rate=1, jitter=0) node.i_training.data = stream.next(100) node.update() assert len(node._X_train) == 10
def test_reindex_indices(): data = DummyData() node = Reindex(rate=10) node.i.data = data.next() node.update() assert node.o.data.index.values[0] == np.datetime64( "2017-12-31 23:59:59.998745401") assert node.o.data.index.values[-1] == np.datetime64( "2018-01-01 00:00:00.898745401") node.i.data = data.next() node.update() assert node.o.data.index.values[0] == np.datetime64( "2018-01-01 00:00:00.998745401") assert node.o.data.index.values[-1] == np.datetime64( "2018-01-01 00:00:01.898745401")
def test_receive_3D_unsupervised(): node = Pipeline(steps=dummy_transformer, fit=False, mode='transform', meta_label=None) node.i_0.data = DummyData().next() node.update() assert node._X[0].shape == (10, 5) assert node._y == None assert node._dimensions == 3
def test_predict_3D_output(): node = Pipeline(steps=dummy_classifier, mode='predict', meta_label='target') stream = DummyData(start_date=now()) node.i_training_0.data = stream.next(5) node.i_training_1.data = stream.next(5) node.i_training_0.meta = { 'target': 0 } node.i_training_1.meta = { 'target': 1 } node.i_events.data = make_event('training_starts') while node._status != 3: node.update() node.i_0.data = stream.next(5) node.i_1.data = stream.next(5) node.i_0.meta = {'index': 0} node.i_1.meta = {'index': 1} node.update() assert len(node.o_events.data) == 2 assert node.o_events.meta == {'epochs': [{'index': 0}, {'index': 1}]}
def test_transform_2D_output(random): node = Pipeline(steps=dummy_transformer, mode='fit_transform') columns = ['A', 'B', 'C', 'D', 'E'] node.i.data = DummyData(start_date=now()).next() node.i.meta = {'foo': 'bar'} node.i.data.columns = columns node.update() assert np.array_equal(node.i.data.index.values, node.o.data.index.values) assert list(node.o.data.columns) == columns assert node.o.meta == node.i.meta
def test_trim_2D(random): node = Pipeline(steps=dummy_classifier) data = DummyData(rate=1).next(20) node._X_train = data.values node._X_train_indices = np.array(data.index.values, dtype=np.datetime64) start = np.datetime64('2018-01-01T00:00:05') stop = np.datetime64('2018-01-01T00:00:15') node._dimensions = 2 node._accumulate(start, stop) assert len(node._X_train_indices) == 10 assert len(node._X_train) == 10
def test_trim_3D(random): node = Pipeline(steps=dummy_classifier) node.i_training_0.data = DummyData(start_date='2018-01-01T00:00:00').next() node.i_training_1.data = DummyData(start_date='2018-01-01T00:00:10').next() node.i_training_2.data = DummyData(start_date='2018-01-01T00:00:20').next() node.i_training_3.data = DummyData(start_date='2018-01-01T00:00:30').next() node.i_training_0.meta = { 'epoch': { 'context': { 'target': 1 }}} node.i_training_1.meta = { 'epoch': { 'context': { 'target': 2 }}} node.i_training_2.meta = { 'epoch': { 'context': { 'target': 3 }}} node.i_training_3.meta = { 'epoch': { 'context': { 'target': 4 }}} node._accumulation_start = np.datetime64('2017-12-31T00:00:00') node._accumulation_stop = np.datetime64('2018-01-01T00:01:00') node._status = 1 node.update() node._dimensions = 0 # Bypass accumulation start = np.datetime64('2018-01-01T00:00:05') stop = np.datetime64('2018-01-01T00:00:25') node._accumulate(start, stop) assert len(node._X_train_indices) == 2 assert len(node._X_train) == 2 assert len(node._y_train) == 2 assert node._y_train.tolist() == [2, 3]
def test_transform_3D_output(random): pipeline = [ {'module': 'test_ml', 'class': 'Vectorizer'}, {'module': 'test_ml', 'class': 'DummyTransformer'}, {'module': 'test_ml', 'class': 'Shaper', 'args': { 'shape': (2, -1, 5) }} ] node = Pipeline(steps=pipeline, mode='fit_transform', meta_label=None) columns = ['A', 'B', 'C', 'D', 'E'] stream = DummyData(start_date=now()) node.i_0.data = stream.next() node.i_1.data = stream.next() node.i_0.data.columns = columns node.i_1.data.columns = columns node.i_0.meta = {'index': 0} node.i_1.meta = {'index': 1} node.update() assert len(list(node.iterate('o_*'))) == 2 assert np.array_equal(node.i_0.data.index.values, node.o_0.data.index.values) assert list(node.i_0.data.columns) == columns assert list(node.i_1.data.columns) == columns assert node.o_0.meta == node.i_0.meta assert node.o_1.meta == node.i_1.meta
def test_passthrough(): node = Pipeline(steps=dummy_classifier, passthrough=True) streamer = DummyData() node.i_training.data = streamer.next() node.i_training_0.data = streamer.next() node.i_events.data = make_event('foobar') node.i.data = streamer.next() node.i_0.data = streamer.next() node.i_1.data = streamer.next() node.i.meta = {'foobar': 42} node.update() assert len(list(node.iterate('o*'))) == 3 assert node.o.data.equals(node.i.data) assert node.o_0.data.equals(node.i_0.data) assert node.o_0.data.equals(node.i_0.data) assert node.o.meta == node.i.meta
def test_convert(): # Filenames src = os.path.join(tempfile.gettempdir(), "test.hdf") dst = os.path.join(tempfile.gettempdir(), "test.bdf") # Make fake signal rate = 100 channels = ["ch1", "ch2", "ch3", "ch4", "ch5"] eeg = DummyData(rate=rate, round=2, cols=channels).next(300) # Make fake events timestamps = np.array( ["2018-01-01 00:00:00", "2018-01-01 00:00:01", "2018-01-01 00:00:02"], dtype='datetime64') cols = ["label", "data"] rows = [["start", "{'mood': 'happy'}"], ["something", None], ["stop", "{'mood': 42}"]] events = pd.DataFrame(rows, index=timestamps, columns=cols) # Save to HDF store = pd.HDFStore(src) store.append("/eeg", eeg) store.get_node("/eeg")._v_attrs["meta"] = {"rate": rate} store.append("/events", events) store.close() # Convert convert(src) # Compute MD5 md5 = hashlib.md5() file = open(dst, "rb") md5.update(file.read()) #assert md5.hexdigest() == "27a686606e589b67fc9888802afef57c" assert True os.unlink(src)
from timeflux_ml.nodes.fit import Fit from timeflux_ml.nodes.transform import Transform # ------------------------------------------------------ # Use-case #1 : Not supervised fit Pipeline on pandas data # ------------------------------------------------------ # params: # has_targets = False # # Inputs: DataFrame # Outputs: DataFrame # # eg: MinMax scaler calibrated on streaming data. num_cols = 5 data = DummyData(rate=10, jitter=.05, num_cols=num_cols) node_fit = Fit(pipeline_steps={'scaler': 'sklearn.preprocessing.MinMaxScaler'}, has_targets=False) def test_fit_no_data(): """Fit received empty DataFrame no data""" node_fit.i.data = pd.DataFrame() node_fit.update() assert node_fit.o.meta == {} def test_fit_data(): """Fit received some data and fit pipeline in a thread """ calibration_size = 30 output_data = pd.DataFrame()
def test_3D_training(random): node = Pipeline(steps=dummy_classifier) node.i_training_0.data = DummyData().next() node.update() assert node._dimensions == 3
def generator(rate=10, jitter=.05): """Create object to mimic data streaming """ generator = DummyData(rate=rate, jitter=jitter) return generator
"""Tests for axis.py""" import pandas as pd from timeflux.helpers.testing import DummyData from timeflux.nodes.axis import AddSuffix, Rename, RenameColumns pandas_data = DummyData(cols=['0', '1', '2']) def test_add_suffix(): node = AddSuffix(suffix='_foo') pandas_data.reset() node.i.data = pandas_data.next(20) node.update() pd.testing.assert_index_equal(node.o.data.columns, pd.Index(['0_foo', '1_foo', '2_foo'])) def test_rename(): node = Rename(mapper={'0': 'new_0'}, axis=1) pandas_data.reset() node.i.data = pandas_data.next(20) node.update() pd.testing.assert_index_equal(node.o.data.columns, pd.Index(['new_0', '1', '2'])) def test_rename_columns(): # wrong length of parameter `list` node = RenameColumns(names=['foo'])
def test_2D_no_training(random): node = Pipeline(steps=dummy_transformer, mode='fit_transform') node.i.data = DummyData().next() node.update() assert node._dimensions == 2
"""Tests for accumulate.py""" import pandas as pd import xarray as xr from timeflux.helpers.testing import DummyData, DummyXArray from timeflux.nodes.accumulate import AppendDataFrame, AppendDataArray xarray_data = DummyXArray() pandas_data = DummyData() def test_append_dataframe(): """"Test node AppendDataFrame""" node = AppendDataFrame() pandas_data.reset() node.clear() # gate is not closed, data should be accumulated but not released # first chunk node.i.data = pandas_data.next(5) node.update() # assert no output assert node.o.data == None # assert the data has been buffered pd.testing.assert_frame_equal(pandas_data._data.iloc[:5, :], node._data) # second chunk node.clear() node.i.data = pandas_data.next(10) node.update() # assert no output assert node.o.data == None # assert the buffer is the concatenation of the 2 accumulated chunks
def test_3D_no_training(random): node = Pipeline(steps=dummy_classifier, mode='fit_predict') node.i_0.data = DummyData().next() node.update() assert node._dimensions == 3
def test_reindex_rate_constructor(): data = DummyData() node = Reindex(rate=10) node.update() assert node._rate == 10
def test_receive_2D(): node = Pipeline(steps=dummy_transformer, fit=False, mode='transform') node.i.data = DummyData().next() node.update() assert node._X.shape == (10, 5) assert node._dimensions == 2
import numpy as np import pandas as pd import pytest import logging from timeflux.core.exceptions import WorkerInterrupt from timeflux.helpers.testing import DummyData, Looper from timeflux.nodes.dejitter import Snap, Interpolate rate = 10 dummy_data_with_jitter = DummyData(rate=rate, jitter=.05) num_cols = 5 dummy_data_no_jitter = DummyData(rate=rate, jitter=0.0, cols=[f'ch{k}' for k in range(num_cols)]) def test_round_on_data_with_jitter(): data = dummy_data_with_jitter data.reset() node = Snap(rate=rate) node.i.data = data.next(6) node.update() expected_data = pd.DataFrame( [ [0.185133, 0.541901, 0.872946, 0.732225, 0.806561], [0.658783, 0.692277, 0.849196, 0.249668, 0.489425], [0.221209, 0.987668, 0.944059, 0.039427, 0.705575], [0.925248, 0.180575, 0.567945, 0.915488, 0.033946], [0.69742, 0.297349, 0.924396, 0.971058, 0.944266], [0.474214, 0.862043, 0.844549, 0.3191, 0.828915]
def test_receive_3D_invalid_label(caplog): node = Pipeline(steps=dummy_classifier, mode='fit_predict') node.i_0.data = DummyData().next() node.update() assert caplog.record_tuples[0][2] == 'Invalid label' assert node._X == None
import numpy as np import pandas as pd import pytest from timeflux.core.exceptions import WorkerInterrupt from timeflux.helpers.testing import DummyData, Looper from timeflux.nodes.dejitter import Snap, Interpolate rate = 10 dummy_data_with_jitter = DummyData(rate=rate, jitter=.05) num_cols = 5 dummy_data_no_jitter = DummyData(rate=rate, jitter=0.0, cols=[f'ch{k}' for k in range(num_cols)]) dummy_data_not_monotonic = DummyData(rate=rate, jitter=.05, num_rows=100) # swap rows 51 and 52 to have a DataFrame with not monotonic index dummy_data_not_monotonic._data = dummy_data_not_monotonic._data.iloc[ list(np.arange(0, 50)) + [52, 51] + list(np.arange(52, 100))] assert ~ dummy_data_not_monotonic._data.index.is_monotonic def test_round_on_data_with_jitter(): data = dummy_data_with_jitter data.reset() node = Snap(rate=rate) node.i.data = data.next(6) node.update() expected_data = pd.DataFrame( [ [0.185133, 0.541901, 0.872946, 0.732225, 0.806561],
def test_transform(): node = Pipeline(steps=dummy_transformer, fit=False, mode='transform', meta_label=None) node.i.data = DummyData().next() node.update() expected = node.i.data.values * 2 assert np.array_equal(expected, node._out)
"""Tests for nodes from timeflux_dsp.nodes.spectral""" import numpy as np import pandas as pd import pytest import xarray as xr from timeflux.helpers.testing import DummyData from timeflux_dsp.nodes.spectral import FFT fs = 10 data = DummyData(rate=fs, jitter=0.05) all_data = data.next(50) def test_welch(): data.reset() node = FFT(fs=fs, return_onesided=False) node.i.data = data.next(5) node.update() expected_freqs = [0.0, 2.0, 4.0, -4.0, -2.0] expected_times = [pd.Timestamp("2018-01-01 00:00:00.396560186")] expected_data = np.array([ [ 2.687793 + 0.0j, 2.69977 + 0.0j, 4.158542 + 0.0j, 2.907866 + 0.0j,
"""Tests for query nodes""" import pandas as pd import pytest from timeflux.core.exceptions import WorkerInterrupt from timeflux.helpers.testing import DummyData from timeflux.nodes.query import LocQuery, SelectRange, XsQuery fs = 10 data = DummyData(rate=fs, jitter=.05, num_cols=6) all_data = data._data def test_locquery(): data.reset() data._data.columns = ['A', 'B', 'C', 'D', 'E', 'F'] node = LocQuery(key=('A', 'E'), axis=1) node.i.data = data.next(3) node.update() expected_data = pd.DataFrame( [[0.185133, 0.806561], [0.692277, 0.221209], [0.944059, 0.180575]], [ pd.Timestamp('2017-12-31 23:59:59.998745401'), pd.Timestamp('2018-01-01 00:00:00.104507143'), pd.Timestamp('2018-01-01 00:00:00.202319939'), ], ['A', 'E']) pd.testing.assert_frame_equal(node.o.data, expected_data) # test query with wrong key: "R" not in the input columns with pytest.raises(WorkerInterrupt): node = LocQuery(key=['R'], axis=1) node.i.data = data.next()