Example #1
0
def test_segmentxyforecast():
    """Check the output shapes of SegmentXYForecast.

    Covers multivariate series bundled with contextual data in a TS_Data
    object, and plain univariate series passed as a list of 1-D arrays.
    """
    base_len = 100
    win = 5
    n_channels = 5
    n_context = 4
    lengths = (base_len, 2 * base_len, base_len)

    # multivariate series with contextual data
    segmenter = transform.SegmentXYForecast(width=win, forecast=5)
    series = [np.random.rand(n, n_channels) for n in lengths]
    context = np.random.rand(len(lengths), n_context)
    targets = [np.random.rand(n) for n in lengths]
    data = TS_Data(series, context)
    segmenter.fit(data, targets)
    seg_data, seg_targets, _ = segmenter.transform(data, targets)
    seg_series, seg_context = get_ts_data_parts(seg_data)
    n_segments = len(seg_targets)
    assert seg_series.shape == (n_segments, win, n_channels)
    assert seg_context.shape == (n_segments, n_context)

    # univariate series: the segmented output has no variable axis
    segmenter = transform.SegmentXYForecast(width=win, forecast=5)
    series = [np.random.rand(n) for n in lengths]
    targets = [np.random.rand(n) for n in lengths]
    segmenter.fit(series, targets)
    seg_data, seg_targets, _ = segmenter.transform(series, targets)
    seg_series, _ = get_ts_data_parts(seg_data)
    n_segments = len(seg_targets)
    assert seg_series.shape == (n_segments, win)
Example #2
0
def test_util():
    """Smoke-test the util helpers on the watch dataset.

    Nothing is asserted; the test passes as long as no helper raises.
    """
    watch = load_watch()

    ts_data = util.make_ts_data(watch['X'], watch['side'])
    util.get_ts_data_parts(ts_data)

    # validation is run on the wrapped data first, then on the raw series
    for candidate in (ts_data, watch['X']):
        util.check_ts_data(candidate, watch['y'])

    util.ts_stats(watch['X'],
                  watch['y'],
                  fs=1.,
                  class_labels=watch['y_labels'])
Example #3
0
def test_util():
    """Check that TS_Data round-trips through get_ts_data_parts, then
    smoke-test check_ts_data and ts_stats on the watch dataset."""
    watch = load_watch()

    ts_data = TS_Data(watch['X'], watch['side'])
    series, context = util.get_ts_data_parts(ts_data)

    # splitting the container must give back exactly what went in
    assert np.array_equal(context, watch['side'])
    per_series_match = [
        np.array_equal(series[pos], reference)
        for pos, reference in enumerate(watch['X'])
    ]
    assert np.all(per_series_match)

    # validation is run on the wrapped data first, then on the raw series
    for candidate in (ts_data, watch['X']):
        util.check_ts_data(candidate, watch['y'])

    util.ts_stats(watch['X'],
                  watch['y'],
                  fs=1.,
                  class_labels=watch['y_labels'])
Example #4
0
def test_function_transform():
    """Exercise FunctionTransformer with and without a user function.

    A default-constructed transformer must hand back the very same object,
    a custom function must be applied to the time series part only, and a
    function that changes the number of samples must be rejected.
    """
    fill_value = 10
    passthrough = transform.FunctionTransformer()

    def fill_with(Xt, value):
        return np.ones(Xt.shape) * value

    filler = transform.FunctionTransformer(fill_with,
                                           func_kwargs={"value": fill_value})

    def assert_passthrough(data, labels):
        # the default transformer must return its input object untouched
        passthrough.fit(data, labels)
        assert passthrough.transform(data) is data

    def filled_output(data, labels):
        filler.fit(data, labels)
        return filler.transform(data)

    # plain arrays: univariate ts first, then multivariate ts
    for ts_shape in ((100, 10), (100, 10, 4)):
        X = np.random.rand(*ts_shape)
        y = np.ones(100)

        assert_passthrough(X, y)
        assert np.array_equal(filled_output(X, y),
                              np.ones(X.shape) * fill_value)

    # TS_Data: univariate contextual data first, then multivariate
    for context_shape in ((100, ), (100, 3)):
        Xt = np.random.rand(100, 10, 4)
        Xc = np.random.rand(*context_shape)
        X = TS_Data(Xt, Xc)
        y = np.ones(100)

        assert_passthrough(X, y)

        out_ts, out_context = get_ts_data_parts(filled_output(X, y))
        assert np.array_equal(out_ts, np.ones(Xt.shape) * fill_value)
        # the contextual data must be passed through as the same object
        assert out_context is Xc

    # test resampling: collapsing all samples into one row is not allowed
    def collapse(Xt):
        return Xt.reshape(1, -1)

    illegal_resampler = transform.FunctionTransformer(collapse)
    X = np.random.rand(100, 10)
    y = np.ones(100)
    illegal_resampler.fit(X, y)
    with pytest.raises(ValueError):
        illegal_resampler.transform(X)
Example #5
0
def test_pad_trunc():
    """Check PadTrunc output shapes and contents.

    The first half uses a target series per time series (``y`` is a list of
    arrays that gets truncated together with ``X``); the second half uses a
    single target value per time series (``y`` is a flat array that must
    pass through unchanged). In every case the leading ``width`` samples of
    each input series must reappear in the output.
    """
    Nt = 100
    width = 5
    nvars = 5
    seg = transform.PadTrunc(width=width)

    # multivariate ts data without context data
    X = [
        np.random.rand(Nt, nvars),
        np.random.rand(Nt, nvars),
        np.random.rand(Nt, nvars)
    ]
    y = [np.random.rand(Nt), np.random.rand(Nt), np.random.rand(Nt)]
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    N = len(ys)
    assert Xs.shape == (N, width, nvars)
    assert np.all([np.equal(X[i][0:width], Xs[i]) for i in range(len(X))])
    assert np.all([np.equal(y[i][0:width], ys[i]) for i in range(len(y))])

    # univariate ts data without context data
    X = [np.random.rand(Nt), np.random.rand(2 * Nt), np.random.rand(3 * Nt)]
    y = [np.random.rand(Nt), np.random.rand(2 * Nt), np.random.rand(3 * Nt)]
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    N = len(ys)
    assert Xs.shape == (N, width)
    assert np.all([np.equal(X[i][0:width], Xs[i]) for i in range(len(X))])
    assert np.all([np.equal(y[i][0:width], ys[i]) for i in range(len(y))])

    # multivariate ts data with context data
    Xt = [
        np.random.rand(Nt, nvars),
        np.random.rand(2 * Nt, nvars),
        np.random.rand(Nt, nvars)
    ]
    Xc = np.random.rand(3, 4)
    y = [np.random.rand(Nt), np.random.rand(2 * Nt), np.random.rand(Nt)]
    X = TS_Data(Xt, Xc)
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    Xst, Xsc = get_ts_data_parts(Xs)
    N = len(ys)
    assert Xst.shape == (N, width, nvars)
    assert Xsc.shape == (N, 4)
    assert np.all([np.equal(Xt[i][0:width], Xst[i]) for i in range(len(Xt))])
    assert np.all([np.equal(Xc[i], Xsc[i]) for i in range(len(Xt))])
    assert np.all([np.equal(y[i][0:width], ys[i]) for i in range(len(y))])

    # ts data with univariate context data
    Xt = [
        np.random.rand(Nt, nvars),
        np.random.rand(2 * Nt, nvars),
        np.random.rand(Nt, nvars)
    ]
    Xc = np.random.rand(3)
    y = [np.random.rand(Nt), np.random.rand(2 * Nt), np.random.rand(Nt)]
    X = TS_Data(Xt, Xc)
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    Xst, Xsc = get_ts_data_parts(Xs)
    N = len(ys)
    assert Xst.shape == (N, width, nvars)
    assert Xsc.shape == (N, )
    assert np.all([np.equal(Xt[i][0:width], Xst[i]) for i in range(len(Xt))])
    assert np.all([np.equal(Xc[i], Xsc[i]) for i in range(len(Xt))])
    assert np.all([np.equal(y[i][0:width], ys[i]) for i in range(len(y))])

    # same number as context vars and time vars
    # this would cause broadcasting failure before implementation of TS_Data class
    Xt = [
        np.random.rand(Nt, nvars),
        np.random.rand(2 * Nt, nvars),
        np.random.rand(Nt, nvars)
    ]
    Xc = np.random.rand(3, nvars)
    y = [np.random.rand(Nt), np.random.rand(2 * Nt), np.random.rand(Nt)]
    X = TS_Data(Xt, Xc)
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    Xst, Xsc = get_ts_data_parts(Xs)
    N = len(ys)
    assert Xst.shape == (N, width, nvars)
    assert Xsc.shape == (N, 5)
    assert np.all([np.equal(Xt[i][0:width], Xst[i]) for i in range(len(Xt))])
    assert np.all([np.equal(Xc[i], Xsc[i]) for i in range(len(Xt))])
    assert np.all([np.equal(y[i][0:width], ys[i]) for i in range(len(y))])

    # --- one target value per time series ---
    # start from a fresh transformer; width and nvars are unchanged
    seg = transform.PadTrunc(width=width)

    # multivariate ts data without context data
    X = [
        np.random.rand(100, nvars),
        np.random.rand(100, nvars),
        np.random.rand(100, nvars)
    ]
    y = np.random.rand(3)
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    N = len(ys)
    assert Xs.shape == (N, width, nvars)
    # iterate over X itself: Xt still refers to the previous section's data
    assert np.all([np.equal(X[i][0:width], Xs[i]) for i in range(len(X))])
    assert np.all([np.equal(y[i], ys[i]) for i in range(len(y))])

    # univariate ts data without context
    X = [np.random.rand(100), np.random.rand(100), np.random.rand(100)]
    y = np.random.rand(3)
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    N = len(ys)
    assert Xs.shape == (N, width)
    # iterate over X itself: Xt still refers to the previous section's data
    assert np.all([np.equal(X[i][0:width], Xs[i]) for i in range(len(X))])
    assert np.all([np.equal(y[i], ys[i]) for i in range(len(y))])

    # multivariate ts data with context data
    Xt = [
        np.random.rand(100, nvars),
        np.random.rand(200, nvars),
        np.random.rand(50, nvars)
    ]
    Xc = np.random.rand(3, 4)
    y = np.random.rand(3)
    X = TS_Data(Xt, Xc)
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    Xst, Xsc = get_ts_data_parts(Xs)
    N = len(ys)
    assert Xst.shape == (N, width, nvars)
    assert Xsc.shape == (N, 4)
    assert np.all([np.equal(Xt[i][0:width], Xst[i]) for i in range(len(Xt))])
    assert np.all([np.equal(Xc[i], Xsc[i]) for i in range(len(Xt))])
    assert np.all([np.equal(y[i], ys[i]) for i in range(len(y))])
Example #6
0
def test_segmentxyforecast():
    """Validate SegmentXYForecast parameter checking, step resolution and
    the shapes of the segmented output."""
    # test illegal parameter settings: each constructor call must be rejected
    rejected = (
        {"width": 0},  # illegal width value
        {"overlap": None, "step": None},  # either overlap or step must be defined
        {"overlap": -1, "step": None},  # illegal overlap value
        {"step": 0},  # illegal step value
        {"order": None},  # illegal order
        {"forecast": 0},  # illegal forecast value
    )
    for kwargs in rejected:
        with pytest.raises(ValueError):
            transform.SegmentXYForecast(**kwargs)

    # (constructor kwargs, expected value of the _step property)
    step_cases = (
        ({"width": 10, "overlap": 0.5}, 5),  # derived from overlap
        ({"width": 10, "overlap": 1, "step": 1}, 1),  # step beats overlap
        ({"overlap": -1, "step": 1}, 1),  # bad overlap ignored if step valid
    )
    for kwargs, expected_step in step_cases:
        assert transform.SegmentXYForecast(**kwargs)._step == expected_step

    # test shape of segmented data
    base_len = 100
    win = 5
    n_channels = 5
    n_context = 4
    lengths = (base_len, 2 * base_len, base_len)

    # multivariate series with contextual data
    segmenter = transform.SegmentXYForecast(width=win, forecast=5)
    series = [np.random.rand(n, n_channels) for n in lengths]
    context = np.random.rand(len(lengths), n_context)
    targets = [np.random.rand(n) for n in lengths]
    data = TS_Data(series, context)
    segmenter.fit(data, targets)
    seg_data, seg_targets, _ = segmenter.transform(data, targets)
    seg_series, seg_context = get_ts_data_parts(seg_data)
    n_segments = len(seg_targets)
    assert seg_series.shape == (n_segments, win, n_channels)
    assert seg_context.shape == (n_segments, n_context)

    # univariate X: the segmented output has no variable axis
    segmenter = transform.SegmentXYForecast(width=win, forecast=5)
    series = [np.random.rand(n) for n in lengths]
    targets = [np.random.rand(n) for n in lengths]
    segmenter.fit(series, targets)
    seg_data, seg_targets, _ = segmenter.transform(series, targets)
    seg_series, _ = get_ts_data_parts(seg_data)
    n_segments = len(seg_targets)
    assert seg_series.shape == (n_segments, win)
Example #7
0
def test_segmentxy():
    """Validate SegmentXY parameter checking, step resolution and the
    shapes of the segmented output for several input layouts."""
    # test illegal parameter settings: each constructor call must be rejected
    rejected = (
        {"width": 0},  # illegal width value
        {"overlap": None, "step": None},  # either overlap or step must be defined
        {"overlap": -1, "step": None},  # illegal overlap value
        {"step": 0},  # illegal step value
        {"order": None},  # illegal order
    )
    for kwargs in rejected:
        with pytest.raises(ValueError):
            transform.SegmentXY(**kwargs)

    # (constructor kwargs, expected value of the _step property)
    step_cases = (
        ({"width": 10, "overlap": 0.5}, 5),  # derived from overlap
        ({"width": 10, "overlap": 1, "step": 1}, 1),  # step beats overlap
        ({"overlap": -1, "step": 1}, 1),  # bad overlap ignored if step valid
    )
    for kwargs, expected_step in step_cases:
        assert transform.SegmentXY(**kwargs)._step == expected_step

    # test shape of segmented data
    base_len, win, n_channels = 100, 5, 5
    segmenter = transform.SegmentXY(width=win)

    def segment(data, targets):
        # fit and transform in one go; also report the number of segments
        segmenter.fit(data, targets)
        seg_data, seg_targets, _ = segmenter.transform(data, targets)
        return seg_data, len(seg_targets)

    # multivariate ts data without context data
    lengths = (base_len, base_len, base_len)
    series = [np.random.rand(n, n_channels) for n in lengths]
    targets = [np.random.rand(n) for n in lengths]
    seg_data, n_segments = segment(series, targets)
    assert seg_data.shape == (n_segments, win, n_channels)

    # univariate ts data without context data
    lengths = (base_len, 2 * base_len, 3 * base_len)
    series = [np.random.rand(n) for n in lengths]
    targets = [np.random.rand(n) for n in lengths]
    seg_data, n_segments = segment(series, targets)
    assert seg_data.shape == (n_segments, win)

    # ts data with context data, as (shape of Xc, expected context columns):
    #   - multivariate context
    #   - univariate context
    #   - as many context vars as time vars; this would cause broadcasting
    #     failure before implementation of TS_Data class
    lengths = (base_len, 2 * base_len, base_len)
    context_cases = (
        ((3, 4), 4),
        ((3, ), 1),
        ((3, n_channels), 5),
    )
    for context_shape, n_context in context_cases:
        series = [np.random.rand(n, n_channels) for n in lengths]
        context = np.random.rand(*context_shape)
        targets = [np.random.rand(n) for n in lengths]
        seg_data, n_segments = segment(TS_Data(series, context), targets)
        seg_series, seg_context = get_ts_data_parts(seg_data)
        assert seg_series.shape == (n_segments, win, n_channels)
        assert seg_context.shape == (n_segments, n_context)
Example #8
0
def test_segmentxy():
    """Check the shapes SegmentXY produces for plain and TS_Data inputs."""
    base_len, win, n_channels = 100, 5, 5
    segmenter = transform.SegmentXY(width=win)

    def segment(data, targets):
        # fit and transform in one go; also report the number of segments
        segmenter.fit(data, targets)
        seg_data, seg_targets, _ = segmenter.transform(data, targets)
        return seg_data, len(seg_targets)

    # multivariate ts data without context data
    lengths = (base_len, base_len, base_len)
    series = [np.random.rand(n, n_channels) for n in lengths]
    targets = [np.random.rand(n) for n in lengths]
    seg_data, n_segments = segment(series, targets)
    assert seg_data.shape == (n_segments, win, n_channels)

    # univariate ts data without context data
    lengths = (base_len, 2 * base_len, 3 * base_len)
    series = [np.random.rand(n) for n in lengths]
    targets = [np.random.rand(n) for n in lengths]
    seg_data, n_segments = segment(series, targets)
    assert seg_data.shape == (n_segments, win)

    # ts data with context data, as (shape of Xc, expected context columns):
    #   - multivariate context
    #   - univariate context
    #   - as many context vars as time vars; this would cause broadcasting
    #     failure before implementation of TS_Data class
    lengths = (base_len, 2 * base_len, base_len)
    context_cases = (
        ((3, 4), 4),
        ((3, ), 1),
        ((3, n_channels), 5),
    )
    for context_shape, n_context in context_cases:
        series = [np.random.rand(n, n_channels) for n in lengths]
        context = np.random.rand(*context_shape)
        targets = [np.random.rand(n) for n in lengths]
        seg_data, n_segments = segment(TS_Data(series, context), targets)
        seg_series, seg_context = get_ts_data_parts(seg_data)
        assert seg_series.shape == (n_segments, win, n_channels)
        assert seg_context.shape == (n_segments, n_context)
Example #9
0
def test_patch_sampler():
    """Exercise transform.patch_sampler on a mocked imbalanced-learn sampler.

    Checks rejection of classes without a resampling interface, pickling of
    the patched class, its repr, and that ``transform`` is a no-op while
    ``fit_transform`` resamples and shuffles the data.
    """
    # test patch_sampler on a class without a fit_resample function
    class EmptyClass:
        pass

    with pytest.raises(TypeError):
        transform.patch_sampler(EmptyClass)

    # test patch_sampler on a mocked imbalanced-learn Sampler class
    plain = MockImblearnSampler()
    patched = transform.patch_sampler(MockImblearnSampler)(shuffle=True,
                                                           random_state=0)
    patched_cls_name = str(patched.__class__)
    assert patched_cls_name != str(plain.__class__)
    restored = pickle.loads(pickle.dumps(patched))
    assert str(patched.__class__) == str(restored.__class__)

    # test representation
    for param_name in ("mocked_param", "random_state", "shuffle"):
        assert param_name in repr(patched)

    def assert_transform_is_noop(data, labels):
        # transform alone must hand back the very same objects
        out_data, out_labels, _ = patched.transform(data, labels)
        assert out_data is data
        assert out_labels is labels

    def expected_resample(data, labels):
        # what the mocked sampler followed by a seeded shuffle should yield
        return shuffle(mock_resample(data),
                       mock_resample(labels),
                       random_state=0)

    def check_plain_array(ts_shape):
        X = np.random.rand(*ts_shape)
        y = np.ones(100)
        assert_transform_is_noop(X, y)
        out_data, out_labels, _ = patched.fit_transform(X, y)
        want_data, want_labels = expected_resample(X, y)
        assert np.array_equal(out_data, want_data)
        assert np.array_equal(out_labels, want_labels)

    def check_ts_data(context_shape):
        X = TS_Data(np.random.rand(100, 10, 4), np.random.rand(*context_shape))
        ts_before, _ = get_ts_data_parts(X)
        y = np.ones(100)
        assert_transform_is_noop(X, y)
        out_data, out_labels, _ = patched.fit_transform(X, y)
        out_ts, _ = get_ts_data_parts(out_data)
        want_ts, want_labels = expected_resample(ts_before, y)
        assert np.array_equal(out_ts, want_ts)
        assert np.array_equal(out_labels, want_labels)

    # multivariate ts
    check_plain_array((100, 10, 4))

    # ts with multivariate contextual data
    check_ts_data((100, 3))

    # ts with univariate contextual data
    check_ts_data((100, ))

    # univariate ts
    check_plain_array((100, 10))
Example #10
0
    def transform(self, X, y, sample_weight=None):
        """
        Cut every time series into the reference windows computed by ``fit``.

        For each group of ``self.n`` consecutive series and each reference
        window ``(start, end)`` of that group, the rows of a series between
        the samples nearest to ``start`` and ``end`` form one new series.
        Column 0 of every series is used as its time axis.

        Parameters
        ----------
        X : array-like or TS_Data
            Time series, optionally bundled with contextual data. The
            contextual records are read through their ``sr`` attribute when
            ``self.enforce_size`` is set.
        y : array-like or None
            Target data, one entry per time series. When ``y[0]`` has two or
            more dimensions, column 0 of each entry is used as its own time
            axis and the entry is windowed independently; otherwise it is
            sliced with the indices found for the matching series in ``X``.
            ``None`` skips all target handling.
        sample_weight : optional
            Returned unchanged.

        Returns
        -------
        Xt : list of arrays, or TS_Data if ``X`` carried contextual data
            The segments, with the contextual record of the source series
            attached to each segment when available.
        y_trans : list of arrays, or None if ``y`` is None
            The target segments, aligned with ``Xt``.
        sample_weight
            The ``sample_weight`` argument.

        Raises
        ------
        ValueError
            If a window would end before it starts in a series or target.
        """
        check_is_fitted(self, ['reference_windows_', 'num_new_ts_'])

        Xt, Xc = get_ts_data_parts(X)
        yt = y
        has_targets = yt is not None
        N = len(Xt)  # Number of time series

        # preallocate new time series data
        Xt_trans = [None] * self.num_new_ts_
        y_trans = [None] * self.num_new_ts_
        Xc_trans = np.recarray(shape=(self.num_new_ts_, ),
                               dtype=contextual_recarray_dtype)

        k = 0
        pbar = tqdm(total=self.num_new_ts_,
                    desc="Segment",
                    disable=(not self._VERBOSE),
                    file=sys.stdout)

        try:
            # get time series which should be segmented together
            for window, selection_idx in zip(
                    self.reference_windows_,
                    moving_window(range(N), window_size=self.n)):

                # get reference windows for segmentation
                for starting_timestamp, ending_timestamp in window:

                    # segment each time series
                    for idx in selection_idx:
                        ts = Xt[idx]
                        start_idx = find_nearest(ts[:, 0], starting_timestamp)

                        if self.enforce_size and Xc is not None:
                            # fixed number of samples derived from the
                            # sampling rate stored with the series
                            stop_idx = start_idx + round2int(
                                self.window_length * Xc[idx].sr)
                        else:
                            stop_idx = find_nearest(ts[:, 0], ending_timestamp)

                        if stop_idx < start_idx:
                            raise ValueError(
                                f"window ends before it starts in time "
                                f"series {idx} "
                                f"(start={start_idx}, stop={stop_idx})")

                        Xt_trans[k] = ts[start_idx:stop_idx, :]

                        if Xc is not None:
                            Xc_trans[k] = Xc[idx]

                        if has_targets and len(yt[0].shape) >= 2:
                            # targets carry their own time axis in column 0
                            start_idx = find_nearest(yt[idx][:, 0],
                                                     starting_timestamp)

                            if self.enforce_size:
                                stop_idx = start_idx + round2int(
                                    self.window_length * get_sampling_rate(
                                        yt[idx][:, 0], t_unit=self._T_UNIT))
                            else:
                                stop_idx = find_nearest(yt[idx][:, 0],
                                                        ending_timestamp)

                            if stop_idx < start_idx:
                                raise ValueError(
                                    f"window ends before it starts in "
                                    f"target {idx} "
                                    f"(start={start_idx}, stop={stop_idx})")

                            y_trans[k] = yt[idx][start_idx:stop_idx, :]
                        elif has_targets:
                            # no time axis of their own: reuse the indices
                            # found for the time series
                            y_trans[k] = yt[idx][start_idx:stop_idx]

                        pbar.update(1)
                        k += 1
        finally:
            # release the progress bar even if segmentation fails
            pbar.close()

        # every preallocated slot must have been filled; target slots are
        # only expected to be filled when targets were given
        segments_missing = any(ts is None for ts in Xt_trans) or (
            has_targets and any(seg is None for seg in y_trans))
        assert not segments_missing, "[CRITICAL] Missing segments."

        # --- find empty windows and delete them ---
        empty_windows = [
            i for i, ts in enumerate(Xt_trans)
            if len(ts) <= 1 or (has_targets and len(y_trans[i]) <= 1)
        ]

        if empty_windows:
            # Filter the Python lists directly: np.delete would first turn
            # the list of variable-length segments into a single ndarray.
            dropped = set(empty_windows)
            Xt_trans = [
                ts for i, ts in enumerate(Xt_trans) if i not in dropped
            ]

            if Xc is not None:
                Xc_trans = np.delete(Xc_trans, empty_windows)

            if has_targets:
                y_trans = [
                    seg for i, seg in enumerate(y_trans) if i not in dropped
                ]

        num_empty_windows = len(empty_windows)

        if num_empty_windows > 0:
            logger.warning(
                f"[{self.__class__.__name__}] {num_empty_windows} windows could not be processed "
                f"and thus removed")

        # --- finalize ---
        if not has_targets:
            y_trans = None

        if Xc is not None:
            Xt = TS_Data(Xt_trans, Xc_trans)
        else:
            Xt = Xt_trans

        return Xt, y_trans, sample_weight
Example #11
0
    def fit(self, X, y=None):
        """
        Compute the reference windows used by ``transform``.

        The series are processed in groups of ``self.n`` consecutive
        entries. For every group a common, evenly spaced reference time
        axis is laid over the span from the earliest first timestamp to the
        latest last timestamp (column 0 of each series), and that axis is
        cut into windows of ``self.window_length`` with ``self.overlap``
        fractional overlap. Only the first and last timestamp of each
        window are kept.

        Parameters
        ----------
        X : array-like or TS_Data
            Time series, optionally bundled with contextual data. The
            contextual records are read through their ``desc`` and ``sr``
            attributes.
        y : ignored
            Not used by this method.

        Returns
        -------
        self
            With ``reference_windows_`` (one list of ``(start, end)`` pairs
            per group) and ``num_new_ts_`` (number of series ``transform``
            will produce) set.
        """
        Xt, Xc = get_ts_data_parts(X)
        N = len(Xt)  # Number of time series

        # check if 'n' is valid
        if ((Xc is not None) and (np.unique(Xc.desc).size != self.n)) or \
                (not isinstance(self.n, int)) or (N % self.n != 0):
            n_suggestion = np.unique(
                Xc.desc).size if Xc is not None else proper_divs(N)
            print(
                f"[WARNING] The value of 'n' ({self.n}) is suspicious. Should be {n_suggestion} most likely."
            )

        self.reference_windows_ = []  # list of reference windows
        self.num_new_ts_ = 0  # number of new time series after transformation

        for selection_idx in moving_window(range(N), window_size=self.n):
            # Pick the group's series one by one. Converting the whole
            # collection with np.array() only to fancy-index it fails when
            # the series have different lengths.
            group = [Xt[idx] for idx in selection_idx]

            start = np.min([ts[0, 0] for ts in group])
            stop = np.max([ts[-1, 0] for ts in group])
            duration = stop - start

            if duration > (60 * 60):
                print(
                    f"[WARN] duration for reference time is quite high ({duration / 60 / 60:.2f}h)."
                    f" Most likely the parameter 'n' is wrong.")

            if Xc is not None:
                # highest sampling rate within the group
                max_sr = np.max(Xc[selection_idx].sr)
                factor = 10**(abs(get_exponent(max_sr) + 1))

                print(f"[INFO] segment {Xc.desc[selection_idx]} together")
            else:
                factor = 1e5

            # candidate resolutions that divide the window length evenly
            divisors = np.array(
                list(proper_divs(round2int(
                    self.window_length * factor)))) / factor

            if Xc is not None:
                # resolution closest to half the shortest sampling period
                precision = divisors[argnear(divisors, 1 / (max_sr * 2))]
            else:
                precision = min(divisors)

            num = round2int(duration / precision)

            # generate reference time stamps
            t_ref, step = np.linspace(start=0,
                                      stop=duration,
                                      num=num,
                                      retstep=True)
            t_ref += start

            # window length in seconds to number of samples
            window_size = round2int(self.window_length / step)

            # get window iterator
            wins = moving_window(sequence=t_ref,
                                 window_size=window_size,
                                 step_size=round2int(window_size *
                                                     (1 - self.overlap)),
                                 incomplete=False)

            # remove unused timestamps from reference windows
            win_ref = [(win[0], win[-1]) for win in wins]

            self.reference_windows_.append(win_ref)
            self.num_new_ts_ += (len(win_ref) * self.n)

        return self