Example No. 1
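The examples below omit their imports. A plausible header, assuming a recent sktime module layout (these paths have moved between releases, so treat them as an assumption rather than the exact ones used by the test module):

import sys

import numpy as np

from sktime.datasets import load_gunpoint
from sktime.datatypes._panel._convert import from_nested_to_2d_array
from sktime.transformations.panel.dictionary_based import SFA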
def test_dft_mft(use_fallback_dft, norm):
    # load training data
    X, y = load_gunpoint(split="train", return_X_y=True)
    X_tab = from_nested_to_2d_array(X, return_numpy=True)

    word_length = 6
    alphabet_size = 4

    # Single DFT transformation
    window_size = np.shape(X_tab)[1]

    p = SFA(
        word_length=word_length,
        alphabet_size=alphabet_size,
        window_size=window_size,
        norm=norm,
        use_fallback_dft=use_fallback_dft,
    ).fit(X, y)

    if use_fallback_dft:
        dft = p._discrete_fourier_transform(X_tab[0], word_length, norm, 1, True)
    else:
        dft = p._fast_fourier_transform(X_tab[0])

    mft = p._mft(X_tab[0])

    assert (mft - dft < 0.0001).all()

    # Windowed DFT transformation
    window_size = 140

    p = SFA(
        word_length=word_length,
        alphabet_size=alphabet_size,
        window_size=window_size,
        norm=norm,
        use_fallback_dft=use_fallback_dft,
    ).fit(X, y)

    mft = p._mft(X_tab[0])
    for i in range(len(X_tab[0]) - window_size + 1):
        if use_fallback_dft:
            dft = p._discrete_fourier_transform(
                X_tab[0, i : window_size + i], word_length, norm, 1, True
            )
        else:
            dft = p._fast_fourier_transform(X_tab[0, i : window_size + i])

        assert (mft[i] - dft < 0.001).all()

    assert len(mft) == len(X_tab[0]) - window_size + 1
    assert len(mft[0]) == word_length
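test_dft_mft takes use_fallback_dft and norm as arguments, so it is presumably driven by pytest parametrization; the decorators are not shown in the listing. A hypothetical setup covering both flags (the actual parameter values in the test module may differ):

import pytest

@pytest.mark.parametrize("use_fallback_dft", [True, False])
@pytest.mark.parametrize("norm", [True, False])
def test_dft_mft(use_fallback_dft, norm):
    ...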
Example No. 2
def test_word_lengths(
    word_length, alphabet_size, window_size, bigrams, levels, use_fallback_dft
):
    # load training data
    X, y = load_gunpoint(split="train", return_X_y=True)

    p = SFA(
        word_length=word_length,
        alphabet_size=alphabet_size,
        window_size=window_size,
        bigrams=bigrams,
        levels=levels,
        use_fallback_dft=use_fallback_dft,
    ).fit(X, y)

    assert p.breakpoints is not None
    _ = p.transform(X, y)
Example No. 3
def test_transformer(binning_method):
    # load training data
    X, y = load_gunpoint(split="train", return_X_y=True)

    word_length = 6
    alphabet_size = 4

    p = SFA(
        word_length=word_length,
        alphabet_size=alphabet_size,
        binning_method=binning_method,
    )
    p.fit(X, y)

    assert p.breakpoints.shape == (word_length, alphabet_size)
    i = sys.float_info.max if binning_method == "information-gain" else 0
    assert np.equal(i, p.breakpoints[1, :-1]).all()  # imag breakpoints are 0 or sys.float_info.max
    _ = p.transform(X, y)
Example No. 4
def test_transformer():
    # load training data
    X, y = load_gunpoint(split="train", return_X_y=True)

    word_length = 6
    alphabet_size = 4

    p = SFA(
        word_length=word_length,
        alphabet_size=alphabet_size,
        anova=False,
        binning_method="equi-depth",
    ).fit(X, y)

    # print("Equi Depth")
    # print(p.breakpoints)

    assert p.breakpoints.shape == (word_length, alphabet_size)
    assert np.equal(0, p.breakpoints[1, :-1]).all()  # imag component is 0
    _ = p.transform(X, y)

    p = SFA(
        word_length=word_length,
        alphabet_size=alphabet_size,
        anova=False,
        binning_method="equi-width",
    ).fit(X, y)

    # print("Equi Width")
    # print(p.breakpoints)

    assert p.breakpoints.shape == (word_length, alphabet_size)
    assert np.equal(0, p.breakpoints[1, :-1]).all()  # imag component is 0
    _ = p.transform(X, y)

    p = SFA(
        word_length=word_length,
        alphabet_size=alphabet_size,
        anova=False,
        binning_method="information-gain",
    ).fit(X, y)
    # print("Information Gain")
    # print(p.breakpoints)

    assert p.breakpoints.shape == (word_length, alphabet_size)

    # print(p.breakpoints[1, :-1])
    assert np.equal(sys.float_info.max, p.breakpoints[1, :-1]).all()
    _ = p.transform(X, y)
Example No. 5
def test_bit_size():
    # load training data
    X, y = load_gunpoint(split="train", return_X_y=True)

    word_length = 40
    alphabet_size = 12
    window_size = 75

    p = SFA(
        word_length=word_length,
        alphabet_size=alphabet_size,
        levels=2,
        bigrams=True,
        window_size=window_size,
    ).fit(X, y)

    w = p.transform(X)
    lengths = [x.bit_length() for x in list(w[0][0].keys())]

    assert np.min(lengths) > 128
    assert len(p.word_list(list(w[0][0].keys())[0])[0]) == 40
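The bit-length assertion relies on SFA words being arbitrary-precision Python integers: with word_length=40 and alphabet_size=12 (4 bits per letter), plus level and bigram information, the packed word plausibly exceeds 128 bits. A minimal illustration of the int.bit_length() builtin the check uses, unrelated to SFA internals:

# bit_length() reports how many bits are needed to represent an int,
# with no 64-bit ceiling.
x = 1 << 130
assert x.bit_length() == 131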
Example No. 6
def test_sfa_anova(binning_method):
    # load training data
    X, y = load_gunpoint(split="train", return_X_y=True)

    word_length = 6
    alphabet_size = 4

    # SFA with ANOVA one-sided test
    window_size = 32
    p = SFA(
        word_length=word_length,
        anova=True,
        alphabet_size=alphabet_size,
        window_size=window_size,
        binning_method=binning_method,
    ).fit(X, y)

    assert p.breakpoints.shape == (word_length, alphabet_size)
    _ = p.transform(X, y)

    # SFA with the first frequency coefficients
    p2 = SFA(
        word_length=word_length,
        anova=False,
        alphabet_size=alphabet_size,
        window_size=window_size,
        binning_method=binning_method,
    ).fit(X, y)

    assert p.dft_length != p2.dft_length
    assert (p.breakpoints != p2.breakpoints).any()
    _ = p2.transform(X, y)
Example No. 7
def test_word_lengths():
    # load training data
    X, y = load_gunpoint(split="train", return_X_y=True)

    word_lengths = [6, 7]
    alphabet_size = 4
    window_sizes = [5, 6]

    try:
        for binning in ["equi-depth", "information-gain"]:
            for word_length in word_lengths:
                for bigrams in [True, False]:
                    for norm in [True, False]:
                        for anova in [True, False]:
                            for window_size in window_sizes:
                                p = SFA(
                                    word_length=word_length,
                                    anova=anova,
                                    alphabet_size=alphabet_size,
                                    bigrams=bigrams,
                                    window_size=window_size,
                                    norm=norm,
                                    binning_method=binning,
                                ).fit(X, y)

                                # print("Norm", norm, "Anova", anova)
                                # print(np.shape(p.breakpoints), word_length,
                                #      window_size)
                                # print("dft_length", p.dft_length,
                                #      "word_length", p.word_length)
                                assert p.breakpoints is not None

                                _ = p.transform(X, y)

    except Exception as err:
        raise AssertionError(
            "An unexpected exception {0} was raised.".format(repr(err))
        )
Example No. 8
def test_dft_mft():
    # load training data
    X, Y = load_gunpoint(split="train", return_X_y=True)
    X_tab = from_nested_to_2d_array(X, return_numpy=True)

    word_length = 6
    alphabet_size = 4

    # print("Single DFT transformation")
    window_size = np.shape(X_tab)[1]
    p = SFA(
        word_length=word_length,
        alphabet_size=alphabet_size,
        window_size=window_size,
        binning_method="equi-depth",
    ).fit(X, Y)
    dft = p._discrete_fourier_transform(X_tab[0])
    mft = p._mft(X_tab[0])

    assert (mft - dft < 0.0001).all()

    # print("Windowed DFT transformation")

    for norm in [True, False]:
        for window_size in [140]:
            p = SFA(
                word_length=word_length,
                norm=norm,
                alphabet_size=alphabet_size,
                window_size=window_size,
                binning_method="equi-depth",
            ).fit(X, Y)
            mft = p._mft(X_tab[0])
            for i in range(len(X_tab[0]) - window_size + 1):
                dft_transformed = p._discrete_fourier_transform(
                    X_tab[0, i:window_size + i])
                assert (mft[i] - dft_transformed < 0.001).all()

            assert len(mft) == len(X_tab[0]) - window_size + 1
            assert len(mft[0]) == word_length
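Both DFT/MFT tests encode the same property: row i of the MFT output should match a plain Fourier transform of the window starting at position i. A standalone numpy sketch of the windowed spectra the MFT is compared against (not the SFA internals, which also truncate and optionally normalise the coefficients):

import numpy as np

rng = np.random.default_rng(0)
series = rng.normal(size=200)
window_size = 140

# One spectrum per sliding window of the series.
windows = np.lib.stride_tricks.sliding_window_view(series, window_size)
spectra = np.fft.rfft(windows, axis=1)

assert spectra.shape[0] == len(series) - window_size + 1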
Example No. 9
def test_sfa_anova():
    # load training data
    X, y = load_gunpoint(split="train", return_X_y=True)

    word_length = 6
    alphabet_size = 4

    for binning in ["information-gain", "equi-depth"]:
        # print("SFA with ANOVA one-sided test")
        window_size = 32
        p = SFA(
            word_length=word_length,
            anova=True,
            alphabet_size=alphabet_size,
            window_size=window_size,
            binning_method=binning,
        ).fit(X, y)

        # print(p.breakpoints)
        # print(p.support)
        # print(p.dft_length)

        assert p.breakpoints.shape == (word_length, alphabet_size)
        _ = p.transform(X, y)

        # print("SFA with first feq coefficients")
        p2 = SFA(
            word_length=word_length,
            anova=False,
            alphabet_size=alphabet_size,
            window_size=window_size,
            binning_method=binning,
        ).fit(X, y)

        # print(p2.breakpoints)
        # print(p2.support)
        # print(p2.dft_length)

        assert p.dft_length != p2.dft_length
        assert (p.breakpoints != p2.breakpoints).any()
        _ = p2.transform(X, y)
Example No. 10
def test_typed_dict():
    # load training data
    X, y = load_gunpoint(split="train", return_X_y=True)

    word_length = 6
    alphabet_size = 4

    p = SFA(
        word_length=word_length,
        alphabet_size=alphabet_size,
        levels=2,
        typed_dict=True,
    )
    p.fit(X, y)
    word_list = p.bag_to_string(p.transform(X, y)[0][0])

    word_length = 6
    alphabet_size = 4

    p2 = SFA(
        word_length=word_length,
        alphabet_size=alphabet_size,
        levels=2,
        typed_dict=False,
    )
    p2.fit(X, y)
    word_list2 = p2.bag_to_string(p2.transform(X, y)[0][0])

    assert word_list == word_list2
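For context, a minimal end-to-end use of the transformer outside the test suite, assuming the same sktime API the tests exercise (transform returning, per series, a bag-of-words dictionary of packed SFA words and their counts); parameter values here are illustrative only:

X, y = load_gunpoint(split="train", return_X_y=True)

sfa = SFA(word_length=8, alphabet_size=4, window_size=32).fit(X, y)
bags = sfa.transform(X, y)

# bags[0][0] is the bag-of-words for the first series, as the tests
# above rely on.
print(len(bags[0][0]), "distinct words in the first series")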