コード例 #1
0
def test_trace_kmeans_differencing():
    """Run DBA k-means on the differenced, low-pass filtered Trace training set.

    The series are loaded from ``rsrc/Trace_TRAIN.txt`` (first column holds the
    label), differenced along the time axis and smoothed with a second-order
    Butterworth low-pass filter before being clustered with ``KMeans``.
    The test returns silently when ``model.fit`` raises
    ``PyClusteringException``.

    When ``directory`` is set and visualization is enabled, two figures are
    written: an overview of the clusters and the series grouped by their true
    label. The plotting branch also verifies that every series was assigned to
    exactly one cluster.
    """
    # NOTE(review): ``directory`` is not defined inside this function; it is
    # presumably a module-level path object -- confirm against the full file.
    with util_numpy.test_uses_numpy() as np, util_numpy.test_uses_scipy() as scipy:
        k = 4
        max_it = 10
        max_dba_it = 20
        nb_prob_samples = 0
        use_c = True
        rsrc_fn = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'rsrc', 'Trace_TRAIN.txt')
        data = np.loadtxt(rsrc_fn)
        labels = data[:, 0]
        series = data[:, 1:]
        print(type(series))
        print(series.shape)
        # Allow warping over at most half of the series length.
        window = int(series.shape[1] * 0.5)

        # Differencing
        # The baseline differences are not relevant thus we cluster based
        # on the result of differencing.
        # Also the high-freq noise dominates the local differences, thus
        # we apply a low-pass filter first.
        signal = scipy.import_signal()
        series_orig = series.copy()
        series = np.diff(series, n=1, axis=1)
        fs = 100  # sample rate, Hz
        cutoff = 10  # cut off frequency, Hz
        nyq = 0.5 * fs  # Nyquist frequency
        b, a = signal.butter(2, cutoff / nyq, btype='low', analog=False, output='ba')
        series = signal.filtfilt(b, a, series, axis=1)

        # Perform k-means
        tic = time.perf_counter()
        model = KMeans(k=k, max_it=max_it, max_dba_it=max_dba_it, drop_stddev=1,
                       nb_prob_samples=nb_prob_samples,
                       dists_options={"window": window},
                       initialize_with_kmedoids=False,
                       initialize_with_kmeanspp=True)
        try:
            cluster_idx, performed_it = model.fit(series, use_c=use_c, use_parallel=False)
        except PyClusteringException:
            return
        toc = time.perf_counter()
        print(f'DBA ({performed_it} iterations): {toc - tic:0.4f} sec')

        if directory and not dtwvis.test_without_visualization():
            try:
                import matplotlib.pyplot as plt
            except ImportError as exc:
                # Keep the original import failure attached as the cause.
                raise MatplotlibException("No matplotlib available") from exc
            fig, ax = plt.subplots(nrows=k, ncols=3, figsize=(10, 4),
                                   sharex='all', sharey='all')
            fn = directory / "test_trace_barycenter.png"

            # Column 0: cluster mean in the differenced domain.
            # Column 2: the unprocessed member series of each cluster.
            all_idx = set()
            for ki in range(k):
                ax[ki, 0].plot(model.means[ki])
                for idx in cluster_idx[ki]:
                    ax[ki, 2].plot(series_orig[idx], alpha=0.3)
                    if idx in all_idx:
                        raise Exception(f'Series in multiple clusters: {idx}')
                    all_idx.add(idx)

            # Z-normalize the original series; only the ground-truth figure
            # below uses the normalized values.
            series_orig = (series_orig - series_orig.mean(axis=1)[:, None]) / series_orig.std(axis=1)[:, None]
            for ki, mean in enumerate(model.means):
                print(mean.shape)
                # Column 1: undo the differencing by cumulative summation,
                # starting from 0.
                dba = np.r_[0, mean].cumsum()
                ax[ki, 1].plot(dba)
            assert len(all_idx) == len(series)
            ax[0, 0].set_title("DBA Differencing + LP")
            ax[0, 1].set_title("DBA Original series")
            ax[0, 2].set_title("Clustered series")
            fig.savefig(str(fn))
            plt.close(fig)

            # Ground truth: one row per label (labels are used as 1-based row
            # indices).
            fig, ax = plt.subplots(nrows=k, ncols=1, figsize=(5, 4),
                                   sharex='all', sharey='all')
            fn = directory / "test_trace_barycenter_solution.png"
            for label, serie in zip(labels, series_orig):
                ax[int(label) - 1].plot(serie, alpha=0.3)
            fig.savefig(str(fn))
            plt.close(fig)
コード例 #2
0
def test_bug_size():
    """Regression test: two series of length 1500 must not cause a size error.

    The warping paths matrix then has 1501**2 = 2_253_001 entries.
    With 64-bit values that is 1501**2*64/(8*1024*1024) = 17.2MiB.

    The distance returned by ``dtw.warping_paths_fast`` is compared with the
    one returned by ``dtw.warping_paths`` on the same random input.
    """
    with util_numpy.test_uses_numpy() as np:
        length = 1500
        series_a = np.random.rand(length)
        series_b = np.random.rand(length)
        # Only the distance (first element of each result) is compared.
        fast_dist, _ = dtw.warping_paths_fast(series_a, series_b)
        ref_dist, _ = dtw.warping_paths(series_a, series_b)
        assert fast_dist == pytest.approx(ref_dist)


# Manual entry point: configure printing and logging when this file is run as
# a script. All test calls below are commented out; uncomment the ones to run.
if __name__ == "__main__":
    # Compact numpy output: 2 decimals, lines up to 120 characters wide.
    with util_numpy.test_uses_numpy() as np:
        np.set_printoptions(precision=2, linewidth=120)
    # Send log records of level WARNING and above to standard output.
    logger.setLevel(logging.WARNING)
    sh = logging.StreamHandler(sys.stdout)
    logger.addHandler(sh)
    # test_bug1()
    # test_distance1_a()
    # test_distance1_b()
    # test_distance2_a()
    # test_distance2_b()
    # test_distance2_c()
    # test_distance3_a()
    # test_distance4()
    # test_distance6()
    # test_bug1_psi()
    # test_bug2()
コード例 #3
0
def test_bug2():
    """Regression test: ``max_dist`` must not change the resulting distance.

    For a fixed pair of series, ``dtw.distance`` and ``dtw.warping_paths``
    are each evaluated with and without ``max_dist=.20``; all four distances
    have to agree.
    """
    with util_numpy.test_uses_numpy() as np:
        series_a = np.array([
            5.005335029629605081e-01, 5.157722489130834864e-01,
            4.804319657333316340e-01, 4.520537745752661318e-01,
            4.867408184050183717e-01, 4.806534229629605415e-01,
            4.530552579964135518e-01, 4.667067057333316171e-01,
            4.567955137333316040e-01, 4.414902037333315876e-01,
            4.240597964014319321e-01, 4.225263829008334970e-01,
            4.030970017333316280e-01, 4.404482984865574768e-01,
            3.852339312962939077e-01, 3.634947117333316435e-01,
            3.861488867383516266e-01, 3.413363679008334928e-01,
            3.451913457333316004e-01, 3.695692377333316680e-01,
            3.434781337333315809e-01, 3.063217006568062506e-01,
            2.845283817333316145e-01, 2.955394357333315791e-01,
            3.151374838781335619e-01, 2.561411067352764026e-01,
            2.301194263297469400e-01, 2.478605028202762184e-01,
            1.972828198566299318e-01, 2.150545617333316228e-01,
            2.232865857333316273e-01, 2.492665580680986370e-01,
            2.144049374050155388e-01, 2.079081117333316520e-01,
            1.879600957333316391e-01, 1.638555197333316227e-01,
            1.425566689000865583e-01, 2.016327177333316067e-01,
            2.290943870240647606e-01, 1.900932117333316296e-01,
            1.503233018025057766e-01, 1.970833717333316248e-01,
            1.999393777333316191e-01, 2.018818837333316019e-01,
            2.554168153357214144e-01, 2.345002377333316179e-01,
            2.407103957333316113e-01, 2.762874997333316096e-01,
            3.059693477333316203e-01, 3.328774862341668528e-01,
            3.583867537333316200e-01, 3.743879884050183016e-01,
            4.266385131705089373e-01, 4.445410410742424712e-01,
            4.642271795675002033e-01, 4.402678696630802357e-01,
            4.814591396296271641e-01, 5.317886460815400840e-01,
            5.548714817383517683e-01, 5.062713000716849709e-01,
            5.431524597333317050e-01, 5.537961812962939323e-01,
            5.720852595675002261e-01, 5.933977447347652534e-01,
            5.845479257333316969e-01, 6.133363017333317568e-01,
            6.276481431102108877e-01, 6.132085097333317414e-01,
            5.922371597333316862e-01, 5.778388756463566089e-01
        ])
        series_b = np.array([
            5.584292601075275808e-01, 5.214504501075275522e-01,
            4.877978901075275542e-01, 5.078206201075274873e-01,
            4.769738701075275644e-01, 4.478925501075275428e-01,
            4.242528301075275676e-01, 4.307546401075275644e-01,
            4.370594201075275187e-01, 4.331284101075275617e-01,
            4.810766301075275475e-01, 4.250942801075275335e-01,
            3.973955801075275684e-01, 4.380910701075275693e-01,
            3.786794801075275552e-01, 3.850050201075275180e-01,
            3.576176301075275621e-01, 2.987050201075275302e-01,
            3.377542001075275468e-01, 3.262601401075275187e-01,
            3.278248801075275276e-01, 3.347294101075275474e-01,
            3.222199801075275594e-01, 3.372712101075275304e-01,
            2.526810801075275448e-01, 1.774206901075275622e-01,
            2.384015601075275825e-01, 2.419624201075275816e-01,
            1.694136001075275677e-01, 1.983933401075275715e-01,
            2.272449101075275646e-01, 1.490059201075275563e-01,
            1.416013701075275744e-01, 1.997542401075275698e-01,
            1.791462801075275613e-01, 1.712680901075275819e-01,
            1.851759601075275707e-01, 1.450854801075275591e-01,
            1.041379601075275718e-01, 9.028068310752757064e-02,
            1.358144301075275839e-01, 2.006444701075275616e-01,
            2.003521501075275768e-01, 2.100136501075275663e-01,
            2.521797401075275280e-01, 2.364524601075275734e-01,
            2.236850301075275771e-01, 2.873612101075275205e-01,
            3.358473801075275156e-01, 3.288144201075275386e-01,
            3.195859301075275605e-01, 3.482947201075275445e-01,
            4.032929801075275655e-01, 4.566962501075275682e-01,
            5.173766201075274962e-01, 5.463256501075275384e-01,
            5.172673701075275465e-01, 5.054312901075275200e-01,
            5.344046101075274890e-01, 5.389180101075274898e-01,
            5.188896901075275014e-01, 5.484243401075274971e-01,
            5.899157901075275934e-01, 5.987863201075275255e-01,
            6.357147701075275270e-01, 6.277379101075275525e-01,
            5.519873201075274904e-01, 5.634240801075275362e-01,
            6.307956401075275332e-01, 6.488636001075275272e-01
        ])
        # Reference value: plain distance without any early-stopping bound.
        reference = dtw.distance(series_a, series_b)
        bounded = dtw.distance(series_a, series_b, max_dist=.20)
        # warping_paths returns (distance, matrix); the matrix is not needed.
        paths_dist, _ = dtw.warping_paths(series_a, series_b)
        paths_bounded_dist, _ = dtw.warping_paths(series_a, series_b, max_dist=.20)
        for candidate in (bounded, paths_dist, paths_bounded_dist):
            assert reference == pytest.approx(candidate)
コード例 #4
0
def test_trace_kmeans():
    """Run DBA k-means on the z-normalized Trace training set.

    The series are loaded from ``rsrc/Trace_TRAIN.txt`` (first column holds the
    label), z-normalized per series and clustered with ``KMeans``.
    The test returns silently when ``model.fit`` raises
    ``PyClusteringException``.

    When ``directory`` is set and visualization is enabled, two figures are
    written: the cluster means next to their member series, and the series
    grouped by their true label. The plotting branch also verifies that every
    series was assigned to exactly one cluster.
    """
    # NOTE(review): ``directory`` is not defined inside this function; it is
    # presumably a module-level path object -- confirm against the full file.
    with util_numpy.test_uses_numpy() as np:
        k = 4
        max_it = 10
        max_dba_it = 20
        rsrc_fn = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'rsrc', 'Trace_TRAIN.txt')
        data = np.loadtxt(rsrc_fn)
        labels = data[:, 0]
        series = data[:, 1:]
        print(type(series))
        print(series.shape)
        # Allow warping over at most half of the series length.
        window = int(series.shape[1] * 0.5)

        # Z-normalize sequences
        series = (series - series.mean(axis=1)[:, None]) / series.std(axis=1)[:, None]

        # Perform k-means
        tic = time.perf_counter()
        model = KMeans(k=k, max_it=max_it, max_dba_it=max_dba_it, drop_stddev=1,
                       nb_prob_samples=0,
                       dists_options={"window": window},
                       initialize_with_kmedoids=False,
                       initialize_with_kmeanspp=True)
        try:
            cluster_idx, performed_it = model.fit(series, use_c=True, use_parallel=False)
        except PyClusteringException:
            return
        toc = time.perf_counter()
        print(f'DBA ({performed_it} iterations): {toc - tic:0.4f} sec')

        if directory and not dtwvis.test_without_visualization():
            try:
                import matplotlib.pyplot as plt
            except ImportError as exc:
                # Keep the original import failure attached as the cause.
                raise MatplotlibException("No matplotlib available") from exc
            fig, ax = plt.subplots(nrows=k, ncols=2, figsize=(10, 4),
                                   sharex='all', sharey='all')
            fn = directory / "test_trace_barycenter.png"

            # Column 0: cluster mean; column 1: member series of the cluster.
            all_idx = set()
            for ki in range(k):
                ax[ki, 0].plot(model.means[ki])
                for idx in cluster_idx[ki]:
                    ax[ki, 1].plot(series[idx], alpha=0.3)
                    if idx in all_idx:
                        raise Exception(f'Series in multiple clusters: {idx}')
                    all_idx.add(idx)
            assert len(all_idx) == len(series)
            fig.savefig(str(fn))
            plt.close(fig)

            # Ground truth: one row per label (labels are used as 1-based row
            # indices).
            fig, ax = plt.subplots(nrows=k, ncols=1, figsize=(5, 4),
                                   sharex='all', sharey='all')
            fn = directory / "test_trace_barycenter_solution.png"
            for label, serie in zip(labels, series):
                ax[int(label) - 1].plot(serie, alpha=0.3)
            fig.savefig(str(fn))
            plt.close(fig)
コード例 #5
0
def test_distance_matrix1_b():
    """Entry [0, 1] of ``dtw.distance_matrix`` (parallel, ``use_c=False``) must be sqrt(2)."""
    with util_numpy.test_uses_numpy() as np:
        raw_series = [
            [0, 0, 1, 2, 1, 0, 1, 0, 0],
            [0, 1, 2, 0, 0, 0, 0, 0, 0],
        ]
        # The series are passed as a list of separate numpy arrays.
        series = list(map(np.array, raw_series))
        expected = math.sqrt(2)
        matrix = dtw.distance_matrix(series, parallel=True, use_c=False)
        assert matrix[0, 1] == pytest.approx(expected)
コード例 #6
0
def test_distance6():
    """Call ``dtw.distance_fast`` with ``window=2`` on two double-precision series."""
    with util_numpy.test_uses_numpy() as np:
        first = np.array([0, 0, 1, 2, 1, 0, 1, 0, 0], dtype=np.double)
        second = np.array([0.0, 1, 2, 0, 0, 0, 0, 0, 0])
        # NOTE(review): the result is never asserted on; as written this test
        # only checks that the call completes without raising.
        d = dtw.distance_fast(first, second, window=2)
コード例 #7
0
def test_distance1_d():
    """``dtw.distance_fast`` on the two float reference series must equal sqrt(2)."""
    with util_numpy.test_uses_numpy() as np:
        first = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0])
        second = np.array([0., 1, 2, 0, 0, 0, 0, 0, 0])
        result = dtw.distance_fast(first, second)
        expected = math.sqrt(2)
        assert result == pytest.approx(expected)
コード例 #8
0
def test_distance1_c():
    """``dtw.distance_fast`` must equal sqrt(2) when one series is built with ``dtype=np.double``."""
    with util_numpy.test_uses_numpy() as np:
        first = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0])
        # Integer literals, explicitly converted to double precision.
        second = np.array([0, 1, 2, 0, 0, 0, 0, 0, 0], dtype=np.double)
        result = dtw.distance_fast(first, second)
        expected = math.sqrt(2)
        assert result == pytest.approx(expected)
コード例 #9
0
def test_decisiontree(directory=None):
    """Fit ``dtww.DecisionTreeClassifier`` on a small two-class 2D data set.

    :param directory: Optional output directory (used with the ``/`` operator).
        When given, a scatter plot of the features is saved as
        ``features.png`` and, if scikit-learn can be imported, the fitted
        tree is exported to ``hierarchy.dot``.
    """
    with util_numpy.test_uses_numpy() as np:
        features = np.array([
            [0.5395256916996046, 0.5925000000000002],
            [0.507905138339921, 0.6900000000000002],
            [0.7430830039525692, 0.7150000000000001],
            [0.7391304347826088, 0.7300000000000002],
            [0.6857707509881423, 0.4700000000000002],
            [0.7272727272727273, 0.40500000000000014],
            [0.6936758893280632, 0.4125000000000002],
            [0.6897233201581027, 0.26000000000000023],
            [0.616600790513834, 0.5025000000000002],
            [0.5810276679841897, 0.4550000000000002],
            [0.4841897233201582, 0.3875000000000002],
            [0.3181818181818181, 0.3600000000000001],
            [0.28063241106719367, 0.47250000000000014],
            [0.2549407114624505, 0.5725000000000002],
            [0.39920948616600793, 0.6125000000000002],
            [0.39525691699604737, 0.6175000000000002],
            [0.375494071146245, 0.6475000000000001],
            [0.3359683794466403, 0.6350000000000001],
            [0.34584980237154145, 0.7275000000000001],
            [0.38537549407114624, 0.7375000000000002],
            [0.2075098814229248, 0.8650000000000001],
            [0.3774703557312252, 0.7600000000000001],
            [0.4624505928853755, 0.7500000000000001],
            [0.5276679841897233, 0.8425],
            [0.6383399209486166, 0.8925000000000001],
            [0.6798418972332015, 0.8275000000000001],
            [0.782608695652174, 0.7550000000000001],
            [0.7608695652173912, 0.5575000000000001],
            [0.8537549407114624, 0.5550000000000002],
            [0.8972332015810277, 0.27000000000000024],
            [0.7549407114624507, 0.1575000000000003],
            [0.5790513833992094, 0.1525000000000002],
            [0.5118577075098814, 0.2100000000000002],
            [0.43083003952569165, 0.03500000000000014],
            [0.4209486166007905, 0.05500000000000016],
            [0.3320158102766798, 0.16000000000000025],
            [0.22332015810276673, 0.05250000000000021],
            [0.011857707509881382, 0.2975000000000001],
            [0.14229249011857703, 0.4425000000000002],
            [0.19565217391304346, 0.5900000000000001]
        ])
        targets = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
        if directory:
            import matplotlib.pyplot as plt
            plt.figure(figsize=(3, 3))
            # The first 20 rows are drawn with "+", the remaining rows with ".".
            for rows, marker in ((slice(None, 20), "+"), (slice(20, None), ".")):
                plt.scatter(features[rows, 0], features[rows, 1], marker=marker)
            plt.xlim([-0.1, 1.1])
            plt.ylim([-0.1, 1.1])
            plt.savefig(str(directory / "features.png"))
            plt.close()

        classifier = dtww.DecisionTreeClassifier()
        classifier.fit(features, targets, use_feature_once=False)

        if not directory:
            return
        # Exporting the tree is optional: skip it when scikit-learn is missing.
        try:
            from sklearn.tree import export_graphviz
        except ImportError:
            return
        export_graphviz(classifier, out_file=str(directory / "hierarchy.dot"))
コード例 #10
0
def test_distance1_a():
    """``ed.distance_fast`` on the two reference series must equal 2.8284271247461903."""
    with util_numpy.test_uses_numpy() as np:
        first = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0])
        second = np.array([0., 1, 2, 0, 0, 0, 0, 0, 0])
        expected = 2.8284271247461903
        result = ed.distance_fast(first, second)
        assert result == pytest.approx(expected)
コード例 #11
0
def test_kdistance2():
    """``DecisionTreeClassifier.kdistance`` must return 0.0 for all-zero values and threshold 0.0."""
    with util_numpy.test_uses_numpy() as np:
        threshold = 0.0
        zeros = np.array([0., 0., 0.])
        result = dtww.DecisionTreeClassifier.kdistance(zeros, threshold)
        assert result == pytest.approx(0.0)
コード例 #12
0
def test_kdistance():
    """``DecisionTreeClassifier.kdistance`` must return 1.5 for the sample values at threshold 4.5."""
    with util_numpy.test_uses_numpy() as np:
        threshold = 4.5
        samples = np.array([1, 2, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9])
        result = dtww.DecisionTreeClassifier.kdistance(samples, threshold)
        assert result == pytest.approx(1.5)
コード例 #13
0
def test_split():
    """``informationgain_continuous`` must pick 4.5 as threshold for the sample data."""
    with util_numpy.test_uses_numpy() as np:
        samples = np.array([1, 2, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9])
        classes = np.array([1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0])
        outcome = dtww.DecisionTreeClassifier.informationgain_continuous(classes, samples)
        # The call returns three values; only the second (threshold) is checked.
        _gain, threshold, _rest = outcome
        assert threshold == pytest.approx(4.5)
コード例 #14
0
ファイル: test_dtw.py プロジェクト: wannesm/dtaidistance
def test_distance1_b():
    """``dtw.warping_paths`` on plain Python lists must return a distance of sqrt(2)."""
    with util_numpy.test_uses_numpy() as np:
        # Plain lists on purpose: no numpy arrays are built here.
        first = [0, 0, 1, 2, 1, 0, 1, 0, 0]
        second = [0, 1, 2, 0, 0, 0, 0, 0, 0]
        distance, _paths = dtw.warping_paths(first, second)
        expected = math.sqrt(2)
        assert distance == pytest.approx(expected)
コード例 #15
0
def test_distance1_b():
    with util_numpy.test_uses_numpy() as np:
        s1 = np.array([[0, 0], [0, 1], [2, 1], [0, 1],  [0, 0]], dtype=np.double)
        s2 = np.array([[0, 0], [2, 1], [0, 1], [0, .5], [0, 0]], dtype=np.double)
        d1 = dtw_ndim.distance_fast(s1, s2)