def test_trace_kmeans_differencing():
    """Cluster the differenced, low-pass filtered Trace training series with KMeans.

    The series are loaded from ``rsrc/Trace_TRAIN.txt`` next to this file
    (first column: label, remaining columns: the series). The baseline
    differences between series are not relevant, so clustering is done on the
    first-order difference of each series. High-frequency noise dominates
    those local differences, so a low-pass Butterworth filter is applied
    after differencing.

    When the module-level ``directory`` is set and visualization is not
    disabled, two figures are written: the cluster means next to their
    re-integrated (cumulative sum) version and the clustered original series,
    and the original series grouped by their true label.

    The test returns silently when the clustering raises
    ``PyClusteringException``.

    :raises MatplotlibException: If plotting is requested but matplotlib
        cannot be imported.
    """
    with util_numpy.test_uses_numpy() as np, util_numpy.test_uses_scipy() as scipy:
        k = 4
        max_it = 10
        max_dba_it = 20
        nb_prob_samples = 0
        use_c = True

        rsrc_fn = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'rsrc', 'Trace_TRAIN.txt')
        data = np.loadtxt(rsrc_fn)
        labels = data[:, 0]
        series = data[:, 1:]
        print(type(series))
        print(series.shape)
        window = int(series.shape[1] * 0.5)

        # Differencing
        # The baseline differences are not relevant thus we cluster based
        # on the result of differencing.
        # Also the high-freq noise dominates the local differences, thus
        # we apply a low-pass filter first.
        signal = scipy.import_signal()
        series_orig = series.copy()
        series = np.diff(series, n=1, axis=1)
        fs = 100  # sample rate, Hz
        cutoff = 10  # cut off frequency, Hz
        nyq = 0.5 * fs  # Nyquist frequency
        b, a = signal.butter(2, cutoff / nyq, btype='low', analog=False, output='ba')
        series = signal.filtfilt(b, a, series, axis=1)

        # Perform k-means
        tic = time.perf_counter()
        model = KMeans(k=k, max_it=max_it, max_dba_it=max_dba_it, drop_stddev=1,
                       nb_prob_samples=nb_prob_samples,
                       dists_options={"window": window},
                       initialize_with_kmedoids=False,
                       initialize_with_kmeanspp=True)
        try:
            cluster_idx, performed_it = model.fit(series, use_c=use_c, use_parallel=False)
        except PyClusteringException:
            # Clustering could not be performed; nothing left to check.
            return
        toc = time.perf_counter()
        print(f'DBA ({performed_it} iterations: {toc - tic:0.4f} sec')

        if directory and not dtwvis.test_without_visualization():
            try:
                import matplotlib.pyplot as plt
            except ImportError as exc:
                # Keep the original ImportError attached as the cause.
                raise MatplotlibException("No matplotlib available") from exc

            fig, ax = plt.subplots(nrows=k, ncols=3, figsize=(10, 4),
                                   sharex='all', sharey='all')
            fn = directory / "test_trace_barycenter.png"
            all_idx = set()
            # Cluster membership per series; only needed for the dba_loop
            # alternative that is commented out below.
            mask = np.full((k, len(series_orig)), False, dtype=bool)
            for ki in range(k):
                ax[ki, 0].plot(model.means[ki])
                for idx in cluster_idx[ki]:
                    ax[ki, 2].plot(series_orig[idx], alpha=0.3)
                    mask[ki, idx] = True
                    if idx in all_idx:
                        raise Exception(f'Series in multiple clusters: {idx}')
                    all_idx.add(idx)

            # Z-normalize the original series (used for the solution figure).
            series_orig = (series_orig - series_orig.mean(axis=1)[:, None]) / series_orig.std(axis=1)[:, None]
            for ki, mean in enumerate(model.means):
                # dba = dba_loop(series_orig, c=None, mask=mask[ki, :],
                #                max_it=max_it, thr=None, use_c=use_c,
                #                nb_prob_samples=nb_prob_samples)
                print(mean.shape)
                # Undo the differencing: prepend 0 and take the cumulative sum.
                dba = np.r_[0, mean].cumsum()
                ax[ki, 1].plot(dba)

            # Every series must be assigned to exactly one cluster.
            assert len(all_idx) == len(series)
            ax[0, 0].set_title("DBA Differencing + LP")
            ax[0, 1].set_title("DBA Original series")
            ax[0, 2].set_title("Clustered series")
            fig.savefig(str(fn))
            plt.close(fig)

            # Reference figure: series grouped by their label from the file.
            fig, ax = plt.subplots(nrows=k, ncols=1, figsize=(5, 4),
                                   sharex='all', sharey='all')
            fn = directory / "test_trace_barycenter_solution.png"
            for label, original in zip(labels, series_orig):
                ax[int(label) - 1].plot(original, alpha=0.3)
            fig.savefig(str(fn))
            plt.close(fig)
def test_bug_size():
    """Two series of length 1500 should not trigger a size error.

    The warping paths matrix has 1501**2 = 2_253_001 entries. With 64-bit
    values that is 1501**2*64/(8*1024*1024) = 17.2MiB.

    The distance returned by ``dtw.warping_paths_fast`` must match the one
    returned by ``dtw.warping_paths`` for the same pair of random series.
    """
    with util_numpy.test_uses_numpy() as np:
        length = 1500
        first = np.random.rand(length)
        second = np.random.rand(length)
        fast_dist, _ = dtw.warping_paths_fast(first, second)
        ref_dist, _ = dtw.warping_paths(first, second)
        assert fast_dist == pytest.approx(ref_dist)


if __name__ == "__main__":
    # Compact array printing when the tests are run as a script.
    with util_numpy.test_uses_numpy() as np:
        np.set_printoptions(precision=2, linewidth=120)
    # Send log records at WARNING level and above to stdout.
    logger.setLevel(logging.WARNING)
    sh = logging.StreamHandler(sys.stdout)
    logger.addHandler(sh)
    # Uncomment the test to run:
    # test_bug1()
    # test_distance1_a()
    # test_distance1_b()
    # test_distance2_a()
    # test_distance2_b()
    # test_distance2_c()
    # test_distance3_a()
    # test_distance4()
    # test_distance6()
    # test_bug1_psi()
    # test_bug2()
def test_bug2():
    """``max_dist=.20`` must not change the DTW distance for this pair of series.

    The distance is computed four ways (``dtw.distance`` and
    ``dtw.warping_paths``, each with and without ``max_dist=.20``) and all
    results are expected to be approximately equal.
    """
    with util_numpy.test_uses_numpy() as np:
        first = np.array([
            5.005335029629605081e-01, 5.157722489130834864e-01,
            4.804319657333316340e-01, 4.520537745752661318e-01,
            4.867408184050183717e-01, 4.806534229629605415e-01,
            4.530552579964135518e-01, 4.667067057333316171e-01,
            4.567955137333316040e-01, 4.414902037333315876e-01,
            4.240597964014319321e-01, 4.225263829008334970e-01,
            4.030970017333316280e-01, 4.404482984865574768e-01,
            3.852339312962939077e-01, 3.634947117333316435e-01,
            3.861488867383516266e-01, 3.413363679008334928e-01,
            3.451913457333316004e-01, 3.695692377333316680e-01,
            3.434781337333315809e-01, 3.063217006568062506e-01,
            2.845283817333316145e-01, 2.955394357333315791e-01,
            3.151374838781335619e-01, 2.561411067352764026e-01,
            2.301194263297469400e-01, 2.478605028202762184e-01,
            1.972828198566299318e-01, 2.150545617333316228e-01,
            2.232865857333316273e-01, 2.492665580680986370e-01,
            2.144049374050155388e-01, 2.079081117333316520e-01,
            1.879600957333316391e-01, 1.638555197333316227e-01,
            1.425566689000865583e-01, 2.016327177333316067e-01,
            2.290943870240647606e-01, 1.900932117333316296e-01,
            1.503233018025057766e-01, 1.970833717333316248e-01,
            1.999393777333316191e-01, 2.018818837333316019e-01,
            2.554168153357214144e-01, 2.345002377333316179e-01,
            2.407103957333316113e-01, 2.762874997333316096e-01,
            3.059693477333316203e-01, 3.328774862341668528e-01,
            3.583867537333316200e-01, 3.743879884050183016e-01,
            4.266385131705089373e-01, 4.445410410742424712e-01,
            4.642271795675002033e-01, 4.402678696630802357e-01,
            4.814591396296271641e-01, 5.317886460815400840e-01,
            5.548714817383517683e-01, 5.062713000716849709e-01,
            5.431524597333317050e-01, 5.537961812962939323e-01,
            5.720852595675002261e-01, 5.933977447347652534e-01,
            5.845479257333316969e-01, 6.133363017333317568e-01,
            6.276481431102108877e-01, 6.132085097333317414e-01,
            5.922371597333316862e-01, 5.778388756463566089e-01,
        ])
        second = np.array([
            5.584292601075275808e-01, 5.214504501075275522e-01,
            4.877978901075275542e-01, 5.078206201075274873e-01,
            4.769738701075275644e-01, 4.478925501075275428e-01,
            4.242528301075275676e-01, 4.307546401075275644e-01,
            4.370594201075275187e-01, 4.331284101075275617e-01,
            4.810766301075275475e-01, 4.250942801075275335e-01,
            3.973955801075275684e-01, 4.380910701075275693e-01,
            3.786794801075275552e-01, 3.850050201075275180e-01,
            3.576176301075275621e-01, 2.987050201075275302e-01,
            3.377542001075275468e-01, 3.262601401075275187e-01,
            3.278248801075275276e-01, 3.347294101075275474e-01,
            3.222199801075275594e-01, 3.372712101075275304e-01,
            2.526810801075275448e-01, 1.774206901075275622e-01,
            2.384015601075275825e-01, 2.419624201075275816e-01,
            1.694136001075275677e-01, 1.983933401075275715e-01,
            2.272449101075275646e-01, 1.490059201075275563e-01,
            1.416013701075275744e-01, 1.997542401075275698e-01,
            1.791462801075275613e-01, 1.712680901075275819e-01,
            1.851759601075275707e-01, 1.450854801075275591e-01,
            1.041379601075275718e-01, 9.028068310752757064e-02,
            1.358144301075275839e-01, 2.006444701075275616e-01,
            2.003521501075275768e-01, 2.100136501075275663e-01,
            2.521797401075275280e-01, 2.364524601075275734e-01,
            2.236850301075275771e-01, 2.873612101075275205e-01,
            3.358473801075275156e-01, 3.288144201075275386e-01,
            3.195859301075275605e-01, 3.482947201075275445e-01,
            4.032929801075275655e-01, 4.566962501075275682e-01,
            5.173766201075274962e-01, 5.463256501075275384e-01,
            5.172673701075275465e-01, 5.054312901075275200e-01,
            5.344046101075274890e-01, 5.389180101075274898e-01,
            5.188896901075275014e-01, 5.484243401075274971e-01,
            5.899157901075275934e-01, 5.987863201075275255e-01,
            6.357147701075275270e-01, 6.277379101075275525e-01,
            5.519873201075274904e-01, 5.634240801075275362e-01,
            6.307956401075275332e-01, 6.488636001075275272e-01,
        ])

        # Reference value: plain distance without any pruning threshold.
        reference = dtw.distance(first, second)
        pruned = dtw.distance(first, second, max_dist=.20)
        from_paths, _paths = dtw.warping_paths(first, second)
        from_paths_pruned, _paths_pruned = dtw.warping_paths(first, second, max_dist=.20)

        # The warping-path matrices are only of interest when debugging, e.g.
        # by dumping them with np.savetxt and comparing the two files.
        for candidate in (pruned, from_paths, from_paths_pruned):
            assert reference == pytest.approx(candidate)
def test_trace_kmeans():
    """Cluster the z-normalized Trace training series with KMeans.

    The series are loaded from ``rsrc/Trace_TRAIN.txt`` next to this file
    (first column: label, remaining columns: the series), z-normalized per
    series and clustered into ``k = 4`` clusters with a ``window`` distance
    option of half the series length.

    When the module-level ``directory`` is set and visualization is not
    disabled, two figures are written: the cluster means next to their
    member series, and the series grouped by their true label. In that branch
    the test also checks that every series ends up in exactly one cluster.

    The test returns silently when the clustering raises
    ``PyClusteringException``.

    :raises MatplotlibException: If plotting is requested but matplotlib
        cannot be imported.
    """
    with util_numpy.test_uses_numpy() as np:
        k = 4
        max_it = 10
        max_dba_it = 20

        rsrc_fn = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'rsrc', 'Trace_TRAIN.txt')
        data = np.loadtxt(rsrc_fn)
        labels = data[:, 0]
        series = data[:, 1:]
        print(type(series))
        print(series.shape)
        window = int(series.shape[1] * 0.5)

        # Z-normalize sequences
        series = (series - series.mean(axis=1)[:, None]) / series.std(axis=1)[:, None]

        # Align start and/or end values
        # avg_start = series[:, :20].mean(axis=1)
        # avg_end = series[:, 20:].mean(axis=1)
        # series = (series - avg_start[:, None])

        # Perform k-means
        tic = time.perf_counter()
        model = KMeans(k=k, max_it=max_it, max_dba_it=max_dba_it, drop_stddev=1,
                       nb_prob_samples=0,
                       dists_options={"window": window},
                       initialize_with_kmedoids=False,
                       initialize_with_kmeanspp=True)
        try:
            cluster_idx, performed_it = model.fit(series, use_c=True, use_parallel=False)
        except PyClusteringException:
            # Clustering could not be performed; nothing left to check.
            return
        toc = time.perf_counter()
        print(f'DBA ({performed_it} iterations: {toc - tic:0.4f} sec')

        if directory and not dtwvis.test_without_visualization():
            try:
                import matplotlib.pyplot as plt
            except ImportError as exc:
                # Keep the original ImportError attached as the cause.
                raise MatplotlibException("No matplotlib available") from exc

            fig, ax = plt.subplots(nrows=k, ncols=2, figsize=(10, 4),
                                   sharex='all', sharey='all')
            fn = directory / "test_trace_barycenter.png"
            all_idx = set()
            for ki in range(k):
                ax[ki, 0].plot(model.means[ki])
                for idx in cluster_idx[ki]:
                    ax[ki, 1].plot(series[idx], alpha=0.3)
                    if idx in all_idx:
                        raise Exception(f'Series in multiple clusters: {idx}')
                    all_idx.add(idx)
            # Every series must be assigned to exactly one cluster.
            assert len(all_idx) == len(series)
            fig.savefig(str(fn))
            plt.close(fig)

            # Reference figure: series grouped by their label from the file.
            fig, ax = plt.subplots(nrows=k, ncols=1, figsize=(5, 4),
                                   sharex='all', sharey='all')
            fn = directory / "test_trace_barycenter_solution.png"
            for label, normalized in zip(labels, series):
                ax[int(label) - 1].plot(normalized, alpha=0.3)
            fig.savefig(str(fn))
            plt.close(fig)
def test_distance_matrix1_b():
    """Entry (0, 1) of ``dtw.distance_matrix(..., parallel=True, use_c=False)`` is sqrt(2)."""
    with util_numpy.test_uses_numpy() as np:
        raw_series = [
            [0, 0, 1, 2, 1, 0, 1, 0, 0],
            [0, 1, 2, 0, 0, 0, 0, 0, 0],
        ]
        arrays = [np.array(values) for values in raw_series]
        matrix = dtw.distance_matrix(arrays, parallel=True, use_c=False)
        expected = math.sqrt(2)
        assert matrix[0, 1] == pytest.approx(expected)
def test_distance6():
    """Smoke test: ``dtw.distance_fast`` with ``window=2`` runs without raising.

    The first array gets its dtype from ``dtype=np.double``, the second from
    a float literal. The returned distance is not checked.
    """
    with util_numpy.test_uses_numpy() as np:
        first = np.array([0, 0, 1, 2, 1, 0, 1, 0, 0], dtype=np.double)
        second = np.array([0.0, 1, 2, 0, 0, 0, 0, 0, 0])
        dtw.distance_fast(first, second, window=2)
def test_distance1_d():
    """``dtw.distance_fast`` on two float arrays returns sqrt(2)."""
    with util_numpy.test_uses_numpy() as np:
        first = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0])
        second = np.array([0., 1, 2, 0, 0, 0, 0, 0, 0])
        expected = math.sqrt(2)
        assert dtw.distance_fast(first, second) == pytest.approx(expected)
def test_distance1_c():
    """``dtw.distance_fast`` returns sqrt(2) when one input is built with ``dtype=np.double``."""
    with util_numpy.test_uses_numpy() as np:
        first = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0])
        # Integer literals, converted explicitly to double.
        second = np.array([0, 1, 2, 0, 0, 0, 0, 0, 0], dtype=np.double)
        expected = math.sqrt(2)
        assert dtw.distance_fast(first, second) == pytest.approx(expected)
def test_decisiontree(directory=None):
    """Fit ``dtww.DecisionTreeClassifier`` on a two-class, two-feature data set.

    :param directory: Optional output directory (must support the ``/``
        operator). When given, a scatter plot of the features is saved as
        ``features.png`` and, if scikit-learn is importable, the fitted tree
        is exported to ``hierarchy.dot``.
    """
    with util_numpy.test_uses_numpy() as np:
        samples_per_class = 20
        features = np.array([
            [0.5395256916996046, 0.5925000000000002],
            [0.507905138339921, 0.6900000000000002],
            [0.7430830039525692, 0.7150000000000001],
            [0.7391304347826088, 0.7300000000000002],
            [0.6857707509881423, 0.4700000000000002],
            [0.7272727272727273, 0.40500000000000014],
            [0.6936758893280632, 0.4125000000000002],
            [0.6897233201581027, 0.26000000000000023],
            [0.616600790513834, 0.5025000000000002],
            [0.5810276679841897, 0.4550000000000002],
            [0.4841897233201582, 0.3875000000000002],
            [0.3181818181818181, 0.3600000000000001],
            [0.28063241106719367, 0.47250000000000014],
            [0.2549407114624505, 0.5725000000000002],
            [0.39920948616600793, 0.6125000000000002],
            [0.39525691699604737, 0.6175000000000002],
            [0.375494071146245, 0.6475000000000001],
            [0.3359683794466403, 0.6350000000000001],
            [0.34584980237154145, 0.7275000000000001],
            [0.38537549407114624, 0.7375000000000002],
            [0.2075098814229248, 0.8650000000000001],
            [0.3774703557312252, 0.7600000000000001],
            [0.4624505928853755, 0.7500000000000001],
            [0.5276679841897233, 0.8425],
            [0.6383399209486166, 0.8925000000000001],
            [0.6798418972332015, 0.8275000000000001],
            [0.782608695652174, 0.7550000000000001],
            [0.7608695652173912, 0.5575000000000001],
            [0.8537549407114624, 0.5550000000000002],
            [0.8972332015810277, 0.27000000000000024],
            [0.7549407114624507, 0.1575000000000003],
            [0.5790513833992094, 0.1525000000000002],
            [0.5118577075098814, 0.2100000000000002],
            [0.43083003952569165, 0.03500000000000014],
            [0.4209486166007905, 0.05500000000000016],
            [0.3320158102766798, 0.16000000000000025],
            [0.22332015810276673, 0.05250000000000021],
            [0.011857707509881382, 0.2975000000000001],
            [0.14229249011857703, 0.4425000000000002],
            [0.19565217391304346, 0.5900000000000001],
        ])
        # The first 20 rows belong to class 0, the last 20 to class 1.
        targets = np.array([0] * samples_per_class + [1] * samples_per_class)

        if directory:
            import matplotlib.pyplot as plt
            class0 = features[:samples_per_class]
            class1 = features[samples_per_class:]
            plt.figure(figsize=(3, 3))
            plt.scatter(class0[:, 0], class0[:, 1], marker="+")
            plt.scatter(class1[:, 0], class1[:, 1], marker=".")
            plt.xlim([-0.1, 1.1])
            plt.ylim([-0.1, 1.1])
            plt.savefig(str(directory / "features.png"))
            plt.close()

        clf = dtww.DecisionTreeClassifier()
        clf.fit(features, targets, use_feature_once=False)

        if not directory:
            return
        # Exporting the tree is optional: skip it when scikit-learn is missing.
        try:
            from sklearn.tree import export_graphviz
        except ImportError:
            return
        export_graphviz(clf, out_file=str(directory / "hierarchy.dot"))
def test_distance1_a():
    """``ed.distance_fast`` on the two reference series returns 2.8284271247461903."""
    with util_numpy.test_uses_numpy() as np:
        first = np.array([0., 0, 1, 2, 1, 0, 1, 0, 0])
        second = np.array([0., 1, 2, 0, 0, 0, 0, 0, 0])
        expected = 2.8284271247461903
        assert ed.distance_fast(first, second) == pytest.approx(expected)
def test_kdistance2():
    """``DecisionTreeClassifier.kdistance`` is 0.0 for all-zero values and threshold 0.0."""
    with util_numpy.test_uses_numpy() as np:
        zeros = np.array([0., 0., 0.])
        threshold = 0.0
        result = dtww.DecisionTreeClassifier.kdistance(zeros, threshold)
        assert result == pytest.approx(0.0)
def test_kdistance():
    """``DecisionTreeClassifier.kdistance`` is 1.5 for the sample values and threshold 4.5."""
    with util_numpy.test_uses_numpy() as np:
        samples = np.array([1, 2, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9])
        threshold = 4.5
        result = dtww.DecisionTreeClassifier.kdistance(samples, threshold)
        assert result == pytest.approx(1.5)
def test_split():
    """``informationgain_continuous`` picks 4.5 as the split threshold for the sample data."""
    with util_numpy.test_uses_numpy() as np:
        samples = np.array([1, 2, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9])
        classes = np.array([1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0])
        # Only the threshold (second of the three returned values) is checked.
        _gain, threshold, _h0 = dtww.DecisionTreeClassifier.informationgain_continuous(classes, samples)
        assert threshold == pytest.approx(4.5)
def test_distance1_b():
    """``dtw.warping_paths`` on plain Python lists returns a distance of sqrt(2)."""
    with util_numpy.test_uses_numpy() as np:
        first = [0, 0, 1, 2, 1, 0, 1, 0, 0]
        second = [0, 1, 2, 0, 0, 0, 0, 0, 0]
        # The warping-paths matrix (second return value) is not inspected.
        distance, _paths = dtw.warping_paths(first, second)
        expected = math.sqrt(2)
        assert distance == pytest.approx(expected)
def test_distance1_b():
    """Smoke test: ``dtw_ndim.distance_fast`` runs on two 5x2 double arrays.

    The returned distance is not checked.
    """
    # NOTE(review): an earlier definition in this file is also named
    # test_distance1_b; if both live in the same module, this one replaces it
    # and the earlier test never runs. Confirm and rename one of them.
    with util_numpy.test_uses_numpy() as np:
        first = np.array([[0, 0], [0, 1], [2, 1], [0, 1], [0, 0]], dtype=np.double)
        second = np.array([[0, 0], [2, 1], [0, 1], [0, .5], [0, 0]], dtype=np.double)
        dtw_ndim.distance_fast(first, second)