def test_dtw_symmetric(self):
    """Check that swapping the two input sequences does not change dtw()'s result.

    Two TIDIGITS utterances (indices 17 and 3) are converted to MFCC
    features and aligned in both orders with the same local distance.
    """

    def dist(a, b):
        # Local frame-to-frame cost: L2 norm of the difference.
        return np.linalg.norm(a - b, ord=2)

    first = mfcc(self.tidigits[17]['samples'])
    second = mfcc(self.tidigits[3]['samples'])

    forward = dtw(first, second, dist=dist, debug=False)
    backward = dtw(second, first, dist=dist, debug=False)

    # Exact equality is required, not just closeness.
    self.assertEqual(backward, forward)
def test_dtw(self):
    """Check dtw() against dtw_mod.dtw on two real utterances.

    Only the global distance (first element of the tuple returned by
    dtw_mod.dtw) is compared; dtw() is called with debug=False.
    """

    def dist(a, b):
        # Local frame-to-frame cost: L2 norm of the difference.
        return np.linalg.norm(a - b, ord=2)

    seq_a = mfcc(self.tidigits[12]['samples'])
    seq_b = mfcc(self.tidigits[22]['samples'])

    # dtw_mod.dtw yields four values; only the first one is needed here.
    reference, _, _, _ = dtw_mod.dtw(seq_a, seq_b, dist=dist)
    candidate = dtw(seq_a, seq_b, dist=dist, debug=False)

    assert_almost_equal(reference, candidate)
def test_dtw_zero(self):
    """Check that aligning an utterance with itself gives a distance of zero.

    The result of dtw() is first compared with dtw_mod.dtw and then
    required to be exactly 0.0.
    """

    def dist(a, b):
        # Local frame-to-frame cost: L2 norm of the difference.
        return np.linalg.norm(a - b, ord=2)

    features = mfcc(self.tidigits[7]['samples'])

    # dtw_mod.dtw yields four values; only the first one is needed here.
    reference, _, _, _ = dtw_mod.dtw(features, features, dist=dist)
    candidate = dtw(features, features, dist=dist, debug=False)

    assert_almost_equal(reference, candidate)
    # Stricter than a tolerance-based assert_allclose(d2, 0.0): the
    # distance must be exactly zero.
    self.assertEqual(candidate, 0.0)
def q7_global_distance():
    """Compute, plot and save the 44x44 matrix of pairwise DTW global distances.

    MFCC features are extracted from the 'samples' entry of the first 44
    items of ``data_dict``. For every ordered pair (row, col) with
    row != col, the first value returned by ``dtw(..., euclidean)`` is
    stored in the matrix; the diagonal stays at zero. The matrix is passed
    to ``plot_p_color_mesh`` and written to 'global distance.npy'.
    """
    n_utterances = 44

    features = [mfcc(data_dict[k]['samples']) for k in range(n_utterances)]
    global_distances = np.zeros((n_utterances, n_utterances))

    for row in range(n_utterances):
        for col in range(n_utterances):
            if row == col:
                continue

            # A non-zero mirrored cell means this pair was already
            # computed in the other order; reuse it instead of calling
            # dtw again.
            mirrored = global_distances[col, row]
            if mirrored != 0:
                global_distances[row, col] = mirrored
                continue

            # dtw yields four values; only the global distance is kept.
            global_d, _, _, _ = dtw(features[row], features[col], euclidean)
            global_distances[row, col] = global_d

    plot_p_color_mesh(global_distances, 'global distance matrix')
    np.save('global distance.npy', global_distances)
def test_dtw_ideal(self):
    """Compare every output of dtw(debug=True) with dtw_mod.dtw on toy data.

    Two short hand-written integer sequences, reshaped to column vectors,
    are aligned. The global distance, the local cost matrix, the
    accumulated cost matrix and the path must all agree. dtw_mod.dtw
    returns the path as a (row indices, column indices) pair, whereas
    dtw() returns a sequence of (i, j) pairs.
    """

    def dist(a, b):
        # Local frame-to-frame cost: L2 norm of the difference.
        return np.linalg.norm(a - b, ord=2)

    long_seq = np.array([2, 0, 1, 1, 2, 4, 2, 1, 2, 0]).reshape(-1, 1)
    short_seq = np.array([1, 1, 2, 4, 2, 1, 2, 0]).reshape(-1, 1)

    ref_d, ref_cost, ref_acc, ref_path = dtw_mod.dtw(long_seq, short_seq, dist=dist)
    got_d, got_cost, got_acc, got_path = dtw(long_seq, short_seq, dist=dist, debug=True)

    assert_almost_equal(ref_d, got_d)
    assert_allclose(ref_cost, got_cost)
    assert_allclose(ref_acc, got_acc)

    ref_rows, ref_cols = ref_path
    self.assertEqual(len(got_path), len(ref_rows))
    self.assertEqual(len(got_path), len(ref_cols))

    # Lengths were just checked to be equal, so zip visits every step.
    for (i, j), expected_row, expected_col in zip(got_path, ref_rows, ref_cols):
        self.assertEqual(i, expected_row)
        self.assertEqual(j, expected_col)
Calculate a global distance matrix and save the gdist file """ import numpy as np import config from lab1_proto import dtw from lab1_proto import mfcc from scipy.cluster.hierarchy import dendrogram, linkage from matplotlib import pyplot as plt if __name__ == "__main__": data = np.load('data/lab1_data.npz')['data'] # calcualte the global distance matrix ndata = len(data) # global_dist matrix is symmetric # the diagonal terms are zeros global_dist = np.zeros((ndata, ndata)) cnt = 0 for i, j in zip(*np.triu_indices(ndata, k=1)): feature_i = mfcc(data[i]['samples']) feature_j = mfcc(data[j]['samples']) d = dtw(feature_i, feature_j) global_dist[i, j] = d global_dist[j, i] = d cnt += 1 if cnt % 100 == 0: print("Calculated %d global distances" % cnt, flush=True) np.save(config.gdist_npy_file, global_dist)
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.manifold import TSNE
from sklearn.mixture import GaussianMixture

# --- 4-component GMM on data_mfcc, visualised in 2-D with t-SNE ---------
gmm = GaussianMixture(n_components=4).fit(data_mfcc)
labels = gmm.predict(data_mfcc)

X_embedded = TSNE(n_components=2).fit_transform(data_mfcc)
# Each embedded point is coloured by its GMM component label.
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=labels, s=40, cmap='viridis')

# --- 32-component GMM on the features of utterances 16, 17, 38, 39 ------
# NOTE(review): the previous loop tested `i == 0`, which never holds for
# these indices, and appended to data_mfcc instead of data_gmm, so each
# iteration discarded the one before it. The accumulator pattern suggests
# the intent was to stack the four utterances; confirm against the
# experiment description.
data_gmm = np.concatenate(
    [mfcc(data[i]['samples']) for i in [16, 17, 38, 39]],
    axis=0,
)
gmm = GaussianMixture(n_components=32).fit(data_gmm)
labels = gmm.predict(data_gmm)

# --- Pairwise DTW distances between all utterances ------------------------
sample_ = data.shape[0]
# Extract features once per utterance rather than once per (i, j) pair.
features = [mfcc(data[i]['samples']) for i in range(sample_)]

result = np.zeros([sample_, sample_])
for i in range(sample_):
    for j in range(sample_):
        result[i, j] = dtw(features[i], features[j],
                           scipy.spatial.distance.euclidean)
plt.plot(result)

# --- Hierarchical clustering on the DTW distances -------------------------
# linkage() interprets a 2-D array as observation vectors, not as a
# distance matrix. Convert the square matrix to condensed form first.
# checks=False skips the symmetry and zero-diagonal validation; only the
# upper triangle of `result` is used.
condensed = scipy.spatial.distance.squareform(result, checks=False)
Z = linkage(condensed, 'complete')
fig = plt.figure(figsize=(25, 10))
dn = dendrogram(Z)