def analyze_all(V, dfun=distance.euc_normed, dfw=True, **kwargs):
    """
    Perform DTW(+DFW) timbre analysis on list of audio files, return pair-wise costs

    inputs:
        V - vowel analysis pool; every entry is read through its 'mfccs' and 'lqft' keys
        dfun - distance function to use [bregman.distance.euc_normed]
        dfw - whether to perform dynamic FREQUENCY warping (dfw) after DTW [True]
        **kwargs - arguments to dpcore.dp DTW algorithm
                   ('penalty' defaults to 0.1 and 'gutter' to 0.0 when not supplied)
    outputs:
        Z - len(V) x len(V) pair-wise cost matrix (timbre time/frequency warp)
            between audio files
    """
    # Fill in the DTW defaults without overriding caller-supplied values.
    kwargs.setdefault('penalty', 0.1)
    kwargs.setdefault('gutter', 0.0)

    n_items = len(V)
    Z = zeros((n_items, n_items))
    for i, a in enumerate(V):
        for j, b in enumerate(V):
            # Time-align the pair using their MFCC distance matrix.
            D = dfun(a['mfccs'], b['mfccs'])
            p, q, _cost, _phi = dpcore.dp(D, **kwargs)
            if not dfw:
                Z[i, j] = diag(dfun(a['lqft'][p], b['lqft'][q])).mean()
                continue
            alpha = optimal_vtln(b['lqft'][q], a['lqft'][p], 'symmetric')
            # Single-argument print: same output text on Python 2 and Python 3.
            print("Optimal DFW (VTLN) warp alpha= %s" % (alpha,))
            Vi_warped = vtln(b['lqft'][q], 'symmetric', alpha)
            # NOTE(review): the cost compares the warped b against the unwarped b,
            # not against a['lqft'][p] as the dfw=False branch does. Behaviour is
            # kept as-is; confirm this is the intended measure.
            Z[i, j] = diag(dfun(Vi_warped, b['lqft'][q])).mean()
    return Z
def tsne_all(V, ref_idx=9, dfun=distance.euc_normed, dfw=True, tsne=True,
             perplexity=3, plotting=False, **kwargs):
    """
    Perform tsne / DTW / DFW timbre analysis on list of audio files
    Uses tsne library

    Every entry of V is time-aligned (DTW) to the reference entry V[ref_idx]
    and, optionally, frequency-warped (DFW) toward it.

    inputs:
        V - vowel analysis pool; every entry is read through its 'mfccs' and 'lqft' keys
        ref_idx - reference audio file, all audio warped to this [9]
        dfun - distance function to use [bregman.distance.euc_normed]
        dfw - whether to perform dynamic FREQUENCY warping (dfw) after DTW [True]
        tsne - whether to return t-SNE or just DTW spectrum [True]
        perplexity - how many near neighbours in t-SNE [3]
        plotting - whether to plot the t-SNE result [False]
        **kwargs - arguments to dpcore.dp DTW algorithm
                   ('penalty' defaults to 0.1 and 'gutter' to 0.0 when not supplied)
    outputs:
        Z - if tsne is True, the value returned by _tsne (2D projection map of
            audio files); otherwise the list of aligned (and, with dfw, warped)
            'lqft' data, one item per entry of V
    """
    # Fill in the DTW defaults without overriding caller-supplied values.
    kwargs.setdefault('penalty', 0.1)
    kwargs.setdefault('gutter', 0.0)

    Z = []
    for item in V:
        ref = V[ref_idx]
        C = dfun(ref['mfccs'], item['mfccs'])
        p, q, _cost, _phi = dpcore.dp(C, **kwargs)
        if not dfw:
            Z.append(item['lqft'][q])
            continue
        alpha = optimal_vtln(item['lqft'][q], ref['lqft'][p], 'symmetric')
        # Single-argument print: same output text on Python 2 and Python 3.
        print("Optimal DFW (VTLN) warp alpha= %s" % (alpha,))
        Vi_warped = vtln(item['lqft'][q], 'symmetric', alpha)
        Z.append(Vi_warped)

    if tsne:
        # One flattened row per audio file.
        X = array([zz.flatten() for zz in Z], dtype='f8')
        Z = _tsne(X, perplexity=perplexity, plotting=plotting)
    return Z
def analyze_all(V, dfun=distance.euc_normed, dfw=True, **kwargs):
    """
    Perform DTW(+DFW) timbre analysis on list of audio files, return pair-wise costs

    inputs:
        V - vowel analysis pool; every entry is read through its 'mfccs' and 'lqft' keys
        dfun - distance function to use [bregman.distance.euc_normed]
        dfw - whether to perform dynamic FREQUENCY warping (dfw) after DTW [True]
        **kwargs - arguments to dpcore.dp DTW algorithm
                   ('penalty' defaults to 0.1 and 'gutter' to 0.0 when not supplied)
    outputs:
        Z - len(V) x len(V) pair-wise cost matrix (timbre time/frequency warp)
            between audio files
    """
    # Apply the DTW defaults only where the caller gave no value.
    kwargs.setdefault('penalty', 0.1)
    kwargs.setdefault('gutter', 0.0)

    size = len(V)
    Z = zeros((size, size))
    for i, a in enumerate(V):
        for j, b in enumerate(V):
            # DTW on the MFCC distance matrix gives the frame alignment p <-> q.
            D = dfun(a['mfccs'], b['mfccs'])
            p, q, _cost, _phi = dpcore.dp(D, **kwargs)
            if dfw:
                alpha = optimal_vtln(b['lqft'][q], a['lqft'][p], 'symmetric')
                # Single-argument print: same output text on Python 2 and Python 3.
                print("Optimal DFW (VTLN) warp alpha= %s" % (alpha,))
                Vi_warped = vtln(b['lqft'][q], 'symmetric', alpha)
                # NOTE(review): this measures warped b against unwarped b rather
                # than against a['lqft'][p]; behaviour is kept as-is, confirm
                # that this is the intended cost.
                Z[i, j] = diag(dfun(Vi_warped, b['lqft'][q])).mean()
            else:
                Z[i, j] = diag(dfun(a['lqft'][p], b['lqft'][q])).mean()
    return Z
def dtw(u, v):
    """
    Return the DTW cost between u and v.

    Builds the Euclidean pair-wise distance matrix between the elements of u
    and v (each element treated as a length-1 vector), runs dp on it and
    returns the last entry C[-1, -1] of the matrix C that dp returns.

    Side effect: reads and increments the module-level counter `i`, printing
    its value on every 10000th call as a progress indicator. `i` must already
    be defined at module level before the first call.

    inputs:
        u, v - arrays supporting u[:, np.newaxis] (presumably 1-D - TODO confirm)
    outputs:
        C[-1, -1] from dp
    """
    global i
    pairwise = cdist(u[:, np.newaxis], v[:, np.newaxis], metric='euclidean')
    _p, _q, cum_cost, _phi = dp(pairwise)
    # Progress report before the counter is advanced.
    if not i % 10000:
        print(i)
    i += 1
    return cum_cost[-1, -1]
def align_vowels(X, Y, dfun=distance.euc_normed, penalty=0.1, gutter=0.1):
    """
    DTW align vowels based on spectral data in X and Y
    Uses dpcore dynamic programming library

    inputs:
        X, Y - spectral data in rows (num columns must be equal)
        dfun - distance function applied as dfun(X, Y) [bregman.distance.euc_normed]
        penalty - forwarded to dpcore.dp as its 'penalty' argument [0.1]
        gutter - forwarded to dpcore.dp as its 'gutter' argument [0.1]
    outputs:
        dict with keys 'p', 'q', 'C' holding the first three values returned
        by dpcore.dp - Dynamic Time Warp coefs: X[p]<-->Y[q], C=cost
    """
    Df = dfun(X, Y)
    # The fourth value returned by dpcore.dp is not needed here.
    p, q, C, _phi = dpcore.dp(Df, penalty=penalty, gutter=gutter)
    return {'p': p, 'q': q, 'C': C}
def align_vowels(X, Y, dfun=distance.euc_normed, penalty=0.1, gutter=0.1):
    """
    DTW align vowels based on spectral data in X and Y
    Uses dpcore dynamic programming library

    inputs:
        X, Y - spectral data in rows (num columns must be equal)
        dfun - distance function applied as dfun(X, Y) [bregman.distance.euc_normed]
        penalty - 'penalty' argument handed to dpcore.dp [0.1]
        gutter - 'gutter' argument handed to dpcore.dp [0.1]
    outputs:
        dict {'p': p, 'q': q, 'C': C} built from the first three values
        returned by dpcore.dp - Dynamic Time Warp coefs: X[p]<-->Y[q], C=cost
    """
    distances = dfun(X, Y)
    # dpcore.dp returns four values; the last one is unused by this helper.
    p, q, C, _phi = dpcore.dp(distances, penalty=penalty, gutter=gutter)
    return {'p': p, 'q': q, 'C': C}
def tsne_all(V, ref_idx=9, dfun=distance.euc_normed, dfw=True, tsne=True,
             perplexity=3, plotting=False, **kwargs):
    """
    Perform tsne / DTW / DFW timbre analysis on list of audio files
    Uses tsne library

    Each entry of V is DTW-aligned to the reference entry V[ref_idx] and,
    when dfw is set, frequency-warped toward it.

    inputs:
        V - vowel analysis pool; every entry is read through its 'mfccs' and 'lqft' keys
        ref_idx - reference audio file, all audio warped to this [9]
        dfun - distance function to use [bregman.distance.euc_normed]
        dfw - whether to perform dynamic FREQUENCY warping (dfw) after DTW [True]
        tsne - whether to return t-SNE or just DTW spectrum [True]
        perplexity - how many near neighbours in t-SNE [3]
        plotting - whether to plot the t-SNE result [False]
        **kwargs - arguments to dpcore.dp DTW algorithm
                   ('penalty' defaults to 0.1 and 'gutter' to 0.0 when not supplied)
    outputs:
        Z - if tsne is True, the value returned by _tsne (2D projection map of
            audio files); otherwise the list of aligned (and, with dfw, warped)
            'lqft' data, one item per entry of V
    """
    # Apply the DTW defaults only where the caller gave no value.
    kwargs.setdefault('penalty', 0.1)
    kwargs.setdefault('gutter', 0.0)

    Z = []
    for entry in V:
        reference = V[ref_idx]
        C = dfun(reference['mfccs'], entry['mfccs'])
        p, q, _cost, _phi = dpcore.dp(C, **kwargs)
        if dfw:
            alpha = optimal_vtln(entry['lqft'][q], reference['lqft'][p], 'symmetric')
            # Single-argument print: same output text on Python 2 and Python 3.
            print("Optimal DFW (VTLN) warp alpha= %s" % (alpha,))
            Z.append(vtln(entry['lqft'][q], 'symmetric', alpha))
        else:
            Z.append(entry['lqft'][q])

    if tsne:
        # Flatten every item so each audio file contributes one row.
        X = array([zz.flatten() for zz in Z], dtype='f8')
        Z = _tsne(X, perplexity=perplexity, plotting=plotting)
    return Z
# <codecell> plt.imshow(DC-DP, interpolation='none') print np.max(np.abs(DC-DP)) # <codecell> plt.imshow(phiC-phiP, interpolation='none') # <codecell> MM = np.random.rand(5, 5) pen = 0.2 gut = 0.3 p,q,C,phi = dpcore.dp(MM, pen, gut) print p, q print MM print C print "best cost =", C[p[-1],q[-1]], "=", np.sum(MM[p, q])+pen*(np.sum(phi[p, q]>0)) plt.imshow(MM, interpolation='none', cmap='binary') plt.hold(True) plt.plot(q,p,'-r') plt.hold(False) plt.show() # <codecell> M2 = np.copy(M) M2[20:30,20:30] += np.random.rand(10,10) M2[10:40,10:40] += np.random.rand(30,30)
def dtwC(dist_mat, penalty):
    """
    Run dpcore.dp on a distance matrix with the given penalty.

    inputs:
        dist_mat - distance matrix handed to dpcore.dp as its first argument
        penalty - forwarded to dpcore.dp as the 'penalty' keyword
    outputs:
        (p, q, C, phi) - the four values returned by dpcore.dp, in that order
    """
    path_p, path_q, cost, phi = dpcore.dp(dist_mat, penalty=penalty)
    return (path_p, path_q, cost, phi)