def Dynamic_Time_Warping_with_cointegration(mindata, pair, table):
    """Return the DTW distance between the two weighted log-price legs of a pair.

    The first leg is ``w1 * log(stock1)`` and the second is
    ``-w2 * log(stock2) + mu``, using the weights stored in ``table`` for
    ``pair``.  The two legs are aligned with a Sakoe-Chiba window of size 10.

    As a diagnostic, a warped version of the raw stock2 series is built by
    averaging, for every row of the index matrix returned by
    ``build_dynamic_time_warping_index``, the stock2 samples flagged with 1
    in that row.  The cointegration weights are then re-estimated on it and
    printed next to the stored ones.  This diagnostic does not influence the
    returned value.

    Parameters
    ----------
    mindata : mapping of str -> object with ``.values``
        Price data, looked up by the stringified stock identifiers.
    pair : index into ``table``
        Which pair (row) of ``table`` to evaluate.
    table : object with ``stock1``, ``stock2``, ``w1``, ``w2`` and ``mu``
        Pair definitions and cointegration parameters.

    Returns
    -------
    The ``distance`` attribute of the alignment of the two weighted legs.
    """
    stock1_series = mindata[str(table.stock1[pair])].values
    stock2_series = mindata[str(table.stock2[pair])].values

    w1 = table.w1[pair]
    w2 = table.w2[pair]
    new_stock1_series = w1 * np.log(stock1_series)
    new_stock2_series = -w2 * np.log(stock2_series) + table.mu[pair]

    alignment = dtw(new_stock1_series,
                    new_stock2_series,
                    keep_internals=True,
                    window_type="sakoechiba",
                    window_args={'window_size': 10})

    matrix = build_dynamic_time_warping_index(alignment.index1s,
                                              alignment.index2s)

    # Collapse every group of stock2 samples flagged in a row to their mean.
    dynamic_stock2_series = []
    for i in range(len(matrix)):
        matched = np.argwhere(matrix[i, :] == 1).flatten().tolist()
        total = 0
        for j in matched:
            total += stock2_series[j]
        dynamic_stock2_series.append(total / len(matched))

    # Diagnostic only: stored weights versus weights re-estimated on the
    # warped series.  (The original printed w1 twice instead of w1 and w2.)
    # TODO: recalculate the cointegration weights / write a model selection
    # function.
    print(w1, w2, cointegration_weight(stock1_series, dynamic_stock2_series))

    # NOTE: a second DTW run on the warped series used to be computed here and
    # thrown away; it was removed because its result was never used.
    return alignment.distance
def test_cdist(self):
    """DTW on raw 2-D series must equal DTW on their precomputed distances.

    The cityblock cross-distance matrix from scipy is fed directly to
    ``dtw`` and compared with ``dtw`` computing the same metric itself.
    """
    from scipy.spatial.distance import cdist

    query = np.column_stack([np.arange(1, 11), np.ones(10)])
    ref = np.column_stack([np.arange(11, 16), 2 * np.ones(5)])

    from_series = dtw(query, ref, dist_method="cityblock").distance
    from_matrix = dtw(cdist(query, ref, metric="cityblock")).distance

    assert_approx_equal(from_series, from_matrix)
def predict(test_data_dir, work_dir, predictoin_path):
    """Score every file in ``test_data_dir`` against the saved template.

    The template (``template.npy``) and the decision threshold (first line of
    the ``threshold`` file) are read from ``work_dir``.  The threshold is
    clamped to the range [0.45, 0.6].  For each file, windows of template
    length are slid over the extracted features in steps of a fifth of the
    template length; the smallest normalized cosine-DTW distance decides the
    label (``'1'`` if below the threshold, otherwise ``'0'``).

    Parameters
    ----------
    test_data_dir : str
        Directory whose entries are loaded as audio, in sorted name order.
    work_dir : str
        Directory containing ``template.npy`` and ``threshold``.
    predictoin_path : str
        Output file; one ``<file name>\\t<label>`` line per input.  The
        misspelled parameter name is kept for backward compatibility.
    """
    print('#load model and template')
    model = load_model(wav2vec_model_path)
    template = np.load(os.path.join(work_dir, 'template.npy'))
    with open(os.path.join(work_dir, 'threshold')) as f:
        threshold = float(f.readline())
    # Manually chosen bounds for the learned threshold.
    threshold_max, threshold_min = 0.6, 0.45
    threshold = max(threshold_min, min(threshold_max, threshold))

    print('#do predict')
    predict_results = []
    _len = len(template)
    _step = max(1, _len // 5)
    for test_wav in sorted(os.listdir(test_data_dir)):
        # The sample rate is passed by keyword: recent librosa releases no
        # longer accept it positionally.
        wav = librosa.load(os.path.join(test_data_dir, test_wav), sr=16000)[0]
        feature = feature_extract(model, wav)
        all_dis = []
        # max(..., _step) guarantees at least one window even when the
        # features are shorter than the template.
        for i in range(0, max(len(feature) - _len, _step), _step):
            alignment = dtw(feature[i:min(i + _len, len(feature))],
                            template,
                            keep_internals=True,
                            dist_method='cosine')
            all_dis.append(alignment.normalizedDistance)
        predict_results.append(
            (test_wav, '1' if min(all_dis) < threshold else '0'))

    print('#save result')
    # Fix: the original wrote to an undefined name ``prediction_path`` instead
    # of the (misspelled) parameter it was given.
    with open(predictoin_path, 'w') as f:
        for predict_result in predict_results:
            f.write('\t'.join(predict_result) + '\n')
def dwt_dist(candidate_id_x, candidate_id_y):
    """Return the DTW distance between the 'Power' columns of two candidates.

    Each candidate id is converted to ``int`` and used to fetch its group
    from ``signatures``; the group is passed through ``fix_index`` before the
    'Power' column is taken.  The series of ``candidate_id_y`` is used as the
    first ``dtw`` argument and that of ``candidate_id_x`` as the second.
    """

    def _power_of(candidate_id):
        group = signatures.get_group(int(candidate_id))
        return fix_index(group)['Power']

    source = _power_of(candidate_id_y)
    reference = _power_of(candidate_id_x)
    alignment = dtw(source, reference, distance_only=True)
    return float(alignment.distance)
def dtw_distance(data=None, dist_method='euclidean'):
    """Build a scaled matrix of pairwise DTW distances between regions.

    For every region the ``deaths_1K`` column is smoothed with
    ``_predict_knn`` as a function of the number of days since the earliest
    date in the data.  Every ordered pair of smoothed series is aligned with
    ``dtw`` and the normalized distance is stored.  The matrix is finally
    divided by its maximum.

    Parameters
    ----------
    data : DataFrame-like, optional
        Must expose ``date``, ``region`` and ``deaths_1K`` columns.  When
        omitted, ``deaths_df()`` is used.
    dist_method : str
        Forwarded to ``dtw`` as ``dist_method``.

    Returns
    -------
    numpy.ndarray
        Square matrix whose rows/columns follow the order of
        ``data.region.unique()``.
    """
    x = data if data is not None else deaths_df()
    dtmin = x.date.min()

    regs = x.region.unique()
    reg2idx = {r: i for i, r in enumerate(regs)}
    D = np.zeros((len(regs), len(regs)))

    # Smooth each region once; the original re-ran the smoother for both
    # series of every pair, i.e. O(n^2) times instead of O(n).
    smoothed = {}
    for name, group in x.groupby('region'):
        days = (group.date - dtmin).apply(lambda delta: delta.days)
        smoothed[name] = _predict_knn(days, group.deaths_1K)

    for name1, fx1 in smoothed.items():
        for name2, fx2 in smoothed.items():
            score = dtw(fx1, fx2, dist_method=dist_method)
            D[reg2idx[name1], reg2idx[name2]] = score.normalizedDistance

    # Scale by the maximum; skip the division when every distance is zero
    # (e.g. a single region) so the result stays all-zero instead of NaN.
    peak = D.max()
    if peak != 0:
        D /= peak
    return D
def test_ldist_asymmetricP1(self):
    """Check distance and warping indices for ``ldist`` with asymmetricP1."""
    alignment = dtw(ldist, step_pattern=asymmetricP1, keep_internals=True)

    assert_equal(alignment.distance, 3)

    # (attribute name, expected indices in the notation understood by i()).
    expected_indices = (
        ("index1", "1 2 3 3 4 5 6"),
        ("index1s", "1 2 3 5 6"),
        ("index2", "1 2 3 4 5 5 6"),
        ("index2s", "1 2 4 5 6"),
    )
    for attribute, spec in expected_indices:
        assert_array_equal(getattr(alignment, attribute), i(spec))
def test_matrix(self):
    """Check the cumulative cost matrix for a 4x4 local cost matrix.

    The local costs are 10 everywhere with 11 on the diagonal.
    """
    local_costs = np.full((4, 4), 10.0) + np.eye(4)
    expected = np.array([
        [11., 21., 31., 41.],
        [21., 32., 41., 51.],
        [31., 41., 52., 61.],
        [41., 51., 61., 72.],
    ])

    alignment = dtw(local_costs, keep_internals=True)

    assert_array_equal(alignment.costMatrix, expected)
def test_issue_5(self):
    """Regression check: sine query vs. shifted cosine template.

    Uses the Rabiner-Juang type-4 step pattern with slope weighting "c" and
    closed begin/end, and pins both the distance and the reference indices.
    """
    query = np.sin(np.linspace(0, 6.28, num=100))
    template = np.cos(np.linspace(0, 6.28, num=70)) + 0.5

    pattern = rabinerJuangStepPattern(ptype=4, slope_weighting="c")
    alignment = dtw(query,
                    template,
                    step_pattern=pattern,
                    keep_internals=True,
                    open_end=False,
                    open_begin=False)
    matched_reference = alignment.index2

    assert_approx_equal(alignment.distance, 52.9795)

    # Expected path: 0, then every index 1..43 twice, then the odd indices
    # 45..69 once each (100 entries in total).
    expected_reference = (
        [0]
        + [k for k in range(1, 44) for _ in range(2)]
        + list(range(45, 70, 2))
    )
    assert_array_equal(matched_reference, expected_reference)
    assert_equal(len(matched_reference), 100)
def DTW_Distance_Comp(ts1, ts2, WS, path_plot, dist_only):
    """
    Compute the DTW distance between two time series.

    The alignment uses the cityblock local distance, the 'symmetricP0' step
    pattern and a Sakoe-Chiba window.

    Parameters
    ----------
    ts1 : np.array
        First time series
    ts2 : np.array
        Second time series
    WS : int
        Window size for sakoeChibaWindow
    path_plot : boolean
        Set it to True if you would like to see the warping path plot
    dist_only : boolean
        Set it to True if you would like to only compute the distance for
        faster computation

    Returns
    -------
    float
        DTW distance between the two given time series

    Raises
    ------
    ValueError
        If both ``path_plot`` and ``dist_only`` are True: the plot needs the
        warping path, which is not computed in distance-only mode.
    """
    # Fail early with a clear message instead of computing the alignment and
    # then failing inside the plotting code.
    if path_plot and dist_only:
        raise ValueError(
            "path_plot=True requires dist_only=False: the warping path is "
            "not computed when only the distance is requested")

    DTW = dtw(ts1,
              ts2,
              keep_internals=True,
              window_type=sakoeChibaWindow,
              window_args={'window_size': WS},
              dist_method='cityblock',
              step_pattern='symmetricP0',
              distance_only=dist_only)

    if path_plot:
        DTW.plot(type="threeway")

    return DTW.distance
def selected_from_dd(*args):
    """Recognize the file currently selected in the drop-down.

    Reads the selection from ``tkvar``, starts ``audio_popup`` on a
    background thread, computes the MFCCs of the selected file and compares
    them by DTW with every entry of ``mfcc_arr``.  The label of the closest
    entry and the elapsed time are inserted into ``sop``.

    ``*args`` is accepted and ignored so the function can be used as a
    callback.
    """
    global current_test
    global flag_audio_pop

    current_test = tkvar.get()
    t1 = threading.Thread(target=audio_popup)
    t1.start()

    start = time.perf_counter()
    yTest, srTest = librosa.load(cg_dirname + "/" + current_test)
    # Keyword arguments: recent librosa releases reject positional y/sr.
    mfccTest = librosa.feature.mfcc(y=yTest, sr=srTest)
    mfccTest = preprocess_mfcc(mfccTest)

    def frame_distance(a, b):
        # Exponential of the L1 norm of the frame difference.
        return np.exp(np.linalg.norm(a - b, ord=1))

    dists = [dtw(mfcci.T, mfccTest.T, dist=frame_distance)[0]
             for mfcci in mfcc_arr]

    min_dist_index = dists.index(min(dists))
    pre = int(y[min_dist_index])
    output = label[pre]
    tt = time.perf_counter() - start
    output = "Input File : "+str(current_test)+".\nThe spoken word is : "+str(output)+".\nTime taken for Recognition : "+str(tt)+"\n"
    sop.insert(INSERT, output)

    # Only wait for the popup thread when the flag says so, then reset it.
    if flag_audio_pop == 1:
        t1.join()
        flag_audio_pop = 0
def dtw_features(trace, template, step_pattern='MATLAB'):
    """Collect DTW-based features of ``trace`` against ``template``.

    With ``step_pattern='MATLAB'`` the work is delegated to ``dtw_matlab``;
    any other value is forwarded to ``dtw`` as its step pattern.

    Returns a dict with the keys:
      * ``'dt'``      -- alignment distance,
      * ``'dt_l'``    -- cumulative cost read at the selected border cell,
      * ``'w'``       -- warping path (for the ``dtw`` branch: columns are
                         index2, index1),
      * ``'pathlen'`` -- the path-length value capped at ``template.shape[0]``.
    """
    if step_pattern == 'MATLAB':
        dt, dt_l, w, pathlen = dtw_matlab(trace, template)
        return {
            'dt': dt,
            'dt_l': dt_l,
            'w': w,
            # bug in matlab code here, chooses wrong template axis
            'pathlen': min([pathlen, template.shape[0]]),
        }

    alignment = dtw(trace,
                    template,
                    step_pattern=step_pattern,
                    keep_internals=True)
    n_trace = trace.shape[0]
    n_template = template.shape[0]
    local_cost = alignment.localCostMatrix
    cum_cost = alignment.costMatrix

    # Search the last row (trace at least as long as the template) or the
    # last column (otherwise) of the local cost matrix, skipping element 0.
    # NOTE(review): the argmin is relative to the slice starting at 1 but is
    # used below as an absolute index -- confirm this offset is intended.
    if n_trace >= n_template:
        idx_min = np.argmin(local_cost[-1, 1:])
        border_cost = cum_cost[-1, idx_min]
    else:
        idx_min = np.argmin(local_cost[1:, -1])
        border_cost = cum_cost[idx_min, -1]

    return {
        # not sure why these need to be stacked backwards, but they do
        'w': np.stack([alignment.index2, alignment.index1], axis=1),
        # bug in matlab code here, chooses wrong template axis
        'pathlen': min([idx_min, n_template]),
        'dt': alignment.distance,
        'dt_l': border_cost,
    }
def test_open_begin_end(self):
    """Open-begin/open-end alignment of a short query inside a reference."""
    query = np.arange(2, 4) + .01
    reference = np.arange(1, 5)

    alignment = dtw(query,
                    reference,
                    step_pattern=asymmetric,
                    open_begin=True,
                    open_end=True)

    assert_approx_equal(alignment.distance, 0.02)
    assert_array_equal(alignment.index2, i("2 3"))
def test_backtrack(self):
    """Check the warping-path indices for a 3-sample vs. 5-sample series."""
    short_series = np.array([1, 2, 3])
    long_series = np.array([2, 3, 4, 5, 6])

    alignment = dtw(short_series, long_series)

    expected_query_path = np.array([0, 1, 2, 2, 2, 2])
    expected_reference_path = np.array([0, 0, 1, 2, 3, 4])

    assert_array_equal(alignment.index1, expected_query_path)
    assert_array_equal(alignment.index1s, expected_query_path)
    assert_array_equal(alignment.index2, expected_reference_path)
    assert_array_equal(alignment.index2s, expected_reference_path)
def test_example_ds(self):
    """countPaths on the example cost matrix with the default step pattern."""
    local_costs = np.ones((6, 6))
    local_costs[1, :] = 0
    local_costs[:, 4] = 0
    local_costs[1, 4] = .01

    alignment = dtw(local_costs, keep_internals=True)

    assert_equal(countPaths(alignment), 1683)
def test_example_da(self):
    """countPaths on the example cost matrix with the asymmetric pattern."""
    local_costs = np.ones((6, 6))
    local_costs[1, :] = 0
    local_costs[:, 4] = 0
    local_costs[1, 4] = .01

    alignment = dtw(local_costs, step_pattern=asymmetric, keep_internals=True)

    assert_equal(countPaths(alignment), 51)
def test_rectangular(self):
    """Cumulative costs and normalized distance for unequal-length inputs.

    The expected values were checked by hand.
    """
    short_series = np.array([1, 2, 3])
    long_series = np.array([2, 3, 4, 5, 6])
    expected_costs = np.array([
        [1., 3., 6., 10., 15.],
        [1., 2., 4., 7., 11.],
        [2., 1., 2., 4., 7.],
    ])

    alignment = dtw(short_series, long_series, keep_internals=True)

    assert_array_equal(alignment.costMatrix, expected_costs)
    assert_approx_equal(alignment.normalizedDistance, 0.875)
def dtws(gt_traj, sim_traj):
    """Align a simulated trajectory with the ground truth and plot the result.

    The pairwise distance matrix (simulated rows, ground-truth columns) is
    handed to ``dtw``.  Two figures are produced: the alignment curve, and
    the running sum of the ``costMatrix`` entries along the warping path.
    Nothing is returned.
    """
    pairwise = scipy.spatial.distance_matrix(sim_traj, gt_traj)
    alignment = dtw(pairwise, keep_internals=True)

    # Display the warping curve, i.e. the alignment curve.
    alignment.plot(type="alignment")

    path_rows = alignment.index1
    path_cols = alignment.index2
    # NOTE(review): costMatrix is summed again with cumsum here; confirm that
    # localCostMatrix was not the intended source.
    costs_on_path = alignment.costMatrix[(path_rows, path_cols)]
    plt.plot(np.cumsum(costs_on_path))
    plt.show()
def dtw_plot(x, y, output=None, font_size=13, *args, **kw):
    """Plot the DTW alignment between the smoothed death curves of x and y.

    Both series are obtained from ``_covid.deaths_smooth`` and aligned with a
    Rabiner-Juang type-6 step pattern (slope weighting "c").  The two curves
    are drawn together with dashed connectors for 50 evenly spaced points of
    the warping path.

    Parameters
    ----------
    x, y : names accepted by ``_covid.deaths_smooth``
        The two series to compare; also used as legend labels and in the
        output file name.
    output : str, optional
        Directory for ``dtw_<x>_<y>.png``.  When omitted the figure is shown
        instead of saved.
    font_size : int
        Matplotlib font size applied via ``rcParams``.
    *args, **kw
        Accepted and ignored.

    Returns
    -------
    None.  If either series cannot be smoothed, the function returns without
    plotting.
    """
    deaths = _covid.deaths_df()
    idx_dt = deaths.date.unique()

    plt.rcParams.update({'font.size': font_size})
    try:
        x1, x2, fx = _covid.deaths_smooth(x, deaths)
        y1, y2, fy = _covid.deaths_smooth(y, deaths)
    except Exception:
        # Best effort: skip series that cannot be smoothed.  Narrowed from a
        # bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        return

    d = dtw(fx,
            fy,
            keep_internals=True,
            step_pattern=rabinerJuangStepPattern(6, "c"))

    # Pad the shorter series with NaN so both have the same length.
    xts, yts = d.query, d.reference
    maxlen = max(len(xts), len(yts))
    xts = numpy.pad(xts, (0, maxlen - len(xts)),
                    "constant",
                    constant_values=numpy.nan)
    yts = numpy.pad(yts, (0, maxlen - len(yts)),
                    "constant",
                    constant_values=numpy.nan)

    fig, ax = plt.subplots()

    # linspace defaults to 50 samples: draw only that many connectors.
    idx = numpy.linspace(0, len(d.index1) - 1)
    idx = numpy.array(idx).astype(int)
    for i in idx:
        Lx = [idx_dt[d.index1[i]], idx_dt[d.index2[i]]]
        Ly = [xts[d.index1[i]], yts[d.index2[i]]]
        ax.plot(Lx, Ly, c='gray', linestyle='--', linewidth=1.5)

    ax.plot(x1, xts, label=x, color='k')
    ax.plot(x1, yts, label=y)

    ax.set_ylabel('deaths')
    ax.set_xlabel('date')
    ax.legend()

    if output is None:
        plt.show()
    else:
        with open('%s/dtw_%s_%s.png' % (output, x, y), 'wb') as fp:
            fig.savefig(fp)
    # Release the figure on both paths so repeated calls do not accumulate
    # open figures.
    plt.close(fig=fig)
def datas_split(ts):
    """Work-in-progress: compare consecutive periods of a series with DTW.

    NOTE(review): ``ts`` is not used yet, the decomposition is called with an
    empty string, and ``dtw`` receives two integers -- this looks like an
    unfinished stub; confirm the intended inputs.
    """
    # x and y stand for the data of two consecutive time spans to compare.
    # Find the period T of the series.
    decomposition = statsmodels.seasonal_decompose('')

    # Cut the data into N/T pieces.
    period = 64

    def l1_norm(a, b):
        return np.linalg.norm(a - b, ord=1)

    dist, cost, acc, path = dtw(period, period + 1, dist=l1_norm)
def test_asymmetric(self):
    """Cumulative cost matrix for a 6x6 local cost matrix, asymmetric pattern.

    Cells listed as ``nan`` in the expected matrix are expected to be NaN in
    the computed one.
    """
    local_costs = np.array([
        [1, 1, 2, 2, 3, 3],
        [1, 1, 1, 2, 2, 2],
        [3, 1, 2, 2, 3, 3],
        [3, 1, 2, 1, 1, 2],
        [3, 2, 1, 2, 1, 2],
        [3, 3, 3, 2, 1, 2],
    ], dtype=np.double)
    expected = np.array([
        [1., nan, nan, nan, nan, nan],
        [2., 2., 2., nan, nan, nan],
        [5., 3., 4., 4., 5., nan],
        [8., 4., 5., 4., 5., 6.],
        [11., 6., 5., 6., 5., 6.],
        [14., 9., 8., 7., 6., 7.],
    ])

    result = dtw(local_costs, step_pattern=asymmetric, keep_internals=True)

    assert_array_equal(result.costMatrix, expected)
def test_ldist_symmetric2(self):
    """Distance, path and cumulative costs for ``ldist``, default pattern."""
    alignment = dtw(ldist, keep_internals=True)

    assert_equal(alignment.distance, 2)
    assert_array_equal(alignment.index1, i("1 2 2 2 3 4 5 6 6"))
    assert_array_equal(alignment.index2, i("1 2 3 4 5 5 5 5 6"))

    expected_costs = np.array([
        [1, 2, 3, 4, 4.00, 5.00],
        [1, 1, 1, 1, 1.01, 1.01],
        [2, 2, 2, 2, 1.00, 2.00],
        [3, 3, 3, 3, 1.00, 2.00],
        [4, 4, 4, 4, 1.00, 2.00],
        [5, 5, 5, 5, 1.00, 2.00],
    ], dtype=float)
    assert_array_equal(alignment.costMatrix, expected_costs)
def similarity_funcs(input):
    """Return the mean pairwise DTW distance between the rows of ``input``.

    ``input`` is reshaped to its first two dimensions (the original comments
    give 50 x 150 x 1 -> 50 x 150 as an example), every unordered pair of
    rows is aligned with ``dtw`` in distance-only mode, and the summed
    distance is divided by the number of pairs ``n * (n - 1) / 2``.

    Raises
    ------
    ZeroDivisionError
        If ``input`` has fewer than two rows (there are no pairs).
    """
    # Drop the trailing dimension and cast once; the original repeated the
    # float64 cast of both rows for every single pair.
    series = tf.cast(tf.reshape(input, [input.shape[0], input.shape[1]]),
                     dtype=tf.float64)
    n_series = series.shape[0]

    total_distance = 0
    for i in range(n_series):
        for j in range(i + 1, n_series):
            total_distance += dtw(series[i], series[j],
                                  distance_only=True).distance

    return total_distance / (n_series * (n_series - 1) / 2)
def test(t_feat, feats):
    """Classify ``t_feat`` against the templates in ``feats`` and plot it.

    Every template is aligned with ``t_feat``; the template whose ``dists``
    result has the smallest norm wins and its name is printed.  If
    ``get_dist`` of the winning alignment is 600 or more, the label shown in
    the plot becomes ``'None'`` and only the reduced plot is drawn; otherwise
    the detailed plots are drawn.

    Parameters
    ----------
    t_feat : features of the sample under test.
    feats : mapping of template name -> template features.
    """
    res = {}
    alignments = {}
    for name in feats:
        dists, paths, tpath = dtw(feats[name], t_feat)
        # Keep the alignment so the winner does not have to be recomputed.
        alignments[name] = (dists, paths, tpath)
        res[name] = np.linalg.norm(dists)

    label = min(res, key=res.get)
    print(label)
    res_f = feats[label]
    dists, paths, tpath = alignments[label]

    total = get_dist(dists)
    rejected = total >= 600
    if rejected:
        label = 'None'

    p = Plotter(total, tpath, dists, paths, t_feat, res_f, label)
    if rejected:
        p.show_hand_nodetail()
    else:
        p.show_hand()
        p.show_total_features()
    plt.show()
def slice_correspondences(reference,
                          target,
                          sigma,
                          is_reversed=False,
                          is_continuity=True):
    """
    Match slices of ``target`` to slices of ``reference`` with DTW.

    The relative-area profile of the target is (optionally reversed and then)
    Gaussian-smoothed and aligned to the relative-area profile of the
    reference using the asymmetric step pattern with open begin and end.

    Parameters
    ----------
    reference: np.ndarray
        reference image
    target: np.ndarray
        target image
    sigma: float
        gaussian standard deviation used to smooth the target profile
    is_reversed: bool
        whether the images are reversed in the z-axis
    is_continuity: bool
        whether to pass the result through ``enforce_continuity_values``

    Returns
    ----------
    np.ndarray
        correspondence indices
    """
    reference_profile = relative_area(reference)
    target_profile = relative_area(target)
    if is_reversed:
        target_profile = target_profile[::-1]

    smoothed_target = gaussian_filter1d(np.copy(target_profile), sigma)

    matched = dtw(smoothed_target,
                  reference_profile,
                  step_pattern=asymmetric,
                  keep_internals=True,
                  open_begin=True,
                  open_end=True).index2

    if is_continuity:
        matched = enforce_continuity_values(matched)

    # Undo the earlier reversal so indices follow the original slice order.
    return matched[::-1] if is_reversed else matched
def threadsensor():
    """Run DTW on two fixed sample sequences and print the distance found.

    Reading live values from the sensor socket is currently disabled; the
    former acquisition code is kept below as comments.
    """
    # Disabled sensor acquisition, kept for reference:
    # connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  # connection to grab data value
    # connection.connect((host, port1))
    # while 1:
    #     datasetSensor = connection.recv(1024)
    #     dummy = datasetSensor.split(' ', 1)
    #     dummy[-1] = dummy[-1].strip()
    #     value.update(dummy[1])
    x = [0, 0, 1, 1, 2, 4, 2, 1, 2, 0]
    y = [1, 1, 1, 2, 2, 2, 2, 3, 2, 0]
    dist, cost, path = dtw(x, y)
    # Single pre-formatted argument: produces the same text under the
    # Python 2 print statement and the Python 3 print function.
    print('Minimum distance found: %s' % (dist,))
def get_hits(x_signal, signal, x_template, template):
    """Slide ``template`` over ``signal`` and record the DTW distance per window.

    Windows of template length start every third of the template length (at
    least every sample).  For each window the DTW distance to the template is
    stored together with the ``x_signal`` value located just past the end of
    the window.  Progress (fraction of the scan completed) is printed for
    every window.

    Parameters
    ----------
    x_signal : indexable
        Coordinates associated with the samples of ``signal``.
    signal : np.ndarray
        Series to scan.
    x_template : unused
        Accepted for interface compatibility.
    template : np.ndarray
        Pattern to look for.

    Returns
    -------
    (np.ndarray, np.ndarray)
        Window coordinates and the matching DTW distances.  Both are empty
        when the signal is not longer than the template.
    """
    len_signal = signal.shape[0]
    len_template = template.shape[0]
    # Fix: templates shorter than 3 samples produced a zero step, which made
    # range() raise ValueError.
    step = max(1, len_template // 3)

    x = []
    dist = []
    for i in range(0, len_signal - len_template, step):
        print(i / (len_signal - len_template))
        al = dtw(signal[i:i + len_template], template)
        dist.append(al.distance)
        x.append(x_signal[i + len_template])
    return np.array(x), np.array(dist)
def get_dtw(series1, series2, sequamce_length):
    """Compute a rolling DTW distance between two series.

    For every position ``i`` from ``sequamce_length`` to the end of
    ``series1``, the ``sequamce_length`` samples preceding ``i`` in both
    series are aligned with ``dtw`` using the absolute difference as local
    distance.  Positions before ``sequamce_length`` stay at zero.  The
    initial zero array and every distance are printed.

    Parameters
    ----------
    series1, series2 : sliceable sequences
        The two series; the output has the length of ``series1``.
    sequamce_length : int
        Length of the trailing window (misspelled name kept for
        compatibility with existing callers).

    Returns
    -------
    np.ndarray
        Rolling DTW distances.
    """

    def abs_difference(a, b):
        return np.abs(a - b)

    dtw_series = np.zeros(len(series1))
    # print() with a single argument behaves the same under Python 2 and 3;
    # the original used a Python-2-only print statement here.
    print(dtw_series)

    for i in range(sequamce_length, len(series1)):
        sub_series1 = series1[i - sequamce_length:i]
        sub_series2 = series2[i - sequamce_length:i]
        distance, cost_matrix, acc_cost_matrix, path = dtw(
            sub_series1, sub_series2, dist=abs_difference)
        dtw_series[i] = distance
        print(distance)

    return dtw_series
def recognize_all(a):
    """Recognize every file in ``test_files`` and report the accuracy.

    Each file is loaded from ``cg_dirname``, converted to MFCCs and compared
    by DTW with every entry of ``mfcc_arr``; the label of the closest entry
    is the prediction.  Per-file results, the total time and the accuracy
    are inserted into ``micl``.  The expected label of a file is derived from
    the part of its name before the first underscore.

    Parameters
    ----------
    a : unused
        Accepted so the function can be used as a callback.
    """
    start = time.perf_counter()
    dirname = cg_dirname
    files = test_files

    def frame_distance(u, v):
        # Exponential of the L1 norm of the frame difference.
        return np.exp(np.linalg.norm(u - v, ord=1))

    Test_Result = []
    for test_file in files:
        start1 = time.perf_counter()
        yTest, srTest = librosa.load(dirname + "/" + test_file)
        # Keyword arguments: recent librosa releases reject positional y/sr.
        mfccTest = librosa.feature.mfcc(y=yTest, sr=srTest)
        mfccTest = preprocess_mfcc(mfccTest)

        dists = [dtw(mfcci.T, mfccTest.T, dist=frame_distance)[0]
                 for mfcci in mfcc_arr]
        pre = int(y[dists.index(min(dists))])
        tt = time.perf_counter() - start1

        # Fix: report the file actually being processed; the original
        # printed the global ``current_test`` for every file.
        output = "Input File : " + str(test_file) + ".\nThe spoken word is : " + str(label[pre]) + ".\nTime taken for Recognition : " + str(tt) + "\n"
        micl.insert(INSERT, output)
        Test_Result.append(label[pre])

    tt = time.perf_counter() - start
    output = "\nTotal Time taken for Recognizing "+str(len(test_files))+" Testing files : " +str(tt) + "\n"
    micl.insert(INSERT, output)

    # Accuracy: the position of the file-name prefix in eng_arr selects the
    # true label.
    eng_arr = ["aabe","baith","bera","eti","godh","hamar","hey","jaahun","kaabar" ,"kahat","karat","khaabe","koti","laika","mor","pirat" , "rengat" , "terat" , "toora" , "tuman"]
    correct = 0
    total_files = len(test_files)
    for test_file, predicted in zip(test_files, Test_Result):
        prefix = test_file.split('_')[0]
        true_value = label[eng_arr.index(prefix)]
        if predicted == true_value:
            correct += 1

    # Guard against an empty test set instead of dividing by zero.
    accuracy = (correct / total_files) * 100 if total_files else 0.0
    output = "\nAccuracy of the complete Recognition : " + str(correct) + " out of " + str(total_files) + ".\nAccuracy percentage : "+str(accuracy)+"\n"
    micl.insert(INSERT, output)
def select_template(enroll_wavs, model):
    """Pick the enrollment feature that is closest to the others.

    Features are extracted for every enrollment wav and every unordered pair
    is aligned with cosine DTW.  Each pair's normalized distance is credited
    to both of its members; among the six smallest credits, the feature index
    that occurs most often is selected.

    Returns
    -------
    (selected feature, largest pairwise normalized distance)
        The second element is meant to be used as a threshold.
    """
    # Extract features first.
    features = [feature_extract(model, wav) for wav in enroll_wavs]

    # Score every pair and record the distance once per member.
    distance_each = []
    for i, j in itertools.combinations(range(len(features)), 2):
        pair_distance = dtw(features[i],
                            features[j],
                            keep_internals=True,
                            distance_only=True,
                            dist_method='cosine').normalizedDistance
        distance_each.extend([(i, pair_distance), (j, pair_distance)])

    # Most frequent feature index among the six lowest scores.
    closest = sorted(distance_each, key=lambda entry: entry[1])[:6]
    low_scores = list(zip(*closest))[0]
    feature_index = np.argmax(np.bincount(low_scores))

    # Save the largest distance as threshold.
    largest_distance = max(distance for _, distance in distance_each)
    return features[feature_index], largest_distance
def wav_graph(x, y):
    """Show the MFCCs of two audio files and their DTW alignment.

    Both files are loaded with librosa, their MFCCs are displayed side by
    side, and the DTW distance between the MFCC frame sequences is printed.
    The cost matrix and the warping path are then drawn on the current axes.

    Parameters
    ----------
    x, y : paths of the two audio files to compare.
    """
    y1, sr1 = librosa.load(x)
    y2, sr2 = librosa.load(y)

    # Showing multiple plots using subplot.
    plt.subplot(1, 2, 1)
    # Computing MFCC values; keyword arguments because recent librosa
    # releases reject positional y/sr.
    mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1)
    librosa.display.specshow(mfcc1)

    plt.subplot(1, 2, 2)
    mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2)
    librosa.display.specshow(mfcc2)

    dist, cost, path = dtw(mfcc1.T, mfcc2.T)
    print("The normalized distance between the two : ", dist)  # 0 for similar audios

    plt.imshow(cost.T,
               origin='lower',
               cmap=plt.get_cmap('gray'),
               interpolation='nearest')
    plt.plot(path[0], path[1], 'w')  # creating plot for DTW
    plt.show()
print 'complete' time = np.zeros(len(timeSeries.elements)) DRIVER_NODES = np.zeros(len(timeSeries.elements)) for i in range(0, len(timeSeries.elements)): time[i] = timeSeries.elements[i].timestamp DRIVER_NODES[i] = timeSeries.elements[i].value profile2 = convolution.extract_profile(name, time, DRIVER_NODES, ent[1]) profile2 = normalize(profile2) # print profile2 # Run the DTW algorithm on both of them # value = dtw(profile, profile2) match = [] for opro in patterns: value = dtw(opro[0], profile2) match.append(value) # # Finally use the KNN algorithm to determine the closest match! # from scipy.spatial import KDTree # lookup = KDTree(match) # print lookup.data # pts = np.array([[-3]]) # print lookup.query(pts, k=4) # for i in lookup.query(pts, k=1)[1]: # print labels[i] minv = 999 mini = 0 for i in xrange(len(match)): v = match[i]
if (len(data.shape) > 1): data = data[:,0] xsize = min(10*rate, 407500) x = data[0:xsize] X = stft(x) peaks = grid_peaks(X, 10) yy[k] = hash_pmaps(peaks) print 'DTW & EDRP by entire sequences' # B.4 (dtw by entire sequence) for n in tracks: for m in tracks: if n == m: continue z1 = conv_to_1D(yy[n], (0,N)) # get 1-D of entire sequence z2 = conv_to_1D(yy[m], (0,N)) dist1, cost, path = dtw(z1, z2) # dist2 = edrp1(z1, z2, len(z1), len(z2)) #causes maximum recursion depth :( dist3 = edrp2(z1, z2) print n+1, m+1, dist1, dist3 print '\nDTW by window approach' # B.5 (dtw by sliding window) n = 0 # wave 1 (audio 1) m = 3 # wave 4 (audio 2) distance = [] z1 = conv_to_1D(yy[n], samples[n]) # mutual sentence L = samples[n][1] - samples[n][0] # start & end point for l in range(N-L): # window approach z2 = conv_to_1D(yy[m], (l, l+L)) dist, cost, path = dtw(z1, z2) distance += [dist] graph_accel(distance)