def spellChecker(queryFreq):
    """Map infrequent query terms to the frequent term they most likely misspell.

    :param queryFreq: mapping of term -> frequency.
    :return: ``defaultdict(str)`` mapping a misspelled term to its correction.
        Chains are collapsed, so every value is itself not a key.

    A term longer than 4 characters is mapped to a candidate when the
    candidate is longer than 4 characters, is at least 10x more frequent,
    differs in length by at most 2, and the last cell of the matrix that
    ``dtw`` returns in second position is <= 2.  Among several matching
    candidates the most frequent one wins.
    """
    sortedQuery = sorted(queryFreq.items(), key=operator.itemgetter(1), reverse=True)
    spellDict = defaultdict(str)
    numDict = defaultdict(int)
    for i in range(len(sortedQuery) - 1, -1, -1):
        word, freq = sortedQuery[i]
        if len(word) <= 4:
            continue
        for j in range(i):
            cand, cand_freq = sortedQuery[j]
            if cand_freq < 10 * freq:
                # Entries are sorted by descending frequency, so no later
                # candidate can satisfy the 10x requirement either.
                break
            if len(cand) <= 4 or abs(len(word) - len(cand)) > 2:
                continue
            # The original ran an extra equal-length pass with threshold 1.
            # Every pair accepted there is also accepted by this check (and
            # performs the same update), so one dtw call per pair suffices.
            dist, cost, path = dtw(list(word), list(cand), customDist)
            if cost[len(word) - 1][len(cand) - 1] > 2:
                continue
            if word not in spellDict or cand_freq > numDict[word]:
                spellDict[word] = cand
                numDict[word] = cand_freq
    # Follow correction chains (a -> b -> c) so each word points at the end.
    for word in spellDict:
        correctWord = spellDict[word]
        while correctWord in spellDict:
            correctWord = spellDict[correctWord]
        spellDict[word] = correctWord
    return spellDict
def twoDDW(x, y, dist_func):
    """
    Computes 2-dimensional Dynamic Warping of two images.

    :param 2D-array x: N1*M1 array
    :param 2D-array y: N2*M2 array
    :param func dist_func: distance used as cost measure

    Returns a 3-tuple: the last cell of the accumulated cost array divided by
    the sum of its four dimensions, the accumulated cost array itself, and
    the result of ``local_traceback`` on it.
    """
    N1, M1 = x.shape
    N2, M2 = y.shape
    max_value = (N1, M1, N2, M2)
    transpose_x = x.transpose()
    transpose_y = y.transpose()
    cummulative = zeros(max_value)
    for i in Calculation_Order(N1, M1, N2, M2):
        # cost1 = DTW(R1, R2) + DTW(C1, C2), where R1 is the i1-th row in
        # image I1 from column 1 through column i1, C1 is the j1-th column
        # in image I2 from row 1 through row j1.
        R1 = x[i[0], 0:i[0]]
        if len(R1) == 0:
            R1 = array([0])
        R2 = y[i[2], 0:i[2]]
        if len(R2) == 0:
            R2 = array([0])
        # NOTE(review): C1 is taken from x but indexed with i[3], the same
        # index used for y below; i[1] may have been intended - confirm.
        C1 = transpose_x[i[3], 0:i[3]]
        if len(C1) == 0:
            C1 = array([0])
        C2 = transpose_y[i[3], 0:i[3]]
        if len(C2) == 0:
            C2 = array([0])
        DTW_R1_R2, _, _, _ = dtw(R1.reshape(-1, 1), R2.reshape(-1, 1), dist=dist_func)
        DTW_C1_C2, _, _, _ = dtw(C1.reshape(-1, 1), C2.reshape(-1, 1), dist=dist_func)
        final_cost = inf
        if isZeros(i):
            final_cost = 0
        else:
            # Try each of the 15 predecessor moves and keep the cheapest one
            # whose predecessor stage passes verify_boundaries.
            for num in range(15):
                new_stage = prev_stage(i, num)
                cost2 = 0
                prev_prev = prev_stage(new_stage, num)
                if verify_boundaries(prev_prev, max_value):
                    cost2 += cummulative[prev_prev]
                cost2 += cost(DTW_R1_R2, DTW_C1_C2, num)
                if verify_boundaries(new_stage, max_value):
                    if cost2 < final_cost:
                        final_cost = cost2
        cummulative[i[0], i[1], i[2], i[3]] = final_cost
    print("***************")
    print(" twoDDW done ")
    print("***************")
    print("***************")
    print(" Traceback started ")
    print("***************")
    return (cummulative[-1, -1, -1, -1] / sum(cummulative.shape),
            cummulative,
            local_traceback(cummulative, N1, M1, N2, M2))
def test_dtw(self):
    """Fast and debug DTW must produce the same matrix and the expected pairs."""
    first = numpy.array([[0, 0], [2, 4], [4, 4], [6, 9]])
    second = numpy.array([[0, 0], [0, 2], [2, 4], [6, 6]])

    fast_result = dtw.dtw(first, second)
    debug_result = dtw.dtw(first, second, debug=True)

    # Both code paths have to build an identical matrix.
    assert_array_equal(fast_result.matrix(), debug_result.matrix())

    # Index pairs (row in first, row in second) of the expected alignment.
    wanted_pairs = numpy.array([[0, 0], [0, 1], [1, 2], [2, 2], [3, 3]])
    assert_array_equal(fast_result.pairs(), wanted_pairs)
def getDistances(f1, f2):
    """Return ``(sum, product)`` of two DTW distances between ``f1`` and ``f2``.

    Element 0 of each argument is compared with element 0 of the other and
    element 1 with element 1; every matrix is transposed before being handed
    to ``dtw`` and the frame distance is the 2-norm of the difference.
    """
    first_mfcc, first_mspec = f1[0], f1[1]
    second_mfcc, second_mspec = f2[0], f2[1]

    def l2(a, b):
        return norm(a - b, ord=2)

    mfcc_dist, _, _, _ = dtw(first_mfcc.T, second_mfcc.T, dist=l2)
    mspec_dist, _, _, _ = dtw(first_mspec.T, second_mspec.T, dist=l2)
    return mfcc_dist + mspec_dist, mfcc_dist * mspec_dist
def mediaProcess():
    """Benchmark light-state classification for pattern lengths 1..100.

    For every pattern length ``chunk`` the function collects 100 classified
    samples from the sensor socket on port 6001.  Each sample is a list of
    ``chunk`` successive, changing ``MediaValue`` readings that is compared
    by DTW against the ``living_light_off`` and ``living_light_on``
    references.  One line ``LivingOff, <chunk> , <TP> , <avg seconds>`` is
    printed per pattern length, where TP counts the samples classified "off".

    Returns early (after closing the socket) if the peer closes the
    connection.
    """
    media_event_pattern = []
    time.sleep(1)  # delay the thread for the first time.
    connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  # connection to grab data value
    try:
        connection.connect((host, 6001))
        for chunk in range(1, 101):
            TP = 0
            waktu = 0
            sample = 1
            while sample <= 100:
                start_time = time.time()  # start time to count the execution time
                counter = 0
                while counter < chunk:  # accuracy adjustment
                    datasetSensor = connection.recv(1024)
                    if not datasetSensor:
                        # An empty read means the peer closed the socket; the
                        # original would spin forever here.
                        return
                    # sometimes the server sends a malformed dataset, so only
                    # the first CRLF-terminated record is considered.
                    dummy = datasetSensor.strip().split(b'\r\n')[0]
                    parts = dummy.split()
                    if len(parts) > 1:
                        event_pattern_value = float(parts[1])
                        MediaValue.update(event_pattern_value)
                        # Record a reading only when it differs from the
                        # previously recorded one.
                        if counter == 0:
                            media_event_pattern.append(MediaValue.content())
                            counter += 1
                        elif media_event_pattern[counter - 1] != MediaValue.content():
                            media_event_pattern.append(MediaValue.content())
                            counter += 1
                light_off_dist, light_off_cost, light_off_path = dtw(living_light_off, media_event_pattern)
                light_on_dist, light_on_cost, light_on_path = dtw(living_light_on, media_event_pattern)
                if light_off_dist < light_on_dist:
                    currentLiving.update(0)
                    TP += 1
                    sample += 1
                    # Whole elapsed seconds; replaces the non-portable
                    # strftime("%s") round trip of the original.
                    waktu = waktu + int(time.time() - start_time)
                elif light_on_dist < light_off_dist:
                    currentLiving.update(1)
                    sample += 1
                    waktu = waktu + int(time.time() - start_time)
                else:
                    # Equal distances: the sample is discarded and retried.
                    print("Undefine")
                del media_event_pattern[:]
            print("LivingOff, %s , %s , %s" % (chunk, TP, waktu // 100))
    finally:
        connection.close()
def getAllDistances(f1, GTF):
    """Compare ``f1`` against every entry of ``GTF`` with DTW.

    Each ``GTF`` entry is indexed as ``(tag, (mfcc, mspec))``.  For every
    entry the result holds ``(tag, mfcc_distance + mspec_distance)``; the
    matrices are transposed before the call and frames are compared with the
    2-norm of their difference.

    :return: list of ``(tag, distance)`` tuples in ``GTF`` order.
    """
    query_mfcc, query_mspec = f1[0], f1[1]

    def l2(a, b):
        return norm(a - b, ord=2)

    distances = []
    for instance in GTF:
        ref_mfcc = instance[1][0]
        ref_mspec = instance[1][1]
        mfcc_dist, _, _, _ = dtw(query_mfcc.T, ref_mfcc.T, dist=l2)
        mspec_dist, _, _, _ = dtw(query_mspec.T, ref_mspec.T, dist=l2)
        distances.append((instance[0], mfcc_dist + mspec_dist))
    return distances
def results(self):
    """Score the current round and update the result widgets.

    Loads ``output1.wav`` and ``output2.wav``, extracts their MFCCs, takes
    the DTW distance between them and shows a verdict for it.  The distance
    is appended to ``self.rank`` and to the ``self.outputRank`` text, and
    ``self.count`` is advanced by one.
    """
    first_wave, first_rate = librosa.load('output1.wav')
    second_wave, second_rate = librosa.load('output2.wav')
    first_mfcc = librosa.feature.mfcc(first_wave, first_rate)
    second_mfcc = librosa.feature.mfcc(second_wave, second_rate)
    dist, cost, path = dtw(first_mfcc.T, second_mfcc.T)

    # Ranking thresholds, checked from the best tier down.
    verdict = "You are poor at this game... TT"
    tiers = (
        (40, "You did a great job! ^^"),
        (50, "You did good."),
        (60, "You're fine."),
    )
    for upper_bound, text in tiers:
        if dist <= upper_bound:
            verdict = text
            break
    self.textEdit_2.setText(verdict)

    self.rank.append(dist)
    self.textEdit_3.setText(str(self.rank[self.count]))
    self.outputRank += "Player " + str(self.count) + " got " + str(self.rank[self.count]) + "\n\n"
    self.count = self.count + 1
def align_signal(s_, t, w=5, has_time=True, get_distance=False):
    """Warp ``t`` onto the time axis of ``s_`` using DTW.

    Rows are time points, columns are signal dimensions.  When ``has_time``
    is true the first column of both inputs is dropped before alignment and
    a fresh 1..N index column is prepended to the result.

    :param w: accepted for interface compatibility; not used by this body.
    :param get_distance: when true, also return the DTW distance.
    :return: ``new_t`` (same row count as ``s_``), or ``(new_t, distance)``.
    """
    if has_time:
        s_, t = s_[:, 1:], t[:, 1:]

    dist_, cost_, acc_, path_ = dtw(s_, t, euclidean_distances)
    path_ = np.array(path_)
    source_rows = path_[0, :]
    warped_t = t[path_[1, :], :]

    # Several path steps may hit the same source row; later steps overwrite
    # earlier ones, exactly as a sequential loop does.
    new_t = np.zeros(s_.shape)
    for step in range(warped_t.shape[0]):
        new_t[source_rows[step], :] = warped_t[step, :]

    if has_time:
        time_column = np.arange(1, s_.shape[0] + 1).reshape(-1, 1)
        new_t = np.hstack((time_column, new_t))

    return (new_t, dist_) if get_distance else new_t
def dist_test(region, type1, type2):
    """Print, plot and return the DTW distance between two series of ``region``.

    The series are fetched with ``get_data(region, type1)`` and
    ``get_data(region, type2)``.
    """
    first_series = get_data(region, type1)
    second_series = get_data(region, type2)
    outcome = dtw(first_series, second_series)
    dist, cost, path = outcome
    print(dist)
    plot_dtw(first_series, second_series, cost, path)
    return dist
def get_distance(arr1, arr2, norm=distance, acc_option=True):
    '''
    Return the DTW minimum distance between two arrays.

    parameters:
    -----------
    arr1, arr2: np.arrays from prepared_smooth_array function
        np array [time, emotions], with smoothed counts
            time1 [emotion1, emotion2, emotion3,...]
            time2 [emotion1, emotion2, emotion3,...]
    norm: distance measure; it is passed on only when acc_option is False
        (the accelerated call below receives no distance argument).
    acc_option: defaults to True, which uses acc_dtw (accelerated version
        of Dynamic Time Warping). False selects the standard dtw.

    returns:
    --------
    minimum distance (first element of the 4-tuple returned by dtw)
    '''
    if acc_option:
        outcome = acc_dtw.dtw(arr1, arr2)
    else:
        outcome = dtw.dtw(arr1, arr2, dist=norm)
    min_dist, cost_matrix, acc_cost_matrix, wrap_path = outcome
    return min_dist
def dtw_avg_distance(sig, test_sigs):
    """Return the mean DTW distance between ``sig`` and each of ``test_sigs``.

    Raises ``ZeroDivisionError`` when ``test_sigs`` is empty.
    """
    def first_of_four(outcome):
        # dtw is expected to return exactly four values here.
        dist, _cost, _acc, _path = outcome
        return dist

    total = sum(first_of_four(dtw(sig, other, distance)) for other in test_sigs)
    return total / len(test_sigs)
def mediaProcess():
    """Continuously classify the living-room light and drive ``lamp1``.

    Reads sensor records from port 7002, builds patterns of 10 successive,
    changing ``MediaValue`` readings and compares each pattern by DTW with
    the ``living_light_off`` / ``living_light_on`` references.  The closer
    reference decides the state stored in ``currentLiving`` and applied to
    ``lamp1``; a state change is reported on stdout.

    Runs until the peer closes the connection; the socket is always closed.
    """
    media_event_pattern = []
    time.sleep(1)  # delay the thread for the first time.
    connection = socket.socket(socket.AF_INET, socket.SOCK_STREAM)  # connection to grab data value
    try:
        connection.connect((host, 7002))
        while True:
            start_time = time.time()  # start time to count the execution time
            counter = 0
            while counter < 10:  # accuracy adjustment
                datasetSensor = connection.recv(1024)
                if not datasetSensor:
                    # An empty read means the peer closed the socket; the
                    # original would spin forever here.
                    return
                # sometimes the server sends a malformed dataset, so only
                # the first CRLF-terminated record is considered.
                dummy = datasetSensor.strip().split(b'\r\n')[0]
                parts = dummy.split()
                if len(parts) > 1:
                    event_pattern_value = float(parts[1])
                    MediaValue.update(event_pattern_value)
                    # Record a reading only when it differs from the
                    # previously recorded one.
                    if counter == 0:
                        media_event_pattern.append(MediaValue.content())
                        counter += 1
                    elif media_event_pattern[counter - 1] != MediaValue.content():
                        media_event_pattern.append(MediaValue.content())
                        counter += 1
            light_off_dist, light_off_cost, light_off_path = dtw(living_light_off, media_event_pattern)
            light_on_dist, light_on_cost, light_on_path = dtw(living_light_on, media_event_pattern)
            if light_off_dist < light_on_dist:
                curentvalue = 0
                if curentvalue != currentLiving.content():
                    # Whole elapsed seconds; replaces the non-portable
                    # strftime("%s") formatting of the original.
                    print("Living room light is off - %d seconds" % int(time.time() - start_time))
                    lamp1.on = False
                currentLiving.update(curentvalue)
            elif light_on_dist < light_off_dist:
                curentvalue = 1
                lamp1.on = True
                lamp1.xy = normal
                lamp1.brightness = 50
                if curentvalue != currentLiving.content():
                    print("Living room light is on - %d seconds" % int(time.time() - start_time))
                currentLiving.update(curentvalue)
            else:
                print("Undefine")
            del media_event_pattern[:]
    finally:
        connection.close()
def WorkerDTW(testing_array, training_array, bandwidth, label_names, results_queue):
    """Classify every test item by DTW and report the number of correct hits.

    ``dtw.dtw(item, training_array, bandwidth)`` yields the index of the
    chosen training item; a hit is counted when its label equals the test
    item's label.  The count is reported through ``report_single`` under the
    label name of the first test item and appended to ``results_queue``.
    """
    correct = []
    for candidate in testing_array:
        best_index, _ = dtw.dtw(candidate, training_array, bandwidth)
        if candidate.label == training_array[best_index].label:
            correct.append(candidate)
    gotRight = len(correct)

    report_single(label_names[testing_array[0].label], gotRight, len(testing_array))
    results_queue.append(gotRight)
def analyze(self):
    """Run DTW on ``self.x`` / ``self.y``, store the result and return the distance.

    The three values returned by ``dtw`` are saved as ``self.dist``,
    ``self.cost`` and ``self.path``.
    """
    distance_value, cost_matrix, warp_path = dtw(self.x, self.y)
    self.dist, self.cost, self.path = distance_value, cost_matrix, warp_path
    return distance_value
def pitch_vector_distance(pa, pb):
    """Return the DTW distance between two pitch vectors, ignoring NaN entries."""
    valid_a = pa[~np.isnan(pa)]
    valid_b = pb[~np.isnan(pb)]
    outcome = dtw(valid_a, valid_b)
    dist, _cost, _path = outcome
    return dist
def distance_dtw(X, Y):
    """Return the first of the three values produced by ``dtw(X, Y)``."""
    from dtw import dtw

    first_value, _second, _third = dtw(X, Y)
    return first_value
def cluster_distance(cluster1, cluster2):
    """Return the mean DTW distance over all vector pairs of two clusters.

    Every vector in ``cluster1.vec`` is paired with every vector in
    ``cluster2.vec``.
    """
    def pair_distance(left, right):
        value, _cost, _path = dtw(left, right)
        return value

    all_pairs = [
        pair_distance(left, right)
        for left in cluster1.vec
        for right in cluster2.vec
    ]
    return np.mean(all_pairs)
def get_distance(alphabet, audio_file):
    """Return DTW distances between ``audio_file`` and the references of ``alphabet``.

    :param alphabet: key of the reference set; looked up as ``str(alphabet)``
        in ``mfcc_values``.
    :param audio_file: path of the recording to score.
    :return: dict mapping each reference file to its distance formatted with
        four decimals, or ``{alphabet: '1000000000'}`` when no references
        exist for ``alphabet``.
    """
    # Use one key for both the membership test and the lookup; the original
    # tested ``alphabet`` but indexed with ``str(alphabet)``.
    key = str(alphabet)
    if key not in mfcc_values:
        # Checked before touching the audio file so that unknown letters do
        # not pay for loading and feature extraction.
        return {alphabet: '1000000000'}

    y1, sr1 = librosa.load(audio_file)
    mfcc1 = librosa.feature.mfcc(y1, sr1)
    distance_matrix = {}
    for (mfcc2, reference_file) in mfcc_values[key]:
        dist, cost, accumulated_cost, path = dtw(mfcc1.T, mfcc2.T, dist=my_custom_norm)
        distance_matrix[reference_file] = '%.4f' % dist
    return distance_matrix
def make_cost_matrix(audio_file, intervals, labels, dist, level):
    """Computes the cost matrix of the DTW from the given audio file.

    Parameters
    ----------
    audio_file : str
        Path to the audio file.
    intervals : np.array
        Intervals containing the estimated boundaries.
    labels : np.array
        Estimated segment labels.
    dist : fun
        Distance function to be used for the DTW
    level : str
        Level in the hierarchy.

    Returns
    -------
    D : np.array
        Symmetric DTW scores; NaN where a segment is shorter than 2 frames,
        0 on the diagonal.
    P : list
        List of lists; ``P[i][j]`` (for i < j) holds the DTW path as two
        int32 arrays, 0 on the diagonal and NaN elsewhere.
    """
    # Computes the features (return existing ones if already computed)
    cqgram, intframes = compute_features(audio_file, intervals, level)
    n_segments = len(labels)

    # Score matrix
    D = np.full((n_segments, n_segments), np.nan, dtype=np.float32)
    np.fill_diagonal(D, 0)

    # Path matrix
    P = [[0 if row == col else np.nan for col in range(n_segments)]
         for row in range(n_segments)]

    def segment_frames(k):
        return cqgram[:, intframes[k, 0]:intframes[k, 1]].T

    def too_short(k):
        return intframes[k, 1] - intframes[k, 0] < 2

    for i in range(n_segments):
        x_slice = segment_frames(i)
        if too_short(i):
            continue
        for j in range(i + 1, n_segments):
            if too_short(j):
                continue
            dtw_cost, distance, path = dtw.dtw(x_slice, segment_frames(j), dist=dist)
            D[i, j] = D[j, i] = dtw_cost
            P[i][j] = [
                np.asarray(path[0], dtype=np.int32),
                np.asarray(path[1], dtype=np.int32),
            ] + list(path)[2:]
    return D, P
def test_fast_vs_normal_1D(self):
    """Plain and accelerated DTW must agree on random 1-D series."""
    x = np.random.rand(np.random.randint(2, 100))
    y = np.random.rand(np.random.randint(2, 100))

    def absolute_difference(a, b):
        return np.abs((a - b))

    slow_dist, slow_cost, slow_acc, slow_path = dtw(x, y, dist=absolute_difference)
    fast_dist, fast_cost, fast_acc, fast_path = accelerated_dtw(x, y, 'euclidean')

    self.assertAlmostEqual(slow_dist, fast_dist)
    self.assertAlmostEqual((slow_cost - fast_cost).sum(), 0)
    self.assertAlmostEqual((slow_acc - fast_acc).sum(), 0)
    for axis in (0, 1):
        self.assertTrue((slow_path[axis] == fast_path[axis]).all())
def iterate_hidden_signature_estimate(hidden_signature, signature):
    """Return ``signature`` averaged onto the time axis of ``hidden_signature``.

    The two are aligned with DTW; each row of the result is the mean of all
    ``signature`` rows the warping path maps onto that row.
    """
    n_samples, _ = hidden_signature.shape
    _, _, _, path = dtw(hidden_signature, signature, distance)

    accumulated = np.zeros(shape=hidden_signature.shape)
    hits_per_row = np.zeros(shape=(n_samples, 1))
    for hidden_row, signature_row in zip(*path):
        accumulated[hidden_row] += signature[signature_row]
        hits_per_row[hidden_row] += 1.0

    accumulated /= hits_per_row
    return accumulated
def compare(arg, dirname, fnames):
    """Directory-walk callback: score matching test files against all templates.

    For every name in ``fnames`` that contains the module-level ``test``
    marker, the file's MFCC features are compared by DTW with each entry of
    ``template_feature``.  Results go to the module-level dicts
    ``score[fname][template]`` (the ``dtw`` result) and
    ``overhead[fname][template]`` (seconds spent in that ``dtw`` call).

    :param arg: unused; part of the callback signature.
    :param dirname: directory the file names are relative to.
    :param fnames: file names found in ``dirname``.
    """
    for fname in fnames:
        if test not in fname:
            continue
        # Read the file once; the original loaded it twice, once for the
        # samples and once for the sampling rate.
        wave = waveio.wave_from_file(os.path.join(dirname, fname))
        test_features = mfcc(signal=wave[0], fs=wave[1])
        # One result dict per test sample, keyed by template.
        overhead[fname] = {}
        score[fname] = {}
        for template, template_features in template_feature.items():
            time_start = time()
            score[fname][template] = dtw(template_features, test_features)
            overhead[fname][template] = time() - time_start
def test_specific_case(self):
    """All three DTW entry points must give the known distance for a shifted series."""
    x = np.array([1.0, 0.9, 1.2, 2.3, 3.8, 3.3, 4.2, 1.9, 0.5, 0.3, 0.3])
    y = np.array([0.5, 1.0, 0.9, 1.2, 2.3, 3.8, 3.3, 4.2, 1.9, 0.5, 0.3])

    def euclidean(a, b):
        return np.abs((a - b))

    outcomes = [
        accelerated_dtw(x, y, 'euclidean'),
        accelerated_dtw(x, y, dist=euclidean),
        dtw(x, y, dist=euclidean),
    ]
    for outcome in outcomes:
        found, _, _, _ = outcome
        self.assertAlmostEqual(found, 0.022727272727272728)
def align(query_feats, candidate_feats, use_dtw):
    """Align videos based on nearest neighbor or dynamic time warping.

    Returns an array with one candidate frame index per query frame.  With
    ``use_dtw`` the index comes from the first path step that reaches each
    query frame; otherwise from ``get_nn`` applied per query frame.
    """
    if not use_dtw:
        matches = []
        for query_index in range(len(query_feats)):
            frame_id, _distance = get_nn(candidate_feats, query_feats[query_index])
            matches.append(frame_id)
        return np.asarray(matches)

    def squared_l2(a, b):
        return np.sum((a - b)**2)

    _, _, _, path = dtw(query_feats, candidate_feats, dist=squared_l2)
    # Index of the first occurrence of every query frame on the path.
    _, first_hits = np.unique(path[0], return_index=True)
    return np.asarray(path[1][first_hits])
def similarity_function(i: int, j: int, G1: nx.Graph, G2: nx.Graph, neigh_size) -> float:
    '''
    Calculate similarity between node i from graph G1 and node j from graph G2.

    The rings returned by get_rings for both nodes are sorted and compared
    pairwise with DTW (absolute difference as element distance), using only
    as many rings as both nodes have. The distance of the k-th ring pair
    (k starting at 1) is divided by k and the result is exp(-sum).
    '''
    sorted_i = [sorted(ring) for ring in get_rings(i, G1, neigh_size)]
    sorted_j = [sorted(ring) for ring in get_rings(j, G2, neigh_size)]
    depth = min(len(sorted_i), len(sorted_j))

    def absolute_difference(a, b):
        return np.abs(a - b)

    total = 0
    for level, (ring_i, ring_j) in enumerate(zip(sorted_i[:depth], sorted_j[:depth]), start=1):
        total += dtw(ring_i, ring_j, absolute_difference)[0] / level
    return math.exp(-total)
def get_aver_dist_1_cluster(batch):
    """Estimate the average pairwise DTW distance inside one cluster.

    Draws 200 distinct random positions from ``batch``, splits them into two
    groups of 100 and averages the DTW distance (first element of the
    ``dtw`` result, using ``custom_dist``) over all 100 x 100 cross pairs.

    Raises ``IndexError`` when ``batch`` has fewer than 200 elements.
    """
    aver_dist = 0.0
    # A real list is required: range objects have no remove() on Python 3.
    num_user = list(range(len(batch)))
    index = []
    for _ in range(200):
        choice = random.choice(num_user)
        index.append(choice)
        num_user.remove(choice)  # sample without replacement
    for i in index[0:100]:
        for j in index[100:200]:
            dist = dtw(batch[i], batch[j], dist=custom_dist)
            aver_dist = aver_dist + dist[0]
    # len(index) ** 2 / 4 == 100 * 100, the number of pairs compared.
    return float(aver_dist) / (len(index) * len(index) / 4)
def dtw_aligment(self):
    """Alignment of data using the DTW algorithm.

    Column ``self.axis`` of the first sample in ``self.lista`` is aligned
    against the same column of samples 1 .. ``self.n`` - 1.  For each of
    them the mean index difference along the warping path, truncated to
    int, is stored in ``self.offsets[k]``.

    :return offsets: ``self.offsets``, the offsets between the first sample
        and the others
    """
    def l1(a, b):
        return norm(a - b, ord=1)

    x = self.lista[0][0:, self.axis].reshape(-1, 1)
    for k in range(1, self.n):
        y = self.lista[k][0:, self.axis].reshape(-1, 1)
        dist, cost, acc, path = dtw(x, y, dist=l1)
        map_x = path[0]
        map_y = path[1]
        self.offsets[k] = int(mean(map_x - map_y))
    return self.offsets
def corrolate(list1, list2, corr='dtw'):
    """Correlate the overlapping front part of two sequences.

    Both inputs are cut to the length of the shorter one.  ``corr`` selects
    ``'spearman'``, ``'pearson'`` or ``'kendall'`` (the statistic is
    returned, the p-value dropped); any other value returns the raw result
    of ``dtw.dtw``.
    """
    # get the front overlapping slice
    overlap = min(len(list1), len(list2))
    first, second = list1[:overlap], list2[:overlap]

    if corr not in ('spearman', 'pearson', 'kendall'):
        return dtw.dtw(first, second)

    if corr == 'spearman':
        statistic, _ = spearmanr(first, second)
    elif corr == 'pearson':
        statistic, _ = pearsonr(first, second)
    else:
        statistic, _ = kendalltau(first, second)
    return statistic
def dtw_dist(s, t):
    """
    Dynamic time warping distance between two sequences.

    Every element of both sequences is converted with ``int`` first; the
    module-level ``dist`` is used as element distance.

    :param s: first sequence
    :param t: second sequence
    :return: first of the four values returned by ``dtw.dtw``
    """
    left = [int(item) for item in s]
    right = [int(item) for item in t]
    outcome = dtw.dtw(left, right, dist)
    d, _M, _C, _path = outcome
    return d
def dtw_calc_best_x(comb):
    """Return the element of ``comb`` with the smallest DTW distance to ``y``.

    ``y`` is read from the enclosing/module scope.  The element distance is
    the absolute difference.  The first element wins on ties.

    :param comb: iterable of candidate series.
    :return: the best candidate, or ``None`` when ``comb`` is empty (the
        original raised ``UnboundLocalError`` in that case).
    """
    def euclidean_norm(a, b):
        return np.abs(a - b)

    best = float('inf')
    _best_x = None
    for x in comb:
        d, cost_matrix, acc_cost_matrix, path = dtw(x, y, dist=euclidean_norm)
        if d < best:
            best = d
            _best_x = x
    return _best_x
def recognition(name):
    """Recognise the word stored in MFC file ``name``.

    The features are compared by DTW with every entry of ``modellist``; the
    1-based position of the closest model is printed (as its ``wordlist``
    entry) and returned.  The returned value stays -1 when no model scored
    below ``sys.maxsize``.
    """
    mfcc = mfc_handle.read_mfc(name)
    smallest = sys.maxsize
    flag = -1
    for position, model in enumerate(modellist, start=1):
        dtws = dtw.dtw(model, mfcc)
        if dtws < smallest:
            smallest = dtws
            flag = position
    print(name + '识别结果为:' + wordlist[flag - 1] + '\n')
    return flag
def dtw_sqi(x, template_type=0):
    """Using DTW to get the mapping point distance between a signal and its
    template. The DTW SQI is the ratio of the distance sum to
    the trace of cost matrix. The closer to 1 the better SQI.

    Parameters
    ----------
    x :
        array_like, signal containing int or float values.
    template_type :
        int,
        0: ppg_nonlinear_dynamic_system_template,
        1: ppg_dual_double_frequency_template,
        2: ppg_absolute_dual_skewness_template,
        3: ecg_dynamic_template
        default = 0
        NOTE(review): the previous docstring listed 0 and 2 the other way
        round; the mapping above is what the code does - confirm intent.

    Returns
    -------
    float
        Sum of the cost-matrix entries along the alignment divided by the
        trace of the cost matrix, or 1.0 when the trace is 0.

    Raises
    ------
    ValueError
        If ``template_type`` is not an int in the range 0..3.
    """
    check_valid_signal(x)
    # Type first, then range: the original compared before checking the type
    # and let negative values through to an unbound ``reference``.
    if type(template_type) is not int or not 0 <= template_type <= 3:
        raise ValueError("Invalid template type")
    if template_type == 0:
        reference = ppg_nonlinear_dynamic_system_template(len(x)).reshape(-1)
    elif template_type == 1:
        reference = ppg_dual_double_frequency_template(len(x))
    elif template_type == 2:
        reference = ppg_absolute_dual_skewness_template(len(x))
    else:
        reference = ecg_dynamic_template(len(x))
    alignmentOBE = dtw(x, reference, keep_internals=True,
                       step_pattern='asymmetric', open_end=True,
                       open_begin=True)
    match_distance = [
        alignmentOBE.costMatrix[i][alignmentOBE.index2[i]]
        for i in range(len(alignmentOBE.index2))
    ]
    trace = alignmentOBE.costMatrix.trace()
    if trace == 0:
        ratio = float(1)
    else:
        ratio = float(np.sum(match_distance) / trace)
    return ratio
def dtwc(x, y, derivative=False, startbc=True, steppattern='symmetric0', wincond="nowindow", r=0.0, onlydist=True):
    """Dynamic Time Warping.

    Input

    * *x* - [1D numpy array float / list] first time series
    * *y* - [1D numpy array float / list] second time series
    * *derivative* - [bool] Derivative DTW (DDTW); both series are passed
      through ``dtw.der`` first.
    * *startbc* - [bool] (0, 0) boundary condition
    * *steppattern* - [string] step pattern ('symmetric0', 'asymmetric0',
      'quasisymmetric0')
    * *wincond* - [string] window condition ('nowindow', 'sakoechiba')
    * *r* - [float] sakoe-chiba window length
    * *onlydist* - [bool] linear space-complexity implementation. Only the
      current and previous columns are kept in memory.

    Output

    Whatever ``dtw.dtw`` returns for these arguments:

    * *d* - [float] normalized distance
    * *px* - [1D numpy array int] optimal warping path (for x time series) (for onlydist=False)
    * *py* - [1D numpy array int] optimal warping path (for y time series) (for onlydist=False)
    * *cost* - [2D numpy array float] cost matrix (for onlydist=False)

    Raises ``ValueError`` for an unknown step pattern or window condition.
    """
    step_codes = {'symmetric0': 0, 'asymmetric0': 1, 'quasisymmetric0': 2}
    window_codes = {'nowindow': 0, 'sakoechiba': 1}

    try:
        sp = step_codes[steppattern]
    except (KeyError, TypeError):
        raise ValueError('step pattern %s is not available' % steppattern)

    try:
        wc = window_codes[wincond]
    except (KeyError, TypeError):
        raise ValueError('window condition %s is not available' % wincond)

    if derivative:
        xi = dtw.der(x)
        yi = dtw.der(y)
    else:
        xi = x
        yi = y

    return dtw.dtw(xi, yi, startbc=startbc, steppattern=sp, onlydist=onlydist, wincond=wc, r=r)
def classify(test, train, k=5, scale=False, verification=True, coeff=3):
    '''Classifies each observation in `test` based on `train` using DTW over MFCCs.

    This is a generator. In verification mode it yields, per test
    observation, `parse(s1, closest_train_observation, outcome)` where
    `outcome` is True when the closest distance does not exceed
    `m + coeff * s` (`m` and `s` are module-level values).

    Parameter `scale` controls whether wsola scaling should be used.
    WARNING: `verification` should only be True; the other branch is kept
    for reference and ends the generator on the first observation.
    `k` limits how many of the closest results are kept.'''
    for s1 in test:
        test_data = s1.data
        test_mfcc = psf.mfcc(test_data, samplerate=rate, winstep=0.016, winfunc=np.hamming, nfft=1200)
        results = []
        for s2 in train:
            if s1 == s2:
                # Skip identical observations, in case it is found in both test and train set
                print('Skipping', s2)
                continue
            if verification and s1.claim != s2.name:
                # In verification mode, only check the user that the speaker is claiming to be
                continue
            if scale:
                # Time-scale the test sample to the length of the train sample.
                scale_speed = s1.size/s2.size
                test_data = wsola_sample(s1.data, speed=scale_speed)
                test_mfcc = psf.mfcc(test_data, samplerate=s1.rate, winstep=0.016, winfunc=np.hamming, nfft=1200)
            d, *_ = dtw(test_mfcc, s2.data, dist=distance.euclidean)
            results.append((d, s2))
        # Order by distance only; sorting the raw tuples falls back to
        # comparing the sample objects whenever two distances are equal.
        results = sorted(results, key=lambda pair: pair[0])[:k]
        if verification:
            # NOTE(review): raises IndexError when no train observation
            # matched the claim - confirm callers guarantee at least one.
            prediction = results[0]
            print(s1)
            print(prediction[1])
            print(prediction[0], '\t', end='')
            a.append(prediction[0])
            outcome = not prediction[0] > (m+coeff*s)
            yield parse(s1, prediction[1], outcome)
        else:
            # Not used anymore
            score_by_speaker = defaultdict(lambda: (0, 0))  # name: (times_found_in_neighs, sum_of_distance)
            for res in results[:k]:
                score_by_speaker[res[1].name] = (score_by_speaker[res[1].name][0]+1, score_by_speaker[res[1].name][1]+res[0])
            # get max times found in neighs
            max_neighs = score_by_speaker[sorted(dict(score_by_speaker), key=score_by_speaker.get, reverse=True)[0]][0]
            # get neigh found max times with lowest distance
            neighs_with_max = {speaker: v[1] for speaker, v in dict(score_by_speaker).items() if v[0] == max_neighs}
            return sorted(neighs_with_max, key=neighs_with_max.get)[0] == s1.name
def _getDocDTWDist ( x, y, lenX, lenY, lenLimit = 200 ) : if lenX == 0 or lenY == 0 : if lenX == 0 and lenY == 0 : return 0.0 else : return 1.0 x = numpy.array( [ ord( i ) for i in x[ 0 : lenLimit ] ] ) x = x.reshape( -1, 1 ) y = numpy.array( [ ord( i ) for i in y[ 0 : lenLimit ] ] ) y = y.reshape( -1, 1 ) distFunc = lambda x, y : 0.0 if x == y else 1.0 dist, cost, acc, path = dtw.dtw( x, y, dist = distFunc ) return dist
def test_res_fig(directory):
    """Fill a table of pairwise DTW distances between 20 traces, then break into the debugger.

    Traces are columns 115..134 of the decimated x-component velocities of
    the ``sem2dpack`` run in ``directory``.  Row ``j`` of the table holds
    the distances between traces that are ``j + 1`` columns apart.
    """
    run = sem2dpack(directory, component='x')
    run.decimate_sig(q=4, filter_s=True)
    traces = run.decimated_veloc[:, 115:135]
    n = traces.shape[1]

    out_array = np.zeros((n - 1, 2 * n - 3))
    for gap in range(n - 1):
        for start in range(n - 1 - gap):
            left = traces[:, start]
            right = traces[:, start + gap + 1]
            out_array[gap, gap + (start * 2)] = dtw(left, right, dist=euclidean)[0]
    db.set_trace()
def findCentroid(mfccVec):
    """Return, per sample, the summed DTW distance to all other samples.

    Uses the first ``NUM_SAMPLES`` entries of ``mfccVec``.  The frame
    distance is ``exp`` of the 1-norm of the difference.
    """
    def exp_l1(a, b):
        return np.exp(np.linalg.norm(a - b, ord=1))

    distList = []
    for i in range(0, NUM_SAMPLES):
        anchor = np.array(mfccVec[i])
        total = 0
        for j in range(0, NUM_SAMPLES):
            if i == j:
                continue
            other = np.array(mfccVec[j])
            total = total + dtw(anchor.T, other.T, dist=exp_l1)[0]
        distList.append(total)
    return distList
def DTWReference(df, reference_df, max_warp_distance=None):
    """Warp every column of ``df`` onto ``reference_df`` with asymmetric DTW.

    :param df: DataFrame whose columns are aligned one by one.
    :param reference_df: reference signal (its ``.values`` is the query).
    :param max_warp_distance: when given, a Sakoe-Chiba window of that size
        restricts the warping.
    :return: DataFrame with the index and columns of ``df`` holding the
        warped signals.
    """
    step_pattern = dtw.asymmetric
    if max_warp_distance is None:
        window_type, window_args = None, {}
    else:
        window_type, window_args = "sakoechiba", {'window_size': max_warp_distance}

    dtw_df = pd.DataFrame(
        data=np.zeros((df.shape[0], df.shape[1])),
        columns=df.columns,
        index=df.index,
    )

    for col in range(df.shape[1]):
        column = df.iloc[:, col]
        warp = dtw.dtw(
            reference_df.values,
            column.values,
            keep_internals=True,
            step_pattern=step_pattern,
            window_type=window_type,
            window_args=window_args,
        )
        # Reassemble the warped signals and resample them to align with the original df index
        dtw_df.iloc[:, col] = column.iloc[warp.index2].values

    return dtw_df
def algorithm():
    """Return DTW distances between sliding windows of ``1.wav`` and the ``1.txt`` template.

    Both the audio and the template are subsampled by ``DISCR_CONSTANT``.
    Windows are two clap lengths wide, advance by one clap length and are
    zero-padded at the end.
    """
    _, DATA = wavfile.read('1.wav')
    subsampled = DATA[::DISCR_CONSTANT]
    template = np.loadtxt('1.txt')[::DISCR_CONSTANT].reshape(-1, 1)

    def squared_difference(a, b):
        return (a - b)**2

    windows = mit.windowed(
        subsampled,
        n=floor(2 * LEN_OF_CLAP / DISCR_CONSTANT),
        step=floor(LEN_OF_CLAP / DISCR_CONSTANT),
        fillvalue=0,
    )

    distances = []
    for window in windows:
        column = np.array(window).reshape(-1, 1)
        dist, _, _, _ = dtw(column, template, dist=squared_difference)
        distances.append(dist)
    return distances
def stop():
    """Finish the current recording.

    While ``Y`` is an empty list the collected ``temp_holder`` is stored as
    a new training sample and cleared.  Otherwise ``Y`` is compared by DTW
    with every training sample and the minimum result is printed.
    """
    global temp_holder
    global training_samples
    global Y
    if (Y == []):
        temp_holder_copy = list(temp_holder)
        training_samples.append(temp_holder_copy)
        temp_holder.clear()
    else:
        dtws = []
        for x in training_samples:
            distance = dtw(x, Y, euclidean)
            dtws.append(distance)
        min_dist = min(dtws)
        # The original printed the undefined name ``mind_dist`` (NameError).
        print("MIN DISTANCE: ", min_dist)
def multi_dtw(a, b, normalize = True):
    """Run DTW over the column indices of ``a`` and ``b`` with ``my_custom_norm``.

    The unmodified inputs are published as the module globals ``sen1`` and
    ``sen2`` before the call.

    NOTE(review): with ``normalize`` the inputs are scaled only after the
    globals were set, and the scaled arrays are used just for their column
    count - confirm whether the globals should hold the scaled data.
    """
    global sen1
    global sen2
    sen1, sen2 = a, b

    if normalize:
        a, b = prep.scale(a), prep.scale(b)

    columns_a = np.arange(a.shape[1])
    columns_b = np.arange(b.shape[1])
    return dtw(columns_a, columns_b, dist = my_custom_norm)
def calculate_dtw(time_list, df_data=None):
    """Compute DTW distances between every time segment and the ones after it.

    :param time_list: sequence of ``(start, stop, ...)`` entries; a segment
        contains the rows with ``start <= time < stop``.
    :param df_data: DataFrame with ``time`` and ``wrist_bvp`` columns.
    :return: square array; ``result[idx][j]`` is the distance between
        segment ``idx`` and segment ``idx + j + 1`` (the j-th segment after
        it).  Unused cells stay 0.

    The original compared segment ``idx`` with segment ``j + 1`` regardless
    of ``idx``, so rows after the first compared the wrong pairs; the
    storage layout is unchanged.  The per-pair debug print was removed.
    """
    total_len = len(time_list)
    dtw_matrix = np.zeros((total_len, total_len))

    # Extract every segment once instead of once per pair.
    segments = [
        df_data.loc[(df_data['time'] >= item[0]) & (df_data['time'] < item[1]), 'wrist_bvp'].values
        for item in time_list
    ]

    for idx in range(total_len):
        left_num = total_len - (idx + 1)
        for j in range(left_num):
            nxt_id = idx + j + 1
            dtw_return = dtw(segments[idx], segments[nxt_id], dist=norm)
            dtw_matrix[idx][j] = dtw_return[0]
    return dtw_matrix
def _getDocDTWDist(x, y, lenX, lenY, lenLimit=200): if lenX == 0 or lenY == 0: if lenX == 0 and lenY == 0: return 0.0 else: return 1.0 x = numpy.array([ord(i) for i in x[0:lenLimit]]) x = x.reshape(-1, 1) y = numpy.array([ord(i) for i in y[0:lenLimit]]) y = y.reshape(-1, 1) distFunc = lambda x, y: 0.0 if x == y else 1.0 dist, cost, acc, path = dtw.dtw(x, y, dist=distFunc) return dist
def manifold_warping_linear(X, Y, num_dims, Wx, Wy, mu=0.9, metric=SquaredL2, threshold=0.01, max_iters=100, eps=1e-8):
    """Alternate linear manifold projection and DTW correspondence on X and Y.

    The projection step is ``ManifoldLinear`` (with ``num_dims``, ``Wx``,
    ``Wy``, ``mu`` and ``eps``), the correspondence step is ``dtw`` with
    ``metric``.  ``threshold`` and ``max_iters`` are forwarded to
    ``alternating_alignments``, whose result is returned.
    """
    def project(A, B, corr):
        return ManifoldLinear(A, B, corr, num_dims, Wx, Wy, mu=mu, eps=eps)

    def correlate(A, B):
        return dtw(A, B, metric=metric)

    return alternating_alignments(X, Y, project, correlate, threshold, max_iters)
def DTW(data1, data2, keypoint_name=" "):
    """Return a correlation-like score between ``data1`` (DTW-aligned) and ``data2``.

    ``data1`` is warped onto ``data2``: one value of ``data1`` is kept for
    every step of the path on which the ``data2`` index changes.  The score
    is covariance / (std1 * std2) rounded to one decimal.  It is 0 when
    ``data1`` is empty or either standard deviation is 0, and 1.0 when both
    series are nearly flat (std <= 4 and range < 20).

    :param keypoint_name: not used by this body.
    """
    ts1 = list(data1)
    ts2 = list(data2)
    if not ts1:
        return 0

    x = np.array(ts1).reshape(-1, 1)
    y = np.array(ts2).reshape(-1, 1)

    def absolute_difference(a, b):
        return np.abs(a - b)

    d, cost_matrix, acc_cost_matrix, path = dtw(x, y, dist=absolute_difference)
    steps_1, steps_2 = path[0], path[1]

    ts1_dtw = []
    if len(steps_1) > 1:
        ts1_dtw.append(ts1[steps_1[0]])
    for step in range(1, len(steps_1)):
        # Keep a value only when the path advanced in the second series.
        if not (steps_2[step - 1] == steps_2[step]):
            ts1_dtw.append(ts1[steps_1[step]])

    ts1_std = np.std(ts1_dtw)
    ts2_std = np.std(ts2)
    cov = np.cov(ts1_dtw, ts2)[0][1]
    if ts1_std == 0 or ts2_std == 0:
        return 0

    if ((ts1_std <= 4 and max(ts1_dtw) - min(ts1_dtw) < 20)
            and ts2_std <= 4 and max(ts2) - min(ts2) < 20):
        # Both series are almost constant: treat them as fully correlated.
        cov, ts1_std, ts2_std = 1, 1, 1

    return round(cov / (ts1_std * ts2_std), 1)
def get_predict_array2(whole_array, lag, difference=0):
    """Return per-position prediction errors and their running sum.

    For positions 0..9 both lists hold 0.  From position 10 on, the values
    predicted by ``ar_predict_next_array`` from the previous 10 elements are
    compared by DTW (1-norm) with the next ``predict_length`` elements
    (``predict_length`` is a module-level value).  The last element of
    ``whole_array`` is never used as a position.

    :return: ``(errors, cumulative_errors)``, two lists of equal length.
    """
    def l1(a, b):
        return np.linalg.norm(a - b, ord=1)

    result, error_sum = [], []
    for pos in range(len(whole_array) - 1):
        if pos < 10:
            # Not enough history yet.
            result.append(0)
            error_sum.append(0)
            continue
        history = whole_array[pos - 10:pos]
        predicted = np.array(
            ar_predict_next_array(history, lag, predict_length, difference)
        ).reshape(-1, 1)
        actual = np.array(whole_array[pos:pos + predict_length]).reshape(-1, 1)
        dist, cost, acc, path = dtw(predicted, actual, dist=l1)
        result.append(dist)
        error_sum.append(dist + error_sum[pos - 1])
    return result, error_sum
def get_seeds(src_net, tgt_net, num_seeds):
    """Pick seed node pairs by aligning the top degree sequences of two graphs.

    The ``num_seeds`` highest degrees of each graph (descending) are aligned
    with DTW using ``l2_norm``; the warping path is converted to seeds by
    ``path_to_seeds``.  The number of seeds actually produced is printed.

    :return: the value returned by ``path_to_seeds``.
    """
    # we're going to need to skip some, friend
    # dict(...) accepts both the plain dict returned by networkx 1.x and the
    # degree view returned by networkx 2.x.
    src_degs = sorted(dict(nx.degree(src_net)).items(), key=operator.itemgetter(1), reverse=True)
    tgt_degs = sorted(dict(nx.degree(tgt_net)).items(), key=operator.itemgetter(1), reverse=True)
    src_dists = list(itertools.islice(map(operator.itemgetter(1), src_degs), num_seeds))
    tgt_dists = list(itertools.islice(map(operator.itemgetter(1), tgt_degs), num_seeds))
    dist, cost, path = dtw.dtw(src_dists, tgt_dists, dist=l2_norm)
    seeds = path_to_seeds(path)
    # Single formatted argument prints the same text on Python 2 and 3.
    print("actual number of seeds:  %s" % len(seeds))
    return seeds
def selectColor(mfccVec, userInput):
    """Return the colour name of the centroid closest to ``userInput``.

    The first ``NUM_CENTROIDS`` entries of ``mfccVec`` are compared with
    ``userInput`` by DTW (frame distance: ``exp`` of the 1-norm).  Centroids
    0, 1 and 2 map to "BRANCO", "AZUL" and "VERMELHO"; any other index gives
    an empty string.
    """
    def exp_l1(a, b):
        return np.exp(np.linalg.norm(a - b, ord=1))

    distList = [
        dtw(np.array(mfccVec[i]).T, userInput.T, dist=exp_l1)[0]
        for i in range(0, NUM_CENTROIDS)
    ]
    idx = distList.index(min(distList))

    names = ("BRANCO", "AZUL", "VERMELHO")
    return names[idx] if idx < len(names) else ""
def simple_dtw(r, t):
    """DTW score between the points of ``r`` and ``t`` picked by ``handleList.simple_select``.

    Element 1 of every selected entry is used as an index into the series.
    The DTW distance (absolute difference) is divided by the length of the
    last element of the ``dtw`` result.
    """
    picked_r = handleList.simple_select(r)
    picked_t = handleList.simple_select(t)

    x = [r[entry[1]] for entry in picked_r]
    y = [t[entry[1]] for entry in picked_t]

    outcome = dtw(
        np.array(x).reshape(-1, 1),
        np.array(y).reshape(-1, 1),
        lambda a, b: np.abs(a - b))
    # NOTE(review): outcome[-1] is presumably the path; its len() may be the
    # number of index arrays rather than the path length - confirm.
    return outcome[0] / len(outcome[-1])
def split_dtw(r, t):
    """DTW score between the points of ``r`` and ``t`` at the ``QiQi.fenge`` split indices.

    The DTW distance (absolute difference) is divided by the length of the
    last element of the ``dtw`` result.
    """
    cuts_r, cuts_t = QiQi.fenge(r, t)

    x = [r[position] for position in cuts_r]
    y = [t[position] for position in cuts_t]

    outcome = dtw(
        np.array(x).reshape(-1, 1),
        np.array(y).reshape(-1, 1),
        lambda a, b: np.abs(a - b))
    # NOTE(review): outcome[-1] is presumably the path; its len() may be the
    # number of index arrays rather than the path length - confirm.
    return outcome[0] / len(outcome[-1])
def section_dtw(r, t):
    """Average per-section DTW score of ``r`` and ``t``.

    ``QiQi.fenge`` supplies matching split indices; each section runs from
    the previous split to the current one.  Every section contributes its
    DTW distance divided by the length of the last ``dtw`` result element,
    and the sum is divided by the number of splits plus one.  Data after
    the final split index is not compared.
    """
    cuts_r, cuts_t = QiQi.fenge(r, t)

    def absolute_difference(a, b):
        return np.abs(a - b)

    start_r = 0
    start_t = 0
    total = 0
    for k in range(len(cuts_r)):
        stop_r, stop_t = cuts_r[k], cuts_t[k]
        section = dtw(
            np.array(r[start_r:stop_r]).reshape(-1, 1),
            np.array(t[start_t:stop_t]).reshape(-1, 1),
            absolute_difference)
        total += section[0] / len(section[-1])
        start_r, start_t = stop_r, stop_t
    return total / (len(cuts_r) + 1)
def test_fast_vs_normal_ND(self):
    """Plain and accelerated DTW must agree on random N-dimensional series."""
    N = np.random.randint(2, 100)
    m1 = np.random.randint(2, 100)
    m2 = np.random.randint(2, 100)
    x = np.random.rand(m1, N)
    y = np.random.rand(m2, N)

    def euclidean(a, b):
        return np.linalg.norm((a - b))

    slow_dist, slow_cost, slow_acc, slow_path = dtw(x, y, dist=euclidean)
    fast_dist, fast_cost, fast_acc, fast_path = accelerated_dtw(x, y, 'euclidean')

    self.assertAlmostEqual(slow_dist, fast_dist)
    self.assertAlmostEqual((slow_cost - fast_cost).sum(), 0)
    self.assertAlmostEqual((slow_acc - fast_acc).sum(), 0)
    for axis in (0, 1):
        self.assertTrue((slow_path[axis] == fast_path[axis]).all())
def concurrent_cdm(flows, dist_func, limits):
    """Build one slice of the Condensed Distance Matrix (CDM).

    Intended for the concurrent code path (``CONCURRENT_EXEC = True``): each
    call handles the rows ``limits[0] <= i < limits[1]`` and, for every such
    row, all columns ``i < j < N_FLOWS``.

    :param flows: the list of flows loaded from the dataset
    :param dist_func: the distance function handed to DTW
    :param limits: pair holding the starting row index and the (exclusive)
                   last row index; see `_triu_indexes`
    :return: flat list with the DTW distance of every (i, j) pair of the
             slice, in row-major order.
    """
    first_row = limits[0]
    stop_row = limits[1]

    distances = []
    for i in range(first_row, stop_row):
        for j in range(i + 1, N_FLOWS):
            distances.append(dtw(flows[i], flows[j], dist_func)[0])
    return distances
def compare(control_path, exp_path):
    """
    Compares two wav files and returns a score.

    Uses mel frequency cepstrum coefficients as well as dynamic time warping:
    both files are read with ``wavread``, converted with ``mfcc`` and the two
    feature sequences are aligned by ``dtw.dtw`` using an L1-norm frame cost.

    :param control_path: the 'correct' wav - what you are comparing to
    :param exp_path: the unknown wav
    :return: the first element of the ``dtw.dtw`` result (the distance).
    """
    control_rate, control_signal = wavread(control_path)
    exp_rate, exp_signal = wavread(exp_path)

    control_features = mfcc(control_signal, control_rate)
    exp_features = mfcc(exp_signal, exp_rate)

    # Only the distance is needed.  Indexing instead of unpacking a fixed
    # number of names keeps this working whether dtw.dtw returns three or
    # four values (other call sites in this file unpack four).  The stray
    # line-continuation backslash that used to follow this call is gone.
    result = dtw.dtw(control_features, exp_features,
                     dist=lambda a, b: dtw.norm(a - b, ord=1))
    return result[0]
# stop(*args): closing step of a sample capture (presumably an event callback,
# given the ignored *args -- confirm against where it is registered).
#
# What the body does, in order:
#   1. Assigns END_SAMPLE = True.
#      NOTE(review): no `global END_SAMPLE` statement is present, so this only
#      binds a function-local name -- confirm whether a module-level flag was
#      meant to be set.
#   2. When `training_samples` is empty, appends a list copy of `temp_holder`
#      to it; `temp_holder` is cleared.
#   3. Unless `Y == []`, calls dtw(x, Y, euclidean) for every entry x of
#      `training_samples`, collects the results and prints their minimum.
#      NOTE(review): the whole return value of dtw() is stored and compared,
#      not one element of it -- verify that this dtw returns a plain distance.
#   4. Prints `training_samples`.
#
# NOTE(review): the statement nesting of this block has been lost (the body
# sits on a single physical line), so which statements belong to which `if`
# cannot be confirmed from here; restore the original line structure before
# changing any logic.
def stop(*args): END_SAMPLE = True if (not training_samples): temp_holder_copy = list(temp_holder) training_samples.append(temp_holder_copy) temp_holder.clear() if not (Y == []): dtws = [] for x in training_samples: # Perform DTW on each training sample, finding min distance = dtw(x, Y, euclidean) dtws.append(distance) min_dist = min(dtws) print("MINIMUM DISTANCE: ", min_dist) print(training_samples)
def compare(control_path, exp_path):
    """
    Compares two wav files and returns a score.

    Uses mel frequency cepstrum coefficients as well as dynamic time warping:
    both files are read with ``wavread``, converted with ``mfcc`` and the two
    feature sequences are aligned by ``dtw.dtw`` using an L1-norm frame cost.

    :param control_path: the 'correct' wav - what you are comparing to
    :param exp_path: the unknown wav
    :return: the first element of the ``dtw.dtw`` result (the distance).
    """
    (rate, sig) = wavread(control_path)
    (rate2, sig2) = wavread(exp_path)

    x = mfcc(sig, rate)
    y = mfcc(sig2, rate2)

    # Only the distance is needed.  Indexing instead of unpacking a fixed
    # number of names keeps this working whether dtw.dtw returns three or
    # four values (other call sites in this file unpack four).  The stray
    # line-continuation backslash that used to follow this call is gone.
    alignment = dtw.dtw(x, y, dist=lambda u, v: dtw.norm(u - v, ord=1))
    return alignment[0]
def cross_validation(data, k): #print data limit = int(ceil(len(data) * 0.7)) #print limit success_rate = 0 # repeat k times for o in range(k): success = 0 random.shuffle(data) # shuffle data train_data = data[:limit] # take training set 70% test_data = data[limit:] # take rest for testing #realjpid = [] realjpid = zip(*test_data)[0] predictedjpid = [] closest = [] for test, i in zip(test_data, range(len(test_data))): for train, j in zip(train_data, range(len(train_data))): dist, cost, acc, path = dtw(zip(*test_data)[1][i], zip(*train_data)[1][j], dist=haversine) closest.append( [dist, zip(*train_data)[1][j], zip(*train_data)[0][j]]) # sort and take min 5 closest.sort(key=itemgetter(0)) closest = closest[0:5] predictedjpid.append(knn(closest)) for l in range(len(realjpid)): if realjpid[l] == predictedjpid[l]: success += 1 print "real = ", realjpid[l], " predicted = ", predictedjpid[l] success_rate += float(success) / float(len(realjpid)) #print success_rate print o, " fold ended" success_rate = success_rate / k print "total success rate = ", success_rate
def _stretch_to_100(series):
    """Rescale ``series`` to roughly 100 samples and a 0-100 value range."""
    # Repeat every sample `repeat` times so the length lands near 100.
    # round() yields 0 once the input is about 200 samples or longer, which
    # used to produce an empty series; clamp to 1 so long inputs are kept
    # at their own length instead.
    repeat = max(1, int(round(100 / len(series))))
    stretched = np.kron(series, np.ones((repeat, 1)))

    low = series.min()
    high = series.max()
    # A constant series has no vertical range to stretch; keep a factor of 1.
    scale = 100.0 / (high - low) if high != low else 1
    return np.array([(row - low) * scale for row in stretched]).reshape(-1, 1)


def similarity_calc_pattern(who_compare, compare_with):
    """
    Compare two behaviors by their patterns.

    Both inputs are normalised independently (length stretched towards 100
    samples, values mapped so the minimum becomes 0 and the maximum 100),
    aligned with ``dtw`` using an L1-norm cost, and drawn on a new figure.

    :param who_compare: array-like exposing ``len()``, ``.min()`` and
        ``.max()`` (presumably an (n, 1) numpy column -- TODO confirm).
    :param compare_with: second series, same requirements.
    :return: tuple ``(dist, comparison_figure)`` -- the first value returned
        by ``dtw`` and the ``Figure`` holding both normalised series.
    """
    series_1 = _stretch_to_100(who_compare)
    series_2 = _stretch_to_100(compare_with)

    comparison_figure = Figure(figsize=(5, 4))
    comparison_plot = comparison_figure.add_subplot(111)

    dist, _cost, _acc, _path = dtw(
        series_1, series_2,
        dist=lambda x, y: np.linalg.norm(x - y, ord=1))

    comparison_plot.plot(series_1)
    comparison_plot.plot(series_2)
    return dist, comparison_figure
def computeDistace(mfcc1, mfcc2):
    """Align two feature matrices with DTW, report the distance and plot it.

    The transposes of both inputs are aligned with an L1-norm frame cost.
    The distance is printed, then the transposed cost matrix is shown as a
    grey-scale image with the warping path drawn in white on top.

    :param mfcc1: first feature matrix (must expose ``.T``).
    :param mfcc2: second feature matrix (must expose ``.T``).
    :return: None; output goes to stdout and to a matplotlib window.
    """
    print("Finding DTW between the 2 mfccs")

    def l1_cost(a, b):
        return norm(a - b, ord=1)

    dist, cost, acc_cost, path = dtw(mfcc1.T, mfcc2.T, dist=l1_cost)
    print('Normalized distance between the two sounds:', dist)

    plt.imshow(cost.T,
               origin='lower',
               cmap=plt.get_cmap('gray'),
               interpolation='nearest')
    plt.plot(path[0], path[1], 'w')

    # Axis limits sit half a cell outside the matrix so border cells are
    # drawn in full.
    x_limits = (-0.5, cost.shape[0] - 0.5)
    y_limits = (-0.5, cost.shape[1] - 0.5)
    plt.xlim(x_limits)
    plt.ylim(y_limits)

    plt.xlabel('MFCC Column Index (FET)')
    plt.ylabel('MFCC Column Index (Non-FET)')
    plt.show()
def estimate_twf(orgdata, tardata, distance='melcd', fast=True, otflag=None):
    """Estimate the time-warping function between two feature sequences.

    :param orgdata: original feature sequence, passed as first argument to
        ``fastdtw`` / ``dtw``.
    :param tardata: target feature sequence, passed as second argument.
    :param distance: name of the frame distance; only ``'melcd'`` is
        accepted.
    :param fast: when true use ``fastdtw``, otherwise ``dtw``.
    :param otflag: must be ``None``; no other value is handled.
    :return: the warping path -- ``np.array(path).T`` for ``fastdtw``, or the
        fourth value returned by ``dtw``.
    :raises ValueError: if ``distance`` is not ``'melcd'`` or ``otflag`` is
        not ``None``.
    """
    if distance != 'melcd':
        raise ValueError('other distance metrics than melcd does not support.')

    if otflag is not None:
        # This case used to fall through to `return twf` with `twf` never
        # assigned, failing with an UnboundLocalError.  Fail explicitly.
        raise ValueError('otflag is not supported; pass otflag=None.')

    def distance_func(x, y):
        return melcd(x, y)

    # use dtw or fastdtw
    if fast:
        _, path = fastdtw(orgdata, tardata, dist=distance_func)
        return np.array(path).T

    _, _, _, twf = dtw(orgdata, tardata, distance_func)
    return twf