def dwt_filter_with_reference(self, beats_dict, ref, threshold, interp_dim):
    # Keep only beats whose DTW distance to the reference beat is below threshold.
    good_beats = []
    mask = {}
    dwt_dist = {}
    keys = list(sorted(beats_dict.keys()))
    for k in keys:
        b = beats_dict[k]
        # Min-max normalize the beat, then resample it to interp_dim points
        _b = np.array(b)
        _b = _b - _b.min()
        _b = _b / _b.max()
        x, _b = interpolate_beat(_b, interp_dim)
        dist = dtw(_b, ref)
        dwt_dist[k] = "%.2f" % dist
        if dist < threshold:
            good_beats.append(b)
            mask[k] = True
        else:
            mask[k] = False
    return good_beats, mask, dwt_dist
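# --- Usage sketch (illustrative, not from the original source) ---
# A minimal example of the same idea: min-max normalize a candidate beat,
# compare it against a reference with tslearn's dtw(), and keep it only if the
# distance is below a threshold. `ref_beat`, `candidate` and `THRESHOLD` are
# made-up names; the dtw alias avoids shadowing the module-level import.
import numpy as np
from tslearn.metrics import dtw as _dtw_example

ref_beat = np.sin(np.linspace(0, np.pi, 50))            # reference template
candidate = np.sin(np.linspace(0, np.pi, 60)) + 0.01    # slightly longer beat
candidate = (candidate - candidate.min()) / (candidate.max() - candidate.min())
THRESHOLD = 0.5
keep = _dtw_example(candidate, ref_beat) < THRESHOLD    # True -> "good" beat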
def ts_match(df1, df2, start, length):
    df1 = df1[start:start + length]
    time_start = df1.index[0]
    print("Matched start time: {}".format(time_start))
    # print("Corresponding data series: {}".format(df1))
    window = 48
    match_time_start = None
    # Assumes df2 contains at least one timestamp later than time_start
    for j in df2.index:
        if j > time_start:
            print("Downstream matched start time: {}".format(j))
            match_time_start = df2.index.tolist().index(j)
            print("Corresponding downstream index: {}".format(match_time_start))
            break
    dtw_score = list()
    match_df2 = None
    # match_df2 = df2[347:347 + length]
    for o in range(window):
        cut_df2 = df2[match_time_start + o:match_time_start + length + o]
        dtw_sim = dtw(df1['氨氮'].values, cut_df2['氨氮'].values,
                      global_constraint="sakoe_chiba", sakoe_chiba_radius=3)
        # dtw_sim = dtw(TimeSeriesScalerMeanVariance().fit_transform(df1['氨氮'].values),
        #               TimeSeriesScalerMeanVariance().fit_transform(cut_df2['氨氮'].values),
        #               global_constraint="sakoe_chiba", sakoe_chiba_radius=3)
        # mod_dtw, _ = de.TimeSeriesSimilarity(df1.tolist(), cut_df2.tolist())
        dtw_score.append(dtw_sim)
        if dtw_score.index(max(dtw_score)) == o:
            print("Maximum dtw score at offset {}".format(o))
            print("Corresponding start time: {}".format(cut_df2.index[0]))
            match_df2 = cut_df2
    print("Sliding-window score list: {}".format(dtw_score))
    return match_df2, dtw_score.index(max(dtw_score))
def classify_DTW(self):
    correct_count = 0
    for i in range(self.mytestseries.shape[0]):
        # DTW distance from the test series to every class average
        for j in range(self.class_distance.shape[1]):
            # align = mypair.DTW(self.mytestseries[i, :], self.myclassavgs[j, :])
            # align.calcglobalcost_UDTW()
            # align.findwarppath()
            self.class_distance[0, j] = metrics.dtw(
                self.mytestseries[i, :], self.myclassavgs[j, :])
            # math.sqrt(sum(math.square(align.Warpedfv - align.Warpedsv)))
        # Assign the class whose average is closest in DTW distance
        classified_as = list(self.class_distance[0]).index(min(self.class_distance[0]))
        print('distance from each class:', self.class_distance)
        if classified_as == self.zero_indexed_traget_class[i]:
            print('Data:', i, ' target:', self.zero_indexed_traget_class[i],
                  ' classified as:', classified_as)
            self.confusion_matrix[classified_as, classified_as] += 1
            correct_count = correct_count + 1
        else:
            print('Data:', i, ' target:', self.zero_indexed_traget_class[i],
                  ' classified as:', classified_as)
            self.confusion_matrix[self.zero_indexed_traget_class[i], classified_as] += 1
    # Accuracy in percent
    return (correct_count / self.mytestseries.shape[0]) * 100
def _get_dtw_score(
    self,
    adata1,
    adata2,
    genenamelist,
):
    dist_list = []
    expression1 = adata1[:, genenamelist].X
    expression2 = adata2[:, genenamelist].X
    if not isinstance(expression1, np.ndarray):
        expression1 = expression1.toarray()
    if not isinstance(expression2, np.ndarray):
        expression2 = expression2.toarray()
    mmscaler = MinMaxScaler()
    expression1 = mmscaler.fit_transform(expression1)
    expression2 = mmscaler.fit_transform(expression2)
    for i in range(len(genenamelist)):
        dist = dtw(expression1[:, i], expression2[:, i])
        dist_list.append(dist)
    return dist_list
def dtw_image(imarray):
    r, c, b = tuple(imarray.shape)
    ref1 = [0, 0, 0, 0, 0, 0]
    ref2 = [65535, 65535, 65535, 65535, 65535, 65535]
    # ref2 = imarray[10, 0, :]
    ref3 = imarray[20, 0, :]
    ref4 = imarray[30, 0, :]
    ref5 = imarray[40, 0, :]
    ref6 = imarray[50, 0, :]
    ref7 = imarray[60, 0, :]
    ref8 = imarray[40, 0, :]
    ref9 = imarray[40, 0, :]
    dtw_image = np.zeros([r, c], dtype=float)
    for i in range(r):
        for j in range(c):
            distance1 = ts.dtw(ref1, imarray[i, j, :])
            # distance2 = ts.dtw(ref2, imarray[i, j, :])
            # distance3 = ts.dtw(ref3, imarray[i, j, :])
            # distance4 = ts.dtw(ref4, imarray[i, j, :])
            # distance5 = ts.dtw(ref5, imarray[i, j, :])
            # distance6 = ts.dtw(ref6, imarray[i, j, :])
            # distance7 = ts.dtw(ref7, imarray[i, j, :])
            # distance8 = ts.dtw(ref8, imarray[i, j, :])
            # distance9 = ts.dtw(ref9, imarray[i, j, :])
            # distance = min(distance1, distance2, distance3, distance4, distance5)
            dtw_image[i, j] = distance1
    # dtw_image = (dtw_image / dtw_image.max()) * 255.0
    return dtw_image
def calculate_dict(mfcc_values, rolloff_values, names, labels):
    final_dict = dict()
    for i in names:
        final_dict[i] = []
    for id1, (mf1, ro1, nm1, lb1) in enumerate(zip(mfcc_values, rolloff_values, names, labels)):
        for id2, (mf2, ro2, nm2, lb2) in enumerate(zip(mfcc_values, rolloff_values, names, labels)):
            if id1 < id2:
                current_dtw = dtw(mf1, mf2)
                # current_dtw = dtw(mf1 + ro1, mf2 + ro2)
                final_dict[nm1].append({
                    "name": nm2,
                    "label": lb2,
                    "distance": current_dtw
                })
                final_dict[nm2].append({
                    "name": nm1,
                    "label": lb1,
                    "distance": current_dtw
                })
    for final_key, final_item in final_dict.items():
        final_dict[final_key] = sorted(final_item, key=itemgetter('distance'))
        # print(key, len(final_dict[key]))
    return final_dict
def create_clusters(x, stratify, **kwargs):
    # Find indices for each PID in dataset
    pid_indices = find_indices(stratify)
    # Create DTW Average
    pid_averages, pid_labels = ts_average(x, pid_indices)
    # Create distance matrix with DTW on averages
    dtw_matrix = np.zeros((len(pid_averages), len(pid_averages)))
    for i in range(len(pid_averages)):
        for j in range(len(pid_averages)):
            dtw_matrix[i, j] = dtw(pid_averages[i], pid_averages[j])
    # Cluster using DBSCAN
    clusters = DBSCAN(**kwargs).fit(dtw_matrix)
    # Create dictionary of clusters and pid labels
    label_dict = {}
    for i, j in zip(clusters.labels_, pid_labels):
        label_dict[j] = i
    # Pass back indices for the original data based on clusters
    temp_labels = np.array([i.split('_')[0] for i in stratify])
    final_dict = {}
    for i in label_dict:
        if label_dict[i] not in final_dict:
            final_dict[label_dict[i]] = np.where(temp_labels == i)[0]
        else:
            final_dict[label_dict[i]] = np.concatenate(
                (final_dict[label_dict[i]], np.where(temp_labels == i)[0]))
    return final_dict
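# --- Usage sketch (illustrative, not from the original source) ---
# create_clusters() hands the DTW distance matrix to DBSCAN via **kwargs; for
# the distances to be honored (rather than treated as feature vectors) the call
# needs metric='precomputed'. A minimal standalone version of that step, with
# made-up series and parameter values:
import numpy as np
from sklearn.cluster import DBSCAN
from tslearn.metrics import dtw as _dtw_example

series = [np.sin(np.linspace(0, 6, 40)),
          np.sin(np.linspace(0, 6, 40)) + 0.05,
          np.cos(np.linspace(0, 6, 40))]
dist = np.zeros((len(series), len(series)))
for a in range(len(series)):
    for b in range(len(series)):
        dist[a, b] = _dtw_example(series[a], series[b])
labels = DBSCAN(eps=1.0, min_samples=1, metric='precomputed').fit(dist).labels_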
def dtw_similarity(template_signal, acc_signal_windows):
    dist_similarity = []
    for i, v in enumerate(acc_signal_windows):
        dist_similarity.append([dtw(w, template_signal[i]) for w in v])
    return dist_similarity
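# --- Usage sketch (illustrative, not from the original source) ---
# dtw_similarity() expects one template per channel and, per channel, a list of
# candidate windows; it returns one distance list per channel. Names below are
# made up; numpy and the module-level dtw import are assumed.
import numpy as np
templates = [np.linspace(0.0, 1.0, 30)]                        # one channel
windows = [[np.linspace(0.0, 1.0, 25), np.full(30, 0.5)]]      # two candidate windows
per_channel_distances = dtw_similarity(templates, windows)     # [[d1, d2]]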
def eval_model(self, net, loader, batch_size, gamma, verbose=1, target_mean=0, target_std=0):
    criterion = torch.nn.MSELoss()
    losses_mse = []
    losses_dtw = []
    losses_tdi = []
    for i, data in enumerate(loader, 0):
        loss_mse, loss_dtw, loss_tdi = torch.tensor(0), torch.tensor(0), torch.tensor(0)
        # get the inputs
        inputs, target = data
        # inputs, target, breakpoints = data
        inputs = torch.tensor(inputs, dtype=torch.float32).to(self.device)
        target = torch.tensor(target, dtype=torch.float32).to(self.device)
        # batch_size, N_output = target.shape[0:2]
        outputs = net(inputs)
        # MSE
        loss_mse = criterion(target, outputs)
        loss_dtw, loss_tdi = 0, 0
        # DTW and TDI, computed per sample on the CPU
        for k in range(batch_size):
            target_k_cpu = target[k, :, 0:1].view(-1).detach().cpu().numpy()
            output_k_cpu = outputs[k, :, 0:1].view(-1).detach().cpu().numpy()
            loss_dtw += dtw(target_k_cpu, output_k_cpu)
            path, sim = dtw_path(target_k_cpu, output_k_cpu)
            Dist = 0
            for ii, jj in path:  # avoid shadowing the batch index i
                Dist += (ii - jj) * (ii - jj)
            loss_tdi += Dist / (self.N_output * self.N_output)
        loss_dtw = loss_dtw / batch_size
        loss_tdi = loss_tdi / batch_size
        # print statistics
        losses_mse.append(loss_mse.item())
        losses_dtw.append(loss_dtw)
        losses_tdi.append(loss_tdi)
    # TODO plotting eval
    print(' Eval mse= ', np.array(losses_mse).mean(),
          ' dtw= ', np.array(losses_dtw).mean(),
          ' tdi= ', np.array(losses_tdi).mean())
def evaluate_iteration(model, criterion, X_test_left, X_test_right, y_test):
    model.eval()
    x_test_left = np.transpose(X_test_left, [1, 0, 2])
    x_test_right = np.transpose(X_test_right, [1, 0, 2])
    y_test = np.transpose(y_test, [1, 0, 2])
    x_test_left_tensor = numpy_to_tvar(x_test_left)
    x_test_right_tensor = numpy_to_tvar(x_test_right)
    y_test_tensor = numpy_to_tvar(y_test)
    output, atten = model(x_test_left_tensor, x_test_right_tensor, y_test_tensor, 0)
    loss = criterion(output, y_test_tensor)
    loss_mse, loss_dtw, loss_tdi = 0, 0, 0
    loss_mae, loss_RMSLE, loss_RMSE = 0, 0, 0
    for k in range(BATCH_SIZE):
        target_k_cpu = y_test_tensor[:, k, 0:1].view(-1).detach().cpu().numpy()
        output_k_cpu = output[:, k, 0:1].view(-1).detach().cpu().numpy()
        loss_dtw += dtw(target_k_cpu, output_k_cpu)
        path, sim = dtw_path(target_k_cpu, output_k_cpu)
        Dist = 0
        for i, j in path:
            Dist += (i - j) * (i - j)
        loss_tdi += Dist / (N_output * N_output)
        loss_mae += mean_absolute_error(target_k_cpu, output_k_cpu)
        loss_RMSLE += np.sqrt(mean_squared_error(target_k_cpu, output_k_cpu))
        loss_RMSE += np.sqrt(mean_squared_error(target_k_cpu, output_k_cpu))
    loss_dtw = loss_dtw / BATCH_SIZE
    loss_tdi = loss_tdi / BATCH_SIZE
    loss_mae = loss_mae / BATCH_SIZE
    loss_RMSLE = loss_RMSLE / BATCH_SIZE
    loss_RMSE = loss_RMSE / BATCH_SIZE
    # # metric
    # output_numpy = output.cpu().data.numpy()
    # y_test_numpy = y_test_tensor.cpu().data.numpy()
    # loss_mae = mean_absolute_error(y_test_numpy, output_numpy)
    # loss_RMSLE = np.sqrt(mean_squared_error(y_test_numpy, output_numpy))
    # loss_RMSE = np.sqrt(mean_squared_error(y_test_numpy, output_numpy))
    # test_loss_meter.add(loss.item())
    # plot_result(output, y_test_tensor)
    # show_attention(x_test_left_tensor, x_test_right_tensor, output, atten)
    # plt.show()
    return loss.item(), loss_mae, loss_RMSLE, loss_RMSE, loss_dtw
def true_dilate(target, pred, alpha):
    # target, pred [seq_length]
    N_output = target.shape[0]
    loss_dtw = dtw(target, pred)
    path, sim = dtw_path(target, pred)
    Dist = 0
    for ii, jj in path:
        Dist += (ii - jj) * (ii - jj)
    loss_tdi = Dist / (N_output * N_output)
    loss_dilate = alpha * loss_dtw + (1 - alpha) * loss_tdi
    return loss_dtw, loss_tdi, loss_dilate
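# --- Usage sketch (illustrative, not from the original source) ---
# true_dilate() blends a DTW (shape) term with a temporal distortion index
# (TDI) term weighted by alpha, in the spirit of the DILATE loss. A worked
# call on made-up data, assuming numpy plus the module-level dtw/dtw_path
# imports used by the function above:
import numpy as np
target = np.sin(np.linspace(0, 2 * np.pi, 24))
pred = np.roll(target, 2)  # same shape, shifted in time
shape_err, temporal_err, dilate = true_dilate(target, pred, alpha=0.5)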
def search_top_n_similar_ts(ts_query=None, data=None, n=10, use_lb_kim=False):
    """For the query ts, search the top-n similar ts in data object,
    return the searching result.
    """
    start = time()
    min_heap, time_spend = [], 0
    for ind, ts_candidate in enumerate(data):
        # Initializing minimum heap (n + 1 for excluding itself).
        # Distances are stored negated, so the heap root is always the worst
        # (largest) distance among the candidates kept so far.
        if len(min_heap) < n + 1:
            dtw_dist = -dtw(ts_query, ts_candidate)
            hq.heappush(min_heap, [dtw_dist, ind])
            continue

        # STEP 1: lb_kim_hierarchy pruning
        # -------------------
        bsf = min_heap[0][0]
        if use_lb_kim:
            lb_kim = -np.sqrt(lb_kim_hierarchy(ts_query, ts_candidate, bsf**2))
            if lb_kim < bsf:
                continue

        # STEP 2: DTW calculation
        # -------------------
        dtw_dist = -dtw(ts_query, ts_candidate)
        if dtw_dist < bsf:
            continue
        else:
            hq.heapreplace(min_heap, [dtw_dist, ind])
    end = time()
    time_spend = end - start

    # Saving results (sorted by increasing distance, query itself dropped)
    top_n_searching_res = sorted(min_heap, key=lambda t: -t[0])
    top_n_searching_res = [[-t[0], t[1]] for t in top_n_searching_res][1:]
    searching_res = {}
    searching_res["top_n_searching_res"] = top_n_searching_res
    searching_res["total_searched_ts"] = len(data)
    searching_res["total_time_spend"] = time_spend
    return searching_res
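# --- Usage sketch (illustrative, not from the original source) ---
# Querying a small random dataset with the default (no lb_kim pruning) path;
# assumes the module-level numpy, heapq-as-hq and time imports this function
# relies on. The query is taken from the dataset itself, so the [1:] slice
# inside the function drops the trivial self-match.
import numpy as np
rng = np.random.RandomState(0)
dataset = [rng.rand(50) for _ in range(100)]
res = search_top_n_similar_ts(ts_query=dataset[0], data=dataset, n=5)
# res["top_n_searching_res"] -> [[dtw_distance, index], ...], closest first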
def quantization_error(self, data):
    """Returns the quantization error computed as the average
    distance between each input sample and its best matching unit."""
    self._check_input_len(data)
    error = 0
    for x in data:
        error += dtw(x, self._weights[self.winner(x)],
                     global_constraint=self.gl_const,
                     sakoe_chiba_radius=self.scr,
                     itakura_max_slope=self.ims)
    return error / len(data)
def nn_dtw(self, tv):
    costs = []
    for nn in self.curr_nn_inds:
        tr_v = self.X_train[nn]
        cost = dtw(tr_v, tv,
                   global_constraint="sakoe_chiba",
                   sakoe_chiba_radius=self.sakoe_chiba_radius)
        costs.append(cost)
    sorted_cost_inds = np.argsort(np.array(costs))
    self.curr_nn_inds = np.asarray(self.curr_nn_inds)[sorted_cost_inds]
    self.curr_nn_inds = self.curr_nn_inds[:self.n_neighbors]
def nn_dtw(self):
    dtw_nbrs_all_query = []
    for te_idx, nbrs in enumerate(self.nbrs_all_query):
        costs = []
        for nn in nbrs:
            tr_v = self.X_train[nn]
            te_v = self.X_test[te_idx]
            cost = dtw(tr_v, te_v,
                       global_constraint="sakoe_chiba",
                       sakoe_chiba_radius=self.sakoe_chiba_radius)
            costs.append(cost)
        sorted_cost_inds = np.argsort(np.array(costs))
        nbrs = np.asarray(nbrs)[sorted_cost_inds]
        nbrs = nbrs[:self.n_neighbors]
        dtw_nbrs_all_query.append(nbrs)
    self.nbrs_all_query = dtw_nbrs_all_query
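# --- Usage sketch (illustrative, not from the original source) ---
# Both nn_dtw variants re-rank candidate neighbours with Sakoe-Chiba-constrained
# DTW. The constraint itself can be exercised directly on two made-up series:
import numpy as np
from tslearn.metrics import dtw as _dtw_example
a = np.sin(np.linspace(0.0, 4.0, 80))
b = np.sin(np.linspace(0.3, 4.3, 80))
unconstrained = _dtw_example(a, b)
banded = _dtw_example(a, b, global_constraint="sakoe_chiba", sakoe_chiba_radius=5)
# banded >= unconstrained: the band restricts the admissible warping paths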
def dtw_distances(X, Y=None, Y_norm_squared=None, squared=False, X_norm_squared=None):
    X = np.asarray(X, order='c')
    Y = np.asarray(Y, order='c')
    s = X.shape
    sB = Y.shape
    if len(s) != 2:
        raise ValueError("XA must be a 2-dimensional array.")
    if len(sB) != 2:
        raise ValueError("XB must be a 2-dimensional array.")
    if s[1] != sB[1]:
        raise ValueError(
            "XA and XB must have the same number of columns "
            "(i.e. feature dimension.)", s, sB)
    mA = s[0]
    mB = sB[0]
    # Copy the rows into plain Python lists before computing pairwise DTW
    XA = [list(row) for row in X]
    YA = [list(row) for row in Y]
    # Pairwise DTW cross-distance matrix, shape (mA, mB)
    dm = []
    for i in range(0, mA):
        dm2 = []
        for j in range(0, mB):
            U = XA[i]
            V = YA[j]
            dm2.append(dtw(U, V))
        dm.append(dm2)
    return np.array(dm)
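# --- Usage sketch (illustrative, not from the original source) ---
# For the common case above (a full DTW cross-distance matrix over equal-length
# rows), tslearn also provides cdist_dtw, which should produce the same matrix
# without the explicit double loop. Array names below are made up.
import numpy as np
from tslearn.metrics import cdist_dtw
X_demo = np.random.rand(4, 20)
Y_demo = np.random.rand(3, 20)
dm_demo = cdist_dtw(X_demo, Y_demo)  # shape (4, 3)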
def _activate(self, x):
    """Updates matrix activation_map, in this matrix
    the element i,j is the response of the neuron i,j to x."""
    # s = subtract(x, self._weights)  # x - w
    it = nditer(self._activation_map, flags=['multi_index'])
    while not it.finished:
        # || x - w ||
        activation_cell = 0
        for i in range(self._bands):
            activation_cell += self._w[i] * dtw(
                x[i], self._weights[i][it.multi_index],
                global_constraint=self.gl_const,
                sakoe_chiba_radius=self.scr,
                itakura_max_slope=self.ims)
        self._activation_map[it.multi_index] = activation_cell
        it.iternext()
def fit_dtw(self):
    signals = [self.gyro_x]  # , self.accel_y, self.accel_z, self.gyro_x, self.gyro_y, self.gyro_z]
    signals_out = [np.zeros((sg.shape[0], sg.shape[0])) for sg in signals]
    for k, sensor in enumerate(signals):
        sensor -= np.mean(sensor, axis=1).reshape(sensor.shape[0], 1)
        sensor /= (np.max(sensor, axis=1) - np.min(sensor, axis=1)).reshape(sensor.shape[0], 1)
        for i, template in enumerate(sensor):
            for j, s in enumerate(sensor):
                sim = dtw(template, s)
                # correlation = np.corrcoef(template.reshape(-1), s.reshape(-1))
                signals_out[k][i, j] = sim
            print(k, i)
    sns.heatmap(pd.DataFrame(signals_out[0]))
    plt.show()
def create_dtw_scores(data_yearly_agg: pd.DataFrame) -> pd.DataFrame:
    # remove our GENT_CONTROL city/state from our city data
    state_city_lookup = get_city_state_lookup(data_yearly_agg, ['State', 'City'])
    state_city_lookup.remove(('GENT_CONTROL', 'GENT_CONTROL'))
    # dtw df for each city when compared to our GENT_CONTROL average
    dtw_df = pd.DataFrame(columns=['State', 'City', 'dtw_score'])
    # Gent. control df
    control_df = data_yearly_agg.loc[
        data_yearly_agg.State == 'GENT_CONTROL'].loc[data_yearly_agg.City == 'GENT_CONTROL']
    control_df.drop(columns=['State', 'City', 'Date'], inplace=True)
    for i in range(0, len(state_city_lookup)):
        # State/City names
        state = state_city_lookup[i][0]
        city = state_city_lookup[i][1]
        # locate each city and drop columns
        single_city_df = data_yearly_agg.loc[
            data_yearly_agg.State == state].loc[data_yearly_agg.City == city]
        single_city_df.drop(columns=['State', 'City', 'Date'], inplace=True)
        # calculate DTW score
        dtw_score = dtw(single_city_df.values, control_df.values)
        # Append city row to dtw df
        dtw_df = dtw_df.append(
            {
                'State': state,
                'City': city,
                'dtw_score': dtw_score
            }, ignore_index=True)
    # add our GENT_CONTROL back with a 0.0 DTW score
    dtw_df = dtw_df.append(
        {
            'State': 'GENT_CONTROL',
            'City': 'GENT_CONTROL',
            'dtw_score': 0.0
        }, ignore_index=True)
    return dtw_df
def dtw_clustering(rdir=ROOT_DIR):
    ROOT_DIR = rdir
    filenames = []
    names = []
    for r, d, f in os.walk(ROOT_DIR):
        if d is not None:
            for company in d:
                names.append(company)
        for file in f:
            if file.endswith('.csv'):
                filenames.append(r + '/' + file)
    df_list = [pd.read_csv(filename, delimiter=";", engine='python',
                           encoding='utf-8').sort_index(axis=0, ascending=False).reset_index()
               for filename in filenames]
    df_list = [df[df['Vol.'] != '-'].reset_index() for df in df_list]
    df_list = [df['Price'] for df in df_list]
    # Min-max normalize each price series before computing DTW distances
    df_list = [(df - df.min()) / (df.max() - df.min()) for df in df_list]

    from tslearn.metrics import dtw
    sz = len(filenames)
    distance_matrix = np.zeros((sz, sz))
    for i in range(sz):
        for j in range(sz):
            distance_matrix[i, j] = dtw(df_list[i], df_list[j])

    from sklearn.cluster import AgglomerativeClustering
    num_cluster = 3
    clustering = AgglomerativeClustering(n_clusters=num_cluster,
                                         affinity='precomputed',
                                         linkage='complete')
    clustering.fit(distance_matrix)
    print(clustering.labels_)

    cluster_corp = []
    cluster_files = []
    for i in range(num_cluster):
        cluster_corp.append(list(np.array(names)[clustering.labels_ == i]))
        cluster_files.append(list(np.array(filenames)[clustering.labels_ == i]))
    return cluster_corp, cluster_files  # , cluster_file_te
def assign_cluster(list_centroids, seq):
    """Returns the label of a given sequence.

    Using the Dynamic Time Warping (DTW) similarity measure, it compares
    sequence similarities between each centroid and the given sequence.
    The centroid with the least DTW distance is returned as the cluster label.

    Parameters
    ----------
    list_centroids: list of lists (e.g. [[0], [0, 1]])
        sequences which have been selected as centroids of the dataset
    seq: list (e.g. [0, 1])
        The given sequence
    """
    dtw_distances = np.zeros(len(list_centroids))
    for i, centroid in enumerate(list_centroids):
        dtw_distances[i] = dtw(centroid, seq)
    ordered_labels_by_dtw_distances = [
        x for _, x in sorted(zip(dtw_distances, list_centroids))
    ]
    return ordered_labels_by_dtw_distances[0]
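# --- Usage sketch (illustrative, not from the original source) ---
# assign_cluster() returns the nearest centroid itself as the label. With the
# made-up centroids below, the second centroid is the closest match in DTW
# distance, so it is returned.
centroids = [[0.0, 0.0, 0.0], [0.0, 1.0, 2.0]]
label = assign_cluster(centroids, [0.1, 0.9, 2.1])  # -> [0.0, 1.0, 2.0]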