def compute_dtw_norm(x, y, dist_for_cdist, norm_div):
    """
    Run ``dtw.accelerated_dtw`` with ``norm_comput=True`` and return its norm.

    :param x: first array
    :param y: second array; the two arrays must have the same number of
        columns (but the number of lines can be different)
    :param dist_for_cdist: distance used by cdist, can be 'braycurtis',
        'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice',
        'euclidean', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',
        'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
        'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski', 'yule'.
        If 'kl', ``dtw.kl_divergence`` is passed as the distance instead.
    :param norm_div: when falsy the norm is returned unchanged; otherwise it
        is divided by the last value returned by ``accelerated_dtw``
    :return: the fifth value returned by ``accelerated_dtw``, optionally
        divided by the sixth (as a float)
    """
    metric = dtw.kl_divergence if dist_for_cdist == "kl" else dist_for_cdist
    # Six values are returned; only the last two are used here.
    _, _, _, _, norm, length = dtw.accelerated_dtw(
        x, y, dist=metric, norm_comput=True)
    if norm_div:
        return float(norm) / float(length)
    return norm
def test_specific_case(self):
    """Check that all three DTW call styles give the same distance on fixed data."""
    x = np.array([1.0, 0.9, 1.2, 2.3, 3.8, 3.3, 4.2, 1.9, 0.5, 0.3, 0.3])
    y = np.array([0.5, 1.0, 0.9, 1.2, 2.3, 3.8, 3.3, 4.2, 1.9, 0.5, 0.3])

    def abs_difference(a, b):
        return np.abs((a - b))

    expected = 0.022727272727272728
    # Run every variant first, then compare, so all three calls always execute.
    by_name, _, _, _ = accelerated_dtw(x, y, 'euclidean')
    by_callable, _, _, _ = accelerated_dtw(x, y, dist=abs_difference)
    by_reference, _, _, _ = dtw(x, y, dist=abs_difference)
    for distance in (by_name, by_callable, by_reference):
        self.assertAlmostEqual(distance, expected)
def dyn_timewarp(self, dyads, unit, dtw_vals=None, sma_wind=None):
    '''
    Compute a length-normalised DTW distance for each dyad.

    For every entry of ``dyads`` the two score series are obtained from
    ``self.extract_scores(dyad, unit)``, optionally smoothed with
    ``self.sma(..., sma_wind)``, and compared with ``accelerated_dtw`` using
    the 'euclidean' distance. The resulting distance is divided by the length
    of the first series.

    The dyads are processed in a loop rather than by recursing on
    ``dyads[1:]``, so long dyad lists neither hit the recursion limit nor
    copy the remaining list at every step.

    :param dyads: sequence of dyads to process; ``None`` or empty means
        "nothing to compute" and only the result table is built
    :param unit: forwarded unchanged to ``self.extract_scores``
    :param dtw_vals: optional list of already computed values; new values are
        appended to it in place
    :param sma_wind: optional window forwarded to ``self.sma``; ``None``
        disables smoothing
    :return: DataFrame with a 'Dyad' column ("first-second" labels built from
        ``self.complete_dyads``) and a 'dtw' column holding ``dtw_vals``
    '''
    if dyads is not None:
        for dyad in dyads:
            dfi_score, dfp_score = self.extract_scores(dyad, unit)
            if sma_wind is not None:
                dfi_score = self.sma(dfi_score, sma_wind)
                dfp_score = self.sma(dfp_score, sma_wind)
            result = accelerated_dtw(dfi_score, dfp_score, dist='euclidean')
            dtw_val = result[0] / len(dfi_score)  # normalize distance
            # Create the list lazily so that "no dyads, no values" still
            # passes None through to the DataFrame, as before.
            if dtw_vals is None:
                dtw_vals = []
            dtw_vals.append(dtw_val)
    dyad_str = np.array(
        [dyad[0] + '-' + dyad[1] for dyad in self.complete_dyads])
    return pd.DataFrame({'Dyad': dyad_str, 'dtw': dtw_vals})
def predict(self, data: list):
    """
    Assign each series to the nearest precalculated centroid.

    Parameters
    -----------------------
    data : a list of pandas Series

    Returns
    -----------------------
    assignments: a dictionary {cluster: index_series}; every cluster index is
        present, possibly with an empty list
    """
    assignments_new = {}
    for cluster_id in tqdm(range(len(self.cluster_centers_))):
        assignments_new[cluster_id] = []
    for series_index, series in enumerate(data):
        distances = [
            accelerated_dtw(array(series), array(center),
                            dist=self.criterion, warp=self.w)[0]
            for center in self.cluster_centers_
        ]
        # index(min(...)) picks the first centroid on ties.
        nearest = distances.index(min(distances))
        assignments_new[nearest].append(series_index)
    return assignments_new
def plot_longterm_dtw():
    """
    Save DTW alignment plots for random pairs of training segments.

    Training data comes from ``read_data_physionet_4()``. With the NumPy seed
    fixed to 0, 50 random pairs of rows are drawn for each warp value in
    (1, 2, 3). Each pair is aligned with ``accelerated_dtw`` ('euclidean'),
    the elapsed time is printed, and a figure with both traces plus every
    50th alignment link is written to ``img_dtw_rhythm/``.

    Each figure is closed after saving; otherwise all 150 figures would stay
    registered with pyplot and keep their memory until the process exits.
    """
    X_train, X_test, Y_train, Y_test, pid_test = read_data_physionet_4()
    print(X_train.shape, Y_train.shape)
    np.random.seed(0)
    for warp in [1, 2, 3]:
        for _ in tqdm(range(50)):
            idx1 = np.random.choice(list(range(X_train.shape[0])))
            idx2 = np.random.choice(list(range(X_train.shape[0])))
            seg1 = np.squeeze(X_train[idx1])
            seg2 = np.squeeze(X_train[idx2])
            # Shift seg1 so its minimum is +3 and seg2 so its maximum is -3:
            # the two traces never overlap in the plot.
            # NOTE(review): the DTW below runs on the shifted traces, so the
            # reported distance includes this offset - confirm it is intended.
            seg1 = seg1 - np.min(seg1) + 3
            seg2 = seg2 - np.max(seg2) - 3
            start = timeit.default_timer()
            d, cost_matrix, acc_cost_matrix, path = accelerated_dtw(
                seg1, seg2, dist='euclidean', warp=warp)
            end = timeit.default_timer()
            print ("time: {} s".format(end-start))
            fig = plt.figure(figsize=(20, 3))
            plt.plot(seg1)
            plt.plot(seg2)
            # Draw every 50th aligned index pair as a red connector.
            for i in range(0, len(path[0]), 50):
                x1 = path[0][i]
                x2 = path[1][i]
                y1 = seg1[x1]
                y2 = seg2[x2]
                plt.plot([x1, x2], [y1, y2], c='r')
            plt.savefig('img_dtw_rhythm/{}_{}_{}_{:.4f}.png'.format(warp, idx1, idx2, d))
            plt.close(fig)
def dtwDrawPath(ref, tar, sr=8000):
    """
    Align the MFCCs of two inputs with DTW and show the matching path.

    MFCCs are obtained with ``DTW.getMFCC(<input>, sr, 200, 180)``. The
    alignment uses the 2-norm of the frame difference as local distance.
    Shapes, distance and path are printed, then the accumulated cost matrix
    is displayed with the path drawn on top.
    """
    ref_mfcc = DTW.getMFCC(ref, sr, 200, 180)
    tar_mfcc = DTW.getMFCC(tar, sr, 200, 180)
    print("Reference shape:", ref_mfcc.shape)
    print("Target shape:", tar_mfcc.shape)

    def frame_distance(a, b):
        return norm(a - b, ord=2)

    dist, costs, accost, path = accelerated_dtw(
        ref_mfcc, tar_mfcc, dist=frame_distance)
    print("Distance:", dist)
    print("Costs:", costs.shape)
    print("Accumulated costs:", accost.shape)
    print("Matching path:", path)

    plt.rcParams.update({
        'font.sans-serif': ['SimHei'],  # specify the default font
        'axes.unicode_minus': False,
        'font.size': 16,
    })
    plt.imshow(accost, interpolation='nearest', cmap='bone', origin='lower')
    plt.plot(path[1], path[0], c='r', label='匹配路径')
    plt.ylabel("匹配模板")
    plt.xlabel("待匹配目标")
    plt.colorbar()
    plt.title("累计误差矩阵匹配路径")
    plt.legend()
    plt.show()
def plot_shortterm_dtw():
    """
    Save DTW alignment plots for the average beats of random segment pairs.

    Training data comes from ``read_data_physionet_4()``. With the NumPy seed
    fixed to 0, 300 random pairs of rows are drawn. For each pair the average
    beat of both segments is obtained with ``get_avg_beat``; the mean-centred
    beats are aligned with ``accelerated_dtw`` ('euclidean', warp=1), the
    elapsed time is printed, and a figure with vertically separated copies of
    the beats plus every 10th alignment link is written to ``img_dtw_beat/``.

    Each figure is closed after saving; otherwise all 300 figures would stay
    registered with pyplot and keep their memory until the process exits.
    """
    wrap = 1
    # average wave dtw
    X_train, X_test, Y_train, Y_test, pid_test = read_data_physionet_4()
    print(X_train.shape, Y_train.shape)
    np.random.seed(0)
    for _ in tqdm(range(300)):
        idx1 = np.random.choice(list(range(X_train.shape[0])))
        idx2 = np.random.choice(list(range(X_train.shape[0])))
        seg1 = np.squeeze(X_train[idx1])
        seg2 = np.squeeze(X_train[idx2])
        beat1 = get_avg_beat(seg1)
        beat2 = get_avg_beat(seg2)
        # Mean-centred copies feed the distance computation ...
        beat1_dist = beat1 - np.mean(beat1)
        beat2_dist = beat2 - np.mean(beat2)
        # ... while shifted copies (min at +3, max at -3) are used only for
        # drawing, so the two traces never overlap.
        beat1_plot = beat1 - np.min(beat1) + 3
        beat2_plot = beat2 - np.max(beat2) - 3
        start = timeit.default_timer()
        d, cost_matrix, acc_cost_matrix, path = accelerated_dtw(
            beat1_dist, beat2_dist, dist='euclidean', warp=wrap)
        end = timeit.default_timer()
        print ("time: {} s".format(end-start))
        fig = plt.figure(figsize=(5, 3))
        plt.plot(beat1_plot)
        plt.plot(beat2_plot)
        # Draw every 10th aligned index pair as a red connector.
        for i in range(0, len(path[0]), 10):
            x1 = path[0][i]
            x2 = path[1][i]
            y1 = beat1_plot[x1]
            y2 = beat2_plot[x2]
            plt.plot([x1, x2], [y1, y2], c='r')
        plt.savefig('img_dtw_beat/{:.4f}_{}_{}.png'.format(d, idx1, idx2))
        plt.close(fig)
def have_all_dtwx(x, y, dist_for_cdist):
    """
    Return everything ``dtw.accelerated_dtw`` produces for ``x`` and ``y``.

    :param x: first array
    :param y: second array; the two arrays must have the same number of
        columns (but the number of lines can be different)
    :param dist_for_cdist: distance used by cdist, can be 'braycurtis',
        'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice',
        'euclidean', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',
        'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
        'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski', 'yule'.
        If 'kl', ``dtw.kl_divergence`` is passed as the distance instead.
    :return: the unmodified return value of ``dtw.accelerated_dtw``
    """
    metric = dtw.kl_divergence if dist_for_cdist == "kl" else dist_for_cdist
    return dtw.accelerated_dtw(x, y, dist=metric)
def eval_pair(pair):
    """
    Compute the similarity between video a and video b of one subject.

    :param pair: a ``(person, a, b)`` triple. The subject's file is
        ``DATAPATH + "/01" + <person + 1, zero-padded to 2 digits> + ".hdf"``
        and the two datasets are looked up under ``make_ev(a)`` and
        ``make_ev(b)``.
    :return: the first value returned by ``accelerated_dtw`` with the
        'euclidean' distance
    """
    person, a, b = pair
    hdfpath = DATAPATH + "/01" + str(person + 1).zfill(2) + ".hdf"
    with h5py.File(hdfpath, 'r') as read_file:
        # Dataset.value was removed in h5py 3.0. Indexing with an empty
        # tuple reads the whole dataset and also works on older releases.
        x = read_file[make_ev(a)][()]
        y = read_file[make_ev(b)][()]
    dist, cost, acc, path = accelerated_dtw(x, y, 'euclidean')
    return dist
def compute_ABX_results(file_abx, folder_files, dist_for_cdist):
    """
    Compute results of an ABX test for a set of triplets.

    :param file_abx: text file with list of filename A B X in order
    :param folder_files: path to the folder holding the feature .csv files
    :param dist_for_cdist: distance used by cdist, can be 'braycurtis',
        'canberra', 'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice',
        'euclidean', 'hamming', 'jaccard', 'kulsinski', 'mahalanobis',
        'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
        'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski', 'yule'.
        If 'kl', ``dtw.kl_divergence`` is passed as the distance instead.
    :return: three values: ``results``, a list of ``triplet + [result]`` with
        result 'OTH' or 'TGT'; ``distances``, a list of
        ``triplet + [OTHX, TGTX]``; and the container returned by
        ``extract_list_files_compare`` with both distances appended to each
        of its triplets
    """
    def load_features(name):
        return np.loadtxt(folder_files + '/' + name + '.csv', delimiter=',')

    list_trip_OTH_TGT_X_trueX = extract_list_files_compare(file_abx)
    metric = dtw.kl_divergence if dist_for_cdist == "kl" else dist_for_cdist
    results = []  # A, B, X then result between A and B
    distances = []  # A, B, X then AX and BX
    for trip in list_trip_OTH_TGT_X_trueX:
        triplet = list_trip_OTH_TGT_X_trueX[trip]
        oth_feats = load_features(triplet[0])
        tgt_feats = load_features(triplet[1])
        x_feats = load_features(triplet[2])
        # If the column counts disagree, swap the axes of all three arrays.
        columns_match = (oth_feats.shape[1] == x_feats.shape[1]
                         and tgt_feats.shape[1] == x_feats.shape[1])
        if not columns_match:
            oth_feats = np.swapaxes(oth_feats, 0, 1)
            tgt_feats = np.swapaxes(tgt_feats, 0, 1)
            x_feats = np.swapaxes(x_feats, 0, 1)
        OTHX = dtw.accelerated_dtw(oth_feats, x_feats, dist=metric)[0]
        TGTX = dtw.accelerated_dtw(tgt_feats, x_feats, dist=metric)[0]
        result = 'OTH' if OTHX < TGTX else 'TGT'
        # Build the output rows before the stored triplet is extended below.
        results.append(triplet + [result])
        distances.append(triplet + [OTHX, TGTX])
        list_trip_OTH_TGT_X_trueX[trip].append(OTHX)
        list_trip_OTH_TGT_X_trueX[trip].append(TGTX)
    return results, distances, list_trip_OTH_TGT_X_trueX
def multiFrameDTW(self, target):
    """
    Classify ``target`` by its summed DTW distance to the reference templates.

    The MFCC of ``target`` is compared with every template in ``DTW.refs``
    (2-norm of the frame difference as local distance). Distances are summed
    per entry of ``DTW.refs`` into a 10-slot array, which is printed; the
    index of the smallest sum is returned.
    """
    mfcc = DTW.getMFCC(target, self.sr)

    def frame_distance(a, b):
        return norm(a - b, ord=2)

    proba = np.zeros(10)
    # Templates are grouped by digit; each digit has several reference
    # templates, and each template consists of a number of frames.
    for digit, templates in enumerate(DTW.refs):
        for template in templates:
            proba[digit] += accelerated_dtw(
                template, mfcc, dist=frame_distance)[0]
    print("Number proba: ", proba)
    return np.argmin(proba)
def DynTimeWarp(col1, col2, TITLE):
    """
    Plot the DTW alignment of two columns of ``series`` and save the figure.

    :param col1: column of the module-level ``series`` used on the x axis
    :param col2: column of the module-level ``series`` used on the y axis
    :param TITLE: passed to ``plt.savefig`` as the output target
    """
    # Interpolation is kept although the original author notes that their
    # data had no missing values.
    first = series[col1].interpolate().values
    second = series[col2].interpolate().values
    d, _, acc_cost_matrix, path = accelerated_dtw(
        first, second, dist='euclidean')
    plt.imshow(acc_cost_matrix.T, origin='lower', interpolation='nearest')
    plt.plot(path[0], path[1], 'w')
    plt.xlabel(col1)
    plt.ylabel(col2)
    heading = f'DTW Minimum Path with minimum distance: {np.round(d,2)}'
    plt.title(heading)
    plt.savefig(TITLE)
def similar_in_two_group(data1, data2):
    """
    Return the mean DTW distance over all cross pairs of two groups.

    Every value of ``data1`` is compared with every value of ``data2`` using
    ``dt.accelerated_dtw`` with the 'euclidean' distance, and the distances
    are averaged.

    A former ``np.linalg.norm(data1[i] - data2[j])`` computation was removed.
    Its result was overwritten immediately, and the subtraction raised for
    sequences of different length, which DTW itself handles.

    :param data1: mapping from key to sequence
    :param data2: mapping from key to sequence
    :return: average of the pairwise DTW distances
    :raises ZeroDivisionError: if either mapping is empty (no pairs)
    """
    res = 0
    count = 0
    for i in data1.keys():
        for j in data2.keys():
            simi, _, _, _ = dt.accelerated_dtw(data1[i], data2[j], 'euclidean')
            res += simi
            count += 1
    res /= count
    return res
def test_fast_vs_normal_1D(self):
    """Check that ``dtw`` and ``accelerated_dtw`` agree on random 1-D series."""
    x = np.random.rand(np.random.randint(2, 100))
    y = np.random.rand(np.random.randint(2, 100))

    def abs_difference(a, b):
        return np.abs((a - b))

    slow_dist, slow_cost, slow_acc, slow_path = dtw(x, y, dist=abs_difference)
    fast_dist, fast_cost, fast_acc, fast_path = accelerated_dtw(
        x, y, 'euclidean')

    self.assertAlmostEqual(slow_dist, fast_dist)
    self.assertAlmostEqual((slow_cost - fast_cost).sum(), 0)
    self.assertAlmostEqual((slow_acc - fast_acc).sum(), 0)
    # Both halves of the warping path must match element by element.
    for axis in (0, 1):
        self.assertTrue((slow_path[axis] == fast_path[axis]).all())
def _generalized_inertia(self, centroids, labels, data):
    """
    Sum the squared DTW distances between each centroid and its members.

    :param centroids: sequence of centroids; each must expose ``.values``
    :param labels: indexable by centroid position, giving the indexes into
        ``data`` of that centroid's members
    :param data: indexable collection whose items expose ``.values``
    :return: the accumulated squared distance (0 when there are no members)
    """
    inertia = 0
    for cluster_id, centroid in enumerate(centroids):
        for member_index in labels[cluster_id]:
            member = data[member_index]
            distance = accelerated_dtw(
                centroid.values, member.values,
                dist=self.criterion, warp=self.w)[0]
            inertia += distance**2
    return inertia
def get_dtw_distance(feature_list1, feature_list2, a_label, b_label):
    """
    Return the DTW distance between two feature sequences.

    The alignment uses ``dtw.accelerated_dtw`` with the 'cosine' distance and
    ``warp=1``. ``a_label`` and ``b_label`` are accepted but not used by the
    computation.
    """
    alignment = dtw.accelerated_dtw(
        feature_list1, feature_list2, dist='cosine', warp=1)
    distance, _cost_matrix, _acc_cost_matrix, _path = alignment
    return distance
def test_dis(self):
    """
    Print the DTW results between the query and each exemplar.

    Exemplars are visited in order. If an exemplar is identical to the query
    its index is returned immediately. Otherwise every element returned by
    ``dtw.accelerated_dtw`` ('euclidean') is printed, followed by a separator
    line.

    Equality is tested with ``np.array_equal``. The earlier
    ``(exemplar == query).all()`` form fails or warns when the shapes differ
    (depending on the NumPy version) and can match by broadcasting.

    :return: index of the first exemplar equal to the query, or ``None``
        when there is none
    """
    array_query = np.asarray(self.query)
    for i in range(0, len(self.exemplars)):
        exemplar = np.asarray(self.exemplars[i])
        if np.array_equal(exemplar, array_query):
            return i
        dist = dtw.accelerated_dtw(array_query, exemplar, 'euclidean')
        for d in dist:
            print("Imprimo:", d)
        print("--------CAMBIO-------")
def similar_in_one_group(data):
    """
    Return the mean DTW distance over all unordered pairs within one group.

    Each value of ``data`` is compared with every value that precedes it in
    key order, using ``dt.accelerated_dtw`` with the 'euclidean' distance.

    :param data: mapping from key to sequence
    :return: average of the pairwise DTW distances
    :raises ZeroDivisionError: if ``data`` has fewer than two entries
    """
    keys = list(data.keys())
    total = 0
    pair_count = 0
    for position, later_key in enumerate(keys):
        for earlier_key in keys[:position]:
            distance = dt.accelerated_dtw(
                data[later_key], data[earlier_key], 'euclidean')[0]
            total += distance
            pair_count += 1
    total /= pair_count
    return total
def dtw():
    """
    Show the DTW alignment of workplace mobility and total cases for King county.

    Workplace mobility is read from 'data/washington_counties/workplaces.csv';
    case counts come from ``total_cases()``. Every (state, county) row of the
    mobility table is printed and its two series are extracted, but only the
    county named 'King' is aligned and plotted.
    """
    from dtw import dtw, accelerated_dtw
    new_cases = new_cases_timeseries_table()
    total_cases_df = total_cases()
    mobility_table = pd.read_csv('data/washington_counties/workplaces.csv')
    # The date columns start after the first three columns.
    dates = list(mobility_table.columns[3:])

    for state, county in zip(mobility_table.state.values,
                             mobility_table.county.values):
        county_cases = total_cases_df[
            (total_cases_df['state'] == state)
            & (total_cases_df['county'] == county)]
        county_mobility = mobility_table[
            (mobility_table['state'] == state)
            & (mobility_table['county'] == county)]
        print(state, county)
        cases_series = county_cases[dates].interpolate().values[0]
        mobility_series = county_mobility.interpolate().values[0][3:]
        if county != 'King':
            continue
        d, cost_matrix, acc_cost_matrix, path = accelerated_dtw(
            mobility_series, cases_series, dist='euclidean')
        plt.imshow(acc_cost_matrix.T, origin='lower', cmap='gray',
                   interpolation='nearest')
        plt.plot(path[0], path[1], 'w')
        plt.xlabel('Subject1')
        plt.ylabel('Subject2')
        plt.title(
            f'DTW Minimum Path with minimum distance: {np.round(d,2)}')
        plt.show()
def dynamic_time_warping(df, feature1, feature2):
    """Compute and plot the dynamic time warping of feature1 and feature2.

    e.g. dynamic_time_warping(df, "Inflation", "Wage") computes and plots the
    dynamic time warping between the "Inflation" and the "Wage" columns.

    :param: df, pandas.DataFrame, data contains different features (columns)
    :param: feature1, str, name of the column, e.g. "Inflation"
    :param: feature2, str, name of another column e.g. "Wage"
    """
    first, second = (
        df[column].interpolate().values for column in (feature1, feature2)
    )
    d, _, acc_cost_matrix, path = accelerated_dtw(
        first, second, dist='euclidean')
    plt.imshow(acc_cost_matrix.T, origin='lower', cmap='gray',
               interpolation='nearest')
    plt.plot(path[0], path[1], 'w')
    plt.xlabel(feature1)
    plt.ylabel(feature2)
    heading = f'DTW Minimum Path with minimum distance: {np.round(d,2)}'
    plt.title(heading)
    plt.show()
def average_asynchrony(list_ecg, ecg_comparaison, plot=False):
    """
    Average the DTW index offsets between 12 ECG traces and a comparison ECG.

    Parameters :
        - list_ecg : indexable collection; entries 0..11 are each reshaped to
          a column vector and aligned against ``ecg_comparaison``
          (original note: "(4096,12) corresponding" - TODO confirm the layout)
        - ecg_comparaison : ECG to compare considering asynchrony
        - plot : if true, show the accumulated cost matrix with the warping
          path, plus ``list_ecg[0]`` and ``list_ecg[1]``

    Returns:
        array (2,)
            0 : sum over the warping path of (path[1] - path[0]) / 400,
                averaged over the 12 traces
            1 : sum over the warping path of (path[1] - path[0])**2 / 16000,
                averaged over the 12 traces
    """
    d2 = ecg_comparaison.reshape(-1, 1)
    res_l1 = 0
    res_l2 = 0
    for k in range(12):
        d1 = list_ecg[k].reshape(-1, 1)
        d, cost_matrix, acc_cost_matrix, path = accelerated_dtw(
            d1, d2, dist='euclidean')
        # Signed index offset along the warping path, scaled by 400.
        # NOTE(review): presumably 400 is a sampling rate - confirm.
        res_l1 += np.sum((path[1] - path[0]) / 400)
        # Squared index offset, scaled by 16000.
        # NOTE(review): 400**2 would be 160000 - confirm the constant.
        res_l2 += np.sum((path[1] - path[0]) * (path[1] - path[0]) / 16000)
    # NOTE(review): the source layout did not show whether this block belongs
    # inside the loop above; as placed here it plots the last alignment only.
    if plot:
        figure = plt.figure(figsize=(10, 10))
        ax1 = figure.add_subplot(2, 1, 1)
        ax1.imshow(acc_cost_matrix.T, origin='lower', cmap='gray', interpolation='nearest')
        ax1.plot(path[0], path[1], 'w')
        ax1.set_xlabel('ECG 0')
        ax1.set_ylabel('ECG 1')
        ax1.set_title(
            f'DTW Minimum Path with minimum distance: {np.round(d, 2)}')
        ax2 = figure.add_subplot(2, 1, 2)
        # The second panel always shows entries 0 and 1 of list_ecg.
        ax2.plot(list_ecg[0], label="ecg0")
        ax2.plot(list_ecg[1], label="ecg1")
        ax2.set_title("Comparaison des ECG")
        ax2.legend()
        plt.show()
    return np.array([res_l1 / 12, res_l2 / 12])
def __call__(self, u, v, k=None):
    """
    Return the DTW distance between ``u`` and ``v`` accumulated over layers 0..k.

    For layer ``k`` the sequences ``self.sorted_degree_matrix_list[k][u].data``
    and ``[v].data`` are compared with ``accelerated_dtw`` ('euclidean'); the
    distances of all lower layers are added by recursion. A result computed
    for ``k == self.k`` is stored in ``self.dist_dic`` under both ``(u, v)``
    and ``(v, u)``. Any cached entry for ``(u, v)`` is returned as is,
    whatever ``k`` was requested.

    :param u: first node key
    :param v: second node key
    :param k: highest layer to include; defaults to ``self.k``
    """
    pair = (u, v)
    if pair in self.dist_dic:
        return self.dist_dic[pair]

    if k is None:
        k = self.k
    seq_u = self.sorted_degree_matrix_list[k][u].data
    seq_v = self.sorted_degree_matrix_list[k][v].data
    dist = accelerated_dtw(seq_u, seq_v, 'euclidean')[0]
    assert dist >= 0, 'dist: %.4f, %s_%s' % (dist, str(seq_u), str(seq_v))

    if k == 0:
        return dist
    total = dist + self.__call__(u, v, k-1)
    if k == self.k:
        # Only the full-depth result is cached, symmetrically.
        self.dist_dic[pair] = total
        self.dist_dic[v, u] = total
    return total
def _kmeans_iteration(self, data, centroids):
    """Run a single iteration of Lloyd's k-means with DTW as the distance.

    Parameters
    ----------
    data : a list of pandas Series
    centroids : the current centroids as list of pandas Series, as many as
        self.num_clust

    Returns
    -----------------------
    assignments : the current sample assignments as a dictionary in the form
        { e : [index] } where e is the centroid number and the indexes in the
        list are the indexes of the data elements in the relevant centroid
    new_centroids : copy of ``centroids`` in which every non-empty cluster is
        replaced by the mean of its members
    """
    # Assignment step.
    assignments = {e: [] for e in range(self.num_clust)}
    for sample_index, sample in enumerate(data):
        best_dist = float('inf')
        best_cluster = None
        for cluster_index, centroid in enumerate(centroids):
            candidate = accelerated_dtw(array(sample), array(centroid),
                                        dist=self.criterion, warp=self.w)[0]
            # "<=" means that, on ties, the later centroid wins.
            if candidate <= best_dist:
                best_dist, best_cluster = candidate, cluster_index
        if best_cluster in assignments:
            assignments[best_cluster].append(sample_index)

    # Update step: empty clusters keep their previous centroid.
    new_centroids = centroids.copy()
    for key, members in assignments.items():
        if len(members) > 0:
            new_centroids[key] = sum(data[k] for k in members) / len(members)
    return assignments, new_centroids
def run_dtw(ori_val, gen_val, metric):
    """
    Plot DTW alignments of ``ori_val`` against each entry of ``gen_val``.

    Each alignment ('euclidean' distance) is drawn into one panel of a 2x3
    grid, filled row by row. The figure is saved as 'dtw_lstm.pdf' and shown.

    :param ori_val: reference sequence
    :param gen_val: indexable collection of sequences to compare with it
    :param metric: string appended to the figure title
    :return: array of the DTW distances divided by their maximum
    """
    fontsize = 22
    matplotlib.rcParams.update({'font.size': fontsize})
    fig, axs = plt.subplots(2, 3, figsize=(27, 18))
    fig.suptitle('Dynamic Time Warping for ' + metric)

    min_paths = []
    for i in range(len(gen_val)):
        row, col = divmod(i, 3)
        panel = axs[row, col]
        d, _, acc_cost_matrix, path = accelerated_dtw(
            ori_val, gen_val[i], dist='euclidean')
        min_paths.append(d)
        panel.imshow(acc_cost_matrix.T, origin='lower', cmap='gray',
                     interpolation='nearest')
        panel.plot(path[0], path[1], 'w')
        panel.set(xlabel='generated data run 1', ylabel='original data run 1')

    # The per-panel labels set above are replaced for every axis here.
    for ax in axs.flat:
        ax.set(xlabel='generated data', ylabel='original data')
    for ax in axs.flat:
        ax.label_outer()
    plt.tight_layout()
    plt.savefig('dtw_lstm.pdf')
    plt.show()

    distances = np.array(min_paths)
    return distances / distances.max()
def compute_batch(i_batch):
    """
    Compute and save the pairwise DTW matrix for one batch of beats.

    The batch is the slice ``all_beat[i_batch*batch_size : ...]``, clipped to
    ``n_sample``. Every pair (i, j) with i < j is mean-centred and aligned
    with ``accelerated_dtw`` ('euclidean', ``warp=wrap``). Only the strict
    upper triangle of the matrix is filled; the rest stays zero. The matrix
    is written to 'sim_mat/<i_batch>.npy'.

    ``all_beat``, ``batch_size``, ``n_sample`` and ``wrap`` are read from the
    enclosing scope.
    """
    start_idx = i_batch*batch_size
    end_idx = min(start_idx + batch_size, n_sample)
    batch_beat = all_beat[start_idx:end_idx]
    print(batch_beat.shape)

    n_local = batch_beat.shape[0]
    mat = np.zeros((n_local, n_local))
    for i in tqdm(range(0, n_local-1)):
        first = batch_beat[i]
        first_centered = first - np.mean(first)
        for j in range(i+1, n_local):
            second = batch_beat[j]
            second_centered = second - np.mean(second)
            d, _, _, _ = accelerated_dtw(
                first_centered, second_centered, dist='euclidean', warp=wrap)
            mat[i, j] = d
    np.save('sim_mat/{}.npy'.format(i_batch), mat)
def time_distance(signature, runtime):
    """
    Compute the similarity between two signals using dynamic time warping.

    Both frames are ordered by their "TimeStamp" column, their "Value"
    columns are aligned with ``accelerated_dtw`` (``dist=euclidean``), and
    the score ``max(0, 1 - distance)`` is written into the "Value" column of
    the last row of the ordered ``runtime`` frame.

    The inputs are no longer sorted in place, so the caller's DataFrames are
    left untouched. The previous ``try/except: raise`` wrapper only re-raised
    and was dropped; exceptions propagate as before.

    Args:
        signature (pandas.DataFrame): signal used as signature for the comparison
        runtime (pandas.DataFrame): signal to compare with the signature

    Returns:
        pandas.DataFrame: one-row copy of the last ``runtime`` row (after
        ordering by "TimeStamp") with "Value" replaced by the score
    """
    signature_sorted = signature.sort_values(by=["TimeStamp"], ascending=True)
    runtime_sorted = runtime.sort_values(by=["TimeStamp"], ascending=True)
    # Copy after slicing so the assignment below targets an independent frame.
    result = runtime_sorted.tail(1).copy()
    result_dtw = accelerated_dtw(
        signature_sorted["Value"].values,
        runtime_sorted["Value"].values,
        dist=euclidean)
    # Clamp at zero so distances of one or more give a score of 0.
    kpi = max(0, 1 - result_dtw[0])
    result["Value"] = kpi
    return result
# if no start time means we don't use matched words from previous sentence for estimation if aw_within == []: guessing_words_fuc(sw['sentence_words'], sw['start_time'], sw['end_time'], estimated_words, output_table_as_ls, 'full') leftovers = {"words": []} else: # dtw # acm = accumulated cost matrix, use path here print(np.array(sw['sentence_words_stemmed'])) print(np.array(aw_within_stemmed_words)) distance, cost, acm, path = accelerated_dtw( np.array(sw['sentence_words_stemmed']), np.array(aw_within_stemmed_words), edit_distance, warp=1) #print(path) # at the moment it's not good at dealing with situations where one amazon word is aligned with multiple # subtitle words (i.e. amazon get fewer words than the subtitle), so e.g. if "this, this" in subtitle is # aligned with the same "this" in the transcript, the second wouldn't get a timing because it would be repeated # can count how many instances like these there are first ... path_reo = reorganize_path(path) for i in range(0, len(path_reo)): # container of words that don't have a time tag amazon_word_indexes = path_reo[i][1] sentence_word_indexes = path_reo[i][0] #print(aw_stemmed_words_aligned) levenshtein_similarity = [
df_match.loc[list_match_date4[i].isoformat()]['CLOSE'].to_numpy()[-1] - list_match_moves[i][-1]) # %% for문 돌며 dtw 계산 #list_dtw = [] from dtw import accelerated_dtw from tqdm import tqdm max_rank = 20 list_price_chg_forecast = [] for np_moves_match in tqdm(list_match_moves): # match_moves의 각 하루치 일자 당 반복하며 list_dtw = [] price_chg_forecast = 0 for np_moves_data in (list_data_moves): d, _, _, _ = accelerated_dtw(stats.zscore(np_moves_match), stats.zscore(np_moves_data), 'euclidean') list_dtw.append(d) list_rank = stats.rankdata(list_dtw).tolist() sum_chg_mult_weight = 0 sum_weight = 0 for rank in range(1, max_rank + 1): index = list_rank.index(rank) weight = 1 / list_dtw[index] #date3 = list_data_date3[index] date4 = list_data_date4[index] pivot_price = list_data_moves[index][-1] close_price = df_data.loc[date4.isoformat()]['CLOSE'].to_numpy()[-1] price_chg = close_price - pivot_price sum_chg_mult_weight += price_chg * weight sum_weight += weight
def fastdtw(x, y):
    """
    Return the ``accelerated_dtw`` distance between ``x`` and ``y``.

    The local cost between two elements is their squared difference.
    """
    def squared_difference(a, b):
        return (a - b)**2

    dist, _cost_matrix, _acc_cost_matrix, _path = accelerated_dtw(
        x, y, squared_difference)
    return dist
## df_last = df_chunks[-1] sizeleft = tsize - len(df_last) if sizeleft < int(tsize / 2) + 1: curatedlist.append(df_last) else: curatedlist.append(dfraw) #%% calculated distance scores based on DTW algorithm print('calculating dtw..') distMatrix = np.zeros([len(curatedlist), len(curatedlist)]) for i in range(len(curatedlist)): for j in range(len(curatedlist)): df_1 = curatedlist[i] df_2 = curatedlist[j] aX = df_1.xumap.values bX = df_1.yumap.values aY = df_2.xumap.values bY = df_2.yumap.values X = np.concatenate((aX.reshape(-1, 1), bX.reshape(-1, 1)), axis=1) Y = np.concatenate((aY.reshape(-1, 1), bY.reshape(-1, 1)), axis=1) d, _, _, _ = accelerated_dtw(X, Y, dist='euclidean') distMatrix[i, j] = d #%% cluster using louvain print('caculating louvain') G = nx.from_numpy_matrix(distMatrix) nx.draw(G) partition = community.best_partition(G)