def compute_dtw_norm(x, y, dist_for_cdist, norm_div):
    """

        :param x:
        :param y: the two array must have the same number of columns (but the nb of lines can be different)
        :param dist_for_cdist: :param dist_for_cdist: distance used by cdist, can be 'braycurtis', 'canberra',
        'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 'euclidean', 'hamming',
        'jaccard', 'kulsinski', 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto',
        'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski', 'yule'.
        if 'kl', then instead of dtw with a distance, we use kl divergence
        :return: dtw distance between x and y
    """

    if dist_for_cdist == "kl":
        D1, C, D2, path, norm, long = dtw.accelerated_dtw(
            x, y, dist=dtw.kl_divergence, norm_comput=True)
    else:
        D1, C, D2, path, norm, long = dtw.accelerated_dtw(x,
                                                          y,
                                                          dist=dist_for_cdist,
                                                          norm_comput=True)

    #print('path', path)
    if not norm_div:
        return norm
    else:
        return float(norm) / float(long)
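A minimal usage sketch (not from the original source), assuming the custom dtw fork above, i.e. one whose accelerated_dtw accepts norm_comput=True and which exposes kl_divergence:

import numpy as np

# hypothetical inputs: same number of columns, different numbers of rows
a = np.random.rand(50, 13)
b = np.random.rand(60, 13)
print(compute_dtw_norm(a, b, dist_for_cdist='euclidean', norm_div=True))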
Example No. 2
    def test_specific_case(self):
        x = np.array([1.0, 0.9, 1.2, 2.3, 3.8, 3.3, 4.2, 1.9, 0.5, 0.3, 0.3])
        y = np.array([0.5, 1.0, 0.9, 1.2, 2.3, 3.8, 3.3, 4.2, 1.9, 0.5, 0.3])

        euclidean = lambda x, y: np.abs(x - y)  # in 1-D, |x - y| is the Euclidean metric

        d1, _, _, _ = accelerated_dtw(x, y, 'euclidean')
        d2, _, _, _ = accelerated_dtw(x, y, dist=euclidean)
        d3, _, _, _ = dtw(x, y, dist=euclidean)

        self.assertAlmostEqual(d1, 0.022727272727272728)
        self.assertAlmostEqual(d2, 0.022727272727272728)
        self.assertAlmostEqual(d3, 0.022727272727272728)
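The string 'euclidean' and the |x - y| lambda agree here because, in one dimension, the Euclidean metric reduces to the absolute difference; a quick check of that identity with scipy:

import numpy as np
from scipy.spatial.distance import cdist

a = np.array([[1.0], [2.3]])  # 1-D points as column vectors, as cdist expects
b = np.array([[0.5], [3.8]])
assert np.allclose(cdist(a, b, 'euclidean'), np.abs(a - b.T))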
Example No. 3
    def dyn_timewarp(self, dyads, unit, dtw_vals=None, sma_wind=None):
        '''
        compute dynamic time warping distance
        '''
        if dyads is None or len(dyads) == 0:
            dyad_str = np.array(
                [dyad[0] + '-' + dyad[1] for dyad in self.complete_dyads])
            dtws = pd.DataFrame({'Dyad': dyad_str, 'dtw': dtw_vals})
            return (dtws)

        dfi_score, dfp_score = self.extract_scores(dyads[0], unit)

        if sma_wind is not None:
            dfi_score = self.sma(dfi_score, sma_wind)
            dfp_score = self.sma(dfp_score, sma_wind)

        dtw = accelerated_dtw(dfi_score, dfp_score, dist='euclidean')

        dtw_val = dtw[0] / len(dfi_score)  # normalize distance

        if dtw_vals is None:
            dtw_vals = [dtw_val]
        else:
            dtw_vals.append(dtw_val)

        dtws = self.dyn_timewarp(dyads[1:], unit, dtw_vals, sma_wind=sma_wind)
        return (dtws)
Example No. 4
    def predict(self, data: list):
        """
        Assign new series to clusters based on the precalculated centroids.

        Parameters
        -----------------------
        data : a list of pandas Series

        Returns
        -----------------------
        assignments: a dictionary {cluster: list of series indexes}
        """

        assignments_new = {}

        for e in tqdm(range(len(self.cluster_centers_))):
            assignments_new.update({e: []})
        for ind, i in enumerate(data):
            dist = []
            for _, j in enumerate(self.cluster_centers_):
                fastDTW, _, _, _ = accelerated_dtw(array(i),
                                                   array(j),
                                                   dist=self.criterion,
                                                   warp=self.w)
                dist.append(fastDTW)
            clust = dist.index(min(dist))
            assignments_new[clust].append(ind)
        return assignments_new
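The same nearest-centroid rule, sketched without the class and with illustrative data; assumes the stock accelerated_dtw(x, y, dist, warp=1) signature from the dtw package:

import numpy as np
from dtw import accelerated_dtw

series = [np.sin(np.linspace(0, 6, 80)), np.cos(np.linspace(0, 6, 90))]
centers = [np.sin(np.linspace(0, 6, 100)), np.cos(np.linspace(0, 6, 100))]

assignments = {e: [] for e in range(len(centers))}
for ind, s in enumerate(series):
    dists = [accelerated_dtw(s, c, dist='euclidean', warp=1)[0] for c in centers]
    assignments[int(np.argmin(dists))].append(ind)  # closest centroid wins
print(assignments)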
Example No. 5
def plot_longterm_dtw():
    X_train, X_test, Y_train, Y_test, pid_test = read_data_physionet_4()
    print(X_train.shape, Y_train.shape)
    np.random.seed(0)

    for warp in [1,2,3]:

        for _ in tqdm(range(50)):
            idx1 = np.random.choice(list(range(X_train.shape[0])))
            idx2 = np.random.choice(list(range(X_train.shape[0])))
            seg1 = np.squeeze(X_train[idx1])
            seg2 = np.squeeze(X_train[idx2])
            seg1 = seg1 - np.min(seg1) + 3
            seg2 = seg2 - np.max(seg2) - 3

            start = timeit.default_timer()
            d, cost_matrix, acc_cost_matrix, path = accelerated_dtw(seg1, seg2, dist='euclidean', warp=warp)
            end = timeit.default_timer()
            print ("time: {} s".format(end-start))
            plt.figure(figsize=(20,3))
            plt.plot(seg1)
            plt.plot(seg2)
            for i in range(0, len(path[0]), 50):
                x1 = path[0][i]
                x2 = path[1][i]
                y1 = seg1[x1]
                y2 = seg2[x2]
                plt.plot([x1, x2], [y1, y2], c='r')
            plt.savefig('img_dtw_rhythm/{}_{}_{}_{:.4f}.png'.format(warp, idx1, idx2, d))
Example No. 6
 def dtwDrawPath(ref, tar, sr=8000):
     mfcc1 = DTW.getMFCC(ref, sr, 200, 180)
     mfcc2 = DTW.getMFCC(tar, sr, 200, 180)
     print("Reference shape:", mfcc1.shape)
     print("Target shape:", mfcc2.shape)
     dist, costs, accost, path = accelerated_dtw(
         mfcc1, mfcc2, dist=lambda x, y: norm(x - y, ord=2))
     print("Distance:", dist)
     print("Costs:", costs.shape)
     print("Accumulated costs:", accost.shape)
     print("Matching path:", path)
     plt.rcParams['font.sans-serif'] = ['SimHei']  # set the default font
     plt.rcParams['axes.unicode_minus'] = False
     plt.rcParams['font.size'] = 16
     plt.imshow(accost,
                interpolation='nearest',
                cmap='bone',
                origin='lower')
     plt.plot(path[1], path[0], c='r', label='matching path')
     plt.ylabel("reference template")
     plt.xlabel("target to be matched")
     plt.colorbar()
     plt.title("Matching path over the accumulated cost matrix")
     plt.legend()
     plt.show()
Example No. 7
def plot_shortterm_dtw():

    warp = 1
    # average wave dtw
    X_train, X_test, Y_train, Y_test, pid_test = read_data_physionet_4()
    print(X_train.shape, Y_train.shape)
    np.random.seed(0)

    for _ in tqdm(range(300)):
        idx1 = np.random.choice(list(range(X_train.shape[0])))
        idx2 = np.random.choice(list(range(X_train.shape[0])))
        seg1 = np.squeeze(X_train[idx1])
        seg2 = np.squeeze(X_train[idx2])
        beat1 = get_avg_beat(seg1)
        beat2 = get_avg_beat(seg2)
        beat1_dist = beat1 - np.mean(beat1)
        beat2_dist = beat2 - np.mean(beat2)
        beat1_plot = beat1 - np.min(beat1) + 3
        beat2_plot = beat2 - np.max(beat2) - 3
        
        start = timeit.default_timer()
        d, cost_matrix, acc_cost_matrix, path = accelerated_dtw(beat1_dist, beat2_dist, dist='euclidean', warp=warp)
        end = timeit.default_timer()
        print ("time: {} s".format(end-start))
        plt.figure(figsize=(5,3))
        plt.plot(beat1_plot)
        plt.plot(beat2_plot)
        for i in range(0, len(path[0]), 10):
            x1 = path[0][i]
            x2 = path[1][i]
            y1 = beat1_plot[x1]
            y2 = beat2_plot[x2]
            plt.plot([x1, x2], [y1, y2], c='r')
        plt.savefig('img_dtw_beat/{:.4f}_{}_{}.png'.format(d, idx1, idx2))
Example No. 8
def have_all_dtwx(x, y, dist_for_cdist):
    """

    :param x:
    :param y: the two array must have the same number of columns (but the nb of lines can be different)
    :param dist_for_cdist: :param dist_for_cdist: distance used by cdist, can be 'braycurtis', 'canberra',
    'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 'euclidean', 'hamming',
    'jaccard', 'kulsinski', 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto',
    'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski', 'yule'.
    if 'kl', then instead of dtw with a distance, we use kl divergence
    :return: dtw distance between x and y
    """

    if dist_for_cdist == "kl":
        return dtw.accelerated_dtw(x, y, dist=dtw.kl_divergence)
    else:
        return dtw.accelerated_dtw(x, y, dist=dist_for_cdist)
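A usage sketch, under the assumption that without norm_comput the fork returns the usual 4-tuple (distance, cost matrix, accumulated cost matrix, path):

import numpy as np

a = np.random.rand(40, 13)
b = np.random.rand(55, 13)
d, cost, acc_cost, path = have_all_dtwx(a, b, dist_for_cdist='cosine')
print(d)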
Example No. 9
def eval_pair(pair):
    """ある被験者の動画aと動画bの類似度を計算する."""
    person, a, b = pair
    hdfpath = DATAPATH + "/01" + str(person + 1).zfill(2) + ".hdf"
    with h5py.File(hdfpath, 'r') as read_file:
        x = read_file[make_ev(a)].value
        y = read_file[make_ev(b)].value
        dist, cost, acc, path = accelerated_dtw(x, y, 'euclidean')
        return dist
Example No. 10
def compute_ABX_results(file_abx, folder_files, dist_for_cdist):
    """
    Compute results of ABX test on a set for a set of triplets
    :param file_abx: text file with list of filename A B X in order
    :param folder_files: path to feature files .csv to use
    :param dist_for_cdist: distance used by cdist, can be 'braycurtis', 'canberra',
    'chebyshev', 'cityblock', 'correlation', 'cosine', 'dice', 'euclidean', 'hamming',
    'jaccard', 'kulsinski', 'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto',
    'russellrao', 'seuclidean', 'sokalmichener', 'sokalsneath', 'sqeuclidean', 'wminkowski', 'yule'.
    if 'kl', then instead of dtw with a distance, we use kl divergence
    :return: results list of [A, B, X, truueX, result] and distances list of [A, B, X, trueX, AX, BX]
    """
    list_trip_OTH_TGT_X_trueX = extract_list_files_compare(file_abx)
    results = []  # A, B, X then result between A and B
    distances = []  # A, B, X then AX and BX
    for trip in list_trip_OTH_TGT_X_trueX:
        triplet = list_trip_OTH_TGT_X_trueX[trip]
        OTH = np.loadtxt(folder_files + '/' + triplet[0] + '.csv',
                         delimiter=',')
        TGT = np.loadtxt(folder_files + '/' + triplet[1] + '.csv',
                         delimiter=',')
        X = np.loadtxt(folder_files + '/' + triplet[2] + '.csv', delimiter=',')
        #print(triplet[0], triplet[1], triplet[2])
        #print(A.shape, B.shape, X.shape)
        if OTH.shape[1] != X.shape[1] or TGT.shape[1] != X.shape[1]:
            #print("in")
            OTH = np.swapaxes(OTH, 0, 1)
            TGT = np.swapaxes(TGT, 0, 1)
            X = np.swapaxes(X, 0, 1)
        else:
            pass
            #print("out")
        if dist_for_cdist == "kl":
            OTHX = dtw.accelerated_dtw(OTH, X, dist=dtw.kl_divergence)[0]
            TGTX = dtw.accelerated_dtw(TGT, X, dist=dtw.kl_divergence)[0]
        else:
            OTHX = dtw.accelerated_dtw(OTH, X, dist=dist_for_cdist)[0]
            TGTX = dtw.accelerated_dtw(TGT, X, dist=dist_for_cdist)[0]
        result = 'OTH' if OTHX < TGTX else 'TGT'
        results.append(triplet + [result])
        distances.append(triplet + [OTHX, TGTX])
        list_trip_OTH_TGT_X_trueX[trip].append(OTHX)
        list_trip_OTH_TGT_X_trueX[trip].append(TGTX)
    return results, distances, list_trip_OTH_TGT_X_trueX
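The core ABX decision in isolation: X is assigned to whichever of OTH and TGT it is closer to under DTW. A hedged sketch on random features, using the stock dtw package API:

import numpy as np
from dtw import accelerated_dtw

OTH, TGT, X = (np.random.rand(n, 13) for n in (40, 55, 50))
OTHX = accelerated_dtw(OTH, X, dist='cosine')[0]
TGTX = accelerated_dtw(TGT, X, dist='cosine')[0]
print('OTH' if OTHX < TGTX else 'TGT')  # the lower DTW distance wins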
Example No. 11
 def multiFrameDTW(self, target):
     mfcc = DTW.getMFCC(target, self.sr)
     proba = np.zeros(10)
     for i, num in enumerate(DTW.refs):  # iterate over the digit classes
         for ref in num:  # each digit has several reference templates, each a number of frames long
             dist, _, _, _ = accelerated_dtw(
                 ref, mfcc, dist=lambda x, y: norm(x - y, ord=2))
             proba[i] += dist
     print("Number proba: ", proba)
     return np.argmin(proba)
Example No. 12
def DynTimeWarp(col1, col2, TITLE):
    d1 = series[col1].interpolate().values #I didn't need to interpolate though, because I don't have any missing values
    d2 = series[col2].interpolate().values
    d, cost_matrix, acc_cost_matrix, path = accelerated_dtw(d1,d2, dist='euclidean')
    plt.imshow(acc_cost_matrix.T, origin='lower', interpolation='nearest')
    plt.plot(path[0], path[1], 'w')
    plt.xlabel(col1)
    plt.ylabel(col2)
    plt.title(f'DTW Minimum Path with minimum distance: {np.round(d,2)}')
    plt.savefig(TITLE)
Example No. 13
def similar_in_two_group(data1, data2):
    res = 0
    count = 0
    for i in data1.keys():
        for j in data2.keys():
            # simi = pearsonr(data[i], data[j])[0]
            # simi = np.linalg.norm(data1[i] - data2[j])  # dead assignment: it was overwritten by the DTW call below
            simi, _, _, _ = dt.accelerated_dtw(data1[i], data2[j], 'euclidean')
            res += simi
            count += 1
    res /= count
    return res
Example No. 14
    def test_fast_vs_normal_1D(self):
        x = np.random.rand(np.random.randint(2, 100))
        y = np.random.rand(np.random.randint(2, 100))

        d1, c1, acc1, p1 = dtw(x, y, dist=lambda x, y: np.abs((x - y)))
        d2, c2, acc2, p2 = accelerated_dtw(x, y, 'euclidean')

        self.assertAlmostEqual(d1, d2)
        self.assertAlmostEqual((c1 - c2).sum(), 0)
        self.assertAlmostEqual((acc1 - acc2).sum(), 0)
        self.assertTrue((p1[0] == p2[0]).all())
        self.assertTrue((p1[1] == p2[1]).all())
Example No. 15
 def _generalized_inertia(self, centroids, labels, data):
     inertia = 0
     for e, centroid in enumerate(centroids):
         members = labels[e]
         for member_index in members:
             i = centroid
             j = data[member_index]
             fastDTW, _, _, _ = accelerated_dtw(i.values,
                                                j.values,
                                                dist=self.criterion,
                                                warp=self.w)
             inertia += fastDTW**2
     return inertia
Example No. 16
def get_dtw_distance(feature_list1, feature_list2, a_label, b_label):
    d, cost_matrix, acc_cost_matrix, path = dtw.accelerated_dtw(feature_list1,
                                                                feature_list2,
                                                                dist='cosine',
                                                                warp=1)

    # plt.imshow(cost_matrix.T, origin='lower', cmap='gray', interpolation='nearest')
    # plt.plot(path[0], path[1], 'w')
    # plt.xlabel('label: ' + str(a_label))
    # plt.ylabel('label: ' + str(b_label))
    # plt.title(str(a_label[0] == b_label[0]) + ' dist:' + str(d))
    # print(a_label, b_label)
    # plt.show()
    return d
Example No. 17
    def test_dis(self):
        array_query = np.asarray(self.query)
        closest_nodes = list()
        dist = float('inf')
        bsf = float('-inf')

        for i in range(0, len(self.exemplars)):
            exemplar = np.asarray(self.exemplars[i])

            if (exemplar == array_query).all():
                return i
            dist = dtw.accelerated_dtw(array_query, exemplar, 'euclidean')
            for d in dist:
                print("Imprimo:", d)
            print("--------CAMBIO-------")
Example No. 18
def similar_in_one_group(data):
    res = 0
    k = list(data.keys())
    count = 0
    for i in range(len(k)):
        j = 0
        while j < i:
            # simi = pearsonr(data[dt[i]], data[dt[j]])[0]
            #simi = np.linalg.norm(data[dt[i]] - data[dt[j]])
            simi, _, _, _ = dt.accelerated_dtw(data[k[i]], data[k[j]],
                                               'euclidean')
            res += simi
            count += 1
            j += 1
    res /= count
    return res
Example No. 19
def dtw():
    from dtw import dtw, accelerated_dtw

    new_cases = new_cases_timeseries_table()
    total_cases_df = total_cases()

    washington_workplace_mobility = pd.read_csv(
        'data/washington_counties/workplaces.csv')
    dates = list(washington_workplace_mobility.columns[3:])

    plot_county_names = []
    plot_corr = []
    p_vals = []
    for state, county in zip(washington_workplace_mobility.state.values,
                             washington_workplace_mobility.county.values):

        covid_county_timeseries = total_cases_df[
            (total_cases_df['state'] == state)
            & (total_cases_df['county'] == county)]
        workplace_mobility = washington_workplace_mobility[
            (washington_workplace_mobility['state'] == state)
            & (washington_workplace_mobility['county'] == county)]

        print(state, county)

        this_county_covid_total_cases = covid_county_timeseries[
            dates].interpolate().values[0]
        this_county_workplace_mobility = workplace_mobility.interpolate(
        ).values[0][3:]

        if county == 'King':
            d, cost_matrix, acc_cost_matrix, path = accelerated_dtw(
                this_county_workplace_mobility,
                this_county_covid_total_cases,
                dist='euclidean')

            plt.imshow(acc_cost_matrix.T,
                       origin='lower',
                       cmap='gray',
                       interpolation='nearest')
            plt.plot(path[0], path[1], 'w')
            plt.xlabel('Subject1')
            plt.ylabel('Subject2')
            plt.title(
                f'DTW Minimum Path with minimum distance: {np.round(d,2)}')
            plt.show()
Example No. 20
def dynamic_time_warping(df, feature1, feature2):
    """Compute and plot dynamic time warping of feature1 and feature2, 
    e.g. instant_phase_sync(df, "Inflation", "Wage") compute and plot the dynamic_time_wraping between the "Inflation" and the "Wage" columns

    :param: df, pandas.DataFrame, data contains different features (columns)
    :param: feature1, str, name of the column, e.g. "Inflation"
    :param: feature2, str, name of another column e.g. "Wage"
    """
    d1 = df[feature1].interpolate().values
    d2 = df[feature2].interpolate().values
    d, cost_matrix, acc_cost_matrix, path = accelerated_dtw(d1,d2, dist='euclidean')

    plt.imshow(acc_cost_matrix.T, origin='lower', cmap='gray', interpolation='nearest')
    plt.plot(path[0], path[1], 'w')
    plt.xlabel(feature1)
    plt.ylabel(feature2)
    plt.title(f'DTW Minimum Path with minimum distance: {np.round(d,2)}')
    plt.show()
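A hypothetical usage sketch (the column names are illustrative; assumes accelerated_dtw and matplotlib are imported as in the snippet):

import numpy as np
import pandas as pd

t = np.linspace(0, 10, 200)
df = pd.DataFrame({'Inflation': np.sin(t), 'Wage': np.sin(t - 0.5)})  # toy data
dynamic_time_warping(df, 'Inflation', 'Wage')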
Example No. 21
def average_asynchrony(list_ecg, ecg_comparaison, plot=False):
    """
    Parameters :
        - list_ecg : (4096,12) corresponding
        - ecg_comparaison : ECG to compare considering asynchrony

    If plot :
        plots meaningful curves about asynchrony

    Returns: array (2,)
        0 : average all over the 12 ECGs of the l1 difference of indexes obtained with dtw
        1 : average all over the 12 ECGs of the l2 difference of indexes obtained with dtw
    """

    d2 = ecg_comparaison.reshape(-1, 1)
    res_l1 = 0
    res_l2 = 0
    for k in range(12):
        d1 = list_ecg[k].reshape(-1, 1)
        d, cost_matrix, acc_cost_matrix, path = accelerated_dtw(
            d1, d2, dist='euclidean')
        res_l1 += np.sum((path[1] - path[0]) / 400)
        res_l2 += np.sum((path[1] - path[0]) * (path[1] - path[0]) / 16000)
        if plot:
            figure = plt.figure(figsize=(10, 10))
            ax1 = figure.add_subplot(2, 1, 1)
            ax1.imshow(acc_cost_matrix.T,
                       origin='lower',
                       cmap='gray',
                       interpolation='nearest')
            ax1.plot(path[0], path[1], 'w')
            ax1.set_xlabel('ECG 0')
            ax1.set_ylabel('ECG 1')
            ax1.set_title(
                f'DTW Minimum Path with minimum distance: {np.round(d, 2)}')

            ax2 = figure.add_subplot(2, 1, 2)
            ax2.plot(list_ecg[0], label="ecg0")
            ax2.plot(list_ecg[1], label="ecg1")
            ax2.set_title("Comparison of the ECGs")
            ax2.legend()

            plt.show()
    return np.array([res_l1 / 12, res_l2 / 12])
Example No. 22
 def __call__(self, u, v, k=None):
     if (u, v) in self.dist_dic:
         return self.dist_dic[u, v]
     
     k = self.k if k is None else k
     seq_u = self.sorted_degree_matrix_list[k][u].data
     seq_v = self.sorted_degree_matrix_list[k][v].data
     result = accelerated_dtw(seq_u, seq_v, 'euclidean')
     dist = result[0]
     assert dist >= 0, 'dist: %.4f, %s_%s' % (dist, str(seq_u), str(seq_v))
     if k == 0:
         return dist
     elif k == self.k:
         r = dist + self.__call__(u, v, k-1)
         self.dist_dic[u, v] = r
         self.dist_dic[v, u] = r
         return r
     else:
         return dist + self.__call__(u, v, k-1)
Example No. 23
    def _kmeans_iteration(self, data, centroids):
        """A single iteration of k-means (Lloyd's algorithm).

        Parameters
        ----------
        data : a list of pandas Series

        centroids : the current centroids as a list of pandas Series, as many as self.num_clust

        Returns
        -----------------------
        assignments : the current sample assignments as a dictionary in the form { e : [index] },
                      where e is the centroid number and the indexes in the list are the indexes
                      of the data elements assigned to the relevant centroid
        """
        # compute assignments
        assignments = {e: [] for e in range(self.num_clust)}
        for ind, i in enumerate(data):
            min_dist = float('inf')
            closest_clust = None
            for c_ind, j in enumerate(centroids):
                fastDTW, _, _, _ = accelerated_dtw(array(i),
                                                   array(j),
                                                   dist=self.criterion,
                                                   warp=self.w)
                if fastDTW <= min_dist:
                    min_dist = fastDTW
                    closest_clust = c_ind
            if closest_clust in assignments:
                assignments[closest_clust].append(ind)
        # update centroids
        new_centroids = centroids.copy()
        for key in assignments:
            clust_sum = 0
            for k in assignments[key]:
                clust_sum = clust_sum + data[k]
            if len(assignments[key]) > 0:
                new_centroids[key] = clust_sum / len(assignments[key])

        return assignments, new_centroids
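A hedged driver sketch (fit_sketch is hypothetical, not part of the original class): repeat the iteration until the assignments stop changing.

    def fit_sketch(self, data, n_iter=10):
        # naive initialization for illustration only; the real class may differ
        centroids = data[:self.num_clust]
        prev = None
        for _ in range(n_iter):
            assignments, centroids = self._kmeans_iteration(data, centroids)
            if assignments == prev:  # assignments stabilized: converged
                break
            prev = assignments
        return assignments, centroids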
Example No. 24
def run_dtw(ori_val, gen_val, metric):
    fontsize = 22
    matplotlib.rcParams.update({'font.size': fontsize})
    fig, axs = plt.subplots(2, 3, figsize=(27, 18))
    fig.suptitle('Dynamic Time Warping for ' + metric)
    min_paths = []

    for i in range(len(gen_val)):
        y = math.floor(i / 3)
        x = i % 3

        d, cost_matrix, acc_cost_matrix, path = accelerated_dtw(
            ori_val, gen_val[i], dist='euclidean')
        min_paths.append(d)
        axs[y, x].imshow(acc_cost_matrix.T,
                         origin='lower',
                         cmap='gray',
                         interpolation='nearest')
        axs[y, x].plot(path[0], path[1], 'w')
        #  axs[y,x].set_title(f'dtw min path distance: {np.round(d,2)}', fontsize=fontsize)
        axs[y, x].set(xlabel='generated data run 1',
                      ylabel='original data run 1')

    for ax in axs.flat:
        ax.set(xlabel='generated data', ylabel='original data')

    for ax in axs.flat:
        ax.label_outer()

    plt.tight_layout()
    plt.savefig('dtw_lstm.pdf')
    plt.show()

    min_paths = np.array(min_paths)
    min_paths = min_paths / min_paths.max()  # normalize by the largest distance
    return min_paths
Example No. 25
def compute_batch(i_batch):
    start_idx = i_batch*batch_size
    end_idx = start_idx + batch_size
    if end_idx > n_sample:
        end_idx = n_sample

    batch_beat = all_beat[start_idx:end_idx]
    print(batch_beat.shape)
    local_batch_beat = batch_beat.shape[0]
    mat = np.zeros((local_batch_beat, local_batch_beat))

    for i in tqdm(range(0, local_batch_beat-1)):
        for j in range(i+1, local_batch_beat):
        
            beat1 = batch_beat[i]
            beat2 = batch_beat[j]
            beat1_dist = beat1 - np.mean(beat1)
            beat2_dist = beat2 - np.mean(beat2)
            
            d, cost_matrix, acc_cost_matrix, path = accelerated_dtw(beat1_dist, beat2_dist, dist='euclidean', warp=wrap)
            mat[i,j] = d

    np.save('sim_mat/{}.npy'.format(i_batch), mat)
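Note that only the upper triangle of mat is filled. If the full symmetric matrix is needed downstream, it can be mirrored after loading; a small sketch (the batch index 0 is hypothetical):

import numpy as np

mat = np.load('sim_mat/0.npy')
mat = mat + mat.T  # mirror the upper triangle; the diagonal stays zero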
Example No. 26
 def time_distance(signature, runtime):
     """
         This method allow to compute similarity between 2 signals using
         dinamic time warping.
         Args:
             signature (pandas.DataFrame): signal used as signature for the comparison
             runtime (pandas.DataFrame): signal to compare with the signature
         Returns:
             pandas.DataFrame
     """
     try:
         signature.sort_values(by=["TimeStamp"],
                               inplace=True,
                               ascending=True)
         runtime.sort_values(by=["TimeStamp"], inplace=True, ascending=True)
         result = runtime.copy().tail(1)
         signature = signature["Value"].values
         runtime = runtime["Value"].values
         result_dtw = accelerated_dtw(signature, runtime, dist=euclidean)
         kpi = max(0, 1 - result_dtw[0])
         result["Value"] = kpi
         return result
     except:
         raise
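A hypothetical usage sketch; it assumes the method is reachable as a plain function, that euclidean inside it is scipy.spatial.distance.euclidean, and that both frames carry 'TimeStamp' and 'Value' columns:

import numpy as np
import pandas as pd

sig = pd.DataFrame({'TimeStamp': range(100), 'Value': np.sin(np.linspace(0, 6, 100))})
run = pd.DataFrame({'TimeStamp': range(100), 'Value': np.sin(np.linspace(0, 6, 100)) + 0.1})
print(time_distance(sig, run))  # a one-row frame whose Value is the similarity KPI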
Example No. 27
        # if there is no start time, we don't use matched words from the previous sentence for estimation

        if aw_within == []:
            guessing_words_fuc(sw['sentence_words'], sw['start_time'],
                               sw['end_time'], estimated_words,
                               output_table_as_ls, 'full')
            leftovers = {"words": []}
        else:

            # dtw
            # acm = accumulated cost matrix, use path here
            print(np.array(sw['sentence_words_stemmed']))
            print(np.array(aw_within_stemmed_words))
            distance, cost, acm, path = accelerated_dtw(
                np.array(sw['sentence_words_stemmed']),
                np.array(aw_within_stemmed_words),
                edit_distance,
                warp=1)
            #print(path)

            # at the moment it's not good at dealing with situations where one Amazon word is aligned with multiple
            # subtitle words (i.e. Amazon gets fewer words than the subtitle), so e.g. if "this, this" in the subtitle is
            # aligned with the same "this" in the transcript, the second wouldn't get a timing because it would be repeated;
            # we can count how many instances like these there are first ...
            path_reo = reorganize_path(path)
            for i in range(0, len(path_reo)):
                # container of words that don't have a time tag
                amazon_word_indexes = path_reo[i][1]
                sentence_word_indexes = path_reo[i][0]
                #print(aw_stemmed_words_aligned)
                levenshtein_similarity = [
Example No. 28
        df_match.loc[list_match_date4[i].isoformat()]['CLOSE'].to_numpy()[-1] -
        list_match_moves[i][-1])

# %% compute DTW in a for loop
#list_dtw = []

from dtw import accelerated_dtw
from tqdm import tqdm
max_rank = 20
list_price_chg_forecast = []

for np_moves_match in tqdm(list_match_moves):  # iterate over each one-day window in match_moves
    list_dtw = []
    price_chg_forecast = 0
    for np_moves_data in (list_data_moves):
        d, _, _, _ = accelerated_dtw(stats.zscore(np_moves_match),
                                     stats.zscore(np_moves_data), 'euclidean')
        list_dtw.append(d)
        list_rank = stats.rankdata(list_dtw).tolist()

    sum_chg_mult_weight = 0
    sum_weight = 0
    for rank in range(1, max_rank + 1):
        index = list_rank.index(rank)
        weight = 1 / list_dtw[index]
        #date3 = list_data_date3[index]
        date4 = list_data_date4[index]
        pivot_price = list_data_moves[index][-1]
        close_price = df_data.loc[date4.isoformat()]['CLOSE'].to_numpy()[-1]
        price_chg = close_price - pivot_price
        sum_chg_mult_weight += price_chg * weight
        sum_weight += weight
Example No. 29
def fastdtw(x, y):
    euclidean = lambda x, y: (x - y)**2  # note: squared difference (squared Euclidean in 1-D), despite the name
    dist, cost_matrix, acc_cost_matrix, path = accelerated_dtw(x, y, euclidean)
    return dist
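A toy check, assuming accelerated_dtw from the dtw package is in scope; these two sequences align exactly under warping, so the distance is 0.0:

import numpy as np

print(fastdtw(np.array([0., 1., 2.]), np.array([0., 1., 1., 2.])))  # 0.0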
Example No. 30
        ##
        df_last = df_chunks[-1]
        sizeleft = tsize - len(df_last)
        if sizeleft < int(tsize / 2) + 1:

            curatedlist.append(df_last)

    else:
        curatedlist.append(dfraw)

#%% calculate distance scores based on the DTW algorithm
print('calculating dtw..')
distMatrix = np.zeros([len(curatedlist), len(curatedlist)])
for i in range(len(curatedlist)):
    for j in range(len(curatedlist)):
        df_1 = curatedlist[i]
        df_2 = curatedlist[j]
        aX = df_1.xumap.values
        bX = df_1.yumap.values
        aY = df_2.xumap.values
        bY = df_2.yumap.values
        X = np.concatenate((aX.reshape(-1, 1), bX.reshape(-1, 1)), axis=1)
        Y = np.concatenate((aY.reshape(-1, 1), bY.reshape(-1, 1)), axis=1)
        d, _, _, _ = accelerated_dtw(X, Y, dist='euclidean')
        distMatrix[i, j] = d
#%% cluster using louvain
print('calculating louvain')
G = nx.from_numpy_matrix(distMatrix)
nx.draw(G)
partition = community.best_partition(G)