Example No. 1
    def dwt_filter_with_reference(self, beats_dict, ref, threshold,
                                  interp_dim):
        good_beats = []
        mask = {}
        dwt_dist = {}

        keys = list(sorted(beats_dict.keys()))
        for k in keys:
            b = beats_dict[k]
            _b = np.array(b)
            _b = _b - _b.min()
            _b = _b / _b.max()

            x, _b = interpolate_beat(_b, interp_dim)

            dist = dtw(_b, ref)
            dwt_dist[k] = "%.2f" % dist

            if dist < threshold:
                good_beats.append(b)
                mask[k] = True
            else:
                mask[k] = False

        return good_beats, mask, dwt_dist
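The interpolate_beat helper is not shown in this snippet. Below is a minimal, self-contained sketch of the same filtering idea, using np.interp as a stand-in for the resampling step and tslearn's dtw; the reference and beat data are made up.

import numpy as np
from tslearn.metrics import dtw

def filter_beats(beats, ref, threshold, interp_dim=100):
    # Keep only beats whose DTW distance to the reference stays below the threshold.
    kept = []
    for beat in beats:
        b = np.asarray(beat, dtype=float)
        b = (b - b.min()) / (b.max() - b.min())            # rescale to [0, 1]
        x_new = np.linspace(0, len(b) - 1, interp_dim)     # resample to a fixed length
        b = np.interp(x_new, np.arange(len(b)), b)
        if dtw(b, ref) < threshold:
            kept.append(beat)
    return kept

ref = np.sin(np.linspace(0, np.pi, 100))                   # hypothetical reference beat
beats = [np.sin(np.linspace(0, np.pi, 80)) + 0.05 * np.random.randn(80) for _ in range(5)]
print(len(filter_beats(beats, ref, threshold=1.0)))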
Example No. 2
def ts_match(df1, df2, start, length):
    df1 = df1[start:start + length]
    time_start = df1.index[0]
    print("匹配的时间起点是{}".format(time_start))
    # print("相应的数据序列为{}".format(df1))
    window = 48
    match_time_start = None
    for j in df2.index:
        if j > time_start:
            print("下游匹配到的时间起点是{}".format(j))
            match_time_start = df2.index.tolist().index(j)
            print("下游对应的序号为{}".format(match_time_start))
            break
    dtw_score = list()
    match_df2 = None
    # match_df2 = df2[347:347 + length]
    for o in range(window):
        cut_df2 = df2[match_time_start + o:match_time_start + length + o]
        dtw_sim = dtw(df1['氨氮'].values,
                      cut_df2['氨氮'].values,
                      global_constraint="sakoe_chiba",
                      sakoe_chiba_radius=3)
        # dtw_sim = dtw(TimeSeriesScalerMeanVariance().fit_transform(df1['氨氮'].values),
        #               TimeSeriesScalerMeanVariance().fit_transform(cut_df2['氨氮'].values),
        #               global_constraint="sakoe_chiba", sakoe_chiba_radius=3)
        # mod_dtw, _ = de.TimeSeriesSimilarity(df1.tolist(), cut_df2.tolist())
        dtw_score.append(dtw_sim)
        if dtw_score.index(max(dtw_score)) == o:
            print("最大的dtw得分偏移了{}".format(o))
            print("相应的开始时间点为{}".format(cut_df2.index[0]))
            match_df2 = cut_df2
    print("滑动窗口的得分列表为{}".format(dtw_score))
    return match_df2, dtw_score.index(max(dtw_score))
Example No. 3
 def classify_DTW(self):
     correct_count = 0
     for i in range(self.mytestseries.shape[0]):
         for j in range(self.class_distance.shape[1]):
             #align=mypair.DTW(self.mytestseries[i,:],self.myclassavgs[j,:])
             #align.calcglobalcost_UDTW()
             #align.findwarppath()
             self.class_distance[0, j] = metrics.dtw(
                 self.mytestseries[i, :], self.myclassavgs[j, :]
             )  #math.sqrt(sum(math.square(align.Warpedfv-align.Warpedsv)))
         classified_as = list(self.class_distance[0]).index(
             min(self.class_distance[0]))
         print('distance from each classes:', self.class_distance)
         if classified_as == self.zero_indexed_traget_class[i]:
             print('Data:', i, ' target:',
                   self.zero_indexed_traget_class[i], ' classified as:',
                   classified_as)
             self.confusion_matrix[classified_as, classified_as] += 1
             correct_count = correct_count + 1
         else:
             print('Data:', i, ' target:',
                   self.zero_indexed_traget_class[i], ' classified as:',
                   classified_as)
             self.confusion_matrix[self.zero_indexed_traget_class[i],
                                   classified_as] += 1
     return (correct_count / self.mytestseries.shape[0]) * 100
Example No. 4
    def _get_dtw_score(
        self,
        adata1,
        adata2,
        genenamelist,
    ):
        dist_list = []
        expression1 = adata1[:, genenamelist].X
        expression2 = adata2[:, genenamelist].X

        if not isinstance(expression1, np.ndarray):
            expression1 = expression1.toarray()
        if not isinstance(expression2, np.ndarray):
            expression2 = expression2.toarray()

        mmscaler = MinMaxScaler()
        expression1 = mmscaler.fit_transform(expression1)
        expression2 = mmscaler.fit_transform(expression2)

        for i in range(len(genenamelist)):
            dist = dtw(
                expression1[:, i], expression2[:, i])
            dist_list.append(dist)

        return dist_list
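A stand-alone sketch of the same per-gene comparison, with made-up NumPy matrices in place of the AnnData objects (rows are observations, columns are genes):

import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tslearn.metrics import dtw

expr1 = np.random.rand(50, 3)   # hypothetical dataset 1: 50 observations x 3 genes
expr2 = np.random.rand(60, 3)   # hypothetical dataset 2: 60 observations x 3 genes

expr1 = MinMaxScaler().fit_transform(expr1)
expr2 = MinMaxScaler().fit_transform(expr2)

# One DTW distance per gene, comparing the two profiles column by column.
dist_list = [dtw(expr1[:, i], expr2[:, i]) for i in range(expr1.shape[1])]
print(dist_list)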
Example No. 5
def dtw_image(imarray):
    r, c, b = tuple(imarray.shape)
    ref1 = [0, 0, 0, 0, 0, 0]
    ref2 = [65535, 65535, 65535, 65535, 65535, 65535]
    #ref2 = imarray[10, 0, :]
    ref3 = imarray[20, 0, :]
    ref4 = imarray[30, 0, :]
    ref5 = imarray[40, 0, :]
    ref6 = imarray[50, 0, :]
    ref7 = imarray[60, 0, :]
    ref8 = imarray[40, 0, :]
    ref9 = imarray[40, 0, :]
    dtw_image = np.zeros([r, c], dtype=float)
    for i in range(r):
        for j in range(c):
            distance1 = ts.dtw(ref1, imarray[i, j, :])
            #distance2 = ts.dtw(ref2, imarray[i, j, :])
            #distance3 = ts.dtw(ref3, imarray[i, j, :])
            #distance4 = ts.dtw(ref4, imarray[i, j, :])
            #distance5 = ts.dtw(ref5, imarray[i, j, :])
            #distance6 = ts.dtw(ref6, imarray[i, j, :])
            #distance7 = ts.dtw(ref7, imarray[i, j, :])
            #distance8 = ts.dtw(ref8, imarray[i, j, :])
            #distance9 = ts.dtw(ref9, imarray[i, j, :])
            #distance=min(distance1,distance2,distance3,distance4,distance5)
            dtw_image[i, j] = distance1
    #dtw_image=(dtw_image/dtw_image.max())*255.0
    return dtw_image
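A rough usage sketch, assuming dtw_image above is defined in a module where tslearn.metrics is imported as ts; the 6-band image cube below is made up:

import numpy as np
import tslearn.metrics as ts

cube = np.random.randint(0, 65535, size=(64, 64, 6)).astype(float)  # hypothetical 64x64 image, 6 bands
dist_map = dtw_image(cube)   # per-pixel DTW distance to the all-zeros reference spectrum
print(dist_map.shape)        # (64, 64)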
Example No. 6
def calculate_dict(mfcc_values, rolloff_values, names, labels):
    final_dict = dict()
    for i in names:
        final_dict[i] = []
    for id1, (mf1, ro1, nm1,
              lb1) in enumerate(zip(mfcc_values, rolloff_values, names,
                                    labels)):
        for id2, (mf2, ro2, nm2, lb2) in enumerate(
                zip(mfcc_values, rolloff_values, names, labels)):
            if id1 < id2:
                current_dtw = dtw(mf1, mf2)
                # current_dtw = dtw(mf1 + ro1, mf2 + ro2)
                final_dict[nm1].append({
                    "name": nm2,
                    "label": lb2,
                    "distance": current_dtw
                })
                final_dict[nm2].append({
                    "name": nm1,
                    "label": lb1,
                    "distance": current_dtw
                })
    for final_key, final_item in final_dict.items():
        final_dict[final_key] = sorted(final_item, key=itemgetter('distance'))
        # print(key, len(final_dict[key]))
    return final_dict
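A hypothetical call, assuming the function above is in scope along with itemgetter (from operator) and dtw (from tslearn.metrics); the per-clip feature vectors are made up:

import numpy as np

mfccs = [np.random.rand(40) for _ in range(3)]      # stand-ins for per-clip MFCC sequences
rolloffs = [np.random.rand(40) for _ in range(3)]   # unused by the active dtw call above
names = ["a.wav", "b.wav", "c.wav"]
labels = ["dog", "cat", "dog"]

neighbours = calculate_dict(mfccs, rolloffs, names, labels)
# neighbours["a.wav"] lists the other clips sorted by ascending DTW distance.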
Example No. 7
def create_clusters(x, stratify, **kwargs):
    # Find indices for each PID in dataset
    pid_indices = find_indices(stratify)
    # Create DTW Average
    pid_averages, pid_labels = ts_average(x, pid_indices)
    # Create distance matrix with DTW on averages
    dtw_matrix = np.zeros((len(pid_averages), len(pid_averages)))
    for i in range(len(pid_averages)):
        for j in range(len(pid_averages)):
            dtw_matrix[i, j] = dtw(pid_averages[i], pid_averages[j])
    # Cluster using DBSCAN
    clusters = DBSCAN(**kwargs).fit(dtw_matrix)
    # Create dictionary of clusters and pid labels
    label_dict = {}
    for i, j in zip(clusters.labels_, pid_labels):
        label_dict[j] = i
    # Pass back indices for the original data based on clusters
    temp_labels = np.array([i.split('_')[0] for i in stratify])

    final_dict = {}
    for i in label_dict:
        if label_dict[i] not in final_dict:
            final_dict[label_dict[i]] = np.where(temp_labels == i)[0]
        else:
            final_dict[label_dict[i]] = np.concatenate(
                (final_dict[label_dict[i]], np.where(temp_labels == i)[0]))

    return final_dict
Example No. 8
def dtw_similarity(template_signal, acc_signal_windows):
    
    dist_similarity = []
    for i, v in enumerate(acc_signal_windows):
                
        dist_similarity.append([dtw(w, template_signal[i]) for w in v])

    return dist_similarity
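A small made-up usage, assuming the function above and tslearn's dtw are in scope; template_signal holds one template per channel and acc_signal_windows holds the candidate windows per channel:

import numpy as np

template_signal = [np.sin(np.linspace(0, 2 * np.pi, 50)) for _ in range(2)]       # 2 channels
acc_signal_windows = [[np.random.rand(50) for _ in range(3)] for _ in range(2)]   # 3 windows each

scores = dtw_similarity(template_signal, acc_signal_windows)
# scores[channel] is a list with one DTW distance per window.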
Example No. 9
    def eval_model(self,
                   net,
                   loader,
                   batch_size,
                   gamma,
                   verbose=1,
                   target_mean=0,
                   target_std=0):
        criterion = torch.nn.MSELoss()
        losses_mse = []
        losses_dtw = []
        losses_tdi = []

        for i, data in enumerate(loader, 0):
            loss_mse, loss_dtw, loss_tdi = torch.tensor(0), torch.tensor(
                0), torch.tensor(0)
            # get the inputs
            inputs, target = data

            # inputs, target, breakpoints = data

            inputs = torch.tensor(inputs, dtype=torch.float32).to(self.device)
            target = torch.tensor(target, dtype=torch.float32).to(self.device)
            # batch_size, N_output = target.shape[0:2]
            outputs = net(inputs)

            # MSE
            loss_mse = criterion(target, outputs)
            loss_dtw, loss_tdi = 0, 0
            # DTW and TDI
            for k in range(batch_size):
                target_k_cpu = target[k, :,
                                      0:1].view(-1).detach().cpu().numpy()
                output_k_cpu = outputs[k, :,
                                       0:1].view(-1).detach().cpu().numpy()

                loss_dtw += dtw(target_k_cpu, output_k_cpu)
                path, sim = dtw_path(target_k_cpu, output_k_cpu)

                Dist = 0
                for i, j in path:
                    Dist += (i - j) * (i - j)
                loss_tdi += Dist / (self.N_output * self.N_output)

            loss_dtw = loss_dtw / batch_size
            loss_tdi = loss_tdi / batch_size

            # print statistics
            losses_mse.append(loss_mse.item())
            losses_dtw.append(loss_dtw)
            losses_tdi.append(loss_tdi)
            ## TODO plotting eval

        print(' Eval mse= ',
              np.array(losses_mse).mean(), ' dtw= ',
              np.array(losses_dtw).mean(), ' tdi= ',
              np.array(losses_tdi).mean())
Example No. 10
def evaluate_iteration(model, criterion, X_test_left, X_test_right, y_test):
    model.eval()

    x_test_left = np.transpose(X_test_left, [1, 0, 2])
    x_test_right = np.transpose(X_test_right, [1, 0, 2])
    y_test = np.transpose(y_test, [1, 0, 2])

    x_test_left_tensor = numpy_to_tvar(x_test_left)
    x_test_right_tensor = numpy_to_tvar(x_test_right)

    y_test_tensor = numpy_to_tvar(y_test)

    output, atten = model(x_test_left_tensor,
                          x_test_right_tensor, y_test_tensor, 0)

    loss = criterion(output, y_test_tensor)
    loss_mse, loss_dtw, loss_tdi = 0, 0, 0
    loss_mae, loss_RMSLE, loss_RMSE = 0, 0, 0

    for k in range(BATCH_SIZE):
        target_k_cpu = y_test_tensor[:, k, 0:1].view(-1).detach().cpu().numpy()
        output_k_cpu = output[:, k, 0:1].view(-1).detach().cpu().numpy()

        loss_dtw += dtw(target_k_cpu, output_k_cpu)
        path, sim = dtw_path(target_k_cpu, output_k_cpu)

        Dist = 0
        for i, j in path:
            Dist += (i-j)*(i-j)
        loss_tdi += Dist / (N_output*N_output)

        loss_mae += mean_absolute_error(target_k_cpu, output_k_cpu)
        loss_RMSLE += np.sqrt(mean_squared_error(target_k_cpu, output_k_cpu))
        loss_RMSE += np.sqrt(mean_squared_error(target_k_cpu, output_k_cpu))

    loss_dtw = loss_dtw / BATCH_SIZE
    loss_tdi = loss_tdi / BATCH_SIZE
    loss_mae = loss_mae / BATCH_SIZE
    loss_RMSLE = loss_RMSLE / BATCH_SIZE
    loss_RMSE = loss_RMSE / BATCH_SIZE

    # # metric
    # output_numpy = output.cpu().data.numpy()
    # y_test_numpy = y_test_tensor.cpu().data.numpy()

    # loss_mae = mean_absolute_error(y_test_numpy,output_numpy)
    # loss_RMSLE = np.sqrt(mean_squared_error(y_test_numpy,output_numpy))
    # loss_RMSE = np.sqrt(mean_squared_error(y_test_numpy,output_numpy))

    # test_loss_meter.add(loss.item())

    # plot_result(output, y_test_tensor)
    # show_attention(x_test_left_tensor, x_test_right_tensor,output,atten)
    # plt.show()

    return loss.item(), loss_mae, loss_RMSLE, loss_RMSE, loss_dtw
Example No. 11
def true_dilate(target, pred, alpha):  # target, pred [seq_length]
    N_output = target.shape[0]
    loss_dtw = dtw(target, pred)
    path, sim = dtw_path(target, pred)
    Dist = 0
    for ii, jj in path:
        Dist += (ii - jj) * (ii - jj)
    loss_tdi = Dist / (N_output * N_output)
    loss_dilate = alpha * loss_dtw + (1 - alpha) * loss_tdi
    return loss_dtw, loss_tdi, loss_dilate
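A quick check of the function above on made-up series (dtw and dtw_path from tslearn.metrics must be in scope):

import numpy as np

t = np.linspace(0, 2 * np.pi, 50)
target = np.sin(t)
pred = np.sin(t + 0.3)   # slightly shifted prediction

loss_dtw, loss_tdi, loss_dilate = true_dilate(target, pred, alpha=0.5)
print(loss_dtw, loss_tdi, loss_dilate)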
Example No. 12
def search_top_n_similar_ts(ts_query=None, data=None, n=10, use_lb_kim=False):
    """For the query ts, search the top-n similar ts in data object, return
       the searching result.
    """
    start = time()
    min_heap, time_spend = [], 0

    for ind, ts_candidate in enumerate(data):
        # Initializing minimum heap(n + 1 for excluding itself)
        if len(min_heap) < n + 1:
            dtw_dist = -dtw(ts_query, ts_candidate)
            hq.heappush(min_heap, [dtw_dist, ind])
            continue

        # STEP 1: lb_kim_hierarchy puring
        # -------------------
        bsf = min_heap[0][0]
        if use_lb_kim:
            lb_kim = -np.sqrt(lb_kim_hierarchy(ts_query, ts_candidate, bsf**2))
            if lb_kim < bsf:
                continue

        # STEP 2: DTW calculation
        # -------------------
        dtw_dist = -dtw(ts_query, ts_candidate)
        if dtw_dist < bsf:
            continue
        else:
            hq.heapreplace(min_heap, [dtw_dist, ind])
    end = time()
    time_spend = end - start

    # Saving results
    top_n_searching_res = sorted(min_heap, key=lambda t: -t[0])
    top_n_searching_res = [[-t[0], t[1]] for t in top_n_searching_res][1:]

    searching_res = {}
    searching_res["top_n_searching_res"] = top_n_searching_res
    searching_res["total_searched_ts"] = len(data)
    searching_res["total_time_spend"] = time_spend
    return searching_res
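A hypothetical call on random data, assuming the function above, heapq imported as hq, time from the time module, and tslearn's dtw are all in scope (lb_kim_hierarchy is only needed when use_lb_kim=True):

import numpy as np

data = [np.random.rand(64) for _ in range(200)]   # made-up candidate series
query = data[0]                                   # query one of the stored series

res = search_top_n_similar_ts(ts_query=query, data=data, n=5)
# res["top_n_searching_res"] holds [dtw_distance, index] pairs for the 5 nearest
# neighbours of the query, excluding the query itself.
print(res["top_n_searching_res"])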
Example No. 13
 def quantization_error(self, data):
     """Returns the quantization error computed as the average
     distance between each input sample and its best matching unit."""
     self._check_input_len(data)
     error = 0
     for x in data:
         error += dtw(x,
                      self._weights[self.winner(x)],
                      global_constraint=self.gl_const,
                      sakoe_chiba_radius=self.scr,
                      itakura_max_slope=self.ims)
     return error / len(data)
Example No. 14
 def nn_dtw(self, tv):
     costs = []
     for nn in self.curr_nn_inds:
         tr_v = self.X_train[nn]
         cost = dtw(tr_v,
                    tv,
                    global_constraint="sakoe_chiba",
                    sakoe_chiba_radius=self.sakoe_chiba_radius)
         costs.append(cost)
     sorted_cost_inds = np.argsort(np.array(costs))
     self.curr_nn_inds = np.asarray(self.curr_nn_inds)[sorted_cost_inds]
     self.curr_nn_inds = self.curr_nn_inds[:self.n_neighbors]
Example No. 15
 def nn_dtw(self):
     dtw_nbrs_all_query = []
     for te_idx, nbrs in enumerate(self.nbrs_all_query):
         costs = []
         for nn in nbrs:
             tr_v = self.X_train[nn]
             te_v = self.X_test[te_idx]
             cost = dtw(tr_v,
                        te_v,
                        global_constraint="sakoe_chiba",
                        sakoe_chiba_radius=self.sakoe_chiba_radius)
             costs.append(cost)
         sorted_cost_inds = np.argsort(np.array(costs))
         nbrs = np.asarray(nbrs)[sorted_cost_inds]
         nbrs = nbrs[:self.n_neighbors]
         dtw_nbrs_all_query.append(nbrs)
     self.nbrs_all_query = dtw_nbrs_all_query
Example No. 16
def dtw_distances(X,
                  Y=None,
                  Y_norm_squared=None,
                  squared=False,
                  X_norm_squared=None):

    X = np.asarray(X, order='c')
    Y = np.asarray(Y, order='c')

    s = X.shape
    sB = Y.shape
    if len(s) != 2:
        raise ValueError("XA must be a 2-dimensional array.")
    if len(sB) != 2:
        raise ValueError("XB must be a 2-dimensional array.")
    if s[1] != sB[1]:
        raise ValueError(
            "XA and XB must have the same number of columns "
            "(i.e. feature dimension.)", s, sB)

    mA = s[0]
    mB = sB[0]

    XA = []
    for itemsx in X:
        XB = []
        for itemx in itemsx:
            XB.append(itemx)
        XA.append(XB)

    YA = []
    for itemsy in Y:
        YB = []
        for itemy in itemsy:
            YB.append(itemy)
        YA.append(YB)

    dm = []
    for i in range(0, mA):
        dm2 = []
        for j in range(0, mB):
            U = XA[i]
            V = YA[j]
            dm2.append(dtw(U, V))
        dm.append(dm2)
    return np.array(dm)
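A minimal made-up call; each row of X and Y is treated as a univariate series, so the result is a len(X) by len(Y) matrix of pairwise DTW distances:

import numpy as np

X = np.random.rand(3, 20)   # 3 series of length 20
Y = np.random.rand(4, 20)   # 4 series of length 20

dm = dtw_distances(X, Y)
print(dm.shape)             # (3, 4)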
Example No. 17
 def _activate(self, x):
     """Updates matrix activation_map, in this matrix
        the element i,j is the response of the neuron i,j to x."""
     # s = subtract(x, self._weights)  # x - w
     it = nditer(self._activation_map, flags=['multi_index'])
     while not it.finished:
         # || x - w ||
         activation_cell = 0
         for i in range(self._bands):
             activation_cell += self._w[i] * dtw(
                 x[i],
                 self._weights[i][it.multi_index],
                 global_constraint=self.gl_const,
                 sakoe_chiba_radius=self.scr,
                 itakura_max_slope=self.ims)
             self._activation_map[it.multi_index] = activation_cell
         it.iternext()
Example No. 18
 def fit_dtw(self):
     signals = [
         self.gyro_x
     ]  #, self.accel_y, self.accel_z, self.gyro_x, self.gyro_y, self.gyro_z]
     signals_out = [np.zeros((sg.shape[0], sg.shape[0])) for sg in signals]
     for k, sensor in enumerate(signals):
         sensor -= np.mean(sensor, axis=1).reshape(sensor.shape[0], 1)
         sensor /= (np.max(sensor, axis=1) -
                    np.min(sensor, axis=1)).reshape(sensor.shape[0], 1)
         for i, template in enumerate(sensor):
             for j, s in enumerate(sensor):
                 sim = dtw(template, s)
                 # correlation = np.corrcoef(template.reshape(-1), s.reshape(-1))
                 signals_out[k][i, j] = sim
             print(k, i)
     sns.heatmap(pd.DataFrame(signals_out[0]))
     plt.show()
Example No. 19
def create_dtw_scores(data_yearly_agg: pd.DataFrame) -> pd.DataFrame:
    # remove our GENT_CONTROL city/state from our city data
    state_city_lookup = get_city_state_lookup(data_yearly_agg,
                                              ['State', 'City'])
    state_city_lookup.remove(('GENT_CONTROL', 'GENT_CONTROL'))

    # dtw df for each city when compared to our GENT_CONTROL average
    dtw_df = pd.DataFrame(columns=['State', 'City', 'dtw_score'])

    # Gent. control df
    control_df = data_yearly_agg.loc[
        data_yearly_agg.State == 'GENT_CONTROL'].loc[data_yearly_agg.City ==
                                                     'GENT_CONTROL']
    control_df.drop(columns=['State', 'City', 'Date'], inplace=True)

    for i in range(0, len(state_city_lookup)):
        # State/City names
        state = state_city_lookup[i][0]
        city = state_city_lookup[i][1]

        # locate each city and drop columns
        single_city_df = data_yearly_agg.loc[
            data_yearly_agg.State == state].loc[data_yearly_agg.City == city]
        single_city_df.drop(columns=['State', 'City', 'Date'], inplace=True)

        # calculate DTW score
        dtw_score = dtw(single_city_df.values, control_df.values)
        # Append city row to dtw df
        dtw_df = dtw_df.append(
            {
                'State': state,
                'City': city,
                'dtw_score': dtw_score
            },
            ignore_index=True)

    # add our GENT_CONTROL back with a 0.0 DTW score
    dtw_df = dtw_df.append(
        {
            'State': 'GENT_CONTROL',
            'City': 'GENT_CONTROL',
            'dtw_score': 0.0
        },
        ignore_index=True)
    return dtw_df
Example No. 20
def dtw_clustering(rdir=ROOT_DIR):

    ROOT_DIR = rdir
    filenames = []
    names = []
    for r,d,f in os.walk(ROOT_DIR):
        if d is not None:
            for company in d:
                names.append(company)

        for file in f:
            if file.endswith('.csv'):
                filenames.append(r+'/'+file)

    df_list = [pd.read_csv(filename, delimiter=";", engine='python', encoding='utf-8').sort_index(axis=0, ascending=False).reset_index() for filename in filenames]
    df_list = [df[df['Vol.'] != '-'].reset_index() for df in df_list]
    df_list = [df['Price'] for df in df_list]
    df_list = [(df-df.min())/(df.max()-df.min()) for df in df_list]

    from tslearn.metrics import dtw
    sz = len(filenames)
    distance_matrix = np.zeros((sz,sz))
    for i in range(sz):
        for j in range(sz):
            distance_matrix[i,j] = dtw(df_list[i], df_list[j])

    from sklearn.cluster import AgglomerativeClustering
    num_cluster = 3
    clustering = AgglomerativeClustering(n_clusters=num_cluster, affinity='precomputed', linkage='complete')
    clustering.fit(distance_matrix)
    
    print(clustering.labels_)
    
    cluster_corp = []
    cluster_files = [] 
    for i in range(num_cluster):
        cluster_corp.append(list(np.array(names)[clustering.labels_==i]))
        cluster_files.append(list(np.array(filenames)[clustering.labels_==i]))

    return cluster_corp, cluster_files #, cluster_file_te
Example No. 21
def assign_cluster(list_centroids, seq):
    """Returns the label of a given sequence.

    Using the Dynamic Time Warping (DTW) similarity measure, it compares sequence similarities
    between  each centroid and the given sequence. The centroid with the least DTW distance is
    returned as the cluster label.

    Parameters
    ----------
    list_centroids: list of lists (e.g. [[0], [0, 1]] )
        sequences which have been selected as centroids of the dataset
    seq: list (e.g. [0, 1] )
        The given sequence
    """
    dtw_distances = np.zeros(len(list_centroids))
    for i, centroid in enumerate(list_centroids):
        dtw_distances[i] = dtw(centroid, seq)

    ordered_labels_by_dtw_distances = [
        x for _, x in sorted(zip(dtw_distances, list_centroids))
    ]

    return ordered_labels_by_dtw_distances[0]
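A tiny made-up example, assuming the function above and tslearn's dtw are in scope:

centroids = [[0.0, 0.0, 0.0], [0.0, 1.0, 2.0]]     # hypothetical cluster centroids
print(assign_cluster(centroids, [0.1, 0.9, 2.1]))  # -> [0.0, 1.0, 2.0], the closer centroid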