def find_lowest(self):
    """Print the candidate molecule whose peak frequencies best match the experiment.

    Every molecule id returned by ``get_molecules.get_mid_list(conn)`` that is
    not on the hard-coded skip list is compared against ``self.efreqlist``
    using FastDTW with a euclidean point cost.  The smallest distance, the
    id that produced it and that molecule's name are printed.  Nothing is
    returned.  ``conn`` is read from the enclosing module.
    """
    self.__get_data__()

    # Ids excluded from the search; the reason is not recorded in this file.
    # Membership is tested by equality (the old ``mid is 144`` compared
    # object identity, which only works by accident of int caching).
    skipped_mids = {144, 3, 4, 94, 35, 34, 49, 38, 37, 36, 39, 92}

    min_distance = 0
    min_path = None
    min_mid = None
    for mid in get_molecules.get_mid_list(conn):
        if mid in skipped_mids:
            continue

        frequencies, _ = get_peaks.get_frequency_intensity_list(conn, mid)
        distance, path = fastdtw(self.efreqlist, frequencies, dist=euclidean)

        # min_path doubles as the "nothing seen yet" marker.
        if min_path is None or distance < min_distance:
            min_distance = distance
            min_path = path
            min_mid = mid

    # Single-argument print() calls behave the same on Python 2 and 3.
    print(min_distance)
    print(min_mid)
    print(get_molecules.getName(conn, min_mid))
예제 #2
0
 def nearest(self, rsp):
     """Return the index of the class whose sampled training sequences are closest to ``rsp``.

     For every class in ``self.tr_seqs``, ``self.K`` sequences are drawn with
     ``random.sample`` and compared to ``rsp`` using FastDTW (euclidean point
     cost).  The smallest distance per class is kept and the position of the
     overall minimum is returned via ``np.argmin``.
     """
     best_per_class = []
     for class_seqs in self.tr_seqs:
         sampled = random.sample(class_seqs, self.K)
         best_per_class.append(
             min(fastdtw(rsp, candidate, dist=euclidean)[0]
                 for candidate in sampled))
     return np.argmin(best_per_class)
예제 #3
0
def process(filename):
    '''
    Derive the DTW alignment path between the source and target magnitude
    features of one file and apply it to every source feature stream.

    The aligned ``.mag``, ``.real``, ``.imag`` and ``.lf0`` files are written
    below ``src_aligned_feat_dir``.  Directories and feature dimensions are
    read from module-level names.

    :param filename: path to src mag file
    '''
    file_id = os.path.basename(filename).split(".")[0]
    print(file_id)

    ### DTW alignment -- align source with target parameters ###
    src_features, src_frame_number = io_funcs.load_binary_file_frame(
        os.path.join(src_feat_dir, file_id + ".mag"), mag_dim)
    tgt_features, tgt_frame_number = io_funcs.load_binary_file_frame(
        os.path.join(tgt_feat_dir, file_id + ".mag"), mag_dim)

    ### dtw align src with tgt ###
    distance, dtw_path = fastdtw.fastdtw(src_features, tgt_features)

    ### load dtw path
    dtw_path_dict = load_dtw_path(dtw_path)
    assert len(dtw_path_dict)==tgt_frame_number   # dtw length not matched

    ### align features: one pass per feature stream, same path for all
    streams = (
        (".mag", mag_dim),
        (".real", real_dim),
        (".imag", imag_dim),
        (".lf0", lf0_dim),
    )
    for extension, dimension in streams:
        aligner.align_src_feats(
            os.path.join(src_feat_dir, file_id + extension),
            os.path.join(src_aligned_feat_dir, file_id + extension),
            dimension,
            dtw_path_dict)
예제 #4
0
파일: main2.py 프로젝트: gohilankit/DBCAD
def isCorePoint(minPts, epsDist, currlong, currlat, minlong, minlat, rangelong, rangelat, timeindex, timewindow):
  """Decide whether a grid cell is a core point and record the result.

  The time slice of the cell at (currlat, currlong) is compared, using
  FastDTW with a euclidean point cost, with the slices of its eight
  neighbours (offsets come from the module-level ``neighbourlat`` and
  ``neighbourlong``).  The cell itself plus every neighbour whose distance is
  below ``epsDist`` is counted.  When the count reaches ``minPts`` the cell is
  added to ``globalcorepoints``, removed from ``globalnoisepoints``, and an
  edge is added in ``g`` to each close neighbour that lies inside the
  [min, min + range) latitude/longitude window.

  Reads module-level ``f``, ``distancedict``, ``neighbourlat`` and
  ``neighbourlong``; mutates ``globalcorepoints``, ``globalnoisepoints`` and
  ``g``.  Returns nothing.
  """
  countwithineps=1    #count self. hence 1

  # Floor division keeps the slice bounds integral on Python 3 as well
  # ('/' would yield a float there and the slice would raise).
  halfwindow = timewindow // 2
  lowertimeindex = timeindex - halfwindow
  uppertimeindex = timeindex + halfwindow + 1
  currtimeslice = f.variables['air'][lowertimeindex : uppertimeindex, 0, currlat, currlong]
  currindex=(currlat - minlat)*rangelong + (currlong - minlong)
  smallDTWDict={}     #key is index of neighbourlong and neighbourlat
  for k in range(8):
    nlat = currlat + neighbourlat[k]
    nlong = currlong + neighbourlong[k]
    neighbourindex = (nlat - minlat)*rangelong + (nlong - minlong)
    neighbourtimeslice = f.variables['air'][lowertimeindex : uppertimeindex, 0, nlat, nlong]

    # A distance stored earlier from the neighbour's side would be keyed
    # "neighbourindex_currindex".
    cachedkey = str(neighbourindex)+"_"+str(currindex)
    if cachedkey in distancedict:
      distance = distancedict[cachedkey]
    else:
      distance, path = fastdtw(currtimeslice, neighbourtimeslice, dist=euclidean)
      # NOTE(review): the entry is removed straight after being stored, so
      # this function never leaves anything in distancedict and the lookup
      # above can only hit on entries written elsewhere.  Kept as-is on
      # purpose: the flat index of a neighbour outside the window equals the
      # index of a different in-window cell, so simply keeping the entry
      # could hand back a distance for the wrong pair.  Re-enable caching
      # only with (lat, long)-based keys.
      encode=str(currindex)+"_"+str(neighbourindex)
      distancedict[encode]=distance
      del distancedict[encode]
    smallDTWDict[k]=distance
    if distance < epsDist:
      countwithineps+=1

  if countwithineps >= minPts:
    globalcorepoints.add(currindex)
    if currindex in globalnoisepoints:
      globalnoisepoints.remove(currindex)

    for k in range(8):
      #Add edge to points which are within epsDist. use smallDTWDict
      if smallDTWDict[k] < epsDist:
        nlat = currlat + neighbourlat[k]
        nlong = currlong + neighbourlong[k]
        #Consider neighbors for spatially bordered point but don't add them to the graph
        if minlat <= nlat < minlat+rangelat and minlong <= nlong < minlong+rangelong:
          neighbourindex = (nlat - minlat)*rangelong + (nlong - minlong)
          g.add_edge(currindex, neighbourindex)
          if neighbourindex in globalnoisepoints:
            globalnoisepoints.remove(neighbourindex)
예제 #5
0
    def __init__(self, x, y, dist=lambda x, y: numpy.linalg.norm(x - y), radius=1) -> None:
        """Align two 2-D sequences with FastDTW and keep the normalised path.

        :param x: first sequence; must satisfy ``x.ndim == 2``
        :param y: second sequence; must satisfy ``y.ndim == 2``
        :param dist: point-wise cost passed to FastDTW
        :param radius: FastDTW radius

        ``normed_path_x`` / ``normed_path_y`` hold the path indices into ``x``
        and ``y`` divided by the respective sequence length.
        """
        assert x.ndim == 2 and y.ndim == 2

        warping = numpy.array(fastdtw.fastdtw(x, y, radius=radius, dist=dist)[1])
        steps_x = warping[:, 0]
        steps_y = warping[:, 1]
        self.normed_path_x = steps_x / len(x)
        self.normed_path_y = steps_y / len(y)
	def search(self,value):
		"""Return the key of the stored path that best matches ``value``.

		``value`` is a list of codes.  A fixed set of codes is remapped first,
		then the remapped list is compared against every entry of
		``self.pathDictionary`` using FastDTW with a euclidean point cost.

		:return: the key with the smallest distance when that distance is
			below 40; otherwise the string ``'None'`` (also returned when the
			dictionary is empty).
		"""
		# Code substitutions applied to the query; everything else passes
		# through unchanged.  (The original note here read "spare 5" --
		# presumably code 5 is intentionally left unmapped; TODO confirm.)
		remap = {2: 10, 6: 3, 1: 11, 3: 13, 7: 14, 9: 15}
		new_value = [remap.get(tmp, tmp) for tmp in value]

		best_key = None
		best_distance = None
		# .items() works on both Python 2 and 3 (.iteritems() is 2-only).
		for pathStr, pathList in self.pathDictionary.items():
			distance, path = fastdtw(new_value, pathList, dist=euclidean)
			# Strict '<' keeps the first of several equally good entries.
			if best_distance is None or distance < best_distance:
				best_distance = distance
				best_key = pathStr

		if best_distance is None:
			# Nothing to compare against: report "no match" instead of
			# crashing in min() on an empty list.
			return 'None'

		print ('[*] Min_DTW_Distance : %s'%(str(best_distance)) )
		if best_distance < 40:
			return best_key
		return 'None'
예제 #7
0
 def dist(X,Y):
  """Return the sum of scaled FastDTW distances between matching rows of X and Y.

  Row ``i`` of ``X`` is compared with row ``i`` of ``Y`` for every ``i`` below
  ``len(raio)``; each distance is divided by
  ``np.std(X, axis=0)[i] + np.std(Y, axis=0)[i] + beta``.  ``raio``,
  ``radius`` and ``beta`` are read from the enclosing scope.
  """
  spread_x = np.std(X,axis = 0)
  spread_y = np.std(Y,axis = 0)
  return sum(
   fastdtw(X[k],Y[k],radius = radius)[0]/(spread_x[k] + spread_y[k] + beta)
   for k in np.arange(len(raio)))
예제 #8
0
def compute_distance(time_series1, time_series2,
                     distance_measure='sts'):
    """Return the distance between two time-series objects.

    :param time_series1: object exposing ``slopes`` (for ``'sts'``) or
        ``data`` (for ``'dtw'``)
    :param time_series2: second object with the same attributes
    :param distance_measure: ``'sts'`` for the sum of squared slope
        differences, ``'dtw'`` for the FastDTW distance of the raw data
    :raises ValueError: if ``distance_measure`` is neither ``'sts'`` nor
        ``'dtw'`` (this used to surface as an ``UnboundLocalError``)
    """
    if distance_measure == 'sts':
        # Take the difference of the slopes, square and sum.
        # The squared distance is enough here; no sqrt is taken.
        return np.sum(np.subtract(time_series1.slopes,
                                  time_series2.slopes)**2)
    if distance_measure == 'dtw':
        distance, _ = fastdtw(time_series1.data, time_series2.data)
        return distance
    raise ValueError(
        "unknown distance_measure %r; expected 'sts' or 'dtw'"
        % (distance_measure,))
    def check_max_distance(self, frequencies, list):
        """Remember ``list`` if ``frequencies`` match better than anything seen so far.

        ``frequencies`` are compared with ``self.efreqlist`` using FastDTW
        (euclidean point cost).  When no distance has been stored yet, or the
        new one is strictly smaller, ``self.min_distance``, ``self.min_path``
        and ``self.combination_list`` are overwritten.  Despite the name,
        the method tracks the minimum distance.
        """
        distance, path = fastdtw(self.efreqlist, frequencies, dist=euclidean)

        nothing_stored = self.min_distance is None
        if nothing_stored or distance < self.min_distance:
            self.min_distance = distance
            self.min_path = path
            self.combination_list = list
예제 #10
0
def compute_dtw_dist(part_list, degree_list, dist_func):
    """Compute per-layer FastDTW distances between vertices and their neighbours.

    :param part_list: iterable of ``(v1, neighbours)`` pairs
    :param degree_list: mapping from vertex to its per-layer ordered degree
        lists
    :param dist_func: point-wise cost handed to FastDTW
    :return: ``{(v1, v2): {layer: distance}}`` covering the layers both
        vertices have in common
    """
    dtw_dict = {}
    for v1, nbs in part_list:
        layers_v1 = degree_list[v1]
        for v2 in nbs:
            layers_v2 = degree_list[v2]
            shared_layers = min(len(layers_v1), len(layers_v2))
            dtw_dict[v1, v2] = {
                layer: fastdtw(layers_v1[layer], layers_v2[layer],
                               radius=1, dist=dist_func)[0]
                for layer in range(shared_layers)
            }
    return dtw_dict
예제 #11
0
 def realtime_dtw(self, mfccs):
     """Predict the group of reference samples closest to ``mfccs``.

     The references returned by ``self.load_data(case=False)`` are processed
     in consecutive groups of ``int(self.num_samples / 10)``.  Each reference
     is compared with ``mfccs`` using FastDTW (euclidean point cost) and the
     mean distance of every complete group is computed; the index of the
     group with the smallest mean is printed and returned.

     :raises ValueError: if the group size works out to zero
     """
     ref, ref_labels = self.load_data(case=False)
     num = int(self.num_samples / 10)
     if num <= 0:
         raise ValueError(
             "num_samples must be at least 10 to form reference groups")

     aver = []
     cost = []
     for reference in ref:
         distance, path = fastdtw(reference, mfccs, dist=euclidean)
         cost.append(distance)
         if len(cost) == num:
             aver.append(sum(cost) / num)
             # Start a fresh group.  Without this reset every "average" was
             # a running total, so the first group always won the argmin.
             cost = []
     pred = np.argmin(aver)
     print(pred, '\n')
     return pred
예제 #12
0
def align(ref, el, radius=5):
    """Warp ``ref`` and ``el`` onto a common time axis with FastDTW.

    :param ref: reference sequence
    :param el: sequence to align against ``ref``
    :param radius: FastDTW radius (previously accepted but ignored; the call
        always used 5, which is still the default)
    :return: ``(distance, aligned_ref, aligned_el)`` where the aligned
        sequences are numpy arrays with one element per step of the path
    """
    distance, profile = fastdtw(ref, el, radius=radius)

    new_ref = [ref[ref_idx] for ref_idx, _ in profile]
    new_el = [el[el_idx] for _, el_idx in profile]

    return distance, np.array(new_ref), np.array(new_el)
예제 #13
0
def pairwise_fastdtw(X, **kwargs):
    """Return the symmetric matrix of FastDTW distances between all patterns in ``X``.

    Each pattern is turned into a list of ``(position, value)`` pairs before
    comparison.  Only the upper triangle is computed; the diagonal is 0 and
    the lower triangle is mirrored from the upper one.  Extra keyword
    arguments are forwarded to ``fastdtw``.
    """
    indexed = [list(enumerate(pattern)) for pattern in X]
    size = len(indexed)

    upper = []
    for row in range(size):
        for col in range(row, size):
            if row == col:
                upper.append(0)
            else:
                upper.append(fastdtw(indexed[row], indexed[col], **kwargs)[0])

    result = np.zeros([size] * 2)
    result[np.triu_indices(size)] = upper
    # Mirror the strict upper triangle into the lower one.
    result += np.tril(result.T, -1)

    return result
예제 #14
0
def DTW_Matrix(feature_matrix):
	"""Return the upper-triangular matrix of FastDTW distances between rows.

	Entry ``[i][j]`` with ``j >= i`` holds the FastDTW distance (euclidean
	point cost) between rows ``i`` and ``j`` of ``feature_matrix``; entries
	below the diagonal stay 0.
	"""
	from fastdtw import fastdtw
	from scipy.spatial.distance import euclidean
	print("Starting Dynamic Time Warping between signals..")
	n_signals = feature_matrix.shape[0]
	dtw_matrix = np.zeros((n_signals, n_signals))
	for i in range(n_signals):
		row_i = feature_matrix[i,:]
		for j in range(i, n_signals):
			dtw_matrix[i][j] = fastdtw(row_i, feature_matrix[j,:], dist = euclidean)[0]

	print("Successfully developed matrix.")		
	return dtw_matrix
    def StartClassification(self, ArrayToClassify):
        """Classify a stroke by its nearest training recording and store the result.

        Every file listed in the module-level ``TrainingFilePath`` is read as
        one recording (one frame per line, the first 15 whitespace-separated
        numbers of each line).  The recording with the smallest FastDTW
        distance (euclidean point cost) to ``ArrayToClassify`` wins, and its
        path components decide the outcome: component 1 equal to ``"Wrong"``
        marks a mistake, component 2 is the stroke type and component 3 the
        error type.  Counters on ``self`` are updated and the result is
        passed to ``Session``.

        The previous version could not run: it opened ``str(list)``, indexed
        each line with the path string, appended every frame 15 times, ran
        DTW once per line and never closed the file.

        :raises ValueError: if ``TrainingFilePath`` yields no files
        """
        app_wave = np.array(ArrayToClassify)
        best_distance = None
        shortest_path = ""

        # looping on every stroke in dataset and add in array for classification
        for entry in TrainingFilePath:
            # NOTE(review): assumes each entry is a single path, possibly
            # with surrounding whitespace/newline -- confirm against the
            # code that builds TrainingFilePath.
            path = entry.strip()

            dataset_wave = []
            with open(path, "r") as f:
                # one frame (15 values) per non-empty line
                for raw_line in f:
                    fields = raw_line.split()
                    if not fields:
                        continue
                    dataset_wave.append([float(fields[k]) for k in range(15)])

            distance, _ = fastdtw(np.array(dataset_wave),
                                  app_wave,
                                  dist=euclidean)

            if best_distance is None or distance < best_distance:
                best_distance = distance
                shortest_path = path

        if best_distance is None:
            raise ValueError(
                "TrainingFilePath contains no training files to compare against")

        string = shortest_path.split('/')
        isMistake = False
        ErrorType = ""
        if (string[1] == "Wrong"):
            self.noWrong += 1
            isMistake = True
            ErrorType = string[3]
        else:
            self.noCorrect += 1

        StrokeType = string[2]

        Session.GetSessionInfo(ArrayToClassify)
        Session.InsertClassificationResult(ErrorType, isMistake, StrokeType)
예제 #16
0
def fastdtw_algorithm(t1, t2, key1, key2, n1=None, n2=None):
    """
    Fast dynamic time warping of two time series, followed by two plots.
    :param t1: time series 1
    :param t2: time series 2
    :param n1: sequence of event counts belonging to time series 1
    :param n2: sequence of event counts belonging to time series 2
    :param key1: label of time series 1
    :param key2: label of time series 2
    :return: None
    """
    # t is the approximate distance between the two series (the original
    # note says the default metric is the absolute difference); path lists
    # the matched index pairs as 2-tuples.
    t, path = fastdtw(t1, t2)

    # Aligned series: one element per step of the warping path.
    new_t1 = [t1[left] for left, _ in path]
    new_t2 = [t2[right] for _, right in path]

    # Draw both aligned series as horizontal rows at y=1 and y=5.
    y1 = [1] * len(new_t1)
    y2 = [5] * len(new_t2)
    l1 = plt.scatter(new_t1, y1, c='r')
    l2 = plt.scatter(new_t2, y2, c='g')
    plt.ylim(0, 6)

    # Connect every matched pair of points with a line.
    for x_top, y_top, x_bottom, y_bottom in zip(new_t1, y1, new_t2, y2):
        plt.plot((x_top, x_bottom), (y_top, y_bottom), c='b')

    plt.xlabel(u"时间/秒, ", fontproperties='SimHei')
    plt.title(u"动态时间规整对应关系图", fontproperties='SimHei')
    plt.legend(prop={'family': 'SimHei', 'size': 15})
    plt.legend(handles=[
        l1,
        l2,
    ], labels=[key1, key2], loc='upper left')
    plt.show()

    # Draw the QQ plot.
    plot_qq(new_t1, new_t2, key1, key2, n1, n2)
def dtw_distance_one_vs_all(data):
    """
    Calculates the Dynamic Time Warping distance from a base template example

    Every row of the 2-D ``data`` is compared with the module-level template
    ``y`` using FastDTW (euclidean point cost).  A dot is written to stdout
    per row as a progress indicator.

    :return: column vector of shape ``(n_rows, 1)`` with one distance per row
    """
    global y
    n, _ = data.shape
    dist = np.zeros(shape=n)
    # range() instead of the Python-2-only xrange()
    for i in range(n):
        dist[i], _ = fastdtw(data[i], y, dist=euclidean)
        sys.stdout.write('.')
        sys.stdout.flush()
    return dist.reshape(-1, 1)
예제 #18
0
def ts_distance(x,y,radius=2):
    """
    Dynamic Time Warping distance of two time series, scaled by their spread.

    Rule of thumb: a negative first element marks the series as invalid
    (-2000 is the Null value); in that case 0.0 is returned without running
    DTW.  Otherwise the FastDTW distance is divided by
    ``sqrt(std(x) * std(y))``.
    """
    either_invalid = x[0] < 0 or y[0] < 0
    if either_invalid:
        return 0.0

    dist, path = dtw.fastdtw(x, y, radius=radius)
    scale = np.sqrt(np.std(x)*np.std(y))
    return dist/scale
    def cal_fastdtw(self, cycle1, cycle2, rotate):
        """Return fast dtw distance of the two cycles.

        :param cycle1: the first cycle, holding the normalized magnitude data.
        :param cycle2: the second cycle, holding the normalized magnitude data.
        :param rotate: when truthy, every circular shift of ``cycle1``
            (``np.roll``) is tried as well and the smallest distance wins.
        :return: the (minimum) FastDTW distance, computed with a euclidean
            point cost.
        """
        min_distance, path = fastdtw(cycle1, cycle2, dist=euclidean)

        if rotate:
            # Shift 0 is the unrotated cycle already measured above, so the
            # search starts at 1.
            for shift in range(1, len(cycle1)):
                distance, path = fastdtw(np.roll(cycle1, shift),
                                         cycle2,
                                         dist=euclidean)
                if distance < min_distance:
                    min_distance = distance

        return min_distance
    def distance_analysis(self):
        """Compare open/close and high/low price series per company and plot the result.

        For every distinct value in the ``Name`` column of
        ``self.time_series_data`` the NaN-free ``open``/``close`` and
        ``high``/``low`` columns are compared using FastDTW (euclidean point
        cost).  Max, min, mean and standard deviation of both distance lists
        are printed, then both lists are shown as a scatter plot.  Returns
        nothing.
        """
        data = self.time_series_data
        companies = list(set(data['Name']))

        def column_without_nan(frame, column):
            # Column as a numpy array with NaN entries removed.
            values = np.array(frame[column])
            return values[~np.isnan(values)]

        distances_o_c = []
        distances_h_l = []
        for index, company in enumerate(companies):
            # Formatted print() calls give the same output on Python 2 and 3.
            print("%s %s" % (index + 1, company))
            all_time_series = data.loc[data['Name'] == company]
            ts_open = column_without_nan(all_time_series, 'open')
            ts_close = column_without_nan(all_time_series, 'close')
            ts_high = column_without_nan(all_time_series, 'high')
            ts_low = column_without_nan(all_time_series, 'low')

            distance_o_c, _ = fastdtw(ts_open, ts_close, dist=euclidean)
            distance_h_l, _ = fastdtw(ts_high, ts_low, dist=euclidean)
            distances_o_c.append(distance_o_c)
            distances_h_l.append(distance_h_l)

        for distances in (distances_o_c, distances_h_l):
            print("%s %s %s %s" % (np.max(distances), np.min(distances),
                                   np.mean(distances), np.std(distances)))

        eje_x = list(range(len(companies)))
        legends = ['Open vs Close', 'High vs Low']

        plt.scatter(eje_x, distances_o_c, c='blue', s=20)
        plt.scatter(eje_x, distances_h_l, c='red', s=20)
        plt.legend(legends, loc='upper right')
        plt.xlabel('Companies from SP-500')
        plt.ylabel('Distances')
        plt.title(
            'Comparing the distances between Open vs Close and High vs Low values'
        )
        plt.show()
예제 #21
0
    def DTWSimilarity(self, dataX, dataY, gyroscope=False):
        '''
        Combine per-axis FastDTW distances of two recordings into one value.

        The ``accX``, ``accY`` and ``accZ`` entries of both data objects are
        always compared; with ``gyroscope=True`` the ``alpha``, ``beta`` and
        ``gamma`` entries are included too (off by default for speed).  The
        result is the square root of the sum of the squared distances.  Both
        data objects are also stored on ``self``.

        Arguments:
          dataX: First data object
          dataY: Second data object
          gyroscope: also compare alpha, beta and gamma
        '''
        self.dataX = dataX
        self.dataY = dataY

        channels = ['accX', 'accY', 'accZ']
        if gyroscope:
            channels = channels + ['alpha', 'beta', 'gamma']

        # Similarity function as written in the paper:
        # Akl, A., Feng, C., & Valaee, S. (2011). A novel accelerometer-based
        # gesture recognition system. IEEE Transactions on Signal Processing,
        # 59(12), 6197-6205.
        squared_total = 0
        for channel in channels:
            channel_distance, _ = fastdtw(dataX[channel], dataY[channel])
            squared_total = squared_total + (channel_distance**2)
        return math.sqrt(squared_total)
예제 #22
0
def make(save_name, tag, X, Y, Z):
    """Rank recordings by their DTW dissimilarity to the others and export them.

    For every recording ``i`` a score is accumulated over all other
    recordings ``j``: with ``d = dtw(X[i], X[j])**2 + dtw(Z[i], Z[j])**2`` the
    score is the sum of ``d * d`` (``Y`` does not enter the score).  The
    recordings are then written to an xls workbook with sheets ``x``, ``y``
    and ``z``, one recording per row, ordered from lowest to highest score,
    and the workbook is saved as ``save_name``.

    NOTE(review): the score used to be overwritten on every ``j``
    (``sum = dist * dist``), so only the last comparison counted; it is now
    accumulated, which is what the variable name and the printed "sum"
    indicate -- confirm this matches the intended metric.
    """
    col_count = np.array(X).shape[0]
    score = [0] * col_count

    for i in range(col_count):
        total = 0
        for j in range(col_count):
            if i != j:
                dist_x, _ = fastdtw(X[i], X[j])
                dist_z, _ = fastdtw(Z[i], Z[j])
                dist = (dist_x * dist_x) + (dist_z * dist_z)
                total += dist * dist
        score[i] = total
        print(tag + "\tsum : " + str(score[i]))

    workbook = xlwt.Workbook(encoding='utf-8')  # create a workbook with utf-8 encoding
    ws_x = workbook.add_sheet("x")  # create sheet
    ws_y = workbook.add_sheet("y")  # create sheet
    ws_z = workbook.add_sheet("z")  # create sheet

    # sorted() is stable, so equal scores keep their original index order.
    ranking = sorted(range(col_count), key=lambda idx: score[idx])
    for row, source in enumerate(ranking):
        print("result min : " + str(score[source]) + "\tindex : " + str(source))
        for col in range(np.array(X[source]).shape[0]):
            ws_x.write(row, col, X[source][col])
            ws_y.write(row, col, Y[source][col])
            ws_z.write(row, col, Z[source][col])

    workbook.save(save_name)
예제 #23
0
def calc_distances_all(vertices,list_vertices,degreeList, commonList, part, compactDegree = False):
    """Compute and persist per-layer FastDTW distances for one partition.

    ``vertices[k]`` is compared with every vertex in ``list_vertices[k]``.
    For each layer both vertices share, two distances are computed: one over
    ``degreeList`` (cost ``cost_max`` when ``compactDegree`` is set, else
    ``cost``) and one over ``commonList`` (always ``cost``).  Both result
    dictionaries are passed through ``preprocess_consolides_distances`` and
    saved as ``distances-r-<part>`` and ``distances-q-<part>``.
    """
    dist_func = cost_max if compactDegree else cost

    distances_r = {}
    distances_q = {}

    for position, v1 in enumerate(vertices):
        degrees_v1 = degreeList[v1]
        common_v1 = commonList[v1]

        for v2 in list_vertices[position]:
            degrees_v2 = degreeList[v2]
            common_v2 = commonList[v2]

            pair = (v1, v2)
            distances_r[pair] = {}
            distances_q[pair] = {}

            for layer in range(min(len(degrees_v1), len(degrees_v2))):
                dist_r = fastdtw(degrees_v1[layer],degrees_v2[layer],radius=1,dist=dist_func)[0]
                dist_q = fastdtw(common_v1[layer],common_v2[layer],radius=1,dist=cost)[0]
                distances_r[pair][layer] = dist_r
                distances_q[pair][layer] = dist_q

    preprocess_consolides_distances(distances_r)
    preprocess_consolides_distances(distances_q)
    saveVariableOnDisk(distances_r,'distances-r-'+str(part))
    saveVariableOnDisk(distances_q,'distances-q-'+str(part))
    return
예제 #24
0
def Harsh_ChangeLine_Left(
        input_df, Model_param=Model_param['troca_faixa_esquerda_agressiva']):
    """Evaluate a fixed decision tree of FastDTW distances on ``input_df``.

    ``Model_param[r][1]`` is the reference series of node ``r`` and
    ``Model_param[r][0]`` its threshold; some thresholds are hard-coded
    constants instead.  (The default parameter key presumably means
    "aggressive left lane change" -- confirm with the model owner.)

    :return: ``True`` or ``False`` according to the tree below
    """
    def distance_to(ref):
        # FastDTW distance between the input and reference series `ref`.
        return fastdtw(input_df, Model_param[ref][1])[0]

    if distance_to(3) <= Model_param[3][0]:
        if not (distance_to(1) <= 511.02):
            return True
        return bool(distance_to(0) <= Model_param[0][0])

    if distance_to(1) <= 837.11:
        return bool(distance_to(1) <= 408.96)
    return bool(distance_to(2) <= Model_param[2][0])
예제 #25
0
def calculate(
    file_list: List[str],
    gt_file_list: List[str],
    args: argparse.Namespace,
    mcd_dict: Dict,
):
    """Calculate MCD.

    For every generated file the single ground-truth file whose basename
    occurs in the generated path is located, mel-cepstra are extracted from
    both with ``sptk_extract``, the two sequences are aligned with FastDTW
    (euclidean point cost) and the mean mel-cepstral distortion over the
    aligned frames is stored in ``mcd_dict`` under the ground-truth basename.

    :param file_list: paths of generated wav files
    :param gt_file_list: paths of ground-truth wav files
    :param args: namespace providing ``n_fft``, ``n_shift``, ``mcep_dim`` and
        ``mcep_alpha``
    :param mcd_dict: output mapping, updated in place
    """
    for gen_path in file_list:
        corresponding_list = [
            gt_path for gt_path in gt_file_list
            if _get_basename(gt_path) in gen_path
        ]
        assert len(corresponding_list) == 1
        gt_path = corresponding_list[0]
        gt_basename = _get_basename(gt_path)

        # load wav file as int16
        gen_x, gen_fs = sf.read(gen_path, dtype="int16")
        gt_x, gt_fs = sf.read(gt_path, dtype="int16")

        fs = gen_fs
        if gen_fs != gt_fs:
            # np.float64 replaces the removed alias np.float (same dtype);
            # the sample rates are passed by keyword because recent librosa
            # releases no longer accept them positionally.
            gt_x = librosa.resample(gt_x.astype(np.float64),
                                    orig_sr=gt_fs,
                                    target_sr=gen_fs)

        # extract ground truth and converted features
        gen_mcep = sptk_extract(
            x=gen_x,
            fs=fs,
            n_fft=args.n_fft,
            n_shift=args.n_shift,
            mcep_dim=args.mcep_dim,
            mcep_alpha=args.mcep_alpha,
        )
        gt_mcep = sptk_extract(
            x=gt_x,
            fs=fs,
            n_fft=args.n_fft,
            n_shift=args.n_shift,
            mcep_dim=args.mcep_dim,
            mcep_alpha=args.mcep_alpha,
        )

        # DTW
        _, path = fastdtw(gen_mcep, gt_mcep, dist=spatial.distance.euclidean)
        twf = np.array(path).T
        gen_mcep_dtw = gen_mcep[twf[0]]
        gt_mcep_dtw = gt_mcep[twf[1]]

        # MCD
        diff2sum = np.sum((gen_mcep_dtw - gt_mcep_dtw)**2, 1)
        mcd = np.mean(10.0 / np.log(10.0) * np.sqrt(2 * diff2sum), 0)
        logging.info(f"{gt_basename} {mcd:.4f}")
        mcd_dict[gt_basename] = mcd
예제 #26
0
def algo(input_file_name):  # input_file_name -> must include dir + name.
    """Return the emphasised word of the option recording closest to the input.

    All emphasis variants of the input sentence are generated into a fixed
    directory.  A window of log filterbank features is taken from the input
    recording and from every file in that directory, the windows are
    compared using FastDTW (euclidean point cost), and the text between the
    first two ``%`` characters of the closest file's name is returned.

    The MFCCs that used to be computed for every file were never used and
    are no longer calculated.
    """
    all_options_path = r'/home/akiva/Documents/Do_not_delete/all_options_emphsis'
    audio_sentence = get_audio_sentence(input_file_name)
    print('audio_sentence = ', audio_sentence)
    # creates all the emphasis options:
    a = create_all_emph_options(all_options_path, audio_sentence)
    print('a=', a)

    # Feature window: `frames` frames starting at `first_frame`, using the
    # first `mfccs` filterbank coefficients (at most 26 are available
    # according to the original note).
    frames = 50  # 20
    first_frame = 30
    mfccs = 20  # upto 26!

    def feature_window(wav_path):
        # Scaled log filterbank window of one wav file.
        (rate, sig) = wav.read(wav_path)
        return logfbank(sig, rate)[first_frame:(first_frame + frames),
                                   0:mfccs] / 20

    print(input_file_name)
    input_file_data = feature_window(input_file_name)

    # Compare against each permutation of 1 emphasized word
    file_names = []
    distances = []
    for file_name in listdir(all_options_path):
        print(file_name)
        file_names.append(file_name)
        current_file_data = feature_window(all_options_path + "/" + file_name)
        distance, path = fastdtw(input_file_data,
                                 current_file_data,
                                 dist=euclidean)
        distances.append(distance)

    print(file_names)
    print(distances)
    min_distance_index = min(enumerate(distances), key=itemgetter(1))[0]
    print(min_distance_index)

    best_name = file_names[min_distance_index]
    start_end_indexes = [pos for pos, char in enumerate(best_name) if char == '%']
    print(start_end_indexes)
    return best_name[start_end_indexes[0] + 1:start_end_indexes[1]]
예제 #27
0
def pattern_finder(bName_1, bName_2):
    """Return the FastDTW distance between the smoothed star ratings of two businesses.

    ``<bName>.csv`` is read for each business; reviews between 2012-6-1 and
    2017-5-1 are kept, and the ``stars`` column is smoothed with a
    100-review rolling mean.  The two series are cut to the length of the
    shorter one (keeping the most recent values) and compared using FastDTW
    with a euclidean point cost.  The distance is printed and returned.
    """
    def rolling_stars(base_name):
        # Smoothed 'stars' series of one business, indexed by review time.
        frame = pd.DataFrame(pd.read_csv(base_name + ".csv"),
                             columns=['id', 'date', 'stars', 'text'])
        # Reviews sharing a date are spread one hour apart so timestamps are
        # unique, see
        # https://stackoverflow.com/questions/44128600/how-should-i-handle-duplicate-times-in-time-series-data-with-pandas
        frame['date'] = pd.to_datetime(frame['date'], format='%Y-%m-%d')
        frame['date'] = frame['date'] + pd.to_timedelta(
            frame.groupby('date').cumcount(), unit='h')
        frame = frame.sort_values(by=['date']).set_index('date')
        # .loc replaces the removed .ix indexer; for this label slice on a
        # date index the selection is the same.
        frame = frame.loc['2012-6-1':'2017-5-1']
        return frame['stars'].rolling(window=100).mean().dropna()

    ts_1 = rolling_stars(bName_1)
    ts_2 = rolling_stars(bName_2)

    # Both series are cut to the same length before comparison.
    compare_len = min(len(ts_1), len(ts_2))
    x = ts_1.iloc[-compare_len:].values
    y = ts_2.iloc[-compare_len:].values

    dtw_dist, path = fastdtw(x, y, dist=euclidean)
    print(dtw_dist)
    return dtw_dist
예제 #28
0
def main():
    """Plot the five nearest training trajectories for the first five test trajectories.

    Each test trajectory is compared with the training trajectories using
    FastDTW with the ``haversine`` point cost on (element 1, element 2) of
    every trajectory point.  The test trajectory and its five closest
    training trajectories are written as Google-Maps HTML files in the
    working directory.
    """
    hole_time = time.time()
    trainSet = pd.read_csv('train_set.csv',
                           converters={"Trajectory": literal_eval})
    testSet = pd.read_csv('test_set_a1.csv',
                          converters={"Trajectory": literal_eval})
    for i in range(0, 5):
        start_time = time.time()
        trajectory = testSet['Trajectory'][i]
        lon = [point[1] for point in trajectory]
        lat = [point[2] for point in trajectory]
        # Converted once per test trajectory rather than once per comparison.
        param = np.asarray([[point[1], point[2]] for point in trajectory])

        dist = []
        # NOTE(review): training row 0 is never compared (the range starts
        # at 1) -- confirm whether that is intentional.
        for k in range(1, len(trainSet)):
            paramtrain = [[point[1], point[2]]
                          for point in trainSet['Trajectory'][k]]
            distance, path = fastdtw(param,
                                     np.asarray(paramtrain),
                                     dist=haversine)
            dist.append((distance, trainSet['journeyPatternId'][k], k))
        sortedDist = sorted(dist, key=operator.itemgetter(0))
        elapsed_time = time.time() - start_time

        # '//' keeps the midpoint an int so it is a valid list index on
        # Python 3 as well as Python 2.
        gmap = gmplot.GoogleMapPlotter(lat[len(lat) // 2], lon[len(lon) // 2],
                                       11)
        gmap.plot(lat, lon, 'forestgreen', edge_width=9)
        name = "test_" + str(i + 1) + "_time_" + str(elapsed_time) + ".html"
        gmap.draw(name)

        for y in range(0, 5):
            neighbour_row = sortedDist[y][2]
            neighbour = trainSet['Trajectory'][neighbour_row]
            lontrain = [point[1] for point in neighbour]
            lattrain = [point[2] for point in neighbour]
            gmap = gmplot.GoogleMapPlotter(lattrain[len(lattrain) // 2],
                                           lontrain[len(lontrain) // 2], 11)
            gmap.plot(lattrain, lontrain, 'forestgreen', edge_width=9)
            name = "test_" + str(i + 1) + "_neighbor_" + str(y) + "_jp_" + str(
                sortedDist[y][1]) + "dist" + str(sortedDist[y][0]) + ".html"
            gmap.draw(name)
    hole_time = time.time() - hole_time
예제 #29
0
def _fastdtw(args):
    """Worker taking one packed argument: FastDTW distance between ``arr1`` and ``parse(b)``.

    :param args: tuple ``(arr1, a, b, radius)``
    :return: tuple ``(a, b, distance)``
    """
    arr1, a, b, radius = args
    # dist=lambda x, y: (int(x) != int(y)),  # When not using https://github.com/orestisfl/fastdtw/
    distance = fastdtw(arr1, parse(b), radius=radius)[0]
    return (a, b, distance)
예제 #30
0
def dtw_dist(mfcc1, mfcc2):
    '''Dynamic time warping distance between two feature matrices.

    Both inputs are cut to the smaller size along axis 1 and transposed, so
    each row handed to FastDTW (euclidean point cost) is one column of the
    input.  The distance is divided by the product of the two row counts.
    '''
    shared_len = min(mfcc1.shape[1], mfcc2.shape[1])

    frames1 = mfcc1[:, :shared_len].T
    frames2 = mfcc2[:, :shared_len].T

    dist, path = fastdtw(frames1, frames2, dist=euclidean)
    normaliser = frames1.shape[0] * frames2.shape[0]

    return dist / normaliser
예제 #31
0
파일: knnplot.py 프로젝트: gohilankit/DBCAD
def testFunc():
  """Collect FastDTW distances between grid cells and their eight neighbours.

  For time index 4, every cell with longitude index 1..189 and latitude
  index 1..91 of ``f.variables['air']`` is compared (euclidean point cost)
  with its eight neighbours over a 9-step time window.  Each
  ``(distance, neighbour_number)`` pair is appended to the module-level list
  ``dist``.  Returns nothing.
  """
  timewindow=9
  # Floor division keeps the slice bounds integral on Python 3 as well.
  halfwindow = timewindow // 2
  for timeIndex in range (4,5,1):
    lower = timeIndex - halfwindow
    upper = timeIndex + halfwindow + 1
    for iterlong in range(1,190):
      for iterlat in range(1,92):
        currtimeslice = f.variables['air'][lower : upper, 0, iterlat, iterlong]
        # The old manual counter (iter=0 / iter+=1) had no effect because
        # the for loop rebinds the variable on every pass; it is gone.
        for k in range(8):
           neighbourtimeslice = f.variables['air'][lower : upper, 0, iterlat +neighbourlat[k], iterlong+neighbourlong[k]]
           distance, path = fastdtw(currtimeslice, neighbourtimeslice, dist=euclidean)
           dist.append((distance,k))
예제 #32
0
def perform_dtw(ts_1, ts_2):
    """Print and return the fastdtw distance between the tails of two series.

    Only the last ``min(len(ts_1), len(ts_2))`` entries of each input are
    compared; they are selected with ``.iloc`` and handed to fastdtw as
    ``.values`` with a Euclidean point metric.
    """
    tail = min(len(ts_1), len(ts_2))
    dtw_dist, _ = fastdtw(ts_1.iloc[-tail:].values,
                          ts_2.iloc[-tail:].values,
                          dist=euclidean)
    print(dtw_dist)
    return dtw_dist
예제 #33
0
def perform_quartile_dtw(ts_1, ts_2):
    """Print and return the fastdtw distance between the tails of two sequences.

    Both inputs are trimmed to their last ``min(len(ts_1), len(ts_2))``
    entries using plain slicing, then compared with a Euclidean point metric.

    The trimmed sequences are now the ones passed to fastdtw.  Previously they
    were computed but the untrimmed inputs were compared, which disagreed with
    ``perform_dtw`` and left the trimming dead.
    """
    compare_len = min(len(ts_1), len(ts_2))

    x = ts_1[-compare_len:]
    y = ts_2[-compare_len:]

    dtw_dist, _ = fastdtw(x, y, dist=euclidean)

    print(dtw_dist)

    return dtw_dist
예제 #34
0
def dist(df):
    """Return the symmetric matrix of pairwise fastdtw distances of df's columns.

    Columns are addressed as ``df[0] .. df[n-1]`` and NaNs are dropped from
    each column before comparison (Euclidean point metric).  The result is an
    n-by-n DataFrame indexed 0..n-1 on both axes with zeros on the diagonal.
    """
    n_cols = df.shape[1]
    res = pd.DataFrame(index=np.arange(n_cols), columns=np.arange(n_cols))
    for a in range(n_cols):
        res.iloc[a, a] = 0
        # Only the upper triangle is computed; the value is mirrored below.
        for b in range(a + 1, n_cols):
            d, _ = fastdtw(df[a].dropna().values,
                           df[b].dropna().values,
                           dist=euclidean)
            res.iloc[a, b] = d
            res.iloc[b, a] = d
    return res
예제 #35
0
def run_dtw_process(params):
    """Align one reference point cloud against every cloud in the collection.

    ``params`` is ``(ref_key, point_clouds)`` where ``point_clouds`` is a
    mapping.  The reference entry is compared with every entry (itself
    included) using fastdtw and ``_transform_invariant_point_cloud_distance``.

    Returns ``(mean_cost, paths)``: the summed fastdtw cost divided by the
    number of clouds, and a dict mapping each key to its warping path.
    """
    ref_key, point_clouds = params
    reference = point_clouds[ref_key]
    total_cost = 0
    paths = {}
    for key, cloud in point_clouds.items():
        print("start dtw", ref_key, key)
        path_cost, path = fastdtw(reference, cloud,
                                  dist=_transform_invariant_point_cloud_distance)
        paths[key] = path
        total_cost += path_cost
    return total_cost / len(point_clouds), paths
예제 #36
0
파일: FastDTW.py 프로젝트: trinhkhoi/MIR
def calculate_distance(data, source, target):
    """Return the scaled fastdtw distance between two entries of ``data``.

    ``data[source]`` and ``data[target]`` are transposed and compared with
    ``radius=100``.  Distances of 9999999 or more are reported on stdout.  The
    returned value is the distance divided by 10000000.  Any exception is
    forwarded to ``raise_exception`` together with this function's name.
    """
    try:
        distance, _ = fastdtw(data[source].T, data[target].T, radius=100)
        if distance >= 9999999:
            print(' ======== source: ', source, 'target: ', target, 'distance: ', distance)
        return distance / 10000000
    except Exception as ex:
        raise_exception(calculate_distance.__name__, ex)
예제 #37
0
def dtwDist(x, y):
    """Dynamic Time Warping Distance.

    Returns the ``(dist, path_data)`` pair produced by
    ``fastdtw(x, y, dist=euclidean)``.

    ``fastdtw`` and ``scipy`` are imported lazily; when either import fails,
    ``util.missing_module`` is called with the package name (its behaviour is
    defined elsewhere).
    """
    # The docstring used to sit below the imports, where it was just a
    # discarded string expression and never became ``__doc__``.
    try:
        from fastdtw import fastdtw
    except ImportError:
        util.missing_module('fastdtw')
    try:
        from scipy.spatial.distance import euclidean
    except ImportError:
        util.missing_module('scipy')
    dist, path_data = fastdtw(x, y, dist=euclidean)
    return dist, path_data
예제 #38
0
def getDTWPath(x, y):
    """Plot x and y with dotted links along their fastdtw path; return the path.

    Each path step ``(ix, iy)`` is drawn as a grey dotted segment from
    ``(ix, x[ix])`` to ``(iy, y[iy])``.  The figure is shown with
    ``plt.show()`` before the path is returned.
    """
    distance, path = fastdtw(x, y, dist=euclidean)

    plt.plot(x, label='x')
    plt.plot(y, label='y')

    link_style = dict(color='gray', linestyle='dotted', linewidth=1)
    for ix, iy in path:
        plt.plot([ix, iy], [x[ix], y[iy]], **link_style)

    plt.legend()
    plt.title('Our two temporal sequences')
    plt.show()
    return path
def NCC(centroids, instance):
    """Nearest-centroid classification using fastdtw distances.

    ``centroids`` maps a class label to its centroid sequence.  Returns
    ``(distances, class_label)``: a dict with the fastdtw distance from
    ``instance`` to every centroid, and the label with the smallest distance.
    """
    distances = {}
    for label in centroids:
        distances[label] = fastdtw.fastdtw(instance, centroids[label])[0]

    class_label = min(distances, key=distances.get)
    return distances, class_label
예제 #40
0
def dtwDist(x, y):
    """Dynamic Time Warping Distance.

    Returns only the distance from ``fastdtw(x, y, dist=euclidean)``; the
    warping path is discarded.

    Raises:
        ImportError: if ``fastdtw`` or ``scipy`` is not installed.  The
            original message is still printed first.
    """
    # Re-raise after reporting.  Previously the failure was only printed and
    # the function then crashed with UnboundLocalError on the missing name.
    try:
        from fastdtw import fastdtw
    except ImportError:
        print(ImportError, "fastdtw package is not installed.")
        raise

    try:
        from scipy.spatial.distance import euclidean
    except ImportError:
        print(ImportError, "scipy package is not installed.")
        raise

    dist, _ = fastdtw(x, y, dist=euclidean)
    return dist
예제 #41
0
 def run(self):
     """Compute DTW distances for this worker's columns and write them to CSV.

     For every column ``i`` in ``self.my_range`` one row of ``self.len_data``
     distances is produced: the fastdtw distance (cosine point metric)
     between column ``i`` and each column ``j`` of ``self.data``.  Every
     column is paired with a 1..n running index so fastdtw sees 2-D points.
     The rows are written without header or index to
     ``out_path + <self.index zero-padded to 3> + "thread.txt"``.
     """
     # The running index is identical for every column, so build it once.
     running_index = pd.DataFrame(range(1, len(self.data) + 1))

     def as_points(col):
         # NOTE(review): ``.ix`` was removed in pandas 1.0.  Switch to
         # ``.iloc``/``.loc`` once it is confirmed whether ``col`` is a
         # position or a label.
         return np.array(pd.concat([running_index, self.data.ix[:, col]], axis=1))

     result = np.empty((0, self.len_data), float)
     for i in self.my_range:
         x = as_points(i)

         temp = np.empty(shape=[1, self.len_data])
         for j in range(self.len_data):
             # Fix: compare against column j.  The original rebuilt column i
             # here, so every entry was a self-comparison.
             y = as_points(j)
             # Fix: fastdtw returns (distance, path); only the scalar
             # distance fits in the float cell.
             distance, _ = fastdtw(x, y, dist=cosine)
             temp[0, j] = distance
         result = np.append(result, temp, axis=0)

     score = pd.DataFrame(data=result)
     score.to_csv(out_path + str(self.index).zfill(3) + "thread.txt", header=None, index=False)
예제 #42
0
def dtw_pr(pr0, pr1):
    """Warp two piano-rolls onto a common fastdtw alignment path.

    The path is computed on the flattened rolls returned by
    ``sum_along_instru_dim``.  The point metric is the Euclidean distance
    between frames after each frame has been clipped to at most 1 and cast to
    int.  Both original rolls are then re-indexed along the path with
    ``warp_pr_aux`` and returned as ``(pr0_warp, pr1_warp)``.
    """
    def clip_to_int(frame):
        return np.minimum(frame, 1).astype(int)

    def frame_distance(a, b):
        return euclidean(clip_to_int(a), clip_to_int(b))

    flat0 = sum_along_instru_dim(pr0)
    flat1 = sum_along_instru_dim(pr1)
    distance, path = fastdtw(flat0, flat1, dist=frame_distance)

    # Split the list of (i, j) steps into one index list per roll.
    steps0 = [step[0] for step in path]
    steps1 = [step[1] for step in path]

    pr0_warp = warp_pr_aux(pr0, steps0)
    pr1_warp = warp_pr_aux(pr1, steps1)
    return pr0_warp, pr1_warp
예제 #43
0
def testFunc():
  """Collect the two smallest neighbour DTW distances of every grid cell.

  For time indices 4..6, longitudes 1..189 and latitudes 1..91, a window of
  ``timewindow`` samples centred on the time index is read from the
  module-level dataset ``f`` (variable ``'air'``, level 0) for the cell and
  for each of its 8 neighbours (offset tables ``neighbourlat`` /
  ``neighbourlong``).  The two smallest of the 8 fastdtw distances are
  appended to the module-level list ``globaldist``.
  """
  global globaldist
  timewindow = 9
  # Floor division keeps the slice bounds integral on Python 3 as well.
  half = timewindow // 2
  air = f.variables['air']
  for timeIndex in range(4, 7):
    lo, hi = timeIndex - half, timeIndex + half + 1
    for iterlong in range(1, 190):
      for iterlat in range(1, 92):
        currtimeslice = air[lo:hi, 0, iterlat, iterlong]
        currdist = []
        for n in range(8):
          neighbourtimeslice = air[lo:hi, 0,
                                   iterlat + neighbourlat[n],
                                   iterlong + neighbourlong[n]]
          distance, path = fastdtw(currtimeslice, neighbourtimeslice, dist=euclidean)
          currdist.append(distance)

        currdist.sort()
        globaldist += currdist[0:2]
예제 #44
0
def estimate_twf(orgdata, tardata, distance='melcd', fast=True, otflag=None):
    """Estimate the time warping function between two feature sequences.

    Parameters
    ---------
    orgdata : array, shape(`T_org`, `dim`)
        Source features
    tardata : array, shape(`T_tar`, `dim`)
        Target features
    distance : str, optional
        Name of the frame distance; only `melcd` (mel-cepstrum distortion)
        is accepted
    fast : bool, optional
        `True` (default) aligns with fastdtw, `False` with `dtw`
    otflag : str, optional
        When not None, the path is post-processed by `modify_twf`
        `org` : align into original length
        `tar` : align into target length

    Returns
    ---------
    twf : array, shape(`2`, `T`)
        Time warping function between original and target

    Raises
    ---------
    ValueError
        If `distance` is anything other than `melcd`
    """
    if distance != 'melcd':
        raise ValueError('other distance metrics than melcd does not support.')

    def distance_func(x, y):
        return melcd(x, y)

    if not fast:
        _, _, _, twf = dtw(orgdata, tardata, distance_func)
    else:
        _, path = fastdtw(orgdata, tardata, dist=distance_func)
        # fastdtw yields a list of (i, j) pairs; transpose to shape (2, T).
        twf = np.array(path).T

    if otflag is None:
        return twf
    return modify_twf(twf, otflag=otflag)
 def get1NN(self,mfcc_feat2):
     minrd=1e40
     mkrd="none_none_0000"
     for k in sorted(self.mfccs.keys()):
         mfcc_feat1=self.mfccs[k]
         l1=len(mfcc_feat1)
         l2=len(mfcc_feat2)
         #discriminating by length
         if abs(l1-l2)<l2*self.ldis:
             distance, path = fastdtw(mfcc_feat1, mfcc_feat2, dist=euclidean)
             rd=distance/len(path) #Normalize distance bi path length
             #print k,distance,len(mfcc_feat2),len(mfcc_feat1), len(path),rd
             print k,len(mfcc_feat2),len(mfcc_feat1),rd
             if rd<minrd:
                 minrd=rd
                 mkrd=k
     if minrd>self.distThreshold:
         return "none_none_0000",minrd
     else:
         return mkrd,minrd
예제 #46
0
파일: main3.py 프로젝트: gohilankit/DBCAD
def isCorePoint(minPts, epsDist, currlong, currlat, minlong, minlat, rangelong, rangelat, timeindex, timewindow):
  """Test one grid cell for core-point status and record the result.

  A window of ``timewindow`` samples centred on ``timeindex`` is read from
  the module-level dataset ``f`` (variable ``'air'``, level 0) for the cell
  ``(currlat, currlong)`` and for its 8 neighbours (offset tables
  ``neighbourlat`` / ``neighbourlong``).  The cell counts itself plus every
  neighbour whose fastdtw distance (Euclidean point metric) is below
  ``epsDist``.  When that count reaches ``minPts``:

  * the cell's flat id ``(lat - minlat) * rangelong + (lon - minlong)`` is
    added to ``globalcorepoints`` and removed from ``globalnoisepoints``;
  * for every neighbour within ``epsDist`` that lies inside the
    ``rangelat`` x ``rangelong`` box starting at ``(minlat, minlong)``, an
    edge is added to graph ``g`` and the neighbour is removed from
    ``globalnoisepoints``.

  Neighbours outside the box still count towards ``minPts`` but get no edge.
  Returns None.
  """
  # Floor division keeps the slice bounds integral on Python 3 as well.
  half = timewindow // 2
  t_lo, t_hi = timeindex - half, timeindex + half + 1
  air = f.variables['air']

  def point_id(lat, lon):
    # Flat node id of a grid cell inside the region of interest.
    return (lat - minlat) * rangelong + (lon - minlong)

  currtimeslice = air[t_lo:t_hi, 0, currlat, currlong]

  countwithineps = 1    # count self. hence 1
  neighbour_dist = []   # position n holds the distance to neighbour n
  for n in range(8):
    neighbourtimeslice = air[t_lo:t_hi, 0,
                             currlat + neighbourlat[n],
                             currlong + neighbourlong[n]]
    distance, path = fastdtw(currtimeslice, neighbourtimeslice, dist=euclidean)
    neighbour_dist.append(distance)
    if distance < epsDist:
      countwithineps += 1

  if countwithineps < minPts:
    return

  here = point_id(currlat, currlong)
  globalcorepoints.add(here)
  if here in globalnoisepoints:
    globalnoisepoints.remove(here)

  for n in range(8):
    if not neighbour_dist[n] < epsDist:
      continue
    nlat = currlat + neighbourlat[n]
    nlong = currlong + neighbourlong[n]
    # Same test as ``in range(...)`` for integer indices, without building
    # a list on every call under Python 2.
    inside = (minlat <= nlat < minlat + rangelat and
              minlong <= nlong < minlong + rangelong)
    if not inside:
      continue
    there = point_id(nlat, nlong)
    g.add_edge(here, there)
    if there in globalnoisepoints:
      globalnoisepoints.remove(there)
예제 #47
0
def calc_dtw(x_train, x_test, train_len, test_len, radius=1, total_shifts = 7):
    """
    Calculates the DTW distance between the test cases and the training data
    after applying a series of shifts to the test data.

    For every test item, ``total_shifts`` shifted copies are produced (made
    odd by adding one if necessary, so there are equally many shifts in each
    direction plus the unshifted copy).  The shifts are evenly spaced within
    +/- 15% of ``x.shape[1]``.  When the item is too short for a non-zero
    step, or ``total_shifts`` is 1, the unshifted item is used for every
    copy.  These cases previously raised ValueError / ZeroDivisionError.

    A training item is only compared when the test length lies within
    ``max(30% of train_len, 5)`` of its length; otherwise the distance is
    taken to be 1000000.

    Returns a nested list indexed ``[test item][shift][training item]``.
    """
    # Total shifts will always be an odd number so there is the same number
    # of shifts in each direction.
    if total_shifts % 2 == 0:
        total_shifts += 1
    half = total_shifts // 2

    master_dist = []
    for i, x in enumerate(x_test):
        max_shift = x.shape[1] * 0.15   # Indicate % range here
        shift = int(max_shift / half) if half else 0
        if shift:
            offsets = range(-shift * half, shift * half + 1, shift)
        else:
            # A zero step cannot drive range(); keep the output shape by
            # evaluating the unshifted item once per requested shift.
            offsets = [0] * total_shifts

        mfcc_dist = []
        for d in offsets:
            # NOTE(review): np.roll without ``axis`` rolls the flattened
            # array, so values wrap across rows of x.  Confirm whether
            # ``axis=1`` was intended.
            shifted = np.roll(x, d).T
            dist = []
            for i2, x2 in enumerate(x_train):
                len_threshold = max(train_len[i2] * 0.3, 5)
                min_thres = train_len[i2] - len_threshold
                max_thres = train_len[i2] + len_threshold

                # Run DTW dist if stored phrase is within -/+ 30% of the
                # requested test phrase length
                if min_thres <= test_len[i] <= max_thres:
                    distance, path = fastdtw(shifted, x2.T, radius=radius,
                                             dist=lambda a, b: norm(a - b))
                # else assume they are not the same by assuming a very large distance
                else:
                    distance = 1000000

                dist.append(distance)

            mfcc_dist.append(dist)
        master_dist.append(mfcc_dist)

    return master_dist
예제 #48
0
    def get_nearest_n_dtw(self, train, label, test):
        """
        Collect up to ``self.nn_num`` training series nearest to ``test`` under
        fastdtw with a Euclidean point metric.

        :param train: Iterable of training series; each element must offer ``tolist()``.
        :param label: Labels, indexable in step with ``train``.
        :param test: The query series (anything ``numpy.array`` accepts).
        :return: ``(distances, series, labels)`` of the kept neighbours, as numpy arrays.
        """
        nn_dist_array = numpy.array([])
        nn_label_array = numpy.array([])
        nn_ts_ls = []
        te_ele = numpy.array(test).reshape(-1, 1)

        for i, tr_ele in enumerate(train):
            tr_ele_ls = tr_ele.tolist()
            tr_column = numpy.array(tr_ele).reshape(-1, 1)
            dist, path = fastdtw(te_ele, tr_column, dist=euclidean)

            # Still filling the neighbour list: take the series unconditionally.
            if len(nn_dist_array) < self.nn_num:
                nn_dist_array = numpy.append(nn_dist_array, dist)
                nn_ts_ls.append(tr_ele_ls)
                nn_label_array = numpy.append(nn_label_array, label[i])
                continue

            worst = numpy.max(nn_dist_array)
            if not worst > dist:
                continue

            # Stop scanning once the worst kept neighbour is below max_dist.
            if worst < self.max_dist:
                break

            # Replace the current worst neighbour with this closer series.
            max_ind = numpy.argmax(nn_dist_array)
            nn_dist_array[max_ind] = dist
            nn_ts_ls[max_ind] = tr_ele_ls
            nn_label_array[max_ind] = label[i]

        return nn_dist_array, numpy.array(nn_ts_ls), nn_label_array
예제 #49
0
def testfdtw(x,y,color):
    D,dist,path = fastdtw.fastdtw(x,y)
    mat = np.zeros((len(x),len(y),4))
    maxcost = max(filter(lambda x:x!=np.inf,map(lambda x:x[0],D.values())))
    print "\t",maxcost,(len(x)+len(y))/2
    #mat.fill(maxcost+1)
    mat.fill(np.inf)
    for i,j in D:
        mat[i-1,j-1] = D[i,j]
    smat = mat[:,:,0]
    #smat[np.isinf(smat)] = maxcost+1
    #smat = maxcost+1-smat
    #print smat



    #plt.imshow(smat,
    #           interpolation="none",
    #           cmap=plt.cm.gray,
    #           #norm=matplotlib.colors.LogNorm()
    #)

    plotp = np.array(path).T
    plt.plot(plotp[1],plotp[0],color=color)
    def find_lowest(self):
        """
        Find the candidate whose frequency list is closest to ``self.efreqlist``.

        Each id in ``self.candidates`` is looked up through
        ``get_peaks.get_frequency_intensity_list`` (called with ``min=0`` and
        ``max=self.max_frequency``) and compared by fastdtw with a Euclidean
        point metric.  Candidates for which fastdtw raises ``IndexError`` are
        skipped.  The best distance, id and name are printed.

        :return: mid, distance, path of the best candidate;
                 ``(None, 0, None)`` when nothing could be compared
        """
        min_mid, min_distance, min_path = None, 0, None

        for mid in self.candidates:
            frequencies, intensities = get_peaks.get_frequency_intensity_list(
                conn, mid, max=self.max_frequency, min=0)

            try:
                distance, path = fastdtw(self.efreqlist, frequencies, dist=euclidean)
            except IndexError:
                continue

            # The first comparable candidate always wins; later ones must be
            # strictly closer.
            if min_path is None or distance < min_distance:
                min_mid, min_distance, min_path = mid, distance, path

        print(min_distance)
        print(min_mid)
        print(get_molecules.getName(conn, min_mid))

        return min_mid, min_distance, min_path
예제 #51
0
파일: fdtw.py 프로젝트: samhealer/magpie
# print a - b
# print (a-b)**2

#euclidean mod dist
# Lookup table for dist_mod: the *position* of a value in this list is what
# enters the distance.
euc = [0, 7, 4, 9, 2, 5, 11, 8, 3, 10, 6, 1]


# dist_c = lambda a,b: (((a[0] - b[0]) **2) + (((a[1] - b[1])) **2)  + (((a[2] - b[2])) **2)) **0.5
def dist_c(a, b):
    # Weighted Euclidean distance over components 1 and 2 only.
    d1 = (a[1] - b[1]) * 0.125
    d2 = (a[2] - b[2]) * 0.25
    return (d1 ** 2 + d2 ** 2) ** 0.5


def dist_mod(a, b):
    # As dist_c, plus the table position of the component-0 difference mod 12.
    rank = euc.index(int(a[0] - b[0]) % 12)
    d1 = (a[1] - b[1]) * 0.125
    d2 = (a[2] - b[2]) * 0.25
    return (rank ** 2 + d1 ** 2 + d2 ** 2) ** 0.5


def dist_x(a, b):
    # Squared difference of component 0 only.
    return (a[0] - b[0]) ** 2


# Time both comparisons together.
startLoad = time.clock()
print(fastdtw.fastdtw(lmr_ground, lmr_match, dist=dist_mod)[0])
print(fastdtw.fastdtw(lmr_ground, lmr_mismatch, dist=dist_mod)[0])
endLoad = time.clock()
print("load time = " + str(endLoad - startLoad))
예제 #52
0
 def distance(self, v1, v2):
     """Return the fastdtw distance between v1 and v2 (Euclidean point metric)."""
     return fastdtw(v1, v2, dist=euclidean)[0]
import numpy as np
from scipy.spatial.distance import euclidean
from matplotlib import pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.cluster.hierarchy import cophenet
from scipy.spatial.distance import pdist
from sklearn import preprocessing



from fastdtw import fastdtw

x = np.array([[1,1], [2,2], [3,3], [4,4], [5,5]])
y = np.array([[2,2], [3,3], [4,4]])
print x.shape, y.shape
distance, path = fastdtw(x, y, dist=euclidean)
print(distance, path)

Z = linkage(path, 'centroid')


plt.figure(figsize=(25, 10))
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('sample index')
plt.ylabel('distance')
dendrogram(
    Z,
    leaf_rotation=90.,
    leaf_font_size=8.,
)
plt.show()
예제 #54
0
def shapeEncoding(arrData_raw, nCodingWndSize, nNeighbors=3):
    '''
        Source encoding according to signal shape.

        The data is divided by its maximum and cut into consecutive windows
        of nCodingWndSize points; a trailing partial window is ignored.
        Each window has its minimum subtracted and is then either coded as
        flat (standard deviation <= FLAT_PATTERN_STD) or matched, by fastdtw
        with a squared-difference point metric, to the closest template
        from generateShapeTemplates.

        Parameters:
        -----------
        arrData_raw :
                    data
        nCodingWndSize :
                        coding window size in number of data points
        nNeighbors :
                    currently unused by the active code

        Returns:
        --------
        lsDataCode :
                a list of integer codes, one per window
        arrDataShape :
                        the numpy.array of approximating shapes, i.e. the
                        concatenation of the per-window shapes (zeros for
                        flat windows, otherwise the chosen template divided
                        by its maximum); None when there is no full window

    '''
    lsDataCode = []
    arrDataShape = None
    arrWndShape = None
    nDataLen = len(arrData_raw)
    arrData = arrData_raw / np.max(arrData_raw)

    # Stepping only up to the last full window leaves out a short tail.
    for nStartIndex in xrange(0, nDataLen - nCodingWndSize + 1, nCodingWndSize):
        arrWndData = arrData[nStartIndex: nStartIndex + nCodingWndSize]
        arrWndData_shift = arrWndData - np.min(arrWndData)  # remove base line

        # Templates are generated for the value range of this window.
        dcPatterns = generateShapeTemplates(nCodingWndSize,
                                            np.ptp(arrWndData_shift))

        nCode = None
        if np.std(arrWndData_shift) <= FLAT_PATTERN_STD:
            nCode = SHAPE_CODE_FLAT
            arrWndShape = np.zeros(nCodingWndSize)
        else:
            # Nearest template wins; ties keep the first one encountered.
            dBest = float("inf")
            for nCandidate, arrShape in dcPatterns.iteritems():
                dDis, path = fastdtw(arrWndData_shift, arrShape,
                                     dist=lambda a, b: abs(a-b)**2.0)
                if dDis < dBest:
                    dBest = dDis
                    nCode = nCandidate
                    arrWndShape = arrShape / np.max(arrShape)

        # Append this window's shape and code to the running results.
        if arrDataShape is None:
            arrDataShape = arrWndShape
        else:
            arrDataShape = np.concatenate([arrDataShape, arrWndShape])
        lsDataCode.append(nCode)
    return lsDataCode, arrDataShape
예제 #55
0
 def test_2d_fastdtw(self):
     """fastdtw on the 2-D fixtures with the fixture metric gives 2 * sqrt(2)."""
     expected = ((1+1)**0.5)*2
     actual, _path = fastdtw(self.x_2d, self.y_2d, dist=self.dist_2d)
     self.assertAlmostEqual(actual, expected)
예제 #56
0
 def test_1d_fastdtw(self):
     """fastdtw on the 1-D fixtures with the default metric gives exactly 2."""
     actual, _path = fastdtw(self.x_1d, self.y_1d)
     self.assertEqual(actual, 2)
예제 #57
0
파일: pdist_mt.py 프로젝트: mmssouza/idsc
def dist(X,Y):
 """Return the fastdtw distance of X and Y scaled by their spread.

 The raw distance (module-level ``cost`` metric and ``radius``) is divided
 by the sum of each input's mean per-row standard deviation plus the
 module-level constant ``beta``.
 """
 spread_x = np.std(X, axis=1).mean()
 spread_y = np.std(Y, axis=1).mean()
 raw = fastdtw(X, Y, dist=cost, radius=radius)[0]
 return raw / (spread_x + spread_y + beta)
예제 #58
0
    def DTW_train(self):
        """Compare matching training sentences pairwise with fastdtw and log similarities.

        Every key of ``self.DTW_Y_train`` is paired with every other key whose
        cleaned name (``clean_filename_TextGrid`` then
        ``clean_filename_numbers``) is identical.  Each key is used as the
        second element of a pair at most once, and pairs whose feature arrays
        in ``self.DTW_X_train`` are equal are skipped.  For each remaining
        pair and each of the four feature columns ('In', 'F1', 'F2', 'F3'):

        * the warping path is computed with fastdtw;
        * the per-step offset ``|i - j|`` is min-max normalised and the
          similarity ``100 - 100 * mean(normalised offsets)`` is appended to
          the file ``self.dtw_comparison_native_directory``;
        * the path is passed to ``self.distance_cost_plot`` and plotted.

        Any exception is printed together with ``sys.exc_info()`` and re-raised.
        """
        try:
            features_names = ['In', 'F1', 'F2', 'F3']

            # Both sides of the comparison are drawn from the same key list.
            sentence_keys = list(self.DTW_Y_train)

            already_used = set()
            for non_native_key in sentence_keys:
                val = clean_filename_numbers(clean_filename_TextGrid(non_native_key))

                for native_key in sentence_keys:
                    if native_key == non_native_key:
                        continue

                    sec_val = clean_filename_numbers(clean_filename_TextGrid(native_key))
                    if sec_val != val:
                        continue

                    if native_key in already_used:
                        continue
                    already_used.add(native_key)

                    non_native = self.DTW_X_train[non_native_key]
                    native = self.DTW_X_train[native_key]

                    # DTW operation
                    print("Comparing: {} and {}".format(non_native_key, native_key))

                    # not DTW between the same person
                    if np.array_equal(non_native, native):
                        continue

                    with open(self.dtw_comparison_native_directory, 'a') as the_file:
                        the_file.write(
                            "Non native: {} - Native: {}\n".format(non_native_key, native_key))

                        for feat, feat_name in enumerate(features_names):
                            dist, path = fastdtw(non_native[:, feat], native[:, feat])

                            path_x = [point[0] for point in path]
                            path_y = [point[1] for point in path]

                            # How far each step of the path is off the diagonal.
                            offsets = [abs(px - py) for px, py in zip(path_x, path_y)]
                            min_offset = min(offsets)
                            span = float(max(offsets) - min_offset)

                            if span:
                                norm = [float(o - min_offset) / span for o in offsets]
                            else:
                                # All offsets are equal (perfectly diagonal
                                # path): avoid 0/0 and report full similarity.
                                norm = [0.0] * len(offsets)

                            similarity = 100 - (100 * statistics.mean(norm))
                            the_file.write("Similarity of {0}: {1:.2f}%\n".format(feat_name, similarity))

                            self.distance_cost_plot(path)
                            plt.plot(path_x, path_y)
                    plt.show()

        except:
            # Report and propagate.  The output matches the former Python 2
            # statement ``print "Error: ", sys.exc_info()``.
            print("Error:  {}".format(sys.exc_info()))
            raise
예제 #59
0
#kMns.fit(mfccAll)
#print kMns.predict(mfccs["paco_no_001"])
#print kMns.predict(mfccs["paco_uno_001"])

# Record from the default ALSA device into recording.wav via sox.
# NOTE(review): the "silence" effect arguments presumably trim leading
# silence and stop after 1.5 s of trailing silence - confirm against the
# sox manual.
os.system("sox -r 16000 -t alsa default recording.wav silence 1 0.1 1% 1 1.5 1%")
(rate2,sig2) = wav.read("recording.wav")
#sig2=pp.maxabs_scale(sig2)
sig2=pp.maxabs_scale(sig2)#
# MFCC features of the freshly recorded query.
mfcc_feat2 = mfcc(sig2,rate2)
#mfcc_feat2=scale(mfcc_feat2)# Standardize?

# Track two winners over all stored templates in `mfccs`:
#   mk   / mind  - smallest raw fastdtw distance
#   mkrd / minrd - smallest distance divided by the warping-path length
mind=1e40
minrd=1e40
for k in sorted(mfccs.keys()):
    mfcc_feat1=mfccs[k]
    distance, path = fastdtw(mfcc_feat1, mfcc_feat2, dist=euclidean)
    rd=distance/len(path)
    #print k,distance, len(path),rd
    if distance<mind:
        mind=distance
        mk=k
    if rd<minrd:
        minrd=rd
        mkrd=k
# NOTE: mk / mkrd are only defined if `mfccs` holds at least one entry with
# a distance below 1e40.
print mk,mind
print mkrd,minrd

#cmd="cp recording.wav /home/francisco/voz/{}.wav".format(mk+"0")
#os.system(cmd)

# Reload the template that won on raw distance.
mfcc_feat1=mfccs[mk]
예제 #60
0
    def DTW_test(self):
        """Compare each test sentence with matching training sentences via fastdtw.

        The sentence list is read from the file ``self.sentences_directory``,
        one entry per line.  For every test item, its sentence name is found
        by looking up its phoneme array in ``self.dictionary_testset``.  For
        every listed sentence contained in that name, each training item whose
        phonemes match an entry of ``self.dictionary_trainset`` with the
        sentence in its key is compared, with each training key used at most
        once overall.  For each of the four feature columns ('In', 'F1',
        'F2', 'F3'):

        * the warping path is computed with fastdtw;
        * the per-step offset ``|i - j|`` is min-max normalised and the
          similarity ``100 - 100 * mean(normalised offsets)`` is appended to
          the file ``self.dtw_comparison_directory``.

        Any exception is printed together with ``sys.exc_info()`` and re-raised.
        """
        try:
            features_names = ['In', 'F1', 'F2', 'F3']
            already_used = set()

            with open(self.sentences_directory) as sentences_file:
                sentences = [line.replace('\n', '') for line in sentences_file]

            # Index-based access is kept on purpose: the containers are only
            # known to support ``len()`` and integer subscripting.
            for test_idx in range(len(self.DTW_X_test)):

                # retrieve the sentence from the test set
                non_native = self.DTW_X_test[test_idx]
                non_native_phonemes = self.DTW_Y_test[test_idx]
                non_native_sentence = ""
                for key, val in self.dictionary_testset.items():
                    if np.array_equal(np.array(val), non_native_phonemes):
                        non_native_sentence = key
                        break

                for sen in sentences:
                    # compare the non-native sentence with the classification set
                    if sen not in non_native_sentence:
                        continue

                    # retrieve the "same" sentence from the training set
                    for train_idx in range(len(self.DTW_X_train)):
                        native = self.DTW_X_train[train_idx]
                        native_phonemes = self.DTW_Y_train[train_idx]

                        for key, val in self.dictionary_trainset.items():
                            # only entries for the same sentence and phonemes
                            if sen not in key:
                                continue
                            if not np.array_equal(val, native_phonemes):
                                continue

                            # check if I already used this sentence
                            if key in already_used:
                                continue

                            # save it and apply DTW
                            native_sentence = key
                            already_used.add(key)

                            # debug
                            print("Comparing: {} and {}".format(non_native_sentence, native_sentence))

                            with open(self.dtw_comparison_directory, 'a') as the_file:
                                the_file.write("Non native: {} - Native: {}\n".format(non_native_sentence,
                                                                                      native_sentence))

                                for feat, feat_name in enumerate(features_names):
                                    dist, path = fastdtw(non_native[:, feat], native[:, feat])

                                    # How far each step of the path is off
                                    # the diagonal.  The comprehension also
                                    # stops the outer loop index from being
                                    # reused here.
                                    offsets = [abs(point[0] - point[1]) for point in path]
                                    min_offset = min(offsets)
                                    span = float(max(offsets) - min_offset)

                                    if span:
                                        norm = [float(o - min_offset) / span for o in offsets]
                                    else:
                                        # All offsets are equal (perfectly
                                        # diagonal path): avoid 0/0 and
                                        # report full similarity.
                                        norm = [0.0] * len(offsets)

                                    similarity = 100 - (100 * statistics.mean(norm))
                                    the_file.write(
                                        "Similarity of {0}: {1:.2f}%\n".format(feat_name, similarity))
        except:
            # Report and propagate.  The output matches the former Python 2
            # statement ``print "Error: ", sys.exc_info()``.
            print("Error:  {}".format(sys.exc_info()))
            raise