Example #1
def mfcc(y1, y2, y3, sr1, sr2, sr3, yTest, srTest):
    # import dtw
    # import editdistance

    # Convert the data to mfcc:
    mfcc1 = librosa.feature.mfcc(y1, sr1, n_mfcc=20)
    mfcc2 = librosa.feature.mfcc(y2, sr2, n_mfcc=20)
    mfcc3 = librosa.feature.mfcc(y3, sr3, n_mfcc=20)
    mfccTest = librosa.feature.mfcc(yTest, srTest)

    # Remove mean and normalize each column of MFCC
    import copy

    def preprocess_mfcc(mfcc):
        mfcc_cp = copy.deepcopy(mfcc)
        for i in range(mfcc.shape[1]):
            mfcc_cp[:, i] = mfcc[:, i] - np.mean(mfcc[:, i])
            mfcc_cp[:, i] = mfcc_cp[:, i] / np.max(np.abs(mfcc_cp[:, i]))
        return mfcc_cp

    mfcc1 = preprocess_mfcc(mfcc1)
    mfcc2 = preprocess_mfcc(mfcc2)
    mfcc3 = preprocess_mfcc(mfcc3)
    mfccTest = preprocess_mfcc(mfccTest)

    window_size = mfcc1.shape[1]
    dists = np.zeros(mfccTest.shape[1] - window_size)

    for i in range(len(dists)):
        mfcci = mfccTest[:, i:i + window_size]
        # librosa.dtw takes ``metric`` (not ``dist``) and expects
        # feature-by-frame matrices; element [0] of the result is the
        # accumulated cost matrix D, so take D[-1, -1] for a scalar distance.
        dist1i = librosa.dtw(
            mfcc1,
            mfcci,
            metric=lambda x, y: np.exp(np.linalg.norm(x - y, ord=1)))[0][-1, -1]
        dist2i = librosa.dtw(
            mfcc2,
            mfcci,
            metric=lambda x, y: np.exp(np.linalg.norm(x - y, ord=1)))[0][-1, -1]
        dist3i = librosa.dtw(
            mfcc3,
            mfcci,
            metric=lambda x, y: np.exp(np.linalg.norm(x - y, ord=1)))[0][-1, -1]
        dists[i] = (dist1i + dist2i + dist3i) / 3
    import matplotlib.pyplot as plt
    plt.plot(dists)

    # select minimum distance window
    word_match_idx = dists.argmin()
    # convert MFCC to time domain
    word_match_idx_bnds = np.array(
        [word_match_idx, word_match_idx + window_size])
    samples_per_mfcc = 512
    word_samp_bounds = 1 + (word_match_idx_bnds * samples_per_mfcc)

    word = yTest[word_samp_bounds[0]:word_samp_bounds[1]]
    return word
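A minimal usage sketch for this template matcher, assuming three short reference recordings of the target word and one longer test recording; the file names are hypothetical:

import librosa

y1, sr1 = librosa.load('word_ref1.wav')
y2, sr2 = librosa.load('word_ref2.wav')
y3, sr3 = librosa.load('word_ref3.wav')
yTest, srTest = librosa.load('sentence_with_word.wav')

# Returns the slice of yTest whose MFCC window best matches the references.
word = mfcc(y1, y2, y3, sr1, sr2, sr3, yTest, srTest)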
Example #2
def test_dtw_subseq_sym():
    Y = np.array([10., 10., 0., 1., 2., 3., 10., 10.])
    X = np.arange(4)

    gt_wp_XY = np.array([[3, 5], [2, 4], [1, 3], [0, 2]])
    gt_wp_YX = np.array([[5, 3], [4, 2], [3, 1], [2, 0]])

    _, mut_wp_XY = librosa.dtw(X, Y, subseq=True)
    _, mut_wp_YX = librosa.dtw(Y, X, subseq=True)

    assert np.array_equal(gt_wp_XY, mut_wp_XY)
    assert np.array_equal(gt_wp_YX, mut_wp_YX)
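The ground-truth paths above say that the query X = [0, 1, 2, 3] aligns with Y[2:6]. A small sketch of recovering that slice from the warping path (the path is returned end-to-start, as Example #17 also exploits):

import numpy as np
import librosa

X = np.arange(4)
Y = np.array([10., 10., 0., 1., 2., 3., 10., 10.])
_, wp = librosa.dtw(X, Y, subseq=True)
# wp runs from the end of the match back to its start.
start, end = wp[-1][1], wp[0][1] + 1
print(Y[start:end])  # [0. 1. 2. 3.]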
Example #3
def test_dtw_subseq_sym():
    Y = np.array([10., 10., 0., 1., 2., 3., 10., 10.])
    X = np.arange(4)

    gt_wp_XY = np.array([[3, 5], [2, 4], [1, 3], [0, 2]])
    gt_wp_YX = np.array([[5, 3], [4, 2], [3, 1], [2, 0]])

    _, mut_wp_XY = librosa.dtw(X, Y, subseq=True)
    _, mut_wp_YX = librosa.dtw(Y, X, subseq=True)

    assert np.array_equal(gt_wp_XY, mut_wp_XY)
    assert np.array_equal(gt_wp_YX, mut_wp_YX)
Example #4
def mfcc_dtw(y, sr, yTest, srTest):
    # Convert the data to mfcc:
    mfcc = librosa.feature.mfcc(y, sr, n_mfcc=24, n_fft=2048, hop_length=512)  # n_fft=10240, hop_length=2560
    mfccTest = librosa.feature.mfcc(yTest, srTest, n_mfcc=24, n_fft=2048, hop_length=512)
    # get delta mfccs
    mfcc_delta = librosa.feature.delta(mfcc)
    mfccTest_delta = librosa.feature.delta(mfccTest)
    # then merge
    mfcc = np.concatenate((mfcc, mfcc_delta), axis=0)
    mfccTest = np.concatenate((mfccTest, mfccTest_delta), axis=0)

    # mfcc = mfcc1.mean(1)
    # mfccTest = mfccTest.mean(1)

    # Remove mean and normalize each column of MFCC
    import copy
    def preprocess_mfcc(mfcc):
        mfcc_cp = copy.deepcopy(mfcc)
        for i in range(mfcc.shape[1]):
            mfcc_cp[:, i] = mfcc[:, i] - np.mean(mfcc[:, i])
            mfcc_cp[:, i] = mfcc_cp[:, i] / np.max(np.abs(mfcc_cp[:, i]))
        return mfcc_cp

    mfcc = preprocess_mfcc(mfcc)
    mfccTest = preprocess_mfcc(mfccTest)

    # average MFCC over all frames
    mfcc = mfcc.mean(1)
    mfccTest = mfccTest.mean(1)

    #Calculate the distances from the test signal
    d, wp = librosa.dtw(mfccTest, mfcc, metric='euclidean')

    return d[-1, -1]
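A hedged usage sketch: the return value is the accumulated DTW cost between the frame-averaged MFCC+delta vectors, so smaller means more similar (file names hypothetical):

import librosa

y_ref, sr_ref = librosa.load('reference.wav')
y_cand, sr_cand = librosa.load('candidate.wav')
print(mfcc_dtw(y_ref, sr_ref, y_cand, sr_cand))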
Example #5
    def plotAndSave(self, Y, Z, fig_name="compare", extension="png"):
        status = False
        fig_name = fig_name + '.' + extension
        try:
            if self.fileExists(TEMP_FOLDER, fig_name):
                os.remove(TEMP_FOLDER + fig_name)
        except Exception as e:
            print(e)
        try:
            D, wp = librosa.dtw(Y, Z, subseq=True)
            [N, M] = D.shape
            # print(D[N-1,M-1])
            plt.figure(fig_name)
            plt.subplot(2, 1, 1)
            librosa.display.specshow(D, x_axis='frames', y_axis='frames')
            plt.plot(wp[:, 1], wp[:, 0], label='Optimal path', color='y')
            plt.legend()
            plt.subplot(2, 1, 2)
            plt.plot(D[-1, :] / wp.shape[0])
            plt.xlim([0, Y.shape[1]])
            plt.ylim([0, 2])
            plt.title('Matching cost function')
            plt.tight_layout()
            plt.savefig(TEMP_FOLDER + fig_name)
            plt.clf()
            status = True
        except Exception as e:
            print('Plotting failed.')
            status = False

        return status
Example #6
def MCC_with_DTW(sample, dest):
    '''
    Check the similarity of the sounds in sample and dest,
    ignoring the difference in magnitude between them.
    Args : sample, dest
        sample - sound to compare.
        dest - sound to compare.
    Returns:
        similarity of sample and dest.
    Raises:
        nothing.
    '''
    # MCC : Magnitude Control Compare.
    largest_sample = 0.000000001
    for i in range(0, len(sample)):
        if largest_sample < sample[i]:
            largest_sample = sample[i]

    largest_dest = 0.000000001
    for i in range(0, len(dest)):
        if largest_dest < dest[i]:
            largest_dest = dest[i]

    # Compare against the largest value: scale dest so its peak matches sample's.
    temp = []
    for i in range(0, len(dest)):
        temp.append(dest[i] * largest_sample / largest_dest)
    # The DTW call belongs outside the loop: one alignment of the full signals.
    dtwed, _ = lb.dtw(sample, temp, subseq=True)

    # dtwed[-1, -1] is the similarity of the sounds.
    return abs(dtwed[-1, -1])
Example #7
def dtw_score(X, Y):
    from librosa import dtw

    D, wp = dtw(X, Y)
    minpath = max([X.shape[1], Y.shape[1]])

    return float(D[-1, -1] / minpath)
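Dividing the final cost by the length of the longer sequence is one common normalization; it makes scores roughly comparable across pairs of different lengths. A sketch with stand-in random features (d x N layout, as librosa expects):

import numpy as np

X = np.random.rand(12, 50)
Y = np.random.rand(12, 80)
print(dtw_score(X, Y))  # accumulated cost per frame of the longer input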
Example #8
def mfcc_dtw(y, sr, yTest, srTest):
    # Calculate MFCC of test and reference, return the DTW distance between them
    # First convert the data to mfcc:
    mfcc = librosa.feature.mfcc(y, sr, n_mfcc=24, n_fft=2048, hop_length=512)  # n_fft=10240, hop_length=2560
    mfccTest = librosa.feature.mfcc(yTest, srTest, n_mfcc=24, n_fft=2048, hop_length=512)
    # get delta mfccs
    mfcc_delta = librosa.feature.delta(mfcc)
    mfccTest_delta = librosa.feature.delta(mfccTest)
    # then merge
    mfcc = np.concatenate((mfcc, mfcc_delta), axis=0)
    mfccTest = np.concatenate((mfccTest, mfccTest_delta), axis=0)

    # Remove mean and normalize each column of MFCC
    import copy
    def preprocess_mfcc(mfcc):
        mfcc_cp = copy.deepcopy(mfcc)
        for i in range(mfcc.shape[1]):
            mfcc_cp[:, i] = mfcc[:, i] - np.mean(mfcc[:, i])
            mfcc_cp[:, i] = mfcc_cp[:, i] / np.max(np.abs(mfcc_cp[:, i]))
        return mfcc_cp

    mfcc = preprocess_mfcc(mfcc)
    mfccTest = preprocess_mfcc(mfccTest)

    # average MFCC over all frames
    mfcc = mfcc.mean(1)
    mfccTest = mfccTest.mean(1)

    # Calculate the distances from the test signal
    d, wp = librosa.dtw(mfccTest, mfcc, metric='euclidean')
    return d[-1, -1]
Example #9
def test_dtw_global():
    # Example taken from:
    # Meinard Mueller, Fundamentals of Music Processing
    X = np.array([[1, 3, 3, 8, 1]])
    Y = np.array([[2, 0, 0, 8, 7, 2]])

    gt_D = np.array([[1., 2., 3., 10., 16., 17.], [2., 4., 5., 8., 12., 13.],
                     [3., 5., 7., 10., 12., 13.], [9., 11., 13., 7., 8., 14.],
                     [10, 10., 11., 14., 13., 9.]])

    mut_D, _ = librosa.dtw(X, Y)
    assert np.array_equal(gt_D, mut_D)

    # Check that it works without backtracking
    mut_D2 = librosa.dtw(X, Y, backtrack=False)
    assert np.array_equal(mut_D, mut_D2)
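The gt_D matrix can be reproduced by hand with the standard DTW recurrence over librosa's default steps (1, 1), (1, 0), (0, 1); a short verification sketch:

import numpy as np
from scipy.spatial.distance import cdist

X = np.array([[1, 3, 3, 8, 1]])
Y = np.array([[2, 0, 0, 8, 7, 2]])
C = cdist(X.T, Y.T, metric='euclidean')  # local cost matrix

# Accumulate costs with an inf-padded border so the recurrence is uniform.
D = np.full((C.shape[0] + 1, C.shape[1] + 1), np.inf)
D[0, 0] = 0.0
for i in range(C.shape[0]):
    for j in range(C.shape[1]):
        D[i + 1, j + 1] = C[i, j] + min(D[i, j], D[i, j + 1], D[i + 1, j])
print(D[1:, 1:])  # matches gt_D above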
Example #10
def test_dtw_global():
    # Example taken from:
    # Meinard Mueller, Fundamentals of Music Processing
    X = np.array([[1, 3, 3, 8, 1]])
    Y = np.array([[2, 0, 0, 8, 7, 2]])

    gt_D = np.array([[1., 2., 3., 10., 16., 17.],
                     [2., 4., 5., 8., 12., 13.],
                     [3., 5., 7., 10., 12., 13.],
                     [9., 11., 13., 7., 8., 14.],
                     [10, 10., 11., 14., 13., 9.]])

    mut_D, _ = librosa.dtw(X, Y)
    assert np.array_equal(gt_D, mut_D)

    # Check that it works without backtracking
    mut_D2 = librosa.dtw(X, Y, backtrack=False)
    assert np.array_equal(mut_D, mut_D2)
Example #11
def mfcc(y1, y2, y3, sr1, sr2, sr3, yTest, srTest):
    # import dtw
    # import editdistance

    # Convert the data to mfcc:
    mfcc1 = librosa.feature.mfcc(y1, sr1, n_mfcc=20)
    mfcc2 = librosa.feature.mfcc(y2, sr2, n_mfcc=20)
    mfcc3 = librosa.feature.mfcc(y3, sr3, n_mfcc=20)
    mfccTest = librosa.feature.mfcc(yTest, srTest)

    # Remove mean and normalize each column of MFCC
    import copy
    def preprocess_mfcc(mfcc):
        mfcc_cp = copy.deepcopy(mfcc)
        for i in range(mfcc.shape[1]):
            mfcc_cp[:, i] = mfcc[:, i] - np.mean(mfcc[:, i])
            mfcc_cp[:, i] = mfcc_cp[:, i] / np.max(np.abs(mfcc_cp[:, i]))
        return mfcc_cp

    mfcc1 = preprocess_mfcc(mfcc1)
    mfcc2 = preprocess_mfcc(mfcc2)
    mfcc3 = preprocess_mfcc(mfcc3)
    mfccTest = preprocess_mfcc(mfccTest)

    window_size = mfcc1.shape[1]
    dists = np.zeros(mfccTest.shape[1] - window_size)

    for i in range(len(dists)):
        mfcci = mfccTest[:, i:i + window_size]
        # librosa.dtw takes ``metric`` (not ``dist``); element [0] of the result
        # is the accumulated cost matrix D, so take D[-1, -1] for a scalar distance.
        dist1i = librosa.dtw(mfcc1, mfcci, metric=lambda x, y: np.exp(np.linalg.norm(x - y, ord=1)))[0][-1, -1]
        dist2i = librosa.dtw(mfcc2, mfcci, metric=lambda x, y: np.exp(np.linalg.norm(x - y, ord=1)))[0][-1, -1]
        dist3i = librosa.dtw(mfcc3, mfcci, metric=lambda x, y: np.exp(np.linalg.norm(x - y, ord=1)))[0][-1, -1]
        dists[i] = (dist1i + dist2i + dist3i) / 3
    import matplotlib.pyplot as plt
    plt.plot(dists)

    # select minimum distance window
    word_match_idx = dists.argmin()
    # convert MFCC to time domain
    word_match_idx_bnds = np.array([word_match_idx, word_match_idx + window_size])
    samples_per_mfcc = 512
    word_samp_bounds = 1 + (word_match_idx_bnds * samples_per_mfcc)

    word = yTest[word_samp_bounds[0]:word_samp_bounds[1]]
    return word
Example #12
def compare_test_record_2_learning_list(self, test_element):
    dist = list()
    for i in range(0, len(self.learning_list)):
        y = self.learning_list[i].mfcc
        D, wp = librosa.dtw(test_element, y, subseq=True)
        dist.append(D[-1, -1])
    if min(dist) > MIN_DIST:
        return "nie rozpoznano"  # Polish: "not recognized"
    else:
        return self.learning_list[dist.index(min(dist))].name
Example #13
def test_dtw_global_diagonal():
    # query is a linear ramp
    X = np.linspace(0.1, 1, 10)
    Y = X

    gt_wp = list(zip(list(range(10)), list(range(10))))[::-1]

    mut_D, mut_wp = librosa.dtw(X, Y, subseq=True, metric='cosine',
                                step_sizes_sigma=np.array([[1, 1]]),
                                weights_mul=np.array([1, ]))

    assert np.array_equal(np.asarray(gt_wp), np.asarray(mut_wp))
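By default librosa.dtw uses the step set (1, 1), (0, 1), (1, 0) with unit multiplicative weights; the test above overrides both so that only diagonal moves are allowed, forcing a strictly diagonal path. A hedged sketch of another custom step configuration (diagonal plus penalized double steps):

import numpy as np
import librosa

X = np.linspace(0.1, 1, 10)
Y = np.linspace(0.1, 1, 10)

# Double steps cost twice as much as a plain diagonal move.
D, wp = librosa.dtw(X, Y,
                    step_sizes_sigma=np.array([[1, 1], [2, 1], [1, 2]]),
                    weights_mul=np.array([1, 2, 2]))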
Example #14
def test_dtw_global_diagonal():
    # query is a linear ramp
    X = np.linspace(0.1, 1, 10)
    Y = X

    gt_wp = list(zip(list(range(10)), list(range(10))))[::-1]

    mut_D, mut_wp = librosa.dtw(X, Y, subseq=True, metric='cosine',
                                step_sizes_sigma=np.array([[1, 1]]),
                                weights_mul=np.array([1, ]))

    assert np.array_equal(np.asarray(gt_wp), np.asarray(mut_wp))
Example #15
def compareto(audio, reference):
    xy, xsr = audio
    yy, ysr = reference
    
    mfccX = feature.mfcc(y=xy, sr=xsr)
    mfccY = feature.mfcc(y=yy, sr=ysr) 
    
    chromaX = feature.chroma_cqt(y=xy, sr=xsr)
    chromaY = feature.chroma_cqt(y=yy, sr=ysr) 
    
    distances = []
    score = 0
    
    D, wp = dtw(mfccX[0], mfccY[0])
    score += getscore(wp) * 2
    
    D, wp = dtw(chromaX, chromaY)
    score += getscore(wp)
    
    distances.append(score / 3)
    
    return sum(distances) / len(distances)
Example #16
def FindTask(Record_File_Path):
    Compare_File_Path = './comparing voice data/'
    Language_test, fs0 = lib.load(Record_File_Path)
    Language_ch, fs1 = lib.load(Compare_File_Path + 'translate_ch.wav')
    Language_en, fs2 = lib.load(Compare_File_Path + 'translate_en.wav')
    Language_jp, fs3 = lib.load(Compare_File_Path + 'translate_jp.wav')
    Time_ch, fs4 = lib.load(Compare_File_Path + 'time_ch.wav')
    Time_en, fs5 = lib.load(Compare_File_Path + 'time_en.wav')
    Time_jp, fs6 = lib.load(Compare_File_Path + 'time_jp.wav')
    
    MFCC_test = lib.feature.mfcc(y=pre_emphasis(signal = Language_test), sr=fs0, n_mfcc=20)
    MFCC_lang_ch = lib.feature.mfcc(y=Language_ch, sr=fs1, n_mfcc=20)
    MFCC_lang_en = lib.feature.mfcc(y=Language_en, sr=fs2, n_mfcc=20)
    MFCC_lang_jp = lib.feature.mfcc(y=Language_jp, sr=fs3, n_mfcc=20)
    MFCC_time_ch = lib.feature.mfcc(y=Time_ch, sr=fs4, n_mfcc=20)
    MFCC_time_en = lib.feature.mfcc(y=Time_en, sr=fs5, n_mfcc=20)
    MFCC_time_jp = lib.feature.mfcc(y=Time_jp, sr=fs6, n_mfcc=20)
    D_lang_ch, wp_lang_ch = lib.dtw(MFCC_test, MFCC_lang_ch)
    D_lang_en, wp_lang_en = lib.dtw(MFCC_test, MFCC_lang_en)
    D_lang_jp, wp_lang_jp = lib.dtw(MFCC_test, MFCC_lang_jp)
    D_time_ch, wp_time_ch = lib.dtw(MFCC_test, MFCC_time_ch)
    D_time_en, wp_time_en = lib.dtw(MFCC_test, MFCC_time_en)
    D_time_jp, wp_time_jp = lib.dtw(MFCC_test, MFCC_time_jp)
    
#    D1 = D_lang_ch[wp_lang_ch[-1, 0], wp_lang_ch[-1, 1]]
#    D2 = D_lang_en[wp_lang_en[-1, 0], wp_lang_en[-1, 1]]
#    D3 = D_lang_jp[wp_lang_jp[-1, 0], wp_lang_jp[-1, 1]]
#    D4 = D_time_ch[wp_time_ch[-1, 0], wp_time_ch[-1, 1]]
#    D5 = D_time_en[wp_time_en[-1, 0], wp_time_en[-1, 1]]
#    D6 = D_time_jp[wp_time_jp[-1, 0], wp_time_jp[-1, 1]]
#    
#    Shortest_D = min(D1, D2, D3, D4, D5, D6)
#    if(Shortest_D==D1):
#        FindLanguage(Record_File_Path, 0)
#    elif(Shortest_D==D2):
#        FindLanguage(Record_File_Path, 1)
#    elif(Shortest_D==D3):
#        FindLanguage(Record_File_Path, 2)
#    elif(Shortest_D==D4):
#        FindTime(0)
#    elif(Shortest_D==D5):
#        FindTime(1)
#    elif(Shortest_D==D6):
#        FindTime(2)
    
    Shortest_D = min(D_lang_ch[-1, -1], D_lang_en[-1, -1], D_lang_jp[-1, -1],
                     D_time_ch[-1, -1], D_time_en[-1, -1], D_time_jp[-1, -1])
    if Shortest_D == D_lang_ch[-1, -1]:
        FindLanguage(Record_File_Path, 0)
    elif Shortest_D == D_lang_en[-1, -1]:
        FindLanguage(Record_File_Path, 1)
    elif Shortest_D == D_lang_jp[-1, -1]:
        FindLanguage(Record_File_Path, 2)
    elif Shortest_D == D_time_ch[-1, -1]:
        FindTime(0)
    elif Shortest_D == D_time_en[-1, -1]:
        FindTime(1)
    elif Shortest_D == D_time_jp[-1, -1]:
        FindTime(2)
Example #17
def test_dtw_subseq():
    # query is a linear ramp
    X = np.linspace(0, 1, 100)

    # database is query surrounded by noise
    noise_len = 200
    noise = np.random.rand(noise_len)
    Y = np.concatenate((noise, noise, X, noise))

    _, mut_wp = librosa.dtw(X, Y, subseq=True)

    # estimated sequence has to match original sequence
    # note the +1 due to python indexing
    mut_X = Y[mut_wp[-1][1]:mut_wp[0][1]+1]
    assert np.array_equal(X, mut_X)
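With subseq=True, backtracking starts from the cheapest cell in the last row of the accumulated cost matrix, so the end of the match can also be read off directly (this is the same quantity Example #5 plots); a small sketch reusing X and Y from the test above:

D, wp = librosa.dtw(X, Y, subseq=True)
best_end = D[-1, :].argmin()  # cheapest alignment end within Y
print(best_end == wp[0][1])   # expected: True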
Example #18
def test_dtw_global_constrained():
    # Example taken from:
    # Meinard Mueller, Fundamentals of Music Processing
    X = np.array([[1, 3, 3, 8, 1]])
    Y = np.array([[2, 0, 0, 8, 7, 2]])

    # With band_rad = 0.5, the GT distance array is
    gt_D = np.array([[1., 2., 3., np.inf, np.inf, np.inf],
                     [2., 4., 5., 8., np.inf, np.inf],
                     [np.inf, 5., 7., 10., 12., np.inf],
                     [np.inf, np.inf, 13., 7., 8., 14.],
                     [np.inf, np.inf, np.inf, 14., 13., 9.]])

    mut_D = librosa.dtw(X, Y, backtrack=False, global_constraints=True, band_rad=0.5)
    assert np.array_equal(gt_D, mut_D)
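global_constraints=True applies a Sakoe-Chiba band: cells too far from the (length-rescaled) diagonal are never filled, which is why the corners of gt_D stay at np.inf. A hedged sketch showing that a wide enough band reproduces the unconstrained result:

D_wide = librosa.dtw(X, Y, backtrack=False,
                     global_constraints=True, band_rad=1.0)
D_free = librosa.dtw(X, Y, backtrack=False)
print(np.array_equal(D_wide, D_free))  # expected: True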
Example #19
def MCC_with_DTW(sample, dest):
    largest_sample = 0
    for i in range(0, len(sample)):
        if largest_sample < sample[i]:
            largest_sample = sample[i]

    largest_dest = 0
    for i in range(0, len(dest)):
        if largest_dest < dest[i]:
            largest_dest = dest[i]
    temp = []
    for i in range(0, len(dest)):
        temp.append(dest[i] * largest_sample / largest_dest)
    # MCC code above: Magnitude Control Compare.
    D, wp = librosa.dtw(sample, temp, subseq=True)
    return abs(D[-1, -1])
Example #20
def test_dtw_global_supplied_distance_matrix():
    # Example taken from:
    # Meinard Mueller, Fundamentals of Music Processing
    X = np.array([[1, 3, 3, 8, 1]])
    Y = np.array([[2, 0, 0, 8, 7, 2]])

    # Precompute distance matrix.
    C = cdist(X.T, Y.T, metric='euclidean')

    gt_D = np.array([[1., 2., 3., 10., 16., 17.], [2., 4., 5., 8., 12., 13.],
                     [3., 5., 7., 10., 12., 13.], [9., 11., 13., 7., 8., 14.],
                     [10, 10., 11., 14., 13., 9.]])

    # Supply precomputed distance matrix and specify an invalid distance
    # metric to verify that it isn't used.
    mut_D, _ = librosa.dtw(C=C, metric='invalid')

    assert np.array_equal(gt_D, mut_D)
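Because dtw accepts a precomputed C, the pairwise-distance step (often the dominant cost) can be computed once, cached, or swapped for a different metric; a brief sketch reusing the features above:

C_l1 = cdist(X.T, Y.T, metric='cityblock')
D_l1, wp_l1 = librosa.dtw(C=C_l1)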
Example #21
def test_dtw_global_constrained():
    # Example taken from:
    # Meinard Mueller, Fundamentals of Music Processing
    X = np.array([[1, 3, 3, 8, 1]])
    Y = np.array([[2, 0, 0, 8, 7, 2]])

    # With band_rad = 0.5, the GT distance array is
    gt_D = np.array([[1., 2., 3., np.inf, np.inf, np.inf],
                     [2., 4., 5., 8., np.inf, np.inf],
                     [np.inf, 5., 7., 10., 12., np.inf],
                     [np.inf, np.inf, 13., 7., 8., 14.],
                     [np.inf, np.inf, np.inf, 14., 13., 9.]])

    mut_D = librosa.dtw(X,
                        Y,
                        backtrack=False,
                        global_constraints=True,
                        band_rad=0.5)
    assert np.array_equal(gt_D, mut_D)
Example #22
def MCC_with_DTW(sample, dest):
    largest_sample = 0
    for i in range(0, len(sample)):
        if largest_sample < sample[i]:
            largest_sample = sample[i]

    largest_dest = 0
    for i in range(0, len(dest)):
        if largest_dest < dest[i]:
            largest_dest = dest[i]
    temp = []
    print(largest_sample / largest_dest)
    for i in range(0, len(dest)):
        temp.append(dest[i] * largest_sample / largest_dest)
    #MCC Code above.
    #Magnitude Control Compare.
    print("Start Calc DTW.")
    D, wp = librosa.dtw(sample, temp, subseq=True)
    return D, wp
Example #23
def test_dtw_global_supplied_distance_matrix():
    # Example taken from:
    # Meinard Mueller, Fundamentals of Music Processing
    X = np.array([[1, 3, 3, 8, 1]])
    Y = np.array([[2, 0, 0, 8, 7, 2]])

    # Precompute distance matrix.
    C = cdist(X.T, Y.T, metric='euclidean')

    gt_D = np.array([[1., 2., 3., 10., 16., 17.],
                     [2., 4., 5., 8., 12., 13.],
                     [3., 5., 7., 10., 12., 13.],
                     [9., 11., 13., 7., 8., 14.],
                     [10, 10., 11., 14., 13., 9.]])

    # Supply precomputed distance matrix and specify an invalid distance
    # metric to verify that it isn't used.
    mut_D, _ = librosa.dtw(C=C, metric='invalid')

    assert np.array_equal(gt_D, mut_D)
Example #24
def FindTask(Record_File_Path):
    Compare_File_Path = './comparing voice data/'
    Language_test, fs0 = lib.load(Record_File_Path)
    Language_ch, fs1 = lib.load(Compare_File_Path + 'translate_ch.wav')
    Language_en, fs2 = lib.load(Compare_File_Path + 'translate_en.wav')
    Language_jp, fs3 = lib.load(Compare_File_Path + 'translate_jp.wav')
    Time_ch, fs4 = lib.load(Compare_File_Path + 'time_ch2.wav')
    Time_en, fs5 = lib.load(Compare_File_Path + 'time_en.wav')
    Time_jp, fs6 = lib.load(Compare_File_Path + 'time_jp.wav')

    MFCC_test = lib.feature.mfcc(y=pre_emphasis(signal=Language_test),
                                 sr=fs0,
                                 n_mfcc=20)
    MFCC_lang_ch = lib.feature.mfcc(y=Language_ch, sr=fs1, n_mfcc=20)
    MFCC_lang_en = lib.feature.mfcc(y=Language_en, sr=fs2, n_mfcc=20)
    MFCC_lang_jp = lib.feature.mfcc(y=Language_jp, sr=fs3, n_mfcc=20)
    MFCC_time_ch = lib.feature.mfcc(y=Time_ch, sr=fs4, n_mfcc=20)
    MFCC_time_en = lib.feature.mfcc(y=Time_en, sr=fs5, n_mfcc=20)
    MFCC_time_jp = lib.feature.mfcc(y=Time_jp, sr=fs6, n_mfcc=20)
    D_lang_ch, wp_ch = lib.dtw(MFCC_test, MFCC_lang_ch)
    D_lang_en, wp_en = lib.dtw(MFCC_test, MFCC_lang_en)
    D_lang_jp, wp_jp = lib.dtw(MFCC_test, MFCC_lang_jp)
    D_time_ch, wp_ch = lib.dtw(MFCC_test, MFCC_time_ch)
    D_time_en, wp_en = lib.dtw(MFCC_test, MFCC_time_en)
    D_time_jp, wp_jp = lib.dtw(MFCC_test, MFCC_time_jp)

    g = D_lang_ch[-1, -1]
    gg = D_lang_en[-1, -1]
    ggg = D_lang_jp[-1, -1]
    gggg = D_time_ch[-1, -1]
    ggggg = D_time_en[-1, -1]
    gggggg = D_time_jp[-1, -1]
    Shortest_D = min(D_lang_ch[-1, -1], D_lang_en[-1, -1], D_lang_jp[-1, -1],
                     D_time_ch[-1, -1], D_time_en[-1, -1], D_time_jp[-1, -1])
    if (Shortest_D == D_lang_ch[-1, -1]):
        FindLanguage(Record_File_Path, 0)
    elif (Shortest_D == D_lang_en[-1, -1]):
        FindLanguage(Record_File_Path, 1)
    elif (Shortest_D == D_lang_jp[-1, -1]):
        FindLanguage(Record_File_Path, 2)
    elif (Shortest_D == D_time_ch[-1, -1]):
        FindTime(0)
    elif (Shortest_D == D_time_en[-1, -1]):
        FindTime(1)
    else:
        FindTime(2)
Example #25
def test_dtw_incompatible_args_02():
    librosa.dtw(C=None, X=None, Y=None)
Example #26
def test_dtw_incompatible_args_01():
    librosa.dtw(C=1, X=1, Y=1)
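This call, like Examples #25, #32, and #36, exercises an error path: supplying both C and X/Y (or neither) is rejected. In a test suite such cases are typically wrapped in an expected-exception check; a hedged sketch using pytest:

import pytest
import librosa

def test_dtw_incompatible_args_01():
    with pytest.raises(librosa.ParameterError):
        librosa.dtw(C=1, X=1, Y=1)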
Example #27
def librosa_dtw(X, Y):
    D, wp = librosa.dtw(X, Y, subseq=True)
    return D, wp
Example #28
def test_dtw_incompatible_args_01():
    librosa.dtw(C=1, X=1, Y=1)
Example #29
def FindLanguage(Record_File_Path, Language_code):
    Response_File_Path = './response voice data/'
    if(Language_code==0):
        playsound(Response_File_Path+'language_response_ch.wav')
        time.sleep(0.5)
        playsound(Response_File_Path+'language_select_ch.wav')
        char_append = 'ch'
    elif(Language_code==1):
        playsound(Response_File_Path+'language_response_en.wav')
        time.sleep(0.5)
        playsound(Response_File_Path+'language_select_en.wav')
        char_append = 'en'
    else:
        playsound(Response_File_Path+'language_response_jp.wav')
        time.sleep(0.5)
        playsound(Response_File_Path+'language_select_jp.wav')
        char_append = 'jp'
        
    DetectSound(Record_File_Path)
    Compare_File_Path = './comparing voice data/'
    Language_test, fs0 = lib.load(Record_File_Path)
    MFCC_test = lib.feature.mfcc(y=pre_emphasis(signal = Language_test), sr=fs0, n_mfcc=20)
    
    
    if(Language_code==0):
        Language_en_ch, fs1 = lib.load(Compare_File_Path + 'language_en_ch.wav')
        Language_jp_ch, fs2 = lib.load(Compare_File_Path + 'language_jp_ch.wav')
        MFCC_lang_en_ch = lib.feature.mfcc(y=Language_en_ch, sr=fs1, n_mfcc=20)
        MFCC_lang_jp_ch = lib.feature.mfcc(y=Language_jp_ch, sr=fs2, n_mfcc=20)
        D_lang_en_ch, wp_en_ch = lib.dtw(MFCC_test, MFCC_lang_en_ch)
        D_lang_jp_ch, wp_jp_ch = lib.dtw(MFCC_test, MFCC_lang_jp_ch)
        compare1 = D_lang_en_ch[-1, -1]
        compare2 = D_lang_jp_ch[-1, -1]
        if(compare1<compare2):
            translate_lang = 1
        else:
            translate_lang = 2
    elif(Language_code==1):
        Language_ch_en, fs1 = lib.load(Compare_File_Path + 'language_ch_en.wav')
        Language_jp_en, fs2 = lib.load(Compare_File_Path + 'language_jp_en.wav')
        MFCC_lang_ch_en = lib.feature.mfcc(y=Language_ch_en, sr=fs1, n_mfcc=20)
        MFCC_lang_jp_en = lib.feature.mfcc(y=Language_jp_en, sr=fs2, n_mfcc=20)
        D_lang_ch_en, wp_ch_en = lib.dtw(MFCC_test, MFCC_lang_ch_en)
        D_lang_jp_en, wp_jp_en = lib.dtw(MFCC_test, MFCC_lang_jp_en)
        compare1 = D_lang_ch_en[-1, -1]
        compare2 = D_lang_jp_en[-1, -1]
        if(compare1<compare2):
            translate_lang = 0
        else:
            translate_lang = 2
    else:
        Language_ch_jp, fs1 = lib.load(Compare_File_Path + 'language_ch_jp.wav')
        Language_en_jp, fs2 = lib.load(Compare_File_Path + 'language_en_jp.wav')
        MFCC_lang_ch_jp = lib.feature.mfcc(y=Language_ch_jp, sr=fs1, n_mfcc=20)
        MFCC_lang_en_jp = lib.feature.mfcc(y=Language_en_jp, sr=fs2, n_mfcc=20)
        D_lang_ch_jp, wp_ch_jp = lib.dtw(MFCC_test, MFCC_lang_ch_jp)
        D_lang_en_jp, wp_en_jp = lib.dtw(MFCC_test, MFCC_lang_en_jp)
        compare1 = D_lang_ch_jp[-1, -1]
        compare2 = D_lang_en_jp[-1, -1]
        if(compare1<compare2):
            translate_lang = 0
        else:
            translate_lang = 1
        
    playsound(Response_File_Path+'say_number_'+char_append+'.wav')
    TranslateNumber(Language_code, translate_lang, Record_File_Path)
Example #30
def alignment_dtw(gt_cens_all,
                  gt_tempo,
                  gt_start_frame,
                  gt_end_frame,
                  input_clip_cens,
                  input_clip_tempo,
                  tempo_adj_man,
                  clip_length,
                  cens_fps,
                  tempo_change_ratio_limit_dtw,
                  tempo_change_ratio_limit_clip,
                  tempo_max_song,
                  tempo_min_song,
                  ):

    # find the estimated ground-truth frames within the full song
    est_gt_cens = gt_cens_all[:, gt_start_frame:gt_end_frame]

    # calculate scaled input
    gt_audio_length = est_gt_cens.shape[1]
    input_audio_row_num = input_clip_cens.shape[0]
    scaled_input = align_2_target(input_clip_cens, input_audio_row_num,
                                  gt_audio_length)

    # run DTW(scaled input, estimated GT audio) here
    cost_matrix, wp = librosa.dtw(scaled_input, est_gt_cens,
                                  global_constraints=True,
                                  band_rad=tempo_change_ratio_limit_dtw,
                                  subseq=True)

    pre_reg_x = wp[:, 1]
    pre_reg_x = pre_reg_x[::-1]
    pre_reg_x_with_coef = np.vstack([pre_reg_x, np.ones(len(pre_reg_x))]).T
    pre_reg_y = wp[:, 0]
    pre_reg_y = pre_reg_y[::-1]

    # Solve the least-squares fit once and reuse both the solution and residuals.
    reg_fit = np.linalg.lstsq(pre_reg_x_with_coef, pre_reg_y, rcond=None)
    reg_slope, reg_coef = reg_fit[0]
    reg_residuals = reg_fit[1]

    start_chp = int(0.03 * len(pre_reg_x))
    end_chp = int(0.97 * len(pre_reg_x))
    x_change_point = []  # find all x change point
    for i in range(start_chp, end_chp):
        if pre_reg_x[i + 1] > pre_reg_x[i]:
            x_change_point.append(i + 1)

    slope_x_length = int(len(x_change_point) * 0.032)
    #slope_x_length = 20
    slope_list = []
    for j in range(0, len(x_change_point) - slope_x_length - 1):
        delta_x = float(pre_reg_x[x_change_point[j + slope_x_length]] -
                        pre_reg_x[x_change_point[j]])
        delta_y = float(pre_reg_y[x_change_point[j + slope_x_length]] -
                        pre_reg_y[x_change_point[j]])
        if (delta_x > 0) and (delta_y > 0):
            slope_n = delta_y / delta_x
            slope_list.append(slope_n)

    sorted_slope_list = quicksort(slope_list)  # quicksort: helper assumed defined elsewhere in the source repo

    slope_list_len = len(sorted_slope_list)
    start_list = int(slope_list_len * 0.25)
    end_list = int(slope_list_len * 0.75)
    final_slope = np.mean(sorted_slope_list[start_list:end_list])

    middle_x = int((pre_reg_x[len(pre_reg_x) - 1] + pre_reg_x[0]) / 2)

    middle_x_index = 0
    for k in range(0, len(pre_reg_x)):
        if (pre_reg_x[k] == middle_x):
            middle_x_index = k

    old_line_y = reg_slope * pre_reg_x[middle_x_index] + reg_coef
    new_line_y = final_slope * pre_reg_x[middle_x_index] + reg_coef
    line_dy_center = old_line_y - new_line_y

    zzz_dtw_input_is_faster = 0
    zzz_dtw_input_is_slower = 0

    if (final_slope * est_gt_cens.shape[1] + reg_coef + line_dy_center >
            est_gt_cens.shape[1]):
        zzz_dtw_input_is_slower = 1
    else:
        zzz_dtw_input_is_faster = 1

    zzz_dtw_cal_tempo_ratio = est_gt_cens.shape[1] / float(
        final_slope * est_gt_cens.shape[1] + reg_coef + line_dy_center)

    # set change ratio limit
    zzz_dtw_cal_tempo_ratio = min((1.0 + tempo_change_ratio_limit_clip),
                                  zzz_dtw_cal_tempo_ratio)
    zzz_dtw_cal_tempo_ratio = max(1.0 / (1 + tempo_change_ratio_limit_clip),
                                  zzz_dtw_cal_tempo_ratio)
    #print (zzz_dtw_cal_tempo_ratio)

    # manually overwrite tempo
    zzz_dtw_cal_tempo_ratio = zzz_dtw_cal_tempo_ratio * (
        1.0 + float(tempo_adj_man) / 100.0)

    zzz_dtw_cal_input_tempo = float(
        input_clip_tempo) * zzz_dtw_cal_tempo_ratio

    # force output tempo in a range
    zzz_dtw_cal_input_tempo = max(tempo_min_song, zzz_dtw_cal_input_tempo)
    zzz_dtw_cal_input_tempo = min(tempo_max_song, zzz_dtw_cal_input_tempo)

    zzz_est_gt_endframe = gt_start_frame + int(
        est_gt_cens.shape[1] * zzz_dtw_cal_tempo_ratio)

    zzz_pre_reg_x = pre_reg_x
    zzz_pre_reg_y = pre_reg_y

    zzz_final_reg_slope = final_slope

    zzz_line_offset = reg_coef + line_dy_center

    zzz_reg_residuals = reg_residuals

    # calculate total cost value
    wp_length = wp.shape[0]
    X_start = wp[wp_length - 1, 0]
    X_end = wp[0, 0]
    Y_start = wp[wp_length - 1, 1]
    Y_end = wp[0, 1]
    total_best_path_cost = abs(cost_matrix[X_end, Y_end] -
                               cost_matrix[X_start, Y_start])

    zzz_dtw_cost = total_best_path_cost

    zzz_gt_length = est_gt_cens.shape[1]


    return (zzz_pre_reg_x,
            zzz_pre_reg_y,
            zzz_final_reg_slope,
            zzz_line_offset,
            zzz_reg_residuals,
            zzz_gt_length,
            zzz_dtw_cost,
            zzz_dtw_cal_input_tempo,
            zzz_dtw_cal_tempo_ratio,
            zzz_est_gt_endframe,
            zzz_dtw_input_is_slower,
            zzz_dtw_input_is_faster)
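The tempo ratio above compares the ground-truth length with where the robust-slope line predicts the alignment to end. A toy numeric check of just that formula (numbers invented, not from any real alignment):

gt_len = 100.0      # frames in the ground-truth excerpt
final_slope = 0.8   # robust slope fitted to the warping path
line_offset = 0.0   # reg_coef + line_dy_center in the code above
ratio = gt_len / (final_slope * gt_len + line_offset)
print(ratio)        # 1.25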
Example #31
def get_alignment_from_audio(body, response):
    """Calculate alignment of an MEI file to an audio file.
    
    Returns a dictionary containing IDs of rests and notes as keys
    and their corresponding position in the audiofile as values."""

    multipart_data = list(body.keys())

    if 'mei' not in multipart_data or 'audio' not in multipart_data:
        response.status = HTTP_BAD_REQUEST
        return 'Please provide MEI and audio file.'

    # Work in temporary directory
    with tempfile.TemporaryDirectory() as temp_dir:
        # Load audiofile
        audio_path = os.path.join(temp_dir, 'audio')
        # Write audio to temporary file
        with open(audio_path, mode=('wb')) as audio_file:
            audio_file.write(body['audio'])
        # Read audiofile into array
        try:
            wave_data, sr = librosa.load(audio_path)
        except NoBackendError:
            response.status = HTTP_UNSUPPORTED_MEDIA_TYPE
            return 'Unsupported audio format.'

    # Generate timestamps for all notes and rests of the MEI file
    Mei = jpype.JPackage('meico').mei.Mei  # Get Mei class
    try:
        mei_xml = body['mei'].decode('utf-8')  # Extract MEI data from body
        mei = Mei(mei_xml, False)  # Read in MEI data
        mei.addIds()
        mei.exportMsm(
            720, True, False
        )  # Generate timestamps with ppq=720, no channel 10, no cleanup
        debug_mei_xml = mei.toXML()
    except jpype.JavaException as error:
        response.status = HTTP_BAD_REQUEST
        return traceback.format_exc()

    # Calculate MEI chroma features
    chroma_mei, id_to_chroma_index = from_meico(debug_mei_xml)

    # Calculate audio chroma features
    chroma_size = round(len(wave_data) / chroma_mei.shape[1])
    chroma_audio = librosa.feature.chroma_stft(y=wave_data,
                                               sr=sr,
                                               hop_length=chroma_size)

    # Calculate warping path
    path = librosa.dtw(chroma_mei, chroma_audio)[1]
    path_dict = {key: value for (key, value) in path}

    # Extract mappings
    id_to_time = {}
    chroma_length = len(wave_data) / sr / chroma_audio.shape[1]
    for identifier in id_to_chroma_index:
        id_to_time[identifier] = path_dict[
            id_to_chroma_index[identifier]] * chroma_length

    return id_to_time
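One detail worth noting: a warping path can pair a single MEI chroma frame with several audio frames, and the dict comprehension keeps only the last pair seen per key, which is the earliest audio frame since the path runs end-to-start. A toy sketch:

import numpy as np

path = np.array([[3, 5], [2, 4], [2, 3], [1, 1], [0, 0]])  # end-to-start
path_dict = {key: value for (key, value) in path}
print(path_dict)  # {3: 5, 2: 3, 1: 1, 0: 0}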
Example #32
def test_dtw_incompatible_sigma_diag():
    X = np.array([[1, 3, 3, 8, 1, 2]])
    Y = np.array([[2, 0, 0, 8, 7]])
    librosa.dtw(X=X, Y=Y, step_sizes_sigma=np.ones((1, 2), dtype=int))
Example #33
def get_alignment_from_yt(body, response):
    """Calculate alignment of an MEI file to an youtube video.

    Returns a dictionary containing IDs of rests and notes as keys
    and their corresponding position in the youtube video as values."""

    multipart_data = list(body.keys())

    if 'mei' not in multipart_data or 'youtube-url' not in body:
        response.status = HTTP_BAD_REQUEST
        return 'Please provide MEI and a valid YouTube link.'

    # Work in temporary directory
    with tempfile.TemporaryDirectory() as temp_dir:
        # Download YouTube video
        youtube_url = body['youtube-url']
        video_path = os.path.join(temp_dir, 'youtube.audio')
        download_video(
            youtube_url, 249, video_path
        )  # 249 = <Stream: itag="249" mime_type="audio/webm" abr="50kbps" acodec="opus">

        # Extract audio using FFmpeg
        audio_path = os.path.join(temp_dir, 'audio.wav')
        cmd = [
            'ffmpeg', '-i', video_path, '-acodec', 'pcm_s16le', '-ac', '2',
            audio_path
        ]
        subprocess.call(cmd)

        # Trim silence in audio file and read into numpy array
        audio = AudioSegment.from_file(audio_path, format='wav')
        audio = audio.split_to_mono()[0]
        sr = audio.frame_rate
        trim_start = detect_leading_silence(audio)
        trim_end = detect_leading_silence(audio.reverse())
        trimmed = audio[trim_start:len(audio) - trim_end]
        wave_data = np.asarray(trimmed.get_array_of_samples(), dtype=np.float64)

    # Generate timestamps for all notes and rests of the MEI file
    Mei = jpype.JPackage('meico').mei.Mei  # Get Mei class
    try:
        mei_xml = body['mei'].decode('utf-8')  # Extract MEI data from body
        mei = Mei(mei_xml, False)  # Read in MEI data
        mei.addIds()
        mei.exportMsm(
            720, True, False
        )  # Generate timestamps with ppq=720, no channel 10, no cleanup
        debug_mei_xml = mei.toXML()
    except jpype.JavaException as error:
        response.status = HTTP_BAD_REQUEST
        return traceback.format_exc()

    # Calculate MEI chroma features
    chroma_mei, id_to_chroma_index = from_meico(debug_mei_xml)

    # Calculate audio chroma features
    chroma_size = round(len(wave_data) / chroma_mei.shape[1])
    chroma_audio = librosa.feature.chroma_stft(y=wave_data,
                                               sr=sr,
                                               hop_length=chroma_size)

    # Calculate warping path
    path = librosa.dtw(chroma_mei, chroma_audio)[1]
    path_dict = {key: value for (key, value) in path}

    # Extract mappings
    id_to_time = {}
    chroma_length = len(wave_data) / sr / chroma_audio.shape[1]
    for identifier in id_to_chroma_index:
        id_to_time[identifier] = path_dict[
            id_to_chroma_index[identifier]] * chroma_length
        id_to_time[identifier] += trim_start / 1000  # Offset for trimmed audio

    return id_to_time
Example #34
def dtw(n, m):
    D, wp = librosa.dtw(n, m)
    return wp
Example #35
def TranslateNumber(MotherLan, TransLan, Record_File_Path):
    DetectSound(Record_File_Path)
    Response_File_Path = './response voice data/'
    if MotherLan == 0:
        MotherLanAppend = 'ch'
    elif MotherLan == 1:
        MotherLanAppend = 'en'
    elif MotherLan == 2:
        MotherLanAppend = 'jp'
        
    if TransLan == 0:
        TransLanAppend = 'ch'
    elif TransLan == 1:
        TransLanAppend = 'en'
    elif TransLan == 2:
        TransLanAppend = 'jp'
        
    Language_test, fs = lib.load(Record_File_Path)
    compare0, fs0 = lib.load(Response_File_Path + '0_response_' + MotherLanAppend + '.wav')
    compare1, fs1 = lib.load(Response_File_Path + '1_response_' + MotherLanAppend + '.wav')
    compare2, fs2 = lib.load(Response_File_Path + '2_response_' + MotherLanAppend + '.wav')
    compare3, fs3 = lib.load(Response_File_Path + '3_response_' + MotherLanAppend + '.wav')
    compare4, fs4 = lib.load(Response_File_Path + '4_response_' + MotherLanAppend + '.wav')
    compare5, fs5 = lib.load(Response_File_Path + '5_response_' + MotherLanAppend + '.wav')
    compare6, fs6 = lib.load(Response_File_Path + '6_response_' + MotherLanAppend + '.wav')
    compare7, fs7 = lib.load(Response_File_Path + '7_response_' + MotherLanAppend + '.wav')
    compare8, fs8 = lib.load(Response_File_Path + '8_response_' + MotherLanAppend + '.wav')
    compare9, fs9 = lib.load(Response_File_Path + '9_response_' + MotherLanAppend + '.wav')
#    plt.plot(Language_test)
#    plt.show()
    
    Language_test = pre_emphasis(signal = Language_test)
#    plt.plot(Language_test)
#    plt.show()
    
#    Language_test = butter_lowpass_filter(Language_test, 1000, fs, 6)
#    plt.plot(Language_test)
#    plt.show()
    
    test = Language_test
    
#    test = []    
#    for i in range(len(Language_test)-1):
#        if not((Language_test[i] < 0.005 and Language_test[i] > -0.005) and (Language_test[i+1] < 0.005 and Language_test[i+1] > -0.005)):
#            test = np.hstack((test,Language_test[i]))
            
#    plt.plot(test)
#    plt.show()
    
    D_compare0, wp_0 = lib.dtw(lib.feature.mfcc(y=test, sr=fs, n_mfcc=30), lib.feature.mfcc(y=compare0, sr=fs0, n_mfcc=30))
    D_compare1, wp_1 = lib.dtw(lib.feature.mfcc(y=test, sr=fs, n_mfcc=30), lib.feature.mfcc(y=compare1, sr=fs1, n_mfcc=30))
    D_compare2, wp_2 = lib.dtw(lib.feature.mfcc(y=test, sr=fs, n_mfcc=30), lib.feature.mfcc(y=compare2, sr=fs2, n_mfcc=30))
    D_compare3, wp_3 = lib.dtw(lib.feature.mfcc(y=test, sr=fs, n_mfcc=30), lib.feature.mfcc(y=compare3, sr=fs3, n_mfcc=30))
    D_compare4, wp_4 = lib.dtw(lib.feature.mfcc(y=test, sr=fs, n_mfcc=30), lib.feature.mfcc(y=compare4, sr=fs4, n_mfcc=30))
    D_compare5, wp_5 = lib.dtw(lib.feature.mfcc(y=test, sr=fs, n_mfcc=30), lib.feature.mfcc(y=compare5, sr=fs5, n_mfcc=30))
    D_compare6, wp_6 = lib.dtw(lib.feature.mfcc(y=test, sr=fs, n_mfcc=30), lib.feature.mfcc(y=compare6, sr=fs6, n_mfcc=30))
    D_compare7, wp_7 = lib.dtw(lib.feature.mfcc(y=test, sr=fs, n_mfcc=30), lib.feature.mfcc(y=compare7, sr=fs7, n_mfcc=30))
    D_compare8, wp_8 = lib.dtw(lib.feature.mfcc(y=test, sr=fs, n_mfcc=30), lib.feature.mfcc(y=compare8, sr=fs8, n_mfcc=30))
    D_compare9, wp_9 = lib.dtw(lib.feature.mfcc(y=test, sr=fs, n_mfcc=30), lib.feature.mfcc(y=compare9, sr=fs9, n_mfcc=30))
        
    Shortest_D = min(D_compare0[-1,-1], D_compare1[-1,-1],\
                     D_compare2[-1,-1], D_compare3[-1,-1],\
                     D_compare4[-1,-1], D_compare5[-1,-1],\
                     D_compare6[-1,-1], D_compare7[-1,-1],\
                     D_compare8[-1,-1], D_compare9[-1,-1])
        
    if Shortest_D == D_compare0[-1,-1]: 
        playsound(Response_File_Path+'0_response_' + TransLanAppend + '.wav')
    elif Shortest_D == D_compare1[-1,-1]:
        playsound(Response_File_Path+'1_response_' + TransLanAppend + '.wav')
    elif Shortest_D == D_compare2[-1,-1]:
        playsound(Response_File_Path+'2_response_' + TransLanAppend + '.wav')
    elif Shortest_D == D_compare3[-1,-1]:
        playsound(Response_File_Path+'3_response_' + TransLanAppend + '.wav')
    elif Shortest_D == D_compare4[-1,-1]:
        playsound(Response_File_Path+'4_response_' + TransLanAppend + '.wav')
    elif Shortest_D == D_compare5[-1,-1]:
        playsound(Response_File_Path+'5_response_' + TransLanAppend + '.wav')
    elif Shortest_D == D_compare6[-1,-1]:
        playsound(Response_File_Path+'6_response_' + TransLanAppend + '.wav')
    elif Shortest_D == D_compare7[-1,-1]:
        playsound(Response_File_Path+'7_response_' + TransLanAppend + '.wav')
    elif Shortest_D == D_compare8[-1,-1]:
        playsound(Response_File_Path+'8_response_' + TransLanAppend + '.wav')
    elif Shortest_D == D_compare9[-1,-1]:
        playsound(Response_File_Path+'9_response_' + TransLanAppend + '.wav')  
                    
    return (D_compare0[-1, -1], D_compare1[-1, -1], D_compare2[-1, -1],
            D_compare3[-1, -1], D_compare4[-1, -1], D_compare5[-1, -1],
            D_compare6[-1, -1], D_compare7[-1, -1], D_compare8[-1, -1],
            D_compare9[-1, -1])
Example #36
def test_dtw_incompatible_sigma_add():
    X = np.array([[1, 3, 3, 8, 1]])
    Y = np.array([[2, 0, 0, 8, 7, 2]])
    librosa.dtw(X=X, Y=Y, weights_add=np.arange(10))
Example #37
def TranslateNumber(MotherLan, TransLan, Record_File_Path):
    DetectSound(Record_File_Path)
    Response_File_Path = './response voice data/'
    if MotherLan == 0:
        Language_test, fs = lib.load(Record_File_Path)
        compare0, fs0 = lib.load(Response_File_Path + '0_response_ch.wav')
        compare1, fs1 = lib.load(Response_File_Path + '1_response_ch.wav')
        compare2, fs2 = lib.load(Response_File_Path + '2_response_ch.wav')
        compare3, fs3 = lib.load(Response_File_Path + '3_response_ch.wav')
        compare4, fs4 = lib.load(Response_File_Path + '4_response_ch.wav')
        compare5, fs5 = lib.load(Response_File_Path + '5_response_ch.wav')
        compare6, fs6 = lib.load(Response_File_Path + '6_response_ch.wav')
        compare7, fs7 = lib.load(Response_File_Path + '7_response_ch.wav')
        compare8, fs8 = lib.load(Response_File_Path + '8_response_ch.wav')
        compare9, fs9 = lib.load(Response_File_Path + '9_response_ch.wav')
        MFCC_test = lib.feature.mfcc(y=pre_emphasis(signal=Language_test),
                                     sr=fs,
                                     n_mfcc=20)

        D_compare0, wp_0 = lib.dtw(
            MFCC_test, lib.feature.mfcc(y=compare0, sr=fs0, n_mfcc=20))
        D_compare1, wp_1 = lib.dtw(
            MFCC_test, lib.feature.mfcc(y=compare1, sr=fs1, n_mfcc=20))
        D_compare2, wp_2 = lib.dtw(
            MFCC_test, lib.feature.mfcc(y=compare2, sr=fs2, n_mfcc=20))
        D_compare3, wp_3 = lib.dtw(
            MFCC_test, lib.feature.mfcc(y=compare3, sr=fs3, n_mfcc=20))
        D_compare4, wp_4 = lib.dtw(
            MFCC_test, lib.feature.mfcc(y=compare4, sr=fs4, n_mfcc=20))
        D_compare5, wp_5 = lib.dtw(
            MFCC_test, lib.feature.mfcc(y=compare5, sr=fs5, n_mfcc=20))
        D_compare6, wp_6 = lib.dtw(
            MFCC_test, lib.feature.mfcc(y=compare6, sr=fs6, n_mfcc=20))
        D_compare7, wp_7 = lib.dtw(
            MFCC_test, lib.feature.mfcc(y=compare7, sr=fs7, n_mfcc=20))
        D_compare8, wp_8 = lib.dtw(
            MFCC_test, lib.feature.mfcc(y=compare8, sr=fs8, n_mfcc=20))
        D_compare9, wp_9 = lib.dtw(
            MFCC_test, lib.feature.mfcc(y=compare9, sr=fs9, n_mfcc=20))

        Shortest_D = min(D_compare0[-1,-1], D_compare1[-1,-1],\
                         D_compare2[-1,-1], D_compare3[-1,-1],\
                         D_compare4[-1,-1], D_compare5[-1,-1],\
                         D_compare6[-1,-1], D_compare7[-1,-1],\
                         D_compare8[-1,-1], D_compare9[-1,-1])

        if Shortest_D == D_compare0[-1, -1]:
            if TransLan == 1:
                playsound(Response_File_Path + '0_response_en.wav')
            elif TransLan == 2:
                playsound(Response_File_Path + '0_response_jp.wav')
        elif Shortest_D == D_compare1[-1, -1]:
            if TransLan == 1:
                playsound(Response_File_Path + '1_response_en.wav')
            elif TransLan == 2:
                playsound(Response_File_Path + '1_response_jp.wav')
        elif Shortest_D == D_compare2[-1, -1]:
            if TransLan == 1:
                playsound(Response_File_Path + '2_response_en.wav')
            elif TransLan == 2:
                playsound(Response_File_Path + '2_response_jp.wav')
        elif Shortest_D == D_compare3[-1, -1]:
            if TransLan == 1:
                playsound(Response_File_Path + '3_response_en.wav')
            elif TransLan == 2:
                playsound(Response_File_Path + '3_response_jp.wav')
        elif Shortest_D == D_compare4[-1, -1]:
            if TransLan == 1:
                playsound(Response_File_Path + '4_response_en.wav')
            elif TransLan == 2:
                playsound(Response_File_Path + '4_response_jp.wav')
        elif Shortest_D == D_compare5[-1, -1]:
            if TransLan == 1:
                playsound(Response_File_Path + '5_response_en.wav')
            elif TransLan == 2:
                playsound(Response_File_Path + '5_response_jp.wav')
        elif Shortest_D == D_compare6[-1, -1]:
            if TransLan == 1:
                playsound(Response_File_Path + '6_response_en.wav')
            elif TransLan == 2:
                playsound(Response_File_Path + '6_response_jp.wav')
        elif Shortest_D == D_compare7[-1, -1]:
            if TransLan == 1:
                playsound(Response_File_Path + '7_response_en.wav')
            elif TransLan == 2:
                playsound(Response_File_Path + '7_response_jp.wav')
        elif Shortest_D == D_compare8[-1, -1]:
            if TransLan == 1:
                playsound(Response_File_Path + '8_response_en.wav')
            elif TransLan == 2:
                playsound(Response_File_Path + '8_response_jp.wav')
        elif Shortest_D == D_compare9[-1, -1]:
            if TransLan == 1:
                playsound(Response_File_Path + '9_response_en.wav')
            elif TransLan == 2:
                playsound(Response_File_Path + '9_response_jp.wav')

        return (D_compare0[-1, -1], D_compare1[-1, -1], D_compare2[-1, -1],
                D_compare3[-1, -1], D_compare4[-1, -1], D_compare5[-1, -1],
                D_compare6[-1, -1], D_compare7[-1, -1], D_compare8[-1, -1],
                D_compare9[-1, -1])

    #elif MotherLan == 1:

    #elif MotherLan == 2:

    else:
        print('Invalid mother_lan parameter')
Example #38
from DSPbox import MFCC
import scipy.io.wavfile as wav
import numpy as np
import librosa

rate, signal = wav.read('./Observation.wav')
obser = MFCC(signal, rate)
result = []
for i in range(5):
    rate, signal = wav.read('./{:d}.wav'.format(i + 1))
    compare = MFCC(signal, rate)
    d = np.zeros((len(obser) + 1, len(compare) + 1))
    for x in range(len(obser)):
        d[x + 1, 1] = abs(compare[0] - obser[x]) + d[x, 1]
    for y in range(len(compare)):
        d[1, y + 1] = abs(compare[y] - obser[0]) + d[1, y]
    for y in range(2, len(compare) + 1):
        for x in range(2, len(obser) + 1):
            d[x, y] = abs(compare[y - 1] - obser[x - 1]) + min(
                d[x - 1, y], d[x, y - 1], d[x - 1, y - 1])
    result.append(d[-1, -1])
    print(i + 1, "->", d[-1, -1])
    print(i + 1, "->", librosa.dtw(obser, compare)[0][-1, -1], "(by librosa)")
print("最相似:", np.argmin(result) + 1)
Example #39
def test_1d_input():
    X = np.array([[1], [3], [3], [8], [1]])
    Y = np.array([[2], [0], [0], [8], [7], [2]])
    librosa.dtw(X=X, Y=Y)
Example #40
def DTW(mfcc1, mfcc2):
    # Calculate the distances from the test signal to ref
    d, wp = librosa.dtw(mfcc1, mfcc2, metric='euclidean')
    return d[-1, -1]
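A short usage sketch for this wrapper, with hypothetical file names:

import librosa

y_a, sr_a = librosa.load('a.wav')
y_b, sr_b = librosa.load('b.wav')
m1 = librosa.feature.mfcc(y=y_a, sr=sr_a)
m2 = librosa.feature.mfcc(y=y_b, sr=sr_b)
print(DTW(m1, m2))  # scalar accumulated alignment cost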
Example #41
import time

    
if __name__ == '__main__':
    fs, sig = wavfile.read('Observation.wav')
    obser = dsp.MFCC(sig, fs)
    #print(obser)

    file = ['1.wav', '2.wav', '3.wav', '4.wav', '5.wav']
    Distance = []

    tStart = time.time()  # start timing
    for index in file:
        fs, sig = wavfile.read(index)
        obser_ = dsp.MFCC(sig, fs)
        # obser and obser_ are the MFCC feature vectors of the audio files;
        # D is the accumulated cost matrix.
        D, wp = lb.dtw(obser, obser_)
        #print('Wp=',wp)
        #print('D=',D)
        A = D[-1, -1]  # final accumulated distance A(N, M)
        print(index, ' A(N,M)=', A, sep='')
        Distance.append(A)
        #print (Distance)
    
    count = 1
    for index in Distance:
        if index == min(Distance):
            print('Ans = ', count, '.wav', sep='')
        count += 1
            
    tEnd = time.time()
    print ("DTW by python cost %f sec" % (tEnd - tStart))#會自動做近位
Example #42
def test_dtw_incompatible_args_02():
    librosa.dtw(C=None, X=None, Y=None)
Example #43
def test_1d_input():
    X = np.array([[1], [3], [3], [8], [1]])
    Y = np.array([[2], [0], [0], [8], [7], [2]])
    librosa.dtw(X=X, Y=Y)