Ejemplo n.º 1
0
def lineWordCount(textgrid_file, score_file):
    '''
    Count lines, words, dians and phonemes of a textgrid annotation,
    keeping only the lines that have a score (non-zero bpm).
    :param textgrid_file: annotation TextGrid file
    :param score_file: csv score file providing per-line bpm values
    :return: numLines, numWords, numDians, numPhos, wordDurationList,
             dianDurationList, nvcDurationList, vcDurationList
    '''

    numLines, numWords, numDians, numPhos = 0, 0, 0, 0
    wordDurationList, dianDurationList, nvcDurationList, vcDurationList = [], [], [], []

    entireLine = textgridParser.textGrid2WordList(textgrid_file,
                                                  whichTier='line')
    entireWordList = textgridParser.textGrid2WordList(textgrid_file,
                                                      whichTier='pinyin')
    entireDianList = textgridParser.textGrid2WordList(textgrid_file,
                                                      whichTier='dian')
    entirePhoList = textgridParser.textGrid2WordList(textgrid_file,
                                                     whichTier='details')

    utterance_durations, bpm = scoreParser.csvDurationScoreParser(score_file)

    def _filterByBpm(nestedWordLists):
        # keep only the lines whose bpm is non-zero (lines with a score)
        # and count the annotation units of the retained lines;
        # shared by the word, dian and phoneme branches below
        filtered, count = [], 0
        for ii, wordList in enumerate(nestedWordLists):
            if int(bpm[ii]):
                filtered.append(wordList)
                count += len(wordList[1])
        return filtered, count

    # parse the word (pinyin) tier line by line
    if len(entireWordList):
        nestedWordLists, numLines, _ = textgridParser.wordListsParseByLines(
            entireLine, entireWordList)
        nestedWordLists_filtered, numWords = _filterByBpm(nestedWordLists)
        # numLines counts only the lines that survived the bpm filter
        numLines = len(nestedWordLists_filtered)
        wordDurationList = wordDuration(nestedWordLists_filtered)

    # parse the dian tier line by line
    if len(entireDianList):
        nestedWordLists, _, _ = textgridParser.wordListsParseByLines(
            entireLine, entireDianList)
        nestedWordLists_filtered, numDians = _filterByBpm(nestedWordLists)
        dianDurationList = wordDuration(nestedWordLists_filtered)

    # parse the phoneme tier line by line
    if len(entirePhoList):
        nestedWordLists, _, _ = textgridParser.wordListsParseByLines(
            entireLine, entirePhoList)
        nestedWordLists_filtered, numPhos = _filterByBpm(nestedWordLists)
        nvcDurationList, vcDurationList = phoDuration(nestedWordLists_filtered)

    return numLines, numWords, numDians, numPhos, wordDurationList, dianDurationList, nvcDurationList, vcDurationList
Ejemplo n.º 2
0
def getBoundaryNumber(recordings, dataset_path):
    """
    Count syllable onsets for each recording; the first onset of every
    phrase is not counted.
    :param recordings: iterable of recording names (without extension)
    :param dataset_path: dataset sub-path under aCapella_root
    :return: list of [recording_name, onset count] pairs
    """
    listOnset = []
    for i_recording, recording_name in enumerate(recordings):
        groundtruth_textgrid_file   = os.path.join(aCapella_root, dataset_path, annotation_path, recording_name+'.TextGrid')
        score_file                  = os.path.join(aCapella_root, dataset_path, score_path,      recording_name+'.csv')

        if not os.path.isfile(score_file):
            # print() call form: Python 3 compatible, consistent with the
            # print() calls used by the other functions in this file
            print('Score not found: ' + score_file)
            continue

        lineList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='line')
        utteranceList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='dianSilence')

        # parse lines of groundtruth
        nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(lineList, utteranceList)

        # parse score
        utterance_durations, bpm = scoreParser.csvDurationScoreParser(score_file)

        # count onsets of the lines that have a score
        # (loop variable renamed so the builtin `list` is not shadowed)
        numOnset = 0
        for idx, utterance in enumerate(nestedUtteranceLists):
            if int(bpm[idx]):
                print('Counting onset number ... ' + recording_name + ' phrase ' + str(idx + 1))

                ul = utterance[1]
                numOnsetLine = len(ul)-1 # we don't count the first onset
                numOnset += numOnsetLine

        listOnset += [[recording_name, numOnset]]
    return listOnset
Ejemplo n.º 3
0
def phraseBoundaryWriter(textgrid_file, outputFilename):
    '''
    Write phrase boundary from textgrid into outputFilename, example: .syll.lab
    :param textgrid_file: annotation TextGrid file
    :param outputFilename: output lab filename
    :return: None (writes outputFilename as a side effect)
    '''

    # read phrase list and utterance list
    lineList = textgridParser.textGrid2WordList(textgrid_file,
                                                whichTier='line')
    utteranceList = textgridParser.textGrid2WordList(textgrid_file,
                                                     whichTier='dianSilence')

    # parse lines of groundtruth
    nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(
        lineList, utteranceList)

    # phrase start, end time: keep only the line-level entry of each nested
    # list (comprehension avoids shadowing the builtin `list`, which the
    # original loop variable did)
    nonEmptyLineList = [utterance[0] for utterance in nestedUtteranceLists]

    boundaryLabWriter(nonEmptyLineList, outputFilename)
Ejemplo n.º 4
0
def dump_training_data_textgrid_helper(wav_path,
                                       textgrid_path,
                                       recording_name,
                                       tier_parent=None,
                                       tier_child=None):
    """
    Load the audio feature of a recording and parse its textgrid annotation.
    :param wav_path: root path of the wav files
    :param textgrid_path: root path of the TextGrid annotation files
    :param recording_name: recording file name without extension
    :param tier_parent: parent tier name, e.g. line
    :param tier_child: child tier name, e.g. syllable
    :return: child-tier word lists grouped by parent-tier intervals,
             log-mel feature of the wav file
    """

    ground_truth_textgrid_file = os.path.join(textgrid_path,
                                              recording_name + '.TextGrid')
    wav_file = os.path.join(wav_path, recording_name + '.wav')
    line_list = textGrid2WordList(ground_truth_textgrid_file,
                                  whichTier=tier_parent)
    utterance_list = textGrid2WordList(ground_truth_textgrid_file,
                                       whichTier=tier_child)

    # parse lines of groundtruth: group child-tier entries by parent-tier spans
    nested_utterance_lists, num_lines, num_utterances = wordListsParseByLines(
        line_list, utterance_list)

    # load audio
    # NOTE(review): fs and hopsize_t are module-level globals — presumably the
    # shared feature-extraction settings; confirm against the module header
    log_mel = getMFCCBands2DMadmom(wav_file, fs, hopsize_t, channel=1)

    return nested_utterance_lists, log_mel
def getBoundaryNumber(textgrid_path, score_path):
    """
    output a list to show the syllable number for each aria,
    the syllable number is extracted from the textgrid
    the textgrid needs to have a score
    :param textgrid_path: root path of the artist-level TextGrid folders
    :param score_path: root path of the artist-level csv score folders
    :return: list of [artist, recording, onset count] entries
    """
    listOnset = []

    # only the artist-level sub-directories are needed; read the first
    # os.walk() triple instead of walking (and materializing) the whole tree
    list_artist_level_path = next(os.walk(textgrid_path))[1]

    for artist_path in list_artist_level_path:

        textgrid_artist_path = join(textgrid_path, artist_path)
        recording_names = [f for f in os.listdir(textgrid_artist_path)
                           if os.path.isfile(join(textgrid_artist_path, f))]

        for rn in recording_names:
            rn = rn.split('.')[0]
            groundtruth_textgrid_file = join(textgrid_path, artist_path, rn+'.TextGrid')
            score_file = join(score_path, artist_path, rn + '.csv')

            # skip recordings without a score
            if not os.path.isfile(score_file):
                continue

            lineList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='line')
            utteranceList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='dianSilence')

            # parse lines of groundtruth
            nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(lineList,
                                                                                                 utteranceList)

            # parse score
            _, utterance_durations, bpm = scoreParser.csvDurationScoreParser(score_file)

            # count onsets of the scored lines
            # (loop variable renamed so the builtin `list` is not shadowed)
            numOnset = 0
            for idx, utterance in enumerate(nestedUtteranceLists):
                try:
                    if float(bpm[idx]):
                        print('Counting onset number ... ' + rn + ' phrase ' + str(idx + 1))

                        ul = utterance[1]
                        numOnsetLine = len(ul) - 1  # we don't count the first onset
                        numOnset += numOnsetLine
                except IndexError:
                    # score has fewer lines than the annotation
                    print(idx, 'not exist for recording', rn)

            listOnset += [[artist_path, rn, numOnset]]

    return listOnset
Ejemplo n.º 6
0
def textgrid_syllable_phoneme_parser(textgrid_file, tier1, tier2):
    """
    Parse a textgrid file into line-grouped syllable and phoneme lists.
    :param textgrid_file: filename
    :param tier1: syllable tier name
    :param tier2: phoneme tier name
    :return: nested syllable lists, nested phoneme lists
    """
    lines = textGrid2WordList(textgrid_file, whichTier='line')
    syllables = textGrid2WordList(textgrid_file, whichTier=tier1)
    phonemes = textGrid2WordList(textgrid_file, whichTier=tier2)

    # group both tiers by the line boundaries of the ground truth;
    # only the nested lists are needed, the counts are discarded
    grouped_syllables = wordListsParseByLines(lines, syllables)[0]
    grouped_phonemes = wordListsParseByLines(lines, phonemes)[0]

    return grouped_syllables, grouped_phonemes
def dump_feature_onset_helper(wav_path, textgrid_path, artist_name,
                              recording_name):
    """
    Parse the annotation textgrid of one recording and load its audio feature.
    :param wav_path: root path of the wav files
    :param textgrid_path: root path of the TextGrid annotations
    :param artist_name: artist sub-directory name
    :param recording_name: recording file name without extension
    :return: nested utterance lists, nested phoneme lists, audio feature,
             flat phoneme list
    """
    textgrid_file = os.path.join(textgrid_path, artist_name,
                                 recording_name + '.TextGrid')
    audio_file = os.path.join(wav_path, artist_name, recording_name + '.wav')

    line_tier = textGrid2WordList(textgrid_file, whichTier='line')
    utterance_tier = textGrid2WordList(textgrid_file, whichTier='dianSilence')
    phoneme_tier = textGrid2WordList(textgrid_file, whichTier='details')

    # group the utterance and phoneme tiers by the annotated lines
    nested_utterances, _, _ = wordListsParseByLines(line_tier, utterance_tier)
    nested_phonemes, _, _ = wordListsParseByLines(line_tier, phoneme_tier)

    # load audio feature
    feature = get_log_mel_madmom(audio_file, fs, hopsize_t, channel=1)

    return nested_utterances, nested_phonemes, feature, phoneme_tier
def lineWordCount(textgrid_file):
    """
    Count lines, words and dians of a textgrid annotation.
    :param textgrid_file: annotation file
    :return: numLines, numWords, numDians, word durations, dian durations
    """

    numLines = numWords = numDians = 0
    wordDurationList = []
    dianDurationList = []

    lineTier = textgridParser.textGrid2WordList(textgrid_file, whichTier="line")
    wordTier = textgridParser.textGrid2WordList(textgrid_file, whichTier="pinyin")
    dianTier = textgridParser.textGrid2WordList(textgrid_file, whichTier="dian")

    # group the word (pinyin) tier by lines
    if wordTier:
        nested, numLines, numWords = textgridParser.wordListsParseByLines(lineTier, wordTier)
        wordDurationList = wordDuration(nested)

    # group the dian tier by lines
    if dianTier:
        nested, numLines, numDians = textgridParser.wordListsParseByLines(lineTier, dianTier)
        dianDurationList = wordDuration(nested)

    return numLines, numWords, numDians, wordDurationList, dianDurationList
Ejemplo n.º 9
0
def dump_feature_onset_helper(lab, wav_path, textgrid_path, score_path,
                              artist_name, recording_name, feature_type):
    """
    load or parse audio, textgrid
    :param lab: if truthy, read boundaries from a .lab file instead of TextGrid
    :param wav_path: root path of the audio files
    :param textgrid_path: root path of the annotation files
    :param score_path: root path of the csv score files
    :param artist_name: artist sub-directory name
    :param recording_name: recording file name without extension
    :param feature_type: only 'madmom' is supported
    :return: nested utterance lists, utterance durations, bpm, audio feature
    :raises ValueError: if feature_type is not supported
    """
    if not lab:
        ground_truth_textgrid_file = os.path.join(textgrid_path, artist_name,
                                                  recording_name + '.TextGrid')
        wav_file = os.path.join(wav_path, artist_name, recording_name + '.wav')
        line_list = textGrid2WordList(ground_truth_textgrid_file,
                                      whichTier='line')
        utterance_list = textGrid2WordList(ground_truth_textgrid_file,
                                           whichTier='dianSilence')

        # parse lines of groundtruth
        nested_utterance_lists, num_lines, num_utterances = wordListsParseByLines(
            line_list, utterance_list)
    else:
        ground_truth_textgrid_file = os.path.join(textgrid_path, artist_name,
                                                  recording_name + '.lab')
        wav_file = os.path.join(wav_path, artist_name, recording_name + '.mp3')
        nested_utterance_lists = [
            lab2WordList(ground_truth_textgrid_file, label=True)
        ]

    # parse score
    score_file = os.path.join(score_path, artist_name, recording_name + '.csv')
    _, utterance_durations, bpm = csvDurationScoreParser(score_file)

    # load audio
    if feature_type == 'madmom':
        mfcc = getMFCCBands2DMadmom(wav_file, fs, hopsize_t, channel=1)
    else:
        print(feature_type + ' is not exist.')
        # bug fix: a bare `raise` with no active exception raises a confusing
        # RuntimeError; raise an explicit, catchable error instead
        raise ValueError(feature_type + ' is not exist.')

    return nested_utterance_lists, utterance_durations, bpm, mfcc
Ejemplo n.º 10
0
def phraseBoundaryWriter(textgrid_file, outputFilename):
    """
    Write phrase boundary from textgrid into outputFilename, example: .syll.lab
    :param textgrid_file: annotation TextGrid file
    :param outputFilename: output lab filename
    :return: None (writes outputFilename as a side effect)
    """

    # read phrase list and utterance list
    lineList = textgridParser.textGrid2WordList(textgrid_file, whichTier="line")
    utteranceList = textgridParser.textGrid2WordList(textgrid_file, whichTier="utterance")

    # parse lines of groundtruth
    nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(lineList, utteranceList)

    # phrase start, end time: keep only the line-level entry of each nested
    # list (comprehension avoids shadowing the builtin `list`, which the
    # original loop variable did)
    nonEmptyLineList = [utterance[0] for utterance in nestedUtteranceLists]

    boundaryLabWriter(nonEmptyLineList, outputFilename)
Ejemplo n.º 11
0
def batch_eval(annotation_path,
               segSyllable_path,
               score_path,
               groundtruth_path,
               eval_details_path,
               recordings,
               tolerance,
               label=False,
               decoding_method='viterbi'):
    """
    Evaluate detected syllable boundaries against the ground truth for a set
    of recordings, accumulating counters over all evaluated phrases.
    :param annotation_path: TextGrid annotation root; when falsy the ground
        truth is read from .lab files under groundtruth_path instead
    :param segSyllable_path: root of the detected syllable boundary lab files
    :param score_path: root of the csv score files
    :param groundtruth_path: root of the ground truth lab files
    :param eval_details_path: root for evaluation detail output (may be falsy)
    :param recordings: iterable of (artist_path, recording_name) pairs
    :param tolerance: boundary matching tolerance passed to the evaluator
    :param label: also evaluate the syllable labels, not only the boundaries
    :param decoding_method: 'viterbi' means the detected labs carry labels
    :return: sums of detected/groundtruth boundaries, groundtruth phrases,
        correct, onset correct, offset correct, insertion and deletion counts
    """

    sumDetectedBoundaries, sumGroundtruthPhrases, sumGroundtruthBoundaries, sumCorrect, sumOnsetCorrect, \
    sumOffsetCorrect, sumInsertion, sumDeletion = 0, 0, 0, 0, 0, 0, 0, 0

    for artist_path, recording_name in recordings:

        if annotation_path:
            groundtruth_textgrid_file = os.path.join(
                annotation_path, artist_path, recording_name + '.TextGrid')
            groundtruth_lab_file_head = os.path.join(groundtruth_path,
                                                     artist_path)
        else:
            groundtruth_syllable_lab = os.path.join(groundtruth_path,
                                                    artist_path,
                                                    recording_name + '.lab')

        detected_lab_file_head = os.path.join(segSyllable_path, artist_path,
                                              recording_name)

        score_file = os.path.join(score_path, artist_path,
                                  recording_name + '.csv')

        # parse score
        if annotation_path:
            _, _, utterance_durations, bpm = scoreParser.csvScorePinyinParser(
                score_file)
        else:
            _, utterance_durations, bpm = scoreParser.csvDurationScoreParser(
                score_file)

        if eval_details_path:
            eval_result_details_file_head = os.path.join(
                eval_details_path, artist_path)

        if not os.path.isfile(score_file):
            # print() call form: consistent with the print() calls already
            # used in this function, and Python 3 compatible
            print('Score not found: ' + score_file)
            continue

        if annotation_path:
            # create ground truth lab path, if not exist
            if not os.path.isdir(groundtruth_lab_file_head):
                os.makedirs(groundtruth_lab_file_head)

            if not os.path.isdir(eval_result_details_file_head):
                os.makedirs(eval_result_details_file_head)

            lineList = textgridParser.textGrid2WordList(
                groundtruth_textgrid_file, whichTier='line')
            utteranceList = textgridParser.textGrid2WordList(
                groundtruth_textgrid_file, whichTier='dianSilence')

            # parse lines of groundtruth
            nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(
                lineList, utteranceList)

            # create the ground truth lab files
            # (loop variable renamed so the builtin `list` is not shadowed)
            for idx, utterance in enumerate(nestedUtteranceLists):
                try:
                    print(bpm[idx])
                except IndexError:
                    # score has fewer lines than the annotation
                    continue

                if float(bpm[idx]):
                    print('Creating ground truth lab ... ' + recording_name + ' phrase ' + str(
                        idx + 1))

                    ul = utterance[1]
                    firstStartTime = ul[0][0]
                    # shift boundaries so the phrase starts at time 0
                    groundtruthBoundaries = [
                        (np.array(ul_element[:2]) - firstStartTime).tolist() +
                        [ul_element[2]] for ul_element in ul
                    ]
                    groundtruth_syllable_lab = join(
                        groundtruth_lab_file_head,
                        recording_name + '_' + str(idx + 1) + '.syll.lab')

                    # text mode: the lab file is plain text; "wb" would fail
                    # under Python 3 because str cannot be written to a binary file
                    with open(groundtruth_syllable_lab, "w") as text_file:
                        for gtbs in groundtruthBoundaries:
                            text_file.write("{0} {1} {2}\n".format(
                                gtbs[0], gtbs[1], gtbs[2]))
        else:
            nestedUtteranceLists = [
                labParser.lab2WordList(groundtruth_syllable_lab, label=label)
            ]

        for idx, utterance in enumerate(nestedUtteranceLists):
            try:
                print(bpm[idx])
            except IndexError:
                continue

            if float(bpm[idx]):
                print('Evaluating... ' + recording_name + ' phrase ' + str(
                    idx + 1))

                if annotation_path:
                    ul = utterance[1]
                    firstStartTime = ul[0][0]
                    groundtruthBoundaries = [
                        (np.array(ul_element[:2]) - firstStartTime).tolist() +
                        [ul_element[2]] for ul_element in ul
                    ]
                else:
                    firstStartTime = utterance[0][0]
                    groundtruthBoundaries = [
                        (np.array(ul_element[:2]) - firstStartTime).tolist() +
                        [ul_element[2]] for ul_element in utterance
                    ]

                detected_syllable_lab = detected_lab_file_head + '_' + str(
                    idx + 1) + '.syll.lab'
                if not os.path.isfile(detected_syllable_lab):
                    print('Syll lab file not found: ' + detected_syllable_lab)
                    continue

                # read boundary detected lab into python list
                lab_label = True if decoding_method == 'viterbi' else False
                detectedBoundaries = labParser.lab2WordList(
                    detected_syllable_lab, label=lab_label)

                numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, \
                numInsertion, numDeletion, correct_list = evaluation2.boundaryEval(groundtruthBoundaries, detectedBoundaries, tolerance, label)

                sumDetectedBoundaries += numDetectedBoundaries
                sumGroundtruthBoundaries += numGroundtruthBoundaries
                sumGroundtruthPhrases += 1
                sumCorrect += numCorrect
                sumOnsetCorrect += numOnsetCorrect
                sumOffsetCorrect += numOffsetCorrect
                sumInsertion += numInsertion
                sumDeletion += numDeletion

                print("Detected: {0}, Ground truth: {1}, Correct: {2}, Onset correct: {3}, "
                      "Offset correct: {4}, Insertion: {5}, Deletion: {6}\n".
                      format(numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect,
                             numOffsetCorrect, numInsertion, numDeletion))

    return sumDetectedBoundaries, sumGroundtruthBoundaries, sumGroundtruthPhrases, sumCorrect, sumOnsetCorrect, \
           sumOffsetCorrect, sumInsertion, sumDeletion
Ejemplo n.º 12
0
def batch_eval(aCapella_root,
               dataset_path,
               annotation_path,
               segPhrase_path,
               segSyllable_path,
               score_path,
               recordings,
               tolerance,
               label=True):
    """
    Evaluate detected syllable boundaries against TextGrid ground truth for a
    set of recordings, writing the ground truth lab files on the way.
    :param aCapella_root: dataset root directory
    :param dataset_path: dataset sub-path under the root
    :param annotation_path: TextGrid annotation sub-path
    :param segPhrase_path: phrase boundary lab sub-path
    :param segSyllable_path: detected syllable boundary lab root
    :param score_path: csv score sub-path
    :param recordings: iterable of recording names (without extension)
    :param tolerance: boundary matching tolerance passed to the evaluator
    :param label: read and evaluate labels as well as boundaries
    :return: sums of detected/groundtruth boundaries, groundtruth phrases,
        correct, onset correct, offset correct, insertion and deletion counts
    """

    sumDetectedBoundaries, sumGroundtruthPhrases, sumGroundtruthBoundaries, sumCorrect, sumOnsetCorrect, \
    sumOffsetCorrect, sumInsertion, sumDeletion = 0, 0, 0, 0, 0, 0, 0, 0

    for i_recording, recording_name in enumerate(recordings):

        groundtruth_textgrid_file = os.path.join(aCapella_root, dataset_path,
                                                 annotation_path,
                                                 recording_name + '.TextGrid')
        phrase_boundary_lab_file = os.path.join(aCapella_root, dataset_path,
                                                segPhrase_path,
                                                recording_name + '.lab')
        detected_lab_file_head = os.path.join(segSyllable_path, dataset_path,
                                              recording_name)

        score_file = os.path.join(aCapella_root, dataset_path, score_path,
                                  recording_name + '.csv')

        groundtruth_lab_file_head = os.path.join(aCapella_root, dataset_path,
                                                 groundtruth_lab_path,
                                                 recording_name)
        eval_result_details_file_head = os.path.join(aCapella_root,
                                                     dataset_path,
                                                     eval_details_path,
                                                     recording_name)

        if not os.path.isfile(score_file):
            # print() call form: Python 3 compatible, consistent with the
            # print() calls used by the other functions in this file
            print('Score not found: ' + score_file)
            continue

        # create ground truth lab path, if not exist
        if not os.path.isdir(groundtruth_lab_file_head):
            os.makedirs(groundtruth_lab_file_head)

        if not os.path.isdir(eval_result_details_file_head):
            os.makedirs(eval_result_details_file_head)

        lineList = textgridParser.textGrid2WordList(groundtruth_textgrid_file,
                                                    whichTier='line')
        utteranceList = textgridParser.textGrid2WordList(
            groundtruth_textgrid_file, whichTier='dianSilence')

        # parse lines of groundtruth
        nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(
            lineList, utteranceList)

        # parse score
        utterance_durations, bpm = scoreParser.csvDurationScoreParser(
            score_file)

        # create the ground truth lab files
        # (loop variable renamed so the builtin `list` is not shadowed)
        for idx, utterance in enumerate(nestedUtteranceLists):
            if int(bpm[idx]):
                print('Creating ground truth lab ... ' + recording_name + ' phrase ' + str(
                    idx + 1))

                ul = utterance[1]
                firstStartTime = ul[0][0]
                # shift boundaries so the phrase starts at time 0
                groundtruthBoundaries = [
                    (np.array(ul_element[:2]) - firstStartTime).tolist() +
                    [ul_element[2]] for ul_element in ul
                ]
                groundtruth_syllable_lab = groundtruth_lab_file_head + '_' + str(
                    idx + 1) + '.syll.lab'

                # text mode: the lab file is plain text; "wb" would fail
                # under Python 3 because str cannot be written to a binary file
                with open(groundtruth_syllable_lab, "w") as text_file:
                    for gtbs in groundtruthBoundaries:
                        text_file.write("{0} {1} {2}\n".format(
                            gtbs[0], gtbs[1], gtbs[2]))

        # syllable boundaries groundtruth of each line
        for idx, utterance in enumerate(nestedUtteranceLists):
            if int(bpm[idx]):
                print('Evaluating... ' + recording_name + ' phrase ' + str(
                    idx + 1))

                ul = utterance[1]
                firstStartTime = ul[0][0]
                groundtruthBoundaries = [
                    (np.array(ul_element[:2]) - firstStartTime).tolist() +
                    [ul_element[2]] for ul_element in ul
                ]

                detected_syllable_lab = detected_lab_file_head + '_' + str(
                    idx + 1) + '.syll.lab'
                if not os.path.isfile(detected_syllable_lab):
                    print('Syll lab file not found: ' + detected_syllable_lab)
                    continue

                # read boundary detected lab into python list
                detectedBoundaries = labParser.lab2WordList(
                    detected_syllable_lab, withLabel=label)

                numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, \
                numInsertion, numDeletion, correct_list = evaluation2.boundaryEval(groundtruthBoundaries, detectedBoundaries, tolerance, label)

                sumDetectedBoundaries += numDetectedBoundaries
                sumGroundtruthBoundaries += numGroundtruthBoundaries
                sumGroundtruthPhrases += 1
                sumCorrect += numCorrect
                sumOnsetCorrect += numOnsetCorrect
                sumOffsetCorrect += numOffsetCorrect
                sumInsertion += numInsertion
                sumDeletion += numDeletion

                # only report phrases with a low correct rate
                if numCorrect / float(numGroundtruthBoundaries) < 0.7:
                    print("Detected: {0}, Ground truth: {1}, Correct: {2}, Onset correct: {3}, "
                          "Offset correct: {4}, Insertion: {5}, Deletion: {6}\n".
                          format(numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect,
                                 numOffsetCorrect, numInsertion, numDeletion))

    return sumDetectedBoundaries, sumGroundtruthBoundaries, sumGroundtruthPhrases, sumCorrect, sumOnsetCorrect, \
           sumOffsetCorrect, sumInsertion, sumDeletion
Ejemplo n.º 13
0
def batch_eval(aCapella_root, dataset_path, annotation_path, segPhrase_path, segSyllable_path, score_path, recordings, tolerance):
    """
    Evaluate detected syllable boundaries against TextGrid ground truth for a
    set of recordings (boundary times only, no labels).
    :param aCapella_root: dataset root directory
    :param dataset_path: dataset sub-path under the root
    :param annotation_path: TextGrid annotation sub-path
    :param segPhrase_path: phrase boundary lab sub-path
    :param segSyllable_path: detected syllable boundary lab sub-path
    :param score_path: csv score sub-path
    :param recordings: iterable of recording names (without extension)
    :param tolerance: boundary matching tolerance passed to the evaluator
    :return: sums of detected/groundtruth boundaries, groundtruth phrases,
        correct, onset correct, offset correct, insertion and deletion counts
    """

    sumDetectedBoundaries, sumGroundtruthPhrases, sumGroundtruthBoundaries, sumCorrect, sumOnsetCorrect, \
    sumOffsetCorrect, sumInsertion, sumDeletion = 0, 0, 0, 0, 0, 0, 0, 0

    for i_recording, recording_name in enumerate(recordings):

        groundtruth_textgrid_file   = os.path.join(aCapella_root, dataset_path, annotation_path, recording_name+'.TextGrid')
        phrase_boundary_lab_file    = os.path.join(aCapella_root, dataset_path, segPhrase_path,  recording_name+'.lab')
        detected_lab_file_head      = os.path.join(aCapella_root, dataset_path, segSyllable_path,recording_name)
        score_file                  = os.path.join(aCapella_root, dataset_path, score_path,      recording_name+'.csv')

        if not os.path.isfile(score_file):
            # print() call form: Python 3 compatible, consistent with the
            # print() calls used by the other functions in this file
            print('Score not found: ' + score_file)
            continue

        lineList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='line')
        utteranceList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='dianSilence')

        # parse lines of groundtruth
        nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(lineList, utteranceList)

        # parse score
        utterance_durations, bpm = scoreParser.csvDurationScoreParser(score_file)

        # syllable boundaries groundtruth of each line
        # (loop variable renamed so the builtin `list` is not shadowed)
        for idx, utterance in enumerate(nestedUtteranceLists):
            if int(bpm[idx]):
                print('Evaluating... ' + recording_name + ' phrase ' + str(idx+1))

                ul = utterance[1]
                firstStartTime = ul[0][0]
                # shift boundaries so the phrase starts at time 0
                groundtruthBoundaries = [(np.array(ul_element[:2]) - firstStartTime).tolist() for ul_element in ul]

                detected_syllable_lab = detected_lab_file_head+'_'+str(idx+1)+'.syll.lab'
                if not os.path.isfile(detected_syllable_lab):
                    print('Syll lab file not found: ' + detected_syllable_lab)
                    continue

                # read boundary detected lab into python list
                detectedBoundaries = labParser.lab2WordList(detected_syllable_lab)

                numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, \
                numInsertion, numDeletion = evaluation.boundaryEval(groundtruthBoundaries, detectedBoundaries, tolerance)

                sumDetectedBoundaries += numDetectedBoundaries
                sumGroundtruthBoundaries += numGroundtruthBoundaries
                sumGroundtruthPhrases += 1
                sumCorrect += numCorrect
                sumOnsetCorrect += numOnsetCorrect
                sumOffsetCorrect += numOffsetCorrect
                sumInsertion += numInsertion
                sumDeletion += numDeletion

                # only report phrases with a low correct rate
                if numCorrect/float(numGroundtruthBoundaries) < 0.7:
                    print("Detected: {0}, Ground truth: {1}, Correct: {2}, Onset correct: {3}, "
                          "Offset correct: {4}, Insertion: {5}, Deletion: {6}\n".
                          format(numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect,
                                 numOffsetCorrect, numInsertion, numDeletion))

    return sumDetectedBoundaries, sumGroundtruthBoundaries, sumGroundtruthPhrases, sumCorrect, sumOnsetCorrect, \
           sumOffsetCorrect, sumInsertion, sumDeletion