def textgrid_syllable_phoneme_parser(textgrid_file, tier1, tier2):
    """Parse a TextGrid file into line-grouped syllable and phoneme lists.

    :param textgrid_file: filename
    :param tier1: syllable tier name
    :param tier2: phoneme tier name
    :return: nested syllable lists and nested phoneme lists, grouped by line
    """
    lines = textGrid2WordList(textgrid_file, whichTier='line')
    syllables = textGrid2WordList(textgrid_file, whichTier=tier1)
    phonemes = textGrid2WordList(textgrid_file, whichTier=tier2)

    # group the ground-truth syllables and phonemes by annotation line;
    # only the nested lists are needed, the counts are discarded
    grouped_syllables = wordListsParseByLines(lines, syllables)[0]
    grouped_phonemes = wordListsParseByLines(lines, phonemes)[0]

    return grouped_syllables, grouped_phonemes
def g_eval(groundtruth_textgrid_filename, georgi_alignment_filename, tolerance):
    """Evaluate Georgi-format alignment boundaries against TextGrid ground truth.

    :param groundtruth_textgrid_filename: ground-truth .TextGrid file
    :param georgi_alignment_filename: alignment file in Georgi format
    :param tolerance: tolerance passed through to evaluation2.boundaryEval
    :return: (numDetectedBoundaries, numGroundtruthBoundaries, numCorrect,
              numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion)
    """
    boundaryList = georgiParser.syllables_total_parser(georgi_alignment_filename)

    # Some TextGrids are UTF-16 encoded; retry with utf16=True when the default
    # read fails.  Was a bare `except:`, which also swallowed
    # KeyboardInterrupt/SystemExit — narrowed to Exception.
    try:
        utteranceList = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dian', utf16=False)
        utteranceDuration = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dianDuration', utf16=False)
    except Exception:
        utteranceList = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dian', utf16=True)
        utteranceDuration = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dianDuration', utf16=True)

    # remove entries whose "dian" label is blank
    tempGroundtruthList = []
    groundtruthDuration = []
    for idx, utterance in enumerate(utteranceList):
        if len(utterance[2].strip()):
            tempGroundtruthList.append(utterance)
            groundtruthDuration.append(float(utteranceDuration[idx][2]))

    # remove zero-duration entries
    # NOTE(review): boundaryList is indexed by position in the *filtered* list —
    # assumes georgiParser output is aligned with tempGroundtruthList; confirm.
    detectedBoundaryList = []
    groundtruthList = []
    for idx in range(len(tempGroundtruthList)):
        if groundtruthDuration[idx]:
            groundtruthList.append(tempGroundtruthList[idx])
            detectedBoundaryList.append(boundaryList[idx])

    numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion, correct_list = \
        evaluation2.boundaryEval(groundtruthList, detectedBoundaryList, tolerance)

    # single-argument call form works under both Python 2 and 3
    print("Detected: {0}, Ground truth: {1}, Correct: {2}, Onset correct: {3}, "
          "Offset correct: {4}, Insertion: {5}, Deletion: {6}\n".
          format(numDetectedBoundaries, numGroundtruthBoundaries, numCorrect,
                 numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion))

    return numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion
def g_eval(groundtruth_textgrid_filename, georgi_alignment_filename, tolerance):
    """Evaluate Georgi-format alignment boundaries against TextGrid ground truth.

    Variant that delegates to evaluation.boundaryEval (no correct_list output).

    :param groundtruth_textgrid_filename: ground-truth .TextGrid file
    :param georgi_alignment_filename: alignment file in Georgi format
    :param tolerance: tolerance passed through to evaluation.boundaryEval
    :return: (numDetectedBoundaries, numGroundtruthBoundaries, numCorrect,
              numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion)
    """
    boundaryList = georgiParser.syllables_total_parser(georgi_alignment_filename)

    # Some TextGrids are UTF-16 encoded; retry with utf16=True when the default
    # read fails.  Was a bare `except:`, which also swallowed
    # KeyboardInterrupt/SystemExit — narrowed to Exception.
    try:
        utteranceList = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dian', utf16=False)
        utteranceDuration = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dianDuration', utf16=False)
    except Exception:
        utteranceList = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dian', utf16=True)
        utteranceDuration = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dianDuration', utf16=True)

    # remove entries whose "dian" label is blank
    tempGroundtruthList = []
    groundtruthDuration = []
    for idx, utterance in enumerate(utteranceList):
        if len(utterance[2].strip()):
            tempGroundtruthList.append(utterance)
            groundtruthDuration.append(float(utteranceDuration[idx][2]))

    # remove zero-duration entries
    # NOTE(review): boundaryList is indexed by position in the *filtered* list —
    # assumes georgiParser output is aligned with tempGroundtruthList; confirm.
    detectedBoundaryList = []
    groundtruthList = []
    for idx in range(len(tempGroundtruthList)):
        if groundtruthDuration[idx]:
            groundtruthList.append(tempGroundtruthList[idx])
            detectedBoundaryList.append(boundaryList[idx])

    numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion = \
        evaluation.boundaryEval(groundtruthList, detectedBoundaryList, tolerance)

    # single-argument call form works under both Python 2 and 3
    print("Detected: {0}, Ground truth: {1}, Correct: {2}, Onset correct: {3}, "
          "Offset correct: {4}, Insertion: {5}, Deletion: {6}\n".
          format(numDetectedBoundaries, numGroundtruthBoundaries, numCorrect,
                 numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion))

    return numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion
def phraseBoundaryWriter(textgrid_file, outputFilename):
    '''
    Write phrase boundary from textgrid into outputFilename, example: .syll.lab
    :param textgrid_file: input .TextGrid annotation file
    :param outputFilename: output .lab file path
    :return: None (writes outputFilename as a side effect)
    '''
    # read phrase list and utterance list
    lineList = textgridParser.textGrid2WordList(textgrid_file, whichTier='line')
    utteranceList = textgridParser.textGrid2WordList(textgrid_file, whichTier='utterance')

    # parse lines of groundtruth
    nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(lineList, utteranceList)

    # phrase start/end time: take the head element of each nested list.
    # (The original loop variable shadowed the builtin `list`; replaced the
    # append loop with a comprehension.)
    nonEmptyLineList = [nested[0] for nested in nestedUtteranceLists]

    boundaryLabWriter(nonEmptyLineList, outputFilename)
def dump_feature_onset_helper(wav_path, textgrid_path, artist_name, recording_name):
    """Load annotation tiers and log-mel features for one recording.

    :param wav_path: root folder of the .wav files
    :param textgrid_path: root folder of the .TextGrid annotations
    :param artist_name: artist sub-folder name
    :param recording_name: recording file name, without extension
    :return: nested utterance lists, nested phoneme lists, log-mel feature
             matrix, flat phoneme list
    """
    tg_file = os.path.join(textgrid_path, artist_name, recording_name + '.TextGrid')
    wav_file = os.path.join(wav_path, artist_name, recording_name + '.wav')

    # read the three annotation tiers
    line_tier = textGrid2WordList(tg_file, whichTier='line')
    utterance_tier = textGrid2WordList(tg_file, whichTier='dianSilence')
    phoneme_tier = textGrid2WordList(tg_file, whichTier='details')

    # group ground-truth utterances and phonemes by line
    utterances_by_line = wordListsParseByLines(line_tier, utterance_tier)[0]
    phonemes_by_line = wordListsParseByLines(line_tier, phoneme_tier)[0]

    # load audio features (fs and hopsize_t are module-level settings)
    log_mel = get_log_mel_madmom(wav_file, fs, hopsize_t, channel=1)

    return utterances_by_line, phonemes_by_line, log_mel, phoneme_tier
def phraseBoundaryWriter(textgrid_file, outputFilename):
    """
    Write phrase boundary from textgrid into outputFilename, example: .syll.lab
    :param textgrid_file: input .TextGrid annotation file
    :param outputFilename: output .lab file path
    :return: None (writes outputFilename as a side effect)
    """
    # read phrase list and utterance list
    lineList = textgridParser.textGrid2WordList(textgrid_file, whichTier="line")
    utteranceList = textgridParser.textGrid2WordList(textgrid_file, whichTier="utterance")

    # parse lines of groundtruth
    nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(lineList, utteranceList)

    # phrase start/end time: take the head element of each nested list.
    # (The original loop variable shadowed the builtin `list`; replaced the
    # append loop with a comprehension.)
    nonEmptyLineList = [nested[0] for nested in nestedUtteranceLists]

    boundaryLabWriter(nonEmptyLineList, outputFilename)
def lineWordCount(textgrid_file):
    """Count lines, pinyin words and dian units in one annotation file.

    :param textgrid_file: annotation file
    :return: numLines, numWords, numDians, word duration list, dian duration list
    """
    num_lines = num_words = num_dians = 0
    word_durations = []
    dian_durations = []

    # read the three tiers of interest
    line_tier = textgridParser.textGrid2WordList(textgrid_file, whichTier="line")
    word_tier = textgridParser.textGrid2WordList(textgrid_file, whichTier="pinyin")
    dian_tier = textgridParser.textGrid2WordList(textgrid_file, whichTier="dian")

    # group the pinyin words per line, when the tier is present
    if word_tier:
        nested, num_lines, num_words = textgridParser.wordListsParseByLines(line_tier, word_tier)
        word_durations = wordDuration(nested)

    # group the dian units per line, when the tier is present
    if dian_tier:
        nested, num_lines, num_dians = textgridParser.wordListsParseByLines(line_tier, dian_tier)
        dian_durations = wordDuration(nested)

    return num_lines, num_words, num_dians, word_durations, dian_durations
def lyrics_textgrid_csvwriter(filenames_textgrid, textgrid_path, path_to_save, extension=''):
    """Dump the non-empty 'line' tier labels of each TextGrid to a CSV file.

    :param filenames_textgrid: iterable of base filenames (without .csv extension)
    :param textgrid_path: folder containing the TextGrid files
    :param path_to_save: output folder for the .csv files
    :param extension: optional suffix appended to each TextGrid filename
    """
    for base_name in filenames_textgrid:
        tg_file = os.path.join(textgrid_path, base_name + extension)
        lines = textGrid2WordList(tg_file, whichTier='line')

        # one ';'-delimited CSV per recording, one non-empty label per row
        out_csv = os.path.join(path_to_save, base_name + '.csv')
        with open(out_csv, 'w', newline='') as out:
            csv.writer(out, delimiter=';').writerows(
                [entry[2]] for entry in lines if len(entry[2]))
def batch_eval(annotation_path, segSyllable_path, score_path, groundtruth_path, eval_details_path, recordings, tolerance, label=False, decoding_method='viterbi'):
    """Batch-evaluate detected syllable boundaries against ground truth.

    Two modes: when annotation_path is set, ground truth is read from
    .TextGrid files (and per-phrase .syll.lab ground-truth files are written
    out); otherwise ground truth is read from a pre-existing .lab file.

    :param annotation_path: TextGrid root, or falsy to use .lab ground truth
    :param segSyllable_path: root of the detected .syll.lab files
    :param score_path: root of the per-recording .csv score files
    :param groundtruth_path: root for ground-truth .lab files (read or written)
    :param eval_details_path: root for evaluation detail output dirs (may be falsy)
    :param recordings: iterable of (artist_path, recording_name) pairs
    :param tolerance: tolerance passed to evaluation2.boundaryEval
    :param label: whether labels are used in parsing/evaluation
    :param decoding_method: 'viterbi' makes detected labs parsed with labels
    :return: 8-tuple of summed evaluation counts over all phrases
    """
    sumDetectedBoundaries, sumGroundtruthPhrases, sumGroundtruthBoundaries, sumCorrect, sumOnsetCorrect, \
    sumOffsetCorrect, sumInsertion, sumDeletion = 0 ,0 ,0 ,0 ,0 ,0, 0, 0

    for artist_path, recording_name in recordings:
        # resolve per-recording input/output paths; note that
        # groundtruth_syllable_lab is only bound here in the non-annotation mode
        if annotation_path:
            groundtruth_textgrid_file = os.path.join(
                annotation_path, artist_path, recording_name + '.TextGrid')
            groundtruth_lab_file_head = os.path.join(groundtruth_path, artist_path)
        else:
            groundtruth_syllable_lab = os.path.join(groundtruth_path, artist_path, recording_name + '.lab')

        detected_lab_file_head = os.path.join(segSyllable_path, artist_path, recording_name)
        score_file = os.path.join(score_path, artist_path, recording_name + '.csv')

        # parse score
        if annotation_path:
            _, _, utterance_durations, bpm = scoreParser.csvScorePinyinParser(
                score_file)
        else:
            _, utterance_durations, bpm = scoreParser.csvDurationScoreParser(
                score_file)

        # NOTE(review): eval_result_details_file_head stays unbound when
        # eval_details_path is falsy but annotation_path is set — the
        # os.path.isdir check below would then raise NameError; confirm callers
        # always pass eval_details_path together with annotation_path.
        if eval_details_path:
            eval_result_details_file_head = os.path.join(
                eval_details_path, artist_path)

        if not os.path.isfile(score_file):
            print 'Score not found: ' + score_file
            continue

        if annotation_path:
            # create ground truth lab path, if not exist
            if not os.path.isdir(groundtruth_lab_file_head):
                os.makedirs(groundtruth_lab_file_head)
            if not os.path.isdir(eval_result_details_file_head):
                os.makedirs(eval_result_details_file_head)

            lineList = textgridParser.textGrid2WordList(
                groundtruth_textgrid_file, whichTier='line')
            utteranceList = textgridParser.textGrid2WordList(
                groundtruth_textgrid_file, whichTier='dianSilence')

            # parse lines of groundtruth
            nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(
                lineList, utteranceList)

            # create the ground truth lab files, one per phrase with
            # boundary times shifted so each phrase starts at 0
            for idx, list in enumerate(nestedUtteranceLists):
                try:
                    print(bpm[idx])
                except IndexError:
                    continue
                if float(bpm[idx]):
                    print 'Creating ground truth lab ... ' + recording_name + ' phrase ' + str(
                        idx + 1)
                    ul = list[1]
                    firstStartTime = ul[0][0]
                    groundtruthBoundaries = [
                        (np.array(ul_element[:2]) - firstStartTime).tolist() +
                        [ul_element[2]] for ul_element in ul
                    ]
                    groundtruth_syllable_lab = join(
                        groundtruth_lab_file_head,
                        recording_name + '_' + str(idx + 1) + '.syll.lab')
                    with open(groundtruth_syllable_lab, "wb") as text_file:
                        for gtbs in groundtruthBoundaries:
                            text_file.write("{0} {1} {2}\n".format(
                                gtbs[0], gtbs[1], gtbs[2]))
        else:
            # single pseudo-phrase read from the pre-existing ground-truth lab
            nestedUtteranceLists = [
                labParser.lab2WordList(groundtruth_syllable_lab, label=label)
            ]

        # evaluate each phrase against its detected .syll.lab
        for idx, list in enumerate(nestedUtteranceLists):
            try:
                print(bpm[idx])
            except IndexError:
                continue
            if float(bpm[idx]):
                print 'Evaluating... ' + recording_name + ' phrase ' + str(
                    idx + 1)
                # boundaries are shifted so the phrase starts at time 0
                if annotation_path:
                    ul = list[1]
                    firstStartTime = ul[0][0]
                    groundtruthBoundaries = [
                        (np.array(ul_element[:2]) - firstStartTime).tolist() +
                        [ul_element[2]] for ul_element in ul
                    ]
                else:
                    firstStartTime = list[0][0]
                    groundtruthBoundaries = [
                        (np.array(ul_element[:2]) - firstStartTime).tolist() +
                        [ul_element[2]] for ul_element in list
                    ]

                detected_syllable_lab = detected_lab_file_head + '_' + str(
                    idx + 1) + '.syll.lab'
                if not os.path.isfile(detected_syllable_lab):
                    print 'Syll lab file not found: ' + detected_syllable_lab
                    continue

                # read boundary detected lab into python list
                lab_label = True if decoding_method == 'viterbi' else False
                detectedBoundaries = labParser.lab2WordList(
                    detected_syllable_lab, label=lab_label)

                numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, \
                numInsertion, numDeletion, correct_list = evaluation2.boundaryEval(groundtruthBoundaries, detectedBoundaries, tolerance, label)

                # accumulate per-phrase counts into the batch totals
                sumDetectedBoundaries += numDetectedBoundaries
                sumGroundtruthBoundaries += numGroundtruthBoundaries
                sumGroundtruthPhrases += 1
                sumCorrect += numCorrect
                sumOnsetCorrect += numOnsetCorrect
                sumOffsetCorrect += numOffsetCorrect
                sumInsertion += numInsertion
                sumDeletion += numDeletion

                # if numCorrect/float(numGroundtruthBoundaries) < 0.7:
                print "Detected: {0}, Ground truth: {1}, Correct: {2}, Onset correct: {3}, " \
                      "Offset correct: {4}, Insertion: {5}, Deletion: {6}\n".\
                    format(numDetectedBoundaries, numGroundtruthBoundaries,numCorrect, numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion)

    return sumDetectedBoundaries, sumGroundtruthBoundaries, sumGroundtruthPhrases, sumCorrect, sumOnsetCorrect, \
           sumOffsetCorrect, sumInsertion, sumDeletion
def batch_eval(aCapella_root, dataset_path, annotation_path, segPhrase_path, segSyllable_path, score_path, recordings, tolerance, label=True):
    """Batch-evaluate detected syllable boundaries for the a-capella dataset.

    Reads TextGrid ground truth, writes per-phrase ground-truth .syll.lab
    files, then compares the detected .syll.lab files against them with
    evaluation2.boundaryEval.

    :param aCapella_root: dataset root folder
    :param dataset_path: dataset sub-folder
    :param annotation_path: sub-folder of the .TextGrid annotations
    :param segPhrase_path: sub-folder of the phrase-boundary .lab files
    :param segSyllable_path: root of the detected syllable .lab output
    :param score_path: sub-folder of the per-recording .csv scores
    :param recordings: iterable of recording names
    :param tolerance: tolerance passed to evaluation2.boundaryEval
    :param label: passed as withLabel to labParser and to boundaryEval
    :return: 8-tuple of summed evaluation counts over all phrases

    NOTE(review): groundtruth_lab_path and eval_details_path are read as
    module-level globals here (not parameters) — confirm they are defined in
    this module.
    """
    sumDetectedBoundaries, sumGroundtruthPhrases, sumGroundtruthBoundaries, sumCorrect, sumOnsetCorrect, \
    sumOffsetCorrect, sumInsertion, sumDeletion = 0 ,0 ,0 ,0 ,0 ,0, 0, 0

    for i_recording, recording_name in enumerate(recordings):
        # resolve per-recording input/output paths
        groundtruth_textgrid_file = os.path.join(aCapella_root, dataset_path, annotation_path, recording_name + '.TextGrid')
        phrase_boundary_lab_file = os.path.join(aCapella_root, dataset_path, segPhrase_path, recording_name + '.lab')
        # syll-o-matic output
        # detected_lab_file_head = os.path.join(aCapella_root, dataset_path, segSyllable_path,recording_name)
        # jan output
        detected_lab_file_head = os.path.join(segSyllable_path, dataset_path, recording_name)
        score_file = os.path.join(aCapella_root, dataset_path, score_path, recording_name + '.csv')
        groundtruth_lab_file_head = os.path.join(aCapella_root, dataset_path, groundtruth_lab_path, recording_name)
        eval_result_details_file_head = os.path.join(aCapella_root, dataset_path, eval_details_path, recording_name)

        if not os.path.isfile(score_file):
            print 'Score not found: ' + score_file
            continue

        # create ground truth lab path, if not exist
        if not os.path.isdir(groundtruth_lab_file_head):
            os.makedirs(groundtruth_lab_file_head)
        if not os.path.isdir(eval_result_details_file_head):
            os.makedirs(eval_result_details_file_head)

        lineList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='line')
        utteranceList = textgridParser.textGrid2WordList(
            groundtruth_textgrid_file, whichTier='dianSilence')

        # parse lines of groundtruth
        nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(
            lineList, utteranceList)

        # parse score
        utterance_durations, bpm = scoreParser.csvDurationScoreParser(
            score_file)

        # create the ground truth lab files (phrases with bpm 0 are skipped),
        # boundary times shifted so each phrase starts at 0
        for idx, list in enumerate(nestedUtteranceLists):
            if int(bpm[idx]):
                print 'Creating ground truth lab ... ' + recording_name + ' phrase ' + str(
                    idx + 1)
                ul = list[1]
                firstStartTime = ul[0][0]
                groundtruthBoundaries = [
                    (np.array(ul_element[:2]) - firstStartTime).tolist() +
                    [ul_element[2]] for ul_element in ul
                ]
                groundtruth_syllable_lab = groundtruth_lab_file_head + '_' + str(
                    idx + 1) + '.syll.lab'
                with open(groundtruth_syllable_lab, "wb") as text_file:
                    for gtbs in groundtruthBoundaries:
                        text_file.write("{0} {1} {2}\n".format(
                            gtbs[0], gtbs[1], gtbs[2]))

        # syllable boundaries groundtruth of each line
        # eval_details_csv = eval_result_details_file_head+'.csv'
        # with open(eval_details_csv, 'wb') as csv_file:
        #     csv_writer = csv.writer(csv_file)
        for idx, list in enumerate(nestedUtteranceLists):
            if int(bpm[idx]):
                print 'Evaluating... ' + recording_name + ' phrase ' + str(
                    idx + 1)
                ul = list[1]
                firstStartTime = ul[0][0]
                groundtruthBoundaries = [
                    (np.array(ul_element[:2]) - firstStartTime).tolist() +
                    [ul_element[2]] for ul_element in ul
                ]
                detected_syllable_lab = detected_lab_file_head + '_' + str(
                    idx + 1) + '.syll.lab'
                if not os.path.isfile(detected_syllable_lab):
                    print 'Syll lab file not found: ' + detected_syllable_lab
                    continue

                # read boundary detected lab into python list
                detectedBoundaries = labParser.lab2WordList(
                    detected_syllable_lab, withLabel=label)

                #
                numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, \
                numInsertion, numDeletion, correct_list = evaluation2.boundaryEval(groundtruthBoundaries, detectedBoundaries, tolerance, label)

                # accumulate per-phrase counts into the batch totals
                sumDetectedBoundaries += numDetectedBoundaries
                sumGroundtruthBoundaries += numGroundtruthBoundaries
                sumGroundtruthPhrases += 1
                sumCorrect += numCorrect
                sumOnsetCorrect += numOnsetCorrect
                sumOffsetCorrect += numOffsetCorrect
                sumInsertion += numInsertion
                sumDeletion += numDeletion

                # only report phrases with a low correct ratio
                if numCorrect / float(numGroundtruthBoundaries) < 0.7:
                    print "Detected: {0}, Ground truth: {1}, Correct: {2}, Onset correct: {3}, " \
                          "Offset correct: {4}, Insertion: {5}, Deletion: {6}\n".\
                        format(numDetectedBoundaries, numGroundtruthBoundaries,numCorrect, numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion)

                # csv_writer.writerow([recording_name+'_'+str(idx+1),
                #                      numDetectedBoundaries,
                #                      numGroundtruthBoundaries,
                #                      numCorrect,
                #                      numInsertion,
                #                      numDeletion,
                #                      correct_list])

    return sumDetectedBoundaries, sumGroundtruthBoundaries, sumGroundtruthPhrases, sumCorrect, sumOnsetCorrect, \
           sumOffsetCorrect, sumInsertion, sumDeletion
def batch_eval(aCapella_root, dataset_path, annotation_path, segPhrase_path, segSyllable_path, score_path, recordings, tolerance):
    """Batch-evaluate detected syllable boundaries (label-free variant).

    Compares detected .syll.lab boundaries against TextGrid ground truth using
    evaluation.boundaryEval; boundaries carry no text labels here.

    :param aCapella_root: dataset root folder
    :param dataset_path: dataset sub-folder
    :param annotation_path: sub-folder of the .TextGrid annotations
    :param segPhrase_path: sub-folder of the phrase-boundary .lab files
    :param segSyllable_path: sub-folder of the detected syllable .lab files
    :param score_path: sub-folder of the per-recording .csv scores
    :param recordings: iterable of recording names
    :param tolerance: tolerance passed to evaluation.boundaryEval
    :return: 8-tuple of summed evaluation counts over all phrases
    """
    sumDetectedBoundaries, sumGroundtruthPhrases, sumGroundtruthBoundaries, sumCorrect, sumOnsetCorrect, \
    sumOffsetCorrect, sumInsertion, sumDeletion = 0 ,0 ,0 ,0 ,0 ,0, 0, 0

    for i_recording, recording_name in enumerate(recordings):
        # resolve per-recording input paths
        groundtruth_textgrid_file = os.path.join(aCapella_root, dataset_path, annotation_path, recording_name+'.TextGrid')
        phrase_boundary_lab_file = os.path.join(aCapella_root, dataset_path, segPhrase_path, recording_name+'.lab')
        detected_lab_file_head = os.path.join(aCapella_root, dataset_path, segSyllable_path,recording_name)
        score_file = os.path.join(aCapella_root, dataset_path, score_path, recording_name+'.csv')

        if not os.path.isfile(score_file):
            print 'Score not found: ' + score_file
            continue

        lineList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='line')
        utteranceList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='dianSilence')

        # parse lines of groundtruth
        nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(lineList, utteranceList)

        # parse score
        utterance_durations, bpm = scoreParser.csvDurationScoreParser(score_file)

        # syllable boundaries groundtruth of each line
        # (phrases with bpm 0 are skipped)
        for idx, list in enumerate(nestedUtteranceLists):
            if int(bpm[idx]):
                print 'Evaluating... ' + recording_name + ' phrase ' + str(idx+1)

                ul = list[1]
                firstStartTime = ul[0][0]
                # boundary times shifted so the phrase starts at 0;
                # no labels appended in this variant
                groundtruthBoundaries = [(np.array(ul_element[:2]) - firstStartTime).tolist() for ul_element in ul]

                detected_syllable_lab = detected_lab_file_head+'_'+str(idx+1)+'.syll.lab'
                if not os.path.isfile(detected_syllable_lab):
                    print 'Syll lab file not found: ' + detected_syllable_lab
                    continue

                # read boundary detected lab into python list
                detectedBoundaries = labParser.lab2WordList(detected_syllable_lab)

                # read boundary groundtruth textgrid into python list
                # for segment in utteranceList:
                #     asciiLine = segment[2].encode("ascii", "replace")
                #     if len(asciiLine.replace(" ", "")):
                #         groundtruthBoundaries.append(segment[0:2])
                #
                # print groundtruthBoundaries

                #
                numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, \
                numInsertion, numDeletion = evaluation.boundaryEval(groundtruthBoundaries, detectedBoundaries, tolerance)

                # accumulate per-phrase counts into the batch totals
                sumDetectedBoundaries += numDetectedBoundaries
                sumGroundtruthBoundaries += numGroundtruthBoundaries
                sumGroundtruthPhrases += 1
                sumCorrect += numCorrect
                sumOnsetCorrect += numOnsetCorrect
                sumOffsetCorrect += numOffsetCorrect
                sumInsertion += numInsertion
                sumDeletion += numDeletion

                # only report phrases with a low correct ratio
                if numCorrect/float(numGroundtruthBoundaries) < 0.7:
                    print "Detected: {0}, Ground truth: {1}, Correct: {2}, Onset correct: {3}, " \
                          "Offset correct: {4}, Insertion: {5}, Deletion: {6}\n".\
                        format(numDetectedBoundaries, numGroundtruthBoundaries,numCorrect, numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion)

    return sumDetectedBoundaries, sumGroundtruthBoundaries, sumGroundtruthPhrases, sumCorrect, sumOnsetCorrect, \
           sumOffsetCorrect, sumInsertion, sumDeletion
''' This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. Author: Rong Gong, MTG-UPF, [email protected] 17 January 2016 ''' import textgridParser groundtruth_textgrid_file = '../laosheng/lsxp-Wo_zheng_zai-Kong_cheng_ji01-upf.TextGrid' # parse the phrase boundary, and its content lineList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='line') # parse the dian Tier dianList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='dian') print lineList print dianSilenceList