def textgrid_syllable_phoneme_parser(textgrid_file, tier1, tier2):
    """Parse a TextGrid file into line-grouped syllable and phoneme lists.

    :param textgrid_file: filename
    :param tier1: syllable tier name
    :param tier2: phoneme tier name
    :return: nested syllable lists and nested phoneme lists, grouped by line
    """
    lines = textGrid2WordList(textgrid_file, whichTier='line')
    syllables = textGrid2WordList(textgrid_file, whichTier=tier1)
    phonemes = textGrid2WordList(textgrid_file, whichTier=tier2)

    # group the ground-truth syllables and phonemes by annotation line;
    # only the nested lists are needed, the counts are discarded
    grouped_syllables = wordListsParseByLines(lines, syllables)[0]
    grouped_phonemes = wordListsParseByLines(lines, phonemes)[0]

    return grouped_syllables, grouped_phonemes
def g_eval(groundtruth_textgrid_filename, georgi_alignment_filename, tolerance):
    """Evaluate Georgi-format alignment boundaries against TextGrid ground truth.

    :param groundtruth_textgrid_filename: ground-truth .TextGrid file
    :param georgi_alignment_filename: alignment file in Georgi format
    :param tolerance: tolerance passed through to evaluation2.boundaryEval
    :return: (numDetectedBoundaries, numGroundtruthBoundaries, numCorrect,
              numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion)
    """
    boundaryList = georgiParser.syllables_total_parser(georgi_alignment_filename)

    # Some TextGrids are UTF-16 encoded; retry with utf16=True when the default
    # read fails.  Was a bare `except:`, which also swallowed
    # KeyboardInterrupt/SystemExit — narrowed to Exception.
    try:
        utteranceList = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dian', utf16=False)
        utteranceDuration = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dianDuration', utf16=False)
    except Exception:
        utteranceList = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dian', utf16=True)
        utteranceDuration = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dianDuration', utf16=True)

    # remove entries whose "dian" label is blank
    tempGroundtruthList = []
    groundtruthDuration = []
    for idx, utterance in enumerate(utteranceList):
        if len(utterance[2].strip()):
            tempGroundtruthList.append(utterance)
            groundtruthDuration.append(float(utteranceDuration[idx][2]))

    # remove zero-duration entries
    # NOTE(review): boundaryList is indexed by position in the *filtered* list —
    # assumes georgiParser output is aligned with tempGroundtruthList; confirm.
    detectedBoundaryList = []
    groundtruthList = []
    for idx in range(len(tempGroundtruthList)):
        if groundtruthDuration[idx]:
            groundtruthList.append(tempGroundtruthList[idx])
            detectedBoundaryList.append(boundaryList[idx])

    numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion, correct_list = \
        evaluation2.boundaryEval(groundtruthList, detectedBoundaryList, tolerance)

    # single-argument call form works under both Python 2 and 3
    print("Detected: {0}, Ground truth: {1}, Correct: {2}, Onset correct: {3}, "
          "Offset correct: {4}, Insertion: {5}, Deletion: {6}\n".
          format(numDetectedBoundaries, numGroundtruthBoundaries, numCorrect,
                 numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion))

    return numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion
def g_eval(groundtruth_textgrid_filename, georgi_alignment_filename, tolerance):
    """Evaluate Georgi-format alignment boundaries against TextGrid ground truth.

    Variant that delegates to evaluation.boundaryEval (no correct_list output).

    :param groundtruth_textgrid_filename: ground-truth .TextGrid file
    :param georgi_alignment_filename: alignment file in Georgi format
    :param tolerance: tolerance passed through to evaluation.boundaryEval
    :return: (numDetectedBoundaries, numGroundtruthBoundaries, numCorrect,
              numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion)
    """
    boundaryList = georgiParser.syllables_total_parser(georgi_alignment_filename)

    # Some TextGrids are UTF-16 encoded; retry with utf16=True when the default
    # read fails.  Was a bare `except:`, which also swallowed
    # KeyboardInterrupt/SystemExit — narrowed to Exception.
    try:
        utteranceList = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dian', utf16=False)
        utteranceDuration = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dianDuration', utf16=False)
    except Exception:
        utteranceList = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dian', utf16=True)
        utteranceDuration = textgridParser.textGrid2WordList(groundtruth_textgrid_filename, whichTier='dianDuration', utf16=True)

    # remove entries whose "dian" label is blank
    tempGroundtruthList = []
    groundtruthDuration = []
    for idx, utterance in enumerate(utteranceList):
        if len(utterance[2].strip()):
            tempGroundtruthList.append(utterance)
            groundtruthDuration.append(float(utteranceDuration[idx][2]))

    # remove zero-duration entries
    # NOTE(review): boundaryList is indexed by position in the *filtered* list —
    # assumes georgiParser output is aligned with tempGroundtruthList; confirm.
    detectedBoundaryList = []
    groundtruthList = []
    for idx in range(len(tempGroundtruthList)):
        if groundtruthDuration[idx]:
            groundtruthList.append(tempGroundtruthList[idx])
            detectedBoundaryList.append(boundaryList[idx])

    numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion = \
        evaluation.boundaryEval(groundtruthList, detectedBoundaryList, tolerance)

    # single-argument call form works under both Python 2 and 3
    print("Detected: {0}, Ground truth: {1}, Correct: {2}, Onset correct: {3}, "
          "Offset correct: {4}, Insertion: {5}, Deletion: {6}\n".
          format(numDetectedBoundaries, numGroundtruthBoundaries, numCorrect,
                 numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion))

    return numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion
def phraseBoundaryWriter(textgrid_file, outputFilename):
    '''
    Write phrase boundary from textgrid into outputFilename, example: .syll.lab
    :param textgrid_file: input .TextGrid annotation file
    :param outputFilename: output .lab file path
    :return: None (writes outputFilename as a side effect)
    '''
    # read phrase list and utterance list
    lineList = textgridParser.textGrid2WordList(textgrid_file, whichTier='line')
    utteranceList = textgridParser.textGrid2WordList(textgrid_file, whichTier='utterance')

    # parse lines of groundtruth
    nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(lineList, utteranceList)

    # phrase start/end time: take the head element of each nested list.
    # (The original loop variable shadowed the builtin `list`; replaced the
    # append loop with a comprehension.)
    nonEmptyLineList = [nested[0] for nested in nestedUtteranceLists]

    boundaryLabWriter(nonEmptyLineList, outputFilename)
def dump_feature_onset_helper(wav_path, textgrid_path, artist_name, recording_name):
    """Load annotation tiers and log-mel features for one recording.

    :param wav_path: root folder of the .wav files
    :param textgrid_path: root folder of the .TextGrid annotations
    :param artist_name: artist sub-folder name
    :param recording_name: recording file name, without extension
    :return: nested utterance lists, nested phoneme lists, log-mel feature
             matrix, flat phoneme list
    """
    tg_file = os.path.join(textgrid_path, artist_name, recording_name + '.TextGrid')
    wav_file = os.path.join(wav_path, artist_name, recording_name + '.wav')

    # read the three annotation tiers
    line_tier = textGrid2WordList(tg_file, whichTier='line')
    utterance_tier = textGrid2WordList(tg_file, whichTier='dianSilence')
    phoneme_tier = textGrid2WordList(tg_file, whichTier='details')

    # group ground-truth utterances and phonemes by line
    utterances_by_line = wordListsParseByLines(line_tier, utterance_tier)[0]
    phonemes_by_line = wordListsParseByLines(line_tier, phoneme_tier)[0]

    # load audio features (fs and hopsize_t are module-level settings)
    log_mel = get_log_mel_madmom(wav_file, fs, hopsize_t, channel=1)

    return utterances_by_line, phonemes_by_line, log_mel, phoneme_tier
def phraseBoundaryWriter(textgrid_file, outputFilename):
    """
    Write phrase boundary from textgrid into outputFilename, example: .syll.lab
    :param textgrid_file: input .TextGrid annotation file
    :param outputFilename: output .lab file path
    :return: None (writes outputFilename as a side effect)
    """
    # read phrase list and utterance list
    lineList = textgridParser.textGrid2WordList(textgrid_file, whichTier="line")
    utteranceList = textgridParser.textGrid2WordList(textgrid_file, whichTier="utterance")

    # parse lines of groundtruth
    nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(lineList, utteranceList)

    # phrase start/end time: take the head element of each nested list.
    # (The original loop variable shadowed the builtin `list`; replaced the
    # append loop with a comprehension.)
    nonEmptyLineList = [nested[0] for nested in nestedUtteranceLists]

    boundaryLabWriter(nonEmptyLineList, outputFilename)
def lineWordCount(textgrid_file):
    """Count lines, pinyin words and dian units in one annotation file.

    :param textgrid_file: annotation file
    :return: numLines, numWords, numDians, word duration list, dian duration list
    """
    num_lines = num_words = num_dians = 0
    word_durations = []
    dian_durations = []

    # read the three tiers of interest
    line_tier = textgridParser.textGrid2WordList(textgrid_file, whichTier="line")
    word_tier = textgridParser.textGrid2WordList(textgrid_file, whichTier="pinyin")
    dian_tier = textgridParser.textGrid2WordList(textgrid_file, whichTier="dian")

    # group the pinyin words per line, when the tier is present
    if word_tier:
        nested, num_lines, num_words = textgridParser.wordListsParseByLines(line_tier, word_tier)
        word_durations = wordDuration(nested)

    # group the dian units per line, when the tier is present
    if dian_tier:
        nested, num_lines, num_dians = textgridParser.wordListsParseByLines(line_tier, dian_tier)
        dian_durations = wordDuration(nested)

    return num_lines, num_words, num_dians, word_durations, dian_durations
def lyrics_textgrid_csvwriter(filenames_textgrid, textgrid_path, path_to_save, extension=''):
    """Dump the non-empty 'line' tier labels of each TextGrid to a CSV file.

    :param filenames_textgrid: iterable of base filenames (without .csv extension)
    :param textgrid_path: folder containing the TextGrid files
    :param path_to_save: output folder for the .csv files
    :param extension: optional suffix appended to each TextGrid filename
    """
    for base_name in filenames_textgrid:
        tg_file = os.path.join(textgrid_path, base_name + extension)
        lines = textGrid2WordList(tg_file, whichTier='line')

        # one ';'-delimited CSV per recording, one non-empty label per row
        out_csv = os.path.join(path_to_save, base_name + '.csv')
        with open(out_csv, 'w', newline='') as out:
            csv.writer(out, delimiter=';').writerows(
                [entry[2]] for entry in lines if len(entry[2]))
def batch_eval(annotation_path, segSyllable_path, score_path, groundtruth_path, eval_details_path, recordings, tolerance, label=False, decoding_method='viterbi'):
    """Batch-evaluate detected syllable boundaries against ground truth.

    Two modes: when annotation_path is set, ground truth is read from
    .TextGrid files (and per-phrase .syll.lab ground-truth files are written
    out); otherwise ground truth is read from a pre-existing .lab file.

    :param annotation_path: TextGrid root, or falsy to use .lab ground truth
    :param segSyllable_path: root of the detected .syll.lab files
    :param score_path: root of the per-recording .csv score files
    :param groundtruth_path: root for ground-truth .lab files (read or written)
    :param eval_details_path: root for evaluation detail output dirs (may be falsy)
    :param recordings: iterable of (artist_path, recording_name) pairs
    :param tolerance: tolerance passed to evaluation2.boundaryEval
    :param label: whether labels are used in parsing/evaluation
    :param decoding_method: 'viterbi' makes detected labs parsed with labels
    :return: 8-tuple of summed evaluation counts over all phrases
    """
    sumDetectedBoundaries, sumGroundtruthPhrases, sumGroundtruthBoundaries, sumCorrect, sumOnsetCorrect, \
    sumOffsetCorrect, sumInsertion, sumDeletion = 0 ,0 ,0 ,0 ,0 ,0, 0, 0

    for artist_path, recording_name in recordings:
        # resolve per-recording input/output paths; note that
        # groundtruth_syllable_lab is only bound here in the non-annotation mode
        if annotation_path:
            groundtruth_textgrid_file = os.path.join(
                annotation_path, artist_path, recording_name + '.TextGrid')
            groundtruth_lab_file_head = os.path.join(groundtruth_path, artist_path)
        else:
            groundtruth_syllable_lab = os.path.join(groundtruth_path, artist_path, recording_name + '.lab')

        detected_lab_file_head = os.path.join(segSyllable_path, artist_path, recording_name)
        score_file = os.path.join(score_path, artist_path, recording_name + '.csv')

        # parse score
        if annotation_path:
            _, _, utterance_durations, bpm = scoreParser.csvScorePinyinParser(
                score_file)
        else:
            _, utterance_durations, bpm = scoreParser.csvDurationScoreParser(
                score_file)

        # NOTE(review): eval_result_details_file_head stays unbound when
        # eval_details_path is falsy but annotation_path is set — the
        # os.path.isdir check below would then raise NameError; confirm callers
        # always pass eval_details_path together with annotation_path.
        if eval_details_path:
            eval_result_details_file_head = os.path.join(
                eval_details_path, artist_path)

        if not os.path.isfile(score_file):
            print 'Score not found: ' + score_file
            continue

        if annotation_path:
            # create ground truth lab path, if not exist
            if not os.path.isdir(groundtruth_lab_file_head):
                os.makedirs(groundtruth_lab_file_head)
            if not os.path.isdir(eval_result_details_file_head):
                os.makedirs(eval_result_details_file_head)

            lineList = textgridParser.textGrid2WordList(
                groundtruth_textgrid_file, whichTier='line')
            utteranceList = textgridParser.textGrid2WordList(
                groundtruth_textgrid_file, whichTier='dianSilence')

            # parse lines of groundtruth
            nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(
                lineList, utteranceList)

            # create the ground truth lab files, one per phrase with
            # boundary times shifted so each phrase starts at 0
            for idx, list in enumerate(nestedUtteranceLists):
                try:
                    print(bpm[idx])
                except IndexError:
                    continue
                if float(bpm[idx]):
                    print 'Creating ground truth lab ... ' + recording_name + ' phrase ' + str(
                        idx + 1)
                    ul = list[1]
                    firstStartTime = ul[0][0]
                    groundtruthBoundaries = [
                        (np.array(ul_element[:2]) - firstStartTime).tolist() +
                        [ul_element[2]] for ul_element in ul
                    ]
                    groundtruth_syllable_lab = join(
                        groundtruth_lab_file_head,
                        recording_name + '_' + str(idx + 1) + '.syll.lab')
                    with open(groundtruth_syllable_lab, "wb") as text_file:
                        for gtbs in groundtruthBoundaries:
                            text_file.write("{0} {1} {2}\n".format(
                                gtbs[0], gtbs[1], gtbs[2]))
        else:
            # single pseudo-phrase read from the pre-existing ground-truth lab
            nestedUtteranceLists = [
                labParser.lab2WordList(groundtruth_syllable_lab, label=label)
            ]

        # evaluate each phrase against its detected .syll.lab
        for idx, list in enumerate(nestedUtteranceLists):
            try:
                print(bpm[idx])
            except IndexError:
                continue
            if float(bpm[idx]):
                print 'Evaluating... ' + recording_name + ' phrase ' + str(
                    idx + 1)
                # boundaries are shifted so the phrase starts at time 0
                if annotation_path:
                    ul = list[1]
                    firstStartTime = ul[0][0]
                    groundtruthBoundaries = [
                        (np.array(ul_element[:2]) - firstStartTime).tolist() +
                        [ul_element[2]] for ul_element in ul
                    ]
                else:
                    firstStartTime = list[0][0]
                    groundtruthBoundaries = [
                        (np.array(ul_element[:2]) - firstStartTime).tolist() +
                        [ul_element[2]] for ul_element in list
                    ]

                detected_syllable_lab = detected_lab_file_head + '_' + str(
                    idx + 1) + '.syll.lab'
                if not os.path.isfile(detected_syllable_lab):
                    print 'Syll lab file not found: ' + detected_syllable_lab
                    continue

                # read boundary detected lab into python list
                lab_label = True if decoding_method == 'viterbi' else False
                detectedBoundaries = labParser.lab2WordList(
                    detected_syllable_lab, label=lab_label)

                numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, \
                numInsertion, numDeletion, correct_list = evaluation2.boundaryEval(groundtruthBoundaries, detectedBoundaries, tolerance, label)

                # accumulate per-phrase counts into the batch totals
                sumDetectedBoundaries += numDetectedBoundaries
                sumGroundtruthBoundaries += numGroundtruthBoundaries
                sumGroundtruthPhrases += 1
                sumCorrect += numCorrect
                sumOnsetCorrect += numOnsetCorrect
                sumOffsetCorrect += numOffsetCorrect
                sumInsertion += numInsertion
                sumDeletion += numDeletion

                # if numCorrect/float(numGroundtruthBoundaries) < 0.7:
                print "Detected: {0}, Ground truth: {1}, Correct: {2}, Onset correct: {3}, " \
                      "Offset correct: {4}, Insertion: {5}, Deletion: {6}\n".\
                    format(numDetectedBoundaries, numGroundtruthBoundaries,numCorrect, numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion)

    return sumDetectedBoundaries, sumGroundtruthBoundaries, sumGroundtruthPhrases, sumCorrect, sumOnsetCorrect, \
           sumOffsetCorrect, sumInsertion, sumDeletion
def batch_eval(aCapella_root, dataset_path, annotation_path, segPhrase_path, segSyllable_path, score_path, recordings, tolerance, label=True):
    """Batch-evaluate detected syllable boundaries for the a-capella dataset.

    Reads TextGrid ground truth, writes per-phrase ground-truth .syll.lab
    files, then compares the detected .syll.lab files against them with
    evaluation2.boundaryEval.

    :param aCapella_root: dataset root folder
    :param dataset_path: dataset sub-folder
    :param annotation_path: sub-folder of the .TextGrid annotations
    :param segPhrase_path: sub-folder of the phrase-boundary .lab files
    :param segSyllable_path: root of the detected syllable .lab output
    :param score_path: sub-folder of the per-recording .csv scores
    :param recordings: iterable of recording names
    :param tolerance: tolerance passed to evaluation2.boundaryEval
    :param label: passed as withLabel to labParser and to boundaryEval
    :return: 8-tuple of summed evaluation counts over all phrases

    NOTE(review): groundtruth_lab_path and eval_details_path are read as
    module-level globals here (not parameters) — confirm they are defined in
    this module.
    """
    sumDetectedBoundaries, sumGroundtruthPhrases, sumGroundtruthBoundaries, sumCorrect, sumOnsetCorrect, \
    sumOffsetCorrect, sumInsertion, sumDeletion = 0 ,0 ,0 ,0 ,0 ,0, 0, 0

    for i_recording, recording_name in enumerate(recordings):
        # resolve per-recording input/output paths
        groundtruth_textgrid_file = os.path.join(aCapella_root, dataset_path, annotation_path, recording_name + '.TextGrid')
        phrase_boundary_lab_file = os.path.join(aCapella_root, dataset_path, segPhrase_path, recording_name + '.lab')
        # syll-o-matic output
        # detected_lab_file_head = os.path.join(aCapella_root, dataset_path, segSyllable_path,recording_name)
        # jan output
        detected_lab_file_head = os.path.join(segSyllable_path, dataset_path, recording_name)
        score_file = os.path.join(aCapella_root, dataset_path, score_path, recording_name + '.csv')
        groundtruth_lab_file_head = os.path.join(aCapella_root, dataset_path, groundtruth_lab_path, recording_name)
        eval_result_details_file_head = os.path.join(aCapella_root, dataset_path, eval_details_path, recording_name)

        if not os.path.isfile(score_file):
            print 'Score not found: ' + score_file
            continue

        # create ground truth lab path, if not exist
        if not os.path.isdir(groundtruth_lab_file_head):
            os.makedirs(groundtruth_lab_file_head)
        if not os.path.isdir(eval_result_details_file_head):
            os.makedirs(eval_result_details_file_head)

        lineList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='line')
        utteranceList = textgridParser.textGrid2WordList(
            groundtruth_textgrid_file, whichTier='dianSilence')

        # parse lines of groundtruth
        nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(
            lineList, utteranceList)

        # parse score
        utterance_durations, bpm = scoreParser.csvDurationScoreParser(
            score_file)

        # create the ground truth lab files (phrases with bpm 0 are skipped),
        # boundary times shifted so each phrase starts at 0
        for idx, list in enumerate(nestedUtteranceLists):
            if int(bpm[idx]):
                print 'Creating ground truth lab ... ' + recording_name + ' phrase ' + str(
                    idx + 1)
                ul = list[1]
                firstStartTime = ul[0][0]
                groundtruthBoundaries = [
                    (np.array(ul_element[:2]) - firstStartTime).tolist() +
                    [ul_element[2]] for ul_element in ul
                ]
                groundtruth_syllable_lab = groundtruth_lab_file_head + '_' + str(
                    idx + 1) + '.syll.lab'
                with open(groundtruth_syllable_lab, "wb") as text_file:
                    for gtbs in groundtruthBoundaries:
                        text_file.write("{0} {1} {2}\n".format(
                            gtbs[0], gtbs[1], gtbs[2]))

        # syllable boundaries groundtruth of each line
        # eval_details_csv = eval_result_details_file_head+'.csv'
        # with open(eval_details_csv, 'wb') as csv_file:
        #     csv_writer = csv.writer(csv_file)
        for idx, list in enumerate(nestedUtteranceLists):
            if int(bpm[idx]):
                print 'Evaluating... ' + recording_name + ' phrase ' + str(
                    idx + 1)
                ul = list[1]
                firstStartTime = ul[0][0]
                groundtruthBoundaries = [
                    (np.array(ul_element[:2]) - firstStartTime).tolist() +
                    [ul_element[2]] for ul_element in ul
                ]
                detected_syllable_lab = detected_lab_file_head + '_' + str(
                    idx + 1) + '.syll.lab'
                if not os.path.isfile(detected_syllable_lab):
                    print 'Syll lab file not found: ' + detected_syllable_lab
                    continue

                # read boundary detected lab into python list
                detectedBoundaries = labParser.lab2WordList(
                    detected_syllable_lab, withLabel=label)

                #
                numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, \
                numInsertion, numDeletion, correct_list = evaluation2.boundaryEval(groundtruthBoundaries, detectedBoundaries, tolerance, label)

                # accumulate per-phrase counts into the batch totals
                sumDetectedBoundaries += numDetectedBoundaries
                sumGroundtruthBoundaries += numGroundtruthBoundaries
                sumGroundtruthPhrases += 1
                sumCorrect += numCorrect
                sumOnsetCorrect += numOnsetCorrect
                sumOffsetCorrect += numOffsetCorrect
                sumInsertion += numInsertion
                sumDeletion += numDeletion

                # only report phrases with a low correct ratio
                if numCorrect / float(numGroundtruthBoundaries) < 0.7:
                    print "Detected: {0}, Ground truth: {1}, Correct: {2}, Onset correct: {3}, " \
                          "Offset correct: {4}, Insertion: {5}, Deletion: {6}\n".\
                        format(numDetectedBoundaries, numGroundtruthBoundaries,numCorrect, numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion)

                # csv_writer.writerow([recording_name+'_'+str(idx+1),
                #                      numDetectedBoundaries,
                #                      numGroundtruthBoundaries,
                #                      numCorrect,
                #                      numInsertion,
                #                      numDeletion,
                #                      correct_list])

    return sumDetectedBoundaries, sumGroundtruthBoundaries, sumGroundtruthPhrases, sumCorrect, sumOnsetCorrect, \
           sumOffsetCorrect, sumInsertion, sumDeletion
def batch_eval(aCapella_root, dataset_path, annotation_path, segPhrase_path, segSyllable_path, score_path, recordings, tolerance):
    """Batch-evaluate detected syllable boundaries (label-free variant).

    Compares detected .syll.lab boundaries against TextGrid ground truth using
    evaluation.boundaryEval; boundaries carry no text labels here.

    :param aCapella_root: dataset root folder
    :param dataset_path: dataset sub-folder
    :param annotation_path: sub-folder of the .TextGrid annotations
    :param segPhrase_path: sub-folder of the phrase-boundary .lab files
    :param segSyllable_path: sub-folder of the detected syllable .lab files
    :param score_path: sub-folder of the per-recording .csv scores
    :param recordings: iterable of recording names
    :param tolerance: tolerance passed to evaluation.boundaryEval
    :return: 8-tuple of summed evaluation counts over all phrases
    """
    sumDetectedBoundaries, sumGroundtruthPhrases, sumGroundtruthBoundaries, sumCorrect, sumOnsetCorrect, \
    sumOffsetCorrect, sumInsertion, sumDeletion = 0 ,0 ,0 ,0 ,0 ,0, 0, 0

    for i_recording, recording_name in enumerate(recordings):
        # resolve per-recording input paths
        groundtruth_textgrid_file = os.path.join(aCapella_root, dataset_path, annotation_path, recording_name+'.TextGrid')
        phrase_boundary_lab_file = os.path.join(aCapella_root, dataset_path, segPhrase_path, recording_name+'.lab')
        detected_lab_file_head = os.path.join(aCapella_root, dataset_path, segSyllable_path,recording_name)
        score_file = os.path.join(aCapella_root, dataset_path, score_path, recording_name+'.csv')

        if not os.path.isfile(score_file):
            print 'Score not found: ' + score_file
            continue

        lineList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='line')
        utteranceList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='dianSilence')

        # parse lines of groundtruth
        nestedUtteranceLists, numLines, numUtterances = textgridParser.wordListsParseByLines(lineList, utteranceList)

        # parse score
        utterance_durations, bpm = scoreParser.csvDurationScoreParser(score_file)

        # syllable boundaries groundtruth of each line
        # (phrases with bpm 0 are skipped)
        for idx, list in enumerate(nestedUtteranceLists):
            if int(bpm[idx]):
                print 'Evaluating... ' + recording_name + ' phrase ' + str(idx+1)

                ul = list[1]
                firstStartTime = ul[0][0]
                # boundary times shifted so the phrase starts at 0;
                # no labels appended in this variant
                groundtruthBoundaries = [(np.array(ul_element[:2]) - firstStartTime).tolist() for ul_element in ul]

                detected_syllable_lab = detected_lab_file_head+'_'+str(idx+1)+'.syll.lab'
                if not os.path.isfile(detected_syllable_lab):
                    print 'Syll lab file not found: ' + detected_syllable_lab
                    continue

                # read boundary detected lab into python list
                detectedBoundaries = labParser.lab2WordList(detected_syllable_lab)

                # read boundary groundtruth textgrid into python list
                # for segment in utteranceList:
                #     asciiLine = segment[2].encode("ascii", "replace")
                #     if len(asciiLine.replace(" ", "")):
                #         groundtruthBoundaries.append(segment[0:2])
                #
                # print groundtruthBoundaries

                #
                numDetectedBoundaries, numGroundtruthBoundaries, numCorrect, numOnsetCorrect, numOffsetCorrect, \
                numInsertion, numDeletion = evaluation.boundaryEval(groundtruthBoundaries, detectedBoundaries, tolerance)

                # accumulate per-phrase counts into the batch totals
                sumDetectedBoundaries += numDetectedBoundaries
                sumGroundtruthBoundaries += numGroundtruthBoundaries
                sumGroundtruthPhrases += 1
                sumCorrect += numCorrect
                sumOnsetCorrect += numOnsetCorrect
                sumOffsetCorrect += numOffsetCorrect
                sumInsertion += numInsertion
                sumDeletion += numDeletion

                # only report phrases with a low correct ratio
                if numCorrect/float(numGroundtruthBoundaries) < 0.7:
                    print "Detected: {0}, Ground truth: {1}, Correct: {2}, Onset correct: {3}, " \
                          "Offset correct: {4}, Insertion: {5}, Deletion: {6}\n".\
                        format(numDetectedBoundaries, numGroundtruthBoundaries,numCorrect, numOnsetCorrect, numOffsetCorrect, numInsertion, numDeletion)

    return sumDetectedBoundaries, sumGroundtruthBoundaries, sumGroundtruthPhrases, sumCorrect, sumOnsetCorrect, \
           sumOffsetCorrect, sumInsertion, sumDeletion
''' This work is licensed under a Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. Author: Rong Gong, MTG-UPF, [email protected] 17 January 2016 ''' import textgridParser groundtruth_textgrid_file = '../laosheng/lsxp-Wo_zheng_zai-Kong_cheng_ji01-upf.TextGrid' # parse the phrase boundary, and its content lineList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='line') # parse the dian Tier dianList = textgridParser.textGrid2WordList(groundtruth_textgrid_file, whichTier='dian') print lineList print dianSilenceList