def deleteUnlabeledIntervals(tgPath, wavPath, tierName, outputPath):
    """
    Silence the regions of each wav that are unlabeled in /tierName/.

    Does not assume TextGrid and wav files are inside same directory.

    For every textgrid in /tgPath/, collects the blank intervals of
    /tierName/ and passes them to praatio's deleteWavSections (with
    doShrink=False, so the silenced wav keeps its original duration),
    writing the result to /outputPath/.
    """
    utils.makeDir(outputPath)
    for name in utils.findFiles(tgPath, filterExt=".TextGrid", stripExt=True):
        tg = tgio.openTextGrid(join(tgPath, name + ".TextGrid"))

        # Get the unlabeled intervals
        tier = tg.tierDict[tierName].fillInBlanks()
        entryList = [entry for entry in tier.entryList if entry[2] == ""]

        wavFN = join(wavPath, name + ".wav")
        outputWavFN = join(outputPath, name + ".wav")

        # BUG FIX: guard against a tier with no unlabeled intervals --
        # the original unconditionally indexed entryList[-1] and raised
        # IndexError on an empty list.
        if entryList:
            # Sometimes the textgrid and wav file differ by some small
            # amount.  If the textgrid is longer, the script crashes, so
            # clamp the final unlabeled interval to the wav's duration.
            wavDur = _getSoundFileDuration(wavFN)
            if entryList[-1][1] > wavDur and entryList[-1][0] < wavDur:
                entryList[-1] = (entryList[-1][0], wavDur, "")

        praatio_scripts.deleteWavSections(wavFN, outputWavFN, entryList,
                                          doShrink=False)
def autoSegmentSpeech(praatEXE, inputWavPath, rawTGPath, finalTGPath):
    """
    Automatically divide recordings into speech/silence regions.

    First runs praat's silence annotation over every wav in
    /inputWavPath/ (raw textgrids land in /rawTGPath/), then converts
    each raw textgrid's "silences" tier into annotation markings,
    writing the final textgrids to /finalTGPath/.
    """
    utils.makeDir(finalTGPath)

    praat_scripts.annotateSilences(praatEXE, inputWavPath, rawTGPath)

    rawTGNameList = utils.findFiles(rawTGPath, filterExt=".TextGrid")
    for fn in rawTGNameList:
        srcFN = join(rawTGPath, fn)
        dstFN = join(finalTGPath, fn)
        markTranscriptForAnnotations(srcFN, "silences", dstFN)
def generateSingleIPUTextgrids(wavPath, txtPath, outputPath,
                               nameMod=None, addPause=True):
    '''
    Generates a textgrid with a single IPU for each wave file

    This constitutes the first step of SPPAS, chunking a recording
    into utterances.  In the cases when there is only a single
    utterance, SPPAS sometimes makes errors (or is not configured
    properly by the user).  This script is strictly for those
    situations.

    If there are multiple audio files for each transcript, you can
    derive the transcript name using /nameMod/

    If there is a chance of even a slight segment of silence on the
    edges of the audio file, /addPause/ should be True.
    '''
    # Identity mapping by default: transcript name == wav name.
    if nameMod is None:
        nameMod = lambda x: x

    if not os.path.exists(outputPath):
        os.mkdir(outputPath)

    for wavName in utils.findFiles(wavPath, filterExt=".wav", stripExt=True):
        transcriptName = nameMod(wavName)

        # Add initial and final small pauses to each transcript
        txtFN = join(txtPath, transcriptName + ".txt")
        with io.open(txtFN, "r") as fd:
            txt = fd.read()
        if addPause is True:
            txt = "+ %s +" % txt.lower()

        wavFN = join(wavPath, wavName + ".wav")
        dur = praatio_scripts.audioio.WavQueryObj(wavFN).getDuration()

        # One interval spanning the whole recording.
        ipuTier = tgio.IntervalTier("ipu", [(0, dur, txt), ], 0, dur)
        outTG = tgio.Textgrid()
        outTG.addTier(ipuTier)
        outTG.save(join(outputPath, wavName + ".TextGrid"))
# NOTE(review): this is a byte-for-byte duplicate of the
# generateSingleIPUTextgrids defined earlier in this file; being the later
# definition, it silently shadows the first.  One of the two copies should
# be deleted -- confirm with the file's history which is canonical.
def generateSingleIPUTextgrids(wavPath, txtPath, outputPath,
                               nameMod=None, addPause=True):
    '''
    Generates a textgrid with a single IPU for each wave file

    This constitutes the first step of SPPAS, chunking a recording
    into utterances.  In the cases when there is only a single
    utterance, SPPAS sometimes makes errors (or is not configured
    properly by the user).  This script is strictly for those
    situations.

    If there are multiple audio files for each transcript, you can
    derive the transcript name using /nameMod/

    If there is a chance of even a slight segment of silence on the
    edges of the audio file, /addPause/ should be True.
    '''
    # Default: transcript file shares the wav file's base name.
    if nameMod is None:
        nameMod = lambda x: x

    if not os.path.exists(outputPath):
        os.mkdir(outputPath)

    wavList = utils.findFiles(wavPath, filterExt=".wav", stripExt=True)

    for wavName in wavList:
        transcriptName = nameMod(wavName)

        # Add initial and final small pauses to each transcript
        with io.open(join(txtPath, transcriptName + ".txt"), "r") as fd:
            txt = fd.read()
        if addPause is True:
            txt = "+ %s +" % txt.lower()

        wavFN = join(wavPath, wavName + ".wav")
        dur = praatio_scripts.audioio.WavQueryObj(wavFN).getDuration()

        # A single interval ("ipu") covering the entire audio duration.
        tg = tgio.Textgrid()
        tier = tgio.IntervalTier("ipu", [(0, dur, txt), ], 0, dur)
        tg.addTier(tier)
        tg.save(join(outputPath, wavName + ".TextGrid"))
def padEndWithSilence(indir, outdir):
    """
    Append a block of digital silence to the end of every wav in /indir/.

    Copies each wav to /outdir/ with 200000 zero bytes appended (roughly
    two seconds at 44.1 kHz 16-bit mono -- the exact duration depends on
    the file's sample rate/width/channels; TODO confirm the expected
    audio format).  Downstream processing fails when the final labeled
    interval sits too close to the end of the file, hence the padding.
    """
    utils.makeDir(outdir)
    for name in utils.findFiles(indir, filterExt=".wav", stripExt=False):
        inwavfile = join(indir, name)
        outwavfile = join(outdir, name)

        inwav = wave.open(inwavfile, 'rb')
        outwav = wave.open(outwavfile, 'wb')
        try:
            data = inwav.readframes(inwav.getnframes())

            # BUG FIX: readframes() returns bytes; the original appended a
            # str ('\x00' * 200000), which raises TypeError on Python 3.
            # A bytes literal behaves identically on Python 2 and 3.
            silence = b'\x00' * 200000
            outdata = data + silence

            outwav.setnchannels(inwav.getnchannels())
            outwav.setsampwidth(inwav.getsampwidth())
            outwav.setframerate(inwav.getframerate())
            outwav.setcomptype('NONE', 'not compressed')
            outwav.writeframes(outdata)
        finally:
            # Close both handles even if a wave.Error occurs mid-copy.
            inwav.close()
            outwav.close()
def sppasPostProcess(tgPath, outputPath, removeTierList=None,
                     renameTierList=None, deleteIntermediateFiles=False):
    '''
    Cleanup SPPAS output files.

    Remove unused tier names.  Rename tier names.  Convert the phone
    tier from X-SAMPA to IPA.  Results are written to /outputPath/;
    already-processed textgrids there are skipped.

    If /deleteIntermediateFiles/ is True, the xra and intermediate
    textgrids produced by SPPAS will be deleted.
    '''
    if not os.path.exists(outputPath):
        os.mkdir(outputPath)

    if removeTierList is None:
        removeTierList = REMOVE_TIER_LIST

    if renameTierList is None:
        renameTierList = RENAME_TIER_LIST

    # Remove intermediate files
    if deleteIntermediateFiles is True:
        lowerTGList = utils.findFiles(tgPath, filterExt=".textgrid")
        xraList = utils.findFiles(tgPath, ".xra")
        removeList = lowerTGList + xraList
        for fn in removeList:
            os.remove(join(tgPath, fn))

    # SPPAS suffixes its output textgrids with '-merge'; those are the
    # files we post-process here.
    tgFNList = utils.findFiles(tgPath, filterExt=".TextGrid")
    tgFNList = [fn for fn in tgFNList if '-merge' in fn]

    # Clean up the textgrids output by SPPAS
    # Rename tiers, delete tiers, and convert the phonetic tier
    # from xsampa to IPA
    for mergeFN in tgFNList:
        mergeName = os.path.splitext(mergeFN)[0]
        nonMergeName = mergeName.split('-merge')[0]

        # Keep a copy of the source wav next to the output textgrid.
        if not os.path.exists(join(outputPath, nonMergeName + ".wav")):
            shutil.copy(join(tgPath, nonMergeName + ".wav"),
                        join(outputPath, nonMergeName + ".wav"))

        if os.path.exists(join(outputPath, nonMergeName + ".TextGrid")):
            # BUG FIX: '%' binds tighter than '+', so the original printed
            # "Skipping NAME -- already exists.TextGrid".  Parenthesize the
            # filename so the extension lands on the name.
            print("Skipping %s -- already exists" % (mergeName + ".TextGrid"))
            continue

        # Open tg file and remove jittered boundaries
        tg = praatio_scripts.alignBoundariesAcrossTiers(join(tgPath, mergeFN),
                                                        maxDifference=0.001)

        # Remove tiers
        for name in removeTierList:
            if name in tg.tierNameList:
                tg.removeTier(name)

        # Rename tiers
        for fromName, toName in renameTierList:
            if fromName in tg.tierNameList:
                tg.renameTier(fromName, toName)

        # Convert phones to IPA
        tg = _xsampaToIPATier(tg, "phones")

        # # Typically, the start and end of a spass file is silent but an
        # # utterance with only a single ipu will not acount for this.  Make
        # # a tiny amount of space for the user to be able to shift the
        # # tier if needed.
        # for tierName in tg.tierNameList:
        #     tier = tg.tierDict[tierName]
        #     start, stop, label = tier.entryList[0]
        #     if decimalEqual(start, 0) and stop > 0.01:
        #         tier.entryList[0] = (0.01, stop, label)
        #
        #     start, stop, label = tier.entryList[-1]
        #     duration = tg.maxTimestamp
        #     if decimalEqual(stop, duration) and start < duration - 0.01:
        #         tier.entryList[-1] = (start, duration - 0.01, label)

        tg.save(join(outputPath, nonMergeName + ".TextGrid"))
def getPitchData(tgpath, wavpath, outpath, matlabExePath, matlabScriptPath,
                 praatExePath, praatScriptPath):
    """
    Generates clean textgrid files with the mother's speech isolated
    from room noise and child speech.

    Directory variables below which are ALL CAPS, such as WAV_DIR and
    EPOCH_DIR hold files which are referenced throughout the workflow
    as opposed to directories which contain textgrids at a certain
    stage of processing.  Directories ending in numbers such as
    textgrids_tier_names_checked_(01) are considered to hold textgrids
    at certaining milestones of processing, and are placed into the
    working directory instead of the TEMP directory.
    """
    # NOTE(review): praatScriptPath is accepted but never referenced in
    # the visible body (possibly used by the commented-out subwav
    # section) -- confirm before removing.

    def _nextStep(n):
        # Advance a step counter to the next whole number:
        # 1.0 -> 2.0, and 1.1 -> 2.0 (ceil of a fractional sub-step).
        # The value is embedded in output directory names below.
        if n == int(n):
            return n+1.0
        else:
            return math.ceil(n)

    # initialize
    # tg_dir = join(path, "TEXTGRIDS_INTERVALS_MARKED")
    tg_dir = tgpath
    # WAV_DIR = join(path, "WAVS")
    WAV_DIR = wavpath
    path = outpath
    # NOTE(review): the stdlib `io` module has no make_dir; `io` is
    # presumably a project-local module here -- verify the imports.
    io.make_dir(path)
    TEMP = tempfile.mkdtemp()

    # Per-category step counters; the fractional part marks sub-steps
    # within a milestone.
    tgStep = 0.0
    praatStep = 0.0
    uweStep = 0.0
    wavStep = 0.0

    # ensure the tier names are consistent
    tgStep += 0.1
    new_tg_dir = join(path, "_textgrids_{}_tier_names_checked".format(tgStep))
    general.renameTiers(
        tg_dir
        ,new_tg_dir
    )
    tg_dir = new_tg_dir

    # replace all labels from Mother tier other than MS
    tgStep += 0.1
    new_tg_dir = join(path,
                      "_textgrids_{}_MS_label_only_in_Mother_tier".format(tgStep))
    general.removeIntervalsFromTierByLabel(
        tg_dir
        ,"Mother"
        ,"MS"
        ,new_tg_dir
        ,removeAllBut=True
    )
    tg_dir = new_tg_dir

    # crop portions of intervals in Mother tier overlapping with Mother's
    # Backchannel tier, meaning that all portions of MS intervals
    # overlapping with LA intervals (laughter) are cropped
    tgStep += 0.1
    new_tg_dir = join(path, "_textgrids_{}_LA_removed".format(tgStep))
    general.isolateMotherSpeech(
        tg_dir
        ,"Mother's Backchannel"
        ,new_tg_dir
    )
    tg_dir = new_tg_dir

    # set current tg_dir as directory referenced after preprocessing and
    # before cropping
    TG_PREPROCESSED = tg_dir

    # crop portions of intervals in Mother tier overlapping with Child
    # tier, then Room tier, then both.
    # after each cropping, intervals shorter than can be processed are
    # removed from the new Mother tiers
    # non-overlapping portions of intervals in Mother tier are retained
    tgStep = _nextStep(tgStep)
    TG_CS_RMVD_DIR = join(path, "_textgrids_{}_child_removed".format(tgStep))
    tgStep = _nextStep(tgStep)
    TG_ROOM_RMVD_DIR = join(path, "_textgrids_{}_room_removed".format(tgStep))
    tgStep = _nextStep(tgStep)
    TG_CS_ROOM_RMVD_DIR = join(path,
                               "_textgrids_{}_child_room_removed".format(tgStep))

    # Child speech removed (0.15 s = minimum processable interval length)
    general.isolateMotherSpeech(
        tg_dir
        ,"Child"
        ,join(TEMP, "cs_rmvd")
    )
    general.filterShortIntervalsFromTier(
        join(TEMP, "cs_rmvd")
        ,"Mother"
        ,0.15
        ,TG_CS_RMVD_DIR
    )

    # Room noise removed
    general.isolateMotherSpeech(
        tg_dir
        ,"Room"
        ,join(TEMP, "rm_rmvd")
    )
    general.filterShortIntervalsFromTier(
        join(TEMP, "rm_rmvd")
        ,"Mother"
        ,0.15
        ,TG_ROOM_RMVD_DIR
    )

    # Both child speech and room noise removed (room pass applied on top
    # of the child-removed textgrids)
    general.isolateMotherSpeech(
        TG_CS_RMVD_DIR
        ,"Room"
        ,join(TEMP, "cs_rm_rmvd")
    )
    general.filterShortIntervalsFromTier(
        join(TEMP, "cs_rm_rmvd")
        ,"Mother"
        ,0.15
        ,TG_CS_ROOM_RMVD_DIR
    )

    ################################
    # TODO: Delete these lines
    ################################
    # TG_CS_ROOM_RMVD_DIR = join(path, "TEXTGRIDS_FROM_OLD_CODE")
    # TG_CS_RMVD_DIR = join(path, "TEXTGRIDS_OLD_CODE_CS_RMVD")
    # TG_ROOM_RMVD_DIR = join(path, "TEXTGRIDS_OLD_CODE_ROOM_RMVD")
    ################################
    ################################
    ################################

    tg_dir = TG_CS_ROOM_RMVD_DIR

    # create directory of tg_info files (tier entry information as plain
    # text listing)
    TG_INFO_DIR = join(path, "__tg_info")
    general.extractTGInfo(
        tg_dir
        ,TG_INFO_DIR
        ,"Mother"
        ,searchForMothersSpeech=False
    )

    # generate an epoch file (.txt file) corresponding to the Epochs tier
    # in each textgrid (start, stop, label)
    EPOCH_DIR = join(path, "__epochs")
    general.generateEpochFiles(
        tg_dir
        ,WAV_DIR
        ,EPOCH_DIR
    )

    # pad wav files with about two seconds of silence at the end
    # the next step does not process wav files successfuly if the end of
    # the last MS interval is too near the end of the wav
    wavStep = _nextStep(wavStep)
    new_wav_dir = join(path, "_wavs_{}_padded_w_silence".format(wavStep))
    padEndWithSilence(
        WAV_DIR
        ,new_wav_dir
    )
    WAV_DIR = new_wav_dir

    # remove intervals from Mother tier not marked MS
    # this is done in order to try to eliminate loud noises which affect
    # how praat extracts F0 when processing entire wav files
    wavStep = _nextStep(wavStep)
    new_wav_dir = join(path, "_wavs_{}_nonMS_zeroed_out".format(wavStep))
    deleteUnlabeledIntervals(
        tg_dir
        ,WAV_DIR
        ,"Mother"
        ,new_wav_dir
    )
    WAV_DIR = new_wav_dir

    # extract syllable nuclei to determine speech rate (MATLAB REQUIRED)
    wav_temp_dir = join(TEMP, "_subset_wav_files")
    uweStep = _nextStep(uweStep)
    syllable_nuclei_dir = join(path,
                               "_uwe_{}_syllable_nuclei_whole".format(uweStep))
    tgStep = _nextStep(tgStep)
    new_tg_dir = join(path,
                      "_textgrids_{}_syllable_nuclei_added".format(tgStep))
    markupTextgridWithSyllableNuclei(
        WAV_DIR
        ,tg_dir
        ,"Mother"
        ,wav_temp_dir
        ,syllable_nuclei_dir
        ,matlabExePath
        ,matlabScriptPath
        ,new_tg_dir
        ,printCmd=True
        ,outputTGFlag=False
    )
    tg_dir = new_tg_dir

    # acoustic analysis
    uweStep = _nextStep(uweStep)
    nucleus_listing_per_file_dir = join(
        path, "_uwe_{}_nucleus_listing_mothers_speech".format(uweStep))
    uweStep = _nextStep(uweStep)
    SPEECH_RATE_PER_EPOCH_DIR = join(
        path, "_uwe_{}_speech_rate_for_epochs".format(uweStep))
    # NOTE(review): 44100 is presumably the assumed sample rate (Hz) of
    # the recordings -- confirm against aggregateSpeechRate's signature.
    general.aggregateSpeechRate(
        TG_INFO_DIR
        ,syllable_nuclei_dir
        ,nucleus_listing_per_file_dir
        ,44100
    )
    general.uwePhoneCountForEpochs(
        EPOCH_DIR
        ,TG_INFO_DIR
        ,nucleus_listing_per_file_dir
        ,SPEECH_RATE_PER_EPOCH_DIR
    )

    # The following code can be run over the whole audio files, regardless
    # of epoch or textgrids (we'll extract pitch information for the
    # intervals and epochs later)
    # The first Praat section below extracts pitch data from one wav file
    # with unlabled intervals silenced.
    #
    # The second Praat section splits labeled intervals into subwavs.
    #
    # It is recommended to use the first section.
    #
    # Regardless of which is used make sure the corresponding aggregate
    # section is uncommented below, or that both are if both full wavs
    # and subwavs are used.
    praatStep = _nextStep(praatStep)
    praat_dir = join(path, "_praat_{}_75Hz_750Hz_fullwav".format(praatStep))
    utils.makeDir(praat_dir)

    praatStep += 0.1
    PI_FULLWAV_DIR = join(path,
                          "_praat_{}_75Hz_750Hz_fullwav_filter9".format(praatStep))
    utils.makeDir(PI_FULLWAV_DIR)

    for fn in utils.findFiles(WAV_DIR, filterExt=".wav", stripExt=True):
        print(fn+".wav")
        # Raw pitch/intensity track over the whole (silenced) wav,
        # sampled every 10 ms in the 75-750 Hz range.
        userPitchData = pitch_and_intensity.audioToPI(
            inputPath=WAV_DIR
            ,inputFN=fn+".wav"
            ,outputPath=praat_dir
            ,outputFN=fn+".txt"
            ,praatEXE=praatExePath
            ,minPitch=75
            ,maxPitch=750
            ,sampleStep=0.01
            ,silenceThreshold=0.03
#             ,silenceThreshold=0.01
#             ,silenceThreshold=0.001
#             ,silenceThreshold=0.0001
#             ,silenceThreshold=0.00001
            ,forceRegenerate=True
#             ,tgPath=tg_dir
#             ,tgFN=fn+".TextGrid"
#             ,tierName="Mother"
#             ,tmpOutputPath=TEMP
        )
        # Median-filtered (window=9) per-epoch pitch measures.
        filteredPitchData = pitch_and_intensity.generatePIMeasures(
            userPitchData
            ,tg_dir
            ,fn+".TextGrid"
            ,tierName="Epochs"
            ,doPitch=True
            ,medianFilterWindowSize=9
        )
        # Write one CSV row per epoch.
        with open(join(PI_FULLWAV_DIR, fn+'.txt'), 'w') as outfile:
            for line in filteredPitchData:
                line = [str(x) for x in line]
                outfile.write(",".join(line)+'\n')

#     praatStep = _nextStep(praatStep)
#     praat_dir = join(path, "_praat_{}_75Hz_750Hz_subwav".format(praatStep))
#     utils.makeDir(praat_dir)
#
#     praatStep += 0.1
#     PI_SUBWAV_DIR = join(path, "_praat_{}_75Hz_750Hz_subwav_filter9".format(praatStep))
#     utils.makeDir(PI_SUBWAV_DIR)
#
#     for fn in utils.findFiles(WAV_DIR, filterExt=".wav", stripExt=True):
#         print(fn+".wav")
#         userPitchData = pitch_and_intensity.audioToPI(
#             inputPath=WAV_DIR
#             ,inputFN=fn+".wav"
#             ,outputPath=praat_dir
#             ,outputFN=fn+".txt"
#             ,praatEXE=praatExePath
#             ,minPitch=75
#             ,maxPitch=750
#             ,sampleStep=0.01
#             ,silenceThreshold=0.03
##             ,silenceThreshold=0.01
##             ,silenceThreshold=0.001
##             ,silenceThreshold=0.0001
##             ,silenceThreshold=0.00001
#             ,forceRegenerate=True
#             ,tgPath=tg_dir
#             ,tgFN=fn+".TextGrid"
#             ,tierName="Mother"
#             ,tmpOutputPath=TEMP
#         )
#         filteredPitchData = pitch_and_intensity.generatePIMeasures(
#             userPitchData
#             ,tg_dir
#             ,fn+".TextGrid"
#             ,tierName="Epochs"
#             ,doPitch=True
#             ,medianFilterWindowSize=9
#         )
#         with open(join(PI_SUBWAV_DIR, fn+'.txt'), 'w') as outfile:
#             for line in filteredPitchData:
#                 line = [str(x) for x in line]
#                 outfile.write(",".join(line)+'\n')

    # event frequency/duration per epoch across all cropping variants
    EVENT_DIR = join(path, "__event_frequency_and_duration")
    general.eventStructurePerEpoch(
        EPOCH_DIR
        ,TG_CS_ROOM_RMVD_DIR
        ,TG_CS_RMVD_DIR
        ,TG_ROOM_RMVD_DIR
        ,TG_PREPROCESSED
        ,EVENT_DIR
        ,"Mother"
        ,"Mother's Backchannel"
    )

    # TODO: generalize this so that 'P' is not output for every type of
    # session
    EPOCH_ROW_HEADER_DIR = join(path, "__epoch_row_header")
    general.generateEpochRowHeader(
        EPOCH_DIR
        ,EPOCH_ROW_HEADER_DIR
        ,"P"
    )

    # Column header for the final aggregated feature CSV.
    headerStr = ("file,id,session,interval,int_start,int_end,int_dur,"
                 "ms_dur_s,ms_freq,ms_child_speech_filtered_dur_s,"
                 "ms_noise_filtered_dur_s,ms_full_dur_s,lost_ms_dur_s,"
                 "fp_dur_s,fp_freq,la_dur_s,la_freq,"
                 "uwe_sylcnt,f0_mean,"
                 "f0_max,f0_min,f0_range,f0_var,f0_std"
                 )

    # Join the per-epoch outputs of the previous stages into one table.
    general.aggregateFeatures(
        path
        ,[
            os.path.split(EPOCH_ROW_HEADER_DIR)[1]
            ,os.path.split(EVENT_DIR)[1]
            ,os.path.split(SPEECH_RATE_PER_EPOCH_DIR)[1]
            ,os.path.split(PI_FULLWAV_DIR)[1]
        ]
        ,"__aggr_fullwav"
        ,headerStr
    )

#     general.aggregateFeatures(
#         path
#         ,[
#             os.path.split(EPOCH_ROW_HEADER_DIR)[1]
#             ,os.path.split(EVENT_DIR)[1]
#             ,os.path.split(SPEECH_RATE_PER_EPOCH_DIR)[1]
#             ,os.path.split(PI_SUBWAV_DIR)[1]
#         ]
#         ,"__aggr_subwav"
#         ,headerStr
#     )

    # remove the temp directory
    shutil.rmtree(TEMP)