def make_speech_corpus(top, dest, txtdest, snddest, srcfolder):
    '''Check every .spl file in ``srcfolder`` for the information needed to
    extract its recording and text.

    For each session that passes the checks, a directory name is built from
    the session's file stem and speaker id (joined under ``dest``) and stored
    on the session object as ``sessiondir``.

    Parameters:
        top: not used by this function.
        dest: parent path under which the session directory name is formed.
        txtdest: only referenced by the commented-out call at the end.
        snddest: only referenced by the commented-out call at the end.
        srcfolder: directory whose ``.spl`` files are examined.

    Returns None.
    '''
    for spl_name in sorted(os.listdir(srcfolder)):
        _, extension = os.path.splitext(spl_name)
        if extension != ".spl":
            continue

        # Parse the spl file and check whether key information has been
        # found. This is necessary because not all files are complete: some
        # contain errors from manual editing and some spl files point to
        # recordings that do not exist in the corpus.
        session = Session(os.path.abspath(srcfolder), spl_name)

        # The conditions are evaluated left to right and stop at the first
        # one that holds, exactly like a chain of separate guard clauses.
        if (
            session.speaker_id == ""  # ignore if there is no speaker
            or not session.wavdir  # ignore if there is no matching directory
            or len(session.record_states) < 2  # unsure whether this has an effect
        ):
            continue

        stem_path = os.path.join(dest, session.filestem)
        session.sessiondir = stem_path + "." + session.speaker_id

        # NOTE(review): the call below is commented out here, so txtdest and
        # snddest are currently unused -- confirm this is intended.
        # create_parallel_file_list(session, snddest, txtdest)
def make_speech_corpus(top, dest, txtdest, snddest, srcfolder):
    '''Walk the .spl files of ``srcfolder`` in sorted order and test whether
    each one holds enough information to extract the recording and text.

    Sessions that pass get a ``sessiondir`` attribute: the session's file
    stem joined under ``dest``, followed by "." and the speaker id.

    Parameters:
        top: not used by this function.
        dest: parent path used to form the session directory name.
        txtdest: only referenced by the commented-out call at the end.
        snddest: only referenced by the commented-out call at the end.
        srcfolder: directory that is listed for ``.spl`` files.

    Returns None.
    '''
    entries = os.listdir(srcfolder)
    entries.sort()

    for entry in entries:
        if os.path.splitext(entry)[1] != ".spl":
            continue

        # Parse the spl file and check whether key information has been
        # found. Not all files are complete: some contain errors from manual
        # editing and some point to recordings that do not exist in the
        # corpus.
        session = Session(os.path.abspath(srcfolder), entry)

        # A session is kept only if it has a speaker, a matching wav
        # directory and at least two record states (unsure whether the last
        # check has an effect). Short-circuiting keeps the check order.
        complete = (
            session.speaker_id != ""
            and session.wavdir
            and len(session.record_states) >= 2
        )
        if not complete:
            continue

        session.sessiondir = (
            os.path.join(dest, session.filestem) + "." + session.speaker_id
        )

        # NOTE(review): this call is commented out in this version, leaving
        # txtdest and snddest unused -- confirm this is intended.
        # create_parallel_file_list(session, snddest, txtdest)
def make_speech_corpus(top, dest, srcfolder):
    '''Create a session directory under ``dest`` for every usable .spl file
    in ``srcfolder`` and pass each session to ``create_parallel_files``.

    The directory name is the session's file stem, ".", and the speaker id.
    If that path already exists, the module-level counter ``n`` is
    incremented and "_<n>" is appended until an unused path is found.

    Parameters:
        top: not used by this function.
        dest: existing directory in which session directories are created.
        srcfolder: directory that is listed for ``.spl`` files.

    Returns None.

    Raises:
        OSError: from ``os.listdir`` / ``os.mkdir``, e.g. if ``srcfolder``
            or ``dest`` does not exist.
    '''
    global n

    for splfile in sorted(os.listdir(srcfolder)):
        if os.path.splitext(splfile)[1] != ".spl":
            continue

        session = Session(os.path.abspath(srcfolder), splfile)
        if session.speaker_id == "":  # ignore if there is no speaker
            continue
        if not session.wavdir:  # ignore if there is no matching directory
            continue
        if len(session.record_states) < 2:  # unsure whether this has an effect
            continue

        base_dir = os.path.join(dest, session.filestem) + "." + session.speaker_id

        # Keep bumping the shared counter until the name is free. Checking
        # only the unsuffixed name would let os.mkdir fail with
        # FileExistsError when the suffixed directory is also present.
        session_dir = base_dir
        while os.path.exists(session_dir):
            n += 1
            session_dir = base_dir + "_" + str(n)
        session.sessiondir = session_dir

        # NOTE(review): the previous code also evaluated
        # `session.speaker_id + "_" + str(n)` on a collision and discarded
        # the result. speaker_id is deliberately left unchanged here to
        # preserve behaviour -- confirm whether a suffixed id was intended.
        os.mkdir(session.sessiondir)

        create_parallel_files(session)