Esempio n. 1
0
def make_speech_corpus(top, dest, txtdest, snddest, srcfolder):
    """Build parallel file lists for every usable ``.spl`` file in ``srcfolder``.

    Each entry of ``srcfolder`` whose extension is exactly ``.spl`` is parsed
    into a ``Session``. A session is skipped when its speaker id is the empty
    string, when its ``wavdir`` is falsy, or when it has fewer than two
    ``record_states``. For every remaining session, ``sessiondir`` is set to
    ``<dest>/<filestem>.<speaker_id>`` and the session is handed to
    ``create_parallel_file_list`` together with ``snddest`` and ``txtdest``.

    ``top`` is accepted but not used by this function.
    """
    # Filter first, then sort: the resulting order is the same as sorting the
    # full listing and skipping the non-.spl entries one by one.
    spl_names = sorted(
        name for name in os.listdir(srcfolder)
        if os.path.splitext(name)[1] == ".spl"
    )

    for spl_name in spl_names:
        session = Session(os.path.abspath(srcfolder), spl_name)

        # Not every spl file is complete: some were edited by hand and contain
        # errors, and some refer to recordings that are missing from the
        # corpus. Skip a session without a speaker, without a matching wav
        # directory, or with fewer than two record states.
        if (
            session.speaker_id == ""
            or not session.wavdir
            or len(session.record_states) < 2
        ):
            continue

        # Name for the processed files: "<dest>/<filestem>.<speaker_id>".
        stem_path = os.path.join(dest, session.filestem)
        session.sessiondir = stem_path + "." + session.speaker_id

        create_parallel_file_list(session, snddest, txtdest)
Esempio n. 2
0
def make_speech_corpus(top, dest, txtdest, snddest, srcfolder):
    """Walk the ``.spl`` files in ``srcfolder`` and emit parallel file lists.

    The directory entries are visited in sorted order. Anything whose
    extension is not ``.spl`` is ignored. Every other entry is parsed into a
    ``Session``, which is only processed further if it has a non-empty
    speaker id, a truthy ``wavdir`` and at least two ``record_states``. Such a
    session gets ``sessiondir`` set to ``<dest>/<filestem>.<speaker_id>`` and
    is passed to ``create_parallel_file_list`` with ``snddest`` and
    ``txtdest``.

    The ``top`` argument is not used in this function.
    """
    for entry in sorted(os.listdir(srcfolder)):
        extension = os.path.splitext(entry)[1]
        if extension != ".spl":
            continue

        session = Session(os.path.abspath(srcfolder), entry)

        # spl files can be incomplete, can contain errors from manual editing,
        # or can point to recordings that do not exist in the corpus, so the
        # key fields are checked before the session is used.
        usable = (
            session.speaker_id != ""                # a speaker is required
            and session.wavdir                      # a matching directory is required
            and len(session.record_states) >= 2     # unsure whether this has an effect
        )
        if not usable:
            continue

        target_prefix = os.path.join(dest, session.filestem)
        session.sessiondir = target_prefix + "." + session.speaker_id

        create_parallel_file_list(session, snddest, txtdest)
Esempio n. 3
0
def make_speech_corpus(top, dest, srcfolder):
    """Create one session directory per usable ``.spl`` file in ``srcfolder``.

    Entries of ``srcfolder`` are visited in sorted order; anything whose
    extension is not ``.spl`` is ignored. Each remaining file is parsed into a
    ``Session``, which is skipped when its speaker id is the empty string,
    its ``wavdir`` is falsy, or it has fewer than two ``record_states``.

    For every session that is kept, a directory named
    ``<dest>/<filestem>.<speaker_id>`` is created. If that path already
    exists, the module-level counter ``n`` is incremented and ``_<n>`` is
    appended to the name, repeating until an unused path is found. The chosen
    path is stored in ``session.sessiondir`` and the session is passed to
    ``create_parallel_files``.

    ``top`` is accepted but not used by this function.

    Raises:
        OSError: propagated from ``os.listdir`` / ``os.mkdir``, e.g. when
            ``dest`` does not exist.
    """
    global n  # collision counter shared across calls; defined outside this function

    for splfile in sorted(os.listdir(srcfolder)):
        if os.path.splitext(splfile)[1] != ".spl":
            continue

        session = Session(os.path.abspath(srcfolder), splfile)
        if session.speaker_id == "":  # ignore if there is no speaker
            continue
        if not session.wavdir:  # ignore if there is no matching directory
            continue
        if len(session.record_states) < 2:  # unsure whether this has an effect
            continue

        base_dir = os.path.join(dest, session.filestem) + "." + session.speaker_id
        sessiondir = base_dir
        # Keep bumping the counter until the suffixed name is actually free.
        # A single "_<n>" suffix is not guaranteed to be unique, and os.mkdir
        # raises FileExistsError when the target already exists.
        while os.path.exists(sessiondir):
            n += 1
            sessiondir = base_dir + "_" + str(n)
        # NOTE(review): the previous code evaluated
        # `session.speaker_id + "_" + str(n)` on a collision but discarded the
        # result, so the speaker id was never changed. It is left unchanged
        # here as well; confirm whether it was meant to receive the suffix.
        session.sessiondir = sessiondir
        os.mkdir(session.sessiondir)

        create_parallel_files(session)
Esempio n. 4
0
def make_speech_corpus(top, dest, srcfolder):
    """Create a session directory for each usable ``.spl`` file in ``srcfolder``.

    The ``.spl`` entries of ``srcfolder`` are processed in sorted order. Each
    one is parsed into a ``Session``; the session is skipped if its speaker id
    is the empty string, its ``wavdir`` is falsy, or it has fewer than two
    ``record_states``.

    A kept session gets a directory ``<dest>/<filestem>.<speaker_id>``. When
    that path is already taken, the module-level counter ``n`` is incremented
    and ``_<n>`` is appended, and this is retried until the path is unused.
    The resulting path is assigned to ``session.sessiondir``, the directory
    is created, and ``create_parallel_files(session)`` is called.

    ``top`` is accepted but not used by this function.

    Raises:
        OSError: propagated from ``os.listdir`` / ``os.mkdir``, e.g. when
            ``dest`` does not exist.
    """
    global n  # collision counter shared across calls; defined outside this function

    for splfile in sorted(os.listdir(srcfolder)):
        if os.path.splitext(splfile)[1] != ".spl":
            continue

        session = Session(os.path.abspath(srcfolder), splfile)
        if session.speaker_id == "":  # ignore if there is no speaker
            continue
        if not session.wavdir:  # ignore if there is no matching directory
            continue
        if len(session.record_states) < 2:  # unsure whether this has an effect
            continue

        unsuffixed = os.path.join(dest, session.filestem) + "." + session.speaker_id
        candidate = unsuffixed
        # Appending "_<n>" once does not guarantee a free name, and os.mkdir
        # fails with FileExistsError on an existing path, so retry with the
        # next counter value until the candidate does not exist.
        while os.path.exists(candidate):
            n += 1
            candidate = unsuffixed + "_" + str(n)
        # NOTE(review): on a collision the previous code also computed
        # `session.speaker_id + "_" + str(n)` but threw the value away, so
        # speaker_id never changed. That no-op is dropped and speaker_id is
        # still left as is; confirm whether a suffixed speaker id was intended.
        session.sessiondir = candidate
        os.mkdir(session.sessiondir)

        create_parallel_files(session)