예제 #1
0
def main(verbose=False, *speech_corpora):
    """Scan directory for audio files and convert them to wav files

    For each speech corpus `speech_corpus`

    1. the resulting wav files are written to the directory
       `.speechrc.wav16`/<speech_corpus>/

    2. the transcripts in data/src/speech/<speech_corpus>/transcripts_*.csv are
       updated.

    Args:
        verbose: When true, logging is configured at DEBUG level; otherwise
            at INFO level.
        *speech_corpora: Names of the speech corpora to process. Each name is
            used as a sub-directory of both the configured ``speech_corpora``
            directory (input) and the configured ``wav16`` directory (output).

    Exits:
        Calls ``sys.exit(1)`` after logging an error when no corpus name is
        given.
    """
    misc.init_app('speech_audio_scan')

    config = misc.load_config('.speechrc')

    # Both locations come from the [speech] section of the .speechrc config.
    speech_corpora_dir = Path(config.get("speech", "speech_corpora"))
    wav16 = Path(config.get("speech", "wav16"))

    if not speech_corpora:
        logging.error("At least one speech corpus must be provided.")
        sys.exit(1)

    logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO)

    # NOTE(review): judging by its name this helper aborts when a requested
    # corpus directory does not exist -- confirm against its definition.
    exit_if_corpus_is_missing(speech_corpora_dir, speech_corpora)

    for speech_corpus in speech_corpora:
        transcripts = Transcripts(corpus_name=speech_corpus, create_db=True)

        # Make sure the per-corpus output directory exists before scanning.
        out_wav16_subdir = wav16 / speech_corpus
        out_wav16_subdir.mkdir(parents=True, exist_ok=True)
        in_root_corpus_dir = speech_corpora_dir / speech_corpus

        # scan_audiodir is handed plain strings rather than Path objects.
        scan_audiodir(str(in_root_corpus_dir), transcripts,
                      str(out_wav16_subdir))

        transcripts.save()

        # Single-argument, parenthesised print produces identical output under
        # Python 2 (print statement) and Python 3 (print function); the bare
        # `print` statement used previously is a syntax error on Python 3.
        print("%s new transcripts saved." % (speech_corpus,))
        print("")
예제 #2
0
                t = missing_token
            else:
                t = tokenize(ts['ts'])[0]

            lex_edit(t)

    #
    # fini
    #

    curses.nocbreak()
    stdscr.keypad(0)
    curses.echo()
    curses.endwin()

    transcripts.save()
    print "new transcripts saved."
    print

    lex.save()
    print "new lexicon saved."
    print

except:
    curses.nocbreak()
    stdscr.keypad(0)
    curses.echo()
    curses.endwin()

    print u"*** ERROR: Unexpected error:", sys.exc_info()[0]
    traceback.print_exc()