def align_corpus(corpus_dir, dict_path, output_directory, temp_dir,
                 output_model_path, args):
    """Train an aligner on a corpus, exporting TextGrids after each stage.

    The dictionary is loaded and written first, then the corpus is prepared
    (written, MFCCs created, splits set up against the dictionary). Any
    out-of-vocabulary report files found in the corpus split directory are
    copied to ``output_directory`` before training starts.

    Args:
        corpus_dir: Path to the corpus; a trailing path separator is
            tolerated.
        dict_path: Path handed to ``Dictionary``.
        output_directory: Directory receiving OOV reports and the aligner's
            output.
        temp_dir: Working directory root; an empty string selects
            ``TEMP_DIR``, otherwise ``~`` is expanded.
        output_model_path: Where to save the trained aligner, or ``None`` to
            skip saving.
        args: Namespace providing ``clean``, ``speaker_characters``,
            ``num_jobs``, ``fast`` and ``verbose``.
    """
    work_root = TEMP_DIR if temp_dir == '' else os.path.expanduser(temp_dir)

    # A trailing separator makes basename() return '', so step up one level.
    corpus_name = os.path.basename(corpus_dir)
    if corpus_name == '':
        corpus_dir = os.path.dirname(corpus_dir)
        corpus_name = os.path.basename(corpus_dir)
    data_directory = os.path.join(work_root, corpus_name)

    working_dirs = (data_directory, output_directory)
    if args.clean:
        for stale_dir in working_dirs:
            shutil.rmtree(stale_dir, ignore_errors=True)
    for needed_dir in working_dirs:
        os.makedirs(needed_dir, exist_ok=True)

    dictionary = Dictionary(dict_path, data_directory)
    dictionary.write()
    corpus = Corpus(corpus_dir, data_directory, args.speaker_characters,
                    num_jobs=args.num_jobs)
    print(corpus.speaker_utterance_info())
    corpus.write()
    corpus.create_mfccs()
    corpus.setup_splits(dictionary)

    # Surface the OOV reports (when present) next to the alignment output.
    for report_name in ('utterance_oovs.txt', 'oovs_found.txt'):
        report_path = os.path.join(corpus.split_directory, report_name)
        if os.path.exists(report_path):
            shutil.copy(report_path, output_directory)

    # Each stage receives its own dict object.
    align_often = not args.fast
    aligner = TrainableAligner(corpus, dictionary, output_directory,
                               temp_directory=data_directory,
                               mono_params={'align_often': align_often},
                               tri_params={'align_often': align_often},
                               tri_fmllr_params={'align_often': align_often},
                               num_jobs=args.num_jobs)
    aligner.verbose = args.verbose

    # Export TextGrids after every training stage, in order.
    for stage_name in ('train_mono', 'train_tri', 'train_tri_fmllr'):
        getattr(aligner, stage_name)()
        aligner.export_textgrids()

    if output_model_path is not None:
        aligner.save(output_model_path)
Ejemplo n.º 2
0
def align_corpus(model_path, corpus_dir, output_directory, temp_dir, args, debug=False):
    """Align a corpus with a pretrained model archive and export TextGrids.

    Args:
        model_path: Path handed to ``Archive``.
        corpus_dir: Path to the corpus; a trailing path separator is
            tolerated.
        output_directory: Directory receiving OOV reports and the aligner's
            output.
        temp_dir: Working directory root; an empty string selects
            ``TEMP_DIR``, otherwise ``~`` is expanded.
        args: Namespace providing ``clean``, ``speaker_characters``,
            ``num_jobs``, ``no_speaker_adaptation`` and ``verbose``.
        debug: When true, print the elapsed time of each step.
    """
    overall_start = time.time()

    def report(template, started):
        # Timing messages are only produced in debug mode.
        if debug:
            print(template.format(time.time() - started))

    work_root = TEMP_DIR if temp_dir == '' else os.path.expanduser(temp_dir)

    # A trailing separator makes basename() return '', so step up one level.
    corpus_name = os.path.basename(corpus_dir)
    if corpus_name == '':
        corpus_dir = os.path.dirname(corpus_dir)
        corpus_name = os.path.basename(corpus_dir)
    data_directory = os.path.join(work_root, corpus_name)

    working_dirs = (data_directory, output_directory)
    if args.clean:
        for stale_dir in working_dirs:
            shutil.rmtree(stale_dir, ignore_errors=True)
    for needed_dir in working_dirs:
        os.makedirs(needed_dir, exist_ok=True)

    started = time.time()
    corpus = Corpus(corpus_dir, data_directory, args.speaker_characters,
                    num_jobs=args.num_jobs)
    print(corpus.speaker_utterance_info())
    corpus.write()
    report('Wrote corpus information in {} seconds', started)

    started = time.time()
    corpus.create_mfccs()
    report('Calculated mfccs in {} seconds', started)

    archive = Archive(model_path)
    started = time.time()
    aligner = PretrainedAligner(archive, corpus, output_directory,
                                temp_directory=data_directory,
                                num_jobs=args.num_jobs,
                                speaker_independent=args.no_speaker_adaptation)
    report('Setup pretrained aligner in {} seconds', started)
    aligner.verbose = args.verbose

    started = time.time()
    corpus.setup_splits(aligner.dictionary)
    report('Setup splits in {} seconds', started)

    # Surface the OOV reports (when present) next to the alignment output.
    for report_name in ('utterance_oovs.txt', 'oovs_found.txt'):
        report_path = os.path.join(corpus.split_directory, report_name)
        if os.path.exists(report_path):
            shutil.copy(report_path, output_directory)

    started = time.time()
    aligner.do_align()
    report('Performed alignment in {} seconds', started)

    started = time.time()
    aligner.export_textgrids()
    report('Exported textgrids in {} seconds', started)

    print('Done! Everything took {} seconds'.format(time.time() - overall_start))
def align_corpus(corpus_dir, dict_path, output_directory, temp_dir,
                 output_model_path, args):
    """Train an aligner on a corpus, exporting TextGrids after each stage.

    The corpus is prepared first (written, MFCCs created); the dictionary is
    then built with ``word_set=corpus.word_set`` and written before the
    corpus splits are set up against it.

    Args:
        corpus_dir: Path to the corpus; a trailing path separator is
            tolerated.
        dict_path: Path handed to ``Dictionary``.
        output_directory: Directory receiving OOV reports and the aligner's
            output.
        temp_dir: Working directory root; an empty string selects
            ``TEMP_DIR``, otherwise ``~`` is expanded.
        output_model_path: Where to save the trained aligner, or ``None`` to
            skip saving.
        args: Namespace providing ``clean``, ``speaker_characters``,
            ``num_jobs``, ``fast`` and ``verbose``.
    """
    if temp_dir == '':
        work_root = TEMP_DIR
    else:
        work_root = os.path.expanduser(temp_dir)

    # A trailing separator makes basename() return '', so step up one level.
    corpus_name = os.path.basename(corpus_dir)
    if corpus_name == '':
        corpus_dir = os.path.dirname(corpus_dir)
        corpus_name = os.path.basename(corpus_dir)
    data_directory = os.path.join(work_root, corpus_name)

    if args.clean:
        shutil.rmtree(data_directory, ignore_errors=True)
        shutil.rmtree(output_directory, ignore_errors=True)
    os.makedirs(data_directory, exist_ok=True)
    os.makedirs(output_directory, exist_ok=True)

    corpus = Corpus(corpus_dir, data_directory, args.speaker_characters,
                    num_jobs=args.num_jobs)
    print(corpus.speaker_utterance_info())
    corpus.write()
    corpus.create_mfccs()

    dictionary = Dictionary(dict_path, data_directory,
                            word_set=corpus.word_set)
    dictionary.write()
    corpus.setup_splits(dictionary)

    # Surface the OOV reports (when present) next to the alignment output.
    for report_name in ('utterance_oovs.txt', 'oovs_found.txt'):
        report_path = os.path.join(corpus.split_directory, report_name)
        if os.path.exists(report_path):
            shutil.copy(report_path, output_directory)

    # Build one independent parameter dict per training stage.
    stage_params = {
        key: {'align_often': not args.fast}
        for key in ('mono_params', 'tri_params', 'tri_fmllr_params')
    }
    aligner = TrainableAligner(corpus, dictionary, output_directory,
                               temp_directory=data_directory,
                               num_jobs=args.num_jobs,
                               **stage_params)
    aligner.verbose = args.verbose

    aligner.train_mono()
    aligner.export_textgrids()
    aligner.train_tri()
    aligner.export_textgrids()
    aligner.train_tri_fmllr()
    aligner.export_textgrids()

    if output_model_path is not None:
        aligner.save(output_model_path)
def align_corpus_no_dict(args):
    """Train an aligner using a dictionary produced by ``no_dictionary``.

    Builds the corpus, obtains a dictionary from
    ``no_dictionary(corpus, data_directory)``, then runs the aligner's
    ``train_mono``, ``train_tri`` and ``train_tri_fmllr`` stages, exporting
    TextGrids after each one.

    Args:
        args: Namespace providing ``temp_directory``, ``corpus_directory``,
            ``output_directory``, ``clean``, ``speaker_characters``, ``fast``,
            ``verbose`` and ``output_model_path``. ``num_jobs`` (default 3),
            ``debug``, ``ignore_exceptions`` and ``quiet`` (default False)
            are optional.
    """
    if not args.temp_directory:
        temp_dir = TEMP_DIR
    else:
        temp_dir = os.path.expanduser(args.temp_directory)

    # A trailing separator makes basename() return ''. Without stepping up a
    # level, data_directory would collapse to temp_dir itself and the
    # ``clean`` branch below would wipe the whole temporary root.
    corpus_directory = args.corpus_directory
    corpus_name = os.path.basename(corpus_directory)
    if corpus_name == '':
        corpus_directory = os.path.dirname(corpus_directory)
        corpus_name = os.path.basename(corpus_directory)
    data_directory = os.path.join(temp_dir, corpus_name)
    if args.clean:
        shutil.rmtree(data_directory, ignore_errors=True)
        shutil.rmtree(args.output_directory, ignore_errors=True)

    os.makedirs(data_directory, exist_ok=True)
    os.makedirs(args.output_directory, exist_ok=True)

    # Read optional settings once so the corpus and the aligner always agree.
    num_jobs = getattr(args, 'num_jobs', 3)
    debug = getattr(args, 'debug', False)

    corpus = Corpus(corpus_directory,
                    data_directory,
                    args.speaker_characters,
                    num_jobs=num_jobs,
                    debug=debug,
                    ignore_exceptions=getattr(args, 'ignore_exceptions',
                                              False))
    print(corpus.speaker_utterance_info())
    dictionary = no_dictionary(corpus, data_directory)
    mono_params = {'align_often': not args.fast}
    tri_params = {'align_often': not args.fast}
    tri_fmllr_params = {'align_often': not args.fast}
    a = TrainableAligner(corpus,
                         dictionary,
                         args.output_directory,
                         temp_directory=data_directory,
                         mono_params=mono_params,
                         tri_params=tri_params,
                         tri_fmllr_params=tri_fmllr_params,
                         num_jobs=num_jobs,
                         debug=debug,
                         skip_input=getattr(args, 'quiet', False))

    a.verbose = args.verbose
    a.train_mono()
    a.export_textgrids()
    a.train_tri()
    a.export_textgrids()
    a.train_tri_fmllr()
    a.export_textgrids()
    if args.output_model_path is not None:
        a.save(args.output_model_path)
Ejemplo n.º 5
0
def align_corpus(args, skip_input=False):
    """Align a corpus with a pretrained acoustic model and export TextGrids.

    A ``config.yml`` in the corpus's working directory records the settings
    of the previous run. When ``clean`` is requested, or the stored settings
    are dirty or differ from the current ones, the working directory and the
    output directory are removed and the run starts from a fresh config.

    Args:
        args: Namespace providing ``temp_directory``, ``corpus_directory``,
            ``dictionary_path``, ``acoustic_model_path``,
            ``output_directory``, ``speaker_characters`` and ``verbose``.
            ``clean``, ``debug``, ``errors``, ``ignore_exceptions`` and
            ``no_speaker_adaptation`` (default False) and ``num_jobs``
            (default 3) are optional. ``args.corpus_directory`` is rewritten
            without its trailing separator when it has one.
        skip_input: When true, never prompt the user about transcription
            issues or out-of-vocabulary words.
    """
    all_begin = time.time()
    if not args.temp_directory:
        temp_dir = TEMP_DIR
    else:
        temp_dir = os.path.expanduser(args.temp_directory)
    # A trailing separator makes basename() return '', so step up one level.
    corpus_name = os.path.basename(args.corpus_directory)
    if corpus_name == '':
        args.corpus_directory = os.path.dirname(args.corpus_directory)
        corpus_name = os.path.basename(args.corpus_directory)
    data_directory = os.path.join(temp_dir, corpus_name)
    conf_path = os.path.join(data_directory, 'config.yml')
    fresh_conf = {'dirty': False,
                  'begin': time.time(),
                  'version': __version__,
                  'type': 'align',
                  'corpus_directory': args.corpus_directory,
                  'dictionary_path': args.dictionary_path}
    if os.path.exists(conf_path):
        with open(conf_path, 'r') as f:
            # An explicit safe loader avoids constructing arbitrary objects
            # and is required by PyYAML releases that dropped the default.
            conf = yaml.load(f, Loader=yaml.SafeLoader)
    else:
        conf = fresh_conf
    if getattr(args, 'clean', False) \
            or conf['dirty'] or conf['type'] != 'align' \
            or conf['corpus_directory'] != args.corpus_directory\
            or conf['version'] != __version__\
            or conf['dictionary_path'] != args.dictionary_path:
        shutil.rmtree(data_directory, ignore_errors=True)
        shutil.rmtree(args.output_directory, ignore_errors=True)
        # The stored settings described the data just removed; writing them
        # back would force a clean on every later run.
        conf = fresh_conf

    os.makedirs(data_directory, exist_ok=True)
    os.makedirs(args.output_directory, exist_ok=True)

    # Read optional settings once so every consumer sees the same value.
    debug = getattr(args, 'debug', False)
    num_jobs = getattr(args, 'num_jobs', 3)
    no_speaker_adaptation = getattr(args, 'no_speaker_adaptation', False)
    use_speaker_info = not no_speaker_adaptation
    try:
        corpus = Corpus(args.corpus_directory, data_directory,
                        speaker_characters=args.speaker_characters,
                        num_jobs=num_jobs,
                        use_speaker_information=use_speaker_info,
                        ignore_exceptions=getattr(args, 'ignore_exceptions', False))
        print(corpus.speaker_utterance_info())
        acoustic_model = AcousticModel(args.acoustic_model_path)
        dictionary = Dictionary(args.dictionary_path, data_directory, word_set=corpus.word_set)
        acoustic_model.validate(dictionary)
        begin = time.time()
        a = PretrainedAligner(corpus, dictionary, acoustic_model, args.output_directory,
                              temp_directory=data_directory,
                              num_jobs=num_jobs,
                              speaker_independent=no_speaker_adaptation,
                              debug=debug)
        if getattr(args, 'errors', False):
            check = a.test_utterance_transcriptions()
            if not skip_input and not check:
                user_input = input('Would you like to abort to fix transcription issues? (Y/N)')
                if user_input.lower() == 'y':
                    return
        if debug:
            print('Setup pretrained aligner in {} seconds'.format(time.time() - begin))
        a.verbose = args.verbose
        # Surface the OOV reports (when present) next to the alignment output.
        utt_oov_path = os.path.join(corpus.split_directory, 'utterance_oovs.txt')
        if os.path.exists(utt_oov_path):
            shutil.copy(utt_oov_path, args.output_directory)
        oov_path = os.path.join(corpus.split_directory, 'oovs_found.txt')
        if os.path.exists(oov_path):
            shutil.copy(oov_path, args.output_directory)
        if not skip_input and a.dictionary.oovs_found:
            user_input = input(
                'There were words not found in the dictionary. Would you like to abort to fix them? (Y/N)')
            if user_input.lower() == 'y':
                return
        begin = time.time()
        a.do_align()
        if debug:
            print('Performed alignment in {} seconds'.format(time.time() - begin))
        begin = time.time()
        a.export_textgrids()
        if debug:
            print('Exported TextGrids in {} seconds'.format(time.time() - begin))
        print('Done! Everything took {} seconds'.format(time.time() - all_begin))
    except BaseException:
        # Any failure, including an interrupt, marks the working data as
        # dirty; the exception is re-raised unchanged.
        conf['dirty'] = True
        raise
    finally:
        with open(conf_path, 'w') as f:
            yaml.dump(conf, f)
Ejemplo n.º 6
0
def align_corpus(args):
    """Align a corpus with a pretrained acoustic model and export TextGrids.

    A ``config.yml`` in the corpus's working directory records the settings
    of the previous run. When ``clean`` is requested, or the stored settings
    are dirty or differ from the current ones, the working directory is
    removed and the run starts from a fresh config.

    Args:
        args: Namespace providing ``temp_directory``, ``corpus_directory``,
            ``dictionary_path``, ``acoustic_model_path``, ``config_path``,
            ``output_directory``, ``speaker_characters``, ``num_jobs`` and
            ``verbose``. ``clean``, ``debug`` and ``ignore_exceptions``
            (default False) are optional. ``args.corpus_directory`` is
            rewritten without its trailing separator when it has one.
    """
    all_begin = time.time()
    if not args.temp_directory:
        temp_dir = TEMP_DIR
    else:
        temp_dir = os.path.expanduser(args.temp_directory)
    # A trailing separator makes basename() return '', so step up one level.
    corpus_name = os.path.basename(args.corpus_directory)
    if corpus_name == '':
        args.corpus_directory = os.path.dirname(args.corpus_directory)
        corpus_name = os.path.basename(args.corpus_directory)
    data_directory = os.path.join(temp_dir, corpus_name)
    conf_path = os.path.join(data_directory, 'config.yml')
    fresh_conf = {
        'dirty': False,
        'begin': time.time(),
        'version': __version__,
        'type': 'align',
        'corpus_directory': args.corpus_directory,
        'dictionary_path': args.dictionary_path
    }
    if os.path.exists(conf_path):
        with open(conf_path, 'r') as f:
            conf = yaml.load(f, Loader=yaml.SafeLoader)
    else:
        conf = fresh_conf
    if getattr(args, 'clean', False) \
            or conf['dirty'] or conf['type'] != 'align' \
            or conf['corpus_directory'] != args.corpus_directory \
            or conf['version'] != __version__ \
            or conf['dictionary_path'] != args.dictionary_path:
        shutil.rmtree(data_directory, ignore_errors=True)
        # The stored settings described the data just removed; writing them
        # back would force a clean on every later run.
        conf = fresh_conf

    os.makedirs(data_directory, exist_ok=True)
    os.makedirs(args.output_directory, exist_ok=True)

    # Read the optional flag once so every consumer sees the same value.
    debug = getattr(args, 'debug', False)
    try:
        corpus = Corpus(args.corpus_directory,
                        data_directory,
                        speaker_characters=args.speaker_characters,
                        num_jobs=args.num_jobs,
                        ignore_exceptions=getattr(args, 'ignore_exceptions',
                                                  False))
        if corpus.issues_check:
            print('WARNING: Some issues parsing the corpus were detected. '
                  'Please run the validator to get more information.')
        print(corpus.speaker_utterance_info())
        acoustic_model = AcousticModel(args.acoustic_model_path)
        dictionary = Dictionary(args.dictionary_path,
                                data_directory,
                                word_set=corpus.word_set)
        acoustic_model.validate(dictionary)

        begin = time.time()
        if args.config_path:
            align_config = align_yaml_to_config(args.config_path)
        else:
            align_config = load_basic_align()
        a = PretrainedAligner(corpus,
                              dictionary,
                              acoustic_model,
                              align_config,
                              args.output_directory,
                              temp_directory=data_directory,
                              debug=debug)
        if debug:
            print('Setup pretrained aligner in {} seconds'.format(time.time() -
                                                                  begin))
        a.verbose = args.verbose

        begin = time.time()
        a.align()
        if debug:
            print('Performed alignment in {} seconds'.format(time.time() -
                                                             begin))

        begin = time.time()
        a.export_textgrids()
        if debug:
            print('Exported TextGrids in {} seconds'.format(time.time() -
                                                            begin))
        print('Done! Everything took {} seconds'.format(time.time() -
                                                        all_begin))
    except BaseException:
        # Any failure, including an interrupt, marks the working data as
        # dirty; the exception is re-raised unchanged.
        conf['dirty'] = True
        raise
    finally:
        with open(conf_path, 'w') as f:
            yaml.dump(conf, f)
Ejemplo n.º 7
0
def align_corpus(args):
    """Align a corpus with a pretrained acoustic model and export TextGrids.

    A ``config.yml`` in the corpus's working directory records the settings
    of the previous run. When ``clean`` is requested, or the stored settings
    are dirty or differ from the current ones, the working directory is
    removed and the run starts from a fresh config.

    Args:
        args: Namespace providing ``temp_directory``, ``corpus_directory``,
            ``dictionary_path``, ``acoustic_model_path``, ``config_path``,
            ``output_directory``, ``speaker_characters``, ``num_jobs`` and
            ``verbose``. ``clean``, ``debug``, ``errors``, ``quiet`` and
            ``ignore_exceptions`` (default False) are optional.
            ``args.corpus_directory`` is rewritten without its trailing
            separator when it has one.
    """
    all_begin = time.time()
    if not args.temp_directory:
        temp_dir = TEMP_DIR
    else:
        temp_dir = os.path.expanduser(args.temp_directory)
    # A trailing separator makes basename() return '', so step up one level.
    corpus_name = os.path.basename(args.corpus_directory)
    if corpus_name == '':
        args.corpus_directory = os.path.dirname(args.corpus_directory)
        corpus_name = os.path.basename(args.corpus_directory)
    data_directory = os.path.join(temp_dir, corpus_name)
    conf_path = os.path.join(data_directory, 'config.yml')
    fresh_conf = {'dirty': False,
                  'begin': time.time(),
                  'version': __version__,
                  'type': 'align',
                  'corpus_directory': args.corpus_directory,
                  'dictionary_path': args.dictionary_path}
    if os.path.exists(conf_path):
        with open(conf_path, 'r') as f:
            # An explicit safe loader avoids constructing arbitrary objects
            # and is required by PyYAML releases that dropped the default.
            conf = yaml.load(f, Loader=yaml.SafeLoader)
    else:
        conf = fresh_conf
    if getattr(args, 'clean', False) \
            or conf['dirty'] or conf['type'] != 'align' \
            or conf['corpus_directory'] != args.corpus_directory \
            or conf['version'] != __version__ \
            or conf['dictionary_path'] != args.dictionary_path:
        shutil.rmtree(data_directory, ignore_errors=True)
        # The stored settings described the data just removed; writing them
        # back would force a clean on every later run.
        conf = fresh_conf

    os.makedirs(data_directory, exist_ok=True)
    os.makedirs(args.output_directory, exist_ok=True)

    # Read the optional flag once so every consumer sees the same value.
    debug = getattr(args, 'debug', False)
    try:
        corpus = Corpus(args.corpus_directory, data_directory,
                        speaker_characters=args.speaker_characters,
                        num_jobs=args.num_jobs,
                        ignore_exceptions=getattr(args, 'ignore_exceptions', False))
        if corpus.issues_check:
            print('WARNING: Some issues parsing the corpus were detected. '
                  'Please run the validator to get more information.')
        print(corpus.speaker_utterance_info())
        acoustic_model = AcousticModel(args.acoustic_model_path)
        dictionary = Dictionary(args.dictionary_path, data_directory, word_set=corpus.word_set)
        acoustic_model.validate(dictionary)

        begin = time.time()
        if args.config_path:
            align_config = align_yaml_to_config(args.config_path)
        else:
            align_config = load_basic_align()
        a = PretrainedAligner(corpus, dictionary, acoustic_model, align_config, args.output_directory,
                              temp_directory=data_directory,
                              debug=debug)
        if getattr(args, 'errors', False):
            check = a.test_utterance_transcriptions()
            if not getattr(args, 'quiet', False) and not check:
                user_input = input('Would you like to abort to fix transcription issues? (Y/N)')
                if user_input.lower() == 'y':
                    return
        if debug:
            print('Setup pretrained aligner in {} seconds'.format(time.time() - begin))
        a.verbose = args.verbose

        begin = time.time()
        a.align()
        if debug:
            print('Performed alignment in {} seconds'.format(time.time() - begin))

        begin = time.time()
        a.export_textgrids()
        if debug:
            print('Exported TextGrids in {} seconds'.format(time.time() - begin))
        print('Done! Everything took {} seconds'.format(time.time() - all_begin))
    except BaseException:
        # Any failure, including an interrupt, marks the working data as
        # dirty; the exception is re-raised unchanged.
        conf['dirty'] = True
        raise
    finally:
        with open(conf_path, 'w') as f:
            yaml.dump(conf, f)
Ejemplo n.º 8
0
def align_corpus(args):
    """Align a corpus with a pretrained acoustic model and export TextGrids.

    A ``config.yml`` in the corpus's working directory records the settings
    of the previous run. When ``clean`` is requested, or the stored settings
    are dirty or differ from the current ones, the working directory is
    removed and the run starts from a fresh config.

    Args:
        args: Namespace providing ``temp_directory``, ``corpus_directory``,
            ``dictionary_path``, ``acoustic_model_path``, ``config_path``,
            ``output_directory``, ``speaker_characters``, ``num_jobs`` and
            ``verbose``. ``clean``, ``debug``, ``errors``, ``quiet`` and
            ``ignore_exceptions`` (default False) are optional.
            ``args.corpus_directory`` is rewritten without its trailing
            separator when it has one.
    """
    all_begin = time.time()
    if not args.temp_directory:
        temp_dir = TEMP_DIR
    else:
        temp_dir = os.path.expanduser(args.temp_directory)
    # A trailing separator makes basename() return "", so step up one level.
    corpus_name = os.path.basename(args.corpus_directory)
    if corpus_name == "":
        args.corpus_directory = os.path.dirname(args.corpus_directory)
        corpus_name = os.path.basename(args.corpus_directory)
    data_directory = os.path.join(temp_dir, corpus_name)
    conf_path = os.path.join(data_directory, "config.yml")
    fresh_conf = {
        "dirty": False,
        "begin": time.time(),
        "version": __version__,
        "type": "align",
        "corpus_directory": args.corpus_directory,
        "dictionary_path": args.dictionary_path,
    }
    if os.path.exists(conf_path):
        with open(conf_path, "r") as f:
            # An explicit safe loader avoids constructing arbitrary objects
            # and is required by PyYAML releases that dropped the default.
            conf = yaml.load(f, Loader=yaml.SafeLoader)
    else:
        conf = fresh_conf
    if (
        getattr(args, "clean", False)
        or conf["dirty"]
        or conf["type"] != "align"
        or conf["corpus_directory"] != args.corpus_directory
        or conf["version"] != __version__
        or conf["dictionary_path"] != args.dictionary_path
    ):
        shutil.rmtree(data_directory, ignore_errors=True)
        # The stored settings described the data just removed; writing them
        # back would force a clean on every later run.
        conf = fresh_conf

    os.makedirs(data_directory, exist_ok=True)
    os.makedirs(args.output_directory, exist_ok=True)

    # Read the optional flag once so every consumer sees the same value.
    debug = getattr(args, "debug", False)
    try:
        corpus = Corpus(
            args.corpus_directory,
            data_directory,
            speaker_characters=args.speaker_characters,
            num_jobs=args.num_jobs,
            ignore_exceptions=getattr(args, "ignore_exceptions", False),
        )
        if corpus.issues_check:
            print(
                "WARNING: Some issues parsing the corpus were detected. "
                "Please run the validator to get more information."
            )
        print(corpus.speaker_utterance_info())
        acoustic_model = AcousticModel(args.acoustic_model_path)
        dictionary = Dictionary(
            args.dictionary_path, data_directory, word_set=corpus.word_set
        )
        acoustic_model.validate(dictionary)

        begin = time.time()
        if args.config_path:
            align_config = align_yaml_to_config(args.config_path)
        else:
            align_config = load_basic_align()
        a = PretrainedAligner(
            corpus,
            dictionary,
            acoustic_model,
            align_config,
            args.output_directory,
            temp_directory=data_directory,
            debug=debug,
        )
        if getattr(args, "errors", False):
            check = a.test_utterance_transcriptions()
            if not getattr(args, "quiet", False) and not check:
                user_input = input(
                    "Would you like to abort to fix transcription issues? (Y/N)"
                )
                if user_input.lower() == "y":
                    return
        if debug:
            print("Setup pretrained aligner in {} seconds".format(time.time() - begin))
        a.verbose = args.verbose

        begin = time.time()
        a.align()
        if debug:
            print("Performed alignment in {} seconds".format(time.time() - begin))

        begin = time.time()
        a.export_textgrids()
        if debug:
            print("Exported TextGrids in {} seconds".format(time.time() - begin))
        print("Done! Everything took {} seconds".format(time.time() - all_begin))
    except BaseException:
        # Any failure, including an interrupt, marks the working data as
        # dirty; the exception is re-raised unchanged.
        conf["dirty"] = True
        raise
    finally:
        with open(conf_path, "w") as f:
            yaml.dump(conf, f)