Example #1
File: main.py Project: Rhcsky/KoSpeech
def train(config: DictConfig) -> nn.DataParallel:
    random.seed(config.train.seed)
    torch.manual_seed(config.train.seed)
    torch.cuda.manual_seed_all(config.train.seed)
    device = check_envirionment(config.train.use_cuda)
    if hasattr(config.train,
               "num_threads") and int(config.train.num_threads) > 0:
        torch.set_num_threads(config.train.num_threads)

    vocab = KsponSpeechVocabulary(
        f'/home/seungmin/dmount/KoSpeech/data/vocab/aihub_{config.train.output_unit}_vocabs.csv',
        output_unit=config.train.output_unit,
    )

    if not config.train.resume:
        epoch_time_step, trainset_list, validset = split_dataset(
            config, config.train.transcripts_path, vocab)
        model = build_model(config, vocab, device)

        optimizer = get_optimizer(model, config)
        lr_scheduler = get_lr_scheduler(config, optimizer, epoch_time_step)

        optimizer = Optimizer(optimizer, lr_scheduler,
                              config.train.total_steps,
                              config.train.max_grad_norm)
        criterion = get_criterion(config, vocab)

    else:
        trainset_list = None
        validset = None
        model = None
        optimizer = None
        epoch_time_step = None
        criterion = get_criterion(config, vocab)

    trainer = SupervisedTrainer(
        optimizer=optimizer,
        criterion=criterion,
        trainset_list=trainset_list,
        validset=validset,
        num_workers=config.train.num_workers,
        device=device,
        teacher_forcing_step=config.model.teacher_forcing_step,
        min_teacher_forcing_ratio=config.model.min_teacher_forcing_ratio,
        print_every=config.train.print_every,
        save_result_every=config.train.save_result_every,
        checkpoint_every=config.train.checkpoint_every,
        architecture=config.model.architecture,
        vocab=vocab,
        joint_ctc_attention=config.model.joint_ctc_attention,
    )
    model = trainer.train(
        model=model,
        batch_size=config.train.batch_size,
        epoch_time_step=epoch_time_step,
        num_epochs=config.train.num_epochs,
        teacher_forcing_ratio=config.model.teacher_forcing_ratio,
        resume=config.train.resume,
    )
    return model
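
The DictConfig-typed examples on this page (Examples #1, #3 and #4) are typically launched through a Hydra entrypoint that builds the configuration and hands it to train(). A minimal sketch of such a launcher follows; the config_path and config_name values are assumptions for illustration, not the project's actual layout.

# Sketch of a Hydra entrypoint for the DictConfig-based train() above.
# config_path / config_name are hypothetical values, not taken from KoSpeech.
import hydra
from omegaconf import DictConfig, OmegaConf


@hydra.main(config_path="configs", config_name="train")
def main(config: DictConfig) -> None:
    # Print the fully resolved configuration before training starts.
    print(OmegaConf.to_yaml(config))
    train(config)


if __name__ == "__main__":
    main()

With Hydra, individual keys can then be overridden from the command line, for example: python main.py train.batch_size=16 train.num_epochs=40.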
Example #2
def train(opt):
    random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed_all(opt.seed)
    device = check_envirionment(opt.use_cuda)

    if not opt.resume:
        audio_paths, script_paths = load_data_list(opt.data_list_path, opt.dataset_path)

        epoch_time_step, trainset_list, validset = split_dataset(opt, audio_paths, script_paths)
        model = build_model(opt, device)

        optimizer = optim.Adam(model.module.parameters(), lr=opt.init_lr, weight_decay=opt.weight_decay)

        if opt.rampup_period > 0:
            scheduler = RampUpLR(optimizer, opt.init_lr, opt.high_plateau_lr, opt.rampup_period)
            optimizer = Optimizer(optimizer, scheduler, opt.rampup_period, opt.max_grad_norm)
        else:
            optimizer = Optimizer(optimizer, None, 0, opt.max_grad_norm)

        criterion = LabelSmoothedCrossEntropyLoss(
            num_classes=len(char2id), ignore_index=PAD_token,
            smoothing=opt.label_smoothing, dim=-1,
            reduction=opt.reduction, architecture=opt.architecture
        ).to(device)

    else:
        trainset_list = None
        validset = None
        model = None
        optimizer = None
        criterion = LabelSmoothedCrossEntropyLoss(
            num_classes=len(char2id), ignore_index=PAD_token,
            smoothing=opt.label_smoothing, dim=-1,
            reduction=opt.reduction, architecture=opt.architecture
        ).to(device)
        epoch_time_step = None

    trainer = SupervisedTrainer(
        optimizer=optimizer, criterion=criterion, trainset_list=trainset_list,
        validset=validset, num_workers=opt.num_workers,
        high_plateau_lr=opt.high_plateau_lr, low_plateau_lr=opt.low_plateau_lr,
        decay_threshold=opt.decay_threshold, exp_decay_period=opt.exp_decay_period,
        device=device, teacher_forcing_step=opt.teacher_forcing_step,
        min_teacher_forcing_ratio=opt.min_teacher_forcing_ratio, print_every=opt.print_every,
        save_result_every=opt.save_result_every, checkpoint_every=opt.checkpoint_every,
        architecture=opt.architecture
    )
    model = trainer.train(
        model=model,
        batch_size=opt.batch_size,
        epoch_time_step=epoch_time_step,
        num_epochs=opt.num_epochs,
        teacher_forcing_ratio=opt.teacher_forcing_ratio,
        resume=opt.resume
    )
    return model
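
Examples #2 and #5 take a plain opt namespace rather than a DictConfig; in these older-style scripts the options come from a command-line parser. A hypothetical argparse launcher is sketched below with only a handful of the options the example reads; the option set and the default values are illustrative assumptions, not the project's actual defaults.

# Hypothetical argparse launcher for the opt-based train() above.
# Only a few of the options used in the example are shown; defaults are illustrative.
import argparse

parser = argparse.ArgumentParser(description="KoSpeech training (sketch)")
parser.add_argument("--seed", type=int, default=7)
parser.add_argument("--use_cuda", action="store_true")
parser.add_argument("--resume", action="store_true")
parser.add_argument("--batch_size", type=int, default=32)
parser.add_argument("--num_epochs", type=int, default=20)
parser.add_argument("--init_lr", type=float, default=1e-06)
parser.add_argument("--max_grad_norm", type=int, default=400)

if __name__ == "__main__":
    opt = parser.parse_args()
    model = train(opt)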
Example #3
def train(config: DictConfig) -> nn.DataParallel:
    random.seed(config.train.seed)
    torch.manual_seed(config.train.seed)
    torch.cuda.manual_seed_all(config.train.seed)
    device = check_envirionment(config.train.use_cuda)

    if config.train.dataset == 'kspon':
        if config.train.output_unit == 'subword':
            vocab = KsponSpeechVocabulary(
                vocab_path=KSPONSPEECH_VOCAB_PATH,
                output_unit=config.train.output_unit,
                sp_model_path=KSPONSPEECH_SP_MODEL_PATH,
            )
        else:
            vocab = KsponSpeechVocabulary(
                f'../../../data/vocab/aihub_{config.train.output_unit}_vocabs.csv',
                output_unit=config.train.output_unit,
            )

    elif config.train.dataset == 'libri':
        vocab = LibriSpeechVocabulary(LIBRISPEECH_VOCAB_PATH, LIBRISPEECH_TOKENIZER_PATH)

    else:
        raise ValueError("Unsupported Dataset : {0}".format(config.train.dataset))

    if not config.train.resume:
        epoch_time_step, trainset_list, validset = split_dataset(config, config.train.transcripts_path, vocab)
        model = build_model(config, vocab, device)

        optimizer = get_optimizer(model, config)
        lr_scheduler = get_lr_scheduler(config, optimizer, epoch_time_step)

        optimizer = Optimizer(optimizer, lr_scheduler, config.train.warmup_steps, config.train.max_grad_norm)
        criterion = get_criterion(config, vocab)

    else:
        trainset_list = None
        validset = None
        model = None
        optimizer = None
        epoch_time_step = None
        criterion = get_criterion(config, vocab)

    trainer = SupervisedTrainer(
        optimizer=optimizer,
        criterion=criterion,
        trainset_list=trainset_list,
        validset=validset,
        num_workers=config.train.num_workers,
        device=device,
        teacher_forcing_step=config.model.teacher_forcing_step,
        min_teacher_forcing_ratio=config.model.min_teacher_forcing_ratio,
        print_every=config.train.print_every,
        save_result_every=config.train.save_result_every,
        checkpoint_every=config.train.checkpoint_every,
        architecture=config.model.architecture,
        vocab=vocab,
        joint_ctc_attention=config.model.joint_ctc_attention,
    )
    model = trainer.train(
        model=model,
        batch_size=config.train.batch_size,
        epoch_time_step=epoch_time_step,
        num_epochs=config.train.num_epochs,
        teacher_forcing_ratio=config.model.teacher_forcing_ratio,
        resume=config.train.resume,
    )
    return model
Example #4
def train(config: DictConfig):
    random.seed(config.train.seed)
    torch.manual_seed(config.train.seed)
    torch.cuda.manual_seed_all(config.train.seed)
    device = check_envirionment(config.train.use_cuda)

    if config.train.dataset == 'kspon':
        if config.train.output_unit == 'subword':
            vocab = KsponSpeechVocabulary(
                vocab_path='../../../data/vocab/kspon_sentencepiece.vocab',
                output_unit=config.train.output_unit,
                sp_model_path='../../../data/vocab/kspon_sentencepiece.model',
            )
        else:
            vocab = KsponSpeechVocabulary(
                f'../../../data/vocab/aihub_{config.train.output_unit}_vocabs.csv',
                output_unit=config.train.output_unit)

    elif config.train.dataset == 'libri':
        vocab = LibriSpeechVocabulary('../../../data/vocab/tokenizer.vocab',
                                      '../../../data/vocab/tokenizer.model')

    else:
        raise ValueError("Unsupported Dataset : {0}".format(
            config.train.dataset))

    if not config.train.resume:
        epoch_time_step, trainset_list, validset = split_dataset(
            config, config.train.transcripts_path, vocab)
        model = build_model(config, vocab, device)

        optimizer = get_optimizer(model, config)

        lr_scheduler = TriStageLRScheduler(
            optimizer=optimizer,
            init_lr=config.train.init_lr,
            peak_lr=config.train.peak_lr,
            final_lr=config.train.final_lr,
            init_lr_scale=config.train.init_lr_scale,
            final_lr_scale=config.train.final_lr_scale,
            warmup_steps=config.train.warmup_steps,
            total_steps=int(config.train.num_epochs * epoch_time_step))
        optimizer = Optimizer(optimizer, lr_scheduler,
                              config.train.warmup_steps,
                              config.train.max_grad_norm)
        criterion = get_criterion(config, vocab)

    else:
        trainset_list = None
        validset = None
        model = None
        optimizer = None
        epoch_time_step = None
        criterion = get_criterion(config, vocab)

    trainer = SupervisedTrainer(
        optimizer=optimizer,
        criterion=criterion,
        trainset_list=trainset_list,
        validset=validset,
        num_workers=config.train.num_workers,
        device=device,
        teacher_forcing_step=config.model.teacher_forcing_step,
        min_teacher_forcing_ratio=config.model.min_teacher_forcing_ratio,
        print_every=config.train.print_every,
        save_result_every=config.train.save_result_every,
        checkpoint_every=config.train.checkpoint_every,
        architecture=config.model.architecture,
        vocab=vocab,
        joint_ctc_attention=config.model.joint_ctc_attention,
    )
    model = trainer.train(
        model=model,
        batch_size=config.train.batch_size,
        epoch_time_step=epoch_time_step,
        num_epochs=config.train.num_epochs,
        teacher_forcing_ratio=config.model.teacher_forcing_ratio,
        resume=config.train.resume,
    )
    return model
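
Example #4 constructs the TriStageLRScheduler explicitly: the learning rate is warmed up linearly from init_lr to peak_lr over warmup_steps, held at peak_lr for a stretch, and then decayed exponentially toward final_lr by the end of total_steps. The self-contained sketch below illustrates that three-stage shape; it is a simplified stand-in, not KoSpeech's actual scheduler, and the explicit hold_steps/decay_steps split is an assumption.

# Illustrative tri-stage learning-rate schedule: warmup -> hold -> exponential decay.
# Simplified sketch, not the TriStageLRScheduler class used in the examples above.
import math


def tri_stage_lr(step, init_lr, peak_lr, final_lr, warmup_steps, hold_steps, decay_steps):
    if step < warmup_steps:
        # Stage 1: linear warmup from init_lr to peak_lr.
        return init_lr + (peak_lr - init_lr) * step / max(1, warmup_steps)
    if step < warmup_steps + hold_steps:
        # Stage 2: hold at the peak learning rate.
        return peak_lr
    # Stage 3: exponential decay from peak_lr toward final_lr.
    decay_factor = -math.log(final_lr / peak_lr) / max(1, decay_steps)
    steps_in_decay = min(step - warmup_steps - hold_steps, decay_steps)
    return peak_lr * math.exp(-decay_factor * steps_in_decay)

Plotted over training, this gives the rising, flat, then smoothly falling learning-rate curve that the init_lr / peak_lr / final_lr keys in the config describe.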
Example #5
File: main.py Project: rickyHong/KoSpeech
def train(opt):
    random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed_all(opt.seed)
    device = check_envirionment(opt.use_cuda)

    if opt.dataset == 'kspon':
        if opt.output_unit == 'subword':
            vocab = KsponSpeechVocabulary(
                vocab_path='../data/vocab/kspon_sentencepiece.vocab',
                output_unit=opt.output_unit,
                sp_model_path='../data/vocab/kspon_sentencepiece.model')
        else:
            vocab = KsponSpeechVocabulary(
                f'../data/vocab/aihub_{opt.output_unit}_vocabs.csv',
                output_unit=opt.output_unit)

    elif opt.dataset == 'libri':
        vocab = LibriSpeechVocabulary('../data/vocab/tokenizer.vocab',
                                      '../data/vocab/tokenizer.model')

    else:
        raise ValueError("Unsupported Dataset : {0}".format(opt.dataset))

    if not opt.resume:
        epoch_time_step, trainset_list, validset = split_dataset(
            opt, opt.transcripts_path, vocab)
        model = build_model(opt, vocab, device)

        if opt.optimizer.lower() == 'adam':
            optimizer = optim.Adam(model.module.parameters(),
                                   lr=opt.init_lr,
                                   weight_decay=opt.weight_decay)
        elif opt.optimizer.lower() == 'radam':
            optimizer = RAdam(model.module.parameters(),
                              lr=opt.init_lr,
                              weight_decay=opt.weight_decay)
        elif opt.optimizer.lower() == 'adamp':
            optimizer = AdamP(model.module.parameters(),
                              lr=opt.init_lr,
                              weight_decay=opt.weight_decay)
        elif opt.optimizer.lower() == 'adadelta':
            optimizer = optim.Adadelta(model.module.parameters(),
                                       lr=opt.init_lr,
                                       weight_decay=opt.weight_decay)
        elif opt.optimizer.lower() == 'adagrad':
            optimizer = optim.Adagrad(model.module.parameters(),
                                      lr=opt.init_lr,
                                      weight_decay=opt.weight_decay)
        else:
            raise ValueError(
                f"Unsupported Optimizer, Supported Optimizer : Adam, RAdam, Adadelta, Adagrad"
            )

        lr_scheduler = TriStageLRScheduler(optimizer=optimizer,
                                           init_lr=opt.init_lr,
                                           peak_lr=opt.peak_lr,
                                           final_lr=opt.final_lr,
                                           init_lr_scale=opt.init_lr_scale,
                                           final_lr_scale=opt.final_lr_scale,
                                           warmup_steps=opt.warmup_steps,
                                           total_steps=int(opt.num_epochs *
                                                           epoch_time_step))
        optimizer = Optimizer(optimizer, lr_scheduler, opt.warmup_steps,
                              opt.max_grad_norm)

        if opt.architecture == 'deepspeech2':
            criterion = nn.CTCLoss(blank=vocab.blank_id,
                                   reduction=opt.reduction).to(device)
        else:
            criterion = LabelSmoothedCrossEntropyLoss(
                num_classes=len(vocab),
                ignore_index=vocab.pad_id,
                smoothing=opt.label_smoothing,
                dim=-1,
                reduction=opt.reduction,
                architecture=opt.architecture).to(device)

    else:
        trainset_list = None
        validset = None
        model = None
        optimizer = None
        criterion = LabelSmoothedCrossEntropyLoss(
            num_classes=len(vocab),
            ignore_index=vocab.pad_id,
            smoothing=opt.label_smoothing,
            dim=-1,
            reduction=opt.reduction,
            architecture=opt.architecture).to(device)
        epoch_time_step = None

    trainer = SupervisedTrainer(
        optimizer=optimizer,
        criterion=criterion,
        trainset_list=trainset_list,
        validset=validset,
        num_workers=opt.num_workers,
        device=device,
        teacher_forcing_step=opt.teacher_forcing_step,
        min_teacher_forcing_ratio=opt.min_teacher_forcing_ratio,
        print_every=opt.print_every,
        save_result_every=opt.save_result_every,
        checkpoint_every=opt.checkpoint_every,
        architecture=opt.architecture,
        vocab=vocab)
    model = trainer.train(model=model,
                          batch_size=opt.batch_size,
                          epoch_time_step=epoch_time_step,
                          num_epochs=opt.num_epochs,
                          teacher_forcing_ratio=opt.teacher_forcing_ratio,
                          resume=opt.resume)
    return model