# Example #1
# 0
def inference(config: DictConfig):
    """Evaluate a trained speech-recognition model on the configured test set.

    Picks the vocabulary matching ``config.eval.dataset`` ('kspon' or
    'libri'), loads the transcript list, wraps it in a
    SpectrogramDataset, and hands everything to an Evaluator.

    Args:
        config: Hydra/OmegaConf configuration; only ``config.eval.*`` is read.

    Raises:
        ValueError: if ``config.eval.dataset`` is not a supported dataset.
    """
    device = check_envirionment(config.eval.use_cuda)
    model = load_test_model(config.eval, device)

    dataset_name = config.eval.dataset
    if dataset_name == 'kspon':
        vocab_path = f'../../../data/vocab/aihub_{config.eval.output_unit}_vocabs.csv'
        vocab = KsponSpeechVocabulary(vocab_path, output_unit=config.eval.output_unit)
    elif dataset_name == 'libri':
        vocab = LibriSpeechVocabulary('../../../data/vocab/tokenizer.vocab', 'data/vocab/tokenizer.model')
    else:
        raise ValueError("Unsupported Dataset : {0}".format(dataset_name))

    audio_paths, transcripts = load_dataset(config.eval.transcripts_path)

    # spec_augment is disabled: augmentation is a training-time technique.
    testset = SpectrogramDataset(
        audio_paths=audio_paths,
        transcripts=transcripts,
        sos_id=vocab.sos_id,
        eos_id=vocab.eos_id,
        dataset_path=config.eval.dataset_path,
        config=config,
        spec_augment=False,
    )

    Evaluator(
        dataset=testset,
        vocab=vocab,
        batch_size=config.eval.batch_size,
        device=device,
        num_workers=config.eval.num_workers,
        print_every=config.eval.print_every,
        decode=config.eval.decode,
        beam_size=config.eval.k,
    ).evaluate(model)
# Example #2
# 0
def inference(opt):
    """Evaluate a trained speech-recognition model on the configured test set.

    Selects the vocabulary that matches ``opt.dataset`` ('kspon' or
    'libri'), loads the transcripts, and runs an Evaluator over the
    resulting SpectrogramDataset.

    Args:
        opt: parsed options carrying dataset paths and evaluation settings.

    Raises:
        ValueError: if ``opt.dataset`` names an unsupported dataset.
    """
    device = check_envirionment(opt.use_cuda)
    model = load_test_model(opt, device)

    dataset_name = opt.dataset
    if dataset_name == 'kspon':
        vocab = KsponSpeechVocabulary(
            f'../data/vocab/aihub_{opt.output_unit}_vocabs.csv',
            output_unit=opt.output_unit,
        )
    elif dataset_name == 'libri':
        vocab = LibriSpeechVocabulary(
            '../data/vocab/tokenizer.vocab',
            '../data/vocab/tokenizer.model',
        )
    else:
        raise ValueError("Unsupported Dataset : {0}".format(dataset_name))

    audio_paths, transcripts = load_dataset(opt.transcripts_path)

    # Augmentation stays off: SpecAugment is a training-only regularizer.
    testset = SpectrogramDataset(
        audio_paths=audio_paths,
        transcripts=transcripts,
        sos_id=vocab.sos_id,
        eos_id=vocab.eos_id,
        dataset_path=opt.dataset_path,
        opt=opt,
        spec_augment=False,
    )

    Evaluator(testset, vocab, opt.batch_size, device, opt.num_workers,
              opt.print_every, opt.decode, opt.k).evaluate(model)
# Example #3
# 0
def inference(opt):
    """Evaluate a trained speech-recognition model on the listed test data.

    Loads the audio/script file lists, builds the target dictionary, wraps
    them in a SpectrogramDataset, and runs an Evaluator over it.

    Args:
        opt: parsed options carrying data-list/dataset paths and
            evaluation settings (batch_size, num_workers, decode, k, ...).
    """
    device = check_envirionment(opt.use_cuda)
    model = load_test_model(opt, device)

    audio_paths, script_paths = load_data_list(opt.data_list_path, opt.dataset_path)
    target_dict = load_targets(script_paths)

    # Augmentations stay off at evaluation time.
    testset = SpectrogramDataset(audio_paths=audio_paths, script_paths=script_paths,
                                 sos_id=SOS_token, eos_id=EOS_token,
                                 target_dict=target_dict, opt=opt,
                                 spec_augment=False, noise_augment=False)

    evaluator = Evaluator(testset, opt.batch_size, device, opt.num_workers,
                          opt.print_every, opt.decode, opt.k)
    evaluator.evaluate(model)
    # BUGFIX: the original ended with feature-extraction lines
    # (`mfcc = mfcc[:, ::-1]` ... `return mfcc`) referencing an `mfcc`
    # variable never defined in this function — reaching them always
    # raised NameError. They were evidently pasted in from a separate
    # parse_audio()-style helper and have been removed.


# --- Standalone script: transcribe a single audio file with a pretrained model ---
# Parses CLI options, extracts features from one PCM file, runs the model with
# teacher forcing disabled, and prints the decoded sentence.
parser = argparse.ArgumentParser(description='Run Pretrain')
parser.add_argument('--model_path', type=str, default='../pretrain/model.pt')
parser.add_argument('--audio_path',
                    type=str,
                    default='../pretrain/sample_audio.pcm')
parser.add_argument('--device', type=str, default='cuda')
opt = parser.parse_args()

# Extract an acoustic feature matrix; del_silence=True presumably trims
# silent regions before featurization — confirm against parse_audio().
feature_vector = parse_audio(opt.audio_path, del_silence=True)
input_length = torch.IntTensor([len(feature_vector)])
# Character-level vocabulary used to map label indices back to text.
vocab = KsponSpeechVocabulary('../data/vocab/aihub_character_vocabs.csv')

model = load_test_model(opt, opt.device)
model.eval()  # inference mode: disables dropout / batch-norm updates

# unsqueeze(0) adds a batch dimension of 1; teacher_forcing_ratio=0.0 makes
# the decoder condition only on its own predictions.
output = model(inputs=feature_vector.unsqueeze(0),
               input_lengths=input_length,
               teacher_forcing_ratio=0.0,
               return_decode_dict=False)
# Stack the per-step outputs along dim=1, then take the argmax over the last
# (vocabulary) axis to get predicted label indices.
logit = torch.stack(output, dim=1).to(opt.device)
pred = logit.max(-1)[1]

# Convert label indices to a string and print the transcription.
sentence = vocab.label_to_string(pred.cpu().detach().numpy())
print(sentence)