Beispiel #1
0
def main():
    """Echo the CLI arguments, initialise PaddlePaddle, then start training."""
    print_arguments(args)
    # Runtime configuration comes straight from the command-line flags.
    paddle.init(
        use_gpu=args.use_gpu,
        rnn_use_batch=True,
        trainer_count=args.trainer_count,
        log_clipping=True)
    train()
Beispiel #2
0
def main():
    """Generate manifests, a noise list, the character vocabulary, and mean/std stats."""
    print_arguments(args)
    print('开始生成数据列表...')
    create_manifest(
        annotation_path=args.annotation_path,
        manifest_path_prefix=args.manifest_prefix)
    print('=' * 70)
    print('开始生成噪声数据列表...')
    create_noise(path='dataset/audio/noise')
    print('=' * 70)

    print('开始生成数据字典...')
    counter = Counter()
    # Accumulate label-character frequencies across all data manifests.
    count_manifest(counter, args.manifest_paths)
    # Most-frequent characters first, so each vocabulary line number doubles as an ID
    # and the break below cleanly drops the rare tail.
    sorted_counts = sorted(counter.items(), key=lambda item: item[1], reverse=True)
    with open(args.vocab_path, 'w', encoding='utf-8') as fout:
        fout.write('<blank>\t-1\n')
        for char, count in sorted_counts:
            # Characters rarer than the threshold are excluded from the vocabulary.
            if count < args.count_threshold:
                break
            fout.write('%s\t%d\n' % (char, count))
    print('数据词汇表已生成完成,保存与:%s' % args.vocab_path)
    print('=' * 70)

    print('开始抽取%s条数据计算均值和标准值...' % args.num_samples)
    compute_mean_std(args.manifest_paths, args.num_samples, args.output_path)
    print('=' * 70)
Beispiel #3
0
def main():
    """Decode every audio item listed in the input index and record the results.

    Reads work items from ``args.inputindex``, writes one ``*.result.json``
    per utterance into the output index's directory, appends a
    ``filename duration offset`` line per result to ``args.outputindex``,
    and terminates the index with a ``__done__ -1 -1`` sentinel line.
    """
    print_arguments(args)
    paddle.init(use_gpu=args.use_gpu,
                rnn_use_batch=True,
                trainer_count=args.trainer_count)

    indir = os.path.dirname(args.inputindex)
    outdir = os.path.dirname(args.outputindex)
    # exist_ok=True tolerates only "already exists"; other failures (e.g.
    # permission denied) now surface instead of being silently swallowed by
    # the original bare `except OSError: pass`. Guard against an empty
    # dirname when the index path has no directory component.
    if indir:
        os.makedirs(indir, exist_ok=True)
    # Portable, injection-safe replacement for `os.system('touch {}')`:
    # opening in append mode creates the file if missing, never truncates.
    with open(args.inputindex, 'a'):
        pass
    if outdir:
        os.makedirs(outdir, exist_ok=True)

    # generate manifest using input wav files
    with codecs.open(args.outputindex, 'w', 'utf-8') as outfp:
        manifests = iter_manifests(args.inputindex)

        for (audioname, manifest_path,
             result_transcript,
             duration, offset) in decode_all(manifests):
            # Save the decoding result next to the output index.
            result_filename = '{}.result.json'.format(audioname)
            save_decode_result(
                result_transcript,
                os.path.join(outdir, result_filename))
            outfp.write('{} {} {}\n'.format(result_filename, duration, offset))
            print("decoding result saved: {}.".format(result_filename))
        # Sentinel so downstream consumers know the index is complete.
        outfp.write('__done__ -1 -1\n')
Beispiel #4
0
    def __init__(self, num_processes_beam_search=1):
        """Record beam-search parallelism, choose the compute device, start serving."""
        self.num_processes_beam_search = num_processes_beam_search
        print_arguments(args)

        # Select the fluid execution place from the CLI flag.
        self.place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()

        self._start_server()
def infer_interface(audiolistfile, filenum):
    """Run inference over `audiolistfile` and return the result for `filenum`."""
    # Point the shared argument namespace at the supplied manifest.
    args.infer_manifest = audiolistfile
    print_arguments(args)
    paddle.init(
        use_gpu=args.use_gpu,
        rnn_use_batch=True,
        trainer_count=args.trainer_count)

    return infer(filenum)
def main():
    """Count label characters over all manifests and write the vocabulary file."""
    print_arguments(args)

    counter = Counter()
    for path in args.manifest_paths:
        count_manifest(counter, path)

    # Most frequent first, so truncating at the threshold keeps common characters.
    ordered = sorted(counter.items(), key=lambda kv: kv[1], reverse=True)
    with codecs.open(args.vocab_path, 'w', 'utf-8') as fout:
        for char, count in ordered:
            if count < args.count_threshold:
                break
            fout.write(char + '\n')
Beispiel #7
0
def main():
    """Compute feature normalisation statistics over sampled data and save them."""
    print_arguments(args)

    featurizer = AudioFeaturizer(specgram_type=args.specgram_type)

    def extract_features(audio_segment):
        # No augmentation in this variant — featurize the raw segment.
        return featurizer.featurize(audio_segment)

    FeatureNormalizer(
        mean_std_filepath=None,
        manifest_path=args.manifest_path,
        featurize_func=extract_features,
        num_samples=args.num_samples,
    ).write_to_file(args.output_path)
Beispiel #8
0
def main():
    """Parse CLI args, reconcile incompatible flags, and launch distributed training.

    Raises:
        SystemExit: when ``--use_gpu`` is not set — this benchmark is GPU-only.
    """
    args = parser.parse_args()
    # This distributed benchmark only supports GPU execution. The original
    # used `assert`, which is silently stripped under `python -O`; an
    # explicit check always fires.
    if not args.use_gpu:
        raise SystemExit("only for gpu implementation.")
    if args.use_dgc:
        # DGC is incompatible with fused gradients and with fp16 for now;
        # force-disable both and warn rather than fail.
        if args.fuse:
            print("Warning: Use dgc must close fuse for now, so code will set fuse=False")
            args.fuse = False
        if args.fp16:
            print("Warning: DGC unsupport fp16 for now, so code will set fp16=False")
            args.fp16 = False
    print_arguments(args)
    print_paddle_environments()
    check_gpu(args.use_gpu)
    train(args)
Beispiel #9
0
def main():
    """Build the character vocabulary from the configured manifest files."""
    print_arguments(args)

    counter = Counter()
    # `manifest_paths` arrives as a comma-separated CLI string; split() already
    # yields the list (the original wrapped it in a useless identity
    # comprehension).
    manifest_paths = args.manifest_paths.split(',')
    # Accumulate label-character frequencies across every manifest.
    for manifest_path in manifest_paths:
        count_manifest(counter, manifest_path)
    # Sort by descending frequency so each vocabulary line number doubles as an ID.
    count_sorted = sorted(counter.items(), key=lambda x: x[1], reverse=True)
    with codecs.open(args.vocab_path, 'w', 'utf-8') as fout:
        for char, count in count_sorted:
            # Characters rarer than the threshold are dropped; the descending
            # sort makes an early break safe.
            if count < args.count_threshold:
                break
            fout.write(char + '\n')
Beispiel #10
0
def main():
    """Sample manifest entries to compute feature mean/std, then persist them."""
    print_arguments(args)

    pipeline = AugmentationPipeline('{}')
    featurizer = AudioFeaturizer(specgram_type=args.specgram_type)

    def augment_then_featurize(audio_segment):
        # Apply augmentation in place, then extract spectrogram features.
        pipeline.transform_audio(audio_segment)
        return featurizer.featurize(audio_segment)

    # Randomly draw `num_samples` entries to estimate normalisation statistics,
    # then write the result to the configured output path.
    normalizer = FeatureNormalizer(
        mean_std_filepath=None,
        manifest_path=args.manifest_path,
        featurize_func=augment_then_featurize,
        num_samples=args.num_samples)
    normalizer.write_to_file(args.output_path)
Beispiel #11
0
def main(audio_path=''):
    """Transcribe `audio_path`, lazily loading the model on first call.

    The model, data generator and vocabulary are cached in module-level
    globals so subsequent calls skip the expensive load step.

    Args:
        audio_path: path to the audio file to transcribe (stored on ``args``).

    Returns:
        The transcription result produced by ``infer``.
    """
    global ds2_model
    global data_generator
    global vocab_list
    print_arguments(args)
    args.audio_path = audio_path

    # One-time model load; the original duplicated the whole timing/inference
    # block in both branches of this check — only the load step differs.
    if not ds2_model:
        print("Model loading initiated ...")
        ds2_model, data_generator, vocab_list = load_model()
        print("Model loaded Successfully ...")

    tic = time.time()
    result_transcripts = infer(ds2_model, data_generator, vocab_list)
    toc = time.time()
    # Message typo fixed: "trnascription" -> "transcription".
    print("{} sec required for audio file transcription".format(toc - tic))
    print("result transcripts: ", result_transcripts)
    return result_transcripts
Beispiel #12
0
def main():
    """Echo the CLI arguments, then run hyper-parameter tuning."""
    print_arguments(args)
    tune()
def main():
    """Echo the CLI arguments, then start training."""
    print_arguments(args)
    train()
Beispiel #14
0
def main():
    """Echo the CLI arguments, then run evaluation."""
    print_arguments(args)
    evaluate()
import numpy as np
import paddle
import pyaudio

from utils.reader import load_audio
from utils.utility import add_arguments, print_arguments

# Command-line interface: each option below becomes an attribute on `args`.
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('input_shape', str, '(1, 257, 257)', '数据输入的形状')
add_arg('threshold', float, 0.7, '判断是否为同一个人的阈值')
add_arg('model_path', str, 'models/infer/model', '预测模型的路径')
args = parser.parse_args()

print_arguments(args)

# Load the exported inference model once at module import time and switch
# it to evaluation mode.
model = paddle.jit.load(args.model_path)
model.eval()

# Parallel registries of enrolled speaker features/names; presumably
# populated elsewhere in the original script — TODO confirm.
person_feature = []
person_name = []


def infer(audio_path):
    """Run the speaker model on the audio file at `audio_path`.

    NOTE(review): the snippet is truncated here — `feature` is computed but
    not returned, matching the original as visible.
    """
    import ast
    # `ast.literal_eval` parses the same tuple literal as the original
    # `eval()` on the CLI-supplied `input_shape` string, without executing
    # arbitrary code.
    input_shape = ast.literal_eval(args.input_shape)
    # Load the spectrogram, cropped/padded to the configured time length.
    data = load_audio(audio_path, mode='infer', spec_len=input_shape[2])
    # Prepend the batch dimension expected by the model.
    data = data[np.newaxis, :]
    data = paddle.to_tensor(data, dtype='float32')
    # Run the forward pass to obtain the speaker feature.
    feature = model(data)
Beispiel #16
0
def main():
    """Echo the CLI arguments and generate the dataset manifest files."""
    print_arguments(args)
    create_manifest(
        annotation_path=args.annotation_path,
        manifest_path_prefix=args.manifest_prefix)
Beispiel #17
0
def main():
    """Initialise PaddlePaddle from the CLI flags, then run parameter tuning."""
    print_arguments(args)
    paddle.init(
        use_gpu=args.use_gpu,
        trainer_count=args.trainer_count)
    tune()
def main():
    """Initialise PaddlePaddle (batched RNN mode), then run inference."""
    print_arguments(args)
    paddle.init(
        use_gpu=args.use_gpu,
        rnn_use_batch=True,
        trainer_count=args.trainer_count)
    infer()
Beispiel #19
0
def main():
    """Parse command-line arguments, echo them, and run inference."""
    args = parser.parse_args()
    print_arguments(args)
    infer(args)
def main():
    """Transcribe the configured wav file and report the elapsed time."""
    print_arguments(args)
    start = time.time()
    text = predict(filename=args.wav_path)
    # Elapsed wall-clock time in milliseconds.
    elapsed_ms = round((time.time() - start) * 1000)
    print("消耗时间:%d, 识别结果: %s" % (elapsed_ms, text))
Beispiel #21
0
def main():
    """Echo the CLI arguments, then run inference."""
    print_arguments(args)
    infer()
Beispiel #22
0
def main():
    """Parse command-line arguments, echo them, and run evaluation."""
    args = parser.parse_args()
    print_arguments(args)
    # NOTE(review): `eval` here appears to be a project-local evaluation
    # function shadowing the builtin — confirm against the imports.
    eval(args)
Beispiel #23
0
def main():
    """Echo the CLI arguments, then run inference in 'transcript' mode."""
    print_arguments(args)
    infer('transcript')
Beispiel #24
0
def main():
    """Transcribe the configured wav file and print the recognised text."""
    print_arguments(args)
    text = predict(filename=args.wav_path)
    # Print the text directly: the original's `text.encode('utf-8')` is a
    # Python 2 leftover that renders as a bytes repr (b'...') on Python 3.
    print("Predict result text: %s" % text)
Beispiel #25
0
def main():
    """Initialise PaddlePaddle with a single trainer, then start the server."""
    print_arguments(args)
    paddle.init(
        use_gpu=args.use_gpu,
        trainer_count=1)
    start_server()
def main():
    """Echo the CLI arguments, then start the server."""
    print_arguments(args)
    start_server()
Beispiel #27
0
def main():
    """Parse CLI arguments, verify the GPU environment, and start training."""
    args = parser.parse_args()
    print_arguments(args)
    check_gpu(args.use_gpu)
    train(args)