def main():
    """Entry point: initialize PaddlePaddle and kick off training."""
    print_arguments(args)
    # Batch RNN execution and gradient clipping are enabled at init time.
    paddle.init(use_gpu=args.use_gpu,
                rnn_use_batch=True,
                trainer_count=args.trainer_count,
                log_clipping=True)
    train()
def main():
    """Entry point: build manifests, a noise list, the character vocabulary,
    and the feature mean/std statistics for the dataset."""
    print_arguments(args)
    print('开始生成数据列表...')
    create_manifest(annotation_path=args.annotation_path,
                    manifest_path_prefix=args.manifest_prefix)
    print('=' * 70)
    print('开始生成噪声数据列表...')
    create_noise(path='dataset/audio/noise')
    print('=' * 70)
    print('开始生成数据字典...')
    char_counter = Counter()
    # Collect label-character frequencies from all manifests.
    count_manifest(char_counter, args.manifest_paths)
    # Most frequent characters first, so line number doubles as character ID.
    by_frequency = sorted(char_counter.items(), key=lambda kv: kv[1], reverse=True)
    with open(args.vocab_path, 'w', encoding='utf-8') as fout:
        fout.write('<blank>\t-1\n')
        for char, freq in by_frequency:
            # Frequencies are sorted descending, so the first character below
            # the threshold ends the vocabulary.
            if freq < args.count_threshold:
                break
            fout.write('%s\t%d\n' % (char, freq))
    print('数据词汇表已生成完成,保存与:%s' % args.vocab_path)
    print('=' * 70)
    print('开始抽取%s条数据计算均值和标准值...' % args.num_samples)
    compute_mean_std(args.manifest_paths, args.num_samples, args.output_path)
    print('=' * 70)
def main():
    """Entry point: initialize Paddle, then decode the audio listed in the
    input index file, writing one JSON result per utterance and an output
    index that downstream consumers can tail.

    Fix: the original shelled out via os.system('touch ...'), which is
    Unix-only and breaks on paths containing spaces or shell metacharacters.
    Replaced with stdlib open()/os.utime(), which replicates touch (create if
    missing, refresh timestamps) portably and without invoking a shell.
    """
    print_arguments(args)
    paddle.init(use_gpu=args.use_gpu,
                rnn_use_batch=True,
                trainer_count=args.trainer_count)
    indir = os.path.dirname(args.inputindex)
    outdir = os.path.dirname(args.outputindex)
    # Ensure both directories exist; try/except (rather than exist_ok=True)
    # keeps Python-2 compatibility, matching the rest of this code base.
    try:
        os.makedirs(indir)
    except OSError:
        pass
    # Portable equivalent of `touch`: create the file if absent and refresh
    # its access/modification times.
    with open(args.inputindex, 'a'):
        os.utime(args.inputindex, None)
    try:
        os.makedirs(outdir)
    except OSError:
        pass
    # Decode every manifest entry and persist each transcript as JSON.
    with codecs.open(args.outputindex, 'w', 'utf-8') as outfp:
        manifests = iter_manifests(args.inputindex)
        for (audioname, manifest_path, result_transcript, duration,
             offset) in decode_all(manifests):
            result_filename = '{}.result.json'.format(audioname)
            save_decode_result(result_transcript,
                               os.path.join(outdir, result_filename))
            # Index line format: "<result file> <duration> <offset>".
            outfp.write('{} {} {}\n'.format(result_filename, duration, offset))
            print("decoding result saved: {}.".format(result_filename))
        # Sentinel line signals that decoding has finished.
        outfp.write('__done__ -1 -1\n')
def __init__(self, num_processes_beam_search=1):
    """Select the execution place (GPU 0 or CPU) and launch the server.

    Args:
        num_processes_beam_search: number of worker processes for beam search.
    """
    self.num_processes_beam_search = num_processes_beam_search
    print_arguments(args)
    # Device choice follows the command-line flag; GPU always means card 0.
    self.place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    self._start_server()
def infer_interface(audiolistfile, filenum):
    """Run inference over the manifest *audiolistfile* and return the result
    for entry *filenum*."""
    # Point the module-level args at the caller-supplied manifest first.
    args.infer_manifest = audiolistfile
    print_arguments(args)
    paddle.init(use_gpu=args.use_gpu,
                rnn_use_batch=True,
                trainer_count=args.trainer_count)
    return infer(filenum)
def main():
    """Build a character vocabulary file from the manifests, ordered by
    descending character frequency."""
    print_arguments(args)
    char_counter = Counter()
    for path in args.manifest_paths:
        count_manifest(char_counter, path)
    by_frequency = sorted(char_counter.items(), key=lambda kv: kv[1],
                          reverse=True)
    with codecs.open(args.vocab_path, 'w', 'utf-8') as fout:
        for char, freq in by_frequency:
            # Entries are frequency-sorted, so the first character below the
            # threshold ends the vocabulary.
            if freq < args.count_threshold:
                break
            fout.write(char + '\n')
def main():
    """Compute feature mean/std over sampled manifest entries and save them."""
    print_arguments(args)
    featurizer = AudioFeaturizer(specgram_type=args.specgram_type)

    def featurize(segment):
        # No augmentation in this variant: featurize the raw segment directly.
        return featurizer.featurize(segment)

    stats = FeatureNormalizer(mean_std_filepath=None,
                              manifest_path=args.manifest_path,
                              featurize_func=featurize,
                              num_samples=args.num_samples)
    stats.write_to_file(args.output_path)
def main():
    """Entry point for the distributed GPU training benchmark."""
    args = parser.parse_args()
    # This benchmark only targets GPU execution.
    assert args.use_gpu, "only for gpu implementation."
    if args.use_dgc:
        # DGC is currently incompatible with operator fusion and with fp16;
        # warn and force both off instead of failing outright.
        if args.fuse:
            print("Warning: Use dgc must close fuse for now, so code will set fuse=False")
            args.fuse = False
        if args.fp16:
            print("Warning: DGC unsupport fp16 for now, so code will set fp16=False")
            args.fp16 = False
    print_arguments(args)
    print_paddle_environments()
    check_gpu(args.use_gpu)
    train(args)
def main():
    """Build a character vocabulary from a comma-separated list of manifest
    paths, ordered by descending character frequency.

    Fix: the original wrapped ``split(',')`` in a redundant list
    comprehension (``[path for path in ...split(',')]``); ``split`` already
    returns a list, so the copy was pure overhead.
    """
    print_arguments(args)
    counter = Counter()
    # args.manifest_paths is a single comma-separated string.
    for manifest_path in args.manifest_paths.split(','):
        # Accumulate label-character frequencies from each manifest.
        count_manifest(counter, manifest_path)
    # Most frequent characters first, so line number doubles as character ID.
    count_sorted = sorted(counter.items(), key=lambda x: x[1], reverse=True)
    with codecs.open(args.vocab_path, 'w', 'utf-8') as fout:
        for char, count in count_sorted:
            # Entries are frequency-sorted: stop at the first character below
            # the configured threshold.
            if count < args.count_threshold:
                break
            fout.write(char + '\n')
def main():
    """Compute feature normalization statistics, passing each sampled audio
    segment through the augmentation pipeline before featurization."""
    print_arguments(args)
    # '{}' is presumably an empty JSON augmentation config — no-op pipeline.
    pipeline = AugmentationPipeline('{}')
    featurizer = AudioFeaturizer(specgram_type=args.specgram_type)

    def transform_then_featurize(segment):
        # Augment in place, then extract features from the result.
        pipeline.transform_audio(segment)
        return featurizer.featurize(segment)

    # Estimate mean/std from a random sample of the manifest entries.
    normalizer = FeatureNormalizer(mean_std_filepath=None,
                                   manifest_path=args.manifest_path,
                                   featurize_func=transform_then_featurize,
                                   num_samples=args.num_samples)
    # Persist the computed statistics.
    normalizer.write_to_file(args.output_path)
def main(audio_path=''):
    """Transcribe *audio_path*, loading the model lazily on first call.

    The model, data generator and vocabulary are cached in module globals so
    later calls skip the expensive load. Returns the inference result.

    Fix: the original duplicated the identical timing/inference/printing
    block in both branches of the if/else; it is hoisted out here, which
    preserves behavior exactly while removing the duplication.
    """
    global ds2_model
    global data_generator
    global vocab_list
    print_arguments(args)
    args.audio_path = audio_path
    if not ds2_model:
        # One-time lazy load, cached for subsequent calls.
        print("Model loading initiated ...")
        ds2_model, data_generator, vocab_list = load_model()
        print("Model loaded Successfully ...")
    tic = time.time()
    result_transcripts = infer(ds2_model, data_generator, vocab_list)
    toc = time.time()
    # Log message kept byte-identical to the original (including its typo).
    print("{} sec required for audio file trnascription".format(toc - tic))
    print("result transcripts: ", result_transcripts)
    return result_transcripts
def main():
    """Print the parsed arguments, then run hyper-parameter tuning."""
    print_arguments(args)
    tune()
def main():
    """Print the parsed arguments, then start training."""
    print_arguments(args)
    train()
def main():
    """Print the parsed arguments, then run evaluation."""
    print_arguments(args)
    evaluate()
# Speaker-verification inference script: loads an exported Paddle model and
# exposes infer(audio_path) which featurizes an audio file and runs the model.
# NOTE(review): argparse and functools are used below but not imported in
# this visible chunk — presumably imported earlier in the file; confirm.
import numpy as np
import paddle
import pyaudio
from utils.reader import load_audio
from utils.utility import add_arguments, print_arguments

parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
add_arg('input_shape', str, '(1, 257, 257)', '数据输入的形状')
add_arg('threshold', float, 0.7, '判断是否为同一个人的阈值')
add_arg('model_path', str, 'models/infer/model', '预测模型的路径')
args = parser.parse_args()
print_arguments(args)

# Load the exported inference model once at module import time.
model = paddle.jit.load(args.model_path)
model.eval()

# Registries of known speakers: parallel lists of feature vectors and names.
person_feature = []
person_name = []


def infer(audio_path):
    # eval() parses the '(1, 257, 257)' shape string from the CLI into a
    # tuple. NOTE(review): eval on a command-line string is unsafe for
    # untrusted input — consider ast.literal_eval.
    input_shape = eval(args.input_shape)
    data = load_audio(audio_path, mode='infer', spec_len=input_shape[2])
    # Prepend a batch dimension before handing the spectrogram to the model.
    data = data[np.newaxis, :]
    data = paddle.to_tensor(data, dtype='float32')
    # Run the forward pass (执行预测).
    feature = model(data)
    # NOTE(review): the function appears truncated in this chunk — no return
    # statement is visible; the remainder presumably follows elsewhere.
def main():
    """Generate the data manifest from the annotation file."""
    print_arguments(args)
    create_manifest(annotation_path=args.annotation_path,
                    manifest_path_prefix=args.manifest_prefix)
def main():
    """Initialize Paddle, then launch hyper-parameter tuning."""
    print_arguments(args)
    paddle.init(use_gpu=args.use_gpu, trainer_count=args.trainer_count)
    tune()
def main():
    """Initialize Paddle (batched RNN execution enabled), then run inference."""
    print_arguments(args)
    paddle.init(use_gpu=args.use_gpu,
                rnn_use_batch=True,
                trainer_count=args.trainer_count)
    infer()
def main():
    """Parse command-line arguments and run inference with them."""
    cli_args = parser.parse_args()
    print_arguments(cli_args)
    infer(cli_args)
def main():
    """Transcribe the configured WAV file and report elapsed milliseconds."""
    print_arguments(args)
    begin = time.time()
    text = predict(filename=args.wav_path)
    elapsed_ms = round((time.time() - begin) * 1000)
    print("消耗时间:%d, 识别结果: %s" % (elapsed_ms, text))
def main():
    """Print the parsed arguments, then run inference."""
    print_arguments(args)
    infer()
def main():
    """Parse command-line arguments and run evaluation with them."""
    parsed = parser.parse_args()
    print_arguments(parsed)
    # `eval` here is a project-defined evaluation function, not the builtin.
    eval(parsed)
def main():
    """Print the parsed arguments, then run inference in 'transcript' mode."""
    print_arguments(args)
    infer('transcript')
def main():
    """Transcribe the configured WAV file and print the resulting text."""
    print_arguments(args)
    text = predict(filename=args.wav_path)
    # NOTE(review): .encode('utf-8') suggests Python-2-era code; under
    # Python 3 this would print the bytes repr — confirm target interpreter
    # before changing. Behavior kept identical here.
    print("Predict result text: %s" % text.encode('utf-8'))
def main():
    """Initialize Paddle with a single trainer, then start the demo server."""
    print_arguments(args)
    paddle.init(use_gpu=args.use_gpu, trainer_count=1)
    start_server()
def main():
    """Print the parsed arguments, then start the server."""
    print_arguments(args)
    start_server()
def main():
    """Parse arguments, validate GPU availability, and start training."""
    cli_args = parser.parse_args()
    print_arguments(cli_args)
    check_gpu(cli_args.use_gpu)
    train(cli_args)