Example #1
0
def main():
    """Generate the data manifest, the character vocabulary and the feature statistics.

    All settings are read from the module-level ``args``. The steps run in order:

    1. ``create_manifest`` is called with the annotation path and manifest prefix.
    2. ``count_manifest`` fills a ``Counter`` from ``args.manifest_path``; its
       entries are written to ``args.vocab_path`` as a JSON list, most frequent
       first, stopping at the first entry whose count is below
       ``args.count_threshold``.
    3. ``compute_mean_std`` is called with the manifest path, the sample count
       and the output path.
    """
    # Local import so this function does not depend on the file's import block.
    import json

    print_arguments(args)
    print('开始生成数据列表...')
    create_manifest(annotation_path=args.annotation_path,
                    manifest_path_prefix=args.manifest_prefix)

    print('开始生成数据字典...')
    counter = Counter()
    # The return value is ignored: count_manifest is expected to update
    # `counter` in place.
    count_manifest(counter, args.manifest_path)

    # Highest count first; the sort is stable, so ties keep the counter's order.
    count_sorted = sorted(counter.items(), key=lambda x: x[1], reverse=True)

    # '?' always occupies index 0.
    # NOTE(review): presumably a reserved blank/unknown symbol - confirm
    # against the model code.
    labels = ['?']
    for char, count in count_sorted:
        # The list is sorted descending, so every later entry is below the
        # threshold as well.
        if count < args.count_threshold:
            break
        labels.append(char)

    with open(args.vocab_path, 'w', encoding='utf-8') as fout:
        # json.dumps escapes quotes and backslashes correctly; the previous
        # str(labels).replace("'", '"') trick produced a corrupt file whenever
        # one of those characters was part of the vocabulary. For ordinary
        # characters the output text is unchanged.
        fout.write(json.dumps(labels, ensure_ascii=False))
    print('数据字典生成完成!')

    print('开始抽取%s条数据计算均值和标准值...' % args.num_samples)
    compute_mean_std(args.manifest_path, args.num_samples, args.output_path)
Example #2
0
def main():
    """Build (or restore) the model and start training.

    All settings are read from the module-level ``args``:

    * ``save_model_path`` - directory created if it does not exist yet.
    * ``vocab_path`` - file holding a list literal of vocabulary entries; the
      entries are concatenated into one string that is passed to ``GatedConv``.
    * ``restore_model`` - when truthy, the freshly built model is replaced by
      the object returned from ``torch.load``.
    * the manifest paths, ``epochs``, ``batch_size`` and ``learning_rate`` are
      forwarded to ``train``.
    """
    # Local import so this function does not depend on the file's import block.
    import ast

    print_arguments(args)
    # Create the folder for saved models; exist_ok avoids the race between
    # checking for the directory and creating it.
    os.makedirs(args.save_model_path, exist_ok=True)
    # Load the vocabulary.
    with open(args.vocab_path, 'r', encoding='utf-8') as f:
        # literal_eval accepts only Python literals, so a tampered vocabulary
        # file cannot execute arbitrary code the way eval() would.
        vocabulary = ast.literal_eval(f.read())
    vocabulary = "".join(vocabulary)
    # Build the model.
    model = GatedConv(vocabulary)
    # Load a pretrained model if one was requested.
    if args.restore_model:
        # NOTE(review): torch.load unpickles the file; only restore
        # checkpoints that come from a trusted source.
        model = torch.load(args.restore_model)
    model = model.cuda()
    train(model=model,
          train_manifest_path=args.train_manifest_path,
          dev_manifest_path=args.dev_manifest_path,
          vocab_path=args.vocab_path,
          epochs=args.epochs,
          batch_size=args.batch_size,
          learning_rate=args.learning_rate)
Example #3
0
                    type=str,
                    help="language model path. (default: %(default)s)")
# Manifest of the development set.
# The help text previously said "train manifest", copied from another option.
parser.add_argument("--dev_manifest_path",
                    default="dataset/manifest.dev",
                    type=str,
                    help="dev manifest file path. (default: %(default)s)")
# Vocabulary file.
parser.add_argument("--vocab_path",
                    default="dataset/zh_vocab.json",
                    type=str,
                    help="vocab file path. (default: %(default)s)")
# Batch size.
parser.add_argument("--batch_size",
                    default=64,
                    type=int,
                    help="number for batch size. (default: %(default)s)")
# Parse the command line once at import time and echo the resulting settings.
args = parser.parse_args()
print_arguments(args)

# Beam-search decoding settings.
# alpha and beta are passed to CTCBeamDecoder right after the language-model
# path. NOTE(review): presumably the LM weight and the length/word bonus -
# confirm against the decoder's documentation.
alpha, beta = 0.8, 0.3
# NOTE(review): the remaining values are presumably forwarded to the decoder
# as well; the call continues past the visible part of the file.
cutoff_top_n, cutoff_prob = 40, 1.0
beam_width, num_processes = 32, 4
blank_index = 0

# Load the saved model object, move it to the GPU and switch it to
# evaluation mode.
model = torch.load(args.model_path).cuda()
model.eval()

# 创建解码器
decoder = CTCBeamDecoder(model.vocabulary, args.lm_path, alpha, beta,