def read_recognizer(inference_config_or_name='latest'):
    """Build a Recognizer from a model name or a ready-made inference config.

    Args:
        inference_config_or_name: either a model name (str), which is resolved
            with resolve_model_name and wrapped in a default config
            (device_id=-1, lang='ipa', approximate=False), or a Namespace
            that is used as the inference config as-is. The config's `model`
            attribute names the directory under `pretrained/` to load.

    Returns:
        A Recognizer built from the pm, am and lm read from the model
        directory, together with the inference config.

    Raises:
        AssertionError: if the argument is neither a str nor a Namespace, or
            if the model directory does not exist.
    """
    # download the latest model automatically if no model exists
    if len(get_all_models()) == 0:
        download_model('latest')

    # create default config if input is the model's name
    if isinstance(inference_config_or_name, str):
        model_name = resolve_model_name(inference_config_or_name)
        inference_config = Namespace(model=model_name, device_id=-1, lang='ipa', approximate=False)
    else:
        assert isinstance(inference_config_or_name, Namespace), \
            "inference_config_or_name must be a model name (str) or a Namespace"
        inference_config = inference_config_or_name

    model_path = Path(__file__).parent / 'pretrained' / inference_config.model

    if inference_config.model == 'latest' and not model_path.exists():
        # download_model is given a model name (as in the call above),
        # not the whole config object
        download_model(inference_config.model)

    assert model_path.exists(), f"{inference_config.model} is not a valid model"

    # create pm (pm stands for preprocess model: audio -> feature etc..)
    pm = read_pm(model_path, inference_config)

    # create am (acoustic model: feature -> logits )
    am = read_am(model_path, inference_config)

    # create lm (language model: logits -> phone)
    lm = read_lm(model_path, inference_config)

    return Recognizer(pm, am, lm, inference_config)
# Command-line entry point: parse the arguments, make sure a model is
# available, build a recognizer and print the phones recognized in the input
# wav file.


def _str_to_bool(value):
    """Convert a command-line string such as 'True', 'false', '1' or 'no' to a bool.

    argparse's `type=bool` would call bool() on the raw string, which is True
    for every non-empty value (including 'False'), so the text is parsed
    explicitly here.
    """
    normalized = value.strip().lower()
    if normalized in ('true', 't', 'yes', 'y', '1'):
        return True
    if normalized in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


parser = argparse.ArgumentParser('Allosaurus phone recognizer')
parser.add_argument('-d', '--device_id', type=int, default=-1, help='specify cuda device id to use, -1 means no cuda and will use cpu for inference')
parser.add_argument('-m', '--model', type=str, default='latest', help='specify which model to use. default is to use the latest local model')
parser.add_argument('-l', '--lang', type=str, default='ipa',help='specify which language inventory to use for recognition. default is to use all phone inventory')
parser.add_argument('-i', '--input', type=str, required=True, help='specify your input wav file')
# accepts an explicit value ("-a True" / "-a False") or a bare "-a" (meaning True)
parser.add_argument('-a', '--approximate', type=_str_to_bool, nargs='?', const=True, default=False, help='the phone inventory can still hardly to cover all phones. You can use turn on this flag to map missing phones to other similar phones to recognize. The similarity is measured with phonological features')

args = parser.parse_args()

# check file format; parser.error reports the problem and exits with status 2,
# and unlike an assert it is not stripped when Python runs with -O
if not args.input.endswith('.wav'):
    parser.error("Please use a wav file. other audio files can be converted to wav by sox")

# download specified model automatically if no model exists
if len(get_all_models()) == 0:
    download_model('latest')

# resolve model's name
model_name = resolve_model_name(args.model)

if model_name == "none":
    # report the name the user asked for; model_name is always "none" here
    print("Model ", args.model, " does not exist. Please download this model or use an existing model in list_model")
    # a missing model is a failure, so exit with a non-zero status
    raise SystemExit(1)

args.model = model_name

# create recognizer
recognizer = read_recognizer(args)

# run inference
phones = recognizer.recognize(args.input, args.lang)

print(phones)