def allosaurus_featurize(wavfile):
    """Featurize an audio file as a bag-of-phones vector.

    Runs the (pretrained) Allosaurus universal phone recognizer on ``wavfile``
    and counts how often each phone from a fixed IPA label inventory occurs in
    the transcript.

    :param wavfile: path to a .wav audio file accepted by ``model.recognize``.
    :returns: ``(features, labels)`` where ``features[i]`` is the number of
        occurrences of ``labels[i]`` in the recognized phone sequence.
    """
    from collections import Counter

    # load the pretrained recognizer (downloads/uses the default model)
    model = read_recognizer()

    # fixed IPA phone inventory; the output feature vector is aligned to it
    labels = [
        'I', 'a', 'aː', 'ã', 'ă', 'b', 'bʲ', 'bʲj', 'bʷ', 'bʼ', 'bː', 'b̞',
        'b̤', 'b̥', 'c', 'd', 'dʒ', 'dʲ', 'dː', 'd̚', 'd̥', 'd̪', 'd̯', 'd͡z',
        'd͡ʑ', 'd͡ʒ', 'd͡ʒː', 'd͡ʒ̤', 'e', 'eː', 'e̞', 'f', 'fʲ', 'fʷ', 'fː',
        'g', 'gʲ', 'gʲj', 'gʷ', 'gː', 'h', 'hʷ', 'i', 'ij', 'iː', 'i̞', 'i̥',
        'i̯', 'j', 'k', 'kx', 'kʰ', 'kʲ', 'kʲj', 'kʷ', 'kʷʼ', 'kʼ', 'kː',
        'k̟ʲ', 'k̟̚', 'k͡p̚', 'l', 'lʲ', 'lː', 'l̪', 'm', 'mʲ', 'mʲj', 'mʷ',
        'mː', 'n', 'nj', 'nʲ', 'nː', 'n̪', 'n̺', 'o', 'oː', 'o̞', 'o̥', 'p',
        'pf', 'pʰ', 'pʲ', 'pʲj', 'pʷ', 'pʷʼ', 'pʼ', 'pː', 'p̚', 'q', 'r',
        'rː', 's', 'sʲ', 'sʼ', 'sː', 's̪', 't', 'ts', 'tsʰ', 'tɕ', 'tɕʰ',
        'tʂ', 'tʂʰ', 'tʃ', 'tʰ', 'tʲ', 'tʷʼ', 'tʼ', 'tː', 't̚', 't̪', 't̪ʰ',
        't̪̚', 't͡s', 't͡sʼ', 't͡ɕ', 't͡ɬ', 't͡ʃ', 't͡ʃʲ', 't͡ʃʼ', 't͡ʃː',
        'u', 'uə', 'uː', 'u͡w', 'v', 'vʲ', 'vʷ', 'vː', 'v̞', 'v̞ʲ', 'w', 'x',
        'x̟ʲ', 'y', 'z', 'zj', 'zʲ', 'z̪', 'ä', 'æ', 'ç', 'çj', 'ð', 'ø',
        'ŋ', 'ŋ̟', 'ŋ͡m', 'œ', 'œ̃', 'ɐ', 'ɐ̞', 'ɑ', 'ɑ̱', 'ɒ', 'ɓ', 'ɔ',
        'ɔ̃', 'ɕ', 'ɕː', 'ɖ̤', 'ɗ', 'ə', 'ɛ', 'ɛ̃', 'ɟ', 'ɡ', 'ɡʲ', 'ɡ̤',
        'ɡ̥', 'ɣ', 'ɣj', 'ɤ', 'ɤɐ̞', 'ɤ̆', 'ɥ', 'ɦ', 'ɨ', 'ɪ', 'ɫ', 'ɯ',
        'ɯ̟', 'ɯ̥', 'ɰ', 'ɱ', 'ɲ', 'ɳ', 'ɴ', 'ɵ', 'ɸ', 'ɹ', 'ɹ̩', 'ɻ',
        'ɻ̩', 'ɽ', 'ɾ', 'ɾj', 'ɾʲ', 'ɾ̠', 'ʀ', 'ʁ', 'ʁ̝', 'ʂ', 'ʃ', 'ʃʲː',
        'ʃ͡ɣ', 'ʈ', 'ʉ̞', 'ʊ', 'ʋ', 'ʋʲ', 'ʌ', 'ʎ', 'ʏ', 'ʐ', 'ʑ', 'ʒ',
        'ʒ͡ɣ', 'ʔ', 'ʝ', 'ː', 'β', 'β̞', 'θ', 'χ', 'ә', 'ḁ'
    ]

    phone_transcript = model.recognize(wavfile).split()
    print(phone_transcript)

    # Count every phone in a single pass (O(transcript)) instead of calling
    # list.count once per label (O(labels * transcript)); Counter returns 0
    # for absent labels, so the resulting vector is identical.
    counts = Counter(phone_transcript)
    features = [counts[label] for label in labels]
    return features, labels
help='specify your input wav file')  # continuation of a parser.add_argument(...) call that begins before this chunk
args = parser.parse_args()

# check file format
# NOTE(review): `assert` is stripped when run with `python -O`, so this
# validation silently disappears under optimization — consider an explicit
# check + sys.exit instead (TODO confirm with maintainers).
assert args.input.endswith(
    '.wav'
), " Error: Please use a wav file. other audio files can be converted to wav by sox"

# download specified model automatically if no model exists
if len(get_all_models()) == 0:
    download_model('latest')

# resolve model's name; resolve_model_name returns the string "none" (not None)
# when the requested model is unknown.
model_name = resolve_model_name(args.model)
if model_name == "none":
    print(
        "Model ", model_name,
        " does not exist. Please download this model or use an existing model in list_model"
    )
    exit(0)
args.model = model_name

# create recognizer from the resolved CLI arguments
recognizer = read_recognizer(args)

# run inference and print the phone transcript to stdout
phones = recognizer.recognize(args.input, args.lang)
print(phones)
def test_latest_nonempty(self):
    """Smoke test: the 'latest' model yields a non-empty transcript for sample.wav."""
    sample = Path(__file__).parent.parent / 'sample.wav'
    recognizer = read_recognizer('latest')
    transcript = recognizer.recognize(sample)
    self.assertTrue(len(transcript) > 0)
def test_eng_nonempty_external_model(self):
    """Smoke test: a model loaded from an external path transcribes sample.wav ('eng') non-emptily."""
    base_dir = Path(__file__).parent.parent
    sample = base_dir / 'sample.wav'
    external_model_dir = base_dir / 'test_model'
    recognizer = read_recognizer('latest', external_model_dir)
    transcript = recognizer.recognize(sample, 'eng')
    self.assertTrue(len(transcript) > 0)
import json
import pickle
import subprocess

from allosaurus.app import read_recognizer

# Shared recognizer instance; loaded once at import time so repeated
# convert() calls reuse the same model.
model = read_recognizer()

# Dataset domains and splits this script knows how to process.
folders = ['map', 'music', 'video', 'weather']
sets = ['train', 'development']


def convert(folder, set):
    """Phone-transcribe every utterance of one dataset split and pickle the result.

    Reads ``../data/<folder>/<set>.json``, runs the Allosaurus recognizer on
    each utterance's ``../data/<folder>/audios/<wav_id>.wav``, and writes a
    dict mapping ``wav_id`` -> list of phone strings to
    ``../data/<folder>/<set>_transcript.pkl``.

    :param folder: dataset domain, one of ``folders``.
    :param set: dataset split name, one of ``sets``.
        NOTE: the name shadows the builtin ``set``; kept unchanged for
        backward compatibility with existing (keyword) callers.
    """
    all_phones = {}

    # Load all utterance metadata first, so the JSON handle is closed before
    # the (slow) recognition loop starts.
    with open(f'../data/{folder}/{set}.json', 'r', encoding='utf-8') as f:
        entries = json.load(f)

    for entry in entries:
        for utterance in entry['utterances']:
            wav_id = utterance['wav_id']
            try:
                transcript = model.recognize(
                    f'../data/{folder}/audios/{wav_id}.wav')
            except Exception:
                # Best-effort: report unreadable/failed recordings and move on.
                print('Problematic file:', wav_id)
                continue
            # str.split already yields the list of phone tokens.
            all_phones[wav_id] = transcript.split()

    with open(f'../data/{folder}/{set}_transcript.pkl', 'wb') as ff:
        pickle.dump(all_phones, ff)