def preprocess_arabic(text, preprocess_config, bw=False):
    """Phonetise ``text`` word by word and encode it with ``text_to_sequence``.

    Args:
        text: Input string. Trailing characters contained in ``punctuation``
            are stripped before processing.
        preprocess_config: Nested mapping; the value at
            ``["preprocessing"]["text"]["text_cleaners"]`` is forwarded to
            ``text_to_sequence``.
        bw: When true, every character of ``text`` that is a key of
            ``bw2ar`` is replaced by its mapped value first (presumably
            Buckwalter -> Arabic transliteration; confirm against ``bw2ar``).

    Returns:
        ``np.array`` built from the output of ``text_to_sequence``.

    Side effects:
        Prints the (possibly transliterated) text and the phoneme string.
    """
    text = text.rstrip(punctuation)
    if bw:
        text = "".join([bw2ar[ch] if ch in bw2ar else ch for ch in text])

    word_phones = []
    for word in text.split(' '):
        # Substring test: this also skips the empty strings produced by
        # consecutive spaces, since '' is contained in any string.
        if word in punctuation:
            continue
        if word.strip():
            # Only the first pronunciation returned by phonetise is used.
            word_phones.append(phonetise(word)[0])

    # Join words with a single space. Plain concatenation fused the last
    # phoneme of one word with the first phoneme of the next one, because
    # phonetise output carries no trailing separator.
    # The whole utterance is wrapped in one pair of curly braces.
    phones = "{" + " ".join(word_phones) + "}"

    print("Raw Text Sequence: {}".format(text))
    print("Phoneme Sequence: {}".format(phones))

    # TODO: marker carried over from the original code (no description given).
    sequence = text_to_sequence(
        phones, preprocess_config["preprocessing"]["text"]["text_cleaners"]
    )
    return np.array(sequence)
def _maybe_get_arpabet(word):
    """Return one pronunciation of ``word`` wrapped in curly braces.

    ``phonetise(word)`` is expected to return an indexable collection of
    pronunciations. If it holds exactly one entry, that entry is used;
    otherwise the entry at index 1 is used.
    """
    variants = phonetise(word)
    if len(variants) == 1:
        chosen = variants[0]
    else:
        # Any other length selects the second entry.
        chosen = variants[1]
    return '{%s}' % chosen
def test_2():
    """phonetise('ثٌمّ') must return exactly ['^ u0 n mm']."""
    result = phonetise('ثٌمّ')
    # A single pronunciation is expected for this input.
    assert result == ['^ u0 n mm']
def test_1():
    """phonetise('ثٌمَّ') must return exactly ['^ u0 n mm a']."""
    result = phonetise('ثٌمَّ')
    # A single pronunciation is expected for this input.
    assert result == ['^ u0 n mm a']
def test_3():
    """phonetise('ثكمّ') must return exactly ['^ k mm']."""
    result = phonetise('ثكمّ')
    # A single pronunciation is expected for this input.
    assert result == ['^ k mm']