Beispiel #1
0
def preprocess_arabic(text, preprocess_config, bw=False):
    """Convert Arabic text into a NumPy array of symbol IDs.

    Trailing punctuation is stripped, the text is optionally mapped through
    ``bw2ar`` character by character, every space-separated word is passed
    to ``phonetise`` (keeping its first pronunciation), and the resulting
    phoneme string is wrapped in braces and handed to ``text_to_sequence``.

    Args:
        text: Input sentence.
        preprocess_config: Nested config mapping; only
            ``["preprocessing"]["text"]["text_cleaners"]`` is read here.
        bw: When true, each character found in ``bw2ar`` is replaced by its
            mapped value first (presumably Buckwalter -> Arabic script;
            confirm against the definition of ``bw2ar``).

    Returns:
        ``np.ndarray`` built from the output of ``text_to_sequence``.
    """
    text = text.rstrip(punctuation)
    if bw:
        # Characters without an entry in bw2ar are kept unchanged.
        text = "".join(bw2ar.get(char, char) for char in text)

    word_phones = []
    for word in text.split(' '):
        # Skip tokens that are found in `punctuation` as well as
        # empty/whitespace-only tokens (e.g. from consecutive spaces).
        if word in punctuation or not word.strip():
            continue
        first_pronunciation = phonetise(word)[0].strip()
        if first_pronunciation:
            word_phones.append(first_pronunciation)

    # Join the per-word phoneme strings with a single space. Plain
    # concatenation would fuse the last phoneme of one word with the first
    # phoneme of the next into a single, unknown symbol.
    phones = "{" + " ".join(word_phones) + "}"

    print("Raw Text Sequence: {}".format(text))
    print("Phoneme Sequence: {}".format(phones))

    # TODO: marked as unfinished ("TO_DO") by the original author.
    cleaners = preprocess_config["preprocessing"]["text"]["text_cleaners"]
    return np.array(text_to_sequence(phones, cleaners))
Beispiel #2
0
def _maybe_get_arpabet(word):
    """Return a brace-wrapped pronunciation of ``word`` from ``phonetise``.

    When ``phonetise`` yields exactly one pronunciation, that one is used;
    in every other case the entry at index 1 is used (so an empty result
    fails on the index lookup).
    """
    candidates = phonetise(word)
    if len(candidates) == 1:
        chosen = candidates[0]
    else:
        chosen = candidates[1]
    return '{%s}' % chosen
Beispiel #3
0
def test_2():
    """phonetise returns the single pronunciation '^ u0 n mm' for this word."""
    assert phonetise('ثٌمّ') == ['^ u0 n mm']
Beispiel #4
0
def test_1():
    """phonetise returns the single pronunciation '^ u0 n mm a' for this word."""
    assert phonetise('ثٌمَّ') == ['^ u0 n mm a']
Beispiel #5
0
def test_3():
    """phonetise returns the single pronunciation '^ k mm' for this word."""
    assert phonetise('ثكمّ') == ['^ k mm']