コード例 #1
0
ファイル: translate.py プロジェクト: sgmenda/Inglish-Meker
def convert(i):
    """Respell ``i`` from its IPA transcription.

    The transcription is taken from ``ipa.convert`` (primary stress marks)
    when ``i`` passes ``ipa.isin_cmu``.  If ``i`` has an entry in
    ``mapEnglishToAnglish`` and that entry passes ``ipa.isin_cmu``, the
    entry's transcription takes precedence.

    Returns ``[(i, respelled)]``, or ``[]`` when no transcription was found.
    """
    transcription = ""
    if ipa.isin_cmu(i):
        transcription = ipa.convert(i, stress_marks="primary")

    if i in mapEnglishToAnglish:
        mapped = mapEnglishToAnglish[i]
        if ipa.isin_cmu(mapped):
            transcription = ipa.convert(mapped, stress_marks="primary")

    if transcription == "":
        return []

    # Applied strictly in this order: "mən" must be handled before "ə",
    # and "j" must become "y" before "ʤ"/"ʒ" introduce new "j" letters.
    substitutions = (
        ("ɑ", "aa"),
        ("æ", "a"),
        ("mən", "mon"),
        ("ə", "e"),
        ("ˈ", ""),
        ("ɛ", "e"),
        ("ɪ", "i"),
        ("ɔ", "o"),
        ("ʊ", "u"),
        ("ʧ", "ch"),
        ("ŋ", "ng"),
        ("ʃ", "sh"),
        ("j", "y"),
        ("ʤ", "j"),
        ("ʒ", "j"),  # z
        ("θ", "þ"),
    )
    for symbol, spelling in substitutions:
        transcription = transcription.replace(symbol, spelling)

    return [(i, transcription)]
コード例 #2
0
ファイル: englishtopinyin.py プロジェクト: olj1/cedictionary
def convert_english_to_ipa(english_string):
    """Convert English text to IPA via ``ipa.convert``.

    Returns a list of whitespace-separated transcription tokens when
    ``english_string`` contains a space, otherwise the transcription as a
    single string.
    """
    transcription = ipa.convert(english_string)
    if ' ' in english_string:
        # str.split() already yields the list of tokens.  The previous code
        # called .split(' ') on that list, which raised AttributeError.
        return transcription.split()
    return transcription
コード例 #3
0
def pigify(word):
    """Return a rearranged IPA rendering of ``word``.

    Lookup order:
      1. ``TRANSFORMS``: transcribe the replacement word instead.
      2. ``SKIPS``: transcribe ``word`` unchanged.
      3. ``CUSTOM_CONVERSIONS``: return the stored value as is.
      4. Otherwise split the transcription with ``consonant_cluster`` and
         return its second part, then its first part, then "eɪ".
    """
    if word in TRANSFORMS:
        return ipa.convert(TRANSFORMS[word])
    if word in SKIPS:
        return ipa.convert(word)
    if word in CUSTOM_CONVERSIONS:
        return CUSTOM_CONVERSIONS[word]

    # A special "eɪz" suffix for words ending in "'s" was disabled upstream;
    # every word gets the same suffix.
    pieces = consonant_cluster(ipa.convert(word))
    leading, remainder = pieces[0], pieces[1]
    return remainder + leading + "eɪ"
コード例 #4
0
ファイル: views.py プロジェクト: gadola/quiz
def taoipa(request):
    """Store a slash-delimited IPA transcription on every ``Quizziz`` row.

    Each object's ``ipa`` attribute is set to ``/<transcription of word>/``
    and saved; the view then redirects to ``'/'``.
    """
    for entry in Quizziz.objects.all():
        transcription = ipa.convert(entry.word)
        entry.ipa = '/' + transcription + '/'
        entry.save()
    return redirect('/')
コード例 #5
0
def write_wordlist_ipa(overwrite=True):
    """Sample 100 words, map each to its number sequence, and save as JSON.

    A word is kept only when its transcription contains no ``'*'`` and the
    number sequence returned by ``word_to_nums`` has between 2 and 5 entries.

    Args:
        overwrite: When true, ``wordlist_major.json`` is replaced by the new
            mapping.  When false, the new mapping is merged into the existing
            file's contents (a missing file is treated as empty).
    """
    # min/max count of numbers
    min_length = 2
    max_length = 5

    words = random.sample(get_word_list(), 100)
    major_dict = {}
    with tqdm(words) as tqdm_it:
        for word in tqdm_it:
            word_ipa = convert(word)
            nums = word_to_nums(word_ipa)
            # don't save this value if too many/few numbers
            if '*' not in word_ipa and min_length <= len(nums) <= max_length:
                major_dict[word] = nums

    out_fname = 'wordlist_major.json'
    if overwrite:
        # The old contents are discarded, so there is no reason to read
        # (or require the existence of) the previous file.
        result = major_dict
    else:
        try:
            with open(out_fname, 'r', encoding='utf-8') as infile:
                result = json.load(infile)
        except FileNotFoundError:
            result = {}
        result.update(major_dict)

    # save the dictionary to a json file
    with open(out_fname, 'w', encoding='utf-8') as outfile:
        json.dump(result,
                  outfile,
                  indent=4,
                  sort_keys=True,
                  ensure_ascii=False)
コード例 #6
0
def draw_word(text, main_color_rgb):
    """Draw ``text`` and its slash-delimited IPA transcription on a new image.

    Both strings are horizontally centred on a white RGBA canvas sized to
    fit them, filled with ``main_color_rgb`` and outlined in black.
    Returns the image.
    """
    gap = 40
    phonics = '/{}/'.format(ipa.convert(text))

    word_font = ImageFont.truetype('Myriad Pro Bold.ttf', 70)
    word_w, word_h = word_font.getsize(text)

    phonics_font = ImageFont.truetype('CALIBRI.TTF', 50)
    phonics_w, phonics_h = phonics_font.getsize(phonics)

    canvas_w = max(word_w, phonics_w)
    canvas_h = word_h + phonics_h + gap

    img = Image.new('RGBA', (canvas_w, canvas_h), (255, 255, 255, 255))
    draw = ImageDraw.Draw(img, 'RGBA')

    # The word sits at the top of the canvas.
    word_origin = (int((canvas_w - word_w) / 2), 0)
    draw.text(word_origin,
              text=text,
              fill=main_color_rgb,
              font=word_font,
              stroke_width=2,
              stroke_fill='black')

    # The transcription is offset vertically by half the space not taken
    # by the word, plus the fixed gap.
    phonics_origin = (int((canvas_w - phonics_w) / 2),
                      int((canvas_h - word_h) / 2) + gap)
    draw.text(phonics_origin,
              text=phonics,
              fill=main_color_rgb,
              font=phonics_font,
              stroke_width=1,
              stroke_fill='black')

    return img
コード例 #7
0
def ssmlify(sentence):
    """Build an SSML document from the spoonerised form of ``sentence``.

    ``sentence`` and its IPA transcription are each passed through
    ``spoonerify`` and split on whitespace.  Words are then emitted in
    order:

    * a word found in ``SWEARS`` is emitted with the expletive template
      (no break or newline follows it);
    * the first and last words are emitted as ``<phoneme>`` elements; when
      the transcription ends in ``'*'`` the star is dropped and the result
      of ``consonant_cluster`` fills the fallback template instead;
    * every other word is emitted as plain text.

    All words but the last are followed by an x-weak ``<break>``.
    """
    swear_template = """<phoneme alphabet="ipa" ph="%s"/>
<say-as interpret-as="expletive">%s</say-as>
<phoneme alphabet="ipa" ph="%s"/>\n"""
    phoneme_template = '<phoneme alphabet="ipa" ph="%s">%s</phoneme>'
    fallback_template = '<phoneme alphabet="ipa" ph="%s"/>%s'

    spoonerism = spoonerify(sentence)
    spoonerism_ipa = spoonerify(ipa.convert(sentence))
    plain_words = spoonerism.split()
    ipa_words = spoonerism_ipa.split()
    last_index = len(plain_words) - 1

    pieces = ['<speak><prosody rate="slow">\n']
    for idx, ipa_word in enumerate(ipa_words):
        plain = plain_words[idx]
        if plain.lower() in SWEARS:
            pieces.append(swear_template % (consonant_cluster(ipa_word)[0],
                                            plain[:-2], ipa_word[-1]))
            continue

        if idx >= last_index or idx <= 0:
            if ipa_word[-1] != '*':
                pieces.append(phoneme_template % (ipa_word, plain))
            else:
                trimmed = ipa_word[:-1]
                pieces.append(fallback_template % consonant_cluster(trimmed))
        else:
            pieces.append(plain)

        if idx < last_index:
            pieces.append('<break strength="x-weak"/>')
        pieces.append('\n')

    pieces.append('</prosody></speak>\n')
    return ''.join(pieces)
コード例 #8
0
def list_phonetic_rhymes(word):
    """Return transcriptions of the rhymes of ``word``.

    Rhymes come from ``pronouncing.rhymes``; each is transcribed with
    ``p.convert`` and dropped when the transcription contains ``'*'``.
    """
    transcriptions = (p.convert(rhyme) for rhyme in pronouncing.rhymes(word))
    return [item for item in transcriptions if "*" not in item]
コード例 #9
0
def upload():
    """Transcribe a fixed audio file and render the recording page.

    If the hard-coded WAV file exists, it is passed to ``get_post`` and the
    resulting text is converted to IPA; otherwise both values stay empty
    and "No file" is printed.
    """
    audio_dir = 'C:/Users/raymondzhao/myproject/dev.speech/speech/audio/'
    # Alternative input that is currently disabled: audio_dir + 'recording.wav'
    audio_path = audio_dir + 'english81.wav'

    txt = ""
    _ipa = ""

    if not os.path.isfile(audio_path):
        print("No file")
    else:
        # Writing request.data to the file and wrapping it in sr.AudioFile
        # is disabled upstream; the path itself is handed to get_post.
        txt = get_post(audio_path)
        _ipa = ipa.convert(txt)

    return render_template('ispeech/record.html', posts=txt, _ipa=_ipa)
コード例 #10
0
def get_caption():
    """Return the IPA transcription of the current caption's first line.

    Reads ``<text_output_dir>/caption-<current_result>.txt``.  Returns an
    empty string when ``current_result`` is not positive.
    """
    global current_result
    if current_result > 0:
        caption_path = (text_output_dir + "/caption-" + str(current_result) +
                        ".txt")
        # Use a context manager so the handle is closed deterministically;
        # the previous inline open() never closed it.
        with open(caption_path) as caption_file:
            first_line = caption_file.readline()
        return ipa.convert(first_line)
    return ""
コード例 #11
0
def phonetic_difficulty(word):
    """Return an integer difficulty score for ``word``.

    One point is added for every character of the cleaned transcription
    per sound class it belongs to (a character in several classes scores
    several times), plus one point each when: the last character is not a
    listed vowel, ``ipa.syllable_count`` is at least 3, ``check_clusters``
    returns True, and ``homorganic`` returns False.
    """
    word = clean(word)
    word_ipa = clean(ipa.convert(word))

    vowels = 'iɪeɛæɑouʊʌə'
    sound_classes = (
        'kŋgxw',           # dorsals
        'rfvszxhθðʃʒlʧʤ',  # fricatives / affricates / liquids
        'r',               # rhotic
    )

    score = 0
    for symbol in word_ipa:
        for members in sound_classes:
            if symbol in members:
                score += 1

    if word_ipa[-1] not in vowels:
        score += 1
    if ipa.syllable_count(word) >= 3:
        score += 1
    if check_clusters(word_ipa) == True:
        score += 1
    if homorganic(word_ipa) == False:
        score += 1
    return score
コード例 #12
0
def rec():
    """Listen on the microphone forever and record newly recognised speech.

    Each pass captures audio, runs Google speech recognition on it and
    converts the result to IPA.  When the IPA text differs from the global
    ``word``, ``word`` is updated and a row is inserted into the ``words``
    table of ``words.db``; otherwise (or when the audio could not be
    understood) ``word`` is set to ``'?'``.
    """
    global word
    global word_list

    while True:
        try:
            r = sr.Recognizer()
            r.energy_threshold = 3000
            mic = sr.Microphone(device_index=0)
            with mic as source:
                print("listening...")
                r.adjust_for_ambient_noise(source)
                audio = r.listen(source)

            speech = r.recognize_google(audio)

            text = ipa.convert(speech)
            if text != word:
                word = text
                # Open the connection only when a row is written, and always
                # close it.  Previously one was opened on every pass and
                # leaked whenever no insert happened or an error occurred.
                conn = sqlite3.connect('words.db', check_same_thread=False)
                try:
                    # NOTE(review): the IPA text goes into column `word` and
                    # the recognised speech into column `ipa`; this looks
                    # swapped, confirm against the schema before changing.
                    conn.execute("INSERT INTO words (word, ipa) VALUES(?,?)",
                                 (text, speech))
                    conn.commit()
                finally:
                    conn.close()
            else:
                word = '?'
        except sr.UnknownValueError:
            word = '?'
コード例 #13
0
def convert_to_phonetic(lines):
    """Return the transcription of every entry in ``lines``.

    Each line is converted with ``eng_to_ipa.convert`` using
    ``keep_punct=False``.
    """
    return [eng_to_ipa.convert(text, keep_punct=False) for text in lines]
コード例 #14
0
ファイル: utils.py プロジェクト: samurzele/tacotron2
def convert_to_ipa(texts):
    """Replace element 1 of every item in ``texts`` with its transcription.

    Tokens that ``ipa.convert`` left ending in ``'*'`` are substituted with
    the Epitran ('eng-Latn') transliteration of the token without the star.
    The items are modified in place; nothing is returned.
    """
    epi = epitran.Epitran('eng-Latn')
    for pair in texts:
        pair[1] = ipa.convert(pair[1])
        for starred in re.findall(r"[^ ]{0,}\*", pair[1]):
            fallback = epi.transliterate(starred[:-1])
            pair[1] = pair[1].replace(starred, fallback)
コード例 #15
0
def str_to_phonetics(texts, *params):
    """Return the transcription of ``str(item)`` for every item in ``texts``.

    Extra positional arguments are accepted and ignored.
    """
    return [ipa.convert(str(item)) for item in texts]
コード例 #16
0
    def transcribe(self,
                   word,
                   special=False,
                   phonetic=False,
                   derivative=False,
                   debug=False) -> str:
        """Transcribe an English ``word`` into Hangul.

        Only ASCII letters and the apostrophe are kept for the lookup.  The
        method stores intermediate results on the instance (``self.phonetic``,
        ``self.transcribed``) and relies on ``self.syllabify()`` and
        ``self.phonetic_change()`` having run in that order.

        Args:
            word: The word to transcribe.
            special: When true, a dropped character equal to ``word[0]`` is
                remembered as a prefix and one equal to ``word[-1]`` as a
                suffix; they are re-attached in the default output format.
            phonetic: When true, return ``[<ipa>]<hangul>``.
            derivative: When true, return ``[<ipa>]<<syllabified ipa>><hangul>``;
                takes precedence over ``phonetic``.
            debug: When true, print the intermediate phonetic data.

        Returns:
            The formatted transcription, or ``word`` unchanged when the
            transcription from ``eng_to_ipa.convert`` contains ``'*'``.
        """

        pre, pure_word, post = '', '', ''
        for l in word:
            # Keep A-Z (0x41-0x5A), a-z (0x61-0x7A) and the apostrophe (0x27).
            if (ord(l) >= 0x41 and ord(l) <= 0x5A) or (
                    ord(l) >= 0x61 and ord(l) <= 0x7A) or (ord(l) == 0x27):
                pure_word += l
            elif special:
                # NOTE(review): this compares by character value, not by
                # position, so any dropped character equal to word[0] or
                # word[-1] matches -- confirm this is intended.
                if l == word[0]:
                    pre = l
                elif l == word[-1]:
                    post = l

        # Look up the pronunciation.
        self.phonetic = eng_to_ipa.convert(pure_word)
        if '*' in self.phonetic:
            return word

        # A manual correction in self.errata overrides the looked-up value.
        if self.errata.get(pure_word):
            self.phonetic = self.errata.get(pure_word)

        # Split into syllables.
        self.syllabify()
        # for bug tracking
        if debug:
            print(self.phonetic, self.phonetic_syllabic, self.CV)

        # Substitute Hangul for each symbol; '/' separators are kept as is and
        # symbols found in neither table are dropped.
        self.transcribed = ''
        for i in self.phonetic_syllabic:
            if i == '/':
                self.transcribed += i
            else:
                if self.consonants.get(i):
                    self.transcribed += self.consonants.get(i)
                elif self.vowels.get(i):
                    self.transcribed += self.vowels.get(i)

        # Apply phonological changes.
        self.phonetic_change()
        hangul = ''
        # Combine initial/medial/final jamo for each '/'-separated piece.
        for i in self.transcribed.split('/'):
            hangul += self.compose(i)

        if derivative:
            output = '[{}]<{}>{}'.format(self.phonetic, self.phonetic_syllabic,
                                         hangul)
        elif phonetic:
            output = '[{}]{}'.format(self.phonetic, hangul)
        else:
            output = pre + hangul + post
        return output
コード例 #17
0
def check_clusters(word):
    """Report whether the transcription of ``word`` contains a cluster.

    A cluster is a run of two or more consecutive characters that are none
    of: the listed vowel symbols, a digit, or a non-word character.

    Returns:
        True when such a run is found, False otherwise.
    """
    transcription = ipa.convert(word)
    # Raw string: in a normal literal "\d" and "\W" are invalid string
    # escapes (DeprecationWarning, SyntaxWarning from Python 3.12).  The
    # compiled pattern is unchanged.
    pattern = r'[^iɪeɛæɑouʊʌə\d\W]{2,}'
    return re.search(pattern, transcription) is not None
コード例 #18
0
def convert_to_ipa(texts):
    """Replace element 1 of every item in ``texts`` with its transcription.

    The text is first passed through ``english_cleaners`` and then
    ``ipa.convert``.  Tokens left ending in ``'*'`` are substituted with
    the Epitran ('eng-Latn', ligatures enabled) transliteration of the
    token without the star.  Items are modified in place; nothing is
    returned.
    """
    print("Converting training files to IPA notation...")
    epi = epitran.Epitran('eng-Latn', ligatures=True)
    for pair in texts:
        cleaned = english_cleaners(pair[1])
        pair[1] = ipa.convert(cleaned)
        for starred in re.findall(r"[^ ]{0,}\*", pair[1]):
            fallback = epi.transliterate(starred[:-1])
            pair[1] = pair[1].replace(starred, fallback)
コード例 #19
0
def columnize(df, golden, response):
    """Add match, token, stem and IPA columns to ``df`` and return it.

    ``golden`` and ``response`` are the names of the reference and answer
    columns.  The response column is normalised in place: '?' and
    lowercase 'x' are removed and the text is lowercased before comparison,
    and 'emare' is rewritten to "i'mer" before transcription.
    """
    # Normalise the responses (strip '?' and 'x', then lowercase).
    df[response] = [
        answer.replace('?', '').replace('x', '').lower()
        for answer in df[response]
    ]

    # Raw match
    df["match.correct"] = np.where(df[response] == df[golden], 1, 0)

    # Tokenize
    df['token.original'] = [nltk.word_tokenize(text) for text in df[golden]]
    df['token.response'] = [nltk.word_tokenize(text) for text in df[response]]

    # Stem
    df['stem.original'] = [snowball_stemmer.stem(text) for text in df[golden]]
    df['stem.response'] = [
        snowball_stemmer.stem(text) for text in df[response]
    ]

    # Clean
    df[response] = [answer.replace('emare', "i'mer") for answer in df[response]]

    # IPA columns; '*' markers are removed from the response side only.
    df['trans.original'] = df[golden].map(ipa.convert)
    df['trans.response'] = df[response].map(ipa.convert)
    df["trans.response"] = [
        transcription.replace('*', '')
        for transcription in df["trans.response"]
    ]
    return df
コード例 #20
0
def make_line(tag_number, ru, en):
    """Build one output row for the English word ``en``.

    The row holds, in order: a progress marker, a tag label built from
    ``tag_number``, the English word, its IPA transcription, the ``ru``
    text, an empty field and the samples from ``get_samples``.  Samples
    fall back to an empty string when fetching them raises an HTTP error.
    """
    try:
        samples = get_samples(en)
    except requests.exceptions.HTTPError:
        print('Проблема с полючением примеров.')
        samples = ''

    tag_label = f'Слова {tag_number}'
    transcription = ipa.convert(en)
    return ['0%', tag_label, en, transcription, ru, '', samples]
コード例 #21
0
def generate_from_file(tacotron2_path, waveglow_path, text_file, output_directory):
  """Synthesise one WAV file per line of ``text_file``.

  Args:
    tacotron2_path: Checkpoint loaded with ``torch.load``; its ``'state_dict'``
      entry is loaded into the model.
    waveglow_path: Checkpoint loaded with ``torch.load``; its ``'model'``
      entry is used as the vocoder.
    text_file: Text file with one entry to synthesise per line.
    output_directory: Directory for the generated files; created if missing.

  Both models are moved to CUDA and switched to half precision.  Each output
  file is named after the first four space-separated tokens of its entry,
  joined by underscores and lowercased.
  """

  # Make synthesis paths

  if not os.path.exists(output_directory):
    os.makedirs(output_directory)
    print("Creating directory " + output_directory + "...")

  hparams = create_hparams()
  hparams.sampling_rate = 22050

  print("Loading models...")
  model = load_model(hparams)
  model.load_state_dict(torch.load(tacotron2_path)['state_dict'])
  _ = model.cuda().eval().half()

  waveglow = torch.load(waveglow_path)['model']
  waveglow.cuda().eval().half()
  # The entries of waveglow.convinv are converted back to float after the
  # half() call above.
  for k in waveglow.convinv:
      k.float()
  denoiser = Denoiser(waveglow)

  # Read every line of the input file, stripped of surrounding whitespace.
  genlist = []
  with open(text_file) as file:
    for line in file:
      genlist.append(line.strip())

  for entry in genlist:
    # The file name is derived from the raw entry, before any preprocessing.
    wav_name = "_".join(entry.split(" ")[:4]).lower() + ".wav"

    # NOTE(review): the Epitran instance is rebuilt on every iteration even
    # though its arguments never change -- it could likely be hoisted.
    epi = epitran.Epitran('eng-Latn', ligatures = True)
    if hparams.preprocessing == "ipa":
      entry = ipa.convert(english_cleaners(entry))
      # Tokens that ipa.convert left ending in '*' are replaced with the
      # Epitran transliteration of the token without the star.
      foreign_words = re.findall(r"[^ ]{0,}\*", entry)
      for word in foreign_words:
        entry = entry.replace(word, epi.transliterate(word[0:len(word)-1]))
    if hparams.preprocessing == "arpabet":
      entry = make_arpabet(entry)

    # Text sequencer
    # Already-preprocessed text is sequenced without cleaners; otherwise the
    # 'english_cleaners' cleaner is applied by text_to_sequence.
    if hparams.preprocessing is not None:
      sequence = np.array(text_to_sequence(entry, None))[None, :]
    else:
      sequence = np.array(text_to_sequence(entry, ['english_cleaners']))[None, :]
    sequence = torch.autograd.Variable(
      torch.from_numpy(sequence)).cuda().long()

    # Synthesis
    mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
    with torch.no_grad():
      audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)
    audio_denoised = denoiser(audio, strength=0.01)[:, 0]

    # Save audio
    print ("Saving " + wav_name)
    write(os.path.join(output_directory, wav_name), hparams.sampling_rate, audio_denoised[0].data.cpu().numpy())
コード例 #22
0
def word_to_number(word: str) -> str:
    """Map ``word`` to a string of values looked up in ``major``.

    The transcription of ``word`` is scanned for runs of a single repeated
    symbol drawn from the keys of ``major``; each run contributes the
    value stored for its symbol.  A trailing ``'*'`` is appended when the
    transcription itself ends with ``'*'``.
    """
    symbol_class = ''.join(major.keys())
    run_pattern = re.compile(rf"([{symbol_class}])\1*")

    converted = ipa.convert(word)
    groups = run_pattern.findall(converted)
    logging.debug(f'{word} -> {converted} -> {groups}')

    digits = ''.join(str(major[symbol]) for symbol in groups)
    if converted.endswith('*'):
        return digits + '*'
    return digits
コード例 #23
0
def get_word_IPA(file):
    """Pickle a word -> transcription mapping for the words listed in ``file``.

    ``file`` is read one word per line (surrounding whitespace stripped).
    Only words for which ``ipa.isin_cmu`` is true are included.  The
    mapping is written to ``word_ipa_dict.pkl`` in the working directory.
    """
    # Read the input first, inside a context manager: the handle used to
    # leak, and the output was truncated before the input was even opened.
    with open(file) as word_file:
        word_list = [line.strip() for line in word_file]

    word_ipa_dict = {
        word: ipa.convert(word)
        for word in word_list
        if ipa.isin_cmu(word)
    }

    with open("word_ipa_dict.pkl", "wb") as out_file:
        pickle.dump(word_ipa_dict, out_file)
コード例 #24
0
def _first_available(attempts):
    """Return the result of the first callable that succeeds, else 'not available'."""
    for attempt in attempts:
        try:
            return attempt()
        except Exception:
            # Best-effort lookup: fall through to the next provider.
            continue
    return 'not available'


def _translate_word(word):
    """Translate ``word`` into TARGET_LANGUAGE, trying each provider in turn."""

    def _google():
        translator = google_translator()
        return translator.translate(word, lang_tgt=TARGET_LANGUAGE)

    def _deepl():
        return translators.deepl(word,
                                 from_language='en',
                                 to_language=TARGET_LANGUAGE)

    def _bing():
        return translators.bing(word,
                                from_language='en',
                                to_language=TARGET_LANGUAGE)

    def _baidu():
        return translators.baidu(word,
                                 from_language='en',
                                 to_language=TARGET_LANGUAGE)

    return _first_available((_google, _deepl, _bing, _baidu))


def _example_sentence(word):
    """Fetch an example sentence for ``word``, trying each site in turn."""

    def _wordhippo():
        r = requests.post(
            "https://www.wordhippo.com/what-is/process-form.html",
            data={
                'word': word,
                'action': 'Sentences'
            })
        soup = BS(r.text, features="html.parser")
        return soup.findAll('tr',
                            {'id': 'gexv2row1'})[0].findAll('td')[0].text

    def _yourdictionary():
        page = urllib.request.urlopen(
            'https://sentence.yourdictionary.com/' +
            word).read().decode("utf8")
        soup = BS(page, features="html.parser")
        return soup.findAll('div', {'class': 'sentence-item'})[0].text

    return _first_available((_wordhippo, _yourdictionary))


def getRow(word):
    """Collect transcription, translation and an example sentence for ``word``.

    Returns:
        A dict with keys ``'word'``, ``'transcrypt'``, ``'translation'`` and
        ``'sentence'``.  The translation and sentence are
        ``'not available'`` when every provider fails.  Returns ``None``
        when the transcription itself fails.

    Failures are caught with ``except Exception`` rather than a bare
    ``except`` so that KeyboardInterrupt and SystemExit still propagate.
    """
    try:
        transcrypt = ipa.convert(word)
    except Exception:
        return None

    translation = _translate_word(word)
    sentence = _example_sentence(word)
    return {
        'word': word,
        'transcrypt': transcrypt,
        'translation': translation,
        'sentence': sentence
    }
コード例 #25
0
def populate_ipa_dict_from_text(text):
    """Add transcriptions for the not-yet-known words of ``text`` to the saved dict.

    The dictionary is loaded with ``load_ipa_dict``, extended with the
    result of ``eng_to_ipa.convert`` (all pronunciations, no punctuation,
    no stress marks) for every preprocessed word it does not contain yet,
    and written back with ``save_ipa_dict``.
    """
    ipa_dict = load_ipa_dict()
    unseen = set(preprocess(text).split()) - set(ipa_dict.keys())
    for token in unseen:
        ipa_dict[token] = eng_to_ipa.convert(token,
                                             retrieve_all=True,
                                             keep_punct=False,
                                             stress_marks=False)
    save_ipa_dict(ipa_dict)
コード例 #26
0
 def name2ipa(self, name_dict):
     """Build a name table with an added IPA column.

     ``name_dict`` is turned into a DataFrame (one row per key, columns
     "name" and "sex"); every name is transcribed with ``ipa.convert``
     into a new "ipa" column.  The elapsed time is printed in hours,
     since this takes a while.
     """
     started = time.time()
     frame = pd.DataFrame.from_dict(name_dict,
                                    orient="index",
                                    columns=["name", "sex"])
     frame["ipa"] = frame["name"].apply(ipa.convert)
     elapsed_hours = (time.time() - started) / 3600.
     print("{} hours".format(elapsed_hours))
     return frame
コード例 #27
0
def homorganic(word):
    """Report whether the transcription of ``word`` has two adjacent listed consonants.

    The word is transcribed with ``ipa.convert`` (the transcription is
    printed), then scanned for any pair of neighbouring characters that
    both appear in the consonant list below.

    Returns:
        True when such a pair exists, False otherwise.  This includes
        transcriptions shorter than two characters, which previously
        raised UnboundLocalError because the result was never assigned.
    """
    dor_lab_cor = 'kŋgxwpbfvmwtdsznlʧʤrj'
    word = ipa.convert(word)
    print(word)
    return any(
        first in dor_lab_cor and second in dor_lab_cor
        for first, second in zip(word, word[1:])
    )
コード例 #28
0
def phonetics(textfile):
    """Write the transcription of every line of ``textfile`` to a sibling file.

    The output path is ``textfile`` with its last four characters replaced
    by ``_phonetics.txt``.  Each input line is passed to ``p.convert`` and
    written followed by a newline.  The input lines are printed at the end.
    """
    # Read the input first so a missing or unreadable input no longer
    # leaves an empty output file behind.
    with open(textfile, 'r', encoding="utf-8") as source:
        lines = source.readlines()

    # The context manager closes the output even if a conversion fails.
    with open(textfile[:-4] + "_phonetics.txt", "w",
              encoding="utf-8") as file_phonetics:
        for line in lines:
            file_phonetics.write(p.convert(line) + "\n")

    print(lines)
コード例 #29
0
def new(input):
    """Return the best-ranked respelling of ``input`` derived from its IPA.

    An empty input is returned unchanged.  If the transcription ends in
    ``'*'`` the lowercased input is returned after printing an error.
    Otherwise candidate spellings are generated with ``poss``, expanded
    with ``filter`` and ranked by ``rank.dLev`` against the input; the
    first candidate with the lowest score wins.
    """
    if len(input) == 0:
        return input

    # Convert the word to IPA.
    input = input.lower()
    trans = ipa.convert(input)
    if trans[-1] == '*':
        print(trans)
        print("error, no transcription")
        return input

    # Simplify the transcription.  Order matters: "ngg"/"ngk" only exist
    # after "ŋ" has been expanded, and the digits stand in for diphthongs.
    rewrites = (
        ("ˈ", ""),
        ("ˌ", ""),
        ("iɛ", "ie"),
        ("iɪ", "i"),
        ("æŋ", "en"),
        ("ŋ", "ng"),
        ("ð", "th"),
        ("θ", "th"),
        ("ʃ", "sh"),
        ("ngg", "ng"),
        ("ngk", "nk"),
        ("oʊ", "1"),
        ("aɪ", "2"),
        ("aʊ", "3"),
    )
    for old, replacement in rewrites:
        trans = trans.replace(old, replacement)

    # Step 1: generate possible spellings.
    spellings = []
    poss(input, spellings, trans, '', 0)

    # Step 2: add possibilities associated with g(ei) and c(ei).
    candidates = []
    for spelling in spellings:
        filter(input, candidates, spelling, '', 0)

    # Rank the candidates by edit distance from the input word (modified
    # Damerau-Levenshtein); interior 'y' is treated as 'i' for comparison.
    best = 1000
    out = ''
    input = input[0] + input[1:-1].replace('y', 'i') + input[-1:]
    for candidate in candidates:
        score = rank.dLev(candidate, input)
        if score < best:
            out, best = candidate, score
    return out
コード例 #30
0
ファイル: english.py プロジェクト: keelimeguy/MusicMakerPy
    def convert_section(cls, line: str, conversion_key: str) -> str:
        """Convert ``line`` according to ``conversion_key``.

        ``'[TO_IPA]'`` runs the line through ``cls._prepare_ipa``,
        ``ipa.convert`` (punctuation dropped) and ``cls._clean_ipa``.  An
        empty key or ``'[DEFAULT]'`` returns the line unchanged.  Any other
        key is logged as unknown and the line is returned unchanged.
        """
        if conversion_key == '[TO_IPA]':
            prepared = cls._prepare_ipa(line)
            transcribed = ipa.convert(prepared, keep_punct=False)
            return cls._clean_ipa(transcribed)

        if conversion_key and conversion_key != '[DEFAULT]':
            logger.info(f"Unknown conversion_key: {conversion_key}")

        return line
# https://github.com/mphilli/English-to-IPA
# python3 setup.py install
# python3 words_with_frequency_and_translation_and_ipa.py

import json
import eng_to_ipa as ipa

# Load the word table, attach an IPA transcription to every entry that has
# one, and write the result to a new JSON file.
with open('words_with_frequency_and_translation.json', 'r',
          encoding='utf-8') as f:
    data = json.load(f)

for key, val in data.items():
    phonetic_symbol = ipa.convert(key,
                                  keep_punct=False,
                                  retrieve_all=False,
                                  stress_marks="primary")
    if phonetic_symbol:
        val["ipa"] = phonetic_symbol

# ensure_ascii=False emits raw IPA characters, so the encoding is pinned to
# UTF-8 instead of the platform default (which can raise UnicodeEncodeError,
# e.g. with cp1252 on Windows).  The context manager guarantees the file is
# flushed and closed.
with open("words_with_frequency_and_translation_and_ipa.json", "w",
          encoding="utf-8") as out_file:
    out_file.write(json.dumps(data, ensure_ascii=False))