Example 1
    def google_STT(self, audio):
        client = speech_v1.SpeechClient.from_service_account_json(
            '/data/second-conquest-293723-05738e995f8f.json')

        # Loads the audio into memory
        with io.open(audio, "rb") as audio_file:
            content = audio_file.read()
            audio = speech_v1.RecognitionAudio(content=content)

        encoding = speech_v1.RecognitionConfig.AudioEncoding.ENCODING_UNSPECIFIED

        config = speech_v1.RecognitionConfig(
            encoding=encoding,
            sample_rate_hertz=22050,
            language_code="en-US",
            enable_automatic_punctuation=True,
        )

        # Detect speech in the audio file
        response = client.recognize(request={"config": config, "audio": audio})
        # Concatenate all result transcripts, then normalize the text once
        text = ''
        for result in response.results:
            text = text + result.alternatives[0].transcript
        text = english_cleaners(text)
        return text
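A hedged usage sketch (google_STT is an instance method, so the enclosing class and the file name below are hypothetical; the WAV should match the 22050 Hz config):

    stt = SpeechWrapper()              # hypothetical class exposing google_STT
    text = stt.google_STT("clip.wav")  # invented 22050 Hz mono WAV path
    print(text)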
Example 2
def make_arpabet(text):
    # g2p functions
    g2p = G2p()

    # Define punctuation, prevent punctuation curlies, and make replacement dictionary to fix spacing
    punc = "!?,.;:␤#-_'\"()[]\n,."
    punc = list(punc)
    punc_key = list(punc)
    punc_alt = [" " + item for item in punc]
    punc_dict = {}
    for key in punc_alt:
        for value in punc_key:
            punc_dict[key] = value
            punc_key.remove(value)
            break

    # Text processing
    text = " ".join(g2p(english_cleaners(text))).split("  ")
    outlist = []
    for item in text:
        item = item.strip()
        if item not in punc:
            item = "{" + item + "}"
        outlist.append(item)
    text = " ".join(outlist)
    for key, replacement in punc_dict.items():
        text = text.replace(key, replacement)
    return text
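To illustrate the re-spacing step in isolation (standalone sketch; the ARPAbet string is invented and the punctuation list shortened for brevity):

    punc = list("!?,.;:")
    punc_dict = {" " + p: p for p in punc}

    s = "{HH AH0 L OW1} , {W ER1 L D} !"
    for spaced, tight in punc_dict.items():
        s = s.replace(spaced, tight)
    print(s)  # -> {HH AH0 L OW1}, {W ER1 L D}!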
Example 3
def convert_to_ipa(texts):
    print("Converting training files to IPA notation...")
    epi = epitran.Epitran('eng-Latn', ligatures=True)
    for text_mel_pair in texts:
        text_mel_pair[1] = ipa.convert(english_cleaners(text_mel_pair[1]))
        # Words flagged with a trailing '*' are transliterated with Epitran instead
        foreign_words = re.findall(r"[^ ]*\*", text_mel_pair[1])
        for word in foreign_words:
            text_mel_pair[1] = text_mel_pair[1].replace(
                word, epi.transliterate(word[:-1]))
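A standalone sketch of the trailing-'*' marker convention used above (the sentence is invented, and upper-casing stands in for epi.transliterate):

    import re

    line = "the word schadenfreude* is german"
    for word in re.findall(r"[^ ]*\*", line):
        line = line.replace(word, word[:-1].upper())  # stand-in for transliteration
    print(line)  # -> the word SCHADENFREUDE is german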
Example 4
def generate_from_file(tacotron2_path, waveglow_path, text_file, output_directory):

  # Make synthesis paths

  if not os.path.exists(output_directory):
    os.makedirs(output_directory)
    print("Creating directory " + output_directory + "...")

  hparams = create_hparams()
  hparams.sampling_rate = 22050

  print("Loading models...")
  model = load_model(hparams)
  model.load_state_dict(torch.load(tacotron2_path)['state_dict'])
  _ = model.cuda().eval().half()

  waveglow = torch.load(waveglow_path)['model']
  waveglow.cuda().eval().half()
  for k in waveglow.convinv:
    k.float()
  denoiser = Denoiser(waveglow)

  genlist = []
  with open(text_file) as file:
    for line in file:
      genlist.append(line.strip())

  # Build the Epitran transliterator once, outside the synthesis loop
  epi = epitran.Epitran('eng-Latn', ligatures=True)

  for entry in genlist:
    wav_name = "_".join(entry.split(" ")[:4]).lower() + ".wav"

    if hparams.preprocessing == "ipa":
      entry = ipa.convert(english_cleaners(entry))
      foreign_words = re.findall(r"[^ ]*\*", entry)
      for word in foreign_words:
        entry = entry.replace(word, epi.transliterate(word[:-1]))
    if hparams.preprocessing == "arpabet":
      entry = make_arpabet(entry)

    # Text sequencer
    if hparams.preprocessing is not None:
      sequence = np.array(text_to_sequence(entry, None))[None, :]
    else:
      sequence = np.array(text_to_sequence(entry, ['english_cleaners']))[None, :]
    sequence = torch.from_numpy(sequence).cuda().long()

    # Synthesis (no gradients needed at inference time)
    with torch.no_grad():
      mel_outputs, mel_outputs_postnet, _, alignments = model.inference(sequence)
      audio = waveglow.infer(mel_outputs_postnet, sigma=0.666)
    audio_denoised = denoiser(audio, strength=0.01)[:, 0]

    # Save audio
    print("Saving " + wav_name)
    write(os.path.join(output_directory, wav_name), hparams.sampling_rate,
          audio_denoised[0].data.cpu().numpy())
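A hedged usage sketch (every path below is a placeholder; a CUDA GPU and matching Tacotron 2 / WaveGlow checkpoints are assumed):

  generate_from_file(
      tacotron2_path="checkpoints/tacotron2_statedict.pt",  # placeholder
      waveglow_path="checkpoints/waveglow_256channels.pt",  # placeholder
      text_file="sentences.txt",                            # one sentence per line
      output_directory="synth_out",
  )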
Example 5
def get_first_words_idx(script_path):
    with open(script_path) as f:
        text = f.read()
        sents = sent_tokenize(text)
        print('number of sentences:', len(sents))
        text = english_cleaners(text)
        words = text.split(' ')
        first_words_idx = [0]
        # Find each sentence-final word; the next index starts a new sentence
        for idx, word in enumerate(words):
            if word.endswith(('.', '?', '!')):
                first_words_idx.append(idx + 1)

        return first_words_idx
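The index logic in isolation (toy input; note the final entry points one past the last word):

    words = "hello world. how are you? fine!".split(' ')
    first_words_idx = [0]
    for idx, word in enumerate(words):
        if word.endswith(('.', '?', '!')):
            first_words_idx.append(idx + 1)
    print(first_words_idx)  # -> [0, 2, 5, 6]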
Example 6
def read_csv(path, fn_encoding='UTF8'):
    # Reads a CSV file into (audio snippet name, transcript, normalized transcript) records
    with open(path, encoding=fn_encoding) as f:
        data = []
        for line in f:
            audio_name, audio_transcript, deprecated_1 = line.split('\t')

            replace_audio_name = audio_name.replace("/", "\\")

            audio_transcript = audio_transcript.strip()
            audio_transcript = audio_transcript.replace("\"", "").replace("(", "").replace(")", "")\
                .replace("[", "").replace("]", "")
            audio_normalized_transcript = english_cleaners(audio_transcript)

            data.append(
                Data(replace_audio_name, audio_transcript,
                     audio_normalized_transcript))
        return data
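The input format read_csv expects, inferred from the split('\t') above: three tab-separated fields per line (all values below are invented):

    row = "wavs/0001.wav\tHe said \"hello\" (quietly).\tunused"
    audio_name, audio_transcript, deprecated_1 = row.split('\t')
    # audio_name        -> "wavs/0001.wav" (later rewritten with backslashes)
    # audio_transcript  -> quotes/brackets stripped, then run through english_cleaners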
Example 7
    def MFA_file_prep(self, hparams):
        '''Create .lab files for all existing wav files in the hparams.wav_path folder.
        The .lab files will contain the cleaned/normalized text (abbreviations expanded,
        punctuation removed, numbers expanded, upper-cased) from the csv file.
        The function also checks whether the words in the content are present in the
        dictionary, and builds a dictionary {alien_word: phonemes(alien_word)} using g2p_en.
          input  - hparams
          output - new_word_dictionary
        '''

        filenames = []
        content = []
        update_words = {}

        with open(hparams.csv_path, encoding='utf-8') as f:
            for lines in f:
                filenames.append(lines.split("|")[0])
                content.append(lines.split('|')[1])
        words = self.load_words_from_dict(hparams)

        for i in range(0, len(filenames)):
            if os.path.exists(f'{hparams.lab_path}/{filenames[i]}.wav'):
                path = os.path.join(hparams.lab_path, filenames[i] + ".lab")
                clean_content = english_cleaners(content[i])
                clean_content = punctuation_removers(
                    clean_content)  # also remove punctuation
                with open(path, 'w+') as lab_file:
                    lab_file.write(clean_content.upper())
                alien = set(clean_content.upper().split()) - set(words)
                alien_update = {w: g2p(w) for w in alien}
                update_words = {**update_words, **alien_update}

        if update_words:
            print("update your dictionary using update_dict() of this class")
        else:
            print("No dictionary update required")
        return update_words  # words still to be added to the dictionary
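The pipe-separated metadata format the method parses, sketched on an invented line:

    line = "LJ001-0001|Printing, in the only sense we are concerned with."
    filename = line.split("|")[0]  # "LJ001-0001"; LJ001-0001.wav must exist in lab_path
    content = line.split("|")[1]   # raw transcript later passed to english_cleaners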
Example 8
def test_cleaner_pipelines():
    text = 'Mr. Müller ate  2 Apples'
    assert cleaners.english_cleaners(text) == 'mister muller ate two apples'
    assert cleaners.transliteration_cleaners(text) == 'mr. muller ate 2 apples'
    assert cleaners.basic_cleaners(text) == 'mr. müller ate 2 apples'
Example 9
def text_recognition(path, config):
    root, ext = os.path.splitext(path)
    txt_path = root + ".txt"

    if os.path.exists(txt_path):
        with open(txt_path) as f:
            out = json.load(f)
            return out

    # If a new API account is used, reset the env file for the Google credentials
    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    out = {}
    error_count = 0

    tmp_path = os.path.splitext(path)[0] + ".wav"
    client = speech.SpeechClient()  # Fixed

    while True:
        try:
            # client = speech.SpeechClient()  # Causes 10060 max retries exceeded -to OAuth -HK

            with io.open(tmp_path, 'rb') as f:
                audio = types.RecognitionAudio(content=f.read())

            config = types.RecognitionConfig(
                encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=16000,
                language_code='en-GB')

            response = client.recognize(config, audio)

            if len(response.results) > 0:
                alternatives = response.results[0].alternatives

                # results = the actual recognized text
                results = [
                    alternative.transcript for alternative in alternatives
                ]
                assert len(results) == 1, "More than 1 results: {}".format(
                    results)

                # effectively the txt content
                out = {
                    os.path.basename(path):
                    "" if len(results) == 0 else results[0],
                    "normalized_text": english_cleaners(results[0])
                }
                print(path, results[0], english_cleaners(results[0]))
                break
            break
        except Exception as err:
            error_count += 1
            print("Skip warning for {} for {} times: {}".format(
                path, error_count, err))

            if error_count > 5:
                break
            else:
                continue

    if len(out) == 0:
        # remove file that only has instrument sound.
        os.remove(root + '.wav')
        print(root + '.wav file is removed!')

    else:
        with open(txt_path, 'w') as f:
            json.dump(out, f, indent=2, ensure_ascii=False)

    return out
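The shape of the JSON cache written next to the audio file, inferred from the out dict above (file name and text are invented):

    out = {
        "clip.wav": "hello world",        # raw transcript keyed by file name
        "normalized_text": "hello world"  # english_cleaners() output
    }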
Example 10
def english_cleaner():
    actual = cleaners.english_cleaners(
        'I want to be there early on the day. Please organize')
    expected = 'i want to be there early on the day. please organize'
    assert actual == expected