Beispiel #1
0
class TestSave(unittest.TestCase):
    """Tests for TTS.save(): files must be written where requested."""

    def setUp(self):
        """Generate speech once, then work inside a scratch directory."""
        self.tts = TTS(SPEAKERS[0], AUDIO_FORMATS[0], KEY)
        self.tts.generate(TEXT)

        # Remember where we started so tearDown can restore it reliably.
        self._orig_cwd = os.getcwd()

        # create temp dir for tests; exist_ok guards against leftovers
        # from a previously aborted run (the original makedirs raised
        # FileExistsError in that case).
        self.tmp_dir = "tmp"
        os.makedirs(self.tmp_dir, exist_ok=True)
        os.chdir(self.tmp_dir)

    def test_save(self):
        """save() with no argument writes a file and returns its path."""
        path = self.tts.save()
        self.assertTrue(os.path.isfile(path))

    def test_save_as(self):
        """save() accepts a bare name, a name with extension, and a subdir path."""
        filename = "wonderful_speech"

        path = self.tts.save(filename)
        self.assertTrue(os.path.isfile(path))

        path = self.tts.save(filename + "." + AUDIO_FORMATS[0])
        self.assertTrue(os.path.isfile(path))

        # save to another dir
        another_dir = "amazing_directory"
        os.makedirs(another_dir, exist_ok=True)
        path = self.tts.save(os.path.join(another_dir, filename))
        self.assertTrue(os.path.isfile(path))

    def tearDown(self):
        # Restore the recorded starting directory (more robust than the
        # original bare chdir("..")), then remove the scratch dir.
        os.chdir(self._orig_cwd)
        rmtree(self.tmp_dir, ignore_errors=True)
Beispiel #2
0
def text_to_speech(text, file, key=YANDEX_API_KEY, speaker=speaker):
    """Synthesize *text* with Yandex SpeechKit and write the result to *file*."""
    engine = TTS(speaker, "wav", '%s' % key, emotion='good', speed='0.9', quality='lo')
    # Presumably some TTS builds expect raw UTF-8 bytes while others take
    # str — try bytes first, fall back to the plain string on any failure.
    try:
        engine.generate(text.encode('utf-8'))
    except Exception:
        engine.generate(text)
    engine.save(file)
Beispiel #3
0
 def speak(audioString):
     """Synthesize *audioString* via Yandex TTS (voice 'ermil', en-US) and
     play it aloud with mpg321."""
     engine = TTS("ermil", "mp3", "25d87483-720a-46ea-82bd-7f89d4c95bbd",
                  lang='en-US', emotion="good")
     # The trailing space matches the original request payload exactly.
     engine.generate(audioString + " ")
     engine.save()
     # NOTE(review): save() with no argument apparently writes speech.mp3
     # in the cwd — confirm against the TTS library.
     os.system("mpg321 --stereo speech.mp3 ")
def generate_tts(tts_text):
    """Return the path of a cached opus file for *tts_text*, synthesizing it
    on a cache miss with a randomly chosen voice.

    Returns None when the 'tts' API key is missing from the config.
    """
    voice = tts_voices[randint(0, len(tts_voices) - 1)]
    digest = md5(tts_text.encode('utf-8')).hexdigest()
    cached = os.path.join(os.getcwd(), 'media', 'cache',
                          'tts.' + digest + '.' + voice + '.opus')
    # Cache hit: reuse the previously generated file.
    if os.path.exists(cached):
        return cached
    try:
        api_key = config.get('tts', 'api key')
        engine = TTS(voice, 'opus', api_key, lang='ru_RU', emotion='neutral')
        engine.generate(tts_text)
        return engine.save(cached)
    except (NoSectionError, NoOptionError):
        return None
Beispiel #5
0
    def _generate_audio_file(self):
        """
        Generic method used as a Callback in TTSModule
            - must provided the audio file and write it on the disk

        .. raises:: FailToLoadSoundFile
        """
        # Since the gTTS lib disabled the SSL verification we get rid of
        # insecure request warning
        requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

        tts = TTS(self.speaker, "mp3", self.key, self.language)
        # BUG FIX: this call was indented with a literal tab, which raises
        # TabError under Python 3 (inconsistent tabs/spaces).
        tts.generate(text=self.words)

        # OK we get the audio we can write the sound file
        tts.save(self.file_path)

        # Re enable the warnings to avoid affecting the whole kalliope process
        warnings.resetwarnings()
Beispiel #6
0
def just_yandex_tts(text):
    """Synthesize *text* with Yandex SpeechCloud (voice 'oksana') and play
    the result through the pygame mixer."""
    try:
        # If this key stops working, obtain and use your own Yandex
        # SpeechCloud key.
        engine = TTS("oksana", "mp3", "60a2b005-738e-42b6-8b78-9ee9b7d57031",
                     speed=1.2)
        engine.generate(text)
        engine.save('speechY.mp3')
    except Exception as e:
        print("[YandexTTS] Не удалось синтезировать речь: {0}".format(e))
        return

    mixer.init()
    mixer.music.load('speechY.mp3')
    mixer.music.play()
    # Busy-wait until playback finishes.
    while mixer.music.get_busy():
        time.sleep(0.1)
    mixer.music.stop()
    # Load a second dummy file, otherwise the mixer keeps speechY.mp3 open
    # and a later rewrite fails with a permission error (per original note).
    mixer.music.load('new.waw')
Beispiel #7
0
def say(words):
    """Speak *words* aloud.

    Synthesized audio is cached in /tmp under a hash of the text; a cached
    copy is replayed, otherwise audio is generated with the configured TTS
    provider (Yandex or Google).
    """
    print(words)
    getConfig(path)
    # NOTE: despite the name, this digest is SHA-1, not MD5 (name kept so
    # the rest of the function reads the same as the original).
    md5 = hashlib.sha1(words.encode('utf-8')).hexdigest()
    filemp3 = ""
    for file in os.listdir("/tmp/"):
        if file.endswith(md5 + ".wav"):
            filemp3 = os.path.join(file)

    if filemp3 == md5 + ".wav":
        print("Файл уже записан")
        os.system("aplay -q /tmp/" + filemp3)
    elif filemp3 == md5 + ".mp3":
        # NOTE(review): this branch looks unreachable — the scan above only
        # matches ".wav" names; kept for parity with the original intent.
        print("Файл уже записан")
        # BUG FIX: was `s.system(...)` (NameError) — must be os.system.
        os.system("mpg123 -q " + filemp3)

    elif PROVIDERTTS == "Yandex":
        print("Генерируем файл")
        tts = TTS("alyss", "wav", APIKEYTTS, lang=language, emotion="good")
        tts.generate(words)
        words = hashlib.sha1(words.encode('utf-8')).hexdigest()
        ttsfilename = "/tmp/" + words + ".wav"
        tts.save(ttsfilename)
        os.system("aplay -q " + ttsfilename)

    elif PROVIDERTTS == "Google":
        print("Генерируем файл")
        tts = gTTS(text=words, lang=languageG)
        words = hashlib.sha1(words.encode('utf-8')).hexdigest()
        ttsfilename = "/tmp/" + words + ".mp3"
        tts.save(ttsfilename)
        os.system("mpg123 -q " + ttsfilename)
Beispiel #8
0
class TestGenerate(unittest.TestCase):
    """Tests for TTS.generate()."""

    def setUp(self):
        self.tts = TTS(SPEAKERS[0], AUDIO_FORMATS[0], KEY)

    def test_wrong_text(self):
        """generate() must reject invalid input text."""
        # empty string
        self.assertRaises(Exception, self.tts.generate, "")

        # too many chars
        self.assertRaises(Exception, self.tts.generate,
                          "a" * self.tts.MAX_CHARS)

    def test_data(self):
        """Longer input should produce a larger audio payload."""
        self.tts.generate(TEXT)

        # data received
        self.assertIsNotNone(self.tts._data)

        # more words mean more data
        self.other_tts = TTS(SPEAKERS[0], AUDIO_FORMATS[0], KEY)
        self.other_tts.generate(TEXT * 2)
        # BUG FIX: the original assertLess compared the two lists
        # lexicographically element-by-element; the stated intent ("more
        # words than more data") is a comparison of payload sizes.
        self.assertLess(len(list(self.tts._data)),
                        len(list(self.other_tts._data)))
Beispiel #9
0
def t_to_s(text):
    """Synthesize *text* (voice 'oksana', opus format) into a file named
    "ramazan" and return that file name."""
    engine = TTS("oksana", "opus", "b04291f2-5e31-4c8e-af57-1695b7bd5f16")
    engine.generate(text)
    engine.save("ramazan")
    return "ramazan"
Beispiel #10
0
            if len(sys.argv) > 1 and str(sys.argv[1]) == "test":
                # Print the values to the screen (test mode)
                print "to_speaker = " + to_speaker
                print "count_rings = " + str(count_rings)
            # If the script is running in production mode
            else:
                # Record the values in the log instead
                write_log("to_speaker = " + to_speaker)
                write_log("count_rings = " + str(count_rings))

            # If this is the first ring (the handset rang for the first
            # time for this incoming call)
            if count_rings == 0:
                # Send our text to Yandex.Speech for synthesis and store
                # the resulting audio as an mp3 file
                file_mp3 = "/home/asterisk/to_speaker/name_or_number"
                tts = TTS("zahar", "mp3", "*****-****-****-****-***********")
                tts.generate(str(to_speaker))
                tts.save(file_mp3)
                file_mp3 = file_mp3 + ".mp3"

                # For an internal call, pause so the handset rings before the
                # robot starts talking; otherwise the robot speaks before the
                # phone rings, which is a little startling :)
                if mc.get("is_internal") is not None:
                    mc.delete("is_internal")
                    # No pause here: "don't skip the first ring" had to be
                    # enabled on the handsets instead
                    # time.sleep(2)

            # If fewer than five rings have happened so far
            if count_rings < 5:
                to_speaker_all = '/usr/bin/mplayer -ao alsa -really-quiet -noconsolecontrols ' + file_mp3
                os.system(to_speaker_all)
            # Increment the ring counter by one
Beispiel #11
0
def test():
    """Render a talking-head video for the configured text/news content.

    Pipeline: synthesize speech with Yandex TTS, predict lip landmarks from
    MFCC features (AT_net), paint landmark frames, then mux frames + audio
    into <in_file>_result.mp4 with ffmpeg.

    Returns the path of the saved landmark .npy file.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = config.device_ids

    result_dir = 'temp/' + config.in_file
    motion_dir = result_dir + '/motion/'

    # makedirs(..., exist_ok=True) keeps reruns from crashing on leftovers
    # (the original os.mkdir raised FileExistsError on a second run).
    os.makedirs(result_dir, exist_ok=True)
    os.makedirs(motion_dir, exist_ok=True)

    # PCA basis (first 6 components) and mean landmark vector.
    pca = torch.FloatTensor(np.load('basics/pca.npy')[:, :6])
    mean = torch.FloatTensor(np.load('basics/mean.npy'))
    decoder = VG_net()
    encoder = AT_net()

    state_dict2 = multi2single(config.vg_model, 1)
    decoder.load_state_dict(state_dict2)

    state_dict = multi2single(config.at_model, 1)
    encoder.load_state_dict(state_dict)

    encoder.eval()
    decoder.eval()

    test_file = result_dir + "/" + config.in_file + ".wav"
    test_file_old = result_dir + "/old_" + config.in_file + ".wav"

    # Either scrape the news page or take the literal text from the config.
    if config.text_tts == "" and config.news_url != "":
        parse_news_content = get_info(config.news_url)['news_content']
    else:
        parse_news_content = config.text_tts

    tts = TTS(config.name_tts,
              "wav",
              "000000-0000-0000-0000-00000000",
              config.lang_tts,
              emotion="neutral",
              speed=1)
    # Only the first 1999 characters are sent to the TTS service.
    tts.generate(parse_news_content[:1999])
    if config.shift == 1:
        tts.save(test_file_old)
        audio_shift(test_file_old, test_file)
    else:
        tts.save(test_file)

    example_image, example_landmark = generator_demo_example_lips(
        config.person)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    ])
    example_image = cv2.cvtColor(example_image, cv2.COLOR_BGR2RGB)
    example_image = transform(example_image)

    example_landmark = example_landmark.reshape(
        (1, example_landmark.shape[0] * example_landmark.shape[1]))

    if config.cuda:
        example_image = Variable(example_image.view(1, 3, 128, 128)).cuda()
        example_landmark = Variable(
            torch.FloatTensor(example_landmark.astype(float))).cuda()
    else:
        example_image = Variable(example_image.view(1, 3, 128, 128))
        example_landmark = Variable(
            torch.FloatTensor(example_landmark.astype(float)))

    # Project the reference landmarks into PCA space.
    example_landmark = example_landmark * 5.0
    example_landmark = example_landmark - mean.expand_as(example_landmark)
    example_landmark = torch.mm(example_landmark, pca)

    # Pad the waveform with 1920 zero samples on each side before computing
    # MFCCs.  (The original computed an un-padded MFCC first and immediately
    # overwrote it, and also loaded the file a second time at 44100 Hz into
    # an unused variable — both dead computations are removed.)
    speech, sr = librosa.load(test_file, sr=16000)
    speech = np.insert(speech, 0, np.zeros(1920))
    speech = np.append(speech, np.zeros(1920))
    mfcc = python_speech_features.mfcc(speech, 16000, winstep=0.01)

    print('=======================================')
    print('Generate images')
    t = time.time()
    ind = 3
    with torch.no_grad():
        fake_lmark = []
        input_mfcc = []
        # Slide a 28-row MFCC window (7 blocks of 4 frames) over the audio,
        # dropping the energy coefficient (column 0).
        while ind <= int(mfcc.shape[0] / 4) - 4:
            t_mfcc = mfcc[(ind - 3) * 4:(ind + 4) * 4, 1:]
            t_mfcc = torch.FloatTensor(t_mfcc)
            input_mfcc.append(t_mfcc)
            ind += 1
        input_mfcc = torch.stack(input_mfcc, dim=0)
        input_mfcc = input_mfcc.unsqueeze(0)

        fake_lmark = encoder(example_landmark, input_mfcc)
        fake_lmark = fake_lmark.view(
            fake_lmark.size(0) * fake_lmark.size(1), 6)

        # Map the reference landmarks back out of PCA space.
        example_landmark = torch.mm(example_landmark, pca.t())
        example_landmark = example_landmark + mean.expand_as(example_landmark)

        # Amplify the predicted motion components, then reproject the
        # predictions back to full landmark space.
        fake_lmark[:, 1:6] *= 2 * torch.FloatTensor(
            np.array([1.1, 1.2, 1.3, 1.4, 1.5]))
        fake_lmark = torch.mm(fake_lmark, pca.t())
        fake_lmark = fake_lmark + mean.expand_as(fake_lmark)

        fake_lmark = fake_lmark.unsqueeze(0)
        fake_lmark = fake_lmark.data.cpu().numpy()

        file_mark = result_dir + "/" + config.in_file + ".npy"
        file_mp4 = result_dir + "/" + config.in_file  # + ".mp4"
        np.save(file_mark, fake_lmark)
        mark_paint.mark_video(fake_lmark, motion_dir)

        cmd = 'ffmpeg -framerate 25 -i ' + motion_dir + '%d.png  -filter:v scale=512:-1 -c:v libx264 -pix_fmt yuv420p ' + file_mp4 + '.mp4'
        subprocess.call(cmd, shell=True)
        print('video done')

        cmd = 'ffmpeg -i ' + file_mp4 + '.mp4 -i ' + test_file + ' -c:v copy -c:a aac -strict experimental ' + file_mp4 + '_result.mp4'
        subprocess.call(cmd, shell=True)
        print('video+audio done')

        # The original also had an unreachable `return False` after this
        # return; it has been removed.
        return file_mark
class ExampleWindow(QMainWindow):
    """Language-learning window: translate the selected text to English
    (F1 / button 1) or speak it aloud via Yandex TTS (F2 / button 2)."""

    def __init__(self):
        QMainWindow.__init__(self)

        self.setMinimumSize(QSize(880, 640))
        self.setWindowTitle("苏维埃社会主义语言学习机")

        # Add text field
        self.txtEdit = QPlainTextEdit(self)
        self.txtEdit.insertPlainText(sampleText)
        self.txtEdit.move(40, 10)
        self.txtEdit.resize(800, 300)

        # Add button
        self.btn1 = QPushButton("学习一个 (F1)", self)
        self.btn1.move(40, 310)
        self.btn1.clicked.connect(self.button1Clicked)
        # Add button
        self.btn2 = QPushButton("念洋文 (F2)", self)
        self.btn2.move(160, 310)
        self.btn2.clicked.connect(self.button2Clicked)

        # Add label
        self.lbl = QTextEdit(self)
        self.lbl.setReadOnly(True)
        self.lbl.move(40, 340)
        self.lbl.resize(800, 200)

        self.tts = TTS("jane", "mp3", "ae918646-fa47-4e66-96b6-6ce44d6d3146")
        self.player = QtMultimedia.QMediaPlayer(self)
        self.show()

    def _translate_selection(self):
        # Translate the currently selected text to English and display it.
        chosen = self.txtEdit.textCursor().selectedText()
        translated = translator.translate(chosen.strip(), dest='en')
        self.lbl.setText(translated.text)

    def _speak_selection(self):
        # Synthesize the current selection and play it with QMediaPlayer.
        # (Shared by button2Clicked and the F2 key handler — the original
        # duplicated this whole body in both places.)
        chosen = self.txtEdit.textCursor().selectedText()
        if chosen.strip() == "":
            return

        try:
            self.tts.generate(chosen.strip())
        except SSLError:
            # Best-effort: keep whatever audio the TTS already produced.
            pass
        tmp_path = os.path.join(tempfile.gettempdir(), "tmp.mp3")
        self.tts.save(tmp_path)
        self.sound = QtMultimedia.QMediaContent(QUrl.fromLocalFile(tmp_path))
        self.player.setMedia(self.sound)
        self.player.setVolume(100)
        self.player.play()

        #QSound("tmp.mp3").play()

    def button1Clicked(self):
        self._translate_selection()

    def button2Clicked(self):
        self._speak_selection()

    def keyPressEvent(self, e):
        # F1 = translate selection, F2 = speak selection.
        if e.key() == Qt.Key_F1:
            self._translate_selection()
        elif e.key() == Qt.Key_F2:
            self._speak_selection()
Beispiel #13
0
def t_to_s(text):
    """Synthesize *text* (voice 'oksana', opus, ru_RU, emotion 'good') into
    a file named "ramazan" and return that file name.

    Input longer than 500 characters is truncated to the first 490.
    """
    if len(text) > 500:
        text = text[0:490]
    engine = TTS("oksana", "opus", "b04291f2-5e31-4c8e-af57-1695b7bd5f16",
                 lang="ru_RU", emotion="good")
    engine.generate(text)
    engine.save("ramazan")
    return "ramazan"
Beispiel #14
0
class YandexTTSPlugin(subscriber):
    """
    Uses the Yandex SpeechKit Cloud services.
    SpeechKit Cloud is a multilingual TTS and STT platform developed by Yandex.
    """

    def __init__(self, eb):
        """Read TTS settings from the global config.

        :param eb: event bus used to play audio and to signal shutdown
        :raises ValueError: when no API key is configured or the configured
            language is unsupported
        """
        super().__init__()
        self.tts = None
        self.eb = eb

        try:
            self.access_key = conf['TTS']['access_key']
        except KeyError:
            raise ValueError("No Yandex API access!")

        try:
            self.voice = conf['TTS']['voice']
        except KeyError:
            self.voice = "alyss"

        try:
            self.language = conf['MAIN']['language']
        except KeyError:
            self.language = 'ru-RU'

        if self.language.lower() not in ['ru-ru', 'en-en', 'tr-tr', 'uk-ua']:
            raise ValueError("Language '%s' not supported" % self.language)

    def process(self, eventobj):
        """Dispatch an incoming event: 'run' speaks, 'shutdown' halts."""
        event = XEvent(eventobj)
        if event.cmd == 'run':
            self.say(**event.data)
        elif event.cmd == 'shutdown':
            self.eb.halted(__file__)

    def say(self, phrase, **kwargs):
        """
        Method used to utter words using the Yandex TTS plugin
        :param phrase: text to utter; audio is cached in temp_voice/ keyed
            by the MD5 of the phrase
        """
        phash = md5(phrase.encode('utf-8')).hexdigest()
        fname = os.path.join(os.getcwd(), 'temp_voice', '%s.wav' % phash)
        # BUG FIX: the original probed the cache with open(fname, 'r') and
        # never closed the handle, leaking a file descriptor on every cache
        # hit; an existence check needs no open file at all.
        if not os.path.isfile(fname):
            lg.info('Preparing TTS...')
            try:
                # Lazily construct the engine on first use.
                if not self.tts:
                    self.tts = YaTTS(self.voice,
                                     "wav",
                                     lang=self.language,
                                     key=self.access_key)
            except Exception as ex:
                lg.error("Exception while loading TTS: %s" % str(ex))
                return None
            lg.info('Running TTS for phrase "%s"' % phrase)
            try:
                self.tts.generate(phrase)
                self.tts.save(path=fname)
            except Exception as ex:
                lg.error("Exception while running TTS: %s" % str(ex))
                return None
        with open(fname, 'rb') as f:
            data = f.read()
        if kwargs.get('autoplay', True):
            self.eb.send('audio', 'play', fname)
        return data