Esempio n. 1
0
 def __init__(self,
              credential: Optional[Union[str, Path]] = None) -> None:
     """Create the Text-to-Speech client stored on ``self.client``.

     Args:
         credential: Path to a service-account key file. When ``None``,
             ``TextToSpeechClient()`` is constructed without arguments
             (credential lookup is then left to the client library --
             presumably its default credential discovery; confirm).
     """
     # A constructor returns normally, so the return annotation is ``None``;
     # ``NoReturn`` would tell type checkers that this call never returns.
     if credential is None:
         self.client = TextToSpeechClient()
     else:
         self.client = TextToSpeechClient.from_service_account_file(
             filename=credential)
Esempio n. 2
0
def text_to_mp3(client: texttospeech.TextToSpeechClient,
                voice: texttospeech.types.VoiceSelectionParams,
                audio_config: texttospeech.types.AudioConfig, text: str,
                output_file_path: Path) -> None:
    """
    Convert a string into voice and save it in an .mp3 file.

    The text is split into lines and every non-empty line is synthesised
    with its own request; the returned audio is appended to the output file
    in line order. A line whose synthesis raises is skipped and recorded in
    ``<output stem>_log.json`` next to the output file. That log holds a JSON
    list with one entry per failed line, so earlier failures are no longer
    overwritten by later ones.

    :param client: TextToSpeechClient instance.
    :param voice: VoiceSelectionParams instance.
    :param audio_config: AudioConfig instance.
    :param text: String to synthesise.
    :param output_file_path: Full path to the output .mp3 file.
    :return: None
    """
    lines = text.splitlines()

    logger.info(f'Synthesising {len(lines)} lines ...')

    output_file_log = output_file_path.parent / (output_file_path.stem +
                                                 '_log.json')

    # Every failed chunk is collected here so the log keeps all of them.
    failures = []

    with open(output_file_path, 'wb') as output_file:
        for i, text_chunk in enumerate(lines):
            # skip empty lines
            if not text_chunk:
                continue

            input_text = texttospeech.types.SynthesisInput(text=text_chunk)
            try:
                logger.info(
                    f'Synthesising speech for chunk `{i}`, size: `{len(text_chunk)}`'
                )
                response = client.synthesize_speech(input_text, voice,
                                                    audio_config)
            except Exception as e:
                # Best effort: one bad line must not abort the whole
                # conversion, so record it and move on to the next line.
                logger.error(
                    f'Speech synthesising failed! Chunk text: `{input_text}`\nError: {e}\n'
                )
                failures.append({
                    'chunk_number': i,
                    'chunk_length': len(text_chunk),
                    'chunk_text': str(text_chunk),
                    'Error message': traceback.format_exc()
                })
                # Rewrite the full list right away so the log on disk is
                # complete even if a later step raises.
                with open(output_file_log, 'w') as log_out:
                    json.dump(failures, log_out)
                continue

            output_file.write(response.audio_content)
            logger.info(f'Audio content written to `{output_file_path}`!')

    # Reported only after the output file has been closed.
    logger.info(f'Output saved to `{output_file_path}`')
    logger.info(f'logs at `{output_file_log}`')
Esempio n. 3
0
class TextToSpeech:
    """Small convenience wrapper around ``TextToSpeechClient``.

    ``synthesize`` builds the request objects and calls the client;
    ``save`` writes the audio bytes of a response to a file.
    """

    def __init__(self,
                 credential: Optional[Union[str, Path]] = None) -> None:
        """Create the client stored on ``self.client``.

        Args:
            credential: Path to a service-account key file. When ``None``,
                ``TextToSpeechClient()`` is constructed without arguments.
        """
        # ``-> None`` rather than ``NoReturn``: the constructor returns
        # normally; ``NoReturn`` is for callables that never return.
        if credential is None:
            self.client = TextToSpeechClient()
        else:
            self.client = TextToSpeechClient.from_service_account_file(
                filename=credential)

    def synthesize(
        self,
        text: str,
        language: str = 'en-US',
        gender: int = 1,
        encoding: enums.AudioEncoding = enums.AudioEncoding.MP3
    ) -> types.SynthesizeSpeechResponse:
        """Synthesize ``text`` and return the client's response.

        Args:
            text: Plain text passed as ``SynthesisInput(text=...)``.
            language: Value used as the voice ``language_code``.
            gender: Voice gender selector: ``0`` selects FEMALE, ``1``
                selects MALE, any other value selects NEUTRAL.
            encoding: Audio encoding placed in the ``AudioConfig``.

        Returns:
            The value returned by ``self.client.synthesize_speech``.
        """
        if gender == 0:
            ssml_gender = enums.SsmlVoiceGender.FEMALE
        elif gender == 1:
            ssml_gender = enums.SsmlVoiceGender.MALE
        else:
            ssml_gender = enums.SsmlVoiceGender.NEUTRAL

        synthesis_data = types.SynthesisInput(text=text)
        voice = types.VoiceSelectionParams(language_code=language,
                                           ssml_gender=ssml_gender)
        audio_config = types.AudioConfig(audio_encoding=encoding)

        return self.client.synthesize_speech(input_=synthesis_data,
                                             voice=voice,
                                             audio_config=audio_config)

    @staticmethod
    def save(response: types.SynthesizeSpeechResponse,
             filename: Union[str, Path]) -> None:
        """Write ``response.audio_content`` to ``filename`` in binary mode.

        Args:
            response: Object whose ``audio_content`` attribute holds the
                bytes to write.
            filename: Destination path; an existing file is overwritten.
        """
        with open(filename, 'wb') as audio_file:
            audio_file.write(response.audio_content)
def main(text, locale='en-US', voice_name='en-US-Wavenet-A',
         output_path='output.mp3'):
    """Synthesise ``text`` to MP3 audio and write it to ``output_path``.

    A client is created from the service-account file at ``SERVICE_FILE``,
    one synthesis request is sent, and the returned audio bytes are written
    to disk.

    :param text: Plain text to synthesise.
    :param locale: Value used as the voice ``language_code``.
    :param voice_name: Name of the voice to request. The default is an
        ``en-US`` voice, so pass a matching name when changing ``locale``.
    :param output_path: Destination file for the audio; overwritten if it
        already exists.
    """
    client = TextToSpeechClient.from_service_account_file(str(SERVICE_FILE))

    input_text = types.SynthesisInput(text=text)

    # Note: the voice is selected by name here.
    # Names of voices can be retrieved with client.list_voices().
    voice = types.VoiceSelectionParams(language_code=locale, name=voice_name)

    audio_config = types.AudioConfig(audio_encoding=enums.AudioEncoding.MP3)

    response = client.synthesize_speech(input_text, voice, audio_config)

    # The response's audio_content is binary.
    with open(output_path, 'wb') as out:
        out.write(response.audio_content)
    # Report success only once the file has been closed.
    print(f'Audio content written to file "{output_path}"')