Example #1
 def crossfade_diphones(self, diphone_seq, crossover):
     # Crossfade a diphone sequence, overlapping diphones over a specified number of points (the crossover value)
     milliseconds = int(self.out.rate * 0.001)
     # create long and short pause
     longpause = simpleaudio.Audio()
     longpause.create_tone(0, 400 * milliseconds, 1)
     shortpause = simpleaudio.Audio()
     shortpause.create_tone(0, 200 * milliseconds, 1)
     for diphone in diphone_seq:
         # for each diphone in the sequence
         try:
             # try to load the corresponding diphone file from wav_folder
             diphone_audio = simpleaudio.Audio()
             diphone_audio.load(self.diphones[diphone])
             # and then crossfade the obtained audio array into the self.out audio data
             self.crossfade_arrays(diphone_audio, crossover)
         except KeyError:
             # if the diphone is not in the wav_folder, check if it is valid punctuation
             # and crossfade in the relevant length of silence
             try:
                 if diphone in ['!', '?', ':', '.']:
                     self.crossfade_arrays(longpause, crossover)
                 elif diphone in [',']:
                     self.crossfade_arrays(shortpause, crossover)
                 else:
                     raise KeyError
             # if the diphone is not in the wav_folder or valid punctuation, user is alerted
             except KeyError:
                 print(
                     'Sorry, I am unable to retrieve the audio for the diphone {}. Please recheck that it '
                     'is in the diphones folder supplied.'.format(diphone))
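The crossfade_arrays call used above is not shown in this example. As a rough standalone sketch of the idea in plain numpy (the helper name and the linear fade shape are assumptions, not the example's actual implementation):

import numpy as np

def crossfade(a, b, crossover):
    # Linearly fade the tail of `a` into the head of `b` over `crossover` samples
    fade = np.linspace(0.0, 1.0, crossover)
    overlap = a[-crossover:] * (1.0 - fade) + b[:crossover] * fade
    return np.concatenate((a[:-crossover], overlap.astype(a.dtype), b[crossover:]))

# e.g. join two 16 kHz int16 arrays over an 80-sample (5 ms) overlap
x = np.zeros(1600, dtype=np.int16)
y = np.full(1600, 1000, dtype=np.int16)
joined = crossfade(x, y, 80)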
Example #2
 def get_letters_voice(self, letter_phone_list):
     # Spell the word out: concatenate the audio for each letter's phone label,
     # then adjust the playback speed, rescale the volume and play it
     letter_voice_audio = simpleaudio.Audio()
     for letter in letter_phone_list:
         if re.match(r'[A-Z]{1,2}1', letter):
             # drop the trailing stress digit (e.g. 'EY1' -> 'EY') before the lookup
             letter = letter[:-1]
         temp = simpleaudio.Audio()
         temp.load(self.phones[letter])
         letter_voice_audio.data = np.append(letter_voice_audio.data,
                                             temp.data)
     letter_voice_audio.change_speed(0.4)
     letter_voice_audio.rescale(self.vol)
     letter_voice_audio.play()
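The re.match check above strips a trailing stress digit ('1') from a phone label before the dictionary lookup. A quick standalone illustration of that pattern (the phone labels below are just sample values):

import re

for phone in ['EY1', 'B', 'IY1']:
    if re.match(r'[A-Z]{1,2}1', phone):
        phone = phone[:-1]   # 'EY1' -> 'EY'
    print(phone)             # EY, B, IY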
Example #3
    def get_words_voice(self, word_phone_list):
        upper_letters = list('QWERTYUIOPASDFGHJKLZXCVBNM')
        voice_upper = False
        """Extension B – Punctuation
        contains a comma – insert 250ms of silence
        period, question mark or exclamation mark – insert 500ms of silence"""

        for w_index in range(0, len(word_phone_list)):
            if word_phone_list[w_index] in list(',.!?'):
                # a comma gets 250 ms of silence, other punctuation 500 ms
                pause = 0.25 if word_phone_list[w_index] == ',' else 0.5
                temp = simpleaudio.Audio(rate=16000)
                temp.create_tone(0, int(pause * temp.rate), 0)
                self.res_voice.data = np.append(self.res_voice.data, temp.data)
                continue
            if word_phone_list[w_index] in list('{}'):
                # skip the emphasis braces themselves
                continue
            for index in range(0, len(word_phone_list[w_index])):
                # keep only the uppercase letters of the phone label
                # (drops stress digits and any lowercase characters)
                phone_key = ''
                for phone_item_lower_item in word_phone_list[w_index][index]:
                    if phone_item_lower_item in upper_letters:
                        phone_key += phone_item_lower_item
                """ Extension D – Emphasis markup  
                     emphasis the word in{} """

                temp = simpleaudio.Audio()
                temp.load(self.phones[phone_key])
                if w_index > 0 and word_phone_list[w_index - 1] == '{':
                    voice_upper = True
                if index == (len(word_phone_list[w_index]) - 1):
                    voice_upper = False
                if voice_upper:
                    # boost the amplitude of the emphasised word
                    temp.data = temp.data * 5
                self.res_voice.data = np.append(self.res_voice.data, temp.data)
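The create_tone(0, ..., 0) calls above effectively generate silence of the required length. The same thing can be sketched directly in numpy (the helper name is mine; the 16 kHz rate is taken from the Audio(rate=16000) calls in this example):

import numpy as np

RATE = 16000  # sample rate used by the Audio(rate=16000) calls above

def silence(seconds, rate=RATE):
    # `seconds` of silence as int16 samples
    return np.zeros(int(seconds * rate), dtype=np.int16)

comma_pause = silence(0.25)     # 250 ms for a comma (4000 samples)
sentence_pause = silence(0.5)   # 500 ms for . ? ! (8000 samples)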
Example #4
    def get_audio(self, rate=RATE):
        """
        Return synthesized output as an `Audio` object containing
        the concatenated audio for the input diphone sequence.
        """

        # Create audio sequence from diphones
        output_audio = []
        for diphone in self.diphones:
            filename = self.get_filename(diphone)
            audio = self.audio[filename]
            output_audio.append(audio.data)

        # Instantiate output `Audio` object
        output = simpleaudio.Audio(rate=rate)

        # Concatenate audio and rescale
        output.data = numpy.concatenate(output_audio)
        output.rescale(1.0)
        return output
Example #5
    def __init__(self, diphones, directory):
        """
        Initialize synthesizer.
        - `diphones` (list): sequence of diphones
        - `directory` (str): directory containing the diphone .wav files
        """
        self.diphones = diphones

        # Create mapping from diphone filenames to audio
        self.audio = {}
        for diphone in self.diphones:
            filename = self.get_filename(diphone)
            if filename not in self.audio:

                # Ensure that file exists
                path = os.path.join(directory, filename)
                if not os.path.isfile(path):
                    sys.exit(f"Couldn't locate '{filename}'")

                # Load its contents and add to dictionary
                audio = simpleaudio.Audio()
                audio.load(path)
                self.audio[filename] = audio
Example #6
    def concat_diphones(self, diph_emphasis=None, smoother=False):
        """
        Description: 
        
        Input : A string of missing diphone
        Output: A string of corresponding subsitution diphone
        """
        # Audio instance to store the TTS audio output
        output = simpleaudio.Audio(rate=16000)

        # Variable to track diphone index and processing diphone_index
        diphone_index = 0

        # Go through the diphones in the ordered diphone sequence
        for each_diphone in self.diphone_seq:

            # Create an Audio instance to store temporary audio data
            temp_diphone = simpleaudio.Audio(rate=16000)
            temp_diphone.data = self.diphones[each_diphone]

            # Extension D Emphasis markup
            # If any emphasis marking is used
            if diph_emphasis:
                # Don't rescale silence (avoids a numpy warning); it is still concatenated below
                if each_diphone == "s_short" or each_diphone == "s_long":
                    pass
                # Adjust the volume of the diphone if its index is marked as an emphasis diphone in the set
                elif diphone_index in diph_emphasis:
                    # (EXTRA) Loud fricatives make unpleasant noise, so the adjustment value (0.60)
                    # is slightly lower than for other emphasised diphones (0.65)
                    if 's' in each_diphone or 'th' in each_diphone or 'f' in each_diphone:
                        adjust_value = 0.60
                    else:
                        adjust_value = 0.65
                    temp_diphone.rescale(adjust_value)
                # (EXTRA) For a smoother emphasis transition, also slightly adjust the diphone that
                # directly follows an emphasised diphone (0.525)
                elif diphone_index - 1 in diph_emphasis:
                    adjust_value = 0.525
                    temp_diphone.rescale(adjust_value)

            # Normal concatenation without smoothing
            if not smoother:
                output.data = np.concatenate((output.data, temp_diphone.data))
            # If the smoother is used, implement Extension E - Smoother Concatenation
            else:
                adjust_level = 0.0
                # This loop rescales the 160 data points (10 ms) near both edges of the diphone
                for index in range(0, 161):
                    if diphone_index > 0:
                        # Except for the first diphone:
                        # scale the data points in the initial 10 ms of the current diphone,
                        # starting from the 1st point, then the 2nd, 3rd... (from the edge towards the middle)
                        temp_diphone.data[index] = temp_diphone.data[
                            index] * adjust_level / 160.0
                    if diphone_index < len(self.diphone_seq) - 1:
                        # Except for the last diphone:
                        # scale the data points in the last 10 ms of the current diphone,
                        # starting from the last point, then the 2nd last, 3rd last... (from the edge towards the middle)
                        temp_diphone.data[-(index + 1)] = temp_diphone.data[-(
                            index + 1)] * adjust_level / 160.0
                    # Grow louder when moving inward on the next round of the loop
                    adjust_level += 1

                # After rescaling, split the whole diphone into two portions:
                # (1) the initial 10 ms, and (2) everything after 10 ms
                initial_10ms = temp_diphone.data[:160]
                after_10ms = temp_diphone.data[160:]

                # Combine the diphone portions in output.data
                if diphone_index == 0:
                    # For the 1st diphone, concatenate the whole processed diphone data
                    output.data = np.concatenate(
                        (output.data, temp_diphone.data))
                else:
                    # For later diphones, add up / cross-fade the first 10 ms of the current diphone
                    # with the last 10 ms of the previous diphone (saved in output.data on the previous round)
                    output.data[-160:] = output.data[-160:] + initial_10ms
                    # Concatenate the remaining part of the processed diphone data
                    output.data = np.concatenate((output.data, after_10ms))
            # Increase the monitoring index
            diphone_index += 1
        # Return the concatenated audio
        return output
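The smoother branch above applies a linear ramp over the first and last 160 samples (10 ms at 16 kHz) of each diphone and adds the overlapping edges together. A self-contained numpy sketch of that ramp-and-add idea (function and variable names are illustrative, not taken from the example):

import numpy as np

OVERLAP = 160  # 10 ms at 16 kHz, matching the loop above

def overlap_add(chunks, overlap=OVERLAP):
    # Join int16 chunks, ramping each inner edge linearly and summing the overlapping regions
    ramp = np.linspace(0.0, 1.0, overlap)
    out = np.array([], dtype=np.float64)
    for i, chunk in enumerate(chunks):
        data = chunk.astype(np.float64)
        if i > 0:
            data[:overlap] *= ramp            # fade in (not on the first chunk)
        if i < len(chunks) - 1:
            data[-overlap:] *= ramp[::-1]     # fade out (not on the last chunk)
        if i == 0:
            out = data
        else:
            out[-overlap:] += data[:overlap]  # cross-fade with the previous chunk's tail
            out = np.concatenate((out, data[overlap:]))
    return out.astype(np.int16)

smoothed = overlap_add([np.full(800, v, dtype=np.int16) for v in (500, 1000, 1500)])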
Example #7
    def get_wavs(self, wav_folder):
        """
        Description: Construct a unique set of required diphones, load the corresponding numpy array 
        from their .wav file, and save the array to a dictionary
        
        Input : A path to wav_folder, a list of requested diphones, a list of diphone features
        Output: A dictionary of diphone audio numpy array
        NOTE  : Focus on efficiecy 
                (1) Only load data from database for each REQUIRED UNIQUE diphones 
                (2) Save data in numpy array instead of object instance
        """
        # Variables to store diphones
        diphone_path = dict([])
        diphones = dict([])

        # To ensure efficiency, build a set of the unique diphones that need to be retrieved from file.
        # This avoids reloading the same file again and again when the synthesis sentence is long and
        # contains lots of repeated words, e.g. a long sentence repeating the, a, he, she ...
        unique_diphones = set(self.diphone_seq)

        # Only go through the database once. Storing a complete dictionary of available diphones and
        # their paths is still necessary because a required diphone might be missing, in which case we
        # need to know which similar diphones in the database can be used instead.
        for root, dirs, files in os.walk(wav_folder, topdown=False):
            for file in files:
                diphone_path[file] = os.path.join(root, file)

        # Go through the required diphones, use the method in an Audio instance to load the numpy array data,
        # then only store the np array data in the diphone dictionary (i.e. key: diphone, value: np array)
        for required_diphone in unique_diphones:

            # Audio instance to handle audio information
            sound_obj = simpleaudio.Audio(rate=16000)

            # Extension B Punctuation: Short silence (200 ms)
            if required_diphone == "s_short":
                sound_obj.create_noise(3200, 0)
                diphones[required_diphone] = sound_obj.data
            # Extension B Punctuation: Long silence (400 ms)
            elif required_diphone == "s_long":
                sound_obj.create_noise(6400, 0)
                diphones[required_diphone] = sound_obj.data
            else:
                # Handle normal diphones
                try:
                    # Load the audio data from the corresponding path
                    path = diphone_path[required_diphone + ".wav"]
                    sound_obj.load(path)
                    # Save the array data in a dictionary
                    diphones[required_diphone] = sound_obj.data
                except KeyError:
                    # A KeyError here means the required diphone is missing from the diphone database
                    print("*** This is a missing diphone: ", required_diphone)
                    # Instead of quitting the program, use the sub_diphone method to find a suitable substitute diphone
                    sub_diphone = self.sub_diphone(required_diphone)
                    # NOTE: Tell the user about the diphone substitution
                    print("*** Using substitute diphone: ", sub_diphone)
                    # Save the array data in the dictionary
                    path = diphone_path[sub_diphone + ".wav"]
                    sound_obj.load(path)
                    diphones[required_diphone] = sound_obj.data

        # Return the complete dictionary that contains diphone array data
        return diphones
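The two passes above (index every .wav file once, then load only the unique diphones) can be illustrated on their own as follows (the folder layout and the sequence are made up for the example):

import os

def index_wavs(wav_folder):
    # Map each filename under wav_folder to its full path
    paths = {}
    for root, dirs, files in os.walk(wav_folder):
        for name in files:
            paths[name] = os.path.join(root, name)
    return paths

# Load each required diphone only once, however often it repeats in the sequence
diphone_seq = ['pau-h', 'h-e', 'e-l', 'l-ow', 'ow-pau', 'pau-h', 'h-e']
unique_diphones = set(diphone_seq)   # 5 unique diphones instead of 7 loads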
Example #8
    if play:
        object.play()


# (PART V) Main module
if __name__ == "__main__":

    # Step 1 - Create an Utterance instance to handle text normalization and annotation (incl. translation of numbers) of the input text
    utt = Utterance(input_text=args.phrase[0])

    # Step 2 - Get the diphone sequence and feature information (emphasis) after text normalization
    diph_emphasis = utt.diph_emphasis
    diphone_seq = utt.diphone_seq

    # Step 3 - Create a Synth instance to synthesise sound based on the given information
    diphone_synth = Synth(wav_folder=args.diphones,
                          diphone_seq=diphone_seq,
                          diph_emphasis=diph_emphasis)

    # Step 4 - Copy the concatenated audio data from the Synth instance 'diphone_synth' into an output Audio instance 'output'
    output = simpleaudio.Audio(rate=16000)
    output.data = diphone_synth.output.data

    # Step 5 - Further adjustment of the overall volume of the final output (if the user uses -v <0-100>)
    output = adjust_volume(volume=args.volume, object=output)

    # Step 6 - Save it to the target file (if the user uses -o <args.outfile>)
    save(output_file=args.outfile, object=output)

    # Step 7 - Play the final sound output (if the user uses -p)
    play_audio(play=args.play, object=output)
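The args object used throughout these examples is never set up in the snippets shown. A minimal argparse sketch of how the referenced flags might be declared (the attribute names are taken from the code above; the short flags, help texts and defaults are assumptions):

import argparse

parser = argparse.ArgumentParser(description="Basic diphone synthesiser")
parser.add_argument('phrase', nargs=1, help="The phrase to synthesise")
parser.add_argument('--diphones', default="./diphones", help="Folder containing the diphone .wav files")
parser.add_argument('-p', '--play', action='store_true', help="Play the output audio")
parser.add_argument('-o', '--outfile', help="Save the output audio to this .wav file")
parser.add_argument('-v', '--volume', type=int, help="Output volume between 0 and 100")
parser.add_argument('-c', '--crossfade', action='store_true', help="Smooth the diphone joins")
parser.add_argument('-s', '--spell', action='store_true', help="Spell the phrase letter by letter")
parser.add_argument('-r', '--reverse', choices=['words', 'phones', 'signal'], help="Reverse words, phones or the whole signal")
args = parser.parse_args()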
Example #9
    def get_wavs(self, wav_folder):
        ''' This function produces the full wave for the phrase, by concatenating the diphone files together'''

        # list the entire collection of available diphone sounds from the diphone folder in self.diphones
        for root, dirs, files in os.walk(wav_folder, topdown=False):
            for file in files:
                self.diphones.append(file)

        # the diphone sounds for the phrase will be added to this list in numpy array format, dtype = int16
        diphone_sounds = []

        if args.spell:
            # loop through the letters in the word(s) in the normalised phrase,
            # load the corresponding diphones from the diphone folder,
            # access the numpy array of this diphone and append it to diphone sounds
            for word in norm_phrase.split():
                for letter in word:
                    for diphone in diphone_seq[letter]:
                        d = sa.Audio()
                        d.load("diphones/{}".format(diphone))
                        num_array = d.data
                        diphone_sounds.append(num_array)

        else:
            # loop through the list objects in phrase_punc (these are words and allowed punctuation),
            # load the corresponding diphones for the words from the diphone folder, and
            # access the numpy array of this diphone and append it to diphone sounds
            for i in range(len(phrase_punc)):
                if phrase_punc[i] in diphone_seq.keys():
                    for diphone in diphone_seq[phrase_punc[i]]:
                        d = sa.Audio()
                        d.load("args.diphones/{}".format(diphone))
                        num_array = d.data
                        diphone_sounds.append(num_array)

                # for the punctuation list objects
                else:
                    try:
                        # insert a pause from the end of the previous word before the silence
                        d = sa.Audio()
                        d.load("diphones/{}-pau.wav".format(
                            phone_seq[phrase_punc[i - 1]][-1]))
                        num_array = d.data
                        diphone_sounds.append(num_array)

                        if phrase_punc[i] == ',':
                            # insert silence in place of the punctuation
                            silence = np.zeros(2000, dtype=np.int16)
                            diphone_sounds.append(silence)

                        if phrase_punc[i] in ['.', ':', '!', '?']:
                            # insert 400ms of silence in place of the punctuation
                            silence = np.zeros(6400, dtype=np.int16)
                            diphone_sounds.append(silence)

                        if i <= len(phrase_punc) - 2:
                            # insert a pause after the silence leading into the next word,
                            # but only if there is a next word
                            d.load("diphones/pau-{}.wav".format(
                                phone_seq[phrase_punc[i + 1]][0]))
                            num_array = d.data
                            diphone_sounds.append(num_array)

                    except KeyError:
                        print("Ignoring consecutive punctuation:{}".format(
                            phrase_punc[i]))

        # concatenate the diphone sounds to produce the phrase sound
        phrase_sound = np.concatenate(diphone_sounds)

        # create the instance of the phrase wave file
        x = sa.Audio(rate=16000)
        x.data = phrase_sound

        if args.play:
            if args.volume:
                # scale the volume integer entered by the user so that it can be understood
                # by rescale in SimpleAudio
                volume = int(args.volume) * 0.01
                x.rescale(volume)
            x.play()

        if args.outfile:
            x.save(args.outfile)
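For reference, the silence lengths above are sample counts at the 16 kHz rate set on the output Audio object; a small helper makes the millisecond-to-sample conversion explicit (the helper itself is not part of the example):

def ms_to_samples(ms, rate=16000):
    # Number of samples needed for `ms` milliseconds of audio at `rate` Hz
    return int(rate * ms / 1000)

ms_to_samples(200)   # 3200 samples
ms_to_samples(400)   # 6400 samples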
Example #10
 def __init__(self, wav_folder):
     self.diphones = {}
     self.sound = simpleaudio.Audio()
     self.get_wavs(wav_folder)
Example #11
            dip_seq.append("PAU")  # add a pause at end of the diphone sequence
        result = " ".join(dip_seq)
        return result


if __name__ == "__main__":
    # Initialize the Utterance class, get the diphone sequence and the diphone folder path
    utt = Utterance(args.phrase[0])
    diphone_seq = utt.get_phone_seq()
    diphone_synth = Synth(os.path.join(os.getcwd(), args.diphones))

    diphone_seq = normalise_diphone_seq(diphone_seq)
    # out is the Audio object which will become your output
    # you need to modify out.data to produce the correct synthesis

    out = sa.Audio(rate=16000)

    print(diphone_seq)

    # insert silence for comma and .?!
    for token in diphone_seq:
        d = sa.Audio(rate=16000)
        if token in ',':
            # 200ms which is 0.2s for comma
            insert_silence(out, 0.20)
        elif token in '.?!':
            insert_silence(out, 0.40)
        else:
            # load the wav file
            d.load(path=diphone_synth.diphones[token])
            # smooth the data using the smoother function
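insert_silence and smoother are referenced in this snippet but not shown. A plausible sketch of insert_silence, assuming it simply appends zeros to the Audio object's data as the other examples do (this is a guess at the helper, not its actual definition):

import numpy as np

def insert_silence(audio, seconds):
    # Append `seconds` of silence to an Audio-like object exposing .rate and .data
    pause = np.zeros(int(audio.rate * seconds), dtype=np.int16)
    audio.data = np.concatenate((audio.data, pause))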
Example #12
    def synthesize(self, diphonelist, crossfade=False):
        """
        This function checks for silence and appends diphones to a
        :param diphonelist: a list of diphones to be synthesized
        :param crossfade: argument passed through argpass that decides whether to crossfade diphones
        :return:
        """
        self.diphonesound = simpleaudio.Audio(rate=16000)
        self.diphone_wavdata_list = []
        for key in diphonelist:
            self.silence_length = 0
            try:  # Which diphone file should be loaded?

                # Delete silence specification in string form (for now...)
                key_no_sil = re.sub('[24]', '', key)

                # Create the string that can find the diphone file
                diphone_file = str(self.wav_folder + '/' +
                                   self.diphones[key_no_sil])

                # load it
                self.diphonesound.load(diphone_file)

                # put audio data into the list (diphone_wavdata_list is a list of arrays)
                self.diphone_wavdata_list.append(self.diphonesound.data)

            except Exception as e:
                strings = [
                    'Diphone {} not present in dictionary.'.format(e),
                    'Backing off...',
                    'Searching for a diphone to fill in for {}'.format(e)
                ]
                printdots(strings)

                # Attempt an emergency key search

                backupkey = self.emergency_diphone(key)

                # Create the string that can find the diphone file
                diphone_file = str(self.wav_folder + '/' +
                                   self.diphones[backupkey])

                # load it
                self.diphonesound.load(diphone_file)

                # put audio data into the list (diphone_wavdata_list is a list of arrays)
                self.diphone_wavdata_list.append(self.diphonesound.data)

            # check whether a pause marker was attached to this key
            if key[-1] == '2':
                # 200ms of silence
                self.silence_length = 0.2

            if key[-1] == '4':
                # 400ms of silence
                self.silence_length = 0.4

            # append silence to the list if a value was added to variable self.silence_length during loop
            self.add_silence() if self.silence_length != 0 else None

        # reuse this from loading diphones, as the waveform settings/ internal objects will be correct
        self.new_object = self.diphonesound

        # join audio data chunks into one waveform
        self.crossfade() if args.crossfade else self.naively_concatenate()

        return self.new_object
Example #13
def main():
    # Step 1 - Get the input utterance sequence
    inputseq = args.phrase[0]
    # Step 2 - Put the text in a Sequence instance
    inputseq = Sequence(inputseq)
    print("inputseq:", inputseq)
    print("inputseq.tokens:", inputseq.tokens)


    for eachtoken in inputseq.tokens:
        for eachchar in eachtoken.chars:
            print("eachchar:", eachchar)

            eachchar.eachphone = simpleaudio.Audio()

            # Audio instance to handle audio information
            sound_obj = simpleaudio.Audio(rate=48000)

            if eachchar.phone[0] in ["sil_200", "sil_400"]:
                if eachchar.phone[0] == "sil_200":
                    sound_obj.create_noise(9600, 0)
                if eachchar.phone[0] == "sil_400":
                    sound_obj.create_noise(19200, 0)
                eachchar.eachphone.data = sound_obj.data
            else:
                phone = str(eachchar.phone[0])
                if not phone[-1].isdigit():
                    phone = phone + "5"
                eachchar.path = path + phone + ".wav"
                eachchar.eachphone.load(eachchar.path)

    output = simpleaudio.Audio()

    # Variable to track the character index while processing
    char_index = 0
    # Build a flat list of all characters (used to detect the last one)
    charlist = []
    for eachtoken in inputseq.tokens:
        for eachchar in eachtoken.chars:
            charlist.append(eachchar.char)

    for eachtoken in inputseq.tokens:
        for eachchar in eachtoken.chars:
            empty_spacing = simpleaudio.Audio(rate=16000)
            empty_spacing.create_noise(40, 0)

            temp_diphone = simpleaudio.Audio(rate=16000)
            temp_diphone.data = eachchar.eachphone.data
            # Plain concatenation (with a short spacer) when crossfading is not requested
            if not args.crossfade:
                output.data = np.concatenate((output.data, temp_diphone.data))
                output.data = np.concatenate((output.data, empty_spacing.data))
            # If smoother is used, implement Extension E - Smoother Concatenation
            else:
                adjust_level = 0.0
                # This loop rescales the 320 data points near both edges of the phone audio
                for index in range(0, 321):
                    if char_index > 0:
                        # Except for the first phone:
                        # scale the data points at the start of the current phone,
                        # starting from the 1st point, then the 2nd, 3rd... (from the edge towards the middle)
                        temp_diphone.data[index] = temp_diphone.data[
                            index] * adjust_level / 320.0
                    if char_index < len(charlist) - 1:
                        # Except for the last phone:
                        # scale the data points at the end of the current phone,
                        # starting from the last point, then the 2nd last, 3rd last... (from the edge towards the middle)
                        temp_diphone.data[-(index + 1)] = temp_diphone.data[-(
                            index + 1)] * adjust_level / 320.0
                    # Grow louder when moving inward on the next round of the loop
                    adjust_level += 1

                # After rescaling, split the phone audio into two portions:
                # (1) the initial 320 samples, and (2) everything after them
                initial_edge = temp_diphone.data[:320]
                after_edge = temp_diphone.data[320:]

                # Combine the portions in output.data
                if char_index == 0:
                    # For the 1st phone, concatenate the whole processed data
                    output.data = np.concatenate(
                        (output.data, temp_diphone.data))
                else:
                    # For later phones, add up / cross-fade the first 320 samples of the current phone
                    # with the last 320 samples of the previous one (saved in output.data on the previous round)
                    output.data[-320:] = output.data[-320:] + initial_edge
                    # Concatenate the remaining part of the processed data
                    output.data = np.concatenate((output.data, after_edge))
            # Increase the monitoring index
            char_index += 1

    # Step 5 - Further adjustment of the overall volume of the final output (if the user uses -v <0-100>)
    output = adjust_volume(volume=args.volume, object=output)
    # Todo: pass the volume value directly here

    # Step 6 - Save it to the target file (if the user uses -o <args.outfile>)
    save(output_file=args.outfile, object=output)
    # Todo: pass the output file value directly here
    save_pickle(output_file=args.outfile, object=output)
    # Todo: pass the output file value directly here
    # Step 7 - Play the final sound output (if the user uses -p)
    play_audio(play=args.play, object=output)
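The phone[-1].isdigit() check above appends a default tone digit when a syllable label carries none, before building the .wav filename. In isolation (sample syllables only; the default tone 5 is simply what this example chose):

for phone in ["maa1", "ngo5", "hai"]:
    if not phone[-1].isdigit():
        phone = phone + "5"
    print(phone + ".wav")   # maa1.wav, ngo5.wav, hai5.wav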
Example #14
 def __init__(self, wav_folder):
     # initialize Synth by creating a dictionary of the diphone wav files in the wav_folder
     self.diphones = {}
     self.out = simpleaudio.Audio()
     self.get_wavs(wav_folder)
Example #15
 def __init__(self, wav_folder):
     self.phones = {}
     self.vol = 0.2
     self.get_wavs(wav_folder)
     self.res_voice = simpleaudio.Audio()
Example #16
    signal_reverse = False
    if args.reverse == 'words':
        words_reverse = True
    elif args.reverse == 'phones':
        phones_reverse = True
    elif args.reverse == 'signal':
        signal_reverse = True

    if args.spell:
        phone_seq = utt.get_spell_diphone_seq(words_r=words_reverse, phones_r=phones_reverse)
    else:
        phone_seq = utt.get_diphone_seq(words_r=words_reverse, phones_r=phones_reverse)

    diphone_synth = Synth(wav_folder=args.diphones)

    output_filename = args.outfile if args.outfile else 'out_file.wav'

    diphone_synth.get_diphone_seq_concatenation(phone_seq, output_filename, signal_r=signal_reverse,
                                                emphasis_i=utt.emphasis_markup(), crossfade=args.crossfade)

    out = simpleaudio.Audio()
    out.load(output_filename)

    if args.volume:
        if args.volume < 0 or args.volume > 100:
            raise ValueError("Expected volume value between 0 and 100.")
        out.rescale(args.volume / 100)

    if args.play: out.play()
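Several of these examples map the user's 0-100 volume flag onto the 0.0-1.0 range expected by rescale(); that conversion on its own looks like this (the helper name is mine):

def scale_volume(volume_percent):
    # Map a 0-100 volume value onto the 0.0-1.0 range used by rescale()
    if volume_percent < 0 or volume_percent > 100:
        raise ValueError("Expected volume value between 0 and 100.")
    return volume_percent / 100

scale_volume(60)   # 0.6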