def get_local_shimmer(sound,
                      min_time=0.,
                      max_time=0.,
                      pitch_floor=75.,
                      pitch_ceiling=600.,
                      period_floor=0.0001,
                      period_ceiling=0.02,
                      max_period_factor=1.3,
                      max_amplitude_factor=1.6):
    """
    Compute the (local) shimmer of a sound through Praat.

    A periodic (cross-correlation) PointProcess is first derived from the sound,
    then Praat's 'Get shimmer (local)' query is run on the sound/PointProcess pair.
    :param (parselmouth.Sound) sound: sound waveform
    :param (float) min_time: start of the analysed time range (default: 0.)
    :param (float) max_time: end of the analysed time range (default: 0.)
           NOTE: If max_time <= min_time, the entire time domain is considered
    :param (float) pitch_floor: minimum pitch used to build the PointProcess (default: 75.)
    :param (float) pitch_ceiling: maximum pitch used to build the PointProcess (default: 600.)
    :param (float) period_floor: shortest interval, in seconds, used in the shimmer
           computation (default: 0.0001)
    :param (float) period_ceiling: longest interval, in seconds, used in the shimmer
           computation (default: 0.02)
    :param (float) max_period_factor: largest allowed difference between consecutive
           intervals used in the shimmer computation (default: 1.3)
    :param (float) max_amplitude_factor: maximum amplitude factor for shimmer (default: 1.6)
    :return: value of (local) shimmer as returned by Praat
    """
    # Glottal pulses are needed before shimmer can be queried.
    pulses = call(sound, 'To PointProcess (periodic, cc)', pitch_floor,
                  pitch_ceiling)

    shimmer_settings = (min_time, max_time, period_floor, period_ceiling,
                        max_period_factor, max_amplitude_factor)
    return call([sound, pulses], 'Get shimmer (local)', *shimmer_settings)
Esempio n. 2
0
    def process(self):
        """Measure the cepstral peak prominence (CPP) of the configured voice.

        All settings are read from ``self.args``: the sound under ``"voice"``,
        the pitch bounds, the interpolation, the tilt-line bounds, the line
        type and the fit method. The sound is converted to a spectrum, then to
        a Praat PowerCepstrum, on which 'Get peak prominence' is queried.

        :return: ``{"cpp": value}`` on success, or
                 ``{"cpp": "Measurement failed"}`` if any step raises.
        """
        try:
            settings = self.args
            voice = settings["voice"]
            pitch_floor = settings["Pitch Floor"]
            pitch_ceiling = settings["Pitch Ceiling"]
            interpolation = settings["interpolation"]
            # The dictionary keys keep the spelling used by the caller.
            tilt_line_lower_bound = settings["Tilt line qeufrency lower bound"]
            tilt_line_upper_bound = settings["Tilt line qeufrency upper bound"]
            linetype = settings["Line type"]
            fitmethod = settings["Fit method"]

            cepstrum = call(voice.to_spectrum(), "To PowerCepstrum")
            cpp = call(
                cepstrum,
                "Get peak prominence",
                pitch_floor,
                pitch_ceiling,
                interpolation,
                tilt_line_lower_bound,
                tilt_line_upper_bound,
                linetype,
                fitmethod,
            )
            return {"cpp": cpp}
        except Exception:
            # Best-effort measurement: any ordinary failure is reported as a
            # value. ``Exception`` (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            return {"cpp": "Measurement failed"}
Esempio n. 3
0
def extract_syllable_intervals(file_name):
    """Return the time differences between consecutive syllable nuclei of a file.

    The Praat script 'syllable_nuclei.praat' is run on ``file_name``; the second
    object it returns is used as a TextGrid whose first tier holds one point per
    syllable nucleus. The result is ``np.diff`` of those point times.
    """
    print("Extracting syllable intervals from '{}'...".format(file_name))

    # Script arguments, per the script file: 'Silence threshold (dB)',
    # 'Minimum dip between peaks (dB)', 'Minimum pause duration', file name.
    # The script leaves two objects selected (Sound, TextGrid); keep the second.
    textgrid = run_file('syllable_nuclei.praat', -25, 2, 0.3, file_name)[1]

    # Praat numbers points from 1, so query indices 1..point_count on tier 1.
    point_count = call(textgrid, "Get number of points", 1)
    nucleus_times = []
    for point_index in range(1, point_count + 1):
        nucleus_times.append(
            call(textgrid, "Get time of point", 1, point_index))

    return np.diff(nucleus_times)
Esempio n. 4
0
    def to_lpc(self,
               method:str,
               prediction_order:int=16,
               window_length:Real=0.025,
               time_step:Real=0.005,
               pre_emphasis_frequency:Real=50,
               **kwargs:Any) -> pm.Data:
        """
        Convert this sound to an LPC object with the Praat command
        "To LPC (<method>)".

        Parameters
        ----------
        method: str,
            one of "autocorrelation", "covariance", "burg", "marple"
        prediction_order: int, default 16,
        window_length: real number, default 0.025,
        time_step: real number, default 0.005,
        pre_emphasis_frequency: real number, default 50,
        kwargs: dict, optional,
            "tolerance1", "tolerance2" for `method` "marple", both default 1.0e-6

        Returns
        -------
        the object returned by the Praat command

        Raises
        ------
        ValueError
            if `method` is not one of the supported algorithms
            (previously this surfaced as an UnboundLocalError)
        """
        shared_args = (prediction_order, window_length, time_step, pre_emphasis_frequency)
        if method in ("autocorrelation", "covariance", "burg"):
            extra_args = ()
        elif method == "marple":
            # Only the Marple algorithm takes the two tolerance settings.
            extra_args = (kwargs.get("tolerance1", 1.0e-6),
                          kwargs.get("tolerance2", 1.0e-6))
        else:
            raise ValueError(
                "Argument 'method' must be one of "
                "['autocorrelation', 'covariance', 'burg', 'marple'], "
                f"got {method!r}")
        return call(self, f"To LPC ({method})", *shared_args, *extra_args)
Esempio n. 5
0
def resample(soundObj, target_sampling_rate, precision_ms=50):
    '''Resample soundObj to target_sampling_rate with Praat's 'Resample' command.

    precision_ms is forwarded unchanged as the second argument of 'Resample'.
    NOTE(review): the Praat manual describes that argument as a precision in
    samples, not milliseconds -- confirm the intended unit.

    Returns the resampled sound object (praat); the input object is not modified
    by this function.
    '''
    # The original sampling frequency was queried here but never used, so the
    # extra Praat round-trip has been dropped.
    return call(soundObj, 'Resample', target_sampling_rate, precision_ms)
Esempio n. 6
0
    def relative_position(self, extremum, type, start, end):
        """
        Calculate the relative position of either a maximum or minimum value within a timespan delimited by start and end timestamps.

        extremum: one of "maximum" and "minimum" (inserted into the Praat command "Get time of <extremum>")
        type: one of "pitch" and "intensity"; selects self.pitch_obj or self.int_obj
        start, end: timestamps delimiting the timespan

        Returns (time of extremum - start) / (end - start).
        Raises ValueError if `type` is neither "pitch" nor "intensity"
        (previously this failed later with a TypeError on ``None - start``).
        """
        if type == "pitch":
            base = self.pitch_obj
            # The pitch query takes a unit in addition to the interpolation.
            query_args = (start, end, "Hertz", "None")
        elif type == "intensity":
            base = self.int_obj
            query_args = (start, end, "None")
        else:
            raise ValueError(
                "Argument 'type' must be one of ['pitch', 'intensity']")

        extremum_at = praat.call(base, f"Get time of {extremum}", *query_args)

        time_passed = extremum_at - start
        return time_passed / (end - start)
Esempio n. 7
0
 def to_formant(self,
                method:str="burg",
                time_step:Optional[Real]=None,
                max_number_of_formants:Real=5.0,
                maximum_formant:Real=5500.0,
                window_length:Real=0.025,
                pre_emphasis_from:Real=50.0,
                number_of_std_dev:Real=1.5,
                maximum_number_of_iterations:Real=5,
                tolerance:Real=1.0e-6) -> pm.Formant:
     """
     Convert this sound to a Formant object with the selected algorithm.

     Parameters
     ----------
     method: str, default "burg", can also be "sl" (aliases "split levinson",
         "split levinson (willems)"), "keep all", "robust"; matched case-insensitively,
     time_step: real number, optional, units in (s); None is sent to Praat as 0.0
         for the non-"burg" methods,
     max_number_of_formants: real number, default 5.0,
     maximum_formant: real number, default 5500.0, units in (Hz),
     window_length: real number, default 0.025, units in (s),
     pre_emphasis_from: real number, default 50.0, units in (Hz),
     number_of_std_dev: real number, default 1.5, only used by "robust",
     maximum_number_of_iterations: real number, default 5, only used by "robust",
     tolerance: real number, default 1.0e-6, only used by "robust"

     Raises
     ------
     ValueError
         if `method` is not recognised (previously None was returned silently)
     """
     # The normalised name was computed but never consulted before, which made
     # the lookup case-sensitive by accident.
     m = method.lower()
     if m == "burg":
         return self.to_formant_burg(time_step, max_number_of_formants, maximum_formant, window_length, pre_emphasis_from)

     praat_args = (time_step or 0.0, max_number_of_formants, maximum_formant, window_length, pre_emphasis_from)
     if m in ("sl", "split levinson", "split levinson (willems)"):
         return call(self, "To Formant (sl)", *praat_args)
     if m == "keep all":
         return call(self, "To Formant (keep all)", *praat_args)
     if m == "robust":
         return call(self, "To Formant (robust)", *praat_args, number_of_std_dev, maximum_number_of_iterations, tolerance)
     raise ValueError(
         "Argument 'method' must be one of ['burg', 'sl', 'keep all', 'robust'], "
         f"got {method!r}")
def extractPitch(sound, pitchFloor, pitchCeiling, unit, interpolation):
    """Return (minimum, maximum, mean, standard deviation) of the sound's pitch.

    A Praat Pitch object is built with 'To Pitch' (time step 0.0 and the given
    floor/ceiling); each statistic is then queried with the time range 0, 0.
    `unit` is passed to all four queries, `interpolation` only to the
    minimum and maximum queries.
    """
    pitchTrack = call(sound, "To Pitch", 0.0, pitchFloor, pitchCeiling)

    def query(command, *settings):
        # Every statistic uses the same 0, 0 time range.
        return call(pitchTrack, command, 0, 0, *settings)

    return (
        query("Get minimum", unit, interpolation),
        query("Get maximum", unit, interpolation),
        query("Get mean", unit),
        query("Get standard deviation", unit),
    )
Esempio n. 9
0
    def get_excursion(self, level=""):
        """
        Compute the pitch excursion of every row of ``self.nuclei``, normalised
        on either the "word" level or the intonation phrase ("ip") level.

        For each word / intonation phrase span the 10th percentile of the pitch
        contour (in Hertz) is queried from ``self.pitch_obj``; the excursion is
        then 12 * log2(f0_max / f0_q10).

        Returns a NumPy array of excursions.
        Raises ValueError when `level` is neither "word" nor "ip".
        """
        if level not in ("word", "ip"):
            raise ValueError("Argument 'level' must be one of ['word', 'ip']")

        def tenth_percentile(span_start, span_end):
            # 10th percentile of the pitch contour within one span
            return praat.call(
                self.pitch_obj,
                "Get quantile",
                span_start,
                span_end,
                0.1,
                "Hertz",
            )

        if level == "word":
            check_input_df(self.nuclei, ["word_start", "word_end", "f0_max"])

            # Keep only rows whose word boundaries are both known.
            has_word_span = (self.nuclei["word_start"].notna()) & (
                self.nuclei["word_end"].notna())
            spans = self.nuclei[has_word_span].copy()

            spans["f0_q10"] = [
                tenth_percentile(row.word_start, row.word_end)
                for row in spans.itertuples()
            ]

            # No explicit keys: pandas joins on all shared columns.
            norm_df = pd.merge(self.nuclei, spans, how="left")

        else:
            check_input_df(self.nuclei, ["ip_start", "ip_end", "f0_max"])

            has_ip_span = (self.nuclei["ip_start"].notna()) & (
                self.nuclei["ip_end"].notna())
            with_ip = self.nuclei[has_ip_span].copy()

            # One quantile query per distinct intonation phrase.
            spans = with_ip[["ip_start", "ip_end"]].drop_duplicates()

            spans["f0_q10"] = [
                tenth_percentile(row.ip_start, row.ip_end)
                for row in spans.itertuples()
            ]

            norm_df = pd.merge(self.nuclei,
                               spans,
                               on=["ip_start", "ip_end"],
                               how="left")

        # Excursion in semitones: 12 * log2(F0_max/F0_10%)
        pitch_ratio = norm_df["f0_max"] / norm_df["f0_q10"]
        return np.array(12 * np.log2(pitch_ratio))
def extractIntensity(sound, minPitch, timeStep, interpolation):
    """Return (minimum, maximum, mean, standard deviation) of the sound's intensity.

    A Praat Intensity object is built with 'To Intensity' from `minPitch` and
    `timeStep`; each statistic is queried with the time range 0, 0.
    `interpolation` is only passed to the minimum and maximum queries.
    """
    intensityCurve = call(sound, "To Intensity", minPitch, timeStep)

    def query(command, *settings):
        # Every statistic uses the same 0, 0 time range.
        return call(intensityCurve, command, 0, 0, *settings)

    return (
        query("Get minimum", interpolation),
        query("Get maximum", interpolation),
        query("Get mean"),
        query("Get standard deviation"),
    )
def audio_to_textgrid(audio_no_annot_path, textgrid_path):
    """Generates .TextGrid files from audio using Praat.

    Every entry of `audio_no_annot_path` is loaded as a sound, noise-reduced
    with Praat's 'Remove noise', segmented with 'To TextGrid (silences)' and
    saved to `textgrid_path` as '<file name without extension>.TextGrid'.
    Progress is printed in steps of roughly 10%.

    :param audio_no_annot_path: path with audio not annotated.
    :param textgrid_path: path with textgrid generated.
    :returns: None.
    """
    create_data_path(textgrid_path)

    audio_files = os.listdir(audio_no_annot_path)
    print("Procesing " + str(len(audio_files)) + " audio files with Praat, can take a while...")

    # Number of files per 10% progress step; rounds to 0 for very small folders.
    data_mod = round(len(audio_files)/10)

    for index_mod, audio in enumerate(audio_files, start=1):

        sound = parselmouth.Sound(os.path.join(audio_no_annot_path, audio))

        # Take each audio file and convert to textGrid with praat
        noise_reduction = call(sound, "Remove noise", 0.0, 1.0, 0.025, 80, 10000, 40, 'Spectral subtraction')
        # 0.8 / 0.2: minimum silent and sounding interval durations
        manipulation = call(noise_reduction, "To TextGrid (silences)", 100, 0.0, -65.0, 0.8, 0.2, '', 'sounding')

        # splitext copes with extensions of any length (".wav", ".flac", ...),
        # unlike slicing off the last four characters.
        text_audio = os.path.splitext(audio)[0] + '.TextGrid'
        call(manipulation, "Save as text file", os.path.join(textgrid_path, text_audio))

        # Show progression. A zero step is handled explicitly rather than by
        # catching the ZeroDivisionError with a bare except.
        if data_mod == 0:
            print('',end="\r")
        elif index_mod % data_mod == 0:  # it means 10%, 20%, ...
            print(str(int(index_mod/data_mod * 10))+"% ",end="\r")
def main(wavscp, outdir, text):
    """Write one TextGrid per wav file listed in `wavscp` into `outdir`.

    :param wavscp: path of a text file whose lines are '<uttid> <wav path>'.
    :param outdir: output directory; created with `mkdir` when missing.
    :param text: optional path of a text file whose lines are
        '<uttid> <transcript>'; pass None to create unannotated TextGrids.
    :raises AssertionError: if an utterance id occurs twice in `text`.

    Each TextGrid gets a single tier 'spk1'. When a transcript exists for the
    utterance, one interval spanning the whole TextGrid is added to that tier,
    with spaces between adjacent CJK characters removed. Blank lines in both
    input files are ignored.
    """
    utt2text = dict()
    if not path.exists(outdir):
        mkdir(outdir)
    if text is not None:
        with open(text, 'r') as rf:
            for line in rf:
                if not line.strip():
                    continue
                uttid, _, transcript = line.partition(' ')
                # Raised explicitly (instead of `assert`) so the check is not
                # stripped under `python -O`; the exception type is unchanged.
                if uttid in utt2text:
                    raise AssertionError(
                        '[error]utterance name in text file should not be duplicated')
                utt2text[uttid] = transcript.strip()

    with open(wavscp, 'r') as rf:
        lines = [line for line in rf if line.strip()]
    rule_ch_seq_space = re.compile(
        r'(?<=[\u4e00-\u9fa5])( +)(?=[\u4e00-\u9fa5])')
    for line in tqdm(lines):
        fields = line.split(' ')
        uttid = fields[0]
        wav_path = fields[1].strip()
        wav_name = path.splitext(path.basename(wav_path))[0]
        output_tgt = path.join(outdir, f'{wav_name}.TextGrid')

        wav_read = praat.call('Read from file', wav_path)
        tg_obj = praat.call(wav_read, 'To TextGrid', 'spk1', '')
        tgt_obj = tg_obj.to_tgt()
        if uttid in utt2text:
            text_content = rule_ch_seq_space.sub('', utt2text[uttid])
            annot_text = tgt.core.Interval(tgt_obj.start_time,
                                           tgt_obj.end_time, text_content)
            tgt_obj.get_tier_by_name('spk1').add_annotation(annot_text)
        tgt.io.write_to_file(tgt_obj,
                             output_tgt,
                             format='long',
                             encoding='utf-8')
Esempio n. 13
0
    def get_all_features(self, f0min, f0max, unit):
        """Extract all voice features of ``self.voiceID`` and return them as a dict.

        The sound is loaded, a periodic PointProcess and a Pitch object are built
        with the `f0min`/`f0max` bounds, Praat's 'Voice report' is stored in
        ``self.vocal_report``, and the pitch, harmonicity, jitter, shimmer, pulse
        and voicing helpers of this object are run in that order.

        Extraction is best-effort: if any step raises, the error is logged and the
        attributes set so far are still returned.

        :param f0min: pitch floor for the PointProcess and Pitch objects.
        :param f0max: pitch ceiling for the PointProcess and Pitch objects.
        :param unit: pitch unit forwarded to ``get_pitch_parameters``.
        :return: ``self.__dict__`` (the live attribute dictionary, also stored as
                 ``self.all_vocal_parameters``).
        """
        import logging  # function-scope import keeps this block self-contained

        try:
            sound = parselmouth.Sound(self.voiceID) # read the sound
            pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
            Pitch = call(sound, "To Pitch", 0.0, f0min, f0max)

            # Vocal report
            # NOTE(review): 75 and 600 are hard-coded here while the objects above
            # use f0min/f0max -- confirm whether the report should use them too.
            self.vocal_report = parselmouth.praat.call([sound, Pitch, pointProcess], "Voice report", 0, 0, 75, 600, 1.3, 1.6, 0.03, 0.45)

            # Pitch
            self.get_pitch_parameters(Pitch=Pitch, unit=unit)

            # Harmonicity (return value is not needed here)
            self.get_harmonicity_parameters(sound)

            # Jitter
            self.get_jitter_parameters(sound=sound, pointProcess=pointProcess)

            # Shimmer
            self.get_shimmer_parameters(sound=sound, pointProcess=pointProcess)

            # Pulse
            self.get_pulse_parameters()

            # Voicing
            self.get_voicing_parameters()

        except Exception:
            # Keep the best-effort contract, but leave a trace instead of
            # discarding the failure silently.
            logging.getLogger(__name__).exception(
                "Voice feature extraction failed; returning the features computed so far")

        # NOTE(review): this stores the attribute dict inside itself
        # (self-referential); kept as-is because callers receive this object.
        self.all_vocal_parameters = self.__dict__

        return self.all_vocal_parameters
Esempio n. 14
0
def get_pitch(sound, _mean = True, _stdev= False, _range = False):
    """
    Summarise the pitch of a sound via parselmouth praat.

    A Pitch object is computed with 'To Pitch' (time step 0.0, floor 75,
    ceiling 300) and exactly one statistic is returned. The flags are checked
    in the order _mean, _stdev, _range and the first true one wins, so
    `_mean` must be passed as False to obtain the other statistics.

    Parameters
    ----------
    sound:parselmouth object
        audio object
    _mean:boolean
        True, if want to get mean pitch in Hertz (default)
    _stdev:boolean
        True, if want to get standard deviation of pitch in Hertz
    _range:boolean
        True, if want to get range of pitch, computed as 4 times the
        standard deviation

    Returns
    -------
    float
        The selected statistic; None when all three flags are false.
    """
    pitch_track = call(sound, "To Pitch", 0.0, 75, 300)
    if _mean:
        return call(pitch_track, "Get mean", 0, 0,'Hertz')
    if not (_stdev or _range):
        return None
    # Both remaining statistics derive from the standard deviation.
    spread = call(pitch_track, "Get standard deviation", 0 ,0, "Hertz")
    return spread if _stdev else 4* spread
Esempio n. 15
0
def measureFormants(sound):
    """Return the mean of formants F1-F4 measured at the glottal pulses of a sound.

    `sound` is passed to ``parselmouth.Sound``. The formant ceiling is 5500 Hz
    when the mean pitch is above 150 Hz and 5000 Hz otherwise. Formant values
    are sampled (in Hertz, linear interpolation) at every point of a periodic
    PointProcess.

    Values that Praat reports as undefined (NaN) are skipped. Previously every
    value was appended twice and NaN values were never filtered out, so a single
    undefined sample turned the whole mean into NaN.

    :return: tuple ``(f1_mean, f2_mean, f3_mean, f4_mean)``; an entry is the
             string "N/A" when no usable value was collected for that formant.
    """
    import math  # function-scope import keeps this block self-contained

    sound = parselmouth.Sound(sound)  # read the sound
    pointProcess = call(sound, "To PointProcess (periodic, cc)", 75, 500)
    pitch = call(sound, "To Pitch", 0.0, 75, 500)  # check pitch to set formant settings
    meanF0 = call(pitch, "Get mean", 0, 0, "Hertz")  # get mean pitch
    maxFormant = 5500 if meanF0 > 150 else 5000
    formants = call(sound, "To Formant (burg)", 0.0025, 5, maxFormant, 0.025, 50)
    numPoints = call(pointProcess, "Get number of points")

    formant_numbers = (1, 2, 3, 4)
    values_by_formant = {number: [] for number in formant_numbers}

    # Measure formants only at glottal pulses (Praat indices start at 1)
    for point in range(1, numPoints + 1):
        t = call(pointProcess, "Get time from index", point)
        for number in formant_numbers:
            value = call(formants, "Get value at time", number, t, 'Hertz', 'Linear')
            if isinstance(value, float) and not math.isnan(value):
                values_by_formant[number].append(value)

    def mean_or_na(values):
        # mean across pulses, or the "N/A" marker when nothing was measured
        return sum(values) / len(values) if values else "N/A"

    f1_mean, f2_mean, f3_mean, f4_mean = (
        mean_or_na(values_by_formant[number]) for number in formant_numbers)

    return f1_mean, f2_mean, f3_mean, f4_mean
Esempio n. 16
0
 def measurePitch(voiceID, f0min, f0max, unit):
     """Return (mean pitch, pitch standard deviation) of a sound, in `unit`.

     `voiceID` is passed to ``parselmouth.Sound``; a Praat Pitch object is
     built with 'To Pitch' (time step 0.0, floor `f0min`, ceiling `f0max`)
     and both statistics are queried with the time range 0, 0.
     """
     pitch_track = call(parselmouth.Sound(voiceID), "To Pitch", 0.0, f0min,
                        f0max)
     statistics = []
     for command in ("Get mean", "Get standard deviation"):
         statistics.append(call(pitch_track, command, 0, 0, unit))
     mean_pitch, pitch_sd = statistics
     return mean_pitch, pitch_sd
def extractJitterAndShimmer(sound, pitchFloor, pitchCeiling):
    """Return (local jitter, local shimmer) of a sound.

    A Pitch object is built with 'To Pitch' (time step 0.0 and the given
    floor/ceiling) and converted to a PointProcess. Jitter is queried on the
    PointProcess alone, shimmer on the sound/PointProcess pair.
    """
    pulses = call(call(sound, "To Pitch", 0.0, pitchFloor, pitchCeiling),
                  "To PointProcess")

    # Time range and period settings shared by both queries.
    periodSettings = (0, 0, 0.0001, 0.02, 1.3)

    jitter = call(pulses, "Get jitter (local)", *periodSettings)
    # Shimmer takes one extra setting (1.6) after the shared ones.
    shimmer = call([sound, pulses], "Get shimmer (local)", *periodSettings,
                   1.6)
    return jitter, shimmer
Esempio n. 18
0
    def get_jitter_parameters(self, sound, pointProcess):
        """Query five Praat jitter measures on `pointProcess` and store them on self.

        Sets ``jitter_local``, ``jitter_absolute_local``, ``jitter_rap``,
        ``jitter_ppq5`` and ``jitter_ddp``, in that order. All queries share the
        arguments 0, 0, 0.0001, 0.02, 1.3. `sound` is accepted but not used.

        :return: None
        """
        shared_settings = (0, 0, 0.0001, 0.02, 1.3)
        measures = (
            ("jitter_local", "Get jitter (local)"),
            ("jitter_absolute_local", "Get jitter (local, absolute)"),
            ("jitter_rap", "Get jitter (rap)"),
            ("jitter_ppq5", "Get jitter (ppq5)"),
            ("jitter_ddp", "Get jitter (ddp)"),
        )
        for attribute, command in measures:
            setattr(self, attribute, call(pointProcess, command, *shared_settings))

        return None
Esempio n. 19
0
def manipulateFormants(wav_file, gender, factor):
    """Shift the formants of a recording with Praat's 'Change gender' command.

    :param wav_file: path passed to ``parselmouth.Sound``.
    :param gender: "female" or "male"; selects the first two 'Change gender'
        arguments (60, 300 for "female"; 100, 500 for "male").
    :param factor: passed as the third 'Change gender' argument.
    :return: the manipulated sound returned by Praat.
    :raises ValueError: if `gender` is neither "female" nor "male"
        (previously this surfaced as an UnboundLocalError after the file had
        already been loaded).
    """
    # NOTE(review): the female range (60-300) sits below the male one
    # (100-500); values are kept as found -- confirm they are not swapped.
    pitch_ranges = {"female": (60, 300), "male": (100, 500)}
    if gender not in pitch_ranges:
        raise ValueError("Argument 'gender' must be one of ['female', 'male']")
    first_setting, second_setting = pitch_ranges[gender]

    sound = parselmouth.Sound(wav_file)
    return call(sound, "Change gender", first_setting, second_setting, factor,
                0, 1, 1)
Esempio n. 20
0
def validate(intensity_obj, peak_cands):
    """ This function validates the n potential peaks (i.e. potential syllable nuclei) that were found by checking whether they are:
        - followed by a dip of more than MIN_DIP_BETW_PEAKS (first peak)
        - followed OR preceded by such a dip (second to penultimate peak)
        - preceded by such a dip (last peak)

        Requiring a dip on both sides of an inner peak is deliberately disabled.

        Each candidate is indexed as peak[0] (used as a time bound of the
        'Get minimum' query) and peak[1] (compared with the queried minimum).
        The dip is the absolute difference between peak[1] and the intensity
        minimum between two neighbouring candidates.

        The loop previously stopped one element early, so the last candidate
        was never examined and its branch was unreachable; it is now checked
        as documented.

        Returns the list of valid candidates, in input order. With fewer than
        two candidates there is no neighbour to compare against and the result
        is empty.
    """

    valid_peaks = []
    last_index = len(peak_cands) - 1
    if last_index < 1:
        return valid_peaks

    def dip_is_deep_enough(peak, left, right):
        # True when the intensity between two candidates drops far enough below `peak`
        dip = praat.call(intensity_obj, "Get minimum", left[0], right[0],
                         "None")
        return abs(peak[1] - dip) > MIN_DIP_BETW_PEAKS

    for i, peak in enumerate(peak_cands):
        # The dip after the peak is checked first; the dip before it is only
        # queried when that check fails (or does not exist for the last peak).
        if i < last_index and dip_is_deep_enough(peak, peak,
                                                 peak_cands[i + 1]):
            valid_peaks.append(peak)
        elif i > 0 and dip_is_deep_enough(peak, peak_cands[i - 1], peak):
            valid_peaks.append(peak)

    return valid_peaks
Esempio n. 21
0
 def get_formants(phones):
     """Add formant columns "f1".."f4" to the `phones` DataFrame and return it.

     The sound is loaded from the "wav" entry of row 0 and analysed with
     'To Formant (burg)'. For rows whose "phone" is in ``TimitData.VOWELS`` the
     column holds an array of formant values (Hertz, linear interpolation)
     sampled at ``TimitData.get_formant_times(row)``; other rows get NaN.
     """
     snd = parselmouth.Sound(phones.loc[0, "wav"])
     formants = call(snd, "To Formant (burg)", 0.0025, 5, 5000, 0.025, 50)

     def formant_track(row, number):
         # Non-vowel rows carry no formant measurement.
         if row["phone"] not in TimitData.VOWELS:
             return np.nan
         samples = [
             call(formants, "Get value at time", number, t, 'Hertz', 'Linear')
             for t in TimitData.get_formant_times(row)
         ]
         return np.array(samples)

     for number in range(1, 5):
         column = "f{}".format(number)
         phones[column] = phones.apply(lambda r: formant_track(r, number),
                                       axis=1)
     return phones
Esempio n. 22
0
def get_avg_pitch(infile):
    """Return the mean pitch frequency of an audio file, ignoring low values.

    The file is loaded with ``parselmouth.Sound`` and analysed with
    ``to_pitch()``; frame frequencies below 65 are dropped before averaging
    (65 Hz being about the lowest frequency of a male voice). This presumably
    also removes unvoiced frames, if those are reported as 0 -- verify against
    the parselmouth documentation.

    An ``Audio`` object that was built and discarded, and a Manipulation and
    PitchTier that were computed but never used, have been removed; the
    returned value is unchanged.

    :param infile: path of the audio file.
    :return: ``np.mean`` of the retained frequencies.
    """
    sound = parselmouth.Sound(infile)
    pitch_values = sound.to_pitch().selected_array['frequency']
    # remove values lower than 65Hz
    retained = pitch_values[pitch_values >= 65]
    return np.mean(retained)
Esempio n. 23
0
def save_pitch_and_pulse(sound):
    """Save the pitch tier and the pulses of a sound as Praat text files.

    A Manipulation object is built with 'To Manipulation' (0.01, 75, 600). Its
    pitch tier is written to '<output_folder>/vocals.PitchTier' and its pulses
    to '<output_folder>/vocals.Pulse', where ``output_folder`` is a name taken
    from the enclosing module. Nothing is returned.
    """
    manipulation = call(sound, "To Manipulation", 0.01, 75, 600)

    # (Praat extraction command, destination template), pitch data first.
    exports = (
        ("Extract pitch tier", '{}/vocals.PitchTier'),
        ("Extract pulses", '{}/vocals.Pulse'),
    )
    for command, destination_template in exports:
        extracted = call(manipulation, command)
        extracted.save_as_text_file(destination_template.format(output_folder))
Esempio n. 24
0
def measure_pitch(
    voice,
    floor=50,
    ceiling=500,
    method="ac",
    time_step=0,
    max_number_of_candidates=15,
    silence_threshold=0.03,
    voicing_threshold=0.45,
    octave_cost=0.01,
    octave_jump_cost=0.35,
    voiced_unvoiced_cost=0.14,
    unit="Hertz",
    very_accurate="no",
):
    """
    Track the pitch of a voice with Praat and summarise it.

    Args:
        voice: sound object handed to Praat
        floor: pitch floor
        ceiling: pitch ceiling
        method: "ac" or "cc" selects the Praat command "To Pitch (ac)" or
            "To Pitch (cc)". Any other value is used as the Praat command
            name unchanged, so callers that already pass a full command
            name keep working. The short names used to be sent to Praat
            verbatim, where they are not valid commands.
        time_step: time step of the analysis
        max_number_of_candidates: maximum number of pitch candidates
        silence_threshold: silence threshold
        voicing_threshold: voicing threshold
        octave_cost: octave cost
        octave_jump_cost: octave-jump cost
        voiced_unvoiced_cost: voiced/unvoiced cost
        unit: unit of the returned statistics
        very_accurate: Praat's "very accurate" switch

    Returns:
        tuple (pitch, mean_f0, stdev_f0, min_f0, max_f0); the minimum and
        maximum are queried with "Parabolic" interpolation, and every
        statistic uses the time range 0, 0.
    """
    praat_commands = {"ac": "To Pitch (ac)", "cc": "To Pitch (cc)"}
    command = praat_commands.get(method, method)

    pitch: object = call(
        voice,
        command,
        time_step,
        floor,
        max_number_of_candidates,
        very_accurate,
        silence_threshold,
        voicing_threshold,
        octave_cost,
        octave_jump_cost,
        voiced_unvoiced_cost,
        ceiling,
    )
    mean_f0: float = call(pitch, "Get mean", 0, 0, unit)
    stdev_f0: float = call(pitch, "Get standard deviation", 0, 0, unit)
    min_f0: float = call(pitch, "Get minimum", 0, 0, unit, "Parabolic")
    max_f0: float = call(pitch, "Get maximum", 0, 0, unit, "Parabolic")

    return pitch, mean_f0, stdev_f0, min_f0, max_f0
Esempio n. 25
0
    def get_pitch_parameters(self, Pitch, unit):
        """Store pitch statistics on self.

        ``pitch_mean`` and ``pitch_std_dev`` are queried from the `Pitch` object
        in `unit`; ``pitch_median``, ``pitch_minimum`` and ``pitch_maximum`` are
        parsed from the text in ``self.vocal_report``.

        :raises IndexError: if a value is not present in the report in the
            expected ``<label>: <digits>.<digits>`` form.
        :return: None
        """

        def from_report(label):
            # Raw string: "\." in a normal literal is an invalid escape
            # sequence and triggers a warning on recent Python versions.
            matches = re.findall(label + r": ([0-9]*\.[0-9]*)", self.vocal_report)
            return float(matches[0])

        self.pitch_mean = call(Pitch, "Get mean", 0, 0, unit)  # get mean pitch
        self.pitch_median = from_report("Median pitch")
        self.pitch_std_dev = call(Pitch, "Get standard deviation", 0, 0, unit)  # get standard deviation
        self.pitch_minimum = from_report("Minimum pitch")
        self.pitch_maximum = from_report("Maximum pitch")

        return None
Esempio n. 26
0
    def process(self):
        """Apply Praat's 'Change gender' to the configured voice.

        Reads the sound, the formant and pitch factors and the file path from
        ``self.args``. Factors greater than 1 are replaced by their reciprocal.
        The new pitch median is the (possibly inverted) pitch factor times the
        median pitch of the sound. Several intermediate values are printed.

        :return: ``{"voice": manipulated_sound}``; the sound is renamed after the
                 input file and the two factors, and its intensity is scaled to
                 70 when ``self.args["normalize amplitude"]`` is truthy.
        """
        sound = self.args["voice"]
        formant_factor = self.args["formant_factor"]
        pitch_factor = self.args["pitch_factor"]
        duration = sound.get_total_duration()  # queried but not used below
        file_path = self.args["file_path"]
        # The configured pitch range factor is looked up, then overridden with
        # a fixed value; the duration factor is fixed as well.
        pitch_range_factor = self.args["pitch_range_factor"]
        pitch_range_factor = 1
        duration_factor = 1

        f0min, f0max = self.pitch_bounds(sound)
        pitch = sound.to_pitch()
        print(f0min, f0max)

        median_pitch = call(pitch, "Get quantile", sound.xmin, sound.xmax, 0.5,
                            "Hertz")
        mean_pitch = call(pitch, 'Get mean', sound.xmin, sound.xmax, 'Hertz')
        print(f"mean pitch {mean_pitch}")

        # Factors above 1 are inverted so both end up at or below 1.
        formant_factor = 1 / formant_factor if formant_factor > 1 else formant_factor
        pitch_factor = 1 / pitch_factor if pitch_factor > 1 else pitch_factor

        print(median_pitch)
        print(pitch_factor)
        new_pitch_median = pitch_factor * median_pitch
        print(new_pitch_median)

        base_name = file_path.split("/")[-1].split(".wav")[0]
        output_file_name = (
            f"{base_name}_raise_pitch_and_formants_{pitch_factor}_{formant_factor}"
        )

        change_gender_settings = (
            f0min,
            f0max,
            formant_factor,
            new_pitch_median,
            pitch_range_factor,
            duration_factor,
        )
        manipulated_sound = call(sound, "Change gender", *change_gender_settings)

        if self.args["normalize amplitude"]:
            manipulated_sound.scale_intensity(70)

        manipulated_sound.name = output_file_name

        return {"voice": manipulated_sound}
Esempio n. 27
0
def get_silence_threshold(sound, lower_quantile):
    """ Derives a silence threshold for chunking from the intensity of a sound.

    The threshold is the intensity at `lower_quantile` minus the intensity at
    quantile 1, both queried over the time range 0.0, 0.0.

    :param sound: A parselmouth.praat Sound object
    :param lower_quantile: A quantile value (0-1; e.g., 0.5 = median)

    :return sil_threshold: Threshold value to be used for 'To TextGrid (silences)'
    """
    intensity = sound.to_intensity()

    def intensity_at(quantile):
        return call(intensity, 'Get quantile', 0.0, 0.0, quantile)

    loudest = intensity_at(1)
    quiet_level = intensity_at(lower_quantile)
    return quiet_level - loudest
Esempio n. 28
0
def chunk_sound(sound, sil_duration, threshold_quantile):
    """Split a sound into the intervals labelled 'speech'.

    The silence threshold is obtained from ``get_silence_threshold`` and passed,
    together with `sil_duration`, to ``detect_silences`` to build a TextGrid.
    Tier 1 of that TextGrid is then used to count and extract the intervals
    whose label is equal to 'speech'.

    :return: tuple (textgrid, extracted_sounds, n_ints)
    """
    textgrid = detect_silences(
        sound, get_silence_threshold(sound, threshold_quantile), sil_duration)

    # Tier and label criterion shared by the count and the extraction.
    tier, relation, label = 1, 'is equal to', 'speech'

    n_ints = call(textgrid, 'Count intervals where', tier, relation, label)
    extracted_sounds = call([sound, textgrid], 'Extract intervals where',
                            tier, True, relation, label)

    return textgrid, extracted_sounds, n_ints
Esempio n. 29
0
 def measure_jitter(self):
     """Compute local jitter of ``self.sound`` and store it on self.

     Sets ``self.point_process`` to a periodic (cc) PointProcess built with
     the bounds 60 and 600, then sets ``self.local_jitter_teva`` to the
     result of 'Get jitter (local)' using the time range and period settings
     held on this object.
     """
     self.point_process = call(self.sound, "To PointProcess (periodic, cc)",
                               60, 600)
     jitter_settings = (
         self.start_time,
         self.end_time,
         self.shortest_period,
         self.longest_period,
         self.maximum_period_factor,
     )
     self.local_jitter_teva = call(self.point_process, "Get jitter (local)",
                                   *jitter_settings)
Esempio n. 30
0
 def measure_mvd(self):
     """Compute the Maximum Voicing Duration (MVD) and store it in ``self.mvd``.

     ``self.point_process`` is converted with 'To TextGrid (vuv)' (0.2, 0.1);
     the duration of every interval of tier 1 whose label contains "v"
     (case-insensitive) is collected and the longest one is kept.

     When no such interval exists ``self.mvd`` is set to 0.0 (previously
     ``max()`` raised a ValueError on the empty list).
     """
     textgrid = call(self.point_process, "To TextGrid (vuv)", 0.2, 0.1)
     number_of_intervals = call(textgrid, "Get number of intervals", 1)

     voiced_durations = []
     # Praat numbers intervals from 1.
     for interval in range(1, number_of_intervals + 1):
         label = call(textgrid, "Get label of interval", 1, interval)
         if "v" in label.lower():
             start = call(textgrid, "Get start point", 1, interval)
             end = call(textgrid, "Get end point", 1, interval)
             voiced_durations.append(end - start)

     self.mvd = max(voiced_durations, default=0.0)