def get_local_shimmer(sound, min_time=0., max_time=0., pitch_floor=75., pitch_ceiling=600., period_floor=0.0001,
                      period_ceiling=0.02, max_period_factor=1.3, max_amplitude_factor=1.6):
    """
    Compute the (local) shimmer of a sound via a periodic PointProcess.

    :param (parselmouth.Sound) sound: sound waveform
    :param (float) min_time: start of the analysed time range (default: 0.)
    :param (float) max_time: end of the analysed time range (default: 0.)
           NOTE: If max_time <= min_time, the entire time domain is considered
    :param (float) pitch_floor: minimum pitch used to build the PointProcess (default: 75.)
    :param (float) pitch_ceiling: maximum pitch used to build the PointProcess (default: 600.)
    :param (float) period_floor: shortest interval, in seconds, used in the shimmer
           computation (default: 0.0001)
    :param (float) period_ceiling: longest interval, in seconds, used in the shimmer
           computation (default: 0.02)
    :param (float) max_period_factor: largest allowed difference between consecutive
           intervals (default: 1.3)
    :param (float) max_amplitude_factor: maximum amplitude factor for shimmer (default: 1.6)
    :return: value of (local) shimmer
    """
    # Glottal pulses are derived from the waveform by cross-correlation.
    pulses = call(sound, 'To PointProcess (periodic, cc)', pitch_floor, pitch_ceiling)

    # Shimmer needs both the waveform and the pulse positions.
    shimmer_settings = (min_time, max_time, period_floor, period_ceiling,
                        max_period_factor, max_amplitude_factor)
    return call([sound, pulses], 'Get shimmer (local)', *shimmer_settings)
def process(self):
    """Measure the cepstral peak prominence (CPP) of ``self.args["voice"]``.

    The voice is converted to a spectrum and then to a power cepstrum, on
    which Praat's "Get peak prominence" is queried with the settings stored
    in ``self.args`` ("Pitch Floor", "Pitch Ceiling", "interpolation",
    "Tilt line qeufrency lower bound", "Tilt line qeufrency upper bound",
    "Line type" and "Fit method").

    Returns
    -------
    dict
        ``{"cpp": value}`` on success, or ``{"cpp": "Measurement failed"}``
        if any step raises (missing setting, Praat error, ...).
    """
    try:
        args = self.args
        spectrum = args["voice"].to_spectrum()
        cepstrum = call(spectrum, "To PowerCepstrum")
        cpp = call(
            cepstrum,
            "Get peak prominence",
            args["Pitch Floor"],
            args["Pitch Ceiling"],
            args["interpolation"],
            args["Tilt line qeufrency lower bound"],
            args["Tilt line qeufrency upper bound"],
            args["Line type"],
            args["Fit method"],
        )
        return {"cpp": cpp}
    except Exception:
        # Best-effort measurement: report the failure in-band, but let
        # KeyboardInterrupt / SystemExit propagate (a bare ``except`` would not).
        return {"cpp": "Measurement failed"}
def extract_syllable_intervals(file_name):
    """Return the time differences between consecutive syllable nuclei of a file.

    Runs the 'syllable_nuclei.praat' script on ``file_name``, reads the point
    times of the first tier of the resulting TextGrid and returns their
    successive differences as computed by ``np.diff``.
    """
    print("Extracting syllable intervals from '{}'...".format(file_name))

    # The Praat script was adapted to process one file instead of looping over
    # a directory. Its arguments are, in order: 'Silence threshold (dB)',
    # 'Minimum dip between peaks (dB)', 'Minimum pause duration' and the
    # file name.
    script_result = run_file('syllable_nuclei.praat', -25, 2, 0.3, file_name)

    # The script leaves two objects selected, the Sound and the TextGrid;
    # they come back as a list and the TextGrid is the second entry.
    textgrid = script_result[1]

    # Query how many points tier 1 holds, then fetch each point's time
    # (Praat indices are 1-based).
    point_count = call(textgrid, "Get number of points", 1)
    syllable_nuclei = []
    for point_index in range(1, point_count + 1):
        syllable_nuclei.append(call(textgrid, "Get time of point", 1, point_index))

    # Intervals between neighbouring nuclei.
    return np.diff(syllable_nuclei)
def to_lpc(self, method:str, prediction_order:int=16, window_length:Real=0.025, time_step:Real=0.005, pre_emphasis_frequency:Real=50, **kwargs:Any) -> pm.Data:
    """
    Run Praat's "To LPC (<method>)" command on this object.

    Parameters
    ----------
    method: str,
        one of "autocorrelation", "covariance", "burg", "marple"
    prediction_order: int, default 16,
    window_length: real number, default 0.025,
    time_step: real number, default 0.005,
    pre_emphasis_frequency: real number, default 50,
    kwargs: dict, optional,
        "tolerance1", "tolerance2" for `method` "marple", both default 1.0e-6

    Returns
    -------
    The object returned by the Praat command.

    Raises
    ------
    ValueError
        if `method` is not one of the supported names (previously this
        surfaced as an UnboundLocalError on the return statement)
    """
    common_args = (prediction_order, window_length, time_step, pre_emphasis_frequency)
    if method in ["autocorrelation", "covariance", "burg"]:
        extra_args = ()
    elif method == "marple":
        extra_args = (
            kwargs.get("tolerance1", 1.0e-6),
            kwargs.get("tolerance2", 1.0e-6),
        )
    else:
        raise ValueError(
            f"Unsupported LPC method {method!r}; expected one of "
            "'autocorrelation', 'covariance', 'burg', 'marple'"
        )
    return call(self, f"To LPC ({method})", *common_args, *extra_args)
def resample(soundObj, target_sampling_rate, precision_ms=50):
    '''Resample soundObj to target_sampling_rate with Praat's 'Resample'.

    soundObj: Praat sound object to resample
    target_sampling_rate: new sampling frequency passed to 'Resample'
    precision_ms: second argument of the 'Resample' command (default 50)

    Returns the resampled sound object (praat).
    '''
    # The previous 'Get sampling frequency' query was dropped: its result was
    # never used.
    return call(soundObj, 'Resample', target_sampling_rate, precision_ms)
def relative_position(self, extremum, type, start, end):
    """
    Calculate the relative position of either a maximum or minimum
    value within a timespan delimited by start and end timestamps.

    extremum: one of "maximum" and "minimum" (inserted into the Praat
              command "Get time of <extremum>")
    type: one of "pitch" and "intensity"; selects self.pitch_obj or
          self.int_obj as the object to query
    start, end: timestamps delimiting the span; they must differ, because
                the span length (end - start) is the divisor

    Returns (time of extremum - start) / (end - start).

    Raises ValueError if type is neither "pitch" nor "intensity".
    """
    # NOTE: the parameter name `type` shadows the builtin; it is kept because
    # callers may pass it by keyword.
    if type == "pitch":
        extremum_at = praat.call(self.pitch_obj, f"Get time of {extremum}",
                                 start, end, "Hertz", "None")
    elif type == "intensity":
        extremum_at = praat.call(self.int_obj, f"Get time of {extremum}",
                                 start, end, "None")
    else:
        # Previously this fell through and failed later with an opaque
        # "NoneType - float" TypeError.
        raise ValueError(
            f"Argument 'type' must be one of ['pitch', 'intensity'], got {type!r}")

    return (extremum_at - start) / (end - start)
def to_formant(self, method:str="burg", time_step:Optional[Real]=None, max_number_of_formants:Real=5.0, maximum_formant:Real=5500.0, window_length:Real=0.025, pre_emphasis_from:Real=50.0, number_of_std_dev:Real=1.5, maximum_number_of_iterations:Real=5, tolerance:Real=1.0e-6) -> pm.Formant:
    """
    Compute a Formant object with the selected analysis method.

    Parameters
    ----------
    method: str, default "burg",
        can also be "sl" (aliases "split levinson",
        "split levinson (willems)"), "keep all", "robust";
        matched case-insensitively
    time_step: real number, optional, units in (s),
        for the Praat commands `None` is passed as 0.0
    max_number_of_formants: real number, default 5.0,
    maximum_formant: real number, default 5500.0, units in (Hz),
    window_length: real number, default 0.025, units in (s),
    pre_emphasis_from: real number, default 50.0, units in (Hz),
    number_of_std_dev: real number, default 1.5, ("robust" only)
    maximum_number_of_iterations: real number, default 5, ("robust" only)
    tolerance: real number, default 1.0e-6, ("robust" only)

    Raises
    ------
    ValueError
        if `method` is not a supported name (previously `None` was
        returned silently)
    """
    # The lowered name was computed before but never used, which made the
    # comparison case-sensitive.
    m = method.lower()
    if m == "burg":
        return self.to_formant_burg(time_step, max_number_of_formants, maximum_formant, window_length, pre_emphasis_from)

    base_args = (time_step or 0.0, max_number_of_formants, maximum_formant, window_length, pre_emphasis_from)
    if m in ["sl", "split levinson", "split levinson (willems)"]:
        return call(self, "To Formant (sl)", *base_args)
    if m == "keep all":
        return call(self, "To Formant (keep all)", *base_args)
    if m == "robust":
        return call(self, "To Formant (robust)", *base_args, number_of_std_dev, maximum_number_of_iterations, tolerance)
    raise ValueError(
        f"Unsupported formant method {method!r}; expected one of "
        "'burg', 'sl', 'keep all', 'robust'"
    )
def extractPitch(sound, pitchFloor, pitchCeiling, unit, interpolation):
    """Return (minimum, maximum, mean, standard deviation) of the pitch of `sound`.

    A Pitch object is created with "To Pitch" (time step 0.0, the given floor
    and ceiling); all four statistics are queried with the time range (0, 0)
    in `unit`. `interpolation` is only used by the minimum/maximum queries.
    """
    pitch = call(sound, "To Pitch", 0.0, pitchFloor, pitchCeiling)

    time_range = (0, 0)
    lowest = call(pitch, "Get minimum", *time_range, unit, interpolation)
    highest = call(pitch, "Get maximum", *time_range, unit, interpolation)
    average = call(pitch, "Get mean", *time_range, unit)
    spread = call(pitch, "Get standard deviation", *time_range, unit)
    return lowest, highest, average, spread
def get_excursion(self, level=""):
    """
    Extract the pitch excursion of each nucleus, normalized on either the
    "word" level or the intonation phrase ("ip") level.

    For every row of self.nuclei the excursion is
    12 * log2(f0_max / f0_q10), where f0_q10 is the 0.1 quantile (in Hertz)
    of self.pitch_obj between the start and end of the enclosing word / ip.
    Rows without both timestamps get no f0_q10 through the left merge.

    Returns a NumPy array with one value per row of the merged frame.
    Raises ValueError if level is neither "word" nor "ip".
    """
    if level not in ("word", "ip"):
        raise ValueError("Argument 'level' must be one of ['word', 'ip']")

    per_word = level == "word"
    if per_word:
        start_col, end_col = "word_start", "word_end"
    else:
        start_col, end_col = "ip_start", "ip_end"

    check_input_df(self.nuclei, [start_col, end_col, "f0_max"])

    # Keep only the rows where both timestamps are known.
    has_span = (self.nuclei[start_col].notna()) & (self.nuclei[end_col].notna())
    spans = self.nuclei[has_span].copy()
    if not per_word:
        # Several nuclei share one intonation phrase: query each phrase once.
        spans = spans[["ip_start", "ip_end"]].drop_duplicates()

    # 10th percentile of the pitch contour within each span
    q10_values = []
    for row in spans.itertuples():
        q10_values.append(
            praat.call(
                self.pitch_obj,
                "Get quantile",
                getattr(row, start_col),
                getattr(row, end_col),
                0.1,
                "Hertz",
            ))
    spans["f0_q10"] = q10_values

    if per_word:
        norm_df = pd.merge(self.nuclei, spans, how="left")
    else:
        norm_df = pd.merge(self.nuclei,
                           spans,
                           on=["ip_start", "ip_end"],
                           how="left")

    # Excursion in semitones: 12 * log2(F0_max/F0_10%)
    ratio = norm_df["f0_max"] / norm_df["f0_q10"]
    return np.array(12 * np.log2(ratio))
def extractIntensity(sound, minPitch, timeStep, interpolation):
    """Return (minimum, maximum, mean, standard deviation) of the intensity of `sound`.

    An Intensity object is created with "To Intensity" from `minPitch` and
    `timeStep`; all four statistics are queried with the time range (0, 0).
    `interpolation` is only used by the minimum/maximum queries.
    """
    intensity = call(sound, "To Intensity", minPitch, timeStep)

    time_range = (0, 0)
    quietest = call(intensity, "Get minimum", *time_range, interpolation)
    loudest = call(intensity, "Get maximum", *time_range, interpolation)
    average = call(intensity, "Get mean", *time_range)
    spread = call(intensity, "Get standard deviation", *time_range)
    return quietest, loudest, average, spread
def audio_to_textgrid(audio_no_annot_path, textgrid_path):
    """Generates .TextGrid files from audio using Praat.

    Every entry of `audio_no_annot_path` is loaded as a sound, noise-reduced
    ("Remove noise", spectral subtraction), segmented with
    "To TextGrid (silences)" and saved in `textgrid_path` under the audio
    file's base name with a '.TextGrid' extension. Progress is printed each
    time another 10% of the files has been processed.

    :param audio_no_annot_path: path with audio not annotated.
    :param textgrid_path: path with textgrid generated.
    :returns: None.
    """
    create_data_path(textgrid_path)
    audio_files = os.listdir(audio_no_annot_path)
    total = len(audio_files)
    print("Procesing " + str(total) + " audio files with Praat, can take a while...")

    last_decile = 0
    for index, audio in enumerate(audio_files, 1):
        sound = parselmouth.Sound(os.path.join(audio_no_annot_path, audio))
        # Take each audio file and convert to textGrid with praat
        noise_reduction = call(sound, "Remove noise", 0.0, 1.0, 0.025, 80, 10000, 40, 'Spectral subtraction')
        # 0.8, 0.2: minimum silent and sounding interval durations;
        # '' / 'sounding': labels for silent / sounding intervals
        textgrid = call(noise_reduction, "To TextGrid (silences)", 100, 0.0, -65.0, 0.8, 0.2, '', 'sounding')

        # splitext copes with extensions of any length (the old `audio[:-4]`
        # assumed exactly three characters after the dot).
        text_audio = os.path.splitext(audio)[0] + '.TextGrid'
        call(textgrid, "Save as text file", os.path.join(textgrid_path, text_audio))

        # Show progress; integer arithmetic on the running index avoids the
        # division by zero the old `index % round(total / 10)` hit for
        # fewer than five files.
        decile = index * 10 // total
        if decile > last_decile:
            print(str(decile * 10) + "% ", end="\r")
            last_decile = decile
def main(wavscp, outdir, text):
    """Create one TextGrid per wav file listed in `wavscp`.

    wavscp: path of a file whose lines are "<uttid> <wav path>",
            separated by single spaces
    outdir: output directory, created if missing
    text:   optional path of a file whose lines are "<uttid> <transcript>";
            may be None

    For each wav file a TextGrid with one tier 'spk1' is created and, when
    the utterance id has a transcript, a single interval spanning the whole
    file is added with that transcript (spaces between CJK characters
    removed). The result is written as '<wav name>.TextGrid' in `outdir`.

    Raises ValueError if an utterance id occurs twice in `text` or a
    non-blank `wavscp` line has no wav path.
    """
    from os import makedirs

    # exist_ok avoids the check-then-create race and also creates parents.
    makedirs(outdir, exist_ok=True)

    utt2text = dict()
    if text is not None:
        with open(text, 'r') as rf:
            for raw_line in rf:
                line = raw_line.strip()
                if not line:
                    continue
                uttid, _, content = line.partition(' ')
                # Explicit raise: an `assert` disappears under `python -O`.
                if uttid in utt2text:
                    raise ValueError(
                        '[error]utterance name in text file should not be duplicated')
                utt2text[uttid] = content.strip()

    with open(wavscp, 'r') as rf:
        lines = [line for line in rf.readlines() if line.strip()]

    rule_ch_seq_space = re.compile(
        r'(?<=[\u4e00-\u9fa5])( +)(?=[\u4e00-\u9fa5])')
    for line in tqdm(lines):
        fields = line.split(' ')
        if len(fields) < 2:
            raise ValueError(f'[error]malformed wav.scp line: {line!r}')
        uttid = fields[0]
        wav_path = fields[1].strip()
        wav_name = path.splitext(path.basename(wav_path))[0]
        output_tgt = path.join(outdir, f'{wav_name}.TextGrid')

        wav_read = praat.call('Read from file', wav_path)
        tg_obj = praat.call(wav_read, 'To TextGrid', 'spk1', '')
        tgt_obj = tg_obj.to_tgt()
        if uttid in utt2text:
            text_content = rule_ch_seq_space.sub('', utt2text[uttid])
            annot_text = tgt.core.Interval(tgt_obj.start_time,
                                           tgt_obj.end_time, text_content)
            tgt_obj.get_tier_by_name('spk1').add_annotation(annot_text)
        tgt.io.write_to_file(tgt_obj,
                             output_tgt,
                             format='long',
                             encoding='utf-8')
def get_all_features(self, f0min, f0max, unit):
    """Extract all voice features of ``self.voiceID`` and return them.

    Loads the sound, builds a periodic PointProcess and a Pitch object with
    the given pitch range, stores Praat's "Voice report" text in
    ``self.vocal_report`` and then runs the pitch, harmonicity, jitter,
    shimmer, pulse and voicing helpers, which store their results on
    ``self``.

    f0min, f0max: pitch floor / ceiling for the PointProcess and Pitch object
    unit: pitch unit forwarded to ``get_pitch_parameters``

    Returns ``self.__dict__`` (also stored as ``self.all_vocal_parameters``).
    Extraction is best-effort: if any step raises, the error is logged and
    whatever attributes were set before the failure are returned.
    """
    import logging

    try:
        sound = parselmouth.Sound(self.voiceID)  # read the sound
        pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
        Pitch = call(sound, "To Pitch", 0.0, f0min, f0max)
        # Vocal report
        # NOTE(review): the report uses a fixed 75-600 Hz pitch range rather
        # than f0min/f0max -- confirm this is intended.
        self.vocal_report = parselmouth.praat.call([sound, Pitch, pointProcess], "Voice report", 0, 0, 75, 600, 1.3, 1.6, 0.03, 0.45)
        # Pitch
        self.get_pitch_parameters(Pitch=Pitch, unit=unit)
        # Harmonicity
        self.get_harmonicity_parameters(sound)
        # Jitter
        self.get_jitter_parameters(sound=sound, pointProcess=pointProcess)
        # Shimmer
        self.get_shimmer_parameters(sound=sound, pointProcess=pointProcess)
        # Pulse
        self.get_pulse_parameters()
        # Voicing
        self.get_voicing_parameters()
    except Exception:
        # Still return the partial result, but leave a trace instead of
        # silently discarding the error.
        logging.getLogger(__name__).exception(
            "Voice feature extraction failed for %r", getattr(self, "voiceID", None))
    self.all_vocal_parameters = self.__dict__
    return self.all_vocal_parameters
def get_pitch(sound, _mean = True, _stdev= False, _range = False):
    """
    Compute one pitch statistic of a sound via parselmouth praat.

    A Pitch object is created with "To Pitch" (time step 0.0, floor 75,
    ceiling 300) and exactly one statistic is returned. The flags are checked
    in the order `_mean`, `_stdev`, `_range`; the first true one wins, so
    `_mean` must be set to False to obtain either of the other two.

    Parameters
    ----------
    sound:parselmouth object
        audio object
    _mean:boolean
        True, to get the mean pitch in Hertz (default)
    _stdev:boolean
        True, to get the standard deviation of the pitch in Hertz
    _range:boolean
        True, to get the pitch range, computed as 4 times the standard
        deviation

    Returns
    -------
    float or None
        The selected statistic, or None when all three flags are false.
    """
    pitch = call(sound, "To Pitch", 0.0, 75, 300)

    if _mean:
        return call(pitch, "Get mean", 0, 0,'Hertz')
    if not (_stdev or _range):
        return None

    # Both remaining statistics are based on the standard deviation.
    spread = call(pitch, "Get standard deviation", 0 ,0, "Hertz")
    return spread if _stdev else 4* spread
def measureFormants(sound):
    """Return the mean of formants F1-F4 measured at the glottal pulses.

    `sound` is passed to ``parselmouth.Sound``. The maximum formant
    frequency for the Burg analysis is 5500 Hz when the mean pitch is above
    150 Hz and 5000 Hz otherwise. Each formant is sampled (linear
    interpolation, Hertz) at every point of a periodic PointProcess and
    averaged.

    Returns a tuple (f1_mean, f2_mean, f3_mean, f4_mean); an entry is the
    string "N/A" when no usable value was measured for that formant.
    """
    import math

    sound = parselmouth.Sound(sound)  # read the sound
    pointProcess = call(sound, "To PointProcess (periodic, cc)", 75, 500)
    pitch = call(sound, "To Pitch", 0.0, 75, 500)  # check pitch to set formant settings
    meanF0 = call(pitch, "Get mean", 0, 0, "Hertz")  # get mean pitch
    maxFormant = 5500 if meanF0 > 150 else 5000
    formants = call(sound, "To Formant (burg)", 0.0025, 5, maxFormant, 0.025, 50)
    numPoints = call(pointProcess, "Get number of points")

    # One list of measurements per formant number.
    tracks = {1: [], 2: [], 3: [], 4: []}

    # Measure formants only at glottal pulses (Praat indices are 1-based).
    for point in range(1, numPoints + 1):
        t = call(pointProcess, "Get time from index", point)
        for number, values in tracks.items():
            value = call(formants, "Get value at time", number, t, 'Hertz', 'Linear')
            # Store each value once (it used to be appended twice) and skip
            # NaN results, which would otherwise turn the whole mean into NaN
            # and make the "N/A" fallback unreachable.
            if isinstance(value, float) and not math.isnan(value):
                values.append(value)

    # calculate mean formants across pulses
    f1_mean, f2_mean, f3_mean, f4_mean = (
        sum(values) / len(values) if values else "N/A"
        for values in tracks.values()
    )
    return f1_mean, f2_mean, f3_mean, f4_mean
def measurePitch(voiceID, f0min, f0max, unit):
    """Return (mean, standard deviation) of the pitch of the sound `voiceID`.

    `voiceID` is passed to ``parselmouth.Sound``; the Pitch object is created
    with "To Pitch" (time step 0.0) between `f0min` and `f0max`, and both
    statistics are queried with the time range (0, 0) in `unit`.
    """
    # Load the sound and derive its Praat pitch object.
    pitch = call(parselmouth.Sound(voiceID), "To Pitch", 0.0, f0min, f0max)

    stats = []
    for query in ("Get mean", "Get standard deviation"):
        stats.append(call(pitch, query, 0, 0, unit))
    meanfreq, sd = stats
    return meanfreq, sd
def extractJitterAndShimmer(sound, pitchFloor, pitchCeiling):
    """Return (local jitter, local shimmer) of `sound`.

    The pulses come from a PointProcess derived from a Pitch object
    ("To Pitch", time step 0.0, given floor and ceiling). Both measures use
    the time range (0, 0), period limits 0.0001 and 0.02 and a maximum period
    factor of 1.3; shimmer additionally uses a maximum amplitude factor
    of 1.6.
    """
    pulses = call(call(sound, "To Pitch", 0.0, pitchFloor, pitchCeiling), "To PointProcess")

    period_settings = (0, 0, 0.0001, 0.02, 1.3)
    jitter = call(pulses, "Get jitter (local)", *period_settings)
    shimmer = call([sound, pulses], "Get shimmer (local)", *period_settings, 1.6)
    return jitter, shimmer
def get_jitter_parameters(self, sound, pointProcess):
    """Store the five Praat jitter measures of `pointProcess` on ``self``.

    Sets ``jitter_local``, ``jitter_absolute_local``, ``jitter_rap``,
    ``jitter_ppq5`` and ``jitter_ddp``. Every query uses the time range
    (0, 0), period limits 0.0001 and 0.02 and a maximum period factor
    of 1.3. `sound` is not used. Returns None.
    """
    queries = (
        ("jitter_local", "Get jitter (local)"),
        ("jitter_absolute_local", "Get jitter (local, absolute)"),
        ("jitter_rap", "Get jitter (rap)"),
        ("jitter_ppq5", "Get jitter (ppq5)"),
        ("jitter_ddp", "Get jitter (ddp)"),
    )
    for attribute, command in queries:
        setattr(self, attribute, call(pointProcess, command, 0, 0, 0.0001, 0.02, 1.3))
    return None
def manipulateFormants(wav_file, gender, factor):
    """Shift the formants of a recording with Praat's "Change gender".

    wav_file: passed to ``parselmouth.Sound``
    gender: "female" or "male"; selects the pitch range given to Praat
            (60-300 for "female", 100-500 for "male")
    factor: formant shift ratio

    The remaining "Change gender" arguments are fixed at 0, 1, 1.
    Returns the manipulated sound.

    Raises ValueError for any other `gender` (previously an
    UnboundLocalError, raised only after the file had been loaded).
    """
    # NOTE(review): the female range is the lower of the two -- confirm the
    # ranges are not swapped.
    if gender == "female":
        pitch_floor, pitch_ceiling = 60, 300
    elif gender == "male":
        pitch_floor, pitch_ceiling = 100, 500
    else:
        raise ValueError(f"gender must be 'female' or 'male', got {gender!r}")

    sound = parselmouth.Sound(wav_file)
    return call(sound, "Change gender", pitch_floor, pitch_ceiling, factor, 0, 1, 1)
def validate(intensity_obj, peak_cands):
    """
    Validate the n potential peaks (i.e. potential syllable nuclei).

    Each candidate is indexed as (time, intensity). A dip is the absolute
    difference between the candidate's intensity and the intensity minimum
    between it and a neighbouring candidate; it counts when it exceeds
    MIN_DIP_BETW_PEAKS. A candidate is kept when it is:
    - followed by a sufficient dip (first peak)
    - followed OR preceded by a sufficient dip (second to penultimate peak)
    - preceded by a sufficient dip (last peak)

    With fewer than two candidates no dip can be measured and an empty list
    is returned.

    Previously the loop stopped one element early, so the last peak was
    never examined although its branch existed.
    """
    last = len(peak_cands) - 1
    if last < 1:
        return []

    def dip_depth(peak, left, right):
        # Depth of the intensity valley between two candidates, relative to
        # `peak`.
        valley = praat.call(intensity_obj, "Get minimum", left[0], right[0], "None")
        return abs(peak[1] - valley)

    valid_peaks = []
    for i, peak in enumerate(peak_cands):
        # Dip towards the following candidate (not available for the last one).
        dip_after = (i < last and
                     dip_depth(peak, peak, peak_cands[i + 1]) > MIN_DIP_BETW_PEAKS)
        if dip_after:
            valid_peaks.append(peak)
            continue
        # Dip towards the preceding candidate (not available for the first one).
        if i > 0 and dip_depth(peak, peak_cands[i - 1], peak) > MIN_DIP_BETW_PEAKS:
            valid_peaks.append(peak)
    return valid_peaks
def get_formants(phones):
    """Add formant columns "f1".."f4" to the `phones` frame and return it.

    The sound is loaded from ``phones.loc[0, "wav"]`` and analysed with
    "To Formant (burg)". For each row whose "phone" is in
    ``TimitData.VOWELS`` the column holds an array of formant values (Hertz,
    linear interpolation) at the times given by
    ``TimitData.get_formant_times(row)``; other rows get NaN.
    """
    snd = parselmouth.Sound(phones.loc[0, "wav"])
    formants = call(snd, "To Formant (burg)", 0.0025, 5, 5000, 0.025, 50)

    def formant_track(row, number):
        # Only vowels are measured.
        if row["phone"] not in TimitData.VOWELS:
            return np.nan
        samples = []
        for t in TimitData.get_formant_times(row):
            samples.append(call(formants, "Get value at time", number, t, 'Hertz', 'Linear'))
        return np.array(samples)

    for number in (1, 2, 3, 4):
        column = "f{}".format(number)
        phones[column] = phones.apply(lambda row, n=number: formant_track(row, n), axis=1)
    return phones
def get_avg_pitch(infile):
    """Return the mean pitch of the recording `infile`.

    `infile` is passed to ``parselmouth.Sound``; the pitch track comes from
    ``sound.to_pitch()``. Frames below 65 Hz are discarded before averaging
    with ``np.mean``.

    The unused ``Audio`` widget, Manipulation object and pitch tier that
    were built here before have been removed; they did not affect the
    result.
    """
    sound = parselmouth.Sound(infile)
    pitch_values = sound.to_pitch().selected_array['frequency']
    # remove values lower than 65Hz (that's about the lowest freq for male voice)
    voiced_values = pitch_values[pitch_values >= 65]
    return np.mean(voiced_values)
def save_pitch_and_pulse(sound):
    """Write the pitch tier and the pulses of `sound` as Praat text files.

    A Manipulation object is built with "To Manipulation" (0.01, 75, 600).
    Its pitch tier goes to '<output_folder>/vocals.PitchTier' and its pulses
    to '<output_folder>/vocals.Pulse'; `output_folder` is a module-level
    name defined outside this function.
    """
    manipulation = call(sound, "To Manipulation", 0.01, 75, 600)

    # (Praat extraction command, destination path template); pitch first,
    # then pulses.
    exports = (
        ("Extract pitch tier", '{}/vocals.PitchTier'),
        ("Extract pulses", '{}/vocals.Pulse'),
    )
    for command, location_template in exports:
        extracted = call(manipulation, command)
        extracted.save_as_text_file(location_template.format(output_folder))
def measure_pitch(
    voice,
    floor=50,
    ceiling=500,
    method="ac",
    time_step=0,
    max_number_of_candidates=15,
    silence_threshold=0.03,
    voicing_threshold=0.45,
    octave_cost=0.01,
    octave_jump_cost=0.35,
    voiced_unvoiced_cost=0.14,
    unit="Hertz",
    very_accurate="no",
):
    """Create a Praat Pitch object and return it with basic statistics.

    Args:
        voice: sound object the pitch command is run on
        floor: pitch floor
        ceiling: pitch ceiling
        method: "ac" or "cc" (expanded to the Praat command
            "To Pitch (ac)" / "To Pitch (cc)"), or a full Praat command
            name, which is passed through unchanged
        time_step: time step (0 by default)
        max_number_of_candidates: maximum number of pitch candidates
        silence_threshold: silence threshold
        voicing_threshold: voicing threshold
        octave_cost: octave cost
        octave_jump_cost: octave-jump cost
        voiced_unvoiced_cost: voiced/unvoiced cost
        unit: unit for the statistics queries
        very_accurate: forwarded to Praat as the "very accurate" setting

    Returns:
        Tuple ``(pitch, mean_f0, stdev_f0, min_f0, max_f0)``; minimum and
        maximum use parabolic interpolation, and all statistics use the time
        range (0, 0).
    """
    # The short names used as default/abbreviation are not Praat commands by
    # themselves; previously "ac" was sent to Praat verbatim.
    command = f"To Pitch ({method})" if method in ("ac", "cc") else method

    pitch: object = call(
        voice,
        command,
        time_step,
        floor,
        max_number_of_candidates,
        very_accurate,
        silence_threshold,
        voicing_threshold,
        octave_cost,
        octave_jump_cost,
        voiced_unvoiced_cost,
        ceiling,
    )
    mean_f0: float = call(pitch, "Get mean", 0, 0, unit)
    stdev_f0: float = call(pitch, "Get standard deviation", 0, 0, unit)
    min_f0: float = call(pitch, "Get minimum", 0, 0, unit, "Parabolic")
    max_f0: float = call(pitch, "Get maximum", 0, 0, unit, "Parabolic")
    return pitch, mean_f0, stdev_f0, min_f0, max_f0
def get_pitch_parameters(self, Pitch, unit):
    """Store pitch statistics on ``self``.

    ``pitch_mean`` and ``pitch_std_dev`` are queried from `Pitch` (time
    range (0, 0), in `unit`); ``pitch_median``, ``pitch_minimum`` and
    ``pitch_maximum`` are parsed from the text in ``self.vocal_report``.
    A value that cannot be found in the report is stored as NaN
    (previously this raised IndexError). Returns None.
    """
    def report_value(label):
        # Raw string: "\." in a normal literal is an invalid escape sequence.
        match = re.search(label + r": ([0-9]*\.[0-9]*)", self.vocal_report)
        return float(match.group(1)) if match else float("nan")

    self.pitch_mean = call(Pitch, "Get mean", 0, 0, unit)  # get mean pitch
    self.pitch_median = report_value("Median pitch")
    self.pitch_std_dev = call(Pitch, "Get standard deviation", 0, 0, unit)  # get standard deviation
    self.pitch_minimum = report_value("Minimum pitch")
    self.pitch_maximum = report_value("Maximum pitch")
    return None
def process(self):
    """Apply Praat's "Change gender" to ``self.args["voice"]``.

    Reads "voice", "formant_factor", "pitch_factor", "file_path",
    "pitch_range_factor" and "normalize amplitude" from ``self.args``.
    Formant and pitch factors above 1 are replaced by their reciprocal; the
    new pitch median is the (possibly inverted) pitch factor times the
    median pitch of the sound. Pitch range factor and duration factor are
    fixed at 1. Several intermediate values are printed.

    Returns ``{"voice": manipulated_sound}``; the sound is named after the
    input file and the factors used, and is scaled to intensity 70 when
    "normalize amplitude" is truthy.
    """
    settings = self.args
    sound = settings["voice"]
    formant_factor = settings["formant_factor"]
    pitch_factor = settings["pitch_factor"]
    sound.get_total_duration()  # result is not used
    file_path = settings["file_path"]
    settings["pitch_range_factor"]  # read, but the fixed value below is used
    duration_factor = 1
    pitch_range_factor = 1

    f0min, f0max = self.pitch_bounds(sound)
    pitch = sound.to_pitch()
    print(f0min, f0max)
    median_pitch = call(pitch, "Get quantile", sound.xmin, sound.xmax, 0.5, "Hertz")
    mean_pitch = call(pitch, 'Get mean', sound.xmin, sound.xmax, 'Hertz')
    print(f"mean pitch {mean_pitch}")

    # Factors above 1 are turned into their reciprocal.
    formant_factor = 1 / formant_factor if formant_factor > 1 else formant_factor
    pitch_factor = 1 / pitch_factor if pitch_factor > 1 else pitch_factor

    print(median_pitch)
    print(pitch_factor)
    new_pitch_median = pitch_factor * median_pitch
    print(new_pitch_median)

    # Base name: last path component, cut at the first ".wav".
    base_name = file_path.rsplit("/", 1)[-1].partition(".wav")[0]
    output_file_name = f"{base_name}_raise_pitch_and_formants_{pitch_factor}_{formant_factor}"

    change_gender_args = (f0min, f0max, formant_factor, new_pitch_median,
                          pitch_range_factor, duration_factor)
    manipulated_sound = call(sound, "Change gender", *change_gender_args)

    if settings["normalize amplitude"]:
        manipulated_sound.scale_intensity(70)
    manipulated_sound.name = output_file_name
    return {"voice": manipulated_sound}
def get_silence_threshold(sound, lower_quantile):
    """
    Calculates silence threshold per sound interval for chunking.

    The threshold is the intensity at `lower_quantile` minus the intensity at
    quantile 1, both queried over the time range (0.0, 0.0) of
    ``sound.to_intensity()``.

    :param sound: A parselmouth.praat Sound object
    :param lower_quantile: A quantile value (0-1; e.g., 0.5 = median)
    :return sil_threshold: Threshold value to be used for 'To TextGrid (silences)'
    """
    intensity = sound.to_intensity()

    def intensity_quantile(quantile):
        return call(intensity, 'Get quantile', 0.0, 0.0, quantile)

    loudest = intensity_quantile(1)
    silence_level = intensity_quantile(lower_quantile)
    return silence_level - loudest
def chunk_sound (sound, sil_duration, threshold_quantile):
    """Split `sound` into its 'speech' intervals.

    The silence threshold comes from ``get_silence_threshold`` at
    `threshold_quantile`; ``detect_silences`` turns it and `sil_duration`
    into a TextGrid, from whose tier 1 the intervals labelled 'speech' are
    counted and extracted.

    Returns ``(textgrid, extracted_sounds, n_ints)``.
    """
    textgrid = detect_silences(
        sound,
        get_silence_threshold(sound, threshold_quantile),
        sil_duration,
    )

    # Same selection criterion for counting and for extracting.
    speech_filter = ('is equal to', 'speech')
    n_ints = call(textgrid, 'Count intervals where', 1, *speech_filter)
    extracted_sounds = call([sound, textgrid], 'Extract intervals where', 1, True, *speech_filter)
    return textgrid, extracted_sounds, n_ints
def measure_jitter(self):
    """Compute local jitter of ``self.sound`` and store it on ``self``.

    Sets ``self.point_process`` (periodic cross-correlation PointProcess,
    pitch range 60-600) and ``self.local_jitter_teva`` ("Get jitter (local)"
    over ``start_time``..``end_time`` with ``shortest_period``,
    ``longest_period`` and ``maximum_period_factor`` read from ``self``).
    """
    pulses = call(self.sound, "To PointProcess (periodic, cc)", 60, 600)
    self.point_process: object = pulses

    jitter_settings = (
        self.start_time,
        self.end_time,
        self.shortest_period,
        self.longest_period,
        self.maximum_period_factor,
    )
    self.local_jitter_teva: float = call(self.point_process, "Get jitter (local)", *jitter_settings)
def measure_mvd(self):
    """Store the Maximum Voicing Duration (MVD) in ``self.mvd``.

    ``self.point_process`` is converted with "To TextGrid (vuv)" (0.2, 0.1).
    The duration of every interval of tier 1 whose label contains "v"
    (case-insensitive) is measured and the longest one is kept. When no such
    interval exists, ``self.mvd`` is 0.0 (previously ``max`` raised
    ValueError on the empty list).
    """
    textgrid = call(self.point_process, "To TextGrid (vuv)", 0.2, 0.1)
    number_of_intervals = call(textgrid, "Get number of intervals", 1)

    voicing_durations = []
    # Praat interval indices are 1-based.
    for interval in range(1, number_of_intervals + 1):
        label = call(textgrid, "Get label of interval", 1, interval)
        if "v" in label.lower():
            start = call(textgrid, "Get start point", 1, interval)
            end = call(textgrid, "Get end point", 1, interval)
            voicing_durations.append(end - start)

    self.mvd = max(voicing_durations, default=0.0)