def extract_syllable_intervals(file_name):
    """Return the gaps between consecutive syllable nuclei of one audio file.

    Runs ``syllable_nuclei.praat`` on ``file_name``. According to the notes
    that accompany the script, the three numeric arguments are the silence
    threshold (dB), the minimum dip between peaks (dB) and the minimum pause
    duration; the script handles a single file rather than a directory.

    Returns the result of ``np.diff`` over the point times of tier 1.
    """
    print(f"Extracting syllable intervals from '{file_name}'...")

    # The script leaves two objects selected (Sound, TextGrid); keep the second.
    textgrid = run_file('syllable_nuclei.praat', -25, 2, 0.3, file_name)[1]

    point_count = call(textgrid, "Get number of points", 1)

    # Praat point indices are 1-based, hence range(1, n + 1).
    nucleus_times = []
    for point in range(1, point_count + 1):
        nucleus_times.append(call(textgrid, "Get time of point", 1, point))

    return np.diff(nucleus_times)
def myspsr(m, p):
    """Print the rate of speech reported by ``myspsolution.praat``.

    Parameters
    ----------
    m : str
        Base name of the recording; ``<p>/<m>.wav`` is analysed.
    p : str
        Directory containing the recording and ``myspsolution.praat``.

    Returns
    -------
    None
        The value is only printed.
    """
    sound = p + "/" + m + ".wav"
    sourcerun = p + "/myspsolution.praat"
    path = p + "/"
    objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
    print(objects[0])
    # The second element of the result is rendered as text and split on
    # whitespace (with capture_output=True this is presumably the captured
    # script output -- confirm against the Parselmouth documentation).
    tokens = str(objects[1]).strip().split()
    # Only token 2 is needed; the former unused float(tokens[3]) parse could
    # raise for a value this function never reports.
    rate_of_speech = int(tokens[2])
    print("rate_of_speech=", rate_of_speech, "# syllables/sec original duration")
def mysptotal(m, p):
    """Return a text summary of the first seven ``myspsolution.praat`` values.

    Parameters
    ----------
    m : str
        Base name of the recording; ``<p>/<m>.wav`` is analysed.
    p : str
        Directory containing the recording and ``myspsolution.praat``.

    Returns
    -------
    str
        The formatted summary, or a "Try again ..." message when the script
        fails or yields fewer than seven whitespace-separated tokens.
    """
    sound = p + "/" + m + ".wav"
    sourcerun = p + "/myspsolution.praat"
    path = p + "/"
    try:
        objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
        # Index the token list directly; wrapping it in a transposed NumPy
        # column only to read single cells added nothing.
        tokens = str(objects[1]).strip().split()
        rule = "---------------------------------"
        # NOTE(review): the template is a single line in this file; if a
        # multi-line table was intended, the line breaks need restoring.
        return (
            f"{rule} | Number of Syllables | {tokens[0]} "
            f"{rule} | Number of Pauses | {tokens[1]} "
            f"{rule} | Rate of Speech | {tokens[2]} "
            f"{rule} | Articulation Rate | {tokens[3]} "
            f"{rule} | Speaking Duration(Sec)| {tokens[4]} "
            f"{rule} | Original Duration(Sec)| {tokens[5]} "
            f"{rule} | Balance | {tokens[6]} {rule}"
        )
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        return "Try again the sound of the audio was not clear\n"
def myspp(m, p, q):
    """Return a simulated pronunciation score percentage.

    Parameters
    ----------
    m : str
        Path of the audio file passed to the Praat script.
    p : str
        Path of the Praat script to run.
    q : str
        Directory argument passed to the Praat script.

    Returns
    -------
    float
        Mean of 10000 binomial draws (n=10, p=token 14 of the script output)
        scaled to a percentage. The draws are random, so the value varies
        slightly between calls.
    """
    objects = run_file(p, -20, 2, 0.3, "yes", m, q, 80, 400, 0.01, capture_output=True)
    print(objects[0])
    tokens = str(objects[1]).strip().split()
    # Only token 14 is used; the former unused int(tokens[13]) parse could
    # raise for a value that plays no part in the score.
    probability = float(tokens[14])
    draws = np.array(binom.rvs(n=10, p=probability, size=10000))
    return np.mean(draws) * 100 / 10
def word_per_minutes(m='base', p=r"minor_project"):
    """Return token 3 of the ``myspsolution.praat`` output as an integer.

    Parameters
    ----------
    m : str
        Base name of the recording under ``<p>/dataset/audioFiles``.
    p : str
        Project root containing the ``dataset`` folder.

    Returns
    -------
    int or str
        The parsed integer, or the "Try again ..." message when the script
        fails or the token cannot be parsed (mixed return type kept for
        existing callers).
    """
    sound = p + "/dataset/audioFiles/" + m + ".wav"
    sourcerun = p + "/dataset/essen/myspsolution.praat"
    path = p + "/dataset/audioFiles/"
    try:
        objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
        print(objects[0])
        tokens = str(objects[1]).strip().split()
        return int(tokens[3])
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        return "Try again the sound of the audio was not clear"
def myspbala(m, p):
    """Print the balance value reported by ``myspsolution.praat``.

    Parameters
    ----------
    m : str
        Base name of the recording under ``<p>/dataset/audioFiles``.
    p : str
        Project root containing the ``dataset`` folder.

    Returns
    -------
    None
        The value (or a failure message) is only printed.
    """
    sound = p + "/dataset/audioFiles/" + m + ".wav"
    sourcerun = p + "/dataset/essen/myspsolution.praat"
    path = p + "/dataset/audioFiles/"
    try:
        objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
        print(objects[0])
        tokens = str(objects[1]).strip().split()
        # Only token 6 is reported; the former unused int(tokens[3]) parse
        # could send a perfectly good result down the failure path.
        balance = float(tokens[6])
        print("balance=", balance, "# ratio (speaking duration)/(original duration)")
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("Try again the sound of the audio was not clear")
def myspf0sd(m, p):
    """Print the f0 standard deviation reported by ``myspsolution.praat``.

    Parameters
    ----------
    m : str
        Base name of the recording; ``<p>/<m>.wav`` is analysed.
    p : str
        Directory containing the recording and ``myspsolution.praat``.

    Returns
    -------
    None
        The value (or a failure message) is only printed.
    """
    sound = p + "/" + m + ".wav"
    sourcerun = p + "/myspsolution.praat"
    path = p + "/"
    try:
        objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
        print(objects[0])
        tokens = str(objects[1]).strip().split()
        # Only token 8 is reported; the unused int(tokens[3]) parse was dropped
        # so it can no longer trigger the failure message on its own.
        f0_sd = float(tokens[8])
        print(
            "f0_SD=", f0_sd,
            "# Hz global standard deviation of fundamental frequency distribution"
        )
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("Try again the sound of the audio was not clear")
def myspbala(m, p):
    """Return the balance value reported by ``myspsolution.praat``.

    The summary this function labels the value as the ratio
    (speaking duration)/(original duration).

    Parameters
    ----------
    m : str
        Base name of the recording; ``<p>/<m>.wav`` is analysed.
    p : str
        Directory containing the recording and ``myspsolution.praat``.

    Returns
    -------
    float or None
        Token 6 of the script output as a float, or ``None`` when the script
        fails or the token cannot be parsed.
    """
    sound = p + "/" + m + ".wav"
    sourcerun = p + "/myspsolution.praat"
    path = p + "/"
    try:
        objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
        print(objects[0])
        tokens = str(objects[1]).strip().split()
        # The unused int(tokens[3]) parse was dropped: it could turn a valid
        # balance into a None result.
        return float(tokens[6])
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        return None
def myspf0max(m, p):
    """Return the f0 maximum (Hz) reported by ``myspsolution.praat``.

    Parameters
    ----------
    m : str
        Base name of the recording; ``<p>/<m>.wav`` is analysed.
    p : str
        Directory containing the recording and ``myspsolution.praat``.

    Returns
    -------
    int or None
        Token 11 of the script output parsed with ``int``, or ``None`` when
        the script fails or the token is not an integer literal.
    """
    sound = p + "/" + m + ".wav"
    sourcerun = p + "/myspsolution.praat"
    path = p + "/"
    try:
        objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
        print(objects[0])
        tokens = str(objects[1]).strip().split()
        # NOTE(review): int() rejects text such as "229.5"; this assumes the
        # script prints a whole number here -- confirm against the script.
        return int(tokens[11])
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        return None
def getStats(text, m, p):
    """Return speech statistics for a recording plus a words-per-minute figure.

    Parameters
    ----------
    text : str
        Transcript of the recording; its whitespace-separated word count is
        used for the words-per-minute value.
    m : str
        Base name of the recording; ``p + m + ".wav"`` is analysed (``p`` is
        concatenated as-is, so it must already end with a separator).
    p : str
        Directory prefix of the recording, also passed to the Praat script.

    Returns
    -------
    dict
        The first seven script values as floats under their feature names,
        plus ``"words_per_min"`` as an int.

    Raises
    ------
    IndexError, ValueError
        If the script output has too few tokens or a token is not numeric.
    ZeroDivisionError
        If the reported original duration is 0.
    """
    sound = p + m + ".wav"
    sourcerun = "./myspsolution.praat"
    objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, p, 80, 400, 0.01, capture_output=True)
    tokens = str(objects[1]).strip().split()

    names = (
        "number_of_syllables",
        "number_of_pauses",
        "rate_of_speech",
        "articulation_rate",
        "speaking_duration",
        "original_duration",
        "balance",
    )

    # Token 5 is the original duration (seconds). Reading it from the token
    # list avoids float() on a one-element ndarray, which NumPy deprecates.
    duration = float(tokens[5])
    words_per_min = int(len(text.split()) / (duration / 60))

    stats = {name: float(tokens[i]) for i, name in enumerate(names)}
    stats["words_per_min"] = words_per_min
    return stats
def mysppaus(m, p):
    """Print the number of pauses reported by ``myspsolution.praat``.

    Parameters
    ----------
    m : str
        Base name of the recording; ``<p>/<m>.wav`` is analysed.
    p : str
        Directory containing the recording and ``myspsolution.praat``.

    Returns
    -------
    None
        The value (or a failure message) is only printed.
    """
    sound = p + "/" + m + ".wav"
    sourcerun = p + "/myspsolution.praat"
    path = p + "/"
    try:
        objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
        print(objects[0])
        tokens = str(objects[1]).strip().split()
        # Only token 1 is reported; the unused float(tokens[3]) parse was dropped.
        pause_count = int(tokens[1])
        print("number_of_pauses=", pause_count)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("Try again the sound of the audio was not clear")
def mysppron(m, p):
    """Print a simulated pronunciation score percentage for a recording.

    Parameters
    ----------
    m : str
        Base name of the recording; ``<p>/<m>.wav`` is analysed.
    p : str
        Directory containing the recording and ``myspsolution.praat``.

    Returns
    -------
    None
        The score is only printed. It is the mean of 10000 random binomial
        draws, so it varies slightly between calls.
    """
    sound = p + "/" + m + ".wav"
    sourcerun = p + "/myspsolution.praat"
    path = p + "/"
    objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
    print(objects[0])
    tokens = str(objects[1]).strip().split()
    # Only token 14 feeds the score; the unused int(tokens[13]) parse was
    # dropped because it could raise for an unrelated value.
    probability = float(tokens[14])
    draws = np.array(binom.rvs(n=10, p=probability, size=10000))
    score = np.mean(draws) * 100 / 10
    print("Pronunciation_posteriori_probability_score_percentage= :%.2f" % (score))
def myspf0q75(m, p):
    """Print the 75th f0 quantile reported by ``myspsolution.praat``.

    Parameters
    ----------
    m : str
        Base name of the recording; ``<p>/<m>.wav`` is analysed.
    p : str
        Directory containing the recording and ``myspsolution.praat``.

    Returns
    -------
    None
        The value is only printed.
    """
    sound = p + "/" + m + ".wav"
    sourcerun = p + "/myspsolution.praat"
    path = p + "/"
    objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
    print(objects[0])
    tokens = str(objects[1]).strip().split()
    # Only token 13 is reported; the unused float(tokens[11]) parse was dropped.
    f0_q75 = int(tokens[13])
    print("f0_quan75=", f0_q75, "# Hz global 75th quantile of fundamental frequency distribution")
def myspod(m, p):
    """Print the original duration reported by ``myspsolution.praat``.

    Parameters
    ----------
    m : str
        Base name of the recording; ``<p>/<m>.wav`` is analysed.
    p : str
        Directory containing the recording and ``myspsolution.praat``.

    Returns
    -------
    None
        The value (or a failure message with the exception) is only printed.
    """
    sound = p + "/" + m + ".wav"
    sourcerun = p + "/myspsolution.praat"
    path = p + "/"
    try:
        objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
        print(objects[0])
        tokens = str(objects[1]).strip().split()
        # Only token 5 is reported; the unused int(tokens[3]) parse was dropped
        # so it can no longer trigger the failure message on its own.
        original_duration = float(tokens[5])
        print("original_duration=", original_duration, "# sec total speaking duration with pauses")
    except Exception as e:
        print("Try again the sound of the audio was not clear", e)
def myspf0sd(sound_path):
    """Return the f0 standard deviation and mean for one audio file.

    Runs the Praat script at the module-level ``praat_path`` with
    ``textgrid_out_dir`` as its directory argument.

    Parameters
    ----------
    sound_path : str
        Path of the audio file to analyse.

    Returns
    -------
    tuple of (float, float)
        ``(std, mean)`` taken from tokens 8 and 7 of the script output.

    Raises
    ------
    ValueError
        If the script fails or its output cannot be parsed; the original
        error is attached as ``__cause__``.
    """
    print('processing myspf0sd for file:', sound_path)
    try:
        objects = run_file(praat_path, -20, 2, 0.3, "yes", sound_path, textgrid_out_dir, 80, 400, 0.01,
                           capture_output=True)
        tokens = str(objects[1]).strip().split()
        std = float(tokens[8])
        mean = float(tokens[7])
    except Exception as err:
        # Exception (not a bare except) so Ctrl-C / SystemExit are not
        # converted into ValueError.
        print('[error]', type(err))
        print("[TODO] The sound of the audio was not clear:", sound_path)
        raise ValueError(f"could not analyse {sound_path!r}") from err
    return std, mean
def myspp(bp, bg):
    """Return a simulated pronunciation score percentage for a recording.

    Parameters
    ----------
    bp : str
        Base name of the recording under ``<bg>/dataset/audioFiles``.
    bg : str
        Project root containing the ``dataset`` folder.

    Returns
    -------
    float
        Mean of 10000 binomial draws (n=10, p=token 14 of the script output)
        scaled to a percentage. The draws are random, so the value varies
        slightly between calls.
    """
    sound = bg + "/dataset/audioFiles/" + bp + ".wav"
    sourcerun = bg + "/dataset/essen/myspsolution.praat"
    path = bg + "/dataset/audioFiles/"
    objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
    print(objects[0])
    tokens = str(objects[1]).strip().split()
    # Only token 14 feeds the score; the unused int(tokens[13]) parse was
    # dropped because it could raise for an unrelated value.
    probability = float(tokens[14])
    draws = np.array(binom.rvs(n=10, p=probability, size=10000))
    return np.mean(draws) * 100 / 10
def mysppron(m, p, sound):
    """Print the pronunciation posteriori probability score percentage.

    Parameters
    ----------
    m : str
        Unused; kept so existing call sites keep working.
    p : str
        Project root containing ``dataset/essen/myspsolution.praat`` and
        ``dataset/audioFiles``.
    sound : str
        Path of the audio file to analyse.

    Returns
    -------
    None
        The score (or a failure message) is only printed. The score is the
        mean of 10000 random binomial draws, so it varies between calls.
    """
    sourcerun = p + "/dataset/essen/myspsolution.praat"
    path = p + "/dataset/audioFiles/"
    try:
        objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
        print(objects[0])
        tokens = str(objects[1]).strip().split()
        # Only token 14 feeds the score; the unused int(tokens[13]) parse was dropped.
        probability = float(tokens[14])
        draws = np.array(binom.rvs(n=10, p=probability, size=10000))
        score = np.mean(draws) * 100 / 10
        print("Pronunciation_posteriori_probability_score_percentage= :%.2f" % (score))
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("Try again the sound of the audio was not clear")
def run_praat_file(c, filename):
    """Run ``myspsolution.praat`` on one audio file and return its output tokens.

    Parameters
    ----------
    c : str
        Root folder that contains ``dataset/essen/myspsolution.praat``.
    filename : str
        Path of the audio file; its parent directory is passed to the script
        as the directory argument.

    Returns
    -------
    list of str or None
        Whitespace-separated tokens of the script output, or ``None`` (after
        printing a message) when the script fails.

    Raises
    ------
    AssertionError
        If the audio file, the script, or the audio directory does not exist.
    """
    sound = filename
    # os.path.join instead of hard-coded "\\" so the script is also found on
    # non-Windows systems.
    sourcerun = os.path.join(c, "dataset", "essen", "myspsolution.praat")
    path = os.path.dirname(filename)

    assert os.path.isfile(sound), "Wrong path to audio file"
    assert os.path.isfile(sourcerun), "Wrong path to praat script"
    assert os.path.isdir(path), "Wrong path to audio files"

    try:
        objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
        print(objects[0])
        return str(objects[1]).strip().split()
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("Try again the sound of the audio was not clear")
        return None
def extract_syllable_intervals(file_name):
    """Compute the time differences between successive syllable nuclei.

    ``syllable_nuclei.praat`` is run on ``file_name``; the second object it
    returns is queried for every point on tier 1, and the differences between
    neighbouring point times are returned via ``np.diff``.
    """
    print(f"Extracting syllable intervals from '{file_name}'...")

    # Second returned object is used as the TextGrid.
    grid = run_file('syllable_nuclei.praat', -25, 2, 0.3, file_name)[1]
    total_points = call(grid, "Get number of points", 1)

    # Praat numbers points from 1.
    times = []
    for index in range(1, total_points + 1):
        times.append(call(grid, "Get time of point", 1, index))

    return np.diff(times)
def extract_pisr(source_file, praat_sourcerun=praat_sourcerun, temp_dir=temp_dir):
    """Extract pitch, intensity and speaking-rate features from an audio file.

    Parameters
    ----------
    source_file : str
        Path of the audio file to analyse.
    praat_sourcerun : str
        Path of the Praat script used for the speaking rate (defaults to the
        module-level ``praat_sourcerun``).
    temp_dir : str
        Directory passed to the Praat script; created if missing (defaults to
        the module-level ``temp_dir``).

    Returns
    -------
    dict
        The Sound/Pitch/Intensity objects, their raw and zero-filtered value
        sequences, min/max/mean/std/range statistics for pitch and intensity,
        and ``'speaking_rate'`` (0 when the Praat script fails).

    Raises
    ------
    ValueError
        If every pitch (or intensity) value is 0, because ``min`` is then
        applied to an empty list.
    """
    sound = parselmouth.Sound(source_file)

    pitch = sound.to_pitch(pitch_ceiling=500.0)
    pitch_values = pitch.selected_array['frequency']
    # Zero entries are excluded from min/mean/std; the max uses the raw array.
    fltrpitch = [value for value in pitch_values if value != 0]
    min_pitch = min(fltrpitch)
    max_pitch = max(pitch_values)
    mean_pitch = sum(fltrpitch) / len(fltrpitch)
    std_pitch = np.std(fltrpitch, ddof=1)
    range_pitch = max_pitch - min_pitch

    intensity = sound.to_intensity(minimum_pitch=100.0)
    intensity_values = intensity.values
    fltrintensity = [value for value in intensity_values[0] if value != 0]
    min_intensity = min(fltrintensity)
    max_intensity = max(intensity_values[0])
    mean_intensity = sound.get_intensity()
    std_intensity = np.std(fltrintensity, ddof=1)
    range_intensity = max_intensity - min_intensity

    try:
        os.makedirs(temp_dir, exist_ok=True)
        objects = run_file(praat_sourcerun, -20, 2, 0.3, "yes", source_file, temp_dir, 80, 400, 0.01,
                           capture_output=True)
        print(objects)
        speaking_rate = int(str(objects[1]).strip().split()[2])
    except Exception:
        # Best effort: fall back to 0, but let Ctrl-C / SystemExit propagate
        # (the former bare except swallowed those too).
        speaking_rate = 0

    return {
        'sound': sound,
        'pitch': pitch,
        'pitch_values': pitch_values,
        'fltrpitch': fltrpitch,
        'min_pitch': min_pitch,
        'max_pitch': max_pitch,
        'mean_pitch': mean_pitch,
        'std_pitch': std_pitch,
        'range_pitch': range_pitch,
        'intensity': intensity,
        'intensity_values': intensity_values,
        'fltrintensity': fltrintensity,
        'max_intensity': max_intensity,
        'min_intensity': min_intensity,
        'mean_intensity': mean_intensity,
        'std_intensity': std_intensity,
        'range_intensity': range_intensity,
        'speaking_rate': speaking_rate,
    }
def run_praat_file(m, p, r):
    """Run a Praat script on one audio file and return its output tokens.

    The docstring this replaces credits the script to Nivja DeJong and
    Ton Wempe, Paul Boersma and David Weenink, and Carlo Gussenhoven.

    Parameters
    ----------
    m : str
        Path to the audio file.
    p : str
        Path to the dataset folder (passed to the script as its directory).
    r : str
        Path of the ``.praat`` script.

    Returns
    -------
    list of str or int
        Whitespace-separated tokens of the script output, or ``-2`` when
        running the script or reading its output fails.

    Raises
    ------
    AssertionError
        If the audio file, the script, or the dataset folder does not exist.
    """
    assert os.path.isfile(m), "Wrong path to audio file"
    assert os.path.isfile(r), "Wrong path to praat script"
    assert os.path.isdir(p), "Wrong path to audio files"

    try:
        objects = run_file(r, -20, 2, 0.3, "yes", m, p, 80, 400, 0.01, capture_output=True)
        return str(objects[1]).strip().split()
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        return -2
def mysptotal(m, p):
    """Return the fourteen ``myspsolution.praat`` feature values as one row.

    Parameters
    ----------
    m : str
        Base name of the recording under ``<p>/dataset/audioFiles``.
    p : str
        Project root containing the ``dataset`` folder.

    Returns
    -------
    list of tuple or None
        A single-element list holding one tuple of the fourteen values (as
        text), or ``None`` after printing a message when anything fails.
    """
    sound = p + "/dataset/audioFiles/" + m + ".wav"
    sourcerun = p + "/dataset/essen/myspsolution.praat"
    path = p + "/dataset/audioFiles/"
    columns = (
        "number_ of_syllables",
        "number_of_pauses",
        "rate_of_speech",
        "articulation_rate",
        "speaking_duration",
        "original_duration",
        "balance",
        "f0_mean",
        "f0_std",
        "f0_median",
        "f0_min",
        "f0_max",
        "f0_quantile25",
        "f0_quan75",
    )
    try:
        objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
        print(objects[0])
        tokens = str(objects[1]).strip().split()
        # Shape (n_tokens, 1): each row becomes a one-element DataFrame column.
        column = np.array(tokens)[np.newaxis].T
        dataset = pd.DataFrame({name: column[i, :] for i, name in enumerate(columns)})
        # The print(dataset.T) that used to follow this return was unreachable.
        return list(dataset.itertuples(index=False, name=None))
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("Try again the sound of the audio was not clear")
    return None
def extract_syllable_intervals(dir_name: str, file_name: str):
    """Return millisecond slices covering each detected syllable of a file.

    Based largely on the example in: Jadoul, Y., Thompson, B., & De Boer, B.
    (2018). Introducing Parselmouth: A Python interface to Praat. Journal of
    Phonetics, 71, 1-15 (page 18).
    https://billdthompson.github.io/assets/output/Jadoul2018.pdf

    Args:
        dir_name: Name of directory file is located in
        file_name: Name of file to analyze

    Returns:
        A list of ``slice(start_ms, stop_ms)`` objects, one per syllable
        nucleus, or an empty list when Praat raises ``PraatError``.
    """
    try:
        grid = run_file('praat_scripts/syllable_nuclei.praat', -40, 2, 0.3,
                        0.1, True, dir_name, file_name)[0]
        point_total = call(grid, "Get number of points", 1)

        # Nucleus times from tier 1 (Praat point indices start at 1).
        nuclei = []
        for point in range(1, point_total + 1):
            nuclei.append(call(grid, "Get time of point", 1, point))

        spans = []
        for position, onset in enumerate(nuclei):
            # A syllable ends at the next nucleus or at the end of the tier-2
            # interval containing it, whichever comes first.
            containing = call(grid, "Get interval at time", 2, onset)
            following = _get_or_default(nuclei, position + 1, float('inf'))
            interval_stop = call(grid, "Get end time of interval", 2, containing)
            offset = min(following, interval_stop)

            # Seconds -> whole milliseconds, stored as a slice for later
            # slicing of AudioSegment objects.
            spans.append(slice(int(round(onset * 1000)), int(round(offset * 1000))))

        # Clean up the .TextGrid file generated by the script.
        remove(join(dir_name, file_name + '.TextGrid'))
    except PraatError:
        spans = []

    return spans
def mysppron(c, filename):
    """Return the pronunciation posteriori probability score percentage.

    In pronunciation quality evaluation the similarity between standard and
    practical pronunciation is measured; the posterior probability is used
    here as that similarity measure.

    Parameters
    ----------
    c : str
        Root folder that contains ``dataset/essen/myspsolution.praat``.
    filename : str
        Path of the audio file to analyse.

    Returns
    -------
    float or int
        Mean of 10000 binomial draws (n=10, p=token 14 of the script output)
        scaled to a percentage, or ``-1`` after printing a message when
        anything fails. The draws are random, so the score varies slightly.
    """
    sound = filename
    # os.path.join instead of hard-coded "\\" so the script is also found on
    # non-Windows systems.
    sourcerun = os.path.join(c, "dataset", "essen", "myspsolution.praat")

    parent_path = os.path.dirname(os.getcwd())
    print(parent_path)
    # NOTE(review): the directory argument is a fixed, user-specific folder
    # next to the working directory; consider making it configurable.
    path = os.path.join(str(parent_path), "student_interview_data", "kevalshah90909@gmail")

    try:
        objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
        print(objects[0])
        tokens = str(objects[1]).strip().split()
        # Only token 14 feeds the score; the unused int(tokens[13]) parse was dropped.
        probability = float(tokens[14])
        draws = np.array(binom.rvs(n=10, p=probability, size=10000))
        score = np.mean(draws) * 100 / 10
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("Try again the sound of the audio was not clear")
        return -1
    return score
def mysptotal(m, p):
    """Return the fourteen ``myspsolution.praat`` feature values as floats.

    Parameters
    ----------
    m : str
        Base name of the recording; ``<p>/<m>.wav`` is analysed.
    p : str
        Directory containing the recording and ``myspsolution.praat``.

    Returns
    -------
    dict
        Feature name -> float, in the order the script prints the values.

    Raises
    ------
    Exception
        Any error from the script or from parsing its output propagates
        unchanged (``IndexError`` for too few tokens, ``ValueError`` for
        non-numeric text).
    """
    sound = p + "/" + m + ".wav"
    sourcerun = p + "/myspsolution.praat"
    path = p + "/"
    names = (
        "number_ of_syllables",
        "number_of_pauses",
        "rate_of_speech",
        "articulation_rate",
        "speaking_duration",
        "original_duration",
        "balance",
        "f0_mean",
        "f0_std",
        "f0_median",
        "f0_min",
        "f0_max",
        "f0_quantile25",
        "f0_quan75",
    )
    # No try/except: the former handler only re-raised the same exception.
    objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
    print(objects[0])
    tokens = str(objects[1]).strip().split()
    # Convert the tokens directly; float() on one-element ndarrays is
    # deprecated by NumPy.
    return {name: float(tokens[i]) for i, name in enumerate(names)}
def mysppron(m, p):
    """Return a formatted pronunciation score line for a recording.

    Parameters
    ----------
    m : str
        Base name of the recording; ``<p>/<m>.wav`` is analysed.
    p : str
        Directory containing the recording and ``myspsolution.praat``.

    Returns
    -------
    str
        The score rounded to two decimals inside a table row, or a
        "Try again ..." message when anything fails. The score is the mean
        of 10000 random binomial draws, so it varies slightly between calls.
    """
    sound = p + "/" + m + ".wav"
    sourcerun = p + "/myspsolution.praat"
    path = p + "/"
    try:
        objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
        tokens = str(objects[1]).strip().split()
        # Only token 14 feeds the score; the unused int(tokens[13]) parse was dropped.
        probability = float(tokens[14])
        draws = np.array(binom.rvs(n=10, p=probability, size=10000))
        score = np.mean(draws) * 100 / 10
        # NOTE(review): the template is a single line in this file; if a
        # multi-line table row was intended, the line breaks need restoring.
        return f" | Pronunciation Score | {round(score, 2)}% ---------------------------------"
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        return "Try again the sound of the audio was not clear\n"
def run_praat_file(m, p):
    """Run ``myspsolution.praat`` on a dataset recording and return its tokens.

    Parameters
    ----------
    m : str
        Base name of the recording under ``<p>/dataset/audioFiles``.
    p : str
        Project root containing the ``dataset`` folder.

    Returns
    -------
    list of str or None
        Whitespace-separated tokens of the script output, or ``None`` (after
        printing a message) when the script fails.

    Raises
    ------
    AssertionError
        If the audio file, the script, or the audio folder does not exist.
    """
    sound = p + "/dataset/audioFiles/" + m + ".wav"
    sourcerun = p + "/dataset/essen/myspsolution.praat"
    path = p + "/dataset/audioFiles/"

    assert os.path.isfile(sound), "Wrong path to audio file"
    assert os.path.isfile(sourcerun), "Wrong path to praat script"
    assert os.path.isdir(path), "Wrong path to audio files"

    try:
        objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
        print(objects[0])
        return str(objects[1]).strip().split()
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
        print("Try again the sound of the audio was not clear")
        return None
def get_features(praat_source, wav_file_path, textgrid_save):
    """Run a Praat script on a WAV file and return its first 14 values by name.

    Parameters
    ----------
    praat_source : str
        Path of the Praat script to run.
    wav_file_path : str
        Path of the audio file passed to the script.
    textgrid_save : str
        Directory argument passed to the script.

    Returns
    -------
    dict or None
        Feature name -> token text (values are not converted to numbers), or
        ``None`` after printing a message and the exception when anything
        fails.
    """
    feature_names = (
        "number_ of_syllables",
        "number_of_pauses",
        "rate_of_speech",
        "articulation_rate",
        "speaking_duration",
        "original_duration",
        "balance",
        "f0_mean",
        "f0_std",
        "f0_median",
        "f0_min",
        "f0_max",
        "f0_quantile25",
        "f0_quan75",
    )
    try:
        result = run_file(praat_source, -20, 2, 0.3, "yes", wav_file_path,
                          textgrid_save, 80, 400, 0.01, capture_output=True)
        tokens = str(result[1]).strip().split()

        # Explicit indexing so a short output still raises IndexError.
        features = {}
        for position, label in enumerate(feature_names):
            features[label] = tokens[position]

        print(features)
        return features
    except Exception as e:
        print("Try again the sound of the audio was not clear")
        print(e)
    return None
def myprosody(m, p):
    """Print how a recording's prosodic features compare with reference data.

    Runs ``<p>/MLTRNL.praat`` on ``<p>/<m>.wav``, stores the 27 output tokens
    in ``<p>/datanewchi22.csv``, writes two reduced copies
    (``datanewchi44.csv`` and ``datanewchi33.csv``), then compares 25 values
    from ``datanewchi44.csv`` against rows 4-6 of ``<p>/stats.csv`` with
    ``scipy.stats.percentileofscore`` and prints one line per feature.

    Parameters
    ----------
    m : str
        Base name of the recording (without ``.wav``).
    p : str
        Directory holding the recording, the Praat script and ``stats.csv``;
        the CSV files are written here as well.

    Returns
    -------
    None
        Results are printed and written to the CSV files.
    """
    sound = p + "/" + m + ".wav"
    sourcerun = p + "/MLTRNL.praat"
    path = p + "/"
    outo = p + "/datanewchi22.csv"
    outst = p + "/datanewchi44.csv"
    outsy = p + "/datanewchi33.csv"
    pa2 = p + "/stats.csv"
    pa7 = p + "/datanewchi44.csv"  # same file as outst
    # NOTE(review): this first array and `files` are never used; the array is
    # replaced two lines below.
    result_array = np.empty((0, 100))
    files = glob.glob(path)
    # 27 columns: one per name passed to read_csv below.
    result_array = np.empty((0, 27))
    objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
    # Second element of the run_file result; used directly as a string here
    # (presumably the captured script output -- confirm against Parselmouth docs).
    z1 = (
        objects[1]
    )
    z3 = z1.strip().split()
    z2 = np.array([z3])  # NOTE(review): z2 is not used afterwards
    result_array = np.append(result_array, [z3], axis=0)
    print(z3)
    np.savetxt(outo, result_array, fmt='%s', delimiter=',')
    # Data and features analysis
    df = pd.read_csv(outo,
                     names=[
                         'avepauseduratin', 'avelongpause', 'speakingtot',
                         'avenumberofwords', 'articulationrate', 'inpro',
                         'f1norm', 'mr', 'q25', 'q50', 'q75', 'std', 'fmax',
                         'fmin', 'vowelinx1', 'vowelinx2', 'formantmean',
                         'formantstd', 'nuofwrds', 'npause', 'ins',
                         'fillerratio', 'xx', 'xxx', 'totsco', 'xxban',
                         'speakingrate'
                     ],
                     na_values='?')
    # 25-column variant used for the percentile comparison further down.
    scoreMLdataset = df.drop(['xxx', 'xxban'], axis=1)
    scoreMLdataset.to_csv(outst, header=False, index=False)
    # 14-column variant written to datanewchi33.csv.
    newMLdataset = df.drop([
        'avenumberofwords', 'f1norm', 'inpro', 'q25', 'q75', 'vowelinx1',
        'nuofwrds', 'npause', 'xx', 'totsco', 'xxban', 'speakingrate',
        'fillerratio'
    ],
                           axis=1)
    newMLdataset.to_csv(outsy, header=False, index=False)
    namess = nms = [
        'avepauseduratin', 'avelongpause', 'speakingtot', 'articulationrate',
        'mr', 'q50', 'std', 'fmax', 'fmin', 'vowelinx2', 'formantmean',
        'formantstd', 'ins', 'xxx'
    ]
    # NOTE(review): df1 is read back but not used afterwards.
    df1 = pd.read_csv(outsy, names=namess)
    # Display labels, one per column compared in the loop below.
    nsns = [
        'average_syll_pause_duration', 'No._long_pause', 'speaking_time',
        'ave_No._of_words_in_minutes', 'articulation_rate',
        'No._words_in_minutes', 'formants_index', 'f0_index',
        'f0_quantile_25_index', 'f0_quantile_50_index',
        'f0_quantile_75_index', 'f0_std', 'f0_max', 'f0_min',
        'No._detected_vowel', 'perc%._correct_vowel', '(f2/f1)_mean',
        '(f2/f1)_std', 'no._of_words', 'no._of_pauses', 'intonation_index',
        '(voiced_syll_count)/(no_of_pause)', 'TOEFL_Scale_Score',
        'Score_Shannon_index', 'speaking_rate'
    ]
    # NOTE(review): these two reads use the name `pandas` while the rest of
    # the function uses `pd`; both must be imported by the module.
    dataframe = pandas.read_csv(pa2)
    # NOTE(review): datanewchi44.csv was written without a header row but is
    # read with default header handling -- confirm that row 0 exists below.
    df55 = pandas.read_csv(pa7)
    dataframe = dataframe.values
    array = df55.values
    print(
        "Compared to native speech, here are the prosodic features of your speech:"
    )
    for i in range(25):
        # Reference values: rows 4-6 of stats.csv, column offset by one.
        sl0 = dataframe[4:7:1, i + 1]
        score = array[0, i]
        he = scipy.stats.percentileofscore(sl0, score, kind='strict')
        if he == 0:
            # A percentile of exactly 0 is reported as 25.
            he = 25
            dfout = "%s:\t %f (%s)" % (nsns[i], he, "% percentile ")
            print(dfout)
        elif he >= 25 and he <= 75:
            dfout = "%s:\t %f (%s)" % (nsns[i], he, "% percentile ")
            print(dfout)
        else:
            dfout = "%s:\t (%s)" % (nsns[i], ":Out of Range")
            print(dfout)
def myspgend(m, p):
    """Print a gender / mood-of-speech guess derived from f0 statistics.

    Tokens 7 and 8 of the ``myspsolution.praat`` output (used here as f0 mean
    and f0 spread) select a reference band; random Wald and normal samples
    are then compared with a KS test and a t-test until the stopping rule is
    met, and the verdict for the band containing the f0 mean is printed.

    Parameters
    ----------
    m : str
        Base name of the recording; ``<p>/<m>.wav`` is analysed.
    p : str
        Directory containing the recording and ``myspsolution.praat``.

    Returns
    -------
    None
        Output is printed; results depend on NumPy's global random state.
    """
    sound = p + "/" + m + ".wav"
    sourcerun = p + "/myspsolution.praat"
    path = p + "/"
    objects = run_file(sourcerun, -20, 2, 0.3, "yes", sound, path, 80, 400, 0.01, capture_output=True)
    print(objects[0])
    fields = str(objects[1]).strip().split()
    f0_spread = float(fields[8])
    f0_mean = float(fields[7])

    # (upper bound of f0 mean, g, j) for every band except the open-ended top one.
    reference_bands = (
        (114, 101, 3.4),
        (135, 128, 4.35),
        (163, 142, 4.85),
        (197, 182, 2.7),
        (226, 213, 4.5),
    )
    for upper, g, j in reference_bands:
        if f0_mean <= upper:
            break
    else:
        if f0_mean > 226:
            g, j = 239, 5.3
        else:
            # Reached only when every comparison is false (e.g. NaN).
            print("Voice not recognized")
            exit()

    def sample_tests(a, b, c, d):
        # Draw order matters for reproducibility: Wald pair first, then normal pair.
        wald_a = np.random.wald(a, 1, 1000)
        wald_b = np.random.wald(b, 1, 1000)
        ks_result = ks_2samp(wald_a, wald_b)
        normal_a = np.random.normal(a, c, 1000)
        normal_b = np.random.normal(b, d, 1000)
        t_result = ttest_ind(normal_a, normal_b)
        return [ks_result[0], ks_result[1], abs(t_result[0]), t_result[1]]

    attempts = 0
    stats = sample_tests(g, j, f0_mean, f0_spread)
    # At least five re-draws, then continue while both statistics stay above
    # their thresholds.
    while (stats[3] > 0.05 and stats[0] > 0.04) or attempts < 5:
        stats = sample_tests(g, j, f0_mean, f0_spread)
        attempts = attempts + 1

    if stats[3] <= 0.09:
        reported_p = stats[3]
    else:
        reported_p = 0.35

    # (exclusive lower bound, inclusive upper bound, message template)
    verdicts = (
        (97, 114, "a Male, mood of speech: Showing no emotion, normal, p-value/sample size= :%.2f"),
        (114, 135, "a Male, mood of speech: Reading, p-value/sample size= :%.2f"),
        (135, 163, "a Male, mood of speech: speaking passionately, p-value/sample size= :%.2f"),
        (163, 197, "a female, mood of speech: Showing no emotion, normal, p-value/sample size= :%.2f"),
        (197, 226, "a female, mood of speech: Reading, p-value/sample size= :%.2f"),
        (226, 245, "a female, mood of speech: speaking passionately, p-value/sample size= :%.2f"),
    )
    for lower, upper, template in verdicts:
        if f0_mean > lower and f0_mean <= upper:
            print(template % (reported_p), (attempts))
            break
    else:
        print("Voice not recognized")