Example #1
0
def segment_shimmer(com_speech_sort, voiced_yes, voiced_no, shimmer_frames,
                    audio_file):
    """Compute a shimmer value for every segment in ``com_speech_sort``.

    Each segment is a sequence of pitch-frame numbers. Segments that are
    listed in ``voiced_yes`` and contain more than one frame are cut out of
    the audio and passed to ``audio_shimmer``; every other segment (and every
    segment whose analysis fails) gets NaN.

    :param com_speech_sort: iterable of segments (sequences of frame numbers).
    :param voiced_yes: collection of segments considered voiced.
    :param voiced_no: unused here; kept for interface compatibility.
    :param shimmer_frames: indexable container, written at each segment index.
    :param audio_file: anything accepted by ``parselmouth.Sound``.
    :returns: ``shimmer_frames`` with one entry written per segment.
    """
    snd = parselmouth.Sound(audio_file)
    pitch = snd.to_pitch(time_step=.001)

    for idx, vs in enumerate(com_speech_sort):
        # Bind the default outside the try block so it is always defined,
        # and use np.nan (the np.NaN alias was removed in NumPy 2.0).
        shimmer = np.nan
        try:
            if vs in voiced_yes and len(vs) > 1:
                # Pitch frame numbers -> times -> sample positions in the sound.
                start_time = pitch.get_time_from_frame_number(vs[0])
                end_time = pitch.get_time_from_frame_number(vs[-1])

                snd_start = int(snd.get_frame_number_from_time(start_time))
                snd_end = int(snd.get_frame_number_from_time(end_time))

                samples = parselmouth.Sound(
                    snd.as_array()[0][snd_start:snd_end])
                shimmer = audio_shimmer(samples)
        except Exception:
            # Best effort: a segment that cannot be analysed stays NaN.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            shimmer = np.nan

        shimmer_frames[idx] = shimmer
    return shimmer_frames
Example #2
0
def test_from_numpy_array_stereo(sampling_frequency):
    """Check that a 2-row NumPy array becomes a two-channel Sound.

    Covers a contiguous array, a reversed/strided view, and the warning
    emitted when the array looks transposed (more channels than samples).
    """
    phase = 2 * np.pi * np.arange(sampling_frequency) / sampling_frequency
    sine_values = np.sin(phase)
    # Must differ from the sine channel: with two identical channels the
    # channel-order assertion after the [::-1] reversal below proves nothing.
    cosine_values = np.cos(phase)

    sound = parselmouth.Sound(np.vstack((sine_values, cosine_values)),
                              sampling_frequency=sampling_frequency)
    assert np.all(sound.values == [sine_values, cosine_values])
    assert sound.n_samples == len(sine_values)
    assert sound.n_channels == 2
    assert sound.sampling_frequency == sampling_frequency
    assert sound.duration == 1

    # Non-contiguous input: channels reversed, every third sample from index 1.
    sound = parselmouth.Sound(np.vstack((sine_values, cosine_values))[::-1,
                                                                      1::3],
                              sampling_frequency=sampling_frequency)
    assert np.all(sound.values == [cosine_values[1::3], sine_values[1::3]])

    with pytest.warns(
            RuntimeWarning,
            match=
            r'Number of channels \([0-9]+\) is greater than number of samples \([0-9]+\)'
    ):
        parselmouth.Sound(np.vstack((sine_values, cosine_values)).T,
                          sampling_frequency=sampling_frequency)
Example #3
0
def plot_contours(sound, language, countdown_label):
    """Plot reference and user pitch contours side by side.

    For "Mandarin" or "Vietnamese" the matching helper is first invoked with
    the fixed file name "user.mp3" and ``countdown_label``; any other
    language skips that step. Both ``sound`` and "user.mp3" are then loaded,
    converted to smoothed pitch tracks and shown as scatter plots.
    Returns None.
    """
    if language == "Mandarin":
        mdl.mand_deepL("user.mp3", countdown_label)
    elif language == "Vietnamese":
        vdl.viet_deepL("user.mp3", countdown_label)

    ref_sound = parselmouth.Sound(sound)
    user_sound = parselmouth.Sound("user.mp3")
    ref_pitch = ref_sound.to_pitch().kill_octave_jumps().smooth()
    user_pitch = user_sound.to_pitch().kill_octave_jumps().smooth()

    ref_frequencies = get_frequencies(ref_pitch)
    user_frequencies = get_frequencies(user_pitch)
    ref_indexes = get_indexes(ref_frequencies)
    user_indexes = get_indexes(user_frequencies)

    panels = (
        ('reference', ref_indexes, ref_frequencies),
        ('user', user_indexes, user_frequencies),
    )
    plt.figure()
    for column, (label, xs, ys) in enumerate(panels, start=1):
        plt.subplot(1, 2, column)
        plt.title(label)
        plt.scatter(xs, ys)
        # Both panels share the same fixed axes so they can be compared.
        plt.xlim([0, 60])
        plt.ylim([0, 250])
    plt.show()
Example #4
0
def main(original_audio, new_audio):
    """Load both recordings as Praat sounds and plot their pitch.

    ``original_audio`` is the target; ``new_audio`` is the new recording.
    ``plot_pitch`` receives the recording first and the target second.
    """
    target_audio, recorded_audio = (
        praat.Sound(source) for source in (original_audio, new_audio))
    plot_pitch(recorded_audio, target_audio)
Example #5
0
def test_from_scalar(sampling_frequency):
    """A bare scalar (int or float) must be rejected with a ValueError."""
    expected_message = "Cannot create Sound from a single 0-dimensional number"
    for scalar in (42, 3.14159):
        with pytest.raises(ValueError, match=expected_message):
            parselmouth.Sound(scalar, sampling_frequency=sampling_frequency)
Example #6
0
def offset(template: Clip, video: Clip) -> Tuple[float, float]:
    """Find position of this Clip in another Clip (may be negative).

    Both clips are loaded from their ``path``, mixed down to mono and
    cross-correlated; the position of the correlation peak is the offset.

    Returns two values: offset in seconds and cross-correlation score.
    """
    s1 = pm.Sound(template.path).convert_to_mono()
    s2 = pm.Sound(video.path).convert_to_mono()
    cc = s1.cross_correlate(s2, pm.AmplitudeScaling.SUM)
    score = cc.values.max()
    peak_index = int(cc.values.argmax())
    # argmax() is a 0-based array index, while Praat frame numbers start at
    # 1. Passing the raw index reported the time one sample too early and
    # handed the invalid frame number 0 to Praat for a peak at index 0.
    offset_seconds = cc.frame_number_to_time(peak_index + 1)
    return offset_seconds, score
Example #7
0
 def offset(self, clip: 'Clip') -> (float, float):
     """Find position of this Clip in another Clip (may be negative).

     Both clips are loaded from their ``path``, mixed down to mono and
     cross-correlated; the position of the correlation peak is the offset.

     Returns two values: offset in seconds and cross-correlation score.
     """
     s1 = pm.Sound(self.path).convert_to_mono()
     s2 = pm.Sound(clip.path).convert_to_mono()
     cc = s1.cross_correlate(s2, pm.AmplitudeScaling.SUM)
     score = cc.values.max()
     peak_index = int(cc.values.argmax())
     # argmax() is a 0-based array index, while Praat frame numbers start at
     # 1. Passing the raw index reported the time one sample too early and
     # handed the invalid frame number 0 to Praat for a peak at index 0.
     offset_seconds = cc.frame_number_to_time(peak_index + 1)
     return offset_seconds, score
Example #8
0
def test_from_numpy_array_mono(sampling_frequency):
    """A 1-D NumPy array becomes a single-channel Sound, strided or not."""
    sample_index = np.arange(sampling_frequency)
    sine_values = np.sin(2 * np.pi * sample_index / sampling_frequency)

    mono = parselmouth.Sound(sine_values,
                             sampling_frequency=sampling_frequency)
    assert np.all(mono.values == sine_values[np.newaxis, :])
    assert mono.n_samples == len(sine_values)
    assert mono.n_channels == 1
    assert mono.sampling_frequency == sampling_frequency
    assert mono.duration == 1

    # Same check with a non-contiguous (strided) view as input.
    strided = sine_values[1::3]
    mono = parselmouth.Sound(strided, sampling_frequency=sampling_frequency)
    assert np.all(mono.values == sine_values[np.newaxis, 1::3])
Example #9
0
def measurePitch(voiceID, f0min, f0max, unit):
    """Return jitter, shimmer and harmonicity measures for one recording.

    :param voiceID: anything accepted by ``parselmouth.Sound``.
    :param f0min: lower pitch bound passed to Praat.
    :param f0max: upper pitch bound passed to Praat.
    :param unit: not used by this function.
    :returns: 14-tuple: four jitter values (local, local absolute, rap,
        ppq5), five shimmer values (local, local_dB, apq3, apq5, apq11) and
        five mean harmonicity values computed with the second
        "To Harmonicity (cc)" argument set to 500, 1500, 2500, 3500 and 3800.
    """
    snd = parselmouth.Sound(voiceID)
    # The Pitch object is created but not used afterwards; the call is kept
    # so that the sequence of Praat commands is unchanged.
    call(snd, "To Pitch", 0.0, f0min, f0max)
    pulses = call(snd, "To PointProcess (periodic, cc)", f0min, f0max)

    jitter_commands = (
        "Get jitter (local)",
        "Get jitter (local, absolute)",
        "Get jitter (rap)",
        "Get jitter (ppq5)",
    )
    shimmer_commands = (
        "Get shimmer (local)",
        "Get shimmer (local_dB)",
        "Get shimmer (apq3)",
        "Get shimmer (apq5)",
        "Get shimmer (apq11)",
    )

    jitters = tuple(
        call(pulses, command, 0, 0, 0.0001, 0.02, 1.3)
        for command in jitter_commands)
    shimmers = tuple(
        call([snd, pulses], command, 0, 0, 0.0001, 0.02, 1.3, 1.6)
        for command in shimmer_commands)

    hnr_means = []
    for second_arg in (500, 1500, 2500, 3500, 3800):
        harmonicity = call(snd, "To Harmonicity (cc)", 0.01, second_arg, 0.1,
                           1.0)
        hnr_means.append(call(harmonicity, "Get mean", 0, 0))

    return jitters + shimmers + tuple(hnr_means)
Example #10
0
def main():
    """Extract features from every .wav in the audio folder into a CSV.

    For each file the base features and pitch statistics are computed, the
    values from the companion "<name>_st.csv" file are appended, and the
    resulting row is appended to the results CSV.
    """
    path = '/home/rosageorge97/MajorProject/Audio/'
    audio_files = list(glob.glob(os.path.join(path, '*.wav')))
    print(audio_files)

    for wav_path in audio_files:
        snd = parselmouth.Sound(wav_path)
        power, intensity = get_base_features(snd)
        duration, mean_pitch, min_pitch, max_pitch = pitch_values(snd)
        # The spectrogram is computed but not written to the output row.
        get_spectrogram(snd)

        end_name = wav_path.rsplit('/', 1)[-1]
        audio_analysis = convert_csv(path + end_name + "_st.csv")

        feature_vector = [
            end_name, power, intensity, duration, mean_pitch, min_pitch,
            max_pitch
        ]
        feature_vector.extend(audio_analysis)

        # Append mode: rows accumulate across files and across runs.
        with open('/home/rosageorge97/MajorProject/Results/audio_features.csv',
                  'a',
                  newline='') as results:
            csv.writer(results).writerow(feature_vector)
Example #11
0
def generate_f0_pulses(sound, interpolate=True):
    """Synthesize a pulse train that follows the F0 contour of ``sound``.

    :param sound: object accepted by ``parselmouth.Sound`` that also exposes
        ``samplerate_Hz``.
    :param interpolate: when true, frames with frequency 0 are filled in by
        PCHIP interpolation of log10(F0); the first and last frames are
        anchored to the median of the non-zero frequencies.
    :returns: ``(new_sound, f0_contours, time_in_second)``.
    """
    praat_call = parselmouth.praat.call
    rate = sound.samplerate_Hz

    parselsound = parselmouth.Sound(sound, rate)
    manipulation = praat_call(parselsound, "To Manipulation", 0.01, 75, 600)
    pitch_tier = praat_call(manipulation, "Extract pitch tier")

    pitch = parselsound.to_pitch(time_step=0.01)
    f0_contours = pitch.selected_array['frequency']
    time_in_second = pitch.xs()
    # Empty the extracted tier; it is refilled below from the pitch track.
    praat_call(pitch_tier, "Remove points between", 0, parselsound.duration)

    if interpolate:
        unvoiced = (f0_contours == 0)
        anchor = np.median(f0_contours[~unvoiced])
        # Pin both ends so the interpolator covers the whole time axis.
        for edge in (0, -1):
            f0_contours[edge] = anchor
            unvoiced[edge] = False
        voiced = ~unvoiced
        log_interpolator = scipy.interpolate.PchipInterpolator(
            time_in_second[voiced], np.log10(f0_contours[voiced]))
        f0_contours = 10**log_interpolator(time_in_second)

    for frame_index, frame_time in enumerate(time_in_second):
        praat_call(pitch_tier, "Add point", frame_time,
                   f0_contours[frame_index])

    point_process = praat_call(pitch_tier, "To PointProcess")
    pulse_train = praat_call(point_process, "To Sound (phonation)", rate, 1.0,
                             0.05, 0.7, 0.03, 3.0, 4.0)
    new_sound = Sound(np.squeeze(pulse_train), rate)
    return new_sound, f0_contours, time_in_second
Example #12
0
def measurePitch(voiceID, f0min, f0max, unit):
    """Return pitch statistics, jitter, shimmer and a Praat voice report.

    :param voiceID: anything accepted by ``parselmouth.Sound``.
    :param f0min: lower pitch bound passed to Praat.
    :param f0max: upper pitch bound passed to Praat.
    :param unit: unit passed to the pitch "Get mean" and
        "Get standard deviation" queries.
    :returns: 14-tuple: mean F0, F0 standard deviation, five jitter values
        (local, local absolute, rap, ppq5, ddp), six shimmer values (local,
        local_dB, apq3, apq5, apq11, dda) and the voice report.
    """
    snd = parselmouth.Sound(voiceID)
    pitch = call(snd, "To Pitch", 0.0, f0min, f0max)
    pitch_stats = (
        call(pitch, "Get mean", 0, 0, unit),
        call(pitch, "Get standard deviation", 0, 0, unit),
    )
    pulses = call(snd, "To PointProcess (periodic, cc)", f0min, f0max)

    jitter_commands = (
        "Get jitter (local)",
        "Get jitter (local, absolute)",
        "Get jitter (rap)",
        "Get jitter (ppq5)",
        "Get jitter (ddp)",
    )
    shimmer_commands = (
        "Get shimmer (local)",
        "Get shimmer (local_dB)",
        "Get shimmer (apq3)",
        "Get shimmer (apq5)",
        "Get shimmer (apq11)",
        "Get shimmer (dda)",
    )

    jitters = tuple(
        call(pulses, command, 0, 0, 0.0001, 0.02, 1.3)
        for command in jitter_commands)
    shimmers = tuple(
        call([snd, pulses], command, 0, 0, 0.0001, 0.02, 1.3, 1.6)
        for command in shimmer_commands)

    voice_report = call([snd, pitch, pulses], "Voice report", 0.0, 0.0, f0min,
                        f0max, 1.3, 1.6, 0.03, 0.45)

    return pitch_stats + jitters + shimmers + (voice_report, )
Example #13
0
def test_call_parameters(sound):
	# Exercises argument conversion in parselmouth.praat.call: numbers,
	# booleans, strings, numeric vectors, and the errors for invalid values.
	call = parselmouth.praat.call
	praat_error = parselmouth.PraatError

	# Real-valued and positive-valued arguments.
	assert call(sound, "Add", 0.1) is None
	assert call(sound, "Add", -1) is None
	assert call(sound, "Override sampling frequency", 44100) is None
	with pytest.raises(praat_error, match=r"Argument \".*\" must be greater than 0"):
		assert call(sound, "Override sampling frequency", -10.0) is None

	# Whole-number arguments.
	assert call(sound, "Get time from sample number", 1) == sound.get_time_from_index(1)
	praat_version = tuple(map(int, parselmouth.PRAAT_VERSION.split(".")))
	assert praat_version < (6, 0, 47)  # Replace with commented assert underneath once Praat version gets updated
	# with pytest.raises(parselmouth.PraatError, match=r"Argument \".*\" should be a whole number"):
	# 	assert parselmouth.praat.call(sound, "Get time from sample number", 0.5) != sound.get_time_from_index(1)
	assert call(sound, "Set value at sample number", 1, 0.0) is None
	with pytest.raises(praat_error, match=r"Argument \".*\" should be a positive whole number"):
		assert call(sound, "Set value at sample number", 0, -1, 0.0) is None

	# Booleans may be given as bool, int or "yes"/"no" strings.
	assert call(sound, "To Spectrum", True) == call(sound, "To Spectrum", 1)
	assert call(sound, "To Spectrum", False) == call(sound, "To Spectrum", "no")

	# String arguments, with and without a selected object.
	assert call(sound, "To TextGrid", "points intervals", "points").class_name == "TextGrid"
	assert call("Create Sound from formula", "someSound", 1, 0, 1, 44100, "1/2").name == "someSound"

	# Numeric vector arguments: array and list are accepted, scalar and matrix are not.
	many_channels = parselmouth.Sound(np.zeros((10, 1600)), 16000)
	channel_numbers = [2, 3, 5, 7]
	assert call(many_channels, "Extract channels", np.array(channel_numbers)).n_channels == 4
	assert call(many_channels, "Extract channels", channel_numbers).n_channels == 4
	with pytest.raises(praat_error, match=r"Argument \".*\" should be a numeric vector, not a number"):
		assert call(many_channels, "Extract channels", 4) == 1
	with pytest.raises(praat_error, match=r"Argument \".*\" should be a numeric vector, not a numeric matrix"):
		assert call(many_channels, "Extract channels", np.array([channel_numbers])) == 4
Example #14
0
def test_run_file_relative_paths(sound_path, resources):
    """run_file must accept a sound path given relative to the script's directory.

    The first assertion guarantees that the working directory is not the
    script's directory, so the relative path cannot resolve by accident.
    """
    script_path = resources["script.praat"]
    script_dir = os.path.dirname(script_path)
    assert os.getcwd() != os.path.abspath(script_dir)

    relative_sound_path = os.path.relpath(sound_path, script_dir)
    results = parselmouth.praat.run_file(script_path, relative_sound_path)
    assert results[0] == parselmouth.Sound(sound_path)
def draw_pitch_and_intensisty(filename, title):
    """Show a two-panel figure for ``<data_dir>/<filename>.wav``.

    Top panel: spectrogram of a pre-emphasized copy of the sound with the
    pitch contour on a twin axis. Bottom panel: spectrogram of the original
    sound with the intensity contour on a twin axis.
    """
    snd = parselmouth.Sound(os.path.join(data_dir, filename + '.wav'))

    plt.figure()

    # --- Top panel: pitch over a pre-emphasized spectrogram ---------------
    plt.subplot(2, 1, 1)
    plt.title(title)
    pitch = snd.to_pitch()
    # Pre-emphasis is applied to a copy so the original sound is untouched.
    emphasized = snd.copy()
    emphasized.pre_emphasize()
    draw_spectrogram(
        emphasized.to_spectrogram(window_length=0.03, maximum_frequency=8000))
    plt.twinx()
    draw_pitch(pitch)
    plt.xlim([snd.xmin, snd.xmax])

    # --- Bottom panel: intensity over the plain spectrogram ----------------
    plt.subplot(2, 1, 2)
    intensity = snd.to_intensity()
    draw_spectrogram(snd.to_spectrogram())
    plt.twinx()
    draw_intensity(intensity)
    plt.xlim([snd.xmin, snd.xmax])

    plt.show()  # or plt.savefig("spectrogram.pdf")
def main_get_feature(directory):
    """Collect min-max normalized scalar features per vowel from a folder.

    Every ``.wav`` file in ``directory`` is read with Praat, SciPy and
    librosa, and its features are extracted with ``get_features``. The vowel
    key is the last character of the file-name part before the first "_".
    Files whose scalar features contain NaN are skipped. Files without "_"
    in their name are collected under 'iau', which is dropped at the end.

    :returns: mapping from vowel to a 2-D array (one row per file), with each
        column min-max normalized within that vowel.
    """
    all_audio_features = []
    vowel_groups = defaultdict(list)

    for f in os.listdir(directory):
        vowel = f.split("_")[0][-1]
        if not f.endswith('.wav'):
            # Only wav files are imported.
            continue

        wav_path = directory + '/' + f
        data_praat = parselmouth.Sound(wav_path)
        # wavfile.read returns the sample rate and the raw sample array.
        fs_scipy, data_scipy = wavfile.read(wav_path)
        data_librosa = librosa.load(wav_path, sr=fs_scipy)
        Features, Feature_type = get_features(data_librosa[0],
                                              data_librosa[1], data_scipy,
                                              data_praat)

        # Selects which feature family is collected:
        # 0 all features, 1 scalar features, 2 vector features, 3 matrix features
        feature_number = 1
        if feature_number == 0:
            all_audio_features.append(Features)
        elif feature_number == 1:
            scalar_features = get_features_of_type(Features, Feature_type,
                                                   scalar)
            all_audio_features.append(scalar_features)
        elif feature_number == 2:
            vector_features = get_features_of_type(Features, Feature_type,
                                                   vector)
            all_audio_features.append(vector_features)
        elif feature_number == 3:
            matrix_features = get_features_of_type(Features, Feature_type,
                                                   matrix)
            all_audio_features.append(matrix_features)

        if np.isnan(np.sum(scalar_features)):
            continue

        group = vowel if '_' in f else 'iau'
        vowel_groups[group].append(np.array(scalar_features))

    vowel_groups.pop('iau', None)

    # Min-max normalize each feature column within its vowel group.
    for key in vowel_groups.keys():
        rows = vowel_groups[key]
        stacked = np.reshape(rows, (len(rows), len(rows[0])))
        column_min = np.min(stacked, axis=0)
        column_max = np.max(stacked, axis=0)
        vowel_groups[key] = (stacked - column_min) / (column_max - column_min)
    return vowel_groups
Example #17
0
    def segment(self):
        """Annotate every transcript sentence with delivery metrics.

        For each sentence dict in ``self.transcript`` (keys 'line', 'start'
        and 'end' are read) the following keys are written:

        * 'speaking_rate' - syllables per second,
        * 'filler_rate' / 'filler_count' - filler words and phrases,
        * 'pitch_variety' - 95th minus 5th percentile of the voiced pitch,
        * 'comment' - result of ``Comment(sentence).comment``.

        :returns: ``self``, to allow chaining.
        """
        audio = pm.Sound(self.audio_path)
        for sentence in self.transcript:
            # speaking rate (syllables/second)
            l_line = sentence['line'].lower()
            line = l_line.split()
            # sum() yields 0 for an empty line; the previous reduce() without
            # an initial value raised TypeError in that case.
            syllable_count = sum(map(count_syllable, line))
            time_delta = sentence['end'] - sentence['start']
            if time_delta == 0:
                # Avoid dividing by zero for zero-length sentences.
                time_delta = 0.01
            sentence['speaking_rate'] = syllable_count / time_delta

            # filler rate (filler words / duration): single words are matched
            # per token, multi-word phrases by substring count.
            filler_count = sum(1 for word in line if word in filler_dict)
            filler_count += sum(l_line.count(phrase) for phrase in filler_phrase)
            sentence['filler_rate'] = filler_count / time_delta
            sentence['filler_count'] = filler_count

            # pitch variety (difference between the 95th and the 5th
            # percentile of pitch); frames with frequency 0 are excluded.
            tmp_segment = audio.extract_part(from_time=sentence['start'],
                                             to_time=sentence['end'])
            tmp_pitch = tmp_segment.to_pitch().selected_array['frequency']
            tmp_pitch[tmp_pitch == 0] = np.nan
            tmp_upper_bound = np.nanpercentile(a=tmp_pitch, q=95)
            tmp_lower_bound = np.nanpercentile(a=tmp_pitch, q=5)
            sentence['pitch_variety'] = tmp_upper_bound - tmp_lower_bound

            # make comments
            sentence['comment'] = Comment(sentence).comment
        return self
def extract_prosodic_features(audio_source,
                              slope_cutoff=0.500,
                              end_cutoff=0.2):
    """Extract F0 values and the final F0 slope from a recording.

    :param audio_source: anything accepted by ``parselmouth.Sound``.
    :param slope_cutoff: length in seconds, counted back from the last voiced
        frame, of the window used for the linear F0 regression.
    :param end_cutoff: length in seconds, counted back from the last voiced
        frame, of the window whose F0 values go into ``fo_end_vals``.
    :returns: ``(pitch, fo_slope_vals, fo_slope, fo_end_vals, o_fos)`` where
        ``fo_slope_vals`` is a list of ``[time, f0]`` pairs, ``fo_slope`` the
        regression slope over them, ``fo_end_vals`` the F0 values inside the
        end window and ``o_fos`` the F0 values before it.
    :raises IndexError: if the recording contains no voiced frame.
    """
    s = parselmouth.Sound(audio_source)
    p = s.to_pitch()

    # Praat frame numbers run from 1 to N inclusive; iterating range(N)
    # skipped the last frame and queried the non-existent frame 0.
    voiced_frames = {}
    for frame in range(1, p.get_number_of_frames() + 1):
        value = p.get_value_in_frame(frame)
        if not np.isnan(value):
            voiced_frames[p.get_time_from_frame_number(frame)] = value

    sorted_times = sorted(voiced_frames)
    if not sorted_times:
        raise IndexError("no voiced frames found in audio source")

    last_voiced_time = sorted_times[-1]
    fo_slope_cutoff = last_voiced_time - slope_cutoff
    fo_end_cutoff = last_voiced_time - end_cutoff

    fo_slope_vals = []
    fo_end_vals = []
    o_fos = []
    for t in sorted_times:
        c_fo = voiced_frames[t]
        if t >= fo_end_cutoff:
            fo_end_vals.append(c_fo)
        else:
            o_fos.append(c_fo)
        if t >= fo_slope_cutoff:
            fo_slope_vals.append([t, c_fo])

    fo_slope_array = np.array(fo_slope_vals)
    # Only the slope of the fit is used; the other outputs are discarded.
    fo_slope = stats.linregress(fo_slope_array[:, 0],
                                fo_slope_array[:, 1])[0]
    return p, fo_slope_vals, fo_slope, fo_end_vals, o_fos
Example #19
0
def measureFormants(sound, f0min, f0max):
    """Measure F1 and F2 at every glottal pulse of a recording.

    :param sound: anything accepted by ``parselmouth.Sound``.
    :param f0min: lower pitch bound passed to Praat.
    :param f0max: upper pitch bound passed to Praat.
    :returns: ``(f1_mean, f2_mean, f1_list, f2_list)``; the lists hold the
        per-pulse values with NaN entries removed.
    """
    snd = parselmouth.Sound(sound)
    # Duration and pitch are computed but not used below; the calls are kept
    # so that the sequence of Praat commands is unchanged.
    call(snd, "Get total duration")
    call(snd, "To Pitch (cc)", 0, f0min, 15, 'no', 0.03, 0.45, 0.01, 0.35,
         0.14, f0max)
    pulses = call(snd, "To PointProcess (periodic, cc)", f0min, f0max)

    formants = call(snd, "To Formant (burg)", 0.0025, 5, 5000, 0.025, 50)
    pulse_count = call(pulses, "Get number of points")

    # Measure formants only at glottal pulses (Praat indices start at 1).
    raw_f1 = []
    raw_f2 = []
    for pulse_index in range(1, pulse_count + 1):
        t = call(pulses, "Get time from index", pulse_index)
        raw_f1.append(call(formants, "Get value at time", 1, t, 'Hertz',
                           'Linear'))
        raw_f2.append(call(formants, "Get value at time", 2, t, 'Hertz',
                           'Linear'))

    # Drop undefined (NaN) measurements before averaging.
    f1_list = [value for value in raw_f1 if str(value) != 'nan']
    f2_list = [value for value in raw_f2 if str(value) != 'nan']

    # Mean formants across pulses.
    f1_mean = statistics.mean(f1_list)
    f2_mean = statistics.mean(f2_list)

    return f1_mean, f2_mean, f1_list, f2_list
Example #20
0
def train(fp, db):
    """Sample (F1, F2, pitch) features from each matching file into ``db``.

    For every path matched by the glob pattern ``fp``, random positions out
    of 400 equal steps of the duration are drawn. Each drawn position is
    recorded in the module-level ``datapoints`` mapping, and the features at
    that time are kept when pitch, F1 and F2 are all defined.
    ``db[filepath]`` is set to ``(values, mean)``.
    """
    for filepath in glob.iglob(fp):
        for file in glob.glob(filepath):
            s = parselmouth.Sound(file)

        pitch = s.to_pitch()
        formant = s.to_formant_burg()
        duration = pitch.get_total_duration()

        frames = 400
        values = []
        sampled = datapoints.setdefault(file, set())
        draw_count = int(frames * TRAIN_COEFFICIENT)
        for _ in range(1, draw_count):
            r = random.randint(1, frames)
            sampled.add(r)
            # Convert the drawn step to a time inside the recording.
            t = (r / float(frames)) * duration
            p = pitch.get_value_at_time(t)
            f1 = formant.get_value_at_time(1, t)
            f2 = formant.get_value_at_time(2, t)

            if math.isnan(p) or math.isnan(f1) or math.isnan(f2):
                continue
            values.append((f1, f2, p))

        db[filepath] = (values, get_mean(values))
def get_features(path):
    """Build a feature list from Praat's voice report plus MFCC values.

    The voice report text is split into lines, header lines are dropped, all
    numbers are extracted, three mantissa/exponent pairs are merged into
    single values, and unneeded numbers are removed. The result of
    ``get_MFCC(path)`` is appended to the remaining numbers.
    """
    sound = parselmouth.Sound(path)
    pitch = sound.to_pitch()
    pulses = parselmouth.praat.call([sound, pitch], "To PointProcess (cc)")
    report_text = parselmouth.praat.call([sound, pitch, pulses],
                                         "Voice report", 0.0, 0.0, 75, 600,
                                         1.3, 1.6, 0.03, 0.45)
    report_lines = report_text.split('\n')

    header_rows = [0, 1, 7, 12, 16, 22, 29, 33]  # Header lines
    data_lines = [
        line for row, line in enumerate(report_lines)
        if row not in header_rows
    ]

    numbers = []
    for line in data_lines:
        numbers.extend(
            float(token)
            for token in re.findall(r"[-+]?\d*\.\d+|\d+", line))

    # Merge (value, exponent) pairs into a single number: value * 10**-exponent.
    for value_pos, exponent_pos in ((7, 8), (9, 10), (19, 20)):
        numbers[value_pos] = numbers[value_pos] * 10 ** (
            (-1) * numbers[exponent_pos])

    unneeded = [8, 10, 20, 12, 13, 16, 17, 22, 27, 29, 31]  # Unnecessary numbers
    final_vr = [
        number for position, number in enumerate(numbers)
        if position not in unneeded
    ]

    return final_vr + get_MFCC(path)
Example #22
0
    def get_all_features(self, f0min, f0max, unit):
        """Compute all vocal parameters for ``self.voiceID``.

        Runs the Praat analyses and the ``get_*_parameters`` helpers, which
        are expected to store their results on the instance. Extraction is
        best effort: if any step fails, the error is logged and whatever was
        gathered up to that point is returned.

        :param f0min: lower pitch bound passed to Praat.
        :param f0max: upper pitch bound passed to Praat.
        :param unit: unit forwarded to ``get_pitch_parameters``.
        :returns: the instance ``__dict__`` (the same object, not a copy),
            also stored as ``self.all_vocal_parameters``.
        """
        import logging

        try:
            sound = parselmouth.Sound(self.voiceID)  # read the sound
            pointProcess = call(sound, "To PointProcess (periodic, cc)",
                                f0min, f0max)
            Pitch = call(sound, "To Pitch", 0.0, f0min, f0max)

            # Vocal report
            self.vocal_report = parselmouth.praat.call(
                [sound, Pitch, pointProcess], "Voice report", 0, 0, 75, 600,
                1.3, 1.6, 0.03, 0.45)

            # Pitch
            self.get_pitch_parameters(Pitch=Pitch, unit=unit)

            # Harmonicity
            self.get_harmonicity_parameters(sound)

            # Jitter
            self.get_jitter_parameters(sound=sound, pointProcess=pointProcess)

            # Shimmer
            self.get_shimmer_parameters(sound=sound,
                                        pointProcess=pointProcess)

            # Pulse
            self.get_pulse_parameters()

            # Voicing
            self.get_voicing_parameters()

        except Exception:
            # Still best effort, but no longer silent: record why the
            # feature set is incomplete instead of hiding the failure.
            logging.getLogger(__name__).exception(
                "Vocal feature extraction failed; returning partial results")

        # NOTE: this stores a reference to __dict__ inside itself.
        self.all_vocal_parameters = self.__dict__

        return self.all_vocal_parameters
Example #23
0
def test_run_with_parameters(sound_path):
    """Arguments passed to praat.run must fill the script's form fields.

    The Intensity object produced by the script has to equal the one
    computed directly through the Python API with the same three arguments.
    Running the script without arguments must raise a PraatError.
    """
    script = textwrap.dedent("""
	form Test
		positive minPitch 100.0
		real timeStep 0.0
		boolean subtractMean "yes"
	endform
	
	Read from file: "{}"
	To Intensity: minPitch, timeStep, subtractMean
	selectObject: 1
	selectObject: "Intensity the_north_wind_and_the_sun"
	""".format(sound_path))

    # min_pitch, time_step, subtract_mean - all differ from the form defaults.
    form_arguments = (75, 0.05, False)

    from_script = parselmouth.praat.run(script, *form_arguments)[0]
    from_api = parselmouth.Sound(sound_path).to_intensity(*form_arguments)
    assert from_script == from_api

    with pytest.raises(parselmouth.PraatError,
                       match="Found 0 arguments but expected more."):
        parselmouth.praat.run(script)
Example #24
0
def draw_pitch(filename, output_fn=None):
    """Plot the pitch contour of an audio file and report its voiced share.

    Prints the file name and the proportion of frames with frequency above
    0, draws the contour (frequency-0 frames are hidden) and, when
    ``output_fn`` is given, saves the figure there. The figure is closed
    afterwards in either case.
    """
    plt = init_set_plt()
    snd = parselmouth.Sound(filename)
    pitch = snd.to_pitch()
    pitch_values = pitch.selected_array['frequency']

    voiced_count = len(pitch_values[pitch_values > 0])
    proportion = voiced_count / len(pitch_values)

    separator = "=" * 80
    print(separator)
    print(f"Filename: {filename}")
    print(f"Voiced segment proportion: {proportion}")
    print(separator + "\n")

    # NaN values are not drawn, which hides the frequency-0 frames.
    pitch_values[pitch_values == 0] = np.nan
    frame_times = pitch.xs()
    # A larger white dot under a smaller coloured dot gives each point an outline.
    plt.plot(frame_times, pitch_values, 'o', markersize=5, color='w')
    plt.plot(frame_times, pitch_values, 'o', markersize=2)

    plt.grid(False)
    plt.xlim([snd.xmin, snd.xmax])
    plt.ylim(50, 450)
    plt.xlabel("Time (s)", fontsize=24)
    plt.ylabel("Pitch (Hz)", fontsize=24)
    plt.title(filename.split("-")[0], fontsize=20)
    plt.tight_layout()

    if output_fn is not None:
        plt.savefig(output_fn)
    plt.close()
def get_f0_standard_deviation(pathSound, start_time, end_time,
                              voice_max_frequency, voice_min_frequency):
    """
    Get the standard deviation of F0 around its mean, over voiced frames only
    :params pathSound: path to the sound to analyse
    :params start_time: in seconds
    :params end_time : in seconds
    :params voice_max_frequency : maximum frequency of a human being (adult man or adult female)
    :params voice_min_frequency : minimum frequency of a human being (adult man or adult female)
    :returns: standard deviation of the voiced F0 values, or 0.0 when the
        excerpt contains no voiced frame
    """
    sound = parselmouth.Sound(pathSound)
    sound = sound.extract_part(from_time=start_time, to_time=end_time)
    pitch = sound.to_pitch()
    pitch_values = pitch.selected_array['frequency']

    # Frames with frequency 0 are unvoiced and carry no F0 information.
    voiced_values = pitch_values[pitch_values != 0]
    voiced_count = len(voiced_values)
    if voiced_count == 0:
        return 0.0

    mean = get_f0_mean(pathSound, start_time, end_time, voice_max_frequency,
                       voice_min_frequency)

    squared_deviation = 0
    for value in voiced_values:
        squared_deviation += math.pow(value - mean, 2)

    # Divide by the number of frames that were summed. Dividing by the total
    # frame count (unvoiced frames included) underestimated the deviation.
    return math.sqrt(squared_deviation / voiced_count)
Example #26
0
def predict():
    """Run the model on the audio submitted in the request's 'file' field.

    The submitted data is written to "audio.wav", features are computed from
    that file, NaN features are replaced by 0, and the model prediction is
    returned as a JSON string together with three of the input features and
    the status code 200.
    """
    # Content of the 'file' form field of the request.
    binary_file_data = request.form['file']

    binary_file_path = "audio.wav"
    with open(binary_file_path, 'w') as wav_out:
        wav_out.write(binary_file_data)

    # Load the stored file both with SciPy and with Praat.
    sound_file = scipy.io.wavfile.read(binary_file_path)
    sound = parselmouth.Sound(binary_file_path)

    # Concatenate features in the order the model expects.
    model_input = np.concatenate([
        calculate_fundamental_frequency_features(sound_file),
        engineer_features(sound),
    ])
    # The model input must not contain NaN; use 0 instead.
    for position, feature in enumerate(model_input):
        if np.isnan(feature):
            model_input[position] = 0
    model_input = np.reshape(model_input, (1, 15))

    # Only one sound file is processed, so only the first prediction is used.
    prediction = MODEL.predict(model_input)[0][0]

    features = model_input[0]
    response = {
        'prediction': str(prediction),
        'averageFundamentalFrequency': str(features[0]),
        'jitter': str(features[3]),
        'shimmer': str(features[8])
    }
    return json.dumps(response), 200
Example #27
0
def calculate_pitch(wav, durs):
    """Compute frame-, character- and third-of-character-level pitch.

    :param wav: anything accepted by ``parselmouth.Sound``.
    :param durs: integer array with the number of frames per character.
    :returns: ``(pitch_mel, pitch_char, pitch_trichar)``: the frame-level
        pitch, the mean voiced pitch per character and the mean voiced pitch
        per third of a character, each passed through ``maybe_pad``.
        Spans without a voiced frame get 0.0.
    """
    mel_len = durs.sum()
    durs_cum = np.cumsum(np.pad(durs, (1, 0)))
    snd = parselmouth.Sound(wav)
    # Choose the time step so that the number of pitch frames matches
    # mel_len to within one frame (asserted below).
    pitch = snd.to_pitch(time_step=snd.duration /
                         (mel_len + 3)).selected_array['frequency']
    assert np.abs(mel_len - pitch.shape[0]) <= 1.0

    def _mean_voiced(span_bounds, size):
        # Mean of the non-zero pitch values inside each [start, end) span.
        # np.float64 replaces the np.float alias removed in NumPy 1.24.
        means = np.zeros((size, ), dtype=np.float64)
        # enumerate() visits every span; zip(range(mel_len), ...) silently
        # stopped early when zero-length durations made mel_len smaller than
        # the number of spans.
        spans = zip(span_bounds[:-1], span_bounds[1:])
        for idx, (start, end) in enumerate(spans):
            window = pitch[start:end]
            voiced = window[window != 0.0]
            means[idx] = np.mean(voiced) if len(voiced) > 0 else 0.0
        return means

    # Average pitch over characters
    pitch_char = _mean_voiced(durs_cum, durs.shape[0])

    # Average to three values per character
    durs_tri = np.concatenate([dur_chunk_sizes(d, 3) for d in durs])
    durs_tri_cum = np.cumsum(np.pad(durs_tri, (1, 0)))
    pitch_trichar = _mean_voiced(durs_tri_cum, 3 * durs.shape[0])

    pitch_mel = maybe_pad(pitch, mel_len)
    pitch_char = maybe_pad(pitch_char, len(durs))
    pitch_trichar = maybe_pad(pitch_trichar, len(durs_tri))

    return pitch_mel, pitch_char, pitch_trichar
Example #28
0
def get_prosodic_features(file_loc):
    """Return eight prosodic features of an audio file.

    :param file_loc: path of the audio file; it is loaded with both Praat
        and librosa.
    :returns: list ``[SD_energy, maxIntensity, minIntensity, maxPitch,
        minPitch, voiced_frames, voiced_to_total_ratio,
        voiced_to_unvoiced_ratio]``. A ratio with a zero denominator is
        reported as ``inf`` (or ``nan`` for 0/0) instead of raising.
    """
    unit = "Hertz"

    sound = parselmouth.Sound(file_loc)
    y, _ = librosa.load(file_loc)

    # 1: standard deviation of the RMS energy
    SD_energy = np.std(librosa.feature.rms(y=y))

    pitch = call(sound, "To Pitch", 0.0, 75, 300)
    intensity = call(sound, "To Intensity", 75, 0)

    # 2, 3: intensity extremes (parabolic interpolation)
    maxIntensity = call(intensity, "Get maximum", 0, 0, 'Parabolic')
    minIntensity = call(intensity, "Get minimum", 0, 0, 'Parabolic')

    # 4, 5: pitch extremes (parabolic interpolation)
    maxPitch = call(pitch, "Get maximum", 0, 0, unit, 'Parabolic')
    minPitch = call(pitch, "Get minimum", 0, 0, unit, 'Parabolic')

    # 6: number of voiced frames
    voiced_frames = pitch.count_voiced_frames()
    total_frames = pitch.get_number_of_frames()
    unvoiced_frames = total_frames - voiced_frames

    def _ratio(numerator, denominator):
        # Division that survives a zero denominator: a fully voiced recording
        # has no unvoiced frames and previously raised ZeroDivisionError.
        if denominator:
            return numerator / denominator
        return float('inf') if numerator else float('nan')

    # 7, 8: voicing ratios
    voiced_to_total_ratio = _ratio(voiced_frames, total_frames)
    voiced_to_unvoiced_ratio = _ratio(voiced_frames, unvoiced_frames)

    return [SD_energy, maxIntensity, minIntensity, maxPitch, minPitch, voiced_frames, voiced_to_total_ratio, voiced_to_unvoiced_ratio]
def audio_to_textgrid(audio_no_annot_path, textgrid_path):
    """Generates .TextGrid files from audio using Praat.

    Every entry of ``audio_no_annot_path`` is noise-reduced, segmented into
    silent/sounding intervals and saved as ``<name>.TextGrid`` in
    ``textgrid_path``. Progress is printed in steps of roughly 10%.

    :param audio_no_annot_path: path with audio not annotated.
    :param textgrid_path: path with textgrid generated.
    :returns: None.
    """
    create_data_path(textgrid_path)

    audio_files = os.listdir(audio_no_annot_path)
    print("Procesing " + str(len(audio_files)) + " audio files with Praat, can take a while...")

    # Number of files that make up one 10% progress step (0 for < 5 files).
    data_mod = round(len(audio_files)/10)

    for index_mod, audio in enumerate(audio_files, start=1):

        sound = parselmouth.Sound(os.path.join(audio_no_annot_path, audio))

        # Take each audio file and convert to textGrid with praat
        noise_reduction = call(sound, "Remove noise", 0.0, 1.0, 0.025, 80, 10000, 40, 'Spectral subtraction')
        # 0.8 and 0.2 are the minimum silent and sounding interval lengths.
        manipulation = call(noise_reduction, "To TextGrid (silences)", 100, 0.0, -65.0, 0.8, 0.2, '', 'sounding')

        # splitext handles extensions of any length; slicing off the last
        # four characters mangled names such as "clip.flac".
        text_audio = os.path.splitext(audio)[0] + '.TextGrid'
        call(manipulation, "Save as text file", os.path.join(textgrid_path, text_audio))

        # Show progression. With fewer than five files data_mod is 0; test
        # for that explicitly instead of catching the ZeroDivisionError with
        # a bare except.
        if data_mod == 0:
            print('', end="\r")
        elif index_mod % data_mod == 0:  # it means 10%, 20%, ...
            print(str(int(index_mod/data_mod * 10))+"% ", end="\r")
Example #30
0
def test(fp):
    """Evaluate the classifier on the positions not drawn during training.

    For every path matched by ``fp``, features are taken at each of the 399
    interior steps (out of 400) that is not recorded in the module-level
    ``datapoints`` for that file. Their mean is classified with ``predict``,
    and the prediction counts as correct when it equals the file path.

    :returns: ``(correct, total)``.
    """
    correct = 0
    total = 0
    for filepath in glob.iglob(fp):
        for file in glob.glob(filepath):
            s = parselmouth.Sound(file)

        pitch = s.to_pitch()
        formant = s.to_formant_burg()
        duration = pitch.get_total_duration()

        frames = 400
        values = []
        for step in range(1, frames):
            if step in datapoints[file]:
                # This position was used for training.
                continue
            t = (step / float(frames)) * duration
            p = pitch.get_value_at_time(t)
            f1 = formant.get_value_at_time(1, t)
            f2 = formant.get_value_at_time(2, t)

            if math.isnan(p) or math.isnan(f1) or math.isnan(f2):
                continue
            values.append((f1, f2, p))

        mean = get_mean(values)
        total += 1
        if predict(mean, data) == filepath:
            correct += 1

    return (correct, total)