Ejemplo n.º 1
0
def plot_pitches(filename, pitches, confidences, tolerance=0.8, hop_s=(512 // 1), samplerate=(0 // 1)):
    """Plot waveform, pitch track and pitch confidence for one audio file.

    Three vertically stacked panels share one x axis, whose positions are
    frame indices multiplied by ``hop_s``:

    1. the waveform drawn by ``get_waveform_plot``;
    2. the pitch estimates that survive the cleaning filters, plus the
       ground truth read from ``<filename without extension>.f0.Corrected``
       when that file exists;
    3. the confidence values with a flat line at ``tolerance``.

    The figure is written to ``filename + 'fig.png'`` and then displayed.
    The panels are drawn whether or not a ground-truth file is present.

    Args:
        filename: Path of the audio file; also used to derive the
            ground-truth path and the output image path.
        pitches: Per-frame pitch values (the axis label calls them MIDI).
            The first entry is dropped before plotting.
        confidences: Per-frame confidence values, aligned with ``pitches``.
            The first entry is dropped before plotting.
        tolerance: Frames whose confidence is below this value are masked
            out of the pitch plot.
        hop_s: Multiplier turning a frame index into an x position.
        samplerate: Forwarded to ``get_waveform_plot`` and
            ``set_xlabels_sample2time``; also scales the first ground-truth
            column into x positions.
    """
    skip = 1
    pitches = array(pitches[skip:])
    confidences = array(confidences[skip:])
    times = [t * hop_s for t in range(len(pitches))]

    fig = plt.figure()

    # Top panel: waveform, with its tick labels hidden because the bottom
    # panel carries the shared x axis labels.
    ax1 = fig.add_subplot(311)
    ax1 = get_waveform_plot(filename, samplerate=samplerate, block_size=hop_s, ax=ax1)
    plt.setp(ax1.get_xticklabels(), visible=False)
    ax1.set_xlabel('')

    # Middle panel: optional ground truth in red.
    ax2 = fig.add_subplot(312, sharex=ax1)
    ground_truth = os.path.splitext(filename)[0] + '.f0.Corrected'
    if os.path.isfile(ground_truth):
        ground_truth = array_from_text_file(ground_truth)
        true_freqs = ground_truth[:, 2]
        true_freqs = ma.masked_where(true_freqs < 2, true_freqs)
        true_times = float(samplerate) * ground_truth[:, 0]
        ax2.plot(true_times, true_freqs, 'r')
        ax2.axis(ymin=0.9 * true_freqs.min(), ymax=1.1 * true_freqs.max())

    # Middle panel: estimates, hiding values outside 0..120 and frames whose
    # confidence is below the tolerance.
    cleaned_pitches = ma.masked_where(pitches < 0, pitches)
    cleaned_pitches = ma.masked_where(cleaned_pitches > 120, cleaned_pitches)
    cleaned_pitches = ma.masked_where(confidences < tolerance, cleaned_pitches)
    ax2.plot(times, cleaned_pitches, 'b.')
    # min()/max() of a fully masked array are themselves masked, so only
    # rescale the y axis when at least one estimate is left.
    if cleaned_pitches.count():
        ax2.axis(ymin=0.9 * cleaned_pitches.min(), ymax=1.1 * cleaned_pitches.max())
    plt.setp(ax2.get_xticklabels(), visible=False)
    ax2.set_ylabel('f0 (midi)')

    # Bottom panel: confidence curve and a reference line at the tolerance.
    ax3 = fig.add_subplot(313, sharex=ax1)
    ax3.plot(times, confidences)
    ax3.plot(times, [tolerance] * len(confidences))
    ax3.axis(xmin=times[0], xmax=times[-1])
    ax3.set_ylabel('confidence')
    set_xlabels_sample2time(ax3, times[-1], samplerate)

    savefig(filename + 'fig.png')
    plt.show()
Ejemplo n.º 2
0
# do plotting
from numpy import arange
from demo_waveform_plot import get_waveform_plot
from demo_waveform_plot import set_xlabels_sample2time
import matplotlib.pyplot as plt

# Layout: a waveform strip across the top of the figure, then one thin strip
# per MFCC coefficient stacked underneath it, all sharing the waveform's
# x axis.
fig = plt.figure()
plt.rc("lines", linewidth=".8")
wave = plt.axes([0.1, 0.75, 0.8, 0.19])
get_waveform_plot(source_filename, samplerate, block_size=hop_s, ax=wave)
for hidden_axis in (wave.xaxis, wave.yaxis):
    hidden_axis.set_visible(False)

# x position of each MFCC row: row index times hop size.
all_times = arange(mfccs.shape[0]) * hop_s
n_coeffs = mfccs.shape[1]
for i, coeff_track in zip(range(n_coeffs), mfccs.T):
    strip_rect = [
        0.1,
        0.75 - ((i + 1) * 0.65 / n_coeffs),
        0.8,
        0.65 / n_coeffs,
    ]
    ax = plt.axes(strip_rect, sharex=wave)
    for hidden_axis in (ax.xaxis, ax.yaxis):
        hidden_axis.set_visible(False)
    ax.plot(all_times, coeff_track)

# Only the last (lowest) strip gets time labels and a visible x axis.
set_xlabels_sample2time(ax, frames_read, samplerate)
ax.xaxis.set_visible(True)

wave.set_title("MFCC for %s" % source_filename)
plt.show()
Ejemplo n.º 3
0
    ground_truth = array_from_text_file(ground_truth)
    true_freqs = ground_truth[:, 2]
    true_freqs = ma.masked_where(true_freqs < 2, true_freqs)
    true_times = float(samplerate) * ground_truth[:, 0]
    ax2.plot(true_times, true_freqs, 'r')
    ax2.axis(ymin=0.9 * true_freqs.min(), ymax=1.1 * true_freqs.max())
# Middle panel, first layer: every raw pitch estimate as green dots.
ax2.plot(times, pitches, '.g')

# Middle panel, second layer: the same estimates with every frame whose
# confidence is below the tolerance masked out.
cleaned_pitches = ma.masked_where(confidences < tolerance, pitches)
ax2.plot(times, cleaned_pitches, '.-')
ax2.set_ylabel('f0 (midi)')
plt.setp(ax2.get_xticklabels(), visible=False)

# Bottom panel: confidence curve plus a flat reference line at the tolerance.
ax3 = fig.add_subplot(313, sharex=ax1)
tolerance_line = [tolerance] * len(confidences)
ax3.plot(times, confidences)
ax3.plot(times, tolerance_line)
ax3.axis(xmin=times[0], xmax=times[-1])
ax3.set_ylabel('confidence')
set_xlabels_sample2time(ax3, times[-1], samplerate)

plt.show()
Ejemplo n.º 4
0
    if read < hop_s: break

if 1:
    # Single-argument print in call form: prints the same text under
    # Python 2 and is valid syntax under Python 3.
    print("done computing, now plotting")
    import matplotlib.pyplot as plt
    from demo_waveform_plot import get_waveform_plot
    from demo_waveform_plot import set_xlabels_sample2time

    # Waveform strip across the top of the figure, both axes hidden.
    fig = plt.figure()
    plt.rc('lines', linewidth='.8')
    wave = plt.axes([0.1, 0.75, 0.8, 0.19])
    get_waveform_plot(filename, samplerate, block_size=hop_s, ax=wave)
    wave.yaxis.set_visible(False)
    wave.xaxis.set_visible(False)

    # One thin strip per column of ``energies``, stacked under the waveform
    # and sharing its x axis.
    n_plots = len(energies.T)
    all_desc_times = [x * hop_s for x in range(len(energies))]
    for i, band in enumerate(energies.T):
        ax = plt.axes([0.1, 0.75 - ((i+1) * 0.65 / n_plots), 0.8, 0.65 / n_plots], sharex=wave)
        ax.plot(all_desc_times, band, '-', label='band %d' % i)
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        ax.axis(xmax=all_desc_times[-1], xmin=all_desc_times[0])
        # Label placed just left of the strip, in axes-point coordinates.
        ax.annotate('band %d' % i, xy=(-10, 10), xycoords='axes points',
                    horizontalalignment='right', verticalalignment='bottom')

    # Only the last (lowest) strip gets time labels and a visible x axis.
    set_xlabels_sample2time(ax, all_desc_times[-1], samplerate)
    ax.xaxis.set_visible(True)
    plt.show()
Ejemplo n.º 5
0
    ground_truth = array_from_text_file(ground_truth)
    true_freqs = ground_truth[:,2]
    true_freqs = ma.masked_where(true_freqs < 2, true_freqs)
    true_times = float(samplerate) * ground_truth[:,0]
    ax2.plot(true_times, true_freqs, 'r')
    ax2.axis( ymin = 0.9 * true_freqs.min(), ymax = 1.1 * true_freqs.max() )
# Pitch panel: raw estimates (green dots) underneath the confidence-filtered
# ones (dots joined by a line).
ax2.plot(times, pitches, '.g')
below_tolerance = confidences < tolerance
cleaned_pitches = ma.masked_where(below_tolerance, pitches)
ax2.plot(times, cleaned_pitches, '.-')
plt.setp(ax2.get_xticklabels(), visible=False)
ax2.set_ylabel('f0 (midi)')

# Confidence panel: the confidence curve, a horizontal line marking the
# tolerance, and x limits pinned to the first and last frame positions.
ax3 = fig.add_subplot(313, sharex=ax1)
ax3.plot(times, confidences)
ax3.plot(
    times,
    [tolerance] * len(confidences),
)
ax3.axis(
    xmin=times[0],
    xmax=times[-1],
)
ax3.set_ylabel('confidence')
set_xlabels_sample2time(ax3, times[-1], samplerate)

plt.show()
Ejemplo n.º 6
0
def pitch_track(filename, samplerate, Display=False, tolerance=0.85,
                silence_threshold=0.00004):
    """Track the pitch of an audio file and turn it into integer note rows.

    The file is read twice: once through aubio (``yinfft`` pitch detector,
    MIDI units) to get per-hop pitch and confidence values, and once through
    ``librosa.load`` at 44100 Hz to get the raw samples used for energy,
    onset and velocity computations.  Low-confidence pitches are masked and
    the track is then passed through the module's ``octave_error``,
    ``octave_error3`` and ``silence_mask`` helpers.  ``onset_detect`` and
    ``midi_output`` produce one row per note, and a velocity computed by
    ``midi_velocity`` is appended to every row.

    Args:
        filename: Path of the audio file to analyse.
        samplerate: Accepted for interface compatibility.
            NOTE(review): the value passed in is overwritten below, exactly
            as in the previous implementation.
        Display: When true, plot the waveform, the pitch tracks with onset
            markers and the confidence curve, print the result and call
            ``plt.show(block=False)``.
        tolerance: Confidence threshold, given to the aubio detector and
            used to mask low-confidence pitch estimates.
        silence_threshold: Threshold forwarded to ``silence_mask`` together
            with the per-hop mean-square energies.

    Returns:
        A tuple ``(output, n_samples)``.  ``output`` is an integer NumPy
        array holding the rows returned by ``midi_output`` with the velocity
        appended as the last column, rounded to the nearest integer (an
        empty ``(0, 0)`` array when there are no rows).  ``n_samples`` is
        the length of the signal loaded by librosa.
    """
    from aubio import source, pitch

    downsample = 1
    # NOTE(review): the ``samplerate`` argument is ignored here and the
    # command line is consulted instead, as before -- confirm whether
    # callers rely on this.
    samplerate = 44100 // downsample
    if len(sys.argv) > 2:
        samplerate = int(sys.argv[2])

    # Floor division keeps these sizes integral on Python 3 as well.
    win_s = 4096 // downsample  # fft size
    hop_s = 512 // downsample  # hop size

    s = source(filename, samplerate, hop_s)
    samplerate = s.samplerate

    pitch_o = pitch("yinfft", win_s, hop_s, samplerate)
    pitch_o.set_unit("midi")
    pitch_o.set_tolerance(tolerance)

    # One pitch and one confidence value per hop, until a short read
    # signals the end of the file.
    pitches = []
    confidences = []
    while True:
        samples, read = s()
        pitches.append(pitch_o(samples)[0])
        confidences.append(pitch_o.get_confidence())
        if read < hop_s:
            break

    # Raw samples for the energy / onset / velocity computations.
    signal, _ = librosa.load(filename, sr=44100)

    # Mean-square energy of consecutive windows: the first covers samples
    # [0, 512), each following one runs from the previous boundary to the
    # next, with boundaries spaced ``hop_s`` apart.
    bounds = list(range(512, len(signal), hop_s))
    signal2 = [np.average(np.square(signal[lo:hi]))
               for lo, hi in zip([0] + bounds[:-1], bounds)]

    # Drop the first frame and derive the x position of each remaining one.
    skip = 1
    pitches = array(pitches[skip:])
    confidences = array(confidences[skip:])
    times = [t * hop_s for t in range(len(pitches))]

    cleaned_pitches = ma.masked_where(confidences < tolerance, pitches)

    octave_cleaned = octave_error(cleaned_pitches)
    octave_cleaned = octave_error3(octave_cleaned)
    octave_cleaned = silence_mask(octave_cleaned, signal2, silence_threshold)
    octave_cleaned = octave_error(octave_cleaned)

    rms = librosa.feature.rmse(y=signal)

    # Velocity reference: the largest ``rms_db`` value over consecutive
    # 1024-sample chunks (the last full chunk is excluded, as before).
    sig_len = int(len(signal) / 1024.)
    midi_vel_ref = np.amax([rms_db(signal[k * 1024:(k + 1) * 1024])
                            for k in range(sig_len - 1)])

    # Stand-alone onset detection, then note segmentation.
    onsets_clean, _ = onset_detect(rms, Display=False)
    onsets_clean = np.array(onsets_clean, dtype=int)
    midi_out_new2, _ = midi_output(onsets_clean, octave_cleaned)

    # Row entries 1 and 2 are used as start/stop positions in units of 512
    # samples; the velocity of that span becomes the row's last entry.
    for note in midi_out_new2:
        start, stop = note[1], note[2]
        note.append(midi_velocity(signal[start * 512:stop * 512], midi_vel_ref))

    if len(midi_out_new2):
        n_cols = len(midi_out_new2[0])
        rows = np.array([note[:n_cols] for note in midi_out_new2], dtype=float)
        output = np.rint(rows).astype(int)
    else:
        # No notes: return an empty table instead of failing.
        output = np.zeros((0, 0), dtype=int)

    if Display:
        fig = plt.figure()

        # Top panel: waveform.
        ax1 = fig.add_subplot(311)
        ax1 = get_waveform_plot(filename, samplerate=samplerate, block_size=hop_s, ax=ax1)
        plt.setp(ax1.get_xticklabels(), visible=False)
        ax1.set_xlabel('')

        # Middle panel: optional ground truth (red line), raw pitches
        # (green), confidence-filtered pitches (line), cleaned track (red
        # dots) and dashed onset markers.  The ground truth is drawn here
        # because this is where ``ax2`` exists.
        ax2 = fig.add_subplot(312, sharex=ax1)
        ground_truth = os.path.splitext(filename)[0] + '.f0.Corrected'
        if os.path.isfile(ground_truth):
            ground_truth = array_from_text_file(ground_truth)
            true_freqs = ground_truth[:, 2]
            true_freqs = ma.masked_where(true_freqs < 2, true_freqs)
            true_times = float(samplerate) * ground_truth[:, 0]
            ax2.plot(true_times, true_freqs, 'r')
            ax2.axis(ymin=0.9 * true_freqs.min(), ymax=1.1 * true_freqs.max())
        ax2.plot(times, pitches, '.g')
        ax2.plot(times, cleaned_pitches, '.-')
        ax2.plot(times, octave_cleaned, '.r')
        plt.setp(ax2.get_xticklabels(), visible=False)
        ax2.set_ylabel('f0 (midi)')
        ax2.vlines(onsets_clean * 512, 0, 120, color='k', linestyle='--', linewidth=1)

        # Bottom panel: confidence curve with a flat line at the tolerance.
        ax3 = fig.add_subplot(313, sharex=ax1)
        ax3.plot(times, confidences)
        ax3.plot(times, [tolerance] * len(confidences))
        ax3.axis(xmin=times[0], xmax=times[-1])
        ax3.set_ylabel('confidence')
        set_xlabels_sample2time(ax3, times[-1], samplerate)

        print(len(output))
        print(output)
        plt.show(block=False)
    return output, len(signal)
Ejemplo n.º 7
0
if 1:
    print("done computing, now plotting")
    import matplotlib.pyplot as plt
    from demo_waveform_plot import get_waveform_plot
    from demo_waveform_plot import set_xlabels_sample2time

    # Waveform strip across the top of the figure, with both axes hidden.
    fig = plt.figure()
    plt.rc('lines', linewidth='.8')
    wave = plt.axes([0.1, 0.75, 0.8, 0.19])
    get_waveform_plot(filename, samplerate, block_size=hop_s, ax=wave)
    for hidden_axis in (wave.yaxis, wave.xaxis):
        hidden_axis.set_visible(False)

    # x position of every descriptor value: its index times the hop size.
    all_desc_times = [frame * hop_s
                      for frame in range(len(all_descs["default"]))]
    n_methods = len(methods)

    # One thin strip per method, stacked below the waveform and sharing its
    # x axis; the method name is written just left of each strip.
    for i, method in enumerate(methods):
        strip_rect = [
            0.1,
            0.75 - ((i+1) * 0.65 / n_methods),
            0.8,
            0.65 / n_methods,
        ]
        ax = plt.axes(strip_rect, sharex=wave)
        ax.plot(all_desc_times, all_descs[method], '-', label=method)
        for hidden_axis in (ax.xaxis, ax.yaxis):
            hidden_axis.set_visible(False)
        ax.axis(xmax=all_desc_times[-1], xmin=all_desc_times[0])
        ax.annotate(
            method,
            xy=(-10, 0),
            xycoords='axes points',
            horizontalalignment='right',
            verticalalignment='bottom',
        )

    # Only the last (lowest) strip gets time labels and a visible x axis.
    set_xlabels_sample2time(ax, all_desc_times[-1], samplerate)
    ax.xaxis.set_visible(True)
    plt.show()
Ejemplo n.º 8
0
from numpy import arange
from demo_waveform_plot import get_waveform_plot
from demo_waveform_plot import set_xlabels_sample2time
import matplotlib.pyplot as plt

# Waveform strip across the top of the figure; both of its axes are hidden.
fig = plt.figure()
plt.rc('lines', linewidth='.8')
wave = plt.axes([0.1, 0.75, 0.8, 0.19])
get_waveform_plot(source_filename, samplerate, block_size=hop_s, ax=wave)
for hidden_axis in (wave.xaxis, wave.yaxis):
    hidden_axis.set_visible(False)

# One thin strip per MFCC coefficient, stacked below the waveform and
# sharing its x axis.  x positions are row indices times the hop size.
all_times = arange(mfccs.shape[0]) * hop_s
n_coeffs = mfccs.shape[1]
for i, coeff_track in zip(range(n_coeffs), mfccs.T):
    bottom = 0.75 - ((i + 1) * 0.65 / n_coeffs)
    height = 0.65 / n_coeffs
    ax = plt.axes([0.1, bottom, 0.8, height], sharex=wave)
    for hidden_axis in (ax.xaxis, ax.yaxis):
        hidden_axis.set_visible(False)
    ax.plot(all_times, coeff_track)

# The last strip created is the only one with time labels and a visible
# x axis.
set_xlabels_sample2time(ax, frames_read, samplerate)
ax.xaxis.set_visible(True)

wave.set_title('MFCC for %s' % source_filename)
plt.show()
Ejemplo n.º 9
0
def audio_analysis(filename):
    """Estimate the average voice pitch of an audio file, classify it, plot it.

    Steps:

    1. Run aubio's ``yin`` pitch detector (frequency units) over the file
       and print one ``time pitch confidence`` line per hop.
    2. Run the ``aubiocut`` script as a subprocess and pull decimal
       timestamps out of its output.
    3. Keep every pitch below 10000 whose frame time falls in the same
       tenth of a second as one of those timestamps, and average them.
    4. Pass the average to ``clf.classify``.
    5. Show a three-panel figure (waveform, pitches, confidence).  The call
       to ``plt.show()`` happens before the function returns.

    Args:
        filename: Path of the audio file to analyse.

    Returns:
        The value returned by ``clf.classify([avg])``.

    Raises:
        ZeroDivisionError: If no pitch frame coincides with any timestamp,
            so that there is nothing to average.
    """
    import math
    import os.path
    import re

    import matplotlib.pyplot as plt
    from numpy import array, ma
    from demo_waveform_plot import get_waveform_plot, set_xlabels_sample2time

    print(type(filename))

    downsample = 1
    # Floor division keeps these sizes integral on Python 3 as well.
    samplerate = 44100 // downsample
    win_s = 4096 // downsample  # fft size
    hop_s = 512 // downsample  # hop size

    s = source(filename, samplerate, hop_s)
    samplerate = s.samplerate

    tolerance = 0.8

    pitch_o = pitch("yin", win_s, hop_s, samplerate)
    pitch_o.set_unit("freq")
    pitch_o.set_tolerance(tolerance)

    pitches = []
    confidences = []
    time_stamp = []
    # total number of frames read
    total_frames = 0
    while True:
        samples, read = s()
        # Deliberately not named ``pitch``: assigning to that name anywhere
        # in this function would make it local and break the ``pitch(...)``
        # constructor call above with an UnboundLocalError.
        frame_pitch = pitch_o(samples)[0]
        confidence = pitch_o.get_confidence()
        frame_time = total_frames / float(samplerate)
        print("%f %f %f" % (frame_time, frame_pitch, confidence))
        time_stamp.append(frame_time)
        pitches.append(frame_pitch)
        confidences.append(confidence)
        total_frames += read
        if read < hop_s:
            break

    # Invoke aubiocut to detect when a word is spoken.  Text mode makes the
    # captured output a string on both Python 2 and Python 3.
    sub = subprocess.Popen(['python', 'aubiocut', filename],
                           stdout=subprocess.PIPE,
                           stderr=subprocess.STDOUT,
                           universal_newlines=True)
    out = sub.communicate()[0]

    # Decimal numbers with at least four fractional digits.  Raw string and
    # escaped dot so that only a literal '.' separates the two parts.
    timestamps = re.findall(r"\d+\.\d{4,}", out)
    print(timestamps)

    # Group the usable pitches by the tenth of a second their frame starts
    # in.  Pitches of 10000 or more are treated as noise and left out.
    pitches_by_tenth = {}
    for frame_time, frame_pitch in zip(time_stamp, pitches):
        if frame_pitch < 10000.0:
            tenth = math.floor(frame_time * 10)
            pitches_by_tenth.setdefault(tenth, []).append(frame_pitch)

    # For each timestamp, in order, collect the pitches from its tenth.
    extracted_voice = []
    for stamp in timestamps:
        tenth = math.floor(float(stamp) * 10)
        extracted_voice.extend(pitches_by_tenth.get(tenth, []))

    print(extracted_voice)
    avg = sum(extracted_voice, 0.0) / len(extracted_voice)
    print("Average Pitch of Extracted Voice: " + str(avg))
    gender = clf.classify([avg])

    # Drop the first frame and derive the x position of each remaining one.
    skip = 1
    pitches = array(pitches[skip:])
    confidences = array(confidences[skip:])
    times = [t * hop_s for t in range(len(pitches))]

    fig = plt.figure()

    # Top panel: waveform.
    ax1 = fig.add_subplot(311)
    ax1 = get_waveform_plot(filename,
                            samplerate=samplerate,
                            block_size=hop_s,
                            ax=ax1)
    plt.setp(ax1.get_xticklabels(), visible=False)
    ax1.set_xlabel('')

    def array_from_text_file(filename, dtype='float'):
        """Read a whitespace-separated text table into an array.

        The path is resolved against this module's directory.
        NOTE(review): the caller's ``isfile`` check uses the path as given,
        so the two can disagree for relative paths -- confirm intent.
        """
        filename = os.path.join(os.path.dirname(__file__), filename)
        with open(filename) as handle:
            return array([line.split() for line in handle], dtype=dtype)

    # Middle panel: optional ground truth (red), raw pitches (dashed green)
    # and the confidence-filtered pitches.
    ax2 = fig.add_subplot(312, sharex=ax1)
    ground_truth = os.path.splitext(filename)[0] + '.f0.Corrected'
    if os.path.isfile(ground_truth):
        ground_truth = array_from_text_file(ground_truth)
        true_freqs = ground_truth[:, 2]
        true_freqs = ma.masked_where(true_freqs < 2, true_freqs)
        true_times = float(samplerate) * ground_truth[:, 0]
        ax2.plot(true_times, true_freqs, 'r')
        ax2.axis(ymin=0.9 * true_freqs.min(), ymax=1.1 * true_freqs.max())
    ax2.plot(times, pitches, '--g')
    cleaned_pitches = ma.masked_where(confidences < tolerance, pitches)
    ax2.plot(times, cleaned_pitches, '.-')
    plt.setp(ax2.get_xticklabels(), visible=False)
    ax2.set_ylabel('f0 (Hz)')

    # Bottom panel: confidence curve with a flat line at the tolerance.
    ax3 = fig.add_subplot(313, sharex=ax1)
    ax3.plot(times, confidences)
    ax3.plot(times, [tolerance] * len(confidences))
    ax3.axis(xmin=times[0], xmax=times[-1])
    ax3.set_ylabel('confidence')
    set_xlabels_sample2time(ax3, times[-1], samplerate)
    plt.show()
    return gender
Ejemplo n.º 10
0
def get_pitch(filename):
    """Extract the confident pitch estimates of an audio file and save a plot.

    aubio's ``yin`` detector (MIDI units, tolerance 0.8) is run over the
    file hop by hop.  A three-panel figure (waveform, pitches, confidence)
    is saved as a PDF next to the input, using the input path with its
    extension replaced by ``.pdf``.

    Args:
        filename: Path of the audio file to analyse.

    Returns:
        A tuple ``(times, pitches, confidences)`` of NumPy arrays restricted
        to the frames whose confidence is strictly above the tolerance.
        ``times`` holds frame indices multiplied by the hop size.  The first
        analysed frame is always dropped.
    """
    import os.path

    import matplotlib.pyplot as plt
    from aubio import source, pitch
    from numpy import array, ma
    from demo_waveform_plot import get_waveform_plot, set_xlabels_sample2time

    downsample = 1
    samplerate = 44100 // downsample

    win_s = 4096 // downsample  # fft size
    hop_s = 512 // downsample  # hop size

    s = source(filename, samplerate, hop_s)
    samplerate = s.samplerate

    tolerance = 0.8

    pitch_o = pitch("yin", win_s, hop_s, samplerate)
    pitch_o.set_unit("midi")
    pitch_o.set_tolerance(tolerance)

    # One pitch and one confidence value per hop, until a short read
    # signals the end of the file.
    pitches = []
    confidences = []
    while True:
        samples, read = s()
        pitches.append(pitch_o(samples)[0])
        confidences.append(pitch_o.get_confidence())
        if read < hop_s:
            break

    # Drop the first frame and derive the x position of each remaining one.
    skip = 1
    pitches = array(pitches[skip:])
    confidences = array(confidences[skip:])
    times = [t * hop_s for t in range(len(pitches))]

    fig = plt.figure()

    # Top panel: waveform.
    ax1 = fig.add_subplot(311)
    ax1 = get_waveform_plot(filename,
                            samplerate=samplerate,
                            block_size=hop_s,
                            ax=ax1)
    plt.setp(ax1.get_xticklabels(), visible=False)
    ax1.set_xlabel('')

    def array_from_text_file(filename, dtype='float'):
        """Read a whitespace-separated text table into an array.

        The path is resolved against this module's directory.
        NOTE(review): the caller's ``isfile`` check uses the path as given,
        so the two can disagree for relative paths -- confirm intent.
        """
        filename = os.path.join(os.path.dirname(__file__), filename)
        with open(filename) as handle:
            return array([line.split() for line in handle], dtype=dtype)

    # Middle panel: optional ground truth (red), raw pitches (green dots)
    # and the confidence-filtered pitches.
    ax2 = fig.add_subplot(312, sharex=ax1)
    ground_truth = os.path.splitext(filename)[0] + '.f0.Corrected'
    if os.path.isfile(ground_truth):
        ground_truth = array_from_text_file(ground_truth)
        true_freqs = ground_truth[:, 2]
        true_freqs = ma.masked_where(true_freqs < 2, true_freqs)
        true_times = float(samplerate) * ground_truth[:, 0]
        ax2.plot(true_times, true_freqs, 'r')
        ax2.axis(ymin=0.9 * true_freqs.min(), ymax=1.1 * true_freqs.max())
    ax2.plot(times, pitches, '.g')
    cleaned_pitches = ma.masked_where(confidences < tolerance, pitches)
    ax2.plot(times, cleaned_pitches, '.-')
    plt.setp(ax2.get_xticklabels(), visible=False)
    ax2.set_ylabel('f0 (midi)')

    # Bottom panel: confidence curve with a flat line at the tolerance.
    ax3 = fig.add_subplot(313, sharex=ax1)
    ax3.plot(times, confidences)
    ax3.plot(times, [tolerance] * len(confidences))
    ax3.axis(xmin=times[0], xmax=times[-1])
    ax3.set_ylabel('confidence')
    set_xlabels_sample2time(ax3, times[-1], samplerate)

    # Swap the extension instead of substituting the text '.mp3': a name
    # without '.mp3' used to come back unchanged, so savefig was handed the
    # audio path itself.
    plt.savefig(os.path.splitext(filename)[0] + '.pdf')

    significant = confidences > tolerance
    return array(times)[significant], pitches[significant], confidences[significant]
Ejemplo n.º 11
0
def audio_analysis(filename):
    """Estimate the average voice pitch of an audio file, classify it, plot it.

    Steps:

    1. Run aubio's ``yin`` pitch detector (frequency units) over the file
       and print one ``time pitch confidence`` line per hop.
    2. Run the ``aubiocut`` script as a subprocess and pull decimal
       timestamps out of its output.
    3. Keep every pitch below 10000 whose frame time falls in the same
       tenth of a second as one of those timestamps, and average them.
    4. Pass the average to ``clf.classify``.
    5. Show a three-panel figure (waveform, pitches, confidence).  The call
       to ``plt.show()`` happens before the function returns.

    Args:
        filename: Path of the audio file to analyse.

    Returns:
        The value returned by ``clf.classify([avg])``.

    Raises:
        ZeroDivisionError: If no pitch frame coincides with any timestamp,
            so that there is nothing to average.
    """
    import math
    import os.path
    import re

    import matplotlib.pyplot as plt
    from numpy import array, ma
    from demo_waveform_plot import get_waveform_plot, set_xlabels_sample2time

    print(type(filename))

    downsample = 1
    # Floor division keeps these sizes integral on Python 3 as well.
    samplerate = 44100 // downsample
    win_s = 4096 // downsample  # fft size
    hop_s = 512 // downsample  # hop size

    s = source(filename, samplerate, hop_s)
    samplerate = s.samplerate

    tolerance = 0.8

    pitch_o = pitch("yin", win_s, hop_s, samplerate)
    pitch_o.set_unit("freq")
    pitch_o.set_tolerance(tolerance)

    pitches = []
    confidences = []
    time_stamp = []
    # total number of frames read
    total_frames = 0
    while True:
        samples, read = s()
        # Deliberately not named ``pitch``: assigning to that name anywhere
        # in this function would make it local and break the ``pitch(...)``
        # constructor call above with an UnboundLocalError.
        frame_pitch = pitch_o(samples)[0]
        confidence = pitch_o.get_confidence()
        frame_time = total_frames / float(samplerate)
        print("%f %f %f" % (frame_time, frame_pitch, confidence))
        time_stamp.append(frame_time)
        pitches.append(frame_pitch)
        confidences.append(confidence)
        total_frames += read
        if read < hop_s:
            break

    # Invoke aubiocut to detect when a word is spoken.  Text mode makes the
    # captured output a string on both Python 2 and Python 3.
    sub = subprocess.Popen(['python', 'aubiocut', filename],
                           stdout=subprocess.PIPE,
                           stderr=subprocess.STDOUT,
                           universal_newlines=True)
    out = sub.communicate()[0]

    # Decimal numbers with at least four fractional digits.  Raw string and
    # escaped dot so that only a literal '.' separates the two parts.
    timestamps = re.findall(r"\d+\.\d{4,}", out)
    print(timestamps)

    # Group the usable pitches by the tenth of a second their frame starts
    # in.  Pitches of 10000 or more are treated as noise and left out.
    pitches_by_tenth = {}
    for frame_time, frame_pitch in zip(time_stamp, pitches):
        if frame_pitch < 10000.0:
            tenth = math.floor(frame_time * 10)
            pitches_by_tenth.setdefault(tenth, []).append(frame_pitch)

    # For each timestamp, in order, collect the pitches from its tenth.
    extracted_voice = []
    for stamp in timestamps:
        tenth = math.floor(float(stamp) * 10)
        extracted_voice.extend(pitches_by_tenth.get(tenth, []))

    print(extracted_voice)
    avg = sum(extracted_voice, 0.0) / len(extracted_voice)
    print("Average Pitch of Extracted Voice: " + str(avg))
    gender = clf.classify([avg])

    # Drop the first frame and derive the x position of each remaining one.
    skip = 1
    pitches = array(pitches[skip:])
    confidences = array(confidences[skip:])
    times = [t * hop_s for t in range(len(pitches))]

    fig = plt.figure()

    # Top panel: waveform.
    ax1 = fig.add_subplot(311)
    ax1 = get_waveform_plot(filename, samplerate=samplerate, block_size=hop_s, ax=ax1)
    plt.setp(ax1.get_xticklabels(), visible=False)
    ax1.set_xlabel('')

    def array_from_text_file(filename, dtype='float'):
        """Read a whitespace-separated text table into an array.

        The path is resolved against this module's directory.
        NOTE(review): the caller's ``isfile`` check uses the path as given,
        so the two can disagree for relative paths -- confirm intent.
        """
        filename = os.path.join(os.path.dirname(__file__), filename)
        with open(filename) as handle:
            return array([line.split() for line in handle], dtype=dtype)

    # Middle panel: optional ground truth (red), raw pitches (dashed green)
    # and the confidence-filtered pitches.
    ax2 = fig.add_subplot(312, sharex=ax1)
    ground_truth = os.path.splitext(filename)[0] + '.f0.Corrected'
    if os.path.isfile(ground_truth):
        ground_truth = array_from_text_file(ground_truth)
        true_freqs = ground_truth[:, 2]
        true_freqs = ma.masked_where(true_freqs < 2, true_freqs)
        true_times = float(samplerate) * ground_truth[:, 0]
        ax2.plot(true_times, true_freqs, 'r')
        ax2.axis(ymin=0.9 * true_freqs.min(), ymax=1.1 * true_freqs.max())
    ax2.plot(times, pitches, '--g')
    cleaned_pitches = ma.masked_where(confidences < tolerance, pitches)
    ax2.plot(times, cleaned_pitches, '.-')
    plt.setp(ax2.get_xticklabels(), visible=False)
    ax2.set_ylabel('f0 (Hz)')

    # Bottom panel: confidence curve with a flat line at the tolerance.
    ax3 = fig.add_subplot(313, sharex=ax1)
    ax3.plot(times, confidences)
    ax3.plot(times, [tolerance] * len(confidences))
    ax3.axis(xmin=times[0], xmax=times[-1])
    ax3.set_ylabel('confidence')
    set_xlabels_sample2time(ax3, times[-1], samplerate)
    plt.show()
    return gender