def segmentclassifyFileWrapper(inputWavFile, model_name, model_type):
    if not os.path.isfile(model_name):
        raise Exception("Input model_name not found!")
    if not os.path.isfile(inputWavFile):
        raise Exception("Input audio file not found!")
    gtFile = ""
    if inputWavFile[-4::]==".wav":
        gtFile = inputWavFile.replace(".wav", ".segments")
    if inputWavFile[-4::]==".mp3":
        gtFile = inputWavFile.replace(".mp3", ".segments")
    aS.mtFileClassification(inputWavFile, model_name, model_type, True, gtFile)
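# A more robust way to derive the ground-truth path than slicing the last four
# characters is os.path.splitext; a small sketch (gt_path_for is a hypothetical
# helper, not part of pyAudioAnalysis):
import os

def gt_path_for(input_file):
    base, ext = os.path.splitext(input_file)
    return base + ".segments" if ext.lower() in (".wav", ".mp3") else ""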
Example #3
def find_music(audio_file):
    modelName = "pyAA/data/svmSM"

    [Fs, x] = aIO.readAudioFile(audio_file)
    duration = x.shape[0] / float(Fs)
    t1 = time.clock()
    flagsInd, classNames, acc, CMt = aS.mtFileClassification(
        audio_file, modelName, "svm", False, '')
    [
        Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep,
        computeBEAT
    ] = aT.loadSVModel(modelName)
    t2 = time.clock()
    perTime1 = duration / (t2 - t1)
    flags = [classNames[int(f)] for f in flagsInd]
    (segs, classes) = aS.flags2segs(flags, mtStep)

    i = 0  #len(classes)-1
    file_parts = []

    cbn = sox.Combiner()
    if len(classes) > 1:
        for c in classes:
            if c == 'music':
                start = segs[i][0]
                if i != 0:
                    start -= 0.5
                end = segs[i][1]
                if i != len(classes) - 1:
                    end += 2.5

                file_parts.append((int(start * 1000), int(end * 1000)))
            i += 1

    return file_parts
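# Hypothetical follow-up showing how the (start_ms, end_ms) tuples returned by
# find_music could be cut into clips; pydub is assumed to be installed and is
# not used in the original snippet:
from pydub import AudioSegment

def export_music_clips(audio_file, out_prefix="music_part"):
    audio = AudioSegment.from_wav(audio_file)
    for k, (start_ms, end_ms) in enumerate(find_music(audio_file)):
        # pydub slices by milliseconds, matching the tuples built above
        audio[start_ms:end_ms].export("{}_{}.wav".format(out_prefix, k), format="wav")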
Example #4
def func():
    res = [flagsInd, classesAll, acc] = aS.mtFileClassification(PATH_TO_WAV, PATH_TO_SVM, "svm", False, PATH_TO_SEGMENTS_FILE)
    print res
    segments = getSegments(res[0])
    print segments
    f = open(PATH_TO_DIR + "segments", "w")
    f.write(str(segments))
    cutSegments(segments)
Example #5
def aud_classify(direc, wav_file, model):
    ##test on one file
    global j, l
    print "classifying" + direc + wav_file
    [flagsInd, classesAll, acc,
     CM] = aS.mtFileClassification(direc + wav_file, "models/" + model, "svm",
                                   True)
    print classesAll
    j = flagsInd
    l = classesAll

    return j, l, model
Example #6
def aud_classify(direc, wav_file):
    ##test on one file
    global j
    global l
    print "classifying" + direc + wav_file
    [flagsInd, classesAll, acc,
     CM] = aS.mtFileClassification(direc + wav_file, "svm_amlo_v1", "svm",
                                   True)
    print classesAll
    j = flagsInd
    l = classesAll
    return j, l
Example #7
def seg(filename, rec_type):
    [flagsInd, classesAll, acc, CM] = aS.mtFileClassification('audiofiles/' + filename,
                                      "./" + rec_type + 'MusicGenre2', rec_type, False)
    flags = array2list(flagsInd)
    audio_step0, _ = audiofilter(flags)
    with open(os.path.join('seg_result/', filename + '.seg'), 'w') as f:
        for s in audio_step0:
            f.write(str(s) + '\n')
    seg_points = read_seg_points(audio_step0, 8)
    #seg_points = read_segment_points(audio_step0, 8)
    audio_segment('audiofiles/' + filename, seg_points)
    return audio_step0, len(flagsInd)
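# array2list is not shown above; judging from its use it simply converts the
# numpy array of class flags to a plain Python list. A minimal sketch:
def array2list(flags_ind):
    return [int(f) for f in flags_ind]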
Example #8
def find_applause(inputfile,outputfile,to_csv,plot,default_speaker,buffer_secs,script_path):
    wav_source=True
    if inputfile.lower()[-4:]!='.wav':     # Creates a temporary WAV
        wav_source=False                         # if input is MP3
        temp_filename=inputfile.split('/')[-1]+'_temp.wav'
        wav_path='/var/tmp/'+temp_filename   # Pathname for temp WAV
        subprocess.call(['ffmpeg', '-y', '-i', inputfile, wav_path]) # '-y' option overwrites existing file if present
    else:
        wav_path=inputfile
    classifier_model_path = os.path.join(script_path,'data/svm_applause_model')
    output, classesAll, acc, CM = aS.mtFileClassification(wav_path, classifier_model_path, "svm")
    output = list(output)
    applause_secs=[]
    for i, x in enumerate(output):
        if float(x)==1.0:
            applause_secs.append(i)
    applause_ranges=seconds_list_to_ranges(applause_secs)
    if (plot==True)&(len(applause_ranges)>0):
        import matplotlib.pyplot as plt
        import pandas as pd
        import numpy as np
        print(applause_ranges)
        print('\n')
        pd.Series(output).plot()
        plt.title(inputfile.split('/')[-1])
        plt.xlabel('Seconds')
        plt.ylabel('Applause Classification')
        plt.show()
    if wav_source==False:
        os.remove(wav_path)
    if to_csv==True:
        if outputfile=='':
            outputfile=inputfile[:-4]+'_applause.csv'
        if default_speaker=='':
            with open(outputfile, 'w') as csv_fo:
                applause_ranges_expanded=[(start+buffer_secs,0,duration-buffer_secs) for start,duration in applause_ranges]
                csv_writer = csv.writer(csv_fo)
                csv_writer.writerows(applause_ranges_expanded)
        else:
            with open(outputfile, 'w') as csv_fo:
                prev_end = 0.0
                csv_writer = csv.writer(csv_fo)
                for start,duration in applause_ranges:
                    if float(float(start)-float(prev_end)-(float(buffer_secs)*2))>0.0:
                        csv_writer.writerow([float(prev_end)+buffer_secs,1,float(start)-float(prev_end)-(float(buffer_secs)*2),default_speaker.replace(',',';')])
                    if float(float(duration)-buffer_secs)>0:
                        csv_writer.writerow([start+buffer_secs,0,float(duration)-buffer_secs,'Applause'])
                    prev_end=start+duration
                if (prev_end < len(output)):
                    if float(float(len(output)-prev_end)-buffer_secs-1)>0.0:
                        csv_writer.writerow([float(prev_end)+buffer_secs,1,float(len(output)-prev_end)-buffer_secs-1,default_speaker.replace(',',';')]) # "-1" is a kluge to make sure final tag doesn't exceed length of audio file
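# seconds_list_to_ranges is not shown above; from its use it appears to
# collapse a sorted list of second indices into (start, duration) pairs, one
# per run of consecutive seconds. A hedged reconstruction:
def seconds_list_to_ranges(seconds):
    ranges = []
    if not seconds:
        return ranges
    start = prev = seconds[0]
    for s in seconds[1:]:
        if s == prev + 1:
            prev = s
        else:
            ranges.append((start, prev - start + 1))
            start = prev = s
    ranges.append((start, prev - start + 1))
    return ranges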
Example #9
def classify_file(audiofile, model, model_type, model_color):

    # sets duration of audiofile, for getting the timestamps of each classification
    with contextlib.closing(wave.open(audiofile, 'r')) as f:
        frames = f.getnframes()
        rate = f.getframerate()
        duration = frames / float(rate)
        print(duration)

    # only used for console output, can be removed to speed up runtime
    #   function will throw error because of true flag at end,
    #   console log is still displayed in spite of
    #try:
    #    aS.mtFileClassification(audiofile, model,"svm", True)
    #except TypeError:
    #    print("TypeError")

    # pulls all the data given from the classification function
    [flagsInd, classesAll, acc,
     CM] = aS.mtFileClassification(audiofile, model, "svm")
    # print( flagsInd )
    # print( classesAll )
    # print( acc )
    # print( CM )

    flag_len = len(flagsInd)  # amount of segments made
    segment = duration / flag_len  # length of each time segment

    # dictionary to be built of timestamps and categories
    classify_dict = {'name': model_type, 'color': model_color, 'data': []}

    classify_dict['data'].append({"category": "NO", "time": 0})

    for index in range(flag_len):
        timestamp = segment * index + 1  # current timestamp

        # builds dictionary
        classify_dict['data'].append({
            "category": classesAll[int(flagsInd[index])],
            "time": timestamp
        })
        # used for console logging
        # print( str( "{ category: '" + classesAll[int(flagsInd[index])] ) + "', time: " + str(timestamp) + " }," )

    return classify_dict
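# Example use of the returned dictionary; the file name and model path below
# are made up for illustration:
import json

result = classify_file("interview.wav", "models/svmSM", "speech-music", "#1f77b4")
with open("classification.json", "w") as f:
    json.dump(result, f, indent=2)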
Example #10
def audio_segmentation(wav_file_path, classifier_type, classifier_path):
    """
		Main function: takes a .wav file, and a scikit-learn classifier as arguments. Returns a partition of the sound track
		into segments of silence, of speech or of music.

		Example: audio_segmentation(path_to_wav_file, 'svm', path_to_svm_pickle_object)
		See pyAudioAnalysis's code and documentation for the list of possible classifiers.
		For this to work, the 'classifier' and 'classifier.arff' files must be in the same directory, as
		('classifier' + 'classifier.arff' = trained_classifier) 
	"""

    sound_segments = aS.mtFileClassification(wav_file_path,
                                             classifier_path,
                                             classifier_type,
                                             return_for_user=True)

    segments = sound_segments['segments']
    classes = sound_segments['classes']
    silences = detect_silences(wav_file_path, 2, 1)

    final_segmentation = incorporate_silences_to_segments(
        segments, classes, silences)
    return final_segmentation
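# detect_silences is not shown above; a sketch of what it might do with
# pyAudioAnalysis' silenceRemoval, assuming the two trailing arguments are the
# smoothing window (seconds) and the probability weight:
from pyAudioAnalysis import audioBasicIO as aIO
from pyAudioAnalysis import audioSegmentation as aS

def detect_silences(wav_file_path, smooth_window, weight):
    [Fs, x] = aIO.readAudioFile(wav_file_path)
    # silenceRemoval returns the *non-silent* intervals; silences are the gaps
    speech = aS.silenceRemoval(x, Fs, 0.050, 0.050,
                               smoothWindow=smooth_window, Weight=weight)
    duration = x.shape[0] / float(Fs)
    silences, prev_end = [], 0.0
    for start, end in speech:
        if start > prev_end:
            silences.append((prev_end, start))
        prev_end = end
    if prev_end < duration:
        silences.append((prev_end, duration))
    return silences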
Example #11
def vad(wav_in, wav_out):

    results, a, b, c = aS.mtFileClassification(
        wav_in, '/home/seni/git/pyAudioAnalysis/data/svmSM', 'svm', False)
    results = [(val + 1) % 2 for val in results]  # flipping 1 and 0s

    fs, data = wav.read(wav_in)

    num_segments = len(results)
    padded_results = [1, 1]
    padded_results.extend(results)
    padded_results.extend([1, 1])
    speech_data = []  # hopefully
    for i in range(2, num_segments + 2):
        #print(padded_results[i])
        segment = data[(i - 2) * fs:(i - 1) * fs]  # -2
        if sum(padded_results[i - 2:i + 3]) == 0:
            continue
        else:
            speech_data.extend(segment)

    speech_data = np.array(speech_data)
    wav.write(wav_out, fs, speech_data)
Example #12
def getMusicSegmentsFromFile(inputFile):	
	modelType = "svm"
	modelName = "data/svmMovies8classes"
	
	dirOutput = inputFile[0:-4] + "_musicSegments"
	
	if os.path.exists(dirOutput) and dirOutput!=".":
		shutil.rmtree(dirOutput)	
	os.makedirs(dirOutput)	
	
	[Fs, x] = audioBasicIO.readAudioFile(inputFile)	

	if modelType=='svm':
		[Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, compute_beat] = aT.load_model(modelName)
	elif modelType=='knn':
		[Classifier, MEAN, STD, classNames, mtWin, mtStep, stWin, stStep, compute_beat] = aT.load_model_knn(modelName)

	flagsInd, classNames, acc, CM = aS.mtFileClassification(inputFile, modelName, modelType, plotResults = False, gtFile = "")
	segs, classes = aS.flags2segs(flagsInd, mtStep)

	for i, s in enumerate(segs):
		if (classNames[int(classes[i])] == "Music") and (s[1] - s[0] >= minDuration):
			strOut = "{0:s}{1:.3f}-{2:.3f}.wav".format(dirOutput+os.sep, s[0], s[1])	
			wavfile.write( strOut, Fs, x[int(Fs*s[0]):int(Fs*s[1])])
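# minDuration is a module-level setting in the original script (undefined in
# the excerpt above); a hypothetical invocation with a 10-second minimum:
minDuration = 10.0
getMusicSegmentsFromFile("movie_audio.wav")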
Example #13
    def classify(self):
        if self.algo == "ina":
            self.seg = Segmenter()

        counter = 0
        for audioPath in self.media:
            startTime = int(round(time.time()))
            vid = audioPath.split("/")[-1]
            print("### {}/{} Processing {} ###".format(counter,
                                                       len(self.media), vid))
            if self.algo == "ina":
                tmp = self.seg(audioPath)
                tmp2 = str(tmp)
                self.segmentation.append(tmp)
                if ("Male" in tmp2 or "Female" in tmp2) and "Music" in tmp2:
                    self.results.append("Mixed")
                elif "Music" in tmp2:
                    self.results.append("Music")
                elif "Male" in tmp2 or "Female" in tmp2:
                    self.results.append("Speech")

            elif self.algo == "paa":
                [flagsInd, classesAll, acc,
                 CM] = aS.mtFileClassification(audioPath, "svmSM/svmSM", "svm",
                                               False, '')
                res = np.array(flagsInd).mean()
                if res <= 0.1:
                    self.results.append("Speech")
                elif res >= 0.9:
                    self.results.append("Music")
                else:
                    self.results.append("Mixed")

            endTime = int(round(time.time()))
            self.times.append(endTime - startTime)
            counter += 1
Example #14
def main(argv):	
	
	if argv[1]=="--file":
		getMusicSegmentsFromFile(argv[2])	
		classifyFolderWrapper(argv[2][0:-4] + "_musicSegments", "svm", "data/svmMusicGenre8", True)		
		
	elif argv[1]=="--dir":	
		analyzeDir(argv[2])	
		
	elif argv[1]=="--sim":
		csvFile = argv[2]
		f = []
		fileNames = []
		with open(csvFile, 'rb') as csvfile:
			spamreader = csv.reader(csvfile, delimiter='\t', quotechar='|')
			for j,row in enumerate(spamreader):
				if j>0:
					ftemp = []
					for i in range(1,9):
						ftemp.append(float(row[i]))
					f.append(ftemp)
					R = row[0]
					II = R.find(".wav");
					fileNames.append(row[0][0:II])
			f = numpy.array(f)

			Sim = numpy.zeros((f.shape[0], f.shape[0]))
			for i in range(f.shape[0]):
				for j in range(f.shape[0]):	
					Sim[i,j] = scipy.spatial.distance.cdist(numpy.reshape(f[i,:], (f.shape[1],1)).T, numpy.reshape(f[j,:], (f.shape[1],1)).T, 'cosine')
								
			Sim1 = numpy.reshape(Sim, (Sim.shape[0]*Sim.shape[1], 1))
			plt.hist(Sim1)
			plt.show()

			fo = open(csvFile + "_simMatrix", "wb")
			cPickle.dump(fileNames,  fo, protocol = cPickle.HIGHEST_PROTOCOL)
			cPickle.dump(f, fo, protocol = cPickle.HIGHEST_PROTOCOL)			
			cPickle.dump(Sim, fo, protocol = cPickle.HIGHEST_PROTOCOL)
			fo.close()

	elif argv[1]=="--loadsim":
		try:
			fo = open(argv[2], "rb")
		except IOError:
				print( "didn't find file")
				return
		try:			
			fileNames 	= cPickle.load(fo)
			f 			= cPickle.load(fo)
			Sim 		= cPickle.load(fo)
		except:
			fo.close()
		fo.close()	
		print(fileNames)
		Sim1 = numpy.reshape(Sim, (Sim.shape[0]*Sim.shape[1], 1))
		plt.hist(Sim1)
		plt.show()

	elif argv[1]=="--audio-event-dir":		
		files = "*.wav"
		inputFolder = argv[2]
		if os.path.isdir(inputFolder):
			strFilePattern = os.path.join(inputFolder, files)
		else:
			strFilePattern = inputFolder + files

		wavFilesList = []
		wavFilesList.extend(glob.glob(strFilePattern))
		wavFilesList = sorted(wavFilesList)		
		for i,w in enumerate(wavFilesList):			
			[flagsInd, classesAll, acc, CM] = aS.mtFileClassification(w, "data/svmMovies8classes", "svm", False, '')
			histTemp = numpy.zeros( (len(classesAll), ) )
			for f in flagsInd:
				histTemp[int(f)] += 1.0
			histTemp /= histTemp.sum()
			
			if i==0:
				print("".ljust(100) + "\t", end="")
				for C in classesAll:
					print(C.ljust(12) + "\t", end="")
				print()
			print(w.ljust(100) + "\t", end="")
			for h in histTemp:
				print("{0:.2f}".format(h).ljust(12) + "\t", end="")
			print()

			
	return 0
Example #15
def run(wavFileName2,bagFile2):
    time = 0
    segmentDuration = 0
    segments = []

    # >> Open WAVfile 
    #----------------------
    #audioGlobals.wavFileName -> global variable 
    audioGlobals.wavFileName = wavFileName2
    audioGlobals.bagFile = bagFile2

    audioGlobals.spf = wave.open(audioGlobals.wavFileName,'r')
    #Extract Raw Audio from Wav File
    audioGlobals.signal = audioGlobals.spf.readframes(-1)
    audioGlobals.signal = np.fromstring(audioGlobals.signal, 'Int16')
    #self.axes.clear()

    #Get wavFile audioGlobals.duration
    frames = audioGlobals.spf.getnframes()
    rate = audioGlobals.spf.getframerate()
    audioGlobals.duration = frames / float(rate)

    # >> Open CSVfile 
    #----------------------
    # check if .csv exists
    csvFileName = audioGlobals.bagFile.replace(".bag","_audio.csv")
    if os.path.isfile(csvFileName):
        annotationFile = open(csvFileName, 'rb')

        read = csv.reader(annotationFile)
        for row in read:
            row[0] = float(row[0])
            row[1] = float(row[1])
            audioGlobals.annotations.append([row[0], row[1], row[2]])

        # get speakers' unique colors for annotation plot and ganttChart
        #print len(audioGlobals.GreenShades)
        for shadeIndex in range(len(audioGlobals.annotations)):
            if audioGlobals.annotations[shadeIndex][2][:8] == 'Speech::':
                #print audioGlobals.greenIndex, len(audioGlobals.GreenShades)-1
                if audioGlobals.greenIndex >= (len(audioGlobals.GreenShades)-1):
                    audioGlobals.greenIndex = 0
                else:
                    audioGlobals.greenIndex = audioGlobals.greenIndex + 1
                #print audioGlobals.greenIndex, shadeIndex
                audioGlobals.shadesAndSpeaker.append([audioGlobals.annotations[shadeIndex][2], audioGlobals.GreenShades[audioGlobals.greenIndex]])

    # >> Call Classifier in case CSVFile not exists 
    #---------------------- 
    else:
        [flagsInd, classesAll, acc,CM] = aS.mtFileClassification(audioGlobals.wavFileName, os.path.abspath('audio/ClassifierMethods/svmModelTest'), 'svm', False)
        # declare classes
        [segs, classes] = aS.flags2segs(flagsInd, 1)
        lengthClass = len(classesAll)
        className = np.arange(lengthClass, dtype=np.float)


        for j in range(len(segs)):
            # no Annotation for Silence segments
            for i in range(len(classesAll)):
                if classes[j] == className[i] and classesAll[i] != 'Silence':
                    audioGlobals.annotations.append([segs[j][0]*1000, segs[j][1]*1000, classesAll[i]])

        # >> Write annotations in csv file
        csvFileName = audioGlobals.bagFile.replace(".bag","_audio.csv")
        annotationFile = open(csvFileName, 'w')
        write = csv.writer(annotationFile)
        write.writerows(audioGlobals.annotations)
        annotationFile.close()
Example #16
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioFeatureExtraction
from pyAudioAnalysis import audioTrainTest as aT
from pyAudioAnalysis import audioSegmentation as aS
import matplotlib.pyplot as plt
import numpy as np
from scipy import fftpack

[flagsInd, classesAll, acc,
 CM] = aS.mtFileClassification("keys.wav", "svmTaps", "svm", True)

audiofile = '/Users/kaixiwang/Documents/USC/CSCI-576/FinalProject/dataset2/Videos/data_test2.wav'
adaudio = ['/Users/kaixiwang/Documents/USC/CSCI-576/FinalProject/dataset/Ads/Subway_Ad_15s.wav',
           '/Users/kaixiwang/Documents/USC/CSCI-576/FinalProject/dataset/Ads/Starbucks_Ad_15s.wav']

[flagsInd, classesAll, acc, CM] = aS.mtFileClassification(audiofile, "data/svmSM", "svm", True, 'data/scottish.segments')
[Fs, x] = audioBasicIO.readAudioFile(audiofile);
X = fftpack.fft(x)
freqs = fftpack.fftfreq(len(x)) * Fs
fig, ax = plt.subplots()
ax.stem(freqs, np.abs(X))
ax.set_xlabel('Frequency in Hertz [Hz]')
ax.set_ylabel('Frequency Domain (Spectrum) Magnitude')
ax.set_xlim(-Fs / 2, Fs / 2)
ax.set_ylim(-5, 110)

[Fs, x] = audioBasicIO.readAudioFile(audiofile);
F, f_names = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.050*Fs, 0.025*Fs);
plt.subplot(2,1,1); plt.plot(F[0,:]); plt.xlabel('Frame no'); plt.ylabel(f_names[0]);
plt.subplot(2,1,2); plt.plot(F[1,:]); plt.xlabel('Frame no'); plt.ylabel(f_names[1]); plt.show()

#========================
n = len(channel1)
fourier=fft.fft(ad1)
def run(wavFileName2, bagFile2):
    time = 0
    segmentDuration = 0
    segments = []

    # >> Open WAVfile
    #----------------------
    #audioGlobals.wavFileName -> global variable
    audioGlobals.wavFileName = wavFileName2
    audioGlobals.bagFile = bagFile2

    audioGlobals.spf = wave.open(audioGlobals.wavFileName, 'r')
    #Extract Raw Audio from Wav File
    audioGlobals.signal = audioGlobals.spf.readframes(-1)
    audioGlobals.signal = np.fromstring(audioGlobals.signal, 'Int16')
    #self.axes.clear()

    #Get wavFile audioGlobals.duration
    frames = audioGlobals.spf.getnframes()
    rate = audioGlobals.spf.getframerate()
    audioGlobals.duration = frames / float(rate)

    # >> Open CSVfile
    #----------------------
    # check if .csv exists
    csvFileName = audioGlobals.bagFile.replace(".bag", "_audio.csv")
    if os.path.isfile(csvFileName):
        annotationFile = open(csvFileName, 'rb')

        read = csv.reader(annotationFile)
        for row in read:
            row[0] = float(row[0])
            row[1] = float(row[1])
            audioGlobals.annotations.append([row[0], row[1], row[2]])

        # get speakers' unique colors for annotation plot and ganttChart
        #print len(audioGlobals.GreenShades)
        for shadeIndex in range(len(audioGlobals.annotations)):
            if audioGlobals.annotations[shadeIndex][2][:8] == 'Speech::':
                #print audioGlobals.greenIndex, len(audioGlobals.GreenShades)-1
                if audioGlobals.greenIndex >= (len(audioGlobals.GreenShades) -
                                               1):
                    audioGlobals.greenIndex = 0
                else:
                    audioGlobals.greenIndex = audioGlobals.greenIndex + 1
                #print audioGlobals.greenIndex, shadeIndex
                audioGlobals.shadesAndSpeaker.append([
                    audioGlobals.annotations[shadeIndex][2],
                    audioGlobals.GreenShades[audioGlobals.greenIndex]
                ])

    # >> Call Classifier in case CSVFile not exists
    #----------------------
    else:
        [flagsInd, classesAll, acc,
         CM] = aS.mtFileClassification(audioGlobals.wavFileName,
                                       'svmModelTest', 'svm', False)
        # declare classes
        [segs, classes] = aS.flags2segs(flagsInd, 1)
        lengthClass = len(classesAll)
        className = np.arange(lengthClass, dtype=np.float)

        for j in range(len(segs)):
            # no Annotation for Silence segments
            for i in range(len(classesAll)):
                if classes[j] == className[i] and classesAll[i] != 'Silence':
                    audioGlobals.annotations.append(
                        [segs[j][0] * 1000, segs[j][1] * 1000, classesAll[i]])

        # >> Write annotations in csv file
        csvFileName = audioGlobals.bagFile.replace(".bag", "_audio.csv")
        annotationFile = open(csvFileName, 'w')
        write = csv.writer(annotationFile)
        write.writerows(audioGlobals.annotations)
        annotationFile.close()
Example #19
"""! 
@brief Example 31B
@details: Speech music discrimination and segmentation (using a trained
speech - music segment classifier)
Important: Need to run 31A first to extract speech music model (stored
in svm_speech_music)
@author Theodoros Giannakopoulos {[email protected]}
"""
from pyAudioAnalysis.audioSegmentation import mtFileClassification

if __name__ == '__main__':
    au = "../data/scottish_radio.wav"
    gt = "../data/scottish_radio.segments"
    #    au = "../data/musical_genres_small/hiphop/run_dmc_peter_riper.wav"
    mtFileClassification(au, "svm_speech_music", "svm_rbf", True, gt)
Example #20
def Classify(wavFileName):
	#Segmatation and Classification
	#os.chdir('Home/Documents/python/audioGraph')
	[flagsInd, classesAll, acc] = aS.mtFileClassification(wavFileName, 'svmModelTest', 'svm', False)
	print flagsInd, classesAll
Example #21
'''
    Segmentation

'''


import subprocess
from pyAudioAnalysis import audioSegmentation as aS

'''
    Fixed-segment Segmentation & Classification
'''

# [flagsInd, classesAll, acc, CM] = aS.mtFileClassification("../../audio-source/SMTest/voice_speech.mp3", "pyAudioAnalysis/data/svmSM", "svm", True, 'output/voice_speech.segments')
[flagsInd, classesAll, acc, CM] = aS.mtFileClassification("pyAudioAnalysis/data/scottish.wav", "pyAudioAnalysis/data/svmSM", "svm", True, 'output/scottish.segments')

# Command-line use:
# python audioAnalysis.py segmentClassifyFile -i <inputFile> --model <model type (svm or knn)> --modelName <path to classifier model>
# Example:
# python audioAnalysis.py segmentClassifyFile -i data/scottish.wav --model svm --modelName data/svmSM

# subprocess.call("cd pyAudioAnalysis; "
#                 "python audioAnalysis.py segmentClassifyFile -i data/scottish.wav --model svm --modelName data/svmSM", shell=True)

Example #22
from pyAudioAnalysis import audioSegmentation as aS
[flagsIndknn, classesAll, acc, CM] = aS.mtFileClassification("/Users/mclaugh/Desktop/MLK_2/Martin Luther King   The Three Evils of Society-j8d-IYSM-08.WAV", "/Volumes/McLaughlin-6TB-1/Dropbox/test_set_616_clips/knn_MLK_bg", "knn", True)

Example #23
def main(argv):
	if argv[1] == "-shortTerm":
		for i in range(nExp):
			[Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav");
			duration = x.shape[0] / float(Fs)
			t1 = time.clock()
			F = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.050*Fs, 0.050*Fs);
			t2 = time.clock()
			perTime1 =  duration / (t2-t1); print "short-term feature extraction: {0:.1f} x realtime".format(perTime1)
	elif argv[1] == "-classifyFile":
		for i in range(nExp):
			[Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav");
			duration = x.shape[0] / float(Fs)		
			t1 = time.clock()
			aT.fileClassification("diarizationExample.wav", "svmSM","svm")
			t2 = time.clock()
			perTime1 =  duration / (t2-t1); print "Mid-term feature extraction + classification \t {0:.1f} x realtime".format(perTime1)
	elif argv[1] == "-mtClassify":
		for i in range(nExp):
			[Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav");
			duration = x.shape[0] / float(Fs)		
			t1 = time.clock()
			[flagsInd, classesAll, acc] = aS.mtFileClassification("diarizationExample.wav", "svmSM", "svm", False, '')
			t2 = time.clock()
			perTime1 =  duration / (t2-t1); print "Fix-sized classification - segmentation \t {0:.1f} x realtime".format(perTime1)
	elif argv[1] == "-hmmSegmentation":
		for i in range(nExp):
			[Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav");
			duration = x.shape[0] / float(Fs)		
			t1 = time.clock()
			aS.hmmSegmentation('diarizationExample.wav', 'hmmRadioSM', False, '')             
			t2 = time.clock()
			perTime1 =  duration / (t2-t1); print "HMM-based classification - segmentation \t {0:.1f} x realtime".format(perTime1)
	elif argv[1] == "-silenceRemoval":
		for i in range(nExp):
			[Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav");
			duration = x.shape[0] / float(Fs)				
			t1 = time.clock()
			[Fs, x] = audioBasicIO.readAudioFile("diarizationExample.wav");
			segments = aS.silenceRemoval(x, Fs, 0.050, 0.050, smoothWindow = 1.0, Weight = 0.3, plot = False)
			t2 = time.clock()
			perTime1 =  duration / (t2-t1); print "Silence removal \t {0:.1f} x realtime".format(perTime1)
	elif argv[1] == "-thumbnailing":
		for i in range(nExp):
			[Fs1, x1] = audioBasicIO.readAudioFile("scottish.wav")
			duration1 = x1.shape[0] / float(Fs1)		
			t1 = time.clock()
			[A1, A2, B1, B2, Smatrix] = aS.musicThumbnailing(x1, Fs1, 1.0, 1.0, 15.0)	# find thumbnail endpoints			
			t2 = time.clock()
			perTime1 =  duration1 / (t2-t1); print "Thumbnail \t {0:.1f} x realtime".format(perTime1)
	elif argv[1] == "-diarization-noLDA":
		for i in range(nExp):
			[Fs1, x1] = audioBasicIO.readAudioFile("diarizationExample.wav")
			duration1 = x1.shape[0] / float(Fs1)		
			t1 = time.clock()		
			aS.speakerDiarization("diarizationExample.wav", 4, LDAdim = 0, PLOT = False)
			t2 = time.clock()
			perTime1 =  duration1 / (t2-t1); print "Diarization \t {0:.1f} x realtime".format(perTime1)
	elif argv[1] == "-diarization-LDA":
		for i in range(nExp):
			[Fs1, x1] = audioBasicIO.readAudioFile("diarizationExample.wav")
			duration1 = x1.shape[0] / float(Fs1)		
			t1 = time.clock()		
			aS.speakerDiarization("diarizationExample.wav", 4, PLOT = False)
			t2 = time.clock()
			perTime1 =  duration1 / (t2-t1); print "Diarization \t {0:.1f} x realtime".format(perTime1)
Example #24
from pyAudioAnalysis import audioSegmentation as aS
[flagsInd, classesAll, acc,
 CM] = aS.mtFileClassification("./audiofiles/c3_0629.wav", "./knnMusicGenre2",
                               "knn", False, './audiofiles/c3_0629.segments')
Example #25
specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stSpectogram(
    x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)

print("\n\n\n * * * TEST 3 * * * \n\n\n")
[Fs, x] = audioBasicIO.read_audio_file(root_data_path +
                                       "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo_to_mono(x)
specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stChromagram(
    x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)

print("\n\n\n * * * TEST 4 * * * \n\n\n")
aT.featureAndTrain([root_data_path + "SM/speech", root_data_path + "SM/music"],
                   1.0, 1.0, 0.2, 0.2, "svm", "temp", True)

print("\n\n\n * * * TEST 5 * * * \n\n\n")
[flagsInd, classesAll, acc, CM] = aS.mtFileClassification(
    root_data_path + "pyAudioAnalysis/data//scottish.wav",
    root_data_path + "pyAudioAnalysis/data/svmSM", "svm", True,
    root_data_path + 'pyAudioAnalysis/data/scottish.segments')

print("\n\n\n * * * TEST 6 * * * \n\n\n")
aS.trainHMM_fromFile(root_data_path + 'radioFinal/train/bbc4A.wav',
                     root_data_path + 'radioFinal/train/bbc4A.segments',
                     'hmmTemp1', 1.0, 1.0)
aS.trainHMM_fromDir(root_data_path + 'radioFinal/small', 'hmmTemp2', 1.0, 1.0)
aS.hmmSegmentation(root_data_path + 'pyAudioAnalysis/data//scottish.wav',
                   'hmmTemp1', True, root_data_path +
                   'pyAudioAnalysis/data//scottish.segments')  # test 1
aS.hmmSegmentation(root_data_path + 'pyAudioAnalysis/data//scottish.wav',
                   'hmmTemp2', True, root_data_path +
                   'pyAudioAnalysis/data//scottish.segments')  # test 2

print("\n\n\n * * * TEST 7 * * * \n\n\n")
print("\n\n\n * * * TEST 2 * * * \n\n\n")
[Fs, x] = audioBasicIO.readAudioFile(root_data_path + "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo2mono(x)
specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stSpectogram(x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)

print("\n\n\n * * * TEST 3 * * * \n\n\n")
[Fs, x] = audioBasicIO.readAudioFile(root_data_path + "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo2mono(x)
specgram, TimeAxis, FreqAxis = audioFeatureExtraction.stChromagram(x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)

print("\n\n\n * * * TEST 4 * * * \n\n\n")
aT.featureAndTrain([root_data_path +"SM/speech",root_data_path + "SM/music"], 1.0, 1.0, 0.2, 0.2, "svm", "temp", True)

print("\n\n\n * * * TEST 5 * * * \n\n\n")
[flagsInd, classesAll, acc, CM] = aS.mtFileClassification(root_data_path + "pyAudioAnalysis/data//scottish.wav", root_data_path + "pyAudioAnalysis/data/svmSM", "svm", True, root_data_path + 'pyAudioAnalysis/data/scottish.segments')

print("\n\n\n * * * TEST 6 * * * \n\n\n")
aS.trainHMM_fromFile(root_data_path + 'radioFinal/train/bbc4A.wav', root_data_path + 'radioFinal/train/bbc4A.segments', 'hmmTemp1', 1.0, 1.0)	
aS.trainHMM_fromDir(root_data_path + 'radioFinal/small', 'hmmTemp2', 1.0, 1.0)
aS.hmmSegmentation(root_data_path + 'pyAudioAnalysis/data//scottish.wav', 'hmmTemp1', True, root_data_path + 'pyAudioAnalysis/data//scottish.segments')				# test 1
aS.hmmSegmentation(root_data_path + 'pyAudioAnalysis/data//scottish.wav', 'hmmTemp2', True, root_data_path + 'pyAudioAnalysis/data//scottish.segments')				# test 2

print("\n\n\n * * * TEST 7 * * * \n\n\n")
aT.featureAndTrainRegression(root_data_path + "pyAudioAnalysis/data/speechEmotion", 1, 1, 0.050, 0.050, "svm_rbf", "temp.mod", compute_beat=False)
print(aT.fileRegression(root_data_path + "pyAudioAnalysis/data/speechEmotion/01.wav", "temp.mod", "svm_rbf"))

print("\n\n\n * * * TEST 8 * * * \n\n\n")
aT.featureAndTrainRegression(root_data_path + "pyAudioAnalysis/data/speechEmotion", 1, 1, 0.050, 0.050, "svm", "temp.mod", compute_beat=False)
print(aT.fileRegression(root_data_path + "pyAudioAnalysis/data/speechEmotion/01.wav", "temp.mod", "svm"))
Example #27
def run(wavFileName2, bagFile2):
    global wavFileName
    global bagFile
    global xStart
    global xEnd
    global annotationFlag, annotations, shadesAndSpeaker, greenIndex
    global spf, duration, signal

    time = 0
    segmentDuration = 0
    segments = []

    # >> Open WAVfile
    #----------------------
    #wavFileName -> global variable
    wavFileName = wavFileName2
    bagFile = bagFile2

    spf = wave.open(wavFileName, 'r')
    #Extract Raw Audio from Wav File
    signal = spf.readframes(-1)
    signal = np.fromstring(signal, 'Int16')
    #self.axes.clear()

    #Get wavFile duration
    frames = spf.getnframes()
    rate = spf.getframerate()
    duration = frames / float(rate)

    # >> Open CSVfile
    #----------------------
    # check if .csv exists
    csvFileName = bagFile.replace(".bag", "_audio.csv")
    if os.path.isfile(csvFileName):
        # print '.csv Found !'
        annotationFile = open(csvFileName, 'rb')

        read = csv.reader(annotationFile)
        for row in read:
            row[0] = float(row[0])
            row[1] = float(row[1])
            annotations.append([row[0], row[1], row[2]])

        # get speakers' unique colors for annotation plot and ganttChart
        for shadeIndex in range(len(annotations)):
            if annotations[shadeIndex][2][:8] == 'Speech::':
                shadesAndSpeaker.append(
                    [annotations[shadeIndex][2], GreenShades[greenIndex]])
                if greenIndex >= len(GreenShades) - 1:
                    greenIndex = 0
                else:
                    greenIndex = greenIndex + 1

    # >> Call Classifier in case CSVFile not exists
    #----------------------
    else:
        # print 'classifier...'
        [flagsInd, classesAll,
         acc] = aS.mtFileClassification(wavFileName, 'svmModelTest', 'svm',
                                        False)
        # declare classes
        [segs, classes] = aS.flags2segs(flagsInd, 1)
        lengthClass = len(classesAll)
        className = np.arange(lengthClass, dtype=np.float)

        for j in range(len(segs)):
            # no Annotation for Silence segments
            for i in range(len(classesAll)):
                if classes[j] == className[i] and classesAll[i] != 'Silence':
                    annotations.append(
                        [segs[j][0] * 1000, segs[j][1] * 1000, classesAll[i]])

    # >> Initialize GUI
    #----------------------
    qApp = QtWidgets.QApplication(sys.argv)
    aw = ApplicationWindow()
    aw.setWindowTitle("Audio")
    aw.show()

    # >> Terminate GUI
    #----------------------
    sys.exit(qApp.exec_())
Example #28
def evaluateSpeechMusic(fileName,
                        modelName,
                        method="svm",
                        postProcess=0,
                        postProcessModelName="",
                        PLOT=False):
    # load ground truth file (matlab annotation)

    matFile = fileName.replace(".wav", "_true.mat")
    if os.path.isfile(matFile):
        matfile = loadmat(matFile)
        segs_gt = matfile["segs_r"]
        classes_gt1 = matfile["classes_r"]
        classes_gt = []
        for c in classes_gt1[0]:
            if c == "M":
                classes_gt.append("music")
            if c == "S" or c == "E":
                classes_gt.append("speech")
        flagsIndGT, classesAllGT = audioSegmentation.segs2flags(
            [s[0] for s in segs_gt], [s[1] for s in segs_gt], classes_gt, 1.0)
    if method == "svm" or method == "randomforest" or method == "gradientboosting" or method == "extratrees":
        # speech-music segmentation:
        [flagsInd, classesAll, acc,
         CM] = audioSegmentation.mtFileClassification(fileName, modelName,
                                                      method, False, '')
    elif method == "hmm":
        [flagsInd, classesAll, _,
         _] = audioSegmentation.hmmSegmentation(fileName,
                                                modelName,
                                                PLOT=False,
                                                gtFileName="")
    elif method == "cnn":
        WIDTH_SEC = 2.4
        [Fs, x] = io.readAudioFile(fileName)
        x = io.stereo2mono(x)
        [flagsInd, classesAll,
         CNNprobs] = mtCNN_classification(x, Fs, WIDTH_SEC, 1.0,
                                          RGB_singleFrame_net, SOUND_mean_RGB,
                                          transformer_RGB, classNamesCNN)

    for i in range(flagsIndGT.shape[0]):
        flagsIndGT[i] = classesAll.index(classesAllGT[flagsIndGT[i]])

    #plt.plot(flagsIndGT, 'r')
    #plt.plot(flagsInd)
    #plt.show()

    #print classesAllGT, classesAll
    if postProcess >= 1:
        # medfilt here!
        flagsInd = scipy.signal.medfilt(flagsInd, 11)
    if postProcess >= 2:  #load HMM
        try:
            fo = open(postProcessModelName, "rb")
        except IOError:
            print "didn't find file"
            return
        try:
            hmm = cPickle.load(fo)
            classesAll = cPickle.load(fo)
        except:
            fo.close()

#Features = audioFeatureExtraction.stFeatureExtraction(x, Fs, 0.050*Fs, 0.050*Fs);    # feature extraction
#[Features, _] = aF.mtFeatureExtraction(x, Fs, mtWin * Fs, mtStep * Fs, round(Fs * 0.050), round(Fs * 0.050))
        flagsInd = hmm.predict(CNNprobs)
        flagsInd = scipy.signal.medfilt(flagsInd, 3)

    if PLOT:
        plt.plot(flagsInd + 0.01)
        plt.plot(flagsIndGT, 'r')
        plt.show()
    CM = np.zeros((2, 2))
    for i in range(min(flagsInd.shape[0], flagsIndGT.shape[0])):
        CM[int(flagsIndGT[i]), int(flagsInd[i])] += 1
    print CM
    return CM, classesAll
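# The 2x2 confusion matrix returned above (rows = ground truth, columns =
# predictions) can be summarized as follows; a sketch, not part of the
# original script:
import numpy as np

def cm_metrics(CM):
    acc = np.trace(CM) / np.sum(CM)
    precision = CM[0, 0] / max(CM[:, 0].sum(), 1)
    recall = CM[0, 0] / max(CM[0, :].sum(), 1)
    return acc, precision, recall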
Example #29
def main(argv):
    if argv[1] == "-shortTerm":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("snakehit.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            F = audioFeatureExtraction.stFeatureExtraction(
                x, Fs, 0.050 * Fs, 0.050 * Fs)
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print "short-term feature extraction: {0:.1f} x realtime".format(
                perTime1)
    elif argv[1] == "-classifyFile":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("snakehit.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            aT.fileClassification("snakehit.wav", "svmSM", "svm")
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print "Mid-term feature extraction + classification \t {0:.1f} x realtime".format(
                perTime1)
    elif argv[1] == "-mtClassify":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("snakehit.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            [flagsInd, classesAll,
             acc] = aS.mtFileClassification("snakehit.wav", "svmSM", "svm",
                                            False, '')
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print "Fix-sized classification - segmentation \t {0:.1f} x realtime".format(
                perTime1)
    elif argv[1] == "-hmmSegmentation":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("snakehit.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            aS.hmmSegmentation('snakehit.wav', 'hmmRadioSM', False, '')
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print "HMM-based classification - segmentation \t {0:.1f} x realtime".format(
                perTime1)
    elif argv[1] == "-silenceRemoval":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.readAudioFile("snakehit.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.clock()
            [Fs, x] = audioBasicIO.readAudioFile("snakehit.wav")
            segments = aS.silenceRemoval(x,
                                         Fs,
                                         0.050,
                                         0.050,
                                         smoothWindow=1.0,
                                         Weight=0.3,
                                         plot=False)
            t2 = time.clock()
            perTime1 = duration / (t2 - t1)
            print "Silence removal \t {0:.1f} x realtime".format(perTime1)
    elif argv[1] == "-thumbnailing":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("scottish.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            [A1, A2, B1, B2,
             Smatrix] = aS.musicThumbnailing(x1, Fs1, 1.0, 1.0,
                                             15.0)  # find thumbnail endpoints
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print "Thumbnail \t {0:.1f} x realtime".format(perTime1)
    elif argv[1] == "-diarization-noLDA":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("snakehit.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            aS.speakerDiarization("snakehit.wav", 4, LDAdim=0, PLOT=False)
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print "Diarization \t {0:.1f} x realtime".format(perTime1)
    elif argv[1] == "-diarization-LDA":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.readAudioFile("snakehit.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.clock()
            aS.speakerDiarization("snakehit.wav", 4, PLOT=False)
            t2 = time.clock()
            perTime1 = duration1 / (t2 - t1)
            print "Diarization \t {0:.1f} x realtime".format(perTime1)
Example #30
        if not os.path.isfile(modelName):
            modelName = default_modelName
            if not os.path.isfile(modelName):
                print('Cannot locate model file {}'.format(modelName))
    else:
        # detect mic configuration by analyzing input wav file
        modelName = get_model_path(args['inputWavFile'])

    if (args['debug']):
        print('\tusing: {}'.format(modelName))

    model_time = time.time() - start_time
    modelType = "svm"
    gtFile = ""
    returnVal = aS.mtFileClassification(args['inputWavFile'], modelName,
                                        modelType, False, gtFile)
    flagsInd = returnVal[0]
    classNames = returnVal[1]

    flags = [classNames[int(f)] for f in flagsInd]
    (segs, classes) = aS.flags2segs(flags, 1)

    for s in range(len(segs)):
        sg = segs[s]
        diff = int(sg[1]) - int(sg[0])
        if (args['debug']):
            print('{:>6} - {:>6} ({:>6}) : {}'.format(sg[0], sg[1], diff,
                                                       classes[s]))
        my_segments.append(Segment(int(sg[0]), int(sg[1]), str(classes[s])))

    # Speech and non speech lists
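# A hedged continuation of the fragment above: split my_segments into the two
# lists, assuming the Segment class stores its third constructor argument as
# .label and that the speech class is literally named "speech":
speech_segments = [s for s in my_segments if s.label == "speech"]
non_speech_segments = [s for s in my_segments if s.label != "speech"]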
Example #31
def run(wavFileName2,bagFile2):
    global wavFileName
    global bagFile
    global xStart
    global xEnd
    global annotationFlag, annotations, shadesAndSpeaker, greenIndex
    global spf, duration, signal

    time = 0
    segmentDuration = 0
    segments = []

    # >> Open WAVfile 
    #----------------------
    #wavFileName -> global variable 
    wavFileName = wavFileName2
    bagFile = bagFile2

    spf = wave.open(wavFileName,'r')
    #Extract Raw Audio from Wav File
    signal = spf.readframes(-1)
    signal = np.fromstring(signal, 'Int16')
    #self.axes.clear()

    #Get wavFile duration
    frames = spf.getnframes()
    rate = spf.getframerate()
    duration = frames / float(rate)

    # >> Open CSVfile 
    #----------------------
    # check if .csv exists
    csvFileName = bagFile.replace(".bag","_audio.csv")
    if os.path.isfile(csvFileName):
        # print '.csv Found !'
        annotationFile = open(csvFileName, 'rb')

        read = csv.reader(annotationFile)
        for row in read:
            row[0] = float(row[0])
            row[1] = float(row[1])
            annotations.append([row[0], row[1], row[2]])

        # get speakers' unique colors for annotation plot and ganttChart
        for shadeIndex in range(len(annotations)):
            if annotations[shadeIndex][2][:8] == 'Speech::':
                shadesAndSpeaker.append([annotations[shadeIndex][2], GreenShades[greenIndex]])
                if greenIndex >= len(GreenShades) - 1:
                    greenIndex = 0
                else:
                    greenIndex = greenIndex + 1

    # >> Call Classifier in case CSVFile not exists 
    #---------------------- 
    else:
        # print 'classifier...'
        [flagsInd, classesAll, acc] = aS.mtFileClassification(wavFileName, 'svmModelTest', 'svm', False)
        # declare classes
        [segs, classes] = aS.flags2segs(flagsInd, 1)
        lengthClass = len(classesAll)
        className = np.arange(lengthClass, dtype=np.float)


        for j in range(len(segs)):
            # no Annotation for Silence segments
            for i in range(len(classesAll)):
                if classes[j] == className[i] and classesAll[i] != 'Silence':
                    annotations.append([segs[j][0]*1000, segs[j][1]*1000, classesAll[i]])

    # >> Initialize GUI 
    #----------------------
    qApp = QtWidgets.QApplication(sys.argv)
    aw = ApplicationWindow()
    aw.setWindowTitle("Audio")
    aw.show()

    # >> Terminate GUI 
    #---------------------- 
    sys.exit(qApp.exec_())
def save_out(test_file):
    [flags_ind, classes_all, acc] = aS.mtFileClassification(test_file, args.model, "knn", False)
    np.save(test_file, flags_ind)
    return classes_all
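# Hedged round-trip for the flags saved above: np.save appends ".npy" to the
# file name, so they can be restored and mapped back to class names like this:
import numpy as np

def load_flags(test_file, classes_all):
    flags_ind = np.load(test_file + ".npy")
    return [classes_all[int(f)] for f in flags_ind]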
"""
@details: Speech / non-speech discrimination and segmentation (using a trained
speech / non-speech segment classifier)
Important: Need to run speech_non_speech.py first to extract the speech /
non-speech model (stored in svm_speech_non_speech)
"""
from pyAudioAnalysis.audioSegmentation import mtFileClassification

if __name__ == '__main__':
    au = "/media/vlachos/4e757fbf-09d9-4276-a1f4-af671280a9bb/NCSR-UOP/Multimodal Information Processing and Analysis/audio/speech_non_speech_test.wav"
    gt = "/media/vlachos/4e757fbf-09d9-4276-a1f4-af671280a9bb/NCSR-UOP/Multimodal Information Processing and Analysis/audio/speech_non_speech_test.txt"
    mtFileClassification(au, "svm_speech_non_speech", "svm_rbf", True, gt)
    # boundary speech model
    boundary_model = "model/svmNoLapelSpeechModel"

    # run the classification model on the audio file
    [Result, P, classNames] = aT.fileClassification(wavFile, mic_model, "svm")
    Result = int(Result)

    # if the winner class is boundary_speech return
    # the path of the boundary speech model, otherwise
    # return the path of the lapel speech model
    if classNames[Result] == "boundry_speech":
        return boundary_model
    else:
        return lapel_model


# argument handler
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", required=True, help="path to the audio file")
args = vars(ap.parse_args())
audio_file = args["input"]

# determine speech model for audio file
speech_model = get_model_path(audio_file)

# run predicted speech model to segment audio file
segmentation = aS.mtFileClassification(audio_file,
                                       speech_model,
                                       "svm",
                                       False,
                                       gtFile="")
Example #35
#
#     # check for silence
#     silent = True
#     # wave frame samples are stored in little endian**
#     # this example works for a single channel 16-bit per sample encoding
#     unpacked_signed_value = struct.unpack("<h", current_frame) # *
#     if abs(unpacked_signed_value[0]) > 500:
#         silent = False
#
#     if silent:
#         print("Frame %s is silent." % wave_file.tell())
#     else:
#         print("Frame %s is not silent." % wave_file.tell())

# rate, data = wf.read('testing.wav')
# # data0 is the data from channel 0.
# data0 = data[:, 0]
#
# print(data0)

# from pydub import AudioSegment
# from pydub.silence import detect_silence, detect_nonsilent
#
# song = AudioSegment.from_wav("soundaudio.wav")
# val = detect_silence(song)
# print(val)

from pyAudioAnalysis import audioSegmentation as aS
[flagsInd, classesAll, acc,
 CM] = aS.mtFileClassification("data/scottish.wav", "data/svmSM", "svm", True,
                               'data/scottish.segments')
Example #36
def get_classification():
    au = "../audio/Adam_Driver_and_Michael_Shannon.wav"
    gt = "annotated_data/Adam_Driver_and_Michael_Shannon.segments"
    #    au = "../data/musical_genres_small/hiphop/run_dmc_peter_riper.wav"
    mtFileClassification(au, "diarization", "knn", True, gt)