Example #1
File: vad_simple.py  Project: shammur/pyVAD
import os

from pyAudioAnalysis import audioSegmentation as aS


def segmentclassifySMFileWrapper(inputWavFile, model_name, model_type):
    if not os.path.isfile(model_name):
        raise Exception("Input model_name not found!")
    if not os.path.isfile(inputWavFile):
        raise Exception("Input audio file not found!")
    # Derive the ground-truth annotation path from the audio file name
    gtFile = ""
    if inputWavFile[-4:] == ".wav":
        gtFile = inputWavFile.replace(".wav", ".segments")
    if inputWavFile[-4:] == ".mp3":
        gtFile = inputWavFile.replace(".mp3", ".segments")
    aS.mid_term_file_classification(inputWavFile, model_name, model_type,
                                    True, gtFile)
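
A minimal call of the wrapper could look as follows; the audio and model
paths are hypothetical placeholders, not files shipped with the project:

# Hypothetical paths, for illustration only
segmentclassifySMFileWrapper("data/interview.wav",
                             "models/svm_speech_music", "svm_rbf")
# The wrapper derives the ground-truth path data/interview.segments from the
# audio file name and passes it on for evaluation.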
Example #2
# Note: the signatures below (returning the mid-term step and per-window
# class probabilities) suggest a modified audioSegmentation module
from pyAudioAnalysis.audioSegmentation import (labels_to_segments,
                                               mid_term_file_classification)


def test(audiofile_path):
    model_path = r"models/svm_male_female"
    model_type = "svm_rbf"
    plot_results = False

    labels, class_names, mt_step, class_probabilities = \
        mid_term_file_classification(audiofile_path, model_path, model_type,
                                     plot_results)
    print("labels: ", len(labels))

    # Merge consecutive windows with the same label into segments
    segs, c, probs = labels_to_segments(labels, class_probabilities, mt_step)
    print("prob test: ", len(probs))

    return probs, segs.tolist(), class_names, c


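A typical way to consume the returned values is to filter the merged
segments by class probability; a sketch, with a placeholder audio path and
an arbitrary 0.6 confidence threshold:

probs, segs, class_names, c = test("data/test.mp3")  # hypothetical path
for iS, seg in enumerate(segs):
    # Report only segments classified with reasonable confidence
    if probs[iS] > 0.6:
        print(f"segment {iS} {seg[0]} sec - {seg[1]} sec: "
              f"{class_names[int(c[iS])]} prob: {probs[iS]}")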
Example #3
"""! 
@brief Example 31B
@details: Speech music discrimination and segmentation (using a trained
speech - music segment classifier)
Important: Need to run 31A first to extract speech music model (stored
in svm_speech_music)
@author Theodoros Giannakopoulos {[email protected]}
"""
from pyAudioAnalysis.audioSegmentation import mid_term_file_classification

if __name__ == '__main__':
    au = "../data/scottish_radio.wav"
    gt = "../data/scottish_radio.segments"
    #    au = "../data/musical_genres_small/hiphop/run_dmc_peter_riper.wav"
    mid_term_file_classification(au, "svm_speech_music", "svm_rbf", True, gt)
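
For reference, a .segments ground-truth file is a small plain-text
annotation with one row per segment (start time, end time, label); the
exact delimiter depends on the pyAudioAnalysis version. A sketch of what
such a file might contain:

0.0	10.5	speech
10.5	45.2	music
45.2	60.0	speech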
Example #4
import sys
import time

from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioSegmentation as aS
from pyAudioAnalysis import audioTrainTest as aT
from pyAudioAnalysis import MidTermFeatures

nExp = 1  # repetitions per benchmark (value not defined in the original snippet)


def main(argv):
    if argv[1] == "-shortTerm":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.read_audio_file("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.time()
            F = MidTermFeatures.short_term_feature_extraction(
                x, Fs, 0.050 * Fs, 0.050 * Fs)
            t2 = time.time()
            perTime1 = duration / (t2 - t1)
            print("short-term feature extraction: {0:.1f} x realtime".format(
                perTime1))
    elif argv[1] == "-classifyFile":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.read_audio_file("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.time()
            aT.file_classification("diarizationExample.wav", "svmSM", "svm")
            t2 = time.time()
            perTime1 = duration / (t2 - t1)
            print "Mid-term feature extraction + classification \t {0:.1f} x realtime".format(
                perTime1)
    elif argv[1] == "-mtClassify":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.read_audio_file("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.time()
            [flagsInd, classesAll,
             acc] = aS.mid_term_file_classification("diarizationExample.wav",
                                                    "svmSM", "svm", False, '')
            t2 = time.time()
            perTime1 = duration / (t2 - t1)
            print "Fix-sized classification - segmentation \t {0:.1f} x realtime".format(
                perTime1)
    elif argv[1] == "-hmmSegmentation":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.read_audio_file("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.time()
            aS.hmm_segmentation('diarizationExample.wav', 'hmmRadioSM', False,
                                '')
            t2 = time.time()
            perTime1 = duration / (t2 - t1)
            print "HMM-based classification - segmentation \t {0:.1f} x realtime".format(
                perTime1)
    elif argv[1] == "-silenceRemoval":
        for i in range(nExp):
            [Fs, x] = audioBasicIO.read_audio_file("diarizationExample.wav")
            duration = x.shape[0] / float(Fs)
            t1 = time.time()
            [Fs, x] = audioBasicIO.read_audio_file("diarizationExample.wav")
            segments = aS.silence_removal(x,
                                          Fs,
                                          0.050,
                                          0.050,
                                          smooth_window=1.0,
                                          Weight=0.3,
                                          plot=False)
            t2 = time.time()
            perTime1 = duration / (t2 - t1)
            print "Silence removal \t {0:.1f} x realtime".format(perTime1)
    elif argv[1] == "-thumbnailing":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.read_audio_file("scottish.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.time()
            [A1, A2, B1, B2,
             Smatrix] = aS.music_thumbnailing(x1, Fs1, 1.0, 1.0,
                                              15.0)  # find thumbnail endpoints
            t2 = time.time()
            perTime1 = duration1 / (t2 - t1)
            print "Thumbnail \t {0:.1f} x realtime".format(perTime1)
    elif argv[1] == "-diarization-noLDA":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.read_audio_file("diarizationExample.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.time()
            aS.speaker_diarization("diarizationExample.wav",
                                   4,
                                   LDAdim=0,
                                   PLOT=False)
            t2 = time.time()
            perTime1 = duration1 / (t2 - t1)
            print "Diarization \t {0:.1f} x realtime".format(perTime1)
    elif argv[1] == "-diarization-LDA":
        for i in range(nExp):
            [Fs1, x1] = audioBasicIO.read_audio_file("diarizationExample.wav")
            duration1 = x1.shape[0] / float(Fs1)
            t1 = time.time()
            aS.speaker_diarization("diarizationExample.wav", 4, PLOT=False)
            t2 = time.time()
            perTime1 = duration1 / (t2 - t1)
            print "Diarization \t {0:.1f} x realtime".format(perTime1)
Example #5
from pyAudioAnalysis import audioSegmentation as aS
[flagsInd, classesAll, acc,
 CM] = aS.mid_term_file_classification("303.wav", "data/models/svm_rbf_sm",
                                       "svm", True, 'data/scottish.segments')
Example #6
from pyAudioAnalysis import audioBasicIO
from pyAudioAnalysis import audioSegmentation as aS
from pyAudioAnalysis import audioTrainTest as aT
from pyAudioAnalysis import ShortTermFeatures

# root_data_path is assumed to be defined earlier; the snippet is truncated
# and begins with the tail of a ShortTermFeatures call from an earlier test:
#     x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)

print("\n\n\n * * * TEST 3 * * * \n\n\n")
[Fs, x] = audioBasicIO.read_audio_file(root_data_path +
                                       "pyAudioAnalysis/data/doremi.wav")
x = audioBasicIO.stereo_to_mono(x)
specgram, TimeAxis, FreqAxis = ShortTermFeatures.chromagram(
    x, Fs, round(Fs * 0.040), round(Fs * 0.040), True)

print("\n\n\n * * * TEST 4 * * * \n\n\n")
aT.extract_features_and_train([root_data_path + "1/", root_data_path + "2/"],
                              1.0, 1.0, 0.2, 0.2, "svm", "temp", True)

print("\n\n\n * * * TEST 5 * * * \n\n\n")
[flagsInd, classesAll, acc, CM] = aS.mid_term_file_classification(
    root_data_path + "scottish.wav", root_data_path + "models/svm_rbf_sm",
    "svm_rbf", True, root_data_path + 'pyAudioAnalysis/data/scottish.segments')

print("\n\n\n * * * TEST 6 * * * \n\n\n")
aS.train_hmm_from_file(root_data_path + 'radioFinal/train/bbc4A.wav',
                       root_data_path + 'radioFinal/train/bbc4A.segments',
                       'hmmTemp1', 1.0, 1.0)
aS.train_hmm_from_directory(root_data_path + 'radioFinal/small', 'hmmTemp2',
                            1.0, 1.0)
aS.hmm_segmentation(root_data_path + 'pyAudioAnalysis/data/scottish.wav',
                    'hmmTemp1', True, root_data_path +
                    'pyAudioAnalysis/data/scottish.segments')  # test 1
aS.hmm_segmentation(root_data_path + 'pyAudioAnalysis/data/scottish.wav',
                    'hmmTemp2', True, root_data_path +
                    'pyAudioAnalysis/data/scottish.segments')  # test 2
Example #7
from pyAudioAnalysis import audioSegmentation as aS


def test_mt_file_classification():
    labels, class_names, accuracy, cm = aS.mid_term_file_classification(
        "test_data/scottish.wav", "test_data/svm_rbf_sm", "svm_rbf", False,
        "test_data/scottish.segments")
    assert accuracy > 0.95, "Segment-level classification accuracy is low"
Example #8
# "iNNovationMerge DailyCodeHub"

# Theme : Audio segmentation week with Python

# Fix-sized audio segmentation using pretrained eight class SVM model(svm_rbf_movie8class)

from pyAudioAnalysis import audioSegmentation as aS

[flagsInd, classesAll, acc, CM] = aS.mid_term_file_classification(
    "data/scottish.wav", "data/models/svm_rbf_movie8class", "svm", True,
    'data/scottish.segments')
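
To turn the per-window flags into readable (time, label) pairs, each flag
index can be mapped through classesAll; the 1.0 s window below is an
assumption, since the mid-term step is fixed inside
mid_term_file_classification and not returned by this API:

mt_step = 1.0  # assumed mid-term window, in seconds
for i, flag in enumerate(flagsInd):
    print("{:6.1f}s - {:6.1f}s: {}".format(
        i * mt_step, (i + 1) * mt_step, classesAll[int(flag)]))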