Example #1
def beatExtractionWrapper(wav_file, plot):
    if not os.path.isfile(wav_file):
        raise Exception("Input audio file not found!")
    # read the audio signal
    [fs, x] = audioBasicIO.read_audio_file(wav_file)
    # short-term feature extraction (50 ms window, 50 ms step)
    F, _ = sF.feature_extraction(x, fs, 0.050 * fs, 0.050 * fs)
    # tempo (BPM) estimation from the short-term feature sequence
    bpm, ratio = aF.beat_extraction(F, 0.050, plot)
    print("Beat: {0:d} bpm ".format(int(bpm)))
    print("Ratio: {0:.2f} ".format(ratio))
Example #2
def file_regression(input_file, model_name, model_type):
    # Load classifier:

    if not os.path.isfile(input_file):
        print("fileClassification: wav file not found!")
        return -1, -1, -1

    # model_name is expected to already be a list of regression model paths
    # (the original library code used glob.glob(model_name + "_*") instead)
    regression_models = model_name
    regression_models2 = []
    for r in regression_models:
        if r[-5::] != "MEANS":
            regression_models2.append(r)
    regression_models = regression_models2
    regression_names = []
    for r in regression_models:
        regression_names.append(r[r.rfind("_") + 1::])

    # FEATURE EXTRACTION
    # LOAD ONLY THE FIRST MODEL (for mt_win, etc)
    if model_type == 'svm' or model_type == "svm_rbf" or \
            model_type == 'randomforest':
        _, _, _, mid_window, mid_step, short_window, short_step, compute_beat \
            = load_model(regression_models[0], True)

    # read audio file and convert to mono
    sampling_rate, signal = audioBasicIO.read_audio_file(input_file)
    signal = audioBasicIO.stereo_to_mono(signal)
    # feature extraction:
    mid_features, s, _ = \
        aF.mid_feature_extraction(signal, sampling_rate,
                                  mid_window * sampling_rate,
                                  mid_step * sampling_rate,
                                  round(sampling_rate * short_window),
                                  round(sampling_rate * short_step))
    # long term averaging of mid-term statistics
    mid_features = mid_features.mean(axis=1)
    if compute_beat:
        beat, beat_conf = aF.beat_extraction(s, short_step)
        mid_features = np.append(mid_features, beat)
        mid_features = np.append(mid_features, beat_conf)

    # REGRESSION
    R = []
    for ir, r in enumerate(regression_models):
        if not os.path.isfile(r):
            print("fileClassification: input model_name not found!")
            return (-1, -1, -1)
        if model_type == 'svm' or model_type == "svm_rbf" \
                or model_type == 'randomforest':
            model, mean, std, _, _, _, _, _ = load_model(r, True)
        curFV = (mid_features - mean) / std  # normalization
        R.append(regression_wrapper(model, model_type,
                                    curFV))  # regression
    return R, regression_names
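
A usage sketch for this modified file_regression, which (unlike the stock library version in Example #3) takes a list of model paths directly; the paths and model type below are placeholders and the pyAudioAnalysis imports used by the snippet are assumed to be in scope:

# hypothetical usage: the model files are placeholders and must exist on disk,
# trained with the matching model_type
models = ["models/svm_valence", "models/svm_arousal"]
values, names = file_regression("speech.wav", models, "svm")
for n, v in zip(names, values):
    print("{0:s}: {1:.3f}".format(n, v))
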
Example #3
def fileRegression(inputFile, model_name, model_type):
    # Load classifier:

    if not os.path.isfile(inputFile):
        print("fileClassification: wav file not found!")
        return (-1, -1, -1)

    regression_models = glob.glob(model_name + "_*")
    regression_models2 = []
    for r in regression_models:
        if r[-5::] != "MEANS":
            regression_models2.append(r)
    regression_models = regression_models2
    regression_names = []
    for r in regression_models:
        regression_names.append(r[r.rfind("_") + 1::])

    # FEATURE EXTRACTION
    # LOAD ONLY THE FIRST MODEL (for mt_win, etc)
    if model_type == 'svm' or model_type == "svm_rbf" or \
            model_type == 'randomforest':
        [_, _, _, mt_win, mt_step, st_win, st_step, compute_beat] = \
            load_model(regression_models[0], True)

    # read audio file and convert to mono
    [Fs, x] = audioBasicIO.read_audio_file(inputFile)
    x = audioBasicIO.stereo_to_mono(x)
    # feature extraction:
    [mt_features, s, _] = aF.mid_feature_extraction(x, Fs, mt_win * Fs,
                                                    mt_step * Fs,
                                                    round(Fs * st_win),
                                                    round(Fs * st_step))
    # long term averaging of mid-term statistics
    mt_features = mt_features.mean(axis=1)
    if compute_beat:
        [beat, beatConf] = aF.beat_extraction(s, st_step)
        mt_features = np.append(mt_features, beat)
        mt_features = np.append(mt_features, beatConf)

    # REGRESSION
    R = []
    for ir, r in enumerate(regression_models):
        if not os.path.isfile(r):
            print("fileClassification: input model_name not found!")
            return (-1, -1, -1)
        if model_type == 'svm' or model_type == "svm_rbf" \
                or model_type == 'randomforest':
            [model, MEAN, STD, mt_win, mt_step, st_win, st_step,
             compute_beat] = load_model(r, True)
        curFV = (mt_features - MEAN) / STD  # normalization
        R.append(regressionWrapper(model, model_type, curFV))  # regression
    return R, regression_names
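
The stock version above discovers its models by globbing model_name + "_*" on disk, so a single call evaluates every regression dimension that shares that prefix; the prefix and file names below are placeholders, and the pyAudioAnalysis imports used by the snippet are assumed to be in scope:

# hypothetical usage: files such as "svm_emotion_valence" / "svm_emotion_arousal"
# (plus their *MEANS files) are assumed to exist next to the script
values, names = fileRegression("speech.wav", "svm_emotion", "svm")
print(dict(zip(names, values)))
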
Example #4
def features(file_path):
    fs, s = aIO.read_audio_file(file_path)
    m_win, m_step, s_win, s_step = 1, 1, 0.1, 0.05
    mid_features, short_features, mid_feature_names = aF.mid_feature_extraction(
        s, fs, round(fs * m_win), round(fs * m_step), round(fs * s_win),
        round(fs * s_step))
    mid_features = np.transpose(mid_features).mean(axis=0)
    beat, beat_conf = aF.beat_extraction(short_features, s_step)
    mid_features = np.append(mid_features, beat)
    mid_features = np.append(mid_features, beat_conf)
    mid_feature_names.append('beat')
    mid_feature_names.append('beat_conf')
    return mid_features, mid_feature_names
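
The helper above can be used roughly as follows to build one long-term feature vector per file (the imports mirror the aliases in the snippet and the path is a placeholder):

import numpy as np
from pyAudioAnalysis import audioBasicIO as aIO
from pyAudioAnalysis import MidTermFeatures as aF

fv, names = features("example.wav")  # placeholder path
print(len(fv), len(names))           # one value per mid-term statistic (+ beat, beat_conf)
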
Example #5
def fileClassification(inputFile, model_name, model_type):
    # Load classifier:

    if not os.path.isfile(model_name):
        print("fileClassification: input model_name not found!")
        return (-1, -1, -1)

    if not os.path.isfile(inputFile):
        print("fileClassification: wav file not found!")
        return (-1, -1, -1)

    if model_type == 'knn':
        [
            classifier, MEAN, STD, classNames, mt_win, mt_step, st_win,
            st_step, compute_beat
        ] = load_model_knn(model_name)
    else:
        [
            classifier, MEAN, STD, classNames, mt_win, mt_step, st_win,
            st_step, compute_beat
        ] = load_model(model_name)

    # read audio file and convert to mono
    [Fs, x] = audioBasicIO.read_audio_file(inputFile)
    x = audioBasicIO.stereo_to_mono(x)

    if Fs == 0:
        # audio file IO problem
        return -1, -1, -1
    if x.shape[0] / float(Fs) <= mt_win:
        return -1, -1, -1

    # feature extraction:
    [mt_features, s, _] = aF.mid_feature_extraction(x, Fs, mt_win * Fs,
                                                    mt_step * Fs,
                                                    round(Fs * st_win),
                                                    round(Fs * st_step))
    # long term averaging of mid-term statistics
    mt_features = mt_features.mean(axis=1)
    if compute_beat:
        [beat, beatConf] = aF.beat_extraction(s, st_step)
        mt_features = np.append(mt_features, beat)
        mt_features = np.append(mt_features, beatConf)
    curFV = (mt_features - MEAN) / STD  # normalization

    # classification
    [Result, P] = classifierWrapper(classifier, model_type, curFV)
    return Result, P, classNames
Example #6
def file_classification(input_file, model_name, model_type):
    # Load classifier:

    if not os.path.isfile(model_name):
        print("fileClassification: input model_name not found!")
        return -1, -1, -1

    if not os.path.isfile(input_file):
        print("fileClassification: wav file not found!")
        return -1, -1, -1

    if model_type == 'knn':
        classifier, mean, std, classes, mid_window, mid_step, short_window, \
            short_step, compute_beat = load_model_knn(model_name)
    else:
        classifier, mean, std, classes, mid_window, mid_step, short_window, \
            short_step, compute_beat = load_model(model_name)

    # read audio file and convert to mono
    sampling_rate, signal = audioBasicIO.read_audio_file(input_file)
    signal = audioBasicIO.stereo_to_mono(signal)

    if sampling_rate == 0:
        # audio file IO problem
        return -1, -1, -1
    if signal.shape[0] / float(sampling_rate) <= mid_window:
        return -1, -1, -1

    # feature extraction:
    mid_features, s, _ = \
        aF.mid_feature_extraction(signal, sampling_rate,
                                  mid_window * sampling_rate,
                                  mid_step * sampling_rate,
                                  round(sampling_rate * short_window),
                                  round(sampling_rate * short_step))
    # long term averaging of mid-term statistics
    mid_features = mid_features.mean(axis=1)
    if compute_beat:
        beat, beat_conf = aF.beat_extraction(s, short_step)
        mid_features = np.append(mid_features, beat)
        mid_features = np.append(mid_features, beat_conf)
    feature_vector = (mid_features - mean) / std  # normalization

    # classification
    class_id, probability = classifier_wrapper(classifier, model_type,
                                               feature_vector)
    return class_id, probability, classes
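
A short usage sketch for file_classification; the model path below is a placeholder for a model previously trained with pyAudioAnalysis using the same model_type, and the imports used by the snippet are assumed to be in scope:

# hypothetical usage: "svm_speech_music" is a placeholder model path
class_id, probabilities, class_names = file_classification("sample.wav",
                                                           "svm_speech_music",
                                                           "svm")
if class_id != -1:
    winner = class_names[int(class_id)]
    print("Predicted: {0:s} (p={1:.2f})".format(winner,
                                                probabilities[int(class_id)]))
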
Example #7
def vadFolderWrapperMergedByTh(inputFolder, outFolder, smoothingWindow, weight, model_name, threshold):

    if not os.path.isfile(model_name):
        print("vadFolderWrapperMergedByTh: input model_name not found!")
        return

    classifier, mean, std, classes, mid_window, mid_step, short_window, \
        short_step, compute_beat = aT.load_model(model_name)

    types = ('*.wav', '*.mp3')

    wavFilesList = []
    for files in types:
        print(inputFolder + files)
        wavFilesList.extend(glob.glob((inputFolder + files)))
    wavFilesList = sorted(wavFilesList)
    if len(wavFilesList) == 0:
        print("No WAV files found!")
        return
    for wavFile in wavFilesList:
        if not os.path.isfile(wavFile):
            raise Exception("Input audio file not found!")
        base = os.path.splitext(os.path.basename(wavFile))[0]
        folder = outFolder + base + '/'
        if not os.path.exists(folder):
            os.makedirs(folder)
        segfile = open(os.path.join(folder, 'segments'), 'w+')
        segfile2 = open(os.path.join(folder, 'segments_details'), 'w+')

        stack = deque()

        [fs, x] = audioBasicIO.read_audio_file(wavFile)
        segmentLimits = aS.silence_removal(x, fs, 0.05, 0.05,
                                           smoothingWindow, weight, False)
        merge = True

        for i, st in enumerate(segmentLimits):
            signal = audioBasicIO.stereo_to_mono(x[int(fs * st[0]):int(fs * st[1])])
            if fs == 0:
                # audio file IO problem
                continue

            if signal.shape[0] / float(fs) < mid_window:
                mid_window = signal.shape[0] / float(fs)

            # feature extraction:
            mid_features, s, _ = \
                aF.mid_feature_extraction(signal, fs,
                                          mid_window * fs,
                                          mid_step * fs,
                                          round(fs * short_window),
                                          round(fs * short_step))
            # long term averaging of mid-term statistics
            mid_features = mid_features.mean(axis=1)
            if compute_beat:
                beat, beat_conf = aF.beat_extraction(s, short_step)
                mid_features = np.append(mid_features, beat)
                mid_features = np.append(mid_features, beat_conf)
            feature_vector = (mid_features - mean) / std  # normalization
            class_id = classifier.predict(feature_vector.reshape(1, -1))[0]
            label = classes[int(class_id)]

            if label == 'speech':
                if merge:
                    seg_prev = []
                    if len(stack) > 0:
                        seg_prev = stack.pop()

                    if len(seg_prev) > 0 and st[1] - seg_prev[0] > threshold:
                        # the previous merged segment already exceeds the
                        # threshold: keep it and start a new speech segment
                        stack.append(seg_prev)
                        stack.append([st[0], st[1], label])
                    elif len(seg_prev) > 0:
                        # extend the previous speech segment to the end of
                        # the current one
                        stack.append([seg_prev[0], st[1], label])
                    else:
                        stack.append([st[0], st[1], label])
                else:
                    stack.append([st[0], st[1], label])
                merge = True
            else:
                merge = False
        # write out the merged segments for this file
        for sn in stack:
            strName = base + "_" + "{:.3f}".format(sn[0]) + "_" + "{:.3f}".format(sn[1])
            if sn[2] == 'speech':
                strOut = folder + strName + ".wav"
                wavfile.write(strOut, fs, x[int(fs * sn[0]):int(fs * sn[1])])
                segfile.write(strName + ' ' + base + ' ' + "{:.3f}".format(sn[0]) + ' ' + "{:.3f}".format(sn[1]) + "\n")
            segfile2.write(strName + ' ' + "{:.3f}".format(sn[0]) + ' ' + "{:.3f}".format(sn[1]) + ' ' + sn[2] + "\n")
        segfile.close()
        segfile2.close()
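
A usage sketch for the VAD folder wrapper above; the paths, smoothing window, weight and threshold values are placeholders, and the model is assumed to be a binary speech/non-speech classifier trained with pyAudioAnalysis:

# hypothetical call: folder paths should end with a separator because the
# function concatenates them directly with the glob patterns and base names
vadFolderWrapperMergedByTh("recordings/", "vad_out/",
                           smoothingWindow=1.0, weight=0.3,
                           model_name="models/svm_speech_noise",
                           threshold=10.0)
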