Python AudioLoaderの例、essentia.standard.AudioLoader Pythonの例

コード例 #1

0

ファイルを表示

    def noisebursts(self):
        """Sweep the ``threshold`` and ``alpha`` settings of the noise-burst detector.

        Every candidate value is run over all of ``self.files``: each file is
        loaded with ``std.AudioLoader``, its channels are summed and divided
        by the reported channel count, and the result is passed to
        ``essNoiseburstDetector``. The per-file outputs are scored by
        ``self.evaluateValue`` under the "NoiseBursts" label, and the
        precision, recall and F-score curves of each sweep are sent to
        ``u.plot``.
        """

        def run_sweep(parameter, candidates, figure_path):
            # One full sweep: evaluate every candidate, then plot the curves.
            precisions, recalls, fscores = [], [], []
            for candidate in candidates:
                print("{}: {} being evaluated".format(parameter, candidate))
                detections = []
                for index, path in enumerate(self.files):
                    print("Executing file {} number {}/{}".format(path, index+1, len(self.files)), end='\r')
                    samples, _rate, n_channels, _, _, _ = std.AudioLoader(filename=path)()
                    mono = np.sum(samples, axis=1)/n_channels
                    _, _, verdict = essNoiseburstDetector(mono, **{parameter: candidate})
                    detections.append((path.replace(self.wavDatasetPath, ""), verdict))
                print('')
                # Order by the dataset-relative file name before scoring.
                detections.sort(key=lambda entry: entry[0])
                _, precision, recall = self.evaluateValue(detections, "NoiseBursts")
                precisions.append(precision)
                recalls.append(recall)
                # The F-score is recorded as 0 when precision and recall are both 0.
                if (precision + recall) != 0.0:
                    fscores.append((1 + Fbeta**2) * precision * recall / (Fbeta**2 * precision + recall))
                else:
                    fscores.append(0.0)
            u.plot(figure_path, precision=precisions, recall=recalls, Fscore=fscores, x_values=candidates)

        run_sweep("threshold", [2*step for step in range(-5,6)], "./results/noisethreshold.png")
        run_sweep("alpha", [float(step)/10 for step in range(1,10)], "./results/noiseealpha.png")

コード例 #2

0

ファイルを表示

    def bandwidth(self):
        """Sweep the ``sumThreshold`` and ``confTh`` settings of ``BwDetection``.

        For each candidate value every file in ``self.files`` is loaded with
        ``std.AudioLoader``, its channels are summed and divided by the
        reported channel count, and the detector is run on the result. The
        per-file outputs are scored by ``self.evaluateValue`` under the
        "Bandwidth" label and the precision, recall and F-score curves are
        sent to ``u.plot``.
        """

        def fscore(precision, recall):
            # Without this guard the F-beta formula divides by zero when the
            # detector scores 0 precision and 0 recall for a candidate value.
            denominator = Fbeta**2 * precision + recall
            if denominator == 0:
                return 0.0
            return (1 + Fbeta**2) * precision * recall / denominator

        def sweep(label, values, detect, plot_path):
            # Evaluate every candidate value over the whole dataset, then plot.
            precisionArr = []
            recallArr = []
            FscoreArr = []
            for value in values:
                print("{}: {} being evaluated".format(label, value))
                valueResults = []
                for i, filename in enumerate(self.files):
                    print("Executing file {} number {}/{}".format(filename, i+1, len(self.files)), end='\r')
                    audio, sr, channels, _, _, _ = std.AudioLoader(filename=filename)()
                    audio = np.sum(audio, axis=1)/channels
                    ret = detect(audio, sr, value)
                    valueResults.append((filename.replace(self.wavDatasetPath, ""), ret))
                print('')
                valueResults = sorted(valueResults, key=lambda x: x[0])
                _, precision, recall = self.evaluateValue(valueResults, "Bandwidth")
                precisionArr.append(precision)
                recallArr.append(recall)
                FscoreArr.append(fscore(precision, recall))
            u.plot(plot_path, precision=precisionArr, recall=recallArr, Fscore=FscoreArr, x_values=values)

        # sumThreshold is a call-time argument, so one detector instance is reused.
        bandWidth = BwDetection()
        sweep("sumThreshold",
              [1e-7, 5e-7, 1e-6, 5e-6, 1e-5, 5e-5, 1e-4, 5e-4],
              lambda audio, sr, value: bandWidth(audio, sr, sumThreshold=value)[2],
              "./results/BWsumThreshold.png")

        # confTh is a constructor argument, so a detector is built for each call.
        sweep("ConfTh",
              [0.6, 0.7, 0.8, 0.9],
              lambda audio, sr, value: BwDetection(confTh=value)(audio, sr)[2],
              "./results/BWConfTh.png")

コード例 #3

0

ファイルを表示

    def silence(self):
        """Sweep the ``threshold`` and ``frameSize`` settings of the start/stop detector.

        For each candidate value every file in ``self.files`` is loaded with
        ``std.AudioLoader``, its channels are summed and divided by the
        reported channel count, and ``essStartstopDetector`` is run on the
        result. In the ``frameSize`` sweep the hop size is set to the same
        value as the frame size. The per-file outputs are scored by
        ``self.evaluateValue`` and the precision, recall and F-score curves
        are sent to ``u.plot``.
        """

        def fscore(precision, recall):
            # Without this guard the F-beta formula divides by zero when the
            # detector scores 0 precision and 0 recall for a candidate value.
            denominator = Fbeta**2 * precision + recall
            if denominator == 0:
                return 0.0
            return (1 + Fbeta**2) * precision * recall / denominator

        def sweep(label, values, detect, plot_path):
            # Evaluate every candidate value over the whole dataset, then plot.
            precisionArr = []
            recallArr = []
            FscoreArr = []
            for value in values:
                print("{}: {} being evaluated".format(label, value))
                valueResults = []
                for i, filename in enumerate(self.files):
                    print("Executing file {} number {}/{}".format(filename, i+1, len(self.files)), end='\r')
                    audio, sr, channels, _, _, _ = std.AudioLoader(filename=filename)()
                    audio = np.sum(audio, axis=1)/channels
                    ret = detect(audio, value)
                    valueResults.append((filename.replace(self.wavDatasetPath, ""), ret))
                print('')
                valueResults = sorted(valueResults, key=lambda x: x[0])
                # NOTE(review): this silence sweep is scored against the
                # "Clicks" label, which looks like a copy-paste slip; confirm
                # the intended ground-truth label before trusting the plots.
                _, precision, recall = self.evaluateValue(valueResults, "Clicks")
                precisionArr.append(precision)
                recallArr.append(recall)
                FscoreArr.append(fscore(precision, recall))
            u.plot(plot_path, precision=precisionArr, recall=recallArr, Fscore=FscoreArr, x_values=values)

        sweep("threshold",
              [-1*int(10*i) for i in range(1, 11)][::-1],
              lambda audio, value: essStartstopDetector(audio, threshold=value)[1],
              "./results/silencethreshold.png")

        sweep("frameSize",
              [int(2**i) for i in range(5,10)],
              lambda audio, value: essStartstopDetector(audio, frameSize=value, hopSize=value)[1],
              "./results/silenceframeSize.png")

コード例 #4

0

ファイルを表示

ファイル: audio_extract.py プロジェクト: andrebola/EUSIPCO2020

def analyze_misc(filename, segment_duration=20):
    """Extract frame-wise descriptors and EBU momentary loudness from a centred segment.

    Replay gain is computed on the whole file. A segment of
    ``segment_duration`` seconds centred in time is then loaded with that
    replay gain applied and analysed in frames of 2048 samples with a hop of
    1024 samples. The stereo signal is separately resampled, trimmed to the
    same segment and passed to ``LoudnessEBUR128``.

    :param filename: path of the audio file to analyse
    :param segment_duration: length in seconds of the analysed segment
    :return: an ``essentia.Pool`` holding the per-frame values ``rms``,
        ``rms_spectrum``, ``hfc``, ``spectral_centroid`` and ``zcr``, plus
        ``ebu_momentary``
    :raises ValueError: if the segment does not fit inside the audio
    """
    # Rate used to convert sample counts to seconds and as the resampling
    # target for the stereo signal.
    # NOTE(review): this assumes MonoLoader delivers audio at 44100 Hz (its
    # documented default) -- confirm if the loader configuration ever changes.
    sample_rate = 44100

    # Compute replay gain and duration on the entire file, then load the
    # segment that is centered in time with replaygain applied
    audio = es.MonoLoader(filename=filename)()
    replaygain = es.ReplayGain()(audio)

    duration = len(audio) / sample_rate
    segment_start = (duration - segment_duration) / 2
    segment_end = segment_start + segment_duration

    if segment_start < 0 or segment_end > duration:
        raise ValueError(
            'Segment duration is larger than the input audio duration')

    loader = es.EasyLoader(filename=filename,
                           replayGain=replaygain,
                           startTime=segment_start,
                           endTime=segment_end)

    windowing = es.Windowing(type='blackmanharris62')
    spectrum = es.Spectrum()
    centroid = es.Centroid()
    zcr = es.ZeroCrossingRate()
    rms = es.RMS()
    hfc = es.HFC()
    pool = essentia.Pool()

    audio = loader()
    for frame in es.FrameGenerator(audio, frameSize=2048, hopSize=1024):
        frame_spectrum = spectrum(windowing(frame))
        pool.add('rms', rms(frame))
        pool.add('rms_spectrum', rms(frame_spectrum))
        pool.add('hfc', hfc(frame_spectrum))
        pool.add('spectral_centroid', centroid(frame_spectrum))
        pool.add('zcr', zcr(frame))

    audio_st, sr, _, _, _, _ = es.AudioLoader(filename=filename)()
    # Ugly hack because we don't have a StereoResample: split the channels,
    # resample each one on its own, then interleave them again.
    left, right = es.StereoDemuxer()(audio_st)
    resampler = es.Resample(inputSampleRate=sr, outputSampleRate=sample_rate)
    left = resampler(left)
    right = resampler(right)
    audio_st = es.StereoMuxer()(left, right)
    audio_st = es.StereoTrimmer(startTime=segment_start,
                                endTime=segment_end)(audio_st)
    # The loudness hop is expressed in seconds and matches the 1024-sample
    # hop of the frame loop above.
    ebu_momentary, _, _, _ = es.LoudnessEBUR128(hopSize=1024 / sample_rate,
                                                startAtZero=True)(audio_st)
    pool.set('ebu_momentary', ebu_momentary)

    return pool

コード例 #5

0

ファイルを表示

ファイル: test_opt.py プロジェクト: vbadenas/MTG-Audio-Problems-Detection

def Bit_Detection(fpath: str):
    """Print, for each lower candidate bit depth, a score derived from sampled values.

    The container bit depth ``b`` is computed as bit rate / sample rate /
    channels. The first channel is rescaled to the range ``0 .. 2**b`` and
    100 random chunks of 100 samples are truncated to integers. For every
    candidate depth (``b - 8``, ``b - 16``, ... down to 8) the function
    prints one minus the fraction of sampled values that are exact multiples
    of ``2**(b - candidate)``, followed by the candidate and score lists.

    :param fpath: path of the audio file; must end in ``.wav``
    :raises ValueError: if the extension is not ``.wav`` or the audio is too
        short to draw the random chunks from
    """
    if os.path.splitext(fpath)[1] != ".wav":
        raise ValueError("file must be wav")

    audio, SR, channels, _, br, _ = estd.AudioLoader(filename=fpath)()

    b = int(br / SR / channels)  # number of bits used to code the fpath signal

    if channels >= 1:
        audio = audio[:, 0]
    audio = (2**b) * ((0.5 * audio) + 0.5)

    # Candidate depths below the container depth, in steps of 8 bits.
    possible_b_array = list(range(b - 8, 7, -8))

    chunk_len = 100
    number_of_chunks = 100
    # randint needs a strictly positive upper bound; fail with a clear
    # message instead of numpy's generic "low >= high".
    if len(audio) - chunk_len - 1 <= 0:
        raise ValueError(
            "audio is too short to sample chunks of {} samples".format(chunk_len))
    positions = np.random.randint(0,
                                  len(audio) - chunk_len - 1,
                                  size=number_of_chunks)

    # Gather all chunks in one pass (the list is no longer rebuilt per chunk)
    # and truncate the values to integers.
    audio_to_analyse = np.concatenate(
        [audio[int(idx):int(idx + chunk_len)] for idx in positions]
    ).astype(np.int64)

    conf_arr = []
    for possible_b in possible_b_array:
        hop = 2**(b - possible_b)
        # Count the sampled values that are exact multiples of the step size.
        wrong = int(np.count_nonzero(audio_to_analyse % hop == 0))

        conf = 1 - wrong / len(audio_to_analyse)
        conf_arr.append(conf)
        print("b:{0}\tprob:{1}".format(possible_b, conf))

    print(possible_b_array, conf_arr)

コード例 #6

0

ファイルを表示

def Bit_Detection_Binary(fpath: str):
    """Estimate how many bits of a wav file's container are actually used.

    The container bit depth ``b`` is computed as bit rate / sample rate /
    channels. The first channel is scaled by ``2**(b - 1)`` and cast to an
    integer type, 100 random chunks of 100 samples are drawn, and the bit
    arrays returned by ``convert_to_bin_array`` for every sample are OR-ed
    together. The result is the length of the bit array minus the offset of
    the last non-zero entry counted from the end.

    :param fpath: path of the audio file; must end in ``.wav`` and exist
    :return: the predicted number of bits (also printed); 0 when no bit was
        set in any sampled value
    :raises ValueError: if the extension is not ``.wav``, the file does not
        exist, the bit depth is not 8, 16, 24 or 32, or the audio is too
        short to draw the random chunks from
    """
    if os.path.splitext(fpath)[1] != ".wav":
        raise ValueError("file must be wav")

    if not os.path.exists(fpath):
        raise ValueError("file {} does not exist".format(fpath))

    audio, SR, channels, _, br, _ = estd.AudioLoader(filename=fpath)()

    b = int(br / SR / channels)  # number of bits used to code the fpath signal

    # Integer type used to hold samples of each accepted container depth.
    int_types = {8: 'int8', 16: 'int16', 24: 'int32', 32: 'int32'}
    if b not in int_types:
        raise ValueError("Only bit depths accepted are 8, 16, 24, 32")

    if channels >= 1:
        audio = audio[:, 0]  # if audio is stereo, only get the left channel

    # Scale the samples by 2**(b-1) and store them as integers.
    audio = ((2**(b - 1)) * audio.astype('float64')).astype(int_types[b])

    # get 100 random splices of data of 100 samples each one
    chunk_len = 100
    number_of_chunks = 100
    # randint needs a strictly positive upper bound; fail with a clear
    # message instead of numpy's generic "low >= high".
    if len(audio) - chunk_len - 1 <= 0:
        raise ValueError(
            "audio is too short to sample chunks of {} samples".format(chunk_len))
    positions = np.random.randint(0,
                                  len(audio) - chunk_len - 1,
                                  size=number_of_chunks)

    # Gather all chunks in one pass (the list is no longer rebuilt per chunk).
    audio_to_analyse = np.concatenate(
        [audio[int(idx):int(idx + chunk_len)] for idx in positions])

    # OR the per-sample bit arrays together: a position ends up non-zero if
    # any sampled value had that bit set.
    result = [0] * b
    for sample in audio_to_analyse:
        bin_arr = convert_to_bin_array(sample, b)
        result = [seen or bit for seen, bit in zip(result, bin_arr)]

    # Default to 0 so that input with no bit set no longer leaves the name
    # unbound when it is printed.
    bits_predicted = 0
    for i, el in enumerate(reversed(result)):
        if el != 0:
            bits_predicted = len(result) - i
            break
    print(bits_predicted)
    return bits_predicted

コード例 #7

0

ファイルを表示

ファイル: utils.py プロジェクト: malloyca/uvic-music-extractor

def load_audio(path, sample_rate, mono=True):
    """
    Read an audio file with Essentia, optionally downmix it, and resample it.

    :param path: (str) location of audio file to load
    :param sample_rate: (int) sampling rate the returned audio should have
    :param mono: (bool) mix the file down with ``mix_to_mono``, defaults to True
    :return: tuple ``(samples, channels)``: the processed samples and the
        channel count reported by the loader
    :raises RuntimeError: if the file has more than two channels
    """
    loaded = es.AudioLoader(filename=path)()
    samples, orig_rate, channels = loaded[0], loaded[1], loaded[2]

    # Only mono and stereo material is supported.
    if channels > 2:
        raise RuntimeError("Can't handle more than two audio channels.")

    # Essentia always loads as a stereo audio file, and for a single-channel
    # source the right channel is all zeros. Copy the left channel over it so
    # that stereo processing (such as the Loudness Normalization) sees a
    # proper two-channel signal.
    if channels == 1:
        samples[:, 1] = samples[:, 0]

    if mono:
        samples = mix_to_mono(samples)

    # Nothing more to do when the file is already at the requested rate.
    if orig_rate == sample_rate:
        return samples, channels

    resample = es.Resample(inputSampleRate=orig_rate,
                           outputSampleRate=sample_rate)

    if mono:
        return resample(samples), channels

    # Stereo: resample each channel on its own, then put the two channels
    # back side by side as columns.
    resampled_left = resample(samples[:, 0])
    resampled_right = resample(samples[:, 1])
    return np.array([resampled_left, resampled_right]).T, channels

コード例 #8

0

ファイルを表示

ファイル: audioChannelSep.py プロジェクト: vince-c98/DCASE2017-task1

def channelSep(filename_wav, path_dcase):
    """
    Separate stereo audio into left, right, average and difference WAV files.

    The four signals are written under ``path_dcase`` into the
    ``audio_left``, ``audio_right``, ``audio_average`` and
    ``audio_difference`` sub-directories, each keeping the base name of the
    input file.

    :param filename_wav: path of the stereo audio file to split
    :param path_dcase: directory containing the four output sub-directories
    :return: None
    """
    audio, sr, _, _, _, _ = es.AudioLoader(filename=filename_wav)()
    # Essentia reports the sample rate as a real number, whereas the WAV
    # writer expects an integer rate in the header.
    rate = int(round(sr))

    filename_wav = os.path.basename(filename_wav)
    print(filename_wav)

    left = audio[:, 0]
    right = audio[:, 1]
    outputs = (
        ('audio_left', left),
        ('audio_right', right),
        ('audio_average', left / 2.0 + right / 2.0),
        ('audio_difference', left - right),
    )
    for subdir, signal in outputs:
        wavfile.write(os.path.join(path_dcase, subdir, filename_wav), rate,
                      signal)

コード例 #9

0

ファイルを表示

ファイル: binary_test_multifile.py プロジェクト: vbadenas/MTG-Audio-Problems-Detection

def Bit_Detection_multifile(folder: str):
    """Run the bit-depth detection on every wav file of a folder and tabulate it.

    For each ``.wav`` file the container bit depth is computed as bit rate /
    sample rate / channels and compared with the value returned by
    ``Bit_Detection_Binary`` for the first channel. The table is printed and
    written to ``results.tsv`` in the current working directory. Files are
    processed in sorted name order so that the output is deterministic.

    :param folder: directory containing the audio files
    :raises ValueError: if ``folder`` does not exist
    """
    if not os.path.exists(folder):
        raise ValueError("{} does not exist".format(folder))

    columns = ["Filename", "Container", "Extracted", "Problem in file"]
    rows = []

    for file in sorted(os.listdir(folder)):

        if os.path.splitext(file)[1] != ".wav":
            print("{} skipped because it was not a wav file".format(file))
            continue

        fpath = os.path.join(folder, file)
        audio, SR, channels, _, br, _ = estd.AudioLoader(filename=fpath)()

        if channels >= 1:
            audio = audio[:, 0]

        b = int(br / SR / channels)  # number of bits used to code the fpath signal

        extracted_b = Bit_Detection_Binary(audio, b)
        rows.append({
            "Filename": file,
            "Container": b,
            "Extracted": extracted_b,
            # Flag files whose content uses fewer bits than the container.
            "Problem in file": extracted_b < b,
        })

    # Build the frame once from the collected rows: DataFrame.append was
    # removed in pandas 2.0 and re-copied the table on every call. Passing
    # the columns explicitly keeps set_index working when no wav file was
    # found.
    df = pd.DataFrame(rows, columns=columns).set_index("Filename")
    print(df)
    df.to_csv("results.tsv", sep="\t")

コード例 #10

0

ファイルを表示

    def clicks(self):
        """Sweep four settings of the click detector, one at a time.

        The swept parameters are ``order``, ``detectionThreshold``,
        ``powerEstimationThreshold`` and ``silenceThreshold``. For each
        candidate value every file in ``self.files`` is loaded with
        ``std.AudioLoader``, its channels are summed and divided by the
        reported channel count, and ``essClickDetector`` is run on the
        result. The per-file outputs are scored by ``self.evaluateValue``
        under the "Clicks" label and the precision, recall and F-score curves
        are written by ``u.plot`` to ``./results/clicks<parameter>.png``.
        """
        order = [int(2*i) for i in range(1,20)]
        detectionThreshold = [0, 5, 10, 15, 20, 25, 30, 35]
        powerEstimationThreshold = [int(2*i) for i in range(1, 8)]
        silenceThreshold = [-1*int(10*i) for i in range(1, 8)][::-1]

        def fscore(precision, recall):
            # Without this guard the F-beta formula divides by zero when the
            # detector scores 0 precision and 0 recall for a candidate value.
            denominator = Fbeta**2 * precision + recall
            if denominator == 0:
                return 0.0
            return (1 + Fbeta**2) * precision * recall / denominator

        def sweep(parameter, values):
            # Evaluate every candidate value over the whole dataset, then plot.
            precisionArr = []
            recallArr = []
            FscoreArr = []
            for value in values:
                print("{}: {} being evaluated".format(parameter, value))
                valueResults = []
                for i, filename in enumerate(self.files):
                    print("Executing file {} number {}/{}".format(filename, i+1, len(self.files)), end='\r')
                    audio, sr, channels, _, _, _ = std.AudioLoader(filename=filename)()
                    audio = np.sum(audio, axis=1)/channels
                    # Only the swept parameter is overridden on each call.
                    _, _, _, ret = essClickDetector(audio, **{parameter: value})
                    valueResults.append((filename.replace(self.wavDatasetPath, ""), ret))
                print('')
                valueResults = sorted(valueResults, key=lambda x: x[0])
                _, precision, recall = self.evaluateValue(valueResults, "Clicks")
                precisionArr.append(precision)
                recallArr.append(recall)
                FscoreArr.append(fscore(precision, recall))
            u.plot("./results/clicks{}.png".format(parameter), precision=precisionArr,
                   recall=recallArr, Fscore=FscoreArr, x_values=values)

        sweep("order", order)
        sweep("detectionThreshold", detectionThreshold)
        sweep("powerEstimationThreshold", powerEstimationThreshold)
        sweep("silenceThreshold", silenceThreshold)

コード例 #11

0

ファイルを表示

    def hum(self):
        """Sweep four settings of the hum detector, one at a time.

        The swept parameters are ``timeWindow``, ``minimumDuration``,
        ``timeContinuity`` and ``numberHarmonics``. For each candidate value
        every file in ``self.files`` is loaded with ``std.AudioLoader``, its
        channels are summed and divided by the reported channel count, and
        ``essHumDetector`` is run on the result. The per-file outputs are
        scored by ``self.evaluateValue`` under the "Hum" label and the
        precision, recall and F-score curves are sent to ``u.plot``.
        """

        def fscore(precision, recall):
            # Without this guard the F-beta formula divides by zero when the
            # detector scores 0 precision and 0 recall for a candidate value.
            denominator = Fbeta**2 * precision + recall
            if denominator == 0:
                return 0.0
            return (1 + Fbeta**2) * precision * recall / denominator

        def sweep(parameter, values, plot_path):
            # Evaluate every candidate value over the whole dataset, then plot.
            precisionArr = []
            recallArr = []
            FscoreArr = []
            for value in values:
                # Report the parameter actually being swept; every sweep used
                # to announce itself as "sumThreshold".
                print("{}: {} being evaluated".format(parameter, value))
                valueResults = []
                for i, filename in enumerate(self.files):
                    print("Executing file {} number {}/{}".format(filename, i+1, len(self.files)), end='\r')
                    audio, sr, channels, _, _, _ = std.AudioLoader(filename=filename)()
                    audio = np.sum(audio, axis=1)/channels
                    # Only the swept parameter is overridden on each call.
                    _, ret = essHumDetector(audio, **{parameter: value})
                    valueResults.append((filename.replace(self.wavDatasetPath, ""), ret))
                print('')
                valueResults = sorted(valueResults, key=lambda x: x[0])
                _, precision, recall = self.evaluateValue(valueResults, "Hum")
                precisionArr.append(precision)
                recallArr.append(recall)
                FscoreArr.append(fscore(precision, recall))
            u.plot(plot_path, precision=precisionArr, recall=recallArr, Fscore=FscoreArr, x_values=values)

        sweep("timeWindow", [0.1, 0.3, 0.5, 1, 3, 5],
              "./results/HumTimeWindow.png")
        sweep("minimumDuration", [0.01, 0.07, 0.1, 0.3, 0.5, 1, 3, 5],
              "./results/HumminimumDuration.png")
        sweep("timeContinuity", [0.1, 0.3, 0.5, 1, 3, 5],
              "./results/HumtimeContinuity.png")
        sweep("numberHarmonics", list(range(6)),
              "./results/HumnumberHarmonics.png")

コード例 #12

0

ファイルを表示

    def lowsnr(self):
        """Sweep three settings of ``LowSnrDetector``, one at a time.

        The swept constructor parameters are ``nrgThreshold``,
        ``acThreshold`` and ``snrThreshold``. For each candidate value a
        detector is built once and run on every file in ``self.files`` after
        the file is loaded with ``std.AudioLoader`` and its channels are
        summed and divided by the reported channel count. The per-file
        outputs are scored by ``self.evaluateValue`` under the "lowSNR" label
        and the precision, recall and F-score curves are sent to ``u.plot``.
        """
        snrnrgThresholdArr = [0.1, 0.3, 0.5, 0.7, 0.9]
        snracThresholdArr = [0.1, 0.3, 0.5, 0.7, 0.9]
        snrThresholdArr = [-3, -1, 1, 3, 5, 7, 9]

        def fscore(precision, recall):
            # Without this guard the F-beta formula divides by zero when the
            # detector scores 0 precision and 0 recall for a candidate value.
            denominator = Fbeta**2 * precision + recall
            if denominator == 0:
                return 0.0
            return (1 + Fbeta**2) * precision * recall / denominator

        def sweep(parameter, values, plot_path):
            # Evaluate every candidate value over the whole dataset, then plot.
            precisionArr = []
            recallArr = []
            FscoreArr = []
            for value in values:
                # Report the parameter actually being swept; every sweep used
                # to announce itself as "sumThreshold".
                print("{}: {} being evaluated".format(parameter, value))
                valueResults = []
                # One detector per candidate value, shared by all files.
                lsd = LowSnrDetector(**{parameter: value})
                for i, filename in enumerate(self.files):
                    print("Executing file {} number {}/{}".format(filename, i+1, len(self.files)), end='\r')
                    audio, sr, channels, _, _, _ = std.AudioLoader(filename=filename)()
                    audio = np.sum(audio, axis=1)/channels
                    _, ret = lsd(audio)
                    valueResults.append((filename.replace(self.wavDatasetPath, ""), ret))
                print('')
                valueResults = sorted(valueResults, key=lambda x: x[0])
                _, precision, recall = self.evaluateValue(valueResults, "lowSNR")
                precisionArr.append(precision)
                recallArr.append(recall)
                FscoreArr.append(fscore(precision, recall))
            u.plot(plot_path, precision=precisionArr, recall=recallArr, Fscore=FscoreArr, x_values=values)

        sweep("nrgThreshold", snrnrgThresholdArr, "./results/snrnrgThreshold.png")
        sweep("acThreshold", snracThresholdArr, "./results/snracThreshold.png")
        sweep("snrThreshold", snrThresholdArr, "./results/snrThreshold.png")

コード例 #13

0

ファイルを表示

def detectBW(fpath: str, frame_size: float, hop_size: float, floor_db: float,
             oversample_f: int):
    """Estimate the effective bandwidth (cut-off frequency) of a wav file.

    The file is framed, each frame's dB magnitude spectrum is turned into a
    spectral envelope, and a cut-off bin is computed per frame. The cut-off
    bins of the frames accepted by ``energy_verification`` are then
    histogrammed and summarised by ``compute_mean_fc``. The result is printed
    and plotted; nothing is returned.

    Args:
        fpath: path of the audio file; must end in ``.wav``.
        frame_size: analysis frame size in samples, before oversampling.
        hop_size: hop size in samples, passed to the frame generator.
        floor_db: floor value in dB, passed to ``modify_floor``.
        oversample_f: factor ``frame_size`` is multiplied by; must satisfy
            ``is_power2``.

    Raises:
        ValueError: if the file extension is not ``.wav`` or if
            ``oversample_f`` is rejected by ``is_power2``.
    """
    # check if the file has a wav extension, else: raise error
    if os.path.splitext(fpath)[1] != ".wav":
        raise ValueError("file must be wav")
    # check if the oversample factor is a power of two
    if not is_power2(oversample_f):
        raise ValueError("oversample factor can only be 1, 2 or 4")

    # AudioLoader returns x, sample_rate, number_channels, md5, bit_rate,
    # codec, of which only the first 2 are needed here
    audio, SR = estd.AudioLoader(filename=fpath)()[:2]

    if audio.shape[1] != 1:
        audio = (audio[:, 0] + audio[:, 1]) / 2  # if stereo: downmix to mono

    frame_size *= oversample_f  # if an oversample factor is desired, apply it
    n_bins = int(frame_size / 2) + 1
    f = np.arange(n_bins) / frame_size * SR  # frequency vector / xticks

    fc_index_arr = []
    interpolated_spectrum = np.zeros(n_bins)  # running sum of the envelopes
    fft = estd.FFT(size=frame_size)
    window = estd.Windowing(size=frame_size, type="hann")

    # Counted explicitly so that an input producing no frames does not hit an
    # undefined loop variable when averaging below.
    n_frames = 0
    for frame in estd.FrameGenerator(audio,
                                     frameSize=frame_size,
                                     hopSize=hop_size,
                                     startFromZero=True):
        n_frames += 1

        frame_fft = abs(fft(window(frame)))
        frame_fft_db = 20 * np.log10(frame_fft + eps)  # frame spectrum in dB
        # linear interpolation between the maxima of the spectrum
        interp_frame = compute_spectral_envelope(frame_fft_db, f, "linear")
        interp_frame = modify_floor(interp_frame, floor_db, log=True)

        fc_index = compute_fc(interp_frame)

        # Keep the cut-off only for frames that pass the energy check.
        # Appending it unconditionally as well would count accepted frames
        # twice and make the empty-list fallback below unreachable.
        if energy_verification(frame_fft, fc_index):
            fc_index_arr.append(fc_index)

        interpolated_spectrum += interp_frame

    if n_frames:
        interpolated_spectrum /= n_frames

    # No frame passed the energy check: fall back to a single sentinel entry.
    if not fc_index_arr:
        fc_index_arr = [frame_size]

    hist = compute_histogram(fc_index_arr, f)
    fc, conf, binary = compute_mean_fc(hist, fc_index_arr, f, SR)

    print("filename: ", fpath, "mean_fc: ", fc, " conf: ", conf,
          " binary_result: ", binary)

    fig, ax = plt.subplots(3, 1, figsize=(15, 9))
    ax[0].plot(fc_index_arr, "x")
    ax[1].stem(f, hist)
    ax[2].plot(f, interpolated_spectrum)
    ax[2].axvline(x=fc, color="r")
    plt.show()

コード例 #14

0

ファイルを表示

ファイル: plot.py プロジェクト: vbadenas/MTG-Audio-Problems-Detection

import essentia.standard as estd
from essentia import array as esarr
import matplotlib.pyplot as plt
import os
import numpy as np

DIR = "../Dataset/BW detection/"

# For every .wav file in DIR: downmix to mono, apply a Hann window over the
# whole signal, zero-pad to the next power of two, and save the dB magnitude
# spectrum as "<name>.png" next to the audio file.
for file in os.listdir(DIR):

    fpath = os.path.join(DIR, file)

    name, extension = os.path.splitext(file)
    print(file)
    if extension != ".wav":
        continue

    x, SR, _, _, br, _ = estd.AudioLoader(filename=fpath)()

    channels = x.shape[1]
    if channels != 1:
        x = (x[:, 0] + x[:, 1]) / 2
    print(x.shape, SR, channels, br)

    window = estd.Windowing(size=len(x), type="hann")
    x = window(x)
    N = int(2**(np.ceil(np.log2(len(x)))))  # next power of two >= len(x)
    x = np.append(x, np.zeros(N - len(x)))
    x = esarr(x)
    tfX = estd.FFT()(x)
    tfX = 20 * np.log10(abs(tfX))
    n_bins = int(len(x) / 2) + 1
    f = np.arange(n_bins) / len(x) * SR

    # Use a fresh figure per file and close it afterwards; otherwise every
    # plot lands on the same implicit figure and each saved image also
    # contains the spectra of all previously processed files.
    fig = plt.figure()
    plt.plot(f, tfX[:n_bins])
    plt.savefig(os.path.join(DIR, name + ".png"))
    plt.close(fig)

コード例 #15

0

ファイルを表示

ファイル: PitchDetection.py プロジェクト: torebre/essentia_test

import matplotlib.pyplot as plt

import essentia.standard as ess
import numpy as np

# Analysis parameters, as used below:
M = 1024  # window size and frame size (samples)
N = 1024  # size passed to the Spectrum algorithm
H = 512  # hop size between consecutive frames (samples)
fs = 44100  # sample rate requested from MonoLoader

# Load the input file as a mono signal at the requested sample rate.
x = ess.MonoLoader(filename='output3.wav', sampleRate=fs)()
# NOTE(review): this instantiates an AudioLoader without arguments and
# discards it; it looks like a leftover — confirm it can be removed.
ess.AudioLoader()

# Algorithm instances; only pitchYin is used in the visible part of the script.
spectrum = ess.Spectrum(size=N)
window = ess.Windowing(size=M, type='hann')

pitchYin = ess.PitchYin()


hpcp = ess.HPCP()
hpcps = []

spectralPeaks = ess.SpectralPeaks()
pitches = []
pitchConfidences = []



# Frame-wise pitch estimation on the raw (un-windowed) frames.
# NOTE(review): the snippet is truncated right after the confidence test
# below, so the handling of low-confidence frames is not visible here.
for frame in ess.FrameGenerator(x, frameSize=M, hopSize=H, startFromZero=True):
    pitch, pitchConfidence = pitchYin(frame)
    if pitchConfidence < 0.9:

コード例 #16

0

ファイルを表示

    def saturation(self):
        """Sweep the saturation detector's parameters and plot the scores.

        For each of ``energyThreshold``, ``differentialThreshold`` and
        ``minimumDuration`` the detector is run over every file in
        ``self.files`` once per candidate value, with the other parameters
        left at the detector's defaults. The per-value results are scored
        with ``self.evaluateValue(..., "Saturation")`` and the precision,
        recall and F-beta curves are written to ``./results/`` via ``u.plot``.
        """
        satEnergyThreshold = [-30, -20, -10, -7, -5, -3, -1, -0.01]
        satDifferentialThreshold = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10]
        satMinimumDuration = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1]

        # (detector keyword, progress label, candidate values, output plot)
        sweeps = (
            ("energyThreshold", "energy Threshold",
             satEnergyThreshold, "./results/satEnergyThreshold.png"),
            ("differentialThreshold", "differential Threshold",
             satDifferentialThreshold, "./results/satDifferentialThreshold.png"),
            ("minimumDuration", "minimum Duration",
             satMinimumDuration, "./results/satMinimumDuration.png"),
        )

        for paramName, label, values, plotPath in sweeps:
            precisionArr = []
            recallArr = []
            FscoreArr = []
            for value in values:
                print("{}: {} being evaluated".format(label, value))
                valueResults = []
                for i, filename in enumerate(self.files):
                    print("Executing file {} number {}/{}".format(filename, i+1, len(self.files)), end='\r')
                    audio, _, channels, _, _, _ = std.AudioLoader(filename=filename)()
                    # downmix to mono by averaging the channels
                    audio = np.sum(audio, axis=1)/channels
                    _, _, _, ret = essSaturationDetector(audio, **{paramName: value})
                    valueResults.append((filename.replace(self.wavDatasetPath, ""), ret))
                print('')
                valueResults = sorted(valueResults, key=lambda x: x[0])
                _, precision, recall = self.evaluateValue(valueResults, "Saturation")
                precisionArr.append(precision)
                recallArr.append(recall)
                # The F-beta score is undefined when its denominator is zero
                # (e.g. precision and recall both 0); report 0.0 instead of
                # dividing by zero.
                denominator = Fbeta**2 * precision + recall
                if denominator == 0:
                    FscoreArr.append(0.0)
                else:
                    FscoreArr.append((1 + Fbeta**2) * precision * recall / denominator)
            u.plot(plotPath, precision=precisionArr, recall=recallArr, Fscore=FscoreArr, x_values=values)

コード例 #17

0

ファイルを表示

def Bit_Detection(fpath: str):
    """Plot a histogram of quantised sample values of a wav file (debug code).

    The nominal bit depth ``b`` is derived from the loader's bit rate, sample
    rate and channel count. The first 10000 samples of channel 0 are rescaled
    with ``2**b * (0.5 * x + 0.5)`` (i.e. onto ``[0, 2**b]`` if samples lie in
    ``[-1, 1]`` — presumably; confirm against the loader), histogrammed with
    ``compute_histogram`` and plotted.

    NOTE(review): the function then stops at ``assert False``, so the
    per-channel loop and the final plot below it are unreachable unless
    assertions are disabled (``python -O``).

    Args:
        fpath: path of the audio file; must end in ``.wav``.

    Raises:
        ValueError: if the file extension is not ``.wav``.
        AssertionError: always, after the first plot is shown (debug halt).
    """

    if os.path.splitext(fpath)[1] != ".wav":
        # check if the file has a wav extension, else: raise error
        raise ValueError("file must be wav")

    # audio loader returns x, sample_rate, number_channels, md5, bit_rate, codec;
    # the samples, sample rate, channel count and bit rate are used here
    audio, SR, channels, _, br, _ = estd.AudioLoader(filename=fpath)()

    b = int(br / SR / channels)  #number of bits used to code the fpath signal
    possible_values = np.arange(2**b)
    #_, ax = plt.subplots(3, channels, figsize=(15, 9))
    print("array created")

    #bits_result = -1
    #conf_result = 1
    # Debug path: histogram of the first 10000 rescaled samples of channel 0.
    audio_int_channel = (2**b) * ((0.5 * audio[:, 0]) + 0.5)
    audio_int_channel = audio_int_channel[:10000]
    hist = compute_histogram(audio_int_channel, possible_values)
    plt.plot(hist, 'x')
    plt.show()
    # Deliberate debug halt: nothing below runs while assertions are enabled.
    assert False

    # Per-channel histogram; the bit-depth prediction that used to follow is
    # kept only as commented-out code.
    for channel in range(channels):
        audio_int_channel = (2**b) * ((0.5 * audio[:, channel]) + 0.5)

        hist = compute_histogram(audio_int_channel, possible_values)

        #hist_peaks = hist/sum(hist)
        #hist_peaks[hist_peaks <= 0.0001] = 0
        #x_peaks, y_peaks = get_peaks(hist_peaks, possible_values)
        #y_peaks = np.array(y_peaks) * sum(hist)

        #tol = b/2
        #resolution = 2
        #center_x = []
        #center_y = []

        #first_idx = np.argmax(y_peaks) - resolution
        #for i in range(first_idx, first_idx + (3 * resolution + 1)):
        #	center_x.append(x_peaks[i])
        #	center_y.append(y_peaks[i])

        #b_pred = np.round(np.log2(np.mean(np.diff(center_x))))
        #b_pred = max(8,b_pred)
        #hop = 2 ** b_pred
        #print(hop)

        #zero_idx = 2 ** (b - 1)
        #idx_arr = []
        #idx = zero_idx - int(zero_idx/hop)*hop
        #while idx <= 2**b:
        #	idx_arr.append(idx)
        #	idx += hop

        #conf_hist = hist.copy()
        #for x_search in idx_arr:
        #	if (x_search - tol)<0:
        #		conf_hist[:int(x_search + tol)] = 0
        #	elif (x_search + tol)>len(conf_hist):
        #		conf_hist[int(x_search - tol):] = 0
        #	else:
        #		conf_hist[int(x_search - tol):int(x_search + tol)] = 0

        #print("b_pred: ", b_pred, "conf: ", 1-sum(conf_hist)/sum(hist))

        #bits_result = max(bits_result, b_pred)
        #conf_result *= 1-sum(conf_hist)/sum(hist)
        # The string literal below is disabled plotting code kept as a no-op
        # expression statement.
        """
		if channels == 1:
			ax[0].plot(audio_int_channel)
			ax[1].plot(possible_values, hist, 'x')
			ax[1].plot(x_peaks, y_peaks/sum(hist), 'x')
			ax[2].plot(possible_values, conf_hist, 'x')
		else:
			ax[0][channel].plot(audio_int_channel)
			ax[1][channel].plot(possible_values, hist, 'x')
			ax[1][channel].plot(x_peaks, y_peaks, 'xr')
			ax[1][channel].plot(x_peaks[np.argmax(y_peaks)], max(y_peaks), 'x')
			ax[2][channel].plot(possible_values, conf_hist, 'x')
		"""
    #print("bits_result: ", bits_result, "conf_result: ", conf_result)
    # Plots the histogram of the last channel processed by the loop above.
    plt.plot(hist, 'x')
    plt.show()

コード例 #18

0

ファイルを表示

ファイル: energy_computation.py プロジェクト: vbadenas/MTG-Audio-Problems-Detection

def detectBW(fpath: str, frame_size: float, hop_size: float, floor_db: float,
             oversample_f: int):
    """Estimate the bandwidth cut-off of a wav file from frame energies.

    Frames whose spectral energy reaches at least 10% of the loudest frame's
    energy are scanned from the highest bin downwards for the first bin that
    satisfies the tail criterion; that bin is mapped onto a 129-slot
    histogram weighted by the frame energy. ``compute_mean_fc`` then picks
    the most likely cut-off, which is printed and plotted. Nothing is
    returned.

    Args:
        fpath: path of the audio file; must end in ``.wav``.
        frame_size: analysis frame size in samples, before oversampling.
        hop_size: hop size in samples, passed to the frame generator.
        floor_db: accepted for interface compatibility; not used here.
        oversample_f: factor ``frame_size`` is multiplied by; must satisfy
            ``is_power2``.

    Raises:
        ValueError: if the file extension is not ``.wav`` or if
            ``oversample_f`` is rejected by ``is_power2``.
    """
    # only wav files are accepted
    _, extension = os.path.splitext(fpath)
    if extension != ".wav":
        raise ValueError("file must be wav")

    # the oversample factor has to be a power of two
    if not is_power2(oversample_f):
        raise ValueError("oversample factor can only be 1, 2 or 4")

    # the loader yields x, sample_rate, number_channels, md5, bit_rate, codec;
    # only the samples and the sample rate are used
    loaded = estd.AudioLoader(filename=fpath)()
    audio, SR = loaded[:2]

    # downmix to mono when the loader returned more than one channel
    if audio.shape[1] != 1:
        audio = (audio[:, 0] + audio[:, 1]) / 2

    frame_size *= oversample_f  # apply the requested oversampling

    fc_index_arr = []
    hist = np.zeros(129)
    fft = estd.FFT(size=frame_size)
    window = estd.Windowing(size=frame_size, type="hann")
    avg_frames = np.zeros(int(frame_size / 2) + 1)

    def make_frames():
        # a fresh frame iterator over the whole signal
        return estd.FrameGenerator(audio,
                                   frameSize=frame_size,
                                   hopSize=hop_size,
                                   startFromZero=True)

    # first pass: energy of the loudest frame
    frame_energies = []
    for frame in make_frames():
        frame_energies.append(sum(abs(fft(window(frame)))**2))
    max_nrg = max(frame_energies)

    # second pass: per-frame cut-off search on sufficiently loud frames
    for last_index, frame in enumerate(make_frames()):
        magnitude = abs(fft(window(frame)))
        nrg = sum(magnitude**2)

        if not nrg >= 0.1 * max_nrg:
            continue

        for j in range(len(magnitude) - 1, -1, -1):
            if sum(magnitude[j:] / j) >= 1e-5:
                slot = int(j / frame_size * 128)
                fc_index_arr.append(slot)
                hist[slot] += nrg
                break
        avg_frames = avg_frames + magnitude

    # no frame produced a cut-off: register the top slot once
    if not fc_index_arr:
        fc_index_arr.append(128)
        hist[128] += 1

    avg_frames /= (last_index + 1)
    most_likely_bin, conf, binary = compute_mean_fc(avg_frames,
                                                    fc_index_arr, [],
                                                    SR,
                                                    hist=hist)

    most_likely_bin *= int(frame_size / 128)

    cutoff_hz = most_likely_bin * SR / frame_size
    print("f={:0=2f}, conf={:0=2f}, problem={}".format(
        cutoff_hz, conf, str(binary)))
    fig, ax = plt.subplots(2, 1, figsize=(15, 9))
    spectrum_axis, hist_axis = ax[0], ax[1]
    spectrum_axis.plot(20 * np.log10(avg_frames + eps))
    spectrum_axis.axvline(x=most_likely_bin, color='r')
    spectrum_axis.set_ylim(bottom=-120)
    hist_axis.stem(hist)
    plt.show()

コード例 #19

0

ファイルを表示

ファイル: test.py プロジェクト: vbadenas/MTG-Audio-Problems-Detection

def main(fpath: str, frame_size: float, hop_size: float, entropy_th: float):
    """Estimate and print a frame-based SNR figure for a wav file.

    Every frame gets a normalised energy and an autocorrelation ratio
    (``ac[0] / sum(ac)``). Frames with normalised energy below 0.1, or with a
    normalised autocorrelation ratio of at least 0.6, are counted as noise;
    the rest are counted as signal. The SNR is the dB ratio of the mean
    squared normalised energies of both groups. The SNR, the frame counts and
    a confidence value are printed and the audio, autocorrelation ratios and
    energies are plotted. Nothing is returned.

    Args:
        fpath: path of the audio file; must end in ``.wav``.
        frame_size: frame size in samples, passed to the frame generator.
        hop_size: hop size in samples, passed to the frame generator.
        entropy_th: accepted for interface compatibility; not used here.

    Raises:
        ValueError: if the file extension is not ``.wav``.
    """
    if os.path.splitext(fpath)[1] != ".wav":
        # check if the file has a wav extension, else: raise error
        raise ValueError("file must be wav")

    # audio loader returns x, sample_rate, number_channels, md5, bit_rate,
    # codec, of which only the samples are needed
    audio, _, _, _, _, _ = estd.AudioLoader(filename=fpath)()

    # multi-channel input: concatenate the channels one after the other
    if audio.shape[1] > 1:
        audio = np.reshape(audio, audio.shape[0] * audio.shape[1], order='F')

    # Normalise by the maximum sample. ndarray.max() avoids iterating the
    # whole signal in Python; an all-zero signal is left untouched instead of
    # being turned into NaNs by a 0/0 division.
    audio = audio.astype("float32")
    peak = audio.max()
    if peak != 0:
        audio = audio / peak
    audio = esarr(audio.astype("float16"))

    max_nrg = max([
        sum(frame**2) for frame in estd.FrameGenerator(
            audio, frameSize=frame_size, hopSize=hop_size, startFromZero=True)
    ])
    ac_arr = []
    nrg_arr = []
    sig_pwr = 0
    noise_pwr = 0
    sig_cnt = 0
    noise_cnt = 0
    ac_th = 0.6

    for frame in estd.FrameGenerator(audio,
                                     frameSize=frame_size,
                                     hopSize=hop_size,
                                     startFromZero=True):
        ac = abs(autocorr(frame, mode="half"))
        nrg = sum(frame**2)
        ac = ac[0] / sum(ac) if sum(ac) > 0 else 0
        nrg = nrg / max_nrg if max_nrg > 0 else 0
        ac_arr.append(ac)
        nrg_arr.append(nrg)

    # Normalise the autocorrelation ratios on an explicit array. Dividing the
    # Python list in place only worked through NumPy's reflected operator and
    # raised TypeError when every entry was the plain int 0 (silent input).
    ac_arr = np.asarray(ac_arr, dtype=float)
    ac_peak = ac_arr.max()
    if ac_peak > 0:
        ac_arr = ac_arr / ac_peak

    for nrg, ac in zip(nrg_arr, ac_arr):
        if nrg < 0.1 or ac >= ac_th:
            # low-energy frames and highly self-similar frames count as noise
            noise_pwr += nrg**2
            noise_cnt += 1
        else:
            sig_pwr += nrg**2
            sig_cnt += 1

    if noise_cnt == 0:
        SNR = np.inf
    elif sig_cnt == 0:
        SNR = 10 * np.log10(eps)
    else:
        sig_pwr /= sig_cnt
        noise_pwr /= noise_cnt
        SNR = 10 * np.log10(sig_pwr / noise_pwr)

    print("SNR: ", SNR)
    print("sig: {}, noise: {}".format(sig_cnt, noise_cnt))
    print("conf: ", 1 - abs(noise_cnt - sig_cnt) / (sig_cnt + noise_cnt))

    _, ax = plt.subplots(3, 1, figsize=(15, 9))
    ax[0].plot(audio)
    ax[1].plot(ac_arr)
    ax[2].plot(nrg_arr)
    plt.show()