Esempio n. 1
0
    def get_shannon_index(self, total_bins=8):
        """Return a normalised entropy of spectrogram energy across row bands.

        The squared spectrogram is scaled by ``1 / win_len``, rescaled to
        ``(0, 1)``, and summed over consecutive groups of STFT rows. Each
        group sum is divided by the total of the rescaled spectrogram, and the
        entropy of those fractions is divided by ``log2`` of the group count.

        Args:
            total_bins: Number of row groups to accumulate.

        Returns:
            ``AudioProcessing.get_entropy`` of the group fractions divided by
            ``log2(number of groups)``.
        """
        # Rows per group: 1000.0 divided by (fs / n_fft), rounded up
        # (presumably the number of STFT rows per 1 kHz -- TODO confirm).
        rows_per_band = int(np.ceil(1000.0 / (self.fs / float(self.n_fft))))

        power = AudioProcessing.rescale(
            (1.0 / self.win_len) * (self.stft**2), (0, 1))
        total_power = np.sum(power)

        upper_row = total_bins * rows_per_band
        band_energy = []
        row = 0
        while row < upper_row:
            next_row = row + rows_per_band
            # NOTE(review): for an integer total_bins this condition never
            # holds, so rows at or beyond upper_row only count towards
            # total_power -- confirm whether that is intended.
            if next_row > upper_row:
                band = power[row:, :]
            else:
                band = power[row:next_row, :]
            band_energy.append(np.sum(band))
            row = next_row

        band_probs = np.array(band_energy) / total_power

        return AudioProcessing.get_entropy(band_probs) / np.log2(
            len(band_probs))
Esempio n. 2
0
    def __init__(self, data, fs, n_fft=512, win_len=512, hop_len=512):
        """Prepare the envelope, spectrogram and entropy attributes.

        Args:
            data: Array of samples; trailing samples that do not fill a whole
                ``win_len`` window are dropped.
            fs: Value stored as ``self.fs`` and used to convert the 500 cut-off
                below into a number of STFT rows (presumably the sampling
                rate in Hz -- TODO confirm).
            n_fft: Passed to ``AudioProcessing.get_stft``.
            win_len: Window length for the envelope and the STFT.
            hop_len: Passed to ``AudioProcessing.get_stft``.
        """
        # Keep only a whole number of windows.
        remainder = data.size % win_len
        if remainder:
            data = data[:-remainder]

        self.data = data
        self.fs = fs
        self.n_fft = n_fft
        self.win_len = win_len
        self.hop_len = hop_len

        # Envelope with negative values clamped to zero.
        envelope = AudioProcessing.get_envelope(self.data, frame_size=win_len)
        envelope[envelope < 0] = 0
        self.envelope = envelope

        # Magnitude spectrogram, smoothed in place by smooth_spectrogram().
        self.stft = np.absolute(
            AudioProcessing.get_stft(self.data, n_fft, win_len, hop_len))
        self.smooth_spectrogram()

        # Discard the lowest rows: ceil(500 / (fs / n_fft)) of them.
        n_drop = int(np.ceil(500 / (float(self.fs) / self.n_fft)))
        self.stft = self.stft[n_drop:, :]

        # The remaining steps run in this order because later ones read
        # attributes set by earlier ones (e.g. background_noise).
        self._remove_spectrogram_noise()
        self._calculate_background_noise()
        self._get_temporal_entropy()
        self._get_segments_above_noise()
        self._get_spectral_entropy()
Esempio n. 3
0
def getAcousticIndices(audiofile):
    """Compute the acoustic indices for a single audio file.

    Loads up to 60 seconds of ``audiofile``, converts it to mono, resamples
    it to 17640 and runs ``AcousticIndices`` on the result.

    Args:
        audiofile: Path (or other source accepted by ``librosa.load``) of the
            audio to analyse.

    Returns:
        A list with one dictionary per index, holding the keys ``"index"``
        (header), ``"value"`` (rounded to 4 decimals) and ``"desc"``; or the
        string ``"ERROR_PRESENT"`` if any step raised an exception.
    """
    if DEBUG_FLAG:
        print(
            "[WORKING] Attempting to run acoustic indices calculator - acousticIndices.py"
        )

    try:
        data, fs = librosa.load(audiofile, sr=None, offset=0, duration=60)
        # mono channel
        data = AudioProcessing.convert_to_mono(data)

        # changing sampling rate
        new_fs = 17640
        data_chunk = AudioProcessing.resample(data, fs, new_fs)

        # extracting indices
        acousticIndices = AcousticIndices(data_chunk, new_fs)
        acoustic_indices = [
            round(x, 4) for x in acousticIndices.get_acoustic_indices()
        ]
        if PREDICTION_VERBOSE:
            print(acoustic_indices)

        acoustic_headers = acousticIndices.get_acoustic_indices_headers()
        acoustic_descs = acousticIndices.get_acoustic_indices_descs()

        # singleResultArray stores the results of one file (list of dicts).
        singleResultArray = []
        for i, header in enumerate(acoustic_headers):
            # Indexing (rather than zip) keeps a length mismatch between the
            # three sequences an error instead of a silent truncation.
            singleResultArray.append({
                "index": header,
                "value": acoustic_indices[i],
                "desc": acoustic_descs[i]
            })

            if DEBUG_FLAG:
                print("[WORKING] Calculated " + header +
                      " - acousticIndices.py")
    except Exception:
        # Best-effort boundary: report the failure and hand the caller the
        # sentinel instead of propagating. Return here so the success message
        # below is not printed for a failed run.
        print(traceback.format_exc())
        return "ERROR_PRESENT"

    if DEBUG_FLAG:
        print("[SUCCESS] Calculated acoustic indices - acousticIndices.py")
    return singleResultArray
def getAcousticIndices():
    """Compute the acoustic indices for every file in ``instance/upload/``.

    Each file is loaded (up to 60 seconds), converted to mono, resampled to
    17640 and passed through ``AcousticIndices``.

    Returns:
        A dictionary keyed by a running file counter (0, 1, ...) in
        ``os.listdir`` order. Each value is a list with one dictionary per
        index, holding the keys ``"index"`` (header) and ``"value"``.
    """
    upload_dir = "instance/upload/"

    # fileDictionary maps the file counter to that file's results.
    fileDictionary = {}

    print(
        "[WORKING] Attempting to run acoustic indices calculator - acousticIndices.py"
    )
    for fileCount, file_name in enumerate(os.listdir(upload_dir)):
        # correct the file path with the prefixed upload folder
        filePath = upload_dir + file_name
        data, fs = librosa.load(filePath, sr=None, offset=0, duration=60)

        # mono channel
        data = AudioProcessing.convert_to_mono(data)

        # changing sampling rate
        new_fs = 17640
        data_chunk = AudioProcessing.resample(data, fs, new_fs)

        # extracting indices
        acousticIndices = AcousticIndices(data_chunk, new_fs)
        acoustic_indices = acousticIndices.get_acoustic_indices()
        acoustic_headers = acousticIndices.get_acoustic_indices_headers()

        # singleResultArray stores the results of one file (list of dicts).
        singleResultArray = []
        for i, header in enumerate(acoustic_headers):
            singleResultArray.append({
                "index": header,
                "value": acoustic_indices[i]
            })
            # Logged per index inside the loop: the previous placement after
            # the loop relied on the leaked loop variable, reported only the
            # last header and failed when there were no headers.
            print("[WORKING] Calculated " + header + " - acousticIndices.py")

        fileDictionary[fileCount] = singleResultArray

    print("[SUCCESS] Calculated acoustic indices - acousticIndices.py")
    return fileDictionary
    def _get_segments_above_noise(self):
        """Find runs where the log envelope exceeds the noise threshold.

        Stores the result in ``self.segments`` as a list of
        ``(start, stop, stop - start)`` tuples, keeping only runs longer than
        ``0.1 * self.fs``. Requires ``self.background_noise``,
        ``self.envelope``, ``self.data`` and ``self.fs`` to be set.
        """
        # Threshold sits 3 above the stored background noise (presumably dB,
        # since it is compared against 20*log10 values -- TODO confirm).
        threshold = self.background_noise + 3
        envelope = AudioProcessing.get_envelope(abs(self.data),
                                                frame_size=1024)
        # Zeros are dropped before taking the logarithm.
        non_zero = envelope[np.nonzero(envelope)]
        data_log = 20 * np.log10(non_zero)

        # print(data_log.shape)
        # kernel = 1/256.0*np.ones(156)
        #
        # data_log = np.convolve(data_log,kernel)
        # print(data_log.shape)

        ind = np.where(data_log > threshold)

        # NOTE(review): ``ind`` indexes the zero-stripped ``data_log`` while
        # ``check_array`` is sized from ``self.envelope`` (built with a
        # different frame_size in __init__) -- confirm the indices line up.
        check_array = np.zeros(self.envelope.size)
        check_array[ind] = 1

        # In the first difference, +1 marks a 0->1 transition and -1 a 1->0
        # transition of the above-threshold mask.
        diff = np.diff(check_array)

        ones = np.where(diff == 1)[0]
        minus_ones = np.where(diff == -1)[0]

        # With no rising edge, treat index 0 as the start.
        if ones.size == 0:
            ones = np.array([0])

        # With no falling edge, treat the last index as the end.
        if minus_ones.size == 0:
            minus_ones = np.array([check_array.size - 1])

        # First falling edge is not preceded by a rising edge: prepend a
        # start at index 0.
        if ones[0] >= minus_ones[0]:
            ones = np.append(0, ones)

        # Last rising edge is not followed by a falling edge: append an end
        # at the last index.
        if ones[-1] >= minus_ones[-1]:
            minus_ones = np.append(minus_ones, [check_array.size - 1])

        segments = []

        # considering segments which are greater than 100 ms
        # NOTE(review): the duration below is measured in check_array index
        # units; whether those are samples depends on get_envelope -- confirm.
        min_seg_length = 0.1 * self.fs

        # Pair the i-th start with the i-th end.
        for i in range(ones.size):
            seg_duration = (minus_ones[i] - ones[i])

            if seg_duration > min_seg_length:
                segments.append(
                    (ones[i], minus_ones[i], minus_ones[i] - ones[i]))

        # Disabled debugging plot; the constant condition means it never runs.
        if 0:
            # plotting check array

            segment_array = np.zeros(self.data.size)
            for segment in segments:
                segment_array[segment[0]:segment[1]] = 1
            import matplotlib.pyplot as plt
            plt.plot(self.data)
            plt.plot(segment_array)
            plt.show()

        self.segments = segments
Esempio n. 6
0
    def get_spectral_maxima_entropy(self):
        """Return a normalised entropy of where each column's maximum lies.

        For every column covered by ``self.segments`` (segment bounds divided
        by ``n_fft``, end column inclusive) the row index of the largest
        spectrogram value is collected. The histogram of those row indices,
        with empty bins removed, is passed to ``AudioProcessing.get_entropy``
        and the result is divided by ``log2`` of the spectrogram row count.
        """
        n_rows = self.stft.shape[0]

        peak_rows = []
        for segment in self.segments:
            first_col = int(float(segment[0]) / self.n_fft)
            last_col = int(float(segment[1]) / self.n_fft)
            peak_rows.extend(
                np.argmax(self.stft[:, col])
                for col in range(first_col, last_col + 1))

        pdf, _ = AudioProcessing.get_histogram(peak_rows,
                                               bins=np.arange(0, n_rows))

        # Only occupied bins contribute to the entropy.
        occupied = pdf[np.nonzero(pdf)]

        return AudioProcessing.get_entropy(occupied) / np.log2(n_rows)
Esempio n. 7
0
    def _get_spectral_entropy(self):
        """Store the normalised entropy of the quantised spectrogram values.

        A copy of the spectrogram is rescaled to ``(0, N)`` with ``N = 2**10``
        and cast to ``uint16``. The relative frequency of each distinct value
        is passed to ``AudioProcessing.get_entropy``, and the result divided
        by ``log2(N)`` is stored in ``self.spectral_entropy``.
        """
        N = 2**10
        quantised = AudioProcessing.rescale(np.copy(self.stft), (0, N))
        quantised = quantised.astype(np.uint16)

        # np.unique works on the flattened array and returns one count per
        # distinct value. It replaces the ``itemfreq`` helper used before
        # (presumably scipy.stats.itemfreq, which SciPy has removed) and
        # yields the same counts in the same order.
        _, counts = np.unique(quantised, return_counts=True)

        pmf = counts / float(quantised.size)

        self.spectral_entropy = AudioProcessing.get_entropy(pmf) / np.log2(N)
Esempio n. 8
0
    def _remove_spectrogram_noise(self):
        """Subtract a per-row noise level from the spectrogram, flooring at 0.

        For each row, ``AudioProcessing.get_row_background_noise`` supplies a
        value that is converted with ``10 ** (value / 20)`` and subtracted
        from the row; negative results are set to zero. ``self.stft`` is
        replaced by the resulting array.
        """
        cleaned = np.zeros(self.stft.shape)
        n_rows = self.stft.shape[0]

        for idx in range(n_rows):
            freq_row = self.stft[idx, :]
            noise_log = AudioProcessing.get_row_background_noise(freq_row)
            noise_level = 10**(noise_log / 20.0)

            reduced = freq_row - noise_level
            # Anything pushed below zero by the subtraction becomes zero.
            cleaned[idx, :] = np.where(reduced < 0, 0, reduced)

        self.stft = cleaned
Esempio n. 9
0
    def _get_temporal_entropy(self):
        """Store the normalised entropy of the quantised envelope energy.

        The non-zero envelope values are squared, rescaled to ``(0, N)`` with
        ``N = 2**10`` and cast to ``uint16``. The relative frequency of each
        distinct value is passed to ``AudioProcessing.get_entropy``, and the
        result divided by ``log2(N)`` is stored in ``self.temporal_entropy``.
        """
        envelope = self.envelope[np.nonzero(self.envelope)]
        envelope_energy = envelope**2

        N = 2**10  # (i.e 1024 values of envelope energy possible)

        envelope_energy = AudioProcessing.rescale(envelope_energy, (0, N))
        envelope_energy = envelope_energy.astype(np.uint16)

        # np.unique returns one count per distinct value. It replaces the
        # ``itemfreq`` helper used before (presumably scipy.stats.itemfreq,
        # which SciPy has removed) and yields the same counts in the same
        # order.
        _, counts = np.unique(envelope_energy, return_counts=True)

        pmf = counts / float(envelope_energy.size)

        self.temporal_entropy = AudioProcessing.get_entropy(pmf) / np.log2(N)
Esempio n. 10
0
    def get_number_of_peaks(self):
        """Count spectral peaks over all spectrogram columns.

        The spectrogram is rescaled to ``(0, 1)`` and each column is searched
        with ``peakutils.indexes`` (``thres=0.3``). A peak is counted when it
        is not at index 0 and exceeds its lower neighbour by more than 0.01.

        Returns:
            The total number of counted peaks.
        """
        stft = AudioProcessing.rescale(np.copy(self.stft), (0, 1))

        # Minimum peak spacing: a third of ceil(200 / (fs / n_fft)) rows,
        # rounded up.
        min_distance = np.ceil(
            int(np.ceil(200.0 / (self.fs / float(self.n_fft)))) / 3.0)

        no_peaks = 0

        # A leftover pdb.set_trace() used to sit in this loop and stopped
        # execution at every column; it has been removed.
        for i in range(stft.shape[1]):
            col = stft[:, i]

            peaks = peakutils.indexes(col, thres=0.3, min_dist=min_distance)

            for peak in peaks:
                # Index 0 has no lower neighbour to compare against.
                if peak != 0 and col[peak] - col[peak - 1] > 0.01:
                    no_peaks += 1

        return no_peaks
    def get_spectral_average_variance_entropy(self):
        """Return entropies of the per-row mean and variance over segments.

        The spectrogram columns covered by ``self.segments`` (segment bounds
        divided by ``n_fft``) are joined side by side. The per-row mean and
        variance are each rescaled to ``(0, 2**8)``, cast to ``uint8`` and
        histogrammed; the entropy of the occupied bins is divided by
        ``log2(2**8)``.

        Returns:
            Tuple ``(average_spectrum_entropy, variance_spectrum_entropy)``.
        """
        spectrogram = np.copy(self.stft)

        pieces = []
        for segment in self.segments:
            first_col = int(float(segment[0]) / self.n_fft)
            last_col = int(float(segment[1]) / self.n_fft)
            pieces.append(spectrogram[:, first_col:last_col])

        # NOTE(review): with no segments this stays None and the reductions
        # below are expected to fail -- confirm callers always have segments.
        segments_stft = np.concatenate(pieces, axis=1) if pieces else None

        mean_per_row = np.mean(segments_stft, axis=1)
        var_per_row = np.var(segments_stft, axis=1)

        N = 2**8
        mean_levels = AudioProcessing.rescale(mean_per_row, (0, N))
        mean_levels = mean_levels.astype(np.uint8)

        var_levels = AudioProcessing.rescale(var_per_row, (0, N))
        var_levels = var_levels.astype(np.uint8)

        avg_pdf, _ = AudioProcessing.get_histogram(mean_levels,
                                                   bins=np.arange(0, 255))
        var_pdf, _ = AudioProcessing.get_histogram(var_levels,
                                                   bins=np.arange(0, 255))

        # Only occupied bins contribute to the entropies.
        avg_pdf = avg_pdf[np.nonzero(avg_pdf)]
        var_pdf = var_pdf[np.nonzero(var_pdf)]

        log_levels = np.log2(N)
        average_spectrum_entropy = AudioProcessing.get_entropy(
            avg_pdf) / log_levels
        variance_spectrum_entropy = AudioProcessing.get_entropy(
            var_pdf) / log_levels

        return average_spectrum_entropy, variance_spectrum_entropy
Esempio n. 12
0
    def _calculate_background_noise(self):
        """Store the envelope's background-noise estimate on the instance.

        Calls ``AudioProcessing.get_background_noise`` on ``self.envelope``
        and saves the result as ``self.background_noise``.
        """

        background_noise = AudioProcessing.get_background_noise(self.envelope)
        self.background_noise = background_noise
        # NOTE(review): ``feature_headers`` is not defined in this method and
        # the lines below do not relate to background noise. They look like
        # the tail of a different function (one that builds a list of index
        # headers) that was spliced in here -- confirm against the original
        # file before relying on this return value.
        feature_headers.append("Spectral Diversity")
        feature_headers.append("Spectral Persistence")

        return feature_headers


if __name__ == "__main__":
    # Example run: compute the indices for one file and place them in a
    # single-row DataFrame whose columns are the index headers.

    # Audio Filename to be read
    filename = "bird.mp3"

    # considering one minute of the audio - the indices are taken 1 minute audio segments
    data, fs = librosa.load(filename, sr=None, offset=0, duration=60)

    # mono channel
    data = AudioProcessing.convert_to_mono(data)

    # changing sampling rate
    new_fs = 17640
    data_chunk = AudioProcessing.resample(data, fs, new_fs)

    # extracting indices
    acousticIndices = AcousticIndices(data_chunk, new_fs)
    acoustic_indices = acousticIndices.get_acoustic_indices()
    acoustic_headers = acousticIndices.get_acoustic_indices_headers()

    # Stack the computed values (not the AcousticIndices object, as was done
    # before) so each index becomes one column of a single row.
    indices_row = np.column_stack(acoustic_indices)

    # creating a dataframe
    df = pd.DataFrame(indices_row)
    df.columns = acoustic_headers