def get_shannon_index(self, total_bins=8):
    """Return the normalised entropy of spectral power across frequency bands.

    The squared spectrogram, scaled by ``1 / win_len`` and rescaled to the
    range (0, 1), is cut along axis 0 into consecutive bands of
    ``ceil(1000 / (fs / n_fft))`` rows each (presumably 1 kHz per band when
    ``fs`` is in Hz -- TODO confirm). The summed power of each band is
    divided by the summed power of the whole rescaled spectrogram, and the
    entropy of those ratios is divided by ``log2`` of the number of bands.

    Args:
        total_bins: Number of bands to accumulate, starting at row 0.

    Returns:
        ``AudioProcessing.get_entropy`` of the band ratios divided by
        ``log2(number of bands)``.
    """
    rows_per_band = int(np.ceil(1000.0 / (self.fs / float(self.n_fft))))
    upper_row = total_bins * rows_per_band

    power = AudioProcessing.rescale(
        (1.0 / self.win_len) * (self.stft**2), (0, 1))
    # NOTE(review): the normaliser covers every row of the spectrogram, while
    # the bands below only cover rows [0, upper_row); the ratios therefore do
    # not have to sum to 1 when the spectrogram has more rows than that.
    total_power = np.sum(power)

    band_power = []
    start = 0
    while start < upper_row:
        stop = start + rows_per_band
        if stop > upper_row:
            # Only reachable when total_bins is not a whole number: the last
            # (partial) band then absorbs every remaining row.
            band = power[start:, :]
        else:
            band = power[start:stop, :]
        band_power.append(np.sum(band))
        start = stop

    band_ratios = np.array(band_power) / total_power
    return AudioProcessing.get_entropy(band_ratios) / np.log2(len(band_ratios))
def __init__(self, data, fs, n_fft=512, win_len=512, hop_len=512):
    """Prepare the signal, its envelope and spectrogram, then precompute features.

    Args:
        data: Audio samples; must expose ``.size`` and support slicing
            (presumably a 1-D NumPy array -- TODO confirm).
        fs: Sampling rate of ``data``.
        n_fft: FFT size handed to ``AudioProcessing.get_stft``.
        win_len: Window length handed to ``AudioProcessing.get_stft``; also
            used as the envelope frame size and to trim ``data``.
        hop_len: Hop length handed to ``AudioProcessing.get_stft``.
    """
    # Drop trailing samples so the length is a whole multiple of win_len.
    remainder = data.size % win_len
    if remainder != 0:
        data = data[:-remainder]

    self.data = data
    self.fs = fs
    self.n_fft = n_fft
    self.win_len = win_len
    self.hop_len = hop_len

    # Envelope with negative values clamped to zero.
    envelope = AudioProcessing.get_envelope(self.data, frame_size=win_len)
    envelope[envelope < 0] = 0
    self.envelope = envelope

    # Magnitude spectrogram, smoothed before any rows are removed.
    self.stft = np.absolute(
        AudioProcessing.get_stft(self.data, n_fft, win_len, hop_len))
    self.smooth_spectrogram()

    # Discard the first ceil(500 / (fs / n_fft)) rows of the spectrogram.
    n_drop = int(np.ceil(500 / (float(self.fs) / self.n_fft)))
    self.stft = self.stft[n_drop:, :]

    # Order matters: the background noise must be computed before the
    # segment detection, which reads self.background_noise.
    setup_steps = (
        self._remove_spectrogram_noise,
        self._calculate_background_noise,
        self._get_temporal_entropy,
        self._get_segments_above_noise,
        self._get_spectral_entropy,
    )
    for step in setup_steps:
        step()
def getAcousticIndices(audiofile):
    """Compute the acoustic indices of a single audio file.

    At most the first 60 seconds of ``audiofile`` are loaded, converted to
    mono and resampled to a sampling rate of 17640 before the indices are
    extracted with ``AcousticIndices``.

    Args:
        audiofile: Audio source accepted by ``librosa.load``.

    Returns:
        A list with one dict per index, each holding the keys ``"index"``
        (header), ``"value"`` (rounded to 4 decimals) and ``"desc"``
        (description). If any step raises, the traceback is printed and the
        string ``"ERROR_PRESENT"`` is returned instead.
    """
    if DEBUG_FLAG:
        print(
            "[WORKING] Attempting to run acoustic indices calculator - acousticIndices.py"
        )

    try:
        data, fs = librosa.load(audiofile, sr=None, offset=0, duration=60)
        # mono channel
        data = AudioProcessing.convert_to_mono(data)
        # changing sampling rate
        new_fs = 17640
        data_chunk = AudioProcessing.resample(data, fs, new_fs)

        # extracting indices
        calculator = AcousticIndices(data_chunk, new_fs)
        values = [round(value, 4) for value in calculator.get_acoustic_indices()]
        if PREDICTION_VERBOSE:
            print(values)
        headers = calculator.get_acoustic_indices_headers()
        descs = calculator.get_acoustic_indices_descs()

        # One dictionary per acoustic index. Values and descriptions are
        # looked up by position so a length mismatch still raises (and is
        # reported below) instead of being silently truncated.
        results = []
        for position, header in enumerate(headers):
            results.append({
                "index": header,
                "value": values[position],
                "desc": descs[position]
            })
            if DEBUG_FLAG:
                print("[WORKING] Calculated " + header +
                      " - acousticIndices.py")
    except Exception:
        # Best-effort boundary: report the failure and hand the caller the
        # error sentinel rather than propagating.
        print(traceback.format_exc())
        return "ERROR_PRESENT"

    # Only reached when no exception occurred, so the success message is no
    # longer printed after a failure.
    if DEBUG_FLAG:
        print("[SUCCESS] Calculated acoustic indices - acousticIndices.py")
    return results
def getAcousticIndices():
    """Compute the acoustic indices of every file in ``instance/upload/``.

    For each directory entry, at most the first 60 seconds are loaded,
    converted to mono and resampled to a sampling rate of 17640 before the
    indices are extracted with ``AcousticIndices``.

    Returns:
        A dict keyed by a running file number (0, 1, ... in
        ``os.listdir`` order); each value is a list of dicts with the keys
        ``"index"`` (header) and ``"value"``.
    """
    upload_dir = "instance/upload/"
    results_by_file = {}

    print(
        "[WORKING] Attempting to run acoustic indices calculator - acousticIndices.py"
    )

    for file_number, entry in enumerate(os.listdir(upload_dir)):
        # Prefix the entry name with the upload folder.
        path = upload_dir + entry
        data, fs = librosa.load(path, sr=None, offset=0, duration=60)
        # mono channel
        data = AudioProcessing.convert_to_mono(data)
        # changing sampling rate
        new_fs = 17640
        data_chunk = AudioProcessing.resample(data, fs, new_fs)

        # extracting indices
        calculator = AcousticIndices(data_chunk, new_fs)
        values = calculator.get_acoustic_indices()
        headers = calculator.get_acoustic_indices_headers()

        # Results of this one file: one dictionary per acoustic index.
        per_file = []
        for position, header in enumerate(headers):
            per_file.append({"index": header, "value": values[position]})
            print("[WORKING] Calculated " + header + " - acousticIndices.py")

        results_by_file[file_number] = per_file

    print("[SUCCESS] Calculated acoustic indices - acousticIndices.py")
    return results_by_file
def _get_segments_above_noise(self):
    """Find stretches of the envelope that rise above the background noise.

    Stores the result in ``self.segments`` as a list of
    ``(start, end, length)`` tuples of indices into ``check_array``; only
    stretches longer than ``0.1 * self.fs`` indices are kept.

    Reads ``self.background_noise``, ``self.data``, ``self.envelope`` and
    ``self.fs``.
    """
    # Threshold is 3 above the background noise (presumably dB, matching the
    # 20*log10 scale used for data_log below -- TODO confirm).
    threshold = self.background_noise + 3
    envelope = AudioProcessing.get_envelope(abs(self.data), frame_size=1024)
    # Zeros are removed before taking the logarithm.
    non_zero = envelope[np.nonzero(envelope)]
    data_log = 20 * np.log10(non_zero)
    # Disabled smoothing experiment, kept for reference:
    # print(data_log.shape)
    # kernel = 1/256.0*np.ones(156)
    # # data_log = np.convolve(data_log,kernel)
    # print(data_log.shape)
    # NOTE(review): `ind` holds positions within `non_zero`, but it is applied
    # to an array sized from self.envelope (computed elsewhere with a
    # different frame size). If `envelope` contains zeros or differs in
    # length from self.envelope the positions will not line up -- confirm.
    ind = np.where(data_log > threshold)
    check_array = np.zeros(self.envelope.size)
    check_array[ind] = 1
    # +1 in the difference marks a 0 -> 1 transition, -1 marks 1 -> 0.
    diff = np.diff(check_array)
    ones = np.where(diff == 1)[0]
    minus_ones = np.where(diff == -1)[0]
    # With no rising (or falling) transition at all, fall back to the first
    # (or last) index so the arrays below are never empty.
    if ones.size == 0:
        ones = np.array([0])
    if minus_ones.size == 0:
        minus_ones = np.array([check_array.size - 1])
    # If the first falling edge is not preceded by a rising edge, prepend
    # index 0 as its start.
    if ones[0] >= minus_ones[0]:
        ones = np.append(0, ones)
    # If the last rising edge has no falling edge after it, close it at the
    # last index.
    if ones[-1] >= minus_ones[-1]:
        minus_ones = np.append(minus_ones, [check_array.size - 1])
    segments = []
    # considering segments which are greater than 100 ms
    min_seg_length = 0.1 * self.fs
    for i in range(ones.size):
        seg_duration = (minus_ones[i] - ones[i])
        if seg_duration > min_seg_length:
            segments.append(
                (ones[i], minus_ones[i], minus_ones[i] - ones[i]))
    # Debug plot, permanently disabled by the constant-false condition.
    if 0:
        # plotting check array
        segment_array = np.zeros(self.data.size)
        for segment in segments:
            segment_array[segment[0]:segment[1]] = 1
        import matplotlib.pyplot as plt
        plt.plot(self.data)
        plt.plot(segment_array)
        plt.show()
    self.segments = segments
def get_spectral_maxima_entropy(self):
    """Return the normalised entropy of the per-column spectral maxima.

    For every segment in ``self.segments`` the start and end indices are
    divided by ``n_fft`` to obtain a column range (end column inclusive).
    The row index of the maximum of each such spectrogram column is
    collected, histogrammed over ``np.arange(0, number of rows)``, and the
    entropy of the non-zero histogram entries is divided by
    ``log2(number of rows)``.

    Returns:
        The entropy value described above.
    """
    n_rows = self.stft.shape[0]

    peak_rows = []
    for segment in self.segments:
        first_col = int(float(segment[0]) / self.n_fft)
        last_col = int(float(segment[1]) / self.n_fft)
        peak_rows.extend(
            np.argmax(self.stft[:, col])
            for col in range(first_col, last_col + 1))

    pdf, _ = AudioProcessing.get_histogram(peak_rows,
                                           bins=np.arange(0, n_rows))
    # Empty histogram bins carry no information for the entropy.
    pdf = pdf[np.nonzero(pdf)]
    return AudioProcessing.get_entropy(pdf) / np.log2(n_rows)
def _get_spectral_entropy(self):
    """Compute the normalised entropy of the quantised spectrogram values.

    A copy of the spectrogram is rescaled to the range ``(0, 2**10)`` and
    cast to ``uint16``. The relative frequency of each distinct value forms
    a probability mass function whose entropy, divided by ``log2(2**10)``,
    is stored in ``self.spectral_entropy``.
    """
    N = 2**10
    quantised = AudioProcessing.rescale(np.copy(self.stft), (0, N))
    quantised = quantised.astype(np.uint16)

    # np.unique(..., return_counts=True) yields the same per-value counts as
    # the removed scipy.stats.itemfreq, without the SciPy dependency or the
    # manual loop over its rows.
    _, counts = np.unique(quantised, return_counts=True)
    pmf = counts / float(quantised.size)

    self.spectral_entropy = AudioProcessing.get_entropy(pmf) / np.log2(N)
def _remove_spectrogram_noise(self):
    """Subtract a per-row background-noise level from the spectrogram.

    For every row of ``self.stft`` the value returned by
    ``AudioProcessing.get_row_background_noise`` is converted with
    ``10 ** (x / 20)``, subtracted from the row, and negative results are
    set to zero. ``self.stft`` is replaced by the resulting array.
    """
    denoised = np.zeros(self.stft.shape)
    for row_index in range(self.stft.shape[0]):
        magnitudes = self.stft[row_index, :]
        noise_log = AudioProcessing.get_row_background_noise(magnitudes)
        noise_level = 10**(noise_log / 20.0)
        residual = magnitudes - noise_level
        # Anything that fell below the noise level is clamped to zero.
        denoised[row_index, :] = np.where(residual < 0, 0, residual)
    self.stft = denoised
def _get_temporal_entropy(self):
    """Compute the normalised entropy of the quantised envelope energy.

    The non-zero envelope values are squared, rescaled to the range
    ``(0, 2**10)`` and cast to ``uint16``. The relative frequency of each
    distinct value forms a probability mass function whose entropy, divided
    by ``log2(2**10)``, is stored in ``self.temporal_entropy``.
    """
    envelope = self.envelope[np.nonzero(self.envelope)]
    envelope_energy = envelope**2

    N = 2**10  # (i.e 1024 values of envelope energy possible)
    quantised = AudioProcessing.rescale(envelope_energy, (0, N))
    quantised = quantised.astype(np.uint16)

    # np.unique(..., return_counts=True) yields the same per-value counts as
    # the removed scipy.stats.itemfreq, without the SciPy dependency or the
    # manual loop over its rows.
    _, counts = np.unique(quantised, return_counts=True)
    pmf = counts / float(quantised.size)

    self.temporal_entropy = AudioProcessing.get_entropy(pmf) / np.log2(N)
def get_number_of_peaks(self):
    """Count the spectral peaks over all columns of the spectrogram.

    A copy of the spectrogram is rescaled to the range (0, 1). In every
    column, peaks are located with ``peakutils.indexes`` (threshold 0.3,
    minimum distance derived from ``200 / (fs / n_fft)`` rows divided by 3).
    A peak is counted when it is not at row 0 and exceeds the row directly
    below it by more than 0.01.

    Returns:
        The total number of counted peaks.
    """
    stft = AudioProcessing.rescale(np.copy(self.stft), (0, 1))
    min_distance = np.ceil(
        int(np.ceil(200.0 / (self.fs / float(self.n_fft)))) / 3.0)

    no_peaks = 0
    for i in range(stft.shape[1]):
        col = stft[:, i]
        # The leftover pdb.set_trace() breakpoint that used to sit here
        # stopped execution on every column and has been removed.
        peaks = peakutils.indexes(col, thres=0.3, min_dist=min_distance)
        for peak in peaks:
            # Row 0 has no lower neighbour to compare against.
            if peak != 0 and col[peak] - col[peak - 1] > 0.01:
                no_peaks += 1
    return no_peaks
def get_spectral_average_variance_entropy(self):
    """Return the normalised entropies of the mean and variance spectra.

    The spectrogram columns belonging to every segment in ``self.segments``
    (segment start/end divided by ``n_fft``, end column exclusive) are
    joined along axis 1. The per-row mean and variance of the joined
    columns are each rescaled to ``(0, 2**8)``, cast to ``uint8``,
    histogrammed over ``np.arange(0, 255)``, and the entropy of the
    non-zero histogram entries is divided by ``log2(2**8)``.

    Returns:
        Tuple ``(average_spectrum_entropy, variance_spectrum_entropy)``.
    """
    pieces = []
    for segment in self.segments:
        first_col = int(float(segment[0]) / self.n_fft)
        last_col = int(float(segment[1]) / self.n_fft)
        pieces.append(self.stft[:, first_col:last_col])
    # NOTE(review): with no segments this stays None and the statistics
    # below raise -- confirm whether callers can hit that case.
    segments_stft = np.concatenate(pieces, axis=1) if pieces else None

    N = 2**8

    def _normalised_entropy(spectrum):
        # NOTE(review): a rescaled value of exactly 2**8 does not fit into
        # uint8 -- confirm what AudioProcessing.rescale can return.
        levels = AudioProcessing.rescale(spectrum, (0, N)).astype(np.uint8)
        pdf, _ = AudioProcessing.get_histogram(levels,
                                               bins=np.arange(0, 255))
        pdf = pdf[np.nonzero(pdf)]
        return AudioProcessing.get_entropy(pdf) / np.log2(N)

    mean_spectrum = np.mean(segments_stft, axis=1)
    variance_spectrum = np.var(segments_stft, axis=1)
    return (_normalised_entropy(mean_spectrum),
            _normalised_entropy(variance_spectrum))
def _calculate_background_noise(self):
    """Store the background noise of the envelope in ``self.background_noise``.

    The value is whatever ``AudioProcessing.get_background_noise`` returns
    for ``self.envelope``.
    """
    self.background_noise = AudioProcessing.get_background_noise(
        self.envelope)
feature_headers.append("Spectral Diversity") feature_headers.append("Spectral Persistence") return feature_headers if __name__ == "__main__": # Audio Filename to be read filename = "bird.mp3" # considering one minute of the audio - the indices are taken 1 minute audio segments data, fs = librosa.load(filename, sr=None, offset=0, duration=60) # mono channel data = AudioProcessing.convert_to_mono(data) # changing sampling rate new_fs = 17640 data_chunk = AudioProcessing.resample(data, fs, new_fs) # extracting indices acousticIndices = AcousticIndices(data_chunk, new_fs) acoustic_indices = acousticIndices.get_acoustic_indices() acoustic_headers = acousticIndices.get_acoustic_indices_headers() acousticIndices = np.column_stack(acousticIndices) # creating a dataframe df = pd.DataFrame(acousticIndices) df.columns = acoustic_headers