def train_interval_mel_features(interval_dict, pattern='{:s}', nr_fft=100,
                                len_fft=1024, nr_mel_bins=100,
                                min_freq_wanted=200, max_freq_wanted=8000,
                                is_return_fit=False):
    """
    Build a mel-spectrogram feature matrix from labelled time intervals.

    For every interval of every file referenced by ``interval_dict`` the
    stft of that slice of audio is computed, converted to a mel spectrum
    and flattened into one row of the returned matrix.

    Parameters
    ----------
    interval_dict : dict
        Dictionary whose keys point to a file path via ``pattern`` and whose
        values are lists of ``[start_time, end_time]`` training intervals.
        The times are multiplied by the file's sample rate to obtain sample
        indices.
    pattern : string
        A formatting string to map from the interval_dict keys to file paths.
    nr_fft : int
        The number of ffts in each stft calculation.
    len_fft : int
        The length of each of the nr_fft fourier transforms for each stft.
    nr_mel_bins : int
        The number of bins in which the mel spectrum is to be divided.
    min_freq_wanted, max_freq_wanted : float
        The lowest/highest frequency in the returned mel spectrum.
    is_return_fit : bool
        If True, additionally return which (file_key, interval) pair
        produced each row of ``X``.

    Returns
    -------
    X : ndarray, shape (nr_intervals, nr_mel_bins * nr_fft)
        Each row corresponds to one interval and each column to one feature
        of the flattened mel spectrum. An empty ``interval_dict`` yields an
        array with zero rows.
    file_interval_tuple : tuple, only if is_return_fit
        Tuple of ``(file_key, time_interval)`` pairs in the row order of
        ``X``.
    """
    # NOTE(review): this function is defined a second time further down in
    # this file; at module level the later definition is the one in effect.

    # -- The total number of intervals in the dictionary. The builtin sum
    # -- keeps this an int (0 for an empty dict) so it is always a valid
    # -- array dimension.
    nr_intervals = sum(len(interval_dict[file_key])
                       for file_key in interval_dict)
    X = np.zeros((nr_intervals, nr_mel_bins * nr_fft))

    file_intervals = []
    row = 0
    for file_key in interval_dict:
        file_name = pattern.format(file_key)
        print(file_name)
        fs, data = spwav.read(file_name)
        # -- Remove the DC offset of the whole file before slicing
        data = data - np.mean(data)
        for time_interval in interval_dict[file_key]:
            file_intervals.append((file_key, time_interval))
            # -- Convert the interval borders from seconds to sample indices
            sf = int(time_interval[0] * fs)
            ef = int(time_interval[1] * fs)
            spectrum = stft.calc_stft(data[sf:ef], fs=fs, nr_fft=nr_fft,
                                      len_fft=len_fft)[0]
            # -- assumes the flattened mel spectrum has exactly
            # -- nr_mel_bins*nr_fft entries -- TODO confirm against
            # -- mel.stft_to_mel_freq
            X[row, :] = mel.stft_to_mel_freq(
                spectrum, fs=fs, len_fft=len_fft, nr_mel_bins=nr_mel_bins,
                min_freq_wanted=min_freq_wanted,
                max_freq_wanted=max_freq_wanted)[0].flatten()
            row += 1

    if is_return_fit:
        return X, tuple(file_intervals)
    return X
def peak_identification(peaks, width_in_s, width_roll_mean=200,
                        roll_max_peaks_threshold=4.0, fs=16000,
                        nr_ffts_per_s=100, chunk_len_s=60, len_fft=1024,
                        is_ret_roll_max_peaks=False):
    """
    Locate isolated peaks in a 1d correlation trace and report them as
    frame positions.

    The peak search itself is delegated to ``find_peak_ind``; this function
    converts ``width_in_s`` into trace samples beforehand and scales the
    resulting indices by ``frames_per_sample`` afterwards.

    Parameters
    ----------
    peaks : ndarray
        Array containing isolated peaks, e.g. as calculated by the pattern
        correlation, with a sample rate depending on nr_ffts_per_s.
    width_in_s : int
        The width in seconds of an interval in which the maximum is found,
        i.e. two maxima have to be at least width_in_s apart to be
        registered as separate.
    width_roll_mean : int
        The width used for the rolling mean normalisation of the data.
        Forwarded unchanged to ``find_peak_ind``.
    roll_max_peaks_threshold : float
        A peak has to be roll_max_peaks_threshold times larger in amplitude
        than the rolling mean to be registered as valid peak. Forwarded
        unchanged to ``find_peak_ind``.
    fs : float
        The sample frequency (frames per second) of the data.
    nr_ffts_per_s : int
        Number of ffts per second in the stft.
    chunk_len_s : int
        The length in seconds for each chunked stft.
    len_fft : int
        The length of each fft calculation.
    is_ret_roll_max_peaks : bool
        Return roll_max_peaks as well. Default is not.

    Returns
    -------
    peak_frame_list : ndarray
        Peak indices multiplied by ``frames_per_sample``.
    roll_max_peaks : ndarray, only if is_ret_roll_max_peaks
        Second value returned by ``find_peak_ind`` (rolling maximum of the
        data normalised by its rolling mean).
    """
    # NOTE(review): this function is defined a second time further down in
    # this file; at module level the later definition is the one in effect.
    _, _, frames_per_sample, sec_per_sample, _ = stft.calc_nr_frames(
        chunk_len_s, fs, len_fft, chunk_len_s * nr_ffts_per_s)

    # -- Options shared by both call variants of find_peak_ind
    finder_options = {
        'width_roll_mean': width_roll_mean,
        'roll_max_peaks_threshold': roll_max_peaks_threshold,
    }

    if not is_ret_roll_max_peaks:
        inds = find_peak_ind(peaks, width_in_s / sec_per_sample,
                             **finder_options)
        return np.array([ind * frames_per_sample for ind in inds])

    inds, roll_max_peaks = find_peak_ind(peaks, width_in_s / sec_per_sample,
                                         is_ret_roll_max_peaks=True,
                                         **finder_options)
    peak_frame_list = np.array([ind * frames_per_sample for ind in inds])
    return peak_frame_list, roll_max_peaks
def calc_pattern_correlation_chunked(data, pattern, fs, freq_fft_bins,
                                     chunk_len_s=45, len_fft=1024,
                                     nr_ffts_per_s=100, pattern_len_s=2):
    """
    Calculate the summed correlation between the stft of a timeseries and
    the stft of a pattern over a set of frequency bins.

    The data is processed in consecutive chunks of ``n_frames_chunk``
    frames; for each chunk the per-bin correlations are added up and the
    chunk results are concatenated. Used for data generation for machine
    learning as input for peak finding algorithms.

    Parameters
    ----------
    data : 1D ndarray
        Timeseries.
    pattern : ndarray
        The timeseries pattern which is used to calculate the correlation
        with data.
    fs : float
        The sample frequency (frames per second) of the data.
    freq_fft_bins : list
        The frequency bins (row indices of the stft) used for the
        correlation between pattern and the stft of the data. Must not be
        empty.
    chunk_len_s : int
        The length in seconds for each chunked stft.
    len_fft : int
        The length of each fft calculation.
    nr_ffts_per_s : int
        Number of ffts per second in the stft.
    pattern_len_s : int
        Length of the pattern in seconds.

    Returns
    -------
    peaks : 1D ndarray
        The concatenated array of the correlation between data and pattern.
        Empty if ``data`` is too short to contain a single processed chunk.

    Raises
    ------
    ValueError
        If ``freq_fft_bins`` is empty.
    """
    # NOTE(review): this function is defined a second time further down in
    # this file; at module level the later definition is the one in effect.
    if len(freq_fft_bins) == 0:
        raise ValueError('freq_fft_bins must contain at least one bin')

    nr_fft_chunk = chunk_len_s * nr_ffts_per_s
    n_frames_chunk, _, _, _, overlap = stft.calc_nr_frames(
        chunk_len_s, fs, len_fft, nr_fft_chunk)

    # -- By giving the overlap, the length of the pattern is not necessarily
    # -- pattern_len_s*nr_ffts_per_s anymore
    pattern = stft.calc_stft(pattern, 0, pattern.shape[0], fs,
                             pattern_len_s * nr_ffts_per_s,
                             overlap=overlap)[0]
    # -- z-score the pattern
    pattern = (pattern - np.mean(pattern)) / np.std(pattern)

    # -- Collect the per-chunk results and concatenate once at the end
    chunk_correlations = []
    end_frame = 0
    # NOTE(review): because of the strict '<', trailing data is skipped,
    # including a final chunk that would end exactly at the end of data --
    # confirm this is intended.
    while end_frame < data.shape[0] - n_frames_chunk:
        start_frame = end_frame
        end_frame = start_frame + n_frames_chunk
        spectrum = stft.calc_stft(data, start_frame, end_frame, fs,
                                  nr_fft_chunk)[0]
        print('spectrum.shape:  %s' % (spectrum.shape,))

        chunk_sum = None
        for i in freq_fft_bins:
            # -- The former `old_behavior=False` keyword was dropped: it is
            # -- the default behaviour and the keyword no longer exists in
            # -- current NumPy.
            bin_corr = np.correlate(spectrum[i, :], pattern[i, :],
                                    mode='same')
            if chunk_sum is None:
                chunk_sum = bin_corr
            else:
                chunk_sum += bin_corr
            print('tmp.shape:  %s' % (chunk_sum.shape,))
            print('spectrum[i,:].shape: %s' % (spectrum[i, :].shape,))
        chunk_correlations.append(chunk_sum)

    if not chunk_correlations:
        return np.array([])
    return np.hstack(chunk_correlations)
def peak_identification(peaks, width_in_s, width_roll_mean=200,
                        roll_max_peaks_threshold=4.0, fs=16000,
                        nr_ffts_per_s=100, chunk_len_s=60, len_fft=1024,
                        is_ret_roll_max_peaks=False):
    """
    Locate isolated peaks in a 1d correlation trace and report them as
    frame positions.

    The peak search itself is delegated to ``find_peak_ind``; this function
    converts ``width_in_s`` into trace samples beforehand and scales the
    resulting indices by ``frames_per_sample`` afterwards.

    Parameters
    ----------
    peaks : ndarray
        Array containing isolated peaks, e.g. as calculated by the pattern
        correlation, with a sample rate depending on nr_ffts_per_s.
    width_in_s : int
        The width in seconds of an interval in which the maximum is found,
        i.e. two maxima have to be at least width_in_s apart to be
        registered as separate.
    width_roll_mean : int
        The width used for the rolling mean normalisation of the data.
        Forwarded unchanged to ``find_peak_ind``.
    roll_max_peaks_threshold : float
        A peak has to be roll_max_peaks_threshold times larger in amplitude
        than the rolling mean to be registered as valid peak. Forwarded
        unchanged to ``find_peak_ind``.
    fs : float
        The sample frequency (frames per second) of the data.
    nr_ffts_per_s : int
        Number of ffts per second in the stft.
    chunk_len_s : int
        The length in seconds for each chunked stft.
    len_fft : int
        The length of each fft calculation.
    is_ret_roll_max_peaks : bool
        Return roll_max_peaks as well. Default is not.

    Returns
    -------
    peak_frame_list : ndarray
        Peak indices multiplied by ``frames_per_sample``.
    roll_max_peaks : ndarray, only if is_ret_roll_max_peaks
        Second value returned by ``find_peak_ind`` (rolling maximum of the
        data normalised by its rolling mean).
    """
    _, _, frames_per_sample, sec_per_sample, _ = stft.calc_nr_frames(
        chunk_len_s, fs, len_fft, chunk_len_s * nr_ffts_per_s)

    # -- Options shared by both call variants of find_peak_ind
    finder_options = {
        'width_roll_mean': width_roll_mean,
        'roll_max_peaks_threshold': roll_max_peaks_threshold,
    }

    if not is_ret_roll_max_peaks:
        inds = find_peak_ind(peaks, width_in_s / sec_per_sample,
                             **finder_options)
        return np.array([ind * frames_per_sample for ind in inds])

    inds, roll_max_peaks = find_peak_ind(peaks, width_in_s / sec_per_sample,
                                         is_ret_roll_max_peaks=True,
                                         **finder_options)
    peak_frame_list = np.array([ind * frames_per_sample for ind in inds])
    return peak_frame_list, roll_max_peaks
def calc_pattern_correlation_chunked(data, pattern, fs, freq_fft_bins,
                                     chunk_len_s=45, len_fft=1024,
                                     nr_ffts_per_s=100, pattern_len_s=2):
    """
    Calculate the summed correlation between the stft of a timeseries and
    the stft of a pattern over a set of frequency bins.

    The data is processed in consecutive chunks of ``n_frames_chunk``
    frames; for each chunk the per-bin correlations are added up and the
    chunk results are concatenated. Used for data generation for machine
    learning as input for peak finding algorithms.

    Parameters
    ----------
    data : 1D ndarray
        Timeseries.
    pattern : ndarray
        The timeseries pattern which is used to calculate the correlation
        with data.
    fs : float
        The sample frequency (frames per second) of the data.
    freq_fft_bins : list
        The frequency bins (row indices of the stft) used for the
        correlation between pattern and the stft of the data. Must not be
        empty.
    chunk_len_s : int
        The length in seconds for each chunked stft.
    len_fft : int
        The length of each fft calculation.
    nr_ffts_per_s : int
        Number of ffts per second in the stft.
    pattern_len_s : int
        Length of the pattern in seconds.

    Returns
    -------
    peaks : 1D ndarray
        The concatenated array of the correlation between data and pattern.
        Empty if ``data`` is too short to contain a single processed chunk.

    Raises
    ------
    ValueError
        If ``freq_fft_bins`` is empty.
    """
    if len(freq_fft_bins) == 0:
        raise ValueError('freq_fft_bins must contain at least one bin')

    nr_fft_chunk = chunk_len_s * nr_ffts_per_s
    n_frames_chunk, _, _, _, overlap = stft.calc_nr_frames(
        chunk_len_s, fs, len_fft, nr_fft_chunk)

    # -- By giving the overlap, the length of the pattern is not necessarily
    # -- pattern_len_s*nr_ffts_per_s anymore
    pattern = stft.calc_stft(pattern, 0, pattern.shape[0], fs,
                             pattern_len_s * nr_ffts_per_s,
                             overlap=overlap)[0]
    # -- z-score the pattern
    pattern = (pattern - np.mean(pattern)) / np.std(pattern)

    # -- Collect the per-chunk results and concatenate once at the end
    chunk_correlations = []
    end_frame = 0
    # NOTE(review): because of the strict '<', trailing data is skipped,
    # including a final chunk that would end exactly at the end of data --
    # confirm this is intended.
    while end_frame < data.shape[0] - n_frames_chunk:
        start_frame = end_frame
        end_frame = start_frame + n_frames_chunk
        spectrum = stft.calc_stft(data, start_frame, end_frame, fs,
                                  nr_fft_chunk)[0]
        print('spectrum.shape:  %s' % (spectrum.shape,))

        chunk_sum = None
        for i in freq_fft_bins:
            # -- The former `old_behavior=False` keyword was dropped: it is
            # -- the default behaviour and the keyword no longer exists in
            # -- current NumPy.
            bin_corr = np.correlate(spectrum[i, :], pattern[i, :],
                                    mode='same')
            if chunk_sum is None:
                chunk_sum = bin_corr
            else:
                chunk_sum += bin_corr
            print('tmp.shape:  %s' % (chunk_sum.shape,))
            print('spectrum[i,:].shape: %s' % (spectrum[i, :].shape,))
        chunk_correlations.append(chunk_sum)

    if not chunk_correlations:
        return np.array([])
    return np.hstack(chunk_correlations)
def train_interval_mel_features(interval_dict, pattern='{:s}', nr_fft=100,
                                len_fft=1024, nr_mel_bins=100,
                                min_freq_wanted=200, max_freq_wanted=8000,
                                is_return_fit=False):
    """
    Build a mel-spectrogram feature matrix from labelled time intervals.

    For every interval of every file referenced by ``interval_dict`` the
    stft of that slice of audio is computed, converted to a mel spectrum
    and flattened into one row of the returned matrix.

    Parameters
    ----------
    interval_dict : dict
        Dictionary whose keys point to a file path via ``pattern`` and whose
        values are lists of ``[start_time, end_time]`` training intervals.
        The times are multiplied by the file's sample rate to obtain sample
        indices.
    pattern : string
        A formatting string to map from the interval_dict keys to file paths.
    nr_fft : int
        The number of ffts in each stft calculation.
    len_fft : int
        The length of each of the nr_fft fourier transforms for each stft.
    nr_mel_bins : int
        The number of bins in which the mel spectrum is to be divided.
    min_freq_wanted, max_freq_wanted : float
        The lowest/highest frequency in the returned mel spectrum.
    is_return_fit : bool
        If True, additionally return which (file_key, interval) pair
        produced each row of ``X``.

    Returns
    -------
    X : ndarray, shape (nr_intervals, nr_mel_bins * nr_fft)
        Each row corresponds to one interval and each column to one feature
        of the flattened mel spectrum. An empty ``interval_dict`` yields an
        array with zero rows.
    file_interval_tuple : tuple, only if is_return_fit
        Tuple of ``(file_key, time_interval)`` pairs in the row order of
        ``X``.
    """
    # -- The total number of intervals in the dictionary. The builtin sum
    # -- keeps this an int (0 for an empty dict) so it is always a valid
    # -- array dimension.
    nr_intervals = sum(len(interval_dict[file_key])
                       for file_key in interval_dict)
    X = np.zeros((nr_intervals, nr_mel_bins * nr_fft))

    file_intervals = []
    row = 0
    for file_key in interval_dict:
        file_name = pattern.format(file_key)
        print(file_name)
        fs, data = spwav.read(file_name)
        # -- Remove the DC offset of the whole file before slicing
        data = data - np.mean(data)
        for time_interval in interval_dict[file_key]:
            file_intervals.append((file_key, time_interval))
            # -- Convert the interval borders from seconds to sample indices
            sf = int(time_interval[0] * fs)
            ef = int(time_interval[1] * fs)
            spectrum = stft.calc_stft(data[sf:ef], fs=fs, nr_fft=nr_fft,
                                      len_fft=len_fft)[0]
            # -- assumes the flattened mel spectrum has exactly
            # -- nr_mel_bins*nr_fft entries -- TODO confirm against
            # -- mel.stft_to_mel_freq
            X[row, :] = mel.stft_to_mel_freq(
                spectrum, fs=fs, len_fft=len_fft, nr_mel_bins=nr_mel_bins,
                min_freq_wanted=min_freq_wanted,
                max_freq_wanted=max_freq_wanted)[0].flatten()
            row += 1

    if is_return_fit:
        return X, tuple(file_intervals)
    return X