Example #1
def train_interval_mel_features(interval_dict,pattern = '{:s}',nr_fft = 100, 
                                len_fft = 1024, nr_mel_bins = 100, min_freq_wanted = 200, max_freq_wanted = 8000,
                                is_return_fit = False):
    """
    Calculate the mel spectrogram features for each interval in interval_dict and return a feature matrix X
    of shape [nr_intervals, nr_features]
    Parameters:
    -----------
    interval_dict : dict
        Dictionary whose keys map to a file path via pattern and whose values are lists of
        [start_time, end_time] training intervals
    pattern : string
        A formatting string to map from the interval_dict keys to file_paths
    nr_fft : int
        The number of ffts in each stft calculation
    len_fft : int
        The length of each of the nr_fft fourier transforms for each stft 
    nr_mel_bins : int
        The number of bins into which the mel spectrum is divided
    min_freq_wanted, max_freq_wanted : float
        The lowest/highest frequency in the returned mel spectrum
    is_return_fit : bool
        If True, also return the (file_key, time_interval) pair for each row of X

    Returns:
    --------
    X : ndarray
        Array containing the flattened mel-scaled stft for each interval in interval_dict.
        Each row corresponds to one interval and each column to one feature of the flattened mel spectrum
    file_interval_tuple : tuple, only if is_return_fit
        Tuple of (file_key, time_interval) pairs matching the rows of X
    """
    # -- The total number of intervals in the dictionary 
    nr_intervals = np.array([len(interval_dict[file_key]) for file_key in interval_dict]).sum()
    X = np.zeros((nr_intervals,nr_mel_bins*nr_fft))
    t = np.zeros((nr_intervals,2))
    i=0
    file_interval_tuple = ()
    for file_key in interval_dict:
        file_name = pattern.format(file_key)
        print(file_name)
        fs,data = spwav.read(file_name)
        data=data-np.mean(data)
        for time_interval in interval_dict[file_key]:

            file_interval_tuple = file_interval_tuple+((file_key,time_interval),)
            sf = int(time_interval[0]*fs)
            ef = int(time_interval[1]*fs)
            spectrum = stft.calc_stft(data[sf:ef],fs = fs, nr_fft = nr_fft, len_fft = len_fft)[0]
            X[i,:] = mel.stft_to_mel_freq(spectrum,fs=fs,len_fft=len_fft,nr_mel_bins = nr_mel_bins, 
                                             min_freq_wanted = min_freq_wanted , max_freq_wanted= max_freq_wanted)[0].flatten()  
            t[i] = time_interval
            i+=1 
    if is_return_fit:
        return X,file_interval_tuple
    
    return X
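A minimal call sketch, assuming the WAV files and the project-local stft/mel modules used above are importable; the file keys, paths, and interval times below are hypothetical.

# Hypothetical training intervals; keys are formatted into file paths via `pattern`
intervals = {
    'session_01': [[1.5, 2.5], [10.0, 11.0]],   # two 1 s intervals
    'session_02': [[0.25, 1.25]],
}
X, file_intervals = train_interval_mel_features(intervals,
                                                pattern='recordings/{:s}.wav',
                                                nr_fft=100,
                                                nr_mel_bins=100,
                                                is_return_fit=True)
# X.shape == (3, nr_mel_bins * nr_fft); file_intervals maps each row back to its source interval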
Example #2
def peak_identification(peaks,width_in_s,width_roll_mean = 200,
                        roll_max_peaks_threshold = 4.0,fs = 16000,nr_ffts_per_s = 100,
                        chunk_len_s = 60,len_fft = 1024,is_ret_roll_max_peaks = False):
    """
    Identify isolated peaks in a 1D array calculated by correlation_picking.
    Parameters:
    -----------
    peaks : ndarray
        Array containing isolated peaks, sampled at a rate depending on nr_ffts_per_s = 100
    width_in_s : int
        The width in seconds of an interval in which the maximum is found. I.e. two maxima have to be at least
        width_in_s apart to be registered as separate.
    width_roll_mean : int
        The width used for the rolling mean normalisation of the data for better identification
        of pattern matches as it only looks for narrow peaks.
    roll_max_peaks_threshold : float
        The threshold for when a peak is considered high enough to be added to the returned indices.
        A peak has to be roll_max_peaks_threshold times larger in amplitude than the rolling mean to be
        registered as a valid peak.
    fs : float
        The sample frequency (frames per second) of the data       
    nr_ffts_per_s : int
        Number of ffts per second in the stft.
    chunk_len_s  : int
        The length in seconds for each chunked stft. 
    len_fft : int
        The length of each fft calculation.     
    is_ret_roll_max_peaks : bool
        Return roll_max_peaks or not. Default is not.
    Returns:
    --------
    peak_frame_list : list
        List of frames in the original sound file used in correlation_picking() containing peaks   
    roll_max_peaks : ndarray, if is_ret_roll_max_peaks
        Rolling maximum of data normalised by its rolling mean.
    """


    _,_,frames_per_sample,sec_per_sample,_ = stft.calc_nr_frames(chunk_len_s,fs,len_fft,chunk_len_s*nr_ffts_per_s)
    if is_ret_roll_max_peaks:
        inds,roll_max_peaks = find_peak_ind(peaks,width_in_s/sec_per_sample,width_roll_mean = width_roll_mean,
                                            roll_max_peaks_threshold = roll_max_peaks_threshold,is_ret_roll_max_peaks = True)
    else:
        inds = find_peak_ind(peaks,width_in_s/sec_per_sample,width_roll_mean = width_roll_mean,roll_max_peaks_threshold = roll_max_peaks_threshold)
    peak_frame_list = np.array([ind*frames_per_sample for ind in inds])
    
    if is_ret_roll_max_peaks:
        return peak_frame_list,roll_max_peaks
    else:
        return peak_frame_list
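A minimal call sketch; here `peaks` is assumed to be the correlation trace produced by calc_pattern_correlation_chunked() (see the next example), sampled at nr_ffts_per_s values per second.

# Require maxima to be at least 1 s apart and 4x the rolling mean to count as peaks
peak_frames = peak_identification(peaks,
                                  width_in_s=1,
                                  roll_max_peaks_threshold=4.0,
                                  fs=16000,
                                  nr_ffts_per_s=100)
# peak_frames holds frame indices into the original audio,
# e.g. data[peak_frames[0]:peak_frames[0] + fs] extracts one second starting at the first match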
Example #3
def calc_pattern_correlation_chunked(data,pattern,fs,freq_fft_bins ,chunk_len_s = 45,
                                        len_fft = 1024, nr_ffts_per_s = 100, pattern_len_s = 2):
    """
    Calculate the average correlation between the stft of a timeseries and a pattern over a certain 
    frequency range. Used to generate data for machine learning and as input for peak-finding algorithms.
    
    Parameters
    ----------
    data : 1D ndarray
        Timeseries
    pattern : ndarray
        The timeseries pattern which is used to calculate the correlation with data
    fs : float
        The sample frequency (frames per second) of the data      
    freq_fft_bins : list
        The frequency bins used for the correlation between pattern and the stft of the data
    chunk_len_s  : int
        The length in seconds for each chunked stft. 
    len_fft : int
        The length of each fft calculation. 
    nr_ffts_per_s : int
        Number of ffts per second in the stft. 
    pattern_len_s : int
        Length of the pattern in seconds
    Returns
    -------
    peaks : 1D ndarray
        The concatenated array of the correlation between data and pattern
 
    """
        
    
    n_frames_chunk,_,_,sec_per_sample,overlap = stft.calc_nr_frames(chunk_len_s,fs,len_fft,chunk_len_s*nr_ffts_per_s)
    # -- By giving the overlap, the length of the pattern is not necessarily pattern_len_s*nr_ffts_per_s anymore
    pattern = stft.calc_stft(pattern,0,pattern.shape[0], fs, pattern_len_s*nr_ffts_per_s,overlap=overlap)[0]
    # -- z-score the pattern
    pattern = (pattern-np.mean(pattern)) / np.std(pattern)
    #q75, q50, q25 = np.percentile(pattern, [75 ,50, 25])
    #iqr = q75 - q25
    #pattern  = 1/(1+np.exp(-(pattern -q50)/(iqr/1.35)))
    
#     plt.matshow(pattern, origin='lower')
#     exit()
    end_frame = 0
    start_frame = 0
    while end_frame < data.shape[0] - n_frames_chunk:
        start_frame = end_frame
        end_frame = end_frame+n_frames_chunk
        spectrum = stft.calc_stft(data,start_frame,end_frame, fs,chunk_len_s*nr_ffts_per_s)[0]
        #spectrum = (spectrum - np.mean(spectrum))/np.std(spectrum)
        print('spectrum.shape: ', spectrum.shape)
        
        for i in freq_fft_bins:
            if i == freq_fft_bins[0]:
                tmp = np.correlate(spectrum[i,:], pattern[i,:], mode='same')
                print('tmp.shape: ', tmp.shape)
                print('spectrum[i,:].shape:', spectrum[i,:].shape)
            else:
                tmp += np.correlate(spectrum[i,:], pattern[i,:], mode='same')
                print('tmp.shape: ', tmp.shape)
                print('spectrum[i,:].shape:', spectrum[i,:].shape)
        if start_frame == 0:
            peaks = tmp
        else:
            peaks = np.hstack((peaks,tmp))
    
    return peaks
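A minimal call sketch with hypothetical file names; the recording and the roughly pattern_len_s-second template are read straight from WAV files, and freq_fft_bins selects which stft rows enter the correlation.

import numpy as np
import scipy.io.wavfile as spwav

fs, data = spwav.read('recordings/session_01.wav')         # hypothetical recording
_, pattern = spwav.read('recordings/call_template.wav')    # hypothetical ~2 s template
data = data - np.mean(data)
freq_fft_bins = list(range(20, 120))                       # hypothetical band of fft bins
peaks = calc_pattern_correlation_chunked(data, pattern, fs, freq_fft_bins,
                                         chunk_len_s=45, pattern_len_s=2)
# `peaks` can then be fed to peak_identification() above to locate pattern matches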
Example #4
def peak_identification(peaks,
                        width_in_s,
                        width_roll_mean=200,
                        roll_max_peaks_threshold=4.0,
                        fs=16000,
                        nr_ffts_per_s=100,
                        chunk_len_s=60,
                        len_fft=1024,
                        is_ret_roll_max_peaks=False):
    """
    Identify isolated peaks in a 1D array calculated by correlation_picking.
    Parameters:
    -----------
    peaks : ndarray
        Array containing isolated peaks, sampled at a rate depending on nr_ffts_per_s = 100
    width_in_s : int
        The width in seconds of an interval in which the maximum is found. I.e. two maxima have to be at least
        width_in_s apart to be registered as separate.
    width_roll_mean : int
        The width used for the rolling mean normalisation of the data for better identification
        of pattern matches as it only looks for narrow peaks.
    roll_max_peaks_threshold : float
        The threshold for when a peak is considered high enough to be added to the returned indices.
        A peak has to be roll_max_peaks_threshold times larger in amplitude than the rolling mean to be
        registered as a valid peak.
    fs : float
        The sample frequency (frames per second) of the data       
    nr_ffts_per_s : int
        Number of ffts per second in the stft.
    chunk_len_s  : int
        The length in seconds for each chunked stft. 
    len_fft : int
        The length of each fft calculation.     
    is_ret_roll_max_peaks : bool
        Return roll_max_peaks or not. Default is not.
    Returns:
    --------
    peak_frame_list : list
        List of frames in the original sound file used in correlation_picking() containing peaks   
    roll_max_peaks : ndarray, if is_ret_roll_max_peaks
        Rolling maximum of data normalised by its rolling mean.
    """

    _, _, frames_per_sample, sec_per_sample, _ = stft.calc_nr_frames(
        chunk_len_s, fs, len_fft, chunk_len_s * nr_ffts_per_s)
    if is_ret_roll_max_peaks:
        inds, roll_max_peaks = find_peak_ind(
            peaks,
            width_in_s / sec_per_sample,
            width_roll_mean=width_roll_mean,
            roll_max_peaks_threshold=roll_max_peaks_threshold,
            is_ret_roll_max_peaks=True)
    else:
        inds = find_peak_ind(peaks,
                             width_in_s / sec_per_sample,
                             width_roll_mean=width_roll_mean,
                             roll_max_peaks_threshold=roll_max_peaks_threshold)
    peak_frame_list = np.array([ind * frames_per_sample for ind in inds])

    if is_ret_roll_max_peaks:
        return peak_frame_list, roll_max_peaks
    else:
        return peak_frame_list
Example #5
def calc_pattern_correlation_chunked(data,
                                     pattern,
                                     fs,
                                     freq_fft_bins,
                                     chunk_len_s=45,
                                     len_fft=1024,
                                     nr_ffts_per_s=100,
                                     pattern_len_s=2):
    """
    Calculate the average correlation between the stft of a timeseries and a pattern over a certain 
    frequency range. Used to generate data for machine learning and as input for peak-finding algorithms.
    
    Parameters
    ----------
    data : 1D ndarray
        Timeseries
    pattern : ndarray
        The timeseries pattern which is used to calculate the correlation with data
    fs : float
        The sample frequency (frames per second) of the data      
    freq_fft_bins : list
        The frequency bins used for the correlation between pattern and the stft of the data
    chunk_len_s  : int
        The length in seconds for each chunked stft. 
    len_fft : int
        The length of each fft calculation. 
    nr_ffts_per_s : int
        Number of ffts per second in the stft. 
    pattern_len_s : int
        Length of the pattern in seconds
    Returns
    -------
    peaks : 1D ndarray
        The concatenated array of the correlation between data and pattern
 
    """

    n_frames_chunk, _, _, sec_per_sample, overlap = stft.calc_nr_frames(
        chunk_len_s, fs, len_fft, chunk_len_s * nr_ffts_per_s)
    # -- By giving the overlap, the length of the pattern is not necessarily pattern_len_s*nr_ffts_per_s anymore
    pattern = stft.calc_stft(pattern,
                             0,
                             pattern.shape[0],
                             fs,
                             pattern_len_s * nr_ffts_per_s,
                             overlap=overlap)[0]
    # -- z-score the pattern
    pattern = (pattern - np.mean(pattern)) / np.std(pattern)
    #q75, q50, q25 = np.percentile(pattern, [75 ,50, 25])
    #iqr = q75 - q25
    #pattern  = 1/(1+np.exp(-(pattern -q50)/(iqr/1.35)))

    #     plt.matshow(pattern, origin='lower')
    #     exit()
    end_frame = 0
    start_frame = 0
    while end_frame < data.shape[0] - n_frames_chunk:
        start_frame = end_frame
        end_frame = end_frame + n_frames_chunk
        spectrum = stft.calc_stft(data, start_frame, end_frame, fs,
                                  chunk_len_s * nr_ffts_per_s)[0]
        #spectrum = (spectrum - np.mean(spectrum))/np.std(spectrum)
        print('spectrum.shape: ', spectrum.shape)

        for i in freq_fft_bins:
            if i == freq_fft_bins[0]:
                tmp = np.correlate(spectrum[i, :],
                                   pattern[i, :],
                                   mode='same')
                print('tmp.shape: ', tmp.shape)
                print('spectrum[i,:].shape:', spectrum[i, :].shape)
            else:
                tmp += np.correlate(spectrum[i, :],
                                    pattern[i, :],
                                    mode='same')
                print('tmp.shape: ', tmp.shape)
                print('spectrum[i,:].shape:', spectrum[i, :].shape)
        if start_frame == 0:
            peaks = tmp
        else:
            peaks = np.hstack((peaks, tmp))

    return peaks
Example #6
def train_interval_mel_features(interval_dict,
                                pattern='{:s}',
                                nr_fft=100,
                                len_fft=1024,
                                nr_mel_bins=100,
                                min_freq_wanted=200,
                                max_freq_wanted=8000,
                                is_return_fit=False):
    """
    Calculate the mel spectrogram features for each interval in interval_dict and return a feature matrix X
    of shape [nr_intervals, nr_features]
    Parameters:
    -----------
    interval_dict : dict
        Dictionary whose keys map to a file path via pattern and whose values are lists of
        [start_time, end_time] training intervals
    pattern : string
        A formatting string to map from the interval_dict keys to file_paths
    nr_fft : int
        The number of ffts in each stft calculation
    len_fft : int
        The length of each of the nr_fft fourier transforms for each stft 
    nr_mel_bins : int
        The number of bins into which the mel spectrum is divided
    min_freq_wanted, max_freq_wanted : float
        The lowest/highest frequency in the returned mel spectrum
    is_return_fit : bool
        If True, also return the (file_key, time_interval) pair for each row of X

    Returns:
    --------
    X : ndarray
        Array containing the flattened mel-scaled stft for each interval in interval_dict.
        Each row corresponds to one interval and each column to one feature of the flattened mel spectrum
    file_interval_tuple : tuple, only if is_return_fit
        Tuple of (file_key, time_interval) pairs matching the rows of X
    """
    # -- The total number of intervals in the dictionary
    nr_intervals = np.array(
        [len(interval_dict[file_key]) for file_key in interval_dict]).sum()
    X = np.zeros((nr_intervals, nr_mel_bins * nr_fft))
    t = np.zeros((nr_intervals, 2))
    i = 0
    file_interval_tuple = ()
    for file_key in interval_dict:
        file_name = pattern.format(file_key)
        print(file_name)
        fs, data = spwav.read(file_name)
        data = data - np.mean(data)
        for time_interval in interval_dict[file_key]:

            file_interval_tuple = file_interval_tuple + (
                (file_key, time_interval), )
            sf = int(time_interval[0] * fs)
            ef = int(time_interval[1] * fs)
            spectrum = stft.calc_stft(data[sf:ef],
                                      fs=fs,
                                      nr_fft=nr_fft,
                                      len_fft=len_fft)[0]
            X[i, :] = mel.stft_to_mel_freq(
                spectrum,
                fs=fs,
                len_fft=len_fft,
                nr_mel_bins=nr_mel_bins,
                min_freq_wanted=min_freq_wanted,
                max_freq_wanted=max_freq_wanted)[0].flatten()
            t[i] = time_interval
            i += 1
    if is_return_fit:
        return X, file_interval_tuple

    return X
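Taken together, the three functions sketch a small detection pipeline. The end-to-end sequence below is only illustrative: the file names, the frequency-bin band, and the one-second labelling windows are all hypothetical.

import numpy as np
import scipy.io.wavfile as spwav

fs, data = spwav.read('recordings/session_01.wav')          # hypothetical recording
_, template = spwav.read('recordings/call_template.wav')    # hypothetical ~2 s pattern
data = data - np.mean(data)

# 1. Correlate the recording's stft with the pattern over selected frequency bins
peaks = calc_pattern_correlation_chunked(data, template, fs, list(range(20, 120)))
# 2. Turn the correlation trace into frame indices of candidate matches
peak_frames = peak_identification(peaks, width_in_s=1, fs=fs)
# 3. Extract mel features from a 1 s window starting at each candidate
intervals = {'session_01': [[f / fs, f / fs + 1.0] for f in peak_frames]}
X = train_interval_mel_features(intervals, pattern='recordings/{:s}.wav')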