import numpy as np
import pandas as pd
from maad.sound import load, spectrogram
from maad.util import (read_audacity_annot, format_features, overlay_rois,
                       overlay_centroid, plot_shape, power2dB)
from maad.features import shape_features, centroid_features

s, fs = load('./data/spinetail.wav')  # assumed audio file matching the annotations
rois = read_audacity_annot('./data/spinetail.txt')  # annotations made with Audacity

###=============== compute spectrogram =================
Sxx, tn, fn, ext = spectrogram(s, fs)
Sxx_db = 10 * np.log10(Sxx)
rois = format_features(rois, tn, fn)

###=============== overlay ROIs annotated in Audacity =================
### with all labels
ax, fig = overlay_rois(Sxx_db, rois, **{'vmin': -120, 'vmax': 20, 'extent': ext})

# Compute and visualize features
shape, params = shape_features(Sxx_db, resolution='low', rois=rois)
plot_shape(shape.mean(), params)

# Compute and visualize centroids
centroid = centroid_features(Sxx_db, rois)
centroid = format_features(centroid, tn, fn)
ax, fig = overlay_centroid(Sxx_db, centroid, savefig=None,
                           **{'vmin': -120, 'vmax': 20, 'extent': ext,
                              'fig': fig, 'ax': ax})

###=============== find ROI 2D =================
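# The banner above announces finding ROIs directly on the 2D spectrogram. A
# minimal sketch, assuming a recent scikit-maad: smooth the dB spectrogram,
# binarize it with a relative threshold, and keep connected regions as
# candidate ROIs. The bin_std/bin_per and min_roi values are illustrative
# assumptions, not tuned settings.
from maad.sound import smooth
from maad.rois import create_mask, select_rois

Sxx_smooth = smooth(Sxx_db, std=1)                    # 2D gaussian smoothing
im_mask = create_mask(Sxx_smooth, mode_bin='relative',
                      bin_std=6, bin_per=0.5)         # relative-threshold binarization
im_rois, df_rois = select_rois(im_mask, min_roi=25)   # discard regions < 25 pixels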
# Format ROIs and visualize the bounding boxes on the audio spectrogram.
df_rois = format_features(df_rois, tn, fn)
ax0, fig0 = overlay_rois(Sxx_db, df_rois, **{'vmin': 0, 'vmax': 60, 'extent': ext})

#%%
# 2. Compute acoustic features
# ----------------------------
# The ``shape_features`` function uses bidimensional wavelets to get the texture
# and spectro-temporal shape coefficients of each ROI. Wavelets have the
# advantage of being robust when the signal-to-noise ratio is low, and they
# derive homogeneous descriptors which facilitate the clustering process. The
# wavelet decomposition is performed on the complete spectrogram, hence the
# coefficients for ROIs do not vary much even when the time-frequency bounds
# are not exact. The centroid features give an estimate of the median frequency
# of the ROIs.
df_shape, params = shape_features(Sxx_db, resolution='low', rois=df_rois)
df_centroid = centroid_features(Sxx_db, df_rois)

# Get median frequency and normalize
median_freq = fn[np.round(df_centroid.centroid_y).astype(int)]
df_centroid['centroid_freq'] = median_freq / fn[-1]

#%%
# 3. Reduce the dimensionality of the features
# --------------------------------------------
# The shape audio features have 26 dimensions. To facilitate the clustering
# process and visualize the results, it is possible to use a non-metric
# dimensionality reduction algorithm, namely the t-distributed stochastic
# neighbor embedding (t-SNE), to project the data in two dimensions.
from sklearn.manifold import TSNE

X = df_shape.loc[:, df_shape.columns.str.startswith('shp')]
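#%%
# A minimal sketch of the projection itself, assuming the feature matrix ``X``
# built above, joined with the normalized centroid frequency from step 2;
# perplexity=12 is an illustrative choice, not a prescribed setting.
X = X.join(df_centroid['centroid_freq'])
tsne = TSNE(n_components=2, perplexity=12, init='pca', random_state=42)
Y = tsne.fit_transform(X)
df_rois['x'], df_rois['y'] = Y[:, 0], Y[:, 1]  # keep the embedding with the ROI metadata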
def batch_feature_rois_no_verb(rois_list, params_features, path_audio):
    """
    Compute features for a list of files, without verbose output.

    NOTE: relies on an older scikit-maad API (opt_shape_presets, format_rois,
    rois_to_imblobs, normalize_2d, centroid, and the im_blobs-based
    shape_features signature), with module-level sound.load / sound.spectrogram.

    Parameters
    ----------
    rois_list : list of dict
        One entry per audio file, with keys 'fname' (audio filename) and
        'rois' (dataframe of detected ROIs, possibly empty).
    params_features : dict
        Dictionary with the parameters needed to compute the features:
        'flims', 'opt_spec', and 'opt_shape_str'.
    path_audio : str
        Path to the directory where the dataset of audio files is stored.

    Returns
    -------
    info_features : dict
        Dictionary with features and all the parameters used to compute
        the features. Included keys: features, parameters_df, opt_shape,
        opt_spectro.
    """
    ## TODO: when the time limits are too short, the function has problems
    # load parameters
    flims = params_features['flims']
    opt_spec = params_features['opt_spec']
    opt_shape = opt_shape_presets(params_features['opt_shape_str'])

    # compute features for each detection file
    features = []
    for idx, file in enumerate(rois_list):
        # unpack file values
        fname = file['fname']
        rois_tf = file['rois']
        #print(idx+1, '/', len(rois_list), fname)

        if rois_tf.empty:
            #print('< No detection on file >')
            features.append({'fname': fname, 'features': pd.DataFrame()})
        else:
            # load materials: sound, spectrogram
            s, fs = sound.load(path_audio + fname)
            im, dt, df, ext = sound.spectrogram(s, fs,
                                                nperseg=opt_spec['nperseg'],
                                                overlap=opt_spec['overlap'],
                                                fcrop=flims, rescale=False,
                                                db_range=opt_spec['db_range'])

            # format rois to bbox
            ts = np.arange(ext[0], ext[1], dt)
            f = np.arange(ext[2], ext[3] + df, df)
            rois_bbox = format_rois(rois_tf, ts, f, fmt='bbox')

            # roi to image blob
            im_blobs = rois_to_imblobs(np.zeros(im.shape), rois_bbox)

            # get features: shape, center frequency
            im = normalize_2d(im, 0, 1)
            bbox, params, shape = shape_features(im, im_blobs,
                                                 resolution='custom',
                                                 opt_shape=opt_shape)
            _, cent = centroid(im, im_blobs)
            cent['frequency'] = f[np.round(cent.y).astype(int)]  # y values to frequency

            # format rois back to time-frequency
            rois_out = format_rois(bbox, ts, f, fmt='tf')

            # combine into a single dataframe
            aux_df = pd.concat([rois_out, shape, cent.frequency], axis=1)
            # aux_df['fname'] = fname
            features.append({'fname': fname, 'features': aux_df})

    # arrange the data into a dictionary
    info_features = {'features': features,
                     'parameters_df': params,
                     'opt_shape': opt_shape,
                     'opt_spectro': opt_spec}
    return info_features
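# A minimal usage sketch of the batch helper above, assuming detections were
# produced beforehand and collected per file; every value below (paths, presets,
# spectrogram settings, and the rois_file1/rois_file2 placeholder dataframes)
# is an illustrative assumption, not a recommended configuration.
params_features = {
    'flims': (1000, 10000),   # frequency crop in Hz (assumed)
    'opt_spec': {'nperseg': 512, 'overlap': 0.5, 'db_range': 60},
    'opt_shape_str': 'med',   # preset name resolved by opt_shape_presets
}
rois_list = [{'fname': 'file1.wav', 'rois': rois_file1},
             {'fname': 'file2.wav', 'rois': rois_file2}]
info = batch_feature_rois_no_verb(rois_list, params_features,
                                  path_audio='./audio/')
# stack the per-file features into a single dataframe
df_features = pd.concat([item['features'] for item in info['features']],
                        ignore_index=True)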
rois_cr = rois.loc[rois.label == 'CRER', :]
rois_sp = rois.loc[rois.label == 'SP', :]

Sxx_power, ts, f, ext = spectrogram(s, fs)
Sxx_dB = power2dB(Sxx_power, db_range=90) + 96

# Visualize large vocalizations
rois_cr = format_features(rois_cr, ts, f)
overlay_rois(Sxx_dB, rois_cr, **{'extent': ext, 'vmin': 0, 'vmax': 80})

# Visualize short vocalizations
rois_sp = format_features(rois_sp, ts, f)
overlay_rois(Sxx_dB, rois_sp, **{'extent': ext, 'vmin': 0, 'vmax': 80})

# Compute and visualize features
shape_cr, params = shape_features(Sxx_dB, resolution='med', rois=rois_cr)
ax = plot_shape(shape_cr.mean(), params)
shape_sp, params = shape_features(Sxx_dB, resolution='med', rois=rois_sp)
ax = plot_shape(shape_sp.mean(), params)

######## Simple clustering with PCA
# join both shape dataframes (DataFrame.append is deprecated, use pd.concat)
features = pd.concat([shape_cr, shape_sp])
# Standardize the dataset
X = features.filter(regex='shp', axis='columns')
X_shape = X.values.shape   # remember the shape to restore it after scaling
X = X.values.flatten()     # flatten so all coefficients share a single scaler
X = X.reshape(-1, 1)
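# A minimal sketch of the PCA step announced above, assuming the flattened,
# standardized coefficients are reshaped back to one row per ROI; two
# components and the plotting details are illustrative choices.
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

X = StandardScaler().fit_transform(X)  # zero mean, unit variance
X = X.reshape(X_shape)                 # back to (n_rois, n_shape_coefficients)

pca = PCA(n_components=2)
Y = pca.fit_transform(X)

# the two labels should separate along the first principal components
labels = ['CRER'] * len(shape_cr) + ['SP'] * len(shape_sp)
for lbl in ('CRER', 'SP'):
    sel = [i for i, l in enumerate(labels) if l == lbl]
    plt.scatter(Y[sel, 0], Y[sel, 1], label=lbl, alpha=0.7)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend()
plt.show()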