def _refine_segments_post_vae(dc, seg_dirs, out_seg_dirs, verbose=True):
    """
    Manually remove noise by selecting regions of UMAP latent mean projections.

    NOTE(review): this function raises ``NotImplementedError`` immediately
    after some setup, so everything past the ``raise`` is unreachable. The
    dead code also references names that are not defined in this scope
    (``specs``, ``num_imgs``, ``audio_dirs``, ``p``, ``max_len``,
    ``transform``) -- presumably copied from ``refine_segments_pre_vae``.
    Confirm whether this stub should be deleted in favor of
    ``refine_segments_post_vae``.

    Parameters
    ----------
    dc : ava.data.data_container.DataContainer
        DataContainer object
    seg_dirs : list of str
        Presumably directories containing segmenting decisions -- TODO confirm.
    out_seg_dirs : list of str
        Presumably directories to write cleaned segments to -- TODO confirm.
    verbose : bool, optional
        Defaults to ``True``.
    """
    # 2D UMAP projection of latent means, provided by the DataContainer.
    embed = dc.request('latent_mean_umap')
    # Rectangles identifying noise regions in the projection.
    bounds = {'x1': [], 'x2': [], 'y1': [], 'y2': []}
    colors = ['b'] * len(embed)  # 'b' = keep, 'r' = noise
    first_iteration = True
    raise NotImplementedError
    # NOTE(review): everything below this point is unreachable.
    # Keep drawing boxes around noise.
    while True:
        _plot_helper(embed, colors)
        if first_iteration:
            if verbose:
                print("Writing html plot:")
            first_iteration = False
            title = "Select unwanted sounds:"
            # NOTE(review): `specs` and `num_imgs` are undefined here.
            tooltip_plot(embed, specs, num_imgs=num_imgs, title=title)
            if verbose:
                print("\tDone.")
        if input("Press [q] to quit drawing rectangles: ") == 'q':
            break
        print("Select a rectangle containing noise:")
        x1 = _get_input("x1: ")
        x2 = _get_input("x2: ")
        y1 = _get_input("y1: ")
        y2 = _get_input("y2: ")
        # NOTE(review): unlike the other refine_* functions, corners are not
        # normalized with min/max here -- confirm intent.
        bounds['x1'].append(x1)
        bounds['x2'].append(x2)
        bounds['y1'].append(y1)
        bounds['y2'].append(y2)
        # Update scatter colors.
        colors = _update_colors(colors, embed, bounds)
    # Write files to out_seg_dirs.
    # NOTE(review): `audio_dirs`, `p`, `max_len`, and `transform` are
    # undefined in this scope.
    gen = zip(seg_dirs, audio_dirs, out_seg_dirs, repeat(p), repeat(max_len), \
            repeat(transform), repeat(bounds), repeat(verbose))
    n_jobs = min(len(seg_dirs), os.cpu_count()-1)
    Parallel(n_jobs=n_jobs)(delayed(_update_segs_helper)(*args) for args in gen)
def clean_collected_data(result, audio_dirs, segment_dirs, template_length, p, \
        n=10**4):
    """
    Take a look at the collected data and discard false positives.

    Spectrograms of each detected segment are embedded with UMAP, the user
    interactively selects rectangular regions of the projection that contain
    real song, and the segment files are rewritten in place keeping only the
    segments whose embedding falls inside a selected region.

    Parameters
    ----------
    result : dict
        Maps audio filenames to detected segments (onset/offset pairs, in
        seconds).
    audio_dirs : list of str
        Directories containing audio files.
    segment_dirs : list of str
        Directories containing segment files; these files are rewritten in
        place.
    template_length : float or None
        Fixed segment duration in seconds. If ``None``, each segment's own
        offset is used and spectrograms are zero-padded to a common length.
    p : dict
        Parameters. Must contain the keys ``'fs'`` and ``'delimiter'``, plus
        whatever ``get_spec`` requires.
    n : int, optional
        Appears unused -- TODO confirm. Defaults to ``10**4``.

    Notes
    -----
    Writes ``temp.pdf`` (scatterplot) and ``bounds.npy`` (selected regions)
    to the working directory and overwrites files in `segment_dirs`.
    """
    # Collect spectrograms.
    specs = []
    if template_length is not None:
        # Fixed window length, in samples.
        delta_i = int(round(template_length * p['fs']))
    for filename in result.keys():
        fs, audio = wavfile.read(filename)
        assert fs == p['fs']
        for segment in result[filename]:
            i1 = int(round(segment[0] * fs))
            if template_length is None:
                i2 = int(round(segment[1] * fs))
            else:
                i2 = i1 + delta_i
            spec, dt = get_spec(fs, audio[i1:i2], p)
            specs.append(spec)
    # Guard the degenerate case: no detected segments at all. (Previously
    # this crashed on `max()` of an empty sequence.)
    if len(specs) == 0:
        warnings.warn("Found no spectrograms in clean_collected_data.", \
                UserWarning)
        return
    if template_length is None:
        # Zero-pad all spectrograms to the longest duration so they stack.
        max_t = max(spec.shape[1] for spec in specs)
        temp_specs = np.zeros((len(specs), specs[0].shape[0], max_t))
        for i, spec in enumerate(specs):
            temp_specs[i,:,:spec.shape[1]] = spec
        specs = temp_specs
    else:
        specs = np.array(specs)
    # Deterministically shuffle, then restore RNG state.
    np.random.seed(42)
    specs = specs[np.random.permutation(len(specs))]
    np.random.seed(None)
    # UMAP the spectrograms.
    transform = umap.UMAP(random_state=42)
    embedding = transform.fit_transform(specs.reshape(len(specs), -1))
    # Plot and ask for user input.
    bounds = {
        'x1s':[],
        'x2s':[],
        'y1s':[],
        'y2s':[],
    }
    X, Y = embedding[:,0], embedding[:,1]
    i = 0
    while True:
        # Blue points fall inside a selected (kept) region.
        colors = ['b' if in_region(embed, bounds) else 'r' \
                for embed in embedding]
        print("Selected ", len([c for c in colors if c=='b']), \
                "out of", len(colors))
        plt.scatter(X, Y, c=colors, s=0.9, alpha=0.5)
        # A unit grid helps the user read off rectangle coordinates.
        for x_tick in np.arange(np.floor(np.min(X)), np.ceil(np.max(X))):
            plt.axvline(x=x_tick, c='k', alpha=0.1, lw=0.5)
        for y_tick in np.arange(np.floor(np.min(Y)), np.ceil(np.max(Y))):
            plt.axhline(y=y_tick, c='k', alpha=0.1, lw=0.5)
        title = "Select relevant song:"
        plt.title(title)
        plt.savefig('temp.pdf')
        plt.close('all')
        # Only write the (expensive) tooltip plot once.
        if i == 0:
            tooltip_plot(embedding, specs, num_imgs=10**3, title=title)
        bounds['x1s'].append(float(input('x1: ')))
        bounds['x2s'].append(float(input('x2: ')))
        bounds['y1s'].append(float(input('y1: ')))
        bounds['y2s'].append(float(input('y2: ')))
        temp = input('(<c> to continue) ')
        if temp == 'c':
            break
        i += 1
    np.save('bounds.npy', bounds)
    # Save only the good segments.
    num_deleted, num_total = 0, 0
    for audio_dir, seg_dir in zip(audio_dirs, segment_dirs):
        audio_fns = [os.path.join(audio_dir, i) for i in os.listdir(audio_dir) \
                if is_audio_file(i)]
        for audio_fn in audio_fns:
            fs, audio = wavfile.read(audio_fn)
            assert fs == p['fs']
            # BUG FIX: build the segment filename from the audio file's
            # basename. The old code joined the full audio path onto
            # `seg_dir`, which produced a path outside the segment directory.
            segment_fn = os.path.split(audio_fn)[-1][:-4] + '.txt'
            segment_fn = os.path.join(seg_dir, segment_fn)
            segments = np.loadtxt(segment_fn, \
                    delimiter=p['delimiter']).reshape(-1,2)
            if len(segments) == 0:
                continue
            new_segments = np.zeros(segments.shape)
            i = 0
            for segment in segments:
                i1 = int(round(segment[0] * fs))
                if template_length is None:
                    i2 = int(round(segment[1] * fs))
                else:
                    i2 = i1 + delta_i
                spec, dt = get_spec(fs, audio[i1:i2], p)
                if template_length is None:
                    # Pad to the training-time shape before projecting.
                    temp_spec = np.zeros((spec.shape[0], max_t))
                    temp_spec[:,:spec.shape[1]] = spec
                    spec = temp_spec
                embed = transform.transform(spec.reshape(1,-1)).reshape(2)
                if in_region(embed, bounds):
                    new_segments[i] = segment[:]
                    i += 1
                    num_total += 1  # NOTE: counts retained segments
                else:
                    num_deleted += 1
            new_segments = new_segments[:i]
            np.savetxt(segment_fn, new_segments, fmt='%.5f', \
                    delimiter=p['delimiter'])
    print("deleted", num_deleted, "total", num_total)
def clean_collected_segments(result, audio_dirs, segment_dirs, p, \
        max_num_specs=10000, verbose=True, img_fn='temp.pdf', \
        tooltip_plot_dir='html'):
    """
    Take a look at the collected segments and discard false positives.

    Parameters
    ----------
    result : dict
        Output of ``segment_files`` or ``read_segment_decisions``.
    audio_dirs : list of str
        Directories containing audio.
    segment_dirs : list of str
        Directories containing segmenting decisions.
    p : dict
        Parameters. Must contain keys: ``'fs'``, ``'min_freq'``,
        ``'max_freq'``, ``'nperseg'``, ``'noverlap'``, ``'spec_min_val'``,
        ``'spec_max_val'``.
    max_num_specs : int, optional
        Maximum number of spectrograms to feed to UMAP. Defaults to
        ``10000``.
    verbose : bool, optional
        Defaults to ``True``.
    img_fn : str, optional
        Image filename. Defaults to ``'temp.pdf'``.
    tooltip_plot_dir : str, optional
        Directory to save tooltip plot to. Defaults to ``'html'``.
    """
    # Collect one spectrogram per collected segment.
    if verbose:
        print("Collecting spectrograms...")
    specs = []
    for filename in result.keys():
        # Suppress scipy's WavFileWarning (e.g. non-data chunk complaints).
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=WavFileWarning)
            fs, audio = wavfile.read(filename)
        assert fs == p['fs'], "Found samplerate=" + str(fs) + \
                ", expected " + str(p['fs'])
        for segment in result[filename]:
            # Convert onset/offset times (seconds) to sample indices.
            i1 = int(round(segment[0] * fs))
            i2 = int(round(segment[1] * fs))
            spec, dt = _get_spec(fs, audio[i1:i2], p)
            specs.append(spec)
    # Nothing detected: warn and bail out instead of crashing below.
    if len(specs) == 0:
        warnings.warn(
            "Found no spectrograms in " + \
            "ava.segmenting.template_segmentation.clean_collected_segments.\n" \
            + "Consider reducing the `num_mad` parameter in `segment_files`.",
            UserWarning
        )
        return
    # Zero-pad all spectrograms to the longest duration so they stack into
    # one array.
    max_t = max(spec.shape[1] for spec in specs)
    temp_specs = np.zeros((len(specs), specs[0].shape[0], max_t))
    for i, spec in enumerate(specs):
        temp_specs[i,:,:spec.shape[1]] = spec
    specs = temp_specs
    if len(specs) > max_num_specs:
        warnings.warn(
            "Found more spectrograms than `max_num_specs` (" + \
            str(max_num_specs) + "). Consider increasing `max_num_specs` or" + \
            " `num_mad`.",
            UserWarning
        )
    if verbose:
        print("\tCollected",len(specs),"spectrograms.")
        print("\tSpectrogram shape:", specs.shape[1:])
        if len(specs) > max_num_specs:
            print("\tRandomly sampling", max_num_specs, "spectrograms.")
        print("\tDone.")
    # Deterministically subsample, then restore the global RNG state.
    np.random.seed(42)
    specs = specs[np.random.permutation(len(specs))[:max_num_specs]]
    np.random.seed(None)
    # UMAP the spectrograms.
    if verbose:
        print("Running UMAP. n =", len(specs))
    transform = umap.UMAP(random_state=42, metric='correlation')
    # https://github.com/lmcinnes/umap/issues/252
    # NumbaPerformanceWarning may be undefined depending on the numba
    # version; the NameError from evaluating it is caught below.
    with warnings.catch_warnings():
        try:
            warnings.filterwarnings("ignore", \
                    category=NumbaPerformanceWarning)
        except NameError:
            pass
        embedding = transform.fit_transform(specs.reshape(len(specs), -1))
    if verbose:
        print("\tDone.")
    # Plot and ask for user input.
    bounds = {
        'x1s':[],
        'x2s':[],
        'y1s':[],
        'y2s':[],
    }
    bounds_keys = ['x1s', 'x2s', 'y1s', 'y2s']
    queries = ['x1: ', 'x2: ', 'y1: ', 'y2: ']
    X, Y = embedding[:,0], embedding[:,1]
    i = 0
    while True:
        # Blue points fall inside at least one selected rectangle.
        colors = ['b' if _in_region(embed, bounds) else 'r' for \
                embed in embedding]
        print("Selected", \
                len([c for c in colors if c=='b']), "out of", len(colors))
        plt.scatter(X, Y, c=colors, s=0.9, alpha=0.5)
        # Draw a unit grid so the user can read off coordinates.
        for x_tick in np.arange(np.floor(np.min(X)), np.ceil(np.max(X))):
            plt.axvline(x=x_tick, c='k', alpha=0.1, lw=0.5)
        for y_tick in np.arange(np.floor(np.min(Y)), np.ceil(np.max(Y))):
            plt.axhline(y=y_tick, c='k', alpha=0.1, lw=0.5)
        title = "Find relevant song"
        plt.title(title)
        plt.savefig(img_fn)
        plt.close('all')
        # Plot the tooltip plot (first iteration only -- it is expensive).
        if i == 0:
            if verbose:
                print("Writing tooltip plot...")
            tooltip_plot(embedding, specs, output_dir=tooltip_plot_dir, \
                    num_imgs=1000, title=title, grid=True)
            if verbose:
                print("\tDone.")
        # Get input from user, re-prompting until the answer parses as a
        # number.
        for key, query in zip(bounds_keys, queries):
            answer = 'initial input'  # sentinel that fails _is_number
            while not _is_number(answer):
                answer = input(query)
            bounds[key].append(float(answer))
        # Continue?
        temp = input('[Enter] to select more regions, [c] to continue: ')
        if temp == 'c':
            break
        i += 1
    # Save only the good segments.
    if verbose:
        print("Saving segments...")
    num_deleted, num_total = 0, 0
    for audio_dir, seg_dir in zip(audio_dirs, segment_dirs):
        audio_fns = [os.path.join(audio_dir, i) for i in os.listdir(audio_dir) \
                if _is_wav_file(i)]
        for audio_fn in audio_fns:
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=WavFileWarning)
                fs, audio = wavfile.read(audio_fn)
            assert fs == p['fs'], "Found samplerate=" + str(fs) + \
                    ", expected " + str(p['fs'])
            segment_fn = os.path.split(audio_fn)[-1][:-4] + '.txt'
            segment_fn = os.path.join(seg_dir, segment_fn)
            segments = np.loadtxt(segment_fn).reshape(-1,2)
            if len(segments) == 0:
                continue
            new_segments = np.zeros(segments.shape)
            i = 0
            specs = []
            for segment in segments:
                i1 = int(round(segment[0] * fs))
                i2 = int(round(segment[1] * fs))
                spec, dt = _get_spec(fs, audio[i1:i2], p)
                # Pad to the training-time duration so shapes match the
                # fitted UMAP transform.
                temp_spec = np.zeros((spec.shape[0], max_t))
                temp_spec[:, :spec.shape[1]] = spec
                spec = temp_spec
                specs.append(spec)
            specs = np.stack(specs)
            # Project this file's segments with the fitted transform.
            embed = transform.transform(specs.reshape(specs.shape[0],-1))
            for j, segment in enumerate(segments):
                if _in_region(embed[j], bounds):
                    new_segments[i] = segment[:]
                    i += 1
                    num_total += 1  # counts retained segments
                else:
                    num_deleted += 1
            new_segments = new_segments[:i]
            np.savetxt(segment_fn, new_segments, fmt='%.5f')
    if verbose:
        print("\tdeleted:", num_deleted, "remaining:", num_total)
        print("\tDone.")
def refine_segments_pre_vae(seg_dirs, audio_dirs, out_seg_dirs, p, \
        n_samples=10000, num_imgs=1000, verbose=True, img_fn='temp.pdf', \
        tooltip_output_dir='temp'):
    """
    Manually remove noise by selecting regions of UMAP spectrogram projections.

    First, a tooltip plot of the UMAPed spectrograms will be made (using
    `ava.plotting.tooltip_plot`) and saved to `tooltip_output_dir`. You
    should open this plot and see which regions of the UMAP contain noise.
    Then, when prompted, press return to identify noise, then enter the
    coordinates of a rectangle (x1, x2, y1, and y2) in the UMAP projection
    containing noise, following the prompts. You will be able to see the
    selected noise regions in the image saved at `img_fn`, by default
    `'temp.pdf'`. When you are finished identifying noise regions, press
    `'q'` and the original segments from `seg_dirs` that aren't identified
    as noise (contained in one of the rectangles) are copied to segment
    files in `out_seg_dirs`.

    Doesn't support datasets that are too large to fit in memory.

    NOTE(review): this function is re-defined later in this file (with a
    different ``n_samples`` default and a different keyword passed to
    ``_get_specs``); the later definition shadows this one -- confirm which
    is intended.

    Parameters
    ----------
    seg_dirs : list of str
        Directories containing segmenting information
    audio_dirs : list of str
        Directories containing audio files
    out_seg_dirs : list of str
        Directories to write updated segmenting information to
    p : dict
        Segmenting parameters: TO DO: ADD REFERENCE!
    n_samples : int, optional
        Number of spectrograms to feed to UMAP. Defaults to ``10000``.
    num_imgs : int, optional
        Number of images to embed in the tooltip plot. Defaults to ``1000``.
    verbose : bool, optional
        Defaults to ``True``.
    img_fn : str, optional
        Image filename. Defaults to ``'temp.pdf'``.
    tooltip_output_dir : str, optional
        Where to save tooltip plot. Defaults to ``'temp'``.
    """
    if verbose:
        print("\nCleaning segments\n-----------------")
        print("Collecting spectrograms...")
    # One spectrogram per segment; `max_len` is presumably the longest
    # segment duration seen -- TODO confirm against `_get_specs`.
    specs, max_len, _ = _get_specs(audio_dirs, seg_dirs, p, \
            max_num_specs=n_samples)
    specs = np.stack(specs)
    if verbose:
        print("Running UMAP... n =", len(specs))
    transform = umap.UMAP(n_components=2, n_neighbors=20, min_dist=0.1, \
            metric='euclidean', random_state=42)
    # NumbaPerformanceWarning may be undefined depending on the numba
    # version; the resulting NameError is caught.
    with warnings.catch_warnings():
        try:
            warnings.filterwarnings("ignore", category=NumbaPerformanceWarning)
        except NameError:
            pass
        embed = transform.fit_transform(specs.reshape(len(specs), -1))
    if verbose:
        print("\tDone.")
    # Rectangles identifying noise regions in the projection.
    bounds = {'x1': [], 'x2': [], 'y1': [], 'y2': []}
    colors = ['b'] * len(embed)  # 'b' = keep, 'r' = noise
    first_iteration = True
    # Keep drawing boxes around noise.
    while True:
        _plot_helper(embed, colors, verbose=verbose, filename=img_fn)
        if first_iteration:
            if verbose:
                print("Writing html plot:")
            first_iteration = False
            title = "Identify unwanted sounds:"
            tooltip_plot(embed, specs, num_imgs=num_imgs, title=title, \
                    output_dir=tooltip_output_dir, grid=True)
            if verbose:
                print("\tDone.")
        if input("Press [q] to quit identifying noise or " \
                "[return] to continue: ") == 'q':
            break
        print("Enter the coordinates of a rectangle containing noise:")
        x1 = _get_input("x1: ")
        x2 = _get_input("x2: ")
        y1 = _get_input("y1: ")
        y2 = _get_input("y2: ")
        # Normalize corners so x1 < x2 and y1 < y2 regardless of entry order.
        bounds['x1'].append(min(x1, x2))
        bounds['x2'].append(max(x1, x2))
        bounds['y1'].append(min(y1, y2))
        bounds['y2'].append(max(y1, y2))
        # Update scatter colors.
        colors = _update_colors(colors, embed, bounds)
    # Write files to out_seg_dirs, one parallel job per segment directory.
    gen = zip(seg_dirs, audio_dirs, out_seg_dirs, repeat(p), repeat(max_len), \
            repeat(transform), repeat(bounds), repeat(verbose))
    n_jobs = min(len(seg_dirs), os.cpu_count() - 1)
    Parallel(n_jobs=n_jobs)(delayed(_update_segs_helper)(*args) for args in gen)
def refine_segments_post_vae(dc, seg_dirs, audio_dirs, out_seg_dirs, \
        verbose=True, num_imgs=2000, tooltip_output_dir='temp', \
        make_tooltip=True, img_fn='temp.pdf'):
    """
    Manually remove noise by selecting regions of UMAP latent mean projection.

    First, a tooltip plot of the spectrogram latent means will be made (using
    `ava.plotting.tooltip_plot`) and saved to `tooltip_output_dir`. You
    should open this plot and see which regions of the UMAP contain noise.
    Then, when prompted, press return to identify noise, then enter the
    coordinates of a rectangle (x1, x2, y1, and y2) in the UMAP projection
    containing noise, following the prompts. You will be able to see the
    selected noise regions in the image saved at `img_fn`, by default
    `'temp.pdf'`. When you are finished identifying noise regions, press
    `'q'` and the original segments from `seg_dirs` that aren't identified
    as noise (contained in one of the rectangles) are copied to segment
    files in `out_seg_dirs`.

    Doesn't support datasets that are too large to fit in memory.

    Parameters
    ----------
    dc : ava.data.data_container.DataContainer
        DataContainer object
    seg_dirs : list of str
        Original segment directories.
    audio_dirs : list of str
        Directories containing the audio files referenced by `dc`; used to
        map each audio filename back to its segment directory.
    out_seg_dirs : list of str
        Output segment directories.
    verbose : bool, optional
        Defaults to ``True``.
    num_imgs : int, optional
        Number of images for tooltip plot. Defaults to ``2000``.
    tooltip_output_dir : str, optional
        Where to save tooltip plot. Defaults to ``'temp'``.
    make_tooltip : bool, optional
        Whether to write the html tooltip plot. Defaults to ``True``.
    img_fn : str, optional
        Where to save the scatterplot image. Defaults to ``'temp.pdf'``.
    """
    # Get UMAP embedding.
    embed = dc.request('latent_mean_umap')
    # Rectangles identifying noise regions in the projection.
    bounds = {'x1': [], 'x2': [], 'y1': [], 'y2': []}
    colors = ['b'] * len(embed)  # 'b' = keep, 'r' = noise
    first_iteration = True
    # Keep drawing boxes around noise.
    while True:
        _plot_helper(embed, colors, filename=img_fn, verbose=verbose)
        if first_iteration and make_tooltip:
            if verbose:
                print("Writing html plot:")
            first_iteration = False
            title = "Identify unwanted sounds:"
            specs = dc.request('specs')
            tooltip_plot(embed, specs, num_imgs=num_imgs, title=title, \
                    output_dir=tooltip_output_dir, grid=True)
            if verbose:
                print("\tDone.")
        if input("Press [q] to quit identifying noise or " \
                "[return] to continue: ") == 'q':
            break
        print("Enter the coordinates of a rectangle containing noise:")
        x1 = _get_input("x1: ")
        x2 = _get_input("x2: ")
        y1 = _get_input("y1: ")
        y2 = _get_input("y2: ")
        # Normalize corners so x1 < x2 and y1 < y2 regardless of entry order.
        bounds['x1'].append(min(x1, x2))
        bounds['x2'].append(max(x1, x2))
        bounds['y1'].append(min(y1, y2))
        bounds['y2'].append(max(y1, y2))
        # Update scatter colors.
        colors = _update_colors(colors, embed, bounds)
    # Write files to out_seg_dirs.
    audio_fns = dc.request('audio_filenames')
    segs = np.zeros((len(audio_fns), 2))
    segs[:, 0] = dc.request('onsets')
    segs[:, 1] = dc.request('offsets')
    # BUG FIX: the old `np.argwhere(colors == 'b')` compared a Python list
    # to a string (always False) and was immediately overwritten; build the
    # kept-syllable index array directly.
    good_sylls = np.array([i for i in range(len(colors)) if colors[i] == 'b'], \
            dtype='int')
    for fn in np.unique(audio_fns):
        # Map this audio file to its segment/output directories.
        index = [1 if a in fn else 0 for a in audio_dirs].index(1)
        seg_fn = os.path.split(fn)[-1][:-4] + '.txt'
        out_seg_fn = os.path.join(out_seg_dirs[index], seg_fn)
        seg_fn = os.path.join(seg_dirs[index], seg_fn)
        if not os.path.exists(out_seg_dirs[index]):
            os.makedirs(out_seg_dirs[index])
        # Collect indices of syllables to save: this file's syllables that
        # were not flagged as noise.
        indices = np.argwhere(audio_fns == fn).flatten()
        indices = np.intersect1d(indices, good_sylls, assume_unique=True)
        header = "Cleaned onsets/offsets from: " + seg_fn
        np.savetxt(out_seg_fn, segs[indices], fmt='%.5f', header=header)
    # Write empty files if we don't have any syllables from them.
    for audio_dir, out_seg_dir in zip(audio_dirs, out_seg_dirs):
        for temp_fn in [
            os.path.join(audio_dir, i) for i in os.listdir(audio_dir)
        ]:
            if _is_audio_file(temp_fn) and temp_fn not in audio_fns:
                header = "Cleaned onsets/offsets from: " + temp_fn
                out_seg_fn = os.path.split(temp_fn)[-1][:-4] + '.txt'
                out_seg_fn = os.path.join(out_seg_dir, out_seg_fn)
                np.savetxt(out_seg_fn, np.array([]), header=header)
    if verbose:
        msg = "Retained " + str(sum(1 for i in colors if i == 'b'))
        msg += " out of " + str(len(colors)) + " segments."
        print(msg)
def refine_segments_pre_vae(seg_dirs, audio_dirs, out_seg_dirs, p, \
        n_samples=8000, num_imgs=1000, verbose=True, img_fn='temp.pdf', \
        tooltip_output_dir='temp'):
    """
    Manually remove noise by selecting regions of UMAP spectrogram projections.

    NOTE(review): this is the second definition of
    ``refine_segments_pre_vae`` in this file; at import time it shadows the
    earlier one. The two differ in the ``n_samples`` default (8000 here vs
    10000) and in the keyword passed to ``_get_specs`` (``n_samples=`` here,
    ``max_num_specs=`` earlier) -- confirm which is correct and delete the
    other.

    Parameters
    ----------
    seg_dirs : list of str
        Directories containing segmenting information
    audio_dirs : list of str
        Directories containing audio files
    out_seg_dirs : list of str
        Directories to write updated segmenting information to
    p : dict
        Segmenting parameters: TO DO: ADD REFERENCE!
    n_samples : int, optional
        Number of spectrograms to feed to UMAP. Defaults to ``8000``.
    num_imgs : int, optional
        Number of images to embed in the tooltip plot. Defaults to ``1000``.
    verbose : bool, optional
        Defaults to ``True``.
    img_fn : str, optional
        Image filename. Defaults to ``'temp.pdf'``.
    tooltip_output_dir : str, optional
        Where to save tooltip plot. Defaults to ``'temp'``.
    """
    if verbose:
        print("\nCleaning segments\n-----------------")
        print("Collecting spectrograms...")
    # One spectrogram per segment; `max_len` is presumably the longest
    # segment duration seen -- TODO confirm against `_get_specs`.
    specs, max_len, _ = _get_specs(audio_dirs, seg_dirs, p, n_samples=n_samples)
    specs = np.stack(specs)
    if verbose:
        print("Running UMAP...")
    transform = umap.UMAP(n_components=2, n_neighbors=20, min_dist=0.1, \
            metric='euclidean', random_state=42)
    # NumbaPerformanceWarning may be undefined depending on the numba
    # version; the resulting NameError is caught.
    with warnings.catch_warnings():
        try:
            warnings.filterwarnings("ignore", category=NumbaPerformanceWarning)
        except NameError:
            pass
        embed = transform.fit_transform(specs.reshape(len(specs), -1))
    if verbose:
        print("\tDone.")
    # Rectangles identifying noise regions in the projection.
    bounds = {'x1': [], 'x2': [], 'y1': [], 'y2': []}
    colors = ['b'] * len(embed)  # 'b' = keep, 'r' = noise
    first_iteration = True
    # Keep drawing boxes around noise.
    while True:
        _plot_helper(embed, colors, verbose=verbose, filename=img_fn)
        if first_iteration:
            if verbose:
                print("Writing html plot:")
            first_iteration = False
            title = "Select unwanted sounds:"
            tooltip_plot(embed, specs, num_imgs=num_imgs, title=title, \
                    output_dir=tooltip_output_dir)
            if verbose:
                print("\tDone.")
        if input("Press [q] to quit drawing rectangles or [return] continue: " \
                ) == 'q':
            break
        print("Select a rectangle containing noise:")
        x1 = _get_input("x1: ")
        x2 = _get_input("x2: ")
        y1 = _get_input("y1: ")
        y2 = _get_input("y2: ")
        # Normalize corners so x1 < x2 and y1 < y2 regardless of entry order.
        bounds['x1'].append(min(x1, x2))
        bounds['x2'].append(max(x1, x2))
        bounds['y1'].append(min(y1, y2))
        bounds['y2'].append(max(y1, y2))
        # Update scatter colors.
        colors = _update_colors(colors, embed, bounds)
    # Write files to out_seg_dirs, one parallel job per segment directory.
    gen = zip(seg_dirs, audio_dirs, out_seg_dirs, repeat(p), repeat(max_len), \
            repeat(transform), repeat(bounds), repeat(verbose))
    n_jobs = min(len(seg_dirs), os.cpu_count() - 1)
    Parallel(n_jobs=n_jobs)(delayed(_update_segs_helper)(*args) for args in gen)