Ejemplo n.º 1
0
 def test_create_random_noise(self):
     '''
     Add uniform noise to the test spectrogram, then check that
     at least one value grew, and that no value exceeds 255.
     '''
     original, _meta = SoundProcessor.load_spectrogram(self.one_spectro_file)
     noisy, _out_name = SoundProcessor.random_noise(original,
                                                    noise_type='uniform')
     # Some element must be larger than before:
     grew = noisy > original
     self.assertTrue(grew.any())
     # Nothing may leave the 0..255 value range at the top:
     in_range = noisy <= 255
     self.assertTrue(in_range.all())
    def save_updated_snippet(self, outdir, species, snippet_path, spectro_arr,
                             metadata):
        '''
        Save spectro_arr as a .png file with embedded
        metadata. The destination is:

            outdir/species/<base name of snippet_path>

        :param outdir: root of the destination directory tree
        :type outdir: str
        :param species: name of the subdirectory below outdir
            into which the snippet is placed
        :type species: str
        :param snippet_path: file name or path of the snippet;
            only its base name is used
        :type snippet_path: str
        :param spectro_arr: image data
        :type spectro_arr: np.array
        :param metadata: auxiliary info to include in the .png file
        :type metadata: {str : str}
        '''

        dest_path = os.path.join(outdir,
                                 species,
                                 os.path.basename(snippet_path))
        # The helper receives the full file path; presumably it
        # creates the missing parent directories (defined elsewhere):
        FileUtils.ensure_directory_existence(dest_path)
        SoundProcessor.save_image(spectro_arr, dest_path, metadata)
Ejemplo n.º 3
0
 def create_original_spectrograms(self, samples, n, species_wav_input_dir,
                                  species_spectrogram_output_dir):
     '''
     Randomly pick n of the given samples (without replacement),
     and have SoundProcessor create a spectrogram for each.

     :param samples: names of all candidate samples
     :type samples: [str]
     :param n: how many samples to pick; converted with int()
     :type n: {int | float}
     :param species_wav_input_dir: passed through to
         SoundProcessor.create_spectrogram()
     :type species_wav_input_dir: str
     :param species_spectrogram_output_dir: passed through to
         SoundProcessor.create_spectrogram()
     :type species_spectrogram_output_dir: str
     '''
     chosen = random.sample(samples, int(n))
     for wav_name in chosen:
         SoundProcessor.create_spectrogram(
             wav_name,
             species_wav_input_dir,
             species_spectrogram_output_dir,
             n_mels=128)
Ejemplo n.º 4
0
    def test_chop_one_spectrogram_file(self):
        '''
        Chop the long test spectrogram into a temporary
        directory. Verify the number of snippet files, and
        the duration and species embedded in one of them.
        '''
        with tempfile.TemporaryDirectory(prefix='chopping',
                                         dir='/tmp') as tmp_root:
            chopper = SpectrogramChopper(
                self.spectro_root,
                tmp_root,
                overwrite_policy=WhenAlreadyDone.OVERWRITE)

            species_dir_name = Path(self.spectro_file).parent.stem
            snippet_dir = os.path.join(tmp_root, species_dir_name)
            snippet_width = chopper.chop_one_spectro_file(
                self.spectro_file,
                snippet_dir,
                'DOVE',
                skip_size=self.skip_size)

            # Count the windows whose end lies before the
            # end of the recording, advancing by skip_size:
            expected_count = 0
            window_end = snippet_width
            while window_end < self.duration:
                expected_count += 1
                window_end += self.skip_size

            self.assertEqual(len(os.listdir(snippet_dir)), expected_count)

            # Check embedded metadata of one snippet:
            first_snippet = Utils.listdir_abs(snippet_dir)[0]
            _spectro, snippet_md = SoundProcessor.load_spectrogram(first_snippet)
            recorded_width = round(float(snippet_md['duration(secs)']), 3)
            self.assertEqual(recorded_width, round(snippet_width, 3))
            self.assertEqual(snippet_md['species'], 'DOVE')
Ejemplo n.º 5
0
    def test_time_shift(self):
        '''
        Time-shift the test audio file into a temporary
        directory, and compare the result to the original
        via assertEqualDurSR().
        '''
        with tempfile.TemporaryDirectory(dir='/tmp',
                                         prefix='aud_tests') as scratch_dir:
            shifted_path = SoundProcessor.time_shift(self.one_aud_file,
                                                     scratch_dir)
            # The shifted audio content cannot be predicted;
            # all that can be checked is what assertEqualDurSR()
            # compares (presumably duration and sample rate):
            self.assertEqualDurSR(shifted_path, self.one_aud_file)
Ejemplo n.º 6
0
    def test_create_one_spectro(self):
        '''
        Create one spectrogram from an audio file. Ensure
        that sample rate, duration, and species are included
        in the metadata of the destination .png file.
        '''
        mp3_path = os.path.join(self.cur_dir,
                                'audio_aug_tst_data/DYSMEN_S/dys1.mp3')
        samples, sample_rate = SoundProcessor.load_audio(mp3_path)

        with tempfile.NamedTemporaryFile(dir='/tmp',
                                         prefix='spectro',
                                         suffix='.png',
                                         delete=True) as png_fd:
            png_path = png_fd.name
            SoundProcessor.create_spectrogram(samples,
                                              sample_rate,
                                              png_path,
                                              info={'species': 'DYSMEN_C'})
            _spectro, embedded_info = SoundProcessor.load_spectrogram(png_path)
            # All values come back from the .png as strings:
            expected_info = {'sr': '22050', 'duration': '10.8', 'species': 'DYSMEN_C'}
            self.assertDictEqual(embedded_info, expected_info)
Ejemplo n.º 7
0
    def test_create_new_sample(self):
        '''
        Create one time-masked, and one frequency-masked
        augmentation of the test spectrogram. Each result
        must differ from the original image.
        '''
        with tempfile.TemporaryDirectory(prefix='test_spectro',
                                         dir='/tmp') as dst_dir:
            # Time masking first, then frequency masking:
            for aug_method in (ImgAugMethod.TMASK, ImgAugMethod.FMASK):
                aug_path = self.spectro_augmenter_median.create_new_sample(
                    self.one_spectro_file, dst_dir, aug_method)

                orig_img, _md = SoundProcessor.load_spectrogram(
                    self.one_spectro_file)
                aug_img, _md = SoundProcessor.load_spectrogram(aug_path)

                # The two images shouldn't be the same:
                self.assertFalse((orig_img == aug_img).all())
Ejemplo n.º 8
0
    def set_metadata(cls, png_fpath, info_to_set, outfile=None, setting=False):
        '''
        Modify metadata in a .png file. Distinguishes between
        replacing the existing metadata (setting == True), and
        adding to the existing metadata (setting == False).
        Either way, takes a dict of metadata in info_to_set.

        If outfile is None the modification is in-place, i.e.
        the result is written back to png_fpath.

        :param png_fpath: input png file
        :type png_fpath: str
        :param info_to_set: dict of metadata information
        :type info_to_set: {str : str}
        :param outfile: if provided, the image with the new
            metadata is written to this path instead of png_fpath
        :type outfile: {None | str}
        :param setting: if True, the file's metadata is replaced
            entirely by info_to_set. If False, info_to_set is
            merged into the existing metadata; values of keys
            present in both are taken from info_to_set
        :type setting: bool
        :raise TypeError: if info_to_set is not a dict
        '''

        if not isinstance(info_to_set, dict):
            # Report the type of the offending argument:
            raise TypeError(
                f"info_to_set must be a dict, not {type(info_to_set)}")

        img, metadata = SoundProcessor.load_spectrogram(png_fpath)
        if outfile is None:
            outfile = png_fpath

        if setting:
            metadata = info_to_set
        else:
            metadata.update(info_to_set)

        SoundProcessor.save_image(img, outfile, metadata)
Ejemplo n.º 9
0
    def load_img(self, img_path):
        '''
        Returns a two-tuple: image tensor, img metadata

        The image is loaded as a numpy array, turned into a
        tensor with one added leading dimension, and passed
        through self.transform_img.

        :param img_path: full path to image
        :type img_path: str
        :return the transformed image tensor, plus any metadata
            the image contains
        :rtype (torch.Tensor, {str : str})
        '''

        img_obj_np, metadata = SoundProcessor.load_spectrogram(img_path, to_nparray=True)
        # Add a leading dimension in front of the image dimensions:
        img_obj_tns = torch.tensor(img_obj_np).unsqueeze(dim=0)
        new_img_obj = self.transform_img(img_obj_tns)
        # Deliver what the docstring promises; the original ended
        # in a debugging print, and implicitly returned None:
        return new_img_obj, metadata
Ejemplo n.º 10
0
    def setUpClass(cls):
        '''
        One-time fixture: locate the long test spectrogram,
        count the .png files below the test data root, and
        read the recording duration from the spectrogram's
        embedded metadata.

        :raise AssertionError: if the test spectrogram has no
            'duration' metadata entry
        '''
        super(TestChopSpectrograms, cls).setUpClass()

        cls.skip_size = 2 # sec

        here = os.path.dirname(__file__)
        cls.cur_dir = here
        cls.spectro_root = os.path.join(here, 'spectro_data_long')
        cls.spectro_file = os.path.join(cls.spectro_root, 'DOVE/dove_long.png')

        png_files = Utils.find_in_dir_tree(cls.spectro_root,
                                           pattern='*.png',
                                           entry_type='file')
        cls.num_spectro_files = len(png_files)

        _spectro, spectro_md = SoundProcessor.load_spectrogram(cls.spectro_file)
        try:
            duration_entry = spectro_md['duration']
        except KeyError:
            raise AssertionError(f"Spectrogram test file {os.path.basename(cls.spectro_file)} has no duration metadata")
        cls.duration = float(duration_entry)

        cls.default_win_len = 5 # seconds
Ejemplo n.º 11
0
    def create_one_saliency_map(self, model, img_path):
        '''
        Compute, and display as a heatmap, the saliency map
        of one spectrogram image: the magnitude of the gradient
        of the model's highest score with respect to the input
        pixels.

        :param model: network to examine; it is switched to
            evaluation mode
        :type model: torch.nn.Module
        :param img_path: path to the spectrogram image
        :type img_path: str
        '''

        # We must run the model in evaluation mode
        model.eval()

        img_arr, _metadata = SoundProcessor.load_spectrogram(img_path)

        # The original code used an undefined name 'img' from here
        # on, and therefore raised NameError. Build the input tensor
        # from the loaded array. Gradients require a floating point
        # tensor.
        # NOTE(review): assumes img_arr is a 2-D array, which the
        # leading dimensions added below turn into
        # (batch=1, channel=1, height, width). If the model expects
        # another layout (e.g. 3 channels, or a resized image), apply
        # the same transform used at training time instead -- confirm.
        img = torch.tensor(img_arr, dtype=torch.float32)
        while img.dim() < 4:
            img = img.unsqueeze(0)
        img.requires_grad_()

        # Forward pass through the model to get the
        # scores

        scores = model(img)

        # Get the index corresponding to the maximum score and the maximum score itself.
        score_max_index = scores.argmax()
        score_max = scores[0, score_max_index]

        # Backward function on score_max performs
        # the backward pass in the computation graph and
        # calculates the gradient of score_max with respect
        # to nodes in the computation graph:

        score_max.backward()

        # Saliency is the gradient with respect to the input image.
        # To derive a single saliency value for each pixel (i, j),
        # take the maximum magnitude across dimension 1 (the channel
        # dimension under the layout assumed above):

        saliency, _ = torch.max(img.grad.data.abs(), dim=1)

        # code to plot the saliency map as a heatmap
        plt.imshow(saliency[0], cmap=plt.cm.hot)
        plt.axis('off')
        plt.show()
    def snips_iterator(self, root=None):
        '''
        Generator that yields the metadata dicts of snippets,
        ordered by the snippets' start times.

        If root is a generator or other non-string iterable,
        we assume that it will yield absolute paths to snippets.
        If root is None, we set root to self.snip_dir.
        Else, assume root is a directory, and obtain the snippet
        paths from self.dir_tree_snip_gen(root).

        All snippets are loaded up front to collect their
        metadata; only then does the first dict get yielded.
        Each yielded dict is the snippet's embedded metadata,
        with the snippet's path added under key 'snip_path'.

        :param root: directory with snippet files, or an
            iterable of snippet paths
        :type root: {None | str | Iterable(str)}
        :return a series of snippet metadata dicts, sorted by
            the numeric value of their 'start_time(secs)' entries
        :rtype {str : Any}
        '''
        if root is None:
            root = self.snip_dir

        # Strings are iterable, too, but they denote a directory.
        # Generators count as Iterables, so one test covers both:
        if not isinstance(root, str) and isinstance(root, Iterable):
            # Root is a ready made source of snippet paths.
            snip_gen = root
        else:
            # Make a generator of snippets that originate
            # from the given recording (id):
            snip_gen = self.dir_tree_snip_gen(root)

        # Collect the metadata of all snippets:
        metadata_list = []
        for snip_path in snip_gen:
            _img_arr, metadata = SoundProcessor.load_spectrogram(
                snip_path, to_nparray=False)
            metadata['snip_path'] = snip_path
            metadata_list.append(metadata)

        # Metadata read from .png files arrives as strings.
        # Sort numerically; otherwise '10.0' would sort
        # ahead of '2.0':
        time_sorted_metadata = sorted(
            metadata_list,
            key=lambda md: float(md['start_time(secs)']))

        # Now keep feeding the sorted metadata. Iterating a
        # list cannot raise StopIteration by itself; the guard
        # only matters if a consumer throw()s StopIteration
        # into this generator, in which case we end quietly:
        try:
            yield from time_sorted_metadata
        except StopIteration:
            return
Ejemplo n.º 13
0
    def chop_one_audio_file(self, in_dir, species, spectro_fname, out_dir, window_len = 5):
        """
        Generates window_len second sound file snippets
        and associated spectrograms from one sound file of
        arbitrary length. One window starts at every whole
        second of the recording.

        For each window the outputs are:

            <out_dir>/spectrograms/<species>/<stem>_sw-start<sec>.png
            <out_dir>/wav-files/<species>/<stem>_sw-start<sec>.wav

        The .wav file is only written if self.generate_wav_files
        is set. Existing output files are left untouched if
        self.overwrite_policy is WhenAlreadyDone.SKIP.

        :param in_dir: root directory of the audio files; the
            file is read from <in_dir>/<species>/<spectro_fname>
        :type in_dir: str
        :param species: name of the species subdirectory, both
            below in_dir, and below the output directories
        :type species: str
        :param spectro_fname: basefile name of audio file to chop
        :type spectro_fname: str
        :param out_dir: root directory under which spectrogram
            and audio snippets will be saved (in different subdirs)
        :type out_dir: str
        :param window_len: length of each snippet in seconds
        :type window_len: int
        """

        audio_path = os.path.join(in_dir, species, spectro_fname)
        orig, sample_rate = librosa.load(audio_path)
        # Keyword arguments: newer librosa versions no longer
        # accept these arguments positionally:
        length = int(librosa.get_duration(y=orig, sr=sample_rate))

        # Loop invariants:
        fpath = Path(spectro_fname)
        skip_existing = self.overwrite_policy == WhenAlreadyDone.SKIP

        # NOTE(review): a recording of exactly window_len seconds
        # yields no snippet, because the last start time is
        # length - window_len - 1. Confirm that this is intended.
        for start_time in range(length - window_len):
            window_name = f"{fpath.stem}_sw-start{start_time}"
            window_file_name = str(fpath.parent.joinpath(window_name))

            outfile_spectro = os.path.join(out_dir,
                                           'spectrograms/',
                                           species,
                                           f"{window_file_name}.png")

            outfile_audio = os.path.join(out_dir,
                                         'wav-files',
                                         species,
                                         f"{window_file_name}.wav")

            spectro_done = os.path.exists(outfile_spectro)
            audio_done   = os.path.exists(outfile_audio)

            # The original tested the enum member WhenAlreadyDone.SKIP
            # itself, which is always truthy. Completed windows were
            # therefore skipped under every overwrite policy:
            if spectro_done and audio_done and skip_existing:
                # No brainer no need to even read the audio excerpt:
                continue

            # NOTE(review): this skips an existing spectrogram
            # regardless of the overwrite policy when no wav
            # files are requested; kept as in the original.
            if spectro_done and not audio_done and not self.generate_wav_files:
                continue

            # Need an audio snippet either for
            # a spectrogram or wav file:
            window_audio, sr = librosa.load(audio_path,
                                            offset=start_time,
                                            duration=window_len)

            if not spectro_done or not skip_existing:
                SoundProcessor.create_spectrogram(window_audio, sr, outfile_spectro)

            if self.generate_wav_files and not (audio_done and skip_existing):
                sf.write(outfile_audio, window_audio, sr)
    def match_snippet(self, selections, snip_metadata, outdir):
        '''
        Workhorse:
        Examines the time span covered by one snippet S.
        Finds the selection table row (if any) whose begin/end
        times overlap with S's time span.

        Modifies the snippet's metadata by setting:

         'species'           : <species of the selection, or 'noise'>
         'low_freq'          : <low bound of frequency involved in vocalization>
         'high_freq'         : <high bound of frequency involved in vocalization>
         'multiple_species'  : <list of species heard simultaneously>
         'type'              : <whether Song/Call/Call-1/Call-Trill...>

        and saves the snippet with that metadata below
        outdir/<species>. If other species are heard in the
        same selection, one additional copy of the snippet is
        saved for each of them below outdir/<other-species>,
        with the species name appended to the file name.
        Snippets not covered by any selection are saved
        below outdir/noise.

        The selections are expected to be a list of dicts as
        produced by Utils.read_raven_selection_table(). This
        means:

           o Dicts are sorted by selection start time
           o the d['mix'] is a possibly empty list of species
                names.

        :param selections: list of dicts, each containing the
            information of one selection table row.
        :type selections: [{str : str}]
        :param snip_metadata: metadata of one snippet, with 'snip_path'
            entry added
        :type snip_metadata: {str : Any}
        :param outdir: directory where to write the updated
            snippets. Value is allowed to be same as snippet_path,
            but then the snippets will be updated in place
        :type outdir: str
        :raise KeyError: if the snippet's metadata lacks the
            start or end time
        :raise ValueError: if start or end time are not numbers
        '''

        snippet_path = snip_metadata['snip_path']
        # This is the second time the snippet is loaded!!!!
        # First time was in method snips_iterator() of
        # class SelTblSnipsAssoc. See TODO at top for need
        # to avoid this time consuming operation:
        spectro_arr, metadata = SoundProcessor.load_spectrogram(snippet_path)

        # Start and end time of the snippet. The original printed,
        # and then ignored a failure to obtain the start time, which
        # led to a NameError further down. Fail right here, naming
        # the offending file:
        try:
            snippet_tstart = float(metadata['start_time(secs)'])
            snippet_tend = float(metadata['end_time(secs)'])
        except KeyError as e:
            raise KeyError(
                f"Snippet {snippet_path} has no {e} metadata entry") from e

        # Find the index of the select table row (dict)
        # whose time interval overlaps:

        snippet_interval = Interval(snippet_tstart, snippet_tend)

        # The returned selection dict *may* be a
        # 'phantom' selection, which is created in
        # find_covering_sel() when a snippet straddles
        # multiple selection rows:

        sel_dict = self.find_covering_sel(selections, snippet_interval)

        if sel_dict is None:
            # This snippet_path was not involved in
            # any of the human-created selection
            # rectangles. ******* JUST RETURN IF MIX IS NON-EMPTY?
            metadata['species'] = 'noise'
            self.save_updated_snippet(
                outdir,
                'noise',
                Path(snippet_path).name,  # without parents
                spectro_arr,
                metadata)
            return

        low_f = sel_dict['Low Freq (Hz)']
        high_f = sel_dict['High Freq (Hz)']
        species = sel_dict['species']
        voc_type = sel_dict['type']  # Song/Call/Song-Trill, etc.
        # Get possibly empty list of species
        # names that also occur in the selection:
        multiple_species = sel_dict['mix']

        # The species, and entries in the mix field
        # will be used as part of file names.
        # So ensure that they have no spaces.
        # Also: convert the "no bird" entries to
        # 'noise':

        if species == 'no bird':
            species = 'noise'
        else:
            species = species.replace(' ', '_')

        # Don't add noise as "also-present":
        new_multiple_species = [
            entry.replace(' ', '_')
            for entry in multiple_species
            if entry not in ('no bird', 'noise')
        ]

        metadata['species'] = species
        metadata['low_freq'] = low_f
        metadata['high_freq'] = high_f
        metadata['type'] = voc_type
        metadata['multiple_species'] = new_multiple_species

        # If this snippet is marked as noise,
        # but the multiple_species field indicates
        # that the snippet juts into non-noise
        # selections, don't save this snippet as noise
        # right here, but allow the loop over the
        # multiple_species below to save it as a
        # non-noise snippet. I.e. save here, unless
        # the snippet is noise with overlapping species:

        if species != 'noise' or len(new_multiple_species) == 0:
            self.save_updated_snippet(
                outdir,
                species,
                Path(snippet_path).name,  # without parents
                spectro_arr,
                metadata)

        # If the snippet_path matched, and contained multiple
        # overlapping calls, create a copy of the snippet_path
        # for each species.

        # Ensure the human coder did not include
        # the primary species in the list of overlaps.
        # This is the same list object that is stored in
        # metadata['multiple_species'], so the copies saved
        # below no longer list the primary species:
        if species in new_multiple_species:
            new_multiple_species.remove(species)

        # No noise check needed in this loop: "no bird" and
        # "noise" entries were filtered out above.
        snippet_p = Path(snippet_path)
        for overlap_species in new_multiple_species:
            metadata['species'] = overlap_species
            # New name for a copy of this snippet_path:
            new_fname = f"{snippet_p.stem}_{overlap_species}{snippet_p.suffix}"
            self.save_updated_snippet(outdir, overlap_species, new_fname,
                                      spectro_arr, metadata)
Ejemplo n.º 15
0
    def chop_one_spectro_file(
        cls,
        spectro_fname,
        out_dir,
        species_name,
        window_len=5,
        skip_size=2,
        original_duration=None,
        overwrite_policy=WhenAlreadyDone.ASK,
    ):
        """
        Generates window_len second spectrogram snippets
        from spectrograms files of arbitrary length. 
        
        To compute the number of time slices to extract
        for each snippet, the width of the spectrogram time
        slices in fractional seconds must be known. That width
        can be approximated if the play length of the underlying
        audio is known (even if the precise fft settings are unavailable).
        
        If the given .png file contains metadata with a 'duration' 
        key, then the corresponding value is used as the duration of 
        the original audio file in fractional seconds. This metadata
        will be present if the .png file was created with the 
        SoundProcessor.create_spectrogram(). 
        
        To enable use of spectrogram images created elsewhere, callers
        can instead supply original_duration in fractional seconds.
        
        For now, if neither the embedded metadata, nor the original_duration
        is supplied, a ValueError is raised. 
        
        Each snippet's metadata is a copy of the source file's 
        metadata, with 'duration(secs)', 'start_time(secs)', 
        'end_time(secs)', and 'species' added.
    
        :param spectro_fname: full path to spectrogram file to chop
        :type spectro_fname: str
        :param out_dir: directory passed to create_snippet_fpath()
            for constructing the path of each snippet
        :type out_dir: str
        :param species_name: name of species to embed in the 
            metadata of each snippet
        :type species_name: str
        :param window_len: number of seconds to be covered by each snippet
        :type window_len: int
        :param skip_size: number of seconds to shift right in 
            time for the start of each chop
        :type skip_size: {int | float}
        :param original_duration: duration of the underlying audio
            in fractional seconds; only used if the file's metadata
            has no 'duration' entry
        :type original_duration: {None | float}
        :param overwrite_policy: what to do if a snippet file
            already exists: skip it, overwrite it, or ask the user
        :type overwrite_policy: WhenAlreadyDone
        :return: the true time width in seconds of each snippet,
            or None if the spectrogram is shorter than window_len
        :rtype: {None | float}
        :raise ValueError: if neither embedded duration metadata is found
            in the given file, nor original_duration is provided
        """

        # Read the spectrogram, getting an np array:
        spectro_arr, metadata = SoundProcessor.load_spectrogram(spectro_fname)
        duration = metadata.get('duration', None)

        if duration is None:
            if original_duration is None:
                raise ValueError(
                    f"Time duration of original recording cannot be determined for {spectro_fname}"
                )
            duration = original_duration
        duration = float(duration)

        # A spectrogram shorter than one window yields
        # no snippet at all: there are no partial snippets.
        if duration < window_len:
            return None

        # Note: Also have sample rate ('sr') and species ('label')
        # in the metadata, but don't need those here.

        _freq_bands, time_slices = spectro_arr.shape
        # Time in fractions of second
        # per spectrogram column:
        twidth = duration / time_slices

        # Integer of duration (which is in seconds):
        time_dur_int = int(np.ceil(duration))
        time_upper_bound = 1 + time_dur_int - skip_size

        # Caller specifies skip_size and window
        # length in *seconds*. Convert to spectrogram
        # time slices (with rounding error):

        samples_win_len = int(window_len // twidth)
        # Does samples_win_len satisfy the
        # minimum spectrogram snippet width for
        # pretrained models?
        samples_win_len = max(cls.MIN_SNIPPET_WIDTH, samples_win_len)

        time_true_each_snippet = samples_win_len * twidth

        samples_skip_size = int(skip_size // twidth)
        if skip_size > 0 and samples_skip_size == 0:
            # A positive skip narrower than one spectrogram
            # column would give range() a zero step. Advance
            # by the finest available resolution instead:
            samples_skip_size = 1

        # For fractional skip sizes the computed bound can lie
        # beyond the end of the spectrogram. Clamp rather than
        # assert: the loop below stops by itself as soon as a
        # snippet comes out too narrow:
        samples_upper_bound = min(int(time_upper_bound // twidth),
                                  time_slices)

        for samples_start_idx in range(0, samples_upper_bound,
                                       samples_skip_size):

            # Absolute start time of this snippet
            # within the entire spectrogram:
            wall_start_time = samples_start_idx * twidth
            # Create a name for the snippet file:
            snippet_path = cls.create_snippet_fpath(spectro_fname,
                                                    round(wall_start_time),
                                                    out_dir)

            if os.path.exists(snippet_path):
                if overwrite_policy == WhenAlreadyDone.SKIP:
                    # Next snippet:
                    continue
                if overwrite_policy == WhenAlreadyDone.ASK and \
                   not Utils.user_confirm(
                       f"Snippet {Path(snippet_path).stem} exists, overwrite?",
                       default='N'):
                    continue

            # Chop: All rows, columns from current
            #       window start for window length samples:
            snippet_end_idx = samples_start_idx + samples_win_len
            snippet_data = spectro_arr[:, samples_start_idx:snippet_end_idx]
            _num_rows, num_cols = snippet_data.shape
            if num_cols < samples_win_len:
                # Ran off the end of the spectrogram: the
                # remainder is too narrow for a full snippet:
                break

            snippet_info = metadata.copy()
            # Add the snippet specific entries:
            snippet_info['duration(secs)'] = time_true_each_snippet
            snippet_info['start_time(secs)'] = wall_start_time
            snippet_info['end_time(secs)'] = wall_start_time + time_true_each_snippet
            snippet_info['species'] = species_name
            SoundProcessor.save_image(snippet_data, snippet_path, snippet_info)
        return time_true_each_snippet
Ejemplo n.º 16
0
    def extract_metadata(cls, png_src, show=False, printout=False):
        '''
        Given a .png file path, or a directory:
        
            o extract metadata
            o print the metadata if requested (printout == True)
            o show the .png file if requested (show == True)
            o return: if png_src is a dir: a list of metadata dicts
                      else just one metadata dict
        
        If png_src is not a file, the (path, metadata) pairs
        are obtained from cls.metadata_list(png_src), and are
        printed/shown/returned as requested.
        
        :param png_src: .png file or directory tree root containing 
            .png file
        :type png_src: str
        :param show: whether or not to display the .png file
        :type show: bool
        :param printout: whether or not to print metadata
            to stdout
        :type printout: bool
        :return: list of metadata dicts, if png_src is a dir, 
            else one metadata dict
        :rtype {{str : str} | [{str : str}]}
        '''
        if show:
            # To save startup time, only load
            # matplotlib if needed:
            import matplotlib.pyplot as plt

        src_is_file = os.path.isfile(png_src)
        if src_is_file:
            single_img, metadata = SoundProcessor.load_spectrogram(png_src)
            snip_path_md_gen = iter([(png_src, metadata)])
        else:
            # metadata_list() does not deliver the image arrays;
            # they are loaded below, and only when needed:
            single_img = None
            snip_path_md_gen = cls.metadata_list(png_src)

        md_list = []
        for snippet_path, metadata in snip_path_md_gen:
            md_list.append(metadata)

            # Start each round without a left-over image from
            # the previous snippet. The original reused the first
            # loaded image for all subsequent snippets, and failed
            # with UnboundLocalError for show without printout:
            img = single_img

            if printout:
                try:
                    print(f"{Path(snippet_path).name}---", end='')
                    if img is None:
                        img, _md = SoundProcessor.load_spectrogram(
                            snippet_path)
                    print(f"Shape: {img.shape}")

                    if len(metadata) == 0:
                        print("No metadata")
                    pprint.pprint(metadata, sort_dicts=False, indent=4)
                except Exception as _e:
                    # Printing is best effort only:
                    print("No metadata available")

            if show:
                if img is None:
                    img, _md = SoundProcessor.load_spectrogram(snippet_path)
                fig = plt.figure()
                ax = fig.add_subplot(1, 1, 1)
                ax.set_title(os.path.basename(snippet_path))
                ax.set_axis_off()
                plt.imshow(img, cmap='gray')
                plt.show()

        return md_list[0] if src_is_file else md_list
Ejemplo n.º 17
0
    def create_new_sample(self, sample_name, paths, num_augs=1):
        '''
        Create one augmented variant of an audio sample. 
        Randomly draws num_augs distinct augmentation names
        from self.AUDIO_AUG_NAMES with probabilities self.P_DIST.
        The audio augmentations ("add_noise", "time_shift")
        are applied one after the other, and a spectrogram is
        created from the result. The "warp" augmentation works
        on spectrograms, and is therefore applied last.

        :param sample_name: file name of the audio sample
        :type sample_name: str
        :param paths: three directories: the species' audio input
            directory, the species' audio output directory, and
            the species' spectrogram output directory
        :type paths: (str, str, str)
        :param num_augs: number of augmentations to combine
        :type num_augs: int
        :raise ValueError: if a drawn augmentation name is
            not one of "add_noise", "time_shift", or "warp"
        '''

        (species_wav_input_dir, species_wav_output_dir,
         species_spectrogram_output_dir) = paths

        # Work with a plain list of Python strings:
        aug_choices = np.random.choice(self.AUDIO_AUG_NAMES,
                                       size=num_augs,
                                       p=self.P_DIST,
                                       replace=False).tolist()

        # Warping must be done after all the other augmentations take place,
        # after spectrogram is created
        warp = "warp" in aug_choices
        if warp:
            aug_choices.remove("warp")

        for i, aug_name in enumerate(aug_choices):
            if i != 0:  # if not first augmentation, then, source wav is in output wav directory
                species_wav_input_dir = species_wav_output_dir
            if aug_name == "add_noise":
                # Add_noise; which noise to add will be chosen at random
                sample_name = SoundProcessor.add_background(
                    sample_name,
                    self.NOISE_PATH,
                    species_wav_input_dir,
                    species_wav_output_dir,
                    len_noise_to_add=5.0)
            elif aug_name == "time_shift":
                sample_name = SoundProcessor.time_shift(
                    sample_name, species_wav_input_dir, species_wav_output_dir)
            else:
                # The original fell through here, and then failed
                # with UnboundLocalError, or silently reused the
                # result of the previous augmentation:
                raise ValueError(
                    f"Unsupported audio augmentation: {aug_name!r}")

        # create new spectrogram if augmented
        if aug_choices:
            sample_name = SoundProcessor.create_spectrogram(
                sample_name,
                species_wav_output_dir,
                species_spectrogram_output_dir,
                n_mels=128)

        if warp:
            # If sample is unaugmented to this point, sample_name still
            # names the audio file. Since SoundProcessor.warp_spectrogram
            # expects sample_name to be *.png, we replace the extension,
            # whatever its length. If sample_name is already *.png,
            # there is no change.
            sample_name = os.path.splitext(sample_name)[0] + ".png"
            warped_name = SoundProcessor.warp_spectrogram(
                sample_name, species_spectrogram_output_dir,
                species_spectrogram_output_dir)
            # if warp is not the only augmentation,
            # we do not want spectrogram before warp
            if aug_choices:
                assert (warped_name != sample_name)
                fname = os.path.join(species_spectrogram_output_dir,
                                     sample_name)
                os.remove(fname)
Ejemplo n.º 18
0
    def create_new_sample(self,
                          sample_path,
                          out_dir,
                          method,
                          noise_path=None):
        '''
        Given one audio recording and an audio augmentation
        method, compute that augmentation, and have the
        resulting new audio file written to out_dir.

        Currently available types of audio augmentation technique:

            o adding background sounds (AudAugMethod.ADD_NOISE)
            o random time shifts       (AudAugMethod.TIME_SHIFT)
            o randomly changing volume (AudAugMethod.VOLUME)

        Failures of the augmentation are logged, and the
        exception is returned rather than raised. For any
        other value of method nothing is done, and None
        is returned.

        :param sample_path: absolute path to audio sample
        :type sample_path: str
        :param out_dir: destination of resulting new samples
        :type out_dir: str
        :param method: the audio augmentation method to apply
        :type method: AudAugMethod
        :param noise_path: full path to audio files with background
            noises to overlay onto audio (wind, rain, etc.). Ignored
            unless method is AudAugMethod.ADD_NOISE. Default:
            self.NOISE_PATH
        :type noise_path: {None | str}
        :return: Newly created audio file (full path) or an Exception
            object whose e.args attribute is a tuple with the error
            msg (if any) plus a manually added one
        :rtype: {str | Exception | None}
        '''

        def note_failure(exc, action):
            # Log the failure, then append the log message to
            # the exception's args, so that the caller has both
            # the original error text, and the info about which
            # sample and augmentation were involved. Slicing
            # (rather than indexing) args keeps this safe for
            # exceptions that were raised without arguments:
            sample_fname = Path(sample_path).stem
            msg = f"Failed to {action} {sample_fname} ({repr(exc)})"
            self.log.err(msg)
            exc.args = exc.args[:1] + (msg,)
            return exc

        if method == AudAugMethod.ADD_NOISE:
            if noise_path is None:
                noise_path = self.NOISE_PATH
            # Add rain, wind, or such at random; use the
            # noise source the caller asked for:
            try:
                return SoundProcessor.add_background(
                        sample_path,
                        noise_path,
                        out_dir,
                        len_noise_to_add=5.0)
            except Exception as e:
                return note_failure(e, 'add background sounds to')

        if method == AudAugMethod.TIME_SHIFT:
            try:
                return SoundProcessor.time_shift(sample_path, out_dir)
            except Exception as e:
                return note_failure(e, 'time shift on')

        if method == AudAugMethod.VOLUME:
            try:
                return SoundProcessor.change_sample_volume(sample_path, out_dir)
            except Exception as e:
                return note_failure(e, 'modify volume on')

        # Unrecognized method: nothing was created
        return None
Ejemplo n.º 19
0
    def create_new_sample(self, sample_path, out_dir, method):
        '''
        Given one spectrogram file, and an image augmentation
        method, compute that augmentation, and write the new
        spectrogram to out_dir. The new file's name is the
        original file's stem, followed by the name fragment
        returned by the augmentation, followed by the original
        extension. The metadata of the original is copied to
        the new file, with key 'augmentation' set to one of
        'noise', 'fmask', or 'tmask'.

        Currently available types of image augmentation technique:

            o adding noise      (ImgAugMethod.NOISE)
            o frequency masking (ImgAugMethod.FMASK)
            o time masking      (ImgAugMethod.TMASK)

        Failures of the augmentation itself are logged, and
        None is returned. Exceptions during loading of the
        source spectrogram, or saving of the result are not
        caught here.

        :param sample_path: absolute path to spectrogram
        :type sample_path: str
        :param out_dir: destination of resulting new spectros
        :type out_dir: str
        :param method: the (spectrogram) image augmentation method to apply
        :type method: ImgAugMethod
        :return: Newly created spectro file (full path) or None,
            if the augmentation failed, or method is not one of
            the above.
        :rtype: {str | None}
        '''

        def log_failure(exc, action):
            # One place for the failure message, so that each
            # augmentation reports under its own name:
            sample_fname = Path(sample_path).stem
            self.log.err(
                f"Failed to {action} {sample_fname} ({repr(exc)})")

        spectro, metadata = SoundProcessor.load_spectrogram(sample_path)

        if method == ImgAugMethod.NOISE:
            try:
                # Default is uniform noise:
                new_spectro, out_fname = SoundProcessor.random_noise(spectro)
                metadata['augmentation'] = 'noise'
            except Exception as e:
                log_failure(e, 'add noise to')
                return None

        elif method == ImgAugMethod.FMASK:
            try:
                # Horizontal bands:
                new_spectro, out_fname = SoundProcessor.freq_mask(
                    spectro,
                    max_height=15  # num freq bands
                )
                metadata['augmentation'] = 'fmask'
            except Exception as e:
                log_failure(e, 'apply frequency mask to')
                return None

        elif method == ImgAugMethod.TMASK:
            try:
                # Vertical bands:
                new_spectro, out_fname = SoundProcessor.time_mask(
                    spectro,
                    max_width=15  # num time ticks
                )
                metadata['augmentation'] = 'tmask'
            except Exception as e:
                log_failure(e, 'apply time mask to')
                return None

        else:
            # Unrecognized method: nothing to save
            return None

        # Augmentation succeeded: build the destination
        # file name, and save image plus metadata:
        sample_p = Path(sample_path)
        appended_fname = sample_p.stem + out_fname + sample_p.suffix
        out_path = os.path.join(out_dir, appended_fname)
        SoundProcessor.save_image(new_spectro, out_path, metadata)
        return out_path