Code Example #1
    def run(self):
        """

        Returns:

        """
        self._compute_spectrograms()
        self.deep_clustering()

        uncollated_masks = []
        for i in range(self.audio_signal.num_channels):
            uncollated_masks += self._extract_masks(i)

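        # Reorder mask arrays so that the channels are collated correctly (this allows for multichannel signals)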
        collated_masks = [np.dstack([uncollated_masks[s + ch * self.num_sources]
                                     for ch in range(self.audio_signal.num_channels)])
                          for s in range(self.num_sources)]

        self.masks = []

        for mask in collated_masks:
            if self.mask_type == self.BINARY_MASK:
                mask = np.round(mask)
                mask_object = masks.BinaryMask(mask)
            elif self.mask_type == self.SOFT_MASK:

                mask_object = masks.SoftMask(mask)
            else:
                raise ValueError('Unknown mask type {}!'.format(self.mask_type))
            self.masks.append(mask_object)
        return self.masks
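
A minimal usage sketch for the method above, assuming it belongs to nussl's deep clustering separator (the class name ``DeepClustering`` and its constructor arguments are assumptions; the AudioSignal / run / make_audio_signals pattern follows the NMF_MFCC and Repet examples later in this collection):

    import nussl

    signal = nussl.AudioSignal(path_to_input_file='input_name.wav')

    # Class name and arguments are assumed for illustration
    dc = nussl.DeepClustering(signal, num_sources=2)
    dc_masks = dc.run()  # list of BinaryMask or SoftMask objects, one per source

    # Convert the masks into separated AudioSignal objects and write them out
    for i, source in enumerate(dc.make_audio_signals()):
        source.write_audio_to_file('source_{}.wav'.format(i))
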
Code Example #2
    def run(self):
        """

        Returns:
            background (AudioSignal): An AudioSignal object with repeating background in
            background.audio_data
            (to get the corresponding non-repeating foreground run self.make_audio_signals())

        Example:
             ::

        """
        # High pass filter cutoff freq. (in # of freq. bins), +1 to match MATLAB implementation
        self.high_pass_cutoff = int(
            np.ceil(self.high_pass_cutoff * (self.stft_params.n_fft_bins - 1) /
                    self.audio_signal.sample_rate)) + 1

        self._compute_spectrograms()

        # separate the mixture background by masking
        background_stft = []
        background_mask = []
        for i in range(self.audio_signal.num_channels):
            repeating_mask = self.compute_ft2d_mask(self.ft2d[:, :, i])
            # high-pass filter the foreground
            repeating_mask[0:self.high_pass_cutoff, :] = 1
            background_mask.append(repeating_mask)

            # apply mask
            stft_with_mask = repeating_mask * self.stft[:, :, i]
            background_stft.append(stft_with_mask)

        background_stft = np.array(background_stft).transpose((1, 2, 0))
        self.background = AudioSignal(
            stft=background_stft, sample_rate=self.audio_signal.sample_rate)
        self.background.istft(
            self.stft_params.window_length,
            self.stft_params.hop_length,
            self.stft_params.window_type,
            overwrite=True,
            use_librosa=self.use_librosa_stft,
            truncate_to_length=self.audio_signal.signal_length)

        background_mask = np.array(background_mask).transpose(
            (1, 2, 0)).astype('float')
        background_mask = masks.SoftMask(background_mask)
        if self.mask_type == self.BINARY_MASK:
            background_mask = background_mask.mask_to_binary(
                self.mask_threshold)

        self.result_masks = [background_mask, background_mask.inverse_mask()]

        return self.result_masks
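
As with the other separators in this collection, the masks are usually turned back into audio with make_audio_signals(). A minimal sketch, assuming the method above belongs to nussl's 2D Fourier transform separator exposed as ``nussl.FT2D`` (the class name and the make_audio_signals return order are assumptions modeled on the Repet example below):

    import nussl

    signal = nussl.AudioSignal(path_to_input_file='input_name.wav')
    ft2d = nussl.FT2D(signal)  # class name assumed for illustration
    background_mask, foreground_mask = ft2d.run()

    # The repeating background is kept on the object; the non-repeating foreground
    # comes from make_audio_signals(), per the docstring above.
    background, foreground = ft2d.make_audio_signals()
    background.write_audio_to_file('background.wav')
    foreground.write_audio_to_file('foreground.wav')
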
Code Example #3
File: repet_sim.py  Project: zhangwen464/nussl
    def run(self):
        """
        Runs REPET-SIM, a variant of REPET that uses a cosine similarity matrix to find similar
        frames for median filtering.

        Returns:
            result_masks (list): ``[background_mask, foreground_mask]`` as :obj:`MaskBase`-derived
            objects (to get the corresponding :obj:`AudioSignal` objects run :func:`make_audio_signals`).
        """
        # High pass filter cutoff freq. (in # of freq. bins), +1 to match MATLAB implementation
        self.high_pass_cutoff = int(
            np.ceil(
                float(self.high_pass_cutoff) *
                (self.stft_params.n_fft_bins - 1) /
                self.audio_signal.sample_rate) + 1)
        low = 1 if self.matlab_fidelity else 0
        self._compute_spectrograms()
        self.similarity_indices = self._get_similarity_indices()

        background_stft = []
        background_mask = []
        for i in range(self.audio_signal.num_channels):
            repeating_mask = self._compute_mask(
                self.magnitude_spectrogram[:, :, i])

            # high-pass filter the foreground
            repeating_mask[low:self.high_pass_cutoff, :] = 1
            background_mask.append(repeating_mask)

            stft_with_mask = repeating_mask * self.stft[:, :, i]
            background_stft.append(stft_with_mask)

        # Set STFT in correct order
        background_stft = np.array(background_stft).transpose((1, 2, 0))
        self._make_background_signal(background_stft)

        # make a mask and return
        background_mask = np.array(background_mask).transpose((1, 2, 0))
        background_mask = masks.SoftMask(background_mask)
        if self.mask_type == self.BINARY_MASK:
            background_mask = background_mask.mask_to_binary(
                self.mask_threshold)

        self.result_masks = [background_mask, background_mask.inverse_mask()]

        return self.result_masks
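
A minimal usage sketch for REPET-SIM, assuming the class is exposed as ``nussl.RepetSim`` (the class name and the make_audio_signals return order are assumptions modeled on the Repet example below):

    import nussl

    signal = nussl.AudioSignal(path_to_input_file='input_name.wav')
    repet_sim = nussl.RepetSim(signal)  # class name assumed for illustration
    background_mask, foreground_mask = repet_sim.run()

    background, foreground = repet_sim.make_audio_signals()
    background.write_audio_to_file('background.wav')
    foreground.write_audio_to_file('foreground.wav')
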
Code Example #4
File: melodia.py  Project: zhangwen464/nussl
    def run(self):
        """

        Returns:
            foreground (AudioSignal): An AudioSignal object with melodic foreground in
            foreground.audio_data
            (to get the corresponding background run self.make_audio_signals())

        Example:
             ::

        """
        # High pass filter cutoff freq. (in # of freq. bins), +1 to match MATLAB implementation
        self.high_pass_cutoff = int(
            np.ceil(self.high_pass_cutoff * (self.stft_params.n_fft_bins - 1) /
                    self.audio_signal.sample_rate)) + 1
        self._compute_spectrum()

        # separate the mixture foreground melody by masking
        if self.melody_signal is None:
            self.extract_melody()
            self.create_melody_signal(100)

        foreground_mask = self.create_harmonic_mask(self.melody_signal)
        foreground_mask[0:self.high_pass_cutoff, :] = 0

        foreground_mask = masks.SoftMask(foreground_mask)
        if self.mask_type == self.BINARY_MASK:
            foreground_mask = foreground_mask.mask_to_binary(
                self.mask_threshold)

        self.foreground_mask = foreground_mask
        self.background_mask = foreground_mask.invert_mask()

        self.foreground = self.audio_signal.apply_mask(foreground_mask)
        self.foreground.istft(
            self.stft_params.window_length,
            self.stft_params.hop_length,
            self.stft_params.window_type,
            overwrite=True,
            use_librosa=self.use_librosa_stft,
            truncate_to_length=self.audio_signal.signal_length)

        return [self.background_mask, self.foreground_mask]
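
A minimal usage sketch, assuming the method above belongs to a class exposed as ``nussl.Melodia`` (the class name is an assumption; note that run() returns the masks while the separated foreground signal is kept on the object):

    import nussl

    signal = nussl.AudioSignal(path_to_input_file='input_name.wav')
    melodia = nussl.Melodia(signal)  # class name assumed for illustration
    background_mask, foreground_mask = melodia.run()

    # make_audio_signals() is assumed to return (background, foreground), as in the Repet example
    background, foreground = melodia.make_audio_signals()
    foreground.write_audio_to_file('melody.wav')
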
Code Example #5
    def run(self):
        """

        Returns:

        Example:
             ::

        """
        self._compute_spectrograms()

        # compute harmonic and percussive masks for each channel
        harmonic_masks = []
        percussive_masks = []
        for i in range(self.audio_signal.num_channels):
            # apply mask
            harmonic_mask, percussive_mask = librosa.decompose.hpss(self.stft[:, :, i],
                                                                    kernel_size=self.kernel_size,
                                                                    mask=True)
            harmonic_masks.append(harmonic_mask)
            percussive_masks.append(percussive_mask)

        # make a mask and return
        harmonic_mask = np.array(harmonic_masks).transpose((1, 2, 0))
        percussive_mask = np.array(percussive_masks).transpose((1, 2, 0))
        both_masks = [harmonic_mask, percussive_mask]
        
        self.masks = []
        
        for mask in both_masks:
            if self.mask_type == self.BINARY_MASK:
                mask = np.round(mask)
                mask_object = masks.BinaryMask(mask)
            elif self.mask_type == self.SOFT_MASK:
                mask_object = masks.SoftMask(mask)
            else:
                raise ValueError('Unknown mask type {}!'.format(self.mask_type))
            self.masks.append(mask_object)
        return self.masks
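
The actual harmonic/percussive split above is delegated to librosa, which makes the core step easy to reproduce outside of nussl. A standalone single-channel sketch (the file name and kernel size are placeholders):

    import librosa

    y, sr = librosa.load('input_name.wav', sr=None, mono=True)
    stft = librosa.stft(y)

    # With mask=True, librosa.decompose.hpss returns (harmonic_mask, percussive_mask)
    # rather than the masked STFTs, mirroring the per-channel loop above.
    harmonic_mask, percussive_mask = librosa.decompose.hpss(stft, kernel_size=31, mask=True)

    harmonic_audio = librosa.istft(harmonic_mask * stft)
    percussive_audio = librosa.istft(percussive_mask * stft)
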
Code Example #6
    def run(self):
        """
        Creates a list of masks (as :class:`separation.masks.mask_base.MaskBase` objects, either 
        :class:`separation.masks.binary_mask.BinaryMask` or :class:`separation.masks.soft_mask.SoftMask` 
        depending on how the object was instantiated) from a list of known source signals (``source_list`` 
        in the constructor).
        
        Returns a list of :class:`separation.masks.mask_base.MaskBase` objects (one for each input signal) 
        in the order that they were provided when this object was initialized.
        
        Binary masks are created based on the magnitude spectrogram using the following formula:

                ``mask = (20 * np.log10(source.mag_spec / mixture.mag_spec)) > binary_db_threshold``

        Where '``/``' is an element-wise division and '``>``' is an element-wise logical greater-than.
        
        
        Soft masks are also created based on the magnitude spectrogram, but are computed with
        :func:`librosa.util.softmask` applied to the mixture and provided-source magnitude
        spectrograms (using the configured ``power`` and ``split_zeros`` options), which yields
        a mask in the interval [0.0, 1.0].
        
        
        Returns:
            estimated_masks (list): List of resultant :class:`separation.masks.mask_base.MaskBase` objects created. 
            Masks in this list are in the same order that ``source_list`` (and :attr:`sources`) are in.
                
        Raises:
            RuntimeError: If an unknown mask type is provided (options are ``BinaryMask`` or ``SoftMask``).

        """
        self._compute_spectrograms()
        self.result_masks = []

        for source in self.sources:
            mag = source.magnitude_spectrogram_data  # Alias this variable, for easy reading
            if self.mask_type == self.BINARY_MASK:
                div = np.divide(mag + constants.EPSILON,
                                self._mixture_mag_spec + constants.EPSILON)
                cur_mask = (20 * np.log10(div)) > self.binary_db_threshold
                mask = masks.BinaryMask(cur_mask)

            elif self.mask_type == self.SOFT_MASK:
                soft_mask = librosa.util.softmask(
                    self.audio_signal.magnitude_spectrogram_data,
                    mag,
                    power=self.power,
                    split_zeros=self.split_zeros)

                mask = masks.SoftMask(soft_mask)
            else:
                raise RuntimeError('Unknown mask type: {}'.format(
                    self.mask_type))

            self.result_masks.append(mask)

        return self.result_masks
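
Both branches above reduce to a few array operations on the magnitude spectrograms; a toy illustration with synthetic data (the shapes, the epsilon value, and the 0 dB threshold are arbitrary placeholders):

    import numpy as np
    import librosa

    eps = 1e-16               # stand-in for constants.EPSILON
    binary_db_threshold = 0   # placeholder threshold, in dB

    rng = np.random.RandomState(0)
    source_mag = rng.rand(513, 100)                # magnitude spectrogram of one known source
    mixture_mag = source_mag + rng.rand(513, 100)  # mixture magnitude (at least as large here)

    # Binary branch: threshold the source-to-mixture ratio in dB
    ratio_db = 20 * np.log10((source_mag + eps) / (mixture_mag + eps))
    binary_mask = ratio_db > binary_db_threshold

    # Soft branch: librosa's softmask, with the same argument order as the code above
    soft_mask = librosa.util.softmask(mixture_mag, source_mag, power=1)
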
Code Example #7
    def run(self):
        """ This function calls TransformerNMF on the magnitude spectrogram of each channel in the input audio signal.
        The templates and activation matrices returned are clustered using K-Means clustering. These clusters are used
        to create mask objects for each source. Note: The masks in self.result_masks are not returned in a particular
        order corresponding to the sources, but they are in the same order for each channel.

        Returns:
            result_masks (list): A list of :obj:`MaskBase`-derived objects for each source.
            (to get a list of :obj:`AudioSignal`-derived objects run :func:`make_audio_signals`)

        Example:

        .. code-block:: python
            :linenos:

            signal = nussl.AudioSignal(path_to_input_file='input_name.wav')

            # Set up and run NMF MFCC
            nmf_mfcc =  nussl.NMF_MFCC(signal, num_sources=2) # Returns a binary mask by default
            masks = nmf_mfcc.run()

            # Get audio signals
            sources = nmf_mfcc.make_audio_signals()

            # Output the sources
            for i, source in enumerate(sources):
                output_file_name = str(i) + '.wav'
                source.write_audio_to_file(output_file_name)
        """
        self.audio_signal.stft_params = self.stft_params
        self.audio_signal.stft()

        uncollated_masks = []
        n_chan = self.audio_signal.num_channels
        for ch in range(n_chan):
            channel_stft = self.audio_signal.get_magnitude_spectrogram_channel(ch)

            # Set up NMF and run
            nmf = transformer_nmf.TransformerNMF(
                input_matrix=channel_stft,
                num_components=self.num_templates,
                seed=self.random_seed,
                should_do_epsilon=False,
                max_num_iterations=self.num_iterations,
                distance_measure=self.distance_measure)

            channel_activation_matrix, channel_templates_matrix = nmf.transform()

            # Cluster the templates matrix into Mel frequencies and retrieve labels
            cluster_templates = librosa.feature.mfcc(
                S=channel_templates_matrix,
                n_mfcc=self.n_mfcc)[self.mfcc_start:self.mfcc_end]
            self.clusterer.fit_transform(cluster_templates.T)
            self.labeled_templates = self.clusterer.labels_

            # Extract sources from signal
            uncollated_masks += self._extract_masks(channel_templates_matrix,
                                                    channel_activation_matrix,
                                                    ch)

        # Reorder mask arrays so that the channels are collated correctly (this allows for multichannel signals)
        collated_masks = [
            np.dstack([
                uncollated_masks[s + ch * self.num_sources]
                for ch in range(n_chan)
            ]) for s in range(self.num_sources)
        ]

        # Put each numpy array mask into a MaskBase object
        self.result_masks = []
        for mask in collated_masks:
            if self.mask_type == self.BINARY_MASK:
                mask = np.round(mask)
                mask_object = masks.BinaryMask(mask)
            elif self.mask_type == self.SOFT_MASK:
                mask_object = masks.SoftMask(mask)
            else:
                raise ValueError('Unknown mask type {}!'.format(
                    self.mask_type))
            self.result_masks.append(mask_object)

        return self.result_masks
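
The clustering step in the middle of the loop is compact enough to show in isolation: MFCCs are computed from the NMF templates matrix and K-means assigns each template to a source. A toy sketch with a random templates matrix (the shapes, n_mfcc, and the MFCC slice bounds stand in for the object's attributes):

    import numpy as np
    import librosa
    from sklearn.cluster import KMeans

    num_sources = 2
    n_mfcc = 13
    mfcc_start, mfcc_end = 1, 13   # placeholders for self.mfcc_start / self.mfcc_end

    rng = np.random.RandomState(0)
    templates = np.abs(rng.randn(513, 25))  # stand-in for channel_templates_matrix (freq x templates)

    # MFCC features for each NMF template, then K-means over the templates
    cluster_features = librosa.feature.mfcc(S=templates, n_mfcc=n_mfcc)[mfcc_start:mfcc_end]
    clusterer = KMeans(n_clusters=num_sources)
    clusterer.fit(cluster_features.T)
    labeled_templates = clusterer.labels_   # one source label per NMF template
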
Code Example #8
File: repet.py  Project: zhangwen464/nussl
    def run(self):
        """ Runs the original REPET algorithm

        Returns:
            masks (:obj:`MaskBase`): A :obj:`MaskBase`-derived object with repeating background time-frequency data.
            (to get the corresponding non-repeating foreground run :func:`make_audio_signals`)

        Example:
            
        .. code-block:: python
            :linenos:
            
            signal = nussl.AudioSignal(path_to_input_file='input_name.wav')

            # Set up and run Repet
            repet = nussl.Repet(signal)  # Returns a soft mask by default
            masks = repet.run() # or repet()

            # Get audio signals
            background, foreground = repet.make_audio_signals()

            # output the background
            background.write_audio_to_file('background.wav')

        """
        # High pass filter cutoff freq. (in # of freq. bins), +1 to match MATLAB implementation
        self.high_pass_cutoff = int(
            np.ceil(self.high_pass_cutoff * (self.stft_params.n_fft_bins - 1) /
                    self.audio_signal.sample_rate)) + 1

        # the MATLAB implementation starts this band at index 1 (MATLAB arrays are 1-indexed)
        low = 1 if self.matlab_fidelity else 0

        self._compute_spectrograms()
        self.repeating_period = self._calculate_repeating_period()

        # separate the mixture background by masking
        background_stft = []
        background_mask = []
        for i in range(self.audio_signal.num_channels):
            repeating_mask = self._compute_repeating_mask(
                self.magnitude_spectrogram[:, :, i])

            # high-pass filter the foreground
            repeating_mask[low:self.high_pass_cutoff, :] = 1
            background_mask.append(repeating_mask)

            # apply mask
            stft_with_mask = repeating_mask * self.stft[:, :, i]
            background_stft.append(stft_with_mask)

        # make a new audio signal for the background
        background_stft = np.array(background_stft).transpose((1, 2, 0))
        self._make_background_signal(background_stft)

        # make a mask and return
        background_mask = np.array(background_mask).transpose((1, 2, 0))
        background_mask = masks.SoftMask(background_mask)
        if self.mask_type == self.BINARY_MASK:
            background_mask = background_mask.mask_to_binary(
                self.mask_threshold)

        self.result_masks = [background_mask, background_mask.inverse_mask()]

        return self.result_masks
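
The first statement of run() converts the high-pass cutoff from Hz into a frequency-bin index; a small worked example with placeholder values (100 Hz cutoff, 2049 frequency bins, and a 44.1 kHz sample rate are assumptions for illustration only):

    import numpy as np

    high_pass_cutoff = 100.0   # Hz (placeholder)
    n_fft_bins = 2049          # placeholder
    sample_rate = 44100        # placeholder

    # Same arithmetic as in run(); the +1 matches the 1-indexed MATLAB implementation
    cutoff_bin = int(np.ceil(high_pass_cutoff * (n_fft_bins - 1) / sample_rate)) + 1
    # 100 * 2048 / 44100 ~= 4.64, so ceil gives 5 and the cutoff bin is 6
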
Code Example #9
    def run(self):
        """
        Creates a list of masks (as :class:`separation.masks.mask_base.MaskBase` objects, either 
        :class:`separation.masks.binary_mask.BinaryMask` or :class:`separation.masks.soft_mask.SoftMask` 
        depending on how the object was instantiated) from a list of known source signals (``source_list`` 
        in the constructor).
        
        Returns a list of :class:`separation.masks.mask_base.MaskBase` objects (one for each input signal) 
        in the order that they were provided when this object was initialized.
        
        Binary masks are created based on the magnitude spectrogram using the following formula:
        
                ``mask = provided_source.mag_spec >= (mixture_mag_spec - provided_source.mag_spec)``

        Where '``-``' is an element-wise subtraction and '``>=``' is an element-wise logical
        greater-than-or-equal.
        
        
        Soft masks are also created based on the magnitude spectrogram but use the following formula:
        
                1) ``mask = mixture_mag_spec / provided_source.mag_spec``
                
                2) ``mask = log(mask)``
                
                3) ``mask = (mask + abs(min(mask))) / max(mask)``
                
        
        Where all arithmetic operations and log are element-wise. This provides a logarithmically scaled mask that is
        in the interval [0.0, 1.0].
        
        
        Returns:
            estimated_masks (list): List of resultant :class:`separation.masks.mask_base.MaskBase` objects created. 
            Masks in this list are in the same order that ``source_list`` (and :attr:`sources`) are in.
                
        Raises:
            RuntimeError: If an unknown mask type is provided (options are ``BinaryMask`` or ``SoftMask``).

        """
        self._compute_spectrograms()
        self.result_masks = []

        for source in self.sources:
            if self.mask_type == self.BINARY_MASK:
                mag = source.magnitude_spectrogram_data  # Alias this variable, for easy reading
                cur_mask = (mag >= (self._mixture_mag_spec - mag))
                mask = masks.BinaryMask(cur_mask)

            elif self.mask_type == self.SOFT_MASK:
                # TODO: This is a kludge. What is the actual right way to do this?
                sm = np.divide(self.audio_signal.magnitude_spectrogram_data,
                               source.magnitude_spectrogram_data)
                # log_sm1 = np.log(sm - np.min(sm) + 1)
                log_sm = np.log(sm)
                log_sm += np.abs(np.min(log_sm))
                log_sm /= np.max(log_sm)
                mask = masks.SoftMask(log_sm)  # use the log-scaled mask described in the docstring
            else:
                raise RuntimeError('Unknown mask type: {}'.format(
                    self.mask_type))

            self.result_masks.append(mask)

        return self.result_masks
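
Both docstring formulas are plain numpy operations; a toy illustration with synthetic magnitude spectrograms (the shapes and values are arbitrary, and the small offset just keeps the source strictly positive):

    import numpy as np

    rng = np.random.RandomState(0)
    source_mag = rng.rand(513, 100) + 1e-8          # magnitude spectrogram of one known source
    mixture_mag = source_mag + rng.rand(513, 100)   # mixture magnitude

    # Binary mask: the source is at least as large as the residual (mixture minus source)
    binary_mask = source_mag >= (mixture_mag - source_mag)

    # Soft mask: log of the mixture-to-source ratio, shifted and scaled into [0, 1]
    sm = mixture_mag / source_mag
    log_sm = np.log(sm)
    log_sm += np.abs(np.min(log_sm))
    log_sm /= np.max(log_sm)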