def run(self):
    """Run the separation and wrap the per-source masks in mask objects.

    Computes the spectrograms, runs ``deep_clustering()``, extracts the masks
    of every channel with ``_extract_masks`` and stacks them along the third
    axis so that each source ends up with one multichannel array.

    Returns:
        list: ``self.masks``, holding one ``masks.BinaryMask`` or
        ``masks.SoftMask`` per source, depending on ``self.mask_type``.

    Raises:
        ValueError: If ``self.mask_type`` is neither ``self.BINARY_MASK`` nor
            ``self.SOFT_MASK``.
    """
    self._compute_spectrograms()
    self.deep_clustering()

    channel_count = self.audio_signal.num_channels
    flat_masks = []
    for channel in range(channel_count):
        flat_masks.extend(self._extract_masks(channel))

    # flat_masks is laid out channel by channel (this indexing assumes every
    # channel contributed self.num_sources entries); regroup it per source.
    stacked_masks = []
    for source in range(self.num_sources):
        per_channel = [
            flat_masks[source + channel * self.num_sources]
            for channel in range(channel_count)
        ]
        stacked_masks.append(np.dstack(per_channel))

    self.masks = []
    for mask_array in stacked_masks:
        if self.mask_type == self.BINARY_MASK:
            wrapped = masks.BinaryMask(np.round(mask_array))
        elif self.mask_type == self.SOFT_MASK:
            wrapped = masks.SoftMask(mask_array)
        else:
            raise ValueError('Unknown mask type {}!'.format(self.mask_type))
        self.masks.append(wrapped)

    return self.masks
def generate_mask(self, ch, assignments):
    """Build a binary mask for one channel from Mel spectrogram assignments.

    Args:
        ch: Index of the channel whose slice of ``self.silence_mask`` is used.
        assignments: Binary Mel spectrogram assignments; multiplied
            element-wise with ``self.silence_mask[ch, :, :]``.

    Returns:
        masks.BinaryMask: The rounded, normalised mask.

    Raises:
        ValueError: If ``self.audio_signal.stft_data`` is ``None``.
    """
    if self.audio_signal.stft_data is None:
        raise ValueError('Cannot extract masks with no signal_stft data')

    gated = self.silence_mask[ch, :, :] * assignments

    # Map through the inverse Mel filter bank and transpose the result.
    projected = np.dot(gated, self.inverse_mel_filter_bank).T

    # Shift by the magnitude of the minimum, then scale by the maximum; the
    # small constant keeps the division defined when the maximum is zero.
    projected += np.abs(np.min(projected))
    projected /= (projected.max() + 1e-7)

    return masks.BinaryMask(np.round(projected))
def run(self):
    """Compute harmonic and percussive masks for every channel.

    Calls ``librosa.decompose.hpss`` with ``mask=True`` on each channel of
    ``self.stft``, then moves the channel axis to the end so each mask array
    is indexed ``[:, :, channel]`` like ``self.stft``.

    Returns:
        list: ``self.masks`` with two entries, the harmonic mask followed by
        the percussive mask, each a ``masks.BinaryMask`` or
        ``masks.SoftMask`` depending on ``self.mask_type``.

    Raises:
        ValueError: If ``self.mask_type`` is neither ``self.BINARY_MASK`` nor
            ``self.SOFT_MASK``.
    """
    self._compute_spectrograms()

    harmonic_per_channel = []
    percussive_per_channel = []
    for channel in range(self.audio_signal.num_channels):
        harmonic, percussive = librosa.decompose.hpss(
            self.stft[:, :, channel],
            kernel_size=self.kernel_size,
            mask=True,
        )
        harmonic_per_channel.append(harmonic)
        percussive_per_channel.append(percussive)

    # The lists stack with the channel axis first; move it to the end.
    stacked_masks = [
        np.array(per_channel).transpose((1, 2, 0))
        for per_channel in (harmonic_per_channel, percussive_per_channel)
    ]

    self.masks = []
    for mask_array in stacked_masks:
        if self.mask_type == self.BINARY_MASK:
            wrapped = masks.BinaryMask(np.round(mask_array))
        elif self.mask_type == self.SOFT_MASK:
            wrapped = masks.SoftMask(mask_array)
        else:
            raise ValueError('Unknown mask type {}!'.format(self.mask_type))
        self.masks.append(wrapped)

    return self.masks
def _compute_masks(self):
    """Assign each time-frequency bin to its best-scoring source.

    For source ``i`` the attenuation peak ``self.atn_peak[i]`` and the delay
    peak ``self.delay_peak[i]`` give the score::

        |a_i * exp(-1j * frequency_matrix * d_i) * stft_ch0 - stft_ch1| ** 2
            / (1 + a_i ** 2)

    Each bin goes to the source with the lowest score, and one binary mask is
    built per source. Ties go to the lowest source index, because a later
    source only takes a bin when its score is strictly smaller.

    The previous implementation derived the first mask by XOR-ing the later
    masks together and never cleared a bin from an earlier mask when a later
    source won it. That is only correct for up to two sources; with three or
    more the masks overlapped. Tracking the winning source index per bin
    yields disjoint masks for any number of sources and identical results
    for one or two.

    Returns:
        list: ``self.result_masks`` with one ``masks.BinaryMask`` appended
        per source, in source order.
    """
    # NOTE(review): masks are appended, as before; this assumes the caller
    # has reset self.result_masks before calling -- confirm.
    best_score = np.full_like(self.stft_ch0, np.inf, dtype=float)
    best_source = np.zeros(best_score.shape, dtype=int)

    for i in range(self.num_sources):
        attenuation = self.atn_peak[i]
        phase = np.exp(-1j * self.frequency_matrix * self.delay_peak[i])
        score = (np.abs(attenuation * phase * self.stft_ch0 - self.stft_ch1) ** 2
                 / (1 + attenuation ** 2))

        # Strict comparison: an earlier source keeps the bin on a tie.
        improved = score < best_score
        best_source[improved] = i
        best_score[improved] = score[improved]

    self.result_masks.extend(
        masks.BinaryMask(best_source == i) for i in range(self.num_sources)
    )
    return self.result_masks
def run(self):
    """Create one mask per known source signal.

    Iterates over ``self.sources`` and builds a
    :class:`separation.masks.binary_mask.BinaryMask` or a
    :class:`separation.masks.soft_mask.SoftMask` for each one, depending on
    ``self.mask_type``.

    Binary masks compare the source-to-mixture magnitude ratio, in dB,
    against ``self.binary_db_threshold``:

        ``mask = 20 * log10((source_mag + EPSILON) / (mixture_mag + EPSILON))
        > binary_db_threshold``

    where ``mixture_mag`` is ``self._mixture_mag_spec``, ``EPSILON`` is
    ``constants.EPSILON`` and all operations are element-wise.

    Soft masks are produced by ``librosa.util.softmask``, called with the
    mixture magnitude spectrogram
    (``self.audio_signal.magnitude_spectrogram_data``) as the first argument
    and the source magnitude spectrogram as the second, together with
    ``power=self.power`` and ``split_zeros=self.split_zeros``.

    Returns:
        estimated_masks (list): ``self.result_masks``, the resulting mask
        objects in the same order as ``self.sources``.

    Raises:
        RuntimeError if ``self.mask_type`` is neither ``self.BINARY_MASK``
        nor ``self.SOFT_MASK``.
    """
    self._compute_spectrograms()
    self.result_masks = []

    for source in self.sources:
        source_mag = source.magnitude_spectrogram_data

        if self.mask_type == self.BINARY_MASK:
            # EPSILON on both sides keeps the ratio and its log finite.
            ratio = ((source_mag + constants.EPSILON)
                     / (self._mixture_mag_spec + constants.EPSILON))
            ratio_db = 20 * np.log10(ratio)
            source_mask = masks.BinaryMask(ratio_db > self.binary_db_threshold)
        elif self.mask_type == self.SOFT_MASK:
            # NOTE(review): the mixture is passed as X and the source as
            # X_ref -- confirm this argument order is the intended one.
            mixture_mag = self.audio_signal.magnitude_spectrogram_data
            source_mask = masks.SoftMask(
                librosa.util.softmask(
                    mixture_mag,
                    source_mag,
                    power=self.power,
                    split_zeros=self.split_zeros,
                )
            )
        else:
            raise RuntimeError('Unknown mask type: {}'.format(
                self.mask_type))

        self.result_masks.append(source_mask)

    return self.result_masks
def run(self):
    """
    Runs TransformerNMF on the magnitude spectrogram of each channel of the
    input audio signal. MFCCs of the returned templates matrix are clustered
    with ``self.clusterer`` and the resulting labels are used by
    ``_extract_masks`` to create the masks for each source.

    Note:
        The masks in self.result_masks are not returned in a particular order
        corresponding to the sources, but they are in the same order for each
        channel.

    Returns:
        result_masks (list): A list of :obj:`MaskBase`-derived objects for
        each source. (to get a list of :obj:`AudioSignal`-derived objects run
        :func:`make_audio_signals`)

    Raises:
        ValueError: If ``self.mask_type`` is neither ``self.BINARY_MASK`` nor
            ``self.SOFT_MASK``.

    Example:

    .. code-block:: python
        :linenos:

        signal = nussl.AudioSignal(path_to_input_file='input_name.wav')

        # Set up and run NMF MFCC
        nmf_mfcc = nussl.NMF_MFCC(signal, num_sources=2)

        # Returns a binary mask by default
        masks = nmf_mfcc.run()

        # Get audio signals
        sources = nmf_mfcc.make_audio_signals()

        # Output the sources
        for i, source in enumerate(sources):
            output_file_name = str(i) + '.wav'
            source.write_audio_to_file(output_file_name)
    """
    self.audio_signal.stft_params = self.stft_params
    self.audio_signal.stft()

    flat_masks = []
    channel_count = self.audio_signal.num_channels

    for channel in range(channel_count):
        magnitude = self.audio_signal.get_magnitude_spectrogram_channel(channel)

        # Factorise this channel's magnitude spectrogram.
        factoriser = transformer_nmf.TransformerNMF(
            input_matrix=magnitude,
            num_components=self.num_templates,
            seed=self.random_seed,
            should_do_epsilon=False,
            max_num_iterations=self.num_iterations,
            distance_measure=self.distance_measure,
        )
        activations, templates = factoriser.transform()

        # Cluster the selected MFCC rows of the templates; keep the labels.
        mfcc_rows = librosa.feature.mfcc(S=templates, n_mfcc=self.n_mfcc)
        mfcc_rows = mfcc_rows[self.mfcc_start:self.mfcc_end]
        self.clusterer.fit_transform(mfcc_rows.T)
        self.labeled_templates = self.clusterer.labels_

        flat_masks.extend(self._extract_masks(templates, activations, channel))

    # flat_masks is laid out channel by channel; regroup it per source so
    # multichannel signals get one stacked array per source.
    stacked_masks = []
    for source in range(self.num_sources):
        per_channel = [
            flat_masks[source + channel * self.num_sources]
            for channel in range(channel_count)
        ]
        stacked_masks.append(np.dstack(per_channel))

    # Wrap every numpy mask array in a MaskBase-derived object.
    self.result_masks = []
    for mask_array in stacked_masks:
        if self.mask_type == self.BINARY_MASK:
            wrapped = masks.BinaryMask(np.round(mask_array))
        elif self.mask_type == self.SOFT_MASK:
            wrapped = masks.SoftMask(mask_array)
        else:
            raise ValueError('Unknown mask type {}!'.format(
                self.mask_type))
        self.result_masks.append(wrapped)

    return self.result_masks
def run(self):
    """
    Creates a list of masks (as :class:`separation.masks.mask_base.MaskBase`
    objects, either :class:`separation.masks.binary_mask.BinaryMask` or
    :class:`separation.masks.soft_mask.SoftMask` depending on
    ``self.mask_type``) from the known source signals in ``self.sources``.

    Binary masks are created from the magnitude spectrograms with:

        ``mask = (source_mag >= (mixture_mag - source_mag))``

    where ``mixture_mag`` is ``self._mixture_mag_spec`` and both operations
    are element-wise.

    Soft masks are also created from the magnitude spectrograms:

        1) ``mask = (mixture_mag + eps) / (source_mag + eps)``
        2) ``mask = log(mask)``
        3) ``mask = (mask + abs(min(mask))) / max(mask)``

    where ``mixture_mag`` is ``self.audio_signal.magnitude_spectrogram_data``,
    ``eps`` is the float machine epsilon and all operations are element-wise.
    This gives a logarithmically scaled mask in the interval [0.0, 1.0].

    The normalised mask from step 3 is what gets wrapped in the ``SoftMask``.
    Previously the raw ratio from step 1 was passed by mistake, so the log
    scaling and normalisation were computed and then discarded.

    Returns:
        estimated_masks (list): ``self.result_masks``, the resulting mask
        objects in the same order as ``self.sources``.

    Raises:
        RuntimeError if ``self.mask_type`` is neither ``self.BINARY_MASK``
        nor ``self.SOFT_MASK``.
    """
    self._compute_spectrograms()
    self.result_masks = []

    for source in self.sources:
        if self.mask_type == self.BINARY_MASK:
            mag = source.magnitude_spectrogram_data
            mask = masks.BinaryMask(mag >= (self._mixture_mag_spec - mag))
        elif self.mask_type == self.SOFT_MASK:
            # TODO: This is a kludge. What is the actual right way to do this?
            # eps keeps the ratio finite and non-zero for silent bins, which
            # would otherwise become inf/NaN after the log and normalisation.
            eps = np.finfo(float).eps
            ratio = np.divide(
                self.audio_signal.magnitude_spectrogram_data + eps,
                source.magnitude_spectrogram_data + eps)

            log_sm = np.log(ratio)
            log_sm += np.abs(np.min(log_sm))

            # Skip the scaling when the peak is zero to avoid a 0/0 division.
            peak = np.max(log_sm)
            if peak > 0:
                log_sm /= peak

            # Wrap the normalised mask, not the raw ratio.
            mask = masks.SoftMask(log_sm)
        else:
            raise RuntimeError('Unknown mask type: {}'.format(
                self.mask_type))

        self.result_masks.append(mask)

    return self.result_masks