def seperate(self, spectra, cmvn=None, apply_log=True):
    """
    Estimate two speaker masks for a complex mixture STFT and apply them.

    Arguments:
        spectra: complex STFT of the mixture, T x F
        cmvn: optional python dict with global mean/std; when truthy the
              network input is passed through apply_cmvn with it
        apply_log: if True the network input is the log-magnitude (floored
                   at EPSILON), otherwise the plain magnitude
    Returns:
        a pair (masks, separated):
        masks: list of two torch tensors, each speaker output divided by
               the sum of both speaker outputs
        separated: list of two arrays, spectra multiplied by entry 0 of
                   each mask
    Raises:
        ValueError: if spectra is not a complex object
    """
    if not np.iscomplexobj(spectra):
        raise ValueError("Input must be matrix in complex value")

    # network input: magnitude, optionally log-compressed and normalized
    magnitude = np.abs(spectra)
    if apply_log:
        features = np.log(np.maximum(magnitude, EPSILON))
    else:
        features = magnitude
    if cmvn:
        features = apply_cmvn(features, cmvn)

    nnet_input = th.tensor(features,
                           dtype=th.float32,
                           device=self.location)
    # the network returns six values; only the two speaker outputs are used
    _, spk1_spec, spk2_spec, _, _, _ = self.nnet(nnet_input,
                                                 None,
                                                 per_train=False)

    # small constant keeps the ratio finite when both outputs are zero
    denominator = spk1_spec + spk2_spec + th.finfo(th.float32).eps
    mask = [spk1_spec / denominator, spk2_spec / denominator]

    # NOTE(review): index 0 presumably drops a leading batch axis added by
    # the network - confirm against the nnet output shape
    separated = [spectra * m[0].cpu().data.numpy() for m in mask]
    return mask, separated
def _transform(self, mixture_specs, targets_specs_list):
    """
    Pack one utterance into the tensors used by the network and the loss.

    A complex mixture_specs yields both magnitude and phase attributes
    (the original notes tie this to phase-aware mask training, configured
    in .yaml via apply_abs=false); a real mixture_specs yields magnitude
    only (ratio mask).

    Arguments:
        mixture_specs: non-log complex/real spectrogram of the mixture
        targets_specs_list: list of non-log complex/real spectrograms, one
                            per target speaker
    Returns:
        python dictionary with four attributes:
        num_frames: mixture_specs.shape[0]
        feature: float32 tensor of the log spectrogram floored at EPSILON,
                 passed through apply_cmvn when self.mvn_dict is truthy
        source_attr: dict with key "spectrogram", plus "phase" for
                     complex input, each holding a float32 tensor
        target_attr: same keys as source_attr, each holding a list of
                     float32 tensors (one per target)
    """
    def as_float(array):
        # every tensor handed to the loss is float32
        return th.tensor(array, dtype=th.float32)

    # NOTE: mixture_specs may be complex or real
    is_complex = np.iscomplexobj(mixture_specs)
    mixture_mag = np.abs(mixture_specs) if is_complex else mixture_specs

    # apply_log and cmvn, for nnet input
    log_spectra = np.log(np.maximum(mixture_mag, EPSILON))
    if self.mvn_dict:
        log_spectra = apply_cmvn(log_spectra, self.mvn_dict)

    # using dicts to pack information needed in loss
    if is_complex:
        source_attr = {
            "spectrogram": as_float(mixture_mag),
            "phase": as_float(np.angle(mixture_specs)),
        }
        target_attr = {
            "spectrogram": [as_float(np.abs(t)) for t in targets_specs_list],
            "phase": [as_float(np.angle(t)) for t in targets_specs_list],
        }
    else:
        source_attr = {"spectrogram": as_float(mixture_specs)}
        target_attr = {
            "spectrogram": [as_float(t) for t in targets_specs_list],
        }

    return {
        "num_frames": mixture_specs.shape[0],
        "feature": as_float(log_spectra),
        "source_attr": source_attr,
        "target_attr": target_attr
    }
def seperate(self, spectra, cmvn=None):
    """
    Run the mask network on a complex mixture STFT and apply its masks.

    Arguments:
        spectra: complex STFT of the mixture, T x F
        cmvn: optional python dict with global mean/std; when truthy the
              log-magnitude is passed through apply_cmvn with it
    Returns:
        a pair (spk_masks, separated):
        spk_masks: list of numpy arrays, one per network output
        separated: list with spectra multiplied by each mask
    Raises:
        ValueError: if spectra is not a complex object
    """
    if not np.iscomplexobj(spectra):
        raise ValueError("Input must be matrix in complex value")

    # log-magnitude spectrogram, floored at EPSILON before the log
    features = np.log(np.maximum(np.abs(spectra), EPSILON))
    if cmvn:
        features = apply_cmvn(features, cmvn)

    nnet_input = th.tensor(features,
                           dtype=th.float32,
                           device=self.location)
    out_masks = self.nnet(nnet_input, train=False)

    # bring every mask back to host memory as a numpy array
    spk_masks = []
    for out_mask in out_masks:
        spk_masks.append(out_mask.cpu().data.numpy())

    separated = [spectra * mask for mask in spk_masks]
    return spk_masks, separated
def _transform(self, mixture_specs, targets_specs_list):
    """
    Convert one utterance from numpy/list form into torch tensors.

    Arguments:
        mixture_specs: spectrogram of the mixture
        targets_specs_list: list of target spectrograms
    Returns:
        python dictionary with four attributes:
        num_frames: shape[0] of the (possibly normalized) mixture
        spectrogram: float32 tensor of the mixture, passed through
                     apply_cmvn when self.mvn_dict is truthy
        target_attr: int64 tensor holding, for each position, the index
                     of the target with the largest value
        silent_mask: float32 tensor of the mask from compute_vad_mask
    """
    # the vad mask must be computed on the mixture before cmvn changes it
    # NOTE(review): apply_exp=True presumably means the input is in the
    # log domain - confirm against compute_vad_mask
    silent_mask = compute_vad_mask(mixture_specs,
                                   self.vad_threshold,
                                   apply_exp=True)

    if self.mvn_dict:
        mixture_specs = apply_cmvn(mixture_specs, self.mvn_dict)

    # stack the targets on a new leading axis and pick the largest one
    dominant_index = np.array(targets_specs_list).argmax(axis=0)

    packed = {"num_frames": mixture_specs.shape[0]}
    packed["spectrogram"] = th.tensor(mixture_specs, dtype=th.float32)
    packed["target_attr"] = th.tensor(dominant_index, dtype=th.int64)
    packed["silent_mask"] = th.tensor(silent_mask, dtype=th.float32)
    return packed
def _transform(self, mixture_specs, targets_specs_list):
    """
    Turn numpy/list inputs for one utterance into torch tensors.

    Arguments:
        mixture_specs: spectrogram of the mixture
        targets_specs_list: list of target spectrograms
    Returns:
        python dictionary with four attributes:
        num_frames: shape[0] of the (possibly normalized) mixture
        spectrogram: float32 tensor of the mixture, passed through
                     apply_cmvn when self.mvn_dict is truthy
        target_attr: int64 tensor with the argmax over the targets
                     (axis 0 of the stacked list)
        silent_mask: float32 tensor of the mask from compute_vad_mask
    """
    # vad mask first: it has to see the mixture as it was before cmvn
    vad = compute_vad_mask(mixture_specs,
                           self.vad_threshold,
                           apply_exp=True)

    features = mixture_specs
    if self.mvn_dict:
        features = apply_cmvn(features, self.mvn_dict)

    # target embedding index: which target is largest at each position
    stacked_targets = np.array(targets_specs_list)
    target_index = np.argmax(stacked_targets, axis=0)

    return {
        "num_frames": features.shape[0],
        "spectrogram": th.tensor(features, dtype=th.float32),
        "target_attr": th.tensor(target_index, dtype=th.int64),
        "silent_mask": th.tensor(vad, dtype=th.float32)
    }
def seperate(self, spectra, cmvn=None):
    """
    Cluster a complex mixture STFT into speaker masks and apply them.

    Arguments:
        spectra: complex STFT of the mixture, T x F
        cmvn: optional python dict with global mean/std; when truthy the
              log-magnitude is passed through apply_cmvn before clustering
    Returns:
        a triple (pca_mat, spk_masks, separated):
        pca_mat, spk_masks: the two values returned by self._cluster
        separated: list with spectra multiplied by each mask in spk_masks
    Raises:
        ValueError: if spectra is not a complex object
    """
    if not np.iscomplexobj(spectra):
        raise ValueError("Input must be matrix in complex value")

    # compute log-magnitude spectrogram, floored at EPSILON before the log
    log_spectra = np.log(np.maximum(np.abs(spectra), EPSILON))

    # compute vad mask before doing mvn.
    # The builtin bool replaces the np.bool alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24 (AttributeError on current releases).
    vad_mask = compute_vad_mask(log_spectra, threshold_db=40).astype(bool)

    features = apply_cmvn(log_spectra, cmvn) if cmvn else log_spectra
    pca_mat, spk_masks = self._cluster(features, vad_mask)

    separated = [spectra * spk_mask for spk_mask in spk_masks]
    return pca_mat, spk_masks, separated
def seperate(self, spectra, cmvn=None):
    """
    Separate a complex mixture STFT by clustering and masking.

    Arguments:
        spectra: complex STFT of the mixture, T x F
        cmvn: optional python dict with global mean/std; when truthy the
              log-magnitude is passed through apply_cmvn before clustering
    Returns:
        a triple (pca_mat, spk_masks, separated):
        pca_mat, spk_masks: the two values returned by self._cluster
        separated: list with spectra multiplied by each mask in spk_masks
    Raises:
        ValueError: if spectra is not a complex object
    """
    if not np.iscomplexobj(spectra):
        raise ValueError("Input must be matrix in complex value")

    # log-magnitude spectrogram; EPSILON floors the magnitude before the log
    log_spectra = np.log(np.maximum(np.abs(spectra), EPSILON))

    # The vad mask is taken from the un-normalized log spectrogram.
    # np.bool no longer exists (deprecated in NumPy 1.20, removed in 1.24),
    # so the cast uses the builtin bool, which yields the same dtype.
    vad_mask = compute_vad_mask(log_spectra, threshold_db=40).astype(bool)

    if cmvn:
        cluster_input = apply_cmvn(log_spectra, cmvn)
    else:
        cluster_input = log_spectra
    pca_mat, spk_masks = self._cluster(cluster_input, vad_mask)

    return pca_mat, spk_masks, [
        spectra * spk_mask for spk_mask in spk_masks
    ]