def seperate(self, spectra, cmvn=None, apply_log=True):
        """
        Estimate ratio masks for two speakers and apply them to the mixture.

        Arguments:
            spectra: stft complex results T x F
            cmvn: python dict contains global mean/std, normalization is
                skipped when it is None or empty
            apply_log: feed log-magnitude (True) or plain magnitude (False)
                spectrogram to the network
        Returns:
            mask: list holding one torch tensor per speaker
            list holding one masked version of spectra per speaker
        Raises:
            ValueError: if spectra is not a complex object
        """
        if not np.iscomplexobj(spectra):
            raise ValueError("Input must be matrix in complex value")

        # network input: magnitude, optionally floored and moved to log domain
        features = np.abs(spectra)
        if apply_log:
            features = np.log(np.maximum(features, EPSILON))
        if cmvn:
            features = apply_cmvn(features, cmvn)

        net_input = th.tensor(features,
                              dtype=th.float32,
                              device=self.location)
        # only the two per-speaker estimates are needed to build the masks
        _, spk1_spec, spk2_spec, _, _, _ = self.nnet(net_input,
                                                     None,
                                                     per_train=False)

        # ratio masks, eps keeps the denominator away from zero
        total = spk1_spec + spk2_spec + th.finfo(th.float32).eps
        mask = [spk1_spec / total, spk2_spec / total]

        separated = []
        for spk_mask in mask:
            # index 0 picks the first entry along the leading axis
            # (presumably the batch axis -- TODO confirm against nnet output)
            separated.append(spectra * spk_mask[0].cpu().data.numpy())
        return mask, separated
    def _transform(self, mixture_specs, targets_specs_list):
        """
        Pack one utterance into network features plus the tensors used by the loss

        A complex mixture_specs yields both magnitude and phase entries
        (needed for PSM training, egs: apply_abs=false in .yaml),
        a real mixture_specs yields magnitude entries only (AM, ratio mask)

        Arguments:
            mixture_specs: non-log complex/real spectrogram
            targets_specs_list: list of non-log complex/real spectrogram,
                                one for each target speaker
        Returns:
            python dictionary with four attributes:
            num_frames: size of the first axis of mixture_specs
            feature: float32 tensor, log spectrogram (+ cmvn if self.mvn_dict is set)
            source_attr: dictionary with key "spectrogram" and, for complex
                         input, "phase", each holding a float32 tensor
            target_attr: same keys as source_attr, each holding a list of
                         float32 tensors (one per target)
        """

        def to_float_tensor(array):
            return th.tensor(array, dtype=th.float32)

        # NOTE: mixture_specs may be complex or real
        is_complex = np.iscomplexobj(mixture_specs)
        magnitude = np.abs(mixture_specs) if is_complex else mixture_specs

        # nnet input: floored log-magnitude, normalized when statistics exist
        feature = np.log(np.maximum(magnitude, EPSILON))
        if self.mvn_dict:
            feature = apply_cmvn(feature, self.mvn_dict)

        # information needed by the loss
        if is_complex:
            source_attr = {
                "spectrogram": to_float_tensor(magnitude),
                "phase": to_float_tensor(np.angle(mixture_specs))
            }
            target_attr = {
                "spectrogram":
                [to_float_tensor(np.abs(t)) for t in targets_specs_list],
                "phase":
                [to_float_tensor(np.angle(t)) for t in targets_specs_list]
            }
        else:
            source_attr = {"spectrogram": to_float_tensor(magnitude)}
            target_attr = {
                "spectrogram":
                [to_float_tensor(t) for t in targets_specs_list]
            }

        packed = {}
        packed["num_frames"] = mixture_specs.shape[0]
        packed["feature"] = to_float_tensor(feature)
        packed["source_attr"] = source_attr
        packed["target_attr"] = target_attr
        return packed
    def seperate(self, spectra, cmvn=None):
        """
        Run the network on a mixture and apply each predicted mask to it

            spectra: stft complex results T x F
            cmvn: python dict contains global mean/std, normalization is
                  skipped when it is None or empty
        Returns:
            list of per-speaker masks (numpy arrays) and
            list of per-speaker masked spectra
        Raises:
            ValueError: if spectra is not a complex object
        """
        if not np.iscomplexobj(spectra):
            raise ValueError("Input must be matrix in complex value")

        # floored log-magnitude, optionally mean/variance normalized
        features = np.log(np.maximum(np.abs(spectra), EPSILON))
        if cmvn:
            features = apply_cmvn(features, cmvn)

        net_input = th.tensor(features,
                              dtype=th.float32,
                              device=self.location)

        spk_masks = []
        separated = []
        for out_mask in self.nnet(net_input, train=False):
            # bring each mask back to the host as a numpy array
            mask = out_mask.cpu().data.numpy()
            spk_masks.append(mask)
            separated.append(spectra * mask)
        return spk_masks, separated
Example #4
0
 def _transform(self, mixture_specs, targets_specs_list):
     """
         Convert one utterance from numpy/list to torch types

         Returns a dictionary with keys:
             num_frames: size of the first axis of mixture_specs
             spectrogram: float32 tensor of mixture_specs (after cmvn when
                          self.mvn_dict is set)
             target_attr: int64 tensor, index of the largest target at each
                          position (argmax over the target axis)
             silent_mask: float32 tensor of the vad mask
     """
     # the vad mask has to come from the un-normalized input
     silent_mask = compute_vad_mask(mixture_specs,
                                    self.vad_threshold,
                                    apply_exp=True)
     if self.mvn_dict:
         mixture_specs = apply_cmvn(mixture_specs, self.mvn_dict)
     # stack targets on a new leading axis, then pick the dominant one
     dominant = np.array(targets_specs_list).argmax(axis=0)

     sample = {"num_frames": mixture_specs.shape[0]}
     sample["spectrogram"] = th.tensor(mixture_specs, dtype=th.float32)
     sample["target_attr"] = th.tensor(dominant, dtype=th.int64)
     sample["silent_mask"] = th.tensor(silent_mask, dtype=th.float32)
     return sample
Example #5
0
 def _transform(self, mixture_specs, targets_specs_list):
     """
         Convert one utterance from numpy/list to torch types

         Returns a dictionary with keys:
             num_frames: size of the first axis of mixture_specs
             spectrogram: float32 tensor of mixture_specs (after cmvn when
                          self.mvn_dict is set)
             target_attr: int64 tensor, argmax over the target axis of
                          targets_specs_list
             silent_mask: float32 tensor of the vad mask
     """
     # vad mask is derived first, while the input is still un-normalized
     vad = compute_vad_mask(mixture_specs,
                            self.vad_threshold,
                            apply_exp=True)
     normalized = mixture_specs
     if self.mvn_dict:
         normalized = apply_cmvn(mixture_specs, self.mvn_dict)
     # index of the largest target at every position
     target_index = np.argmax(np.array(targets_specs_list), axis=0)

     float_type, index_type = th.float32, th.int64
     return {
         "num_frames": normalized.shape[0],
         "spectrogram": th.tensor(normalized, dtype=float_type),
         "target_attr": th.tensor(target_index, dtype=index_type),
         "silent_mask": th.tensor(vad, dtype=float_type)
     }
Example #6
0
    def seperate(self, spectra, cmvn=None):
        """
        Separate a mixture using the masks produced by self._cluster

            spectra: stft complex results T x F
            cmvn: python dict contains global mean/std, normalization is
                  skipped when it is None or empty
        Returns:
            pca_mat: first value returned by self._cluster
            spk_masks: per-speaker masks returned by self._cluster
            list of per-speaker masked spectra (spectra * mask)
        Raises:
            ValueError: if spectra is not a complex object
        """
        if not np.iscomplexobj(spectra):
            raise ValueError("Input must be matrix in complex value")
        # compute log-magnitude spectrogram
        log_spectra = np.log(np.maximum(np.abs(spectra), EPSILON))
        # compute vad mask before do mvn
        # NOTE: use builtin bool, the np.bool alias was removed in NumPy 1.24
        vad_mask = compute_vad_mask(log_spectra,
                                    threshold_db=40).astype(bool)

        # normalization only affects the clustering input, not the vad mask
        cluster_input = apply_cmvn(log_spectra,
                                   cmvn) if cmvn else log_spectra
        pca_mat, spk_masks = self._cluster(cluster_input, vad_mask)

        return pca_mat, spk_masks, [
            spectra * spk_mask for spk_mask in spk_masks
        ]
Example #7
0
    def seperate(self, spectra, cmvn=None):
        """
        Separate a mixture using the masks produced by self._cluster

            spectra: stft complex results T x F
            cmvn: python dict contains global mean/std, normalization is
                  skipped when it is None or empty
        Returns:
            pca_mat: first value returned by self._cluster
            spk_masks: per-speaker masks returned by self._cluster
            list of per-speaker masked spectra (spectra * mask)
        Raises:
            ValueError: if spectra is not a complex object
        """
        if not np.iscomplexobj(spectra):
            raise ValueError("Input must be matrix in complex value")
        # compute log-magnitude spectrogram
        log_spectra = np.log(np.maximum(np.abs(spectra), EPSILON))
        # compute vad mask before do mvn
        # NOTE: np.bool was deprecated in NumPy 1.20 and removed in 1.24,
        # the builtin bool gives the same dtype
        vad_mask = compute_vad_mask(log_spectra,
                                    threshold_db=40).astype(bool)

        # cmvn is applied after the vad mask so the mask sees raw log-magnitude
        if cmvn:
            cluster_input = apply_cmvn(log_spectra, cmvn)
        else:
            cluster_input = log_spectra
        pca_mat, spk_masks = self._cluster(cluster_input, vad_mask)

        return pca_mat, spk_masks, [
            spectra * spk_mask for spk_mask in spk_masks
        ]