Ejemplo n.º 1
0
    def _store_features(self, batch, outputs, cv_spkr_name, tdir):
        """Collect decoded features and converted F0 per utterance.

        For every item along dimension 0 of ``outputs["decoded"]`` the output
        path ``tdir / "<flbl>_org-<org>_cv-<cv>.wav"`` is built, its parent
        directory is created, and a feature dict is stored under that path.

        Args:
            batch: Mapping indexed per utterance through the keys
                ``"org_spkr_name"``, ``"flbl"``, ``"flen"``, ``"lcf0"``,
                ``"uv"`` and, when the feature type is ``"mcep"``, ``"cap"``.
            outputs: Mapping whose ``"decoded"`` entry supports ``size(0)``
                and per-utterance indexing.
            cv_spkr_name: Target speaker name; ``None`` means each utterance
                keeps its own original speaker as the target.
            tdir: Directory object supporting ``/`` whose result offers
                ``.parent.mkdir`` (presumably a ``pathlib.Path`` -- confirm
                against the caller).

        Returns:
            Dict mapping each wav path to a dict with the keys ``"feats"``,
            ``"normed_feat"``, ``"lcf0"``, ``"normed_lcf0"``, ``"uv"``,
            ``"f0"`` and, for ``"mcep"`` features, ``"cap"``.
        """
        feat_type = self.conf["feat_type"]
        decoded = outputs["decoded"]
        feats = {}
        for idx in range(decoded.size(0)):
            src_spkr = batch["org_spkr_name"][idx]
            # Without an explicit target the utterance is mapped onto itself.
            tgt_spkr = cv_spkr_name if cv_spkr_name is not None else src_spkr
            wavf = tdir / "{}_org-{}_cv-{}.wav".format(
                batch["flbl"][idx], src_spkr, tgt_spkr
            )
            wavf.parent.mkdir(parents=True, exist_ok=True)

            # Decoder output, truncated to the utterance's frame count.
            n_frames = batch["flen"][idx]
            normed = to_numpy(decoded[idx][:n_frames])
            entry = {}
            feats[wavf] = entry
            entry["feats"] = self.scaler[feat_type].inverse_transform(normed)
            entry["normed_feat"] = normed

            # F0 stream: undo scaling, convert between speakers, re-scale.
            src_lcf0 = self.scaler["lcf0"].inverse_transform(
                to_numpy(batch["lcf0"][idx][:n_frames])
            )
            tgt_lcf0 = convert_f0(self.scaler, src_lcf0, src_spkr, tgt_spkr)
            entry["lcf0"] = tgt_lcf0
            entry["normed_lcf0"] = self.scaler["lcf0"].transform(tgt_lcf0)
            entry["uv"] = to_numpy(batch["uv"][idx][:n_frames])
            # Multiplying by "uv" masks the exponentiated contour
            # (presumably zeroing unvoiced frames -- confirm).
            entry["f0"] = np.exp(tgt_lcf0) * entry["uv"]

            if feat_type == "mcep":
                entry["cap"] = to_numpy(batch["cap"][idx][:n_frames])
        return feats
Ejemplo n.º 2
0
    def _store_features(self, batch, outputs, cv_spkr_name, tdir):
        """Collect decoded features and converted F0 per utterance.

        For every item along dimension 0 of ``outputs["decoded"]`` a feature
        dict is stored under the path
        ``tdir / "<flbl>_org-<org>_cv-<cv>.wav"``.

        Args:
            batch: Mapping indexed per utterance through the keys
                ``"org_spkr_name"``, ``"flbl"``, ``"flen"``, ``"lcf0"``,
                ``"uv"`` and, for ``"mcep"`` features, ``"cap"`` (plus
                ``"mcep_0th"`` when ``conf["use_mcep_0th"]`` is falsy).
            outputs: Mapping whose ``"decoded"`` entry supports ``size(0)``
                and per-utterance indexing.
            cv_spkr_name: Target speaker name; ``None`` means each utterance
                keeps its own original speaker as the target.
            tdir: Directory object supporting the ``/`` operator.

        Returns:
            Dict mapping each wav path to a dict with the keys ``"feats"``,
            ``"normed_feat"``, ``"lcf0"``, ``"normed_lcf0"``, ``"uv"``,
            ``"f0"`` and, for ``"mcep"`` features, ``"cap"``.
        """
        feat_type = self.conf["feat_type"]
        decoded = outputs["decoded"]
        feats = {}
        for idx in range(decoded.size(0)):
            src_spkr = batch["org_spkr_name"][idx]
            # Without an explicit target the utterance is mapped onto itself.
            tgt_spkr = cv_spkr_name if cv_spkr_name is not None else src_spkr
            wavf = tdir / f"{batch['flbl'][idx]}_org-{src_spkr}_cv-{tgt_spkr}.wav"

            entry = {}
            feats[wavf] = entry
            n_frames = batch["flen"][idx]
            normed = to_numpy(decoded[idx][:n_frames])
            if feat_type == "mcep":
                if not self.conf["use_mcep_0th"]:
                    # Put the batch's 0th coefficient back in front of the
                    # decoded columns before the scaler is inverted.
                    zeroth = to_numpy(batch["mcep_0th"][idx][:n_frames])
                    normed = np.hstack([zeroth, normed])
            entry["feats"] = self.scaler[feat_type].inverse_transform(normed)
            entry["normed_feat"] = normed

            # F0 stream: undo scaling, convert between speakers, re-scale.
            src_lcf0 = self.scaler["lcf0"].inverse_transform(
                to_numpy(batch["lcf0"][idx][:n_frames])
            )
            tgt_lcf0 = convert_f0(self.scaler, src_lcf0, src_spkr, tgt_spkr)
            entry["lcf0"] = tgt_lcf0
            entry["normed_lcf0"] = self.scaler["lcf0"].transform(tgt_lcf0)
            entry["uv"] = to_numpy(batch["uv"][idx][:n_frames])
            # Multiplying by "uv" masks the exponentiated contour
            # (presumably zeroing unvoiced frames -- confirm).
            entry["f0"] = np.exp(tgt_lcf0) * entry["uv"]

            if feat_type == "mcep":
                entry["cap"] = to_numpy(batch["cap"][idx][:n_frames])
        return feats
Ejemplo n.º 3
0
    def reconstruction(self, batch, tdir="reconstruction"):
        """Run the generator on ``batch`` and dump reconstruction outputs.

        The plain forward pass is handed to ``self._generate_cvwav``. When
        ``conf["cycle_reconstruction"]`` is truthy, a cycle pass is also run
        for every speaker in ``self.spkrs`` and the de-normalized ``recon``
        features are written through ``mlfb2hdf5``.

        Args:
            batch: Mapping providing ``"feats"`` and, per utterance,
                ``"org_spkr_name"``, ``"flbl"`` and ``"flen"``.
            tdir: Sub-directory name, used under ``self.expdir`` for the
                cycle outputs and forwarded to ``self._generate_cvwav``.

        Side effects:
            Overwrites ``self.conf["n_gl_samples"]`` with ``1``.
        """
        self.conf["n_gl_samples"] = 1
        org_cond = self._generate_conditions(batch, cv_spkr_name=None)
        generator = self.model["G"]
        outputs = generator.forward(batch["feats"], dec_h=org_cond)
        self._generate_cvwav(batch, outputs, None, tdir=tdir)

        if not self.conf["cycle_reconstruction"]:
            return

        out_dir = self.expdir / tdir / str(self.steps)
        for cv_spkr_name in self.spkrs.keys():
            cv_cond = self._generate_conditions(batch, cv_spkr_name=cv_spkr_name)
            cycle_outputs = self.model["G"].cycle_forward(
                batch["feats"], org_dec_h=org_cond, cv_dec_h=cv_cond
            )
            # Only the first cycle's reconstruction is exported.
            recon = cycle_outputs[0]["recon"]["decoded"]

            for idx in range(recon.size(0)):
                src_spkr = batch["org_spkr_name"][idx]
                via_spkr = cv_spkr_name if cv_spkr_name is not None else src_spkr
                # The file name carries the original speaker on both sides;
                # the intermediate speaker only appears in ``ext`` below.
                wavf = out_dir / "{}_org-{}_cv-{}.wav".format(
                    batch["flbl"][idx], src_spkr, src_spkr
                )
                n_frames = batch["flen"][idx]
                normed_feat = to_numpy(recon[idx][:n_frames])
                scaler = self.scaler[self.conf["feat_type"]]
                feat = scaler.inverse_transform(normed_feat)
                ext = "feats_recon_{}-{}-{}".format(src_spkr, via_spkr, src_spkr)
                mlfb2hdf5(feat, wavf, ext=ext)
Ejemplo n.º 4
0
 def _get_cvf0(self, batch, spkr_name):
     """Return the batch's ``"lcf0"`` converted to ``spkr_name`` as a tensor.

     Each utterance's ``"lcf0"`` is de-normalized with ``scaler["lcf0"]``,
     passed through ``convert_f0`` from its original speaker to
     ``spkr_name``, and normalized again. The per-utterance results are
     stacked along dim 0, cast to float and moved to ``self.device``.

     Args:
         batch: Mapping providing ``"feats"`` (only ``size(0)`` is used),
             ``"lcf0"`` and ``"org_spkr_name"``.
         spkr_name: Target speaker name handed to ``convert_f0``.

     Returns:
         Float tensor on ``self.device`` holding the stacked results.
     """

     def convert_one(idx):
         # Conversion runs on de-normalized values, so undo the scaler first.
         raw_lcf0 = self.scaler["lcf0"].inverse_transform(
             to_numpy(batch["lcf0"][idx])
         )
         converted = convert_f0(
             self.scaler, raw_lcf0, batch["org_spkr_name"][idx], spkr_name
         )
         return torch.tensor(self.scaler["lcf0"].transform(converted))

     n_utts = batch["feats"].size(0)
     stacked = torch.stack([convert_one(i) for i in range(n_utts)], dim=0)
     return stacked.float().to(self.device)
    def _store_features(self, batch, outputs, cv_spkr_name, tdir):
        """Collect decoded features and converted F0 per utterance.

        For every item along dimension 0 of ``outputs["decoded"]`` a feature
        dict is stored under the path
        ``tdir / "<flbl>_org-<org>_cv-<cv>.wav"``. Feature kinds listed in
        ``conf["ignore_scaler"]`` are passed through without de-normalizing.

        Args:
            batch: Mapping indexed per utterance through the keys
                ``"org_spkr_name"``, ``"flbl"``, ``"flen"``, ``"lcf0"``,
                ``"uv"`` and, for ``"mcep"`` output, ``"cap"`` (plus
                ``"mcep_0th"`` and ``"in_feats"`` when
                ``conf["use_mcep_0th"]`` is falsy).
            outputs: Mapping whose ``"decoded"`` entry supports ``size(0)``
                and per-utterance indexing.
            cv_spkr_name: Target speaker name; ``None`` means each utterance
                keeps its own original speaker as the target.
            tdir: Directory object supporting the ``/`` operator.

        Returns:
            Dict mapping each wav path to a dict with the keys ``"feats"``,
            ``"lcf0"``, ``"uv"``, ``"f0"``, ``"normed_lcf0"``,
            ``"normed_feat"`` and, for ``"mcep"`` output, ``"cap"`` and
            ``"rmcep"`` (``None`` when ``conf["use_mcep_0th"]`` is truthy).
        """

        def denorm(kind, values):
            # Kinds listed in "ignore_scaler" are returned untouched.
            if kind in self.conf["ignore_scaler"]:
                return values
            return self.scaler[kind].inverse_transform(values)

        out_type = self.conf["output_feat_type"]
        decoded = outputs["decoded"]
        feats = {}
        for idx in range(decoded.size(0)):
            src_spkr = batch["org_spkr_name"][idx]
            # Without an explicit target the utterance is mapped onto itself.
            tgt_spkr = cv_spkr_name if cv_spkr_name is not None else src_spkr
            wavf = tdir / f"{batch['flbl'][idx]}_org-{src_spkr}_cv-{tgt_spkr}.wav"

            # Spectral features.
            entry = {}
            feats[wavf] = entry
            n_frames = batch["flen"][idx]
            normed = to_numpy(decoded[idx][:n_frames])
            if out_type == "mcep":
                entry["cap"] = to_numpy(batch["cap"][idx][:n_frames])
                if self.conf["use_mcep_0th"]:
                    entry["rmcep"] = None
                else:
                    # Put the batch's 0th coefficient in front of both the
                    # decoded output and the input features.
                    zeroth = to_numpy(batch["mcep_0th"][idx][:n_frames])
                    in_mcep = to_numpy(batch["in_feats"][idx][:n_frames])
                    normed = np.ascontiguousarray(np.hstack([zeroth, normed]))
                    ref_mcep = np.ascontiguousarray(np.hstack([zeroth, in_mcep]))
                    entry["rmcep"] = denorm(out_type, ref_mcep)
            entry["feats"] = denorm(out_type, normed)

            # F0 stream: undo scaling, then convert between speakers.
            src_lcf0 = denorm("lcf0", to_numpy(batch["lcf0"][idx][:n_frames]))
            tgt_lcf0 = convert_f0(self.scaler, src_lcf0, src_spkr, tgt_spkr)
            entry["lcf0"] = tgt_lcf0
            entry["uv"] = to_numpy(batch["uv"][idx][:n_frames])
            # Multiplying by "uv" masks the exponentiated contour
            # (presumably zeroing unvoiced frames -- confirm).
            entry["f0"] = np.exp(tgt_lcf0) * entry["uv"]

            # Normalized copies are kept alongside the de-normalized ones.
            # NOTE(review): this re-normalization always uses scaler["lcf0"],
            # even if "lcf0" is in conf["ignore_scaler"] -- confirm intended.
            entry["normed_lcf0"] = self.scaler["lcf0"].transform(tgt_lcf0)
            entry["normed_feat"] = normed
        return feats