def _store_features(self, batch, outputs, cv_spkr_name, tdir):
    """Collect per-utterance synthesis features keyed by output wav path.

    For every utterance in the decoded batch, gathers the de-normalized
    decoded features, the speaker-converted log-continuous F0 track, the
    unvoiced mask, and (for mcep features) the aperiodicity, into a dict
    keyed by the target wav file path under ``tdir``.

    Args:
        batch: mini-batch dict (labels, frame lengths, f0/uv features).
        outputs: network outputs; ``outputs["decoded"]`` is the decoded
            feature tensor of shape (batch, frames, dim).
        cv_spkr_name: conversion-target speaker, or None to keep the
            original speaker (reconstruction).
        tdir: output directory (``pathlib.Path``).

    Returns:
        dict mapping wav path -> dict of feature arrays.
    """
    feats = {}
    feat_type = self.conf["feat_type"]
    decoded = outputs["decoded"]
    for idx in range(decoded.size(0)):
        org_name = batch["org_spkr_name"][idx]
        # fall back to the source speaker when no conversion target is given
        cv_name = cv_spkr_name if cv_spkr_name is not None else org_name
        wavf = tdir / "{}_org-{}_cv-{}.wav".format(
            batch["flbl"][idx], org_name, cv_name)
        wavf.parent.mkdir(parents=True, exist_ok=True)

        # decoded features, trimmed to the true frame length
        flen = batch["flen"][idx]
        normed = to_numpy(decoded[idx][:flen])
        entry = {
            "feats": self.scaler[feat_type].inverse_transform(normed),
            "normed_feat": normed,
        }

        # f0: de-normalize source lcf0, convert statistics to target speaker
        org_cf0 = self.scaler["lcf0"].inverse_transform(
            to_numpy(batch["lcf0"][idx][:flen]))
        cv_cf0 = convert_f0(self.scaler, org_cf0, org_name, cv_name)
        entry["lcf0"] = cv_cf0
        entry["normed_lcf0"] = self.scaler["lcf0"].transform(cv_cf0)
        entry["uv"] = to_numpy(batch["uv"][idx][:flen])
        # linear-domain f0, zeroed on unvoiced frames
        entry["f0"] = np.exp(cv_cf0) * entry["uv"]

        if feat_type == "mcep":
            # aperiodicity is carried over from the source for mcep synthesis
            entry["cap"] = to_numpy(batch["cap"][idx][:flen])
        feats[wavf] = entry
    return feats
def _store_features(self, batch, outputs, cv_spkr_name, tdir):
    """Collect per-utterance synthesis features keyed by output wav path.

    Like the plain variant, but when the decoder was trained without the
    0th mel-cepstral coefficient (``use_mcep_0th`` false), the source
    utterance's 0th coefficient is re-attached in front of the decoded
    features before de-normalization.

    Args:
        batch: mini-batch dict (labels, frame lengths, f0/uv features).
        outputs: network outputs; ``outputs["decoded"]`` has shape
            (batch, frames, dim).
        cv_spkr_name: conversion-target speaker, or None to keep the
            original speaker.
        tdir: output directory (``pathlib.Path``).

    Returns:
        dict mapping wav path -> dict of feature arrays.
    """
    feat_type = self.conf["feat_type"]
    decoded = outputs["decoded"]
    feats = {}
    for n in range(decoded.size(0)):
        org_spkr_name = batch["org_spkr_name"][n]
        cv_name = cv_spkr_name if cv_spkr_name is not None else org_spkr_name
        wavf = tdir / f"{batch['flbl'][n]}_org-{org_spkr_name}_cv-{cv_name}.wav"

        # decoded features, trimmed to the true frame length
        flen = batch["flen"][n]
        feat = to_numpy(decoded[n][:flen])
        if feat_type == "mcep" and not self.conf["use_mcep_0th"]:
            # decoder omitted the 0th coefficient; prepend the source's
            mcep_0th = to_numpy(batch["mcep_0th"][n][:flen])
            feat = np.hstack([mcep_0th, feat])
        entry = {
            "feats": self.scaler[feat_type].inverse_transform(feat),
            "normed_feat": feat,
        }

        # f0: de-normalize source lcf0, convert statistics to target speaker
        org_cf0 = self.scaler["lcf0"].inverse_transform(
            to_numpy(batch["lcf0"][n][:flen]))
        cv_cf0 = convert_f0(self.scaler, org_cf0, org_spkr_name, cv_name)
        entry["lcf0"] = cv_cf0
        entry["normed_lcf0"] = self.scaler["lcf0"].transform(cv_cf0)
        entry["uv"] = to_numpy(batch["uv"][n][:flen])
        # linear-domain f0, zeroed on unvoiced frames
        entry["f0"] = np.exp(cv_cf0) * entry["uv"]

        if feat_type == "mcep":
            # aperiodicity is carried over from the source for mcep synthesis
            entry["cap"] = to_numpy(batch["cap"][n][:flen])
        feats[wavf] = entry
    return feats
def reconstruction(self, batch, tdir="reconstruction"):
    """Run self-reconstruction on a batch and write waveforms/features.

    First decodes the batch conditioned on its own speakers and renders
    waveforms via ``self._generate_cvwav``. If ``cycle_reconstruction``
    is enabled, additionally runs org -> cv -> org cycle reconstruction
    for every known conversion speaker and dumps the de-normalized cycle
    features to HDF5 under ``expdir/tdir/<step>``.

    Args:
        batch: mini-batch dict with "feats", "flen", "flbl",
            "org_spkr_name".
        tdir: output subdirectory name (default "reconstruction").
    """
    # force a single Griffin-Lim sample for reconstruction output
    self.conf["n_gl_samples"] = 1
    # cv_spkr_name=None -> condition on each utterance's own speaker
    h = self._generate_conditions(batch, cv_spkr_name=None)
    outputs = self.model["G"].forward(batch["feats"], dec_h=h)
    self._generate_cvwav(batch, outputs, None, tdir=tdir)

    if self.conf["cycle_reconstruction"]:
        recondir = self.expdir / tdir / str(self.steps)
        for cv_spkr_name in self.spkrs.keys():
            # conditions for the intermediate conversion target
            h_cv = self._generate_conditions(batch, cv_spkr_name=cv_spkr_name)
            cycle_outputs = self.model["G"].cycle_forward(
                batch["feats"], org_dec_h=h, cv_dec_h=h_cv
            )
            # NOTE(review): only the first cycle's reconstruction branch is
            # dumped; presumably cycle_outputs is a list of per-cycle dicts
            # — confirm against the generator's cycle_forward.
            recon = cycle_outputs[0]["recon"]["decoded"]
            for n in range(recon.size(0)):
                org_spkr_name = batch["org_spkr_name"][n]
                cv_name = org_spkr_name if cv_spkr_name is None else cv_spkr_name
                # filename uses org twice (cv slot = org): the cycle ends
                # back at the original speaker; the intermediate cv speaker
                # is recorded in the HDF5 ext below instead.
                wavf = recondir / "{}_org-{}_cv-{}.wav".format(
                    batch["flbl"][n], org_spkr_name, org_spkr_name
                )
                # trim padding, then de-normalize with the feature scaler
                flen = batch["flen"][n]
                normed_feat = to_numpy(recon[n][:flen])
                feat = self.scaler[self.conf["feat_type"]].inverse_transform(
                    normed_feat
                )
                # ext encodes the org -> cv -> org cycle path
                mlfb2hdf5(
                    feat,
                    wavf,
                    ext="feats_recon_{}-{}-{}".format(
                        org_spkr_name, cv_name, org_spkr_name
                    ),
                )
def _get_cvf0(self, batch, spkr_name):
    """Return normalized log-continuous F0 converted to a target speaker.

    For each utterance: de-normalize the source lcf0, shift its
    statistics to ``spkr_name`` with ``convert_f0``, re-normalize, and
    stack the results into a float tensor on ``self.device``.

    Args:
        batch: mini-batch dict with "feats", "lcf0", "org_spkr_name".
        spkr_name: conversion-target speaker name.

    Returns:
        torch.FloatTensor of shape (batch, frames, 1) on self.device.
    """
    converted = []
    for idx in range(batch["feats"].size(0)):
        # undo normalization before applying speaker-level f0 statistics
        org_lcf0 = self.scaler["lcf0"].inverse_transform(
            to_numpy(batch["lcf0"][idx]))
        cv_lcf0 = convert_f0(
            self.scaler, org_lcf0, batch["org_spkr_name"][idx], spkr_name)
        normed = self.scaler["lcf0"].transform(cv_lcf0)
        converted.append(torch.tensor(normed))
    return torch.stack(converted, dim=0).float().to(self.device)
def _store_features(self, batch, outputs, cv_spkr_name, tdir):
    """Collect per-utterance synthesis features keyed by output wav path.

    Variant with an ``ignore_scaler`` list: keys listed there skip
    inverse normalization. For mcep output without the 0th coefficient,
    the source 0th coefficient is prepended and a reference mcep
    ("rmcep") built from the source features is stored alongside.

    Args:
        batch: mini-batch dict (labels, frame lengths, f0/uv features,
            source mcep features).
        outputs: network outputs; ``outputs["decoded"]`` has shape
            (batch, frames, dim).
        cv_spkr_name: conversion-target speaker, or None to keep the
            original speaker.
        tdir: output directory (``pathlib.Path``).

    Returns:
        dict mapping wav path -> dict of feature arrays.
    """

    def _maybe_inverse(key, values):
        # keys in conf["ignore_scaler"] are passed through untouched
        if key in self.conf["ignore_scaler"]:
            return values
        return self.scaler[key].inverse_transform(values)

    feat_type = self.conf["output_feat_type"]
    feats = {}
    for n in range(outputs["decoded"].size(0)):
        org_spkr_name = batch["org_spkr_name"][n]
        cv_name = org_spkr_name if cv_spkr_name is None else cv_spkr_name
        wavf = tdir / f"{batch['flbl'][n]}_org-{org_spkr_name}_cv-{cv_name}.wav"

        entry = {}
        feats[wavf] = entry
        flen = batch["flen"][n]
        feat = to_numpy(outputs["decoded"][n][:flen])
        if feat_type == "mcep":
            entry["cap"] = to_numpy(batch["cap"][n][:flen])
            if not self.conf["use_mcep_0th"]:
                # decoder omitted the 0th coefficient; prepend the source's
                # and build a source-side reference mcep for synthesis
                org_mcep_0th = to_numpy(batch["mcep_0th"][n][:flen])
                org_mcep = to_numpy(batch["in_feats"][n][:flen])
                feat = np.ascontiguousarray(np.hstack([org_mcep_0th, feat]))
                rmcep = np.ascontiguousarray(
                    np.hstack([org_mcep_0th, org_mcep]))
                entry["rmcep"] = _maybe_inverse(feat_type, rmcep)
            else:
                entry["rmcep"] = None
        entry["feats"] = _maybe_inverse(feat_type, feat)

        # f0: de-normalize source lcf0, convert statistics to target speaker
        org_cf0 = _maybe_inverse("lcf0", to_numpy(batch["lcf0"][n][:flen]))
        cv_cf0 = convert_f0(self.scaler, org_cf0, org_spkr_name, cv_name)
        entry["lcf0"] = cv_cf0
        entry["uv"] = to_numpy(batch["uv"][n][:flen])
        # linear-domain f0, zeroed on unvoiced frames
        entry["f0"] = np.exp(cv_cf0) * entry["uv"]
        # save normed one as well
        entry["normed_lcf0"] = self.scaler["lcf0"].transform(cv_cf0)
        entry["normed_feat"] = feat
    return feats