def _generate_conditions(
    self, batch, cv_spkr_name=None, use_cvfeats=False, encoder=False
):
    """Build the conditioning tensor for the encoder or the decoder.

    Only the inputs that actually end up in the returned tensor are
    prepared: the F0 conversion and the one-hot construction are skipped
    when the selected output does not contain them.

    Args:
        batch: Mapping of batch tensors. Depending on the mode this reads
            ``"uv"``, ``"lcf0"`` / ``"cv_lcf0"``, ``"org_h_onehot"`` /
            ``"cv_h_onehot"`` and (for a named target speaker) ``"feats"``.
        cv_spkr_name: Key into ``self.spkrs`` naming the conversion target.
            When given, it takes precedence over ``use_cvfeats``.
        use_cvfeats: With no ``cv_spkr_name``, use the ``cv_*`` entries of
            ``batch`` instead of the original-speaker entries.
        encoder: Return encoder conditions if true, decoder conditions
            otherwise.

    Returns:
        Encoder mode: ``cat([lcf0, uv])`` along the last dim when
        ``self.conf["encoder_f0"]`` is truthy, otherwise ``None``.
        Decoder mode: ``cat([lcf0, uv, h_onehot])`` when
        ``self.conf["decoder_f0"]`` is truthy, otherwise ``h_onehot`` alone.
        Returned tensors are moved to ``self.device``.
    """
    with_f0 = self.conf["encoder_f0"] if encoder else self.conf["decoder_f0"]
    with_spkr = not encoder  # speaker code is only part of decoder conditions

    if not with_f0 and not with_spkr:
        # Encoder without F0 conditioning: nothing to build.
        return None

    parts = []

    if with_f0:
        if cv_spkr_name is not None:
            # F0 converted towards the requested target speaker
            lcf0 = torch.tensor(self._get_cvf0(batch, cv_spkr_name)).to(
                self.device
            )
        elif use_cvfeats:
            lcf0 = batch["cv_lcf0"]
        else:
            lcf0 = batch["lcf0"]
        parts += [lcf0, batch["uv"]]

    if with_spkr:
        if cv_spkr_name is not None:
            B, T, _ = batch["feats"].size()
            spkr_num = self.spkrs[cv_spkr_name]
            h_onehot = torch.tensor(
                create_one_hot(T, self.n_spkrs, spkr_num, B=B)
            ).to(self.device)
        elif use_cvfeats:
            h_onehot = batch["cv_h_onehot"]
        else:
            h_onehot = batch["org_h_onehot"]
        parts.append(h_onehot)

    if len(parts) == 1:
        # Decoder without F0: the speaker code is returned as-is (no cat).
        return parts[0].to(self.device)
    return torch.cat(parts, dim=-1).to(self.device)
def _get_spkr_conditions(self, batch, cv_spkr_name, use_cvfeats=False):
    """Return the speaker index tensor and its one-hot counterpart.

    Args:
        batch: Mapping of batch tensors. Reads ``"in_feats"`` for a named
            target speaker, otherwise ``"cv_h"`` / ``"cv_h_onehot"`` or
            ``"org_h"`` / ``"org_h_onehot"``.
        cv_spkr_name: Key into ``self.spkrs``; ``None`` means the speaker is
            taken from ``batch``.
        use_cvfeats: With no ``cv_spkr_name``, choose the ``cv_*`` entries
            (conversion speaker picked by the dataset) over the ``org_*``
            ones.

    Returns:
        Tuple ``(h, h_onehot)``. ``h`` is a fresh tensor (never the one
        stored in ``batch``) in which every column repeats column 0.
    """
    if cv_spkr_name is not None:
        # explicitly requested target speaker
        n_batch, n_frames, _ = batch["in_feats"].size()
        target_idx = self.spkrs[cv_spkr_name]
        onehot_np = create_one_hot(n_frames, self.n_spkrs, target_idx, B=n_batch)
        h_onehot = torch.tensor(onehot_np).to(self.device)
        idx_grid = torch.ones((n_batch, n_frames)).long() * target_idx
        h = idx_grid.to(self.device)
    elif use_cvfeats:
        # conversion speaker randomly selected by the dataset
        h = batch["cv_h"].clone()
        h_onehot = batch["cv_h_onehot"]
    else:
        # original speaker
        h_onehot = batch["org_h_onehot"]
        h = batch["org_h"].clone()

    # Broadcast the first frame's index over all frames so that
    # ignore_index entries (i.e., -100) are removed.
    first_col = h[:, 0:1]
    h[:, :] = first_col
    return h, h_onehot
def _prepare_feats(self, batch, cv_spkr_name, use_cvfeats=False):
    """Assemble F0 features and speaker codes for one batch.

    Args:
        batch: Mapping of batch tensors. Always reads ``"uv"``; additionally
            reads ``"feats"`` for a named target speaker, otherwise
            ``"cv_lcf0"`` / ``"cv_h_onehot"`` / ``"cv_h_scalar"`` or
            ``"lcf0"`` / ``"org_h_onehot"`` / ``"org_h_scalar"``.
        cv_spkr_name: Key into ``self.spkrs``; ``None`` means the speaker is
            taken from ``batch``.
        use_cvfeats: With no ``cv_spkr_name``, choose the ``cv_*`` entries
            (conversion speaker picked by the dataset) over the original
            speaker's.

    Returns:
        Tuple ``(cat([lcf0, uv], dim=-1), h_onehot, h_scalar)``.
        ``h_scalar`` is a fresh tensor in which every column repeats
        column 0; the tensors stored in ``batch`` are left untouched.
    """
    if cv_spkr_name is not None:
        # use specified cv speaker
        B, T, _ = batch["feats"].size()
        spkr_num = self.spkrs[cv_spkr_name]
        # NOTE(review): used directly in torch.cat below, so _get_cvf0 is
        # presumably expected to return a tensor here — confirm.
        lcf0 = self._get_cvf0(batch, cv_spkr_name)
        h_onehot_np = create_one_hot(T, self.n_spkrs, spkr_num, B=B)
        h_onehot = torch.tensor(h_onehot_np).to(self.device)
        h_scalar = (torch.ones((B, T)).long() * spkr_num).to(self.device)
    elif use_cvfeats:
        # use randomly selected cv speaker by dataset
        lcf0 = batch["cv_lcf0"]
        h_onehot = batch["cv_h_onehot"]
        # clone: the in-place broadcast below must not modify the batch
        h_scalar = batch["cv_h_scalar"].clone()
    else:
        # use org speaker
        lcf0 = batch["lcf0"]
        h_onehot = batch["org_h_onehot"]
        # clone: the in-place broadcast below must not modify the batch
        h_scalar = batch["org_h_scalar"].clone()

    # Broadcast the first frame's index over all frames to remove
    # ignore_index entries (i.e., -100).
    h_scalar[:, :] = h_scalar[:, 0:1]
    return torch.cat([lcf0, batch["uv"]], dim=-1), h_onehot, h_scalar