class TestSet(object):
    """Build per-file test features from the wav files of one speaker."""

    def __init__(self, datadir: str):
        """Store the dataset root and create the normalizer.

        Args:
            datadir: Root directory; ``test_data`` joins it with a speaker
                name to locate that speaker's wav files.
        """
        super().__init__()
        self.datadir = datadir
        self.norm = Normalizer()

    def choose(self):
        """Return one randomly picked element of ``speakers``."""
        return random.choice(speakers)

    def test_data(self, src_speaker=None):
        """Extract features for every wav file of the source speaker.

        Args:
            src_speaker: Speaker name to use. Any falsy value (the default
                ``None`` included) means a speaker is picked via ``choose``.

        Returns:
            A ``(features, speaker)`` tuple. ``features`` maps each wav
            file's base name to a dict with the keys ``'coded_sp_norm'``,
            ``'f0'`` and ``'ap'`` (all converted with ``np.asarray``);
            ``speaker`` is the speaker name that was actually used.
        """
        speaker = src_speaker or self.choose()
        speaker_dir = os.path.join(self.datadir, speaker)

        features = {}
        for wav_path in librosa.util.find_files(speaker_dir, ext='wav'):
            key = os.path.basename(wav_path)
            signal, _sr = librosa.load(
                wav_path, sr=SAMPLE_RATE, dtype=np.float64)
            # Only f0, ap and the coded envelope are kept; the time axis and
            # the raw spectral envelope are discarded.
            f0, _timeaxis, _sp, ap, coded_sp = world_features(
                signal, SAMPLE_RATE, FFTSIZE, FEATURE_DIM)
            normalized = self.norm.forward_process(coded_sp.T, speaker)

            # Entries are keyed by base name only, so a later file with the
            # same base name overwrites the values stored for an earlier one.
            entry = features.setdefault(key, {})
            entry['coded_sp_norm'] = np.asarray(normalized)
            entry['f0'] = np.asarray(f0)
            entry['ap'] = np.asarray(ap)

        return features, speaker
class TestSet(object):
    """Build per-file mcep test features from the wav files of one speaker."""

    def __init__(self, data_dir: str, sr: int):
        """Store the dataset root and sample rate, and create the normalizer.

        Args:
            data_dir: Root directory; ``test_data`` joins it with a speaker
                name to locate that speaker's wav files.
            sr: Sample rate passed to ``librosa.load`` and ``cal_mcep``.
        """
        super().__init__()
        self.data_dir = data_dir
        self.norm = Normalizer()
        self.sample_rate = sr

    def choose(self):
        """Return one randomly picked element of ``speakers``."""
        return choice(speakers)

    def test_data(self, src_speaker=None):
        """Extract features for every wav file of the source speaker.

        Args:
            src_speaker: Speaker name to use. Any falsy value (the default
                ``None`` included) means a speaker is picked via ``choose``.

        Returns:
            A ``(features, speaker)`` tuple. ``features`` maps each wav
            file's base name to a dict with the keys ``'mcep_norm'``,
            ``'f0'`` and ``'ap'`` (all converted with ``np.asarray``);
            ``speaker`` is the speaker name that was actually used.
        """
        speaker = src_speaker or self.choose()
        speaker_dir = os.path.join(self.data_dir, speaker)
        rate = self.sample_rate

        features = {}
        for wav_path in librosa.util.find_files(speaker_dir, ext='wav'):
            key = os.path.basename(wav_path)
            signal, _sr = librosa.load(wav_path, sr=rate, dtype=np.float64)
            f0, ap, mcep = cal_mcep(
                signal, rate, FEATURE_DIM, FFTSIZE, SHIFTMS, ALPHA)
            normalized = self.norm.forward_process(mcep, speaker)

            # Entries are keyed by base name only, so a later file with the
            # same base name overwrites the values stored for an earlier one.
            features.setdefault(key, {}).update({
                'mcep_norm': np.asarray(normalized),
                'f0': np.asarray(f0),
                'ap': np.asarray(ap),
            })

        return features, speaker
class TestSet(object):
    """Test-data provider: loads one speaker's wav files as features."""

    def __init__(self, datadir: str):
        """Store the dataset root and create the normalizer.

        Args:
            datadir: Root directory; ``test_data`` joins it with a speaker
                name to locate that speaker's wav files.
        """
        super().__init__()
        self.datadir = datadir
        # The normalizer is applied to the coded spectral envelope below.
        self.norm = Normalizer()

    def choose(self):
        """Pick a speaker for the test data at random from ``speakers``."""
        return random.choice(speakers)

    def test_data(self, src_speaker=None):
        """Extract features for every wav file of the source speaker.

        Args:
            src_speaker: Speaker name to use. Any falsy value (the default
                ``None`` included) means a speaker is picked via ``choose``.

        Returns:
            A ``(features, speaker)`` tuple. ``features`` maps each wav
            file's base name to a dict with the keys ``'coded_sp_norm'``,
            ``'f0'`` and ``'ap'`` (all converted with ``np.asarray``);
            ``speaker`` is the speaker name that was actually used.
        """
        # Use the caller's speaker when given, otherwise draw one at random.
        speaker = src_speaker or self.choose()

        # The lookup relies on a sub-directory named after the speaker under
        # the dataset root; a dataset laid out differently would need a
        # different path rule here.
        speaker_dir = os.path.join(self.datadir, speaker)

        features = {}
        for wav_path in librosa.util.find_files(speaker_dir, ext='wav'):
            key = os.path.basename(wav_path)

            # librosa.load returns (samples, sample_rate); the rate is not
            # needed because SAMPLE_RATE is requested explicitly.
            signal, _sr = librosa.load(
                wav_path, sr=SAMPLE_RATE, dtype=np.float64)

            # Only f0, ap and the coded envelope are kept; the time axis and
            # the raw spectral envelope are discarded.
            f0, _timeaxis, _sp, ap, coded_sp = world_features(
                signal, SAMPLE_RATE, FFTSIZE, FEATURE_DIM)

            # The coded spectral envelope is transposed before being handed
            # to the normalizer together with the speaker name.
            normalized = self.norm.forward_process(coded_sp.T, speaker)

            # First-level key is the file's base name, so a later file with
            # the same base name overwrites the values of an earlier one.
            entry = features.setdefault(key, {})
            entry['coded_sp_norm'] = np.asarray(normalized)
            entry['f0'] = np.asarray(f0)
            entry['ap'] = np.asarray(ap)

        # Hand back the feature dict along with the speaker that was used.
        return features, speaker