class TestSet(object):
    """Load and featurize the test utterances of a single speaker.

    Relies on names resolved from the enclosing module: ``speakers``,
    ``Normalizer``, ``world_features``, ``SAMPLE_RATE``, ``FFTSIZE`` and
    ``FEATURE_DIM``.
    """

    def __init__(self, datadir: str):
        """Remember the dataset root and create the feature normalizer.

        Args:
            datadir: Root directory; ``test_data`` looks for wav files in
                the sub-directory named after the speaker.
        """
        super().__init__()
        self.datadir = datadir
        self.norm = Normalizer()

    def choose(self):
        """Return a randomly chosen entry of ``speakers``."""
        return random.choice(speakers)

    def test_data(self, src_speaker=None):
        """Extract normalized features for every wav file of one speaker.

        Args:
            src_speaker: Name of the speaker sub-directory to read. Any
                falsy value (``None``, ``''``) selects a random speaker
                via ``choose``.

        Returns:
            A ``(features, speaker)`` tuple. ``features`` maps each wav
            file's base name to a dict with the keys ``'coded_sp_norm'``,
            ``'f0'`` and ``'ap'`` (each passed through ``np.asarray``);
            ``speaker`` is the speaker that was actually used. ``features``
            is empty when no wav file is found.
        """
        speaker = src_speaker or self.choose()
        speaker_dir = os.path.join(self.datadir, speaker)

        res = {}
        for path in librosa.util.find_files(speaker_dir, ext='wav'):
            # librosa.load also returns the sample rate; it is not needed.
            wav, _ = librosa.load(path, sr=SAMPLE_RATE, dtype=np.float64)
            # Of the five values returned, only f0, ap and coded_sp are used.
            f0, _, _, ap, coded_sp = world_features(
                wav, SAMPLE_RATE, FFTSIZE, FEATURE_DIM)
            # The normalizer is fed the transposed coded_sp together with
            # the speaker name.
            coded_sp_norm = self.norm.forward_process(coded_sp.T, speaker)

            # Entries are keyed by base name, so a later file with the same
            # base name replaces an earlier one.
            res[os.path.basename(path)] = {
                'coded_sp_norm': np.asarray(coded_sp_norm),
                'f0': np.asarray(f0),
                'ap': np.asarray(ap),
            }
        return res, speaker
Esempio n. 2
0
class TestSet(object):
    """Load the test utterances of one speaker as normalized MCEP features.

    Relies on names resolved from the enclosing module: ``speakers``,
    ``choice``, ``Normalizer``, ``cal_mcep``, ``FEATURE_DIM``, ``FFTSIZE``,
    ``SHIFTMS`` and ``ALPHA``.
    """

    def __init__(self, data_dir: str, sr: int):
        """Store the dataset location, sample rate and a normalizer.

        Args:
            data_dir: Root directory; ``test_data`` looks for wav files in
                the sub-directory named after the speaker.
            sr: Sample rate passed to ``librosa.load`` and ``cal_mcep``.
        """
        super().__init__()
        self.data_dir = data_dir
        self.norm = Normalizer()
        self.sample_rate = sr

    def choose(self):
        """Return a randomly chosen entry of ``speakers``."""
        return choice(speakers)

    def test_data(self, src_speaker=None):
        """Extract normalized features for every wav file of one speaker.

        Args:
            src_speaker: Name of the speaker sub-directory to read. Any
                falsy value (``None``, ``''``) selects a random speaker
                via ``choose``.

        Returns:
            A ``(features, speaker)`` tuple. ``features`` maps each wav
            file's base name to a dict with the keys ``'mcep_norm'``,
            ``'f0'`` and ``'ap'`` (each passed through ``np.asarray``);
            ``speaker`` is the speaker that was actually used. ``features``
            is empty when no wav file is found.
        """
        speaker = src_speaker or self.choose()
        speaker_dir = os.path.join(self.data_dir, speaker)

        res = {}
        for path in librosa.util.find_files(speaker_dir, ext='wav'):
            # librosa.load also returns the sample rate; it is not needed.
            wav, _ = librosa.load(path, sr=self.sample_rate,
                                  dtype=np.float64)
            f0, ap, mcep = cal_mcep(wav, self.sample_rate, FEATURE_DIM,
                                    FFTSIZE, SHIFTMS, ALPHA)
            # The normalizer receives the speaker name along with the data.
            mcep_norm = self.norm.forward_process(mcep, speaker)

            # Entries are keyed by base name, so a later file with the same
            # base name replaces an earlier one.
            res[os.path.basename(path)] = {
                'mcep_norm': np.asarray(mcep_norm),
                'f0': np.asarray(f0),
                'ap': np.asarray(ap),
            }
        return res, speaker
Esempio n. 3
0
class TestSet(object):
    """Test data for one speaker, loaded from wav files and featurized.

    Relies on names resolved from the enclosing module: ``speakers``,
    ``Normalizer``, ``world_features``, ``SAMPLE_RATE``, ``FFTSIZE`` and
    ``FEATURE_DIM``.
    """

    def __init__(self, datadir: str):
        """Remember the dataset root and create the feature normalizer.

        Args:
            datadir: Root directory; ``test_data`` looks for wav files in
                the sub-directory named after the speaker.
        """
        super().__init__()
        self.datadir = datadir
        # self.norm performs the data normalization.
        self.norm = Normalizer()

    def choose(self):
        """Return a randomly chosen entry of ``speakers``."""
        return random.choice(speakers)

    def test_data(self, src_speaker=None):
        """Extract normalized features for every wav file of one speaker.

        Args:
            src_speaker: Name of the speaker sub-directory to read. Any
                falsy value (``None``, ``''``) selects a random speaker
                via ``choose``.

        Returns:
            A ``(features, speaker)`` tuple. ``features`` maps each wav
            file's base name to a dict with the keys ``'coded_sp_norm'``,
            ``'f0'`` and ``'ap'`` (each passed through ``np.asarray``);
            ``speaker`` is the source speaker that was actually used.
            ``features`` is empty when no wav file is found.
        """
        # Use the caller's speaker when one is given, otherwise pick one at
        # random.
        speaker = src_speaker or self.choose()
        # The dataset keeps one sub-directory per speaker, named after the
        # speaker; a dataset laid out differently would need a different
        # lookup here.
        speaker_dir = os.path.join(self.datadir, speaker)

        res = {}
        # Visit every wav file found under the speaker's directory.
        for path in librosa.util.find_files(speaker_dir, ext='wav'):
            # librosa.load returns the audio time series and its sample
            # rate; the rate is not needed, hence the `_` placeholder.
            # (sr=None would keep the file's native rate; mono, offset and
            # duration are further optional arguments of librosa.load.)
            wav, _ = librosa.load(path, sr=SAMPLE_RATE, dtype=np.float64)
            # Of the five values returned by the project's world_features
            # helper, only f0, ap and coded_sp are used.
            f0, _, _, ap, coded_sp = world_features(
                wav, SAMPLE_RATE, FFTSIZE, FEATURE_DIM)
            # Normalize the coded spectral envelope; the normalizer is fed
            # the transposed array together with the speaker name.
            coded_sp_norm = self.norm.forward_process(coded_sp.T, speaker)

            # First-level key is the file's base name, so a later file with
            # the same base name replaces an earlier one.
            res[os.path.basename(path)] = {
                'coded_sp_norm': np.asarray(coded_sp_norm),
                'f0': np.asarray(f0),
                'ap': np.asarray(ap),
            }
        # Return the feature dictionary and the source speaker's label.
        return res, speaker