def extract_f0_and_npow(wavf):
    """
    F0 and npow extraction

    Reads the waveform, applies a low-cut filter with a cutoff of 70 and
    analyzes the result with a WORLD ``FeatureExtractor`` created for the
    sampling rate of the file.

    Parameters
    ----------
    wavf : str,
        File path of waveform file

    Returns
    -------
    dict :
        Dictionary holding the F0 array under ``"f0"`` and the npow array
        under ``"npow"``

    """

    # open waveform
    print("Extract: " + wavf)
    fs, x = wavfile.read(wavf)
    # np.float was only an alias of the builtin float and was removed in
    # NumPy 1.24; np.float64 is the very same dtype.
    x = np.array(x, dtype=np.float64)
    x = low_cut_filter(x, fs, cutoff=70)

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer="world", fs=fs)

    # f0 and npow extraction
    # (npow() takes no input here, so it presumably relies on the analysis
    # performed by the preceding analyze() call)
    f0, _, _ = feat.analyze(x)
    npow = feat.npow()

    return {"f0": f0, "npow": npow}
Exemple #2
0
    def _analyze_world_features(self, x, f0_only=False):
        """Analyze ``x`` with a WORLD extractor and fill ``self.feats``.

        The entries ``f0``, ``spc``, ``ap``, ``uv``, ``cf0``, ``lf0`` and
        ``lcf0`` are always stored.  With ``f0_only`` set the method stops
        there.  Otherwise ``mcep``, ``npow``, ``cap``, ``ccap`` and
        ``cap_uv`` are added as well, but only when the configured ``fftl``
        is not 256 and the configured ``fs`` is greater than 16000.
        """
        conf = self.conf
        speaker_conf = self.sconf
        feats = self.feats

        extractor = FeatureExtractor(
            analyzer="world",
            fs=conf["fs"],
            fftl=conf["fftl"],
            shiftms=conf["shiftms"],
            minf0=speaker_conf["minf0"],
            maxf0=speaker_conf["maxf0"],
        )

        # F0, spectral envelope and aperiodicity from the WORLD analysis
        f0, spc, ap = extractor.analyze(x)
        feats["f0"], feats["spc"], feats["ap"] = f0, spc, ap

        # unvoiced/voiced flags and continuous F0, plus their log versions
        uv, cf0 = convert_continuos_f0(feats["f0"])
        feats["uv"], feats["cf0"] = uv, cf0
        feats["lf0"] = np.log(feats["f0"] + EPS)
        feats["lcf0"] = np.log(feats["cf0"])

        # NOTE: 256 fft_size sometimes causes errors
        if f0_only or conf["fftl"] == 256 or not conf["fs"] > 16000:
            return

        feats["mcep"] = extractor.mcep(dim=conf["mcep_dim"],
                                       alpha=conf["mcep_alpha"])
        feats["npow"] = extractor.npow()

        cap = extractor.codeap()
        feats["cap"] = cap  # same array object: edits below show up here
        feats["ccap"] = np.zeros(cap.shape)
        feats["cap_uv"] = np.zeros(cap.shape)

        n_bands = feats["ccap"].shape[-1]
        for band in range(n_bands):
            column = cap[:, band]
            # frames holding the maximum of this band are reset to zero
            cap[column == max(column), band] = 0.0
            band_uv, band_cont = convert_continuos_f0(cap[:, band])
            feats["cap_uv"][:, band] = band_uv
            feats["ccap"][:, band] = band_cont
Exemple #3
0
    def _analyze_world_features(self, x):
        """Analyze ``x`` with a WORLD extractor and fill ``self.feats``.

        Stores ``f0``, ``spc``, ``ap``, ``mcep``, ``npow``, ``cap``, the
        per-band continuous coded aperiodicity (``ccap``) with its flags
        (``cap_uv``), and finally ``uv``, ``cf0``, ``lf0`` and ``lcf0``.
        """
        conf = self.conf
        speaker_conf = self.sconf
        feats = self.feats

        extractor = FeatureExtractor(
            analyzer="world",
            fs=conf["fs"],
            fftl=conf["fftl"],
            shiftms=conf["shiftms"],
            minf0=speaker_conf["minf0"],
            maxf0=speaker_conf["maxf0"],
        )

        # F0, spectral envelope and aperiodicity from the WORLD analysis
        f0, spc, ap = extractor.analyze(x)
        feats["f0"], feats["spc"], feats["ap"] = f0, spc, ap
        feats["mcep"] = extractor.mcep(dim=conf["mcep_dim"],
                                       alpha=conf["mcep_alpha"])
        feats["npow"] = extractor.npow()

        cap = extractor.codeap()
        feats["cap"] = cap  # same array object: edits below show up here
        feats["ccap"] = np.zeros(cap.shape)
        feats["cap_uv"] = np.zeros(cap.shape)

        n_bands = feats["ccap"].shape[-1]
        for band in range(n_bands):
            column = cap[:, band]
            # frames holding the maximum of this band are reset to zero
            cap[column == max(column), band] = 0.0
            band_uv, band_cont = convert_continuos_f0(cap[:, band])
            feats["cap_uv"][:, band] = band_uv
            feats["ccap"][:, band] = band_cont

        # unvoiced/voiced flags and continuous F0, plus their log versions
        uv, cf0 = convert_continuos_f0(feats["f0"])
        feats["uv"], feats["cf0"] = uv, cf0
        feats["lf0"] = np.log(feats["f0"] + EPS)
        feats["lcf0"] = np.log(feats["cf0"])
Exemple #4
0
 def test_spc_and_npow(self):
     """spc and npow of one analysis must have the same first dimension."""
     wav_path = dirpath + '/data/test16000.wav'
     rate, samples = wavfile.read(wav_path)
     extractor = FeatureExtractor(analyzer='world', fs=rate, shiftms=5)

     _f0, spc, _ap = extractor.analyze(samples)
     npow = extractor.npow()

     n_spc_frames = spc.shape[0]
     n_npow_frames = npow.shape[0]
     assert n_spc_frames == n_npow_frames
def main(*argv):
    """Create F0 and frame-power histograms for one speaker.

    Every non-empty line of ``list_file`` names an utterance; the file
    ``<wav_dir>/<name>.wav`` is analyzed with a WORLD ``FeatureExtractor``
    and the F0 and npow values of all utterances are pooled.  Two figures,
    ``<speaker>_f0histogram.png`` and ``<speaker>_npowhistogram.png``, are
    written into ``figure_dir``.

    Parameters
    ----------
    *argv : str
        Command-line arguments; ``sys.argv[1:]`` is used when none are given.

    """
    argv = argv if argv else sys.argv[1:]
    dcp = 'Create histogram for speaker-dependent configure'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('speaker', type=str, help='Input speaker label')
    parser.add_argument('list_file',
                        type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir', type=str, help='Directory of wav file')
    parser.add_argument('figure_dir',
                        type=str,
                        help='Directory for figure output')
    args = parser.parse_args(argv)

    # open list file
    with open(args.list_file, 'r') as fp:
        files = fp.readlines()

    f0s = []
    npows = []
    for f in files:
        f = f.rstrip()
        if not f:
            # a blank line names no utterance; without this guard it would
            # be turned into the bogus path "<wav_dir>/.wav"
            continue

        # open waveform
        wavf = os.path.join(args.wav_dir, f + '.wav')
        fs, x = wavfile.read(wavf)
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype
        x = np.array(x, dtype=np.float64)
        print("Extract: " + wavf)

        # construct FeatureExtractor class
        feat = FeatureExtractor(analyzer='world', fs=fs)

        # f0 and npow extraction
        f0, _, _ = feat.analyze(x)
        npow = feat.npow()

        f0s.append(f0)
        npows.append(npow)

    # pool the per-utterance arrays into one flat array each
    f0s = np.hstack(f0s).flatten()
    npows = np.hstack(npows).flatten()

    # create a histogram to visualize F0 range of the speaker
    f0histogrampath = os.path.join(args.figure_dir,
                                   args.speaker + '_f0histogram.png')
    create_histogram(f0s,
                     f0histogrampath,
                     range_min=40,
                     range_max=700,
                     step=50,
                     xlabel='Fundamental frequency [Hz]')

    # create a histogram to visualize npow range of the speaker
    npowhistogrampath = os.path.join(args.figure_dir,
                                     args.speaker + '_npowhistogram.png')
    create_histogram(npows,
                     npowhistogrampath,
                     range_min=-70,
                     range_max=20,
                     step=10,
                     xlabel="Frame power [dB]")
 def test_spc_and_npow(self):
     """spc and npow of one analysis must have the same first dimension."""
     wav_path = dirpath + '/data/test16000.wav'
     rate, samples = wavfile.read(wav_path)
     extractor = FeatureExtractor(analyzer='world', fs=rate, shiftms=5)

     _f0, spc, _ap = extractor.analyze(samples)
     npow = extractor.npow()

     n_spc_frames = spc.shape[0]
     n_npow_frames = npow.shape[0]
     assert n_spc_frames == n_npow_frames
def main(*argv):
    """Create F0 and frame-power histograms for one speaker.

    Every non-empty line of ``list_file`` names an utterance; the file
    ``<wav_dir>/<name>.wav`` is analyzed with a WORLD ``FeatureExtractor``
    and the F0 and npow values of all utterances are pooled.  Two figures,
    ``<speaker>_f0histogram.png`` and ``<speaker>_npowhistogram.png``, are
    written into ``figure_dir``.

    Parameters
    ----------
    *argv : str
        Command-line arguments; ``sys.argv[1:]`` is used when none are given.

    """
    argv = argv if argv else sys.argv[1:]
    dcp = 'Create histogram for speaker-dependent configure'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir', type=str,
                        help='Directory of wav file')
    parser.add_argument('figure_dir', type=str,
                        help='Directory for figure output')
    args = parser.parse_args(argv)

    # open list file
    with open(args.list_file, 'r') as fp:
        files = fp.readlines()

    f0s = []
    npows = []
    for f in files:
        f = f.rstrip()
        if not f:
            # a blank line names no utterance; without this guard it would
            # be turned into the bogus path "<wav_dir>/.wav"
            continue

        # open waveform
        wavf = os.path.join(args.wav_dir, f + '.wav')
        fs, x = wavfile.read(wavf)
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype
        x = np.array(x, dtype=np.float64)
        print("Extract: " + wavf)

        # construct FeatureExtractor class
        feat = FeatureExtractor(analyzer='world', fs=fs)

        # f0 and npow extraction
        f0, _, _ = feat.analyze(x)
        npow = feat.npow()

        f0s.append(f0)
        npows.append(npow)

    # pool the per-utterance arrays into one flat array each
    f0s = np.hstack(f0s).flatten()
    npows = np.hstack(npows).flatten()

    # create a histogram to visualize F0 range of the speaker
    f0histogrampath = os.path.join(
        args.figure_dir, args.speaker + '_f0histogram.png')
    create_histogram(f0s, f0histogrampath, range_min=40, range_max=700,
                     step=50, xlabel='Fundamental frequency [Hz]')

    # create a histogram to visualize npow range of the speaker
    npowhistogrampath = os.path.join(
        args.figure_dir, args.speaker + '_npowhistogram.png')
    create_histogram(npows, npowhistogrampath, range_min=-70, range_max=20,
                     step=10, xlabel="Frame power [dB]")
Exemple #8
0
    def test_mod_power(self):
        """mod_power must return an array shaped like the converted mcep."""
        wav_path = os.path.join(dirpath, 'data', 'test16000.wav')
        rate, samples = wavfile.read(wav_path)
        extractor = FeatureExtractor(analyzer='world', fs=rate, shiftms=5)
        f0, _, ap = extractor.analyze(samples)
        mcep = extractor.mcep(dim=24, alpha=0.42)

        # reference: the analyzed mcep itself; "converted": a scaled copy
        reference_mcep = mcep
        converted_mcep = mcep * 1.50

        result = mod_power(converted_mcep, reference_mcep, alpha=0.42)

        expected_shape = converted_mcep.shape
        assert result.shape == expected_shape
    def test_anasyn_44100(self):
        """Analysis followed by mcep synthesis on data/test44100.wav."""
        wav_path = dirpath + '/data/test44100.wav'
        rate, samples = wavfile.read(wav_path)

        extractor = FeatureExtractor(
            analyzer='world',
            fs=rate,
            shiftms=5,
            minf0=100,
            fftl=2048,
        )
        f0, spc, ap = extractor.analyze(samples)
        mcep = extractor.mcep(dim=40, alpha=0.50)

        # at least one frame must carry a non-zero F0
        nonzero_f0_frames = np.nonzero(f0)[0]
        assert len(nonzero_f0_frames) > 0
        assert spc.shape == ap.shape

        # mcep synthesis
        synthesizer = Synthesizer(fs=rate, fftl=2048, shiftms=5)
        resynthesized = synthesizer.synthesis(f0, mcep, ap, alpha=0.50)
        nun_check(resynthesized)
Exemple #10
0
    def test_anasyn_16000(self):
        """Analysis followed by mcep synthesis on data/test16000.wav."""
        wav_path = dirpath + '/data/test16000.wav'
        rate, samples = wavfile.read(wav_path)

        extractor = FeatureExtractor(
            analyzer='world',
            fs=rate,
            shiftms=5,
            fftl=1024,
        )
        f0, spc, ap = extractor.analyze(samples)
        mcep = extractor.mcep(dim=24, alpha=0.42)

        # at least one frame must carry a non-zero F0
        nonzero_f0_frames = np.nonzero(f0)[0]
        assert len(nonzero_f0_frames) > 0
        assert spc.shape == ap.shape

        # synthesize F0, mcep, ap
        synthesizer = Synthesizer(fs=rate, fftl=1024, shiftms=5)
        resynthesized = synthesizer.synthesis(f0, mcep, ap, alpha=0.42)
        nun_check(resynthesized)
    def test_anasyn_16000(self):
        """Analysis followed by mcep synthesis on data/test16000.wav."""
        wav_path = dirpath + '/data/test16000.wav'
        rate, samples = wavfile.read(wav_path)

        extractor = FeatureExtractor(
            analyzer='world',
            fs=rate,
            shiftms=5,
            fftl=1024,
        )
        f0, spc, ap = extractor.analyze(samples)
        mcep = extractor.mcep(dim=24, alpha=0.42)

        # at least one frame must carry a non-zero F0
        nonzero_f0_frames = np.nonzero(f0)[0]
        assert len(nonzero_f0_frames) > 0
        assert spc.shape == ap.shape

        # synthesize F0, mcep, ap
        synthesizer = Synthesizer(fs=rate, fftl=1024, shiftms=5)
        resynthesized = synthesizer.synthesis(f0, mcep, ap, alpha=0.42)
        nun_check(resynthesized)
Exemple #12
0
    def test_synthesis_from_codeap(self):
        """Decode the coded aperiodicity and synthesize from F0, spc and ap."""
        wav_path = dirpath + '/data/test16000.wav'
        rate, samples = wavfile.read(wav_path)
        extractor = FeatureExtractor(analyzer='world', fs=rate, shiftms=5)
        f0, spc, ap = extractor.analyze(samples)
        codeap = extractor.codeap()

        # at least one frame must carry a non-zero F0
        nonzero_f0_frames = np.nonzero(f0)[0]
        assert len(nonzero_f0_frames) > 0
        assert spc.shape == ap.shape

        # the last axis of codeap must match pyworld's band count for fs
        expected_bands = pyworld.get_num_aperiodicities(rate)
        assert expected_bands == codeap.shape[-1]
        ap = pyworld.decode_aperiodicity(codeap, rate, 1024)

        synthesizer = Synthesizer(fs=rate, fftl=1024, shiftms=5)
        resynthesized = synthesizer.synthesis_spc(f0, spc, ap)
        nun_check(resynthesized)
Exemple #13
0
def get_world_features(wavpath, spk, conf, spkr_conf):
    """Extract the mel-cepstrum and F0 of one wav file with WORLD.

    The waveform is low-cut filtered (cutoff 70) before analysis.

    Parameters
    ----------
    wavpath : path-like
        Wav file to read; converted with ``str()`` before loading.
    spk : hashable
        Key into ``spkr_conf`` selecting the ``minf0`` / ``maxf0`` entries.
    conf : dict
        Must provide ``conf["feature"]`` with the keys ``fs``, ``fftl``,
        ``shiftms``, ``mcep_dim`` and ``mcep_alpha``.
    spkr_conf : dict
        Per-speaker settings, indexed as ``spkr_conf[spk]["minf0"]`` and
        ``spkr_conf[spk]["maxf0"]``.

    Returns
    -------
    tuple :
        ``(mcep, f0)`` as returned by the feature extractor.

    """
    x, fs = sf.read(str(wavpath))
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype
    x = np.array(x, dtype=np.float64)
    x = low_cut_filter(x, fs, cutoff=70)
    # NOTE(review): the extractor is configured with conf["feature"]["fs"],
    # not with the rate read from the file -- confirm the two always match.
    fe = FeatureExtractor(
        analyzer="world",
        fs=conf["feature"]["fs"],
        fftl=conf["feature"]["fftl"],
        shiftms=conf["feature"]["shiftms"],
        minf0=spkr_conf[spk]["minf0"],
        maxf0=spkr_conf[spk]["maxf0"],
    )
    cv_f0, _, _ = fe.analyze(x)
    cv_mcep = fe.mcep(dim=conf["feature"]["mcep_dim"],
                      alpha=conf["feature"]["mcep_alpha"])
    return cv_mcep, cv_f0
    def test_synthesis_from_codeap(self):
        """Decode the coded aperiodicity and synthesize from F0, spc and ap."""
        wav_path = dirpath + '/data/test16000.wav'
        rate, samples = wavfile.read(wav_path)
        extractor = FeatureExtractor(analyzer='world', fs=rate, shiftms=5)
        f0, spc, ap = extractor.analyze(samples)
        codeap = extractor.codeap()

        # at least one frame must carry a non-zero F0
        nonzero_f0_frames = np.nonzero(f0)[0]
        assert len(nonzero_f0_frames) > 0
        assert spc.shape == ap.shape

        # the last axis of codeap must match pyworld's band count for fs
        expected_bands = pyworld.get_num_aperiodicities(rate)
        assert expected_bands == codeap.shape[-1]
        ap = pyworld.decode_aperiodicity(codeap, rate, 1024)

        synthesizer = Synthesizer(fs=rate, fftl=1024, shiftms=5)
        resynthesized = synthesizer.synthesis_spc(f0, spc, ap)
        nun_check(resynthesized)
Exemple #15
0
    def test_anasyn_44100(self):
        """Analysis followed by mcep synthesis on data/test44100.wav."""
        wav_path = dirpath + '/data/test44100.wav'
        rate, samples = wavfile.read(wav_path)

        extractor_options = dict(analyzer='world',
                                 fs=rate,
                                 shiftms=5,
                                 minf0=100,
                                 fftl=2048)
        extractor = FeatureExtractor(**extractor_options)
        f0, spc, ap = extractor.analyze(samples)
        mcep = extractor.mcep(dim=40, alpha=0.50)

        # at least one frame must carry a non-zero F0
        nonzero_f0_frames = np.nonzero(f0)[0]
        assert len(nonzero_f0_frames) > 0
        assert spc.shape == ap.shape

        # mcep synthesis
        synthesizer = Synthesizer(fs=rate, fftl=2048, shiftms=5)
        resynthesized = synthesizer.synthesis(f0, mcep, ap, alpha=0.50)
        nun_check(resynthesized)
Exemple #16
0
def main(*argv):
    """Extract acoustic features for every utterance of one speaker.

    For each non-empty line ``f`` of ``list_file`` the file
    ``<wav_dir>/<f>.wav`` is analyzed and F0, mcep, npow and codeap are
    saved to ``<pair_dir>/h5/<f>.h5``; an analysis/synthesis waveform is
    written to ``<pair_dir>/anasyn/<f>.wav``.  Utterances whose h5 file
    already exists are skipped unless ``--overwrite`` is given.

    Parameters
    ----------
    *argv : str
        Command-line arguments; ``sys.argv[1:]`` is used when none are given.

    """
    # Drop the program name: parse_args() expects the arguments only, so
    # passing the full sys.argv would shift every positional by one.
    argv = argv if argv else sys.argv[1:]
    # Options for python
    dcp = 'Extract aoucstic features for the speaker'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('--overwrite',
                        default=False,
                        action='store_true',
                        help='Overwrite h5 file')
    parser.add_argument('speaker', type=str, help='Input speaker label')
    parser.add_argument('ymlf', type=str, help='Yml file of the input speaker')
    parser.add_argument('list_file',
                        type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir',
                        type=str,
                        help='Wav file directory of the speaker')
    parser.add_argument('pair_dir',
                        type=str,
                        help='Directory of the speaker pair')
    args = parser.parse_args(argv)

    # read parameters from speaker yml
    sconf = SpeakerYML(args.ymlf)
    h5_dir = os.path.join(args.pair_dir, 'h5')
    anasyn_dir = os.path.join(args.pair_dir, 'anasyn')
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(os.path.join(h5_dir, args.speaker), exist_ok=True)
    os.makedirs(os.path.join(anasyn_dir, args.speaker), exist_ok=True)

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # open list file
    with open(args.list_file, 'r') as fp:
        for line in fp:
            f = line.rstrip()
            if not f:
                # a blank line names no utterance
                continue
            h5f = os.path.join(h5_dir, f + '.h5')

            if (not os.path.exists(h5f)) or args.overwrite:
                wavf = os.path.join(args.wav_dir, f + '.wav')
                fs, x = wavfile.read(wavf)
                # np.float was removed in NumPy 1.24; np.float64 is the
                # same dtype
                x = np.array(x, dtype=np.float64)
                x = low_cut_filter(x, fs, cutoff=70)
                assert fs == sconf.wav_fs

                print("Extract acoustic features: " + wavf)

                # analyze F0, spc, and ap
                f0, spc, ap = feat.analyze(x)
                mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
                npow = feat.npow()
                codeap = feat.codeap()

                # save features into a hdf5 file (spc and ap are not
                # stored); close the handle even when a save fails
                h5 = HDF5(h5f, mode='w')
                try:
                    h5.save(f0, ext='f0')
                    h5.save(mcep, ext='mcep')
                    h5.save(npow, ext='npow')
                    h5.save(codeap, ext='codeap')
                finally:
                    h5.close()

                # analysis/synthesis using F0, mcep, and ap
                wav = synthesizer.synthesis(
                    f0,
                    mcep,
                    ap,
                    alpha=sconf.mcep_alpha,
                )
                # keep samples inside the int16 range before the cast
                wav = np.clip(wav, -32768, 32767)
                anasynf = os.path.join(anasyn_dir, f + '.wav')
                wavfile.write(anasynf, fs, np.array(wav, dtype=np.int16))
            else:
                print("Acoustic features already exist: " + h5f)
def main(*argv):
    """Extract acoustic features for every utterance of one speaker.

    For each non-empty line ``f`` of ``list_file`` the file
    ``<wav_dir>/<f>.wav`` is analyzed and F0, mcep, npow and codeap are
    saved to ``<pair_dir>/h5/<f>.h5``; an analysis/synthesis waveform is
    written to ``<pair_dir>/anasyn/<f>.wav``.  Utterances whose h5 file
    already exists are skipped unless ``--overwrite`` is given.

    Parameters
    ----------
    *argv : str
        Command-line arguments; ``sys.argv[1:]`` is used when none are given.

    """
    # Drop the program name: parse_args() expects the arguments only, so
    # passing the full sys.argv would shift every positional by one.
    argv = argv if argv else sys.argv[1:]
    # Options for python
    dcp = 'Extract aoucstic features for the speaker'
    parser = argparse.ArgumentParser(description=dcp)
    parser.add_argument('--overwrite', default=False, action='store_true',
                        help='Overwrite h5 file')
    parser.add_argument('speaker', type=str,
                        help='Input speaker label')
    parser.add_argument('ymlf', type=str,
                        help='Yml file of the input speaker')
    parser.add_argument('list_file', type=str,
                        help='List file of the input speaker')
    parser.add_argument('wav_dir', type=str,
                        help='Wav file directory of the speaker')
    parser.add_argument('pair_dir', type=str,
                        help='Directory of the speaker pair')
    args = parser.parse_args(argv)

    # read parameters from speaker yml
    sconf = SpeakerYML(args.ymlf)
    h5_dir = os.path.join(args.pair_dir, 'h5')
    anasyn_dir = os.path.join(args.pair_dir, 'anasyn')
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(os.path.join(h5_dir, args.speaker), exist_ok=True)
    os.makedirs(os.path.join(anasyn_dir, args.speaker), exist_ok=True)

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # open list file
    with open(args.list_file, 'r') as fp:
        for line in fp:
            f = line.rstrip()
            if not f:
                # a blank line names no utterance
                continue
            h5f = os.path.join(h5_dir, f + '.h5')

            if (not os.path.exists(h5f)) or args.overwrite:
                wavf = os.path.join(args.wav_dir, f + '.wav')
                fs, x = wavfile.read(wavf)
                # np.float was removed in NumPy 1.24; np.float64 is the
                # same dtype
                x = np.array(x, dtype=np.float64)
                x = low_cut_filter(x, fs, cutoff=70)
                assert fs == sconf.wav_fs

                print("Extract acoustic features: " + wavf)

                # analyze F0, spc, and ap
                f0, spc, ap = feat.analyze(x)
                mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
                npow = feat.npow()
                codeap = feat.codeap()

                # save features into a hdf5 file (spc and ap are not
                # stored); close the handle even when a save fails
                h5 = HDF5(h5f, mode='w')
                try:
                    h5.save(f0, ext='f0')
                    h5.save(mcep, ext='mcep')
                    h5.save(npow, ext='npow')
                    h5.save(codeap, ext='codeap')
                finally:
                    h5.close()

                # analysis/synthesis using F0, mcep, and ap
                wav = synthesizer.synthesis(f0,
                                            mcep,
                                            ap,
                                            alpha=sconf.mcep_alpha,
                                            )
                # keep samples inside the int16 range before the cast
                wav = np.clip(wav, -32768, 32767)
                anasynf = os.path.join(anasyn_dir, f + '.wav')
                wavfile.write(anasynf, fs, np.array(wav, dtype=np.int16))
            else:
                print("Acoustic features already exist: " + h5f)
def main(*argv):
    """Convert the evaluation utterances of a source speaker with a GMM.

    Loads the mcep GMM (``<pair_dir>/model/GMM_mcep.pkl``), the F0
    statistics of both speakers and the GV statistics, then, for every
    non-empty line ``f`` of ``eval_list_file``, analyzes
    ``<wav_dir>/<f>.wav``, converts F0 and mcep and writes the result to
    ``<pair_dir>/test/<f>_VC.wav`` (``gmmmode`` is None) or
    ``<pair_dir>/test/<f>_DIFFVC.wav`` (``gmmmode == 'diff'``).

    Parameters
    ----------
    *argv : str
        Command-line arguments; ``sys.argv[1:]`` is used when none are given.

    Raises
    ------
    ValueError
        If ``gmmmode`` is neither None nor ``'diff'``; no synthesis path is
        implemented for any other mode.

    """
    argv = argv if argv else sys.argv[1:]
    # Options for python
    description = 'estimate joint feature of source and target speakers'
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('-gmmmode',
                        '--gmmmode',
                        type=str,
                        default=None,
                        help='mode of the GMM [None, diff, or intra]')
    parser.add_argument('org', type=str, help='Original speaker')
    parser.add_argument('tar', type=str, help='Target speaker')
    parser.add_argument('org_yml',
                        type=str,
                        help='Yml file of the original speaker')
    parser.add_argument('pair_yml',
                        type=str,
                        help='Yml file of the speaker pair')
    parser.add_argument('eval_list_file',
                        type=str,
                        help='List file for evaluation')
    parser.add_argument('wav_dir',
                        type=str,
                        help='Directory path of source spekaer')
    parser.add_argument('pair_dir',
                        type=str,
                        help='Directory path of pair directory')
    args = parser.parse_args(argv)

    # read parameters from speaker yml
    sconf = SpeakerYML(args.org_yml)
    pconf = PairYML(args.pair_yml)

    # read GMM for mcep
    mcepgmmpath = os.path.join(args.pair_dir, 'model/GMM_mcep.pkl')
    mcepgmm = GMMConvertor(
        n_mix=pconf.GMM_mcep_n_mix,
        covtype=pconf.GMM_mcep_covtype,
        gmmmode=args.gmmmode,
    )
    # NOTE(review): joblib.load unpickles the file; only load models from
    # a trusted location.
    param = joblib.load(mcepgmmpath)
    mcepgmm.open_from_param(param)
    print("GMM for mcep conversion mode: {}".format(args.gmmmode))

    # read F0 statistics
    stats_dir = os.path.join(args.pair_dir, 'stats')
    orgstatspath = os.path.join(stats_dir, args.org + '.h5')
    orgstats_h5 = HDF5(orgstatspath, mode='r')
    orgf0stats = orgstats_h5.read(ext='f0stats')
    orgstats_h5.close()

    # read F0 and GV statistics for target
    tarstatspath = os.path.join(stats_dir, args.tar + '.h5')
    tarstats_h5 = HDF5(tarstatspath, mode='r')
    tarf0stats = tarstats_h5.read(ext='f0stats')
    targvstats = tarstats_h5.read(ext='gv')
    tarstats_h5.close()

    # read GV statistics for converted mcep
    cvgvstatspath = os.path.join(args.pair_dir, 'model', 'cvgv.h5')
    cvgvstats_h5 = HDF5(cvgvstatspath, mode='r')
    cvgvstats = cvgvstats_h5.read(ext='cvgv')
    diffcvgvstats = cvgvstats_h5.read(ext='diffcvgv')
    cvgvstats_h5.close()

    mcepgv = GV()
    f0stats = F0statistics()

    # construct FeatureExtractor class
    feat = FeatureExtractor(analyzer=sconf.analyzer,
                            fs=sconf.wav_fs,
                            fftl=sconf.wav_fftl,
                            shiftms=sconf.wav_shiftms,
                            minf0=sconf.f0_minf0,
                            maxf0=sconf.f0_maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=sconf.wav_fs,
                              fftl=sconf.wav_fftl,
                              shiftms=sconf.wav_shiftms)

    # test directory
    test_dir = os.path.join(args.pair_dir, 'test')
    os.makedirs(os.path.join(test_dir, args.org), exist_ok=True)

    # conversion in each evaluation file
    with open(args.eval_list_file, 'r') as fp:
        for line in fp:
            # open wav file
            f = line.rstrip()
            if not f:
                # a blank line names no utterance
                continue
            wavf = os.path.join(args.wav_dir, f + '.wav')
            fs, x = wavfile.read(wavf)
            # np.float was removed in NumPy 1.24; np.float64 is the same
            # dtype
            x = x.astype(np.float64)
            x = low_cut_filter(x, fs, cutoff=70)
            assert fs == sconf.wav_fs

            # analyze F0, mcep, and ap
            f0, spc, ap = feat.analyze(x)
            mcep = feat.mcep(dim=sconf.mcep_dim, alpha=sconf.mcep_alpha)
            mcep_0th = mcep[:, 0]

            # convert F0
            cvf0 = f0stats.convert(f0, orgf0stats, tarf0stats)

            # convert mcep: the 0th coefficient is excluded from the GMM
            # input and put back in front of the converted coefficients
            cvmcep_wopow = mcepgmm.convert(static_delta(mcep[:, 1:]),
                                           cvtype=pconf.GMM_mcep_cvtype)
            cvmcep = np.c_[mcep_0th, cvmcep_wopow]

            if args.gmmmode is None:
                # synthesis VC w/ GV
                cvmcep_wGV = mcepgv.postfilter(cvmcep,
                                               targvstats,
                                               cvgvstats=cvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1)
                wav = synthesizer.synthesis(
                    cvf0,
                    cvmcep_wGV,
                    ap,
                    rmcep=mcep,
                    alpha=sconf.mcep_alpha,
                )
                wavpath = os.path.join(test_dir, f + '_VC.wav')
            elif args.gmmmode == 'diff':
                # synthesis DIFFVC w/ GV
                cvmcep[:, 0] = 0.0
                cvmcep_wGV = mcepgv.postfilter(mcep + cvmcep,
                                               targvstats,
                                               cvgvstats=diffcvgvstats,
                                               alpha=pconf.GV_morph_coeff,
                                               startdim=1) - mcep
                wav = synthesizer.synthesis_diff(
                    x,
                    cvmcep_wGV,
                    rmcep=mcep,
                    alpha=sconf.mcep_alpha,
                )
                wavpath = os.path.join(test_dir, f + '_DIFFVC.wav')
            else:
                # Neither branch applies, so there is nothing to write;
                # previously this surfaced as a NameError on `wav` below.
                raise ValueError(
                    "no synthesis is implemented for gmmmode {!r}; "
                    "expected None or 'diff'".format(args.gmmmode))

            # write waveform
            # NOTE(review): this creates a directory named after the
            # utterance itself (kept as in the original); it also makes
            # sure the parent directory of `wavpath` exists.
            os.makedirs(os.path.join(test_dir, f), exist_ok=True)

            # keep samples inside the int16 range before the cast
            wav = np.clip(wav, -32768, 32767)
            wavfile.write(wavpath, fs, wav.astype(np.int16))
            print(wavpath)
def melf0_feature_extract(queue, wav_list, config):
    """Mel-spc w/ F0 feature extraction

    For every wav file the U/V flags, an F0 track and the log-mel
    spectrogram are concatenated along axis 1 (truncated to the shorter of
    the U/V and mel lengths) and written to an HDF5 file.  The F0 track is
    the low-pass filtered continuous F0 when ``config['f0_cont']`` is true,
    otherwise the raw F0.  The process exits with status 1 if a file's
    sampling frequency differs from ``config['sampling_rate']``.

    Args:
        queue (multiprocessing.Queue): receives the string 'Finish' once
            every file has been processed
        wav_list (list): list of the wav files
        config (dict): feature extraction config
    """
    # define f0 feature extractor
    feature_extractor = FeatureExtractor(
        analyzer="world",
        fs=config['sampling_rate'],
        shiftms=config['shiftms'],
        minf0=config['minf0'],
        maxf0=config['maxf0'],
        fftl=config['fft_size'])

    # rate passed to the low-pass filter of the F0 contour: one value per
    # frame shift; it does not depend on the file, so compute it once
    lpf_fs = int(1.0 / (config['shiftms'] * 0.001))
    n_wavs = len(wav_list)

    # extraction
    for i, wav_name in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)", wav_name, i + 1, n_wavs)

        # load wavfile
        (x, fs) = sf.read(wav_name)

        # check sampling frequency
        if not fs == config['sampling_rate']:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # extract f0 and uv features
        f0, _, _ = feature_extractor.analyze(x)
        uv, cont_f0 = convert_continuos_f0(f0)
        cont_f0_lpf = low_pass_filter(cont_f0, lpf_fs, cutoff=20)
        # if the filtered contour contains negative values, filter again
        # with a higher cutoff (70, 140, ...) until none remain
        next_cutoff = 70
        while not (cont_f0_lpf >= [0]).all():
            cont_f0_lpf = low_pass_filter(cont_f0, lpf_fs, cutoff=next_cutoff)
            next_cutoff *= 2

        # extract mel-spc feature
        mel = logmelfilterbank(x, fs,
                               fft_size=config["fft_size"],
                               hop_size=config["hop_size"],
                               win_length=config["win_length"],
                               window=config["window"],
                               num_mels=config["num_mels"],
                               fmin=config["fmin"],
                               fmax=config["fmax"])

        # concatenate
        cont_f0_lpf = np.expand_dims(cont_f0_lpf, axis=-1)
        uv = np.expand_dims(uv, axis=-1)
        minlen = min(uv.shape[0], mel.shape[0])
        # pick the F0 column once instead of building the feature matrix
        # unconditionally and then rebuilding it in either branch
        if config['f0_cont']:
            f0_feat = cont_f0_lpf[:minlen, :]
        else:
            f0_feat = f0[:minlen, np.newaxis]
        feats = np.concatenate([uv[:minlen, :], f0_feat,
                                mel.astype(np.float32)[:minlen, :]], axis=1)

        # save feature
        feat_name = path_replace(wav_name, config['indir'],
                                 config['outdir'], extname=config['feature_format'])
        write_hdf5(feat_name, "/%s" % (config["feat_type"]), feats)
        if config['save_f0']:
            write_hdf5(feat_name, "/f0", f0)

    queue.put('Finish')
Exemple #20
0
def world_feature_extract(queue, wav_list, config):
    """Extract WORLD features from each wav file and store them as HDF5.

    The saved feature matrix concatenates, along axis 1, the U/V flags,
    the low-pass filtered continuous F0, the mel-cepstrum and the coded
    aperiodicity.  F0, ap, spc and npow are stored additionally when the
    matching ``save_*`` config entries are true.

    Args:
        queue (multiprocessing.Queue): receives the string 'Finish' once
            every file has been processed
        wav_list (list): list of the wav files
        config (dict): feature extraction config

    """
    # define feature extractor
    extractor = FeatureExtractor(
        analyzer="world",
        fs=config['sampling_rate'],
        shiftms=config['shiftms'],
        minf0=config['minf0'],
        maxf0=config['maxf0'],
        fftl=config['fft_size'],
    )

    for index, wav_path in enumerate(wav_list):
        progress = (wav_path, index + 1, len(wav_list))
        logging.info("now processing %s (%d/%d)" % progress)

        # load wavfile and apply low cut filter (a cutoff of 0 disables it)
        fs, x = wavfile.read(wav_path)
        x = np.array(x, dtype=np.float32)
        if config['highpass_cutoff'] != 0:
            x = low_cut_filter(x, fs, cutoff=config['highpass_cutoff'])

        # abort the process on a sampling frequency mismatch
        if fs != config['sampling_rate']:
            logging.error("sampling frequency of %s is not matched." %
                          wav_path)
            sys.exit(1)

        # extract features
        f0, spc, ap = extractor.analyze(x)
        codeap = extractor.codeap()
        mcep = extractor.mcep(dim=config['mcep_dim'],
                              alpha=config['mcep_alpha'])
        npow = extractor.npow()
        uv, cont_f0 = convert_continuos_f0(f0)

        # smooth the continuous F0; while the result contains negative
        # values, filter again with a higher cutoff (70, 140, ...)
        lpf_fs = int(1.0 / (config['shiftms'] * 0.001))
        smoothed_f0 = low_pass_filter(cont_f0, lpf_fs, cutoff=20)
        retry_cutoff = 70
        while not (smoothed_f0 >= [0]).all():
            smoothed_f0 = low_pass_filter(cont_f0, lpf_fs,
                                          cutoff=retry_cutoff)
            retry_cutoff *= 2

        # concatenate
        smoothed_f0 = np.expand_dims(smoothed_f0, axis=-1)
        uv = np.expand_dims(uv, axis=-1)
        columns = [uv, smoothed_f0, mcep, codeap]
        feats = np.concatenate(columns, axis=1)

        # save feature
        feat_name = path_replace(wav_path,
                                 config['indir'],
                                 config['outdir'],
                                 extname=config['feature_format'])
        write_hdf5(feat_name, "/%s" % (config["feat_type"]), feats)

        # optional extras, written in the same order as the config flags
        optional_datasets = (
            ('save_f0', "/f0", f0),
            ('save_ap', "/ap", ap),
            ('save_spc', "/spc", spc),
            ('save_npow', "/npow", npow),
        )
        for flag, dataset, values in optional_datasets:
            if config[flag]:
                write_hdf5(feat_name, dataset, values)

    queue.put('Finish')
Exemple #21
0
def main():
    """Extract acoustic features for the 'converted' or 'target' file set.

    The set is selected by the module-level ``args.file`` ('con' or
    'tar').  For every non-empty line ``f`` of ``./list/<set>.list`` whose
    ``./<set>/h5f/<f>.h5`` does not exist yet, ``./<set>/wav/<f>.wav`` is
    analyzed, F0, mcep, npow and codeap are saved to the h5 file and an
    analysis/synthesis waveform is written to ``./<set>/anasyn/<f>.wav``.

    Raises
    ------
    ValueError
        If ``args.file`` is neither 'con' nor 'tar'.

    """

    # `subset` instead of `file`, which shadows a builtin name on Python 2
    if args.file == 'con':
        subset = 'converted'
    elif args.file == 'tar':
        subset = 'target'
    else:
        raise ValueError("The file is incorrect")

    feat = FeatureExtractor(analyzer='world',
                            fs=22050,
                            fftl=1024,
                            shiftms=5,
                            minf0=args.minf0,
                            maxf0=args.maxf0)

    # construct Synthesizer class
    synthesizer = Synthesizer(fs=22050, fftl=1024, shiftms=5)

    # open list file
    with open('./list/' + subset + '.list', 'r') as fp:
        for line in fp:
            f = line.rstrip()
            if not f:
                # a blank line names no utterance
                continue
            h5f = os.path.join('./' + subset + '/h5f/', f + '.h5')

            if (not os.path.exists(h5f)):
                wavf = os.path.join('./' + subset + '/wav/', f + '.wav')
                fs, x = wavfile.read(wavf)
                # np.float was removed in NumPy 1.24; np.float64 is the
                # same dtype
                x = np.array(x, dtype=np.float64)
                x = low_cut_filter(x, fs, cutoff=70)
                # NOTE(review): the extractor is fixed to fs=22050 while
                # the file's own rate is used for filtering and writing;
                # confirm every input really is 22050 Hz.

                print("Extract acoustic features: " + wavf)

                # analyze F0, spc, and ap
                f0, spc, ap = feat.analyze(x)
                mcep = feat.mcep(dim=34, alpha=0.544)
                npow = feat.npow()
                codeap = feat.codeap()

                # save features into a hdf5 file (spc and ap are not
                # stored); close the handle even when a save fails
                h5 = HDF5(h5f, mode='w')
                try:
                    h5.save(f0, ext='f0')
                    h5.save(mcep, ext='mcep')
                    h5.save(npow, ext='npow')
                    h5.save(codeap, ext='codeap')
                finally:
                    h5.close()

                # analysis/synthesis using F0, mcep, and ap
                wav = synthesizer.synthesis(
                    f0,
                    mcep,
                    ap,
                    alpha=0.544,
                )
                # keep samples inside the int16 range before the cast
                wav = np.clip(wav, -32768, 32767)
                anasynf = os.path.join('./' + subset + '/anasyn/', f + '.wav')
                wavfile.write(anasynf, fs, np.array(wav, dtype=np.int16))
            else:
                print("Acoustic features already exist: " + h5f)