Esempio n. 1
0
def world_synthesis(wav_name, feat_param, f0, ap, spectral, spectral_type):
    """Synthesise a waveform with the WORLD vocoder and write it as 16 bit PCM.

    Args:
        wav_name (str): filename of synthesised wav
        feat_param (dict): acoustic feature parameter dictionary; the keys
            'fs', 'fftl' and 'shiftms' are always read, 'mcep_alpha' is read
            only when spectral_type is 'mcc'
        f0 (np array): pitch features
        ap: aperiodicity features
        spectral: spectral features
        spectral_type (str): spectral feature type, either 'mcc' or 'sp'

    Raises:
        ValueError: if spectral_type is neither 'mcc' nor 'sp'
    """
    # Reject an unknown feature type up front so that no synthesizer is built
    # for a request that cannot be served, and name the values that are
    # actually accepted by the branches below.
    if spectral_type not in ('mcc', 'sp'):
        message = ("unsupported spectral_type %r: currently support "
                   "'mcc' or 'sp' only." % (spectral_type,))
        logging.error(message)
        raise ValueError(message)

    synthesizer = Synthesizer(fs=feat_param['fs'],
                              fftl=feat_param['fftl'],
                              shiftms=feat_param['shiftms'])

    if spectral_type == 'mcc':
        wav = synthesizer.synthesis(f0,
                                    spectral,
                                    ap,
                                    alpha=feat_param['mcep_alpha'])
    else:
        wav = synthesizer.synthesis_spc(f0, spectral, ap)

    # Saturate to the int16 range so the cast below cannot wrap around.
    wav = np.clip(wav, -32768, 32767)
    wavfile.write(wav_name, feat_param['fs'], wav.astype(np.int16))
    logging.info("wrote %s.", wav_name)
Esempio n. 2
0
def melcepstrum_noise_shaping(wav_list, args):
    """Apply noise shaping using STFT-based mcep to every wav in a list.

    Each input file is filtered with the averaged mel-cepstrum read from
    ``args.stats`` (dataset "/mcep/mean"), low-cut filtered at 70 Hz and
    written under ``args.writedir`` with the same base name.

    Args:
        wav_list (list): filenames of the wav files to process
        args: namespace providing ``fs``, ``shiftms``, ``fftl``, ``stats``,
            ``mag``, ``inv``, ``mcep_alpha`` and ``writedir``

    Note:
        The process is terminated with exit status 1 when a file's sampling
        frequency differs from ``args.fs``.
    """
    # Nothing to do: skip building the synthesizer and reading the stats.
    if len(wav_list) == 0:
        return

    # define synthesizer
    synthesizer = Synthesizer(fs=args.fs, shiftms=args.shiftms, fftl=args.fftl)

    # load average mcep once; it does not depend on the utterance
    mean_mcep = read_hdf5(args.stats, "/mcep/mean") * args.mag
    mean_mcep[0] = 0.0  # the 0th coefficient is excluded from the filter
    if args.inv:
        mean_mcep[1:] = -1.0 * mean_mcep[1:]

    total = len(wav_list)
    for i, wav_name in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)", wav_name, i + 1, total)

        # load wavfile
        fs, x = wavfile.read(wav_name)
        if x.dtype != np.int16:
            logging.warning("wav file format is not 16 bit PCM.")
        x = np.float64(x)

        # check sampling frequency
        if fs != args.fs:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # get frame number and repeat the filter coefficients for each frame
        num_frames = int(1000 * len(x) / fs / args.shiftms) + 1
        mlsa_coef = np.float64(np.tile(mean_mcep, [num_frames, 1]))

        # synthesis and write
        x_ns = synthesizer.synthesis_diff(x, mlsa_coef, alpha=args.mcep_alpha)
        x_ns = low_cut_filter(x_ns, args.fs, cutoff=70)
        # Saturate to the int16 range so the cast below cannot wrap around.
        x_ns = np.clip(x_ns, -32768, 32767)
        write_name = os.path.join(args.writedir, os.path.basename(wav_name))
        wavfile.write(write_name, args.fs, np.int16(x_ns))
Esempio n. 3
0
    def __init__(self, feature_queue: Queue, converted_queue: Queue,
                 mcep_gmm_config: configs.McepGMMConfig,
                 f0_stats_config: configs.F0StatsConfig,
                 gv_config: configs.GVConfig,
                 synthesizer_config: configs.SynthesizerConfig):
        """Wire up the queues and the conversion/synthesis collaborators.

        Args:
            feature_queue: queue stored as the source of input features
            converted_queue: queue stored as the sink for converted output
            mcep_gmm_config: supplies ``n_mix``, ``covtype`` and ``param``
                for the mel-cepstrum GMM; kept for later use
            f0_stats_config: kept alongside the F0 statistics helper
            gv_config: kept alongside the GV helper
            synthesizer_config: supplies ``fs``, ``fftl`` and ``shiftms``
                for the synthesizer; kept for later use
        """
        # Mel-cepstrum GMM: build it, then load its parameters.
        gmm = GMMConvertor(n_mix=mcep_gmm_config.n_mix,
                           covtype=mcep_gmm_config.covtype,
                           gmmmode=None)
        gmm.open_from_param(mcep_gmm_config.param)
        self._mcep_gmm = gmm
        self._mcep_gmm_config = mcep_gmm_config

        # Input and output queues.
        self._feature_queue: Queue = feature_queue
        self._converted_queue: Queue = converted_queue

        # F0 statistics helper and its configuration.
        self._f0_stats = F0statistics()
        self._f0_stats_config = f0_stats_config

        # GV helper and its configuration.
        self._mcep_gv = GV()
        self._mcep_gv_config = gv_config

        # Waveform synthesizer and its configuration.
        synth_options = dict(fs=synthesizer_config.fs,
                             fftl=synthesizer_config.fftl,
                             shiftms=synthesizer_config.shiftms)
        self._synthesizer = Synthesizer(**synth_options)
        self._synthesizer_config = synthesizer_config
Esempio n. 4
0
def noise_shaping(wav_list, args):
    """Apply noise shaping to every utterance in a list.

    For each id, the input path is ``args.outdir`` with the literal text
    "feat_id" replaced by the id, and the output path is ``args.writedir``
    with the same replacement. The signal is filtered with the averaged
    mel-cepstrum read from ``args.stats``, low-cut filtered at 70 Hz and
    written back in the sample format of the input file.

    Args:
        wav_list (list): utterance ids to process
        args: namespace providing ``fs``, ``shiftms``, ``fftl``, ``outdir``,
            ``writedir``, ``stats``, ``feature_type``, ``mcep_dim_start``,
            ``mcep_dim_end``, ``mag``, ``inv`` and ``mcep_alpha``

    Note:
        The process is terminated with exit status 1 when a file's sampling
        frequency differs from ``args.fs``.
    """
    # Local import: the module-level imports are not visible from this block.
    import os

    # Nothing to do: skip building the analyzers and reading the stats.
    if len(wav_list) == 0:
        return

    # define feature extractor
    feature_extractor = FeatureExtractor(
        analyzer="world",
        fs=args.fs,
        shiftms=args.shiftms,
        fftl=args.fftl)

    # define synthesizer
    synthesizer = Synthesizer(
        fs=args.fs,
        shiftms=args.shiftms,
        fftl=args.fftl)

    # load average mcep once; it does not depend on the utterance
    mean_mcep = read_hdf5(args.stats, "/%s/mean" % args.feature_type)
    mean_mcep = mean_mcep[args.mcep_dim_start:args.mcep_dim_end] * args.mag
    mean_mcep[0] = 0.0  # the 0th coefficient is excluded from the filter
    if args.inv:
        mean_mcep[1:] = -1.0 * mean_mcep[1:]

    total = len(wav_list)
    for i, feat_id in enumerate(wav_list):
        logging.info("now processing %s (%d/%d)", feat_id, i + 1, total)

        # load wavfile, remembering its sample format for the write below
        wav_filename = args.outdir.replace("feat_id", feat_id)
        fs, x = wavfile.read(wav_filename)
        wav_type = x.dtype
        x = np.array(x, dtype=np.float64)

        # check sampling frequency
        if fs != args.fs:
            logging.error("sampling frequency is not matched.")
            sys.exit(1)

        # extract features (only to get the number of frames)
        f0, _, _ = feature_extractor.analyze(x)
        num_frames = f0.shape[0]
        mlsa_coef = np.tile(mean_mcep, [num_frames, 1])

        # synthesis
        x_ns = synthesizer.synthesis_diff(x, mlsa_coef, alpha=args.mcep_alpha)
        x_ns = low_cut_filter(x_ns, args.fs, cutoff=70)
        write_name = args.writedir.replace("feat_id", feat_id)

        # check directory existence (a bare filename has no directory part)
        out_dir = os.path.dirname(write_name)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        if np.issubdtype(wav_type, np.integer):
            # Saturate to the range of the source integer format, then
            # restore that format so the cast cannot wrap around.
            limits = np.iinfo(wav_type)
            wav = np.clip(x_ns, limits.min, limits.max).astype(wav_type)
        else:
            # NOTE(review): non-integer input keeps the previous behaviour
            # (float64 output clipped to the int16 range) -- confirm intent.
            wav = np.clip(x_ns, -32768, 32767)
        wavfile.write(write_name, args.fs, wav)
Esempio n. 5
0
class ConverterWorker:
    '''
    Worker pipeline:
    features -> converted features -> corrected features -> converted speech
    '''
    def __init__(self, feature_queue: Queue, converted_queue: Queue,
                 mcep_gmm_config: configs.McepGMMConfig,
                 f0_stats_config: configs.F0StatsConfig,
                 gv_config: configs.GVConfig,
                 synthesizer_config: configs.SynthesizerConfig):
        '''
        Store the queues and build the conversion/synthesis collaborators.

        feature_queue is read by start(); converted_queue receives the
        synthesized waveforms. Each config object is kept next to the helper
        it parameterizes.
        '''
        # Mel-cepstrum GMM: build it, then load its parameters.
        self._mcep_gmm = GMMConvertor(n_mix=mcep_gmm_config.n_mix,
                                      covtype=mcep_gmm_config.covtype,
                                      gmmmode=None)
        self._mcep_gmm.open_from_param(mcep_gmm_config.param)
        self._mcep_gmm_config = mcep_gmm_config

        self._feature_queue: Queue = feature_queue
        self._converted_queue: Queue = converted_queue

        self._f0_stats = F0statistics()
        self._f0_stats_config = f0_stats_config

        self._mcep_gv = GV()
        self._mcep_gv_config = gv_config

        self._synthesizer = Synthesizer(fs=synthesizer_config.fs,
                                        fftl=synthesizer_config.fftl,
                                        shiftms=synthesizer_config.shiftms)
        self._synthesizer_config = synthesizer_config

    def convert_from_feature(self, f0, spc, ap, mcep) -> numpy.ndarray:
        '''
        Convert one set of acoustic features and synthesize the waveform.

        f0 is converted with the configured source/target statistics. The
        mel-cepstrum columns from index 1 onward are converted by the GMM,
        and column 0 of the input is re-attached unchanged (presumably the
        power term, going by the `wopow` naming -- confirm). The GV
        postfilter is then applied from dimension 1. `spc` is accepted for
        interface compatibility but is not used here.

        Returns the synthesized waveform clipped to the int16 range and cast
        to int16.
        '''
        cv_f0 = self._f0_stats.convert(f0, self._f0_stats_config.source_stats,
                                       self._f0_stats_config.target_stats)

        cv_mcep_wopow = self._mcep_gmm.convert(
            static_delta(mcep[:, 1:]), cvtype=self._mcep_gmm_config.cvtype)
        cv_mcep = numpy.c_[mcep[:, 0], cv_mcep_wopow]

        cv_mcep_wGV = self._mcep_gv.postfilter(
            cv_mcep,
            self._mcep_gv_config.target_stats,
            cvgvstats=self._mcep_gv_config.cvgv_stats,
            alpha=self._mcep_gv_config.morph_coeff,
            startdim=1)

        output_wav = self._synthesizer.synthesis(
            cv_f0,
            cv_mcep_wGV,
            ap,
            rmcep=mcep,
            alpha=self._synthesizer_config.mcep_alpha)
        # Use the public numpy.int16 alias; numpy.core is a private namespace.
        return output_wav.clip(-32768, 32767).astype(numpy.int16)

    def start(self):
        '''
        Run forever: take features from the input queue, convert them and
        put the resulting waveform on the output queue.
        '''
        while True:
            feature = self._feature_queue.get()  # blocks until an item arrives
            f0, spc, ap, mcep = feature
            output_wav = self.convert_from_feature(f0, spc, ap, mcep)
            self._converted_queue.put(output_wav)
Esempio n. 6
0
def world_speech_synthesis(queue, wav_list, args):
    """WORLD SPEECH SYNTHESIS

    Re-synthesise each utterance from its stored acoustic features and write
    the result as a 16 bit PCM wav file. Existing output files are skipped
    unless ``args.overwrite`` is set. The string 'Finish' is put on the queue
    once the whole list has been processed.

    Parameters
    ----------
    queue : multiprocessing.Queue()
        the queue to store the file name of utterance
    wav_list : list
        list of the wav files
    args :
        feature extract arguments; reads ``fs``, ``fftl``, ``shiftms``,
        ``feature_dir``, ``feature_format``, ``overwrite``, ``f0_dim_idx``,
        ``ap_dim_idx``, ``mcep_dim_start``, ``mcep_dim_end`` and
        ``mcep_alpha``

    Notes
    -----
    The process is terminated with exit status 1 when a feature file has no
    "/world" dataset.
    """
    # define synthesizer
    synthesizer = Synthesizer(fs=args.fs, fftl=args.fftl, shiftms=args.shiftms)
    total = len(wav_list)
    # synthesis
    for i, wav_name in enumerate(wav_list):
        if args.feature_dir is None:
            # Derive both paths from the wav path: every "wav" substring is
            # swapped for the feature format, then the extension of the
            # restored file is turned back into ".wav".
            restored_name = wav_name.replace("wav",
                                             args.feature_format + "_restored")
            restored_name = restored_name.replace(
                ".%s" % args.feature_format + "_restored", ".wav")
            feat_name = wav_name.replace("wav", args.feature_format)
        else:
            restored_name = rootdir_replace(wav_name,
                                            newdir=args.feature_dir +
                                            "restored")
            feat_name = rootdir_replace(wav_name,
                                        extname=args.feature_format,
                                        newdir=args.feature_dir)
        if os.path.exists(restored_name):
            if args.overwrite:
                logging.info("overwrite %s (%d/%d)",
                             restored_name, i + 1, total)
            else:
                logging.info("skip %s (%d/%d)",
                             restored_name, i + 1, total)
                continue
        else:
            logging.info("now processing %s (%d/%d)",
                         restored_name, i + 1, total)
        # load acoustic features
        if check_hdf5(feat_name, "/world"):
            h = read_hdf5(feat_name, "/world")
        else:
            logging.error("%s is not existed.", feat_name)
            sys.exit(1)
        if check_hdf5(feat_name, "/f0"):
            f0 = read_hdf5(feat_name, "/f0")
        else:
            # Rebuild f0 from the packed features: column 0 is used as the
            # voiced/unvoiced flag and frames flagged 0.0 get f0 = 0.
            uv = h[:, 0].copy(order='C')
            f0 = h[:, args.f0_dim_idx].copy(order='C')  # cont_f0_lpf
            fz_idx = np.where(uv == 0.0)
            f0[fz_idx] = 0.0
        if check_hdf5(feat_name, "/ap"):
            ap = read_hdf5(feat_name, "/ap")
        else:
            codeap = h[:, args.ap_dim_idx:].copy(order='C')
            ap = pyworld.decode_aperiodicity(codeap, args.fs, args.fftl)
        mcep = h[:, args.mcep_dim_start:args.mcep_dim_end].copy(order='C')
        # waveform synthesis
        wav = synthesizer.synthesis(f0, mcep, ap, alpha=args.mcep_alpha)
        # Saturate to the int16 range so the cast below cannot wrap around.
        wav = np.clip(wav, -32768, 32767)
        wavfile.write(restored_name, args.fs, wav.astype(np.int16))
    queue.put('Finish')
Esempio n. 7
0
def main():
    """Command-line entry point: noise-shape a set of wav files in parallel.

    The input is either a directory searched for "*.wav" files or a text file
    listing wav filenames. The files are split into ``--n_jobs`` groups and
    each group is processed in its own process. Results are written to
    ``--writedir`` under the original base names.
    """
    parser = argparse.ArgumentParser(
        description="making feature file argsurations.")

    parser.add_argument("--waveforms",
                        default=None,
                        help="directory or list of filename of input wavfile")
    parser.add_argument("--stats",
                        default=None,
                        help="filename of hdf5 format")
    parser.add_argument("--writedir",
                        default=None,
                        help="directory to save preprocessed wav file")
    parser.add_argument("--fs",
                        default=FS,
                        type=int,
                        help="Sampling frequency")
    parser.add_argument("--shiftms",
                        default=SHIFTMS,
                        type=int,
                        help="Frame shift in msec")
    parser.add_argument("--fftl", default=FFTL, type=int, help="FFT length")
    parser.add_argument("--mcep_dim_start",
                        default=MCEP_DIM_START,
                        type=int,
                        help="Start index of mel cepstrum")
    parser.add_argument("--mcep_dim_end",
                        default=MCEP_DIM_END,
                        type=int,
                        help="End index of mel cepstrum")
    parser.add_argument("--mcep_alpha",
                        default=MCEP_ALPHA,
                        type=float,
                        help="Alpha of mel cepstrum")
    parser.add_argument("--mag",
                        default=MAG,
                        type=float,
                        help="magnification of noise shaping")
    parser.add_argument("--verbose",
                        default=1,
                        type=int,
                        help="log message level")
    parser.add_argument('--n_jobs',
                        default=1,
                        type=int,
                        help="number of parallel jobs")
    parser.add_argument('--inv',
                        default=False,
                        type=strtobool,
                        help="if True, inverse filtering will be performed")
    args = parser.parse_args()

    # These options default to None but are used unconditionally below;
    # report a usage error instead of failing later with a TypeError.
    for required in ("waveforms", "stats", "writedir"):
        if getattr(args, required) is None:
            parser.error("--%s is required" % required)

    # read list
    if os.path.isdir(args.waveforms):
        file_list = sorted(find_files(args.waveforms, "*.wav"))
    else:
        file_list = read_txt(args.waveforms)

    # define feature extractor
    feature_extractor = FeatureExtractor(analyzer="world",
                                         fs=args.fs,
                                         shiftms=args.shiftms,
                                         fftl=args.fftl)

    # define synthesizer
    synthesizer = Synthesizer(fs=args.fs, shiftms=args.shiftms, fftl=args.fftl)

    # check directory existence
    os.makedirs(args.writedir, exist_ok=True)

    # load average mcep once; it does not depend on the utterance
    mean_mcep = read_hdf5(args.stats, "/mean")
    mean_mcep = mean_mcep[args.mcep_dim_start:args.mcep_dim_end] * args.mag
    mean_mcep[0] = 0.0  # the 0th coefficient is excluded from the filter
    if args.inv:
        mean_mcep[1:] = -1.0 * mean_mcep[1:]

    def noise_shaping(wav_list):
        """Noise-shape every wav in wav_list and write it to args.writedir."""
        for wav_name in wav_list:
            # load wavfile, remembering its sample format for the write below
            fs, x = wavfile.read(wav_name)
            wav_type = x.dtype
            x = np.array(x, dtype=np.float64)

            # check sampling frequency
            if fs != args.fs:
                print("ERROR: sampling frequency is not matched.")
                sys.exit(1)

            # extract features (only to get the number of frames)
            f0, _, _ = feature_extractor.analyze(x)
            num_frames = f0.shape[0]
            mlsa_coef = np.tile(mean_mcep, [num_frames, 1])

            # synthesis and write
            x_ns = synthesizer.synthesis_diff(x,
                                              mlsa_coef,
                                              alpha=args.mcep_alpha)
            x_ns = low_cut_filter(x_ns, args.fs, cutoff=70)
            # The output path is needed by both branches below.
            write_name = os.path.join(args.writedir,
                                      os.path.basename(wav_name))
            if wav_type == np.int16:
                # Saturate to the int16 range so the cast cannot wrap around.
                x_ns = np.clip(x_ns, -32768, 32767)
                wavfile.write(write_name, args.fs, np.int16(x_ns))
            else:
                wavfile.write(write_name, args.fs, x_ns)

    # divide list
    file_lists = np.array_split(file_list, args.n_jobs)
    file_lists = [f_list.tolist() for f_list in file_lists]

    # multi processing
    # NOTE(review): a nested function cannot be pickled, so it only works as
    # a Process target with the "fork" start method -- confirm the target
    # platforms.
    processes = []
    for f in file_lists:
        p = mp.Process(target=noise_shaping, args=(f, ))
        p.start()
        processes.append(p)

    # wait for all process
    for p in processes:
        p.join()