Ejemplo n.º 1
0
def run(args):
    """Reconstruct waveforms from spectral features using Griffin-Lim.

    Each feature matrix yielded by the reader is optionally converted from
    the log domain, optionally mapped from mel filter-bank features back to
    a linear spectrum (via the pseudo-inverse of the mel weights), and
    optionally converted from power to magnitude, before being passed to
    ``griffin_lim`` and written out through ``WaveWriter``.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            ``frame_len``, ``frame_hop``, ``window``, ``center``,
            ``epochs``, ``feat_scp``, ``fbank``, ``num_bins``,
            ``min_freq``, ``max_freq``, ``samp_freq``, ``dump_dir``,
            ``normalize``, ``apply_log`` and ``apply_pow``.

    Raises:
        RuntimeError: If the feature dimension (after the optional mel
            inversion) is not ``nfft(frame_len) // 2 + 1``.
    """
    # Read the frame length once so that the STFT settings, the mel filter
    # size and the dimension check below all agree on the same value.
    frame_len = args.frame_len
    griffin_lim_kwargs = {
        "frame_len": frame_len,
        "frame_hop": args.frame_hop,
        "window": args.window,
        "center": args.center,
        "transpose": True,
        "epochs": args.epochs
    }

    feature_reader = ScriptReader(args.feat_scp)

    # The FFT size does not depend on the utterance: compute it only once.
    fft_size = nfft(frame_len)
    half_fft = fft_size // 2

    if args.fbank:
        mel_kwargs = {
            "n_mels": args.num_bins,
            "fmin": args.min_freq,
            "fmax": args.max_freq,
            "htk": True
        }
        # N x F
        mel_weights = audio_lib.filters.mel(args.samp_freq, fft_size,
                                            **mel_kwargs)
        # F x N
        mel_inv_weights = np.linalg.pinv(mel_weights)

    with WaveWriter(
            args.dump_dir, fs=args.samp_freq,
            normalize=args.normalize) as writer:
        for key, spec in feature_reader:
            # if log, transform to linear
            if args.apply_log:
                spec = np.exp(spec)
            # convert fbank to spectrum
            # feat: T x N
            if args.fbank:
                # The pseudo-inverse may produce non-positive values, so
                # the result is floored at EPSILON.
                spec = np.maximum(spec @ np.transpose(mel_inv_weights),
                                  EPSILON)
            # if power spectrum, transform to magnitude spectrum
            if args.apply_pow:
                spec = np.sqrt(spec)
            # The spectrum must have fft_size // 2 + 1 frequency bins.
            if spec.shape[1] - 1 != half_fft:
                raise RuntimeError("Seems missing --fbank options?")
            # griffin lim
            samps = griffin_lim(spec, **griffin_lim_kwargs)
            writer.write(key, samps)
    logger.info("Processed {:d} utterance done".format(len(feature_reader)))
Ejemplo n.º 2
0
def run(args):
    """Turn stored spectral features back into waveforms.

    For every utterance yielded by the feature reader the spectrum is
    optionally moved out of the log domain and optionally converted from
    power to magnitude. If ``args.phase_ref`` is set, the phase of the
    matching reference spectrogram is combined with the magnitude and
    inverted with ``inverse_stft``; otherwise the phase is estimated with
    ``griffin_lim``. The resulting samples are written through
    ``WaveWriter``.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            ``frame_len``, ``frame_hop``, ``window``, ``center``, ``fmt``,
            ``feat_scp``, ``phase_ref``, ``round_power_of_two``,
            ``dump_dir``, ``sr``, ``normalize``, ``apply_log``,
            ``apply_pow`` and ``epoches``.

    Raises:
        KeyError: If ``args.fmt`` is neither ``"numpy"`` nor ``"kaldi"``,
            or if an utterance key is absent from the phase reference.
    """
    stft_opts = dict(
        frame_len=args.frame_len,
        frame_hop=args.frame_hop,
        window=args.window,
        center=args.center,
    )
    # Keyword arguments used for the inverse transform in the loop below.
    istft_opts = {**stft_opts, "norm": 0.8}

    reader_types = {"numpy": NumpyReader, "kaldi": ScriptReader}
    reader_cls = reader_types[args.fmt]
    feature_reader = reader_cls(args.feat_scp)

    # Only build the reference reader when a phase reference is given.
    if args.phase_ref:
        phase_reader = SpectrogramReader(
            args.phase_ref,
            round_power_of_two=args.round_power_of_two,
            **stft_opts)
        logger.info(f"Using phase reference from {args.phase_ref}")
    else:
        phase_reader = None

    wave_writer = WaveWriter(args.dump_dir,
                             fs=args.sr,
                             normalize=args.normalize)
    with wave_writer as writer:
        for key, mag in feature_reader:
            logger.info(f"Processing utterance {key}...")
            # log domain -> linear domain
            if args.apply_log:
                mag = np.exp(mag)
            # power spectrum -> magnitude spectrum
            if args.apply_pow:
                mag = np.sqrt(mag)
            if phase_reader is not None:
                # reuse the phase of the reference spectrogram
                if key not in phase_reader:
                    raise KeyError(f"Missing key {key} in phase reader")
                ref_stft = phase_reader[key]
                # a 3-D reference is reduced to its first entry
                if ref_stft.ndim == 3:
                    ref_stft = ref_stft[0]
                unit_phase = np.exp(1j * np.angle(ref_stft))
                wave = inverse_stft(mag * unit_phase, **istft_opts)
            else:
                # no reference available: estimate the phase (griffin lim)
                wave = griffin_lim(mag,
                                   epoches=args.epoches,
                                   transpose=True,
                                   **istft_opts)
            writer.write(key, wave)
    logger.info(f"Processed {len(feature_reader)} utterance done")