def run(args):
    """Apply masks to spectrograms and write the resynthesised waveforms.

    Every utterance yielded by the spectrogram reader that also has an entry
    in the mask reader is multiplied by its mask, converted back with
    ``istft`` and handed to a ``WaveWriter``. Utterances without a mask are
    skipped silently.

    Args:
        args: parsed options. This function reads ``frame_length``,
            ``frame_shift``, ``window``, ``center``, ``wav_scp``,
            ``mask_scp``, ``numpy``, ``transpose``, ``keep_length``,
            ``dst_dir`` and ``samp_freq``.

    Raises:
        ValueError: if a mask and its spectrogram have different shapes.
    """
    # one STFT configuration shared by the reader (analysis) and istft
    # (synthesis); per the original note, spectrograms are T x F, complex
    stft_conf = dict(
        frame_length=args.frame_length,
        frame_shift=args.frame_shift,
        window=args.window,
        center=args.center,
    )
    spectrogram_reader = SpectrogramReader(args.wav_scp, **stft_conf)
    if args.numpy:
        mask_reader = NumpyReader(args.mask_scp)
    else:
        mask_reader = ScriptReader(args.mask_scp)

    n_processed = 0
    with WaveWriter(args.dst_dir, fs=args.samp_freq) as writer:
        for key, spectrogram in spectrogram_reader:
            # nothing to do for utterances that have no mask
            if key not in mask_reader:
                continue
            n_processed += 1
            tf_mask = mask_reader[key]
            if args.transpose:
                tf_mask = np.transpose(tf_mask)
            logger.info("Processing utterance {}...".format(key))
            if tf_mask.shape != spectrogram.shape:
                message = (
                    "Dimention mismatch between mask and spectrogram"
                    "({0[0]} x {0[1]} vs {1[0]} x {1[1]}), need check configures"
                ).format(tf_mask.shape, spectrogram.shape)
                raise ValueError(message)
            # only request a fixed output length when asked to
            nsamps = None
            if args.keep_length:
                nsamps = spectrogram_reader.nsamps(key)
            norm = spectrogram_reader.samp_norm(key)
            waveform = istft(
                spectrogram * tf_mask, **stft_conf, norm=norm, nsamps=nsamps)
            writer.write(key, waveform)
    total = len(spectrogram_reader)
    logger.info("Processed {:d} utterances over {:d}".format(
        n_processed, total))
Exemple #2
0
def run(args):
    """Mask each spectrogram and pass it to ``istft`` with a wav destination.

    For every utterance yielded by the spectrogram reader that also has an
    entry in the mask reader, the (optionally transposed) mask is multiplied
    with the spectrogram and ``istft`` is called with the target path
    ``<dst_dir>/<key>.wav`` as its first argument (presumably ``istft``
    writes the file itself in this variant -- confirm against its
    definition). Utterances without a mask are skipped.

    Args:
        args: parsed options. This function reads ``frame_length``,
            ``frame_shift``, ``window``, ``center``, ``wav_scp``,
            ``mask_scp``, ``numpy``, ``transpose``, ``keep_length``,
            ``dst_dir`` and ``samp_freq``.

    Raises:
        ValueError: if a mask and its spectrogram have different shapes.
    """
    # shape: T x F, complex
    stft_kwargs = {
        "frame_length": args.frame_length,
        "frame_shift": args.frame_shift,
        "window": args.window,
        "center": args.center,
    }
    spectrogram_reader = SpectrogramReader(args.wav_scp, **stft_kwargs)
    mask_reader = NumpyReader(args.mask_scp) if args.numpy else ScriptReader(
        args.mask_scp)

    num_utts = 0
    # Plain scalar: a trailing comma here would wrap the sampling frequency
    # in a 1-tuple and hand ``(samp_freq,)`` to istft instead of the number.
    fs = args.samp_freq
    for key, specs in spectrogram_reader:
        # utterances without a mask are skipped
        if key not in mask_reader:
            continue
        num_utts += 1
        mask = mask_reader[key]
        if args.transpose:
            mask = np.transpose(mask)
        logger.info("Processing utterance {}...".format(key))
        if mask.shape != specs.shape:
            raise ValueError(
                "Dimention mismatch between mask and spectrogram"
                "({0[0]} x {0[1]} vs {1[0]} x {1[1]}), need check configures"
                .format(mask.shape, specs.shape))
        # only request a fixed output length when asked to
        nsamps = spectrogram_reader.nsamps(
            key) if args.keep_length else None
        istft(os.path.join(args.dst_dir, "{}.wav".format(key)),
              specs * mask,
              **stft_kwargs,
              fs=fs,
              nsamps=nsamps)
    logger.info("Processed {} utterances".format(num_utts))
Exemple #3
0
def run(args):
    """Apply masks to spectrograms, optionally borrowing a reference phase.

    For each utterance with a mask available, the mask is oriented to match
    the spectrogram, multiplied with it and resynthesised through ``istft``;
    the result goes to a ``WaveWriter``. When ``args.phase_ref`` is set, the
    magnitude of the input spectrogram is combined with the phase read from
    the reference instead of the input's own phase.

    Args:
        args: parsed options. This function reads ``frame_len``,
            ``frame_hop``, ``window``, ``center``, ``round_power_of_two``,
            ``wav_scp``, ``phase_ref``, ``fmt``, ``mask_scp``,
            ``keep_length``, ``dst_dir`` and ``samp_freq``.

    Raises:
        ValueError: if a mask and its spectrogram have different shapes.
        KeyError: if ``args.fmt`` is neither ``"numpy"`` nor ``"kaldi"``.
    """
    # shared STFT configuration; spectrograms are T x F, complex (per the
    # original note)
    stft_conf = dict(
        frame_len=args.frame_len,
        frame_hop=args.frame_hop,
        window=args.window,
        center=args.center,
    )
    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        **stft_conf,
        round_power_of_two=args.round_power_of_two)
    if args.phase_ref:
        phase_reader = SpectrogramReader(
            args.phase_ref,
            **stft_conf,
            round_power_of_two=args.round_power_of_two)
        logger.info("Using phase reference from {}".format(args.phase_ref))
    else:
        phase_reader = None
    reader_by_format = {"numpy": NumpyReader, "kaldi": ScriptReader}
    mask_reader = reader_by_format[args.fmt](args.mask_scp)

    n_processed = 0
    with WaveWriter(args.dst_dir, fs=args.samp_freq) as writer:
        for key, spectrogram in spectrogram_reader:
            # utterances without a mask are skipped
            if key not in mask_reader:
                continue
            n_processed += 1
            tf_mask = mask_reader[key]
            # Bring the mask into T x F layout: a leading axis as long as
            # the spectrogram's second axis is taken to mean F x T.
            # NOTE(review): a mask with T == F is also transposed here.
            _, num_bins = spectrogram.shape
            if tf_mask.shape[0] == num_bins:
                tf_mask = np.transpose(tf_mask)
            logger.info("Processing utterance {}...".format(key))
            if tf_mask.shape != spectrogram.shape:
                message = (
                    "Dimention mismatch between mask and spectrogram"
                    "({0[0]} x {0[1]} vs {1[0]} x {1[1]}), need check configures"
                ).format(tf_mask.shape, spectrogram.shape)
                raise ValueError(message)
            nsamps = None
            if args.keep_length:
                nsamps = spectrogram_reader.nsamps(key)
            norm = spectrogram_reader.samp_norm(key)
            if phase_reader is None:
                masked = spectrogram * tf_mask
            else:
                # keep the masked magnitude, take the phase from the reference
                ref_phase = np.exp(np.angle(phase_reader[key]) * 1j)
                masked = np.abs(spectrogram) * tf_mask * ref_phase
            waveform = istft(masked, **stft_conf, norm=norm, nsamps=nsamps)
            writer.write(key, waveform)
    total = len(spectrogram_reader)
    logger.info("Processed {:d} utterances over {:d}".format(
        n_processed, total))
Exemple #4
0
def run(args):
    """Separate each mixture with masks computed from reference spectrograms.

    For every mixture utterance present in all reference readers,
    ``compute_mask`` is called with the mixture, the per-speaker reference
    spectrograms and ``args.mask``. Each returned mask is applied to the
    mixture, resynthesised with ``istft`` and written to
    ``<dump_dir>/spk<i>/<key>.wav`` (speakers numbered from 1). Mixtures
    missing from any reference reader are skipped with a log message.

    Args:
        args: parsed options. This function reads ``frame_length``,
            ``frame_shift``, ``window``, ``center``, ``mask``, ``mix_scp``,
            ``round_power_of_two``, ``ref_scp`` (comma-separated list),
            ``keep_length``, ``dump_dir`` and ``fs``.
    """
    stft_conf = dict(
        frame_length=args.frame_length,
        frame_shift=args.frame_shift,
        window=args.window,
        center=args.center,
    )
    logger.info("Using mask: {}".format(args.mask.upper()))
    mixture_reader = SpectrogramReader(
        args.mix_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_conf)
    # one reference script per speaker, comma separated
    speaker_scps = args.ref_scp.split(",")
    logger.info("Number of speakers: {:d}".format(len(speaker_scps)))
    targets_reader = [
        SpectrogramReader(scp, **stft_conf) for scp in speaker_scps
    ]
    n_processed = 0
    for key, mixture in tqdm(mixture_reader):
        nsamps = None
        if args.keep_length:
            nsamps = mixture_reader.nsamps(key)
        # every speaker's reference is needed to compute the masks
        if any(key not in reader for reader in targets_reader):
            logger.info("Skip utterance {}, missing targets".format(key))
            continue
        n_processed += 1
        references = [reader[key] for reader in targets_reader]
        spk_masks = compute_mask(mixture, references, args.mask)
        for spk_id, spk_mask in enumerate(spk_masks, start=1):
            waveform = istft(mixture * spk_mask, **stft_conf, nsamps=nsamps)
            wav_path = os.path.join(args.dump_dir,
                                    "spk{:d}/{}.wav".format(spk_id, key))
            write_wav(wav_path, waveform, fs=args.fs)
    logger.info("Processed {} utterance!".format(n_processed))
Exemple #5
0
def run(args):
    """Separate each mixture with masks computed from reference spectrograms.

    For every mixture utterance present in all reference readers,
    ``compute_mask`` is called with the mixture, the per-speaker reference
    spectrograms and ``args.mask``. Each returned mask is applied to the
    mixture, resynthesised with ``inverse_stft`` and written to
    ``<dump_dir>/spk<i>/<key>.wav`` (speakers numbered from 1). Mixtures
    missing from any reference reader are skipped with a log message.

    Args:
        args: parsed options. This function reads ``frame_len``,
            ``frame_hop``, ``window``, ``center``, ``mask``, ``mix_scp``,
            ``round_power_of_two``, ``ref_scp`` (comma-separated list),
            ``keep_length``, ``dump_dir`` and ``sr``.
    """
    stft_conf = dict(
        frame_len=args.frame_len,
        frame_hop=args.frame_hop,
        window=args.window,
        center=args.center,
    )
    logger.info(f"Using mask: {args.mask.upper()}")
    mixture_reader = SpectrogramReader(
        args.mix_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_conf)
    # one reference script per speaker, comma separated
    ref_scp_list = args.ref_scp.split(",")
    logger.info(f"Number of speakers: {len(ref_scp_list)}")
    targets_reader = [
        SpectrogramReader(scp, **stft_conf) for scp in ref_scp_list
    ]
    num_utts = 0
    for key, mixture in tqdm(mixture_reader):
        nsamps = None
        if args.keep_length:
            nsamps = mixture_reader.nsamps(key)
        # every speaker's reference is needed to compute the masks
        if any(key not in reader for reader in targets_reader):
            logger.info(f"Skip utterance {key}, missing targets")
            continue
        num_utts += 1
        references = [reader[key] for reader in targets_reader]
        spk_masks = compute_mask(mixture, references, args.mask)
        for index, spk_mask in enumerate(spk_masks):
            waveform = inverse_stft(mixture * spk_mask,
                                    **stft_conf,
                                    nsamps=nsamps)
            wav_path = os.path.join(args.dump_dir, f"spk{index + 1}/{key}.wav")
            write_wav(wav_path, waveform, sr=args.sr)
    logger.info(f"Processed {num_utts} utterance")
Exemple #6
0
def run(args):
    """Apply masks to spectrograms, optionally borrowing a reference phase.

    For each utterance with a mask available, the first channel is selected
    when the spectrogram is 3-D, the mask is oriented to match the
    spectrogram, multiplied with it and resynthesised via ``inverse_stft``;
    the result goes to a ``WaveWriter``. When ``args.phase_ref`` is set, the
    magnitude of the input spectrogram is combined with the phase read from
    the reference instead of the input's own phase.

    Args:
        args: parsed options. This function reads ``frame_len``,
            ``frame_hop``, ``window``, ``center``, ``round_power_of_two``,
            ``wav_scp``, ``phase_ref``, ``fmt``, ``mask_scp``,
            ``keep_length``, ``mixed_norm``, ``dst_dir`` and ``sf``.

    Raises:
        ValueError: if a mask and its spectrogram have different shapes.
        KeyError: if ``args.fmt`` is neither ``"numpy"`` nor ``"kaldi"``.
    """
    # shared STFT configuration; spectrograms are T x F, complex (per the
    # original note)
    stft_conf = dict(
        frame_len=args.frame_len,
        frame_hop=args.frame_hop,
        window=args.window,
        center=args.center,
    )
    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        **stft_conf,
        round_power_of_two=args.round_power_of_two)
    if args.phase_ref:
        phase_reader = SpectrogramReader(
            args.phase_ref,
            **stft_conf,
            round_power_of_two=args.round_power_of_two)
        logger.info(f"Using phase reference from {args.phase_ref}")
    else:
        phase_reader = None
    reader_by_format = {"numpy": NumpyReader, "kaldi": ScriptReader}
    mask_reader = reader_by_format[args.fmt](args.mask_scp)

    num_done = 0
    with WaveWriter(args.dst_dir, fs=args.sf) as writer:
        for key, specs in spectrogram_reader:
            # multi-channel input: keep channel 0 only
            if specs.ndim == 3:
                specs = specs[0]
            # utterances without a mask are skipped
            if key not in mask_reader:
                continue
            num_done += 1
            mask = mask_reader[key]
            # Bring the mask into T x F layout: a leading axis as long as
            # the spectrogram's second axis is taken to mean F x T.
            # NOTE(review): a mask with T == F is also transposed here.
            _, num_bins = specs.shape
            if mask.shape[0] == num_bins:
                mask = np.transpose(mask)
            logger.info(f"Processing utterance {key}...")
            if mask.shape != specs.shape:
                message = ("Dimention mismatch between mask and spectrogram"
                           f"({mask.shape[0]} x {mask.shape[1]} vs "
                           f"{specs.shape[0]} x {specs.shape[1]}), need "
                           "check configures")
                raise ValueError(message)
            nsamps = None
            if args.keep_length:
                nsamps = spectrogram_reader.nsamps(key)
            norm = None
            if args.mixed_norm:
                norm = spectrogram_reader.maxabs(key)
            if phase_reader is None:
                masked = specs * mask
            else:
                # keep the masked magnitude, take the phase from the reference
                ref_phase = np.exp(np.angle(phase_reader[key]) * 1j)
                masked = np.abs(specs) * mask * ref_phase
            samps = inverse_stft(masked,
                                 **stft_conf,
                                 norm=norm,
                                 nsamps=nsamps)
            writer.write(key, samps)
    logger.info(
        f"Processed {num_done:d} utterances over {len(spectrogram_reader):d}")