def run(args):
    """Mask each spectrogram that has a matching mask and write the audio.

    Walks over the utterances read from ``args.wav_scp``. For every key that
    is also present in the mask reader, the spectrogram is multiplied
    element-wise by the mask, inverted with ``istft`` and handed to a
    ``WaveWriter`` opened on ``args.dst_dir``. Keys without a mask are
    skipped without any message.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            ``frame_length``, ``frame_shift``, ``window``, ``center``
            (forwarded to both the spectrogram reader and ``istft``),
            ``wav_scp``, ``mask_scp``, ``numpy`` (picks ``NumpyReader``
            instead of ``ScriptReader``), ``transpose``, ``keep_length``,
            ``dst_dir`` and ``samp_freq``.

    Raises:
        ValueError: If a mask and its spectrogram do not have the same shape.
    """
    # One STFT configuration shared by analysis (reader) and synthesis (istft).
    stft_kwargs = dict(
        frame_length=args.frame_length,
        frame_shift=args.frame_shift,
        window=args.window,
        center=args.center,
    )
    # Per the original note, spectrograms are T x F and complex.
    spectrogram_reader = SpectrogramReader(args.wav_scp, **stft_kwargs)
    if args.numpy:
        mask_reader = NumpyReader(args.mask_scp)
    else:
        mask_reader = ScriptReader(args.mask_scp)

    shape_error = ("Dimention mismatch between mask and spectrogram"
                   "({0[0]} x {0[1]} vs {1[0]} x {1[1]}), need check configures")
    done = 0
    with WaveWriter(args.dst_dir, fs=args.samp_freq) as writer:
        for key, specs in spectrogram_reader:
            if key not in mask_reader:
                continue
            done += 1
            mask = mask_reader[key]
            if args.transpose:
                mask = np.transpose(mask)
            logger.info("Processing utterance {}...".format(key))
            if mask.shape != specs.shape:
                raise ValueError(shape_error.format(mask.shape, specs.shape))
            # Only ask the reader for the original length when it must be kept.
            nsamps = None
            if args.keep_length:
                nsamps = spectrogram_reader.nsamps(key)
            norm = spectrogram_reader.samp_norm(key)
            masked = specs * mask
            samps = istft(masked, **stft_kwargs, norm=norm, nsamps=nsamps)
            writer.write(key, samps)
    total = len(spectrogram_reader)
    logger.info("Processed {:d} utterances over {:d}".format(done, total))
def run(args):
    """Mask each spectrogram that has a matching mask and synthesise a wav.

    Walks over the utterances read from ``args.wav_scp``. For every key that
    is also present in the mask reader, the spectrogram is multiplied
    element-wise by the mask and passed to ``istft`` together with the
    destination path ``<args.dst_dir>/<key>.wav``. Keys without a mask are
    skipped without any message.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            ``frame_length``, ``frame_shift``, ``window``, ``center``
            (forwarded to both the spectrogram reader and ``istft``),
            ``wav_scp``, ``mask_scp``, ``numpy`` (picks ``NumpyReader``
            instead of ``ScriptReader``), ``transpose``, ``keep_length``,
            ``dst_dir`` and ``samp_freq``.

    Raises:
        ValueError: If a mask and its spectrogram do not have the same shape.
    """
    # One STFT configuration shared by analysis (reader) and synthesis (istft).
    stft_kwargs = {
        "frame_length": args.frame_length,
        "frame_shift": args.frame_shift,
        "window": args.window,
        "center": args.center,
    }
    # Per the original note, spectrograms are T x F and complex.
    spectrogram_reader = SpectrogramReader(args.wav_scp, **stft_kwargs)
    mask_reader = NumpyReader(args.mask_scp) if args.numpy else ScriptReader(
        args.mask_scp)

    num_utts = 0
    # Must be the plain value: a trailing comma here would turn the sampling
    # frequency into a 1-tuple before it reaches istft.
    fs = args.samp_freq
    for key, specs in spectrogram_reader:
        if key not in mask_reader:
            continue
        num_utts += 1
        mask = mask_reader[key]
        if args.transpose:
            mask = np.transpose(mask)
        logger.info("Processing utterance {}...".format(key))
        if mask.shape != specs.shape:
            raise ValueError(
                "Dimention mismatch between mask and spectrogram"
                "({0[0]} x {0[1]} vs {1[0]} x {1[1]}), need check configures"
                .format(mask.shape, specs.shape))
        # Only ask the reader for the original length when it must be kept.
        nsamps = spectrogram_reader.nsamps(
            key) if args.keep_length else None
        # This istft variant receives the output path as its first argument.
        istft(os.path.join(args.dst_dir, "{}.wav".format(key)),
              specs * mask,
              **stft_kwargs,
              fs=fs,
              nsamps=nsamps)
    logger.info("Processed {} utterances".format(num_utts))
def run(args):
    """Mask spectrograms, optionally swap in a reference phase, write audio.

    Walks over the utterances read from ``args.wav_scp``. For every key that
    is also present in the mask reader, the mask is applied to the
    spectrogram, the result is inverted with ``istft`` and handed to a
    ``WaveWriter`` opened on ``args.dst_dir``. When ``args.phase_ref`` is
    set, the magnitude of the input spectrogram is combined with the phase
    of the spectrogram stored under the same key in the phase reference.
    Keys without a mask are skipped without any message.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            ``frame_len``, ``frame_hop``, ``window``, ``center`` (forwarded
            to the spectrogram readers and ``istft``),
            ``round_power_of_two``, ``wav_scp``, ``phase_ref``, ``fmt``
            (``"numpy"`` or ``"kaldi"``), ``mask_scp``, ``keep_length``,
            ``dst_dir`` and ``samp_freq``.

    Raises:
        KeyError: If ``args.fmt`` is neither ``"numpy"`` nor ``"kaldi"``.
        ValueError: If a mask and its spectrogram do not have the same shape.
    """
    # One STFT configuration shared by analysis (readers) and synthesis.
    stft_kwargs = dict(
        frame_len=args.frame_len,
        frame_hop=args.frame_hop,
        window=args.window,
        center=args.center,
    )
    # Per the original note, spectrograms are T x F and complex.
    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        **stft_kwargs,
        round_power_of_two=args.round_power_of_two)

    phase_reader = None
    if args.phase_ref:
        phase_reader = SpectrogramReader(
            args.phase_ref,
            **stft_kwargs,
            round_power_of_two=args.round_power_of_two)
        logger.info("Using phase reference from {}".format(args.phase_ref))

    reader_by_fmt = {"numpy": NumpyReader, "kaldi": ScriptReader}
    mask_reader = reader_by_fmt[args.fmt](args.mask_scp)

    shape_error = ("Dimention mismatch between mask and spectrogram"
                   "({0[0]} x {0[1]} vs {1[0]} x {1[1]}), need check configures")
    done = 0
    with WaveWriter(args.dst_dir, fs=args.samp_freq) as writer:
        for key, specs in spectrogram_reader:
            if key not in mask_reader:
                continue
            done += 1
            mask = mask_reader[key]
            # Bring the mask to the spectrogram layout: transpose it when its
            # first axis matches the spectrogram's second axis (F of T x F).
            _, num_bins = specs.shape
            if mask.shape[0] == num_bins:
                mask = np.transpose(mask)
            logger.info("Processing utterance {}...".format(key))
            if mask.shape != specs.shape:
                raise ValueError(shape_error.format(mask.shape, specs.shape))
            # Only ask the reader for the original length when it must be kept.
            nsamps = None
            if args.keep_length:
                nsamps = spectrogram_reader.nsamps(key)
            norm = spectrogram_reader.samp_norm(key)
            if phase_reader is None:
                masked = specs * mask
            else:
                # Keep the input magnitude, take the phase from the reference.
                angle = np.angle(phase_reader[key])
                phase = np.exp(angle * 1j)
                masked = np.abs(specs) * mask * phase
            samps = istft(masked, **stft_kwargs, norm=norm, nsamps=nsamps)
            writer.write(key, samps)
    total = len(spectrogram_reader)
    logger.info("Processed {:d} utterances over {:d}".format(done, total))
def run(args):
    """Compute per-speaker masks for each mixture and write masked audio.

    For every mixture utterance that is present in all target readers,
    ``compute_mask`` is called with the mixture spectrogram, the list of
    target spectrograms and ``args.mask``. Each returned mask is applied to
    the mixture, inverted with ``istft`` and written with ``write_wav`` to
    ``<args.dump_dir>/spk<i>/<key>.wav`` (``i`` starting at 1). Utterances
    missing from any target reader are logged and skipped.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            ``frame_length``, ``frame_shift``, ``window``, ``center``
            (forwarded to the spectrogram readers and ``istft``), ``mask``,
            ``mix_scp``, ``round_power_of_two``, ``ref_scp``
            (comma-separated list, one entry per speaker), ``keep_length``,
            ``dump_dir`` and ``fs``.
    """
    # One STFT configuration shared by analysis (readers) and synthesis.
    stft_kwargs = dict(
        frame_length=args.frame_length,
        frame_shift=args.frame_shift,
        window=args.window,
        center=args.center,
    )
    logger.info("Using mask: {}".format(args.mask.upper()))
    # Per the original note, the readers return complex results.
    mixture_reader = SpectrogramReader(
        args.mix_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)

    ref_scp_list = args.ref_scp.split(",")
    logger.info("Number of speakers: {:d}".format(len(ref_scp_list)))
    # Note: unlike the mixture reader, these do not get round_power_of_two.
    targets_reader = [
        SpectrogramReader(ref_scp, **stft_kwargs) for ref_scp in ref_scp_list
    ]

    processed = 0
    for key, mixture in tqdm(mixture_reader):
        # Only ask the reader for the original length when it must be kept.
        nsamps = None
        if args.keep_length:
            nsamps = mixture_reader.nsamps(key)
        # Every speaker must provide this utterance, otherwise skip it.
        if any(key not in reader for reader in targets_reader):
            logger.info("Skip utterance {}, missing targets".format(key))
            continue
        processed += 1
        targets_list = [reader[key] for reader in targets_reader]
        spk_masks = compute_mask(mixture, targets_list, args.mask)
        for spk_id, mask in enumerate(spk_masks, start=1):
            samps = istft(mixture * mask, **stft_kwargs, nsamps=nsamps)
            dst_path = os.path.join(args.dump_dir,
                                    "spk{:d}/{}.wav".format(spk_id, key))
            write_wav(dst_path, samps, fs=args.fs)
    logger.info("Processed {} utterance!".format(processed))
def run(args):
    """Compute per-speaker masks for each mixture and write masked audio.

    For every mixture utterance that is present in all target readers,
    ``compute_mask`` is called with the mixture spectrogram, the list of
    target spectrograms and ``args.mask``. Each returned mask is applied to
    the mixture, inverted with ``inverse_stft`` and written with
    ``write_wav`` to ``<args.dump_dir>/spk<i>/<key>.wav`` (``i`` starting at
    1). Utterances missing from any target reader are logged and skipped.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            ``frame_len``, ``frame_hop``, ``window``, ``center`` (forwarded
            to the spectrogram readers and ``inverse_stft``), ``mask``,
            ``mix_scp``, ``round_power_of_two``, ``ref_scp``
            (comma-separated list, one entry per speaker), ``keep_length``,
            ``dump_dir`` and ``sr``.
    """
    # One STFT configuration shared by analysis (readers) and synthesis.
    stft_kwargs = dict(
        frame_len=args.frame_len,
        frame_hop=args.frame_hop,
        window=args.window,
        center=args.center,
    )
    logger.info(f"Using mask: {args.mask.upper()}")
    # Per the original note, the readers return complex results.
    mixture_reader = SpectrogramReader(
        args.mix_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)

    ref_scp_list = args.ref_scp.split(",")
    logger.info(f"Number of speakers: {len(ref_scp_list)}")
    # Note: unlike the mixture reader, these do not get round_power_of_two.
    targets_reader = [
        SpectrogramReader(ref_scp, **stft_kwargs) for ref_scp in ref_scp_list
    ]

    num_utts = 0
    for key, mixture in tqdm(mixture_reader):
        # Only ask the reader for the original length when it must be kept.
        if args.keep_length:
            nsamps = mixture_reader.nsamps(key)
        else:
            nsamps = None
        # Every speaker must provide this utterance, otherwise skip it.
        has_all_targets = all(key in reader for reader in targets_reader)
        if not has_all_targets:
            logger.info(f"Skip utterance {key}, missing targets")
            continue
        num_utts += 1
        targets_list = [reader[key] for reader in targets_reader]
        spk_masks = compute_mask(mixture, targets_list, args.mask)
        for index, mask in enumerate(spk_masks):
            masked = mixture * mask
            samps = inverse_stft(masked, **stft_kwargs, nsamps=nsamps)
            dst_path = os.path.join(args.dump_dir,
                                    f"spk{index + 1}/{key}.wav")
            write_wav(dst_path, samps, sr=args.sr)
    logger.info(f"Processed {num_utts} utterance")
def run(args):
    """Mask spectrograms, optionally swap in a reference phase, write audio.

    Walks over the utterances read from ``args.wav_scp``; a 3-dimensional
    spectrogram is reduced to its first entry along axis 0 (channel 0 per
    the original note). For every key that is also present in the mask
    reader, the mask is applied to the spectrogram, the result is inverted
    with ``inverse_stft`` and handed to a ``WaveWriter`` opened on
    ``args.dst_dir``. When ``args.phase_ref`` is set, the magnitude of the
    input spectrogram is combined with the phase of the spectrogram stored
    under the same key in the phase reference. Keys without a mask are
    skipped without any message.

    Args:
        args: Parsed command-line namespace. Attributes read here:
            ``frame_len``, ``frame_hop``, ``window``, ``center`` (forwarded
            to the spectrogram readers and ``inverse_stft``),
            ``round_power_of_two``, ``wav_scp``, ``phase_ref``, ``fmt``
            (``"numpy"`` or ``"kaldi"``), ``mask_scp``, ``keep_length``,
            ``mixed_norm``, ``dst_dir`` and ``sf``.

    Raises:
        KeyError: If ``args.fmt`` is neither ``"numpy"`` nor ``"kaldi"``.
        ValueError: If a mask and its spectrogram do not have the same shape.
    """
    # One STFT configuration shared by analysis (readers) and synthesis.
    stft_kwargs = dict(
        frame_len=args.frame_len,
        frame_hop=args.frame_hop,
        window=args.window,
        center=args.center,
    )
    # Per the original note, spectrograms are T x F and complex.
    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        **stft_kwargs,
        round_power_of_two=args.round_power_of_two)

    phase_reader = None
    if args.phase_ref:
        phase_reader = SpectrogramReader(
            args.phase_ref,
            **stft_kwargs,
            round_power_of_two=args.round_power_of_two)
        logger.info(f"Using phase reference from {args.phase_ref}")

    reader_by_fmt = {"numpy": NumpyReader, "kaldi": ScriptReader}
    mask_reader = reader_by_fmt[args.fmt](args.mask_scp)

    done = 0
    with WaveWriter(args.dst_dir, fs=args.sf) as writer:
        for key, specs in spectrogram_reader:
            # Multi-channel input: keep only the first channel.
            if specs.ndim == 3:
                specs = specs[0]
            if key not in mask_reader:
                continue
            done += 1
            mask = mask_reader[key]
            # Bring the mask to the spectrogram layout: transpose it when its
            # first axis matches the spectrogram's second axis (F of T x F).
            _, num_bins = specs.shape
            if mask.shape[0] == num_bins:
                mask = np.transpose(mask)
            logger.info(f"Processing utterance {key}...")
            if mask.shape != specs.shape:
                raise ValueError(
                    "Dimention mismatch between mask and spectrogram"
                    f"({mask.shape[0]} x {mask.shape[1]} vs "
                    f"{specs.shape[0]} x {specs.shape[1]}), need "
                    "check configures")
            # Length and normalisation are looked up only when requested.
            nsamps = None
            if args.keep_length:
                nsamps = spectrogram_reader.nsamps(key)
            norm = None
            if args.mixed_norm:
                norm = spectrogram_reader.maxabs(key)
            if phase_reader is None:
                masked = specs * mask
            else:
                # Keep the input magnitude, take the phase from the reference.
                angle = np.angle(phase_reader[key])
                phase = np.exp(angle * 1j)
                masked = np.abs(specs) * mask * phase
            samps = inverse_stft(masked,
                                 **stft_kwargs,
                                 norm=norm,
                                 nsamps=nsamps)
            writer.write(key, samps)
    total = len(spectrogram_reader)
    logger.info(f"Processed {done:d} utterances over {total:d}")