def run(args):
    """Apply pre-computed fixed beamforming weights to each utterance.

    The array loaded from ``args.weights`` selects the mode:

    * 2-D array: a single ``FixedBeamformer`` is used for every utterance.
    * anything else: one ``FixedBeamformer`` is built per entry along the
      first axis, and ``args.beam`` must point to a script file that maps each
      utterance key to the integer index of the beam to apply.

    Enhanced spectrograms are converted back to waveforms with
    ``inverse_stft`` and written through a ``WaveWriter`` at ``args.dst_dir``.

    Raises:
        RuntimeError: if several beams are loaded but ``args.beam`` is unset.
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "window": args.window,
        "center": args.center,
        "transpose": False
    }
    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)
    # F x N or B x F x N
    weights = np.load(args.weights)
    if weights.ndim == 2:
        beamformer = FixedBeamformer(weights)
        beam_index = None
    else:
        beamformer = [FixedBeamformer(w) for w in weights]
        if not args.beam:
            raise RuntimeError(
                "--beam must be assigned, as there are multiple beams")
        # Convert each value to an integer beam index (passing ``int`` itself,
        # not a lambda that merely returns the ``int`` type).
        beam_index = ScpReader(args.beam, value_processor=int)
    with WaveWriter(args.dst_dir) as writer:
        for key, stft_mat in spectrogram_reader:
            logger.info(f"Processing utterance {key}...")
            # ``beam_index`` is only set in multi-beam mode; testing it (rather
            # than ``beamformer``) keeps the single-beam path reachable.
            if beam_index is not None:
                beam = beam_index[key]
                stft_enh = beamformer[beam].run(stft_mat)
            else:
                stft_enh = beamformer.run(stft_mat)
            # NOTE(review): presumably the peak amplitude of the source
            # utterance, used to rescale the output -- confirm in the reader.
            norm = spectrogram_reader.maxabs(key)
            samps = inverse_stft(stft_enh, **stft_kwargs, norm=norm)
            writer.write(key, samps)
    logger.info(f"Processed {len(spectrogram_reader):d} utterances")
def run(args):
    """Run a linear or circular SD beamformer over each utterance.

    The beamformer is chosen by ``args.geometry`` (``"linear"`` builds a
    ``LinearSDBeamformer`` from ``args.linear_topo``; anything else builds a
    ``CircularSDBeamformer``). The DoA comes either from the per-utterance
    script ``args.utt2doa`` or from the single value ``args.doa``.

    Utterances that have no entry in ``args.utt2doa`` or whose DoA fails
    ``check_doa`` are skipped; the final log line reports how many were
    actually processed.

    Raises:
        RuntimeError: if the global ``args.doa`` fails ``check_doa`` for the
            selected geometry.
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "window": args.window,
        "center": args.center,
        "transpose": False
    }

    if args.geometry == "linear":
        topo = str2tuple(args.linear_topo)
        beamformer = LinearSDBeamformer(topo)
        logger.info(f"Initialize LinearSDBeamformer for array: {topo}")
    else:
        beamformer = CircularSDBeamformer(args.circular_radius,
                                          args.circular_around,
                                          center=args.circular_center)
        logger.info(
            "Initialize CircularSDBeamformer for " +
            f"radius = {args.circular_radius}, center = {args.circular_center}"
        )

    utt2doa = None
    doa = None
    if args.utt2doa:
        utt2doa = ScpReader(args.utt2doa, value_processor=float)
        logger.info(f"Use --utt2doa={args.utt2doa} for each utterance")
    else:
        doa = args.doa
        # A bad global DoA would be applied to every utterance, so fail fast
        # instead of only logging it and carrying on.
        if not check_doa(args.geometry, doa):
            raise RuntimeError(
                f"Invalid doa {doa:.2f} for {args.geometry} array")
        logger.info(f"Use --doa={doa:.2f} for all utterances")

    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)

    done = 0
    with WaveWriter(args.dst_dir, sr=args.sr) as writer:
        for key, stft_src in spectrogram_reader:
            # Explicit None test: the branch must not depend on whether the
            # reader object happens to evaluate as truthy.
            if utt2doa is not None:
                if key not in utt2doa:
                    continue
                doa = utt2doa[key]
                if not check_doa(args.geometry, doa):
                    logger.info(f"Invalid DoA {doa:.2f} for utterance {key}")
                    continue
            stft_enh = beamformer.run(doa, stft_src, c=args.speed, sr=args.sr)
            done += 1
            norm = spectrogram_reader.maxabs(key)
            samps = inverse_stft(stft_enh, **stft_kwargs, norm=norm)
            writer.write(key, samps)
    logger.info(f"Processed {done} utterances over {len(spectrogram_reader)}")
def run(args):
    """Compute directional features per utterance and write them to an archive.

    Steering vectors are loaded from ``args.steer_vector``. The steering
    vector index comes either from the per-utterance script ``args.utt2idx``
    or from the comma-separated list ``args.doa_idx``; with several indices in
    ``args.doa_idx`` the per-index features are stacked and flattened into a
    single 2-D matrix per utterance.

    ``args.df_pair`` is a ``;``-separated list of comma-separated integer
    groups, e.g. ``"0,1;2,3"``; blank segments are ignored.

    Raises:
        RuntimeError: if ``args.df_pair`` contains no usable pair.
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "round_power_of_two": args.round_power_of_two,
        "window": args.window,
        "center": args.center,  # False to be comparable with Kaldi
        "transpose": False  # F x T
    }
    stft_reader = SpectrogramReader(args.wav_scp, **stft_kwargs)

    if args.utt2idx:
        utt2idx = ScpReader(args.utt2idx, value_processor=int)
        doa_idx = None
        logger.info(f"Using --utt2idx={args.utt2idx}")
    else:
        utt2idx = None
        # Parsed once here: the index list is the same for every utterance.
        doa_idx = [int(v) for v in args.doa_idx.split(",")]
        logger.info(f"Using --doa-idx={args.doa_idx}")

    # Skip blank segments (e.g. a trailing ";") so that an effectively empty
    # option reaches the explicit error below.
    df_pair = [
        tuple(map(int, p.split(",")))
        for p in args.df_pair.split(";")
        if p.strip()
    ]
    if not df_pair:
        raise RuntimeError(
            f"Bad configurations with --df-pair {args.df_pair}")
    logger.info(f"Compute directional feature with {df_pair}")

    # A x M x F
    steer_vector = np.load(args.steer_vector)

    num_done = 0
    with ArchiveWriter(args.dup_ark, args.scp) as writer:
        for key, stft in stft_reader:
            # sv: M x F
            if utt2idx is None:
                dfs = [
                    directional_feats(stft, steer_vector[i], df_pair=df_pair)
                    for i in doa_idx
                ]
                if len(dfs) == 1:
                    df = dfs[0]
                else:
                    # N x T x F
                    dfs = np.stack(dfs)
                    # Move the stacked axis next to the last one, then merge
                    # the two so each row keeps all N feature blocks.
                    df = dfs.transpose(1, 0, 2).reshape(dfs.shape[1], -1)
            elif key in utt2idx:
                # stft: M x F x T
                df = directional_feats(stft,
                                       steer_vector[utt2idx[key]],
                                       df_pair=df_pair)
            else:
                logger.warning(f"Missing utt2idx for utterance {key}")
                continue
            writer.write(key, df)
            num_done += 1
            if not num_done % 1000:
                logger.info(f"Processed {num_done:d} utterance...")
    logger.info(f"Processed {num_done:d} utterances over {len(stft_reader):d}")
def parse_doa(args, online):
    """Return a callable that gives the DoA for an utterance key.

    With ``args.utt2doa`` set, the callable is the ``get`` method of a
    ``ScpReader`` over that file, whose values are converted by
    ``process_doa(value, online)``. Otherwise ``args.doa`` is converted once
    with ``process_doa`` and the callable returns that same value for any key.
    """
    if not args.utt2doa:
        doa = process_doa(args.doa, online)

        def utt2doa(_):
            # Same DoA regardless of the utterance key.
            return doa

        # NOTE(review): the ``:.2f`` format assumes ``process_doa`` returns a
        # single number here -- confirm this also holds when ``online`` is set.
        logger.info(f"Use --doa={doa:.2f} for all utterances")
        return utt2doa

    def convert(doa):
        return process_doa(doa, online)

    reader = ScpReader(args.utt2doa, value_processor=convert, num_tokens=-1)
    lookup = reader.get
    logger.info(f"Use --utt2doa={args.utt2doa} for each utterance")
    return lookup
def _fold_negative_doa(doa):
    """Shift a negative DoA by +180; non-negative values pass through."""
    return 180 + doa if doa < 0 else doa


def _doa_in_range(doa):
    """Tell whether an already-folded DoA lies inside [0, 180]."""
    return 0 <= doa <= 180


def run(args):
    """Run a linear DS beamformer over each utterance.

    The DoA comes either from the per-utterance script ``args.utt2doa`` or
    from the single value ``args.doa``. Negative values are shifted by +180
    before the [0, 180] range check.

    Utterances that have no entry in ``args.utt2doa`` or whose DoA is out of
    range are skipped; the final log line reports how many were actually
    processed.

    Raises:
        RuntimeError: if the global ``args.doa`` is out of range after folding.
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "window": args.window,
        "center": args.center,
        "transpose": False
    }

    utt2doa = None
    doa = None
    if args.utt2doa:
        utt2doa = ScpReader(args.utt2doa, value_processor=float)
        logger.info(f"Use utt2doa {args.utt2doa} for each utterance")
    else:
        doa = _fold_negative_doa(args.doa)
        if not _doa_in_range(doa):
            raise RuntimeError(f"Invalid doa {doa:.2f} for --doa")
        logger.info(f"Use DoA {doa:.2f} for all utterances")

    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)

    done = 0
    topo = str2tuple(args.linear_topo)
    beamformer = LinearDSBeamformer(topo)
    logger.info(f"Initialize channel LinearDSBeamformer for array: {topo}")

    with WaveWriter(args.dst_dir, fs=args.fs) as writer:
        for key, stft_src in spectrogram_reader:
            # Explicit None test: the branch must not depend on whether the
            # reader object happens to evaluate as truthy.
            if utt2doa is not None:
                if key not in utt2doa:
                    continue
                doa = _fold_negative_doa(utt2doa[key])
                if not _doa_in_range(doa):
                    logger.info(f"Invalid doa {doa:.2f} for utterance {key}")
                    continue
            stft_enh = beamformer.run(doa, stft_src, c=args.speed, sr=args.fs)
            done += 1
            norm = spectrogram_reader.maxabs(key)
            samps = inverse_stft(stft_enh, **stft_kwargs, norm=norm)
            writer.write(key, samps)
    logger.info(f"Processed {done} utterances over {len(spectrogram_reader)}")
def __init__(self, spk2class=None):
    """Set up the optional class lookup and the two accumulators.

    Args:
        spk2class: optional argument forwarded to ``ScpReader``; when it is
            falsy no reader is created and ``self.s2c`` stays ``None``.
    """
    if spk2class:
        self.s2c = ScpReader(spk2class)
    else:
        self.s2c = None
    # Missing keys start at 0.0 / 0 respectively.
    # NOTE(review): presumably running SNR sums and sample counts per key --
    # confirm against the methods that update them.
    self.snr = defaultdict(float)
    self.cnt = defaultdict(int)
def __init__(self, text):
    """Open one ``ScpReader`` per comma-separated entry of ``text``.

    Args:
        text: comma-separated string; each piece is handed to ``ScpReader``
            with ``num_tokens=-1`` and ``restrict=False``.
    """
    readers = []
    for entry in text.split(","):
        readers.append(ScpReader(entry, num_tokens=-1, restrict=False))
    self.text_reader = readers