def __init__(self, rt60_opt, absc_opt, room_dim):
    """
    Build the samplers used to draw a room configuration
    Args:
        rt60_opt: "" or "a,b", takes priority over absc_opt when non-empty
        absc_opt: tuple like (a,b), only used when rt60_opt is empty
        room_dim: str like "a,b;c,d;e,d", i.e., three ";"-separated ranges
    Raises:
        RuntimeError: room_dim does not split into exactly three ranges
    """
    self.rt60_opt = rt60_opt
    if rt60_opt:
        # a RT60 range is given: parse it, absc_opt is ignored
        self.rt60 = UniformSampler(str2tuple(rt60_opt))
    else:
        self.absc = UniformSampler(absc_opt)
    # every range is parsed first, the number of ranges is checked afterwards
    ranges = list(map(str2tuple, room_dim.split(";")))
    if len(ranges) != 3:
        raise RuntimeError(f"Wrong format with --room-dim={room_dim}")
    # one sampler per parsed range
    self.dim_sampler = list(map(UniformSampler, ranges))
def run(args):
    """
    Compute the steer vectors on a grid of candidate DoAs and save them
    to args.steer_vector via np.save
    Args:
        args: parsed command line options. Reads geometry, num_doas,
              num_bins, speed, sr, normalize, steer_vector and, depending
              on the geometry, linear_topo or circular_radius /
              circular_around / circular_center
    The saved array is A x M x F (A candidate DoAs), following the shape
    comments of the original code
    """
    if args.geometry == "linear":
        topo = np.array(str2tuple(args.linear_topo))
        # linear array: grid over [0, 180], both ends included
        candidate_doa = np.linspace(0, 180, args.num_doas)
    else:
        topo = None
        # Any other geometry is handled as a circular array: grid over
        # [0, 360) without the endpoint. linspace guarantees exactly
        # num_doas points, while np.arange with the fractional step
        # 360 / num_doas may yield one extra point due to rounding
        candidate_doa = np.linspace(0, 360, args.num_doas, endpoint=False)

    if topo is None:
        sv = [
            circular_steer_vector(args.circular_radius,
                                  args.circular_around,
                                  doa,
                                  args.num_bins,
                                  c=args.speed,
                                  sr=args.sr,
                                  center=args.circular_center)
            for doa in candidate_doa
        ]
    else:
        sv = [
            linear_steer_vector(topo,
                                doa,
                                args.num_bins,
                                c=args.speed,
                                sr=args.sr) for doa in candidate_doa
        ]
    # A x F x M
    sv = np.stack(sv)
    # norm or not: scale by the square root of the last axis size
    if args.normalize:
        sv = sv / sv.shape[-1]**0.5
    # A x M x F
    sv = sv.transpose(0, 2, 1)
    np.save(args.steer_vector, sv)
def run(args):
    """
    Enhance each utterance in args.wav_scp with the SD beamformer
    (LinearSDBeamformer or CircularSDBeamformer, chosen by args.geometry)
    and write the results to args.dst_dir
    Args:
        args: parsed command line options. The DoA comes either from
              --utt2doa (one value per utterance) or from --doa (shared
              by all the utterances)
    Raises:
        RuntimeError: the shared --doa is rejected by check_doa
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "window": args.window,
        "center": args.center,
        "transpose": False
    }
    if args.geometry == "linear":
        topo = str2tuple(args.linear_topo)
        beamformer = LinearSDBeamformer(topo)
        logger.info(f"Initialize LinearSDBeamformer for array: {topo}")
    else:
        beamformer = CircularSDBeamformer(args.circular_radius,
                                          args.circular_around,
                                          center=args.circular_center)
        logger.info(
            "Initialize CircularSDBeamformer for " +
            f"radius = {args.circular_radius}, center = {args.circular_center}"
        )

    utt2doa = None
    doa = None
    if args.utt2doa:
        utt2doa = ScpReader(args.utt2doa, value_processor=lambda x: float(x))
        logger.info(f"Use --utt2doa={args.utt2doa} for each utterance")
    else:
        doa = args.doa
        # The shared DoA is applied to every utterance, so an invalid value
        # must stop the run instead of being logged and used anyway
        if not check_doa(args.geometry, doa):
            raise RuntimeError(
                f"Invalid doa {doa:.2f} for {args.geometry} array")
        logger.info(f"Use --doa={doa:.2f} for all utterances")

    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)

    done = 0
    with WaveWriter(args.dst_dir, sr=args.sr) as writer:
        for key, stft_src in spectrogram_reader:
            if utt2doa:
                # utterances without a DoA entry are skipped silently
                if key not in utt2doa:
                    continue
                doa = utt2doa[key]
                # a bad per-utterance DoA only drops that utterance
                if not check_doa(args.geometry, doa):
                    logger.info(f"Invalid DoA {doa:.2f} for utterance {key}")
                    continue
            stft_enh = beamformer.run(doa, stft_src, c=args.speed, sr=args.sr)
            done += 1
            # value returned by maxabs(key) is handed to inverse_stft as norm
            norm = spectrogram_reader.maxabs(key)
            samps = inverse_stft(stft_enh, **stft_kwargs, norm=norm)
            writer.write(key, samps)
    logger.info(f"Processed {done} utterances over {len(spectrogram_reader)}")
def __init__(self, args):
    """
    Prepare the state needed for the RIR generation
    Args:
        args: parsed command line options. Reads gpu, dump_dir, rt60,
              abs_range, room_dim, array_relx, array_rely, array_topo
              and sample_rate
    Raises:
        RuntimeError: --gpu is set but gpuRIR is not available
    """
    gpu_unusable = args.gpu and not gpu_rir_available
    if gpu_unusable:
        raise RuntimeError("Please install gpuRIR first if --gpu=True")
    # make dump dir (parent directories included, existing one is fine)
    dump_dir = Path(args.dump_dir)
    dump_dir.mkdir(parents=True, exist_ok=True)
    # starts empty
    self.rirs_cfg = []
    self.room_generator = RoomGenerator(args.rt60, args.abs_range,
                                        args.room_dim)
    self.mx = args.array_relx
    self.my = args.array_rely
    # --array-topo holds ";"-separated entries, each parsed by str2tuple
    self.array_topo = list(map(str2tuple, args.array_topo.split(";")))
    self.sr = args.sample_rate
    self.args = args
def run(args):
    """
    Enhance each utterance in args.wav_scp with LinearDSBeamformer and
    write the results to args.dst_dir
    Args:
        args: parsed command line options. The DoA comes either from
              --utt2doa (one value per utterance) or from --doa (shared
              by all the utterances). A negative DoA is shifted by 180
              before the [0, 180] range check
    Raises:
        RuntimeError: the shared --doa is out of range after the shift
    """
    stft_kwargs = dict(frame_len=args.frame_len,
                       frame_hop=args.frame_hop,
                       window=args.window,
                       center=args.center,
                       transpose=False)

    utt2doa = None
    doa = None
    if not args.utt2doa:
        doa = args.doa
        # negative angles are shifted by 180 degrees
        if doa < 0:
            doa += 180
        if doa < 0 or doa > 180:
            raise RuntimeError(f"Invalid doa {doa:.2f} for --doa")
        logger.info(f"Use DoA {doa:.2f} for all utterances")
    else:
        utt2doa = ScpReader(args.utt2doa, value_processor=lambda x: float(x))
        logger.info(f"Use utt2doa {args.utt2doa} for each utterance")

    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)

    array_topo = str2tuple(args.linear_topo)
    beamformer = LinearDSBeamformer(array_topo)
    logger.info(
        f"Initialize channel LinearDSBeamformer for array: {array_topo}")

    num_done = 0
    with WaveWriter(args.dst_dir, fs=args.fs) as writer:
        for key, stft_src in spectrogram_reader:
            if utt2doa:
                # utterances without a DoA entry are skipped silently
                if key not in utt2doa:
                    continue
                doa = utt2doa[key]
                if doa < 0:
                    doa += 180
                # a bad per-utterance DoA only drops that utterance
                if doa < 0 or doa > 180:
                    logger.info(f"Invalid doa {doa:.2f} for utterance {key}")
                    continue
            enhanced = beamformer.run(doa, stft_src, c=args.speed, sr=args.fs)
            # value returned by maxabs(key) is handed to inverse_stft as norm
            samps = inverse_stft(enhanced,
                                 **stft_kwargs,
                                 norm=spectrogram_reader.maxabs(key))
            writer.write(key, samps)
            num_done += 1
    logger.info(
        f"Processed {num_done} utterances over {len(spectrogram_reader)}")
def run(args):
    """
    Estimate the DoA of each utterance in args.wav_scp and write the
    results to args.doa_scp, one "<key>\\t<doa...>" line per utterance
    Args:
        args: parsed command line options. Notable ones:
              steer_vector: .npy file, first axis indexes the candidate DoAs
              doa_range: "min,max" of the angle grid, in degrees
              output: "radian" to write radians, degrees otherwise
              backend: "srp" selects srp_ssl, anything else ml_ssl
              mask_scp: optional, ","-separated mask scripts
              chunk_len/look_back: both > 0 enables the online mode, where
                                   one DoA is written per chunk
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "round_power_of_two": args.round_power_of_two,
        "window": args.window,
        "center": args.center,
        "transpose": True
    }
    steer_vector = np.load(args.steer_vector)
    logger.info(f"Shape of the steer vector: {steer_vector.shape}")
    num_doa, _, _ = steer_vector.shape
    min_doa, max_doa = str2tuple(args.doa_range)
    # NOTE(review): the grid has num_doa + 1 points while the steer vector
    # holds num_doa entries; presumably the last grid point is never
    # selected - confirm against how the steer vectors were generated
    if args.output == "radian":
        angles = np.linspace(min_doa * np.pi / 180, max_doa * np.pi / 180,
                             num_doa + 1)
    else:
        angles = np.linspace(min_doa, max_doa, num_doa + 1)

    spectrogram_reader = SpectrogramReader(args.wav_scp, **stft_kwargs)
    mask_reader = None
    if args.mask_scp:
        mask_reader = [NumpyReader(scp) for scp in args.mask_scp.split(",")]

    online = (args.chunk_len > 0 and args.look_back > 0)
    if online:
        logger.info("Set up in online mode: chunk_len " +
                    f"= {args.chunk_len}, look_back = {args.look_back}")

    if args.backend == "srp":
        # "a,b;c,d" => ([a, c], [b, d])
        pairs = [
            tuple(map(int, p.split(","))) for p in args.srp_pair.split(";")
        ]
        srp_pair = ([t[0] for t in pairs], [t[1] for t in pairs])
        logger.info(f"Choose srp-based algorithm, srp pair is {srp_pair}")
    else:
        srp_pair = None

    def locate(stft_seg, mask_seg):
        # index of the selected candidate DoA for one (chunk of) utterance
        if srp_pair:
            return srp_ssl(stft_seg,
                           steer_vector,
                           srp_pair=srp_pair,
                           mask=mask_seg)
        return ml_ssl(stft_seg,
                      steer_vector,
                      mask=mask_seg,
                      compression=-1,
                      eps=EPSILON)

    with open(args.doa_scp, "w") as doa_out:
        for key, stft in spectrogram_reader:
            # stft: M x T x F
            _, T, F = stft.shape
            if mask_reader:
                mask = [r[key] for r in mask_reader]
                if args.mask_eps >= 0 and len(mask_reader) > 1:
                    mask = add_wta(mask, eps=args.mask_eps)
                # only the first mask is used
                mask = mask[0]
                # F x T => T x F, so that the last axis matches stft
                if mask.shape[-1] != F:
                    mask = mask.transpose()
            else:
                mask = None

            if not online:
                doa = angles[locate(stft, mask)]
                logger.info(f"Processing utterance {key}: {doa:.4f}")
                doa_out.write(f"{key}\t{doa:.4f}\n")
            else:
                logger.info(f"Processing utterance {key}...")
                online_doa = []
                for t in range(0, T, args.chunk_len):
                    # each chunk also sees look_back frames of history
                    s = max(t - args.look_back, 0)
                    e = t + args.chunk_len
                    # The mask is T x F here, so it must be cut along the
                    # time axis (second last), same frames as the stft chunk
                    chunk_mask = None if mask is None else mask[..., s:e, :]
                    idx = locate(stft[:, s:e, :], chunk_mask)
                    online_doa.append(angles[idx])
                doa_str = " ".join([f"{d:.4f}" for d in online_doa])
                doa_out.write(f"{key}\t{doa_str}\n")
    logger.info(f"Processing {len(spectrogram_reader)} utterance done")