예제 #1
0
def run(args):
    """Apply fixed beamformer weights to every utterance in ``args.wav_scp``.

    The array loaded from ``args.weights`` is either a single beam (2-D) or
    a stack of beams (any other rank, iterated along the first axis). When
    several beams are available, ``args.beam`` must point to a script file
    that maps each utterance key to the integer index of the beam to use.

    Args:
        args: Parsed command-line namespace. Reads ``frame_len``,
            ``frame_hop``, ``window``, ``center``, ``round_power_of_two``,
            ``wav_scp``, ``weights``, ``beam`` and ``dst_dir``.

    Raises:
        RuntimeError: If the weights hold multiple beams but ``args.beam``
            is not set.
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "window": args.window,
        "center": args.center,
        "transpose": False
    }
    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)
    # F x N or B x F x N
    weights = np.load(args.weights)
    if weights.ndim == 2:
        beamformer = FixedBeamformer(weights)
        beam_index = None
    else:
        # Fail before building the beamformers if the beam map is missing.
        if not args.beam:
            raise RuntimeError(
                "--beam must be assigned, as there are multiple beams")
        beamformer = [FixedBeamformer(w) for w in weights]
        # Convert each entry to an integer so it can index the beam list
        # (the previous lambda returned the ``int`` type itself).
        beam_index = ScpReader(args.beam, value_processor=int)
    with WaveWriter(args.dst_dir) as writer:
        for key, stft_mat in spectrogram_reader:
            logger.info(f"Processing utterance {key}...")
            # Branch on the presence of a beam map, not on the beamformer
            # object: a single beamformer has no per-utterance index.
            if beam_index is None:
                stft_enh = beamformer.run(stft_mat)
            else:
                beam = beam_index[key]
                stft_enh = beamformer[beam].run(stft_mat)
            # Value from the reader's maxabs() is handed to inverse_stft as
            # ``norm`` (presumably to restore the input's peak level).
            norm = spectrogram_reader.maxabs(key)
            samps = inverse_stft(stft_enh, **stft_kwargs, norm=norm)
            writer.write(key, samps)
    logger.info(f"Processed {len(spectrogram_reader):d} utterances")
예제 #2
0
def run(args):
    """Run a supperdirective (SD) beamformer over ``args.wav_scp``.

    A ``LinearSDBeamformer`` is built when ``args.geometry`` is ``"linear"``,
    otherwise a ``CircularSDBeamformer``. The direction of arrival (DoA) is
    taken per utterance from ``args.utt2doa`` when that is given, or from the
    single value ``args.doa`` otherwise.

    Utterances that are missing from ``args.utt2doa``, or whose DoA fails
    ``check_doa``, are skipped.

    Args:
        args: Parsed command-line namespace. Reads ``frame_len``,
            ``frame_hop``, ``window``, ``center``, ``round_power_of_two``,
            ``geometry``, ``linear_topo``, ``circular_radius``,
            ``circular_around``, ``circular_center``, ``utt2doa``, ``doa``,
            ``wav_scp``, ``dst_dir``, ``sr`` and ``speed``.

    Raises:
        RuntimeError: If the global ``args.doa`` fails ``check_doa`` for the
            selected geometry.
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "window": args.window,
        "center": args.center,
        "transpose": False
    }

    if args.geometry == "linear":
        topo = str2tuple(args.linear_topo)
        beamformer = LinearSDBeamformer(topo)
        logger.info(f"Initialize LinearSDBeamformer for array: {topo}")
    else:
        beamformer = CircularSDBeamformer(args.circular_radius,
                                          args.circular_around,
                                          center=args.circular_center)
        logger.info(
            "Initialize CircularSDBeamformer for " +
            f"radius = {args.circular_radius}, center = {args.circular_center}"
        )

    utt2doa = None
    doa = None
    if args.utt2doa:
        utt2doa = ScpReader(args.utt2doa, value_processor=float)
        logger.info(f"Use --utt2doa={args.utt2doa} for each utterance")
    else:
        doa = args.doa
        # A bad global DoA would be applied to every utterance, so abort
        # instead of logging and carrying on with the invalid angle.
        if not check_doa(args.geometry, doa):
            raise RuntimeError(
                f"Invalid doa {doa:.2f} for {args.geometry} array")
        logger.info(f"Use --doa={doa:.2f} for all utterances")

    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)

    done = 0
    with WaveWriter(args.dst_dir, sr=args.sr) as writer:
        for key, stft_src in spectrogram_reader:
            if utt2doa:
                # Per-utterance mode: skip keys without a usable DoA.
                if key not in utt2doa:
                    continue
                doa = utt2doa[key]
                if not check_doa(args.geometry, doa):
                    logger.info(f"Invalid DoA {doa:.2f} for utterance {key}")
                    continue
            stft_enh = beamformer.run(doa, stft_src, c=args.speed, sr=args.sr)
            done += 1
            norm = spectrogram_reader.maxabs(key)
            samps = inverse_stft(stft_enh, **stft_kwargs, norm=norm)
            writer.write(key, samps)
    logger.info(f"Processed {done} utterances over {len(spectrogram_reader)}")
예제 #3
0
def run(args):
    """Compute directional features for every utterance and write them out.

    The steering vector is selected either per utterance through
    ``args.utt2idx`` (utterance key -> integer index) or, when that is not
    given, through the comma-separated indices in ``args.doa_idx``. With
    several indices, the per-index features are stacked and flattened into
    one 2-D matrix per utterance.

    Args:
        args: Parsed command-line namespace. Reads ``frame_len``,
            ``frame_hop``, ``round_power_of_two``, ``window``, ``center``,
            ``wav_scp``, ``utt2idx``, ``doa_idx``, ``df_pair``,
            ``steer_vector``, ``dup_ark`` and ``scp``.

    Raises:
        RuntimeError: If ``args.df_pair`` contains no channel pair.
    """
    stft_kwargs = {
        "frame_len": args.frame_len,
        "frame_hop": args.frame_hop,
        "round_power_of_two": args.round_power_of_two,
        "window": args.window,
        "center": args.center,  # set to false to be comparable with Kaldi
        "transpose": False  # F x T
    }
    stft_reader = SpectrogramReader(args.wav_scp, **stft_kwargs)
    if args.utt2idx:
        utt2idx = ScpReader(args.utt2idx, value_processor=int)
        doa_idx = None
        logger.info(f"Using --utt2idx={args.utt2idx}")
    else:
        utt2idx = None
        logger.info(f"Using --doa-idx={args.doa_idx}")
        # Loop-invariant: parse the global index list once, not per utterance.
        doa_idx = [int(v) for v in args.doa_idx.split(",")]

    # Skip empty segments (e.g. a trailing ";") so that an empty option
    # reaches the explicit check below instead of failing inside int().
    df_pair = [
        tuple(map(int, p.split(",")))
        for p in args.df_pair.split(";")
        if p.strip()
    ]
    if not df_pair:
        # The previous message read the non-existent ``args.pair``.
        raise RuntimeError(
            f"Bad configurations with --df-pair {args.df_pair}")
    logger.info(f"Compute directional feature with {df_pair}")

    # A x M x F
    steer_vector = np.load(args.steer_vector)

    num_done = 0
    with ArchiveWriter(args.dup_ark, args.scp) as writer:
        for key, stft in stft_reader:
            # sv: M x F
            if utt2idx is None:
                dfs = [
                    directional_feats(stft, steer_vector[i], df_pair=df_pair)
                    for i in doa_idx
                ]
                if len(dfs) == 1:
                    df = dfs[0]
                else:
                    # N x T x F
                    dfs = np.stack(dfs)
                    # Move the index axis behind the first feature axis and
                    # merge the last two axes into one.
                    df = dfs.transpose(1, 0, 2).reshape(dfs.shape[1], -1)
            elif key in utt2idx:
                # stft: M x F x T
                df = directional_feats(stft,
                                       steer_vector[utt2idx[key]],
                                       df_pair=df_pair)
            else:
                logger.warning(f"Missing utt2idx for utterance {key}")
                continue
            writer.write(key, df)
            num_done += 1
            if not num_done % 1000:
                logger.info(f"Processed {num_done:d} utterance...")
    logger.info(f"Processed {num_done:d} utterances over {len(stft_reader):d}")
예제 #4
0
def parse_doa(args, online):
    """Return a callable that maps an utterance key to its DoA.

    When ``args.utt2doa`` is set, the callable is the ``get`` method of a
    ``ScpReader`` over that file, with every entry converted by
    ``process_doa(value, online)``. Otherwise ``args.doa`` is converted once
    and the callable returns that value for any key.

    Args:
        args: Parsed command-line namespace; reads ``utt2doa`` and ``doa``.
        online: Forwarded unchanged to ``process_doa``.

    Returns:
        A one-argument callable taking an utterance key.
    """
    if not args.utt2doa:
        # Global mode: one DoA shared by all utterances.
        fixed_doa = process_doa(args.doa, online)

        def lookup(_):
            return fixed_doa

        logger.info(f"Use --doa={fixed_doa:.2f} for all utterances")
        return lookup

    # Per-utterance mode: values are converted while the file is read.
    def convert(raw_doa):
        return process_doa(raw_doa, online)

    doa_reader = ScpReader(args.utt2doa,
                           value_processor=convert,
                           num_tokens=-1)
    lookup = doa_reader.get
    logger.info(f"Use --utt2doa={args.utt2doa} for each utterance")
    return lookup
예제 #5
0
def run(args):
    """Run a linear delay-and-sum (DS) beamformer over ``args.wav_scp``.

    The DoA comes per utterance from ``args.utt2doa`` when given, otherwise
    from the single value ``args.doa``. Negative angles are shifted by 180
    before the value is checked against the range [0, 180].

    Utterances missing from ``args.utt2doa`` or with an out-of-range DoA are
    skipped.

    Args:
        args: Parsed command-line namespace. Reads ``frame_len``,
            ``frame_hop``, ``window``, ``center``, ``round_power_of_two``,
            ``utt2doa``, ``doa``, ``wav_scp``, ``linear_topo``, ``dst_dir``,
            ``fs`` and ``speed``.

    Raises:
        RuntimeError: If the global ``args.doa`` is out of range after the
            shift.
    """
    stft_kwargs = dict(frame_len=args.frame_len,
                       frame_hop=args.frame_hop,
                       window=args.window,
                       center=args.center,
                       transpose=False)

    doa = None
    if args.utt2doa:
        utt2doa = ScpReader(args.utt2doa, value_processor=float)
        logger.info(f"Use utt2doa {args.utt2doa} for each utterance")
    else:
        utt2doa = None
        doa = args.doa
        if doa < 0:
            doa += 180
        if doa > 180 or doa < 0:
            raise RuntimeError(f"Invalid doa {doa:.2f} for --doa")
        logger.info(f"Use DoA {doa:.2f} for all utterances")

    spectrogram_reader = SpectrogramReader(
        args.wav_scp,
        round_power_of_two=args.round_power_of_two,
        **stft_kwargs)

    topo = str2tuple(args.linear_topo)
    beamformer = LinearDSBeamformer(topo)
    logger.info(f"Initialize channel LinearDSBeamformer for array: {topo}")

    num_done = 0
    with WaveWriter(args.dst_dir, fs=args.fs) as writer:
        for key, stft_mix in spectrogram_reader:
            if utt2doa:
                # Per-utterance mode: skip keys without a usable DoA.
                if key not in utt2doa:
                    continue
                doa = utt2doa[key]
                if doa < 0:
                    doa += 180
                if doa > 180 or doa < 0:
                    logger.info(f"Invalid doa {doa:.2f} for utterance {key}")
                    continue
            enhanced = beamformer.run(doa, stft_mix, c=args.speed, sr=args.fs)
            num_done += 1
            peak = spectrogram_reader.maxabs(key)
            wave = inverse_stft(enhanced, **stft_kwargs, norm=peak)
            writer.write(key, wave)
    logger.info(
        f"Processed {num_done} utterances over {len(spectrogram_reader)}")
예제 #6
0
 def __init__(self, spk2class=None):
     """Set up the optional speaker-to-class reader and the accumulators.

     Args:
         spk2class: Path handed to ``ScpReader``; when falsy, no reader is
             created and ``self.s2c`` is ``None``.
     """
     if spk2class:
         self.s2c = ScpReader(spk2class)
     else:
         self.s2c = None
     # Accumulators default to 0.0 / 0 for keys not seen before.
     self.snr = defaultdict(float)
     self.cnt = defaultdict(int)
예제 #7
0
파일: compute_wer.py 프로젝트: funcwj/setk
 def __init__(self, text):
     """Open one ``ScpReader`` per comma-separated entry of ``text``.

     Args:
         text: Comma-separated string; each piece is passed to
             ``ScpReader`` with ``num_tokens=-1`` and ``restrict=False``.
     """
     readers = []
     for path in text.split(","):
         readers.append(ScpReader(path, num_tokens=-1, restrict=False))
     self.text_reader = readers