Beispiel #1
0
def uttloader(scp_config, reader_kwargs, loader_kwargs, train=True):
    """Build a data loader over a mixture reader and its target readers.

    Args:
        scp_config: Mapping with a 'mixture' entry plus one entry per target
            whose key starts with 'spk'. Each value is handed to
            SpectrogramReader as its first argument.
        reader_kwargs: Keyword arguments forwarded to every SpectrogramReader.
        loader_kwargs: Keyword arguments forwarded to DataLoader. A "shuffle"
            entry, if present, is overridden by ``train``. The mapping itself
            is left untouched.
        train: Value passed to DataLoader as ``shuffle`` (shuffle for
            training, keep order otherwise).

    Returns:
        The DataLoader built on top of Dataset(mix_reader, target_reader).
    """
    mix_reader = SpectrogramReader(scp_config['mixture'], **reader_kwargs)
    target_reader = [
        SpectrogramReader(scp_config[spk_key], **reader_kwargs)
        for spk_key in scp_config if spk_key.startswith('spk')
    ]
    dataset = Dataset(mix_reader, target_reader)
    # Override shuffle on a copy so the caller's dict is not modified as a
    # side effect of building the loader.
    effective_kwargs = dict(loader_kwargs)
    effective_kwargs["shuffle"] = train
    return DataLoader(dataset, **effective_kwargs)
Beispiel #2
0
def run(args):
    """Compute global per-bin mean and std of spectrograms and pickle them.

    Iterates every (key, spectrogram) pair produced by a SpectrogramReader
    over ``args.wave_scp``, accumulates the sum and the sum of squares along
    axis 0, and writes ``{"mean": ..., "std": ...}`` to ``args.cmvn_dst``.

    Args:
        args: Namespace providing ``frame_length``, ``frame_shift``,
            ``window``, ``apply_log``, ``apply_pow``, ``wave_scp`` and
            ``cmvn_dst``.

    Raises:
        ValueError: If the reader yields no frames, since the statistics
            would otherwise be NaN.
    """
    reader_kwargs = {
        "frame_length": args.frame_length,
        "frame_shift": args.frame_shift,
        "window": args.window,
        "center": False,
        "apply_abs": True,
        "apply_log": args.apply_log,
        "apply_pow": args.apply_pow
    }
    num_bins = nfft(args.frame_length) // 2 + 1
    reader = SpectrogramReader(args.wave_scp, **reader_kwargs)
    feat_sum = np.zeros(num_bins)
    square_sum = np.zeros(num_bins)
    num_frames = 0
    for _, spectrogram in tqdm.tqdm(reader):
        # axis 0 is counted as the frame axis; sums are kept per column
        num_frames += spectrogram.shape[0]
        feat_sum += np.sum(spectrogram, 0)
        square_sum += np.sum(spectrogram**2, 0)
    if num_frames == 0:
        # Dividing by zero would silently store NaN statistics on disk.
        raise ValueError(
            "No frames found in {}, can not compute cmvn".format(
                args.wave_scp))
    mean = feat_sum / num_frames
    # D(X) = E(X^2) - E(X)^2; floating-point rounding can push the result
    # slightly below zero for near-constant bins, so clamp before the sqrt.
    variance = np.maximum(square_sum / num_frames - mean**2, 0)
    std = np.sqrt(variance)
    with open(args.cmvn_dst, "wb") as f:
        cmvn_dict = {"mean": mean, "std": std}
        pickle.dump(cmvn_dict, f)
    print("Totally processed {} frames".format(num_frames))
    print("Global mean: {}".format(mean))
    print("Global std: {}".format(std))
Beispiel #3
0
def run(args):
    """Mask each mixture utterance per speaker and write the results as wav.

    For every (key, (samples, spectrogram)) item of the mixture reader that
    is present in all reference readers, masks are obtained from
    ``compute_mask`` and each masked mixture is passed to ``istft`` with the
    output path ``<dump_dir>/<key>.spk<N>.wav`` (N starting at 1).

    Args:
        args: Namespace providing ``frame_length``, ``frame_shift``,
            ``window``, ``psm``, ``mix_scp``, ``ref_scp`` and ``dump_dir``.
    """
    # No abs/log options are passed, so readers return the complex result
    stft_kwargs = {
        "frame_length": args.frame_length,
        "frame_shift": args.frame_shift,
        "window": args.window,
        "center": True
    }
    mask_name = "Phase Sensitive" if args.psm else "Ratio"
    print("Using {} Mask".format(mask_name))
    mixture_reader = SpectrogramReader(args.mix_scp,
                                       **stft_kwargs,
                                       return_samps=True)
    targets_reader = [
        SpectrogramReader(ref, **stft_kwargs) for ref in args.ref_scp
    ]
    processed = 0
    for key, (samps, mixture) in mixture_reader:
        # infinity norm of the mixture samples, forwarded to istft as norm
        norm = np.linalg.norm(samps, np.inf)
        # an utterance is usable only if every reference reader has it
        if any(key not in reader for reader in targets_reader):
            print("Skip utterance {}, missing targets".format(key))
            continue
        processed += 1
        if processed % 1000 == 0:
            print("Processed {} utterance...".format(processed))
        targets_list = [reader[key] for reader in targets_reader]
        spk_masks = compute_mask(mixture, targets_list, args.psm)
        for spk_id, mask in enumerate(spk_masks, start=1):
            wav_path = os.path.join(args.dump_dir,
                                    '{}.spk{}.wav'.format(key, spk_id))
            # fs is fixed at 8000 here (presumably the sampling rate in Hz)
            istft(wav_path,
                  mixture * mask,
                  **stft_kwargs,
                  norm=norm,
                  fs=8000,
                  nsamps=samps.size)
    print("Processed {} utterance!".format(processed))
Beispiel #4
0
def uttloader(scp_config, reader_kwargs, loader_kwargs, train=True):
    """Build a data loader over a mixture reader and its target readers.

    Args:
        scp_config: Mapping with a 'mixture' entry plus one entry per target
            whose key starts with 'spk'. Each value is handed to
            SpectrogramReader as its first argument.
        reader_kwargs: Keyword arguments forwarded to every SpectrogramReader.
        loader_kwargs: Mapping that must contain "mvn_dict", which is passed
            to Datasets. No other entry is read, and the mapping is not
            modified.
        train: Value passed to DataLoader as ``shuffle``.

    Returns:
        The DataLoader built on top of the Datasets instance.

    Raises:
        KeyError: If ``loader_kwargs`` has no "mvn_dict" entry.
    """
    mix_reader = SpectrogramReader(scp_config['mixture'], **reader_kwargs)
    target_reader = [
        SpectrogramReader(scp_config[spk_key], **reader_kwargs)
        for spk_key in scp_config if spk_key.startswith('spk')
    ]
    dataset = Datasets(mix_reader,
                       target_reader,
                       mvn_dict=loader_kwargs["mvn_dict"])
    # NOTE: batch size, worker count and drop_last are fixed here rather
    # than taken from loader_kwargs. shuffle comes straight from ``train``
    # so the caller's dict does not have to be written to.
    return DataLoader(dataset,
                      batch_size=40,
                      shuffle=train,
                      num_workers=10,
                      sampler=None,
                      drop_last=True,
                      collate_fn=_collate)
def run(args):
    """Compute global per-bin mean and std of spectrograms and pickle them.

    The number of bins and the reader options come from
    ``parse_yaml(args.train_conf)``; the reader options are read from the
    "spectrogram_reader" entry of the returned configuration. Sums and sums
    of squares are accumulated along axis 0 of every spectrogram, and
    ``{"mean": ..., "std": ...}`` is written to ``args.cmvn_dst``.

    Args:
        args: Namespace providing ``train_conf``, ``wave_scp`` and
            ``cmvn_dst``.

    Raises:
        ValueError: If the reader yields no frames, since the statistics
            would otherwise be NaN.
    """
    num_bins, conf_dict = parse_yaml(args.train_conf)
    reader = SpectrogramReader(args.wave_scp,
                               **conf_dict["spectrogram_reader"])
    feat_sum = np.zeros(num_bins)
    square_sum = np.zeros(num_bins)
    num_frames = 0
    for _, spectrogram in tqdm.tqdm(reader):
        # axis 0 is counted as the frame axis; sums are kept per column
        num_frames += spectrogram.shape[0]
        feat_sum += np.sum(spectrogram, 0)
        square_sum += np.sum(spectrogram**2, 0)
    if num_frames == 0:
        # Dividing by zero would silently store NaN statistics on disk.
        raise ValueError(
            "No frames found in {}, can not compute cmvn".format(
                args.wave_scp))
    mean = feat_sum / num_frames
    # D(X) = E(X^2) - E(X)^2; floating-point rounding can push the result
    # slightly below zero for near-constant bins, so clamp before the sqrt.
    variance = np.maximum(square_sum / num_frames - mean**2, 0)
    std = np.sqrt(variance)
    with open(args.cmvn_dst, "wb") as f:
        cmvn_dict = {"mean": mean, "std": std}
        pickle.dump(cmvn_dict, f)
    print("Totally processed {} frames".format(num_frames))
    print("Global mean: {}".format(mean))
    print("Global std: {}".format(std))