def uttloader(scp_config, reader_kwargs, loader_kwargs, train=True):
    """Build an utterance-level data loader from a script configuration.

    Args:
        scp_config: mapping with a 'mixture' entry plus one entry per target
            whose key starts with 'spk'; every value is handed to
            SpectrogramReader.
        reader_kwargs: keyword arguments forwarded to each SpectrogramReader.
        loader_kwargs: keyword arguments forwarded to DataLoader. A "shuffle"
            entry, if present, is overridden by ``train``. The mapping passed
            in by the caller is not modified.
        train: shuffle the data when True, keep it in order when False.

    Returns:
        The DataLoader built over the mixture/target Dataset.
    """
    mix_reader = SpectrogramReader(scp_config['mixture'], **reader_kwargs)
    target_reader = [
        SpectrogramReader(scp_config[spk_key], **reader_kwargs)
        for spk_key in scp_config
        if spk_key.startswith('spk')
    ]
    dataset = Dataset(mix_reader, target_reader)
    # Work on a shallow copy so the caller's dict is not silently rewritten
    # (the same kwargs are typically reused for the train and dev loaders).
    loader_options = dict(loader_kwargs)
    # shuffle only while training; validation keeps the original order
    loader_options["shuffle"] = train
    return DataLoader(dataset, **loader_options)
def run(args):
    """Compute global mean/std statistics of spectrograms and pickle them.

    Iterates over every spectrogram produced by a SpectrogramReader on
    ``args.wave_scp``, accumulates the sum and squared sum along axis 0, and
    writes ``{"mean": ..., "std": ...}`` to ``args.cmvn_dst`` with pickle.

    Args:
        args: namespace providing ``frame_length``, ``frame_shift``,
            ``window``, ``apply_log``, ``apply_pow``, ``wave_scp`` and
            ``cmvn_dst``.

    Raises:
        RuntimeError: if the reader yields no frames at all, since the
            statistics would be undefined (0 / 0).
    """
    reader_kwargs = {
        "frame_length": args.frame_length,
        "frame_shift": args.frame_shift,
        "window": args.window,
        "center": False,
        "apply_abs": True,
        "apply_log": args.apply_log,
        "apply_pow": args.apply_pow
    }
    num_bins = nfft(args.frame_length) // 2 + 1
    reader = SpectrogramReader(args.wave_scp, **reader_kwargs)

    feats_sum = np.zeros(num_bins)
    feats_sqr_sum = np.zeros(num_bins)
    num_frames = 0
    for _, spectrogram in tqdm.tqdm(reader):
        num_frames += spectrogram.shape[0]
        feats_sum += np.sum(spectrogram, 0)
        feats_sqr_sum += np.sum(spectrogram**2, 0)

    if num_frames == 0:
        # Dividing by zero would silently store an all-NaN CMVN file.
        raise RuntimeError(
            "No frames found in {}, can not compute CMVN statistics".format(
                args.wave_scp))

    mean = feats_sum / num_frames
    # D(X) = E(X^2) - E(X)^2
    # Round-off can push the difference slightly below zero for (nearly)
    # constant bins, which would turn into NaN under sqrt, so clamp at 0.
    variance = np.maximum(feats_sqr_sum / num_frames - mean**2, 0)
    std = np.sqrt(variance)

    with open(args.cmvn_dst, "wb") as f:
        cmvn_dict = {"mean": mean, "std": std}
        pickle.dump(cmvn_dict, f)
    print("Totally processed {} frames".format(num_frames))
    print("Global mean: {}".format(mean))
    print("Global std: {}".format(std))
def run(args):
    """Separate each mixture with computed masks and dump one wav per speaker.

    For every utterance of the mixture reader that is present in all target
    readers, masks are obtained from compute_mask, applied to the mixture
    spectrogram, and the result is written through istft to
    ``<dump_dir>/<key>.spk<N>.wav``. Utterances lacking any target are
    reported and skipped.

    Args:
        args: namespace providing ``frame_length``, ``frame_shift``,
            ``window``, ``psm``, ``mix_scp``, ``ref_scp`` and ``dump_dir``.
    """
    # return complex result
    stft_kwargs = {
        "frame_length": args.frame_length,
        "frame_shift": args.frame_shift,
        "window": args.window,
        "center": True
    }
    mask_name = "Phase Sensitive" if args.psm else "Ratio"
    print("Using {} Mask".format(mask_name))

    mixture_reader = SpectrogramReader(args.mix_scp,
                                       **stft_kwargs,
                                       return_samps=True)
    targets_reader = []
    for scp in args.ref_scp:
        targets_reader.append(SpectrogramReader(scp, **stft_kwargs))

    num_utts = 0
    for key, (samps, mixture) in mixture_reader:
        # peak amplitude of the mixture samples, handed to istft as norm
        norm = np.linalg.norm(samps, np.inf)

        # every target reader must know this utterance
        if any(key not in reader for reader in targets_reader):
            print("Skip utterance {}, missing targets".format(key))
            continue

        num_utts += 1
        if num_utts % 1000 == 0:
            print("Processed {} utterance...".format(num_utts))

        targets_list = [reader[key] for reader in targets_reader]
        spk_masks = compute_mask(mixture, targets_list, args.psm)
        # speaker ids in the file names are 1-based
        for spk_id, mask in enumerate(spk_masks, start=1):
            wav_name = '{}.spk{}.wav'.format(key, spk_id)
            istft(os.path.join(args.dump_dir, wav_name),
                  mixture * mask,
                  **stft_kwargs,
                  norm=norm,
                  fs=8000,
                  nsamps=samps.size)
    print("Processed {} utterance!".format(num_utts))
def uttloader(scp_config, reader_kwargs, loader_kwargs, train=True):
    """Build an utterance-level data loader with mean/variance normalization.

    Args:
        scp_config: mapping with a 'mixture' entry plus one entry per target
            whose key starts with 'spk'; every value is handed to
            SpectrogramReader.
        reader_kwargs: keyword arguments forwarded to each SpectrogramReader.
        loader_kwargs: mapping that must contain "mvn_dict", which is passed
            to Datasets. It is only read, never modified.
        train: shuffle the data when True, keep it in order when False.

    Returns:
        The DataLoader built over the mixture/target Datasets object.

    Raises:
        KeyError: if ``loader_kwargs`` has no "mvn_dict" entry.
    """
    mix_reader = SpectrogramReader(scp_config['mixture'], **reader_kwargs)
    target_reader = [
        SpectrogramReader(scp_config[spk_key], **reader_kwargs)
        for spk_key in scp_config
        if spk_key.startswith('spk')
    ]
    dataset = Datasets(mix_reader,
                       target_reader,
                       mvn_dict=loader_kwargs["mvn_dict"])
    # NOTE(review): batch size and worker count are fixed here; apart from
    # "mvn_dict", entries of loader_kwargs are not forwarded to DataLoader.
    # `train` is passed straight to `shuffle` instead of being written back
    # into loader_kwargs, so the caller's dict stays untouched.
    return DataLoader(dataset,
                      batch_size=40,
                      shuffle=train,
                      num_workers=10,
                      sampler=None,
                      drop_last=True,
                      collate_fn=_collate)
def run(args):
    """Compute global mean/std statistics of spectrograms and pickle them.

    The number of bins and the reader options come from parse_yaml applied to
    ``args.train_conf``. Every spectrogram produced by a SpectrogramReader on
    ``args.wave_scp`` contributes its sum and squared sum along axis 0; the
    resulting ``{"mean": ..., "std": ...}`` dict is written to
    ``args.cmvn_dst`` with pickle.

    Args:
        args: namespace providing ``train_conf``, ``wave_scp`` and
            ``cmvn_dst``.

    Raises:
        RuntimeError: if the reader yields no frames at all, since the
            statistics would be undefined (0 / 0).
    """
    num_bins, conf_dict = parse_yaml(args.train_conf)
    reader = SpectrogramReader(args.wave_scp,
                               **conf_dict["spectrogram_reader"])

    feats_sum = np.zeros(num_bins)
    feats_sqr_sum = np.zeros(num_bins)
    num_frames = 0
    for _, spectrogram in tqdm.tqdm(reader):
        num_frames += spectrogram.shape[0]
        feats_sum += np.sum(spectrogram, 0)
        feats_sqr_sum += np.sum(spectrogram**2, 0)

    if num_frames == 0:
        # Dividing by zero would silently store an all-NaN CMVN file.
        raise RuntimeError(
            "No frames found in {}, can not compute CMVN statistics".format(
                args.wave_scp))

    mean = feats_sum / num_frames
    # D(X) = E(X^2) - E(X)^2
    # Round-off can push the difference slightly below zero for (nearly)
    # constant bins, which would turn into NaN under sqrt, so clamp at 0.
    variance = np.maximum(feats_sqr_sum / num_frames - mean**2, 0)
    std = np.sqrt(variance)

    with open(args.cmvn_dst, "wb") as f:
        cmvn_dict = {"mean": mean, "std": std}
        pickle.dump(cmvn_dict, f)
    print("Totally processed {} frames".format(num_frames))
    print("Global mean: {}".format(mean))
    print("Global std: {}".format(std))