def run(args):
    """Separate every utterance listed in ``args.wave_scp`` with a PITNet model.

    The configuration parsed from ``args.config`` supplies the model keyword
    arguments (``"model"``), the STFT parameters (``"spectrogram_reader"``)
    and the ``"dataloader"`` options ``mvn_dict`` / ``apply_log``.  For each
    utterance the STFT is computed, handed to ``Separator.seperate`` and every
    returned spectrogram is passed to ``istft`` with the target path
    ``<dump_dir>/<key>.spk<N>.wav``.  When ``args.dump_mask`` is set, the
    matching mask is stored as ``<dump_dir>/<key>.spk<N>.mat``.

    Args:
        args: Parsed command-line namespace.  Attributes read here:
            ``config``, ``state_dict``, ``cuda``, ``wave_scp``, ``dump_dir``
            and ``dump_mask``.

    Raises:
        FileNotFoundError: If the configured ``mvn_dict`` path is set but
            does not exist on disk.
    """
    num_bins, config_dict = parse_yaml(args.config)
    dataloader_conf = config_dict["dataloader"]
    spectrogram_conf = config_dict["spectrogram_reader"]

    # Load cmvn.  A falsy ``mvn_dict`` entry is forwarded unchanged, which
    # leaves the decision about normalisation to the separator.
    mvn_path = dataloader_conf["mvn_dict"]
    cmvn = mvn_path
    if mvn_path:
        if not os.path.exists(mvn_path):
            raise FileNotFoundError("Could not find mvn files")
        # SECURITY: pickle.load can execute arbitrary code while
        # deserialising; only point ``mvn_dict`` at files you trust.
        with open(mvn_path, "rb") as f:
            cmvn = pickle.load(f)

    # default: True
    apply_log = dataloader_conf.get("apply_log", True)

    dcnet = PITNet(num_bins, **config_dict["model"])

    # The same analysis parameters must be used for synthesis, so keep them
    # in one place instead of repeating them at both call sites.
    stft_kwargs = {
        "frame_length": spectrogram_conf["frame_length"],
        "frame_shift": spectrogram_conf["frame_shift"],
        "window": spectrogram_conf["window"],
        "center": True,
    }

    separator = Separator(dcnet, args.state_dict, cuda=args.cuda)

    utt_dict = parse_scps(args.wave_scp)

    # Make sure the output directory exists before anything is written;
    # an empty dump_dir keeps meaning "current working directory".
    if args.dump_dir:
        os.makedirs(args.dump_dir, exist_ok=True)

    num_utts = 0
    for key, utt in utt_dict.items():
        try:
            samps, stft_mat = stft(utt, return_samps=True, **stft_kwargs)
        except FileNotFoundError:
            print("Skip utterance {}... not found".format(key))
            continue
        print("Processing utterance {}".format(key))
        num_utts += 1
        # Peak absolute amplitude of the input samples, forwarded to istft.
        norm = np.linalg.norm(samps, np.inf)
        spk_mask, spk_spectrogram = separator.seperate(stft_mat,
                                                       cmvn=cmvn,
                                                       apply_log=apply_log)

        # A distinct loop name keeps the mixture's ``stft_mat`` intact.
        for index, spk_stft in enumerate(spk_spectrogram):
            # NOTE: the output sample rate is fixed at 8000 Hz here.
            istft(os.path.join(args.dump_dir,
                               '{}.spk{}.wav'.format(key, index + 1)),
                  spk_stft,
                  norm=norm,
                  fs=8000,
                  nsamps=samps.size,
                  **stft_kwargs)
            if args.dump_mask:
                sio.savemat(
                    os.path.join(args.dump_dir,
                                 '{}.spk{}.mat'.format(key, index + 1)),
                    {"mask": spk_mask[index]})
    print("Processed {} utterance!".format(num_utts))
def __init__(self, wave_scp, **kwargs):
    """Index the entries of ``wave_scp`` and remember the STFT options.

    Args:
        wave_scp: Path of the script file handed to ``parse_scps``.
        **kwargs: Stored untouched on ``self.stft_kwargs``.

    Raises:
        FileNotFoundError: If ``wave_scp`` does not exist.
    """
    scp_exists = os.path.exists(wave_scp)
    if not scp_exists:
        raise FileNotFoundError("Could not find file {}".format(wave_scp))

    self.stft_kwargs = kwargs
    self.wave_dict = parse_scps(wave_scp)
    # Materialise the keys once so they can be addressed by position.
    self.wave_keys = list(self.wave_dict.keys())

    summary = "Create SpectrogramReader for {} with {} utterances".format(
        wave_scp, len(self.wave_dict))
    logger.info(summary)
def run(args):
    """Separate every utterance listed in ``args.wave_scp`` with DeepCluster.

    The configuration parsed from ``args.config`` supplies the network
    keyword arguments (``"dcnet"``), the STFT parameters
    (``"spectrogram_reader"``) and the ``"dataloader"`` option ``mvn_dict``.
    For each utterance the STFT is computed, handed to
    ``DeepCluster.seperate`` and every returned spectrogram is passed to
    ``istft`` with the target path ``<dump_dir>/<key>.spk<N>.wav``.
    Optional dumps: per-speaker masks (``args.dump_mask``) as
    ``<key>.spk<N>.mat`` and the PCA matrix (``args.dump_pca``) as
    ``<key>.mat``.

    Args:
        args: Parsed command-line namespace.  Attributes read here:
            ``config``, ``dcnet_state``, ``num_spks``, ``dump_pca``,
            ``cuda``, ``wave_scp``, ``dump_dir`` and ``dump_mask``.

    Raises:
        FileNotFoundError: If the configured ``mvn_dict`` path is set but
            does not exist on disk.
    """
    num_bins, config_dict = parse_yaml(args.config)

    # Load cmvn.  A falsy ``mvn_dict`` entry is forwarded unchanged.
    mvn_path = config_dict["dataloader"]["mvn_dict"]
    cmvn = mvn_path
    if mvn_path:
        if not os.path.exists(mvn_path):
            raise FileNotFoundError("Could not find mvn files")
        # SECURITY: pickle.load can execute arbitrary code while
        # deserialising; only point ``mvn_dict`` at files you trust.
        with open(mvn_path, "rb") as f:
            cmvn = pickle.load(f)

    dcnet = DCNet(num_bins, **config_dict["dcnet"])

    # Analysis and synthesis must share the same parameters, so collect
    # them once instead of repeating them at both call sites.
    spectrogram_conf = config_dict["spectrogram_reader"]
    stft_kwargs = {
        "frame_length": spectrogram_conf["frame_length"],
        "frame_shift": spectrogram_conf["frame_shift"],
        "window": spectrogram_conf["window"],
        "center": True,
    }

    cluster = DeepCluster(
        dcnet,
        args.dcnet_state,
        args.num_spks,
        pca=args.dump_pca,
        cuda=args.cuda)

    utt_dict = parse_scps(args.wave_scp)

    # Make sure the output directory exists before anything is written;
    # an empty dump_dir keeps meaning "current working directory".
    if args.dump_dir:
        os.makedirs(args.dump_dir, exist_ok=True)

    num_utts = 0
    for key, utt in utt_dict.items():
        try:
            samps, stft_mat = stft(utt, return_samps=True, **stft_kwargs)
        except FileNotFoundError:
            print("Skip utterance {}... not found".format(key))
            continue
        print("Processing utterance {}".format(key))
        num_utts += 1
        # Peak absolute amplitude of the input samples, forwarded to istft.
        norm = np.linalg.norm(samps, np.inf)
        pca_mat, spk_mask, spk_spectrogram = cluster.seperate(
            stft_mat, cmvn=cmvn)

        # A distinct loop name keeps the mixture's ``stft_mat`` intact.
        for index, spk_stft in enumerate(spk_spectrogram):
            # NOTE: the output sample rate is fixed at 8000 Hz here.
            istft(
                os.path.join(args.dump_dir, '{}.spk{}.wav'.format(
                    key, index + 1)),
                spk_stft,
                norm=norm,
                fs=8000,
                nsamps=samps.size,
                **stft_kwargs)
            if args.dump_mask:
                sio.savemat(
                    os.path.join(args.dump_dir, '{}.spk{}.mat'.format(
                        key, index + 1)), {"mask": spk_mask[index]})

        # One PCA matrix per utterance, so it is written once per key.
        if args.dump_pca:
            sio.savemat(
                os.path.join(args.dump_dir, '{}.mat'.format(key)),
                {"pca_matrix": pca_mat})
    print("Processed {} utterance!".format(num_utts))
def __init__(self, scp_path, addr_processor=lambda x: x):
    """Build the key index for the script file at ``scp_path``.

    Args:
        scp_path: Path of the script file handed to ``parse_scps``.
        addr_processor: Callable forwarded to ``parse_scps`` as
            ``addr_processor``; the default returns its argument unchanged.

    Raises:
        FileNotFoundError: If ``scp_path`` does not exist.
    """
    if not os.path.exists(scp_path):
        not_found = "Could not find file {}".format(scp_path)
        raise FileNotFoundError(not_found)

    self.index_dict = parse_scps(scp_path, addr_processor=addr_processor)
    # Keep the keys in a list as well so they can be addressed by position.
    self.index_keys = list(self.index_dict.keys())
def run(args):
    """Run DeepCluster separation over the utterances in ``args.wave_scp``.

    Configuration comes from ``args.config``: ``"dcnet"`` holds the network
    keyword arguments, ``"spectrogram_reader"`` the STFT parameters and
    ``"dataloader"`` the ``mvn_dict`` path.  Each utterance is transformed
    with ``stft``, separated by ``DeepCluster.seperate`` and each resulting
    spectrogram is given to ``istft`` with the target path
    ``<dump_dir>/<key>.spk<N>.wav``.  With ``args.dump_mask`` the masks are
    saved as ``<key>.spk<N>.mat``; with ``args.dump_pca`` the PCA matrix is
    saved as ``<key>.mat``.

    Args:
        args: Parsed command-line namespace.  Attributes read here:
            ``config``, ``dcnet_state``, ``num_spks``, ``dump_pca``,
            ``cuda``, ``wave_scp``, ``dump_dir`` and ``dump_mask``.

    Raises:
        FileNotFoundError: If the configured ``mvn_dict`` path is set but
            does not exist on disk.
    """
    num_bins, config_dict = parse_yaml(args.config)

    # Load cmvn.  A falsy ``mvn_dict`` entry is forwarded unchanged.
    mvn_path = config_dict["dataloader"]["mvn_dict"]
    cmvn = mvn_path
    if mvn_path:
        if not os.path.exists(mvn_path):
            raise FileNotFoundError("Could not find mvn files")
        # SECURITY: pickle.load can execute arbitrary code while
        # deserialising; only point ``mvn_dict`` at files you trust.
        with open(mvn_path, "rb") as f:
            cmvn = pickle.load(f)

    dcnet = DCNet(num_bins, **config_dict["dcnet"])

    # Shared by stft() and istft() so analysis and synthesis cannot drift.
    reader_conf = config_dict["spectrogram_reader"]
    stft_kwargs = {
        "frame_length": reader_conf["frame_length"],
        "frame_shift": reader_conf["frame_shift"],
        "window": reader_conf["window"],
        "center": True,
    }

    cluster = DeepCluster(dcnet,
                          args.dcnet_state,
                          args.num_spks,
                          pca=args.dump_pca,
                          cuda=args.cuda)

    utt_dict = parse_scps(args.wave_scp)

    # Make sure the output directory exists before anything is written;
    # an empty dump_dir keeps meaning "current working directory".
    if args.dump_dir:
        os.makedirs(args.dump_dir, exist_ok=True)

    num_utts = 0
    for key, utt in utt_dict.items():
        try:
            samps, stft_mat = stft(utt, return_samps=True, **stft_kwargs)
        except FileNotFoundError:
            print("Skip utterance {}... not found".format(key))
            continue
        print("Processing utterance {}".format(key))
        num_utts += 1
        # Peak absolute amplitude of the input samples, forwarded to istft.
        norm = np.linalg.norm(samps, np.inf)
        pca_mat, spk_mask, spk_spectrogram = cluster.seperate(stft_mat,
                                                              cmvn=cmvn)

        # A distinct loop name keeps the mixture's ``stft_mat`` intact.
        for index, spk_stft in enumerate(spk_spectrogram):
            # NOTE: the output sample rate is fixed at 8000 Hz here.
            istft(os.path.join(args.dump_dir,
                               '{}.spk{}.wav'.format(key, index + 1)),
                  spk_stft,
                  norm=norm,
                  fs=8000,
                  nsamps=samps.size,
                  **stft_kwargs)
            if args.dump_mask:
                sio.savemat(
                    os.path.join(args.dump_dir,
                                 '{}.spk{}.mat'.format(key, index + 1)),
                    {"mask": spk_mask[index]})

        # One PCA matrix per utterance, so it is written once per key.
        if args.dump_pca:
            sio.savemat(os.path.join(args.dump_dir, '{}.mat'.format(key)),
                        {"pca_matrix": pca_mat})
    print("Processed {} utterance!".format(num_utts))