def extract_stats(opts):
    dset = build_dataset_providers(opts)
    collater_keys = dset[-1]
    dset = dset[0]
    collater = DictCollater()
    collater.batching_keys.extend(collater_keys)
    dloader = DataLoader(dset, batch_size=100,
                         shuffle=True,
                         collate_fn=collater,
                         num_workers=opts.num_workers)
    # Compute estimation of bpe. As we sample chunks randomly, we
    # should say that an epoch happened after seeing at least as many
    # chunks as total_train_wav_dur // chunk_size
    bpe = (dset.total_wav_dur // opts.chunk_size) // 500
    data = {}
    # run one epoch of training data to extract z-stats of minions
    for bidx, batch in enumerate(dloader, start=1):
        print('Bidx: {}/{}'.format(bidx, bpe))
        for k, v in batch.items():
            if k in opts.exclude_keys:
                continue
            if k not in data:
                data[k] = []
            data[k].append(v)
        if bidx >= opts.max_batches:
            break
    stats = {}
    data = dict((k, torch.cat(v)) for k, v in data.items())
    for k, v in data.items():
        stats[k] = {'mean': torch.mean(torch.mean(v, dim=2), dim=0),
                    'std': torch.std(torch.std(v, dim=2), dim=0)}
    with open(opts.out_file, 'wb') as stats_f:
        pickle.dump(stats, stats_f)
def extract_stats(opts):
    trans = Compose([
        ToTensor(),
        MIChunkWav(opts.chunk_size),
        LPS(hop=opts.hop_size),
        MFCC(hop=opts.hop_size),
        Prosody(hop=opts.hop_size)
    ])
    dset = PairWavDataset(opts.data_root, opts.data_cfg, 'train',
                          transform=trans)
    dloader = DataLoader(dset, batch_size=100,
                         shuffle=True,
                         collate_fn=DictCollater(),
                         num_workers=opts.num_workers)
    # Compute estimation of bpe. As we sample chunks randomly, we
    # should say that an epoch happened after seeing at least as many
    # chunks as total_train_wav_dur // chunk_size
    bpe = (dset.total_wav_dur // opts.chunk_size) // 500
    data = {}
    # run one epoch of training data to extract z-stats of minions
    for bidx, batch in enumerate(dloader, start=1):
        print('Bidx: {}/{}'.format(bidx, bpe))
        for k, v in batch.items():
            if k not in data:
                data[k] = []
            data[k].append(v)
        if bidx >= opts.max_batches:
            break
    stats = {}
    data = dict((k, torch.cat(v)) for k, v in data.items())
    for k, v in data.items():
        stats[k] = {'mean': torch.mean(torch.mean(v, dim=2), dim=0),
                    'std': torch.std(torch.std(v, dim=2), dim=0)}
    with open(opts.out_file, 'wb') as stats_f:
        pickle.dump(stats, stats_f)
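# NOTE (illustrative sketch, not part of the original scripts): the stats pickle
# written by extract_stats() maps each feature key to per-channel 'mean' and
# 'std' tensors. A hypothetical consumer could z-normalize a (batch, channels,
# frames) feature tensor as below; in the repo itself this role is played by the
# ZNorm(opts.stats) transform used in eval() further down. The names
# `znorm_feature`, `feats`, `key` and `stats_path` are assumptions.
import pickle

import torch


def znorm_feature(feats, key, stats_path):
    """Z-normalize a (batch, channels, frames) tensor with the stored stats."""
    with open(stats_path, 'rb') as f:
        stats = pickle.load(f)
    mean = stats[key]['mean'].view(1, -1, 1)
    std = stats[key]['std'].view(1, -1, 1)
    return (feats - mean) / (std + 1e-8)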
def train(opts):
    CUDA = True if torch.cuda.is_available() and not opts.no_cuda else False
    device = 'cuda' if CUDA else 'cpu'
    num_devices = 1
    np.random.seed(opts.seed)
    random.seed(opts.seed)
    torch.manual_seed(opts.seed)
    if CUDA:
        torch.cuda.manual_seed_all(opts.seed)
        num_devices = torch.cuda.device_count()
        print('[*] Using CUDA {} devices'.format(num_devices))
    else:
        print('[!] Using CPU')
    print('Seeds initialized to {}'.format(opts.seed))
    #torch.autograd.set_detect_anomaly(True)

    # ---------------------
    # Build Model
    minions_cfg = worker_parser(opts.net_cfg)
    #make_transforms(opts, minions_cfg)
    opts.random_scale = str2bool(opts.random_scale)
    dsets, collater_keys = build_dataset_providers(opts, minions_cfg)
    dset, va_dset = dsets
    # Build collater, appending the keys from the loaded transforms to the
    # existing default ones
    collater = DictCollater()
    collater.batching_keys.extend(collater_keys)
    dloader = DataLoader(dset, batch_size=opts.batch_size,
                         shuffle=True, collate_fn=collater,
                         num_workers=opts.num_workers,
                         drop_last=True, pin_memory=CUDA)
    # Compute estimation of bpe. As we sample chunks randomly, we
    # should say that an epoch happened after seeing at least as many
    # chunks as total_train_wav_dur // chunk_size
    bpe = (dset.total_wav_dur // opts.chunk_size) // opts.batch_size
    print('Dataset has a total of {} hours of training data'.format(
        dset.total_wav_dur / 16000 / 3600.0))
    opts.bpe = bpe
    if opts.do_eval:
        assert va_dset is not None, (
            "Asked to do validation, but failed to build validation set")
        va_dloader = DataLoader(va_dset, batch_size=opts.batch_size,
                                shuffle=True, collate_fn=DictCollater(),
                                num_workers=opts.num_workers,
                                drop_last=True, pin_memory=CUDA)
        va_bpe = (va_dset.total_wav_dur // opts.chunk_size) // opts.batch_size
        opts.va_bpe = va_bpe
    else:
        va_dloader = None
    # faster lr to MI
    #opts.min_lrs = {'mi': 0.001}
    if opts.fe_cfg is not None:
        with open(opts.fe_cfg, 'r') as fe_cfg_f:
            print(fe_cfg_f)
            fe_cfg = json.load(fe_cfg_f)
            print(fe_cfg)
    else:
        fe_cfg = None

    # load config file for attention blocks
    if opts.att_cfg:
        with open(opts.att_cfg) as f:
            att_cfg = json.load(f)
            print(att_cfg)
    else:
        att_cfg = None

    print(str2bool(opts.tensorboard))

    Trainer = trainer(frontend_cfg=fe_cfg,
                      att_cfg=att_cfg,
                      minions_cfg=minions_cfg,
                      cfg=vars(opts),
                      backprop_mode=opts.backprop_mode,
                      lr_mode=opts.lr_mode,
                      tensorboard=str2bool(opts.tensorboard),
                      device=device)

    print(Trainer.model)
    print('Frontend params: ', Trainer.model.frontend.describe_params())

    Trainer.model.to(device)
    Trainer.train_(dloader, device=device, valid_dataloader=va_dloader)
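# Worked example (illustrative, not part of the original script): the bpe
# ("batches per epoch") estimate used in the training functions. With 100 hours
# of 16 kHz training audio, 2 s chunks (32000 samples) and batch size 32:
#   (100 * 3600 * 16000 // 32000) // 32 == 5625 batches per "epoch".
# The helper name `estimate_bpe` is an assumption for the sake of the example.
def estimate_bpe(total_wav_dur, chunk_size, batch_size):
    # total_wav_dur is the total training duration in samples
    return (total_wav_dur // chunk_size) // batch_size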
def train(opts):
    CUDA = True if torch.cuda.is_available() and not opts.no_cuda else False
    device = 'cuda' if CUDA else 'cpu'
    num_devices = 1
    np.random.seed(opts.seed)
    random.seed(opts.seed)
    torch.manual_seed(opts.seed)
    if CUDA:
        torch.cuda.manual_seed_all(opts.seed)
        num_devices = torch.cuda.device_count()
        print('[*] Using CUDA {} devices'.format(num_devices))
    else:
        print('[!] Using CPU')
    print('Seeds initialized to {}'.format(opts.seed))

    # ---------------------
    # Build Model
    if opts.fe_cfg is not None:
        with open(opts.fe_cfg, 'r') as fe_cfg_f:
            fe_cfg = json.load(fe_cfg_f)
            print(fe_cfg)
    else:
        fe_cfg = None
    minions_cfg = pase_parser(opts.net_cfg)
    make_transforms(opts, minions_cfg)
    model = Waveminionet(minions_cfg=minions_cfg,
                         adv_loss=opts.adv_loss,
                         num_devices=num_devices,
                         pretrained_ckpt=opts.pretrained_ckpt,
                         frontend_cfg=fe_cfg)
    print(model)
    if opts.net_ckpt is not None:
        model.load_pretrained(opts.net_ckpt, load_last=True, verbose=True)
    print('Frontend params: ', model.frontend.describe_params())
    model.to(device)
    trans = make_transforms(opts, minions_cfg)
    print(trans)
    # Build Dataset(s) and DataLoader(s)
    dset = PairWavDataset(opts.data_root, opts.data_cfg, 'train',
                          transform=trans,
                          preload_wav=opts.preload_wav)
    dloader = DataLoader(dset, batch_size=opts.batch_size,
                         shuffle=True, collate_fn=DictCollater(),
                         num_workers=opts.num_workers,
                         pin_memory=CUDA)
    # Compute estimation of bpe. As we sample chunks randomly, we
    # should say that an epoch happened after seeing at least as many
    # chunks as total_train_wav_dur // chunk_size
    bpe = (dset.total_wav_dur // opts.chunk_size) // opts.batch_size
    opts.bpe = bpe
    if opts.do_eval:
        va_dset = PairWavDataset(opts.data_root, opts.data_cfg, 'valid',
                                 transform=trans,
                                 preload_wav=opts.preload_wav)
        va_dloader = DataLoader(va_dset, batch_size=opts.batch_size,
                                shuffle=False, collate_fn=DictCollater(),
                                num_workers=opts.num_workers,
                                pin_memory=CUDA)
        va_bpe = (va_dset.total_wav_dur // opts.chunk_size) // opts.batch_size
        opts.va_bpe = va_bpe
    else:
        va_dloader = None
    # faster lr to MI
    #opts.min_lrs = {'mi': 0.001}
    model.train_(dloader, vars(opts), device=device,
                 va_dloader=va_dloader)
def train(opts):
    CUDA = True if torch.cuda.is_available() and not opts.no_cuda else False
    device = 'cuda' if CUDA else 'cpu'
    num_devices = 1
    np.random.seed(opts.seed)
    random.seed(opts.seed)
    torch.manual_seed(opts.seed)
    if CUDA:
        torch.cuda.manual_seed_all(opts.seed)
        num_devices = torch.cuda.device_count()
        print('[*] Using CUDA {} devices'.format(num_devices))
    else:
        print('[!] Using CPU')
    print('Seeds initialized to {}'.format(opts.seed))

    # ---------------------
    # Build Model
    frontend = wf_builder(opts.fe_cfg)
    minions_cfg = pase_parser(opts.net_cfg, batch_acum=opts.batch_acum,
                              device=device, frontend=frontend)
    model = Waveminionet(minions_cfg=minions_cfg,
                         adv_loss=opts.adv_loss,
                         num_devices=num_devices,
                         frontend=frontend)
    print(model)
    print('Frontend params: ', model.frontend.describe_params())
    model.to(device)
    trans = make_transforms(opts, minions_cfg)
    print(trans)
    if opts.dtrans_cfg is not None:
        with open(opts.dtrans_cfg, 'r') as dtr_cfg:
            dtr = json.load(dtr_cfg)
            #dtr['trans_p'] = opts.distortion_p
            dist_trans = config_distortions(**dtr)
            print(dist_trans)
    else:
        dist_trans = None
    # Build Dataset(s) and DataLoader(s)
    dataset = getattr(pase.dataset, opts.dataset)
    dset = dataset(opts.data_root, opts.data_cfg, 'train',
                   transform=trans,
                   noise_folder=opts.noise_folder,
                   whisper_folder=opts.whisper_folder,
                   distortion_probability=opts.distortion_p,
                   distortion_transforms=dist_trans,
                   preload_wav=opts.preload_wav)
    dloader = DataLoader(dset, batch_size=opts.batch_size,
                         shuffle=True, collate_fn=DictCollater(),
                         num_workers=opts.num_workers,
                         pin_memory=CUDA)
    # Compute estimation of bpe. As we sample chunks randomly, we
    # should say that an epoch happened after seeing at least as many
    # chunks as total_train_wav_dur // chunk_size
    bpe = (dset.total_wav_dur // opts.chunk_size) // opts.batch_size
    opts.bpe = bpe
    if opts.do_eval:
        va_dset = dataset(opts.data_root, opts.data_cfg, 'valid',
                          transform=trans,
                          noise_folder=opts.noise_folder,
                          whisper_folder=opts.whisper_folder,
                          distortion_probability=opts.distortion_p,
                          distortion_transforms=dist_trans,
                          preload_wav=opts.preload_wav)
        va_dloader = DataLoader(va_dset, batch_size=opts.batch_size,
                                shuffle=False, collate_fn=DictCollater(),
                                num_workers=opts.num_workers,
                                pin_memory=CUDA)
        va_bpe = (va_dset.total_wav_dur // opts.chunk_size) // opts.batch_size
        opts.va_bpe = va_bpe
    else:
        va_dloader = None
    # faster lr to MI
    #opts.min_lrs = {'mi': 0.001}
    model.train_(dloader, vars(opts), device=device,
                 va_dloader=va_dloader)
def eval(opts):
    CUDA = True if torch.cuda.is_available() and not opts.no_cuda else False
    device = 'cuda' if CUDA else 'cpu'
    np.random.seed(opts.seed)
    random.seed(opts.seed)
    torch.manual_seed(opts.seed)
    if CUDA:
        torch.cuda.manual_seed_all(opts.seed)
    print('Seeds initialized to {}'.format(opts.seed))

    # ---------------------
    # Transforms
    trans = Compose([
        ToTensor(),
        MIChunkWav(opts.chunk_size, random_scale=opts.random_scale),
        LPS(opts.nfft, hop=opts.stride, win=400),
        MFCC(hop=opts.stride),
        Prosody(hop=opts.stride, win=400),
        ZNorm(opts.stats)
    ])
    print(trans)

    # ---------------------
    # Build Dataset(s) and DataLoader(s)
    dset = PairWavDataset(opts.data_root, opts.data_cfg, 'valid',
                          transform=trans)
    dloader = DataLoader(dset, batch_size=opts.batch_size,
                         shuffle=False, collate_fn=DictCollater(),
                         num_workers=opts.num_workers)
    # Compute estimation of bpe. As we sample chunks randomly, we
    # should say that an epoch happened after seeing at least as many
    # chunks as total_train_wav_dur // chunk_size
    bpe = (dset.total_wav_dur // opts.chunk_size) // opts.batch_size

    # ---------------------
    # Build Model
    if opts.fe_cfg is not None:
        with open(opts.fe_cfg, 'r') as fe_cfg_f:
            fe_cfg = json.load(fe_cfg_f)
            print(fe_cfg)
    else:
        fe_cfg = None
    model = Waveminionet(minions_cfg=pase_parser(opts.net_cfg),
                         adv_loss=opts.adv_loss,
                         pretrained_ckpt=opts.pretrained_ckpt,
                         frontend_cfg=fe_cfg)
    print(model)
    model.to(device)
    writer = SummaryWriter(opts.save_path)
    if opts.max_epoch is not None:
        # just make a sequential sweep of ckpts up to max epoch
        ckpts = ['fullmodel_e{}.ckpt'.format(e) for e in range(opts.max_epoch)]
    else:
        ckpts = opts.ckpts
    for model_ckpt in ckpts:
        # name format is fullmodel_e{}.ckpt
        epoch = int(model_ckpt.split('_')[-1].split('.')[0][1:])
        model_ckpt = os.path.join(opts.ckpt_root, model_ckpt)
        print('Loading ckpt ', model_ckpt)
        model.load_pretrained(model_ckpt, load_last=True, verbose=False)
        model.eval_(dloader, opts.batch_size, bpe, log_freq=opts.log_freq,
                    epoch_idx=epoch, writer=writer, device=device)
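# Worked example (illustrative, not part of the original script): the epoch id
# parsed from checkpoint names in eval() above follows the 'fullmodel_e{}.ckpt'
# pattern, e.g. 'fullmodel_e10.ckpt' -> 'e10.ckpt' -> 'e10' -> '10' -> 10.
# The helper name `parse_ckpt_epoch` is an assumption for the sake of the example.
def parse_ckpt_epoch(name):
    # split off the 'e{epoch}' token and drop the leading 'e'
    return int(name.split('_')[-1].split('.')[0][1:])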
def eval(opts):
    CUDA = True if torch.cuda.is_available() and not opts.no_cuda else False
    device = 'cuda' if CUDA else 'cpu'
    np.random.seed(opts.seed)
    random.seed(opts.seed)
    torch.manual_seed(opts.seed)
    if CUDA:
        torch.cuda.manual_seed_all(opts.seed)
    print('Seeds initialized to {}'.format(opts.seed))

    # ---------------------
    # Transforms
    trans = Compose([
        ToTensor(),
        MIChunkWav(opts.chunk_size, random_scale=opts.random_scale),
        Prosody(hop=opts.stride, win=400)
    ])
    with open(opts.stats, 'rb') as stats_f:
        stats = pickle.load(stats_f)

    # ---------------------
    # Build Dataset(s) and DataLoader(s)
    dset = PairWavDataset(opts.data_root, opts.data_cfg, 'test',
                          transform=trans)
    dloader = DataLoader(dset, batch_size=opts.batch_size,
                         shuffle=False, collate_fn=DictCollater(),
                         num_workers=opts.num_workers)
    # Compute estimation of bpe. As we sample chunks randomly, we
    # should say that an epoch happened after seeing at least as many
    # chunks as total_train_wav_dur // chunk_size
    bpe = (dset.total_wav_dur // opts.chunk_size) // opts.batch_size

    # ---------------------
    # Build Model
    if opts.fe_cfg is not None:
        with open(opts.fe_cfg, 'r') as fe_cfg_f:
            fe_cfg = json.load(fe_cfg_f)
            print(fe_cfg)
    else:
        fe_cfg = None
    model = Waveminionet(minions_cfg=pase_parser(opts.net_cfg),
                         frontend_cfg=fe_cfg)
    print(model)
    model.to(device)
    ckpts = opts.ckpts
    use_epid = False
    if opts.ckpt_epochs is not None:
        use_epid = True
        ckpts = opts.ckpt_epochs
    if ckpts is None:
        raise ValueError('Please specify either ckpts or ckpt_epochs')
    if opts.ckpt_root is None:
        raise ValueError('Please specify ckpt_root!')
    ckpts_res = []
    for ckpt in ckpts:
        if use_epid:
            ckpt_name = 'fullmodel_e{}.ckpt'.format(ckpt)
        else:
            ckpt_name = ckpt
        ckpt_path = os.path.join(opts.ckpt_root, ckpt_name)
        print('Loading ckpt: ', ckpt_path)
        model.load_pretrained(ckpt_path, load_last=True, verbose=True)
        # select prosodic minion
        pmodel = None
        for minion in model.minions:
            if 'prosody' in minion.name:
                pmodel = minion
        # select frontend
        fe = model.frontend
        ckpts_res.append(forward_dloader(dloader, bpe, fe, pmodel,
                                         stats, opts.tags, device))
        print('Results for ckpt {}'.format(ckpt_name))
        print('-' * 25)
        for k, v in ckpts_res[-1].items():
            print('{}: {}'.format(k, np.mean(v)))
        print('=' * 25)
    with open(opts.out_file, 'w') as out_f:
        out_f.write(json.dumps(ckpts_res, indent=2))
def train(opts):
    CUDA = True if torch.cuda.is_available() and not opts.no_cuda else False
    device = 'cuda' if CUDA else 'cpu'
    num_devices = 1
    np.random.seed(opts.seed)
    random.seed(opts.seed)
    torch.manual_seed(opts.seed)
    if CUDA:
        torch.cuda.manual_seed_all(opts.seed)
        num_devices = torch.cuda.device_count()
        print('[*] Using CUDA {} devices'.format(num_devices))
    else:
        print('[!] Using CPU')
    print('Seeds initialized to {}'.format(opts.seed))
    #torch.autograd.set_detect_anomaly(True)

    # ---------------------
    # Build Model
    minions_cfg = worker_parser(opts.net_cfg)
    #make_transforms(opts, minions_cfg)
    opts.random_scale = str2bool(opts.random_scale)
    dsets, collater_keys = build_dataset_providers(opts, minions_cfg)
    dset, va_dset = dsets
    # Build collater, appending the keys from the loaded transforms to the
    # existing default ones
    collater = DictCollater()
    collater.batching_keys.extend(collater_keys)
    dloader = DataLoader(dset, batch_size=opts.batch_size,
                         shuffle=True, collate_fn=collater,
                         num_workers=opts.num_workers,
                         drop_last=True, pin_memory=CUDA)
    # Compute estimation of bpe. As we sample chunks randomly, we
    # should say that an epoch happened after seeing at least as many
    # chunks as total_train_wav_dur // chunk_size
    bpe = (dset.total_wav_dur // opts.chunk_size) // opts.batch_size
    print("Dataset has a total of {} hours of training data".format(
        dset.total_wav_dur / 16000 / 3600.0))
    opts.bpe = bpe
    if opts.do_eval:
        assert va_dset is not None, (
            "Asked to do validation, but failed to build validation set")
        va_dloader = DataLoader(va_dset, batch_size=opts.batch_size,
                                shuffle=True, collate_fn=DictCollater(),
                                num_workers=opts.num_workers,
                                drop_last=True, pin_memory=CUDA)
        va_bpe = (va_dset.total_wav_dur // opts.chunk_size) // opts.batch_size
        opts.va_bpe = va_bpe
    else:
        va_dloader = None
    # faster lr to MI
    #opts.min_lrs = {'mi': 0.001}
    if opts.fe_cfg is not None:
        with open(opts.fe_cfg, 'r') as fe_cfg_f:
            print(fe_cfg_f)
            fe_cfg = json.load(fe_cfg_f)
            print(fe_cfg)
    else:
        fe_cfg = None

    # load config file for attention blocks
    if opts.att_cfg:
        with open(opts.att_cfg) as f:
            att_cfg = json.load(f)
            print(att_cfg)
    else:
        att_cfg = None

    print(str2bool(opts.tensorboard))

    Trainer = trainer(frontend_cfg=fe_cfg,
                      att_cfg=att_cfg,
                      minions_cfg=minions_cfg,
                      cfg=vars(opts),
                      backprop_mode=opts.backprop_mode,
                      lr_mode=opts.lr_mode,
                      tensorboard=str2bool(opts.tensorboard),
                      device=device)

    model_description = str(Trainer.model)
    tfh = tempfile.NamedTemporaryFile(mode="w")
    tfh.write(model_description)
    tfh.flush()
    print(model_description)

    num_params = Trainer.model.frontend.describe_params()
    print(f'Frontend params: {num_params}')

    # Prepare logging
    neptune_settings = None
    npt_exp = None
    if opts.neptune is not None:
        with open(opts.neptune, "r") as fh:
            neptune_settings = json.load(fh)
        neptune.init(neptune_settings["project_name"],
                     api_token=neptune_settings["api_key"])
        npt_exp = neptune.create_experiment(params=vars(opts),
                                            name=opts.experimentname,
                                            tags=opts.tags)
    else:
        # running offline
        neptune.init(backend=neptune.OfflineBackend(),
                     project_qualified_name="offline/PASE+")
    neptune.log_artifact(tfh.name, "model_description.txt")
    tfh.close()
    neptune.set_property("frontend_params", num_params)

    Trainer.model.to(device)
    Trainer.train_(dloader, device=device, valid_dataloader=va_dloader)
def cluster(opts):
    CUDA = True if torch.cuda.is_available() else False
    device = 'cuda' if CUDA else 'cpu'
    num_devices = 1
    np.random.seed(opts.seed)
    random.seed(opts.seed)
    torch.manual_seed(opts.seed)
    if CUDA:
        torch.cuda.manual_seed_all(opts.seed)
        num_devices = torch.cuda.device_count()
        print('[*] Using CUDA {} devices'.format(num_devices))
    else:
        print('[!] Using CPU')
    fe = wf_builder(opts.fe_cfg)
    if opts.fe_ckpt is not None:
        fe.load_pretrained(opts.fe_ckpt, load_last=True, verbose=True)
    else:
        print('WARNING: No pretrained ckpt loaded for FE! Random clustering?')
    fe.to(device)
    fe.eval()
    trans = Compose([
        ToTensor(),
        SingleChunkWav(opts.chunk_size, random_scale=False)
    ])
    # Build Dataset(s) and DataLoader(s)
    dset = PairWavDataset(opts.data_root, opts.data_cfg, 'train',
                          transform=trans)
    dloader = DataLoader(dset, batch_size=opts.batch_size,
                         shuffle=True,
                         collate_fn=DictCollater(),
                         num_workers=opts.num_workers)
    # accumulate train chunks and do clustering on them,
    # with each chunk containing several frames
    X = []
    timings = []
    N = opts.num_samples // opts.batch_size
    beg_t = timeit.default_timer()
    for bidx in range(1, N + 1, 1):
        batch = next(iter(dloader))
        chunk = batch['chunk']
        with torch.no_grad():
            # average frame-level frontend outputs over time per chunk
            y = fe(chunk.to(device)).mean(dim=2)
        X.append(y.view(-1, y.size(-1)).cpu().data.numpy())
        end_t = timeit.default_timer()
        timings.append(end_t - beg_t)
        beg_t = timeit.default_timer()
        if bidx % opts.log_freq == 0 or bidx >= N:
            print('Forwarded batch {:4d}/{:4d}, btime: {:.2f} s, '
                  'mbtime: {:.2f} s'.format(bidx, N, timings[-1],
                                            np.mean(timings)),
                  end='\r')
    print()
    X = np.concatenate(X, axis=0)
    print('Total X shape: ', X.shape)
    print('Running KMeans...')
    beg_t = timeit.default_timer()
    kmeans = KMeans(n_clusters=opts.k_clusters, n_jobs=opts.n_jobs,
                    verbose=0).fit(X)
    end_t = timeit.default_timer()
    print('Clustered in {:.2f} s'.format(end_t - beg_t))
    print('Saving KMeans...')
    with open(os.path.join(opts.save_path, 'kmeans.pkl'), 'wb') as f:
        pickle.dump(kmeans, f)
    print('Finished program')
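# NOTE (illustrative sketch, not part of the original scripts): the kmeans.pkl
# written by cluster() can later be loaded to assign cluster ids to new
# frontend embeddings. `fe` is assumed to be a wf_builder() frontend with a
# loaded checkpoint and `wav_chunk` a (batch, 1, chunk_size) waveform tensor,
# as in the loop above; the helper name `assign_clusters` is an assumption.
import os
import pickle

import torch


def assign_clusters(fe, wav_chunk, save_path, device='cpu'):
    """Return the KMeans cluster id of each chunk-level embedding."""
    with open(os.path.join(save_path, 'kmeans.pkl'), 'rb') as f:
        kmeans = pickle.load(f)
    with torch.no_grad():
        # average the frame-level frontend outputs over time, as in cluster()
        emb = fe(wav_chunk.to(device)).mean(dim=2)
    return kmeans.predict(emb.cpu().numpy())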