def evaluate(data, model, scale, device):
    """Run the model once over the whole dataset and print timing, loss and accuracy.

    Args:
        data: object whose ``split(1, shuffle=False)`` returns a 4-tuple; only
            the first two elements (features and targets) are used here.
        model: callable model; switched to eval mode via ``model.eval()``.
        scale: callable applied to the prepared features before inference.
        device: forwarded to ``prep_data``.

    Returns:
        None. Results are written to stdout.
    """
    features, targets, _, _ = data.split(1, shuffle=False)
    features, targets = prep_data(features, targets, device)
    features = scale(features)

    model.eval()

    # Time only the forward pass.
    timer = Profiler("INFERENCE TIME")
    timer.tick()
    predictions = model(features)
    timer.tock()
    print(timer, end='\n\n')

    loss_fn = get_loss('bce')
    loss_value = loss_fn(predictions, targets)
    print(f"loss: {loss_value:.4f}")

    # The row-wise index of the smallest prediction is compared with the first
    # target column.
    # NOTE(review): this matches "predicted class == true class" only for
    # two-column one-hot targets — confirm against prep_data.
    predicted = torch.argmin(predictions, dim=1)
    score = accuracy(predicted, targets[:, 0])
    print(f"accuracy: {score:.4f}")
def train(run_id: str, data_dir: str, validate_data_dir: str, models_dir: Path,
          umap_every: int, save_every: int, backup_every: int, vis_every: int,
          validate_every: int, force_restart: bool, visdom_server: str,
          port: str, no_visdom: bool):
    """Train the landmark encoder with an ArcFace head, with periodic validation.

    Args:
        run_id: name of the run; used for the checkpoint file name
            (``<run_id>.pt``) and the backup directory (``<run_id>_backups``).
        data_dir: passed to ``LandmarkDataset`` for the training set.
        validate_data_dir: passed to ``LandmarkDataset`` for the validation set.
        models_dir: directory holding the checkpoint and the backup directory.
        umap_every: draw a projection every this many steps (0 disables).
        save_every: overwrite the latest checkpoint every this many steps
            (0 disables).
        backup_every: write a numbered backup every this many steps
            (0 disables); backups are only written after step 4000.
        vis_every: forwarded to ``Visualizations``.
        validate_every: run validation every this many steps (0 disables).
        force_restart: when True, ignore any existing checkpoint.
        visdom_server: forwarded to ``Visualizations``.
        port: forwarded to ``Visualizations``.
        no_visdom: forwarded to ``Visualizations`` as ``disabled``.

    Checkpoints contain ``step``, ``model_state``, ``optimizer_state`` and
    ``arc_face_state``. Checkpoints without ``arc_face_state`` still load; in
    that case the ArcFace head keeps its fresh initialisation.
    """
    # Create the datasets and dataloaders
    train_dataset = LandmarkDataset(data_dir, img_per_cls, train=True)
    train_loader = LandmarkDataLoader(
        train_dataset,
        cls_per_batch,
        img_per_cls,
        num_workers=6,
    )
    validate_dataset = LandmarkDataset(validate_data_dir, v_img_per_cls, train=False)
    validate_loader = LandmarkDataLoader(
        validate_dataset,
        v_cls_per_batch,
        v_img_per_cls,
        num_workers=4,
    )
    validate_iter = iter(validate_loader)

    criterion = torch.nn.CrossEntropyLoss()

    # Forward pass and loss both run on the GPU when one is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    loss_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Create the model, the ArcFace head and the optimizer
    model = Encoder(device, loss_device)
    arc_face = ArcFace(model_embedding_size, num_class, scale=30, m=0.35, device=device)
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
        arc_face = torch.nn.DataParallel(arc_face)
    model.to(device)
    arc_face.to(device)

    optimizer = torch.optim.SGD(
        [{'params': model.parameters()}, {'params': arc_face.parameters()}],
        lr=learning_rate_init,
        momentum=0.9)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer=optimizer,
                                                step_size=25000,
                                                gamma=0.5)
    init_step = 1

    # Configure file paths for the model
    state_fpath = models_dir.joinpath(run_id + ".pt")
    backup_dir = models_dir.joinpath(run_id + "_backups")

    def build_checkpoint(next_step):
        # The ArcFace head is trained jointly with the encoder, so it is stored
        # alongside it; otherwise a resume would restart the head from scratch.
        return {
            "step": next_step,
            "model_state": model.state_dict(),
            "arc_face_state": arc_face.state_dict(),
            "optimizer_state": optimizer.state_dict(),
        }

    # Load any existing model
    if not force_restart:
        if state_fpath.exists():
            print(
                "Found existing model \"%s\", loading it and resuming training."
                % run_id)
            # map_location lets a GPU-saved checkpoint load on a CPU-only host.
            checkpoint = torch.load(state_fpath, map_location=device)
            init_step = checkpoint["step"]
            model.load_state_dict(checkpoint["model_state"])
            if "arc_face_state" in checkpoint:
                arc_face.load_state_dict(checkpoint["arc_face_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            # Reset the learning rate of every parameter group, not only the
            # encoder's, so both groups restart from the same value.
            for param_group in optimizer.param_groups:
                param_group["lr"] = learning_rate_init
        else:
            print("No model \"%s\" found, starting training from scratch." %
                  run_id)
    else:
        print("Starting the training from scratch.")
    model.train()

    # Initialize the visualization environment
    vis = Visualizations(run_id,
                         vis_every,
                         server=visdom_server,
                         port=port,
                         disabled=no_visdom)
    vis.log_dataset(train_dataset)
    vis.log_params()
    device_name = str(
        torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU")
    vis.log_implementation({"Device": device_name})

    # Training loop
    profiler = Profiler(summarize_every=500, disabled=False)
    for step, cls_batch in enumerate(train_loader, init_step):
        profiler.tick("Blocking, waiting for batch (threaded)")

        # Forward pass
        inputs = torch.from_numpy(cls_batch.data).float().to(device)
        labels = torch.from_numpy(cls_batch.labels).long().to(device)
        sync(device)
        profiler.tick("Data to %s" % device)
        embeds = model(inputs)
        sync(device)
        profiler.tick("Forward pass")
        output = arc_face(embeds, labels)
        loss = criterion(output, labels)
        sync(device)
        profiler.tick("Loss")

        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        profiler.tick("Backward pass")
        optimizer.step()
        scheduler.step()
        profiler.tick("Parameter update")

        acc = get_acc(output, labels)

        # Update visualizations
        vis.update(loss.item(), acc, step)
        print("step {}, loss: {}, acc: {}".format(step, loss.item(), acc))

        # Draw projections and save them to the backup folder
        if umap_every != 0 and step % umap_every == 0:
            print("Drawing and saving projections (step %d)" % step)
            projection_dir = backup_dir / 'projections'
            projection_dir.mkdir(exist_ok=True, parents=True)
            projection_fpath = projection_dir.joinpath("%s_umap_%d.png" %
                                                       (run_id, step))
            # L2-normalise each embedding row before projecting.
            embeds = embeds.detach()
            embeds = (embeds /
                      torch.norm(embeds, dim=1, keepdim=True)).cpu().numpy()
            vis.draw_projections(embeds, img_per_cls, step, projection_fpath)
            vis.save()

        # Overwrite the latest version of the model
        if save_every != 0 and step % save_every == 0:
            print("Saving the model (step %d)" % step)
            torch.save(build_checkpoint(step + 1), state_fpath)

        # Make a backup (skipped for the first 4000 steps)
        if backup_every != 0 and step % backup_every == 0:
            if step > 4000:
                print("Making a backup (step %d)" % step)
                ckpt_dir = backup_dir / 'ckpt'
                ckpt_dir.mkdir(exist_ok=True, parents=True)
                backup_fpath = ckpt_dir.joinpath("%s_%d.pt" % (run_id, step))
                torch.save(build_checkpoint(step + 1), backup_fpath)

        # Do validation
        if validate_every != 0 and step % validate_every == 0:
            model.eval()
            for _ in range(num_validate):
                with torch.no_grad():
                    validate_cls_batch = next(validate_iter)
                    validate_inputs = torch.from_numpy(
                        validate_cls_batch.data).float().to(device)
                    validate_labels = torch.from_numpy(
                        validate_cls_batch.labels).long().to(device)
                    validate_embeds = model(validate_inputs)
                    validate_output = arc_face(validate_embeds, validate_labels)
                    validate_loss = criterion(validate_output, validate_labels)
                    validate_acc = get_acc(validate_output, validate_labels)
                vis.update_validate(validate_loss.item(), validate_acc, step,
                                    num_validate)

            # The embeddings of the last validation batch are used for the
            # projection plot.
            projection_dir = backup_dir / 'v_projections'
            projection_dir.mkdir(exist_ok=True, parents=True)
            projection_fpath = projection_dir.joinpath("%s_umap_%d.png" %
                                                       (run_id, step))
            validate_embeds = validate_embeds.detach()
            validate_embeds = (validate_embeds / torch.norm(
                validate_embeds, dim=1, keepdim=True)).cpu().numpy()
            vis.draw_projections(validate_embeds,
                                 v_img_per_cls,
                                 step,
                                 projection_fpath,
                                 is_validate=True)
            vis.save()
            model.train()

        profiler.tick("Extras (visualizations, saving)")
def train(run_id: str, clean_data_root: Path, models_dir: Path,
          umap_every: int, save_every: int, backup_every: int, vis_every: int,
          force_restart: bool, visdom_server: str, no_visdom: bool):
    """Train the speaker encoder from step 1, saving checkpoints periodically.

    Args:
        run_id: name of the run; used for the checkpoint file name
            (``<run_id>.pt``) and the backup directory (``<run_id>_backups``).
        clean_data_root: passed to ``SpeakerVerificationDataset``.
        models_dir: directory holding the checkpoint and the backup directory.
        umap_every: draw a projection every this many steps (0 disables).
        save_every: overwrite the latest checkpoint every this many steps
            (0 disables).
        backup_every: write a numbered backup every this many steps
            (0 disables).
        vis_every: forwarded to ``Visualizations``.
        force_restart: accepted but not read by this function; no existing
            checkpoint is ever loaded here.
        visdom_server: forwarded to ``Visualizations``.
        no_visdom: forwarded to ``Visualizations`` as ``disabled``.
    """
    dataset = SpeakerVerificationDataset(clean_data_root)
    loader = SpeakerVerificationDataLoader(
        dataset,
        speakers_per_batch,
        utterances_per_speaker,
        num_workers=8,
    )

    # Forward pass on CUDA when available; the loss is always computed on CPU.
    has_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if has_cuda else "cpu")
    loss_device = torch.device("cpu")

    # Create the model and the optimizer
    model = SpeakerEncoder(device, loss_device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate_init)
    first_step = 1

    # Configure file paths for the model
    latest_ckpt_path = models_dir.joinpath(run_id + ".pt")
    backup_dir = models_dir.joinpath(run_id + "_backups")

    model.train()

    # Initialize the visualization environment (visdom)
    vis = Visualizations(run_id,
                         vis_every,
                         server=visdom_server,
                         disabled=no_visdom)
    # Computed but not used further in this function.
    device_name = str(torch.cuda.get_device_name(0) if has_cuda else "CPU")

    def write_checkpoint(target_path, finished_step):
        # The stored step is the one training would resume from.
        torch.save(
            {
                "step": finished_step + 1,
                "model_state": model.state_dict(),
                "optimizer_state": optimizer.state_dict(),
            }, target_path)

    # Start training
    profiler = Profiler(summarize_every=10, disabled=False)
    for step, batch in enumerate(loader, start=first_step):
        profiler.tick("Blocking, waiting for batch (threaded)")

        # Forward pass
        inputs = torch.from_numpy(batch.data).to(device)
        sync(device)
        profiler.tick(f"Data to {device}")
        embeds = model(inputs)
        sync(device)
        profiler.tick("Forward pass")
        grouped_shape = (speakers_per_batch, utterances_per_speaker, -1)
        embeds_loss = embeds.view(grouped_shape).to(loss_device)
        loss, eer = model.loss(embeds_loss)
        sync(loss_device)
        profiler.tick("Loss")

        # Backward pass
        model.zero_grad()
        loss.backward()
        profiler.tick("Backward pass")
        model.do_gradient_ops()
        optimizer.step()
        profiler.tick("Parameter update")

        vis.update(loss.item(), eer, step)

        umap_due = umap_every != 0 and step % umap_every == 0
        save_due = save_every != 0 and step % save_every == 0
        backup_due = backup_every != 0 and step % backup_every == 0

        # Draw a UMAP projection and save the image
        if umap_due:
            backup_dir.mkdir(exist_ok=True)
            projection_fpath = backup_dir.joinpath(
                f"{run_id}_umap_{step:06d}.png")
            embeds = embeds.detach().cpu().numpy()
            vis.draw_projections(embeds, utterances_per_speaker, step,
                                 projection_fpath)
            vis.save()

        # Overwrite the latest version of the model
        if save_due:
            write_checkpoint(latest_ckpt_path, step)

        # Make a backup
        if backup_due:
            backup_dir.mkdir(exist_ok=True)
            write_checkpoint(
                backup_dir.joinpath(f"{run_id}_bak_{step:06d}.pt"), step)

        profiler.tick("Extras (visualizations, saving)")
def train(run_id: str, clean_data_root: Path, models_dir: Path,
          umap_every: int, save_every: int, backup_every: int, vis_every: int,
          force_restart: bool, visdom_server: str, no_visdom: bool):
    """Train the speaker encoder, resuming from an existing checkpoint if present.

    Args:
        run_id: name of the run; used for the checkpoint file name
            (``<run_id>.pt``) and the backup directory (``<run_id>_backups``).
        clean_data_root: passed to ``SpeakerVerificationDataset``.
        models_dir: directory holding the checkpoint and the backup directory.
        umap_every: draw a projection every this many steps (0 disables).
        save_every: overwrite the latest checkpoint every this many steps
            (0 disables).
        backup_every: write a numbered backup every this many steps
            (0 disables).
        vis_every: forwarded to ``Visualizations``.
        force_restart: when True, ignore any existing checkpoint.
        visdom_server: forwarded to ``Visualizations``.
        no_visdom: forwarded to ``Visualizations`` as ``disabled``.
    """
    # Create a dataset and a dataloader
    dataset = SpeakerVerificationDataset(clean_data_root)
    loader = SpeakerVerificationDataLoader(
        dataset,
        speakers_per_batch,
        utterances_per_speaker,
        num_workers=8,
    )

    # The forward pass runs on the GPU when available. The loss is kept on the
    # CPU.
    # FIXME: currently, the gradient is None if loss_device is cuda
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    loss_device = torch.device("cpu")

    # Create the model and the optimizer
    model = SpeakerEncoder(device, loss_device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate_init)
    init_step = 1

    # Configure file paths for the model
    state_fpath = models_dir.joinpath(run_id + ".pt")
    backup_dir = models_dir.joinpath(run_id + "_backups")

    # Load any existing model
    if not force_restart:
        if state_fpath.exists():
            print("Found existing model \"%s\", loading it and resuming training." % run_id)
            # map_location lets a GPU-saved checkpoint load on a CPU-only host.
            checkpoint = torch.load(state_fpath, map_location=device)
            init_step = checkpoint["step"]
            model.load_state_dict(checkpoint["model_state"])
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            # The stored learning rate is discarded in favour of the configured one.
            optimizer.param_groups[0]["lr"] = learning_rate_init
        else:
            print("No model \"%s\" found, starting training from scratch." % run_id)
    else:
        print("Starting the training from scratch.")
    model.train()

    # Initialize the visualization environment
    vis = Visualizations(run_id, vis_every, server=visdom_server, disabled=no_visdom)
    vis.log_dataset(dataset)
    vis.log_params()
    device_name = str(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU")
    vis.log_implementation({"Device": device_name})

    # Training loop
    profiler = Profiler(summarize_every=10, disabled=False)
    for step, speaker_batch in enumerate(loader, init_step):
        profiler.tick("Blocking, waiting for batch (threaded)")

        # Forward pass
        inputs = torch.from_numpy(speaker_batch.data).to(device)
        sync(device)
        profiler.tick("Data to %s" % device)
        embeds = model(inputs)
        sync(device)
        profiler.tick("Forward pass")
        embeds_loss = embeds.view((speakers_per_batch, utterances_per_speaker, -1)).to(loss_device)
        loss, eer = model.loss(embeds_loss)
        sync(loss_device)
        profiler.tick("Loss")

        # Backward pass
        model.zero_grad()
        loss.backward()
        profiler.tick("Backward pass")
        model.do_gradient_ops()
        optimizer.step()
        profiler.tick("Parameter update")

        # Update visualizations
        vis.update(loss.item(), eer, step)

        # Draw projections and save them to the backup folder
        if umap_every != 0 and step % umap_every == 0:
            print("Drawing and saving projections (step %d)" % step)
            # parents=True so a missing models_dir does not abort the run.
            backup_dir.mkdir(exist_ok=True, parents=True)
            projection_fpath = backup_dir.joinpath("%s_umap_%06d.png" % (run_id, step))
            embeds = embeds.detach().cpu().numpy()
            vis.draw_projections(embeds, utterances_per_speaker, step, projection_fpath)
            vis.save()

        # Overwrite the latest version of the model
        if save_every != 0 and step % save_every == 0:
            print("Saving the model (step %d)" % step)
            torch.save({
                "step": step + 1,
                "model_state": model.state_dict(),
                "optimizer_state": optimizer.state_dict(),
            }, state_fpath)

        # Make a backup
        if backup_every != 0 and step % backup_every == 0:
            print("Making a backup (step %d)" % step)
            backup_dir.mkdir(exist_ok=True, parents=True)
            backup_fpath = backup_dir.joinpath("%s_bak_%06d.pt" % (run_id, step))
            torch.save({
                "step": step + 1,
                "model_state": model.state_dict(),
                "optimizer_state": optimizer.state_dict(),
            }, backup_fpath)

        profiler.tick("Extras (visualizations, saving)")
def train(run_id: str, train_data_root: Path, test_data_root: Path,
          models_dir: Path, save_every: int, backup_every: int, vis_every: int,
          force_restart: bool, visdom_server: str, no_visdom: bool):
    """Train the speaker encoder (optionally on several GPUs) with periodic testing.

    Args:
        run_id: name of the run; used for the checkpoint file name
            (``<run_id>.pt``) and the backup directory (``<run_id>_backups``).
        train_data_root: passed to ``SpeakerVerificationDataset`` for training.
        test_data_root: passed to ``SpeakerVerificationDataset`` for testing.
        models_dir: directory holding the checkpoint and the backup directory.
        save_every: every this many steps, overwrite the latest checkpoint and
            evaluate on up to 50 test batches (0 disables).
        backup_every: write a numbered backup every this many steps
            (0 disables).
        vis_every: print averaged loss/EER every this many steps (0 disables).
        force_restart: when True, ignore any existing checkpoint.
        visdom_server: accepted but not read by this function.
        no_visdom: accepted but not read by this function.

    Checkpoints always store the state dict of the unwrapped model, so the
    same file can be loaded whether or not ``DataParallel`` is in use.
    """
    # Create the datasets and dataloaders
    dataset = SpeakerVerificationDataset(train_data_root)
    loader = SpeakerVerificationDataLoader(
        dataset,
        speakers_per_batch,
        utterances_per_speaker,
        num_workers=dataloader_workers,
    )
    test_dataset = SpeakerVerificationDataset(test_data_root)
    testdata_loader = SpeakerVerificationDataLoader(
        test_dataset,
        speakers_per_batch,
        utterances_per_speaker,
        num_workers=dataloader_workers,
    )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Create the model and the optimizer. `raw_model` always refers to the
    # unwrapped encoder; `model` may be a DataParallel wrapper around it.
    model = SpeakerEncoder(device)
    raw_model = model
    if torch.cuda.device_count() > 1:
        print("Use", torch.cuda.device_count(), "GPUs.")
        model = torch.nn.DataParallel(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate_init)
    init_step = 1

    # Configure file paths for the model
    state_fpath = models_dir.joinpath(run_id + ".pt")
    backup_dir = models_dir.joinpath(run_id + "_backups")

    def build_checkpoint(next_step):
        # Save from raw_model: a DataParallel wrapper prefixes every key with
        # "module.", which raw_model.load_state_dict would reject on resume.
        return {
            "step": next_step,
            "model_state": raw_model.state_dict(),
            "optimizer_state": optimizer.state_dict(),
        }

    # Load any existing model
    if not force_restart:
        if state_fpath.exists():
            print(
                "Found existing model \"%s\", loading it and resuming training."
                % run_id)
            # map_location lets a GPU-saved checkpoint load on a CPU-only host.
            checkpoint = torch.load(str(state_fpath), map_location=device)
            init_step = checkpoint["step"]
            model_state = checkpoint["model_state"]
            # Older checkpoints written from the DataParallel wrapper carry a
            # "module." prefix on every key; strip it so they remain loadable.
            prefix = "module."
            if model_state and all(key.startswith(prefix) for key in model_state):
                model_state = {
                    key[len(prefix):]: value
                    for key, value in model_state.items()
                }
            raw_model.load_state_dict(model_state)
            optimizer.load_state_dict(checkpoint["optimizer_state"])
            optimizer.param_groups[0]["lr"] = learning_rate_init
        else:
            print("No model \"%s\" found, starting training from scratch." %
                  run_id)
    else:
        print("Starting the training from scratch.")
    model.train()

    save_interval_s_time = time.time()
    prt_interval_s_time = time.time()
    total_loss, total_eer = 0, 0

    # Training loop
    profiler = Profiler(summarize_every=1, disabled=True)
    for step, speaker_batch in enumerate(loader, init_step):
        sync(device)
        profiler.tick("Blocking, waiting for batch (threaded)")

        # Forward pass
        inputs = torch.from_numpy(speaker_batch.data).to(device)
        sync(device)
        profiler.tick("Data to %s" % device)
        embeds = model(inputs)
        sync(device)
        profiler.tick("Forward pass")
        embeds_loss = embeds.view(
            (speakers_per_batch, utterances_per_speaker, -1))
        loss, eer = raw_model.loss(embeds_loss)
        total_loss += loss.item()
        total_eer += eer
        sync(device)
        profiler.tick("Loss")

        # Backward pass
        model.zero_grad()
        loss.backward()
        profiler.tick("Backward pass")
        raw_model.do_gradient_ops()
        optimizer.step()
        sync(device)
        profiler.tick("Parameter update")

        # Periodic progress report. Guarded against vis_every == 0 like the
        # other *_every parameters, to avoid a modulo-by-zero.
        if vis_every != 0 and step % vis_every == 0:
            learning_rate = optimizer.param_groups[0]["lr"]
            prt_interval_e_time = time.time()
            cost_time = prt_interval_e_time - prt_interval_s_time
            prt_interval_s_time = prt_interval_e_time
            print(
                " Step %06d> %d step cost %d seconds, lr:%.4f, Avg_loss:%.4f, Avg_eer:%.4f."
                % (step, vis_every, cost_time, learning_rate,
                   total_loss / vis_every, total_eer / vis_every),
                flush=True)
            total_loss, total_eer = 0, 0

        # Overwrite the latest version of the model, then test it
        if save_every != 0 and step % save_every == 0:
            torch.save(build_checkpoint(step + 1), str(state_fpath))

            # Evaluate on at most 50 test batches, reporting every 10.
            test_total_loss, test_total_eer = 0.0, 0.0
            test_prt_interval = 10
            for test_step, test_batch in enumerate(testdata_loader, 1):
                testinputs = torch.from_numpy(test_batch.data).to(device)
                with torch.no_grad():
                    test_embeds = model(testinputs)
                    test_embeds_loss = test_embeds.view(
                        (speakers_per_batch, utterances_per_speaker, -1))
                    test_loss, test_eer = raw_model.loss(test_embeds_loss)
                test_total_loss += test_loss.item()
                test_total_eer += test_eer
                if test_step % test_prt_interval == 0:
                    print(
                        " |--Test Step %06d> Avg_loss:%.4f, Avg_eer:%.4f." %
                        (test_step, test_total_loss / test_step,
                         test_total_eer / test_step),
                        flush=True)
                if test_step == 50:
                    break

            save_interval_e_time = time.time()
            cost_time = save_interval_e_time - save_interval_s_time
            print(
                "\n"
                "++++Step %06d> Saving the model, %d step cost %d seconds." %
                (step, save_every, cost_time),
                flush=True)
            save_interval_s_time = save_interval_e_time

        # Make a backup
        if backup_every != 0 and step % backup_every == 0:
            print("Making a backup (step %d)" % step)
            backup_dir.mkdir(exist_ok=True)
            backup_fpath = str(
                backup_dir.joinpath("%s_bak_%06d.pt" % (run_id, step)))
            torch.save(build_checkpoint(step + 1), backup_fpath)

        sync(device)
        profiler.tick("Extras (visualizations, saving)")
def train(data, model, scale, config, device):
    """Train ``model`` with mini-batch SGD and record per-epoch loss and accuracy.

    Args:
        data: object whose ``split(config.train_part, shuffle=True)`` returns
            ``(X_train, y_train, X_test, y_test)``.
        model: callable model exposing ``backward(grad)``.
        scale: scaler with ``fit(X)``; also callable to transform features.
            It is fitted on the train and test features together.
        config: settings object; reads ``train_part``, ``loss``, ``sgd_params``,
            ``epochs``, ``batch_size`` and ``cross_validation``.
        device: forwarded to ``prep_data``.

    When ``config.cross_validation`` is true, the train and test sets are
    pooled and re-partitioned at random after every epoch, with the train set
    receiving the ``config.train_part`` fraction.

    Raises:
        AssertionError: if the initial train or test partition is empty.
    """
    from nn.optim.sgd import SGD
    from utils.train_history import TrainHistory

    X_train, y_train, X_test, y_test = data.split(config.train_part,
                                                  shuffle=True)
    assert len(X_train) > 0, "Wrong number of train examples"
    assert len(X_test) > 0, "Wrong number of test examples"

    X_train, y_train = prep_data(X_train, y_train, device)
    X_test, y_test = prep_data(X_test, y_test, device)

    # Share of rows whose first target column is set, per partition.
    print("first label in train part: {:.2f}%".format(
        y_train[:, 0].sum().float() / y_train.shape[0] * 100))
    print("first label in test part: {:.2f}%".format(
        y_test[:, 0].sum().float() / y_test.shape[0] * 100))

    scale.fit(torch.cat([X_train, X_test], dim=0))
    X_train = scale(X_train)
    X_test = scale(X_test)

    criterion = get_loss(config.loss)
    optimizer = SGD(model, **config.sgd_params)

    profiler = Profiler("TRAIN TIME")
    history = TrainHistory(config.epochs,
                           ["loss", "val_loss", "acc", "val_acc"])

    for i in range(1, config.epochs + 1):
        profiler.tick()

        losses = []
        for batch_X, batch_y in batch_iterator(X_train,
                                               y_train,
                                               config.batch_size,
                                               permute=True):
            output = model(batch_X)
            losses.append(criterion(output, batch_y).to("cpu"))
            # Gradients are propagated manually through the loss and the model.
            grad = criterion.backward(output, batch_y)
            model.backward(grad)
            optimizer.optimise()

        test_pred = model(X_test)
        test_loss = criterion(test_pred, y_test)
        test_acc = accuracy(torch.argmax(test_pred, dim=1),
                            torch.argmax(y_test, dim=1))

        pred = model(X_train)
        acc = accuracy(torch.argmax(pred, dim=1), torch.argmax(y_train, dim=1))

        history.update(i, np.mean(losses), test_loss, acc, test_acc)
        history.print_progress()

        if config.cross_validation:
            X = torch.cat([X_train, X_test], dim=0)
            y = torch.cat([y_train, y_test], dim=0)
            idxs = np.random.permutation(X.shape[0])
            train_num = int(X.shape[0] * config.train_part)
            # The first `train_num` shuffled rows form the train set, so the
            # train set keeps the `train_part` fraction; the rest is the test set.
            train_idxs, test_idxs = idxs[:train_num], idxs[train_num:]
            X_train, y_train = X[train_idxs], y[train_idxs]
            X_test, y_test = X[test_idxs], y[test_idxs]

        profiler.tock()

    history.visualize()
    print('\n', profiler, sep='', end='\n\n')