def __setup_model(self, inference, gpu):
    """Pick the compute device and build the ``Encoder_rotation`` model on it.

    Args:
        inference: when truthy the model is always built on the CPU.
        gpu: sequence of CUDA device indices, or ``None`` to run on the CPU.
            ``gpu[0]`` is the primary device; the whole sequence is handed to
            ``DP`` when more than one device is both visible and requested.

    Returns:
        Always ``True``. ``self.device`` and ``self.model`` are set as a side
        effect.
    """
    # TODO: re-write to pure DDP
    if inference or gpu is None:
        self.device = torch.device('cpu')
        self.model = Encoder_rotation(hparams=self.hparams['model']).to(
            self.device)
    else:
        several_visible = torch.cuda.device_count() > 1
        # len(gpu) is only consulted when several devices are visible
        several_requested = several_visible and len(gpu) > 1

        if several_requested:
            print("Number of GPUs will be used: ", len(gpu))
        elif several_visible:
            print("Only one GPU will be used")

        # every GPU branch targets gpu[0], falling back to the CPU without CUDA
        target = f"cuda:{gpu[0]}" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(target)
        encoder = Encoder_rotation(hparams=self.hparams['model'])
        self.model = encoder.to(self.device)

        if several_requested:
            self.model = DP(self.model,
                            device_ids=gpu,
                            output_device=gpu[0])
        if not several_visible:
            print('Only one GPU is available')

    print('Cuda available: ', torch.cuda.is_available())

    return True
def train_model(user_n,
                movie_n,
                train_data,
                val_data,
                gpus=None,
                epochs=100,
                lr=0.3,
                k=17,
                batch_size=1000):
    """Train a ``DualEmbedding`` model with SGD under ``DataParallel``.

    Args:
        user_n, movie_n: net parameters forwarded to ``DualEmbedding``.
        train_data: ``(users, movies, scores, weight)``, four 1-D tensors of
            the same length.
        val_data: ``(users, movies, scores)``, three 1-D tensors of the same
            length.
        gpus: CUDA device ids for ``DataParallel``. ``None`` or an empty
            sequence lets ``DataParallel`` use every visible device.
        epochs: number of passes over the training batches.
        lr: SGD learning rate.
        k: third argument forwarded to ``DualEmbedding``.
        batch_size: training batch size.

    Returns:
        model: the ``DataParallel``-wrapped PyTorch model.

    Raises:
        ValueError: if ``train_data`` yields no batches.
    """
    dataset = D.TensorDataset(*train_data)
    dataloader = D.DataLoader(dataset, batch_size)

    # An empty/missing ``gpus`` used to crash on ``gpus[0]``; passing None for
    # both arguments lets DataParallel fall back to all visible devices.
    device_ids = list(gpus) if gpus else None
    if device_ids:
        first = device_ids[0]
        if isinstance(first, (str, torch.device)):
            main_device = torch.device(first)
        else:
            main_device = torch.device("cuda", first)
        output_device = first
    else:
        main_device = torch.device("cuda")  # the current CUDA device
        output_device = None

    # DataParallel requires the module to live on device_ids[0], which is not
    # necessarily the current device that a bare ``.cuda()`` would pick.
    model = DualEmbedding(user_n, movie_n, k).to(main_device)
    model = DP(model, device_ids=device_ids, output_device=output_device)
    optimizer = optim.SGD(model.parameters(), lr)

    def criterion(pred, score, weight):
        # weighted squared error averaged over the batch
        return torch.dot(weight, (pred - score)**2) / len(pred)

    mseloss = nn.MSELoss()
    (val_users, val_movies, val_scores) = val_data

    # Batches are materialised once and reused for every epoch.
    li = list(dataloader)
    if not li:
        raise ValueError("train_data is empty: there are no batches to train on")

    for epoch in range(epochs):
        running_loss = 0.0
        for i, (user, movie, score, weight) in enumerate(li):
            # keep targets on the device where DataParallel gathers outputs
            user = user.to(main_device, non_blocking=True)
            movie = movie.to(main_device, non_blocking=True)
            score = score.to(main_device, non_blocking=True)
            weight = weight.to(main_device, non_blocking=True)

            optimizer.zero_grad()
            pred, l1_loss = model(user, movie)
            loss = criterion(pred, score, weight) + 1e-2 * l1_loss / len(li)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 2000 == 0:
                print(f"batch: {i}")

        # Validation needs no autograd graph, and the targets must sit on the
        # same device as the gathered predictions.
        with torch.no_grad():
            pred, _ = model(val_users, val_movies)
            val_loss = mseloss(torch.round(pred * 5),
                               val_scores.to(pred.device) * 5)
        print(
            f"epoch: {epoch}, loss: {running_loss / len(li)}, val_loss:{val_loss}"
        )
    return model
def __init__(self, input_size, n_channels, hparams):
    """Build the WaveNet model and the training utilities around it.

    Args:
        input_size: first element of the input shape passed to ``summary``.
        n_channels: channel count for ``WaveNet`` and the second element of
            the shape passed to ``summary``.
        hparams: mapping read for ``'lr'``, ``'checkpoint_path'``,
            ``'patience'`` and ``'min_delta'``.
    """
    self.hparams = hparams
    self.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")

    # define the models
    self.model = WaveNet(n_channels=n_channels).to(self.device)
    summary(self.model, (input_size, n_channels))
    # self.model.half()

    visible_gpus = torch.cuda.device_count()
    if visible_gpus > 1:
        # Three GPUs are deliberately left unused. Clamp to one device so
        # machines with 2-3 GPUs do not hand DataParallel an empty
        # ``device_ids`` list (which raises) or log a non-positive count.
        n_used = max(1, visible_gpus - 3)
        print("Number of GPUs will be used: ", n_used)
        self.model = DP(self.model, device_ids=list(range(n_used)))
    else:
        print('Only one GPU is available')

    self.metric = Metric()
    self.num_workers = 1

    ########################## compile the model ###############################

    # define optimizer
    self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                      lr=self.hparams['lr'],
                                      weight_decay=1e-5)

    # weights = torch.Tensor([0.025,0.033,0.039,0.046,0.069,0.107,0.189,0.134,0.145,0.262,1]).cuda()
    self.loss = nn.BCELoss()  # CompLoss(self.device)

    # define early stopping
    self.early_stopping = EarlyStopping(
        checkpoint_path=self.hparams['checkpoint_path'] + '/checkpoint.pt',
        patience=self.hparams['patience'],
        delta=self.hparams['min_delta'],
    )

    # lr scheduler: 'max' mode, so the monitored value is expected to grow
    self.scheduler = ReduceLROnPlateau(
        optimizer=self.optimizer,
        mode='max',
        factor=0.2,
        patience=3,
        verbose=True,
        threshold=self.hparams['min_delta'],
        threshold_mode='abs',
        cooldown=0,
        eps=0,
    )

    # NOTE(review): seeding happens after the model weights were initialised
    self.seed_everything(42)
    self.threshold = 0.75
    self.scaler = torch.cuda.amp.GradScaler()
def __setup_model(self, inference, gpu):
    """Pick the compute device, load the pretrained EfficientNet, optionally freeze it.

    Args:
        inference: when truthy the model is always built on the CPU.
        gpu: sequence of CUDA device indices, or ``None`` to run on the CPU.
            ``gpu[0]`` is the primary device; the whole sequence is handed to
            ``DP`` when more than one device is both visible and requested.

    Returns:
        Always ``True``. ``self.device`` and ``self.model`` are set as a side
        effect. When ``self.hparams['freeze']`` is truthy, ``freeze_layers()``
        is called on the underlying model.
    """

    # TODO: re-write to pure DDP
    def load_backbone(device):
        # one place for the construction every branch used to repeat
        return EfficientNet.from_pretrained(
            self.hparams['model']['pre_trained_model'],
            num_classes=self.hparams['model']['n_classes'],
        ).to(device)

    if inference or gpu is None:
        self.device = torch.device('cpu')
        self.model = load_backbone(self.device)
    elif torch.cuda.device_count() > 1:
        if len(gpu) > 1:
            print("Number of GPUs will be used: ", len(gpu))
        else:
            print("Only one GPU will be used")
        self.device = torch.device(
            f"cuda:{gpu[0]}" if torch.cuda.is_available() else "cpu")
        self.model = load_backbone(self.device)
        if len(gpu) > 1:
            self.model = DP(self.model,
                            device_ids=gpu,
                            output_device=gpu[0])
    else:
        self.device = torch.device(
            f"cuda:{gpu[0]}" if torch.cuda.is_available() else "cpu")
        self.model = load_backbone(self.device)
        print('Only one GPU is available')

    print('Cuda available: ', torch.cuda.is_available())

    if self.hparams['freeze']:
        # Decide by what was actually built, not by len(gpu): gpu may be None
        # (CPU / inference), and several ids may have been requested while the
        # model was never wrapped in DP, so ``.module`` would not exist.
        backbone = self.model.module if isinstance(self.model,
                                                   DP) else self.model
        backbone.freeze_layers()

    return True
def __setup_model(self, inference, gpu):
    """Pick the compute device, load the pretrained EfficientNet, add the projection network.

    Args:
        inference: when truthy the model is always built on the CPU.
        gpu: sequence of CUDA device indices, or ``None`` to run on the CPU.
            ``gpu[0]`` is the primary device; the whole sequence is handed to
            ``DP`` when more than one device is both visible and requested.

    Returns:
        Always ``True``. ``self.device`` and ``self.model`` are set as a side
        effect, and ``build_projection_network`` is called on the underlying
        model with ``self.hparams['model']['emb_dim']``.
    """

    # TODO: re-write to pure DDP
    def load_backbone(device):
        # one place for the construction every branch used to repeat
        return EfficientNet.from_pretrained(
            self.hparams['model']['pre_trained_model']).to(device)

    if inference or gpu is None:
        self.device = torch.device('cpu')
        self.model = load_backbone(self.device)
    elif torch.cuda.device_count() > 1:
        if len(gpu) > 1:
            print("Number of GPUs will be used: ", len(gpu))
        else:
            print("Only one GPU will be used")
        self.device = torch.device(
            f"cuda:{gpu[0]}" if torch.cuda.is_available() else "cpu")
        self.model = load_backbone(self.device)
        if len(gpu) > 1:
            self.model = DP(self.model,
                            device_ids=gpu,
                            output_device=gpu[0])
    else:
        self.device = torch.device(
            f"cuda:{gpu[0]}" if torch.cuda.is_available() else "cpu")
        self.model = load_backbone(self.device)
        print('Only one GPU is available')

    # Decide by what was actually built, not by len(gpu): gpu may be None
    # (CPU / inference), and several ids may have been requested while the
    # model was never wrapped in DP, so ``.module`` would not exist.
    backbone = self.model.module if isinstance(self.model, DP) else self.model
    backbone.build_projection_network(self.hparams['model']['emb_dim'],
                                      device=self.device)

    print('Cuda available: ', torch.cuda.is_available())

    return True
def on_pretrain_routine_start(self, trainer: Trainer,
                              pl_module: LightningModule) -> None:
    """Create the online evaluator and its optimizer on ``pl_module``'s device.

    The evaluator is wrapped in DDP or DP when the trainer's accelerator
    connector reports a distributed run of that kind; any other distributed
    mode only produces a warning. A previously recovered callback state, if
    present, is loaded into both the evaluator and the optimizer.
    """
    # the evaluator is placed here rather than in setup, because during setup
    # pl_module is still on cpu
    evaluator = SSLEvaluator(
        n_input=self.z_dim,
        n_classes=self.num_classes,
        p=self.drop_p,
        n_hidden=self.hidden_dim,
    )
    self.online_evaluator = evaluator.to(pl_module.device)

    # the connector attribute name differs between PL versions
    if hasattr(trainer, "accelerator_connector"):
        accel = trainer.accelerator_connector
    else:
        accel = trainer._accelerator_connector

    distributed = accel.is_distributed
    if distributed and accel.use_ddp:
        from torch.nn.parallel import DistributedDataParallel as DDP

        self.online_evaluator = DDP(self.online_evaluator,
                                    device_ids=[pl_module.device])
    elif distributed and accel.use_dp:
        from torch.nn.parallel import DataParallel as DP

        self.online_evaluator = DP(self.online_evaluator,
                                   device_ids=[pl_module.device])
    elif distributed:
        rank_zero_warn(
            "Does not support this type of distributed accelerator. The online evaluator will not sync."
        )

    self.optimizer = torch.optim.Adam(self.online_evaluator.parameters(),
                                      lr=1e-4)

    recovered = self._recovered_callback_state
    if recovered is not None:
        self.online_evaluator.load_state_dict(recovered["state_dict"])
        self.optimizer.load_state_dict(recovered["optimizer_state"])
num_classes=10) self.conv1 = torch.nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False) def forward(self, x): return torch.softmax(super(MnistResNet, self).forward(x), dim=-1) # net = resnet18() net = MnistResNet() net.cuda() net = DP(net) class ToNumpy(object): def __call__(self, sample): return np.array(sample) data_root = 'dataset' trainset = MNIST(root=data_root, download=True, train=True, transform=torchvision.transforms.Compose( [ToNumpy(), torchvision.transforms.ToTensor()])) valset = MNIST(root=data_root,
def main():
    """Train the MelGAN generator/discriminator pair and log everything to wandb.

    Steps, in order: parse arguments (optionally overridden by the config of a
    previous wandb run), initialise the wandb run, build the models and
    optimizers, optionally restore their weights from the previous run, build
    the data loaders, log reference audio, then run the adversarial training
    loop with periodic sample generation and checkpointing.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    seed_everything(7)
    args = parse_args()
    Path(args.save_path).mkdir(parents=True, exist_ok=True)
    entity = "demiurge"
    project = "melgan"
    load_from_run_id = args.load_from_run_id
    resume_run_id = args.resume_run_id
    # whichever id is given is the run weights/config are restored from
    restore_run_id = load_from_run_id or resume_run_id
    batch_size = args.batch_size

    # Getting initial run steps and epoch
    # if restore run, replace args
    steps = None
    if restore_run_id:
        api = wandb.Api()
        previous_run = api.run(f"{entity}/{project}/{restore_run_id}")
        steps = previous_run.lastHistoryStep
        prev_args = argparse.Namespace(**previous_run.config)
        # the previous run's config wins over the freshly parsed arguments ...
        args = vars(args)
        args.update(vars(prev_args))
        args = Namespace(**args)
        # ... except for the batch size, which is taken from the command line
        args.batch_size = batch_size

    load_initial_weights = bool(restore_run_id)
    sampling_rate = args.sampling_rate
    ratios = args.ratios
    # ratios may arrive as a string such as "[8, 8, 2, 2]"; parse it to ints
    if isinstance(ratios, str):
        ratios = ratios.replace(" ", "")
        ratios = ratios.strip("][").split(",")
        ratios = [int(i) for i in ratios]
        ratios = np.array(ratios)

    # NOTE(review): this mutual-exclusion check runs after the wandb API
    # lookup above has already been performed.
    if load_from_run_id and resume_run_id:
        raise RuntimeError("Specify either --load_from_id or --resume_run_id.")

    if resume_run_id:
        print(f"Resuming run ID {resume_run_id}.")
    elif load_from_run_id:
        print(
            f"Starting new run with initial weights from run ID {load_from_run_id}."
        )
    else:
        print("Starting new run from scratch.")

    # read 1 line in train files to log dataset location
    train_files = Path(args.data_path) / "train_files.txt"
    with open(train_files, encoding="utf-8", mode="r") as f:
        file = f.readline()
    args.train_file_sample = str(file)

    wandb.init(
        entity=entity,
        project=project,
        id=resume_run_id,
        config=args,
        resume=True if resume_run_id else False,
        save_code=True,
        dir=args.save_path,
        notes=args.notes,
    )

    print("run id: " + str(wandb.run.id))
    print("run name: " + str(wandb.run.name))

    # everything written below goes into the wandb run directory
    root = Path(wandb.run.dir)
    root.mkdir(parents=True, exist_ok=True)

    ####################################
    # Dump arguments and create logger #
    ####################################
    with open(root / "args.yml", "w") as f:
        yaml.dump(args, f)
    wandb.save("args.yml")

    ###############################################
    # The file modules.py is needed by the unagan #
    ###############################################
    wandb.save(mel2wav.modules.__file__, base_path=".")

    #######################
    # Load PyTorch Models #
    #######################
    netG = Generator(args.n_mel_channels,
                     args.ngf,
                     args.n_residual_layers,
                     ratios=ratios).to(device)
    netD = Discriminator(args.num_D, args.ndf, args.n_layers_D,
                         args.downsamp_factor).to(device)
    fft = Audio2Mel(
        n_mel_channels=args.n_mel_channels,
        pad_mode=args.pad_mode,
        sampling_rate=sampling_rate,
    ).to(device)

    for model in [netG, netD, fft]:
        wandb.watch(model)

    #####################
    # Create optimizers #
    #####################
    optG = torch.optim.Adam(netG.parameters(),
                            lr=args.learning_rate,
                            betas=(0.5, 0.9))
    optD = torch.optim.Adam(netD.parameters(),
                            lr=args.learning_rate,
                            betas=(0.5, 0.9))

    if load_initial_weights:
        # For each component try the latest file first, then the "_prev"
        # backup; the first one that loads without a RuntimeError is used.
        for model, filenames in [
            (netG, ["netG.pt", "netG_prev.pt"]),
            (optG, ["optG.pt", "optG_prev.pt"]),
            (netD, ["netD.pt", "netD_prev.pt"]),
            (optD, ["optD.pt", "optD_prev.pt"]),
        ]:
            recover_model = False
            filepath = None
            for filename in filenames:
                try:
                    run_path = f"{entity}/{project}/{restore_run_id}"
                    print(f"Restoring {filename} from run path {run_path}")
                    restored_file = wandb.restore(filename, run_path=run_path)
                    filepath = restored_file.name
                    model = load_state_dict_handleDP(model, filepath)
                    recover_model = True
                    break
                except RuntimeError as e:
                    print("RuntimeError", e)
                    print(f"recover model weight file: '{filename}'' failed")
            if not recover_model:
                raise RuntimeError(
                    f"Cannot load model weight files for component {filenames[0]}."
                )
            else:
                # store successfully recovered model weight file ("***_prev.pt")
                path_parent = Path(filepath).parent
                newfilepath = str(path_parent / filenames[1])
                os.rename(filepath, newfilepath)
                wandb.save(newfilepath)

    # wrap in DataParallel only after the (unwrapped) weights were restored
    if torch.cuda.device_count() > 1:
        netG = DP(netG).to(device)
        netD = DP(netD).to(device)
        fft = DP(fft).to(device)
        print(f"We have {torch.cuda.device_count()} gpus. Use data parallel.")
    else:
        print(f"We have {torch.cuda.device_count()} gpu.")

    #######################
    # Create data loaders #
    #######################
    train_set = AudioDataset(
        Path(args.data_path) / "train_files.txt",
        args.seq_len,
        sampling_rate=sampling_rate,
    )
    test_set = AudioDataset(
        Path(args.data_path) / "test_files.txt",
        sampling_rate * 4,
        sampling_rate=sampling_rate,
        augment=False,
    )
    wandb.save(str(Path(args.data_path) / "train_files.txt"))
    wandb.save(str(Path(args.data_path) / "test_files.txt"))

    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              num_workers=4)
    test_loader = DataLoader(test_set, batch_size=1)

    if len(train_loader) == 0:
        raise RuntimeError("Train dataset is empty.")
    if len(test_loader) == 0:
        raise RuntimeError("Test dataset is empty.")

    # a fresh run starts from wandb's own step counter; a restored run keeps
    # the last history step read from the previous run above
    if not restore_run_id:
        steps = wandb.run.step
    start_epoch = steps // len(train_loader)
    print(f"Starting with epoch {start_epoch} and step {steps}.")

    ##########################
    # Dumping original audio #
    ##########################
    test_voc = []
    test_audio = []
    samples = []
    melImages = []
    # the first half (rounded up) of the test samples is fixed and cached here;
    # the remaining ones are re-read from test_loader at every save interval
    num_fix_samples = args.n_test_samples - (args.n_test_samples // 2)
    cmap = cm.get_cmap("inferno")
    for i, x_t in enumerate(test_loader):
        x_t = x_t.to(device)
        s_t = fft(x_t).detach()

        test_voc.append(s_t.to(device))
        test_audio.append(x_t)

        audio = x_t.squeeze().cpu()
        save_sample(root / ("original_%d.wav" % i), sampling_rate, audio)
        samples.append(
            wandb.Audio(audio, caption=f"sample {i}",
                        sample_rate=sampling_rate))
        # min-max normalise the mel spectrogram before colour-mapping it
        # NOTE(review): the denominator is zero for a constant spectrogram
        melImage = s_t.squeeze().detach().cpu().numpy()
        melImage = (melImage - np.amin(melImage)) / (np.amax(melImage) -
                                                     np.amin(melImage))
        # melImage = Image.fromarray(np.uint8(cmap(melImage)) * 255)
        # melImage = melImage.resize((melImage.width * 4, melImage.height * 4))
        melImages.append(wandb.Image(cmap(melImage), caption=f"sample {i}"))

        if i == num_fix_samples - 1:
            break

    # if not resume_run_id:
    wandb.log({"audio/original": samples}, step=start_epoch)
    wandb.log({"mel/original": melImages}, step=start_epoch)
    # else:
    #     print("We are resuming, skipping logging of original audio.")

    costs = []
    start = time.time()

    # enable cudnn autotuner to speed up training
    torch.backends.cudnn.benchmark = True

    best_mel_reconst = 1000000

    for epoch in range(start_epoch, start_epoch + args.epochs + 1):
        for iterno, x_t in enumerate(train_loader):
            x_t = x_t.to(device)
            s_t = fft(x_t).detach()
            x_pred_t = netG(s_t.to(device))

            # mel reconstruction error is only monitored, never back-propagated
            with torch.no_grad():
                s_pred_t = fft(x_pred_t.detach())
                s_error = F.l1_loss(s_t, s_pred_t).item()

            #######################
            # Train Discriminator #
            #######################
            D_fake_det = netD(x_pred_t.to(device).detach())
            D_real = netD(x_t.to(device))

            # hinge-style terms on the last output of every discriminator scale
            loss_D = 0
            for scale in D_fake_det:
                loss_D += F.relu(1 + scale[-1]).mean()

            for scale in D_real:
                loss_D += F.relu(1 - scale[-1]).mean()

            netD.zero_grad()
            loss_D.backward()
            optD.step()

            ###################
            # Train Generator #
            ###################
            D_fake = netD(x_pred_t.to(device))

            loss_G = 0
            for scale in D_fake:
                loss_G += -scale[-1].mean()

            # feature matching: L1 between every output except the last one of
            # each discriminator scale, for fake vs. (detached) real inputs
            loss_feat = 0
            feat_weights = 4.0 / (args.n_layers_D + 1)
            D_weights = 1.0 / args.num_D
            wt = D_weights * feat_weights
            for i in range(args.num_D):
                for j in range(len(D_fake[i]) - 1):
                    loss_feat += wt * F.l1_loss(D_fake[i][j],
                                                D_real[i][j].detach())

            netG.zero_grad()
            (loss_G + args.lambda_feat * loss_feat).backward()
            optG.step()

            costs.append(
                [loss_D.item(),
                 loss_G.item(),
                 loss_feat.item(), s_error])

            wandb.log(
                {
                    "loss/discriminator": costs[-1][0],
                    "loss/generator": costs[-1][1],
                    "loss/feature_matching": costs[-1][2],
                    "loss/mel_reconstruction": costs[-1][3],
                },
                step=steps,
            )
            steps += 1

            if steps % args.save_interval == 0:
                st = time.time()
                with torch.no_grad():
                    samples = []
                    melImages = []
                    # fix samples
                    for i, (voc, _) in enumerate(zip(test_voc, test_audio)):
                        pred_audio = netG(voc)
                        pred_audio = pred_audio.squeeze().cpu()
                        save_sample(root / ("generated_%d.wav" % i),
                                    sampling_rate, pred_audio)
                        samples.append(
                            wandb.Audio(
                                pred_audio,
                                caption=f"sample {i}",
                                sample_rate=sampling_rate,
                            ))
                        melImage = voc.squeeze().detach().cpu().numpy()
                        melImage = (melImage - np.amin(melImage)) / (
                            np.amax(melImage) - np.amin(melImage))
                        # melImage = Image.fromarray(np.uint8(cmap(melImage)) * 255)
                        # melImage = melImage.resize(
                        #     (melImage.width * 4, melImage.height * 4)
                        # )
                        melImages.append(
                            wandb.Image(cmap(melImage), caption=f"sample {i}"))
                    wandb.log(
                        {
                            "audio/generated": samples,
                            "mel/generated": melImages,
                            "epoch": epoch,
                        },
                        step=steps,
                    )

                    # var samples
                    source = []
                    pred = []
                    pred_mel = []
                    num_var_samples = args.n_test_samples - num_fix_samples
                    for i, x_t in enumerate(test_loader):
                        # source
                        x_t = x_t.to(device)
                        audio = x_t.squeeze().cpu()
                        source.append(
                            wandb.Audio(audio,
                                        caption=f"sample {i}",
                                        sample_rate=sampling_rate))
                        # pred
                        s_t = fft(x_t).detach()
                        voc = s_t.to(device)
                        pred_audio = netG(voc)
                        pred_audio = pred_audio.squeeze().cpu()
                        pred.append(
                            wandb.Audio(
                                pred_audio,
                                caption=f"sample {i}",
                                sample_rate=sampling_rate,
                            ))
                        melImage = voc.squeeze().detach().cpu().numpy()
                        melImage = (melImage - np.amin(melImage)) / (
                            np.amax(melImage) - np.amin(melImage))
                        # melImage = Image.fromarray(np.uint8(cmap(melImage)) * 255)
                        # melImage = melImage.resize(
                        #     (melImage.width * 4, melImage.height * 4)
                        # )
                        pred_mel.append(
                            wandb.Image(cmap(melImage), caption=f"sample {i}"))

                        # stop when reach log sample
                        if i == num_var_samples - 1:
                            break
                    wandb.log(
                        {
                            "audio/var_original": source,
                            "audio/var_generated": pred,
                            "mel/var_generated": pred_mel,
                        },
                        step=steps,
                    )

                print("Saving models ...")
                torch.save(netG.state_dict(), root / "netG.pt")
                torch.save(optG.state_dict(), root / "optG.pt")
                wandb.save(str(root / "netG.pt"))
                wandb.save(str(root / "optG.pt"))

                torch.save(netD.state_dict(), root / "netD.pt")
                torch.save(optD.state_dict(), root / "optD.pt")
                wandb.save(str(root / "netD.pt"))
                wandb.save(str(root / "optD.pt"))

                # last column of costs is the mel reconstruction error; keep a
                # separate "best" checkpoint whenever its running mean improves
                if np.asarray(costs).mean(0)[-1] < best_mel_reconst:
                    best_mel_reconst = np.asarray(costs).mean(0)[-1]
                    torch.save(netD.state_dict(), root / "best_netD.pt")
                    torch.save(netG.state_dict(), root / "best_netG.pt")
                    wandb.save(str(root / "best_netD.pt"))
                    wandb.save(str(root / "best_netG.pt"))

                print("Took %5.4fs to generate samples" % (time.time() - st))
                print("-" * 100)

            if steps % args.log_interval == 0:
                print("Epoch {} | Iters {} / {} | ms/batch {:5.2f} | loss {}".
                      format(
                          epoch,
                          iterno,
                          len(train_loader),
                          1000 * (time.time() - start) / args.log_interval,
                          np.asarray(costs).mean(0),
                      ))
                # the cost window and the timer restart after every log line
                costs = []
                start = time.time()
parser.add_argument("--config-path", type=str, required=True) parser.add_argument("--save-dir-path", type=str, default=".") parser.add_argument("--tol", type=float, default=0) parser.add_argument("--batch-size", type=int, default=512) parser.add_argument("--distance", default="l1", choices=["l1", "l2"]) args = parser.parse_args() cfg, G, lidar, device = utils.setup( args.model_path, args.config_path, ema=True, fix_noise=True, ) utils.set_requires_grad(G, False) G = DP(G) # hyperparameters num_step = 1000 perturb_latent = True noise_ratio = 0.75 noise_sigma = 1.0 lr_rampup_ratio = 0.05 lr_rampdown_ratio = 0.25 # prepare reference dataset = define_dataset(cfg.dataset, phase="test") loader = torch.utils.data.DataLoader( dataset, batch_size=args.batch_size, shuffle=False,
# print(f"with configuration\n{dict_to_str(config)}") model_name = f"seq_lab_{config.model_name.lower()}" model_config = deepcopy(ModelCfg[model_name]) model_config.cnn.name = config.cnn_name model_config.rnn.name = config.rnn_name model_config.attn.name = config.attn_name model = ECG_SEQ_LAB_NET_CPSC2019( n_leads=config.n_leads, input_len=config.input_len, config=model_config, ) if torch.cuda.device_count() > 1: model = DP(model) # model = DDP(model) model.to(device=device) model.__DEBUG__ = False try: train( model=model, model_config=model_config, config=train_config, device=device, logger=logger, debug=train_config.debug, ) except KeyboardInterrupt:
def __init__(self, input_size, n_channels, hparams, gpu, inference=False):
    """Build the ECGNet model and the training utilities around it.

    Args:
        input_size: accepted for interface compatibility; not read here (the
            ``summary`` call that used it is commented out).
        n_channels: channel count forwarded to ``ECGNet``.
        hparams: mapping forwarded to ``ECGNet`` and read for ``'lr'``,
            ``'checkpoint_path'``, ``'start_fold'``, ``'patience'`` and
            ``'min_delta'``.
        gpu: sequence of CUDA device indices for ``DP``. ``None`` or an empty
            sequence selects the default device range instead.
        inference: when truthy the model is built on the CPU.
    """
    self.hparams = hparams

    if inference:
        self.device = torch.device('cpu')
        self.model = ECGNet(n_channels=n_channels,
                            hparams=self.hparams).to(self.device)
    else:
        visible_gpus = torch.cuda.device_count()
        if visible_gpus > 1:
            # ``gpu is not None`` keeps gpu=None from failing on len()
            if gpu is not None and len(gpu) > 0:
                print("Number of GPUs will be used: ", len(gpu))
                self.device = torch.device(
                    f"cuda:{gpu[0]}" if torch.cuda.is_available() else "cpu")
                self.model = ECGNet(n_channels=n_channels,
                                    hparams=self.hparams).to(self.device)
                self.model = DP(self.model,
                                device_ids=gpu,
                                output_device=gpu[0])
            else:
                # Five GPUs are deliberately left unused. Clamp to one device
                # so machines with 2-5 GPUs do not hand DataParallel an empty
                # ``device_ids`` list (which raises).
                n_used = max(1, visible_gpus - 5)
                print("Number of GPUs will be used: ", n_used)
                self.device = torch.device(
                    "cuda:0" if torch.cuda.is_available() else "cpu")
                self.model = ECGNet(n_channels=n_channels,
                                    hparams=self.hparams).to(self.device)
                self.model = DP(self.model, device_ids=list(range(n_used)))
        else:
            self.device = torch.device(
                "cuda:0" if torch.cuda.is_available() else "cpu")
            self.model = ECGNet(n_channels=n_channels,
                                hparams=self.hparams).to(self.device)
            print('Only one GPU is available')

    # define the models
    #summary(self.model, (input_size, n_channels))
    #print(torch.cuda.is_available())

    self.metric = Metric()
    self.num_workers = 18
    self.threshold = 0.5

    ########################## compile the model ###############################

    # define optimizer
    self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                      lr=self.hparams['lr'])

    # per-output weights passed to BCELoss; 27 entries
    weights = torch.Tensor([
        1., 1., 1., 1., 0.5, 1., 1., 1., 1., 1., 1., 1., 0.5, 0.5, 1., 1., 1.,
        1., 0.5, 1., 1., 1., 1., 0.5, 1., 1., 0.5
    ]).to(self.device)

    self.loss = nn.BCELoss(weight=weights)  # CompLoss(self.device)
    # self.decoder_loss = nn.MSELoss()

    # define early stopping; one checkpoint file per starting fold
    self.early_stopping = EarlyStopping(
        checkpoint_path=self.hparams['checkpoint_path'] + '/checkpoint' +
        str(self.hparams['start_fold']) + '.pt',
        patience=self.hparams['patience'],
        delta=self.hparams['min_delta'],
        is_maximize=True,
    )

    # lr scheduler: 'max' mode, so the monitored value is expected to grow
    self.scheduler = ReduceLROnPlateau(
        optimizer=self.optimizer,
        mode='max',
        factor=0.2,
        patience=1,
        verbose=True,
        threshold=self.hparams['min_delta'],
        threshold_mode='abs',
        cooldown=0,
        eps=0,
    )

    # NOTE(review): seeding happens after the model weights were initialised
    self.seed_everything(42)

    self.postprocessing = PostProcessing(fold=self.hparams['start_fold'])
    self.scaler = torch.cuda.amp.GradScaler()
def set_multiple_gpu(self):
    """Wrap the three sub-modules in ``DP`` when more than one CUDA device is visible.

    With zero or one visible device nothing is changed and nothing is printed.
    """
    if torch.cuda.device_count() <= 1:
        return

    print("more than 1")
    # same order as before: dot applier, patch applier, then detections
    for attr_name in ("dot_applier", "patch_applier", "detections"):
        wrapped = DP(getattr(self, attr_name))
        setattr(self, attr_name, wrapped)
def __init__(self, args) -> None:
    """Set up a pretrained transformer plus ELM/classifier head for sentiment classification.

    Args:
        args (dict): configuration; every key is optional.
            - model_name (str): name of the transformer model
              (default ``'bert-base-uncased'``)
            - batch_size (int): batch size (default 10)
            - epoch_num (int): epochs to train (default 2)
            - learning_rate (float): learning rate for finetuning
              (default 0.001)
            - training_type (str): one of (default ``'base'``)
                - base: pretrained model with an ELM
                - finetune_classifier: pretrained model with a linear
                  classifier
                - finetune_classifier_elm: linear classifier plus an ELM
                - finetune_classifier_linear: a two-layer
                  (linear, sigmoid, linear) classifier
            - debug (bool): debug flag (default True)
            - eval_epoch (int): stored as ``self.eval_epoch`` (default 1)
            - learning_rate_decay (float): stored as ``self.lr_decay``
              (default 0.99)

    Raises:
        TypeError: if ``model_name`` is not one of the supported models.
        KeyError: if ``training_type`` is not one of the supported types.
    """
    # load configuration
    read = args.get
    self.model_name = read('model_name', 'bert-base-uncased')
    self.bsz = read('batch_size', 10)
    self.epoch = read('epoch_num', 2)
    self.learning_rate = read('learning_rate', 0.001)
    self.training_type = read('training_type', 'base')
    self.debug = read('debug', True)
    self.eval_epoch = read('eval_epoch', 1)
    self.lr_decay = read('learning_rate_decay', 0.99)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.device = device
    self.n_gpu = torch.cuda.device_count()

    # load pretrained model: pick the loader classes and the feature sizes
    name = self.model_name
    if name in ('bert-base-uncased', 'distilbert-base-uncased',
                'albert-base-v2'):
        model_cls, tokenizer_cls, tokenizer_kwargs = AutoModel, AutoTokenizer, {}
        input_shape, output_shape = 768, 256
    elif name == 'prajjwal1/bert-tiny':
        model_cls, tokenizer_cls = AutoModel, AutoTokenizer
        tokenizer_kwargs = {'model_max_length': 512}
        input_shape, output_shape = 128, 64
    elif name == 'voidful/albert_chinese_xxlarge':
        model_cls, tokenizer_cls, tokenizer_kwargs = AlbertForMaskedLM, BertTokenizer, {}
        input_shape, output_shape = 768, 256
    else:
        raise TypeError("Unsupported model name")

    self.pretrained_model = model_cls.from_pretrained(name)
    self.pretrained_tokenizer = tokenizer_cls.from_pretrained(
        name, **tokenizer_kwargs)
    self.pretrained_model.to(device)

    multi_gpu = self.n_gpu > 1
    device_ids = range(torch.cuda.device_count()) if multi_gpu else None

    def place(module):
        # move to the target device, then wrap for multi-GPU runs
        module.to(device)
        if multi_gpu:
            return DP(module, device_ids=device_ids)
        return module

    if multi_gpu:
        self.pretrained_model = DP(self.pretrained_model,
                                   device_ids=device_ids)

    # load specific model
    kind = self.training_type
    if kind in ('finetune_classifier', 'finetune_classifier_elm'):
        head = torch.nn.Sequential(torch.nn.Linear(input_shape, 2))
        self.loss_func = torch.nn.CrossEntropyLoss()
        self.classifier = place(head)

    if kind in ('base', 'finetune_classifier_elm'):
        self.elm = classic_ELM(input_shape, output_shape)

    if kind == 'finetune_classifier_linear':
        self.elm = classic_ELM(None, None)
        head = torch.nn.Sequential(
            OrderedDict([
                ('w', torch.nn.Linear(input_shape, output_shape)),
                ('act', torch.nn.Sigmoid()),
                ('beta', torch.nn.Linear(output_shape, 2)),
            ]))
        self.loss_func = torch.nn.CrossEntropyLoss()
        self.classifier = place(head)

    # load processor, trainer, evaluator, inferer.
    # one row per training type: (processor, trainer, evaluator, inferer)
    handlers = {
        'base': (
            self.__processor_base__,
            self.__train_base__,
            self.__eval_base__,
            self.__infer_base__,
        ),
        'finetune_classifier': (
            self.__processor_base__,
            self.__train_finetune_classifier__,
            self.__eval_finetune_classifier__,
            self.__infer_finetune_classifier__,
        ),
        'finetune_classifier_elm': (
            self.__processor_base__,
            self.__train_finetune_classifier_elm__,
            self.__eval_base__,
            self.__infer_finetune_classifier_elm__,
        ),
        'finetune_classifier_linear': (
            self.__processor_base__,
            self.__train_finetune_classifier_linear__,
            self.__eval_finetune_classifier_linear__,
            self.__infer_base__,
        ),
    }
    (self.processor, self.trainer, self.evaluator,
     self.inferer) = handlers[kind]
# also save a copy for wav generation if melgan_run_id: temp_dir = Path(args.wav_generate_dir) temp_dir.mkdir(parents=True, exist_ok=True) shutil.copy(mean_fp, temp_dir / f"mean.{feat_type}.npy") shutil.copy(std_fp, temp_dir / f"std.{feat_type}.npy") mean = torch.from_numpy(np.load(mean_fp)).float().to(device).view( 1, feat_dim, 1) std = torch.from_numpy(np.load(std_fp)).float().to(device).view( 1, feat_dim, 1) # Model if torch.cuda.device_count() > 1: netG = DP(NetG(feat_dim, z_dim, z_scale_factors).to(device)) netD = DP(NetD(feat_dim).to(device)) netE = DP(Encoder(feat_dim, z_dim, z_scale_factors).to(device)) recorder = BEGANRecorder(lambda_k, init_k, gamma) print(f"We have {torch.cuda.device_count()} gpus. Use data parallel.") else: netG = NetG(feat_dim, z_dim, z_scale_factors).to(device) netD = NetD(feat_dim).to(device) netE = Encoder(feat_dim, z_dim, z_scale_factors).to(device) recorder = BEGANRecorder(lambda_k, init_k, gamma) print(f"We have {torch.cuda.device_count()} gpu.") # Optimizers optimizerG = optim.Adam(netG.parameters(), lr=init_lr) optimizerD = optim.Adam(netD.parameters(), lr=init_lr) optimizerE = optim.Adam(netE.parameters(), lr=init_lr)