def main(args): """Main train and evaluation function. Parameters ---------- args: argparse.Namespace Arguments """ formatter = logging.Formatter('%(asctime)s %(levelname)s - %(funcName)s: %(message)s', "%H:%M:%S") logger = logging.getLogger(__name__) logger.setLevel(args.log_level.upper()) stream = logging.StreamHandler() stream.setLevel(args.log_level.upper()) stream.setFormatter(formatter) logger.addHandler(stream) set_seed(args.seed) device = get_device(is_gpu=not args.no_cuda) exp_dir = os.path.join(RES_DIR, args.name) feature_dir = os.path.join(exp_dir, 'training_features') logger.info("Root directory for saving and loading experiments: {}".format(exp_dir)) if not args.is_eval_only: create_safe_directory(feature_dir, logger=logger) # Setting number of epochs to 1, as we need to extract features args.epochs = 1 args.batch_size = 1 # PREPARES DATA data_loader = get_dataloaders(args.dataset, batch_size=args.batch_size, logger=logger, test=False) logger.info("Train {} with {} samples".format(args.dataset, len(data_loader.dataset))) # PREPARES MODEL args.img_size = get_img_size(args.dataset) # stores for metadata model = load_model(exp_dir, filename='model.pt') logger.info('Num parameters in model: {}'.format(get_n_param(model))) # Extract Features model = model.to(device) # make sure trainer and viz on same device fe = FeatureExtractor(model, save_dir=exp_dir, is_progress_bar=not args.no_progress_bar) fe(data_loader, epochs=args.epochs, checkpoint_every=args.checkpoint_every, feature_dir=feature_dir) # SAVE MODEL AND EXPERIMENT INFORMATION # save_model(trainer.model, exp_dir, metadata=vars(args)) print('Done.')
def main(args):
    set_seed(args.seed)
    device = torch.device(
        'cuda:{}'.format(args.gpu) if torch.cuda.is_available() else 'cpu')
    exp_dir = os.path.join(RES_DIR, args.name)
    print("Saving and loading experiments at: {}".format(exp_dir))

    if not args.is_eval_only:  # train
        create_directory(exp_dir)

        # PREPARES TRAINING DATA
        train_loader = get_dataloaders(args.dataset, batch_size=args.batch_size)

        # PREPARES MODEL
        args.img_size = get_img_size(args.dataset)  # stores for metadata
        cs = [1, 64, 128, 1024]
        model = VLAE(args, args.latent_dim, cs)

        # TRAINS
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        model = model.to(device)  # make sure trainer and viz on same device
        gif_visualizer = GifTraversalsTraining(model, args.dataset, exp_dir)
        reg_coeff = [args.reg_coeff0, args.reg_coeff1, args.reg_coeff2]
        trainer = Trainer(model, optimizer, reg_coeff,
                          device=device,
                          save_dir=exp_dir,
                          is_progress_bar=not args.no_progress_bar,
                          gif_visualizer=gif_visualizer)
        trainer(args, train_loader,
                epochs=args.epochs,
                checkpoint_every=args.checkpoint_every)

        # SAVE MODEL AND EXPERIMENT INFORMATION
        save_model(trainer.model, exp_dir, metadata=vars(args))
        print("Model has been saved")
def main(args): """Main train and evaluation function. Parameters ---------- args: argparse.Namespace Arguments """ formatter = logging.Formatter('%(asctime)s %(levelname)s - %(funcName)s: %(message)s', "%H:%M:%S") logger = logging.getLogger(__name__) logger.setLevel(args.log_level.upper()) stream = logging.StreamHandler() stream.setLevel(args.log_level.upper()) stream.setFormatter(formatter) logger.addHandler(stream) set_seed(args.seed) device = get_device(is_gpu=not args.no_cuda) exp_dir = os.path.join(RES_DIR, args.name) logger.info("Root directory for saving and loading experiments: {}".format(exp_dir)) # Initialize a new sweep # Arguments: # – sweep_config: the sweep config dictionary defined above # – entity: Set the username for the sweep # – project: Set the project name for the sweep config = wandb.config with wandb.init(name="sweep-reg_anneal-seed", notes='This is a test run', tags=['btcvae', 'dsprites'], entity='neonkitchen', project]'sweep' config = config):
def main():
    # Initialise
    config = parse_args(sys.argv[1:])
    device = get_device()
    set_seed(config.seed)

    # Data
    arrs = np.load(config.data, allow_pickle=True).item()
    train_data = EHR(arrs['X_train'], arrs['Y_train'])
    valid_data = EHR(arrs['X_valid'], arrs['Y_valid'])
    train_loader = DataLoader(train_data,
                              batch_size=config.batch_size,
                              shuffle=True,
                              pin_memory=True)
    valid_loader = DataLoader(valid_data,
                              batch_size=128,
                              shuffle=False,
                              pin_memory=True)

    # Model
    n_tokens = int(arrs['X_train'].max()) + 1  # +1 to embedding size for 0 emb
    model = Net(n_tokens,
                config.emb_dim,
                config.rnn_dim,
                variational=config.variational,
                layer_norm=config.layer_norm).to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=config.learning_rate,
                           weight_decay=config.weight_decay)

    # Train
    for epoch in range(config.epochs):
        t_loss = train_epoch(model, device, train_loader, optimizer)
        v_loss, v_auroc, v_auprc = test(model, device, valid_loader)

    # Save
    new_model_dir(os.path.join('results', config.name))
    torch.save(model.state_dict(), os.path.join(RES_DIR, "model.h5"))
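# NOTE: the EHR dataset class used above is imported elsewhere and not shown. A
# minimal sketch of what it presumably looks like -- a thin torch Dataset wrapping
# the pre-tokenised X/Y arrays; the exact dtypes and any padding logic are assumptions.
import torch
from torch.utils.data import Dataset

class EHRSketch(Dataset):
    """Wraps paired (sequences, labels) arrays for DataLoader batching."""

    def __init__(self, X, Y):
        self.X = torch.as_tensor(X, dtype=torch.long)   # token indices
        self.Y = torch.as_tensor(Y, dtype=torch.float)  # outcome labels

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]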
def main(args): """Main train and evaluation function. Parameters ---------- args: argparse.Namespace Arguments """ formatter = logging.Formatter( '%(asctime)s %(levelname)s - %(funcName)s: %(message)s', "%H:%M:%S") logger = logging.getLogger(__name__) logger.setLevel(args.log_level.upper()) stream = logging.StreamHandler() stream.setLevel(args.log_level.upper()) stream.setFormatter(formatter) logger.addHandler(stream) set_seed(args.seed) device = get_device(is_gpu=not args.no_cuda) exp_dir = os.path.join(RES_DIR, args.name) logger.info("Root directory for saving and loading experiments: {}".format( exp_dir)) if not args.is_eval_only: create_safe_directory(exp_dir, logger=logger) if args.loss == "factor": logger.info( "FactorVae needs 2 batches per iteration. To replicate this behavior while being consistent, we double the batch size and the the number of epochs." ) args.batch_size *= 2 args.epochs *= 2 # PREPARES DATA train_loader = get_dataloaders(args.dataset, batch_size=args.batch_size, logger=logger) logger.info("Train {} with {} samples".format( args.dataset, len(train_loader.dataset))) # PREPARES MODEL args.img_size = get_img_size(args.dataset) # stores for metadata model = init_specific_model(args.model_type, args.img_size, args.latent_dim) logger.info('Num parameters in model: {}'.format(get_n_param(model))) # TRAINS optimizer = optim.Adam(model.parameters(), lr=args.lr) model = model.to(device) # make sure trainer and viz on same device gif_visualizer = GifTraversalsTraining(model, args.dataset, exp_dir) loss_f = get_loss_f(args.loss, n_data=len(train_loader.dataset), device=device, **vars(args)) trainer = Trainer(model, optimizer, loss_f, device=device, logger=logger, save_dir=exp_dir, is_progress_bar=not args.no_progress_bar, gif_visualizer=gif_visualizer) trainer( train_loader, epochs=args.epochs, checkpoint_every=args.checkpoint_every, ) # SAVE MODEL AND EXPERIMENT INFORMATION save_model(trainer.model, exp_dir, metadata=vars(args)) if args.is_metrics or not args.no_test: model = load_model(exp_dir, is_gpu=not args.no_cuda) metadata = load_metadata(exp_dir) # TO-DO: currently uses train datatset test_loader = get_dataloaders(metadata["dataset"], batch_size=args.eval_batchsize, shuffle=False, logger=logger) loss_f = get_loss_f(args.loss, n_data=len(test_loader.dataset), device=device, **vars(args)) use_wandb = False if use_wandb: loss = args.loss wandb.init(project="atmlbetavae", config={"VAE_loss": args.loss}) if loss == "betaH": beta = loss_f.beta wandb.config["Beta"] = beta evaluator = Evaluator(model, loss_f, device=device, logger=logger, save_dir=exp_dir, is_progress_bar=not args.no_progress_bar, use_wandb=use_wandb) evaluator(test_loader, is_metrics=args.is_metrics, is_losses=not args.no_test)
args.n_gpu = 1
args.device = device

# Setup logging
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
    datefmt='%m/%d/%Y %H:%M:%S',
    level=logging.INFO if args.local_rank in [-1, 0] else logging.WARN)
logger.warning(
    "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
    args.local_rank, device, args.n_gpu, bool(args.local_rank != -1), args.fp16)

# Set seed and log params
set_seed(args)
logger.info("Training/evaluation parameters %s", args)

#### Get Models ####
dis = discriminative_transformers.PretrainedDiscriminativeTransformer(args)
dis_tokenizer = dis.tokenizer
gen = generative_transformers.PretrainedTransformerGenerator(args)
gen_tokenizer = gen.tokenizer
encoder = generative_transformers.PretrainedTransformerGenerator(args)

# Let's verify that the model generates okay text
sampled_text = gen.sample_text(2)
print("Starting off, sampled text is ", sampled_text)

# TODO: clean up the hardcoded amount
if args.gen_model_type in ["gpt2", "ctrl"]:
    decoder = GRUDecoder(gen.config.n_embd, gen_tokenizer.vocab_size,
""" import os os.environ['CUDA_VISIBLE_DEVICES'] = '0' import tensorflow as tf from tensorflow.keras.models import load_model from tensorflow import keras from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping from models.cnn_resnet import ResNet18, train_step, test_step from utils.dataload import cifar10_dataset, cifar100_dataset, svhn_dataset import matplotlib.pyplot as plt from os.path import isfile from utils.helpers import set_seed, normalize_img, init_epoch, data_load from tqdm import tqdm # setting the seed -- [5, 10, 15, 20, 25] set_seed(15) input_shape = (32, 32, 3) n_classes = 100 batches = 128 ds_train, ds_test = data_load(batches, data_fn=cifar100_dataset) model = ResNet18(input_shape, n_classes) # optim = tf.keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=0.04) optim = tf.keras.optimizers.Adam(lr=0.0001, beta_1=0.5) ckpt = tf.train.Checkpoint(step=tf.Variable(1), mdoel=model, optimizer=optim) manager = tf.train.CheckpointManager( ckpt, directory='checkpoint/resnet18_checkpoint', max_to_keep=20) emanager = tf.train.CheckpointManager( ckpt, directory='checkpoint/resnet18_echeckpoint', max_to_keep=20) ckpt.restore(manager.latest_checkpoint)
def main(args: argparse.Namespace):
    """Main train and evaluation function."""
    formatter = logging.Formatter(
        '%(asctime)s %(levelname)s - %(funcName)s: %(message)s', "%H:%M:%S")
    logger = logging.getLogger(__name__)
    logger.setLevel("INFO")
    stream = logging.StreamHandler()
    stream.setLevel("INFO")
    stream.setFormatter(formatter)
    logger.addHandler(stream)

    set_seed(args.seed)
    device = get_device(is_gpu=not args.no_cuda)
    exp_dir = os.path.join(RES_DIR, args.name)
    logger.info(f"Root directory for saving and loading experiments: {exp_dir}")

    if not args.is_eval_only:
        create_safe_directory(exp_dir, logger=logger)

        if args.loss == "factor":
            logger.info("FactorVae needs 2 batches per iteration. "
                        "To replicate this behavior, double batch size and epochs.")
            args.batch_size *= 2
            args.epochs *= 2

        # PREPARES DATA
        train_loader = get_dataloaders(args.dataset,
                                       noise=args.noise,
                                       batch_size=args.batch_size,
                                       logger=logger)
        logger.info(f"Train {args.dataset} with {len(train_loader.dataset)} samples")

        # PREPARES MODEL
        args.img_size = get_img_size(args.dataset)  # stores for metadata
        model = VAE(args.img_size, args.latent_dim)
        logger.info(f'Num parameters in model: {get_n_param(model)}')

        # TRAINS
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
        model = model.to(device)
        gif_visualizer = GifTraversalsTraining(model, args.dataset, exp_dir)
        loss_f = get_loss_f(args.loss,
                            n_data=len(train_loader.dataset),
                            device=device,
                            **vars(args))
        if args.loss in ['tdGJS', 'tGJS']:
            loss_optimizer = optim.Adam(loss_f.parameters(), lr=args.lr)
        else:
            loss_optimizer = None
        print(loss_optimizer)

        trainer = Trainer(model, optimizer, loss_f,
                          device=device,
                          logger=logger,
                          save_dir=exp_dir,
                          is_progress_bar=not args.no_progress_bar,
                          gif_visualizer=gif_visualizer,
                          loss_optimizer=loss_optimizer,
                          denoise=args.noise is not None)
        trainer(train_loader,
                epochs=args.epochs,
                checkpoint_every=args.checkpoint_every)

        # SAVE MODEL AND EXPERIMENT INFORMATION
        save_model(trainer.model, exp_dir, metadata=vars(args))

    # Eval
    model = load_model(exp_dir, is_gpu=not args.no_cuda)
    metadata = load_metadata(exp_dir)
    test_loader = get_dataloaders(metadata["dataset"],
                                  noise=args.noise,
                                  train=False,
                                  batch_size=128,
                                  logger=logger)
    loss_f = get_loss_f(args.loss,
                        n_data=len(test_loader.dataset),
                        device=device,
                        **vars(args))
    evaluator = Evaluator(model, loss_f,
                          device=device,
                          is_metrics=args.is_metrics,
                          is_train=False,
                          logger=logger,
                          save_dir=exp_dir,
                          is_progress_bar=not args.no_progress_bar,
                          denoise=args.noise is not None)
    evaluator(test_loader)

    # Train set also
    test_loader = get_dataloaders(metadata["dataset"],
                                  train=True,
                                  batch_size=128,
                                  logger=logger)
    loss_f = get_loss_f(args.loss,
                        n_data=len(test_loader.dataset),
                        device=device,
                        **vars(args))
    evaluator = Evaluator(model, loss_f,
                          device=device,
                          is_metrics=args.is_metrics,
                          is_train=True,
                          logger=logger,
                          save_dir=exp_dir,
                          is_progress_bar=not args.no_progress_bar)
    evaluator(test_loader)
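# NOTE: 'tdGJS'/'tGJS' above are losses with their own trainable parameters, which is
# why they get a second optimizer. A generic sketch of that pattern -- a loss module
# exposing parameters() driven by its own Adam instance; the learnable mixing weight
# here is an illustrative assumption, not the real form of the GJS losses.
import torch
import torch.nn as nn

class TrainableLoss(nn.Module):
    """A loss whose weighting itself is learned alongside the model."""

    def __init__(self):
        super().__init__()
        self.raw_alpha = nn.Parameter(torch.zeros(()))  # unconstrained parameter

    def forward(self, recon_loss, kl_loss):
        alpha = torch.sigmoid(self.raw_alpha)           # constrain to (0, 1)
        return alpha * recon_loss + (1.0 - alpha) * kl_loss

# Usage: step the loss optimizer together with the model optimizer.
loss_f = TrainableLoss()
loss_opt = torch.optim.Adam(loss_f.parameters(), lr=1e-3)
loss = loss_f(torch.tensor(1.0, requires_grad=True), torch.tensor(0.5, requires_grad=True))
loss_opt.zero_grad()
loss.backward()
loss_opt.step()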
def main(args): """Main train and evaluation function. Parameters ---------- args: argparse.Namespace Arguments """ # Logging info formatter = logging.Formatter('%(asctime)s %(levelname)s - ' '%(funcName)s: %(message)s', '%H:%M:%S') logger = logging.getLogger(__name__) logger.setLevel('INFO') stream = logging.StreamHandler() stream.setLevel('INFO') stream.setFormatter(formatter) logger.addHandler(stream) set_seed(args.seed) device = torch.device( 'cuda' if torch.cuda.is_available() and args.cuda else 'cpu') model_name = f'{args.name}_lr{args.lr}_z{args.latent_dim}' \ + f'_h{args.hidden_dim}_p{args.p_dropout}' model_dir = os.path.join(args.results, model_name) logger.info(f'Directory for saving and loading models: {model_dir}') if not args.eval: # Model directory new_model_dir(model_dir, logger=logger) # Dataloaders train_loader, valid_loader = get_dataloaders( args.data, args.t_hours, args.n_bins, validation=True, dynamic=args.dynamic, batch_size=args.bs, logger=logger) logger.info( f'Train {args.model_type}-{args.t_hours} ' + f'with {len(train_loader.dataset)} samples') # Load model n_tokens = len(np.load( os.path.join( args.data, '_dicts', f'{args.t_hours}_{args.n_bins}.npy'), allow_pickle=True).item()) model = init_model( args.model_type, n_tokens, args.latent_dim, args.hidden_dim, p_dropout=args.p_dropout, dt=args.dt, weighted=args.weighted, dynamic=args.dynamic) logger.info(f'#params in model: {get_n_param(model)}') # Optimizer optimizer = torch.optim.Adam(model.parameters(), lr=args.lr) loss_f = BCE() model = model.to(device) # Training trainer = Trainer( model, loss_f, optimizer, device=device, logger=logger, save_dir=model_dir, p_bar=args.p_bar) trainer.train( train_loader, valid_loader, epochs=args.epochs, early_stopping=args.early_stopping) # Save model metadata = vars(args) metadata['n_tokens'] = n_tokens save_model(trainer.model, model_dir, metadata=metadata) if args.test: # Load model model = load_model(model_dir, is_gpu=args.cuda) metadata = load_metadata(model_dir) # Dataloader test_loader, _ = get_dataloaders( metadata['data'], metadata['t_hours'], metadata['n_bins'], validation=False, dynamic=metadata['dynamic'], batch_size=128, shuffle=False, logger=logger) # Evaluate loss_f = BCE() evaluator = Trainer( model, loss_f, device=device, logger=logger, save_dir=model_dir, p_bar=args.p_bar) evaluator._valid_epoch(test_loader)
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from models.vgg16_cbs import VGG16CustomCbs, train_step, test_step
from utils.dataload import cifar10_dataset, cifar100_dataset, svhn_dataset
import matplotlib.pyplot as plt
from os.path import isfile
from utils.helpers import set_seed, normalize_img, init_epoch, data_load
from tqdm import tqdm
from time import time

# setting the seed -- [5, 10, 15, 20, 25]
set_seed(10)

input_shape = (32, 32, 3)
n_classes = 10
batches = 256
ds_train, ds_test = data_load(batches, data_fn=svhn_dataset)

model = VGG16CustomCbs(input_shape, n_classes)
# optim = tf.keras.optimizers.SGD(lr=0.01, decay=0.05, momentum=0.9)
optim = tf.keras.optimizers.Adam(lr=0.0001, beta_1=0.5)

ckpt = tf.train.Checkpoint(step=tf.Variable(1), model=model, optimizer=optim)
manager = tf.train.CheckpointManager(
    ckpt, directory='checkpoint/vgg16cbs_checkpoint', max_to_keep=20)
emanager = tf.train.CheckpointManager(
    ckpt, directory='checkpoint/vgg16cbs_echeckpoint', max_to_keep=20)
ckpt.restore(manager.latest_checkpoint)
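# NOTE: both TF scripts above stop right after restoring the checkpoint. A sketch of
# the custom training loop that presumably follows, using tf.train.CheckpointManager's
# real save API; the train_step/test_step signatures are assumptions, since those
# helpers are project-local imports.
EPOCHS = 200  # illustrative

for epoch in range(int(ckpt.step.numpy()), EPOCHS):
    for x, y in tqdm(ds_train):
        train_step(model, optim, x, y)  # assumed signature
    for x, y in ds_test:
        test_step(model, x, y)          # assumed signature
    ckpt.step.assign_add(1)             # advance the checkpointed step counter
    save_path = manager.save()          # rotates files, keeping max_to_keep=20
    print(f'Epoch {epoch}: saved checkpoint to {save_path}')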
import logging
import os

import torch

from dataloader.utils import load_dataset
from decoding.generation import generate_beam
from decoding.greedy import greedy_search
from architecture.model import CDS
from utils.helpers import get_device, calculate_rouge, set_seed
from utils.cmdopt import parse_sum_args

logging.basicConfig(format="%(asctime)s - %(message)s", level=logging.INFO)

opt = parse_sum_args()
device = get_device()
set_seed(55)


def test(dataset, fields, model):
    already, hypothesis, references = 0, [], []
    for batch in dataset:
        if opt.tf:
            predictions = greedy_search(model, opt, batch.src, fields, opt.max_length)
        else:
            predictions, _ = generate_beam(5, model, opt, batch.src, fields)
            predictions = [p for p, _ in predictions]
        hypothesis += [fields["tgt"].decode(p) for p in predictions]
def train_generator_mle(args, train_dataset, model, tokenizer, optimizer, eval_dataset):
    """ Train the model """
    if args.local_rank in [-1, 0]:
        tb_writer = SummaryWriter()

    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    train_sampler = RandomSampler(train_dataset) if args.local_rank == -1 \
        else DistributedSampler(train_dataset)
    train_dataloader = DataLoader(train_dataset,
                                  sampler=train_sampler,
                                  batch_size=args.train_batch_size)

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(train_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(train_dataloader) // args.gradient_accumulation_steps \
            * args.num_train_epochs

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)

    # Train!
    logger.info("***** Running training *****")
    logger.info("  Num examples = %d", len(train_dataset))
    logger.info("  Num Epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d", args.per_gpu_train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1))
    logger.info("  Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    global_step = 0
    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()
    train_iterator = trange(int(args.num_train_epochs),
                            desc="Epoch",
                            disable=args.local_rank not in [-1, 0])
    set_seed(args)  # Added here for reproducibility (even between python 2 and 3)
    for _ in train_iterator:
        epoch_iterator = tqdm(train_dataloader,
                              desc="Iteration",
                              disable=args.local_rank not in [-1, 0])
        for step, batch in enumerate(epoch_iterator):
            inputs, labels = mask_tokens(batch, tokenizer, args) if args.mlm \
                else (batch, batch)
            inputs = inputs.to(args.device)
            labels = labels.to(args.device)
            model.train()
            outputs = model(inputs, masked_lm_labels=labels) if args.mlm \
                else model(inputs, labels=labels)
            loss = outputs[0]  # model outputs are always tuple in pytorch-transformers (see doc)

            if args.n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu parallel training
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            tr_loss += loss.item()
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer),
                                                   args.max_grad_norm)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   args.max_grad_norm)
                optimizer.step()
                model.zero_grad()
                global_step += 1

                if args.local_rank in [-1, 0] and args.logging_steps > 0 \
                        and global_step % args.logging_steps == 0:
                    # Log metrics
                    if args.local_rank == -1 and args.evaluate_during_training:
                        # Only evaluate when single GPU otherwise metrics may not average well
                        results = evaluate(args, model, tokenizer)
                        for key, value in results.items():
                            tb_writer.add_scalar('eval_{}'.format(key), value, global_step)
                    logging_loss = tr_loss

                if args.local_rank in [-1, 0] and args.save_steps > 0 \
                        and global_step % args.save_steps == 0:
                    # Save model checkpoint
                    output_dir = os.path.join(args.output_dir,
                                              'checkpoint-{}'.format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    # Take care of distributed/parallel training
                    model_to_save = model.module if hasattr(model, 'module') else model
                    model_to_save.model.model.save_pretrained(output_dir)
                    torch.save(args, os.path.join(output_dir, 'training_args.bin'))
                    logger.info("Saving model checkpoint to %s", output_dir)

            if args.max_steps > 0 and global_step > args.max_steps:
                epoch_iterator.close()
                break
        if args.max_steps > 0 and global_step > args.max_steps:
            train_iterator.close()
            break

    if args.local_rank in [-1, 0]:
        tb_writer.close()

    return evaluate_generator_mle(args, model, tokenizer, eval_dataset)
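# NOTE: mask_tokens above applies the standard BERT-style MLM corruption. A sketch of
# it, modeled on the reference implementation in the huggingface language-modeling
# examples; the masking probabilities the real helper reads from args are assumed here.
import torch

def mask_tokens_sketch(inputs, tokenizer, mlm_probability=0.15):
    """Mask 15% of tokens: 80% -> [MASK], 10% -> random token, 10% unchanged."""
    labels = inputs.clone()
    probability_matrix = torch.full(labels.shape, mlm_probability)
    special_tokens_mask = [
        tokenizer.get_special_tokens_mask(val, already_has_special_tokens=True)
        for val in labels.tolist()]
    probability_matrix.masked_fill_(
        torch.tensor(special_tokens_mask, dtype=torch.bool), value=0.0)
    masked_indices = torch.bernoulli(probability_matrix).bool()
    labels[~masked_indices] = -100  # only compute loss on masked tokens

    # 80% of the time, replace the masked input token with [MASK]
    indices_replaced = torch.bernoulli(torch.full(labels.shape, 0.8)).bool() & masked_indices
    inputs[indices_replaced] = tokenizer.convert_tokens_to_ids(tokenizer.mask_token)

    # 10% of the time, replace with a random token; the remaining 10% stay unchanged
    indices_random = torch.bernoulli(torch.full(labels.shape, 0.5)).bool() \
        & masked_indices & ~indices_replaced
    random_words = torch.randint(len(tokenizer), labels.shape, dtype=torch.long)
    inputs[indices_random] = random_words[indices_random]
    return inputs, labels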
def adversarial_train(args, gen, dis, encoder, tokenizer, optimizer,
                      training_dataloader, num_steps, is_discriminator=True):
    set_seed(args)  # Added here for reproducibility (even between python 2 and 3)
    total_loss = 0

    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(training_dataloader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(training_dataloader) // args.gradient_accumulation_steps \
            * args.num_train_epochs

    # multi-gpu training (should be after apex fp16 initialization)
    if args.n_gpu > 1:
        dis = torch.nn.DataParallel(dis)
        gen = torch.nn.DataParallel(gen)

    # Distributed training (should be after apex fp16 initialization)
    if args.local_rank != -1:
        gen = torch.nn.parallel.DistributedDataParallel(
            gen,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)
        dis = torch.nn.parallel.DistributedDataParallel(
            dis,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)

    logger.info("***** Running adversarial training *****")
    logger.info("  Num examples = %d", len(training_dataloader))
    logger.info("  Instantaneous batch size per GPU = %d", args.per_gpu_train_batch_size)
    logger.info(
        "  Total train batch size (w. parallel, distributed & accumulation) = %d",
        args.train_batch_size * args.gradient_accumulation_steps *
        (torch.distributed.get_world_size() if args.local_rank != -1 else 1))
    logger.info("  Gradient Accumulation steps = %d", args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)

    train_iterator = trange(int(1), desc="Epoch", disable=args.local_rank not in [-1, 0])
    for _ in train_iterator:
        epoch_iterator = tqdm(training_dataloader,
                              desc="Iteration",
                              disable=args.local_rank not in [-1, 0])
        for step, batch in enumerate(epoch_iterator):
            # only optimize the relevant model
            dis.train() if is_discriminator else gen.train()

            # Get real embeddings
            batch = tuple(t.to(args.device) for t in batch)
            batch_inputs = create_transformer_mapping(batch, "xlnet")
            real_embedding = encoder(**batch_inputs)
            d_out_real = discriminator_eval(args, real_embedding, dis, tokenizer)

            # Get fake embeddings: (1, batch_size, embedding_dim)
            gen_batch = gen.sample(args.train_batch_size).cuda()
            d_out_fake = discriminator_eval(args, gen_batch, dis, tokenizer)

            # Compute losses and keep the relevant one
            assert d_out_real.shape == d_out_fake.shape, "shapes are not aligned, error"
            if is_discriminator:
                _, loss = get_losses(d_out_real, d_out_fake, args.loss_type)
            else:
                loss, _ = get_losses(d_out_real, d_out_fake, args.loss_type)

            epoch_iterator.set_description('loss:{:.4f}'.format(loss.item()))
            epoch_iterator.update(1)
            optimize(optimizer, loss, dis if is_discriminator else gen)
            total_loss += loss.item()

    average_loss = total_loss / (num_steps * args.train_batch_size) if num_steps != 0 else 0
    return average_loss, optimizer, gen, dis
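# NOTE: get_losses above returns a (generator_loss, discriminator_loss) pair for a
# chosen loss_type. A sketch of the common variants such a helper dispatches on
# (non-saturating BCE and hinge loss); the exact set of loss_type names the real
# helper supports is an assumption.
import torch
import torch.nn.functional as F

def get_losses_sketch(d_out_real, d_out_fake, loss_type='standard'):
    """Return (generator_loss, discriminator_loss) from raw discriminator logits."""
    if loss_type == 'standard':  # non-saturating BCE formulation
        d_loss = F.binary_cross_entropy_with_logits(d_out_real, torch.ones_like(d_out_real)) \
            + F.binary_cross_entropy_with_logits(d_out_fake, torch.zeros_like(d_out_fake))
        g_loss = F.binary_cross_entropy_with_logits(d_out_fake, torch.ones_like(d_out_fake))
    elif loss_type == 'hinge':
        d_loss = torch.relu(1.0 - d_out_real).mean() + torch.relu(1.0 + d_out_fake).mean()
        g_loss = -d_out_fake.mean()
    else:
        raise ValueError(f'unknown loss_type: {loss_type}')
    return g_loss, d_loss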